pre-paper.tex 19.7 KB
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425
\documentclass[a4paper, 12pt]{article}

\usepackage[
    bibstyle=numeric,
    citestyle=numeric
]{biblatex} %Imports biblatex package
\addbibresource{zugzwang.bib} %Import the bibliography file

\usepackage[x11colors]{xcolor}
%
\usepackage{tikz}
\tikzset{
    event/.style={},
    smodel/.style={fill=gray!25},
    tchoice/.style={draw, circle},
    indep/.style={draw, dashed},
    proptc/.style = {-latex, dashed},
    propsm/.style = {-latex, thick},
    doubt/.style = {gray}
}
\usetikzlibrary{calc, positioning}
%
\usepackage{hyperref}
\hypersetup{
    colorlinks=true,
    linkcolor=blue,
    citecolor=blue,
}
%
\usepackage{commath}
\usepackage{amsthm}
\newtheorem{assumption}{Assumption}
\newtheorem{definition}{Definition}
\usepackage{amssymb}
\usepackage[nice]{nicefrac}
%
% Local commands
%
\newcommand{\note}[1]{\marginpar{\scriptsize #1}}
\newcommand{\naf}{\ensuremath{\sim\!}}
\newcommand{\larr}{\ensuremath{\leftarrow}}
\newcommand{\at}[1]{\ensuremath{\!\del{#1}}}
\newcommand{\co}[1]{\ensuremath{\overline{#1}}}
\newcommand{\fml}[1]{\ensuremath{{\cal #1}}}
\newcommand{\deft}[1]{\textbf{#1}}
\newcommand{\pset}[1]{\ensuremath{\mathbb{P}\at{#1}}}
\newcommand{\ent}{\ensuremath{\lhd}}
\newcommand{\cset}[2]{\ensuremath{\set{#1,~#2}}}
\newcommand{\langof}[1]{\ensuremath{\fml{L}\at{#1}}}
\newcommand{\uset}[1]{\ensuremath{\left|{#1}\right>}}
\newcommand{\lset}[1]{\ensuremath{\left<{#1}\right|}}
\newcommand{\pr}[1]{\ensuremath{\mathrm{P}\at{#1}}}
\newcommand{\class}[1]{\ensuremath{\nicefrac{#1}{\sim}}}
\newcommand{\urep}[1]{\ensuremath{\nicefrac{#1}{\varnothing}}}
\newcommand{\lrep}[1]{\ensuremath{\nicefrac{\varnothing}{#1}}}
\newcommand{\rep}[2]{\nicefrac{#1}{#2}}
\newcommand{\given}{\ensuremath{~\middle|~}}
\newcommand{\todo}[1]{\textbf{\color{orange}~(~#1~)~}}

\title{Zugzwang\\\textit{Logic and Artificial Intelligence}}
\author{
    \begin{tabular}{cc}
        Francisco Coelho & Bruno Dinis\\
        \texttt{fc@uevora.pt} & \texttt{bruno.dinis@uevora.pt}
    \end{tabular}    
}

\begin{document}

\maketitle

\nocite{*}

\begin{abstract}
    \todo{rewrite}
    A major limitation of logical representations is the implicit assumption that the Background Knowledge (BK) is perfect. This assumption is problematic if data is noisy, which is often the case. Here we aim to explore how ASP specifications with probabilistic facts can lead to characterizations of probability functions on the specification's domain. 
\end{abstract}

\section{Introduction and Motivation}

\todo{rewrite}
Answer Set Programming (ASP) is a logic programming paradigm based on the Stable Model semantics of Normal Logic Programs (NP) that can be implemented using the latest advances in SAT solving technology. ASP is a truly declarative language that supports language constructs such as disjunction in the head of a clause, choice rules, and hard and weak constraints.

The Distribution Semantics (DS) is a key approach to extend logical representations with probabilistic reasoning. Probabilistic Facts (PF) are the most basic stochastic DS primitive and they take the form of logical facts, $a$, labelled with a probability, such as $p::a$; Each probabilistic fact represents a boolean random variable that is true with probability $p$ and false with probability $1 - p$. A (consistent) combination of the PFs defines a \textit{total choice} $c = \set{p::a, \ldots}$ such that

\begin{equation}
    \pr{C = x} = \prod_{a\in c} p \prod_{a \not\in c} (1- p).
    \label{eq:prob.total.choice}
\end{equation}

Our goal is to extend this probability, from total choices, to cover the specification domain. We can foresee two key applications of this extended probability:

\begin{enumerate}
    \item Support any probabilistic reasoning/task on the specification domain.
    \item Also, given a dataset and a divergence measure, now the specification can be scored (by the divergence w.r.t.\ the \emph{empiric} distribution of the dataset), and sorted amongst other specifications. This is a key ingredient in algorithms searching, for example, an optimal specification of the dataset. 
\end{enumerate}

This goal faces a critical problem concerning situations where multiple standard models result from a given total choice, illustrated by the following example. The specification
\begin{equation}
    \begin{aligned}
        0.3::a&,\cr
        b \vee c& \leftarrow a.
    \end{aligned}
    \label{eq:example.1}
\end{equation}
has three stable models, $\co{a}, ab$ and $ac$. While it is straightforward to set $P(\co{a})=0.7$, there is \textit{no further information} to assign values to $P(ab)$ and $P(ac)$. At best, we can use a parameter $x$ such that
$$
\begin{aligned}
P(ab) &= 0.3 x,\cr
P(ac) &= 0.3 (1 - x).
\end{aligned}
$$

This uncertainty in inherent to the specification, but can be mitigated with the help of a dataset: the parameter $x$ can be estimated from the empirical distribution.

In summary, if an ASP specification is intended to describe some observable system then:

\begin{enumerate}
    \item The observations can be used to estimate the value of the parameters (such as $x$ above and others entailed from further clauses).
    \item With a probability set for the stable models, we want to extend it to all the events of the specification. 
    \item This extended probability can then be related to the \textit{empirical distribution}, using a probability divergence, such as Kullback-Leibler; and the divergence value used as a \textit{performance} measure of the specification with respect to the observations.
    \item If that specification is only but one of many possible candidates then that performance measure can be used, \textit{e.g.} as fitness, by algorithms searching (optimal) specifications of a dataset of observations.
\end{enumerate}

Currently, we are on the step two above: Extending a probability function (with parameters such as $x$), defined on the stable sets of a specification, to all the events of the specification. This extension must, of course, respect the axioms of probability so that probabilistic reasoning is consistent with the ASP specification. 

\section{Extending Probabilities}

\begin{figure}[t]
    \begin{center}
        \begin{tikzpicture}
            \node[event] (E) {$\bot$};
            \node[tchoice, above left = of E] (a) {$a$};
            \node[smodel, above left = of a] (ab) {$ab$};
            \node[smodel, above right = of a] (ac) {$ac$};
            \node[event, below = of ab] (b) {$b$};
            \node[event, below = of ac] (c) {$c$};
            \node[event, above right = of ab] (abc) {$abc$};
            \node[indep, right = of ac] (bc) {$bc$};
            \node[tchoice, smodel, below right = of bc] (A) {$\co{a}$};
            \node[event, above = of A] (Ac) {$\co{a}c$};
            \node[event, above right = of Ac] (Abc) {$\co{a}bc$};
            % ----
            \draw[proptc] (a) to[bend left] (ab);
            \draw[proptc] (a) to[bend right] (ac);

            \draw[propsm] (ab) to[bend left] (abc);
            \draw[propsm] (ac) to[bend right] (abc);

            \draw[propsm] (A) to (Ac);
            \draw[propsm] (A) to (Abc);

            \draw[doubt] (ab) to[bend right] (E);
            \draw[doubt] (ac) to[bend right] (E);
            \draw[doubt] (A) to[bend left] (E);

            \draw[doubt] (ab) to[bend right] (b);
            \draw[doubt] (ac) to[bend left] (c);
            \draw[doubt] (ab) to[bend left] (a);
            \draw[doubt] (ac) to[bend right] (a);
            \draw[doubt] (c) to[bend right] (bc);
            \draw[doubt] (abc) to[bend left] (bc);
            \draw[doubt] (Abc) to (bc);
            \draw[doubt] (c) to[bend right] (Ac);
        \end{tikzpicture}
    \end{center}
    \caption{Extending values, \textit{e.g.} probabilities, from total choice nodes to stable models and then to general events in a node-wise process quickly leads to coherence problems concerning probability, with no clear systematic approach.}
\end{figure}

Given an ASP specification, we consider the \textit{atoms} $a \in \fml{A}$ and \textit{literals}, $z \in \fml{L}$, \textit{events} $e \in \fml{E} \iff e \subseteq \fml{L}$ and \textit{worlds} $w \in \fml{W}$ (consistent events), \textit{total choices} $c \in \fml{C} \iff c = a \vee \neg a$ and \textit{stable models} $s \in \fml{S}$.

% In a statistical setting, the outcomes are the literals $x$, $\neg x$ for each atom $x$, the events express a set of possible outcomes (including $\emptyset$, $\set{a, b}$, $\set{a, \neg a, b}$, \textit{etc.}), and worlds are events with no contradictions.

Out path starts with a perspective of stable models as playing a role similar to \textit{prime} factors. The stable models of specification are the irreducible events entailed from that specification and any event must be interpreted under its relation with the stable models. This stance leads to definition \ref{def:rel.events}:

\begin{definition}\label{def:rel.events}
    Let $u,v \in \fml{E}$, and $\fml{S}, \fml{W}$ the set of stable models, resp. consistent events, of some specification. Define

    \begin{equation}
        u \sim v \iff u,v \not\in\fml{W} \vee (\uset{u} = \uset{v} \wedge \lset{u} = \lset{v}).\label{eq:rel.events}
    \end{equation}
\end{definition}

This equivalence relation defines a partition of the events space, where each class holds a unique relation with the stable models. In particular, we can denote each class by
\begin{equation}
    \class{e} = \begin{cases}
        \star &\text{if~} e \in \fml{E} \setminus \fml{W}, \\
        \rep{\uset{e}}{\lset{e}} &\text{otherwise}.
    \end{cases}
\end{equation}

Consider the example from \ref{eq:example.1}. The stable models are $\fml{S} = \co{a}, ab, ac$ so the quotient set of this relation is
\begin{equation}
\begin{aligned}
    &\star, \urep{\varnothing}, \\
    &\rep{\co{a}}{\co{a}} = \set{\co{a}}, \rep{ab}{ab} = \set{ab}, \rep{ac}{ac} = \set{ac}\\
    &\urep{\co{a}},  \urep{ab}, \urep{ac}, \lrep{\co{a}},  \lrep{ab}, \lrep{ac}, \\
    &\urep{\co{a}, ab}, \urep{\co{a}, ac},  \urep{ab, ac}, 
    \lrep{\co{a}, ab}, \lrep{\co{a}, ac},  \lrep{ab, ac},\\
    &\urep{\co{a}, ab, ac}, \lrep{\co{a}, ab, ac}.
\end{aligned}
\end{equation}

For example, $\class{a} = \urep{ab, ac}$, $\class{abc} = \lrep{ab, ac}$ and $\class{bc} = \urep{\varnothing}$.


\begin{itemize}
    \item Since all events within a equivalence class have the same relation with the stable models, probability assignment should be constant for the elements of that class.
    \item So, instead of dealing with $2^6$ events, we need only to handle $19$ classes, well defined in terms of combinations of the stable models. 
    \item The probability events are going to be the \textit{classes}.
    \item The physical system might have \textit{latent} variables, possibly also represented in the specification. These variables are never observed, so observations should be concentrated in the $\nicefrac{\uset{e}}{\varnothing}$ classes.
\end{itemize}

\todo{must adapt} Our path, traced by equations (\ref{eq:prob.total.choice}) and (\ref{eq:prob.stablemodel} --- \ref{eq:prob.events}), starts with the probability of total choices, $\pr{C = c}$, expands it to stable models, $\pr{S = s}$, and then to worlds $\pr{W = w}$ and events $\pr{E = e}$.

\begin{enumerate}
    \item \textbf{Total Choices.} This case is given by $\pr{C = c}$, from equation \ref{eq:prob.total.choice}. Each total choice $C = c$ (together with the facts and rules) entails some stable models, $s \in S_c$, and each stable model $S = s$ contains a single total choice $c_s \subseteq s$.
    \item \textbf{Stable Models.} Given a stable model $s \in \fml{S}$, and variables/values $x_{s,c} \in \intcc{0, 1}$, 
    \begin{equation}
        \pr{S = s \given C = c} = \begin{cases}
            x_{s,c} & \text{if~} s \in S_c,\cr
            0&\text{otherwise}
        \end{cases}
        \label{eq:prob.stablemodel}
    \end{equation}
    such that $\sum_{s \in S_c} x_{s,c} = 1$.
    \item\label{item:world.cases} \textbf{Worlds.} Each world $W  = w$ either:
    \begin{enumerate}
        \item Is a \textit{stable model}. Then 
        \begin{equation}
            \pr{W  = w \given C = c} = \pr{S = s \given C = c}.
            \label{eq:world.fold.stablemodel}
        \end{equation}
        \item \textit{Contains} some stable models. Then
        \begin{equation}
            \pr{W  = w \given C = c} = \prod_{s \subset w}\pr{S = s \given C = c}.
            \label{eq:world.fold.superset}
        \end{equation}
        \item \textit{Is contained} in some stable models. Then
        \begin{equation}
            \pr{W  = w \given C = c} = \sum_{s \supset w}\pr{S = s \given C = c}.
            \label{eq:world.fold.subset}
        \end{equation}
        \item Neither contains nor is contained by a stable model. Then
        \begin{equation}
            \pr{W  = w} = 0.
            \label{eq:world.fold.independent}
        \end{equation}
    \end{enumerate}
    \item \textbf{Events.} For each event $E = e$,
    \begin{equation}
        \pr{E = e \given C = c} = \begin{cases}
            \pr{W = e \given C = c} & e \in \fml{W}, \cr
            0 & \text{otherwise}.
            \end{cases}
        \label{eq:prob.events}
    \end{equation}
\end{enumerate}

Since stable model are minimal, there is no proper chain $s_1 \subset w \subset s_2$ so each world folds into exactly one ot the four cases of point \ref{item:world.cases} above.

% PARAMETERS FOR UNCERTAINTY

Equation (\ref{eq:prob.stablemodel}) expresses the lack of knowledge about the probability assignment when a single total choice entails more than one stable model. In this case, how to distribute the respective probability? Our answer to this problem consists in assigning an unknown probability, $x_{s,c}$, conditional on the total choice, $c$, to each stable model $s$. This approach allow the expression of an unknown quantity and future estimation, given observed data.

% STABLE MODEL
The stable model case, in equation (\ref{eq:world.fold.stablemodel}), identifies the probability of a stable model \textit{as a world} with its probability as defined previously in equation (\ref{eq:prob.stablemodel}), as a stable model.

% SUPERSET
Equation \ref{eq:world.fold.superset} results from conditional independence of the stable models $s \subset w$. Conditional independence of stable worlds asserts a least informed strategy that we make explicit:

\begin{assumption}
    Stable models are conditionally independent, given their total choices.
\end{assumption}

Consider the stable models $ab, ac$ from the example above. They result from the clause $b \vee c \leftarrow a$ and the total choice $a$. These formulas alone impose no relation between $b$ and $c$ (given $a$), so none should be assumed. Dependence relations are further discussed in Subsection (\ref{subsec:dependence}).

% SUBSET
\hrule

\bigskip
I'm not sure about what to say here.\marginpar{todo}

My first guess was
\begin{equation*}
    \pr{W  = w \given C = c} = \sum_{s \supset w}\pr{S = s \given C = c}.
\end{equation*}

$\pr{W = w \given C = c}$ already separates $\pr{W}$ into \textbf{disjoint} events!

Also, I am assuming that stable models are independent. 

This would entail $p(w) = p(s_1) + p(s_2) - p(s_1)p(s_2)$ \textit{if I'm bound to set inclusion}. But I'm not. I'm defining a relation

Also, if I set $p(w) =  p(s_1) + p(s_2)$ and respect the laws of probability, this entails $p(s_1)p(s_2) = 0$.

So, maybe what I want is (1) to define the cover $\hat{w} = \cup_{s \supset w} s$

\begin{equation*}
    \pr{W  = w \given C = c} = \sum_{s \supset w}\pr{S = s \given C = c} - \pr{W = \hat{w} \given C = c}.
\end{equation*}

But this doesn't works, because we'd get $\pr{W = a \given C = a} < 1$.
%

%
\bigskip
\hrule

% INDEPENDENCE

A world that neither contains nor is contained in a stable model describes a case that, according to the specification, should never be observed. So the respective probability is set to zero, per equation (\ref{eq:world.fold.independent}).
%
%   ================================================================
%
\subsection{Dependence}
\label{subsec:dependence}

Dependence relations in the underlying system can be explicitly expressed in the specification. 

For example, $b \leftarrow c \wedge d$, where $d$ is an atomic choice, explicitly expressing this dependence between $b$ and $c$. One would get, for example, the specification
$$
0.3::a, b \vee c \leftarrow a, 0.2::d, b \leftarrow c \wedge d.
$$
with the stable models
$
\co{ad}, \co{a}d, a\co{d}b, a\co{d}c, adb
$. 


The interesting case is the subtree of the total choice $ad$. Notice that no stable model $s$ contains $adc$ because (1)  $adb$ is a stable model and (2) if $adc \subset s$ then $b \in s$ so $adb \subset s$. 

Following equations (\ref{eq:world.fold.stablemodel}) and (\ref{eq:world.fold.independent}) this sets 
\begin{equation*}
    \begin{cases}
        \pr{W = adc \given C = ad} = 0,\cr
        \pr{W = adb \given C = ad} = 1
    \end{cases}
\end{equation*}
which concentrates all probability mass from the total choice $ad$ in the $adb$ branch, including the node $W = adbc$. This leads to the following cases:
$$
\begin{array}{l|r}
    x & \pr{W = x \given C = ad}\\
    \hline
    ad & 1 \\
    adb & 1\\
    adc & 0\\
    adbc & 1    
\end{array}
$$
so, for $C = ad$, 
$$
\begin{aligned}
    \pr{W = b} &= \frac{2}{4} \cr
    \pr{W = c} &= \frac{1}{4} \cr
    \pr{W = bc} &= \frac{1}{4} \cr
                &\not= \pr{W = b}\pr{W = c}
\end{aligned}
$$
\textit{i.e.} the events $W = b$ and $W = c$ are dependent and that dependence results directly from the segment $0.2::d, b \leftarrow c \wedge d$ in the specification.


%

%
\hrule
\begin{quotation}\note{Todo}
    
    Prove the four world cases (done), support the product (done) and sum (tbd) options, with the independence assumptions.
\end{quotation}
    
\section{Developed Example}

We continue with the specification from equation \ref{eq:example.1}. 

\textbf{Step 1: Total Choices.} The total choices, and respective stable models, are
\begin{center}
    \begin{tabular}{l|r|r}
        Total Choice ($c$) & $\pr{C = c}$ & Stable Models ($s$)\\
        \hline 
        $a$ & $0.3$ & $ab$ and $ac$.\\
        $\co{a} = \neg a$ & $\co{0.3} = 0.7$ & $\co{a}$.
    \end{tabular}
\end{center}

\textbf{Step 2: Stable Models.} Suppose now that 
\begin{center}
    \begin{tabular}{l|c|r}
        Stable Models ($s$) & Total Choice ($c$)  & $\pr{S = c \given C = c}$\\
        \hline 
        $\co{a}$    & $1.0$ & $\co{a}$. \\
        $ab$        & $0.8$ & $a$. \\
        $ac$        & $0.2 = \co{0.8}$ & $a$.
    \end{tabular}
\end{center}

\textbf{Step 3: Worlds.} Following equations \ref{eq:world.fold.stablemodel} --- \ref{eq:world.fold.independent} we get:
\begin{center}
    \begin{tabular}{l|c|l|c|r}
        Occ. ($o$) & S.M. ($s$) & Relation & T.C. ($c$)  & $\pr{W  = w}$\\
        \hline 
        $\emptyset$ & all           & contained & $a$, $\co{a}$ & $1.0$ \\ 
        $a$         & $ab$, $ac$    & contained & $a$ & $0.8\times 0.3 + 0.2\times 0.3 = 0.3$ \\
        $b$         & $ab$          & contained & $a$ & $0.8\times 0.3 = 0.24$ \\ 
        $c$         & $ac$          & contained & $a$ & $0.2\times 0.3 = 0.06$ \\ 
        $\co{a}$    & $\co{a}$      & stable model & $\co{a}$ & $1.0\times 0.3 = 0.3$ \\
        $\co{b}$    & none          & independent & none & $0.0$ \\
        $\co{c}$    & none              & \ldots & & \\
        $ab$        & $ab$          & stable model & $a$ & $0.24$ \\  
        $ac$        & $ac$          & stable model & $a$ & $0.06$ \\  
        $a\co{b}$   & none              & \ldots & & \\
        $a\co{c}$   & none              & \ldots & & \\
        $\co{a}b$   & $\co{a}$      & contains & $\co{a}$ & $1.0$ \\
        $\co{a}c$   &  $\co{a}$         & \ldots & & \\
        $\co{a}\co{b}$  & $\co{a}$      & \ldots & & \\
        $\co{a}\co{c}$  & $\co{a}$      & \ldots & & \\
        $abc$       & $ab$, $ac$    & contains & $a$ & $0.8\times 0.2 = 0.016$ \\
    \end{tabular}
\end{center}

\section*{References}

\printbibliography

\end{document}