\documentclass{beamer}
\setbeamertemplate{navigation symbols}{}
\setbeamertemplate{itemize items}[circle]
\usepackage[overridenote]{pdfpc}
\usepackage{tikz}
\usepackage{commath}
\usepackage{amssymb}
\usepackage[T1]{fontenc}
\usepackage{hyperref}
\hypersetup{%
  colorlinks=true,
  allcolors=blue,
}
%
% Local commands
%
\newcommand{\todo}[1]{{\color{orange}TODO #1}}
\newcommand{\naf}{\ensuremath{\sim\!}}
\newcommand{\larr}{\ensuremath{\leftarrow}}
\newcommand{\at}[1]{\ensuremath{\!\del{#1}}}
\newcommand{\co}[1]{\ensuremath{\overline{#1}}}
\newcommand{\fml}[1]{\ensuremath{\mathcal{#1}}}
\newcommand{\deft}[1]{\textbf{#1}}
\newcommand{\pset}[1]{\ensuremath{\mathbb{P}\at{#1}}}
\newcommand{\ent}{\ensuremath{\lhd}}
\newcommand{\langof}[1]{\ensuremath{\fml{L}\at{#1}}}
\newcommand{\uset}[1]{\ensuremath{\left\lvert{#1}\right\rangle}}
\newcommand{\lset}[1]{\ensuremath{\left\langle{#1}\right\rvert}}
\begin{document}
\begin{frame}
The goal of this text is to \textbf{explore how ASP programs with probabilistic facts} can lead to characterizations of the \textbf{joint distributions} of the program's atoms.
\end{frame}
\section{Introduction}
\begin{frame}{Notation}
\note{
- We start with **common notations and assumptions** such as:\\
- Hi\\
- There\\
- And another note}
\begin{itemize}
\item The \textbf{complement} of $x$ is $\co{x} = 1 - x$.
\item A \textbf{probabilistic atomic choice} $\alpha:a$ defines the disjunction $a \lor \neg a$ and assigns probabilities $P\at{a} = \alpha, P\at{\neg a} = \co{\alpha}$.
\item $\delta a$ denotes the disjunction $a \lor \neg a$ associated with the probabilistic choice $\alpha : a$ and $\delta\! \set{\alpha: a, a \in A} = \set{\delta a, a \in A}$ for any set of atoms $A$.
\item Start with the \textbf{closed world assumption}, where $\naf x \models \neg x$.
\item Also assume that \textbf{probabilistic choices} and \textbf{subgoals} are iid.
\end{itemize} \end{frame} \begin{frame} \note{Next, we consider the following **general setting**} \begin{itemize} \item Let $\fml{A}$ be a set of \textbf{atoms}, $\fml{Z}$ the respective set of \textbf{observations}, $\fml{Z} = \set{z = \alpha \cup \nu \middle| \alpha \subseteq \fml{A} \land \nu \subseteq \set{\neg a \middle| a \in \fml{A}} }$ and $\fml{I}$ the set of consistent observations or \textbf{interpretations}, $\fml{I} = \set{z \in \fml{Z} \middle| \forall a \in \fml{A}~\abs{ \set{a, \neg a} \cap z} \leq 1}$. \item A \textbf{PASP program} is $P = C \land F \land R$ and the sets of atoms, observations and interpretations of program $P$ are denoted $\fml{A}_P, \fml{Z}_P$ and $\fml{I}_P$. \item $C = C_P = \set{\alpha_i : a_i \middle| i = 1:n}$ is a set of probabilistic atomic choices and $\delta C_P$ the set of associated disjunctions, $F = F_P$ is a set of (common) facts and $R = R_P$ is a set of (common) rules. \item The \textbf{stable models} of $P = C \land F \land R$ are the stable models of $\delta P = \delta C + F + R$ and denoted $\fml{S} = \fml{S}_P$. \end{itemize} \end{frame} \begin{frame} \note{A model x has lower and upper "bounds".} \begin{itemize} \item \textbf{Proposition.} Let $x\in\fml{I}$ be an interpretation and $\lset{x} = \set{s\in \fml{S} \middle| s \subseteq x}$ and $\uset{x} = \set{s\in \fml{S} \middle| x \subseteq s}$. Exactly one of the following cases takes place \begin{enumerate} % \item\label{prop:lucases.a} $\lset{x} = \set{x} = \uset{x}$. If $a \in \lset{x}$ and $b \in \uset{x}$ then $a \subseteq b$. Since stable models are minimal must be $a = b = x$ and $x$ is a stable model. % \item\label{prop:lucases.b} $\lset{x} \neq \emptyset \land \uset{x} = \emptyset$. % \item\label{prop:lucases.c} $\lset{x} = \emptyset \land \uset{x} \neq \emptyset$. % \item\label{prop:lucases.d} $\lset{x} = \emptyset = \uset{x}$. 
\end{enumerate}
\end{itemize}
\end{frame}
\begin{frame}
\note{Total choices are key to defining the probability of a clause.}
\begin{itemize}
\item The probabilistic facts $C$ define a set $\Theta = \Theta_C$ of \textbf{total choices}, with $2^n$ elements, each one a set $\theta = \set{c_1, \ldots, c_n}$ where $c_i$ is either $a_i$ or $\neg a_i$.
\item For each stable model $s\in\fml{S}$ let $\theta_s$ be the unique \textbf{total choice} contained in $s$ and $\fml{S}_\theta \subseteq \fml{S}$ the set of stable models that contain $\theta$.
\item Define
\begin{equation}
p\at{\theta} = \prod_{a_i \in \theta}\alpha_i \prod_{\neg a_i \in \theta}\co{\alpha_i}.\label{eq:prob.tc}
\end{equation}
\end{itemize}
\end{frame}
\begin{frame}
\note{Relate stable models with Sato's probabilistic semantics}
\begin{quotation}
The problem we address is how to \textbf{assign probabilities to observations} given that a total choice might entail zero or many stable models, \emph{i.e.} how to assign probabilities to the stable models of $\fml{S}_\theta$ when $\envert{\fml{S}_\theta} \neq 1$?
\end{quotation}
\end{frame}
\begin{frame}
\note{There are some problems}
As it turns out, it is quite easy to come up with a program from which no single probability distribution results. For example
$$
\begin{aligned}
0.3:a,& \cr
b \lor c \larr& a.
\end{aligned}
$$
has three stable models
$$
\begin{aligned}
s_1 &= \set{\neg a} \cr
s_2 &= \set{a, b} \cr
s_3 &= \set{a, c}
\end{aligned}
$$
and while $p\at{\set{\neg a}} = 0.7$ is quite natural, we have no further information to support the choice of a single $\alpha\in\intcc{0,1}$ in the assignment
$$
\begin{aligned}
p\at{\set{a, b}} &= 0.3 \alpha \cr
p\at{\set{a, c}} &= 0.3 \co{\alpha}
\end{aligned}
$$
\end{frame}
\begin{frame}
Next we try to formalize the possible configurations of this scenario. Consider the ASP program $P = C \land F \land R$ with total choices $\Theta$ and stable models $\fml{S}$.
Let $d \colon \fml{S} \to \intcc{0,1}$ be such that $\sum_{s\in\fml{S}_\theta} d\at{s} = 1$.
\end{frame}
\begin{frame}
\begin{enumerate}
\item For each $z\in\fml{Z}$ only one of the following cases takes place
\begin{enumerate}
\item $z$ is inconsistent. Then \textbf{define}
\begin{equation}
w_d\at{z} = 0.\label{def:w.inconsistent}
\end{equation}
%
\item $z$ is an interpretation and $\lset{z} = \set{z} = \uset{z}$. Then $z = s$ is a stable model and \textbf{define}
\begin{equation}
w_d\at{z} = w\at{s} = d\at{s} p\at{\theta_s}.\label{eq:prob.sm}
\end{equation}
%
\item $z$ is an interpretation and $\lset{z} \neq \emptyset \land \uset{z} = \emptyset$. Then \textbf{define}
\begin{equation}
w_d\at{z} = \prod_{s \in \lset{z}} w_d\at{s}.\label{def:w.conj}
\end{equation}
%
\item $z$ is an interpretation and $\lset{z} = \emptyset \land \uset{z} \neq \emptyset$. Then \textbf{define}
\begin{equation}
w_d\at{z} = \sum_{s \in \uset{z}} w_d\at{s}.\label{def:w.disj}
\end{equation}
%
\item $z$ is an interpretation and $\lset{z} = \emptyset \land \uset{z} = \emptyset$. Then \textbf{define}
\begin{equation}
w_d\at{z} = 0.\label{def:w.empty}
\end{equation}
\end{enumerate}
%
\item The last point defines a ``weight'' function on the observations that depends not only on the total choices and stable models of a PASP but also on a certain function $d$ that must respect some conditions. To simplify the notation we use the subscript in $w_d$ only when necessary.
%
\item At first, it may seem counter-intuitive that $w\at{\emptyset} = \sum_{s\in\fml{S}} w\at{s}$ is the largest ``weight'' in the lattice. But $\emptyset$, as an interpretation, sets zero restrictions on the ``compatible'' stable models. The ``complement'' of $\bot = \emptyset$ is the \emph{maximal inconsistent} observation $\top = \fml{A} \cup \set{\neg a \middle| a \in \fml{A}}$.
% \item \textbf{We haven't yet defined a probability measure.} To do so we must define a set of samples $\Omega$, a set of events $F\subseteq \pset{\Omega}$ and a function $P:F\to\intcc{0,1}$ such that: \begin{enumerate} \item $P\at{E} \in \intcc{0, 1}$ for any $E \in F$. \item $P\at{\Omega} = 1$. \item if $E_1 \cap E_2 = \emptyset$ then $P\at{E_1 \cup E_2} = P\at{E_1} + P\at{E_2}$. \end{enumerate} % \item In the following, assume that the stable models are iid. % \item Let the sample space $\Omega = \fml{Z}$ and the event space $F = \pset{\Omega}$. Define $Z = \sum_{\zeta\in\fml{Z}} w\at{\zeta}$ and \begin{equation} P\at{z} = \frac{w\at{z}}{Z}, z \in \Omega \label{eq:def.prob} \end{equation} and \begin{equation} P\at{E} = \sum_{x\in E} P\at{x}, E \subseteq \Omega. \label{eq:def.prob.event} \end{equation} Now: \begin{enumerate} \item $P(E) \in \intcc{0,1}$ results directly from the definitions of $P$ and $w$. \item $P\at{\Omega} = 1$ also results directly from the definitions. \item Consider two disjunct events $A, B \subset \Omega \land A \cap B = \emptyset$. Then $$ \begin{aligned} P\at{A \cup B} &= \sum_{x \in A \cup B} P\at{x} \cr &= \sum_{x \in A} P\at{x} + \sum_{x \in B} P\at{x} - \sum_{x \in A \cap B} P\at{x} \cr &= \sum_{x \in A} P\at{x} + \sum_{x \in B} P\at{x} &\text{because}~A\cap B = \emptyset \cr &= P\at{A} + P\at{B}. \end{aligned} $$ \item So $\del{\Omega = \fml{Z}, F = \pset{\Omega}, P}$ is a probability space. {$\blacksquare$} \end{enumerate} \end{enumerate} \end{frame} \section{Cases \& Examples} \subsection{Programs with disjunctive heads} \begin{frame} Consider the program: $$ \begin{aligned} c_1 &= a \lor \neg a, \cr c_2 &= b \lor c \larr a. \end{aligned} $$ This program has two total choices, $$ \begin{aligned} \theta_1&= \set{ \neg a }, \cr \theta_2&= \set{ a }. \end{aligned} $$ and three stable models, $$ \begin{aligned} s_1 &= \set{ \neg a }, \cr s_2 &= \set{ a, b }, \cr s_3 &= \set{ a, c }. 
\end{aligned} $$ \end{frame} \begin{frame} Suppose that we add an annotation $\alpha:a$, which entails $\co{\alpha}:\neg a$. This is enough to get $w\at{s_1} = \co{\alpha}$ but, on the absence of further information, no fixed probability can be assigned to either model $s_2, s_3$ except that the respective sum must be $\alpha$. So, expressing our lack of knowledge using a parameter $d \in \intcc{0, 1}$ we get: $$ \begin{cases} w\at{s_1 } = &\co{\alpha}\cr w\at{s_2 } = &d\alpha\cr w\at{s_3} = &\co{d}\alpha. \end{cases} $$ \end{frame} \begin{frame} Now consider all the interpretations for this program: \begin{center} \begin{tikzpicture} %\draw [help lines] grid (11,3); % \node [draw, circle] (E) at (5.5,0) {$\emptyset$}; % \node [draw, circle] (a) at (2,2) {$a$}; \node [draw, circle] (b) at (3,2) {$b$}; \node [draw, circle] (c) at (4,2) {$c$}; \node [fill=gray!50] (A) at (9,2) {$\co{a}$}; \node (B) at (8,2) {$\co{b}$}; \node (C) at (7,2) {$\co{c}$}; % \node [fill=gray!50] (ab) at (0,4) {$ab$}; \node [fill=gray!50] (ac) at (1,4) {$ac$}; \node (aB) at (2,4) {$a\co{b}$}; \node (aC) at (3,4) {$a\co{c}$}; \node [draw] (Ab) at (4,4) {$\co{a}b$}; \node [draw] (Ac) at (5,4) {$\co{a}c$}; \node [draw] (AB) at (6,4) {$\co{a}\co{b}$}; \node [draw] (AC) at (7,4) {$\co{a}\co{c}$}; \node [fill=white] (bc) at (10,4) {$bc$}; \node [fill=white] (bC) at (11,4) {$b\co{c}$}; \node [fill=white] (Bc) at (12,4) {$\co{b}c$}; \node [fill=white] (BC) at (13,4) {$\co{b}\co{c}$}; % \node [draw] (abc) at (0.5,6) {$abc$}; \node (abC) at (3,6) {$ab\co{c}$}; \node (aBc) at (4,6) {$a\co{b}c$}; \node (aBC) at (5,6) {$a\co{b}\co{c}$}; \node [draw] (Abc) at (7,6) {$\co{a}bc$}; \node [draw] (AbC) at (8,6) {$\co{a}b\co{c}$}; \node [draw] (ABc) at (9,6) {$\co{a}\co{b}c$}; \node [draw] (ABC) at (10,6) {$\co{a}\co{b}\co{c}$}; % \draw [->] (ab) to [out=270,in=180] (E); \draw [->] (ab) to [out=270,in=90] (a); \draw [->] (ab) to [out=270,in=90] (b); \draw [->] (ab) to [out=90,in=270] (abc); % \draw [->] (ac) to 
[out=270,in=180] (E); \draw [->] (ac) to [out=270,in=90] (a); \draw [->] (ac) to [out=270,in=90] (c); \draw [->] (ac) to [out=90,in=270] (abc); % \draw [->] (A) to [out=270,in=0] (E); % \draw [->] (A) to [out=90,in=270] (Abc); \draw [->] (A) to [out=90,in=270] (AbC); \draw [->] (A) to [out=90,in=270] (ABc); \draw [->] (A) to [out=90,in=270] (ABC); % \draw [->] (A) to [out=90,in=270] (Ab); \draw [->] (A) to [out=90,in=270] (Ac); \draw [->] (A) to [out=90,in=270] (AB); \draw [->] (A) to [out=90,in=270] (AC); \end{tikzpicture} \end{center} \end{frame} \begin{frame} In this diagram: \begin{itemize} \item Negations are represented as \emph{e.g.} $\co{a}$ instead of $\neg a$; Stable models are denoted by shaded nodes as \tikz{\node[fill=gray!50] {$ab$}}. \item Interpretations in $\lset{x}$ are \emph{e.g.} \tikz{\node[draw, circle] {$a$}} and those in $\uset{x}$ are \emph{e.g.} \tikz{\node[draw] {$\co{a}b$}}. The remaining are simply denoted by \emph{e.g.} \tikz{\node {$a\co{b}$}}. \item The edges connect stable models with related interpretations. Up arrow indicate links to $\uset{s}$ and down arrows to $\lset{s}$. \item The \emph{weight propagation} sets: $$ \begin{aligned} w\at{abc} &= w\at{ab} w\at{ac} = \alpha^2d\co{d}, \cr w\at{\co{a}\cdot\cdot} &= w\at{\neg a} = \co{\alpha}, \cr w\at{a} &= w\at{ab} + w\at{ac} = \alpha(d + \co{d}) = \alpha, \cr w\at{b} &= w\at{ab} = d\alpha, \cr w\at{c} &= w\at{ac} = \co{d}\alpha, \cr w\at{\emptyset} &= w\at{ab} + w\at{ac} + w\at{\neg a} = d\alpha + \co{d}\alpha + \co{\alpha} = 1, \cr w\at{a\co{b}} &= 0. \end{aligned} $$ \item The total weight is $$ \begin{aligned} Z &= w\at{abc} + 8 w\at{\co{a}b}\cr &+ w\at{ab} + w\at{ac} + w\at{\co{a}}\cr &+ w\at{a}+ w\at{b}+ w\at{c}\cr &+ w\at{\emptyset}\cr % &= - \alpha^{2} d^{2} + \alpha^{2} d + 2 \alpha d - 7 \alpha + 10 \end{aligned} $$ \item Now, if $\alpha$ has an annotation to \emph{e.g.} $0.3$ we get $$ Z = - 0.09 d^{2} + 0.69 d + 7.9 $$ \item Now some statistics are possible. 
For example we get $$ P\at{abc \mid \alpha = 0.3} = \frac{0.09 d \left(d - 1\right)}{0.09 d^{2} - 0.69 d - 7.9} $$. \item This expression can be plotted for $d\in\intcc{0,1}$ \begin{center} \includegraphics[height=15em]{Pabc_alpha03.pdf} \end{center} \item If a data set $E$ entails \emph{e.g.} $P\at{abc \mid E} = 0.0015$ we can numerically solve $$ \begin{aligned} P\at{abc \mid \alpha = 0.3} &= P\at{abc \mid E} \cr \iff\cr \frac{0.09 d \del{d - 1}}{0.09 d^{2} - 0.69 d - 7.9} &= 0.0015 \end{aligned} $$ which has two solutions, $d \approx 0.15861$ or $d \approx 0.83138$. \end{itemize} \end{frame} \subsection{Non-stratified programs} \begin{frame} The following LP is non-stratified, because has a cycle with negated arcs: $$ \begin{aligned} c_1 &= a\lor \neg a,\cr c_2 &= b \larr \naf c \land \naf a, \cr c_3 &= c \larr \naf b. \end{aligned} $$ This program has three stable models $$ \begin{aligned} s_1 &= \set{ a, c }, \cr s_2 &= \set{ \neg a, b }, \cr s_3 &= \set{ \neg a, c }. \end{aligned} $$ \end{frame} \begin{frame} The disjunctive clause $a\lor\neg a$ defines a set of \textbf{total choices} $$ \Theta = \set{ \theta_1 = \set{ a }, \theta_2 = \set{ \neg a } }. $$ \end{frame} \begin{frame} Looking into probabilistic interpretations of the program and/or its models, we define $\alpha = P\at{\Theta = \theta_1}\in\intcc{0, 1}$ and $P\at{\Theta = \theta_2} = \co{\alpha}$. Since $s_1$ is the only stable model that results from $\Theta = \theta_1$, it is natural to extend $P\at{ s_1 } = P\at{\Theta = \theta_1} = \alpha$. However, there is no clear way to assign $P\at{s_2}, P\at{s_3}$ since \emph{both models result from the single total choice} $\Theta = \theta_2$. Clearly, $$P\at{s_2 \mid \Theta} + P\at{s_3 \mid \Theta} = \begin{cases} 0 & \text{if}~\Theta = \theta_1\cr 1 & \text{if}~\Theta = \theta_2 \end{cases} $$ but further assumptions are not supported \emph{a priori}. 
So let's \textbf{parameterize} the equation above,
$$
\begin{cases}
P\at{s_2 \mid \Theta = \theta_2} = &\beta \in \intcc{0, 1} \cr
P\at{s_3 \mid \Theta = \theta_2} = &\co{\beta},
\end{cases}
$$
in order to make our knowledge, or lack thereof, explicit with numeric values and relations.
\end{frame}
\begin{frame}
Now we are able to define the \textbf{joint distribution} of the Boolean random variables $A,B,C$:
$$
\begin{array}{cc|l}
A, B, C& P & \text{Obs.}\cr
\hline
a, \neg b, c & \alpha & s_1, \Theta=\theta_1\cr
\neg a, b, \neg c & \co{\alpha}\beta & s_2, \Theta=\theta_2\cr
\neg a, \neg b, c & \co{\alpha}\co{\beta} & s_3, \Theta=\theta_2\cr
\ast & 0&\text{not a stable model}
\end{array}
$$
where $\alpha, \beta\in\intcc{0,1}$.
\end{frame}
\section{Conclusions}
\begin{frame}
\begin{itemize}
\item We can use the basics of probability theory and logic programming to assign explicit \emph{parameterized} probabilities to the (stable) models of a program.
\item In the covered cases it was possible to define a (parameterized) \emph{family of joint distributions}.
\item How far this approach can cover all the cases of logic programs is (still) an issue \emph{under investigation}.
\item However, it is non-restrictive since \emph{no unusual assumptions are made}.
\end{itemize}
\end{frame}
\section*{ASP \& related definitions}
\begin{frame}
\begin{itemize}
\item An \deft{atom} is $r(t_1, \ldots, t_n)$ where
\begin{itemize}
\item $r$ is an $n$-ary predicate symbol and each $t_i$ is a constant or a variable.
\item A \deft{ground atom} has no variables; a \deft{literal} is either an atom $a$ or a negated atom $\neg a$.
\end{itemize}
\item An \deft{ASP Program} is a set of \deft{rules} such as $h_1 \vee \cdots \vee h_m \leftarrow b_1 \wedge \cdots \wedge b_n$.
\begin{itemize}
\item The \deft{head} of this rule is $h_1 \vee \cdots \vee h_m$, the \deft{body} is $b_1 \wedge \cdots \wedge b_n$ and each $b_i$ is a \deft{subgoal}.
\item Each $h_i$ is a literal, each subgoal $b_j$ is a literal or a literal preceded by $\naf\;$ and $m + n > 0$.
\item A \deft{propositional program} has no variables.
\item A \deft{non-disjunctive rule} has $m \leq 1$; a \deft{normal rule} has $m = 1$; a \deft{constraint} has $m = 0$; a \deft{fact} is a normal rule with $n = 0$.
\item The \deft{Herbrand base} of a program is the set of ground literals that result from combining all the predicates and constants of the program.
\begin{itemize}
\item An \deft{interpretation} is a consistent subset (\emph{i.e.} it doesn't contain $\set{a, \neg a}$) of the Herbrand base.
\item Given an interpretation $I$, a ground literal $a$ is \deft{true}, $I \models a$, if $a \in I$; otherwise the literal is \deft{false}.
\item A ground subgoal, $\naf b$, where $b$ is a ground literal, is \deft{true}, $I \models \naf b$, if $b \not\in I$; otherwise, if $b \in I$, it is \deft{false}.
\item A ground rule $r = h_1 \vee \cdots \vee h_m \leftarrow b_1 \wedge \cdots \wedge b_n$ is \deft{satisfied} by the interpretation $I$, \emph{i.e.} $I \models r$, iff
$$
\del{\forall j~I \models b_j} \implies \del{\exists i~I \models h_i}.
$$
\item A \deft{model} of a program is an interpretation that satisfies all its rules. Denote by $\fml{M}_P$ the set of all models of $P$.
\end{itemize}
\item The \deft{dependency graph} of a program is a digraph where:
\begin{itemize}
\item Each grounded atom is a node.
\item For each grounded rule there are edges from the atoms in the body to the atoms in the head.
\item A \deft{negative edge} results from an atom with $\naf\;$; otherwise it is a \deft{positive edge}.
\item An \deft{acyclic program} has an acyclic dependency graph; a \deft{normal program} has only normal rules; a \deft{definite program} is a normal program that contains neither $\neg$ nor $\naf\;$.
\item In the dependency graph of a \deft{stratified program} no cycle contains a negative edge.
\item \textbf{A stratified program has a single minimal model} that assigns either true or false to each atom. \end{itemize} \item Every \emph{definite program} has a unique minimal model: its \deft{semantic}. \item Programs with negation may have no unique minimal model. \item Given a program $P$ and an interpretation $I$, their \deft{reduct}, $P^I$, is the propositional program that results from \begin{enumerate} \item Removing all the rules with $\naf b$ in the body where $b \in I$. \item Removing all the $\naf b$ subgoals from the remaining rules. \end{enumerate} \item A \deft{stable model} (or \deft{answer set}) of the program $P$ is an interpretation $I$ that is the minimal model of the reduct $P^I$. \item Denote $\fml{S}_P$ the set of all stable models of program $P$. The \deft{semantics} (or \deft{answer sets}) of a program $P$ is the set $\fml{S}_P$. \begin{itemize} \item Some programs, such as $a \leftarrow \naf a$, have no stable models. \item A stable model is an interpretation closed under the rules of the program. \end{itemize} \end{itemize} \end{frame} \end{document}