% probast_draf.tex
\documentclass[bigger]{beamer}
\useinnertheme{circles}
\usefonttheme[onlymath]{serif}
\usefonttheme{structurebold}
\setbeamertemplate{navigation symbols}{}
\usepackage{xcolor}
\setbeamercolor{highlight block}{bg=gray}

\usepackage{tikz}
\usetikzlibrary{
    automata,%
    positioning,%
    calc,%
    decorations,%
    decorations.pathmorphing%
}
\usepackage{tkz-graph}
\newcommand{\qlr}[2]{\ensuremath{\begin{matrix}#1\cr\begin{aligned}\hline #2\end{aligned}\end{matrix}}}
\newcommand{\q}[1]{\mathbf{#1}}
\newcommand{\isep}{~,~}

\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage[normalem]{ulem} % To strikeout
\usepackage{commath}

\newcommand{\naf}{\ensuremath{\sim\!\!}}

\title{Probabilistic Answer Set Programming}
\subtitle{A Research Draft}
\author{Francisco Coelho}
\institute[\texttt{fc@uevora.pt}]{
    NOVA LINCS \&\\
    High Performance Computing Chair \&\\
    Departamento de Informática, Universidade de Évora
}

\begin{document}
    \begin{frame}[plain]
        \titlepage
    \end{frame}
    
    \section*{Motivation}

    \begin{frame}
        \frametitle{In short}
    
        \begin{itemize}
            \item Use \textbf{logic programs} to formalize knowledge.
            \begin{itemize}
                \item logic program = formula = model.
                \item \textbf{Observations} not always agree with such models --- errors may result from sensors or from a wrong or incomplete model.
            \end{itemize}
            \item We can associate \textbf{quantities} to formulas and sub-formulas.
            \begin{itemize}
                \item And define how observations \textbf{update} those quantities.
            \end{itemize}            
            \item Adequate quantities and updates might be used to \textbf{interpret or evaluate the model} \emph{e.g.} define a joint distribution or measure the accuracy of a clause. 
        \end{itemize}            
    \end{frame}

    % \begin{frame}
    %     \frametitle{Plan}
    %     \begin{itemize}
    %         \item Use \textbf{answer set programs} as a logic programming language.
    %         \item Define the effect of an \textbf{observation} on quantities associated to clauses and their parts.
    %         \item Later, use those numbers to define \textbf{probabilistic interpretations} or \textbf{performance quantities} of a formula, clauses and atoms.
    %     \end{itemize}
    % \end{frame}
    
    \section{Development}
    
    \begin{frame}
        \tableofcontents[currentsection]
    \end{frame}

    % \begin{frame}
    %     \frametitle{The seed on an idea}
    %     We want to define the \textbf{joint distribution} of the stable models.
    %     \begin{enumerate} 
    %         \item A \textbf{boolean random variable} can be described by a disjunction $a; \neg a$.
    %         \item This ASP program has two stable models: $a$ and $\neg a$.
    %         \item A program with $n$ such facts $a_i; \neg a_i$ has $2^n$ stable models, the distinct combinations of those choices. 
    %         \item \textbf{If each $a_i$ has probability $p_i$ then the probability of a stable model $W$ would be} $$P(W) = \prod_{a_i \in W}p_i \prod_{\neg a_i \in W} (1 - p_i).$$            
    %     \end{enumerate}        
    %     \pause
    %     \begin{alertblock}{But this is wrong.}
    %         Even assuming that those facts are marginally independent --- which we will do.
    %     \end{alertblock}
    % \end{frame}

    \begin{frame}{Problem 1: Probabilities}
        The stable models of $c_1 \wedge c_2$ where
        $$        
        \begin{aligned}
            c_1 &: b \vee \neg b\\
            c_2 &: h_1 \vee h_2 &\leftarrow b
        \end{aligned}
        $$
        are $$\set{\neg b}, \set{b, h_1}~\text{and}~\set{b, h_2}.$$  

        \begin{block}{Associate quantities to clauses and update them with observations.}        
            \onslide*<1>{
                Then compute:
                \begin{itemize}
                    \item The probability of a stable model. 
                    \item The probability of an atom.
                    \item The joint distribution of all atoms.
                \end{itemize}
            }
            \onslide*<2>{
                \begin{itemize}
                    \item How to \textbf{match} an observation $z$ with a clause case $h_i,b$?
                    \item How do observations \textbf{update} the probabilities?
                    \item Is this enough to compute the \textbf{joint distribution of the atoms}?
                \end{itemize}
            }
        \end{block}    
    \end{frame}

    \begin{frame}
        \frametitle{Matching observations and sub-formulas}

        \begin{itemize}
            \item An \alert{observation} is a subset of the literals\footnote{The set of atoms, $a$, of the program and their classic negations, $\neg a$.} from a program.
            \item A \alert{consistent} observation has no subset $\set{p, \neg p}$.
            \item A \emph{consistent} observation $z$ is \alert{relevant} for the clause $h \leftarrow b$ if $b \subseteq z$.
            \item A disjunctive clause $$h_1 \vee \cdots \vee h_n \leftarrow b_1 \wedge \cdots \wedge b_m$$ has $n$ \alert{cases}: $\set{h_i, b_1, \ldots, b_m}, i = 1:n$.
            \item The \emph{consistent} observation $z$ and the case $\set{h, b_{1:n}}$  \alert{match} if 
            $\set{h, b_{1:n}} \subseteq z$.     
        \end{itemize}
        The above definitions apply to \textbf{facts}, $m=0$, and \textbf{constraints}, $n=0$.
    \end{frame}

    \begin{frame}
        \frametitle{Counters and updates}
        A consistent observation \textbf{relevant} for a clause $h_1 \vee \cdots \vee h_n \leftarrow b$ should \textbf{increase the probability of matched cases}.
        \begin{block}{Counters and updates}
            \onslide*<1>{
            \begin{enumerate}
                \item Associate \textbf{counters}, $u, r, n$, to clauses $h \leftarrow b$.
                \item Associate a \textbf{counter}, $m_i$, to cases $h_i, b$.
                \item \textbf{Initial} values result from \emph{prior} knowledge.
                \item Each \emph{consistent} observation \textbf{increments}:
                \begin{itemize}
                    \item The $u$ counters of relevant \alert{u}nmatched clauses (no matched cases).
                    \item The $r$ counters of \alert{r}elevant clauses. 
                    \item The $n$ counters of \alert{n}ot relevant clauses.
                    \item The $m_i$ counters of \alert{m}atched cases $h_i, b$.
                    \item Clause counters must verify $r \leq u + \sum_i m_i$.
                \end{itemize}
            \end{enumerate}  
            }
            \onslide*<2>{
                \begin{itemize}
                    \item Literals must be explicitly observed: $\neg b \not= \naf b$.
                    \item Counters relate a clause structure with observations.
                    \item So far stable models had no role. 
                \end{itemize}
            }
        \end{block}
    \end{frame}

    \begin{frame}
        \frametitle{Counters and updates: An example}
        Given the following clauses with \alert{annotated counters},
        $$        
        \begin{aligned}
            %&H \leftarrow B&&\text{counters:}~ m_{1:n} ; u, r, n \\
            &b \vee \neg b &&\text{counters:}~ 7, 2 ; 3, 12, 0 \\
            &h_1 \vee h_2 \leftarrow b  &&\text{counters:}~ 4, 3 ; 2, 6, 5
        \end{aligned}
        $$
        \onslide*<2>{
            \begin{columns}[t]
                \begin{column}{0.5\textwidth}                    
                \begin{block}{Counters of $b \vee \neg b$}\small
                    $0$ observations were not relevant (because the body is $\top$);
                    
                    There were $12$ relevant observations;
                    
                    Of those, $b$ was matched by $7$, $\neg b$ by $2$ and $3$ observations matched neither ($\models\naf b, \naf \neg b$).
                \end{block}
                \end{column}
                \begin{column}{0.5\textwidth}                    
                \begin{block}{Counters of $h_1 \vee h_2 \leftarrow b$}\small
                    There were $11 = 6 + 5$ observations, $6$ relevant to this clause; 
                    
                    From these, $4$ matched $h_1$, $3$ matched $h_2$ and $2$ matched no case.
                \end{block}
                \end{column}
            \end{columns}
        }
        \onslide*<3>{
        \begin{block}{What can be computed?}
            \begin{itemize}
                \item $P(\neg b) = \frac{2}{12}$ because $\neg b$ matched $2$ of $12$ relevant observations.
                \item $P(h_1 | b) = \frac{4}{6}$ because $h_1, b$ matched $4$ of $6$ relevant observations.
                \item \alert{$P(b)$ needs further information}. 
                \begin{itemize}
                    \item \emph{E.g.} \textbf{assuming independent observations},
                    $$P(b) = \frac{7 + 6}{12 + 0 + 6 + 5}.$$ 
                \end{itemize}                
            \end{itemize}            
        \end{block}}
        \onslide*<4>{
        \begin{block}{What can be computed? --- assuming independent observations}
            \begin{itemize}
                \item $P(b) + P(\neg b) = \frac{13}{23} + \frac{2}{12} \approx 0.73 < 1$ because some observations have neither $b$ nor $\neg b$.
                \item $P(h_1, b) = P(h_1 | b) P(b) = \frac{4}{6}\frac{13}{23}$ from above.
                \item $P(h_2, b) = P(h_2 | b) P(b)$ is analogous.
                \item \alert{But not \emph{e.g.} $P(h_1 | \neg b)$} because no clause relates $h_1$ and $\neg b$.
            \end{itemize}            
        \end{block}}
        \onslide*<5->{
            \begin{block}{Also\ldots}
                \onslide*<5>{Counters are local to clauses and, for distinct clauses, may result from distinct sources. \emph{E.g. the relevant counter of $h_1 \vee h_2 \leftarrow b$ and the match counter of $b$ in $b \vee \neg b$.}}
                \onslide*<6>{Some observations may have neither $b$ nor $\neg b$ so: $$P(b) + P(\neg b) < 1.$$}
                \onslide*<7>{Assuming independent observations, since $h_1$ and $h_2$ are not independent, $$\sum_m P(m) > 1.$$}
                \onslide*<8>{What's missing to define the \alert{joint distribution} $$P(H_1, H_2, B)?$$}
            \end{block}
        }
        \onslide*<8->{
            \begin{block}{The joint distribution, according to the clauses}
                \begin{tikzpicture}[node distance=35mm, ->, >=stealth, state/.style={draw, rounded corners}]\small
                    \node (B) [state] {\qlr{B}{
                        b && \neg b && \naf b \cr
                        \bullet && \bullet && \cdot
                        }};
                    \node (H1) [state, left of=B] {
                        \qlr{H_1}{
                            ~ && h_1 && \neg h_1 && \naf h_1 \cr
                            b && \bullet && \circ  && \circ \cr
                            \neg b && \circ  && \circ  && \circ \cr
                            \naf b && \circ  && \circ  && \circ 
                        }
                    };
                    \node (H2) [state, right of=B] {
                        \qlr{H_2}{
                            ~ && h_2 && \neg h_2 && \naf h_2 \cr
                            b && \bullet && \circ  && \circ \cr
                            \neg b && \circ  && \circ  && \circ \cr
                            \naf b && \circ  && \circ  && \circ 
                        }
                    };
                    \path
                        (B) edge (H1)
                        (B) edge (H2)
                    ;
                \end{tikzpicture}
            \end{block}
        }
    \end{frame}
    
    \begin{frame}
        \frametitle{Problem 2: Default Negation}
    
        \begin{itemize}
            \item How to deal with rules with $\naf a$ parts?
            \item Should missing elements on observations be replaced with $\naf a$ atoms?
        \end{itemize}
    \end{frame}
    \section{Conclusions}
    
    \begin{frame}
        \tableofcontents[currentsection]
    \end{frame}
    
    \section*{Background Material}
    
    \begin{frame}
        \Huge Background Material
    \end{frame}

    \begin{frame}{Machine Learning}
        Models are numeric functions: $y \approx f_\theta(x),~\theta_i, x_j, y\in\mathbf{R}$.
        \begin{itemize}
            \item Amazing achievements.
            \item Noise tolerant.
            \item (as of today) Huge enterprise funding.
        \end{itemize}
        but
        \begin{itemize}
            \item (essentially) Academically solved.
            \item Models trained from ``large'' amounts of samples.
            \item Hard to add background knowledge.
            \item Models are hard to interpret.
            \item Single table, independent rows assumption.
        \end{itemize}
    \end{frame}
    
    \begin{frame}{Inductive Logic Programming}
        Models are logic programs: $p_\theta(x, y),~\theta_i, x_j, y\in\mathcal{A}$.
        \begin{itemize}
            \item Amazing achievements, at scale.
            \item Models trained from ``small'' amounts of samples.
            \item Compact, readable models.
            \item Background knowledge is easy to incorporate and edit.
        \end{itemize}
        but
        \begin{itemize}
            \item as of today, Little enterprise commitment.
            \item as of today, Mostly academic interest.
            \item Noise sensitive.
        \end{itemize}
    \end{frame}

    \begin{frame}{Distribution Semantics}
        Assigns probability to (marginally independent) facts and derives probability of ground propositions.

        Let $F$ be a set of facts, $S\subseteq F$, $R$ a set of definite clauses and $p$ a proposition:
        $$\small
        \begin{aligned}
            P_F(S) &= \prod_{f \in S} P(f) \prod_{f \not\in S} \left(1 - P(f) \right) \cr
            P(W) &= \sum_{S \subseteq F :~W=M(S\cup R)} P_F(S) \cr
            P(p) &= \sum_{S :~ S\cup R ~\vdash~ p} P_F(S) = \sum_{W :~ p\in W} P(W)
        \end{aligned}
        $$
        \begin{itemize}
            \item Amazing achievements, at scale.
            \item Lots of tools and research.
            \item The best of both ``worlds''?
        \end{itemize}
        
    \end{frame}

    \begin{frame}{Answer Set Programming}
        A program defines stable models.  
        \begin{itemize}
            \item Pure declarative language, unlike Prolog.
            \item Uses \emph{generate \& test} methods instead of proofs.
            \item Uses both default negation $\naf p$ and classical negation $\neg p$.
            \item Clauses can be disjunctive $a \vee b \leftarrow c \wedge d$.
        \end{itemize}
    \end{frame}

    \begin{frame}{ASP definitions}
        \begin{enumerate}
            \item An \textbf{atom} is $r(t_1, \ldots t_n)$ where
            \begin{itemize}
                \item $r$ is a $n$-ary predicate symbol.
                \item each $t_i$ is a constant or a variable.
            \end{itemize}
            \item A \textbf{ground atom} has no variables.
            \item A \textbf{literal} is either an atom $a$ or a negated atom $\neg a$.
            \item An \textbf{ASP Program} is a set of \textbf{rules} such as $h_1 \vee \cdots \vee h_m \leftarrow b_1 \wedge \cdots \wedge b_n$ where
            \begin{itemize}
                \item Each $h_i$ is a literal, $a$ or $\neg a$.
                \item Each $b_j$ is a literal like above or preceded by $\naf~$.
                \item $m + n > 0$.
            \end{itemize}
            \item The \textbf{head} of such rule is $h_1 \vee \cdots \vee h_m$.
            \item The \textbf{body} of such rule is $b_1 \wedge \cdots \wedge b_n$.
            \item Each $b_i$ is a \textbf{subgoal}.
        \end{enumerate}
    \end{frame}

    \begin{frame}{ASP definitions \hfill(cont.)}
        \begin{enumerate}\setcounter{enumi}{7}
            \item A \textbf{non-disjunctive rule} has $m \leq 1$.
            \item A \textbf{normal rule} has $m = 1$.
            \item A \textbf{constraint} has $m = 0$.
            \item A \textbf{fact} is a normal rule with $n = 0$.
            \item The \textbf{dependency graph} of a program is a digraph where:
            \begin{itemize}
                \item Each grounded atom is a node.
                \item For each grounded rule there are edges from the atoms in the body to the atoms in the head.
            \end{itemize} 
            \item A \textbf{negative edge} results from an atom with $\naf~$; otherwise it is a \textbf{positive edge}.
            \item An \textbf{acyclic program} has an acyclic dependency graph.
        \end{enumerate}
    \end{frame}

    \begin{frame}{ASP definitions \hfill(cont.)}
        \begin{enumerate}\setcounter{enumi}{14}
            \item A \textbf{normal program} has only normal rules.
            \item A \textbf{definite program} is a normal program that contains neither $\neg$ nor $\naf~$.
            \item In the dependency graph of a \textbf{stratified program} no cycle contains a negative edge.
            \begin{itemize}
                \item A stratified program has a single minimal model that assigns either true or false to each atom.
            \end{itemize}
            \item A \textbf{propositional program} has no variables.
        \end{enumerate}
    \end{frame}

    \begin{frame}{ASP definitions \hfill(cont.)}
        \begin{enumerate}\setcounter{enumi}{18}
            \item The \textbf{Herbrand base} of a program is the set of ground literals that result from combining all the predicates and constants of the program.
            \item An \textbf{interpretation} is a consistent subset (\emph{i.e.} doesn't contain $\set{a, \neg a}$) of the Herbrand base. 
            \item A ground literal is \textbf{true}, $I \models a$, if $a \in I$; otherwise the literal is \textbf{false}.
            \item A ground subgoal, $\naf b$, where $b$ is a ground literal, is \textbf{true}, $I \models \naf b$, if $b \not\in I$; otherwise, if $b \in I$, it is \textbf{false}.
            \item A ground rule $r = h_1 \vee \cdots \vee h_m \leftarrow b_1 \wedge \cdots \wedge b_n$ is \textbf{satisfied} by the interpretation $I$, \emph{i.e.} $I \models r$, iff 
            \begin{itemize}
                \item $I \not\models b_j$ for some $j$ or $I \models h_i$ for some $i$.
            \end{itemize} 
            \item A \textbf{model} of a program is an interpretation that satisfies all the rules.
        \end{enumerate}
    \end{frame}

    \begin{frame}{Stable Semantics}
        \begin{itemize}
            \item Every definite program has a unique minimal model; its \emph{semantics}.
            \item Programs with negation may have no unique minimal model.
            \item Given a program $P$ and an interpretation $I$, their \textbf{reduct}, $P^I$ is the propositional program that results from
            \begin{enumerate}
                \item Removing all the rules with $\naf b$ in the body where $b \in I$.
                \item Removing all the $\naf b$ subgoals from the remaining rules.
            \end{enumerate}
            \item A \textbf{stable model} of the program $P$ is an interpretation $I$ that is the minimal model of the reduct $P^I$.
            \item The \textbf{semantics} (the \textbf{answer sets}) of a program is the set of stable models of that program.
        \end{itemize}
    \end{frame}

    \begin{frame}{Stable Semantics}
        \begin{itemize}
            \item A program such as $a \leftarrow \naf a$ may have no stable models.
            \item A stable model is a closed interpretation (under the rules of the program).
        \end{itemize}
    \end{frame}

    \subsection*{Stable Sets}
    
    \begin{frame}
        \tableofcontents[currentsection]
    \end{frame}

    \subsection*{References}
    
    \begin{frame}
        \tableofcontents[currentsection]
    \end{frame}
\end{document}