lecture reflow

neuppl · Oct 2, 2023 · 4e9b28b · 4e9b28b
1 parent d6e1013
commit 4e9b28b
Show file tree

Hide file tree

Showing 5 changed files with 204 additions and 177 deletions.
diff --git a/README.md b/README.md
@@ -26,4 +26,7 @@ reference:
 * Lecture 1: Introduction and overview
 * Lecture 2: Propositional Probability I: Semantics
 * Lecture 3: Propositional Probability II: Reasoning
-* Lecture 4: Propositional Probability III: Compiling
+* Lecture 4: Propositional Probability III: Compiling
+* Lecture 5: Binary Decision Diagrams I
+* Lecture 6: Binary Decision Diagrams II
+* Lecture 7: Discrete Probabilistic Programs
diff --git a/lecture-6/lecture-6.pdf b/lecture-6/lecture-6.pdf
diff --git a/lecture-6/lecture-6.tex b/lecture-6/lecture-6.tex
@@ -172,182 +172,6 @@ \section{Compositional compilation of BDDs}
   top-down compilation is faster than bottom-up and vice-versa.
 \end{itemize}
 
-\section{\disc{}: A simple discrete PPL}
-
-\begin{itemize}
-  \item Syntax:
-\begin{lstlisting}[mathescape=true]
-  e ::=
-  | x $\leftarrow$ e; e
-  | observe e; e
-  | flip q                         // q is a rational value
-  | if e then e else e
-  | return e
-  | true | false
-  | e $\land$ e | e $\lor$ e | $\neg$ e |
-  | ( e )
-p ::= e
-\end{lstlisting}
-
-\item \disc{} looks very similar to a standard functional programming language,
-but has two some interesting new keywords: \texttt{flip}, \texttt{observe}, and
-\texttt{return}
-
-\item $\texttt{flip}~\theta$ allocates a new random quantity that is $\true$
-with probability $\theta$ and $\false$ with probability $1-\theta$
-
-\item \texttt{return e} turns a non-probabilistic quantity into a probabilistic one, i.e. 
-\texttt{return true} is a \emph{probabilistic quantity} that is $\true$ with probability 1 and 
-$\false$ with probability 0
-
-\item Example program and its interpretation: 
-
-\begin{lstlisting}[mathescape=true]
-x $\leftarrow$ flip 0.5; 
-y $\leftarrow$ flip 0.5;
-return x $\land$ y
-\end{lstlisting}
-
-This program outputs the probability distribution $[\true \mapsto 0.25, \false
-\mapsto 0.75]$.
-
-\item \texttt{observe} is a powerful keyword that lets us \emph{condition} the
-program. For instance, suppose I want to model the following scenario: ``flip 
-two coins and observe that at least one of them is heads. What is the probability 
-that the first coin was heads?''
-
-We can encode this scenario as a \disc{} program:
-
-\begin{lstlisting}[mathescape=true]
-x $\leftarrow$ flip 0.5; 
-y $\leftarrow$ flip 0.5;
-observe x $\lor$ y;
-return x
-\end{lstlisting}
-
-This program outputs the probability distribution:
-\begin{align*}
-  [\true \mapsto (0.25 + 0.25) / 0.75, \false
-\mapsto 0.25 / 0.75]
-\end{align*} 
-
-\item \textbf{Type system}: terms can either be pure Booleans of type $\bool$ 
-or distributions on Booleans of type $\dist{\bool}$. So, we have the following 
-type definition:
-\begin{align}
-  \tau ::= \bool \mid \dist{\bool}.
-\end{align}
-
-\item We define a typing judgment $\Gamma \vdash \te : \tau$ that associates each 
-term with a type. The typing context $\Gamma$ is a map from identifiers to types.
-\begin{mathpar}
-  \inferrule{}{\Gamma \vdash \true{} : \bool} \and 
-  \inferrule{}{\Gamma \vdash \false{} : \bool} \and
-  \inferrule{}{\Gamma \vdash \texttt{flip}~\theta : \dist{\bool}} \and 
-  \inferrule{\Gamma \vdash \te : \bool}{\Gamma \vdash \texttt{return}~\te : \dist{\bool}} \and
-  \inferrule{\Gamma \vdash \te_1 : \bool \and \Gamma \vdash \te_2 : \tau}
-    {\Gamma \vdash \texttt{observe}~\te_1; \te_2 : \tau} \and
-  \inferrule{\Gamma \vdash \te_1 : \dist{\bool} \and \Gamma \cup [x \mapsto \bool] \vdash \te_2 : \tau}
-    {\Gamma \vdash x \leftarrow \te_1; \te_2 : \tau} \and 
-  \inferrule{\Gamma \vdash \te_1 : \bool \and \Gamma \vdash \te_2 : \tau \and \Gamma \vdash \te_3 : \tau}
-    {\Gamma \vdash \texttt{if}~\te_1~\texttt{then}~\te_2~\texttt{else}~\te_3 : \tau} \and
-  \inferrule{\Gamma \vdash \te_1 : \bool \and \Gamma \vdash \te_2 : \bool}
-    {\Gamma \vdash \te_1 \land \te_2 : \bool}
-\end{mathpar}
-
-\end{itemize}
-
-
-
-\subsection{Denotational semantics of \disc{}}
-
-\begin{itemize}
-\item Associates each term with an \emph{unnormalized probability distribution} 
-(i.e., the total probability mass may be less than 1).
-\item Has the type $\dbracket{\te} : \texttt{Bool} \rightarrow [0, 1]$,
-and has the following inductive definition:
-
-$$
-[\![\texttt{flip}~\theta]\!](v) = 
-\begin{cases}
-\theta& \quad \text{if }v = T\\
-1-\theta& \quad \text{if }v = F\\
-\end{cases}
-$$
-
-$$
- [\![\texttt{return}~e]\!](v) = 
- \begin{cases}
- 1\quad& \text{if }v = [\![e]\!]\\
- 0\quad&  \text{otherwise}\\
- \end{cases}
-$$
-
-$$
-[\![x \leftarrow e_1; e_2]\!](v) = \sum_{v'} [\![{e_1}]\!](v') \times [\![{e_2[x \mapsto v']}]\!](v)
-$$
-
-$$
-[\![\texttt{observe}~e_1; e_2]\!](v) = 
-\sum_{\{v' \mid [\![{e_1}(v') = T\}]\!]} [\![{e_2}]\!](v)
-$$
-
-\item The semantics for non-probabilistic terms is standard. The semantic evaluation has type 
-$[[e]]: \texttt{Bool}$ for closed terms.
-
-\item These semantics give an unnormalized distribution. The main semantic object of interest is 
-the normalized distribution, which is given by the \defn{normalized semantics}:
-
-$$
-[\![e]\!]_D(T) = \frac{[\![e]\!](T)}{[\![e]\!](T) + [\![e]\!](F) },
-$$
-
-defined analogously for the false case.
-\end{itemize}
-
-% \subsection{Inference via enumeration}
-% \begin{itemize}
-%   \item Define a relation $\te \Downarrow^e $
-% \end{itemize}
-
-\subsection{Compiling \disc{} to \prop{}}
-\begin{itemize}
-  \item \textbf{Goal}: give a semantics-preserving compilation
-  $\rightsquigarrow$ that compiles \disc{} to \prop{}
-  \item In order to handle observations, we will compile \disc{} programs into 
-  \emph{two} \prop{} programs: one that computes the unnormalized probability of 
-  returning $\true$, and one that computes the probability of evidence (i.e. normalizing constant)
-  \item Inductive description has the shape $\te \compiles (\texttt{p}_1, \texttt{p}_2)$. We want to 
-  define this relation to satisfy the following \textbf{adequacy condition}:
-  \begin{align} 
-    \dbracket{\te}_D(\true) = \frac{\dbracket{\prog_1}}{\dbracket{\prog_2}}.
-  \end{align} 
-  We will shorten this description to $\te \compiles
-  (\varphi, \varphi_A, w)$ and assume that the two formulae share a common
-  $w$. The adequacy condition then becomes:
-  \begin{align} 
-    \dbracket{\te}_D(\true) = \frac{\dbracket{(\varphi, w)}}{\dbracket{(\varphi_A, w)}}.
-  \end{align} 
-  \item Compilation relation:
-  \begin{mathpar}
-    \inferrule{}{\true \compiles (\true, \true, \emptyset)} \and 
-    \inferrule{}{\false \compiles (\false, \true, \emptyset)} \and 
-    \inferrule{}{x \compiles (x, \true, \emptyset)} \and 
-    \inferrule{\texttt{fresh}~x}
-      {\texttt{flip}~\theta \compiles (x, \true, [x \mapsto \theta, \overline{x} \mapsto 1-\theta])} \and
-    \inferrule{\te_1 \compiles (\varphi, \varphi_A, w) \and \te_2 \compiles (\varphi', \varphi_A', w')}
-      {x \leftarrow \te_1; \te_2 \compiles (\varphi'[\varphi/x], \varphi_A'[\varphi/x] \land \varphi_A, w_1 \cup w_2)} \and
-    \inferrule{\te_1 \compiles (\varphi, \varphi_A, w) \and \te_2 \compiles (\varphi', \varphi_A', w')}
-      {\texttt{observe}~\te_1; \te_2 \compiles (\varphi', \varphi_A' \land \varphi_A, w_1 \cup w_2)}
-  \end{mathpar}
-
-\end{itemize}
-\begin{theorem}[Adequacy]
-  For well-typed term $\te$,
-  assume $\te \compiles (\varphi, \varphi_A, w)$. Then, 
-  $\dbracket{\te}_D(\true) = {\dbracket{(\varphi, w)}} / {\dbracket{(\varphi_A, w)}}$.
-\end{theorem}
-
   \begin{figure}
   \begin{mathpar}
     \inferrule{}{\Gamma \vdash \bddtrue{} \land \alpha \Downarrow \alpha} \and 

diff --git a/lecture-7/lecture-7.pdf b/lecture-7/lecture-7.pdf
diff --git a/lecture-7/lecture-7.tex b/lecture-7/lecture-7.tex
@@ -0,0 +1,200 @@
+\documentclass{tufte-handout}
+
+\title{Discrete Probabilistic Programming Languages\thanks{CS7470 Fall 2023: Foundations of Probabilistic Programming.}}
+
+
+\newcommand{\varset}[0]{\mathcal{V}}
+
+\author[]{Steven Holtzen\\[email protected]}
+
+%\date{28 March 2010} % without \date command, current date is supplied
+
+%\geometry{showframe} % display margins for debugging page layout
+\input{../header.tex}
+
+\begin{document}
+\maketitle% this prints the handout title, author, and date
+
+\section{\disc{}: A simple discrete PPL}
+
+\begin{itemize}
+  \item Syntax:
+\begin{lstlisting}[mathescape=true]
+  e ::=
+  | x $\leftarrow$ e; e
+  | observe e; e
+  | flip q                         // q is a rational value
+  | if e then e else e
+  | return e
+  | true | false
+  | e $\land$ e | e $\lor$ e | $\neg$ e |
+  | ( e )
+p ::= e
+\end{lstlisting}
+
+\item \disc{} looks very similar to a standard functional programming language,
+but has two some interesting new keywords: \texttt{flip}, \texttt{observe}, and
+\texttt{return}
+
+\item $\texttt{flip}~\theta$ allocates a new random quantity that is $\true$
+with probability $\theta$ and $\false$ with probability $1-\theta$
+
+\item \texttt{return e} turns a non-probabilistic quantity into a probabilistic one, i.e. 
+\texttt{return true} is a \emph{probabilistic quantity} that is $\true$ with probability 1 and 
+$\false$ with probability 0
+
+\item Example program and its interpretation: 
+
+\begin{lstlisting}[mathescape=true]
+x $\leftarrow$ flip 0.5; 
+y $\leftarrow$ flip 0.5;
+return x $\land$ y
+\end{lstlisting}
+
+This program outputs the probability distribution $[\true \mapsto 0.25, \false
+\mapsto 0.75]$.
+
+\item \texttt{observe} is a powerful keyword that lets us \emph{condition} the
+program. For instance, suppose I want to model the following scenario: ``flip 
+two coins and observe that at least one of them is heads. What is the probability 
+that the first coin was heads?''
+
+We can encode this scenario as a \disc{} program:
+
+\begin{lstlisting}[mathescape=true]
+x $\leftarrow$ flip 0.5; 
+y $\leftarrow$ flip 0.5;
+observe x $\lor$ y;
+return x
+\end{lstlisting}
+
+This program outputs the probability distribution:
+\begin{align*}
+  [\true \mapsto (0.25 + 0.25) / 0.75, \false
+\mapsto 0.25 / 0.75]
+\end{align*} 
+
+\item \textbf{Type system}: terms can either be pure Booleans of type $\bool$ 
+or distributions on Booleans of type $\dist{\bool}$. So, we have the following 
+type definition:
+\begin{align}
+  \tau ::= \bool \mid \dist{\bool}.
+\end{align}
+
+\item We define a typing judgment $\Gamma \vdash \te : \tau$ that associates each 
+term with a type. The typing context $\Gamma$ is a map from identifiers to types.
+\begin{mathpar}
+  \inferrule{}{\Gamma \vdash \true{} : \bool} \and 
+  \inferrule{}{\Gamma \vdash \false{} : \bool} \and
+  \inferrule{}{\Gamma \vdash \texttt{flip}~\theta : \dist{\bool}} \and 
+  \inferrule{\Gamma \vdash \te : \bool}{\Gamma \vdash \texttt{return}~\te : \dist{\bool}} \and
+  \inferrule{\Gamma \vdash \te_1 : \bool \and \Gamma \vdash \te_2 : \tau}
+    {\Gamma \vdash \texttt{observe}~\te_1; \te_2 : \tau} \and
+  \inferrule{\Gamma \vdash \te_1 : \dist{\bool} \and \Gamma \cup [x \mapsto \bool] \vdash \te_2 : \tau}
+    {\Gamma \vdash x \leftarrow \te_1; \te_2 : \tau} \and 
+  \inferrule{\Gamma \vdash \te_1 : \bool \and \Gamma \vdash \te_2 : \tau \and \Gamma \vdash \te_3 : \tau}
+    {\Gamma \vdash \texttt{if}~\te_1~\texttt{then}~\te_2~\texttt{else}~\te_3 : \tau} \and
+  \inferrule{\Gamma \vdash \te_1 : \bool \and \Gamma \vdash \te_2 : \bool}
+    {\Gamma \vdash \te_1 \land \te_2 : \bool}
+\end{mathpar}
+
+\end{itemize}
+
+
+
+\subsection{Denotational semantics of \disc{}}
+
+\begin{itemize}
+\item Associates each term with an \emph{unnormalized probability distribution} 
+(i.e., the total probability mass may be less than 1).
+\item Has the type $\dbracket{\te} : \texttt{Bool} \rightarrow [0, 1]$,
+and has the following inductive definition:
+
+$$
+[\![\texttt{flip}~\theta]\!](v) = 
+\begin{cases}
+\theta& \quad \text{if }v = T\\
+1-\theta& \quad \text{if }v = F\\
+\end{cases}
+$$
+
+$$
+ [\![\texttt{return}~e]\!](v) = 
+ \begin{cases}
+ 1\quad& \text{if }v = [\![e]\!]\\
+ 0\quad&  \text{otherwise}\\
+ \end{cases}
+$$
+
+$$
+[\![x \leftarrow e_1; e_2]\!](v) = \sum_{v'} [\![{e_1}]\!](v') \times [\![{e_2[x \mapsto v']}]\!](v)
+$$
+
+$$
+[\![\texttt{observe}~e_1; e_2]\!](v) = 
+\sum_{\{v' \mid [\![{e_1}(v') = T\}]\!]} [\![{e_2}]\!](v)
+$$
+
+\item The semantics for non-probabilistic terms is standard. The semantic evaluation has type 
+$[[e]]: \texttt{Bool}$ for closed terms.
+
+\item These semantics give an unnormalized distribution. The main semantic object of interest is 
+the normalized distribution, which is given by the \defn{normalized semantics}:
+
+$$
+[\![e]\!]_D(T) = \frac{[\![e]\!](T)}{[\![e]\!](T) + [\![e]\!](F) },
+$$
+
+defined analogously for the false case.
+\end{itemize}
+
+% \subsection{Inference via enumeration}
+% \begin{itemize}
+%   \item Define a relation $\te \Downarrow^e $
+% \end{itemize}
+
+\section{Observation}
+
+\section{Compiling \disc{} to \prop{}}
+\begin{itemize}
+  \item \textbf{Goal}: give a semantics-preserving compilation
+  $\rightsquigarrow$ that compiles \disc{} to \prop{}
+  \item In order to handle observations, we will compile \disc{} programs into 
+  \emph{two} \prop{} programs: one that computes the unnormalized probability of 
+  returning $\true$, and one that computes the probability of evidence (i.e. normalizing constant)
+  \item Inductive description has the shape $\te \compiles (\texttt{p}_1, \texttt{p}_2)$. We want to 
+  define this relation to satisfy the following \textbf{adequacy condition}:
+  \begin{align} 
+    \dbracket{\te}_D(\true) = \frac{\dbracket{\prog_1}}{\dbracket{\prog_2}}.
+  \end{align} 
+  We will shorten this description to $\te \compiles
+  (\varphi, \varphi_A, w)$ and assume that the two formulae share a common
+  $w$. The adequacy condition then becomes:
+  \begin{align} 
+    \dbracket{\te}_D(\true) = \frac{\dbracket{(\varphi, w)}}{\dbracket{(\varphi_A, w)}}.
+  \end{align} 
+  \item Compilation relation:
+  \begin{mathpar}
+    \inferrule{}{\true \compiles (\true, \true, \emptyset)} \and 
+    \inferrule{}{\false \compiles (\false, \true, \emptyset)} \and 
+    \inferrule{}{x \compiles (x, \true, \emptyset)} \and 
+    \inferrule{\texttt{fresh}~x}
+      {\texttt{flip}~\theta \compiles (x, \true, [x \mapsto \theta, \overline{x} \mapsto 1-\theta])} \and
+    \inferrule{\te_1 \compiles (\varphi, \varphi_A, w) \and \te_2 \compiles (\varphi', \varphi_A', w')}
+      {x \leftarrow \te_1; \te_2 \compiles (\varphi'[\varphi/x], \varphi_A'[\varphi/x] \land \varphi_A, w_1 \cup w_2)} \and
+    \inferrule{\te_1 \compiles (\varphi, \varphi_A, w) \and \te_2 \compiles (\varphi', \varphi_A', w')}
+      {\texttt{observe}~\te_1; \te_2 \compiles (\varphi', \varphi_A' \land \varphi_A, w_1 \cup w_2)}
+  \end{mathpar}
+
+\end{itemize}
+\begin{theorem}[Adequacy]
+  For well-typed term $\te$,
+  assume $\te \compiles (\varphi, \varphi_A, w)$. Then, 
+  $\dbracket{\te}_D(\true) = {\dbracket{(\varphi, w)}} / {\dbracket{(\varphi_A, w)}}$.
+\end{theorem}
+
+\bibliographystyle{plainnat}
+\bibliography{../bib}
+
+
+\end{document}