diff --git a/README.md b/README.md index de1cae9..9afb054 100644 --- a/README.md +++ b/README.md @@ -26,4 +26,7 @@ reference: * Lecture 1: Introduction and overview * Lecture 2: Propositional Probability I: Semantics * Lecture 3: Propositional Probability II: Reasoning -* Lecture 4: Propositional Probability III: Compiling \ No newline at end of file +* Lecture 4: Propositional Probability III: Compiling +* Lecture 5: Binary Decision Diagrams I +* Lecture 6: Binary Decision Diagrams II +* Lecture 7: Discrete Probabilistic Programs \ No newline at end of file diff --git a/lecture-6/lecture-6.pdf b/lecture-6/lecture-6.pdf index 66efe32..b03a9e3 100644 Binary files a/lecture-6/lecture-6.pdf and b/lecture-6/lecture-6.pdf differ diff --git a/lecture-6/lecture-6.tex b/lecture-6/lecture-6.tex index 5496531..e3239fc 100644 --- a/lecture-6/lecture-6.tex +++ b/lecture-6/lecture-6.tex @@ -172,182 +172,6 @@ \section{Compositional compilation of BDDs} top-down compilation is faster than bottom-up and vice-versa. \end{itemize} -\section{\disc{}: A simple discrete PPL} - -\begin{itemize} - \item Syntax: -\begin{lstlisting}[mathescape=true] - e ::= - | x $\leftarrow$ e; e - | observe e; e - | flip q // q is a rational value - | if e then e else e - | return e - | true | false - | e $\land$ e | e $\lor$ e | $\neg$ e | - | ( e ) -p ::= e -\end{lstlisting} - -\item \disc{} looks very similar to a standard functional programming language, -but has two some interesting new keywords: \texttt{flip}, \texttt{observe}, and -\texttt{return} - -\item $\texttt{flip}~\theta$ allocates a new random quantity that is $\true$ -with probability $\theta$ and $\false$ with probability $1-\theta$ - -\item \texttt{return e} turns a non-probabilistic quantity into a probabilistic one, i.e. 
-\texttt{return true} is a \emph{probabilistic quantity} that is $\true$ with probability 1 and -$\false$ with probability 0 - -\item Example program and its interpretation: - -\begin{lstlisting}[mathescape=true] -x $\leftarrow$ flip 0.5; -y $\leftarrow$ flip 0.5; -return x $\land$ y -\end{lstlisting} - -This program outputs the probability distribution $[\true \mapsto 0.25, \false -\mapsto 0.75]$. - -\item \texttt{observe} is a powerful keyword that lets us \emph{condition} the -program. For instance, suppose I want to model the following scenario: ``flip -two coins and observe that at least one of them is heads. What is the probability -that the first coin was heads?'' - -We can encode this scenario as a \disc{} program: - -\begin{lstlisting}[mathescape=true] -x $\leftarrow$ flip 0.5; -y $\leftarrow$ flip 0.5; -observe x $\lor$ y; -return x -\end{lstlisting} - -This program outputs the probability distribution: -\begin{align*} - [\true \mapsto (0.25 + 0.25) / 0.75, \false -\mapsto 0.25 / 0.75] -\end{align*} - -\item \textbf{Type system}: terms can either be pure Booleans of type $\bool$ -or distributions on Booleans of type $\dist{\bool}$. So, we have the following -type definition: -\begin{align} - \tau ::= \bool \mid \dist{\bool}. -\end{align} - -\item We define a typing judgment $\Gamma \vdash \te : \tau$ that associates each -term with a type. The typing context $\Gamma$ is a map from identifiers to types. 
-\begin{mathpar} - \inferrule{}{\Gamma \vdash \true{} : \bool} \and - \inferrule{}{\Gamma \vdash \false{} : \bool} \and - \inferrule{}{\Gamma \vdash \texttt{flip}~\theta : \dist{\bool}} \and - \inferrule{\Gamma \vdash \te : \bool}{\Gamma \vdash \texttt{return}~\te : \dist{\bool}} \and - \inferrule{\Gamma \vdash \te_1 : \bool \and \Gamma \vdash \te_2 : \tau} - {\Gamma \vdash \texttt{observe}~\te_1; \te_2 : \tau} \and - \inferrule{\Gamma \vdash \te_1 : \dist{\bool} \and \Gamma \cup [x \mapsto \bool] \vdash \te_2 : \tau} - {\Gamma \vdash x \leftarrow \te_1; \te_2 : \tau} \and - \inferrule{\Gamma \vdash \te_1 : \bool \and \Gamma \vdash \te_2 : \tau \and \Gamma \vdash \te_3 : \tau} - {\Gamma \vdash \texttt{if}~\te_1~\texttt{then}~\te_2~\texttt{else}~\te_3 : \tau} \and - \inferrule{\Gamma \vdash \te_1 : \bool \and \Gamma \vdash \te_2 : \bool} - {\Gamma \vdash \te_1 \land \te_2 : \bool} -\end{mathpar} - -\end{itemize} - - - -\subsection{Denotational semantics of \disc{}} - -\begin{itemize} -\item Associates each term with an \emph{unnormalized probability distribution} -(i.e., the total probability mass may be less than 1). -\item Has the type $\dbracket{\te} : \texttt{Bool} \rightarrow [0, 1]$, -and has the following inductive definition: - -$$ -[\![\texttt{flip}~\theta]\!](v) = -\begin{cases} -\theta& \quad \text{if }v = T\\ -1-\theta& \quad \text{if }v = F\\ -\end{cases} -$$ - -$$ - [\![\texttt{return}~e]\!](v) = - \begin{cases} - 1\quad& \text{if }v = [\![e]\!]\\ - 0\quad& \text{otherwise}\\ - \end{cases} -$$ - -$$ -[\![x \leftarrow e_1; e_2]\!](v) = \sum_{v'} [\![{e_1}]\!](v') \times [\![{e_2[x \mapsto v']}]\!](v) -$$ - -$$ -[\![\texttt{observe}~e_1; e_2]\!](v) = -\sum_{\{v' \mid [\![{e_1}(v') = T\}]\!]} [\![{e_2}]\!](v) -$$ - -\item The semantics for non-probabilistic terms is standard. The semantic evaluation has type -$[[e]]: \texttt{Bool}$ for closed terms. - -\item These semantics give an unnormalized distribution. 
The main semantic object of interest is -the normalized distribution, which is given by the \defn{normalized semantics}: - -$$ -[\![e]\!]_D(T) = \frac{[\![e]\!](T)}{[\![e]\!](T) + [\![e]\!](F) }, -$$ - -defined analogously for the false case. -\end{itemize} - -% \subsection{Inference via enumeration} -% \begin{itemize} -% \item Define a relation $\te \Downarrow^e $ -% \end{itemize} - -\subsection{Compiling \disc{} to \prop{}} -\begin{itemize} - \item \textbf{Goal}: give a semantics-preserving compilation - $\rightsquigarrow$ that compiles \disc{} to \prop{} - \item In order to handle observations, we will compile \disc{} programs into - \emph{two} \prop{} programs: one that computes the unnormalized probability of - returning $\true$, and one that computes the probability of evidence (i.e. normalizing constant) - \item Inductive description has the shape $\te \compiles (\texttt{p}_1, \texttt{p}_2)$. We want to - define this relation to satisfy the following \textbf{adequacy condition}: - \begin{align} - \dbracket{\te}_D(\true) = \frac{\dbracket{\prog_1}}{\dbracket{\prog_2}}. - \end{align} - We will shorten this description to $\te \compiles - (\varphi, \varphi_A, w)$ and assume that the two formulae share a common - $w$. The adequacy condition then becomes: - \begin{align} - \dbracket{\te}_D(\true) = \frac{\dbracket{(\varphi, w)}}{\dbracket{(\varphi_A, w)}}. 
- \end{align} - \item Compilation relation: - \begin{mathpar} - \inferrule{}{\true \compiles (\true, \true, \emptyset)} \and - \inferrule{}{\false \compiles (\false, \true, \emptyset)} \and - \inferrule{}{x \compiles (x, \true, \emptyset)} \and - \inferrule{\texttt{fresh}~x} - {\texttt{flip}~\theta \compiles (x, \true, [x \mapsto \theta, \overline{x} \mapsto 1-\theta])} \and - \inferrule{\te_1 \compiles (\varphi, \varphi_A, w) \and \te_2 \compiles (\varphi', \varphi_A', w')} - {x \leftarrow \te_1; \te_2 \compiles (\varphi'[\varphi/x], \varphi_A'[\varphi/x] \land \varphi_A, w_1 \cup w_2)} \and - \inferrule{\te_1 \compiles (\varphi, \varphi_A, w) \and \te_2 \compiles (\varphi', \varphi_A', w')} - {\texttt{observe}~\te_1; \te_2 \compiles (\varphi', \varphi_A' \land \varphi_A, w_1 \cup w_2)} - \end{mathpar} - -\end{itemize} -\begin{theorem}[Adequacy] - For well-typed term $\te$, - assume $\te \compiles (\varphi, \varphi_A, w)$. Then, - $\dbracket{\te}_D(\true) = {\dbracket{(\varphi, w)}} / {\dbracket{(\varphi_A, w)}}$. 
-\end{theorem}
-
 \begin{figure}
 \begin{mathpar}
 \inferrule{}{\Gamma \vdash \bddtrue{} \land \alpha \Downarrow \alpha} \and
diff --git a/lecture-7/lecture-7.pdf b/lecture-7/lecture-7.pdf
new file mode 100644
index 0000000..2fbd038
Binary files /dev/null and b/lecture-7/lecture-7.pdf differ
diff --git a/lecture-7/lecture-7.tex b/lecture-7/lecture-7.tex
new file mode 100644
index 0000000..f635431
--- /dev/null
+++ b/lecture-7/lecture-7.tex
@@ -0,0 +1,200 @@
+\documentclass{tufte-handout}
+
+\title{Discrete Probabilistic Programming Languages\thanks{CS7470 Fall 2023: Foundations of Probabilistic Programming.}}
+
+
+\newcommand{\varset}[0]{\mathcal{V}}
+
+\author[]{Steven Holtzen\\s.holtzen@northeastern.edu}
+
+%\date{28 March 2010} % without \date command, current date is supplied
+
+%\geometry{showframe} % display margins for debugging page layout
+\input{../header.tex}
+
+\begin{document}
+\maketitle% this prints the handout title, author, and date
+
+\section{\disc{}: A simple discrete PPL}
+
+\begin{itemize}
+ \item Syntax:
+\begin{lstlisting}[mathescape=true]
+ e ::=
+ | x $\leftarrow$ e; e
+ | observe e; e
+ | flip q // q is a rational value
+ | if e then e else e
+ | return e
+ | true | false
+ | e $\land$ e | e $\lor$ e | $\neg$ e |
+ | ( e )
+p ::= e
+\end{lstlisting}
+
+\item \disc{} looks very similar to a standard functional programming language,
+but has three interesting new keywords: \texttt{flip}, \texttt{observe}, and
+\texttt{return}
+
+\item $\texttt{flip}~\theta$ allocates a new random quantity that is $\true$
+with probability $\theta$ and $\false$ with probability $1-\theta$
+
+\item \texttt{return e} turns a non-probabilistic quantity into a probabilistic one, i.e.
+\texttt{return true} is a \emph{probabilistic quantity} that is $\true$ with probability 1 and +$\false$ with probability 0 + +\item Example program and its interpretation: + +\begin{lstlisting}[mathescape=true] +x $\leftarrow$ flip 0.5; +y $\leftarrow$ flip 0.5; +return x $\land$ y +\end{lstlisting} + +This program outputs the probability distribution $[\true \mapsto 0.25, \false +\mapsto 0.75]$. + +\item \texttt{observe} is a powerful keyword that lets us \emph{condition} the +program. For instance, suppose I want to model the following scenario: ``flip +two coins and observe that at least one of them is heads. What is the probability +that the first coin was heads?'' + +We can encode this scenario as a \disc{} program: + +\begin{lstlisting}[mathescape=true] +x $\leftarrow$ flip 0.5; +y $\leftarrow$ flip 0.5; +observe x $\lor$ y; +return x +\end{lstlisting} + +This program outputs the probability distribution: +\begin{align*} + [\true \mapsto (0.25 + 0.25) / 0.75, \false +\mapsto 0.25 / 0.75] +\end{align*} + +\item \textbf{Type system}: terms can either be pure Booleans of type $\bool$ +or distributions on Booleans of type $\dist{\bool}$. So, we have the following +type definition: +\begin{align} + \tau ::= \bool \mid \dist{\bool}. +\end{align} + +\item We define a typing judgment $\Gamma \vdash \te : \tau$ that associates each +term with a type. The typing context $\Gamma$ is a map from identifiers to types. 
+\begin{mathpar}
+ \inferrule{}{\Gamma \vdash \true{} : \bool} \and
+ \inferrule{}{\Gamma \vdash \false{} : \bool} \and
+ \inferrule{}{\Gamma \vdash \texttt{flip}~\theta : \dist{\bool}} \and
+ \inferrule{\Gamma \vdash \te : \bool}{\Gamma \vdash \texttt{return}~\te : \dist{\bool}} \and
+ \inferrule{\Gamma \vdash \te_1 : \bool \and \Gamma \vdash \te_2 : \tau}
+ {\Gamma \vdash \texttt{observe}~\te_1; \te_2 : \tau} \and
+ \inferrule{\Gamma \vdash \te_1 : \dist{\bool} \and \Gamma \cup [x \mapsto \bool] \vdash \te_2 : \tau}
+ {\Gamma \vdash x \leftarrow \te_1; \te_2 : \tau} \and
+ \inferrule{\Gamma \vdash \te_1 : \bool \and \Gamma \vdash \te_2 : \tau \and \Gamma \vdash \te_3 : \tau}
+ {\Gamma \vdash \texttt{if}~\te_1~\texttt{then}~\te_2~\texttt{else}~\te_3 : \tau} \and
+ \inferrule{\Gamma \vdash \te_1 : \bool \and \Gamma \vdash \te_2 : \bool}
+ {\Gamma \vdash \te_1 \land \te_2 : \bool}
+\end{mathpar}
+
+\end{itemize}
+
+
+
+\subsection{Denotational semantics of \disc{}}
+
+\begin{itemize}
+\item Associates each term with an \emph{unnormalized probability distribution}
+(i.e., the total probability mass may be less than 1).
+\item Has the type $\dbracket{\te} : \texttt{Bool} \rightarrow [0, 1]$,
+and has the following inductive definition:
+
+$$
+[\![\texttt{flip}~\theta]\!](v) =
+\begin{cases}
+\theta& \quad \text{if }v = T\\
+1-\theta& \quad \text{if }v = F\\
+\end{cases}
+$$
+
+$$
+ [\![\texttt{return}~e]\!](v) =
+ \begin{cases}
+ 1\quad& \text{if }v = [\![e]\!]\\
+ 0\quad& \text{otherwise}\\
+ \end{cases}
+$$
+
+$$
+[\![x \leftarrow e_1; e_2]\!](v) = \sum_{v'} [\![{e_1}]\!](v') \times [\![{e_2[x \mapsto v']}]\!](v)
+$$
+
+$$
+[\![\texttt{observe}~e_1; e_2]\!](v) =
+\begin{cases}
+[\![{e_2}]\!](v)& \quad \text{if }[\![{e_1}]\!] = T\\
+0& \quad \text{otherwise}\\
+\end{cases}
+$$
+
+\item The semantics for non-probabilistic terms is standard. The semantic evaluation has type
+$[\![e]\!]: \texttt{Bool}$ for closed terms.
+
+\item These semantics give an unnormalized distribution.
The main semantic object of interest is +the normalized distribution, which is given by the \defn{normalized semantics}: + +$$ +[\![e]\!]_D(T) = \frac{[\![e]\!](T)}{[\![e]\!](T) + [\![e]\!](F) }, +$$ + +defined analogously for the false case. +\end{itemize} + +% \subsection{Inference via enumeration} +% \begin{itemize} +% \item Define a relation $\te \Downarrow^e $ +% \end{itemize} + +\section{Observation} + +\section{Compiling \disc{} to \prop{}} +\begin{itemize} + \item \textbf{Goal}: give a semantics-preserving compilation + $\rightsquigarrow$ that compiles \disc{} to \prop{} + \item In order to handle observations, we will compile \disc{} programs into + \emph{two} \prop{} programs: one that computes the unnormalized probability of + returning $\true$, and one that computes the probability of evidence (i.e. normalizing constant) + \item Inductive description has the shape $\te \compiles (\texttt{p}_1, \texttt{p}_2)$. We want to + define this relation to satisfy the following \textbf{adequacy condition}: + \begin{align} + \dbracket{\te}_D(\true) = \frac{\dbracket{\prog_1}}{\dbracket{\prog_2}}. + \end{align} + We will shorten this description to $\te \compiles + (\varphi, \varphi_A, w)$ and assume that the two formulae share a common + $w$. The adequacy condition then becomes: + \begin{align} + \dbracket{\te}_D(\true) = \frac{\dbracket{(\varphi, w)}}{\dbracket{(\varphi_A, w)}}. 
+ \end{align}
+ \item Compilation relation:
+ \begin{mathpar}
+ \inferrule{}{\true \compiles (\true, \true, \emptyset)} \and
+ \inferrule{}{\false \compiles (\false, \true, \emptyset)} \and
+ \inferrule{}{x \compiles (x, \true, \emptyset)} \and
+ \inferrule{\texttt{fresh}~x}
+ {\texttt{flip}~\theta \compiles (x, \true, [x \mapsto \theta, \overline{x} \mapsto 1-\theta])} \and
+ \inferrule{\te_1 \compiles (\varphi, \varphi_A, w) \and \te_2 \compiles (\varphi', \varphi_A', w')}
+ {x \leftarrow \te_1; \te_2 \compiles (\varphi'[\varphi/x], \varphi_A'[\varphi/x] \land \varphi_A, w \cup w')} \and
+ \inferrule{\te_1 \compiles (\varphi, \varphi_A, w) \and \te_2 \compiles (\varphi', \varphi_A', w')}
+ {\texttt{observe}~\te_1; \te_2 \compiles (\varphi', \varphi_A' \land \varphi_A, w \cup w')}
+ \end{mathpar}
+
+\end{itemize}
+\begin{theorem}[Adequacy]
+ For well-typed term $\te$,
+ assume $\te \compiles (\varphi, \varphi_A, w)$. Then,
+ $\dbracket{\te}_D(\true) = {\dbracket{(\varphi, w)}} / {\dbracket{(\varphi_A, w)}}$.
+\end{theorem}
+
+\bibliographystyle{plainnat}
+\bibliography{../bib}
+
+
+\end{document}
\ No newline at end of file