-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
204 additions
and
177 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,200 @@ | ||
\documentclass{tufte-handout} | ||
|
||
\title{Discrete Probabilistic Programming Languages\thanks{CS7470 Fall 2023: Foundations of Probabilistic Programming.}} | ||
|
||
|
||
\newcommand{\varset}[0]{\mathcal{V}} | ||
|
||
\author[]{Steven Holtzen\\[email protected]} | ||
|
||
%\date{28 March 2010} % without \date command, current date is supplied | ||
|
||
%\geometry{showframe} % display margins for debugging page layout | ||
\input{../header.tex} | ||
|
||
\begin{document} | ||
\maketitle% this prints the handout title, author, and date | ||
|
||
\section{\disc{}: A simple discrete PPL} | ||
|
||
\begin{itemize} | ||
\item Syntax: | ||
\begin{lstlisting}[mathescape=true] | ||
e ::= | ||
| x $\leftarrow$ e; e | ||
| observe e; e | ||
| flip q // q is a rational value | ||
| if e then e else e | ||
| return e | ||
| true | false | ||
| e $\land$ e | e $\lor$ e | $\neg$ e | | ||
| ( e ) | ||
p ::= e | ||
\end{lstlisting} | ||
|
||
\item \disc{} looks very similar to a standard functional programming language, | ||
but has two some interesting new keywords: \texttt{flip}, \texttt{observe}, and | ||
\texttt{return} | ||
|
||
\item $\texttt{flip}~\theta$ allocates a new random quantity that is $\true$ | ||
with probability $\theta$ and $\false$ with probability $1-\theta$ | ||
|
||
\item \texttt{return e} turns a non-probabilistic quantity into a probabilistic one, i.e. | ||
\texttt{return true} is a \emph{probabilistic quantity} that is $\true$ with probability 1 and | ||
$\false$ with probability 0 | ||
|
||
\item Example program and its interpretation: | ||
|
||
\begin{lstlisting}[mathescape=true] | ||
x $\leftarrow$ flip 0.5; | ||
y $\leftarrow$ flip 0.5; | ||
return x $\land$ y | ||
\end{lstlisting} | ||
|
||
This program outputs the probability distribution $[\true \mapsto 0.25, \false | ||
\mapsto 0.75]$. | ||
|
||
\item \texttt{observe} is a powerful keyword that lets us \emph{condition} the | ||
program. For instance, suppose I want to model the following scenario: ``flip | ||
two coins and observe that at least one of them is heads. What is the probability | ||
that the first coin was heads?'' | ||
|
||
We can encode this scenario as a \disc{} program: | ||
|
||
\begin{lstlisting}[mathescape=true] | ||
x $\leftarrow$ flip 0.5; | ||
y $\leftarrow$ flip 0.5; | ||
observe x $\lor$ y; | ||
return x | ||
\end{lstlisting} | ||
|
||
This program outputs the probability distribution: | ||
\begin{align*} | ||
[\true \mapsto (0.25 + 0.25) / 0.75, \false | ||
\mapsto 0.25 / 0.75] | ||
\end{align*} | ||
|
||
\item \textbf{Type system}: terms can either be pure Booleans of type $\bool$ | ||
or distributions on Booleans of type $\dist{\bool}$. So, we have the following | ||
type definition: | ||
\begin{align} | ||
\tau ::= \bool \mid \dist{\bool}. | ||
\end{align} | ||
|
||
\item We define a typing judgment $\Gamma \vdash \te : \tau$ that associates each | ||
term with a type. The typing context $\Gamma$ is a map from identifiers to types. | ||
\begin{mathpar} | ||
\inferrule{}{\Gamma \vdash \true{} : \bool} \and | ||
\inferrule{}{\Gamma \vdash \false{} : \bool} \and | ||
\inferrule{}{\Gamma \vdash \texttt{flip}~\theta : \dist{\bool}} \and | ||
\inferrule{\Gamma \vdash \te : \bool}{\Gamma \vdash \texttt{return}~\te : \dist{\bool}} \and | ||
\inferrule{\Gamma \vdash \te_1 : \bool \and \Gamma \vdash \te_2 : \tau} | ||
{\Gamma \vdash \texttt{observe}~\te_1; \te_2 : \tau} \and | ||
\inferrule{\Gamma \vdash \te_1 : \dist{\bool} \and \Gamma \cup [x \mapsto \bool] \vdash \te_2 : \tau} | ||
{\Gamma \vdash x \leftarrow \te_1; \te_2 : \tau} \and | ||
\inferrule{\Gamma \vdash \te_1 : \bool \and \Gamma \vdash \te_2 : \tau \and \Gamma \vdash \te_3 : \tau} | ||
{\Gamma \vdash \texttt{if}~\te_1~\texttt{then}~\te_2~\texttt{else}~\te_3 : \tau} \and | ||
\inferrule{\Gamma \vdash \te_1 : \bool \and \Gamma \vdash \te_2 : \bool} | ||
{\Gamma \vdash \te_1 \land \te_2 : \bool} | ||
\end{mathpar} | ||
|
||
\end{itemize} | ||
|
||
|
||
|
||
\subsection{Denotational semantics of \disc{}} | ||
|
||
\begin{itemize} | ||
\item Associates each term with an \emph{unnormalized probability distribution} | ||
(i.e., the total probability mass may be less than 1). | ||
\item Has the type $\dbracket{\te} : \texttt{Bool} \rightarrow [0, 1]$, | ||
and has the following inductive definition: | ||
|
||
$$ | ||
[\![\texttt{flip}~\theta]\!](v) = | ||
\begin{cases} | ||
\theta& \quad \text{if }v = T\\ | ||
1-\theta& \quad \text{if }v = F\\ | ||
\end{cases} | ||
$$ | ||
|
||
$$ | ||
[\![\texttt{return}~e]\!](v) = | ||
\begin{cases} | ||
1\quad& \text{if }v = [\![e]\!]\\ | ||
0\quad& \text{otherwise}\\ | ||
\end{cases} | ||
$$ | ||
|
||
$$ | ||
[\![x \leftarrow e_1; e_2]\!](v) = \sum_{v'} [\![{e_1}]\!](v') \times [\![{e_2[x \mapsto v']}]\!](v) | ||
$$ | ||
|
||
$$ | ||
[\![\texttt{observe}~e_1; e_2]\!](v) = | ||
\sum_{\{v' \mid [\![{e_1}(v') = T\}]\!]} [\![{e_2}]\!](v) | ||
$$ | ||
|
||
\item The semantics for non-probabilistic terms is standard. The semantic evaluation has type | ||
$[[e]]: \texttt{Bool}$ for closed terms. | ||
|
||
\item These semantics give an unnormalized distribution. The main semantic object of interest is | ||
the normalized distribution, which is given by the \defn{normalized semantics}: | ||
|
||
$$ | ||
[\![e]\!]_D(T) = \frac{[\![e]\!](T)}{[\![e]\!](T) + [\![e]\!](F) }, | ||
$$ | ||
|
||
defined analogously for the false case. | ||
\end{itemize} | ||
|
||
% \subsection{Inference via enumeration} | ||
% \begin{itemize} | ||
% \item Define a relation $\te \Downarrow^e $ | ||
% \end{itemize} | ||
|
||
\section{Observation} | ||
|
||
\section{Compiling \disc{} to \prop{}} | ||
\begin{itemize} | ||
\item \textbf{Goal}: give a semantics-preserving compilation | ||
$\rightsquigarrow$ that compiles \disc{} to \prop{} | ||
\item In order to handle observations, we will compile \disc{} programs into | ||
\emph{two} \prop{} programs: one that computes the unnormalized probability of | ||
returning $\true$, and one that computes the probability of evidence (i.e. normalizing constant) | ||
\item Inductive description has the shape $\te \compiles (\texttt{p}_1, \texttt{p}_2)$. We want to | ||
define this relation to satisfy the following \textbf{adequacy condition}: | ||
\begin{align} | ||
\dbracket{\te}_D(\true) = \frac{\dbracket{\prog_1}}{\dbracket{\prog_2}}. | ||
\end{align} | ||
We will shorten this description to $\te \compiles | ||
(\varphi, \varphi_A, w)$ and assume that the two formulae share a common | ||
$w$. The adequacy condition then becomes: | ||
\begin{align} | ||
\dbracket{\te}_D(\true) = \frac{\dbracket{(\varphi, w)}}{\dbracket{(\varphi_A, w)}}. | ||
\end{align} | ||
\item Compilation relation: | ||
\begin{mathpar} | ||
\inferrule{}{\true \compiles (\true, \true, \emptyset)} \and | ||
\inferrule{}{\false \compiles (\false, \true, \emptyset)} \and | ||
\inferrule{}{x \compiles (x, \true, \emptyset)} \and | ||
\inferrule{\texttt{fresh}~x} | ||
{\texttt{flip}~\theta \compiles (x, \true, [x \mapsto \theta, \overline{x} \mapsto 1-\theta])} \and | ||
\inferrule{\te_1 \compiles (\varphi, \varphi_A, w) \and \te_2 \compiles (\varphi', \varphi_A', w')} | ||
{x \leftarrow \te_1; \te_2 \compiles (\varphi'[\varphi/x], \varphi_A'[\varphi/x] \land \varphi_A, w_1 \cup w_2)} \and | ||
\inferrule{\te_1 \compiles (\varphi, \varphi_A, w) \and \te_2 \compiles (\varphi', \varphi_A', w')} | ||
{\texttt{observe}~\te_1; \te_2 \compiles (\varphi', \varphi_A' \land \varphi_A, w_1 \cup w_2)} | ||
\end{mathpar} | ||
|
||
\end{itemize} | ||
\begin{theorem}[Adequacy] | ||
For well-typed term $\te$, | ||
assume $\te \compiles (\varphi, \varphi_A, w)$. Then, | ||
$\dbracket{\te}_D(\true) = {\dbracket{(\varphi, w)}} / {\dbracket{(\varphi_A, w)}}$. | ||
\end{theorem} | ||
|
||
\bibliographystyle{plainnat} | ||
\bibliography{../bib} | ||
|
||
|
||
\end{document} |