From 18d33daaa51afe3c0d703a78b067e7352bf30583 Mon Sep 17 00:00:00 2001 From: Nathan Lambert Date: Sun, 11 Aug 2024 19:36:38 -0400 Subject: [PATCH] Rejection Sampling + Run Again (#7) --- .github/workflows/static.yml | 4 -- Makefile | 2 +- README.md | 1 + ...{02-installation.md => 02-optimization.md} | 2 +- chapters/03-opt-rejection-sampling.md | 14 +++++++ chapters/03-usage.md | 3 -- chapters/bib.bib | 39 +++++++++++++++++++ templates/{pdf.latex => pdf.tex} | 14 +++++++ 8 files changed, 70 insertions(+), 9 deletions(-) rename chapters/{02-installation.md => 02-optimization.md} (91%) create mode 100644 chapters/03-opt-rejection-sampling.md delete mode 100644 chapters/03-usage.md rename templates/{pdf.latex => pdf.tex} (97%) diff --git a/.github/workflows/static.yml b/.github/workflows/static.yml index 0f28da8..0b4af43 100644 --- a/.github/workflows/static.yml +++ b/.github/workflows/static.yml @@ -38,10 +38,6 @@ jobs: run: | echo "/Library/TeX/texbin" >> $GITHUB_PATH echo "PATH=$PATH:/Library/TeX/texbin" >> $GITHUB_ENV - xelatex --version # Verify xelatex is accessible - if ! 
command -v xelatex &> /dev/null; then - sudo ln -s /Library/TeX/texbin/xelatex /usr/local/bin/xelatex - fi diff --git a/Makefile b/Makefile index 18ef77c..617533f 100644 --- a/Makefile +++ b/Makefile @@ -42,7 +42,7 @@ PANDOC_COMMAND = pandoc DOCX_ARGS = --standalone --reference-doc templates/docx.docx EPUB_ARGS = --template templates/epub.html --epub-cover-image $(COVER_IMAGE) HTML_ARGS = --template templates/html.html --standalone --to html5 -PDF_ARGS = --template templates/pdf.latex --pdf-engine xelatex +PDF_ARGS = --template templates/pdf.tex --pdf-engine xelatex NESTED_HTML_TEMPLATE = templates/chapter.html # Per-format file dependencies diff --git a/README.md b/README.md index 59c189a..324fde1 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,7 @@ sudo apt-get install texlive-fonts-recommended texlive-xetex brew install pandoc brew install make ``` +(See below for `pandoc-crossref`) ### Folder structure diff --git a/chapters/02-installation.md b/chapters/02-optimization.md similarity index 91% rename from chapters/02-installation.md rename to chapters/02-optimization.md index 6b0f911..c24791c 100644 --- a/chapters/02-installation.md +++ b/chapters/02-optimization.md @@ -1,4 +1,4 @@ -# Installation +# Optimization - Overview This is the installation chapter. We love the book [@russell2016artificial]. diff --git a/chapters/03-opt-rejection-sampling.md b/chapters/03-opt-rejection-sampling.md new file mode 100644 index 0000000..09f811b --- /dev/null +++ b/chapters/03-opt-rejection-sampling.md @@ -0,0 +1,14 @@ +# Rejection Sampling + +Rejection Sampling (RS) is a popular and simple baseline for performing preference fine-tuning. +Rejection sampling operates by curating new candidate instructions, filtering them based on a trained reward model, and then fine-tuning the original model only on the top completions. 
+ +The name originates from computational statistics [@gilks1992adaptive], where one wishes to sample from a complex distribution, but does not have a direct method to do so. +To alleviate this, one samples from a simpler-to-model distribution and uses a heuristic to check if the sample is permissible. +With language models, the target distribution is high-quality answers to instructions, the filter is a reward model, and the sampling distribution is the current model. + +## Related works + +Many prominent RLHF and preference fine-tuning papers have used rejection sampling as a baseline, but a canonical implementation and documentation do not exist. + +WebGPT [@nakano2021webgpt], Anthropic's Helpful and Harmless agent [@bai2022training], OpenAI's popular paper on process reward models [@lightman2023let], Llama 2 Chat models [@touvron2023llama], and other seminal works all use this baseline. \ No newline at end of file diff --git a/chapters/03-usage.md b/chapters/03-usage.md deleted file mode 100644 index ad06f02..0000000 --- a/chapters/03-usage.md +++ /dev/null @@ -1,3 +0,0 @@ -# Usage - -This is the usage chapter. 
diff --git a/chapters/bib.bib b/chapters/bib.bib index b57b158..8cf3dee 100644 --- a/chapters/bib.bib +++ b/chapters/bib.bib @@ -10,4 +10,43 @@ @book{russell2016artificial author={Russell, Stuart J and Norvig, Peter}, year={2016}, publisher={Pearson} +} + +@article{gilks1992adaptive, + title={Adaptive rejection sampling for Gibbs sampling}, + author={Gilks, Walter R and Wild, Pascal}, + journal={Journal of the Royal Statistical Society: Series C (Applied Statistics)}, + volume={41}, + number={2}, + pages={337--348}, + year={1992}, + publisher={Wiley Online Library} +} + +@article{nakano2021webgpt, + title={Webgpt: Browser-assisted question-answering with human feedback}, + author={Nakano, Reiichiro and Hilton, Jacob and Balaji, Suchir and Wu, Jeff and Ouyang, Long and Kim, Christina and Hesse, Christopher and Jain, Shantanu and Kosaraju, Vineet and Saunders, William and others}, + journal={arXiv preprint arXiv:2112.09332}, + year={2021} +} + +@article{bai2022training, + title={Training a helpful and harmless assistant with reinforcement learning from human feedback}, + author={Bai, Yuntao and Jones, Andy and Ndousse, Kamal and Askell, Amanda and Chen, Anna and DasSarma, Nova and Drain, Dawn and Fort, Stanislav and Ganguli, Deep and Henighan, Tom and others}, + journal={arXiv preprint arXiv:2204.05862}, + year={2022} +} + +@article{lightman2023let, + title={Let's verify step by step}, + author={Lightman, Hunter and Kosaraju, Vineet and Burda, Yura and Edwards, Harri and Baker, Bowen and Lee, Teddy and Leike, Jan and Schulman, John and Sutskever, Ilya and Cobbe, Karl}, + journal={arXiv preprint arXiv:2305.20050}, + year={2023} +} + +@article{touvron2023llama, + title={Llama 2: Open foundation and fine-tuned chat models}, + author={Touvron, Hugo and Martin, Louis and Stone, Kevin and Albert, Peter and Almahairi, Amjad and Babaei, Yasmine and Bashlykov, Nikolay and Batra, Soumya and Bhargava, Prajjwal and Bhosale, Shruti and others}, + journal={arXiv preprint 
arXiv:2307.09288}, + year={2023} } \ No newline at end of file diff --git a/templates/pdf.latex b/templates/pdf.tex similarity index 97% rename from templates/pdf.latex rename to templates/pdf.tex index 5b16e6c..10dc550 100644 --- a/templates/pdf.latex +++ b/templates/pdf.tex @@ -185,6 +185,18 @@ $if(indent)$ $else$ \makeatletter +% new code here +\newsavebox\pandoc@box +\newcommand*\pandocbounded[1]{% + \sbox\pandoc@box{#1}% + \Gscale@div\@tempa{\textheight}{\dimexpr\ht\pandoc@box+\dp\pandoc@box\relax}% + \Gscale@div\@tempb{\linewidth}{\wd\pandoc@box}% + \ifdim\@tempb\p@<\@tempa\p@\let\@tempa\@tempb\fi% + \ifdim\@tempa\p@<\p@\scalebox{\@tempa}{\usebox\pandoc@box}% + \else\usebox{\pandoc@box}% + \fi% +} + \@ifundefined{KOMAClassName}{% if non-KOMA class \IfFileExists{parskip.sty}{% \usepackage{parskip} @@ -427,6 +439,8 @@ $endif$ $endif$ + + \begin{document} $if(has-frontmatter)$ \frontmatter