Showing 267 changed files with 20,101 additions and 1 deletion.
Dockerfile
@@ -0,0 +1,68 @@
# Use an official Python runtime as a parent image
# FROM python:3.9-slim
FROM pytorch/pytorch:1.9.1-cuda11.1-cudnn8-devel

# Upgrade pip
RUN pip3 install --upgrade pip

# Install Python packages
RUN pip install \
    async-timeout==4.0.2 \
    Cython==0.29.32 \
    h5py==3.6.0 \
    huggingface-hub==0.5.1 \
    IProgress==0.4 \
    ipykernel==6.13.0 \
    ipython==7.29.0 \
    ipython-genutils==0.2.0 \
    ipywidgets==7.7.0 \
    joblib==1.1.0 \
    jupyter-client==7.3.4 \
    jupyter-core==4.10.0 \
    jupyter-server==1.17.0 \
    jupyterlab==3.4.0 \
    jupyterlab-pygments==0.2.2 \
    jupyterlab-server==2.13.0 \
    jupyterlab-widgets==1.1.0 \
    matplotlib==3.5.2 \
    matplotlib-inline==0.1.2 \
    mosestokenizer==1.2.1 \
    multidict==6.0.2 \
    nltk==3.7 \
    numba==0.56.4 \
    numpy==1.21.2 \
    pandas==1.3.5 \
    pickleshare==0.7.5 \
    Pillow==8.4.0 \
    pytorch-lightning==1.6.3 \
    pytorch-memlab==0.2.4 \
    pytorch-nlp==0.5.0 \
    requests==2.25.1 \
    scikit-learn==1.0.2 \
    scipy==1.7.3 \
    seaborn==0.11.2 \
    sentencepiece==0.1.97 \
    six==1.16.0 \
    smart-open==5.2.1 \
    tensorboard==2.9.0 \
    tensorboard-data-server==0.6.1 \
    tensorboard-plugin-wit==1.8.1 \
    tokenizers==0.12.1 \
    toolwrapper==2.1.0 \
    torch==1.13.0 \
    torchaudio==0.13.0 \
    torchelastic==0.2.0 \
    torchmetrics==0.8.2 \
    torchtext==0.11.0 \
    torchvision==0.11.1 \
    tqdm==4.61.2 \
    transformers==4.18.0

# Set the working directory
WORKDIR /app

# Copy the current directory contents into the container at /app
COPY . /app

# Specify the default command to run on container start
CMD ["bash"]
README.md
@@ -1 +1,58 @@
-# Code coming soon!

## Probing for Incremental Parse States in Autoregressive Language Models

Supplementary materials and demo for "Probing for Incremental Parse States in Autoregressive Language Models" (Eisape et al., 2022).

## Environment

Our [Dockerfile](Dockerfile) specifies the dependencies needed to run the code in this repository; build the image with:

    docker build -t incremental_parse_probe .

The rest of this walkthrough assumes you are working in such an environment.

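For example, the container can be started interactively with GPU access and the repository mounted at `/app` (a hypothetical invocation, not from the original README; the `--gpus` flag assumes the NVIDIA Container Toolkit is installed, and the mount path is an assumption):

```bash
# Hypothetical usage: interactive container with GPU access, mounting the
# current checkout over the image's /app working directory.
docker run --gpus all -it -v "$(pwd)":/app incremental_parse_probe
```
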
## Preprocessing

The necessary datasets are 1) PTB-formatted constituency parses and 2) CoNLL-X formatted dependency parses (i.e., `$SPLIT.txt` and `$SPLIT.conllx`; CoNLL-X trees can be generated with [Stanford CoreNLP](https://stanfordnlp.github.io/CoreNLP/)). After adding those files to `data/`, running `python3 src/preprocess.py` will generate preprocessed versions of the dataset in `data/`.

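A minimal sketch of the expected workflow, assuming splits named `train`, `dev`, and `test` (the split names are an assumption; use whatever names your files carry):

```bash
# Hypothetical layout: one constituency file and one dependency file per split.
ls data/
# dev.conllx  dev.txt  test.conllx  test.txt  train.conllx  train.txt

# Writes preprocessed versions of the dataset back into data/.
python3 src/preprocess.py
```
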
## Training

The following command trains a probe specified by a config file (e.g., `config.yaml`) with PyTorch Lightning:

    python3 src/train.py --config $CONFIG_PATH

Training produces a new directory in `./experiment_checkpoints` containing model parameters and hyperparameters. We provide config files for each of the models in the paper in [configs/](configs). **NOTE**: the geometric action probe is pretrained on the regression task from Hewitt and Manning (2019); to train these probes, first train a geometric regression probe on the relevant model and layer, then point to its weights from the config file. See [configs/](configs) for an example.

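A sketch of that two-step workflow (the config filenames below are hypothetical, invented for illustration; the real ones live in [configs/](configs)):

```bash
# 1) Hypothetical: train the geometric regression probe for the target model/layer.
python3 src/train.py --config configs/geometric_regression_example.yaml

# 2) Hypothetical: train the geometric action probe, whose config points at the
#    regression probe checkpoint written to ./experiment_checkpoints in step 1.
python3 src/train.py --config configs/geometric_action_example.yaml
```
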
## Evaluation

To evaluate the probes with probe-based word-synchronous beam search, run the following command with the path of a model training run:

    python3 src/parse.py --experiment_path $EXPERIMENT_PATH

where `$EXPERIMENT_PATH` points to the directory for the probe that was created during training. This script uses utilities from `gpt2.py` to decode an incremental parse state, up to and including the current word, from GPT-2 encodings of the sentence prefix up to that word. The result is a new CSV file in `results/` with parsing statistics (e.g., UAS).

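For instance, under the `save_dir`/`version` layout used by the configs in this commit (a hypothetical path, assuming checkpoints land under `save_dir` plus `version`; substitute your own run directory):

```bash
# Hypothetical: evaluate the AttentiveProbe trained on layer 0 of gpt2-xl.
python3 src/parse.py --experiment_path experiment_checkpoints/eval/gpt2-xl/layer_0
```
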
In addition to these, the paper includes several more involved experiments, including behavioral and causal-intervention experiments on GPT-2 processing garden path sentences. This codebase contains all of the utilities needed to replicate them, mainly in `gpt2.py`; we also include the dataset used there in [data/npz_experiment](data/npz_experiment). Please contact [[email protected]](https://eisape.github.io/) with any difficulties or questions.

## Citation

```
@inproceedings{eisape-etal-2022-probing,
  title = "Probing for Incremental Parse States in Autoregressive Language Models",
  author = "Eisape, Tiwalayo and Gangireddy, Vineet and Levy, Roger and Kim, Yoon",
  booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
  month = dec,
  year = "2022",
  address = "Abu Dhabi, United Arab Emirates",
  publisher = "Association for Computational Linguistics",
  url = "https://aclanthology.org/2022.findings-emnlp.203",
  pages = "2801--2813",
}
```

## Acknowledgments

This project builds on code from the following repositories:

- [https://github.com/john-hewitt/structural-probe](https://github.com/john-hewitt/structural-probe)
- [https://github.com/aistairc/rnng-pytorch](https://github.com/aistairc/rnng-pytorch)
- [https://github.com/qipeng/arc-swift](https://github.com/qipeng/arc-swift)
configs/… (AttentiveProbe config: gpt2-xl, layer 0)
@@ -0,0 +1,60 @@
cuda: true
data_params:
  action_ngram_pad: 40
  action_pad: 400
  num_workers: 4
  pin_memory: false
  root_dir: data
  test:
    batch_size: 30
    dry_run: false
    shuffle: true
  token_pad: 150
  train:
    batch_size: 30
    dry_run: false
    shuffle: true
  valid:
    batch_size: 30
    dry_run: false
    shuffle: false
device: cuda
exp_params:
  manual_seed: 1265
  optimizer_params:
    lr: 0.001
  optimizer_type: Adam
  scheduler_params:
    factor: 0.1
    mode: min
    patience: 0
  scheduler_type: ReduceLROnPlateau
logging_params:
  save_dir: ./experiment_checkpoints/eval/gpt2-xl/
  version: layer_0
pretrained_model: gpt2-xl
probe_params:
  add_root: true
  continuous: true
  data_sources:
    - action_ids
    - continuous_action_masks
    - gold_tuples
  emb_size: 100
  embeddings_dropout_rate: 0
  layer: 0
  layer_dropout_rate: 0.2
  num_layers: 1
  oracle_params:
    mappings_file: data/mappings-ptb.txt
    name: ArcStandard
  probe_name: AttentiveProbe
  probe_type: AttentiveProbe
  reverse: true
  rnn_type: GRU
  state_size: 100
trainer_params:
  accumulate_grad_batches: 1
  gpus:
    - 0
  max_epochs: 25
configs/… (AttentiveProbe config: gpt2-xl, layer 1)
@@ -0,0 +1,60 @@
cuda: true
data_params:
  action_ngram_pad: 40
  action_pad: 400
  num_workers: 4
  pin_memory: false
  root_dir: data
  test:
    batch_size: 30
    dry_run: false
    shuffle: true
  token_pad: 150
  train:
    batch_size: 30
    dry_run: false
    shuffle: true
  valid:
    batch_size: 30
    dry_run: false
    shuffle: false
device: cuda
exp_params:
  manual_seed: 1265
  optimizer_params:
    lr: 0.001
  optimizer_type: Adam
  scheduler_params:
    factor: 0.1
    mode: min
    patience: 0
  scheduler_type: ReduceLROnPlateau
logging_params:
  save_dir: ./experiment_checkpoints/eval/gpt2-xl/
  version: layer_1
pretrained_model: gpt2-xl
probe_params:
  add_root: true
  continuous: true
  data_sources:
    - action_ids
    - continuous_action_masks
    - gold_tuples
  emb_size: 100
  embeddings_dropout_rate: 0
  layer: 1
  layer_dropout_rate: 0.2
  num_layers: 1
  oracle_params:
    mappings_file: data/mappings-ptb.txt
    name: ArcStandard
  probe_name: AttentiveProbe
  probe_type: AttentiveProbe
  reverse: true
  rnn_type: GRU
  state_size: 100
trainer_params:
  accumulate_grad_batches: 1
  gpus:
    - 8
  max_epochs: 25
configs/… (AttentiveProbe config: gpt2-xl, layer 10)
@@ -0,0 +1,60 @@
cuda: true
data_params:
  action_ngram_pad: 40
  action_pad: 400
  num_workers: 4
  pin_memory: false
  root_dir: data
  test:
    batch_size: 30
    dry_run: false
    shuffle: true
  token_pad: 150
  train:
    batch_size: 30
    dry_run: false
    shuffle: true
  valid:
    batch_size: 30
    dry_run: false
    shuffle: false
device: cuda
exp_params:
  manual_seed: 1265
  optimizer_params:
    lr: 0.001
  optimizer_type: Adam
  scheduler_params:
    factor: 0.1
    mode: min
    patience: 0
  scheduler_type: ReduceLROnPlateau
logging_params:
  save_dir: ./experiment_checkpoints/eval/gpt2-xl/
  version: layer_10
pretrained_model: gpt2-xl
probe_params:
  add_root: true
  continuous: true
  data_sources:
    - action_ids
    - continuous_action_masks
    - gold_tuples
  emb_size: 100
  embeddings_dropout_rate: 0
  layer: 10
  layer_dropout_rate: 0.2
  num_layers: 1
  oracle_params:
    mappings_file: data/mappings-ptb.txt
    name: ArcStandard
  probe_name: AttentiveProbe
  probe_type: AttentiveProbe
  reverse: true
  rnn_type: GRU
  state_size: 100
trainer_params:
  accumulate_grad_batches: 1
  gpus:
    - 5
  max_epochs: 25
configs/… (AttentiveProbe config: gpt2-xl, layer 11)
@@ -0,0 +1,60 @@
cuda: true
data_params:
  action_ngram_pad: 40
  action_pad: 400
  num_workers: 4
  pin_memory: false
  root_dir: data
  test:
    batch_size: 30
    dry_run: false
    shuffle: true
  token_pad: 150
  train:
    batch_size: 30
    dry_run: false
    shuffle: true
  valid:
    batch_size: 30
    dry_run: false
    shuffle: false
device: cuda
exp_params:
  manual_seed: 1265
  optimizer_params:
    lr: 0.001
  optimizer_type: Adam
  scheduler_params:
    factor: 0.1
    mode: min
    patience: 0
  scheduler_type: ReduceLROnPlateau
logging_params:
  save_dir: ./experiment_checkpoints/eval/gpt2-xl/
  version: layer_11
pretrained_model: gpt2-xl
probe_params:
  add_root: true
  continuous: true
  data_sources:
    - action_ids
    - continuous_action_masks
    - gold_tuples
  emb_size: 100
  embeddings_dropout_rate: 0
  layer: 11
  layer_dropout_rate: 0.2
  num_layers: 1
  oracle_params:
    mappings_file: data/mappings-ptb.txt
    name: ArcStandard
  probe_name: AttentiveProbe
  probe_type: AttentiveProbe
  reverse: true
  rnn_type: GRU
  state_size: 100
trainer_params:
  accumulate_grad_batches: 1
  gpus:
    - 5
  max_epochs: 25