diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..c7c409d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,68 @@ +# Use the official PyTorch CUDA development image as the parent image +# FROM python:3.9-slim +FROM pytorch/pytorch:1.9.1-cuda11.1-cudnn8-devel + +# Upgrade pip +RUN pip3 install --upgrade pip + +# Install Python packages (the torch, torchaudio, torchtext and torchvision pins must all come from the same PyTorch release, otherwise pip cannot resolve them) +RUN pip install \ + async-timeout==4.0.2 \ + Cython==0.29.32 \ + h5py==3.6.0 \ + huggingface-hub==0.5.1 \ + IProgress==0.4 \ + ipykernel==6.13.0 \ + ipython==7.29.0 \ + ipython-genutils==0.2.0 \ + ipywidgets==7.7.0 \ + joblib==1.1.0 \ + jupyter-client==7.3.4 \ + jupyter-core==4.10.0 \ + jupyter-server==1.17.0 \ + jupyterlab==3.4.0 \ + jupyterlab-pygments==0.2.2 \ + jupyterlab-server==2.13.0 \ + jupyterlab-widgets==1.1.0 \ + matplotlib==3.5.2 \ + matplotlib-inline==0.1.2 \ + mosestokenizer==1.2.1 \ + multidict==6.0.2 \ + nltk==3.7 \ + numba==0.56.4 \ + numpy==1.21.2 \ + pandas==1.3.5 \ + pickleshare==0.7.5 \ + Pillow==8.4.0 \ + pytorch-lightning==1.6.3 \ + pytorch-memlab==0.2.4 \ + pytorch-nlp==0.5.0 \ + requests==2.25.1 \ + scikit-learn==1.0.2 \ + scipy==1.7.3 \ + seaborn==0.11.2 \ + sentencepiece==0.1.97 \ + six==1.16.0 \ + smart-open==5.2.1 \ + tensorboard==2.9.0 \ + tensorboard-data-server==0.6.1 \ + tensorboard-plugin-wit==1.8.1 \ + tokenizers==0.12.1 \ + toolwrapper==2.1.0 \ + torch==1.13.0 \ + torchaudio==0.13.0 \ + torchelastic==0.2.0 \ + torchmetrics==0.8.2 \ + torchtext==0.14.0 \ + torchvision==0.14.0 \ + tqdm==4.61.2 \ + transformers==4.18.0 + +# Set the working directory +WORKDIR /app + +# Copy the current directory contents into the container at /app +COPY . /app + +# Specify the default command to run on container start +CMD ["bash"] \ No newline at end of file diff --git a/README.md b/README.md index d55dc99..ebd0102 100644 --- a/README.md +++ b/README.md @@ -1 +1,58 @@ -# Code coming soon! 
+ +## Probing for Incremental Parse States in Autoregressive Language Models + +Supplementary materials and demo for "Probing for Incremental Parse States in Autoregressive Language Models" (Eisape et al., 2022). + +## Environment + +Our [Dockerfile](Dockerfile) contains the necessary dependencies to run the code in this repository and can be built with the following command: + + docker build -t incremental_parse_probe . + +The rest of the walkthrough assumes you are working in a suitable environment. + +## Preprocessing + +The necessary datasets are 1) PTB-formatted constituency parses and 2) conllx-formatted dependency parses (i.e. `$SPLIT.txt`, `$SPLIT.conllx`; conllx-formatted trees can be generated with [Stanford CoreNLP](https://stanfordnlp.github.io/CoreNLP/)). After adding those files to `data/`, running `python3 src/preprocess.py` will generate preprocessed versions of the dataset in `data/`. + +## Training + +The following command trains the probe specified by the config file at `$CONFIG_PATH` with PyTorch Lightning: + + python3 src/train.py --config $CONFIG_PATH + +The result of training is a new directory in `./experiment_checkpoints` with model parameters and hyperparameters. We provide config files for each of the models in the paper in [configs/](configs). **NOTE**: the geometric action probe is pretrained on the regression task from Hewitt and Manning (2019). To train these probes, first train a geometric regression probe on the relevant model and layer, then point to its weights from the config file. See [configs/](configs) for an example. + +## Evaluation + +To evaluate the probes with probe-based word-synchronous beam search, run the following command with the path of a model training run: + + python3 src/parse.py --experiment_path $EXPERIMENT_PATH + +Here, `$EXPERIMENT_PATH` points to the directory with the probe that was created during training. 
This script uses utilities from gpt2.py to decode an incremental parse state up to and including the current word from GPT-2 encodings of a sentence prefix up to that word. The result is a new CSV file in `results/` with parsing statistics (e.g. UAS). + +In addition to these, the paper includes several more involved experiments, including behavioural and causal intervention experiments on GPT-2 processing garden-path sentences. This codebase contains all of the necessary utilities to replicate these experiments, mainly in gpt2.py; we also include the dataset used there in [data/npz_experiment](data/npz_experiment). Please contact [Tiwalayo Eisape](https://eisape.github.io/) at [eisape@mit.edu](mailto:eisape@mit.edu) with any difficulties or questions. + +## Citation + + ``` + @inproceedings{eisape-etal-2022-probing, + title = "Probing for Incremental Parse States in Autoregressive Language Models", + author = "Eisape, Tiwalayo and Gangireddy, Vineet and Levy, Roger and Kim, Yoon", + booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022", + address = "Abu Dhabi, United Arab Emirates", + publisher = "Association for Computational Linguistics", + url = "https://aclanthology.org/2022.findings-emnlp.203", + pages = "2801--2813", + month = dec, + year = "2022", + } + ``` + +## Acknowledgments + +This project builds on code from the following repositories: + +- [https://github.com/john-hewitt/structural-probe](https://github.com/john-hewitt/structural-probe) +- [https://github.com/aistairc/rnng-pytorch](https://github.com/aistairc/rnng-pytorch) +- [https://github.com/qipeng/arc-swift](https://github.com/qipeng/arc-swift) diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_0.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_0.yaml new file mode 100644 index 0000000..7f929a0 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_0.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: 
false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_0 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 0 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 0 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_1.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_1.yaml new file mode 100644 index 0000000..ab35826 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_1.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_1 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - 
gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 1 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_10.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_10.yaml new file mode 100644 index 0000000..cd1d14e --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_10.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_10 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 10 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_11.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_11.yaml new file mode 100644 index 0000000..ea72f73 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_11.yaml @@ -0,0 +1,60 @@ +cuda: true 
+data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_11 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 11 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_12.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_12.yaml new file mode 100644 index 0000000..502de49 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_12.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_12 +pretrained_model: gpt2-xl +probe_params: + 
add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 12 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_13.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_13.yaml new file mode 100644 index 0000000..fa767ce --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_13.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_13 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 13 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 7 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_14.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_14.yaml new file mode 100644 index 0000000..f216291 --- 
/dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_14.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_14 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 14 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_15.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_15.yaml new file mode 100644 index 0000000..665f4ba --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_15.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: 
./experiment_checkpoints/eval/gpt2-xl/ + version: layer_15 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 15 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_16.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_16.yaml new file mode 100644 index 0000000..3240715 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_16.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_16 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 16 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_17.yaml 
b/configs/eval/gpt2-xl/AttentiveProbe/layer_17.yaml new file mode 100644 index 0000000..27b1d7a --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_17.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_17 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 17 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_18.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_18.yaml new file mode 100644 index 0000000..80f8fec --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_18.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + 
mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_18 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 18 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_19.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_19.yaml new file mode 100644 index 0000000..2257ce4 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_19.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_19 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 19 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + 
- 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_2.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_2.yaml new file mode 100644 index 0000000..be24c39 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_2.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_2 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 2 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_20.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_20.yaml new file mode 100644 index 0000000..01c06a5 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_20.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + 
optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_20 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 20 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_21.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_21.yaml new file mode 100644 index 0000000..d165ebd --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_21.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_21 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 21 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true 
+ rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_22.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_22.yaml new file mode 100644 index 0000000..0a1d809 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_22.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_22 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 22 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 7 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_23.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_23.yaml new file mode 100644 index 0000000..b94b2d9 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_23.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + 
dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_23 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 23 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_24.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_24.yaml new file mode 100644 index 0000000..c56b856 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_24.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_24 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 24 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: 
ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 7 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_25.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_25.yaml new file mode 100644 index 0000000..4cc7527 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_25.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_25 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 25 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_26.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_26.yaml new file mode 100644 index 0000000..4102817 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_26.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 
+ train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_26 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 26 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_27.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_27.yaml new file mode 100644 index 0000000..a3001f4 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_27.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_27 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 27 + layer_dropout_rate: 
0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_28.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_28.yaml new file mode 100644 index 0000000..7b5fc39 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_28.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_28 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 28 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_29.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_29.yaml new file mode 100644 index 0000000..48b3602 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_29.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + 
root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_29 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 29 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_3.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_3.yaml new file mode 100644 index 0000000..d561aaf --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_3.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_3 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - 
gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 3 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 9 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_30.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_30.yaml new file mode 100644 index 0000000..65e472e --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_30.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_30 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 30 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_31.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_31.yaml new file mode 100644 index 0000000..4960ca9 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_31.yaml @@ -0,0 +1,60 @@ +cuda: true 
+data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_31 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 31 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_32.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_32.yaml new file mode 100644 index 0000000..5065b5f --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_32.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_32 +pretrained_model: gpt2-xl +probe_params: + 
add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 32 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_33.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_33.yaml new file mode 100644 index 0000000..67a5f20 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_33.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_33 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 33 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_34.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_34.yaml new file mode 100644 index 0000000..d0f6b07 --- 
/dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_34.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_34 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 34 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 9 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_35.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_35.yaml new file mode 100644 index 0000000..5a7b973 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_35.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: 
./experiment_checkpoints/eval/gpt2-xl/ + version: layer_35 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 35 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_36.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_36.yaml new file mode 100644 index 0000000..7cff1a8 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_36.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_36 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 36 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_37.yaml 
b/configs/eval/gpt2-xl/AttentiveProbe/layer_37.yaml new file mode 100644 index 0000000..a03ef11 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_37.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_37 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 37 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_38.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_38.yaml new file mode 100644 index 0000000..ca73146 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_38.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + 
mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_38 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 38 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_39.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_39.yaml new file mode 100644 index 0000000..257542f --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_39.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_39 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 39 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + 
- 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_4.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_4.yaml new file mode 100644 index 0000000..c3a66d0 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_4.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_4 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 4 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 0 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_40.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_40.yaml new file mode 100644 index 0000000..dc4a1e8 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_40.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + 
optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_40 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 40 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_41.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_41.yaml new file mode 100644 index 0000000..78f3ab8 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_41.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_41 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 41 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true 
+ rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_42.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_42.yaml new file mode 100644 index 0000000..c5445d6 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_42.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_42 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 42 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_43.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_43.yaml new file mode 100644 index 0000000..f6333d0 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_43.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + 
dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_43 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 43 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 7 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_44.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_44.yaml new file mode 100644 index 0000000..f55e94c --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_44.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_44 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 44 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: 
ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_45.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_45.yaml new file mode 100644 index 0000000..6041758 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_45.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_45 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 45 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 9 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_46.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_46.yaml new file mode 100644 index 0000000..63254ab --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_46.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 
+ train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_46 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 46 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 9 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_47.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_47.yaml new file mode 100644 index 0000000..e514ada --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_47.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_47 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 47 + layer_dropout_rate: 
0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_48.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_48.yaml new file mode 100644 index 0000000..11fbfec --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_48.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_48 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 48 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 9 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_5.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_5.yaml new file mode 100644 index 0000000..bc65a0e --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_5.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: 
data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_5 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 5 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_6.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_6.yaml new file mode 100644 index 0000000..f5b5289 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_6.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_6 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + 
emb_size: 100 + embeddings_dropout_rate: 0 + layer: 6 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_7.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_7.yaml new file mode 100644 index 0000000..bbad8f6 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_7.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_7 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 7 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_8.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_8.yaml new file mode 100644 index 0000000..db06cb2 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_8.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + 
action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_8 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 8 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 7 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/AttentiveProbe/layer_9.yaml b/configs/eval/gpt2-xl/AttentiveProbe/layer_9.yaml new file mode 100644 index 0000000..12d7f43 --- /dev/null +++ b/configs/eval/gpt2-xl/AttentiveProbe/layer_9.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_9 +pretrained_model: gpt2-xl +probe_params: + add_root: true + continuous: 
true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 9 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_0.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_0.yaml new file mode 100644 index 0000000..3925b02 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_0.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_0 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_0/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 0 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_1.yaml 
b/configs/eval/gpt2-xl/Geometric_Action/layer_1.yaml new file mode 100644 index 0000000..220a81e --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_1.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_1 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_1/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 1 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 7 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_10.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_10.yaml new file mode 100644 index 0000000..86bcea8 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_10.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda 
+exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_10 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_10/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 10 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_11.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_11.yaml new file mode 100644 index 0000000..62888e5 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_11.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_11 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_11/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - 
gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 11 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_12.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_12.yaml new file mode 100644 index 0000000..62412aa --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_12.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_12 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_12/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 12 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_13.yaml 
b/configs/eval/gpt2-xl/Geometric_Action/layer_13.yaml new file mode 100644 index 0000000..1c36ff9 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_13.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_13 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_13/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 13 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_14.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_14.yaml new file mode 100644 index 0000000..9a1113d --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_14.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: 
cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_14 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_14/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 14 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_15.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_15.yaml new file mode 100644 index 0000000..b2d61eb --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_15.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_15 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_15/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - 
gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 15 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_16.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_16.yaml new file mode 100644 index 0000000..0e517a3 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_16.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_16 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_16/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 16 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_17.yaml 
b/configs/eval/gpt2-xl/Geometric_Action/layer_17.yaml new file mode 100644 index 0000000..4bf2ccf --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_17.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_17 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_17/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 17 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_18.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_18.yaml new file mode 100644 index 0000000..b2da955 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_18.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: 
cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_18 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_18/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 18 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 7 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_19.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_19.yaml new file mode 100644 index 0000000..30eb935 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_19.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_19 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_19/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - 
gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 19 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 9 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_2.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_2.yaml new file mode 100644 index 0000000..dac1aac --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_2.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_2 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_2/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 2 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_20.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_20.yaml 
new file mode 100644 index 0000000..ed66035 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_20.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_20 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_20/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 20 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_21.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_21.yaml new file mode 100644 index 0000000..d3b1429 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_21.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + 
optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_21 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_21/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 21 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_22.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_22.yaml new file mode 100644 index 0000000..445fce3 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_22.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_22 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_22/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + 
embeddings_dropout_rate: 0 + layer: 22 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_23.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_23.yaml new file mode 100644 index 0000000..3a780e6 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_23.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_23 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_23/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 23 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_24.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_24.yaml new file mode 
100644 index 0000000..34abe71 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_24.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_24 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_24/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 24 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_25.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_25.yaml new file mode 100644 index 0000000..a3d4450 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_25.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 
1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_25 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_25/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 25 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_26.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_26.yaml new file mode 100644 index 0000000..446a28e --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_26.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_26 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_26/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 26 + 
layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_27.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_27.yaml new file mode 100644 index 0000000..85e7fa8 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_27.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_27 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_27/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 27 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_28.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_28.yaml new file mode 100644 index 0000000..bf22af5 --- /dev/null 
+++ b/configs/eval/gpt2-xl/Geometric_Action/layer_28.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_28 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_28/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 28 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_29.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_29.yaml new file mode 100644 index 0000000..6b2ceab --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_29.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: 
+ factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_29 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_29/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 29 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_3.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_3.yaml new file mode 100644 index 0000000..0917146 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_3.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_3 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_3/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 3 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll 
+ num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 9 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_30.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_30.yaml new file mode 100644 index 0000000..d36471c --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_30.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_30 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_30/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 30 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_31.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_31.yaml new file mode 100644 index 0000000..0c13b6f --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_31.yaml @@ 
-0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_31 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_31/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 31 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_32.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_32.yaml new file mode 100644 index 0000000..8b4f45e --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_32.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: 
ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_32 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_32/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 32 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 9 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_33.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_33.yaml new file mode 100644 index 0000000..cad515f --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_33.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_33 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_33/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 33 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: 
data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_34.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_34.yaml new file mode 100644 index 0000000..4046eed --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_34.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_34 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_34/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 34 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_35.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_35.yaml new file mode 100644 index 0000000..9b6a361 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_35.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + 
action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_35 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_35/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 35 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_36.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_36.yaml new file mode 100644 index 0000000..f80f80d --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_36.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + 
save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_36 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_36/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 36 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_37.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_37.yaml new file mode 100644 index 0000000..aceb9cc --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_37.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_37 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_37/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 37 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: 
ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_38.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_38.yaml new file mode 100644 index 0000000..b90e64a --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_38.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_38 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_38/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 38 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 9 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_39.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_39.yaml new file mode 100644 index 0000000..7191ac9 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_39.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 
+ num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_39 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_39/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 39 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_4.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_4.yaml new file mode 100644 index 0000000..b2eb870 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_4.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ 
+ version: layer_4 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_4/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 4 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 9 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_40.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_40.yaml new file mode 100644 index 0000000..09fc133 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_40.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_40 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_40/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 40 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: 
GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_41.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_41.yaml new file mode 100644 index 0000000..6de4f68 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_41.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_41 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_41/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 41 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_42.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_42.yaml new file mode 100644 index 0000000..4d0f5ae --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_42.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + 
test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_42 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_42/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 42 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_43.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_43.yaml new file mode 100644 index 0000000..f1d20be --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_43.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_43 +pretrained_model: gpt2-xl 
+probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_43/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 43 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 7 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_44.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_44.yaml new file mode 100644 index 0000000..cc4919a --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_44.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_44 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_44/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 44 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + 
verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_45.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_45.yaml new file mode 100644 index 0000000..369f821 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_45.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_45 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_45/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 45 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 7 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_46.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_46.yaml new file mode 100644 index 0000000..5fcb989 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_46.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + 
shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_46 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_46/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 46 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_47.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_47.yaml new file mode 100644 index 0000000..9400fa4 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_47.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_47 +pretrained_model: gpt2-xl +probe_params: + add_root: true + 
checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_47/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 47 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_48.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_48.yaml new file mode 100644 index 0000000..f624817 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_48.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_48 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_48/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 48 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + 
accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_5.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_5.yaml new file mode 100644 index 0000000..cc7184a --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_5.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_5 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_5/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 5 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_6.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_6.yaml new file mode 100644 index 0000000..c51079e --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_6.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + 
batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_6 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_6/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 6 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_7.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_7.yaml new file mode 100644 index 0000000..9415e51 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_7.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_7 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: 
experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_7/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 7 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_8.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_8.yaml new file mode 100644 index 0000000..e78f054 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_8.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_8 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_8/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 8 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 
+ gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Action/layer_9.yaml b/configs/eval/gpt2-xl/Geometric_Action/layer_9.yaml new file mode 100644 index 0000000..0cee575 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Action/layer_9.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_9 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2-xl/Geometric_Action/layer_9/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 9 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_0.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_0.yaml new file mode 100644 index 0000000..5c32b5a --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_0.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + 
dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_0 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 0 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_1.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_1.yaml new file mode 100644 index 0000000..eab1a43 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_1.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_1 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + 
embeddings_dropout_rate: 0 + layer: 1 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 0 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_10.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_10.yaml new file mode 100644 index 0000000..d16a26b --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_10.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_10 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 10 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_11.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_11.yaml new file mode 100644 index 0000000..fa28f7a --- 
/dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_11.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_11 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 11 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_12.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_12.yaml new file mode 100644 index 0000000..09ae0a8 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_12.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + 
scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_12 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 12 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_13.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_13.yaml new file mode 100644 index 0000000..459a454 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_13.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_13 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 13 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + 
temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_14.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_14.yaml new file mode 100644 index 0000000..2595747 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_14.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_14 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 14 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 0 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_15.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_15.yaml new file mode 100644 index 0000000..101345f --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_15.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + 
token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_15 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 15 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 7 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_16.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_16.yaml new file mode 100644 index 0000000..3a632a7 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_16.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_16 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances 
+ - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 16 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_17.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_17.yaml new file mode 100644 index 0000000..4c20068 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_17.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_17 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 17 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_18.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_18.yaml new file mode 100644 index 
0000000..fffaf0e --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_18.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_18 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 18 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_19.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_19.yaml new file mode 100644 index 0000000..7698306 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_19.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: 
min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_19 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 19 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_2.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_2.yaml new file mode 100644 index 0000000..bb82b26 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_2.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_2 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 2 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: 
GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_20.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_20.yaml new file mode 100644 index 0000000..7c2a065 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_20.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_20 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 20 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_21.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_21.yaml new file mode 100644 index 0000000..bd7f3ca --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_21.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + 
shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_21 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 21 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 0 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_22.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_22.yaml new file mode 100644 index 0000000..cab6a20 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_22.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_22 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - 
gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 22 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_23.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_23.yaml new file mode 100644 index 0000000..11ec87a --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_23.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_23 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 23 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_24.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_24.yaml new file 
mode 100644 index 0000000..b6927c7 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_24.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_24 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 24 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_25.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_25.yaml new file mode 100644 index 0000000..e200a03 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_25.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + 
factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_25 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 25 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_26.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_26.yaml new file mode 100644 index 0000000..41da13c --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_26.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_26 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 26 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: 
Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_27.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_27.yaml new file mode 100644 index 0000000..96a6c3f --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_27.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_27 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 27 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_28.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_28.yaml new file mode 100644 index 0000000..8b765f4 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_28.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + 
batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_28 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 28 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 0 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_29.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_29.yaml new file mode 100644 index 0000000..7e1014b --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_29.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_29 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: 
+ - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 29 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_3.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_3.yaml new file mode 100644 index 0000000..55af01d --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_3.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_3 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 3 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_30.yaml 
b/configs/eval/gpt2-xl/Geometric_Regression/layer_30.yaml new file mode 100644 index 0000000..d8d637d --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_30.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_30 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 30 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_31.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_31.yaml new file mode 100644 index 0000000..e3748d7 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_31.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + 
optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_31 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 31 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_32.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_32.yaml new file mode 100644 index 0000000..3c32602 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_32.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_32 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 32 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + 
mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_33.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_33.yaml new file mode 100644 index 0000000..503a610 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_33.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_33 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 33 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_34.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_34.yaml new file mode 100644 index 0000000..a592bbb --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_34.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + 
action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_34 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 34 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_35.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_35.yaml new file mode 100644 index 0000000..35de6b7 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_35.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_35 +pretrained_model: 
gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 35 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_36.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_36.yaml new file mode 100644 index 0000000..08e945c --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_36.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_36 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 36 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git 
a/configs/eval/gpt2-xl/Geometric_Regression/layer_37.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_37.yaml new file mode 100644 index 0000000..401ecc7 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_37.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_37 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 37 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 0 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_38.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_38.yaml new file mode 100644 index 0000000..b9196f3 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_38.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false 
+device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_38 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 38 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_39.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_39.yaml new file mode 100644 index 0000000..fe78ca4 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_39.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_39 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 39 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - 
depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_4.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_4.yaml new file mode 100644 index 0000000..ba226b0 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_4.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_4 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 4 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_40.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_40.yaml new file mode 100644 index 0000000..a1c6783 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_40.yaml @@ -0,0 +1,64 @@ +cuda: true 
+data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_40 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 40 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_41.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_41.yaml new file mode 100644 index 0000000..35d1895 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_41.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ 
+ version: layer_41 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 41 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_42.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_42.yaml new file mode 100644 index 0000000..c3e59fb --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_42.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_42 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 42 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + 
max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_43.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_43.yaml new file mode 100644 index 0000000..11be9d5 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_43.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_43 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 43 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 0 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_44.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_44.yaml new file mode 100644 index 0000000..255bb98 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_44.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + 
dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_44 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 44 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_45.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_45.yaml new file mode 100644 index 0000000..759fbc7 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_45.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_45 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 45 + layer_dropout_rate: 0.2 + 
loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_46.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_46.yaml new file mode 100644 index 0000000..d98b0e2 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_46.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_46 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 46 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_47.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_47.yaml new file mode 100644 index 0000000..fa58adc --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_47.yaml 
@@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_47 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 47 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 0 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_48.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_48.yaml new file mode 100644 index 0000000..6ec744c --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_48.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: 
./experiment_checkpoints/eval/gpt2-xl/ + version: layer_48 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 48 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 9 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_5.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_5.yaml new file mode 100644 index 0000000..9080f73 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_5.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_5 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 5 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + 
accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_6.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_6.yaml new file mode 100644 index 0000000..40ba15a --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_6.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_6 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 6 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 9 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_7.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_7.yaml new file mode 100644 index 0000000..417cf1f --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_7.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true 
+ valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_7 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 7 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 0 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_8.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_8.yaml new file mode 100644 index 0000000..03ad194 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_8.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_8 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 8 + 
layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/Geometric_Regression/layer_9.yaml b/configs/eval/gpt2-xl/Geometric_Regression/layer_9.yaml new file mode 100644 index 0000000..8022900 --- /dev/null +++ b/configs/eval/gpt2-xl/Geometric_Regression/layer_9.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_9 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 9 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_0.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_0.yaml new file mode 100644 index 0000000..9e76a45 --- /dev/null +++ 
b/configs/eval/gpt2-xl/StackActionProbe/layer_0.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_0 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 0 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 0 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_1.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_1.yaml new file mode 100644 index 0000000..225db39 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_1.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_1 +pretrained_model: gpt2-xl +probe_params: + 
add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 1 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_10.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_10.yaml new file mode 100644 index 0000000..e2dbacf --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_10.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_10 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 10 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_11.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_11.yaml new file mode 100644 index 0000000..26535d8 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_11.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 
4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_11 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 11 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_12.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_12.yaml new file mode 100644 index 0000000..1a9f601 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_12.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_12 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 12 + layer_dropout_rate: 0.2 
+ num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_13.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_13.yaml new file mode 100644 index 0000000..ff4da4e --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_13.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_13 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 13 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_14.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_14.yaml new file mode 100644 index 0000000..cf62caa --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_14.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: 
false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_14 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 14 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_15.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_15.yaml new file mode 100644 index 0000000..8f343cf --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_15.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_15 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 15 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: 
StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 0 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_16.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_16.yaml new file mode 100644 index 0000000..8885397 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_16.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_16 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 16 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_17.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_17.yaml new file mode 100644 index 0000000..2c997a5 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_17.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + 
optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_17 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 17 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_18.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_18.yaml new file mode 100644 index 0000000..be860c3 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_18.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_18 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 18 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git 
a/configs/eval/gpt2-xl/StackActionProbe/layer_19.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_19.yaml new file mode 100644 index 0000000..010963e --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_19.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_19 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 19 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_2.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_2.yaml new file mode 100644 index 0000000..ddceb7e --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_2.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + 
scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_2 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 2 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_20.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_20.yaml new file mode 100644 index 0000000..7e1d5e0 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_20.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_20 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 20 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 7 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_21.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_21.yaml new file mode 100644 index 0000000..93a7788 --- 
/dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_21.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_21 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 21 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 0 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_22.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_22.yaml new file mode 100644 index 0000000..3aef5bd --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_22.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_22 +pretrained_model: gpt2-xl 
+probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 22 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_23.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_23.yaml new file mode 100644 index 0000000..fc6b2d2 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_23.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_23 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 23 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_24.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_24.yaml new file mode 100644 index 0000000..d81523f --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_24.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 
400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_24 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 24 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 0 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_25.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_25.yaml new file mode 100644 index 0000000..3b39709 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_25.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_25 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 25 + 
layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_26.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_26.yaml new file mode 100644 index 0000000..f4b5700 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_26.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_26 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 26 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_27.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_27.yaml new file mode 100644 index 0000000..815f992 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_27.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + 
batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_27 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 27 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_28.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_28.yaml new file mode 100644 index 0000000..9407945 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_28.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_28 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 28 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + 
probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 7 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_29.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_29.yaml new file mode 100644 index 0000000..b471109 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_29.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_29 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 29 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_3.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_3.yaml new file mode 100644 index 0000000..3f64e85 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_3.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 
+ optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_3 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 3 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 7 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_30.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_30.yaml new file mode 100644 index 0000000..7c0904d --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_30.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_30 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 30 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 7 + max_epochs: 25 diff --git 
a/configs/eval/gpt2-xl/StackActionProbe/layer_31.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_31.yaml new file mode 100644 index 0000000..6a6b4c8 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_31.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_31 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 31 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_32.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_32.yaml new file mode 100644 index 0000000..6a8f0bb --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_32.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 
+ scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_32 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 32 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_33.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_33.yaml new file mode 100644 index 0000000..078de38 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_33.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_33 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 33 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 7 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_34.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_34.yaml new file mode 100644 index 0000000..6cff065 
--- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_34.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_34 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 34 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_35.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_35.yaml new file mode 100644 index 0000000..a7b347d --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_35.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_35 +pretrained_model: gpt2-xl 
+probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 35 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_36.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_36.yaml new file mode 100644 index 0000000..76a5da7 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_36.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_36 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 36 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_37.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_37.yaml new file mode 100644 index 0000000..59cc796 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_37.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 
400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_37 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 37 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_38.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_38.yaml new file mode 100644 index 0000000..bfe2cd5 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_38.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_38 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 38 + 
layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_39.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_39.yaml new file mode 100644 index 0000000..d7c2e99 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_39.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_39 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 39 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_4.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_4.yaml new file mode 100644 index 0000000..8c7d2df --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_4.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + 
batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_4 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 4 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_40.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_40.yaml new file mode 100644 index 0000000..9b08a65 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_40.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_40 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 40 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + 
probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_41.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_41.yaml new file mode 100644 index 0000000..f8c76df --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_41.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_41 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 41 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 9 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_42.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_42.yaml new file mode 100644 index 0000000..fb1cbff --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_42.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 
1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_42 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 42 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_43.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_43.yaml new file mode 100644 index 0000000..3511daa --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_43.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_43 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 43 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git 
a/configs/eval/gpt2-xl/StackActionProbe/layer_44.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_44.yaml new file mode 100644 index 0000000..8f1aa2a --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_44.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_44 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 44 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_45.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_45.yaml new file mode 100644 index 0000000..6b71348 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_45.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 
+ scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_45 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 45 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_46.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_46.yaml new file mode 100644 index 0000000..9f212e5 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_46.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_46 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 46 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_47.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_47.yaml new file mode 100644 index 0000000..4855e42 
--- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_47.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_47 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 47 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_48.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_48.yaml new file mode 100644 index 0000000..2230640 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_48.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_48 +pretrained_model: gpt2-xl 
+probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 48 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_5.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_5.yaml new file mode 100644 index 0000000..3a4f8d8 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_5.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_5 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 5 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_6.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_6.yaml new file mode 100644 index 0000000..befee60 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_6.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + 
num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_6 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 6 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_7.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_7.yaml new file mode 100644 index 0000000..cb52a9a --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_7.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_7 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 7 + 
layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_8.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_8.yaml new file mode 100644 index 0000000..bbf3273 --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_8.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_8 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 8 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2-xl/StackActionProbe/layer_9.yaml b/configs/eval/gpt2-xl/StackActionProbe/layer_9.yaml new file mode 100644 index 0000000..a2a8d4a --- /dev/null +++ b/configs/eval/gpt2-xl/StackActionProbe/layer_9.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 
15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2-xl/ + version: layer_9 +pretrained_model: gpt2-xl +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 9 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2/AttentiveProbe/layer_0.yaml b/configs/eval/gpt2/AttentiveProbe/layer_0.yaml new file mode 100644 index 0000000..67f41a0 --- /dev/null +++ b/configs/eval/gpt2/AttentiveProbe/layer_0.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_0 +pretrained_model: gpt2 +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 0 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + 
probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2/AttentiveProbe/layer_1.yaml b/configs/eval/gpt2/AttentiveProbe/layer_1.yaml new file mode 100644 index 0000000..1e341c0 --- /dev/null +++ b/configs/eval/gpt2/AttentiveProbe/layer_1.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_1 +pretrained_model: gpt2 +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 1 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2/AttentiveProbe/layer_10.yaml b/configs/eval/gpt2/AttentiveProbe/layer_10.yaml new file mode 100644 index 0000000..e3e3f62 --- /dev/null +++ b/configs/eval/gpt2/AttentiveProbe/layer_10.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + 
batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_10 +pretrained_model: gpt2 +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 10 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 0 + max_epochs: 25 diff --git a/configs/eval/gpt2/AttentiveProbe/layer_11.yaml b/configs/eval/gpt2/AttentiveProbe/layer_11.yaml new file mode 100644 index 0000000..96aa6a3 --- /dev/null +++ b/configs/eval/gpt2/AttentiveProbe/layer_11.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_11 +pretrained_model: gpt2 +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 11 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: 
ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2/AttentiveProbe/layer_12.yaml b/configs/eval/gpt2/AttentiveProbe/layer_12.yaml new file mode 100644 index 0000000..c41030b --- /dev/null +++ b/configs/eval/gpt2/AttentiveProbe/layer_12.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_12 +pretrained_model: gpt2 +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 12 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2/AttentiveProbe/layer_2.yaml b/configs/eval/gpt2/AttentiveProbe/layer_2.yaml new file mode 100644 index 0000000..4f84463 --- /dev/null +++ b/configs/eval/gpt2/AttentiveProbe/layer_2.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + 
dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_2 +pretrained_model: gpt2 +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 2 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 7 + max_epochs: 25 diff --git a/configs/eval/gpt2/AttentiveProbe/layer_3.yaml b/configs/eval/gpt2/AttentiveProbe/layer_3.yaml new file mode 100644 index 0000000..0b71914 --- /dev/null +++ b/configs/eval/gpt2/AttentiveProbe/layer_3.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_3 +pretrained_model: gpt2 +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 3 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: 
data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2/AttentiveProbe/layer_4.yaml b/configs/eval/gpt2/AttentiveProbe/layer_4.yaml new file mode 100644 index 0000000..bf52f3e --- /dev/null +++ b/configs/eval/gpt2/AttentiveProbe/layer_4.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_4 +pretrained_model: gpt2 +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 4 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2/AttentiveProbe/layer_5.yaml b/configs/eval/gpt2/AttentiveProbe/layer_5.yaml new file mode 100644 index 0000000..21cf784 --- /dev/null +++ b/configs/eval/gpt2/AttentiveProbe/layer_5.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + 
train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_5 +pretrained_model: gpt2 +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 5 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2/AttentiveProbe/layer_6.yaml b/configs/eval/gpt2/AttentiveProbe/layer_6.yaml new file mode 100644 index 0000000..83b0ba0 --- /dev/null +++ b/configs/eval/gpt2/AttentiveProbe/layer_6.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_6 +pretrained_model: gpt2 +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 6 + layer_dropout_rate: 0.2 + num_layers: 1 + 
oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2/AttentiveProbe/layer_7.yaml b/configs/eval/gpt2/AttentiveProbe/layer_7.yaml new file mode 100644 index 0000000..5f1b1e1 --- /dev/null +++ b/configs/eval/gpt2/AttentiveProbe/layer_7.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_7 +pretrained_model: gpt2 +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 7 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2/AttentiveProbe/layer_8.yaml b/configs/eval/gpt2/AttentiveProbe/layer_8.yaml new file mode 100644 index 0000000..292104c --- /dev/null +++ b/configs/eval/gpt2/AttentiveProbe/layer_8.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + 
shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_8 +pretrained_model: gpt2 +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 8 + layer_dropout_rate: 0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2/AttentiveProbe/layer_9.yaml b/configs/eval/gpt2/AttentiveProbe/layer_9.yaml new file mode 100644 index 0000000..27a1c59 --- /dev/null +++ b/configs/eval/gpt2/AttentiveProbe/layer_9.yaml @@ -0,0 +1,60 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 30 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 30 + dry_run: false + shuffle: true + valid: + batch_size: 30 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_9 +pretrained_model: gpt2 +probe_params: + add_root: true + continuous: true + data_sources: + - action_ids + - continuous_action_masks + - gold_tuples + emb_size: 100 + embeddings_dropout_rate: 0 + layer: 9 + layer_dropout_rate: 
0.2 + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: AttentiveProbe + probe_type: AttentiveProbe + reverse: true + rnn_type: GRU + state_size: 100 +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Action/layer_0.yaml b/configs/eval/gpt2/Geometric_Action/layer_0.yaml new file mode 100644 index 0000000..bef272f --- /dev/null +++ b/configs/eval/gpt2/Geometric_Action/layer_0.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_0 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2/Geometric_Action/layer_0/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 0 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Action/layer_1.yaml b/configs/eval/gpt2/Geometric_Action/layer_1.yaml new file mode 100644 index 0000000..aff494a --- /dev/null +++ b/configs/eval/gpt2/Geometric_Action/layer_1.yaml @@ -0,0 +1,63 @@ +cuda: true 
+data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_1 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2/Geometric_Action/layer_1/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 1 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Action/layer_10.yaml b/configs/eval/gpt2/Geometric_Action/layer_10.yaml new file mode 100644 index 0000000..b3ad87b --- /dev/null +++ b/configs/eval/gpt2/Geometric_Action/layer_10.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: 
./experiment_checkpoints/eval/gpt2/ + version: layer_10 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2/Geometric_Action/layer_10/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 10 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 9 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Action/layer_11.yaml b/configs/eval/gpt2/Geometric_Action/layer_11.yaml new file mode 100644 index 0000000..c2fc235 --- /dev/null +++ b/configs/eval/gpt2/Geometric_Action/layer_11.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_11 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2/Geometric_Action/layer_11/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 11 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + 
probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Action/layer_12.yaml b/configs/eval/gpt2/Geometric_Action/layer_12.yaml new file mode 100644 index 0000000..7e6befe --- /dev/null +++ b/configs/eval/gpt2/Geometric_Action/layer_12.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_12 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2/Geometric_Action/layer_12/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 12 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 7 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Action/layer_2.yaml b/configs/eval/gpt2/Geometric_Action/layer_2.yaml new file mode 100644 index 0000000..e420bc8 --- /dev/null +++ b/configs/eval/gpt2/Geometric_Action/layer_2.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + 
batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_2 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2/Geometric_Action/layer_2/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 2 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 9 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Action/layer_3.yaml b/configs/eval/gpt2/Geometric_Action/layer_3.yaml new file mode 100644 index 0000000..ad8532d --- /dev/null +++ b/configs/eval/gpt2/Geometric_Action/layer_3.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_3 +pretrained_model: gpt2 +probe_params: + add_root: true + 
checkpoint_path: experiment_checkpoints/eval/gpt2/Geometric_Action/layer_3/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 3 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Action/layer_4.yaml b/configs/eval/gpt2/Geometric_Action/layer_4.yaml new file mode 100644 index 0000000..459531f --- /dev/null +++ b/configs/eval/gpt2/Geometric_Action/layer_4.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_4 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2/Geometric_Action/layer_4/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 4 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + 
gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Action/layer_5.yaml b/configs/eval/gpt2/Geometric_Action/layer_5.yaml new file mode 100644 index 0000000..86e5373 --- /dev/null +++ b/configs/eval/gpt2/Geometric_Action/layer_5.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_5 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2/Geometric_Action/layer_5/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 5 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 9 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Action/layer_6.yaml b/configs/eval/gpt2/Geometric_Action/layer_6.yaml new file mode 100644 index 0000000..7f93343 --- /dev/null +++ b/configs/eval/gpt2/Geometric_Action/layer_6.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + 
batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_6 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2/Geometric_Action/layer_6/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 6 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 0 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Action/layer_7.yaml b/configs/eval/gpt2/Geometric_Action/layer_7.yaml new file mode 100644 index 0000000..8ca0bb7 --- /dev/null +++ b/configs/eval/gpt2/Geometric_Action/layer_7.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_7 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2/Geometric_Action/layer_7/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + 
- gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 7 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Action/layer_8.yaml b/configs/eval/gpt2/Geometric_Action/layer_8.yaml new file mode 100644 index 0000000..455792e --- /dev/null +++ b/configs/eval/gpt2/Geometric_Action/layer_8.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_8 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2/Geometric_Action/layer_8/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 8 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Action/layer_9.yaml b/configs/eval/gpt2/Geometric_Action/layer_9.yaml new 
file mode 100644 index 0000000..a961423 --- /dev/null +++ b/configs/eval/gpt2/Geometric_Action/layer_9.yaml @@ -0,0 +1,63 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 1.0e-05 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_9 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: experiment_checkpoints/eval/gpt2/Geometric_Action/layer_9/checkpoints/last.ckpt + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 9 + layer_dropout_rate: 0.2 + loss_types: + - oracle_action_nll + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Action + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Regression/layer_0.yaml b/configs/eval/gpt2/Geometric_Regression/layer_0.yaml new file mode 100644 index 0000000..73ad1de --- /dev/null +++ b/configs/eval/gpt2/Geometric_Regression/layer_0.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + 
optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_0 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 0 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Regression/layer_1.yaml b/configs/eval/gpt2/Geometric_Regression/layer_1.yaml new file mode 100644 index 0000000..8d1daf1 --- /dev/null +++ b/configs/eval/gpt2/Geometric_Regression/layer_1.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_1 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 1 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: 
Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Regression/layer_10.yaml b/configs/eval/gpt2/Geometric_Regression/layer_10.yaml new file mode 100644 index 0000000..fc3e287 --- /dev/null +++ b/configs/eval/gpt2/Geometric_Regression/layer_10.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_10 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 10 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Regression/layer_11.yaml b/configs/eval/gpt2/Geometric_Regression/layer_11.yaml new file mode 100644 index 0000000..f03c1e8 --- /dev/null +++ b/configs/eval/gpt2/Geometric_Regression/layer_11.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: 
false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_11 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 11 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Regression/layer_12.yaml b/configs/eval/gpt2/Geometric_Regression/layer_12.yaml new file mode 100644 index 0000000..1f85125 --- /dev/null +++ b/configs/eval/gpt2/Geometric_Regression/layer_12.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_12 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - 
gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 12 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Regression/layer_2.yaml b/configs/eval/gpt2/Geometric_Regression/layer_2.yaml new file mode 100644 index 0000000..25e3ca1 --- /dev/null +++ b/configs/eval/gpt2/Geometric_Regression/layer_2.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_2 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 2 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Regression/layer_3.yaml b/configs/eval/gpt2/Geometric_Regression/layer_3.yaml new file mode 100644 index 
0000000..dc031d0 --- /dev/null +++ b/configs/eval/gpt2/Geometric_Regression/layer_3.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_3 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 3 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Regression/layer_4.yaml b/configs/eval/gpt2/Geometric_Regression/layer_4.yaml new file mode 100644 index 0000000..f497efe --- /dev/null +++ b/configs/eval/gpt2/Geometric_Regression/layer_4.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + 
scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_4 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 4 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Regression/layer_5.yaml b/configs/eval/gpt2/Geometric_Regression/layer_5.yaml new file mode 100644 index 0000000..4c94c92 --- /dev/null +++ b/configs/eval/gpt2/Geometric_Regression/layer_5.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_5 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 5 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + 
verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Regression/layer_6.yaml b/configs/eval/gpt2/Geometric_Regression/layer_6.yaml new file mode 100644 index 0000000..e509aeb --- /dev/null +++ b/configs/eval/gpt2/Geometric_Regression/layer_6.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_6 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 6 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 0 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Regression/layer_7.yaml b/configs/eval/gpt2/Geometric_Regression/layer_7.yaml new file mode 100644 index 0000000..b976222 --- /dev/null +++ b/configs/eval/gpt2/Geometric_Regression/layer_7.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + 
shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_7 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 7 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Regression/layer_8.yaml b/configs/eval/gpt2/Geometric_Regression/layer_8.yaml new file mode 100644 index 0000000..3000333 --- /dev/null +++ b/configs/eval/gpt2/Geometric_Regression/layer_8.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_8 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 8 + layer_dropout_rate: 
0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2/Geometric_Regression/layer_9.yaml b/configs/eval/gpt2/Geometric_Regression/layer_9.yaml new file mode 100644 index 0000000..895e277 --- /dev/null +++ b/configs/eval/gpt2/Geometric_Regression/layer_9.yaml @@ -0,0 +1,64 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_9 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: null + data_sources: + - action_ids + - gold_tuples + - gold_distances + - gold_depths + - xpos + embeddings_dropout_rate: 0 + layer: 9 + layer_dropout_rate: 0.2 + loss_types: + - distance_mse + - depth_mse + num_layers: 1 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: Geometric_Regression + probe_type: GeometricProbe + temp: 0.1 + threshold: 1.5 + verbose: false +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 3 + max_epochs: 25 diff --git a/configs/eval/gpt2/StackActionProbe/layer_0.yaml b/configs/eval/gpt2/StackActionProbe/layer_0.yaml new file mode 100644 index 0000000..760e020 --- /dev/null +++ b/configs/eval/gpt2/StackActionProbe/layer_0.yaml @@ -0,0 +1,55 @@ +cuda: true 
+data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_0 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 0 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2/StackActionProbe/layer_1.yaml b/configs/eval/gpt2/StackActionProbe/layer_1.yaml new file mode 100644 index 0000000..2b8815c --- /dev/null +++ b/configs/eval/gpt2/StackActionProbe/layer_1.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_1 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + 
embeddings_dropout_rate: 0 + layer: 1 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2/StackActionProbe/layer_10.yaml b/configs/eval/gpt2/StackActionProbe/layer_10.yaml new file mode 100644 index 0000000..089d9b7 --- /dev/null +++ b/configs/eval/gpt2/StackActionProbe/layer_10.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_10 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 10 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 9 + max_epochs: 25 diff --git a/configs/eval/gpt2/StackActionProbe/layer_11.yaml b/configs/eval/gpt2/StackActionProbe/layer_11.yaml new file mode 100644 index 0000000..6c8cc3f --- /dev/null +++ b/configs/eval/gpt2/StackActionProbe/layer_11.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 
150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_11 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 11 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 7 + max_epochs: 25 diff --git a/configs/eval/gpt2/StackActionProbe/layer_12.yaml b/configs/eval/gpt2/StackActionProbe/layer_12.yaml new file mode 100644 index 0000000..cc4385e --- /dev/null +++ b/configs/eval/gpt2/StackActionProbe/layer_12.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_12 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 12 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + 
probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2/StackActionProbe/layer_2.yaml b/configs/eval/gpt2/StackActionProbe/layer_2.yaml new file mode 100644 index 0000000..b38d540 --- /dev/null +++ b/configs/eval/gpt2/StackActionProbe/layer_2.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_2 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 2 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 4 + max_epochs: 25 diff --git a/configs/eval/gpt2/StackActionProbe/layer_3.yaml b/configs/eval/gpt2/StackActionProbe/layer_3.yaml new file mode 100644 index 0000000..8574323 --- /dev/null +++ b/configs/eval/gpt2/StackActionProbe/layer_3.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 
0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_3 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 3 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2/StackActionProbe/layer_4.yaml b/configs/eval/gpt2/StackActionProbe/layer_4.yaml new file mode 100644 index 0000000..a1bcad5 --- /dev/null +++ b/configs/eval/gpt2/StackActionProbe/layer_4.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_4 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 4 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/configs/eval/gpt2/StackActionProbe/layer_5.yaml 
b/configs/eval/gpt2/StackActionProbe/layer_5.yaml new file mode 100644 index 0000000..f3341a2 --- /dev/null +++ b/configs/eval/gpt2/StackActionProbe/layer_5.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_5 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 5 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 5 + max_epochs: 25 diff --git a/configs/eval/gpt2/StackActionProbe/layer_6.yaml b/configs/eval/gpt2/StackActionProbe/layer_6.yaml new file mode 100644 index 0000000..507782d --- /dev/null +++ b/configs/eval/gpt2/StackActionProbe/layer_6.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: 
./experiment_checkpoints/eval/gpt2/ + version: layer_6 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 6 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 1 + max_epochs: 25 diff --git a/configs/eval/gpt2/StackActionProbe/layer_7.yaml b/configs/eval/gpt2/StackActionProbe/layer_7.yaml new file mode 100644 index 0000000..b17a8e1 --- /dev/null +++ b/configs/eval/gpt2/StackActionProbe/layer_7.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_7 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 7 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 2 + max_epochs: 25 diff --git a/configs/eval/gpt2/StackActionProbe/layer_8.yaml b/configs/eval/gpt2/StackActionProbe/layer_8.yaml new file mode 100644 index 0000000..c43f258 --- /dev/null +++ b/configs/eval/gpt2/StackActionProbe/layer_8.yaml @@ -0,0 +1,55 @@ +cuda: true 
+data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_8 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + embeddings_dropout_rate: 0 + layer: 8 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 6 + max_epochs: 25 diff --git a/configs/eval/gpt2/StackActionProbe/layer_9.yaml b/configs/eval/gpt2/StackActionProbe/layer_9.yaml new file mode 100644 index 0000000..9c3853d --- /dev/null +++ b/configs/eval/gpt2/StackActionProbe/layer_9.yaml @@ -0,0 +1,55 @@ +cuda: true +data_params: + action_ngram_pad: 40 + action_pad: 400 + num_workers: 4 + pin_memory: false + root_dir: data + test: + batch_size: 15 + dry_run: false + shuffle: true + token_pad: 150 + train: + batch_size: 15 + dry_run: false + shuffle: true + valid: + batch_size: 15 + dry_run: false + shuffle: false +device: cuda +exp_params: + manual_seed: 1265 + optimizer_params: + lr: 0.001 + optimizer_type: Adam + scheduler_params: + factor: 0.1 + mode: min + patience: 0 + scheduler_type: ReduceLROnPlateau +logging_params: + save_dir: ./experiment_checkpoints/eval/gpt2/ + version: layer_9 +pretrained_model: gpt2 +probe_params: + add_root: true + checkpoint_path: false + data_sources: + - action_ids + - gold_tuples + 
embeddings_dropout_rate: 0 + layer: 9 + layer_dropout_rate: 0.2 + num_layers: 3 + oracle_params: + mappings_file: data/mappings-ptb.txt + name: ArcStandard + probe_name: StackActionProbe + probe_type: StackActionProbe +trainer_params: + accumulate_grad_batches: 1 + gpus: + - 8 + max_epochs: 25 diff --git a/data/mappings-ptb.txt b/data/mappings-ptb.txt new file mode 100644 index 0000000..c6dd34f --- /dev/null +++ b/data/mappings-ptb.txt @@ -0,0 +1,109 @@ +::rel +acomp +advcl +advmod +amod +appos +aux +auxpass +cc +ccomp +conj +cop +csubj +csubjpass +dep +det +discourse +dobj +expl +iobj +mark +mwe +neg +nn +npadvmod +nsubj +nsubjpass +num +number +parataxis +pcomp +pobj +poss +possessive +preconj +predet +prep +prt +punct +quantmod +rcmod +root +tmod +vmod +xcomp +::pos +ADJ +ADP +ADV +AUX +CCONJ +DET +INTJ +NOUN +NUM +PART +PRON +PROPN +PUNCT +SCONJ +SYM +VERB +X +::fpos +# +$ +'' +, +-LRB- +-RRB- +. +: +CC +CD +DT +EX +FW +IN +JJ +JJR +JJS +LS +MD +NN +NNP +NNPS +NNS +PDT +POS +PRP +PRP$ +RB +RBR +RBS +RP +SYM +TO +UH +VB +VBD +VBG +VBN +VBP +VBZ +WDT +WP +WP$ +WRB +`` diff --git a/data/npz_experiment/README.md b/data/npz_experiment/README.md new file mode 100644 index 0000000..2b1e496 --- /dev/null +++ b/data/npz_experiment/README.md @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/data/npz_experiment/npz.csv b/data/npz_experiment/npz.csv new file mode 100644 index 0000000..d95c917 --- /dev/null +++ b/data/npz_experiment/npz.csv @@ -0,0 +1,53 @@ +prefix,continuation_1,head_idxs_1,continuation_2,head_idxs_2,both,neither,transitive +Although the band left the party,I stayed .,"[4,3,4,8,6,4,8,0]",went on .,"[4,3,4,7,6,7,0,7]",raged on for,.,True +As the criminal shot the woman,I shouted .,"[4,3,4,8,6,4,8,0]",fell down .,"[4,3,4,7,6,7,0,7]",on the porch,.,True +When the dog bit the doctor,I laughed .,"[4,3,4,8,6,4,8,0]",ran away .,"[4,3,4,7,6,7,0,7]",who was walking,.,True +As the ship crossed the waters,we slept .,"[4,3,4,8,6,4,8,0]",remained calm 
.,"[4,3,4,7,6,7,0,7]",which were calm,.,True +After the newcomers asked the soldiers,we marched .,"[4,3,4,8,6,4,8,0]",shared food .,"[4,3,4,7,6,7,0,7]",in the camp,.,True +Though the athlete telephoned the coach,we practiced .,"[4,3,4,8,6,4,8,0]",rejected him .,"[4,3,4,7,6,7,0,7]",of the team,.,True +While the crowd applauded the actor,I left .,"[4,3,4,8,6,4,8,0]",sat down .,"[4,3,4,7,6,7,0,7]",who danced on,.,True +While the audience cheered the actor,we left .,"[4,3,4,8,6,4,8,0]",continued performing .,"[4,3,4,7,6,7,0,7]",who sang the,.,True +While the students ate the food,I starved .,"[4,3,4,8,6,4,8,0]",became cold .,"[4,3,4,7,6,7,0,7]",from the grocery,.,True +When the professor taught the visitors,I listened .,"[4,3,4,8,6,4,8,0]",lost interest .,"[4,3,4,7,6,7,0,7]",who were in,.,True +Because the baby grabbed the woman,we cheered .,"[4,3,4,8,6,4,8,0]",stayed longer .,"[4,3,4,7,6,7,0,7]",who was near,.,True +Because the manager applauded the comedian,we laughed .,"[4,3,4,8,6,4,8,0]",spoke more .,"[4,3,4,7,6,7,0,7]",who just performed,.,True +After the patient asked the nurse,I cried .,"[4,3,4,8,6,4,8,0]",told her .,"[4,3,4,7,6,7,0,7]",in the hospital,.,True +After the guard visited the children,we visited .,"[4,3,4,8,6,4,8,0]",played more .,"[4,3,4,7,6,7,0,7]",who were playing,.,True +After the dog signaled the farmer,I worked .,"[4,3,4,8,6,4,8,0]",went out .,"[4,3,4,7,6,7,0,7]",who was focused,.,True +As the students considered the teacher,I arrived .,"[4,3,4,8,6,4,8,0]",asked questions .,"[4,3,4,7,6,7,0,7]",who was speaking,.,True +While the woman decorated the pot,we watched .,"[4,3,4,8,6,4,8,0]",sat still .,"[4,3,4,7,6,7,0,7]",which was made,.,True +When the customer interrupted the manager,we watched .,"[4,3,4,8,6,4,8,0]",responded calmly .,"[4,3,4,7,6,7,0,7]",of the store,.,True +As the man wiped the pipe,I watched .,"[4,3,4,8,6,4,8,0]",blew smoke .,"[4,3,4,7,6,7,0,7]",made of wood,.,True +While the prisoners watched the guards,we escaped
.,"[4,3,4,8,6,4,8,0]",threatened them .,"[4,3,4,7,6,7,0,7]",who were working,.,True +While the king governed his subjects,I left .,"[4,3,4,8,6,4,8,0]",feared him .,"[4,3,4,7,6,7,0,7]",who were loyal,.,True +After the host interrupted the discussion,I spoke .,"[4,3,4,8,6,4,8,0]",became interesting .,"[4,3,4,7,6,7,0,7]",about the show,.,True +After the woman dressed her children,we departed .,"[4,3,4,8,6,4,8,0]",played games .,"[4,3,4,7,6,7,0,7]",who were waiting,.,True +After the soldier signaled the doctor,I helped .,"[4,3,4,8,6,4,8,0]",stopped operating .,"[4,3,4,7,6,7,0,7]",in the hospital,.,True +As the guards stopped the thieves,we relaxed .,"[4,3,4,8,6,4,8,0]",stole more .,"[4,3,4,7,6,7,0,7]",who were running,.,True + +Although the band performed the party,I stayed .,"[4,3,4,8,6,4,8,0]",went on .,"[4,3,4,7,6,7,0,7]",raged on for,.,False +As the criminal fled the woman,I shouted .,"[4,3,4,8,6,4,8,0]",fell down .,"[4,3,4,7,6,7,0,7]",on the porch,.,False +When the dog struggled the doctor,I laughed .,"[4,3,4,8,6,4,8,0]",ran away .,"[4,3,4,7,6,7,0,7]",who was walking,.,False +As the ship drifted the waters,we slept .,"[4,3,4,8,6,4,8,0]",remained calm .,"[4,3,4,7,6,7,0,7]",which were calm,.,False +After the newcomers negotiated the soldiers,we marched .,"[4,3,4,8,6,4,8,0]",shared food .,"[4,3,4,7,6,7,0,7]",in the camp,.,False +Though the athlete complained the coach,we practiced .,"[4,3,4,8,6,4,8,0]",rejected him .,"[4,3,4,7,6,7,0,7]",of the team,.,False +While the crowd yelled the actor,I left .,"[4,3,4,8,6,4,8,0]",sat down .,"[4,3,4,7,6,7,0,7]",who danced on,.,False +While the audience arrived the actor,we left .,"[4,3,4,8,6,4,8,0]",continued performing .,"[4,3,4,7,6,7,0,7]",who sang the,.,False +While the students talked the food,I starved .,"[4,3,4,8,6,4,8,0]",became cold .,"[4,3,4,7,6,7,0,7]",from the grocery,.,False +When the professor talked the visitors,I listened .,"[4,3,4,8,6,4,8,0]",lost interest .,"[4,3,4,7,6,7,0,7]",who were in,.,False +Because the 
baby yelled the woman,we cheered .,"[4,3,4,8,6,4,8,0]",stayed longer .,"[4,3,4,7,6,7,0,7]",who was near,.,False +Because the manager departed the comedian,we laughed .,"[4,3,4,8,6,4,8,0]",spoke more .,"[4,3,4,7,6,7,0,7]",who just performed,.,False +After the patient arrived the nurse,I cried .,"[4,3,4,8,6,4,8,0]",told her .,"[4,3,4,7,6,7,0,7]",in the hospital,.,False +After the guard departed the children,we visited .,"[4,3,4,8,6,4,8,0]",played more .,"[4,3,4,7,6,7,0,7]",who were playing,.,False +After the dog struggled the farmer,I worked .,"[4,3,4,8,6,4,8,0]",went out .,"[4,3,4,7,6,7,0,7]",who was focused,.,False +As the students slept the teacher,I arrived .,"[4,3,4,8,6,4,8,0]",asked questions .,"[4,3,4,7,6,7,0,7]",who was speaking,.,False +While the woman slept the pot,we watched .,"[4,3,4,8,6,4,8,0]",sat still .,"[4,3,4,7,6,7,0,7]",which was made,.,False +When the customer complained the manager,we watched .,"[4,3,4,8,6,4,8,0]",responded calmly .,"[4,3,4,7,6,7,0,7]",of the store,.,False +As the man slept the pipe,I watched .,"[4,3,4,8,6,4,8,0]",blew smoke .,"[4,3,4,7,6,7,0,7]",made of wood,.,False + +While the prisoners looked the guards,we escaped .,"[4,3,4,8,6,4,8,0]",threatened them .,"[4,3,4,7,6,7,0,7]",who were working,.,False +While the king yelled his subjects,I left .,"[4,3,4,8,6,4,8,0]",feared him .,"[4,3,4,7,6,7,0,7]",who were loyal,.,False +After the host yelled the discussion,I spoke .,"[4,3,4,8,6,4,8,0]",became interesting .,"[4,3,4,7,6,7,0,7]",about the show,.,False +After the woman departed her children,we departed .,"[4,3,4,8,6,4,8,0]",played games .,"[4,3,4,7,6,7,0,7]",who were waiting,.,False +After the soldier complained the doctor,I helped .,"[4,3,4,8,6,4,8,0]",stopped operating .,"[4,3,4,7,6,7,0,7]",in the hospital,.,False +As the guards slept the thieves,we relaxed .,"[4,3,4,8,6,4,8,0]",stole more .,"[4,3,4,7,6,7,0,7]",who were running,.,False \ No newline at end of file diff --git a/data/test.json b/data/test.json new file mode 100644 
index 0000000..1d01c1d --- /dev/null +++ b/data/test.json @@ -0,0 +1,20 @@ +{"orig_tokens": ["No", ",", "it", "was", "n't", "Black", "Monday", "."], "tokens": ["", ",", "it", "was", "n't", "", "", "."], "token_ids": [7, 45, 71, 56, 126, 7, 7, 62], "tags": ["RB", ",", "PRP", "VBD", "RB", "NNP", "NNP", "."], "tree_str": "(S (INTJ (RB No)) (, ,) (NP (PRP it)) (VP (VBD was) (RB n't) (NP (NNP Black) (NNP Monday))) (. .))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [3, 2, 1, 0], [4, 3, 2, 1, 0], [5, 4, 3, 2, 1, 0], [6, 5, 4, 3, 2, 1, 0], [7, 6, 5, 4, 3, 2, 1, 0], [7, 5, 4, 3, 2, 1, 0], [7, 4, 3, 2, 1, 0], [7, 3, 2, 1, 0], [7, 2, 1, 0], [7, 1, 0], [7, 0], [8, 7, 0], [7, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7, 8], [2, 3, 4, 5, 6, 7, 8], [3, 4, 5, 6, 7, 8], [4, 5, 6, 7, 8], [5, 6, 7, 8], [6, 7, 8], [7, 8], [8], [8], [8], [8], [8], [8], [8], [], [], []], "actions": [[0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 21], [1, 10], [1, 24], [1, 37], [1, 15], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [0, 2, 1], [0, 3, 2], [0, 4, 3], [0, 5, 4], [0, 6, 5], [1, 7, 6], [1, 7, 5], [1, 7, 4], [1, 7, 3], [1, 7, 2], [1, 7, 1], [0, 7, 0], [2, 8, 7], [2, 7, 0]]}} +{"orig_tokens": ["But", "while", "the", "New", "York", "Stock", "Exchange", "did", "n't", "fall", "apart", "Friday", "as", "the", "Dow", "Jones", "Industrial", "Average", "plunged", "190.58", "points", "--", "most", "of", "it", "in", "the", "final", "hour", "--", "it", "barely", "managed", "to", "stay", "this", "side", "of", "chaos", "."], "tokens": ["But", "", "the", "", "", "", "Exchange", "did", "n't", "", "", "", "as", "the", "", "", "", "", "", "", "", "--", "", "of", "it", "in", "the", "", "", "--", "it", "", "", "to", "", "this", "", "of", "", "."], "token_ids": [266, 16, 40, 7, 7, 7, 143, 255, 126, 16, 16, 7, 168, 40, 7, 5, 16, 7, 3, 10, 6, 216, 16, 26, 71, 42, 40, 20, 16, 216, 71, 2, 3, 59, 16, 185, 16, 26, 6, 
62], "tags": ["CC", "IN", "DT", "NNP", "NNP", "NNP", "NNP", "VBD", "RB", "VB", "RB", "NNP", "IN", "DT", "NNP", "NNP", "NNP", "NNP", "VBD", "CD", "NNS", ":", "JJS", "IN", "PRP", "IN", "DT", "JJ", "NN", ":", "PRP", "RB", "VBD", "TO", "VB", "DT", "NN", "IN", "NN", "."], "tree_str": "(S (CC But) (SBAR (IN while) (S (NP (DT the) (NNP New) (NNP York) (NNP Stock) (NNP Exchange)) (VP (VBD did) (RB n't) (VP (VB fall) (ADVP (RB apart)) (NP (NNP Friday)) (SBAR (IN as) (S (NP (DT the) (NNP Dow) (NNP Jones) (NNP Industrial) (NNP Average)) (VP (VBD plunged) (NP (NP (CD 190.58) (NNS points)) (PRN (: --) (NP (NP (JJS most)) (PP (IN of) (NP (PRP it))) (PP (IN in) (NP (DT the) (JJ final) (NN hour)))) (: --)))))))))) (NP (PRP it)) (ADVP (RB barely)) (VP (VBD managed) (S (VP (TO to) (VP (VB stay) (NP (NP (DT this) (NN side)) (PP (IN of) (NP (NN chaos)))))))) (. .))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [3, 2, 1, 0], [4, 3, 2, 1, 0], [5, 4, 3, 2, 1, 0], [6, 5, 4, 3, 2, 1, 0], [7, 6, 5, 4, 3, 2, 1, 0], [7, 5, 4, 3, 2, 1, 0], [7, 4, 3, 2, 1, 0], [7, 3, 2, 1, 0], [7, 2, 1, 0], [8, 7, 2, 1, 0], [9, 8, 7, 2, 1, 0], [10, 9, 8, 7, 2, 1, 0], [10, 8, 7, 2, 1, 0], [10, 7, 2, 1, 0], [10, 2, 1, 0], [10, 1, 0], [11, 10, 1, 0], [10, 1, 0], [12, 10, 1, 0], [10, 1, 0], [13, 10, 1, 0], [14, 13, 10, 1, 0], [15, 14, 13, 10, 1, 0], [16, 15, 14, 13, 10, 1, 0], [17, 16, 15, 14, 13, 10, 1, 0], [18, 17, 16, 15, 14, 13, 10, 1, 0], [18, 16, 15, 14, 13, 10, 1, 0], [18, 15, 14, 13, 10, 1, 0], [18, 14, 13, 10, 1, 0], [18, 13, 10, 1, 0], [19, 18, 13, 10, 1, 0], [19, 13, 10, 1, 0], [19, 10, 1, 0], [20, 19, 10, 1, 0], [21, 20, 19, 10, 1, 0], [21, 19, 10, 1, 0], [22, 21, 19, 10, 1, 0], [23, 22, 21, 19, 10, 1, 0], [23, 21, 19, 10, 1, 0], [24, 23, 21, 19, 10, 1, 0], [25, 24, 23, 21, 19, 10, 1, 0], [24, 23, 21, 19, 10, 1, 0], [23, 21, 19, 10, 1, 0], [26, 23, 21, 19, 10, 1, 0], [27, 26, 23, 21, 19, 10, 1, 0], [28, 27, 26, 23, 21, 19, 10, 1, 0], [29, 28, 27, 26, 23, 21, 
19, 10, 1, 0], [29, 27, 26, 23, 21, 19, 10, 1, 0], [29, 26, 23, 21, 19, 10, 1, 0], [26, 23, 21, 19, 10, 1, 0], [23, 21, 19, 10, 1, 0], [30, 23, 21, 19, 10, 1, 0], [23, 21, 19, 10, 1, 0], [21, 19, 10, 1, 0], [19, 10, 1, 0], [10, 1, 0], [31, 10, 1, 0], [32, 31, 10, 1, 0], [33, 32, 31, 10, 1, 0], [33, 31, 10, 1, 0], [33, 10, 1, 0], [33, 1, 0], [33, 0], [34, 33, 0], [35, 34, 33, 0], [35, 33, 0], [36, 35, 33, 0], [37, 36, 35, 33, 0], [37, 35, 33, 0], [38, 37, 35, 33, 0], [39, 38, 37, 35, 33, 0], [38, 37, 35, 33, 0], [37, 35, 33, 0], [35, 33, 0], [33, 0], [40, 33, 0], [33, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 
40], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 
37, 38, 39, 40], [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [31, 32, 33, 34, 35, 36, 37, 38, 39, 40], 
[31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [31, 32, 33, 34, 35, 36, 37, 38, 39, 40], [32, 33, 34, 35, 36, 37, 38, 39, 40], [33, 34, 35, 36, 37, 38, 39, 40], [34, 35, 36, 37, 38, 39, 40], [34, 35, 36, 37, 38, 39, 40], [34, 35, 36, 37, 38, 39, 40], [34, 35, 36, 37, 38, 39, 40], [34, 35, 36, 37, 38, 39, 40], [35, 36, 37, 38, 39, 40], [36, 37, 38, 39, 40], [36, 37, 38, 39, 40], [37, 38, 39, 40], [38, 39, 40], [38, 39, 40], [39, 40], [40], [40], [40], [40], [40], [], [], []], "actions": [[0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 22], [1, 22], [1, 14], [0, -1], [0, -1], [0, -1], [1, 21], [1, 5], [1, 24], [1, 19], [0, -1], [2, 2], [0, -1], [2, 41], [0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 22], [1, 22], [1, 14], [0, -1], [1, 24], [1, 19], [0, -1], [0, -1], [1, 26], [0, -1], [0, -1], [1, 37], [0, -1], [0, -1], [2, 30], [2, 35], [0, -1], [0, -1], [0, -1], [0, -1], [1, 3], [1, 14], [2, 30], [2, 35], [0, -1], [2, 37], [2, 13], [2, 16], [2, 1], [0, -1], [0, -1], [0, -1], [1, 2], [1, 24], [1, 1], [1, 7], [0, -1], [0, -1], [1, 5], [0, -1], [0, -1], [1, 14], [0, -1], [0, -1], [2, 30], [2, 35], [2, 16], [2, 43], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [0, 2, 1], [0, 3, 2], [0, 4, 3], [0, 5, 4], [0, 6, 5], [1, 7, 6], [1, 7, 5], [1, 7, 4], [1, 7, 3], [0, 7, 2], [0, 8, 7], [0, 9, 8], [1, 10, 9], [1, 10, 8], [1, 10, 7], [1, 10, 2], [0, 10, 1], [2, 11, 10], [0, 10, 1], [2, 12, 10], [0, 10, 1], [0, 13, 10], [0, 14, 13], [0, 15, 14], [0, 16, 15], [0, 17, 16], [1, 18, 17], [1, 18, 16], [1, 18, 15], [1, 18, 14], [0, 18, 13], [1, 19, 18], [1, 19, 13], [0, 19, 10], [0, 20, 19], [1, 21, 20], [0, 21, 19], [0, 22, 21], [1, 23, 22], [0, 23, 21], [0, 24, 23], [2, 25, 24], [2, 24, 23], [0, 23, 21], [0, 26, 23], [0, 27, 26], [0, 28, 27], [1, 29, 28], [1, 29, 27], [2, 29, 26], [2, 26, 23], [0, 23, 21], [2, 30, 23], [2, 23, 21], [2, 21, 19], [2, 19, 10], [0, 10, 1], [0, 
31, 10], [0, 32, 31], [1, 33, 32], [1, 33, 31], [1, 33, 10], [1, 33, 1], [0, 33, 0], [0, 34, 33], [1, 35, 34], [0, 35, 33], [0, 36, 35], [1, 37, 36], [0, 37, 35], [0, 38, 37], [2, 39, 38], [2, 38, 37], [2, 37, 35], [2, 35, 33], [0, 33, 0], [2, 40, 33], [2, 33, 0]]}} +{"orig_tokens": ["Some", "``", "circuit", "breakers", "''", "installed", "after", "the", "October", "1987", "crash", "failed", "their", "first", "test", ",", "traders", "say", ",", "unable", "to", "cool", "the", "selling", "panic", "in", "both", "stocks", "and", "futures", "."], "tokens": ["", "``", "", "", "''", "", "after", "the", "", "1987", "crash", "", "their", "", "", ",", "", "", ",", "", "to", "", "the", "", "", "in", "", "stocks", "and", "", "."], "token_ids": [7, 27, 16, 6, 30, 3, 264, 40, 19, 213, 215, 3, 147, 16, 16, 45, 6, 16, 45, 4, 59, 16, 40, 17, 16, 42, 16, 231, 92, 6, 62], "tags": ["DT", "``", "NN", "NNS", "''", "VBN", "IN", "DT", "NNP", "CD", "NN", "VBD", "PRP$", "JJ", "NN", ",", "NNS", "VBP", ",", "JJ", "TO", "VB", "DT", "NN", "NN", "IN", "DT", "NNS", "CC", "NNS", "."], "tree_str": "(S (NP (NP (DT Some) (`` ``) (NN circuit) (NNS breakers) ('' '')) (VP (VBN installed) (PP (IN after) (NP (DT the) (NNP October) (CD 1987) (NN crash))))) (VP (VBD failed) (NP (PRP$ their) (JJ first) (NN test)) (PRN (, ,) (S (NP (NNS traders)) (VP (VBP say))) (, ,)) (S (ADJP (JJ unable) (S (VP (TO to) (VP (VB cool) (NP (NP (DT the) (NN selling) (NN panic)) (PP (IN in) (NP (DT both) (NNS stocks) (CC and) (NNS futures)))))))))) (. 
.))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [3, 2, 1, 0], [4, 3, 2, 1, 0], [4, 2, 1, 0], [4, 1, 0], [4, 0], [5, 4, 0], [4, 0], [6, 4, 0], [7, 6, 4, 0], [8, 7, 6, 4, 0], [9, 8, 7, 6, 4, 0], [10, 9, 8, 7, 6, 4, 0], [11, 10, 9, 8, 7, 6, 4, 0], [11, 9, 8, 7, 6, 4, 0], [11, 8, 7, 6, 4, 0], [11, 7, 6, 4, 0], [7, 6, 4, 0], [6, 4, 0], [4, 0], [12, 4, 0], [12, 0], [13, 12, 0], [14, 13, 12, 0], [15, 14, 13, 12, 0], [15, 13, 12, 0], [15, 12, 0], [12, 0], [16, 12, 0], [17, 16, 12, 0], [18, 17, 16, 12, 0], [18, 16, 12, 0], [18, 12, 0], [19, 18, 12, 0], [18, 12, 0], [12, 0], [20, 12, 0], [21, 20, 12, 0], [22, 21, 20, 12, 0], [22, 20, 12, 0], [23, 22, 20, 12, 0], [24, 23, 22, 20, 12, 0], [25, 24, 23, 22, 20, 12, 0], [25, 23, 22, 20, 12, 0], [25, 22, 20, 12, 0], [26, 25, 22, 20, 12, 0], [27, 26, 25, 22, 20, 12, 0], [28, 27, 26, 25, 22, 20, 12, 0], [28, 26, 25, 22, 20, 12, 0], [29, 28, 26, 25, 22, 20, 12, 0], [28, 26, 25, 22, 20, 12, 0], [30, 28, 26, 25, 22, 20, 12, 0], [28, 26, 25, 22, 20, 12, 0], [26, 25, 22, 20, 12, 0], [25, 22, 20, 12, 0], [22, 20, 12, 0], [20, 12, 0], [12, 0], [31, 12, 0], [12, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [19, 20, 21, 22, 23, 24, 
25, 26, 27, 28, 29, 30, 31], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [23, 24, 25, 26, 27, 28, 29, 30, 31], [23, 24, 25, 26, 27, 28, 29, 30, 31], [24, 25, 26, 27, 28, 29, 30, 31], [25, 26, 27, 28, 29, 30, 31], [26, 27, 28, 29, 30, 31], [26, 27, 28, 29, 30, 31], [26, 27, 28, 29, 30, 31], [27, 28, 29, 30, 31], [28, 29, 30, 31], [29, 30, 31], [29, 30, 31], [30, 31], [30, 31], [31], [31], [31], [31], [31], [31], [31], [], [], []], "actions": [[0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 37], [1, 14], [0, -1], [2, 37], [0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [1, 26], [1, 22], [1, 14], [2, 30], [2, 35], [2, 42], [0, -1], [1, 24], [0, -1], [0, -1], [0, -1], [1, 3], [1, 31], [2, 16], [0, -1], [0, -1], [0, -1], [1, 24], [1, 37], [0, -1], [2, 37], [2, 28], [0, -1], [0, -1], [0, -1], [1, 5], [0, -1], [0, -1], [0, -1], [1, 22], [1, 14], [0, -1], [0, -1], [0, -1], [1, 33], [0, -1], [2, 7], [0, -1], [2, 9], [2, 30], [2, 35], [2, 16], [2, 43], [2, 13], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [0, 2, 1], [0, 3, 2], [1, 4, 3], [1, 4, 2], [1, 4, 1], [0, 4, 0], [2, 5, 4], [0, 4, 0], [0, 6, 4], [0, 7, 6], [0, 8, 7], [0, 9, 8], [0, 10, 9], [1, 11, 10], [1, 11, 9], [1, 11, 8], [2, 11, 7], [2, 7, 6], [2, 6, 4], [0, 4, 0], [1, 12, 4], [0, 12, 0], [0, 13, 12], [0, 14, 13], [1, 15, 14], [1, 15, 13], [2, 15, 12], [0, 12, 0], [0, 16, 12], [0, 17, 16], [1, 18, 17], [1, 18, 16], [0, 18, 12], [2, 19, 18], [2, 18, 12], [0, 12, 0], [0, 20, 12], [0, 21, 20], [1, 22, 21], [0, 22, 20], [0, 23, 22], [0, 24, 23], [1, 25, 24], [1, 25, 23], [0, 25, 22], [0, 26, 25], [0, 27, 26], [1, 28, 27], [0, 28, 26], [2, 29, 28], [0, 28, 26], [2, 30, 28], [2, 28, 26], [2, 
26, 25], [2, 25, 22], [2, 22, 20], [2, 20, 12], [0, 12, 0], [2, 31, 12], [2, 12, 0]]}} +{"orig_tokens": ["The", "49", "stock", "specialist", "firms", "on", "the", "Big", "Board", "floor", "--", "the", "buyers", "and", "sellers", "of", "last", "resort", "who", "were", "criticized", "after", "the", "1987", "crash", "--", "once", "again", "could", "n't", "handle", "the", "selling", "pressure", "."], "tokens": ["The", "", "stock", "", "", "on", "the", "", "", "", "--", "the", "", "and", "", "of", "last", "", "who", "were", "", "after", "the", "1987", "crash", "--", "", "", "", "n't", "", "the", "", "", "."], "token_ids": [28, 10, 150, 16, 6, 165, 40, 7, 7, 16, 216, 40, 6, 92, 6, 26, 85, 16, 280, 250, 3, 264, 40, 213, 215, 216, 16, 16, 16, 126, 16, 40, 17, 16, 62], "tags": ["DT", "CD", "NN", "NN", "NNS", "IN", "DT", "NNP", "NNP", "NN", ":", "DT", "NNS", "CC", "NNS", "IN", "JJ", "NN", "WP", "VBD", "VBN", "IN", "DT", "CD", "NN", ":", "RB", "RB", "MD", "RB", "VB", "DT", "NN", "NN", "."], "tree_str": "(S (NP (NP (NP (DT The) (CD 49) (NN stock) (NN specialist) (NNS firms)) (PP (IN on) (NP (DT the) (NNP Big) (NNP Board) (NN floor)))) (: --) (NP (NP (DT the) (NNS buyers) (CC and) (NNS sellers)) (PP (IN of) (NP (JJ last) (NN resort))) (SBAR (WHNP (WP who)) (S (VP (VBD were) (VP (VBN criticized) (PP (IN after) (NP (DT the) (CD 1987) (NN crash)))))))) (: --)) (ADVP (RB once) (RB again)) (VP (MD could) (RB n't) (VP (VB handle) (NP (DT the) (NN selling) (NN pressure)))) (. 
.))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [3, 2, 1, 0], [4, 3, 2, 1, 0], [5, 4, 3, 2, 1, 0], [5, 3, 2, 1, 0], [5, 2, 1, 0], [5, 1, 0], [5, 0], [6, 5, 0], [7, 6, 5, 0], [8, 7, 6, 5, 0], [9, 8, 7, 6, 5, 0], [10, 9, 8, 7, 6, 5, 0], [10, 8, 7, 6, 5, 0], [10, 7, 6, 5, 0], [10, 6, 5, 0], [6, 5, 0], [5, 0], [11, 5, 0], [5, 0], [12, 5, 0], [13, 12, 5, 0], [13, 5, 0], [14, 13, 5, 0], [13, 5, 0], [15, 13, 5, 0], [13, 5, 0], [16, 13, 5, 0], [17, 16, 13, 5, 0], [18, 17, 16, 13, 5, 0], [18, 16, 13, 5, 0], [16, 13, 5, 0], [13, 5, 0], [19, 13, 5, 0], [20, 19, 13, 5, 0], [21, 20, 19, 13, 5, 0], [21, 19, 13, 5, 0], [21, 13, 5, 0], [22, 21, 13, 5, 0], [23, 22, 21, 13, 5, 0], [24, 23, 22, 21, 13, 5, 0], [25, 24, 23, 22, 21, 13, 5, 0], [25, 23, 22, 21, 13, 5, 0], [25, 22, 21, 13, 5, 0], [22, 21, 13, 5, 0], [21, 13, 5, 0], [13, 5, 0], [5, 0], [26, 5, 0], [5, 0], [27, 5, 0], [28, 27, 5, 0], [28, 5, 0], [29, 28, 5, 0], [30, 29, 28, 5, 0], [31, 30, 29, 28, 5, 0], [31, 29, 28, 5, 0], [31, 28, 5, 0], [31, 5, 0], [31, 0], [32, 31, 0], [33, 32, 31, 0], [34, 33, 32, 31, 0], [34, 32, 31, 0], [34, 31, 0], [31, 0], [35, 31, 0], [31, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 
28, 29, 30, 31, 32, 33, 34, 35], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [27, 28, 29, 30, 31, 32, 33, 34, 35], [27, 28, 29, 30, 31, 32, 33, 34, 35], [28, 29, 30, 31, 32, 33, 34, 35], [29, 30, 31, 32, 33, 34, 35], [29, 30, 31, 32, 33, 34, 35], [30, 31, 32, 33, 34, 35], [31, 32, 33, 34, 35], [32, 33, 34, 35], [32, 33, 34, 35], [32, 33, 34, 35], [32, 33, 34, 35], [32, 33, 34, 35], [33, 34, 35], [34, 35], [35], [35], [35], [35], [], [], []], "actions": [[0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 22], [1, 26], [1, 14], [0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 22], [1, 14], [2, 
30], [2, 35], [0, -1], [2, 37], [0, -1], [0, -1], [1, 14], [0, -1], [2, 7], [0, -1], [2, 9], [0, -1], [0, -1], [0, -1], [1, 3], [2, 30], [2, 35], [0, -1], [0, -1], [0, -1], [1, 6], [1, 25], [0, -1], [0, -1], [0, -1], [0, -1], [1, 26], [1, 14], [2, 30], [2, 35], [2, 39], [2, 13], [0, -1], [2, 37], [0, -1], [0, -1], [1, 2], [0, -1], [0, -1], [0, -1], [1, 21], [1, 5], [1, 2], [1, 24], [0, -1], [0, -1], [0, -1], [1, 22], [1, 14], [2, 16], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [0, 2, 1], [0, 3, 2], [0, 4, 3], [1, 5, 4], [1, 5, 3], [1, 5, 2], [1, 5, 1], [0, 5, 0], [0, 6, 5], [0, 7, 6], [0, 8, 7], [0, 9, 8], [1, 10, 9], [1, 10, 8], [1, 10, 7], [2, 10, 6], [2, 6, 5], [0, 5, 0], [2, 11, 5], [0, 5, 0], [0, 12, 5], [1, 13, 12], [0, 13, 5], [2, 14, 13], [0, 13, 5], [2, 15, 13], [0, 13, 5], [0, 16, 13], [0, 17, 16], [1, 18, 17], [2, 18, 16], [2, 16, 13], [0, 13, 5], [0, 19, 13], [0, 20, 19], [1, 21, 20], [1, 21, 19], [0, 21, 13], [0, 22, 21], [0, 23, 22], [0, 24, 23], [1, 25, 24], [1, 25, 23], [2, 25, 22], [2, 22, 21], [2, 21, 13], [2, 13, 5], [0, 5, 0], [2, 26, 5], [0, 5, 0], [0, 27, 5], [1, 28, 27], [0, 28, 5], [0, 29, 28], [0, 30, 29], [1, 31, 30], [1, 31, 29], [1, 31, 28], [1, 31, 5], [0, 31, 0], [0, 32, 31], [0, 33, 32], [1, 34, 33], [1, 34, 32], [2, 34, 31], [0, 31, 0], [2, 35, 31], [2, 31, 0]]}} +{"orig_tokens": ["Big", "investment", "banks", "refused", "to", "step", "up", "to", "the", "plate", "to", "support", "the", "beleaguered", "floor", "traders", "by", "buying", "big", "blocks", "of", "stock", ",", "traders", "say", "."], "tokens": ["", "", "", "", "to", "", "", "to", "the", "", "to", "", "the", "", "", "", "by", "", "", "", "of", "stock", ",", "", "", "."], "token_ids": [7, 16, 6, 3, 59, 16, 16, 59, 40, 16, 59, 16, 40, 3, 16, 6, 53, 17, 16, 6, 26, 150, 45, 6, 16, 62], "tags": ["JJ", "NN", "NNS", "VBD", "TO", "VB", "IN", "TO", "DT", "NN", "TO", "VB", "DT", "JJ", "NN", "NNS", "IN", "VBG", "JJ", "NNS", "IN", "NN", ",", "NNS", "VBP", 
"."], "tree_str": "(S (S (NP (JJ Big) (NN investment) (NNS banks)) (VP (VBD refused) (S (VP (TO to) (VP (VB step) (ADVP (IN up) (PP (TO to) (NP (DT the) (NN plate)))) (S (VP (TO to) (VP (VB support) (NP (DT the) (JJ beleaguered) (NN floor) (NNS traders)) (PP (IN by) (S (VP (VBG buying) (NP (NP (JJ big) (NNS blocks)) (PP (IN of) (NP (NN stock))))))))))))))) (, ,) (NP (NNS traders)) (VP (VBP say)) (. .))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [3, 2, 1, 0], [3, 1, 0], [3, 0], [4, 3, 0], [4, 0], [5, 4, 0], [6, 5, 4, 0], [6, 4, 0], [7, 6, 4, 0], [8, 7, 6, 4, 0], [9, 8, 7, 6, 4, 0], [10, 9, 8, 7, 6, 4, 0], [10, 8, 7, 6, 4, 0], [8, 7, 6, 4, 0], [7, 6, 4, 0], [6, 4, 0], [11, 6, 4, 0], [12, 11, 6, 4, 0], [12, 6, 4, 0], [13, 12, 6, 4, 0], [14, 13, 12, 6, 4, 0], [15, 14, 13, 12, 6, 4, 0], [16, 15, 14, 13, 12, 6, 4, 0], [16, 14, 13, 12, 6, 4, 0], [16, 13, 12, 6, 4, 0], [16, 12, 6, 4, 0], [12, 6, 4, 0], [17, 12, 6, 4, 0], [18, 17, 12, 6, 4, 0], [19, 18, 17, 12, 6, 4, 0], [20, 19, 18, 17, 12, 6, 4, 0], [20, 18, 17, 12, 6, 4, 0], [21, 20, 18, 17, 12, 6, 4, 0], [22, 21, 20, 18, 17, 12, 6, 4, 0], [21, 20, 18, 17, 12, 6, 4, 0], [20, 18, 17, 12, 6, 4, 0], [18, 17, 12, 6, 4, 0], [17, 12, 6, 4, 0], [12, 6, 4, 0], [6, 4, 0], [4, 0], [23, 4, 0], [24, 23, 4, 0], [25, 24, 23, 4, 0], [25, 23, 4, 0], [25, 4, 0], [25, 0], [26, 25, 0], [25, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [18, 19, 20, 21, 22, 23, 24, 25, 26], [19, 20, 21, 22, 23, 24, 25, 26], [20, 21, 22, 23, 24, 25, 26], [21, 22, 23, 24, 25, 26], [21, 22, 23, 24, 25, 26], [22, 23, 24, 25, 26], [23, 24, 25, 26], [23, 24, 25, 26], [23, 24, 25, 26], [23, 24, 25, 26], [23, 24, 25, 26], [23, 24, 25, 26], [23, 24, 25, 26], [23, 24, 25, 26], [24, 25, 26], [25, 26], [26], [26], [26], [26], [], [], []], "actions": [[0, -1], [0, -1], [0, -1], [1, 22], [1, 3], [0, -1], [1, 24], [0, -1], [0, -1], [1, 5], [0, -1], [0, -1], [0, -1], [0, -1], [1, 14], [2, 30], [2, 
35], [2, 2], [0, -1], [0, -1], [1, 5], [0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 3], [1, 14], [2, 16], [0, -1], [0, -1], [0, -1], [0, -1], [1, 3], [0, -1], [0, -1], [2, 30], [2, 35], [2, 16], [2, 29], [2, 35], [2, 43], [2, 43], [0, -1], [0, -1], [0, -1], [1, 24], [1, 37], [1, 8], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [0, 2, 1], [1, 3, 2], [1, 3, 1], [0, 3, 0], [1, 4, 3], [0, 4, 0], [0, 5, 4], [1, 6, 5], [0, 6, 4], [0, 7, 6], [0, 8, 7], [0, 9, 8], [1, 10, 9], [2, 10, 8], [2, 8, 7], [2, 7, 6], [0, 6, 4], [0, 11, 6], [1, 12, 11], [0, 12, 6], [0, 13, 12], [0, 14, 13], [0, 15, 14], [1, 16, 15], [1, 16, 14], [1, 16, 13], [2, 16, 12], [0, 12, 6], [0, 17, 12], [0, 18, 17], [0, 19, 18], [1, 20, 19], [0, 20, 18], [0, 21, 20], [2, 22, 21], [2, 21, 20], [2, 20, 18], [2, 18, 17], [2, 17, 12], [2, 12, 6], [2, 6, 4], [0, 4, 0], [0, 23, 4], [0, 24, 23], [1, 25, 24], [1, 25, 23], [1, 25, 4], [0, 25, 0], [2, 26, 25], [2, 25, 0]]}} +{"orig_tokens": ["Heavy", "selling", "of", "Standard", "&", "Poor", "'s", "500-stock", "index", "futures", "in", "Chicago", "relentlessly", "beat", "stocks", "downward", "."], "tokens": ["", "", "of", "", "&", "", "'s", "", "", "", "in", "Chicago", "", "", "stocks", "", "."], "token_ids": [7, 17, 26, 7, 47, 7, 33, 1, 16, 6, 42, 32, 2, 16, 231, 16, 62], "tags": ["JJ", "NN", "IN", "NNP", "CC", "NNP", "POS", "JJ", "NN", "NNS", "IN", "NNP", "RB", "VBD", "NNS", "RB", "."], "tree_str": "(S (NP (NP (JJ Heavy) (NN selling)) (PP (IN of) (NP (NP (NNP Standard) (CC &) (NNP Poor) (POS 's)) (JJ 500-stock) (NN index) (NNS futures))) (PP (IN in) (NP (NNP Chicago)))) (VP (ADVP (RB relentlessly)) (VBD beat) (NP (NNS stocks)) (ADVP (RB downward))) (. 
.))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [2, 0], [3, 2, 0], [4, 3, 2, 0], [5, 4, 3, 2, 0], [4, 3, 2, 0], [6, 4, 3, 2, 0], [4, 3, 2, 0], [7, 4, 3, 2, 0], [4, 3, 2, 0], [8, 4, 3, 2, 0], [9, 8, 4, 3, 2, 0], [10, 9, 8, 4, 3, 2, 0], [10, 8, 4, 3, 2, 0], [10, 4, 3, 2, 0], [10, 3, 2, 0], [3, 2, 0], [2, 0], [11, 2, 0], [12, 11, 2, 0], [11, 2, 0], [2, 0], [13, 2, 0], [14, 13, 2, 0], [14, 2, 0], [14, 0], [15, 14, 0], [14, 0], [16, 14, 0], [14, 0], [17, 14, 0], [14, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [9, 10, 11, 12, 13, 14, 15, 16, 17], [10, 11, 12, 13, 14, 15, 16, 17], [11, 12, 13, 14, 15, 16, 17], [11, 12, 13, 14, 15, 16, 17], [11, 12, 13, 14, 15, 16, 17], [11, 12, 13, 14, 15, 16, 17], [11, 12, 13, 14, 15, 16, 17], [11, 12, 13, 14, 15, 16, 17], [12, 13, 14, 15, 16, 17], [13, 14, 15, 16, 17], [13, 14, 15, 16, 17], [13, 14, 15, 16, 17], [14, 15, 16, 17], [15, 16, 17], [15, 16, 17], [15, 16, 17], [16, 17], [16, 17], [17], [17], [], [], []], "actions": [[0, -1], [0, -1], [1, 3], [0, -1], [0, -1], [0, -1], [2, 7], [0, -1], [2, 9], [0, -1], [2, 32], [0, -1], [0, -1], [0, -1], [1, 22], [1, 3], [1, 31], [2, 30], [2, 35], [0, -1], [0, -1], [2, 30], [2, 35], [0, -1], [0, -1], [1, 2], [1, 24], [0, -1], [2, 16], [0, -1], [2, 2], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [1, 2, 1], [0, 2, 0], [0, 3, 2], [0, 4, 3], [2, 5, 4], [0, 4, 3], [2, 
6, 4], [0, 4, 3], [2, 7, 4], [0, 4, 3], [0, 8, 4], [0, 9, 8], [1, 10, 9], [1, 10, 8], [1, 10, 4], [2, 10, 3], [2, 3, 2], [0, 2, 0], [0, 11, 2], [2, 12, 11], [2, 11, 2], [0, 2, 0], [0, 13, 2], [1, 14, 13], [1, 14, 2], [0, 14, 0], [2, 15, 14], [0, 14, 0], [2, 16, 14], [0, 14, 0], [2, 17, 14], [2, 14, 0]]}} +{"orig_tokens": ["Seven", "Big", "Board", "stocks", "--", "UAL", ",", "AMR", ",", "BankAmerica", ",", "Walt", "Disney", ",", "Capital", "Cities\\/ABC", ",", "Philip", "Morris", "and", "Pacific", "Telesis", "Group", "--", "stopped", "trading", "and", "never", "resumed", "."], "tokens": ["", "", "", "stocks", "--", "", ",", "", ",", "", ",", "", "", ",", "", "", ",", "", "", "and", "", "", "", "--", "", "", "and", "", "", "."], "token_ids": [7, 7, 7, 231, 216, 7, 45, 7, 45, 7, 45, 7, 7, 45, 16, 7, 45, 7, 5, 92, 7, 5, 7, 216, 3, 17, 92, 13, 3, 62], "tags": ["CD", "NNP", "NNP", "NNS", ":", "NNP", ",", "NNP", ",", "NNP", ",", "NNP", "NNP", ",", "NNP", "NNP", ",", "NNP", "NNP", "CC", "NNP", "NNP", "NNP", ":", "VBD", "VBG", "CC", "RB", "VBD", "."], "tree_str": "(S (NP (NP (CD Seven) (NNP Big) (NNP Board) (NNS stocks)) (: --) (NP (NP (NNP UAL)) (, ,) (NP (NNP AMR)) (, ,) (NP (NNP BankAmerica)) (, ,) (NP (NNP Walt) (NNP Disney)) (, ,) (NP (NNP Capital) (NNP Cities\\/ABC)) (, ,) (NP (NNP Philip) (NNP Morris)) (CC and) (NP (NNP Pacific) (NNP Telesis) (NNP Group))) (: --)) (VP (VP (VBD stopped) (S (VP (VBG trading)))) (CC and) (VP (ADVP (RB never)) (VBD resumed))) (. 
.))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [3, 2, 1, 0], [4, 3, 2, 1, 0], [4, 2, 1, 0], [4, 1, 0], [4, 0], [5, 4, 0], [4, 0], [6, 4, 0], [7, 6, 4, 0], [6, 4, 0], [8, 6, 4, 0], [6, 4, 0], [9, 6, 4, 0], [6, 4, 0], [10, 6, 4, 0], [6, 4, 0], [11, 6, 4, 0], [6, 4, 0], [12, 6, 4, 0], [13, 12, 6, 4, 0], [13, 6, 4, 0], [6, 4, 0], [14, 6, 4, 0], [6, 4, 0], [15, 6, 4, 0], [16, 15, 6, 4, 0], [16, 6, 4, 0], [6, 4, 0], [17, 6, 4, 0], [6, 4, 0], [18, 6, 4, 0], [19, 18, 6, 4, 0], [19, 6, 4, 0], [6, 4, 0], [20, 6, 4, 0], [6, 4, 0], [21, 6, 4, 0], [22, 21, 6, 4, 0], [23, 22, 21, 6, 4, 0], [23, 21, 6, 4, 0], [23, 6, 4, 0], [6, 4, 0], [4, 0], [24, 4, 0], [4, 0], [25, 4, 0], [25, 0], [26, 25, 0], [25, 0], [27, 25, 0], [25, 0], [28, 25, 0], [29, 28, 25, 0], [29, 25, 0], [25, 0], [30, 25, 0], [25, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [8, 9, 
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [22, 23, 24, 25, 26, 27, 28, 29, 30], [23, 24, 25, 26, 27, 28, 29, 30], [24, 25, 26, 27, 28, 29, 30], [24, 25, 26, 27, 
28, 29, 30], [24, 25, 26, 27, 28, 29, 30], [24, 25, 26, 27, 28, 29, 30], [24, 25, 26, 27, 28, 29, 30], [25, 26, 27, 28, 29, 30], [25, 26, 27, 28, 29, 30], [26, 27, 28, 29, 30], [26, 27, 28, 29, 30], [27, 28, 29, 30], [27, 28, 29, 30], [28, 29, 30], [28, 29, 30], [29, 30], [30], [30], [30], [], [], []], "actions": [[0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 22], [1, 26], [0, -1], [2, 37], [0, -1], [0, -1], [2, 37], [0, -1], [2, 9], [0, -1], [2, 37], [0, -1], [2, 9], [0, -1], [2, 37], [0, -1], [0, -1], [1, 22], [2, 9], [0, -1], [2, 37], [0, -1], [0, -1], [1, 22], [2, 9], [0, -1], [2, 37], [0, -1], [0, -1], [1, 22], [2, 9], [0, -1], [2, 7], [0, -1], [0, -1], [0, -1], [1, 22], [1, 22], [2, 9], [2, 13], [0, -1], [2, 37], [0, -1], [1, 24], [0, -1], [2, 43], [0, -1], [2, 7], [0, -1], [0, -1], [1, 21], [2, 9], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [0, 2, 1], [0, 3, 2], [1, 4, 3], [1, 4, 2], [1, 4, 1], [0, 4, 0], [2, 5, 4], [0, 4, 0], [0, 6, 4], [2, 7, 6], [0, 6, 4], [2, 8, 6], [0, 6, 4], [2, 9, 6], [0, 6, 4], [2, 10, 6], [0, 6, 4], [2, 11, 6], [0, 6, 4], [0, 12, 6], [1, 13, 12], [2, 13, 6], [0, 6, 4], [2, 14, 6], [0, 6, 4], [0, 15, 6], [1, 16, 15], [2, 16, 6], [0, 6, 4], [2, 17, 6], [0, 6, 4], [0, 18, 6], [1, 19, 18], [2, 19, 6], [0, 6, 4], [2, 20, 6], [0, 6, 4], [0, 21, 6], [0, 22, 21], [1, 23, 22], [1, 23, 21], [2, 23, 6], [2, 6, 4], [0, 4, 0], [2, 24, 4], [0, 4, 0], [1, 25, 4], [0, 25, 0], [2, 26, 25], [0, 25, 0], [2, 27, 25], [0, 25, 0], [0, 28, 25], [1, 29, 28], [2, 29, 25], [0, 25, 0], [2, 30, 25], [2, 25, 0]]}} +{"orig_tokens": ["The", "finger-pointing", "has", "already", "begun", "."], "tokens": ["The", "", "has", "already", "", "."], "token_ids": [28, 17, 125, 217, 16, 62], "tags": ["DT", "NN", "VBZ", "RB", "VBN", "."], "tree_str": "(S (NP (DT The) (NN finger-pointing)) (VP (VBZ has) (ADVP (RB already)) (VP (VBN begun))) (. 
.))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [2, 0], [3, 2, 0], [4, 3, 2, 0], [5, 4, 3, 2, 0], [5, 3, 2, 0], [5, 2, 0], [5, 0], [6, 5, 0], [5, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6], [2, 3, 4, 5, 6], [3, 4, 5, 6], [3, 4, 5, 6], [4, 5, 6], [5, 6], [6], [6], [6], [6], [], [], []], "actions": [[0, -1], [0, -1], [1, 14], [0, -1], [0, -1], [0, -1], [1, 2], [1, 5], [1, 24], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [1, 2, 1], [0, 2, 0], [0, 3, 2], [0, 4, 3], [1, 5, 4], [1, 5, 3], [1, 5, 2], [0, 5, 0], [2, 6, 5], [2, 5, 0]]}} +{"orig_tokens": ["``", "The", "equity", "market", "was", "illiquid", "."], "tokens": ["``", "The", "", "market", "was", "", "."], "token_ids": [27, 28, 16, 214, 56, 16, 62], "tags": ["``", "DT", "NN", "NN", "VBD", "JJ", "."], "tree_str": "(S (`` ``) (NP (DT The) (NN equity) (NN market)) (VP (VBD was) (ADJP (JJ illiquid))) (. .))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [3, 2, 1, 0], [4, 3, 2, 1, 0], [4, 2, 1, 0], [4, 1, 0], [5, 4, 1, 0], [6, 5, 4, 1, 0], [6, 4, 1, 0], [6, 1, 0], [6, 0], [7, 6, 0], [6, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7], [2, 3, 4, 5, 6, 7], [3, 4, 5, 6, 7], [4, 5, 6, 7], [5, 6, 7], [5, 6, 7], [5, 6, 7], [6, 7], [7], [7], [7], [7], [], [], []], "actions": [[0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 14], [0, -1], [0, -1], [1, 10], [1, 24], [1, 37], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [0, 2, 1], [0, 3, 2], [1, 4, 3], [1, 4, 2], [0, 4, 1], [0, 5, 4], [1, 6, 5], [1, 6, 4], [1, 6, 1], [0, 6, 0], [2, 7, 6], [2, 6, 0]]}} +{"orig_tokens": ["Once", "again", "-LCB-", "the", "specialists", "-RCB-", "were", "not", "able", "to", "handle", "the", "imbalances", "on", "the", "floor", "of", "the", "New", "York", "Stock", "Exchange", ",", "''", "said", "Christopher", "Pedersen", ",", "senior", "vice", "president", "at", "Twenty-First", "Securities", "Corp", "."], 
"tokens": ["", "", "", "the", "", "", "were", "not", "", "to", "", "the", "", "on", "the", "", "of", "the", "", "", "", "Exchange", ",", "''", "said", "", "", ",", "", "", "president", "at", "", "Securities", "", "."], "token_ids": [7, 16, 1, 40, 6, 1, 250, 145, 16, 59, 16, 40, 6, 165, 40, 16, 26, 40, 7, 7, 7, 143, 45, 30, 70, 19, 7, 45, 16, 16, 91, 31, 11, 142, 7, 62], "tags": ["RB", "RB", "-LRB-", "DT", "NNS", "-RRB-", "VBD", "RB", "JJ", "TO", "VB", "DT", "NNS", "IN", "DT", "NN", "IN", "DT", "NNP", "NNP", "NNP", "NNP", ",", "''", "VBD", "NNP", "NNP", ",", "JJ", "NN", "NN", "IN", "NNP", "NNP", "NNP", "."], "tree_str": "(SINV (S (ADVP (RB Once) (RB again)) (-LRB- -LCB-) (NP (DT the) (NNS specialists)) (-RRB- -RCB-) (VP (VBD were) (RB not) (ADJP (JJ able) (S (VP (TO to) (VP (VB handle) (NP (NP (DT the) (NNS imbalances)) (PP (IN on) (NP (NP (DT the) (NN floor)) (PP (IN of) (NP (DT the) (NNP New) (NNP York) (NNP Stock) (NNP Exchange)))))))))))) (, ,) ('' '') (VP (VBD said)) (NP (NP (NNP Christopher) (NNP Pedersen)) (, ,) (NP (NP (JJ senior) (NN vice) (NN president)) (PP (IN at) (NP (NNP Twenty-First) (NNP Securities) (NNP Corp))))) (. 
.))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [2, 0], [3, 2, 0], [4, 3, 2, 0], [5, 4, 3, 2, 0], [5, 3, 2, 0], [6, 5, 3, 2, 0], [7, 6, 5, 3, 2, 0], [8, 7, 6, 5, 3, 2, 0], [9, 8, 7, 6, 5, 3, 2, 0], [9, 7, 6, 5, 3, 2, 0], [9, 6, 5, 3, 2, 0], [9, 5, 3, 2, 0], [9, 3, 2, 0], [9, 2, 0], [9, 0], [10, 9, 0], [11, 10, 9, 0], [11, 9, 0], [12, 11, 9, 0], [13, 12, 11, 9, 0], [13, 11, 9, 0], [14, 13, 11, 9, 0], [15, 14, 13, 11, 9, 0], [16, 15, 14, 13, 11, 9, 0], [16, 14, 13, 11, 9, 0], [17, 16, 14, 13, 11, 9, 0], [18, 17, 16, 14, 13, 11, 9, 0], [19, 18, 17, 16, 14, 13, 11, 9, 0], [20, 19, 18, 17, 16, 14, 13, 11, 9, 0], [21, 20, 19, 18, 17, 16, 14, 13, 11, 9, 0], [22, 21, 20, 19, 18, 17, 16, 14, 13, 11, 9, 0], [22, 20, 19, 18, 17, 16, 14, 13, 11, 9, 0], [22, 19, 18, 17, 16, 14, 13, 11, 9, 0], [22, 18, 17, 16, 14, 13, 11, 9, 0], [22, 17, 16, 14, 13, 11, 9, 0], [17, 16, 14, 13, 11, 9, 0], [16, 14, 13, 11, 9, 0], [14, 13, 11, 9, 0], [13, 11, 9, 0], [11, 9, 0], [9, 0], [23, 9, 0], [24, 23, 9, 0], [25, 24, 23, 9, 0], [25, 23, 9, 0], [25, 9, 0], [25, 0], [26, 25, 0], [27, 26, 25, 0], [27, 25, 0], [28, 27, 25, 0], [27, 25, 0], [29, 27, 25, 0], [30, 29, 27, 25, 0], [31, 30, 29, 27, 25, 0], [31, 29, 27, 25, 0], [31, 27, 25, 0], [32, 31, 27, 25, 0], [33, 32, 31, 27, 25, 0], [34, 33, 32, 31, 27, 25, 0], [35, 34, 33, 32, 31, 27, 25, 0], [35, 33, 32, 31, 27, 25, 0], [35, 32, 31, 27, 25, 0], [32, 31, 27, 25, 0], [31, 27, 25, 0], [27, 25, 0], [25, 0], [36, 25, 0], [25, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], 
[13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [28, 29, 30, 31, 32, 33, 34, 35, 36], [28, 29, 30, 31, 
32, 33, 34, 35, 36], [29, 30, 31, 32, 33, 34, 35, 36], [29, 30, 31, 32, 33, 34, 35, 36], [30, 31, 32, 33, 34, 35, 36], [31, 32, 33, 34, 35, 36], [32, 33, 34, 35, 36], [32, 33, 34, 35, 36], [32, 33, 34, 35, 36], [33, 34, 35, 36], [34, 35, 36], [35, 36], [36], [36], [36], [36], [36], [36], [36], [], [], []], "actions": [[0, -1], [0, -1], [1, 2], [0, -1], [0, -1], [0, -1], [1, 14], [0, -1], [0, -1], [0, -1], [0, -1], [1, 21], [1, 10], [1, 37], [1, 24], [1, 37], [1, 2], [0, -1], [0, -1], [1, 5], [0, -1], [0, -1], [1, 14], [0, -1], [0, -1], [0, -1], [1, 14], [0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 22], [1, 22], [1, 14], [2, 30], [2, 35], [2, 30], [2, 35], [2, 16], [2, 43], [0, -1], [0, -1], [0, -1], [1, 37], [1, 37], [1, 8], [0, -1], [0, -1], [1, 22], [0, -1], [2, 37], [0, -1], [0, -1], [0, -1], [1, 22], [1, 3], [0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 22], [2, 30], [2, 35], [2, 4], [2, 24], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [1, 2, 1], [0, 2, 0], [0, 3, 2], [0, 4, 3], [1, 5, 4], [0, 5, 3], [0, 6, 5], [0, 7, 6], [0, 8, 7], [1, 9, 8], [1, 9, 7], [1, 9, 6], [1, 9, 5], [1, 9, 3], [1, 9, 2], [0, 9, 0], [0, 10, 9], [1, 11, 10], [0, 11, 9], [0, 12, 11], [1, 13, 12], [0, 13, 11], [0, 14, 13], [0, 15, 14], [1, 16, 15], [0, 16, 14], [0, 17, 16], [0, 18, 17], [0, 19, 18], [0, 20, 19], [0, 21, 20], [1, 22, 21], [1, 22, 20], [1, 22, 19], [1, 22, 18], [2, 22, 17], [2, 17, 16], [2, 16, 14], [2, 14, 13], [2, 13, 11], [2, 11, 9], [0, 9, 0], [0, 23, 9], [0, 24, 23], [1, 25, 24], [1, 25, 23], [1, 25, 9], [0, 25, 0], [0, 26, 25], [1, 27, 26], [0, 27, 25], [2, 28, 27], [0, 27, 25], [0, 29, 27], [0, 30, 29], [1, 31, 30], [1, 31, 29], [0, 31, 27], [0, 32, 31], [0, 33, 32], [0, 34, 33], [1, 35, 34], [1, 35, 33], [2, 35, 32], [2, 32, 31], [2, 31, 27], [2, 27, 25], [0, 25, 0], [2, 36, 25], [2, 25, 0]]}} +{"orig_tokens": ["Countered", "James", "Maguire", ",", "chairman", "of", "specialists", "Henderson", "Brothers", "Inc.", 
":", "``", "It", "is", "easy", "to", "say", "the", "specialist", "is", "n't", "doing", "his", "job", "."], "tokens": ["", "", "", ",", "", "of", "", "", "", "Inc.", "", "``", "", "", "", "to", "", "the", "", "", "n't", "", "", "", "."], "token_ids": [15, 5, 7, 45, 16, 26, 6, 7, 5, 69, 16, 27, 7, 16, 16, 59, 16, 40, 16, 16, 126, 17, 16, 16, 62], "tags": ["VBD", "NNP", "NNP", ",", "NN", "IN", "NNS", "NNP", "NNP", "NNP", ":", "``", "PRP", "VBZ", "JJ", "TO", "VB", "DT", "NN", "VBZ", "RB", "VBG", "PRP$", "NN", "."], "tree_str": "(SINV (VP (VBD Countered)) (NP (NP (NNP James) (NNP Maguire)) (, ,) (NP (NP (NN chairman)) (PP (IN of) (NP (NNS specialists) (NNP Henderson) (NNP Brothers) (NNP Inc.))))) (: :) (`` ``) (S (NP (PRP It)) (VP (VBZ is) (ADJP (JJ easy)) (S (VP (TO to) (VP (VB say) (SBAR (S (NP (DT the) (NN specialist)) (VP (VBZ is) (RB n't) (VP (VBG doing) (NP (PRP$ his) (NN job))))))))))) (. .))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [3, 2, 1, 0], [3, 1, 0], [4, 3, 1, 0], [3, 1, 0], [5, 3, 1, 0], [6, 5, 3, 1, 0], [7, 6, 5, 3, 1, 0], [8, 7, 6, 5, 3, 1, 0], [9, 8, 7, 6, 5, 3, 1, 0], [10, 9, 8, 7, 6, 5, 3, 1, 0], [10, 8, 7, 6, 5, 3, 1, 0], [10, 7, 6, 5, 3, 1, 0], [10, 6, 5, 3, 1, 0], [6, 5, 3, 1, 0], [5, 3, 1, 0], [3, 1, 0], [1, 0], [11, 1, 0], [1, 0], [12, 1, 0], [1, 0], [13, 1, 0], [14, 13, 1, 0], [15, 14, 13, 1, 0], [15, 13, 1, 0], [15, 1, 0], [16, 15, 1, 0], [17, 16, 15, 1, 0], [17, 15, 1, 0], [18, 17, 15, 1, 0], [19, 18, 17, 15, 1, 0], [19, 17, 15, 1, 0], [20, 19, 17, 15, 1, 0], [21, 20, 19, 17, 15, 1, 0], [22, 21, 20, 19, 17, 15, 1, 0], [22, 20, 19, 17, 15, 1, 0], [22, 19, 17, 15, 1, 0], [22, 17, 15, 1, 0], [23, 22, 17, 15, 1, 0], [24, 23, 22, 17, 15, 1, 0], [24, 22, 17, 15, 1, 0], [22, 17, 15, 1, 0], [17, 15, 1, 0], [15, 1, 0], [1, 0], [25, 1, 0], [1, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [17, 18, 19, 20, 21, 22, 23, 24, 25], [18, 19, 20, 21, 22, 23, 24, 25], [18, 19, 20, 21, 22, 23, 24, 25], [19, 20, 21, 22, 23, 24, 25], [20, 21, 22, 23, 24, 25], [20, 21, 22, 23, 24, 25], [21, 22, 23, 24, 25], [22, 23, 24, 25], [23, 
24, 25], [23, 24, 25], [23, 24, 25], [23, 24, 25], [24, 25], [25], [25], [25], [25], [25], [25], [], [], []], "actions": [[0, -1], [0, -1], [0, -1], [1, 22], [0, -1], [2, 37], [0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 22], [1, 22], [2, 30], [2, 35], [2, 4], [2, 24], [0, -1], [2, 37], [0, -1], [2, 37], [0, -1], [0, -1], [0, -1], [1, 10], [1, 24], [0, -1], [0, -1], [1, 5], [0, -1], [0, -1], [1, 14], [0, -1], [0, -1], [0, -1], [1, 21], [1, 5], [1, 24], [0, -1], [0, -1], [1, 31], [2, 16], [2, 8], [2, 43], [2, 8], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [0, 2, 1], [1, 3, 2], [0, 3, 1], [2, 4, 3], [0, 3, 1], [0, 5, 3], [0, 6, 5], [0, 7, 6], [0, 8, 7], [0, 9, 8], [1, 10, 9], [1, 10, 8], [1, 10, 7], [2, 10, 6], [2, 6, 5], [2, 5, 3], [2, 3, 1], [0, 1, 0], [2, 11, 1], [0, 1, 0], [2, 12, 1], [0, 1, 0], [0, 13, 1], [0, 14, 13], [1, 15, 14], [1, 15, 13], [0, 15, 1], [0, 16, 15], [1, 17, 16], [0, 17, 15], [0, 18, 17], [1, 19, 18], [0, 19, 17], [0, 20, 19], [0, 21, 20], [1, 22, 21], [1, 22, 20], [1, 22, 19], [0, 22, 17], [0, 23, 22], [1, 24, 23], [2, 24, 22], [2, 22, 17], [2, 17, 15], [2, 15, 1], [0, 1, 0], [2, 25, 1], [2, 1, 0]]}} +{"orig_tokens": ["When", "the", "dollar", "is", "in", "a", "free-fall", ",", "even", "central", "banks", "ca", "n't", "stop", "it", "."], "tokens": ["", "the", "", "", "in", "a", "", ",", "", "", "", "", "n't", "", "it", "."], "token_ids": [7, 40, 16, 16, 42, 113, 1, 45, 16, 20, 6, 16, 126, 16, 71, 62], "tags": ["WRB", "DT", "NN", "VBZ", "IN", "DT", "NN", ",", "RB", "JJ", "NNS", "MD", "RB", "VB", "PRP", "."], "tree_str": "(S (SBAR (WHADVP (WRB When)) (S (NP (DT the) (NN dollar)) (VP (VBZ is) (PP (IN in) (NP (DT a) (NN free-fall)))))) (, ,) (NP (RB even) (JJ central) (NNS banks)) (VP (MD ca) (RB n't) (VP (VB stop) (NP (PRP it)))) (. 
.))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [3, 2, 1, 0], [3, 1, 0], [4, 3, 1, 0], [4, 1, 0], [4, 0], [5, 4, 0], [6, 5, 4, 0], [7, 6, 5, 4, 0], [7, 5, 4, 0], [5, 4, 0], [4, 0], [8, 4, 0], [9, 8, 4, 0], [10, 9, 8, 4, 0], [11, 10, 9, 8, 4, 0], [11, 9, 8, 4, 0], [11, 8, 4, 0], [12, 11, 8, 4, 0], [13, 12, 11, 8, 4, 0], [14, 13, 12, 11, 8, 4, 0], [14, 12, 11, 8, 4, 0], [14, 11, 8, 4, 0], [14, 8, 4, 0], [14, 4, 0], [14, 0], [15, 14, 0], [14, 0], [16, 14, 0], [14, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16], [8, 9, 10, 11, 12, 13, 14, 15, 16], [8, 9, 10, 11, 12, 13, 14, 15, 16], [8, 9, 10, 11, 12, 13, 14, 15, 16], [8, 9, 10, 11, 12, 13, 14, 15, 16], [9, 10, 11, 12, 13, 14, 15, 16], [10, 11, 12, 13, 14, 15, 16], [11, 12, 13, 14, 15, 16], [12, 13, 14, 15, 16], [12, 13, 14, 15, 16], [12, 13, 14, 15, 16], [13, 14, 15, 16], [14, 15, 16], [15, 16], [15, 16], [15, 16], [15, 16], [15, 16], [15, 16], [16], [16], [], [], []], "actions": [[0, -1], [0, -1], [0, -1], [1, 14], [0, -1], [1, 24], [1, 2], [0, -1], [0, -1], [0, -1], [1, 14], [2, 30], [2, 35], [0, -1], [0, -1], [0, -1], [0, -1], [1, 3], [1, 2], [0, -1], [0, -1], [0, -1], [1, 21], [1, 5], [1, 24], [1, 37], [1, 1], [0, -1], [2, 16], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [0, 2, 1], [1, 3, 2], [0, 3, 1], [1, 4, 3], [1, 4, 1], [0, 4, 0], [0, 5, 4], [0, 6, 5], [1, 7, 6], [2, 7, 5], [2, 5, 4], [0, 4, 0], [0, 8, 4], [0, 9, 8], [0, 10, 9], [1, 11, 10], [1, 11, 9], [0, 11, 8], [0, 12, 11], [0, 13, 12], [1, 
14, 13], [1, 14, 12], [1, 14, 11], [1, 14, 8], [1, 14, 4], [0, 14, 0], [2, 15, 14], [0, 14, 0], [2, 16, 14], [2, 14, 0]]}} +{"orig_tokens": ["Speculators", "are", "calling", "for", "a", "degree", "of", "liquidity", "that", "is", "not", "there", "in", "the", "market", ".", "''"], "tokens": ["", "are", "calling", "for", "a", "", "of", "", "that", "", "not", "", "in", "the", "market", ".", "''"], "token_ids": [5, 140, 281, 99, 113, 16, 26, 16, 188, 16, 145, 16, 42, 40, 214, 62, 30], "tags": ["NNS", "VBP", "VBG", "IN", "DT", "NN", "IN", "NN", "WDT", "VBZ", "RB", "RB", "IN", "DT", "NN", ".", "''"], "tree_str": "(S (NP (NNS Speculators)) (VP (VBP are) (VP (VBG calling) (PP (IN for) (NP (NP (DT a) (NN degree)) (PP (IN of) (NP (NN liquidity))) (SBAR (WHNP (WDT that)) (S (VP (VBZ is) (RB not) (ADVP (RB there)) (PP (IN in) (NP (DT the) (NN market)))))))))) (. .) ('' ''))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [3, 2, 1, 0], [3, 1, 0], [3, 0], [4, 3, 0], [5, 4, 3, 0], [6, 5, 4, 3, 0], [6, 4, 3, 0], [7, 6, 4, 3, 0], [8, 7, 6, 4, 3, 0], [7, 6, 4, 3, 0], [6, 4, 3, 0], [9, 6, 4, 3, 0], [10, 9, 6, 4, 3, 0], [10, 6, 4, 3, 0], [11, 10, 6, 4, 3, 0], [10, 6, 4, 3, 0], [12, 10, 6, 4, 3, 0], [10, 6, 4, 3, 0], [13, 10, 6, 4, 3, 0], [14, 13, 10, 6, 4, 3, 0], [15, 14, 13, 10, 6, 4, 3, 0], [15, 13, 10, 6, 4, 3, 0], [13, 10, 6, 4, 3, 0], [10, 6, 4, 3, 0], [6, 4, 3, 0], [4, 3, 0], [3, 0], [16, 3, 0], [3, 0], [17, 3, 0], [3, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [7, 8, 9, 10, 11, 12, 13, 14, 15, 
16, 17], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [9, 10, 11, 12, 13, 14, 15, 16, 17], [9, 10, 11, 12, 13, 14, 15, 16, 17], [9, 10, 11, 12, 13, 14, 15, 16, 17], [10, 11, 12, 13, 14, 15, 16, 17], [11, 12, 13, 14, 15, 16, 17], [11, 12, 13, 14, 15, 16, 17], [12, 13, 14, 15, 16, 17], [12, 13, 14, 15, 16, 17], [13, 14, 15, 16, 17], [13, 14, 15, 16, 17], [14, 15, 16, 17], [15, 16, 17], [16, 17], [16, 17], [16, 17], [16, 17], [16, 17], [16, 17], [16, 17], [17], [17], [], [], []], "actions": [[0, -1], [0, -1], [0, -1], [1, 5], [1, 24], [0, -1], [0, -1], [0, -1], [1, 14], [0, -1], [0, -1], [2, 30], [2, 35], [0, -1], [0, -1], [1, 24], [0, -1], [2, 21], [0, -1], [2, 2], [0, -1], [0, -1], [0, -1], [1, 14], [2, 30], [2, 35], [2, 39], [2, 30], [2, 35], [0, -1], [2, 37], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [0, 2, 1], [1, 3, 2], [1, 3, 1], [0, 3, 0], [0, 4, 3], [0, 5, 4], [1, 6, 5], [0, 6, 4], [0, 7, 6], [2, 8, 7], [2, 7, 6], [0, 6, 4], [0, 9, 6], [1, 10, 9], [0, 10, 6], [2, 11, 10], [0, 10, 6], [2, 12, 10], [0, 10, 6], [0, 13, 10], [0, 14, 13], [1, 15, 14], [2, 15, 13], [2, 13, 10], [2, 10, 6], [2, 6, 4], [2, 4, 3], [0, 3, 0], [2, 16, 3], [0, 3, 0], [2, 17, 3], [2, 3, 0]]}} +{"orig_tokens": ["Many", "money", "managers", "and", "some", "traders", "had", "already", "left", "their", "offices", "early", "Friday", "afternoon", "on", "a", "warm", "autumn", "day", "--", "because", "the", "stock", "market", "was", "so", "quiet", "."], "tokens": ["Many", "money", "managers", "and", "some", "", "had", "already", "", "their", "", "", "", "", "on", "a", "", "", "", "--", "", "the", "stock", "market", "was", "so", "", "."], "token_ids": [207, 174, 175, 92, 159, 6, 292, 217, 16, 147, 6, 2, 7, 16, 165, 113, 16, 16, 16, 216, 16, 40, 150, 214, 56, 211, 16, 62], "tags": ["JJ", "NN", "NNS", "CC", "DT", "NNS", "VBD", "RB", "VBN", "PRP$", "NNS", "RB", "NNP", "NN", "IN", "DT", "JJ", "NN", "NN", ":", "IN", "DT", "NN", "NN", "VBD", "RB", "JJ", "."], "tree_str": "(S (NP 
(NP (JJ Many) (NN money) (NNS managers)) (CC and) (NP (DT some) (NNS traders))) (VP (VBD had) (ADVP (RB already)) (VP (VBN left) (NP (PRP$ their) (NNS offices)) (NP (RB early) (NNP Friday) (NN afternoon)) (PP (IN on) (NP (DT a) (JJ warm) (NN autumn) (NN day))) (: --) (SBAR (IN because) (S (NP (DT the) (NN stock) (NN market)) (VP (VBD was) (ADJP (RB so) (JJ quiet))))))) (. .))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [3, 2, 1, 0], [3, 1, 0], [3, 0], [4, 3, 0], [3, 0], [5, 3, 0], [6, 5, 3, 0], [6, 3, 0], [3, 0], [7, 3, 0], [8, 7, 3, 0], [9, 8, 7, 3, 0], [9, 7, 3, 0], [9, 3, 0], [9, 0], [10, 9, 0], [11, 10, 9, 0], [11, 9, 0], [9, 0], [12, 9, 0], [13, 12, 9, 0], [14, 13, 12, 9, 0], [14, 12, 9, 0], [14, 9, 0], [9, 0], [15, 9, 0], [16, 15, 9, 0], [17, 16, 15, 9, 0], [18, 17, 16, 15, 9, 0], [19, 18, 17, 16, 15, 9, 0], [19, 17, 16, 15, 9, 0], [19, 16, 15, 9, 0], [19, 15, 9, 0], [15, 9, 0], [9, 0], [20, 9, 0], [9, 0], [21, 9, 0], [22, 21, 9, 0], [23, 22, 21, 9, 0], [24, 23, 22, 21, 9, 0], [24, 22, 21, 9, 0], [24, 21, 9, 0], [25, 24, 21, 9, 0], [26, 25, 24, 21, 9, 0], [27, 26, 25, 24, 21, 9, 0], [27, 25, 24, 21, 9, 0], [27, 24, 21, 9, 0], [27, 21, 9, 0], [27, 9, 0], [9, 0], [28, 9, 0], [9, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28], [20, 21, 22, 23, 24, 25, 26, 27, 28], [20, 21, 22, 23, 24, 25, 26, 27, 28], [20, 21, 22, 23, 24, 25, 26, 27, 28], [20, 21, 22, 23, 24, 25, 26, 27, 28], [20, 21, 22, 23, 24, 25, 26, 27, 28], [20, 21, 22, 23, 24, 25, 26, 27, 28], [21, 22, 23, 24, 25, 26, 27, 28], [21, 22, 23, 24, 
25, 26, 27, 28], [22, 23, 24, 25, 26, 27, 28], [23, 24, 25, 26, 27, 28], [24, 25, 26, 27, 28], [25, 26, 27, 28], [25, 26, 27, 28], [25, 26, 27, 28], [26, 27, 28], [27, 28], [28], [28], [28], [28], [28], [28], [], [], []], "actions": [[0, -1], [0, -1], [0, -1], [1, 22], [1, 3], [0, -1], [2, 7], [0, -1], [0, -1], [1, 14], [2, 9], [0, -1], [0, -1], [0, -1], [1, 2], [1, 5], [1, 24], [0, -1], [0, -1], [1, 31], [2, 13], [0, -1], [0, -1], [0, -1], [1, 22], [1, 2], [2, 16], [0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 3], [1, 14], [2, 30], [2, 35], [0, -1], [2, 37], [0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 14], [0, -1], [0, -1], [0, -1], [1, 2], [1, 10], [1, 24], [1, 19], [2, 1], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [0, 2, 1], [1, 3, 2], [1, 3, 1], [0, 3, 0], [2, 4, 3], [0, 3, 0], [0, 5, 3], [1, 6, 5], [2, 6, 3], [0, 3, 0], [0, 7, 3], [0, 8, 7], [1, 9, 8], [1, 9, 7], [1, 9, 3], [0, 9, 0], [0, 10, 9], [1, 11, 10], [2, 11, 9], [0, 9, 0], [0, 12, 9], [0, 13, 12], [1, 14, 13], [1, 14, 12], [2, 14, 9], [0, 9, 0], [0, 15, 9], [0, 16, 15], [0, 17, 16], [0, 18, 17], [1, 19, 18], [1, 19, 17], [1, 19, 16], [2, 19, 15], [2, 15, 9], [0, 9, 0], [2, 20, 9], [0, 9, 0], [0, 21, 9], [0, 22, 21], [0, 23, 22], [1, 24, 23], [1, 24, 22], [0, 24, 21], [0, 25, 24], [0, 26, 25], [1, 27, 26], [1, 27, 25], [1, 27, 24], [1, 27, 21], [2, 27, 9], [0, 9, 0], [2, 28, 9], [2, 9, 0]]}} +{"orig_tokens": ["Then", "in", "a", "lightning", "plunge", ",", "the", "Dow", "Jones", "industrials", "in", "barely", "an", "hour", "surrendered", "about", "a", "third", "of", "their", "gains", "this", "year", ",", "chalking", "up", "a", "190.58-point", ",", "or", "6.9", "%", ",", "loss", "on", "the", "day", "in", "gargantuan", "trading", "volume", "."], "tokens": ["", "in", "a", "", "", ",", "the", "", "", "", "in", "", "an", "", "", "about", "a", "", "of", "their", "", "this", "year", ",", "", "", "a", "", ",", "", "", "%", ",", "", "on", "the", "", "in", "", "", "", 
"."], "token_ids": [7, 42, 113, 17, 16, 45, 40, 7, 5, 6, 42, 2, 22, 16, 3, 78, 113, 16, 26, 147, 6, 185, 86, 45, 17, 16, 113, 16, 45, 16, 10, 268, 45, 6, 165, 40, 16, 42, 16, 17, 16, 62], "tags": ["RB", "IN", "DT", "NN", "NN", ",", "DT", "NNP", "NNP", "NNS", "IN", "RB", "DT", "NN", "VBD", "RB", "DT", "JJ", "IN", "PRP$", "NNS", "DT", "NN", ",", "VBG", "RP", "DT", "JJ", ",", "CC", "CD", "NN", ",", "NN", "IN", "DT", "NN", "IN", "JJ", "NN", "NN", "."], "tree_str": "(S (RB Then) (PP (IN in) (NP (DT a) (NN lightning) (NN plunge))) (, ,) (NP (DT the) (NNP Dow) (NNP Jones) (NNS industrials)) (PP (IN in) (NP (QP (RB barely) (DT an)) (NN hour))) (VP (VBD surrendered) (NP (NP (QP (RB about) (DT a)) (JJ third)) (PP (IN of) (NP (NP (PRP$ their) (NNS gains)) (NP (DT this) (NN year))))) (, ,) (S (VP (VBG chalking) (PRT (RP up)) (NP (NP (DT a) (ADJP (ADJP (JJ 190.58-point)) (, ,) (CC or) (ADJP (CD 6.9) (NN %)) (, ,)) (NN loss)) (PP (IN on) (NP (DT the) (NN day)))) (PP (IN in) (NP (JJ gargantuan) (NN trading) (NN volume)))))) (. 
.))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [3, 2, 1, 0], [4, 3, 2, 1, 0], [5, 4, 3, 2, 1, 0], [5, 3, 2, 1, 0], [5, 2, 1, 0], [2, 1, 0], [6, 2, 1, 0], [7, 6, 2, 1, 0], [8, 7, 6, 2, 1, 0], [9, 8, 7, 6, 2, 1, 0], [10, 9, 8, 7, 6, 2, 1, 0], [10, 8, 7, 6, 2, 1, 0], [10, 7, 6, 2, 1, 0], [10, 6, 2, 1, 0], [11, 10, 6, 2, 1, 0], [12, 11, 10, 6, 2, 1, 0], [13, 12, 11, 10, 6, 2, 1, 0], [13, 11, 10, 6, 2, 1, 0], [14, 13, 11, 10, 6, 2, 1, 0], [14, 11, 10, 6, 2, 1, 0], [11, 10, 6, 2, 1, 0], [15, 11, 10, 6, 2, 1, 0], [15, 10, 6, 2, 1, 0], [15, 6, 2, 1, 0], [15, 2, 1, 0], [15, 1, 0], [15, 0], [16, 15, 0], [17, 16, 15, 0], [17, 15, 0], [18, 17, 15, 0], [18, 15, 0], [19, 18, 15, 0], [20, 19, 18, 15, 0], [21, 20, 19, 18, 15, 0], [21, 19, 18, 15, 0], [22, 21, 19, 18, 15, 0], [23, 22, 21, 19, 18, 15, 0], [23, 21, 19, 18, 15, 0], [21, 19, 18, 15, 0], [19, 18, 15, 0], [18, 15, 0], [15, 0], [24, 15, 0], [15, 0], [25, 15, 0], [26, 25, 15, 0], [25, 15, 0], [27, 25, 15, 0], [28, 27, 25, 15, 0], [29, 28, 27, 25, 15, 0], [28, 27, 25, 15, 0], [30, 28, 27, 25, 15, 0], [28, 27, 25, 15, 0], [31, 28, 27, 25, 15, 0], [32, 31, 28, 27, 25, 15, 0], [32, 28, 27, 25, 15, 0], [28, 27, 25, 15, 0], [33, 28, 27, 25, 15, 0], [28, 27, 25, 15, 0], [34, 28, 27, 25, 15, 0], [34, 27, 25, 15, 0], [34, 25, 15, 0], [35, 34, 25, 15, 0], [36, 35, 34, 25, 15, 0], [37, 36, 35, 34, 25, 15, 0], [37, 35, 34, 25, 15, 0], [35, 34, 25, 15, 0], [34, 25, 15, 0], [25, 15, 0], [38, 25, 15, 0], [39, 38, 25, 15, 0], [40, 39, 38, 25, 15, 0], [41, 40, 39, 38, 25, 15, 0], [41, 39, 38, 25, 15, 0], [41, 38, 25, 15, 0], [38, 25, 15, 0], [25, 15, 0], [15, 0], [42, 15, 0], [15, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 
36, 37, 38, 39, 40, 41, 42], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 
40, 41, 42], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 
34, 35, 36, 37, 38, 39, 40, 41, 42], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [34, 35, 36, 37, 38, 39, 40, 41, 42], [34, 35, 36, 37, 38, 39, 40, 41, 42], [35, 36, 37, 38, 39, 40, 41, 
42], [35, 36, 37, 38, 39, 40, 41, 42], [35, 36, 37, 38, 39, 40, 41, 42], [36, 37, 38, 39, 40, 41, 42], [37, 38, 39, 40, 41, 42], [38, 39, 40, 41, 42], [38, 39, 40, 41, 42], [38, 39, 40, 41, 42], [38, 39, 40, 41, 42], [38, 39, 40, 41, 42], [39, 40, 41, 42], [40, 41, 42], [41, 42], [42], [42], [42], [42], [42], [42], [], [], []], "actions": [[0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 14], [2, 30], [0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 22], [1, 14], [0, -1], [0, -1], [0, -1], [1, 38], [0, -1], [1, 26], [2, 30], [0, -1], [1, 35], [1, 24], [1, 37], [1, 35], [1, 2], [0, -1], [0, -1], [1, 38], [0, -1], [1, 26], [0, -1], [0, -1], [0, -1], [1, 31], [0, -1], [0, -1], [1, 14], [2, 13], [2, 30], [2, 35], [2, 16], [0, -1], [2, 37], [0, -1], [0, -1], [2, 36], [0, -1], [0, -1], [0, -1], [2, 37], [0, -1], [2, 7], [0, -1], [0, -1], [1, 27], [2, 9], [0, -1], [2, 37], [0, -1], [1, 3], [1, 14], [0, -1], [0, -1], [0, -1], [1, 14], [2, 30], [2, 35], [2, 16], [0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 3], [2, 30], [2, 35], [2, 42], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [0, 2, 1], [0, 3, 2], [0, 4, 3], [1, 5, 4], [1, 5, 3], [2, 5, 2], [0, 2, 1], [0, 6, 2], [0, 7, 6], [0, 8, 7], [0, 9, 8], [1, 10, 9], [1, 10, 8], [1, 10, 7], [0, 10, 6], [0, 11, 10], [0, 12, 11], [1, 13, 12], [0, 13, 11], [1, 14, 13], [2, 14, 11], [0, 11, 10], [1, 15, 11], [1, 15, 10], [1, 15, 6], [1, 15, 2], [1, 15, 1], [0, 15, 0], [0, 16, 15], [1, 17, 16], [0, 17, 15], [1, 18, 17], [0, 18, 15], [0, 19, 18], [0, 20, 19], [1, 21, 20], [0, 21, 19], [0, 22, 21], [1, 23, 22], [2, 23, 21], [2, 21, 19], [2, 19, 18], [2, 18, 15], [0, 15, 0], [2, 24, 15], [0, 15, 0], [0, 25, 15], [2, 26, 25], [0, 25, 15], [0, 27, 25], [0, 28, 27], [2, 29, 28], [0, 28, 27], [2, 30, 28], [0, 28, 27], [0, 31, 28], [1, 32, 31], [2, 32, 28], [0, 28, 27], [2, 33, 28], [0, 28, 27], [1, 34, 28], [1, 34, 27], [0, 34, 25], [0, 35, 34], [0, 36, 35], [1, 37, 36], [2, 37, 35], [2, 35, 
34], [2, 34, 25], [0, 25, 15], [0, 38, 25], [0, 39, 38], [0, 40, 39], [1, 41, 40], [1, 41, 39], [2, 41, 38], [2, 38, 25], [2, 25, 15], [0, 15, 0], [2, 42, 15], [2, 15, 0]]}} +{"orig_tokens": ["Final-hour", "trading", "accelerated", "to", "108.1", "million", "shares", ",", "a", "record", "for", "the", "Big", "Board", "."], "tokens": ["", "", "", "to", "", "", "shares", ",", "a", "record", "for", "the", "", "", "."], "token_ids": [11, 17, 3, 59, 10, 8, 198, 45, 113, 123, 99, 40, 7, 7, 62], "tags": ["JJ", "NN", "VBD", "TO", "CD", "CD", "NNS", ",", "DT", "NN", "IN", "DT", "NNP", "NNP", "."], "tree_str": "(S (NP (JJ Final-hour) (NN trading)) (VP (VBD accelerated) (PP (TO to) (NP (NP (QP (CD 108.1) (CD million)) (NNS shares)) (, ,) (NP (NP (DT a) (NN record)) (PP (IN for) (NP (DT the) (NNP Big) (NNP Board))))))) (. .))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [2, 0], [3, 2, 0], [3, 0], [4, 3, 0], [5, 4, 3, 0], [6, 5, 4, 3, 0], [6, 4, 3, 0], [7, 6, 4, 3, 0], [7, 4, 3, 0], [8, 7, 4, 3, 0], [7, 4, 3, 0], [9, 7, 4, 3, 0], [10, 9, 7, 4, 3, 0], [10, 7, 4, 3, 0], [11, 10, 7, 4, 3, 0], [12, 11, 10, 7, 4, 3, 0], [13, 12, 11, 10, 7, 4, 3, 0], [14, 13, 12, 11, 10, 7, 4, 3, 0], [14, 12, 11, 10, 7, 4, 3, 0], [14, 11, 10, 7, 4, 3, 0], [11, 10, 7, 4, 3, 0], [10, 7, 4, 3, 0], [7, 4, 3, 0], [4, 3, 0], [3, 0], [15, 3, 0], [3, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15], [7, 8, 9, 10, 11, 12, 13, 14, 15], [7, 8, 9, 10, 11, 12, 13, 14, 15], [8, 9, 10, 11, 12, 13, 14, 15], [8, 9, 10, 11, 12, 13, 14, 15], [9, 10, 11, 12, 13, 14, 15], [9, 10, 11, 12, 13, 14, 15], [10, 11, 12, 13, 14, 15], [11, 12, 13, 14, 
15], [11, 12, 13, 14, 15], [12, 13, 14, 15], [13, 14, 15], [14, 15], [15], [15], [15], [15], [15], [15], [15], [15], [], [], []], "actions": [[0, -1], [0, -1], [1, 3], [0, -1], [1, 24], [0, -1], [0, -1], [0, -1], [1, 27], [0, -1], [1, 26], [0, -1], [2, 37], [0, -1], [0, -1], [1, 14], [0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 14], [2, 30], [2, 35], [2, 4], [2, 30], [2, 35], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [1, 2, 1], [0, 2, 0], [1, 3, 2], [0, 3, 0], [0, 4, 3], [0, 5, 4], [1, 6, 5], [0, 6, 4], [1, 7, 6], [0, 7, 4], [2, 8, 7], [0, 7, 4], [0, 9, 7], [1, 10, 9], [0, 10, 7], [0, 11, 10], [0, 12, 11], [0, 13, 12], [1, 14, 13], [1, 14, 12], [2, 14, 11], [2, 11, 10], [2, 10, 7], [2, 7, 4], [2, 4, 3], [0, 3, 0], [2, 15, 3], [2, 3, 0]]}} +{"orig_tokens": ["At", "the", "end", "of", "the", "day", ",", "251.2", "million", "shares", "were", "traded", "."], "tokens": ["", "the", "", "of", "the", "", ",", "", "", "shares", "were", "", "."], "token_ids": [7, 40, 16, 26, 40, 16, 45, 10, 8, 198, 250, 3, 62], "tags": ["IN", "DT", "NN", "IN", "DT", "NN", ",", "CD", "CD", "NNS", "VBD", "VBN", "."], "tree_str": "(S (PP (IN At) (NP (NP (DT the) (NN end)) (PP (IN of) (NP (DT the) (NN day))))) (, ,) (NP (QP (CD 251.2) (CD million)) (NNS shares)) (VP (VBD were) (VP (VBN traded))) (. 
.))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [3, 2, 1, 0], [3, 1, 0], [4, 3, 1, 0], [5, 4, 3, 1, 0], [6, 5, 4, 3, 1, 0], [6, 4, 3, 1, 0], [4, 3, 1, 0], [3, 1, 0], [1, 0], [7, 1, 0], [8, 7, 1, 0], [9, 8, 7, 1, 0], [9, 7, 1, 0], [10, 9, 7, 1, 0], [10, 7, 1, 0], [11, 10, 7, 1, 0], [12, 11, 10, 7, 1, 0], [12, 10, 7, 1, 0], [12, 7, 1, 0], [12, 1, 0], [12, 0], [13, 12, 0], [12, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13], [5, 6, 7, 8, 9, 10, 11, 12, 13], [6, 7, 8, 9, 10, 11, 12, 13], [7, 8, 9, 10, 11, 12, 13], [7, 8, 9, 10, 11, 12, 13], [7, 8, 9, 10, 11, 12, 13], [7, 8, 9, 10, 11, 12, 13], [7, 8, 9, 10, 11, 12, 13], [8, 9, 10, 11, 12, 13], [9, 10, 11, 12, 13], [10, 11, 12, 13], [10, 11, 12, 13], [11, 12, 13], [11, 12, 13], [12, 13], [13], [13], [13], [13], [13], [], [], []], "actions": [[0, -1], [0, -1], [0, -1], [1, 14], [0, -1], [0, -1], [0, -1], [1, 14], [2, 30], [2, 35], [2, 30], [0, -1], [0, -1], [0, -1], [1, 27], [0, -1], [1, 26], [0, -1], [0, -1], [1, 6], [1, 25], [1, 37], [1, 35], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [0, 2, 1], [1, 3, 2], [0, 3, 1], [0, 4, 3], [0, 5, 4], [1, 6, 5], [2, 6, 4], [2, 4, 3], [2, 3, 1], [0, 1, 0], [0, 7, 1], [0, 8, 7], [1, 9, 8], [0, 9, 7], [1, 10, 9], [0, 10, 7], [0, 11, 10], [1, 12, 11], [1, 12, 10], [1, 12, 7], [1, 12, 1], [0, 12, 0], [2, 13, 12], [2, 12, 0]]}} +{"orig_tokens": ["The", "Dow", "Jones", "industrials", "closed", "at", "2569.26", "."], "tokens": ["The", "", "", "", "", "at", "", "."], "token_ids": [28, 7, 5, 6, 3, 31, 10, 62], "tags": ["DT", "NNP", "NNP", "NNS", "VBD", "IN", "CD", "."], "tree_str": "(S (NP (DT The) (NNP Dow) (NNP Jones) (NNS industrials)) (VP (VBD closed) (PP (IN at) (NP (CD 2569.26)))) (. 
.))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [3, 2, 1, 0], [4, 3, 2, 1, 0], [4, 2, 1, 0], [4, 1, 0], [4, 0], [5, 4, 0], [5, 0], [6, 5, 0], [7, 6, 5, 0], [6, 5, 0], [5, 0], [8, 5, 0], [5, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7, 8], [2, 3, 4, 5, 6, 7, 8], [3, 4, 5, 6, 7, 8], [4, 5, 6, 7, 8], [5, 6, 7, 8], [5, 6, 7, 8], [5, 6, 7, 8], [5, 6, 7, 8], [6, 7, 8], [6, 7, 8], [7, 8], [8], [8], [8], [], [], []], "actions": [[0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 22], [1, 14], [0, -1], [1, 24], [0, -1], [0, -1], [2, 30], [2, 35], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [0, 2, 1], [0, 3, 2], [1, 4, 3], [1, 4, 2], [1, 4, 1], [0, 4, 0], [1, 5, 4], [0, 5, 0], [0, 6, 5], [2, 7, 6], [2, 6, 5], [0, 5, 0], [2, 8, 5], [2, 5, 0]]}} +{"orig_tokens": ["The", "Dow", "'s", "decline", "was", "second", "in", "point", "terms", "only", "to", "the", "508-point", "Black", "Monday", "crash", "that", "occurred", "Oct.", "19", ",", "1987", "."], "tokens": ["The", "", "'s", "", "was", "", "in", "", "", "", "to", "the", "", "", "", "crash", "that", "", "Oct.", "19", ",", "1987", "."], "token_ids": [28, 7, 33, 16, 56, 16, 42, 16, 6, 2, 59, 40, 1, 7, 7, 215, 188, 3, 23, 24, 45, 213, 62], "tags": ["DT", "NNP", "POS", "NN", "VBD", "JJ", "IN", "NN", "NNS", "RB", "TO", "DT", "JJ", "NNP", "NNP", "NN", "WDT", "VBD", "NNP", "CD", ",", "CD", "."], "tree_str": "(S (NP (NP (DT The) (NNP Dow) (POS 's)) (NN decline)) (VP (VBD was) (ADJP (JJ second) (PP (IN in) (NP (NN point) (NNS terms))) (PP (ADVP (RB only)) (TO to) (NP (NP (DT the) (JJ 508-point) (NNP Black) (NNP Monday) (NN crash)) (SBAR (WHNP (WDT that)) (S (VP (VBD occurred) (NP (NNP Oct.) (CD 19) (, ,) (CD 1987))))))))) (. 
.))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [2, 0], [3, 2, 0], [2, 0], [4, 2, 0], [4, 0], [5, 4, 0], [6, 5, 4, 0], [6, 4, 0], [6, 0], [7, 6, 0], [8, 7, 6, 0], [9, 8, 7, 6, 0], [9, 7, 6, 0], [7, 6, 0], [6, 0], [10, 6, 0], [11, 10, 6, 0], [11, 6, 0], [12, 11, 6, 0], [13, 12, 11, 6, 0], [14, 13, 12, 11, 6, 0], [15, 14, 13, 12, 11, 6, 0], [16, 15, 14, 13, 12, 11, 6, 0], [16, 14, 13, 12, 11, 6, 0], [16, 13, 12, 11, 6, 0], [16, 12, 11, 6, 0], [16, 11, 6, 0], [17, 16, 11, 6, 0], [18, 17, 16, 11, 6, 0], [18, 16, 11, 6, 0], [19, 18, 16, 11, 6, 0], [20, 19, 18, 16, 11, 6, 0], [19, 18, 16, 11, 6, 0], [21, 19, 18, 16, 11, 6, 0], [19, 18, 16, 11, 6, 0], [22, 19, 18, 16, 11, 6, 0], [19, 18, 16, 11, 6, 0], [18, 16, 11, 6, 0], [16, 11, 6, 0], [11, 6, 0], [6, 0], [23, 6, 0], [6, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [10, 11, 12, 
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [15, 16, 17, 18, 19, 20, 21, 22, 23], [16, 17, 18, 19, 20, 21, 22, 23], [17, 18, 19, 20, 21, 22, 23], [17, 18, 19, 20, 21, 22, 23], [17, 18, 19, 20, 21, 22, 23], [17, 18, 19, 20, 21, 22, 23], [17, 18, 19, 20, 21, 22, 23], [18, 19, 20, 21, 22, 23], [19, 20, 21, 22, 23], [19, 20, 21, 22, 23], [20, 21, 22, 23], [21, 22, 23], [21, 22, 23], [22, 23], [22, 23], [23], [23], [23], [23], [23], [23], [], [], []], "actions": [[0, -1], [0, -1], [1, 14], [0, -1], [2, 32], [0, -1], [1, 31], [0, -1], [0, -1], [1, 10], [1, 24], [0, -1], [0, -1], [0, -1], [1, 22], [2, 30], [2, 35], [0, -1], [0, -1], [1, 2], [0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [1, 22], [1, 22], [1, 3], [1, 14], [0, -1], [0, -1], [1, 24], [0, -1], [0, -1], [2, 26], [0, -1], [2, 37], [0, -1], [2, 26], [2, 41], [2, 39], [2, 30], [2, 35], [0, -1], [2, 37], [2, 40]], "action_tuples": [[0, -1, -1], [0, 1, 0], [1, 2, 1], [0, 2, 0], [2, 3, 2], [0, 2, 0], [1, 4, 2], [0, 4, 0], [0, 5, 4], [1, 6, 5], [1, 6, 4], [0, 6, 0], [0, 7, 6], [0, 8, 7], [1, 9, 8], [2, 9, 7], [2, 7, 6], [0, 6, 0], [0, 10, 6], [1, 11, 10], [0, 11, 6], [0, 12, 11], [0, 13, 12], [0, 14, 13], [0, 15, 14], [1, 16, 15], [1, 16, 14], [1, 16, 13], [1, 16, 12], [0, 16, 11], [0, 17, 16], [1, 18, 17], [0, 18, 16], [0, 19, 18], [2, 20, 19], [0, 19, 18], [2, 21, 19], [0, 19, 18], [2, 22, 19], [2, 19, 18], [2, 18, 16], [2, 16, 11], [2, 11, 6], [0, 6, 0], [2, 23, 6], [2, 6, 0]]}} +{"orig_tokens": ["In", "percentage", "terms", ",", "however", ",", "the", "Dow", "'s", "dive", "was", "the", "12th-worst", "ever", "and", "the", "sharpest", "since", "the", "market", "fell", "156.83", ",", "or", "8", "%", ",", "a", "week", 
"after", "Black", "Monday", "."], "tokens": ["In", "", "", ",", "", ",", "the", "", "'s", "", "was", "the", "", "", "and", "the", "", "since", "the", "market", "", "", ",", "", "", "%", ",", "a", "", "after", "", "", "."], "token_ids": [21, 16, 6, 45, 13, 45, 40, 7, 33, 16, 56, 40, 1, 13, 92, 40, 16, 249, 40, 214, 16, 10, 45, 16, 10, 268, 45, 113, 16, 264, 7, 7, 62], "tags": ["IN", "NN", "NNS", ",", "RB", ",", "DT", "NNP", "POS", "NN", "VBD", "DT", "JJ", "RB", "CC", "DT", "JJS", "IN", "DT", "NN", "VBD", "CD", ",", "CC", "CD", "NN", ",", "DT", "NN", "IN", "NNP", "NNP", "."], "tree_str": "(S (PP (IN In) (NP (NN percentage) (NNS terms))) (, ,) (ADVP (RB however)) (, ,) (NP (NP (DT the) (NNP Dow) (POS 's)) (NN dive)) (VP (VBD was) (NP (NP (NP (DT the) (JJ 12th-worst)) (ADVP (RB ever))) (CC and) (NP (NP (DT the) (JJS sharpest)) (SBAR (IN since) (S (NP (DT the) (NN market)) (VP (VBD fell) (NP (NP (CD 156.83)) (, ,) (CC or) (NP (CD 8) (NN %))) (, ,) (PP (NP (DT a) (NN week)) (IN after) (NP (NNP Black) (NNP Monday))))))))) (. 
.))", "key": "sentence", "projective": true, "ASd": {"gold_stacks": [[0], [1, 0], [2, 1, 0], [3, 2, 1, 0], [3, 1, 0], [1, 0], [4, 1, 0], [5, 4, 1, 0], [6, 5, 4, 1, 0], [7, 6, 5, 4, 1, 0], [8, 7, 6, 5, 4, 1, 0], [8, 6, 5, 4, 1, 0], [9, 8, 6, 5, 4, 1, 0], [8, 6, 5, 4, 1, 0], [10, 8, 6, 5, 4, 1, 0], [10, 6, 5, 4, 1, 0], [11, 10, 6, 5, 4, 1, 0], [12, 11, 10, 6, 5, 4, 1, 0], [13, 12, 11, 10, 6, 5, 4, 1, 0], [13, 11, 10, 6, 5, 4, 1, 0], [13, 10, 6, 5, 4, 1, 0], [13, 6, 5, 4, 1, 0], [13, 5, 4, 1, 0], [13, 4, 1, 0], [13, 1, 0], [13, 0], [14, 13, 0], [13, 0], [15, 13, 0], [13, 0], [16, 13, 0], [17, 16, 13, 0], [17, 13, 0], [18, 17, 13, 0], [19, 18, 17, 13, 0], [20, 19, 18, 17, 13, 0], [20, 18, 17, 13, 0], [21, 20, 18, 17, 13, 0], [21, 18, 17, 13, 0], [22, 21, 18, 17, 13, 0], [23, 22, 21, 18, 17, 13, 0], [22, 21, 18, 17, 13, 0], [24, 22, 21, 18, 17, 13, 0], [22, 21, 18, 17, 13, 0], [25, 22, 21, 18, 17, 13, 0], [26, 25, 22, 21, 18, 17, 13, 0], [26, 22, 21, 18, 17, 13, 0], [22, 21, 18, 17, 13, 0], [21, 18, 17, 13, 0], [27, 21, 18, 17, 13, 0], [21, 18, 17, 13, 0], [28, 21, 18, 17, 13, 0], [29, 28, 21, 18, 17, 13, 0], [29, 21, 18, 17, 13, 0], [30, 29, 21, 18, 17, 13, 0], [30, 21, 18, 17, 13, 0], [31, 30, 21, 18, 17, 13, 0], [32, 31, 30, 21, 18, 17, 13, 0], [32, 30, 21, 18, 17, 13, 0], [30, 21, 18, 17, 13, 0], [21, 18, 17, 13, 0], [18, 17, 13, 0], [17, 13, 0], [13, 0], [33, 13, 0], [13, 0], [0]], "gold_buffers": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 
29, 30, 31, 32, 33], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [15, 16, 
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [25, 26, 27, 28, 29, 30, 31, 32, 33], [25, 26, 27, 28, 29, 30, 31, 32, 33], [26, 27, 28, 29, 30, 31, 32, 33], [27, 28, 29, 30, 31, 32, 33], [27, 28, 29, 30, 31, 32, 33], [27, 28, 29, 30, 31, 32, 33], [27, 28, 29, 30, 31, 32, 33], [28, 29, 30, 31, 32, 33], [28, 29, 30, 31, 32, 33], [29, 30, 31, 32, 33], [30, 31, 32, 33], [30, 31, 32, 33], [31, 32, 33], [31, 32, 33], [32, 33], [33], [33], [33], [33], [33], [33], [33], [], [], []], "actions": [[0, -1], [0, -1], [0, -1], [1, 22], [2, 30], [0, -1], [0, -1], [0, -1], [0, -1], [0, -1], [1, 14], [0, -1], [2, 32], [0, -1], [1, 31], [0, -1], [0, -1], [0, -1], [1, 14], [1, 10], [1, 24], [1, 37], [1, 2], [1, 37], [1, 35], [0, -1], [2, 2], [0, -1], [2, 7], [0, -1], [0, -1], [1, 14], [0, -1], [0, -1], [0, -1], [1, 14], [0, -1], [1, 24], [0, -1], [0, -1], [2, 37], [0, -1], [2, 7], [0, -1], [0, -1], [1, 26], [2, 9], [2, 16], [0, -1], [2, 37], [0, -1], [0, -1], [1, 14], [0, -1], [1, 23], [0, -1], [0, -1], [1, 22], [2, 30], [2, 35], [2, 29], [2, 35], [2, 9], [0, -1], [2, 37], [2, 40]], 
"action_tuples": [[0, -1, -1], [0, 1, 0], [0, 2, 1], [1, 3, 2], [2, 3, 1], [0, 1, 0], [0, 4, 1], [0, 5, 4], [0, 6, 5], [0, 7, 6], [1, 8, 7], [0, 8, 6], [2, 9, 8], [0, 8, 6], [1, 10, 8], [0, 10, 6], [0, 11, 10], [0, 12, 11], [1, 13, 12], [1, 13, 11], [1, 13, 10], [1, 13, 6], [1, 13, 5], [1, 13, 4], [1, 13, 1], [0, 13, 0], [2, 14, 13], [0, 13, 0], [2, 15, 13], [0, 13, 0], [0, 16, 13], [1, 17, 16], [0, 17, 13], [0, 18, 17], [0, 19, 18], [1, 20, 19], [0, 20, 18], [1, 21, 20], [0, 21, 18], [0, 22, 21], [2, 23, 22], [0, 22, 21], [2, 24, 22], [0, 22, 21], [0, 25, 22], [1, 26, 25], [2, 26, 22], [2, 22, 21], [0, 21, 18], [2, 27, 21], [0, 21, 18], [0, 28, 21], [1, 29, 28], [0, 29, 21], [1, 30, 29], [0, 30, 21], [0, 31, 30], [1, 32, 31], [2, 32, 30], [2, 30, 21], [2, 21, 18], [2, 18, 17], [2, 17, 13], [0, 13, 0], [2, 33, 13], [2, 13, 0]]}} diff --git a/data/test.txt b/data/test.txt new file mode 100644 index 0000000..acfabbc --- /dev/null +++ b/data/test.txt @@ -0,0 +1,20 @@ +(S (INTJ (RB No)) (, ,) (NP (PRP it)) (VP (VBD was) (RB n't) (NP (NNP Black) (NNP Monday))) (. .)) +(S (CC But) (SBAR (IN while) (S (NP (DT the) (NNP New) (NNP York) (NNP Stock) (NNP Exchange)) (VP (VBD did) (RB n't) (VP (VB fall) (ADVP (RB apart)) (NP (NNP Friday)) (SBAR (IN as) (S (NP (DT the) (NNP Dow) (NNP Jones) (NNP Industrial) (NNP Average)) (VP (VBD plunged) (NP (NP (CD 190.58) (NNS points)) (PRN (: --) (NP (NP (JJS most)) (PP (IN of) (NP (PRP it))) (PP (IN in) (NP (DT the) (JJ final) (NN hour)))) (: --)))))))))) (NP (PRP it)) (ADVP (RB barely)) (VP (VBD managed) (S (VP (TO to) (VP (VB stay) (NP (NP (DT this) (NN side)) (PP (IN of) (NP (NN chaos)))))))) (. 
.)) +(S (NP (NP (DT Some) (`` ``) (NN circuit) (NNS breakers) ('' '')) (VP (VBN installed) (PP (IN after) (NP (DT the) (NNP October) (CD 1987) (NN crash))))) (VP (VBD failed) (NP (PRP$ their) (JJ first) (NN test)) (PRN (, ,) (S (NP (NNS traders)) (VP (VBP say))) (, ,)) (S (ADJP (JJ unable) (S (VP (TO to) (VP (VB cool) (NP (NP (DT the) (NN selling) (NN panic)) (PP (IN in) (NP (DT both) (NNS stocks) (CC and) (NNS futures)))))))))) (. .)) +(S (NP (NP (NP (DT The) (CD 49) (NN stock) (NN specialist) (NNS firms)) (PP (IN on) (NP (DT the) (NNP Big) (NNP Board) (NN floor)))) (: --) (NP (NP (DT the) (NNS buyers) (CC and) (NNS sellers)) (PP (IN of) (NP (JJ last) (NN resort))) (SBAR (WHNP (WP who)) (S (VP (VBD were) (VP (VBN criticized) (PP (IN after) (NP (DT the) (CD 1987) (NN crash)))))))) (: --)) (ADVP (RB once) (RB again)) (VP (MD could) (RB n't) (VP (VB handle) (NP (DT the) (NN selling) (NN pressure)))) (. .)) +(S (S (NP (JJ Big) (NN investment) (NNS banks)) (VP (VBD refused) (S (VP (TO to) (VP (VB step) (ADVP (IN up) (PP (TO to) (NP (DT the) (NN plate)))) (S (VP (TO to) (VP (VB support) (NP (DT the) (JJ beleaguered) (NN floor) (NNS traders)) (PP (IN by) (S (VP (VBG buying) (NP (NP (JJ big) (NNS blocks)) (PP (IN of) (NP (NN stock))))))))))))))) (, ,) (NP (NNS traders)) (VP (VBP say)) (. .)) +(S (NP (NP (JJ Heavy) (NN selling)) (PP (IN of) (NP (NP (NNP Standard) (CC &) (NNP Poor) (POS 's)) (JJ 500-stock) (NN index) (NNS futures))) (PP (IN in) (NP (NNP Chicago)))) (VP (ADVP (RB relentlessly)) (VBD beat) (NP (NNS stocks)) (ADVP (RB downward))) (. .)) +(S (NP (NP (CD Seven) (NNP Big) (NNP Board) (NNS stocks)) (: --) (NP (NP (NNP UAL)) (, ,) (NP (NNP AMR)) (, ,) (NP (NNP BankAmerica)) (, ,) (NP (NNP Walt) (NNP Disney)) (, ,) (NP (NNP Capital) (NNP Cities\/ABC)) (, ,) (NP (NNP Philip) (NNP Morris)) (CC and) (NP (NNP Pacific) (NNP Telesis) (NNP Group))) (: --)) (VP (VP (VBD stopped) (S (VP (VBG trading)))) (CC and) (VP (ADVP (RB never)) (VBD resumed))) (. 
.)) +(S (NP (DT The) (NN finger-pointing)) (VP (VBZ has) (ADVP (RB already)) (VP (VBN begun))) (. .)) +(S (`` ``) (NP (DT The) (NN equity) (NN market)) (VP (VBD was) (ADJP (JJ illiquid))) (. .)) +(SINV (S (ADVP (RB Once) (RB again)) (-LRB- -LCB-) (NP (DT the) (NNS specialists)) (-RRB- -RCB-) (VP (VBD were) (RB not) (ADJP (JJ able) (S (VP (TO to) (VP (VB handle) (NP (NP (DT the) (NNS imbalances)) (PP (IN on) (NP (NP (DT the) (NN floor)) (PP (IN of) (NP (DT the) (NNP New) (NNP York) (NNP Stock) (NNP Exchange)))))))))))) (, ,) ('' '') (VP (VBD said)) (NP (NP (NNP Christopher) (NNP Pedersen)) (, ,) (NP (NP (JJ senior) (NN vice) (NN president)) (PP (IN at) (NP (NNP Twenty-First) (NNP Securities) (NNP Corp))))) (. .)) +(SINV (VP (VBD Countered)) (NP (NP (NNP James) (NNP Maguire)) (, ,) (NP (NP (NN chairman)) (PP (IN of) (NP (NNS specialists) (NNP Henderson) (NNP Brothers) (NNP Inc.))))) (: :) (`` ``) (S (NP (PRP It)) (VP (VBZ is) (ADJP (JJ easy)) (S (VP (TO to) (VP (VB say) (SBAR (S (NP (DT the) (NN specialist)) (VP (VBZ is) (RB n't) (VP (VBG doing) (NP (PRP$ his) (NN job))))))))))) (. .)) +(S (SBAR (WHADVP (WRB When)) (S (NP (DT the) (NN dollar)) (VP (VBZ is) (PP (IN in) (NP (DT a) (NN free-fall)))))) (, ,) (NP (RB even) (JJ central) (NNS banks)) (VP (MD ca) (RB n't) (VP (VB stop) (NP (PRP it)))) (. .)) +(S (NP (NNS Speculators)) (VP (VBP are) (VP (VBG calling) (PP (IN for) (NP (NP (DT a) (NN degree)) (PP (IN of) (NP (NN liquidity))) (SBAR (WHNP (WDT that)) (S (VP (VBZ is) (RB not) (ADVP (RB there)) (PP (IN in) (NP (DT the) (NN market)))))))))) (. .) ('' '')) +(S (NP (NP (JJ Many) (NN money) (NNS managers)) (CC and) (NP (DT some) (NNS traders))) (VP (VBD had) (ADVP (RB already)) (VP (VBN left) (NP (PRP$ their) (NNS offices)) (NP (RB early) (NNP Friday) (NN afternoon)) (PP (IN on) (NP (DT a) (JJ warm) (NN autumn) (NN day))) (: --) (SBAR (IN because) (S (NP (DT the) (NN stock) (NN market)) (VP (VBD was) (ADJP (RB so) (JJ quiet))))))) (. 
.)) +(S (RB Then) (PP (IN in) (NP (DT a) (NN lightning) (NN plunge))) (, ,) (NP (DT the) (NNP Dow) (NNP Jones) (NNS industrials)) (PP (IN in) (NP (QP (RB barely) (DT an)) (NN hour))) (VP (VBD surrendered) (NP (NP (QP (RB about) (DT a)) (JJ third)) (PP (IN of) (NP (NP (PRP$ their) (NNS gains)) (NP (DT this) (NN year))))) (, ,) (S (VP (VBG chalking) (PRT (RP up)) (NP (NP (DT a) (ADJP (ADJP (JJ 190.58-point)) (, ,) (CC or) (ADJP (CD 6.9) (NN %)) (, ,)) (NN loss)) (PP (IN on) (NP (DT the) (NN day)))) (PP (IN in) (NP (JJ gargantuan) (NN trading) (NN volume)))))) (. .)) +(S (NP (JJ Final-hour) (NN trading)) (VP (VBD accelerated) (PP (TO to) (NP (NP (QP (CD 108.1) (CD million)) (NNS shares)) (, ,) (NP (NP (DT a) (NN record)) (PP (IN for) (NP (DT the) (NNP Big) (NNP Board))))))) (. .)) +(S (PP (IN At) (NP (NP (DT the) (NN end)) (PP (IN of) (NP (DT the) (NN day))))) (, ,) (NP (QP (CD 251.2) (CD million)) (NNS shares)) (VP (VBD were) (VP (VBN traded))) (. .)) +(S (NP (DT The) (NNP Dow) (NNP Jones) (NNS industrials)) (VP (VBD closed) (PP (IN at) (NP (CD 2569.26)))) (. .)) +(S (NP (NP (DT The) (NNP Dow) (POS 's)) (NN decline)) (VP (VBD was) (ADJP (JJ second) (PP (IN in) (NP (NN point) (NNS terms))) (PP (ADVP (RB only)) (TO to) (NP (NP (DT the) (JJ 508-point) (NNP Black) (NNP Monday) (NN crash)) (SBAR (WHNP (WDT that)) (S (VP (VBD occurred) (NP (NNP Oct.) (CD 19) (, ,) (CD 1987))))))))) (. .)) +(S (PP (IN In) (NP (NN percentage) (NNS terms))) (, ,) (ADVP (RB however)) (, ,) (NP (NP (DT the) (NNP Dow) (POS 's)) (NN dive)) (VP (VBD was) (NP (NP (NP (DT the) (JJ 12th-worst)) (ADVP (RB ever))) (CC and) (NP (NP (DT the) (JJS sharpest)) (SBAR (IN since) (S (NP (DT the) (NN market)) (VP (VBD fell) (NP (NP (CD 156.83)) (, ,) (CC or) (NP (CD 8) (NN %))) (, ,) (PP (NP (DT a) (NN week)) (IN after) (NP (NNP Black) (NNP Monday))))))))) (. 
.)) diff --git a/data/train.txt b/data/train.txt new file mode 100644 index 0000000..a794269 --- /dev/null +++ b/data/train.txt @@ -0,0 +1,20 @@ +(S (PP (IN In) (NP (NP (DT an) (NNP Oct.) (CD 19) (NN review)) (PP (IN of) (NP (`` ``) (NP (DT The) (NN Misanthrope)) ('' '') (PP (IN at) (NP (NP (NNP Chicago) (POS 's)) (NNP Goodman) (NNP Theatre))))) (PRN (-LRB- -LRB-) (`` ``) (S (NP (VBN Revitalized) (NNS Classics)) (VP (VBP Take) (NP (DT the) (NN Stage)) (PP (IN in) (NP (NNP Windy) (NNP City))))) (, ,) ('' '') (NP (NN Leisure) (CC &) (NNS Arts)) (-RRB- -RRB-)))) (, ,) (NP (NP (NP (DT the) (NN role)) (PP (IN of) (NP (NNP Celimene)))) (, ,) (VP (VBN played) (PP (IN by) (NP (NNP Kim) (NNP Cattrall)))) (, ,)) (VP (VBD was) (VP (ADVP (RB mistakenly)) (VBN attributed) (PP (TO to) (NP (NNP Christina) (NNP Haag))))) (. .)) +(S (NP (NNP Ms.) (NNP Haag)) (VP (VBZ plays) (NP (NNP Elianti))) (. .)) +(S (NP (NNP Rolls-Royce) (NNP Motor) (NNPS Cars) (NNP Inc.)) (VP (VBD said) (SBAR (S (NP (PRP it)) (VP (VBZ expects) (S (NP (PRP$ its) (NNP U.S.) (NNS sales)) (VP (TO to) (VP (VB remain) (ADJP (JJ steady)) (PP (IN at) (NP (QP (IN about) (CD 1,200)) (NNS cars))) (PP (IN in) (NP (CD 1990)))))))))) (. .)) +(S (NP (DT The) (NN luxury) (NN auto) (NN maker)) (NP (JJ last) (NN year)) (VP (VBD sold) (NP (CD 1,214) (NNS cars)) (PP (IN in) (NP (DT the) (NNP U.S.))))) +(S (NP (NP (NNP Howard) (NNP Mosher)) (, ,) (NP (NP (NN president)) (CC and) (NP (JJ chief) (NN executive) (NN officer))) (, ,)) (VP (VBD said) (SBAR (S (NP (PRP he)) (VP (VBZ anticipates) (NP (NP (NN growth)) (PP (IN for) (NP (DT the) (NN luxury) (NN auto) (NN maker))) (PP (PP (IN in) (NP (NNP Britain) (CC and) (NNP Europe))) (, ,) (CC and) (PP (IN in) (NP (ADJP (JJ Far) (JJ Eastern)) (NNS markets))))))))) (. .)) +(S (NP (NNP BELL) (NNP INDUSTRIES) (NNP Inc.)) (VP (VBD increased) (NP (PRP$ its) (NN quarterly)) (PP (TO to) (NP (CD 10) (NNS cents))) (PP (IN from) (NP (NP (CD seven) (NNS cents)) (NP (DT a) (NN share))))) (. 
.)) +(S (NP (DT The) (JJ new) (NN rate)) (VP (MD will) (VP (VB be) (ADJP (JJ payable) (NP (NNP Feb.) (CD 15))))) (. .)) +(S (NP (DT A) (NN record) (NN date)) (VP (VBZ has) (RB n't) (VP (VBN been) (VP (VBN set)))) (. .)) +(S (NP (NP (NNP Bell)) (, ,) (VP (VBN based) (PP (IN in) (NP (NNP Los) (NNP Angeles)))) (, ,)) (VP (VBZ makes) (CC and) (VBZ distributes) (NP (UCP (JJ electronic) (, ,) (NN computer) (CC and) (NN building)) (NNS products))) (. .)) +(S (NP (NNS Investors)) (VP (VBP are) (VP (VBG appealing) (PP (TO to) (NP (DT the) (NNPS Securities) (CC and) (NNP Exchange) (NNP Commission))) (S (RB not) (VP (TO to) (VP (VB limit) (NP (NP (PRP$ their) (NN access)) (PP (TO to) (NP (NP (NN information)) (PP (IN about) (NP (NP (NN stock) (NNS purchases) (CC and) (NNS sales)) (PP (IN by) (NP (JJ corporate) (NNS insiders))))))))))))) (. .)) +(S (S (NP (DT A) (NNP SEC) (NN proposal) (S (VP (TO to) (VP (VB ease) (NP (NP (NN reporting) (NNS requirements)) (PP (IN for) (NP (DT some) (NN company) (NNS executives)))))))) (VP (MD would) (VP (VB undermine) (NP (NP (DT the) (NN usefulness)) (PP (IN of) (NP (NP (NN information)) (PP (IN on) (NP (NN insider) (NNS trades))))) (PP (IN as) (NP (DT a) (JJ stock-picking) (NN tool))))))) (, ,) (NP (NP (JJ individual) (NNS investors)) (CC and) (NP (JJ professional) (NN money) (NNS managers))) (VP (VBP contend)) (. .)) +(S (NP (PRP They)) (VP (VBP make) (NP (DT the) (NN argument)) (PP (IN in) (NP (NP (NNS letters)) (PP (TO to) (NP (DT the) (NN agency))) (PP (IN about) (NP (NP (NN rule) (NNS changes)) (VP (VBD proposed) (NP (DT this) (JJ past) (NN summer))) (SBAR (WHNP (IN that)) (, ,) (S (PP (IN among) (NP (JJ other) (NNS things))) (, ,) (VP (MD would) (VP (VB exempt) (NP (JJ many) (JJ middle-management) (NNS executives)) (PP (IN from) (S (VP (VBG reporting) (NP (NP (NNS trades)) (PP (IN in) (NP (NP (PRP$ their) (JJ own) (NNS companies) (POS ')) (NNS shares)))))))))))))))) (. 
.)) +(S (NP (DT The) (VBN proposed) (NNS changes)) (ADVP (RB also)) (VP (MD would) (VP (VB allow) (S (NP (NNS executives)) (VP (TO to) (VP (VB report) (NP (NP (NNS exercises)) (PP (IN of) (NP (NNS options)))) (ADVP (ADVP (RBR later)) (CC and) (ADVP (RBR less) (RB often)))))))) (. .)) +(S (NP (NP (JJ Many)) (PP (IN of) (NP (DT the) (NNS letters)))) (VP (VBP maintain) (SBAR (IN that) (S (S (NP (NN investor) (NN confidence)) (VP (VBZ has) (VP (VBN been) (VP (ADVP (RB so)) (VBN shaken) (PP (IN by) (NP (DT the) (CD 1987) (NN stock) (NN market) (NN crash))))))) (: --) (CC and) (S (NP (DT the) (NNS markets)) (ADVP (RB already)) (VP (ADVP (RB so)) (VBN stacked) (PP (IN against) (NP (DT the) (JJ little) (NN guy))))) (: --) (SBAR (IN that) (S (NP (NP (DT any) (NN decrease)) (PP (IN in) (NP (NP (NN information)) (PP (IN on) (NP (NN insider-trading) (NNS patterns)))))) (VP (MD might) (VP (VB prompt) (S (NP (NNS individuals)) (VP (TO to) (VP (VB get) (ADVP (RB out) (PP (IN of) (NP (NNS stocks)))) (ADVP (RB altogether)))))))))))) (. .)) +(SINV (`` ``) (S (NP (DT The) (NNP SEC)) (VP (VBZ has) (ADVP (RB historically)) (VP (VBN paid) (NP (NN obeisance)) (PP (TO to) (NP (NP (DT the) (NN ideal)) (PP (IN of) (NP (DT a) (JJ level) (NN playing) (NN field)))))))) (, ,) ('' '') (VP (VBD wrote)) (NP (NP (NNP Clyde) (NNP S.) (NNP McGregor)) (PP (IN of) (NP (NP (NNP Winnetka)) (, ,) (NP (NNP Ill.)) (, ,)))) (PP (IN in) (NP (NP (CD one)) (PP (IN of) (NP (NP (DT the) (CD 92) (NNS letters)) (SBAR (S (NP (DT the) (NN agency)) (VP (VBZ has) (VP (VBN received) (SBAR (IN since) (S (NP (DT the) (NNS changes)) (VP (VBD were) (VP (VBN proposed) (NP (NNP Aug.) (CD 17)))))))))))))) (. .)) +(S (`` ``) (ADVP (RB Apparently)) (NP (DT the) (NN commission)) (VP (VBD did) (RB not) (ADVP (RB really)) (VP (VB believe) (PP (IN in) (NP (DT this) (NN ideal))))) (. .) 
('' '')) +(S (ADVP (RB Currently)) (, ,) (NP (DT the) (NNS rules)) (VP (VBP force) (S (NP (NP (NNS executives)) (, ,) (NP (NNS directors)) (CC and) (NP (JJ other) (JJ corporate) (NNS insiders))) (VP (TO to) (VP (VB report) (NP (NP (NNS purchases) (CC and) (NNS sales)) (PP (IN of) (NP (NP (PRP$ their) (NNS companies) (POS ')) (NNS shares)))) (PP (IN within) (NP (NP (QP (IN about) (DT a)) (NN month)) (PP (IN after) (NP (DT the) (NN transaction))))))))) (. .)) +(S (CC But) (NP (NP (QP (IN about) (CD 25)) (NN %)) (PP (IN of) (NP (DT the) (NNS insiders)))) (, ,) (PP (VBG according) (PP (TO to) (NP (NNP SEC) (NNS figures)))) (, ,) (VP (VBP file) (NP (PRP$ their) (NNS reports)) (ADVP (RB late))) (. .)) +(SINV (S (NP (DT The) (NNS changes)) (VP (VBD were) (VP (VBN proposed) (PP (IN in) (NP (DT an) (NN effort) (S (VP (TO to) (VP (VP (VB streamline) (NP (JJ federal) (NN bureaucracy))) (CC and) (VP (VB boost) (NP (NP (NN compliance)) (PP (IN by) (NP (NP (DT the) (NNS executives)) (`` ``) (SBAR (WHNP (WP who)) (S (VP (VBP are) (ADVP (RB really)) (VP (VBG calling) (NP (DT the) (NNS shots)))))))))))))))))) (, ,) ('' '') (VP (VBD said)) (NP (NP (NNP Brian) (NNP Lane)) (, ,) (NP (NP (JJ special) (NN counsel)) (PP (IN at) (NP (NP (NP (NP (DT the) (NNP SEC) (POS 's)) (NN office)) (PP (IN of) (NP (NN disclosure) (NN policy)))) (, ,) (SBAR (WHNP (WDT which)) (S (VP (VBD proposed) (NP (DT the) (NNS changes))))))))) (. .)) +(S (S (S (NP (NP (NNS Investors)) (, ,) (NP (NN money) (NNS managers)) (CC and) (NP (JJ corporate) (NNS officials))) (VP (VBD had) (PP (IN until) (NP (NN today))) (S (VP (TO to) (VP (VB comment) (PP (IN on) (NP (DT the) (NNS proposals)))))))) (, ,) (CC and) (S (NP (DT the) (NN issue)) (VP (VBZ has) (VP (VBN produced) (NP (NP (JJR more) (NN mail)) (PP (IN than) (NP (NP (ADJP (RB almost) (DT any)) (JJ other) (NN issue)) (PP (IN in) (NP (NN memory)))))))))) (, ,) (NP (NNP Mr.) (NNP Lane)) (VP (VBD said)) (. 
.)) diff --git a/data/valid.txt b/data/valid.txt new file mode 100644 index 0000000..2d07eb2 --- /dev/null +++ b/data/valid.txt @@ -0,0 +1,20 @@ +(S (NP (NP (DT The) (NN economy) (POS 's)) (NN temperature)) (VP (MD will) (VP (VB be) (VP (VBN taken) (PP (IN from) (NP (JJ several) (NN vantage) (NNS points))) (NP (DT this) (NN week)) (, ,) (PP (IN with) (NP (NP (NNS readings)) (PP (IN on) (NP (NP (NN trade)) (, ,) (NP (NN output)) (, ,) (NP (NN housing)) (CC and) (NP (NN inflation))))))))) (. .)) +(S (NP (DT The) (ADJP (RBS most) (JJ troublesome)) (NN report)) (VP (MD may) (VP (VB be) (NP (NP (DT the) (NNP August) (NN merchandise) (NN trade) (NN deficit)) (ADJP (JJ due) (ADVP (IN out)) (NP (NN tomorrow)))))) (. .)) +(S (NP (DT The) (NN trade) (NN gap)) (VP (VBZ is) (VP (VBN expected) (S (VP (TO to) (VP (VB widen) (PP (TO to) (NP (QP (IN about) ($ $) (CD 9) (CD billion)))) (PP (IN from) (NP (NP (NNP July) (POS 's)) (QP ($ $) (CD 7.6) (CD billion))))))) (, ,) (PP (VBG according) (PP (TO to) (NP (NP (DT a) (NN survey)) (PP (IN by) (NP (NP (NNP MMS) (NNP International)) (, ,) (NP (NP (DT a) (NN unit)) (PP (IN of) (NP (NP (NNP McGraw-Hill) (NNP Inc.)) (, ,) (NP (NNP New) (NNP York)))))))))))) (. .)) +(S (NP (NP (NP (NNP Thursday) (POS 's)) (NN report)) (PP (IN on) (NP (DT the) (NNP September) (NN consumer) (NN price) (NN index)))) (VP (VBZ is) (VP (VBN expected) (S (VP (TO to) (VP (VB rise) (, ,) (SBAR (IN although) (ADVP (ADVP (RB not) (RB as) (RB sharply)) (PP (IN as) (NP (NP (DT the) (ADJP (CD 0.9) (NN %)) (NN gain)) (VP (VBN reported) (NP (NNP Friday)) (PP (IN in) (NP (DT the) (NN producer) (NN price) (NN index))))))))))))) (. 
.)) +(S (NP (DT That) (NN gain)) (VP (VBD was) (VP (VBG being) (VP (VBD cited) (PP (IN as) (NP (NP (DT a) (NN reason)) (SBAR (S (NP (DT the) (NN stock) (NN market)) (VP (VBD was) (ADVP (IN down)) (ADVP (RB early) (PP (IN in) (NP (NP (NNP Friday) (POS 's)) (NN session)))) (, ,) (SBAR (IN before) (S (NP (PRP it)) (VP (VBD got) (S (VP (VBN started) (PP (IN on) (NP (PRP$ its) (JJ reckless) (JJ 190-point) (NN plunge)))))))))))))))) (. .)) +(S (NP (NNS Economists)) (VP (VBP are) (VP (VBN divided) (PP (IN as) (PP (TO to) (SBAR (WHNP (WHADVP (WRB how) (JJ much)) (VBG manufacturing) (NN strength)) (S (NP (PRP they)) (VP (VBP expect) (S (VP (TO to) (VP (VB see) (PP (IN in) (NP (NP (NP (NNP September) (NNS reports)) (PP (IN on) (NP (NP (JJ industrial) (NN production)) (CC and) (NP (NN capacity) (NN utilization))))) (, ,) (ADJP (ADVP (RB also)) (JJ due) (NP (NN tomorrow))))))))))))))) (. .)) +(S (ADVP (RB Meanwhile)) (, ,) (NP (NP (NNP September) (NN housing) (NNS starts)) (, ,) (ADJP (JJ due) (NP (NNP Wednesday))) (, ,)) (VP (VBP are) (VP (VBN thought) (S (VP (TO to) (VP (VB have) (VP (VBN inched) (ADVP (RB upward)))))))) (. .)) +(SINV (S (`` ``) (NP (EX There)) (VP (VBZ 's) (NP (NP (DT a) (NN possibility)) (PP (IN of) (NP (NP (DT a) (NN surprise)) ('' '') (PP (IN in) (NP (DT the) (NN trade) (NN report)))))))) (, ,) (VP (VBD said)) (NP (NP (NNP Michael) (NNP Englund)) (, ,) (NP (NP (NN director)) (PP (IN of) (NP (NN research))) (PP (IN at) (NP (NNP MMS))))) (. .)) +(S (S (NP (NP (DT A) (NN widening)) (PP (IN of) (NP (DT the) (NN deficit)))) (, ,) (SBAR (IN if) (S (NP (PRP it)) (VP (VBD were) (VP (VBN combined) (PP (IN with) (NP (DT a) (ADJP (RB stubbornly) (JJ strong)) (NN dollar))))))) (, ,) (VP (MD would) (VP (VB exacerbate) (NP (NN trade) (NNS problems)))) (: --)) (CC but) (S (NP (DT the) (NN dollar)) (VP (VBD weakened) (NP (NNP Friday)) (SBAR (IN as) (S (NP (NNS stocks)) (VP (VBD plummeted)))))) (. .)) +(S (PP (IN In) (NP (DT any) (NN event))) (, ,) (NP (NP (NNP Mr.) 
(NNP Englund)) (CC and) (NP (JJ many) (NNS others))) (VP (VBP say) (SBAR (IN that) (S (NP (NP (DT the) (JJ easy) (NNS gains)) (PP (IN in) (S (VP (VBG narrowing) (NP (DT the) (NN trade) (NN gap)))))) (VP (VBP have) (ADVP (RB already)) (VP (VBN been) (VP (VBN made))))))) (. .)) +(S (`` ``) (S (NP (NN Trade)) (VP (VBZ is) (ADVP (RB definitely)) (VP (VBG going) (S (VP (TO to) (VP (VB be) (ADJP (RBR more) (RB politically) (JJ sensitive)) (PP (IN over) (NP (DT the) (JJ next) (QP (CD six) (CC or) (CD seven)) (NNS months))) (SBAR (IN as) (S (NP (NN improvement)) (VP (VBZ begins) (S (VP (TO to) (VP (VB slow))))))))))))) (, ,) ('' '') (NP (PRP he)) (VP (VBD said)) (. .)) +(S (S (NP (NNS Exports)) (VP (VBP are) (VP (VBN thought) (S (VP (TO to) (VP (VB have) (VP (VBN risen) (ADVP (ADVP (RB strongly) (PP (IN in) (NP (NNP August)))) (, ,) (CC but) (ADVP (ADVP (RB probably)) (RB not) (RB enough) (S (VP (TO to) (VP (VB offset) (NP (NP (DT the) (NN jump)) (PP (IN in) (NP (NNS imports)))))))))))))))) (, ,) (NP (NNS economists)) (VP (VBD said)) (. .)) +(S (NP (NP (NNS Views)) (PP (IN on) (NP (VBG manufacturing) (NN strength)))) (VP (VBP are) (ADJP (VBN split) (PP (IN between) (NP (NP (NP (NNS economists)) (SBAR (WHNP (WP who)) (S (VP (VBP read) (NP (NP (NP (NNP September) (POS 's)) (JJ low) (NN level)) (PP (IN of) (NP (NN factory) (NN job) (NN growth)))) (PP (IN as) (NP (NP (DT a) (NN sign)) (PP (IN of) (NP (DT a) (NN slowdown))))))))) (CC and) (NP (NP (DT those)) (SBAR (WHNP (WP who)) (S (VP (VBP use) (NP (DT the) (ADJP (RB somewhat) (JJR more) (VBG comforting)) (JJ total) (NN employment) (NNS figures)) (PP (IN in) (NP (PRP$ their) (NNS calculations))))))))))) (. 
.)) +(S (S (NP (NP (DT The) (JJ wide) (NN range)) (PP (IN of) (NP (NP (NNS estimates)) (PP (IN for) (NP (DT the) (JJ industrial) (NN output) (NN number)))))) (VP (VBZ underscores) (NP (DT the) (NNS differences)))) (: :) (S (NP (DT The) (NNS forecasts)) (VP (VBD run) (PP (IN from) (NP (NP (DT a) (NN drop)) (PP (IN of) (NP (CD 0.5) (NN %))))) (PP (TO to) (NP (NP (DT an) (NN increase)) (PP (IN of) (NP (CD 0.4) (NN %))))) (, ,) (PP (VBG according) (PP (TO to) (NP (NNP MMS)))))) (. .)) +(S (NP (NP (DT A) (NN rebound)) (PP (IN in) (NP (NN energy) (NNS prices))) (, ,) (SBAR (WHNP (WDT which)) (S (VP (VBD helped) (VP (VB push) (PRT (RP up)) (NP (DT the) (NN producer) (NN price) (NN index)))))) (, ,)) (VP (VBZ is) (VP (VBN expected) (S (VP (TO to) (VP (VB do) (NP (DT the) (JJ same)) (PP (IN in) (NP (DT the) (NN consumer) (NN price) (NN report)))))))) (. .)) +(S (NP (DT The) (NN consensus) (NN view)) (VP (VBZ expects) (NP (NP (DT a) (ADJP (CD 0.4) (NN %)) (NN increase)) (PP (IN in) (NP (DT the) (NNP September) (NNP CPI)))) (PP (IN after) (NP (NP (DT a) (JJ flat) (NN reading)) (PP (IN in) (NP (NNP August)))))) (. .)) +(S (NP (NP (NNP Robert) (NNP H.) (NNP Chandross)) (, ,) (NP (NP (DT an) (NN economist)) (PP (IN for) (NP (NP (NP (NNP Lloyd) (POS 's)) (NNP Bank)) (PP (IN in) (NP (NNP New) (NNP York)))))) (, ,)) (VP (VBZ is) (PP (IN among) (NP (NP (DT those)) (VP (VBG expecting) (NP (NP (DT a) (ADJP (RBR more) (JJ moderate)) (NN gain)) (PP (IN in) (NP (DT the) (NNP CPI))) (PP (IN than) (PP (IN in) (NP (NP (NNS prices)) (PP (IN at) (NP (DT the) (NN producer) (NN level))))))))))) (. .)) +(S (`` ``) (S (S (NP (NN Auto) (NNS prices)) (VP (VBD had) (NP (DT a) (JJ big) (NN effect)) (PP (IN in) (NP (DT the) (NNP PPI))))) (, ,) (CC and) (S (PP (IN at) (NP (DT the) (NNP CPI) (NN level))) (NP (PRP they)) (VP (MD wo) (RB n't)))) (, ,) ('' '') (NP (PRP he)) (VP (VBD said)) (. 
.)) +(SINV (S (S (NP (NN Food) (NNS prices)) (VP (VBP are) (VP (VBN expected) (S (VP (TO to) (VP (VB be) (ADJP (JJ unchanged)))))))) (, ,) (CC but) (S (NP (NN energy) (NNS costs)) (VP (VBD jumped) (NP (NP (RB as) (RB much) (IN as) (CD 4)) (NN %))))) (, ,) (VP (VBD said)) (NP (NP (NNP Gary) (NNP Ciminero)) (, ,) (NP (NP (NN economist)) (PP (IN at) (NP (NNP Fleet\/Norstar) (NNP Financial) (NNP Group))))) (. .)) +(S (NP (PRP He)) (ADVP (RB also)) (VP (VBZ says) (SBAR (S (NP (PRP he)) (VP (VBZ thinks) (SBAR (S (NP (`` ``) (NP (NN core) (NN inflation)) (, ,) ('' '') (SBAR (WHNP (WDT which)) (S (VP (VBZ excludes) (NP (DT the) (JJ volatile) (NN food) (CC and) (NN energy) (NNS prices))))) (, ,)) (VP (VBD was) (ADJP (JJ strong)) (NP (JJ last) (NN month))))))))) (. .)) diff --git a/src/architectures.py b/src/architectures.py new file mode 100644 index 0000000..c549db6 --- /dev/null +++ b/src/architectures.py @@ -0,0 +1,783 @@ +from utils import * +import copy +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +import transition as transition_system +from sklearn.metrics import accuracy_score, f1_score +import experiment + + +class IncrementalProbe(nn.Module): + def __init__(self, args): + super(IncrementalProbe, self).__init__() + self.args = args + if args["oracle_params"]["name"]: + self.oracle = getattr(transition_system, args["oracle_params"]["name"])( + args["oracle_params"]["mappings_file"] + ) + else: + self.oracle = None + + self.add_root = args["add_root"] if "add_root" in args.keys() else None + self.embeddings_dropout_rate = ( + args["embeddings_dropout_rate"] + if "embeddings_dropout_rate" in args.keys() + else None + ) + self.layer_dropout_rate = ( + args["layer_dropout_rate"] if "layer_dropout_rate" in args.keys() else None + ) + self.checkpoint_path = ( + args["checkpoint_path"] if "checkpoint_path" in args.keys() else None + ) + self.num_layers = args["num_layers"] if "num_layers" in args.keys() else None + self.layer = 
class AttentionLayer(nn.Module):
    """Bilinear (query/key) attention whose values are the unprojected keys' inputs.

    ``x`` is projected by ``query`` and ``y`` by ``key``; the attention weights
    are the softmax over the last axis of ``query(x) @ key(y)^T`` and the
    context is the weighted sum of ``y`` itself (no value projection).
    """

    def __init__(self, y_dim=512, x_dim=512):
        """Create the projections.

        Args:
            y_dim: feature size of the attended sequence ``y``.
            x_dim: feature size of the query ``x`` (also the projection size).
        """
        super(AttentionLayer, self).__init__()
        self.key = nn.Linear(y_dim, x_dim, bias=False)
        self.query = nn.Linear(x_dim, x_dim, bias=False)
        # Refreshed on every forward pass from the layer's parameters.
        self.device = None

    def forward(self, x, y, masks=None, output_attentions=False):
        """Attend from ``x`` over ``y``.

        Args:
            x: query tensor; it must be 4-D because index 0 of its third axis
                is what gets returned (the in-file caller passes a singleton
                third axis).
            y: tensor to attend over; its second-to-last axis is the attended
                axis.
            masks: optional tensor that is truthy where attention is allowed.
                It is unsqueezed at axis 2 and broadcast against the scores.
                ``None`` (the default) means "attend everywhere"; previously
                the default crashed on ``masks.unsqueeze``.
            output_attentions: accepted for interface compatibility; the
                attention weights are always returned.

        Returns:
            ``(context, weights)``, both taken at index 0 of the third axis.
        """
        self.device = next(self.parameters()).device
        q = self.query(x)
        k = self.key(y)
        v = y

        w = torch.matmul(q, k.transpose(-1, -2))

        if masks is not None:
            # Build the fill value from ``w`` so it always shares the scores'
            # dtype and device.
            w = torch.where(masks.unsqueeze(2).bool(), w, w.new_tensor(-1e10))

        w = torch.softmax(w, dim=-1)

        return torch.matmul(w, v)[:, :, 0, :], w[:, :, 0, :]
def batch_step_eval(self, batch):
    """Score one batch of next-action predictions (AttentiveProbe).

    The targets are ``batch["action_ids"]`` shifted left by one step, with the
    final column set to the ``"PAD"`` id; every position whose target equals
    the pad id is excluded from all metrics.

    Returns:
        dict with ``"loss"`` (mean NLL), ``"f1"`` (macro), ``"accuracy"`` and
        ``"perplexity"`` (``exp`` of the mean NLL), each a tensor.
    """
    self.device = next(self.parameters()).device
    log_probs, _hidden, _attentions = self(batch)

    pad_id = self.oracle.a2i["PAD"]
    next_actions = batch["action_ids"].roll(-1, dims=-1).to(self.device)
    next_actions[:, -1] = pad_id

    # Positions whose target differs from the pad id are the ones we score.
    flat_targets = next_actions.flatten()
    scored = (flat_targets - pad_id).nonzero(as_tuple=True)

    scored_log_probs = log_probs[scored]
    scored_targets = flat_targets[scored]
    mean_nll = self.nll(scored_log_probs, scored_targets).mean()

    predicted = scored_log_probs.argmax(dim=-1).detach().cpu().numpy()
    gold = scored_targets.detach().cpu().numpy()

    return {
        "loss": mean_nll,
        "f1": torch.tensor(f1_score(predicted, gold, average="macro")),
        "accuracy": torch.tensor(accuracy_score(predicted, gold)),
        "perplexity": torch.exp(mean_nll),
    }
def repackage_hidden(self, h):
    """Detach a hidden state from its autograd history.

    A tensor is returned detached; any other iterable is rebuilt as a tuple
    with every element processed recursively.
    """
    if not isinstance(h, torch.Tensor):
        return tuple(map(self.repackage_hidden, h))
    return h.detach()
def batch_step_train(self, batch):
    """Compute the training loss for one batch (StackActionProbe).

    For every oracle transition the embeddings of the two referenced positions
    are concatenated and classified; the loss is the mean NLL against the
    oracle's target actions.

    Args:
        batch: dict read through ``batch["padded_embeddings"]`` (directly or
            via ``add_root_model_embeddings``) and ``self.oracle.targets_idxs``.

    Returns:
        ``{"loss": <scalar tensor>}``.
    """
    self.device = next(self.parameters()).device

    if self.add_root:
        model_embeddings = self.add_root_model_embeddings(batch)[:, 0, :, :]
    else:
        # Previously this branch was missing and ``model_embeddings`` was
        # unbound. Same no-root slice GeometricProbe uses.
        model_embeddings = batch["padded_embeddings"][:, 0, 1:, :]
    model_embeddings = model_embeddings.to(self.device)

    oracle_action_idxs, targets = self.oracle.targets_idxs(batch)

    # Row 0 selects along the first axis of ``model_embeddings`` and rows 1/2
    # along the second. Index with an explicit tuple of long tensors instead of
    # a stacked 2-row array, whose tuple-vs-single-index interpretation is not
    # stable across PyTorch versions.
    # NOTE(review): assumes ``oracle_action_idxs`` has (at least) three integer
    # rows - confirm against ``targets_idxs``.
    batch_idx, first_idx, second_idx = (
        torch.as_tensor(oracle_action_idxs[row], dtype=torch.long, device=self.device)
        for row in range(3)
    )
    emb_pairs = torch.cat(
        (
            model_embeddings[batch_idx, first_idx],
            model_embeddings[batch_idx, second_idx],
        ),
        dim=1,
    )

    output_distributions = self.forward(emb_pairs)
    return {
        "loss": self.nll(
            output_distributions, torch.tensor(targets, device=self.device)
        ).mean()
    }
def p_shift(self, model_embeddings, temp, threshold):
    """Squash the pairwise distance matrix with a thresholded sigmoid.

    Args:
        model_embeddings: embeddings forwarded to ``self.distance_matrix``.
        temp: sigmoid temperature passed to ``self.t_sigmoid``.
        threshold: distance at which the sigmoid crosses 0.5. This argument
            was previously ignored in favour of ``self.threshold``.

    Returns:
        Whatever ``self.t_sigmoid`` returns for the distance matrix.
    """
    return self.t_sigmoid(
        self.distance_matrix(model_embeddings), threshold=threshold, temp=temp
    )
def L1DistanceLoss(self, predictions, label_batch, length_batch):
    """Compute the L1 loss between predicted and gold distance matrices.

    Entries where ``label_batch == -1`` are ignored. Each sentence's summed
    absolute error is divided by the square of its length, and the result is
    averaged over the sentences whose length is non-zero. Zero-length
    sentences are skipped entirely; previously they were divided by zero and
    turned the whole batch loss into NaN/inf.

    Args:
        predictions: batch of predicted distances.
        label_batch: batch of gold distances, ``-1`` marking ignored entries.
        length_batch: batch of sentence lengths.

    Returns:
        A scalar tensor with the batch loss (``0.0`` when every length is 0).
    """
    keep = (label_batch != -1).float()
    abs_err = torch.abs(predictions * keep - label_batch * keep)

    nonempty = length_batch != 0
    total_sents = torch.sum(nonempty).float()
    if total_sents > 0:
        loss_per_sent = torch.sum(abs_err, dim=(1, 2))
        squared_lengths = length_batch.pow(2).float()
        # Only sentences counted in ``total_sents`` take part in the average.
        normalized_loss_per_sent = loss_per_sent[nonempty] / squared_lengths[nonempty]
        return torch.sum(normalized_loss_per_sent) / total_sents
    return torch.tensor(0.0, device=predictions.device)
def dist_spearmanr(self, predictions, label_batch, length_batch):
    """Average row-wise Spearman correlation of predicted vs. gold distances.

    Each sentence's matrices are cropped to ``length x length`` and every row
    is correlated with the matching gold row. Correlations are averaged per
    sentence length first, then across the lengths 5 through 50 that occur.
    """
    correlations_by_length = defaultdict(list)
    predicted_matrices = predictions.detach().cpu().numpy()

    for pred_matrix, gold_matrix, n in zip(
        predicted_matrices, label_batch, length_batch
    ):
        n = int(n)
        pred_rows = pred_matrix[:n, :n]
        gold_rows = gold_matrix[:n, :n].cpu()
        bucket = correlations_by_length[n]
        row_results = [
            spearmanr(pred_row, gold_row)
            for pred_row, gold_row in zip(pred_rows, gold_rows)
        ]
        bucket.extend(result.correlation for result in row_results)

    mean_by_length = {
        n: np.mean(values) for n, values in correlations_by_length.items()
    }
    selected = [mean_by_length[n] for n in range(5, 51) if n in mean_by_length]
    return np.mean(selected)
def root_accuracy_spanning_tree(
    self, depth_predictions, depth_label_batch, depth_length_batch, tags
):
    """Compute the fraction of sentences whose root is predicted correctly.

    For each sentence, positions ``1 .. length`` of the predictions and labels
    are compared: the gold root is the first position whose label equals 1,
    and the predicted root is whatever ``get_nopunct_argmin`` selects from the
    predicted depths and the sentence's tags.

    Args:
        depth_predictions: batch of predicted depths.
        depth_label_batch: batch of gold depths.
        depth_length_batch: batch of sentence lengths.
        tags: per-sentence tag sequences passed to ``get_nopunct_argmin``.

    Returns:
        Correct root predictions divided by the number of sentences.

    Raises:
        ValueError: if a sentence has no label equal to 1.
        ZeroDivisionError: if the batch contains no sentences.
    """
    # Shape diagnostics used to be printed on every call; keep them available
    # but only when the probe is configured as verbose.
    if getattr(self, "verbose", False):
        print(depth_label_batch.shape, "depth label batch shape")
        print(depth_length_batch.shape, "depth length batch shape")

    correct_root_predictions = 0
    total_sents = 0
    for poses, prediction, label, length in zip(
        tags,
        depth_predictions.detach().cpu().numpy(),
        depth_label_batch,
        depth_length_batch,
    ):
        length = int(length)
        prediction = prediction[1 : length + 1]
        label = label[1 : length + 1].cpu().numpy().tolist()

        correct_root_predictions += label.index(1) == get_nopunct_argmin(
            prediction, poses
        )
        total_sents += 1
    return correct_root_predictions / float(total_sents)
def batch_step_eval(self, batch):
    """Compute every evaluation metric for one batch (GeometricProbe).

    Side effects: when ``"lengths"`` is present, ``batch`` is modified in place
    (its padded tensors are cropped), and ``self.device`` is refreshed.

    Returns:
        dict of detached tensors: ``"L2"``, ``"temperature"``,
        ``"distance_mse"``, ``"depth_mse"``, ``"oracle_action_nll"``, ``"f1"``,
        ``"accuracy"``, ``"perplexity"``, ``"uuas_spanning_tree"``,
        ``"root_accuracy_spanning_tree"``, ``"dep_spearman"``,
        ``"dist_spearman"`` and ``"loss"`` (the sum of the entries named in
        ``self.loss_types``).
    """
    if "lengths" in batch:
        # Crop padding down to the longest sentence in this batch plus a
        # margin of 4. NOTE(review): the reason for the margin of 4 is not
        # visible here - confirm against the dataset padding scheme.
        max_tok_length = batch["lengths"].max()
        batch["padded_embeddings"] = batch["padded_embeddings"][
            :, :, : max_tok_length + 4, :
        ]
        batch["gold_distances"] = batch["gold_distances"][
            :, : max_tok_length + 4, : max_tok_length + 4
        ]
        batch["gold_depths"] = batch["gold_depths"][:, : max_tok_length + 4]

    self.device = next(self.parameters()).device

    if self.add_root:
        model_embeddings = self.add_root_model_embeddings(batch)[:, 0, :, :].to(
            self.device
        )

        # The root-augmented labels are one position longer than the
        # embeddings built above, so the last position is dropped.
        gold_distances = self.add_root_distance_labels(batch)[:, :-1, :-1].to(
            self.device
        )
        gold_depths = self.add_root_depth_labels(batch)[:, :-1].to(self.device)

    else:
        # Without a root: skip embedding position 0 and the last label position.
        model_embeddings = batch["padded_embeddings"][:, 0, 1:, :].to(self.device)
        gold_distances = batch["gold_distances"][:, :-1, :-1].to(self.device)
        gold_depths = batch["gold_depths"][:, :-1].to(self.device)

    # Read unconditionally, even though the cropping above is guarded.
    lengths = batch["lengths"].to(self.device)

    losses = {
        "L2": torch.linalg.norm(self.transform(model_embeddings)),
        "temperature": torch.tensor(self.temp, device=self.device),
    }

    # Regression metrics. Despite the "_mse" keys, both values come from the
    # L1 loss methods.
    distance_predictions = self.forward_distance(model_embeddings)
    depth_predictions = self.forward_depth(model_embeddings)
    losses["distance_mse"] = self.L1DistanceLoss(
        distance_predictions, gold_distances, lengths
    )
    losses["depth_mse"] = self.L1DepthLoss(depth_predictions, gold_depths, lengths)

    # Action metrics: the oracle turns the two sigmoid-squashed matrices into
    # action distributions, which are then read at the oracle's indices.
    action_dists = self.oracle.action_dists(
        self.p_shift(model_embeddings, temp=self.temp, threshold=self.threshold),
        self.marginal_p_reduce(model_embeddings, temp=self.temp),
    )
    oracle_action_idxs, targets = self.oracle.targets_idxs(batch)

    losses["oracle_action_nll"] = self.nll(
        action_dists[oracle_action_idxs], torch.tensor(targets, device=self.device)
    ).mean()

    predicted_actions = (
        action_dists[oracle_action_idxs].argmax(dim=-1).detach().cpu().numpy()
    )

    losses["f1"] = torch.tensor(
        f1_score(predicted_actions, targets, average="macro")
    )
    losses["accuracy"] = torch.tensor(accuracy_score(predicted_actions, targets))
    losses["perplexity"] = torch.exp(losses["oracle_action_nll"].detach())
    losses["uuas_spanning_tree"] = torch.tensor(
        self.uuas_spanning_tree(
            distance_predictions, gold_distances, lengths, batch["xpos"]
        )
    )
    losses["root_accuracy_spanning_tree"] = torch.tensor(
        self.root_accuracy_spanning_tree(
            depth_predictions, gold_depths, lengths, batch["xpos"]
        )
    )
    losses["dep_spearman"] = torch.tensor(
        self.dep_spearmanr(depth_predictions, gold_depths, lengths)
    )
    losses["dist_spearman"] = torch.tensor(
        self.dist_spearmanr(distance_predictions, gold_distances, lengths)
    )

    # The reported loss combines whichever entries the config selected.
    losses["loss"] = sum(losses[loss_type] for loss_type in self.loss_types)

    # Evaluation only: nothing returned keeps a graph alive.
    for key in losses:
        losses[key] = losses[key].detach()

    return losses
def batch_step_train(self, batch):
    """Compute the training losses for one batch (GeometricProbe).

    When ``self.args["probe_name"] == "Geometric_Regression"`` the distance and
    depth L1 losses are computed; otherwise the oracle action NLL is computed.
    ``"loss"`` is the sum of the entries named in ``self.loss_types``, so those
    names must be among the keys produced by the active branch.

    Side effects: when ``"lengths"`` is present, ``batch`` is modified in place
    (its padded tensors are cropped), and ``self.device`` is refreshed.

    Returns:
        dict of tensors; every entry except ``"loss"`` is detached.
    """
    if "lengths" in batch:
        # Crop padding down to the longest sentence in this batch plus a
        # margin of 4. NOTE(review): the reason for the margin of 4 is not
        # visible here - confirm against the dataset padding scheme.
        max_tok_length = batch["lengths"].max()
        batch["padded_embeddings"] = batch["padded_embeddings"][
            :, :, : max_tok_length + 4, :
        ]
        batch["gold_distances"] = batch["gold_distances"][
            :, : max_tok_length + 4, : max_tok_length + 4
        ]
        batch["gold_depths"] = batch["gold_depths"][:, : max_tok_length + 4]

    self.device = next(self.parameters()).device

    if self.add_root:
        model_embeddings = self.add_root_model_embeddings(batch)[:, 0, :, :].to(
            self.device
        )

        # Gold labels are only needed (and only built) for the regression probe.
        if self.args["probe_name"] == "Geometric_Regression":
            gold_distances = self.add_root_distance_labels(batch)[:, :-1, :-1].to(
                self.device
            )
            gold_depths = self.add_root_depth_labels(batch)[:, :-1].to(self.device)
            lengths = batch["lengths"].to(self.device)

    else:
        # Without a root: skip embedding position 0 and the last label position.
        model_embeddings = batch["padded_embeddings"][:, 0, 1:, :].to(self.device)
        if self.args["probe_name"] == "Geometric_Regression":
            gold_distances = batch["gold_distances"][:, :-1, :-1].to(self.device)
            gold_depths = batch["gold_depths"][:, :-1].to(self.device)
            lengths = batch["lengths"].to(self.device)

    losses = {
        "L2": torch.linalg.norm(self.transform(model_embeddings)),
        "temperature": torch.tensor(self.temp, device=self.device),
    }

    if self.args["probe_name"] == "Geometric_Regression":
        # Despite the "_mse" keys, both values come from the L1 loss methods.
        distance_predictions = self.forward_distance(model_embeddings)
        depth_predictions = self.forward_depth(model_embeddings)
        losses["distance_mse"] = self.L1DistanceLoss(
            distance_predictions, gold_distances, lengths
        )
        losses["depth_mse"] = self.L1DepthLoss(
            depth_predictions, gold_depths, lengths
        )

    else:
        # The oracle turns the two sigmoid-squashed matrices into action
        # distributions, which are then read at the oracle's indices.
        action_dists = self.oracle.action_dists(
            self.p_shift(
                model_embeddings, temp=self.temp, threshold=self.threshold
            ),
            self.marginal_p_reduce(model_embeddings, temp=self.temp),
        )
        oracle_action_idxs, targets = self.oracle.targets_idxs(batch)

        losses["oracle_action_nll"] = self.nll(
            action_dists[oracle_action_idxs],
            torch.tensor(targets, device=self.device),
        ).mean()
    losses["loss"] = sum(losses[loss_type] for loss_type in self.loss_types)

    # Keep the graph only on "loss"; the other entries are detached.
    for key in losses:
        if not key == "loss":
            losses[key] = losses[key].detach()

    return losses
open(self.data_path) as f: + batch_embs, batch_maps, batch_toks, count = [], [], [], 0 + for idx, line in tqdm( + enumerate(f), desc=f"loading {split} data", total=num_lines + ): + o = json.loads(line) + if o["key"] == "sentence": + if o["projective"]: + sent = " ".join(o["orig_tokens"]) + line = sent.strip() # Remove trailing characters + line = ( + self.gpt.tokenizer.bos_token + + line + + self.gpt.tokenizer.eos_token + ) + tokenized_text = self.gpt.tokenizer.tokenize(line) + untok_tok_mapping = self.gpt.match_tokenized_to_untokenized( + tokenized_text, line + ) + batch_maps.append(untok_tok_mapping) + batch_toks.append(tokenized_text) + count += 1 + + if count > 100 or idx == num_lines - 1: + lens = [len(x) for x in batch_toks] + max_len = max(lens) + + for i, tok in enumerate(batch_toks): + if len(tok) < max_len: + batch_toks[i].extend( + [self.gpt.tokenizer.eos_token] + * (max_len - len(batch_toks[i])) + ) + batch_embs = [ + torch.tensor( + [ + self.gpt.tokenizer.convert_tokens_to_ids( + tokenized_text + ) + ] + ).to(device) + for tokenized_text in batch_toks + ] + with torch.no_grad(): + encoded_layers = self.gpt.model( + torch.cat(batch_embs, dim=0), + output_hidden_states=True, + )["hidden_states"][start] + + for ind2, untok_tok_mapping in enumerate(batch_maps): + model_embeddings = encoded_layers[ind2].unsqueeze(0) + aligned_model_embeddings = torch.cat( + [ + torch.mean( + model_embeddings[ + :, + untok_tok_mapping[i][ + 0 + ] : untok_tok_mapping[i][-1] + + 1, + :, + ], + dim=1, + ) + for i, tok in enumerate( + untok_tok_mapping.keys() + ) + ] + ).unsqueeze(0) + aligned_model_embeddings = torch.cat( + ( + model_embeddings[:, 0:1, :], + aligned_model_embeddings, + model_embeddings[:, -1:, :].repeat( + 1, + self.config["data_params"]["token_pad"] + - aligned_model_embeddings.shape[1] + - 1, + 1, + ), + ), + dim=1, + ).unsqueeze(0) + assert ( + aligned_model_embeddings.shape[2] + == self.config["data_params"]["token_pad"] + ) # 
model_embeddings.shape[1]#len(untok_tok_mapping.keys())+2 + + # model_embeddings = align(encoded_layers[ind2].unsqueeze(0), b) + self.embs.append( + aligned_model_embeddings[:, 0, :, :].to("cpu") + ) + + batch_embs, batch_maps, batch_toks, count = ( + [], + [], + [], + 0, + ) + + if self.oracle: + action_ids = [ + i[0] for i in o[self.oracle.name]["actions"] + ] + action_ids = np.pad( + action_ids, + ( + 0, + self.config["data_params"]["action_pad"] + - len(action_ids), + ), + "constant", + constant_values=self.probe.oracle.a2i["PAD"], + ) + else: + action_ids = torch.tensor([-1]) + + if ( + "padded_action_ngrams" + in config["probe_params"]["data_sources"] + ): + padded_action_ngrams = conv_padded_ngrams( + self.probe.oracle.a2i, + action_ids, + action_ngram_pad=self.config["data_params"][ + "action_ngram_pad" + ], + token_pad=self.config["data_params"]["token_pad"], + ) + else: + padded_action_ngrams = torch.tensor([-1]) + + if ( + "continuous_action_masks" + in config["probe_params"]["data_sources"] + ): + mask = generate_continuous_mask( + action_ids, self.config["data_params"]["token_pad"] + ) + cont_mask = np.pad( + mask, + ( + ( + 0, + self.config["data_params"]["action_pad"] + - len(mask), + ), + (0, 0), + ), + "constant", + constant_values=-1, + ) + else: + cont_mask = torch.tensor([-1]) + + if "gold_stacks" in config["probe_params"]["data_sources"]: + stacks = o[self.oracle.name]["gold_stacks"] + stacks.extend( + [[0]] + * ( + self.config["data_params"]["action_pad"] + - len(stacks) + ) + ) + stacks = np.array( + [ + i + + [0] + * ( + self.config["data_params"]["token_pad"] + - len(i) + ) + for i in stacks + ] + ) + else: + stacks = torch.tensor([-1]) + + if "gold_buffers" in config["probe_params"]["data_sources"]: + bufs = o[self.oracle.name]["gold_buffers"] + bufs.extend( + [[0]] + * ( + self.config["data_params"]["action_pad"] + - len(bufs) + ) + ) + bufs = np.array( + [ + i + + [0] + * ( + self.config["data_params"]["token_pad"] + - len(i) + ) + for i 
in bufs + ] + ) + else: + bufs = torch.tensor([-1]) + + if "gold_tuples" in config["probe_params"]["data_sources"]: + gold_tuples = o[self.oracle.name]["action_tuples"] + gold_tuples.extend( + [[-1]] + * ( + self.config["data_params"]["action_pad"] + - len(gold_tuples) + ) + ) + gold_tuples = np.array( + [ + i + + [-1] + * ( + self.config["data_params"]["token_pad"] + - len(i) + ) + for i in gold_tuples + ] + ) + else: + gold_tuples = torch.tensor([-1]) + + if ( + "gold_distances" + in config["probe_params"]["data_sources"] + ): + gold_distances = task.ParseDistanceTask.labels( + self.observations[idx] + ) + gold_distances = np.pad( + gold_distances, + ( + ( + 0, + config["data_params"]["token_pad"] + - len(gold_distances), + ), + ( + 0, + config["data_params"]["token_pad"] + - len(gold_distances), + ), + ), + "constant", + constant_values=-1, + ) + else: + gold_distances = torch.tensor([-1]) + + if "gold_depths" in config["probe_params"]["data_sources"]: + gold_depths = task.ParseDepthTask.labels( + self.observations[idx] + ) + gold_depths = np.pad( + gold_depths, + ( + 0, + config["data_params"]["token_pad"] + - len(gold_depths), + ), + "constant", + constant_values=-1, + ) + else: + gold_depths = torch.tensor([-1]) + + if "token_ids" in config["probe_params"]["data_sources"]: + token_ids = np.pad( + o["token_ids"], + ( + 0, + self.config["data_params"]["token_pad"] + - len(o["token_ids"]), + ), + "constant", + constant_values=0, + ) + else: + token_ids = torch.tensor([-1]) + + if "xpos" in config["probe_params"]["data_sources"]: + xpos = np.pad( + [XPOS2IDX[t] for t in o["tags"]], + ( + 0, + self.config["data_params"]["token_pad"] + - len(o["tags"]), + ), + "constant", + constant_values=XPOS2IDX["."], + ) + else: + xpos = torch.tensor([-1]) + + self.token_ids.append(token_ids) + self.stacks.append(stacks) + self.bufs.append(bufs) + self.action_ids.append(action_ids) + self.padded_action_ngrams.append(padded_action_ngrams) + 
def generate_lines_for_sent(self, lines):
    """Yield the conllx lines of one sentence at a time.

    Lines starting with ``#`` are skipped, blank lines separate sentences,
    and every kept line is stripped of surrounding whitespace.

    Args:
        lines: iterable over the lines of a conllx file.

    Yields:
        A list of the stripped lines describing a single sentence.
    """
    buf = []
    for line in lines:
        if line.startswith("#"):
            continue
        stripped = line.strip()
        if stripped:
            buf.append(stripped)
        elif buf:
            # A blank line closes the sentence collected so far.
            yield buf
            buf = []
    if buf:
        # The file may end without a trailing blank line.
        yield buf


def load_conll_dataset(self, filepath):
    """Read a conllx file and build one Observation per sentence.

    Each Observation is a namedtuple whose first ten fields are the conllx
    columns (as tuples of strings, one entry per token) and whose last
    field, ``embeddings``, is a list of ``None`` placeholders.

    Args:
        filepath: filesystem path of the conllx file.

    Returns:
        A list of Observations, in file order.
    """
    observation_class = namedtuple(
        "Observation",
        [
            "index",
            "sentence",
            "lemma_sentence",
            "upos_sentence",
            "xpos_sentence",
            "morph",
            "head_indices",
            "governance_relations",
            "secondary_relations",
            "extra_info",
            "embeddings",
        ],
    )

    observations = []
    # Use a context manager so the handle is closed deterministically; the
    # file used to be opened inside a generator and was never closed.
    with open(filepath) as conll_file:
        for buf in self.generate_lines_for_sent(conll_file):
            conllx_lines = [line.strip().split("\t") for line in buf]
            embeddings = [None for _ in conllx_lines]
            # zip(*rows) turns per-token rows into per-column tuples.
            observations.append(observation_class(*zip(*conllx_lines), embeddings))
    return observations
class PTB_Dataset(LightningDataModule):
    """Lightning data module that serves the train/valid/test ``PTB_Split`` sets.

    A GPT-2 language model and its tokenizer are loaded once (from local
    files only), moved to CUDA, put in eval mode, frozen, and then shared
    by all three splits.
    """

    def __init__(self, config=None, probe=None):
        super().__init__()
        self.config = config
        self.probe = probe

        model_name = config["pretrained_model"]
        tokenizer = AutoTokenizer.from_pretrained(model_name, local_files_only=True)
        language_model = GPT2LMHeadModel.from_pretrained(
            model_name, local_files_only=True
        )
        language_model = language_model.to("cuda").eval()

        self.gpt = GPT2_extended(model=language_model, tokenizer=tokenizer, tail=None)
        # The language model is only used as a feature extractor.
        for weight in self.gpt.parameters():
            weight.requires_grad = False

    def setup(self, stage: Optional[str] = None) -> None:
        """Build the three dataset splits (``stage`` is not used)."""
        built = [
            PTB_Split(split=name, probe=self.probe, config=self.config, gpt=self.gpt)
            for name in ("train", "valid", "test")
        ]
        self.train_dataset = built[0]
        self.valid_dataset = built[1]
        self.test_dataset = built[2]

    def produce_dataloader(self, split):
        """Wrap the ``<split>_dataset`` attribute in a configured DataLoader."""
        dataset = self.__dict__[f"{split}_dataset"]
        data_params = self.config["data_params"]
        split_params = data_params[split]
        return DataLoader(
            dataset,
            batch_size=split_params["batch_size"],
            num_workers=data_params["num_workers"],
            shuffle=split_params["shuffle"],
            pin_memory=data_params["pin_memory"],
        )

    def train_dataloader(self) -> DataLoader:
        """Return the DataLoader over the training split."""
        return self.produce_dataloader("train")

    def val_dataloader(self) -> DataLoader:
        """Return the DataLoader over the validation split."""
        return self.produce_dataloader("valid")

    def test_dataloader(self) -> DataLoader:
        """Return the DataLoader over the test split."""
        return self.produce_dataloader("test")
def format_batch(self, batch):
    """Turn a positional 12-item batch into a keyword dict on the GPU.

    The items are expected in the order the dataset yields them: token ids,
    gold stacks, gold buffers, action ids, padded action n-grams, padded
    embeddings, gold distances, gold depths, lengths, gold tuples,
    continuous action masks and xpos tags. Every item is moved with
    ``.to('cuda')``.

    Args:
        batch: sequence of exactly twelve tensors (unpacking fails otherwise).

    Returns:
        dict mapping each field name to its tensor on 'cuda'. The continuous
        masks are stored under the key 'continuous_action_masks'.
    """
    (
        token_ids,
        gold_stacks,
        gold_buffers,
        action_ids,
        padded_action_ngrams,
        padded_embeddings,
        gold_distances,
        gold_depths,
        lengths,
        gold_tuples,
        cont_masks,
        xpos,
    ) = batch

    named_fields = (
        ('token_ids', token_ids),
        ('gold_stacks', gold_stacks),
        ('gold_buffers', gold_buffers),
        ('action_ids', action_ids),
        ('padded_action_ngrams', padded_action_ngrams),
        ('padded_embeddings', padded_embeddings),
        ('gold_distances', gold_distances),
        ('gold_depths', gold_depths),
        ('lengths', lengths),
        ('gold_tuples', gold_tuples),
        ('continuous_action_masks', cont_masks),
        ('xpos', xpos),  # used for evaluation
    )
    return {name: value.to('cuda') for name, value in named_fields}
class ClozeTail_gpt2(nn.Module):
    """Upper part of a GPT-2 LM: hidden states at ``layer_idx`` -> vocabulary logits.

    ``forward`` previously read ``self.transformer``, an attribute that was
    never set, so every call raised AttributeError and ``layer_idx`` was
    ignored. The tail now runs the remaining transformer blocks, the final
    layer norm and the LM head.

    NOTE(review): this assumes the Hugging Face GPT-2 layout
    (``cloze_model.transformer.h``, ``cloze_model.transformer.ln_f``,
    ``cloze_model.lm_head``) and that the input is the hidden state fed to
    block ``layer_idx`` - confirm against the installed transformers version.

    Args:
        cloze_model: language model exposing ``transformer.h``,
            ``transformer.ln_f`` and ``lm_head``.
        layer_idx: index of the first transformer block to apply.
    """

    def __init__(self, cloze_model, layer_idx):
        super().__init__()
        self.layers = cloze_model.transformer.h[layer_idx:]
        self.final_norm = cloze_model.transformer.ln_f
        self.last_layer = cloze_model.lm_head

    def forward(self, x):
        """Map hidden states ``x`` to the LM head's output."""
        for block in self.layers:
            block_output = block(x)
            # Blocks may return a tuple whose first item is the hidden state.
            x = block_output[0] if isinstance(block_output, tuple) else block_output
        return self.last_layer(self.final_norm(x))
def match_tokenized_to_untokenized(self, tokenized_sent, untokenized_sent):
    """Align whitespace-separated words with their sub-word tokens.

    The tokenized sentence is expected to start with a beginning-of-sequence
    token and end with an end-of-sequence token; both are left out of the
    alignment. A sub-word token opens a new word when it starts with the
    "Ġ" marker; every other token is attached to the word currently being
    collected.

    Args:
        tokenized_sent: list of sub-word token strings, including the
            leading and trailing special tokens.
        untokenized_sent: the words of the sentence; only its length is used.

    Returns:
        A defaultdict of type {int: list(int)} mapping each word index to
        the indices of its sub-word tokens in ``tokenized_sent``.
    """
    # Drop the trailing end-of-sequence token.
    subwords = tokenized_sent[:-1]
    subword_count = len(subwords)
    mapping = defaultdict(list)

    # Start at 1 to skip the beginning-of-sequence token.
    cursor = 1
    for word_index in range(len(untokenized_sent)):
        if cursor >= subword_count:
            break
        # Extend the span until the next token opens a new word.
        last = cursor
        while last + 1 < subword_count and not subwords[last + 1].startswith("Ġ"):
            last += 1
        mapping[word_index].extend(range(cursor, last + 1))
        cursor = last + 1
    return mapping
factor=0.1, patience=scheduler_patience + ) + + prediction_loss = 100 # Initialize the prediction loss as high + increment_idx = 0 + + smallest_loss = prediction_loss + steps_since_best = 0 + # print(prediction_loss > loss_tolerance) + while prediction_loss > loss_tolerance: + if increment_idx >= num_steps: + if verbose: + print("Breaking because of increment index") + break + + if increment_idx % print_every == 0 and verbose: + print(f"=========== step {increment_idx} ===========") + + if model_embeddings.shape[1] == len(untokenized_sent) + 2: + aligned_model_embeddings = model_embedding # s.unsqueeze(0) + + else: + assert model_embeddings.shape[2] == len(tokenized_sent) + + aligned_model_embeddings = torch.cat( + [ + torch.mean( + model_embeddings[ + :, + :, + untok_tok_mapping[i][0] : untok_tok_mapping[i][-1] + 1, + :, + ], + dim=2, + ) + for i, tok in enumerate(untokenized_sent) + ], + dim=1, + ).unsqueeze(1) + + aligned_model_embeddings = torch.cat( + ( + model_embeddings[:, :, 0:1, :], + aligned_model_embeddings, + model_embeddings[:, :, -1:, :], + ), + dim=2, + ) # .unsqueeze(0) + + assert aligned_model_embeddings.shape[2] == len(untokenized_sent) + 2 + + batch = { + "padded_embeddings": aligned_model_embeddings, + "gold_tuples": label_batch["gold_tuples"].clone(), + "action_ids": label_batch["action_ids"].clone(), + "continuous_action_masks": label_batch[ + "continuous_action_masks" + ].clone(), + } + + loss_dict = probe.batch_step_train(batch) + loss = loss_dict["loss"] + prediction_loss = loss.clone().detach() + if increment_idx == 0: + initial_loss = loss.clone().detach() + + """kldivloss""" + if compute_kl and kl_weight > 0: + print("computing kl") + postperturb_logits = self.tail(aligned_model_embeddings[0]) + + prefix_kl_loss = ( + F.kl_div( + preperturb_logits[:, :-2, :].log_softmax(-1), + postperturb_logits[:, :-2, :].log_softmax(-1), + size_average=None, + reduce=False, + log_target=True, + ) + .sum(-1) + .squeeze() + ) + + last_word_kl_loss = ( + 
F.kl_div( + preperturb_logits[:, -2:-1, :].log_softmax(-1), + postperturb_logits[:, -2:-1, :].log_softmax(-1), + size_average=None, + reduce=False, + log_target=True, + ) + .sum(-1) + .squeeze() + ) + + # output_kl_loss_mean = output_kl_loss.sum()/mask_mask.sum() + + loss += kl_weight * ( + torch.abs(last_word_kl_loss.mean() - lastword_freebits) + + torch.abs(prefix_kl_loss.mean() - prefix_freebits) + ) + if increment_idx % print_every == 0 and verbose: + print( + f"abs(last_word_kl - fb): {torch.abs(last_word_kl_loss.mean() - lastword_freebits).detach()}" + ) + print( + f"abs(prefix_kl - fb): {torch.abs(prefix_kl_loss.mean() - prefix_freebits).detach()}" + ) + """""" + + loss.backward() + # adwf + optimizer.step() + scheduler.step(loss) + + if increment_idx % print_every == 0 and verbose: + print(f"steps_since_best: {steps_since_best}") + print(f"total_loss: {loss.detach()}") + print("==============================") + print() + + if (smallest_loss - prediction_loss) > 0.001: + best_embeddings = model_embeddings.detach().clone() + steps_since_best = 0 + smallest_loss = prediction_loss + + else: + steps_since_best += 1 + # if steps_since_best == patience/2: + if steps_since_best == patience and verbose: + print("Breaking because of patience with loss", smallest_loss) + break + increment_idx += 1 + if verbose: + print(f"Exited grad update loop after {increment_idx} steps, ") + + return { + "padded_embeddings": best_embeddings, + "original_embeddings": original_embeddings[0], + "output_logits": None, + "original_logits": None, + "cfx_loss": prediction_loss.item(), + "initial_loss": initial_loss.item(), + } + + def parse_beamsearch( + self, + probe=None, + sentence=None, + generative=False, + topk=30, + ncont=5, + ): + """ + Beam search decoding + inputs: probe - IncrementalParse Probe + outputs: [(score, parsestate) x beam_width] + """ + probe.eval().to(self.model.device) + init_parserstate = probe.oracle.initial_state() + + original_model_embeddings, 
untok_tok_mapping = self.embeddings_w_map( + sentence, probe.layer + ) + original_model_embeddings = self.align( + original_model_embeddings, untok_tok_mapping + ) + init_parserstate.model_embeddings = original_model_embeddings + sentence_tokens = self.tokenizer.encode( + self.tokenizer.bos_token + sentence + self.tokenizer.eos_token, + return_tensors="pt", + ).to(self.model.device)[0] + self.model.device + endstates = [] + states = PriorityQueue() + state_count = count() + states.put((0, next(state_count), init_parserstate)) + + sentence_len = len(sentence.split()) + + while True: + next_states = [] + while states.qsize(): + if len(next_states) >= topk: + break + score, _, state = states.get() + + ngram_init_state = state + ngram_beam_width = ncont // 10 + ngram_topk = ncont + probe.eval() + # Number of ngrams to generate + ngram_endstates = [] + ngram_states = PriorityQueue() + ngram_states.put((0, ngram_init_state)) + # from itertools import count + + while True: + ngram_pruned_queue = PriorityQueue() + ngram_state_model_embeddings = [] + ngram_node1s = [] + ngram_node2s = [] + ngram_action_ids = [] + ngram_continuous_action_masks = [] + # prune to the topl + for i in range(ngram_topk): + if ngram_states.qsize(): + ngram_score, ngram_state = ngram_states.get() + # check if state has reached a shift or is terminal and check if we have the desired number of states + # and state batch data to meta batch + # only add to the batch if there is an action to predict + if len(ngram_state.stack) > 1: + ngram_state_batch = ngram_state.to_batch(probe) + ngram_state_model_embeddings.append( + ngram_state_batch["padded_embeddings"] + ) + ngram_node1s.append(ngram_state.stack[0]) + ngram_node2s.append(ngram_state.stack[1]) + ngram_action_ids.append(ngram_state_batch["action_ids"]) + ngram_continuous_action_masks.append( + ngram_state_batch["continuous_action_masks"] + ) + ngram_pruned_queue.put((ngram_score, ngram_state)) + + if ngram_node1s: + ngram_batch = { + 
"padded_embeddings": torch.cat( + ngram_state_model_embeddings, dim=0 + ).to(self.model.device), + "node1s": torch.tensor(ngram_node1s).to(self.model.device), + "node2s": torch.tensor(ngram_node2s).to(self.model.device), + "action_ids": torch.cat(ngram_action_ids, dim=0).to( + self.model.device + ), + "continuous_action_masks": torch.cat( + ngram_continuous_action_masks, dim=0 + ).to(self.model.device), + } + # run once for the whole q + ngram_action_dists = probe.action_dists(ngram_batch) + else: + ngram_action_dists = [] + ngram_states = ngram_pruned_queue + if not ngram_states.qsize(): + break + + ngram_c = count() + ngram_next_states = [] + + while ngram_states.qsize(): + ngram_score, ngram_state = ngram_states.get() + """get predictions from probe""" + ngram_possible_actions = np.array( + [i[0] for i in ngram_state.transitionset()] + ) + + if len(ngram_state.stack) > 1: + # get the action distribution for the current state + # if stack <=1 dont need to increment because it's action dist isnt in the batch + ngram_inc = next(ngram_c) + ngram_node1, ngram_node2 = ( + ngram_state.stack[0], + ngram_state.stack[1], + ) + ngram_actions_dist = ngram_action_dists[ngram_inc][:3] + + else: + ngram_node1, ngram_node2 = -1, -1 + ngram_actions_dist = ( + torch.zeros(probe.oracle.num_actions).to( + self.model.device + ) + - 1e10 + ) + ngram_actions_dist[probe.oracle.a2i["SHIFT"]] = 0 + + # take the top k scores + ngram_log_prob, ngram_indexes = torch.topk( + ngram_actions_dist, probe.oracle.num_actions + ) + ngram_possible_action_mask = torch.zeros( + probe.oracle.num_actions + ).to(self.model.device) + + for ngram_pa in ngram_possible_actions: + ngram_possible_action_mask += ngram_indexes == ngram_pa + + ngram_log_prob, ngram_indexes = ( + ngram_log_prob[ngram_possible_action_mask.bool()], + ngram_indexes[ngram_possible_action_mask.bool()], + ) + + for ngram_new_k, _ in enumerate(ngram_possible_actions): + ngram_action = ngram_indexes[ngram_new_k].item() + if ( + 0 in 
ngram_state.heads_idxs() + and ngram_node2 == 0 + and ngram_action == 2 + ): + continue + ngram_action_log_prob = ngram_log_prob[ngram_new_k].item() + + ngram_state_clone = ngram_state.clone() + # transition from int doesnt work aparently so we need to give tuple + probe.oracle.advance(ngram_state_clone, (ngram_action, -1)) + probe.oracle._preparetransitionset(ngram_state_clone) + + ngram_state_clone.action_tuples.append( + [ngram_action, ngram_node1, ngram_node2] + ) + ngram_state_clone.log_prob += ngram_action_log_prob + + ngram_state_clone.action_log_probs.append( + ngram_action_log_prob + ) + if ( + ngram_state_clone.action_tuples[-1][0] + == probe.oracle.a2i["SHIFT"] + and ngram_state_clone.action_tuples + != ngram_init_state.action_tuples + ) or len(ngram_state_clone.transitionset()) == 0: + ngram_endstates.append((ngram_score, ngram_state_clone)) + # if we reached maximum # of sentences required + if ( + len(ngram_endstates) >= ngram_beam_width + or not ngram_states.qsize() + ): + break + else: + continue + + ngram_next_states.append( + (-ngram_state_clone.log_prob, ngram_state_clone) + ) + + for ngram_ss in ngram_next_states: + ngram_states.put(ngram_ss) + + scores_conts = sorted( + ngram_endstates, key=lambda x: x[0], reverse=False + ) + + for score, cont in scores_conts: + if cont.num_shifts == sentence_len: + cont.buf = [] + else: + cont.buf = [cont.num_shifts + 1] + probe.oracle._preparetransitionset(cont) + # next_states.append((-cont.log_prob/len(cont.action_tuples),_, cont)) + next_states.append((-cont.log_prob, _, cont)) + + for ss in next_states: + states.put((ss[0], next(state_count), ss[2])) + + pruned_queue = PriorityQueue() + # mask the logits that are not the next token + next_token_masks = [] + state_action_tuples = [] + + # prune to the topk + for i in range(topk): + if states.qsize(): + score, _, state = states.get() + # check if state has reached a shift or is terminal and check if we have the desired number of states + if 
len(state.transitionset()) == 0: + if ( + state.num_shifts != sentence_len + or ( + np.array([state.head[i] for i in state.head.keys()]) + == 0 + ).sum() + > 1 + ): + if states.qsize(): + continue + else: + break + endstates.append((score, state)) + + if len(endstates) >= topk or not states.qsize(): + break + else: + continue + + if generative: + # and state batch data to meta batch + state_batch = state.to_batch(probe) + mask = ( + torch.zeros( + sentence_tokens.shape[0], self.tokenizer.vocab_size + ) + .to(self.model.device) + .unsqueeze(0) + ) + mask[ + :, + untok_tok_mapping[state.num_shifts - 1][ + 0 + ] : untok_tok_mapping[state.num_shifts - 1][-1] + + 1, + :, + ] = 1 + next_token_masks.append(mask) + + state_action_tuples.append( + torch.cat( + [ + state_batch["gold_tuples"], + torch.tensor([-1, -1, -1, -1]) + .unsqueeze(0) + .repeat( + 400 - state_batch["gold_tuples"].shape[1], 1 + ) + .unsqueeze(0) + .to(self.model.device), + ], + dim=1, + ) + ) + + pruned_queue.put((score, _, state)) + + if state_action_tuples: + batch = {"gold_tuples": torch.cat(state_action_tuples, dim=0)} + + counterfactuals = self.gen_counterfactuals( + probe=probe, + sent=sentence, + label_batch=batch, + output_probs=False, + print_every=100, + lr=0.001, + patience=100, + num_steps=50000, + loss_tolerance=0.01, + prefix_freebits=0, + lastword_freebits=0, + kl_weight=0, # .0001, + scheduler_patience=1000, + verbose=True, + compute_kl=False, + ) + + # run once for the whole q + counterfactual_logprobs = self.tail_by_layer( + probe.layer, counterfactuals["padded_embeddings"][:, 0, :, :] + ).log_softmax(dim=-1) + batch_mask = torch.cat(next_token_masks, dim=0) + next_word_log_probs = ( + torch.gather( + counterfactual_logprobs[:, :-1] * batch_mask[:, :-1], + -1, + sentence_tokens[1:] + .unsqueeze(0) + .T.unsqueeze(0) + .repeat(batch["gold_tuples"].shape[0], 1, 1), + ) + .sum(-1) + .sum(-1) + ) + + new_queue = PriorityQueue() + inc = count() + while pruned_queue.qsize(): + score, _, state 
= pruned_queue.get() + state.log_prob += next_word_log_probs[next(inc)].item() + # TODO: if using length norm then use it here + new_queue.put((-state.log_prob, _, state)) + states = new_queue + else: + states = pruned_queue + + if not states.qsize(): + break + + return endstates diff --git a/src/parse.py b/src/parse.py new file mode 100644 index 0000000..7535de9 --- /dev/null +++ b/src/parse.py @@ -0,0 +1,239 @@ +import os +import yaml +import argparse +from collections import defaultdict +from itertools import count +from tqdm import tqdm +import torch +import pandas as pd + +from experiment import IncrementalParseProbeExperiment +from task import ParseDepthTask +from datasets import PTB_Dataset +from utils import ignored_tags + +from transformers import AutoTokenizer, GPT2LMHeadModel +from transition import * +from utils import * +from gpt2 import GPT2_extended +import json + +args = argparse.ArgumentParser() +args.add_argument( + "--experiment_path", + type=str, + default="experiment_checkpoints/eval/gpt2/AttentiveProbe/layer_6/", +) + +args = args.parse_args() + +with open(args.experiment_path + "config.yaml") as file: + l_args = yaml.safe_load(file) + +print("loading probe...") +l_args["probe_params"]["pretrained_model"] = l_args["pretrained_model"] +l_args["probe_params"]["checkpoint_path"] = None +exp = IncrementalParseProbeExperiment.load_from_checkpoint( + args.experiment_path + "checkpoints/last.ckpt" +) +p = exp.probe.eval() + +print("loading gpt2...") +device = "cuda" +gpt2 = GPT2LMHeadModel.from_pretrained( + l_args["pretrained_model"], local_files_only=True +) +gpt2_tokenizer = AutoTokenizer.from_pretrained( + l_args["pretrained_model"], local_files_only=True +) + +for param in gpt2.parameters(): + param.requires_grad = False + +gpt2_ext = GPT2_extended(model=gpt2, tokenizer=gpt2_tokenizer, tail=None) + +results = pd.DataFrame( + columns=[ + "model", + "probe_name", + "layer", + "loss", + "distance_mse", + "depth_mse", + "oracle_action_nll", + "f1", 
+ "perplexity", + "accuracy", + "uuas_beamsearch", + "root_accuracy_beamsearch", + "root_accuracy_spanning_tree", + "uuas_spanning_tree", + ] +) + +l_args["data_params"]["test"]["shuffle"] = False +l_args["data_params"]["train"]["dry_run"] = 2 +l_args["data_params"]["valid"]["dry_run"] = 2 +l_args["data_params"]["test"]["dry_run"] = False + +l_args["probe_params"]["data_sources"].extend( + ["gold_distances", "gold_depths", "xpos", "gold_tuples"] +) + +distance_depth_data = PTB_Dataset(config=l_args, probe=p) +distance_depth_data.setup() + +with open(distance_depth_data.test_dataset.data_path) as f: + ( + total_sents, + correct_root_predictions, + uspan_correct, + uspan_total, + uas_correct, + uas_total, + uuas_w_head_total, + uuas_w_head_correct, + ) = (0, 0, 0, 0, 0, 0, 0, 0) + incr = count() + for idx, line in tqdm(enumerate(f), desc=f"beamsearch decoding"): + o = json.loads(line) + if o["key"] == "sentence" and o["projective"]: + inc = next(incr) + if len(o["tokens"]) > 1: + topk, ncont, parses = 10, 10, [] + + while not parses: + if topk > 100: + print("max beamsize exceeded, breaking") + break + print("topk:", topk, " /ncont:", ncont) + parses = gpt2_ext.parse_beamsearch( + probe=p, + sentence=" ".join(o["orig_tokens"]), + generative=False, + topk=topk, + ncont=ncont, + ) + topk, ncont = topk * 2, ncont * 2 + + if not parses: + print("no parses found") + continue + + batch = exp.format_batch( + [ + torch.tensor(i) + for i in distance_depth_data.test_dataset.__getitem__(inc) + ] + ) + + top_parse = parses[0][1] + test_batch = exp.format_batch( + [ + torch.tensor(i) + for i in distance_depth_data.test_dataset.__getitem__(inc) + ] + ) + + vparse = parses[0][1] + + gold_depths = batch["gold_depths"][: batch["lengths"]] + gold_distances = batch["gold_distances"][ + : batch["lengths"], : batch["lengths"] + ] + + pred_depths = ParseDepthTask.labels(obs(top_parse.heads_idxs())) + correct_root_predictions += ( + (gold_depths == 0).nonzero(as_tuple=True)[0] + ).item() 
== get_nopunct_argmin(vparse.heads_idxs(), batch["xpos"]) + + gold_edges = prims_matrix_to_edges(gold_distances, test_batch["xpos"]) + pred_edges = [ + tuple(sorted((tup[0] - 1, tup[1][0] - 1))) + for tup, tag in zip(vparse.head.items(), o["tags"]) + if not tag in ignored_tags + ] + + total_sents += 1 + top_parse_head_invetred = {} + + gold_heads = [ + i + for i, tag in zip( + distance_depth_data.test_dataset.observations[idx].head_indices, + o["tags"], + ) + if not tag in ignored_tags + ] + pred_heads = [ + i + for i, tag in zip(vparse.heads_idxs(), o["tags"]) + if not tag in ignored_tags + ] + + invert_heads = defaultdict(list) + for x, y in vparse.head.items(): + invert_heads[int(y[0])].append(int(x)) + + overlap = [ + h for i, h in enumerate(pred_heads) if gold_heads[i] == str(h) + ] + undir_overlap = [ + h + for i, h in enumerate(pred_heads) + if gold_heads[i] == str(h) or i in invert_heads[h] + ] + undir_overlap_no_root = [h for h in undir_overlap if h != 0] + + uuas_w_head_correct += len(undir_overlap) + + uuas_w_head_total += len(gold_heads) + uspan_correct += len(undir_overlap_no_root) + uspan_total += len(gold_heads) - 1 + uas_correct += len(overlap) + uas_total += len(gold_heads) + + root_acc = correct_root_predictions / float(total_sents) + uuas = uspan_correct / float(uspan_total) + uas = uas_correct / float(uas_total) + uuas_w_head = uuas_w_head_correct / float(uuas_w_head_total) + + print( + "root_acc:", + root_acc, + "uas:", + uas, + "uuas:", + uuas, + "uuas_w_head:", + uuas_w_head, + "inc:", + inc, + ) + + results = results.append( + { + "model": l_args["pretrained_model"], + "probe_name": l_args["probe_params"]["probe_name"], + "layer": l_args["probe_params"]["layer"], + "uuas_beamsearch": uuas, + "uas_beamsearch": uas, + "uuas_beamsearch_w_head": uuas_w_head, + "root_accuracy_beamsearch": root_acc, + }, + ignore_index=True, + ) + + results = results.melt( + id_vars=["model", "probe_name", "layer"], var_name="metric", value_name="value" + 
class Vocabulary(object):
    """Fixed word <-> id mapping built from (word, count) pairs.

    This vocabulary prohibits registering a new token during lookup.
    It is constructed from ``w2c_list``, a list of ``(word, count)`` pairs
    (counts in the training data, or anything).

    Id layout: index 0 is the padding token, then ``specials`` in order,
    then the words of ``w2c_list`` in the order given.

    Args:
        w2c_list: list of ``(word, count)`` pairs.
        pad: padding token stored at index 0.
        unkmethod: ``"unk"`` maps every unknown word to ``unktoken``;
            ``"berkeleyrule"`` / ``"berkeleyrule2"`` delegate to the
            corresponding ``utils.berkeley_unk_conv*`` helper.
        unktoken: generic unknown-word token.
        specials: extra tokens placed right after the padding token. The
            list is copied, so the caller's list is never modified.
    """

    # NOTE(review): the empty-string defaults for `pad` and `unktoken` make
    # both tokens share one `w2i` entry when the defaults are used together;
    # they look like placeholders that lost their original text - confirm.
    def __init__(
        self, w2c_list, pad="", unkmethod="unk", unktoken="", specials=None
    ):
        self.pad = pad
        self.padding_idx = 0
        # Copy: the unk token may be appended below, and that must not leak
        # into the caller's list or into a default shared between instances.
        specials = [] if specials is None else list(specials)
        self.specials = specials
        self.unkmethod = unkmethod
        self.unktoken = unktoken
        if self.unkmethod == "unk" and unktoken not in specials:
            specials.append(unktoken)

        assert isinstance(w2c_list, list)
        self.i2w = [self.pad] + specials + [w for w, _ in w2c_list]
        # For duplicated tokens the last index wins, as with dict([...]).
        self.w2i = {w: i for i, w in enumerate(self.i2w)}
        self.w2c = dict(w2c_list)
        self.i2c = {self.w2i[w]: c for w, c in self.w2c.items()}

        if self.unkmethod == "unk":
            self.unk_id = self.w2i[self.unktoken]

    def id_to_word(self, i):
        """Return the token stored at index ``i``."""
        return self.i2w[i]

    def to_unk(self, w):
        """Return the unknown-word surrogate for ``w`` under ``unkmethod``.

        Returns ``None`` when ``unkmethod`` is not one of the three methods
        handled here.
        """
        if self.unkmethod == "unk":
            return self.unktoken
        elif self.unkmethod == "berkeleyrule":
            return utils.berkeley_unk_conv(w)
        elif self.unkmethod == "berkeleyrule2":
            return utils.berkeley_unk_conv2(w)

    def to_unk_id(self, w_id):
        """Return the id of the unknown-word surrogate for token id ``w_id``."""
        if self.unkmethod == "unk":
            return self.unk_id
        # Ids in [1, 1 + len(specials)) are the special tokens themselves.
        if 1 <= w_id < 1 + len(self.specials):
            return w_id
        # NOTE(review): this always uses berkeley_unk_conv, even when
        # unkmethod is "berkeleyrule2" (to_unk uses berkeley_unk_conv2 there)
        # - confirm whether that asymmetry is intended.
        return self.get_id(utils.berkeley_unk_conv(self.i2w[w_id]))

    def size(self):
        """Return the number of entries, including padding and specials."""
        return len(self.i2w)

    def get_id(self, w):
        """Return the id of ``w``, backing off to an unknown-word token."""
        if w not in self.w2i:
            w = self.to_unk(w)
            if w not in self.w2i:
                # Back-off to a general unk token when converted unk-token is
                # not registered in the vocabulary (which happens when an
                # unseen unk token is generated at test time).
                w = self.unktoken
        return self.w2i[w]

    def get_count_from_id(self, w_id):
        """Return the stored count for token id ``w_id`` (0 if none)."""
        return self.i2c.get(w_id, 0)

    def get_count(self, w):
        """Return the stored count for word ``w`` (0 if none)."""
        return self.w2c.get(w, 0)

    # for serialization
    def list_w2c(self):
        """Return ``(word, count)`` pairs for every non-special, non-pad entry."""
        return [(w, self.get_count(w)) for w in self.i2w[1 + len(self.specials) :]]

    def dump(self, fn):
        """Write the vocabulary to ``fn`` in the text format read by ``load``.

        Four header lines (pad, unkmethod, unktoken, space-joined specials)
        are followed by one ``word<TAB>count`` line per regular word.
        """
        with open(fn, "wt") as o:
            o.write(self.pad + "\n")
            o.write(self.unkmethod + "\n")
            o.write(self.unktoken + "\n")
            o.write(" ".join(self.specials) + "\n")
            for w, c in self.list_w2c():
                o.write("{}\t{}\n".format(w, c))

    def to_json_dict(self):
        """Return a JSON-serializable dict accepted by ``from_data_json``."""
        return {
            "pad": self.pad,
            "unkmethod": self.unkmethod,
            "unktoken": self.unktoken,
            "specials": self.specials,
            "word_count": self.list_w2c(),
        }

    @staticmethod
    def load(fn):
        """Rebuild a ``Vocabulary`` from a file written by ``dump``.

        Args:
            fn: path of the vocabulary file.

        Returns:
            A new ``Vocabulary`` with the same tokens and counts.
        """
        with open(fn) as f:
            lines = f.readlines()
        # Strip only the line terminator so tokens keep any other whitespace.
        pad, unkmethod, unktoken, specials_line = [
            header.rstrip("\n") for header in lines[:4]
        ]
        # dump() joins the specials with single spaces on one line.
        specials = specials_line.split()

        def parse_line(line):
            # The count is the last whitespace-separated field on the line.
            w, c = line.rstrip("\n").rsplit(None, 1)
            return w, int(c)

        w2c_list = [parse_line(line) for line in lines[4:]]
        return Vocabulary(w2c_list, pad, unkmethod, unktoken, specials)

    @staticmethod
    def from_data_json(data):
        """Rebuild a ``Vocabulary`` from the ``"vocab"`` entry of ``data``."""
        d = data["vocab"]
        return Vocabulary(
            d["word_count"], d["pad"], d["unkmethod"], d["unktoken"], d["specials"]
        )
def get_tags_tokens_lowercase(line):
    """Extract POS tags, tokens and lower-cased tokens from a bracketed tree.

    Args:
        line: one bracketed tree string; trailing whitespace is ignored.

    Returns:
        ``[tags, tokens, lowercased_tokens]``, three parallel lists with one
        entry per terminal ``(TAG word)`` in left-to-right order.

    Raises:
        AssertionError: if a non-empty line does not start with ``(`` or a
            terminal does not consist of exactly two whitespace-separated
            fields.
    """
    line = line.rstrip()
    terminals = []
    for position, char in enumerate(line):
        if position == 0:
            assert char == "("
        # An opening bracket whose next bracket is a closing one is a
        # terminal symbol.
        if char == "(" and not is_next_open_bracket(line, position):
            terminals.append(get_between_brackets(line, position))

    tags, tokens, lowercased = [], [], []
    for terminal in terminals:
        fields = terminal.split()
        assert len(fields) == 2  # each terminal contains a POS tag and word
        tag, word = fields
        tags.append(tag)
        tokens.append(word)
        lowercased.append(word.lower())
    return [tags, tokens, lowercased]
def get_nonterminal(line, start_idx):
    """Return the label that follows the opening bracket at ``start_idx``.

    The label is every character after the bracket up to (not including)
    the first space, or up to the end of ``line`` when there is no space.

    Raises:
        AssertionError: if ``line[start_idx]`` is not ``(`` or a bracket
            appears inside the label.
    """
    assert line[start_idx] == "("  # make sure it's an open bracket
    label = line[start_idx + 1 :].partition(" ")[0]
    assert "(" not in label and ")" not in label
    return label
def get_sent_info(arg):
    """Build the per-sentence record that is serialized to the JSON output.

    Args:
        arg: ``(tree, setting)`` pair. ``tree`` is one bracketed tree line
            and ``setting`` is ``(lowercase, replace_num, vocab, sp)``.
            When ``sp`` is not ``None`` the tree is first rewritten with
            ``transform_to_subword_tree`` and ``vocab`` is not used.

    Returns:
        dict with keys ``orig_tokens``, ``tokens``, ``token_ids``, ``tags``
        and ``tree_str``.
    """
    tree, setting = arg
    tree = tree.strip()
    lowercase, replace_num, vocab, sp = setting
    if sp is not None:
        # use sentencepiece
        tree = transform_to_subword_tree(tree, sp)

    # Parse the (possibly rewritten) tree once.
    tags, tokens, tokens_lower = get_tags_tokens_lowercase(tree)
    orig_tokens = tokens[:]
    if sp is None:
        # these are not applied with sentencepiece
        if lowercase:
            tokens = tokens_lower
        if replace_num:
            tokens = [utils.clean_number(w) for w in tokens]

        token_ids = [vocab.get_id(t) for t in tokens]
        # Map the ids back so unknown words show up as their surrogate token.
        conved_tokens = [vocab.i2w[w_i] for w_i in token_ids]
    else:
        token_ids = sp.piece_to_id(tokens)
        conved_tokens = tokens

    return {
        "orig_tokens": orig_tokens,
        "tokens": conved_tokens,
        "token_ids": token_ids,
        "tags": tags,
        "tree_str": tree,
    }
def get_data(args):
    """Convert the train/valid/test tree files into line-delimited JSON.

    Passes, in order: collect the nonterminal labels of all three splits,
    obtain a vocabulary (or a sentencepiece model when
    ``args.unkmethod == "subword"``), then write ``test.json``,
    ``valid.json`` and ``train.json`` under ``args.outputpath``.

    Args:
        args: namespace providing ``trainfile``, ``valfile``, ``testfile``,
            ``outputpath``, ``unkmethod``, ``vocabfile``, ``seqlength``,
            ``minseqlength``, ``lowercase``, ``replace_num``, ``vocabsize``,
            ``vocabminfreq`` and ``jobs``.
    """

    def get_nonterminals(textfiles, jobs=-1):
        """Return the sorted, de-duplicated nonterminals found in ``textfiles``."""
        nts = set()
        for fn in textfiles:
            with open(fn, "r") as f:
                lines = f.readlines()
            with Pool(jobs) as pool:
                local_nts = pool.map(find_nts_in_tree, lines)
            nts.update(itertools.chain.from_iterable(local_nts))
        nts = sorted(nts)
        print("Found nonterminals: {}".format(nts))
        return nts

    def convert(
        textfile,
        lowercase,
        replace_num,
        seqlength,
        minseqlength,
        outfile,
        vocab,
        sp,
        apply_length_filter=True,
        jobs=-1,
    ):
        """Write one JSON line per kept sentence of ``textfile`` to ``outfile``.

        Sentences outside ``[minseqlength, seqlength]`` tokens are dropped
        when ``apply_length_filter`` is truthy. After the sentences, one
        ``{"key": ..., "value": ...}`` line is appended for each metadata
        entry (vocab, nonterminals, pad/unk tokens, args).
        """
        dropped = 0
        num_sents = 0
        conv_setting = (lowercase, replace_num, vocab, sp)

        def process_block(tree_with_settings, f):
            """Convert one block in a worker pool; return how many were dropped."""
            _dropped = 0
            with Pool(jobs) as pool:
                for sent_info in pool.map(get_sent_info, tree_with_settings):
                    tokens = sent_info["tokens"]
                    if apply_length_filter and (
                        len(tokens) > seqlength or len(tokens) < minseqlength
                    ):
                        _dropped += 1
                        continue
                    sent_info["key"] = "sentence"
                    f.write(json.dumps(sent_info) + "\n")
            return _dropped

        with open(outfile, "w") as f, open(textfile, "r") as in_f:
            # Trees are converted in blocks to bound the size of each pool.map.
            block_size = 100000
            tree_with_settings = []
            for tree in in_f:
                tree_with_settings.append((tree, conv_setting))
                if len(tree_with_settings) >= block_size:
                    dropped += process_block(tree_with_settings, f)
                    num_sents += len(tree_with_settings)
                    tree_with_settings = []
                    print(num_sents)
            if tree_with_settings:
                # The final partial block contributes to the drop count too.
                dropped += process_block(tree_with_settings, f)
                num_sents += len(tree_with_settings)

            others = {
                "vocab": vocab.to_json_dict() if vocab is not None else None,
                "nonterminals": nonterminals,
                "pad_token": pad,
                "unk_token": unk,
                "args": args.__dict__,
            }
            for k, v in others.items():
                print("Saving {} to {}".format(k, outfile + "." + k))
                f.write(json.dumps({"key": k, "value": v}) + "\n")

        # num_sents counts every sentence read; report only those written.
        print(
            "Saved {} sentences (dropped {} due to length/unk filter)".format(
                num_sents - dropped, dropped
            )
        )

    print("First pass through data to get nonterminals...")
    nonterminals = get_nonterminals(
        [args.trainfile, args.valfile, args.testfile], args.jobs
    )

    if args.unkmethod == "subword":
        if args.vocabfile != "":
            print(
                "Loading pre-trained sentencepiece model from {}".format(args.vocabfile)
            )
            import sentencepiece as spm

            sp = spm.SentencePieceProcessor(model_file=args.vocabfile)
            sp_model_path = "{}-spm.model".format(args.outputpath)
            print("Copy sentencepiece model to {}".format(sp_model_path))
            shutil.copyfile(args.vocabfile, sp_model_path)
        else:
            print(
                "unkmethod subword is selected. Running sentencepiece on the training data..."
            )
            # NOTE(review): learn_sentencepiece is neither defined nor imported
            # in the visible part of this module - confirm it exists before
            # relying on this branch.
            sp = learn_sentencepiece(
                args.trainfile, args.outputpath + "/" + "-spm", args
            )
        vocab = None
    else:
        if args.vocabfile != "":
            print("Loading pre-specified source vocab from " + args.vocabfile)
            vocab = Vocabulary.load(args.vocabfile)
        else:
            print("Second pass through data to get vocab...")
            vocab = make_vocab(
                args.trainfile,
                args.seqlength,
                args.minseqlength,
                args.lowercase,
                args.replace_num,
                args.vocabsize,
                args.vocabminfreq,
                args.unkmethod,
                args.jobs,
            )
        vocab.dump(args.outputpath + "/" + ".vocab")
        print("Vocab size: {}".format(len(vocab.i2w)))
        sp = None

    # Only the training split is length-filtered; test and valid keep every
    # sentence.
    convert(
        args.testfile,
        args.lowercase,
        args.replace_num,
        0,
        args.minseqlength,
        args.outputpath + "/" + "test.json",
        vocab,
        sp,
        apply_length_filter=False,
        jobs=args.jobs,
    )
    convert(
        args.valfile,
        args.lowercase,
        args.replace_num,
        args.seqlength,
        args.minseqlength,
        args.outputpath + "/" + "valid.json",
        vocab,
        sp,
        apply_length_filter=False,
        jobs=args.jobs,
    )
    convert(
        args.trainfile,
        args.lowercase,
        args.replace_num,
        args.seqlength,
        args.minseqlength,
        args.outputpath + "/" + "train.json",
        vocab,
        sp,
        apply_length_filter=True,
        jobs=args.jobs,
    )
", type=str, default="data" + ) + parser.add_argument("--vocabfile", type=str, default="") + parser.add_argument("--jobs", type=int, default=10) + # for example here is the command line to run the script + # python3 preprocess.py --trainfile data/train.txt --valfile data/valid.txt --testfile data/test.txt --outputfile ./data/ --jobs 10 --vocabminfreq 1 --lowercase + # comand to copy ./*.json to /data/cl/user/eisape/drive/ptb1/ + # cp ./*.json /data/cl/user/eisape/drive/ptb1/ + args = parser.parse_args(arguments) + if args.jobs == -1: + args.jobs = len(os.sched_getaffinity(0)) + # set file pats by hand + args.trainfile = args.data_dir + "/train.txt" + args.valfile = args.data_dir + "/valid.txt" + args.testfile = args.data_dir + "/test.txt" + args.outputpath = args.data_dir + + # np.random.seed(3435) + get_data(args) + + def transsys_lookup(k): + lookup = { + "ASw": ArcSwift, + "AER": ArcEagerReduce, + "AES": ArcEagerShift, + "ASd": ArcStandard, + "AH": ArcHybrid, + } + return lookup[k] + + def is_projective(lines): + projective = True + + # find decendents + words = ["ROOT"] + for line in lines: + words += [line[1]] + + children = [[] for i in range(len(words))] + for i, line in enumerate(lines): + try: + parent = int(line[6]) + relation = line[7] + children[parent] += [(relation, i + 1)] + except Exception: + print(line) + + decendents = [ + set([child[1] for child in children[i]]) for i in range(len(words)) + ] + + change = True + while change: + change = False + for i in range(len(decendents)): + update = [] + for d in decendents[i]: + for d1 in decendents[d]: + if d1 not in decendents[i]: + update += [d1] + if len(update) > 0: + decendents[i].update(update) + change = True + + for i, node in enumerate(children): + for child in node: + childid = child[1] + for j in range(min(childid, i) + 1, max(childid, i)): + if j not in decendents[i]: + projective = False + + return projective + + def processlines(lines, transsys): + arcs = [dict() for i in range(len(lines) + 
1)] + + pos = ["" for i in range(len(lines) + 1)] + fpos = ["" for i in range(len(lines) + 1)] + + for i, line in enumerate(lines): + pos[i + 1] = line[3] # fine-grained + fpos[i + 1] = line[4] + parent = int(line[6]) + relation = line[7] + arcs[parent][i + 1] = transsys.mappings["rel"][relation] + + res = [ + ParserState_dec([""] + lines, transsys=transsys, goldrels=arcs), + pos, + ] + if fpos: + res += [fpos] + else: + res == [None] + return res + + for dataset in ["valid", "train", "test"]: + sents = [] + ret_sents = [] + ds = dataset + # if dataset == 'valid': ds='val' + with open(args.outputpath + "/" + ds + ".json", "r") as f: + for line in f: + o = json.loads(line) + if o["key"] == "sentence": + sents.append(o) + + count, nonproj, lines = 0, 0, [] + + with open(args.outputpath + "/" + ds + ".conllx", "r") as fin: + line = fin.readline() + while line: + if line.startswith("#"): + line = fin.readline() + continue + line = line.strip().split() + if len(line) > 0 and "-" in line[0]: + line = fin.readline() + continue + + if len(line) == 0: + if is_projective(lines): + sents[count]["projective"] = True + for tsys in ["ASd"]: + sents[count][tsys] = {} + transsys = transsys_lookup(tsys)("./data/mappings-ptb.txt") + stck, buf, actions, tuples = [], [], [], [] + + state, pos, fpos = processlines(lines, transsys) + transsys = state.transsys + + while len(state.transitionset()) > 0: + t = transsys.goldtransition(state) + actions.append(t) + stck.append(state.stack) + buf.append(state.buf) + tup = transsys.goldtransition(state, return_tuple=True) + tuples.append(list(tup)) + transsys.advance(state, t) + + stck.append(state.stack) + buf.append(state.buf) + + sents[count][tsys]["gold_stacks"] = stck + sents[count][tsys]["gold_buffers"] = buf + sents[count][tsys]["actions"] = actions + sents[count][tsys]["action_tuples"] = tuples + ret_sents.append(sents[count]) + else: + # Remove non-projective sentences from the dataset + sents[count]["projective"] = False + 
class Task:
    """Abstract class representing a linguistic task mapping texts to labels."""

    @staticmethod
    def labels(observation):
        """Maps an observation to a matrix of labels.

        Should be overridden in implementing classes.
        """
        raise NotImplementedError


class ParseDistanceTask(Task):
    """Maps observations to dependency parse distances between words."""

    @staticmethod
    def _resolve_head_indices(observation):
        """Convert ``observation.head_indices`` into a list of ints.

        A ``'_'`` entry becomes head 0, and every later numeric head is
        shifted up by the number of ``'_'`` entries seen so far.
        """
        head_indices = []
        number_of_underscores = 0
        for elt in observation.head_indices:
            if elt == "_":
                head_indices.append(0)
                number_of_underscores += 1
            else:
                head_indices.append(int(elt) + number_of_underscores)
        return head_indices

    @staticmethod
    def labels(observation):
        """Computes the distances between all pairs of words; returns them as a torch tensor.

        Args:
            observation: a single Observation class for a sentence.
        Returns:
            A torch tensor of shape (sentence_length, sentence_length) of
            distances in the parse tree as specified by the observation
            annotation.
        """
        sentence_length = len(observation[0])  # All observation fields must be of same length
        distances = torch.zeros((sentence_length, sentence_length))
        # The head list is the same for every pair, so build it once here
        # instead of once per distance_between_pairs call.
        head_indices = ParseDistanceTask._resolve_head_indices(observation)
        for i in range(sentence_length):
            # The diagonal stays at its initial zero.
            for j in range(i + 1, sentence_length):
                i_j_distance = ParseDistanceTask.distance_between_pairs(
                    None, i, j, head_indices=head_indices
                )
                distances[i][j] = i_j_distance
                distances[j][i] = i_j_distance
        return distances

    @staticmethod
    def distance_between_pairs(
        observation,
        i,
        j,
        head_indices=None,
        disconnected_parse=False,
        subtree_distance=0,
    ):
        """Computes path distance between a pair of words.

        TODO: It would be (much) more efficient to compute all pairs'
        distances at once; this pair-by-pair method is an artefact of an
        older design, but was unit-tested for correctness...

        Args:
            observation: an Observation namedtuple, with a head_indices
                field, or None if head_indices is given.
            i: one of the two words to compute the distance between.
            j: one of the two words to compute the distance between.
            head_indices: the head indices (according to a dependency parse)
                of all words, or None if observation is given.
            disconnected_parse: accepted but not used by this implementation.
            subtree_distance: described as the distance between subtrees
                that are artificially connected through the root; accepted
                but not used by this implementation.
        Returns:
            The integer distance d_path(i,j)
        """
        if i == j:
            return 0
        if observation:
            head_indices = ParseDistanceTask._resolve_head_indices(observation)
        # Paths hold 1-based word positions; 0 stands for the root.
        i_path = [i + 1]
        j_path = [j + 1]
        i_head = i + 1
        j_head = j + 1
        while True:
            # Climb one step from each word, unless that side already
            # reached the root.
            if not (i_head == 0 and (i_path == [i + 1] or i_path[-1] == 0)):
                i_head = head_indices[i_head - 1]
                i_path.append(i_head)
            if not (j_head == 0 and (j_path == [j + 1] or j_path[-1] == 0)):
                j_head = head_indices[j_head - 1]
                j_path.append(j_head)
            # Stop as soon as one path reaches a node of the other path.
            if i_head in j_path:
                j_path_length = j_path.index(i_head)
                i_path_length = len(i_path) - 1
                break
            elif j_head in i_path:
                i_path_length = i_path.index(j_head)
                j_path_length = len(j_path) - 1
                break
            elif i_head == j_head:
                i_path_length = len(i_path) - 1
                j_path_length = len(j_path) - 1
                break

        total_length = j_path_length + i_path_length
        return total_length
# Training entry point: parse the CLI, load the YAML config, build the probe
# and fit it with PyTorch Lightning.

# cuDNN and TF32 matmul/conv kernels are switched off for the whole process.
torch.backends.cudnn.enabled = False
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False
os.environ["TOKENIZERS_PARALLELISM"] = "true"

parser = argparse.ArgumentParser()
parser.add_argument("--config", dest="filename", default="./configs/test.yaml")
# Values given on the command line arrive as strings; they are converted to
# int right after parsing.
parser.add_argument("--device", dest="device", default=[0], nargs="+")

args = parser.parse_args()
device = [int(d) for d in args.device]
config_path = args.filename
# From here on `args` is the config dict loaded from YAML, not the CLI
# namespace.
with open(args.filename, "r") as file:
    args = yaml.safe_load(file)

print(f"======= Training {args['probe_params']['probe_name']} =======")

# Forwarded to Trainer through **args["trainer_params"] below.
args["trainer_params"]["gpus"] = device
# A fresh seed is drawn on every run; it is stored only in the in-memory
# config, so the config.yaml copied into the log dir does not contain it.
args["exp_params"]["manual_seed"] = random.randint(1000, 2000)

tb_logger = TensorBoardLogger(
    save_dir=args["logging_params"]["save_dir"],
    name=args["probe_params"]["probe_name"],
    version=args["logging_params"]["version"],
)
Path(f"{tb_logger.log_dir}").mkdir(exist_ok=True, parents=True)
# Keep a copy of the config file next to the run's logs and checkpoints.
shutil.copy2(config_path, f"{tb_logger.log_dir}/config.yaml")
# NOTE(review): the second positional argument is presumably `workers=True`
# - confirm against the installed pytorch-lightning version.
seed_everything(args["exp_params"]["manual_seed"], True)

# The probe receives only `probe_params`, so the model name is copied in.
args["probe_params"]["pretrained_model"] = args["pretrained_model"]

# The probe class is looked up by name in the `architectures` module.
# NOTE(review): the probe is moved to "cuda" regardless of --device.
probe = getattr(architectures, args["probe_params"]["probe_type"])(
    args["probe_params"]
).to("cuda")

Trainer(
    logger=tb_logger,
    callbacks=[
        EarlyStopping(monitor="val_loss"),
        LearningRateMonitor(),
        # Checkpoints go under <log_dir>/checkpoints: the 5 best by val_loss
        # plus the last one.
        ModelCheckpoint(
            save_top_k=5,
            dirpath=os.path.join(tb_logger.log_dir, "checkpoints"),
            monitor="val_loss",
            filename="{epoch}-{val_loss:.2f}",
            save_last=True,
        ),
    ],
    strategy="ddp",
    **args["trainer_params"],
).fit(
    IncrementalParseProbeExperiment(probe=probe, params=args["exp_params"]),
    datamodule=datasets.PTB_Dataset(config=args, probe=probe),
)
+source: https://github.com/qipeng/arc-swift/blob/master/src/transition.py +""" +from smart_open import smart_open +import torch +import random +import copy +import torch.nn as nn +from collections import defaultdict +import numpy as np + +class ParserState_dec: + def __init__(self, sentence = [None], transsys=None, goldrels=None): + self.history = [] + self.action_tuples = [] + self.model_embeddings = torch.tensor([]) + self.log_prob = 0 + self.num_shifts = 0 + self.action_log_probs = [] + self.conditional_likelihood= [] + self.word_log_probs = [] + self.words = [] + self.expanded = False + + self.stack = [0] + self.buf = [i+1 for i in range(len(sentence)-1)] + # head and relation labels + self.head = defaultdict(list) #[[-1, -1] for _ in range(len(sentence))] + + self.goldrels = goldrels + + self.transsys = transsys + if self.transsys is not None: + self.transsys._preparetransitionset(self) + + self.terminated = False + + def to_batch(self, probe): + device = next(probe.parameters()).device + gold_tuples = torch.tensor([t+[-1] for t in self.action_tuples]).unsqueeze(0).to(device) + model_embeddings = self.model_embeddings.detach().clone().to(device) + + action_ids = [t[0] for t in self.action_tuples] + + if 'continuous_action_masks' in probe.args['data_sources']: + mask =generate_continuous_mask(action_ids, model_embeddings.shape[2])#self.num_shifts+1) + cont_mask = mask + # cont_mask = np.pad(mask, + # ((0, 400 - len(mask)),(0,0)), + # 'constant', constant_values=-1) + else: cont_mask = torch.tensor([-1]) + + return {'gold_tuples':gold_tuples, + 'padded_embeddings': model_embeddings, + 'action_ids':torch.tensor(action_ids).unsqueeze(0).to(device), + 'continuous_action_masks':torch.tensor(cont_mask).unsqueeze(0).to(device)} #tuples + + def heads_idxs(self): return [self.head[i][0] for i in sorted(self.head.keys())] + + def incremental_distance_matrix(self): + sentence_length = len(self.heads_idxs()) #All observation fields must be of same length + distances = 
def incremental_distance(self, i, j,unconnected_pad = 1):
    """Return (path length, relative depth) between tokens ``i`` and ``j``.

    ``i`` and ``j`` are 0-based positions into ``self.heads_idxs()``; inside
    the function nodes are 1-based and 0 is used as the root. Both tokens are
    walked up their head chains in lock-step until one chain reaches a node on
    the other.

    Returns:
        ``(0, 0)`` when ``i == j``;
        ``(-1, inf)`` when the joining path contains a ``-1`` head;
        otherwise ``(i_path_length + j_path_length,
        j_path_length - i_path_length)``.
    """
    if i == j:
        return 0, 0
    # Normalise the head list: '_' entries become root (0) and shift every
    # later numeric head by one.
    head_indices = []
    number_of_underscores = 0
    for elt in self.heads_idxs():
        if elt == '_':
            head_indices.append(0)
            number_of_underscores += 1
        else:
            head_indices.append(int(elt) + number_of_underscores)
    # Paths hold 1-based node ids, starting at the tokens themselves.
    i_path = [i+1]
    j_path = [j+1]
    i_head = i+1
    j_head = j+1
    while True:
        # Advance each side one step unless it is already parked at the root.
        # NOTE(review): a head value of -1 makes the next lookup
        # head_indices[-2], i.e. it wraps around to the end of the list --
        # confirm this is intended for not-yet-attached tokens.
        if not (i_head == 0 and (i_path == [i+1] or i_path[-1] == 0)):
            i_head = head_indices[i_head - 1]
            i_path.append(i_head)
        if not (j_head == 0 and (j_path == [j+1] or j_path[-1] == 0)):
            j_head = head_indices[j_head - 1]
            j_path.append(j_head)
        # Stop as soon as the two chains meet.
        if i_head in j_path:
            j_path_length = j_path.index(i_head)
            i_path_length = len(i_path) - 1

            break
        elif j_head in i_path:
            i_path_length = i_path.index(j_head)
            j_path_length = len(j_path) - 1
            break
        elif i_head == j_head:
            i_path_length = len(i_path) - 1
            j_path_length = len(j_path) - 1
            break

    total_length = j_path_length + i_path_length
    nodes_along_path = j_path[:j_path_length+1] + i_path[:i_path_length+1]

    # NOTE(review): this adjustment has no visible effect -- whenever -1 is on
    # the path the next `if` returns (-1, inf) and discards total_length, so
    # `unconnected_pad` never influences the result.
    if -1 in nodes_along_path:
        if unconnected_pad: total_length += unconnected_pad
        else: total_length = -1

    # A -1 on the path yields the sentinel result.
    if -1 in nodes_along_path:
        return -1, float('inf')

    # Positive when j's chain to the meeting point is longer than i's.
    rel_depth = -(i_path_length - j_path_length) if not j_path_length == i_path_length else 0
    return total_length, rel_depth
class ParserState:
    """Stack/buffer configuration of a transition-based dependency parser.

    Index 0 of ``sentence`` is treated as the root: it starts on the stack
    while positions 1..len(sentence)-1 start in the buffer. ``head[k]`` is a
    ``[head_index, relation]`` pair and ``pos[k]`` a tag id; both start at -1.
    """

    def __init__(self, sentence, transsys=None, goldrels=None):
        n_tokens = len(sentence)
        self.stack = [0]
        self.buf = list(range(1, n_tokens))
        self.head = [[-1, -1] for _ in range(n_tokens)]
        self.pos = [-1] * n_tokens
        self.goldrels = goldrels
        self.transsys = transsys
        # With a transition system attached, it fills in the legal actions.
        if transsys is not None:
            transsys._preparetransitionset(self)

    def transitionset(self):
        """Return the transitions most recently prepared for this state."""
        return self._transitionset

    def clone(self):
        """Return a shallow copy; the containers are new, their items shared."""
        twin = ParserState([])
        for field in ('stack', 'buf', 'head', 'pos', 'goldrels'):
            setattr(twin, field, copy.copy(getattr(self, field)))
        twin.transsys = self.transsys
        if hasattr(self, '_transitionset'):
            twin._transitionset = copy.copy(self._transitionset)
        return twin
def _preparetransitionset(self, parserstate):
    """Store the currently legal arc-swift transitions on ``parserstate``.

    Each candidate is an ``(action_id, stack_position)`` pair; SHIFT uses -1 as
    its position. The list is written to ``parserstate._transitionset``.
    """
    action_ids = self.mappings['action']
    shift_id = action_ids['SHIFT']
    left_id = action_ids['Left-Arc']
    right_id = action_ids['Right-Arc']

    stack, buf, head = parserstate.stack, parserstate.buf, parserstate.head
    remaining = len(buf)
    candidates = []

    if remaining > 1:
        candidates.append((shift_id, -1))

    # Left-Arc targets the first headless stack item, never the bottom one.
    left_target = None
    if remaining > 0:
        left_target = next(
            (depth for depth, node in enumerate(stack[:-1]) if head[node][0] < 0),
            None,
        )
        if left_target is not None:
            candidates.append((left_id, left_target))

    # With one buffer item left, Right-Arc is only offered when Left-Arc is not.
    if remaining > 1 or (remaining == 1 and left_target is None):
        for depth, node in enumerate(stack):
            candidates.append((right_id, depth))
            if head[node][0] < 0:
                break

    parserstate._transitionset = candidates
def goldtransition(self, parserstate, goldrels=None, return_tuple=False):
    """Return the oracle arc-swift action for ``parserstate``.

    ``goldrels[h][d]`` is the relation of the gold arc from head ``h`` to
    dependent ``d``; when the argument is falsy ``parserstate.goldrels`` is
    used. The stack is scanned from the top for an item that shares a gold arc
    with the front of the buffer; the scan stops after the first headless item.

    Returns:
        ``(action_id, stack_position, relation)``, with SHIFT reported as
        ``(SHIFT, -1, -1)``. With ``return_tuple=True`` the result is
        ``(action_id, buffer_front, stack_items)`` instead, where
        ``stack_items`` is the whole stack for SHIFT and the items above the
        matched position for an arc.
    """
    action_ids = self.mappings['action']
    shift_id = action_ids['SHIFT']
    left_id = action_ids['Left-Arc']
    right_id = action_ids['Right-Arc']

    arcs = goldrels or parserstate.goldrels
    stack = parserstate.stack
    buf = parserstate.buf
    head = parserstate.head

    front = buf[0]
    found = None
    for depth, node in enumerate(stack):
        if node in arcs[front]:
            # buffer front is the head of this stack item
            found = (left_id, depth, arcs[front][node])
            break
        if front in arcs[node]:
            # this stack item is the head of the buffer front
            found = (right_id, depth, arcs[node][front])
            break
        if head[node][0] < 0:
            break

    if found is None:
        if return_tuple:
            return shift_id, front, stack
        return (shift_id, -1, -1)

    if return_tuple:
        return found[0], front, stack[:found[1]]
    return found
def tuple_trans_from_int(self, cand, action):
    """Decode an integer action code into ``(candidate, relation)``.

    The code layout mirrors ``cand``: when the first candidate is SHIFT, code 0
    is that SHIFT (relation -1) and every following candidate owns a block of
    ``RELS`` consecutive codes starting at 1; otherwise the blocks start at 0.

    Args:
        cand: candidate transitions of the current state.
        action: non-negative integer action code.

    Returns:
        ``(candidate_tuple, relation_index)``.
    """
    SHIFT = self.mappings['action']['SHIFT']
    RELS = len(self.mappings['rel'])

    if cand[0][0] == SHIFT:
        if action == 0:
            return cand[0], -1
        # Floor division: "/" yields a float in Python 3, which cannot be used
        # as a list index.
        slot, rel = divmod(action - 1, RELS)
        return cand[slot + 1], rel

    slot, rel = divmod(action, RELS)
    return cand[slot], rel
def advance(self, parserstate, action):
    """Apply one arc-eager action to ``parserstate`` in place.

    Args:
        parserstate: state exposing ``stack``, ``buf``, ``head`` and
            ``transitionset()``.
        action: either an integer code (decoded with
            ``tuple_trans_from_int``) or a tuple whose first element is the
            action id and whose last element is the relation.

    Effects:
        SHIFT moves the buffer front onto the stack; Left-Arc attaches the
        stack top to the buffer front and pops it; Right-Arc attaches the
        buffer front to the stack top and pushes it; Reduce pops the stack.
        The legal transitions are then recomputed.
    """
    action_ids = self.mappings['action']
    SHIFT = action_ids['SHIFT']
    LEFTARC = action_ids['Left-Arc']
    RIGHTARC = action_ids['Right-Arc']
    REDUCE = action_ids['Reduce']

    if isinstance(action, int):
        # Candidates are only needed to decode integer codes.
        a, rel = self.tuple_trans_from_int(parserstate.transitionset(), action)
    else:
        rel = action[-1]
        a = action[:-1]

    stack = parserstate.stack
    buf = parserstate.buf
    kind = a[0]

    if kind == SHIFT:
        parserstate.stack = [buf[0]] + stack
        parserstate.buf = buf[1:]
    elif kind == LEFTARC:
        parserstate.head[stack[0]] = [buf[0], rel]
        parserstate.stack = stack[1:]
    elif kind == RIGHTARC:
        parserstate.head[buf[0]] = [stack[0], rel]
        parserstate.stack = [buf[0]] + stack
        parserstate.buf = buf[1:]
    elif kind == REDUCE:
        parserstate.stack = stack[1:]

    self._preparetransitionset(parserstate)
def trans_to_str(self, t, state, pos, fpos=None):
    """Render transition ``t`` as a tab-separated line.

    SHIFT and Right-Arc lines carry the tag(s) of the buffer front, taken from
    ``pos`` and, when given, ``fpos``. Arc lines carry the relation name looked
    up in ``self.invmappings['rel']``. Returns ``None`` for an unknown action
    id.
    """
    action_ids = self.mappings['action']
    SHIFT, LEFTARC, RIGHTARC, REDUCE = (
        action_ids[name] for name in ('SHIFT', 'Left-Arc', 'Right-Arc', 'Reduce')
    )
    kind = t[0]

    if kind == SHIFT:
        front = state.buf[0]
        if fpos is not None:
            return "SHIFT\t%s\t%s" % (pos[front], fpos[front])
        return "SHIFT\t%s" % (pos[front])

    if kind == LEFTARC:
        return "Left-Arc\t%s" % (self.invmappings['rel'][t[1]])

    if kind == RIGHTARC:
        front = state.buf[0]
        if fpos is not None:
            return "Right-Arc\t%s\t%s\t%s" % (self.invmappings['rel'][t[1]], pos[front], fpos[front])
        return "Right-Arc\t%s\t%s" % (self.invmappings['rel'][t[1]], pos[front])

    if kind == REDUCE:
        return "Reduce"

    return None
def tuple_trans_from_int(self, cand, action):
    """Decode an arc-eager integer action code into ``(action_tuple, relation)``.

    Code layout (``RELS`` = number of relations):
        0                     -> SHIFT, relation -1
        1                     -> Reduce, relation -1
        2 .. 2+RELS-1         -> Left-Arc with relation ``code - 2``
        2+RELS .. 2+2*RELS-1  -> Right-Arc with relation ``code - 2 - RELS``

    Args:
        cand: unused; kept for interface parity with the other systems.
        action: integer action code.

    Raises:
        ValueError: if ``action`` lies outside the layout above.
    """
    action_ids = self.mappings['action']
    SHIFT = action_ids['SHIFT']
    LEFTARC = action_ids['Left-Arc']
    RIGHTARC = action_ids['Right-Arc']
    REDUCE = action_ids['Reduce']
    RELS = len(self.mappings['rel'])

    if action == 0:
        return (SHIFT,), -1
    if action == 1:
        return (REDUCE,), -1

    offset = action - 2
    if 0 <= offset < RELS:
        return (LEFTARC,), offset
    if RELS <= offset < 2 * RELS:
        return (RIGHTARC,), offset - RELS

    # Fail with a clear message rather than an unbound-local error.
    raise ValueError("action code out of range: %r" % (action,))
def action_dists(self, p_shift, marginal_p_reduce):
    """Combine a shift probability and a two-way split into log-probabilities.

    A new last axis of size 3 is appended, holding
    ``log p_shift``,
    ``log(1 - p_shift) + log(1 - marginal_p_reduce)`` and
    ``log(1 - p_shift) + log(marginal_p_reduce)``.
    Presumably the columns follow the SHIFT / Left-Arc / Right-Arc order of
    ``actions_list`` -- verify against the caller.
    """
    log_shift = p_shift.unsqueeze(-1).log()
    log_not_shift = (1 - p_shift).unsqueeze(-1).log()
    split = torch.cat(
        (1 - marginal_p_reduce.unsqueeze(-1), marginal_p_reduce.unsqueeze(-1)),
        -1,
    )
    return torch.cat((log_shift, log_not_shift + split.log()), -1)
def advance(self, parserstate, action):
    """Apply one arc-standard action to ``parserstate`` in place.

    Args:
        parserstate: state exposing ``stack``, ``buf``, ``head``,
            ``num_shifts`` and ``transitionset()``.
        action: either an integer code (decoded with
            ``tuple_trans_from_int``) or a tuple whose first element is the
            action id and whose last element is the relation.

    Effects:
        SHIFT pushes the buffer front, registers it in ``head`` as unattached
        (``[-1, -1]``) and increments ``num_shifts``. Left-Arc attaches the
        second stack item to the top and removes it. Right-Arc attaches the top
        to the second item and pops the top. The legal transitions are then
        recomputed.
    """
    action_ids = self.mappings['action']
    SHIFT = action_ids['SHIFT']
    LEFTARC = action_ids['Left-Arc']
    RIGHTARC = action_ids['Right-Arc']

    if isinstance(action, int):
        # Candidates are only needed to decode integer codes.
        a, rel = self.tuple_trans_from_int(parserstate.transitionset(), action)
    else:
        rel = action[-1]
        a = action[:-1]

    stack = parserstate.stack
    buf = parserstate.buf
    kind = a[0]

    if kind == SHIFT:
        parserstate.stack = [buf[0]] + stack
        parserstate.head[buf[0]] = [-1, -1]
        parserstate.num_shifts += 1
        parserstate.buf = buf[1:]
    elif kind == LEFTARC:
        parserstate.head[stack[1]] = [stack[0], rel]
        parserstate.stack = [stack[0]] + stack[2:]
    elif kind == RIGHTARC:
        parserstate.head[stack[0]] = [stack[1], rel]
        parserstate.stack = stack[1:]

    self._preparetransitionset(parserstate)
def trans_to_str(self, t, state, pos, fpos=None):
    """Render arc-standard transition ``t`` as a tab-separated line.

    SHIFT lines carry the tag of the buffer front from ``pos`` and, when
    ``fpos`` is given, its fine-grained tag too. Arc lines carry the relation
    name from ``self.invmappings['rel']``.

    Returns:
        The formatted line; ``None`` for an unknown action id, or when
        ``fpos`` is given and a SHIFT tag cannot be looked up (best-effort
        behaviour kept from the original implementation).
    """
    action_ids = self.mappings['action']
    SHIFT = action_ids['SHIFT']
    LEFTARC = action_ids['Left-Arc']
    RIGHTARC = action_ids['Right-Arc']

    if t[0] == SHIFT:
        if fpos is None:
            return "SHIFT\t%s" % (pos[state.buf[0]])
        try:
            return "SHIFT\t%s\t%s" % (pos[state.buf[0]], fpos[state.buf[0]])
        except (IndexError, KeyError, TypeError):
            # Only lookup failures are tolerated here; a bare ``except`` would
            # also hide KeyboardInterrupt/SystemExit.
            return None
    elif t[0] == LEFTARC:
        return "Left-Arc\t%s" % (self.invmappings['rel'][t[1]])
    elif t[0] == RIGHTARC:
        return "Right-Arc\t%s" % (self.invmappings['rel'][t[1]])
    return None
def tuple_trans_from_int(self, cand, action):
    """Decode an arc-standard integer action code into ``(action_tuple, relation)``.

    Code layout (``RELS`` = number of relations):
        0                     -> SHIFT, relation -1
        1 .. RELS             -> Left-Arc with relation ``code - 1``
        RELS+1 .. 2*RELS      -> Right-Arc with relation ``code - 1 - RELS``

    Args:
        cand: unused; kept for interface parity with the other systems.
        action: integer action code.

    Raises:
        ValueError: if ``action`` lies outside the layout above.
    """
    action_ids = self.mappings['action']
    SHIFT = action_ids['SHIFT']
    LEFTARC = action_ids['Left-Arc']
    RIGHTARC = action_ids['Right-Arc']
    RELS = len(self.mappings['rel'])

    if action == 0:
        return (SHIFT,), -1

    offset = action - 1
    if 0 <= offset < RELS:
        return (LEFTARC,), offset
    if RELS <= offset < 2 * RELS:
        return (RIGHTARC,), offset - RELS

    # Fail with a clear message rather than an unbound-local error.
    raise ValueError("action code out of range: %r" % (action,))
def goldtransition(self, parserstate, goldrels=None, return_tuple=False):
    """Return the oracle arc-hybrid action for ``parserstate``.

    ``goldrels[h][d]`` is the relation of the gold arc from head ``h`` to
    dependent ``d``; when the argument is falsy ``parserstate.goldrels`` is
    used.

    Decision order:
      1. Left-Arc if the buffer front is the gold head of the stack top.
      2. Right-Arc if the second stack item is the gold head of the stack top
         and the top has no gold dependents left in the buffer.
      3. SHIFT otherwise.

    Returns:
        ``(action_id, relation)`` (relation -1 for SHIFT). With
        ``return_tuple=True`` the items the decision looked at are returned
        instead: ``(LEFTARC, buf[0], stack[0])``,
        ``(RIGHTARC, stack[0], stack[1])``,
        ``(SHIFT, buf[0], stack[0], stack[1])`` when both Right-Arc
        preconditions held, or ``(SHIFT, -1, -1, -1)`` otherwise.
    """
    action_ids = self.mappings['action']
    SHIFT = action_ids['SHIFT']
    LEFTARC = action_ids['Left-Arc']
    RIGHTARC = action_ids['Right-Arc']

    goldrels = goldrels or parserstate.goldrels
    stack = parserstate.stack
    buf = parserstate.buf

    # The stack top is "done" once none of its gold dependents is still
    # waiting in the buffer.
    stack0_done = not any(x in goldrels[stack[0]] for x in buf)

    if len(buf) > 0 and stack[0] in goldrels[buf[0]]:
        if return_tuple:
            return LEFTARC, buf[0], stack[0]
        return (LEFTARC, goldrels[buf[0]][stack[0]])

    if len(stack) > 1 and stack[0] in goldrels[stack[1]] and stack0_done:
        if return_tuple:
            return RIGHTARC, stack[0], stack[1]
        return (RIGHTARC, goldrels[stack[1]][stack[0]])

    if return_tuple:
        if len(stack) > 1 and stack0_done:
            return SHIFT, buf[0], stack[0], stack[1]
        # A precondition already ruled out Right-Arc, so nothing was compared.
        return SHIFT, -1, -1, -1
    return (SHIFT, -1)
def generate_continuous_mask(action_ids, token_pad):
    """Build one visibility row per action.

    A counter starts at 1 and grows by one for every action id equal to 0.
    Each row is that many ones followed by zeros up to ``token_pad`` columns.
    No zeros are added once the counter exceeds ``token_pad``, so such rows
    are longer than ``token_pad``.
    """
    rows = []
    visible = 1
    for action in action_ids:
        if action == 0:
            visible += 1
        rows.append([1] * visible + [0] * (token_pad - visible))
    return rows
def clean_number(w):
    """Replace every number in ``w`` with the placeholder ``N``.

    A number is a run of digits optionally continued by ``,``/``.`` separated
    digit groups, so ``"1,234.56"`` collapses to a single ``"N"``.
    """
    # Imported locally: the module's import block does not bring in ``re``.
    import re

    new_w = re.sub('[0-9]{1,}([,.]?[0-9]*)*', 'N', w)
    return new_w
def prune_queue(queue, k):
    """Move up to ``k`` lowest-priority-value entries into a new PriorityQueue.

    The moved entries are removed from ``queue``; fewer than ``k`` are moved
    when ``queue`` runs out first.
    """
    kept = PriorityQueue()
    budget = k
    while budget > 0 and queue.qsize():
        kept.put(queue.get())  # entries are (score, node) tuples
        budget -= 1
    return kept
class UnionFind:
    '''
    Minimal disjoint-set structure (no path compression, no union by rank).
    Used to track connected components while building minimum spanning
    trees from distance matrices.
    '''
    def __init__(self, n):
        # Every element starts as the representative of its own set.
        self.parents = list(range(n))

    def union(self, i, j):
        '''Merge the set containing i into j's set (no-op if already joined).'''
        root_i = self.find(i)
        if root_i != self.find(j):
            self.parents[root_i] = j

    def find(self, i):
        '''Return the representative of the set containing i.'''
        node = i
        while self.parents[node] != node:
            node = self.parents[node]
        return node
+ ''' + pairs_to_distances = {} + uf = UnionFind(len(matrix)) + for i_index, line in enumerate(matrix): + for j_index, dist in enumerate(line): + if IDX2XPOS[poses[i_index].item()] in ["''", ",", ".", ":", "``", "-LRB-", "-RRB-"]: + continue + if IDX2XPOS[poses[j_index].item()] in ["''", ",", ".", ":", "``", "-LRB-", "-RRB-"]: + continue + pairs_to_distances[(i_index, j_index)] = dist + edges = [] + for (i_index, j_index), distance in sorted(pairs_to_distances.items(), key = lambda x: x[1]): + if uf.find(i_index) != uf.find(j_index): + uf.union(i_index, j_index) + edges.append((i_index, j_index)) + return edges + +def get_nopunct_argmin(prediction, poses): + ''' + Gets the argmin of predictions, but filters out all punctuation-POS-tagged words + ''' + puncts = ["''", ",", ".", ":", "``", "-LRB-", "-RRB-"] + original_argmin = np.argmin(prediction) + for i in range(len(poses)): + argmin = np.argmin(prediction) + if IDX2XPOS[poses[argmin].item()] not in puncts: + return argmin + else: + prediction[argmin] = np.inf + return original_argmin + +def heads_to_displacy(sentence, heads): + displacy_format = { + "words": [ + {"text": token, "tag": ' '} for token in sentence.split() + ], + "arcs": [ + {"start": dep, "end": head[0], "label": ' ', "dir": "right"} if dep < head[0] else {"start": head[0], "end": dep, "label": ' ', "dir": "left"} for dep, head in heads.items() if head[0] != -1 + ] + } + displacy_format["words"].insert(0, {"text": 'ROOT', "tag": ' '}) + return displacy_format + +def load_lit_checkpoint(purpose,mod,probe_name,l): + if os.path.isfile(f"./experiment_checkpoints/{purpose}/{mod}/{probe_name}/layer_{str(l)}/checkpoints/last.ckpt"): + with open(f"./experiment_checkpoints/{purpose}/{mod}/{probe_name}/layer_{str(l)}/config.yaml", 'r') as file: l_args = yaml.safe_load(file) + l_args['probe_params']['pretrained_model'] = l_args['pretrained_model'] + p_ckpt = 
experiment.IncrementalParseProbeExperiment.load_from_checkpoint(f"./experiment_checkpoints/{purpose}/{mod}/{probe_name}/layer_{str(l)}/checkpoints/last.ckpt").probe + p = getattr(architectures, l_args['probe_params']['probe_type'])(l_args['probe_params']).to('cuda') + p.load_state_dict(p_ckpt.state_dict()) + p.eval() + p.oracle = transition.ArcStandard(l_args['probe_params']['oracle_params']['mappings_file']) + return l_args, p + else: return None, None + +def berkeley_unk_conv(ws): + """This is a simplified version of unknown token conversion in BerkeleyParser. + + The full version is berkely_unk_conv2. + """ + uk = "unk" + sz = len(ws) - 1 + if ws[0].isupper(): + uk = "c" + uk + if ws[0].isdigit() and ws[sz].isdigit(): + uk = uk + "n" + elif sz <= 2: + pass + elif ws[sz-2:sz+1] == "ing": + uk = uk + "ing" + elif ws[sz-1:sz+1] == "ed": + uk = uk + "ed" + elif ws[sz-1:sz+1] == "ly": + uk = uk + "ly" + elif ws[sz] == "s": + uk = uk + "s" + elif ws[sz-2:sz+1] == "est": + uk = uk + "est" + elif ws[sz-1:sz+1] == "er": + uk = uk + 'ER' + elif ws[sz-2:sz+1] == "ion": + uk = uk + "ion" + elif ws[sz-2:sz+1] == "ory": + uk = uk + "ory" + elif ws[0:2] == "un": + uk = "un" + uk + elif ws[sz-1:sz+1] == "al": + uk = uk + "al" + else: + for i in range(sz): + if ws[i] == '-': + uk = uk + "-" + break + elif ws[i] == '.': + uk = uk + "." + break + return "<" + uk + ">" + +def berkeley_unk_conv2(token): + numCaps = 0 + hasDigit = False + hasDash = False + hasLower = False + for char in token: + if char.isdigit(): + hasDigit = True + elif char == '-': + hasDash = True + elif char.isalpha(): + if char.islower(): + hasLower = True + elif char.isupper(): + numCaps += 1 + result = 'UNK' + lower = token.rstrip().lower() + ch0 = token.rstrip()[0] + if ch0.isupper(): + if numCaps == 1: + result = result + '-INITC' + # Remove this because it relies on a vocabulary, not given to this funciton (HN). 
+ # if lower in words_dict: + # result = result + '-KNOWNLC' + else: + result = result + '-CAPS' + elif not(ch0.isalpha()) and numCaps > 0: + result = result + '-CAPS' + elif hasLower: + result = result + '-LC' + if hasDigit: + result = result + '-NUM' + if hasDash: + result = result + '-DASH' + if lower[-1] == 's' and len(lower) >= 3: + ch2 = lower[-2] + if not(ch2 == 's') and not(ch2 == 'i') and not(ch2 == 'u'): + result = result + '-s' + elif len(lower) >= 5 and not(hasDash) and not(hasDigit and numCaps > 0): + if lower[-2:] == 'ed': + result = result + '-ed' + elif lower[-3:] == 'ing': + result = result + '-ing' + elif lower[-3:] == 'ion': + result = result + '-ion' + elif lower[-2:] == 'er': + result = result + '-er' + elif lower[-3:] == 'est': + result = result + '-est' + elif lower[-2:] == 'ly': + result = result + '-ly' + elif lower[-3:] == 'ity': + result = result + '-ity' + elif lower[-1] == 'y': + result = result + '-y' + elif lower[-2:] == 'al': + result = result + '-al' + return result + +import logging +import os.path as op +from smart_open import smart_open +# import cPickle as pickle +import pickle +from transition import ArcSwift, ArcEagerReduce, ArcEagerShift, ArcStandard, ArcHybrid +import numpy as np + +from copy import copy + +class ParserState: + def __init__(self, sentence, transsys=None, goldrels=None): +# print(sentence) + self.stack = [0] + # sentences should already have a symbol as the first token +# print([i+1 for i in range(len(sentence)-1)]) + self.buf = [i+1 for i in range(len(sentence)-1)] + # head and relation labels + self.head = [[-1, -1] for _ in range(len(sentence))] + + self.pos = [-1 for _ in range(len(sentence))] + + self.goldrels = goldrels + + self.transsys = transsys + if self.transsys is not None: + self.transsys._preparetransitionset(self) + + def transitionset(self): + return self._transitionset + + def clone(self): + res = ParserState([]) + res.stack = copy(self.stack) + res.buf = copy(self.buf) + res.head = 
copy(self.head) + res.pos = copy(self.pos) + res.goldrels = copy(self.goldrels) + res.transsys = self.transsys + if hasattr(self, '_transitionset'): + res._transitionset = copy(self._transitionset) + return res + + +transition_dims = ['action', 'n', 'rel', 'pos', 'fpos'] +transition_pos = {v:i for i, v in enumerate(transition_dims)} +floatX = np.float32 + +def transsys_lookup(k): + lookup = {"ASw": ArcSwift, + "AER": ArcEagerReduce, + "AES": ArcEagerShift, + "ASd": ArcStandard, + "AH" : ArcHybrid,} + return lookup[k] + +def process_example(conll_lines, seq_lines, vocab, mappings, transsys, fpos=False, log=None): + if fpos: + res = [[] for _ in range(4)] + else: + res = [[] for _ in range(3)] + res[0] = [vocab[u'']] + [vocab[u''] if line.split()[1] not in vocab else vocab[line.split()[1]] for line in conll_lines] + for line in seq_lines: + line = line.split() + try: + fields = transsys.trans_from_line(line) + except ValueError as e: + log.error('Encountered unknown transition type "%s" in sequences file, ignoring...' 
% (str(e))) + return None + + vector_form = [] + for k in transition_dims: + if k in fields: + if k in mappings: + fields[k] = mappings[k][fields[k]] + vector_form += [fields[k]] + else: + vector_form += [-1] # this should never be used + + res[1] += [vector_form] + + # gold POS + res[2] = [len(mappings['pos'])] + [mappings['pos'][line.split()[3]] for line in conll_lines] + if fpos: + # fine-grained POS + res[3] = [len(mappings['fpos'])] + [mappings['fpos'][line.split()[4]] for line in conll_lines] + + return tuple(res) + +def read_mappings(mappings_file, transsys, log=None): + i = 0 + res = dict() + res2 = dict() + with smart_open(mappings_file, 'r') as f: + for line in f: + line = line.strip() + if line.startswith("::"): + currentkey = line[2:] + res[currentkey] = dict() + res2[currentkey] = [] + i = 0 + else: + res[currentkey][line] = i + res2[currentkey] += [line] + i += 1 + + res['action'] = {k: i for i, k in enumerate(transsys.actions_list())} + res2['action'] = transsys.actions_list() + + return res, res2 + +def read_gold_parserstates(fin, transsys, fpos=False): + def processlines(lines): + arcs = [dict() for i in range(len(lines)+1)] + + pos = ["" for i in range(len(lines)+1)] + fpos = ["" for i in range(len(lines)+1)] + + for i, line in enumerate(lines): + pos[i+1] = line[3] # fine-grained + fpos[i+1] = line[4] + parent = int(line[6]) + relation = line[7] + arcs[parent][i+1] = transsys.mappings['rel'][relation] +# print(ParserState([""] + lines)) + + res = [ParserState([""] + lines, transsys=transsys, goldrels=arcs), pos] + if fpos: + res += [fpos] + else: + res == [None] + return res + res = [] + + lines = [] + line = fin.readline()#.decode('utf-8') + while line: + line = line.strip().split() + + if len(line) == 0: + res += [processlines(lines)] + + lines = [] + else: + lines += [line] + + line = fin.readline()#.decode('utf-8') + + if len(lines) > 0: + res += [processlines(lines)] +# print(res[0][0].buf) + + return res + +def write_gold_trans(tpl, fout): 
+ state, pos, fpos = tpl + transsys = state.transsys + while len(state.transitionset()) > 0: + t = transsys.goldtransition(state) + + fout.write("%s\n" % transsys.trans_to_str(t, state, pos, fpos)) + + transsys.advance(state, t) + + fout.write("\n") + +def multi_argmin(lst): + minval = 1e10 + res = [] + for i, v in enumerate(lst): + if v < minval: + minval = v + res = [i] + elif v == minval: + res += [i] + + return res + +XPOS2IDX = {'$': 0,'PRP$': 1,'VBZ': 2,'CD': 3,'JJS': 4,'VBG': 5,'IN': 6,'VB': 7,',': 8,'RB': 9,'JJ': 10,'LS': 11,'TO': 12,'UH': 13,'EX': 14,'``': 15,'SYM': 16,'NNP': 17,'WP': 18,'.': 19,"''": 20,'VBP': 21,'WP$': 22,'-RRB-': 23,'-LRB-': 24,'PDT': 25,'PRP': 26,'NNS': 27,':': 28,'WDT': 29,'POS': 30,'MD': 31,'RBS': 32,'RP': 33,'VBN': 34,'CC': 35,'NNPS': 36,'JJR': 37,'RBR': 38,'DT': 39,'WRB': 40,'NN': 41,'FW': 42,'VBD': 43,'#': 44} +IDX2XPOS = {v: k for k, v in XPOS2IDX.items()} \ No newline at end of file