From 8cdba09e81c32b68cb9df2041269221b22aac450 Mon Sep 17 00:00:00 2001 From: Jennifer Hu Date: Thu, 3 Oct 2019 17:16:33 -0400 Subject: [PATCH] clean accuracy code --- README.md | 38 +++- analysis/accuracy.py | 164 ------------------ analysis/compute_accuracy.py | 120 +++++++++++++ analysis/get_accuracy | 21 +++ analysis/{plot_all => get_figures} | 2 +- data/accuracy/all_exp_accuracy_grnn.csv | 16 -- data/accuracy/all_exp_accuracy_tiny_rnng.csv | 19 -- data/accuracy/all_exp_accuracy_tinywiki.csv | 10 -- data/accuracy/cc_accuracy_all.csv | 28 --- data/accuracy/exp1a-ml-rc.csv | 16 ++ data/accuracy/exp1b-ml-comp.csv | 16 ++ data/accuracy/exp2-rc.csv | 22 +++ data/accuracy/exp3-comp.csv | 22 +++ data/accuracy/exp4-pp.csv | 22 +++ ...rell_rc_subrc_accuracy_grnn_jrnn_trans.csv | 7 - ...c_subrc_accuracy_grnn_jrnn_trans_5gram.csv | 9 - data/accuracy/loc_accuracy_all.csv | 28 --- data/accuracy/ml_accuracy_grnn_jrnn_trans.csv | 10 -- .../ml_accuracy_grnn_jrnn_trans_5gram.csv | 13 -- ...ml_accuracy_grnn_jrnn_trans_5gram_bert.csv | 16 -- ..._grnn_multi_grnn_jrnn_trans_5gram_bert.csv | 19 -- .../ml_rc_accuracy_grnn_jrnn_trans.csv | 10 -- .../ml_rc_accuracy_grnn_jrnn_trans_5gram.csv | 13 -- ...rc_accuracy_grnn_jrnn_trans_5gram_bert.csv | 16 -- ..._grnn_multi_grnn_jrnn_trans_5gram_bert.csv | 19 -- data/accuracy/rc_accuracy_all.csv | 28 --- .../rc_accuracy_grnn_jrnn_trans_tiny_rnng.csv | 16 -- 27 files changed, 270 insertions(+), 450 deletions(-) delete mode 100644 analysis/accuracy.py create mode 100755 analysis/compute_accuracy.py create mode 100755 analysis/get_accuracy rename analysis/{plot_all => get_figures} (88%) delete mode 100644 data/accuracy/all_exp_accuracy_grnn.csv delete mode 100644 data/accuracy/all_exp_accuracy_tiny_rnng.csv delete mode 100644 data/accuracy/all_exp_accuracy_tinywiki.csv delete mode 100644 data/accuracy/cc_accuracy_all.csv create mode 100644 data/accuracy/exp1a-ml-rc.csv create mode 100644 data/accuracy/exp1b-ml-comp.csv create mode 100644 
data/accuracy/exp2-rc.csv create mode 100644 data/accuracy/exp3-comp.csv create mode 100644 data/accuracy/exp4-pp.csv delete mode 100644 data/accuracy/futrell_rc_subrc_accuracy_grnn_jrnn_trans.csv delete mode 100644 data/accuracy/futrell_rc_subrc_accuracy_grnn_jrnn_trans_5gram.csv delete mode 100644 data/accuracy/loc_accuracy_all.csv delete mode 100644 data/accuracy/ml_accuracy_grnn_jrnn_trans.csv delete mode 100644 data/accuracy/ml_accuracy_grnn_jrnn_trans_5gram.csv delete mode 100644 data/accuracy/ml_accuracy_grnn_jrnn_trans_5gram_bert.csv delete mode 100644 data/accuracy/ml_accuracy_grnn_multi_grnn_jrnn_trans_5gram_bert.csv delete mode 100644 data/accuracy/ml_rc_accuracy_grnn_jrnn_trans.csv delete mode 100644 data/accuracy/ml_rc_accuracy_grnn_jrnn_trans_5gram.csv delete mode 100644 data/accuracy/ml_rc_accuracy_grnn_jrnn_trans_5gram_bert.csv delete mode 100644 data/accuracy/ml_rc_accuracy_grnn_multi_grnn_jrnn_trans_5gram_bert.csv delete mode 100644 data/accuracy/rc_accuracy_all.csv delete mode 100644 data/accuracy/rc_accuracy_grnn_jrnn_trans_tiny_rnng.csv diff --git a/README.md b/README.md index b1ab937..fba97f7 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ training corpus. See the paper for more details on how we constructed our materials. ### Vocabulary issues -In all of our novel materials (**TODO: list the experiment names**), the +In our novel materials (used in `['exp2-rc-all', 'exp3-comp', 'exp4-pp']`), the lexical items are designed to be in-vocabulary for models trained on the Penn Treebank. This is not the case for the materials used in Experiment 1, the [Marvin & Linzen (2018)](https://arxiv.org/abs/1808.09031) replication. @@ -73,11 +73,16 @@ Penn Treebank. 
This is not the case for the materials used in Experiment 1, the The per-token surprisal values for each model can be found in the [data](data) folder, following this naming convention: ``` -data/<model>/<exp>/<pronoun>_<model>.txt +data/surprisal/<model>/<exp>/<pronoun>_<model>.txt ``` The BERT data is in a slightly different `.csv` format, but otherwise follows the same naming convention. +The accuracy results can be found at +``` +data/accuracy/<exp>.csv +``` + ## Dependencies Our analysis code requires a basic scientific installation of Python (`numpy`, `pandas`, `matplotlib`, `seaborn`, etc.). @@ -100,14 +105,18 @@ We can make the training script for our n-gram model available upon request. ## Reproducing our results ### Figures -To generate the plots for a given experiment and model, run the following: +To generate the plots for a given experiment and list of models, run the following: ```bash cd analysis -mkdir figures -python generate_lot.py -o figures -model <model> -exp <exp> -vs +mkdir -p figures +python generate_plot.py -o figures -model <model> -exp <exp> -vs ``` -This will save a plot to `analysis/figures/<exp>_<model>.png`. +This will save a plot to `analysis/figures/<exp>-<model>.png`. +Note that `<model>` can be a list of model names (e.g. `-model rnng bert jrnn`), +`'big'` for large-vocabulary models, or `'all'` for all models. The +large-vocabulary models are **BERT, Transformer-XL, JRNN, GRNN, 5-gram**. + The `-vs` flag specifies to plot the negative log probability **differential**. You can omit the flag to plot the raw negative log probabilities. @@ -117,9 +126,22 @@ if it does not exist): ```bash cd analysis -./plot_all figures +./get_figures figures ``` ### Accuracy -**TODO** +Similarly, to compute the accuracy for a given experiment and list of models, +run: +```bash +cd analysis +mkdir -p accuracy +python compute_accuracy.py -o accuracy -model <model> -exp <exp> +``` +This will save a file to `analysis/accuracy/<exp>-<model>.csv`.
+ +To compute the accuracy for all our experiments, run the following: +```bash +cd analysis +./get_accuracy accuracy +``` \ No newline at end of file diff --git a/analysis/accuracy.py b/analysis/accuracy.py deleted file mode 100644 index 15bef0d..0000000 --- a/analysis/accuracy.py +++ /dev/null @@ -1,164 +0,0 @@ -""" - accuracy.py - Get accuracy results. -""" -import argparse -from numpy import mean -import random -import pandas as pd - -import utils - -################################################################################# -# Global variables -################################################################################# - -# MODELS = ['grnn_multi', 'grnn', 'jrnn', 'trans', 'rnng', 'tiny', 'tinywiki', '5gram', 'bert'] - -################################################################################# -# Helper functions -################################################################################# - -# def _prob_ratio(df1, df2): -# prob_ratios = [] -# for row in df1.itertuples(): -# surprisal1 = row.surprisal -# surprisal2 = df2.loc[row.Index].surprisal -# prob_ratio = 2**(surprisal2 - surprisal1) -# prob_ratios.append(prob_ratio) -# return mean(prob_ratios) - - -# def _get_data_df(data, surp, exp, nonrefl, multi=False): -# # read surprisals and data -# if not multi: -# surp_df = pd.read_csv(surp, delim_whitespace=True, -# names=['token', 'surprisal']) -# else: -# surp_df = pd.read_csv(surp, sep=' ', -# names=['token', 'sentid', 'sentpos', 'wlen', 'surprisal', 'entropy'], -# skiprows=2, skipfooter=3) -# print(surp_df.head()) -# data_df = pd.read_csv(data) - -# agree, pl = 'agree' in exp, 'pl' in exp -# # only keep surprisal at specified pronoun or verb -# if agree: -# verb = 'were' if pl else 'was' -# surp_df = surp_df.loc[surp_df.token == verb] -# else: -# if nonrefl: -# pn = 'them' if pl else exp.split('_')[-1][:3] -# else: -# pn = 'themselves' if pl else exp.split('_')[-1] -# surp_df = surp_df.loc[surp_df.token == pn] - -# # data_df = 
data_df.loc[data_df.pronoun == pn] - -# # insert surprisal into data_df -# data_df['surprisal'] = surp_df.surprisal.values - -# return data_df - - -# def _subtract_baseline(df, exp): -# item_list = df.item.unique() -# for item in item_list: -# item_rows = df.loc[df.item == item] -# base_rows = item_rows.loc[item_rows.mismatch_position == 'none'] -# baseline = base_rows.surprisal.mean() -# # subtract baseline from surprisal of all rows -# item_rows.surprisal -= baseline -# df.loc[df.item == item] = item_rows -# return df - -def get_accuracy(df, mismatch_position): - item_list = df.item.unique() - n_items = len(item_list) - num_correct_vs_baseline = 0 - num_correct_vs_distractor = 0 - num_correct = 0 - - for item in item_list: - item_rows = df[df.item == item] - ungrammatical_rows = item_rows[item_rows.grammatical == 0] - baseline_rows = item_rows[item_rows.mismatch_position == 'none'] - distractor_rows = item_rows[item_rows.mismatch_position == mismatch_position] - - vs_baseline = ungrammatical_rows.surprisal.mean() - baseline_rows.surprisal.mean() - vs_distractor = ungrammatical_rows.surprisal.mean() - distractor_rows.surprisal.mean() - - if vs_baseline > 0: - num_correct_vs_baseline += 1 - - if vs_distractor > 0: - num_correct_vs_distractor += 1 - - if vs_baseline > 0 and vs_distractor > 0: - num_correct += 1 - - elif vs_baseline == 0 and vs_distractor == 0: - choice = random.choice(['baseline', 'distractor', 'ungrammatical']) - if choice == 'ungrammatical': - num_correct += 1 - - vs_baseline_acc = num_correct_vs_baseline / float(n_items) - vs_distractor_acc = num_correct_vs_distractor / float(n_items) - total_acc = num_correct / float(n_items) - - return total_acc, vs_baseline_acc, vs_distractor_acc - - -################################################################################# -# Main function -################################################################################# - -def main(out_prefix, model, exp): - out_path = '%s/%s_accuracy_%s.csv' % 
(out_prefix, exp, '_'.join(model)) - suffixes = ['_himself', '_herself', '_pl'] - model_list = MODELS if model == ['all'] else model - - acc_dict = {'model':[], 'full_exp':[], 'total_acc':[], 'vs_baseline_acc':[], 'vs_distractor_acc':[]} - for m in model_list: - print(m) - dfs = [] - for s in suffixes: - full_exp = exp + s - print(full_exp) - data_path = '../materials/%s.csv' % full_exp - surp = '../surprisal_data/%s/%s_surprisal_%s.txt' % (m, full_exp, m) - if m == 'bert': - df = pd.read_csv('../surprisal_data/bert/%s_surprisal_bert.csv' % full_exp) - else: - multi = m == 'grnn_multi' - df = _get_data_df(data_path, surp, full_exp, nonrefl=nonrefl, multi=multi) - - if 'rc' in exp: - mismatch_position = 'rc_subj' - elif 'loc' in exp or 'ml' in exp: - mismatch_position = 'nonlocal_subj' - elif 'cc' in exp: - mismatch_position = 'distractor' - - total_acc, vs_baseline_acc, vs_distractor_acc = _get_accuracy(df, mismatch_position) - acc_dict['model'].append(m) - acc_dict['total_acc'].append(total_acc) - acc_dict['full_exp'].append(full_exp) - acc_dict['vs_baseline_acc'].append(vs_baseline_acc) - acc_dict['vs_distractor_acc'].append(vs_distractor_acc) - acc_df = pd.DataFrame(acc_dict) - acc_df.to_csv(out_path, index=False) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Compute accuracy for models.') - parser.add_argument('--out_prefix', '-out_prefix', '--O', '-O', - default='accuracy', - help='prefix to save final file (file will ' - 'be named according to experiment name)') - parser.add_argument('--model', '-model', '--M', '-M', nargs='+', - help='names of models, or all to plot all at once') - parser.add_argument('--exp', '-exp', - help='name of experiment') - args = parser.parse_args() - main(args) diff --git a/analysis/compute_accuracy.py b/analysis/compute_accuracy.py new file mode 100755 index 0000000..118f865 --- /dev/null +++ b/analysis/compute_accuracy.py @@ -0,0 +1,120 @@ +""" + accuracy.py + Get accuracy results. 
+""" +import argparse +from pathlib import Path +from numpy import mean +import random +import pandas as pd + +import utils + +def get_accuracy(df, distractor_pos): + item_list = df.item.unique() + n_items = len(item_list) + num_correct_vs_baseline = 0 + num_correct_vs_distractor = 0 + num_correct = 0 + + for item in item_list: + item_rows = df[df.item == item] + baseline_rows = item_rows[item_rows.mismatch_position == 'none'] + distractor_rows = item_rows[item_rows.mismatch_position == distractor_pos] + ungrammatical_rows = item_rows[item_rows.grammatical == 0] + + vs_baseline = ungrammatical_rows.surprisal.mean() - baseline_rows.surprisal.mean() + vs_distractor = ungrammatical_rows.surprisal.mean() - distractor_rows.surprisal.mean() + + # Check if ungrammatical - baseline is positive. + if vs_baseline > 0: + num_correct_vs_baseline += 1 + + # Check if ungrammatical - distractor is positive. + if vs_distractor > 0: + num_correct_vs_distractor += 1 + + # Check if both differentials are positive. + if vs_baseline > 0 and vs_distractor > 0: + num_correct += 1 + + # If both differentials are zero, then label correct with probability 1/3. + elif vs_baseline == 0 and vs_distractor == 0: + choice = random.choice(['baseline', 'distractor', 'ungrammatical']) + if choice == 'ungrammatical': + num_correct += 1 + + # Calculate proportion of items where different accuracy conditions hold. + vs_baseline_acc = num_correct_vs_baseline / float(n_items) + vs_distractor_acc = num_correct_vs_distractor / float(n_items) + total_acc = num_correct / float(n_items) + + return total_acc, vs_baseline_acc, vs_distractor_acc + +################################################################################# +# Main function -- partially shared with generate_plot.py +################################################################################# + +def main(args): + # Get list of model names. 
+ if args.model == ['all']: + model_list = utils.MODELS + elif args.model == ['big']: + model_list = utils.BIG_MODELS + else: + model_list = args.model + + # Ensure only large-vocabulary models are specified for M&L replication. + if 'ml' in args.exp and any(m not in utils.BIG_MODELS for m in model_list): + raise ValueError( + 'Only large-vocabulary models are compatible with ' + 'Marvin & Linzen\'s (2018) materials. ' + 'Please use "--model big" to plot the results from that experiment.' + ) + + # Assign file name based on name of experiment and specified models. + out_path = Path(f'{args.out_prefix}/{args.exp}-{"_".join(args.model)}.csv') + + acc_dict = [] + for model in model_list: + # Get data for each pronoun for current model. + for pn in utils.PRONOUNS: + surp_ext = 'csv' if model == 'bert' else 'txt' + surp_path = Path( + f'../data/surprisal/{model}/{args.exp}/{pn}_{model}.{surp_ext}' + ) + if model == 'bert': + pn_df = pd.read_csv(surp_path) + else: + data_path = Path(f'../stimuli/{args.exp}/{pn}.csv') + pn_df = utils.get_data_df(data_path, surp_path, args.exp, pn) + + # Assign appropriate mismatch position for distractor condition. 
+ if 'rc' in args.exp: + distractor_pos = 'rc_subj' + elif 'comp' in args.exp or 'ml' in args.exp: + distractor_pos = 'nonlocal_subj' + else: + distractor_pos = 'distractor' + + total_acc, vs_baseline_acc, vs_distractor_acc = get_accuracy( + pn_df, distractor_pos + ) + acc_dict.append(dict( + model=model, total_acc=total_acc, exp=args.exp, pronoun=pn, + vs_baseline_acc=vs_baseline_acc, vs_distractor_acc=vs_distractor_acc + )) + acc_df = pd.DataFrame(acc_dict) + acc_df.to_csv(out_path, index=False) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Compute accuracy for models.') + parser.add_argument('--out_prefix', '-out_prefix', '--o', '-o', + help='prefix to path to save final .csv file ' + '(file will be named according to experiment)') + parser.add_argument('--model', '-model', '--m', '-m', nargs='+', + help='list of model names, or "all" or "big"') + parser.add_argument('--exp', '-exp', help='name of experiment') + args = parser.parse_args() + main(args) diff --git a/analysis/get_accuracy b/analysis/get_accuracy new file mode 100755 index 0000000..ea256c4 --- /dev/null +++ b/analysis/get_accuracy @@ -0,0 +1,21 @@ +#!/bin/bash + +if [ "$#" -ne 1 ]; then + echo "Expected usage: ./get_accuracy " +fi + +mkdir -p $1 + +ML_EXPS=("exp1a-ml-rc" "exp1b-ml-comp") +OTHER_EXPS=("exp2-rc" "exp3-comp" "exp4-pp") +EXPS=("${ML_EXPS[@]}" "${OTHER_EXPS[@]}") + +for exp in ${EXPS[@]}; do + echo "== Computing accuracy for $exp ==" + if [[ " ${ML_EXPS[*]} " == *" $exp "* ]]; then + model="big" + else + model="all" + fi + python compute_accuracy.py -o $1 -model $model -exp $exp +done \ No newline at end of file diff --git a/analysis/plot_all b/analysis/get_figures similarity index 88% rename from analysis/plot_all rename to analysis/get_figures index 340681a..4887fa7 100755 --- a/analysis/plot_all +++ b/analysis/get_figures @@ -1,7 +1,7 @@ #!/bin/bash if [ "$#" -ne 1 ]; then - echo "Expected usage: ./plot_all " + echo "Expected usage: ./get_figures " fi 
mkdir -p $1 diff --git a/data/accuracy/all_exp_accuracy_grnn.csv b/data/accuracy/all_exp_accuracy_grnn.csv deleted file mode 100644 index 23627d1..0000000 --- a/data/accuracy/all_exp_accuracy_grnn.csv +++ /dev/null @@ -1,16 +0,0 @@ -model,full_exp,total_acc,vs_baseline_acc,vs_distractor_acc -grnn,loc_himself,1.0,1.0,1.0 -grnn,loc_herself,0.9733333333333334,1.0,0.9733333333333334 -grnn,loc_pl,1.0,1.0,1.0 -grnn,cc_himself,0.6533333333333333,1.0,0.6533333333333333 -grnn,cc_herself,0.8133333333333334,0.9866666666666667,0.8266666666666667 -grnn,cc_pl,0.7866666666666666,1.0,0.7866666666666666 -grnn,rc_himself,0.13333333333333333,1.0,0.13333333333333333 -grnn,rc_herself,0.37333333333333335,1.0,0.37333333333333335 -grnn,rc_pl,0.76,1.0,0.76 -grnn,ml_himself,1.0,1.0,1.0 -grnn,ml_herself,1.0,1.0,1.0 -grnn,ml_pl,1.0,1.0,1.0 -grnn,ml_rc_himself,0.5714285714285714,1.0,0.5714285714285714 -grnn,ml_rc_herself,0.8857142857142857,1.0,0.8857142857142857 -grnn,ml_rc_pl,0.6428571428571429,1.0,0.6428571428571429 \ No newline at end of file diff --git a/data/accuracy/all_exp_accuracy_tiny_rnng.csv b/data/accuracy/all_exp_accuracy_tiny_rnng.csv deleted file mode 100644 index d7a1a93..0000000 --- a/data/accuracy/all_exp_accuracy_tiny_rnng.csv +++ /dev/null @@ -1,19 +0,0 @@ -model,full_exp,total_acc,vs_baseline_acc,vs_distractor_acc -rnng,loc_himself,1.0,1.0,1.0 -rnng,loc_herself,0.9866666666666667,1.0,0.9866666666666667 -rnng,loc_pl,0.7066666666666667,0.8666666666666667,0.7066666666666667 -tiny,loc_himself,0.9333333333333333,0.9733333333333334,0.9333333333333333 -tiny,loc_herself,0.49333333333333335,0.52,0.6933333333333334 -tiny,loc_pl,0.9866666666666667,0.9866666666666667,0.9866666666666667 -rnng,cc_himself,0.64,0.9866666666666667,0.6533333333333333 -rnng,cc_herself,0.6266666666666667,0.9733333333333334,0.6266666666666667 -rnng,cc_pl,0.6133333333333333,0.9733333333333334,0.64 -tiny,cc_himself,0.21333333333333335,0.9066666666666666,0.24 -tiny,cc_herself,0.12,0.28,0.28 
-tiny,cc_pl,0.9333333333333333,1.0,0.9333333333333333 -rnng,rc_himself,0.12,0.8,0.12 -rnng,rc_herself,0.0,0.7733333333333333,0.0 -rnng,rc_pl,0.6,1.0,0.6 -tiny,rc_himself,0.14666666666666667,1.0,0.14666666666666667 -tiny,rc_herself,0.21333333333333335,0.41333333333333333,0.29333333333333333 -tiny,rc_pl,0.04,1.0,0.04 \ No newline at end of file diff --git a/data/accuracy/all_exp_accuracy_tinywiki.csv b/data/accuracy/all_exp_accuracy_tinywiki.csv deleted file mode 100644 index 4b335c4..0000000 --- a/data/accuracy/all_exp_accuracy_tinywiki.csv +++ /dev/null @@ -1,10 +0,0 @@ -model,full_exp,total_acc,vs_baseline_acc,vs_distractor_acc -tinywiki,loc_himself,1.0,1.0,1.0 -tinywiki,loc_herself,1.0,1.0,1.0 -tinywiki,loc_pl,1.0,1.0,1.0 -tinywiki,cc_himself,0.37333333333333335,1.0,0.37333333333333335 -tinywiki,cc_herself,0.84,1.0,0.84 -tinywiki,cc_pl,1.0,1.0,1.0 -tinywiki,rc_himself,0.0,1.0,0.0 -tinywiki,rc_herself,0.21333333333333335,1.0,0.21333333333333335 -tinywiki,rc_pl,0.14666666666666667,0.5466666666666666,0.14666666666666667 \ No newline at end of file diff --git a/data/accuracy/cc_accuracy_all.csv b/data/accuracy/cc_accuracy_all.csv deleted file mode 100644 index 0b9e66f..0000000 --- a/data/accuracy/cc_accuracy_all.csv +++ /dev/null @@ -1,28 +0,0 @@ -model,full_exp,total_acc,vs_baseline_acc,vs_distractor_acc -grnn_multi,cc_himself,0.30666666666666664,1.0,0.30666666666666664 -grnn_multi,cc_herself,0.5466666666666666,1.0,0.5466666666666666 -grnn_multi,cc_pl,0.9333333333333333,1.0,0.9333333333333333 -grnn,cc_himself,0.6533333333333333,1.0,0.6533333333333333 -grnn,cc_herself,0.8133333333333334,0.9866666666666667,0.8266666666666667 -grnn,cc_pl,0.7866666666666666,1.0,0.7866666666666666 -jrnn,cc_himself,0.92,0.9866666666666667,0.9333333333333333 -jrnn,cc_herself,0.8533333333333334,0.8533333333333334,1.0 -jrnn,cc_pl,0.30666666666666664,1.0,0.30666666666666664 -trans,cc_himself,0.6533333333333333,0.8533333333333334,0.7066666666666667 
-trans,cc_herself,0.6266666666666667,0.9866666666666667,0.6266666666666667 -trans,cc_pl,0.7466666666666667,1.0,0.7466666666666667 -rnng,cc_himself,0.64,0.9866666666666667,0.6533333333333333 -rnng,cc_herself,0.6266666666666667,0.9733333333333334,0.6266666666666667 -rnng,cc_pl,0.6133333333333333,0.9733333333333334,0.64 -tiny,cc_himself,0.21333333333333335,0.9066666666666666,0.24 -tiny,cc_herself,0.12,0.28,0.28 -tiny,cc_pl,0.9333333333333333,1.0,0.9333333333333333 -tinywiki,cc_himself,0.37333333333333335,1.0,0.37333333333333335 -tinywiki,cc_herself,0.84,1.0,0.84 -tinywiki,cc_pl,1.0,1.0,1.0 -5gram,cc_himself,0.26666666666666666,0.0,0.16 -5gram,cc_herself,0.26666666666666666,0.0,0.13333333333333333 -5gram,cc_pl,0.24,0.0,0.06666666666666667 -bert,cc_himself,1.0,1.0,1.0 -bert,cc_herself,0.9866666666666667,0.9866666666666667,1.0 -bert,cc_pl,1.0,1.0,1.0 diff --git a/data/accuracy/exp1a-ml-rc.csv b/data/accuracy/exp1a-ml-rc.csv new file mode 100644 index 0000000..d431af6 --- /dev/null +++ b/data/accuracy/exp1a-ml-rc.csv @@ -0,0 +1,16 @@ +exp,model,pronoun,total_acc,vs_baseline_acc,vs_distractor_acc +exp1a-ml-rc,bert,themselves,0.7571428571428571,1.0,0.7571428571428571 +exp1a-ml-rc,bert,himself,0.7714285714285715,0.9857142857142858,0.7714285714285715 +exp1a-ml-rc,bert,herself,0.7428571428571429,0.9571428571428572,0.7428571428571429 +exp1a-ml-rc,trans,themselves,1.0,1.0,1.0 +exp1a-ml-rc,trans,himself,0.6,1.0,0.6 +exp1a-ml-rc,trans,herself,0.6142857142857143,0.9571428571428572,0.6142857142857143 +exp1a-ml-rc,jrnn,themselves,0.8,1.0,0.8 +exp1a-ml-rc,jrnn,himself,0.14285714285714285,1.0,0.14285714285714285 +exp1a-ml-rc,jrnn,herself,0.3,1.0,0.3 +exp1a-ml-rc,grnn,themselves,0.6428571428571429,1.0,0.6428571428571429 +exp1a-ml-rc,grnn,himself,0.5714285714285714,1.0,0.5714285714285714 +exp1a-ml-rc,grnn,herself,0.8857142857142857,1.0,0.8857142857142857 +exp1a-ml-rc,5gram,themselves,0.32857142857142857,0.0,0.0 +exp1a-ml-rc,5gram,himself,0.2714285714285714,0.0,0.0 
+exp1a-ml-rc,5gram,herself,0.3142857142857143,0.0,0.0 diff --git a/data/accuracy/exp1b-ml-comp.csv b/data/accuracy/exp1b-ml-comp.csv new file mode 100644 index 0000000..902b293 --- /dev/null +++ b/data/accuracy/exp1b-ml-comp.csv @@ -0,0 +1,16 @@ +exp,model,pronoun,total_acc,vs_baseline_acc,vs_distractor_acc +exp1b-ml-comp,bert,themselves,0.8571428571428571,1.0,0.8571428571428571 +exp1b-ml-comp,bert,himself,1.0,1.0,1.0 +exp1b-ml-comp,bert,herself,1.0,1.0,1.0 +exp1b-ml-comp,trans,themselves,1.0,1.0,1.0 +exp1b-ml-comp,trans,himself,0.8857142857142857,1.0,0.8857142857142857 +exp1b-ml-comp,trans,herself,0.8571428571428571,0.9857142857142858,0.8571428571428571 +exp1b-ml-comp,jrnn,themselves,0.8857142857142857,1.0,0.8857142857142857 +exp1b-ml-comp,jrnn,himself,1.0,1.0,1.0 +exp1b-ml-comp,jrnn,herself,1.0,1.0,1.0 +exp1b-ml-comp,grnn,themselves,1.0,1.0,1.0 +exp1b-ml-comp,grnn,himself,1.0,1.0,1.0 +exp1b-ml-comp,grnn,herself,1.0,1.0,1.0 +exp1b-ml-comp,5gram,themselves,0.4,0.0,0.0 +exp1b-ml-comp,5gram,himself,0.37142857142857144,0.0,0.0 +exp1b-ml-comp,5gram,herself,0.2571428571428571,0.0,0.0 diff --git a/data/accuracy/exp2-rc.csv b/data/accuracy/exp2-rc.csv new file mode 100644 index 0000000..a0ba46f --- /dev/null +++ b/data/accuracy/exp2-rc.csv @@ -0,0 +1,22 @@ +exp,model,pronoun,total_acc,vs_baseline_acc,vs_distractor_acc +exp2-rc,bert,themselves,0.9333333333333333,1.0,0.9333333333333333 +exp2-rc,bert,himself,0.6533333333333333,0.8933333333333333,0.6533333333333333 +exp2-rc,bert,herself,0.52,0.76,0.5466666666666666 +exp2-rc,trans,themselves,1.0,1.0,1.0 +exp2-rc,trans,himself,0.30666666666666664,0.5733333333333334,0.3333333333333333 +exp2-rc,trans,herself,0.7866666666666666,1.0,0.7866666666666666 +exp2-rc,jrnn,themselves,0.8666666666666667,1.0,0.8666666666666667 +exp2-rc,jrnn,himself,0.8,1.0,0.8 +exp2-rc,jrnn,herself,0.3333333333333333,1.0,0.3333333333333333 +exp2-rc,grnn,themselves,0.76,1.0,0.76 +exp2-rc,grnn,himself,0.13333333333333333,1.0,0.13333333333333333 
+exp2-rc,grnn,herself,0.37333333333333335,1.0,0.37333333333333335 +exp2-rc,5gram,themselves,0.37333333333333335,0.0,0.0 +exp2-rc,5gram,himself,0.30666666666666664,0.0,0.0 +exp2-rc,5gram,herself,0.3466666666666667,0.0,0.0 +exp2-rc,tiny,themselves,0.04,1.0,0.04 +exp2-rc,tiny,himself,0.14666666666666667,1.0,0.14666666666666667 +exp2-rc,tiny,herself,0.21333333333333335,0.41333333333333333,0.29333333333333333 +exp2-rc,rnng,themselves,0.6,1.0,0.6 +exp2-rc,rnng,himself,0.12,0.8,0.12 +exp2-rc,rnng,herself,0.0,0.7733333333333333,0.0 diff --git a/data/accuracy/exp3-comp.csv b/data/accuracy/exp3-comp.csv new file mode 100644 index 0000000..03c94e7 --- /dev/null +++ b/data/accuracy/exp3-comp.csv @@ -0,0 +1,22 @@ +exp,model,pronoun,total_acc,vs_baseline_acc,vs_distractor_acc +exp3-comp,bert,themselves,1.0,1.0,1.0 +exp3-comp,bert,himself,1.0,1.0,1.0 +exp3-comp,bert,herself,0.9333333333333333,0.9333333333333333,0.9466666666666667 +exp3-comp,trans,themselves,1.0,1.0,1.0 +exp3-comp,trans,himself,0.9866666666666667,0.9866666666666667,0.9866666666666667 +exp3-comp,trans,herself,0.8266666666666667,1.0,0.8266666666666667 +exp3-comp,jrnn,themselves,0.9333333333333333,1.0,0.9333333333333333 +exp3-comp,jrnn,himself,1.0,1.0,1.0 +exp3-comp,jrnn,herself,0.9733333333333334,0.9866666666666667,0.9866666666666667 +exp3-comp,grnn,themselves,1.0,1.0,1.0 +exp3-comp,grnn,himself,1.0,1.0,1.0 +exp3-comp,grnn,herself,0.9733333333333334,1.0,0.9733333333333334 +exp3-comp,5gram,themselves,0.4266666666666667,0.21333333333333335,0.21333333333333335 +exp3-comp,5gram,himself,0.36,0.08,0.08 +exp3-comp,5gram,herself,0.21333333333333335,0.05333333333333334,0.05333333333333334 +exp3-comp,tiny,themselves,0.9866666666666667,0.9866666666666667,0.9866666666666667 +exp3-comp,tiny,himself,0.9333333333333333,0.9733333333333334,0.9333333333333333 +exp3-comp,tiny,herself,0.49333333333333335,0.52,0.6933333333333334 +exp3-comp,rnng,themselves,0.7066666666666667,0.8666666666666667,0.7066666666666667 
+exp3-comp,rnng,himself,1.0,1.0,1.0 +exp3-comp,rnng,herself,0.9866666666666667,1.0,0.9866666666666667 diff --git a/data/accuracy/exp4-pp.csv b/data/accuracy/exp4-pp.csv new file mode 100644 index 0000000..ed6a07c --- /dev/null +++ b/data/accuracy/exp4-pp.csv @@ -0,0 +1,22 @@ +exp,model,pronoun,total_acc,vs_baseline_acc,vs_distractor_acc +exp4-pp,bert,themselves,1.0,1.0,1.0 +exp4-pp,bert,himself,1.0,1.0,1.0 +exp4-pp,bert,herself,0.9866666666666667,0.9866666666666667,1.0 +exp4-pp,trans,themselves,0.7466666666666667,1.0,0.7466666666666667 +exp4-pp,trans,himself,0.6533333333333333,0.8533333333333334,0.7066666666666667 +exp4-pp,trans,herself,0.6266666666666667,0.9866666666666667,0.6266666666666667 +exp4-pp,jrnn,themselves,0.30666666666666664,1.0,0.30666666666666664 +exp4-pp,jrnn,himself,0.92,0.9866666666666667,0.9333333333333333 +exp4-pp,jrnn,herself,0.8533333333333334,0.8533333333333334,1.0 +exp4-pp,grnn,themselves,0.7866666666666666,1.0,0.7866666666666666 +exp4-pp,grnn,himself,0.6533333333333333,1.0,0.6533333333333333 +exp4-pp,grnn,herself,0.8133333333333334,0.9866666666666667,0.8266666666666667 +exp4-pp,5gram,themselves,0.26666666666666666,0.0,0.06666666666666667 +exp4-pp,5gram,himself,0.29333333333333333,0.0,0.16 +exp4-pp,5gram,herself,0.28,0.0,0.13333333333333333 +exp4-pp,tiny,themselves,0.9333333333333333,1.0,0.9333333333333333 +exp4-pp,tiny,himself,0.21333333333333335,0.9066666666666666,0.24 +exp4-pp,tiny,herself,0.12,0.28,0.28 +exp4-pp,rnng,themselves,0.6133333333333333,0.9733333333333334,0.64 +exp4-pp,rnng,himself,0.64,0.9866666666666667,0.6533333333333333 +exp4-pp,rnng,herself,0.6266666666666667,0.9733333333333334,0.6266666666666667 diff --git a/data/accuracy/futrell_rc_subrc_accuracy_grnn_jrnn_trans.csv b/data/accuracy/futrell_rc_subrc_accuracy_grnn_jrnn_trans.csv deleted file mode 100644 index bf134a9..0000000 --- a/data/accuracy/futrell_rc_subrc_accuracy_grnn_jrnn_trans.csv +++ /dev/null @@ -1,7 +0,0 @@ -model,full_exp,vs_baseline_acc,vs_distractor_acc 
-grnn,futrell_rc_subrc_himself,0.7111111111111111,0.8333333333333334 -grnn,futrell_rc_subrc_herself,1.0,0.9333333333333333 -jrnn,futrell_rc_subrc_himself,0.9888888888888889,0.5777777777777777 -jrnn,futrell_rc_subrc_herself,1.0,1.0 -trans,futrell_rc_subrc_himself,1.0,0.9444444444444444 -trans,futrell_rc_subrc_herself,0.9222222222222223,0.9555555555555556 diff --git a/data/accuracy/futrell_rc_subrc_accuracy_grnn_jrnn_trans_5gram.csv b/data/accuracy/futrell_rc_subrc_accuracy_grnn_jrnn_trans_5gram.csv deleted file mode 100644 index 45bb203..0000000 --- a/data/accuracy/futrell_rc_subrc_accuracy_grnn_jrnn_trans_5gram.csv +++ /dev/null @@ -1,9 +0,0 @@ -full_exp,model,total_acc,vs_baseline_acc,vs_distractor_acc -futrell_rc_subrc_himself,grnn,0.6222222222222222,0.7111111111111111,0.8333333333333334 -futrell_rc_subrc_herself,grnn,0.9333333333333333,1.0,0.9333333333333333 -futrell_rc_subrc_himself,jrnn,0.5666666666666667,0.9888888888888889,0.5777777777777777 -futrell_rc_subrc_herself,jrnn,1.0,1.0,1.0 -futrell_rc_subrc_himself,trans,0.9444444444444444,1.0,0.9444444444444444 -futrell_rc_subrc_herself,trans,0.9222222222222223,0.9222222222222223,0.9555555555555556 -futrell_rc_subrc_himself,5gram,0.022222222222222223,0.022222222222222223,0.08888888888888889 -futrell_rc_subrc_herself,5gram,0.0,0.0,0.0 diff --git a/data/accuracy/loc_accuracy_all.csv b/data/accuracy/loc_accuracy_all.csv deleted file mode 100644 index d854c60..0000000 --- a/data/accuracy/loc_accuracy_all.csv +++ /dev/null @@ -1,28 +0,0 @@ -model,full_exp,total_acc,vs_baseline_acc,vs_distractor_acc -grnn_multi,loc_himself,1.0,1.0,1.0 -grnn_multi,loc_herself,1.0,1.0,1.0 -grnn_multi,loc_pl,1.0,1.0,1.0 -grnn,loc_himself,1.0,1.0,1.0 -grnn,loc_herself,0.9733333333333334,1.0,0.9733333333333334 -grnn,loc_pl,1.0,1.0,1.0 -jrnn,loc_himself,1.0,1.0,1.0 -jrnn,loc_herself,0.9733333333333334,0.9866666666666667,0.9866666666666667 -jrnn,loc_pl,0.9333333333333333,1.0,0.9333333333333333 
-trans,loc_himself,0.9866666666666667,0.9866666666666667,0.9866666666666667 -trans,loc_herself,0.8266666666666667,1.0,0.8266666666666667 -trans,loc_pl,1.0,1.0,1.0 -rnng,loc_himself,1.0,1.0,1.0 -rnng,loc_herself,0.9866666666666667,1.0,0.9866666666666667 -rnng,loc_pl,0.7066666666666667,0.8666666666666667,0.7066666666666667 -tiny,loc_himself,0.9333333333333333,0.9733333333333334,0.9333333333333333 -tiny,loc_herself,0.49333333333333335,0.52,0.6933333333333334 -tiny,loc_pl,0.9866666666666667,0.9866666666666667,0.9866666666666667 -tinywiki,loc_himself,1.0,1.0,1.0 -tinywiki,loc_herself,1.0,1.0,1.0 -tinywiki,loc_pl,1.0,1.0,1.0 -5gram,loc_himself,0.36,0.08,0.08 -5gram,loc_herself,0.29333333333333333,0.05333333333333334,0.05333333333333334 -5gram,loc_pl,0.38666666666666666,0.21333333333333335,0.21333333333333335 -bert,loc_himself,1.0,1.0,1.0 -bert,loc_herself,0.9333333333333333,0.9333333333333333,0.9466666666666667 -bert,loc_pl,1.0,1.0,1.0 diff --git a/data/accuracy/ml_accuracy_grnn_jrnn_trans.csv b/data/accuracy/ml_accuracy_grnn_jrnn_trans.csv deleted file mode 100644 index ad0f57c..0000000 --- a/data/accuracy/ml_accuracy_grnn_jrnn_trans.csv +++ /dev/null @@ -1,10 +0,0 @@ -model,full_exp,vs_baseline_acc,vs_distractor_acc -grnn,ml_himself,1.0,1.0 -grnn,ml_herself,1.0,1.0 -grnn,ml_pl,1.0,1.0 -jrnn,ml_himself,1.0,1.0 -jrnn,ml_herself,1.0,1.0 -jrnn,ml_pl,1.0,0.8857142857142857 -trans,ml_himself,1.0,0.8857142857142857 -trans,ml_herself,0.9857142857142858,0.8571428571428571 -trans,ml_pl,1.0,1.0 diff --git a/data/accuracy/ml_accuracy_grnn_jrnn_trans_5gram.csv b/data/accuracy/ml_accuracy_grnn_jrnn_trans_5gram.csv deleted file mode 100644 index e5f2138..0000000 --- a/data/accuracy/ml_accuracy_grnn_jrnn_trans_5gram.csv +++ /dev/null @@ -1,13 +0,0 @@ -full_exp,model,total_acc,vs_baseline_acc,vs_distractor_acc -ml_himself,grnn,1.0,1.0,1.0 -ml_herself,grnn,1.0,1.0,1.0 -ml_pl,grnn,1.0,1.0,1.0 -ml_himself,jrnn,1.0,1.0,1.0 -ml_herself,jrnn,1.0,1.0,1.0 
-ml_pl,jrnn,0.8857142857142857,1.0,0.8857142857142857 -ml_himself,trans,0.8857142857142857,1.0,0.8857142857142857 -ml_herself,trans,0.8571428571428571,0.9857142857142858,0.8571428571428571 -ml_pl,trans,1.0,1.0,1.0 -ml_himself,5gram,0.0,0.0,0.0 -ml_herself,5gram,0.0,0.0,0.0 -ml_pl,5gram,0.0,0.0,0.0 diff --git a/data/accuracy/ml_accuracy_grnn_jrnn_trans_5gram_bert.csv b/data/accuracy/ml_accuracy_grnn_jrnn_trans_5gram_bert.csv deleted file mode 100644 index f6b446d..0000000 --- a/data/accuracy/ml_accuracy_grnn_jrnn_trans_5gram_bert.csv +++ /dev/null @@ -1,16 +0,0 @@ -full_exp,model,total_acc,vs_baseline_acc,vs_distractor_acc -ml_himself,grnn,1.0,1.0,1.0 -ml_herself,grnn,1.0,1.0,1.0 -ml_pl,grnn,1.0,1.0,1.0 -ml_himself,jrnn,1.0,1.0,1.0 -ml_herself,jrnn,1.0,1.0,1.0 -ml_pl,jrnn,0.8857142857142857,1.0,0.8857142857142857 -ml_himself,trans,0.8857142857142857,1.0,0.8857142857142857 -ml_herself,trans,0.8571428571428571,0.9857142857142858,0.8571428571428571 -ml_pl,trans,1.0,1.0,1.0 -ml_himself,5gram,0.35714285714285715,0.0,0.0 -ml_herself,5gram,0.3,0.0,0.0 -ml_pl,5gram,0.4,0.0,0.0 -ml_himself,bert,1.0,1.0,1.0 -ml_herself,bert,1.0,1.0,1.0 -ml_pl,bert,0.8571428571428571,1.0,0.8571428571428571 diff --git a/data/accuracy/ml_accuracy_grnn_multi_grnn_jrnn_trans_5gram_bert.csv b/data/accuracy/ml_accuracy_grnn_multi_grnn_jrnn_trans_5gram_bert.csv deleted file mode 100644 index ef58ebe..0000000 --- a/data/accuracy/ml_accuracy_grnn_multi_grnn_jrnn_trans_5gram_bert.csv +++ /dev/null @@ -1,19 +0,0 @@ -full_exp,model,total_acc,vs_baseline_acc,vs_distractor_acc -ml_himself,grnn_multi,1.0,1.0,1.0 -ml_herself,grnn_multi,1.0,1.0,1.0 -ml_pl,grnn_multi,1.0,1.0,1.0 -ml_himself,grnn,1.0,1.0,1.0 -ml_herself,grnn,1.0,1.0,1.0 -ml_pl,grnn,1.0,1.0,1.0 -ml_himself,jrnn,1.0,1.0,1.0 -ml_herself,jrnn,1.0,1.0,1.0 -ml_pl,jrnn,0.8857142857142857,1.0,0.8857142857142857 -ml_himself,trans,0.8857142857142857,1.0,0.8857142857142857 -ml_herself,trans,0.8571428571428571,0.9857142857142858,0.8571428571428571 
-ml_pl,trans,1.0,1.0,1.0 -ml_himself,5gram,0.44285714285714284,0.0,0.0 -ml_herself,5gram,0.2857142857142857,0.0,0.0 -ml_pl,5gram,0.45714285714285713,0.0,0.0 -ml_himself,bert,1.0,1.0,1.0 -ml_herself,bert,1.0,1.0,1.0 -ml_pl,bert,0.8571428571428571,1.0,0.8571428571428571 diff --git a/data/accuracy/ml_rc_accuracy_grnn_jrnn_trans.csv b/data/accuracy/ml_rc_accuracy_grnn_jrnn_trans.csv deleted file mode 100644 index 2b55208..0000000 --- a/data/accuracy/ml_rc_accuracy_grnn_jrnn_trans.csv +++ /dev/null @@ -1,10 +0,0 @@ -model,full_exp,vs_baseline_acc,vs_distractor_acc -grnn,ml_rc_himself,1.0,0.5714285714285714 -grnn,ml_rc_herself,1.0,0.8857142857142857 -grnn,ml_rc_pl,1.0,0.6428571428571429 -jrnn,ml_rc_himself,1.0,0.14285714285714285 -jrnn,ml_rc_herself,1.0,0.3 -jrnn,ml_rc_pl,1.0,0.8 -trans,ml_rc_himself,1.0,0.6 -trans,ml_rc_herself,0.9571428571428572,0.6142857142857143 -trans,ml_rc_pl,1.0,1.0 diff --git a/data/accuracy/ml_rc_accuracy_grnn_jrnn_trans_5gram.csv b/data/accuracy/ml_rc_accuracy_grnn_jrnn_trans_5gram.csv deleted file mode 100644 index bc742d6..0000000 --- a/data/accuracy/ml_rc_accuracy_grnn_jrnn_trans_5gram.csv +++ /dev/null @@ -1,13 +0,0 @@ -full_exp,model,total_acc,vs_baseline_acc,vs_distractor_acc -ml_rc_himself,grnn,0.5714285714285714,1.0,0.5714285714285714 -ml_rc_herself,grnn,0.8857142857142857,1.0,0.8857142857142857 -ml_rc_pl,grnn,0.6428571428571429,1.0,0.6428571428571429 -ml_rc_himself,jrnn,0.14285714285714285,1.0,0.14285714285714285 -ml_rc_herself,jrnn,0.3,1.0,0.3 -ml_rc_pl,jrnn,0.8,1.0,0.8 -ml_rc_himself,trans,0.6,1.0,0.6 -ml_rc_herself,trans,0.6142857142857143,0.9571428571428572,0.6142857142857143 -ml_rc_pl,trans,1.0,1.0,1.0 -ml_rc_himself,5gram,0.0,0.0,0.0 -ml_rc_herself,5gram,0.0,0.0,0.0 -ml_rc_pl,5gram,0.0,0.0,0.0 diff --git a/data/accuracy/ml_rc_accuracy_grnn_jrnn_trans_5gram_bert.csv b/data/accuracy/ml_rc_accuracy_grnn_jrnn_trans_5gram_bert.csv deleted file mode 100644 index 95297d7..0000000 --- 
a/data/accuracy/ml_rc_accuracy_grnn_jrnn_trans_5gram_bert.csv +++ /dev/null @@ -1,16 +0,0 @@ -full_exp,model,total_acc,vs_baseline_acc,vs_distractor_acc -ml_rc_himself,grnn,0.5714285714285714,1.0,0.5714285714285714 -ml_rc_herself,grnn,0.8857142857142857,1.0,0.8857142857142857 -ml_rc_pl,grnn,0.6428571428571429,1.0,0.6428571428571429 -ml_rc_himself,jrnn,0.14285714285714285,1.0,0.14285714285714285 -ml_rc_herself,jrnn,0.3,1.0,0.3 -ml_rc_pl,jrnn,0.8,1.0,0.8 -ml_rc_himself,trans,0.6,1.0,0.6 -ml_rc_herself,trans,0.6142857142857143,0.9571428571428572,0.6142857142857143 -ml_rc_pl,trans,1.0,1.0,1.0 -ml_rc_himself,5gram,0.34285714285714286,0.0,0.0 -ml_rc_herself,5gram,0.32857142857142857,0.0,0.0 -ml_rc_pl,5gram,0.38571428571428573,0.0,0.0 -ml_rc_himself,bert,0.7714285714285715,0.9857142857142858,0.7714285714285715 -ml_rc_herself,bert,0.7428571428571429,0.9571428571428572,0.7428571428571429 -ml_rc_pl,bert,0.7571428571428571,1.0,0.7571428571428571 diff --git a/data/accuracy/ml_rc_accuracy_grnn_multi_grnn_jrnn_trans_5gram_bert.csv b/data/accuracy/ml_rc_accuracy_grnn_multi_grnn_jrnn_trans_5gram_bert.csv deleted file mode 100644 index 8d78a4a..0000000 --- a/data/accuracy/ml_rc_accuracy_grnn_multi_grnn_jrnn_trans_5gram_bert.csv +++ /dev/null @@ -1,19 +0,0 @@ -full_exp,model,total_acc,vs_baseline_acc,vs_distractor_acc -ml_rc_himself,grnn_multi,0.04285714285714286,1.0,0.04285714285714286 -ml_rc_herself,grnn_multi,0.04285714285714286,0.9142857142857143,0.04285714285714286 -ml_rc_pl,grnn_multi,0.0,1.0,0.0 -ml_rc_himself,grnn,0.5714285714285714,1.0,0.5714285714285714 -ml_rc_herself,grnn,0.8857142857142857,1.0,0.8857142857142857 -ml_rc_pl,grnn,0.6428571428571429,1.0,0.6428571428571429 -ml_rc_himself,jrnn,0.14285714285714285,1.0,0.14285714285714285 -ml_rc_herself,jrnn,0.3,1.0,0.3 -ml_rc_pl,jrnn,0.8,1.0,0.8 -ml_rc_himself,trans,0.6,1.0,0.6 -ml_rc_herself,trans,0.6142857142857143,0.9571428571428572,0.6142857142857143 -ml_rc_pl,trans,1.0,1.0,1.0 
-ml_rc_himself,5gram,0.21428571428571427,0.0,0.0 -ml_rc_herself,5gram,0.3142857142857143,0.0,0.0 -ml_rc_pl,5gram,0.2857142857142857,0.0,0.0 -ml_rc_himself,bert,0.7714285714285715,0.9857142857142858,0.7714285714285715 -ml_rc_herself,bert,0.7428571428571429,0.9571428571428572,0.7428571428571429 -ml_rc_pl,bert,0.7571428571428571,1.0,0.7571428571428571 diff --git a/data/accuracy/rc_accuracy_all.csv b/data/accuracy/rc_accuracy_all.csv deleted file mode 100644 index de73145..0000000 --- a/data/accuracy/rc_accuracy_all.csv +++ /dev/null @@ -1,28 +0,0 @@ -model,full_exp,total_acc,vs_baseline_acc,vs_distractor_acc -grnn_multi,rc_himself,0.013333333333333334,0.9733333333333334,0.013333333333333334 -grnn_multi,rc_herself,0.0,0.6,0.0 -grnn_multi,rc_pl,0.0,1.0,0.0 -grnn,rc_himself,0.13333333333333333,1.0,0.13333333333333333 -grnn,rc_herself,0.37333333333333335,1.0,0.37333333333333335 -grnn,rc_pl,0.76,1.0,0.76 -jrnn,rc_himself,0.8,1.0,0.8 -jrnn,rc_herself,0.3333333333333333,1.0,0.3333333333333333 -jrnn,rc_pl,0.8666666666666667,1.0,0.8666666666666667 -trans,rc_himself,0.30666666666666664,0.5733333333333334,0.3333333333333333 -trans,rc_herself,0.7866666666666666,1.0,0.7866666666666666 -trans,rc_pl,1.0,1.0,1.0 -rnng,rc_himself,0.12,0.8,0.12 -rnng,rc_herself,0.0,0.7733333333333333,0.0 -rnng,rc_pl,0.6,1.0,0.6 -tiny,rc_himself,0.14666666666666667,1.0,0.14666666666666667 -tiny,rc_herself,0.21333333333333335,0.41333333333333333,0.29333333333333333 -tiny,rc_pl,0.04,1.0,0.04 -tinywiki,rc_himself,0.0,1.0,0.0 -tinywiki,rc_herself,0.21333333333333335,1.0,0.21333333333333335 -tinywiki,rc_pl,0.14666666666666667,0.5466666666666666,0.14666666666666667 -5gram,rc_himself,0.3333333333333333,0.0,0.0 -5gram,rc_herself,0.2,0.0,0.0 -5gram,rc_pl,0.44,0.0,0.0 -bert,rc_himself,0.6533333333333333,0.8933333333333333,0.6533333333333333 -bert,rc_herself,0.52,0.76,0.5466666666666666 -bert,rc_pl,0.9333333333333333,1.0,0.9333333333333333 diff --git a/data/accuracy/rc_accuracy_grnn_jrnn_trans_tiny_rnng.csv 
b/data/accuracy/rc_accuracy_grnn_jrnn_trans_tiny_rnng.csv deleted file mode 100644 index 7d8bc74..0000000 --- a/data/accuracy/rc_accuracy_grnn_jrnn_trans_tiny_rnng.csv +++ /dev/null @@ -1,16 +0,0 @@ -model,full_exp,vs_baseline_acc,vs_distractor_acc -grnn,rc_himself,1.0,0.13333333333333333 -grnn,rc_herself,1.0,0.37333333333333335 -grnn,rc_pl,1.0,0.76 -jrnn,rc_himself,1.0,0.8 -jrnn,rc_herself,1.0,0.3333333333333333 -jrnn,rc_pl,1.0,0.8666666666666667 -trans,rc_himself,0.5733333333333334,0.3333333333333333 -trans,rc_herself,1.0,0.7866666666666666 -trans,rc_pl,1.0,1.0 -tiny,rc_himself,1.0,0.14666666666666667 -tiny,rc_herself,0.41333333333333333,0.29333333333333333 -tiny,rc_pl,1.0,0.04 -rnng,rc_himself,0.8,0.12 -rnng,rc_herself,0.7733333333333333,0.0 -rnng,rc_pl,1.0,0.6