diff --git a/.gitignore b/.gitignore
index dd387e2..ea04f7a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,7 +3,8 @@
 ################################################################################
 
 # Folders to ignore - files may be too large, too many, etc. NOTE: EDIT IF NEEDED.
-data
+data/experiments
+data/mnist_point_cloud
 videos
 plots
 notebooks
diff --git a/results/MIL/mill_names_scores.bson b/data/results/MIL/mill_names_scores.bson
similarity index 100%
rename from results/MIL/mill_names_scores.bson
rename to data/results/MIL/mill_names_scores.bson
diff --git a/results/MIL/mill_results_collection.bson b/data/results/MIL/mill_results_collection.bson
similarity index 100%
rename from results/MIL/mill_results_collection.bson
rename to data/results/MIL/mill_results_collection.bson
diff --git a/results/MIL/mill_results_scores.bson b/data/results/MIL/mill_results_scores.bson
similarity index 100%
rename from results/MIL/mill_results_scores.bson
rename to data/results/MIL/mill_results_scores.bson
diff --git a/results/MIL/mill_results_scores_agg.bson b/data/results/MIL/mill_results_scores_agg.bson
similarity index 100%
rename from results/MIL/mill_results_scores_agg.bson
rename to data/results/MIL/mill_results_scores_agg.bson
diff --git a/results/MNIST/leave-one-in/PoolModel.bson b/data/results/MNIST/leave-one-in/PoolModel.bson
similarity index 100%
rename from results/MNIST/leave-one-in/PoolModel.bson
rename to data/results/MNIST/leave-one-in/PoolModel.bson
diff --git a/results/MNIST/leave-one-in/statistician.bson b/data/results/MNIST/leave-one-in/statistician.bson
similarity index 100%
rename from results/MNIST/leave-one-in/statistician.bson
rename to data/results/MNIST/leave-one-in/statistician.bson
diff --git a/results/MNIST/leave-one-in/vae_instance.bson b/data/results/MNIST/leave-one-in/vae_instance.bson
similarity index 100%
rename from results/MNIST/leave-one-in/vae_instance.bson
rename to data/results/MNIST/leave-one-in/vae_instance.bson
diff --git a/results/MNIST/leave-one-out/vae_instance.bson b/data/results/MNIST/leave-one-out/vae_instance.bson
similarity index 100%
rename from results/MNIST/leave-one-out/vae_instance.bson
rename to data/results/MNIST/leave-one-out/vae_instance.bson
diff --git a/results/MNIST/mnist_results_in.bson b/data/results/MNIST/mnist_results_in.bson
similarity index 100%
rename from results/MNIST/mnist_results_in.bson
rename to data/results/MNIST/mnist_results_in.bson
diff --git a/results/MNIST/mnist_results_in_scores.bson b/data/results/MNIST/mnist_results_in_scores.bson
similarity index 100%
rename from results/MNIST/mnist_results_in_scores.bson
rename to data/results/MNIST/mnist_results_in_scores.bson
diff --git a/results/MNIST/mnist_results_out.bson b/data/results/MNIST/mnist_results_out.bson
similarity index 100%
rename from results/MNIST/mnist_results_out.bson
rename to data/results/MNIST/mnist_results_out.bson
diff --git a/results/MNIST/mnist_results_out_scores.bson b/data/results/MNIST/mnist_results_out_scores.bson
similarity index 100%
rename from results/MNIST/mnist_results_out_scores.bson
rename to data/results/MNIST/mnist_results_out_scores.bson
diff --git a/results/MNIST/models/vae-in.bson b/data/results/MNIST/models/vae-in.bson
similarity index 100%
rename from results/MNIST/models/vae-in.bson
rename to data/results/MNIST/models/vae-in.bson
diff --git a/results/MNIST/models/vae-out.bson b/data/results/MNIST/models/vae-out.bson
similarity index 100%
rename from results/MNIST/models/vae-out.bson
rename to data/results/MNIST/models/vae-out.bson
diff --git a/results/MNIST/models/vae.bson b/data/results/MNIST/models/vae.bson
similarity index 100%
rename from results/MNIST/models/vae.bson
rename to data/results/MNIST/models/vae.bson
diff --git a/images/MIL/validation/all/BrownCreeper.png b/images/MIL/validation/all/BrownCreeper.png
deleted file mode 100644
index 1b7e1de..0000000
Binary files a/images/MIL/validation/all/BrownCreeper.png and /dev/null differ
diff --git a/images/MIL/validation/all/CorelAfrican.png b/images/MIL/validation/all/CorelAfrican.png
deleted file mode 100644
index b5cc164..0000000
Binary files a/images/MIL/validation/all/CorelAfrican.png and /dev/null differ
diff --git a/images/MIL/validation/all/CorelBeach.png b/images/MIL/validation/all/CorelBeach.png
deleted file mode 100644
index c5eb4bd..0000000
Binary files a/images/MIL/validation/all/CorelBeach.png and /dev/null differ
diff --git a/images/MIL/validation/all/Elephant.png b/images/MIL/validation/all/Elephant.png
deleted file mode 100644
index cf8011d..0000000
Binary files a/images/MIL/validation/all/Elephant.png and /dev/null differ
diff --git a/images/MIL/validation/all/Fox.png b/images/MIL/validation/all/Fox.png
deleted file mode 100644
index d70cc45..0000000
Binary files a/images/MIL/validation/all/Fox.png and /dev/null differ
diff --git a/images/MIL/validation/all/Musk1.png b/images/MIL/validation/all/Musk1.png
deleted file mode 100644
index 01f755d..0000000
Binary files a/images/MIL/validation/all/Musk1.png and /dev/null differ
diff --git a/images/MIL/validation/all/Musk2.png b/images/MIL/validation/all/Musk2.png
deleted file mode 100644
index c833126..0000000
Binary files a/images/MIL/validation/all/Musk2.png and /dev/null differ
diff --git a/images/MIL/validation/all/Mutagenesis1.png b/images/MIL/validation/all/Mutagenesis1.png
deleted file mode 100644
index e841f41..0000000
Binary files a/images/MIL/validation/all/Mutagenesis1.png and /dev/null differ
diff --git a/images/MIL/validation/all/Mutagenesis2.png b/images/MIL/validation/all/Mutagenesis2.png
deleted file mode 100644
index 3235ad9..0000000
Binary files a/images/MIL/validation/all/Mutagenesis2.png and /dev/null differ
diff --git a/images/MIL/validation/all/Newsgroups1.png b/images/MIL/validation/all/Newsgroups1.png
deleted file mode 100644
index 3e7ce11..0000000
Binary files a/images/MIL/validation/all/Newsgroups1.png and /dev/null differ
diff --git a/images/MIL/validation/all/Newsgroups2.png b/images/MIL/validation/all/Newsgroups2.png
deleted file mode 100644
index 5e4a6ae..0000000
Binary files a/images/MIL/validation/all/Newsgroups2.png and /dev/null differ
diff --git a/images/MIL/validation/all/Newsgroups3.png b/images/MIL/validation/all/Newsgroups3.png
deleted file mode 100644
index 09632ef..0000000
Binary files a/images/MIL/validation/all/Newsgroups3.png and /dev/null differ
diff --git a/images/MIL/validation/all/Protein.png b/images/MIL/validation/all/Protein.png
deleted file mode 100644
index af9c89a..0000000
Binary files a/images/MIL/validation/all/Protein.png and /dev/null differ
diff --git a/images/MIL/validation/all/Tiger.png b/images/MIL/validation/all/Tiger.png
deleted file mode 100644
index 090a547..0000000
Binary files a/images/MIL/validation/all/Tiger.png and /dev/null differ
diff --git a/images/MIL/validation/all/UCSBBreastCancer.png b/images/MIL/validation/all/UCSBBreastCancer.png
deleted file mode 100644
index a9f10d9..0000000
Binary files a/images/MIL/validation/all/UCSBBreastCancer.png and /dev/null differ
diff --git a/images/MIL/validation/all/Web1.png b/images/MIL/validation/all/Web1.png
deleted file mode 100644
index 76364c6..0000000
Binary files a/images/MIL/validation/all/Web1.png and /dev/null differ
diff --git a/images/MIL/validation/all/Web2.png b/images/MIL/validation/all/Web2.png
deleted file mode 100644
index 8403032..0000000
Binary files a/images/MIL/validation/all/Web2.png and /dev/null differ
diff --git a/images/MIL/validation/all/Web3.png b/images/MIL/validation/all/Web3.png
deleted file mode 100644
index 9e27568..0000000
Binary files a/images/MIL/validation/all/Web3.png and /dev/null differ
diff --git a/images/MIL/validation/all/Web4.png b/images/MIL/validation/all/Web4.png
deleted file mode 100644
index 9f64070..0000000
Binary files a/images/MIL/validation/all/Web4.png and /dev/null differ
diff --git a/images/MIL/validation/all/WinterWren.png b/images/MIL/validation/all/WinterWren.png
deleted file mode 100644
index ffe7171..0000000
Binary files a/images/MIL/validation/all/WinterWren.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/BrownCreeper.png b/images/MIL/validation/all_0/BrownCreeper.png
deleted file mode 100644
index fd8fabe..0000000
Binary files a/images/MIL/validation/all_0/BrownCreeper.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/CorelAfrican.png b/images/MIL/validation/all_0/CorelAfrican.png
deleted file mode 100644
index 08347e3..0000000
Binary files a/images/MIL/validation/all_0/CorelAfrican.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/CorelBeach.png b/images/MIL/validation/all_0/CorelBeach.png
deleted file mode 100644
index 5d0580c..0000000
Binary files a/images/MIL/validation/all_0/CorelBeach.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/Elephant.png b/images/MIL/validation/all_0/Elephant.png
deleted file mode 100644
index ec88e82..0000000
Binary files a/images/MIL/validation/all_0/Elephant.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/Fox.png b/images/MIL/validation/all_0/Fox.png
deleted file mode 100644
index 20c386f..0000000
Binary files a/images/MIL/validation/all_0/Fox.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/Musk1.png b/images/MIL/validation/all_0/Musk1.png
deleted file mode 100644
index ee92c9b..0000000
Binary files a/images/MIL/validation/all_0/Musk1.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/Musk2.png b/images/MIL/validation/all_0/Musk2.png
deleted file mode 100644
index 15ccf65..0000000
Binary files a/images/MIL/validation/all_0/Musk2.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/Mutagenesis1.png b/images/MIL/validation/all_0/Mutagenesis1.png
deleted file mode 100644
index dfed4de..0000000
Binary files a/images/MIL/validation/all_0/Mutagenesis1.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/Mutagenesis2.png b/images/MIL/validation/all_0/Mutagenesis2.png
deleted file mode 100644
index f541e66..0000000
Binary files a/images/MIL/validation/all_0/Mutagenesis2.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/Newsgroups1.png b/images/MIL/validation/all_0/Newsgroups1.png
deleted file mode 100644
index 59af192..0000000
Binary files a/images/MIL/validation/all_0/Newsgroups1.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/Newsgroups2.png b/images/MIL/validation/all_0/Newsgroups2.png
deleted file mode 100644
index 3a57d72..0000000
Binary files a/images/MIL/validation/all_0/Newsgroups2.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/Newsgroups3.png b/images/MIL/validation/all_0/Newsgroups3.png
deleted file mode 100644
index 04e59b3..0000000
Binary files a/images/MIL/validation/all_0/Newsgroups3.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/Protein.png b/images/MIL/validation/all_0/Protein.png
deleted file mode 100644
index 26f64ac..0000000
Binary files a/images/MIL/validation/all_0/Protein.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/Tiger.png b/images/MIL/validation/all_0/Tiger.png
deleted file mode 100644
index 78d9b0a..0000000
Binary files a/images/MIL/validation/all_0/Tiger.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/UCSBBreastCancer.png b/images/MIL/validation/all_0/UCSBBreastCancer.png
deleted file mode 100644
index be23584..0000000
Binary files a/images/MIL/validation/all_0/UCSBBreastCancer.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/Web1.png b/images/MIL/validation/all_0/Web1.png
deleted file mode 100644
index b78c0eb..0000000
Binary files a/images/MIL/validation/all_0/Web1.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/Web2.png b/images/MIL/validation/all_0/Web2.png
deleted file mode 100644
index 7538ce8..0000000
Binary files a/images/MIL/validation/all_0/Web2.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/Web3.png b/images/MIL/validation/all_0/Web3.png
deleted file mode 100644
index 2577ca2..0000000
Binary files a/images/MIL/validation/all_0/Web3.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/Web4.png b/images/MIL/validation/all_0/Web4.png
deleted file mode 100644
index e78ae6f..0000000
Binary files a/images/MIL/validation/all_0/Web4.png and /dev/null differ
diff --git a/images/MIL/validation/all_0/WinterWren.png b/images/MIL/validation/all_0/WinterWren.png
deleted file mode 100644
index 739a4cc..0000000
Binary files a/images/MIL/validation/all_0/WinterWren.png and /dev/null differ
diff --git a/images/MNIST/validation_MNIST/leave-one-in/class_index=1.png b/images/MNIST/validation_MNIST/leave-one-in/class_index=1.png
deleted file mode 100644
index 5364c25..0000000
Binary files a/images/MNIST/validation_MNIST/leave-one-in/class_index=1.png and /dev/null differ
diff --git a/images/MNIST/validation_MNIST/leave-one-in/class_index=10.png b/images/MNIST/validation_MNIST/leave-one-in/class_index=10.png
deleted file mode 100644
index 2d6c619..0000000
Binary files a/images/MNIST/validation_MNIST/leave-one-in/class_index=10.png and /dev/null differ
diff --git a/images/MNIST/validation_MNIST/leave-one-in/class_index=2.png b/images/MNIST/validation_MNIST/leave-one-in/class_index=2.png
deleted file mode 100644
index 97a857e..0000000
Binary files a/images/MNIST/validation_MNIST/leave-one-in/class_index=2.png and /dev/null differ
diff --git a/images/MNIST/validation_MNIST/leave-one-in/class_index=3.png b/images/MNIST/validation_MNIST/leave-one-in/class_index=3.png
deleted file mode 100644
index 0a954fb..0000000
Binary files a/images/MNIST/validation_MNIST/leave-one-in/class_index=3.png and /dev/null differ
diff --git a/images/MNIST/validation_MNIST/leave-one-in/class_index=4.png b/images/MNIST/validation_MNIST/leave-one-in/class_index=4.png
deleted file mode 100644
index 3a77217..0000000
Binary files a/images/MNIST/validation_MNIST/leave-one-in/class_index=4.png and /dev/null differ
diff --git a/images/MNIST/validation_MNIST/leave-one-in/class_index=5.png b/images/MNIST/validation_MNIST/leave-one-in/class_index=5.png
deleted file mode 100644
index 724df25..0000000
Binary files a/images/MNIST/validation_MNIST/leave-one-in/class_index=5.png and /dev/null differ
diff --git a/images/MNIST/validation_MNIST/leave-one-in/class_index=6.png b/images/MNIST/validation_MNIST/leave-one-in/class_index=6.png
deleted file mode 100644
index 47b5124..0000000
Binary files a/images/MNIST/validation_MNIST/leave-one-in/class_index=6.png and /dev/null differ
diff --git a/images/MNIST/validation_MNIST/leave-one-in/class_index=7.png b/images/MNIST/validation_MNIST/leave-one-in/class_index=7.png
deleted file mode 100644
index 0e56f03..0000000
Binary files a/images/MNIST/validation_MNIST/leave-one-in/class_index=7.png and /dev/null differ
diff --git a/images/MNIST/validation_MNIST/leave-one-in/class_index=8.png b/images/MNIST/validation_MNIST/leave-one-in/class_index=8.png
deleted file mode 100644
index fe962e4..0000000
Binary files a/images/MNIST/validation_MNIST/leave-one-in/class_index=8.png and /dev/null differ
diff --git a/images/MNIST/validation_MNIST/leave-one-in/class_index=9.png b/images/MNIST/validation_MNIST/leave-one-in/class_index=9.png
deleted file mode 100644
index 49c71fd..0000000
Binary files a/images/MNIST/validation_MNIST/leave-one-in/class_index=9.png and /dev/null differ
diff --git a/images/MNIST/validation_MNIST/leave-one-out/class_index=1.png b/images/MNIST/validation_MNIST/leave-one-out/class_index=1.png
deleted file mode 100644
index e0518f5..0000000
Binary files a/images/MNIST/validation_MNIST/leave-one-out/class_index=1.png and /dev/null differ
diff --git a/images/MNIST/validation_MNIST/leave-one-out/class_index=2.png b/images/MNIST/validation_MNIST/leave-one-out/class_index=2.png
deleted file mode 100644
index ac07bc6..0000000
Binary files a/images/MNIST/validation_MNIST/leave-one-out/class_index=2.png and /dev/null differ
diff --git a/images/MNIST/validation_MNIST/leave-one-out/class_index=3.png b/images/MNIST/validation_MNIST/leave-one-out/class_index=3.png
deleted file mode 100644
index 85e61bc..0000000
Binary files a/images/MNIST/validation_MNIST/leave-one-out/class_index=3.png and /dev/null differ
diff --git a/images/MNIST/validation_MNIST/leave-one-out/class_index=4.png b/images/MNIST/validation_MNIST/leave-one-out/class_index=4.png
deleted file mode 100644
index 4b490cf..0000000
Binary files a/images/MNIST/validation_MNIST/leave-one-out/class_index=4.png and /dev/null differ
diff --git a/images/MNIST_vae_basic_in.png b/images/MNIST_vae_basic_in.png
deleted file mode 100644
index e16fd4a..0000000
Binary files a/images/MNIST_vae_basic_in.png and /dev/null differ
diff --git a/images/MNIST_vae_basic_out.png b/images/MNIST_vae_basic_out.png
deleted file mode 100644
index ea5fc76..0000000
Binary files a/images/MNIST_vae_basic_out.png and /dev/null differ
diff --git a/images/barplot_1-10.png b/images/barplot_1-10.png
deleted file mode 100644
index 9a11aba..0000000
Binary files a/images/barplot_1-10.png and /dev/null differ
diff --git a/images/barplot_11-20.png b/images/barplot_11-20.png
deleted file mode 100644
index 2ba75f1..0000000
Binary files a/images/barplot_11-20.png and /dev/null differ
diff --git a/images/downsampled.png b/images/downsampled.png
deleted file mode 100644
index c0a37a6..0000000
Binary files a/images/downsampled.png and /dev/null differ
diff --git a/images/mill_validation/README.md b/images/mill_validation/README.md
deleted file mode 100644
index 61977f6..0000000
--- a/images/mill_validation/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# MIL results evaluated at $n$ validation samples
-
-These pictures present results for MIL datasets (test AUC) for all models. For every number of anomalous samples in validation data $(n = \{ 1,2,5,10,20,50,100 \})$ the best model was chosen. The results were averaged for different chosen anomalies in validation dataset.
\ No newline at end of file
diff --git a/images/mill_validation/all_models_BrownCreeper.png b/images/mill_validation/all_models_BrownCreeper.png
deleted file mode 100644
index 2461e97..0000000
Binary files a/images/mill_validation/all_models_BrownCreeper.png and /dev/null differ
diff --git a/images/mill_validation/all_models_CorelAfrican.png b/images/mill_validation/all_models_CorelAfrican.png
deleted file mode 100644
index 2e5b762..0000000
Binary files a/images/mill_validation/all_models_CorelAfrican.png and /dev/null differ
diff --git a/images/mill_validation/all_models_CorelBeach.png b/images/mill_validation/all_models_CorelBeach.png
deleted file mode 100644
index 8e3f868..0000000
Binary files a/images/mill_validation/all_models_CorelBeach.png and /dev/null differ
diff --git a/images/mill_validation/all_models_Elephant.png b/images/mill_validation/all_models_Elephant.png
deleted file mode 100644
index aa2a100..0000000
Binary files a/images/mill_validation/all_models_Elephant.png and /dev/null differ
diff --git a/images/mill_validation/all_models_Fox.png b/images/mill_validation/all_models_Fox.png
deleted file mode 100644
index ce78b1c..0000000
Binary files a/images/mill_validation/all_models_Fox.png and /dev/null differ
diff --git a/images/mill_validation/all_models_Musk1.png b/images/mill_validation/all_models_Musk1.png
deleted file mode 100644
index 0db83ab..0000000
Binary files a/images/mill_validation/all_models_Musk1.png and /dev/null differ
diff --git a/images/mill_validation/all_models_Musk2.png b/images/mill_validation/all_models_Musk2.png
deleted file mode 100644
index 5878499..0000000
Binary files a/images/mill_validation/all_models_Musk2.png and /dev/null differ
diff --git a/images/mill_validation/all_models_Mutagenesis1.png b/images/mill_validation/all_models_Mutagenesis1.png
deleted file mode 100644
index edd1594..0000000
Binary files a/images/mill_validation/all_models_Mutagenesis1.png and /dev/null differ
diff --git a/images/mill_validation/all_models_Mutagenesis2.png b/images/mill_validation/all_models_Mutagenesis2.png
deleted file mode 100644
index fc9cf7c..0000000
Binary files a/images/mill_validation/all_models_Mutagenesis2.png and /dev/null differ
diff --git a/images/mill_validation/all_models_Newsgroups1.png b/images/mill_validation/all_models_Newsgroups1.png
deleted file mode 100644
index 9d4a337..0000000
Binary files a/images/mill_validation/all_models_Newsgroups1.png and /dev/null differ
diff --git a/images/mill_validation/all_models_Newsgroups2.png b/images/mill_validation/all_models_Newsgroups2.png
deleted file mode 100644
index b97abf1..0000000
Binary files a/images/mill_validation/all_models_Newsgroups2.png and /dev/null differ
diff --git a/images/mill_validation/all_models_Newsgroups3.png b/images/mill_validation/all_models_Newsgroups3.png
deleted file mode 100644
index 364caab..0000000
Binary files a/images/mill_validation/all_models_Newsgroups3.png and /dev/null differ
diff --git a/images/mill_validation/all_models_Protein.png b/images/mill_validation/all_models_Protein.png
deleted file mode 100644
index e39e1b8..0000000
Binary files a/images/mill_validation/all_models_Protein.png and /dev/null differ
diff --git a/images/mill_validation/all_models_Tiger.png b/images/mill_validation/all_models_Tiger.png
deleted file mode 100644
index 2b9e0d4..0000000
Binary files a/images/mill_validation/all_models_Tiger.png and /dev/null differ
diff --git a/images/mill_validation/all_models_UCSBBreastCancer.png b/images/mill_validation/all_models_UCSBBreastCancer.png
deleted file mode 100644
index 2b34c2f..0000000
Binary files a/images/mill_validation/all_models_UCSBBreastCancer.png and /dev/null differ
diff --git a/images/mill_validation/all_models_Web1.png b/images/mill_validation/all_models_Web1.png
deleted file mode 100644
index 62684e0..0000000
Binary files a/images/mill_validation/all_models_Web1.png and /dev/null differ
diff --git a/images/mill_validation/all_models_Web2.png b/images/mill_validation/all_models_Web2.png
deleted file mode 100644
index 3016361..0000000
Binary files a/images/mill_validation/all_models_Web2.png and /dev/null differ
diff --git a/images/mill_validation/all_models_Web3.png b/images/mill_validation/all_models_Web3.png
deleted file mode 100644
index 768a5ef..0000000
Binary files a/images/mill_validation/all_models_Web3.png and /dev/null differ
diff --git a/images/mill_validation/all_models_Web4.png b/images/mill_validation/all_models_Web4.png
deleted file mode 100644
index f5fdbf0..0000000
Binary files a/images/mill_validation/all_models_Web4.png and /dev/null differ
diff --git a/images/mill_validation/all_models_WinterWren.png b/images/mill_validation/all_models_WinterWren.png
deleted file mode 100644
index 4e2d6e3..0000000
Binary files a/images/mill_validation/all_models_WinterWren.png and /dev/null differ
diff --git a/scripts/evaluation/MIL/mill_results.jl b/scripts/evaluation/MIL/mill_results.jl
index a9f4f3b..813ba7c 100644
--- a/scripts/evaluation/MIL/mill_results.jl
+++ b/scripts/evaluation/MIL/mill_results.jl
@@ -60,9 +60,8 @@ save(datadir("dataframes", "mill_results_scores_agg.bson"), mill_results_scores_
 
 
 # if already calculated, just load the data
-mill_results_collection = load(datadir("dataframes", "mill_results_collection.bson"))
-mill_results_scores = load(datadir("dataframes", "mill_results_scores.bson"))
-mill_results_scores_agg = load(datadir("dataframes", "mill_results_scores_agg.bson"))
+mill_results_collection = load(datadir("results", "MIL", "mill_results_collection.bson"))
+mill_results_scores_agg = load(datadir("results", "MIL", "mill_results_scores_agg.bson"))
 
 
 ###################################################
diff --git a/scripts/evaluation/MIL/mill_results_table.jl b/scripts/evaluation/MIL/mill_results_table.jl
index 972b9b8..9bf80d1 100644
--- a/scripts/evaluation/MIL/mill_results_table.jl
+++ b/scripts/evaluation/MIL/mill_results_table.jl
@@ -14,7 +14,7 @@ using BSON
 include(scriptsdir("evaluation", "MIL", "workflow.jl"))
 
 # load results dataframes
-mill_results_collection = load(datadir("dataframes", "mill_results_collection.bson"))
+mill_results_collection = load(datadir("results", "MIL", "mill_results_collection.bson"))
 knn_basic, vae_basic, vae_instance, statistician, poolmodel, mgmm = map(key -> mill_results_collection[key], modelnames)
 modelvec = [knn_basic, vae_basic, vae_instance, statistician, poolmodel, mgmm]
 # add modelname
diff --git a/scripts/evaluation/MNIST/best_model_downsampled.jl b/scripts/evaluation/MNIST/best_model_downsampled.jl
deleted file mode 100644
index 4d9a369..0000000
--- a/scripts/evaluation/MNIST/best_model_downsampled.jl
+++ /dev/null
@@ -1,103 +0,0 @@
-using DrWatson
-@quickactivate
-using GroupAD
-using GroupAD: Evaluation
-using DataFrames
-using Statistics
-using EvalMetrics
-
-using Plots
-using StatsPlots
-ENV["GKSwstype"] = "100"
-
-include(scriptsdir("evaluation", "workflow.jl"))
-include(scriptsdir("evaluation", "downsampling_src.jl"))
-
-"""
-    find_best_model_scores(modelname::String, dataset::String, method::String, class_ind::Int; metric=:val_AUC)
-
-Returns the rows of a results dataframe with the best result based on chosen
-metric for all scores calculated for the model.
-"""
-function find_best_model_scores(modelname::String, dataset::String, method::String, class_ind::Int; metric=:val_AUC)
-    folder = datadir("experiments", "contamination-0.0", modelname, dataset, method, "class_index=$(class_ind)")
-    data = GroupAD.Evaluation.results_dataframe(folder)
-    point = load(GroupAD.Evaluation.collect_scores(folder)[1])
-    params = point[:parameters]
-
-    g_score = groupby(data, :score)
-    g = map(x -> groupby(x, [keys(params)...]), g_score)
-    un = unique(vcat(map(x -> unique(map(y -> size(y), x)), g)...))
-
-    if length(un) != 1
-        idx = findall.(x -> size(x,1) > 5, g)
-        @warn "There are groups with different sizes (different number of seeds). Possible duplicate models or missing seeds.
-        Removing $(sum(length.(g)) - sum(length.(idx))) groups out of $(sum(length.(g))) with less than 6 seeds."
-        g = map(i -> g[i][idx[i]], 1:length(g))
-    end
-
-    metricsnames = [:val_AUC, :val_AUPRC, :test_AUC, :test_AUPRC]
-    cdf = map(y -> combine(y, map(x -> x => mean, metricsnames)), g)
-    cdf_sorted = map(x -> sort(x, :val_AUC_mean, rev=true), cdf)
-    best_models = vcat(map(x -> DataFrame(x[1,:]), cdf_sorted)...)
-end
-
-function best_model_files_scores(best_models)
-    pr = best_models[:, Not([:val_AUC_mean, :val_AUPRC_mean, :test_AUC_mean, :test_AUPRC_mean])]
-    params = map(x -> Dict(names(x) .=> values(x)), eachrow(pr))
-    files = map(x -> savename(x, "bson", digits=5), params)
-    return files
-end
-
-function best_model_files_models(best_models)
-    mpath = GroupAD.Evaluation.collect_models(datadir("experiments", "contamination-0.0", modelname, dataset, method, "class_index=$(class_ind)"))[1]
-    mdata = load(mpath)
-    mpars = mdata["parameters"]
-    pr = best_models[:, [keys(mpars)...]]
-    params = map(x -> Dict(names(x) .=> values(x)), eachrow(pr))
-    files = map(x -> savename("model", x, "bson", digits=5), params)
-    return files
-end
-
-function mnist_paths(modelname, dataset, method, class_ind, files)
-    paths = []
-    for f in files
-        seed_paths = String[]
-        for seed in 1:10
-            folder = datadir("experiments", "contamination-0.0", modelname, dataset, method, "class_index=$(class_ind)", "seed=$seed")
-            path = joinpath(folder, f)
-            push!(seed_paths, path)
-        end
-        push!(paths, seed_paths)
-    end
-    return paths
-end
-
-function evaluate_model_at_downsampled(modelname, dataset, method; classes=1:10)
-    # load and find the best models for each score
-    models = map(c -> find_best_model_scores(modelname, dataset, method, c), classes)
-    files = best_model_files_models.(models)
-    modelpaths = map(c -> mnist_paths(modelname, dataset, method, c, files[c]), classes)
-    paths_unloaded = vcat(vcat(modelpaths...)...)
-    r1 = map(x -> match(r"(.*\/seed=[0-9]{1,2})(\/.*)", x).captures[1], paths_unloaded)
-
-    mdf = unique(DataFrame(
-        :path => paths_unloaded,
-        :group => r1
-    ))
-
-    gdf = groupby(mdf, :group)
-    arr = map(x -> x[:, :path], gdf)
-
-    for ar in arr
-        evaluate_at_downsampled(ar)
-    end
-end
-
-# for testing purposes
-modelname = "vae_basic"
-dataset = "MNIST"
-method = "leave-one-in"
-
-# evaluate and save new scores
-evaluate_model_at_downsampled("vae_basic", "MNIST", "leave-one-out")
\ No newline at end of file
diff --git a/scripts/evaluation/MNIST/downsampling_src.jl b/scripts/evaluation/MNIST/downsampling_src.jl
deleted file mode 100644
index 22e59d6..0000000
--- a/scripts/evaluation/MNIST/downsampling_src.jl
+++ /dev/null
@@ -1,253 +0,0 @@
-using DrWatson
-using GroupAD
-using GroupAD: load_data
-using GroupAD.Models: unpack_mill
-using BSON
-using DataFrames
-using Latexify
-using Mill
-using ValueHistories
-
-using Plots
-using Plots: scatter, scatter!
-using StatsPlots
-ENV["GKSwstype"] = "100"
-include(scriptsdir("plotting", "mnist.jl"))
-
-using Flux
-using DistributionsAD
-using ConditionalDists
-using GenerativeModels
-using Random
-using StatsBase
-using Statistics
-
-"""
-    downsample(x::AbstractArray, ratio=0.5; seed = nothing)
-    downsample(data::Mill.BagNode, labels; seed = nothing)
-    downsample(data::Tuple; seed = nothing)
-
-The `downsample` function is used to reduce cardinality of some bags in the data.
-The input dataset is randomly divided into 4 parts of same length. First part is left
-as it is. Bags in the other parts are donwsampled to 90%, 75% and 50% of instances.
-"""
-function downsample(x::AbstractArray, ratio=0.5; seed = nothing)
-    # set seed
-    (seed === nothing) ? nothing : Random.seed!(seed)
-
-    n = size(x,2)
-    nd = round(Int, n*ratio)
-    idx = sample(1:n, nd)
-
-    # reset seed
-    (seed === nothing) ? nothing : Random.seed!()
-
-    return x[:, idx]
-end
-function downsample(x::Mill.BagNode, labels; seed = nothing)
-    d, _ = GroupAD.Models.unpack_mill((x, []))
-    n = length(d)
-
-    (seed === nothing) ? nothing : Random.seed!(seed)
-    bag_idx = sample(1:n,n)
-    indices = round.(Int, n ./ [4,3,2,1])
-
-    down0 = d[bag_idx[1:indices[1]]]
-    down90 = map(x -> downsample(x, 0.9; seed = seed), d[bag_idx[indices[1]+1 : indices[2]]])
-    down75 = map(x -> downsample(x, 0.75; seed = seed), d[bag_idx[indices[2]+1 : indices[3]]])
-    down50 = map(x -> downsample(x, 0.5; seed = seed), d[bag_idx[indices[3]+1 : indices[4]]])
-
-    l0 = labels[bag_idx[1:indices[1]]]
-    l90 = labels[bag_idx[indices[1]+1 : indices[2]]]
-    l75 = labels[bag_idx[indices[2]+1 : indices[3]]]
-    l50 = labels[bag_idx[indices[3]+1 : indices[4]]]
-
-    new = vcat(down0, down90, down75, down50)
-    sz = size.(new, 2)
-    ids = vcat(0, map(i -> sum(sz[1:i]), 1:n))
-    bagids = map(i -> ids[i]+1:ids[i+1], 1:n)
-
-    new_labels = vcat(l0,l90,l75,l50)
-    if length(new) != n != length(new_labels)
-        error("Error in length of new data.")
-    end
-
-    M = hcat(new...)
-    (Mill.BagNode(ArrayNode(M), bagids), new_labels)
-end
-function downsample(data::Tuple; seed = nothing)
-    train, val, test = data
-    tr = (downsample(train..., seed = seed))
-    v = (downsample(val..., seed = seed))
-    t = (downsample(test..., seed = seed))
-
-    return (tr, v, t)
-end
-
-"""
-    evaluate_at_downsampled(modelpath::String; all_results = all_results)
-
-This function takes the modelpath and calculates all scores for the model on downsampled MNIST dataset.
-The function extracts all necessary parameters from the modelpath so nothing else is needed.
-
-Steps:
-1. Load the model.
-2. Extract all parameters.
-3. Load data given parameters, prepare (leave-one-in or leave-one-out downsampling), and downsample
-    the instances in the bags.
-4. Load results functions and send them to `experiment` or `experiment_bag` functions to calculate
-    scores on downsampled data.
-
-New result files are saved to the folder data/contamination-0.0/model/MNIST_downsampled/...
-Therefore the evaluation of this dataset can be done easily with the functions for evaluation
-already prepared, the only thing to change is the dataset="MNIST_downsampled" name.
-
-Note: the calculation of scores for some models (vae_instance, statistician) might take a while because
-of sampled likelihood function.
-"""
-function evaluate_at_downsampled(modelpath::String)
-    # seed extraction from modelpath
-    m = match(r"/contamination-0.0\/(.*)\/MNIST.*\/seed=([0-9]*)\/.*", modelpath)
-    modelname = m.captures[1]
-    seed = parse(Int, m.captures[2])
-
-    # load model
-    training_info = load(modelpath)
-    model = training_info["model"]
-    parameters = training_info["parameters"]
-    method, class_ind = parameters[:method], parameters[:class]
-
-    # this doesn't work because it is a Dict and not NamedTuple
-    # save_entries = merge(training_info, (modelname = modelname, seed = seed, dataset = "MNIST"))
-    # this should probably be enough
-    save_entries = (model = model, modelname = modelname, seed = seed, dataset = "MNIST")
-
-    # load data, prepare and downsample
-    data = GroupAD.load_data("MNIST", anomaly_class_ind = class_ind, method = method, contamination = 0)
-    if method == "leave-one-in"
-        data = GroupAD.leave_one_in(data; seed=seed)
-    elseif method == "leave-one-out"
-        data = GroupAD.leave_one_out(data; seed=seed)
-    else
-        error("This evaluation only works on MNIST!")
-    end
-    ds = downsample(data; seed = seed)
-
-    # any difference to path? like MNIST_downsampled?
-    # _savepath = datadir("experiments/contamination-0.0", modelname, "MNIST", method, "class_index=$(class)/seed=$(seed)")
-    _savepath = datadir("experiments/contamination-0.0", modelname, "MNIST_downsampled", method, "class_index=$(class_ind)/seed=$(seed)")
-
-    # to do: add results (the anomalous functions)
-    # this is results for vae_instance
-    results = results_functions(modelname, model, parameters)
-
-    for result in results
-        if modelname in ["vae_instance", "statistician", "PoolModel"]
-            experiment_bag(result..., ds, _savepath; save_entries...)
-        else
-            GroupAD.experiment(result..., ds, _savepath; save_entries...)
-        end
-    end
-end
-function evaluate_at_downsampled(modelpaths::Array{String,1})
-    # seed extraction from modelpath
-    modelpath = modelpaths[1]
-    m = match(r"/contamination-0.0\/(.*)\/MNIST\/(.*)\/class_index=([0-9]{1,2})\/seed=([0-9]*)\/.*", modelpath)
-    modelname, method, class_ind, seed = String.(m.captures)
-    class_ind, seed = parse.(Int, (class_ind, seed))
-
-    # load data, prepare and downsample
-    data = GroupAD.load_data("MNIST", anomaly_class_ind = class_ind, method = method, contamination = 0)
-    if method == "leave-one-in"
-        data = GroupAD.leave_one_in(data; seed=seed)
-    elseif method == "leave-one-out"
-        data = GroupAD.leave_one_out(data; seed=seed)
-    else
-        error("This evaluation only works on MNIST!")
-    end
-    ds = downsample(data; seed = seed)
-
-    for modelpath in modelpaths
-        # load model
-        try
-            training_info = load(modelpath)
-            model = training_info["model"]
-            parameters = training_info["parameters"]
-
-            # this doesn't work because it is a Dict and not NamedTuple
-            # save_entries = merge(training_info, (modelname = modelname, seed = seed, dataset = "MNIST"))
-            # this should probably be enough
-            save_entries = (model = model, modelname = modelname, seed = seed, dataset = "MNIST")
-
-            # any difference to path? like MNIST_downsampled?
-            # _savepath = datadir("experiments/contamination-0.0", modelname, "MNIST", method, "class_index=$(class)/seed=$(seed)")
-            _savepath = datadir("experiments/contamination-0.0", modelname, "MNIST_downsampled", method, "class_index=$(class_ind)/seed=$(seed)")
-
-            # to do: add results (the anomalous functions)
-            # this is results for vae_instance
-            results = results_functions(modelname, model, parameters)
-
-            for result in results
-                if modelname in ["vae_instance", "statistician", "PoolModel"]
-                    GroupAD.experiment_bag(result..., ds, _savepath; save_entries...)
-                else
-                    GroupAD.experiment(result..., ds, _savepath; save_entries...)
-                end
-            end
-        catch e
-            @warn "Model not found."
-        end
-    end
-end
-
-# save this as a contant NamedTuple
-# choose from all_results based on modelname
-# results = all_results[Symbol(modelname)]
-function results_functions(modelname, model, parameters)
-    if haskey(parameters, :aggregation)
-        agf = eval(:($(Symbol(parameters[:aggregation]))))
-    end
-    all_results = (
-        vae_basic = [
-            (x -> GroupAD.Models.reconstruction_score(model,x,agf), 
-                merge(parameters, (score = "reconstruction",))),
-            (x -> GroupAD.Models.reconstruction_score_mean(model,x,agf), 
-                merge(parameters, (score = "reconstruction-mean",))),
-            (x -> GroupAD.Models.reconstruction_score(model,x,agf,100), 
-                merge(parameters, (score = "reconstruction-sampled", L=100)))		
-        ],
-        vae_instance = [
-            (x -> GroupAD.Models.likelihood(model,x), 
-                merge(parameters, (score = "reconstruction",))),
-            (x -> GroupAD.Models.mean_likelihood(model,x), 
-                merge(parameters, (score = "reconstruction-mean",))),
-            (x -> GroupAD.Models.likelihood(model,x,50), 
-                merge(parameters, (score = "reconstruction-sampled", L=50))),
-            (x -> GroupAD.Models.reconstruct(model,x), 
-                merge(parameters, (score = "reconstructed_input",)))
-        ],
-        statistician = [
-            (x -> GroupAD.Models.likelihood(model,x), 
-                merge(parameters, (score = "reconstruction",))),
-            (x -> GroupAD.Models.mean_likelihood(model,x), 
-                merge(parameters, (score = "reconstruction-mean",))),
-            (x -> GroupAD.Models.likelihood(model,x,50), 
-                merge(parameters, (score = "reconstruction-sampled", L=50))),
-            (x -> GroupAD.Models.reconstruct_input(model, x),
-                merge(parameters, (score = "reconstructed_input",)))
-        ],
-        MGMM = [
-            (x -> GroupAD.Models.topic_score(model,x), 
-                merge(parameters, (score = "topic",))),
-            (x -> GroupAD.Models.point_score(model,x), 
-                merge(parameters, (score = "point",))),
-            (x -> GroupAD.Models.MGMM_score(model,x), 
-                merge(parameters, (score = "topic+point",)))
-        ],
-        PoolModel = [
-            (x -> GroupAD.Models.reconstruct(model, x),
-                merge(parameters, (score = "reconstructed_input",)))
-        ]
-    )
-    return all_results[Symbol(modelname)]
-end
\ No newline at end of file
diff --git a/scripts/evaluation/MNIST/images.jl b/scripts/evaluation/MNIST/images.jl
deleted file mode 100644
index 52fbafd..0000000
--- a/scripts/evaluation/MNIST/images.jl
+++ /dev/null
@@ -1,44 +0,0 @@
-using DrWatson
-using GroupAD
-using GroupAD: load_data
-using GroupAD.Models: unpack_mill
-using BSON
-using DataFrames
-using Latexify
-using Mill
-using ValueHistories
-
-using Plots
-using Plots: scatter, scatter!
-using StatsPlots
-ENV["GKSwstype"] = "100"
-include(scriptsdir("plotting", "mnist.jl"))
-
-using Flux
-using DistributionsAD
-using ConditionalDists
-using GenerativeModels
-using Random
-using StatsBase
-
-# load data    
-data = load_data("MNIST")
-d = unpack_mill(data[1])
-dt = d[1]
-
-# downsample train data
-mnist_down_90 = map(x -> downsample(x, 0.9; seed = 2), dt)
-mnist_down_75 = map(x -> downsample(x, 0.75; seed = 2), dt)
-mnist_down_50 = map(x -> downsample(x, 0.5; seed = 2), dt)
-
-# plot downsampled data in colums
-k = 50
-d_title = ["100%" "" "" "" "" "90%" "" "" "" "" "75%" "" "" "" "" "50%" "" "" "" ""]
-plot(
-    plot_number_row(k, dt),
-    plot_number_row(k, mnist_down_90),
-    plot_number_row(k, mnist_down_75),
-    plot_number_row(k, mnist_down_50),
-    layout = (1,4), size=(600,600), title=d_title
-)
-savefig(plotsdir("MNIST", "downsampled.png"))
\ No newline at end of file
diff --git a/scripts/evaluation/MNIST/mnist_results_in.jl b/scripts/evaluation/MNIST/mnist_results_in.jl
index a73bb7d..471444d 100644
--- a/scripts/evaluation/MNIST/mnist_results_in.jl
+++ b/scripts/evaluation/MNIST/mnist_results_in.jl
@@ -19,7 +19,7 @@ include(scriptsdir("evaluation", "MIL", "workflow.jl"))
 
 # first empty Dictionary
 #mnist_results_in = Dict()
-mnist_results_in = load(datadir("dataframes", "mnist_results_in.bson"))
+mnist_results_in = load(datadir("results", "MNIST", "mnist_results_in.bson"))
 
 modelname = "knn_basic"
 modelname = "vae_basic"
@@ -39,7 +39,7 @@ for class in 1:10
 end
 rdf = vcat(results...)
 push!(mnist_results_in, modelname => rdf)
-save(datadir("dataframes", "mnist_results_in.bson"), mnist_results_in)
+save(datadir("results", "MNIST", "mnist_results_in.bson"), mnist_results_in)
 
 # add :model columns
 modelnames = ["knn_basic", "vae_basic", "vae_instance", "statistician", "PoolModel"]
@@ -73,7 +73,7 @@ savefig(plotsdir("MNIST", "groupedbar_leave-in.png"))
 ########################
 
 #mnist_results_in_scores = Dict()
-mnist_results_in_scores = load(datadir("dataframes", "mnist_results_in_scores.bson"))
+mnist_results_in_scores = load(datadir("results", "MNIST", "mnist_results_in_scores.bson"))
 
 modelname = "knn_basic"
 modelname = "vae_basic"
@@ -93,7 +93,7 @@ for class in 1:10
 end
 rdf = vcat(results...)
 push!(mnist_results_in_scores, modelname => rdf)
-save(datadir("dataframes", "mnist_results_in_scores.bson"), mnist_results_in_scores)
+save(datadir("results", "MNIST", "mnist_results_in_scores.bson"), mnist_results_in_scores)
 
 # groupedbarplot for :aggregation, or :score, :type etc.
 # kNN
diff --git a/scripts/evaluation/MNIST/mnist_results_out.jl b/scripts/evaluation/MNIST/mnist_results_out.jl
index ad32eb9..f0f7a86 100644
--- a/scripts/evaluation/MNIST/mnist_results_out.jl
+++ b/scripts/evaluation/MNIST/mnist_results_out.jl
@@ -17,7 +17,7 @@ include(scriptsdir("evaluation", "MIL", "workflow.jl"))
 ### leave-one-out ###
 #####################
 #mnist_results_out = Dict()
-mnist_results_out = load(datadir("dataframes", "mnist_results_out.bson"))
+mnist_results_out = load(datadir("results", "MNIST", "mnist_results_out.bson"))
 
 modelname = "knn_basic"
 modelname = "vae_basic"
@@ -35,7 +35,7 @@ for class in 1:10
 end
 rdf = vcat(results...)
 push!(mnist_results_out, modelname => rdf)
-save(datadir("dataframes", "mnist_results_out.bson"), mnist_results_out)
+save(datadir("results", "MNIST", "mnist_results_out.bson"), mnist_results_out)
 
 # add :model columns
 modelnames = ["knn_basic", "vae_basic", "vae_instance"]
@@ -64,7 +64,7 @@ mnist_barplots(
 ########################
 
 #mnist_results_out_scores = Dict()
-mnist_results_out_scores = load(datadir("dataframes", "mnist_results_out_scores.bson"))
+mnist_results_out_scores = load(datadir("results", "MNIST", "mnist_results_out_scores.bson"))
 
 modelname = "knn_basic"
 modelname = "vae_basic"
@@ -81,7 +81,7 @@ for class in 1:10
 end
 rdf = vcat(results...)
 push!(mnist_results_out_scores, modelname => rdf)
-save(datadir("dataframes", "mnist_results_out_scores.bson"), mnist_results_out_scores)
+save(datadir("results", "MNIST", "mnist_results_out_scores.bson"), mnist_results_out_scores)
 
 
 knn_basic, vae_basic, vae_instance = map(m-> mnist_results_out_scores[m], modelnames)
diff --git a/scripts/evaluation/MNIST/mnist_results_table.jl b/scripts/evaluation/MNIST/mnist_results_table.jl
index b8cc691..bd584f8 100644
--- a/scripts/evaluation/MNIST/mnist_results_table.jl
+++ b/scripts/evaluation/MNIST/mnist_results_table.jl
@@ -13,7 +13,7 @@ include(scriptsdir("evaluation", "MIL", "workflow.jl"))
 ############## leave-one-in ##############
 ##########################################
 
-mnist_results_in = load(datadir("dataframes", "mnist_results_in.bson"))
+mnist_results_in = load(datadir("results", "MNIST", "mnist_results_in.bson"))
 
 #model_names = ["kNNagg", "VAEagg", "VAE", "NS", "PoolModel", "MGMM"]
 model_names = ["kNNagg", "VAEagg", "VAE", "NS", "PoolModel"]
@@ -67,7 +67,7 @@ t = pretty_table(
 ############## leave-one-out ##############
 ###########################################
 
-mnist_results_out = load(datadir("dataframes", "mnist_results_out.bson"))
+mnist_results_out = load(datadir("results", "MNIST", "mnist_results_out.bson"))
 
 #model_names = ["kNNagg", "VAEagg", "VAE", "NS", "PoolModel", "MGMM"]
 model_names = ["kNNagg", "VAEagg", "VAE"]
diff --git a/scripts/evaluation/MNIST/normal+downsampled.jl b/scripts/evaluation/MNIST/normal+downsampled.jl
deleted file mode 100644
index 4008ff6..0000000
--- a/scripts/evaluation/MNIST/normal+downsampled.jl
+++ /dev/null
@@ -1,41 +0,0 @@
-modelname = "vae_basic"
-dataset = "MNIST"
-method = "leave-one-out"
-classes = 1:10
-
-models_full = map(c -> find_best_model_scores(modelname, dataset, method, c), classes)
-models_downsampled = map(c -> find_best_model_scores(modelname, "MNIST_downsampled", method, c), classes)
-
-df_full = sort(vcat(models_full...)[:, [:class, :test_AUC_mean]])
-df_down = sort(vcat(models_downsampled...)[:, [:class, :test_AUC_mean]])
-
-g_full = groupby(df_full, :class)
-g1 = map(x -> rename(x, :test_AUC_mean => Symbol("class=$(x[1,:class])")), g_full)
-g11 = hcat(map(x -> x[!, 2], g1)...)
-
-g_down = groupby(df_down, :class)
-g2 = map(x -> rename(x, :test_AUC_mean => Symbol("class=$(x[1,:class])")), g_down)
-g22 = hcat(map(x -> x[!, 2], g2)...)
-
-G = vcat(g11,g22)
-scorenames = ["reconstruction-sampled" "reconstruction-mean" "reconstruction" "reconstruction-sampled + downsample" "reconstruction-mean + downsample" "reconstruction + downsample"]
-
-groupedbar(
-    map(i -> "$i", 1:10), G',
-    ylabel="AUC", labels=scorenames,
-    ylims=(0,1), size=(1850,700), color_palette=:tab20,
-    legendfontsize=12, tickfontsize=12, legend=:outerbottom
-    )
-savefig(plotsdir("barplots", "MNIST_$(modelname)_$(method).png"))
-
-# to put corresponding scores next to each other
-G2 = G[[1,4,2,5,3,6],:]
-scorenames2 = ["reconstruction-sampled" "reconstruction-sampled + downsample" "reconstruction-mean" "reconstruction-mean + downsample" "reconstruction" "reconstruction + downsample"]
-
-groupedbar(
-    map(i -> "$i", 1:10), G2',
-    ylabel="AUC", labels=scorenames2,
-    ylims=(0,1), size=(1850,700), color_palette=:tab20,
-    legendfontsize=12, tickfontsize=12, legend=:outerbottom
-    )
-savefig(plotsdir("barplots", "MNIST_$(modelname)_$(method)_2.png"))
\ No newline at end of file
diff --git a/scripts/evaluation/mill_results.jl b/scripts/evaluation/mill_results.jl
deleted file mode 100644
index d8e0f93..0000000
--- a/scripts/evaluation/mill_results.jl
+++ /dev/null
@@ -1,121 +0,0 @@
-using DrWatson
-@quickactivate
-using GroupAD
-using GroupAD: Evaluation
-using DataFrames
-using Statistics
-using EvalMetrics
-
-using Plots
-using StatsPlots
-ENV["GKSwstype"] = "100"
-
-include(scriptsdir("evaluation", "workflow.jl"))
-
-mill_datasets = ["BrownCreeper", "CorelBeach", "CorelAfrican", "Elephant", "Fox", "Musk1", "Musk2", "Mutagenesis1", "Mutagenesis2",
-                    "Newsgroups1", "Newsgroups2", "Newsgroups3", "Protein", "Tiger", "UCSBBreastCancer",
-                    "Web1", "Web2", "Web3", "Web4", "WinterWren"]
-
-"""
-    mill_results(modelname, mill_datasets; info = true)
-
-Calculates the results dataframe for full validation dataset for all
-MIL datasets and chosen model.
-"""
-function mill_results(modelname, mill_datasets; info = true)
-    res = []
-
-    for d in mill_datasets
-        model = find_best_model(modelname, d) |> DataFrame
-        #@info d
-        insertcols!(model, :dataset => d)
-        push!(res, model)
-        if info
-            @info "Best $modelname model for $d found."
-        end
-    end
-
-    results = vcat(res...)
-end
-
-"""
-    barplot_mill(modelname, results; sorted = false)
-
-Plots and saves a barplot of all MIL datasets and given model.
-"""
-function barplot_mill(modelname, results; sorted = false, savef = false)
-    if sorted
-        res_sort = sort(results, :val_AUC_mean, rev=true)
-    else
-        res_sort = results
-    end
-    r = res_sort[:, [:val_AUC_mean, :test_AUC_mean]] |> Array
-    p = groupedbar(
-        res_sort[:, :dataset],r,xrotation=55,legendtitle=modelname,
-        label=["val-AUC" "test-AUC"], ylabel="AUC", legend=:bottomright, ylims=(0,1))
-    if savef
-        savefig(plotsdir("barplot_$(modelname).png"))
-    end
-    return p
-end
-
-# calculate results dataframe (full validation)
-res_knn = mill_results("knn_basic",mill_datasets)
-res_vae_basic = mill_results("vae_basic",mill_datasets)
-res_vae_instance = mill_results("vae_instance",mill_datasets)
-res_mgmm = mill_results("MGMM",mill_datasets)
-res_statistician = mill_results("statistician", mill_datasets)
-res_pool = mill_results("PoolModel", mill_datasets)
-
-results_full_validation = Dict(
-    :knn_basic => res_knn,
-    :vae_basic => res_vae_basic,
-    :vae_instance => res_vae_instance,
-    :mgmm => res_mgmm,
-    :statistician => res_statistician,
-    :poolmodel => res_pool
-)
-safesave(datadir("dataframes", "results_full_validation.bson"), results_full_validation)
-R = load(datadir("dataframes", "results_full_validation.bson"))
-
-@unpack knn_basic, vae_basic, vae_instance, mgmm, statistician, poolmodel = R
-
-# barplots for each model
-p_knn = barplot_mill("kNN (agg)", knn_basic)
-savefig(plotsdir("barplots", "knn.png"))
-p_mgmm = barplot_mill("MGMM", mgmm)
-savefig(plotsdir("barplots", "MGMM.png"))
-p_vae_b = barplot_mill("VAE (agg)", vae_basic)
-savefig(plotsdir("barplots", "vae_basic.png"))
-p_vae_i = barplot_mill("VAE", vae_instance)
-savefig(plotsdir("barplots", "vae_instance.png"))
-p_ns = barplot_mill("Neural Statistician", statistician)
-savefig(plotsdir("barplots", "statistician.png"))
-p_pool = barplot_mill("PoolModel", poolmodel)
-savefig(plotsdir("barplots", "PoolModel.png"))
-
-knn_basic_b = rename(knn_basic[:, [:dataset, :test_AUC_mean]], :test_AUC_mean => :knn_basic)
-vae_basic_b = rename(vae_basic[:, [:dataset, :test_AUC_mean]], :test_AUC_mean => :vae_basic)
-mgmm_b = rename(mgmm[:, [:dataset, :test_AUC_mean]], :test_AUC_mean => :mgmm)
-statistician_b = rename(statistician[:, [:dataset, :test_AUC_mean]], :test_AUC_mean => :statistician)
-vae_instance_b = rename(vae_instance[:, [:dataset, :test_AUC_mean]], :test_AUC_mean => :vae_instance)
-pool_b = rename(poolmodel[:, [:dataset, :test_AUC_mean]], :test_AUC_mean => :PoolModel)
-
-bar_mill = hcat(knn_basic_b, vae_basic_b, mgmm_b, statistician_b, vae_instance_b, pool_b, makeunique=true)
-modelnames = ["kNN (agg)" "VAE (agg)" "MGMM" "Neural Statistician" "VAE" "PoolModel"]
-mat = bar_mill[:, [:knn_basic, :vae_basic, :mgmm, :statistician, :vae_instance, :PoolModel]] |> Array
-
-groupedbar(
-    mill_datasets[1:10], mat[1:10,:], xrotation=15,
-    label=modelnames, ylabel="AUC",
-    ylims=(0,1), size=(1850,700), color_palette=:tab20,
-    legendfontsize=12, tickfontsize=12, legend=:outerbottom
-    )
-savefig(plotsdir("barplots", "barplot_1-10.png"))
-groupedbar(
-    mill_datasets[11:20],mat[11:20,:],xrotation=15,
-    label=modelnames, ylabel="AUC",
-    ylims=(0,1), size=(1850,700), color_palette=:tab20,
-    legendfontsize=12, tickfontsize=12, legend=:outerright
-    )
-savefig(plotsdir("barplots", "barplot_11-20.png"))
\ No newline at end of file
diff --git a/scripts/evaluation/mill_summary.jl b/scripts/evaluation/mill_summary.jl
deleted file mode 100644
index d3ec8d3..0000000
--- a/scripts/evaluation/mill_summary.jl
+++ /dev/null
@@ -1,43 +0,0 @@
-using Latexify
-
-"""
-# MIL datasets summary table
-
-Should feature:
-- dataset name
-- number of features
-- number of bags
-- number of normal/anomalous samples
-- mean/meadian of cardinalities
-"""
-
-t = DataFrame[]
-for dataset in mill_datasets
-    d = GroupAD.load_data(dataset)
-    bags = cat(d[1][1], d[2][1], d[3][1])
-    no_bags = length(bags)
-    no_features = size(bags.data.data,1)
-
-    labels = vcat(d[1][2], d[2][2], d[3][2])
-    no_anomalous = sum(labels)
-    no_normal = no_bags - no_anomalous
-
-    sizes = map(i -> size(bags[i], 2), 1:length(labels))
-    _median = median(sizes)
-    _mean = mean(sizes)
-
-    df = DataFrame(
-        :dataset => dataset,
-        :bags => no_bags,
-        :features => no_features,
-        :normal => no_normal,
-        :anomalous => no_anomalous,
-        Symbol("median size") => _median,
-        Symbol("mean size") => _mean
-    )
-    push!(t, df)
-end
-
-T = vcat(t...)
-tex = latexify(T, env=:tabular, fmt=x->round(x, digits=1), booktabs=true)
-
diff --git a/scripts/evaluation/mnist_summary.jl b/scripts/evaluation/mnist_summary.jl
deleted file mode 100644
index 1eb22c5..0000000
--- a/scripts/evaluation/mnist_summary.jl
+++ /dev/null
@@ -1,46 +0,0 @@
-using GroupAD
-using GroupAD: load_data
-using GroupAD.Models: unpack_mill
-using BSON
-using DataFrames
-using Latexify
-using Mill
-
-dp = GroupAD.get_mnist_point_cloud_datapath()
-test = load(joinpath(dp, "test.bson"))
-train = load(joinpath(dp, "train.bson"))
-
-bag_labels = vcat(train[:bag_labels], test[:bag_labels])
-labels = vcat(train[:labels], test[:labels])
-bagids = vcat(train[:bagids], test[:bagids] .+ length(train[:bag_labels]))
-data = Float32.(hcat(train[:data], test[:data]))
-data = data .+ rand(size(data)...)
-data = GroupAD.standardize(data)
-
-obs = GroupAD.seqids2bags(bagids)
-mill_mnist = BagNode(ArrayNode(data), obs)
-
-mnist, _ = unpack_mill((mill_mnist, []))
-
-mnist_df = DataFrame(:data => mnist, :class => bag_labels)
-g = groupby(mnist_df, :class)
-
-how_many = map(x -> size(x,1), g)
-classes = map(x -> x[1,:class], g)
-
-mean_card = map(x -> mean(size.(x[:, :data], 2)), g)
-med_card = map(x -> median(size.(x[:, :data], 2)), g)
-min_card = map(x -> minimum(size.(x[:, :data], 2)), g)
-max_card = map(x -> maximum(size.(x[:, :data], 2)), g)
-
-mnist_summary = DataFrame(
-    :class => classes,
-    :quantity => how_many,
-    :mean_cardinality => mean_card,
-    :minimum_cardinality => min_card,
-    :median_cardinality => med_card,
-    :maximum_cardinality => max_card
-)
-sort!(mnist_summary, :class)
-
-tex = latexify(mnist_summary, env=:tabular, fmt=x->round(x, digits=1), booktabs=true)
\ No newline at end of file