some rewrites

aicenter · Sep 23, 2021 · 9e5b52a · 9e5b52a
1 parent 948c0c8
commit 9e5b52a
Show file tree

Hide file tree

Showing 20 changed files with 178 additions and 348 deletions.
diff --git a/Project.toml b/Project.toml
@@ -17,7 +17,6 @@ Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 Flux3D = "432009dd-59a1-4b72-8c93-6462ce9b220f"
 GenerativeModels = "6ac2c632-c4cd-11e9-0501-33c4b9b2f9c9"
 IPMeasures = "d7dc6e0c-a753-11e8-227e-b362679e0b17"
-Latexify = "23fbe1c1-3f47-55db-b15f-69d7ec21a316"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 MLDataPattern = "9920b226-0b2a-5f5f-9153-9aa70a013f8b"
 Mill = "1d0525e4-8992-11e8-313c-e310e1f6ddea"

diff --git a/data/results/MIL/models/vae_basic.bson b/data/results/MIL/models/vae_basic.bson
diff --git a/scripts/README.md b/scripts/README.md
@@ -0,0 +1,35 @@
+# Scripts
+
+Every dataset type has its own folder for run scripts because the settings differ between the MIL, MNIST and toy datasets.
+
+# Evaluation of results
+
+This folder contains evaluation scripts.
+
+## One-sample evaluation
+
+To evaluate a single results file (or all results files in a folder), use the `evaluate_performance_single.jl` script, which computes AUC-ROC, AUC-PR, F-score and other metrics for the given results file.
+
+To find the best model for a particular dataset, use `evaluate_single_model.jl`. 
+
+### Examples
+```
+# evaluation of vae_basic model on Fox dataset
+$ julia scripts/evaluate_single_model.jl vae_basic Fox
+
+# evaluation of knn_basic model on MNIST dataset, leave-one-in method, normal class 1
+$ julia scripts/evaluate_single_model.jl knn_basic MNIST nothing 1 leave-one-in
+```
+
+To find the best models based on some criterion (aggregation, score, distance, etc.), use the third argument, `groupkey` (it is parsed as a `Symbol`).
+
+```
+# evaluation of vae_basic model on Fox dataset based on aggregation
+$ julia scripts/evaluate_single_model.jl vae_basic Fox aggregation
+
+# evaluation of knn_basic model on MNIST dataset, leave-one-in method, normal class 1 based on distance
+$ julia scripts/evaluate_single_model.jl knn_basic MNIST distance 1 leave-one-in
+```
+
+## MIL results
+
diff --git a/scripts/evaluate_single_model.jl b/scripts/evaluate_single_model.jl
@@ -0,0 +1,83 @@
+using DrWatson
+@quickactivate
+using GroupAD
+using GroupAD.Evaluation
+using ArgParse
+using DataFrames
+using PrettyTables
+
+s = ArgParseSettings()
+@add_arg_table! s begin
+    "modelname"
+        help = "model name"
+        arg_type = String
+        default = "vae_basic"
+    "dataset"
+        help = "dataset"
+        arg_type = String
+        default = "Fox"
+    "groupkey"
+        help = "group key, e.g. `:aggregation`"
+        arg_type = Symbol
+        default = :nothing  # sentinel symbol: compared against `:nothing` to skip grouping
+    "class"
+        help = "class for MNIST"
+        arg_type = Int
+        default = 1
+    "method"
+        help = "method: leave-one-in or leave-one-out"
+        arg_type = String
+        default = "leave-one-in"
+end
+parsed_args = parse_args(s)  # without an explicit list the global `ARGS` is parsed
+@unpack modelname, dataset, groupkey, class, method = parsed_args
+
+mill_datasets = [
+    "BrownCreeper", "CorelBeach", "CorelAfrican", "Elephant", "Fox", "Musk1", "Musk2",
+    "Mutagenesis1", "Mutagenesis2", "Newsgroups1", "Newsgroups2", "Newsgroups3", "Protein",
+    "Tiger", "UCSBBreastCancer", "Web1", "Web2", "Web3", "Web4", "WinterWren"
+]
+
+"""
+    evaluate_single_model(modelname, dataset; groupkey = nothing, class=1, method="leave-one-in")
+
+Find the best `modelname` model on `dataset` and print two tables: the full results and a
+reduced one with only the validation/test AUC and AUPRC columns.
+
+# Keywords
+- `groupkey`: column used to pick the best models per group (e.g. `:aggregation`). Both
+  `nothing` and `:nothing` (the command-line default) disable grouping.
+- `class`, `method`: only used to locate the MNIST results folder.
+
+Throws an `ErrorException` for an unknown dataset and for `"toy"`, which is not evaluated yet.
+"""
+function evaluate_single_model(modelname, dataset; groupkey = nothing, class=1, method="leave-one-in")
+    # `nothing` is the keyword default, `:nothing` is the default of the command-line argument
+    grouped = !(groupkey === nothing || groupkey == :nothing)
+    # forward `groupkey` to the result finders only when grouping was requested
+    extra = grouped ? (groupkey,) : ()
+
+    if dataset in mill_datasets
+        df = mill_results(modelname, [dataset], extra...)
+    elseif dataset == "MNIST"
+        folder = datadir("experiments", "contamination-0.0", modelname, "MNIST", method, "class_index=$(class+1)")
+        df = find_best_model(folder, extra...) |> DataFrame
+    elseif dataset == "toy"
+        # this branch used to leave `df` unassigned and fail later with an `UndefVarError`
+        error("Evaluation of the \"toy\" dataset is not implemented yet.")
+    else
+        error("Dataset \"$dataset\" for model \"$modelname\" not found.")
+    end
+
+    # columns of the reduced table; the group column comes first when grouping
+    metrics = [:val_AUC_mean, :val_AUPRC_mean, :test_AUC_mean, :test_AUPRC_mean]
+    columns = grouped ? vcat(groupkey, metrics) : metrics
+
+    println("Full results DataFrame:")
+    pretty_table(df)
+    println("Small results DataFrame:")
+    pretty_table(df[:, columns])
+    return nothing
+end
+
+evaluate_single_model(modelname, dataset; groupkey = groupkey, class=class, method=method)
diff --git a/scripts/evaluation/MIL/mill_results.jl b/scripts/evaluation/MIL/mill_results.jl
@@ -1,7 +1,7 @@
 using DrWatson
 @quickactivate
 using GroupAD
-using GroupAD: Evaluation
+using GroupAD.Evaluation
 using DataFrames
 using Statistics
 using EvalMetrics
@@ -11,7 +11,7 @@ using Plots
 using StatsPlots
 ENV["GKSwstype"] = "100"
 
-include(scriptsdir("evaluation", "MIL", "workflow.jl"))
+#include(scriptsdir("evaluation", "MIL", "workflow.jl"))
 
 mill_datasets = [
     "BrownCreeper", "CorelBeach", "CorelAfrican", "Elephant", "Fox", "Musk1", "Musk2",

diff --git a/scripts/evaluation/MIL/mill_results_table.jl b/scripts/evaluation/MIL/mill_results_table.jl
@@ -1,5 +1,5 @@
 """
-This script is very untidy and it could be optimized.
+This script is very untidy and could be optimized.
 """
 
 using DrWatson
@@ -11,9 +11,8 @@ using Statistics
 using EvalMetrics
 using BSON
 
-include(scriptsdir("evaluation", "MIL", "workflow.jl"))
-
 # load results dataframes
+modelnames = ["knn_basic", "vae_basic", "vae_instance", "statistician", "PoolModel", "MGMM"]
 mill_results_collection = load(datadir("results", "MIL", "mill_results_collection.bson"))
 knn_basic, vae_basic, vae_instance, statistician, poolmodel, mgmm = map(key -> mill_results_collection[key], modelnames)
 modelvec = [knn_basic, vae_basic, vae_instance, statistician, poolmodel, mgmm]
@@ -23,7 +22,6 @@ df = vcat(knn_basic, vae_basic, vae_instance, statistician, poolmodel, mgmm, col
 
 model_names = ["kNNagg", "VAEagg", "VAE", "NS", "PoolModel", "MGMM"]
 
-df_red = df[:, [:dataset, :model, :val_AUC_mean, :test_AUC_mean, :val_AUPRC_mean, :test_AUPRC_mean]]
 df_red = df[:, [:dataset, :model, :test_AUC_mean]]
 sort!(df_red, :dataset)
 

diff --git a/scripts/evaluation/MIL/mill_summary.jl b/scripts/evaluation/MIL/mill_summary.jl
@@ -1,4 +1,4 @@
-using Latexify
+using PrettyTables
 
 """
 # MIL datasets summary table
@@ -39,5 +39,9 @@ for dataset in mill_datasets
 end
 
 T = vcat(t...)
-tex = latexify(T, env=:tabular, fmt=x->round(x, digits=1), booktabs=true)
+t = pretty_table(
+    T,
+    formatters = ft_printf("%5.1f"),
+    backend=:latex, tf=tf_latex_booktabs, nosubheader=true
+)
 
diff --git a/scripts/evaluation/mnist_downsampling.jl → .../MNIST/downsampling/mnist_downsampling.jl b/scripts/evaluation/mnist_downsampling.jl → .../MNIST/downsampling/mnist_downsampling.jl
diff --git a/scripts/evaluation/MNIST/mnist_summary.jl b/scripts/evaluation/MNIST/mnist_summary.jl
@@ -3,7 +3,7 @@ using GroupAD: load_data
 using GroupAD.Models: unpack_mill
 using BSON
 using DataFrames
-using Latexify
+using PrettyTables
 using Mill
 
 dp = GroupAD.get_mnist_point_cloud_datapath()
@@ -43,4 +43,8 @@ mnist_summary = DataFrame(
 )
 sort!(mnist_summary, :class)
 
-tex = latexify(mnist_summary, env=:tabular, fmt=x->round(x, digits=1), booktabs=true)
+t = pretty_table(
+    mnist_summary,
+    formatters = ft_printf("%5.1f"),
+    backend=:latex, tf=tf_latex_booktabs, nosubheader=true
+)
diff --git a/scripts/evaluation/knn_eval_test.jl b/scripts/evaluation/knn_eval_test.jl
diff --git a/scripts/evaluation/mgmm_eval.jl b/scripts/evaluation/mgmm_eval.jl
diff --git a/scripts/evaluation/statistician_eval.jl b/scripts/evaluation/statistician_eval.jl