evaluation mess, starting clean up

aicenter · Sep 22, 2021 · 948c0c8 · 948c0c8
1 parent ba720ee
commit 948c0c8
Show file tree

Hide file tree

Showing 23 changed files with 572 additions and 126 deletions.
diff --git a/MGMM.png b/MGMM.png
diff --git a/Manifest.toml b/Manifest.toml
@@ -132,6 +132,12 @@ git-tree-sha1 = "75479b7df4167267d75294d14b58244695beb2ac"
 uuid = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
 version = "0.14.2"
 
+[[CodeTracking]]
+deps = ["InteractiveUtils", "UUIDs"]
+git-tree-sha1 = "9aa8a5ebb6b5bf469a7e0e2b5202cf6f8c291104"
+uuid = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"
+version = "1.0.6"
+
 [[CodecZlib]]
 deps = ["TranscodingStreams", "Zlib_jll"]
 git-tree-sha1 = "ded953804d019afa9a3f98981d99b33e3db7b6da"
@@ -343,6 +349,9 @@ git-tree-sha1 = "fee8955b9dfa7bec67117ef48085fb2b559b9c22"
 uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
 version = "1.4.5"
 
+[[FileWatching]]
+uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
+
 [[FillArrays]]
 deps = ["LinearAlgebra", "Random", "SparseArrays"]
 git-tree-sha1 = "502b3de6039d5b78c76118423858d981349f3823"
@@ -579,6 +588,12 @@ git-tree-sha1 = "9aff0587d9603ea0de2c6f6300d9f9492bbefbd3"
 uuid = "aacddb02-875f-59d6-b918-886e6ef4fbf8"
 version = "2.0.1+3"
 
+[[JuliaInterpreter]]
+deps = ["CodeTracking", "InteractiveUtils", "Random", "UUIDs"]
+git-tree-sha1 = "e273807f38074f033d94207a201e6e827d8417db"
+uuid = "aa1ae85d-cabe-5617-a682-6adf51b2e16a"
+version = "0.8.21"
+
 [[Juno]]
 deps = ["Base64", "Logging", "Media", "Profile"]
 git-tree-sha1 = "07cb43290a840908a771552911a6274bc6c072c7"
@@ -705,6 +720,12 @@ git-tree-sha1 = "e9f52dd5b33bba1b825bdb69b72844e81285c2c1"
 uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
 version = "0.9.20"
 
+[[LoweredCodeUtils]]
+deps = ["JuliaInterpreter"]
+git-tree-sha1 = "491a883c4fef1103077a7f648961adbf9c8dd933"
+uuid = "6f1432cf-f94c-5a45-995e-cdbf5db27b0b"
+version = "2.1.2"
+
 [[LsqFit]]
 deps = ["Distributions", "ForwardDiff", "LinearAlgebra", "NLSolversBase", "OptimBase", "Random", "StatsBase"]
 git-tree-sha1 = "b32b5549461fcb93bce223e264d4a7ef0c9923fd"
@@ -1002,6 +1023,12 @@ git-tree-sha1 = "4036a3bd08ac7e968e27c203d45f5fff15020621"
 uuid = "ae029012-a4dd-5104-9daa-d747884805df"
 version = "1.1.3"
 
+[[Revise]]
+deps = ["CodeTracking", "Distributed", "FileWatching", "JuliaInterpreter", "LibGit2", "LoweredCodeUtils", "OrderedCollections", "Pkg", "REPL", "Requires", "UUIDs", "Unicode"]
+git-tree-sha1 = "1947d2d75463bd86d87eaba7265b0721598dd803"
+uuid = "295af30f-e4ad-537b-8983-00126c2a3abe"
+version = "3.1.19"
+
 [[Rmath]]
 deps = ["Random", "Rmath_jll"]
 git-tree-sha1 = "86c5647b565873641538d8f812c04e4c9dbeb370"
@@ -1128,11 +1155,6 @@ version = "1.4.0"
 deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"]
 uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9"
 
-[[Suppressor]]
-git-tree-sha1 = "a819d77f31f83e5792a76081eee1ea6342ab8787"
-uuid = "fd094767-a336-5f1f-9728-57cf17d0bbfb"
-version = "0.2.0"
-
 [[TableOperations]]
 deps = ["SentinelArrays", "Tables", "Test"]
 git-tree-sha1 = "a7cf690d0ac3f5b53dd09b5d613540b230233647"

diff --git a/Project.toml b/Project.toml
@@ -26,10 +26,10 @@ NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
-Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb"
 UMAP = "c4f8c510-2410-5be4-91d7-4fbaeb39457e"
 ValueHistories = "98cad3c8-aec3-5f06-8e41-884608649ab7"
 

diff --git a/data/results/MNIST/leave-one-in/PoolModel.bson b/data/results/MNIST/leave-one-in/PoolModel.bson
diff --git a/data/results/MNIST/leave-one-out/PoolModel.bson b/data/results/MNIST/leave-one-out/PoolModel.bson
diff --git a/data/results/MNIST/mnist_results_in.bson b/data/results/MNIST/mnist_results_in.bson
diff --git a/data/results/MNIST/mnist_results_in_H.bson b/data/results/MNIST/mnist_results_in_H.bson
diff --git a/data/results/MNIST/mnist_results_in_scores.bson b/data/results/MNIST/mnist_results_in_scores.bson
diff --git a/data/results/MNIST/mnist_results_in_scores_H.bson b/data/results/MNIST/mnist_results_in_scores_H.bson
diff --git a/data/results/MNIST/mnist_results_out.bson b/data/results/MNIST/mnist_results_out.bson
diff --git a/data/results/MNIST/mnist_results_out_H.bson b/data/results/MNIST/mnist_results_out_H.bson
diff --git a/data/results/MNIST/mnist_results_out_scores.bson b/data/results/MNIST/mnist_results_out_scores.bson
diff --git a/data/results/MNIST/mnist_results_out_scores_H.bson b/data/results/MNIST/mnist_results_out_scores_H.bson
diff --git a/scripts/evaluation/MIL/workflow.jl b/scripts/evaluation/MIL/workflow.jl
@@ -1,68 +1,3 @@
-"""
-    find_best_model(folder::String [, groupkey]; metric=:val_AUC)
-
-Recursively goes through given folder and finds the best model based on
-chosen metric, default is validation AUC.
-
-If `groupkey` is present, returns the best model for each category of groupkey.
-Group key can be both a symbol or an array of symbols.
-"""
-function find_best_model(folder::String; metric=:val_AUC, save_best_seed=false)
-    #folder = datadir("experiments", "contamination-0.0", modelname, dataset)
-    data = GroupAD.Evaluation.results_dataframe(folder)
-    point = load(GroupAD.Evaluation.collect_scores(folder)[1])
-    params = point[:parameters]
-
-    g = groupby(data, [keys(params)...])
-    un = unique(map(x -> size(x), g))
-    if length(un) != 1
-        idx = findall(x -> size(x,1) > 5, g)
-        @warn "There are groups with different sizes (different number of seeds). Possible duplicate models or missing seeds.
-        Removing $(length(g) - length(idx)) groups out of $(length(g)) with less than 6 seeds."
-        g = g[idx]
-    end
-
-    metricsnames = [:val_AUC, :val_AUPRC, :test_AUC, :test_AUPRC]
-    cdf = combine(g, map(x -> x => mean, metricsnames))
-    sort!(cdf, :val_AUC_mean, rev=true)
-    best_model = cdf[1,:]
-
-    if save_best_seed
-        _nm = names(best_model)
-        nm = _nm[occursin.("mean", _nm) .== 0]
-        nm = nm[occursin.("L", nm) .== 0]
-        values = best_model[[nm...]]
-
-        idx = findall(row -> row[nm] == values, eachrow(data))
-        s = sort(data[idx, :], :val_AUC, rev = true)[1,:][:seed]
-        return DataFrame(best_model), s
-    else
-        return best_model
-    end
-end
-function find_best_model(folder, groupkey, metric=:val_AUC)
-    #folder = datadir("experiments", "contamination-0.0", modelname, dataset, "scenario=$scenario")
-    data = GroupAD.Evaluation.results_dataframe(folder)
-    point = load(GroupAD.Evaluation.collect_scores(folder)[1])
-    params = point[:parameters]
-
-    g_score = groupby(data, groupkey)
-    g = map(x -> groupby(x, [keys(params)...]), g_score)
-    un = unique(vcat(map(x -> unique(map(y -> size(y), x)), g)...))
-
-    if length(un) != 1
-        idx = findall.(x -> size(x,1) > 5, g)
-        @warn "There are groups with different sizes (different number of seeds). Possible duplicate models or missing seeds.
-        Removing $(sum(length.(g)) - sum(length.(idx))) groups out of $(sum(length.(g))) with less than 6 seeds."
-        g = map(i -> g[i][idx[i]], 1:length(g))
-    end
-
-    metricsnames = [:val_AUC, :val_AUPRC, :test_AUC, :test_AUPRC]
-    cdf = map(y -> combine(y, map(x -> x => mean, metricsnames)), g)
-    cdf_sorted = map(x -> sort(x, :val_AUC_mean, rev=true), cdf)
-    best_models = vcat(map(x -> DataFrame(x[1,:]), cdf_sorted)...)
-end
-
 """
     groupedbar_matrix(df::DataFrame; group::Symbol, cols::Symbol, value::Symbol, groupnamefull=true)
 

diff --git a/scripts/evaluation/MNIST/mnist_results_in.jl b/scripts/evaluation/MNIST/mnist_results_in.jl
@@ -11,7 +11,7 @@ using Plots
 using StatsPlots
 ENV["GKSwstype"] = "100"
 
-include(scriptsdir("evaluation", "MIL", "workflow.jl"))
+#include(scriptsdir("evaluation", "MIL", "workflow.jl"))
 
 ####################
 ### leave-one-in ###
@@ -26,6 +26,7 @@ modelname = "vae_basic"
 modelname = "vae_instance"
 modelname = "statistician"
 modelname = "PoolModel"
+modelname = "MGMM"
 method = "leave-one-in"
 class = 10
 folder = datadir("experiments", "contamination-0.0", modelname, "MNIST", method, "class_index=$class")
@@ -80,6 +81,7 @@ modelname = "vae_basic"
 modelname = "vae_instance"
 modelname = "statistician"
 modelname = "PoolModel"
+modelname = "MGMM"
 method = "leave-one-in"
 class = 10
 folder = datadir("experiments", "contamination-0.0", modelname, "MNIST", method, "class_index=$class")
@@ -182,4 +184,28 @@ mnist_barplots(
     gdf, "poolmodel-in", new_labels; legend_title="Pool function",
     group=:class, cols=:poolf, value=:test_AUC_mean,
     w1=0.8, w2=0.85
+)
+
+### MGMM results
+mgmm = mnist_results_in["MGMM"]
+mgmm = mnist_results_in_scores["MGMM"]
+g = groupby(sort(mgmm, :val_AUC_mean, rev=true), [:class,:score])
+gm = map(x -> DataFrame(x[1,:]), g)
+gdf = vcat(gm...)
+groupnames, M, labels = groupedbar_matrix(gdf, group=:class, cols=:score, value=:test_AUC_mean)
+hcat(groupnames...)
+
+new_labels = ["point" "topic" "point + topic"]
+vcat(labels, new_labels)
+
+p = groupedbar(
+    map(i -> "$i", 0:9), M, label=new_labels, ylims=(0,1), legend=:bottomright,
+    xlabel="digit", ylabel="test AUC"
+)
+wsave(plotsdir("MNIST", "pdf", "MGMM-in.pdf"), p)
+
+mnist_barplots(
+    gdf, "MGMM-in", new_labels; legend_title="Score",
+    group=:class, cols=:score, value=:test_AUC_mean,
+    w1=0.8, w2=0.85
 )
diff --git a/scripts/evaluation/MNIST/mnist_results_out.jl b/scripts/evaluation/MNIST/mnist_results_out.jl
@@ -22,6 +22,8 @@ mnist_results_out = load(datadir("results", "MNIST", "mnist_results_out.bson"))
 modelname = "knn_basic"
 modelname = "vae_basic"
 modelname = "vae_instance"
+modelname = "PoolModel"
+modelname = "MGMM"
 method = "leave-one-out"
 folder = datadir("experiments", "contamination-0.0", modelname, "MNIST", method, "class_index=$class")
 
@@ -38,16 +40,16 @@ push!(mnist_results_out, modelname => rdf)
 save(datadir("results", "MNIST", "mnist_results_out.bson"), mnist_results_out)
 
 # add :model columns
-modelnames = ["knn_basic", "vae_basic", "vae_instance"]
-model_names = ["kNNagg", "VAEagg", "VAE"]
-knn_basic, vae_basic, vae_instance = map(m-> insertcols!(mnist_results_out[m], :model => m), modelnames)
+modelnames = ["knn_basic", "vae_basic", "vae_instance", "statistician", "PoolModel"]
+model_names = ["kNNagg", "VAEagg", "VAE", "NS", "PoolModel"]
+knn_basic, vae_basic, vae_instance, statistician, poolmodel = map(m-> insertcols!(mnist_results_out[m], :model => m), modelnames)
 
 # groupedbarplot for more models
-df_all = vcat(knn_basic, vae_basic, vae_instance, cols=:union)
+df_all = vcat(knn_basic, vae_basic, vae_instance, statistician, poolmodel, cols=:union)
 df_red = df_all[:, [:model, :class, :test_AUC_mean]]
 groupnames, M, labels = groupedbar_matrix(df_red, group=:class, cols=:model, value=:test_AUC_mean)
 groupnames
-idx = [1,2,3]
+idx = [2,4,5,3,1]
 vcat(hcat(labels[idx]...), hcat(model_names...))
 
 mnist_barplots(
@@ -69,13 +71,15 @@ mnist_results_out_scores = load(datadir("results", "MNIST", "mnist_results_out_s
 modelname = "knn_basic"
 modelname = "vae_basic"
 modelname = "vae_instance"
+modelname = "PoolModel"
+modelname = "MGMM"
 method = "leave-one-out"
 
 # calculating the results for a single model
 results = DataFrame[]
 for class in 1:10
     folder = datadir("experiments", "contamination-0.0", modelname, "MNIST", method, "class_index=$class")
-    df = find_best_model(folder, :type)
+    df = find_best_model(folder, :poolf)
     #df = find_best_model(folder) |> DataFrame
     push!(results, df)
 end
@@ -84,7 +88,7 @@ push!(mnist_results_out_scores, modelname => rdf)
 save(datadir("results", "MNIST", "mnist_results_out_scores.bson"), mnist_results_out_scores)
 
 
-knn_basic, vae_basic, vae_instance = map(m-> mnist_results_out_scores[m], modelnames)
+knn_basic, vae_basic, vae_instance, statistician, poolmodel = map(m-> mnist_results_out_scores[m], modelnames)
 # groupedbarplot for :aggregation, or :score, :type etc.
 # kNN
 modelname = "knn_basic"
@@ -134,4 +138,59 @@ mnist_barplots(
     gdf, "vae-out", new_labels; ind = idx, 
     group=:class, cols=:type, value=:test_AUC_mean,
     w1=0.8, w2=0.85
-)
+)
+
+# NS
+modelname = "statistician"
+#vae_instance = mnist_results_out_scores["vae_instance"]
+g = groupby(sort(statistician, :val_AUC_mean, rev=true), [:class,:type])
+gm = map(x -> DataFrame(x[1,:]), g)
+gdf = vcat(gm...)
+groupnames, M, labels = groupedbar_matrix(gdf, group=:class, cols=:type, value=:test_AUC_mean)
+
+idx = [11,8,7,4,5,6,9,10,1,2,3]
+new_labels = ["sum" "mean" "maximum" "logU" "LN" "LN + logU" "Po" "Po + logU" "MMD-G" "MMD-IMQ" "Chamfer"]
+groupnames
+vcat(hcat(labels[idx]...), new_labels)
+
+mnist_barplots(
+    gdf, "statistician-out", new_labels; ind = idx, 
+    group=:class, cols=:type, value=:test_AUC_mean,
+    w1=0.8, w2=0.85
+)
+
+# PoolModel
+modelname = "PoolModel"
+#poolmodel = mnist_results_out_scores[modelname]
+g = groupby(sort(poolmodel, :val_AUC_mean, rev=true), [:class,:poolf])
+gm = map(x -> DataFrame(x[1,:]), g)
+gdf = vcat(gm...)
+groupnames, M, labels = groupedbar_matrix(gdf, group=:class, cols=:poolf, value=:test_AUC_mean)
+
+new_labels = ["maximum" "mean" "meanmax" "meanmax + card" "sumstat" "sumstat + card"]
+groupnames
+vcat(labels, new_labels)
+
+mnist_barplots(
+    gdf, "poolmodel-out", new_labels; legend_title="Pooling function",
+    group=:class, cols=:poolf, value=:test_AUC_mean,
+    w1=0.8, w2=0.85
+)
+
+### MGMM results
+mgmm = mnist_results_out["MGMM"]
+mgmm = mnist_results_out_scores["MGMM"]
+g = groupby(sort(mgmm, :val_AUC_mean, rev=true), [:class,:score])
+gm = map(x -> DataFrame(x[1,:]), g)
+gdf = vcat(gm...)
+groupnames, M, labels = groupedbar_matrix(gdf, group=:class, cols=:score, value=:test_AUC_mean)
+hcat(groupnames...)
+
+new_labels = ["point" "topic" "point + topic"]
+vcat(labels, new_labels)
+
+p = groupedbar(
+    map(i -> "$i", 0:9), M, label=new_labels, ylims=(0,1), legend=:bottomright,
+    xlabel="digit", ylabel="test AUC"
+)
+wsave(plotsdir("MNIST", "pdf", "MGMM-out.pdf"), p)
diff --git a/scripts/evaluation/MNIST/mnist_results_table.jl b/scripts/evaluation/MNIST/mnist_results_table.jl
@@ -69,28 +69,29 @@ t = pretty_table(
 
 mnist_results_out = load(datadir("results", "MNIST", "mnist_results_out.bson"))
 
-#model_names = ["kNNagg", "VAEagg", "VAE", "NS", "PoolModel", "MGMM"]
-model_names = ["kNNagg", "VAEagg", "VAE"]
-modelnames = ["knn_basic", "vae_basic", "vae_instance"]
+model_names = ["kNNagg", "VAEagg", "VAE", "NS", "PoolModel"]
+modelnames = ["knn_basic", "vae_basic", "vae_instance", "statistician", "PoolModel"]
 modelvec = map(key -> mnist_results_out[key], modelnames)
-knn_basic, vae_basic, vae_instance = map(key -> mnist_results_out[key], modelnames)
+knn_basic, vae_basic, vae_instance, statistician, poolmodel = map(key -> mnist_results_out[key], modelnames)
 # add modelname
 #knn_basic, vae_basic, vae_instance, statistician, poolmodel, mgmm = map((d, m) -> insertcols!(d, :model => m), modelvec, modelnames)
-knn_basic, vae_basic, vae_instance = map((d, m) -> insertcols!(d, :model => m), modelvec, modelnames)
-df = vcat(knn_basic, vae_basic, vae_instance, cols=:union)
+knn_basic, vae_basic, vae_instance, statistician, poolmodel = map((d, m) -> insertcols!(d, :model => m), modelvec, modelnames)
+df = vcat(knn_basic, vae_basic, vae_instance, statistician, poolmodel, cols=:union)
 
 # create dataframe
 df_red = df[:, [:class, :model, :val_AUC_mean, :test_AUC_mean, :val_AUPRC_mean, :test_AUPRC_mean]]
 df_red = df[:, [:class, :model, :test_AUC_mean]]
 sort!(df_red, [:class, :model])
 
 g = groupby(df_red, :class)
+nm = g[1][:, :model] |> Array{String,1}
 g = map(x -> rename(x, :test_AUC_mean => Symbol(x[1,:class])), g)
 g = hcat(map(x -> x[:, 3], g)...)
 df1 = DataFrame(g')
-rename!(df1, model_names)
+rename!(df1, nm)
 df1[:, :digit] = map(i -> "$i", 0:9)
-df_new = df1[:, [4,1,2,3]]
+df_new = df1[:, [6,2,4,5,3,1]]
+rename!(df_new, vcat("digit", model_names))
 
 avg = map(x -> typeof(x) == Array{Float64,1} ? mean(x) : "Average", eachcol(df_new))
 #avg_rank maybe do it if there is time
@@ -99,11 +100,11 @@ push!(df_new, avg)
 using PrettyTables
 
 l_max = LatexHighlighter(
-    (data, i, j) -> (data[i,j] == maximum(df_new[i, 2:4])) && typeof(data[i,j])!==String,
+    (data, i, j) -> (data[i,j] == maximum(df_new[i, 2:6])) && typeof(data[i,j])!==String,
     ["textbf", "textcolor{blue}"]
 )
 l_min = LatexHighlighter(
-    (data, i, j) -> (data[i,j] == minimum(df_new[i, 2:4])) && typeof(data[i,j])!==String,
+    (data, i, j) -> (data[i,j] == minimum(df_new[i, 2:6])) && typeof(data[i,j])!==String,
     ["textcolor{red}"]
 )