Skip to content

Commit

Permalink
results dataframes, evaluation scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
maskomic committed Aug 28, 2021
1 parent 1bcde05 commit e2302eb
Show file tree
Hide file tree
Showing 21 changed files with 183 additions and 28 deletions.
59 changes: 59 additions & 0 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ git-tree-sha1 = "4a8f4df432fd8e8a96a142c53f9432b9022a92e6"
uuid = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
version = "1.1.1"

[[ArnoldiMethod]]
deps = ["LinearAlgebra", "Random", "StaticArrays"]
git-tree-sha1 = "f87e559f87a45bece9c9ed97458d3afe98b1ebb9"
uuid = "ec485272-7323-5ecc-a04f-4719b315124d"
version = "0.1.0"

[[Arpack]]
deps = ["Arpack_jll", "Libdl", "LinearAlgebra"]
git-tree-sha1 = "2ff92b71ba1747c5fdd541f8fc87736d82f40ec9"
Expand Down Expand Up @@ -343,6 +349,12 @@ git-tree-sha1 = "502b3de6039d5b78c76118423858d981349f3823"
uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
version = "0.9.7"

[[FiniteDiff]]
deps = ["ArrayInterface", "LinearAlgebra", "Requires", "SparseArrays", "StaticArrays"]
git-tree-sha1 = "8b3c09b56acaf3c0e581c66638b85c8650ee9dca"
uuid = "6a86dc24-6348-571c-b903-95158fe2bd41"
version = "2.8.1"

[[FixedPointNumbers]]
deps = ["Statistics"]
git-tree-sha1 = "335bfdceacc84c5cdf16aadc768aa5ddfc5383cc"
Expand Down Expand Up @@ -507,6 +519,11 @@ git-tree-sha1 = "28e837ff3e7a6c3cdb252ce49fb412c8eb3caeef"
uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
version = "0.1.0"

[[Inflate]]
git-tree-sha1 = "f5fc07d4e706b84f72d54eedcc1c13d92fb0871c"
uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9"
version = "0.1.2"

[[IniFile]]
deps = ["Test"]
git-tree-sha1 = "098e4d2c533924c921f9f9847274f2ad89e018b8"
Expand Down Expand Up @@ -669,6 +686,12 @@ git-tree-sha1 = "f879ae9edbaa2c74c922e8b85bb83cc84ea1450b"
uuid = "38a345b3-de98-5d2b-a5d3-14cd9215e700"
version = "2.34.0+7"

[[LightGraphs]]
deps = ["ArnoldiMethod", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"]
git-tree-sha1 = "432428df5f360964040ed60418dd5601ecd240b6"
uuid = "093fc24a-ae57-5d10-9952-331d41423f4d"
version = "1.3.5"

[[LinearAlgebra]]
deps = ["Libdl"]
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Expand All @@ -682,6 +705,12 @@ git-tree-sha1 = "e9f52dd5b33bba1b825bdb69b72844e81285c2c1"
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
version = "0.9.20"

[[LsqFit]]
deps = ["Distributions", "ForwardDiff", "LinearAlgebra", "NLSolversBase", "OptimBase", "Random", "StatsBase"]
git-tree-sha1 = "b32b5549461fcb93bce223e264d4a7ef0c9923fd"
uuid = "2fda8390-95c7-5789-9bda-21331edee243"
version = "0.11.0"

[[MKL_jll]]
deps = ["IntelOpenMP_jll", "Libdl", "Pkg"]
git-tree-sha1 = "eb540ede3aabb8284cb482aa41d00d6ca850b1f8"
Expand Down Expand Up @@ -772,6 +801,12 @@ git-tree-sha1 = "8d958ff1854b166003238fe191ec34b9d592860a"
uuid = "6f286f6a-111f-5878-ab1e-185364afe411"
version = "0.8.0"

[[NLSolversBase]]
deps = ["DiffResults", "Distributed", "FiniteDiff", "ForwardDiff"]
git-tree-sha1 = "144bab5b1443545bc4e791536c9f1eacb4eed06a"
uuid = "d41bc354-129a-5804-8e4c-c37616107c6c"
version = "7.8.1"

[[NNlib]]
deps = ["ChainRulesCore", "Compat", "LinearAlgebra", "Pkg", "Requires", "Statistics"]
git-tree-sha1 = "5ce2e4b2bfe3811811e7db4b6a148439806fd2f8"
Expand All @@ -783,6 +818,12 @@ git-tree-sha1 = "bfe47e760d60b82b66b61d2d44128b62e3a369fb"
uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3"
version = "0.3.5"

[[NearestNeighborDescent]]
deps = ["DataStructures", "Distances", "LightGraphs", "Random", "Reexport", "SparseArrays"]
git-tree-sha1 = "410580927bc16e156e5481d9318b8ca177c30f1b"
uuid = "dd2c4c9e-a32f-5b2f-b342-08c2f244fce8"
version = "0.3.4"

[[NearestNeighbors]]
deps = ["Distances", "StaticArrays"]
git-tree-sha1 = "9afd724797039125e8e2cc362098f01dab60bc3a"
Expand Down Expand Up @@ -829,6 +870,12 @@ git-tree-sha1 = "9db77584158d0ab52307f8c04f8e7c08ca76b5b3"
uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
version = "0.5.3+4"

[[OptimBase]]
deps = ["NLSolversBase", "Printf", "Reexport"]
git-tree-sha1 = "9cb1fee807b599b5f803809e85c81b582d2009d6"
uuid = "87e2bd06-a317-5318-96d9-3ecbac512eee"
version = "2.0.2"

[[Opus_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "f9d57f4126c39565e05a2b0264df99f497fc6f37"
Expand Down Expand Up @@ -1007,6 +1054,12 @@ git-tree-sha1 = "91eddf657aca81df9ae6ceb20b959ae5653ad1de"
uuid = "992d4aef-0814-514b-bc4d-f2e9a6c4116f"
version = "1.0.3"

[[SimpleTraits]]
deps = ["InteractiveUtils", "MacroTools"]
git-tree-sha1 = "5d7e3f4e11935503d3ecaf7186eac40602e7d231"
uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d"
version = "0.9.4"

[[Sockets]]
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"

Expand Down Expand Up @@ -1119,6 +1172,12 @@ git-tree-sha1 = "7c53c35547de1c5b9d46a4797cf6d8253807108c"
uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
version = "0.9.5"

[[UMAP]]
deps = ["Arpack", "Distances", "LinearAlgebra", "LsqFit", "NearestNeighborDescent", "Random", "SparseArrays"]
git-tree-sha1 = "c96f3a85e8d429129714a1363e622a4cb9936c79"
uuid = "c4f8c510-2410-5be4-91d7-4fbaeb39457e"
version = "0.1.8"

[[URIs]]
git-tree-sha1 = "7855809b88d7b16e9b029afd17880930626f54a2"
uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
Expand Down
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb"
UMAP = "c4f8c510-2410-5be4-91d7-4fbaeb39457e"
ValueHistories = "98cad3c8-aec3-5f06-8e41-884608649ab7"

[compat]
Expand Down
Binary file added results/MIL/mill_names_scores.bson
Binary file not shown.
Binary file added results/MIL/mill_results_collection.bson
Binary file not shown.
Binary file added results/MIL/mill_results_scores.bson
Binary file not shown.
Binary file added results/MIL/mill_results_scores_agg.bson
Binary file not shown.
Binary file added results/MNIST/leave-one-in/PoolModel.bson
Binary file not shown.
Binary file added results/MNIST/leave-one-in/statistician.bson
Binary file not shown.
Binary file added results/MNIST/leave-one-in/vae_instance.bson
Binary file not shown.
Binary file added results/MNIST/leave-one-out/vae_instance.bson
Binary file not shown.
Binary file added results/MNIST/mnist_results_in.bson
Binary file not shown.
Binary file added results/MNIST/mnist_results_in_scores.bson
Binary file not shown.
Binary file added results/MNIST/mnist_results_out.bson
Binary file not shown.
Binary file added results/MNIST/mnist_results_out_scores.bson
Binary file not shown.
Binary file added results/MNIST/models/vae-in.bson
Binary file not shown.
Binary file added results/MNIST/models/vae-out.bson
Binary file not shown.
Binary file added results/MNIST/models/vae.bson
Binary file not shown.
32 changes: 26 additions & 6 deletions scripts/evaluation/MNIST/mnist_results_in.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ modelname = "knn_basic"
modelname = "vae_basic"
modelname = "vae_instance"
modelname = "statistician"
modelname = "PoolModel"
method = "leave-one-in"
class = 10
folder = datadir("experiments", "contamination-0.0", modelname, "MNIST", method, "class_index=$class")
Expand All @@ -41,15 +42,15 @@ push!(mnist_results_in, modelname => rdf)
save(datadir("dataframes", "mnist_results_in.bson"), mnist_results_in)

# add :model columns
modelnames = ["knn_basic", "vae_basic", "vae_instance", "statistician"]
model_names = ["kNNagg", "VAEagg", "VAE", "NS"]
knn_basic, vae_basic, vae_instance, statistician = map(m-> insertcols!(mnist_results_in[m], :model => m), modelnames)
modelnames = ["knn_basic", "vae_basic", "vae_instance", "statistician", "PoolModel"]
model_names = ["kNNagg", "VAEagg", "VAE", "NS", "PoolModel"]
knn_basic, vae_basic, vae_instance, statistician, poolmodel = map(m-> insertcols!(mnist_results_in[m], :model => m), modelnames)

df_all = vcat(knn_basic, vae_basic, vae_instance, statistician, cols=:union)
df_all = vcat(knn_basic, vae_basic, vae_instance, statistician, poolmodel, cols=:union)
df_red = df_all[:, [:model, :class, :test_AUC_mean]]
#sort!(df_red, [:class, :model])
groupnames, M, labels = groupedbar_matrix(df_red, group=:class, cols=:model, value=:test_AUC_mean)
idx = [1,3,4,2]
idx = [2,4,5,3,1]
vcat(hcat(labels[idx]...), hcat(model_names...))

mnist_barplots(
Expand Down Expand Up @@ -78,14 +79,15 @@ modelname = "knn_basic"
modelname = "vae_basic"
modelname = "vae_instance"
modelname = "statistician"
modelname = "PoolModel"
method = "leave-one-in"
class = 10
folder = datadir("experiments", "contamination-0.0", modelname, "MNIST", method, "class_index=$class")

results = DataFrame[]
for class in 1:10
folder = datadir("experiments", "contamination-0.0", modelname, "MNIST", method, "class_index=$class")
df = find_best_model(folder, :type)
df = find_best_model(folder, :poolf)
#df = find_best_model(folder) |> DataFrame
push!(results, df)
end
Expand Down Expand Up @@ -162,4 +164,22 @@ mnist_barplots(
gdf, "statistician-in", new_labels; ind = idx,
group=:class, cols=:type, value=:test_AUC_mean,
w1=0.8, w2=0.85
)

# PoolModel: compare pooling functions (:poolf) per class on the leave-one-in scores
modelname = "PoolModel"
poolmodel = mnist_results_in_scores[modelname]
# Sort by validation AUC (descending) before grouping, so that the first row of
# every (class, poolf) group is the one with the highest :val_AUC_mean
# (assumes groupby keeps the sorted row order inside each group — TODO confirm).
g = groupby(sort(poolmodel, :val_AUC_mean, rev=true), [:class,:poolf])
# Keep only that first row of each group, as a one-row DataFrame.
# NOTE(review): `map` over a GroupedDataFrame behaves differently across
# DataFrames.jl versions — confirm against the version pinned in Manifest.toml.
gm = map(x -> DataFrame(x[1,:]), g)
gdf = vcat(gm...)
groupnames, M, labels = groupedbar_matrix(gdf, group=:class, cols=:poolf, value=:test_AUC_mean)
# Result is not assigned; only useful for inspection when run interactively.
hcat(groupnames...)

# 1×6 row of legend labels for the plot; presumably given in the same order as
# `labels` returned above — the unassigned vcat below stacks both for a visual check.
new_labels = ["maximum" "mean" "meanmax" "meanmax + card" "sumstat" "sumstat + card"]
vcat(labels, new_labels)

# Grouped bar plot of test AUC per class, one bar per pooling function.
mnist_barplots(
gdf, "poolmodel-in", new_labels; legend_title="Pool function",
group=:class, cols=:poolf, value=:test_AUC_mean,
w1=0.8, w2=0.85
)
2 changes: 1 addition & 1 deletion scripts/evaluation/MNIST/mnist_results_out.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ include(scriptsdir("evaluation", "MIL", "workflow.jl"))
#####################
### leave-one-out ###
#####################
mnist_results_out = Dict()
#mnist_results_out = Dict()
mnist_results_out = load(datadir("dataframes", "mnist_results_out.bson"))

modelname = "knn_basic"
Expand Down
16 changes: 8 additions & 8 deletions scripts/evaluation/MNIST/mnist_results_table.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ include(scriptsdir("evaluation", "MIL", "workflow.jl"))
mnist_results_in = load(datadir("dataframes", "mnist_results_in.bson"))

#model_names = ["kNNagg", "VAEagg", "VAE", "NS", "PoolModel", "MGMM"]
model_names = ["kNNagg", "VAEagg", "VAE", "NS"]
modelnames = ["knn_basic", "vae_basic", "vae_instance", "statistician"]
model_names = ["kNNagg", "VAEagg", "VAE", "NS", "PoolModel"]
modelnames = ["knn_basic", "vae_basic", "vae_instance", "statistician", "PoolModel"]
modelvec = map(key -> mnist_results_in[key], modelnames)
knn_basic, vae_basic, vae_instance, statistician = map(key -> mnist_results_in[key], modelnames)
knn_basic, vae_basic, vae_instance, statistician, poolmodel = map(key -> mnist_results_in[key], modelnames)
# add modelname
#knn_basic, vae_basic, vae_instance, statistician, poolmodel, mgmm = map((d, m) -> insertcols!(d, :model => m), modelvec, modelnames)
knn_basic, vae_basic, vae_instance, statistician = map((d, m) -> insertcols!(d, :model => m), modelvec, modelnames)
df = vcat(knn_basic, vae_basic, vae_instance, statistician, cols=:union)
knn_basic, vae_basic, vae_instance, statistician, poolmodel = map((d, m) -> insertcols!(d, :model => m), modelvec, modelnames)
df = vcat(knn_basic, vae_basic, vae_instance, statistician, poolmodel, cols=:union)

# create dataframe
df_red = df[:, [:class, :model, :val_AUC_mean, :test_AUC_mean, :val_AUPRC_mean, :test_AUPRC_mean]]
Expand All @@ -38,7 +38,7 @@ df1 = DataFrame(g')
rename!(df1, nm)
df1[:, :digit] = map(i -> "$i", 0:9)
df1
df_new = df1[:, [5,1,3,4,2]]
df_new = df1[:, [6,2,4,5,3,1]]
rename!(df_new, vcat("digit", model_names))

avg = map(x -> typeof(x) == Array{Float64,1} ? mean(x) : "Average", eachcol(df_new))
Expand All @@ -48,11 +48,11 @@ push!(df_new, avg)
using PrettyTables

l_max = LatexHighlighter(
(data, i, j) -> (data[i,j] == maximum(df_new[i, 2:5])) && typeof(data[i,j])!==String,
(data, i, j) -> (data[i,j] == maximum(df_new[i, 2:6])) && typeof(data[i,j])!==String,
["textbf", "textcolor{blue}"]
)
l_min = LatexHighlighter(
(data, i, j) -> (data[i,j] == minimum(df_new[i, 2:5])) && typeof(data[i,j])!==String,
(data, i, j) -> (data[i,j] == minimum(df_new[i, 2:6])) && typeof(data[i,j])!==String,
["textcolor{red}"]
)

Expand Down
101 changes: 88 additions & 13 deletions scripts/evaluation/MNIST/reconstruction/models_rec.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,11 @@ using GroupAD.Models: reconstruct
import GroupAD.Models: reconstruct
reconstruct(model::NeuralStatistician, x) = GroupAD.Models.reconstruct_input(model, x)

# parameters
modelname = "vae_instance"
method = "leave-one-in"
class = 1

# load data
data = GroupAD.load_data("MNIST", method=method, anomaly_class_ind=class)
tr_x, tr_l = unpack_mill(data[1])
val_x, val_l = unpack_mill(data[2])
test_x, test_l = unpack_mill(data[3])


"""
best_model_files(best_models, modelname)
Given a DataFrame of best models and their parameters, returns the file names for the models.
"""
function best_model_files(best_models, modelname)
mpath = GroupAD.Evaluation.collect_models(datadir("experiments", "contamination-0.0", modelname, "MNIST", "leave-one-in", "class_index=1", "seed=1"))[1]
mdata = load(mpath)
Expand All @@ -49,6 +41,11 @@ function best_model_files(best_models, modelname)
return files
end

"""
mnist_paths(modelname, method, class, files)
Joins the file paths with the data path and returns the file paths for all models over the possible seeds.
"""
function mnist_paths(modelname, method, class, files)
paths = []
for f in files
Expand All @@ -67,6 +64,12 @@ function mnist_paths(modelname, method, class, files)
end
end

"""
collect_mnist_models(modelname, method)
Collects the results, finds the best model for each class and saves a Dictionary
of models given the anomaly class index.
"""
function collect_mnist_models(modelname, method)
models = Dict()

Expand All @@ -78,7 +81,7 @@ function collect_mnist_models(modelname, method)
model = load(paths[best_seed])["model"]
push!(models, Symbol(class) => model)
end
wsave(datadir("results", "MNIST", method, "models", "$(modelname).bson"), models)
wsave(datadir("results", "MNIST", method, "$(modelname).bson"), models)
end


Expand Down Expand Up @@ -136,4 +139,76 @@ for class in 1:10
method, modelname,
"reconstruction_class=$(class-1).png"
), p)
end

# context: load the MNIST point-cloud data used for the context-vector plots below
# `class` is a script-level global; it is passed as `anomaly_class_ind` here and
# reused further down for model lookup and file names.
class = 1
data = GroupAD.load_mnist_point_cloud(;anomaly_class_ind=class)
# Concatenate the normal and anomalous parts into a single collection.
X = cat(data[:normal], data[:anomaly]);
# Only the first return value is kept; the `[]` is passed in the label slot.
# Presumably `unpack_mill` turns the bag structure into per-bag arrays — TODO confirm.
dt, _ = unpack_mill((X, []));
# Labels are concatenated in the same normal-then-anomaly order as `X`.
labels = vcat(data[:l_normal], data[:l_anomaly]);

"""
    mean_context(m::NeuralStatistician, x::AbstractArray)
Passes `x` through `m.instance_encoder`, averages the result along dimension 2
and returns `mean(m.encoder_c, ·)` of that average (the mean is used, no
sampling takes place).
"""
function mean_context(m::NeuralStatistician, x::AbstractArray)
    # instance network followed by averaging over the second dimension
    encoded = m.instance_encoder(x)
    pooled = mean(encoded; dims=2)

    # mean of the context encoder given the pooled representation
    return mean(m.encoder_c, pooled)
end

using GroupAD.Models: PoolModel
"""
    pool_context(m::PoolModel, x::AbstractArray)
Applies `m.prepool_net`, then `m.poolf`, then `m.postpool_net` to `x` and
returns the output of the post-pool network.
"""
function pool_context(m::PoolModel, x::AbstractArray)
    # pre-pool network -> pooling function -> post-pool network
    return x |> m.prepool_net |> m.poolf |> m.postpool_net
end

# Draw 5000 distinct indices out of 1:70000 and take the matching bags and labels.
# NOTE(review): 70000 is hard-coded — assumes `dt` and `labels` have exactly that
# many entries; no RNG seed is set in the visible code, so the subset may differ
# between runs.
idx = sample(1:70000, 5000, replace=false)
d = dt[idx]
l = Int.(labels[idx])

# statistician
# NOTE(review): `models` is not defined in the visible part of this script;
# presumably a Dict of models keyed by Symbol(class) — confirm where it is loaded.
model = models[Symbol(class)]
# One context vector per sampled bag, stacked as columns of C.
C = hcat(map(x -> mean_context(model, x), d)...)

# Scatter of the first two rows of C, coloured by the integer label.
p = scatter(C[1,:], C[2,:], color=l)
wsave(plotsdir("context", "context_in_class=$(class-1).png"), p)

using UMAP
# PoolModel
# Plot the PoolModel context vectors of the sampled bags `d` for every class index,
# reduced to two dimensions. `method`, `d` and `l` are read from script-level globals.
#
# The model file does not depend on `class`, so it is loaded once instead of once
# per iteration. The `let` block keeps `modelname` and `models` local, so
# script-level variables with the same names are not overwritten.
#
# NOTE(review): `collect_mnist_models` in this diff saves to
# datadir("results", "MNIST", method, "$(modelname).bson"), i.e. without the
# "models" path component used below — confirm which location holds the file.
let
    modelname = "PoolModel"
    models = load(datadir("results", "MNIST", method, "models", "$(modelname).bson"))

    for class in 1:10
        model = models[Symbol(class)]
        # one context vector per sampled bag, stacked as columns
        C = hcat(map(x -> pool_context(model, x), d)...)

        # use UMAP only when the context has more than two dimensions
        emb = size(C, 1) > 2 ? umap(C, 2) : C

        # split the sample by whether the label equals the digit `class - 1`
        nix = l .!= class - 1
        aix = l .== class - 1

        p = scatter(emb[1, nix], emb[2, nix], label="normal")
        p = scatter!(emb[1, aix], emb[2, aix], label="anomalous")
        wsave(plotsdir("context", modelname, "in-class=$(class-1).png"), p)
    end
end

# 3D scatter of the first three context dimensions of the PoolModel for every
# class index. `method`, `d` and `l` are read from script-level globals.
#
# The model file does not depend on `class`, so it is loaded once instead of once
# per iteration. The `let` block keeps `modelname` and `models` local, so
# script-level variables with the same names are not overwritten.
#
# NOTE(review): `collect_mnist_models` in this diff saves to
# datadir("results", "MNIST", method, "$(modelname).bson"), i.e. without the
# "models" path component used below — confirm which location holds the file.
let
    modelname = "PoolModel"
    models = load(datadir("results", "MNIST", method, "models", "$(modelname).bson"))

    for class in 1:10
        model = models[Symbol(class)]
        # one context vector per sampled bag, stacked as columns
        C = hcat(map(x -> pool_context(model, x), d)...)

        # Rows 1:3 are indexed below; a context with fewer dimensions would raise
        # a BoundsError, so such models are skipped with a warning instead.
        if size(C, 1) < 3
            @warn "PoolModel context has fewer than 3 dimensions; skipping 3D plot" class dims = size(C, 1)
            continue
        end

        # split the sample by whether the label equals the digit `class - 1`
        nix = l .!= class - 1
        aix = l .== class - 1

        p = scatter(C[1, nix], C[2, nix], C[3, nix], label="normal")
        p = scatter!(C[1, aix], C[2, aix], C[3, aix], label="anomalous")
        wsave(plotsdir("context", modelname, "in-3D_class=$(class-1).png"), p)
    end
end

0 comments on commit e2302eb

Please sign in to comment.