Skip to content

Commit

Permalink
mvtec
Browse files Browse the repository at this point in the history
  • Loading branch information
maskomic committed Oct 4, 2022
1 parent 7867be7 commit d0d96aa
Show file tree
Hide file tree
Showing 13 changed files with 365 additions and 51 deletions.
12 changes: 12 additions & 0 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,18 @@ git-tree-sha1 = "53bb909d1151e57e2484c3d1b53e19552b887fb2"
uuid = "42e2da0e-8278-4e71-bc24-59509adca0fe"
version = "1.0.2"

[[deps.HDF5]]
deps = ["Compat", "HDF5_jll", "Libdl", "Mmap", "Random", "Requires"]
git-tree-sha1 = "899f041bf330ebeead3637073b2ca7477760edde"
uuid = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
version = "0.16.11"

[[deps.HDF5_jll]]
deps = ["Artifacts", "JLLWrappers", "LibCURL_jll", "Libdl", "OpenSSL_jll", "Pkg", "Zlib_jll"]
git-tree-sha1 = "c003b31e2e818bc512b0ff99d7dce03b0c1359f5"
uuid = "0234f1f7-429e-5d53-9886-15a909be8d59"
version = "1.12.2+1"

[[deps.HTTP]]
deps = ["Base64", "Dates", "IniFile", "Logging", "MbedTLS", "NetworkOptions", "Sockets", "URIs"]
git-tree-sha1 = "0fa77022fe4b511826b39c894c90daf5fce3334a"
Expand Down
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ EvalMetrics = "251d5f9e-10c1-4699-ba24-e0ad168fa3e4"
FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
Flux3D = "432009dd-59a1-4b72-8c93-6462ce9b220f"
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
LIBSVM = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MLDataPattern = "9920b226-0b2a-5f5f-9153-9aa70a013f8b"
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@ julia GroupAD.jl/scripts/evaluate_performance_single.jl path/to/results

## Running experiments on the RCI cluster

*Note: Loading the LHCO dataset requires Python. Load the `Python/3.8` module and use it to install `pandas`.*

0. First, load Julia and Python modules.
```bash
ml Julia
ml Python
ml Python/3.8
```
1. Install the package somewhere on the RCI cluster.
2. Then the experiments can be run via `slurm`. This will run 20 experiments with the basic VAE model, each with 5 crossvalidation repetitions on all datasets in the text file with 10 parallel processes for each dataset.
Expand Down
46 changes: 46 additions & 0 deletions experimental/lhco_results.jl
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,16 @@ function collect_lhco(model::String, dataset="events_anomalydetection_v2.h5")
return vcat(dfs...)
end

"""
    collect_mvtec(model::String, datasets=mvtec_datasets)

Collect the stored results of `model` for every entry of `datasets` and return them
concatenated with `vcat`.

Each dataset is read with `collect_results` from
`datadir("experiments", "contamination-0.0", "mv_tec", model, dataset)`, descending into
subfolders and excluding files that match `r"model_.*"`.
"""
function collect_mvtec(model::String, datasets=mvtec_datasets)
    # Size the buffer by the `datasets` argument rather than the global default list, so
    # that a caller-supplied list of a different length is processed completely and
    # never indexed out of bounds.
    dfs = Vector{DataFrame}(undef, length(datasets))
    # Every iteration writes only to its own slot of `dfs`.
    Threads.@threads for i in eachindex(datasets)
        dfs[i] = collect_results(
            datadir("experiments", "contamination-0.0", "mv_tec", model, datasets[i]);
            subfolders=true,
            rexclude=[r"model_.*"],
        )
    end
    return vcat(dfs...)
end

"""
calculate_results(model::String; dataset::String="MIL", metric::Symbol=:val_AUC, show=false, tf=tf_unicode, filter_fun=nothing, max_seed=10)
Expand All @@ -140,6 +150,8 @@ function calculate_results(model::String; dataset::String="MIL", metric::Symbol=
df = collect_mill(model)
elseif dataset == "LHCO"
df = collect_lhco(model)
elseif dataset == "mvtec"
df = collect_mvtec(model)
end
@info "Data loaded."
# filter out model files (for vae, statistician...) - not needed with the newest DrWatson's collect_results rexclude
Expand Down Expand Up @@ -223,3 +235,37 @@ function lhco_model_results(model::String; metric::Symbol=:val_AUC, show=false,
end
R2, g2[1]
end

"""
    mvtec_model_results(model::String; metric=:val_AUC, show=false, tf=tf_unicode, filter_fun=nothing, max_seed=5)

Load the `"mvtec"` results of `model` through `calculate_results`, select the best rows
with `findmaxs` according to `metric`, and return `(table, grouped)`, where `table` has
its first column followed by its last two columns and then all remaining columns, and
`grouped` is the unmodified output of `calculate_results`.

When `show` is true the reordered table is also printed with `pretty_table` using the
table format `tf`.
"""
function mvtec_model_results(model::String; metric::Symbol=:val_AUC, show=false, tf=tf_unicode, filter_fun=nothing, max_seed=5)
    grouped = calculate_results(
        model;
        dataset="mvtec",
        metric=metric,
        show=show,
        tf=tf,
        filter_fun=filter_fun,
        max_seed=max_seed,
    )

    # pick the best entries according to `metric`
    best = findmaxs(grouped, metric)

    # move the last two columns right behind the first one
    n = ncol(best)
    leading = [1, n - 1, n]
    column_order = vcat(leading, setdiff(1:n, leading))
    table = best[:, column_order]

    show && pretty_table(table, nosubheader=true, tf=tf)

    return table, grouped
end

"""
    results_all_models(dataset::String; models=[...], metric=:val_AUC, show=false, tf=tf_unicode, filter_fun=nothing, max_seed=5)

For each name in `models`, load its results on `dataset` with `calculate_results`, select
the best rows with `findmaxs` according to `metric`, reorder the columns (first column,
then the last two, then the rest) and prepend a `modelname` column. After all models are
processed, every resulting table is printed with `pretty_table`; the collected return
values of those `pretty_table` calls are returned.
"""
function results_all_models(dataset::String; models = ["knn_basic", "vae_basic", "vae_instance", "statistician", "PoolModel"],
        metric::Symbol=:val_AUC, show=false, tf=tf_unicode, filter_fun=nothing, max_seed=5)
    tables = Any[]

    for name in models
        grouped = calculate_results(
            name;
            dataset=dataset,
            metric=metric,
            show=show,
            tf=tf,
            filter_fun=filter_fun,
            max_seed=max_seed,
        )
        best = findmaxs(grouped, metric)

        # move the last two columns right behind the first one
        n = ncol(best)
        leading = [1, n - 1, n]
        reordered = best[:, vcat(leading, setdiff(1:n, leading))]

        push!(tables, hcat(DataFrame(:modelname => name), reordered))
    end

    map(tables) do tbl
        pretty_table(tbl, nosubheader=true, tf=tf)
    end
end
4 changes: 4 additions & 0 deletions scripts/experiments_mill/datasets_mvtech.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
hazelnut_together
pill_together
screw_together
toothbrush_together
33 changes: 23 additions & 10 deletions scripts/experiments_mill/knn_basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,27 @@ edit_params = GroupAD.edit_params
####################################################################
################ THIS PART IS COMMON FOR ALL MODELS ################
if abspath(PROGRAM_FILE) == @__FILE__
GroupAD.basic_experimental_loop(
sample_params,
fit,
edit_params,
max_seed,
modelname,
dataset,
contamination,
datadir("experiments/contamination-$(contamination)/MIL")
# Choose the results folder by dataset family. The `if`/`elseif` must form a single
# statement: closing the `if` with `end` before `elseif` does not parse.
# A dataset that belongs to neither list runs nothing.
if in(dataset, mill_datasets)
    # datasets listed in `mill_datasets` are saved under the `MIL` folder
    GroupAD.basic_experimental_loop(
        sample_params,
        fit,
        edit_params,
        max_seed,
        modelname,
        dataset,
        contamination,
        datadir("experiments/contamination-$(contamination)/MIL"),
    )
elseif in(dataset, mvtec_datasets)
    # datasets listed in `mvtec_datasets` are saved under the `mv_tec` folder
    GroupAD.basic_experimental_loop(
        sample_params,
        fit,
        edit_params,
        max_seed,
        modelname,
        dataset,
        contamination,
        datadir("experiments/contamination-$(contamination)/mv_tec"),
    )
end
end
33 changes: 23 additions & 10 deletions scripts/experiments_mill/poolmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,27 @@ end
################ THIS PART IS COMMON FOR ALL MODELS ################
# only execute this if run directly - so it can be included in other files
if abspath(PROGRAM_FILE) == @__FILE__
GroupAD.basic_experimental_loop(
sample_params,
fit,
edit_params,
max_seed,
modelname,
dataset,
contamination,
datadir("experiments/contamination-$(contamination)/MIL")
# Choose the results folder by dataset family. The `if`/`elseif` must form a single
# statement: closing the `if` with `end` before `elseif` does not parse.
# A dataset that belongs to neither list runs nothing.
if in(dataset, mill_datasets)
    # datasets listed in `mill_datasets` are saved under the `MIL` folder
    GroupAD.basic_experimental_loop(
        sample_params,
        fit,
        edit_params,
        max_seed,
        modelname,
        dataset,
        contamination,
        datadir("experiments/contamination-$(contamination)/MIL"),
    )
elseif in(dataset, mvtec_datasets)
    # datasets listed in `mvtec_datasets` are saved under the `mv_tec` folder
    GroupAD.basic_experimental_loop(
        sample_params,
        fit,
        edit_params,
        max_seed,
        modelname,
        dataset,
        contamination,
        datadir("experiments/contamination-$(contamination)/mv_tec"),
    )
end
end
33 changes: 23 additions & 10 deletions scripts/experiments_mill/statistician.jl
Original file line number Diff line number Diff line change
Expand Up @@ -123,14 +123,27 @@ end
################ THIS PART IS COMMON FOR ALL MODELS ################
# only execute this if run directly - so it can be included in other files
if abspath(PROGRAM_FILE) == @__FILE__
GroupAD.basic_experimental_loop(
sample_params,
fit,
edit_params,
max_seed,
modelname,
dataset,
contamination,
datadir("experiments/contamination-$(contamination)/MIL")
# Choose the results folder by dataset family. The `if`/`elseif` must form a single
# statement: closing the `if` with `end` before `elseif` does not parse.
# A dataset that belongs to neither list runs nothing.
if in(dataset, mill_datasets)
    # datasets listed in `mill_datasets` are saved under the `MIL` folder
    GroupAD.basic_experimental_loop(
        sample_params,
        fit,
        edit_params,
        max_seed,
        modelname,
        dataset,
        contamination,
        datadir("experiments/contamination-$(contamination)/MIL"),
    )
elseif in(dataset, mvtec_datasets)
    # datasets listed in `mvtec_datasets` are saved under the `mv_tec` folder
    GroupAD.basic_experimental_loop(
        sample_params,
        fit,
        edit_params,
        max_seed,
        modelname,
        dataset,
        contamination,
        datadir("experiments/contamination-$(contamination)/mv_tec"),
    )
end
end
31 changes: 22 additions & 9 deletions scripts/experiments_mill/vae_basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -126,14 +126,27 @@ end
################ THIS PART IS COMMON FOR ALL MODELS ################
# only execute this if run directly - so it can be included in other files
if abspath(PROGRAM_FILE) == @__FILE__
GroupAD.basic_experimental_loop(
sample_params,
fit,
edit_params,
max_seed,
modelname,
dataset,
contamination,
datadir("experiments/contamination-$(contamination)/MIL")
# Run the experiment loop, saving into the folder that matches the dataset family.
# A dataset that belongs to neither list runs nothing.
if dataset in mill_datasets
    # results of datasets listed in `mill_datasets` go to the `MIL` folder
    GroupAD.basic_experimental_loop(
        sample_params, fit, edit_params, max_seed, modelname, dataset, contamination,
        datadir("experiments/contamination-$(contamination)/MIL"),
    )
elseif dataset in mvtec_datasets
    # results of datasets listed in `mvtec_datasets` go to the `mv_tec` folder
    GroupAD.basic_experimental_loop(
        sample_params, fit, edit_params, max_seed, modelname, dataset, contamination,
        datadir("experiments/contamination-$(contamination)/mv_tec"),
    )
end
end
31 changes: 22 additions & 9 deletions scripts/experiments_mill/vae_instance.jl
Original file line number Diff line number Diff line change
Expand Up @@ -139,14 +139,27 @@ end
################ THIS PART IS COMMON FOR ALL MODELS ################
# only execute this if run directly - so it can be included in other files
if abspath(PROGRAM_FILE) == @__FILE__
GroupAD.basic_experimental_loop(
sample_params,
fit,
edit_params,
max_seed,
modelname,
dataset,
contamination,
datadir("experiments/contamination-$(contamination)/MIL"),
# Run the experiment loop, saving into the folder that matches the dataset family.
# A dataset that belongs to neither list runs nothing.
if dataset in mill_datasets
    # results of datasets listed in `mill_datasets` go to the `MIL` folder
    GroupAD.basic_experimental_loop(
        sample_params, fit, edit_params, max_seed, modelname, dataset, contamination,
        datadir("experiments/contamination-$(contamination)/MIL"),
    )
elseif dataset in mvtec_datasets
    # results of datasets listed in `mvtec_datasets` go to the `mv_tec` folder
    GroupAD.basic_experimental_loop(
        sample_params, fit, edit_params, max_seed, modelname, dataset, contamination,
        datadir("experiments/contamination-$(contamination)/mv_tec"),
    )
end
end
Loading

0 comments on commit d0d96aa

Please sign in to comment.