added modelnet to the experiments

aicenter · Oct 10, 2023 · 37a0ae2 · 37a0ae2
1 parent 4e3f125
commit 37a0ae2
Show file tree

Hide file tree

Showing 16 changed files with 272 additions and 125 deletions.
diff --git a/scripts/experiments_point_cloud/hmil_classifier.jl b/scripts/experiments_point_cloud/hmil_classifier.jl
@@ -11,6 +11,10 @@ using ValueHistories
 using MLDataPattern: RandomBatches
 using Random
 
+# dataset = "modelnet"
+# method = "chair"
+# data = GroupAD.load_data(dataset, method=method)
+
 s = ArgParseSettings()
 @add_arg_table! s begin
    "max_seed"
@@ -23,10 +27,10 @@ s = ArgParseSettings()
         help = "dataset"
 	"anomaly_classes"
 		arg_type = Int
-		default = 10
+		default = 1
 		help = "number of anomaly classes"
 	"method"
-		default = "leave-one-out"
+		default = "leave-one-in"
 		arg_type = String
 		help = "method for data creation -> \"leave-one-out\" or \"leave-one-in\" "
    "contamination"
@@ -52,7 +56,8 @@ function sample_params()
     return (mdim=mdim, activation=activation, aggregation=aggregation, nlayers=nlayers)
 end
 
-loss(model, x, y) = Flux.logitcrossentropy(model(x), y)
+loss(model, x, y) = Flux.crossentropy(model(x), y)
+# loss(model, x, y) = Flux.logitcrossentropy(model(x), y)
 
 """
 	fit(data, parameters)
@@ -70,7 +75,7 @@ function fit(data, parameters, seed)
 	# fit train data
 	# max. train time: 24 hours
 	try
-		global _info, fit_t, _, _, _ = @timed GroupAD.Models.fit_hmil!(model, data, loss; max_train_time=23*3600/max_seed/4, 
+		global _info, fit_t, _, _, _ = @timed GroupAD.Models.fit_hmil!(model, data, loss; max_train_time=22*3600/max_seed/3, 
 			patience=200, check_interval=5, seed=seed, parameters...)
 		global info = _info[1]
 		global new_data = (_info[2], _info[3], data[3])
@@ -112,14 +117,31 @@ end
 ################ THIS PART IS COMMON FOR ALL MODELS ################
 # only execute this if run directly - so it can be included in other files
 if abspath(PROGRAM_FILE) == @__FILE__
-	GroupAD.Models.hmil_basic_loop(
-		sample_params, 
-		fit, 
-		edit_params, 
-		max_seed, 
-		modelname, 
-		dataset, 
-		contamination, 
-		datadir("experiments/contamination-$(contamination)/MNIST"),
-	)
+	if dataset == "MNIST"
+		GroupAD.Models.hmil_pc_loop(
+			sample_params, 
+			fit, 
+			edit_params, 
+			max_seed, 
+			modelname, 
+			dataset, 
+			contamination, 
+			datadir("experiments/contamination-$(contamination)/MNIST"),
+			anomaly_classes,
+			method
+		)
+	elseif dataset == "modelnet"
+		GroupAD.Models.hmil_pc_loop(
+			sample_params, 
+			fit, 
+			edit_params, 
+			max_seed, 
+			modelname, 
+			dataset, 
+			contamination, 
+			datadir("experiments/contamination-$(contamination)/modelnet"),
+			anomaly_classes,
+			method
+		)
+	end
 end
diff --git a/scripts/experiments_point_cloud/hmil_classifier.sh b/scripts/experiments_point_cloud/hmil_classifier.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+#SBATCH --partition=cpu
+#SBATCH --time=24:00:00
+#SBATCH --nodes=1 --ntasks-per-node=1 --cpus-per-task=2
+#SBATCH --mem=10G
+
+MAX_SEED=$1
+DATASET=$2
+ANOMALY_CLASSES=$3
+METHOD=$4
+CONTAMINATION=$5
+
+module load Julia/1.7.3-linux-x86_64
+
+julia --project ./hmil_classifier.jl ${MAX_SEED} $DATASET ${ANOMALY_CLASSES} $METHOD $CONTAMINATION
diff --git a/scripts/experiments_point_cloud/knn_basic.jl b/scripts/experiments_point_cloud/knn_basic.jl
@@ -1,5 +1,3 @@
-using Pkg
-Pkg.activate(split(pwd(), ".jl")[1]*".jl")
 using DrWatson
 @quickactivate
 using ArgParse
@@ -21,7 +19,7 @@ s = ArgParseSettings()
         help = "dataset"
 	"anomaly_classes"
 		arg_type = Int
-		default = 10
+		default = 1
 		help = "number of anomaly classes"
 	"method"
 		default = "leave-one-out"
@@ -110,16 +108,31 @@ end
 ####################################################################
 ################ THIS PART IS COMMON FOR ALL MODELS ################
 if abspath(PROGRAM_FILE) == @__FILE__
-	GroupAD.point_cloud_experimental_loop(
-		sample_params, 
-		fit, 
-		edit_params, 
-		max_seed, 
-		modelname, 
-		dataset, 
-		contamination, 
-		datadir("experiments/contamination-$(contamination)"),
-		anomaly_classes,
-        method
+	if dataset == "MNIST"
+		GroupAD.point_cloud_experimental_loop(
+			sample_params, 
+			fit, 
+			edit_params, 
+			max_seed, 
+			modelname, 
+			dataset, 
+			contamination, 
+			datadir("experiments/contamination-$(contamination)/MNIST"),
+			anomaly_classes,
+			method
 		)
+	elseif dataset == "modelnet"
+		GroupAD.point_cloud_experimental_loop(
+			sample_params, 
+			fit, 
+			edit_params, 
+			max_seed, 
+			modelname, 
+			dataset, 
+			contamination, 
+			datadir("experiments/contamination-$(contamination)/modelnet"),
+			anomaly_classes,
+			method
+		)
+	end
 end
diff --git a/scripts/experiments_point_cloud/knn_basic.sh b/scripts/experiments_point_cloud/knn_basic.sh
@@ -2,14 +2,14 @@
 #SBATCH --partition=cpufast
 #SBATCH --time=4:00:00
 #SBATCH --nodes=1 --ntasks-per-node=2 --cpus-per-task=1
-#SBATCH --mem=20G
+#SBATCH --mem=10G
 
 MAX_SEED=$1
 DATASET=$2
 ANOMALY_CLASSES=$3
 METHOD=$4
 CONTAMINATION=$5
 
-module load Julia/1.7.2-linux-x86_64
+module load Julia/1.7.3-linux-x86_64
 
-julia ./knn_basic.jl ${MAX_SEED} $DATASET ${ANOMALY_CLASSES} $METHOD $CONTAMINATION
+julia --project ./knn_basic.jl ${MAX_SEED} $DATASET ${ANOMALY_CLASSES} $METHOD $CONTAMINATION
diff --git a/scripts/experiments_point_cloud/point_cloud_datasets.txt b/scripts/experiments_point_cloud/point_cloud_datasets.txt
@@ -1,2 +1 @@
-MNIST
-ModelNet10
+modelnet
diff --git a/scripts/experiments_point_cloud/poolmodel.jl b/scripts/experiments_point_cloud/poolmodel.jl
@@ -1,5 +1,3 @@
-using Pkg
-Pkg.activate(split(pwd(), ".jl")[1]*".jl")
 using DrWatson
 @quickactivate
 using ArgParse
@@ -23,7 +21,7 @@ s = ArgParseSettings()
         help = "dataset"
 	"anomaly_classes"
 		arg_type = Int
-		default = 10
+		default = 1
 		help = "number of anomaly classes"
 	"method"
 		default = "leave-one-out"
@@ -140,16 +138,31 @@ end
 ################ THIS PART IS COMMON FOR ALL MODELS ################
 # only execute this if run directly - so it can be included in other files
 if abspath(PROGRAM_FILE) == @__FILE__
-	GroupAD.point_cloud_experimental_loop(
-		sample_params, 
-		fit, 
-		edit_params, 
-		max_seed, 
-		modelname, 
-		dataset, 
-		contamination, 
-		datadir("experiments/contamination-$(contamination)"),
-		anomaly_classes,
-        method
+	if dataset == "MNIST"
+		GroupAD.point_cloud_experimental_loop(
+			sample_params, 
+			fit, 
+			edit_params, 
+			max_seed, 
+			modelname, 
+			dataset, 
+			contamination, 
+			datadir("experiments/contamination-$(contamination)/MNISTooo"),
+			anomaly_classes,
+			method
 		)
+	elseif dataset == "modelnet"
+		GroupAD.point_cloud_experimental_loop(
+			sample_params, 
+			fit, 
+			edit_params, 
+			max_seed, 
+			modelname, 
+			dataset, 
+			contamination, 
+			datadir("experiments/contamination-$(contamination)/modelnet"),
+			anomaly_classes,
+			method
+		)
+	end
 end
diff --git a/scripts/experiments_point_cloud/run_parallel_modelnet.sh b/scripts/experiments_point_cloud/run_parallel_modelnet.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# This runs ModelNet on slurm (one array job is submitted per ModelNet class).
+# USAGE EXAMPLE
+#   ./run_parallel_modelnet.sh statistician 50 10 2   # MODEL NUM_SAMPLES MAX_SEED NUM_CONC
+# Run from this folder only.
+MODEL=$1 		# which model to run
+NUM_SAMPLES=$2	# how many repetitions
+MAX_SEED=$3		# how many folds over dataset
+NUM_CONC=$4		# number of concurrent tasks in the array job
+
+LOG_DIR="${HOME}/logs/${MODEL}"
+
+if [ ! -d "$LOG_DIR" ]; then
+	mkdir $LOG_DIR
+fi
+
+for class in bathtub bed chair desk dresser monitor night_stand sofa table toilet
+do
+    # submit to slurm
+    sbatch \
+    --array=1-${NUM_SAMPLES}%${NUM_CONC} \
+    --output="${LOG_DIR}/modelnet_$METHOD-%A_%a.out" \
+        ./${MODEL}.sh $MAX_SEED "modelnet" 1 $class 0
+done
+
+# for local testing    
+# ./${MODEL}_run.sh $MAX_SEED "MNIST" $METHOD 10
diff --git a/scripts/experiments_point_cloud/run_parallel_point_cloud.sh b/scripts/experiments_point_cloud/run_parallel_point_cloud.sh
@@ -22,7 +22,7 @@ while read d; do
     sbatch \
     --array=1-${NUM_SAMPLES}%${NUM_CONC} \
     --output="${LOG_DIR}/${d}-%A_%a.out" \
-     ./${MODEL}.sh $MAX_SEED ${DATASET_FILE} 10 $METHOD $CONTAMINATION
+     ./${MODEL}.sh $MAX_SEED ${DATASET_FILE} 1 $METHOD $CONTAMINATION
 
     # for local testing    
     # ./${MODEL}_run.sh $MAX_SEED $d

diff --git a/scripts/experiments_point_cloud/statistician.jl b/scripts/experiments_point_cloud/statistician.jl
@@ -1,5 +1,3 @@
-using Pkg
-Pkg.activate(split(pwd(), ".jl")[1]*".jl")
 using DrWatson
 @quickactivate
 using ArgParse
@@ -23,7 +21,7 @@ s = ArgParseSettings()
         help = "dataset"
 	"anomaly_classes"
 		arg_type = Int
-		default = 10
+		default = 1
 		help = "number of anomaly classes"
 	"method"
 		default = "leave-one-out"
@@ -74,7 +72,8 @@ end
 
 Negative ELBO for training of a Neural Statistician model.
 """
-loss(model::GenerativeModels.NeuralStatistician,x) = -GroupAD.Models.elbo1(model, x)
+# loss(model::GenerativeModels.NeuralStatistician,x) = -GroupAD.Models.elbo1(model, x)
+loss(model::GenerativeModels.NeuralStatistician, batch) = mean(x -> -GroupAD.Models.elbo1(model, x), batch)
 
 """
 	fit(data, parameters)
@@ -142,16 +141,31 @@ end
 ################ THIS PART IS COMMON FOR ALL MODELS ################
 # only execute this if run directly - so it can be included in other files
 if abspath(PROGRAM_FILE) == @__FILE__
-	GroupAD.point_cloud_experimental_loop(
-		sample_params, 
-		fit, 
-		edit_params, 
-		max_seed, 
-		modelname, 
-		dataset, 
-		contamination, 
-		datadir("experiments/contamination-$(contamination)"),
-		anomaly_classes,
-        method
+	if dataset == "MNIST"
+		GroupAD.point_cloud_experimental_loop(
+			sample_params, 
+			fit, 
+			edit_params, 
+			max_seed, 
+			modelname, 
+			dataset, 
+			contamination, 
+			datadir("experiments/contamination-$(contamination)/MNIST"),
+			anomaly_classes,
+			method
 		)
+	elseif dataset == "modelnet"
+		GroupAD.point_cloud_experimental_loop(
+			sample_params, 
+			fit, 
+			edit_params, 
+			max_seed, 
+			modelname, 
+			dataset, 
+			contamination, 
+			datadir("experiments/contamination-$(contamination)/modelnet"),
+			anomaly_classes,
+			method
+		)
+	end
 end
diff --git a/scripts/experiments_point_cloud/statistician.sh b/scripts/experiments_point_cloud/statistician.sh
@@ -1,15 +1,15 @@
 #!/bin/bash
 #SBATCH --partition=cpulong
 #SBATCH --time=48:00:00
-#SBATCH --nodes=1 --ntasks-per-node=1 --cpus-per-task=10
-#SBATCH --mem=100G
+#SBATCH --nodes=1 --ntasks-per-node=2
+#SBATCH --mem=10G
 
 MAX_SEED=$1
 DATASET=$2
 ANOMALY_CLASSES=$3
 METHOD=$4
 CONTAMINATION=$5
 
-module load Julia/1.7.2-linux-x86_64
+module load Julia/1.7.3-linux-x86_64
 
-julia --threads 10 ./statistician.jl ${MAX_SEED} $DATASET ${ANOMALY_CLASSES} $METHOD $CONTAMINATION
+julia --project ./statistician.jl ${MAX_SEED} $DATASET ${ANOMALY_CLASSES} $METHOD $CONTAMINATION