diff --git a/.gitignore b/.gitignore
index 94d419b..b03b432 100755
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,11 @@
+################################################################################
+#                                    Custom                                    #
+################################################################################
+
+data/times
+*.h5
+
+
 ################################################################################
 #                          DrWatson Project Structure                          #
 ################################################################################
diff --git a/experimental/plot.pdf b/experimental/plot.pdf
deleted file mode 100644
index 536ca0e..0000000
Binary files a/experimental/plot.pdf and /dev/null differ
diff --git a/experimental/plot.png b/experimental/plot.png
deleted file mode 100644
index 8e17883..0000000
Binary files a/experimental/plot.png and /dev/null differ
diff --git a/plot.png b/plot.png
deleted file mode 100644
index d40961a..0000000
Binary files a/plot.png and /dev/null differ
diff --git a/scripts/experiments_mill/hmil_classifier.jl b/scripts/experiments_mill/hmil_classifier.jl
index bdcdc4b..82a2aec 100644
--- a/scripts/experiments_mill/hmil_classifier.jl
+++ b/scripts/experiments_mill/hmil_classifier.jl
@@ -45,7 +45,8 @@ function sample_params()
     return (mdim=mdim, activation=activation, aggregation=aggregation, nlayers=nlayers)
 end
 
-loss(model, x, y) = Flux.logitcrossentropy(model(x), y)
+loss(model, x, y) = Flux.crossentropy(model(x), y)
+# loss(model, x, y) = Flux.logitcrossentropy(model(x), y)
 
 """
     fit(data, parameters)
diff --git a/test/lhco.jl b/test/lhco.jl
new file mode 100644
index 0000000..2c6b9a2
--- /dev/null
+++ b/test/lhco.jl
@@ -0,0 +1,86 @@
+using Test
+using GroupAD
+using PyCall
+using Mill
+
+"""
+    load_lhco_from_pandas(dataset = "events_anomalydetection_v2.h5")
+
+Load the LHCO2020 dataset (the R&D version for now) with pandas and convert it
+into Mill.jl bags of normal and anomalous samples.
+
+`dataset` is looked up in `get_lhco_datapath()` and read in chunks of 100000 rows.
+The last column of a row is its label (`0` normal, `1` anomaly). The remaining
+columns are consumed as consecutive triples, one instance per triple (presumably
+`(pT, eta, phi)` of a particle; confirm against the dataset description), up to
+the first triple whose leading value is zero, which is treated as zero padding.
+
+Return a named tuple `(normal, anomaly)` of `BagNode`s with one bag per row and
+one 3-element `Float32` column per instance.
+
+Throw an error if the `python` found on `PATH` is not the supported installation.
+
+Note: PyCall.jl must be installed, and Python/3.8 must be loaded with the pandas
+and tables packages installed. If this version is used, the path to Python must
+be `/mnt/appl/software/Python/3.8.6-GCCcore-10.2.0/bin/python`.
+"""
+function load_lhco_from_pandas(dataset = "events_anomalydetection_v2.h5")
+    file = joinpath(get_lhco_datapath(), dataset)
+
+    # Fail with a readable message when the supported Python is not on `PATH`;
+    # `pd` used to stay undefined in that case and raised an `UndefVarError` below.
+    python = something(Sys.which("python"), "")
+    if !occursin("Python/3.8.6-GCCcore-10.2.0", python)
+        error("load_lhco_from_pandas needs Python/3.8.6-GCCcore-10.2.0, " *
+              "but `python` on PATH is \"$python\"")
+    end
+    pd = pyimport("pandas")
+
+    # Concrete element type, so `reduce(hcat, ...)` can use its specialised method.
+    data = Matrix{Float32}[]
+    labels = Int[]
+
+    chunksize = 100000
+    for start in 0:chunksize:1100000
+        chunk = pd.read_hdf(file, start=start, stop=start + chunksize)
+
+        for row in eachrow(chunk.values)
+            push!(labels, Int(row[end]))
+            push!(data, _lhco_instances(@view row[1:end-1]))
+        end
+    end
+
+    return (
+        normal = _lhco_bagnode(data[labels .== 0]),
+        anomaly = _lhco_bagnode(data[labels .== 1]),
+    )
+end
+
+# Reshape the flat feature vector of one row into a 3×n `Float32` matrix holding
+# the triples that precede the zero padding.
+function _lhco_instances(features::AbstractVector)
+    ntriples = length(features) ÷ 3
+    # Only the leading value of a triple marks padding: a zero elsewhere in a
+    # triple must not cut the row short, and a row without padding is kept whole.
+    firstpad = findfirst(k -> iszero(features[3k - 2]), 1:ntriples)
+    nvalid = firstpad === nothing ? ntriples : firstpad - 1
+    return reshape(Float32.(features[1:3*nvalid]), 3, nvalid)
+end
+
+# Concatenate the per-row instance matrices into one `BagNode` with a bag per row.
+function _lhco_bagnode(instances::Vector{Matrix{Float32}})
+    # `reduce(hcat, ...)` instead of `hcat(instances...)`: no splatting of one
+    # argument per row. An empty selection yields an empty 3×0 matrix.
+    features = isempty(instances) ? zeros(Float32, 3, 0) : reduce(hcat, instances)
+    return BagNode(ArrayNode(features), Mill.length2bags(size.(instances, 2)))
+end
+
+@testset "LHCO data" begin
+    # Two triples followed by padding; a zero inside a triple is kept.
+    @test _lhco_instances([1.0, 0.0, 3.0, 4.0, 5.0, 6.0, 0.0, 0.0, 0.0]) ==
+        Float32[1 4; 0 5; 3 6]
+    # A row without padding is kept whole.
+    @test _lhco_instances([1.0, 2.0, 3.0]) == reshape(Float32[1, 2, 3], 3, 1)
+    # A fully padded row has no instances.
+    @test size(_lhco_instances(zeros(6))) == (3, 0)
+end
\ No newline at end of file