Skip to content

Commit

Permalink
other minor changes
Browse files Browse the repository at this point in the history
  • Loading branch information
maskomic committed Oct 10, 2023
1 parent 3af5e9c commit 740a828
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 1 deletion.
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
################################################################################
# Custom #
################################################################################

data/times
*.h5


################################################################################
# DrWatson Project Structure #
################################################################################
Expand Down
Binary file removed experimental/plot.pdf
Binary file not shown.
Binary file removed experimental/plot.png
Binary file not shown.
Binary file removed plot.png
Binary file not shown.
3 changes: 2 additions & 1 deletion scripts/experiments_mill/hmil_classifier.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ function sample_params()
return (mdim=mdim, activation=activation, aggregation=aggregation, nlayers=nlayers)
end

# NOTE(review): the two active definitions below have identical signatures, so when both
# are evaluated the later one (plain cross-entropy) replaces the earlier one.
loss(model, x, y) = Flux.logitcrossentropy(model(x), y)
# NOTE(review): presumably `Flux.crossentropy` expects `model(x)` to already be
# probabilities (e.g. a softmax output) rather than raw logits — confirm against the model.
loss(model, x, y) = Flux.crossentropy(model(x), y)
# loss(model, x, y) = Flux.logitcrossentropy(model(x), y)

"""
fit(data, parameters)
Expand Down
60 changes: 60 additions & 0 deletions test/lhco.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
using Test
using GroupAD
using PyCall
using Mill

"""
    load_lhco_from_pandas(dataset = "events_anomalydetection_v2.h5")

This function loads the LHCO2020 dataset (the R&D version for now)
and processes it to get Mill.jl datasets of normal and anomalous
samples.
Note: PyCall.jl must be installed, Python/3.8 loaded with pandas,
tables packages installed. If this version is used, the path to
Python must be `/mnt/appl/software/Python/3.8.6-GCCcore-10.2.0/bin/python`.
"""
function load_lhco_from_pandas(
    dataset="events_anomalydetection_v2.h5"; chunksize=100_000, nchunks=12
)
    # Read `dataset` (looked up under `get_lhco_datapath()`) through pandas in chunks and
    # turn every row into one bag of 3-component instances.
    #
    # Keywords (the defaults reproduce the previously hard-coded read pattern):
    #   chunksize - number of rows requested from pandas per `read_hdf` call
    #   nchunks   - number of consecutive chunks to read, starting at row 0
    #
    # Returns a NamedTuple `(normal = BagNode, anomaly = BagNode)` built from the rows
    # whose trailing label is 0 and 1 respectively.
    chunksize > 0 || throw(ArgumentError("chunksize must be positive, got $chunksize"))
    nchunks >= 0 || throw(ArgumentError("nchunks must be non-negative, got $nchunks"))

    file = joinpath(get_lhco_datapath(), dataset)

    # pandas is imported unconditionally: previously `pd` was only bound when the expected
    # interpreter was on PATH, so any other setup died with an UndefVarError further down.
    # The interpreter check is kept as a diagnostic only.
    pypath = Sys.which("python")
    if pypath === nothing || !occursin("Python/3.8.6-GCCcore-10.2.0", pypath)
        @warn "python on PATH is not the Python/3.8.6-GCCcore-10.2.0 build this loader was written for" pypath
    end
    pd = pyimport("pandas")

    data = Matrix{Float32}[]
    labels = Int[]

    for chunk in 0:(nchunks - 1)
        start = chunk * chunksize
        frame = pd.read_hdf(file; start=start, stop=start + chunksize)
        rows = frame.values

        for row in eachrow(rows)
            push!(labels, Int(row[end]))  # the last column carries the 0/1 label
            push!(data, _lhco_event_matrix(row))
        end
    end

    return (
        normal=_lhco_bagnode(data[labels .== 0]),
        anomaly=_lhco_bagnode(data[labels .== 1]),
    )
end

# Convert one raw row (flattened triples, zero padding, trailing label) to a 3×n matrix.
function _lhco_event_matrix(row)
    ntriples = (length(row) - 1) ÷ 3  # the trailing label is not part of any triple
    # Padding is detected on the first component of each triple only, so a legitimate
    # zero in the 2nd/3rd component no longer truncates the event mid-triple.
    # NOTE(review): assumes the first component is zero only for padding — confirm.
    firstpad = findfirst(k -> iszero(row[3k - 2]), 1:ntriples)
    n = firstpad === nothing ? ntriples : firstpad - 1
    # Column-major reshape puts components 1, 2, 3 of every triple into rows 1, 2, 3.
    return Float32.(reshape(row[1:(3n)], 3, n))
end

# Concatenate per-event matrices into a single BagNode with one bag per event.
function _lhco_bagnode(events)
    lens = Int[size(ev, 2) for ev in events]
    # `reduce(hcat, ...)` avoids splatting a huge collection; it rejects empty input,
    # hence the explicit empty 3×0 matrix.
    feats = isempty(events) ? zeros(Float32, 3, 0) : reduce(hcat, events)
    return BagNode(ArrayNode(feats), Mill.length2bags(lens))
end

# Placeholder suite: always passes, and keeps this file runnable until real LHCO
# checks are written.
@testset "LHCO data" begin
    @test true
end

0 comments on commit 740a828

Please sign in to comment.