-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimport_distributions.exs
98 lines (80 loc) · 2.73 KB
/
import_distributions.exs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
alias Risteys.{FGEndpoint.Definition, Repo, AgeDistribution, YearDistribution}
require Logger
# Log at :info level so the per-row progress messages emitted below are shown.
Logger.configure(level: :info)

# Expected command-line arguments, in order:
#   1. path to the distribution CSV file
#   2. type of distribution: "age" or "year"
#   3. dataset: "FG" or "FR"
# Any extra trailing arguments are ignored.
#
# The arguments are matched in a `case` (rather than with a bare `=` match) so
# that a missing argument produces an explanatory ArgumentError instead of an
# opaque MatchError.
{distrib_filepath, distribution_type, dataset} =
  case System.argv() do
    [filepath, type, dataset_name | _] ->
      {filepath, type, dataset_name}

    _ ->
      raise ArgumentError,
        message:
          "Expected three arguments: path to the distribution CSV file, " <>
            "type of distribution (age or year) and dataset (FG or FR)."
  end

# Reject unknown values early, before any file or database access happens.
if distribution_type not in ["age", "year"] do
  raise ArgumentError, message: "Type of distribution needs to be given as a second argument, either age or year"
end

if dataset not in ["FG", "FR"] do
  raise ArgumentError, message: "Dataset needs to be given as a third argument, either FG or FR."
end
Logger.info("Start importing #{distribution_type} distributions")

# The distribution type is fixed for the whole run, so the schema module is
# selected once here instead of once per CSV row.
distrib_module =
  case distribution_type do
    "age" -> AgeDistribution
    "year" -> YearDistribution
  end

# Guard against importing data into the wrong distribution table: a finite right
# bin edge above 200 is rejected for "age" data, and one below 1000 is rejected
# for "year" data. An open-ended (nil) edge is never considered out of range;
# nil is tested first so the numeric comparison never sees a non-number.
out_of_range? =
  case distribution_type do
    "age" -> fn edge -> not is_nil(edge) and edge > 200 end
    "year" -> fn edge -> not is_nil(edge) and edge < 1000 end
  end

# Converts a histogram bin edge from its CSV text to nil (when it equals the
# given open-ended marker) or to a float.
# Float.parse/1 is used rather than String.to_float/1 because the latter raises
# on integer-formatted values such as "10". The whole string must be consumed,
# so malformed values still raise an ArgumentError.
parse_edge = fn text, open_marker ->
  if text == open_marker do
    nil
  else
    case Float.parse(text) do
      {value, ""} ->
        value

      _ ->
        raise ArgumentError,
          message: "Invalid histogram bin edge value: #{inspect(text)}"
    end
  end
end

# Stream the CSV row by row; each row is a map keyed by the CSV header names.
distrib_filepath
|> File.stream!()
|> CSV.decode!(headers: true)
|> Enum.each(fn row ->
  %{
    "endpoint" => name,
    "sex" => sex,
    "left" => left,
    "right" => right,
    "count" => count
  } = row

  Logger.info("Handling data of #{name}, value of 'left': #{left}")

  # "-inf" (left) and "inf" (right) mark open-ended bins and are stored as nil.
  left = parse_edge.(left, "-inf")
  right = parse_edge.(right, "inf")

  if out_of_range?.(right) do
    raise ArgumentError,
      message:
        "Data import stopped.\n" <>
          "You're trying to import #{distribution_type} histogram data, but the input data\n" <>
          "has a histogram bin edge value of #{right}, which is not in the expected range.\n" <>
          "Please check your input data and the argument for distribution type."
  end

  # Look up the endpoint definition by name to obtain its database id.
  case Repo.get_by(Definition, name: name) do
    nil ->
      Logger.warning("Endpoint #{name} not in the DB, skipping")

    endpoint ->
      attrs = %{
        fg_endpoint_id: endpoint.id,
        sex: sex,
        left: left,
        right: right,
        count: String.to_integer(count),
        dataset: dataset
      }

      # Build an empty struct of the selected schema, validate through its
      # changeset and insert. A failed insert is logged and the import continues.
      distrib_module
      |> struct()
      |> distrib_module.changeset(attrs)
      |> Repo.insert()
      |> case do
        {:ok, _} ->
          Logger.info("Insert ok")

        {:error, changeset} ->
          Logger.warning(inspect(changeset))
      end
  end
end)