# process_data.py
import ncas_amof_netcdf_template as nant
import polars as pl
import datetime as dt
def read_data_year(input_file, year=2023):
    """Load the O2 CSV and return only the rows that fall within ``year``.

    The file is read with ``"NA"`` treated as null, rows in which every
    column is null are dropped, the ``Date`` column is parsed from
    ``%d/%m/%Y %H:%M`` text into datetimes, and nulls in ``O2_permeg`` are
    replaced by the ``-1e20`` missing-data marker.

    Args:
        input_file: CSV source accepted by ``polars.read_csv``; it must
            provide ``Date`` and ``O2_permeg`` columns.
        year: Calendar year to keep. A row is kept when
            ``year-01-01 00:00 <= Date < (year + 1)-01-01 00:00``.

    Returns:
        polars.DataFrame: The selected rows, with ``Date`` as datetimes and
        ``O2_permeg`` as floats containing no nulls.

    Raises:
        TypeError: If a remaining row has a null ``Date``.
        ValueError: If a ``Date`` entry does not match the expected format.
    """
    missing_value = -1e20
    date_format = "%d/%m/%Y %H:%M"

    df = pl.read_csv(input_file, null_values="NA")
    # Drop rows in which every single column is null.
    df = df.filter(~pl.all_horizontal(pl.all().is_null()))

    # Dates are parsed with the standard library so the accepted text format
    # is exactly that of ``datetime.strptime``. The explicit dtype keeps the
    # column a datetime column even when no rows are left to parse.
    parsed_dates = pl.Series(
        "Date",
        [dt.datetime.strptime(text, date_format) for text in df["Date"]],
        dtype=pl.Datetime("us"),
    )
    df = df.with_columns(parsed_dates)

    # Casting first means the marker can be stored whatever type the CSV
    # reader inferred for the column (e.g. integers, or an all-null column).
    df = df.with_columns(
        pl.col("O2_permeg").cast(pl.Float64).fill_null(missing_value)
    )

    # Half-open window: a sample stamped exactly at midnight on 1 January of
    # the following year belongs to that following year only, so it must not
    # be included here as well.
    year_start = dt.datetime(year, 1, 1)
    next_year_start = dt.datetime(year + 1, 1, 1)
    return df.filter(
        (pl.col("Date") >= year_start) & (pl.col("Date") < next_year_start)
    )
def make_netcdf(input_file="ncas-o2-1_growing.csv", output_location=".", product_version="1.0", year=2023):
    """Write one year of O2/N2 ratio data from a CSV file to a netCDF file.

    The rows for ``year`` are loaded with ``read_data_year``, a netCDF file
    is created for the ``ncas-o2-1`` instrument through
    ``ncas_amof_netcdf_template`` (``nant``), the time variables, the
    ``O2_permeg`` values and the ``O2_Flag`` quality flags are written, the
    time-coverage attributes are set, and metadata is added from
    ``metadata.csv``. After the file has been closed,
    ``nant.remove_empty_variables.main`` is run on it.

    Args:
        input_file: CSV file passed on to ``read_data_year``.
        output_location: Directory handed to ``nant`` as ``file_location``;
            also used to build the path of the finished file.
        product_version: Product version string; it is passed to ``nant``
            and appears in the output file name after ``_v``.
        year: Calendar year of data to write.

    Returns:
        None. The result is the file
        ``{output_location}/ncas-o2-1_wao_{file_date}_o2n2-concentration-ratio_v{product_version}.nc``.
    """
    # Must match the marker that read_data_year stores for missing values.
    missing_value = -1e20
    timestamp_format = "%Y-%m-%dT%H:%M:%S"

    data_df = read_data_year(input_file, year=year)

    (
        unix_times,
        day_of_year,
        years,
        months,
        days,
        hours,
        minutes,
        seconds,
        time_coverage_start_unix,
        time_coverage_end_unix,
        file_date,
    ) = nant.util.get_times(data_df["Date"])

    nc = nant.create_netcdf.main(
        "ncas-o2-1",
        date=file_date,
        dimension_lengths={"time": len(unix_times)},
        file_location=output_location,
        product_version=product_version,
    )

    # Close the dataset even if writing fails, so it is not left open.
    try:
        time_variables = {
            "time": unix_times,
            "day_of_year": day_of_year,
            "year": years,
            "month": months,
            "day": days,
            "hour": hours,
            "minute": minutes,
            "second": seconds,
        }
        for variable_name, values in time_variables.items():
            nant.util.update_variable(nc, variable_name, values)

        ratio_name = "molecular_oxygen_molecular_nitrogen_ratio_in_air"
        nant.util.update_variable(nc, ratio_name, data_df["O2_permeg"])
        nc[ratio_name].cell_methods = "time: point"

        # Only write quality flags when at least one real measurement exists.
        # The comparison has to use the same marker that replaces nulls
        # (-1e20); comparing against 1e-20 made this check always pass.
        # NOTE(review): presumably an unwritten qc_flag is then dropped by
        # remove_empty_variables below - confirm.
        has_measurements = (
            data_df.filter(pl.col("O2_permeg") != missing_value).height > 0
        )
        if has_measurements:
            nant.util.update_variable(nc, "qc_flag", data_df["O2_Flag"])

        coverage_bounds = (
            ("time_coverage_start", time_coverage_start_unix),
            ("time_coverage_end", time_coverage_end_unix),
        )
        for attribute_name, unix_stamp in coverage_bounds:
            # Coverage times are written as UTC, without an offset suffix.
            as_utc = dt.datetime.fromtimestamp(unix_stamp, dt.timezone.utc)
            nc.setncattr(attribute_name, as_utc.strftime(timestamp_format))

        # "metadata.csv" is a relative path.
        # NOTE(review): presumably resolved against the current working
        # directory - confirm.
        nant.util.add_metadata_to_netcdf(nc, "metadata.csv")
    finally:
        nc.close()

    # Check for empty variables and remove if necessary. This runs on the
    # closed file, addressed by its path.
    nant.remove_empty_variables.main(f'{output_location}/ncas-o2-1_wao_{file_date}_o2n2-concentration-ratio_v{product_version}.nc')
if __name__ == "__main__":
    # Command-line entry point. Positional arguments, in order:
    #   1. input CSV file
    #   2. output directory
    #   3. product version
    #   4. year (converted to an integer)
    import sys

    make_netcdf(
        input_file=sys.argv[1],
        output_location=sys.argv[2],
        product_version=sys.argv[3],
        year=int(sys.argv[4]),
    )