-
Notifications
You must be signed in to change notification settings - Fork 50
/
Copy pathworkflow.py
executable file
·81 lines (64 loc) · 3.17 KB
/
workflow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from pathlib import Path
import logging
import pyam
from nomenclature import DataStructureDefinition, RegionProcessor, process
from nomenclature.codelist import RegionCode
from datetime import datetime, timedelta
# Directory containing this module; the "definitions" and "mappings" folders
# used by the workflow are resolved relative to it
here = Path(__file__).absolute().parent

# Module-level logger, emitting messages at INFO level and above
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Datetime-like timeslices must be given in Central European Time (CET),
# identified either by a one-hour offset from UTC or by the timezone name
EXP_TIME_OFFSET = timedelta(hours=1)
EXP_TZ = "UTC+01:00"
def oe_subannual_format(x):
    """Format a datetime-like object as a "subannual" timeslice string

    The result has the form `MM-DD HH:MM` followed by the UTC offset as
    rendered by `%z`. An offset of `+0100` is rewritten to `+01:00`; any other
    offset (or the empty string produced when no offset is available) is kept
    as returned by `strftime`.

    Parameters
    ----------
    x : object with a `strftime` method
        The timestamp to be formatted.

    Returns
    -------
    str
    """
    stamp = x.strftime("%m-%d %H:%M%z")
    return stamp.replace("+0100", "+01:00")
def _add_directional_regions(df, definition):
    """Add valid `origin>destination` regions of `df` to the region codelist

    A region of `df` that is not yet in `definition.region` and that consists
    of exactly two `>`-separated parts is added to the codelist (in place) if
    both parts are themselves in the codelist.

    Raises
    ------
    ValueError
        If a region not in the codelist contains more than one `>`.
    """
    # nothing to do if the scenario data has no directional regions at all
    if not any(">" in r for r in df.region):
        return

    for r in df.region:
        if r in definition.region:
            continue
        r_split = r.split(">")
        if len(r_split) > 2:
            raise ValueError(
                f"Directional data other than `origin>destination` not allowed: {r}"
            )
        if len(r_split) == 2 and all(_r in definition.region for _r in r_split):
            # add the directional-region to the codelist (without attributes)
            definition.region[r] = RegionCode(name=r, hierarchy="directional")


def _validate_subannual_timeslice(d):
    """Check that `d` is a datetime-like timeslice in the expected timezone

    Raises
    ------
    ValueError
        If `d` cannot be parsed as `MM-DD HH:MM` (with or without a UTC offset),
        if it has no UTC offset, or if the offset is not the expected one.
    """
    # 2020 is a leap year, so that "02-29" is parsed as a valid date
    try:
        _dt = datetime.strptime(f"2020-{d}", "%Y-%m-%d %H:%M%z")
    except ValueError:
        # parse again without timezone to report the more specific error;
        # `from None` keeps the internal parsing errors out of the traceback
        try:
            datetime.strptime(f"2020-{d}", "%Y-%m-%d %H:%M")
        except ValueError:
            raise ValueError(f"Invalid subannual timeslice: {d}") from None
        raise ValueError(f"Missing timezone: {d}") from None

    # casting to datetime with timezone was successful
    if not (_dt.tzname() == EXP_TZ or _dt.utcoffset() == EXP_TIME_OFFSET):
        raise ValueError(f"Invalid timezone: {d}")


def main(df: pyam.IamDataFrame, dimensions=["region", "variable"]) -> pyam.IamDataFrame:
    """Main function for validation and processing

    Parameters
    ----------
    df : pyam.IamDataFrame
        The scenario data to be validated and processed.
    dimensions : list of str, optional
        The dimensions passed to `process()` for validation. The "subannual"
        dimension is added to the definitions if `df` has a "subannual"
        dimension or a "time" column.

    Returns
    -------
    pyam.IamDataFrame
        The data returned by `process()`, with a "time" column re-cast to the
        "subannual" format.

    Raises
    ------
    ValueError
        If a directional region has more than one `>`, if the processed data
        has regions that are not in the region codelist, or if a "subannual"
        item is neither in the subannual codelist nor a valid datetime-like
        timeslice in the expected timezone.
    """
    # the default `dimensions` list is only read here, never modified in place
    if "subannual" in df.dimensions or df.time_col == "time":
        dsd_dimensions = dimensions + ["subannual"]
    else:
        dsd_dimensions = dimensions

    # import definitions and region-processor
    definition = DataStructureDefinition(
        here / "definitions", dimensions=dsd_dimensions
    )
    processor = RegionProcessor.from_directory(here / "mappings", definition)

    # check if directional data exists in the scenario data, add to region codelist
    _add_directional_regions(df, definition)

    # validate the region and variable dimensions, apply region processing
    df = process(df, definition, dimensions=dimensions, processor=processor)

    # this is a quick-fix for https://github.com/IAMconsortium/nomenclature/issues/283
    if invalid_regions := definition.region.validate_items(df.region):
        raise ValueError(f"Invalid regions: {invalid_regions}")

    # convert to subannual format if data provided in datetime format
    if df.time_col == "time":
        logger.info('Re-casting from "time" column to categorical "subannual" format')
        df = df.swap_time_for_year(subannual=oe_subannual_format)

    # check that any datetime-like items in "subannual" are valid datetime and UTC+01:00
    if "subannual" in df.dimensions:
        for d in df.subannual:
            # items defined in the subannual codelist need no datetime check
            if d not in definition.subannual:
                _validate_subannual_timeslice(d)

    return df