-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwriteData.py
75 lines (59 loc) · 2.84 KB
/
writeData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import pandas as pd
def getMergedDataframe(model):
# DATA
data_col_names = ['ID', 'YEAR', 'ELEMENT']
for i in range(1, 13):
data_col_names.extend([f'VALUE{i}', f'DMFLAG{i}', f'QCFLAG{i}', f'DSFLAG{i}'])
data_col_widths = [11, 4, 4] + [5, 1, 1, 1]*12
data = pd.read_fwf(f'EXPLOREDATA/ghcnm.tavg.v4.0.1.20230609.{model}.dat', widths=data_col_widths, names=data_col_names)
# METADATA
metadata_col_names = ['ID', "LATITUDE", "LONGITUDE", "ELEVATION", "STATION"]
metadata_col_widths = [12, 8, 10, 8, 25]
metadata = pd.read_fwf(f'EXPLOREDATA/ghcnm.tavg.v4.0.1.20230609.{model}.inv', widths=metadata_col_widths, names=metadata_col_names)
# MERGED DATA
mergeddata = pd.merge(data, metadata, on='ID')
# Rename columnd ID to station ID
mergeddata['Country Code'] = mergeddata['ID'].str[:2]
mergeddata.rename(columns={'ID': 'STATION ID'}, inplace=True)
# COUNTRY CODES
countryCodes = pd.read_csv('COUNTRYDATA/COUNTRIES.csv')
countryCodes.head()
# Merge the dataframes and drop the ID column
df = pd.merge(mergeddata, countryCodes, on='Country Code')
# Rename Country column to COUNTRY
df.rename(columns={'Country': 'COUNTRY'}, inplace=True)
return df
def getLongTable(df):
# Create a long table with the year, month, and value for each country and station
df_long = pd.melt(df, id_vars=['STATION ID', 'COUNTRY', 'STATION', 'LATITUDE', 'LONGITUDE', 'ELEVATION', 'YEAR'], value_vars=[f'VALUE{i}' for i in range(1, 13)], var_name='MONTH', value_name='TEMP')
# Divive TEMP by 100 to get the temperature in degrees Celsius
df_long['TEMP'] = df_long['TEMP'] / 100
# On month column, remove the string 'VALUE' and convert to integer
df_long['MONTH'] = df_long['MONTH'].str[5:].astype(int)
# Reorder Columns
df_long = df_long[['STATION ID', 'COUNTRY', 'STATION', 'LATITUDE', 'LONGITUDE', 'ELEVATION', 'YEAR', 'MONTH', 'TEMP']]
return df_long
def avgThroughStationIdAndData(df):
return df.groupby(['STATION ID', 'COUNTRY', 'STATION', 'LATITUDE', 'LONGITUDE', 'ELEVATION', 'YEAR', 'MONTH'])['TEMP'].mean().reset_index()
def avgThroughAllCountries(df):
return df.groupby(['YEAR', 'MONTH'])['TEMP'].mean().reset_index()
def writeCSV(df, country = None):
if (country):
df = df[df['COUNTRY'] == country]
df.to_csv(f'TESTDATA/{country}.csv', index=False)
else:
for country in df['COUNTRY'].unique():
df[df['COUNTRY'] == country].to_csv(f'TESTDATA/{country}.csv', index=False)
def writeJSON(df, country = None):
if (country):
df = df[df['COUNTRY'] == country]
df.to_json(f'JSONDATA/{country}.json', orient='records')
else:
for country in df['COUNTRY'].unique():
df[df['COUNTRY'] == country].to_json(f'JSONDATA/{country}.json', orient='records')
df = getMergedDataframe('qfe')
df_long = getLongTable(df)
df_avg = avgThroughStationIdAndData(df_long)
# writeCSV(df_long)
# writeJSON(df_long)
writeCSV(df_avg)