-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrwmodelmean_mpi_new.py
195 lines (147 loc) · 7.03 KB
/
rwmodelmean_mpi_new.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
"""
Python modules for reading an ensemble of files,
estimating across year mean, and writing output netcdf file
"""
import os, time
import numpy as np
import pandas as pd
import xarray as xr
from mpi4py import MPI
from optparse import OptionParser
from plotdailymean import plot_timeseries
from estaverage import estimate_daily_average_across_years
from writeoutfile import write_netcdf_output
__author__ = 'Eva Sinha'
__email__ = '[email protected]'
parser = OptionParser();
parser.add_option("--plist_file", dest="plist_file", default="", \
help="File containing parameter name and pft")
parser.add_option("--rundir", dest="rundir", default="", \
help="Directory where ELM ensemble outputs are stored")
parser.add_option("--nsamples", dest="nsamples", default="", \
help="Number of parameters samples considered in the ensemble")
parser.add_option("--caseid", dest="caseid", default="", \
help="Case name")
parser.add_option("--yr_start", dest="yr_start", default=2000, \
help="Start year for reading ELM model outputs")
parser.add_option("--yr_end", dest="yr_end", default=2005, \
help="Last year for reading ELM model outputs")
parser.add_option("--fnamepre", dest="fnamepre", default="", \
help="file name prefix for saving plots")
(options, args) = parser.parse_args()
#----------------------------------------------------------
def read_output_data(plist_file, rundir, nsamples, caseid, yr_start, yr_end, varnames, fnamepre):
"""Read ELM model output for all ensemble members for specified case and year range and subset for variables
:param: rundir: path to the run directory
:param: caseid: case name
:param: yr_start: first year for reading data
:param: yr_end: last year for reading data
:param: varnames: list of variable names of interest
:param: fnamepre: file name prefix for saving plots
"""
comm=MPI.COMM_WORLD
rank=comm.Get_rank()
size=comm.Get_size()
# Read names of output variables and conversion factor
calib_var = pd.read_csv('calib_var.csv', comment='#')
# File containing parameter name and pft
#plist_path = '/home/ac.eva.sinha/OLMT/'
#plist_file = 'parm_list_miscanthus'
# Obtain the parameter names
pnames=[]
ppfts=[]
#pfile = open(plist_path + plist_file, 'r')
pfile = open(plist_file, 'r')
nparms = 0
for s in pfile:
pnames.append(s.split()[0])
ppfts.append(s.split()[1])
nparms = nparms+1
pfile.close()
if (rank == 0): #master
n_done = 0
for n_job in range(1,size):
comm.send(n_job, dest=n_job, tag=1)
comm.send(0, dest=n_job, tag=2)
time.sleep(1)
for n_job in range(size, int(nsamples)+1):
process = comm.recv(source=MPI.ANY_SOURCE, tag=3)
thisjob = comm.recv(source=process, tag=4)
n_done = n_done+1
comm.send(n_job, dest=process, tag=1)
comm.send(0, dest=process, tag=2)
while(n_done < int(nsamples)):
process = comm.recv(source=MPI.ANY_SOURCE, tag=3)
thisjob = comm.recv(source=process, tag=4)
n_done = n_done+1
comm.send(-1, dest=process, tag=1)
comm.send(-1, dest=process, tag=2)
MPI.Finalize()
else:
status=0
while status == 0:
myjob = comm.recv(source=0, tag=1)
status = comm.recv(source=0, tag=2)
if (status == 0):
dirname = 'g' + str(100000+myjob)[1:]
filepath = rundir + '/' + dirname
# Read names of all NetCDF files within the given year range
fnames = []
for yr in range(int(yr_start), int(yr_end)+1):
fnames.append(filepath + '/' + caseid + '.elm.h0.' + str(yr) + '-01-01-00000.nc')
# Open a multiple netCDF data file and load the data into xarrays
with xr.open_mfdataset(fnames, concat_dim='time') as ds:
# Only keep select variables in the data array
ds = ds[varnames]
# Drop landgrid dimension
ds = ds.isel(lndgrid=0)
# Estimate average across years for each day of the year
ds = estimate_daily_average_across_years(ds)
# Save output xarray as a netcdf file
outfname = caseid + '_mean_across_years_' + str(myjob) + '.nc'
print(outfname)
write_netcdf_output(ds, fnamepre, filename=outfname)
# Create empty panda series
df_all = pd.Series(dtype = 'float64')
for index, row in calib_var.iterrows():
# Convert to pandas dataframe and then pivot the data
df = ds[row['variable']].to_dataframe().unstack(level=-1)*row['daily_multi_factor']
# Estimate annual average
df_annual = df.mean()*(row['annual_multi_factor']/row['daily_multi_factor'])
if (len(df_all.index) == 0):
df_all = df
df_annual_all = pd.Series([df_annual])
else:
# Combine by concatinating along columns
df_all = pd.concat([df_all, df], axis=0, ignore_index=True)
df_annual_all = df_annual_all.append(pd.Series([df_annual]), ignore_index=True)
# Reshape single columm to single row and append data to file
f1 = open(fnamepre + '/ytrain.dat', 'ab')
df_all = df_all.to_numpy()
np.savetxt(f1, df_all.reshape(1,len(df_all)))
f1.close()
f2 = open(fnamepre + '/ytrain_annual.dat', 'ab')
df_annual_all = df_annual_all.to_numpy()
np.savetxt(f2, df_annual_all.reshape(1,len(df_annual_all)))
f2.close()
# Numpy array for saving parameter values
parms = np.zeros([nparms], float)
# Open netcdf parameter file
pfname = filepath + '/clm_params_' + str(dirname[1:6]) + '.nc'
print(dirname)
print(pfname)
with xr.open_dataset(pfname, decode_times=False) as mydata:
for pnum, pname in enumerate(pnames):
parms[pnum] = mydata[pname][int(ppfts[pnum])]
# Reshape single columm to single row and append data to file
f=open(fnamepre + '/ptrain.dat', 'ab')
np.savetxt(f, parms.reshape(1,len(parms)))
f.close()
comm.send(rank, dest=0, tag=3)
comm.send(myjob, dest=0, tag=4)
MPI.Finalize()
#----------------------------------------------------------
# List of variable names that we want to keep
varnames=['GPP', 'NPP', 'NEE', 'ER', 'AR', 'GR', 'HR', 'MR', 'XR', 'XSMRPOOL', 'TLAI', 'FPG', 'DMYIELD', 'BTRAN', 'FERT', 'EFLX_LH_TOT', 'FSH', 'QSOIL', 'QVEGE', 'QVEGT', 'TBOT', 'RAIN', 'SNOW']
# Read ELM model output for specified case and year range and subset for variables
read_output_data(options.plist_file, options.rundir, options.nsamples, options.caseid, options.yr_start, options.yr_end, varnames, options.fnamepre)