import sys
import argparse
import datetime as dt

from collections import defaultdict, OrderedDict
from pathlib import Path

import numpy as np
import netCDF4 as nc

IODA_CONV_PATH = Path(__file__).parent/"@SCRIPT_LIB_PATH@"
if not IODA_CONV_PATH.is_dir():
    IODA_CONV_PATH = Path(__file__).parent/'..'/'lib-python'
sys.path.append(str(IODA_CONV_PATH.resolve()))

import ioda_conv_ncio as iconv
from orddicts import DefaultOrderedDict
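
# Overall flow of concat_ioda() below:
#   1. scan every input file and classify each "name@group" variable
#      (data variables, MetaData, VarMetaData)
#   2. stack the metadata variables across files and merge duplicate
#      locations with np.unique(..., axis=1)
#   3. stack the data variables, padding files that lack a variable with
#      netCDF default fill values
#   4. write the combined obs file with iconv.NcWriter
#   5. if a geovals directory is supplied, repeat the merge for the matching
#      geovals files and write a combined geovals file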

def concat_ioda(FileList, OutFile, GeoDir):
    # ... (lines elided: initialization of DataVarNames, DataVars, DataVType,
    #      MetaVars, MetaVarNames, MetaVType, VarMetaVars, VarUnits,
    #      LocKeyList, vtypedict, etc.)
    for f in FileList:
        ncf = nc.Dataset(f, mode='r')
        for key, value in ncf.variables.items():
            vs = key.split('@')  # (variable name, group)
            if vs[1] not in ['MetaData', 'VarMetaData', 'TestReference']:
                if vs[0] not in DataVarNames:
                    DataVarNames.append(vs[0])
            if vs[0] in DataVarNames:
                if key not in DataVars:
                    DataVars.append(key)
                    vtype = ncf.variables[key].dtype
                    DataVType.append(vtype)
                    try:
                        units = ncf.variables[vs[0]+"@ObsValue"].getncattr("units")
                        VarUnits[vs[0]] = units
                    except AttributeError:
                        pass  # variable carries no units attribute
            if vs[1] == 'MetaData' and key not in MetaVars and vs[0] not in ['time', 'ObsIndex']:
                MetaVars.append(key)
                MetaVarNames.append(tuple(vs))
                vtype = ncf.variables[key].dtype
                MetaVType.append(vtype)
                try:
                    units = ncf.variables[vs[0]+"@MetaData"].getncattr("units")
                    VarUnits[vs[0]] = units
                except AttributeError:
                    pass  # variable carries no units attribute
                vtypestr = vtypedict[str(vtype)]
                if vs[0] == 'datetime':
                    vtypestr = 'string'  # assumed: datetime location keys are written as strings
                LocKeyList.append((vs[0], vtypestr))
            elif vs[1] == 'VarMetaData' and key not in VarMetaVars and vs[0] not in ['variable_names']:
                VarMetaVars.append(key)
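    # For example, a key like "air_temperature@ObsValue" splits into
    # vs = ['air_temperature', 'ObsValue'], so 'air_temperature' is recorded as
    # a data variable name, while "station_id@MetaData" lands in MetaVars and
    # contributes a ('station_id', <type>) entry to LocKeyList.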
    # derive the observation type (and sensor/satellite for AHI) from the file name
    inob = f.split('/')[-1]
    obstype = inob[:inob.rfind("obs")-1]
    if obstype == "ahi_himawari8":
        sensor = obstype.split('_')[0]
        satellite = obstype.split('_')[-1]
    # ... (elided block in which each file is reopened and variables are read
    #      with the leading record dimension stripped):
    #         ncf = nc.Dataset(f, mode='r')
    #         a = np.array(ncf.variables[v])[0, ...]
    # ... (lines elided)

    # stack each metadata variable across all input files
    MetaVarData = []
    bad_idxs = []
    for v in MetaVars:
        tmpvardata = []
        try:  # assumed nesting: a variable missing from any file is flagged below
            for f in FileList:
                ncf = nc.Dataset(f, mode='r')
                tmpdata = np.array(ncf.variables[v])
                tmpvardata.append(tmpdata)
            try:
                # character variables arrive as 2-D byte arrays; join each row
                ashape = tmpdata.shape[1]
                tmpvardata = np.vstack(tmpvardata)
                tmpvardata = np.array([b''.join(td) for td in tmpvardata])
            except (IndexError, ValueError):
                # 1-D numeric variables are simply concatenated
                tmpvardata = np.hstack(tmpvardata)
            MetaVarData.append(tmpvardata)
        except KeyError:
            bad_idxs.append(MetaVars.index(v))
    # drop flagged variables, working from the end so indices stay valid
    for i in sorted(bad_idxs, reverse=True):
        del MetaVars[i]       # (elided loop body; assumed to remove entry i
        del MetaVarNames[i]   #  from the parallel metadata lists)
        del MetaVType[i]
    MetaVarData = np.vstack(MetaVarData)
    # one column per location; duplicates across files collapse to a single column
    MetaVarUnique, idx, inv, cnt = np.unique(MetaVarData, return_index=True, return_inverse=True, return_counts=True, axis=1)
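    # np.unique with axis=1 treats each column of MetaVarData (one column per
    # location, one row per metadata variable) as a key: `idx` picks the first
    # occurrence of each unique location and `inv` maps every original column
    # to its unique column.  Toy example with a single metadata row:
    #   u, idx, inv, cnt = np.unique(np.array([[3, 1, 3]]), return_index=True,
    #                                return_inverse=True, return_counts=True, axis=1)
    #   gives u = [[1, 3]], idx = [1, 0], inv = [1, 0, 1], cnt = [1, 2]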
    # stack the data variables; files missing a variable are padded with fill values
    DataVarData = []
    for idx2, v in enumerate(DataVars):
        tmpvardata = []
        for f in FileList:
            ncf = nc.Dataset(f, mode='r')
            try:  # assumed: fall back to fill values when a file lacks the variable
                tmpdata = np.array(ncf.variables[v])
                tmpvardata.append(tmpdata)
            except KeyError:
                # pad with fill values sized like record_number@MetaData
                tmpdata = np.ones_like(np.array(ncf.variables['record_number@MetaData'])).astype(DataVType[idx2])
                if DataVType[idx2] == np.int32:
                    tmpdata = tmpdata * nc.default_fillvals['i4']
                else:
                    tmpdata = tmpdata * np.abs(nc.default_fillvals['f4'])
                tmpvardata.append(tmpdata)
            validtime = dt.datetime.strptime(str(ncf.getncattr('date_time')), "%Y%m%d%H")
        tmpvardata = np.hstack(tmpvardata)
        DataVarData.append(tmpvardata)
    DataVarData = np.vstack(DataVarData)
    # scatter the stacked data onto the unique locations, skipping fill values
    DataVarUnique = np.ones((len(DataVarData), len(idx))) * nc.default_fillvals['f4']
    for ii in range(DataVarData.shape[0]):
        mask = ~((DataVarData[ii, :] == nc.default_fillvals['i4']) | (DataVarData[ii, :] == np.abs(nc.default_fillvals['f4'])))
        DataVarUnique[ii, inv[mask]] = DataVarData[ii, mask]
    # ... (lines elided)
    # set up the IODA writer and the output containers
    if obstype != "ahi_himawari8":
        ridx = np.argwhere(np.array(MetaVars) == 'record_number@MetaData')[0][0]
    nlocs = MetaVarUnique.shape[-1]
    writer = iconv.NcWriter(OutFile, LocKeyList)
    var_mdata['variable_names'] = writer.FillNcVector(DataVarNames, "string")
    # ... (lines elided)
    AttrData["date_time_string"] = validtime.strftime("%Y-%m-%dT%H:%M:%SZ")
    writer._nvars = len(DataVarNames)
    writer._nlocs = nlocs

    for idx3, vname in enumerate(MetaVarNames):
        if vname[0] in ['datetime', 'station_id']:
            # string metadata: expand each value into a fixed-width character array
            tmp = MetaVarUnique[idx3, :]
            tmp = tmp.astype(str)
            if vname[0] == 'datetime':
                jj = 20  # assumed width of "YYYY-MM-DDThh:mm:ssZ" strings
            # ... (elided: the other branch sets jj for station_id strings)
            tmp2 = np.empty((len(tmp), jj), dtype=str)
            # ... (lines elided)
            for i in range(len(tmp)):
                s = tmp[i]  # (elided line; assumed)
                tmp2[i, :len(s)] = list(s)
            loc_mdata[vname[0]] = tmp2.astype(MetaVType[idx3])
        else:
            loc_mdata[vname[0]] = MetaVarUnique[idx3, ...].astype(MetaVType[idx3])
    for idx3, vname in enumerate(DataVars):
        tmp = DataVarUnique[idx3, ...]
        tmp = tmp.astype(DataVType[idx3])
        if DataVType[idx3] == 'int32':
            tmp[tmp < -1e5] = nc.default_fillvals['i4']
        outdata[tuple(vname.split('@'))] = tmp
    if obstype == "ahi_himawari8":
        var_mdata['sensor_channel'] = np.asarray(list(range(7, 17)))
        AttrData["satellite"] = satellite
        AttrData["sensor"] = sensor
    writer.BuildNetcdf(outdata, loc_mdata, var_mdata, AttrData, VarUnits)
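
    # The rest of the routine merges the matching geovals files in the same way:
    # GeoVarNames2 holds profile-independent (nlocs-only) variables,
    # GeoVarNames3 holds nlevs-dimensioned variables, and GeoVarNames31 holds
    # ninterfaces-dimensioned variables.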
    # build the list of geovals files matching the input obs files
    # ... (lines elided: GeoFileList and the GeoVarNames*/GeoVarTypes* lists are initialized)
    for f in FileList:
        inob = f.split('/')[-1]
        ingeo = inob.replace('obs', 'geoval')
        g = GeoDir + '/' + ingeo  # assumed join of GeoDir and the geovals file name
        GeoFileList.append(g)

    # classify each geovals variable by its dimensions
    for f in GeoFileList:
        ncf = nc.Dataset(f, mode='r')
        for key, value in ncf.variables.items():
            if key not in GeoVarNames2 and key not in GeoVarNames3 and key not in GeoVarNames31:
                if len(value.dimensions) == 1:  # assumed test: nlocs-only variables
                    GeoVarNames2.append(key)
                    GeoVarTypes2.append(ncf.variables[key].dtype)
                else:
                    if 'nlevs' in value.dimensions:
                        GeoVarNames3.append(key)
                        GeoVarTypes3.append(ncf.variables[key].dtype)
                    else:  # assumed: remaining 2-D variables sit on ninterfaces
                        GeoVarNames31.append(key)
                        GeoVarTypes31.append(ncf.variables[key].dtype)
    # stack nlocs-only geovals, tracking which variable each element belongs to
    GeoVarData2 = []
    GeoVarIdx2 = []
    for idx2, v in enumerate(GeoVarNames2):
        tmpgeodata = []
        tmpgeoidx = []
        for f in GeoFileList:
            ncf = nc.Dataset(f, mode='r')
            try:  # assumed: fall back to fill values when a file lacks the variable
                tmpdata = np.array(ncf.variables[v])
                tmpgeodata.append(tmpdata)
                tmpgeoidx.append(np.ones_like(tmpdata).astype(int)*int(idx2))
            except KeyError:
                # pad with fill values sized like the 'time' variable
                tmpdata = np.ones_like(np.array(ncf.variables['time'])).astype(GeoVarTypes2[idx2])
                if GeoVarTypes2[idx2] == np.int32:
                    tmpdata = tmpdata * nc.default_fillvals['i4']
                else:
                    tmpdata = tmpdata * np.abs(nc.default_fillvals['f4'])
                tmpgeodata.append(tmpdata)
                tmpgeoidx.append(np.ones_like(tmpdata).astype(int)*int(idx2))
        tmpgeodata = np.hstack(tmpgeodata)
        tmpgeoidx = np.hstack(tmpgeoidx)
        GeoVarData2.append(tmpgeodata)
        GeoVarIdx2.append(tmpgeoidx)
    GeoVarData2 = np.vstack(GeoVarData2)
    GeoVarIdx2 = np.vstack(GeoVarIdx2)
    GeoVarData2 = np.transpose(GeoVarData2)
    GeoVarIdx2 = np.transpose(GeoVarIdx2)
    GeoVarUnique2 = np.ones((len(idx), len(GeoVarData2[0])))*np.abs(nc.default_fillvals['f4'])
    # stack nlevs-dimensioned geovals
    GeoVarData3 = []
    GeoVarIdx3 = []
    for idx2, v in enumerate(GeoVarNames3):
        tmpgeodata = []
        tmpgeoidx = []
        for f in GeoFileList:
            ncf = nc.Dataset(f, mode='r')
            try:  # assumed: fall back to fill values when a file lacks the variable
                tmpdata = np.array(ncf.variables[v])
                tmpgeodata.append(tmpdata)
                tmpgeoidx.append(np.ones_like(tmpdata).astype(int)*int(idx2))
            except KeyError:
                # pad with fill values shaped like the pressure-profile variable
                try:
                    tmpdata = np.ones_like(np.array(ncf.variables['air_pressure'])).astype(GeoVarTypes3[idx2])
                except KeyError:
                    tmpdata = np.ones_like(np.array(ncf.variables['atmosphere_ln_pressure_coordinate'])).astype(GeoVarTypes3[idx2])
                if GeoVarTypes3[idx2] == np.int32:
                    tmpdata = tmpdata * nc.default_fillvals['i4']
                else:
                    tmpdata = tmpdata * np.abs(nc.default_fillvals['f4'])
                tmpgeodata.append(tmpdata)
                tmpgeoidx.append(np.ones_like(tmpdata).astype(int)*int(idx2))
        tmpgeodata = np.vstack(tmpgeodata)
        tmpgeoidx = np.vstack(tmpgeoidx)
        GeoVarData3.append(tmpgeodata)
        GeoVarIdx3.append(tmpgeoidx)
    GeoVarData3 = np.dstack(GeoVarData3)
    GeoVarIdx3 = np.dstack(GeoVarIdx3)
    GeoVarUnique3 = np.ones((len(idx), len(GeoVarData3[0]), len(GeoVarData3[0, 0, :])))*np.abs(nc.default_fillvals['f4'])
    # stack ninterfaces-dimensioned geovals
    GeoVarData31 = []
    GeoVarIdx31 = []
    for idx2, v in enumerate(GeoVarNames31):
        tmpgeodata = []
        tmpgeoidx = []
        for f in GeoFileList:
            ncf = nc.Dataset(f, mode='r')
            try:  # assumed: fall back to fill values when a file lacks the variable
                tmpdata = np.array(ncf.variables[v])
                tmpgeodata.append(tmpdata)
                tmpgeoidx.append(np.ones_like(tmpdata).astype(int)*int(idx2))
            except KeyError:
                # pad with fill values shaped like the interface-pressure variable
                try:
                    tmpdata = np.ones_like(np.array(ncf.variables['air_pressure_levels'])).astype(GeoVarTypes31[idx2])
                except KeyError:
                    tmpdata = np.ones_like(np.array(ncf.variables['atmosphere_ln_pressure_interface_coordinate'])).astype(GeoVarTypes31[idx2])
                if GeoVarTypes31[idx2] == np.int32:
                    tmpdata = tmpdata * nc.default_fillvals['i4']
                else:
                    tmpdata = tmpdata * np.abs(nc.default_fillvals['f4'])
                tmpgeodata.append(tmpdata)
                tmpgeoidx.append(np.ones_like(tmpdata).astype(int)*int(idx2))
        tmpgeodata = np.vstack(tmpgeodata)
        tmpgeoidx = np.vstack(tmpgeoidx)
        GeoVarData31.append(tmpgeodata)
        GeoVarIdx31.append(tmpgeoidx)
    GeoVarData31 = np.dstack(GeoVarData31)
    GeoVarIdx31 = np.dstack(GeoVarIdx31)
    GeoVarUnique31 = np.ones((len(idx), len(GeoVarData31[0]), len(GeoVarData31[0, 0, :])))*np.abs(nc.default_fillvals['f4'])
    # scatter the stacked geovals onto the unique locations, skipping fill values
    for ii, jj in np.ndindex(GeoVarData2.shape):
        j = GeoVarIdx2[ii, jj]
        i = inv[ii]  # assumed (elided line): map the row to its unique location
        if GeoVarData2[ii, jj] != nc.default_fillvals['i4'] and GeoVarData2[ii, jj] != np.abs(nc.default_fillvals['f4']):
            GeoVarUnique2[i, j] = GeoVarData2[ii, jj]
    for ii, kk, jj in np.ndindex(GeoVarData3.shape):
        j = GeoVarIdx3[ii, kk, jj]
        i = inv[ii]  # assumed (elided line)
        if GeoVarData3[ii, kk, jj] != nc.default_fillvals['i4'] and GeoVarData3[ii, kk, jj] != np.abs(nc.default_fillvals['f4']):
            GeoVarUnique3[i, kk, j] = GeoVarData3[ii, kk, jj]
    for ii, kk, jj in np.ndindex(GeoVarData31.shape):
        j = GeoVarIdx31[ii, kk, jj]
        i = inv[ii]  # assumed (elided line)
        if GeoVarData31[ii, kk, jj] != nc.default_fillvals['i4'] and GeoVarData31[ii, kk, jj] != np.abs(nc.default_fillvals['f4']):
            GeoVarUnique31[i, kk, j] = GeoVarData31[ii, kk, jj]
    # write the combined geovals file alongside the combined obs file
    OutGeoFile = OutFile.replace('obs', 'geoval')
    of = nc.Dataset(OutGeoFile, 'w', format='NETCDF4')
    of.setncattr("date_time", ncf.getncattr("date_time"))
    nlocs = len(GeoVarUnique3)
    nlevs = len(GeoVarUnique3[0])
    ninterfaces = len(GeoVarUnique31[0])
    of.createDimension("nlocs", nlocs)
    of.createDimension("nlevs", nlevs)
    of.createDimension("ninterfaces", ninterfaces)
    for ivar, var in enumerate(GeoVarNames2):
        dims = ("nlocs",)  # (elided line; assumed by analogy with the loops below)
        var_out = of.createVariable(var, GeoVarTypes2[ivar], dims)
        var_out[...] = GeoVarUnique2[..., ivar]
    for ivar, var in enumerate(GeoVarNames3):
        dims = ("nlocs", "nlevs")
        var_out = of.createVariable(var, GeoVarTypes3[ivar], dims)
        var_out[...] = GeoVarUnique3[..., ivar]
    for ivar, var in enumerate(GeoVarNames31):
        dims = ("nlocs", "ninterfaces")
        var_out = of.createVariable(var, GeoVarTypes31[ivar], dims)
        var_out[...] = GeoVarUnique31[..., ivar]

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Combine conventional obs and GOESIR (currently ahi_himawari8) in IODA format into one output file',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '-i', '--input', help='list of the input files to combine',
        type=str, nargs='+', required=True)
    parser.add_argument(
        '-o', '--output', help='name of the output IODA observation file',
        type=str, required=True, default=None)
    parser.add_argument(
        '-g', '--geovals', help='path to where matching geovals should be')

    args = parser.parse_args()

    FileList = args.input
    OutFile = args.output
    # ... (lines elided)
    GeoDir = args.geovals

    concat_ioda(FileList, OutFile, GeoDir)  # assumed call; the signature matches the arguments gathered above
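
# Example invocation (script and file names here are hypothetical):
#   python combine_conv.py \
#       -i sondes_obs_2018041500_m.nc4 ahi_himawari8_obs_2018041500_m.nc4 \
#       -o combined_obs_2018041500_m.nc4 \
#       -g /path/to/matching/geovals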