IODA Bundle
gmao_obs2ioda.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 
3 # (C) Copyright 2019 UCAR
4 #
5 # This software is licensed under the terms of the Apache Licence Version 2.0
6 # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
7 
8 """
9 Convert GMAO ocean data to IODA netCDF4 format
10 """
11 
12 from __future__ import print_function
13 import sys
14 from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
15 import netCDF4 as nc4
16 import numpy as np
17 from datetime import datetime
18 from pathlib import Path
19 
20 IODA_CONV_PATH = Path(__file__).parent/"@SCRIPT_LIB_PATH@"
21 if not IODA_CONV_PATH.is_dir():
22  IODA_CONV_PATH = Path(__file__).parent/'..'/'lib-python'
23 sys.path.append(str(IODA_CONV_PATH.resolve()))
24 
25 import ioda_conv_ncio as iconv
26 from orddicts import DefaultOrderedDict
27 
28 
29 # obsIdDict is defined as obsid_dict in ocean_obs.py
30 obsIdDict = {
31  5521: 'sea_water_salinity', # Salinity
32  3073: 'sea_water_temperature', # Temperature
33  5525: 'sea_surface_temperature', # SST
34  5526: 'obs_absolute_dynamic_topography', # SSH (Not used ...)
35  5351: 'obs_absolute_dynamic_topography', # SSH
36  6000: 'sea_ice_area_fraction', # AICE
37  6001: 'sea_ice_thickness' # HICE
38 }
39 
40 
41 varDict = {
42  'sea_water_salinity': 'sal',
43  'sea_water_temperature': 'temp',
44  'sea_surface_temperature': 'sst',
45  'obs_absolute_dynamic_topography': 'adt',
46  'sea_ice_area_fraction': 'frac',
47  'sea_ice_thickness': 'thick'
48 }
49 
50 
51 def flipDict(dictIn):
52 
53  dictOut = {}
54 
55  for key, value in dictIn.items():
56  if value not in dictOut:
57  dictOut[value] = [key]
58  else:
59  dictOut[value].append(key)
60 
61  return dictOut
62 
63 
64 class GMAOobs(object):
65 
66  def __init__(self, filename, date):
67 
68  self.filenamefilename = filename
69  self.datedate = date
70 
71  # Read GMAO data
72  self._read_read()
73 
74  return
75 
76  def _read(self):
77 
78  data = {}
79 
80  nc = nc4.Dataset(self.filenamefilename)
81 
82  data['nobs'] = len(nc.dimensions['nobs'])
83 
84  data['typ'] = nc.variables['typ'][:].data
85  data['lon'] = nc.variables['lon'][:].data
86  data['lat'] = nc.variables['lat'][:].data
87  data['depth'] = nc.variables['depth'][:].data
88  data['value'] = nc.variables['value'][:].data
89  data['oerr'] = nc.variables['oerr'][:].data
90 
91  nc.close()
92 
93  self.datadata = data
94 
95  return
96 
97 
98 class refGMAOobs(object):
99 
100  def __init__(self, filename, date, data):
101 
102  self.filenamefilename = filename
103  self.datedate = date
104  self.datadata = data
105 
106  return
107 
108 
109 class IODA(object):
110 
111  def __init__(self, filename, date, varName, obsList):
112  '''
113  Initialize IODA writer class,
114  transform to IODA data structure and,
115  write out to IODA file.
116  '''
117 
118  self.filenamefilename = filename
119  self.datedate = date
120 
121  self.locKeyListlocKeyList = [
122  ("latitude", "float"),
123  ("longitude", "float"),
124  ("depth", "float"),
125  ("datetime", "string")
126  ]
127 
128  self.AttrDataAttrData = {
129  'odb_version': 1,
130  'date_time_string': self.datedate.strftime("%Y-%m-%dT%H:%M:%SZ")
131  }
132 
133  # Skip out if there are no obs!
134  totalObs = 0
135  for obs in obsList:
136  if obs.data['nobs'] <= 0:
137  continue
138  totalObs += obs.data['nobs']
139  if totalObs == 0:
140  print('No %s observations for IODA!' % varName)
141  return
142 
143  self.writerwriter = iconv.NcWriter(self.filenamefilename, self.locKeyListlocKeyList)
144 
145  self.keyDictkeyDict = DefaultOrderedDict(lambda: DefaultOrderedDict(dict))
146 
147  self.keyDictkeyDict[varName]['valKey'] = varName, self.writerwriter.OvalName()
148  self.keyDictkeyDict[varName]['errKey'] = varName, self.writerwriter.OerrName()
149  self.keyDictkeyDict[varName]['qcKey'] = varName, self.writerwriter.OqcName()
150 
151  # data is the dictionary containing IODA friendly data structure
152  self.datadata = DefaultOrderedDict(lambda: DefaultOrderedDict(dict))
153 
154  recKey = 0
155 
156  for obs in obsList:
157 
158  if obs.data['nobs'] <= 0:
159  continue
160 
161  for n in range(obs.data['nobs']):
162 
163  oval = obs.data['value'][n]
164  oerr = obs.data['oerr'][n]
165 
166  if discardOb(varName, oval):
167  continue
168 
169  lat = obs.data['lat'][n]
170  lon = obs.data['lon'][n]
171  lvl = obs.data['depth'][n]
172 
173  locKey = lat, lon, lvl, obs.date.strftime("%Y-%m-%dT%H:%M:%SZ")
174 
175  valKey = self.keyDictkeyDict[varName]['valKey']
176  errKey = self.keyDictkeyDict[varName]['errKey']
177  qcKey = self.keyDictkeyDict[varName]['qcKey']
178 
179  self.datadata[recKey][locKey][valKey] = oval
180  self.datadata[recKey][locKey][errKey] = oerr
181  self.datadata[recKey][locKey][qcKey] = 0
182 
183  (ObsVars, LocMdata, VarMdata) = self.writerwriter.ExtractObsData(self.datadata)
184  self.writerwriter.BuildNetcdf(ObsVars, LocMdata, VarMdata, self.AttrDataAttrData)
185 
186  return
187 
188 
189 def separateObs(obsList):
190 
191  obsDict = {}
192  for key in obsIdDict.keys():
193 
194  obsListKey = []
195 
196  for obs in obsList:
197 
198  date = obs.date
199  filename = obs.filename
200 
201  ind = np.where(obs.data['typ'] == key)
202 
203  data = {}
204  data['nobs'] = len(ind[0])
205  data['typ'] = obs.data['typ'][ind]
206  data['lon'] = obs.data['lon'][ind]
207  data['lat'] = obs.data['lat'][ind]
208  data['depth'] = obs.data['depth'][ind]
209  data['value'] = obs.data['value'][ind]
210  data['oerr'] = obs.data['oerr'][ind]
211 
212  obsListKey.append(refGMAOobs(filename, date, data))
213 
214  obsDict[key] = obsListKey
215 
216  return obsDict
217 
218 
219 def sortDict(obsDictIn):
220 
221  # Flip the obsIdDict
222  obsIdDictFlipped = flipDict(obsIdDict)
223 
224  obsDictOut = {}
225 
226  # Loop over flipped obsIdDict
227  for key, values in obsIdDictFlipped.items():
228 
229  obsList = []
230  for value in values:
231  obsList.append(obsDictIn[value])
232 
233  # Flatten the newly created list of lists
234  obsDictOut[key] = [item for sublist in obsList for item in sublist]
235 
236  return obsDictOut
237 
238 
239 def discardOb(varName, obsValue):
240 
241  discardOb = True
242 
243  if varName in ["sea_water_salinity"]:
244  if 0. <= obsValue <= 50.:
245  discardOb = False
246  elif varName in ["sea_water_temperature", "sea_surface_temperature"]:
247  if -2. <= obsValue <= 100.:
248  discardOb = False
249  elif varName in ["obs_absolute_dynamic_topography"]:
250  if -5. <= obsValue <= 5.:
251  discardOb = False
252  elif varName in ["sea_ice_area_fraction"]:
253  if 0. <= obsValue <= 1.:
254  discardOb = False
255  elif varName in ["sea_ice_thickness"]:
256  discardOb = False
257  else:
258  raise SystemExit("Unknown observation variable %s" % varName)
259 
260  return discardOb
261 
262 
263 def main():
264 
265  parser = ArgumentParser(
266  description=__doc__,
267  formatter_class=ArgumentDefaultsHelpFormatter)
268  parser.add_argument(
269  '-i', '--input', help='name of the input GMAO ocean obs file(s)',
270  type=str, nargs='+', required=True)
271  parser.add_argument(
272  '-o', '--output', help='template name of the output IODA file (one per type)',
273  type=str, required=True)
274  parser.add_argument(
275  '-d', '--date', help='datetime at the middle of the window', metavar='YYYYMMDDHH',
276  type=str, required=True)
277  parser.add_argument(
278  '--inputdates', help='dates of the input GMAO ocean obs file(s)',
279  type=str, nargs='+', required=False, metavar='YYYYMMDDHH')
280 
281  args = parser.parse_args()
282 
283  fList = args.input
284  dList = args.inputdates
285  foutput = args.output
286  fdate = datetime.strptime(args.date, '%Y%m%d%H')
287 
288  if dList:
289  assert len(dList) == len(fList)
290  dList = [datetime.strptime(d, '%Y%m%d%H') for d in dList]
291  else:
292  dList = [fdate] * len(fList)
293 
294  obsList = []
295  for fname, idate in zip(fList, dList):
296  obsList.append(GMAOobs(fname, idate))
297 
298  obsDict = separateObs(obsList)
299 
300  obsDictSorted = sortDict(obsDict)
301 
302  for key, value in varDict.items():
303  fout = '%s_%s.nc' % (foutput, value)
304  IODA(fout, fdate, key, obsDictSorted[key])
305 
306 
307 if __name__ == '__main__':
308  main()
def __init__(self, filename, date)
def __init__(self, filename, date, varName, obsList)
def __init__(self, filename, date, data)
def separateObs(obsList)
def discardOb(varName, obsValue)
def flipDict(dictIn)
def sortDict(obsDictIn)