IODA Bundle
ioda_conv_ncio.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 
3 import datetime as dt
4 import math
5 import sys
6 from collections import OrderedDict
7 
8 import numpy as np
9 import netCDF4
10 from netCDF4 import Dataset
11 
12 ###################################################################################
13 # SUBROUTINES
14 ###################################################################################
15 
16 
17 def WriteNcVar(Gfid, Ofid, OutDest, Vname, Vdtype, Vdims, Vvals):
18  ###############################################################
19  # This method will write out the variable into the appropriate
20  # netcdf files.
21  #
22  if (OutDest == 'G' or OutDest == 'B'):
23  # write to geo file
24  Ovar = Gfid.createVariable(Vname, Vdtype, Vdims)
25  Ovar[...] = Vvals[...]
26 
27  if (OutDest == 'O' or OutDest == 'B'):
28  # write to obs file
29  Ovar = Ofid.createVariable(Vname, Vdtype, Vdims)
30  Ovar[...] = Vvals[...]
31 
32 
33 def CharVectorToString(CharVec):
34  # Need to do this differently given the Python version (2 vs 3)
35  #
36  # sys.version_info[0] yeilds the Python major version number.
37  if (sys.version_info[0] == 2):
38  String = CharVec.tostring().rstrip('\x00')
39  else:
40  String = str(CharVec.tostring(), 'utf-8').rstrip('\x00')
41 
42  return String
43 
44 
45 ###################################################################################
46 # CLASSES
47 ###################################################################################
48 
49 class NcWriter(object):
50  # Constructor
51  def __init__(self, NcFname, LocKeyList, TestKeyList=None):
52 
53  # Variable names of items in the record key
54 
55  # Variable names of items in the location key
56  self._loc_key_list_loc_key_list = LocKeyList
57 
58  # Variable names of items in the TestReference key
59  self._test_key_list_test_key_list = TestKeyList
60 
61  # Names assigned to obs values, error estimates and qc marks
62  self._oval_name_oval_name = "ObsValue"
63  self._oerr_name_oerr_name = "ObsError"
64  self._oqc_name_oqc_name = "PreQC"
65 
66  # Names assigned to obs bias terms and predoctirs related to observations
67  self._obiasterm_name_obiasterm_name = "GsiObsBiasTerm"
68  self._obiaspred_name_obiaspred_name = "GsitObsBiasPredictor"
69 
70  # Names assigned to dimensions
71  self._nlocs_dim_name_nlocs_dim_name = 'nlocs'
72  self._nvars_dim_name_nvars_dim_name = 'nvars'
73  self._nstr_dim_name_nstr_dim_name = 'nstring'
74  self._ndatetime_dim_name_ndatetime_dim_name = 'ndatetime'
75 
76  # Dimension sizes
77  self._nlocs_nlocs = 0
78  self._nvars_nvars = 0
79  self._nstring_nstring = 50
80  self._ndatetime_ndatetime = 20
81 
82  # default fill values
83  self._defaultF4_defaultF4 = np.float32(netCDF4.default_fillvals['f4'])
84  self._defaultI4_defaultI4 = np.int32(netCDF4.default_fillvals['i4'])
85 
86  # Names assigned to record number (location metadata)
87  self._rec_num_name_rec_num_name = "record_number"
88 
89  # Names assigned to variable metadata
90  self._var_list_name_var_list_name = "variable_names"
91 
92  # Names for metadata groups
93  self._loc_md_name_loc_md_name = "MetaData"
94  self._var_md_name_var_md_name = "VarMetaData"
95  self._test_md_name_test_md_name = "TestReference"
96 
97  # Reference date time
98  self._ref_date_time_ref_date_time = dt.datetime(1, 1, 1, 0)
99 
100  # Open the netcdf file for writing
101  self._fid_fid = Dataset(NcFname, 'w', format='NETCDF4')
102 
103  # Destructor
104  def __del__(self):
105  # Close the netcdf file
106  self._fid_fid.close()
107 
108  # Methods
109 
110  def OvalName(self):
111  return self._oval_name_oval_name
112 
113  def ObiastermName(self):
114  return self._obiasterm_name_obiasterm_name
115 
116  def ObiaspredName(self):
117  return self._obiaspred_name_obiaspred_name
118 
119  def OerrName(self):
120  return self._oerr_name_oerr_name
121 
122  def OqcName(self):
123  return self._oqc_name_oqc_name
124 
125  def NumpyToNcDtype(self, NumpyDtype):
126  ############################################################
127  # This method converts the numpy data type to the
128  # corresponding netcdf datatype
129 
130  if (NumpyDtype == np.dtype('float64')):
131  NcDtype = 'f4' # convert double to float
132  elif (NumpyDtype == np.dtype('float32')):
133  NcDtype = 'f4'
134  elif (NumpyDtype == np.dtype('int64')):
135  NcDtype = 'i4' # convert long to int
136  elif (NumpyDtype == np.dtype('int32')):
137  NcDtype = 'i4'
138  elif (NumpyDtype == np.dtype('int16')):
139  NcDtype = 'i4'
140  elif (NumpyDtype == np.dtype('int8')):
141  NcDtype = 'i4'
142  elif (NumpyDtype == np.dtype('S1')):
143  NcDtype = 'c'
144  elif (NumpyDtype == np.dtype('U1')):
145  NcDtype = 'c'
146  elif (NumpyDtype == np.dtype('S32')):
147  NcDtype = 'c'
148  else:
149  print("ERROR: Unrecognized numpy data type: ", NumpyDtype)
150  exit(-2)
151 
152  return NcDtype
153 
154  def NumpyToIodaDtype(self, NumpyDtype):
155  ############################################################
156  # This method converts the numpy data type to the
157  # corresponding ioda datatype
158 
159  if (NumpyDtype == np.dtype('float64')):
160  IodaDtype = 'float' # convert double to float
161  elif (NumpyDtype == np.dtype('float32')):
162  IodaDtype = 'float'
163  elif (NumpyDtype == np.dtype('int64')):
164  IodaDtype = 'integer' # convert long to int
165  elif (NumpyDtype == np.dtype('int32')):
166  IodaDtype = 'integer'
167  elif (NumpyDtype == np.dtype('int16')):
168  IodaDtype = 'integer'
169  elif (NumpyDtype == np.dtype('int8')):
170  IodaDtype = 'integer'
171  elif (NumpyDtype == np.dtype('S1')):
172  IodaDtype = 'string'
173  else:
174  print("ERROR: Unrecognized numpy data type: ", NumpyDtype)
175  exit(-2)
176 
177  return IodaDtype
178 
179  def ConvertToTimeOffset(self, datestr_vec):
180  """
181  Convert from date strings in ioda reference format "YYYY-mm-ddTHH:MM:SSZ" to decimal hour offsets from reference datetime.
182 
183  Input: datestr_vec is an array of numpy chararrays, representing the raw bytes of the date string
184  Output: numpy dtype:'f4' array of time offsets in decimal hours from the reference date.
185  """
186  offset = np.zeros((self._nlocs_nlocs), dtype='f4')
187  ref_offset = math.floor(self._ref_date_time_ref_date_time.hour*3600. + self._ref_date_time_ref_date_time.minute*60. + self._ref_date_time_ref_date_time.second)/3600. # decimal hours
188  date_offset_cache = {} # Cache a map from bytes[0:10] to the corresponding date's offset from reference date
189  for i in range(len(datestr_vec)):
190  date_bytes = datestr_vec[i].tobytes()
191  date_offset = date_offset_cache.get(date_bytes[0:10])
192  if date_offset is None:
193  # There are at most 2 distinct dates per cycle for any window length < 24hrs
194  delta = dt.date(int(date_bytes[0:4]), int(date_bytes[5:7]), int(date_bytes[8:10])) - self._ref_date_time_ref_date_time.date()
195  date_offset = delta.total_seconds()/3600.
196  date_offset_cache[date_bytes[0:10]] = date_offset
197  t_offset = (int(date_bytes[11:13])*3600. + int(date_bytes[14:16])*60. + int(date_bytes[17:19]))/3600.
198  offset[i] = date_offset + (t_offset - ref_offset)
199  return offset
200 
201  def CreateNcVector(self, Nsize, Dtype):
202  ############################################################
203  # This method will create a numpy array (vector) of an
204  # appropriate type for writing into the output netcdf file.
205 
206  if (Dtype == "integer"):
207  Vector = np.zeros((Nsize), dtype='i4')
208  elif (Dtype == "float"):
209  Vector = np.zeros((Nsize), dtype='f4')
210  elif (Dtype == "string"):
211  Vector = np.chararray((Nsize, self._nstring_nstring))
212  elif (Dtype == "datetime"):
213  Vector = np.chararray((Nsize, self._ndatetime_ndatetime))
214  else:
215  print("ERROR: Unrecognized vector data type: ", Dtype)
216  exit(-3)
217 
218  return Vector
219 
220  def FillNcVector(self, Values, Dtype):
221  ############################################################
222  # This method will fill a numpy array (vector) that will
223  # be used for writing into the output netcdf file.
224 
225  if (Dtype == "integer"):
226  Vector = Values
227  elif (Dtype == "float"):
228  Vector = Values
229  elif (Dtype == "string"):
230  StringType = "S{0:d}".format(self._nstring_nstring)
231  s = np.array(Values, StringType)
232  try:
233  Vector = netCDF4.stringtochar(s)
234  except (UnicodeEncodeError, UnicodeDecodeError) as e:
235  Vector = netCDF4.stringtochar(s, encoding='bytes')
236  elif (Dtype == "datetime"):
237  StringType = "S{0:d}".format(self._ndatetime_ndatetime)
238  Vector = netCDF4.stringtochar(np.array(Values, StringType))
239  else:
240  print("ERROR: Unrecognized vector data type: ", Dtype)
241  exit(-3)
242 
243  return Vector
244 
245  def WriteNcAttr(self, AttrData):
246  ############################################################
247  # This method will dump out the netcdf global attribute data
248  # contained in the dictionary AttrData.
249 
250  self._fid_fid.setncattr(self._nvars_dim_name_nvars_dim_name, np.int32(self._nvars_nvars))
251  self._fid_fid.setncattr(self._nlocs_dim_name_nlocs_dim_name, np.int32(self._nlocs_nlocs))
252 
253  for Aname, Aval in AttrData.items():
254  if (Aname == "date_time_string"):
255  # Save the datetime object in this object and convert
256  # to integer representation for the netcdf file
257  self._ref_date_time_ref_date_time = dt.datetime.strptime(Aval, "%Y-%m-%dT%H:%M:%SZ")
258  refDateTime = self._ref_date_time_ref_date_time.year * 1000000
259  refDateTime += self._ref_date_time_ref_date_time.month * 10000
260  refDateTime += self._ref_date_time_ref_date_time.day * 100
261  refDateTime += self._ref_date_time_ref_date_time.hour
262  self._fid_fid.setncattr("date_time", np.int32(refDateTime))
263  else:
264  self._fid_fid.setncattr(Aname, Aval)
265 
266  def WriteNcObsVars(self, ObsVars, VarMdata, VarUnits):
267  ############################################################
268  # This method will create dimensions and variables in the
269  # output netcdf file for the obs variables.
270 
271  # Dimensions used by any group can be placed at in the top
272  # level (root) group. This is convenient if we decide to
273  # rearrange the group structure.
274  self._fid_fid.createDimension(self._nvars_dim_name_nvars_dim_name, self._nvars_nvars)
275  self._fid_fid.createDimension(self._nlocs_dim_name_nlocs_dim_name, self._nlocs_nlocs)
276  self._fid_fid.createDimension(self._nstr_dim_name_nstr_dim_name, self._nstring_nstring)
277  self._fid_fid.createDimension(self._ndatetime_dim_name_ndatetime_dim_name, self._ndatetime_ndatetime)
278 
279  for VarKey, Vvals in ObsVars.items():
280  (Vname, Gname) = VarKey
281  NcVname = "{0:s}@{1:s}".format(Vname, Gname)
282 
283  self._fid_fid.createVariable(NcVname, self.NumpyToNcDtypeNumpyToNcDtype(Vvals.dtype), (self._nlocs_dim_name_nlocs_dim_name))
284  self._fid_fid[NcVname][:] = Vvals
285  # add units
286  if Gname in ['ObsValue', 'ObsError', 'GsiHofX', 'GsiHofXBc',
287  'GsiHofXClr', 'GsiFinalObsError', 'GsiBc', 'GsiBcConst',
288  'GsiBcScanAng', 'GsiAdjustObsError', 'GsiFinalObsError', 'GsiObsBias']:
289  try:
290  self._fid_fid[NcVname].setncattr_string("units", VarUnits[Vname])
291  except KeyError:
292  pass
293 
294  def WriteNcMetadata(self, MdataGroup, DimName, Mdata, VarUnits):
295  ############################################################
296  # This method will create variables in the output netcdf
297  # file for the given metadata group.
298 
299  if (MdataGroup == self._loc_md_name_loc_md_name):
300  # Will be adding the time offset to the location metadata
301  ToffsetName = "time@{0:s}".format(MdataGroup)
302  self._fid_fid.createVariable(ToffsetName, "f4", (DimName))
303 
304  for Vname, Vvals in Mdata.items():
305  NcVname = "{0:s}@{1:s}".format(Vname, MdataGroup)
306  NcDtype = self.NumpyToNcDtypeNumpyToNcDtype(Vvals.dtype)
307  if (NcDtype == 'c'):
308  if (NcVname == "datetime@MetaData"):
309  self._fid_fid.createVariable(NcVname, NcDtype, (DimName, self._ndatetime_dim_name_ndatetime_dim_name))
310  else:
311  self._fid_fid.createVariable(NcVname, NcDtype, (DimName, self._nstr_dim_name_nstr_dim_name))
312  else:
313  self._fid_fid.createVariable(NcVname, NcDtype, (DimName))
314 
315  self._fid_fid[NcVname][:] = Vvals
316  # add units
317  try:
318  self._fid_fid[NcVname].setncattr_string("units", VarUnits[Vname])
319  except KeyError:
320  pass
321 
322  # If we are writing out the datetime string, then we also
323  # need to write out the time offset (from the reference date_time
324  # attribute value).
325  if (NcVname == "datetime@MetaData"):
326  ToffsetValues = self.ConvertToTimeOffsetConvertToTimeOffset(Vvals)
327  self._fid_fid[ToffsetName][:] = ToffsetValues
328  # self._fid[ToffsetName].setncattr_string("Units", "hours since "+self._ref_date_time.strftime("%Y-%m-%d %H:%M:%S")+" UTC")
329 
330  def ExtractObsData(self, ObsData):
331  ############################################################
332  # This method will extract information from the ObsData
333  # dictionary and reformat it into a more amenable form for
334  # writing into the output file.
335 
336  self._nvars_nvars = 0
337  self._nlocs_nlocs = 0
338 
339  VarNames = set()
340 
341  # Walk through the structure and get counts so arrays
342  # can be preallocated, and variable numbers can be assigned
343  ObsVarList = []
344  ObsVarExamples = []
345  VMName = []
346  VMData = {}
347  nrecs = 0
348  for RecKey, RecDict in ObsData.items():
349  nrecs += 1
350  for LocKey, LocDict in RecDict.items():
351  self._nlocs_nlocs += 1
352  for VarKey, VarVal in LocDict.items():
353  if (VarKey[1] == self._oval_name_oval_name):
354  VarNames.add(VarKey[0])
355  if (VarKey not in ObsVarList):
356  ObsVarList.append(VarKey)
357  ObsVarExamples.append(VarVal)
358  VarList = sorted(list(VarNames))
359  self._nvars_nvars = len(VarList)
360 
361  # Preallocate arrays and fill them up with data from the dictionary
362  ObsVars = OrderedDict()
363  for o in range(len(ObsVarList)):
364  VarType = type(ObsVarExamples[o])
365  if (VarType in [float, np.float32, np.float64]):
366  defaultval = self._defaultF4_defaultF4
367  elif (VarType in [int, np.int64, np.int32, np.int8]):
368  defaultval = self._defaultI4_defaultI4 # convert long to int
369  elif (VarType in [str, np.str_]):
370  defaultval = ''
371  elif (VarType in [np.ma.core.MaskedConstant]):
372  # If we happened to pick an invalid value (inf, nan, etc.) from
373  # a masked array, then the type is a MaskedConstant, and that implies
374  # floating point values.
375  defaultval = self._defaultF4_defaultF4
376  else:
377  print('Warning: VarType', VarType, ' not in float, int, str for:', ObsVarList[o])
378  continue
379  ObsVars[ObsVarList[o]] = np.full((self._nlocs_nlocs), defaultval, dtype=defaultval.dtype)
380 
381  LocMdata = OrderedDict()
382  for i in range(len(self._loc_key_list_loc_key_list)):
383  (LocVname, LocVtype) = self._loc_key_list_loc_key_list[i]
384  # if datetime was specified as as a "string" override to
385  # define it as a "datetime"
386  if LocVname == "datetime":
387  LocVtype = "datetime"
388  LocMdata[LocVname] = self.CreateNcVectorCreateNcVector(self._nlocs_nlocs, LocVtype)
389  LocMdata[self._rec_num_name_rec_num_name] = self.CreateNcVectorCreateNcVector(self._nlocs_nlocs, "integer")
390 
391  VarMdata = OrderedDict()
392  VarMdata[self._var_list_name_var_list_name] = self.CreateNcVectorCreateNcVector(self._nvars_nvars, "string")
393  VarMdata[self._var_list_name_var_list_name] = self.FillNcVectorFillNcVector(VarList, "string")
394 
395  RecNum = 0
396  LocNum = 0
397  for RecKey, RecDict in ObsData.items():
398  RecNum += 1
399 
400  # Exract record metadata encoded in the keys
401  for LocKey, LocDict in RecDict.items():
402  LocNum += 1
403 
404  # Extract the locations metadata encoded in the keys
405  for i in range(len(self._loc_key_list_loc_key_list)):
406  (LocVname, LocVtype) = self._loc_key_list_loc_key_list[i]
407 
408  # if datetime was specified as as a "string" override to
409  # define it as a "datetime"
410  if LocVname == "datetime":
411  LocVtype = "datetime"
412 
413  LocMdata[LocVname][LocNum-1] = self.FillNcVectorFillNcVector(LocKey[i], LocVtype)
414  LocMdata[self._rec_num_name_rec_num_name][LocNum-1] = RecNum
415 
416  for VarKey, VarVal in LocDict.items():
417  if (type(VarVal) in [np.ma.core.MaskedConstant]):
418  VarVal = self._defaultF4_defaultF4
419  ObsVars[VarKey][LocNum-1] = VarVal
420 
421  return (ObsVars, LocMdata, VarMdata)
422 
423  def BuildNetcdf(self, ObsVars, LocMdata, VarMdata, AttrData, VarUnits={}, TestData=None):
424  ############################################################
425  # This method will create an output netcdf file, and dump
426  # the contents of the ObsData dictionary into that file.
427  #
428  # The structure of the output file is:
429  #
430  # global attributes:
431  # nvars
432  # nlocs
433  # contents of AttrData dictionary
434  #
435  # group, variable structure:
436  # /ObsValue
437  # 2D float array, nvars X nlocs
438  # Observation values
439  # /ObsError
440  # 2D float array, nvars X nlocs
441  # Observation errors
442  # /PreQC
443  # 2D integer array, nvars X nlocs
444  # Observation QC marks
445  # /VarMetaData/
446  # Group holding 1D vectors of various data types, nvars
447  # channel_number
448  # channel_frequency
449  # etc.
450  # /LocMetaData/
451  # Group holding 1D vectors of various data types, nlocs
452  # latitude
453  # longitude
454  # datetime
455  # /VarUnits/
456  # Dictionary holding string of units for each variable/metadata item
457  # Key: variable, Value: unit_string
458  # Example: VarUnits['air_temperature'] = 'K'
459  # /TestData/
460  # Group holding 1D vectors of various data types, nlocs
461  # cloud_liquid_water_column_retrieved_from_observations for example
462 
463  # Write out the global attributes followed by the different
464  # data sections: Obs variables, Record metadata, locations metadata
465  # and variable metadata.
466  self.WriteNcAttrWriteNcAttr(AttrData)
467  self.WriteNcObsVarsWriteNcObsVars(ObsVars, VarMdata, VarUnits)
468 
469  self.WriteNcMetadataWriteNcMetadata(self._loc_md_name_loc_md_name, self._nlocs_dim_name_nlocs_dim_name, LocMdata, VarUnits)
470  self.WriteNcMetadataWriteNcMetadata(self._var_md_name_var_md_name, self._nvars_dim_name_nvars_dim_name, VarMdata, VarUnits)
471  if TestData is not None:
472  self.WriteNcMetadataWriteNcMetadata(self._test_md_name_test_md_name, self._nlocs_dim_name_nlocs_dim_name, TestData, VarUnits)
def BuildNetcdf(self, ObsVars, LocMdata, VarMdata, AttrData, VarUnits={}, TestData=None)
def ConvertToTimeOffset(self, datestr_vec)
def __init__(self, NcFname, LocKeyList, TestKeyList=None)
def NumpyToNcDtype(self, NumpyDtype)
_nvars
This method will extract information from the ObsData dictionary and reformat it into a more amenable...
def FillNcVector(self, Values, Dtype)
def ExtractObsData(self, ObsData)
def CreateNcVector(self, Nsize, Dtype)
def NumpyToIodaDtype(self, NumpyDtype)
def WriteNcObsVars(self, ObsVars, VarMdata, VarUnits)
def WriteNcAttr(self, AttrData)
def WriteNcMetadata(self, MdataGroup, DimName, Mdata, VarUnits)
_ref_date_time
This method will dump out the netcdf global attribute data contained in the dictionary AttrData.
def CharVectorToString(CharVec)
def WriteNcVar(Gfid, Ofid, OutDest, Vname, Vdtype, Vdims, Vvals)
SUBROUTINES.