IODA Bundle
bufr2ncObsTypes.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 
3 from __future__ import print_function
4 import numpy as np
5 import sys
6 import re
7 import netCDF4
8 import struct
9 import datetime as dt
10 import bufr2ncCommon as cm
11 
12 ############################################################################
13 # SUBROUTINES
14 ############################################################################
15 
16 
def BufrFloatToActual(Bval, Dtype):
    """Convert the output of read_subset() to its actual data type.

    read_subset() returns floating point numbers for every mnemonic (or an
    empty array if the mnemonic did not exist). This routine reinterprets
    or converts those numbers according to Dtype.

    Keep the Dtype values handled here in sync with the entries in the
    DATA_TYPES dictionary (DTYPE_STRING, DTYPE_INTEGER, DTYPE_FLOAT,
    DTYPE_DOUBLE).

    Args:
        Bval: masked array as returned by read_subset().
        Dtype: one of the cm.DTYPE_* codes. Any code not listed above is
            treated like DTYPE_FLOAT.

    Returns:
        A masked array holding the converted data and the mask of Bval,
        with all size 1 dimensions squeezed away (read_subset can return
        size 1 dimensions, eg. nlevs). An empty masked array is returned
        when Bval is empty so that the writing process can skip the value.

    Raises:
        ValueError: for DTYPE_STRING when the elements of Bval are not
            8 bytes wide.
    """
    if Bval.size == 0:
        # Bval is empty so return an empty Dval.
        return np.ma.array([]).squeeze()

    if Dtype == cm.DTYPE_STRING:
        # Each 8 byte float is really a string of 8 characters, so view the
        # raw bytes instead of converting the numeric value.
        RawVals = np.ascontiguousarray(np.ma.getdata(Bval))
        if RawVals.dtype.itemsize != 8:
            raise ValueError(
                "BufrFloatToActual: string data must be stored in 8 byte "
                "values, got itemsize {0:d}".format(RawVals.dtype.itemsize))

        # Work on a copy so that the caller's data is left untouched.
        ByteVals = RawVals.view(np.uint8).copy()

        # Byte values < 1 and > 127 cannot be mapped to the old style ascii
        # character set, so replace them with a blank character.
        ByteVals[(ByteVals < 1) | (ByteVals > 127)] = ord(' ')

        # Reassemble the bytes into one 8 character string per input value.
        Dval = np.ma.array(ByteVals.view('S8'), mask=Bval.mask)
    else:
        if Dtype == cm.DTYPE_INTEGER:
            Target = np.int32
        elif Dtype == cm.DTYPE_DOUBLE:
            Target = np.float64
        else:
            # DTYPE_FLOAT, and the fallback for any unrecognized code.
            Target = np.float32
        Dval = np.ma.array(Bval.data.astype(Target),
                           mask=Bval.mask, dtype=Target)

    # Squeeze the array since read_subset can return size 1 dimensions (eg.
    # nlevs).
    return Dval.squeeze()
80 
81 
def WriteNcVar(Fid, ObsNum, Vname, Vdata, isProfile=False):
    """Write one value into a variable of the output netCDF file.

    The dimension sizes of the netcdf variable (NcVar) and of the value
    need to get aligned. For some dimensions the NcVar size will tend to be
    larger than the value size (eg, nlevs), for others it is the other way
    around (eg, nevents). The list of dimensions matches between the two,
    except that NcVar has nlocs as its first dimension and may have nlevs
    as an extra dimension. For a multi-level event:

        Value dimensions are [ nlevs, nevents ]           eg. [ 51, 255 ]
        NcVar dimensions are [ nlocs, nlevs, nevents ]    eg. [ 1000, 255, 20 ]

    Each dimension is sliced to the minimum of the two sizes, so the
    assignment becomes

        NcVar[ObsNum, 0:51, 0:20] = Value[0:51, 0:20]

    It is possible to get single level values from a multiple level obs
    type (such as sondes). In that case the levels dimension has been
    squeezed away from the value, and the value is inserted into the first
    element of the nlevs dimension. This assumes that nlevs is the first
    dimension of the value and the second dimension of NcVar.

    Args:
        Fid: object indexable by variable name (the open netCDF file).
        ObsNum: index along the first dimension at which to write.
        Vname: name of the variable to write into.
        Vdata: array holding the data. String data ('S' or 'U' dtype) is
            converted to a character array first.
        isProfile: when True, a value with a leading dimension is spread
            over consecutive entries starting at ObsNum for variables with
            one or two dimensions.
    """
    # For the string data, convert to a numpy character array
    if Vdata.dtype.char in ('S', 'U'):
        StrSpec = "S{0:d}".format(cm.MAX_STRING_LEN)
        Value = netCDF4.stringtochar(Vdata.astype(StrSpec))
    else:
        Value = Vdata.copy()

    # A masked array that gets built using a scalar data value reports 0
    # for the number of dimensions.
    NcVar = Fid[Vname]
    ValNdim = Value.ndim
    NcNdim = NcVar.ndim

    if NcNdim == 1:
        if isProfile and Value.shape != ():
            # One entry per element of Value, starting at ObsNum
            NcVar[ObsNum:ObsNum + Value.shape[0]] = Value
        else:
            # No need for slicing. Value is either a scalar or a
            # 1D array with a single element
            NcVar[ObsNum] = Value
    elif NcNdim == 2:
        if ValNdim == 0:
            # Value is a scalar (representing a single level value)
            # NcVar has two dimensions (eg, [nlocs,nlevs])
            NcVar[ObsNum, 0] = Value
        elif isProfile and ValNdim > 1:
            # Value holds one row per entry of the profile. Row i goes to
            # entry ObsNum+i (the rows must not all receive Value[0:N1]).
            NumRows = Value.shape[0]
            N1 = min(Value.shape[1], NcVar.shape[1])
            NcVar[ObsNum:ObsNum + NumRows, 0:N1] = Value[:, 0:N1]
        else:
            # Value has one dimension (eg, [nlevs])
            # NcVar has two dimensions (eg, [nlocs,nlevs])
            N1 = min(Value.shape[0], NcVar.shape[1])
            NcVar[ObsNum, 0:N1] = Value[0:N1]
    elif NcNdim == 3:
        if ValNdim == 1:
            # Value has one dimension and is single level (eg, [nevents])
            # NcVar has three dimensions (eg, [nlocs,nlevs,nevents])
            N2 = min(Value.shape[0], NcVar.shape[2])
            NcVar[ObsNum, 0, 0:N2] = Value[0:N2]
        else:
            # Value has two dimensions (eg, [nlevs,nevents])
            # NcVar has three dimensions (eg, [nlocs,nlevs,nevents])
            N1 = min(Value.shape[0], NcVar.shape[1])
            N2 = min(Value.shape[1], NcVar.shape[2])
            NcVar[ObsNum, 0:N1, 0:N2] = Value[0:N1, 0:N2]
    elif NcNdim == 4:
        if ValNdim == 2:
            # Value has two dimensions and is single level (eg,
            # [nstring,nevents])
            # NcVar has four dimensions (eg, [nlocs,nlevs,nstring,nevents])
            N2 = min(Value.shape[0], NcVar.shape[2])
            N3 = min(Value.shape[1], NcVar.shape[3])
            NcVar[ObsNum, 0, 0:N2, 0:N3] = Value[0:N2, 0:N3]
        else:
            # Value has three dimensions (eg, [nlevs,nstring,nevents])
            # NcVar has four dimensions (eg, [nlocs,nlevs,nstring,nevents])
            N1 = min(Value.shape[0], NcVar.shape[1])
            N2 = min(Value.shape[1], NcVar.shape[2])
            N3 = min(Value.shape[2], NcVar.shape[3])
            NcVar[ObsNum, 0:N1, 0:N2, 0:N3] = Value[0:N1, 0:N2, 0:N3]
189 
190 ############################################################################
191 # CLASSES
192 ############################################################################
193 
194 # The BUFR format is extremely flexible, and different obs types have taken
195 # advantage of that fact. This has resulted in the requirement of utilizing
196 # different algorithms to extract obs data for different obs types. Ie, it's
197 # extremely difficult to force the different formats into a common algorithm.
198 # Using a base class with a simple extraction algorithm which can be overridden
199 # in a derived class seems to be a good way to handle this situation.
200 #
201 # For the extraction, it does appear that many obs types will place a header
202 # at the front of an BUFR subset which consists of a simple list of BUFR
203 # mnemonics. The header is followed by the obs data which can be a simple
204 # list of mnemonics, but typically is a more complex structure with
205 # replications, sequences and events. The header extraction algorithm can
206 # (for now) belong in the base class, and the obs data extraction algorithms
207 # can belong in the derived classes (ie, specific to each obs type).
208 #
209 # Define the base class with a simple method that assumes all variables have a
210 # one-to-one correspondence with a BUFR mnemonic. More complex examples can
211 # override the convert() method or its sub-methods.
212 #
213 # The format for an entry in the *_spec lists is:
214 #
215 # [ nc_varname, mnemonic, data_type, dim_names, dim_sizes, created ]
216 #
217 # nc_varname: netcdf variable name
218 # mnemonic: BUFR mnemonic
219 # data_type: float, integer, string, ...
220 # dim_names: (list of dimension names)
221 # dim_sizes: (list of dimension sizes)
222 # created: flag: True - nc variable has been created
223 # False - nc variable has not been created
224 #
225 
226 # ############################### Base Observation Type ##################
227 
228 
class ObsType(object):
    """Base observation type with a simple BUFR to netcdf conversion.

    The base class assumes that every netcdf variable has a one-to-one
    correspondence with a BUFR mnemonic. Obs types that need a more complex
    algorithm can override convert() or any of its sub-methods.

    The variables are described by the spec lists int_spec, evn_spec,
    rep_spec, seq_spec, dim_spec and misc_spec. Each spec is a list of
    sub-lists, and each sub-list holds variable specs. The methods of this
    class read the following entries of a variable spec:

        [0] netcdf variable name
        [1] BUFR mnemonic
        [2] data type (cm.DTYPE_* code)
        [3] list of dimension names
        [4] list of dimension sizes
    """

    def __init__(self):
        """Initialize the data members."""
        self.bufr_ftype = cm.BFILE_UNDEF

        # Variables for message selector
        self.mtype_re = 'UnDef'
        self.max_num_msg = 0
        self.thin_interval = 1
        self.num_msg_selected = 0
        self.num_msg_mtype = 0

        # Keep this list of dimensions in sync with the lookup table in
        # the init_dim_spec() method.
        self.nlocs = -1
        self.nlevs = cm.MAX_LEVELS
        self.nevents = cm.MAX_EVENTS
        self.nstring = cm.MAX_STRING_LEN
        self.nchans = -1
        self.nrecs = 1
        self.nvars = -1

        self.int_spec = []
        self.evn_spec = []
        self.rep_spec = []
        self.seq_spec = []
        self.dim_spec = []
        self.misc_spec = [
            [['msg_type@MetaData', '', cm.DTYPE_STRING,
              ['nlocs', 'nstring'], [self.nlocs, self.nstring]],
             ['msg_date@MetaData', '', cm.DTYPE_UINT,
              ['nlocs'], [self.nlocs]]]
        ]

    def set_nlocs(self, nlocs):
        """Set the number of observations.

        This must be called before attempting to create any netcdf
        variables since self.nlocs is also used to define the dimension
        sizes in all of the netcdf variables.

        Args:
            nlocs: the number of observations.
        """
        # update the data member
        self.nlocs = nlocs

        # Update the dimension sizes in the specs: for every place in a
        # dimension name list where the name is 'nlocs', replace the
        # corresponding entry of the dimension size list.
        for slist in (self.int_spec, self.evn_spec, self.rep_spec,
                      self.seq_spec, self.dim_spec, self.misc_spec):
            for sub_slist in slist:
                for var_spec in sub_slist:
                    for i, dname in enumerate(var_spec[3]):
                        if dname == 'nlocs':
                            var_spec[4][i] = nlocs

    def msg_obs_count(self, bufr):
        """Return the number of observations in the current BUFR message.

        The default number of observations is simply the number of subsets
        in this message. This is a method of the base class so that derived
        classes (GpsroObsType, for example) can override it with a more
        complex algorithm.
        """
        return bufr._subsets()

    def init_dim_spec(self):
        """Set the dimension specs (data member self.dim_spec).

        The format of dim_spec matches that of the other specs (eg,
        self.int_spec). One entry is created for every dimension name that
        is used by the other specs; entries are ordered by dimension name.
        The program exits with status 3 on an unknown dimension name.
        """
        # Do a union on all of the dimension names.
        AllDimNames = set()
        for slist in (self.int_spec, self.evn_spec, self.rep_spec,
                      self.seq_spec, self.misc_spec):
            for sub_slist in slist:
                for var_spec in sub_slist:
                    # dimension names are in the list given by var_spec[3]
                    AllDimNames.update(var_spec[3])

        # Keep the following table of dimensions in sync with the
        # __init__ method in the ObsType base class.
        DimSizes = {
            'nlocs': self.nlocs,
            'nlevs': self.nlevs,
            'nevents': self.nevents,
            'nstring': self.nstring,
            'nchans': self.nchans,
            'nrecs': self.nrecs,
            'nvars': self.nvars,
        }

        # Walk the names in sorted order so that the result does not depend
        # on the iteration order of the set.
        DimList = []
        for dname in sorted(AllDimNames):
            if dname not in DimSizes:
                print(
                    "ERROR: init_dim_spec: Unknown dimension name: {0:s}".format(dname))
                sys.exit(3)
            DimList.append(
                [dname, dname, cm.DTYPE_UINT, [dname], [DimSizes[dname]]])

        self.dim_spec = [DimList]

    def create_nc_datasets(self, nc, isProfile=False):
        """Create the dimensions and variables in the netcdf file.

        Args:
            nc: the output netcdf file.
            isProfile: when True the nlocs dimension is created with
                size 0 instead of the size recorded in dim_spec.

        Raises:
            ValueError: if a variable spec holds an unknown data type.
        """
        # Create dimensions first so that the variables can reference them.
        nc.createDimension('nrecs', self.nrecs)  # placeholder for now
        nc.createDimension('nvars', self.nvars)
        for sub_slist in self.dim_spec:
            for dspec in sub_slist:
                Dname = dspec[0]
                if Dname in nc.dimensions:
                    # Already created (eg, nrecs and nvars above). Asking
                    # for the same dimension twice is an error.
                    continue
                if isProfile and Dname == "nlocs":
                    nc.createDimension(Dname, 0)
                else:
                    nc.createDimension(Dname, dspec[4][0])

        # Create variables including the coordinates for the dimensions
        for slist in (self.dim_spec, self.int_spec, self.evn_spec,
                      self.rep_spec, self.seq_spec, self.misc_spec):
            for sub_slist in slist:
                for var_spec in sub_slist:
                    Vname = var_spec[0]
                    Dtype = var_spec[2]
                    DimNames = var_spec[3]

                    # Convert the data type code to a netCDF data type
                    if Dtype == cm.DTYPE_STRING:
                        Vtype = 'S1'
                    elif Dtype == cm.DTYPE_INTEGER:
                        Vtype = 'i4'
                    elif Dtype == cm.DTYPE_UINT:
                        Vtype = 'u4'
                    elif Dtype == cm.DTYPE_FLOAT:
                        Vtype = 'f4'
                    elif Dtype == cm.DTYPE_DOUBLE:
                        Vtype = 'f8'
                    else:
                        # Without this check the type of the previous
                        # variable would silently be reused.
                        raise ValueError(
                            "create_nc_datasets: Unknown data type for "
                            "variable {0}: {1}".format(Vname, Dtype))

                    # Don't specify the chunk size. Since all of the
                    # dimensions are of fixed size, the built-in algorithm
                    # for calculating chunk sizes will do a good job.
                    nc.createVariable(Vname, Vtype, DimNames, zlib=True,
                                      shuffle=True, complevel=6)

    def fill_coords(self, nc):
        """Fill in the dimension variables with coordinate values.

        For now, dummy values are used which are 1..n where n is the
        variable size.
        """
        for DimSpecs in self.dim_spec:
            for VarSpec in DimSpecs:
                nc[VarSpec[0]][:] = np.arange(VarSpec[4][0]) + 1

    def read_bufr_data(self, bufr, Mlists, Rflag=False,
                       Sflag=False, Eflag=False):
        """Read lists of bufr mnemonics and return the data values.

        Args:
            bufr: the open BUFR file.
            Mlists: list of sub-lists of mnemonic names. All mnemonics of a
                sub-list are read in one call to read_subset().
            Rflag: passed to read_subset() as rep.
            Sflag: passed to read_subset() as seq.
            Eflag: passed to read_subset() as events.

        Returns:
            A list with the result of read_subset() for every sub-list.
        """
        return [
            bufr.read_subset(" ".join(MnemonicList),
                             events=Eflag, seq=Sflag, rep=Rflag)
            for MnemonicList in Mlists
        ]

    def _convert_spec_values(self, SpecList, BufrValues, OutVals,
                             OutValsBufr):
        """Convert BufrValues per SpecList and store them in both dicts."""
        for SubSpecs, SubBvals in zip(SpecList, BufrValues):
            for VarSpec, Bval in zip(SubSpecs, SubBvals):
                # Netcdf variable name is in VarSpec[0], the mnemonic in
                # VarSpec[1] and the data type in VarSpec[2]. The RRSTG
                # mnemonic is skipped.
                if VarSpec[1] != 'RRSTG':
                    # Convert twice so that the two dictionaries do not
                    # share one array object.
                    OutVals[VarSpec[0]] = BufrFloatToActual(Bval, VarSpec[2])
                    OutValsBufr[VarSpec[1]] = BufrFloatToActual(
                        Bval, VarSpec[2])
        return [OutVals, OutValsBufr]

    def bufr_float_to_actual(self, SpecList, BufrValues, ActualValues):
        """Convert bufr float data to the specified actual format.

        Args:
            SpecList: list of sub-lists of variable specs.
            BufrValues: list of masked arrays, where each masked array
                contains entries for all mnemonics in the matching
                sub-list of SpecList.
            ActualValues: dictionary of values converted so far. It is
                copied, not modified.

        Returns:
            [OutVals, OutValsBufr]. Both start as a copy of ActualValues.
            The converted values are added to OutVals keyed by netcdf
            variable name and to OutValsBufr keyed by mnemonic.
        """
        # Make separate copies of the input dictionary
        return self._convert_spec_values(
            SpecList, BufrValues, dict(ActualValues), dict(ActualValues))

    def bufr_float_to_actual_bufr(
            self, SpecList, BufrValues, ActualValues, ActualValuesBufr):
        """Convert bufr float data to the specified actual format.

        Args:
            SpecList: list of sub-lists of variable specs.
            BufrValues: list of masked arrays, where each masked array
                contains entries for all mnemonics in the matching
                sub-list of SpecList.
            ActualValues: dictionary keyed by netcdf variable name holding
                the values converted so far. It is copied, not modified.
            ActualValuesBufr: dictionary keyed by mnemonic holding the
                values converted so far. It is copied, not modified.

        Returns:
            [OutVals, OutValsBufr]: the copies of the two input
            dictionaries with the converted values added.
        """
        # Make separate copies of the input dictionaries
        return self._convert_spec_values(
            SpecList, BufrValues, dict(ActualValues), dict(ActualValuesBufr))

    def extract_bufr(self, bufr):
        """Read the mnemonics of the four spec lists from the bufr file.

        The bufr values are converted to the corresponding netcdf values.

        This is a default method that can be overridden by an obs type
        requiring a more complex algorithm (Gpsro, eg.). The default action
        is to create a single observation by walking through the variables
        in the lists contained in int_spec, evn_spec, rep_spec and
        seq_spec, reading the mnemonics out of the BUFR file, and loading
        the results into one pair of dictionaries.

        Returns:
            [ActualValues, ActualValuesBufr]: two lists with one dictionary
            each. The first dictionary is keyed by netcdf variable name,
            the second one by mnemonic.
        """
        NcVals = {}
        BufrVals = {}

        # The individual data, event, replication and sequence mnemonics
        # only differ in the flag handed to read_subset().
        Passes = (
            (self.int_spec, {}),
            (self.evn_spec, {'Eflag': True}),
            (self.rep_spec, {'Rflag': True}),
            (self.seq_spec, {'Sflag': True}),
        )
        for Spec, Flags in Passes:
            # The mnemonic is the second entry of a variable spec.
            Mlists = [[VarSpec[1] for VarSpec in SubList]
                      for SubList in Spec]
            BufrValues = self.read_bufr_data(bufr, Mlists, **Flags)
            [NcVals, BufrVals] = self.bufr_float_to_actual_bufr(
                Spec, BufrValues, NcVals, BufrVals)

        return [[NcVals], [BufrVals]]

    def calc_obs_date_time(self, ActualValues):
        """Calculate the absolute date and time of an observation.

        The calculation depends on the type of BUFR file (raw BUFR or
        prepBUFR). For raw BUFR, which is what this default implements, the
        absolute observation time comes from the mnemonics:
            YEAR - year
            MNTH - month
            DAYS - day
            HOUR - hour
            MINU - minute
            SECO - second

        For prepBUFR, the time relative to msg_date is held in DHR, and for
        multi-level obs in HRDR:
            msg_date - Message date/time
            DHR      - Observation time minus cycle time
            HRDR     - Observation time minus cycle time on a level by
                       level basis (taking drift into account)

        Args:
            ActualValues: dictionary keyed by mnemonic.

        Returns:
            A list with one entry: a numpy array holding the date and time
            formatted as "%Y-%m-%dT%H:%M:%SZ".
        """
        # raw BUFR: use YEAR, MNTH, ...
        Year = int(ActualValues['YEAR'].data)
        Month = int(ActualValues['MNTH'].data)
        Day = int(ActualValues['DAYS'].data)
        Hour = int(ActualValues['HOUR'].data)
        Minute = int(ActualValues['MINU'].data)

        # Seconds are optional. Use zero if SECO is absent, masked (the
        # data under a mask is not a usable number of seconds) or cannot be
        # converted to an integer.
        Second = 0
        if 'SECO' in ActualValues:
            SecoVal = ActualValues['SECO']
            if not np.ma.is_masked(SecoVal):
                try:
                    Second = int(SecoVal.data)
                except Exception:
                    # Deliberate best effort: fall back to zero seconds.
                    Second = 0

        # Create datetime object with above data. Sometimes the SECO value
        # is outside the range 0..59 (which is what datetime requires).
        # Use Year through Minute to create the datetime object and add in
        # Second via a timedelta object.
        ObsDtime = dt.datetime(
            Year, Month, Day, Hour, Minute) + dt.timedelta(seconds=Second)
        DateTime = np.array(ObsDtime.strftime("%Y-%m-%dT%H:%M:%SZ"))
        return [DateTime]

    def start_msg_selector(self):
        """Start the message selector.

        The selector applies a few filters for selecting messages. These
        filters require internal message counters which are reset here.
        """
        self.num_msg_selected = 0
        self.num_msg_mtype = 0

    def select_next_msg(self, bufr):
        """Advance to the next BUFR message that passes the filters.

        This isn't as clean as it could be, but time constraints are at
        work here!

        Returns:
            True if a message has been selected, False if the end of the
            messages has been reached without selecting one.
        """
        got_a_msg = False
        # Grab the next message
        while bufr.advance() == 0:
            # Skip this message if not the desired type
            if not re.search(self.mtype_re, bufr.msg_type):
                continue

            # Keep count of the messages that match the desired type, which
            # is needed to do the selection filtering.
            self.num_msg_mtype += 1

            # Apply the filtering. Default is to take all messages
            Select = True

            # If the max_num_msg parameter is greater than zero, then use
            # it to limit the number of messages that are selected.
            if self.max_num_msg > 0:
                Select = self.num_msg_selected < self.max_num_msg

            # If the thinning interval is greater than 1, then use it to
            # further select every n-th message.
            if self.thin_interval > 1:
                Select = Select and (
                    (self.num_msg_mtype % self.thin_interval) == 0)

            # If Select is true, the current message has been selected.
            # Keep track of how many messages have been selected, plus
            # break out of the loop and return.
            if Select:
                self.num_msg_selected += 1
                got_a_msg = True
                break

        return got_a_msg

    def convert(self, bufr, nc, isProfile=False):
        """Convert the BUFR data into netcdf data.

        This includes reading BUFR and writing netcdf. This method is a
        default that can be used for (hopefully) many obs types. If an obs
        type requires a more complex method, then this one can be
        overridden in a derived class.

        The default method provides the following:
            Copy all BUFR mnemonic values in the variable specs to the
            output netcdf file.
            Store the message type, the message date and the observation
            date and time in addition to the BUFR mnemonic values.

        Args:
            bufr: the open BUFR file.
            nc: the output netcdf file.
            isProfile: when True every subset is written as a profile that
                occupies as many observations as its longest variable;
                scalar values are repeated for every one of them.
        """
        # Walk through the messages, selecting only those that match the
        # regular expression for this obs type.
        print("Converting BUFR to netcdf:")
        ObsNum = 0
        self.start_msg_selector()

        while self.select_next_msg(bufr):
            MsgType = np.ma.array(bufr.msg_type)
            MsgDate = np.ma.array([bufr.msg_date])
            while bufr.load_subset() == 0:
                # Grab all of the mnemonics from the bufr file, and convert
                # from the BUFR float representation to the actual data
                # type (integer, float, string, double). ActualValues is a
                # list of dictionaries where each dictionary represents
                # one observation. A dictionary within the list is keyed
                # by the netcdf variable name and contains the associated
                # data value.
                [ActualValues, ActualValuesBufr] = self.extract_bufr(bufr)
                if isProfile:
                    # The profile length is the largest leading dimension
                    # found in the first observation.
                    maxLength = 1
                    for Vdata in ActualValues[0].values():
                        if len(Vdata.shape) >= 1 and \
                                Vdata.shape[0] > maxLength:
                            maxLength = Vdata.shape[0]

                for i, ObsVals in enumerate(ActualValues):
                    # Put the message type, message date and datetime
                    # into the dictionary.
                    ObsVals['msg_type@MetaData'] = MsgType
                    ObsVals['msg_date@MetaData'] = MsgDate
                    [ObsVals['datetime@MetaData']] = \
                        self.calc_obs_date_time(ActualValuesBufr[i])

                    # Write out the netcdf variables.
                    for Vname, Vdata in ObsVals.items():
                        if isProfile:
                            if Vdata.shape == ():
                                # Repeat a scalar for every entry of the
                                # profile. Best effort: if that fails the
                                # scalar is written as it is.
                                try:
                                    Vdata = np.ma.array(maxLength*[Vdata],
                                                        dtype=Vdata.dtype)
                                except Exception:
                                    pass
                            Vdata = Vdata.squeeze()
                        # Skip the write if Vdata is empty
                        if Vdata.size:
                            WriteNcVar(nc, ObsNum, Vname, Vdata, isProfile)

                    # Increment observation number and print out progress
                    # messages.
                    if isProfile:
                        ObsNum += maxLength
                    else:
                        ObsNum += 1
                    if (ObsNum % 100) == 0:
                        print(" Converted {0:d} observations".format(ObsNum))

        # If processing a prepBUFR file, record the virtual temperature
        # program code
        if self.bufr_ftype == cm.BFILE_PREPBUFR:
            nc.virtmp_code = bufr.get_program_code('VIRTMP')

        print("")
        print(" Total converted observations: ", ObsNum)
        print("")
665  print("")
def bufr_float_to_actual_bufr(self, SpecList, BufrValues, ActualValues, ActualValuesBufr)
This method will convert bufr float data to the specified actual format.
def convert(self, bufr, nc, isProfile=False)
This method will convert the BUFR data into netcdf data.
def msg_obs_count(self, bufr)
This method is a default routine for counting the number of observations in the current BUFR message.
def extract_bufr(self, bufr)
This method will take the four input spec lists and read the mnemonics from the bufr file.
def fill_coords(self, nc)
This method will fill in the dimension variables with coordinate values.
def set_nlocs(self, nlocs)
This method will set the number of observations.
def init_dim_spec(self)
This method will set the dimension specs (data memeber self.dim_spec).
def start_msg_selector(self)
This method will start the message selector.
def select_next_msg(self, bufr)
This method is the message selector.
def read_bufr_data(self, bufr, Mlists, Rflag=False, Sflag=False, Eflag=False)
This method will read in a list of bufr mnemonics and return a list of the corresponding data values.
def bufr_float_to_actual(self, SpecList, BufrValues, ActualValues)
This method will convert bufr float data to the specified actual format.
def create_nc_datasets(self, nc, isProfile=False)
This method will create dimensions and variables in the netcdf file according to the obs type variabl...
def calc_obs_date_time(self, ActualValues)
This method will calculate the absolute date and time values from the BUFR mnemonic values.
def BufrFloatToActual(Bval, Dtype)
SUBROUTINES.
def WriteNcVar(Fid, ObsNum, Vname, Vdata, isProfile=False)