IODA Bundle
ncep_classes.py
#!/usr/bin/env python3

import ncepbufr
from netCDF4 import Dataset
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from datetime import datetime as dt
import sys
import os
import yaml

from pathlib import Path

IODA_CONV_PATH = Path(__file__).parent/"@SCRIPT_LIB_PATH@"
if not IODA_CONV_PATH.is_dir():
    IODA_CONV_PATH = Path(__file__).parent/'..'/'lib-python'
sys.path.append(str(IODA_CONV_PATH.resolve()))

import bufr2ncCommon as cm
from bufr2ncObsTypes import ObsType

NCEP_CONFIG_PATH = (IODA_CONV_PATH/'config').resolve()

##########################################################################
# SUBROUTINES To be deleted (maybe).
##########################################################################

def MessageCounter(BufrFname, Obs):
    # This function counts the number of messages in the file BufrFname
    # that match the message selector of the observation type Obs.
    bufr = ncepbufr.open(BufrFname)
    NumMessages = 0
    Obs.start_msg_selector()
    while (Obs.select_next_msg(bufr)):
        NumMessages += 1

    bufr.close()
    return [NumMessages]

def BfilePreprocess(BufrFname, Obs):
    # This routine will read the BUFR file and figure out how many observations
    # will be read when recording data.

    bufr = ncepbufr.open(BufrFname)

    # The number of observations will be equal to the total number of subsets
    # contained in the selected messages.
    NumObs = 0
    Obs.start_msg_selector()
    while (Obs.select_next_msg(bufr)):
        NumObs += Obs.msg_obs_count(bufr)
    bufr.close()

    return [NumObs, Obs.num_msg_selected, Obs.num_msg_mtype]
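
# A minimal usage sketch of the two helpers above (the BUFR file name is
# illustrative only; Obs stands for an already-constructed observation-type
# object such as the NcepObsType instance built in the driver at the bottom
# of this script, which does the same thing from command-line arguments):
#
#   [n_msgs] = MessageCounter('gdas.sfcshp.t00z.bufr_d', Obs)
#   n_obs, n_selected, n_mtype = BfilePreprocess('gdas.sfcshp.t00z.bufr_d', Obs)
#   Obs.set_nlocs(n_obs)
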

# ########################################################################
# ######## (Prep-) BUFR NCEP Observations                         ########
# ########################################################################
#
# This class is meant to import all of the entries of any BUFR-family data
# set whose BUFR tables A, B and D are embedded in the files.
#


class NcepObsType(ObsType):
    # ### initialize data elements ###
    def __init__(self, bf_type, alt_type, tablefile, dictfile):

        super(NcepObsType, self).__init__()

        self.bufr_ftype = bf_type
        self.multi_level = False
        # Put the time and date vars in the subclasses so that their dimensions
        # can vary ( [nlocs], [nlocs,nlevs] ).
        self.misc_spec[0].append(
            ['datetime@MetaData', '', cm.DTYPE_STRING, ['nlocs', 'nstring'],
             [self.nlocs, self.nstring]])

        if (bf_type == cm.BFILE_BUFR):
            self.mtype_re = alt_type  # alt_type is the BUFR mnemonic

        if os.path.isfile(dictfile):
            # i.e. 'NC001007.dict'
            full_table = read_yaml(dictfile)
            _, blist = read_table(tablefile)
        else:
            full_table, blist = read_table(tablefile)  # i.e. 'NC001007.tbl'
            write_yaml(full_table, dictfile)

        spec_list = get_int_spec(alt_type, blist)
        intspec = []
        intspecDum = []

        # Lexicon is the yaml dictionary selected with the -l option; it is
        # defined in the driver section at the bottom of this script.
        if not os.path.isfile(Lexicon):
            for i in spec_list[alt_type]:
                if i in full_table:
                    intspecDum = [
                        full_table[i]['name'].replace(' ', '_').replace('/', '_'),
                        i,
                        full_table[i]['dtype'],
                        full_table[i]['ddims']]
                    if intspecDum not in intspec:
                        intspec.append(intspecDum)
                # else:
                #     TODO what to do if the spec is not in the full_table (or
                #     in this case, does not have a unit in the full_table)
            for j, dname in enumerate(intspec):
                if len(dname[3]) == 1:
                    intspec[j].append([self.nlocs])
                elif len(dname[3]) == 2:
                    intspec[j].append([self.nlocs, self.nstring])
                else:
                    print('walked off the edge')

            write_yaml(intspec, Lexicon)
        else:
            intspec = read_yaml(Lexicon)

        self.nvars = 0
        for k in intspec:
            if '@ObsValue' in (" ".join(map(str, k))):
                self.nvars += 1
        # The last mnemonic (RRSTG) corresponds to the raw data; instead of
        # using -1 below, it is removed explicitly. The issue with RRSTG is
        # its binary length, which makes the system crash during the
        # BufrFloatToActual string conversion. There are probably more
        # mnemonics with the same problem.

        self.int_spec = [intspec[x:x + 1]
                         for x in range(0, len(intspec), 1)]
        self.evn_spec = []

        spec_list = get_rep_spec(alt_type, blist)
        repspec = []
        repspecDum = []

        for i in spec_list[alt_type]:
            if i in full_table:
                repspecDum = [
                    full_table[i]['name'].replace(' ', '_').replace('/', '_'),
                    i,
                    full_table[i]['dtype'],
                    full_table[i]['ddims']]
                if repspecDum not in repspec:
                    repspec.append(repspecDum)
        for j, dname in enumerate(repspec):
            if len(dname[3]) == 1:
                repspec[j].append([self.nlocs])
            elif len(dname[3]) == 2:
                repspec[j].append([self.nlocs, self.nstring])
            else:
                print('walked off the edge')

        # write_yaml(repspec, Lexicon)
        self.rep_spec = [repspec[x:x + 1]
                         for x in range(0, len(repspec), 1)
                         if repspec[x] not in intspec]

        # TODO Check the intspec for "SQ"; if it exists, add it to seq_spec.
        self.seq_spec = []

        self.nrecs = 1  # place holder

        # Set the dimension specs.
        super(NcepObsType, self).init_dim_spec()
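
# Illustrative sketch (the names below are made up, not taken from a real
# table) of one entry of the intspec list / Lexicon yaml built above:
# [variable name, BUFR mnemonic, data type, dimension names, dimension sizes],
# e.g.
#
#   ['air_temperature@ObsValue', 'TMDB', cm.DTYPE_FLOAT, ['nlocs'], [self.nlocs]]
#
# Entries whose name carries the '@ObsValue' suffix are the ones counted as
# variables (self.nvars) above.
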

##########################################################################
# read bufr table and return new table with bufr names and units
##########################################################################


def write_yaml(dictionary, dictfileName):
    f = open(dictfileName, 'w')
    yaml.dump(dictionary, f)
    f.close()


def read_yaml(dictfileName):
    f = open(dictfileName, 'r')
    dictionary = yaml.safe_load(f)
    f.close()

    return dictionary


def read_table(filename):
    all = []
    with open(filename) as f:
        for line in f:
            if line[:11] != '|' + '-' * 10 + '|' \
                    and line[:11] != '|' + ' ' * 10 + '|' \
                    and line.find('-' * 20) == -1:
                all.append(line)

    all = all[1:]
    stops = []
    for ndx, line in enumerate(all[1:]):
        if line.find('MNEMONIC') != -1:
            stops.append(ndx)

    part_a = all[2:stops[0]]
    part_b = all[stops[0] + 3:stops[1]]
    part_c = all[stops[1] + 3:]

    dum = []
    for x in part_a:
        dum.append(' '.join(
            x.replace("(", "").replace(")", "").replace("-", "").split()))

    part_a = dum

    tbl_a = {line.split('|')[1].strip(): line.split('|')[3].strip().lower()
             for line in part_a}
    tbl_c = {line.split('|')[1].strip(): line.split('|')[5].strip().lower()
             for line in part_c}

    full_table = {i: {'name': tbl_a[i], 'units': tbl_c[i]}
                  for i in tbl_c.keys()}

    # TODO Double check the declarations below.
    # DTYPE_INTEGER
    integer_types = [
        'CODE TABLE', 'FLAG TABLE', 'YEAR', 'MONTH', 'DAY', 'MINUTE',
        'MINUTES', 'PASCALS']
    # DTYPE_FLOAT
    float_types = [
        'SECOND', 'NUMERIC', 'DEGREES', 'METERS', 'METERS/SECOND', 'M',
        'DECIBELS', 'HZ', 'DB', 'K', 'KG/M**2', 'M/S', 'DEGREE**2', 'M**2',
        'DEGREES TRUE', 'PERCENT', '%', 'KG/METER**2', 'SIEMENS/M',
        'METERS**3/SECOND', 'JOULE/METER**2', 'PART PER THOUSAND',
        'PARTS/1000', 'METERS**2/HZ', 'S', 'METERS**2/SECOND', 'VOLTS', 'V',
        'DEGREE TRUE', 'DEGREES KELVIN', 'HERTZ', 'HOURS', 'HOUR',
        'METER/SECOND', 'DEGREE', 'SECONDS']
    # DTYPE_STRING
    string_types = ['CCITT IA5']

    string_dims = ['nlocs', 'nstring']
    nums_dims = ['nlocs']

    for key, item in full_table.items():
        if item['units'].upper() in integer_types:
            full_table[key]['dtype'] = cm.DTYPE_INTEGER
            full_table[key]['ddims'] = nums_dims
        elif item['units'].upper() in float_types:
            full_table[key]['dtype'] = cm.DTYPE_FLOAT
            full_table[key]['ddims'] = nums_dims
        elif item['units'].upper() in string_types:
            full_table[key]['dtype'] = cm.DTYPE_STRING
            full_table[key]['ddims'] = string_dims
        else:
            full_table[key]['dtype'] = cm.DTYPE_UNDEF
            full_table[key]['ddims'] = nums_dims
    return full_table, part_b
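
# Illustrative sketch (the mnemonic and values are made up, not read from a
# real table) of the structure returned by read_table(): full_table maps each
# mnemonic to its name, units, data type and dimension names, e.g.
#
#   full_table['TMDB'] == {'name': 'temperature/dry bulb temperature',
#                          'units': 'degrees kelvin',
#                          'dtype': cm.DTYPE_FLOAT,
#                          'ddims': ['nlocs']}
#
# part_b (the second return value) is the list of raw lines from the middle
# section of the table, which get_int_spec()/get_rep_spec() below scan for a
# given message type.
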

##########################################################################
# get the int_spec entries from satellite table
##########################################################################


def get_int_spec(mnemonic, part_b):
    # mnemonic is the BUFR msg_type, i.e. 'NC001007'
    # part_b is from read_table: the table entries associated with the mnemonic
    # (here part_b = blist).
    # Find the table entries for the bufr msg_type (mnemonic):
    bentries = {}
    for line in part_b:
        # Note: stripping of '(' and ')' is intentionally left disabled here.
        line = line.replace('{', '').replace('}', '') \
                   .replace('<', '').replace('>', '')
        if line.find(mnemonic) != -1:
            if mnemonic in bentries:
                bentries[mnemonic] = bentries[mnemonic] + \
                    ''.join(line.split('|')[2:]).strip().split()
            else:
                bentries[mnemonic] = ''.join(
                    line.split('|')[2:]).strip().split()
    # bentries is a dictionary for the mnemonic

    # Repeat the expansion a few times so that mnemonics which are themselves
    # sequences also get expanded.
    for i in range(3):
        for b_monic in bentries[mnemonic]:
            for line in part_b:
                line = line.replace('{', '').replace('}', '') \
                           .replace('<', '').replace('>', '')
                if line.split('|')[1].find(b_monic) != -1:
                    bentries[mnemonic] = bentries[mnemonic] + \
                        ''.join(line.split('|')[2:]).strip().split()
    return bentries
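
# Illustrative sketch (the member mnemonics are made-up examples): for a
# message type such as 'NC001007', get_int_spec() returns a dictionary with a
# single key, the message type itself, whose value is the flat list of member
# mnemonics collected from the table, e.g.
#
#   {'NC001007': ['YEAR', 'MNTH', 'DAYS', 'HOUR', 'CLAT', 'CLON', 'TMDB', ...]}
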


##########################################################################
# get the rep_spec entries
##########################################################################


def get_rep_spec(mnemonic, part_b):
    # mnemonic is the BUFR msg_type, i.e. 'NC001007'
    # part_b is from read_table: the table entries associated with the mnemonic
    #
    # Find the table entries for the bufr msg_type (mnemonic):
    bentries = {}
    for line in part_b:
        line = line.replace('{', '').replace('}', '') \
                   .replace('<', '').replace('>', '')
        if line.find(mnemonic) != -1:
            if mnemonic in bentries:
                bentries[mnemonic] = bentries[mnemonic] + \
                    ''.join(line.split('|')[2:]).strip().split()
            else:
                bentries[mnemonic] = ''.join(
                    line.split('|')[2:]).strip().split()
    # bentries is a dictionary for the mnemonic
    # Keep only the replicated entries, i.e. the mnemonics written as '(XXXX)'.
    bentries[mnemonic] = [x[1:-1] for x in bentries[mnemonic] if '(' in x]

    for b_monic in bentries[mnemonic]:
        for line in part_b:
            line = line.replace('(', '').replace(')', '')
            if line.split('|')[1].find(b_monic) != -1:
                bentries[mnemonic] = bentries[mnemonic] + \
                    ''.join(line.split('|')[2:]).strip().split()
    return bentries
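
# Illustrative sketch (mnemonics are made-up examples): get_rep_spec() returns
# the same {message_type: [mnemonics]} shape as get_int_spec(), but the list
# holds only the replicated sequences of the message type together with their
# expanded member mnemonics, e.g.
#
#   {'NC001007': ['TMSAL', 'DBSS', 'SALN', ...]}
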

##########################################################################
# functions to create the full paths of the BUFR, table and netCDF file names
##########################################################################


def get_fname(base_mnemo, BufrPath):
    # The BUFR file is assumed here to be named after its message-type mnemonic.
    BufrFname = BufrPath + base_mnemo
    BufrTname = base_mnemo + '.tbl'
    NetcdfFname = 'xx' + base_mnemo[5:] + '.nc'

    return BufrFname, BufrTname, NetcdfFname


def create_bufrtable(BufrFname, ObsTable):
    bufr = ncepbufr.open(BufrFname)
    bufr.advance()
    bufr.dump_table(ObsTable)
    bufr.close()
    return

##########################################################################
# MAIN
##########################################################################


if __name__ == '__main__':

    desc = ('Read NCEP BUFR data and convert to IODA netCDF4 format. '
            'Example: ./ncep_classes.py -p /path/to/obs/ -i obs_filename'
            ' -ot observation_type -l yamlfile -m number_of_messages')

    parser = ArgumentParser(
        description=desc,
        formatter_class=ArgumentDefaultsHelpFormatter)

    parser.add_argument(
        '-p', '--obs_path', help='path with the observations',
        type=str, required=True)

    parser.add_argument(
        '-i', '--input_bufr', help='name of the input BUFR file',
        type=str, required=True)

    parser.add_argument(
        '-ot', '--obs_type', help='submessage of the input BUFR file, e.g. NC001007',
        type=str, required=True)

    parser.add_argument(
        '-o', '--output_netcdf', help='name of the output netCDF file',
        type=str, required=False, default=None)

    parser.add_argument(
        '-m', '--maxmsgs', help='maximum number of messages to keep',
        type=int, required=False, default=0, metavar='<max_num_msgs>')

    parser.add_argument(
        '-Th', '--thin', type=int, default=1,
        help='select every nth message (thinning)', metavar='<thin_interval>')

    parser.add_argument(
        '-d', '--date', help='file date', metavar='YYYYMMDDHH',
        type=str, required=True)

    parser.add_argument(
        '-l', '--lexicon', help='yaml file with the dictionary', metavar='name_of_dict',
        type=str, required=True)

    parser.add_argument(
        '-Pr', '--bufr', action='store_true', default=1,
        help='input BUFR file is in prepBUFR or BUFR format')

    args = parser.parse_args()

    BufrPath = args.obs_path      # Path of the observations
    MaxNumMsg = args.maxmsgs      # Maximum number of messages to be imported
    ThinInterval = args.thin      # Thinning window. TODO: To be removed, legacy
    ObsType = args.obs_type       # Observation type, e.g. NC001007
    BufrFname = BufrPath + args.input_bufr    # Path and name of the BUFR file
    DateCentral = dt.strptime(args.date, '%Y%m%d%H')  # Date/hour of the analysis

    if Path(args.lexicon).is_absolute():
        Lexicon = args.lexicon    # User-defined Lexicon path
    else:
        Lexicon = str((NCEP_CONFIG_PATH/args.lexicon).resolve())  # Default Lexicon path

    if (args.bufr):
        BfileType = cm.BFILE_BUFR     # BUFR or prepBUFR. TODO: To be removed
    else:
        BfileType = cm.BFILE_PREPBUFR

    if (args.output_netcdf):
        NetcdfFname = args.output_netcdf  # Filename of the netCDF IODA file
    else:
        NetcdfFname = 'ioda.' + ObsType + '.' + \
            DateCentral.strftime("%Y%m%d%H") + '.nc'

    date_time = DateCentral.strftime("%Y%m%d%H")

    ObsTable = ObsType + '.tbl'   # BUFR table from the data
    DictObs = ObsType + '.dict'   # BUFR dict

    # Check if BufrFname exists

    if os.path.isfile(BufrFname):
        bufr = ncepbufr.open(BufrFname)
        bufr.advance()
        mnemonic = bufr.msg_type
        bufr.close()
        print('Mnemonic name is ', mnemonic)
    else:
        sys.exit('The file ' + BufrFname + ' does not exist.')

    # Check if the BUFR observation table exists; if not, create it.
    # The table is defined as base_mnemo.tbl; it is a text file.

    if os.path.isfile(ObsTable):
        print('ObsTable exists: ', ObsTable)
    else:
        print('ObsTable does not exist; creating ', ObsTable)
        create_bufrtable(BufrFname, ObsTable)

    # Create the observation instance

    Obs = NcepObsType(BfileType, mnemonic, ObsTable, DictObs)
    NumMessages = MessageCounter(BufrFname, Obs)
    if MaxNumMsg > 0:
        Obs.max_num_msg = MaxNumMsg
    else:
        Obs.max_num_msg = NumMessages[0]
    print("NumMessages = ", NumMessages[0])

    Obs.thin_interval = ThinInterval
    Obs.date_central = DateCentral

    [NumObs, NumMsgs, TotalMsgs] = BfilePreprocess(BufrFname, Obs)
    Obs.set_nlocs(NumObs)

    nc = Dataset(NetcdfFname, 'w', format='NETCDF4')

    nc.date_time = int(date_time[0:10])

    bufr = ncepbufr.open(BufrFname)
    pf_list = ['NC001003', 'NC001103', 'NC031001', 'NC031002', 'NC031003',
               'NC031004', 'NC031005', 'NC031006', 'NC031007']
    if ObsType in pf_list:
        Obs.create_nc_datasets(nc, True)
        Obs.fill_coords(nc)
        Obs.convert(bufr, nc, True)
    else:
        Obs.create_nc_datasets(nc, False)
        Obs.fill_coords(nc)
        Obs.convert(bufr, nc, False)
    bufr.close()
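
# Example invocation, following the usage string above (paths and file names
# are illustrative only):
#
#   ./ncep_classes.py -p /path/to/obs/ -i gdas.sfcshp.t00z.bufr_d \
#       -ot NC001007 -d 2020010100 -l NC001007.yaml
#
# This builds (or reuses) NC001007.tbl and NC001007.dict in the working
# directory and writes ioda.NC001007.2020010100.nc unless -o is given.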