IODA Bundle
godae_profile2ioda.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 
3 #
4 # (C) Copyright 2019 UCAR
5 #
6 # This software is licensed under the terms of the Apache Licence Version 2.0
7 # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
8 #
9 
10 from __future__ import print_function
11 import sys
12 from datetime import datetime
13 from scipy.io import FortranFile
14 from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
15 from pathlib import Path
16 
17 IODA_CONV_PATH = Path(__file__).parent/"@SCRIPT_LIB_PATH@"
18 if not IODA_CONV_PATH.is_dir():
19  IODA_CONV_PATH = Path(__file__).parent/'..'/'lib-python'
20 sys.path.append(str(IODA_CONV_PATH.resolve()))
21 
22 import ioda_conv_ncio as iconv
23 from orddicts import DefaultOrderedDict
24 
25 
26 class profile(object):
27 
28  def __init__(self, filename, date):
29 
30  self.filenamefilename = filename
31  self.datedate = date
32 
33  # Read profile data
34  self._rd_prof_rd_prof()
35 
36  return
37 
38  def _rd_prof(self):
39  '''
40  Read the profile data
41  Based on subroutine rd_prof in ocn_obs.f
42  '''
43 
44  try:
45  fh = FortranFile(self.filenamefilename, mode='r', header_dtype='>u4')
46  except IOError:
47  raise IOError('%s file not found!' % self.filenamefilename)
48  except Exception:
49  raise Exception('Unknown error opening %s' % self.filenamefilename)
50 
51  # data is the dictionary with data structure as in ocn_obs.f
52  data = {}
53 
54  data['n_obs'], data['n_lvl'], data['n_vrsn'] = fh.read_ints('>i4')
55 
56  print(' number profiles: %d' % data['n_obs'])
57  print(' max number levels: %d' % data['n_lvl'])
58  print('file version number: %d' % data['n_vrsn'])
59 
60  if data['n_obs'] <= 0:
61  print('No profile observations to process from %s' % self.filenamefilename)
62  return
63 
64  data['ob_btm'] = fh.read_reals('>f4')
65  data['ob_lat'] = fh.read_reals('>f4')
66  data['ob_lon'] = fh.read_reals('>f4')
67  data['ob_ls'] = fh.read_reals('>i4')
68  data['ob_lt'] = fh.read_reals('>i4')
69  data['ob_ssh'] = fh.read_reals('>f4')
70  data['ob_sst'] = fh.read_reals('>f4')
71  data['ob_sal_typ'] = fh.read_reals('>i4')
72  data['ob_sal_qc'] = fh.read_reals('>f4')
73  data['ob_tmp_typ'] = fh.read_reals('>i4')
74  data['ob_tmp_qc'] = fh.read_reals('>f4')
75 
76  data['ob_lvl'] = []
77  data['ob_sal'] = []
78  data['ob_sal_err'] = []
79  data['ob_sal_prb'] = []
80  data['ob_tmp'] = []
81  data['ob_tmp_err'] = []
82  data['ob_tmp_prb'] = []
83 
84  for n in range(data['n_obs']):
85  data['ob_lvl'].append(fh.read_reals('>f4'))
86  data['ob_sal'].append(fh.read_reals('>f4'))
87  data['ob_sal_err'].append(fh.read_reals('>f4'))
88  data['ob_sal_prb'].append(fh.read_reals('>f4'))
89  data['ob_tmp'].append(fh.read_reals('>f4'))
90  data['ob_tmp_err'].append(fh.read_reals('>f4'))
91  data['ob_tmp_prb'].append(fh.read_reals('>f4'))
92 
93  data['ob_dtg'] = fh.read_record('>S12').astype('U12')
94  data['ob_rcpt'] = fh.read_record('>S12').astype('U12')
95  data['ob_scr'] = fh.read_record('>S1').astype('U1')
96  data['ob_sign'] = fh.read_record('>S7').astype('U7')
97 
98  data['ob_clm_sal'] = []
99  data['ob_clm_tmp'] = []
100  data['ob_clm_ssd'] = []
101  data['ob_clm_tsd'] = []
102  data['ob_glb_sal'] = []
103  data['ob_glb_tmp'] = []
104  data['ob_glb_ssd'] = []
105  data['ob_glb_tsd'] = []
106  data['ob_mds_sal'] = []
107  data['ob_mds_tmp'] = []
108  data['ob_rgn_sal'] = []
109  data['ob_rgn_tmp'] = []
110  data['ob_rgn_ssd'] = []
111  data['ob_rgn_tsd'] = []
112 
113  for n in range(data['n_obs']):
114  data['ob_clm_sal'].append(fh.read_reals('>f4'))
115  data['ob_clm_tmp'].append(fh.read_reals('>f4'))
116  data['ob_clm_ssd'].append(fh.read_reals('>f4'))
117  data['ob_clm_tsd'].append(fh.read_reals('>f4'))
118  data['ob_glb_sal'].append(fh.read_reals('>f4'))
119  data['ob_glb_tmp'].append(fh.read_reals('>f4'))
120  data['ob_glb_ssd'].append(fh.read_reals('>f4'))
121  data['ob_glb_tsd'].append(fh.read_reals('>f4'))
122  data['ob_mds_sal'].append(fh.read_reals('>f4'))
123  data['ob_mds_tmp'].append(fh.read_reals('>f4'))
124  data['ob_rgn_sal'].append(fh.read_reals('>f4'))
125  data['ob_rgn_tmp'].append(fh.read_reals('>f4'))
126  data['ob_rgn_ssd'].append(fh.read_reals('>f4'))
127  data['ob_rgn_tsd'].append(fh.read_reals('>f4'))
128 
129  if data['n_vrsn'] > 1:
130  data['ob_sal_xvl'] = fh.read_reals('>f4')
131  data['ob_sal_xsd'] = fh.read_reals('>f4')
132  data['ob_tmp_xvl'] = fh.read_reals('>f4')
133  data['ob_tmp_xsd'] = fh.read_reals('>f4')
134 
135  if data['n_vrsn'] > 2:
136  # ob_id is 10 characters long, with variable spaces in front.
137  # How are these legitimate fortran spaces represented in
138  # python? FortranFile.read_record has difficulty, because
139  # dtype does not conform
140  # Since ob_id is not used, treat it as an array of characters
141  # Something like this would be ideal:
142  # data['ob_id'] = fh.read_record('>S10').astype('U10')
143  data['ob_id'] = fh.read_record('>S1')
144 
145  fh.close()
146 
147  self.datadata = data
148 
149  return
150 
151 
152 class IODA(object):
153 
154  def __init__(self, filename, date, varDict, obsList):
155  '''
156  Initialize IODA writer class,
157  transform to IODA data structure and,
158  write out to IODA file.
159  '''
160 
161  self.filenamefilename = filename
162  self.datedate = date
163  self.varDictvarDict = varDict
164 
165  self.locKeyListlocKeyList = [
166  ("latitude", "float"),
167  ("longitude", "float"),
168  ("depth", "float"),
169  ("datetime", "string")
170  ]
171 
172  self.AttrDataAttrData = {
173  'odb_version': 1,
174  'date_time_string': self.datedate.strftime("%Y-%m-%dT%H:%M:%SZ")
175  }
176 
177  self.writerwriter = iconv.NcWriter(self.filenamefilename, self.locKeyListlocKeyList)
178 
179  self.keyDictkeyDict = DefaultOrderedDict(lambda: DefaultOrderedDict(dict))
180  for key in self.varDictvarDict.keys():
181  value = self.varDictvarDict[key]
182  self.keyDictkeyDict[key]['valKey'] = value, self.writerwriter.OvalName()
183  self.keyDictkeyDict[key]['errKey'] = value, self.writerwriter.OerrName()
184  self.keyDictkeyDict[key]['qcKey'] = value, self.writerwriter.OqcName()
185 
186  # data is the dictionary containing IODA friendly data structure
187  self.datadata = DefaultOrderedDict(lambda: DefaultOrderedDict(dict))
188 
189  recKey = 0
190 
191  for obs in obsList:
192 
193  if obs.data['n_obs'] <= 0:
194  print('No profile observations for IODA!')
195  continue
196 
197  for n in range(obs.data['n_obs']):
198 
199  lat = obs.data['ob_lat'][n]
200  lon = obs.data['ob_lon'][n]
201  dtg = datetime.strptime(obs.data['ob_dtg'][n], '%Y%m%d%H%M')
202 
203  for ilev, lvl in enumerate(obs.data['ob_lvl'][n]):
204 
205  locKey = lat, lon, lvl, dtg.strftime("%Y-%m-%dT%H:%M:%SZ")
206 
207  for key in self.varDictvarDict.keys():
208 
209  val = obs.data[key][n][ilev]
210  err = obs.data[key+'_err'][n][ilev]
211  qc = (100 * obs.data[key+'_qc'][n]).astype('i4')
212 
213  valKey = self.keyDictkeyDict[key]['valKey']
214  errKey = self.keyDictkeyDict[key]['errKey']
215  qcKey = self.keyDictkeyDict[key]['qcKey']
216 
217  self.datadata[recKey][locKey][valKey] = val
218  self.datadata[recKey][locKey][errKey] = err
219  self.datadata[recKey][locKey][qcKey] = qc
220 
221  recKey += 1
222 
223  (ObsVars, LocMdata, VarMdata) = self.writerwriter.ExtractObsData(self.datadata)
224  self.writerwriter.BuildNetcdf(ObsVars, LocMdata, VarMdata, self.AttrDataAttrData)
225 
226  return
227 
228 
229 def main():
230 
231  desc = 'Convert GODAE binary profile data to IODA netCDF4 format'
232  parser = ArgumentParser(
233  description=desc,
234  formatter_class=ArgumentDefaultsHelpFormatter)
235  parser.add_argument(
236  '-i', '--input', help='name of the input binary GODAE profile file',
237  type=str, nargs='+', required=True)
238  parser.add_argument(
239  '-o', '--output', help='name of the output netCDF GODAE profile file',
240  type=str, required=True, default=None)
241  parser.add_argument(
242  '-d', '--date', help='file date', metavar='YYYYMMDDHH',
243  type=str, required=True)
244 
245  args = parser.parse_args()
246 
247  fList = args.input
248  foutput = args.output
249  fdate = datetime.strptime(args.date, '%Y%m%d%H')
250 
251  obsList = []
252  for fname in fList:
253  obs = profile(fname, fdate)
254  obsList.append(obs)
255 
256  varDict = {
257  'ob_tmp': 'sea_water_temperature',
258  'ob_sal': 'sea_water_salinity'
259  }
260 
261  IODA(foutput, fdate, varDict, obsList)
262 
263 
264 if __name__ == '__main__':
265  main()
def __init__(self, filename, date, varDict, obsList)
def __init__(self, filename, date)