IODA Bundle
gpsbufr2nc.py
Go to the documentation of this file.
1 from __future__ import print_function
2 import ncepbufr, os, sys, netCDF4, datetime
3 from optparse import OptionParser
4 import numpy as np
5 
6 # function to convert number to binary string (without 0b prefix)
7 get_bin = lambda x, n: format(x, 'b').zfill(n)
8 # date manipulation funcs
9 def splitdate(yyyymmddhh):
10  """
11  yyyy,mm,dd,hh = splitdate(yyyymmddhh)
12 
13  give an date string (yyyymmddhh) return integers yyyy,mm,dd,hh.
14  """
15  yyyy = int(yyyymmddhh[0:4])
16  mm = int(yyyymmddhh[4:6])
17  dd = int(yyyymmddhh[6:8])
18  hh = int(yyyymmddhh[8:10])
19  return yyyy,mm,dd,hh
20 def makedate(yyyy,mm,dd,hh):
21  """
22  yyyymmddhh = makedate(yyyy,mm,dd,hh)
23 
24  return a date string of the form yyyymmddhh given integers yyyy,mm,dd,hh.
25  """
26  return '%0.4i'%(yyyy)+'%0.2i'%(mm)+'%0.2i'%(dd)+'%0.2i'%(hh)
27 def daterange(date1,date2,hrinc):
28  """
29  date_list = daterange(date1,date2,hrinc)
30 
31  return of list of date strings of the form yyyymmddhh given
32  a starting date, ending date and an increment in hours.
33  """
34  date = date1
35  delta = datetime.timedelta(hours=1)
36  yyyy,mm,dd,hh = splitdate(date)
37  d = datetime.datetime(yyyy,mm,dd,hh)
38  n = 0
39  dates = [date]
40  while date < date2:
41  d = d + hrinc*delta
42  date = makedate(d.year,d.month,d.day,d.hour)
43  dates.append(date)
44  n = n + 1
45  return dates
46 
47 # Grab input arguments
48 ScriptName = os.path.basename(sys.argv[0])
49 UsageString = "USAGE: {0:s} [options] <obs_type> <input_prepbufr> <output_netcdf>".format(ScriptName)
50 
51 # Parse command line
52 op = OptionParser(usage=UsageString)
53 op.add_option("-m", "--max-msgs", type="int", dest="max_msgs", default=-1,
54  help="maximum number of messages to keep", metavar="<max_num_msgs>")
55 op.add_option("-c", "--clobber", action="store_true",
56  help="allow overwrite of output netcdf file")
57 
58 MyOptions, MyArgs = op.parse_args()
59 
60 if len(MyArgs) != 3:
61  print("ERROR: must supply exactly 3 arguments")
62  print(UsageString)
63  sys.exit(1)
64 
65 MaxMsgs = MyOptions.max_msgs
66 ClobberOfile = MyOptions.clobber
67 
68 ObsType = MyArgs[0] # 'bending ang' or 'refractivity' ('bend' or 'ref' is OK)
69 if not ObsType.startswith('bend') and not ObsType.startswith('ref'):
70  print("ERROR: obs type string must start with 'bend' or 'ref'")
71  sys.exit(1)
72 
73 bufrFname = MyArgs[1]
74 netcdfFname = MyArgs[2]
75 
76 # Check files
77 BadArgs = False
78 if (not os.path.isfile(bufrFname)):
79  print("ERROR: {0:s}: Specified input BUFR file does not exist: {0:s}".format(ScriptName, bufrFname))
80  print("")
81  BadArgs = True
82 
83 if (os.path.isfile(netcdfFname)):
84  if (ClobberOfile):
85  print("WARNING: {0:s}: Overwriting nc file: {1:s}".format(ScriptName, netcdfFname))
86  print("")
87  else:
88  print("ERROR: {0:s}: Specified nc file already exists: {1:s}".format(ScriptName, netcdfFname))
89  print("ERROR: {0:s}: Use -c option to overwrite.".format(ScriptName))
90  print("")
91  BadArgs = True
92 
93 if (BadArgs):
94  sys.exit(2)
95 
96 hdrstr ='YEAR MNTH DAYS HOUR MINU SECO PCCF ELRC SAID PTID GEODU QFRO'
97 
98 # read gpsro file.
99 
100 bufr = ncepbufr.open(bufrFname)
101 # infer nominal analysis time from bufr message dates, use as time origin
102 dates = []
103 while bufr.advance() == 0:
104  dates.append(bufr.msg_date)
105 bufr.rewind()
106 dates = np.array(dates)
107 date1 = str(dates.min()); date2 = str(dates.max())
108 YYYY1 = date1[0:4]; YYYY2 = date2[0:4]
109 MM1 = date1[4:6]; MM2 = date2[4:6]
110 DD1 = date1[6:8]; DD2 = date2[6:8]
111 datestart = '%04s%s%02s00' % (YYYY1,MM1,DD1)
112 dateend = '%04s%02s%02s18' % (YYYY2,MM2,DD2)
113 for refdate in daterange(datestart,dateend,6):
114  if refdate[8:10] in ['00','06','12','18'] and\
115  refdate > date1 and refdate < date2: break
116 
117 nc = netCDF4.Dataset(netcdfFname,'w',format='NETCDF4')
118 nc.createDimension('nobs',None)
119 #nc.createDimension('nflags',16) # number of bits in quality flag table
120 lat = nc.createVariable('Latitude',np.float32,'nobs',zlib=True,fill_value=bufr.missing_value)
121 lat.units='degrees north'
122 lon = nc.createVariable('Longitude',np.float32,'nobs',zlib=True,fill_value=bufr.missing_value)
123 lon.units='degress east'
124 if ObsType.startswith('ref'):
125  hgt = nc.createVariable('Height',np.float32,'nobs',zlib=True,fill_value=bufr.missing_value)
126  hgt.units='meters'
127 time = nc.createVariable('Time',np.int64,'nobs',zlib=True)
128 time.units = 'seconds since %04s-%02s-%02s %02s:00 UTC' %\
129 (refdate[0:4],refdate[4:6],refdate[6:8],refdate[8:10])
130 ob = nc.createVariable('Observation',np.float32,'nobs',zlib=True,fill_value=bufr.missing_value)
131 if ObsType.startswith('bend'):
132  ob.long_name = 'bending angle observation at zero frequency'
133 else:
134  ob.long_name = 'refractivity observation'
135 oberr = nc.createVariable('ObservationErrorBufr',np.float32,'nobs',zlib=True,fill_value=bufr.missing_value)
136 obpcc = nc.createVariable('ObservationPercentConfidence',np.float32,'nobs',zlib=True,fill_value=bufr.missing_value)
137 profpcc = nc.createVariable('ProfilePercentConfidence',np.float32,'nobs',zlib=True,fill_value=bufr.missing_value)
138 satidn = nc.createVariable('SatelliteID',np.int16,'nobs',zlib=True)
139 platidn = nc.createVariable('PlatformTransmitterID',np.int16,'nobs',zlib=True)
140 rcurv = nc.createVariable('EarthLocalRadiusCurv',np.float32,'nobs',zlib=True,fill_value=bufr.missing_value)
141 if ObsType.startswith('bend'):
142  imp = nc.createVariable('ImpactParameter',np.float32,'nobs',zlib=True,fill_value=bufr.missing_value)
143  imp.units = 'meters'
144 #qf = nc.createVariable('QualityFlags',np.int8,('nobs','nflags'),zlib=True)
145 qf = nc.createVariable('QualityFlags',np.int16,'nobs',zlib=True)
146 geo = nc.createVariable('GeoidUndulation',np.float32,'nobs',zlib=True,fill_value=bufr.missing_value)
147 #if ObsType.startswith('bend'):
148 # ob = nc.createVariable('Incremental_Bending_Angle',np.float32,'nobs')
149 
150 nob = 0
151 while bufr.advance() == 0:
152  print(bufr.msg_counter, bufr.msg_type, bufr.msg_date)
153  while bufr.load_subset() == 0:
154  # the asarray converts masked arrays to numpy arrays, will missing
155  # values filled in. Prevents NaNs from ending up in the netcdf file.
156  hdr = np.asarray(bufr.read_subset(hdrstr).squeeze())
157  yyyymmddhhss ='%04i%02i%02i%02i%02i%02i' % tuple(hdr[0:6])
158  date=datetime.datetime(int(hdr[0]),int(hdr[1]),int(hdr[2]),int(hdr[3]),int(hdr[4]),int(hdr[5]))
159  timeval = netCDF4.date2num(date,units=time.units)
160  pcc = hdr[6] # profile percent confidence
161  roc = hdr[7] # Earth local radius of curvature
162  satid = int(hdr[8]) # satellite identifier
163  ptid = int(hdr[9]) # Platform transmitter ID number
164  geoid = hdr[10] # geod undulation
165  qfro = int(hdr[11]) # quality flag (used by read_gps to flag bad profile)
166  #ibits = get_bin(qfro, 16) # convert to 16 bit binary string
167  #iflags = np.zeros(16,np.int8)
168  #for n,ibit in enumerate(ibits):
169  # if int(ibit): iflags[n]=1
170 # Get the number of occurences of sequence ROSEQ2 in this subset
171 # (will also be the number of replications of sequence ROSEQ1).
172 # Also determine the number of replications of sequence ROSEQ2 nested
173 # inside each replication of ROSEQ1,
174  nreps_this_ROSEQ2 = bufr.read_subset('{ROSEQ2}').squeeze()
175  nreps_this_ROSEQ1 = len(nreps_this_ROSEQ2)
176  data1 = np.asarray(bufr.read_subset('ROSEQ1',seq=True)) # bending angle
177  data2 = np.asarray(bufr.read_subset('ROSEQ3',seq=True)) # refractivity
178  levs1 = data1.shape[1]
179  levs2 = data2.shape[1]
180  if levs1 != levs2:
181  print('skip report due to bending angle/refractivity mismatch')
182  continue
183  levs = levs1
184  #print('sat id,platform transitter id, levels, yyyymmddhhmm =',\
185  #satid,ptid,levs1,yyyymmddhh)
186  #print('k, height, lat, lon, ref, bend:')
187  lats = []; lons = []; hgts = []; obs = []
188  pccs = []; rocs = []; satids = []; ptids = []
189  geoids = []; obserr = []; obspccf = []; rocs = []
190  qflags = []; impacts = []
191  ncount = 0
192  for k in range(levs):
193  latval = data1[0,k]
194  lonval = data1[1,k]
195  hgtval = data2[0,k]
196  try:
197  if latval.mask or lonval.mask or hgtval.mask:
198  continue
199  except:
200  pass
201  ncount += 1
202  lats.append(latval)
203  lons.append(lonval)
204  hgts.append(hgtval)
205  geoids.append(geoid)
206  rocs.append(roc)
207  pccs.append(pcc)
208  satids.append(satid)
209  ptids.append(ptid)
210  #qflags.append(iflags)
211  qflags.append(qfro)
212  if ObsType.startswith('ref'):
213  ref=data2[1,k]
214  ref_error=data2[3,k]
215  ref_pccf=data2[5,k]
216  obs.append(ref)
217  obserr.append(ref_error)
218  obspccf.append(ref_pccf)
219  #print(k,rlat,rlon,height,ob)
220  elif ObsType.startswith('bend'):
221  for i in range(int(nreps_this_ROSEQ2[k])):
222  m = 6*(i+1)-3
223  freq = data1[m,k]
224  impact = data1[m+1,k] # impact parameter
225  bend = data1[m+2,k]
226  bend_error = data1[m+4,k]
227  # look for zero frequency bending angle ob
228  # don't want non-zero frequency (read_gps in gsi)
229  if int(freq) == 0: break
230  bend_pccf=data1[int(6*nreps_this_ROSEQ2[k])+3,k] # % conf
231  obs.append(bend)
232  obserr.append(bend_error)
233  obspccf.append(bend_pccf)
234  impacts.append(impact)
235  #print(k,rlat,rlon,height,ob)
236  if ncount:
237  lat[nob:nob+ncount] = lats
238  lon[nob:nob+ncount] = lons
239  ob[nob:nob+ncount] = obs
240  oberr[nob:nob+ncount] = obserr
241  obpcc[nob:nob+ncount] = obspccf
242  time[nob:nob+ncount] = timeval
243  profpcc[nob:nob+ncount] = pccs
244  satidn[nob:nob+ncount] = satids
245  platidn[nob:nob+ncount] = ptids
246  rcurv[nob:nob+ncount] = rocs
247  geo[nob:nob+ncount] = geoids
248  #qf[nob:nob+ncount,:] = qflags
249  qf[nob:nob+ncount] = qflags
250  if ObsType.startswith('ref'):
251  hgt[nob:nob+ncount] = hgts
252  else:
253  imp[nob:nob+ncount] = impacts
254  nob += ncount
255  # only loop over first MaxMsgs messages
256  if MaxMsgs > 0 and bufr.msg_counter == MaxMsgs: break
257 bufr.close()
258 nc.close()
def splitdate(yyyymmddhh)
Definition: gpsbufr2nc.py:9
def makedate(yyyy, mm, dd, hh)
Definition: gpsbufr2nc.py:20
def daterange(date1, date2, hrinc)
Definition: gpsbufr2nc.py:27