IODA Bundle
prepbufr2ncSG.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 """convert prepbufr file to netCDF"""
3 from __future__ import print_function
4 import ncepbufr
5 import numpy as np
6 from netCDF4 import Dataset
7 from ncepbufr import prepbufr_mnemonics_dict as mnemonics_dict
8 import sys
9 
10 # write obs in nobs_chunk chunks for better compression.
11 # nobs_chunk should not be less than the total number of obs in the bufr file.
12 # /Users/sgregory/Documents/NOAA/Data_Assimilation/sample_ensembledata/radbufrdata/bufr_2015102700/
13 nobs_chunk = 200
14 
15 # min/max times in assim window
16 time_min = -3.0; time_max = 3.0
17 
18 if len(sys.argv) < 3:
19  raise SystemExit('prepbufr2nc <input prepbufr> <output netcdf>')
20 
21 # input and output file names from command line args.
22 prepbufr_filename = sys.argv[1]
23 netcdf_filename = sys.argv[2]
24 if prepbufr_filename == netcdf_filename:
25  raise IOError('cannot overwrite input prepbufr file')
26 
27 # mnemonics to extract data from prepbufr file.
28 hdstr='SID XOB YOB DHR TYP ELV SAID T29'
29 obstr='POB QOB TOB ZOB UOB VOB PWO CAT PRSS TDO PMO XDR YDR HRDR'
30 qcstr='PQM QQM TQM ZQM WQM PWQ PMQ'
31 oestr='POE QOE TOE ZOE WOE PWE'
32 
33 # skip these report types
34 #skiptypes = []
35 skiptypes = ['SATWND'] # sat winds in a separate bufr file.
36 
37 # open prepbufr file.
38 # find total number of messages, analysis date
39 bufr = ncepbufr.open(prepbufr_filename)
40 nmessages = 0
41 while bufr.advance() == 0: # loop over messages.
42  if bufr.msg_type in skiptypes: continue
43  nmessages += 1
44 bufr.rewind()
45 
46 # open netcdf file
47 print('ncfile=', netcdf_filename)
48 nc = Dataset(netcdf_filename,'w',format='NETCDF4')
49 
50 # create netcdf dimensions
51 hd = nc.createDimension('hdrinfo',len(hdstr.split()))
52 nhdd = len(hd)
53 ob = nc.createDimension('obinfo',len(obstr.split()))
54 nobd = len(ob)
55 oe = nc.createDimension('oeinfo',len(oestr.split()))
56 noed = len(oe)
57 qc = nc.createDimension('qcinfo',len(qcstr.split()))
58 nqcd = len(qc)
59 nobsd = nc.createDimension('nobs',None)
60 nmsgs = nc.createDimension('nmsgs',nmessages)
61 
62 # create netcdf variables.
63 hdrdata =\
64 nc.createVariable('header',np.float64,('nobs','hdrinfo'),\
65 fill_value=bufr.missing_value,zlib=True,chunksizes=(nobs_chunk,nhdd))
66 hdrdata.desc = 'observation header data'
67 for key in hdstr.split():
68  hdrdata.setncattr(key,mnemonics_dict[key])
69 hdrdata.hdrinfo = hdstr
70 
71 obid = nc.createVariable('obid',str,('nobs',),zlib=True)
72 obid.desc = 'observation id (station id/type code/lon/lat/time/elevation/pressure)'
73 obdata =\
74 nc.createVariable('obdata',np.float32,('nobs','obinfo'),\
75 fill_value=bufr.missing_value,zlib=True,chunksizes=(nobs_chunk,nobd))
76 oedata =\
77 nc.createVariable('oberr',np.float32,('nobs','oeinfo'),\
78 fill_value=bufr.missing_value,zlib=True,chunksizes=(nobs_chunk,noed))
79 qc_fillval = 255
80 qcdata =\
81 nc.createVariable('qcdata',np.uint8,('nobs','qcinfo'),\
82 fill_value=qc_fillval,zlib=True,chunksizes=(nobs_chunk,nqcd))
83 msgnum = nc.createVariable('msgnum',np.uint32,('nobs'),\
84 zlib=True,chunksizes=(nobs_chunk,))
85 msgnum.desc = 'bufr message number'
86 subsetnum = nc.createVariable('subsetnum',np.uint16,('nobs'),\
87 zlib=True,chunksizes=(nobs_chunk,))
88 subsetnum.desc='subset number within bufr message'
89 msgtype = nc.createVariable('msgtype',str,('nmsgs'),zlib=True)
90 msgtype.desc='bufr message type'
91 msgdate = nc.createVariable('msgdate',np.uint32,('nmsgs'),zlib=True)
92 msgdate.desc='bufr message date'
93 # mnemonic descriptions as variable attributes.
94 for key in obstr.split():
95  obdata.setncattr(key,mnemonics_dict[key])
96 obdata.obinfo = obstr
97 obdata.desc = 'observation data'
98 for key in oestr.split():
99  oedata.setncattr(key,mnemonics_dict[key])
100 oedata.oeinfo = oestr
101 oedata.desc = 'observation error data'
102 for key in qcstr.split():
103  qcdata.setncattr(key,mnemonics_dict[key])
104 qcdata.qcinfo = qcstr
105 qcdata.desc = 'observation QC data'
106 
107 # read prepbufr data, write to netcdf.
108 nob = 0
109 obid_set = set()
110 nmsg = 0
111 while bufr.advance() == 0: # loop over messages.
112  if bufr.msg_type in skiptypes: continue
113 
114  # lists to hold data from each subset in message.
115  hdrarr = []; obsarr = []; qcarr = []; errarr = []; obidarr = []
116  msgnumarr = []; subsetnumarr = []
117  nobs_message = 0
118  nsubset = 0
119  nc['msgtype'][nmsg] = bufr.msg_type
120  nc['msgdate'][nmsg] = bufr.msg_date
121  nmsg += 1
122  while bufr.load_subset() == 0: # loop over subsets in message.
123  nsubset += 1
124  hdr = bufr.read_subset(hdstr).squeeze()
125  obs = bufr.read_subset(obstr)
126  qc = bufr.read_subset(qcstr)
127  err = bufr.read_subset(oestr)
128  nlevs_use = 0
129 
130  for nlev in range(obs.shape[-1]):
131  lon = hdr[1]; lat = hdr[2]; time = hdr[3]
132  press = obs[0,nlev]; z = hdr[5]
133  # use balloon drift lat/lon/time for sondes, pibals
134  # in obid string.
135  if hdr[4] in [120,220,221]:
136  lon = obs[11,nlev]; lat = obs[12,nlev]
137  # only use drift corrected time if it is within
138  # assimilation window.
139  if obs[13,nlev] >= time_min and obs[13,nlev] < time_max:
140  time = obs[13,nlev]
141  elif hdr[4] >= 223 and hdr[4] <= 228:
142  z = obs[3,nlev] # use zob, not station elev
143  obidstr = "%s %3i %6.2f %6.2f %9.5f %5i %6.1f" % \
144  (hdr[0].tostring(), hdr[4], lon, lat, time, z, press)
145  # skip ob if there is already a matching obid string
146  # (do we need more checking here to make sure obs are
147  # really duplicates?)
148  if obidstr not in obid_set:
149  obid_set.add(obidstr)
150  else:
151  print('skipping duplicate ob %s' % obidstr)
152  continue
153 
154  #print(obidstr)
155 
156  hdrarr.append(hdr.squeeze())
157  obidarr.append(obidstr)
158  obsarr.append(obs[:,nlev])
159  errarr.append(err[:,nlev])
160  qcarr.append(qc[:,nlev])
161  msgnumarr.append(nmsg)
162  subsetnumarr.append(nsubset)
163  nob += 1; nlevs_use += 1
164  nobs_message += nlevs_use # obs for this message
165  # make lists into arrays.
166  hdrarr = np.array(hdrarr); obsarr = np.array(obsarr)
167  errarr = np.array(errarr); qcarr = np.array(qcarr)
168  obidarr = np.array(obidarr); msgnumarr = np.array(msgnumarr)
169  subsetnumarr = np.array(subsetnumarr)
170  qcarr = np.where(qcarr == bufr.missing_value,qc_fillval,qcarr).astype(np.uint8)
171  # write all the data for this message
172  nob1 = nob-nobs_message
173 
174  print('writing message %s out of %s, type %s with %s obs' %\
175  (nmsg,nmessages,bufr.msg_type,nobs_message))
176  nc['header'][nob1:nob] = hdrarr
177  nc['obdata'][nob1:nob] = obsarr
178  nc['oberr'][nob1:nob] = errarr
179  nc['qcdata'][nob1:nob] = qcarr
180  nc['obid'][nob1:nob] = obidarr
181  nc['msgnum'][nob1:nob] = msgnumarr
182  nc['subsetnum'][nob1:nob] = subsetnumarr
183  nc.sync() # dump data to disk.
184 
185 # close files.
186 bufr.close(); nc.close()
187