IODA Bundle
JEDI_conv_bufr2nc_2D.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 """convert prepbufr file to netCDF
3 cdraper, Nov, 2017"""
4 
5 from __future__ import print_function
6 import ncepbufr
7 import numpy as np
8 import netCDF4
9 import numbers
10 import sys
11 import re
12 from netCDF4 import Dataset
13 
14 # INPUT ARGS
15 
16 #if len(sys.argv) < 4:
17 # raise SystemExit('prepbufr2nc <input prepbufr> <output netcdf> <VARIABLE>')
18 
19 #prepbufr_filename = sys.argv[1]
20 #netcdf_filename = sys.argv[2]
21 #VARIABLE = sys.argv[3]
22 
23 prepbufr_filename = 'prepbufr.gdas.2016030406'
24 netcdf_filename = 'test.nc'
25 ##################################
26 # functions
27 
28 # create variables, append handles
29 def append_nc_createvar(min_ind, bufr_str,dim_flag):
30  for n in range(len(bufr_str)):
31  if (dim_flag==0):
32  nc_varh.append(nc.createVariable(bufr_str[n],np.double,'nobs',zlib=True))
33  elif (dim_flag==1):
34  nc_varh.append(nc.createVariable(bufr_str[n]+'1',np.double,'nobs',zlib=True))
35  elif (dim_flag==2):
36  nc_varh.append(nc.createVariable(bufr_str[n]+'2',np.double,('nobs','max_plev'),zlib=True))
37  elif (dim_flag==3): # bufr events
38  nc_varh.append(nc.createVariable(bufr_str[n]+'_bevn3',np.double,('nobs','max_plev','bevn_layer'),zlib=True))
39  elif (dim_flag==4): # bufr events
40  nc_varh.append(nc.createVariable(bufr_str[n]+'_bevn2',np.double,('nobs','bevn_layer'),zlib=True))
41 
42  next_ind=min_ind+len(bufr_str)
43  return next_ind
44 
45 # read bufr, write to nc (single record)
46 def bufr2nc_record(min_ind, bufr_str,n_subset):
47  for n in range(len(bufr_str)):
48  tmp = bufr.read_subset(bufr_str[n]).data
49  #if (tmp.size > 0): # if hdr is present and data is not missing
50  if ((tmp.size >0) & (tmp < 10**9)):
51  nc_varh[n+min_ind][n_subset] = tmp
52 
53 # read bufr, write to nc (single record, 2D)
54 def bufr2nc_record2D(min_ind1, min_ind2, bufr_str,n_subset):
55  maxnlev=0
56 
57  for n in range(len(bufr_str)):
58  tmp = bufr.read_subset(bufr_str[n]).data
59  nlev=tmp.size # includes missing data
60  maxnlev=max(maxnlev,nlev)
61 
62  if (nlev>1):
63  for z in range(nlev):
64  if (tmp[0,z] < 10**9): # skip missing
65  nc_varh[n+min_ind2][n_subset,z] = tmp[0,z]
66  elif (nlev==1):
67  nc_varh[n+min_ind1][n_subset] = tmp
68 
69  return maxnlev
70 
71 def bufr2nc_record_bevn(min_ind2, min_ind3, bufr_str,n_subset):
72  maxnlev=0
73 
74  for n in range(len(bufr_str)):
75  tmp = bufr.read_subset(bufr_str[n],events=True).data
76  nlev=tmp.shape[1] # includes missing data
77  maxnlev=max(maxnlev,nlev)
78 
79  if (nlev>1):
80  for z in range(nlev):
81  for e in range(20):
82  if (tmp[0,z,e] < 10**9): # skip missing
83  nc_varh[n+min_ind3][n_subset,z,e] = tmp[0,z,e]
84  elif (nlev==1):
85  for e in range(20):
86  if (tmp[0,0,e] < 10**9): # skip missing
87  nc_varh[n+min_ind2][n_subset,e] = tmp[0,0,e]
88 
89  return maxnlev
90 
91 
92 ##################################
93 # from read_prepbufr.f90
94 
95 hdstr = np.array(['SID','XOB','YOB','DHR','TYP','ELV','SAID','T29'])
96 misc = np.array(['TSB','PRVSTG','SPRVSTG','ACID'])
97 obstr = np.array(['POB','QOB','TOB','ZOB','UOB','VOB','PWO','MXGS','HOVI','CAT','PRSS','TDO','PMO' ])
98 satqcstr = np.array(['QIFN'])
99 drift = np.array(['XDR','YDR','HRDR'])
100 fcststr = np.array(['UFC','VFC','TFC'])
101 qcstr = np.array(['PQM','QQM','TQM','ZQM','WQM','NUL','PWQ','PMQ'])
102 oestr = np.array(['POE','QOE','TOE','NUL','WOE','NUL','PWE'])
103 aircraftstr = np.array(['POAF','IALR'])
104 bevnstr = np.array(['TPC','TOB','TQM'])
105 
106 sststr = ['MSST','DBSS','SST1','SSTQM','SSTOE']
107 prvstr = ['PRVSTG']
108 sprvstr = ['SPRVSTG']
109 levstr = ['POB']
110 cld2seqstr = ['TOCC','HBLCS']
111 cldseqstr = ['VSSO','CLAM','HOCB']
112 metarcldstr = ['CLAM','HOCB']
113 metarwthstr = ['PRWE']
114 metarvisstr = ['HOVI','TDO']
115 goescldstr = ['CDTP','TOCC','GCDTT','CDTP_QM']
116 maxtmintstr = ['MXTM','MITM']
117 owavestr = ['HOWV']
118 cldceilhstr = ['CEILING']
119 
120 ##################################
121 # INDEXES TO BE READ IN FROM EACH STRING ABOVE
122 
123 # ALL
124 hdr_ind_out = np.arange(8)
125 msc_ind_out = np.arange(4)
126 
127 obs_ind_out = np.arange(13)
128 sqc_ind_out = np.array([0])
129 drf_ind_out = np.arange(3)
130 fcs_ind_out = np.arange(3)
131 qcs_ind_out = np.array([0,1,2,3,4,6,7])
132 oes_ind_out = np.array([0,1,2,4,6])
133 bev_ind_out = np.array([0])
134 air_ind_out = np.arange(2)
135 
136 
137 ##################################################################
138 # SET UP THE NC FILE
139 
140 nc =Dataset(netcdf_filename,'w',format='NETCDF4')
141 
142 # dimensions
143 nobs_dim = nc.createDimension('nobs',None)
144 nobs_diag_dim = nc.createDimension('nobs_diag',None)
145 plev_diag_dim = nc.createDimension('max_plev',None)
146 bevn_layr_dim = nc.createDimension('bevn_layer',None)
147 mtyp_string_dim = nc.createDimension('mtyp_string_len',10) # string not supported by nc4, write as array of char
148 
149 # initialize variables handles
150 nc_varh=[]
151 nc_varh.append(nc.createVariable('idate','i','nobs'))
152 nc_varh.append(nc.createVariable('msg_type','S1',('nobs','mtyp_string_len')))
153 
154 
155 # doubles
156 next_ind=2 # index 0 is date, index 1 is msg_header
157 min_ind_hdr = next_ind ; next_ind = append_nc_createvar(next_ind,hdstr[hdr_ind_out],0)
158 min_ind_msc = next_ind ; next_ind = append_nc_createvar(next_ind,misc[msc_ind_out],0)
159 min_ind_sqc = next_ind ; next_ind = append_nc_createvar(next_ind,satqcstr[sqc_ind_out],0)
160 
161 # variables with 2 dimensions
162 min_ind_obs2 = next_ind ; next_ind = append_nc_createvar(next_ind,obstr[obs_ind_out],2)
163 min_ind_drf2 = next_ind ; next_ind = append_nc_createvar(next_ind,drift[drf_ind_out],2)
164 min_ind_fcs2 = next_ind ; next_ind = append_nc_createvar(next_ind,fcststr[fcs_ind_out],2)
165 min_ind_qcs2 = next_ind ; next_ind = append_nc_createvar(next_ind,qcstr[qcs_ind_out],2)
166 min_ind_oes2 = next_ind ; next_ind = append_nc_createvar(next_ind,oestr[oes_ind_out],2)
167 min_ind_air2 = next_ind ; next_ind = append_nc_createvar(next_ind,aircraftstr[air_ind_out],2)
168 
169 # 1D versions
170 min_ind_obs1 = next_ind ; next_ind = append_nc_createvar(next_ind,obstr[obs_ind_out],1)
171 min_ind_drf1 = next_ind ; next_ind = append_nc_createvar(next_ind,drift[drf_ind_out],1)
172 min_ind_fcs1 = next_ind ; next_ind = append_nc_createvar(next_ind,fcststr[fcs_ind_out],1)
173 min_ind_qcs1 = next_ind ; next_ind = append_nc_createvar(next_ind,qcstr[qcs_ind_out],1)
174 min_ind_oes1 = next_ind ; next_ind = append_nc_createvar(next_ind,oestr[oes_ind_out],1)
175 min_ind_air1 = next_ind ; next_ind = append_nc_createvar(next_ind,aircraftstr[air_ind_out],1)
176 
177 # variables with 3 dimensions
178 min_ind_bev3 = next_ind ; next_ind = append_nc_createvar(next_ind,bevnstr[bev_ind_out],3)
179 
180 # 2D versions
181 min_ind_bev2 = next_ind ; next_ind = append_nc_createvar(next_ind,bevnstr[bev_ind_out],4)
182 
183 bufr = ncepbufr.open(prepbufr_filename )
184 
185 # read prepbufr data
186 
187 n_subset=0
188 n_msg_excl=0
189 
190 
191 nlev_msg=[]
192 type_msg=[]
193 
194 #while (bufr.advance() == 0) & (n_subset<1000): # loop over messages.
195 while (bufr.advance() == 0):
196  # msg type
197  tmp_str = bufr.msg_type
198  if ( (tmp_str == 'AIRCFT') | (tmp_str == 'AIRCAR') | (tmp_str == 'SATWND') ):
199  n_msg_excl+=1
200  continue
201 
202  while bufr.load_subset() == 0: # loop over subsets in message.
203 
204  # date
205  nc_varh[0][n_subset] = bufr.msg_date
206 
207  # msg type
208  #tmp_str = bufr.msg_type
209  if (len(tmp_str) > 10 ):
210  print ('msg_type len too long)')
211  else:
212  tmp_str=tmp_str.ljust(10,' ')
213 
214  #for s in range(len(tmp_str)):
215  # nc_varh[1][n_subset,s] = tmp_str[s]
216 
217  #tmp = bufr.read_subset('ZOB').data
218  #nlev=tmp.size # includes missing data
219 
220  #for z in range(nlev):
221  # if (tmp[0,z] < 10**9): # skip missing
222  # nc_varh[min_ind_obs][n_subset,z] = tmp[0,z]
223 
224  bufr2nc_record(min_ind_hdr,hdstr[hdr_ind_out],n_subset)
225  bufr2nc_record(min_ind_msc, misc[msc_ind_out],n_subset)
226  bufr2nc_record(min_ind_sqc, satqcstr[sqc_ind_out],n_subset)
227  nlev = bufr2nc_record2D(min_ind_obs1, min_ind_obs2, obstr[obs_ind_out],n_subset)
228  nlev_msg.append(nlev)
229  type_msg.append(tmp_str)
230 
231  nlev = bufr2nc_record2D(min_ind_drf1, min_ind_drf2, drift[drf_ind_out],n_subset)
232  nlev = bufr2nc_record2D(min_ind_fcs1, min_ind_fcs2, fcststr[fcs_ind_out],n_subset)
233  nlev = bufr2nc_record2D(min_ind_qcs1, min_ind_qcs2, qcstr[qcs_ind_out],n_subset)
234  nlev = bufr2nc_record2D(min_ind_oes1, min_ind_oes2, oestr[oes_ind_out],n_subset)
235  nlev = bufr2nc_record2D(min_ind_air1, min_ind_air2, aircraftstr[air_ind_out],n_subset)
236  nlev = bufr2nc_record_bevn(min_ind_bev2, min_ind_bev3, bevnstr[bev_ind_out],n_subset)
237 
238  n_subset+=1
239  print(n_subset)
240 
241 nc.virtmp_code = bufr.get_program_code('VIRTMP')
242 
243 
244 print(n_subset)
245 nc.sync()
246 nc.close()
247 
248 bufr.close()
249 
250 
def bufr2nc_record_bevn(min_ind2, min_ind3, bufr_str, n_subset)
def bufr2nc_record(min_ind, bufr_str, n_subset)
def append_nc_createvar(min_ind, bufr_str, dim_flag)
functions
def bufr2nc_record2D(min_ind1, min_ind2, bufr_str, n_subset)