IODA Bundle
bufr2nc.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 
3 from __future__ import print_function
4 import ncepbufr
5 import sys
6 import os
7 import argparse
8 from netCDF4 import Dataset
9 from pathlib import Path
10 
11 IODA_CONV_PATH = Path(__file__).parent/"@SCRIPT_LIB_PATH@"
12 if not IODA_CONV_PATH.is_dir():
13  IODA_CONV_PATH = Path(__file__).parent/'..'/'lib-python'
14 sys.path.append(str(IODA_CONV_PATH.resolve()))
15 
16 import bufr2ncCommon as cm
17 import bufr2ncObsTypes as ot
18 
19 ###############################################################################
20 # SUBROUTINES
21 ###############################################################################
22 
23 
24 def BfilePreprocess(BufrFname, Obs):
25  # This routine will read the BUFR file and figure out how many observations
26  # will be read when recording data.
27 
28  bufr = ncepbufr.open(BufrFname)
29 
30  # The number of observations will be equal to the total number of subsets
31  # contained in the selected messages.
32  NumObs = 0
33  Obs.start_msg_selector()
34  while (Obs.select_next_msg(bufr)):
35  NumObs += Obs.msg_obs_count(bufr)
36 
37  bufr.close()
38 
39  return [NumObs, Obs.num_msg_selected, Obs.num_msg_mtype]
40 
41 
42 ###############################################################################
43 # MAIN
44 ###############################################################################
45 ScriptName = os.path.basename(sys.argv[0])
46 
47 # Parse command line
48 ap = argparse.ArgumentParser()
49 ap.add_argument("obs_type", help="observation type")
50 ap.add_argument("input_bufr", help="path to input BUFR file")
51 ap.add_argument("output_netcdf", help="path to output netCDF4 file")
52 ap.add_argument("-m", "--maxmsgs", type=int, default=-1,
53  help="maximum number of messages to keep", metavar="<max_num_msgs>")
54 ap.add_argument("-t", "--thin", type=int, default=1,
55  help="select every nth message (thinning)", metavar="<thin_interval>")
56 ap.add_argument("-c", "--clobber", action="store_true",
57  help="allow overwrite of output netcdf file")
58 ap.add_argument("-p", "--prepbufr", action="store_true",
59  help="input BUFR file is in prepBUFR format")
60 
61 MyArgs = ap.parse_args()
62 
63 ObsType = MyArgs.obs_type
64 BufrFname = MyArgs.input_bufr
65 NetcdfFname = MyArgs.output_netcdf
66 MaxNumMsg = MyArgs.maxmsgs
67 ThinInterval = MyArgs.thin
68 ClobberOfile = MyArgs.clobber
69 if (MyArgs.prepbufr):
70  BfileType = cm.BFILE_PREPBUFR
71 else:
72  BfileType = cm.BFILE_BUFR
73 
74 # Check files
75 BadArgs = False
76 if (not os.path.isfile(BufrFname)):
77  print("ERROR: {0:s}: Specified input BUFR file does not exist: {1:s}".format(
78  ScriptName, BufrFname))
79  print("")
80  BadArgs = True
81 
82 if (os.path.isfile(NetcdfFname)):
83  if (ClobberOfile):
84  print("WARNING: {0:s}: Overwriting nc file: {1:s}".format(
85  ScriptName, NetcdfFname))
86  print("")
87  else:
88  print("ERROR: {0:s}: Specified nc file already exists: {1:s}".format(
89  ScriptName, NetcdfFname))
90  print("ERROR: {0:s}: Use -c option to overwrite.".format(ScriptName))
91  print("")
92  BadArgs = True
93 
94 # Check the observation type, and create an observation instance.
95 if (ObsType == 'Aircraft'):
96  Obs = ot.AircraftObsType(BfileType)
97 elif (ObsType == 'Sondes'):
98  Obs = ot.SondesObsType(BfileType)
99 elif (ObsType == 'Amsua'):
100  Obs = ot.AmsuaObsType(BfileType)
101 elif (ObsType == 'Gpsro'):
102  Obs = ot.GpsroObsType(BfileType)
103 else:
104  print("ERROR: {0:s}: Unknown observation type: {1:s}".format(
105  ScriptName, ObsType))
106  print("")
107  BadArgs = True
108 
109 if (not BadArgs):
110  if (Obs.mtype_re == 'UnDef'):
111  if (BfileType == cm.BFILE_BUFR):
112  print("ERROR: {0:s}: Observation type {1:s} for BUFR format is undefined".format(
113  ScriptName, ObsType))
114  elif (BfileType == cm.BFILE_PREPBUFR):
115  print("ERROR: {0:s}: Observation type {1:s} for prepBUFR format is undefined".format(
116  ScriptName, ObsType))
117  print("")
118  BadArgs = True
119 
120 if (BadArgs):
121  sys.exit(2)
122 
123 # Arguments are okay, and we've got an observation object instantiated. Note
124 # that we need to have the obs object instantiated before calling
125 # BfilePreprocess() routine below. This is so BfilePreprocess() can select
126 # messages in the same manner as the subsequent conversion.
127 print("Converting BUFR to netCDF")
128 print(" Observation Type: {0:s}".format(ObsType))
129 if (BfileType == cm.BFILE_BUFR):
130  print(" Input BUFR file (BUFR format): {0:s}".format(BufrFname))
131 elif (BfileType == cm.BFILE_PREPBUFR):
132  print(" Input BUFR file (prepBUFR format): {0:s}".format(BufrFname))
133 print(" Output netCDF file: {0:s}".format(NetcdfFname))
134 if (MaxNumMsg > 0):
135  print(
136  " Limiting nubmer of messages to record to {0:d} messages".format(MaxNumMsg))
137 if (ThinInterval > 1):
138  print(" Thining: selecting every {0:d}-th message".format(ThinInterval))
139 print("")
140 
141 # It turns out that using multiple unlimited dimensions in the netCDF file
142 # can be very detrimental to the file's size, and can also be detrimental
143 # to the runtime for creating the file.
144 #
145 # In order to mitigate this, we want to use fixed size dimensions instead.
146 # Each obs type object will have its associated dimension sizes defined as
147 # fixed sizes. The only missing part is how many observations (subsets) will
148 # be selected.
149 #
150 # This number of subsets needs to be determined from reading through all the
151 # selected messages. Fortunately, this is very fast.
152 #
153 # Make a pass through the BUFR file to determine the number of observations
154 # and the reference time.
155 #
156 # BfilePreprocess() will use the regular expression for selecting message
157 # types. NumObs will be set to the number of observations selected,
158 # NumMsgs will be set to the number of messages selected, and TotalMsgs will
159 # be set to the total number of messages that match Obs.mtype_re in the file.
160 Obs.max_num_msg = MaxNumMsg
161 Obs.thin_interval = ThinInterval
162 [NumObs, NumMsgs, TotalMsgs] = BfilePreprocess(BufrFname, Obs)
163 
164 print(" Total number of messages that match obs type {0:s}: {1:d}".format(
165  ObsType, TotalMsgs))
166 print(" Number of messages selected: {0:d}".format(NumMsgs))
167 print(" Number of observations selected: {0:d}".format(NumObs))
168 print("")
169 
170 # Now that we have the number of observations we will be recording, set the
171 # dimension size in the obs object. Note the set_nlocs() method needs to be
172 # called before creating netcdf variables.
173 Obs.set_nlocs(NumObs)
174 
175 # Create the dimensions and variables in the netCDF file in preparation for
176 # recording the selected observations.
177 nc = Dataset(NetcdfFname, 'w', format='NETCDF4')
178 Obs.create_nc_datasets(nc)
179 
180 # Fill in the dimension variables with the coordinate values. Just using dummy
181 # values for now which are 1..n where n is the size of the coorespoding
182 # dimension.
183 Obs.fill_coords(nc)
184 
185 # Open the BUFR file and initialize the file object.
186 bufr = ncepbufr.open(BufrFname)
187 
188 # Run the conversion
189 Obs.convert(bufr, nc)
190 
191 # Clean up
192 bufr.close()
193 
194 nc.sync()
195 nc.close()
def BfilePreprocess(BufrFname, Obs)
SUBROUTINES.
Definition: bufr2nc.py:24