IODA Bundle
gsi_ncdiag.py
Go to the documentation of this file.
1 # gsi_ncdiag.py
2 # a collection of classes, and supporting information
3 # to read in GSI netCDF diagnostic files and rewrite them
4 # into JEDI UFO GeoVaLs and IODA observation files
5 ###############################################################################
6 ###############################################################################
7 # dictionaries and lists
8 
9 import os
10 from collections import defaultdict, OrderedDict
11 from orddicts import DefaultOrderedDict
12 
13 import numpy as np
14 import datetime as dt
15 import netCDF4 as nc
16 
17 import ioda_conv_ncio as iconv
18 
19 __ALL__ = ['conv_platforms']
20 
21 conv_platforms = {
22  "conv_ps": [
23  'sfc',
24  'sondes',
25  'sfcship',
26  ],
27  "conv_q": [
28  'aircraft',
29  'sondes',
30  'sfcship',
31  'sfc',
32  ],
33  "conv_t": [
34  'aircraft',
35  'sondes',
36  'rass',
37  'sfcship',
38  'sfc',
39  ],
40  "conv_uv": [
41  'sfc',
42  'aircraft',
43  'sondes',
44  'vadwind',
45  'windprof',
46  'sfcship',
47  'satwind',
48  'scatwind',
49  ],
50  "conv_gps": [
51  'gps',
52  ],
53  "conv_sst": [
54  'sst',
55  ]
56 }
57 
58 # note in python range, last number is not used so second values are +1
59 # bufr codes
60 uv_bufrtypes = {
61  "aircraft": [230, 231, 233, 235], # 234 is TAMDAR; always rstprod
62  "sondes": range(220, 223),
63  "satwind": range(240, 261),
64  "vadwind": [224],
65  "windprof": range(227, 230),
66  "sfcship": [280, 282, 284],
67  "sfc": [281, 287],
68  "scatwind": [290],
69  # 232 are dropsondes
70 }
71 
72 conv_bufrtypes = {
73  "aircraft": [130, 131, 133, 135], # 234 is TAMDAR; always rstprod
74  "sondes": range(120, 123),
75  "rass": [126],
76  "sfcship": [180, 183],
77  "sfc": [181, 187],
78  "gps": [3, 4, 42, 43, 745, 825],
79  "sst": [181, 182, 183, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202],
80  # 132 are dropsondes
81 }
82 
83 # LocKeyList = { 'gsiname':('IODAname','dtype')}
84 all_LocKeyList = {
85  'Station_ID': ('station_id', 'string'),
86  'Time': ('datetime', 'string'),
87  'Latitude': ('latitude', 'float'),
88  'Longitude': ('longitude', 'float'),
89  'Station_Elevation': ('station_elevation', 'float'),
90  'Pressure': ('air_pressure', 'float'),
91  'Height': ('height', 'float'),
92  'Elevation': ('height_above_mean_sea_level', 'float'),
93  'Obs_Time': ('datetime', 'string'),
94  'Scan_Position': ('scan_position', 'float'),
95  'Sat_Zenith_Angle': ('sensor_zenith_angle', 'float'),
96  'Sat_Azimuth_Angle': ('sensor_azimuth_angle', 'float'),
97  'Sol_Zenith_Angle': ('solar_zenith_angle', 'float'),
98  'Sol_Azimuth_Angle': ('solar_azimuth_angle', 'float'),
99  'Scan_Angle': ('sensor_view_angle', 'float'),
100  'Surface_type': ('surface_type', 'integer'),
101  'MODIS_deep_blue_flag': ('modis_deep_blue_flag', 'integer'),
102  'Reference_Pressure': ('air_pressure', 'float'),
103  'Solar_Zenith_Angle': ('solar_zenith_angle', 'float'),
104  'Row_Anomaly_Index': ('row_anomaly_index', 'float'),
105  'TopLevelPressure': ('top_level_pressure', 'float'),
106  'BottomLevelPressure': ('bottom_level_pressure', 'float'),
107  'Total_Ozone_Error_Flag': ('total_ozone_error_flag', 'float'),
108  'Profile_Ozone_Error_Flag': ('profile_ozone_error_flag', 'float'),
109  'XoverR': ('radar_azimuth', 'float'),
110  'YoverR': ('radar_tilt', 'float'),
111  'ZoverR': ('radar_dir3', 'float'),
112  'Vterminal': ('vterminal', 'float'),
113  'SWCM_spec_type': ('satwind_spectral_type', 'float'),
114  'SAZA_sat_zen_angle': ('sensor_zenith_angle', 'float'),
115  'SCCF_chan_wavelen': ('channel_wavelength', 'float'),
116  'QI_with_FC': ('satwind_quality_ind_with_fc', 'float'),
117  'QI_without_FC': ('satwind_quality_ind_no_fc', 'float'),
118  'LaunchTime': ('LaunchTime', 'float'),
119 }
120 
121 checkuv = {
122  "eastward_wind": "u",
123  "northward_wind": "v",
124 }
125 
126 conv_varnames = {
127  "tv": ["virtual_temperature"],
128  "tsen": ["air_temperature"],
129  "uv": ["eastward_wind", "northward_wind"],
130  "ps": ["surface_pressure"],
131  "q": ["specific_humidity"],
132  "bend": ["bending_angle"],
133  "refract": ["refractivity"],
134  "sst": ["sea_surface_temperature"],
135 }
136 
137 conv_gsivarnames = {
138  "tv": ["Observation"],
139  "tsen": ["Observation"],
140  "uv": ["u_Observation", "v_Observation"],
141  "ps": ["Observation"],
142  "q": ["Observation"],
143  "bend": ["Observation"],
144  "refract": ["Observation"],
145  "sst": ["Observation"],
146 }
147 
148 gsi_add_vars_allsky = {
149  'Observation_Type': 'ObsType',
150  'Prep_Use_Flag': 'PreUseFlag',
151  'Analysis_Use_Flag': 'GsiUseFlag',
152  'Nonlinear_QC_Rel_Wgt': 'GsiQCWeight',
153  'Errinv_Adjust': 'GsiAdjustObsError',
154  'Errinv_Final': 'GsiFinalObsError',
155  'Forecast_adjusted': 'GsiHofXBc',
156  'Forecast_unadjusted': 'GsiHofX',
157  'Forecast_unadjusted_clear': 'GsiHofXClr',
158  'Obs_Minus_Forecast_adjusted': 'GsiHofXBc',
159  'Obs_Minus_Forecast_unadjusted': 'GsiHofX',
160  'Obs_Minus_Forecast_unadjusted_clear': 'GsiHofX',
161  'Inverse_Observation_Error': 'GsiFinalObsError',
162  'Bias_Correction': 'GsiBc',
163  'hxdbz': 'GsiHofX',
164  'hxrw': 'GsiHofX',
165 }
166 
167 gsi_add_qcvars_allsky = {
168  'ObsBias': 'GsiObsBias',
169  'Inverse_Observation_Error_after_jsfcchk': 'GsiObsError_after_jsfcchk',
170  'Inverse_Observation_Error_after_sdoei': 'GsiObsError_after_sdoei',
171  'Inverse_Observation_Error_after_grosschk': 'GsiObsError_after_grosschk',
172  'Inverse_Observation_Error_sdoei': 'GsiObsError_sdoei',
173  'Inverse_Observation_Error_grosschk': 'GsiObsError_grosschk',
174 }
175 
176 gsi_add_vars = {
177  'ObsBias': 'GsiObsBias',
178  'Observation_Type': 'ObsType',
179  'Prep_Use_Flag': 'PreUseFlag',
180  'Analysis_Use_Flag': 'GsiUseFlag',
181  'Nonlinear_QC_Rel_Wgt': 'GsiQCWeight',
182  'Errinv_Adjust': 'GsiAdjustObsError',
183  'Errinv_Final': 'GsiFinalObsError',
184  'Obs_Minus_Forecast_adjusted': 'GsiHofXBc',
185  'Obs_Minus_Forecast_unadjusted': 'GsiHofX',
186  'Forecast_adjusted': 'GsiHofXBc',
187  'Forecast_unadjusted': 'GsiHofX',
188  'Inverse_Observation_Error': 'GsiFinalObsError',
189  'Bias_Correction': 'GsiBc',
190  'hxdbz': 'GsiHofX',
191  'hxrw': 'GsiHofX',
192 }
193 
194 gsi_add_qcvars = {
195  'Bias_Correction': 'GsiObsBias',
196  'Inverse_Observation_Error_jsfc': 'GsiObsError_jsfc',
197  'Inverse_Observation_Error_clddet': 'GsiObsError_clddet',
198  'Inverse_Observation_Error_nsstret': 'GsiObsError_nsstret',
199  'Inverse_Observation_Error_grosschk': 'GsiObsError_grosschk',
200  'Inverse_Observation_Error_after_wavenum': 'GsiObsError_after_wavenum',
201  'Inverse_Observation_Error_after_range': 'GsiObsError_after_rangechk',
202  'Inverse_Observation_Error_after_topo': 'GsiObsError_after_topo',
203  'Inverse_Observation_Error_after_transmittop': 'GsiObsError_after_transmittop',
204  'Inverse_Observation_Error_after_clddet': 'GsiObsError_after_clddet',
205  'Inverse_Observation_Error_after_nsstret': 'GsiObsError_after_nsstret',
206  'Inverse_Observation_Error_after_jsfcchk': 'GsiObsError_after_jsfcchk',
207 }
208 
209 gsi_add_vars_uv = {
210  'Observation_Type': 'ObsType',
211  'Prep_Use_Flag': 'PreUseFlag',
212  'Analysis_Use_Flag': 'GsiUseFlag',
213  'Nonlinear_QC_Rel_Wgt': 'GsiQCWeight',
214  'Errinv_Adjust': 'GsiAdjustObsError',
215  'Errinv_Final': 'GsiFinalObsError',
216  'Errinv_Input': 'GsiInputObsError',
217  'u_Forecast_adjusted': 'GsiHofXBc',
218  'u_Forecast_unadjusted': 'GsiHofX',
219  'v_Forecast_adjusted': 'GsiHofXBc',
220  'v_Forecast_unadjusted': 'GsiHofX',
221  'u_Obs_Minus_Forecast_adjusted': 'GsiHofXBc',
222  'u_Obs_Minus_Forecast_unadjusted': 'GsiHofX',
223  'v_Obs_Minus_Forecast_adjusted': 'GsiHofXBc',
224  'v_Obs_Minus_Forecast_unadjusted': 'GsiHofX',
225 }
226 
227 radar_qc = {
228  'obsdbz': 'dbzuse',
229  'obsrw': 'rwuse',
230 }
231 
232 radar_err = {
233  'obsdbz': 'dbzerror',
234  'obsrw': 'rwerror',
235 }
236 
237 # values that should be integers
238 gsiint = [
239  'PreUseFlag',
240  'GsiUseFlag',
241  'ObsType',
242  'Analysis_Use_Flag',
243 ]
244 
245 geovals_metadata_dict = {
246  'Latitude': 'latitude',
247  'Longitude': 'longitude',
248  'Time': 'time',
249  'Obs_Time': 'time',
250 }
251 
252 obsdiag_metadata_dict = {
253  'Latitude': 'latitude',
254  'Longitude': 'longitude',
255  'Time': 'time',
256  'Obs_Time': 'time',
257 }
258 
259 rad_sensors = [
260  'airs',
261  'amsua',
262  'atms',
263  'hirs4',
264  'iasi',
265  'mhs',
266  'seviri',
267  'sndrd1', 'sndrd2', 'sndrd3', 'sndrd4',
268  'cris-fsr',
269  'cris',
270  'ssmis',
271  'abi',
272  'ahi',
273  'avhrr',
274  'avhrr3',
275  'saphir',
276  'gmi',
277  'amsr2',
278 ]
279 
280 radar_sensors = [
281  'radar',
282 ]
283 
284 chan_metadata_dict = {
285  'sensor_chan': 'sensor_channel',
286  'use_flag': 'gsi_use_flag',
287  'frequency': 'sensor_band_central_radiation_frequency',
288  'polarization': 'polarization',
289  'wavenumber': 'sensor_band_central_radiation_wavenumber',
290  'error_variance': 'ObsError',
291  'mean_lapse_rate': 'mean_lapse_rate',
292 }
293 
294 # geovals_vars = {gsiname:geoval_name}
295 geovals_vars = {
296  'virtual_temperature': 'virtual_temperature',
297  'atmosphere_ln_pressure_coordinate': 'atmosphere_ln_pressure_coordinate',
298  'specific_humidity': 'specific_humidity',
299  'northward_wind': 'northward_wind',
300  'eastward_wind': 'eastward_wind',
301  'geopotential_height': 'geopotential_height',
302  'height': 'height_above_mean_sea_level',
303  'tropopause_pressure': 'tropopause_pressure',
304  'surface_pressure': 'surface_pressure',
305  'surface_air_pressure': 'surface_pressure',
306  'surface_temperature': 'surface_temperature',
307  'sea_surface_temperature': 'sea_surface_temperature',
308  'surface_roughness': 'surface_roughness_length',
309  'surface_height': 'surface_geopotential_height',
310  'surface_geopotential_height': 'surface_geopotential_height',
311  'landmask': 'land_area_fraction',
312  'air_temperature': 'air_temperature',
313  'air_pressure': 'air_pressure',
314  'atmosphere_pressure_coordinate': 'air_pressure',
315  'atmosphere_pressure_coordinate_interface': 'air_pressure_levels',
316  'air_pressure_levels': 'air_pressure_levels',
317  'atmosphere_absorber_01': 'humidity_mixing_ratio',
318  'atmosphere_absorber_02': 'mole_fraction_of_carbon_dioxide_in_air',
319  'mole_fraction_of_ozone_in_air': 'mole_fraction_of_ozone_in_air',
320  'atmosphere_absorber_03': 'mole_fraction_of_ozone_in_air',
321  'atmosphere_mass_content_of_cloud_01': 'mass_content_of_cloud_liquid_water_in_atmosphere_layer',
322  'effective_radius_of_cloud_particle_01': 'effective_radius_of_cloud_liquid_water_particle',
323  'atmosphere_mass_content_of_cloud_02': 'mass_content_of_cloud_ice_in_atmosphere_layer',
324  'effective_radius_of_cloud_particle_02': 'effective_radius_of_cloud_ice_particle',
325  'Water_Fraction': 'water_area_fraction',
326  'Land_Fraction': 'land_area_fraction',
327  'Ice_Fraction': 'ice_area_fraction',
328  'Snow_Fraction': 'surface_snow_area_fraction',
329  'Vegetation_Fraction': 'vegetation_area_fraction',
330  'Water_Temperature': 'surface_temperature_where_sea',
331  'Land_Temperature': 'surface_temperature_where_land',
332  'Ice_Temperature': 'surface_temperature_where_ice',
333  'Snow_Temperature': 'surface_temperature_where_snow',
334  'tsavg5': 'average_surface_temperature_within_field_of_view',
335  'Sfc_Wind_Speed': 'surface_wind_speed',
336  'Sfc_Wind_Direction': 'surface_wind_from_direction',
337  'Lai': 'leaf_area_index',
338  'Soil_Moisture': 'volume_fraction_of_condensed_water_in_soil',
339  'Soil_Temperature': 'soil_temperature',
340  'Land_Type_Index': 'land_type_index',
341  'Vegetation_Type': 'vegetation_type_index',
342  'Soil_Type': 'soil_type',
343  'Snow_Depth': 'surface_snow_thickness',
344  'humidity_mixing_ratio': 'humidity_mixing_ratio',
345  'Sfc_Height': 'surface_geopotential_height',
346  'Wind_Reduction_Factor_at_10m': 'wind_reduction_factor_at_10m',
347  'sulf': 'sulf',
348  'bc1': 'bc1',
349  'bc2': 'bc2',
350  'oc1': 'oc1',
351  'oc2': 'oc2',
352  'dust1': 'dust1',
353  'dust2': 'dust2',
354  'dust3': 'dust3',
355  'dust4': 'dust4',
356  'dust5': 'dust5',
357  'seas1': 'seas1',
358  'seas2': 'seas2',
359  'seas3': 'seas3',
360  'seas4': 'seas4',
361  'dbzges': 'equivalent_reflectivity_factor',
362  'upward_air_velocity': 'upward_air_velocity',
363 }
364 
365 obsdiag_vars = {
366  'Jacobian_Surface_Temperature': 'brightness_temperature_jacobian_surface_temperature',
367  'Jacobian_Surface_Emissivity': 'brightness_temperature_jacobian_surface_emissivity',
368  'Jacobian_Temperature': 'brightness_temperature_jacobian_air_temperature',
369  'Jacobian_Moisture': 'brightness_temperature_jacobian_humidity_mixing_ratio',
370  'Layer_Optical_Depth': 'optical_thickness_of_atmosphere_layer',
371  'Layer_to_Space_Transmittance': 'transmittances_of_atmosphere_layer',
372  'Weighting_Function': 'weightingfunction_of_atmosphere_layer',
373  'Pressure_Level_WeightFuncMax': 'pressure_level_at_peak_of_weightingfunction',
374  'Forecast_unadjusted_clear': 'brightness_temperature_assuming_clear_sky',
375 }
376 
377 aod_sensors = [
378  'modis',
379  'viirs',
380 ]
381 
382 oz_sensors = [
383  'gome',
384  'sbuv2',
385  'omi',
386  'ompsnp',
387  'ompstc8',
388  'ompslp',
389  'mls55',
390  'ompsnm',
391 ]
392 
393 # units
394 # 'IODA/UFO_variable_name': 'Unit'
395 units_values = {
396  'virtual_temperature': 'K',
397  'atmosphere_ln_pressure_coordinate': '1',
398  'specific_humidity': '1',
399  'northward_wind': 'm s-1',
400  'eastward_wind': 'm s-1',
401  'geopotential_height': 'm',
402  'height_above_mean_sea_level': 'm',
403  'surface_pressure': 'Pa',
404  'sea_surface_temperature': 'K',
405  'surface_temperature': 'K',
406  'surface_roughness_length': 'm',
407  'surface_geopotential_height': 'm',
408  'land_area_fraction': '1',
409  'air_temperature': 'K',
410  'air_pressure': 'Pa',
411  'air_pressure_levels': 'Pa',
412  'humidity_mixing_ratio': '1',
413  'mole_fraction_of_carbon_dioxide_in_air': '1',
414  'mole_fraction_of_ozone_in_air': '1',
415  'integrated_layer_ozone_in_air': 'DU',
416  'atmosphere_mass_content_of_cloud_liquid_water': 'kg m-2',
417  'effective_radius_of_cloud_liquid_water_particle': 'm',
418  'atmosphere_mass_content_of_cloud_ice': 'kg m-2',
419  'effective_radius_of_cloud_ice_particle': 'm',
420  'water_area_fraction': '1',
421  'land_area_fraction': '1',
422  'ice_area_fraction': '1',
423  'surface_snow_area_fraction': '1',
424  'vegetation_area_fraction': '1',
425  'surface_temperature_where_sea': 'K',
426  'surface_temperature_where_land': 'K',
427  'surface_temperature_where_ice': 'K',
428  'surface_temperature_where_snow': 'K',
429  'surface_wind_speed': 'm s-1',
430  'wind_speed': 'm s-1',
431  'surface_wind_from_direction': 'degree',
432  'leaf_area_index': '1',
433  'volume_fraction_of_condensed_water_in_soil': '1',
434  'soil_temperature': 'K',
435  'land_type_index': '1',
436  'vegetation_type_index': '1',
437  'soil_type': '1',
438  'surface_snow_thickness': 'm',
439  'humidity_mixing_ratio': '1',
440  'wind_reduction_factor_at_10m': '1',
441  'sulf': '1',
442  'bc1': '1',
443  'bc2': '1',
444  'oc1': '1',
445  'oc2': '1',
446  'dust1': '1',
447  'dust2': '1',
448  'dust3': '1',
449  'dust4': '1',
450  'dust5': '1',
451  'seas1': '1',
452  'seas2': '1',
453  'seas3': '1',
454  'seas4': '1',
455  'latitude': 'degrees_north',
456  'longitude': 'degrees_east',
457  'station_elevation': 'm',
458  'height': 'm',
459  'height_above_mean_sea_level': 'm',
460  'scan_position': '1',
461  'sensor_zenith_angle': 'degree',
462  'sensor_azimuth_angle': 'degree',
463  'solar_zenith_angle': 'degree',
464  'solar_azimuth_angle': 'degree',
465  'modis_deep_blue_flag': '1',
466  'row_anomaly_index': '1',
467  'total_ozone_error_flag': '1',
468  'profile_ozone_error_flag': '1',
469  'top_level_pressure': 'Pa',
470  'bottom_level_pressure': 'Pa',
471  'tropopause_pressure': 'Pa',
472  'brightness_temperature_jacobian_surface_temperature': '1',
473  'brightness_temperature_jacobian_surface_emissivity': 'K',
474  'brightness_temperature_jacobian_air_temperature': '1',
475  'brightness_temperature_jacobian_humidity_mixing_ratio': 'K/g/Kg ',
476  'optical_thickness_of_atmosphere_layer': '1',
477  'clw_retrieved_from_observation': 'kg/m/m',
478  'clw_retrieved_from_background': 'kg/m/m',
479  'scat_retrieved_from_observation': '1',
480  'LaunchTime': 'hours',
481 }
482 
483 # @TestReference
484 # fields from GSI to compare to computations done in UFO
485 # Note: For conventional data, the combine script would fail if the
486 # input subset files (_m, and _s) contain test variables.
487 test_fields_conv = {
488  'wind_speed': ('wind_speed', 'float'),
489 }
490 
491 test_fields = {}
492 
493 test_fields_allsky = {
494  'clwp_amsua': ('clw_retrieved_from_observation', 'float'),
495  'clw_guess_retrieval': ('clw_retrieved_from_background', 'float'),
496  'clw_symmetric_amount': ('clw_symmetric_amount', 'float'),
497  'scat_amsua': ('scat_retrieved_from_observation', 'float'),
498 }
499 test_fields_with_channels_allsky = {
500  'Hydrometeor_Affected_Channels': ('Hydrometeor_Affected_Channels', 'float'),
501  'Input_Observation_Error': ('ObsError', 'float'),
502  'Cloud_Match_Index': ('Cloud_Match_Index', 'float'),
503  'Error_Inflation_Factor_sdoei': ('error_inflation_factor_sdoei', 'float'),
504 }
505 test_fields_with_channels = {
506  'Error_Inflation_Factor_Topo': ('error_inflation_factor_topo', 'float'),
507  'Error_Inflation_Factor_Transmittop': ('error_inflation_factor_transmittop', 'float'),
508  'Error_Inflation_Factor_Wavenum': ('error_inflation_factor_wavenum', 'float'),
509  'Error_Inflation_Factor_Jsfc': ('error_inflation_factor_jsfc', 'float'),
510  'Error_Inflation_Factor_Grosschk': ('error_inflation_factor_grosschk', 'float'),
511  'Transmittance_at_Top': ('tao_top', 'float'),
512  'Transmittance_at_Sfc': ('tao_sfc', 'float'),
513  'Cloudy_Channel': ('cloudy_channel', 'integer'),
514  'Transmittance_at_Cloud_Top': ('tao_cldtop', 'float'),
515  'NSST_Retrieval_check': ('nsstret_check', 'integer'),
516 }
517 gmi_chan_dep_loc_vars = {
518  'Sat_Zenith_Angle',
519  'Sat_Azimuth_Angle',
520  'Sol_Zenith_Angle',
521  'Sol_Azimuth_Angle',
522  'Scan_Angle',
523 }
524 
525 
526 class BaseGSI:
527  EPSILON = 9e-12
528  FLOAT_FILL = nc.default_fillvals['f4']
529  INT_FILL = nc.default_fillvals['i4']
530 
531  @staticmethod
532  def _as_array(netcdf_var):
533  return np.array(netcdf_var[:])
534 
535  def var(self, var_name):
536  """
537  Data array. Return a numpy array based on variable name
538  """
539  return self._as_array_as_array(self.df[var_name])
540 
541 
542 # conventional observations
543 class Conv(BaseGSI):
544  """ class Conv - conventional observations
545 
546  Use this class to read in conventional observations
547  from GSI netCDF diag files
548 
549  Functions:
550 
551  Attributes:
552  filename - string path to file
553  validtime - datetime object of valid observation time
554  nobs - number of observations
555  """
556  def __init__(self, filename):
557  self.filenamefilename = filename
558  splitfname = self.filenamefilename.split('/')[-1].split('_')
559  if 'conv' in splitfname:
560  i = splitfname.index('conv')
561  self.obstypeobstype = "_".join(splitfname[i:i + 2])
562  else:
563  raise ValueError("Observation is not a conventional type...")
564  # below is because T has both T and Tv, others should just be 'value'
565  # but flexibility for later (GPS?)
566  if self.obstypeobstype == 'conv_t':
567  self.obsvarsobsvars = ['tv', 'tsen']
568  elif self.obstypeobstype == 'conv_gps':
569  self.obsvarsobsvars = ['bend', 'refract']
570  else:
571  self.obsvarsobsvars = [splitfname[i + 1]]
572 
573  def read(self):
574  # get valid time
575  df = nc.Dataset(self.filenamefilename)
576  tstr = str(df.getncattr('date_time'))
577  self.validtimevalidtime = dt.datetime.strptime(tstr, "%Y%m%d%H")
578  # number of observations
579  self.nobsnobs = len(df['Observation_Type'][:])
580  self.dfdf = df
581 
582  def close(self):
583  self.dfdf.close()
584 
585  def toGeovals(self, OutDir, clobber=True):
586  """ toGeovals(OutDir,clobber=True)
587  if model state fields are in the GSI diag file, create
588  GeoVaLs in an output file for use by JEDI/UFO
589  """
590  # note, this is a temporary construct and thus, there is no
591  # ioda_conv_ncio or equivalent to handle the format
592  # get list of platforms to process for the given obstype
593  try:
594  platforms = conv_platforms[self.obstypeobstype]
595  except BaseException:
596  print(self.obstypeobstype + " is not currently supported. Exiting.")
597  return
598  # loop through obsvariables and platforms to do processing
599  for v in self.obsvarsobsvars:
600  for p in platforms:
601  outname = OutDir + '/' + p + '_' + v + '_geoval_' + \
602  self.validtimevalidtime.strftime("%Y%m%d%H") + '.nc4'
603  if (v == 'sst'):
604  outname = OutDir + '/' + v + '_geoval_' + \
605  self.validtimevalidtime.strftime("%Y%m%d%H") + '.nc4'
606  if (p == 'windprof' or p == 'satwind' or p == 'scatwind' or p == 'vadwind'):
607  outname = OutDir + '/' + p + '_geoval_' + \
608  self.validtimevalidtime.strftime("%Y%m%d%H") + '.nc4'
609  if not clobber:
610  if (os.path.exists(outname)):
611  print("File exists. Skipping and not overwriting:%s" % outname)
612  continue
613  OutVars = []
614  InVars = []
615  for ncv in self.dfdf.variables:
616  if ncv in geovals_vars:
617  OutVars.append(geovals_vars[ncv])
618  InVars.append(ncv)
619 
620  idx = grabobsidx(self.dfdf, p, v)
621  if (np.sum(idx) == 0):
622  print("No matching observations for Platform:%s Var:%s" % (p, v))
623  continue
624  print("Platform:%s Var:%s #Obs:%d" % (p, v, np.sum(idx)))
625  # set up output file
626  ncout = nc.Dataset(outname, 'w', format='NETCDF4')
627  ncout.setncattr(
628  "date_time", np.int32(
629  self.validtimevalidtime.strftime("%Y%m%d%H")))
630  # get nlocs
631  nlocs = np.sum(idx)
632  ncout.createDimension("nlocs", nlocs)
633  # other dims
634  if (v != "sst"):
635  ncout.createDimension(
636  "nlevs", self.dfdf.dimensions["atmosphere_pressure_coordinate_arr_dim"].size)
637  ncout.createDimension(
638  "ninterfaces", self.dfdf.dimensions["atmosphere_pressure_coordinate_interface_arr_dim"].size)
639  dimname = "Station_ID_maxstrlen"
640  ncout.createDimension(dimname, self.dfdf.dimensions[dimname].size)
641  dimname = "Observation_Class_maxstrlen"
642  ncout.createDimension(dimname, self.dfdf.dimensions[dimname].size)
643  for var in self.dfdf.variables.values():
644  vname = var.name
645  if (vname in geovals_metadata_dict.keys()) or (
646  vname in geovals_vars.keys()):
647  vdata = var[...].data
648  dims = tuple([len(self.dfdf.dimensions[d]) for d in var.dimensions])
649  vdata = np.frombuffer(vdata, dtype=var.dtype)
650  vdata = np.reshape(vdata, dims)
651  if vname in geovals_metadata_dict.keys():
652  dims = ("nlocs",) + var.dimensions[1:]
653  var_out = ncout.createVariable(geovals_metadata_dict[vname], vdata.dtype, dims)
654  var_out[...] = vdata[idx, ...]
655  if vname in geovals_vars.keys():
656  if (len(var.dimensions) == 1):
657  dims = ("nlocs",)
658  else:
659  if vname == "atmosphere_pressure_coordinate_interface":
660  dims = ("nlocs", "ninterfaces")
661  else:
662  dims = ("nlocs", "nlevs")
663  var_out = ncout.createVariable(geovals_vars[vname], vdata.dtype, dims)
664  var_out[...] = vdata[idx, ...]
665  ncout.close()
666 
667  def toIODAobs(self, OutDir, clobber=True, platforms=None):
668  """ toIODAobs(OutDir,clobber=True)
669  output observations from the specified GSI diag file
670  to the JEDI/IODA observation format
671  """
672  if not platforms:
673  # get list of platforms to process for the given obstype
674  try:
675  platforms = conv_platforms[self.obstypeobstype]
676  except BaseException:
677  print(self.obstypeobstype + " is not currently supported. Exiting.")
678  return
679  # loop through obsvariables and platforms to do processing
680  for v in self.obsvarsobsvars:
681  for p in platforms:
682  # set up a NcWriter class
683  outname = OutDir + '/' + p + '_' + v + '_obs_' + \
684  self.validtimevalidtime.strftime("%Y%m%d%H") + '.nc4'
685  if (v == 'sst'):
686  outname = OutDir + '/' + v + '_obs_' + \
687  self.validtimevalidtime.strftime("%Y%m%d%H") + '.nc4'
688  if (p == 'windprof' or p == 'satwind' or p == 'scatwind' or p == 'vadwind'):
689  outname = OutDir + '/' + p + '_obs_' + \
690  self.validtimevalidtime.strftime("%Y%m%d%H") + '.nc4'
691  if not clobber:
692  if (os.path.exists(outname)):
693  print("File exists. Skipping and not overwriting: %s" % outname)
694  continue
695  LocKeyList = []
696  TestKeyList = []
697  LocVars = []
698  TestVars = []
699  AttrData = {}
700  varDict = defaultdict(lambda: defaultdict(dict))
701  outdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
702  loc_mdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
703  var_mdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
704  test_mdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
705  test_fields_ = test_fields_conv
706  # get list of location variable for this var/platform
707  for ncv in self.dfdf.variables:
708  if ncv in all_LocKeyList:
709  LocKeyList.append(all_LocKeyList[ncv])
710  LocVars.append(ncv)
711  # get list of TestReference variables for this var/platform
712  for ncv in self.dfdf.variables:
713  if ncv in test_fields_:
714  TestKeyList.append(test_fields_[ncv])
715  TestVars.append(ncv)
716  # grab obs to process
717  idx = grabobsidx(self.dfdf, p, v)
718  if (np.sum(idx) == 0):
719  print("No matching observations for Platform:%s Var:%s" % (p, v))
720  continue
721  print("Platform:%s Var:%s #Obs:%d" % (p, v, np.sum(idx)))
722 
723  writer = iconv.NcWriter(outname, LocKeyList, TestKeyList=TestKeyList)
724 
725  outvars = conv_varnames[v]
726  for value in outvars:
727  varDict[value]['valKey'] = value, writer.OvalName()
728  varDict[value]['errKey'] = value, writer.OerrName()
729  varDict[value]['qcKey'] = value, writer.OqcName()
730 
731  for o in range(len(outvars)):
732  obsdata = self.varvar(conv_gsivarnames[v][o])[idx]
733  if outvars[o] == 'surface_pressure':
734  if np.max(obsdata) < 1100.:
735  obsdata = obsdata * 100. # convert to Pa from hPa
736  obserr = self.varvar('Errinv_Input')[idx]
737  mask = obserr < self.EPSILONEPSILON
738  obserr[~mask] = 1.0 / obserr[~mask]
739  obserr[mask] = self.FLOAT_FILLFLOAT_FILL
740  obserr[obserr > 4e8] = self.FLOAT_FILLFLOAT_FILL
741  try:
742  obsqc = self.varvar('Prep_QC_Mark')[idx]
743  except BaseException:
744  obsqc = np.ones_like(obsdata) * 2
745  if (v == 'uv'):
746  gsivars = gsi_add_vars_uv
747  else:
748  gsivars = gsi_add_vars
749 
750  for key, value in gsivars.items():
751  if key in self.dfdf.variables:
752  df_key = self.varvar(key)
753  gvname = outvars[o], value
754  # some special actions need to be taken depending on
755  # var name...
756  if ("Forecast" in key) and (v == 'uv'):
757  if (checkuv[outvars[o]] != key[0]):
758  continue
759  if "Errinv" in key:
760  tmp = df_key[idx]
761  mask = tmp < self.EPSILONEPSILON
762  tmp[~mask] = 1.0 / tmp[~mask]
763  tmp[mask] = self.FLOAT_FILLFLOAT_FILL
764  elif "Obs_Minus_" in key:
765  if 'u_Forecast_adjusted' in self.dfdf.variables:
766  continue
767  elif 'Forecast_adjusted' in self.dfdf.variables:
768  continue
769  if v == 'uv':
770  if (checkuv[outvars[o]] != key[0]):
771  continue
772  else:
773  key1 = key[0]+'_Observation'
774  else:
775  key1 = 'Observation'
776  tmp = self.varvar(key1)[idx] - df_key[idx]
777  else:
778  tmp = df_key[idx]
779  if value in gsiint:
780  tmp = tmp.astype(int)
781  tmp[tmp > 4e4] = self.INT_FILLINT_FILL
782  else:
783  tmp[tmp > 4e8] = self.FLOAT_FILLFLOAT_FILL
784  outdata[gvname] = tmp
785  # store values in output data dictionary
786  outdata[varDict[outvars[o]]['valKey']] = obsdata
787  outdata[varDict[outvars[o]]['errKey']] = obserr
788  outdata[varDict[outvars[o]]['qcKey']] = obsqc.astype(int)
789 
790  for lvar in LocVars:
791  loc_mdata_name = all_LocKeyList[lvar][0]
792  if lvar == 'Station_ID':
793  tmp = self.varvar(lvar)[idx]
794  StationIDs = [bytes((b''.join(tmp[a])).decode('iso-8859-1').encode('utf8')) for a in range(len(tmp))]
795  loc_mdata[loc_mdata_name] = writer.FillNcVector(StationIDs, "string")
796  elif lvar == 'Time': # need to process into time stamp strings #"%Y-%m-%dT%H:%M:%SZ"
797  tmp = self.varvar(lvar)[idx]
798  obstimes = [self.validtimevalidtime + dt.timedelta(hours=float(tmp[a])) for a in range(len(tmp))]
799  obstimes = [a.strftime("%Y-%m-%dT%H:%M:%SZ") for a in obstimes]
800  loc_mdata[loc_mdata_name] = writer.FillNcVector(obstimes, "datetime")
801  # special logic for unit conversions depending on GSI version
802  elif lvar == 'Pressure':
803  tmpps = self.varvar(lvar)[idx]
804  if np.max(tmpps) > 1100.:
805  loc_mdata[loc_mdata_name] = tmpps
806  else:
807  loc_mdata[loc_mdata_name] = tmpps * 100. # from hPa to Pa
808  # special logic for missing station_elevation and height for surface obs
809  elif lvar in ['Station_Elevation', 'Height']:
810  if p == 'sfc':
811  tmp = self.varvar(lvar)[idx]
812  tmp[tmp == 9999.] = self.FLOAT_FILLFLOAT_FILL
813  tmp[tmp == 10009.] = self.FLOAT_FILLFLOAT_FILL # for u,v sfc Height values that are 10+9999
814  # GSI sfc obs are at 0m agl, but operator assumes 2m agl, correct output to 2m agl
815  # this is correctly 10m agl though for u,v obs
816  if lvar == 'Height' and self.obstypeobstype in ['conv_t', 'conv_q']:
817  elev = self.varvar('Station_Elevation')[idx]
818  hgt = elev + 2.
819  hgt[hgt > 9998.] = self.FLOAT_FILLFLOAT_FILL
820  tmp = hgt
821  loc_mdata[loc_mdata_name] = tmp
822  elif p == 'sondes' or p == 'aircraft' or p == 'satwind':
823  tmp = self.varvar(lvar)[idx]
824  tmp[tmp > 4e8] = self.FLOAT_FILLFLOAT_FILL # 1e11 is fill value for sondes, etc.
825  loc_mdata[loc_mdata_name] = tmp
826  else:
827  loc_mdata[loc_mdata_name] = self.varvar(lvar)[idx]
828  else:
829  loc_mdata[loc_mdata_name] = self.varvar(lvar)[idx]
830  # put the TestReference fields in the structure for writing out
831  for tvar in TestVars:
832  if tvar in test_fields_:
833  test_mdata_name = test_fields_[tvar][0]
834  tmp = self.varvar(tvar)[idx]
835  tmp[tmp > 4e8] = self.FLOAT_FILLFLOAT_FILL
836  test_mdata[test_mdata_name] = tmp
837  # record info
838  SIDUnique, idxs, invs = np.unique(StationIDs, return_index=True, return_inverse=True, axis=0)
839  loc_mdata['record_number'] = invs
840 
841  # var metadata
842  var_mdata['variable_names'] = writer.FillNcVector(outvars, "string")
843 
844  AttrData["date_time_string"] = self.validtimevalidtime.strftime("%Y-%m-%dT%H:%M:%SZ")
845 
846  # writer metadata
847  nvars = len(outvars)
848  nlocs = len(StationIDs)
849 
850  writer._nvars = nvars
851  writer._nlocs = nlocs
852 
853  writer.BuildNetcdf(outdata, loc_mdata, var_mdata, AttrData, units_values, test_mdata)
854 
855  print("ProcessedL %d Conventional obs processed to: %s" % (len(obsdata), outname))
856 
857 
858 def grabobsidx(obsdata, platform, var):
859  """ grabobsidx(obsdata,platform,var):
860  obsdata - netCDF dataset object
861  platform - string of observation type: 'sondes','sfc',etc.
862  var - string of variable type: 'tsen','tv','q', etc.
863 
864  returns idx - indices of observations to write out
865  """
866  code = obsdata['Observation_Type'][:]
867  if var in ['tsen', 'tv']:
868  iqt = obsdata['Setup_QC_Mark'][:]
869  if var == 'tsen':
870  idx2 = (iqt != 0)
871  elif var == 'tv':
872  idx2 = (iqt == 0)
873  elif var in ['bend', 'refract']:
874  igps = obsdata['GPS_Type'][:]
875  if var == 'bend':
876  idx2 = (igps != 0)
877  elif var == 'refract':
878  idx2 = (igps == 0)
879  else:
880  # to be consistent
881  idx2 = (code > -999)
882  # grab np logical based off of conv_dicts entry
883  if var == 'uv':
884  codes = uv_bufrtypes[platform]
885  else:
886  codes = conv_bufrtypes[platform]
887  idx = np.logical_and(np.in1d(code, codes), idx2)
888 
889  return idx
890 
891 
892 # satellite radiance observations
894  """ class Radiances - satellite radiance observations
895 
896  Use this class to read in satellite radiance observations
897  from GSI netCDF diag files
898 
899  Functions:
900 
901  Attributes:
902  filename - string path to file
903  validtime - datetime object of valid observation time
904  nobs - number of observations
905 
906 
907  """
908 
909  def __init__(self, filename):
910  self.filenamefilename = filename
911  splitfname = self.filenamefilename.split('/')[-1].split('_')
912  i = False
913  for s in rad_sensors:
914  if s in splitfname:
915  i = splitfname.index(s)
916  self.obstypeobstype = "_".join(splitfname[i:i + 2])
917  if not i:
918  raise ValueError("Observation is not a radiance type...")
919 
920  def read(self):
921  # get valid time
922  df = nc.Dataset(self.filenamefilename)
923  tstr = str(df.getncattr('date_time'))
924  self.validtimevalidtime = dt.datetime.strptime(tstr, "%Y%m%d%H")
925  # sensor and satellite
926  self.sensorsensor = df.getncattr('Observation_type')
927  self.satellitesatellite = df.getncattr('Satellite')
928  # number of observations
929  self.nobsnobs = len(df.dimensions['nobs'])
930  self.nchansnchans = len(df.dimensions['nchans'])
931  self.dfdf = df
932 
933  def close(self):
934  self.dfdf.close()
935 
936  def toGeovals(self, OutDir, clobber=True):
937  """ toGeovals(OutDir,clobber=True)
938  if model state fields are in the GSI diag file, create
939  GeoVaLs in an output file for use by JEDI/UFO
940  """
941  # note, this is a temporary construct and thus, there is no
942  # ioda_conv_ncio or equivalent to handle the format
943  # set up output file
944  outname = OutDir + '/' + self.sensorsensor + '_' + self.satellitesatellite + \
945  '_geoval_' + self.validtimevalidtime.strftime("%Y%m%d%H") + '.nc4'
946  if not clobber:
947  if (os.path.exists(outname)):
948  print("File exists. Skipping and not overwriting:")
949  print(outname)
950  return
951  OutVars = []
952  InVars = []
953  for ncv in self.dfdf.variables:
954  if ncv in geovals_vars:
955  OutVars.append(geovals_vars[ncv])
956  InVars.append(ncv)
957 
958  # set up output file
959  ncout = nc.Dataset(outname, 'w', format='NETCDF4')
960  ncout.setncattr("date_time", np.int32(self.validtimevalidtime.strftime("%Y%m%d%H")))
961  ncout.setncattr("satellite", self.satellitesatellite)
962  ncout.setncattr("sensor", self.sensorsensor)
963  # get nlocs
964  nlocs = self.nobsnobs / self.nchansnchans
965  ncout.createDimension("nlocs", nlocs)
966  # other dims
967  ncout.createDimension("nlevs", self.dfdf.dimensions["air_temperature_arr_dim"].size)
968  ncout.createDimension("nlevsp1", self.dfdf.dimensions["air_pressure_levels_arr_dim"].size)
969 
970  for var in self.dfdf.variables.values():
971  vname = var.name
972  if vname in geovals_metadata_dict.keys():
973  dims = ("nlocs",)
974  var_out = ncout.createVariable(geovals_metadata_dict[vname], var.dtype, dims)
975  vdata = var[:]
976  vdata = vdata[::self.nchansnchans]
977  var_out[:] = vdata
978  elif vname in geovals_vars.keys():
979  if (len(var.dimensions) == 1):
980  dims = ("nlocs",)
981  elif "_levels" in vname:
982  dims = ("nlocs", "nlevsp1")
983  else:
984  dims = ("nlocs", "nlevs")
985  var_out = ncout.createVariable(geovals_vars[vname], var.dtype, dims)
986  vdata = var[...]
987  vdata = vdata[::self.nchansnchans, ...]
988  var_out[...] = vdata
989  else:
990  pass
991  ncout.close()
992 
993  def toObsdiag(self, OutDir, clobber=True):
994  """ toObsdiag(OutDir,clobber=True)
995  if model state fields are in the GSI diag file, create
996  Obsdiag in an output file for use by JEDI/UFO
997  """
998  # note, this is a temporary construct and thus, there is no
999  # ioda_conv_ncio or equivalent to handle the format
1000 
1001  # set up output file
1002  outname = OutDir + '/' + self.sensorsensor + '_' + self.satellitesatellite + \
1003  '_obsdiag_' + self.validtimevalidtime.strftime("%Y%m%d%H") + '.nc4'
1004  if not clobber:
1005  if (os.path.exists(outname)):
1006  print("File exists. Skipping and not overwriting: %s" % outname)
1007  return
1008  OutVars = []
1009  InVars = []
1010  for ncv in self.dfdf.variables:
1011  if ncv in obsdiag_vars:
1012  OutVars.append(obsdiag_vars[ncv])
1013  InVars.append(ncv)
1014 
1015  # set up output file
1016  ncout = nc.Dataset(outname, 'w', format='NETCDF4')
1017  ncout.setncattr("date_time", np.int32(self.validtimevalidtime.strftime("%Y%m%d%H")))
1018  ncout.setncattr("satellite", self.satellitesatellite)
1019  ncout.setncattr("sensor", self.sensorsensor)
1020  # get nlocs
1021  nlocs = self.nobsnobs / self.nchansnchans
1022  ncout.createDimension("nlocs", nlocs)
1023  # other dims
1024  nlevs = self.dfdf.dimensions["air_pressure_arr_dim"].size
1025  nlevsp1 = self.dfdf.dimensions["air_pressure_levels_arr_dim"].size
1026 
1027  ncout.createDimension("nlevs", self.dfdf.dimensions["air_pressure_arr_dim"].size)
1028 
1029  # get channel info and list
1030  chan_number = self.darr('sensor_chan')
1031  chan_number = chan_number[chan_number >= 0]
1032  chan_indx = self.varvar('Channel_Index')
1033  nchans = len(chan_number)
1034  nlocs = int(self.nobsnobs / nchans)
1035  chanlist = chan_number
1036 
1037  # get data
1038  for var in self.dfdf.variables.values():
1039  vname = var.name
1040  if vname in obsdiag_metadata_dict.keys():
1041  dims = ("nlocs",)
1042  var_out = ncout.createVariable(obsdiag_metadata_dict[vname], var.dtype, dims)
1043  vdata = var[:]
1044  vdata = vdata[::self.nchansnchans]
1045  var_out[:] = vdata
1046  elif vname in obsdiag_vars.keys():
1047  # print("toObsdiag: var.shape = ", var.shape)
1048  if (len(var.dimensions) == 1):
1049  dims = ("nlocs",)
1050  for c in range(len(chanlist)):
1051  var_name = obsdiag_vars[vname]+"_"+"{:d}".format(chanlist[c])
1052  idx = chan_indx == c+1
1053  if (np.sum(idx) == 0):
1054  print("No matching observations for: %s" % value)
1055  continue
1056  var_out = ncout.createVariable(var_name, var.dtype, dims)
1057  vdata = var[:]
1058  vdata = vdata[idx]
1059  var_out[:] = vdata
1060  elif "_levels" in vname:
1061  dims = ("nlocs", "nlevsp1")
1062  else:
1063  dims = ("nlocs", "nlevs")
1064  for c in range(len(chanlist)):
1065  var_name = obsdiag_vars[vname]+"_"+"{:d}".format(chanlist[c])
1066  idx = chan_indx == c+1
1067  if (np.sum(idx) == 0):
1068  print("No matching observations for: %s" % value)
1069  continue
1070  var_out = ncout.createVariable(var_name, var.dtype, dims)
1071  vdata = var[...]
1072  vdata = vdata[idx, ...]
1073  var_out[...] = vdata
1074  else:
1075  pass
1076  ncout.close()
1077 
1078  def toIODAobs(self, OutDir, ObsBias, QCVars, TestRefs, clobber=True):
1079  """ toIODAobs(OutDir,clobber=True)
1080  output observations from the specified GSI diag file
1081  to the JEDI/IODA observation format
1082  """
1083 
1084  print("Input Parameters: ObsBias=%s QCVars=%s TestRefs=%s" % (ObsBias, QCVars, TestRefs))
1085  # set up a NcWriter class
1086  outname = OutDir + '/' + self.sensorsensor + '_' + self.satellitesatellite + \
1087  '_obs_' + self.validtimevalidtime.strftime("%Y%m%d%H") + '.nc4'
1088  if not clobber:
1089  if (os.path.exists(outname)):
1090  print("File exists. Skipping and not overwriting: %s" % outname)
1091  return
1092  LocKeyList = []
1093  TestKeyList = []
1094  LocVars = []
1095  TestVars = []
1096  AttrData = {}
1097  varDict = defaultdict(lambda: defaultdict(dict))
1098  outdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
1099  loc_mdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
1100  var_mdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
1101  test_mdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
1102  if self.sensorsensor == "amsua":
1103  test_fields_ = test_fields_allsky
1104  test_fields_with_channels_ = test_fields_with_channels_allsky
1105  elif self.sensorsensor == "atms":
1106  test_fields_ = test_fields_allsky
1107  test_fields_with_channels_ = test_fields_with_channels_allsky
1108  else:
1109  test_fields_ = test_fields
1110  test_fields_with_channels_ = test_fields_with_channels
1111 
1112  # get list of location variable for this var/platform
1113  for ncv in self.dfdf.variables:
1114  if ncv in all_LocKeyList:
1115  LocKeyList.append(all_LocKeyList[ncv])
1116  LocVars.append(ncv)
1117 
1118  # get list of TestReference variables for this var/platform
1119  for ncv in self.dfdf.variables:
1120  if ncv in test_fields_:
1121  TestKeyList.append(test_fields_[ncv])
1122  TestVars.append(ncv)
1123  if ncv in test_fields_with_channels_:
1124  TestKeyList.append(test_fields_with_channels_[ncv])
1125  TestVars.append(ncv)
1126 
1127  # for now, record len is 1 and the list is empty?
1128  if (TestRefs):
1129  writer = iconv.NcWriter(outname, LocKeyList, TestKeyList=TestKeyList)
1130  else:
1131  writer = iconv.NcWriter(outname, LocKeyList)
1132 
1133  chan_number = self.varvar('sensor_chan')
1134  chan_number = chan_number[chan_number >= 0]
1135  chan_indx = self.varvar('Channel_Index')
1136  nchans = len(chan_number)
1137  nlocs = int(self.nobsnobs / nchans)
1138 
1139  chanlist = chan_number
1140  for a in chanlist:
1141  value = "brightness_temperature_{:d}".format(a)
1142  varDict[value]['valKey'] = value, writer.OvalName()
1143  varDict[value]['errKey'] = value, writer.OerrName()
1144  varDict[value]['qcKey'] = value, writer.OqcName()
1145  units_values[value] = 'K'
1146  if (ObsBias):
1147  valuebc = [
1148  "constant_{:d}".format(a),
1149  "zenith_angle_{:d}".format(a),
1150  "cloud_liquid_water_{:d}".format(a),
1151  "lapse_rate_squared_{:d}".format(a),
1152  "lapse_rate_{:d}".format(a),
1153  "cosine_of_latitude_times_orbit_node_{:d}".format(a),
1154  "sine_of_latitude_{:d}".format(a),
1155  "emissivity_{:d}".format(a),
1156  "scan_angle_order_4_{:d}".format(a),
1157  "scan_angle_order_3_{:d}".format(a),
1158  "scan_angle_order_2_{:d}".format(a),
1159  "scan_angle_{:d}".format(a),
1160  ]
1161  ibc = 0
1162  for vbc in valuebc:
1163  varDict[vbc]['bctKey'] = vbc, writer.ObiastermName()
1164  varDict[vbc]['bcpKey'] = vbc, writer.ObiaspredName()
1165  ibc += 1
1166  obsdata = self.varvar('Observation')
1167  try:
1168  obserr = self.varvar('Input_Observation_Error')
1169  except IndexError:
1170  obserr = 1./self.varvar('Inverse_Observation_Error')
1171  obsqc = self.varvar('QC_Flag').astype(int)
1172  if (ObsBias):
1173  nametbc = [
1174  'BC_Constant',
1175  'BC_Scan_Angle',
1176  'BC_Cloud_Liquid_Water',
1177  'BC_Lapse_Rate_Squared',
1178  'BC_Lapse_Rate',
1179  'BC_Cosine_Latitude_times_Node',
1180  'BC_Sine_Latitude',
1181  'BC_Emissivity',
1182  'BC_Scan_Angle_4th_order',
1183  'BC_Scan_Angle_3rd_order',
1184  'BC_Scan_Angle_2nd_order',
1185  'BC_Scan_Angle_1st_order',
1186  ]
1187  namepbc = [
1188  'BCPred_Constant',
1189  'BCPred_Scan_Angle',
1190  'BCPred_Cloud_Liquid_Water',
1191  'BCPred_Lapse_Rate_Squared',
1192  'BCPred_Lapse_Rate',
1193  'BCPred_Cosine_Latitude_times_Node',
1194  'BCPred_Sine_Latitude',
1195  'BCPred_Emissivity',
1196  'BCPred_Scan_Angle_4th_order',
1197  'BCPred_Scan_Angle_3rd_order',
1198  'BCPred_Scan_Angle_2nd_order',
1199  'BCPred_Scan_Angle_1st_order',
1200  ]
1201  obsbiasterm = []
1202  ii = 0
1203  for nbc in nametbc:
1204  obsbiasterm.append(self.varvar(nametbc[ii]))
1205  ii += 1
1206 
1207  obsbiaspred = []
1208  ii = 0
1209  for nbc in namepbc:
1210  obsbiaspred.append(self.varvar(namepbc[ii]))
1211  ii += 1
1212  for lvar in LocVars:
1213  loc_mdata_name = all_LocKeyList[lvar][0]
1214  if lvar == 'Obs_Time':
1215  tmp = self.varvar(lvar)[::nchans]
1216  obstimes = [self.validtimevalidtime + dt.timedelta(hours=float(tmp[a])) for a in range(len(tmp))]
1217  obstimes = [a.strftime("%Y-%m-%dT%H:%M:%SZ") for a in obstimes]
1218  loc_mdata[loc_mdata_name] = writer.FillNcVector(obstimes, "datetime")
1219  elif self.sensorsensor == "gmi" and lvar in gmi_chan_dep_loc_vars:
1220  # Channels 1-9
1221  tmp = self.varvar(lvar)[::nchans]
1222  tmp[tmp > 4e8] = self.FLOAT_FILLFLOAT_FILL
1223  loc_mdata[loc_mdata_name] = tmp
1224  # Channels 10-13
1225  tmp = self.varvar(lvar)[nchans-1::nchans]
1226  tmp[tmp > 4e8] = self.FLOAT_FILLFLOAT_FILL
1227  loc_mdata[loc_mdata_name+'1'] = tmp
1228  else:
1229  tmp = self.varvar(lvar)[::nchans]
1230  tmp[tmp > 4e8] = self.FLOAT_FILLFLOAT_FILL
1231  loc_mdata[loc_mdata_name] = tmp
1232 
1233  # put the TestReference fields in the structure for writing out
1234  for tvar in TestVars:
1235  if tvar in test_fields_with_channels_:
1236  for ii, ch in enumerate(chanlist):
1237  test_mdata_name = test_fields_with_channels_[tvar][0]+"_{:d}".format(ch)
1238  tmp = self.varvar(tvar)[:]
1239  tmp[tmp > 4e8] = self.FLOAT_FILLFLOAT_FILL
1240  idx = chan_indx == ii+1
1241  outvals = tmp[idx]
1242  test_mdata[test_mdata_name] = outvals
1243  if tvar in test_fields_:
1244  test_mdata_name = test_fields_[tvar][0]
1245  tmp = self.varvar(tvar)[::nchans]
1246  tmp[tmp > 4e8] = self.FLOAT_FILLFLOAT_FILL
1247  test_mdata[test_mdata_name] = tmp
1248 
1249  gsi_add_radvars = gsi_add_vars
1250  if (QCVars):
1251  gsi_add_radvars.update(gsi_add_qcvars)
1252 
1253  if self.sensorsensor == "amsua":
1254  gsi_add_radvars = gsi_add_vars_allsky
1255  if (QCVars):
1256  gsi_add_radvars.update(gsi_add_qcvars_allsky)
1257 
1258  if self.sensorsensor == "atms":
1259  gsi_add_radvars = gsi_add_vars_allsky
1260  if (QCVars):
1261  gsi_add_radvars.update(gsi_add_qcvars_allsky)
1262 
1263  # check for additional GSI output for each variable
1264  for gsivar, iodavar in gsi_add_radvars.items():
1265  if gsivar in self.dfdf.variables:
1266  if "Inverse" in gsivar:
1267  tmp = self.varvar(gsivar)
1268  # fix for if some reason 1/small does not result in inf but zero
1269  mask = tmp < self.EPSILONEPSILON
1270  tmp[~mask] = 1.0 / tmp[~mask]
1271  tmp[mask] = self.FLOAT_FILLFLOAT_FILL
1272  elif "Obs_Minus_" in gsivar:
1273  if 'Forecast_adjusted' in self.dfdf.variables:
1274  continue
1275  key1 = 'Observation'
1276  tmp = self.varvar(key1) - self.varvar(gsivar)
1277  else:
1278  tmp = self.varvar(gsivar)
1279  if gsivar in gsiint:
1280  tmp = tmp.astype(int)
1281  else:
1282  tmp[tmp > 4e8] = self.FLOAT_FILLFLOAT_FILL
1283  for ii, ch in enumerate(chanlist):
1284  varname = "brightness_temperature_{:d}".format(ch)
1285  gvname = varname, iodavar
1286  idx = chan_indx == ii+1
1287  outvals = tmp[idx]
1288  outdata[gvname] = outvals
1289 
1290  # loop through channels for subset
1291  var_names = []
1292  for c in range(len(chanlist)):
1293  value = "brightness_temperature_{:d}".format(chanlist[c])
1294  var_names.append(value)
1295  idx = chan_indx == c+1
1296  if (np.sum(idx) == 0):
1297  print("No matching observations for: %s" % value)
1298  continue
1299  obsdatasub = obsdata[idx]
1300  obsdatasub[obsdatasub > 9e5] = self.FLOAT_FILLFLOAT_FILL
1301  obserrsub = obserr[idx]
1302  obsqcsub = obsqc[idx]
1303  obsqcsub[obsdatasub > 9e5] = self.INT_FILLINT_FILL
1304 
1305  # store values in output data dictionary
1306  outdata[varDict[value]['valKey']] = obsdatasub
1307  outdata[varDict[value]['errKey']] = obserrsub
1308  outdata[varDict[value]['qcKey']] = obsqcsub.astype(int)
1309  if (ObsBias):
1310  valuebc = [
1311  "constant_{:d}".format(chanlist[c]),
1312  "zenith_angle_{:d}".format(chanlist[c]),
1313  "cloud_liquid_water_{:d}".format(chanlist[c]),
1314  "lapse_rate_squared_{:d}".format(chanlist[c]),
1315  "lapse_rate_{:d}".format(chanlist[c]),
1316  "cosine_of_latitude_times_orbit_node_{:d}".format(chanlist[c]),
1317  "sine_of_latitude_{:d}".format(chanlist[c]),
1318  "emissivity_{:d}".format(chanlist[c]),
1319  "scan_angle_order_4_{:d}".format(chanlist[c]),
1320  "scan_angle_order_3_{:d}".format(chanlist[c]),
1321  "scan_angle_order_2_{:d}".format(chanlist[c]),
1322  "scan_angle_{:d}".format(chanlist[c]),
1323  ]
1324  ii = 0
1325  for value in valuebc:
1326  obsbiastermsub = obsbiasterm[ii][idx]
1327  obsbiaspredsub = obsbiaspred[ii][idx]
1328  obsbiastermsub[obsbiastermsub > 9e5] = self.FLOAT_FILLFLOAT_FILL
1329  obsbiaspredsub[obsbiaspredsub > 9e5] = self.FLOAT_FILLFLOAT_FILL
1330 
1331  # store values in output data dictionary
1332  outdata[varDict[value]['bctKey']] = obsbiastermsub
1333  outdata[varDict[value]['bcpKey']] = obsbiaspredsub
1334  ii += 1
1335  # var metadata
1336  var_mdata['variable_names'] = writer.FillNcVector(var_names, "string")
1337  for key, value2 in chan_metadata_dict.items():
1338  try:
1339  var_mdata[value2] = self.varvar(key)
1340  except IndexError:
1341  pass
1342 
1343  # dummy record metadata, for now
1344  loc_mdata['record_number'] = np.full((nlocs), 1, dtype='i4')
1345 
1346  # global attributes
1347 
1348  AttrData["date_time_string"] = self.validtimevalidtime.strftime("%Y-%m-%dT%H:%M:%SZ")
1349  AttrData["satellite"] = self.satellitesatellite
1350  AttrData["sensor"] = self.sensorsensor
1351 
1352  # set dimension lengths in the writer since we are bypassing
1353  # ExtractObsData
1354  writer._nvars = nchans
1355  writer._nlocs = nlocs
1356  if (TestRefs):
1357  writer.BuildNetcdf(outdata, loc_mdata, var_mdata,
1358  AttrData, units_values, test_mdata)
1359  else:
1360  writer.BuildNetcdf(outdata, loc_mdata, var_mdata,
1361  AttrData, units_values)
1362 
1363  print("Satellite radiance obs processed, wrote to: %s" % outname)
1364 
1365 
1366 # atmospheric composition observations
1367 
1368 class AOD(BaseGSI):
1369  """ class AOD - aerosol optical depth satellite observations
1370 
1371  Use this class to read in AOD satellite observations
1372  from GSI netCDF diag files
1373 
1374  Functions:
1375  Attributes:
1376  filename - string path to file
1377  validtime - datetime object of valid observation time
1378  nobs - number of observations
1379  """
1380  def __init__(self, filename):
1381  self.filenamefilename = filename
1382  splitfname = self.filenamefilename.split('/')[-1].split('_')
1383  i = False
1384  for s in aod_sensors:
1385  if s in splitfname:
1386  i = splitfname.index(s)
1387  self.obstypeobstype = "_".join(splitfname[i:i+3])
1388  if not self.obstypeobstype:
1389  raise ValueError("Observation is not AOD type...")
1390  # sensor and satellite
1391  self.sensorsensor = splitfname[i]
1392  self.satellitesatellite = splitfname[i+2]
1393 
1394  def read(self):
1395  # get valid time
1396  df = nc.Dataset(self.filenamefilename)
1397  tstr = str(df.getncattr('date_time'))
1398  self.validtimevalidtime = dt.datetime.strptime(tstr, "%Y%m%d%H")
1399  # number of observations
1400  self.nobsnobs = len(df.dimensions['nobs'])
1401  self.nchansnchans = len(df.dimensions['nchans'])
1402  self.dfdf = df
1403 
1404  def toGeovals(self, OutDir, clobber=True):
1405  """ toGeovals(OutDir,clobber=True)
1406  if model state fields are in the GSI diag file, create
1407  GeoVaLs in an output file for use by JEDI/UFO
1408  """
1409  # note, this is a temporary construct and thus, there is no
1410  # ioda_conv_ncio or equivalent to handle the format
1411 
1412  # set up output file
1413  outname = OutDir+'/'+self.obstypeobstype+'_geoval_'+self.validtimevalidtime.strftime("%Y%m%d%H")+'.nc4'
1414  if not clobber:
1415  if (os.path.exists(outname)):
1416  print("File exists. Skipping and not overwriting: %s" % outname)
1417  return
1418  OutVars = []
1419  InVars = []
1420  for ncv in self.dfdf.variables:
1421  if ncv in geovals_vars:
1422  OutVars.append(geovals_vars[ncv])
1423  InVars.append(ncv)
1424 
1425  # set up output file
1426  ncout = nc.Dataset(outname, 'w', format='NETCDF4')
1427  ncout.setncattr("date_time", np.int32(self.validtimevalidtime.strftime("%Y%m%d%H")))
1428  ncout.setncattr("satellite", self.satellitesatellite)
1429  ncout.setncattr("sensor", self.sensorsensor)
1430  # get nlocs
1431  nlocs = self.nobsnobs
1432  ncout.createDimension("nlocs", nlocs)
1433  # other dims
1434  ncout.createDimension("nlevs", self.dfdf.dimensions["air_temperature_arr_dim"].size)
1435  ncout.createDimension("nlevsp1", self.dfdf.dimensions["air_pressure_levels_arr_dim"].size)
1436  for var in self.dfdf.variables.values():
1437  vname = var.name
1438  if vname in geovals_metadata_dict.keys():
1439  dims = ("nlocs",)
1440  var_out = ncout.createVariable(geovals_metadata_dict[vname], var.dtype, dims)
1441  vdata = var[:]
1442  var_out[:] = vdata
1443  elif vname in geovals_vars.keys():
1444  if (len(var.dimensions) == 1):
1445  dims = ("nlocs",)
1446  elif "_levels" in vname:
1447  dims = ("nlocs", "nlevsp1")
1448  else:
1449  dims = ("nlocs", "nlevs")
1450  var_out = ncout.createVariable(geovals_vars[vname], var.dtype, dims)
1451  vdata = var[...]
1452  var_out[...] = vdata
1453  else:
1454  pass
1455  ncout.close()
1456 
1457  def toIODAobs(self, OutDir, clobber=True):
1458  """
1459  toIODAobs(OutDir,clobber=True)
1460  output observations from the specified GSI diag file
1461  to the JEDI/IODA observation format
1462  """
1463  # set up a NcWriter class
1464  outname = OutDir+'/'+self.obstypeobstype+'_obs_'+self.validtimevalidtime.strftime("%Y%m%d%H")+'.nc4'
1465  if not clobber:
1466  if (os.path.exists(outname)):
1467  print("File exists. Skipping and not overwriting:%s" % outname)
1468  return
1469  LocKeyList = []
1470  LocVars = []
1471  AttrData = {}
1472  varDict = defaultdict(lambda: defaultdict(dict))
1473  outdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
1474  loc_mdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
1475  var_mdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
1476  # get list of location variable for this var/platform
1477  for ncv in self.dfdf.variables:
1478  if ncv in all_LocKeyList:
1479  LocKeyList.append(all_LocKeyList[ncv])
1480  LocVars.append(ncv)
1481 
1482  # for now, record len is 1 and the list is empty?
1483  writer = iconv.NcWriter(outname, LocKeyList)
1484 
1485  chan_number = self.varvar('sensor_chan')
1486  chan_number = chan_number[chan_number >= 0]
1487  chan_indx = self.varvar('Channel_Index')
1488  nchans = len(chan_number)
1489  nlocs = self.nobsnobs / nchans
1490  chanlist = chan_indx[:nchans]
1491  for a in chanlist:
1492  value = "aerosol_optical_depth_{:d}".format(a)
1493  varDict[value]['valKey'] = value, writer.OvalName()
1494  varDict[value]['errKey'] = value, writer.OerrName()
1495  varDict[value]['qcKey'] = value, writer.OqcName()
1496 
1497  obsdata = self.varvar('Observation')
1498  obserr = 1.0/self.varvar('Observation_Error')
1499  obsqc = self.varvar('QC_Flag').astype(int)
1500 
1501  gsivars = gsi_add_vars
1502 
1503  # loop through channels for subset
1504  var_names = []
1505  for c in range(len(chanlist)):
1506  value = "aerosol_optical_depth_{:d}".format(chanlist[c])
1507  var_names.append(value)
1508  idx = chan_indx == chanlist[c]
1509  obsdatasub = obsdata[idx]
1510  obserrsub = obserr[idx]
1511  obsqcsub = obsqc[idx]
1512  for lvar in LocVars:
1513  loc_mdata_name = all_LocKeyList[lvar][0]
1514  if lvar == 'Obs_Time':
1515  tmp = self.varvar(lvar)[idx]
1516  obstimes = [self.validtimevalidtime+dt.timedelta(hours=float(tmp[a])) for a in range(len(tmp))]
1517  obstimes = [a.strftime("%Y-%m-%dT%H:%M:%SZ") for a in obstimes]
1518  loc_mdata[loc_mdata_name] = writer.FillNcVector(obstimes, "datetime")
1519  else:
1520  loc_mdata[loc_mdata_name] = self.varvar(lvar)[idx]
1521  gsimeta = {}
1522  for key, value2 in gsivars.items():
1523  # some special actions need to be taken depending on var name...
1524  if "Inverse" in key:
1525  try:
1526  gsimeta[key] = 1.0/self.varvar(key)[idx]
1527  except IndexError:
1528  pass
1529  else:
1530  try:
1531  gsimeta[key] = self.varvar(key)[idx]
1532  except IndexError:
1533  pass
1534 
1535  # store values in output data dictionary
1536  outdata[varDict[value]['valKey']] = obsdatasub
1537  outdata[varDict[value]['errKey']] = obserrsub
1538  outdata[varDict[value]['qcKey']] = obsqcsub
1539 
1540  # add additional GSI variables that are not needed long term but useful for testing
1541  for key, value2 in gsivars.items():
1542  gvname = value, value2
1543  if value2 in gsiint:
1544  try:
1545  outdata[gvname] = gsimeta[key].astype(int)
1546  except KeyError:
1547  pass
1548  else:
1549  try:
1550  outdata[gvname] = gsimeta[key]
1551  except KeyError:
1552  pass
1553 
1554  # var metadata
1555  var_mdata['variable_names'] = writer.FillNcVector(var_names, "string")
1556  for key, value2 in chan_metadata_dict.items():
1557  try:
1558  var_mdata[value2] = self.varvar(key)[:nchans]
1559  except IndexError:
1560  pass
1561 
1562  # dummy record metadata, for now
1563  loc_mdata['record_number'] = np.full((nlocs), 1, dtype='i4')
1564 
1565  # global attributes
1566  AttrData["date_time_string"] = self.validtimevalidtime.strftime("%Y-%m-%dT%H:%M:%SZ")
1567  AttrData["satellite"] = self.satellitesatellite
1568  AttrData["sensor"] = self.sensorsensor
1569 
1570  # set dimension lengths in the writer since we are bypassing ExtractObsData
1571  writer._nvars = nchans
1572  writer._nlocs = nlocs
1573 
1574  writer.BuildNetcdf(outdata, loc_mdata, var_mdata, AttrData, units_values)
1575  print("AOD obs processed, wrote to:%s" % outname)
1576 
1577 
1579  """ class Ozone - ozone satellite observations
1580 
1581  Use this class to read in ozone satellite observations
1582  from GSI netCDF diag files
1583 
1584  Functions:
1585 
1586  Attributes:
1587  filename - string path to file
1588  validtime - datetime object of valid observation time
1589  nobs - number of observations
1590 
1591  """
1592  def __init__(self, filename):
1593  self.filenamefilename = filename
1594  splitfname = self.filenamefilename.split('/')[-1].split('_')
1595  i = False
1596  for s in oz_sensors:
1597  if s in splitfname:
1598  i = splitfname.index(s)
1599  self.obstypeobstype = "_".join(splitfname[i:i+2])
1600  if not i:
1601  raise ValueError("Observation is not an ozone type...")
1602  # sensor and satellite
1603  self.sensorsensor = splitfname[i]
1604  self.satellitesatellite = splitfname[i+1]
1605 
1606  def read(self):
1607  # get valid time
1608  df = nc.Dataset(self.filenamefilename)
1609  tstr = str(df.getncattr('date_time'))
1610  self.validtimevalidtime = dt.datetime.strptime(tstr, "%Y%m%d%H")
1611  # number of observations
1612  self.nobsnobs = len(df.dimensions['nobs'])
1613  self.dfdf = df
1614 
1615  def toGeovals(self, OutDir, clobber=True):
1616  """ toGeovals(OutDir,clobber=True)
1617  if model state fields are in the GSI diag file, create
1618  GeoVaLs in an output file for use by JEDI/UFO
1619  """
1620  # note, this is a temporary construct and thus, there is no
1621  # ioda_conv_ncio or equivalent to handle the format
1622 
1623  # set up output file
1624  outname = OutDir+'/'+self.sensorsensor+'_'+self.satellitesatellite+'_geoval_'+self.validtimevalidtime.strftime("%Y%m%d%H")+'.nc4'
1625  if not clobber:
1626  if (os.path.exists(outname)):
1627  print("File exists. Skipping and not overwriting: %s" % outname)
1628  return
1629  OutVars = []
1630  InVars = []
1631  for ncv in self.dfdf.variables:
1632  if ncv in geovals_vars:
1633  OutVars.append(geovals_vars[ncv])
1634  InVars.append(ncv)
1635 
1636  # set up output file
1637  ncout = nc.Dataset(outname, 'w', format='NETCDF4')
1638  ncout.setncattr("date_time", np.int32(self.validtimevalidtime.strftime("%Y%m%d%H")))
1639  ncout.setncattr("satellite", self.satellitesatellite)
1640  ncout.setncattr("sensor", self.sensorsensor)
1641  # get nlocs
1642  nlocs = self.nobsnobs
1643  ncout.createDimension("nlocs", nlocs)
1644  # other dims
1645  ncout.createDimension("nlevs", self.dfdf.dimensions["mole_fraction_of_ozone_in_air_arr_dim"].size)
1646  if (self.sensorsensor not in ["ompslp", "mls55"]):
1647  ncout.createDimension("nlevsp1", self.dfdf.dimensions["air_pressure_levels_arr_dim"].size)
1648  for var in self.dfdf.variables.values():
1649  vname = var.name
1650  if vname in geovals_metadata_dict.keys():
1651  dims = ("nlocs",)
1652  var_out = ncout.createVariable(geovals_metadata_dict[vname], var.dtype, dims)
1653  vdata = var[:]
1654  var_out[:] = vdata
1655  elif vname in geovals_vars.keys():
1656  if (len(var.dimensions) == 1):
1657  dims = ("nlocs",)
1658  elif "_levels" in vname:
1659  dims = ("nlocs", "nlevsp1")
1660  else:
1661  dims = ("nlocs", "nlevs")
1662  var_out = ncout.createVariable(geovals_vars[vname], var.dtype, dims)
1663  vdata = var[...]
1664  var_out[...] = vdata
1665  else:
1666  pass
1667  ncout.close()
1668 
1669  def toIODAobs(self, OutDir, clobber=True):
1670  """ toIODAobs(OutDir,clobber=True)
1671  output observations from the specified GSI diag file
1672  to the JEDI/IODA observation format
1673  """
1674  # set up a NcWriter class
1675  outname = OutDir+'/'+self.sensorsensor+'_'+self.satellitesatellite+'_obs_'+self.validtimevalidtime.strftime("%Y%m%d%H")+'.nc4'
1676  if not clobber:
1677  if (os.path.exists(outname)):
1678  print("File exists. Skipping and not overwriting: %s" % outname)
1679  return
1680  LocKeyList = []
1681  LocVars = []
1682  AttrData = {}
1683  varDict = defaultdict(lambda: defaultdict(dict))
1684  outdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
1685  loc_mdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
1686  var_mdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
1687  # get list of location variable for this var/platform
1688  for ncv in self.dfdf.variables:
1689  if ncv in all_LocKeyList:
1690  LocKeyList.append(all_LocKeyList[ncv])
1691  LocVars.append(ncv)
1692  # for now, record len is 1 and the list is empty?
1693  writer = iconv.NcWriter(outname, LocKeyList)
1694 
1695  nlocs = self.nobsnobs
1696  vname = "integrated_layer_ozone_in_air"
1697  if (self.sensorsensor in ["ompslp", "mls55"]):
1698  vname = "mole_fraction_of_ozone_in_air"
1699  varDict[vname]['valKey'] = vname, writer.OvalName()
1700  varDict[vname]['errKey'] = vname, writer.OerrName()
1701  varDict[vname]['qcKey'] = vname, writer.OqcName()
1702 
1703  obsdata = self.varvar('Observation')
1704  try:
1705  tmp = self.varvar('Input_Observation_Error')
1706  except IndexError:
1707  tmp = 1./self.varvar('Inverse_Observation_Error')
1708  tmp[tmp < self.EPSILONEPSILON] = 0
1709  obserr = tmp
1710  obserr[np.isinf(obserr)] = self.FLOAT_FILLFLOAT_FILL
1711  obsqc = self.varvar('Analysis_Use_Flag').astype(int)
1712  locKeys = []
1713  for lvar in LocVars:
1714  loc_mdata_name = all_LocKeyList[lvar][0]
1715  if lvar == 'Time':
1716  tmp = self.varvar(lvar)
1717  obstimes = [self.validtimevalidtime+dt.timedelta(hours=float(tmp[a])) for a in range(len(tmp))]
1718  obstimes = [a.strftime("%Y-%m-%dT%H:%M:%SZ") for a in obstimes]
1719  loc_mdata[loc_mdata_name] = writer.FillNcVector(obstimes, "datetime")
1720  else:
1721  tmp = self.varvar(lvar)
1722  tmp[tmp > 4e8] = self.FLOAT_FILLFLOAT_FILL
1723  loc_mdata[loc_mdata_name] = tmp
1724 
1725  for gsivar, iodavar in gsi_add_vars.items():
1726  # some special actions need to be taken depending on var name...
1727  if gsivar in self.dfdf.variables:
1728  if "Inverse" in gsivar:
1729  tmp = self.varvar(gsivar)
1730  # fix for if some reason 1/small does not result in inf but zero
1731  mask = tmp < self.EPSILONEPSILON
1732  tmp[~mask] = 1.0 / tmp[~mask]
1733  tmp[mask] = self.FLOAT_FILLFLOAT_FILL
1734  elif "Obs_Minus_" in gsivar:
1735  if 'Forecast_adjusted' in self.dfdf.variables:
1736  continue
1737  key1 = 'Observation'
1738  tmp = self.varvar(key1) - self.varvar(gsivar)
1739  else:
1740  tmp = self.varvar(gsivar)
1741  if gsivar in gsiint:
1742  tmp = tmp.astype(int)
1743  else:
1744  tmp[tmp > 4e8] = self.FLOAT_FILLFLOAT_FILL
1745  gvname = vname, iodavar
1746  outdata[gvname] = tmp
1747  # observation data
1748  outdata[varDict[vname]['valKey']] = obsdata
1749  outdata[varDict[vname]['errKey']] = obserr
1750  outdata[varDict[vname]['qcKey']] = obsqc
1751 
1752  # dummy record metadata, for now
1753  loc_mdata['record_number'] = np.full((nlocs), 1, dtype='i4')
1754 
1755  AttrData["date_time_string"] = self.validtimevalidtime.strftime("%Y-%m-%dT%H:%M:%SZ")
1756  AttrData["satellite"] = self.satellitesatellite
1757  AttrData["sensor"] = self.sensorsensor
1758  # set dimension lengths in the writer since we are bypassing
1759  # ExtractObsData
1760  writer._nvars = 1
1761  writer._nlocs = nlocs
1762 
1763  writer.BuildNetcdf(outdata, loc_mdata, var_mdata, AttrData, units_values)
1764  print("Ozone obs processed, wrote to: %s" % outname)
1765 
1766 
1768  """ class Radar - reflectivity and radial wind observations
1769 
1770  Use this class to read in radar observations
1771  from GSI netCDF diag files
1772 
1773  Functions:
1774 
1775  Attributes:
1776  filename - string path to file
1777  validtime - datetime object of valid observation time
1778  nobs - number of observations
1779 
1780  """
1781  def __init__(self, filename):
1782  self.filenamefilename = filename
1783  splitfname = self.filenamefilename.split('/')[-1].split('_')
1784  i = False
1785  for s in radar_sensors:
1786  if s in splitfname:
1787  i = splitfname.index(s)
1788  self.obstypeobstype = "_".join(splitfname[i:i+2])
1789  if not i:
1790  raise ValueError("Observation is not a radar type...")
1791  # sensor and satellite
1792  self.sensorsensor = splitfname[i]
1793  self.obstypeobstype = splitfname[i+1]
1794 
1795  def read(self):
1796  # get valid time
1797  df = nc.Dataset(self.filenamefilename)
1798  tstr = str(df.getncattr('date_time'))
1799  self.validtimevalidtime = dt.datetime.strptime(tstr, "%Y%m%d%H")
1800  # number of observations
1801  self.nobsnobs = len(df.dimensions['nobs'])
1802  self.dfdf = df
1803 
1804  def toGeovals(self, OutDir, clobber=True):
1805  """ toGeovals(OutDir,clobber=True)
1806  if model state fields are in the GSI diag file, create
1807  GeoVaLs in an output file for use by JEDI/UFO
1808  """
1809  # note, this is a temporary construct and thus, there is no
1810  # ioda_conv_ncio or equivalent to handle the format
1811 
1812  # set up output file
1813  outname = OutDir+'/'+self.sensorsensor+'_'+self.obstypeobstype+'_geoval_'+self.validtimevalidtime.strftime("%Y%m%d%H")+'.nc4'
1814  if not clobber:
1815  if (os.path.exists(outname)):
1816  print("File exists. Skipping and not overwriting: %s" % outname)
1817  return
1818  OutVars = []
1819  InVars = []
1820  for ncv in self.dfdf.variables:
1821  if ncv in geovals_vars:
1822  OutVars.append(geovals_vars[ncv])
1823  InVars.append(ncv)
1824 
1825  # set up output file
1826  ncout = nc.Dataset(outname, 'w', format='NETCDF4')
1827  ncout.setncattr("date_time", np.int32(self.validtimevalidtime.strftime("%Y%m%d%H")))
1828  # get nlocs
1829  nlocs = self.nobsnobs
1830  ncout.createDimension("nlocs", nlocs)
1831  # other dims
1832  ncout.createDimension("nlevs", self.dfdf.dimensions["nlevs"].size)
1833  # ncout.createDimension("nlevsp1", self.df.dimensions["air_pressure_levels_arr_dim"].size)
1834  for var in self.dfdf.variables.values():
1835  vname = var.name
1836  if vname in geovals_metadata_dict.keys():
1837  dims = ("nlocs",)
1838  var_out = ncout.createVariable(geovals_metadata_dict[vname], var.dtype, dims)
1839  vdata = var[:]
1840  var_out[:] = vdata
1841  elif vname in geovals_vars.keys():
1842  if (len(var.dimensions) == 1):
1843  dims = ("nlocs",)
1844  elif "_levels" in vname:
1845  dims = ("nlocs", "nlevsp1")
1846  else:
1847  dims = ("nlocs", "nlevs")
1848  var_out = ncout.createVariable(geovals_vars[vname], var.dtype, dims)
1849  vdata = var[...]
1850  var_out[...] = vdata
1851  else:
1852  pass
1853  ncout.close()
1854 
1855  def toIODAobs(self, OutDir, clobber=True):
1856  """ toIODAobs(OutDir,clobber=True)
1857  output observations from the specified GSI diag file
1858  to the JEDI/IODA observation format
1859  """
1860  # set up a NcWriter class
1861  outname = OutDir+'/'+self.sensorsensor+'_'+self.obstypeobstype+'_obs_'+self.validtimevalidtime.strftime("%Y%m%d%H")+'.nc4'
1862  if not clobber:
1863  if (os.path.exists(outname)):
1864  print("File exists. Skipping and not overwriting:%s" % outname)
1865  return
1866  LocKeyList = []
1867  LocVars = []
1868  AttrData = {}
1869  varDict = defaultdict(lambda: defaultdict(dict))
1870  outdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
1871  loc_mdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
1872  var_mdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
1873  # get list of location variable for this var/platform
1874  for ncv in self.dfdf.variables:
1875  if ncv in all_LocKeyList:
1876  LocKeyList.append(all_LocKeyList[ncv])
1877  LocVars.append(ncv)
1878  # for now, record len is 1 and the list is empty?
1879  writer = iconv.NcWriter(outname, LocKeyList)
1880 
1881  nlocs = self.nobsnobs
1882  if self.obstypeobstype == "dbz":
1883  radar_varnames = {
1884  'obsdbz': 'equivalent_reflectivity_factor',
1885  }
1886  elif self.obstypeobstype == "rw":
1887  radar_varnames = {
1888  'obsrw': 'radial_velocity',
1889  }
1890 
1891  for key, value in radar_varnames.items():
1892  varDict[value]['valKey'] = value, writer.OvalName()
1893  varDict[value]['errKey'] = value, writer.OerrName()
1894  varDict[value]['qcKey'] = value, writer.OqcName()
1895 
1896  obsdata = self.varvar(key)
1897  errvarname = radar_err[key]
1898  qcvarname = radar_qc[key]
1899  obserr = self.varvar(errvarname)
1900  obserr[np.isinf(obserr)] = self.FLOAT_FILLFLOAT_FILL
1901  obsqc = self.varvar(qcvarname).astype(int)
1902  # observation data
1903  outdata[varDict[value]['valKey']] = obsdata
1904  outdata[varDict[value]['errKey']] = obserr
1905  outdata[varDict[value]['qcKey']] = obsqc
1906  vname = value
1907  for gsivar, iodavar in gsi_add_vars.items():
1908  # some special actions need to be taken depending on var name...
1909  if gsivar in self.dfdf.variables:
1910  if "Inverse" in gsivar:
1911  tmp = self.varvar(gsivar)
1912  # fix for if some reason 1/small does not result in inf but zero
1913  mask = tmp < self.EPSILONEPSILON
1914  tmp[~mask] = 1.0 / tmp[~mask]
1915  tmp[mask] = self.FLOAT_FILLFLOAT_FILL
1916  else:
1917  tmp = self.varvar(gsivar)[:]
1918  if gsivar in gsiint:
1919  tmp = tmp.astype(int)
1920  else:
1921  tmp[tmp > 4e8] = self.FLOAT_FILLFLOAT_FILL
1922  gvname = vname, iodavar
1923  outdata[gvname] = tmp
1924  locKeys = []
1925  for lvar in LocVars:
1926  loc_mdata_name = all_LocKeyList[lvar][0]
1927  if lvar == 'Time':
1928  tmp = self.varvar(lvar)[:]
1929  obstimes = [self.validtimevalidtime+dt.timedelta(hours=float(tmp[a])) for a in range(len(tmp))]
1930  obstimes = [a.strftime("%Y-%m-%dT%H:%M:%SZ") for a in obstimes]
1931  loc_mdata[loc_mdata_name] = writer.FillNcVector(obstimes, "datetime")
1932  else:
1933  tmp = self.varvar(lvar)[:]
1934  tmp[tmp > 4e8] = self.FLOAT_FILLFLOAT_FILL
1935  loc_mdata[loc_mdata_name] = tmp
1936 
1937  # dummy record metadata, for now
1938  loc_mdata['record_number'] = np.full((nlocs), 1, dtype='i4')
1939 
1940  AttrData["date_time_string"] = self.validtimevalidtime.strftime("%Y-%m-%dT%H:%M:%SZ")
1941  AttrData["sensor"] = self.sensorsensor
1942  # set dimension lengths in the writer since we are bypassing
1943  # ExtractObsData
1944  writer._nvars = 1
1945  writer._nlocs = nlocs
1946 
1947  writer.BuildNetcdf(outdata, loc_mdata, var_mdata, AttrData, units_values)
1948  print("Radar obs processed, wrote to: %s" % outname)
def __init__(self, filename)
Definition: gsi_ncdiag.py:1380
def read(self)
Definition: gsi_ncdiag.py:1394
def toIODAobs(self, OutDir, clobber=True)
Definition: gsi_ncdiag.py:1457
def toGeovals(self, OutDir, clobber=True)
Definition: gsi_ncdiag.py:1404
def _as_array(netcdf_var)
Definition: gsi_ncdiag.py:532
def var(self, var_name)
Definition: gsi_ncdiag.py:535
def __init__(self, filename)
Definition: gsi_ncdiag.py:556
def toIODAobs(self, OutDir, clobber=True, platforms=None)
Definition: gsi_ncdiag.py:667
def close(self)
Definition: gsi_ncdiag.py:582
def toGeovals(self, OutDir, clobber=True)
Definition: gsi_ncdiag.py:585
def read(self)
Definition: gsi_ncdiag.py:573
def __init__(self, filename)
Definition: gsi_ncdiag.py:1592
def read(self)
Definition: gsi_ncdiag.py:1606
def toIODAobs(self, OutDir, clobber=True)
Definition: gsi_ncdiag.py:1669
def toGeovals(self, OutDir, clobber=True)
Definition: gsi_ncdiag.py:1615
def toIODAobs(self, OutDir, clobber=True)
Definition: gsi_ncdiag.py:1855
def toGeovals(self, OutDir, clobber=True)
Definition: gsi_ncdiag.py:1804
def read(self)
Definition: gsi_ncdiag.py:1795
def __init__(self, filename)
Definition: gsi_ncdiag.py:1781
def toIODAobs(self, OutDir, ObsBias, QCVars, TestRefs, clobber=True)
Definition: gsi_ncdiag.py:1078
def __init__(self, filename)
Definition: gsi_ncdiag.py:909
def toObsdiag(self, OutDir, clobber=True)
Definition: gsi_ncdiag.py:993
def toGeovals(self, OutDir, clobber=True)
Definition: gsi_ncdiag.py:936
def grabobsidx(obsdata, platform, var)
Definition: gsi_ncdiag.py:858
void encode(DataHandle &out, const std::vector< ColumnInfo > &columns, const std::vector< ConstStridedData > &data, const std::map< std::string, std::string > &properties, size_t maxRowsPerFrame)
Definition: Odb.cc:400