IODA Bundle
gds2_sst2ioda.py
#!/usr/bin/env python3

#
# (C) Copyright 2019 UCAR
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#

from __future__ import print_function
import sys
import argparse
import netCDF4 as nc
from datetime import datetime, timedelta
import dateutil.parser
import numpy as np
from multiprocessing import Pool
import os
from pathlib import Path

IODA_CONV_PATH = Path(__file__).parent/"@SCRIPT_LIB_PATH@"
if not IODA_CONV_PATH.is_dir():
    IODA_CONV_PATH = Path(__file__).parent/'..'/'lib-python'
sys.path.append(str(IODA_CONV_PATH.resolve()))
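# Note: "@SCRIPT_LIB_PATH@" is a placeholder that is presumably expanded to the
# installed library path at build/install time; if it has not been expanded,
# the '../lib-python' fallback above lets the script run from the source tree.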

import ioda_conv_ncio as iconv

output_var_names = [
    "sea_surface_temperature",
    "sea_surface_skin_temperature"]


def read_input(input_args):
    """
    Reads/converts a single input file, performing optional thinning as well.

    Arguments:

        input_args: a tuple of (input_file, global_config), where
            input_file: the name of the file to read
            global_config: a structure of global arguments related to the read

    Returns:

        A tuple of (obs_data, loc_data, attr_data) needed by the IODA writer.
    """
    input_file = input_args[0]
    global_config = input_args[1]

    print("Reading ", input_file)
    ncd = nc.Dataset(input_file, 'r')

    # get the base time (should only have 1 or 2 time slots)
    time_base = ncd.variables['time'][:]
    basetime = dateutil.parser.parse(ncd.variables['time'].units[-20:])

    # get some of the global attributes that we are interested in
    attr_data = {}
    for v in ('platform', 'sensor', 'processing_level'):
        attr_data[v] = ncd.getncattr(v)

    # get the QC flags, and calculate a mask from the non-missing values
    # (since L3 files are mostly empty, fields need a mask applied immediately
    # to avoid using way too much memory)
    data_in = {}
    data_in['quality_level'] = ncd.variables['quality_level'][:].ravel()
    mask = data_in['quality_level'] >= 0
    data_in['quality_level'] = data_in['quality_level'][mask]

    # Determine the lat/lon grid.
    # If L3, we need to convert the 1D lat/lon to a 2D lat/lon.
    # If len(time) > 1, we also need to repeat the lat/lon values.
    lons = ncd.variables['lon'][:].ravel()
    lats = ncd.variables['lat'][:].ravel()
    if attr_data['processing_level'][:2] == 'L3':
        len_grid = len(lons)*len(lats)
        lons, lats = np.meshgrid(lons, lats, copy=False)
        lons = np.tile(lons.ravel(), len(time_base)).ravel()[mask]
        lats = np.tile(lats.ravel(), len(time_base)).ravel()[mask]
    else:
        len_grid = len(lons)
        lons = np.tile(lons, len(time_base)).ravel()[mask]
        lats = np.tile(lats, len(time_base)).ravel()[mask]

    # calculate the basetime offsets
    time = np.tile(np.atleast_2d(time_base).T, (1, len_grid)).ravel()[mask]

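    # Shape illustration (hypothetical sizes nt, ny, nx): an L3 field ravels
    # to length nt*ny*nx, so the 1D lat/lon are expanded the same way:
    #   np.meshgrid(lon, lat) -> two (ny, nx) arrays,
    #   np.tile(..., nt).ravel() -> length nt*ny*nx,
    # and the time offsets are tiled to (nt, ny*nx) before being raveled,
    # so all arrays line up element-for-element once the mask is applied.
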
    # load in all the other data and apply the missing value mask
    input_vars = (
        'quality_level',
        'sst_dtime',
        'sses_bias',
        'sses_standard_deviation',
        'sea_surface_temperature')
    for v in input_vars:
        if v not in data_in:
            data_in[v] = ncd.variables[v][:].ravel()[mask]
    ncd.close()

    # Create a mask for optional random thinning
    np.random.seed(
        int((global_config['date']-datetime(1970, 1, 1)).total_seconds()))
    mask = np.random.uniform(size=len(lons)) > global_config['thin']

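    # For example, with --thin 0.9 roughly 10% of the observations survive the
    # comparison above; seeding the generator from the window-center date makes
    # the thinning reproducible for a given cycle.
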
    # also, sometimes certain input variables have their own mask due to
    # missing values
    for v in input_vars:
        if np.ma.is_masked(data_in[v]):
            mask = np.logical_and(mask, np.logical_not(data_in[v].mask))

    # apply the masks for thinning and missing values
    time = time[mask]
    lons = lons[mask]
    lats = lats[mask]
    for v in input_vars:
        data_in[v] = data_in[v][mask]

    # create a string version of the date for each observation
    dates = []
    for i in range(len(lons)):
        obs_date = basetime + \
            timedelta(seconds=float(time[i]+data_in['sst_dtime'][i]))
        dates.append(obs_date.strftime("%Y-%m-%dT%H:%M:%SZ"))

    # calculate output values
    # Note: the qc flags in GDS2.0 run from 0 to 5, with higher numbers
    # being better. IODA typically expects 0 to be good, and higher numbers
    # to be bad, so the qc flags are flipped here.
    # TODO: change everything in soca to handle K instead of C?
    val_sst_skin = data_in['sea_surface_temperature'] - 273.15
    val_sst = val_sst_skin - data_in['sses_bias']
    err = data_in['sses_standard_deviation']
    qc = 5 - data_in['quality_level']

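    # For example, a GDS2.0 quality_level of 5 (best) becomes an IODA qc of 0,
    # and a quality_level of 0 becomes 5; temperatures are converted from
    # Kelvin to Celsius, and the bulk value is the skin value minus sses_bias.
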
    # allocate space for output depending on which variables are to be saved
    num_vars = 0
    obs_dim = (len(lons))
    obs_data = {}
    if global_config['output_sst']:
        obs_data[(output_var_names[0], global_config['oval_name'])] = np.zeros(obs_dim)
        obs_data[(output_var_names[0], global_config['oerr_name'])] = np.zeros(obs_dim)
        obs_data[(output_var_names[0], global_config['opqc_name'])] = np.zeros(obs_dim)
        num_vars += 1
    if global_config['output_skin_sst']:
        obs_data[(output_var_names[1], global_config['oval_name'])] = np.zeros(obs_dim)
        obs_data[(output_var_names[1], global_config['oerr_name'])] = np.zeros(obs_dim)
        obs_data[(output_var_names[1], global_config['opqc_name'])] = np.zeros(obs_dim)
        num_vars += 1

    # create the final output structures
    loc_data = {
        'latitude': lats,
        'longitude': lons,
        'datetime': dates,
    }
    if global_config['output_sst']:
        obs_data[(output_var_names[0], global_config['oval_name'])] = val_sst
        obs_data[(output_var_names[0], global_config['oerr_name'])] = err
        obs_data[(output_var_names[0], global_config['opqc_name'])] = qc
    if global_config['output_skin_sst']:
        obs_data[(output_var_names[1], global_config['oval_name'])] = val_sst_skin
        obs_data[(output_var_names[1], global_config['oerr_name'])] = err
        obs_data[(output_var_names[1], global_config['opqc_name'])] = qc

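    # The obs_data keys are (variable, group) tuples, e.g.
    # ('sea_surface_temperature', 'ObsValue'), with the group names taken from
    # the writer's OvalName()/OerrName()/OqcName() accessors.
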
    return (obs_data, loc_data, attr_data)


def main():

    # Get command line arguments
    parser = argparse.ArgumentParser(
        description=(
            'Reads the sea surface temperature from any GHRSST Data '
            'Specification (GDS2.0) formatted L2 or L3 file(s) and converts'
            ' into IODA formatted output files. Multiple files are'
            ' concatenated and optional thinning can be performed.')
    )

    required = parser.add_argument_group(title='required arguments')
    required.add_argument(
        '-i', '--input',
        help="path of GHRSST GDS2.0 SST observation input file(s)",
        type=str, nargs='+', required=True)
    required.add_argument(
        '-o', '--output',
        help="path of IODA output file",
        type=str, required=True)
    required.add_argument(
        '-d', '--date',
        metavar="YYYYMMDDHH",
        help="base date for the center of the window",
        type=str, required=True)

    optional = parser.add_argument_group(title='optional arguments')
    optional.add_argument(
        '-t', '--thin',
        help="fraction of random thinning to apply, from 0.0 to 1.0. Zero"
             " indicates no thinning is performed. (default: %(default)s)",
        type=float, default=0.0)
    optional.add_argument(
        '--threads',
        help='multiple threads can be used to load input files in parallel.'
             ' (default: %(default)s)',
        type=int, default=1)
    optional.add_argument(
        '--sst',
        help='if set, only the bias-corrected bulk sst is output.'
             ' Otherwise, both bulk sst and skin sst are output.',
        action='store_true')
    optional.add_argument(
        '--skin_sst',
        help='if set, only the skin or subskin sst is output.'
             ' Otherwise, both bulk sst and skin sst are output.',
        action='store_true')

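    # Example invocation (hypothetical file names):
    #   gds2_sst2ioda.py -i ghrsst_sst_1.nc ghrsst_sst_2.nc -o sst_obs.nc \
    #       -d 2019041512 --thin 0.9 --threads 2
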
    args = parser.parse_args()
    args.date = datetime.strptime(args.date, '%Y%m%d%H')
    if not args.sst and not args.skin_sst:
        args.sst = True
        args.skin_sst = True

    # setup the IODA writer
    writer = iconv.NcWriter(args.output, [], [])

    # Setup the configuration that is passed to each worker process.
    # Note: Pool.map creates separate processes, and can only take iterable
    # objects. Rather than using global variables, embed them into the
    # iterable object together with the argument array passed in (args.input)
    global_config = {}
    global_config['date'] = args.date
    global_config['thin'] = args.thin
    global_config['oval_name'] = writer.OvalName()
    global_config['oerr_name'] = writer.OerrName()
    global_config['opqc_name'] = writer.OqcName()
    global_config['output_sst'] = args.sst
    global_config['output_skin_sst'] = args.skin_sst
    pool_inputs = [(i, global_config) for i in args.input]

    # read / process files in parallel
    pool = Pool(args.threads)
    obs = pool.map(read_input, pool_inputs)

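    # obs is a list with one (obs_data, loc_data, attr_data) tuple per input
    # file, in the same order as args.input.
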
    # concatenate the data from the files
    obs_data, loc_data, attr_data = obs[0]
    loc_data['datetime'] = writer.FillNcVector(
        loc_data['datetime'], "datetime")
    for i in range(1, len(obs)):
        for k in obs_data:
            axis = len(obs[i][0][k].shape)-1
            obs_data[k] = np.concatenate(
                (obs_data[k], obs[i][0][k]), axis=axis)
        for k in loc_data:
            d = obs[i][1][k]
            if k == 'datetime':
                d = writer.FillNcVector(d, 'datetime')
            loc_data[k] = np.concatenate((loc_data[k], d), axis=0)

    # prepare global attributes we want to output in the file,
    # in addition to the ones already loaded in from the input file
    attr_data['date_time_string'] = args.date.strftime("%Y-%m-%dT%H:%M:%SZ")
    attr_data['thinning'] = args.thin
    attr_data['converter'] = os.path.basename(__file__)

    # determine which variables we are going to output
    selected_names = []
    if args.sst:
        selected_names.append(output_var_names[0])
    if args.skin_sst:
        selected_names.append(output_var_names[1])
    var_data = {writer._var_list_name: writer.FillNcVector(
        selected_names, "string")}

    # pass parameters to the IODA writer
    # (needed because we are bypassing ExtractObsData within BuildNetcdf)
    writer._nvars = len(selected_names)
    writer._nlocs = obs_data[(selected_names[0], 'ObsValue')].shape[0]

    # use the writer class to create the final output file
    writer.BuildNetcdf(obs_data, loc_data, var_data, attr_data)


if __name__ == '__main__':
    main()