IODA Bundle
viirs_modis_l2_oc2ioda.py
#!/usr/bin/env python3

#
# (C) Copyright 2020 UCAR
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#

from __future__ import print_function
import sys
import argparse
import netCDF4 as nc
from datetime import datetime, timedelta
import dateutil.parser
import numpy as np
from multiprocessing import Pool
import os
from pathlib import Path

IODA_CONV_PATH = Path(__file__).parent/"@SCRIPT_LIB_PATH@"
if not IODA_CONV_PATH.is_dir():
    IODA_CONV_PATH = Path(__file__).parent/'..'/'lib-python'
sys.path.append(str(IODA_CONV_PATH.resolve()))

import ioda_conv_ncio as iconv

output_var_names = [
    "ocean_mass_content_of_particulate_organic_matter_expressed_as_carbon",
    "mass_concentration_of_chlorophyll_in_sea_water"]


def read_input(input_args):
    """
    Reads/converts a single input file, performing optional thinning also.

    Arguments:

        input_args: A tuple of input filename and global_config
            input_file: The name of file to read
            global_config: structure for global arguments related to read

    Returns:

        A tuple of (obs_data, loc_data, attr_data) needed by the IODA writer.
    """
    input_file = input_args[0]
    global_config = input_args[1]

    print("Reading ", input_file)
    ncd = nc.Dataset(input_file, 'r')

    # get global attributes
    attr_data = {}
    for v in ('platform', 'instrument', 'processing_level'):
        attr_data[v] = ncd.getncattr(v)

    # get QC flags, and calculate a mask from the non-missing values
    # (since L2 OC files are quite empty, a mask is applied immediately
    #  to avoid using too much memory)
    gpd = ncd.groups['geophysical_data']
    data_in = {}
    data_in['l2_flags'] = gpd.variables['l2_flags'][:].ravel()
    mask = data_in['l2_flags'] < 1073741824
    data_in['l2_flags'] = data_in['l2_flags'][mask]
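    # NOTE: 1073741824 == 2**30, so this keeps only pixels whose l2_flags
    # bit 30 is not set; in the NASA OBPG L2 flag convention that bit
    # appears to correspond to PRODFAIL (product failure).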

    # Determine the lat/lon grid
    ngd = ncd.groups['navigation_data']
    number_of_lines = len(ngd.variables['longitude'][:, 1])
    pixels_per_line = len(ngd.variables['longitude'][1, :])
    lons = ngd.variables['longitude'][:].ravel()[mask]
    lats = ngd.variables['latitude'][:].ravel()[mask]

    # get basetime and time as a difference in seconds
    sla = ncd.groups['scan_line_attributes']
    basetime = datetime(sla.variables['year'][0], 1, 1) + \
        timedelta(days=int(sla.variables['day'][0]-1),
                  milliseconds=int(sla.variables['msec'][0]))
    time = (np.repeat(sla.variables['msec'][:].ravel(),
            pixels_per_line).ravel() - sla.variables['msec'][0])/1000.0
    data_in['time'] = time[mask]
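    # Each scan line has a single msec value, repeated across the
    # pixels_per_line pixels of that line, so every pixel in a scan line
    # shares the same time offset (seconds relative to the first scan line).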

    # load in all the other data and apply the missing value mask
    input_vars = ('poc', 'chlor_a')
    for v in input_vars:
        if v not in data_in:
            data_in[v] = gpd.variables[v][:].ravel()[mask]
    ncd.close()

    # Create a mask for optional random thinning
    np.random.seed(
        int((global_config['date']-datetime(1970, 1, 1)).total_seconds()))
    mask = np.random.uniform(size=len(lons)) > global_config['thin']
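    # An observation is kept when its uniform draw exceeds 'thin', so e.g.
    # thin=0.75 retains roughly 25% of the observations; seeding with the
    # analysis date keeps the thinning reproducible from run to run.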

    # apply the masks for thinning and missing values
    lons = lons[mask]
    lats = lats[mask]
    data_in['time'] = data_in['time'][mask]
    data_in['l2_flags'] = data_in['l2_flags'][mask]
    for v in input_vars:
        data_in[v] = data_in[v][mask]

    # create a string version of the date for each observation
    dates = []
    for i in range(len(lons)):
        obs_date = basetime + timedelta(seconds=float(data_in['time'][i]))
        dates.append(obs_date.strftime("%Y-%m-%dT%H:%M:%SZ"))

    # allocate space for output depending on which variables are to be saved
    num_vars = 0
    obs_dim = (len(lons))
    obs_data = {}
    if global_config['output_poc']:
        obs_data[(output_var_names[0], global_config['oval_name'])] = \
            np.zeros(obs_dim)
        obs_data[(output_var_names[0], global_config['oerr_name'])] = \
            np.zeros(obs_dim)
        obs_data[(output_var_names[0], global_config['opqc_name'])] = \
            np.zeros(obs_dim)
        num_vars += 1
    if global_config['output_chl']:
        obs_data[(output_var_names[1], global_config['oval_name'])] = \
            np.zeros(obs_dim)
        obs_data[(output_var_names[1], global_config['oerr_name'])] = \
            np.zeros(obs_dim)
        obs_data[(output_var_names[1], global_config['opqc_name'])] = \
            np.zeros(obs_dim)
        num_vars += 1
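    # obs_data is keyed by (output variable name, group name) tuples; the
    # group names come from the writer and, under the IODA convention, are
    # expected to be ObsValue, ObsError and PreQC.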

    # create the final output structures
    loc_data = {
        'latitude': lats,
        'longitude': lons,
        'datetime': dates,
    }
    if global_config['output_poc']:
        obs_data[(output_var_names[0], global_config['oval_name'])] = \
            data_in['poc']
        obs_data[(output_var_names[0], global_config['oerr_name'])] = \
            data_in['poc']*0.0
        obs_data[(output_var_names[0], global_config['opqc_name'])] = \
            data_in['l2_flags']
    if global_config['output_chl']:
        obs_data[(output_var_names[1], global_config['oval_name'])] = \
            data_in['chlor_a']
        obs_data[(output_var_names[1], global_config['oerr_name'])] = \
            data_in['chlor_a']*0.0
        obs_data[(output_var_names[1], global_config['opqc_name'])] = \
            data_in['l2_flags']

    return (obs_data, loc_data, attr_data)


def main():

    # Get command line arguments
    parser = argparse.ArgumentParser(
        description=(
            'Reads the particulate organic carbon and chlorophyll data from'
            ' VIIRS/MODIS Specification formatted L2 file(s) and converts'
            ' into IODA formatted output files. Multiple files are'
            ' concatenated and optional thinning can be performed.')
    )

    required = parser.add_argument_group(title='required arguments')
    required.add_argument(
        '-i', '--input',
        help="path of VIIRS/MODIS L2 Ocean Color observation input file(s)",
        type=str, nargs='+', required=True)
    required.add_argument(
        '-o', '--output',
        help="path of IODA output file",
        type=str, required=True)
    required.add_argument(
        '-d', '--date',
        metavar="YYYYMMDDHH",
        help="base date for the center of the window",
        type=str, required=True)

    optional = parser.add_argument_group(title='optional arguments')
    optional.add_argument(
        '-t', '--thin',
        help="fraction of observations to randomly remove (thin), from 0.0"
             " to 1.0. Zero indicates no thinning is performed."
             " (default: %(default)s)",
        type=float, default=0.0)
    optional.add_argument(
        '--threads',
        help='number of worker processes used to load input files in'
             ' parallel. (default: %(default)s)',
        type=int, default=1)
    optional.add_argument(
        '--poc',
        help='if set, only poc is output.'
             ' Otherwise, poc and chlor_a are both output.',
        action='store_true')
    optional.add_argument(
        '--chl',
        help='if set, only chlor_a (OCI algorithm) is output.'
             ' Otherwise, poc and chlor_a are both output.',
        action='store_true')

    args = parser.parse_args()
    args.date = datetime.strptime(args.date, '%Y%m%d%H')
    if not args.chl and not args.poc:
        args.chl = True
        args.poc = True

    # setup the IODA writer
    writer = iconv.NcWriter(args.output, [], [])

    # Setup the configuration that is passed to each worker process.
    # Note: Pool.map creates separate processes, and can only take iterable
    # objects. Rather than using global variables, embed them into the
    # iterable object together with the argument array passed in (args.input)
    global_config = {}
    global_config['date'] = args.date
    global_config['thin'] = args.thin
    global_config['oval_name'] = writer.OvalName()
    global_config['oerr_name'] = writer.OerrName()
    global_config['opqc_name'] = writer.OqcName()
    global_config['output_poc'] = args.poc
    global_config['output_chl'] = args.chl
    pool_inputs = [(i, global_config) for i in args.input]

    # read / process files in parallel
    pool = Pool(args.threads)
    obs = pool.map(read_input, pool_inputs)
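    # Each worker receives one (filename, global_config) tuple and returns
    # an (obs_data, loc_data, attr_data) triple; 'obs' holds one such triple
    # per input file, in the same order as args.input.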

    # concatenate the data from the files
    obs_data, loc_data, attr_data = obs[0]
    loc_data['datetime'] = writer.FillNcVector(
        loc_data['datetime'], "datetime")
    for i in range(1, len(obs)):
        for k in obs_data:
            axis = len(obs[i][0][k].shape)-1
            obs_data[k] = np.concatenate(
                (obs_data[k], obs[i][0][k]), axis=axis)
        for k in loc_data:
            d = obs[i][1][k]
            if k == 'datetime':
                d = writer.FillNcVector(d, 'datetime')
            loc_data[k] = np.concatenate((loc_data[k], d), axis=0)

    # prepare global attributes we want to output in the file,
    # in addition to the ones already loaded in from the input file
    attr_data['date_time_string'] = args.date.strftime("%Y-%m-%dT%H:%M:%SZ")
    attr_data['thinning'] = args.thin
    attr_data['converter'] = os.path.basename(__file__)

    # determine which variables we are going to output
    selected_names = []
    if args.poc:
        selected_names.append(output_var_names[0])
    if args.chl:
        selected_names.append(output_var_names[1])
    var_data = {writer._var_list_name:
                writer.FillNcVector(selected_names, "string")}

    # pass parameters to the IODA writer
    # (needed because we are bypassing ExtractObsData within BuildNetcdf)
    writer._nvars = len(selected_names)
    writer._nlocs = obs_data[(selected_names[0], 'ObsValue')].shape[0]
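    # The hard-coded 'ObsValue' group name above assumes writer.OvalName()
    # returns 'ObsValue', i.e. the same group key used when filling obs_data.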

    # use the writer class to create the final output file
    writer.BuildNetcdf(obs_data, loc_data, var_data, attr_data)


if __name__ == '__main__':
    main()
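
# Example invocation (the granule file names below are hypothetical; any
# VIIRS/MODIS L2 OC files with the expected group layout should work):
#
#   python viirs_modis_l2_oc2ioda.py \
#       -i V2020245180000.L2_SNPP_OC.nc V2020245181000.L2_SNPP_OC.nc \
#       -o viirs_oc_2020090118.nc \
#       -d 2020090118 -t 0.9 --threads 2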