IODA Bundle
ghcn_snod2ioda.py
#!/usr/bin/env python3
#
# (C) Copyright 2020 NOAA/NWS/NCEP/EMC
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#
# This script works with the decoded CSV files created by NCEI for NOAA/GHCN
# daily data, available at ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/by_year/
#
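# The by_year CSV input is expected to provide the columns read below
# (ID, DATE, ELEMENT, DATA_VALUE, M_FLAG, Q_FLAG, S_FLAG, OBS_TIME), with
# SNWD (snow depth) values in millimetres; an illustrative (not real) record:
#   USC00011084,20200101,SNWD,25,,,7,0700
# The station fixed file is expected to follow the ghcnd-stations.txt layout,
# whose first four whitespace-separated fields are ID, LATITUDE, LONGITUDE
# and ELEVATION.
#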

import time, os, sys
import argparse
import netCDF4 as nc
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from dateutil.parser import parse
from pathlib import Path

IODA_CONV_PATH = Path(__file__).parent/"@SCRIPT_LIB_PATH@"
if not IODA_CONV_PATH.is_dir():
    IODA_CONV_PATH = Path(__file__).parent/'..'/'lib-python'
sys.path.append(str(IODA_CONV_PATH.resolve()))

import ioda_conv_ncio as iconv
from collections import defaultdict, OrderedDict
from orddicts import DefaultOrderedDict

locationKeyList = [
    ("latitude", "float"),
    ("longitude", "float"),
    ("altitude", "float"),
    ("datetime", "string")
]

obsvars = {
    'snow_depth': 'snowDepth',
}

AttrData = {
    'converter': os.path.basename(__file__),
}


class ghcn(object):

    def __init__(self, filename, fixfile, date, mask, writer):
        self.filename = filename
        self.fixfile = fixfile
        self.date = date
        self.mask = mask
        self.writer = writer
        self.varDict = defaultdict(lambda: defaultdict(dict))
        self.outdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
        self.loc_mdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
        self.var_mdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
        self.units = {}
        self._read()

    def _read(self):

        # set up variable names for IODA
        for iodavar in ['snowDepth']:
            self.varDict[iodavar]['valKey'] = iodavar, self.writer.OvalName()
            self.varDict[iodavar]['errKey'] = iodavar, self.writer.OerrName()
            self.varDict[iodavar]['qcKey'] = iodavar, self.writer.OqcName()
            self.units[iodavar] = 'm'

        def assignValue(colrowValue, df400):
            # return the reported DATA_VALUE for a station ID, or the
            # missing value -999.0 if the station has no report
            if colrowValue == '' or pd.isnull(colrowValue):
                outList = -999.0
            else:
                ml = df400.loc[df400['ID'] == colrowValue, "DATA_VALUE"]
                # check if the series is empty
                if not ml.empty:
                    outList = ml.iloc[0]
                else:
                    outList = -999.0
            return outList

        cols = ["ID", "DATETIME", "ELEMENT", "DATA_VALUE", "M_FLAG", "Q_FLAG", "S_FLAG", "OBS_TIME"]
        sub_cols = ["ID", "DATETIME", "ELEMENT", "DATA_VALUE"]
        df30_list = []
        # avoid a DtypeWarning from mixed types by setting low_memory=False
        df20all = pd.read_csv(self.filename, header=None, names=cols, low_memory=False)
        df20 = df20all[sub_cols]
        df30_list.append(df20)

        df30 = pd.concat(df30_list, ignore_index=True)
        # keep only the snow depth (SNWD) records
        df30 = df30[df30["ELEMENT"] == "SNWD"]
        df30["DATETIME"] = df30.apply(lambda row: parse(str(row["DATETIME"])).date(), axis=1)
        # select data for the start date
        startdate = self.date
        df30 = df30[df30["DATETIME"] == parse(startdate).date()]
        # Read the station file
        cols = ["ID", "LATITUDE", "LONGITUDE", "ELEVATION", "STATE", "NAME", "GSN_FLAG", "HCNCRN_FLAG", "WMO_ID"]
        df10all = pd.read_csv(self.fixfile, header=None, sep='\n')
        df10all = df10all[0].str.split('\\s+', expand=True)
        df10 = df10all.iloc[:, 0:4]
        sub_cols = {0: "ID", 1: "LATITUDE", 2: "LONGITUDE", 3: "ELEVATION"}
        df10 = df10.rename(columns=sub_cols)
        df10 = df10.drop_duplicates(subset=["ID"])

        # use the station list as the set of obs points;
        # if there is no data for a station on the given date, leave -999.0
        num_obs = len(df10.index)
        new_date = parse(startdate).date()

        # Initialize data arrays
        vals = np.full((num_obs), -999.0)
        lats = np.full((num_obs), -999.0)
        lons = np.full((num_obs), -999.0)
        alts = np.full((num_obs), -999.0)
        site = np.full((num_obs), -9999, dtype=int)
        id_array = np.chararray((num_obs))
        id_array[:] = "UNKNOWN"

        lats = df10["LATITUDE"].values
        lons = df10["LONGITUDE"].values
        alts = df10["ELEVATION"].values
        id_array = df10["ID"].values

        df100 = pd.DataFrame(data=id_array, columns=['ID'])
        # default every station to missing, then fill in reported values
        df100 = df100.assign(DATA_VALUE=-999.0)
        df30Temp = df30.loc[df30["DATETIME"] == new_date]
        df100["DATA_VALUE"] = df100.apply(lambda row: assignValue(row['ID'], df30Temp), axis=1)

        vals = df100["DATA_VALUE"].values
        vals = vals.astype('float32')
        lats = lats.astype('float32')
        lons = lons.astype('float32')
        alts = alts.astype('float32')
        qflg = 0*vals.astype('int32')
        errs = 0.0*vals
        times = np.empty_like(vals, dtype=object)

        # apply the maskout option: drop locations with missing values
        if self.mask == "maskout":
            with np.errstate(invalid='ignore'):
                mask = vals >= 0.0
                vals = vals[mask]
                errs = errs[mask]
                qflg = qflg[mask]
                lons = lons[mask]
                lats = lats[mask]
                alts = alts[mask]
                times = times[mask]

        # get the datetime from the input date
        my_date = datetime.strptime(startdate, "%Y%m%d")
        start_datetime = my_date.strftime('%Y-%m-%d')
        base_datetime = start_datetime + 'T12:00:00Z'
        AttrData['date_time_string'] = base_datetime

        for i in range(len(vals)):
            if vals[i] >= 0.0:
                errs[i] = 0.0
                vals[i] = 0.001*vals[i]  # convert from mm to m
            times[i] = base_datetime
        self.loc_mdata['datetime'] = self.writer.FillNcVector(times, "datetime")
        self.loc_mdata['latitude'] = lats
        self.loc_mdata['longitude'] = lons
        self.loc_mdata['altitude'] = alts
        for iodavar in ['snowDepth']:
            self.outdata[self.varDict[iodavar]['valKey']] = vals
            self.outdata[self.varDict[iodavar]['errKey']] = errs
            self.outdata[self.varDict[iodavar]['qcKey']] = qflg
        self.writer._nvars = len(obsvars)
        self.writer._nlocs = len(self.loc_mdata['datetime'])


def main():

    parser = argparse.ArgumentParser(
        description=('Read GHCN snow depth file(s) in native CSV format'
                     ' and convert the snow depth observations to IODA'
                     ' netCDF format.')
    )
    parser.add_argument('-i', '--input',
                        help="name of ghcn snow depth input file(s)",
                        type=str, required=True)
    parser.add_argument('-f', '--fixfile',
                        help="name of ghcn station fixed file(s)",
                        type=str, required=True)
    parser.add_argument('-o', '--output',
                        help="name of ioda output file",
                        type=str, required=True)
    parser.add_argument('-d', '--date',
                        help="base date (YYYYMMDD)", type=str, required=True)
    optional = parser.add_argument_group(title='optional arguments')
    optional.add_argument(
        '-m', '--mask',
        help="maskout missing values: maskout/default, default=none",
        type=str, required=True)

    args = parser.parse_args()

    writer = iconv.NcWriter(args.output, locationKeyList)

    # Read in the snow depth observations
    snod = ghcn(args.input, args.fixfile, args.date, args.mask, writer)

    writer.BuildNetcdf(snod.outdata, snod.loc_mdata, snod.var_mdata, AttrData, snod.units)


if __name__ == '__main__':
    main()
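
A typical invocation might look like the following (the file names are hypothetical; the input is a GHCN-Daily by_year CSV and the fixed file is the station list):

    python ghcn_snod2ioda.py -i 2020.csv -f ghcnd-stations.txt -o ghcn_snod_ioda_20200101.nc -d 20200101 -m maskout

Passing -m maskout drops stations with no snow depth report for the selected date; any other value keeps every station and writes -999.0 for missing values.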