IODA Bundle
airnow2ioda-nc.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 # read airnow data and convert to netcdf
3 import netCDF4 as nc
4 import numpy as np
5 import inspect, os, sys, argparse
6 import pandas as pd
7 from datetime import datetime
8 from pathlib import Path
9 
10 IODA_CONV_PATH = Path(__file__).parent/"@SCRIPT_LIB_PATH@"
11 if not IODA_CONV_PATH.is_dir():
12  IODA_CONV_PATH = Path(__file__).parent/'..'/'lib-python'
13 sys.path.append(str(IODA_CONV_PATH.resolve()))
14 import meteo_utils
15 import ioda_conv_ncio as iconv
16 from collections import defaultdict, OrderedDict
17 from orddicts import DefaultOrderedDict
18 
19 
def read_monitor_file(sitefile=None):
    """Read the pipe-delimited AIRNow site list into a DataFrame.

    Parameters
    ----------
    sitefile : str, path-like or file-like
        Site list to read.  It is parsed as a '|' delimited, ISO-8859-1
        encoded table without a header row.  Field 1 of every record is
        skipped; fields 0 and 2-21 are kept.

    Returns
    -------
    pandas.DataFrame
        One row per record with the lower-case columns listed below plus
        a constant ``airnow_flag`` column set to ``'AIRNOW'``.  ``siteid``
        is read as a string, so leading zeros are preserved.

    Raises
    ------
    ValueError
        If ``sitefile`` is None.
    """
    if sitefile is None:
        # Fail with a clear message instead of pandas' generic
        # "Invalid file path or buffer object type" error.
        raise ValueError(
            'read_monitor_file requires the path of an AIRNow site list file')

    colsinuse = [0, *range(2, 22)]
    airnow = pd.read_csv(sitefile, delimiter='|', header=None,
                         usecols=colsinuse, dtype={0: str},
                         encoding="ISO-8859-1")
    airnow.columns = [
        'siteid', 'site_code', 'site_name', 'status', 'agency',
        'agency_name', 'epa_region', 'latitude', 'longitude', 'elevation',
        'gmt_offset', 'country_code', 'cmsa_code', 'cmsa_name', 'msa_code',
        'msa_name', 'state_code', 'state_name', 'county_code',
        'county_name', 'city_code']
    airnow['airnow_flag'] = 'AIRNOW'
    return airnow
34 
35 
def filter_bad_values(df):
    """Replace out-of-range observations with NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric ``obs`` column.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in (it is modified in place);
        every ``obs`` value below 0 or above 3000 is set to NaN.  The
        bounds themselves (0 and 3000) are kept.
    """
    out_of_range = (df['obs'] > 3000) | (df['obs'] < 0)
    # np.nan rather than np.NaN: the upper-case alias was removed in NumPy 2.0.
    df.loc[out_of_range, 'obs'] = np.nan
    return df
48 
49 
def long_to_wide(df):
    """Attach one column per observed variable to the long-format frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format observations with at least the columns ``time``,
        ``siteid``, ``variable``, ``units`` and ``obs``.

    Returns
    -------
    pandas.DataFrame
        ``df`` merged (on ``siteid`` and ``time``) with a pivoted table
        that has one column per distinct ``variable`` holding its ``obs``
        value (pivot_table's default aggregation applies to duplicates),
        plus a constant ``<variable>_unit`` column for each variable.
        The result keeps one row per row of ``df``, so a site/time pair
        appears once for every variable reported for it.
    """
    wide = df.pivot_table(
        values='obs', index=['time', 'siteid'],
        columns='variable').reset_index()

    # The unit of a variable is taken from the first row reporting it.
    for name, group in df.groupby('variable'):
        wide[name + '_unit'] = group['units'].unique()[0]

    return pd.merge(wide, df, on=['siteid', 'time'])
61 
62 
def add_data(infile, sitefile):
    """Read one AIRNow observation file and join it with the site list.

    Parameters
    ----------
    infile : str, path-like or file-like
        Observation file: '|' delimited, ISO-8859-1 encoded, no header,
        nine fields per record (date, time, siteid, site, utcoffset,
        variable, units, obs, source).  Malformed records are skipped
        with a warning.
    sitefile : str, path-like or file-like
        Site list passed to ``read_monitor_file``.

    Returns
    -------
    pandas.DataFrame
        Result of ``long_to_wide`` applied to the merged, de-duplicated
        and range-filtered observations.  Only observations whose
        ``siteid`` is present in the site list survive the (inner) merge.
    """
    # siteid is forced to str so all-numeric ids keep their leading zeros
    # and the .str accessor below works.
    read_opts = dict(delimiter='|', header=None, encoding='ISO-8859-1',
                     dtype={2: str})
    try:
        # pandas >= 1.3; 'warn' matches the old error_bad_lines=False
        # behavior (skip the record and emit a warning).
        df = pd.read_csv(infile, on_bad_lines='warn', **read_opts)
    except TypeError:
        # Older pandas does not know on_bad_lines.
        df = pd.read_csv(infile, error_bad_lines=False, **read_opts)

    df.columns = ['date', 'time', 'siteid', 'site', 'utcoffset', 'variable',
                  'units', 'obs', 'source']
    df['obs'] = df['obs'].astype(float)
    df['siteid'] = df['siteid'].str.zfill(9)
    df['utcoffset'] = df['utcoffset'].astype(int)
    df['time'] = pd.to_datetime(df['date'] + ' ' + df['time'],
                                format='%m/%d/%y %H:%M',
                                exact=True)
    df.drop(['date'], axis=1, inplace=True)
    # NOTE(review): presumably 'time' is UTC and utcoffset is in hours,
    # which makes this the station's local time -- confirm.
    # Lower-case 'h' because the 'H' alias is deprecated in recent pandas.
    df['time_local'] = df['time'] + pd.to_timedelta(df['utcoffset'], unit='h')

    monitor_df = read_monitor_file(sitefile)
    df = pd.merge(df, monitor_df, on='siteid')
    df.drop_duplicates(inplace=True)
    df = filter_bad_values(df)
    return long_to_wide(df)
85 
86 
if __name__ == '__main__':
    # Command-line driver: read one AIRNow observation file plus the site
    # list, and write PM2.5 and ozone as an IODA NetCDF file.

    parser = argparse.ArgumentParser(
        description=(
            'Reads single AIRNow text file '
            ' and converts into IODA formatted output files.')
    )

    required = parser.add_argument_group(title='required arguments')
    required.add_argument(
        '-i', '--input',
        help="path of AIRNow text input file",
        type=str, required=True)
    required.add_argument(
        '-s', '--sitefile',
        help="path of AIRNow site list file",
        type=str, required=True)
    required.add_argument(
        '-o', '--output',
        help="path of IODA output file",
        type=str, required=True)

    args = parser.parse_args()
    print('infile=', args.input, args.sitefile)
    f = add_data(args.input, args.sitefile)

    # Keep only rows that report at least one of the two converted species.
    f3 = f.dropna(subset=['OZONE', 'PM2.5'], how='all')
    nlocs = len(f3)

    obs_names = ('pm25_tot', 'o3')
    AttrData = {'converter': os.path.basename(__file__), }

    locationKeyList = [("latitude", "float"), ("longitude", "float"),
                       ("station_elevation", "float"), ("height", "float"),
                       ("station_id", "string"), ("datetime", "string")]

    writer = iconv.NcWriter(args.output, locationKeyList)

    varDict = defaultdict(lambda: defaultdict(dict))
    outdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
    loc_mdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
    var_mdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
    units = {'pm25_tot': 'microgram/m3', 'o3': 'ppmV'}

    for name in obs_names:
        varDict[name]['valKey'] = name, writer.OvalName()
        varDict[name]['errKey'] = name, writer.OerrName()
        varDict[name]['qcKey'] = name, writer.OqcName()

    # Format each row's own timestamp.  The previous f3.time[1] was a
    # label lookup (KeyError when label 1 had been dropped) and stamped
    # a single row's time on every location.
    d = np.array(f3['time'].dt.strftime('%Y-%m-%dT%H:%M:%SZ'), dtype='S20')
    loc_mdata['datetime'] = writer.FillNcVector(d, 'datetime')
    loc_mdata['latitude'] = np.array(f3['latitude'])
    loc_mdata['longitude'] = np.array(f3['longitude'])
    loc_mdata['height'] = np.full(nlocs, 10.)
    # NOTE(review): station_elevation is a constant 10.0 although the site
    # list provides an 'elevation' column -- confirm this is intended.
    loc_mdata['station_elevation'] = np.full(nlocs, 10.)

    # Build the id array from the data so its width fits the ids;
    # np.empty(..., dtype=str) is '<U1' and truncated every id to one
    # character.
    c = np.array(f3['siteid'], dtype=str)
    loc_mdata['station_id'] = writer.FillNcVector(c, 'string')

    fill_value = nc.default_fillvals['f4']
    outdata[varDict['pm25_tot']['valKey']] = np.array(
        f3['PM2.5'].fillna(fill_value))
    # OZONE is divided by 1000 and written with units 'ppmV'
    # (presumably the input is in ppb -- confirm).
    outdata[varDict['o3']['valKey']] = np.array(
        (f3['OZONE'] / 1000).fillna(fill_value))
    for name in obs_names:
        # Constant observation error (0.1) and QC flag (0) for every location.
        outdata[varDict[name]['errKey']] = np.full(nlocs, 0.1)
        outdata[varDict[name]['qcKey']] = np.full(nlocs, 0)

    writer._nvars = len(obs_names)
    writer._nlocs = nlocs
    writer.BuildNetcdf(outdata, loc_mdata, var_mdata, AttrData, units)
def read_monitor_file(sitefile=None)
def long_to_wide(df)
def filter_bad_values(df)
def add_data(infile, sitefile)