IODA
check_ioda_nc.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 #
4 # (C) Copyright 2019 UCAR
5 #
6 # This software is licensed under the terms of the Apache Licence Version 2.0
7 # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
8 #
9 
10 from __future__ import print_function
11 import sys
12 import os
13 import re
14 import glob
15 import argparse
16 import numpy as np
17 import netCDF4 as nc
18 
19 ###############################
20 # SUBROUTINES
21 ###############################
22 
23 ################################################################
24 # This routine checks the values associated with Var for:
25 # 1) proper usage of missing marks
26 # 2) existence of invalid numerical values
27 #
def CheckVarValues(Var):
    """Check the values of one variable for bad missing marks and invalid numbers.

    Var must provide `name`, `dtype` and slice access (`Var[:]`), as a
    netCDF4 variable does. Only numeric variables are examined; any other
    data type (strings, for example) yields (False, False).

    Returns a tuple (BadMissingVals, HasInvalidVals) of Python bools:
      BadMissingVals - True when an unmasked, finite value has an absolute
                       value > 1e8, i.e. a large number is being used as a
                       missing mark instead of the netcdf fill value. Never
                       set for variables whose name contains "frequency" or
                       "Frequency", since channel frequencies can
                       legitimately exceed 1e8.
      HasInvalidVals - True when any value is nan, inf or -inf.
    """
    BadMissingVals = False
    HasInvalidVals = False

    # Only do the check for numeric types (skip strings for example)
    if not np.issubdtype(Var.dtype, np.number):
        return (BadMissingVals, HasInvalidVals)

    # Read the variable once, then flatten so that every element is checked
    # whatever the rank of the variable (scalars included). getdata() and
    # getmaskarray() accept both masked and plain arrays; getmaskarray()
    # always returns a full boolean array, even when nothing is masked.
    Values = Var[:]
    VarData = np.ma.getdata(Values).ravel()
    VarMask = np.ma.getmaskarray(Values).ravel()

    # Invalid numeric values (nan, inf and -inf) are found first and are
    # excluded from the missing mark check below, since inf would otherwise
    # trigger it.
    InvalidVals = ~np.isfinite(VarData)
    HasInvalidVals = bool(InvalidVals.any())

    # Channel frequencies can have values > 1e8 so don't include
    # these variables in the bad missing value check.
    NotFrequency = re.search("[Ff]requency", Var.name) is None
    if NotFrequency:
        # Locations where the mask is True hold the netcdf fill value and
        # are therefore correct missing marks; don't check them.
        Checkable = ~VarMask & ~InvalidVals
        BadMissingVals = bool((np.fabs(VarData[Checkable]) > 1e8).any())

    return (BadMissingVals, HasInvalidVals)
60 
61 ############################################################
62 # This routine will walk through the list of files from
63 # the command line arguments and create a list of netcdf
64 # files to check. The items can be either files or directories.
65 # If a directory, find all the netcdf files under that directory
66 # and add them to the list.
def GenNcFileList(ArgList):
    """Expand a list of files and directories into a list of netcdf files.

    ArgList holds file and/or directory paths. Every directory is walked
    recursively and all files found under it are considered; any other item
    is taken as is (it is not checked for existence).

    Returns the list of paths whose name ends with '.nc' or '.nc4'. Items
    keep the order of ArgList; the files found under a directory are listed
    in sorted order so that the result does not depend on the order in which
    the file system happens to return directory entries.
    """
    NcSuffixes = ('.nc', '.nc4')

    # Expand the argument list into all files
    AllFiles = []
    for Item in ArgList:
        if os.path.isdir(Item):
            for RootPath, Dirs, Files in os.walk(Item):
                # Sorting Dirs in place fixes the order in which os.walk
                # descends into the sub-directories.
                Dirs.sort()
                AllFiles.extend(os.path.join(RootPath, File)
                                for File in sorted(Files))
        else:
            AllFiles.append(Item)

    # Select out only the netcdf files (suffix = '.nc' or '.nc4')
    return [Item for Item in AllFiles if Item.endswith(NcSuffixes)]
85 
86 ############################################################
87 # This routine will check the contents of one file for
88 # compliance with the ioda netcdf conventions. This routine
89 # will return an error count for each of the three categories
90 # outlined below.
91 #
def CheckNcFile(NcFileName, Verbose):
    """Check one netcdf file for compliance with the ioda netcdf conventions.

    The variables of the root group are checked for:
      1. Group name
         - The variable name carries a group name (@<group_name> suffix)

      2. Variable data type
         - No doubles
         - Variables in the group PreQC are integer
         - All other variables are allowed to remain what they
           are declared in the file (as long as they are not doubles)

      3. Missing value marks
         - netcdf fill values are used for missing values
         - phasing out the use of large absolute values numbers

      4. Invalid numerical values (inf, -inf, nan)
         - Don't use these in the input file.

    Each variable adds at most one error per category. A summary line
    with the per-category counts and PASS/FAIL is always printed; when
    Verbose is true, one message per offending variable is printed too.

    Returns the total number of errors found in the file.
    """
    # Walk through the variables (assume they all live in the root group)
    # and check the data types, etc.
    MissingGroupMsg = " Variable: {0:s} " + \
                      "needs to specify a group name (@<group_name> suffix)"
    DataTypeMsg = " Variable: {0:s} " + \
                  "has unexpected data type " + \
                  "({1} instead of {2})"
    MissingValMsg = " Variable: {0:s} " + \
                    "needs to use netcdf fill values for missing marks"
    InvalidNumMsg = " Variable: {0:s} " + \
                    "needs to remove invalid numeric values " + \
                    "(nan, inf, -inf)"

    MissingGroupErrors = 0
    DataTypeErrors = 0
    MissingValErrors = 0
    InvalidNumErrors = 0

    # The with block guarantees that the file is closed again, even when a
    # check raises; otherwise one handle per checked file would stay open.
    with nc.Dataset(NcFileName, 'r') as NcRootGroup:
        print("Checking netcdf file for ioda conventions: ")
        print(" {0:s}".format(NcFileName))

        for Vname in NcRootGroup.variables:
            Var = NcRootGroup.variables[Vname]
            (VarName, Dummy, GroupName) = Vname.partition('@')

            # Check that the group name is defined.
            if GroupName == "":
                if Verbose:
                    print(MissingGroupMsg.format(Vname))
                MissingGroupErrors += 1

            # Check the variable data type (VarType)
            # Expected var type matches what is in the file, except when
            # GroupName is PreQC which is int32, and float64 is not allowed.
            VarType = Var.dtype.name
            if GroupName == "PreQC":
                ExpectedVarType = "int32"
            elif VarType == "float64":
                ExpectedVarType = "float32"
            else:
                ExpectedVarType = VarType

            if VarType != ExpectedVarType:
                if Verbose:
                    print(DataTypeMsg.format(Vname, VarType, ExpectedVarType))
                DataTypeErrors += 1

            # Check the variable values for incorrect missing marks, and
            # for invalid numeric values.
            (BadMissingVals, HasInvalidVals) = CheckVarValues(Var)
            if BadMissingVals:
                if Verbose:
                    print(MissingValMsg.format(Vname))
                MissingValErrors += 1

            if HasInvalidVals:
                if Verbose:
                    print(InvalidNumMsg.format(Vname))
                InvalidNumErrors += 1

    TotalErrors = MissingGroupErrors + DataTypeErrors + MissingValErrors + InvalidNumErrors
    ErrorReport = " Error counts: Missing group: {0:d}, " + \
                  "data type: {1:d}, " + \
                  "missing value: {2:d}, " + \
                  "invalid numeric values: {3:d}"
    print(ErrorReport.format(MissingGroupErrors, DataTypeErrors, MissingValErrors, InvalidNumErrors), end=' ')
    if TotalErrors == 0:
        print("--> PASS")
    else:
        print("--> FAIL")
    print("")

    return (TotalErrors)
180 
181 ###############################
182 # MAIN
183 ###############################
184 
ScriptName = os.path.basename(sys.argv[0])

# Parse command line
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--verbose", action="store_true",
                help="increase verbosity")
ap.add_argument("nc_file_or_dir", nargs='+',
                help="list of files or directories containing netcdf")

MyArgs = ap.parse_args()

NetcdfList = MyArgs.nc_file_or_dir
Verbose = MyArgs.verbose

# Generate the total list of netcdf files to check. The items from
# the command line can be either files or directories, and if
# a directory then you find all the netcdf files within that directory.
NcFileList = GenNcFileList(NetcdfList)

# Check the files in the list, and accumulate the error counts.
TotalErrorCount = 0
for NcFileName in NcFileList:
    TotalErrors = CheckNcFile(NcFileName, Verbose)
    TotalErrorCount += TotalErrors

# Return the error count. If all the files are okay, then the error count
# will be zero and the shell will see a zero return code from this script.
#
# The count is capped at 255: the shell only sees the low 8 bits of the
# exit status, so an uncapped count that is a multiple of 256 would be
# reported as success.
sys.exit(min(TotalErrorCount, 255))
def GenNcFileList(ArgList)
This routine will walk through the list of files from the command line arguments and create a list of netcdf files to check.
def CheckNcFile(NcFileName, Verbose)
This routine will check the contents of one file for compliance with the ioda netcdf conventions.
def CheckVarValues(Var)
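This routine checks the values associated with Var for proper usage of missing marks and existence of invalid numerical values.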