IODA Bundle
ioda/tools/check_ioda_nc.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 #
4 # (C) Copyright 2019 UCAR
5 #
6 # This software is licensed under the terms of the Apache Licence Version 2.0
7 # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
8 #
9 
10 from __future__ import print_function
11 import sys
12 import os
13 import re
14 import glob
15 import argparse
16 import numpy as np
17 import netCDF4 as nc
18 
19 ###############################
20 # SUBROUTINES
21 ###############################
22 
23 ################################################################
24 # This routine checks the values associated with Var for:
25 # 1) proper usage of missing marks
26 # 2) existence of invalid numerical values
27 #
def CheckVarValues(Var):
    """Check the values of one variable for bad missing marks and invalid numbers.

    Args:
        Var: variable object exposing ``name``, ``dtype`` and ``Var[:]``
            (the caller in this file passes a netCDF4 variable). ``Var[:]``
            may yield a masked array or a plain array.

    Returns:
        Tuple ``(BadMissingVals, HasInvalidVals)`` of bools:
        BadMissingVals is True when a finite, unmasked value has an absolute
        value > 1e8 (a large number used as a missing mark instead of the
        netcdf fill value). HasInvalidVals is True when any value is nan,
        inf or -inf. Both are False for non-numeric variables.
    """
    BadMissingVals = False
    HasInvalidVals = False
    NotFrequency = re.search("[Ff]requency", Var.name) is None

    # Only do the check for numeric types (skip strings for example)
    if np.issubdtype(Var.dtype, np.number):
        # Read the variable once. getdata/getmaskarray accept both masked and
        # plain arrays, and getmaskarray always yields a full-size boolean
        # mask (all False when nothing is masked).
        Values = Var[:]
        VarData = np.ma.getdata(Values).ravel()  # no need to preserve array shape
        VarMask = np.ma.getmaskarray(Values).ravel()

        # Invalid numeric values (nan, inf and -inf). Every element of the
        # flattened data is examined, not just the first Var.shape[0] of them.
        IsFinite = np.isfinite(VarData)
        HasInvalidVals = bool(np.any(~IsFinite))

        # The locations where the mask is True are where the netcdf fill
        # value was used, so don't check those. Invalid values are skipped
        # as well since they are already reported above. Of the remaining
        # locations, an absolute value > 1e8 is flagged as an incorrect
        # missing value mark.
        #
        # Channel frequencies can have values > 1e8 so don't include
        # these variables in the bad missing value check.
        if NotFrequency:
            Candidates = VarData[IsFinite & ~VarMask]
            BadMissingVals = bool(np.any(np.fabs(Candidates) > 1e8))

    return (BadMissingVals, HasInvalidVals)
60 
61 ############################################################
62 # This routine will walk through the list of files from
63 # the command line arguments and create a list of netcdf
64 # files to check. The items can be either files or directories.
65 # If a directory, find all the netcdf files under that directory
66 # and add them to the list.
def GenNcFileList(ArgList):
    """Build the list of netcdf files named by the command line items.

    Each item of ArgList is either a file path or a directory. Directories
    are walked recursively and every file found underneath is considered;
    other items are taken as they are. Only paths ending in '.nc' or '.nc4'
    are returned, in the order they were encountered.
    """
    # First pass: flatten directories into the files they contain.
    Candidates = []
    for Entry in ArgList:
        if not os.path.isdir(Entry):
            Candidates.append(Entry)
            continue
        for Root, _SubDirs, Names in os.walk(Entry):
            Candidates.extend(os.path.join(Root, Name) for Name in Names)

    # Second pass: keep only the netcdf files (suffix = '.nc' or '.nc4').
    return [Path for Path in Candidates if Path.endswith(('.nc', '.nc4'))]
85 
86 ############################################################
87 # This routine will check the contents of one file for
88 # compliance with the ioda netcdf conventions. This routine
89 # will return the total error count, summed over the categories
90 # outlined below plus variables that are missing a group name.
91 #
def CheckNcFile(NcFileName, Verbose):
    """Check one netcdf file against the ioda netcdf conventions.

    Every variable in the root group is checked for:
      0. Group name
          - The variable name carries an '@<group_name>' suffix
      1. Variable data type
          - No doubles
          - Variables in the group PreQC are integer
          - All other variables are allowed to remain what they
            are declared in the file (as long as they are not doubles)
      2. Missing value marks
          - netcdf fill values are used for missing values
          - phasing out the use of large absolute values numbers
      3. Invalid numerical values (inf, -inf, nan)
          - Don't use these in the input file.

    Args:
        NcFileName: path of the netcdf file to open for reading.
        Verbose: when true, print one message per offending variable.

    Returns:
        Total number of errors found (sum over all four counters). A
        per-category summary and PASS/FAIL are printed as a side effect.
    """
    MissingGroupMsg = " Variable: {0:s} " + \
                      "needs to specify a group name (@<group_name> suffix)"
    DataTypeMsg = " Variable: {0:s} " + \
                  "has unexpected data type " + \
                  "({1} instead of {2})"
    MissingValMsg = " Variable: {0:s} " + \
                    "needs to use netcdf fill values for missing marks"
    InvalidNumMsg = " Variable: {0:s} " + \
                    "needs to remove invalid numeric values " + \
                    "(nan, inf, -inf)"

    MissingGroupErrors = 0
    DataTypeErrors = 0
    MissingValErrors = 0
    InvalidNumErrors = 0

    # Open inside a context manager so the file handle is released when the
    # checks finish, and also when one of them raises.
    with nc.Dataset(NcFileName, 'r') as NcRootGroup:
        print("Checking netcdf file for ioda conventions: ")
        print(" {0:s}".format(NcFileName))

        # Walk through the variables (assume they all live in the root group)
        # and check the data types, etc.
        for Vname in NcRootGroup.variables:
            Var = NcRootGroup.variables[Vname]
            # Everything after the first '@' is the group name ("" if absent).
            GroupName = Vname.partition('@')[2]

            # Check that the group name is defined.
            if GroupName == "":
                if Verbose:
                    print(MissingGroupMsg.format(Vname))
                MissingGroupErrors += 1

            # Check the variable data type (VarType)
            # Expected var type matches what is in the file, except when
            # GroupName is PreQC which is int32, and float64 is not allowed.
            VarType = Var.dtype.name
            if GroupName == "PreQC":
                ExpectedVarType = "int32"
            elif VarType == "float64":
                ExpectedVarType = "float32"
            else:
                ExpectedVarType = VarType

            if VarType != ExpectedVarType:
                if Verbose:
                    print(DataTypeMsg.format(Vname, VarType, ExpectedVarType))
                DataTypeErrors += 1

            # Check the variable values for incorrect missing marks, and
            # for invalid numeric values.
            (BadMissingVals, HasInvalidVals) = CheckVarValues(Var)
            if BadMissingVals:
                if Verbose:
                    print(MissingValMsg.format(Vname))
                MissingValErrors += 1

            if HasInvalidVals:
                if Verbose:
                    print(InvalidNumMsg.format(Vname))
                InvalidNumErrors += 1

    TotalErrors = MissingGroupErrors + DataTypeErrors + MissingValErrors + InvalidNumErrors
    ErrorReport = " Error counts: Missing group: {0:d}, " + \
                  "data type: {1:d}, " + \
                  "missing value: {2:d}, " + \
                  "invalid numeric values: {3:d}"
    print(ErrorReport.format(MissingGroupErrors, DataTypeErrors, MissingValErrors, InvalidNumErrors), end=' ')
    if TotalErrors == 0:
        print("--> PASS")
    else:
        print("--> FAIL")
    print("")

    return TotalErrors
180 
181 ###############################
182 # MAIN
183 ###############################
184 
ScriptName = os.path.basename(sys.argv[0])

# Parse command line
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--verbose", action="store_true",
                help="increase verbosity")
ap.add_argument("nc_file_or_dir", nargs='+',
                help="list of files or directories containing netcdf")

MyArgs = ap.parse_args()

NetcdfList = MyArgs.nc_file_or_dir
Verbose = MyArgs.verbose

# Generate the total list of netcdf files to check. The items from
# the command line can be either files or directories, and if
# a directory then you find all the netcdf files within that directory.
NcFileList = GenNcFileList(NetcdfList)

# Check the files in the list, and accumulate the error counts.
TotalErrorCount = 0
for NcFileName in NcFileList:
    TotalErrorCount += CheckNcFile(NcFileName, Verbose)

# Return the error count. If all the files are okay, then the error count
# will be zero and the shell will see a zero return code from this script.
#
# Exit statuses are truncated to 8 bits on POSIX systems, so an uncapped
# count that is a multiple of 256 would be seen by the shell as 0 (success).
# Cap the status at 255 so any failure stays non-zero.
sys.exit(min(TotalErrorCount, 255))
def CheckNcFile(NcFileName, Verbose)
This routine will check the contents of one file for compliance with the ioda netcdf conventions.
def GenNcFileList(ArgList)
This routine will walk through the list of files from the command line arguments and create a list of...
def CheckVarValues(Var)
SUBROUTINES.