3 from collections
import defaultdict
5 from copy
import deepcopy
7 from JediDBArgs
import \
8 obsFKey, geoFKey, diagFKey, default_path, fPrefixes
11 from netCDF4
import Dataset
14 import plot_utils
as pu
15 import var_utils
as vu
18 _logger = logging.getLogger(__name__)
20 MAXINT32 = np.int32(1e9)
21 MAXFLOAT = np.float32(1.e12)
22 MAXDOUBLE = np.float64(1.e12)
31 fileName = pathPlusFile.split(
'/')[-1]
34 fileParts = (fileName.split(
'.')[0]).split(
'_')
39 if not pu.isint(rank):
51 ext = File.split(
'.')[-1]
59 _logger.error(
'getIODAFileHandle:unsupported file extension => '+ext)
63 '''wrapper for list of FileHandle objects'''
73 self.
nlocsnlocs += h.nlocs
89 grpVar = vu.IODAVarCtors[self.
fileFormatfileFormat](var, group)
91 if np.isfinite(var1D).sum() > 0: varlist.append(var)
99 varName, group = vu.splitObsVarGrp(grpVar)
102 if group==vu.geoGroup
or group==vu.diagGroup:
109 if fileVar
not in self.
variablesvariables:
110 _logger.error(
'FileHandles.var1DatLocs: fileVar not present: '+fileVar)
113 vals1D = np.empty(self.
nlocsnlocs, dtype=np.object_)
116 iend = istart + h.nlocs
117 tmp = np.empty(h.nlocs, dtype=np.object_)
118 for ii, bytelist
in enumerate(h.variableAS1DArray(fileVar)):
119 tmp[ii] = (b
''.join(bytelist)).decode(
'utf-8')
120 vals1D[istart:iend] = tmp
123 vals1D = np.empty(self.
nlocsnlocs, dtype=dtype)
126 iend = istart + h.nlocs
128 vals1D[istart:iend] = h.variableAS1DArray(fileVar)
131 vals1D[istart:iend] = np.transpose(np.asarray(h.variableAS1DArray(fileVar)))[level][:]
134 assert iend == self.
nlocsnlocs, (
135 'FileHandles.var1DatLocs: incorrect nlocs (',iend,
'!=',nlocs,
') for ', grpVar)
138 if (vu.obsVarPrs
in grpVar
and np.max(vals1D) > 10000.0):
139 vals1D = np.divide(vals1D, 100.0)
143 missing = np.greater(np.abs(vals1D), MAXINT32)
144 elif 'float32' in dtype:
145 missing = np.greater(np.abs(vals1D), MAXFLOAT)
146 elif 'float64' in dtype:
147 missing = np.greater(np.abs(vals1D), MAXDOUBLE)
149 missing = np.full_like(vals1D,
False, dtype=bool)
152 vals1D[missing] = np.NaN
160 '''This serves as a base class for netcdf4 and hdf5 wrapper classes for IODA-formatted and UFO GeoVaLs-formatted files'''
168 virtual method, returns the basic data type of var
170 raise NotImplementedError()
174 virtual method, returns var as a 1D array
176 raise NotImplementedError()
183 assert group!=vu.geoGroup
and group!=vu.diagGroup, (
184 'varsINGroup cannot handle GeoVaLs or ObsDiagnostics')
186 for vargrp
in self.
variablesvariables.keys():
187 var, grp = vu.splitObsVarGrp(vargrp)
188 if grp == group: varlist.append(var)
199 self.
hh = Dataset(File, mode)
200 self.
hh.set_auto_mask(
False)
201 self.
nlocsnlocs = self.
hh.dimensions[
'nlocs'].size
206 return self.
hh.variables[var].datatype.name
209 return self.
hh.variables[var][:]
232 self.
hh = h5.File(File, mode)
237 if type(self.
hh[node])
is h5._hl.group.Group:
238 for var
in self.
hh[node]:
239 varlist += [node+
'/'+var]
243 shape = self.
hh[var].shape
246 dims = np.empty(len(shape), object)
247 for ii, dim
in enumerate(self.
hh[var].attrs[
'DIMENSION_LIST']):
248 dims[ii] = self.
hh[dim[0]]
249 assert len(dims[ii]) == shape[ii], (
'HDFFileHandle.init: len(dims[ii]) and shape[ii] do not match, ii = ', ii)
250 if dims[0] != self.
hh[
'nlocs']:
continue
254 elif len(shape) == 2:
256 for ii, d2Value
in enumerate(dims[1]):
259 _logger.error(
'HDFFileHandle.init: unable to handle more than 2 dimensions')
267 varName, grp = vu.splitObsVarGrp(var)
268 fileVarName, suf = vu.splitIntSuffix(varName)
269 fileVarName = grp+
'/'+fileVarName
270 return self.
hh[fileVarName].dtype.name
278 _logger.error(
'HDFFileHandle.variableAS1DArray: unsupported type')
286 '''This class provides access to UFO feedback files.'''
287 def __init__(self, data_path=default_path, osKeySelect=[]):
292 supportedFileExts=[
'nc4',
'h5']
295 for key, prefix
in fPrefixes.items():
330 for fileType, prefix
in self.
filePrefixesfilePrefixes.items():
331 allFiles[fileType] = defaultdict(list)
332 for fileExt
in supportedFileExts:
333 for pathPlusFile
in glob.glob(data_path+
'/'+prefix+
'*.'+fileExt):
335 fileName = pathPlusFile.split(
'/')[-1]
338 fileParts = (
'.'.join(fileName.split(
'.')[:-1])).split(
'_')
341 fileParts.remove(prefix)
345 osKey =
'_'.join(fileParts[:-1])
348 osKey =
'_'.join(fileParts)
350 allFiles[fileType][osKey].append(pathPlusFile)
355 for fileType, osKeys
in allFiles.items():
356 for osKey, files
in osKeys.items():
357 if osKey
not in self.
FilesFiles:
358 self.
FilesFiles[osKey] = defaultdict(list)
359 for pathPlusFile
in files:
362 self.
FilesFiles[osKey][fileType] = [pathPlusFile]
366 self.
FilesFiles[osKey][fileType].append(pathPlusFile)
372 for osKey, fileTypes
in self.
FilesFiles.items():
373 self.
loggersloggers[osKey] = logging.getLogger(__name__+
'.'+osKey)
374 nObsFiles = len(fileTypes.get(obsFKey,[]))
377 self.
loggersloggers[osKey].error(
'''
378 There are no '''+obsFKey+
'''
379 feedback files with prefix '''+self.
filePrefixesfilePrefixes[obsFKey]+
'''
381 self.
nObsFilesnObsFiles[osKey] = nObsFiles
383 for key, prefix
in self.
filePrefixesfilePrefixes.items():
384 if key == obsFKey:
continue
385 nFiles = len(fileTypes.get(key,[]))
386 if nFiles > 0
and nFiles < nObsFiles:
387 self.
loggersloggers[osKey].error(
'''
388 There are not enough '''+key+
'''
389 feedback files with prefix '''+prefix+
'''
390 #'''+obsFKey+
' = '+str(nObsFiles)+
'''
391 #'''+key+
' = '+str(nFiles)+
'''
397 for osKey
in list(self.
FilesFiles.keys()):
400 expt_parts = osKey.split(
"_")
401 nstr = len(expt_parts)
402 ObsSpaceInfo = conf.nullDiagSpaceInfo
403 for i
in range(0,nstr):
404 ObsSpaceName_ =
'_'.join(expt_parts[i:nstr])
405 ObsSpaceInfo_ = conf.DiagSpaceConfig.get( ObsSpaceName_,conf.nullDiagSpaceInfo)
406 if ObsSpaceInfo_[
'process']:
407 ObsSpaceName = deepcopy(ObsSpaceName_)
408 ObsSpaceInfo = deepcopy(ObsSpaceInfo_)
409 if ((len(osKeySelect)>0
and osKey
not in osKeySelect)
or
410 not ObsSpaceInfo[
'process']
or
411 ObsSpaceInfo.get(
'DiagSpaceGrp',conf.model_s) == conf.model_s):
412 del self.
FilesFiles[osKey]
414 self.
ObsSpaceNameObsSpaceName[osKey] = deepcopy(ObsSpaceName)
415 self.
ObsSpaceGroupObsSpaceGroup[osKey] = deepcopy(ObsSpaceInfo[
'DiagSpaceGrp'])
418 for osKey, fileTypes
in self.
FilesFiles.items():
419 for fileType, files
in fileTypes.items():
421 for fileName
in files:
424 ranks.append(int(rank))
425 elif len(files) == 1:
429 self.
loggersloggers[osKey].error(
'too many files chosen for a concatenated ObsSpace=> '+fileType)
431 indices = list(range(len(files)))
432 indices.sort(key=ranks.__getitem__)
433 self.
FilesFiles[osKey][fileType] = \
434 list(map(files.__getitem__, indices))
444 self.
loggersloggers[osKey].info(
'Initializing UFO file handles...')
446 self.
HandlesHandles[osKey] = {}
448 for fileType, files
in self.
FilesFiles[osKey].items():
449 if len(files) == self.
nObsFilesnObsFiles[osKey]:
450 self.
loggersloggers[osKey].info(
' fileType = '+fileType)
457 if osKey
in self.
HandlesHandles:
458 for fileType, h
in self.
HandlesHandles[osKey].items():
469 fHandles = self.
HandlesHandles[osKey].get(fileType,
None)
471 self.
loggersloggers[osKey].error(
'no files exist => '+fileType)
473 return fHandles.fileFormat
476 def varList(self, osKey, fileType, selectGrp):
483 fHandles = self.
HandlesHandles[osKey].get(fileType,
None)
485 self.
loggersloggers[osKey].error(
'no files exist => '+fileType)
487 assert fileType == obsFKey,
'varList not implemented for '+fileType
489 varlist = fHandles.varsINGroup(selectGrp)
493 indices = list(range(len(varlist)))
494 dictName0, suf = vu.splitIntSuffix(varlist[0])
498 dictName, suf = vu.splitIntSuffix(var)
502 if not pu.isint(suf)
or dictName != dictName0:
508 indices.sort(key=[int(i)
for i
in intlist].__getitem__)
510 indices.sort(key=varlist.__getitem__)
511 varlist = list(map(varlist.__getitem__, indices))
520 self.
loggersloggers[osKey].info(
'Reading requested variables from UFO file(s)...')
522 ObsSpace = self.
HandlesHandles[osKey][obsFKey]
523 ObsDiagnostics = self.
HandlesHandles[osKey].get(diagFKey,
None)
524 GeoVaLs = self.
HandlesHandles[osKey].get(geoFKey,
None)
533 for grpVar
in pu.uniqueMembers(dbVars):
535 varName, grpName = vu.splitObsVarGrp(grpVar)
537 if grpVar
in ObsSpace.variables:
538 varsVals[grpVar] = ObsSpace.var1DatLocs(grpVar)
540 elif vu.geoGroup
in grpName
and GeoVaLs
is not None:
541 varsVals[grpVar] = GeoVaLs.var1DatLocs(grpVar, geoLev)
543 elif vu.diagGroup
in grpName
and ObsDiagnostics
is not None:
544 varsVals[grpVar] = ObsDiagnostics.var1DatLocs(grpVar, diagLev)
547 self.
loggersloggers[osKey].error(
'grpVar not found => '+grpVar)
def varsINGroup(self, group)
def variableAS1DArray(self, var)
def __init__(self, File, mode)
def varsINGroup(self, group)
def __init__(self, files, mode)
def var1DatLocs(self, grpVar, level=None)
def __init__(self, h, var2D, index)
def variableAS1DArray(self, var)
def __init__(self, File, mode)
def readVars(self, osKey, dbVars)
def initHandles(self, osKey)
def __init__(self, data_path=default_path, osKeySelect=[])
def fileFormat(self, osKey, fileType)
def destroyHandles(self, osKey)
def varList(self, osKey, fileType, selectGrp)
def variableAS1DArray(self, var)
def __init__(self, File, mode)
def getIODAFileRank(pathPlusFile)
def getIODAFileHandle(File, mode='r')
def IODAFileIsRanked(pathPlusFile)