3 from collections 
import defaultdict
 
    5 from copy 
import deepcopy
 
    7 from JediDBArgs 
import \
 
    8   obsFKey, geoFKey, diagFKey, default_path, fPrefixes
 
   11 from netCDF4 
import Dataset
 
   14 import plot_utils 
as pu
 
   15 import var_utils 
as vu
 
   18 _logger = logging.getLogger(__name__)
 
   20 MAXINT32  = np.int32(1e9)
 
   21 MAXFLOAT  = np.float32(1.e12)
 
   22 MAXDOUBLE = np.float64(1.e12)
 
   31     fileName = pathPlusFile.split(
'/')[-1]
 
   34     fileParts = (fileName.split(
'.')[0]).split(
'_')
 
   39     if not pu.isint(rank):
 
   51   ext = File.split(
'.')[-1]
 
   59     _logger.error(
'getIODAFileHandle:unsupported file extension => '+ext)
 
   63   '''wrapper for list of FileHandle objects''' 
   73         self.
nlocsnlocs += h.nlocs
 
   89         grpVar = vu.IODAVarCtors[self.
fileFormatfileFormat](var, group)
 
   91         if np.isfinite(var1D).sum() > 0: varlist.append(var)
 
   99     varName, group = vu.splitObsVarGrp(grpVar)
 
  102     if group==vu.geoGroup 
or group==vu.diagGroup:
 
  109     if fileVar 
not in self.
variablesvariables:
 
  110       _logger.error(
'FileHandles.var1DatLocs: fileVar not present: '+fileVar)
 
  113       vals1D = np.empty(self.
nlocsnlocs, dtype=np.object_)
 
  116         iend = istart + h.nlocs
 
  117         tmp = np.empty(h.nlocs, dtype=np.object_)
 
  118         for ii, bytelist 
in enumerate(h.variableAS1DArray(fileVar)):
 
  119           tmp[ii] = (b
''.join(bytelist)).decode(
'utf-8')
 
  120         vals1D[istart:iend] = tmp
 
  123       vals1D = np.empty(self.
nlocsnlocs, dtype=dtype)
 
  126         iend = istart + h.nlocs
 
  128           vals1D[istart:iend] = h.variableAS1DArray(fileVar)
 
  131           vals1D[istart:iend] = np.transpose(np.asarray(h.variableAS1DArray(fileVar)))[level][:]
 
  134     assert iend == self.
nlocsnlocs, (
 
  135       'FileHandles.var1DatLocs: incorrect nlocs (',iend,
'!=',nlocs,
') for ', grpVar)
 
  138     if (vu.obsVarPrs 
in grpVar 
and np.max(vals1D) > 10000.0):
 
  139       vals1D = np.divide(vals1D, 100.0)
 
  143       missing = np.greater(np.abs(vals1D), MAXINT32)
 
  144     elif 'float32' in dtype:
 
  145       missing = np.greater(np.abs(vals1D), MAXFLOAT)
 
  146     elif 'float64' in dtype:
 
  147       missing = np.greater(np.abs(vals1D), MAXDOUBLE)
 
  149       missing = np.full_like(vals1D, 
False, dtype=bool)
 
  152       vals1D[missing] = np.NaN
 
  160   '''This serves as a base class for netcdf4 and hdf5 wrapper classes for IODA-formatted and UFO GeoVaLs-formatted files''' 
  168     virtual method, returns the basic data type of var 
  170     raise NotImplementedError()
 
  174     virtual method, returns var as a 1D array 
  176     raise NotImplementedError()
 
  183     assert group!=vu.geoGroup 
and group!=vu.diagGroup, (
 
  184       'varsINGroup cannot handle GeoVaLs or ObsDiagnostics')
 
  186     for vargrp 
in self.
variablesvariables.keys():
 
  187         var, grp = vu.splitObsVarGrp(vargrp)
 
  188         if grp == group: varlist.append(var)
 
  199     self.
hh = Dataset(File, mode)
 
  200     self.
hh.set_auto_mask(
False)
 
  201     self.
nlocsnlocs = self.
hh.dimensions[
'nlocs'].size
 
  206     return self.
hh.variables[var].datatype.name
 
  209     return self.
hh.variables[var][:]
 
  232     self.
hh = h5.File(File, mode)
 
  237       if type(self.
hh[node]) 
is h5._hl.group.Group:
 
  238         for var 
in self.
hh[node]:
 
  239           varlist += [node+
'/'+var]
 
  243       shape = self.
hh[var].shape
 
  246       dims = np.empty(len(shape), object)
 
  247       for ii, dim 
in enumerate(self.
hh[var].attrs[
'DIMENSION_LIST']):
 
  248         dims[ii] = self.
hh[dim[0]]
 
  249         assert len(dims[ii]) == shape[ii], (
'HDFFileHandle.init: len(dims[ii]) and shape[ii] do not match, ii = ', ii)
 
  250       if dims[0] != self.
hh[
'nlocs']: 
continue 
  254       elif len(shape) == 2:
 
  256         for ii, d2Value 
in enumerate(dims[1]):
 
  259         _logger.error(
'HDFFileHandle.init: unable to handle more than 2 dimensions')
 
  267       varName, grp = vu.splitObsVarGrp(var)
 
  268       fileVarName, suf = vu.splitIntSuffix(varName)
 
  269       fileVarName = grp+
'/'+fileVarName
 
  270     return self.
hh[fileVarName].dtype.name
 
  278       _logger.error(
'HDFFileHandle.variableAS1DArray: unsupported type')
 
  286     '''This class provides access to UFO feedback files.''' 
  287     def __init__(self, data_path=default_path, osKeySelect=[]):
 
  292         supportedFileExts=[
'nc4',
'h5']
 
  295         for key, prefix 
in fPrefixes.items():
 
  330         for fileType, prefix 
in self.
filePrefixesfilePrefixes.items():
 
  331             allFiles[fileType] = defaultdict(list)
 
  332             for fileExt 
in supportedFileExts:
 
  333                 for pathPlusFile 
in glob.glob(data_path+
'/'+prefix+
'*.'+fileExt):
 
  335                     fileName = pathPlusFile.split(
'/')[-1]
 
  338                     fileParts = (
'.'.join(fileName.split(
'.')[:-1])).split(
'_')
 
  341                     fileParts.remove(prefix)
 
  345                         osKey =  
'_'.join(fileParts[:-1])
 
  348                         osKey = 
'_'.join(fileParts)
 
  350                     allFiles[fileType][osKey].append(pathPlusFile)
 
  355         for fileType, osKeys 
in allFiles.items():
 
  356             for osKey, files 
in osKeys.items():
 
  357                 if osKey 
not in self.
FilesFiles:
 
  358                   self.
FilesFiles[osKey] = defaultdict(list)
 
  359                 for pathPlusFile 
in files:
 
  362                         self.
FilesFiles[osKey][fileType] = [pathPlusFile]
 
  366                         self.
FilesFiles[osKey][fileType].append(pathPlusFile)
 
  372         for osKey, fileTypes 
in self.
FilesFiles.items():
 
  373             self.
loggersloggers[osKey] = logging.getLogger(__name__+
'.'+osKey)
 
  374             nObsFiles = len(fileTypes.get(obsFKey,[]))
 
  377                 self.
loggersloggers[osKey].error(
''' 
  378                            There are no '''+obsFKey+
''' 
  379                            feedback files with prefix '''+self.
filePrefixesfilePrefixes[obsFKey]+
''' 
  381             self.
nObsFilesnObsFiles[osKey] = nObsFiles
 
  383             for key, prefix 
in self.
filePrefixesfilePrefixes.items():
 
  384                 if key == obsFKey: 
continue 
  385                 nFiles = len(fileTypes.get(key,[]))
 
  386                 if nFiles > 0 
and nFiles < nObsFiles:
 
  387                     self.
loggersloggers[osKey].error(
''' 
  388                                There are not enough '''+key+
''' 
  389                                feedback files with prefix '''+prefix+
''' 
  390                                #'''+obsFKey+
' = '+str(nObsFiles)+
''' 
  391                                #'''+key+
' = '+str(nFiles)+
''' 
  397         for osKey 
in list(self.
FilesFiles.keys()):
 
  400             expt_parts = osKey.split(
"_")
 
  401             nstr = len(expt_parts)
 
  402             ObsSpaceInfo = conf.nullDiagSpaceInfo
 
  403             for i 
in range(0,nstr):
 
  404                 ObsSpaceName_ = 
'_'.join(expt_parts[i:nstr])
 
  405                 ObsSpaceInfo_ = conf.DiagSpaceConfig.get( ObsSpaceName_,conf.nullDiagSpaceInfo)
 
  406                 if ObsSpaceInfo_[
'process']:
 
  407                     ObsSpaceName = deepcopy(ObsSpaceName_)
 
  408                     ObsSpaceInfo = deepcopy(ObsSpaceInfo_)
 
  409             if ((len(osKeySelect)>0 
and osKey 
not in osKeySelect) 
or 
  410                 not ObsSpaceInfo[
'process'] 
or 
  411                 ObsSpaceInfo.get(
'DiagSpaceGrp',conf.model_s) == conf.model_s):
 
  412                 del self.
FilesFiles[osKey]
 
  414                 self.
ObsSpaceNameObsSpaceName[osKey] = deepcopy(ObsSpaceName)
 
  415                 self.
ObsSpaceGroupObsSpaceGroup[osKey] = deepcopy(ObsSpaceInfo[
'DiagSpaceGrp'])
 
  418         for osKey, fileTypes 
in self.
FilesFiles.items():
 
  419             for fileType, files 
in fileTypes.items():
 
  421                 for fileName 
in files:
 
  424                         ranks.append(int(rank))
 
  425                     elif len(files) == 1:
 
  429                         self.
loggersloggers[osKey].error(
'too many files chosen for a concatenated ObsSpace=> '+fileType)
 
  431                 indices = list(range(len(files)))
 
  432                 indices.sort(key=ranks.__getitem__)
 
  433                 self.
FilesFiles[osKey][fileType] = \
 
  434                     list(map(files.__getitem__, indices))
 
  444         self.
loggersloggers[osKey].info(
'Initializing UFO file handles...')
 
  446         self.
HandlesHandles[osKey] = {}
 
  448         for fileType, files 
in self.
FilesFiles[osKey].items():
 
  449             if len(files) == self.
nObsFilesnObsFiles[osKey]:
 
  450                 self.
loggersloggers[osKey].info(
' fileType = '+fileType)
 
  457         if osKey 
in self.
HandlesHandles:
 
  458             for fileType, h 
in self.
HandlesHandles[osKey].items():
 
  469         fHandles = self.
HandlesHandles[osKey].get(fileType, 
None)
 
  471             self.
loggersloggers[osKey].error(
'no files exist => '+fileType)
 
  473         return fHandles.fileFormat
 
  476     def varList(self, osKey, fileType, selectGrp):
 
  483         fHandles = self.
HandlesHandles[osKey].get(fileType, 
None)
 
  485             self.
loggersloggers[osKey].error(
'no files exist => '+fileType)
 
  487         assert fileType == obsFKey, 
'varList not implemented for '+fileType
 
  489         varlist = fHandles.varsINGroup(selectGrp)
 
  493         indices = list(range(len(varlist)))
 
  494         dictName0, suf = vu.splitIntSuffix(varlist[0])
 
  498             dictName, suf = vu.splitIntSuffix(var)
 
  502             if not pu.isint(suf) 
or dictName != dictName0:
 
  508             indices.sort(key=[int(i) 
for i 
in intlist].__getitem__)
 
  510             indices.sort(key=varlist.__getitem__)
 
  511         varlist = list(map(varlist.__getitem__, indices))
 
  520         self.
loggersloggers[osKey].info(
'Reading requested variables from UFO file(s)...')
 
  522         ObsSpace = self.
HandlesHandles[osKey][obsFKey]
 
  523         ObsDiagnostics = self.
HandlesHandles[osKey].get(diagFKey, 
None)
 
  524         GeoVaLs = self.
HandlesHandles[osKey].get(geoFKey, 
None)
 
  533         for grpVar 
in pu.uniqueMembers(dbVars):
 
  535             varName, grpName = vu.splitObsVarGrp(grpVar)
 
  537             if grpVar 
in ObsSpace.variables:
 
  538                 varsVals[grpVar] = ObsSpace.var1DatLocs(grpVar)
 
  540             elif vu.geoGroup 
in grpName 
and GeoVaLs 
is not None:
 
  541                 varsVals[grpVar] = GeoVaLs.var1DatLocs(grpVar, geoLev)
 
  543             elif vu.diagGroup 
in grpName 
and ObsDiagnostics 
is not None:
 
  544                 varsVals[grpVar] = ObsDiagnostics.var1DatLocs(grpVar, diagLev)
 
  547                 self.
loggersloggers[osKey].error(
'grpVar not found => '+grpVar)
 
def varsINGroup(self, group)
 
def variableAS1DArray(self, var)
 
def __init__(self, File, mode)
 
def varsINGroup(self, group)
 
def __init__(self, files, mode)
 
def var1DatLocs(self, grpVar, level=None)
 
def __init__(self, h, var2D, index)
 
def variableAS1DArray(self, var)
 
def __init__(self, File, mode)
 
def readVars(self, osKey, dbVars)
 
def initHandles(self, osKey)
 
def __init__(self, data_path=default_path, osKeySelect=[])
 
def fileFormat(self, osKey, fileType)
 
def destroyHandles(self, osKey)
 
def varList(self, osKey, fileType, selectGrp)
 
def variableAS1DArray(self, var)
 
def __init__(self, File, mode)
 
def getIODAFileRank(pathPlusFile)
 
def getIODAFileHandle(File, mode='r')
 
def IODAFileIsRanked(pathPlusFile)