MPAS-JEDI
diag_utils.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 
3 import binning_utils as bu
4 from predefined_configs import anIter, appName
5 from copy import deepcopy
6 import logging
7 import numpy as np
8 import pandas as pd
9 import plot_utils as pu
10 import stat_utils as su
11 import var_utils as vu
12 
13 _logger = logging.getLogger(__name__)
14 
15 #====================================
16 # diagnostic functions and dictionary
17 #====================================
18 
19 class BiasCorrectedObs:
20  def __init__(self):
21  self.baseVarsbaseVars = []
22  self.baseVarsbaseVars.append(vu.selfDepValue)
23  self.baseVarsbaseVars.append(vu.selfHofXValue)
24 
25  def evaluate(self, dbVals, insituParameters):
26  bcdep = dbVals[insituParameters[vu.selfDepValue]]
27  mod = dbVals[insituParameters[vu.selfHofXValue]]
28  return np.subtract(mod, bcdep)
29 
30 
31 class BiasCorrection:
32  def __init__(self):
33  self.baseVarsbaseVars = []
34  self.baseVarsbaseVars.append(vu.selfDepValue)
35  self.baseVarsbaseVars.append(vu.selfHofXValue)
36  self.baseVarsbaseVars.append(vu.selfObsValue)
37 
38  def evaluate(self, dbVals, insituParameters):
39  bcdep = dbVals[insituParameters[vu.selfDepValue]]
40  mod = dbVals[insituParameters[vu.selfHofXValue]]
41  obs = dbVals[insituParameters[vu.selfObsValue]]
42  return np.subtract(np.subtract(mod, bcdep), obs)
43 
44 
46  def __init__(self):
47  self.baseVarsbaseVars = []
48  self.baseVarsbaseVars.append(vu.selfDepValue)
49 
50  def evaluate(self, dbVals, insituParameters):
51  bcdep = dbVals[insituParameters[vu.selfDepValue]]
52  return np.negative(bcdep)
53 
54 
56  def __init__(self):
57  self.baseVarsbaseVars = []
58  self.baseVarsbaseVars.append(vu.selfDepValue)
59  self.baseVarsbaseVars.append(vu.selfObsValue)
60 
61  def evaluate(self, dbVals, insituParameters):
62  bcdep = dbVals[insituParameters[vu.selfDepValue]]
63  obs = dbVals[insituParameters[vu.selfObsValue]]
64 
65  OMM = np.negative(bcdep)
66  valid = bu.greatBound(np.abs(obs), 0.0, False)
67  OMM[valid] = np.divide(OMM[valid], obs[valid])
68  OMM[~valid] = np.NaN
69 
70  return np.multiply(OMM, 100.0)
71 
72 
73 class ObsMinusModel:
74  def __init__(self):
75  self.baseVarsbaseVars = []
76  self.baseVarsbaseVars.append(vu.selfHofXValue)
77  self.baseVarsbaseVars.append(vu.selfObsValue)
78 
79  def evaluate(self, dbVals, insituParameters):
80  obs = dbVals[insituParameters[vu.selfObsValue]]
81  mod = dbVals[insituParameters[vu.selfHofXValue]]
82  return np.subtract(obs, mod)
83 
84 
86  def __init__(self):
87  self.baseVarsbaseVars = []
88  self.baseVarsbaseVars.append(vu.selfHofXValue)
89  self.baseVarsbaseVars.append(vu.selfObsValue)
90 
91  def evaluate(self, dbVals, insituParameters):
92  obs = dbVals[insituParameters[vu.selfObsValue]]
93  mod = dbVals[insituParameters[vu.selfHofXValue]]
94 
95  OMM = np.subtract(obs, mod)
96  valid = bu.greatBound(np.abs(obs), 0.0, False)
97  OMM[valid] = np.divide(OMM[valid],obs[valid])
98  OMM[~valid] = np.NaN
99 
100  return np.multiply(OMM, 100.0)
101 
102 
104  def __init__(self):
105  self.baseVarsbaseVars = []
106  self.baseVarsbaseVars.append(vu.selfHofXValue)
107  self.baseVarsbaseVars.append(vu.bgHofXValue)
108 
109  def evaluate(self, dbVals, insituParameters):
110  ana = dbVals[insituParameters[vu.selfHofXValue]]
111  bak = dbVals[insituParameters[vu.bgHofXValue]]
112  return np.subtract(ana, bak)
113 
114 
116  def __init__(self):
117  self.baseVarsbaseVars = []
118  self.baseVarsbaseVars.append(vu.selfHofXValue)
119  self.baseVarsbaseVars.append(vu.selfObsValue)
120  self.baseVarsbaseVars.append(vu.bgHofXValue)
121 
122  def evaluate(self, dbVals, insituParameters):
123  ana = dbVals[insituParameters[vu.selfHofXValue]]
124  bak = dbVals[insituParameters[vu.bgHofXValue]]
125  obs = dbVals[insituParameters[vu.selfObsValue]]
126 
127  AMB = np.subtract(ana, bak)
128  valid = bu.greatBound(np.abs(obs), 0.0, False)
129  AMB[valid] = np.divide(AMB[valid], obs[valid])
130  AMB[~valid] = np.NaN
131 
132  return np.multiply(AMB, 100.0)
133 
135  def __init__(self):
136  self.baseVarsbaseVars = []
137  self.baseVarsbaseVars.append(vu.selfHofXValue)
138  self.baseVarsbaseVars.append(vu.selfObsValue)
139  self.baseVarsbaseVars.append(vu.bgHofXValue)
140 
141  def evaluate(self, dbVals, insituParameters):
142  ana = dbVals[insituParameters[vu.selfHofXValue]]
143  bak = dbVals[insituParameters[vu.bgHofXValue]]
144  obs = dbVals[insituParameters[vu.selfObsValue]]
145 
146  AMBoOMB = np.subtract(ana, bak)
147  OMB = np.subtract(obs, bak)
148  valid = bu.greatBound(np.abs(OMB), 0.0, False)
149 
150  AMBoOMB[valid] = np.divide(AMBoOMB[valid], OMB[valid])
151  AMBoOMB[~valid] = np.NaN
152 
153  return AMBoOMB
154 
155 
156 
157 class STDofHofX:
158  def __init__(self):
159  self.baseVarsbaseVars = []
160  self.baseVarsbaseVars.append(vu.selfHofXValue)
161 
162  def evaluate(self, dbVals, insituParameters):
163  meanVarName = insituParameters[vu.selfHofXValue]
164  memberKeys = []
165  for key in dbVals.keys():
166  if meanVarName+vu.ensSuffixBase in key:
167  memberKeys.append(key)
168  nMembers = len(memberKeys)
169  if nMembers > 0:
170  nLocs = len(dbVals[memberKeys[0]])
171  mods = np.full((nMembers, nLocs), np.NaN)
172  for member, key in enumerate(memberKeys):
173  mods[member,:] = dbVals[key]
174  std = np.nanstd(mods, axis=0, ddof=1)
175  else:
176  std = np.full(nLocs, np.NaN)
177  return std
178 
179 CRStatNames = ['CRd','CRh','CRo']
180 
181 def ConsistencyRatioFromDFW(dfw, stateType):
182 # INPUTS:
183 # dfw - StatisticsDatabase.DFWrapper object containing
184 # a pandas DataFrame with basic statistics for (at least)
185 # 'om'+stateType, 'sigma'+stateType, and 'sigmao'+stateType
186 # stateType - model state diagnostic type (either 'b' for background, 'a' for analysis, or 'f' for forecast)
187  assert stateType in ['b', 'a', 'f'], 'ConsistencyRatioFromDFW: wrong stateType: '+stateType
188 
189 #OUTPUT: pandas DataFrame containing only diagName == 'CRy'+stateType and CRStatNames data columns
190 
191  ommStr = 'om'+stateType
192  sigmamStr = 'sigma'+stateType
193  sigmaoStr = 'sigmao'+stateType
194 
195  # get the three variables needed as numpy arrays
196  availableDiagNames = dfw.levels('diagName')
197  if (ommStr not in availableDiagNames or
198  sigmamStr not in availableDiagNames or
199  sigmaoStr not in availableDiagNames): return None
200 
201  omm = dfw.loc({'diagName': ommStr}, 'MS').to_numpy()
202  sigmam = dfw.loc({'diagName': sigmamStr}, 'MS').to_numpy()
203  sigmao = dfw.loc({'diagName': sigmaoStr}, 'MS').to_numpy()
204 
205  CRStats = {}
206  for statName in CRStatNames:
207  CRStats[statName] = np.full_like(omm, np.NaN)
208 
209  p = np.logical_and(np.isfinite(omm),
210  np.logical_and(np.isfinite(sigmam), np.isfinite(sigmao)))
211  if p.sum() > 0:
212  #For each subpopulation, the consistency ratio can be written three different
213  # ways, depending on the quantity of interest in the analysis. In the
214  # following formulae, MS ≡ Mean Squared Value.
215 
216  #(1) CRd = SQRT( [MS(SIGMAMOD) + MS(SIGMAOBS)] / MS(OBS-MODEL) )
217  # denominator zero check, q
218  q = np.logical_and(bu.greatBound(np.absolute(omm), 0.0), p)
219  CRStats[CRStatNames[0]][q] = (sigmam[q] + sigmao[q]) / omm[q]
220  # benefit: ensures positive-definite numerator and denominator
221 
222  #(2) CRh = SQRT( MS(SIGMAMOD) / [MS(OBS-MODEL) - MS(SIGMAOBS)] )
223  q = np.logical_and(bu.greatBound(np.absolute(omm - sigmao), 0.0), p)
224  CRStats[CRStatNames[1]][q] = sigmam[q] / (omm[q] - sigmao[q])
225  # benefit: directly measures the spread of the forecast in observation space
226 
227  #(3) CRo = SQRT( MS(SIGMAOBS) / [MS(OBS-MODEL) - MS(SIGMAMOD)] )
228  q = np.logical_and(bu.greatBound(np.absolute(omm - sigmam), 0.0), p)
229  CRStats[CRStatNames[2]][q] = sigmao[q] / (omm[q] - sigmam[q])
230  # benefit: directly measures the specified ObsError
231 
232 
233  # create an abbreviated dictionary with only the 'CRy'+stateType diagName and CR statistics
234  CRyDict = dfw.loc({'diagName': [ommStr]}).reset_index().to_dict(orient='list')
235  nrows = len(CRStats[CRStatNames[0]])
236  CRyDict['diagName'] = ['CRy'+stateType] * nrows
237 
238  # perform the sqrt operation for all three CR's where positive-semi-definite
239  for statistic, CRStat in CRStats.items():
240  CRStat[bu.lessBound(CRStat, 0.0)] = np.NaN
241  CRyDict[statistic] = np.sqrt(CRStat)
242 
243  for statName in su.allFileStats:
244  del CRyDict[statName]
245 
246  # convert dict to DataFrame
247  CRyDF = pd.DataFrame.from_dict(CRyDict)
248  del CRyDict
249  CRyDF.set_index(dfw.indexNames, inplace=True)
250  CRyDF.sort_index(inplace=True)
251 
252  return CRyDF
253 
254 
255 ## Table of available diagnostics
256 # format of each entry:
257 # diagName: {
258 # variable: variable name or class used to calculate this diag
259 # iter: iteration of "self" type variables
260 # ('bg', 'an', int, default: None OR vu.bgIter depending on appName)
261 # vu.mean (optional): whether to apply this diagnostic to the mean state (default: True)
262 # vu.ensemble (optional): whether this diagnostic requires variables from ensemble IODA files (default: False)
263 # onlyObsSpaces (optional): list of ObsSpaces for which this diag applies
264 # see config.DiagSpaceConfig keys for available options
265 # analyze (optional): whether to analyze this diagnostic (defualt: True). Useful for turning off analyses for diagnostics that are only needed for calculating other derived diagnostics.
266 # derived (optional): whether the diagnostic is derived from statistics of other diagnostics
267 # if True, then will be calculated in analysis step, not statistics calculation step
268 # (default: False)
269 # function (optional): if derived, the python function used to calculate this diagnostic
270 # there will be 2 arguments to this function (1) a StatisticsDatabase.DFWrapper object and (2) 'staticArg' below
271 # staticArg (optional): a static argument to function (default: None)
272 # analysisStatistics (optional): statistics selected for this diagnostic (default: application dependent, see Analyses module)
273 # label (optional): label for plot axes (TODO: hookup in Analyses.py)
274 #}
275 availableDiagnostics = {
276  'bc': {
277  'variable': BiasCorrection,
278  'iter': 'an',
279  'label': '$y_{bias}$',
280  },
281  'omb': {
282  'variable': BiasCorrectedObsMinusModel,
283  'iter': 'bg',
284  'label': '$y - x_b$',
285 
286  },
287  'oma': {
288  'variable': BiasCorrectedObsMinusModel,
289  'iter': 'an',
290  'label': '$y - x_a$',
291  },
292  'omf': {
293  'variable': ObsMinusModel,
294  'label': '$y - x_f$',
295  },
296  'amb': {
297  'variable': AnalysisMinusBackground,
298  'iter': 'an',
299  'label': '$x_a - x_b$',
300  },
301  'mmgfsan': {
302  'offline': True,
303  'label': '$x - x_{a,GFS}$',
304  },
305  'omb_nobc': {
306  'variable': ObsMinusModel,
307  'iter': 'bg',
308  'label': '$y - x_b$',
309  },
310  'oma_nobc': {
311  'variable': ObsMinusModel,
312  'iter': 'an',
313  'label': '$y - x_a$',
314  },
315  'rltv_omb': {
316  'variable': RelativeBiasCorrectedObsMinusModel,
317  'iter': 'bg',
318  'label': '$(y - x_b) / y$',
319  },
320  'rltv_oma': {
321  'variable': RelativeBiasCorrectedObsMinusModel,
322  'iter': 'an',
323  'label': '$(y - x_a) / y$',
324  },
325  'rltv_omf': {
326  'variable': RelativeObsMinusModel,
327  'label': '$(y - x_f) / y$',
328  },
329  'rltv_amb': {
330  'variable': RelativeAnalysisMinusBackground,
331  'iter': 'an',
332  'label': '$(x_a - x_b) / y$',
333  },
334  'rltv_omb_nobc': {
335  'variable': RelativeObsMinusModel,
336  'iter': 'bg',
337  'label': '$(y - x_b) / y$',
338  },
339  'rltv_oma_nobc': {
340  'variable': RelativeObsMinusModel,
341  'iter': 'an',
342  'label': '$(y - x_a) / y$',
343  },
344  'amb_o_omb': {
345  'variable': AnalysisMinusBackgroundOverObsMinusBackground,
346  'iter': 'an',
347  'label': '$(x_a - x_b) / (y - x_b)$',
348  },
349  'obs': {
350  'variable': vu.selfObsValue,
351  },
352  'obs_bc': {
353  'variable': BiasCorrectedObs,
354  'label': '$y$',
355  },
356  'h(x)': {
357  'variable': vu.selfHofXValue,
358  'label': '$h(x_f)$',
359  },
360  'bak': {
361  'variable': vu.selfHofXValue,
362  'iter': 'bg',
363  'label': '$h(x_b)$',
364  },
365  'ana': {
366  'variable': vu.selfHofXValue,
367  'iter': 'an',
368  'label': '$h(x_a)$',
369  },
370  'sigmaob': {
371  'variable': vu.selfErrorValue,
372  'iter': 'bg',
373  'analyze': False,
374  'label': '$\sigma_o$',
375  },
376  'sigmab': {
377  'variable': STDofHofX,
378  'iter': 'bg',
379  'analyze': False,
380  vu.mean: False,
381  vu.ensemble: True,
382  'label': '$\sigma_{h_b}$',
383  },
384  'sigmaoa': {
385  'variable': vu.selfErrorValue,
386  'iter': 'an',
387  'analyze': False,
388  'label': '$\sigma_o$',
389  },
390  'sigmaa': {
391  'variable': STDofHofX,
392  'iter': 'an',
393  'analyze': False,
394  vu.mean: False,
395  vu.ensemble: True,
396  'label': '$\sigma_{h_a}$',
397  },
398  'sigmaof': {
399  'variable': vu.selfErrorValue,
400  'analyze': False,
401  'label': '$\sigma_o$',
402  },
403  'sigmaf': {
404  'variable': STDofHofX,
405  'analyze': False,
406  vu.mean: False,
407  vu.ensemble: True,
408  'label': '$\sigma_{h_f}$',
409  },
410 #TODO: replace 'derived' bool with 'derivedFrom' list. E.g., for 'CRyb',
411 # that would include 'sigmao' and 'sigmab'. Update writediagstats_obsspace.
412 #TODO: then only add the derived diagnostic (e.g., CRyb) to conf.DiagSpaceConfig[:]['diagNames']
413 # and remove the 'analyze' config entry under availableDiagnostics
414  'CRyb': {
415  'iter': 'bg',
416  'DFWFunction': ConsistencyRatioFromDFW,
417  'derived': True,
418 # 'derivedFrom': ['sigmao','sigmab']
419  'staticArg': 'b',
420  'analysisStatistics': CRStatNames,
421  'label': '$CR_{y,b}$',
422  },
423  'CRya': {
424  'iter': 'an',
425  'DFWFunction': ConsistencyRatioFromDFW,
426  'derived': True,
427 # 'derivedFrom': ['sigmao','sigmaa']
428  'staticArg': 'a',
429  'analysisStatistics': CRStatNames,
430  'label': '$CR_{y,a}$',
431  },
432  'CRyf': {
433  'DFWFunction': ConsistencyRatioFromDFW,
434  'derived': True,
435 # 'derivedFrom': ['sigmaof','sigmaf']
436  'staticArg': 'f',
437  'analysisStatistics': CRStatNames,
438  'label': '$CR_{y,f}$',
439  },
440  'SCI': {
441  'variable': bu.SCIOkamoto,
442  'iter': 'bg',
443  'onlyObsSpaces': ['abi_g16', 'ahi_himawari8'],
444  },
445  'ACI': {
446  'variable': bu.AsymmetricCloudImpact,
447  'onlyObsSpaces': ['abi_g16', 'ahi_himawari8'],
448  },
449  'ABEILambda': {
450  'variable': bu.ABEILambda,
451  'onlyObsSpaces': ['abi_g16', 'ahi_himawari8'],
452  'label': '$\lambda_{ABEI}$',
453  }
454 }
455 
456 #TODO: have this function return a list of diagnosticConfiguration or Diagnostic (new class) objects
457 # instead of a list of dicts
458 def diagnosticConfigs(diagnosticNames_, ObsSpaceName, includeEnsembleDiagnostics=True,
459  analysisStatistics = su.allFileStats, fileFormat=vu.hdfFileFormat):
460 
461  diagnosticConfigs = {}
462  diagnosticNames = list(set(diagnosticNames_))
463 
464  for diagnosticName in diagnosticNames:
465  if diagnosticName in availableDiagnostics:
466  config = deepcopy(availableDiagnostics[diagnosticName])
467  elif diagnosticName in diagnosticConfigs:
468  _logger.warning('diagnosticName is duplicated: '+diagnosticName)
469  continue
470  else:
471  _logger.error('diagnosticName is undefined: '+diagnosticName)
472 
473  config['analyze'] = config.get('analyze', True)
474  config['derived'] = config.get('derived', False)
475  config['offline'] = config.get('offline', False)
476  config[vu.mean] = config.get(vu.mean, True)
477  config[vu.ensemble] = (config.get(vu.ensemble, False) and includeEnsembleDiagnostics)
478  config['onlyObsSpaces'] = config.get('onlyObsSpaces',[])
479  config['analysisStatistics'] = config.get('analysisStatistics', analysisStatistics)
480  config['staticArg'] = config.get('staticArg', None)
481  config['label'] = config.get('label',diagnosticName)
482 
483  # diagnosticConfig is undefined for the following cases
484  if (not config[vu.mean] and not config[vu.ensemble]): continue
485  if (len(config['onlyObsSpaces']) > 0 and
486  ObsSpaceName not in config['onlyObsSpaces']): continue
487 
488  config['osName'] = ObsSpaceName
489  config['fileFormat'] = fileFormat
490  # add this diagnosticConfig to the list
491  diagnosticConfigs[diagnosticName] = deepcopy(config)
492 
493  outerIterStr = config.get('iter', None)
494  if outerIterStr is None or outerIterStr == '':
495  if appName == 'hofx':
496  outerIter = None
497  else:
498  outerIter = vu.bgIter
499  else:
500  if outerIterStr == 'bg':
501  outerIter = vu.bgIter
502  elif outerIterStr == 'an':
503  outerIter = anIter
504  elif pu.isint(outerIterStr):
505  outerIter = outerIterStr
506  else:
507  _logger.error('outerIter is undefined: '+outerIterStr)
508 
509  diagnosticConfigs[diagnosticName]['outerIter'] = outerIter
510 
511  if config['derived']: continue
512  if config['offline']: continue
513  diagnosticConfigs[diagnosticName]['ObsFunction'] = bu.ObsFunctionWrapper(config)
514 
515  return diagnosticConfigs
def evaluate(self, dbVals, insituParameters)
Definition: diag_utils.py:109
def evaluate(self, dbVals, insituParameters)
Definition: diag_utils.py:141
def evaluate(self, dbVals, insituParameters)
Definition: diag_utils.py:25
def evaluate(self, dbVals, insituParameters)
Definition: diag_utils.py:50
def evaluate(self, dbVals, insituParameters)
Definition: diag_utils.py:38
def evaluate(self, dbVals, insituParameters)
Definition: diag_utils.py:79
def evaluate(self, dbVals, insituParameters)
Definition: diag_utils.py:122
def evaluate(self, dbVals, insituParameters)
Definition: diag_utils.py:61
def evaluate(self, dbVals, insituParameters)
Definition: diag_utils.py:91
def evaluate(self, dbVals, insituParameters)
Definition: diag_utils.py:162
def ConsistencyRatioFromDFW(dfw, stateType)
Definition: diag_utils.py:181
def diagnosticConfigs(diagnosticNames_, ObsSpaceName, includeEnsembleDiagnostics=True, analysisStatistics=su.allFileStats, fileFormat=vu.hdfFileFormat)
Definition: diag_utils.py:459