import logging
import os
from collections import defaultdict

import numpy as np
import pandas as pd
from netCDF4 import Dataset

_logger = logging.getLogger(__name__)
 
# reduced precision used when storing statistics
rKindStore = np.float32

# full precision used when computing statistics
rKindCompute = np.float64
 
aggregatableFileStats = ['Count', 'Mean', 'MS', 'RMS', 'STD', 'Min', 'Max']

allFileStats = aggregatableFileStats

sampleableAggStats = ['Count', 'Mean', 'MS', 'RMS']
 
fileStatAttributes = ['DiagSpaceGrp', 'varName', 'varUnits', 'diagName',
                      'binMethod', 'binVar', 'binVal', 'binUnits']

statsFilePrefix = 'stats_'

  STATS['Count'] = np.isfinite(array_f).sum()
  if STATS['Count'] > 0:
    STATS['Mean'] = np.nanmean(array_f)
    STATS['MS']   = np.nanmean(np.square(array_f))
    STATS['RMS']  = np.sqrt(STATS['MS'])
    STATS['STD']  = np.nanstd(array_f, ddof=ddof)
    STATS['Min']  = np.nanmin(array_f)
    STATS['Max']  = np.nanmax(array_f)

    for stat in allFileStats:
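
# --- illustrative example (not part of the original file) ---
# A self-contained check of the statistics computed above on a toy array;
# the helper name and sample values are hypothetical.
def _example_calc_stats():
  array_f = np.array([1., 2., np.nan, 4.])
  ddof = 0
  count = np.isfinite(array_f).sum()        # 3: NaN entries are excluded
  mean = np.nanmean(array_f)                # 7/3
  ms = np.nanmean(np.square(array_f))       # (1 + 4 + 16)/3 = 7
  rms = np.sqrt(ms)
  std = np.nanstd(array_f, ddof=ddof)
  # with ddof=0, MS decomposes exactly into Mean^2 + STD^2
  assert np.isclose(ms, mean**2 + std**2)
  return count, mean, ms, rms, std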
 
  statsFile = statsFilePrefix+statSpace+'.nc'
  if os.path.exists(statsFile):

  ncid = Dataset(statsFile, 'w', format='NETCDF4')
  ncid.description = statSpace+" diagnostic statistics"

  nrows = len(statsDict[fileStatAttributes[0]])
  ncid.createDimension('nrows', nrows)

  for attribName in fileStatAttributes:
    attribHandle = ncid.createVariable(attribName, str, 'nrows')
    attribHandle[:] = np.array(statsDict[attribName], dtype=object)

  for statName in allFileStats:
    statHandle = ncid.createVariable(statName, rKindNC, 'nrows')
    statHandle[:] = statsDict[statName]
 
  ncid = Dataset(statsFile, 'r')
  ncid.set_auto_mask(False)

  for attribName in fileStatAttributes:
    statsDict[attribName] = np.asarray(ncid.variables[attribName][:])

  for statName in allFileStats:
    statsDict[statName] = np.asarray(ncid.variables[statName][:], statDtypes[statName])
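
# --- hedged usage sketch (not part of the original file) ---
# write_stats_nc and read_stats_nc operate on a dictionary of parallel lists,
# one per name in fileStatAttributes and allFileStats, with one entry per row.
# The helper name and all values below are hypothetical placeholders, and
# read_stats_nc is assumed to return the populated statsDict.
def _example_stats_roundtrip():
  exampleDict = {
    'DiagSpaceGrp': ['model'], 'varName': ['T'], 'varUnits': ['K'],
    'diagName': ['omb'], 'binMethod': ['default'], 'binVar': ['lat'],
    'binVal': ['global'], 'binUnits': ['degrees'],
    'Count': [1000], 'Mean': [0.1], 'MS': [0.5], 'RMS': [np.sqrt(0.5)],
    'STD': [0.7], 'Min': [-2.5], 'Max': [2.4],
  }
  write_stats_nc('example_space', exampleDict)    # writes stats_example_space.nc
  return read_stats_nc('stats_example_space.nc')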
 
  return pd.Series(y, index=aggregatableFileStats)
 
  counts = np.array(x_['Count'])
  mask = np.logical_and(np.greater(counts, 0),

  for stat in aggregatableFileStats:
    x[stat] = np.array(x_[stat], dtype=statDtypesCompute[stat])[mask]

  for stat in aggregatableFileStats:

  Count = int(np.nansum(x['Count']))

  if Count > 0 and np.isfinite(Count):
    # numerator terms for the aggregated Mean (Mean_i * Count_i)
    v1_ = np.multiply(x['Mean'], x['Count'].astype(rKindCompute))

    # numerator terms for the aggregated MS (MS_i * Count_i)
    v1_ = np.multiply(x['MS'], x['Count'].astype(rKindCompute))

    # within-subset (v1_) and between-subset (v2_) sum-of-squares terms for the aggregated STD
    v1_ = np.multiply(np.square(x['STD']), np.subtract(x['Count'].astype(rKindCompute), ddof))
    v2_ = np.multiply(np.square(x['Mean']), x['Count'].astype(rKindCompute))
    v12 = np.nansum(np.sort(np.append(v1_, v2_)))

    y['Min'] = np.nanmin(x['Min'])
    y['Max'] = np.nanmax(x['Max'])
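
# --- illustrative example (not part of the original file) ---
# Self-contained check of the pooled-statistics algebra used above: the
# count-weighted terms reproduce the statistics of the concatenated data.
# The helper name and sample subsets are hypothetical, and the final STD
# combination is inferred from the v1_/v2_/v12 terms above.
def _example_aggregate_stats(ddof=0):
  subsets = [np.array([1., 2., 3.]), np.array([4., 5.])]
  N = np.array([s.size for s in subsets], dtype=rKindCompute)
  m = np.array([s.mean() for s in subsets])
  ms = np.array([np.mean(np.square(s)) for s in subsets])
  sd = np.array([s.std(ddof=ddof) for s in subsets])

  Count = N.sum()
  Mean = np.nansum(np.multiply(m, N)) / Count     # aggregated Mean
  MS = np.nansum(np.multiply(ms, N)) / Count      # aggregated MS
  # v1_ (within-subset) + v2_ (between-subset) sum-of-squares terms
  v12 = np.nansum(np.square(sd) * (N - ddof) + np.square(m) * N)
  STD = np.sqrt((v12 - Count * Mean**2) / (Count - ddof))

  allValues = np.concatenate(subsets)
  assert np.isclose(Mean, allValues.mean())
  assert np.isclose(MS, np.mean(np.square(allValues)))
  assert np.isclose(STD, allValues.std(ddof=ddof))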
 
# keys used to index confidence-interval results
ciTraits = [cimean, cimin, cimax]
 
  return np.sqrt(np.nanmean(np.square(x), axis=axis))
 
  if type(n_samples) is list:
    nsSamples = n_samples
  elif type(n_samples) is np.ndarray:
    nsSamples = list(n_samples)
  else:
    nsSamples = [n_samples]
  max_samples = np.max(nsSamples)

    # unweighted resampling indices
    iResample = np.random.choice(Ndata, (Ndata, max_samples))
    # weighted resampling indices (weights give the per-element probabilities)
    iResample = np.random.choice(Ndata, (Ndata, max_samples), p=weights)

  XResample = X[iResample]
  Expect = np.nanmean(XResample, axis=0)

  ciVals = defaultdict(list)

  for nSamples in nsSamples:
    sampleVals = np.sort(Expect[0:nSamples])
    nonNaNSamples = np.isfinite(sampleVals).sum()

    iMid = int(np.around(0.5 * rKindCompute(nonNaNSamples)))
    iLeft = int(np.around(0.5 * alpha * rKindCompute(nonNaNSamples)))
    iRight = int(np.around((1 - 0.5 * alpha) * rKindCompute(nonNaNSamples)))

    ciVals[cimean].append(rKindStore(sampleVals[iMid]))
    ciVals[cimin].append(rKindStore(sampleVals[iLeft]))
    ciVals[cimax].append(rKindStore(sampleVals[iRight]))
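
# --- illustrative example (not part of the original file) ---
# Self-contained sketch of the bootstrap confidence-interval indexing used
# above: resample with replacement, take the expectation of each replicate,
# then read the median and the alpha/2, 1-alpha/2 quantiles off the sorted
# replicates.  The helper name and sample data are hypothetical.
def _example_bootstrap_ci(alpha=0.05, n_samples=2000):
  data = np.random.default_rng(0).normal(loc=1.0, size=200)
  Ndata = data.size
  iResample = np.random.choice(Ndata, (Ndata, n_samples))
  Expect = np.nanmean(data[iResample], axis=0)    # one mean per replicate

  sampleVals = np.sort(Expect)
  n = np.isfinite(sampleVals).sum()
  ciMean = sampleVals[int(np.around(0.5 * n))]
  ciMin = sampleVals[int(np.around(0.5 * alpha * n))]
  ciMax = sampleVals[int(np.around((1 - 0.5 * alpha) * n))]
  return ciMean, ciMin, ciMax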
 
def bootStrapVectorRMSFunc(X, Y, statFunc=np.subtract,
                           alpha=0.05, n_samples=8000):
 
  if type(n_samples) is list:
    nsSamples = n_samples
  elif type(n_samples) is np.ndarray:
    nsSamples = list(n_samples)
  else:
    nsSamples = [n_samples]
  max_samples = np.max(nsSamples)

  iResample = np.random.choice(Ndata, (Ndata, max_samples))

  XResample = rmsFunc(X[iResample], axis=0)
  YResample = rmsFunc(Y[iResample], axis=0)

  Expect = statFunc(XResample, YResample)

  ciVals = defaultdict(list)

  for nSamples in nsSamples:
    sampleVals = np.sort(Expect[0:nSamples])
    nonNaNSamples = np.isfinite(sampleVals).sum()

    iMid = int(np.around(0.5 * rKindCompute(nonNaNSamples)))
    iLeft = int(np.around(0.5 * alpha * rKindCompute(nonNaNSamples)))
    iRight = int(np.around((1 - 0.5 * alpha) * rKindCompute(nonNaNSamples)))

    ciVals[cimean].append(rKindStore(sampleVals[iMid]))
    ciVals[cimin].append(rKindStore(sampleVals[iLeft]))
    ciVals[cimax].append(rKindStore(sampleVals[iRight]))
 
def bootStrapAggRMSFunc(X, Y, Ns, statFunc=np.subtract,
                        alpha=0.05, n_samples=8000):
 
  if type(n_samples) is list:
    nsSamples = n_samples
  elif type(n_samples) is np.ndarray:
    nsSamples = list(n_samples)
  else:
    nsSamples = [n_samples]
  max_samples = np.max(nsSamples)

  # restrict to entries where X and Y are finite and counts are positive
  mask = np.logical_and(np.isfinite(X), np.isfinite(Y))
  mask = np.logical_and(mask, Ns > 0.)
  if mask.sum() == 0:
    _logger.error("\n\nERROR: no valid subpopulation data")

  X_ = np.asarray(X[mask], dtype=rKindCompute)
  Y_ = np.asarray(Y[mask], dtype=rKindCompute)
  Ns_ = np.asarray(Ns[mask], dtype=rKindCompute)

  iResample = np.random.choice(Ndata, (Ndata, max_samples))

  NsResample = Ns_[iResample]
  XResample = np.nansum(np.multiply(np.square(X_[iResample]), NsResample), axis=0)
  YResample = np.nansum(np.multiply(np.square(Y_[iResample]), NsResample), axis=0)

  NaggResample = np.nansum(NsResample, axis=0)
  XaggResample = np.sqrt(np.divide(XResample, NaggResample))
  YaggResample = np.sqrt(np.divide(YResample, NaggResample))

  Expect = statFunc(XaggResample, YaggResample)

  ciVals = defaultdict(list)

  for nSamples in nsSamples:
    sampleVals = np.sort(Expect[0:nSamples])
    nonNaNSamples = np.isfinite(sampleVals).sum()

    iMid = int(np.around(0.5 * rKindCompute(nonNaNSamples)))
    iLeft = int(np.around(0.5 * alpha * rKindCompute(nonNaNSamples)))
    iRight = int(np.around((1 - 0.5 * alpha) * rKindCompute(nonNaNSamples)))

    ciVals[cimean].append(rKindStore(sampleVals[iMid]))
    ciVals[cimin].append(rKindStore(sampleVals[iLeft]))
    ciVals[cimax].append(rKindStore(sampleVals[iRight]))
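
# --- illustrative example (not part of the original file) ---
# Self-contained check of the count-weighted RMS aggregation used above: when
# X holds subpopulation RMS values and Ns the matching counts,
# sqrt(sum(Ns * X^2) / sum(Ns)) equals the RMS of the pooled data.  The helper
# name and sample subsets are hypothetical.
def _example_weighted_rms_aggregation():
  a = np.array([1., 2., 3.])
  b = np.array([4., 5.])
  X = np.array([np.sqrt(np.mean(np.square(a))), np.sqrt(np.mean(np.square(b)))])
  Ns = np.array([a.size, b.size], dtype=rKindCompute)

  aggRMS = np.sqrt(np.nansum(np.multiply(np.square(X), Ns)) / np.nansum(Ns))
  assert np.isclose(aggRMS, np.sqrt(np.mean(np.square(np.concatenate([a, b])))))
  return aggRMS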
 
def bootStrapVectorFunc(X, Y, alpha=0.05, n_samples=5000,
                        vecFuncs=[identityFunc],
                        bootFuncs=[bootStrapVector],
                        statFunc=np.subtract):
 
  if type(n_samples) is list:
    nsSamples = n_samples
  elif type(n_samples) is np.ndarray:
    nsSamples = list(n_samples)
  else:
    nsSamples = [n_samples]
  max_samples = np.max(nsSamples)

  mask = np.logical_and(np.isfinite(X), np.isfinite(Y))

  for ifunc, func in enumerate(vecFuncs):
    bootFunc = bootFuncs[ifunc]
    if bootFunc is bootStrapVector:
                 statFunc(func(X_), func(Y_)),
    elif bootFunc is bootStrapVectorRMSFunc:

    funcCIVals[ifunc] = {}
    for trait in ciTraits:
      funcCIVals[ifunc][trait] = ciVals[trait]
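
# --- hedged usage sketch (not part of the original file) ---
# bootStrapVectorFunc compares two paired vectors, applying each entry of
# vecFuncs before bootstrapping; it is assumed to return the funcCIVals
# dictionary built above (one entry per vecFunc, keyed by ciTraits).  The
# helper name and sample data are hypothetical.
def _example_bootStrapVectorFunc():
  rng = np.random.default_rng(1)
  X = rng.normal(size=300)
  Y = X + rng.normal(scale=0.1, size=300)
  return bootStrapVectorFunc(X, Y, alpha=0.05, n_samples=[1000, 5000])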
 
def bootStrapClusterFunc(X, Y, alpha=0.05, n_samples=5000,
                         statFunc=np.subtract,
                         statNames=sampleableAggStats):
 
  for stat in statNames:
    statCIVals[stat] = {}
    for trait in ciTraits:
      statCIVals[stat][trait] = [np.nan]

    if type(n_samples) is list:
      nsSamples = n_samples
    elif type(n_samples) is np.ndarray:
      nsSamples = list(n_samples)
    else:
      nsSamples = [n_samples]
    max_samples = np.max(nsSamples)

    for stat in statNames:
      if stat == 'Count': continue
      Ns = X.loc[:,'Count'].to_numpy(dtype=rKindCompute)
      X_ = X.loc[:,stat].to_numpy(dtype=rKindCompute)
      Y_ = Y.loc[:,stat].to_numpy(dtype=rKindCompute)

      mask = np.logical_and(np.isfinite(X_), np.isfinite(Y_))
      mask = np.logical_and(mask, Ns > 0.)
      if mask.sum() == 0: continue

      if stat == 'Mean' or stat == 'MS':
        weights = np.divide(Ns, np.nansum(Ns))

                   X_, Y_, Ns, statFunc,

        _logger.error("\n\nERROR: stat not implemented: "+stat)

      for trait in ciTraits:
        statCIVals[stat][trait] = ciVals[trait]
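
# --- hedged usage sketch (not part of the original file) ---
# bootStrapClusterFunc consumes per-cluster statistics for two experiments as
# pandas DataFrames with (at least) the sampleableAggStats columns, and is
# assumed to return the statCIVals dictionary built above
# (stat -> trait -> one CI value per requested n_samples).  The helper name
# and all numbers are hypothetical.
def _example_bootStrapClusterFunc():
  X = pd.DataFrame({
    'Count': [100, 80, 120],
    'Mean':  [0.10, 0.05, 0.20],
    'MS':    [0.50, 0.40, 0.70],
    'RMS':   [np.sqrt(0.50), np.sqrt(0.40), np.sqrt(0.70)],
  })
  Y = X.assign(Mean=[0.12, 0.06, 0.18])
  return bootStrapClusterFunc(X, Y, alpha=0.05, n_samples=[1000, 5000])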
 
# summary of the function signatures in this module:
def read_stats_nc(statsFile)
 
def bootStrapVectorFunc(X, Y, alpha=0.05, n_samples=5000, vecFuncs=[identityFunc], bootFuncs=[bootStrapVector], statFunc=np.subtract)
 
def bootStrapClusterFunc(X, Y, alpha=0.05, n_samples=5000, statFunc=np.subtract, statNames=sampleableAggStats)
 
def bootStrapVector(X, alpha=0.05, n_samples=8000, weights=None)
 
def bootStrapAggRMSFunc(X, Y, Ns, statFunc=np.subtract, alpha=0.05, n_samples=8000)
 
def write_stats_nc(statSpace, statsDict)
 
def bootStrapVectorRMSFunc(X, Y, statFunc=np.subtract, alpha=0.05, n_samples=8000)