3 from collections
import defaultdict
5 from netCDF4
import Dataset
# Module-level logger, named after this module per stdlib convention.
_logger = logging.getLogger(__name__)

# Floating-point kinds:
#   rKindStore   -- precision used when storing values (e.g. to NetCDF)
#   rKindCompute -- higher precision used for intermediate arithmetic
rKindStore = np.float32
rKindCompute = np.float64

# Statistics that can be aggregated across records/files.
aggregatableFileStats = [
    'Count',
    'Mean',
    'MS',
    'RMS',
    'STD',
    'Min',
    'Max',
]

# All statistics handled by the stats files; currently identical to the
# aggregatable set (kept as an alias, not a copy, intentionally).
allFileStats = aggregatableFileStats

# Subset of aggregatable statistics for which bootstrap sampling applies.
sampleableAggStats = [
    'Count',
    'Mean',
    'MS',
    'RMS',
]

# String-valued attribute columns stored alongside the statistics
# (one value per row in the stats file).
fileStatAttributes = [
    'DiagSpaceGrp',
    'varName',
    'varUnits',
    'diagName',
    'binMethod',
    'binVar',
    'binVal',
    'binUnits',
]

# File-name prefix for per-space statistics NetCDF files,
# e.g. 'stats_<statSpace>.nc'.
statsFilePrefix = 'stats_'
84 STATS[
'Count'] = np.isfinite(array_f).sum()
85 if STATS[
'Count'] > 0:
86 STATS[
'Mean'] = np.nanmean(array_f)
87 STATS[
'MS'] = np.nanmean(np.square(array_f))
88 STATS[
'RMS'] = np.sqrt(STATS[
'MS'])
89 STATS[
'STD'] = np.nanstd(array_f, ddof=ddof)
90 STATS[
'Min'] = np.nanmin(array_f)
91 STATS[
'Max'] = np.nanmax(array_f)
93 for stat
in allFileStats:
107 statsFile = statsFilePrefix+statSpace+
'.nc'
108 if os.path.exists(statsFile):
111 ncid = Dataset(statsFile,
'w', format=
'NETCDF4')
112 ncid.description = statSpace+
" diagnostic statistics"
114 nrows = len(statsDict[fileStatAttributes[0]])
115 ncid.createDimension(
'nrows', nrows)
117 for attribName
in fileStatAttributes:
118 attribHandle = ncid.createVariable(attribName, str,
'nrows')
119 attribHandle[:] = np.array(statsDict[attribName], dtype=object)
121 for statName
in allFileStats:
122 statHandle = ncid.createVariable(statName, rKindNC,
'nrows')
123 statHandle[:] = statsDict[statName]
131 ncid = Dataset(statsFile,
'r')
132 ncid.set_auto_mask(
False)
135 for attribName
in fileStatAttributes:
136 statsDict[attribName] = np.asarray(ncid.variables[attribName][:])
138 for statName
in allFileStats:
139 statsDict[statName] = np.asarray(ncid.variables[statName][:], statDtypes[statName])
159 return pd.Series(y, index=aggregatableFileStats)
166 counts = np.array(x_[
'Count'])
167 mask = np.logical_and(np.greater(counts, 0),
172 for stat
in aggregatableFileStats:
173 x[stat] = np.array(x_[stat], dtype=statDtypesCompute[stat])[mask]
176 for stat
in aggregatableFileStats:
180 Count = int(np.nansum(x[
'Count']))
183 if Count > 0
and np.isfinite(Count):
186 v1_ = np.multiply(x[
'Mean'], x[
'Count'].astype(rKindCompute))
192 v1_ = np.multiply(x[
'MS'], x[
'Count'].astype(rKindCompute))
204 v1_ = np.multiply(np.square(x[
'STD']), np.subtract(x[
'Count'].astype(rKindCompute), ddof))
205 v2_ = np.multiply(np.square(x[
'Mean']), x[
'Count'].astype(rKindCompute))
206 v12 = np.nansum(np.sort(np.append(v1_, v2_)))
212 y[
'Min'] = np.nanmin(x[
'Min'])
215 y[
'Max'] = np.nanmax(x[
'Max'])
227 ciTraits = [cimean, cimin, cimax]
236 return np.sqrt(np.nanmean(np.square(x), axis=axis))
254 if type(n_samples)
is list:
255 nsSamples = n_samples
256 elif (type(n_samples)
is np.array
257 or type(n_samples)
is np.ndarray):
258 nsSamples = list(n_samples)
260 nsSamples = [n_samples]
261 max_samples = np.max(nsSamples)
268 iResample = np.random.choice(Ndata, (Ndata, max_samples))
270 iResample = np.random.choice(Ndata, (Ndata, max_samples), p = weights)
272 XResample = X[iResample]
273 Expect = np.nanmean(XResample, axis=0)
275 ciVals = defaultdict(list)
277 for nSamples
in nsSamples:
278 sampleVals = np.sort(Expect[0:nSamples])
279 nonNaNSamples = np.isfinite(sampleVals).sum()
281 iMid = int(np.around(0.5 *
rKindCompute(nonNaNSamples)))
282 iLeft = int(np.around(0.5 * alpha *
rKindCompute(nonNaNSamples)))
283 iRight = int(np.around((1 - 0.5 * alpha) *
rKindCompute(nonNaNSamples)))
285 ciVals[cimean].append(
rKindStore(sampleVals[iMid]))
286 ciVals[cimin].append(
rKindStore(sampleVals[iLeft]))
287 ciVals[cimax].append(
rKindStore(sampleVals[iRight]))
294 alpha=0.05, n_samples=8000):
306 if type(n_samples)
is list:
307 nsSamples = n_samples
308 elif (type(n_samples)
is np.array
309 or type(n_samples)
is np.ndarray):
310 nsSamples = list(n_samples)
312 nsSamples = [n_samples]
313 max_samples = np.max(nsSamples)
319 iResample = np.random.choice(Ndata, (Ndata, max_samples))
321 XResample =
rmsFunc(X[iResample], axis=0)
322 YResample =
rmsFunc(Y[iResample], axis=0)
324 Expect = statFunc(XResample,YResample)
326 ciVals = defaultdict(list)
328 for nSamples
in nsSamples:
329 sampleVals = np.sort(Expect[0:nSamples])
330 nonNaNSamples = np.isfinite(sampleVals).sum()
332 iMid = int(np.around(0.5 *
rKindCompute(nonNaNSamples)))
333 iLeft = int(np.around(0.5 * alpha *
rKindCompute(nonNaNSamples)))
334 iRight = int(np.around((1 - 0.5 * alpha) *
rKindCompute(nonNaNSamples)))
336 ciVals[cimean].append(
rKindStore(sampleVals[iMid]))
337 ciVals[cimin].append(
rKindStore(sampleVals[iLeft]))
338 ciVals[cimax].append(
rKindStore(sampleVals[iRight]))
345 alpha=0.05, n_samples=8000):
360 if type(n_samples)
is list:
361 nsSamples = n_samples
362 elif (type(n_samples)
is np.array
363 or type(n_samples)
is np.ndarray):
364 nsSamples = list(n_samples)
366 nsSamples = [n_samples]
367 max_samples = np.max(nsSamples)
370 mask = np.logical_and(np.isfinite(X), np.isfinite(Y))
371 mask = np.logical_and(mask, Ns > 0.)
373 _logger.error(
"\n\nERROR: no valid subpopulation data")
376 X_ = np.asarray(X[mask], dtype=rKindCompute)
377 Y_ = np.asarray(Y[mask], dtype=rKindCompute)
378 Ns_ = np.asarray(Ns[mask], dtype=rKindCompute)
382 iResample = np.random.choice(Ndata, (Ndata, max_samples))
384 NsResample = Ns_[iResample]
385 XResample = np.nansum(np.multiply(np.square(X_[iResample]), NsResample), axis=0)
386 YResample = np.nansum(np.multiply(np.square(Y_[iResample]), NsResample), axis=0)
388 NaggResample = np.nansum(NsResample, axis=0)
389 XaggResample = np.sqrt(np.divide(XResample, NaggResample))
390 YaggResample = np.sqrt(np.divide(YResample, NaggResample))
392 Expect = statFunc(XaggResample, YaggResample)
394 ciVals = defaultdict(list)
396 for nSamples
in nsSamples:
397 sampleVals = np.sort(Expect[0:nSamples])
398 nonNaNSamples = np.isfinite(sampleVals).sum()
400 iMid = int(np.around(0.5 *
rKindCompute(nonNaNSamples)))
401 iLeft = int(np.around(0.5 * alpha *
rKindCompute(nonNaNSamples)))
402 iRight = int(np.around((1 - 0.5 * alpha) *
rKindCompute(nonNaNSamples)))
404 ciVals[cimean].append(
rKindStore(sampleVals[iMid]))
405 ciVals[cimin].append(
rKindStore(sampleVals[iLeft]))
406 ciVals[cimax].append(
rKindStore(sampleVals[iRight]))
414 vecFuncs=[identityFunc],
415 bootFuncs=[bootStrapVector],
416 statFunc=np.subtract):
432 if type(n_samples)
is list:
433 nsSamples = n_samples
434 elif (type(n_samples)
is np.array
435 or type(n_samples)
is np.ndarray):
436 nsSamples = list(n_samples)
438 nsSamples = [n_samples]
439 max_samples = np.max(nsSamples)
441 mask = np.logical_and(np.isfinite(X), np.isfinite(Y))
446 for ifunc, func
in enumerate(vecFuncs):
447 bootFunc = bootFuncs[ifunc]
448 if bootFunc
is bootStrapVector:
450 statFunc(func(X_), func(Y_)),
452 elif bootFunc
is bootStrapVectorRMSFunc:
457 funcCIVals[ifunc] = {}
458 for trait
in ciTraits:
459 funcCIVals[ifunc][trait] = ciVals[trait]
467 statFunc=np.subtract,
468 statNames=sampleableAggStats):
486 for stat
in statNames:
487 statCIVals[stat] = {}
488 for trait
in ciTraits:
489 statCIVals[stat][trait] = [np.NaN]
496 if type(n_samples)
is list:
497 nsSamples = n_samples
498 elif (type(n_samples)
is np.array
499 or type(n_samples)
is np.ndarray):
500 nsSamples = list(n_samples)
502 nsSamples = [n_samples]
503 max_samples = np.max(nsSamples)
505 for stat
in statNames:
506 if stat ==
'Count':
continue
507 Ns = X.loc[:,
'Count'].to_numpy(dtype=rKindCompute)
508 X_ = X.loc[:,stat].to_numpy(dtype=rKindCompute)
509 Y_ = Y.loc[:,stat].to_numpy(dtype=rKindCompute)
512 mask = np.logical_and(np.isfinite(X_), np.isfinite(Y_))
513 mask = np.logical_and(mask, Ns > 0.)
514 if mask.sum() == 0:
continue
519 if stat ==
'Mean' or stat ==
'MS':
520 weights = np.divide(Ns,np.nansum(Ns))
527 X_, Y_, Ns, statFunc,
530 _logger.error(
"\n\nERROR: stat not implemented: ", stat)
533 for trait
in ciTraits:
534 statCIVals[stat][trait] = ciVals[trait]
def read_stats_nc(statsFile)
def bootStrapVectorFunc(X, Y, alpha=0.05, n_samples=5000, vecFuncs=[identityFunc], bootFuncs=[bootStrapVector], statFunc=np.subtract)
def bootStrapClusterFunc(X, Y, alpha=0.05, n_samples=5000, statFunc=np.subtract, statNames=sampleableAggStats)
def bootStrapVector(X, alpha=0.05, n_samples=8000, weights=None)
def bootStrapAggRMSFunc(X, Y, Ns, statFunc=np.subtract, alpha=0.05, n_samples=8000)
def write_stats_nc(statSpace, statsDict)
def bootStrapVectorRMSFunc(X, Y, statFunc=np.subtract, alpha=0.05, n_samples=8000)