5 from copy
import deepcopy
7 import matplotlib.pyplot
as plt
14 import plot_utils
as pu
15 import stat_utils
as su
20 NOMINAL_GROUP_SIZE = 500
22 NOMINAL_NUMBER_OF_GROUPS = 60
25 N_SUPER_SAMPLE = NOMINAL_GROUP_SIZE * NOMINAL_NUMBER_OF_GROUPS
28 nBootSamples = np.array([ np.around(10.0 ** x).astype(int) \
29 for x
in list(np.arange(1.5,4.5,0.25))])
36 fullColor = pu.plotColors[1]
37 clustColor = pu.plotColors[2]
41 vecFuncs = [su.identityFunc, np.square, su.rmsFunc]
42 bootFuncs = [su.bootStrapVector,su.bootStrapVector,su.bootStrapVectorRMSFunc]
44 nFuncs = len(vecFuncs)
47 statFunc = np.subtract
50 randSample1 = np.random.randn(N_SUPER_SAMPLE)
51 X1MEAN = 2.0 ; X1STD = 1.5
52 X1_SAMPLE = X1MEAN + X1STD*randSample1
54 randSample2 = np.random.randn(N_SUPER_SAMPLE)
55 X2MEAN = 1.85 ; X2STD = 1.65
56 X2_SAMPLE = X2MEAN + X2STD*randSample2
58 print(
"\n\nX1_SAMPLE stats:")
59 print(su.calcStats(X1_SAMPLE))
61 print(
"\n\nX2_SAMPLE stats:")
62 print(su.calcStats(X2_SAMPLE))
64 print(
"\n\nstatFunc(vecFunc(X2),vecFunc(X1)) stats:")
66 for ifunc, vecFunc
in enumerate(vecFuncs):
67 if vecFunc
is su.rmsFunc:
68 sampleStats.append({
'Mean': statFunc(vecFunc(X2_SAMPLE),vecFunc(X1_SAMPLE))})
71 sampleStats.append(su.calcStats(statFunc(vecFunc(X2_SAMPLE),vecFunc(X1_SAMPLE))))
79 statsCIFullSamples = su.bootStrapVectorFunc( \
81 n_samples = nBootSamples, \
82 vecFuncs = vecFuncs, \
83 bootFuncs = bootFuncs, \
88 SIGMANS = np.around(np.multiply(NOMINAL_GROUP_SIZE, \
89 [0.005, 0.015, 0.050, 0.150, 0.400])).astype(int)
92 for SIGMAN
in SIGMANS:
114 clustsN = np.multiply(NOMINAL_GROUP_SIZE,np.ones(NOMINAL_NUMBER_OF_GROUPS,dtype=int))
115 for i
in list(range(0,int(NOMINAL_NUMBER_OF_GROUPS/2 - 1))):
116 delta = NOMINAL_GROUP_SIZE - \
117 int(max([np.around(NOMINAL_GROUP_SIZE + np.random.randn(1)*SIGMAN), 1.0]))
119 delta = np.abs(delta)
122 clustsN[i] = np.max([clustsN[i] + delta, 1])
123 clustsN[-(i+1)] = np.max([clustsN[-(i+1)] - delta, 1])
130 clustsN = np.random.choice(clustsN, NOMINAL_NUMBER_OF_GROUPS, replace =
False )
132 clustsStart.append(0)
134 for i
in list(range(0,NOMINAL_NUMBER_OF_GROUPS)):
135 clustsEnd.append(np.sum(clustsN[0:i+1]))
136 clustsStart.append(clustsEnd[-1])
138 print(
"\n\nclustsN = ",clustsN)
144 for stat
in su.aggregatableFileStats:
145 X1ClustsStats[stat] = []
147 for ic, clusts
in enumerate(clustsN):
148 X1ClustsValues.append(X1_SAMPLE[clustsStart[ic]:clustsEnd[ic]])
149 clustStats = su.calcStats(X1ClustsValues[ic])
150 for stat
in su.aggregatableFileStats:
151 X1ClustsStats[stat].append(clustStats[stat])
153 X1ClustsStatsDF = pd.DataFrame.from_dict(X1ClustsStats)
154 X1ClustsStatsDF.sort_index()
160 print(
"\n\nX1 aggregated stats:")
161 aggClustStats = su.aggStatsDF(X1ClustsStatsDF)
168 for stat
in su.aggregatableFileStats:
169 X2ClustsStats[stat] = []
171 for ic, clusts
in enumerate(clustsN):
172 X2ClustsValues.append(X2_SAMPLE[clustsStart[ic]:clustsEnd[ic]])
173 clustStats = su.calcStats(X2ClustsValues[ic])
174 for stat
in su.aggregatableFileStats:
175 X2ClustsStats[stat].append(clustStats[stat])
177 X2ClustsStatsDF = pd.DataFrame.from_dict(X2ClustsStats)
178 X2ClustsStatsDF.sort_index()
185 print(
"\n\nX2 aggregated stats:")
186 aggClustStats = su.aggStatsDF(X2ClustsStatsDF)
190 print(
"\n\nPerforming bootstrap for sigmaN = ",SIGMAN)
193 statsCIClustSamples = su.bootStrapClusterFunc( \
194 X2ClustsStatsDF, X1ClustsStatsDF, \
195 n_samples = nBootSamples, \
205 fig = pu.setup_fig(nx, ny, inch_width=2.2)
207 for ifunc, vecFunc
in enumerate(vecFuncs):
208 bootFunc = bootFuncs[ifunc]
209 if vecFunc
is su.identityFunc:
210 sampleAggStat =
"Mean"
211 elif vecFunc
is np.square:
213 elif vecFunc
is su.rmsFunc:
214 sampleAggStat =
"RMS"
216 print(
"ERROR: vecFunc has no equivalent in aggregated cluster bootstrap")
220 ax1 = fig.add_subplot(ny, nx, nx*ifunc+1)
225 lineVals = deepcopy(statsCIFullSamples[ifunc][
'VALUE'])
226 lineValsMin = deepcopy(statsCIFullSamples[ifunc][
'LO'])
227 lineValsMax = deepcopy(statsCIFullSamples[ifunc][
'HI'])
228 plotVals.append(lineVals)
229 plotVals.append(lineValsMin)
230 plotVals.append(lineValsMax)
233 ax1.plot(xVals, lineVals, \
234 label=
'std bootstrap', \
236 ls=pu.plotLineStyles[1], \
239 ax1.plot(xVals, lineValsMin, \
245 ax1.plot(xVals, lineValsMax, \
252 ax1.plot([xVals[0], xVals[-1]], np.multiply(lineValsMin[-1],[1., 1.]), \
253 ls=
"--", c=fullColor, \
255 linewidth=0.5,markersize=0)
256 ax1.plot([xVals[0], xVals[-1]], np.multiply(lineValsMax[-1],[1., 1.]), \
257 ls=
"--", c=fullColor, \
259 linewidth=0.5,markersize=0)
263 lineVals = deepcopy(statsCIClustSamples[sampleAggStat][
'VALUE'])
264 lineValsMin = deepcopy(statsCIClustSamples[sampleAggStat][
'LO'])
265 lineValsMax = deepcopy(statsCIClustSamples[sampleAggStat][
'HI'])
266 plotVals.append(lineVals)
267 plotVals.append(lineValsMin)
268 plotVals.append(lineValsMax)
270 ax1.plot(xVals, lineVals, \
271 label=
'cluster bootstrap', \
273 ls=pu.plotLineStyles[1], \
276 ax1.plot(xVals, lineValsMin, \
282 ax1.plot(xVals, lineValsMax, \
289 ax1.plot([xVals[0], xVals[-1]], np.multiply(lineValsMin[-1],[1., 1.]), \
290 ls=
"--", c=clustColor, \
292 linewidth=0.5,markersize=0)
293 ax1.plot([xVals[0], xVals[-1]], np.multiply(lineValsMax[-1],[1., 1.]), \
294 ls=
"--", c=clustColor, \
296 linewidth=0.5,markersize=0)
300 ax1.plot([xVals[0], xVals[-1]], np.multiply(sampleStats[ifunc][
'Mean'],[1., 1.]), \
302 linewidth=0.9,markersize=0,label=
'sample pop.')
304 lh = ax1.legend(loc=
'best',fontsize=3,frameon=
True,\
305 framealpha=0.4,ncol=2)
306 lh.get_frame().set_linewidth(0.0)
308 ax1.set_ylabel(sampleAggStat+
"_2 - "+sampleAggStat+
"_1",fontsize=4)
309 ax1.set_xlabel(
'# bootstrap samples',fontsize=4)
310 ax1.set_xscale(
'log')
311 ax1.xaxis.set_tick_params(labelsize=3)
312 ax1.yaxis.set_tick_params(labelsize=3)
314 minyval, maxyval = pu.get_clean_ax_limits(plotVals=plotVals,symmetric=
False)
315 ax1.set_ylim(minyval,maxyval)
319 ax2 = fig.add_subplot(ny, nx, nx*ifunc+2)
320 ax2.hist(clustsN,bins=int(1.2*np.floor(np.sqrt(NOMINAL_NUMBER_OF_GROUPS))))
321 ax2.set_ylabel(
"Count",fontsize=4)
322 ax2.set_xlabel(
'Cluster Size',fontsize=4)
325 filename =
'cluster_bootstrap_test/comparison' \
326 +
'_Nmean='+str(NOMINAL_GROUP_SIZE) \
327 +
'_Nstd='+str(SIGMAN) \
328 +
'_Nt='+str(N_SUPER_SAMPLE)
330 pu.finalize_fig(fig, filename,
'png',
True)
336 if __name__ ==
'__main__':
main()