MPAS-JEDI
analyze_config.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 
3 import datetime as dt
4 import os
5 
6 ########################################################################
7 '''
8 This module is used to configure statistical analyses. Those analyses
9 can be initialized either by directly executing AnalyzeStats.py or
10 by submitting a series of jobs for multiple DiagSpaces using
11 SpawnAnalyzeStats.py and AnalyzeStats.csh.
12 
13 Command-line examples:
14 ----------------------
15 + Carry out analyses for all DiagSpaces that contain "amsua"
16 
17  python AnalyzeStats.py -d amsua
18 
19 + Use 12 processes to carry out analyses for all
20  DiagSpaces with anGroup == "conv"
21 
22  python AnalyzeStats.py -n 12 -g conv
23 
24 + Use 12 processes to carry out analyses for all
25  DiagSpaces that contain "abi", and use 30 processes
26  for reading the StatisticsDatabase
27 
28  python AnalyzeStats.py -n 12 -r 30 -d abi
29 
30 + Get info about more options
31 
32  python AnalyzeStats.py --help
33 
34 Job-submission examples:
35 ------------------------
36 + Spawn one job for each DiagSpace that is enabled in config
37  using the anGroupConfig specified therein
38 
39  python SpawnAnalyzeStats.py
40 
41 + Specify that statistics files come from a JEDI hofx application
42 
43  python SpawnAnalyzeStats.py -app hofx
44 
45 + Spawn one job for each DiagSpace that contains "amsua"
46 
47  python SpawnAnalyzeStats.py -d amsua
48 
49 + Spawn one job for the MPAS model DiagSpace
50 
51  python SpawnAnalyzeStats.py -d mpas
52 
53 + Spawn one job for each DiagSpace in the typical MPAS-Workflow hofx application
54 
55  python SpawnAnalyzeStats.py -app hofx -d mhs,amsua,abi_,ahi_,sonde,airc,sfc,gnssroref,satwind
56 
57 + Choose a unique job account number
58 
59  python SpawnAnalyzeStats.py -a NMMM0043
60 
61 + Get info about more options
62 
63  python SpawnAnalyzeStats.py --help
64 
65 '''
66 ########################################################################
# Statistics to be analyzed.
#   options: see su.allFileStats
#   An 'analysisStatistics' setting from an individual diagnostic overrides
#   this list.
analysisStatistics = ['Count', 'Mean', 'RMS', 'STD']

# Diagnostic groupings.
#   + each entry has the form
#         diagnosticGroup: diagnosticNames
#   + diagnosticNames is a list of diagnostics, such as
#         [diagnosticName1, diagnosticName2, etc...]
#   + diagnosticGroup is chosen by the user, but it is best to have it match
#     the strings used for axis labeling in Analyses
#   + by default each diagnostic is plotted on an independent axis; that is
#     still done for any analysis type that does not use diagnosticGroupings
#     or that has maxDiagnosticsPerAnalysis < len(diagnosticNames)
diagnosticGroupings = {
    'omm': ['omb', 'oma'],
    'rltv_omm': ['rltv_omb', 'rltv_oma'],
}
85 
## Configure the StatisticsDatabase.StatsDB objects
## Note: refer to the StatisticsDatabase.StatsDB class for more details
user = 'guerrett'
dbConf = {
    ## hasFCLenDir: whether the directory structure includes forecast length
    ## note: overridden when fcTDeltaLast > fcTDeltaFirst
    'hasFCLenDir': False,

    ## expDirectory: top-level directory that contains data from all
    # experiments. The environment variable, EXP_DIR, can be used to specify
    # its value externally if desired.
    'expDirectory': os.getenv('EXP_DIR', '/glade/scratch/' + user + '/pandac'),

    ## cntrlExpIndex: index, in the expNames list, of the control experiment
    # (used for DiffCI analyses)
    'cntrlExpIndex': 0,

    ## expLongNames: directories within expDirectory that contain data from
    # individual experiments
    'expLongNames': [],

    ## expNames: experiment names used for database lookups and analysis
    # labels, e.g., on figures. Make these names concise and exclude spaces.
    'expNames': [],

    ## DAMethods: secondary labels that are within the database intermediate
    # file names. Typical values in MPAS-Workflow applications are 'hofx' for
    # OMF, 'variational' for OMB/OMA, and '' (empty) for model-space
    # verification. The user may choose any string as part of the file name to
    # distinguish the information contained within from other files, e.g., an
    # experiment characteristic. DAMethods is only important for file naming
    # and is not used in the analyses.
    'DAMethods': [],
}

## -------------------------------------------
## Append to expLongNames, expNames, DAMethods
## -------------------------------------------
## EDA w/ baseline config
#nEnsDAMembers = 20
#for mem in list(range(1,nEnsDAMembers+1)):
#  member = '{:03d}'.format(mem)
#  dbConf['expLongNames'].append(
#    'guerrett_eda_3denvar_conv_clramsua_NMEM20_120km/Verification/bg/mem'+member)
#  dbConf['expNames'].append('EDA'+member)
#  dbConf['DAMethods'].append('omm')
#dbConf['cntrlExpIndex'] = nEnsDAMembers

# 6-hr forecasts from GEFS
#dbConf['expLongNames'].append('guerrett_eda_3denvar_NMEM20_GEFSVerify_120km/Verification/bg/mean')
#dbConf['expNames'].append('gefs')
#dbConf['DAMethods'].append('hofx')

## Active experiments, one row each: (expLongName, expName, DAMethod).
## The three lists are filled in lockstep, so a given index refers to the
## same experiment in each of them.
for _expLongName, _expName, _daMethod in (
    ## 3denvar benchmark (conventional + clear-sky AMSUA)
    # APRIL 2021
    # alternative directory: 'guerrett_3denvar_OIE120km_unstructured/Verification/bg'
    ('guerrett_3denvar_OIE120km_unstructured/Verification/fc/mean',
     'benchmark', 'hofx'),
    ('guerrett_eda_3denvar_NMEM20_LeaveOneOut_OIE120km/Verification/fc/mean',
     'eda20-leave', 'hofx'),
    ('guerrett_eda_3denvar_NMEM40_LeaveOneOut_OIE120km/Verification/fc/mean',
     'eda40-leave', 'hofx'),
):
    dbConf['expLongNames'].append(_expLongName)
    dbConf['expNames'].append(_expName)
    dbConf['DAMethods'].append(_daMethod)

# remove the loop temporaries so the module namespace is left unchanged
del _expLongName, _expName, _daMethod
154 
155 
## -------------------------------
## Cycle times and forecast length
## -------------------------------
dbConf.update({
    # first and last CYCLE dates, and the increment between cycles
    'firstCycleDTime': dt.datetime(2018, 4, 15, 0, 0, 0),
    'lastCycleDTime': dt.datetime(2018, 5, 14, 18, 0, 0),
    'cyTimeInc': dt.timedelta(hours=12),

    # first and last FORECAST durations, and the increment between them
    'fcTDeltaFirst': dt.timedelta(days=0),
    'fcTDeltaLast': dt.timedelta(days=0, hours=240),
    'fcTimeInc': dt.timedelta(hours=12),
})
168 
## fcDirFormats declares the directory string format for forecast lengths.
# It can include any combination of substrings from the following examples:
#   "%D" (only number of days)
#   "%D_%HH:%MM:%SS"
#   "%MIN:%SEC"
#   "%s" (total seconds)
#   "%m", "%MIN" (total full minutes)
#   "%h" (total full hours)
#
# By default, all experiments use the same format string. Override
# that behavior by redefining the fcDirFormats list below.
commonFCDirFormat = "%hhr"
dbConf['fcDirFormats'] = [commonFCDirFormat for _ in dbConf['expNames']]

## statsFileSubDirs is the subdirectory within the date directory(ies)
# that contains the statistics files for constructing the StatsDB object
# examples:
#   'diagnostic_stats'
#   'diagnostic_stats/obs'
#   'diagnostic_stats/model'
#TODO: make StatsDB search for the correct subdirectory for each experiment
commonStatsFileSubDir = 'diagnostic_stats/obs'
#commonStatsFileSubDir = 'diagnostic_stats/model'
dbConf['statsFileSubDirs'] = [commonStatsFileSubDir for _ in dbConf['expNames']]
194 
########################################################################
## Configure the analysisTypes to apply to the statistics
# - below are recommendations for single/multiple forecast lengths
# - analysisTypes can be mixed and matched as desired,
#   however some of them require nCY, nFC, or nExp > 1
# - see the individual classes for more details (Analyses.py)
analysisTypes = []
if dbConf['fcTDeltaFirst'] != dbConf['fcTDeltaLast']:
    ## gross error analysisTypes for multiple forecast lengths
    ## -------------------------------------------------------
    ## recommended
    analysisTypes += ['FCAxisExpLines']
    if len(dbConf['expNames']) > 1:
        analysisTypes += ['FCAxisExpLinesDiffCI']

    ## potentially useful
    analysisTypes += ['FCandBinValAxes2D']
    #analysisTypes += ['CYandBinValAxes2D']
    #analysisTypes += ['CYAxisExpLines']
    #analysisTypes += ['CYAxisFCLines']

else:
    ## gross error analysisTypes for single forecast length
    ## -------------------------------------------------------
    analysisTypes += [
        ## recommended
        'CYAxisExpLines',
        ## potentially useful
        'CYAxisBinValLines',
        'CYandBinValAxes2D',
    ]

## used to dissect gross errors in more detail
##Note: BinValAxisProfile* analyses work for any forecast duration
analysisTypes += ['BinValAxisProfile']
if len(dbConf['expNames']) > 1:
    analysisTypes += ['BinValAxisProfileDiffCI']

## useful for prescribing/evaluating R statistics
#analysisTypes += ['BinValAxisPDF']
#analysisTypes += ['BinValAxisStatsComposite']
#analysisTypes += ['GrossValues']
234