# MPAS-JEDI
# JobScript.py
#!/bin/env python3

import os
from pathlib import Path
import subprocess
class JobScriptBase():
    '''
    Describes an HPC job script including
    + reading a configuration
    + generating the script
    + submitting the script

    Each HPC job submission system (e.g., PBSPro on Cheyenne)
    will have its own derived class that defines
    the job header and submission command

    generic config elements:
    required config parameter(s):
        script (required) - either a list of str's containing individual lines of the script
                            or a str giving the location of the script file

    optional config parameter(s):
        env - linux environment of the script (e.g., csh, bash, sh, tcsh)
        name - job name
        nppernode - processors per node
        nnode - number of nodes
        walltime - walltime
        olog - output log name
        elog - error log name
        path - directory in which the script is created and submitted
    '''
    def __init__(self, conf):
        ## job descriptors
        self.name = conf.get('name', 'PyJobScript')
        self.nppernode = conf.get('nppernode', 1)
        self.nnode = conf.get('nnode', 1)
        self.walltime = conf.get('walltime', '01:00:00')
        self.olog = conf.get('olog', 'log.job.out')
        self.elog = conf.get('elog', 'log.job.err')

        ## submission descriptors
        self.env = conf.get('env', 'csh')
        self.basescript = conf['script']
        assert isinstance(self.basescript, (list, str)), \
            "JobScriptBase : basescript must either be a list or a string"
        self.jobpath = Path(conf.get('path', './'))
        self.script = self.name+'.job.'+self.env

        # derived classes override these with the scheduler-specific
        # submission command and job header lines
        self.command = './'
        self.header = []

    def create(self):
        '''Generate the executable job script file at self.jobpath/self.script.'''
        ## initialize env (shebang selecting the configured shell)
        joblines = ['#!/bin/'+self.env+'\n']

        ## concatenate header (scheduler directives from the derived class)
        for line in self.header:
            joblines.append(line+'\n')

        ## concatenate script
        if isinstance(self.basescript, list):
            # self.basescript is a list of lines (assumed to include newlines)
            joblines.extend(self.basescript)
        elif isinstance(self.basescript, str):
            # self.basescript is the location of a script file; copy its lines
            with open(self.basescript, 'r') as bs:
                joblines.extend(bs)

        ## create the job path
        self.jobpath.mkdir(parents=True, exist_ok=True)

        ## write job script, replacing any previous copy
        script = str(self.jobpath/self.script)
        if os.path.exists(script):
            os.remove(script)
        with open(script, 'w') as js:
            js.writelines(joblines)
        # rwxr--r--: executable by owner (was `chmod 744` via os.system)
        os.chmod(script, 0o744)

    def submit(self):
        '''Submit the generated script from within self.jobpath.'''
        command = self.command+self.script
        # run the submission command inside the job directory, then restore cwd
        CWD = os.getcwd()
        os.chdir(str(self.jobpath))
        print(command+" in "+os.getcwd())
        os.system(command)
        os.chdir(CWD)
class PBSProCheyenne(JobScriptBase):
    '''
    PBSPro job script on Cheyenne

    unique config elements compared to base class:
        account - cheyenne account for charging
        queue - name of job submission queue (see qavail)
        memory - amount of memory requested per node (see mavail)

    NOTE: Cheyenne has a maximum of 36 processors available per node
    '''
    # allowed queues, allowed per-node memory requests (GB), max procs per node
    qavail = ['economy', 'regular', 'premium']
    mavail = [45, 109]
    maxnppernode = 36

    def __init__(self, conf):
        # Initialize base class config settings
        super().__init__(conf)

        # Initialize config settings that are specific to PBSProCheyenne
        self.account = conf.get('account', 'NMMM0043')
        self.queue = conf.get('queue', 'regular')
        assert self.queue in self.qavail, ("ERROR: PBSProCheyenne requires queue to be any of ", self.qavail)
        self.memory = conf.get('memory', 109)
        assert self.memory in self.mavail, ("ERROR: PBSProCheyenne requires memory (in GB) to be any of", self.mavail)
        assert self.nppernode <= self.maxnppernode, ("ERROR: PBSProCheyenne requires nppernode <= ", self.maxnppernode)

        # PBS directive lines prepended to the script by create()
        self.header = [
            '#PBS -N '+self.name,
            '#PBS -A '+self.account,
            '#PBS -q '+self.queue,
            '#PBS -l select='+str(self.nnode)+':ncpus='+str(self.nppernode)+':mpiprocs='+str(self.nppernode)+':mem='+str(self.memory)+'GB',
            '#PBS -l walltime='+self.walltime,
            '#PBS -m ae',
            '#PBS -k eod',
            '#PBS -o '+self.olog,
            '#PBS -e '+self.elog,
        ]

        self.command = 'qsub '
class SLURMCasper(JobScriptBase):
    '''
    SLURM job script on Casper

    unique config elements compared to base class:
        account - casper account for charging
        partition - name of casper partition (see pavail)
        memory - amount of memory requested per node (see maxmemory)

    NOTE: Casper has a maximum of 36 processors available per node
    '''
    # allowed partitions, max procs per node, max per-node memory request (GB)
    pavail = ['dav']
    maxnppernode = 36
    maxmemory = 1100

    def __init__(self, conf):
        # Initialize base class config settings
        super().__init__(conf)

        # Initialize config settings that are specific to SLURMCasper
        self.account = conf.get('account', 'NMMM0015')
        self.partition = conf.get('partition', 'dav')
        assert self.partition in self.pavail, ("ERROR: SLURMCasper requires partition to be any of ", self.pavail)
        self.memory = conf.get('memory', 300)
        assert self.memory <= self.maxmemory, ("ERROR: SLURMCasper requires memory (in GB) to be <= ", self.maxmemory)

        assert self.nppernode <= self.maxnppernode, ("ERROR: SLURMCasper requires nppernode <= ", self.maxnppernode)

        # SLURM directive lines prepended to the script by create()
        # NOTE(review): nnode maps to --ntasks and nppernode to --cpus-per-task
        # here, unlike the PBS select statement -- confirm this is intentional
        self.header = [
            '#SBATCH --job-name='+self.name,
            '#SBATCH --account='+self.account,
            '#SBATCH --ntasks='+str(self.nnode),
            '#SBATCH --cpus-per-task='+str(self.nppernode),
            '#SBATCH --mem='+str(self.memory)+'G',
            '#SBATCH --time='+self.walltime,
            '#SBATCH --partition='+self.partition,
            '#SBATCH --output='+self.olog,
        ]

        self.command = 'sbatch '
## Maps substrings of the host name (`uname -n`) to the job script class
## appropriate for that system; used by JobScriptFactory
JobScriptDict = {
    ## cheyenne
    # login nodes
    'cheyenne': PBSProCheyenne,
    # cron jobs
    'chadmin': PBSProCheyenne,
    ## casper
    'casper': SLURMCasper
}
def JobScriptFactory(conf):
    '''
    Construct the JobScript object appropriate for the current system.

    conf - configuration dict passed through to the class constructor;
           conf['sysname'] is set here as a side effect
    returns a JobScriptBase (or derived class) instance
    '''
    ## get system name
    conf['sysname'] = subprocess.run(['uname', '-n'],
                        stdout=subprocess.PIPE).stdout.decode('utf-8')

    ## match system name with JobScriptDict or return base class object by default
    # substring match: e.g., 'cheyenne3' on a login node contains 'cheyenne'
    for key, jobclass in JobScriptDict.items():
        if key in conf['sysname']:
            return jobclass(conf)
    return JobScriptBase(conf)
# (doxygen cross-reference residue removed:
#  definitions listed at JobScript.py lines 31, 33, 41, 104, 144, 181)