Skip to content

Commit 30c96e8

Browse files
committed
Code style fixes
1 parent 97e9fc7 commit 30c96e8

File tree

1 file changed

+23
-20
lines changed

1 file changed

+23
-20
lines changed

reframe/core/schedulers/pbs.py

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,7 @@ def _query_exit_code(self, job):
326326
# Torque does not provide a way to retrieve the history of jobs
327327
return None
328328

329+
329330
@register_scheduler('pbspro')
330331
class PbsProJobScheduler(PbsJobScheduler):
331332
def poll(self, *jobs):
@@ -337,41 +338,42 @@ def poll(self, *jobs):
337338
return
338339

339340
# query status of all jobs
340-
job_status = osext.run_command(
341+
completed = osext.run_command(
341342
f"qstat -xf -F json {' '.join(job.jobid for job in jobs)}"
342343
)
343344

344345
# from Table 14-1: Error Codes in
345346
# https://help.altair.com/2024.1.0/PBS%20Professional/PBSReferenceGuide2024.1.pdf,
346-
# we have the codes PBS returns in case of an error with exit(error_code),
347-
# like exit(15001) for unknown Job ID. however, only the last 8 bits
348-
# of the exit code are returned, so what we get as the actual error code
349-
# is exit_code % 256, which is for example 153 for Unknown Job Identifier.
350-
# 153 is returned if any job id in the list is unknown, even if some others
351-
# are known. these unknown jobids will be caught in the loop over jobs
352-
# below so we can pass on for now. previously 35 was checked here,
353-
# but we only get that for a "History job ID" (when qstat -f is used
354-
# on a jobid that has already ended. Since above we use "-x" we should not
355-
# get exit code 35 anymore)
356-
if job_status.returncode in [153, 0]:
347+
# we have the codes PBS returns in case of an error with
348+
# exit(error_code), like exit(15001) for unknown Job ID. However, only
349+
# the last 8 bits of the exit code are returned, so what we get as the
350+
# actual error code is `exit_code % 256`, which is for example 153 for
351+
# "Unknown Job Identifier". 153 is returned if any job id in the list
352+
# is unknown, even if some others are known. These unknown job ids
353+
# will be caught in the loop over jobs below so we can pass on for
354+
# now. Previously 35 was checked here, but we only get that for a
355+
# "History job ID" (when qstat -f is used on a jobid that has already
356+
# ended). Since above we use "-x" we should not get exit code 35
357+
# anymore.
358+
if completed.returncode in [153, 0]:
357359
pass
358-
elif job_status.returncode == 255:
360+
elif completed.returncode == 255:
359361

360362
# try again, qstat is having a problem
361363
self.log(f'qstat failed with exit code {completed.returncode} '
362-
f'(standard error follows):\n{completed.stderr}\n retrying')
364+
f'(standard error follows):\n{completed.stderr}\n'
365+
'retrying')
363366
return
364367
else:
365368
raise JobSchedulerError(
366369
f'qstat failed with exit code {completed.returncode} '
367370
f'(standard error follows):\n{completed.stderr}'
368371
)
369372

370-
job_status_json = json.loads(job_status.stdout)
373+
job_status_json = json.loads(completed.stdout)
371374

372375
# loop over each job
373376
for job in jobs:
374-
375377
# check if the job is in the json
376378
if job.jobid in job_status_json["Jobs"]:
377379

@@ -381,8 +383,9 @@ def poll(self, *jobs):
381383
self.log(f"Job {job.jobid} known to scheduler, state: {state}")
382384
job._state = JOB_STATES[state]
383385

384-
# check if exec_host is in the ouput since exec_host is only in
385-
# the output if job has started to run (not if it's just queued)
386+
# check if exec_host is in the output since exec_host is only
387+
# in the output if job has started to run (not if it's just
388+
# queued)
386389
if "exec_host" in job_info:
387390
nodespec = job_info["exec_host"]
388391
self._update_nodelist(job, nodespec)
@@ -393,8 +396,9 @@ def poll(self, *jobs):
393396
job._exitcode = int(exit_code)
394397
job._completed = True
395398
elif job.state in ["QUEUED", "HELD", "WAITING"]:
399+
pending_time = time.time() - job.submit_time
396400
if (job.max_pending_time and
397-
(time.time() - job.submit_time) >= job.max_pending_time):
401+
pending_time >= job.max_pending_time):
398402
self.cancel(job)
399403
job._exception = JobError(
400404
"maximum pending time exceeded", job.jobid
@@ -404,4 +408,3 @@ def poll(self, *jobs):
404408
job._state = "COMPLETED"
405409
self.log(f"Assuming job {job.jobid} completed")
406410
job._completed = True
407-

0 commit comments

Comments
 (0)