Skip to content

Commit 2e35f41

Browse files
committed
Add MAINTENANCE as an available node state for maintenance reservations
1 parent 6ca5b9c commit 2e35f41

File tree

1 file changed

+25
-14
lines changed

1 file changed

+25
-14
lines changed

reframe/core/schedulers/slurm.py

Lines changed: 25 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -147,6 +147,7 @@ def __init__(self):
147147
self._sched_access_in_submit = self.get_option(
148148
'sched_access_in_submit'
149149
)
150+
self.addl_avail_states = set()
150151

151152
def make_job(self, *args, **kwargs):
152153
return _SlurmJob(*args, **kwargs)
@@ -323,7 +324,7 @@ def allnodes(self):
323324
'could not retrieve node information') from e
324325

325326
node_descriptions = completed.stdout.splitlines()
326-
return _create_nodes(node_descriptions)
327+
return _create_nodes(node_descriptions, self.addl_avail_states)
327328

328329
def _get_default_partition(self):
329330
completed = _run_strict('scontrol -a show -o partitions')
@@ -436,15 +437,23 @@ def _get_reservation_nodes(self, reservation):
436437
raise JobSchedulerError("could not extract the node names for "
437438
"reservation '%s'" % reservation)
438439

440+
flags_match = re.search(r'Flags=(\S+)', completed.stdout)
441+
if flags_match:
442+
if 'MAINT' in flags_match[1].split(','):
443+
self.addl_avail_states.add('MAINTENANCE')
444+
# else:
445+
# raise JobSchedulerError(f"could not extract the reservation "
446+
# f"flags for reservation '{reservation}'")
447+
439448
completed = _run_strict('scontrol -a show -o %s' % reservation_nodes)
440449
node_descriptions = completed.stdout.splitlines()
441-
return _create_nodes(node_descriptions)
450+
return _create_nodes(node_descriptions, self.addl_avail_states)
442451

443452
def _get_nodes_by_name(self, nodespec):
444453
completed = osext.run_command('scontrol -a show -o node %s' %
445454
nodespec)
446455
node_descriptions = completed.stdout.splitlines()
447-
return _create_nodes(node_descriptions)
456+
return _create_nodes(node_descriptions, self.addl_avail_states)
448457

449458
def _update_completion_time(self, job, timestamps):
450459
if job._completion_time is not None:
@@ -691,19 +700,19 @@ def poll(self, *jobs):
691700
self._cancel_if_pending_too_long(job)
692701

693702

694-
def _create_nodes(descriptions):
703+
def _create_nodes(descriptions, addl_avail_states=None):
695704
nodes = set()
696705
for descr in descriptions:
697706
with suppress(JobSchedulerError):
698-
nodes.add(_SlurmNode(descr))
707+
nodes.add(_SlurmNode(descr, addl_avail_states=addl_avail_states))
699708

700709
return nodes
701710

702711

703712
class _SlurmNode(sched.Node):
704713
'''Class representing a Slurm node.'''
705714

706-
def __init__(self, node_descr):
715+
def __init__(self, node_descr, addl_avail_states=None):
707716
self._name = self._extract_attribute('NodeName', node_descr)
708717
if not self._name:
709718
raise JobSchedulerError(
@@ -718,6 +727,15 @@ def __init__(self, node_descr):
718727
'State', node_descr, sep='+') or set()
719728
self._descr = node_descr
720729

730+
self.addl_avail_states = addl_avail_states or set()
731+
self.available_states = {
732+
'ALLOCATED',
733+
'COMPLETING',
734+
'IDLE',
735+
'PLANNED',
736+
'RESERVED'
737+
} | self.addl_avail_states
738+
721739
def __eq__(self, other):
722740
if not isinstance(other, type(self)):
723741
return NotImplemented
@@ -735,14 +753,7 @@ def in_statex(self, state):
735753
return self._states == set(state.upper().split('+'))
736754

737755
def is_avail(self):
738-
available_states = {
739-
'ALLOCATED',
740-
'COMPLETING',
741-
'IDLE',
742-
'PLANNED',
743-
'RESERVED'
744-
}
745-
return self._states <= available_states
756+
return self._states <= self.available_states
746757

747758
def is_down(self):
748759
return not self.is_avail()

0 commit comments

Comments
 (0)