@@ -294,7 +294,14 @@ def sanitize_for_serialization(obj: object) -> object:
     return api.sanitize_for_serialization(obj)


-def role_to_pod(name: str, role: Role, service_account: Optional[str]) -> "V1Pod":
+def role_to_pod(
+    name: str,
+    role: Role,
+    service_account: Optional[str],
+    reserved_millicpu: int = RESERVED_MILLICPU,
+    reserved_memmb: int = RESERVED_MEMMB,
+    efa_device_count: Optional[int] = None,
+) -> "V1Pod":
     from kubernetes.client.models import (  # noqa: F811 redefinition of unused
         V1Container,
         V1ContainerPort,
@@ -324,18 +331,29 @@ def role_to_pod(name: str, role: Role, service_account: Optional[str]) -> "V1Pod
     if resource.cpu > 0:
         mcpu = int(resource.cpu * 1000)
         limits["cpu"] = f"{mcpu}m"
-        request_mcpu = max(mcpu - RESERVED_MILLICPU, 0)
+        request_mcpu = max(mcpu - reserved_millicpu, 0)
         requests["cpu"] = f"{request_mcpu}m"
     if resource.memMB > 0:
         limits["memory"] = f"{int(resource.memMB)}M"
-        request_memMB = max(int(resource.memMB) - RESERVED_MEMMB, 0)
+        request_memMB = max(int(resource.memMB) - reserved_memmb, 0)
         requests["memory"] = f"{request_memMB}M"
     if resource.gpu > 0:
         requests["nvidia.com/gpu"] = limits["nvidia.com/gpu"] = str(resource.gpu)

+    EFA_DEVICE = "vpc.amazonaws.com/efa"
     for device_name, device_limit in resource.devices.items():
         limits[device_name] = str(device_limit)

+    # Handle EFA device count override:
+    # - None (default): use whatever count is in the resource spec (already added above)
+    # - 0: remove EFA devices entirely
+    # - N > 0: set EFA device count to N (override or add)
+    if efa_device_count is not None:
+        if efa_device_count == 0:
+            limits.pop(EFA_DEVICE, None)
+        else:
+            limits[EFA_DEVICE] = str(efa_device_count)
+
     resources = V1ResourceRequirements(
         limits=limits,
         requests=requests,
@@ -475,6 +493,9 @@ def app_to_resource(
     queue: str,
     service_account: Optional[str],
     priority_class: Optional[str] = None,
+    reserved_millicpu: int = RESERVED_MILLICPU,
+    reserved_memmb: int = RESERVED_MEMMB,
+    efa_device_count: Optional[int] = None,
 ) -> Dict[str, Any]:
     """
     app_to_resource creates a volcano job kubernetes resource definition from
@@ -507,7 +528,14 @@ def app_to_resource(
             replica_role.env["TORCHX_RANK0_HOST"] = "localhost"
             replica_role.env["TORCHX_IMAGE"] = replica_role.image

-            pod = role_to_pod(name, replica_role, service_account)
+            pod = role_to_pod(
+                name,
+                replica_role,
+                service_account,
+                reserved_millicpu,
+                reserved_memmb,
+                efa_device_count,
+            )
             if k8s_metadata := role.metadata.get("kubernetes"):
                 if isinstance(k8s_metadata, str):
                     import fsspec
@@ -589,6 +617,9 @@ class KubernetesOpts(TypedDict, total=False):
     service_account: Optional[str]
     priority_class: Optional[str]
     validate_spec: Optional[bool]
+    reserved_millicpu: Optional[int]
+    reserved_memmb: Optional[int]
+    efa_device_count: Optional[int]


 class KubernetesScheduler(DockerWorkspaceMixin, Scheduler[KubernetesOpts]):
@@ -783,7 +814,26 @@ def _submit_dryrun(
             priority_class, str
         ), "priority_class must be a str"

-        resource = app_to_resource(app, queue, service_account, priority_class)
+        reserved_millicpu = cfg.get("reserved_millicpu", RESERVED_MILLICPU)
+        assert isinstance(reserved_millicpu, int), "reserved_millicpu must be an int"
+
+        reserved_memmb = cfg.get("reserved_memmb", RESERVED_MEMMB)
+        assert isinstance(reserved_memmb, int), "reserved_memmb must be an int"
+
+        efa_device_count = cfg.get("efa_device_count")
+        assert efa_device_count is None or isinstance(
+            efa_device_count, int
+        ), "efa_device_count must be an int or None"
+
+        resource = app_to_resource(
+            app,
+            queue,
+            service_account,
+            priority_class,
+            reserved_millicpu,
+            reserved_memmb,
+            efa_device_count,
+        )

         if cfg.get("validate_spec"):
             try:
@@ -889,6 +939,25 @@ def _run_opts(self) -> runopts:
             help="Validate job spec using Kubernetes API dry-run before submission",
             default=True,
         )
+        opts.add(
+            "reserved_millicpu",
+            type_=int,
+            help="Amount of CPU in millicores to reserve for Kubernetes system overhead (default: 100)",
+            default=RESERVED_MILLICPU,
+        )
+        opts.add(
+            "reserved_memmb",
+            type_=int,
+            help="Amount of memory in MB to reserve for Kubernetes system overhead (default: 1024)",
+            default=RESERVED_MEMMB,
+        )
+        opts.add(
+            "efa_device_count",
+            type_=int,
+            help="EFA device count override: None/unset=use resource spec, "
+            "0=remove EFA, N>0=set EFA count to N",
+            default=None,
+        )
         return opts

     def describe(self, app_id: str) -> Optional[DescribeAppResponse]:
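To illustrate how the new knobs compose, here is a minimal sketch (not part of the diff) of calling the updated role_to_pod() signature. The Role/Resource values, image name, and override numbers below are hypothetical; the same settings would presumably be passed as the new scheduler run options (reserved_millicpu, reserved_memmb, efa_device_count) when submitting through the torchx CLI's scheduler config argument.

    # Illustrative sketch only (not part of this commit); values are hypothetical.
    from torchx.schedulers.kubernetes_scheduler import role_to_pod
    from torchx.specs import Resource, Role

    role = Role(
        name="trainer",
        image="my-registry/trainer:latest",  # hypothetical image
        resource=Resource(
            cpu=8,
            gpu=8,
            memMB=32768,
            devices={"vpc.amazonaws.com/efa": 4},  # EFA count from the resource spec
        ),
    )

    pod = role_to_pod(
        "trainer-0",
        role,
        service_account=None,
        reserved_millicpu=250,  # subtract 250m from the CPU request instead of the default 100m
        reserved_memmb=2048,    # subtract 2048 MB from the memory request instead of the default 1024
        efa_device_count=0,     # drop the EFA device limit from the pod entirely
    )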