From 4ab18731ecdc9407ba486c21d7f4c4edb6b6647f Mon Sep 17 00:00:00 2001 From: Prince Datta Date: Fri, 29 Nov 2024 12:59:49 +0530 Subject: [PATCH 1/8] enable kerberos for gpu drivers --- gpu/test_gpu.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/gpu/test_gpu.py b/gpu/test_gpu.py index f8438915f..9a90329a4 100644 --- a/gpu/test_gpu.py +++ b/gpu/test_gpu.py @@ -60,6 +60,7 @@ def verify_instance_spark(self): ("SINGLE", ["m"], GPU_T4, None, None), # ("STANDARD", ["m"], GPU_T4, None, None), ("STANDARD", ["m", "w-0", "w-1"], GPU_T4, GPU_T4, "NVIDIA"), + ("KERBEROS", ["m"], GPU_T4, None, None), ) def test_install_gpu_default_agent(self, configuration, machine_suffixes, master_accelerator, worker_accelerator, @@ -87,6 +88,7 @@ def test_install_gpu_default_agent(self, configuration, machine_suffixes, @parameterized.parameters( ("SINGLE", ["m"], GPU_T4, None, None), + ("KERBEROS", ["m"], GPU_T4, None, None), ) def test_install_gpu_without_agent(self, configuration, machine_suffixes, master_accelerator, worker_accelerator, @@ -115,6 +117,7 @@ def test_install_gpu_without_agent(self, configuration, machine_suffixes, @parameterized.parameters( ("STANDARD", ["m", "w-0", "w-1"], GPU_T4, GPU_T4, None), + ("KERBEROS", ["m"], GPU_T4, GPU_T4, None), # ("STANDARD", ["w-0", "w-1"], None, GPU_T4, "NVIDIA"), # ("STANDARD", ["m"], GPU_T4, None, "NVIDIA"), ) @@ -147,6 +150,7 @@ def test_install_gpu_with_agent(self, configuration, machine_suffixes, # ("SINGLE", ["m"], GPU_T4, None, "12.0"), ("SINGLE", ["m"], GPU_T4, None, "11.8"), ("STANDARD", ["m", "w-0", "w-1"], GPU_T4, GPU_T4, "12.4"), + ("KERBEROS", ["m"], GPU_T4, GPU_T4, "12.4"), # ("STANDARD", ["w-0", "w-1"], None, GPU_T4, "11.8"), ) def test_install_gpu_cuda_nvidia(self, configuration, machine_suffixes, @@ -188,6 +192,7 @@ def test_install_gpu_cuda_nvidia(self, configuration, machine_suffixes, ("STANDARD", ["m"], GPU_H100, GPU_A100, "NVIDIA", "11.8"), # ("STANDARD", ["m"], GPU_H100, GPU_A100, "NVIDIA", "12.0"), ("STANDARD", ["m"], GPU_H100, GPU_A100, "NVIDIA", "12.4"), + ("KERBEROS", ["m"], GPU_H100, GPU_A100, "NVIDIA", "12.4"), ) def test_install_gpu_with_mig(self, configuration, machine_suffixes, master_accelerator, worker_accelerator, @@ -232,7 +237,8 @@ def test_install_gpu_with_mig(self, configuration, machine_suffixes, @parameterized.parameters( ("SINGLE", GPU_T4, None, None), - ("STANDARD", GPU_T4, GPU_T4, "NVIDIA") + ("STANDARD", GPU_T4, GPU_T4, "NVIDIA"), + ("KERBEROS", GPU_T4, GPU_T4, "NVIDIA"), ) def test_gpu_allocation(self, configuration, master_accelerator, worker_accelerator, driver_provider): @@ -263,6 +269,7 @@ def test_gpu_allocation(self, configuration, master_accelerator, ("SINGLE", ["m"], GPU_T4, None, "11.8"), # ("STANDARD", ["m"], GPU_T4, None, "12.0"), ("STANDARD", ["m", "w-0", "w-1"], GPU_T4, GPU_T4, "12.4"), + ("KERBEROS", ["m"], GPU_T4, GPU_T4, "12.4"), # ("STANDARD", ["w-0", "w-1"], None, GPU_T4, "11.8"), # ("STANDARD", ["w-0", "w-1"], None, GPU_T4, "12.0"), ) From f8c0d1afe1a5cc15effe19a804d694323d4ec7ef Mon Sep 17 00:00:00 2001 From: Prince Datta Date: Fri, 29 Nov 2024 14:31:09 +0530 Subject: [PATCH 2/8] enable kerberos for gpu drivers --- gpu/test_gpu.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gpu/test_gpu.py b/gpu/test_gpu.py index 9a90329a4..abe3b7cd7 100644 --- a/gpu/test_gpu.py +++ b/gpu/test_gpu.py @@ -83,7 +83,7 @@ def test_install_gpu_default_agent(self, configuration, machine_suffixes, for machine_suffix in machine_suffixes: machine_name="{}-{}".format(self.getClusterName(),machine_suffix) self.verify_instance(machine_name) - if ( self.getImageOs() != 'rocky' ) or ( configuration != 'SINGLE' ) or ( configuration == 'SINGLE' and self.getImageOs() == 'rocky' and self.getImageVersion() > pkg_resources.parse_version("2.1") ): + if ( self.getImageOs() != 'rocky' ) or ( configuration != 'SINGLE' ) or ( configuration == 'SINGLE' and self.getImageOs() == 'rocky' and self.getImageVersion() > pkg_resources.parse_version("2.1") ) or ( configuration != "KERBEROS" ): self.verify_pyspark(machine_name) @parameterized.parameters( @@ -238,7 +238,6 @@ def test_install_gpu_with_mig(self, configuration, machine_suffixes, @parameterized.parameters( ("SINGLE", GPU_T4, None, None), ("STANDARD", GPU_T4, GPU_T4, "NVIDIA"), - ("KERBEROS", GPU_T4, GPU_T4, "NVIDIA"), ) def test_gpu_allocation(self, configuration, master_accelerator, worker_accelerator, driver_provider): From 3f223236110323ac0cbdf11b19a98ab7936612df Mon Sep 17 00:00:00 2001 From: Prince Datta Date: Fri, 29 Nov 2024 15:51:43 +0530 Subject: [PATCH 3/8] enable kerberos for gpu drivers --- gpu/test_gpu.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gpu/test_gpu.py b/gpu/test_gpu.py index abe3b7cd7..ca89931e8 100644 --- a/gpu/test_gpu.py +++ b/gpu/test_gpu.py @@ -60,7 +60,6 @@ def verify_instance_spark(self): ("SINGLE", ["m"], GPU_T4, None, None), # ("STANDARD", ["m"], GPU_T4, None, None), ("STANDARD", ["m", "w-0", "w-1"], GPU_T4, GPU_T4, "NVIDIA"), - ("KERBEROS", ["m"], GPU_T4, None, None), ) def test_install_gpu_default_agent(self, configuration, machine_suffixes, master_accelerator, worker_accelerator, @@ -83,7 +82,7 @@ def test_install_gpu_default_agent(self, configuration, machine_suffixes, for machine_suffix in machine_suffixes: machine_name="{}-{}".format(self.getClusterName(),machine_suffix) self.verify_instance(machine_name) - if ( self.getImageOs() != 'rocky' ) or ( configuration != 'SINGLE' ) or ( configuration == 'SINGLE' and self.getImageOs() == 'rocky' and self.getImageVersion() > pkg_resources.parse_version("2.1") ) or ( configuration != "KERBEROS" ): + if ( self.getImageOs() != 'rocky' ) or ( configuration != 'SINGLE' ) or ( configuration == 'SINGLE' and self.getImageOs() == 'rocky' and self.getImageVersion() > pkg_resources.parse_version("2.1") ): self.verify_pyspark(machine_name) @parameterized.parameters( From ee46ee26e01ab7b3f7b5783d73cee3e51a2c7730 Mon Sep 17 00:00:00 2001 From: Prince Datta Date: Fri, 29 Nov 2024 21:00:14 +0530 Subject: [PATCH 4/8] enable kerberos for gpu drivers --- gpu/test_gpu.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gpu/test_gpu.py b/gpu/test_gpu.py index ca89931e8..bc6c8cb03 100644 --- a/gpu/test_gpu.py +++ b/gpu/test_gpu.py @@ -267,7 +267,6 @@ def test_gpu_allocation(self, configuration, master_accelerator, ("SINGLE", ["m"], GPU_T4, None, "11.8"), # ("STANDARD", ["m"], GPU_T4, None, "12.0"), ("STANDARD", ["m", "w-0", "w-1"], GPU_T4, GPU_T4, "12.4"), - ("KERBEROS", ["m"], GPU_T4, GPU_T4, "12.4"), # ("STANDARD", ["w-0", "w-1"], None, GPU_T4, "11.8"), # ("STANDARD", ["w-0", "w-1"], None, GPU_T4, "12.0"), ) From 39965f7d5338404a9e81ceee7aa4b90d3d6e4f1c Mon Sep 17 00:00:00 2001 From: Prince Datta Date: Sat, 30 Nov 2024 00:24:46 +0530 Subject: [PATCH 5/8] enable kerberos for gpu drivers --- gpu/test_gpu.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gpu/test_gpu.py b/gpu/test_gpu.py index bc6c8cb03..9f5a8c7d7 100644 --- a/gpu/test_gpu.py +++ b/gpu/test_gpu.py @@ -116,7 +116,6 @@ def test_install_gpu_without_agent(self, configuration, machine_suffixes, @parameterized.parameters( ("STANDARD", ["m", "w-0", "w-1"], GPU_T4, GPU_T4, None), - ("KERBEROS", ["m"], GPU_T4, GPU_T4, None), # ("STANDARD", ["w-0", "w-1"], None, GPU_T4, "NVIDIA"), # ("STANDARD", ["m"], GPU_T4, None, "NVIDIA"), ) From 48e0a33815bec5bd3044210acd15de8e6de1a515 Mon Sep 17 00:00:00 2001 From: Prince Datta Date: Sat, 30 Nov 2024 11:19:47 +0530 Subject: [PATCH 6/8] enable kerberos for gpu drivers --- gpu/test_gpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu/test_gpu.py b/gpu/test_gpu.py index 9f5a8c7d7..c0b54703e 100644 --- a/gpu/test_gpu.py +++ b/gpu/test_gpu.py @@ -18,7 +18,7 @@ class NvidiaGpuDriverTestCase(DataprocTestCase): def verify_instance(self, name): # Verify that nvidia-smi works - time.sleep(3) # Many failed nvidia-smi attempts have been caused by impatience + time.sleep(6) # Many failed nvidia-smi attempts have been caused by impatience self.assert_instance_command(name, "nvidia-smi", 1) def verify_pyspark(self, name): From b7f26b1559697227eecfb0f6f794f60a8f54b006 Mon Sep 17 00:00:00 2001 From: Prince Datta Date: Sat, 30 Nov 2024 17:01:50 +0530 Subject: [PATCH 7/8] enable kerberos for gpu drivers --- gpu/test_gpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu/test_gpu.py b/gpu/test_gpu.py index c0b54703e..5cce87132 100644 --- a/gpu/test_gpu.py +++ b/gpu/test_gpu.py @@ -19,7 +19,7 @@ class NvidiaGpuDriverTestCase(DataprocTestCase): def verify_instance(self, name): # Verify that nvidia-smi works time.sleep(6) # Many failed nvidia-smi attempts have been caused by impatience - self.assert_instance_command(name, "nvidia-smi", 1) + self.assert_instance_command(name, "nvidia-smi", 2) def verify_pyspark(self, name): # Verify that pyspark works From 70d118082e13c13e5c8197a1c85ae42a6c2dcb42 Mon Sep 17 00:00:00 2001 From: Prince Datta Date: Sat, 30 Nov 2024 18:41:27 +0530 Subject: [PATCH 8/8] enable kerberos for gpu drivers --- gpu/test_gpu.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gpu/test_gpu.py b/gpu/test_gpu.py index 5cce87132..346826321 100644 --- a/gpu/test_gpu.py +++ b/gpu/test_gpu.py @@ -148,7 +148,6 @@ def test_install_gpu_with_agent(self, configuration, machine_suffixes, # ("SINGLE", ["m"], GPU_T4, None, "12.0"), ("SINGLE", ["m"], GPU_T4, None, "11.8"), ("STANDARD", ["m", "w-0", "w-1"], GPU_T4, GPU_T4, "12.4"), - ("KERBEROS", ["m"], GPU_T4, GPU_T4, "12.4"), # ("STANDARD", ["w-0", "w-1"], None, GPU_T4, "11.8"), ) def test_install_gpu_cuda_nvidia(self, configuration, machine_suffixes,