diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d1a295e61493..8c61ca8e4ddd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -140,6 +140,7 @@ jobs: smoke/test_vm_deployment_planner smoke/test_vm_strict_host_tags smoke/test_vm_schedule + smoke/test_deploy_vgpu_enabled_vm smoke/test_vm_life_cycle smoke/test_vm_lifecycle_unmanage_import smoke/test_vm_snapshot_kvm diff --git a/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java b/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java index 68255e30c751..47255762a05a 100644 --- a/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java +++ b/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java @@ -221,6 +221,15 @@ public class AgentProperties{ */ public static final Property AGENT_HOOKS_LIBVIRT_VM_XML_TRANSFORMER_SCRIPT = new Property<>("agent.hooks.libvirt_vm_xml_transformer.script", "libvirt-vm-xml-transformer.groovy"); + /** + * This property is used with the agent.hooks.basedir property to define the Libvirt VM XML transformer shell script.
+ * The shell script is used to execute the Libvirt VM XML transformer script.
+ * For more information see the agent.properties file.
+ * Data type: String.
+ * Default value: libvirt-vm-xml-transformer.sh + */ + public static final Property AGENT_HOOKS_LIBVIRT_VM_XML_TRANSFORMER_SHELL_SCRIPT = new Property<>("agent.hooks.libvirt_vm_xml_transformer.shell_script", "libvirt-vm-xml-transformer.sh"); + /** * This property is used with the agent.hooks.basedir and agent.hooks.libvirt_vm_xml_transformer.script properties to define the Libvirt VM XML transformer method.
* Libvirt XML transformer hook does XML-to-XML transformation.
@@ -241,6 +250,15 @@ public class AgentProperties{ */ public static final Property AGENT_HOOKS_LIBVIRT_VM_ON_START_SCRIPT = new Property<>("agent.hooks.libvirt_vm_on_start.script", "libvirt-vm-state-change.groovy"); + /** + * This property is used with the agent.hooks.basedir property to define the Libvirt VM on start shell script.
+ * The shell script is used to execute the Libvirt VM on start script.
+ * For more information see the agent.properties file.
+ * Data type: String.
+ * Default value: libvirt-vm-state-change.sh + */ + public static final Property AGENT_HOOKS_LIBVIRT_VM_ON_START_SHELL_SCRIPT = new Property<>("agent.hooks.libvirt_vm_on_start.shell_script", "libvirt-vm-state-change.sh"); + /** * This property is used with the agent.hooks.basedir and agent.hooks.libvirt_vm_on_start.script properties to define the Libvirt VM on start method.
* The hook is called right after Libvirt successfully launched the VM.
@@ -260,6 +278,15 @@ public class AgentProperties{ */ public static final Property AGENT_HOOKS_LIBVIRT_VM_ON_STOP_SCRIPT = new Property<>("agent.hooks.libvirt_vm_on_stop.script", "libvirt-vm-state-change.groovy"); + /** + * This property is used with the agent.hooks.basedir property to define the Libvirt VM on stop shell script.
+ * The shell script is used to execute the Libvirt VM on stop script.
+ * For more information see the agent.properties file.
+ * Data type: String.
+ * Default value: libvirt-vm-state-change.sh + */ + public static final Property AGENT_HOOKS_LIBVIRT_VM_ON_STOP_SHELL_SCRIPT = new Property<>("agent.hooks.libvirt_vm_on_stop.shell_script", "libvirt-vm-state-change.sh"); + /** * This property is used with the agent.hooks.basedir and agent.hooks.libvirt_vm_on_stop.script properties to define the Libvirt VM on stop method.
* The hook is called right after libvirt successfully stopped the VM.
diff --git a/api/src/main/java/com/cloud/agent/api/VgpuTypesInfo.java b/api/src/main/java/com/cloud/agent/api/VgpuTypesInfo.java index 85ffc1898209..5515a9c48bcb 100644 --- a/api/src/main/java/com/cloud/agent/api/VgpuTypesInfo.java +++ b/api/src/main/java/com/cloud/agent/api/VgpuTypesInfo.java @@ -15,10 +15,24 @@ // specific language governing permissions and limitations // under the License. package com.cloud.agent.api; + +import org.apache.cloudstack.gpu.GpuDevice; + public class VgpuTypesInfo { + private boolean passthroughEnabled = true; + private GpuDevice.DeviceType deviceType; + private String parentBusAddress; + private String busAddress; + private String numaNode; + private String pciRoot; + private String deviceId; + private String deviceName; + private String vendorId; + private String vendorName; private String modelName; private String groupName; + private String vmName; private Long maxHeads; private Long videoRam; private Long maxResolutionX; @@ -26,6 +40,7 @@ public class VgpuTypesInfo { private Long maxVgpuPerGpu; private Long remainingCapacity; private Long maxCapacity; + private boolean display = false; public String getModelName() { return modelName; @@ -39,22 +54,42 @@ public Long getVideoRam() { return videoRam; } + public void setVideoRam(Long videoRam) { + this.videoRam = videoRam; + } + public Long getMaxHeads() { return maxHeads; } + public void setMaxHeads(Long maxHeads) { + this.maxHeads = maxHeads; + } + public Long getMaxResolutionX() { return maxResolutionX; } + public void setMaxResolutionX(Long maxResolutionX) { + this.maxResolutionX = maxResolutionX; + } + public Long getMaxResolutionY() { return maxResolutionY; } + public void setMaxResolutionY(Long maxResolutionY) { + this.maxResolutionY = maxResolutionY; + } + public Long getMaxVpuPerGpu() { return maxVgpuPerGpu; } + public void setMaxVgpuPerGpu(Long maxVgpuPerGpu) { + this.maxVgpuPerGpu = maxVgpuPerGpu; + } + public Long getRemainingCapacity() { return remainingCapacity; } @@ 
-71,8 +106,133 @@ public void setMaxVmCapacity(Long maxCapacity) { this.maxCapacity = maxCapacity; } - public VgpuTypesInfo(String groupName, String modelName, Long videoRam, Long maxHeads, Long maxResolutionX, Long maxResolutionY, Long maxVgpuPerGpu, - Long remainingCapacity, Long maxCapacity) { + public boolean isPassthroughEnabled() { + return passthroughEnabled; + } + + public void setPassthroughEnabled(boolean passthroughEnabled) { + this.passthroughEnabled = passthroughEnabled; + } + + public GpuDevice.DeviceType getDeviceType() { + return deviceType; + } + + public void setDeviceType(GpuDevice.DeviceType deviceType) { + this.deviceType = deviceType; + } + + public String getParentBusAddress() { + return parentBusAddress; + } + + public void setParentBusAddress(String parentBusAddress) { + this.parentBusAddress = parentBusAddress; + } + + public String getBusAddress() { + return busAddress; + } + + public void setBusAddress(String busAddress) { + this.busAddress = busAddress; + } + + public String getNumaNode() { + return numaNode; + } + + public void setNumaNode(String numaNode) { + this.numaNode = numaNode; + } + + public String getPciRoot() { + return pciRoot; + } + + public void setPciRoot(String pciRoot) { + this.pciRoot = pciRoot; + } + + public String getDeviceId() { + return deviceId; + } + + public void setDeviceId(String deviceId) { + this.deviceId = deviceId; + } + + public String getDeviceName() { + return deviceName; + } + + public void setDeviceName(String deviceName) { + this.deviceName = deviceName; + } + + public String getVendorId() { + return vendorId; + } + + public void setVendorId(String vendorId) { + this.vendorId = vendorId; + } + + public String getVendorName() { + return vendorName; + } + + public void setVendorName(String vendorName) { + this.vendorName = vendorName; + } + + public String getVmName() { + return vmName; + } + + public void setVmName(String vmName) { + this.vmName = vmName; + } + + public boolean isDisplay() { + 
return display; + } + + public void setDisplay(boolean display) { + this.display = display; + } + + public VgpuTypesInfo(GpuDevice.DeviceType deviceType, String groupName, String modelName, String busAddress, + String vendorId, String vendorName, String deviceId, String deviceName, String numaNode, String pciRoot + ) { + this.deviceType = deviceType; + this.groupName = groupName; + this.modelName = modelName; + this.busAddress = busAddress; + this.deviceId = deviceId; + this.deviceName = deviceName; + this.vendorId = vendorId; + this.vendorName = vendorName; + this.numaNode = numaNode; + this.pciRoot = pciRoot; + } + + public VgpuTypesInfo(GpuDevice.DeviceType deviceType, String groupName, String modelName, String busAddress, + String vendorId, String vendorName, String deviceId, String deviceName + ) { + this.deviceType = deviceType; + this.groupName = groupName; + this.modelName = modelName; + this.busAddress = busAddress; + this.deviceId = deviceId; + this.deviceName = deviceName; + this.vendorId = vendorId; + this.vendorName = vendorName; + } + + public VgpuTypesInfo(String groupName, String modelName, Long videoRam, Long maxHeads, Long maxResolutionX, + Long maxResolutionY, Long maxVgpuPerGpu, Long remainingCapacity, Long maxCapacity + ) { this.groupName = groupName; this.modelName = modelName; this.videoRam = videoRam; diff --git a/api/src/main/java/com/cloud/agent/api/to/GPUDeviceTO.java b/api/src/main/java/com/cloud/agent/api/to/GPUDeviceTO.java index 4afe080477b7..6e9cee06dd38 100644 --- a/api/src/main/java/com/cloud/agent/api/to/GPUDeviceTO.java +++ b/api/src/main/java/com/cloud/agent/api/to/GPUDeviceTO.java @@ -16,7 +16,9 @@ // under the License. 
package com.cloud.agent.api.to; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import com.cloud.agent.api.VgpuTypesInfo; @@ -24,9 +26,23 @@ public class GPUDeviceTO { private String gpuGroup; private String vgpuType; + private int gpuCount; private HashMap> groupDetails = new HashMap>(); + private List gpuDevices = new ArrayList<>(); - public GPUDeviceTO( String gpuGroup, String vgpuType, HashMap> groupDetails) { + public GPUDeviceTO(String gpuGroup, String vgpuType, int gpuCount, + HashMap> groupDetails, + List gpuDevices) { + this.gpuGroup = gpuGroup; + this.vgpuType = vgpuType; + this.groupDetails = groupDetails; + this.gpuCount = gpuCount; + this.gpuDevices = gpuDevices; + + } + + public GPUDeviceTO(String gpuGroup, String vgpuType, + HashMap> groupDetails) { this.gpuGroup = gpuGroup; this.vgpuType = vgpuType; this.groupDetails = groupDetails; @@ -48,6 +64,14 @@ public void setVgpuType(String vgpuType) { this.vgpuType = vgpuType; } + public int getGpuCount() { + return gpuCount; + } + + public void setGpuCount(int gpuCount) { + this.gpuCount = gpuCount; + } + public HashMap> getGroupDetails() { return groupDetails; } @@ -56,4 +80,11 @@ public void setGroupDetails(HashMap> grou this.groupDetails = groupDetails; } + public List getGpuDevices() { + return gpuDevices; + } + + public void setGpuDevices(List gpuDevices) { + this.gpuDevices = gpuDevices; + } } diff --git a/api/src/main/java/com/cloud/configuration/Resource.java b/api/src/main/java/com/cloud/configuration/Resource.java index c7bf44de76c6..97be7f9d64c5 100644 --- a/api/src/main/java/com/cloud/configuration/Resource.java +++ b/api/src/main/java/com/cloud/configuration/Resource.java @@ -37,7 +37,8 @@ enum ResourceType { // All storage type resources are allocated_storage and not backup("backup", 12), backup_storage("backup_storage", 13), bucket("bucket", 14), - object_storage("object_storage", 15); + object_storage("object_storage", 15), + gpu("gpu", 16); private String 
name; private int ordinal; diff --git a/api/src/main/java/com/cloud/event/EventTypes.java b/api/src/main/java/com/cloud/event/EventTypes.java index beed8432df2f..a8777d5c75b8 100644 --- a/api/src/main/java/com/cloud/event/EventTypes.java +++ b/api/src/main/java/com/cloud/event/EventTypes.java @@ -31,6 +31,9 @@ import org.apache.cloudstack.datacenter.DataCenterIpv4GuestSubnet; import org.apache.cloudstack.extension.Extension; import org.apache.cloudstack.extension.ExtensionCustomAction; +import org.apache.cloudstack.gpu.GpuCard; +import org.apache.cloudstack.gpu.GpuDevice; +import org.apache.cloudstack.gpu.VgpuProfile; import org.apache.cloudstack.ha.HAConfig; import org.apache.cloudstack.network.BgpPeer; import org.apache.cloudstack.network.Ipv4GuestSubnetNetworkMap; @@ -378,6 +381,21 @@ public class EventTypes { public static final String EVENT_DISK_OFFERING_EDIT = "DISK.OFFERING.EDIT"; public static final String EVENT_DISK_OFFERING_DELETE = "DISK.OFFERING.DELETE"; + // GPU Cards + public static final String EVENT_GPU_CARD_CREATE = "GPU.CARD.CREATE"; + public static final String EVENT_GPU_CARD_EDIT = "GPU.CARD.EDIT"; + public static final String EVENT_GPU_CARD_DELETE = "GPU.CARD.DELETE"; + + // vGPU Profile + public static final String EVENT_VGPU_PROFILE_CREATE = "VGPU.PROFILE.CREATE"; + public static final String EVENT_VGPU_PROFILE_EDIT = "VGPU.PROFILE.EDIT"; + public static final String EVENT_VGPU_PROFILE_DELETE = "VGPU.PROFILE.DELETE"; + + // GPU Devices + public static final String EVENT_GPU_DEVICE_CREATE = "GPU.DEVICE.CREATE"; + public static final String EVENT_GPU_DEVICE_EDIT = "GPU.DEVICE.EDIT"; + public static final String EVENT_GPU_DEVICE_DELETE = "GPU.DEVICE.DELETE"; + // Network offerings public static final String EVENT_NETWORK_OFFERING_CREATE = "NETWORK.OFFERING.CREATE"; public static final String EVENT_NETWORK_OFFERING_ASSIGN = "NETWORK.OFFERING.ASSIGN"; @@ -1026,6 +1044,21 @@ public class EventTypes { entityEventDetails.put(EVENT_DISK_OFFERING_EDIT, 
DiskOffering.class); entityEventDetails.put(EVENT_DISK_OFFERING_DELETE, DiskOffering.class); + // GPU Cards + entityEventDetails.put(EVENT_GPU_CARD_CREATE, GpuCard.class); + entityEventDetails.put(EVENT_GPU_CARD_EDIT, GpuCard.class); + entityEventDetails.put(EVENT_GPU_CARD_DELETE, GpuCard.class); + + // vGPU Profiles + entityEventDetails.put(EVENT_VGPU_PROFILE_CREATE, VgpuProfile.class); + entityEventDetails.put(EVENT_VGPU_PROFILE_EDIT, VgpuProfile.class); + entityEventDetails.put(EVENT_VGPU_PROFILE_DELETE, VgpuProfile.class); + + // GPU Devices + entityEventDetails.put(EVENT_GPU_DEVICE_CREATE, GpuDevice.class); + entityEventDetails.put(EVENT_GPU_DEVICE_EDIT, GpuDevice.class); + entityEventDetails.put(EVENT_GPU_DEVICE_DELETE, GpuDevice.class); + // Network offerings entityEventDetails.put(EVENT_NETWORK_OFFERING_CREATE, NetworkOffering.class); entityEventDetails.put(EVENT_NETWORK_OFFERING_ASSIGN, NetworkOffering.class); diff --git a/api/src/main/java/com/cloud/offering/ServiceOffering.java b/api/src/main/java/com/cloud/offering/ServiceOffering.java index acb7a9f1cf91..532123e4373a 100644 --- a/api/src/main/java/com/cloud/offering/ServiceOffering.java +++ b/api/src/main/java/com/cloud/offering/ServiceOffering.java @@ -142,4 +142,8 @@ enum StorageType { Boolean getDiskOfferingStrictness(); void setDiskOfferingStrictness(boolean diskOfferingStrictness); + + Long getVgpuProfileId(); + + Integer getGpuCount(); } diff --git a/api/src/main/java/com/cloud/user/ResourceLimitService.java b/api/src/main/java/com/cloud/user/ResourceLimitService.java index 2f4ad1347be5..49b20fe2fefc 100644 --- a/api/src/main/java/com/cloud/user/ResourceLimitService.java +++ b/api/src/main/java/com/cloud/user/ResourceLimitService.java @@ -50,8 +50,14 @@ public interface ResourceLimitService { "The default maximum number of projects that can be created for an account",false); static final ConfigKey DefaultMaxDomainProjects = new ConfigKey<>("Domain Defaults",Long.class,"max.domain.projects","50", 
"The default maximum number of projects that can be created for a domain",false); - - static final List HostTagsSupportingTypes = List.of(ResourceType.user_vm, ResourceType.cpu, ResourceType.memory); + static final ConfigKey DefaultMaxAccountGpus = new ConfigKey<>("Account Defaults",Long.class,"max.account.gpus","20", + "The default maximum number of GPU devices that can be used for an account", false); + static final ConfigKey DefaultMaxDomainGpus = new ConfigKey<>("Domain Defaults",Long.class,"max.domain.gpus","20", + "The default maximum number of GPU devices that can be used for a domain", false); + static final ConfigKey DefaultMaxProjectGpus = new ConfigKey<>("Project Defaults",Long.class,"max.project.gpus","20", + "The default maximum number of GPU devices that can be used for a project", false); + + static final List HostTagsSupportingTypes = List.of(ResourceType.user_vm, ResourceType.cpu, ResourceType.memory, ResourceType.gpu); static final List StorageTagsSupportingTypes = List.of(ResourceType.volume, ResourceType.primary_storage); /** @@ -284,4 +290,8 @@ void checkVmResourceLimitsForTemplateChange(Account owner, Boolean display, Serv void incrementVmMemoryResourceCount(long accountId, Boolean display, ServiceOffering serviceOffering, VirtualMachineTemplate template, Long memory); void decrementVmMemoryResourceCount(long accountId, Boolean display, ServiceOffering serviceOffering, VirtualMachineTemplate template, Long memory); + void checkVmGpuResourceLimit(Account owner, Boolean display, ServiceOffering serviceOffering, VirtualMachineTemplate template, Long gpu) throws ResourceAllocationException; + void incrementVmGpuResourceCount(long accountId, Boolean display, ServiceOffering serviceOffering, VirtualMachineTemplate template, Long gpu); + void decrementVmGpuResourceCount(long accountId, Boolean display, ServiceOffering serviceOffering, VirtualMachineTemplate template, Long gpu); + } diff --git 
a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java index 00382b76f321..4fef598d3117 100644 --- a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java +++ b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java @@ -69,6 +69,7 @@ public class ApiConstants { public static final String BOOTABLE = "bootable"; public static final String BIND_DN = "binddn"; public static final String BIND_PASSWORD = "bindpass"; + public static final String BUS_ADDRESS = "busaddress"; public static final String BYTES_READ_RATE = "bytesreadrate"; public static final String BYTES_READ_RATE_MAX = "bytesreadratemax"; public static final String BYTES_READ_RATE_MAX_LENGTH = "bytesreadratemaxlength"; @@ -162,6 +163,7 @@ public class ApiConstants { public static final String DESTINATION_ZONE_ID = "destzoneid"; public static final String DETAILS = "details"; public static final String DEVICE_ID = "deviceid"; + public static final String DEVICE_NAME = "devicename"; public static final String DIRECT_DOWNLOAD = "directdownload"; public static final String DISK = "disk"; public static final String DISK_OFFERING_ID = "diskofferingid"; @@ -388,6 +390,7 @@ public class ApiConstants { public static final String NEW_START_IP = "newstartip"; public static final String NEW_END_IP = "newendip"; public static final String KUBERNETES_NODE_VERSION = "kubernetesnodeversion"; + public static final String NUMA_NODE = "numanode"; public static final String NUM_RETRIES = "numretries"; public static final String OFFER_HA = "offerha"; public static final String OS_DISTRIBUTION = "osdistribution"; @@ -404,6 +407,13 @@ public class ApiConstants { public static final String OS_TYPE_ID = "ostypeid"; public static final String OS_DISPLAY_NAME = "osdisplayname"; public static final String OS_NAME_FOR_HYPERVISOR = "osnameforhypervisor"; + public static final String GPU_CARD_ID = "gpucardid"; + public static final String GPU_CARD_NAME 
= "gpucardname"; + public static final String GPU_COUNT = "gpucount"; + public static final String GPU_DISPLAY = "gpudisplay"; + public static final String GPU_DEVICE_TYPE = "gpudevicetype"; + public static final String GPU_ENABLED = "gpuenabled"; + public static final String MAX_VGPU_PER_PHYSICAL_GPU = "maxvgpuperphysicalgpu"; public static final String GUEST_OS_LIST = "guestoslist"; public static final String GUEST_OS_COUNT = "guestoscount"; public static final String OS_MAPPING_CHECK_ENABLED = "osmappingcheckenabled"; @@ -415,9 +425,11 @@ public class ApiConstants { public static final String PARENT = "parent"; public static final String PARENT_ID = "parentid"; public static final String PARENT_DOMAIN_ID = "parentdomainid"; + public static final String PARENT_GPU_DEVICE_ID = "parentgpudeviceid"; public static final String PARENT_SUBNET = "parentsubnet"; public static final String PARENT_TEMPLATE_ID = "parenttemplateid"; public static final String PASSWORD = "password"; + public static final String PCI_ROOT = "pciroot"; public static final String CURRENT_PASSWORD = "currentpassword"; public static final String SHOULD_UPDATE_PASSWORD = "update_passwd_on_host"; public static final String PASSWORD_ENABLED = "passwordenabled"; @@ -588,6 +600,10 @@ public class ApiConstants { public static final String VALIDATION_FORMAT = "validationformat"; public static final String VALUE = "value"; public static final String VALUE_OPTIONS = "valueoptions"; + public static final String VENDOR_ID = "vendorid"; + public static final String VENDOR_NAME = "vendorname"; + public static final String VGPU_PROFILE_ID = "vgpuprofileid"; + public static final String VGPU_PROFILE_NAME = "vgpuprofilename"; public static final String VIRTUAL_MACHINE = "virtualmachine"; public static final String VIRTUAL_MACHINE_ID = "virtualmachineid"; public static final String VIRTUAL_MACHINE_IDS = "virtualmachineids"; diff --git a/api/src/main/java/org/apache/cloudstack/api/BaseCmd.java 
b/api/src/main/java/org/apache/cloudstack/api/BaseCmd.java index 317d72eb9713..8f47d51b19d4 100644 --- a/api/src/main/java/org/apache/cloudstack/api/BaseCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/BaseCmd.java @@ -39,6 +39,7 @@ import org.apache.cloudstack.alert.AlertService; import org.apache.cloudstack.annotation.AnnotationService; import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.gpu.GpuService; import org.apache.cloudstack.network.RoutedIpv4Manager; import org.apache.cloudstack.network.lb.ApplicationLoadBalancerService; import org.apache.cloudstack.network.lb.InternalLoadBalancerVMService; @@ -131,6 +132,8 @@ public static enum CommandType { @Inject public UserVmService _userVmService; @Inject + public GpuService gpuService; + @Inject public ManagementService _mgr; @Inject public StorageService _storageService;
diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/CreateGpuCardCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/CreateGpuCardCmd.java
new file mode 100644
index 000000000000..2faad89bf67e
--- /dev/null
+++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/CreateGpuCardCmd.java
@@ -0,0 +1,138 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package org.apache.cloudstack.api.command.admin.gpu;
+
+import com.cloud.exception.ConcurrentOperationException;
+import com.cloud.exception.InsufficientCapacityException;
+import com.cloud.exception.NetworkRuleConflictException;
+import com.cloud.exception.ResourceAllocationException;
+import com.cloud.exception.ResourceUnavailableException;
+import com.cloud.user.Account;
+import org.apache.cloudstack.acl.RoleType;
+import org.apache.cloudstack.api.APICommand;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.ApiErrorCode;
+import org.apache.cloudstack.api.BaseCmd;
+import org.apache.cloudstack.api.Parameter;
+import org.apache.cloudstack.api.ServerApiException;
+import org.apache.cloudstack.api.response.GpuCardResponse;
+import org.apache.cloudstack.gpu.GpuCard;
+
+/**
+ * API command that creates a GPU card definition through {@code gpuService}.
+ * Only the Admin role is authorized, as for the sibling createGpuDevice command.
+ */
+@APICommand(name = "createGpuCard", description = "Creates a GPU card definition in the system",
+        responseObject = GpuCardResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
+        since = "4.21.0", authorized = {RoleType.Admin})
+public class CreateGpuCardCmd extends BaseCmd {
+
+    /// //////////////////////////////////////////////////
+    /// ///////////// API parameters /////////////////////
+    /// //////////////////////////////////////////////////
+
+    @Parameter(name = ApiConstants.DEVICE_ID, type = CommandType.STRING, required = true,
+            description = "the device ID of the GPU card")
+    private String deviceId;
+
+    @Parameter(name = ApiConstants.DEVICE_NAME, type = CommandType.STRING, required = true,
+            description = "the device name of the GPU card")
+    private String deviceName;
+
+    @Parameter(name = ApiConstants.NAME, type = CommandType.STRING, required = true,
+            description = "the display name of the GPU card")
+    private String name;
+
+    @Parameter(name = ApiConstants.VENDOR_NAME, type = CommandType.STRING, required = true,
+            description = "the vendor name of the GPU card")
+    private String vendorName;
+
+    @Parameter(name = ApiConstants.VENDOR_ID, type = CommandType.STRING, required = true,
+            description = "the vendor ID of the GPU card")
+    private String vendorId;
+
+    // Optional parameters for the passthrough vGPU profile display properties
+    @Parameter(name = ApiConstants.VIDEORAM, type = CommandType.LONG,
+            description = "the video RAM size in MB for the passthrough vGPU profile")
+    private Long videoRam;
+
+    /// //////////////////////////////////////////////////
+    /// //////////////// Accessors ///////////////////////
+    /// //////////////////////////////////////////////////
+
+    public String getDeviceId() {
+        return deviceId;
+    }
+
+    public String getDeviceName() {
+        return deviceName;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public String getVendorName() {
+        return vendorName;
+    }
+
+    public String getVendorId() {
+        return vendorId;
+    }
+
+    public Long getVideoRam() {
+        return videoRam;
+    }
+
+    /// //////////////////////////////////////////////////
+    /// //////////// API Implementation///////////////////
+    /// //////////////////////////////////////////////////
+
+    /**
+     * Creates the GPU card via {@code gpuService} and publishes it as this command's response.
+     *
+     * @throws ServerApiException as thrown by the service, or with {@code INTERNAL_ERROR} when the
+     *                            service returns {@code null} or fails with any other exception
+     */
+    @Override
+    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException,
+            ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
+        final GpuCard gpuCard;
+        try {
+            gpuCard = gpuService.createGpuCard(this);
+        } catch (ServerApiException e) {
+            // Already an API error: rethrow untouched so its error code and message survive.
+            throw e;
+        } catch (Exception e) {
+            // Pass the cause along so the original stack trace is preserved.
+            throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR,
+                    "Failed to create GPU card: " + e.getMessage(), e);
+        }
+        if (gpuCard == null) {
+            throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to create GPU card");
+        }
+        GpuCardResponse response = new GpuCardResponse(gpuCard);
+        response.setResponseName(getCommandName());
+        setResponseObject(response);
+    }
+
+    /** Returns {@code Account.ACCOUNT_ID_SYSTEM} as the owner id for this command. */
+    @Override
+    public long getEntityOwnerId() {
+        return Account.ACCOUNT_ID_SYSTEM;
+    }
+}
diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/CreateGpuDeviceCmd.java
b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/CreateGpuDeviceCmd.java
new file mode 100644
index 000000000000..e6386082a448
--- /dev/null
+++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/CreateGpuDeviceCmd.java
@@ -0,0 +1,147 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package org.apache.cloudstack.api.command.admin.gpu;
+
+import com.cloud.user.Account;
+import org.apache.cloudstack.acl.RoleType;
+import org.apache.cloudstack.api.APICommand;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.ApiErrorCode;
+import org.apache.cloudstack.api.BaseCmd;
+import org.apache.cloudstack.api.Parameter;
+import org.apache.cloudstack.api.ServerApiException;
+import org.apache.cloudstack.api.response.GpuCardResponse;
+import org.apache.cloudstack.api.response.GpuDeviceResponse;
+import org.apache.cloudstack.api.response.HostResponse;
+import org.apache.cloudstack.api.response.VgpuProfileResponse;
+import org.apache.cloudstack.gpu.GpuDevice;
+import org.apache.commons.lang3.EnumUtils;
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * API command that creates a GPU device on a host through {@code gpuService}.
+ */
+@APICommand(name = "createGpuDevice", description = "Creates a GPU device manually on a host",
+        responseObject = GpuDeviceResponse.class, since = "4.21.0", requestHasSensitiveInfo = false,
+        responseHasSensitiveInfo = false, authorized = {RoleType.Admin})
+public class CreateGpuDeviceCmd extends BaseCmd {
+
+    @Parameter(name = ApiConstants.HOST_ID, type = CommandType.UUID, entityType = HostResponse.class, required = true,
+            description = "ID of the host where the GPU device is located")
+    private Long hostId;
+
+    @Parameter(name = ApiConstants.BUS_ADDRESS, type = CommandType.STRING, required = true,
+            description = "PCI bus address of the GPU device (e.g., 0000:01:00.0) or UUID for MDEV devices.")
+    private String busAddress;
+
+    @Parameter(name = ApiConstants.GPU_CARD_ID, type = CommandType.UUID, entityType = GpuCardResponse.class,
+            required = true, description = "ID of the GPU card type")
+    private Long gpuCardId;
+
+    @Parameter(name = ApiConstants.VGPU_PROFILE_ID, type = CommandType.UUID, entityType = VgpuProfileResponse.class,
+            required = true, description = "ID of the vGPU profile")
+    private Long vgpuProfileId;
+
+    @Parameter(name = ApiConstants.TYPE, type = CommandType.STRING,
+            description = "Type of GPU device (PCI, MDEV, VGPUOnly). Defaults to PCI.")
+    private String type;
+
+    @Parameter(name = ApiConstants.PARENT_GPU_DEVICE_ID, type = CommandType.UUID, entityType = GpuDeviceResponse.class,
+            description = "ID of the parent GPU device (for virtual GPU devices)")
+    private Long parentGpuDeviceId;
+
+    @Parameter(name = ApiConstants.NUMA_NODE, type = CommandType.STRING,
+            description = "NUMA node of the GPU device (e.g., 0, 1, etc.). This is optional and can be used to "
+                    + "specify the NUMA node for the GPU device which is used during allocation. Defaults to -1")
+    private String numaNode;
+
+    public Long getHostId() {
+        return hostId;
+    }
+
+    public String getBusAddress() {
+        return busAddress;
+    }
+
+    public Long getGpuCardId() {
+        return gpuCardId;
+    }
+
+    public Long getVgpuProfileId() {
+        return vgpuProfileId;
+    }
+
+    /**
+     * Resolves the {@code type} parameter to a device type, ignoring case.
+     *
+     * @return the parsed type, or {@code PCI} when the parameter is blank
+     * @throws ServerApiException with {@code PARAM_ERROR} when the value matches no device type
+     */
+    public GpuDevice.DeviceType getType() {
+        GpuDevice.DeviceType deviceType = GpuDevice.DeviceType.PCI;
+        if (StringUtils.isNotBlank(type)) {
+            deviceType = EnumUtils.getEnumIgnoreCase(GpuDevice.DeviceType.class, type);
+            if (deviceType == null) {
+                throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Invalid GPU device type: " + type);
+            }
+        }
+        return deviceType;
+    }
+
+    public Long getParentGpuDeviceId() {
+        return parentGpuDeviceId;
+    }
+
+    /** Returns the NUMA node parameter, or {@code "-1"} when it is blank. */
+    public String getNumaNode() {
+        if (StringUtils.isBlank(numaNode)) {
+            return "-1"; // Default value for NUMA node
+        }
+        return numaNode;
+    }
+
+    /**
+     * Creates the GPU device via {@code gpuService} and publishes the response.
+     *
+     * @throws ServerApiException as thrown by the service, or with {@code INTERNAL_ERROR}
+     *                            when the service fails otherwise or returns {@code null}
+     */
+    @Override
+    public void execute() {
+        final GpuDeviceResponse response;
+        try {
+            response = gpuService.createGpuDevice(this);
+        } catch (ServerApiException e) {
+            // Already an API error (such as the PARAM_ERROR getType() can raise): keep its code.
+            throw e;
+        } catch (Exception e) {
+            // Same message as before, but keep the cause so the stack trace is not lost.
+            throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, e.getMessage(), e);
+        }
+        if (response == null) {
+            throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to create GPU device");
+        }
+        response.setResponseName(getCommandName());
+        setResponseObject(response);
+    }
+
+    /** Returns {@code Account.ACCOUNT_ID_SYSTEM} as the owner id for this command. */
+    @Override
+    public long getEntityOwnerId() {
+        return Account.ACCOUNT_ID_SYSTEM;
+    }
+}
diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/CreateVgpuProfileCmd.java
b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/CreateVgpuProfileCmd.java new file mode 100644 index 000000000000..3210773b6f45 --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/CreateVgpuProfileCmd.java @@ -0,0 +1,131 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.GpuCardResponse; +import org.apache.cloudstack.api.response.VgpuProfileResponse; + + +@APICommand(name = "createVgpuProfile", description = "Creates a vGPU profile in the system", + responseObject = VgpuProfileResponse.class, requestHasSensitiveInfo = false, + responseHasSensitiveInfo = false, since = "4.21.0", authorized = {RoleType.Admin}) +public class CreateVgpuProfileCmd extends BaseCmd { + + /// ////////////////////////////////////////////////// + /// ///////////// API parameters ///////////////////// + /// ////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.NAME, type = CommandType.STRING, required = true, + description = "the name of the vGPU profile") + private String name; + + @Parameter(name = ApiConstants.DESCRIPTION, type = CommandType.STRING, + description = "the description of the vGPU profile") + private String description; + + @Parameter(name = ApiConstants.GPU_CARD_ID, type = CommandType.UUID, entityType = GpuCardResponse.class, + required = true, description = "the GPU card ID associated with this GPU device") + private Long cardId; + + @Parameter(name = ApiConstants.MAX_VGPU_PER_PHYSICAL_GPU, type = CommandType.LONG, + description = "Max vGPU per physical GPU. 
This is used to calculate capacity.") + private Long maxVgpuPerPgpu; + + @Parameter(name = ApiConstants.VIDEORAM, type = CommandType.LONG, + description = "the video RAM size in MB") + private Long videoRam; + + @Parameter(name = ApiConstants.MAXHEADS, type = CommandType.LONG, + description = "the maximum number of display heads") + private Long maxHeads; + + @Parameter(name = ApiConstants.MAXRESOLUTIONX, type = CommandType.LONG, + description = "the maximum X resolution") + private Long maxResolutionX; + + @Parameter(name = ApiConstants.MAXRESOLUTIONY, type = CommandType.LONG, + description = "the maximum Y resolution") + private Long maxResolutionY; + + /// ////////////////////////////////////////////////// + /// //////////////// Accessors /////////////////////// + /// ////////////////////////////////////////////////// + + public String getName() { + return name; + } + + public String getDescription() { + return description; + } + + public Long getCardId() { + return cardId; + } + + public Long getMaxVgpuPerPgpu() { + return maxVgpuPerPgpu; + } + + public Long getVideoRam() { + return videoRam; + } + + public Long getMaxHeads() { + return maxHeads; + } + + public Long getMaxResolutionX() { + return maxResolutionX; + } + + public Long getMaxResolutionY() { + return maxResolutionY; + } + + @Override + public void execute() { + try { + VgpuProfileResponse response = gpuService.createVgpuProfile(this); + if (response != null) { + setResponseObject(response); + } else { + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to create vGPU profile"); + } + } catch (Exception e) { + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, + "Failed to create vGPU profile: " + e.getMessage()); + } + } + + /// ////////////////////////////////////////////////// + /// //////////// API Implementation/////////////////// + /// ////////////////////////////////////////////////// + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; + 
} +} diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/DeleteGpuCardCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/DeleteGpuCardCmd.java new file mode 100644 index 000000000000..9a510ecdead9 --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/DeleteGpuCardCmd.java @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.GpuCardResponse; +import org.apache.cloudstack.api.response.SuccessResponse; + + +@APICommand(name = "deleteGpuCard", description = "Deletes a GPU card definition from the system", + responseObject = SuccessResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.21.0", authorized = {RoleType.Admin}) +public class DeleteGpuCardCmd extends BaseCmd { + + /// ////////////////////////////////////////////////// + /// ///////////// API parameters ///////////////////// + /// ////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.ID, type = CommandType.UUID, entityType = GpuCardResponse.class, required = true, + description = "the ID of the GPU card") + private Long id; + + /// ////////////////////////////////////////////////// + /// //////////////// Accessors /////////////////////// + /// ////////////////////////////////////////////////// + + public Long getId() { + return id; + } + + @Override + public void execute() { + try { + boolean success = gpuService.deleteGpuCard(this); + if (success) { + SuccessResponse response = new SuccessResponse(getCommandName()); + setResponseObject(response); + } else { + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to delete GPU card"); + } + } catch (Exception e) { + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to delete GPU card: " + e.getMessage()); + } + } + + /// ////////////////////////////////////////////////// + /// //////////// API Implementation/////////////////// + 
/// ////////////////////////////////////////////////// + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; + } +} diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/DeleteGpuDeviceCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/DeleteGpuDeviceCmd.java new file mode 100644 index 000000000000..9224afc66ecf --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/DeleteGpuDeviceCmd.java @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.GpuDeviceResponse; +import org.apache.cloudstack.api.response.SuccessResponse; + +import java.util.List; + +@APICommand(name = "deleteGpuDevice", description = "Deletes a vGPU profile from the system", + responseObject = SuccessResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.21.0", authorized = {RoleType.Admin}) +public class DeleteGpuDeviceCmd extends BaseCmd { + + /// ////////////////////////////////////////////////// + /// ///////////// API parameters ///////////////////// + /// ////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.IDS, type = CommandType.LIST, collectionType = CommandType.UUID, + entityType = GpuDeviceResponse.class, required = true, + description = "comma separated list of IDs of the GPU device") + private List ids; + + /// ////////////////////////////////////////////////// + /// //////////////// Accessors /////////////////////// + /// ////////////////////////////////////////////////// + + public List getIds() { + return ids; + } + + @Override + public void execute() { + try { + boolean success = gpuService.deleteGpuDevices(this); + if (success) { + SuccessResponse response = new SuccessResponse(getCommandName()); + setResponseObject(response); + } else { + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to delete vGPU profile"); + } + } catch (Exception e) { + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, + "Failed to delete vGPU profile: " + e.getMessage()); + } + } + + /// 
////////////////////////////////////////////////// + /// //////////// API Implementation/////////////////// + /// ////////////////////////////////////////////////// + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; + } +} diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/DeleteVgpuProfileCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/DeleteVgpuProfileCmd.java new file mode 100644 index 000000000000..a09a04199f57 --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/DeleteVgpuProfileCmd.java @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.SuccessResponse; +import org.apache.cloudstack.api.response.VgpuProfileResponse; + + +@APICommand(name = "deleteVgpuProfile", description = "Deletes a vGPU profile from the system", + responseObject = SuccessResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.21.0", authorized = {RoleType.Admin}) +public class DeleteVgpuProfileCmd extends BaseCmd { + + /// ////////////////////////////////////////////////// + /// ///////////// API parameters ///////////////////// + /// ////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.ID, type = CommandType.UUID, entityType = VgpuProfileResponse.class, required = true, + description = "the ID of the vGPU profile") + private Long id; + + /// ////////////////////////////////////////////////// + /// //////////////// Accessors /////////////////////// + /// ////////////////////////////////////////////////// + + public Long getId() { + return id; + } + + @Override + public void execute() { + try { + boolean success = gpuService.deleteVgpuProfile(this); + if (success) { + SuccessResponse response = new SuccessResponse(getCommandName()); + setResponseObject(response); + } else { + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to delete vGPU profile"); + } + } catch (Exception e) { + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, + "Failed to delete vGPU profile: " + e.getMessage()); + } + } + + /// ////////////////////////////////////////////////// + /// //////////// API 
Implementation/////////////////// + /// ////////////////////////////////////////////////// + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; + } +} diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/DiscoverGpuDevicesCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/DiscoverGpuDevicesCmd.java new file mode 100644 index 000000000000..2ac07a9fb3a0 --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/DiscoverGpuDevicesCmd.java @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package org.apache.cloudstack.api.command.admin.gpu; + +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.BaseListCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.response.GpuDeviceResponse; +import org.apache.cloudstack.api.response.HostResponse; +import org.apache.cloudstack.api.response.ListResponse; +import org.apache.cloudstack.context.CallContext; + + +@APICommand(name = "discoverGpuDevices", description = "Discovers available GPU devices on a host", + responseObject = GpuDeviceResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.21.0", authorized = {RoleType.Admin}) +public class DiscoverGpuDevicesCmd extends BaseListCmd { + + /// ////////////////////////////////////////////////// + /// ///////////// API parameters ///////////////////// + /// ////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.ID, type = CommandType.UUID, entityType = HostResponse.class, required = true, + description = "ID of the host") + private Long id; + + /// ////////////////////////////////////////////////// + /// //////////// API Implementation ////////////////// + /// ////////////////////////////////////////////////// + + @Override + public void execute() { + CallContext.current().setEventDetails("Discovering GPU Devices on host id: " + getId()); + ListResponse response = gpuService.discoverGpuDevices(this); + response.setResponseName(getCommandName()); + setResponseObject(response); + } + + /// ////////////////////////////////////////////////// + /// //////////////// Accessors /////////////////////// + /// ////////////////////////////////////////////////// + + public Long getId() { + return id; + } + +} diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/ListGpuDevicesCmdByAdmin.java 
b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/ListGpuDevicesCmdByAdmin.java new file mode 100644 index 000000000000..b3c57713fcd1 --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/ListGpuDevicesCmdByAdmin.java @@ -0,0 +1,74 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package org.apache.cloudstack.api.command.admin.gpu;
+
+import org.apache.cloudstack.api.APICommand;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.Parameter;
+import org.apache.cloudstack.api.ResponseObject;
+import org.apache.cloudstack.api.command.admin.AdminCmd;
+import org.apache.cloudstack.api.command.user.gpu.ListGpuDevicesCmd;
+import org.apache.cloudstack.api.response.GpuCardResponse;
+import org.apache.cloudstack.api.response.GpuDeviceResponse;
+import org.apache.cloudstack.api.response.HostResponse;
+import org.apache.cloudstack.api.response.VgpuProfileResponse;
+
+
+/**
+ * Admin variant of the {@code listGpuDevices} command: it extends {@link ListGpuDevicesCmd}, is marked as
+ * an {@link AdminCmd}, requests the {@code Full} response view and declares four optional filter
+ * parameters (device ID, host ID, GPU card ID, vGPU profile ID).
+ * <p>
+ * NOTE(review): {@code ListGpuDevicesCmd} is not visible from here; confirm whether the accessors below
+ * are meant to override accessors of the same name in the parent class.
+ */
+@APICommand(name = "listGpuDevices", description = "Lists all available GPU devices",
+        responseView = ResponseObject.ResponseView.Full,
+        responseObject = GpuDeviceResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
+        since = "4.21.0")
+public class ListGpuDevicesCmdByAdmin extends ListGpuDevicesCmd implements AdminCmd {
+
+    // Optional filter: a single GPU device.
+    @Parameter(name = ApiConstants.ID, type = CommandType.UUID, entityType = GpuDeviceResponse.class,
+            description = "ID of the GPU device")
+    private Long id;
+
+    // Optional filter: the host the device is attached to.
+    @Parameter(name = ApiConstants.HOST_ID, type = CommandType.UUID, entityType = HostResponse.class,
+            description = "the host ID where the GPU device is attached")
+    private Long hostId;
+
+    // Optional filter: the GPU card associated with the device.
+    @Parameter(name = ApiConstants.GPU_CARD_ID, type = CommandType.UUID, entityType = GpuCardResponse.class,
+            description = "the GPU card ID associated with the GPU device")
+    private Long gpuCardId;
+
+    // Optional filter: the vGPU profile assigned to the device.
+    @Parameter(name = ApiConstants.VGPU_PROFILE_ID, type = CommandType.UUID, entityType = VgpuProfileResponse.class,
+            description = "the vGPU profile ID assigned to the GPU device")
+    private Long vgpuProfileId;
+
+
+    /////////////////////////////////////////////////////
+    /////////////////// Accessors ///////////////////////
+    /////////////////////////////////////////////////////
+
+    /** @return the GPU device ID filter, or {@code null} when not supplied */
+    public Long getId() {
+        return id;
+    }
+
+    /** @return the host ID filter, or {@code null} when not supplied */
+    public Long getHostId() {
+        return hostId;
+    }
+
+    /** @return the GPU card ID filter, or {@code null} when not supplied */
+    public Long getGpuCardId() {
+        return gpuCardId;
+    }
+
+    /** @return the vGPU profile ID filter, or {@code null} when not supplied */
+    public Long getVgpuProfileId() {
+        return vgpuProfileId;
+    }
+
+}
diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/ManageGpuDeviceCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/ManageGpuDeviceCmd.java
new file mode 100644
index 000000000000..5dfe6c3deee0
--- /dev/null
+++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/ManageGpuDeviceCmd.java
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.GpuDeviceResponse; +import org.apache.cloudstack.api.response.SuccessResponse; + +import java.util.List; + +@APICommand(name = "manageGpuDevice", description = "Manages a GPU device", responseObject = SuccessResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, since = "4.21.0", + authorized = {RoleType.Admin}) +public class ManageGpuDeviceCmd extends BaseCmd { + + /// ////////////////////////////////////////////////// + /// ///////////// API parameters ///////////////////// + /// ////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.IDS, type = CommandType.LIST, collectionType = CommandType.UUID, + entityType = GpuDeviceResponse.class, required = true, + description = "comma separated list of IDs of the GPU device") + private List ids; + + + /// ////////////////////////////////////////////////// + /// //////////////// Accessors /////////////////////// + /// ////////////////////////////////////////////////// + + public List getIds() { + return ids; + } + + @Override + public void execute() { + try { + if (gpuService.enableGpuDevice(this)) { + SuccessResponse response = new SuccessResponse(); + response.setResponseName(getCommandName()); + setResponseObject(response); + } else { + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to enable GPU device"); + } + } catch (Exception e) { + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to enable GPU device: " + e.getMessage()); + } + } + + /// 
////////////////////////////////////////////////// + /// //////////// API Implementation/////////////////// + /// ////////////////////////////////////////////////// + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; + } +} diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/UnmanageGpuDeviceCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/UnmanageGpuDeviceCmd.java new file mode 100644 index 000000000000..46de23ec44be --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/UnmanageGpuDeviceCmd.java @@ -0,0 +1,79 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
package org.apache.cloudstack.api.command.admin.gpu;

import com.cloud.user.Account;
import org.apache.cloudstack.acl.RoleType;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.ApiErrorCode;
import org.apache.cloudstack.api.BaseCmd;
import org.apache.cloudstack.api.Parameter;
import org.apache.cloudstack.api.ServerApiException;
import org.apache.cloudstack.api.response.GpuDeviceResponse;
import org.apache.cloudstack.api.response.SuccessResponse;

import java.util.List;

/**
 * API command {@code unmanageGpuDevice}: asks the GPU service to disable the GPU devices
 * whose IDs are passed in {@code ids}. Restricted to {@link RoleType#Admin}.
 */
@APICommand(name = "unmanageGpuDevice", description = "Unmanage a GPU device", responseObject = SuccessResponse.class,
        requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, since = "4.21.0",
        authorized = {RoleType.Admin})
public class UnmanageGpuDeviceCmd extends BaseCmd {

    /////////////////////////////////////////////////////
    //////////////// API parameters /////////////////////
    /////////////////////////////////////////////////////

    @Parameter(name = ApiConstants.IDS, type = CommandType.LIST, collectionType = CommandType.UUID,
            entityType = GpuDeviceResponse.class, required = true,
            description = "comma separated list of IDs of the GPU device")
    private List<Long> ids;

    /////////////////////////////////////////////////////
    /////////////////// Accessors ///////////////////////
    /////////////////////////////////////////////////////

    /**
     * @return the GPU device IDs supplied with the request
     */
    public List<Long> getIds() {
        return ids;
    }

    /**
     * Delegates to {@code gpuService.disableGpuDevice(this)} and publishes a
     * {@link SuccessResponse} when the service reports success.
     *
     * @throws ServerApiException as raised by the service (propagated unchanged), or with
     *         {@link ApiErrorCode#INTERNAL_ERROR} when the service returns {@code false} or
     *         fails with any other exception (the original exception is kept as the cause)
     */
    @Override
    public void execute() {
        final boolean disabled;
        try {
            disabled = gpuService.disableGpuDevice(this);
        } catch (ServerApiException e) {
            // Already an API-level error: do not re-wrap it and lose its error code.
            throw e;
        } catch (Exception e) {
            throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR,
                    "Failed to disable GPU device: " + e.getMessage(), e);
        }

        // Checked outside the try so this exception is not caught and wrapped a second time.
        if (!disabled) {
            throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to disable GPU device");
        }

        SuccessResponse response = new SuccessResponse();
        response.setResponseName(getCommandName());
        setResponseObject(response);
    }

    /////////////////////////////////////////////////////
    /////////////// API Implementation///////////////////
    /////////////////////////////////////////////////////

    /**
     * @return the system account ID; this command is not owned by a user account
     */
    @Override
    public long getEntityOwnerId() {
        return Account.ACCOUNT_ID_SYSTEM;
    }
}
+package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.GpuCardResponse; +import org.apache.cloudstack.gpu.GpuCard; + + +@APICommand(name = "updateGpuCard", description = "Updates a GPU card definition in the system", + responseObject = GpuCardResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.21.0", authorized = {RoleType.Admin}) +public class UpdateGpuCardCmd extends BaseCmd { + + /// ////////////////////////////////////////////////// + /// ///////////// API parameters ///////////////////// + /// ////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.ID, type = CommandType.UUID, entityType = GpuCardResponse.class, required = true, + description = "the ID of the GPU card") + private Long id; + + @Parameter(name = ApiConstants.DEVICE_NAME, type = CommandType.STRING, + description = "the device name of the GPU card") + private String deviceName; + + @Parameter(name = ApiConstants.NAME, type = CommandType.STRING, description = "the display name of the GPU card") + private String name; + + @Parameter(name = ApiConstants.VENDOR_NAME, type = CommandType.STRING, + description = "the vendor name of the GPU card") + private String vendorName; + + /// ////////////////////////////////////////////////// + /// //////////////// Accessors /////////////////////// + /// ////////////////////////////////////////////////// + + public Long getId() { + return id; + } + + public String getDeviceName() { + return deviceName; + } + + public String getName() { + return name; + } + + public String getVendorName() { + 
return vendorName; + } + + @Override + public void execute() { + try { + GpuCard gpuCard = gpuService.updateGpuCard(this); + if (gpuCard != null) { + GpuCardResponse response = new GpuCardResponse(gpuCard); + response.setResponseName(getCommandName()); + setResponseObject(response); + } else { + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to update GPU card"); + } + } catch (Exception e) { + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to update GPU card: " + e.getMessage()); + } + } + + /// ////////////////////////////////////////////////// + /// //////////// API Implementation/////////////////// + /// ////////////////////////////////////////////////// + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; + } +} diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/UpdateGpuDeviceCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/UpdateGpuDeviceCmd.java new file mode 100644 index 000000000000..5ad6e6e1a6f6 --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/UpdateGpuDeviceCmd.java @@ -0,0 +1,109 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.GpuCardResponse; +import org.apache.cloudstack.api.response.GpuDeviceResponse; +import org.apache.cloudstack.api.response.VgpuProfileResponse; +import org.apache.cloudstack.gpu.GpuDevice; +import org.apache.commons.lang3.EnumUtils; +import org.apache.commons.lang3.StringUtils; + + +@APICommand(name = "updateGpuDevice", description = "Updates an existing GPU device", + responseObject = GpuDeviceResponse.class, since = "4.21.0", requestHasSensitiveInfo = false, + responseHasSensitiveInfo = false, authorized = {RoleType.Admin}) +public class UpdateGpuDeviceCmd extends BaseCmd { + + @Parameter(name = ApiConstants.ID, type = CommandType.UUID, entityType = GpuDeviceResponse.class, required = true, + description = "ID of the GPU device to update") + private Long id; + + @Parameter(name = ApiConstants.GPU_CARD_ID, type = CommandType.UUID, entityType = GpuCardResponse.class, + description = "New GPU card ID") + private Long gpuCardId; + + @Parameter(name = ApiConstants.VGPU_PROFILE_ID, type = CommandType.UUID, entityType = VgpuProfileResponse.class, + description = "New vGPU profile ID") + private Long vgpuProfileId; + + @Parameter(name = "type", type = CommandType.STRING, description = "New type of GPU device (PCI, MDEV, VGPUOnly)") + private String type; + + @Parameter(name = "parentgpudeviceid", type = CommandType.UUID, entityType = GpuDeviceResponse.class, + description = "New parent GPU device ID (for virtual GPU devices)") + private Long parentGpuDeviceId; + + @Parameter(name = ApiConstants.NUMA_NODE, type = 
CommandType.STRING, + description = "New NUMA node of the GPU device") + private String numaNode; + + public Long getId() { + return id; + } + + public Long getGpuCardId() { + return gpuCardId; + } + + public Long getVgpuProfileId() { + return vgpuProfileId; + } + + public GpuDevice.DeviceType getType() { + GpuDevice.DeviceType deviceType = null; + if (StringUtils.isNotBlank(type)) { + deviceType = EnumUtils.getEnumIgnoreCase(GpuDevice.DeviceType.class, type); + if (deviceType == null) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Invalid GPU device type: " + type); + } + } + return deviceType; + } + + public Long getParentGpuDeviceId() { + return parentGpuDeviceId; + } + + public String getNumaNode() { + return numaNode; + } + + @Override + public void execute() { + try { + GpuDeviceResponse response = gpuService.updateGpuDevice(this); + response.setResponseName(getCommandName()); + setResponseObject(response); + } catch (Exception e) { + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, e.getMessage()); + } + } + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; + } +} diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/UpdateVgpuProfileCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/UpdateVgpuProfileCmd.java new file mode 100644 index 000000000000..c8d60739bd45 --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/UpdateVgpuProfileCmd.java @@ -0,0 +1,129 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.VgpuProfileResponse; + + +@APICommand(name = "updateVgpuProfile", description = "Updates a vGPU profile in the system", + responseObject = VgpuProfileResponse.class, requestHasSensitiveInfo = false, + responseHasSensitiveInfo = false, since = "4.21.0", authorized = {RoleType.Admin}) +public class UpdateVgpuProfileCmd extends BaseCmd { + + /// ////////////////////////////////////////////////// + /// ///////////// API parameters ///////////////////// + /// ////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.ID, type = CommandType.UUID, entityType = VgpuProfileResponse.class, required = true, + description = "the ID of the vGPU profile") + private Long id; + + @Parameter(name = ApiConstants.NAME, type = CommandType.STRING, description = "the name of the vGPU profile") + private String profileName; + + @Parameter(name = ApiConstants.DESCRIPTION, type = CommandType.STRING, + description = "the description of the vGPU profile") + private String description; + + @Parameter(name = ApiConstants.MAX_VGPU_PER_PHYSICAL_GPU, type = CommandType.LONG, + description = "the maximum 
number of vGPUs per physical GPU") + private Long maxVgpuPerPgpu; + + @Parameter(name = ApiConstants.VIDEORAM, type = CommandType.LONG, + description = "the video RAM size in MB") + private Long videoRam; + + @Parameter(name = ApiConstants.MAXHEADS, type = CommandType.LONG, + description = "the maximum number of display heads") + private Long maxHeads; + + @Parameter(name = ApiConstants.MAXRESOLUTIONX, type = CommandType.LONG, + description = "the maximum X resolution") + private Long maxResolutionX; + + @Parameter(name = ApiConstants.MAXRESOLUTIONY, type = CommandType.LONG, + description = "the maximum Y resolution") + private Long maxResolutionY; + + /// ////////////////////////////////////////////////// + /// //////////////// Accessors /////////////////////// + /// ////////////////////////////////////////////////// + + public Long getId() { + return id; + } + + public String getProfileName() { + return profileName; + } + + public String getDescription() { + return description; + } + + public Long getMaxVgpuPerPgpu() { + return maxVgpuPerPgpu; + } + + public Long getVideoRam() { + return videoRam; + } + + public Long getMaxHeads() { + return maxHeads; + } + + public Long getMaxResolutionX() { + return maxResolutionX; + } + + public Long getMaxResolutionY() { + return maxResolutionY; + } + + @Override + public void execute() { + try { + VgpuProfileResponse response = gpuService.updateVgpuProfile(this); + if (response != null) { + setResponseObject(response); + } else { + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to update vGPU profile"); + } + } catch (Exception e) { + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, + "Failed to update vGPU profile: " + e.getMessage()); + } + } + + /// ////////////////////////////////////////////////// + /// //////////// API Implementation/////////////////// + /// ////////////////////////////////////////////////// + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; 
+ } +} diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/offering/CreateServiceOfferingCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/offering/CreateServiceOfferingCmd.java index 019ed58febb4..3d20ed50a5db 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/admin/offering/CreateServiceOfferingCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/offering/CreateServiceOfferingCmd.java @@ -32,6 +32,7 @@ import org.apache.cloudstack.api.response.DiskOfferingResponse; import org.apache.cloudstack.api.response.DomainResponse; import org.apache.cloudstack.api.response.ServiceOfferingResponse; +import org.apache.cloudstack.api.response.VgpuProfileResponse; import org.apache.cloudstack.api.response.VsphereStoragePoliciesResponse; import org.apache.cloudstack.api.response.ZoneResponse; import org.apache.cloudstack.vm.lease.VMLeaseManager; @@ -263,6 +264,25 @@ public class CreateServiceOfferingCmd extends BaseCmd { description = "Lease expiry action, valid values are STOP and DESTROY") private String leaseExpiryAction; + @Parameter(name = ApiConstants.VGPU_PROFILE_ID, + type = CommandType.UUID, + entityType = VgpuProfileResponse.class, + description = "the ID of the vGPU profile to which service offering should be mapped", + since = "4.21") + private Long vgpuProfileId; + + @Parameter(name = ApiConstants.GPU_COUNT, + type = CommandType.INTEGER, + description = "Count of GPUs to be used with this service offering. This is applicable only when passed with vGPU profile.", + since = "4.21") + private Integer gpuCount; + + @Parameter(name = ApiConstants.GPU_DISPLAY, + type = CommandType.BOOLEAN, + description = "Whether to enable GPU display for this service offering. This is applicable only when passed with vGPU profile. 
Defaults to false.", + since = "4.21") + private Boolean gpuDisplay; + @Parameter(name = ApiConstants.EXTERNAL_DETAILS, type = CommandType.MAP, description = "Details in key/value pairs using format externaldetails[i].keyname=keyvalue. Example: externaldetails[0].endpoint.url=urlvalue", @@ -529,6 +549,18 @@ public boolean isPurgeResources() { return Boolean.TRUE.equals(purgeResources); } + public Long getVgpuProfileId() { + return vgpuProfileId; + } + + public Integer getGpuCount() { + return gpuCount; + } + + public Boolean getGpuDisplay() { + return Boolean.TRUE.equals(gpuDisplay); + } + ///////////////////////////////////////////////////// /////////////// API Implementation/////////////////// ///////////////////////////////////////////////////// diff --git a/api/src/main/java/org/apache/cloudstack/api/command/user/gpu/ListGpuCardsCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/user/gpu/ListGpuCardsCmd.java new file mode 100644 index 000000000000..b035ecbe71b8 --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/command/user/gpu/ListGpuCardsCmd.java @@ -0,0 +1,96 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package org.apache.cloudstack.api.command.user.gpu; + +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.BaseListCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.response.GpuCardResponse; +import org.apache.cloudstack.api.response.ListResponse; + +@APICommand(name = "listGpuCards", description = "Lists all available GPU cards", + responseObject = GpuCardResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.21.0") +public class ListGpuCardsCmd extends BaseListCmd { + + /// ////////////////////////////////////////////////// + /// ///////////// API parameters ///////////////////// + /// ////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.ID, type = CommandType.UUID, entityType = GpuCardResponse.class, + description = "ID of the GPU card") + private Long id; + + @Parameter(name = ApiConstants.VENDOR_NAME, type = CommandType.STRING, + description = "vendor name of the GPU card") + private String vendorName; + + @Parameter(name = ApiConstants.VENDOR_ID, type = CommandType.STRING, + description = "vendor ID of the GPU card") + private String vendorId; + + @Parameter(name = ApiConstants.DEVICE_ID, type = CommandType.STRING, + description = "device ID of the GPU card") + private String deviceId; + + @Parameter(name = ApiConstants.DEVICE_NAME, type = CommandType.STRING, + description = "device name of the GPU card") + private String deviceName; + + @Parameter(name = ApiConstants.ACTIVE_ONLY, type = CommandType.BOOLEAN, + description = "If true, only GPU cards which have a device will be listed. 
If false, all GPU cards will be listed.") + private Boolean activeOnly; + + /// ////////////////////////////////////////////////// + /// //////////////// Accessors /////////////////////// + /// ////////////////////////////////////////////////// + + public Long getId() { + return id; + } + + public String getVendorName() { + return vendorName; + } + + public String getVendorId() { + return vendorId; + } + + public String getDeviceId() { + return deviceId; + } + + public String getDeviceName() { + return deviceName; + } + + public boolean getActiveOnly() { + return Boolean.TRUE.equals(activeOnly); + } + + /// ////////////////////////////////////////////////// + /// //////////// API Implementation/////////////////// + /// ////////////////////////////////////////////////// + + @Override public void execute() { + ListResponse response = gpuService.listGpuCards(this); + response.setResponseName(getCommandName()); + setResponseObject(response); + } +} diff --git a/api/src/main/java/org/apache/cloudstack/api/command/user/gpu/ListGpuDevicesCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/user/gpu/ListGpuDevicesCmd.java new file mode 100644 index 000000000000..19c920628519 --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/command/user/gpu/ListGpuDevicesCmd.java @@ -0,0 +1,65 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +package org.apache.cloudstack.api.command.user.gpu; + +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.BaseListCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ResponseObject; +import org.apache.cloudstack.api.command.user.UserCmd; +import org.apache.cloudstack.api.response.GpuDeviceResponse; +import org.apache.cloudstack.api.response.ListResponse; +import org.apache.cloudstack.api.response.UserVmResponse; +import org.apache.cloudstack.context.CallContext; + +@APICommand(name = "listGpuDevices", description = "Lists all available GPU devices", + responseView = ResponseObject.ResponseView.Restricted, + responseObject = GpuDeviceResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.21.0", authorized = {RoleType.Admin, RoleType.ResourceAdmin, RoleType.DomainAdmin, RoleType.User}) +public class ListGpuDevicesCmd extends BaseListCmd implements UserCmd { + + /// ////////////////////////////////////////////////// + /// ///////////// API parameters ///////////////////// + /// ////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.VIRTUAL_MACHINE_ID, type = CommandType.UUID, entityType = UserVmResponse.class, + description = "the virtual machine ID to which the GPU device is allocated") + private Long vmId; + + /// ////////////////////////////////////////////////// + /// //////////////// Accessors /////////////////////// + /// 
////////////////////////////////////////////////// + + public Long getVmId() { + return vmId; + } + + /// ////////////////////////////////////////////////// + /// //////////// API Implementation ////////////////// + /// ////////////////////////////////////////////////// + + @Override + public void execute() { + CallContext.current().setEventDetails("Listing GPU devices"); + ListResponse response = gpuService.listGpuDevices(this); + response.setResponseName(getCommandName()); + setResponseObject(response); + } + +} diff --git a/api/src/main/java/org/apache/cloudstack/api/command/user/gpu/ListVgpuProfilesCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/user/gpu/ListVgpuProfilesCmd.java new file mode 100644 index 000000000000..85bf91d7aeea --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/command/user/gpu/ListVgpuProfilesCmd.java @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package org.apache.cloudstack.api.command.user.gpu; + +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.BaseListCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.response.GpuCardResponse; +import org.apache.cloudstack.api.response.ListResponse; +import org.apache.cloudstack.api.response.VgpuProfileResponse; + +@APICommand(name = "listVgpuProfiles", description = "Lists all available vGPU profiles", + responseObject = VgpuProfileResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.21.0") +public class ListVgpuProfilesCmd extends BaseListCmd { + + /// ////////////////////////////////////////////////// + /// ///////////// API parameters ///////////////////// + /// ////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.ID, type = CommandType.UUID, entityType = VgpuProfileResponse.class, + description = "ID of the vGPU profile") + private Long id; + + @Parameter(name = ApiConstants.NAME, type = CommandType.STRING, description = "name of the vGPU profile") + private String name; + + @Parameter(name = ApiConstants.GPU_CARD_ID, type = CommandType.UUID, entityType = GpuCardResponse.class, + description = "the GPU card ID associated with this GPU device") + private Long cardId; + + @Parameter(name = ApiConstants.ACTIVE_ONLY, type = CommandType.BOOLEAN, + description = "If true, only vGPU profiles which have a device will be listed. 
If false, all vGPU profiles will be listed.") + private Boolean activeOnly; + + /// ////////////////////////////////////////////////// + /// //////////////// Accessors /////////////////////// + /// ////////////////////////////////////////////////// + + public Long getId() { + return id; + } + + public String getName() { + return name; + } + + public Long getCardId() { + return cardId; + } + + public boolean getActiveOnly() { + return Boolean.TRUE.equals(activeOnly); + } + + /// ////////////////////////////////////////////////// + /// //////////// API Implementation/////////////////// + /// ////////////////////////////////////////////////// + + @Override public void execute() { + ListResponse response = gpuService.listVgpuProfiles(this); + response.setResponseName(getCommandName()); + setResponseObject(response); + } +} diff --git a/api/src/main/java/org/apache/cloudstack/api/command/user/offering/ListServiceOfferingsCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/user/offering/ListServiceOfferingsCmd.java index 1b3f531e370d..3b693fe57b7e 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/user/offering/ListServiceOfferingsCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/user/offering/ListServiceOfferingsCmd.java @@ -26,6 +26,7 @@ import org.apache.cloudstack.api.response.ServiceOfferingResponse; import org.apache.cloudstack.api.response.TemplateResponse; import org.apache.cloudstack.api.response.UserVmResponse; +import org.apache.cloudstack.api.response.VgpuProfileResponse; import org.apache.cloudstack.api.response.ZoneResponse; import org.apache.commons.lang3.EnumUtils; import org.apache.commons.lang3.StringUtils; @@ -110,6 +111,19 @@ public class ListServiceOfferingsCmd extends BaseListProjectAndAccountResourcesC since = "4.20.0") private Long templateId; + @Parameter(name = ApiConstants.VGPU_PROFILE_ID, + type = CommandType.UUID, + entityType = VgpuProfileResponse.class, + description = "The ID of the vGPU profile 
that listed offerings must support", + since = "4.21.0") + private Long vgpuProfileId; + + @Parameter(name = ApiConstants.GPU_ENABLED, + type = CommandType.BOOLEAN, + description = "Flag to indicate if the service offering supports GPU. If set to true, only service offerings that support GPU will be returned.", + since = "4.21.0") + private Boolean gpuEnabled; + ///////////////////////////////////////////////////// /////////////////// Accessors /////////////////////// ///////////////////////////////////////////////////// @@ -171,6 +185,14 @@ public Long getTemplateId() { return templateId; } + public Long getVgpuProfileId() { + return vgpuProfileId; + } + + public Boolean getGpuEnabled() { + return gpuEnabled; + } + ///////////////////////////////////////////////////// /////////////// API Implementation/////////////////// ///////////////////////////////////////////////////// diff --git a/api/src/main/java/org/apache/cloudstack/api/command/user/vm/ListVMsCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/user/vm/ListVMsCmd.java index 8489a0a68a05..a561b6fd24f5 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/user/vm/ListVMsCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/user/vm/ListVMsCmd.java @@ -172,6 +172,12 @@ public class ListVMsCmd extends BaseListRetrieveOnlyResourceCountCmd implements since = "4.21.0") private Boolean onlyLeasedInstances = false; + @Parameter(name = ApiConstants.GPU_ENABLED, + type = CommandType.BOOLEAN, + description = "Flag to indicate if the VMs should be filtered by GPU support. 
If set to true, only VMs that support GPU will be returned.", + since = "4.21.0") + private Boolean gpuEnabled; + @Parameter(name = ApiConstants.EXTENSION_ID, type = CommandType.UUID, entityType = ExtensionResponse.class, description = "The ID of the Orchestrator extension for the VM", since = "4.21.0") @@ -324,6 +330,10 @@ public boolean getOnlyLeasedInstances() { return BooleanUtils.toBoolean(onlyLeasedInstances); } + public Boolean getGpuEnabled() { + return gpuEnabled; + } + public Long getExtensionId() { return extensionId; } diff --git a/api/src/main/java/org/apache/cloudstack/api/response/AccountResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/AccountResponse.java index aaad7f985fc3..3c99e2cbec62 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/AccountResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/AccountResponse.java @@ -231,6 +231,18 @@ public class AccountResponse extends BaseResponse implements ResourceLimitAndCou @Param(description = "the total memory (in MB) available to be created for this account", since = "4.2.0") private String memoryAvailable; + @SerializedName("gpulimit") + @Param(description = "the total number of gpus the account can own", since = "4.21.0") + private String gpuLimit; + + @SerializedName("gputotal") + @Param(description = "the total number of gpus owned by account", since = "4.21.0") + private Long gpuTotal; + + @SerializedName("gpuavailable") + @Param(description = "the total number of gpus available to be created for this account", since = "4.21.0") + private String gpuAvailable; + @SerializedName("primarystoragelimit") @Param(description = "the total primary storage space (in GiB) the account can own", since = "4.2.0") private String primaryStorageLimit; @@ -489,6 +501,21 @@ public void setVmRunning(Integer vmRunning) { this.vmRunning = vmRunning; } + @Override + public void setGpuLimit(String gpuLimit) { + this.gpuLimit = gpuLimit; + } + + @Override + public void 
setGpuTotal(Long gpuTotal) { + this.gpuTotal = gpuTotal; + } + + @Override + public void setGpuAvailable(String gpuAvailable) { + this.gpuAvailable = gpuAvailable; + } + public void setState(String state) { this.state = state; } diff --git a/api/src/main/java/org/apache/cloudstack/api/response/DomainResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/DomainResponse.java index 74fa2cbb1e4c..e2246a1d531f 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/DomainResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/DomainResponse.java @@ -183,6 +183,15 @@ public class DomainResponse extends BaseResponseWithAnnotations implements Resou @SerializedName("memoryavailable") @Param(description="the total memory (in MB) available to be created for this domain", since="4.2.0") private String memoryAvailable; + @SerializedName("gpulimit") @Param(description="the total number of gpus the domain can own", since="4.21.0") + private String gpuLimit; + + @SerializedName("gputotal") @Param(description="the total number of gpus owned by domain", since="4.21.0") + private Long gpuTotal; + + @SerializedName("gpuavailable") @Param(description="the total number of gpus available to be created for this domain", since="4.21.0") + private String gpuAvailable; + @SerializedName("primarystoragelimit") @Param(description="the total primary storage space (in GiB) the domain can own", since="4.2.0") private String primaryStorageLimit; @@ -478,6 +487,21 @@ public void setMemoryAvailable(String memoryAvailable) { this.memoryAvailable = memoryAvailable; } + @Override + public void setGpuLimit(String gpuLimit) { + this.gpuLimit = gpuLimit; + } + + @Override + public void setGpuTotal(Long gpuTotal) { + this.gpuTotal = gpuTotal; + } + + @Override + public void setGpuAvailable(String gpuAvailable) { + this.gpuAvailable = gpuAvailable; + } + @Override public void setPrimaryStorageLimit(String primaryStorageLimit) { this.primaryStorageLimit = 
primaryStorageLimit; diff --git a/api/src/main/java/org/apache/cloudstack/api/response/GpuCardResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/GpuCardResponse.java new file mode 100644 index 000000000000..ad91b3490efa --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/response/GpuCardResponse.java @@ -0,0 +1,109 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package org.apache.cloudstack.api.response;
+
+import com.cloud.serializer.Param;
+import com.google.gson.annotations.SerializedName;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.BaseResponse;
+import org.apache.cloudstack.api.EntityReference;
+import org.apache.cloudstack.gpu.GpuCard;
+
+/**
+ * API response object describing a GPU card.
+ * Every field is copied from a {@link GpuCard} when the response is constructed.
+ * Fields are protected; the VgpuProfileResponse subclass assigns id and name directly.
+ */
+@EntityReference(value = GpuCard.class)
+public class GpuCardResponse extends BaseResponse {
+    @SerializedName(ApiConstants.ID)
+    @Param(description = "the ID of the GPU card")
+    protected String id;
+
+    @SerializedName("deviceid")
+    @Param(description = "the device ID of the GPU card")
+    protected String deviceId;
+
+    @SerializedName("devicename")
+    @Param(description = "the device name of the GPU card")
+    protected String deviceName;
+
+    @SerializedName("name")
+    @Param(description = "the display name of the GPU card")
+    protected String name;
+
+    @SerializedName("vendorname")
+    @Param(description = "the vendor name of the GPU card")
+    protected String vendorName;
+
+    @SerializedName("vendorid")
+    @Param(description = "the vendor ID of the GPU card")
+    protected String vendorId;
+
+    /**
+     * Builds the response from the given card, passing "gpucard" to the parent constructor.
+     *
+     * @param gpuCard the card whose UUID, device, display-name and vendor attributes are copied
+     */
+    public GpuCardResponse(GpuCard gpuCard) {
+        super("gpucard");
+        // The card's UUID, not its internal ID, is what the response exposes as "id".
+        this.id = gpuCard.getUuid();
+        this.deviceId = gpuCard.getDeviceId();
+        this.deviceName = gpuCard.getDeviceName();
+        this.name = gpuCard.getName();
+        this.vendorName = gpuCard.getVendorName();
+        this.vendorId = gpuCard.getVendorId();
+    }
+
+    /** @return the ID (UUID) reported for the GPU card */
+    public String getId() {
+        return this.id;
+    }
+
+    /** @param id the ID to report for the GPU card */
+    public void setId(String id) {
+        this.id = id;
+    }
+
+    /** @return the device ID of the GPU card */
+    public String getDeviceId() {
+        return this.deviceId;
+    }
+
+    /** @param deviceId the device ID of the GPU card */
+    public void setDeviceId(String deviceId) {
+        this.deviceId = deviceId;
+    }
+
+    /** @return the device name of the GPU card */
+    public String getDeviceName() {
+        return this.deviceName;
+    }
+
+    /** @param deviceName the device name of the GPU card */
+    public void setDeviceName(String deviceName) {
+        this.deviceName = deviceName;
+    }
+
+    /** @return the display name of the GPU card */
+    public String getName() {
+        return this.name;
+    }
+
+    /** @param name the display name of the GPU card */
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    /** @return the vendor name of the GPU card */
+    public String getVendorName() {
+        return this.vendorName;
+    }
+
+    /** @param vendorName the vendor name of the GPU card */
+    public void setVendorName(String vendorName) {
+        this.vendorName = vendorName;
+    }
+
+    /** @return the vendor ID of the GPU card */
+    public String getVendorId() {
+        return this.vendorId;
+    }
+
+    /** @param vendorId the vendor ID of the GPU card */
+    public void setVendorId(String vendorId) {
+        this.vendorId = vendorId;
+    }
+}
diff --git a/api/src/main/java/org/apache/cloudstack/api/response/GpuDeviceResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/GpuDeviceResponse.java new file mode 100644 index 000000000000..09e98b54eaa5 --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/response/GpuDeviceResponse.java @@ -0,0 +1,227 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package org.apache.cloudstack.api.response;
+
+import com.cloud.serializer.Param;
+import com.cloud.vm.VirtualMachine;
+import com.google.gson.annotations.SerializedName;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.BaseResponse;
+import org.apache.cloudstack.api.EntityReference;
+import org.apache.cloudstack.gpu.GpuDevice;
+
+/**
+ * API response object describing a single GPU device: where it is attached (host),
+ * what it is (GPU card, vGPU profile, device type, parent device), and which virtual
+ * machine, if any, it is allocated to.
+ * The object starts empty; callers populate it through the setters.
+ */
+@EntityReference(value = GpuDevice.class)
+public class GpuDeviceResponse extends BaseResponse {
+
+    @SerializedName(ApiConstants.ID)
+    @Param(description = "the ID of the GPU device")
+    private String id;
+
+    // The "buss" spelling is kept because the public accessors below use it.
+    @SerializedName(ApiConstants.BUS_ADDRESS)
+    @Param(description = "bus address of the GPU device or MDEV UUID for vGPU devices")
+    private String bussAddress;
+
+    @SerializedName(ApiConstants.GPU_DEVICE_TYPE)
+    @Param(description = "the type of the GPU device (PCI/MDEV/VGPUOnly)")
+    private GpuDevice.DeviceType type;
+
+    @SerializedName(ApiConstants.HOST_ID)
+    @Param(description = "the host ID where the GPU device is attached")
+    private String hostId;
+
+    @SerializedName(ApiConstants.HOST_NAME)
+    @Param(description = "the host name where the GPU device is attached")
+    private String hostName;
+
+    @SerializedName(ApiConstants.GPU_CARD_ID)
+    @Param(description = "the GPU card ID associated with this GPU device")
+    private String gpuCardId;
+
+    @SerializedName(ApiConstants.GPU_CARD_NAME)
+    @Param(description = "the GPU card name associated with this GPU device")
+    private String gpuCardName;
+
+    @SerializedName(ApiConstants.VGPU_PROFILE_ID)
+    @Param(description = "the vGPU profile ID assigned to this GPU device")
+    private String vgpuProfileId;
+
+    @SerializedName(ApiConstants.VGPU_PROFILE_NAME)
+    @Param(description = "the vGPU profile name assigned to this GPU device")
+    private String vgpuProfileName;
+
+    @SerializedName(ApiConstants.VIRTUAL_MACHINE_ID)
+    @Param(description = "the ID of the virtual machine to which this GPU device is allocated")
+    private String vmId;
+
+    @SerializedName(ApiConstants.VIRTUAL_MACHINE_NAME)
+    @Param(description = "the name of the virtual machine to which this GPU device is allocated")
+    private String vmName;
+
+    @SerializedName(ApiConstants.VIRTUAL_MACHINE_STATE)
+    @Param(description = "the state of the virtual machine to which this GPU device is allocated")
+    private VirtualMachine.State vmState;
+
+    @SerializedName(ApiConstants.STATE)
+    @Param(description = "the state of the GPU device (Allocated/Free/Error/PartiallyAllocated)")
+    private GpuDevice.State state;
+
+    @SerializedName(ApiConstants.MANAGED_STATE)
+    @Param(description = "the managed state of the GPU device (Managed/Unmanaged)")
+    private GpuDevice.ManagedState managedState;
+
+    @SerializedName(ApiConstants.PARENT_GPU_DEVICE_ID)
+    @Param(description = "the ID of the parent GPU device, if this is a vGPU")
+    private String parentGpuDeviceId;
+
+    @SerializedName(ApiConstants.NUMA_NODE)
+    @Param(description = "the NUMA node where the GPU device is located")
+    private String numaNode;
+
+
+    /**
+     * Creates an empty response, passing "gpudevice" to the parent constructor.
+     */
+    public GpuDeviceResponse() {
+        // Empty constructor for serialization
+        super("gpudevice");
+    }
+
+    public String getId() {
+        return id;
+    }
+
+    public void setId(String id) {
+        this.id = id;
+    }
+
+    public String getBussAddress() {
+        return bussAddress;
+    }
+
+    public void setBussAddress(String bussAddress) {
+        this.bussAddress = bussAddress;
+    }
+
+    public GpuDevice.DeviceType getType() {
+        return type;
+    }
+
+    public void setType(GpuDevice.DeviceType type) {
+        this.type = type;
+    }
+
+    public String getHostId() {
+        return hostId;
+    }
+
+    public void setHostId(String hostId) {
+        this.hostId = hostId;
+    }
+
+    public String getHostName() {
+        return hostName;
+    }
+
+    public void setHostName(String hostName) {
+        this.hostName = hostName;
+    }
+
+    public String getGpuCardId() {
+        return gpuCardId;
+    }
+
+    public void setGpuCardId(String gpuCardId) {
+        this.gpuCardId = gpuCardId;
+    }
+
+    public String getGpuCardName() {
+        return gpuCardName;
+    }
+
+    public void setGpuCardName(String gpuCardName) {
+        this.gpuCardName = gpuCardName;
+    }
+
+    public String getVgpuProfileId() {
+        return vgpuProfileId;
+    }
+
+    public void setVgpuProfileId(String vgpuProfileId) {
+        this.vgpuProfileId = vgpuProfileId;
+    }
+
+    public String getVgpuProfileName() {
+        return vgpuProfileName;
+    }
+
+    public void setVgpuProfileName(String vgpuProfileName) {
+        this.vgpuProfileName = vgpuProfileName;
+    }
+
+    public String getVmId() {
+        return vmId;
+    }
+
+    public void setVmId(String vmId) {
+        this.vmId = vmId;
+    }
+
+    public String getVmName() {
+        return vmName;
+    }
+
+    public void setVmName(String vmName) {
+        this.vmName = vmName;
+    }
+
+    public VirtualMachine.State getVmState() {
+        return vmState;
+    }
+
+    public void setVmState(VirtualMachine.State vmState) {
+        this.vmState = vmState;
+    }
+
+    public GpuDevice.State getState() {
+        return state;
+    }
+
+    public void setState(GpuDevice.State state) {
+        this.state = state;
+    }
+
+    public GpuDevice.ManagedState getManagedState() {
+        return managedState;
+    }
+
+    public void setManagedState(GpuDevice.ManagedState managedState) {
+        this.managedState = managedState;
+    }
+
+    public String getParentGpuDeviceId() {
+        return parentGpuDeviceId;
+    }
+
+    public void setParentGpuDeviceId(String parentGpuDeviceId) {
+        this.parentGpuDeviceId = parentGpuDeviceId;
+    }
+
+    public String getNumaNode() {
+        return numaNode;
+    }
+
+    public void setNumaNode(String numaNode) {
+        this.numaNode = numaNode;
+    }
+}
diff --git a/api/src/main/java/org/apache/cloudstack/api/response/HostResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/HostResponse.java index 692779b0e30a..ca31bd8b1556 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/HostResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/HostResponse.java @@ -165,6 +165,14 @@ public class HostResponse extends BaseResponseWithAnnotations { @Param(description = "the amount of the host's memory currently used") private Long memoryUsed; + @SerializedName("gputotal") + @Param(description = "Total GPUs on the Host", 
responseObject = Long.class, since = "4.21") + private Long gpuTotal; + + @SerializedName("gpuused") + @Param(description = "Used GPUs on the Host", responseObject = Long.class, since = "4.21") + private Long gpuUsed; + @SerializedName(ApiConstants.GPUGROUP) @Param(description = "GPU cards present in the host", responseObject = GpuResponse.class, since = "4.4") private List gpuGroup; @@ -448,6 +456,14 @@ public void setMemoryUsed(Long memoryUsed) { this.memoryUsed = memoryUsed; } + public void setGpuTotal(Long gpuTotal) { + this.gpuTotal = gpuTotal; + } + + public void setGpuUsed(Long gpuUsed) { + this.gpuUsed = gpuUsed; + } + public void setGpuGroup(List gpuGroup) { this.gpuGroup = gpuGroup; } @@ -920,6 +936,14 @@ public Long getMemoryAllocatedBytes() { return memoryAllocatedBytes; } + public Long getGpuTotal() { + return gpuTotal; + } + + public Long getGpuUsed() { + return gpuUsed; + } + public Boolean getTagARule() { return isTagARule; } diff --git a/api/src/main/java/org/apache/cloudstack/api/response/ProjectResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/ProjectResponse.java index 8bdf042add08..e72f6b860369 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/ProjectResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/ProjectResponse.java @@ -116,6 +116,18 @@ public class ProjectResponse extends BaseResponse implements ResourceLimitAndCou @Param(description = "the total memory (in MB) available to be created for this project", since = "4.2.0") private String memoryAvailable; + @SerializedName("gpulimit") + @Param(description = "the total number of gpus the project can own", since = "4.21.0") + private String gpuLimit; + + @SerializedName("gputotal") + @Param(description = "the total number of gpus owned by project", since = "4.21.0") + private Long gpuTotal; + + @SerializedName("gpuavailable") + @Param(description = "the total number of gpus available to be created for this project", since = "4.21.0") + 
private String gpuAvailable; + @SerializedName("primarystoragelimit") @Param(description = "the total primary storage space (in GiB) the project can own", since = "4.2.0") private String primaryStorageLimit; @@ -483,6 +495,21 @@ public void setMemoryAvailable(String memoryAvailable) { this.memoryAvailable = memoryAvailable; } + @Override + public void setGpuLimit(String gpuLimit) { + this.gpuLimit = gpuLimit; + } + + @Override + public void setGpuTotal(Long gpuTotal) { + this.gpuTotal = gpuTotal; + } + + @Override + public void setGpuAvailable(String gpuAvailable) { + this.gpuAvailable = gpuAvailable; + } + @Override public void setPrimaryStorageLimit(String primaryStorageLimit) { this.primaryStorageLimit = primaryStorageLimit; diff --git a/api/src/main/java/org/apache/cloudstack/api/response/ResourceLimitAndCountResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/ResourceLimitAndCountResponse.java index b86723b36c41..66de71dd7634 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/ResourceLimitAndCountResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/ResourceLimitAndCountResponse.java @@ -48,6 +48,12 @@ public interface ResourceLimitAndCountResponse { public void setMemoryAvailable(String memoryAvailable); + public void setGpuLimit(String gpuLimit); + + public void setGpuTotal(Long gpuTotal); + + public void setGpuAvailable(String gpuAvailable); + public void setPrimaryStorageLimit(String primaryStorageLimit); public void setPrimaryStorageTotal(Long primaryStorageTotal); diff --git a/api/src/main/java/org/apache/cloudstack/api/response/ServiceOfferingResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/ServiceOfferingResponse.java index ca9358e2e5e8..4565a878b348 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/ServiceOfferingResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/ServiceOfferingResponse.java @@ -234,6 +234,46 @@ public class 
ServiceOfferingResponse extends BaseResponseWithAnnotations { @Param(description = "true if virtual machine root disk will be encrypted on storage", since = "4.18") private Boolean encryptRoot; + @SerializedName(ApiConstants.GPU_CARD_ID) + @Param(description = "the ID of the gpu card to which service offering is linked", since = "4.21") + private String gpuCardId; + + @SerializedName(ApiConstants.GPU_CARD_NAME) + @Param(description = "the name of the gpu card to which service offering is linked", since = "4.21") + private String gpuCardName; + + @SerializedName(ApiConstants.VGPU_PROFILE_ID) + @Param(description = "the ID of the vgpu profile to which service offering is linked", since = "4.21") + private String vgpuProfileId; + + @SerializedName(ApiConstants.VGPU_PROFILE_NAME) + @Param(description = "the name of the vgpu profile to which service offering is linked", since = "4.21") + private String vgpuProfileName; + + @SerializedName(ApiConstants.VIDEORAM) + @Param(description = "the video RAM size in MB") + private Long videoRam; + + @SerializedName(ApiConstants.MAXHEADS) + @Param(description = "the maximum number of display heads") + private Long maxHeads; + + @SerializedName(ApiConstants.MAXRESOLUTIONX) + @Param(description = "the maximum X resolution") + private Long maxResolutionX; + + @SerializedName(ApiConstants.MAXRESOLUTIONY) + @Param(description = "the maximum Y resolution") + private Long maxResolutionY; + + @SerializedName(ApiConstants.GPU_COUNT) + @Param(description = "the count of GPUs to attach ", since = "4.21") + private Integer gpuCount; + + @SerializedName(ApiConstants.GPU_DISPLAY) + @Param(description = "whether GPU device is used for display or not ", since = "4.21") + private Boolean gpuDisplay; + @SerializedName(ApiConstants.PURGE_RESOURCES) @Param(description = "Whether to cleanup VM and its associated resource upon expunge", since = "4.20") private Boolean purgeResources; @@ -584,6 +624,86 @@ public String getDiskOfferingDisplayText() { 
public void setEncryptRoot(Boolean encrypt) { this.encryptRoot = encrypt; } + public String getVgpuProfileName() { + return vgpuProfileName; + } + + public void setVgpuProfileName(String vgpuProfileName) { + this.vgpuProfileName = vgpuProfileName; + } + + public Long getVideoRam() { + return videoRam; + } + + public void setVideoRam(Long videoRam) { + this.videoRam = videoRam; + } + + public Long getMaxHeads() { + return maxHeads; + } + + public void setMaxHeads(Long maxHeads) { + this.maxHeads = maxHeads; + } + + public Long getMaxResolutionX() { + return maxResolutionX; + } + + public void setMaxResolutionX(Long maxResolutionX) { + this.maxResolutionX = maxResolutionX; + } + + public Long getMaxResolutionY() { + return maxResolutionY; + } + + public void setMaxResolutionY(Long maxResolutionY) { + this.maxResolutionY = maxResolutionY; + } + + public String getVgpuProfileId() { + return vgpuProfileId; + } + + public void setVgpuProfileId(String vgpuProfileId) { + this.vgpuProfileId = vgpuProfileId; + } + + public String getGpuCardName() { + return gpuCardName; + } + + public void setGpuCardName(String gpuCardName) { + this.gpuCardName = gpuCardName; + } + + public String getGpuCardId() { + return gpuCardId; + } + + public void setGpuCardId(String gpuCardId) { + this.gpuCardId = gpuCardId; + } + + public Integer getGpuCount() { + return gpuCount; + } + + public void setGpuCount(Integer gpuCount) { + this.gpuCount = gpuCount; + } + + public Boolean getGpuDisplay() { + return gpuDisplay; + } + + public void setGpuDisplay(Boolean gpuDisplay) { + this.gpuDisplay = gpuDisplay; + } + public void setPurgeResources(Boolean purgeResources) { this.purgeResources = purgeResources; } diff --git a/api/src/main/java/org/apache/cloudstack/api/response/UserVmResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/UserVmResponse.java index d873bc65709b..ca5bd09a9aa8 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/UserVmResponse.java +++ 
b/api/src/main/java/org/apache/cloudstack/api/response/UserVmResponse.java @@ -182,6 +182,42 @@ public class UserVmResponse extends BaseResponseWithTagInformation implements Co @Param(description = "the name of the disk offering of the virtual machine. This parameter should not be used for retrieving disk offering details of DATA volumes. Use listVolumes API instead", since = "4.4") private String diskOfferingName; + @SerializedName(ApiConstants.GPU_CARD_ID) + @Param(description = "the ID of the gpu card to which service offering is linked", since = "4.21") + private String gpuCardId; + + @SerializedName(ApiConstants.GPU_CARD_NAME) + @Param(description = "the name of the gpu card to which service offering is linked", since = "4.21") + private String gpuCardName; + + @SerializedName(ApiConstants.VGPU_PROFILE_ID) + @Param(description = "the ID of the vgpu profile to which service offering is linked", since = "4.21") + private String vgpuProfileId; + + @SerializedName(ApiConstants.VGPU_PROFILE_NAME) + @Param(description = "the name of the vgpu profile to which service offering is linked", since = "4.21") + private String vgpuProfileName; + + @SerializedName(ApiConstants.VIDEORAM) + @Param(description = "the video RAM size in MB") + private Long videoRam; + + @SerializedName(ApiConstants.MAXHEADS) + @Param(description = "the maximum number of display heads") + private Long maxHeads; + + @SerializedName(ApiConstants.MAXRESOLUTIONX) + @Param(description = "the maximum X resolution") + private Long maxResolutionX; + + @SerializedName(ApiConstants.MAXRESOLUTIONY) + @Param(description = "the maximum Y resolution") + private Long maxResolutionY; + + @SerializedName(ApiConstants.GPU_COUNT) + @Param(description = "the count of GPUs on the virtual machine", since = "4.21") + private Integer gpuCount; + @SerializedName(ApiConstants.BACKUP_OFFERING_ID) @Param(description = "the ID of the backup offering of the virtual machine", since = "4.14") private String backupOfferingId; @@ 
-565,6 +601,42 @@ public String getDiskOfferingName() { return diskOfferingName; } + public String getGpuCardId() { + return gpuCardId; + } + + public String getGpuCardName() { + return gpuCardName; + } + + public String getVgpuProfileId() { + return vgpuProfileId; + } + + public String getVgpuProfileName() { + return vgpuProfileName; + } + + public Long getVideoRam() { + return videoRam; + } + + public Long getMaxHeads() { + return maxHeads; + } + + public Long getMaxResolutionX() { + return maxResolutionX; + } + + public Long getMaxResolutionY() { + return maxResolutionY; + } + + public Integer getGpuCount() { + return gpuCount; + } + public String getBackupOfferingId() { return backupOfferingId; } @@ -847,6 +919,42 @@ public void setDiskOfferingName(String diskOfferingName) { this.diskOfferingName = diskOfferingName; } + public void setGpuCardId(String gpuCardId) { + this.gpuCardId = gpuCardId; + } + + public void setGpuCardName(String gpuCardName) { + this.gpuCardName = gpuCardName; + } + + public void setVgpuProfileId(String vgpuProfileId) { + this.vgpuProfileId = vgpuProfileId; + } + + public void setVgpuProfileName(String vgpuProfileName) { + this.vgpuProfileName = vgpuProfileName; + } + + public void setVideoRam(Long videoRam) { + this.videoRam = videoRam; + } + + public void setMaxHeads(Long maxHeads) { + this.maxHeads = maxHeads; + } + + public void setMaxResolutionX(Long maxResolutionX) { + this.maxResolutionX = maxResolutionX; + } + + public void setMaxResolutionY(Long maxResolutionY) { + this.maxResolutionY = maxResolutionY; + } + + public void setGpuCount(Integer gpuCount) { + this.gpuCount = gpuCount; + } + public void setBackupOfferingId(String backupOfferingId) { this.backupOfferingId = backupOfferingId; } diff --git a/api/src/main/java/org/apache/cloudstack/api/response/VgpuProfileResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/VgpuProfileResponse.java new file mode 100644 index 000000000000..382b391ef592 --- /dev/null +++ 
b/api/src/main/java/org/apache/cloudstack/api/response/VgpuProfileResponse.java
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package org.apache.cloudstack.api.response;
+
+import com.cloud.serializer.Param;
+import com.google.gson.annotations.SerializedName;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.EntityReference;
+import org.apache.cloudstack.gpu.GpuCard;
+import org.apache.cloudstack.gpu.VgpuProfile;
+
+/**
+ * API response object describing a vGPU profile together with the GPU card it belongs to.
+ * The card's device and vendor attributes are inherited from {@link GpuCardResponse};
+ * the ID and name are replaced by those of the vGPU profile.
+ */
+@EntityReference(value = VgpuProfile.class)
+public class VgpuProfileResponse extends GpuCardResponse {
+
+    @SerializedName(ApiConstants.DESCRIPTION)
+    @Param(description = "the description of the vGPU profile")
+    private String description;
+
+    @SerializedName(ApiConstants.GPU_CARD_ID)
+    @Param(description = "the ID of the GPU card associated with this vGPU profile")
+    private String gpuCardId;
+
+    @SerializedName(ApiConstants.GPU_CARD_NAME)
+    @Param(description = "the name of the GPU card associated with this vGPU profile")
+    private String gpuCardName;
+
+    @SerializedName(ApiConstants.MAX_VGPU_PER_PHYSICAL_GPU)
+    @Param(description = "the maximum number of vGPUs per physical GPU")
+    private Long maxVgpuPerPgpu;
+
+    @SerializedName(ApiConstants.VIDEORAM)
+    @Param(description = "the video RAM size in MB")
+    private Long videoRam;
+
+    @SerializedName(ApiConstants.MAXHEADS)
+    @Param(description = "the maximum number of display heads")
+    private Long maxHeads;
+
+    @SerializedName(ApiConstants.MAXRESOLUTIONX)
+    @Param(description = "the maximum X resolution")
+    private Long maxResolutionX;
+
+    @SerializedName(ApiConstants.MAXRESOLUTIONY)
+    @Param(description = "the maximum Y resolution")
+    private Long maxResolutionY;
+
+    /**
+     * Builds the response from a vGPU profile and its GPU card.
+     *
+     * @param vgpuProfile the profile supplying the ID, name, description and display limits
+     * @param gpuCard the card supplying the inherited card attributes, gpuCardId and gpuCardName
+     */
+    public VgpuProfileResponse(VgpuProfile vgpuProfile, GpuCard gpuCard) {
+        super(gpuCard);
+        // super(gpuCard) copied the card's ID and name; report the profile's instead.
+        id = vgpuProfile.getUuid();
+        name = vgpuProfile.getName();
+        description = vgpuProfile.getDescription();
+        gpuCardId = gpuCard.getUuid();
+        gpuCardName = gpuCard.getName();
+        maxVgpuPerPgpu = vgpuProfile.getMaxVgpuPerPgpu();
+        videoRam = vgpuProfile.getVideoRam();
+        maxHeads = vgpuProfile.getMaxHeads();
+        maxResolutionX = vgpuProfile.getMaxResolutionX();
+        maxResolutionY = vgpuProfile.getMaxResolutionY();
+        // The parent constructor passes "gpucard" upwards; set this type's object name explicitly.
+        setObjectName("vgpuprofile");
+    }
+
+    @Override
+    public String getName() {
+        return name;
+    }
+
+    @Override
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public String getDescription() {
+        return description;
+    }
+
+    public String getGpuCardId() {
+        return gpuCardId;
+    }
+
+    public String getGpuCardName() {
+        return gpuCardName;
+    }
+
+    public Long getMaxVgpuPerPgpu() {
+        return maxVgpuPerPgpu;
+    }
+
+    public void setDescription(String description) {
+        this.description = description;
+    }
+
+    public Long getVideoRam() {
+        return videoRam;
+    }
+
+    public void setVideoRam(Long videoRam) {
+        this.videoRam = videoRam;
+    }
+
+    public Long getMaxHeads() {
+        return maxHeads;
+    }
+
+    public void setMaxHeads(Long maxHeads) {
+        this.maxHeads = maxHeads;
+    }
+
+    public Long getMaxResolutionX() {
+        return maxResolutionX;
+    }
+
+    public void setMaxResolutionX(Long maxResolutionX) {
+        this.maxResolutionX = maxResolutionX;
+    }
+
+    public Long getMaxResolutionY() {
+        return maxResolutionY;
+    }
+
+    public void setMaxResolutionY(Long maxResolutionY) {
+        this.maxResolutionY = maxResolutionY;
+    }
+}
diff --git a/api/src/main/java/org/apache/cloudstack/api/response/ZoneResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/ZoneResponse.java index 975d9edcca1f..09e53dbb1462 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/ZoneResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/ZoneResponse.java @@ -97,6 +97,14 @@ public class ZoneResponse extends BaseResponseWithAnnotations implements SetReso @Param(description = "true if security groups support is enabled, false otherwise") private Boolean securityGroupsEnabled; + @SerializedName("gputotal") + @Param(description = "Total GPUs in the Zone", responseObject = Long.class, since = "4.21") + private Long gpuTotal; + + @SerializedName("gpuused") + @Param(description = "Used GPUs in the Zone", responseObject = Long.class, since = "4.21") + private Long gpuUsed; + @SerializedName("allocationstate") @Param(description = "the allocation state of the cluster") private String allocationState; @@ -231,6 +239,14 @@ public void setSecurityGroupsEnabled(boolean securityGroupsEnabled) { this.securityGroupsEnabled = securityGroupsEnabled; } + public void setGpuTotal(Long gpuTotal) { + this.gpuTotal = gpuTotal; + } + + public void setGpuUsed(Long gpuUsed) { + this.gpuUsed = gpuUsed; + } + public void setAllocationState(String allocationState) { this.allocationState = allocationState; } @@ -366,6 +382,14 @@ public boolean isSecurityGroupsEnabled() { return securityGroupsEnabled; } + public Long getGpuUsed() { + return gpuUsed; + } + + public Long getGpuTotal() { + return gpuTotal; + } + public boolean isLocalStorageEnabled() { return localStorageEnabled; } diff --git a/api/src/main/java/org/apache/cloudstack/gpu/GpuCard.java b/api/src/main/java/org/apache/cloudstack/gpu/GpuCard.java new file mode 100644 index 000000000000..2c02a0e30c26 --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/gpu/GpuCard.java @@ -0,0 +1,69 @@ +// 
Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package org.apache.cloudstack.gpu;
+
+import org.apache.cloudstack.api.Identity;
+import org.apache.cloudstack.api.InternalIdentity;
+
+import java.util.Date;
+
+/**
+ * GPU card interface representing a physical GPU card model.
+ * The UUID, device, name and vendor getters supply the values that GpuCardResponse
+ * copies into the API response for a card.
+ */
+public interface GpuCard extends InternalIdentity, Identity {
+    /**
+     * @return the UUID of the GPU card; reported as the card's ID in API responses
+     */
+    String getUuid();
+
+    /**
+     * @return the device ID of the GPU card
+     */
+    String getDeviceId();
+
+    /**
+     * @return the device name of the GPU card
+     */
+    String getDeviceName();
+
+    /**
+     * @return the name of the GPU card; reported as its display name in API responses
+     */
+    String getName();
+
+    /**
+     * @return the vendor name of the GPU card
+     */
+    String getVendorName();
+
+    /**
+     * @return the vendor ID of the GPU card
+     */
+    String getVendorId();
+
+    /**
+     * @return the date when the GPU card was created
+     */
+    Date getCreated();
+
+
+    /**
+     * @return the group name of the GPU card, in the form XenServer expects
+     */
+    String getGroupName();
+
+}
diff --git a/api/src/main/java/org/apache/cloudstack/gpu/GpuDevice.java b/api/src/main/java/org/apache/cloudstack/gpu/GpuDevice.java new file mode 100644 index 000000000000..22adda464779 --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/gpu/GpuDevice.java @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package org.apache.cloudstack.gpu;
+
+import org.apache.cloudstack.api.Identity;
+import org.apache.cloudstack.api.InternalIdentity;
+
+/**
+ * GPU device interface representing a GPU device attached to a host.
+ * Besides the host and allocation state exposed here, the nested enums name the
+ * values used for a device's allocation state, managed state and device type.
+ */
+public interface GpuDevice extends InternalIdentity, Identity {
+
+    /**
+     * Allocation state of a GPU device.
+     * NOTE(review): PartiallyAllocated presumably applies to a parent device with only
+     * some of its vGPUs allocated; confirm against the implementation.
+     */
+    enum State {
+        Allocated, Free, Error, PartiallyAllocated,
+    }
+
+    /**
+     * Whether the device is managed or unmanaged.
+     * NOTE(review): the effect of Unmanaged on allocation is not visible here; confirm.
+     */
+    enum ManagedState {
+        Managed, Unmanaged,
+    }
+
+    /**
+     * Kind of GPU device.
+     * NOTE(review): PCI presumably denotes a PCI device and MDEV a mediated (vGPU)
+     * device identified by an MDEV UUID; the meaning of VGPUOnly is not visible here.
+     */
+    enum DeviceType {
+        PCI, MDEV, VGPUOnly,
+    }
+
+    /**
+     * @return the ID of the host where the GPU device is attached
+     */
+    long getHostId();
+
+    /**
+     * @return the current allocation state of the GPU device
+     */
+    State getState();
+}
diff --git a/api/src/main/java/org/apache/cloudstack/gpu/GpuService.java b/api/src/main/java/org/apache/cloudstack/gpu/GpuService.java new file mode 100644 index 000000000000..1e7928a319f6 --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/gpu/GpuService.java @@ -0,0 +1,157 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package org.apache.cloudstack.gpu; + +import com.cloud.agent.api.VgpuTypesInfo; +import com.cloud.agent.api.to.GPUDeviceTO; +import com.cloud.host.Host; +import com.cloud.utils.component.Manager; +import com.cloud.vm.VirtualMachine; +import org.apache.cloudstack.api.command.admin.gpu.CreateGpuCardCmd; +import org.apache.cloudstack.api.command.admin.gpu.CreateGpuDeviceCmd; +import org.apache.cloudstack.api.command.admin.gpu.CreateVgpuProfileCmd; +import org.apache.cloudstack.api.command.admin.gpu.DeleteGpuCardCmd; +import org.apache.cloudstack.api.command.admin.gpu.DeleteGpuDeviceCmd; +import org.apache.cloudstack.api.command.admin.gpu.DeleteVgpuProfileCmd; +import org.apache.cloudstack.api.command.admin.gpu.UnmanageGpuDeviceCmd; +import org.apache.cloudstack.api.command.admin.gpu.DiscoverGpuDevicesCmd; +import org.apache.cloudstack.api.command.admin.gpu.ManageGpuDeviceCmd; +import org.apache.cloudstack.api.command.user.gpu.ListGpuDevicesCmd; +import org.apache.cloudstack.api.command.admin.gpu.UpdateGpuCardCmd; +import org.apache.cloudstack.api.command.admin.gpu.UpdateGpuDeviceCmd; +import org.apache.cloudstack.api.command.admin.gpu.UpdateVgpuProfileCmd; +import org.apache.cloudstack.api.command.user.gpu.ListGpuCardsCmd; +import org.apache.cloudstack.api.command.user.gpu.ListVgpuProfilesCmd; +import org.apache.cloudstack.api.response.GpuCardResponse; +import org.apache.cloudstack.api.response.GpuDeviceResponse; +import org.apache.cloudstack.api.response.ListResponse; +import org.apache.cloudstack.api.response.VgpuProfileResponse; +import org.apache.cloudstack.framework.config.ConfigKey; + +import java.util.HashMap; +import java.util.List; + +public interface GpuService extends Manager { + + ConfigKey<Boolean> GpuDetachOnStop = new ConfigKey<>(Boolean.class, "gpu.detach.on.stop", "Advanced", "false", + "Whether to detach GPU devices from VM on stop or keep them allocated", true, ConfigKey.Scope.Domain, null); + + GpuCard createGpuCard(CreateGpuCardCmd cmd); + + GpuCard
updateGpuCard(UpdateGpuCardCmd cmd); + + boolean deleteGpuCard(DeleteGpuCardCmd cmd); + + VgpuProfileResponse createVgpuProfile(CreateVgpuProfileCmd cmd); + + VgpuProfileResponse updateVgpuProfile(UpdateVgpuProfileCmd cmd); + + boolean deleteVgpuProfile(DeleteVgpuProfileCmd cmd); + + ListResponse listGpuCards(ListGpuCardsCmd cmd); + + ListResponse listVgpuProfiles(ListVgpuProfilesCmd cmd); + + GpuDeviceResponse createGpuDevice(CreateGpuDeviceCmd cmd); + + GpuDeviceResponse updateGpuDevice(UpdateGpuDeviceCmd cmd); + + ListResponse listGpuDevices(ListGpuDevicesCmd cmd); + + boolean disableGpuDevice(UnmanageGpuDeviceCmd cmd); + + boolean enableGpuDevice(ManageGpuDeviceCmd cmd); + + /** + * Deallocate all GPU devices for a VM. + * + * @param vmId The ID of the VM to deallocate GPU devices for. + */ + void deallocateAllGpuDevicesForVm(long vmId); + + /** + * Deallocate GPU devices for a VM on a specific host. + * + * @param vmId The ID of the VM to deallocate GPU devices for. + * @param hostId The ID of the host to deallocate GPU devices on. + */ + void deallocateGpuDevicesForVmOnHost(long vmId, long hostId); + + /** + * Deallocate existing GPU devices for a VM on a host and allocate new GPU devices to the VM. + * + * @param vmId The ID of the VM to allocate GPU devices to. + * @param hostId The ID of the host to allocate GPU devices to. + * @param gpuDevices The list of GPU devices to allocate to the VM. + */ + void allocateGpuDevicesToVmOnHost(long vmId, long hostId, List gpuDevices); + + /** + * Discover GPU devices on a host by using the getGPUStatistics command and updating the GPU details for the host. + * + * @param cmd The command to discover GPU devices. + * @return The list of GPU devices. + */ + ListResponse discoverGpuDevices(DiscoverGpuDevicesCmd cmd); + + /** + * Check if GPU devices are available for a VM on a host by checking the number of available GPU devices for the + * vGPU profile. + * + * @param host The host to check GPU devices for. + * @param vmId The ID of the VM to check GPU devices for.
+ * @param vgpuProfile The vGPU profile to check GPU devices for. + * @param gpuCount The number of GPU devices to check for. + * @return True if GPU devices are available, false otherwise. + */ + boolean isGPUDeviceAvailable(Host host, Long vmId, VgpuProfile vgpuProfile, int gpuCount); + + /** + * Get GPU devices for a VM on the given host ({@code hostId}) by checking the number of available GPU devices for + * the vGPU profile. If the VM already has GPU devices assigned, deallocate them and allocate new GPU devices to + * the VM. The new GPU devices are allocated optimally to the VM. + * + * @param vm The VM to get GPU devices for. + * @param vgpuProfile The vGPU profile to get GPU devices for. + * @param gpuCount The number of GPU devices to get. + * @return The GPU devices. + */ + GPUDeviceTO getGPUDevice(VirtualMachine vm, long hostId, VgpuProfile vgpuProfile, int gpuCount); + + /** + * Gets the GPU group details from the GPU devices on a host. + * This fetches the GPU devices from the host and prepares the GPU group details for the host. + * The GPU group details are a map of GPU group name (Card's device name) to a map of vGPU profile name to + * VgpuTypesInfo. + * The VgpuTypesInfo contains the information about the GPU device. + * + * @param hostId The host ID to get GPU group details for. + * @return The GPU group details. + */ + HashMap<String, HashMap<String, VgpuTypesInfo>> getGpuGroupDetailsFromGpuDevicesOnHost(long hostId); + + /** + * This method is used to add the GPU devices to the host when the host is discovered or when the GPU devices are + * updated. + * + * @param host The host to add the GPU devices to. + * @param newGpuDevicesInfo The list of GPU devices to add to the host.
+ */ + void addGpuDevicesToHost(Host host, List newGpuDevicesInfo); + + boolean deleteGpuDevices(DeleteGpuDeviceCmd deleteGpuDeviceCmd); +} diff --git a/api/src/main/java/org/apache/cloudstack/gpu/VgpuProfile.java b/api/src/main/java/org/apache/cloudstack/gpu/VgpuProfile.java new file mode 100644 index 000000000000..8cfac2a20de8 --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/gpu/VgpuProfile.java @@ -0,0 +1,74 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+package org.apache.cloudstack.gpu; + +import org.apache.cloudstack.api.Identity; +import org.apache.cloudstack.api.InternalIdentity; + +import java.util.Date; + +/** + * vGPU profile interface representing a virtualized GPU profile + */ +public interface VgpuProfile extends InternalIdentity, Identity { + /** + * @return the UUID of the vGPU profile + */ + String getUuid(); + + /** + * @return the name of the vGPU profile + */ + String getName(); + + /** + * @return the description of the vGPU profile + */ + String getDescription(); + + /** + * @return the date when the vGPU profile was created + */ + Date getCreated(); + + Long getCardId(); + + /** + * @return the maximum number of vGPUs per physical GPU + */ + Long getMaxVgpuPerPgpu(); + + /** + * @return the video RAM size in MB + */ + Long getVideoRam(); + + /** + * @return the maximum number of display heads + */ + Long getMaxHeads(); + + /** + * @return the maximum X resolution + */ + Long getMaxResolutionX(); + + /** + * @return the maximum Y resolution + */ + Long getMaxResolutionY(); +} diff --git a/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/CreateGpuCardCmdTest.java b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/CreateGpuCardCmdTest.java new file mode 100644 index 000000000000..be21384109c2 --- /dev/null +++ b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/CreateGpuCardCmdTest.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.junit.Test; +import org.springframework.test.util.ReflectionTestUtils; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +public class CreateGpuCardCmdTest { + + @Test + public void getDeviceId() { + CreateGpuCardCmd cmd = new CreateGpuCardCmd(); + assertNull(cmd.getDeviceId()); + String deviceId = "0000:00:1f.6"; + ReflectionTestUtils.setField(cmd, "deviceId", deviceId); + assertEquals(deviceId, cmd.getDeviceId()); + } + + @Test + public void getDeviceName() { + CreateGpuCardCmd cmd = new CreateGpuCardCmd(); + assertNull(cmd.getDeviceName()); + String deviceName = "NVIDIA GeForce GTX 1080"; + ReflectionTestUtils.setField(cmd, "deviceName", deviceName); + assertEquals(deviceName, cmd.getDeviceName()); + } + + @Test + public void getName() { + CreateGpuCardCmd cmd = new CreateGpuCardCmd(); + assertNull(cmd.getName()); + String name = "Test GPU Card"; + ReflectionTestUtils.setField(cmd, "name", name); + assertEquals(name, cmd.getName()); + } + + @Test + public void getVendorName() { + CreateGpuCardCmd cmd = new CreateGpuCardCmd(); + assertNull(cmd.getVendorName()); + String vendorName = "NVIDIA"; + ReflectionTestUtils.setField(cmd, "vendorName", vendorName); + assertEquals(vendorName, cmd.getVendorName()); + } + + @Test + public void getVendorId() { + CreateGpuCardCmd cmd = new CreateGpuCardCmd(); + assertNull(cmd.getVendorId()); + String vendorId = "10de"; // NVIDIA vendor ID + 
ReflectionTestUtils.setField(cmd, "vendorId", vendorId); + assertEquals(vendorId, cmd.getVendorId()); + } + + @Test + public void getVideoRam() { + CreateGpuCardCmd cmd = new CreateGpuCardCmd(); + assertNull(cmd.getVideoRam()); + Long videoRam = 8192L; // 8 GB + ReflectionTestUtils.setField(cmd, "videoRam", videoRam); + assertEquals(videoRam, cmd.getVideoRam()); + } + + @Test + public void getEntityOwnerId() { + CreateGpuCardCmd cmd = new CreateGpuCardCmd(); + assertEquals(Account.ACCOUNT_ID_SYSTEM, cmd.getEntityOwnerId()); + } +} diff --git a/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/CreateGpuDeviceCmdTest.java b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/CreateGpuDeviceCmdTest.java new file mode 100644 index 000000000000..fd5c568d5bc5 --- /dev/null +++ b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/CreateGpuDeviceCmdTest.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.apache.cloudstack.gpu.GpuDevice; +import org.junit.Test; +import org.springframework.test.util.ReflectionTestUtils; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +public class CreateGpuDeviceCmdTest { + + @Test + public void getHostId() { + CreateGpuDeviceCmd cmd = new CreateGpuDeviceCmd(); + assertNull(cmd.getHostId()); + Long hostId = 1L; + ReflectionTestUtils.setField(cmd, "hostId", hostId); + assertEquals(hostId, cmd.getHostId()); + } + + @Test + public void getBusAddress() { + CreateGpuDeviceCmd cmd = new CreateGpuDeviceCmd(); + assertNull(cmd.getBusAddress()); + String busAddress = "0000:00:1f.6"; + ReflectionTestUtils.setField(cmd, "busAddress", busAddress); + assertEquals(busAddress, cmd.getBusAddress()); + } + + @Test + public void getGpuCardId() { + CreateGpuDeviceCmd cmd = new CreateGpuDeviceCmd(); + assertNull(cmd.getGpuCardId()); + Long gpuCardId = 1L; + ReflectionTestUtils.setField(cmd, "gpuCardId", gpuCardId); + assertEquals(gpuCardId, cmd.getGpuCardId()); + } + + @Test + public void getVgpuProfileId() { + CreateGpuDeviceCmd cmd = new CreateGpuDeviceCmd(); + assertNull(cmd.getVgpuProfileId()); + Long vgpuProfileId = 1L; + ReflectionTestUtils.setField(cmd, "vgpuProfileId", vgpuProfileId); + assertEquals(vgpuProfileId, cmd.getVgpuProfileId()); + } + + @Test + public void getType() { + CreateGpuDeviceCmd cmd = new CreateGpuDeviceCmd(); + assertEquals(GpuDevice.DeviceType.PCI, cmd.getType()); + String type = "MDEV"; + ReflectionTestUtils.setField(cmd, "type", type); + assertEquals(GpuDevice.DeviceType.MDEV, cmd.getType()); + } + + @Test + public void getParentGpuDeviceId() { + CreateGpuDeviceCmd cmd = new CreateGpuDeviceCmd(); + assertNull(cmd.getParentGpuDeviceId()); + Long parentGpuDeviceId = 1L; + ReflectionTestUtils.setField(cmd, "parentGpuDeviceId", parentGpuDeviceId); + 
assertEquals(parentGpuDeviceId, cmd.getParentGpuDeviceId()); + } + + @Test + public void getNumaNode() { + CreateGpuDeviceCmd cmd = new CreateGpuDeviceCmd(); + assertEquals("-1", cmd.getNumaNode()); + String numaNode = "0"; + ReflectionTestUtils.setField(cmd, "numaNode", numaNode); + assertEquals(numaNode, cmd.getNumaNode()); + } + + @Test + public void getEntityOwnerId() { + CreateGpuDeviceCmd cmd = new CreateGpuDeviceCmd(); + assertEquals(Account.ACCOUNT_ID_SYSTEM, cmd.getEntityOwnerId()); + } +} diff --git a/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/CreateVgpuProfileCmdTest.java b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/CreateVgpuProfileCmdTest.java new file mode 100644 index 000000000000..c71286bda652 --- /dev/null +++ b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/CreateVgpuProfileCmdTest.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.junit.Test; +import org.springframework.test.util.ReflectionTestUtils; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +public class CreateVgpuProfileCmdTest { + + @Test + public void getName() { + CreateVgpuProfileCmd cmd = new CreateVgpuProfileCmd(); + assertNull(cmd.getName()); + String name = "Test VGPU Profile"; + ReflectionTestUtils.setField(cmd, "name", name); + assertEquals(name, cmd.getName()); + } + + @Test + public void getDescription() { + CreateVgpuProfileCmd cmd = new CreateVgpuProfileCmd(); + assertNull(cmd.getDescription()); + String description = "Test VGPU Profile Description"; + ReflectionTestUtils.setField(cmd, "description", description); + assertEquals(description, cmd.getDescription()); + } + + @Test + public void getCardId() { + CreateVgpuProfileCmd cmd = new CreateVgpuProfileCmd(); + assertNull(cmd.getCardId()); + Long cardId = 1L; + ReflectionTestUtils.setField(cmd, "cardId", cardId); + assertEquals(cardId, cmd.getCardId()); + } + + @Test + public void getMaxVgpuPerPgpu() { + CreateVgpuProfileCmd cmd = new CreateVgpuProfileCmd(); + assertNull(cmd.getMaxVgpuPerPgpu()); + Long maxVgpuPerPgpu = 8L; + ReflectionTestUtils.setField(cmd, "maxVgpuPerPgpu", maxVgpuPerPgpu); + assertEquals(maxVgpuPerPgpu, cmd.getMaxVgpuPerPgpu()); + } + + @Test + public void getVideoRam() { + CreateVgpuProfileCmd cmd = new CreateVgpuProfileCmd(); + assertNull(cmd.getVideoRam()); + Long videoRam = 8192L; // 8 GB + ReflectionTestUtils.setField(cmd, "videoRam", videoRam); + assertEquals(videoRam, cmd.getVideoRam()); + } + + @Test + public void getMaxHeads() { + CreateVgpuProfileCmd cmd = new CreateVgpuProfileCmd(); + assertNull(cmd.getMaxHeads()); + Long maxHeads = 2L; + ReflectionTestUtils.setField(cmd, "maxHeads", maxHeads); + assertEquals(maxHeads, cmd.getMaxHeads()); + } + + @Test + public void getMaxResolutionX() 
{ + CreateVgpuProfileCmd cmd = new CreateVgpuProfileCmd(); + assertNull(cmd.getMaxResolutionX()); + Long maxResolutionX = 1920L; // 1920 pixels + ReflectionTestUtils.setField(cmd, "maxResolutionX", maxResolutionX); + assertEquals(maxResolutionX, cmd.getMaxResolutionX()); + } + + @Test + public void getMaxResolutionY() { + CreateVgpuProfileCmd cmd = new CreateVgpuProfileCmd(); + assertNull(cmd.getMaxResolutionY()); + Long maxResolutionY = 1080L; // 1080 pixels + ReflectionTestUtils.setField(cmd, "maxResolutionY", maxResolutionY); + assertEquals(maxResolutionY, cmd.getMaxResolutionY()); + } + + @Test + public void getEntityOwnerId() { + CreateVgpuProfileCmd cmd = new CreateVgpuProfileCmd(); + assertEquals(Account.ACCOUNT_ID_SYSTEM, cmd.getEntityOwnerId()); + } +} diff --git a/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/DeleteGpuCardCmdTest.java b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/DeleteGpuCardCmdTest.java new file mode 100644 index 000000000000..21df915b4209 --- /dev/null +++ b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/DeleteGpuCardCmdTest.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.junit.Test; +import org.springframework.test.util.ReflectionTestUtils; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +public class DeleteGpuCardCmdTest { + + @Test + public void getId() { + DeleteGpuCardCmd cmd = new DeleteGpuCardCmd(); + assertNull(cmd.getId()); + Long id = 1L; + ReflectionTestUtils.setField(cmd, "id", id); + assertEquals(id, cmd.getId()); + } + + @Test + public void getEntityOwnerId() { + DeleteGpuCardCmd cmd = new DeleteGpuCardCmd(); + assertEquals(Account.ACCOUNT_ID_SYSTEM, cmd.getEntityOwnerId()); + } +} diff --git a/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/DeleteGpuDeviceCmdTest.java b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/DeleteGpuDeviceCmdTest.java new file mode 100644 index 000000000000..02b04dd307a8 --- /dev/null +++ b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/DeleteGpuDeviceCmdTest.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.junit.Test; +import org.springframework.test.util.ReflectionTestUtils; + +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +public class DeleteGpuDeviceCmdTest { + + @Test + public void getIds() { + DeleteGpuDeviceCmd cmd = new DeleteGpuDeviceCmd(); + assertNull(cmd.getIds()); + List ids = List.of(1L, 2L, 3L); + ReflectionTestUtils.setField(cmd, "ids", ids); + assertEquals(ids, cmd.getIds()); + } + + @Test + public void getEntityOwnerId() { + DeleteGpuDeviceCmd cmd = new DeleteGpuDeviceCmd(); + assertEquals(Account.ACCOUNT_ID_SYSTEM, cmd.getEntityOwnerId()); + } +} diff --git a/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/DeleteVgpuProfileCmdTest.java b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/DeleteVgpuProfileCmdTest.java new file mode 100644 index 000000000000..ecd43810e65c --- /dev/null +++ b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/DeleteVgpuProfileCmdTest.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.junit.Test; +import org.springframework.test.util.ReflectionTestUtils; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +public class DeleteVgpuProfileCmdTest { + + @Test + public void getId() { + DeleteVgpuProfileCmd cmd = new DeleteVgpuProfileCmd(); + assertNull(cmd.getId()); + Long id = 1L; + ReflectionTestUtils.setField(cmd, "id", id); + assertEquals(id, cmd.getId()); + } + + @Test + public void getEntityOwnerId() { + DeleteVgpuProfileCmd cmd = new DeleteVgpuProfileCmd(); + assertEquals(Account.ACCOUNT_ID_SYSTEM, cmd.getEntityOwnerId()); + } +} diff --git a/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/DiscoverGpuDevicesCmdTest.java b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/DiscoverGpuDevicesCmdTest.java new file mode 100644 index 000000000000..8295e06e0d55 --- /dev/null +++ b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/DiscoverGpuDevicesCmdTest.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.cloudstack.api.command.admin.gpu; + +import org.junit.Test; +import org.springframework.test.util.ReflectionTestUtils; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +public class DiscoverGpuDevicesCmdTest { + + @Test + public void getId() { + DiscoverGpuDevicesCmd cmd = new DiscoverGpuDevicesCmd(); + assertNull(cmd.getId()); + Long id = 1L; + ReflectionTestUtils.setField(cmd, "id", id); + assertEquals(id, cmd.getId()); + } +} diff --git a/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/ListGpuDevicesCmdByAdminTest.java b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/ListGpuDevicesCmdByAdminTest.java new file mode 100644 index 000000000000..200bce769336 --- /dev/null +++ b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/ListGpuDevicesCmdByAdminTest.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.cloudstack.api.command.admin.gpu; + +import org.junit.Test; +import org.springframework.test.util.ReflectionTestUtils; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +public class ListGpuDevicesCmdByAdminTest { + + @Test + public void getId() { + ListGpuDevicesCmdByAdmin cmd = new ListGpuDevicesCmdByAdmin(); + assertNull(cmd.getId()); + Long id = 1L; + ReflectionTestUtils.setField(cmd, "id", id); + assertEquals(id, cmd.getId()); + } + + @Test + public void getHostId() { + ListGpuDevicesCmdByAdmin cmd = new ListGpuDevicesCmdByAdmin(); + assertNull(cmd.getHostId()); + Long hostId = 1L; + ReflectionTestUtils.setField(cmd, "hostId", hostId); + assertEquals(hostId, cmd.getHostId()); + } + + @Test + public void getGpuCardId() { + ListGpuDevicesCmdByAdmin cmd = new ListGpuDevicesCmdByAdmin(); + assertNull(cmd.getGpuCardId()); + Long gpuCardId = 1L; + ReflectionTestUtils.setField(cmd, "gpuCardId", gpuCardId); + assertEquals(gpuCardId, cmd.getGpuCardId()); + } + + @Test + public void getVgpuProfileId() { + ListGpuDevicesCmdByAdmin cmd = new ListGpuDevicesCmdByAdmin(); + assertNull(cmd.getVgpuProfileId()); + Long vgpuProfileId = 1L; + ReflectionTestUtils.setField(cmd, "vgpuProfileId", vgpuProfileId); + assertEquals(vgpuProfileId, cmd.getVgpuProfileId()); + } +} diff --git a/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/ManageGpuDeviceCmdTest.java b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/ManageGpuDeviceCmdTest.java new file mode 100644 index 000000000000..ee862409a939 --- /dev/null +++ b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/ManageGpuDeviceCmdTest.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.junit.Test; +import org.springframework.test.util.ReflectionTestUtils; + +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +public class ManageGpuDeviceCmdTest { + + @Test + public void getIds() { + ManageGpuDeviceCmd cmd = new ManageGpuDeviceCmd(); + assertNull(cmd.getIds()); + List ids = List.of(1L, 2L, 3L); + ReflectionTestUtils.setField(cmd, "ids", ids); + assertEquals(ids, cmd.getIds()); + } + + @Test + public void getEntityOwnerId() { + ManageGpuDeviceCmd cmd = new ManageGpuDeviceCmd(); + assertEquals(Account.ACCOUNT_ID_SYSTEM, cmd.getEntityOwnerId()); + } +} diff --git a/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/UnmanageGpuDeviceCmdTest.java b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/UnmanageGpuDeviceCmdTest.java new file mode 100644 index 000000000000..63700d9e908d --- /dev/null +++ b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/UnmanageGpuDeviceCmdTest.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.junit.Test; +import org.springframework.test.util.ReflectionTestUtils; + +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +public class UnmanageGpuDeviceCmdTest { + + @Test + public void getIds() { + UnmanageGpuDeviceCmd cmd = new UnmanageGpuDeviceCmd(); + assertNull(cmd.getIds()); + List ids = List.of(1L, 2L, 3L); + ReflectionTestUtils.setField(cmd, "ids", ids); + assertEquals(ids, cmd.getIds()); + } + + @Test + public void getEntityOwnerId() { + UnmanageGpuDeviceCmd cmd = new UnmanageGpuDeviceCmd(); + assertEquals(Account.ACCOUNT_ID_SYSTEM, cmd.getEntityOwnerId()); + } +} diff --git a/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/UpdateGpuCardCmdTest.java b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/UpdateGpuCardCmdTest.java new file mode 100644 index 000000000000..ead7ab9d3d30 --- /dev/null +++ b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/UpdateGpuCardCmdTest.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.junit.Test; +import org.springframework.test.util.ReflectionTestUtils; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +public class UpdateGpuCardCmdTest { + + @Test + public void getId() { + UpdateGpuCardCmd cmd = new UpdateGpuCardCmd(); + assertNull(cmd.getId()); + Long id = 1L; + ReflectionTestUtils.setField(cmd, "id", id); + assertEquals(id, cmd.getId()); + } + + @Test + public void getDeviceName() { + UpdateGpuCardCmd cmd = new UpdateGpuCardCmd(); + assertNull(cmd.getDeviceName()); + String deviceName = "GPU-1234"; + ReflectionTestUtils.setField(cmd, "deviceName", deviceName); + assertEquals(deviceName, cmd.getDeviceName()); + } + + @Test + public void getName() { + UpdateGpuCardCmd cmd = new UpdateGpuCardCmd(); + assertNull(cmd.getName()); + String name = "Test GPU Card"; + ReflectionTestUtils.setField(cmd, "name", name); + assertEquals(name, cmd.getName()); + } + + @Test + public void getVendorName() { + UpdateGpuCardCmd cmd = new UpdateGpuCardCmd(); + assertNull(cmd.getVendorName()); + String vendorName = "NVIDIA"; + ReflectionTestUtils.setField(cmd, "vendorName", vendorName); + assertEquals(vendorName, cmd.getVendorName()); + } + + @Test + public void getEntityOwnerId() { + UpdateGpuCardCmd cmd = new UpdateGpuCardCmd(); + 
assertEquals(Account.ACCOUNT_ID_SYSTEM, cmd.getEntityOwnerId()); + } +} diff --git a/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/UpdateGpuDeviceCmdTest.java b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/UpdateGpuDeviceCmdTest.java new file mode 100644 index 000000000000..6ebec48aa003 --- /dev/null +++ b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/UpdateGpuDeviceCmdTest.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.apache.cloudstack.gpu.GpuDevice; +import org.junit.Test; +import org.springframework.test.util.ReflectionTestUtils; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +public class UpdateGpuDeviceCmdTest { + + @Test + public void getId() { + UpdateGpuDeviceCmd cmd = new UpdateGpuDeviceCmd(); + assertNull(cmd.getId()); + Long id = 1L; + ReflectionTestUtils.setField(cmd, "id", id); + assertEquals(id, cmd.getId()); + } + + @Test + public void getGpuCardId() { + UpdateGpuDeviceCmd cmd = new UpdateGpuDeviceCmd(); + assertNull(cmd.getGpuCardId()); + Long gpuCardId = 1L; + ReflectionTestUtils.setField(cmd, "gpuCardId", gpuCardId); + assertEquals(gpuCardId, cmd.getGpuCardId()); + } + + @Test + public void getVgpuProfileId() { + UpdateGpuDeviceCmd cmd = new UpdateGpuDeviceCmd(); + assertNull(cmd.getVgpuProfileId()); + Long vgpuProfileId = 1L; + ReflectionTestUtils.setField(cmd, "vgpuProfileId", vgpuProfileId); + assertEquals(vgpuProfileId, cmd.getVgpuProfileId()); + } + + @Test + public void getType() { + UpdateGpuDeviceCmd cmd = new UpdateGpuDeviceCmd(); + assertNull(cmd.getType()); + String type = "MDEV"; + ReflectionTestUtils.setField(cmd, "type", type); + assertEquals(GpuDevice.DeviceType.MDEV, cmd.getType()); + } + + @Test + public void getParentGpuDeviceId() { + UpdateGpuDeviceCmd cmd = new UpdateGpuDeviceCmd(); + assertNull(cmd.getParentGpuDeviceId()); + Long parentGpuDeviceId = 1L; + ReflectionTestUtils.setField(cmd, "parentGpuDeviceId", parentGpuDeviceId); + assertEquals(parentGpuDeviceId, cmd.getParentGpuDeviceId()); + } + + @Test + public void getNumaNode() { + UpdateGpuDeviceCmd cmd = new UpdateGpuDeviceCmd(); + assertNull(cmd.getNumaNode()); + String numaNode = "0"; + ReflectionTestUtils.setField(cmd, "numaNode", numaNode); + assertEquals(numaNode, cmd.getNumaNode()); + } + + @Test + public void getEntityOwnerId() 
{ + UpdateGpuDeviceCmd cmd = new UpdateGpuDeviceCmd(); + assertEquals(Account.ACCOUNT_ID_SYSTEM, cmd.getEntityOwnerId()); + } +} diff --git a/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/UpdateVgpuProfileCmdTest.java b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/UpdateVgpuProfileCmdTest.java new file mode 100644 index 000000000000..95acd71096e9 --- /dev/null +++ b/api/src/test/java/org/apache/cloudstack/api/command/admin/gpu/UpdateVgpuProfileCmdTest.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.cloudstack.api.command.admin.gpu; + +import com.cloud.user.Account; +import org.junit.Test; +import org.springframework.test.util.ReflectionTestUtils; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +public class UpdateVgpuProfileCmdTest { + + @Test + public void getId() { + UpdateVgpuProfileCmd cmd = new UpdateVgpuProfileCmd(); + assertNull(cmd.getId()); + Long id = 1L; + ReflectionTestUtils.setField(cmd, "id", id); + assertEquals(id, cmd.getId()); + } + + @Test + public void getProfileName() { + UpdateVgpuProfileCmd cmd = new UpdateVgpuProfileCmd(); + assertNull(cmd.getProfileName()); + String profileName = "Test VGPU Profile"; + ReflectionTestUtils.setField(cmd, "profileName", profileName); + assertEquals(profileName, cmd.getProfileName()); + } + + @Test + public void getDescription() { + UpdateVgpuProfileCmd cmd = new UpdateVgpuProfileCmd(); + assertNull(cmd.getDescription()); + String description = "Test VGPU Profile Description"; + ReflectionTestUtils.setField(cmd, "description", description); + assertEquals(description, cmd.getDescription()); + } + + @Test + public void getMaxVgpuPerPgpu() { + UpdateVgpuProfileCmd cmd = new UpdateVgpuProfileCmd(); + assertNull(cmd.getMaxVgpuPerPgpu()); + Long maxVgpuPerPgpu = 8L; + ReflectionTestUtils.setField(cmd, "maxVgpuPerPgpu", maxVgpuPerPgpu); + assertEquals(maxVgpuPerPgpu, cmd.getMaxVgpuPerPgpu()); + } + + @Test + public void getVideoRam() { + UpdateVgpuProfileCmd cmd = new UpdateVgpuProfileCmd(); + assertNull(cmd.getVideoRam()); + Long videoRam = 8192L; // 8 GB + ReflectionTestUtils.setField(cmd, "videoRam", videoRam); + assertEquals(videoRam, cmd.getVideoRam()); + } + + @Test + public void getMaxHeads() { + UpdateVgpuProfileCmd cmd = new UpdateVgpuProfileCmd(); + assertNull(cmd.getMaxHeads()); + Long maxHeads = 2L; + ReflectionTestUtils.setField(cmd, "maxHeads", maxHeads); + assertEquals(maxHeads, cmd.getMaxHeads()); + } + + @Test + public void 
getMaxResolutionX() { + UpdateVgpuProfileCmd cmd = new UpdateVgpuProfileCmd(); + assertNull(cmd.getMaxResolutionX()); + Long maxResolutionX = 1920L; // Example resolution + ReflectionTestUtils.setField(cmd, "maxResolutionX", maxResolutionX); + assertEquals(maxResolutionX, cmd.getMaxResolutionX()); + } + + @Test + public void getMaxResolutionY() { + UpdateVgpuProfileCmd cmd = new UpdateVgpuProfileCmd(); + assertNull(cmd.getMaxResolutionY()); + Long maxResolutionY = 1080L; // Example resolution + ReflectionTestUtils.setField(cmd, "maxResolutionY", maxResolutionY); + assertEquals(maxResolutionY, cmd.getMaxResolutionY()); + } + + @Test + public void getEntityOwnerId() { + UpdateVgpuProfileCmd cmd = new UpdateVgpuProfileCmd(); + assertEquals(Account.ACCOUNT_ID_SYSTEM, cmd.getEntityOwnerId()); + } +} diff --git a/api/src/test/java/org/apache/cloudstack/api/command/admin/offering/CreateServiceOfferingCmdTest.java b/api/src/test/java/org/apache/cloudstack/api/command/admin/offering/CreateServiceOfferingCmdTest.java index d6f9d9b69371..bc7f65b07561 100644 --- a/api/src/test/java/org/apache/cloudstack/api/command/admin/offering/CreateServiceOfferingCmdTest.java +++ b/api/src/test/java/org/apache/cloudstack/api/command/admin/offering/CreateServiceOfferingCmdTest.java @@ -78,4 +78,23 @@ public void testGetLeaseExpiryActionInvalidValue() { ReflectionTestUtils.setField(createServiceOfferingCmd, "leaseExpiryAction", "Unknown"); Assert.assertEquals(null, createServiceOfferingCmd.getLeaseExpiryAction()); } + + @Test + public void testGetVgpuProfileId() { + ReflectionTestUtils.setField(createServiceOfferingCmd, "vgpuProfileId", 10L); + Assert.assertEquals(10L, createServiceOfferingCmd.getVgpuProfileId().longValue()); + } + + @Test + public void testGetGpuCount() { + ReflectionTestUtils.setField(createServiceOfferingCmd, "gpuCount", 2); + Assert.assertEquals(2, createServiceOfferingCmd.getGpuCount().intValue()); + } + + @Test + public void testGetGpuDisplay() { + 
ReflectionTestUtils.setField(createServiceOfferingCmd, "gpuDisplay", true); + Assert.assertTrue(createServiceOfferingCmd.getGpuDisplay()); + } + } diff --git a/api/src/test/java/org/apache/cloudstack/api/command/user/gpu/ListGpuCardsCmdTest.java b/api/src/test/java/org/apache/cloudstack/api/command/user/gpu/ListGpuCardsCmdTest.java new file mode 100644 index 000000000000..54e726eadbe0 --- /dev/null +++ b/api/src/test/java/org/apache/cloudstack/api/command/user/gpu/ListGpuCardsCmdTest.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.cloudstack.api.command.user.gpu; + +import org.junit.Assert; +import org.junit.Test; +import org.springframework.test.util.ReflectionTestUtils; + + +public class ListGpuCardsCmdTest { + + @Test + public void getId() { + ListGpuCardsCmd cmd = new ListGpuCardsCmd(); + Assert.assertNull(cmd.getId()); + Long id = 1L; + ReflectionTestUtils.setField(cmd, "id", id); + Assert.assertEquals(id, cmd.getId()); + } + + @Test + public void getVendorName() { + ListGpuCardsCmd cmd = new ListGpuCardsCmd(); + Assert.assertNull(cmd.getVendorName()); + String vendorName = "vendor name"; + ReflectionTestUtils.setField(cmd, "vendorName", vendorName); + Assert.assertEquals(vendorName, cmd.getVendorName()); + } + + @Test + public void getVendorId() { + ListGpuCardsCmd cmd = new ListGpuCardsCmd(); + Assert.assertNull(cmd.getVendorId()); + String vendorId = "vendor id"; + ReflectionTestUtils.setField(cmd, "vendorId", vendorId); + Assert.assertEquals(vendorId, cmd.getVendorId()); + } + + @Test + public void getDeviceId() { + ListGpuCardsCmd cmd = new ListGpuCardsCmd(); + Assert.assertNull(cmd.getDeviceId()); + String deviceId = "device id"; + ReflectionTestUtils.setField(cmd, "deviceId", deviceId); + Assert.assertEquals(deviceId, cmd.getDeviceId()); + } + + @Test + public void getDeviceName() { + ListGpuCardsCmd cmd = new ListGpuCardsCmd(); + Assert.assertNull(cmd.getDeviceName()); + String deviceName = "device name"; + ReflectionTestUtils.setField(cmd, "deviceName", deviceName); + Assert.assertEquals(deviceName, cmd.getDeviceName()); + } + + @Test + public void getActiveOnly() { + ListGpuCardsCmd cmd = new ListGpuCardsCmd(); + Assert.assertFalse(cmd.getActiveOnly()); + Boolean activeOnly = true; + ReflectionTestUtils.setField(cmd, "activeOnly", activeOnly); + Assert.assertEquals(activeOnly, cmd.getActiveOnly()); + } +} diff --git a/api/src/test/java/org/apache/cloudstack/api/command/user/gpu/ListGpuDevicesCmdTest.java 
b/api/src/test/java/org/apache/cloudstack/api/command/user/gpu/ListGpuDevicesCmdTest.java new file mode 100644 index 000000000000..e1a65ee0ece3 --- /dev/null +++ b/api/src/test/java/org/apache/cloudstack/api/command/user/gpu/ListGpuDevicesCmdTest.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.cloudstack.api.command.user.gpu; + +import org.junit.Assert; +import org.junit.Test; +import org.springframework.test.util.ReflectionTestUtils; + +public class ListGpuDevicesCmdTest { + + @Test + public void getVmId() { + ListGpuDevicesCmd cmd = new ListGpuDevicesCmd(); + Assert.assertNull(cmd.getVmId()); + Long vmId = 1L; + ReflectionTestUtils.setField(cmd, "vmId", vmId); + Assert.assertEquals(vmId, cmd.getVmId()); + } +} diff --git a/api/src/test/java/org/apache/cloudstack/api/command/user/gpu/ListVgpuProfilesCmdTest.java b/api/src/test/java/org/apache/cloudstack/api/command/user/gpu/ListVgpuProfilesCmdTest.java new file mode 100644 index 000000000000..7616abd1f8d5 --- /dev/null +++ b/api/src/test/java/org/apache/cloudstack/api/command/user/gpu/ListVgpuProfilesCmdTest.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.cloudstack.api.command.user.gpu; + +import org.junit.Assert; +import org.junit.Test; +import org.springframework.test.util.ReflectionTestUtils; + +public class ListVgpuProfilesCmdTest { + + @Test + public void getId() { + ListVgpuProfilesCmd cmd = new ListVgpuProfilesCmd(); + Assert.assertNull(cmd.getId()); + Long id = 1L; + ReflectionTestUtils.setField(cmd, "id", id); + Assert.assertEquals(id, cmd.getId()); + } + + @Test + public void getName() { + ListVgpuProfilesCmd cmd = new ListVgpuProfilesCmd(); + Assert.assertNull(cmd.getName()); + String name = "Test VGPU Profile"; + ReflectionTestUtils.setField(cmd, "name", name); + Assert.assertEquals(name, cmd.getName()); + } + + @Test + public void getCardId() { + ListVgpuProfilesCmd cmd = new ListVgpuProfilesCmd(); + Assert.assertNull(cmd.getCardId()); + Long cardId = 1L; + ReflectionTestUtils.setField(cmd, "cardId", cardId); + Assert.assertEquals(cardId, cmd.getCardId()); + } + + @Test + public void getActiveOnly() { + ListVgpuProfilesCmd cmd = new ListVgpuProfilesCmd(); + Assert.assertFalse(cmd.getActiveOnly()); + Boolean activeOnly = true; + ReflectionTestUtils.setField(cmd, "activeOnly", activeOnly); + Assert.assertEquals(activeOnly, cmd.getActiveOnly()); + } +} diff --git a/core/src/main/java/com/cloud/agent/api/GetGPUStatsAnswer.java b/core/src/main/java/com/cloud/agent/api/GetGPUStatsAnswer.java index 8b3cd44e207f..5bf70ed086f6 100644 --- a/core/src/main/java/com/cloud/agent/api/GetGPUStatsAnswer.java +++ b/core/src/main/java/com/cloud/agent/api/GetGPUStatsAnswer.java @@ -19,7 +19,9 @@ package com.cloud.agent.api; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import com.cloud.agent.api.LogLevel.Log4jLevel; @@ -27,6 +29,7 @@ public class GetGPUStatsAnswer extends Answer { private HashMap> groupDetails; + private List gpuDevices = new ArrayList<>(); public GetGPUStatsAnswer(final GetGPUStatsCommand cmd, final HashMap> groupDetails) { super(cmd); @@ -37,7 
+40,21 @@ public GetGPUStatsAnswer(final GetGPUStatsCommand cmd, final boolean success, fi super(cmd, success, details); } + public GetGPUStatsAnswer(final GetGPUStatsCommand cmd, final List gpuDevices) { + super(cmd); + this.gpuDevices = gpuDevices; + } + + public HashMap> getGroupDetails() { return groupDetails; } + + public List getGpuDevices() { + return gpuDevices; + } + + public void setGpuDevices(List gpuDevices) { + this.gpuDevices = gpuDevices; + } } diff --git a/core/src/main/java/com/cloud/agent/api/StartupRoutingCommand.java b/core/src/main/java/com/cloud/agent/api/StartupRoutingCommand.java index 286fced0c58a..068196aabe54 100644 --- a/core/src/main/java/com/cloud/agent/api/StartupRoutingCommand.java +++ b/core/src/main/java/com/cloud/agent/api/StartupRoutingCommand.java @@ -45,6 +45,7 @@ public class StartupRoutingCommand extends StartupCommand { List hostTags = new ArrayList(); String hypervisorVersion; HashMap> groupDetails = new HashMap>(); + List gpuDevices = new ArrayList<>(); private Boolean hostHealthCheckResult; public StartupRoutingCommand() { @@ -179,7 +180,7 @@ public void setHostTags(List hostTags) { this.hostTags = hostTags; } - public HashMap> getGpuGroupDetails() { + public HashMap> getGpuGroupDetails() { return groupDetails; } @@ -187,6 +188,14 @@ public void setGpuGroupDetails(HashMap> g this.groupDetails = groupDetails; } + public List getGpuDevices() { + return gpuDevices; + } + + public void setGpuDevices(List gpuDevices) { + this.gpuDevices = gpuDevices; + } + public boolean getSupportsClonedVolumes() { return supportsClonedVolumes; } diff --git a/engine/components-api/src/main/java/com/cloud/resource/Discoverer.java b/engine/components-api/src/main/java/com/cloud/resource/Discoverer.java index a2bb5945a9d1..60b0167758c7 100644 --- a/engine/components-api/src/main/java/com/cloud/resource/Discoverer.java +++ b/engine/components-api/src/main/java/com/cloud/resource/Discoverer.java @@ -50,5 +50,4 @@ Map> find(long dcId, Long podId, L 
public void putParam(Map params); ServerResource reloadResource(HostVO host); - } diff --git a/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java b/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java index 83f9768a62ac..8550dfdd9067 100755 --- a/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java +++ b/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java @@ -21,8 +21,12 @@ import java.util.List; import java.util.Map; + +import com.cloud.offering.ServiceOffering; import org.apache.cloudstack.engine.subsystem.api.storage.DataStore; import org.apache.cloudstack.engine.subsystem.api.storage.PrimaryDataStoreInfo; +import com.cloud.gpu.VgpuProfileVO; +import com.cloud.vm.VirtualMachine; import org.apache.cloudstack.framework.config.ConfigKey; import org.apache.cloudstack.framework.config.Configurable; @@ -194,29 +198,34 @@ public interface ResourceManager extends ResourceService, Configurable { */ boolean isHostGpuEnabled(long hostId); + boolean isGPUDeviceAvailable(ServiceOffering offering, Host host, Long vmId); + /** - * Check if host has GPU devices available - * @param hostId the host to be checked - * @param groupName: gpuCard name - * @param vgpuType the VGPU type - * @return true when the host has the capacity with given VGPU type + * Get available GPU device + * + * @param vm the vm for which GPU device is requested + * @param vgpuProfile the VGPU profile + * @param gpuCount + * @return GPUDeviceTO[] */ - boolean isGPUDeviceAvailable(Host host, String groupName, String vgpuType); + GPUDeviceTO getGPUDevice(VirtualMachine vm, long hostId, VgpuProfileVO vgpuProfile, int gpuCount); /** * Get available GPU device - * @param hostId the host to be checked - * @param groupName: gpuCard name - * @param vgpuType the VGPU type + * + * @param hostId the host to be checked + * @param groupName gpuCard name + * @param vgpuType the VGPU type * @return GPUDeviceTO[] */ GPUDeviceTO 
getGPUDevice(long hostId, String groupName, String vgpuType); /** * Return listof available GPU devices - * @param hostId, the host to be checked - * @param groupName: gpuCard name - * @param vgpuType the VGPU type + * + * @param hostId the host to be checked + * @param groupName gpuCard name + * @param vgpuType the VGPU type * @return List of HostGpuGroupsVO. */ List listAvailableGPUDevice(long hostId, String groupName, String vgpuType); @@ -228,6 +237,16 @@ public interface ResourceManager extends ResourceService, Configurable { */ void updateGPUDetails(long hostId, HashMap> groupDetails); + /** + * Update GPU device details (post VM deployment) + * + * @param vm the VirtualMachine object + * @param gpuDeviceTO GPU device details + */ + void updateGPUDetailsForVmStop(VirtualMachine vm, GPUDeviceTO gpuDeviceTO); + + void updateGPUDetailsForVmStart(long hostId, long vmId, GPUDeviceTO gpuDevice); + /** * Get GPU details for a host * @param host, the Host object diff --git a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java index b57643323dca..3cd8ec0aae35 100755 --- a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java @@ -87,6 +87,7 @@ import org.apache.cloudstack.framework.messagebus.MessageBus; import org.apache.cloudstack.framework.messagebus.MessageDispatcher; import org.apache.cloudstack.framework.messagebus.MessageHandler; +import org.apache.cloudstack.gpu.GpuService; import org.apache.cloudstack.jobs.JobInfo; import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.cloudstack.reservation.dao.ReservationDao; @@ -395,6 +396,8 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac @Inject private VolumeOrchestrationService volumeMgr; @Inject + private GpuService gpuService; + @Inject 
private DeploymentPlanningManager _dpMgr; @Inject private MessageBus _messageBus; @@ -1530,7 +1533,7 @@ public void orchestrateStart(final String vmUuid, final Map(vm.getHostId(), hostId), _vmDao); } @@ -2577,6 +2577,8 @@ public void destroy(final String vmUuid, final boolean expunge) throws AgentUnav deleteVMSnapshots(vm, expunge); + gpuService.deallocateAllGpuDevicesForVm(vm.getId()); + Transaction.execute(new TransactionCallbackWithExceptionNoReturn() { @Override public void doInTransactionWithoutResult(final TransactionStatus status) throws CloudRuntimeException { @@ -3147,6 +3149,8 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy logger.info("Migration was unsuccessful. Cleaning up: {}", vm); _networkMgr.rollbackNicForMigration(vmSrc, profile); volumeMgr.release(vm.getId(), dstHostId); + // deallocate GPU devices for the VM on the destination host + gpuService.deallocateGpuDevicesForVmOnHost(vm.getId(), dstHostId); _alertMgr.sendAlert(alertType, fromHost.getDataCenterId(), fromHost.getPodId(), "Unable to migrate vm " + vm.getInstanceName() + " from host " + fromHost.getName() + " in zone " + dest.getDataCenter().getName() + " and pod " + @@ -3165,6 +3169,8 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy } else { _networkMgr.commitNicForMigration(vmSrc, profile); volumeMgr.release(vm.getId(), srcHostId); + // deallocate GPU devices for the VM on the src host after migration is complete + gpuService.deallocateGpuDevicesForVmOnHost(vm.getId(), srcHostId); _networkMgr.setHypervisorHostname(profile, dest, true); recreateCheckpointsKvmOnVmAfterMigration(vm, dstHostId); @@ -3963,6 +3969,9 @@ private void orchestrateReboot(final String vmUuid, final Map findByOwnersAndTypeAndTag(List ownerIdList, Resource long removeEntriesByOwner(long ownerId, ResourceOwnerType ownerType); - /** - * Counts the number of CPU cores allocated for the given account. 
- * - * Side note: This method is not using the "resource_count" table. It is executing the actual count instead. - */ - long countCpuNumberAllocatedToAccount(long accountId); - - /** - * Counts the amount of memory allocated for the given account. - * - * Side note: This method is not using the "resource_count" table. It is executing the actual count instead. - */ - long countMemoryAllocatedToAccount(long accountId); - void removeResourceCountsForNonMatchingTags(Long ownerId, ResourceOwnerType ownerType, List types, List tags); List lockRows(Set ids); diff --git a/engine/schema/src/main/java/com/cloud/configuration/dao/ResourceCountDaoImpl.java b/engine/schema/src/main/java/com/cloud/configuration/dao/ResourceCountDaoImpl.java index 0ab834423374..2083fb422d28 100644 --- a/engine/schema/src/main/java/com/cloud/configuration/dao/ResourceCountDaoImpl.java +++ b/engine/schema/src/main/java/com/cloud/configuration/dao/ResourceCountDaoImpl.java @@ -339,19 +339,6 @@ public long removeEntriesByOwner(long ownerId, ResourceOwnerType ownerType) { + " left join vm_instance_details vmd on vmd.vm_id = vm.id and vmd.name = '%s' " + " where vm.type = 'User' and state not in ('Destroyed', 'Error', 'Expunging') and display_vm = true and account_id = ? 
"; - @Override - public long countCpuNumberAllocatedToAccount(long accountId) { - String sqlCountCpuNumberAllocatedToAccount = String.format(baseSqlCountComputingResourceAllocatedToAccount, ResourceType.cpu, ResourceType.cpu, "cpuNumber"); - return executeSqlCountComputingResourcesForAccount(accountId, sqlCountCpuNumberAllocatedToAccount); - } - - @Override - public long countMemoryAllocatedToAccount(long accountId) { - String serviceOfferingRamSizeField = "ram_size"; - String sqlCountCpuNumberAllocatedToAccount = String.format(baseSqlCountComputingResourceAllocatedToAccount, serviceOfferingRamSizeField, serviceOfferingRamSizeField, "memory"); - return executeSqlCountComputingResourcesForAccount(accountId, sqlCountCpuNumberAllocatedToAccount); - } - private long executeSqlCountComputingResourcesForAccount(long accountId, String sqlCountComputingResourcesAllocatedToAccount) { TransactionLegacy tx = TransactionLegacy.currentTxn(); try { diff --git a/engine/schema/src/main/java/com/cloud/gpu/GpuCardVO.java b/engine/schema/src/main/java/com/cloud/gpu/GpuCardVO.java new file mode 100644 index 000000000000..2410077c84ad --- /dev/null +++ b/engine/schema/src/main/java/com/cloud/gpu/GpuCardVO.java @@ -0,0 +1,147 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloud.gpu; + +import com.cloud.utils.db.GenericDao; +import org.apache.cloudstack.gpu.GpuCard; +import org.apache.cloudstack.utils.reflectiontostringbuilderutils.ReflectionToStringBuilderUtils; + +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; +import javax.persistence.Table; +import java.util.Date; +import java.util.UUID; + +@Entity +@Table(name = "gpu_card") +public class GpuCardVO implements GpuCard { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "id") + private long id; + + @Column(name = "uuid") + private String uuid; + + @Column(name = "device_id") + private String deviceId; + + @Column(name = "device_name") + private String deviceName; + + @Column(name = "name") + private String name; + + @Column(name = "vendor_name") + private String vendorName; + + @Column(name = "vendor_id") + private String vendorId; + + @Column(name = GenericDao.CREATED_COLUMN) + private Date created; + + public GpuCardVO() { + this.uuid = UUID.randomUUID().toString(); + } + + public GpuCardVO(String deviceId, String deviceName, String name, String vendorName, String vendorId) { + this.uuid = UUID.randomUUID().toString(); + this.deviceId = deviceId; + this.deviceName = deviceName; + this.name = name; + this.vendorName = vendorName; + this.vendorId = vendorId; + this.created = new Date(); + } + + @Override + public String toString() { + return String.format("GPUCard %s", ReflectionToStringBuilderUtils.reflectOnlySelectedFields( + this, "id", "uuid", "name", "deviceId", "deviceName", "vendorId", "vendorName")); + } + + @Override + public long getId() { + return id; + } + + @Override + public String getUuid() { + return uuid; + } + + @Override + public String getDeviceId() { + return deviceId; + } + + 
public void setDeviceId(String deviceId) { + this.deviceId = deviceId; + } + + @Override + public String getDeviceName() { + return deviceName; + } + + public void setDeviceName(String deviceName) { + this.deviceName = deviceName; + } + + @Override + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + @Override + public String getVendorName() { + return vendorName; + } + + public void setVendorName(String vendorName) { + this.vendorName = vendorName; + } + + @Override + public String getVendorId() { + return vendorId; + } + + public void setVendorId(String vendorId) { + this.vendorId = vendorId; + } + + @Override + public Date getCreated() { + return created; + } + + @Override + public String getGroupName() { + return "Group of " + getVendorName() + " " + getDeviceName() + " GPUs"; + } +} diff --git a/engine/schema/src/main/java/com/cloud/gpu/GpuDeviceVO.java b/engine/schema/src/main/java/com/cloud/gpu/GpuDeviceVO.java new file mode 100644 index 000000000000..ac20e74c3604 --- /dev/null +++ b/engine/schema/src/main/java/com/cloud/gpu/GpuDeviceVO.java @@ -0,0 +1,200 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package com.cloud.gpu; + +import org.apache.cloudstack.gpu.GpuDevice; +import org.apache.cloudstack.utils.reflectiontostringbuilderutils.ReflectionToStringBuilderUtils; + +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.EnumType; +import javax.persistence.Enumerated; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; +import javax.persistence.Table; +import java.util.UUID; + +@Entity +@Table(name = "gpu_device") +public class GpuDeviceVO implements GpuDevice { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "id") + private long id; + + @Column(name = "uuid") + private String uuid; + + @Column(name = "card_id") + private long cardId; + + @Column(name = "vgpu_profile_id") + private long vgpuProfileId; + + @Column(name = "bus_address") + private String busAddress; + + @Column(name = "host_id") + private long hostId; + + @Column(name = "vm_id") + private Long vmId; + + @Column(name = "type") + @Enumerated(value = EnumType.STRING) + private DeviceType type = DeviceType.PCI; + + @Column(name = "state") + @Enumerated(value = EnumType.STRING) + private State state = State.Free; + + @Column(name = "managed_state") + @Enumerated(value = EnumType.STRING) + private ManagedState managedState = ManagedState.Managed; + + @Column(name = "parent_gpu_device_id") + private Long parentGpuDeviceId; + + @Column(name = "numa_node") + private String numaNode; + + @Column(name = "pci_root") + private String pciRoot; + + public GpuDeviceVO() { + this.uuid = UUID.randomUUID().toString(); + } + + public GpuDeviceVO(long cardId, long vgpuProfileId, String busAddress, long hostId, Long parentGpuDeviceId, + String numaNode, String pciRoot) { + this.uuid = UUID.randomUUID().toString(); + this.cardId = cardId; + this.vgpuProfileId = vgpuProfileId; + this.busAddress = busAddress; + this.hostId = hostId; + this.parentGpuDeviceId = parentGpuDeviceId; + 
this.numaNode = numaNode; + this.pciRoot = pciRoot; + } + + @Override + public String toString() { + return String.format("GpuDevice %s", ReflectionToStringBuilderUtils.reflectOnlySelectedFields( + this, "id", "uuid", "cardId", "vgpuProfileId", "busAddress", "hostId", "vmId", + "parentGpuDeviceId", "numaNode", "pciRoot", "state", "managedState")); // every selected name is a field declared on GpuDeviceVO + } + + @Override + public long getId() { + return id; + } + + public String getUuid() { + return uuid; + } + + public long getCardId() { + return cardId; + } + + public void setCardId(long cardId) { + this.cardId = cardId; + } + + public long getVgpuProfileId() { + return vgpuProfileId; + } + + public void setVgpuProfileId(long vgpuProfileId) { + this.vgpuProfileId = vgpuProfileId; + } + + public String getBusAddress() { + return busAddress; + } + + public void setBusAddress(String busAddress) { + this.busAddress = busAddress; + } + + public long getHostId() { + return hostId; + } + + public void setHostId(long hostId) { + this.hostId = hostId; + } + + @Override + public State getState() { + return state; + } + + public void setState(State state) { + this.state = state; + } + + public DeviceType getType() { + return type; + } + + public void setType(DeviceType type) { + this.type = type; + } + + public ManagedState getManagedState() { + return managedState; + } + + public void setManagedState(ManagedState managedState) { + this.managedState = managedState; + } + + public Long getVmId() { + return vmId; + } + + public void setVmId(Long vmId) { + this.vmId = vmId; + } + + public Long getParentGpuDeviceId() { + return parentGpuDeviceId; + } + + public void setParentGpuDeviceId(Long parentGpuDeviceId) { + this.parentGpuDeviceId = parentGpuDeviceId; + } + + public String getNumaNode() { + return numaNode; + } + + public void setNumaNode(String numaNode) { + this.numaNode = numaNode; + } + + public String getPciRoot() { + return pciRoot; + } + + public void setPciRoot(String pciRoot) { + this.pciRoot = pciRoot; + } +} diff
--git a/engine/schema/src/main/java/com/cloud/gpu/VGPUTypesVO.java b/engine/schema/src/main/java/com/cloud/gpu/VGPUTypesVO.java index 5bbf90854ee6..4944d51f1b44 100644 --- a/engine/schema/src/main/java/com/cloud/gpu/VGPUTypesVO.java +++ b/engine/schema/src/main/java/com/cloud/gpu/VGPUTypesVO.java @@ -40,19 +40,19 @@ public class VGPUTypesVO implements InternalIdentity { private String vgpuType; @Column(name="video_ram") - private long videoRam; + private Long videoRam; @Column(name="max_heads") - private long maxHeads; + private Long maxHeads; @Column(name="max_resolution_x") - private long maxResolutionX; + private Long maxResolutionX; @Column(name="max_resolution_y") - private long maxResolutionY; + private Long maxResolutionY; @Column(name="max_vgpu_per_pgpu") - private long maxVgpuPerPgpu; + private Long maxVgpuPerPgpu; @Column(name="remaining_capacity") private long remainingCapacity; @@ -63,7 +63,7 @@ public class VGPUTypesVO implements InternalIdentity { protected VGPUTypesVO() { } - public VGPUTypesVO(long gpuGroupId, String vgpuType, long videoRam, long maxHeads, long maxResolutionX, long maxResolutionY, long maxVgpuPerPgpu, + public VGPUTypesVO(long gpuGroupId, String vgpuType, Long videoRam, Long maxHeads, Long maxResolutionX, Long maxResolutionY, Long maxVgpuPerPgpu, long remainingCapacity, long maxCapacity) { this.gpuGroupId = gpuGroupId; this.vgpuType = vgpuType; @@ -92,43 +92,43 @@ public void setVgpuType(String vgpuType) { this.vgpuType = vgpuType; } - public long getVideoRam() { + public Long getVideoRam() { return videoRam; } - public void setVideoRam(long videoRam) { + public void setVideoRam(Long videoRam) { this.videoRam = videoRam; } - public long getMaxHeads() { + public Long getMaxHeads() { return maxHeads; } - public void setMaxHeads(long maxHeads) { + public void setMaxHeads(Long maxHeads) { this.maxHeads = maxHeads; } - public long getMaxResolutionX() { + public Long getMaxResolutionX() { return maxResolutionX; } - public void 
setMaxResolutionX(long maxResolutionX) { + public void setMaxResolutionX(Long maxResolutionX) { this.maxResolutionX = maxResolutionX; } - public long getMaxResolutionY() { + public Long getMaxResolutionY() { return maxResolutionY; } - public void setMaxResolutionY(long maxResolutionY) { + public void setMaxResolutionY(Long maxResolutionY) { this.maxResolutionY = maxResolutionY; } - public long getMaxVgpuPerPgpu() { + public Long getMaxVgpuPerPgpu() { return maxVgpuPerPgpu; } - public void setMaxVgpuPerPgpu(long maxVgpuPerPgpu) { + public void setMaxVgpuPerPgpu(Long maxVgpuPerPgpu) { this.maxVgpuPerPgpu = maxVgpuPerPgpu; } diff --git a/engine/schema/src/main/java/com/cloud/gpu/VgpuProfileVO.java b/engine/schema/src/main/java/com/cloud/gpu/VgpuProfileVO.java new file mode 100644 index 000000000000..86f5eb94415b --- /dev/null +++ b/engine/schema/src/main/java/com/cloud/gpu/VgpuProfileVO.java @@ -0,0 +1,191 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package com.cloud.gpu; + +import com.cloud.utils.db.GenericDao; +import org.apache.cloudstack.gpu.VgpuProfile; +import org.apache.cloudstack.utils.reflectiontostringbuilderutils.ReflectionToStringBuilderUtils; + +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; +import javax.persistence.Table; +import java.util.Date; +import java.util.UUID; + +@Entity +@Table(name = "vgpu_profile") +public class VgpuProfileVO implements VgpuProfile { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "id") + private long id; + + @Column(name = "uuid") + private String uuid; + + @Column(name = "name") + private String name; + + @Column(name = "description") + private String description; + + @Column(name = "card_id") + private Long cardId; + + @Column(name = "max_vgpu_per_pgpu") + private Long maxVgpuPerPgpu; + + @Column(name = "video_ram") + private Long videoRam; + + @Column(name = "max_heads") + private Long maxHeads; + + @Column(name = "max_resolution_x") + private Long maxResolutionX; + + @Column(name = "max_resolution_y") + private Long maxResolutionY; + + @Column(name = GenericDao.CREATED_COLUMN) + private Date created; + + public VgpuProfileVO() { + this.uuid = UUID.randomUUID().toString(); + } + + public VgpuProfileVO(String name, String description, Long gpuCardId, Long maxVgpuPerPgpu) { + this.uuid = UUID.randomUUID().toString(); + this.name = name; + this.description = description; + this.cardId = gpuCardId; + this.maxVgpuPerPgpu = maxVgpuPerPgpu; + this.created = new Date(); + } + + + public VgpuProfileVO(String name, String description, Long gpuCardId, Long maxVgpuPerPgpu, Long videoRam, Long maxHeads, Long maxResolutionX, Long maxResolutionY) { + this.uuid = UUID.randomUUID().toString(); + this.name = name; + this.description = description; + this.cardId = gpuCardId; + this.maxVgpuPerPgpu = maxVgpuPerPgpu; + 
this.videoRam = videoRam; + this.maxHeads = maxHeads; + this.maxResolutionX = maxResolutionX; + this.maxResolutionY = maxResolutionY; + this.created = new Date(); + } + + @Override + public String toString() { + return String.format("VgpuProfile %s", ReflectionToStringBuilderUtils.reflectOnlySelectedFields( + this, "id", "uuid", "name", "cardId")); + } + + @Override + public long getId() { + return id; + } + + @Override + public String getUuid() { + return uuid; + } + + @Override + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + @Override + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + @Override + public Date getCreated() { + return created; + } + + @Override + public Long getCardId() { + return cardId; + } + + public void setCardId(Long cardId) { + this.cardId = cardId; + } + + @Override + public Long getMaxVgpuPerPgpu() { + return maxVgpuPerPgpu; + } + + public void setMaxVgpuPerPgpu(Long maxVgpuPerPgpu) { + this.maxVgpuPerPgpu = maxVgpuPerPgpu; + } + + @Override + public Long getVideoRam() { + return videoRam; + } + + public void setVideoRam(Long videoRam) { + this.videoRam = videoRam; + } + + @Override + public Long getMaxHeads() { + return maxHeads; + } + + public void setMaxHeads(Long maxHeads) { + this.maxHeads = maxHeads; + } + + @Override + public Long getMaxResolutionX() { + return maxResolutionX; + } + + public void setMaxResolutionX(Long maxResolutionX) { + this.maxResolutionX = maxResolutionX; + } + + @Override + public Long getMaxResolutionY() { + return maxResolutionY; + } + + public void setMaxResolutionY(Long maxResolutionY) { + this.maxResolutionY = maxResolutionY; + } +} diff --git a/engine/schema/src/main/java/com/cloud/gpu/dao/GpuCardDao.java b/engine/schema/src/main/java/com/cloud/gpu/dao/GpuCardDao.java new file mode 100644 index 000000000000..4463a690ae92 --- /dev/null +++ 
b/engine/schema/src/main/java/com/cloud/gpu/dao/GpuCardDao.java @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +package com.cloud.gpu.dao; + +import com.cloud.gpu.GpuCardVO; +import com.cloud.utils.Pair; +import com.cloud.utils.db.GenericDao; + +import java.util.List; + +public interface GpuCardDao extends GenericDao { + + /** + * Find GPU card by vendor and device id + * + * @param vendorId the vendor id + * @param deviceId the device id + * @return GpuCardVO + */ + GpuCardVO findByVendorIdAndDeviceId(String vendorId, String deviceId); + + Pair, Integer> searchAndCountGpuCards( + Long id, String keyword, String vendorId, String vendorName, + String deviceId, String deviceName, boolean activeOnly, Long startIndex, Long pageSize); +} diff --git a/engine/schema/src/main/java/com/cloud/gpu/dao/GpuCardDaoImpl.java b/engine/schema/src/main/java/com/cloud/gpu/dao/GpuCardDaoImpl.java new file mode 100644 index 000000000000..8aad85d45086 --- /dev/null +++ b/engine/schema/src/main/java/com/cloud/gpu/dao/GpuCardDaoImpl.java @@ -0,0 +1,122 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +package com.cloud.gpu.dao; + +import com.cloud.gpu.GpuCardVO; +import com.cloud.utils.Pair; +import com.cloud.utils.db.Filter; +import com.cloud.utils.db.GenericDaoBase; +import com.cloud.utils.db.SearchBuilder; +import com.cloud.utils.db.SearchCriteria; +import org.springframework.stereotype.Component; + +import javax.inject.Inject; +import java.util.List; + +@Component +public class GpuCardDaoImpl extends GenericDaoBase implements GpuCardDao { + + private final SearchBuilder allFieldSearch; + + @Inject + private GpuDeviceDao gpuDeviceDao; + + public GpuCardDaoImpl() { + allFieldSearch = createSearchBuilder(); + allFieldSearch.and("name", allFieldSearch.entity().getName(), SearchCriteria.Op.EQ); + allFieldSearch.and("vendorId", allFieldSearch.entity().getVendorId(), SearchCriteria.Op.EQ); + allFieldSearch.and("vendorName", allFieldSearch.entity().getVendorName(), SearchCriteria.Op.EQ); + allFieldSearch.and("deviceId", allFieldSearch.entity().getDeviceId(), SearchCriteria.Op.EQ); + allFieldSearch.and("deviceName", allFieldSearch.entity().getDeviceName(), SearchCriteria.Op.EQ); + allFieldSearch.done(); + } + + @Override + public GpuCardVO findByVendorIdAndDeviceId(String vendorId, String deviceId) { + SearchCriteria sc = allFieldSearch.create(); + 
sc.setParameters("vendorId", vendorId); + sc.setParameters("deviceId", deviceId); + return findOneBy(sc); + } + + @Override + public Pair, Integer> searchAndCountGpuCards(Long id, String keyword, String vendorId, + String vendorName, String deviceId, String deviceName, boolean activeOnly, Long startIndex, Long pageSize + ) { + + Filter searchFilter = new Filter(GpuCardVO.class, "id", true, startIndex, pageSize); + SearchBuilder sb = createSearchBuilder(); + + if (id != null) { + sb.and("id", sb.entity().getId(), SearchCriteria.Op.EQ); + } + if (keyword != null) { + sb.op("nameKeyword", sb.entity().getName(), SearchCriteria.Op.LIKE); // keyword group: a card matches when ANY of name / device name / vendor name contains the keyword + sb.or("deviceNameKeyword", sb.entity().getDeviceName(), SearchCriteria.Op.LIKE); + sb.or("vendorNameKeyword", sb.entity().getVendorName(), SearchCriteria.Op.LIKE); + sb.cp(); + } + if (vendorId != null) { + sb.and("vendorId", sb.entity().getVendorId(), SearchCriteria.Op.EQ); + } + if (vendorName != null) { + sb.and("vendorName", sb.entity().getVendorName(), SearchCriteria.Op.EQ); + } + if (deviceId != null) { + sb.and("deviceId", sb.entity().getDeviceId(), SearchCriteria.Op.EQ); + } + if (deviceName != null) { + sb.and("deviceName", sb.entity().getDeviceName(), SearchCriteria.Op.EQ); + } + if (activeOnly) { + sb.and("ids", sb.entity().getId(), SearchCriteria.Op.IN); + } + sb.done(); + + // Build search criteria + SearchCriteria sc = sb.create(); + if (id != null) { + sc.setParameters("id", id); + } + if (keyword != null) { + sc.setParameters("nameKeyword", "%" + keyword + "%"); + sc.setParameters("deviceNameKeyword", "%" + keyword + "%"); + sc.setParameters("vendorNameKeyword", "%" + keyword + "%"); + } + if (vendorId != null) { + sc.setParameters("vendorId", vendorId); + } + if (vendorName != null) { + sc.setParameters("vendorName", vendorName); + } + if (deviceId != null) { + sc.setParameters("deviceId", deviceId); + } + if (deviceName != null) { + sc.setParameters("deviceName", deviceName); + } + if (activeOnly) { + List cardIds
= gpuDeviceDao.getDistinctGpuCardIds(); + if (cardIds.isEmpty()) { + return new Pair<>(List.of(), 0); + } + sc.setParameters("ids", cardIds.toArray()); + } + + return searchAndCount(sc, searchFilter); + } +} diff --git a/engine/schema/src/main/java/com/cloud/gpu/dao/GpuDeviceDao.java b/engine/schema/src/main/java/com/cloud/gpu/dao/GpuDeviceDao.java new file mode 100644 index 000000000000..e362f23888d3 --- /dev/null +++ b/engine/schema/src/main/java/com/cloud/gpu/dao/GpuDeviceDao.java @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package com.cloud.gpu.dao; + +import com.cloud.gpu.GpuDeviceVO; +import com.cloud.utils.Pair; +import com.cloud.utils.db.GenericDao; + +import java.util.List; + +public interface GpuDeviceDao extends GenericDao { + + List listByIds(List ids); + + /** + * Find GPU device by host ID and bus address + * + * @param hostId the host ID + * @param busAddress the PCI bus address + * @return GpuDeviceVO + */ + GpuDeviceVO findByHostIdAndBusAddress(long hostId, String busAddress); + + /** + * List GPU devices by host ID + * + * @param hostId the ID of the host + * @return a list of GPU devices for the host + */ + List listByHostId(long hostId); + + /** + * List GPU devices by VM ID + * + * @param vmId the VM ID + * @return list of GpuDeviceVO + */ + List listByVmId(long vmId); + + boolean isVgpuProfileInUse(long vgpuProfileId); + + boolean isGpuCardInUse(long cardId); + + List listByHostAndVm(Long hostId, long vmId); + + List listDevicesForAllocation(Long hostId, Long vgpuProfileId); + + Pair, Integer> searchAndCountGpuDevices( + Long id, String keyword, Long hostId, Long vmId, Long gpuCardId, Long vgpuProfileId, + Long startIndex, Long pageSize); + + List getDistinctGpuCardIds(); + + List getDistinctVgpuProfileIds(); + + List listByParentGpuDeviceId(Long parentGpuDeviceId); +} diff --git a/engine/schema/src/main/java/com/cloud/gpu/dao/GpuDeviceDaoImpl.java b/engine/schema/src/main/java/com/cloud/gpu/dao/GpuDeviceDaoImpl.java new file mode 100644 index 000000000000..bd7032aff27b --- /dev/null +++ b/engine/schema/src/main/java/com/cloud/gpu/dao/GpuDeviceDaoImpl.java @@ -0,0 +1,260 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +package com.cloud.gpu.dao; + +import com.cloud.gpu.GpuCardVO; +import com.cloud.gpu.GpuDeviceVO; +import com.cloud.gpu.VgpuProfileVO; +import com.cloud.utils.Pair; +import com.cloud.utils.db.Filter; +import com.cloud.utils.db.GenericDaoBase; +import com.cloud.utils.db.JoinBuilder; +import com.cloud.utils.db.SearchBuilder; +import com.cloud.utils.db.SearchCriteria; +import org.apache.cloudstack.gpu.GpuDevice; +import org.apache.commons.collections.CollectionUtils; +import org.springframework.stereotype.Component; + +import javax.inject.Inject; +import javax.naming.ConfigurationException; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +@Component +public class GpuDeviceDaoImpl extends GenericDaoBase implements GpuDeviceDao { + + private static final String IDS = "ids"; + private static final String HOST_ID = "hostId"; + private static final String VM_ID = "vmId"; + private static final String BUS_ADDRESS = "busAddress"; + private static final String CARD_ID = "cardId"; + private static final String VGPU_PROFILE_ID = "vgpuProfileId"; + private static final String PARENT_GPU_DEVICE_ID = "parentGpuDeviceId"; + private static final String STATE = "state"; + private static final String MANAGED_STATE = "managedState"; + private static final String TYPE = "type"; + private final SearchBuilder allFieldSearch; + private SearchBuilder devicesForAllocationSearch; + @Inject + private GpuCardDao gpuCardDao; + @Inject + private VgpuProfileDao vgpuProfileDao; + + public GpuDeviceDaoImpl() { + 
allFieldSearch = createSearchBuilder(); + allFieldSearch.and(IDS, allFieldSearch.entity().getId(), SearchCriteria.Op.IN); + allFieldSearch.and(HOST_ID, allFieldSearch.entity().getHostId(), SearchCriteria.Op.EQ); + allFieldSearch.and(CARD_ID, allFieldSearch.entity().getCardId(), SearchCriteria.Op.EQ); + allFieldSearch.and(BUS_ADDRESS, allFieldSearch.entity().getBusAddress(), SearchCriteria.Op.EQ); + allFieldSearch.and(STATE, allFieldSearch.entity().getState(), SearchCriteria.Op.EQ); + allFieldSearch.and(VGPU_PROFILE_ID, allFieldSearch.entity().getVgpuProfileId(), SearchCriteria.Op.EQ); + allFieldSearch.and(PARENT_GPU_DEVICE_ID, allFieldSearch.entity().getParentGpuDeviceId(), SearchCriteria.Op.EQ); + allFieldSearch.and(VM_ID, allFieldSearch.entity().getVmId(), SearchCriteria.Op.EQ); + allFieldSearch.done(); + + devicesForAllocationSearch = createSearchBuilder(); + devicesForAllocationSearch.and(HOST_ID, devicesForAllocationSearch.entity().getHostId(), SearchCriteria.Op.EQ); + devicesForAllocationSearch.and(VGPU_PROFILE_ID, devicesForAllocationSearch.entity().getVgpuProfileId(), SearchCriteria.Op.IN); + devicesForAllocationSearch.and(STATE, devicesForAllocationSearch.entity().getState(), SearchCriteria.Op.EQ); + devicesForAllocationSearch.and(MANAGED_STATE, devicesForAllocationSearch.entity().getManagedState(), SearchCriteria.Op.EQ); + devicesForAllocationSearch.and(TYPE, devicesForAllocationSearch.entity().getType(), SearchCriteria.Op.NEQ); + devicesForAllocationSearch.done(); + } + + @Override + public boolean configure(String name, Map params) throws ConfigurationException { + return super.configure(name, params); + } + + @Override + public List listByIds(List ids) { + if (CollectionUtils.isEmpty(ids)) { + return Collections.emptyList(); + } + SearchCriteria sc = allFieldSearch.create(); + sc.setParameters(IDS, ids.toArray()); + return listBy(sc); + } + + @Override + public GpuDeviceVO findByHostIdAndBusAddress(long hostId, String busAddress) { + SearchCriteria sc 
= allFieldSearch.create(); + sc.setParameters(HOST_ID, hostId); + sc.setParameters(BUS_ADDRESS, busAddress); + return findOneBy(sc); + } + + @Override + public List listByHostId(long hostId) { + SearchCriteria sc = allFieldSearch.create(); + sc.setParameters(HOST_ID, hostId); + return listBy(sc); + } + + @Override + public List listByVmId(long vmId) { + SearchCriteria sc = allFieldSearch.create(); + sc.setParameters(VM_ID, vmId); + return listBy(sc); + } + + @Override + public boolean isVgpuProfileInUse(long vgpuProfileId) { + SearchCriteria sc = allFieldSearch.create(); + sc.setParameters(VGPU_PROFILE_ID, vgpuProfileId); + return getCount(sc) > 0; + } + + @Override + public boolean isGpuCardInUse(long cardId) { + SearchCriteria sc = allFieldSearch.create(); + sc.setParameters(CARD_ID, cardId); + return getCount(sc) > 0; + } + + @Override + public List listByHostAndVm(Long hostId, long vmId) { + SearchCriteria sc = allFieldSearch.create(); + sc.setParameters(HOST_ID, hostId); + sc.setParameters(VM_ID, vmId); + return search(sc, null); + } + + @Override + public List listDevicesForAllocation(Long hostId, Long vgpuProfileId) { + SearchCriteria sc = devicesForAllocationSearch.create(); + sc.setParameters(HOST_ID, hostId); + sc.setParameters(VGPU_PROFILE_ID, vgpuProfileId); + sc.setParameters(STATE, GpuDevice.State.Free); + sc.setParameters(MANAGED_STATE, GpuDevice.ManagedState.Managed); + sc.setParameters(TYPE, GpuDevice.DeviceType.VGPUOnly); + return search(sc, null); + } + + @Override + public Pair, Integer> searchAndCountGpuDevices(Long id, String keyword, Long hostId, Long vmId, + Long gpuCardId, Long vgpuProfileId, Long startIndex, Long pageSize) { + Filter searchFilter = new Filter(GpuDeviceVO.class, "id", true, startIndex, pageSize); + SearchBuilder sb = createSearchBuilder(); + + if (id != null) { + sb.and("id", sb.entity().getId(), SearchCriteria.Op.EQ); + } + if (hostId != null) { + sb.and("hostId", sb.entity().getHostId(), SearchCriteria.Op.EQ); + } + if 
(vmId != null) { + sb.and("vmId", sb.entity().getVmId(), SearchCriteria.Op.EQ); + } + if (gpuCardId != null) { + sb.and("cardId", sb.entity().getCardId(), SearchCriteria.Op.EQ); + } + if (vgpuProfileId != null) { + sb.and("vgpuProfileId", sb.entity().getVgpuProfileId(), SearchCriteria.Op.EQ); + } + if (keyword != null) { + SearchBuilder cardSb = gpuCardDao.createSearchBuilder(); + SearchBuilder profileSb = vgpuProfileDao.createSearchBuilder(); + sb.join("cardJoin", cardSb, sb.entity().getCardId(), cardSb.entity().getId(), JoinBuilder.JoinType.INNER); + sb.join("profileJoin", profileSb, sb.entity().getVgpuProfileId(), profileSb.entity().getId(), + JoinBuilder.JoinType.INNER); // the profile is joined on the device's vgpuProfileId, the card on its cardId + + sb.op("cardNameKeyword", cardSb.entity().getName(), SearchCriteria.Op.LIKE); + sb.or("cardNameKeyword", cardSb.entity().getVendorName(), SearchCriteria.Op.LIKE); + sb.or("cardNameKeyword", cardSb.entity().getDeviceName(), SearchCriteria.Op.LIKE); + + sb.op("profileNameKeyword", profileSb.entity().getName(), SearchCriteria.Op.LIKE); + sb.op("profileDescriptionKeyword", profileSb.entity().getDescription(), SearchCriteria.Op.LIKE); + sb.cp(); // NOTE(review): op() is called three times above but cp() only once, and the conditions are registered on sb while the parameters below are set through the joins - confirm the generated WHERE clause + } + + sb.done(); + + // Build search criteria + SearchCriteria sc = sb.create(); + if (id != null) { + sc.setParameters("id", id); + } + if (hostId != null) { + sc.setParameters("hostId", hostId); + } + if (vmId != null) { + sc.setParameters("vmId", vmId); + } + if (gpuCardId != null) { + sc.setParameters("cardId", gpuCardId); + } + if (vgpuProfileId != null) { + sc.setParameters("vgpuProfileId", vgpuProfileId); + } + + if (keyword != null) { + sc.setJoinParameters("cardJoin", "cardNameKeyword", "%" + keyword + "%"); + sc.setJoinParameters("cardJoin", "cardNameKeyword", "%" + keyword + "%"); + sc.setJoinParameters("cardJoin", "cardNameKeyword", "%" + keyword + "%"); + sc.setJoinParameters("profileJoin", "profileNameKeyword", "%" + keyword + "%"); + sc.setJoinParameters("profileJoin", "profileDescriptionKeyword", "%" + keyword + "%"); + } + + return
searchAndCount(sc, searchFilter); + } + + @Override + public List getDistinctGpuCardIds() { + SearchBuilder sb = createSearchBuilder(); + sb.select(null, SearchCriteria.Func.DISTINCT, sb.entity().getCardId()); + sb.done(); + SearchCriteria sc = sb.create(); + + List gpuDevices = listBy(sc); + if (CollectionUtils.isEmpty(gpuDevices)) { + return Collections.emptyList(); + } + + return gpuDevices.stream() + .map(GpuDeviceVO::getCardId) + .distinct() + .collect(Collectors.toList()); + } + + @Override + public List getDistinctVgpuProfileIds() { + SearchBuilder sb = createSearchBuilder(); + sb.select(null, SearchCriteria.Func.DISTINCT, sb.entity().getVgpuProfileId()); + sb.done(); + SearchCriteria sc = sb.create(); + + List gpuDevices = listBy(sc); + if (CollectionUtils.isEmpty(gpuDevices)) { + return Collections.emptyList(); + } + + return gpuDevices.stream() + .map(GpuDeviceVO::getVgpuProfileId) + .distinct() + .collect(Collectors.toList()); + } + + @Override + public List listByParentGpuDeviceId(Long parentGpuDeviceId) { + SearchCriteria sc = allFieldSearch.create(); + sc.setParameters(PARENT_GPU_DEVICE_ID, parentGpuDeviceId); + return listBy(sc); + } +} diff --git a/engine/schema/src/main/java/com/cloud/gpu/dao/HostGpuGroupsDao.java b/engine/schema/src/main/java/com/cloud/gpu/dao/HostGpuGroupsDao.java index 8e4f2f742ac5..99e336175393 100644 --- a/engine/schema/src/main/java/com/cloud/gpu/dao/HostGpuGroupsDao.java +++ b/engine/schema/src/main/java/com/cloud/gpu/dao/HostGpuGroupsDao.java @@ -19,6 +19,7 @@ import java.util.List; import com.cloud.gpu.HostGpuGroupsVO; +import com.cloud.utils.Pair; import com.cloud.utils.db.GenericDao; public interface HostGpuGroupsDao extends GenericDao { @@ -57,4 +58,15 @@ public interface HostGpuGroupsDao extends GenericDao { */ void persist(long hostId, List gpuGroups); + + /** + * Returns max and remaining GPU capacity + * + * @param dcId + * @param podId + * @param clusterId + * @param hostId + * @return Pair containing max GPU 
capacity and remaining GPU capacity + */ + Pair getGpuStats(Long dcId, Long podId, Long clusterId, Long hostId); } diff --git a/engine/schema/src/main/java/com/cloud/gpu/dao/HostGpuGroupsDaoImpl.java b/engine/schema/src/main/java/com/cloud/gpu/dao/HostGpuGroupsDaoImpl.java index 30535c7e27d5..343b144597c5 100644 --- a/engine/schema/src/main/java/com/cloud/gpu/dao/HostGpuGroupsDaoImpl.java +++ b/engine/schema/src/main/java/com/cloud/gpu/dao/HostGpuGroupsDaoImpl.java @@ -16,9 +16,16 @@ // under the License. package com.cloud.gpu.dao; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; import java.util.List; +import com.cloud.utils.Pair; +import com.cloud.utils.db.TransactionLegacy; +import com.cloud.utils.exception.CloudRuntimeException; import org.springframework.stereotype.Component; import com.cloud.gpu.HostGpuGroupsVO; @@ -87,4 +94,75 @@ public void deleteGpuEntries(long hostId) { sc.setParameters("hostId", hostId); remove(sc); } + + @Override + public Pair getGpuStats(Long dcId, Long podId, Long clusterId, Long hostId) { + TransactionLegacy txn = TransactionLegacy.currentTxn(); + Pair result = null; + List resourceIdList = new ArrayList<>(); + String query = getStatsQuery(resourceIdList, dcId, podId, clusterId, hostId); + + try { + PreparedStatement pstmt = txn.prepareAutoCloseStatement(query); + for (int i = 0; i < resourceIdList.size(); i++) { + pstmt.setLong(1 + i, resourceIdList.get(i)); + } + + ResultSet rs = pstmt.executeQuery(); + while (rs.next()) { + result = new Pair<>(rs.getLong(1), rs.getLong(2)); + } + return result; + } catch (SQLException e) { + throw new CloudRuntimeException("Error while fetching GPU stats: " + e.getMessage(), e); + } catch (Throwable e) { + throw new CloudRuntimeException("Caught: " + query, e); + } + } + + private String getStatsQuery(List resourceIdList, Long dcId, Long podId, Long clusterId, Long hostId) { + StringBuilder query = new 
StringBuilder("SELECT SUM(max_capacity), SUM(remaining_capacity)" + + "FROM vgpu_types " + + "WHERE" + + " gpu_group_id IN (" + + " SELECT" + + " host_gpu_groups.id" + + " FROM" + + " host_gpu_groups" + + " INNER JOIN host ON host.id = host_gpu_groups.host_id "); + if (dcId != null) { + query.append("WHERE host.data_center_id = ? "); + resourceIdList.add(dcId); + } + + if (podId != null) { + if (resourceIdList.isEmpty()) { + query.append("WHERE "); + } else { + query.append("AND "); + } + query.append(" host.pod_id = ? "); + resourceIdList.add(podId); + } + if (clusterId != null) { + if (resourceIdList.isEmpty()) { + query.append("WHERE "); + } else { + query.append("AND "); + } + query.append(" host.cluster_id = ? "); + resourceIdList.add(clusterId); + } + if (hostId != null) { + if (resourceIdList.isEmpty()) { + query.append("WHERE "); + } else { + query.append("AND "); + } + query.append(" host.id = ? "); + resourceIdList.add(hostId); + } + query.append(" )"); + return query.toString(); + } } diff --git a/engine/schema/src/main/java/com/cloud/gpu/dao/VGPUTypesDaoImpl.java b/engine/schema/src/main/java/com/cloud/gpu/dao/VGPUTypesDaoImpl.java index edc5e1f67c86..524feed24679 100644 --- a/engine/schema/src/main/java/com/cloud/gpu/dao/VGPUTypesDaoImpl.java +++ b/engine/schema/src/main/java/com/cloud/gpu/dao/VGPUTypesDaoImpl.java @@ -16,19 +16,6 @@ //under the License. 
package com.cloud.gpu.dao; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map.Entry; - -import javax.inject.Inject; - -import org.springframework.stereotype.Component; - import com.cloud.agent.api.VgpuTypesInfo; import com.cloud.gpu.HostGpuGroupsVO; import com.cloud.gpu.VGPUTypesVO; @@ -37,30 +24,42 @@ import com.cloud.utils.db.SearchCriteria; import com.cloud.utils.db.TransactionLegacy; import com.cloud.utils.exception.CloudRuntimeException; +import org.springframework.stereotype.Component; + +import javax.inject.Inject; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map.Entry; @Component public class VGPUTypesDaoImpl extends GenericDaoBase implements VGPUTypesDao { - private final SearchBuilder _searchByGroupId; - private final SearchBuilder _searchByGroupIdVGPUType; - - @Inject protected HostGpuGroupsDao _hostGpuGroupsDao; + @Inject + protected HostGpuGroupsDao hostGpuGroupsDao; private static final String LIST_ZONE_POD_CLUSTER_WIDE_GPU_CAPACITIES = "SELECT host_gpu_groups.group_name, vgpu_type, max_vgpu_per_pgpu, SUM(remaining_capacity) AS remaining_capacity, SUM(max_capacity) AS total_capacity FROM" + - " `cloud`.`vgpu_types` INNER JOIN `cloud`.`host_gpu_groups` ON vgpu_types.gpu_group_id = host_gpu_groups.id INNER JOIN `cloud`.`host`" + - " ON host_gpu_groups.host_id = host.id WHERE host.type = 'Routing' AND host.data_center_id = ?"; + " `cloud`.`vgpu_types` INNER JOIN `cloud`.`host_gpu_groups` ON vgpu_types.gpu_group_id = host_gpu_groups.id INNER JOIN `cloud`.`host`" + + " ON host_gpu_groups.host_id = host.id WHERE host.type = 'Routing' AND vgpu_types.max_capacity > 0 AND host.data_center_id = ?"; + + private final 
SearchBuilder searchByGroupId; + private final SearchBuilder searchByGroupIdVGPUType; public VGPUTypesDaoImpl() { - _searchByGroupId = createSearchBuilder(); - _searchByGroupId.and("groupId", _searchByGroupId.entity().getGpuGroupId(), SearchCriteria.Op.EQ); - _searchByGroupId.done(); + searchByGroupId = createSearchBuilder(); + searchByGroupId.and("groupId", searchByGroupId.entity().getGpuGroupId(), SearchCriteria.Op.EQ); + searchByGroupId.done(); - _searchByGroupIdVGPUType = createSearchBuilder(); - _searchByGroupIdVGPUType.and("groupId", _searchByGroupIdVGPUType.entity().getGpuGroupId(), SearchCriteria.Op.EQ); - _searchByGroupIdVGPUType.and("vgpuType", _searchByGroupIdVGPUType.entity().getVgpuType(), SearchCriteria.Op.EQ); - _searchByGroupIdVGPUType.done(); + searchByGroupIdVGPUType = createSearchBuilder(); + searchByGroupIdVGPUType.and("groupId", searchByGroupIdVGPUType.entity().getGpuGroupId(), SearchCriteria.Op.EQ); + searchByGroupIdVGPUType.and("vgpuType", searchByGroupIdVGPUType.entity().getVgpuType(), SearchCriteria.Op.EQ); + searchByGroupIdVGPUType.done(); } @Override @@ -83,7 +82,7 @@ public List listGPUCapacities(Long dcId, Long podId, Long cluster finalQuery.append(" AND host.cluster_id = ?"); resourceIdList.add(clusterId); } - finalQuery.append(" GROUP BY host_gpu_groups.group_name, vgpu_type"); + finalQuery.append(" GROUP BY host_gpu_groups.group_name, vgpu_type, max_vgpu_per_pgpu"); try { pstmt = txn.prepareAutoCloseStatement(finalQuery.toString()); @@ -106,14 +105,14 @@ public List listGPUCapacities(Long dcId, Long podId, Long cluster @Override public List listByGroupId(long groupId) { - SearchCriteria sc = _searchByGroupId.create(); + SearchCriteria sc = searchByGroupId.create(); sc.setParameters("groupId", groupId); return listBy(sc); } @Override public VGPUTypesVO findByGroupIdVGPUType(long groupId, String vgpuType) { - SearchCriteria sc = _searchByGroupIdVGPUType.create(); + SearchCriteria sc = searchByGroupIdVGPUType.create(); 
sc.setParameters("groupId", groupId); sc.setParameters("vgpuType", vgpuType); return findOneBy(sc); @@ -124,7 +123,7 @@ public void persist(long hostId, HashMap> Iterator>> it1 = groupDetails.entrySet().iterator(); while (it1.hasNext()) { Entry> entry = it1.next(); - HostGpuGroupsVO gpuGroup = _hostGpuGroupsDao.findByHostIdGroupName(hostId, entry.getKey()); + HostGpuGroupsVO gpuGroup = hostGpuGroupsDao.findByHostIdGroupName(hostId, entry.getKey()); HashMap values = entry.getValue(); Iterator> it2 = values.entrySet().iterator(); while (it2.hasNext()) { diff --git a/engine/schema/src/main/java/com/cloud/gpu/dao/VgpuProfileDao.java b/engine/schema/src/main/java/com/cloud/gpu/dao/VgpuProfileDao.java new file mode 100644 index 000000000000..2628f1851f22 --- /dev/null +++ b/engine/schema/src/main/java/com/cloud/gpu/dao/VgpuProfileDao.java @@ -0,0 +1,33 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package com.cloud.gpu.dao; + +import com.cloud.gpu.VgpuProfileVO; +import com.cloud.utils.Pair; +import com.cloud.utils.db.GenericDao; + +import java.util.List; + +public interface VgpuProfileDao extends GenericDao { + + VgpuProfileVO findByNameAndCardId(String name, long cardId); + + int removeByCardId(long cardId); + + Pair, Integer> searchAndCountVgpuProfiles(Long id, String name, String keyword, Long gpuCardId, + boolean activeOnly, Long startIndex, Long pageSize); +} diff --git a/engine/schema/src/main/java/com/cloud/gpu/dao/VgpuProfileDaoImpl.java b/engine/schema/src/main/java/com/cloud/gpu/dao/VgpuProfileDaoImpl.java new file mode 100644 index 000000000000..11dd7edb30d5 --- /dev/null +++ b/engine/schema/src/main/java/com/cloud/gpu/dao/VgpuProfileDaoImpl.java @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package com.cloud.gpu.dao; + +import com.cloud.gpu.VgpuProfileVO; +import com.cloud.utils.Pair; +import com.cloud.utils.db.Filter; +import com.cloud.utils.db.GenericDaoBase; +import com.cloud.utils.db.SearchBuilder; +import com.cloud.utils.db.SearchCriteria; +import org.springframework.stereotype.Component; + +import javax.inject.Inject; +import java.util.List; + +@Component +public class VgpuProfileDaoImpl extends GenericDaoBase implements VgpuProfileDao { + + private final SearchBuilder allFieldSearch; + + @Inject + private GpuDeviceDao gpuDeviceDao; + + public VgpuProfileDaoImpl() { + allFieldSearch = createSearchBuilder(); + allFieldSearch.and("name", allFieldSearch.entity().getName(), SearchCriteria.Op.EQ); + allFieldSearch.and("cardId", allFieldSearch.entity().getCardId(), SearchCriteria.Op.IN); + allFieldSearch.done(); + } + + @Override + public VgpuProfileVO findByNameAndCardId(String name, long cardId) { + SearchCriteria sc = allFieldSearch.create(); + sc.setParameters("name", name); + sc.setParameters("cardId", cardId); + return findOneBy(sc); + } + + @Override + public int removeByCardId(long cardId) { + SearchCriteria sc = allFieldSearch.create(); + sc.setParameters("cardId", cardId); + return remove(sc); + } + + @Override + public Pair, Integer> searchAndCountVgpuProfiles(Long id, String name, String keyword, + Long gpuCardId, boolean activeOnly, Long startIndex, Long pageSize) { + Filter searchFilter = new Filter(VgpuProfileVO.class, "id", true, startIndex, pageSize); + SearchBuilder sb = createSearchBuilder(); + + if (id != null) { + sb.and("id", sb.entity().getId(), SearchCriteria.Op.EQ); + } + if (name != null) { + sb.and("name", sb.entity().getName(), SearchCriteria.Op.EQ); + } + if (keyword != null) { + sb.and("keywordName", sb.entity().getName(), SearchCriteria.Op.LIKE); + sb.and("keywordDescription", sb.entity().getDescription(), SearchCriteria.Op.LIKE); + } + if (gpuCardId != null) { + sb.and("cardId", sb.entity().getCardId(), 
SearchCriteria.Op.EQ); + } + if (activeOnly) { + sb.and("ids", sb.entity().getId(), SearchCriteria.Op.IN); + } + sb.done(); + + // Build search criteria + SearchCriteria sc = sb.create(); + if (id != null) { + sc.setParameters("id", id); + } + if (name != null) { + sc.setParameters("name", name); + } + if (keyword != null) { + sc.setParameters("keywordName", "%" + keyword + "%"); + sc.setParameters("keywordDescription", "%" + keyword + "%"); + } + if (gpuCardId != null) { + sc.setParameters("cardId", gpuCardId); + } + + if (activeOnly) { + List vgpuProfileIds = gpuDeviceDao.getDistinctVgpuProfileIds(); + if (vgpuProfileIds.isEmpty()) { + return new Pair<>(List.of(), 0); + } + sc.setParameters("ids", vgpuProfileIds.toArray()); + } + + return searchAndCount(sc, searchFilter); + } +} diff --git a/engine/schema/src/main/java/com/cloud/service/ServiceOfferingVO.java b/engine/schema/src/main/java/com/cloud/service/ServiceOfferingVO.java index 7f5c1a7afa19..cfe8049f5b2c 100644 --- a/engine/schema/src/main/java/com/cloud/service/ServiceOfferingVO.java +++ b/engine/schema/src/main/java/com/cloud/service/ServiceOfferingVO.java @@ -124,6 +124,15 @@ public class ServiceOfferingVO implements ServiceOffering { @Column(name = "dynamic_scaling_enabled") private boolean dynamicScalingEnabled = true; + @Column(name = "vgpu_profile_id") + private Long vgpuProfileId; + + @Column(name = "gpu_count") + private Integer gpuCount; + + @Column(name = "gpu_display") + private Boolean gpuDisplay; + // This is a delayed load value. If the value is null, // then this field has not been loaded yet. // Call service offering dao to load it. 
@@ -198,6 +207,9 @@ public ServiceOfferingVO(ServiceOfferingVO offering) { systemUse = offering.isSystemUse(); dynamicScalingEnabled = offering.isDynamicScalingEnabled(); diskOfferingStrictness = offering.diskOfferingStrictness; + vgpuProfileId = offering.vgpuProfileId; + gpuCount = offering.gpuCount; + gpuDisplay = offering.gpuDisplay; /* copy the third GPU field as well so a cloned offering keeps its display flag */ } @Override @@ -445,4 +457,30 @@ public Boolean getDiskOfferingStrictness() { public void setDiskOfferingStrictness(boolean diskOfferingStrictness) { this.diskOfferingStrictness = diskOfferingStrictness; } + + @Override + public Long getVgpuProfileId() { + return vgpuProfileId; + } + + public void setVgpuProfileId(Long vgpuProfileId) { + this.vgpuProfileId = vgpuProfileId; + } + + @Override + public Integer getGpuCount() { + return gpuCount; + } + + public void setGpuCount(Integer gpuCount) { + this.gpuCount = gpuCount; + } + + public Boolean getGpuDisplay() { + return gpuDisplay; + } + + public void setGpuDisplay(Boolean gpuDisplay) { + this.gpuDisplay = gpuDisplay; + } } diff --git a/engine/schema/src/main/java/com/cloud/service/dao/ServiceOfferingDao.java b/engine/schema/src/main/java/com/cloud/service/dao/ServiceOfferingDao.java index ceb5b0a4fc1c..d3bab4fcbe27 100644 --- a/engine/schema/src/main/java/com/cloud/service/dao/ServiceOfferingDao.java +++ b/engine/schema/src/main/java/com/cloud/service/dao/ServiceOfferingDao.java @@ -22,6 +22,7 @@ import com.cloud.service.ServiceOfferingVO; import com.cloud.storage.Storage.ProvisioningType; import com.cloud.utils.db.GenericDao; +import com.cloud.utils.db.SearchBuilder; import com.cloud.vm.VirtualMachine; /* @@ -57,4 +58,6 @@ List createSystemServiceOfferings(String name, String uniqueN ServiceOfferingVO findServiceOfferingByComputeOnlyDiskOffering(long diskOfferingId, boolean includingRemoved); List listIdsByHostTag(String tag); + + void addCheckForGpuEnabled(SearchBuilder serviceOfferingSearch, Boolean gpuEnabled); } diff --git a/engine/schema/src/main/java/com/cloud/service/dao/ServiceOfferingDaoImpl.java 
b/engine/schema/src/main/java/com/cloud/service/dao/ServiceOfferingDaoImpl.java index 2710aea3b9d1..f360770ad686 100644 --- a/engine/schema/src/main/java/com/cloud/service/dao/ServiceOfferingDaoImpl.java +++ b/engine/schema/src/main/java/com/cloud/service/dao/ServiceOfferingDaoImpl.java @@ -312,4 +312,17 @@ public List listIdsByHostTag(String tag) { sc.setParameters("tagEndLike", "%," + tag); return customSearch(sc, null); } + + /** Adds a condition on the offering's vGPU profile id: NNULL when gpuEnabled is true, NULL when false; a null flag adds no condition. */ + @Override + public void addCheckForGpuEnabled(SearchBuilder serviceOfferingSearch, Boolean gpuEnabled) { + if (gpuEnabled == null) { /* guard: unboxing a null Boolean in the condition below would throw NullPointerException */ + return; + } + if (gpuEnabled) { + serviceOfferingSearch.and("gpuEnabled", serviceOfferingSearch.entity().getVgpuProfileId(), SearchCriteria.Op.NNULL); + } else { + serviceOfferingSearch.and("gpuDisabled", serviceOfferingSearch.entity().getVgpuProfileId(), SearchCriteria.Op.NULL); + } + } } diff --git a/engine/schema/src/main/java/com/cloud/vm/dao/UserVmDaoImpl.java b/engine/schema/src/main/java/com/cloud/vm/dao/UserVmDaoImpl.java index fc0322c25e79..41bcb3155e54 100644 --- a/engine/schema/src/main/java/com/cloud/vm/dao/UserVmDaoImpl.java +++ b/engine/schema/src/main/java/com/cloud/vm/dao/UserVmDaoImpl.java @@ -821,6 +821,7 @@ public UserVmVO persist(UserVmVO entity) { reservationDao.setResourceId(Resource.ResourceType.user_vm, userVM.getId()); reservationDao.setResourceId(Resource.ResourceType.cpu, userVM.getId()); reservationDao.setResourceId(Resource.ResourceType.memory, userVM.getId()); + reservationDao.setResourceId(Resource.ResourceType.gpu, userVM.getId()); return userVM; }); } diff --git a/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDaoImpl.java b/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDaoImpl.java index ef10af63bae0..dc0391f71fd9 100755 --- a/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDaoImpl.java +++ b/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDaoImpl.java @@ -131,12 +131,22 @@ public class VMInstanceDaoImpl extends GenericDaoBase implem private static final String 
ORDER_HOSTS_NUMBER_OF_VMS_FOR_ACCOUNT_PART2 = " GROUP BY host.id ORDER BY 2 ASC "; - private static final String COUNT_VMS_BASED_ON_VGPU_TYPES1 = + private static final String COUNT_VMS_BASED_ON_VGPU_TYPES1_LEGACY = "SELECT pci, type, SUM(vmcount) FROM (SELECT MAX(IF(offering.name = 'pciDevice',value,'')) AS pci, MAX(IF(offering.name = 'vgpuType', value,'')) " + "AS type, COUNT(DISTINCT vm.id) AS vmcount FROM service_offering_details offering INNER JOIN vm_instance vm ON offering.service_offering_id = vm.service_offering_id " + "INNER JOIN `cloud`.`host` ON vm.host_id = host.id WHERE vm.state = 'Running' AND host.data_center_id = ? "; + private static final String COUNT_VMS_BASED_ON_VGPU_TYPES2_LEGACY = + "GROUP BY vm.service_offering_id) results GROUP BY pci, type"; + + private static final String COUNT_VMS_BASED_ON_VGPU_TYPES1 = + "SELECT CONCAT(gpu_card.vendor_name, ' ', gpu_card.device_name), vgpu_profile.name, COUNT(gpu_device.vm_id) " + "FROM `cloud`.`gpu_device` " + "INNER JOIN `cloud`.`host` ON gpu_device.host_id = host.id " + "INNER JOIN `cloud`.`gpu_card` ON gpu_device.card_id = gpu_card.id " + "INNER JOIN `cloud`.`vgpu_profile` ON vgpu_profile.id = gpu_device.vgpu_profile_id " + "WHERE vm_id IS NOT NULL AND host.data_center_id = ? "; private static final String COUNT_VMS_BASED_ON_VGPU_TYPES2 = - "GROUP BY offering.service_offering_id) results GROUP BY pci, type"; + "GROUP BY gpu_card.vendor_name, gpu_card.device_name, vgpu_profile.name"; /* group by exactly the non-aggregated columns selected in COUNT_VMS_BASED_ON_VGPU_TYPES1 so each (card label, profile) pair yields one row */ private static final String UPDATE_SYSTEM_VM_TEMPLATE_ID_FOR_HYPERVISOR = "UPDATE `cloud`.`vm_instance` SET vm_template_id = ? WHERE type <> 'User' AND hypervisor_type = ? 
AND removed is NULL"; @@ -794,40 +804,52 @@ public List listHostIdsByVmCount(long dcId, Long podId, Long clusterId, lo @Override public HashMap countVgpuVMs(Long dcId, Long podId, Long clusterId) { + StringBuilder finalQueryLegacy = new StringBuilder(); StringBuilder finalQuery = new StringBuilder(); TransactionLegacy txn = TransactionLegacy.currentTxn(); + PreparedStatement pstmtLegacy = null; PreparedStatement pstmt = null; List resourceIdList = new ArrayList(); HashMap result = new HashMap(); resourceIdList.add(dcId); + finalQueryLegacy.append(COUNT_VMS_BASED_ON_VGPU_TYPES1_LEGACY); finalQuery.append(COUNT_VMS_BASED_ON_VGPU_TYPES1); if (podId != null) { + finalQueryLegacy.append("AND host.pod_id = ? "); finalQuery.append("AND host.pod_id = ? "); resourceIdList.add(podId); } if (clusterId != null) { + finalQueryLegacy.append("AND host.cluster_id = ? "); finalQuery.append("AND host.cluster_id = ? "); resourceIdList.add(clusterId); } + finalQueryLegacy.append(COUNT_VMS_BASED_ON_VGPU_TYPES2_LEGACY); finalQuery.append(COUNT_VMS_BASED_ON_VGPU_TYPES2); try { + pstmtLegacy = txn.prepareAutoCloseStatement(finalQueryLegacy.toString()); pstmt = txn.prepareAutoCloseStatement(finalQuery.toString()); for (int i = 0; i < resourceIdList.size(); i++) { + pstmtLegacy.setLong(1 + i, resourceIdList.get(i)); pstmt.setLong(1 + i, resourceIdList.get(i)); } - ResultSet rs = pstmt.executeQuery(); + ResultSet rs = pstmtLegacy.executeQuery(); + while (rs.next()) { + result.put(rs.getString(1).concat(rs.getString(2)), rs.getLong(3)); + } + rs = pstmt.executeQuery(); while (rs.next()) { result.put(rs.getString(1).concat(rs.getString(2)), rs.getLong(3)); } return result; } catch (SQLException e) { - throw new CloudRuntimeException("DB Exception on: " + finalQuery, e); + throw new CloudRuntimeException("DB Exception on: " + finalQueryLegacy, e); } catch (Throwable e) { - throw new CloudRuntimeException("Caught: " + finalQuery, e); + throw new CloudRuntimeException("Caught: " + 
finalQueryLegacy, e); } } diff --git a/engine/schema/src/main/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml b/engine/schema/src/main/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml index 1e144dde8f15..4d75c35c9337 100644 --- a/engine/schema/src/main/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml +++ b/engine/schema/src/main/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml @@ -305,4 +305,7 @@ + + + diff --git a/engine/schema/src/main/resources/META-INF/db/schema-42010to42100.sql b/engine/schema/src/main/resources/META-INF/db/schema-42010to42100.sql index 275549e5eee3..103b9363d6c5 100644 --- a/engine/schema/src/main/resources/META-INF/db/schema-42010to42100.sql +++ b/engine/schema/src/main/resources/META-INF/db/schema-42010to42100.sql @@ -238,6 +238,68 @@ CREATE TABLE IF NOT EXISTS `cloud`.`gui_themes_details` ( CONSTRAINT `fk_gui_themes_details__gui_theme_id` FOREIGN KEY (`gui_theme_id`) REFERENCES `gui_themes`(`id`) ); +-- Create the GPU card table to hold the GPU card information +CREATE TABLE IF NOT EXISTS `cloud`.`gpu_card` ( + `id` bigint unsigned NOT NULL AUTO_INCREMENT COMMENT 'id', + `uuid` varchar(40) NOT NULL UNIQUE, + `device_id` varchar(4) NOT NULL COMMENT 'device id of the GPU card', + `device_name` varchar(255) NOT NULL COMMENT 'device name of the GPU card', + `name` varchar(255) NOT NULL COMMENT 'name of the GPU card', + `vendor_name` varchar(255) NOT NULL COMMENT 'vendor name of the GPU card', + `vendor_id` varchar(4) NOT NULL COMMENT 'vendor id of the GPU card', + `created` datetime NOT NULL COMMENT 'date created', + PRIMARY KEY (`id`), + UNIQUE KEY (`vendor_id`, `device_id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='GPU cards supported by CloudStack'; + +-- Create the vGPU profile table to hold the vGPU profile information. 
+CREATE TABLE IF NOT EXISTS `cloud`.`vgpu_profile` ( + `id` bigint unsigned NOT NULL AUTO_INCREMENT COMMENT 'id', + `uuid` varchar(40) NOT NULL UNIQUE, + `name` varchar(255) NOT NULL COMMENT 'name of the vGPU profile', + `description` varchar(255) DEFAULT NULL COMMENT 'description of the vGPU profile', + `card_id` bigint unsigned NOT NULL COMMENT 'id of the GPU card', + `video_ram` bigint unsigned DEFAULT NULL COMMENT 'video RAM of the vGPU profile', + `max_heads` bigint unsigned DEFAULT NULL COMMENT 'maximum number of heads of the vGPU profile', + `max_resolution_x` bigint unsigned DEFAULT NULL COMMENT 'maximum resolution x of the vGPU profile', + `max_resolution_y` bigint unsigned DEFAULT NULL COMMENT 'maximum resolution y of the vGPU profile', + `max_vgpu_per_pgpu` bigint unsigned DEFAULT NULL COMMENT 'Maximum number of vGPUs per physical GPU', + `created` datetime NOT NULL COMMENT 'date created', + PRIMARY KEY (`id`), + UNIQUE KEY (`name`, `card_id`), + CONSTRAINT `fk_vgpu_profile_card_id` FOREIGN KEY (`card_id`) REFERENCES `gpu_card`(`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='vGPU profiles supported by CloudStack'; + +-- Create the GPU device table to hold the GPU device information on different hosts +CREATE TABLE IF NOT EXISTS `cloud`.`gpu_device` ( + `id` bigint unsigned NOT NULL AUTO_INCREMENT COMMENT 'id', + `uuid` varchar(40) NOT NULL UNIQUE, + `card_id` bigint unsigned NOT NULL COMMENT 'id of the GPU card', + `vgpu_profile_id` bigint unsigned DEFAULT NULL COMMENT 'id of the vGPU profile.', + `bus_address` varchar(255) NOT NULL COMMENT 'PCI bus address of the GPU device', + `type` varchar(32) NOT NULL COMMENT 'type of the GPU device. 
PCI or MDEV', + `host_id` bigint unsigned NOT NULL COMMENT 'id of the host where GPU is installed', + `vm_id` bigint unsigned DEFAULT NULL COMMENT 'id of the VM using this GPU device', + `numa_node` varchar(255) DEFAULT NULL COMMENT 'NUMA node of the GPU device', + `pci_root` varchar(255) DEFAULT NULL COMMENT 'PCI root of the GPU device', + `parent_gpu_device_id` bigint unsigned DEFAULT NULL COMMENT 'id of the parent GPU device. null if it is a physical GPU device and for vGPUs points to the actual GPU', + `state` varchar(32) NOT NULL COMMENT 'state of the GPU device', + `managed_state` varchar(32) NOT NULL COMMENT 'resource state of the GPU device', + PRIMARY KEY (`id`), + UNIQUE KEY (`bus_address`, `host_id`), + CONSTRAINT `fk_gpu_devices__card_id` FOREIGN KEY (`card_id`) REFERENCES `gpu_card` (`id`) ON DELETE CASCADE, + CONSTRAINT `fk_gpu_devices__host_id` FOREIGN KEY (`host_id`) REFERENCES `host` (`id`) ON DELETE CASCADE, + CONSTRAINT `fk_gpu_devices__vm_id` FOREIGN KEY (`vm_id`) REFERENCES `vm_instance` (`id`) ON DELETE SET NULL, + CONSTRAINT `fk_gpu_devices__parent_gpu_device_id` FOREIGN KEY (`parent_gpu_device_id`) REFERENCES `gpu_device` (`id`) ON DELETE SET NULL +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='GPU devices installed on hosts'; + +-- Add references to GPU tables +CALL `cloud`.`IDEMPOTENT_ADD_COLUMN`('cloud.service_offering', 'vgpu_profile_id', 'bigint unsigned DEFAULT NULL COMMENT "vgpu profile ID"'); +CALL `cloud`.`IDEMPOTENT_ADD_COLUMN`('cloud.service_offering', 'gpu_count', 'int unsigned DEFAULT NULL COMMENT "number of GPUs"'); +CALL `cloud`.`IDEMPOTENT_ADD_COLUMN`('cloud.service_offering', 'gpu_display', 'boolean DEFAULT false COMMENT "enable GPU display"'); +CALL `cloud`.`IDEMPOTENT_DROP_FOREIGN_KEY`('cloud.service_offering','fk_service_offering__vgpu_profile_id'); +CALL `cloud`.`IDEMPOTENT_ADD_FOREIGN_KEY`('cloud.service_offering', 'fk_service_offering__vgpu_profile_id', '(vgpu_profile_id)', '`vgpu_profile`(`id`)'); + -- Netris Plugin 
CREATE TABLE `cloud`.`netris_providers` ( `id` bigint unsigned NOT NULL auto_increment COMMENT 'id', diff --git a/engine/schema/src/main/resources/META-INF/db/views/cloud.account_view.sql b/engine/schema/src/main/resources/META-INF/db/views/cloud.account_view.sql index 6092fe8e845a..edc164c40cbd 100644 --- a/engine/schema/src/main/resources/META-INF/db/views/cloud.account_view.sql +++ b/engine/schema/src/main/resources/META-INF/db/views/cloud.account_view.sql @@ -64,6 +64,8 @@ select `cpucount`.`count` AS `cpuTotal`, `memorylimit`.`max` AS `memoryLimit`, `memorycount`.`count` AS `memoryTotal`, + `gpulimit`.`max` AS `gpuLimit`, + `gpucount`.`count` AS `gpuTotal`, `primary_storage_limit`.`max` AS `primaryStorageLimit`, `primary_storage_count`.`count` AS `primaryStorageTotal`, `secondary_storage_limit`.`max` AS `secondaryStorageLimit`, @@ -156,6 +158,12 @@ from `cloud`.`resource_count` memorycount ON account.id = memorycount.account_id and memorycount.type = 'memory' and memorycount.tag IS NULL left join + `cloud`.`resource_limit` gpulimit ON account.id = gpulimit.account_id + and gpulimit.type = 'gpu' and gpulimit.tag IS NULL + left join + `cloud`.`resource_count` gpucount ON account.id = gpucount.account_id + and gpucount.type = 'gpu' and gpucount.tag IS NULL + left join `cloud`.`resource_limit` primary_storage_limit ON account.id = primary_storage_limit.account_id and primary_storage_limit.type = 'primary_storage' and primary_storage_limit.tag IS NULL left join diff --git a/engine/schema/src/main/resources/META-INF/db/views/cloud.domain_view.sql b/engine/schema/src/main/resources/META-INF/db/views/cloud.domain_view.sql index c9f7bfc51e43..14fd87536aa1 100644 --- a/engine/schema/src/main/resources/META-INF/db/views/cloud.domain_view.sql +++ b/engine/schema/src/main/resources/META-INF/db/views/cloud.domain_view.sql @@ -55,6 +55,8 @@ select `cpucount`.`count` AS `cpuTotal`, `memorylimit`.`max` AS `memoryLimit`, `memorycount`.`count` AS `memoryTotal`, + 
`gpulimit`.`max` AS `gpuLimit`, + `gpucount`.`count` AS `gpuTotal`, `primary_storage_limit`.`max` AS `primaryStorageLimit`, `primary_storage_count`.`count` AS `primaryStorageTotal`, `secondary_storage_limit`.`max` AS `secondaryStorageLimit`, @@ -130,6 +132,12 @@ from `cloud`.`resource_count` memorycount ON domain.id = memorycount.domain_id and memorycount.type = 'memory' and memorycount.tag IS NULL left join + `cloud`.`resource_limit` gpulimit ON domain.id = gpulimit.domain_id + and gpulimit.type = 'gpu' and gpulimit.tag IS NULL + left join + `cloud`.`resource_count` gpucount ON domain.id = gpucount.domain_id + and gpucount.type = 'gpu' and gpucount.tag IS NULL + left join `cloud`.`resource_limit` primary_storage_limit ON domain.id = primary_storage_limit.domain_id and primary_storage_limit.type = 'primary_storage' and primary_storage_limit.tag IS NULL left join diff --git a/engine/schema/src/main/resources/META-INF/db/views/cloud.service_offering_view.sql b/engine/schema/src/main/resources/META-INF/db/views/cloud.service_offering_view.sql index 18e6231ef89a..eb987af3ffb6 100644 --- a/engine/schema/src/main/resources/META-INF/db/views/cloud.service_offering_view.sql +++ b/engine/schema/src/main/resources/META-INF/db/views/cloud.service_offering_view.sql @@ -73,6 +73,18 @@ SELECT `vsphere_storage_policy`.`value` AS `vsphere_storage_policy`, `lease_duration_details`.`value` AS `lease_duration`, `lease_expiry_action_details`.`value` AS `lease_expiry_action`, + `gpu_card`.`id` AS `gpu_card_id`, + `gpu_card`.`uuid` AS `gpu_card_uuid`, + `gpu_card`.`name` AS `gpu_card_name`, + `vgpu_profile`.`id` AS `vgpu_profile_id`, + `vgpu_profile`.`uuid` AS `vgpu_profile_uuid`, + `vgpu_profile`.`name` AS `vgpu_profile_name`, + `vgpu_profile`.`video_ram` AS `vgpu_profile_video_ram`, + `vgpu_profile`.`max_heads` AS `vgpu_profile_max_heads`, + `vgpu_profile`.`max_resolution_x` AS `vgpu_profile_max_resolution_x`, + `vgpu_profile`.`max_resolution_y` AS `vgpu_profile_max_resolution_y`, + 
`service_offering`.`gpu_count` AS `gpu_count`, + `service_offering`.`gpu_display` AS `gpu_display`, GROUP_CONCAT(DISTINCT(domain.id)) AS domain_id, GROUP_CONCAT(DISTINCT(domain.uuid)) AS domain_uuid, GROUP_CONCAT(DISTINCT(domain.name)) AS domain_name, @@ -89,6 +101,10 @@ FROM INNER JOIN `cloud`.`disk_offering` ON service_offering.disk_offering_id = disk_offering.id LEFT JOIN + `cloud`.`vgpu_profile` ON service_offering.vgpu_profile_id = vgpu_profile.id + LEFT JOIN + `cloud`.`gpu_card` ON vgpu_profile.card_id = gpu_card.id + LEFT JOIN `cloud`.`service_offering_details` AS `domain_details` ON `domain_details`.`service_offering_id` = `service_offering`.`id` AND `domain_details`.`name`='domainid' LEFT JOIN `cloud`.`domain` AS `domain` ON FIND_IN_SET(`domain`.`id`, `domain_details`.`value`) diff --git a/engine/schema/src/main/resources/META-INF/db/views/cloud.user_vm_view.sql b/engine/schema/src/main/resources/META-INF/db/views/cloud.user_vm_view.sql index 534579779c46..94bc8640fd54 100644 --- a/engine/schema/src/main/resources/META-INF/db/views/cloud.user_vm_view.sql +++ b/engine/schema/src/main/resources/META-INF/db/views/cloud.user_vm_view.sql @@ -103,6 +103,17 @@ SELECT `backup_offering`.`uuid` AS `backup_offering_uuid`, `backup_offering`.`id` AS `backup_offering_id`, `service_offering`.`name` AS `service_offering_name`, + `service_offering`.`vgpu_profile_id` AS `vgpu_profile_id`, + `vgpu_profile`.`uuid` AS `vgpu_profile_uuid`, + `vgpu_profile`.`name` AS `vgpu_profile_name`, + `vgpu_profile`.`video_ram` AS `vgpu_profile_video_ram`, + `vgpu_profile`.`max_heads` AS `vgpu_profile_max_heads`, + `vgpu_profile`.`max_resolution_x` AS `vgpu_profile_max_resolution_x`, + `vgpu_profile`.`max_resolution_y` AS `vgpu_profile_max_resolution_y`, + `gpu_card`.`id` AS `gpu_card_id`, + `gpu_card`.`uuid` AS `gpu_card_uuid`, + `gpu_card`.`name` AS `gpu_card_name`, + `service_offering`.`gpu_count` AS `gpu_count`, `disk_offering`.`name` AS `disk_offering_name`, `backup_offering`.`name` AS 
`backup_offering_name`, `storage_pool`.`id` AS `pool_id`, @@ -174,7 +185,7 @@ SELECT `lease_expiry_action`.`value` AS `lease_expiry_action`, `lease_action_execution`.`value` AS `lease_action_execution` FROM - (((((((((((((((((((((((((((((((((((`user_vm` + (((((((((((((((((((((((((((((((((((((`user_vm` JOIN `vm_instance` ON (((`vm_instance`.`id` = `user_vm`.`id`) AND ISNULL(`vm_instance`.`removed`)))) JOIN `account` ON ((`vm_instance`.`account_id` = `account`.`id`))) @@ -192,6 +203,8 @@ FROM LEFT JOIN `service_offering` ON ((`vm_instance`.`service_offering_id` = `service_offering`.`id`))) LEFT JOIN `disk_offering` `svc_disk_offering` ON ((`volumes`.`disk_offering_id` = `svc_disk_offering`.`id`))) LEFT JOIN `disk_offering` ON ((`volumes`.`disk_offering_id` = `disk_offering`.`id`))) + LEFT JOIN `vgpu_profile` ON ((`service_offering`.`vgpu_profile_id` = `vgpu_profile`.`id`))) + LEFT JOIN `gpu_card` ON ((`vgpu_profile`.`card_id` = `gpu_card`.`id`))) LEFT JOIN `backup_offering` ON ((`vm_instance`.`backup_offering_id` = `backup_offering`.`id`))) LEFT JOIN `storage_pool` ON ((`volumes`.`pool_id` = `storage_pool`.`id`))) LEFT JOIN `security_group_vm_map` ON ((`vm_instance`.`id` = `security_group_vm_map`.`instance_id`))) diff --git a/engine/schema/src/test/java/com/cloud/gpu/dao/GpuCardDaoImplTest.java b/engine/schema/src/test/java/com/cloud/gpu/dao/GpuCardDaoImplTest.java new file mode 100644 index 000000000000..e0a283add992 --- /dev/null +++ b/engine/schema/src/test/java/com/cloud/gpu/dao/GpuCardDaoImplTest.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package com.cloud.gpu.dao; + +import com.cloud.gpu.GpuCardVO; +import com.cloud.utils.db.SearchCriteria; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.ArgumentCaptor; +import org.mockito.InjectMocks; +import org.mockito.Spy; +import org.mockito.junit.MockitoJUnitRunner; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; + +@RunWith(MockitoJUnitRunner.class) +public class GpuCardDaoImplTest { + + @Spy + @InjectMocks + GpuCardDaoImpl gpuCardDaoImpl = new GpuCardDaoImpl(); + + @Test + public void findByVendorIdAndDeviceId() { + doReturn(mock(GpuCardVO.class)).when(gpuCardDaoImpl).findOneBy(any(SearchCriteria.class)); + + GpuCardVO gpuCard = gpuCardDaoImpl.findByVendorIdAndDeviceId("0d1a", "1a3b"); + Assert.assertNotNull("Expected non-null gpu card", gpuCard); + + ArgumentCaptor scCaptor = ArgumentCaptor.forClass(SearchCriteria.class); + verify(gpuCardDaoImpl).findOneBy(scCaptor.capture()); + Assert.assertEquals("Expected correct where clause", + "gpu_card.vendor_id = ? 
AND gpu_card.device_id = ?", + scCaptor.getValue().getWhereClause().trim()); + } +} diff --git a/engine/schema/src/test/java/com/cloud/gpu/dao/GpuDeviceDaoImplTest.java b/engine/schema/src/test/java/com/cloud/gpu/dao/GpuDeviceDaoImplTest.java new file mode 100644 index 000000000000..1780fbd3df38 --- /dev/null +++ b/engine/schema/src/test/java/com/cloud/gpu/dao/GpuDeviceDaoImplTest.java @@ -0,0 +1,277 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package com.cloud.gpu.dao; + +import com.cloud.gpu.GpuDeviceVO; +import com.cloud.utils.db.Filter; +import com.cloud.utils.db.SearchCriteria; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.ArgumentCaptor; +import org.mockito.InjectMocks; +import org.mockito.Spy; +import org.mockito.junit.MockitoJUnitRunner; + +import java.util.Collections; +import java.util.List; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +@RunWith(MockitoJUnitRunner.class) +public class GpuDeviceDaoImplTest { + + @Spy + @InjectMocks + GpuDeviceDaoImpl gpuDeviceDao = new GpuDeviceDaoImpl(); + + @Before + public void setUp() throws Exception { + } + + @After + public void tearDown() throws Exception { + } + + @Test + public void listByIds_emptyList() { + List devices = gpuDeviceDao.listByIds(null); + Assert.assertTrue("Expected empty list", devices.isEmpty()); + devices = gpuDeviceDao.listByIds(Collections.emptyList()); + Assert.assertTrue("Expected empty list", devices.isEmpty()); + } + + @Test + public void listByIds() { + doReturn(List.of(mock(GpuDeviceVO.class))).when(gpuDeviceDao).listBy(any(SearchCriteria.class)); + + List devices = gpuDeviceDao.listByIds(List.of(1L, 2L, 3L)); + + Assert.assertFalse("Expected non empty list", devices.isEmpty()); + + ArgumentCaptor scCaptor = ArgumentCaptor.forClass(SearchCriteria.class); + verify(gpuDeviceDao).listBy(scCaptor.capture()); + SearchCriteria sc = scCaptor.getValue(); + Assert.assertEquals("Expected correct where clause", "gpu_device.id IN (?,?,?)", sc.getWhereClause().trim()); + } + + @Test + public void findByHostIdAndBusAddress() { + doReturn(mock(GpuDeviceVO.class)).when(gpuDeviceDao).findOneBy(any(SearchCriteria.class)); + + GpuDeviceVO device = 
gpuDeviceDao.findByHostIdAndBusAddress(1L, "0000:00:1f.6"); + + Assert.assertNotNull("Expected non-null device", device); + + ArgumentCaptor scCaptor = ArgumentCaptor.forClass(SearchCriteria.class); + verify(gpuDeviceDao).findOneBy(scCaptor.capture()); + Assert.assertEquals("Expected correct where clause", "gpu_device.host_id = ? AND gpu_device.bus_address = ?", + scCaptor.getValue().getWhereClause().trim()); + } + + @Test + public void listByHostId() { + doReturn(List.of(mock(GpuDeviceVO.class))).when(gpuDeviceDao).listBy(any(SearchCriteria.class)); + + List devices = gpuDeviceDao.listByHostId(1L); + + Assert.assertFalse("Expected non empty list", devices.isEmpty()); + + ArgumentCaptor scCaptor = ArgumentCaptor.forClass(SearchCriteria.class); + verify(gpuDeviceDao).listBy(scCaptor.capture()); + Assert.assertEquals("Expected correct where clause", "gpu_device.host_id = ?", + scCaptor.getValue().getWhereClause().trim()); + } + + @Test + public void listByVmId() { + doReturn(List.of(mock(GpuDeviceVO.class))).when(gpuDeviceDao).listBy(any(SearchCriteria.class)); + + List devices = gpuDeviceDao.listByVmId(1L); + + Assert.assertFalse("Expected non empty list", devices.isEmpty()); + ArgumentCaptor scCaptor = ArgumentCaptor.forClass(SearchCriteria.class); + verify(gpuDeviceDao).listBy(scCaptor.capture()); + + Assert.assertEquals("Expected correct where clause", "gpu_device.vm_id = ?", + scCaptor.getValue().getWhereClause().trim()); + } + + @Test + public void isVgpuProfileInUse() { + doReturn(1).when(gpuDeviceDao).getCount(any(SearchCriteria.class)); + + boolean vgpuProfileInUse = gpuDeviceDao.isVgpuProfileInUse(1L); + + Assert.assertTrue("Expected vGPU profile to be in use", vgpuProfileInUse); + + ArgumentCaptor scCaptor = ArgumentCaptor.forClass(SearchCriteria.class); + verify(gpuDeviceDao).getCount(scCaptor.capture()); + Assert.assertEquals("Expected correct where clause", "gpu_device.vgpu_profile_id = ?", + scCaptor.getValue().getWhereClause().trim()); + } + + @Test + 
public void isGpuCardInUse() { + doReturn(1).when(gpuDeviceDao).getCount(any(SearchCriteria.class)); + + boolean vgpuProfileInUse = gpuDeviceDao.isGpuCardInUse(1L); + + Assert.assertTrue("Expected GPU Card to be in use", vgpuProfileInUse); + + ArgumentCaptor scCaptor = ArgumentCaptor.forClass(SearchCriteria.class); + verify(gpuDeviceDao).getCount(scCaptor.capture()); + Assert.assertEquals("Expected correct where clause", "gpu_device.card_id = ?", + scCaptor.getValue().getWhereClause().trim()); + } + + @Test + public void listByHostAndVm() { + doReturn(List.of(mock(GpuDeviceVO.class))).when(gpuDeviceDao).search(any(SearchCriteria.class), any()); + + List devices = gpuDeviceDao.listByHostAndVm(1L, 2L); + + Assert.assertFalse("Expected non empty list", devices.isEmpty()); + + ArgumentCaptor scCaptor = ArgumentCaptor.forClass(SearchCriteria.class); + ArgumentCaptor filterCaptor = ArgumentCaptor.forClass(Filter.class); + verify(gpuDeviceDao).search(scCaptor.capture(), filterCaptor.capture()); + Assert.assertEquals("Expected correct where clause", "gpu_device.host_id = ? AND gpu_device.vm_id = ?", + scCaptor.getValue().getWhereClause().trim()); + Assert.assertNull("Expected no filter", filterCaptor.getValue()); + } + + @Test + public void listDevicesForAllocation() { + doReturn(List.of(mock(GpuDeviceVO.class))).when(gpuDeviceDao).search(any(SearchCriteria.class), any()); + + List devices = gpuDeviceDao.listDevicesForAllocation(1L, 2L); + + Assert.assertFalse("Expected non empty list", devices.isEmpty()); + + ArgumentCaptor scCaptor = ArgumentCaptor.forClass(SearchCriteria.class); + ArgumentCaptor filterCaptor = ArgumentCaptor.forClass(Filter.class); + verify(gpuDeviceDao).search(scCaptor.capture(), filterCaptor.capture()); + Assert.assertEquals("Expected correct where clause", + "gpu_device.host_id = ? AND gpu_device.vgpu_profile_id=? AND gpu_device.state = ? AND gpu_device" + + ".managed_state = ? 
AND gpu_device.type != ?", + scCaptor.getValue().getWhereClause().trim()); + Assert.assertNull("Expected no filter", filterCaptor.getValue()); + } + + @Test + public void searchAndCountGpuDevices() { + } + + @Test + public void getDistinctGpuCardIds_no_devices() { + doReturn(null).when(gpuDeviceDao).listBy(any(SearchCriteria.class)); + + List cardIds = gpuDeviceDao.getDistinctGpuCardIds(); + + Assert.assertTrue("Expected empty list", cardIds.isEmpty()); + + ArgumentCaptor scCaptor = ArgumentCaptor.forClass(SearchCriteria.class); + verify(gpuDeviceDao).listBy(scCaptor.capture()); + Assert.assertEquals("Expected correct where clause", "", scCaptor.getValue().getWhereClause().trim()); + } + + + @Test + public void getDistinctGpuCardIds() { + GpuDeviceVO device1 = mock(GpuDeviceVO.class); + GpuDeviceVO device2 = mock(GpuDeviceVO.class); + GpuDeviceVO device3 = mock(GpuDeviceVO.class); + when(device1.getCardId()).thenReturn(1L); + when(device2.getCardId()).thenReturn(2L); + when(device3.getCardId()).thenReturn(1L); + + doReturn(List.of(device1, device2, device3)).when(gpuDeviceDao).listBy(any(SearchCriteria.class)); + + List cardIds = gpuDeviceDao.getDistinctGpuCardIds(); + + Assert.assertEquals("Expected 2 card IDs", 2, cardIds.size()); + + Assert.assertTrue("Expected card ID 1 in list", cardIds.contains(1L)); + Assert.assertTrue("Expected card ID 2 in list", cardIds.contains(2L)); + + ArgumentCaptor scCaptor = ArgumentCaptor.forClass(SearchCriteria.class); + verify(gpuDeviceDao).listBy(scCaptor.capture()); + Assert.assertEquals("Expected correct where clause", "", scCaptor.getValue().getWhereClause().trim()); + } + + @Test + public void getDistinctVgpuProfileIds_no_devices() { + doReturn(null).when(gpuDeviceDao).listBy(any(SearchCriteria.class)); + + List cardIds = gpuDeviceDao.getDistinctVgpuProfileIds(); + + Assert.assertTrue("Expected empty list", cardIds.isEmpty()); + + ArgumentCaptor scCaptor = ArgumentCaptor.forClass(SearchCriteria.class); + 
verify(gpuDeviceDao).listBy(scCaptor.capture()); + Assert.assertEquals("Expected correct where clause", "", scCaptor.getValue().getWhereClause().trim()); + } + + + @Test + public void getDistinctVgpuProfileIds() { + GpuDeviceVO device1 = mock(GpuDeviceVO.class); + GpuDeviceVO device2 = mock(GpuDeviceVO.class); + GpuDeviceVO device3 = mock(GpuDeviceVO.class); + when(device1.getVgpuProfileId()).thenReturn(1L); + when(device2.getVgpuProfileId()).thenReturn(2L); + when(device3.getVgpuProfileId()).thenReturn(1L); + + doReturn(List.of(device1, device2, device3)).when(gpuDeviceDao).listBy(any(SearchCriteria.class)); + + List cardIds = gpuDeviceDao.getDistinctVgpuProfileIds(); + + Assert.assertEquals("Expected 2 VgpuProfile IDs", 2, cardIds.size()); + + Assert.assertTrue("Expected VgpuProfile ID 1 in list", cardIds.contains(1L)); + Assert.assertTrue("Expected VgpuProfile ID 2 in list", cardIds.contains(2L)); + + ArgumentCaptor scCaptor = ArgumentCaptor.forClass(SearchCriteria.class); + verify(gpuDeviceDao).listBy(scCaptor.capture()); + Assert.assertEquals("Expected correct where clause", "", scCaptor.getValue().getWhereClause().trim()); + } + + + @Test + public void listByParentGpuDeviceId() { + doReturn(List.of(mock(GpuDeviceVO.class))).when(gpuDeviceDao).listBy(any(SearchCriteria.class)); + + List devices = gpuDeviceDao.listByParentGpuDeviceId(1L); + + Assert.assertFalse("Expected non empty list", devices.isEmpty()); + + ArgumentCaptor scCaptor = ArgumentCaptor.forClass(SearchCriteria.class); + verify(gpuDeviceDao).listBy(scCaptor.capture()); + Assert.assertEquals("Expected correct where clause", "gpu_device.parent_gpu_device_id = ?", + scCaptor.getValue().getWhereClause().trim()); + } +} diff --git a/engine/schema/src/test/java/com/cloud/gpu/dao/VgpuProfileDaoImplTest.java b/engine/schema/src/test/java/com/cloud/gpu/dao/VgpuProfileDaoImplTest.java new file mode 100644 index 000000000000..cd7199d020f3 --- /dev/null +++ 
b/engine/schema/src/test/java/com/cloud/gpu/dao/VgpuProfileDaoImplTest.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package com.cloud.gpu.dao; + +import com.cloud.gpu.VgpuProfileVO; +import com.cloud.utils.db.SearchCriteria; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.ArgumentCaptor; +import org.mockito.InjectMocks; +import org.mockito.Spy; +import org.mockito.junit.MockitoJUnitRunner; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; + +@RunWith(MockitoJUnitRunner.class) +public class VgpuProfileDaoImplTest { + + @Spy + @InjectMocks + VgpuProfileDaoImpl vgpuProfileDaoImpl = new VgpuProfileDaoImpl(); + + @Test + public void findByNameAndCardId() { + doReturn(mock(VgpuProfileVO.class)).when(vgpuProfileDaoImpl).findOneBy(any(SearchCriteria.class)); + + VgpuProfileVO vgpuProfile = vgpuProfileDaoImpl.findByNameAndCardId("test-profile", 1L); + Assert.assertNotNull("Expected non-null vgpu profile", vgpuProfile); + + ArgumentCaptor scCaptor = ArgumentCaptor.forClass(SearchCriteria.class); + 
verify(vgpuProfileDaoImpl).findOneBy(scCaptor.capture()); + Assert.assertEquals("Expected correct where clause", + "vgpu_profile.name = ? AND vgpu_profile.card_id=?", + scCaptor.getValue().getWhereClause().trim()); + } + + @Test + public void removeByCardId() { + doReturn(1).when(vgpuProfileDaoImpl).remove(any(SearchCriteria.class)); + + int removed = vgpuProfileDaoImpl.removeByCardId(123L); + Assert.assertEquals("Expected one vgpu profile removed", 1, removed); + + ArgumentCaptor scCaptor = ArgumentCaptor.forClass(SearchCriteria.class); + verify(vgpuProfileDaoImpl).remove(scCaptor.capture()); + Assert.assertEquals("Expected correct where clause", "vgpu_profile.card_id=?", + scCaptor.getValue().getWhereClause().trim()); + } +} diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java index d96290a86acb..3a6512cd01c7 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java @@ -74,6 +74,7 @@ import org.apache.cloudstack.command.ReconcileCommandService; import org.apache.cloudstack.command.ReconcileCommandUtils; import org.apache.cloudstack.engine.orchestration.service.NetworkOrchestrationService; +import org.apache.cloudstack.gpu.GpuDevice; import org.apache.cloudstack.storage.command.browser.ListDataStoreObjectsCommand; import org.apache.cloudstack.storage.configdrive.ConfigDrive; import org.apache.cloudstack.storage.to.PrimaryDataStoreTO; @@ -103,9 +104,9 @@ import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.builder.ReflectionToStringBuilder; import org.apache.commons.lang3.builder.ToStringStyle; -import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.LogManager; import 
org.apache.logging.log4j.ThreadContext; +import org.apache.logging.log4j.Logger; import org.apache.xerces.impl.xpath.regex.Match; import org.joda.time.Duration; import org.libvirt.Connect; @@ -130,7 +131,6 @@ import org.xml.sax.InputSource; import org.xml.sax.SAXException; - import com.cloud.agent.api.Answer; import com.cloud.agent.api.Command; import com.cloud.agent.api.HostVmStateReportEntry; @@ -143,6 +143,7 @@ import com.cloud.agent.api.StartupCommand; import com.cloud.agent.api.StartupRoutingCommand; import com.cloud.agent.api.StartupStorageCommand; +import com.cloud.agent.api.VgpuTypesInfo; import com.cloud.agent.api.VmDiskStatsEntry; import com.cloud.agent.api.VmNetworkStatsEntry; import com.cloud.agent.api.VmStatsEntry; @@ -213,8 +214,8 @@ import com.cloud.network.Networks.RouterPrivateIpStrategy; import com.cloud.network.Networks.TrafficType; import com.cloud.resource.AgentStatusUpdater; -import com.cloud.resource.ResourceStatusUpdater; import com.cloud.resource.RequestWrapper; +import com.cloud.resource.ResourceStatusUpdater; import com.cloud.resource.ServerResource; import com.cloud.resource.ServerResourceBase; import com.cloud.storage.JavaStorageLayer; @@ -241,6 +242,10 @@ import com.cloud.vm.VmDetailConstants; import com.google.gson.Gson; +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; /** * LibvirtComputingResource execute requests on the computing/routing host using @@ -379,6 +384,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv private String modifyVlanPath; private String versionStringPath; + private String gpuDiscoveryPath; private String patchScriptPath; private String createVmPath; private String manageSnapshotPath; @@ -487,12 +493,15 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv protected String agentHooksBasedir = "/etc/cloudstack/agent/hooks"; protected String 
agentHooksLibvirtXmlScript = "libvirt-vm-xml-transformer.groovy"; + protected String agentHooksLibvirtXmlShellScript = "libvirt-vm-xml-transformer.sh"; protected String agentHooksLibvirtXmlMethod = "transform"; protected String agentHooksVmOnStartScript = "libvirt-vm-state-change.groovy"; + protected String agentHooksVmOnStartShellScript = "libvirt-vm-state-change.sh"; protected String agentHooksVmOnStartMethod = "onStart"; protected String agentHooksVmOnStopScript = "libvirt-vm-state-change.groovy"; + protected String agentHooksVmOnStopShellScript = "libvirt-vm-state-change.sh"; protected String agentHooksVmOnStopMethod = "onStop"; protected static final String LOCAL_STORAGE_PATH = "local.storage.path"; @@ -686,15 +695,15 @@ public void setHostCpuMaxCapacity(int hostCpuMaxCapacity) { } public LibvirtKvmAgentHook getTransformer() throws IOException { - return new LibvirtKvmAgentHook(agentHooksBasedir, agentHooksLibvirtXmlScript, agentHooksLibvirtXmlMethod); + return new LibvirtKvmAgentHook(agentHooksBasedir, agentHooksLibvirtXmlScript, agentHooksLibvirtXmlShellScript, agentHooksLibvirtXmlMethod); } public LibvirtKvmAgentHook getStartHook() throws IOException { - return new LibvirtKvmAgentHook(agentHooksBasedir, agentHooksVmOnStartScript, agentHooksVmOnStartMethod); + return new LibvirtKvmAgentHook(agentHooksBasedir, agentHooksVmOnStartScript, agentHooksVmOnStartShellScript, agentHooksVmOnStartMethod); } public LibvirtKvmAgentHook getStopHook() throws IOException { - return new LibvirtKvmAgentHook(agentHooksBasedir, agentHooksVmOnStopScript, agentHooksVmOnStopMethod); + return new LibvirtKvmAgentHook(agentHooksBasedir, agentHooksVmOnStopScript, agentHooksVmOnStopShellScript, agentHooksVmOnStopMethod); } public LibvirtUtilitiesHelper getLibvirtUtilitiesHelper() { @@ -1039,6 +1048,11 @@ public boolean configure(final String name, final Map params) th throw new ConfigurationException("Unable to find versions.sh"); } + gpuDiscoveryPath = Script.findScript(kvmScriptsDir, 
"gpudiscovery.sh"); + if (gpuDiscoveryPath == null) { + throw new ConfigurationException("Unable to find gpudiscovery.sh"); + } + patchScriptPath = Script.findScript(kvmScriptsDir, "patch.sh"); if (patchScriptPath == null) { throw new ConfigurationException("Unable to find patch.sh"); @@ -1593,18 +1607,27 @@ private void configureAgentHooks() { agentHooksLibvirtXmlScript = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.AGENT_HOOKS_LIBVIRT_VM_XML_TRANSFORMER_SCRIPT); LOGGER.debug("agent.hooks.libvirt_vm_xml_transformer.script is " + agentHooksLibvirtXmlScript); + agentHooksLibvirtXmlShellScript = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.AGENT_HOOKS_LIBVIRT_VM_XML_TRANSFORMER_SHELL_SCRIPT); + LOGGER.debug("agent.hooks.libvirt_vm_xml_transformer.shell_script is " + agentHooksLibvirtXmlShellScript); + agentHooksLibvirtXmlMethod = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.AGENT_HOOKS_LIBVIRT_VM_XML_TRANSFORMER_METHOD); LOGGER.debug("agent.hooks.libvirt_vm_xml_transformer.method is " + agentHooksLibvirtXmlMethod); agentHooksVmOnStartScript = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.AGENT_HOOKS_LIBVIRT_VM_ON_START_SCRIPT); LOGGER.debug("agent.hooks.libvirt_vm_on_start.script is " + agentHooksVmOnStartScript); + agentHooksVmOnStartShellScript = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.AGENT_HOOKS_LIBVIRT_VM_ON_START_SHELL_SCRIPT); + LOGGER.debug("agent.hooks.libvirt_vm_on_start.shell_script is " + agentHooksVmOnStartShellScript); + agentHooksVmOnStartMethod = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.AGENT_HOOKS_LIBVIRT_VM_ON_START_METHOD); LOGGER.debug("agent.hooks.libvirt_vm_on_start.method is " + agentHooksVmOnStartMethod); agentHooksVmOnStopScript = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.AGENT_HOOKS_LIBVIRT_VM_ON_STOP_SCRIPT); LOGGER.debug("agent.hooks.libvirt_vm_on_stop.script is " + agentHooksVmOnStopScript); + agentHooksVmOnStopShellScript = 
AgentPropertiesFileHandler.getPropertyValue(AgentProperties.AGENT_HOOKS_LIBVIRT_VM_ON_STOP_SHELL_SCRIPT); + LOGGER.debug("agent.hooks.libvirt_vm_on_stop.shell_script is " + agentHooksVmOnStopShellScript); + agentHooksVmOnStopMethod = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.AGENT_HOOKS_LIBVIRT_VM_ON_STOP_METHOD); LOGGER.debug("agent.hooks.libvirt_vm_on_stop.method is " + agentHooksVmOnStopMethod); } @@ -1955,6 +1978,173 @@ public boolean passCmdLine(final String vmName, final String cmdLine) throws Int return true; } + public List getGpuDevices() { + LOGGER.debug("Executing GPU discovery script at: {}", gpuDiscoveryPath); + final Script command = new Script(gpuDiscoveryPath, Duration.standardSeconds(30), LOGGER); + + OutputInterpreter.AllLinesParser parser = new OutputInterpreter.AllLinesParser(); + String result = command.execute(parser); + if (result == null) { + LOGGER.debug("GPU discovery command executed successfully"); + result = parser.getLines(); + } + + if (result == null || result.trim().isEmpty()) { + LOGGER.error("GPU discovery failed: command returned null or empty result. Script path: {}, Exit code: {}", + gpuDiscoveryPath, command.getExitValue()); + return Collections.emptyList(); + } + + LOGGER.debug("GPU discovery result: {}", result); + + // This will be used to update the GPU device list when agent on a host is unavailable or the VM is imported. 
+ return parseGpuDevicesFromResult(result); + } + + protected List parseGpuDevicesFromResult(String result) { + List gpuDevices = new ArrayList<>(); + try { + JsonParser jsonParser = new JsonParser(); + JsonArray jsonArray = jsonParser.parse(result).getAsJsonObject().get("gpus").getAsJsonArray(); + + for (JsonElement jsonElement : jsonArray) { + JsonObject jsonObject = jsonElement.getAsJsonObject(); + String busAddress = jsonObject.get("pci_address").getAsString(); + String vendorId = jsonObject.get("vendor_id").getAsString(); + String vendorName = jsonObject.get("vendor").getAsString(); + String deviceId = jsonObject.get("device_id").getAsString(); + String deviceName = jsonObject.get("device").getAsString(); + + // vgpu instances uses mdev uuid + // vf instances uses vf_pci_address + + JsonArray vgpuInstances = jsonObject.get("vgpu_instances").getAsJsonArray(); + JsonArray vfInstances = jsonObject.get("vf_instances").getAsJsonArray(); + + JsonObject fullPassthrough = jsonObject.get("full_passthrough").getAsJsonObject(); + boolean fullPassthroughEnabled = fullPassthrough.get("enabled").getAsInt() == 1; + + String numaNode = getJsonStringValueOrNull(jsonObject, "numa_node"); + String pciRoot = getJsonStringValueOrNull(jsonObject, "pci_root"); + + Long maxInstances = getJsonLongValueOrNull(jsonObject, "max_instances"); + Long videoRam = getJsonLongValueOrNull(jsonObject, "video_ram"); + Long maxHeads = getJsonLongValueOrNull(jsonObject, "max_heads"); + Long maxResolutionX = getJsonLongValueOrNull(jsonObject, "max_resolution_x"); + Long maxResolutionY = getJsonLongValueOrNull(jsonObject, "max_resolution_y"); + + VgpuTypesInfo vgpuType = new VgpuTypesInfo(GpuDevice.DeviceType.PCI, vendorName + " " + deviceName, + "passthrough", busAddress, vendorId, vendorName, deviceId, deviceName, numaNode, pciRoot); + + vgpuType.setMaxVgpuPerGpu(maxInstances); + vgpuType.setVideoRam(videoRam); + vgpuType.setMaxHeads(maxHeads); + vgpuType.setMaxResolutionX(maxResolutionX); + 
vgpuType.setMaxResolutionY(maxResolutionY); + + if (fullPassthroughEnabled) { + vgpuType.setPassthroughEnabled(true); + } else { + vgpuType.setPassthroughEnabled(false); + } + vgpuType.setVmName(getJsonStringValueOrNull(fullPassthrough, "used_by_vm")); + + gpuDevices.add(vgpuType); + + for (JsonElement vgpuInstance : vgpuInstances) { + VgpuTypesInfo vgpu = getGpuDeviceFromVgpuInstance(vgpuInstance, busAddress, vendorId, vendorName, + deviceId, deviceName, numaNode, pciRoot); + if (vgpu != null) { + gpuDevices.add(vgpu); + } + } + + for (JsonElement vfInstance : vfInstances) { + VgpuTypesInfo vf = getGpuDeviceFromVfInstance(vfInstance, busAddress, vendorId, vendorName, + deviceId, deviceName, numaNode, pciRoot); + if (vf != null) { + gpuDevices.add(vf); + } + } + } + + } catch (Exception e) { + LOGGER.error("Failed to parse GPU discovery result: {}", e.getMessage(), e); + } + return gpuDevices; + } + + protected VgpuTypesInfo getGpuDeviceFromVgpuInstance(JsonElement vgpuInstance, String busAddress, String vendorId, + String vendorName, String deviceId, String deviceName, String numaNode, String pciRoot) { + JsonObject vgpuInstanceJsonObject = vgpuInstance.getAsJsonObject(); + String mdevUuid = getJsonStringValueOrNull(vgpuInstanceJsonObject, "mdev_uuid"); + String profileName = getJsonStringValueOrNull(vgpuInstanceJsonObject, "profile_name"); + if (profileName == null || profileName.isEmpty()) { + return null; // Skip if profile name is not provided + } + Long maxInstances = getJsonLongValueOrNull(vgpuInstanceJsonObject, "max_instances"); + Long videoRam = getJsonLongValueOrNull(vgpuInstanceJsonObject, "video_ram"); + Long maxHeads = getJsonLongValueOrNull(vgpuInstanceJsonObject, "max_heads"); + Long maxResolutionX = getJsonLongValueOrNull(vgpuInstanceJsonObject, "max_resolution_x"); + Long maxResolutionY = getJsonLongValueOrNull(vgpuInstanceJsonObject, "max_resolution_y"); + VgpuTypesInfo device = new VgpuTypesInfo(GpuDevice.DeviceType.MDEV, vendorName + " " + 
deviceName, profileName, mdevUuid, vendorId, vendorName, deviceId, deviceName, numaNode, pciRoot); + device.setParentBusAddress(busAddress); + device.setMaxVgpuPerGpu(maxInstances); + device.setVideoRam(videoRam); + device.setMaxHeads(maxHeads); + device.setMaxResolutionX(maxResolutionX); + device.setMaxResolutionY(maxResolutionY); + device.setVmName(getJsonStringValueOrNull(vgpuInstance.getAsJsonObject(), "used_by_vm")); + return device; + } + + protected VgpuTypesInfo getGpuDeviceFromVfInstance(JsonElement vfInstance, String busAddress, String vendorId, + String vendorName, String deviceId, String deviceName, String numaNode, String pciRoot) { + JsonObject vfInstanceJsonObject = vfInstance.getAsJsonObject(); + String vfPciAddress = vfInstanceJsonObject.get("vf_pci_address").getAsString(); + String vfProfile = vfInstanceJsonObject.get("vf_profile").getAsString(); + if (vfProfile == null || vfProfile.isEmpty()) { + return null; // Skip if profile name is not provided + } + Long maxInstances = getJsonLongValueOrNull(vfInstanceJsonObject, "max_instances"); + Long videoRam = getJsonLongValueOrNull(vfInstanceJsonObject, "video_ram"); + Long maxHeads = getJsonLongValueOrNull(vfInstanceJsonObject, "max_heads"); + Long maxResolutionX = getJsonLongValueOrNull(vfInstanceJsonObject, "max_resolution_x"); + Long maxResolutionY = getJsonLongValueOrNull(vfInstanceJsonObject, "max_resolution_y"); + VgpuTypesInfo device = new VgpuTypesInfo(GpuDevice.DeviceType.PCI, vendorName + " " + deviceName, vfProfile, vfPciAddress, vendorId, vendorName, deviceId, deviceName, numaNode, pciRoot); + device.setParentBusAddress(busAddress); + device.setMaxVgpuPerGpu(maxInstances); + device.setVideoRam(videoRam); + device.setMaxHeads(maxHeads); + device.setMaxResolutionX(maxResolutionX); + device.setMaxResolutionY(maxResolutionY); + device.setVmName(getJsonStringValueOrNull(vfInstanceJsonObject, "used_by_vm")); + return device; + } + + /** + * Safely extracts a string value from a JSON object, 
returning null if the field is missing or null. + * + * @param jsonObject the JSON object to extract from + * @param fieldName the name of the field to extract + * @return the string value of the field, or null if the field is missing or null + */ + protected String getJsonStringValueOrNull(JsonObject jsonObject, String fieldName) { + JsonElement element = jsonObject.get(fieldName); + if (element == null || element.isJsonNull()) { + return null; + } + return element.getAsString(); + } + + protected Long getJsonLongValueOrNull(JsonObject jsonObject, String fieldName) { + JsonElement element = jsonObject.get(fieldName); + if (element == null || element.isJsonNull()) { + return null; + } + return element.getAsLong(); + } + boolean isDirectAttachedNetwork(final String type) { if ("untagged".equalsIgnoreCase(type)) { return true; @@ -2807,6 +2997,10 @@ protected DevicesDef createDevicesDef(VirtualMachineTO vmTO, GuestDef guest, int devices.addDevice(createConsoleDef()); devices.addDevice(createGraphicDef(vmTO)); + if (vmTO.getGpuDevice() != null && CollectionUtils.isNotEmpty(vmTO.getGpuDevice().getGpuDevices())) { + attachGpuDevices(vmTO, devices); + } + if (!isGuestS390x()) { devices.addDevice(createTabletInputDef()); } @@ -2834,6 +3028,19 @@ protected DevicesDef createDevicesDef(VirtualMachineTO vmTO, GuestDef guest, int return devices; } + protected void attachGpuDevices(final VirtualMachineTO vmTO, final DevicesDef devicesDef) { + // GPU device is not set for the VM + List gpuDevices = vmTO.getGpuDevice().getGpuDevices(); + for (VgpuTypesInfo gpuDevice : gpuDevices) { + LibvirtGpuDef gpu = new LibvirtGpuDef(); + + gpu.defGpu(gpuDevice); + + devicesDef.addDevice(gpu); + LOGGER.info("Attached GPU device " + gpuDevice.getDeviceName() + " to VM " + vmTO.getName()); + } + } + protected WatchDogDef createWatchDogDef() { return new WatchDogDef(watchDogAction, watchDogModel); } @@ -3947,6 +4154,8 @@ public StartupCommand[] initialize() { hostDistro = 
cmd.getHostDetails().get("Host.OS"); } + cmd.setGpuDevices(getGpuDevices()); + List startupCommands = new ArrayList<>(); startupCommands.add(cmd); for (int i = 0; i < localStoragePaths.size(); i++) { @@ -6214,5 +6423,4 @@ public String getHypervisorPath() { public String getGuestCpuArch() { return guestCpuArch; } - } diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java new file mode 100644 index 000000000000..80a34b33b596 --- /dev/null +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java @@ -0,0 +1,102 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package com.cloud.hypervisor.kvm.resource; + +import com.cloud.agent.api.VgpuTypesInfo; +import org.apache.cloudstack.gpu.GpuDevice; + +public class LibvirtGpuDef { + + private VgpuTypesInfo vgpuType; + + public LibvirtGpuDef() {} + + public void defGpu(VgpuTypesInfo vgpuType) { + this.vgpuType = vgpuType; + } + + @Override + public String toString() { + StringBuilder gpuBuilder = new StringBuilder(); + GpuDevice.DeviceType deviceType = vgpuType.getDeviceType(); + + if (deviceType == GpuDevice.DeviceType.MDEV) { + // Generate XML for MDEV device (vGPU, including MIG instances) + generateMdevXml(gpuBuilder); + } else { + // Generate XML for PCI device (passthrough GPU or VF) + generatePciXml(gpuBuilder); + } + + return gpuBuilder.toString(); + } + + private void generateMdevXml(StringBuilder gpuBuilder) { + String mdevUuid = vgpuType.getBusAddress(); // For MDEV devices, busAddress contains the UUID + String displayAttribute = vgpuType.isDisplay() ? "on" : "off"; + + gpuBuilder.append("\n"); + gpuBuilder.append(" \n"); + gpuBuilder.append("
\n"); + gpuBuilder.append(" \n"); + gpuBuilder.append("\n"); + } + + private void generatePciXml(StringBuilder gpuBuilder) { + String busAddress = vgpuType.getBusAddress(); + + // For VDI use cases with display=on, ramfb provides early boot framebuffer + // before GPU driver loads. This is critical for: + // - Windows VDI guests (require framebuffer during boot) + // - UEFI/OVMF firmware environments + // - ARM64 hosts (cache coherency issues with traditional VGA) + // - Multi-monitor VDI setups (primary display) + if (vgpuType.isDisplay()) { + gpuBuilder.append("\n"); + } else { + // Compute-only workloads don't need display or ramfb + gpuBuilder.append("\n"); + } + gpuBuilder.append(" \n"); + gpuBuilder.append(" \n"); + + // Parse the bus address (e.g., 00:02.0) into domain, bus, slot, function + String domain = "0x0000"; + String bus = "0x00"; + String slot = "0x00"; + String function = "0x0"; + + if (busAddress != null && !busAddress.isEmpty()) { + String[] parts = busAddress.split(":"); + if (parts.length > 1) { + bus = "0x" + parts[0]; + String[] slotFunctionParts = parts[1].split("\\."); + if (slotFunctionParts.length > 0) { + slot = "0x" + slotFunctionParts[0]; + if (slotFunctionParts.length > 1) { + function = "0x" + slotFunctionParts[1].trim(); + } + } + } + } + + gpuBuilder.append("
\n"); + gpuBuilder.append(" \n"); + gpuBuilder.append("\n"); + } +} diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtKvmAgentHook.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtKvmAgentHook.java index edcc5a053269..5d4ca37e457b 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtKvmAgentHook.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtKvmAgentHook.java @@ -17,6 +17,8 @@ package com.cloud.hypervisor.kvm.resource; +import com.cloud.utils.script.OutputInterpreter; +import com.cloud.utils.script.Script; import groovy.lang.Binding; import groovy.lang.GroovyObject; import groovy.util.GroovyScriptEngine; @@ -25,18 +27,39 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.LogManager; import org.codehaus.groovy.runtime.metaclass.MissingMethodExceptionNoStack; +import org.joda.time.Duration; import java.io.File; import java.io.IOException; public class LibvirtKvmAgentHook { private final String script; + private final String shellScript; private final String method; private final GroovyScriptEngine gse; private final Binding binding = new Binding(); protected Logger logger = LogManager.getLogger(getClass()); + public LibvirtKvmAgentHook(String path, String script, String shellScript, String method) throws IOException { + this.script = script; + this.method = method; + File full_path = new File(path, script); + if (!full_path.canRead()) { + logger.warn("Groovy script '" + full_path.toString() + "' is not available. Transformations will not be applied."); + this.gse = null; + } else { + this.gse = new GroovyScriptEngine(path); + } + full_path = new File(path, shellScript); + if (!full_path.canRead()) { + logger.warn("Shell script '" + full_path.toString() + "' is not available. 
Transformations will not be applied."); + this.shellScript = null; + } else { + this.shellScript = full_path.getAbsolutePath(); + } + } + public LibvirtKvmAgentHook(String path, String script, String method) throws IOException { this.script = script; this.method = method; @@ -47,31 +70,69 @@ public LibvirtKvmAgentHook(String path, String script, String method) throws IOE } else { this.gse = new GroovyScriptEngine(path); } + this.shellScript = null; } public boolean isInitialized() { return this.gse != null; } + /** + * Sanitizes a string for safe use as a bash command argument by escaping special characters. + * This prevents shell injection and parsing issues when passing multiline content like XML. + */ + String sanitizeBashCommandArgument(String input) { + if (input == null) { + return ""; + } + StringBuilder sanitized = new StringBuilder(); + for (char c : input.toCharArray()) { + if ("\\\"'`$|&;()<>*?![]{}~".indexOf(c) != -1) { + sanitized.append('\\'); + } + sanitized.append(c); + } + return sanitized.toString(); + } + public Object handle(Object arg) throws ResourceException, ScriptException { - if (!isInitialized()) { + Object res = arg; + if (isInitialized()) { + GroovyObject cls = (GroovyObject) this.gse.run(this.script, binding); + if (null == cls) { + logger.warn("Groovy object is not received from script '" + this.script + "'."); + return arg; + } else { + Object[] params = {logger, arg}; + try { + res = cls.invokeMethod(this.method, params); + } catch (MissingMethodExceptionNoStack e) { + logger.error("Error occurred when calling method from groovy script, {}", e); + res = arg; + } + } + } else { logger.warn("Groovy scripting engine is not initialized. 
Data transformation skipped."); - return arg; } - GroovyObject cls = (GroovyObject) this.gse.run(this.script, binding); - if (null == cls) { - logger.warn("Groovy object is not received from script '" + this.script + "'."); - return arg; - } else { - Object[] params = {logger, arg}; - try { - Object res = cls.invokeMethod(this.method, params); - return res; - } catch (MissingMethodExceptionNoStack e) { - logger.error("Error occurred when calling method from groovy script, {}", e); - return arg; + // Optional second stage: pass the method name and the (possibly Groovy-transformed) data to the shell script; on success its output replaces the result, on failure the previous result is kept + if (this.shellScript != null) { + logger.debug("Executing Shell script for transformation at: {}", this.shellScript); + final Script command = new Script(this.shellScript, Duration.standardSeconds(30), logger); + command.add(String.valueOf(this.method)); + command.add(String.valueOf(res)); + + OutputInterpreter.AllLinesParser parser = new OutputInterpreter.AllLinesParser(); + String result = command.execute(parser); + if (result == null) { + logger.debug("Shell script transformation executed successfully"); + res = parser.getLines(); + } else { + logger.warn("Error occurred when calling script for transformation: {}", result); } + } else { + logger.debug("No shell script provided for transformation. Data transformation skipped."); } + return res; } } diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtGetGPUStatsCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtGetGPUStatsCommandWrapper.java new file mode 100644 index 000000000000..9d4da0224d66 --- /dev/null +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtGetGPUStatsCommandWrapper.java @@ -0,0 +1,37 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// + +package com.cloud.hypervisor.kvm.resource.wrapper; + +import com.cloud.agent.api.Answer; +import com.cloud.agent.api.GetGPUStatsAnswer; +import com.cloud.agent.api.GetGPUStatsCommand; +import com.cloud.hypervisor.kvm.resource.LibvirtComputingResource; +import com.cloud.resource.CommandWrapper; +import com.cloud.resource.ResourceWrapper; + +@ResourceWrapper(handles = GetGPUStatsCommand.class) +public final class LibvirtGetGPUStatsCommandWrapper extends CommandWrapper { + + @Override + public Answer execute(final GetGPUStatsCommand command, final LibvirtComputingResource libvirtComputingResource) { + return new GetGPUStatsAnswer(command, libvirtComputingResource.getGpuDevices()); + } +} diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java index 0d0dcd96ffa2..94e603272537 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java @@ -23,6 +23,7 @@ import java.io.InputStream; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.HashSet; 
import java.util.List; import java.util.Map; @@ -40,9 +41,13 @@ import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; +import com.cloud.agent.api.VgpuTypesInfo; +import com.cloud.agent.api.to.GPUDeviceTO; +import com.cloud.hypervisor.kvm.resource.LibvirtGpuDef; import com.cloud.hypervisor.kvm.resource.LibvirtXMLParser; import org.apache.cloudstack.utils.security.ParserUtils; import org.apache.commons.collections.MapUtils; +import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.io.FilenameUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; @@ -226,6 +231,8 @@ Use VIR_DOMAIN_XML_SECURE (value = 1) prior to v1.0.0. xmlDesc = updateVmSharesIfNeeded(command, xmlDesc, libvirtComputingResource); + xmlDesc = updateGpuDevicesIfNeeded(command, xmlDesc, libvirtComputingResource); + dconn = libvirtUtilitiesHelper.retrieveQemuConnection(destinationUri); if (to.getType() == VirtualMachine.Type.User) { @@ -417,6 +424,116 @@ protected Set getMigrateStorageDeviceLabels(List diskDefinition return setOfLabels; } + String updateGpuDevicesIfNeeded(MigrateCommand migrateCommand, String xmlDesc, LibvirtComputingResource libvirtComputingResource) + throws ParserConfigurationException, IOException, SAXException, TransformerException { + GPUDeviceTO gpuDevice = migrateCommand.getVirtualMachine().getGpuDevice(); + if (gpuDevice == null || CollectionUtils.isEmpty(gpuDevice.getGpuDevices())) { + logger.debug("No GPU device to update for VM [{}].", migrateCommand.getVmName()); + return xmlDesc; + } + + List devices = gpuDevice.getGpuDevices(); + logger.info("Updating GPU devices for VM [{}] during migration. 
Number of devices: {}", + migrateCommand.getVmName(), devices.size()); + + // Parse XML and find devices element + DocumentBuilderFactory docFactory = ParserUtils.getSaferDocumentBuilderFactory(); + DocumentBuilder docBuilder = docFactory.newDocumentBuilder(); + Document document; + try (InputStream inputStream = IOUtils.toInputStream(xmlDesc, StandardCharsets.UTF_8)) { + document = docBuilder.parse(inputStream); + } + + NodeList devicesList = document.getElementsByTagName("devices"); + if (devicesList.getLength() == 0) { + logger.warn("No devices section found in XML for VM [{}]", migrateCommand.getVmName()); + return xmlDesc; + } + + Element devicesElement = (Element) devicesList.item(0); + + // Remove existing GPU hostdev elements and add new ones + removeExistingGpuHostdevElements(devicesElement); + addNewGpuHostdevElements(document, devicesElement, devices); + + String newXmlDesc = LibvirtXMLParser.getXml(document); + logger.debug("Updated XML configuration for VM [{}] with new GPU devices", migrateCommand.getVmName()); + + return newXmlDesc; + } + + /** + * Removes existing GPU hostdev elements from the devices section. + * GPU devices are identified as hostdev elements with type='pci' or type='mdev'. 
+ */ + private void removeExistingGpuHostdevElements(Element devicesElement) { + NodeList hostdevNodes = devicesElement.getElementsByTagName("hostdev"); + List nodesToRemove = new ArrayList<>(); + + for (int i = 0; i < hostdevNodes.getLength(); i++) { + Node hostdevNode = hostdevNodes.item(i); + if (hostdevNode.getNodeType() == Node.ELEMENT_NODE) { + Element hostdevElement = (Element) hostdevNode; + String hostdevType = hostdevElement.getAttribute("type"); + + // Candidates for removal are hostdev elements with type='pci' or type='mdev' + if ("pci".equals(hostdevType) || "mdev".equals(hostdevType)) { + // Only mode='subsystem' entries are removed. NOTE(review): nothing here checks that the device is a GPU, so any other pci/mdev hostdev with this mode is removed as well - confirm this is intended + String mode = hostdevElement.getAttribute("mode"); + if ("subsystem".equals(mode)) { + nodesToRemove.add(hostdevNode); + } + } + } + } + + // Remove the collected nodes only after the scan, so the node list is not changed while it is being indexed + for (Node node : nodesToRemove) { + devicesElement.removeChild(node); + } + + logger.debug("Removed {} existing GPU hostdev elements", nodesToRemove.size()); + } + + /** + * Adds new GPU hostdev elements to the devices section based on the GPU devices + * allocated on the destination host. + */ + private void addNewGpuHostdevElements(Document document, Element devicesElement, List devices) + throws ParserConfigurationException, IOException, SAXException { + if (devices.isEmpty()) { + return; + } + + // Create the DocumentBuilder once and reuse it for every device + DocumentBuilderFactory factory = ParserUtils.getSaferDocumentBuilderFactory(); + DocumentBuilder builder = factory.newDocumentBuilder(); + + for (VgpuTypesInfo deviceInfo : devices) { + Element hostdevElement = createGpuHostdevElement(document, deviceInfo, builder); + devicesElement.appendChild(hostdevElement); + logger.debug("Added new GPU hostdev element for device: {} (type: {}, busAddress: {})", + deviceInfo.getDeviceName(), deviceInfo.getDeviceType(), deviceInfo.getBusAddress()); + } + } + + /** + * Creates a hostdev element for a GPU device using LibvirtGpuDef. 
+ */ + private Element createGpuHostdevElement(Document document, VgpuTypesInfo deviceInfo, DocumentBuilder builder) + throws IOException, SAXException { + // Generate GPU XML using LibvirtGpuDef + LibvirtGpuDef gpuDef = new LibvirtGpuDef(); + gpuDef.defGpu(deviceInfo); + String gpuXml = gpuDef.toString(); + + // Parse and import into target document + try (InputStream xmlStream = IOUtils.toInputStream(gpuXml, StandardCharsets.UTF_8)) { + Document gpuDocument = builder.parse(xmlStream); + Element hostdevElement = gpuDocument.getDocumentElement(); + return (Element) document.importNode(hostdevElement, true); + } + } /** * Checks if the CPU shares are equal in the source host and destination host. diff --git a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResourceTest.java b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResourceTest.java index ea380f1a8a74..fc4ec048d492 100644 --- a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResourceTest.java +++ b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResourceTest.java @@ -23,6 +23,7 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.mockito.ArgumentMatchers.any; @@ -63,6 +64,8 @@ import com.cloud.utils.net.NetUtils; import com.cloud.vm.VmDetailConstants; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; import org.apache.cloudstack.api.ApiConstants.IoDriverPolicy; import org.apache.cloudstack.storage.command.AttachAnswer; import org.apache.cloudstack.storage.command.AttachCommand; @@ -163,6 +166,7 @@ import com.cloud.agent.api.UnsupportedAnswer; import com.cloud.agent.api.UpdateHostPasswordCommand; import 
com.cloud.agent.api.UpgradeSnapshotCommand; +import com.cloud.agent.api.VgpuTypesInfo; import com.cloud.agent.api.VmStatsEntry; import com.cloud.agent.api.check.CheckSshCommand; import com.cloud.agent.api.proxy.CheckConsoleProxyLoadCommand; @@ -6624,11 +6628,13 @@ public void recreateCheckpointsOnVmTestVolumesHaveCheckpoints() { Mockito.doReturn(List.of("path")).when(volumeObjectToMock).getCheckpointPaths(); - Mockito.doNothing().when(libvirtComputingResourceSpy).recreateCheckpointsOfDisk(Mockito.any(), Mockito.any(), Mockito.any()); + Mockito.doNothing().when(libvirtComputingResourceSpy) + .recreateCheckpointsOfDisk(Mockito.any(), Mockito.any(), Mockito.any()); boolean result = libvirtComputingResourceSpy.recreateCheckpointsOnVm(List.of(volumeObjectToMock), null, null); - Mockito.verify(libvirtComputingResourceSpy, Mockito.times(1)).recreateCheckpointsOfDisk(Mockito.any(), Mockito.any(), Mockito.any()); + Mockito.verify(libvirtComputingResourceSpy, Mockito.times(1)) + .recreateCheckpointsOfDisk(Mockito.any(), Mockito.any(), Mockito.any()); Assert.assertTrue(result); } @@ -6779,4 +6785,345 @@ public void manuallyDeleteUnusedSnapshotFileTestLibvirtSupportingFlagDeleteOnCom libvirtComputingResourceSpy.manuallyDeleteUnusedSnapshotFile(true, ""); Mockito.verify(libvirtComputingResourceSpy, Mockito.never()).deleteIfExists(""); } + + @Test + public void testGetJsonStringValueOrNullWithValidStringValue() { + // Test case: field exists and has a string value + String jsonString = "{\"testField\": \"testValue\"}"; + JsonParser jsonParser = new JsonParser(); + JsonObject jsonObject = jsonParser.parse(jsonString).getAsJsonObject(); + + String result = libvirtComputingResourceSpy.getJsonStringValueOrNull(jsonObject, "testField"); + + assertEquals("testValue", result); + } + + @Test + public void testGetJsonStringValueOrNull_withEmptyStringValue() { + // Test case: field exists and has an empty string value + String jsonString = "{\"testField\": \"\"}"; + JsonParser jsonParser 
= new JsonParser(); + JsonObject jsonObject = jsonParser.parse(jsonString).getAsJsonObject(); + + String result = libvirtComputingResourceSpy.getJsonStringValueOrNull(jsonObject, "testField"); + + assertEquals("", result); + } + + @Test + public void testGetJsonStringValueOrNull_withNullValue() { + // Test case: field exists but is null + String jsonString = "{\"testField\": null}"; + JsonParser jsonParser = new JsonParser(); + JsonObject jsonObject = jsonParser.parse(jsonString).getAsJsonObject(); + + String result = libvirtComputingResourceSpy.getJsonStringValueOrNull(jsonObject, "testField"); + + assertNull(result); + } + + @Test + public void testGetJsonStringValueOrNull_withMissingField() { + // Test case: field doesn't exist in the JSON object + String jsonString = "{\"otherField\": \"otherValue\"}"; + JsonParser jsonParser = new JsonParser(); + JsonObject jsonObject = jsonParser.parse(jsonString).getAsJsonObject(); + + String result = libvirtComputingResourceSpy.getJsonStringValueOrNull(jsonObject, "missingField"); + + assertNull(result); + } + + @Test + public void testGetJsonStringValueOrNull_withEmptyJsonObject() { + // Test case: empty JSON object + String jsonString = "{}"; + JsonParser jsonParser = new JsonParser(); + JsonObject jsonObject = jsonParser.parse(jsonString).getAsJsonObject(); + + String result = libvirtComputingResourceSpy.getJsonStringValueOrNull(jsonObject, "anyField"); + + assertNull(result); + } + + @Test + public void testGetJsonStringValueOrNull_withNumericValue() { + // Test case: field exists but contains a numeric value (should still work as it gets converted to string) + String jsonString = "{\"numericField\": 123}"; + JsonParser jsonParser = new JsonParser(); + JsonObject jsonObject = jsonParser.parse(jsonString).getAsJsonObject(); + + String result = libvirtComputingResourceSpy.getJsonStringValueOrNull(jsonObject, "numericField"); + + assertEquals("123", result); + } + + @Test + public void 
testGetJsonStringValueOrNull_withBooleanValue() { + // Test case: field exists but contains a boolean value (should still work as it gets converted to string) + String jsonString = "{\"booleanField\": true}"; + JsonParser jsonParser = new JsonParser(); + JsonObject jsonObject = jsonParser.parse(jsonString).getAsJsonObject(); + + String result = libvirtComputingResourceSpy.getJsonStringValueOrNull(jsonObject, "booleanField"); + + assertEquals("true", result); + } + + @Test + public void testGetJsonStringValueOrNull_withNullFieldName() { + // Test case: null field name should return null + String jsonString = "{\"testField\": \"testValue\"}"; + JsonParser jsonParser = new JsonParser(); + JsonObject jsonObject = jsonParser.parse(jsonString).getAsJsonObject(); + + String result = libvirtComputingResourceSpy.getJsonStringValueOrNull(jsonObject, null); + + assertNull(result); + } + + @Test + public void testGetJsonStringValueOrNull_withLongStringValue() { + // Test case: field exists and has a long string value + String longValue = "This is a very long string value that contains multiple words and special characters like @#$%^&*()"; + String jsonString = "{\"longField\": \"" + longValue + "\"}"; + JsonParser jsonParser = new JsonParser(); + JsonObject jsonObject = jsonParser.parse(jsonString).getAsJsonObject(); + + String result = libvirtComputingResourceSpy.getJsonStringValueOrNull(jsonObject, "longField"); + + assertEquals(longValue, result); + } + + @Test(expected = NullPointerException.class) + public void testGetJsonStringValueOrNull_withNullJsonObject() { + // Test case: null JSON object should throw NullPointerException + // This tests that the method doesn't handle null objects gracefully, which is expected behavior + libvirtComputingResourceSpy.getJsonStringValueOrNull(null, "testField"); + } + + @Test + public void testGetJsonStringValueOrNull_withSpecialCharacters() { + // Test case: field contains JSON special characters and unicode + String jsonString = 
"{\"specialField\": \"Value with \\\"quotes\\\", \\n newlines, and unicode: \\u00E9\"}"; + JsonParser jsonParser = new JsonParser(); + JsonObject jsonObject = jsonParser.parse(jsonString).getAsJsonObject(); + + String result = libvirtComputingResourceSpy.getJsonStringValueOrNull(jsonObject, "specialField"); + + assertEquals("Value with \"quotes\", \n newlines, and unicode: é", result); + } + + @Test + public void testParseGpuDevicesFromResult_withSuccess() { + String result = "{\"gpus\": [" + + " {" + + " \"pci_address\": \"00:03.0\"," + + " \"vendor_id\": \"10de\"," + + " \"device_id\": \"2484\"," + + " \"vendor\": \"NVIDIA Corporation\"," + + " \"device\": \"GeForce RTX 3070\"," + + " \"driver\": \"nvidia\"," + + " \"pci_class\": \"VGA compatible controller\"," + + " \"iommu_group\": \"8\"," + + " \"sriov_totalvfs\": 0," + + " \"sriov_numvfs\": 0," + + " \"full_passthrough\": {" + + " \"enabled\": 1," + + " \"libvirt_address\": {" + + " \"domain\": \"0x0000\"," + + " \"bus\": \"0x00\"," + + " \"slot\": \"0x03\"," + + " \"function\": \"0x0\"" + + " }," + + " \"used_by_vm\": \"win10\"" + + " }," + + " \"vgpu_instances\": []," + + " \"vf_instances\": []" + + " }," + + " {" + + " \"pci_address\": \"00:AF.0\"," + + " \"vendor_id\": \"10de\"," + + " \"device_id\": \"1EB8\"," + + " \"vendor\": \"NVIDIA Corporation\"," + + " \"device\": \"Tesla T4\"," + + " \"driver\": \"nvidia\"," + + " \"pci_class\": \"3D controller\"," + + " \"iommu_group\": \"12\"," + + " \"sriov_totalvfs\": 0," + + " \"sriov_numvfs\": 0," + + " \"full_passthrough\": {" + + " \"enabled\": 0," + + " \"libvirt_address\": {" + + " \"domain\": \"0x0000\"," + + " \"bus\": \"0x00\"," + + " \"slot\": \"0xAF\"," + + " \"function\": \"0x0\"" + + " }," + + " \"used_by_vm\": null" + + " }," + + " \"vgpu_instances\": [" + + " {" + + " \"mdev_uuid\": \"a1b2c3d4-5678-4e9a-8b0c-d1e2f3a4b5c6\"," + + " \"profile_name\": \"grid_t4-16c\"," + + " \"max_instances\": 4," + + " \"libvirt_address\": {" + + " \"domain\": 
\"0x0000\"," + + " \"bus\": \"0x00\"," + + " \"slot\": \"0xAF\"," + + " \"function\": \"0x0\"" + + " }," + + " \"used_by_vm\": \"vm1\"" + + " }," + + " {" + + " \"mdev_uuid\": \"b2c3d4e5-6789-4f0a-9c1d-e2f3a4b5c6d7\"," + + " \"profile_name\": \"grid_t4-8c\"," + + " \"max_instances\": 8," + + " \"libvirt_address\": {" + + " \"domain\": \"0x0000\"," + + " \"bus\": \"0x00\"," + + " \"slot\": \"0xAF\"," + + " \"function\": \"0x1\"" + + " }," + + " \"used_by_vm\": \"vm2\"" + + " }" + + " ]," + + " \"vf_instances\": []" + + " }," + + " {" + + " \"pci_address\": \"00:65.0\"," + + " \"vendor_id\": \"10de\"," + + " \"device_id\": \"20B0\"," + + " \"vendor\": \"NVIDIA Corporation\"," + + " \"device\": \"A100-SXM4-40GB\"," + + " \"driver\": \"nvidia\"," + + " \"pci_class\": \"VGA compatible controller\"," + + " \"iommu_group\": \"15\"," + + " \"sriov_totalvfs\": 7," + + " \"sriov_numvfs\": 7," + + " \"full_passthrough\": {" + + " \"enabled\": 0," + + " \"libvirt_address\": {" + + " \"domain\": \"0x0000\"," + + " \"bus\": \"0x00\"," + + " \"slot\": \"0x65\"," + + " \"function\": \"0x0\"" + + " }," + + " \"used_by_vm\": null" + + " }," + + " \"vgpu_instances\": []," + + " \"vf_instances\": [" + + " {" + + " \"vf_pci_address\": \"00:65.2\"," + + " \"vf_profile\": \"1g.5gb\"," + + " \"libvirt_address\": {" + + " \"domain\": \"0x0000\"," + + " \"bus\": \"0x00\"," + + " \"slot\": \"0x65\"," + + " \"function\": \"0x2\"" + + " }," + + " \"used_by_vm\": \"ml\"" + + " }," + + " {" + + " \"vf_pci_address\": \"00:65.3\"," + + " \"vf_profile\": \"2g.10gb\"," + + " \"libvirt_address\": {" + + " \"domain\": \"0x0000\"," + + " \"bus\": \"0x00\"," + + " \"slot\": \"0x65\"," + + " \"function\": \"0x3\"" + + " }," + + " \"used_by_vm\": null" + + " }" + + " ]" + + " }" + + " ]" + + "}"; + List gpuDevices = libvirtComputingResourceSpy.parseGpuDevicesFromResult(result); + assertEquals(7, gpuDevices.size()); + // Verify first GPU device (RTX 3070) + VgpuTypesInfo firstGpu = gpuDevices.get(0); + 
assertEquals("00:03.0", firstGpu.getBusAddress()); + assertEquals("10de", firstGpu.getVendorId()); + assertEquals("2484", firstGpu.getDeviceId()); + assertEquals("NVIDIA Corporation", firstGpu.getVendorName()); + assertEquals("GeForce RTX 3070", firstGpu.getDeviceName()); + assertEquals("passthrough", firstGpu.getModelName()); + assertEquals("NVIDIA Corporation GeForce RTX 3070", firstGpu.getGroupName()); + assertTrue(firstGpu.isPassthroughEnabled()); + assertEquals("win10", firstGpu.getVmName()); + + // Verify second GPU device (Tesla T4) + VgpuTypesInfo secondGpu = gpuDevices.get(1); + assertEquals("00:AF.0", secondGpu.getBusAddress()); + assertEquals("10de", secondGpu.getVendorId()); + assertEquals("1EB8", secondGpu.getDeviceId()); + assertEquals("NVIDIA Corporation", secondGpu.getVendorName()); + assertEquals("Tesla T4", secondGpu.getDeviceName()); + assertEquals("passthrough", secondGpu.getModelName()); + assertEquals("NVIDIA Corporation Tesla T4", secondGpu.getGroupName()); + assertFalse(secondGpu.isPassthroughEnabled()); + assertNull(secondGpu.getVmName()); + + // Verify third GPU device (A100-SXM4-40GB) + VgpuTypesInfo thirdGpu = gpuDevices.get(4); + assertEquals("00:65.0", thirdGpu.getBusAddress()); + assertEquals("10de", thirdGpu.getVendorId()); + assertEquals("20B0", thirdGpu.getDeviceId()); + assertEquals("NVIDIA Corporation", thirdGpu.getVendorName()); + assertEquals("A100-SXM4-40GB", thirdGpu.getDeviceName()); + assertEquals("NVIDIA Corporation A100-SXM4-40GB", thirdGpu.getGroupName()); + assertEquals("passthrough", thirdGpu.getModelName()); + assertEquals("NVIDIA Corporation A100-SXM4-40GB", thirdGpu.getGroupName()); + assertFalse(thirdGpu.isPassthroughEnabled()); + assertNull(thirdGpu.getVmName()); + + // Verify vGPU instances from Tesla T4 + VgpuTypesInfo vgpuInstance1 = gpuDevices.get(2); + assertEquals("a1b2c3d4-5678-4e9a-8b0c-d1e2f3a4b5c6", vgpuInstance1.getBusAddress()); + assertEquals("00:AF.0", vgpuInstance1.getParentBusAddress()); + 
assertEquals("10de", vgpuInstance1.getVendorId()); + assertEquals("1EB8", vgpuInstance1.getDeviceId()); + assertEquals("NVIDIA Corporation", vgpuInstance1.getVendorName()); + assertEquals("Tesla T4", vgpuInstance1.getDeviceName()); + assertEquals("NVIDIA Corporation Tesla T4", vgpuInstance1.getGroupName()); + assertEquals("grid_t4-16c", vgpuInstance1.getModelName()); + assertEquals(Long.valueOf(4), vgpuInstance1.getMaxVpuPerGpu()); + assertEquals("vm1", vgpuInstance1.getVmName()); + + VgpuTypesInfo vgpuInstance2 = gpuDevices.get(3); + assertEquals("b2c3d4e5-6789-4f0a-9c1d-e2f3a4b5c6d7", vgpuInstance2.getBusAddress()); + assertEquals("00:AF.0", vgpuInstance2.getParentBusAddress()); + assertEquals("10de", vgpuInstance2.getVendorId()); + assertEquals("1EB8", vgpuInstance2.getDeviceId()); + assertEquals("NVIDIA Corporation", vgpuInstance2.getVendorName()); + assertEquals("Tesla T4", vgpuInstance2.getDeviceName()); + assertEquals("NVIDIA Corporation Tesla T4", vgpuInstance2.getGroupName()); + assertEquals("grid_t4-8c", vgpuInstance2.getModelName()); + assertEquals(Long.valueOf(8), vgpuInstance2.getMaxVpuPerGpu()); + assertEquals("vm2", vgpuInstance2.getVmName()); + + // Verify VF instances from NVIDIA Corporation A100-SXM4-40GB + VgpuTypesInfo vfInstance1 = gpuDevices.get(5); + assertEquals("00:65.0", vfInstance1.getParentBusAddress()); + assertEquals("00:65.2", vfInstance1.getBusAddress()); + assertEquals("10de", vfInstance1.getVendorId()); + assertEquals("20B0", vfInstance1.getDeviceId()); + assertEquals("NVIDIA Corporation", vfInstance1.getVendorName()); + assertEquals("A100-SXM4-40GB", vfInstance1.getDeviceName()); + assertEquals("NVIDIA Corporation A100-SXM4-40GB", vfInstance1.getGroupName()); + assertEquals("1g.5gb", vfInstance1.getModelName()); + assertEquals("ml", vfInstance1.getVmName()); + + VgpuTypesInfo vfInstance2 = gpuDevices.get(6); + assertEquals("00:65.0", vfInstance2.getParentBusAddress()); + assertEquals("00:65.3", vfInstance2.getBusAddress()); + 
assertEquals("10de", vfInstance2.getVendorId()); + assertEquals("20B0", vfInstance2.getDeviceId()); + assertEquals("NVIDIA Corporation", vfInstance2.getVendorName()); + assertEquals("A100-SXM4-40GB", vfInstance2.getDeviceName()); + assertEquals("NVIDIA Corporation A100-SXM4-40GB", vfInstance1.getGroupName()); + assertEquals("2g.10gb", vfInstance2.getModelName()); + assertNull(vfInstance2.getVmName()); + } } diff --git a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java new file mode 100644 index 000000000000..5dbea4fabf95 --- /dev/null +++ b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java @@ -0,0 +1,142 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package com.cloud.hypervisor.kvm.resource; + +import com.cloud.agent.api.VgpuTypesInfo; +import junit.framework.TestCase; +import org.apache.cloudstack.gpu.GpuDevice; +import org.junit.Test; + +public class LibvirtGpuDefTest extends TestCase { + + @Test + public void testGpuDef_withPciPassthrough() { + LibvirtGpuDef gpuDef = new LibvirtGpuDef(); + VgpuTypesInfo pciGpuInfo = new VgpuTypesInfo( + GpuDevice.DeviceType.PCI, + "passthrough", + "passthrough", + "00:02.0", + "10de", + "NVIDIA Corporation", + "1b38", + "Tesla T4" + ); + gpuDef.defGpu(pciGpuInfo); + + String gpuXml = gpuDef.toString(); + + assertTrue(gpuXml.contains("")); + assertTrue(gpuXml.contains("")); + assertTrue(gpuXml.contains("
")); + assertTrue(gpuXml.contains("")); + } + + @Test + public void testGpuDef_withMdevDevice() { + LibvirtGpuDef gpuDef = new LibvirtGpuDef(); + VgpuTypesInfo mdevGpuInfo = new VgpuTypesInfo( + GpuDevice.DeviceType.MDEV, + "nvidia-63", + "GRID T4-2Q", + "4b20d080-1b54-4048-85b3-a6a62d165c01", + "10de", + "NVIDIA Corporation", + "1eb8", + "Tesla T4" + ); + gpuDef.defGpu(mdevGpuInfo); + + String gpuXml = gpuDef.toString(); + + assertTrue(gpuXml.contains("")); + assertTrue(gpuXml.contains("
")); + assertTrue(gpuXml.contains("")); + assertFalse(gpuXml.contains("vfio")); // MDEV should not contain vfio driver element + } + + @Test + public void testGpuDef_withSriovVirtualFunction() { + LibvirtGpuDef gpuDef = new LibvirtGpuDef(); + VgpuTypesInfo vfGpuInfo = new VgpuTypesInfo( + GpuDevice.DeviceType.PCI, + "VF-Profile", + "VF-Profile", + "00:10.1", + "8086", + "Intel Corporation", + "1515", + "X710 Virtual Function" + ); + gpuDef.defGpu(vfGpuInfo); + + String gpuXml = gpuDef.toString(); + + assertTrue(gpuXml.contains("")); + assertTrue(gpuXml.contains("")); + assertTrue(gpuXml.contains("
")); + assertTrue(gpuXml.contains("")); + } + + @Test + public void testGpuDef_withComplexPciAddress() { + LibvirtGpuDef gpuDef = new LibvirtGpuDef(); + VgpuTypesInfo complexPciGpuInfo = new VgpuTypesInfo( + GpuDevice.DeviceType.PCI, + "passthrough", + "passthrough", + "81:00.0", + "1002", + "Advanced Micro Devices", + "73a3", + "Navi 21" + ); + gpuDef.defGpu(complexPciGpuInfo); + + String gpuXml = gpuDef.toString(); + + assertTrue(gpuXml.contains("")); + assertTrue(gpuXml.contains("")); + assertTrue(gpuXml.contains("
")); + assertTrue(gpuXml.contains("")); + } + + @Test + public void testGpuDef_withNullDeviceType() { + LibvirtGpuDef gpuDef = new LibvirtGpuDef(); + VgpuTypesInfo nullTypeGpuInfo = new VgpuTypesInfo( + null, // null device type should default to PCI behavior + "passthrough", + "passthrough", + "00:05.0", + "10de", + "NVIDIA Corporation", + "1db4", + "V100" + ); + gpuDef.defGpu(nullTypeGpuInfo); + + String gpuXml = gpuDef.toString(); + + // Should default to PCI behavior when device type is null + assertTrue(gpuXml.contains("")); + assertTrue(gpuXml.contains("")); + assertTrue(gpuXml.contains("
")); + assertTrue(gpuXml.contains("")); + } +} diff --git a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtKvmAgentHookTest.java b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtKvmAgentHookTest.java index 9cf9ca31e0f9..80a04aaff09a 100644 --- a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtKvmAgentHookTest.java +++ b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtKvmAgentHookTest.java @@ -102,4 +102,47 @@ public void testWrongDir() throws IOException, ResourceException, ScriptExceptio assertEquals(t.isInitialized(), false); assertEquals(t.handle(source), source); } + + @Test + public void testSanitizeBashCommandArgument() throws IOException { + LibvirtKvmAgentHook hook = new LibvirtKvmAgentHook(dir, script, method); + + // Test case map: input -> expected output + java.util.Map testCases = new java.util.LinkedHashMap<>(); + + // Edge cases + testCases.put("", ""); // Empty string + testCases.put("normalString123", "normalString123"); // Normal string without special chars + + // Special character escaping + testCases.put("test\"string'with`special$chars", "test\\\"string\\'with\\`special\\$chars"); + testCases.put("\\\"'`$|&;()<>*?![]{}~", "\\\\\\\"\\'\\`\\$\\|\\&\\;\\(\\)\\<\\>\\*\\?\\!\\[\\]\\{\\}\\~"); + + // XML content scenarios + testCases.put("\n test-vm\n 1048576\n", + "\\\n \\test-vm\\\n " + + "\\1048576\\\n\\"); + testCases.put("&", + "\\\\&\\;\\"); + + // Multiline content (newlines should not be escaped) + testCases.put("line1\nline2\rline3\r\nline4", "line1\nline2\rline3\r\nline4"); + + // Security test cases + testCases.put("normal; rm -rf /; echo 'gotcha'", "normal\\; rm -rf /\\; echo \\'gotcha\\'"); + testCases.put("data | grep pattern > output.txt", "data \\| grep pattern \\> output.txt"); + testCases.put("$(whoami) and `date`", "\\$\\(whoami\\) and \\`date\\`"); + + // Test each case + for (java.util.Map.Entry testCase 
: testCases.entrySet()) { + String input = testCase.getKey(); + String expected = testCase.getValue(); + String actual = hook.sanitizeBashCommandArgument(input); + assertEquals("Failed for input: " + input, expected, actual); + } + + // Test null input separately since it can't be a map key + String nullResult = hook.sanitizeBashCommandArgument(null); + assertEquals("Null input should return empty string", "", nullResult); + } } diff --git a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapperTest.java b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapperTest.java index b62fce5c1050..0407ed8bbfde 100644 --- a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapperTest.java +++ b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapperTest.java @@ -63,12 +63,15 @@ import com.cloud.agent.api.MigrateCommand.MigrateDiskInfo.DiskType; import com.cloud.agent.api.MigrateCommand.MigrateDiskInfo.DriverType; import com.cloud.agent.api.MigrateCommand.MigrateDiskInfo.Source; +import com.cloud.agent.api.VgpuTypesInfo; import com.cloud.agent.api.to.DpdkTO; +import com.cloud.agent.api.to.GPUDeviceTO; import com.cloud.agent.api.to.VirtualMachineTO; import com.cloud.hypervisor.kvm.resource.LibvirtComputingResource; import com.cloud.hypervisor.kvm.resource.LibvirtConnection; import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.DiskDef; import com.cloud.utils.exception.CloudRuntimeException; +import org.apache.cloudstack.gpu.GpuDevice; @RunWith(MockitoJUnitRunner.class) public class LibvirtMigrateCommandWrapperTest { @@ -578,6 +581,72 @@ public class LibvirtMigrateCommandWrapperTest { " \n" + "\n"; + private String xmlWithGpuDevices = + "\n" + + " i-2-3-VM\n" + + " 91860126-7dda-4876-ac1e-48d06cd4b2eb\n" + + " 524288\n" + + " 524288\n" + + " 1\n" + + " \n" + + " 
/usr/libexec/qemu-kvm\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + "
\n" + + " \n" + + " \n" + + " \n" + + " \n" + + "
\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + ""; + + private String xmlWithoutGpuDevices = + "\n" + + " i-2-3-VM\n" + + " 91860126-7dda-4876-ac1e-48d06cd4b2eb\n" + + " 524288\n" + + " 524288\n" + + " 1\n" + + " \n" + + " /usr/libexec/qemu-kvm\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + ""; + + private String xmlNoDevicesSection = + "\n" + + " i-2-3-VM\n" + + " 91860126-7dda-4876-ac1e-48d06cd4b2eb\n" + + " 524288\n" + + " 524288\n" + + " 1\n" + + ""; + private Map createMapMigrateStorage(String sourceText, String path) { Map mapMigrateStorage = new HashMap(); @@ -1019,4 +1088,154 @@ public void replaceCdromIsoPathTest() throws ParserConfigurationException, IOExc Assert.assertTrue(finalXml.contains(newIsoVolumePath)); } + + @Test + public void updateGpuDevicesIfNeededTestNoGpuDevice() throws Exception { + Mockito.doReturn(virtualMachineTOMock).when(migrateCommandMock).getVirtualMachine(); + Mockito.doReturn(null).when(virtualMachineTOMock).getGpuDevice(); + + String result = libvirtMigrateCmdWrapper.updateGpuDevicesIfNeeded(migrateCommandMock, xmlWithoutGpuDevices, libvirtComputingResourceMock); + + Assert.assertEquals("XML should remain unchanged when no GPU device is present", xmlWithoutGpuDevices, result); + } + + @Test + public void updateGpuDevicesIfNeededTestNoDevicesSection() throws Exception { + List gpuDevices = createTestMixedGpuDevices(); + GPUDeviceTO gpuDeviceTO = Mockito.mock(GPUDeviceTO.class); + Mockito.doReturn(gpuDevices).when(gpuDeviceTO).getGpuDevices(); + + Mockito.doReturn(virtualMachineTOMock).when(migrateCommandMock).getVirtualMachine(); + Mockito.doReturn(gpuDeviceTO).when(virtualMachineTOMock).getGpuDevice(); + + String result = libvirtMigrateCmdWrapper.updateGpuDevicesIfNeeded(migrateCommandMock, xmlNoDevicesSection, libvirtComputingResourceMock); + + Assert.assertEquals("XML should remain unchanged when no 
devices section is found", xmlNoDevicesSection, result); + } + + @Test + public void updateGpuDevicesIfNeededTestWithPciDevice() throws Exception { + List gpuDevices = createTestPciGpuDevice(); + GPUDeviceTO gpuDeviceTO = Mockito.mock(GPUDeviceTO.class); + Mockito.doReturn(gpuDevices).when(gpuDeviceTO).getGpuDevices(); + + Mockito.doReturn(virtualMachineTOMock).when(migrateCommandMock).getVirtualMachine(); + Mockito.doReturn(gpuDeviceTO).when(virtualMachineTOMock).getGpuDevice(); + + String result = libvirtMigrateCmdWrapper.updateGpuDevicesIfNeeded(migrateCommandMock, xmlWithGpuDevices, libvirtComputingResourceMock); + + // Verify that old GPU devices are removed and new ones are added + Assert.assertFalse("Old PCI device should be removed", result.contains("bus='0x01' slot='0x00'")); + Assert.assertFalse("Old MDEV device should be removed", result.contains("4b20d080-1b54-4048-85b3-a6a62d165c01")); + Assert.assertTrue("New PCI device should be added", result.contains("bus=\"0x02\"")); + Assert.assertTrue("New PCI device should be added", result.contains("slot=\"0x00\"")); + Assert.assertTrue("PCI device should have vfio driver", result.contains("name=\"vfio\"")); + } + + @Test + public void updateGpuDevicesIfNeededTestWithMdevDevice() throws Exception { + List gpuDevices = createTestMdevGpuDevice(); + GPUDeviceTO gpuDeviceTO = Mockito.mock(GPUDeviceTO.class); + Mockito.doReturn(gpuDevices).when(gpuDeviceTO).getGpuDevices(); + + Mockito.doReturn(virtualMachineTOMock).when(migrateCommandMock).getVirtualMachine(); + Mockito.doReturn(gpuDeviceTO).when(virtualMachineTOMock).getGpuDevice(); + + String result = libvirtMigrateCmdWrapper.updateGpuDevicesIfNeeded(migrateCommandMock, xmlWithGpuDevices, libvirtComputingResourceMock); + + // Verify that old GPU devices are removed and new ones are added + Assert.assertFalse("Old PCI device should be removed", result.contains("bus='0x01' slot='0x00'")); + Assert.assertFalse("Old MDEV device should be removed", 
result.contains("4b20d080-1b54-4048-85b3-a6a62d165c01")); + Assert.assertTrue("New MDEV device should be added", result.contains("6f20d080-1b54-4048-85b3-a6a62d165c01")); + Assert.assertTrue("MDEV device should have display=off", result.contains("display=\"off\"")); + } + + @Test + public void updateGpuDevicesIfNeededTestWithMixedDevices() throws Exception { + List gpuDevices = createTestMixedGpuDevices(); + GPUDeviceTO gpuDeviceTO = Mockito.mock(GPUDeviceTO.class); + Mockito.doReturn(gpuDevices).when(gpuDeviceTO).getGpuDevices(); + + Mockito.doReturn(virtualMachineTOMock).when(migrateCommandMock).getVirtualMachine(); + Mockito.doReturn(gpuDeviceTO).when(virtualMachineTOMock).getGpuDevice(); + + String result = libvirtMigrateCmdWrapper.updateGpuDevicesIfNeeded(migrateCommandMock, xmlWithGpuDevices, libvirtComputingResourceMock); + + // Verify both PCI and MDEV devices are added + Assert.assertTrue("PCI device should be added", result.contains("bus=\"0x02\"")); + Assert.assertTrue("PCI device should be added", result.contains("slot=\"0x00\"")); + Assert.assertTrue("MDEV device should be added", result.contains("6f20d080-1b54-4048-85b3-a6a62d165c01")); + + // Count hostdev elements to ensure we have both + long hostdevCount = result.lines().filter(line -> line.contains(" gpuDevices = new ArrayList<>(); // Empty list + GPUDeviceTO gpuDeviceTO = Mockito.mock(GPUDeviceTO.class); + Mockito.doReturn(gpuDevices).when(gpuDeviceTO).getGpuDevices(); + + Mockito.doReturn(virtualMachineTOMock).when(migrateCommandMock).getVirtualMachine(); + Mockito.doReturn(gpuDeviceTO).when(virtualMachineTOMock).getGpuDevice(); + + String result = libvirtMigrateCmdWrapper.updateGpuDevicesIfNeeded(migrateCommandMock, xmlWithoutGpuDevices, libvirtComputingResourceMock); + + // Verify all GPU devices are removed + Assert.assertFalse("Old PCI device should be removed", result.contains("bus=\"0x01\"")); + Assert.assertFalse("Old PCI device should be removed", result.contains("slot=\"0x00\"")); + 
Assert.assertFalse("Old MDEV device should be removed", result.contains("4b20d080-1b54-4048-85b3-a6a62d165c01")); + + // Verify no hostdev elements remain + long hostdevCount = result.lines().filter(line -> line.contains(" createTestPciGpuDevice() { + List devices = new ArrayList<>(); + VgpuTypesInfo pciDevice = new VgpuTypesInfo( + GpuDevice.DeviceType.PCI, + "NVIDIA Corporation Tesla T4", + "passthrough", + "02:00.0", // New bus address for destination host + "10de", + "NVIDIA Corporation", + "1eb8", + "Tesla T4" + ); + pciDevice.setDisplay(false); + devices.add(pciDevice); + return devices; + } + + private List createTestMdevGpuDevice() { + List devices = new ArrayList<>(); + VgpuTypesInfo mdevDevice = new VgpuTypesInfo( + GpuDevice.DeviceType.MDEV, + "nvidia-63", + "GRID T4-2Q", + "6f20d080-1b54-4048-85b3-a6a62d165c01", // New UUID for destination host + "10de", + "NVIDIA Corporation", + "1eb8", + "Tesla T4" + ); + mdevDevice.setDisplay(false); + devices.add(mdevDevice); + return devices; + } + + private List createTestMixedGpuDevices() { + List devices = new ArrayList<>(); + + // Add PCI device + devices.addAll(createTestPciGpuDevice()); + + // Add MDEV device + devices.addAll(createTestMdevGpuDevice()); + + return devices; + } } diff --git a/plugins/hypervisors/simulator/src/main/java/com/cloud/agent/manager/MockAgentManager.java b/plugins/hypervisors/simulator/src/main/java/com/cloud/agent/manager/MockAgentManager.java index 260ffe594f3c..2c3ebcbd1eba 100644 --- a/plugins/hypervisors/simulator/src/main/java/com/cloud/agent/manager/MockAgentManager.java +++ b/plugins/hypervisors/simulator/src/main/java/com/cloud/agent/manager/MockAgentManager.java @@ -16,10 +16,13 @@ // under the License. 
package com.cloud.agent.manager; +import java.util.List; import java.util.Map; import javax.naming.ConfigurationException; +import com.cloud.agent.api.GetGPUStatsCommand; +import com.cloud.agent.api.VgpuTypesInfo; import org.apache.cloudstack.ca.SetupCertificateCommand; import org.apache.cloudstack.ca.SetupKeyStoreCommand; @@ -55,6 +58,10 @@ boolean handleSystemVMStart(long vmId, String privateIpAddress, String privateMa Answer checkHealth(CheckHealthCommand cmd); + Answer getGpuStats(GetGPUStatsCommand cmd, long hostId); + + List getGPUDevices(long hostId); + Answer pingTest(PingTestCommand cmd); Answer setupKeyStore(SetupKeyStoreCommand cmd); diff --git a/plugins/hypervisors/simulator/src/main/java/com/cloud/agent/manager/MockAgentManagerImpl.java b/plugins/hypervisors/simulator/src/main/java/com/cloud/agent/manager/MockAgentManagerImpl.java index d3d6f646a7e7..1a4d6add968e 100644 --- a/plugins/hypervisors/simulator/src/main/java/com/cloud/agent/manager/MockAgentManagerImpl.java +++ b/plugins/hypervisors/simulator/src/main/java/com/cloud/agent/manager/MockAgentManagerImpl.java @@ -21,11 +21,14 @@ import com.cloud.agent.api.CheckHealthCommand; import com.cloud.agent.api.CheckNetworkAnswer; import com.cloud.agent.api.CheckNetworkCommand; +import com.cloud.agent.api.GetGPUStatsAnswer; +import com.cloud.agent.api.GetGPUStatsCommand; import com.cloud.agent.api.GetHostStatsAnswer; import com.cloud.agent.api.GetHostStatsCommand; import com.cloud.agent.api.HostStatsEntry; import com.cloud.agent.api.MaintainAnswer; import com.cloud.agent.api.PingTestCommand; +import com.cloud.agent.api.VgpuTypesInfo; import com.cloud.agent.api.routing.NetworkElementCommand; import com.cloud.api.commands.SimulatorAddSecondaryAgent; import com.cloud.dc.DataCenter; @@ -40,10 +43,14 @@ import com.cloud.resource.AgentStorageResource; import com.cloud.resource.Discoverer; import com.cloud.resource.ResourceManager; +import com.cloud.resource.SimulatorDiscoverer; import 
com.cloud.resource.SimulatorSecondaryDiscoverer; +import com.cloud.simulator.MockGpuDevice; +import com.cloud.simulator.MockGpuDeviceVO; import com.cloud.simulator.MockHost; import com.cloud.simulator.MockHostVO; import com.cloud.simulator.MockVMVO; +import com.cloud.simulator.dao.MockGpuDeviceDao; import com.cloud.simulator.dao.MockHostDao; import com.cloud.simulator.dao.MockVMDao; import com.cloud.user.AccountManager; @@ -61,12 +68,14 @@ import org.apache.cloudstack.context.CallContext; import org.apache.cloudstack.diagnostics.DiagnosticsAnswer; import org.apache.cloudstack.diagnostics.DiagnosticsCommand; +import org.apache.cloudstack.gpu.GpuDevice; import org.springframework.stereotype.Component; import javax.inject.Inject; import javax.naming.ConfigurationException; import java.security.NoSuchAlgorithmException; import java.security.SecureRandom; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -88,6 +97,8 @@ public class MockAgentManagerImpl extends ManagerBase implements MockAgentManage @Inject MockVMDao _mockVmDao = null; @Inject + MockGpuDeviceDao _mockGpuDeviceDao = null; + @Inject SimulatorManager _simulatorMgr = null; @Inject AgentManager _agentMgr = null; @@ -97,7 +108,8 @@ public class MockAgentManagerImpl extends ManagerBase implements MockAgentManage ResourceManager _resourceMgr; @Inject private AccountManager _accountMgr; - SimulatorSecondaryDiscoverer discoverer; + SimulatorDiscoverer discoverer; + SimulatorSecondaryDiscoverer ssDiscoverer; @Inject HostDao hostDao; @@ -184,10 +196,19 @@ public Map> createServerResources(Map> createServerResources(Map params) throws ConfigurationException { try { @@ -371,7 +487,7 @@ public void run() { storageResource.configure("secondaryStorage", params); storageResource.start(); _resources.put(this.guid, storageResource); - discoverer.setResource(storageResource); + ssDiscoverer.setResource(storageResource); SimulatorAddSecondaryAgent cmd = new 
SimulatorAddSecondaryAgent("sim://" + this.guid, this.dcId); try { _resourceMgr.discoverHosts(cmd); @@ -469,6 +585,72 @@ public Answer checkHealth(CheckHealthCommand cmd) { return new Answer(cmd); } + @Override + public Answer getGpuStats(GetGPUStatsCommand cmd, long hostId) { + return new GetGPUStatsAnswer(cmd, getGPUDevices(hostId)); + } + + @Override + public List getGPUDevices(long hostId) { + List gpuDevices = new ArrayList<>(); + List mockGpuDevices; + // List all mock GPU devices from database + TransactionLegacy txn = TransactionLegacy.open(TransactionLegacy.SIMULATOR_DB); + try { + txn.start(); + mockGpuDevices = _mockGpuDeviceDao.listByHostId(hostId); + + if (mockGpuDevices != null && !mockGpuDevices.isEmpty()) { + logger.debug("Found {} mock GPU devices in the database", mockGpuDevices.size()); + + for (MockGpuDeviceVO mockGpuDevice : mockGpuDevices) { + String busAddress = mockGpuDevice.getBusAddress(); + String vendorId = mockGpuDevice.getVendorId(); + String deviceId = mockGpuDevice.getDeviceId(); + String vendorName = mockGpuDevice.getVendorName(); + String deviceName = mockGpuDevice.getDeviceName(); + String modelName = mockGpuDevice.getProfileName(); + boolean isPassthrough = mockGpuDevice.isPassthroughEnabled(); + Long maxVgpuPerGpu = mockGpuDevice.getMaxVgpuPerPgpu(); + Integer numaNode = mockGpuDevice.getNumaNode(); + String pciRoot = mockGpuDevice.getPciRoot(); + + VgpuTypesInfo vgpuTypesInfo = new VgpuTypesInfo(mockGpuDevice.getDeviceType(), + vendorName + " " + deviceName, modelName, busAddress, vendorId, vendorName, + deviceId, deviceName, numaNode.toString(), pciRoot); + vgpuTypesInfo.setPassthroughEnabled(isPassthrough); + vgpuTypesInfo.setMaxVgpuPerGpu(maxVgpuPerGpu); + vgpuTypesInfo.setVideoRam(mockGpuDevice.getVideoRam()); + vgpuTypesInfo.setMaxResolutionX(mockGpuDevice.getMaxResolutionX()); + vgpuTypesInfo.setMaxResolutionY(mockGpuDevice.getMaxResolutionY()); + vgpuTypesInfo.setMaxHeads(mockGpuDevice.getMaxHeads()); + + if 
(mockGpuDevice.getVmId() != null) { + MockVMVO mockVm = _mockVmDao.findById(mockGpuDevice.getVmId()); + vgpuTypesInfo.setVmName(mockVm.getName()); + } + + if (mockGpuDevice.getParentDeviceId() != null) { + MockGpuDeviceVO parentDevice = _mockGpuDeviceDao.findById(mockGpuDevice.getParentDeviceId()); + if (parentDevice != null) { + vgpuTypesInfo.setParentBusAddress(parentDevice.getBusAddress()); + } + } + gpuDevices.add(vgpuTypesInfo); + } + } + txn.commit(); + } catch (Exception ex) { + txn.rollback(); + throw new CloudRuntimeException("Unable to get GPU devices on hostId " + hostId + " due to " + ex.getMessage(), ex); + } finally { + txn.close(); + txn = TransactionLegacy.open(TransactionLegacy.CLOUD_DB); + txn.close(); + } + return gpuDevices; + } + @Override public Answer pingTest(PingTestCommand cmd) { return new Answer(cmd); @@ -497,12 +679,14 @@ public Answer setupCertificate(SetupCertificateCommand cmd) { public boolean start() { for (Discoverer discoverer : discoverers) { if (discoverer instanceof SimulatorSecondaryDiscoverer) { - this.discoverer = (SimulatorSecondaryDiscoverer)discoverer; - break; + this.ssDiscoverer = (SimulatorSecondaryDiscoverer)discoverer; + } + if (discoverer instanceof SimulatorDiscoverer) { + this.discoverer = (SimulatorDiscoverer)discoverer; } } - if (this.discoverer == null) { + if (this.ssDiscoverer == null) { throw new IllegalStateException("Failed to find SimulatorSecondaryDiscoverer"); } diff --git a/plugins/hypervisors/simulator/src/main/java/com/cloud/agent/manager/MockVmManagerImpl.java b/plugins/hypervisors/simulator/src/main/java/com/cloud/agent/manager/MockVmManagerImpl.java index 953c5792a500..6a088bce4b9e 100644 --- a/plugins/hypervisors/simulator/src/main/java/com/cloud/agent/manager/MockVmManagerImpl.java +++ b/plugins/hypervisors/simulator/src/main/java/com/cloud/agent/manager/MockVmManagerImpl.java @@ -25,7 +25,13 @@ import javax.inject.Inject; import javax.naming.ConfigurationException; +import 
com.cloud.agent.api.VgpuTypesInfo; +import com.cloud.agent.api.to.GPUDeviceTO; +import com.cloud.simulator.MockGpuDevice; +import com.cloud.simulator.MockGpuDeviceVO; +import com.cloud.simulator.dao.MockGpuDeviceDao; import org.apache.cloudstack.engine.orchestration.service.NetworkOrchestrationService; +import org.apache.commons.collections4.CollectionUtils; import org.springframework.stereotype.Component; import com.cloud.agent.api.Answer; @@ -99,6 +105,8 @@ public class MockVmManagerImpl extends ManagerBase implements MockVmManager { @Inject MockHostDao _mockHostDao = null; @Inject + MockGpuDeviceDao _mockGpuDeviceDao = null; + @Inject MockSecurityRulesDao _mockSecurityDao = null; private final Map>> _securityRules = new ConcurrentHashMap>>(); @@ -111,7 +119,7 @@ public boolean configure(final String name, final Map params) th return true; } - public String startVM(final String vmName, final NicTO[] nics, final int cpuHz, final long ramSize, final String bootArgs, final String hostGuid) { + public String startVM(final String vmName, final NicTO[] nics, GPUDeviceTO gpuDeviceTO, final int cpuHz, final long ramSize, final String bootArgs, final String hostGuid) { TransactionLegacy txn = TransactionLegacy.open(TransactionLegacy.SIMULATOR_DB); MockHost host = null; @@ -157,6 +165,22 @@ public String startVM(final String vmName, final NicTO[] nics, final int cpuHz, try { txn.start(); vm = _mockVmDao.persist((MockVMVO)vm); + if (gpuDeviceTO != null) { + List gpuDevices = gpuDeviceTO.getGpuDevices(); + for (VgpuTypesInfo gpuDevice : gpuDevices) { + MockGpuDeviceVO mockGpuDevice = _mockGpuDeviceDao.listByHostIdAndBusAddress(host.getId(), gpuDevice.getBusAddress()); + mockGpuDevice.setVmId(vm.getId()); + mockGpuDevice.setState(MockGpuDevice.State.Allocated); + _mockGpuDeviceDao.persist(mockGpuDevice); + } + } else { + List mockGpuDevices = _mockGpuDeviceDao.listByVmId(vm.getId()); + for (MockGpuDeviceVO mockGpuDevice : mockGpuDevices) { + mockGpuDevice.setVmId(null); + 
mockGpuDevice.setState(MockGpuDevice.State.Available); + _mockGpuDeviceDao.persist(mockGpuDevice); + } + } txn.commit(); } catch (final Exception ex) { txn.rollback(); @@ -331,7 +355,7 @@ public CheckVirtualMachineAnswer checkVmState(final CheckVirtualMachineCommand c @Override public StartAnswer startVM(final StartCommand cmd, final SimulatorInfo info) { final VirtualMachineTO vm = cmd.getVirtualMachine(); - final String result = startVM(vm.getName(), vm.getNics(), vm.getCpus() * vm.getMaxSpeed(), vm.getMaxRam(), vm.getBootArgs(), info.getHostUuid()); + final String result = startVM(vm.getName(), vm.getNics(), vm.getGpuDevice(), vm.getCpus() * vm.getMaxSpeed(), vm.getMaxRam(), vm.getBootArgs(), info.getHostUuid()); if (result != null) { return new StartAnswer(cmd, result); } else { @@ -362,6 +386,33 @@ public MigrateAnswer migrate(final MigrateCommand cmd, final SimulatorInfo info) } vm.setHostId(destHost.getId()); _mockVmDao.update(vm.getId(), vm); + + // Unassign existing GPU Devices + List devices = _mockGpuDeviceDao.listByVmId(vm.getId()); + for (MockGpuDeviceVO mockGpuDevice : devices) { + mockGpuDevice.setVmId(null); + mockGpuDevice.setState(MockGpuDevice.State.Available); + _mockGpuDeviceDao.persist(mockGpuDevice); + } + + // Assign GPU Devices to the new host + GPUDeviceTO gpuDeviceTO = cmd.getVirtualMachine().getGpuDevice(); + if (gpuDeviceTO != null) { + List gpuDevices = gpuDeviceTO.getGpuDevices(); + if (CollectionUtils.isNotEmpty(gpuDevices)) { + for (VgpuTypesInfo gpuDevice : gpuDevices) { + MockGpuDeviceVO mockGpuDevice = _mockGpuDeviceDao.listByHostIdAndBusAddress(destHost.getId(), gpuDevice.getBusAddress()); + if (mockGpuDevice != null) { + mockGpuDevice.setVmId(vm.getId()); + mockGpuDevice.setState(MockGpuDevice.State.Allocated); + _mockGpuDeviceDao.persist(mockGpuDevice); + } else { + return new MigrateAnswer(cmd, false, "No GPU device found on destination host for bus address: " + gpuDevice.getBusAddress(), null); + } + } + } + } + 
txn.commit(); return new MigrateAnswer(cmd, true, null, 0); } catch (final Exception ex) { @@ -496,6 +547,11 @@ public StopAnswer stopVM(final StopCommand cmd) { final MockVm vm = _mockVmDao.findByVmName(vmName); if (vm != null) { vm.setPowerState(PowerState.PowerOff); + _mockGpuDeviceDao.listByVmId(vm.getId()).forEach(mockGpuDevice -> { + mockGpuDevice.setVmId(null); + mockGpuDevice.setState(MockGpuDevice.State.Available); + _mockGpuDeviceDao.persist(mockGpuDevice); + }); _mockVmDao.update(vm.getId(), (MockVMVO)vm); } diff --git a/plugins/hypervisors/simulator/src/main/java/com/cloud/agent/manager/SimulatorManagerImpl.java b/plugins/hypervisors/simulator/src/main/java/com/cloud/agent/manager/SimulatorManagerImpl.java index cb8d71985e34..39e504d7f952 100644 --- a/plugins/hypervisors/simulator/src/main/java/com/cloud/agent/manager/SimulatorManagerImpl.java +++ b/plugins/hypervisors/simulator/src/main/java/com/cloud/agent/manager/SimulatorManagerImpl.java @@ -26,6 +26,7 @@ import javax.inject.Inject; import javax.naming.ConfigurationException; +import com.cloud.agent.api.GetGPUStatsCommand; import org.apache.cloudstack.ca.SetupCertificateCommand; import org.apache.cloudstack.ca.SetupKeyStoreCommand; import org.apache.cloudstack.diagnostics.DiagnosticsCommand; @@ -287,8 +288,10 @@ public Answer simulate(final Command cmd, final String hostGuid) { if (answer == null) { if (cmd instanceof GetHostStatsCommand) { answer = _mockAgentMgr.getHostStatistic((GetHostStatsCommand)cmd); + } else if (cmd instanceof GetGPUStatsCommand) { + answer = _mockAgentMgr.getGpuStats((GetGPUStatsCommand) cmd, host.getId()); } else if (cmd instanceof CheckHealthCommand) { - answer = _mockAgentMgr.checkHealth((CheckHealthCommand)cmd); + answer = _mockAgentMgr.checkHealth((CheckHealthCommand) cmd); } else if (cmd instanceof PingTestCommand) { answer = _mockAgentMgr.pingTest((PingTestCommand)cmd); } else if (cmd instanceof SetupKeyStoreCommand) { diff --git 
a/plugins/hypervisors/simulator/src/main/java/com/cloud/resource/SimulatorDiscoverer.java b/plugins/hypervisors/simulator/src/main/java/com/cloud/resource/SimulatorDiscoverer.java index 332ac6098033..12489334ab04 100644 --- a/plugins/hypervisors/simulator/src/main/java/com/cloud/resource/SimulatorDiscoverer.java +++ b/plugins/hypervisors/simulator/src/main/java/com/cloud/resource/SimulatorDiscoverer.java @@ -300,6 +300,8 @@ public HostVO createHostVOForDirectConnectAgent(HostVO host, StartupCommand[] st return null; } + ssCmd.setGpuDevices(_mockAgentMgr.getGPUDevices(host.getId())); + return _resourceMgr.fillRoutingHostVO(host, ssCmd, HypervisorType.Simulator, details, hostTags); } diff --git a/plugins/hypervisors/simulator/src/main/java/com/cloud/simulator/MockGpuDevice.java b/plugins/hypervisors/simulator/src/main/java/com/cloud/simulator/MockGpuDevice.java new file mode 100644 index 000000000000..66ed29cdb254 --- /dev/null +++ b/plugins/hypervisors/simulator/src/main/java/com/cloud/simulator/MockGpuDevice.java @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
/**
 * Read-only view of a GPU device tracked by the simulator.
 */
public interface MockGpuDevice {

    /**
     * Allocation status of a mock GPU device.
     */
    enum State {
        Allocated,
        Available
    }

    /** @return the database id of this device */
    long getId();

    /** @return the id of the host this device belongs to */
    Long getHostId();

    /** @return the id of the VM this device is assigned to, or {@code null} when unassigned */
    Long getVmId();

    /** @return the current allocation state */
    State getState();

    /** @return the address that identifies the device on its host */
    String getBusAddress();

    /** @return the vendor id */
    String getVendorId();

    /** @return the vendor name */
    String getVendorName();

    /** @return the device id */
    String getDeviceId();

    /** @return the device name */
    String getDeviceName();
}
+package com.cloud.simulator;
+
+import org.apache.cloudstack.api.InternalIdentity;
+import org.apache.cloudstack.gpu.GpuDevice;
+
+import javax.persistence.Column;
+import javax.persistence.Entity;
+import javax.persistence.EnumType;
+import javax.persistence.Enumerated;
+import javax.persistence.GeneratedValue;
+import javax.persistence.GenerationType;
+import javax.persistence.Id;
+import javax.persistence.Table;
+/** JPA entity backing {@link MockGpuDevice}; rows live in the "mockgpudevice" table. */
+@Entity
+@Table(name = "mockgpudevice")
+public class MockGpuDeviceVO implements MockGpuDevice, InternalIdentity {
+    @Id
+    @GeneratedValue(strategy = GenerationType.IDENTITY)
+    @Column(name = "id")
+    private long id;
+
+    // PCI address for parent devices, MDEV UUID for MDEV devices, VF PCI address for VF devices
+    @Column(name = "bus_address", nullable = false)
+    private String busAddress;
+
+    @Column(name = "vendor_id", nullable = false)
+    private String vendorId;
+
+    @Column(name = "device_id", nullable = false)
+    private String deviceId;
+
+    @Column(name = "vendor_name", nullable = false)
+    private String vendorName;
+
+    @Column(name = "device_name", nullable = false)
+    private String deviceName;
+
+    @Column(name = "host_id")
+    private Long hostId;
+
+    @Column(name = "vm_id")
+    private Long vmId;
+
+    @Column(name = "max_vgpu_per_pgpu", nullable = false)
+    private Long maxVgpuPerPgpu = 1L;
+
+    @Column(name = "video_ram", nullable = false)
+    private Long videoRam = 0L;
+
+    @Column(name = "max_resolution_x", nullable = false)
+    private Long maxResolutionX = 0L;
+
+    @Column(name = "max_resolution_y", nullable = false)
+    private Long maxResolutionY = 0L;
+
+    @Column(name = "max_heads", nullable = false)
+    private Long maxHeads = 1L;
+
+    @Column(name = "state")
+    @Enumerated(EnumType.STRING)
+    private State state;
+
+    @Column(name = "device_type")
+    @Enumerated(EnumType.STRING)
+    private GpuDevice.DeviceType deviceType = GpuDevice.DeviceType.PCI;
+    // null marks a parent device; a non-null value marks a VF (see the PCI helper getters below)
+    @Column(name = "parent_device_id")
+    private Long parentDeviceId;
+
+    @Column(name = "profile_name")
+    private String profileName;
+
+    @Column(name = "passthrough_enabled")
+    private boolean passthroughEnabled = true;
+
+    @Column(name = "numa_node")
+    private Integer numaNode;
+
+    @Column(name = "pci_root")
+    private String pciRoot;
+    /** Leaves every field at its declared default; state stays null. */
+    public MockGpuDeviceVO() {
+    }
+    /** Creates an Available PCI device with passthrough enabled and profile name "passthrough". */
+    public MockGpuDeviceVO(String busAddress, String vendorId, String deviceId, String vendorName, String deviceName,
+            Long hostId) {
+        this.busAddress = busAddress;
+        this.vendorId = vendorId;
+        this.deviceId = deviceId;
+        this.vendorName = vendorName;
+        this.deviceName = deviceName;
+        this.hostId = hostId;
+        this.state = State.Available;
+        this.deviceType = GpuDevice.DeviceType.PCI;
+        this.profileName = "passthrough";
+        this.passthroughEnabled = true;
+    }
+
+    @Override
+    public long getId() {
+        return id;
+    }
+
+    public void setId(long id) {
+        this.id = id;
+    }
+
+    @Override
+    public String getBusAddress() {
+        return busAddress;
+    }
+
+    public void setBusAddress(String busAddress) {
+        this.busAddress = busAddress;
+    }
+
+    @Override
+    public String getVendorId() {
+        return vendorId;
+    }
+
+    public void setVendorId(String vendorId) {
+        this.vendorId = vendorId;
+    }
+
+    @Override
+    public String getDeviceId() {
+        return deviceId;
+    }
+
+    public void setDeviceId(String deviceId) {
+        this.deviceId = deviceId;
+    }
+
+    @Override
+    public String getVendorName() {
+        return vendorName;
+    }
+
+    public void setVendorName(String vendorName) {
+        this.vendorName = vendorName;
+    }
+
+    @Override
+    public String getDeviceName() {
+        return deviceName;
+    }
+
+    public void setDeviceName(String deviceName) {
+        this.deviceName = deviceName;
+    }
+
+    @Override
+    public Long getHostId() {
+        return hostId;
+    }
+
+    public void setHostId(Long hostId) {
+        this.hostId = hostId;
+    }
+
+    @Override
+    public Long getVmId() {
+        return vmId;
+    }
+
+    public void setVmId(Long vmId) {
+        this.vmId = vmId;
+    }
+
+    @Override
+    public State getState() {
+        return state;
+    }
+
+    public void setState(State state) {
+        this.state = state;
+    }
+
+    public Long getMaxVgpuPerPgpu() {
+        return maxVgpuPerPgpu;
+    }
+
+    public void setMaxVgpuPerPgpu(Long maxVgpuPerGpu) {
+        this.maxVgpuPerPgpu = maxVgpuPerGpu;
+    }
+
+    public Long getVideoRam() {
+        return videoRam;
+    }
+
+    public void setVideoRam(Long videoRam) {
+        this.videoRam = videoRam;
+    }
+
+    public Long getMaxResolutionX() {
+        return maxResolutionX;
+    }
+
+    public void setMaxResolutionX(Long maxResolutionX) {
+        this.maxResolutionX = maxResolutionX;
+    }
+
+    public Long getMaxResolutionY() {
+        return maxResolutionY;
+    }
+
+    public void setMaxResolutionY(Long maxResolutionY) {
+        this.maxResolutionY = maxResolutionY;
+    }
+
+    public Long getMaxHeads() {
+        return maxHeads;
+    }
+
+    public void setMaxHeads(Long maxHeads) {
+        this.maxHeads = maxHeads;
+    }
+
+    public GpuDevice.DeviceType getDeviceType() {
+        return deviceType;
+    }
+
+    public void setDeviceType(GpuDevice.DeviceType deviceType) {
+        this.deviceType = deviceType;
+    }
+
+    public Long getParentDeviceId() {
+        return parentDeviceId;
+    }
+
+    public void setParentDeviceId(Long parentDeviceId) {
+        this.parentDeviceId = parentDeviceId;
+    }
+
+    public String getProfileName() {
+        return profileName;
+    }
+
+    public void setProfileName(String profileName) {
+        this.profileName = profileName;
+    }
+
+    public boolean isPassthroughEnabled() {
+        return passthroughEnabled;
+    }
+
+    public void setPassthroughEnabled(boolean passthroughEnabled) {
+        this.passthroughEnabled = passthroughEnabled;
+    }
+
+    public Integer getNumaNode() {
+        return numaNode;
+    }
+
+    public void setNumaNode(Integer numaNode) {
+        this.numaNode = numaNode;
+    }
+
+    public String getPciRoot() {
+        return pciRoot;
+    }
+
+    public void setPciRoot(String pciRoot) {
+        this.pciRoot = pciRoot;
+    }
+
+    /**
+     * Helper method to get the MDEV UUID (when device_type is MDEV)
+     *
+     * @return MDEV UUID or null if not an MDEV device
+     */
+    public String getMdevUuid() {
+        return GpuDevice.DeviceType.MDEV.equals(this.deviceType) ? this.busAddress : null;
+    }
+
+    /**
+     * Helper method to get the VF PCI address (when device_type is PCI and has
+     * parent)
+     *
+     * @return VF PCI address or null if not a VF device
+     */
+    public String getVfPciAddress() {
+        return GpuDevice.DeviceType.PCI.equals(this.deviceType) && this.parentDeviceId != null ? this.busAddress : null;
+    }
+
+    /**
+     * Helper method to get the parent PCI bus address (when device_type is PCI and
+     * no parent)
+     *
+     * @return Parent PCI bus address or null if not a parent device
+     */
+    public String getParentPciBusAddress() {
+        return GpuDevice.DeviceType.PCI.equals(this.deviceType) && this.parentDeviceId == null ? this.busAddress : null;
+    }
+}
diff --git a/plugins/hypervisors/simulator/src/main/java/com/cloud/simulator/dao/MockGpuDeviceDao.java b/plugins/hypervisors/simulator/src/main/java/com/cloud/simulator/dao/MockGpuDeviceDao.java
new file mode 100644
index 000000000000..cb952535d199
--- /dev/null
+++ b/plugins/hypervisors/simulator/src/main/java/com/cloud/simulator/dao/MockGpuDeviceDao.java
@@ -0,0 +1,43 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package com.cloud.simulator.dao;
+
+import com.cloud.simulator.MockGpuDeviceVO;
+import com.cloud.utils.db.GenericDao;
+
+import java.util.List;
+
+public interface MockGpuDeviceDao extends GenericDao<MockGpuDeviceVO, Long> {
+
+    /**
+     * Find the GPU device with the given bus address on the given host
+     * @param hostId the host ID
+     * @param busAddress the bus address
+     * @return the GPU device or null if not found
+     */
+    MockGpuDeviceVO listByHostIdAndBusAddress(long hostId, String busAddress);
+
+    /**
+     * List GPU devices by host ID
+     * @param hostId the host ID
+     * @return list of GPU devices
+     */
+    List<MockGpuDeviceVO> listByHostId(Long hostId);
+
+    /** List GPU devices by VM ID */
+    List<MockGpuDeviceVO> listByVmId(Long vmId);
+}
diff --git a/plugins/hypervisors/simulator/src/main/java/com/cloud/simulator/dao/MockGpuDeviceDaoImpl.java b/plugins/hypervisors/simulator/src/main/java/com/cloud/simulator/dao/MockGpuDeviceDaoImpl.java
new file mode 100644
index 000000000000..90f6a1d0edbb
--- /dev/null
+++ b/plugins/hypervisors/simulator/src/main/java/com/cloud/simulator/dao/MockGpuDeviceDaoImpl.java
@@ -0,0 +1,67 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package com.cloud.simulator.dao;
+
+import com.cloud.simulator.MockGpuDeviceVO;
+import com.cloud.utils.db.Filter;
+import com.cloud.utils.db.GenericDaoBase;
+import com.cloud.utils.db.SearchBuilder;
+import com.cloud.utils.db.SearchCriteria;
+import org.springframework.stereotype.Component;
+
+import java.util.List;
+/** Spring-managed {@link MockGpuDeviceDao} implementation; keyed by the entity's Long id. */
+@Component
+public class MockGpuDeviceDaoImpl extends GenericDaoBase<MockGpuDeviceVO, Long> implements MockGpuDeviceDao {
+    // One builder serves every finder below; each finder sets only the parameters it filters on
+    private final SearchBuilder<MockGpuDeviceVO> allFieldSearch;
+
+    public MockGpuDeviceDaoImpl() {
+        allFieldSearch = createSearchBuilder();
+        allFieldSearch.and("busAddress", allFieldSearch.entity().getBusAddress(), SearchCriteria.Op.EQ);
+        allFieldSearch.and("hostId", allFieldSearch.entity().getHostId(), SearchCriteria.Op.EQ);
+        allFieldSearch.and("vendorName", allFieldSearch.entity().getVendorName(), SearchCriteria.Op.EQ);
+        allFieldSearch.and("vendorId", allFieldSearch.entity().getVendorId(), SearchCriteria.Op.EQ);
+        allFieldSearch.and("deviceId", allFieldSearch.entity().getDeviceId(), SearchCriteria.Op.EQ);
+        allFieldSearch.and("deviceName", allFieldSearch.entity().getDeviceName(), SearchCriteria.Op.EQ);
+        allFieldSearch.and("state", allFieldSearch.entity().getState(), SearchCriteria.Op.EQ);
+        allFieldSearch.and("vmId", allFieldSearch.entity().getVmId(), SearchCriteria.Op.EQ);
+        allFieldSearch.done();
+    }
+
+    @Override
+    public MockGpuDeviceVO listByHostIdAndBusAddress(long hostId, String busAddress) {
+        SearchCriteria<MockGpuDeviceVO> sc = allFieldSearch.create();
+        sc.setParameters("hostId", hostId);
+        sc.setParameters("busAddress", busAddress);
+        return findOneBy(sc);
+    }
+
+    @Override
+    public List<MockGpuDeviceVO> listByHostId(Long hostId) {
+        SearchCriteria<MockGpuDeviceVO> sc = allFieldSearch.create();
+        sc.setParameters("hostId", hostId);
+        return search(sc, new Filter(MockGpuDeviceVO.class, "id", true));
+    }
+
+    @Override
+    public List<MockGpuDeviceVO> listByVmId(Long vmId) {
+        SearchCriteria<MockGpuDeviceVO> sc = allFieldSearch.create();
+        sc.setParameters("vmId", vmId);
+        return search(sc, null);
+    }
+}
+diff --git
a/plugins/hypervisors/simulator/src/main/resources/META-INF/cloudstack/core/spring-simulator-core-context.xml b/plugins/hypervisors/simulator/src/main/resources/META-INF/cloudstack/core/spring-simulator-core-context.xml index e0ed6066bc52..c085e309aac6 100644 --- a/plugins/hypervisors/simulator/src/main/resources/META-INF/cloudstack/core/spring-simulator-core-context.xml +++ b/plugins/hypervisors/simulator/src/main/resources/META-INF/cloudstack/core/spring-simulator-core-context.xml @@ -29,6 +29,7 @@ + diff --git a/plugins/integrations/prometheus/src/main/java/org/apache/cloudstack/metrics/PrometheusExporterImpl.java b/plugins/integrations/prometheus/src/main/java/org/apache/cloudstack/metrics/PrometheusExporterImpl.java index 32ec2f532111..8908bfa000f8 100644 --- a/plugins/integrations/prometheus/src/main/java/org/apache/cloudstack/metrics/PrometheusExporterImpl.java +++ b/plugins/integrations/prometheus/src/main/java/org/apache/cloudstack/metrics/PrometheusExporterImpl.java @@ -394,6 +394,7 @@ private void addVlanMetrics(final List metricsList, final long dcId, final private void addDomainLimits(final List metricsList) { Long totalCpuLimit = 0L; Long totalMemoryLimit = 0L; + Long totalGpuLimit = 0L; for (final DomainJoinVO domain: domainDao.listAll()) { if (domain == null || domain.getLevel() != 1) { @@ -411,6 +412,12 @@ private void addDomainLimits(final List metricsList) { totalMemoryLimit += memoryLimit; } + long gpuLimit = ApiDBUtils.findCorrectResourceLimitForDomain(domain.getGpuLimit(), false, + Resource.ResourceType.gpu, domain.getId()); + if (gpuLimit > 0) { + totalGpuLimit += gpuLimit; + } + long primaryStorageLimit = ApiDBUtils.findCorrectResourceLimitForDomain(domain.getPrimaryStorageLimit(), false, Resource.ResourceType.primary_storage, domain.getId()); long secondaryStorageLimit = ApiDBUtils.findCorrectResourceLimitForDomain(domain.getSecondaryStorageLimit(), false, @@ -419,6 +426,7 @@ private void addDomainLimits(final List metricsList) { // Add per 
domain cpu, memory and storage count metricsList.add(new ItemPerDomainResourceLimit(cpuLimit, domain.getPath(), Resource.ResourceType.cpu.getName())); metricsList.add(new ItemPerDomainResourceLimit(memoryLimit, domain.getPath(), Resource.ResourceType.memory.getName())); + metricsList.add(new ItemPerDomainResourceLimit(gpuLimit, domain.getPath(), Resource.ResourceType.gpu.getName())); metricsList.add(new ItemPerDomainResourceLimit(primaryStorageLimit, domain.getPath(), Resource.ResourceType.primary_storage.getName())); metricsList.add(new ItemPerDomainResourceLimit(secondaryStorageLimit, domain.getPath(), Resource.ResourceType.secondary_storage.getName())); } @@ -442,6 +450,8 @@ private void addDomainResourceCount(final List metricsList) { Resource.ResourceType.memory, null); long cpuUsed = _resourceCountDao.getResourceCount(domain.getId(), Resource.ResourceOwnerType.Domain, Resource.ResourceType.cpu, null); + long gpuUsed = _resourceCountDao.getResourceCount(domain.getId(), Resource.ResourceOwnerType.Domain, + Resource.ResourceType.gpu, null); long primaryStorageUsed = _resourceCountDao.getResourceCount(domain.getId(), Resource.ResourceOwnerType.Domain, Resource.ResourceType.primary_storage, null); long secondaryStorageUsed = _resourceCountDao.getResourceCount(domain.getId(), Resource.ResourceOwnerType.Domain, @@ -449,6 +459,7 @@ private void addDomainResourceCount(final List metricsList) { metricsList.add(new ItemPerDomainResourceCount(memoryUsed, domain.getPath(), Resource.ResourceType.memory.getName())); metricsList.add(new ItemPerDomainResourceCount(cpuUsed, domain.getPath(), Resource.ResourceType.cpu.getName())); + metricsList.add(new ItemPerDomainResourceCount(gpuUsed, domain.getPath(), Resource.ResourceType.gpu.getName())); metricsList.add(new ItemPerDomainResourceCount(primaryStorageUsed, domain.getPath(), Resource.ResourceType.primary_storage.getName())); metricsList.add(new ItemPerDomainResourceCount(secondaryStorageUsed, domain.getPath(), diff --git 
a/scripts/vm/hypervisor/kvm/gpudiscovery.sh b/scripts/vm/hypervisor/kvm/gpudiscovery.sh new file mode 100755 index 000000000000..67627deef571 --- /dev/null +++ b/scripts/vm/hypervisor/kvm/gpudiscovery.sh @@ -0,0 +1,734 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# +# Enumerate GPUs (NVIDIA, Intel, AMD) and output JSON for libvirt, +# including: +# - PCI metadata (address, vendor/device IDs, driver, pci_class) +# - IOMMU group +# - PCI root (for PCIe topology grouping) +# - NUMA node +# - SR-IOV VF counts +# - full_passthrough block (with VM usage) +# - vGPU (MDEV) instances (fetching profile “name” and “max_instance” from description) +# - VF (SR-IOV / MIG) instances (with VM usage) +# +# Uses `lspci -nnm` for GPU discovery and `virsh` to detect VM attachments. +# Compatible with Ubuntu (20.04+, 22.04+) and RHEL/CentOS (7/8), Bash ≥4. 
+# +# +# Sample JSON: +# { +# "gpus": [ +# { +# "pci_address": "00:03.0", +# "vendor_id": "10de", +# "device_id": "2484", +# "vendor": "NVIDIA Corporation", +# "device": "GeForce RTX 3070", +# "driver": "nvidia", +# "pci_class": "VGA compatible controller", +# "iommu_group": "8", +# "sriov_totalvfs": 0, +# "sriov_numvfs": 0, + +# "full_passthrough": { +# "enabled": true, +# "libvirt_address": { +# "domain": "0x0000", +# "bus": "0x00", +# "slot": "0x03", +# "function": "0x0" +# }, +# "used_by_vm": "win10" +# }, + +# "vgpu_instances": [], + +# "vf_instances": [] +# }, +# { +# "pci_address": "00:AF.0", +# "vendor_id": "10de", +# "device_id": "1EB8", +# "vendor": "NVIDIA Corporation", +# "device": "Tesla T4", +# "driver": "nvidia", +# "pci_class": "3D controller", +# "iommu_group": "12", +# "sriov_totalvfs": 0, +# "sriov_numvfs": 0, + +# "full_passthrough": { +# "enabled": false, +# "libvirt_address": { +# "domain": "0x0000", +# "bus": "0x00", +# "slot": "0xAF", +# "function": "0x0" +# }, +# "used_by_vm": null +# }, + +# "vgpu_instances": [ +# { +# "mdev_uuid": "a1b2c3d4-5678-4e9a-8b0c-d1e2f3a4b5c6", +# "profile_name": "grid_t4-16c", +# "max_instances": 4, +# "libvirt_address": { +# "domain": "0x0000", +# "bus": "0x00", +# "slot": "0xAF", +# "function": "0x0" +# }, +# "used_by_vm": "vm1" +# }, +# { +# "mdev_uuid": "b2c3d4e5-6789-4f0a-9c1d-e2f3a4b5c6d7", +# "profile_name": "grid_t4-8c", +# "max_instances": 8, +# "libvirt_address": { +# "domain": "0x0000", +# "bus": "0x00", +# "slot": "0xAF", +# "function": "0x1" +# }, +# "used_by_vm": "vm2" +# } +# ], + +# "vf_instances": [] +# }, +# { +# "pci_address": "00:65.0", +# "vendor_id": "10de", +# "device_id": "20B0", +# "vendor": "NVIDIA Corporation", +# "device": "A100-SXM4-40GB", +# "driver": "nvidia", +# "pci_class": "VGA compatible controller", +# "iommu_group": "15", +# "sriov_totalvfs": 7, +# "sriov_numvfs": 7, + +# "full_passthrough": { +# "enabled": false, +# "libvirt_address": { +# "domain": "0x0000", +# "bus": 
"0x00", +# "slot": "0x65", +# "function": "0x0" +# }, +# "used_by_vm": null +# }, + +# "vgpu_instances": [ +# { +# "mdev_uuid": "f4a2c8de-1234-4b3a-8c9d-0a1b2c3d4e5f", +# "profile_name": "grid_a100-8c", +# "max_instances": 8, +# "libvirt_address": { +# "domain": "0x0000", +# "bus": "0x00", +# "slot": "0x65", +# "function": "0x0" +# }, +# "used_by_vm": null +# }, +# { +# "mdev_uuid": "e5b3d9ef-5678-4c2b-9d0e-1b2c3d4e5f6a", +# "profile_name": "grid_a100-5c", +# "max_instances": 5, +# "libvirt_address": { +# "domain": "0x0000", +# "bus": "0x00", +# "slot": "0x65", +# "function": "0x1" +# }, +# "used_by_vm": null +# } +# ], + +# "vf_instances": [ +# { +# "vf_pci_address": "65:00.2", +# "vf_profile": "1g.5gb", +# "libvirt_address": { +# "domain": "0x0000", +# "bus": "0x00", +# "slot": "0x65", +# "function": "0x2" +# }, +# "used_by_vm": "ml" +# }, +# { +# "vf_pci_address": "65:00.3", +# "vf_profile": "2g.10gb", +# "libvirt_address": { +# "domain": "0x0000", +# "bus": "0x00", +# "slot": "0x65", +# "function": "0x3" +# }, +# "used_by_vm": null +# } +# ] +# }, +# { +# "pci_address": "00:02.0", +# "vendor_id": "8086", +# "device_id": "46A6", +# "vendor": "Intel Corporation", +# "device": "Alder Lake-P GT2 [Iris Xe Graphics]", +# "driver": "i915", +# "pci_class": "VGA compatible controller", +# "iommu_group": "0", +# "sriov_totalvfs": 4, +# "sriov_numvfs": 4, + +# "full_passthrough": { +# "enabled": false, +# "libvirt_address": { +# "domain": "0x0000", +# "bus": "0x00", +# "slot": "0x02", +# "function": "0x0" +# }, +# "used_by_vm": null +# }, + +# "vgpu_instances": [ +# { +# "mdev_uuid": "b7c8d9fe-1111-2222-3333-444455556666", +# "profile_name": "i915-GVTg_V5_4", +# "max_instances": 4, +# "libvirt_address": { +# "domain": "0x0000", +# "bus": "0x00", +# "slot": "0x02", +# "function": "0x0" +# }, +# "used_by_vm": null +# }, +# { +# "mdev_uuid": "c8d9e0af-7777-8888-9999-000011112222", +# "profile_name": "i915-GVTg_V5_8", +# "max_instances": 8, +# "libvirt_address": { +# 
"domain": "0x0000", +# "bus": "0x00", +# "slot": "0x02", +# "function": "0x1" +# }, +# "used_by_vm": null +# } +# ], + +# "vf_instances": [ +# { +# "vf_pci_address": "00:02.1", +# "vf_profile": "Intel SR-IOV VF 1", +# "libvirt_address": { +# "domain": "0x0000", +# "bus": "0x00", +# "slot": "0x02", +# "function": "0x1" +# }, +# "used_by_vm": "linux01" +# }, +# { +# "vf_pci_address": "00:02.2", +# "vf_profile": "Intel SR-IOV VF 2", +# "libvirt_address": { +# "domain": "0x0000", +# "bus": "0x00", +# "slot": "0x02", +# "function": "0x2" +# }, +# "used_by_vm": null +# } +# ] +# }, +# { +# "pci_address": "00:03.0", +# "vendor_id": "1002", +# "device_id": "7340", +# "vendor": "AMD", +# "device": "Instinct MI210", +# "driver": "amdgpu", +# "pci_class": "3D controller", +# "iommu_group": "8", +# "sriov_totalvfs": 8, +# "sriov_numvfs": 8, + +# "full_passthrough": { +# "enabled": false, +# "libvirt_address": { +# "domain": "0x0000", +# "bus": "0x00", +# "slot": "0x03", +# "function": "0x0" +# }, +# "used_by_vm": null +# }, + +# "vgpu_instances": [], + +# "vf_instances": [ +# { +# "vf_pci_address": "03:00.1", +# "vf_profile": "mi210-4c", +# "libvirt_address": { +# "domain": "0x0000", +# "bus": "0x00", +# "slot": "0x03", +# "function": "0x1" +# }, +# "used_by_vm": null +# }, +# { +# "vf_pci_address": "03:00.2", +# "vf_profile": "mi210-2c", +# "libvirt_address": { +# "domain": "0x0000", +# "bus": "0x00", +# "slot": "0x03", +# "function": "0x2" +# }, +# "used_by_vm": null +# }, +# { +# "vf_pci_address": "03:00.3", +# "vf_profile": "mi210-1c", +# "libvirt_address": { +# "domain": "0x0000", +# "bus": "0x00", +# "slot": "0x03", +# "function": "0x3" +# }, +# "used_by_vm": null +# } +# ] +# } +# ] +# } +# + +set -euo pipefail + +# === Utility Functions === + +# Escape a string for JSON +json_escape() { + local str="$1" + str=${str//\\/\\\\} + str=${str//\"/\\\"} + str=${str// +/\\n} + str=${str// +/\\r} + str=${str// /\\t} + printf '"%s"' "$str" +} + +# Cache for nodedev XML data to 
avoid repeated virsh calls +declare -A nodedev_cache + +# Get nodedev name for a PCI address (e.g. "00:02.0" -> "pci_0000_00_02_0") +get_nodedev_name() { + local addr="$1" + echo "pci_$(echo "$addr" | sed 's/[:.]/\_/g' | sed 's/^/0000_/')" +} + +# Get cached nodedev XML for a PCI address +get_nodedev_xml() { + local addr="$1" + local nodedev_name + nodedev_name=$(get_nodedev_name "$addr") + + if [[ -z "${nodedev_cache[$nodedev_name]:-}" ]]; then + if nodedev_cache[$nodedev_name]=$(virsh nodedev-dumpxml "$nodedev_name" 2>/dev/null); then + true # Cache populated successfully + else + nodedev_cache[$nodedev_name]="" # Cache empty result to avoid retries + fi + fi + + echo "${nodedev_cache[$nodedev_name]}" +} + +# Given a PCI address (e.g. "00:02.0"), return its IOMMU group or "null" +get_iommu_group() { + local addr="$1" + local xml + xml=$(get_nodedev_xml "$addr") + local group + group=$(echo "$xml" | xmlstarlet sel -t -v "//iommuGroup/@number" 2>/dev/null || true) + echo "${group:-null}" +} + +# Given a PCI address, output "TOTALVFS NUMVFS" +get_sriov_counts() { + local addr="$1" + local xml + xml=$(get_nodedev_xml "$addr") + + local totalvfs=0 + local numvfs=0 + + if [[ -n "$xml" ]]; then + # Check for SR-IOV capability before parsing + local cap_xml + cap_xml=$(echo "$xml" | xmlstarlet sel -t -c "//capability[@type='virt_functions']" 2>/dev/null || true) + + if [[ -n "$cap_xml" ]]; then + totalvfs=$(echo "$cap_xml" | xmlstarlet sel -t -v "/capability/@maxCount" 2>/dev/null || true) + numvfs=$(echo "$cap_xml" | xmlstarlet sel -t -v "count(/capability/address)" 2>/dev/null || true) + fi + fi + + echo "${totalvfs:-0} ${numvfs:-0}" +} + +# Given a PCI address, return its NUMA node (or -1 if none) +get_numa_node() { + local addr="$1" + local xml + xml=$(get_nodedev_xml "$addr") + local node + node=$(echo "$xml" | xmlstarlet sel -t -v "//numa/@node" 2>/dev/null || true) + echo "${node:--1}" +} + +# Given a PCI address, return its PCI root (the top‐level bridge ID, e.g. 
"0000:00:03") +get_pci_root() { + local addr="$1" + local xml + xml=$(get_nodedev_xml "$addr") + + if [[ -n "$xml" ]]; then + # Extract the parent device from XML + local parent + parent=$(echo "$xml" | xmlstarlet sel -t -v "/device/parent" 2>/dev/null || true) + if [[ -n "$parent" ]]; then + # If parent is a PCI device, recursively find its root + if [[ $parent =~ ^pci_0000_([0-9A-Fa-f]{2})_([0-9A-Fa-f]{2})_([0-9A-Fa-f])$ ]]; then + local parent_addr="${BASH_REMATCH[1]}:${BASH_REMATCH[2]}.${BASH_REMATCH[3]}" + get_pci_root "$parent_addr" + return + else + # Parent is not PCI device, so current device is the root + echo "0000:$addr" + return + fi + fi + fi + + # fallback + echo "0000:$addr" +} + +# Build VM → hostdev maps: +# pci_to_vm[BDF] = VM name that attaches that BDF +# mdev_to_vm[UUID] = VM name that attaches that MDEV UUID +declare -A pci_to_vm mdev_to_vm + +# Gather all VM names (including inactive) +mapfile -t VMS < <(virsh list --all --name | grep -v '^$') +for VM in "${VMS[@]}"; do + # Skip if dumpxml fails + if ! 
xml=$(virsh dumpxml "$VM" 2>/dev/null); then + continue + fi + + # -- PCI hostdevs: use xmlstarlet to extract BDF for all PCI host devices -- + while read -r bus slot func; do + [[ -n "$bus" && -n "$slot" && -n "$func" ]] || continue + # Format to match lspci output (e.g., 01:00.0) by padding with zeros + bus_fmt=$(printf "%02x" "0x$bus") + slot_fmt=$(printf "%02x" "0x$slot") + func_fmt=$(printf "%x" "0x$func") + BDF="$bus_fmt:$slot_fmt.$func_fmt" + pci_to_vm["$BDF"]="$VM" + done < <(echo "$xml" | xmlstarlet sel -T -t -m "//hostdev[@type='pci']/source/address" \ + -v "substring-after(@bus, '0x')" -o " " \ + -v "substring-after(@slot, '0x')" -o " " \ + -v "substring-after(@function, '0x')" -n 2>/dev/null || true) + + # -- MDEV hostdevs: use xmlstarlet to extract UUIDs -- + while IFS= read -r UUID; do + [[ -n "$UUID" ]] && mdev_to_vm["$UUID"]="$VM" + done < <(echo "$xml" | xmlstarlet sel -T -t -m "//hostdev[@type='mdev']" -v "@uuid" -n 2>/dev/null || true) +done + +# Helper: convert a VM name to JSON value (quoted string or null) +to_json_vm() { + local vm="$1" + if [[ -z "$vm" ]]; then + echo "null" + else + json_escape "$vm" + fi +} + +# Parse a "description" file for GPU properties and set global variables +# Expects one argument: the path to the description file +parse_and_add_gpu_properties() { + local desc_file="$1" + # Reset properties to null defaults + MAX_INSTANCES="null" + VIDEO_RAM="null" + MAX_HEADS="null" + MAX_RESOLUTION_X="null" + MAX_RESOLUTION_Y="null" + + if [[ -f "$desc_file" ]]; then + local desc + desc=$(<"$desc_file") + if [[ $desc =~ max_instance=([0-9]+) ]]; then + MAX_INSTANCES="${BASH_REMATCH[1]}" + fi + if [[ $desc =~ framebuffer=([0-9]+)M? 
]]; then # Support with or without 'M' suffix + VIDEO_RAM="${BASH_REMATCH[1]}" + fi + if [[ $desc =~ num_heads=([0-9]+) ]]; then + MAX_HEADS="${BASH_REMATCH[1]}" + fi + if [[ $desc =~ max_resolution=([0-9]+)x([0-9]+) ]]; then + MAX_RESOLUTION_X="${BASH_REMATCH[1]}" + MAX_RESOLUTION_Y="${BASH_REMATCH[2]}" + fi + fi +} + +# === GPU Discovery === + +mapfile -t LINES < <(lspci -nnm) + +echo '{ "gpus": [' + +first_gpu=true +for LINE in "${LINES[@]}"; do + # Parse lspci -nnm fields: SLOT "CLASS [CODE]" "VENDOR [VID]" "DEVICE [DID]" ... + if [[ $LINE =~ ^([^[:space:]]+)[[:space:]]\"([^\"]+)\"[[:space:]]\"([^\"]+)\"[[:space:]]\"([^\"]+)\" ]]; then + PCI_ADDR="${BASH_REMATCH[1]}" + PCI_CLASS="${BASH_REMATCH[2]}" + VENDOR_FIELD="${BASH_REMATCH[3]}" + DEVICE_FIELD="${BASH_REMATCH[4]}" + else + continue + fi + + # If this is a VF, skip it. It will be processed under its PF. + if [[ -e "/sys/bus/pci/devices/0000:$PCI_ADDR/physfn" ]]; then + continue + fi + + # Only process GPU classes (3D controller) + if [[ ! 
"$PCI_CLASS" =~ (3D\ controller) ]]; then + continue + fi + + # Extract vendor name and ID + VENDOR=$(sed -E 's/ \[[0-9A-Fa-f]{4}\]$//' <<<"$VENDOR_FIELD") + VENDOR_ID=$(sed -E 's/.*\[([0-9A-Fa-f]{4})\]$/\1/' <<<"$VENDOR_FIELD") + # Extract device name and ID + DEVICE=$(sed -E 's/ \[[0-9A-Fa-f]{4}\]$//' <<<"$DEVICE_FIELD") + DEVICE_ID=$(sed -E 's/.*\[([0-9A-Fa-f]{4})\]$/\1/' <<<"$DEVICE_FIELD") + + # Kernel driver + DRV_PATH="/sys/bus/pci/devices/0000:$PCI_ADDR/driver" + if [[ -L $DRV_PATH ]]; then + DRIVER=$(basename "$(readlink "$DRV_PATH")") + else + DRIVER="unknown" + fi + + # IOMMU group + IOMMU=$(get_iommu_group "$PCI_ADDR") + + # PCI root (to group GPUs under same PCIe switch/root complex) + PCI_ROOT=$(get_pci_root "$PCI_ADDR") + + # NUMA node + NUMA_NODE=$(get_numa_node "$PCI_ADDR") + + # SR-IOV counts + read -r TOTALVFS NUMVFS < <(get_sriov_counts "$PCI_ADDR") + + # Get Physical GPU properties from its own description file, if available + PF_DESC_PATH="/sys/bus/pci/devices/0000:$PCI_ADDR/description" + parse_and_add_gpu_properties "$PF_DESC_PATH" + # Save physical function's properties before they are overwritten by vGPU/VF processing + PF_MAX_INSTANCES=$MAX_INSTANCES + PF_VIDEO_RAM=$VIDEO_RAM + PF_MAX_HEADS=$MAX_HEADS + PF_MAX_RESOLUTION_X=$MAX_RESOLUTION_X + PF_MAX_RESOLUTION_Y=$MAX_RESOLUTION_Y + + # === full_passthrough usage === + raw="${pci_to_vm[$PCI_ADDR]:-}" + FULL_USED_JSON=$(to_json_vm "$raw") + + # === vGPU (MDEV) instances === + VGPU_ARRAY="[]" + declare -a vlist=() + MDEV_BASE="/sys/bus/pci/devices/0000:$PCI_ADDR/mdev_supported_types" + if [[ -d "$MDEV_BASE" ]]; then + for PROF_DIR in "$MDEV_BASE"/*; do + [[ -d "$PROF_DIR" ]] || continue + + # Read the human-readable profile name from the 'name' file + if [[ -f "$PROF_DIR/name" ]]; then + PROFILE_NAME=$(<"$PROF_DIR/name") + else + PROFILE_NAME=$(basename "$PROF_DIR") + fi + + # Fetch max_instance from the description file, if present + parse_and_add_gpu_properties "$PROF_DIR/description" + + 
# Under each profile, existing UUIDs appear in: + # /sys/bus/pci/devices/0000:$PCI_ADDR/mdev_supported_types//devices/* + DEVICE_DIR="$PROF_DIR/devices" + if [[ -d "$DEVICE_DIR" ]]; then + for UDIR in "$DEVICE_DIR"/*; do + [[ -d $UDIR ]] || continue + MDEV_UUID=$(basename "$UDIR") + + # libvirt_address uses PF BDF + DOMAIN="0x0000" + BUS="0x${PCI_ADDR:0:2}" + SLOT="0x${PCI_ADDR:3:2}" + FUNC="0x${PCI_ADDR:6:1}" + + # Determine which VM uses this UUID + raw="${mdev_to_vm[$MDEV_UUID]:-}" + USED_JSON=$(to_json_vm "$raw") + + vlist+=( + "{\"mdev_uuid\":\"$MDEV_UUID\",\"profile_name\":$(json_escape "$PROFILE_NAME"),\"max_instances\":$MAX_INSTANCES,\"video_ram\":$VIDEO_RAM,\"max_heads\":$MAX_HEADS,\"max_resolution_x\":$MAX_RESOLUTION_X,\"max_resolution_y\":$MAX_RESOLUTION_Y,\"libvirt_address\":{\"domain\":\"$DOMAIN\",\"bus\":\"$BUS\",\"slot\":\"$SLOT\",\"function\":\"$FUNC\"},\"used_by_vm\":$USED_JSON}") + done + fi + done + if [ ${#vlist[@]} -gt 0 ]; then + VGPU_ARRAY="[$( + IFS=, + echo "${vlist[*]}" + )]" + fi + fi + + # === VF instances (SR-IOV / MIG) === + VF_ARRAY="[]" + declare -a flist=() + if ((TOTALVFS > 0)); then + for VF_LINK in /sys/bus/pci/devices/0000:"$PCI_ADDR"/virtfn*; do + [[ -L $VF_LINK ]] || continue + VF_PATH=$(readlink -f "$VF_LINK") + VF_ADDR=${VF_PATH##*/} # e.g. 
"0000:65:00.2" + VF_BDF="${VF_ADDR:5}" # "65:00.2" + + DOMAIN="0x0000" + BUS="0x${VF_BDF:0:2}" + SLOT="0x${VF_BDF:3:2}" + FUNC="0x${VF_BDF:6:1}" + + # Determine vf_profile + VF_PROFILE="" + if VF_LINE=$(lspci -nnm -s "$VF_BDF" 2>/dev/null); then + if [[ $VF_LINE =~ \"([^\"]+)\"[[:space:]]\"([^\"]+)\"[[:space:]]\"([^\"]+)\"[[:space:]]\"([^\"]+)\" ]]; then + VF_DEVICE_FIELD="${BASH_REMATCH[4]}" + VF_PROFILE=$(sed -E 's/ \[[0-9A-Fa-f]{4}\]$//' <<<"$VF_DEVICE_FIELD") + fi + fi + VF_PROFILE_JSON=$(json_escape "$VF_PROFILE") + + # Determine which VM uses this VF_BDF + raw="${pci_to_vm[$VF_BDF]:-}" + USED_JSON=$(to_json_vm "$raw") + + flist+=( + "{\"vf_pci_address\":\"$VF_BDF\",\"vf_profile\":$VF_PROFILE_JSON,\"libvirt_address\":{\"domain\":\"$DOMAIN\",\"bus\":\"$BUS\",\"slot\":\"$SLOT\",\"function\":\"$FUNC\"},\"used_by_vm\":$USED_JSON}") + done + if [ ${#flist[@]} -gt 0 ]; then + VF_ARRAY="[$( + IFS=, + echo "${flist[*]}" + )]" + fi + fi + + # === full_passthrough block === + # If vgpu_instances and vf_instances are empty, we can assume full passthrough + FP_ENABLED=0 + if [[ ${#vlist[@]} -eq 0 && ${#flist[@]} -eq 0 ]]; then + FP_ENABLED=1 + fi + DOMAIN="0x0000" + BUS="0x${PCI_ADDR:0:2}" + SLOT="0x${PCI_ADDR:3:2}" + FUNC="0x${PCI_ADDR:6:1}" + + # Emit JSON + if $first_gpu; then + first_gpu=false + else + echo "," + fi + + cat < allocateTo(VirtualMachineProfile vmProfile, DeploymentPlan pla avoid.addHost(host.getId()); } - return allocateTo(plan, offering, template, avoid, clusterHosts, returnUpTo, considerReservedCapacity, account); + return allocateTo(vmProfile, plan, offering, template, avoid, clusterHosts, returnUpTo, considerReservedCapacity, account); } @Override @@ -285,13 +289,13 @@ public List allocateTo(VirtualMachineProfile vmProfile, DeploymentPlan pla hostsCopy.addAll(_hostDao.findHostsWithTagRuleThatMatchComputeOferringTags(hostTagOnOffering)); if (!hostsCopy.isEmpty()) { - suitableHosts = allocateTo(plan, offering, template, avoid, hostsCopy, returnUpTo, 
considerReservedCapacity, account); + suitableHosts = allocateTo(vmProfile, plan, offering, template, avoid, hostsCopy, returnUpTo, considerReservedCapacity, account); } return suitableHosts; } - protected List allocateTo(DeploymentPlan plan, ServiceOffering offering, VMTemplateVO template, ExcludeList avoid, List hosts, int returnUpTo, + protected List allocateTo(VirtualMachineProfile vmProfile, DeploymentPlan plan, ServiceOffering offering, VMTemplateVO template, ExcludeList avoid, List hosts, int returnUpTo, boolean considerReservedCapacity, Account account) { String vmAllocationAlgorithm = DeploymentClusterPlanner.VmAllocationAlgorithm.value(); if (vmAllocationAlgorithm.equals("random") || vmAllocationAlgorithm.equals("userconcentratedpod_random")) { @@ -342,14 +346,15 @@ protected List allocateTo(DeploymentPlan plan, ServiceOffering offering, V } // Check if GPU device is required by offering and host has the availability - if ((offeringDetails = _serviceOfferingDetailsDao.findDetail(serviceOfferingId, GPU.Keys.vgpuType.toString())) != null) { - ServiceOfferingDetailsVO groupName = _serviceOfferingDetailsDao.findDetail(serviceOfferingId, GPU.Keys.pciDevice.toString()); - if(!_resourceMgr.isGPUDeviceAvailable(host, groupName.getValue(), offeringDetails.getValue())){ - logger.debug("Adding host [{}] to avoid set, because this host does not have required GPU devices available.", host); - avoid.addHost(host.getId()); - continue; - } + if (_resourceMgr.isGPUDeviceAvailable(offering, host, vmProfile.getId())) { + logger.debug("Host [{}] has required GPU devices available.", host); + } else { + // If GPU is not available, skip this host + logger.debug("Adding host [{}] to avoid set, because this host does not have required GPU devices available.", host); + avoid.addHost(host.getId()); + continue; } + Pair cpuCapabilityAndCapacity = _capacityMgr.checkIfHostHasCpuCapabilityAndCapacity(host, offering, considerReservedCapacity); if (cpuCapabilityAndCapacity.first() && 
cpuCapabilityAndCapacity.second()) { if (logger.isDebugEnabled()) { @@ -537,7 +542,7 @@ protected List prioritizeHosts(VMTemplateVO template, ServiceOff prioritizedHosts.addAll(lowPriorityHosts); // if service offering is not GPU enabled then move all the GPU enabled hosts to the end of priority list. - if (_serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString()) == null) { + if (_serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString()) == null && offering.getVgpuProfileId() == null) { List gpuEnabledHosts = new ArrayList<>(); // Check for GPU enabled hosts. diff --git a/server/src/main/java/com/cloud/api/ApiResponseHelper.java b/server/src/main/java/com/cloud/api/ApiResponseHelper.java index 5bce31b9fc6e..a4dafd605627 100644 --- a/server/src/main/java/com/cloud/api/ApiResponseHelper.java +++ b/server/src/main/java/com/cloud/api/ApiResponseHelper.java @@ -300,7 +300,6 @@ import com.cloud.event.Event; import com.cloud.exception.InvalidParameterValueException; import com.cloud.exception.PermissionDeniedException; -import com.cloud.gpu.GPU; import com.cloud.host.ControlState; import com.cloud.host.Host; import com.cloud.host.HostVO; @@ -2195,15 +2194,13 @@ public List createCapacityResponse(List re result.get(0).getPodId(), result.get(0).getClusterId())) != null) { HashMap vgpuVMs = ApiDBUtils.getVgpuVmsCount(result.get(0).getDataCenterId(), result.get(0).getPodId(), result.get(0).getClusterId()); - float capacityUsed = 0; + long capacityUsed = 0; long capacityMax = 0; for (VgpuTypesInfo capacity : gpuCapacities) { if (vgpuVMs.containsKey(capacity.getGroupName().concat(capacity.getModelName()))) { - capacityUsed += (float)vgpuVMs.get(capacity.getGroupName().concat(capacity.getModelName())) / capacity.getMaxVpuPerGpu(); - } - if (capacity.getModelName().equals(GPU.GPUType.passthrough.toString())) { - capacityMax += capacity.getMaxCapacity(); + capacityUsed += 
vgpuVMs.get(capacity.getGroupName().concat(capacity.getModelName())); } + capacityMax += capacity.getMaxCapacity(); } DataCenter zone = ApiDBUtils.findZoneById(result.get(0).getDataCenterId()); @@ -2224,10 +2221,11 @@ public List createCapacityResponse(List re } capacityResponse.setCapacityType(Capacity.CAPACITY_TYPE_GPU); capacityResponse.setCapacityName(CapacityVO.getCapacityName(Capacity.CAPACITY_TYPE_GPU)); - capacityResponse.setCapacityUsed((long)Math.ceil(capacityUsed)); + capacityResponse.setCapacityUsed(capacityUsed); + capacityResponse.setCapacityAllocated(capacityUsed); capacityResponse.setCapacityTotal(capacityMax); if (capacityMax > 0) { - capacityResponse.setPercentUsed(format.format(capacityUsed / capacityMax * 100f)); + capacityResponse.setPercentUsed(format.format((float)capacityUsed / capacityMax * 100f)); } else { capacityResponse.setPercentUsed(format.format(0)); } diff --git a/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java b/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java index e91c76adc796..1643c1962ce6 100644 --- a/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java +++ b/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java @@ -1535,6 +1535,13 @@ private Pair, Integer> searchForUserVMIdsAndCount(ListVMsCmd cmd) { userVmSearchBuilder.entity().getId(), JoinBuilder.JoinType.INNER); } + if (cmd.getGpuEnabled() != null) { + SearchBuilder serviceOfferingSearch = _srvOfferingDao.createSearchBuilder(); + _srvOfferingDao.addCheckForGpuEnabled(serviceOfferingSearch, cmd.getGpuEnabled()); + + userVmSearchBuilder.join("serviceOffering", serviceOfferingSearch, serviceOfferingSearch.entity().getId(), userVmSearchBuilder.entity().getServiceOfferingId(), JoinBuilder.JoinType.INNER); + } + if (keyPairName != null) { SearchBuilder vmDetailSearchKeys = vmInstanceDetailsDao.createSearchBuilder(); SearchBuilder vmDetailSearchVmIds = vmInstanceDetailsDao.createSearchBuilder(); @@ -4043,6 +4050,8 @@ private Pair, 
Integer> searchForServiceOfferingIdsAndCount(ListServic String storageType = cmd.getStorageType(); ServiceOffering.State state = cmd.getState(); final Long templateId = cmd.getTemplateId(); + final Long vgpuProfileId = cmd.getVgpuProfileId(); + final Boolean gpuEnabled = cmd.getGpuEnabled(); final Account owner = accountMgr.finalizeOwner(caller, accountName, domainId, projectId); @@ -4082,6 +4091,14 @@ private Pair, Integer> searchForServiceOfferingIdsAndCount(ListServic serviceOfferingSearch.and("state", serviceOfferingSearch.entity().getState(), Op.EQ); } + if (vgpuProfileId != null) { + serviceOfferingSearch.and("vgpuProfileId", serviceOfferingSearch.entity().getVgpuProfileId(), Op.EQ); + } + + if (gpuEnabled != null) { + _srvOfferingDao.addCheckForGpuEnabled(serviceOfferingSearch, gpuEnabled); + } + if (vmId != null) { currentVmOffering = _srvOfferingDao.findByIdIncludingRemoved(vmInstance.getId(), vmInstance.getServiceOfferingId()); diskOffering = _diskOfferingDao.findByIdIncludingRemoved(currentVmOffering.getDiskOfferingId()); @@ -4368,6 +4385,10 @@ private Pair, Integer> searchForServiceOfferingIdsAndCount(ListServic sc.setParameters("state", state); } + if (vgpuProfileId != null) { + sc.setParameters("vgpuProfileId", vgpuProfileId); + } + if (vmId != null) { if (!currentVmOffering.isDynamic()) { sc.setParameters("idNEQ", currentVmOffering.getId()); diff --git a/server/src/main/java/com/cloud/api/query/ViewResponseHelper.java b/server/src/main/java/com/cloud/api/query/ViewResponseHelper.java index 2779e3a22e66..76db6fd66b51 100644 --- a/server/src/main/java/com/cloud/api/query/ViewResponseHelper.java +++ b/server/src/main/java/com/cloud/api/query/ViewResponseHelper.java @@ -471,6 +471,7 @@ private static void copyResourceLimitsIntoMap(Map r resourceLimitMap.put(Resource.ResourceType.vpc, domainJoinVO.getVpcLimit()); resourceLimitMap.put(Resource.ResourceType.cpu, domainJoinVO.getCpuLimit()); resourceLimitMap.put(Resource.ResourceType.memory, 
domainJoinVO.getMemoryLimit()); + resourceLimitMap.put(Resource.ResourceType.gpu, domainJoinVO.getGpuLimit()); resourceLimitMap.put(Resource.ResourceType.primary_storage, domainJoinVO.getPrimaryStorageLimit()); resourceLimitMap.put(Resource.ResourceType.secondary_storage, domainJoinVO.getSecondaryStorageLimit()); resourceLimitMap.put(Resource.ResourceType.project, domainJoinVO.getProjectLimit()); @@ -490,6 +491,7 @@ private static void copyResourceLimitsFromMap(Map r domainJoinVO.setVpcLimit(resourceLimitMap.get(Resource.ResourceType.vpc)); domainJoinVO.setCpuLimit(resourceLimitMap.get(Resource.ResourceType.cpu)); domainJoinVO.setMemoryLimit(resourceLimitMap.get(Resource.ResourceType.memory)); + domainJoinVO.setGpuLimit(resourceLimitMap.get(Resource.ResourceType.gpu)); domainJoinVO.setPrimaryStorageLimit(resourceLimitMap.get(Resource.ResourceType.primary_storage)); domainJoinVO.setSecondaryStorageLimit(resourceLimitMap.get(Resource.ResourceType.secondary_storage)); domainJoinVO.setProjectLimit(resourceLimitMap.get(Resource.ResourceType.project)); @@ -512,6 +514,7 @@ private static void setParentResourceLimitIfNeeded(Map searchByIds(Long... 
accountIds) { // set detail batch query size diff --git a/server/src/main/java/com/cloud/api/query/dao/DataCenterJoinDaoImpl.java b/server/src/main/java/com/cloud/api/query/dao/DataCenterJoinDaoImpl.java index a3177594e7f9..f2d061c7bb0a 100644 --- a/server/src/main/java/com/cloud/api/query/dao/DataCenterJoinDaoImpl.java +++ b/server/src/main/java/com/cloud/api/query/dao/DataCenterJoinDaoImpl.java @@ -25,11 +25,13 @@ import com.cloud.cpu.CPU; import com.cloud.dc.ASNumberRangeVO; import com.cloud.dc.dao.ASNumberRangeDao; +import com.cloud.gpu.dao.HostGpuGroupsDao; import com.cloud.network.Network; import com.cloud.network.dao.NetrisProviderDao; import com.cloud.network.dao.NsxProviderDao; import com.cloud.network.element.NetrisProviderVO; import com.cloud.network.element.NsxProviderVO; +import com.cloud.utils.Pair; import org.apache.cloudstack.annotation.AnnotationService; import org.apache.cloudstack.annotation.dao.AnnotationDao; import org.apache.cloudstack.api.ResponseObject.ResponseView; @@ -69,6 +71,8 @@ public class DataCenterJoinDaoImpl extends GenericDaoBase gpuStats = hostGpuGroupsDao.getGpuStats(dataCenter.getId(), null, null, null); + if (gpuStats != null) { + Long totalGpuDevices = gpuStats.first(); + Long usedGpuDevices = totalGpuDevices - gpuStats.second(); + zoneResponse.setGpuTotal(totalGpuDevices); + zoneResponse.setGpuUsed(usedGpuDevices); + } if ((dataCenter.getDescription() != null) && !dataCenter.getDescription().equalsIgnoreCase("null")) { zoneResponse.setDescription(dataCenter.getDescription()); diff --git a/server/src/main/java/com/cloud/api/query/dao/DomainJoinDaoImpl.java b/server/src/main/java/com/cloud/api/query/dao/DomainJoinDaoImpl.java index 79376a37b976..4a0929744cf8 100644 --- a/server/src/main/java/com/cloud/api/query/dao/DomainJoinDaoImpl.java +++ b/server/src/main/java/com/cloud/api/query/dao/DomainJoinDaoImpl.java @@ -195,6 +195,9 @@ public void setResourceLimits(DomainJoinVO domain, boolean fullView, ResourceLim 
response.setMemoryTotal(memoryTotal); response.setMemoryAvailable(memoryAvail); + //get resource limits for gpus + setGpuResourceLimits(domain, fullView, response); + //get resource limits for primary storage space and convert it from Bytes to GiB long primaryStorageLimit = ApiDBUtils.findCorrectResourceLimitForDomain(domain.getPrimaryStorageLimit(), ResourceType.primary_storage, domain.getId()); String primaryStorageLimitDisplay = (fullView || primaryStorageLimit == -1) ? Resource.UNLIMITED : String.valueOf(primaryStorageLimit / ResourceType.bytesToGiB); @@ -250,6 +253,16 @@ public void setResourceLimits(DomainJoinVO domain, boolean fullView, ResourceLim response.setObjectStorageAvailable(objectStorageAvail); } + private void setGpuResourceLimits(DomainJoinVO domain, boolean fullView, ResourceLimitAndCountResponse response) { + long gpuLimit = ApiDBUtils.findCorrectResourceLimitForDomain(domain.getGpuLimit(), ResourceType.gpu, domain.getId()); + String gpuLimitDisplay = (fullView || gpuLimit == -1) ? Resource.UNLIMITED : String.valueOf(gpuLimit); + long gpuTotal = (domain.getGpuTotal() == null) ? 0 : domain.getGpuTotal(); + String gpuAvail = (fullView || gpuLimit == -1) ? Resource.UNLIMITED : String.valueOf(gpuLimit - gpuTotal); + response.setGpuLimit(gpuLimitDisplay); + response.setGpuTotal(gpuTotal); + response.setGpuAvailable(gpuAvail); + } + @Override public List searchByIds(Long... 
domainIds) { // set detail batch query size diff --git a/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java b/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java index 66472597f972..e7265a7e3b9a 100644 --- a/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java +++ b/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java @@ -158,6 +158,8 @@ private void setNewHostResponseBase(HostJoinVO host, EnumSet detail List gpuGroups = ApiDBUtils.getGpuGroups(host.getId()); if (gpuGroups != null && !gpuGroups.isEmpty()) { List gpus = new ArrayList(); + long gpuRemaining = 0; + long gpuTotal = 0; for (HostGpuGroupsVO entry : gpuGroups) { GpuResponse gpuResponse = new GpuResponse(); gpuResponse.setGpuGroupName(entry.getGroupName()); @@ -175,11 +177,15 @@ private void setNewHostResponseBase(HostJoinVO host, EnumSet detail vgpuResponse.setRemainingCapacity(vgpuType.getRemainingCapacity()); vgpuResponse.setmaxCapacity(vgpuType.getMaxCapacity()); vgpus.add(vgpuResponse); + gpuRemaining += vgpuType.getRemainingCapacity(); + gpuTotal += vgpuType.getMaxCapacity(); } gpuResponse.setVgpu(vgpus); } gpus.add(gpuResponse); } + hostResponse.setGpuTotal(gpuTotal); + hostResponse.setGpuUsed(gpuTotal - gpuRemaining); hostResponse.setGpuGroup(gpus); } if (details.contains(HostDetails.all) || details.contains(HostDetails.capacity) || details.contains(HostDetails.stats) || details.contains(HostDetails.events)) { diff --git a/server/src/main/java/com/cloud/api/query/dao/ServiceOfferingJoinDaoImpl.java b/server/src/main/java/com/cloud/api/query/dao/ServiceOfferingJoinDaoImpl.java index 4ae5074c511c..579425a68c13 100644 --- a/server/src/main/java/com/cloud/api/query/dao/ServiceOfferingJoinDaoImpl.java +++ b/server/src/main/java/com/cloud/api/query/dao/ServiceOfferingJoinDaoImpl.java @@ -137,6 +137,16 @@ public ServiceOfferingResponse newServiceOfferingResponse(ServiceOfferingJoinVO offeringResponse.setDomainId(offering.getDomainUuid()); 
offeringResponse.setZone(offering.getZoneName()); offeringResponse.setZoneId(offering.getZoneUuid()); + offeringResponse.setGpuCardId(offering.getGpuCardUuid()); + offeringResponse.setGpuCardName(offering.getGpuCardName()); + offeringResponse.setVgpuProfileId(offering.getVgpuProfileUuid()); + offeringResponse.setVgpuProfileName(offering.getVgpuProfileName()); + offeringResponse.setVideoRam(offering.getVideoRam()); + offeringResponse.setMaxHeads(offering.getMaxHeads()); + offeringResponse.setMaxResolutionX(offering.getMaxResolutionX()); + offeringResponse.setMaxResolutionY(offering.getMaxResolutionY()); + offeringResponse.setGpuCount(offering.getGpuCount()); + offeringResponse.setGpuDisplay(offering.getGpuDisplay()); offeringResponse.setNetworkRate(offering.getRateMbps()); offeringResponse.setHostTag(offering.getHostTag()); offeringResponse.setDeploymentPlanner(offering.getDeploymentPlanner()); diff --git a/server/src/main/java/com/cloud/api/query/dao/UserVmJoinDaoImpl.java b/server/src/main/java/com/cloud/api/query/dao/UserVmJoinDaoImpl.java index c174da2993a7..8092c63c6fd4 100644 --- a/server/src/main/java/com/cloud/api/query/dao/UserVmJoinDaoImpl.java +++ b/server/src/main/java/com/cloud/api/query/dao/UserVmJoinDaoImpl.java @@ -17,13 +17,14 @@ package com.cloud.api.query.dao; import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.Collections; import java.time.LocalDate; import java.time.ZoneId; import java.time.temporal.ChronoUnit; -import java.util.ArrayList; import java.util.Calendar; -import java.util.Collections; import java.util.Date; + import java.util.HashMap; import java.util.Hashtable; import java.util.List; @@ -33,6 +34,8 @@ import javax.inject.Inject; +import com.cloud.gpu.dao.VgpuProfileDao; +import com.cloud.service.dao.ServiceOfferingDao; import org.apache.cloudstack.affinity.AffinityGroupResponse; import org.apache.cloudstack.annotation.AnnotationService; import org.apache.cloudstack.annotation.dao.AnnotationDao; @@ -89,6 
+92,7 @@ import com.cloud.vm.VmStats; import com.cloud.vm.dao.NicExtraDhcpOptionDao; import com.cloud.vm.dao.NicSecondaryIpVO; + import com.cloud.vm.dao.VMInstanceDetailsDao; @Component @@ -116,6 +120,10 @@ public class UserVmJoinDaoImpl extends GenericDaoBaseWithTagInformation VmDetailSearch; private final SearchBuilder activeVmByIsoSearch; @@ -251,6 +259,7 @@ public UserVmResponse newUserVmResponse(ResponseView view, String objectName, Us userVmResponse.setDiskOfferingName(userVm.getDiskOfferingName()); } } + if (details.contains(VMDetails.all) || details.contains(VMDetails.backoff)) { userVmResponse.setBackupOfferingId(userVm.getBackupOfferingUuid()); userVmResponse.setBackupOfferingName(userVm.getBackupOfferingName()); @@ -259,6 +268,19 @@ public UserVmResponse newUserVmResponse(ResponseView view, String objectName, Us userVmResponse.setCpuNumber(userVm.getCpu()); userVmResponse.setCpuSpeed(userVm.getSpeed()); userVmResponse.setMemory(userVm.getRamSize()); + userVmResponse.setGpuCount(userVm.getGpuCount()); + userVmResponse.setGpuCardName(userVm.getGpuCardName()); + if (caller.getType() == Account.Type.ADMIN) { + userVmResponse.setGpuCardId(userVm.getGpuCardUuid()); + userVmResponse.setVgpuProfileId(userVm.getVgpuProfileUuid()); + } + userVmResponse.setVgpuProfileName(userVm.getVgpuProfileName()); + userVmResponse.setVideoRam(userVm.getVideoRam()); + userVmResponse.setMaxHeads(userVm.getMaxHeads()); + userVmResponse.setMaxResolutionX(userVm.getMaxResolutionX()); + userVmResponse.setMaxResolutionY(userVm.getMaxResolutionY()); + userVmResponse.setVgpu(userVm.getVgpuProfileName()); + ServiceOfferingDetailsVO serviceOfferingDetail = ApiDBUtils.findServiceOfferingDetail(userVm.getServiceOfferingId(), GPU.Keys.vgpuType.toString()); if (serviceOfferingDetail != null) { userVmResponse.setVgpu(serviceOfferingDetail.getValue()); @@ -743,8 +765,10 @@ public List newUserVmView(VirtualMachine... 
vms) { public List listByAccountServiceOfferingTemplateAndNotInState(long accountId, List states, List offeringIds, List templateIds) { SearchBuilder userVmSearch = createSearchBuilder(); + userVmSearch.selectFields(userVmSearch.entity().getId(), userVmSearch.entity().getCpu(), - userVmSearch.entity().getRamSize()); + userVmSearch.entity().getRamSize(), userVmSearch.entity().getGpuCount()); + userVmSearch.and("accountId", userVmSearch.entity().getAccountId(), Op.EQ); userVmSearch.and("serviceOfferingId", userVmSearch.entity().getServiceOfferingId(), Op.IN); userVmSearch.and("templateId", userVmSearch.entity().getTemplateId(), Op.IN); diff --git a/server/src/main/java/com/cloud/api/query/vo/AccountJoinVO.java b/server/src/main/java/com/cloud/api/query/vo/AccountJoinVO.java index 2e39816ed413..91abd235ae84 100644 --- a/server/src/main/java/com/cloud/api/query/vo/AccountJoinVO.java +++ b/server/src/main/java/com/cloud/api/query/vo/AccountJoinVO.java @@ -177,6 +177,12 @@ public class AccountJoinVO extends BaseViewVO implements InternalIdentity, Ident @Column(name = "memoryTotal") private Long memoryTotal; + @Column(name = "gpuLimit") + private Long gpuLimit; + + @Column(name = "gpuTotal") + private Long gpuTotal; + @Column(name = "primaryStorageLimit") private Long primaryStorageLimit; @@ -357,6 +363,10 @@ public Long getMemoryTotal() { return memoryTotal; } + public Long getGpuTotal() { + return gpuTotal; + } + public Long getPrimaryStorageTotal() { return primaryStorageTotal; } @@ -421,6 +431,10 @@ public Long getMemoryLimit() { return memoryLimit; } + public Long getGpuLimit() { + return gpuLimit; + } + public Long getPrimaryStorageLimit() { return primaryStorageLimit; } diff --git a/server/src/main/java/com/cloud/api/query/vo/DomainJoinVO.java b/server/src/main/java/com/cloud/api/query/vo/DomainJoinVO.java index 3e623690f10a..83461d470d81 100644 --- a/server/src/main/java/com/cloud/api/query/vo/DomainJoinVO.java +++ 
b/server/src/main/java/com/cloud/api/query/vo/DomainJoinVO.java @@ -152,6 +152,11 @@ public class DomainJoinVO extends BaseViewVO implements InternalIdentity, Identi @Column(name="memoryTotal") private Long memoryTotal; + @Column(name = "gpuLimit") + private Long gpuLimit; + + @Column(name = "gpuTotal") + private Long gpuTotal; @Column(name="primaryStorageLimit") private Long primaryStorageLimit; @@ -404,6 +409,14 @@ public void setMemoryTotal(Long memoryTotal) { } + public Long getGpuTotal() { + return gpuTotal; + } + + public void setGpuTotal(Long gpuTotal) { + this.gpuTotal = gpuTotal; + } + public Long getPrimaryStorageTotal() { return primaryStorageTotal; } @@ -545,6 +558,14 @@ public void setMemoryLimit(Long memoryLimit) { } + public Long getGpuLimit() { + return gpuLimit; + } + + public void setGpuLimit(Long gpuLimit) { + this.gpuLimit = gpuLimit; + } + public Long getPrimaryStorageLimit() { return primaryStorageLimit; } diff --git a/server/src/main/java/com/cloud/api/query/vo/ServiceOfferingJoinVO.java b/server/src/main/java/com/cloud/api/query/vo/ServiceOfferingJoinVO.java index 5b75c5729331..1d86d14cf63a 100644 --- a/server/src/main/java/com/cloud/api/query/vo/ServiceOfferingJoinVO.java +++ b/server/src/main/java/com/cloud/api/query/vo/ServiceOfferingJoinVO.java @@ -227,6 +227,42 @@ public class ServiceOfferingJoinVO extends BaseViewVO implements InternalIdentit @Enumerated(value = EnumType.STRING) private VMLeaseManager.ExpiryAction leaseExpiryAction; + @Column(name = "gpu_card_id") + private Long gpuCardId; + + @Column(name = "gpu_card_uuid") + private String gpuCardUuid; + + @Column(name = "gpu_card_name") + private String gpuCardName; + + @Column(name = "vgpu_profile_id") + private Long vgpuProfileId; + + @Column(name = "vgpu_profile_uuid") + private String vgpuProfileUuid; + + @Column(name = "vgpu_profile_name") + private String vgpuProfileName; + + @Column(name = "vgpu_profile_video_ram") + private Long videoRam; + + @Column(name = 
"vgpu_profile_max_heads") + private Long maxHeads; + + @Column(name = "vgpu_profile_max_resolution_x") + private Long maxResolutionX; + + @Column(name = "vgpu_profile_max_resolution_y") + private Long maxResolutionY; + + @Column(name = "gpu_count") + private Integer gpuCount; + + @Column(name = "gpu_display") + private Boolean gpuDisplay; + public ServiceOfferingJoinVO() { } @@ -473,4 +509,52 @@ public Integer getLeaseDuration() { public VMLeaseManager.ExpiryAction getLeaseExpiryAction() { return leaseExpiryAction; } + + public Long getGpuCardId() { + return gpuCardId; + } + + public String getGpuCardUuid() { + return gpuCardUuid; + } + + public String getGpuCardName() { + return gpuCardName; + } + + public Long getVgpuProfileId() { + return vgpuProfileId; + } + + public String getVgpuProfileUuid() { + return vgpuProfileUuid; + } + + public String getVgpuProfileName() { + return vgpuProfileName; + } + + public Long getMaxResolutionY() { + return maxResolutionY; + } + + public Long getMaxResolutionX() { + return maxResolutionX; + } + + public Long getMaxHeads() { + return maxHeads; + } + + public Long getVideoRam() { + return videoRam; + } + + public Integer getGpuCount() { + return gpuCount; + } + + public Boolean getGpuDisplay() { + return gpuDisplay; + } } diff --git a/server/src/main/java/com/cloud/api/query/vo/UserVmJoinVO.java b/server/src/main/java/com/cloud/api/query/vo/UserVmJoinVO.java index a0680f557531..eab34081d514 100644 --- a/server/src/main/java/com/cloud/api/query/vo/UserVmJoinVO.java +++ b/server/src/main/java/com/cloud/api/query/vo/UserVmJoinVO.java @@ -240,6 +240,40 @@ public class UserVmJoinVO extends BaseViewWithTagInformationVO implements Contro @Column(name = "service_offering_name") private String serviceOfferingName; + + @Column(name = "gpu_card_id") + private Long gpuCardId; + + @Column(name = "gpu_card_uuid") + private String gpuCardUuid; + + @Column(name = "gpu_card_name") + private String gpuCardName; + + @Column(name = 
"vgpu_profile_id") + private Long vgpuProfileId; + + @Column(name = "vgpu_profile_uuid") + private String vgpuProfileUuid; + + @Column(name = "vgpu_profile_name") + private String vgpuProfileName; + + @Column(name = "vgpu_profile_video_ram") + private Long videoRam; + + @Column(name = "vgpu_profile_max_heads") + private Long maxHeads; + + @Column(name = "vgpu_profile_max_resolution_x") + private Long maxResolutionX; + + @Column(name = "vgpu_profile_max_resolution_y") + private Long maxResolutionY; + + @Column(name = "gpu_count") + private Integer gpuCount; + @Column(name = "backup_offering_id") private Long backupOfferingId; @@ -703,6 +737,50 @@ public String getServiceOfferingName() { return serviceOfferingName; } + public Long getGpuCardId() { + return gpuCardId; + } + + public String getGpuCardUuid() { + return gpuCardUuid; + } + + public String getGpuCardName() { + return gpuCardName; + } + + public Long getVgpuProfileId() { + return vgpuProfileId; + } + + public String getVgpuProfileUuid() { + return vgpuProfileUuid; + } + + public String getVgpuProfileName() { + return vgpuProfileName; + } + + public Long getVideoRam() { + return videoRam; + } + + public Long getMaxHeads() { + return maxHeads; + } + + public Long getMaxResolutionX() { + return maxResolutionX; + } + + public Long getMaxResolutionY() { + return maxResolutionY; + } + + public Integer getGpuCount() { + return gpuCount; + } + public String getBackupOfferingUuid() { return backupOfferingUuid; } diff --git a/server/src/main/java/com/cloud/configuration/ConfigurationManagerImpl.java b/server/src/main/java/com/cloud/configuration/ConfigurationManagerImpl.java index dd204d41141a..a60b84bf2b69 100644 --- a/server/src/main/java/com/cloud/configuration/ConfigurationManagerImpl.java +++ b/server/src/main/java/com/cloud/configuration/ConfigurationManagerImpl.java @@ -54,6 +54,8 @@ import com.cloud.network.element.NetrisProviderVO; import com.cloud.network.netris.NetrisService; import 
org.apache.cloudstack.acl.RoleType; +import com.cloud.gpu.VgpuProfileVO; +import com.cloud.gpu.dao.VgpuProfileDao; import org.apache.cloudstack.acl.SecurityChecker; import org.apache.cloudstack.affinity.AffinityGroup; import org.apache.cloudstack.affinity.AffinityGroupService; @@ -360,6 +362,8 @@ public class ConfigurationManagerImpl extends ManagerBase implements Configurati @Inject DiskOfferingDetailsDao diskOfferingDetailsDao; @Inject + VgpuProfileDao vgpuProfileDao; + @Inject NetworkOfferingDao _networkOfferingDao; @Inject NetworkOfferingJoinDao networkOfferingJoinDao; @@ -3450,6 +3454,9 @@ public ServiceOffering createServiceOffering(final CreateServiceOfferingCmd cmd) Integer leaseDuration = cmd.getLeaseDuration(); VMLeaseManager.ExpiryAction leaseExpiryAction = validateAndGetLeaseExpiryAction(leaseDuration, cmd.getLeaseExpiryAction()); + final Long vgpuProfileId = cmd.getVgpuProfileId(); + Integer gpuCount = validateVgpuProfileAndGetGpuCount(vgpuProfileId, cmd.getGpuCount()); + return createServiceOffering(userId, cmd.isSystem(), vmType, cmd.getServiceOfferingName(), cpuNumber, memory, cpuSpeed, cmd.getDisplayText(), cmd.getProvisioningType(), localStorageRequired, offerHA, limitCpuUse, volatileVm, cmd.getTags(), cmd.getDomainIds(), cmd.getZoneIds(), cmd.getHostTag(), cmd.getNetworkRate(), cmd.getDeploymentPlanner(), details, cmd.getRootDiskSize(), isCustomizedIops, cmd.getMinIops(), cmd.getMaxIops(), @@ -3458,7 +3465,24 @@ public ServiceOffering createServiceOffering(final CreateServiceOfferingCmd cmd) cmd.getIopsReadRate(), cmd.getIopsReadRateMax(), cmd.getIopsReadRateMaxLength(), cmd.getIopsWriteRate(), cmd.getIopsWriteRateMax(), cmd.getIopsWriteRateMaxLength(), cmd.getHypervisorSnapshotReserve(), cmd.getCacheMode(), storagePolicyId, cmd.getDynamicScalingEnabled(), diskOfferingId, - cmd.getDiskOfferingStrictness(), cmd.isCustomized(), cmd.getEncryptRoot(), cmd.isPurgeResources(), leaseDuration, leaseExpiryAction); + cmd.getDiskOfferingStrictness(), 
cmd.isCustomized(), cmd.getEncryptRoot(), vgpuProfileId, gpuCount, cmd.getGpuDisplay(), cmd.isPurgeResources(), leaseDuration, leaseExpiryAction); + } + + private Integer validateVgpuProfileAndGetGpuCount(final Long vgpuProfileId, Integer gpuCount) { + Integer finalGpuCount = gpuCount; + if (vgpuProfileId != null) { + VgpuProfileVO vgpuProfile = vgpuProfileDao.findById(vgpuProfileId); + if (vgpuProfile == null) { + throw new InvalidParameterValueException("Please specify a valid vgpu profile."); + } + if (gpuCount != null && gpuCount < 1) { + throw new InvalidParameterValueException("GPU count must be greater than 0."); + } + if (gpuCount == null) { + finalGpuCount = 1; + } + } + return finalGpuCount; } protected ServiceOfferingVO createServiceOffering(final long userId, final boolean isSystem, final VirtualMachine.Type vmType, @@ -3471,7 +3495,7 @@ protected ServiceOfferingVO createServiceOffering(final long userId, final boole Long iopsWriteRate, Long iopsWriteRateMax, Long iopsWriteRateMaxLength, final Integer hypervisorSnapshotReserve, String cacheMode, final Long storagePolicyID, final boolean dynamicScalingEnabled, final Long diskOfferingId, final boolean diskOfferingStrictness, - final boolean isCustomized, final boolean encryptRoot, final boolean purgeResources, Integer leaseDuration, VMLeaseManager.ExpiryAction leaseExpiryAction) { + final boolean isCustomized, final boolean encryptRoot, Long vgpuProfileId, Integer gpuCount, Boolean gpuDisplay, final boolean purgeResources, Integer leaseDuration, VMLeaseManager.ExpiryAction leaseExpiryAction) { // Filter child domains when both parent and child domains are present List filteredDomainIds = filterChildSubDomains(domainIds); @@ -3553,6 +3577,9 @@ protected ServiceOfferingVO createServiceOffering(final long userId, final boole } serviceOffering.setDiskOfferingStrictness(diskOfferingStrictness); + serviceOffering.setVgpuProfileId(vgpuProfileId); + serviceOffering.setGpuCount(gpuCount); + 
serviceOffering.setGpuDisplay(gpuDisplay); DiskOfferingVO diskOffering = null; if (diskOfferingId == null) { diff --git a/server/src/main/java/com/cloud/deploy/DeploymentPlanningManagerImpl.java b/server/src/main/java/com/cloud/deploy/DeploymentPlanningManagerImpl.java index 9d68c3395d42..daaebb42a340 100644 --- a/server/src/main/java/com/cloud/deploy/DeploymentPlanningManagerImpl.java +++ b/server/src/main/java/com/cloud/deploy/DeploymentPlanningManagerImpl.java @@ -36,6 +36,7 @@ import javax.inject.Inject; import javax.naming.ConfigurationException; +import com.cloud.gpu.dao.VgpuProfileDao; import org.apache.cloudstack.affinity.AffinityGroupDomainMapVO; import org.apache.cloudstack.affinity.AffinityGroupProcessor; import org.apache.cloudstack.affinity.AffinityGroupService; @@ -94,7 +95,6 @@ import com.cloud.exception.ConnectionException; import com.cloud.exception.InsufficientServerCapacityException; import com.cloud.exception.StorageUnavailableException; -import com.cloud.gpu.GPU; import com.cloud.host.DetailVO; import com.cloud.host.Host; import com.cloud.host.HostVO; @@ -106,7 +106,6 @@ import com.cloud.org.Cluster; import com.cloud.org.Grouping; import com.cloud.resource.ResourceManager; -import com.cloud.service.ServiceOfferingDetailsVO; import com.cloud.service.dao.ServiceOfferingDetailsDao; import com.cloud.storage.DiskOfferingVO; import com.cloud.storage.GuestOSVO; @@ -251,6 +250,8 @@ public void setHostAllocators(List hostAllocators) { @Inject protected ResourceManager _resourceMgr; @Inject + protected VgpuProfileDao vgpuProfileDao; + @Inject protected ServiceOfferingDetailsDao _serviceOfferingDetailsDao; protected List _planners; @@ -586,10 +587,8 @@ private boolean canUseLastHost(HostVO host, ExcludeList avoids, DeploymentPlan p return false; } - ServiceOfferingDetailsVO offeringDetails = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString()); - ServiceOfferingDetailsVO groupName = 
_serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.pciDevice.toString()); - if (offeringDetails != null && !_resourceMgr.isGPUDeviceAvailable(host, groupName.getValue(), offeringDetails.getValue())) { - logger.debug("Cannot deploy VM [{}] in the last host [{}] because this host does not have the required GPU devices available. Skipping this and trying other available hosts.", + if (!_resourceMgr.isGPUDeviceAvailable(offering, host, vm.getId())) { + logger.debug("Cannot deploy VM [{}] in the last host [{}] because this host does not have GPU devices available. Skipping this and trying other available hosts.", vm, host); return false; } diff --git a/server/src/main/java/com/cloud/deploy/FirstFitPlanner.java b/server/src/main/java/com/cloud/deploy/FirstFitPlanner.java index 63377a66b6c3..3aab852ba7fc 100644 --- a/server/src/main/java/com/cloud/deploy/FirstFitPlanner.java +++ b/server/src/main/java/com/cloud/deploy/FirstFitPlanner.java @@ -203,8 +203,13 @@ public List orderClusters(VirtualMachineProfile vmProfile, DeploymentPlan } } - // In case of non-GPU VMs, protect GPU enabled Hosts and prefer VM deployment on non-GPU Hosts. - if (((serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString()) == null) && !(hostGpuGroupsDao.listHostIds().isEmpty())) || nonUefiVMDeploy) { + // In case of non-GPU VMs, protect GPU enabled Hosts and prefer VM deployment on + // non-GPU Hosts. 
+ if (((offering.getVgpuProfileId() == null && + serviceOfferingDetailsDao.findDetail(offering.getId(), + GPU.Keys.vgpuType.toString()) == null) + && !(hostGpuGroupsDao.listHostIds().isEmpty())) || nonUefiVMDeploy + ) { int requiredCpu = offering.getCpu() * offering.getSpeed(); long requiredRam = offering.getRamSize() * 1024L * 1024L; reorderClustersBasedOnImplicitTags(clusterList, requiredCpu, requiredRam); diff --git a/server/src/main/java/com/cloud/hypervisor/HypervisorGuruBase.java b/server/src/main/java/com/cloud/hypervisor/HypervisorGuruBase.java index 1be822d673eb..1aa9a1af3faa 100644 --- a/server/src/main/java/com/cloud/hypervisor/HypervisorGuruBase.java +++ b/server/src/main/java/com/cloud/hypervisor/HypervisorGuruBase.java @@ -23,10 +23,13 @@ import javax.inject.Inject; +import com.cloud.agent.api.to.GPUDeviceTO; import com.cloud.dc.DataCenter; import com.cloud.dc.dao.DataCenterDao; import com.cloud.domain.Domain; import com.cloud.domain.dao.DomainDao; +import com.cloud.gpu.VgpuProfileVO; +import com.cloud.gpu.dao.VgpuProfileDao; import com.cloud.network.vpc.VpcVO; import com.cloud.network.vpc.dao.VpcDao; import com.cloud.user.Account; @@ -110,6 +113,8 @@ public abstract class HypervisorGuruBase extends AdapterBase implements Hypervis @Inject protected ServiceOfferingDetailsDao _serviceOfferingDetailsDao; @Inject + protected VgpuProfileDao vgpuProfileDao; + @Inject protected ServiceOfferingDao serviceOfferingDao; @Inject private NetworkDetailsDao networkDetailsDao; @@ -325,9 +330,8 @@ protected VirtualMachineTO toVirtualMachineTO(VirtualMachineProfile vmProfile) { // Set GPU details ServiceOfferingDetailsVO offeringDetail = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString()); - if (offeringDetail != null) { - ServiceOfferingDetailsVO groupName = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.pciDevice.toString()); - to.setGpuDevice(_resourceMgr.getGPUDevice(vm.getHostId(), groupName.getValue(), 
offeringDetail.getValue())); + if (offering.getVgpuProfileId() != null || offeringDetail != null) { + to.setGpuDevice(getGpuDevice(offering, offeringDetail, vm, vmProfile.getHostId())); } // Workaround to make sure the TO has the UUID we need for Niciri integration @@ -345,6 +349,21 @@ protected VirtualMachineTO toVirtualMachineTO(VirtualMachineProfile vmProfile) { return to; } + private GPUDeviceTO getGpuDevice(ServiceOffering offering, ServiceOfferingDetailsVO offeringDetail, VirtualMachine vm, long hostId) { + if (offering.getVgpuProfileId() != null) { + VgpuProfileVO vgpuProfile = vgpuProfileDao.findById(offering.getVgpuProfileId()); + if (vgpuProfile != null) { + int gpuCount = offering.getGpuCount() != null ? offering.getGpuCount() : 1; + return _resourceMgr.getGPUDevice(vm, hostId, vgpuProfile, gpuCount); + } + } else if (offeringDetail != null) { + ServiceOfferingDetailsVO groupName = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.pciDevice.toString()); + return _resourceMgr.getGPUDevice(vm.getHostId(), groupName.getValue(), offeringDetail.getValue()); + } + return null; + } + + protected Long findClusterOfVm(VirtualMachine vm) { HostVO host = hostDao.findById(vm.getHostId()); if (host != null) { diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java index 936dfd9cf950..47f23bca4b0f 100755 --- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java @@ -18,6 +18,7 @@ import static com.cloud.configuration.ConfigurationManagerImpl.MIGRATE_VM_ACROSS_CLUSTERS; import static com.cloud.configuration.ConfigurationManagerImpl.SET_HOST_DOWN_TO_MAINTENANCE; +import static org.apache.cloudstack.gpu.GpuService.GpuDetachOnStop; import java.net.URI; import java.net.URISyntaxException; @@ -38,6 +39,16 @@ import javax.inject.Inject; import javax.naming.ConfigurationException; +import 
com.cloud.gpu.dao.VgpuProfileDao; +import com.cloud.offering.ServiceOffering; +import com.cloud.service.ServiceOfferingDetailsVO; +import com.cloud.storage.ScopeType; +import com.cloud.storage.StoragePoolAndAccessGroupMapVO; +import com.cloud.storage.dao.StoragePoolAndAccessGroupMapDao; +import com.cloud.storage.dao.StoragePoolTagsDao; +import com.cloud.gpu.GpuCardVO; +import com.cloud.gpu.VgpuProfileVO; +import com.cloud.gpu.dao.GpuCardDao; import org.apache.cloudstack.alert.AlertService; import org.apache.cloudstack.annotation.AnnotationService; import org.apache.cloudstack.annotation.dao.AnnotationDao; @@ -66,6 +77,7 @@ import org.apache.cloudstack.framework.extensions.manager.ExtensionsManager; import org.apache.cloudstack.framework.extensions.vo.ExtensionResourceMapVO; import org.apache.cloudstack.framework.extensions.vo.ExtensionVO; +import org.apache.cloudstack.gpu.GpuService; import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao; import org.apache.cloudstack.storage.datastore.db.StoragePoolVO; import org.apache.cloudstack.utils.identity.ManagementServerNode; @@ -171,10 +183,8 @@ import com.cloud.service.dao.ServiceOfferingDao; import com.cloud.service.dao.ServiceOfferingDetailsDao; import com.cloud.storage.GuestOSCategoryVO; -import com.cloud.storage.ScopeType; import com.cloud.storage.StorageManager; import com.cloud.storage.StoragePool; -import com.cloud.storage.StoragePoolAndAccessGroupMapVO; import com.cloud.storage.StoragePoolHostVO; import com.cloud.storage.StoragePoolStatus; import com.cloud.storage.StorageService; @@ -182,9 +192,7 @@ import com.cloud.storage.Volume; import com.cloud.storage.VolumeVO; import com.cloud.storage.dao.GuestOSCategoryDao; -import com.cloud.storage.dao.StoragePoolAndAccessGroupMapDao; import com.cloud.storage.dao.StoragePoolHostDao; -import com.cloud.storage.dao.StoragePoolTagsDao; import com.cloud.storage.dao.VMTemplateDao; import com.cloud.storage.dao.VolumeDao; import com.cloud.user.Account; @@ -266,6 
+274,10 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, @Inject protected VGPUTypesDao _vgpuTypesDao; @Inject + protected VgpuProfileDao vgpuProfileDao; + @Inject + private GpuCardDao gpuCardDao; + @Inject private PrimaryDataStoreDao _storagePoolDao; @Inject private StoragePoolTagsDao _storagePoolTagsDao; @@ -293,6 +305,8 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, private ServiceOfferingDetailsDao _serviceOfferingDetailsDao; @Inject private UserVmManager userVmManager; + @Inject + private GpuService gpuService; private List _discoverers; @@ -1501,8 +1515,11 @@ private boolean doMaintain(final long hostId) { } for (final VMInstanceVO vm : vms) { + ServiceOfferingVO offering = serviceOfferingDao.findById(vm.getServiceOfferingId()); if (hosts == null || hosts.isEmpty() || !answer.getMigrate() - || _serviceOfferingDetailsDao.findDetail(vm.getServiceOfferingId(), GPU.Keys.vgpuType.toString()) != null) { + || offering.getVgpuProfileId() != null + || _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString()) != null + ) { handleVmForLastHostOrWithVGpu(host, vm); } else if (HypervisorType.LXC.equals(host.getHypervisorType()) && VirtualMachine.Type.User.equals(vm.getType())){ //Migration is not supported for LXC Vms. Schedule restart instead. 
@@ -2896,7 +2913,7 @@ public boolean configure(final String name, final Map params) th _gpuAvailability.and("groupName", _gpuAvailability.entity().getGroupName(), Op.EQ); final SearchBuilder join1 = _vgpuTypesDao.createSearchBuilder(); join1.and("vgpuType", join1.entity().getVgpuType(), Op.EQ); - join1.and("remainingCapacity", join1.entity().getRemainingCapacity(), Op.GT); + join1.and("remainingCapacity", join1.entity().getRemainingCapacity(), Op.GTEQ); _gpuAvailability.join("groupId", join1, _gpuAvailability.entity().getId(), join1.entity().getGpuGroupId(), JoinBuilder.JoinType.INNER); _gpuAvailability.done(); @@ -3224,7 +3241,7 @@ protected HostVO createHostVO(final StartupCommand[] cmds, final ServerResource } if (newHost) { - host = _hostDao.persist(host); + host = persistNewHost(host, startup); } else { _hostDao.update(host.getId(), host); } @@ -3253,6 +3270,18 @@ protected HostVO createHostVO(final StartupCommand[] cmds, final ServerResource return host; } + private HostVO persistNewHost(HostVO host, StartupCommand startup) { + HostVO hostVo = _hostDao.persist(host); + // Check for GPU devices again because we couldn't persist the GPU devices earlier due to missing host ID + if (startup instanceof StartupRoutingCommand && + CollectionUtils.isNotEmpty(((StartupRoutingCommand) startup).getGpuDevices())) { + StartupRoutingCommand ssCmd = ((StartupRoutingCommand) startup); + host.setGpuGroups(getGroupDetails(host, ssCmd.getGpuDevices(), ssCmd.getGpuGroupDetails())); + _hostDao.update(hostVo.getId(), host); + } + return hostVo; + } + private void updateSupportsClonedVolumes(HostVO host, boolean supportsClonedVolumes) { final String name = "supportsResign"; @@ -3639,7 +3668,7 @@ public HostVO fillRoutingHostVO(final HostVO host, final StartupRoutingCommand s host.setSpeed(ssCmd.getSpeed()); host.setHypervisorType(hyType); host.setHypervisorVersion(ssCmd.getHypervisorVersion()); - host.setGpuGroups(ssCmd.getGpuGroupDetails()); + 
host.setGpuGroups(getGroupDetails(host, ssCmd.getGpuDevices(), ssCmd.getGpuGroupDetails())); return host; } @@ -4263,7 +4292,6 @@ public List listAvailableGPUDevice(final long hostId, final Str sc.setParameters("hostId", hostId); sc.setParameters("groupName", groupName); sc.setJoinParameters("groupId", "vgpuType", vgpuType); - sc.setJoinParameters("groupId", "remainingCapacity", 0); return _hostGpuGroupsDao.customSearch(sc, searchFilter); } @@ -4287,7 +4315,14 @@ public List listAllHostsInOneZoneNotInClusterByHypervisors(List gpuDeviceList = listAvailableGPUDevice(hostId, groupName, vgpuType); @@ -4322,6 +4416,31 @@ public void updateGPUDetails(final long hostId, final HashMap> groupDetails; + if (gpuDevice == null || gpuDevice.getGpuDevices() != null) { + HostVO host = _hostDao.findById(vm.getHostId()); + if (GpuDetachOnStop.valueIn(vm.getDomainId())) { + gpuService.deallocateAllGpuDevicesForVm(vm.getId()); + } + groupDetails = gpuService.getGpuGroupDetailsFromGpuDevicesOnHost(host.getId()); + } else { + groupDetails = gpuDevice.getGroupDetails(); + } + updateGPUDetails(vm.getHostId(), groupDetails); + } + + @Override + public void updateGPUDetailsForVmStart(long hostId, long vmId, GPUDeviceTO gpuDevice) { + HashMap> groupDetails = gpuDevice.getGroupDetails(); + if (gpuDevice.getGpuDevices() != null) { + gpuService.allocateGpuDevicesToVmOnHost(vmId, hostId, gpuDevice.getGpuDevices()); + groupDetails = gpuService.getGpuGroupDetailsFromGpuDevicesOnHost(hostId); + } + updateGPUDetails(hostId, groupDetails); + } + @Override public HashMap> getGPUStatistics(final HostVO host) { final Answer answer = _agentMgr.easySend(host.getId(), new GetGPUStatsCommand(host.getGuid(), host.getName())); @@ -4332,15 +4451,27 @@ public HashMap> getGPUStatistics(final Ho final String msg = String.format("Unable to obtain GPU stats for %s", host); logger.warn(msg); return null; - } else { - // now construct the result object - if (answer instanceof GetGPUStatsAnswer) { - return 
((GetGPUStatsAnswer)answer).getGroupDetails(); - } + } else if (answer instanceof GetGPUStatsAnswer) { + GetGPUStatsAnswer gpuStatsAnswer = (GetGPUStatsAnswer) answer; + return getGroupDetails(host, gpuStatsAnswer.getGpuDevices(), gpuStatsAnswer.getGroupDetails()); } return null; } + private HashMap> getGroupDetails(HostVO host, List gpuDevices, HashMap> groupDetails) { + HashMap> finalGroupDetails; + if (host.getId() > 0) { + // The below method needs the host to be persisted in the DB to save the GPU devices for the host + gpuService.addGpuDevicesToHost(host, gpuDevices); + } + if (CollectionUtils.isNotEmpty(gpuDevices)) { + finalGroupDetails = gpuService.getGpuGroupDetailsFromGpuDevicesOnHost(host.getId()); + } else { + finalGroupDetails = groupDetails; + } + return finalGroupDetails; + } + @Override public HostVO findOneRandomRunningHostByHypervisor(final HypervisorType type, final Long dcId) { final QueryBuilder sc = QueryBuilder.create(HostVO.class); diff --git a/server/src/main/java/com/cloud/resourcelimit/ResourceLimitManagerImpl.java b/server/src/main/java/com/cloud/resourcelimit/ResourceLimitManagerImpl.java index 37d744457760..7ada4b7f4cc6 100644 --- a/server/src/main/java/com/cloud/resourcelimit/ResourceLimitManagerImpl.java +++ b/server/src/main/java/com/cloud/resourcelimit/ResourceLimitManagerImpl.java @@ -297,6 +297,7 @@ public boolean configure(final String name, final Map params) th projectResourceLimitMap.put(Resource.ResourceType.vpc.name(), Long.parseLong(_configDao.getValue(Config.DefaultMaxProjectVpcs.key()))); projectResourceLimitMap.put(Resource.ResourceType.cpu.name(), Long.parseLong(_configDao.getValue(Config.DefaultMaxProjectCpus.key()))); projectResourceLimitMap.put(Resource.ResourceType.memory.name(), Long.parseLong(_configDao.getValue(Config.DefaultMaxProjectMemory.key()))); + projectResourceLimitMap.put(Resource.ResourceType.gpu.name(), DefaultMaxProjectGpus.value()); 
projectResourceLimitMap.put(Resource.ResourceType.primary_storage.name(), Long.parseLong(_configDao.getValue(Config.DefaultMaxProjectPrimaryStorage.key()))); projectResourceLimitMap.put(Resource.ResourceType.secondary_storage.name(), MaxProjectSecondaryStorage.value()); projectResourceLimitMap.put(Resource.ResourceType.backup.name(), Long.parseLong(_configDao.getValue(BackupManager.DefaultMaxProjectBackups.key()))); @@ -313,6 +314,7 @@ public boolean configure(final String name, final Map params) th accountResourceLimitMap.put(Resource.ResourceType.vpc.name(), Long.parseLong(_configDao.getValue(Config.DefaultMaxAccountVpcs.key()))); accountResourceLimitMap.put(Resource.ResourceType.cpu.name(), Long.parseLong(_configDao.getValue(Config.DefaultMaxAccountCpus.key()))); accountResourceLimitMap.put(Resource.ResourceType.memory.name(), Long.parseLong(_configDao.getValue(Config.DefaultMaxAccountMemory.key()))); + accountResourceLimitMap.put(Resource.ResourceType.gpu.name(), DefaultMaxAccountGpus.value()); accountResourceLimitMap.put(Resource.ResourceType.primary_storage.name(), Long.parseLong(_configDao.getValue(Config.DefaultMaxAccountPrimaryStorage.key()))); accountResourceLimitMap.put(Resource.ResourceType.secondary_storage.name(), MaxAccountSecondaryStorage.value()); accountResourceLimitMap.put(Resource.ResourceType.project.name(), DefaultMaxAccountProjects.value()); @@ -333,6 +335,7 @@ public boolean configure(final String name, final Map params) th domainResourceLimitMap.put(Resource.ResourceType.primary_storage.name(), Long.parseLong(_configDao.getValue(Config.DefaultMaxDomainPrimaryStorage.key()))); domainResourceLimitMap.put(Resource.ResourceType.secondary_storage.name(), Long.parseLong(_configDao.getValue(Config.DefaultMaxDomainSecondaryStorage.key()))); domainResourceLimitMap.put(Resource.ResourceType.project.name(), DefaultMaxDomainProjects.value()); + domainResourceLimitMap.put(Resource.ResourceType.gpu.name(), DefaultMaxDomainGpus.value()); 
domainResourceLimitMap.put(Resource.ResourceType.backup.name(), Long.parseLong(_configDao.getValue(BackupManager.DefaultMaxDomainBackups.key()))); domainResourceLimitMap.put(Resource.ResourceType.backup_storage.name(), Long.parseLong(_configDao.getValue(BackupManager.DefaultMaxDomainBackupStorage.key()))); domainResourceLimitMap.put(Resource.ResourceType.bucket.name(), Long.parseLong(_configDao.getValue(BucketApiService.DefaultMaxDomainBuckets.key()))); @@ -1303,6 +1306,8 @@ protected long recalculateAccountResourceCount(final long accountId, final Resou newCount = calculateVmCpuCountForAccount(accountId, tag); } else if (type == Resource.ResourceType.memory) { newCount = calculateVmMemoryCountForAccount(accountId, tag); + } else if (type == Resource.ResourceType.gpu) { + newCount = calculateVmGpuCountForAccount(accountId, tag); } else if (type == Resource.ResourceType.primary_storage) { newCount = calculatePrimaryStorageForAccount(accountId, tag); } else if (type == Resource.ResourceType.secondary_storage) { @@ -1429,6 +1434,22 @@ protected long calculateVmMemoryCountForAccount(long accountId, String tag) { return memory - reservedMemory; } + protected long calculateVmGpuCountForAccount(long accountId, String tag) { + if (StringUtils.isEmpty(tag)) { + return calculateGpuForAccount(accountId); + } + long gputotal = 0; + List vms = getVmsWithAccountAndTag(accountId, tag); + + for (UserVmJoinVO vm : vms) { + if (vm.getGpuCount() != null) { + gputotal += vm.getGpuCount(); + } + } + long reservedGpus = calculateReservedResources(vms, accountId, ResourceType.gpu, tag); + return gputotal - reservedGpus; + } + public long countCpusForAccount(long accountId) { long cputotal = 0; List userVms = getVmsWithAccount(accountId); @@ -1449,6 +1470,18 @@ public long calculateMemoryForAccount(long accountId) { return ramtotal - reservedRamTotal; } + public long calculateGpuForAccount(long accountId) { + long gputotal = 0; + List userVms = getVmsWithAccount(accountId); + for 
(UserVmJoinVO vm : userVms) { + if (vm.getGpuCount() != null) { + gputotal += vm.getGpuCount(); + } + } + long reservedGpuTotal = calculateReservedResources(userVms, accountId, ResourceType.gpu, null); + return gputotal - reservedGpuTotal; + } + public long calculateSecondaryStorageForAccount(long accountId) { long totalVolumesSize = _volumeDao.secondaryStorageUsedForAccount(accountId); long totalSnapshotsSize = 0; @@ -1854,24 +1887,18 @@ private void updateVmResourceCountForServiceOfferingAndTemplateChange(long accou if (newMemory == null) { newMemory = newOffering.getRamSize() != null ? Long.valueOf(newOffering.getRamSize()) : 0L; } + Long currentGpu = currentOffering.getGpuCount() != null ? Long.valueOf(currentOffering.getGpuCount()) : 0L; + Long newGpu = newOffering.getGpuCount() != null ? Long.valueOf(newOffering.getGpuCount()) : 0L; Set sameTags = updatedResourceLimitHostTags.first(); Set newTags = updatedResourceLimitHostTags.second(); Set removedTags = updatedResourceLimitHostTags.third(); - if (!newCpu.equals(currentCpu) || !newMemory.equals(currentMemory)) { + if (!newCpu.equals(currentCpu) || !newMemory.equals(currentMemory) || !newGpu.equals(currentGpu)) { for (String tag : sameTags) { - if (newCpu - currentCpu > 0) { - incrementResourceCountWithTag(accountId, ResourceType.cpu, tag, newCpu - currentCpu); - } else if (newCpu - currentCpu < 0) { - decrementResourceCountWithTag(accountId, ResourceType.cpu, tag, currentCpu - newCpu); - } - - if (newMemory - currentMemory > 0) { - incrementResourceCountWithTag(accountId, ResourceType.memory, tag, newMemory - currentMemory); - } else if (newMemory - currentMemory < 0) { - decrementResourceCountWithTag(accountId, ResourceType.memory, tag, currentMemory - newMemory); - } + adjustResourceCount(newCpu, currentCpu, ResourceType.cpu, accountId, tag); + adjustResourceCount(newMemory, currentMemory, ResourceType.memory, accountId, tag); + adjustResourceCount(newGpu, currentGpu, ResourceType.gpu, accountId, tag); } } 
@@ -1879,12 +1906,22 @@ private void updateVmResourceCountForServiceOfferingAndTemplateChange(long accou decrementResourceCountWithTag(accountId, ResourceType.user_vm, tag, 1L); decrementResourceCountWithTag(accountId, ResourceType.cpu, tag, currentCpu); decrementResourceCountWithTag(accountId, ResourceType.memory, tag, currentMemory); + decrementResourceCountWithTag(accountId, ResourceType.gpu, tag, currentGpu); } for (String tag : newTags) { incrementResourceCountWithTag(accountId, ResourceType.user_vm, tag, 1L); incrementResourceCountWithTag(accountId, ResourceType.cpu, tag, newCpu); incrementResourceCountWithTag(accountId, ResourceType.memory, tag, newMemory); + incrementResourceCountWithTag(accountId, ResourceType.gpu, tag, newGpu); + } + } + + private void adjustResourceCount(Long newValue, Long currentValue, Resource.ResourceType type, long accountId, String tag) { + if (newValue - currentValue > 0) { + incrementResourceCountWithTag(accountId, type, tag, newValue - currentValue); + } else if (newValue - currentValue < 0) { + decrementResourceCountWithTag(accountId, type, tag, currentValue - newValue); } } @@ -1986,10 +2023,12 @@ public void checkVmResourceLimit(Account owner, Boolean display, ServiceOffering } Long cpu = serviceOffering.getCpu() != null ? Long.valueOf(serviceOffering.getCpu()) : 0L; Long ram = serviceOffering.getRamSize() != null ? Long.valueOf(serviceOffering.getRamSize()) : 0L; + Long gpu = serviceOffering.getGpuCount() != null ? Long.valueOf(serviceOffering.getGpuCount()) : 0L; for (String tag : tags) { checkResourceLimitWithTag(owner, ResourceType.user_vm, tag); checkResourceLimitWithTag(owner, ResourceType.cpu, tag, cpu); checkResourceLimitWithTag(owner, ResourceType.memory, tag, ram); + checkResourceLimitWithTag(owner, ResourceType.gpu, tag, gpu); } } @@ -2004,10 +2043,12 @@ public void doInTransactionWithoutResult(TransactionStatus status) { } Long cpu = serviceOffering.getCpu() != null ? 
Long.valueOf(serviceOffering.getCpu()) : 0L; Long ram = serviceOffering.getRamSize() != null ? Long.valueOf(serviceOffering.getRamSize()) : 0L; + Long gpu = serviceOffering.getGpuCount() != null ? Long.valueOf(serviceOffering.getGpuCount()) : 0L; for (String tag : tags) { incrementResourceCountWithTag(accountId, ResourceType.user_vm, tag); incrementResourceCountWithTag(accountId, ResourceType.cpu, tag, cpu); incrementResourceCountWithTag(accountId, ResourceType.memory, tag, ram); + incrementResourceCountWithTag(accountId, ResourceType.gpu, tag, gpu); } } }); @@ -2025,10 +2066,12 @@ public void doInTransactionWithoutResult(TransactionStatus status) { } Long cpu = serviceOffering.getCpu() != null ? Long.valueOf(serviceOffering.getCpu()) : 0L; Long ram = serviceOffering.getRamSize() != null ? Long.valueOf(serviceOffering.getRamSize()) : 0L; + Long gpu = serviceOffering.getGpuCount() != null ? Long.valueOf(serviceOffering.getGpuCount()) : 0L; for (String tag : tags) { decrementResourceCountWithTag(accountId, ResourceType.user_vm, tag); decrementResourceCountWithTag(accountId, ResourceType.cpu, tag, cpu); decrementResourceCountWithTag(accountId, ResourceType.memory, tag, ram); + decrementResourceCountWithTag(accountId, ResourceType.gpu, tag, gpu); } } }); @@ -2071,11 +2114,13 @@ private void checkVmResourceLimitsForServiceOfferingAndTemplateChange(Account ow if (newMemory == null) { newMemory = newOffering.getRamSize() != null ? Long.valueOf(newOffering.getRamSize()) : 0L; } + Long currentGpu = currentOffering.getGpuCount() != null ? Long.valueOf(currentOffering.getGpuCount()) : 0L; + Long newGpu = newOffering.getGpuCount() != null ? 
Long.valueOf(newOffering.getGpuCount()) : 0L; Set sameTags = updatedResourceLimitHostTags.first(); Set newTags = updatedResourceLimitHostTags.second(); - if (newCpu - currentCpu > 0 || newMemory - currentMemory > 0) { + if (newCpu - currentCpu > 0 || newMemory - currentMemory > 0 || newGpu - currentGpu > 0) { for (String tag : sameTags) { if (newCpu - currentCpu > 0) { checkResourceLimitWithTag(owner, ResourceType.cpu, tag, newCpu - currentCpu); @@ -2084,6 +2129,10 @@ private void checkVmResourceLimitsForServiceOfferingAndTemplateChange(Account ow if (newMemory - currentMemory > 0) { checkResourceLimitWithTag(owner, ResourceType.memory, tag, newMemory - currentMemory); } + + if (newGpu - currentGpu > 0) { + checkResourceLimitWithTag(owner, ResourceType.gpu, tag, newGpu - currentGpu); + } } } @@ -2091,6 +2140,7 @@ private void checkVmResourceLimitsForServiceOfferingAndTemplateChange(Account ow checkResourceLimitWithTag(owner, ResourceType.user_vm, tag, 1L); checkResourceLimitWithTag(owner, ResourceType.cpu, tag, newCpu); checkResourceLimitWithTag(owner, ResourceType.memory, tag, newMemory); + checkResourceLimitWithTag(owner, ResourceType.gpu, tag, newGpu); } } @@ -2178,6 +2228,48 @@ public void decrementVmMemoryResourceCount(long accountId, Boolean display, Serv } } + @Override + public void checkVmGpuResourceLimit(Account owner, Boolean display, ServiceOffering serviceOffering, VirtualMachineTemplate template, Long gpu) throws ResourceAllocationException { + List tags = getResourceLimitHostTagsForResourceCountOperation(display, serviceOffering, template); + if (CollectionUtils.isEmpty(tags)) { + return; + } + if (gpu == null) { + gpu = serviceOffering.getGpuCount() != null ? 
Long.valueOf(serviceOffering.getGpuCount()) : 0L; + } + for (String tag : tags) { + checkResourceLimitWithTag(owner, ResourceType.gpu, tag, gpu); + } + } + + @Override + public void incrementVmGpuResourceCount(long accountId, Boolean display, ServiceOffering serviceOffering, VirtualMachineTemplate template, Long gpu) { + List tags = getResourceLimitHostTagsForResourceCountOperation(display, serviceOffering, template); + if (CollectionUtils.isEmpty(tags)) { + return; + } + if (gpu == null) { + gpu = serviceOffering.getGpuCount() != null ? Long.valueOf(serviceOffering.getGpuCount()) : 0L; + } + for (String tag : tags) { + incrementResourceCountWithTag(accountId, ResourceType.gpu, tag, gpu); + } + } + + @Override + public void decrementVmGpuResourceCount(long accountId, Boolean display, ServiceOffering serviceOffering, VirtualMachineTemplate template, Long gpu) { + List tags = getResourceLimitHostTagsForResourceCountOperation(display, serviceOffering, template); + if (CollectionUtils.isEmpty(tags)) { + return; + } + if (gpu == null) { + gpu = serviceOffering.getGpuCount() != null ? 
Long.valueOf(serviceOffering.getGpuCount()) : 0L; + } + for (String tag : tags) { + decrementResourceCountWithTag(accountId, ResourceType.gpu, tag, gpu); + } + } + @Override public String getConfigComponentName() { return ResourceLimitManagerImpl.class.getName(); @@ -2193,7 +2285,10 @@ public ConfigKey[] getConfigKeys() { ResourceLimitHostTags, ResourceLimitStorageTags, DefaultMaxAccountProjects, - DefaultMaxDomainProjects + DefaultMaxDomainProjects, + DefaultMaxAccountGpus, + DefaultMaxDomainGpus, + DefaultMaxProjectGpus }; } diff --git a/server/src/main/java/com/cloud/server/ManagementServerImpl.java b/server/src/main/java/com/cloud/server/ManagementServerImpl.java index 352783cdcc70..21e8c2eac8b0 100644 --- a/server/src/main/java/com/cloud/server/ManagementServerImpl.java +++ b/server/src/main/java/com/cloud/server/ManagementServerImpl.java @@ -44,6 +44,18 @@ import javax.inject.Inject; import javax.naming.ConfigurationException; +import com.cloud.cpu.CPU; +import com.cloud.dc.VlanDetailsVO; +import com.cloud.dc.dao.VlanDetailsDao; +import com.cloud.network.dao.NetrisProviderDao; +import com.cloud.network.dao.NsxProviderDao; + +import com.cloud.utils.security.CertificateHelper; +import com.cloud.api.query.dao.ManagementServerJoinDao; +import com.cloud.api.query.vo.ManagementServerJoinVO; +import com.cloud.gpu.VgpuProfileVO; +import com.cloud.gpu.dao.VgpuProfileDao; +import com.cloud.offering.ServiceOffering; import org.apache.cloudstack.acl.ControlledEntity; import org.apache.cloudstack.acl.SecurityChecker; import org.apache.cloudstack.affinity.AffinityGroupProcessor; @@ -681,9 +693,7 @@ import com.cloud.alert.AlertVO; import com.cloud.alert.dao.AlertDao; import com.cloud.api.ApiDBUtils; -import com.cloud.api.query.dao.ManagementServerJoinDao; import com.cloud.api.query.dao.StoragePoolJoinDao; -import com.cloud.api.query.vo.ManagementServerJoinVO; import com.cloud.api.query.vo.StoragePoolJoinVO; import com.cloud.capacity.Capacity; import 
com.cloud.capacity.CapacityVO; @@ -694,7 +704,6 @@ import com.cloud.configuration.ConfigurationManagerImpl; import com.cloud.consoleproxy.ConsoleProxyManagementState; import com.cloud.consoleproxy.ConsoleProxyManager; -import com.cloud.cpu.CPU; import com.cloud.dc.AccountVlanMapVO; import com.cloud.dc.ClusterVO; import com.cloud.dc.DataCenterVO; @@ -704,7 +713,6 @@ import com.cloud.dc.PodVlanMapVO; import com.cloud.dc.Vlan; import com.cloud.dc.Vlan.VlanType; -import com.cloud.dc.VlanDetailsVO; import com.cloud.dc.VlanVO; import com.cloud.dc.dao.AccountVlanMapDao; import com.cloud.dc.dao.ClusterDao; @@ -713,7 +721,6 @@ import com.cloud.dc.dao.HostPodDao; import com.cloud.dc.dao.PodVlanMapDao; import com.cloud.dc.dao.VlanDao; -import com.cloud.dc.dao.VlanDetailsDao; import com.cloud.deploy.DataCenterDeployment; import com.cloud.deploy.DeploymentPlanner; import com.cloud.deploy.DeploymentPlanner.ExcludeList; @@ -761,14 +768,12 @@ import com.cloud.network.dao.IPAddressVO; import com.cloud.network.dao.LoadBalancerDao; import com.cloud.network.dao.LoadBalancerVO; -import com.cloud.network.dao.NetrisProviderDao; import com.cloud.network.dao.NetworkAccountDao; import com.cloud.network.dao.NetworkAccountVO; import com.cloud.network.dao.NetworkDao; import com.cloud.network.dao.NetworkDomainDao; import com.cloud.network.dao.NetworkDomainVO; import com.cloud.network.dao.NetworkVO; -import com.cloud.network.dao.NsxProviderDao; import com.cloud.network.dao.PublicIpQuarantineDao; import com.cloud.network.vpc.dao.VpcDao; import com.cloud.org.Cluster; @@ -845,7 +850,6 @@ import com.cloud.utils.fsm.StateMachine2; import com.cloud.utils.net.MacAddress; import com.cloud.utils.net.NetUtils; -import com.cloud.utils.security.CertificateHelper; import com.cloud.utils.ssh.SSHKeysHelper; import com.cloud.vm.ConsoleProxyVO; import com.cloud.vm.DiskProfile; @@ -961,6 +965,8 @@ public class ManagementServerImpl extends ManagerBase implements ManagementServe @Inject private HostPodDao 
_hostPodDao; @Inject + private VgpuProfileDao vgpuProfileDao; + @Inject private VMInstanceDao _vmInstanceDao; @Inject private VolumeDao _volumeDao; @@ -1557,6 +1563,8 @@ public Ternary, Integer>, List, Map, Integer> allHostsPair = null; List allHosts = null; @@ -1683,6 +1691,17 @@ public Ternary, Integer>, List, Map(otherHosts, suitableHosts, requiresStorageMotion); } + private void validateVgpuProfileForVmMigration(final VirtualMachineProfile vmProfile) { + // Validate if the VM is using a vGPU profile that supports migration. + ServiceOffering serviceOffering = vmProfile.getServiceOffering(); + if (serviceOffering.getVgpuProfileId() != null) { + VgpuProfileVO vgpuProfile = vgpuProfileDao.findById(serviceOffering.getVgpuProfileId()); + if (vgpuProfile == null || "passthrough".equals(vgpuProfile.getName())) { + throw new InvalidParameterValueException("Unsupported operation, VM uses host passthrough, cannot migrate"); + } + } + } + /** * Add non DPDK enabled hosts to the avoid list */ diff --git a/server/src/main/java/com/cloud/storage/StorageManagerImpl.java b/server/src/main/java/com/cloud/storage/StorageManagerImpl.java index f144745fc5cb..76be0ed6b56c 100644 --- a/server/src/main/java/com/cloud/storage/StorageManagerImpl.java +++ b/server/src/main/java/com/cloud/storage/StorageManagerImpl.java @@ -1958,7 +1958,7 @@ public void createCapacityEntry(StoragePoolVO storagePool, short capacityType, l logger.debug("Total over provisioned capacity of the pool {} is {}", storagePool, toHumanReadableSize(totalOverProvCapacity)); CapacityState capacityState = CapacityState.Enabled; if (storagePool.getScope() == ScopeType.ZONE) { - DataCenterVO dc = ApiDBUtils.findZoneById(storagePool.getDataCenterId()); + DataCenterVO dc = _dcDao.findById(storagePool.getDataCenterId()); AllocationState allocationState = dc.getAllocationState(); capacityState = (allocationState == AllocationState.Disabled) ? 
CapacityState.Disabled : CapacityState.Enabled; } else { diff --git a/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java b/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java index 771d3c0c5a74..91897c0977de 100644 --- a/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java +++ b/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java @@ -2805,18 +2805,39 @@ protected void verifyVmLimits(UserVmVO vmInstance, Map details) long currentCpu = currentServiceOffering.getCpu(); long currentMemory = currentServiceOffering.getRamSize(); VMTemplateVO template = _templateDao.findByIdIncludingRemoved(vmInstance.getTemplateId()); + Long currentGpu = currentServiceOffering.getGpuCount() != null ? Long.valueOf(currentServiceOffering.getGpuCount()) : 0L; + Long newGpu = svcOffering.getGpuCount() != null ? Long.valueOf(svcOffering.getGpuCount()) : 0L; try { - if (newCpu > currentCpu) { - _resourceLimitMgr.checkVmCpuResourceLimit(owner, vmInstance.isDisplay(), svcOffering, template, newCpu - currentCpu); - } - if (newMemory > currentMemory) { - _resourceLimitMgr.checkVmMemoryResourceLimit(owner, vmInstance.isDisplay(), svcOffering, template, newMemory - currentMemory); - } + checkVmLimits(owner, vmInstance, svcOffering, template, newCpu, currentCpu, newMemory, currentMemory, newGpu, currentGpu); } catch (ResourceAllocationException e) { logger.error(String.format("Failed to updated VM due to: %s", e.getLocalizedMessage())); throw new InvalidParameterValueException(e.getLocalizedMessage()); } + adjustVmLimits(owner, vmInstance, svcOffering, template, newCpu, currentCpu, newMemory, currentMemory, newGpu, currentGpu); + } + private void checkVmLimits(Account owner, UserVmVO vmInstance, ServiceOfferingVO svcOffering, + VMTemplateVO template, Long newCpu, Long currentCpu, Long newMemory, Long currentMemory, + Long newGpu, Long currentGpu + ) throws ResourceAllocationException { + if (newCpu > currentCpu) { + _resourceLimitMgr.checkVmCpuResourceLimit(owner, 
vmInstance.isDisplay(), svcOffering, + template, newCpu - currentCpu); + } + if (newMemory > currentMemory) { + _resourceLimitMgr.checkVmMemoryResourceLimit(owner, vmInstance.isDisplay(), svcOffering, + template, newMemory - currentMemory); + } + if (newGpu > currentGpu) { + _resourceLimitMgr.checkVmGpuResourceLimit(owner, vmInstance.isDisplay(), svcOffering, + template, newGpu - currentGpu); + } + } + + private void adjustVmLimits(Account owner, UserVmVO vmInstance, ServiceOfferingVO svcOffering, + VMTemplateVO template, Long newCpu, Long currentCpu, Long newMemory, Long currentMemory, + Long newGpu, Long currentGpu + ) { if (newCpu > currentCpu) { _resourceLimitMgr.incrementVmCpuResourceCount(owner.getAccountId(), vmInstance.isDisplay(), svcOffering, template, newCpu - currentCpu); } else if (newCpu > 0 && currentCpu > newCpu){ @@ -2827,6 +2848,11 @@ protected void verifyVmLimits(UserVmVO vmInstance, Map details) } else if (newMemory > 0 && currentMemory > newMemory){ _resourceLimitMgr.decrementVmMemoryResourceCount(owner.getAccountId(), vmInstance.isDisplay(), svcOffering, template, currentMemory - newMemory); } + if (newGpu > currentGpu) { + _resourceLimitMgr.incrementVmGpuResourceCount(owner.getAccountId(), vmInstance.isDisplay(), svcOffering, template, newGpu - currentGpu); + } else if (newGpu > 0 && currentGpu > newGpu){ + _resourceLimitMgr.decrementVmGpuResourceCount(owner.getAccountId(), vmInstance.isDisplay(), svcOffering, template, currentGpu - newGpu); + } } @Override @@ -4211,6 +4237,8 @@ private UserVm getCheckedUserVmResource(DataCenter zone, String hostName, String try (CheckedReservation vmReservation = new CheckedReservation(owner, ResourceType.user_vm, resourceLimitHostTags, 1l, reservationDao, resourceLimitService); CheckedReservation cpuReservation = new CheckedReservation(owner, ResourceType.cpu, resourceLimitHostTags, Long.valueOf(offering.getCpu()), reservationDao, resourceLimitService); CheckedReservation memReservation = new 
CheckedReservation(owner, ResourceType.memory, resourceLimitHostTags, Long.valueOf(offering.getRamSize()), reservationDao, resourceLimitService); + CheckedReservation gpuReservation = offering.getGpuCount() != null && offering.getGpuCount() > 0 ? + new CheckedReservation(owner, ResourceType.gpu, resourceLimitHostTags, Long.valueOf(offering.getGpuCount()), reservationDao, resourceLimitService) : null; ) { return getUncheckedUserVmResource(zone, hostName, displayName, owner, diskOfferingId, diskSize, networkList, securityGroupIdList, group, httpmethod, userData, userDataId, userDataDetails, sshKeyPairs, caller, requestedIps, defaultIps, isDisplayVm, keyboard, affinityGroupIdList, customParameters, customId, dhcpOptionMap, datadiskTemplateToDiskOfferringMap, userVmOVFPropertiesMap, dynamicScalingEnabled, vmType, template, hypervisorType, accountId, offering, isIso, rootDiskOfferingId, volumesSize, additionalDiskSize, volume, snapshot); } catch (ResourceAllocationException | CloudRuntimeException e) { @@ -5841,6 +5869,8 @@ public UserVm destroyVm(long vmId, boolean expunge) throws ResourceUnavailableEx try (CheckedReservation vmReservation = new CheckedReservation(owner, ResourceType.user_vm, vmId, null, -1L, reservationDao, resourceLimitService); CheckedReservation cpuReservation = new CheckedReservation(owner, ResourceType.cpu, vmId, null, -1 * Long.valueOf(offering.getCpu()), reservationDao, resourceLimitService); CheckedReservation memReservation = new CheckedReservation(owner, ResourceType.memory, vmId, null, -1 * Long.valueOf(offering.getRamSize()), reservationDao, resourceLimitService); + CheckedReservation gpuReservation = offering.getGpuCount() != null && offering.getGpuCount() > 0 ? 
+ new CheckedReservation(owner, ResourceType.gpu, vmId, null, -1 * Long.valueOf(offering.getGpuCount()), reservationDao, resourceLimitService) : null; ) { try { VirtualMachineEntity vmEntity = _orchSrvc.getVirtualMachine(vm.getUuid()); diff --git a/server/src/main/java/org/apache/cloudstack/gpu/GpuServiceImpl.java b/server/src/main/java/org/apache/cloudstack/gpu/GpuServiceImpl.java new file mode 100644 index 000000000000..5890b176c8e8 --- /dev/null +++ b/server/src/main/java/org/apache/cloudstack/gpu/GpuServiceImpl.java @@ -0,0 +1,1385 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package org.apache.cloudstack.gpu; + +import com.cloud.agent.api.VgpuTypesInfo; +import com.cloud.agent.api.to.GPUDeviceTO; +import com.cloud.event.ActionEvent; +import com.cloud.event.EventTypes; +import com.cloud.exception.InvalidParameterValueException; +import com.cloud.gpu.GpuCardVO; +import com.cloud.gpu.GpuDeviceVO; +import com.cloud.gpu.VgpuProfileVO; +import com.cloud.gpu.dao.GpuCardDao; +import com.cloud.gpu.dao.GpuDeviceDao; +import com.cloud.gpu.dao.VgpuProfileDao; +import com.cloud.host.Host; +import com.cloud.host.HostVO; +import com.cloud.host.Status; +import com.cloud.host.dao.HostDao; +import com.cloud.resource.ResourceManager; +import com.cloud.service.ServiceOfferingVO; +import com.cloud.service.dao.ServiceOfferingDao; +import com.cloud.utils.Pair; +import com.cloud.utils.component.ManagerBase; +import com.cloud.utils.component.PluggableService; +import com.cloud.utils.db.DB; +import com.cloud.utils.db.GlobalLock; +import com.cloud.utils.db.Transaction; +import com.cloud.utils.db.TransactionCallback; +import com.cloud.utils.db.TransactionCallbackNoReturn; +import com.cloud.utils.db.TransactionStatus; +import com.cloud.utils.exception.CloudRuntimeException; +import com.cloud.vm.UserVmManager; +import com.cloud.vm.UserVmVO; +import com.cloud.vm.VMInstanceVO; +import com.cloud.vm.VirtualMachine; +import com.cloud.vm.dao.VMInstanceDao; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.ResponseObject; +import org.apache.cloudstack.api.command.admin.gpu.CreateGpuCardCmd; +import org.apache.cloudstack.api.command.admin.gpu.CreateGpuDeviceCmd; +import org.apache.cloudstack.api.command.admin.gpu.CreateVgpuProfileCmd; +import org.apache.cloudstack.api.command.admin.gpu.DeleteGpuCardCmd; +import org.apache.cloudstack.api.command.admin.gpu.DeleteGpuDeviceCmd; +import org.apache.cloudstack.api.command.admin.gpu.DeleteVgpuProfileCmd; +import org.apache.cloudstack.api.command.admin.gpu.DiscoverGpuDevicesCmd; +import 
org.apache.cloudstack.api.command.admin.gpu.ListGpuDevicesCmdByAdmin; +import org.apache.cloudstack.api.command.admin.gpu.ManageGpuDeviceCmd; +import org.apache.cloudstack.api.command.admin.gpu.UnmanageGpuDeviceCmd; +import org.apache.cloudstack.api.command.admin.gpu.UpdateGpuCardCmd; +import org.apache.cloudstack.api.command.admin.gpu.UpdateGpuDeviceCmd; +import org.apache.cloudstack.api.command.admin.gpu.UpdateVgpuProfileCmd; +import org.apache.cloudstack.api.command.user.gpu.ListGpuCardsCmd; +import org.apache.cloudstack.api.command.user.gpu.ListGpuDevicesCmd; +import org.apache.cloudstack.api.command.user.gpu.ListVgpuProfilesCmd; +import org.apache.cloudstack.api.response.GpuCardResponse; +import org.apache.cloudstack.api.response.GpuDeviceResponse; +import org.apache.cloudstack.api.response.ListResponse; +import org.apache.cloudstack.api.response.VgpuProfileResponse; +import org.apache.cloudstack.framework.config.ConfigKey; +import org.apache.cloudstack.framework.config.Configurable; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.collections.MapUtils; +import org.apache.commons.lang3.StringUtils; +import org.springframework.stereotype.Component; + +import javax.inject.Inject; +import javax.naming.ConfigurationException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +@Component +public class GpuServiceImpl extends ManagerBase implements GpuService, PluggableService, Configurable { + + @Inject + private GpuCardDao gpuCardDao; + + @Inject + private VgpuProfileDao vgpuProfileDao; + + @Inject + private GpuDeviceDao gpuDeviceDao; + + @Inject + private ServiceOfferingDao serviceOfferingDao; + + @Inject + private HostDao hostDao; + + @Inject + private UserVmManager userVmManager; + + @Inject + private VMInstanceDao vmInstanceDao; + + @Inject + private 
ResourceManager resourceManager; + + @Override + public boolean configure(String name, java.util.Map params) throws ConfigurationException { + logger.info("Configuring GpuServiceImpl: {}", name); + return true; + } + + @Override + public List> getCommands() { + List> cmdList = new ArrayList<>(); + // GPU Card Commands + cmdList.add(CreateGpuCardCmd.class); + cmdList.add(UpdateGpuCardCmd.class); + cmdList.add(DeleteGpuCardCmd.class); + cmdList.add(ListGpuCardsCmd.class); + + // vGPU Profile Commands + cmdList.add(CreateVgpuProfileCmd.class); + cmdList.add(UpdateVgpuProfileCmd.class); + cmdList.add(DeleteVgpuProfileCmd.class); + cmdList.add(ListVgpuProfilesCmd.class); + + // GPU Device Commands + cmdList.add(ListGpuDevicesCmd.class); + cmdList.add(ListGpuDevicesCmdByAdmin.class); + cmdList.add(UnmanageGpuDeviceCmd.class); + cmdList.add(ManageGpuDeviceCmd.class); + cmdList.add(DiscoverGpuDevicesCmd.class); + cmdList.add(CreateGpuDeviceCmd.class); + cmdList.add(UpdateGpuDeviceCmd.class); + cmdList.add(DeleteGpuDeviceCmd.class); + + return cmdList; + } + + @Override + public String getConfigComponentName() { + return GpuService.class.getSimpleName(); + } + + @Override + public ConfigKey[] getConfigKeys() { + return new ConfigKey[]{GpuDetachOnStop}; + } + + @Override + @DB + @ActionEvent(eventType = EventTypes.EVENT_GPU_CARD_CREATE, eventDescription = "creating GPU Card") + public GpuCardVO createGpuCard(CreateGpuCardCmd cmd) { + final String deviceId = cmd.getDeviceId(); + final String deviceName = cmd.getDeviceName(); + final String name = cmd.getName(); + final String vendorName = cmd.getVendorName(); + final String vendorId = cmd.getVendorId(); + final Long videoRam = cmd.getVideoRam(); + + // Validate inputs + validateCreateGpuCardParams(deviceId, deviceName, name, vendorName, vendorId); + + GpuCardVO gpuCard = new GpuCardVO(deviceId, deviceName, name, vendorName, vendorId); + gpuCard = gpuCardDao.persist(gpuCard); + + // Create passthrough vGPU profile with 
optional display parameters + VgpuProfileVO passthroughProfile = new VgpuProfileVO("passthrough", "passthrough", gpuCard.getId(), 1L); + passthroughProfile.setVideoRam(videoRam); + vgpuProfileDao.persist(passthroughProfile); + + return gpuCard; + } + + private void validateCreateGpuCardParams(String deviceId, String deviceName, String name, String vendorName, String vendorId) { + if (StringUtils.isBlank(deviceId)) { + throw new InvalidParameterValueException("Device ID cannot be blank"); + } else if (!deviceId.matches("^[a-zA-Z0-9]+$")) { + throw new InvalidParameterValueException("Device ID must be alphanumeric and in hexadecimal format"); + } + if (StringUtils.isBlank(deviceName)) { + throw new InvalidParameterValueException("Device name cannot be blank"); + } + if (StringUtils.isBlank(name)) { + throw new InvalidParameterValueException("Display name cannot be blank"); + } + if (StringUtils.isBlank(vendorName)) { + throw new InvalidParameterValueException("Vendor name cannot be blank"); + } + if (StringUtils.isBlank(vendorId)) { + throw new InvalidParameterValueException("Vendor ID cannot be blank"); + } else if (!vendorId.matches("^[a-zA-Z0-9]+$")) { + throw new InvalidParameterValueException("Vendor ID must be alphanumeric and in hexadecimal format"); + } + + // Check if a GPU card with the same vendor ID and device ID already exists + GpuCardVO existingGpuCard = gpuCardDao.findByVendorIdAndDeviceId(vendorId, deviceId); + if (existingGpuCard != null) { + throw new InvalidParameterValueException( + String.format("GPU card with vendor ID %s and device ID %s already exists", vendorId, deviceId)); + } + } + + + @Override + @DB + @ActionEvent(eventType = EventTypes.EVENT_GPU_CARD_EDIT, eventDescription = "Updating GPU Card") + public GpuCardVO updateGpuCard(UpdateGpuCardCmd cmd) { + final Long id = cmd.getId(); + final String deviceName = cmd.getDeviceName(); + final String name = cmd.getName(); + final String vendorName = cmd.getVendorName(); + + // Validate inputs 
+ GpuCardVO gpuCard = gpuCardDao.findById(id); + if (gpuCard == null) { + throw new InvalidParameterValueException("GPU card with ID " + id + " not found"); + } + + if (deviceName != null) { + gpuCard.setDeviceName(deviceName); + } + if (name != null) { + gpuCard.setName(name); + } + if (vendorName != null) { + gpuCard.setVendorName(vendorName); + } + gpuCardDao.update(id, gpuCard); + return gpuCard; + } + + @Override + @DB + @ActionEvent(eventType = EventTypes.EVENT_GPU_CARD_DELETE, eventDescription = "deleting the GPU Card") + public boolean deleteGpuCard(DeleteGpuCardCmd cmd) { + final Long id = cmd.getId(); + + // Validate inputs + GpuCardVO gpuCard = gpuCardDao.findById(id); + if (gpuCard == null) { + throw new InvalidParameterValueException("GPU card with ID " + id + " not found"); + } + + // Check if a GPU card is in use + if (gpuDeviceDao.isGpuCardInUse(id)) { + throw new InvalidParameterValueException( + "Cannot delete GPU card " + gpuCard + " as it is in use by one or more GPU devices"); + } + + // delete gpu profiles associated with this GPU card + int rowsRemoved = vgpuProfileDao.removeByCardId(id); + logger.info("Removed {} vGPU profiles associated with GPU card {}", rowsRemoved, gpuCard); + + return gpuCardDao.remove(id); + } + + @Override + @DB + @ActionEvent(eventType = EventTypes.EVENT_VGPU_PROFILE_CREATE, eventDescription = "creating vGPU profile") + public VgpuProfileResponse createVgpuProfile(CreateVgpuProfileCmd cmd) { + final String profileName = cmd.getName(); + final String profileDescription = cmd.getDescription(); + final Long gpuCardId = cmd.getCardId(); + final Long maxVgpuPerPgpu = cmd.getMaxVgpuPerPgpu(); + final Long videoRam = cmd.getVideoRam(); + final Long maxHeads = cmd.getMaxHeads(); + final Long maxResolutionX = cmd.getMaxResolutionX(); + final Long maxResolutionY = cmd.getMaxResolutionY(); + + // Validate inputs + if (StringUtils.isBlank(profileName)) { + throw new InvalidParameterValueException("vGPU profile name cannot be 
empty"); + } + + // Check if the GPU card ID is valid + GpuCardVO gpuCard = gpuCardDao.findById(gpuCardId); + if (gpuCard == null) { + throw new InvalidParameterValueException(String.format("GPU card with ID %d not found", gpuCardId)); + } + + // Check if a vGPU profile with the same name already exists + VgpuProfileVO existingProfile = vgpuProfileDao.findByNameAndCardId(profileName, gpuCardId); + if (existingProfile != null) { + throw new InvalidParameterValueException( + String.format("vGPU profile with name %s already exists", profileName)); + } + + VgpuProfileVO vgpuProfile = new VgpuProfileVO(profileName, profileDescription, gpuCardId, maxVgpuPerPgpu, + videoRam, maxHeads, maxResolutionX, maxResolutionY); + vgpuProfile = vgpuProfileDao.persist(vgpuProfile); + + VgpuProfileResponse response = new VgpuProfileResponse(vgpuProfile, gpuCard); + response.setResponseName(cmd.getCommandName()); + return response; + } + + @Override + @DB + @ActionEvent(eventType = EventTypes.EVENT_VGPU_PROFILE_EDIT, eventDescription = "updating vGPU profile") + public VgpuProfileResponse updateVgpuProfile(UpdateVgpuProfileCmd cmd) { + final Long id = cmd.getId(); + final String profileName = cmd.getProfileName(); + final String profileDescription = cmd.getDescription(); + final Long maxVgpuPerPgpu = cmd.getMaxVgpuPerPgpu(); + final Long videoRam = cmd.getVideoRam(); + final Long maxHeads = cmd.getMaxHeads(); + final Long maxResolutionX = cmd.getMaxResolutionX(); + final Long maxResolutionY = cmd.getMaxResolutionY(); + + // Validate inputs + VgpuProfileVO vgpuProfile = vgpuProfileDao.findById(id); + if (vgpuProfile == null) { + throw new InvalidParameterValueException(String.format("vGPU profile with ID %d not found", id)); + } + + // Check if a vGPU profile with the same name already exists (if the name is being updated) + if (profileName != null && !profileName.equals(vgpuProfile.getName())) { + VgpuProfileVO existingProfile = vgpuProfileDao.findByNameAndCardId(profileName, 
vgpuProfile.getCardId()); + if (existingProfile != null) { + throw new InvalidParameterValueException( + String.format("vGPU profile with name %s already exists", profileName)); + } + } + + if (profileName != null) { + vgpuProfile.setName(profileName); + } + if (profileDescription != null) { + vgpuProfile.setDescription(profileDescription); + } + if (maxVgpuPerPgpu != null) { + vgpuProfile.setMaxVgpuPerPgpu(maxVgpuPerPgpu); + } + if (videoRam != null) { + vgpuProfile.setVideoRam(videoRam); + } + if (maxHeads != null) { + vgpuProfile.setMaxHeads(maxHeads); + } + if (maxResolutionX != null) { + vgpuProfile.setMaxResolutionX(maxResolutionX); + } + if (maxResolutionY != null) { + vgpuProfile.setMaxResolutionY(maxResolutionY); + } + vgpuProfileDao.update(id, vgpuProfile); + + VgpuProfileResponse response = new VgpuProfileResponse(vgpuProfile, + gpuCardDao.findById(vgpuProfile.getCardId())); + response.setResponseName(cmd.getCommandName()); + return response; + } + + @Override + @DB + @ActionEvent(eventType = EventTypes.EVENT_VGPU_PROFILE_DELETE, eventDescription = "Deleting vGPU profile") + public boolean deleteVgpuProfile(DeleteVgpuProfileCmd cmd) { + final Long id = cmd.getId(); + + // Validate inputs + VgpuProfileVO vgpuProfile = vgpuProfileDao.findById(id); + if (vgpuProfile == null) { + throw new InvalidParameterValueException(String.format("vGPU profile with ID %d not found", id)); + } + + // Check if vGPU profile is in use + if (gpuDeviceDao.isVgpuProfileInUse(id)) { + throw new InvalidParameterValueException(String.format( + "Cannot delete vGPU profile with ID %d as it is in use by one or more GPU " + "devices", id)); + } + + return vgpuProfileDao.remove(id); + } + + @Override + public ListResponse listGpuCards(ListGpuCardsCmd cmd) { + Long id = cmd.getId(); + String keyword = cmd.getKeyword(); + String vendorName = cmd.getVendorName(); + String vendorId = cmd.getVendorId(); + String deviceId = cmd.getDeviceId(); + String deviceName = cmd.getDeviceName(); + 
boolean activeOnly = cmd.getActiveOnly(); + + Pair, Integer> gpuCardsAndCount = gpuCardDao.searchAndCountGpuCards(id, keyword, vendorId, + vendorName, deviceId, deviceName, activeOnly, cmd.getStartIndex(), cmd.getPageSizeVal()); + + return getGpuCardResponseListResponse(cmd, gpuCardsAndCount.first(), gpuCardsAndCount.second()); + } + + private static ListResponse getGpuCardResponseListResponse(ListGpuCardsCmd cmd, + List gpuCards, Integer count) { + ListResponse response = new ListResponse<>(); + List gpuCardResponses = new ArrayList<>(); + + for (GpuCardVO gpuCard : gpuCards) { + GpuCardResponse gpuCardResponse = new GpuCardResponse(gpuCard); + response.setResponseName(cmd.getCommandName()); + gpuCardResponses.add(gpuCardResponse); + } + + response.setResponses(gpuCardResponses, count); + response.setResponseName(cmd.getCommandName()); + return response; + } + + @Override + public ListResponse listVgpuProfiles(ListVgpuProfilesCmd cmd) { + Long id = cmd.getId(); + String name = cmd.getName(); + String keyword = cmd.getKeyword(); + Long gpuCardId = cmd.getCardId(); + boolean activeOnly = cmd.getActiveOnly(); + + Pair, Integer> vgpuProfilesAndCount = vgpuProfileDao.searchAndCountVgpuProfiles(id, name, + keyword, gpuCardId, activeOnly, cmd.getStartIndex(), cmd.getPageSizeVal()); + + return getVgpuProfileResponseListResponse(cmd, vgpuProfilesAndCount.first(), vgpuProfilesAndCount.second()); + } + + private ListResponse getVgpuProfileResponseListResponse(ListVgpuProfilesCmd cmd, + List vgpuProfiles, Integer count) { + ListResponse response = new ListResponse<>(); + List vgpuProfileResponses = new ArrayList<>(); + + Map cardMap = new HashMap<>(); + for (VgpuProfileVO vgpuProfile : vgpuProfiles) { + GpuCardVO gpuCard = cardMap.get(vgpuProfile.getCardId()); + if (gpuCard == null) { + gpuCard = gpuCardDao.findById(vgpuProfile.getCardId()); + cardMap.put(vgpuProfile.getCardId(), gpuCard); + } + VgpuProfileResponse vgpuProfileResponse = new VgpuProfileResponse(vgpuProfile, 
gpuCard); + vgpuProfileResponse.setResponseName(cmd.getCommandName()); + vgpuProfileResponses.add(vgpuProfileResponse); + } + + response.setResponses(vgpuProfileResponses, count); + response.setResponseName(cmd.getCommandName()); + return response; + } + + @Override + @DB + @ActionEvent(eventType = EventTypes.EVENT_GPU_CARD_DELETE, eventDescription = "creating GPU device") + public GpuDeviceResponse createGpuDevice(CreateGpuDeviceCmd cmd) { + final Long hostId = cmd.getHostId(); + String busAddress = cmd.getBusAddress(); + final Long gpuCardId = cmd.getGpuCardId(); + final Long vgpuProfileId = cmd.getVgpuProfileId(); + final GpuDevice.DeviceType type = cmd.getType(); + final Long parentGpuDeviceId = cmd.getParentGpuDeviceId(); + final String numaNode = cmd.getNumaNode(); + + // Validate inputs + HostVO host = hostDao.findById(hostId); + if (host == null) { + throw new InvalidParameterValueException(String.format("Host with ID %d not found", hostId)); + } + + if (StringUtils.isBlank(busAddress)) { + throw new InvalidParameterValueException("Bus address cannot be empty"); + } + busAddress = busAddress.trim(); + + // Check if a GPU device with the same bus address already exists on this host + GpuDeviceVO existingDevice = gpuDeviceDao.findByHostIdAndBusAddress(hostId, busAddress); + if (existingDevice != null) { + throw new InvalidParameterValueException( + String.format("GPU device with bus address %s already exists on host %s", busAddress, + host.getName())); + } + + // Validate GPU card + GpuCardVO gpuCard = gpuCardDao.findById(gpuCardId); + if (gpuCard == null) { + throw new InvalidParameterValueException(String.format("GPU card with ID %d not found", gpuCardId)); + } + + // Validate vGPU profile + VgpuProfileVO vgpuProfile = vgpuProfileDao.findById(vgpuProfileId); + if (vgpuProfile == null) { + throw new InvalidParameterValueException(String.format("vGPU profile with ID %d not found", vgpuProfileId)); + } + + // Validate that the vGPU profile belongs to the 
specified GPU card + if (!vgpuProfile.getCardId().equals(gpuCardId)) { + throw new InvalidParameterValueException( + String.format("vGPU profile %s does not belong to GPU card %s", vgpuProfile.getName(), + gpuCard.getName())); + } + + // Validate parent GPU device if specified + if (parentGpuDeviceId != null) { + GpuDeviceVO parentDevice = gpuDeviceDao.findById(parentGpuDeviceId); + if (parentDevice == null) { + throw new InvalidParameterValueException( + String.format("Parent GPU device with ID %d not found", parentGpuDeviceId)); + } + if (!hostId.equals(parentDevice.getHostId())) { + throw new InvalidParameterValueException("Parent GPU device must be on the same host"); + } + } + + // Create the GPU device + GpuDeviceVO gpuDevice = new GpuDeviceVO(gpuCardId, vgpuProfileId, busAddress, hostId, parentGpuDeviceId, + numaNode, null); + gpuDevice.setType(type); + gpuDevice.setState(GpuDevice.State.Free); + gpuDevice.setManagedState(GpuDevice.ManagedState.Managed); + + gpuDevice = gpuDeviceDao.persist(gpuDevice); + + logger.info("Successfully created GPU device {} on host {}", gpuDevice.getUuid(), host.getName()); + return createGpuDeviceResponse(gpuDevice, ResponseObject.ResponseView.Full); + } + + @Override + @DB + @ActionEvent(eventType = EventTypes.EVENT_GPU_DEVICE_EDIT, eventDescription = "updating GPU device") + public GpuDeviceResponse updateGpuDevice(UpdateGpuDeviceCmd cmd) { + final Long id = cmd.getId(); + final Long gpuCardId = cmd.getGpuCardId(); + final Long vgpuProfileId = cmd.getVgpuProfileId(); + final GpuDevice.DeviceType type = cmd.getType(); + final Long parentGpuDeviceId = cmd.getParentGpuDeviceId(); + final String numaNode = cmd.getNumaNode(); + + // Validate inputs + GpuDeviceVO gpuDevice = gpuDeviceDao.findById(id); + if (gpuDevice == null) { + throw new InvalidParameterValueException(String.format("GPU device with ID %d not found", id)); + } + + // Check if device is currently allocated to a VM + if (gpuDevice.getVmId() != null) { + throw new 
InvalidParameterValueException( + String.format("Cannot update GPU device %s as it is currently allocated to VM %d", + gpuDevice.getUuid(), gpuDevice.getVmId())); + } + + // Validate GPU card if specified + if (gpuCardId != null) { + GpuCardVO gpuCard = gpuCardDao.findById(gpuCardId); + if (gpuCard == null) { + throw new InvalidParameterValueException(String.format("GPU card with ID %d not found", gpuCardId)); + } + } + + // Validate vGPU profile if specified + VgpuProfileVO vgpuProfile = null; + if (vgpuProfileId != null) { + vgpuProfile = vgpuProfileDao.findById(vgpuProfileId); + if (vgpuProfile == null) { + throw new InvalidParameterValueException( + String.format("vGPU profile with ID %d not found", vgpuProfileId)); + } + + // Check if vGPU profile belongs to the GPU card (either current or new) + Long targetCardId = gpuCardId != null ? gpuCardId : gpuDevice.getCardId(); + if (!vgpuProfile.getCardId().equals(targetCardId)) { + GpuCardVO targetCard = gpuCardDao.findById(targetCardId); + throw new InvalidParameterValueException( + String.format("vGPU profile %s does not belong to GPU card %s", vgpuProfile.getName(), + targetCard.getName())); + } + } + + // Validate parent GPU device if specified + if (parentGpuDeviceId != null) { + GpuDeviceVO parentDevice = gpuDeviceDao.findById(parentGpuDeviceId); + if (parentDevice == null) { + throw new InvalidParameterValueException( + String.format("Parent GPU device with ID %d not found", parentGpuDeviceId)); + } + if (parentDevice.getHostId() != gpuDevice.getHostId()) { + throw new InvalidParameterValueException("Parent GPU device must be on the same host"); + } + if (parentDevice.getId() == gpuDevice.getId()) { + throw new InvalidParameterValueException("GPU device cannot be its own parent"); + } + } + + // Update the GPU device + if (gpuCardId != null) { + gpuDevice.setCardId(gpuCardId); + } + if (vgpuProfileId != null) { + gpuDevice.setVgpuProfileId(vgpuProfileId); + } + if (type != null) { + gpuDevice.setType(type); + 
} + if (parentGpuDeviceId != null) { + gpuDevice.setParentGpuDeviceId(parentGpuDeviceId); + } + if (numaNode != null) { + gpuDevice.setNumaNode(numaNode); + } + gpuDeviceDao.update(id, gpuDevice); + + logger.info("Successfully updated GPU device {}", gpuDevice.getUuid()); + return createGpuDeviceResponse(gpuDevice, ResponseObject.ResponseView.Full); + } + + @Override + public ListResponse listGpuDevices(ListGpuDevicesCmd cmd) { + Long id = null; + Long hostId = null; + Long gpuCardId = null; + Long vgpuProfileId = null; + if (cmd instanceof ListGpuDevicesCmdByAdmin) { + ListGpuDevicesCmdByAdmin adminCmd = (ListGpuDevicesCmdByAdmin) cmd; + id = adminCmd.getId(); + hostId = adminCmd.getHostId(); + gpuCardId = adminCmd.getGpuCardId(); + vgpuProfileId = adminCmd.getVgpuProfileId(); + } + String keyword = cmd.getKeyword(); + Long vmId = cmd.getVmId(); + + Pair, Integer> gpuDevicesAndCount = gpuDeviceDao.searchAndCountGpuDevices(id, keyword, hostId, + vmId, gpuCardId, vgpuProfileId, cmd.getStartIndex(), cmd.getPageSizeVal()); + + return getGpuDeviceResponseListResponse(cmd, gpuDevicesAndCount.first(), gpuDevicesAndCount.second()); + } + + @Override + public boolean disableGpuDevice(UnmanageGpuDeviceCmd cmd) { + return updateGpuDeviceManagedState(cmd.getIds(), GpuDevice.ManagedState.Unmanaged); + } + + @Override + public boolean enableGpuDevice(ManageGpuDeviceCmd cmd) { + return updateGpuDeviceManagedState(cmd.getIds(), GpuDevice.ManagedState.Managed); + } + + @Override + public void deallocateAllGpuDevicesForVm(long vmId) { + List devices = gpuDeviceDao.listByVmId(vmId); + deallocateGpuDevices(devices); + } + + @Override + public void deallocateGpuDevicesForVmOnHost(long vmId, long hostId) { + List devices = gpuDeviceDao.listByHostAndVm(hostId, vmId); + deallocateGpuDevices(devices); + } + + private void deallocateGpuDevices(List devices) { + if (CollectionUtils.isNotEmpty(devices)) { + for (GpuDeviceVO device : devices) { + device.setState(GpuDevice.State.Free); + 
device.setVmId(null); + gpuDeviceDao.persist(device); + checkAndUpdateParentGpuDeviceState(device.getParentGpuDeviceId()); + } + } + } + + protected void checkAndUpdateParentGpuDeviceState(Long parentGpuDeviceId) { + if (parentGpuDeviceId != null) { + GpuDeviceVO parentGpuDevice = gpuDeviceDao.findById(parentGpuDeviceId); + checkAndUpdateParentGpuDeviceState(parentGpuDevice); + } + } + + protected void checkAndUpdateParentGpuDeviceState(GpuDeviceVO parentDevice) { + if (parentDevice != null) { + List childDevices = gpuDeviceDao.listByParentGpuDeviceId(parentDevice.getId()); + GpuDevice.State finalState = GpuDevice.State.Free; + for (GpuDeviceVO childDevice : childDevices) { + if (childDevice.getState().equals(GpuDevice.State.Allocated)) { + finalState = GpuDevice.State.PartiallyAllocated; + } else if (childDevice.getState().equals(GpuDevice.State.Error)) { + finalState = GpuDevice.State.Error; + break; + } + } + if (!finalState.equals(parentDevice.getState())) { + parentDevice.setState(finalState); + gpuDeviceDao.update(parentDevice.getId(), parentDevice); + } + } + } + + @Override + public void allocateGpuDevicesToVmOnHost(long vmId, long hostId, List gpuDevices) { + Transaction.execute(new TransactionCallbackNoReturn() { + @Override + public void doInTransactionWithoutResult(TransactionStatus status) { + // Deallocate existing GPU devices for the VM on the host + deallocateAllGpuDevicesForVm(vmId); + + // Allocate new GPU devices to the VM on the host + for (VgpuTypesInfo gpuDevice : gpuDevices) { + GpuDeviceVO device = gpuDeviceDao.findByHostIdAndBusAddress(hostId, gpuDevice.getBusAddress()); + if (device != null) { + device.setState(GpuDevice.State.Allocated); + device.setVmId(vmId); + gpuDeviceDao.persist(device); + checkAndUpdateParentGpuDeviceState(device.getParentGpuDeviceId()); + } else { + throw new CloudRuntimeException( + String.format("GPU device not found for VM %d on host %d", vmId, hostId)); + } + } + } + }); + } + + @Override + public ListResponse 
discoverGpuDevices(DiscoverGpuDevicesCmd cmd) { + final Long hostId = cmd.getId(); + HostVO host = hostDao.findById(hostId); + if (host == null) { + throw new InvalidParameterValueException(String.format("Host with ID %d not found", hostId)); + } + if (!Status.Up.equals(host.getStatus())) { + throw new InvalidParameterValueException(String.format("Host [%s] is not in Up status", host)); + } + + // Get GPU stats on the host and update GPU details + // getGPUStatistics() fetches the stats + HashMap> groupDetails = resourceManager.getGPUStatistics(host); + if (!MapUtils.isEmpty(groupDetails)) { + resourceManager.updateGPUDetails(host.getId(), groupDetails); + } + + // Return the list of GPU devices for the host + List gpuDevices = gpuDeviceDao.listByHostId(hostId); + return getGpuDeviceResponseListResponse(cmd, gpuDevices, gpuDevices.size()); + } + + /** + * Tells whether the host can provide {@code gpuCount} devices of the given vGPU profile. + * Devices on the host that are already assigned to the VM and use the same profile are counted + * together with the devices returned by {@code listDevicesForAllocation}. + */ + @Override + public boolean isGPUDeviceAvailable(Host host, Long vmId, VgpuProfile vgpuProfile, int gpuCount) { + List availableGpuDevices = gpuDeviceDao.listDevicesForAllocation(host.getId(), + vgpuProfile.getId()); + if (availableGpuDevices.size() >= gpuCount) { + return true; + } else { + // Check if there are already GPU devices assigned to the VM and belonging to the same vGPU profile + List existingGpuDevices = gpuDeviceDao.listByHostAndVm(host.getId(), vmId); + existingGpuDevices = existingGpuDevices.stream().filter( + device -> device.getVgpuProfileId() == vgpuProfile.getId()).collect(Collectors.toList()); + return existingGpuDevices.size() + availableGpuDevices.size() >= gpuCount; + } + } + + /** + * Allocates {@code gpuCount} devices of the given vGPU profile on the host to the VM inside one transaction. + * When the VM is already placed on that host, its current devices are released first. + * + * @throws CloudRuntimeException if fewer than {@code gpuCount} devices are available + */ + @Override + @DB + public GPUDeviceTO getGPUDevice(VirtualMachine vm, long hostId, VgpuProfile vgpuProfile, int gpuCount) { + return Transaction.execute(new TransactionCallback() { + @Override + public GPUDeviceTO doInTransaction(TransactionStatus status) { + // The VM's host ID is presumably a boxed Long that is null while the VM has no host; + // compare via equals() so a null value cannot trigger an unboxing NullPointerException + if (Long.valueOf(hostId).equals(vm.getHostId())) { + deallocateAllGpuDevicesForVm(vm.getId()); + } + + List availableGpuDevices = gpuDeviceDao.listDevicesForAllocation(hostId, +
vgpuProfile.getId()); + + if (availableGpuDevices.size() < gpuCount) { + logger.debug("Not enough GPU devices available for VM {}", vm); + throw new CloudRuntimeException( + String.format("Not enough GPU devices available for VM %s", vm.getUuid())); + } + + List finalGpuDevices = getGpuDevicesToAllocate(availableGpuDevices, gpuCount); + + GpuCardVO gpuCard = gpuCardDao.findById(vgpuProfile.getCardId()); + + ServiceOfferingVO serviceOffering = serviceOfferingDao.findById(vm.getServiceOfferingId()); + List vgpuInfoList = new ArrayList<>(); + for (GpuDeviceVO gpuDevice : finalGpuDevices) { + gpuDevice.setState(GpuDevice.State.Allocated); + gpuDevice.setVmId(vm.getId()); + gpuDeviceDao.persist(gpuDevice); + + VgpuTypesInfo vgpuInfo = new VgpuTypesInfo(gpuDevice.getType(), gpuCard.getName(), + vgpuProfile.getName(), gpuDevice.getBusAddress(), gpuCard.getVendorId(), + gpuCard.getVendorName(), gpuCard.getDeviceId(), gpuCard.getDeviceName()); + vgpuInfo.setDisplay(serviceOffering.getGpuDisplay()); + + if (gpuDevice.getParentGpuDeviceId() != null) { + GpuDeviceVO parentGpuDevice = gpuDeviceDao.findById(gpuDevice.getParentGpuDeviceId()); + if (parentGpuDevice != null) { + vgpuInfo.setParentBusAddress(parentGpuDevice.getBusAddress()); + checkAndUpdateParentGpuDeviceState(parentGpuDevice); + } + } + vgpuInfoList.add(vgpuInfo); + } + + HashMap> groupDetails = getGpuGroupDetailsFromGpuDevicesOnHost(hostId); + return new GPUDeviceTO(gpuCard.getName(), vgpuProfile.getName(), gpuCount, groupDetails, vgpuInfoList); + } + }); + } + + @Override + public HashMap> getGpuGroupDetailsFromGpuDevicesOnHost(final long hostId) { + HashMap> gpuGroupDetails = new HashMap<>(); + List gpuDevices = gpuDeviceDao.listByHostId(hostId); + for (final GpuDeviceVO device : gpuDevices) { + // Calculate GPU capacity and update gpuGroupDetails + GpuCardVO card = gpuCardDao.findById(device.getCardId()); + String groupName = card.getName(); + if (!gpuGroupDetails.containsKey(groupName)) { + 
gpuGroupDetails.put(groupName, new HashMap<>()); + } + VgpuProfileVO vgpuProfile = vgpuProfileDao.findById(device.getVgpuProfileId()); + + VgpuTypesInfo gpuDeviceInfo = gpuGroupDetails.get(groupName).get(vgpuProfile.getName()); + long remainingCapacity = 0L; + long maxCapacity = 1L; + if (GpuDevice.State.Free.equals(device.getState()) && GpuDevice.ManagedState.Managed.equals( + device.getManagedState())) { + remainingCapacity = 1L; + } + if (GpuDevice.DeviceType.VGPUOnly.equals(device.getType()) || + GpuDevice.ManagedState.Unmanaged.equals(device.getManagedState()) || + GpuDevice.State.Error.equals(device.getState()) + ) { + maxCapacity = 0L; + remainingCapacity = 0L; + } + if (gpuDeviceInfo == null) { + gpuDeviceInfo = new VgpuTypesInfo(card.getName(), vgpuProfile.getName(), vgpuProfile.getVideoRam(), vgpuProfile.getMaxHeads(), + vgpuProfile.getMaxResolutionX(), vgpuProfile.getMaxResolutionY(), + vgpuProfile.getMaxVgpuPerPgpu(), remainingCapacity, maxCapacity); + gpuDeviceInfo.setDeviceName(card.getDeviceName()); + gpuDeviceInfo.setVendorId(card.getVendorId()); + gpuDeviceInfo.setVendorName(card.getVendorName()); + gpuDeviceInfo.setDeviceId(card.getDeviceId()); + gpuGroupDetails.get(groupName).put(vgpuProfile.getName(), gpuDeviceInfo); + } else { + // Update the existing VgpuTypesInfo with the new device's information + gpuDeviceInfo.setRemainingCapacity(gpuDeviceInfo.getRemainingCapacity() + remainingCapacity); + gpuDeviceInfo.setMaxVmCapacity(gpuDeviceInfo.getMaxCapacity() + maxCapacity); + } + } + return gpuGroupDetails; + } + + /* + * For the devices in newGpuDevicesInfo, create the GPU card and vGPU profile if they don't exist. + * For the devices in existingGpuDevices but not in newGpuDevicesInfo, disable the device. + * For the devices in newGpuDevicesInfo but not in existingGpuDevices, add them to the host. + * For the devices in both, update the device's info. 
+ */ + @Override + @DB + public void addGpuDevicesToHost(final Host host, final List newGpuDevicesInfo) { + List existingGpuDevices = gpuDeviceDao.listByHostId(host.getId()); + Map existingGpuDevicesMap = new HashMap<>(); + Map gpuDevicesToDisableMap = new HashMap<>(); + for (final GpuDeviceVO device : existingGpuDevices) { + existingGpuDevicesMap.put(device.getBusAddress(), device); + gpuDevicesToDisableMap.put(device.getBusAddress(), device); + } + + GlobalLock lock = GlobalLock.getInternLock("add-gpu-devices-to-host-" + host.getId()); + try { + if (lock.lock(30)) { + try { + Map cardMap = new HashMap<>(); + Map vgpuProfileMap = new HashMap<>(); + + for (final VgpuTypesInfo deviceInfo : newGpuDevicesInfo) { + GpuCardVO card = getGpuCardAndUpdateMap(deviceInfo, cardMap, vgpuProfileMap); + VgpuProfileVO vgpuProfile = getVgpuProfileAndUpdateMap(deviceInfo, card, vgpuProfileMap); + + GpuDeviceVO existingDevice = existingGpuDevicesMap.get(deviceInfo.getBusAddress()); + if (existingDevice == null) { + createAndAddGpuDeviceToHost(deviceInfo, host, card, vgpuProfile); + } else { + // Update the device's info + GpuDeviceVO parentGpuDevice = null; + if (existingDevice.getParentGpuDeviceId() == null + && deviceInfo.getParentBusAddress() != null) { + parentGpuDevice = gpuDeviceDao.findByHostIdAndBusAddress(host.getId(), + deviceInfo.getParentBusAddress()); + if (parentGpuDevice != null) { + existingDevice.setParentGpuDeviceId(parentGpuDevice.getId()); + } + } + if (existingDevice.getPciRoot() == null) { + existingDevice.setPciRoot(deviceInfo.getPciRoot()); + } + setStateAndVmName(deviceInfo, existingDevice, parentGpuDevice); + gpuDeviceDao.update(existingDevice.getId(), existingDevice); + checkAndUpdateParentGpuDeviceState(existingDevice.getParentGpuDeviceId()); + } + gpuDevicesToDisableMap.remove(deviceInfo.getBusAddress()); + } + + // Disable the devices that are not in the new list + for (final GpuDeviceVO device : gpuDevicesToDisableMap.values()) { + 
logger.info("Disabling GPU device {} on host {} due to missing address in the new devices on the host.", device, host); + device.setState(GpuDevice.State.Error); + device.setManagedState(GpuDevice.ManagedState.Unmanaged); + gpuDeviceDao.update(device.getId(), device); + checkAndUpdateParentGpuDeviceState(device.getParentGpuDeviceId()); + } + } finally { + lock.unlock(); + } + } + } finally { + lock.releaseRef(); + } + } + + @Override + public boolean deleteGpuDevices(DeleteGpuDeviceCmd deleteGpuDeviceCmd) { + List gpuDeviceIds = deleteGpuDeviceCmd.getIds(); + if (CollectionUtils.isEmpty(gpuDeviceIds)) { + throw new InvalidParameterValueException("GPU device IDs cannot be empty"); + } + + List gpuDevices = gpuDeviceDao.listByIds(gpuDeviceIds); + if (gpuDevices.isEmpty()) { + throw new InvalidParameterValueException("No GPU devices found for the provided IDs"); + } + + for (GpuDeviceVO gpuDevice : gpuDevices) { + if (gpuDevice.getVmId() != null) { + throw new InvalidParameterValueException( + String.format("Cannot delete GPU device %s as it is currently allocated to VM %d", + gpuDevice.getUuid(), gpuDevice.getVmId())); + } + gpuDeviceDao.remove(gpuDevice.getId()); + } + return true; + } + + private GpuCardVO getGpuCardAndUpdateMap(VgpuTypesInfo deviceInfo, Map cardMap, + Map vgpuProfileMap) { + String cardMapKey = deviceInfo.getDeviceId() + " - " + deviceInfo.getVendorId(); + GpuCardVO card = cardMap.get(cardMapKey); + if (card == null) { + card = gpuCardDao.findByVendorIdAndDeviceId(deviceInfo.getVendorId(), deviceInfo.getDeviceId()); + if (card == null) { + // Create GPU card if it doesn't exist + logger.info("Creating new GPU card for vendor ID: {} and device ID: {}", deviceInfo.getVendorId(), + deviceInfo.getDeviceId()); + + String deviceName = StringUtils.isNotBlank(deviceInfo.getDeviceName()) ? + deviceInfo.getDeviceName() : + deviceInfo.getGroupName(); + String vendorName = StringUtils.isNotBlank(deviceInfo.getVendorName()) ? 
+ deviceInfo.getVendorName() : + "Unknown Vendor"; + String cardDisplayName = vendorName + " " + deviceName; + + card = new GpuCardVO(deviceInfo.getDeviceId(), deviceName, cardDisplayName, vendorName, + deviceInfo.getVendorId()); + card = gpuCardDao.persist(card); + + // Create default passthrough profile for the new card + VgpuProfileVO passthroughProfile = new VgpuProfileVO("passthrough", "passthrough", card.getId(), 1L); + passthroughProfile.setVideoRam(deviceInfo.getVideoRam()); + passthroughProfile.setMaxResolutionX(deviceInfo.getMaxResolutionX()); + passthroughProfile.setMaxResolutionY(deviceInfo.getMaxResolutionY()); + passthroughProfile.setMaxHeads(deviceInfo.getMaxHeads()); + passthroughProfile = vgpuProfileDao.persist(passthroughProfile); + + // Cache the profile under its own name: keying it by the device's model name would make + // getVgpuProfileAndUpdateMap return the passthrough profile for a device of a different model + String vgpuProfileKey = card.getUuid() + " | " + passthroughProfile.getName(); + vgpuProfileMap.put(vgpuProfileKey, passthroughProfile); + logger.info("Created GPU card: {} with passthrough profile: {}", card, passthroughProfile); + } + cardMap.put(cardMapKey, card); + } + return card; + } + + /** + * Returns the vGPU profile named after the device's model for the given card, looking in + * {@code vgpuProfileMap} first and then in the DAO. A missing profile is created and persisted. + * The result is stored in {@code vgpuProfileMap} under the card UUID and model name. + */ + private VgpuProfileVO getVgpuProfileAndUpdateMap(VgpuTypesInfo deviceInfo, GpuCardVO card, + Map vgpuProfileMap) { + String vgpuProfileKey = card.getUuid() + " | " + deviceInfo.getModelName(); + VgpuProfileVO vgpuProfile = vgpuProfileMap.get(vgpuProfileKey); + if (vgpuProfile == null) { + vgpuProfile = vgpuProfileDao.findByNameAndCardId(deviceInfo.getModelName(), card.getId()); + if (vgpuProfile == null) { + // Create vGPU profile if it doesn't exist + logger.info("Creating new vGPU profile: {} for GPU card: {}", deviceInfo.getModelName(), + card.getName()); + + vgpuProfile = new VgpuProfileVO(deviceInfo.getModelName(), deviceInfo.getModelName(), card.getId(), + deviceInfo.getMaxVpuPerGpu() != null ?
deviceInfo.getMaxVpuPerGpu() : 1L); + vgpuProfile.setVideoRam(deviceInfo.getVideoRam()); + vgpuProfile.setMaxResolutionX(deviceInfo.getMaxResolutionX()); + vgpuProfile.setMaxResolutionY(deviceInfo.getMaxResolutionY()); + vgpuProfile.setMaxHeads(deviceInfo.getMaxHeads()); + vgpuProfile = vgpuProfileDao.persist(vgpuProfile); + + logger.info("Created vGPU profile: {}", vgpuProfile); + } + vgpuProfileMap.put(vgpuProfileKey, vgpuProfile); + } + return vgpuProfile; + } + + private void createAndAddGpuDeviceToHost(VgpuTypesInfo deviceInfo, Host host, GpuCardVO card, + VgpuProfileVO vgpuProfile) { + Long parentGpuDeviceId = null; + GpuDeviceVO parentGpuDevice = null; + if (deviceInfo.getParentBusAddress() != null) { + parentGpuDevice = gpuDeviceDao.findByHostIdAndBusAddress(host.getId(), + deviceInfo.getParentBusAddress()); + if (parentGpuDevice != null) { + parentGpuDeviceId = parentGpuDevice.getId(); + } + } + GpuDeviceVO gpuDevice = new GpuDeviceVO(card.getId(), vgpuProfile.getId(), deviceInfo.getBusAddress(), + host.getId(), parentGpuDeviceId, deviceInfo.getNumaNode(), deviceInfo.getPciRoot()); + gpuDevice.setHostId(host.getId()); + gpuDevice.setBusAddress(deviceInfo.getBusAddress()); + gpuDevice.setCardId(card.getId()); + setStateAndVmName(deviceInfo, gpuDevice, parentGpuDevice); + if (!deviceInfo.isPassthroughEnabled()) { + gpuDevice.setType(GpuDevice.DeviceType.VGPUOnly); + } + + gpuDevice = gpuDeviceDao.persist(gpuDevice); + checkAndUpdateParentGpuDeviceState(parentGpuDevice); + logger.info("Added new GPU device {} to host {}", gpuDevice, host); + } + + private void setStateAndVmName(VgpuTypesInfo deviceInfo, GpuDeviceVO device, GpuDeviceVO parentGpuDevice) { + if (StringUtils.isNotBlank(deviceInfo.getVmName())) { + VMInstanceVO vm = vmInstanceDao.findVMByInstanceName(deviceInfo.getVmName()); + if (vm != null) { + device.setVmId(vm.getId()); + device.setState(GpuDevice.State.Allocated); + } else { + device.setState(GpuDevice.State.Error); + logger.warn("VM with 
name {} not found for GPU device {}. Setting state to Error.", + deviceInfo.getVmName(), device); + } + } else { + // If no VM name is provided, it's possible that the device is allocated to a stopped VM or not allocated at all. + if (device.getVmId() == null && !device.getState().equals(GpuDevice.State.PartiallyAllocated)) { + device.setState(GpuDevice.State.Free); + } else { + VMInstanceVO vm = vmInstanceDao.findById(device.getVmId()); + if (vm != null && vm.getState().equals(VirtualMachine.State.Stopped) && !GpuDetachOnStop.valueIn(vm.getDomainId())) { + device.setState(GpuDevice.State.Allocated); + } else { + logger.warn("VM with ID {} not found for GPU device {}. Allocated to a removed VM. Setting state to Free.", + device.getVmId(), device); + device.setState(GpuDevice.State.Free); + device.setVmId(null); + } + } + } + } + + private boolean updateGpuDeviceManagedState(List gpuDeviceIds, GpuDevice.ManagedState managedState) { + if (CollectionUtils.isEmpty(gpuDeviceIds)) { + throw new InvalidParameterValueException("GPU device IDs cannot be empty"); + } + List gpuDevices = new ArrayList<>(); + Set hostIds = new HashSet<>(); + for (Long gpuDeviceId : gpuDeviceIds) { + GpuDeviceVO gpuDevice = gpuDeviceDao.findById(gpuDeviceId); + if (gpuDevice == null) { + throw new InvalidParameterValueException(String.format("GPU device with ID %d not found", gpuDeviceId)); + } + + if (gpuDevice.getManagedState().equals(managedState)) { + logger.debug("GPU device {} is already in resource state: {}. 
Skipping state update.", gpuDevice, + managedState); + // Nothing to change for this device, so really skip it as the message above states + continue; + } + + if (gpuDevice.getVmId() != null) { + throw new InvalidParameterValueException( + String.format("Cannot change resource state of GPU device %s as it is in use by VM %d", + gpuDevice, gpuDevice.getVmId())); + } + gpuDevices.add(gpuDevice); + hostIds.add(gpuDevice.getHostId()); + } + + for (GpuDeviceVO gpuDevice : gpuDevices) { + gpuDevice.setManagedState(managedState); + gpuDeviceDao.update(gpuDevice.getId(), gpuDevice); + } + + // Refresh the GPU details once per affected host + for (Long hostId : hostIds) { + resourceManager.updateGPUDetails(hostId, getGpuGroupDetailsFromGpuDevicesOnHost(hostId)); + } + + return true; + } + + /** + * Wraps the given devices in a {@code ListResponse} named after the command. + * The response view defaults to Full and is taken from the command when it is a + * {@code ListGpuDevicesCmd} or its admin variant. + */ + private ListResponse getGpuDeviceResponseListResponse(BaseCmd cmd, List gpuDevices, + Integer count) { + ListResponse response = new ListResponse<>(); + List gpuDeviceResponses = new ArrayList<>(); + + ResponseObject.ResponseView view = ResponseObject.ResponseView.Full; + if (cmd instanceof ListGpuDevicesCmdByAdmin) { + ListGpuDevicesCmd listCmd = (ListGpuDevicesCmdByAdmin) cmd; + view = listCmd.getResponseView(); + } else if (cmd instanceof ListGpuDevicesCmd) { + ListGpuDevicesCmd listCmd = (ListGpuDevicesCmd) cmd; + view = listCmd.getResponseView(); + } + + for (GpuDeviceVO gpuDevice : gpuDevices) { + GpuDeviceResponse gpuDeviceResponse = createGpuDeviceResponse(gpuDevice, view); + gpuDeviceResponses.add(gpuDeviceResponse); + } + + response.setResponses(gpuDeviceResponses, count); + response.setResponseName(cmd.getCommandName()); + return response; + } + + /** + * Builds the API response for a single GPU device. + * Bus address, state, managed state, type, NUMA node, host, parent device and the card/profile IDs + * are only filled in for the Full view; card name, profile name and VM details are always filled in + * when the referenced records exist. + */ + private GpuDeviceResponse createGpuDeviceResponse(GpuDeviceVO gpuDevice, ResponseObject.ResponseView view) { + GpuDeviceResponse response = new GpuDeviceResponse(); + response.setId(gpuDevice.getUuid()); + if (view.equals(ResponseObject.ResponseView.Full)) { + response.setBussAddress(gpuDevice.getBusAddress()); + response.setState(gpuDevice.getState()); + response.setManagedState(gpuDevice.getManagedState()); + response.setType(gpuDevice.getType()); +
response.setNumaNode(gpuDevice.getNumaNode()); + + // Host name lookup + HostVO host = hostDao.findById(gpuDevice.getHostId()); + if (host != null) { + response.setHostName(host.getName()); + response.setHostId(host.getUuid()); + } + + if (gpuDevice.getParentGpuDeviceId() != null) { + GpuDeviceVO parentGpuDevice = gpuDeviceDao.findById(gpuDevice.getParentGpuDeviceId()); + if (parentGpuDevice != null) { + response.setParentGpuDeviceId(parentGpuDevice.getUuid()); + } else { + logger.debug("Parent GPU device with ID {} not found for GPU device {}", + gpuDevice.getParentGpuDeviceId(), gpuDevice.getUuid()); + } + } + } + // GPU card info + GpuCardVO gpuCard = gpuCardDao.findById(gpuDevice.getCardId()); + if (gpuCard != null) { + if (view.equals(ResponseObject.ResponseView.Full)) { + response.setGpuCardId(gpuCard.getUuid()); + } + response.setGpuCardName(gpuCard.getName()); + } + + // vGPU profile info + VgpuProfileVO vgpuProfile = vgpuProfileDao.findById(gpuDevice.getVgpuProfileId()); + if (vgpuProfile != null) { + if (view.equals(ResponseObject.ResponseView.Full)) { + response.setVgpuProfileId(vgpuProfile.getUuid()); + } + response.setVgpuProfileName(vgpuProfile.getName()); + } + + if (gpuDevice.getVmId() != null) { + UserVmVO vm = userVmManager.getVirtualMachine(gpuDevice.getVmId()); + if (vm != null) { + response.setVmId(vm.getUuid()); + response.setVmName(vm.getInstanceName()); + response.setVmState(vm.getState()); + } else { + logger.debug("VM with ID {} not found for GPU device {}", gpuDevice.getVmId(), gpuDevice.getUuid()); + } + } + + return response; + } + + /** + * Allocates optimal GPU devices for KVM performance based on NUMA node alignment and PCI root optimization. + *

+ * Performance priority (best to acceptable): + * 1. All GPUs from same NUMA node and same PCI root (best memory locality and PCI bandwidth) + * 2. All GPUs from same NUMA node across different PCI roots (good memory locality) + * 3. GPUs distributed across NUMA nodes with PCI root preference within each node + * + * @param availableGpuDevices List of available GPU devices + * @param gpuCount Number of GPUs to allocate + * @return List of optimally selected GPU devices + */ + public List getGpuDevicesToAllocate(List availableGpuDevices, int gpuCount) { + if (availableGpuDevices.size() < gpuCount) { + throw new CloudRuntimeException( + String.format("Insufficient GPU devices available. Required: %d, Available: %d", gpuCount, + availableGpuDevices.size())); + } + + // Strategy 1: Try optimal allocation (same NUMA node + same PCI root) + List optimalSelection = tryOptimalAllocation(availableGpuDevices, gpuCount); + if (optimalSelection != null) { + logger.info("Allocated {} GPU devices using optimal strategy (same NUMA node and PCI root)", gpuCount); + return optimalSelection; + } + + // Strategy 2: Try single NUMA node allocation across multiple PCI roots + List singleNumaSelection = trySingleNumaAllocation(availableGpuDevices, gpuCount); + if (singleNumaSelection != null) { + logger.info("Allocated {} GPU devices using single NUMA node strategy", gpuCount); + return singleNumaSelection; + } + + // Strategy 3: Distribute across NUMA nodes with PCI root optimization + List distributedSelection = tryDistributedAllocation(availableGpuDevices, gpuCount); + logger.info("Allocated {} GPU devices using distributed NUMA strategy", gpuCount); + return distributedSelection; + } + + /** + * Attempts to allocate all GPUs from the same NUMA node and PCI root for optimal performance. 
+ */ + private List tryOptimalAllocation(List availableDevices, int gpuCount) { + Map>> topology = buildNumaPciTopology(availableDevices); + + for (Map> numaNode : topology.values()) { + for (List pciRootDevices : numaNode.values()) { + if (pciRootDevices.size() >= gpuCount) { + return selectDevicesFromGroup(pciRootDevices, gpuCount); + } + } + } + return null; + } + + /** + * Attempts to allocate all GPUs from a single NUMA node across multiple PCI roots. + */ + private List trySingleNumaAllocation(List availableDevices, int gpuCount) { + Map> devicesByNuma = groupDevicesByNuma(availableDevices); + + // Prioritize NUMA nodes with balanced device distribution + List>> sortedNumaNodes = new ArrayList<>(); + for (Map.Entry> entry : devicesByNuma.entrySet()) { + if (entry.getValue().size() >= gpuCount) { + sortedNumaNodes.add(entry); + } + } + sortedNumaNodes.sort(Map.Entry.comparingByValue(Comparator.comparing(List::size))); + + for (Map.Entry> numaEntry : sortedNumaNodes) { + List selected = selectDevicesWithPciOptimization(numaEntry.getValue(), gpuCount); + if (selected.size() == gpuCount) { + return selected; + } + } + return null; + } + + /** + * Distributes GPU allocation across NUMA nodes while optimizing PCI root usage within each node. 
+ */ + private List tryDistributedAllocation(List availableDevices, int gpuCount) { + Map> devicesByNuma = groupDevicesByNuma(availableDevices); + List selectedDevices = new ArrayList<>(); + + // Sort NUMA nodes by device count in order for balanced distribution + List>> sortedNumaNodes = new ArrayList<>(devicesByNuma.entrySet()); + sortedNumaNodes.sort(Map.Entry.comparingByValue(Comparator.comparing(List::size))); + + int remainingCount = gpuCount; + for (Map.Entry> numaEntry : sortedNumaNodes) { + if (remainingCount <= 0) break; + + int devicesNeeded = Math.min(remainingCount, numaEntry.getValue().size()); + List selectedFromNuma = selectDevicesWithPciOptimization(numaEntry.getValue(), devicesNeeded); + + selectedDevices.addAll(selectedFromNuma); + remainingCount -= selectedFromNuma.size(); + + logger.debug("Selected {} devices from NUMA node {}", selectedFromNuma.size(), numaEntry.getKey()); + } + + if (selectedDevices.size() < gpuCount) { + throw new CloudRuntimeException( + String.format("Could not allocate required GPU devices. Required: %d, Allocated: %d", gpuCount, + selectedDevices.size())); + } + + return selectedDevices; + } + + /** + * Builds a hierarchical topology map: NUMA node -> PCI root -> devices. + */ + private Map>> buildNumaPciTopology(List devices) { + Map>> map = new HashMap<>(); + for (GpuDeviceVO device : devices) { + map.computeIfAbsent( + getNumaNodeKey(device), key -> new HashMap<>() + ).computeIfAbsent( + getPciRootKey(device), k -> new ArrayList<>() + ).add(device); + } + return map; + } + + /** + * Groups devices by NUMA node. + */ + private Map> groupDevicesByNuma(List devices) { + Map> map = new HashMap<>(); + for (GpuDeviceVO device : devices) { + map.computeIfAbsent(getNumaNodeKey(device), k -> new ArrayList<>()).add(device); + } + return map; + } + + /** + * Selects devices from a group with PCI root optimization, prioritizing same PCI roots. 
+ */ + private List selectDevicesWithPciOptimization(List numaDevices, int count) { + Map> devicesByPciRoot = new HashMap<>(); + for (GpuDeviceVO numaDevice : numaDevices) { + devicesByPciRoot.computeIfAbsent(getPciRootKey(numaDevice), k -> new ArrayList<>()).add(numaDevice); + } + + List selected = new ArrayList<>(); + + // Sort PCI roots by device count (descending) to prioritize roots with more devices + List> sortedPciGroups = new ArrayList<>(devicesByPciRoot.values()); + sortedPciGroups.sort(Comparator.comparing(List::size).reversed()); + + for (List pciGroup : sortedPciGroups) { + if (selected.size() >= count) break; + + int devicesNeeded = Math.min(count - selected.size(), pciGroup.size()); + selected.addAll(selectDevicesFromGroup(pciGroup, devicesNeeded)); + } + + return selected; + } + + /** + * Selects devices from a group, sorted by ID for consistency. + */ + private List selectDevicesFromGroup(List devices, int count) { + List toSort = new ArrayList<>(devices); + toSort.sort(Comparator.comparing(GpuDeviceVO::getId)); + List list = new ArrayList<>(); + long limit = count; + for (GpuDeviceVO device : toSort) { + if (limit-- == 0) break; + list.add(device); + } + return list; + } + + /** + * Gets the NUMA node key, handling null/blank values. + */ + private String getNumaNodeKey(GpuDeviceVO device) { + return StringUtils.isNotBlank(device.getNumaNode()) ? device.getNumaNode() : "unknown"; + } + + /** + * Gets the PCI root key, handling null/blank values. + */ + private String getPciRootKey(GpuDeviceVO device) { + return StringUtils.isNotBlank(device.getPciRoot()) ? 
device.getPciRoot() : "unknown"; + } +} diff --git a/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml b/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml index 9e570e635fa3..b90c40dc95e7 100644 --- a/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml +++ b/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml @@ -392,6 +392,8 @@ + + diff --git a/server/src/test/java/com/cloud/agent/manager/allocator/impl/FirstFitAllocatorTest.java b/server/src/test/java/com/cloud/agent/manager/allocator/impl/FirstFitAllocatorTest.java index 83498fbbe766..0d6a6fa8ff92 100644 --- a/server/src/test/java/com/cloud/agent/manager/allocator/impl/FirstFitAllocatorTest.java +++ b/server/src/test/java/com/cloud/agent/manager/allocator/impl/FirstFitAllocatorTest.java @@ -28,6 +28,7 @@ import com.cloud.service.dao.ServiceOfferingDetailsDao; import com.cloud.user.Account; import com.cloud.utils.Pair; +import com.cloud.vm.VirtualMachineProfile; import org.apache.cloudstack.framework.config.dao.ConfigurationDao; import org.junit.Assert; import org.junit.Before; @@ -63,6 +64,7 @@ public class FirstFitAllocatorTest { private Host host1; private Host host2; + private VirtualMachineProfile vmProfile; ConfigurationDao configDao; @Before @@ -86,12 +88,16 @@ public void setUp() { host1 = mock(Host.class); host2 = mock(Host.class); + vmProfile = mock(VirtualMachineProfile.class); + when(vmProfile.getId()).thenReturn(1L); + when(plan.getDataCenterId()).thenReturn(1L); when(offering.getCpu()).thenReturn(2); when(offering.getSpeed()).thenReturn(1000); when(offering.getRamSize()).thenReturn(2048); when(offering.getId()).thenReturn(123L); when(offering.getHostTag()).thenReturn(null); + when(offering.getVgpuProfileId()).thenReturn(null); } @Test @@ -119,7 +125,11 @@ public void testAllocateTo_SuccessfulMatch() { 
when(capacityMgr.checkIfHostHasCpuCapabilityAndCapacity(eq(host2), eq(offering), eq(true))) .thenReturn(new Pair<>(true, false)); - List result = allocator.allocateTo(plan, offering, null, avoid, inputHosts, 2, true, account); + + when(resourceMgr.isGPUDeviceAvailable(offering, host1, vmProfile.getId())).thenReturn(true); + when(resourceMgr.isGPUDeviceAvailable(offering, host2, vmProfile.getId())).thenReturn(true); + + List result = allocator.allocateTo(vmProfile, plan, offering, null, avoid, inputHosts, 2, true, account); // Only host1 should be returned assertEquals(1, result.size()); @@ -136,7 +146,7 @@ public void testAllocateTo_AvoidSetAndGuestLimit() { when(capacityMgr.checkIfHostReachMaxGuestLimit(host2)).thenReturn(true); // Reached limit - List result = allocator.allocateTo(plan, offering, null, avoid, inputHosts, 2, true, account); + List result = allocator.allocateTo(vmProfile, plan, offering, null, avoid, inputHosts, 2, true, account); assertTrue(result.isEmpty()); } @@ -154,9 +164,9 @@ public void testAllocateTo_GPUNotAvailable() { when(pciDetail.getValue()).thenReturn("NVIDIA"); when(vgpuDetail.getValue()).thenReturn("GRID"); - when(resourceMgr.isGPUDeviceAvailable(eq(host1), eq("NVIDIA"), eq("GRID"))).thenReturn(false); + when(resourceMgr.isGPUDeviceAvailable(offering, host1, vmProfile.getId())).thenReturn(false); - List result = allocator.allocateTo(plan, offering, null, avoid, inputHosts, 1, true, account); + List result = allocator.allocateTo(vmProfile, plan, offering, null, avoid, inputHosts, 1, true, account); assertTrue(result.isEmpty()); } diff --git a/server/src/test/java/com/cloud/deploy/DeploymentPlanningManagerImplTest.java b/server/src/test/java/com/cloud/deploy/DeploymentPlanningManagerImplTest.java index 7e25570dc82a..5b03260d2d66 100644 --- a/server/src/test/java/com/cloud/deploy/DeploymentPlanningManagerImplTest.java +++ b/server/src/test/java/com/cloud/deploy/DeploymentPlanningManagerImplTest.java @@ -39,6 +39,7 @@ import 
com.cloud.exception.InsufficientServerCapacityException; import com.cloud.gpu.GPU; import com.cloud.gpu.dao.HostGpuGroupsDao; +import com.cloud.gpu.dao.VgpuProfileDao; import com.cloud.host.Host; import com.cloud.host.HostVO; import com.cloud.host.Status; @@ -1153,6 +1154,11 @@ public VMTemplateDao vmTemplateDao() { return Mockito.mock(VMTemplateDao.class); } + @Bean + public VgpuProfileDao vgpuProfileDao() { + return Mockito.mock(VgpuProfileDao.class); + } + public static class Library implements TypeFilter { @Override diff --git a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java index 587aafa1587c..1408a8153227 100755 --- a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java +++ b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java @@ -30,16 +30,19 @@ import com.cloud.exception.InvalidParameterValueException; import com.cloud.exception.ResourceInUseException; import com.cloud.gpu.HostGpuGroupsVO; +import com.cloud.gpu.VgpuProfileVO; import com.cloud.host.Host; import com.cloud.host.Host.Type; import com.cloud.host.HostStats; import com.cloud.host.HostVO; import com.cloud.host.Status; import com.cloud.hypervisor.Hypervisor.HypervisorType; +import com.cloud.offering.ServiceOffering; import com.cloud.org.Cluster; import com.cloud.resource.ResourceState.Event; import com.cloud.utils.component.ManagerBase; import com.cloud.utils.fsm.NoTransitionException; +import com.cloud.vm.VirtualMachine; import org.apache.cloudstack.api.command.admin.cluster.AddClusterCmd; import org.apache.cloudstack.api.command.admin.cluster.DeleteClusterCmd; import org.apache.cloudstack.api.command.admin.cluster.UpdateClusterCmd; @@ -650,11 +653,17 @@ public void updateHostStorageAccessGroups(Long hostId, List newStorageAc } @Override - public boolean isGPUDeviceAvailable(final Host host, final String groupName, final String vgpuType) { + public boolean 
isGPUDeviceAvailable(ServiceOffering offering, Host host, Long vmId) { // TODO Auto-generated method stub return false; } + @Override + public GPUDeviceTO getGPUDevice(VirtualMachine vm, long hostId, VgpuProfileVO vgpuProfile, int gpuCount) { + // TODO Auto-generated method stub + return null; + } + @Override public GPUDeviceTO getGPUDevice(final long hostId, final String groupName, final String vgpuType) { // TODO Auto-generated method stub @@ -672,6 +681,16 @@ public void updateGPUDetails(final long hostId, final HashMap> getGPUStatistics(final HostVO host) { // TODO Auto-generated method stub diff --git a/server/src/test/java/com/cloud/resourcelimit/ResourceLimitManagerImplTest.java b/server/src/test/java/com/cloud/resourcelimit/ResourceLimitManagerImplTest.java index ad841a58ddc6..a968a2da0b7d 100644 --- a/server/src/test/java/com/cloud/resourcelimit/ResourceLimitManagerImplTest.java +++ b/server/src/test/java/com/cloud/resourcelimit/ResourceLimitManagerImplTest.java @@ -335,6 +335,24 @@ public void testCheckVmMemoryResourceLimit() { } } + @Test + public void testCheckVmGpuResourceLimit() { + ServiceOffering serviceOffering = Mockito.mock(ServiceOffering.class); + VirtualMachineTemplate template = Mockito.mock(VirtualMachineTemplate.class); + Mockito.when(serviceOffering.getHostTag()).thenReturn(hostTags.get(0)); + Mockito.when(template.getTemplateTag()).thenReturn(hostTags.get(0)); + Account account = Mockito.mock(Account.class); + long gpuCount = 2L; + try { + Mockito.doNothing().when(resourceLimitManager).checkResourceLimitWithTag(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any()); + resourceLimitManager.checkVmGpuResourceLimit(account, true, serviceOffering, template, gpuCount); + Mockito.verify(resourceLimitManager, Mockito.times(1)).checkResourceLimitWithTag(account, Resource.ResourceType.gpu, null, gpuCount); + Mockito.verify(resourceLimitManager, Mockito.times(1)).checkResourceLimitWithTag(account, Resource.ResourceType.gpu, hostTags.get(0), 
gpuCount); + } catch (ResourceAllocationException e) { + Assert.fail("Exception encountered: " + e.getMessage()); + } + } + @Test public void testCheckVolumeResourceLimit() { String checkTag = storageTags.get(0); @@ -855,6 +873,26 @@ public void testCalculateVmMemoryCountForAccount() { Assert.assertEquals(vms.size() * memory, resourceLimitManager.calculateVmMemoryCountForAccount(accountId, tag)); } + @Test + public void testCalculateVmGpuCountForAccount() { + long accountId = 1L; + String tag = null; + Mockito.doReturn(1L).when(resourceLimitManager).calculateGpuForAccount(accountId); + Assert.assertEquals(1L, resourceLimitManager.calculateVmGpuCountForAccount(accountId, tag)); + + tag = ""; + Mockito.doReturn(2L).when(resourceLimitManager).calculateGpuForAccount(accountId); + Assert.assertEquals(2L, resourceLimitManager.calculateVmGpuCountForAccount(accountId, tag)); + + tag = "tag"; + UserVmJoinVO vm = Mockito.mock(UserVmJoinVO.class); + int gpuCount = 2; + Mockito.when(vm.getGpuCount()).thenReturn(gpuCount); + List vms = List.of(vm, vm); + Mockito.doReturn(vms).when(resourceLimitManager).getVmsWithAccountAndTag(accountId, tag); + Assert.assertEquals(vms.size() * gpuCount, resourceLimitManager.calculateVmGpuCountForAccount(accountId, tag)); + } + @Test public void testCalculatePrimaryStorageForAccount() { long accountId = 1L; @@ -1245,6 +1283,56 @@ public void testDecrementVmMemoryResourceCount() { .decrementResourceCountWithTag(accountId, Resource.ResourceType.memory, tag, Long.valueOf(memory)); } + @Test + public void testIncrementVmGpuResourceCount() { + long accountId = 1L; + String tag = "tag"; + Mockito.doReturn(new ArrayList<>()).when(resourceLimitManager) + .getResourceLimitHostTagsForResourceCountOperation(Mockito.anyBoolean(), + Mockito.any(ServiceOffering.class), Mockito.any(VirtualMachineTemplate.class)); + resourceLimitManager.incrementVmGpuResourceCount(accountId, false, + Mockito.mock(ServiceOffering.class), 
Mockito.mock(VirtualMachineTemplate.class), null); + Mockito.verify(resourceLimitManager, Mockito.never()).incrementResourceCountWithTag(Mockito.anyLong(), + Mockito.eq(Resource.ResourceType.gpu), Mockito.anyString(), Mockito.anyLong()); + + Mockito.doReturn(List.of(tag)).when(resourceLimitManager) + .getResourceLimitHostTagsForResourceCountOperation(Mockito.anyBoolean(), + Mockito.any(ServiceOffering.class), Mockito.any(VirtualMachineTemplate.class)); + mockIncrementResourceCountWithTag(); + ServiceOffering offering = Mockito.mock(ServiceOffering.class); + Long gpuCount = 2L; + resourceLimitManager.incrementVmGpuResourceCount(accountId, false, + offering, Mockito.mock(VirtualMachineTemplate.class), gpuCount); + Mockito.verify(resourceLimitManager, Mockito.times(1)) + .incrementResourceCountWithTag(accountId, Resource.ResourceType.gpu, tag, gpuCount); + } + + + @Test + public void testDecrementVmGpuResourceCount() { + long accountId = 1L; + String tag = "tag"; + Mockito.doReturn(new ArrayList<>()).when(resourceLimitManager) + .getResourceLimitHostTagsForResourceCountOperation(Mockito.anyBoolean(), + Mockito.any(ServiceOffering.class), Mockito.any(VirtualMachineTemplate.class)); + resourceLimitManager.decrementVmGpuResourceCount(accountId, false, + Mockito.mock(ServiceOffering.class), Mockito.mock(VirtualMachineTemplate.class), null); + Mockito.verify(resourceLimitManager, Mockito.never()).decrementResourceCountWithTag(Mockito.anyLong(), + Mockito.eq(Resource.ResourceType.gpu), Mockito.anyString(), Mockito.anyLong()); + + Mockito.doReturn(List.of(tag)).when(resourceLimitManager) + .getResourceLimitHostTagsForResourceCountOperation(Mockito.anyBoolean(), + Mockito.any(ServiceOffering.class), Mockito.any(VirtualMachineTemplate.class)); + mockDecrementResourceCountWithTag(); + ServiceOffering offering = Mockito.mock(ServiceOffering.class); + int gpuCount = 1; + Mockito.when(offering.getGpuCount()).thenReturn(gpuCount); + 
resourceLimitManager.decrementVmGpuResourceCount(accountId, false, + offering, Mockito.mock(VirtualMachineTemplate.class), null); + Mockito.verify(resourceLimitManager, Mockito.times(1)) + .decrementResourceCountWithTag(accountId, Resource.ResourceType.gpu, tag, Long.valueOf(gpuCount)); + } + @Test public void testUpdateResourceLimitForAccount() { Long accountId = 1L; diff --git a/server/src/test/java/com/cloud/vm/FirstFitPlannerTest.java b/server/src/test/java/com/cloud/vm/FirstFitPlannerTest.java index fb188d159e21..5b877cc66169 100644 --- a/server/src/test/java/com/cloud/vm/FirstFitPlannerTest.java +++ b/server/src/test/java/com/cloud/vm/FirstFitPlannerTest.java @@ -350,6 +350,7 @@ private void initializeForTest(VirtualMachineProfileImpl vmProfile, DataCenterDe when(offering.getCpu()).thenReturn(noOfCpusInOffering); when(offering.getSpeed()).thenReturn(cpuSpeedInOffering); when(offering.getRamSize()).thenReturn(ramInOffering); + when(offering.getVgpuProfileId()).thenReturn(null); List clustersWithEnoughCapacity = new ArrayList(); clustersWithEnoughCapacity.add(1L); diff --git a/server/src/test/java/com/cloud/vpc/MockResourceLimitManagerImpl.java b/server/src/test/java/com/cloud/vpc/MockResourceLimitManagerImpl.java index 3f3220d09341..151c7ff8908a 100644 --- a/server/src/test/java/com/cloud/vpc/MockResourceLimitManagerImpl.java +++ b/server/src/test/java/com/cloud/vpc/MockResourceLimitManagerImpl.java @@ -381,4 +381,19 @@ public void incrementVmMemoryResourceCount(long accountId, Boolean display, Serv public void decrementVmMemoryResourceCount(long accountId, Boolean display, ServiceOffering serviceOffering, VirtualMachineTemplate template, Long memory) { } + + @Override + public void checkVmGpuResourceLimit(Account owner, Boolean display, ServiceOffering serviceOffering, VirtualMachineTemplate template, Long gpu) throws ResourceAllocationException { + + } + + @Override + public void incrementVmGpuResourceCount(long accountId, Boolean display, ServiceOffering 
serviceOffering, VirtualMachineTemplate template, Long gpu) { + + } + + @Override + public void decrementVmGpuResourceCount(long accountId, Boolean display, ServiceOffering serviceOffering, VirtualMachineTemplate template, Long gpu) { + + } } diff --git a/server/src/test/java/org/apache/cloudstack/gpu/GpuServiceImplTest.java b/server/src/test/java/org/apache/cloudstack/gpu/GpuServiceImplTest.java new file mode 100644 index 000000000000..b8c5bbc1dea4 --- /dev/null +++ b/server/src/test/java/org/apache/cloudstack/gpu/GpuServiceImplTest.java @@ -0,0 +1,1495 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package org.apache.cloudstack.gpu; + +import com.cloud.agent.api.VgpuTypesInfo; +import com.cloud.agent.api.to.GPUDeviceTO; +import com.cloud.exception.InvalidParameterValueException; +import com.cloud.gpu.GpuCardVO; +import com.cloud.gpu.GpuDeviceVO; +import com.cloud.gpu.VgpuProfileVO; +import com.cloud.gpu.dao.GpuCardDao; +import com.cloud.gpu.dao.GpuDeviceDao; +import com.cloud.gpu.dao.VgpuProfileDao; +import com.cloud.host.HostVO; +import com.cloud.host.Status; +import com.cloud.host.dao.HostDao; +import com.cloud.resource.ResourceManager; +import com.cloud.service.ServiceOfferingVO; +import com.cloud.service.dao.ServiceOfferingDao; +import com.cloud.utils.Pair; +import com.cloud.utils.db.GlobalLock; +import com.cloud.utils.db.Transaction; +import com.cloud.utils.db.TransactionCallback; +import com.cloud.utils.db.TransactionCallbackNoReturn; +import com.cloud.utils.exception.CloudRuntimeException; +import com.cloud.vm.UserVmManager; +import com.cloud.vm.VMInstanceVO; +import com.cloud.vm.VirtualMachine; +import com.cloud.vm.dao.VMInstanceDao; +import org.apache.cloudstack.api.ResponseObject; +import org.apache.cloudstack.api.command.admin.gpu.CreateGpuCardCmd; +import org.apache.cloudstack.api.command.admin.gpu.CreateGpuDeviceCmd; +import org.apache.cloudstack.api.command.admin.gpu.CreateVgpuProfileCmd; +import org.apache.cloudstack.api.command.admin.gpu.DeleteGpuCardCmd; +import org.apache.cloudstack.api.command.admin.gpu.DeleteVgpuProfileCmd; +import org.apache.cloudstack.api.command.admin.gpu.ListGpuDevicesCmdByAdmin; +import org.apache.cloudstack.api.command.admin.gpu.UnmanageGpuDeviceCmd; +import org.apache.cloudstack.api.command.admin.gpu.DiscoverGpuDevicesCmd; +import org.apache.cloudstack.api.command.admin.gpu.ManageGpuDeviceCmd; +import org.apache.cloudstack.api.command.admin.gpu.UpdateGpuCardCmd; +import org.apache.cloudstack.api.command.admin.gpu.UpdateGpuDeviceCmd; +import org.apache.cloudstack.api.command.admin.gpu.UpdateVgpuProfileCmd; +import 
org.apache.cloudstack.api.command.user.gpu.ListGpuCardsCmd; +import org.apache.cloudstack.api.command.user.gpu.ListVgpuProfilesCmd; +import org.apache.cloudstack.api.response.GpuCardResponse; +import org.apache.cloudstack.api.response.GpuDeviceResponse; +import org.apache.cloudstack.api.response.ListResponse; +import org.apache.cloudstack.api.response.VgpuProfileResponse; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.MockedStatic; +import org.mockito.Mockito; +import org.mockito.Spy; +import org.mockito.junit.MockitoJUnitRunner; + +import javax.naming.ConfigurationException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +@RunWith(MockitoJUnitRunner.class) +public class GpuServiceImplTest { + + // Test Constants + private static final String GPU_DEVICE_ID = "1234"; + private static final String GPU_DEVICE_NAME = "RTX 4090"; + private static final String GPU_CARD_NAME = "NVIDIA RTX 4090"; + private static final String GPU_VENDOR_NAME = "NVIDIA"; + private static final String GPU_VENDOR_ID = "10de"; + private static final String VGPU_PROFILE_NAME = "grid_rtx4090-8q"; + private static 
final String VGPU_PROFILE_DESCRIPTION = "RTX 4090 8GB profile"; + private static final String GPU_BUS_ADDRESS = "0000:01:00.0"; + private static final String GPU_BUS_ADDRESS_2 = "0000:02:00.0"; + private static final String GPU_BUS_ADDRESS_3 = "0000:03:00.0"; + private static final String GPU_CARD_UUID = "gpu-card-uuid"; + private static final String VGPU_PROFILE_UUID = "vgpu-profile-uuid"; + private static final String GPU_DEVICE_UUID = "gpu-device-uuid"; + private static final String HOST_UUID = "host-uuid"; + private static final String HOST_NAME = "test-host"; + private static final String VM_NAME = "test-vm"; + private static final String NUMA_NODE_0 = "numa0"; + private static final String NUMA_NODE_1 = "numa1"; + private static final String PCI_ROOT_1 = "0000:00"; + private static final String PCI_ROOT_2 = "0000:40"; + private static final String PCI_ROOT_3 = "0000:80"; + private static final String TEST_PROFILE_NAME = "test-profile"; + private static final String UPDATED_PROFILE_NAME = "updated-profile"; + private static final String UPDATED_PROFILE_DESCRIPTION = "Updated description"; + private static final String UPDATED_GPU_DEVICE_NAME = "Updated RTX 4090"; + private static final String UPDATED_GPU_CARD_NAME = "Updated NVIDIA RTX 4090"; + private static final String UPDATED_GPU_VENDOR_NAME = "Updated NVIDIA"; + private static final String PASSTHROUGH_PROFILE = "passthrough"; + private static final String COMMAND_CREATE_VGPU_PROFILE = "createVgpuProfile"; + private static final String COMMAND_UPDATE_VGPU_PROFILE = "updateVgpuProfile"; + private static final String COMMAND_LIST_GPU_CARDS = "listGpuCards"; + private static final String COMMAND_LIST_VGPU_PROFILES = "listVgpuProfiles"; + private static final String COMMAND_LIST_GPU_DEVICES = "listGpuDevices"; + private static final String COMMAND_DISCOVER_GPU_DEVICES = "discoverGpuDevices"; + private static final String CONFIG_COMPONENT_NAME = "GpuService"; + + // Test IDs + private static final Long 
GPU_CARD_ID = 1L; + private static final Long SERVICE_OFFERING_ID = 1L; + private static final Long VGPU_PROFILE_ID = 1L; + private static final Long GPU_DEVICE_ID_LONG = 1L; + private static final Long HOST_ID = 1L; + private static final Long VM_ID = 1L; + private static final Long MAX_VGPU_PER_PGPU = 4L; + private static final Long UPDATED_MAX_VGPU_PER_PGPU = 8L; + private static final Long START_INDEX = 0L; + private static final Long PAGE_SIZE = 20L; + private static final Long INVALID_ID = 999L; + private static final Long VM_ID_100 = 100L; + private static final Long GPU_CARD_ID_2 = 2L; + private static final Long GPU_DEVICE_ID_2 = 2L; + private static final Long PARENT_GPU_DEVICE_ID = 2L; + private static final Long DIFFERENT_HOST_ID = 99L; + private static final Long DIFFERENT_CARD_ID = 99L; + + @Mock + private GpuCardDao gpuCardDao; + + @Mock + private VgpuProfileDao vgpuProfileDao; + + @Mock + private GpuDeviceDao gpuDeviceDao; + + @Mock + private ServiceOfferingDao serviceOfferingDao; + + @Mock + private HostDao hostDao; + + @Mock + private UserVmManager userVmManager; + + @Mock + private VMInstanceDao vmInstanceDao; + + @Mock + private ResourceManager resourceManager; + + @InjectMocks + @Spy + private GpuServiceImpl gpuService; + + @Mock + private GpuCardVO mockGpuCard; + + @Mock + private VgpuProfileVO mockVgpuProfile; + + @Mock + private ServiceOfferingVO mockServiceOffering; + + @Mock + private GpuDeviceVO mockGpuDevice; + + @Mock + private HostVO mockHost; + + @Before + public void setUp() { + // Setup GPU Card mock + when(mockGpuCard.getId()).thenReturn(GPU_CARD_ID); + when(mockGpuCard.getUuid()).thenReturn(GPU_CARD_UUID); + when(mockGpuCard.getDeviceId()).thenReturn(GPU_DEVICE_ID); + when(mockGpuCard.getDeviceName()).thenReturn(GPU_DEVICE_NAME); + when(mockGpuCard.getName()).thenReturn(GPU_CARD_NAME); + when(mockGpuCard.getVendorName()).thenReturn(GPU_VENDOR_NAME); + when(mockGpuCard.getVendorId()).thenReturn(GPU_VENDOR_ID); + + // Setup vGPU 
Profile mock + when(mockVgpuProfile.getId()).thenReturn(VGPU_PROFILE_ID); + when(mockVgpuProfile.getUuid()).thenReturn(VGPU_PROFILE_UUID); + when(mockVgpuProfile.getName()).thenReturn(VGPU_PROFILE_NAME); + when(mockVgpuProfile.getDescription()).thenReturn(VGPU_PROFILE_DESCRIPTION); + when(mockVgpuProfile.getCardId()).thenReturn(GPU_CARD_ID); + when(mockVgpuProfile.getMaxVgpuPerPgpu()).thenReturn(MAX_VGPU_PER_PGPU); + + // Setup GPU Device mock + when(mockGpuDevice.getId()).thenReturn(GPU_DEVICE_ID_LONG); + when(mockGpuDevice.getUuid()).thenReturn(GPU_DEVICE_UUID); + when(mockGpuDevice.getBusAddress()).thenReturn(GPU_BUS_ADDRESS); + when(mockGpuDevice.getHostId()).thenReturn(HOST_ID); + when(mockGpuDevice.getCardId()).thenReturn(GPU_CARD_ID); + when(mockGpuDevice.getVgpuProfileId()).thenReturn(VGPU_PROFILE_ID); + when(mockGpuDevice.getType()).thenReturn(GpuDevice.DeviceType.PCI); + when(mockGpuDevice.getState()).thenReturn(GpuDevice.State.Free); + when(mockGpuDevice.getManagedState()).thenReturn(GpuDevice.ManagedState.Managed); + when(mockGpuDevice.getVmId()).thenReturn(null); + when(mockGpuDevice.getParentGpuDeviceId()).thenReturn(null); + + // Setup Host mock + when(mockHost.getId()).thenReturn(HOST_ID); + when(mockHost.getUuid()).thenReturn(HOST_UUID); + when(mockHost.getName()).thenReturn(HOST_NAME); + when(mockHost.getStatus()).thenReturn(Status.Up); + + // Setup Service Offering mock + when(mockServiceOffering.getGpuDisplay()).thenReturn(false); + + } + + @Test + public void testConfigure() throws ConfigurationException { + Map params = new HashMap<>(); + boolean result = gpuService.configure(CONFIG_COMPONENT_NAME, params); + assertTrue(result); + } + + @Test + public void testGetCommands() { + List> commands = gpuService.getCommands(); + assertNotNull(commands); + assertFalse(commands.isEmpty()); + assertTrue(commands.contains(CreateGpuCardCmd.class)); + assertTrue(commands.contains(ListGpuCardsCmd.class)); + 
assertTrue(commands.contains(CreateVgpuProfileCmd.class)); + assertTrue(commands.contains(ListGpuDevicesCmdByAdmin.class)); + } + + @Test + public void testGetConfigComponentName() { + String componentName = gpuService.getConfigComponentName(); + assertEquals(CONFIG_COMPONENT_NAME, componentName); + } + + // GPU Card Tests + @Test + public void testCreateGpuCard_Success() { + CreateGpuCardCmd cmd = mock(CreateGpuCardCmd.class); + when(cmd.getDeviceId()).thenReturn(GPU_DEVICE_ID); + when(cmd.getDeviceName()).thenReturn(GPU_DEVICE_NAME); + when(cmd.getName()).thenReturn(GPU_CARD_NAME); + when(cmd.getVendorName()).thenReturn(GPU_VENDOR_NAME); + when(cmd.getVendorId()).thenReturn(GPU_VENDOR_ID); + + when(gpuCardDao.findByVendorIdAndDeviceId(GPU_VENDOR_ID, GPU_DEVICE_ID)).thenReturn(null); + when(gpuCardDao.persist(any(GpuCardVO.class))).thenReturn(mockGpuCard); + when(vgpuProfileDao.persist(any(VgpuProfileVO.class))).thenReturn(mockVgpuProfile); + + GpuCardVO result = gpuService.createGpuCard(cmd); + + assertNotNull(result); + assertEquals(GPU_CARD_UUID, result.getUuid()); + assertEquals(GPU_DEVICE_ID, result.getDeviceId()); + assertEquals(GPU_DEVICE_NAME, result.getDeviceName()); + assertEquals(GPU_CARD_NAME, result.getName()); + assertEquals(GPU_VENDOR_NAME, result.getVendorName()); + assertEquals(GPU_VENDOR_ID, result.getVendorId()); + verify(gpuCardDao).persist(any(GpuCardVO.class)); + verify(vgpuProfileDao).persist(any(VgpuProfileVO.class)); + } + + @Test(expected = InvalidParameterValueException.class) + public void testCreateGpuCard_BlankDeviceId() { + CreateGpuCardCmd cmd = mock(CreateGpuCardCmd.class); + when(cmd.getDeviceId()).thenReturn(""); + + gpuService.createGpuCard(cmd); + } + + @Test(expected = InvalidParameterValueException.class) + public void testCreateGpuCard_NullDeviceId() { + CreateGpuCardCmd cmd = mock(CreateGpuCardCmd.class); + when(cmd.getDeviceId()).thenReturn(null); + + gpuService.createGpuCard(cmd); + } + + @Test(expected = 
InvalidParameterValueException.class) + public void testCreateGpuCard_DuplicateCard() { + CreateGpuCardCmd cmd = mock(CreateGpuCardCmd.class); + when(cmd.getDeviceId()).thenReturn(GPU_DEVICE_ID); + when(cmd.getDeviceName()).thenReturn(GPU_DEVICE_NAME); + when(cmd.getName()).thenReturn(GPU_CARD_NAME); + when(cmd.getVendorName()).thenReturn(GPU_VENDOR_NAME); + when(cmd.getVendorId()).thenReturn(GPU_VENDOR_ID); + + when(gpuCardDao.findByVendorIdAndDeviceId(GPU_VENDOR_ID, GPU_DEVICE_ID)).thenReturn(mockGpuCard); + + gpuService.createGpuCard(cmd); + } + + @Test + public void testUpdateGpuCard_Success() { + UpdateGpuCardCmd cmd = mock(UpdateGpuCardCmd.class); + when(cmd.getId()).thenReturn(GPU_CARD_ID); + when(cmd.getDeviceName()).thenReturn(UPDATED_GPU_DEVICE_NAME); + when(cmd.getName()).thenReturn(UPDATED_GPU_CARD_NAME); + when(cmd.getVendorName()).thenReturn(UPDATED_GPU_VENDOR_NAME); + + when(gpuCardDao.findById(GPU_CARD_ID)).thenReturn(mockGpuCard); + when(gpuCardDao.update(eq(GPU_CARD_ID), any(GpuCardVO.class))).thenReturn(true); + + GpuCardVO result = gpuService.updateGpuCard(cmd); + + assertNotNull(result); + assertEquals(GPU_CARD_UUID, result.getUuid()); + verify(gpuCardDao).update(eq(GPU_CARD_ID), any(GpuCardVO.class)); + } + + @Test(expected = InvalidParameterValueException.class) + public void testUpdateGpuCard_NotFound() { + UpdateGpuCardCmd cmd = mock(UpdateGpuCardCmd.class); + when(cmd.getId()).thenReturn(INVALID_ID); + + when(gpuCardDao.findById(INVALID_ID)).thenReturn(null); + + gpuService.updateGpuCard(cmd); + } + + @Test + public void testDeleteGpuCard_Success() { + DeleteGpuCardCmd cmd = mock(DeleteGpuCardCmd.class); + when(cmd.getId()).thenReturn(GPU_CARD_ID); + + when(gpuCardDao.findById(GPU_CARD_ID)).thenReturn(mockGpuCard); + when(gpuDeviceDao.isGpuCardInUse(GPU_CARD_ID)).thenReturn(false); + when(gpuCardDao.remove(GPU_CARD_ID)).thenReturn(true); + + boolean result = gpuService.deleteGpuCard(cmd); + + assertTrue(result); + 
verify(gpuCardDao).remove(GPU_CARD_ID); + } + + @Test(expected = InvalidParameterValueException.class) + public void testDeleteGpuCard_InUse() { + DeleteGpuCardCmd cmd = mock(DeleteGpuCardCmd.class); + when(cmd.getId()).thenReturn(GPU_CARD_ID); + + when(gpuCardDao.findById(GPU_CARD_ID)).thenReturn(mockGpuCard); + when(gpuDeviceDao.isGpuCardInUse(GPU_CARD_ID)).thenReturn(true); + + gpuService.deleteGpuCard(cmd); + } + + // vGPU Profile Tests + @Test + public void testCreateVgpuProfile_Success() { + CreateVgpuProfileCmd cmd = mock(CreateVgpuProfileCmd.class); + when(cmd.getName()).thenReturn(VGPU_PROFILE_NAME); + when(cmd.getDescription()).thenReturn(VGPU_PROFILE_DESCRIPTION); + when(cmd.getCardId()).thenReturn(GPU_CARD_ID); + when(cmd.getMaxVgpuPerPgpu()).thenReturn(MAX_VGPU_PER_PGPU); + when(cmd.getCommandName()).thenReturn(COMMAND_CREATE_VGPU_PROFILE); + + when(gpuCardDao.findById(GPU_CARD_ID)).thenReturn(mockGpuCard); + when(vgpuProfileDao.persist(any(VgpuProfileVO.class))).thenReturn(mockVgpuProfile); + + VgpuProfileResponse result = gpuService.createVgpuProfile(cmd); + + assertNotNull(result); + assertEquals(VGPU_PROFILE_UUID, result.getId()); + assertEquals(VGPU_PROFILE_NAME, result.getName()); + assertEquals(VGPU_PROFILE_DESCRIPTION, result.getDescription()); + assertEquals(GPU_CARD_UUID, result.getGpuCardId()); + verify(vgpuProfileDao).persist(any(VgpuProfileVO.class)); + } + + @Test(expected = InvalidParameterValueException.class) + public void testCreateVgpuProfile_BlankName() { + CreateVgpuProfileCmd cmd = mock(CreateVgpuProfileCmd.class); + when(cmd.getName()).thenReturn(""); + + gpuService.createVgpuProfile(cmd); + } + + @Test(expected = InvalidParameterValueException.class) + public void testCreateVgpuProfile_InvalidCardId() { + CreateVgpuProfileCmd cmd = mock(CreateVgpuProfileCmd.class); + when(cmd.getName()).thenReturn(TEST_PROFILE_NAME); + when(cmd.getCardId()).thenReturn(INVALID_ID); + + when(gpuCardDao.findById(INVALID_ID)).thenReturn(null); + + 
gpuService.createVgpuProfile(cmd); + } + + @Test(expected = InvalidParameterValueException.class) + public void testCreateVgpuProfile_DuplicateName() { + CreateVgpuProfileCmd cmd = mock(CreateVgpuProfileCmd.class); + when(cmd.getName()).thenReturn(TEST_PROFILE_NAME); + when(cmd.getCardId()).thenReturn(GPU_CARD_ID); + + when(gpuCardDao.findById(GPU_CARD_ID)).thenReturn(mockGpuCard); + when(vgpuProfileDao.findByNameAndCardId(TEST_PROFILE_NAME, GPU_CARD_ID)).thenReturn(mockVgpuProfile); + + gpuService.createVgpuProfile(cmd); + } + + @Test + public void testUpdateVgpuProfile_Success() { + UpdateVgpuProfileCmd cmd = mock(UpdateVgpuProfileCmd.class); + when(cmd.getId()).thenReturn(VGPU_PROFILE_ID); + when(cmd.getProfileName()).thenReturn(UPDATED_PROFILE_NAME); + when(cmd.getDescription()).thenReturn(UPDATED_PROFILE_DESCRIPTION); + when(cmd.getMaxVgpuPerPgpu()).thenReturn(UPDATED_MAX_VGPU_PER_PGPU); + when(cmd.getCommandName()).thenReturn(COMMAND_UPDATE_VGPU_PROFILE); + + when(vgpuProfileDao.findById(VGPU_PROFILE_ID)).thenReturn(mockVgpuProfile); + when(vgpuProfileDao.findByNameAndCardId(UPDATED_PROFILE_NAME, GPU_CARD_ID)).thenReturn(null); + when(vgpuProfileDao.update(eq(VGPU_PROFILE_ID), any(VgpuProfileVO.class))).thenReturn(true); + when(gpuCardDao.findById(GPU_CARD_ID)).thenReturn(mockGpuCard); + + VgpuProfileResponse result = gpuService.updateVgpuProfile(cmd); + + assertNotNull(result); + assertEquals(VGPU_PROFILE_UUID, result.getId()); + verify(vgpuProfileDao).update(eq(VGPU_PROFILE_ID), any(VgpuProfileVO.class)); + } + + @Test + public void testDeleteVgpuProfile_Success() { + DeleteVgpuProfileCmd cmd = mock(DeleteVgpuProfileCmd.class); + when(cmd.getId()).thenReturn(VGPU_PROFILE_ID); + + when(vgpuProfileDao.findById(VGPU_PROFILE_ID)).thenReturn(mockVgpuProfile); + when(gpuDeviceDao.isVgpuProfileInUse(VGPU_PROFILE_ID)).thenReturn(false); + when(vgpuProfileDao.remove(VGPU_PROFILE_ID)).thenReturn(true); + + boolean result = gpuService.deleteVgpuProfile(cmd); + + 
assertTrue(result); + verify(vgpuProfileDao).remove(VGPU_PROFILE_ID); + } + + @Test(expected = InvalidParameterValueException.class) + public void testDeleteVgpuProfile_InUse() { + DeleteVgpuProfileCmd cmd = mock(DeleteVgpuProfileCmd.class); + when(cmd.getId()).thenReturn(VGPU_PROFILE_ID); + + when(vgpuProfileDao.findById(VGPU_PROFILE_ID)).thenReturn(mockVgpuProfile); + when(gpuDeviceDao.isVgpuProfileInUse(VGPU_PROFILE_ID)).thenReturn(true); + + gpuService.deleteVgpuProfile(cmd); + } + + // List Methods Tests + @Test + public void testListGpuCards_Success() { + ListGpuCardsCmd cmd = mock(ListGpuCardsCmd.class); + when(cmd.getId()).thenReturn(null); + when(cmd.getKeyword()).thenReturn(null); + when(cmd.getVendorName()).thenReturn(null); + when(cmd.getVendorId()).thenReturn(null); + when(cmd.getDeviceId()).thenReturn(null); + when(cmd.getDeviceName()).thenReturn(null); + when(cmd.getStartIndex()).thenReturn(START_INDEX); + when(cmd.getPageSizeVal()).thenReturn(PAGE_SIZE); + when(cmd.getCommandName()).thenReturn(COMMAND_LIST_GPU_CARDS); + + List gpuCards = List.of(mockGpuCard); + Pair, Integer> result = new Pair<>(gpuCards, 1); + when(gpuCardDao.searchAndCountGpuCards(any(), any(), any(), any(), any(), any(), anyBoolean(), any(), any())).thenReturn( + result); + + ListResponse response = gpuService.listGpuCards(cmd); + + assertNotNull(response); + assertEquals(1, response.getResponses().size()); + assertEquals(GPU_CARD_UUID, response.getResponses().get(0).getId()); + } + + @Test + public void testListVgpuProfiles_Success() { + ListVgpuProfilesCmd cmd = mock(ListVgpuProfilesCmd.class); + when(cmd.getId()).thenReturn(null); + when(cmd.getName()).thenReturn(null); + when(cmd.getKeyword()).thenReturn(null); + when(cmd.getCardId()).thenReturn(null); + when(cmd.getStartIndex()).thenReturn(START_INDEX); + when(cmd.getPageSizeVal()).thenReturn(PAGE_SIZE); + when(cmd.getCommandName()).thenReturn(COMMAND_LIST_VGPU_PROFILES); + + List vgpuProfiles = List.of(mockVgpuProfile); + 
Pair, Integer> result = new Pair<>(vgpuProfiles, 1); + when(vgpuProfileDao.searchAndCountVgpuProfiles(any(), any(), any(), any(), anyBoolean(), any(), any())).thenReturn(result); + when(gpuCardDao.findById(GPU_CARD_ID)).thenReturn(mockGpuCard); + + ListResponse response = gpuService.listVgpuProfiles(cmd); + + assertNotNull(response); + assertEquals(1, response.getResponses().size()); + assertEquals(VGPU_PROFILE_UUID, response.getResponses().get(0).getId()); + } + + // GPU Device Tests + @Test + public void testCreateGpuDevice_Success() { + CreateGpuDeviceCmd cmd = mock(CreateGpuDeviceCmd.class); + when(cmd.getHostId()).thenReturn(HOST_ID); + when(cmd.getBusAddress()).thenReturn(GPU_BUS_ADDRESS); + when(cmd.getGpuCardId()).thenReturn(GPU_CARD_ID); + when(cmd.getVgpuProfileId()).thenReturn(VGPU_PROFILE_ID); + when(cmd.getType()).thenReturn(GpuDevice.DeviceType.PCI); + when(cmd.getParentGpuDeviceId()).thenReturn(null); + when(cmd.getNumaNode()).thenReturn(NUMA_NODE_0); + + when(hostDao.findById(HOST_ID)).thenReturn(mockHost); + when(gpuDeviceDao.findByHostIdAndBusAddress(HOST_ID, GPU_BUS_ADDRESS)).thenReturn(null); + when(gpuCardDao.findById(GPU_CARD_ID)).thenReturn(mockGpuCard); + when(vgpuProfileDao.findById(VGPU_PROFILE_ID)).thenReturn(mockVgpuProfile); + when(gpuDeviceDao.persist(any(GpuDeviceVO.class))).thenReturn(mockGpuDevice); + + GpuDeviceResponse result = gpuService.createGpuDevice(cmd); + + assertNotNull(result); + assertEquals(GPU_DEVICE_UUID, result.getId()); + assertEquals(GPU_BUS_ADDRESS, result.getBussAddress()); + assertEquals(GPU_CARD_UUID, result.getGpuCardId()); + assertEquals(VGPU_PROFILE_UUID, result.getVgpuProfileId()); + assertEquals(HOST_UUID, result.getHostId()); + verify(gpuDeviceDao).persist(any(GpuDeviceVO.class)); + } + + @Test(expected = InvalidParameterValueException.class) + public void testCreateGpuDevice_HostNotFound() { + CreateGpuDeviceCmd cmd = mock(CreateGpuDeviceCmd.class); + when(cmd.getHostId()).thenReturn(INVALID_ID); + + 
when(hostDao.findById(INVALID_ID)).thenReturn(null); + + gpuService.createGpuDevice(cmd); + } + + @Test(expected = InvalidParameterValueException.class) + public void testCreateGpuDevice_DuplicateBusAddress() { + CreateGpuDeviceCmd cmd = mock(CreateGpuDeviceCmd.class); + when(cmd.getHostId()).thenReturn(HOST_ID); + when(cmd.getBusAddress()).thenReturn(GPU_BUS_ADDRESS); + + when(hostDao.findById(HOST_ID)).thenReturn(mockHost); + when(gpuDeviceDao.findByHostIdAndBusAddress(HOST_ID, GPU_BUS_ADDRESS)).thenReturn(mockGpuDevice); + + gpuService.createGpuDevice(cmd); + } + + @Test + public void testUpdateGpuDevice_Success() { + UpdateGpuDeviceCmd cmd = mock(UpdateGpuDeviceCmd.class); + when(cmd.getId()).thenReturn(GPU_DEVICE_ID_LONG); + when(cmd.getGpuCardId()).thenReturn(GPU_CARD_ID_2); + when(cmd.getVgpuProfileId()).thenReturn(GPU_CARD_ID_2); + when(cmd.getParentGpuDeviceId()).thenReturn(null); + + GpuDeviceVO device = mock(GpuDeviceVO.class); + when(device.getCardId()).thenReturn(GPU_CARD_ID); + when(device.getVgpuProfileId()).thenReturn(VGPU_PROFILE_ID); + when(device.getVmId()).thenReturn(null); + + VgpuProfileVO newProfile = mock(VgpuProfileVO.class); + when(newProfile.getCardId()).thenReturn(GPU_CARD_ID_2); + + when(gpuDeviceDao.findById(GPU_DEVICE_ID_LONG)).thenReturn(device); + when(gpuCardDao.findById(GPU_CARD_ID_2)).thenReturn(mockGpuCard); + when(vgpuProfileDao.findById(GPU_CARD_ID_2)).thenReturn(newProfile); + when(gpuDeviceDao.update(eq(GPU_DEVICE_ID_LONG), any(GpuDeviceVO.class))).thenReturn(true); + + GpuDeviceResponse result = gpuService.updateGpuDevice(cmd); + + assertNotNull(result); + verify(gpuDeviceDao).update(eq(GPU_DEVICE_ID_LONG), any(GpuDeviceVO.class)); + } + + @Test(expected = InvalidParameterValueException.class) + public void testUpdateGpuDevice_DeviceAllocated() { + UpdateGpuDeviceCmd cmd = mock(UpdateGpuDeviceCmd.class); + when(cmd.getId()).thenReturn(GPU_DEVICE_ID_LONG); + + GpuDeviceVO device = mock(GpuDeviceVO.class); + 
when(device.getVmId()).thenReturn(VM_ID_100); + + when(gpuDeviceDao.findById(GPU_DEVICE_ID_LONG)).thenReturn(device); + + gpuService.updateGpuDevice(cmd); + } + + @Test + public void testListGpuDevices_Success() { + ListGpuDevicesCmdByAdmin cmd = mock(ListGpuDevicesCmdByAdmin.class); + when(cmd.getId()).thenReturn(null); + when(cmd.getKeyword()).thenReturn(null); + when(cmd.getHostId()).thenReturn(null); + when(cmd.getGpuCardId()).thenReturn(null); + when(cmd.getVgpuProfileId()).thenReturn(null); + when(cmd.getVmId()).thenReturn(null); + when(cmd.getStartIndex()).thenReturn(START_INDEX); + when(cmd.getPageSizeVal()).thenReturn(PAGE_SIZE); + when(cmd.getCommandName()).thenReturn(COMMAND_LIST_GPU_DEVICES); + when(cmd.getResponseView()).thenReturn(ResponseObject.ResponseView.Full); + + List gpuDevices = List.of(mockGpuDevice); + Pair, Integer> result = new Pair<>(gpuDevices, 1); + when(gpuDeviceDao.searchAndCountGpuDevices(any(), any(), any(), any(), any(), any(), any(), any())).thenReturn( + result); + + ListResponse response = gpuService.listGpuDevices(cmd); + + assertNotNull(response); + assertEquals(1, response.getResponses().size()); + assertEquals(GPU_DEVICE_UUID, response.getResponses().get(0).getId()); + } + + @Test + public void testDisableGpuDevice_Success() { + UnmanageGpuDeviceCmd cmd = mock(UnmanageGpuDeviceCmd.class); + when(cmd.getIds()).thenReturn(List.of(GPU_DEVICE_ID_LONG)); + + GpuDeviceVO device = mock(GpuDeviceVO.class); + when(device.getId()).thenReturn(GPU_DEVICE_ID_LONG); + when(device.getManagedState()).thenReturn(GpuDevice.ManagedState.Managed); + when(device.getVmId()).thenReturn(null); + when(device.getHostId()).thenReturn(HOST_ID); + + when(gpuDeviceDao.findById(GPU_DEVICE_ID_LONG)).thenReturn(device); + when(gpuDeviceDao.update(eq(GPU_DEVICE_ID_LONG), any(GpuDeviceVO.class))).thenReturn(true); + + doReturn(null).when(gpuService).getGpuGroupDetailsFromGpuDevicesOnHost(anyLong()); + + boolean result = gpuService.disableGpuDevice(cmd); + + 
assertTrue(result); + verify(gpuDeviceDao).update(eq(GPU_DEVICE_ID_LONG), any(GpuDeviceVO.class)); + } + + @Test + public void testEnableGpuDevice_Success() { + ManageGpuDeviceCmd cmd = mock(ManageGpuDeviceCmd.class); + when(cmd.getIds()).thenReturn(List.of(GPU_DEVICE_ID_LONG)); + + GpuDeviceVO device = mock(GpuDeviceVO.class); + when(device.getId()).thenReturn(GPU_DEVICE_ID_LONG); + when(device.getManagedState()).thenReturn(GpuDevice.ManagedState.Unmanaged); + when(device.getVmId()).thenReturn(null); + when(device.getHostId()).thenReturn(HOST_ID); + + when(gpuDeviceDao.findById(GPU_DEVICE_ID_LONG)).thenReturn(device); + when(gpuDeviceDao.update(eq(GPU_DEVICE_ID_LONG), any(GpuDeviceVO.class))).thenReturn(true); + + doReturn(null).when(gpuService).getGpuGroupDetailsFromGpuDevicesOnHost(anyLong()); + + boolean result = gpuService.enableGpuDevice(cmd); + + assertTrue(result); + verify(gpuDeviceDao).update(eq(GPU_DEVICE_ID_LONG), any(GpuDeviceVO.class)); + } + + @Test(expected = InvalidParameterValueException.class) + public void testDisableGpuDevice_EmptyIds() { + UnmanageGpuDeviceCmd cmd = mock(UnmanageGpuDeviceCmd.class); + when(cmd.getIds()).thenReturn(new ArrayList<>()); + + gpuService.disableGpuDevice(cmd); + } + + @Test(expected = InvalidParameterValueException.class) + public void testDisableGpuDevice_DeviceInUse() { + UnmanageGpuDeviceCmd cmd = mock(UnmanageGpuDeviceCmd.class); + when(cmd.getIds()).thenReturn(List.of(GPU_DEVICE_ID_LONG)); + + GpuDeviceVO device = mock(GpuDeviceVO.class); + when(device.getVmId()).thenReturn(VM_ID); + when(device.getManagedState()).thenReturn(GpuDevice.ManagedState.Managed); + + when(gpuDeviceDao.findById(GPU_DEVICE_ID_LONG)).thenReturn(device); + + gpuService.disableGpuDevice(cmd); + } + + @Test + public void testDeallocateAllGpuDevicesForVm_Success() { + List devices = List.of(mockGpuDevice); + when(gpuDeviceDao.listByVmId(VM_ID)).thenReturn(devices); + 
when(gpuDeviceDao.persist(any(GpuDeviceVO.class))).thenReturn(mockGpuDevice); + + gpuService.deallocateAllGpuDevicesForVm(VM_ID); + + verify(gpuDeviceDao).persist(any(GpuDeviceVO.class)); + } + + @Test + public void testDeallocateAllGpuDevicesForVmOnHost_NoDevices() { + when(gpuDeviceDao.listByVmId(VM_ID)).thenReturn(new ArrayList<>()); + + gpuService.deallocateAllGpuDevicesForVm(VM_ID); + + verify(gpuDeviceDao, never()).persist(any(GpuDeviceVO.class)); + } + + @Test + public void testDiscoverGpuDevices_Success() { + DiscoverGpuDevicesCmd cmd = mock(DiscoverGpuDevicesCmd.class); + when(cmd.getId()).thenReturn(HOST_ID); + when(cmd.getCommandName()).thenReturn(COMMAND_DISCOVER_GPU_DEVICES); + + when(hostDao.findById(HOST_ID)).thenReturn(mockHost); + when(resourceManager.getGPUStatistics(mockHost)).thenReturn(new HashMap<>()); + when(gpuDeviceDao.listByHostId(HOST_ID)).thenReturn(List.of(mockGpuDevice)); + + ListResponse result = gpuService.discoverGpuDevices(cmd); + + assertNotNull(result); + verify(resourceManager).getGPUStatistics(mockHost); + } + + @Test(expected = InvalidParameterValueException.class) + public void testDiscoverGpuDevices_HostNotFound() { + DiscoverGpuDevicesCmd cmd = mock(DiscoverGpuDevicesCmd.class); + when(cmd.getId()).thenReturn(INVALID_ID); + + when(hostDao.findById(INVALID_ID)).thenReturn(null); + + gpuService.discoverGpuDevices(cmd); + } + + @Test(expected = InvalidParameterValueException.class) + public void testDiscoverGpuDevices_HostNotUp() { + DiscoverGpuDevicesCmd cmd = mock(DiscoverGpuDevicesCmd.class); + when(cmd.getId()).thenReturn(HOST_ID); + + HostVO downHost = mock(HostVO.class); + when(downHost.getStatus()).thenReturn(Status.Down); + + when(hostDao.findById(HOST_ID)).thenReturn(downHost); + + gpuService.discoverGpuDevices(cmd); + } + + @Test + public void testIsGPUDeviceAvailable_Sufficient() { + VgpuProfile vgpuProfile = mock(VgpuProfile.class); + when(vgpuProfile.getId()).thenReturn(VGPU_PROFILE_ID); + + GpuDeviceVO 
additionalDevice = mock(GpuDeviceVO.class); + List availableDevices = List.of(mockGpuDevice, additionalDevice); + when(gpuDeviceDao.listDevicesForAllocation(HOST_ID, VGPU_PROFILE_ID)).thenReturn(availableDevices); + + boolean result = gpuService.isGPUDeviceAvailable(mockHost, VM_ID, vgpuProfile, 2); + + assertTrue(result); + } + + @Test + public void testIsGPUDeviceAvailable_Insufficient() { + VgpuProfile vgpuProfile = mock(VgpuProfile.class); + when(vgpuProfile.getId()).thenReturn(VGPU_PROFILE_ID); + + List availableDevices = List.of(mockGpuDevice); + List existingDevices = new ArrayList<>(); + + when(gpuDeviceDao.listDevicesForAllocation(HOST_ID, VGPU_PROFILE_ID)).thenReturn(availableDevices); + when(gpuDeviceDao.listByHostAndVm(HOST_ID, VM_ID)).thenReturn(existingDevices); + + boolean result = gpuService.isGPUDeviceAvailable(mockHost, VM_ID, vgpuProfile, 3); + + assertFalse(result); + } + + @Test + public void testGetGpuGroupDetailsFromGpuDevicesOnHost_Success() { + List devices = List.of(mockGpuDevice); + when(gpuDeviceDao.listByHostId(HOST_ID)).thenReturn(devices); + when(gpuCardDao.findById(GPU_CARD_ID)).thenReturn(mockGpuCard); + when(vgpuProfileDao.findById(VGPU_PROFILE_ID)).thenReturn(mockVgpuProfile); + + HashMap> result = gpuService.getGpuGroupDetailsFromGpuDevicesOnHost(HOST_ID); + + assertNotNull(result); + assertFalse(result.isEmpty()); + } + + @Test + public void testAddGpuDevicesToHost_NewDevice() { + VgpuTypesInfo deviceInfo = mock(VgpuTypesInfo.class); + when(deviceInfo.getBusAddress()).thenReturn(GPU_BUS_ADDRESS_2); + when(deviceInfo.getDeviceId()).thenReturn(GPU_DEVICE_ID); + when(deviceInfo.getVendorId()).thenReturn(GPU_VENDOR_ID); + when(deviceInfo.getDeviceName()).thenReturn(GPU_DEVICE_NAME); + when(deviceInfo.getVendorName()).thenReturn(GPU_VENDOR_NAME); + when(deviceInfo.getModelName()).thenReturn(PASSTHROUGH_PROFILE); + + List newDevices = List.of(deviceInfo); + when(gpuDeviceDao.listByHostId(HOST_ID)).thenReturn(new ArrayList<>()); + 
when(gpuCardDao.findByVendorIdAndDeviceId(GPU_VENDOR_ID, GPU_DEVICE_ID)).thenReturn(null); + when(gpuCardDao.persist(any(GpuCardVO.class))).thenReturn(mockGpuCard); + when(vgpuProfileDao.persist(any(VgpuProfileVO.class))).thenReturn(mockVgpuProfile); + when(gpuDeviceDao.persist(any(GpuDeviceVO.class))).thenReturn(mockGpuDevice); + + try (MockedStatic ignored = Mockito.mockStatic(GlobalLock.class)) { + GlobalLock lock = mock(GlobalLock.class); + when(GlobalLock.getInternLock("add-gpu-devices-to-host-" + HOST_ID)).thenReturn(lock); + when(lock.lock(30)).thenReturn(true); + + gpuService.addGpuDevicesToHost(mockHost, newDevices); + + verify(gpuCardDao).persist(any(GpuCardVO.class)); + verify(vgpuProfileDao, times(1)).persist(any(VgpuProfileVO.class)); // passthrough + verify(gpuDeviceDao).persist(any(GpuDeviceVO.class)); + } + } + + @Test + public void testAddGpuDevicesToHost_ExistingDevice() { + VgpuTypesInfo deviceInfo = mock(VgpuTypesInfo.class); + when(deviceInfo.getBusAddress()).thenReturn(GPU_BUS_ADDRESS); + when(deviceInfo.getDeviceId()).thenReturn(GPU_DEVICE_ID); + when(deviceInfo.getVendorId()).thenReturn(GPU_VENDOR_ID); + when(deviceInfo.getModelName()).thenReturn(PASSTHROUGH_PROFILE); + + List newDevices = List.of(deviceInfo); + List existingDevices = List.of(mockGpuDevice); + + when(gpuDeviceDao.listByHostId(HOST_ID)).thenReturn(existingDevices); + when(gpuCardDao.findByVendorIdAndDeviceId(GPU_VENDOR_ID, GPU_DEVICE_ID)).thenReturn(mockGpuCard); + when(vgpuProfileDao.findByNameAndCardId(PASSTHROUGH_PROFILE, GPU_CARD_ID)).thenReturn(mockVgpuProfile); + when(gpuDeviceDao.update(eq(GPU_DEVICE_ID_LONG), any(GpuDeviceVO.class))).thenReturn(true); + + try (MockedStatic ignored = Mockito.mockStatic(GlobalLock.class)) { + GlobalLock lock = mock(GlobalLock.class); + when(GlobalLock.getInternLock("add-gpu-devices-to-host-" + HOST_ID)).thenReturn(lock); + when(lock.lock(30)).thenReturn(true); + + gpuService.addGpuDevicesToHost(mockHost, newDevices); + 
verify(gpuDeviceDao).update(eq(GPU_DEVICE_ID_LONG), any(GpuDeviceVO.class)); + } + } + + @Test + public void testAddGpuDevicesToHost_WithVmName() { + VgpuTypesInfo deviceInfo = mock(VgpuTypesInfo.class); + when(deviceInfo.getBusAddress()).thenReturn(GPU_BUS_ADDRESS_2); + when(deviceInfo.getVmName()).thenReturn(VM_NAME); + when(deviceInfo.getDeviceId()).thenReturn(GPU_DEVICE_ID); + when(deviceInfo.getVendorId()).thenReturn(GPU_VENDOR_ID); + when(deviceInfo.getModelName()).thenReturn(PASSTHROUGH_PROFILE); + + VMInstanceVO vm = mock(VMInstanceVO.class); + when(vm.getId()).thenReturn(VM_ID_100); + + List newDevices = List.of(deviceInfo); + when(gpuDeviceDao.listByHostId(HOST_ID)).thenReturn(new ArrayList<>()); + when(gpuCardDao.findByVendorIdAndDeviceId(GPU_VENDOR_ID, GPU_DEVICE_ID)).thenReturn(mockGpuCard); + when(vgpuProfileDao.findByNameAndCardId(PASSTHROUGH_PROFILE, GPU_CARD_ID)).thenReturn(mockVgpuProfile); + when(vmInstanceDao.findVMByInstanceName(VM_NAME)).thenReturn(vm); + when(gpuDeviceDao.persist(any(GpuDeviceVO.class))).thenReturn(mockGpuDevice); + + try (MockedStatic ignored = Mockito.mockStatic(GlobalLock.class)) { + GlobalLock lock = mock(GlobalLock.class); + when(GlobalLock.getInternLock("add-gpu-devices-to-host-" + HOST_ID)).thenReturn(lock); + when(lock.lock(30)).thenReturn(true); + + gpuService.addGpuDevicesToHost(mockHost, newDevices); + + verify(vmInstanceDao).findVMByInstanceName(VM_NAME); + verify(gpuDeviceDao).persist(any(GpuDeviceVO.class)); + } + } + + @Test + public void testGetGPUDevice_Success() { + VirtualMachine vm = mock(VirtualMachine.class); + when(vm.getId()).thenReturn(VM_ID); + when(vm.getHostId()).thenReturn(HOST_ID); + when(vm.getServiceOfferingId()).thenReturn(SERVICE_OFFERING_ID); + + VgpuProfile vgpuProfile = mock(VgpuProfile.class); + when(vgpuProfile.getId()).thenReturn(VGPU_PROFILE_ID); + when(vgpuProfile.getCardId()).thenReturn(GPU_CARD_ID); + when(vgpuProfile.getName()).thenReturn(TEST_PROFILE_NAME); + + List 
availableDevices = List.of(mockGpuDevice); + // Setup mocks before transaction execution + when(gpuDeviceDao.listDevicesForAllocation(HOST_ID, VGPU_PROFILE_ID)).thenReturn(availableDevices); + when(gpuCardDao.findById(GPU_CARD_ID)).thenReturn(mockGpuCard); + when(gpuDeviceDao.persist(any(GpuDeviceVO.class))).thenReturn(mockGpuDevice); + when(gpuDeviceDao.listByHostId(HOST_ID)).thenReturn(availableDevices); + when(vgpuProfileDao.findById(VGPU_PROFILE_ID)).thenReturn(mockVgpuProfile); + when(serviceOfferingDao.findById(SERVICE_OFFERING_ID)).thenReturn(mockServiceOffering); + try (MockedStatic transactionMock = Mockito.mockStatic(Transaction.class)) { + + transactionMock.when(() -> Transaction.execute(any(TransactionCallback.class))).thenAnswer(invocation -> { + TransactionCallback callback = invocation.getArgument(0); + return callback.doInTransaction(null); + }); + + GPUDeviceTO result = gpuService.getGPUDevice(vm, HOST_ID, vgpuProfile, 1); + + assertNotNull(result); + } + } + + @Test(expected = CloudRuntimeException.class) + public void testGetGPUDevice_InsufficientDevices() { + VirtualMachine vm = mock(VirtualMachine.class); + when(vm.getId()).thenReturn(VM_ID); + when(vm.getHostId()).thenReturn(HOST_ID); + + VgpuProfile vgpuProfile = mock(VgpuProfile.class); + when(vgpuProfile.getId()).thenReturn(VGPU_PROFILE_ID); + + try (MockedStatic transactionMock = Mockito.mockStatic(Transaction.class)) { + when(gpuDeviceDao.listDevicesForAllocation(HOST_ID, VGPU_PROFILE_ID)).thenReturn(new ArrayList<>()); + + transactionMock.when(() -> Transaction.execute(any(TransactionCallback.class))).thenAnswer(invocation -> { + TransactionCallback callback = invocation.getArgument(0); + return callback.doInTransaction(null); + }); + + gpuService.getGPUDevice(vm, HOST_ID, vgpuProfile, 2); + } + } + + @Test + public void testAllocateGpuDevicesToVmOnHost_Success() { + VgpuTypesInfo deviceInfo = mock(VgpuTypesInfo.class); + when(deviceInfo.getBusAddress()).thenReturn(GPU_BUS_ADDRESS); + + 
List gpuDevices = List.of(deviceInfo); + when(gpuDeviceDao.listByVmId(VM_ID)).thenReturn(new ArrayList<>()); + when(gpuDeviceDao.findByHostIdAndBusAddress(HOST_ID, GPU_BUS_ADDRESS)).thenReturn(mockGpuDevice); + when(gpuDeviceDao.persist(any(GpuDeviceVO.class))).thenReturn(mockGpuDevice); + + try (MockedStatic transactionMock = Mockito.mockStatic(Transaction.class)) { + transactionMock.when(() -> Transaction.execute(any(TransactionCallbackNoReturn.class))).thenAnswer( + invocation -> { + TransactionCallbackNoReturn callback = invocation.getArgument(0); + callback.doInTransactionWithoutResult(null); + return null; + }); + + gpuService.allocateGpuDevicesToVmOnHost(VM_ID, HOST_ID, gpuDevices); + + verify(gpuDeviceDao).findByHostIdAndBusAddress(HOST_ID, GPU_BUS_ADDRESS); + } + } + + @Test(expected = CloudRuntimeException.class) + public void testAllocateGpuDevicesToVmOnHost_DeviceNotFound() { + VgpuTypesInfo deviceInfo = mock(VgpuTypesInfo.class); + when(deviceInfo.getBusAddress()).thenReturn(GPU_BUS_ADDRESS); + + List gpuDevices = List.of(deviceInfo); + when(gpuDeviceDao.listByVmId(VM_ID)).thenReturn(new ArrayList<>()); + when(gpuDeviceDao.findByHostIdAndBusAddress(HOST_ID, GPU_BUS_ADDRESS)).thenReturn(null); + + try (MockedStatic transactionMock = Mockito.mockStatic(Transaction.class)) { + transactionMock.when(() -> Transaction.execute(any(TransactionCallbackNoReturn.class))).thenAnswer( + invocation -> { + TransactionCallbackNoReturn callback = invocation.getArgument(0); + callback.doInTransactionWithoutResult(null); + return null; + }); + + gpuService.allocateGpuDevicesToVmOnHost(VM_ID, HOST_ID, gpuDevices); + } + } + + // Additional edge case tests + @Test(expected = InvalidParameterValueException.class) + public void testCreateGpuCard_BlankVendorName() { + CreateGpuCardCmd cmd = mock(CreateGpuCardCmd.class); + when(cmd.getDeviceId()).thenReturn(GPU_DEVICE_ID); + when(cmd.getDeviceName()).thenReturn(GPU_DEVICE_NAME); + when(cmd.getName()).thenReturn(GPU_CARD_NAME); 
+ when(cmd.getVendorName()).thenReturn(""); + when(cmd.getVendorId()).thenReturn(GPU_VENDOR_ID); + + gpuService.createGpuCard(cmd); + } + + @Test(expected = InvalidParameterValueException.class) + public void testCreateGpuCard_BlankName() { + CreateGpuCardCmd cmd = mock(CreateGpuCardCmd.class); + when(cmd.getDeviceId()).thenReturn(GPU_DEVICE_ID); + when(cmd.getDeviceName()).thenReturn(GPU_DEVICE_NAME); + when(cmd.getName()).thenReturn(""); + when(cmd.getVendorName()).thenReturn(GPU_VENDOR_NAME); + when(cmd.getVendorId()).thenReturn(GPU_VENDOR_ID); + + gpuService.createGpuCard(cmd); + } + + @Test(expected = InvalidParameterValueException.class) + public void testCreateGpuDevice_BlankBusAddress() { + CreateGpuDeviceCmd cmd = mock(CreateGpuDeviceCmd.class); + when(cmd.getHostId()).thenReturn(HOST_ID); + when(cmd.getBusAddress()).thenReturn(""); + + when(hostDao.findById(HOST_ID)).thenReturn(mockHost); + + gpuService.createGpuDevice(cmd); + } + + @Test(expected = InvalidParameterValueException.class) + public void testCreateGpuDevice_InvalidVgpuProfile() { + CreateGpuDeviceCmd cmd = mock(CreateGpuDeviceCmd.class); + when(cmd.getHostId()).thenReturn(HOST_ID); + when(cmd.getBusAddress()).thenReturn(GPU_BUS_ADDRESS); + when(cmd.getGpuCardId()).thenReturn(GPU_CARD_ID); + when(cmd.getVgpuProfileId()).thenReturn(GPU_CARD_ID_2); // Different card ID + + VgpuProfileVO wrongProfile = mock(VgpuProfileVO.class); + when(wrongProfile.getCardId()).thenReturn(DIFFERENT_CARD_ID); // Different card ID + + when(hostDao.findById(HOST_ID)).thenReturn(mockHost); + when(gpuDeviceDao.findByHostIdAndBusAddress(HOST_ID, GPU_BUS_ADDRESS)).thenReturn(null); + when(gpuCardDao.findById(GPU_CARD_ID)).thenReturn(mockGpuCard); + when(vgpuProfileDao.findById(GPU_CARD_ID_2)).thenReturn(wrongProfile); + + gpuService.createGpuDevice(cmd); + } + + @Test(expected = InvalidParameterValueException.class) + public void testCreateGpuDevice_ParentDeviceOnDifferentHost() { + CreateGpuDeviceCmd cmd = 
mock(CreateGpuDeviceCmd.class); + when(cmd.getHostId()).thenReturn(HOST_ID); + when(cmd.getBusAddress()).thenReturn(GPU_BUS_ADDRESS); + when(cmd.getGpuCardId()).thenReturn(GPU_CARD_ID); + when(cmd.getVgpuProfileId()).thenReturn(VGPU_PROFILE_ID); + when(cmd.getParentGpuDeviceId()).thenReturn(PARENT_GPU_DEVICE_ID); + + GpuDeviceVO parentDevice = mock(GpuDeviceVO.class); + when(parentDevice.getHostId()).thenReturn(DIFFERENT_HOST_ID); // Different host + + when(hostDao.findById(HOST_ID)).thenReturn(mockHost); + when(gpuDeviceDao.findByHostIdAndBusAddress(HOST_ID, GPU_BUS_ADDRESS)).thenReturn(null); + when(gpuCardDao.findById(GPU_CARD_ID)).thenReturn(mockGpuCard); + when(vgpuProfileDao.findById(VGPU_PROFILE_ID)).thenReturn(mockVgpuProfile); + when(gpuDeviceDao.findById(PARENT_GPU_DEVICE_ID)).thenReturn(parentDevice); + + gpuService.createGpuDevice(cmd); + } + + @Test + public void testIsGPUDeviceAvailable_WithExistingDevices() { + VgpuProfile vgpuProfile = mock(VgpuProfile.class); + when(vgpuProfile.getId()).thenReturn(VGPU_PROFILE_ID); + + List availableDevices = List.of(mockGpuDevice); + + GpuDeviceVO existingDevice = mock(GpuDeviceVO.class); + when(existingDevice.getVgpuProfileId()).thenReturn(VGPU_PROFILE_ID); + List existingDevices = List.of(existingDevice); + + when(gpuDeviceDao.listDevicesForAllocation(HOST_ID, VGPU_PROFILE_ID)).thenReturn(availableDevices); + when(gpuDeviceDao.listByHostAndVm(HOST_ID, VM_ID)).thenReturn(existingDevices); + + boolean result = gpuService.isGPUDeviceAvailable(mockHost, VM_ID, vgpuProfile, 2); + + assertTrue(result); + } + + @Test + public void testAddGpuDevicesToHost_DisableRemovedDevices() { + // Setup existing device that won't be in the new list + GpuDeviceVO existingDevice = mock(GpuDeviceVO.class); + when(existingDevice.getId()).thenReturn(GPU_DEVICE_ID_2); + when(existingDevice.getBusAddress()).thenReturn(GPU_BUS_ADDRESS_3); + List existingDevices = List.of(existingDevice); + + List newDevices = new ArrayList<>(); + + 
when(gpuDeviceDao.listByHostId(HOST_ID)).thenReturn(existingDevices); + when(gpuDeviceDao.update(eq(GPU_DEVICE_ID_2), any(GpuDeviceVO.class))).thenReturn(true); + + try (MockedStatic ignored = Mockito.mockStatic(GlobalLock.class)) { + GlobalLock lock = mock(GlobalLock.class); + when(GlobalLock.getInternLock("add-gpu-devices-to-host-" + HOST_ID)).thenReturn(lock); + when(lock.lock(30)).thenReturn(true); + + gpuService.addGpuDevicesToHost(mockHost, newDevices); + verify(gpuDeviceDao).update(eq(GPU_DEVICE_ID_2), any(GpuDeviceVO.class)); + } + } + + @Test + public void testUpdateVgpuProfile_SameName() { + UpdateVgpuProfileCmd cmd = mock(UpdateVgpuProfileCmd.class); + when(cmd.getId()).thenReturn(VGPU_PROFILE_ID); + when(cmd.getProfileName()).thenReturn(VGPU_PROFILE_NAME); // Same as existing + when(cmd.getDescription()).thenReturn(UPDATED_PROFILE_DESCRIPTION); + when(cmd.getCommandName()).thenReturn(COMMAND_UPDATE_VGPU_PROFILE); + + when(vgpuProfileDao.findById(VGPU_PROFILE_ID)).thenReturn(mockVgpuProfile); + when(vgpuProfileDao.update(eq(VGPU_PROFILE_ID), any(VgpuProfileVO.class))).thenReturn(true); + when(gpuCardDao.findById(GPU_CARD_ID)).thenReturn(mockGpuCard); + + VgpuProfileResponse result = gpuService.updateVgpuProfile(cmd); + + assertNotNull(result); + assertEquals(VGPU_PROFILE_UUID, result.getId()); + verify(vgpuProfileDao, never()).findByNameAndCardId(anyString(), anyLong()); + } + + // Tests for the refactored GPU allocation methods + @Test + public void testGetGpuDevicesToAllocate_OptimalAllocation() { + // Create devices in same NUMA node and PCIe root + List availableDevices = createMockGpuDevices(4, NUMA_NODE_0, PCI_ROOT_1); + + List result = gpuService.getGpuDevicesToAllocate(availableDevices, 2); + + assertNotNull(result); + assertEquals(2, result.size()); + // Should select first 2 devices (sorted by ID) + assertEquals(GPU_DEVICE_ID_LONG.longValue(), result.get(0).getId()); + assertEquals(GPU_DEVICE_ID_2.longValue(), result.get(1).getId()); + } + + // 
Helper methods for creating mock GPU devices + private List createMockGpuDevices(int count, String numaNode, String pciRoot) { + return createMockGpuDevices(count, numaNode, pciRoot, GPU_DEVICE_ID_LONG); + } + + private List createMockGpuDevices(int count, String numaNode, String pciRoot, long startId) { + List devices = new ArrayList<>(); + for (int i = 0; i < count; i++) { + GpuDeviceVO device = mock(GpuDeviceVO.class); + Long deviceId = startId + i; + when(device.getId()).thenReturn(deviceId); + when(device.getNumaNode()).thenReturn(numaNode); + when(device.getPciRoot()).thenReturn(pciRoot); + // Ensure mock is properly configured + devices.add(device); + } + return devices; + } + + @Test + public void testGetGpuDevicesToAllocate_SingleNumaAllocation() { + // Create devices in same NUMA node but different PCIe roots + List availableDevices = new ArrayList<>(); + availableDevices.addAll(createMockGpuDevices(2, NUMA_NODE_0, PCI_ROOT_1, GPU_DEVICE_ID_LONG)); + availableDevices.addAll(createMockGpuDevices(2, NUMA_NODE_0, PCI_ROOT_2, 3L)); + + List result = gpuService.getGpuDevicesToAllocate(availableDevices, 3); + + assertNotNull(result); + assertEquals(3, result.size()); + // Should prefer devices from same PCIe root first + assertTrue(result.stream().allMatch(device -> NUMA_NODE_0.equals(device.getNumaNode()))); + } + + @Test + public void testGetGpuDevicesToAllocate_DistributedAllocation() { + // Create devices across different NUMA nodes + List availableDevices = new ArrayList<>(); + availableDevices.addAll(createMockGpuDevices(2, NUMA_NODE_0, PCI_ROOT_1, GPU_DEVICE_ID_LONG)); + availableDevices.addAll(createMockGpuDevices(2, NUMA_NODE_1, PCI_ROOT_3, 3L)); + + List result = gpuService.getGpuDevicesToAllocate(availableDevices, 3); + + assertNotNull(result); + assertEquals(3, result.size()); + // Should distribute across NUMA nodes + long numa0Count = result.stream().filter(device -> NUMA_NODE_0.equals(device.getNumaNode())).count(); + long numa1Count = 
result.stream().filter(device -> NUMA_NODE_1.equals(device.getNumaNode())).count(); + assertTrue(numa0Count > 0 && numa1Count > 0); + } + + @Test(expected = CloudRuntimeException.class) + public void testGetGpuDevicesToAllocate_InsufficientDevices() { + List availableDevices = createMockGpuDevices(2, NUMA_NODE_0, PCI_ROOT_1); + + gpuService.getGpuDevicesToAllocate(availableDevices, 5); + } + + @Test + public void testGetGpuDevicesToAllocate_UnknownNumaAndPci() { + // Test with devices having null/blank NUMA nodes and PCIe roots + List availableDevices = createMockGpuDevicesWithNullValues(3); + + List result = gpuService.getGpuDevicesToAllocate(availableDevices, 2); + + assertNotNull(result); + assertEquals(2, result.size()); + } + + private List createMockGpuDevicesWithNullValues(int count) { + List devices = new ArrayList<>(); + for (int i = 0; i < count; i++) { + GpuDeviceVO device = mock(GpuDeviceVO.class); + when(device.getId()).thenReturn((long) (i + 1)); + when(device.getNumaNode()).thenReturn(i == 0 ? null : (i == 1 ? "" : NUMA_NODE_0)); + when(device.getPciRoot()).thenReturn(i == 0 ? null : (i == 1 ? 
"" : PCI_ROOT_1)); + devices.add(device); + } + return devices; + } + + @Test + public void testGetGpuDevicesToAllocate_SimpleDebug() { + // Simple test with minimal setup + GpuDeviceVO device1 = mock(GpuDeviceVO.class); + when(device1.getId()).thenReturn(GPU_DEVICE_ID_LONG); + when(device1.getNumaNode()).thenReturn(NUMA_NODE_0); + when(device1.getPciRoot()).thenReturn(PCI_ROOT_1); + + GpuDeviceVO device2 = mock(GpuDeviceVO.class); + when(device2.getId()).thenReturn(GPU_DEVICE_ID_2); + when(device2.getNumaNode()).thenReturn(NUMA_NODE_0); + when(device2.getPciRoot()).thenReturn(PCI_ROOT_1); + + List availableDevices = Arrays.asList(device1, device2); + + List result = gpuService.getGpuDevicesToAllocate(availableDevices, 1); + + assertNotNull(result); + assertEquals(1, result.size()); + assertEquals(GPU_DEVICE_ID_LONG.longValue(), result.get(0).getId()); + } + + // Tests for checkAndUpdateParentGpuDeviceState methods + @Test + public void testCheckAndUpdateParentGpuDeviceState_NullParentId() { + // Should not throw exception and not call any DAO methods + gpuService.checkAndUpdateParentGpuDeviceState((Long) null); + + verify(gpuDeviceDao, never()).findById(any()); + verify(gpuDeviceDao, never()).listByParentGpuDeviceId(any()); + verify(gpuDeviceDao, never()).update(anyLong(), any(GpuDeviceVO.class)); + } + + @Test + public void testCheckAndUpdateParentGpuDeviceState_ValidParentId() { + Long parentDeviceId = PARENT_GPU_DEVICE_ID; + GpuDeviceVO parentDevice = mock(GpuDeviceVO.class); + when(parentDevice.getId()).thenReturn(parentDeviceId); + when(parentDevice.getState()).thenReturn(GpuDevice.State.Free); + + when(gpuDeviceDao.findById(parentDeviceId)).thenReturn(parentDevice); + when(gpuDeviceDao.listByParentGpuDeviceId(parentDeviceId)).thenReturn(new ArrayList<>()); + + gpuService.checkAndUpdateParentGpuDeviceState(parentDeviceId); + + verify(gpuDeviceDao).findById(parentDeviceId); + verify(gpuDeviceDao).listByParentGpuDeviceId(parentDeviceId); + // Should not update 
since no child devices and already Free + verify(gpuDeviceDao, never()).update(eq(parentDeviceId), any(GpuDeviceVO.class)); + } + + @Test + public void testCheckAndUpdateParentGpuDeviceState_NullParentDevice() { + // Should not throw exception and not call any DAO methods + gpuService.checkAndUpdateParentGpuDeviceState((GpuDeviceVO) null); + + verify(gpuDeviceDao, never()).listByParentGpuDeviceId(any()); + verify(gpuDeviceDao, never()).update(anyLong(), any(GpuDeviceVO.class)); + } + + @Test + public void testCheckAndUpdateParentGpuDeviceState_NoChildDevices() { + GpuDeviceVO parentDevice = mock(GpuDeviceVO.class); + when(parentDevice.getId()).thenReturn(PARENT_GPU_DEVICE_ID); + when(parentDevice.getState()).thenReturn(GpuDevice.State.PartiallyAllocated); + + when(gpuDeviceDao.listByParentGpuDeviceId(PARENT_GPU_DEVICE_ID)).thenReturn(new ArrayList<>()); + when(gpuDeviceDao.update(eq(PARENT_GPU_DEVICE_ID), any(GpuDeviceVO.class))).thenReturn(true); + + gpuService.checkAndUpdateParentGpuDeviceState(parentDevice); + + verify(gpuDeviceDao).listByParentGpuDeviceId(PARENT_GPU_DEVICE_ID); + verify(parentDevice).setState(GpuDevice.State.Free); + verify(gpuDeviceDao).update(eq(PARENT_GPU_DEVICE_ID), eq(parentDevice)); + } + + @Test + public void testCheckAndUpdateParentGpuDeviceState_AllFreeChildDevices() { + GpuDeviceVO parentDevice = mock(GpuDeviceVO.class); + when(parentDevice.getId()).thenReturn(PARENT_GPU_DEVICE_ID); + when(parentDevice.getState()).thenReturn(GpuDevice.State.PartiallyAllocated); + + // Create child devices all in Free state + List childDevices = new ArrayList<>(); + for (int i = 0; i < 3; i++) { + GpuDeviceVO child = mock(GpuDeviceVO.class); + when(child.getState()).thenReturn(GpuDevice.State.Free); + childDevices.add(child); + } + + when(gpuDeviceDao.listByParentGpuDeviceId(PARENT_GPU_DEVICE_ID)).thenReturn(childDevices); + when(gpuDeviceDao.update(eq(PARENT_GPU_DEVICE_ID), any(GpuDeviceVO.class))).thenReturn(true); + + 
gpuService.checkAndUpdateParentGpuDeviceState(parentDevice); + + verify(gpuDeviceDao).listByParentGpuDeviceId(PARENT_GPU_DEVICE_ID); + verify(parentDevice).setState(GpuDevice.State.Free); + verify(gpuDeviceDao).update(eq(PARENT_GPU_DEVICE_ID), eq(parentDevice)); + } + + @Test + public void testCheckAndUpdateParentGpuDeviceState_SomeAllocatedChildDevices() { + GpuDeviceVO parentDevice = mock(GpuDeviceVO.class); + when(parentDevice.getId()).thenReturn(PARENT_GPU_DEVICE_ID); + when(parentDevice.getState()).thenReturn(GpuDevice.State.Free); + + // Create child devices with mixed states - some Free, some Allocated + List childDevices = new ArrayList<>(); + + GpuDeviceVO child1 = mock(GpuDeviceVO.class); + when(child1.getState()).thenReturn(GpuDevice.State.Free); + childDevices.add(child1); + + GpuDeviceVO child2 = mock(GpuDeviceVO.class); + when(child2.getState()).thenReturn(GpuDevice.State.Allocated); + childDevices.add(child2); + + GpuDeviceVO child3 = mock(GpuDeviceVO.class); + when(child3.getState()).thenReturn(GpuDevice.State.Free); + childDevices.add(child3); + + when(gpuDeviceDao.listByParentGpuDeviceId(PARENT_GPU_DEVICE_ID)).thenReturn(childDevices); + when(gpuDeviceDao.update(eq(PARENT_GPU_DEVICE_ID), any(GpuDeviceVO.class))).thenReturn(true); + + gpuService.checkAndUpdateParentGpuDeviceState(parentDevice); + + verify(gpuDeviceDao).listByParentGpuDeviceId(PARENT_GPU_DEVICE_ID); + verify(parentDevice).setState(GpuDevice.State.PartiallyAllocated); + verify(gpuDeviceDao).update(eq(PARENT_GPU_DEVICE_ID), eq(parentDevice)); + } + + @Test + public void testCheckAndUpdateParentGpuDeviceState_ErrorChildDevices() { + GpuDeviceVO parentDevice = mock(GpuDeviceVO.class); + when(parentDevice.getId()).thenReturn(PARENT_GPU_DEVICE_ID); + when(parentDevice.getState()).thenReturn(GpuDevice.State.Free); + + // Create child devices with mixed states - Error should take priority + List childDevices = new ArrayList<>(); + + GpuDeviceVO child1 = mock(GpuDeviceVO.class); + 
when(child1.getState()).thenReturn(GpuDevice.State.Free); + childDevices.add(child1); + + GpuDeviceVO child2 = mock(GpuDeviceVO.class); + when(child2.getState()).thenReturn(GpuDevice.State.Allocated); + childDevices.add(child2); + + GpuDeviceVO child3 = mock(GpuDeviceVO.class); + when(child3.getState()).thenReturn(GpuDevice.State.Error); + childDevices.add(child3); + + when(gpuDeviceDao.listByParentGpuDeviceId(PARENT_GPU_DEVICE_ID)).thenReturn(childDevices); + when(gpuDeviceDao.update(eq(PARENT_GPU_DEVICE_ID), any(GpuDeviceVO.class))).thenReturn(true); + + gpuService.checkAndUpdateParentGpuDeviceState(parentDevice); + + verify(gpuDeviceDao).listByParentGpuDeviceId(PARENT_GPU_DEVICE_ID); + verify(parentDevice).setState(GpuDevice.State.Error); + verify(gpuDeviceDao).update(eq(PARENT_GPU_DEVICE_ID), eq(parentDevice)); + } + + @Test + public void testCheckAndUpdateParentGpuDeviceState_AllAllocatedChildDevices() { + GpuDeviceVO parentDevice = mock(GpuDeviceVO.class); + when(parentDevice.getId()).thenReturn(PARENT_GPU_DEVICE_ID); + when(parentDevice.getState()).thenReturn(GpuDevice.State.Free); + + // Create child devices all in Allocated state + List childDevices = new ArrayList<>(); + for (int i = 0; i < 2; i++) { + GpuDeviceVO child = mock(GpuDeviceVO.class); + when(child.getState()).thenReturn(GpuDevice.State.Allocated); + childDevices.add(child); + } + + when(gpuDeviceDao.listByParentGpuDeviceId(PARENT_GPU_DEVICE_ID)).thenReturn(childDevices); + when(gpuDeviceDao.update(eq(PARENT_GPU_DEVICE_ID), any(GpuDeviceVO.class))).thenReturn(true); + + gpuService.checkAndUpdateParentGpuDeviceState(parentDevice); + + verify(gpuDeviceDao).listByParentGpuDeviceId(PARENT_GPU_DEVICE_ID); + verify(parentDevice).setState(GpuDevice.State.PartiallyAllocated); + verify(gpuDeviceDao).update(eq(PARENT_GPU_DEVICE_ID), eq(parentDevice)); + } + + @Test + public void testCheckAndUpdateParentGpuDeviceState_NoStateChange() { + GpuDeviceVO parentDevice = mock(GpuDeviceVO.class); + 
when(parentDevice.getId()).thenReturn(PARENT_GPU_DEVICE_ID); + when(parentDevice.getState()).thenReturn(GpuDevice.State.Free); + + // Create child devices all in Free state - should not change parent state + List childDevices = new ArrayList<>(); + for (int i = 0; i < 2; i++) { + GpuDeviceVO child = mock(GpuDeviceVO.class); + when(child.getState()).thenReturn(GpuDevice.State.Free); + childDevices.add(child); + } + + when(gpuDeviceDao.listByParentGpuDeviceId(PARENT_GPU_DEVICE_ID)).thenReturn(childDevices); + + gpuService.checkAndUpdateParentGpuDeviceState(parentDevice); + + verify(gpuDeviceDao).listByParentGpuDeviceId(PARENT_GPU_DEVICE_ID); + verify(parentDevice, never()).setState(any()); + verify(gpuDeviceDao, never()).update(anyLong(), any(GpuDeviceVO.class)); + } + + @Test + public void testCheckAndUpdateParentGpuDeviceState_MultipleErrorStates() { + GpuDeviceVO parentDevice = mock(GpuDeviceVO.class); + when(parentDevice.getId()).thenReturn(PARENT_GPU_DEVICE_ID); + when(parentDevice.getState()).thenReturn(GpuDevice.State.PartiallyAllocated); + + // Create child devices with multiple Error states + List childDevices = new ArrayList<>(); + + GpuDeviceVO child1 = mock(GpuDeviceVO.class); + when(child1.getState()).thenReturn(GpuDevice.State.Error); + childDevices.add(child1); + + GpuDeviceVO child2 = mock(GpuDeviceVO.class); + childDevices.add(child2); + + when(gpuDeviceDao.listByParentGpuDeviceId(PARENT_GPU_DEVICE_ID)).thenReturn(childDevices); + when(gpuDeviceDao.update(eq(PARENT_GPU_DEVICE_ID), any(GpuDeviceVO.class))).thenReturn(true); + + gpuService.checkAndUpdateParentGpuDeviceState(parentDevice); + + verify(gpuDeviceDao).listByParentGpuDeviceId(PARENT_GPU_DEVICE_ID); + verify(parentDevice).setState(GpuDevice.State.Error); + verify(gpuDeviceDao).update(eq(PARENT_GPU_DEVICE_ID), eq(parentDevice)); + } + + @Test + public void testCheckAndUpdateParentGpuDeviceState_ErrorTakesPriority() { + GpuDeviceVO parentDevice = mock(GpuDeviceVO.class); + 
when(parentDevice.getId()).thenReturn(PARENT_GPU_DEVICE_ID); + when(parentDevice.getState()).thenReturn(GpuDevice.State.Free); + + // Create child devices - should break on first Error state found + List childDevices = new ArrayList<>(); + + GpuDeviceVO child1 = mock(GpuDeviceVO.class); + when(child1.getState()).thenReturn(GpuDevice.State.Allocated); + childDevices.add(child1); + + GpuDeviceVO child2 = mock(GpuDeviceVO.class); + when(child2.getState()).thenReturn(GpuDevice.State.Error); + childDevices.add(child2); + + // This child should not affect the final state since Error was found first + GpuDeviceVO child3 = mock(GpuDeviceVO.class); + childDevices.add(child3); + + when(gpuDeviceDao.listByParentGpuDeviceId(PARENT_GPU_DEVICE_ID)).thenReturn(childDevices); + when(gpuDeviceDao.update(eq(PARENT_GPU_DEVICE_ID), any(GpuDeviceVO.class))).thenReturn(true); + + gpuService.checkAndUpdateParentGpuDeviceState(parentDevice); + + verify(gpuDeviceDao).listByParentGpuDeviceId(PARENT_GPU_DEVICE_ID); + verify(parentDevice).setState(GpuDevice.State.Error); + verify(gpuDeviceDao).update(eq(PARENT_GPU_DEVICE_ID), eq(parentDevice)); + } +} diff --git a/setup/db/create-schema-simulator.sql b/setup/db/create-schema-simulator.sql index 6cb6786311ae..a723ab119ab6 100644 --- a/setup/db/create-schema-simulator.sql +++ b/setup/db/create-schema-simulator.sql @@ -22,6 +22,7 @@ DROP TABLE IF EXISTS `simulator`.`mockstoragepool`; DROP TABLE IF EXISTS `simulator`.`mockvm`; DROP TABLE IF EXISTS `simulator`.`mockvolume`; DROP TABLE IF EXISTS `simulator`.`mocksecurityrules`; +DROP TABLE IF EXISTS `simulator`.`mockgpudevice`; CREATE TABLE `simulator`.`mockhost` ( `id` bigint unsigned NOT NULL auto_increment, @@ -127,3 +128,30 @@ CREATE TABLE `simulator`.`mocksecurityrules` ( INDEX `i_mocksecurityrules__vmid`(`vmid`), INDEX `i_mocksecurityrules__hostid`(`hostid`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; + + +-- Mock GPU Devices for Simulator +CREATE TABLE IF NOT EXISTS `simulator`.`mockgpudevice` ( + 
`id` bigint unsigned NOT NULL AUTO_INCREMENT, + `bus_address` varchar(64) NOT NULL, + `vendor_id` varchar(32) NOT NULL, + `device_id` varchar(32) NOT NULL, + `numa_node` int unsigned NOT NULL, + `pci_root` varchar(64) NOT NULL, + `vendor_name` varchar(128) NOT NULL, + `device_name` varchar(128) NOT NULL, + `host_id` bigint unsigned DEFAULT NULL, + `vm_id` bigint unsigned DEFAULT NULL, + `max_vgpu_per_pgpu` bigint unsigned NOT NULL DEFAULT 1, + `video_ram` bigint unsigned NOT NULL DEFAULT 0, + `max_resolution_x` bigint unsigned NOT NULL DEFAULT 0, + `max_resolution_y` bigint unsigned NOT NULL DEFAULT 0, + `max_heads` bigint unsigned NOT NULL DEFAULT 0, + `state` varchar(32) DEFAULT 'Available', + `device_type` varchar(32) DEFAULT 'PCI', + `parent_device_id` bigint unsigned DEFAULT NULL, + `profile_name` varchar(128) DEFAULT NULL, + `passthrough_enabled` tinyint(1) DEFAULT 1, + PRIMARY KEY (`id`), + UNIQUE KEY `uk_mockgpudevice__bus_address` (`bus_address`, `host_id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; diff --git a/test/integration/smoke/test_deploy_vgpu_enabled_vm.py b/test/integration/smoke/test_deploy_vgpu_enabled_vm.py index 2808a3ce06e6..d7994e3fe8d5 100644 --- a/test/integration/smoke/test_deploy_vgpu_enabled_vm.py +++ b/test/integration/smoke/test_deploy_vgpu_enabled_vm.py @@ -24,7 +24,7 @@ # base - contains all resources as entities and defines create, delete, # list operations on them -from marvin.lib.base import Account, VirtualMachine, ServiceOffering, NetworkOffering, Network, Template +from marvin.lib.base import Account, Host, Capacities, VirtualMachine, ServiceOffering, NetworkOffering, Network, Template, GpuDevice # utils - utility classes for common cleanup, external library wrappers etc from marvin.lib.utils import cleanup_resources, get_hypervisor_type, validateList @@ -44,33 +44,34 @@ class TestDeployvGPUenabledVM(cloudstackTestCase): Test deploy a vGPU enabled VM into a user account """ @classmethod - def setUpClass(self): - testClient = 
super(TestDeployvGPUenabledVM, self).getClsTestClient() - self.apiclient = testClient.getApiClient() - self.testdata = self.testClient.getParsedTestDataConfig() - self.hostConfig = self.config.__dict__["zones"][0].__dict__["pods"][0].__dict__["clusters"][0].__dict__["hosts"][0].__dict__ - self._cleanup = [] - self.unsupportedHypervisor = False - self.noSuitableHost = False - # Need to add check whether zone containing the xen hypervisor or not - # as well + def setUpClass(cls): + testClient = super(TestDeployvGPUenabledVM, cls).getClsTestClient() + cls.apiclient = testClient.getApiClient() + cls.testdata = cls.testClient.getParsedTestDataConfig() + cls.hostConfig = cls.config.__dict__["zones"][0].__dict__["pods"][0].__dict__["clusters"][0].__dict__["hosts"][0].__dict__ + cls._cleanup = [] + cls.unsupportedHypervisor = False + cls.noSuitableHost = False + cls.hypervisor = testClient.getHypervisorInfo() + + if cls.hypervisor.lower() not in ["xenserver", "vmware", "kvm", "simulator"]: + cls.unsupportedHypervisor = True + cls.skipTest("Skipping test because suitable hypervisor/host not present") hosts = list_hosts( - self.apiclient, - hypervisor="XenServer" + cls.apiclient ) if hosts is None: - # GPU feature is supported only on XenServer.Check listhosts response - self.unsupportedHypervisor = True + cls.unsupportedHypervisor = True return - else: - gpuhosts = 0 + gpuhosts = 0 + if cls.hypervisor.lower() in ["xenserver"]: for ghost in hosts: - if ghost.hypervisorversion >= "6.2.0": + if ghost and ghost.hypervisorversion >= "6.2.0": sshClient = SshClient( host=ghost.ipaddress, - port=self.testdata['configurableData']['host']["publicport"], - user=self.hostConfig['username'], - passwd=self.hostConfig['password']) + port=cls.testdata['configurableData']['host']["publicport"], + user=cls.hostConfig['username'], + passwd=cls.hostConfig['password']) if ghost.hypervisorversion == "6.2.0": res = sshClient.execute( "xe patch-list uuid=0850b186-4d47-11e3-a720-001b2151a503") @@ 
-82,9 +83,25 @@ def setUpClass(self): gpuhosts = gpuhosts + 1 else: continue + elif cls.hypervisor.lower() == "kvm" or cls.hypervisor.lower() == "simulator": + # Check if the host has a GPU + for host in hosts: + h = Host(host.__dict__) + h.discoverGpuDevices(cls.apiclient) + + hosts = list_hosts( + cls.apiclient + ) + for host in hosts: + if host.gputotal != None and host.gputotal > 0: + gpuhosts = gpuhosts + 1 + elif cls.hypervisor.lower() in ["vmware"]: + # + cls.noSuitableHost = True + if gpuhosts == 0: # No XenServer available with GPU Drivers installed - self.noSuitableHost = True + cls.noSuitableHost = True return def setUp(self): @@ -92,11 +109,10 @@ def setUp(self): self.apiclient = self.testClient.getApiClient() self.dbclient = self.testClient.getDbConnection() if self.noSuitableHost or self.unsupportedHypervisor: - self.hypervisor = get_hypervisor_type(self.apiclient) - if self.hypervisor.lower() not in ["vmware"]: - self.skipTest("Skipping test because suitable hypervisor/host not\ - present") - self.testdata = self.testClient.getParsedTestDataConfig() + self.skipTest("Skipping test because suitable hypervisor/host not\ + present") + self.hypervisor = get_hypervisor_type(self.apiclient) + self.testdata = self.testClient.getParsedTestDataConfig() self.cleanup = [] @@ -110,6 +126,8 @@ def setUp(self): domainid=self.domain.id ) + self.gpu_capacity_total, self.gpu_capacity_used = self.get_gpu_capacity() + if self.hypervisor.lower() in ["xenserver"]: # Before running this test for Xen Server, register a windows template with ostype as @@ -123,39 +141,39 @@ def setUp(self): self.testdata["mode"] = self.zone.networktype if self.template == FAILED: - assert False, "get_template() failed to return template with description %s" % self.testdata[ - "ostype"] + assert False, "get_template() failed to return template with description %s" % self.testdata[ + "ostype"] self.testdata["small"]["zoneid"] = self.zone.id self.testdata["small"]["template"] = self.template.id 
self.testdata["service_offerings"]["vgpu260qwin"]["serviceofferingdetails"] = [ - { - 'pciDevice': 'Group of NVIDIA Corporation GK107GL [GRID K1] GPUs'}, { - 'vgpuType': 'GRID K120Q'}] - # create a service offering + { + 'pciDevice': 'Group of NVIDIA Corporation GK107GL [GRID K1] GPUs'}, { + 'vgpuType': 'GRID K120Q'}] + # create a service offering self.service_offering = ServiceOffering.create( - self.apiclient, - self.testdata["service_offerings"]["vgpu260qwin"], - ) + self.apiclient, + self.testdata["service_offerings"]["vgpu260qwin"], + ) self.cleanup.append(self.service_offering) elif self.hypervisor.lower() in ["vmware"]: self.testdata["isolated_network"]["zoneid"] = self.zone.id self.userapiclient = self.testClient.getUserApiClient( - UserName=self.account.name, - DomainName=self.account.domain - ) + UserName=self.account.name, + DomainName=self.account.domain + ) self.service_offering = ServiceOffering.create( - self.apiclient, - self.testdata["service_offering"]) + self.apiclient, + self.testdata["service_offering"]) # Create Shared Network Offering self.isolated_network_offering = NetworkOffering.create( - self.apiclient, - self.testdata["isolated_network_offering"]) - # Enable Isolated Network offering + self.apiclient, + self.testdata["isolated_network_offering"]) + # Enable Isolated Network offering self.isolated_network_offering.update(self.apiclient, state='Enabled') # Register a private template in the account with nic adapter vmxnet3 @@ -169,9 +187,51 @@ def setUp(self): details=[{"mks.enable3d" : "true", "mks.use3dRenderer" : "automatic", "svga.autodetect" : "false", "svga.vramSize" : "131072"}] ) + elif self.hypervisor.lower() in ["kvm", "simulator"]: + self.template = get_template( + self.apiclient, + self.zone.id, + self.testdata["ostype"]) + self.cleanup.append(self.template) + + # 1. Fetch available vgpu profile IDs from gpu devices on hosts + # 2. 
Create a service offering with vgpu profile ID + vgpu_profile_id_count_map = {} + vgpu_profile_id_device_map = {} + devices = GpuDevice.list(self.apiclient) + for device in devices: + if (device.state.lower() == "free" and device.managedstate.lower() == "managed" and device.gpudevicetype.lower() != "vgpuonly"): + vgpu_profile_id_count_map[device.vgpuprofileid] = vgpu_profile_id_count_map.get(device.vgpuprofileid, 0) + 1 + vgpu_profile_id_device_map[device.vgpuprofileid] = device + if len(vgpu_profile_id_count_map) == 0: + self.skipTest("No GPU devices found on the host with state 'Free' and managed state 'Managed'") + else: + self.vgpu_profile_id = max(vgpu_profile_id_count_map, key=vgpu_profile_id_count_map.get) + self.vgpu_profile_name = vgpu_profile_id_device_map[self.vgpu_profile_id].vgpuprofilename + self.gpu_card_id = vgpu_profile_id_device_map[self.vgpu_profile_id].gpucardid + self.gpu_card_name = vgpu_profile_id_device_map[self.vgpu_profile_id].gpucardname + self.testdata["service_offering"]["vgpuprofileid"] = self.vgpu_profile_id + self.testdata["service_offering"]["gpucount"] = 1 + + self.service_offering = ServiceOffering.create( + self.apiclient, + self.testdata["service_offering"]) + self.cleanup.append(self.service_offering) + else: + self.skipTest("Skipping test because suitable hypervisor/host not\ + present") - @attr(tags=['advanced', 'basic', 'vgpu'], required_hardware="true") + def get_gpu_capacity(self): + """Get GPU capacity for the host + """ + capacities = Capacities.list(self.apiclient, fetchlatest=True) + for c in capacities: + if c.name == "GPU": + return c.capacitytotal, c.capacityused + return 0, 0 + + @attr(tags=['advanced', 'basic', 'vgpu']) def test_deploy_vgpu_enabled_vm(self): """Test Deploy Virtual Machine @@ -180,18 +240,20 @@ def test_deploy_vgpu_enabled_vm(self): # 2. Virtual Machine is vGPU enabled (via SSH) # 3. 
listVirtualMachines returns accurate information """ - if self.hypervisor.lower() not in ["xenserver"]: + if self.hypervisor.lower() not in ["xenserver", "kvm", "simulator"]: self.cleanup.append(self.account) self.skipTest("This test case is written specifically\ - for XenServer hypervisor") + for XenServer, KVM & Simulator hypervisor") self.virtual_machine = VirtualMachine.create( self.apiclient, - self.testdata["small"], + self.testdata["virtual_machine"], + hypervisor=self.hypervisor, accountid=self.account.name, domainid=self.account.domainid, serviceofferingid=self.service_offering.id, - mode=self.testdata['mode'] + templateid=self.template.id, + zoneid=self.zone.id ) self.cleanup.append(self.virtual_machine) @@ -199,10 +261,7 @@ def test_deploy_vgpu_enabled_vm(self): self.apiclient, id=self.virtual_machine.id) - self.debug( - "Verify listVirtualMachines response for virtual machine: %s" - % self.virtual_machine.id - ) + self.debug("Verify listVirtualMachines response for virtual machine: %s" % self.virtual_machine.id) self.assertEqual( isinstance(list_vms, list), @@ -231,31 +290,53 @@ def test_deploy_vgpu_enabled_vm(self): "Running", msg="VM is not in Running state" ) - hosts = list_hosts( - self.apiclient, - id=vm.hostid + + # Check capacity changes + total, used = self.get_gpu_capacity() + + self.assertEqual( + total, + self.gpu_capacity_total, + "Total GPU capacity did not change after VM deployment" ) - hostip = hosts[0].ipaddress - try: - sshClient = SshClient( - host=hostip, - port=self.testdata['configurableData']['host']["publicport"], - user=self.testdata['configurableData']['host']["username"], - passwd=self.testdata['configurableData']['host']["password"]) - res = sshClient.execute( - "xe vgpu-list vm-name-label=%s params=type-uuid %s" % - (vm.instancename)) - self.debug("SSH result: %s" % res) - except Exception as e: - self.fail("SSH Access failed for %s: %s" % - (hostip, e) - ) - result = str(res) self.assertEqual( - result.count("type-uuid"), - 
1, - "VM is vGPU enabled." + used, + self.gpu_capacity_used + 1, + "Used GPU capacity did not change by 1 after VM deployment" ) + + if self.hypervisor.lower() in ["xenserver"]: + hosts = list_hosts( + self.apiclient, + id=vm.hostid + ) + hostip = hosts[0].ipaddress + try: + sshClient = SshClient( + host=hostip, + port=self.testdata['configurableData']['host']["publicport"], + user=self.testdata['configurableData']['host']["username"], + passwd=self.testdata['configurableData']['host']["password"]) + res = sshClient.execute( + "xe vgpu-list vm-name-label=%s params=type-uuid %s" % + (vm.instancename)) + self.debug("SSH result: %s" % res) + except Exception as e: + self.fail("SSH Access failed for %s: %s" % + (hostip, e) + ) + result = str(res) + self.assertEqual( + result.count("type-uuid"), + 1, + "VM is vGPU enabled." + ) + elif self.hypervisor.lower() in ["kvm", "simulator"]: + self.assertEqual(self.virtual_machine.vgpuprofileid, self.vgpu_profile_id, "VM is vGPU enabled.") + self.assertEqual(self.virtual_machine.gpucount, 1, "VM is vGPU enabled.") + self.assertEqual(self.virtual_machine.gpucardid, self.gpu_card_id, "VM is vGPU enabled.") + self.assertEqual(self.virtual_machine.gpucardname, self.gpu_card_name, "VM is vGPU enabled.") + self.assertEqual(self.virtual_machine.vgpuprofilename, self.vgpu_profile_name, "VM is vGPU enabled.") self.cleanup.append(self.account) def tearDown(self): diff --git a/tools/apidoc/gen_toc.py b/tools/apidoc/gen_toc.py index d34d5480efcc..bcaaca2e39a4 100644 --- a/tools/apidoc/gen_toc.py +++ b/tools/apidoc/gen_toc.py @@ -266,6 +266,8 @@ 'createGuiTheme': 'GUI Theme', 'updateGuiTheme': 'GUI Theme', 'removeGuiTheme': 'GUI Theme', + 'Gpu': 'GPU', + 'Vgpu': 'GPU', 'Extension' : 'Extension', 'Extensions' : 'Extension', 'CustomAction' : 'Extension', diff --git a/tools/marvin/marvin/lib/base.py b/tools/marvin/marvin/lib/base.py index 97bfe43896a2..0c9843f43993 100755 --- a/tools/marvin/marvin/lib/base.py +++ 
b/tools/marvin/marvin/lib/base.py @@ -2714,6 +2714,13 @@ def create(cls, apiclient, services, tags=None, domainid=None, cacheMode=None, * if "diskofferingid" in services: cmd.diskofferingid = services["diskofferingid"] + if "vgpuprofileid" in services: + cmd.vgpuprofileid = services["vgpuprofileid"] + cmd.gpucount = 1 + + if "gpucount" in services: + cmd.gpucount = services["gpucount"] + # Service Offering private to that domain if domainid: cmd.domainid = domainid @@ -3334,6 +3341,14 @@ def delete(self, apiclient): apiclient.deleteHost(cmd) return + def discoverGpuDevices(self, apiclient): + """Discover GPU devices on the host""" + # Host must be in maintenance mode before deletion + cmd = discoverGpuDevices.discoverGpuDevicesCmd() + cmd.id = self.id + apiclient.discoverGpuDevices(cmd) + return + @classmethod def enableMaintenance(cls, apiclient, id): """enables maintenance mode Host""" @@ -7831,3 +7846,83 @@ def changediskoffering(self, apiclient, diskofferingid=None, size=None): cmd.diskofferingid = diskofferingid cmd.size = size return (apiclient.changeSharedFileSystemDiskOffering(cmd)) + +class GpuDevice: + + def __init__(self, items): + self.__dict__.update(items) + + """Manage GPU Device""" + @classmethod + def create(cls, apiclient, services, name, description=None, hostid=None, busaddress=None, gpuCardId=None, vgpuProfileId=None, type=None, parentGpuDeviceId=None, numaNode=None, pciRoot=None): + """Create GPU Device""" + cmd = createGpuDevice.createGpuDeviceCmd() + cmd.name = name + + if description: + cmd.description = description + + + if hostid: + cmd.hostid = hostid + elif "hostid" in services: + cmd.hostid = services["hostid"] + + if busaddress: + cmd.busaddress = busaddress + elif "busaddress" in services: + cmd.busaddress = services["busaddress"] + + if gpuCardId: + cmd.gpuCardId = gpuCardId + elif "gpuCardId" in services: + cmd.gpuCardId = services["gpuCardId"] + + if vgpuProfileId: + cmd.vgpuProfileId = vgpuProfileId + elif "vgpuProfileId" in 
services: + cmd.vgpuProfileId = services["vgpuProfileId"] + + if type: + cmd.type = type + elif "type" in services: + cmd.type = services["type"] + + if parentGpuDeviceId: + cmd.parentGpuDeviceId = parentGpuDeviceId + elif "parentGpuDeviceId" in services: + cmd.parentGpuDeviceId = services["parentGpuDeviceId"] + + if numaNode: + cmd.numaNode = numaNode + elif "numaNode" in services: + cmd.numaNode = services["numaNode"] + + if pciRoot: + cmd.pciRoot = pciRoot + elif "pciRoot" in services: + cmd.pciRoot = services["pciRoot"] + + return GpuDevice(apiclient.createGpuDevice(cmd).__dict__) + + def delete(self, apiclient, expunge=True, forced=True): + """Delete GPU Device""" + cmd = deleteGpuDevice.deleteGpuDeviceCmd() + cmd.id = self.id + cmd.expunge = expunge + cmd.forced = forced + apiclient.deleteGpuDevice(cmd) + + + @classmethod + def list(cls, apiclient, **kwargs): + cmd = listGpuDevices.listGpuDevicesCmd() + [setattr(cmd, k, v) for k, v in list(kwargs.items())] + return (apiclient.listGpuDevices(cmd)) + + def update(self, apiclient, **kwargs): + """Update GPU Device""" + cmd = updateGpuDevice.updateGpuDeviceCmd() + cmd.id = self.id + [setattr(cmd, k, v) for k, v in list(kwargs.items())] + return (apiclient.updateGpuDevice(cmd)) diff --git a/ui/public/locales/en.json b/ui/public/locales/en.json index 4ec03d375a37..e644acb4e07a 100644 --- a/ui/public/locales/en.json +++ b/ui/public/locales/en.json @@ -89,6 +89,7 @@ "label.action.delete.egress.firewall": "Delete egress firewall rule", "label.action.delete.firewall": "Delete firewall rule", "label.action.delete.interface.static.route": "Remove Tungsten Fabric interface static route", +"label.action.delete.gpu.card": "Delete GPU card", "label.action.delete.guest.os": "Delete guest OS", "label.action.delete.guest.os.category": "Delete guest OS category", "label.action.delete.guest.os.hypervisor.mapping": "Delete guest OS hypervisor mapping", @@ -110,6 +111,7 @@ "label.action.delete.template": "Delete Template", 
"label.action.delete.tungsten.router.table": "Remove Tungsten Fabric route table from Network", "label.action.delete.user": "Delete User", +"label.action.delete.vgpu.profile": "Delete vGPU profile", "label.action.delete.volume": "Delete volume", "label.action.delete.zone": "Delete zone", "label.action.destroy.instance": "Destroy Instance", @@ -268,6 +270,8 @@ "label.add.f5.device": "Add F5 device", "label.add.firewall": "Add firewall rule", "label.add.firewallrule": "Add Firewall Rule", +"label.add.gpu.card": "Add GPU card", +"label.add.gpu.device": "Add GPU device", "label.add.guest.network": "Add guest Network", "label.add.guest.os": "Add guest OS", "label.add.guest.os.category": "Add guest OS category", @@ -339,6 +343,7 @@ "label.add.user": "Add User", "label.add.upstream.ipv4.routes": "Add upstream IPv4 routes", "label.add.upstream.ipv6.routes": "Add upstream IPv6 routes", +"label.add.vgpu.profile": "Add profile", "label.add.vm": "Add Instance", "label.add.vms": "Add Instances", "label.add.vmware.datacenter": "Add VMware datacenter", @@ -538,6 +543,7 @@ "label.cks.cluster.worker.nodes.templateid": "Template for Worker Nodes", "label.cleanup": "Clean up", "label.clear": "Clear", +"label.clear.all": "Clear all", "label.clear.list": "Clear list", "label.clear.notification": "Clear notification", "label.clientid": "Provider Client ID", @@ -625,6 +631,7 @@ "label.copy.password": "Copy password", "label.core": "Core", "label.core.zone.type": "Core zone type", +"label.count": "Count", "label.counter": "Counter", "label.counter.name": "Name of the counter for which the policy will be evaluated", "label.cpu": "CPU", @@ -829,6 +836,7 @@ "label.desttaguuid": "Destination Tag", "label.details": "Details", "label.deviceid": "Device ID", +"label.devicename": "Device Name", "label.devices": "Devices", "label.dhcp": "DHCP", "label.direct.attached.public.ip": "Direct attached public IP", @@ -847,6 +855,7 @@ "label.disable.webhook": "Disable Webhook", "label.disabled": 
"Disabled", "label.disconnected": "Last disconnected", +"label.discover.gpu.devices": "Discover GPU devices", "label.disk": "Disk", "label.disk.offerings": "Disk offerings", "label.disk.path": "Disk Path", @@ -1090,6 +1099,26 @@ "label.glustervolume": "Volume", "label.go.back": "Go back", "label.gpu": "GPU", +"label.gpucardid": "GPU Card", +"label.gpucardname": "GPU Card", +"label.gpu.card": "GPU Card", +"label.gpu.card.types": "GPU Card Types", +"label.gpu.count": "GPU Count", +"label.gpucount": "GPU Count", +"label.gpu.device": "GPU Device", +"label.gpu.devices": "GPU Devices", +"label.gpu.enabled": "GPU Enabled", +"label.gpuenabled": "GPU Enabled", +"label.gpudevicetype": "Device Type", +"label.gpu.devices.add": "Add GPU Device", +"label.gpu.devices.delete": "Delete GPU Device", +"label.gpu.devices.manage": "Manage GPU Device", +"label.gpu.devices.unmanage": "Unmanage GPU Device", +"label.gpu.display": "GPU Display", +"label.gpulimit": "GPU limits", +"label.gpu.summary": "Summary", +"label.gputotal": "GPU Total", +"label.gpuused": "GPU Used", "label.chart.info": "Information about the charts", "label.group": "Group", "label.group.optional": "Group (Optional)", @@ -1472,7 +1501,9 @@ "label.maxcpunumber": "Max CPU cores", "label.maxdatavolumeslimit": "Max data volumes limit", "label.maxerrorretry": "Max error retry", +"label.maxgpu": "Max. GPUs", "label.maxguestslimit": "Max guest limit", +"label.maxheads": "Max. heads", "label.maxhostspercluster": "Max hosts per cluster", "label.maximum": "Maximum", "label.maxinstance": "Max Instances", @@ -1484,12 +1515,15 @@ "label.maxprimarystorage": "Max. primary storage (GiB)", "label.maxproject": "Max. projects", "label.maxpublicip": "Max. public IPs", +"label.maxresolutionx": "Max. resolution X", +"label.maxresolutiony": "Max. resolution Y", "label.maxsecondarystorage": "Max. secondary storage (GiB)", "label.maxsize": "Maximum size", "label.maxsnapshot": "Max. Snapshots", "label.maxtemplate": "Max. 
Templates", "label.maxuservm": "Max. User Instances", "label.maxvolume": "Max. volumes", +"label.maxvgpuperphysicalgpu": "Max. vGPUs per physical GPU", "label.maxvpc": "Max. VPCs", "label.may.continue": "You may now continue.", "label.mb.memory": "MB memory", @@ -1673,6 +1707,7 @@ "label.nsx.supports.internal.lb": "Enable NSX internal LB service", "label.nsx.supports.lb": "Enable NSX LB service", "label.num.cpu.cores": "# of CPU cores", +"label.numanode": "NUMA node", "label.number": "#Rule", "label.numretries": "Number of retries", "label.nvpdeviceid": "ID", @@ -1759,6 +1794,7 @@ "label.param.name": "Parameter name", "label.param.value": "Parameter value", "label.parentdomainname": "Parent domain", +"label.parentgpudeviceid": "Parent GPU device", "label.parentname": "Parent", "label.parentsubnet": "Parent Subnet", "label.passive": "Passive", @@ -2033,6 +2069,7 @@ "label.reset.userdata.on.autoscale.vm.group": "Reset Userdata on AutoScale VM Group", "label.reset.userdata.on.vm": "Reset Userdata on Instance", "label.reset.vpn.connection": "Reset VPN connection", +"label.resolution": "Resolution", "label.resource": "Resource", "label.resource.limit.exceeded": "Resource limit exceeded", "label.resource.name": "Resource name", @@ -2141,6 +2178,8 @@ "label.see.more.info.shown.charts": "See more info about the shown charts", "label.select-view": "Select view", "label.select.a.zone": "Select a zone", +"label.select.all": "Select all", +"label.select.columns": "Select columns", "label.select.deployment.infrastructure": "Select deployment infrastructure", "label.select.guest.os.type": "Please select the guest OS type", "label.select.network": "Select Network", @@ -2198,6 +2237,7 @@ "label.sharedrouteripv6": "IPv6 address for the VR in this shared Network.", "label.sharewith": "Share with", "label.showing": "Showing", +"label.show.only.gpu.enabled.offerings": "Show only GPU enabled offerings", "label.show.usage.records": "Show usage records", "label.showing.results.for": 
"Showing results for \"%x\"", "label.shrinkok": "Shrink OK", @@ -2504,6 +2544,8 @@ "label.update.autoscale.vmgroup": "Update AutoScaling Group", "label.update.bgp.peer": "Update BGP peer", "label.update.condition": "Update condition", +"label.update.gpu.device": "Update GPU device", +"label.update.vgpu.profile": "Update vGPU profile", "label.update.custom.action": "Update Custom Action", "label.update.extension": "Update Extension", "label.update.sharedfs": "Update Shared FileSystem", @@ -2583,11 +2625,18 @@ "label.vcenterpassword": "vCenter password", "label.vcenterusername": "vCenter username", "label.vcsdeviceid": "ID", +"label.vendorid": "Vendor ID", +"label.vendorname": "Vendor Name", "label.verify": "Verify", "label.version": "Version", "label.versions": "Versions", -"label.vgpu": "VGPU", +"label.vgpu": "Profile", +"label.vgpuprofileid": "Profile", +"label.vgpuprofileids": "Profile", +"label.vgpuprofilename": "Profile", +"label.vgpu.profile": "GPU Profile", "label.vgputype": "vGPU type", +"label.videoram": "Video RAM", "label.view": "View", "label.view.all": "View all", "label.view.console": "View console", @@ -2704,6 +2753,7 @@ "label.vpn.users": "VPN Users", "label.vpncustomergateway": "IP address of the remote gateway", "label.vpncustomergatewayid": "VPN customer gateway", +"label.vramsize": "VRAM Size", "label.vsmipaddress": "Nexus 1000v IP address", "label.vsmpassword": "Nexus 1000v password", "label.vsmusername": "Nexus 1000v username", @@ -2751,6 +2801,7 @@ "label.zones": "Zones", "label.zonewizard.traffictype.storage": "Storage: Traffic between primary and secondary storage servers, such as Instance Templates and Snapshots.", "label.buckets": "Buckets", +"label.busaddress": "Address", "label.objectstorageid": "Object Storage Pool", "label.oobm.address": "Out-of-band management address", "label.oobm.driver": "Out-of-band management driver", @@ -2796,6 +2847,7 @@ "message.action.delete.external.load.balancer": "Please confirm that you would like to 
remove this external load balancer. Warning: If you are planning to add back the same external load balancer, you must reset usage data on the device.", "message.action.delete.ingress.rule": "Please confirm that you want to delete this ingress rule.", "message.action.delete.ipv4.subnet": "Please confirm that you want to delete this IPv4 subnet.", +"message.action.delete.gpu.card": "Please confirm that you want to delete this GPU card.", "message.action.delete.guest.os": "Please confirm that you want to delete this guest os. System defined entry cannot be deleted.", "message.action.delete.guest.os.category": "Please confirm that you want to delete this guest os category.", "message.action.delete.guest.os.hypervisor.mapping": "Please confirm that you want to delete this guest os hypervisor mapping. System defined entry cannot be deleted.", @@ -2814,6 +2866,7 @@ "message.action.delete.snapshot": "Please confirm that you want to delete this Snapshot.", "message.action.delete.template": "Please confirm that you want to delete this Template.", "message.action.delete.tungsten.router.table": "Please confirm that you want to remove Route Table from this Network?", +"message.action.delete.vgpu.profile": "Please confirm that you want to delete this vGPU profile.", "message.action.delete.volume": "Please confirm that you want to delete this volume. 
Note: this will not delete any Snapshots of this volume.", "message.action.delete.vpn.user": "Please confirm that you want to delete the VPN user.", "message.action.delete.zone": "Please confirm that you want to delete this zone.", @@ -2832,6 +2885,7 @@ "message.action.disable.pod": "Please confirm that you want to disable this pod.", "message.action.disable.static.nat": "Please confirm that you want to disable static NAT.", "message.action.disable.zone": "Please confirm that you want to disable this zone.", +"message.action.discover.gpu.devices": "Please confirm that you want to discover GPU devices.", "message.action.download.iso": "Please confirm that you want to download this ISO.", "message.action.download.snapshot": "Please confirm that you want to download this Snapshot.", "message.action.download.template": "Please confirm that you want to download this Template.", @@ -3031,6 +3085,8 @@ "message.confirm.delete.brocadevcs": "Please confirm that you would like to delete Brocade Vcs Switch.", "message.confirm.delete.ciscoasa1000v": "Please confirm you want to delete CiscoASA1000v.", "message.confirm.delete.ciscovnmc.resource": "Please confirm you want to delete CiscoVNMC resource.", +"message.confirm.delete.gpu.devices": "Please confirm that you would like to delete this GPU device?", +"message.confirm.delete.vgpu.profile": "Please confirm that you want to delete this vGPU profile?", "message.confirm.delete.f5": "Please confirm that you would like to delete F5.", "message.confirm.delete.internal.lb": "Please confirm you want to delete internal LB.", "message.confirm.delete.netscaler": "Please confirm that you would like to delete NetScaler.", @@ -3049,6 +3105,7 @@ "message.confirm.disable.storage": "Please confirm that you want to disable the storage pool.", "message.confirm.disable.vpc.offering": "Are you sure you want to disable this VPC offering?", "message.confirm.disable.webhook": "Please confirm that you want to disable this webhook.", 
+"message.confirm.discover.gpu.devices": "Please confirm that you want to discover GPU devices?", "message.confirm.enable.autoscale.vmgroup": "Please confirm that you want to enable this autoscaling group.", "message.confirm.enable.custom.action": "Please confirm that you want to enable this custom action.", "message.confirm.enable.extension": "Please confirm that you want to enable this extension.", @@ -3058,6 +3115,7 @@ "message.confirm.enable.storage": "Please confirm that you want to enable the storage pool.", "message.confirm.enable.vpc.offering": "Are you sure you want to enable this VPC offering?", "message.confirm.enable.webhook": "Please confirm that you want to enable this webhook.", +"message.confirm.manage.gpu.devices": "Please confirm that you want to manage the selected GPU devices?", "message.confirm.remove.firewall.rule": "Please confirm that you want to delete this Firewall Rule?", "message.confirm.remove.ip.range": "Please confirm that you would like to remove this IP range.", "message.confirm.remove.network.offering": "Are you sure you want to remove this Network offering?", @@ -3075,6 +3133,7 @@ "message.confirm.start.lb.vm": "Please confirm you want to start LB Instance.", "message.confirm.sync.storage": "Please confirm you want to sync the storage pool", "message.confirm.type": "To confirm, please type", +"message.confirm.unmanage.gpu.devices": "Please confirm that you want to unmanage the selected GPU devices?", "message.confirm.upgrade.router.newer.template": "Please confirm that you want to upgrade router to use newer Template.", "message.cpu.usage.info": "The CPU usage percentage can exceed 100% if the Instance has more than 1 vCPU or when CPU Cap is not enabled. 
This behavior happens according to the hypervisor being used (e.g: in KVM), due to how they account the stats", "message.create.bucket.failed": "Failed to create bucket.", @@ -3648,6 +3707,7 @@ "message.success.add.egress.rule": "Successfully added new egress rule", "message.success.add.firewall.rule": "Successfully added new firewall rule", "message.success.add.guest.network": "Successfully created guest Network", +"message.success.add.gpu.device": "Successfully added GPU device", "message.success.add.interface.static.route": "Successfully added interface Static Route", "message.success.add.iprange": "Successfully added IP range", "message.success.add.ipv4.subnet": "Successfully added IPv4 subnet", @@ -3718,6 +3778,7 @@ "message.success.delete.backup.schedule": "Successfully deleted configure Instance backup schedule", "message.success.delete.bgp.peer": "Successfully deleted BGP peer", "message.success.delete.custom.action": "Successfully deleted Custom Action", +"message.success.delete.gpu.devices": "Successfully deleted GPU device(s)", "message.success.delete.icon": "Successfully deleted icon of", "message.success.delete.interface.static.route": "Successfully removed interface Static Route", "message.success.delete.ipv4.subnet": "Successfully removed IPv4 subnet", @@ -3733,12 +3794,14 @@ "message.success.delete.vpn.gateway": "Successfully deleted VPN gateway", "message.success.disable.saml.auth": "Successfully disabled SAML authorization", "message.success.disable.vpn": "Successfully disabled VPN", +"message.success.discover.gpu.devices": "Successfully discovered GPU devices", "message.success.edit.acl": "Successfully edited ACL rule", "message.success.edit.primary.storage": "Successfully edited Primary Storage", "message.success.edit.rule": "Successfully edited rule", "message.success.enable.saml.auth": "Successfully enabled SAML Authorization", "message.success.import.instance": "Successfully imported Instance", "message.success.import.volume": "Successfully 
imported Volume", +"message.success.manage.gpu.devices": "Successfully managed GPU device(s)", "message.success.migrate.volume": "Successfully migrated volume", "message.success.migrating": "Migration completed successfully for", "message.success.migration": "Migration completed successfully", @@ -3772,6 +3835,7 @@ "message.success.reset.network.permissions": "Successfully reset Network Permissions", "message.success.resize.volume": "Successfully resized volume", "message.success.scale.kubernetes": "Successfully scaled Kubernetes cluster", +"message.success.unmanage.gpu.devices": "Successfully unmanaged GPU device(s)", "message.success.unmanage.instance": "Successfully unmanaged Instance", "message.success.unmanage.volume": "Successfully unmanaged Volume", "message.success.unregister.extension": "Successfull unregistered Extension", @@ -3779,6 +3843,10 @@ "message.success.update.bgp.peer": "Successfully updated BGP peer", "message.success.update.bucket": "Successfully updated bucket", "message.success.update.condition": "Successfully updated condition", +"message.success.update.gpu.device": "Successfully updated GPU device", +"message.success.create.vgpu.profile": "Successfully created vGPU profile", +"message.success.update.vgpu.profile": "Successfully updated vGPU profile", +"message.success.delete.vgpu.profile": "Successfully deleted vGPU profile", "message.success.update.custom.action": "Successfully updated Custom Action", "message.success.update.extension": "Successfully updated Extension", "message.success.update.sharedfs": "Successfully updated Shared FileSystem", @@ -3947,6 +4015,7 @@ "state.stopped": "Stopped", "state.stopping": "Stopping", "state.suspended": "Suspended", +"state.partiallyallocated": "Partially Allocated", "user.login": "Login", "user.logout": "Logout", "ALLOCATED_VM": "Allocated VM", diff --git a/ui/src/components/view/GPUDevicesTab.vue b/ui/src/components/view/GPUDevicesTab.vue new file mode 100644 index 000000000000..ee67a81b07d8 --- 
/dev/null +++ b/ui/src/components/view/GPUDevicesTab.vue @@ -0,0 +1,902 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + + + + + diff --git a/ui/src/components/view/GPUSummaryTab.vue b/ui/src/components/view/GPUSummaryTab.vue new file mode 100644 index 000000000000..8b649e056629 --- /dev/null +++ b/ui/src/components/view/GPUSummaryTab.vue @@ -0,0 +1,301 @@ + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ + + + + + diff --git a/ui/src/components/view/GPUTab.vue b/ui/src/components/view/GPUTab.vue new file mode 100644 index 000000000000..d2cd2f3d2cb5 --- /dev/null +++ b/ui/src/components/view/GPUTab.vue @@ -0,0 +1,440 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + + + + + diff --git a/ui/src/components/view/InfoCard.vue b/ui/src/components/view/InfoCard.vue index baedfc12170f..449172112a03 100644 --- a/ui/src/components/view/InfoCard.vue +++ b/ui/src/components/view/InfoCard.vue @@ -256,6 +256,26 @@ +

+
{{ $t('label.gpu') }}
+
+ + + {{ resource.gpucount ? resource.gpucount + ' x ' : '' }} + {{ resource.gpucardname}} + {{ resource.gpucardname }} + {{ ' (' + resource.vgpuprofilename + ')' }} + {{ ' (' + resource.vgpuprofilename + ')' }} + +
{{ ' [' + (resource.videoram ? (resource.videoram + 'MB') : '') + ((resource.videoram && resource.maxresolutionx && resource.maxresolutiony) ? ', ' : '') + + (resource.maxresolutionx && resource.maxresolutiony ? resource.maxresolutionx + 'x' + resource.maxresolutiony : '') + ']' }} +
+
+
+
{{ $t('label.memory') }}
@@ -321,6 +341,27 @@
+
+
{{ $t('label.gpu') }}
+
+ + {{ resource.gputotal + ' ' + $t('label.gpu') }} +
+
+ + + +
+
{{ $t('label.size') }}
{{ $t('label.disksize') }}
@@ -584,6 +625,17 @@ {{ resource.serviceofferingname || resource.serviceofferingid }} {{ resource.serviceofferingname || resource.serviceofferingid }} {{ resource.serviceofferingname || resource.serviceofferingid }} + + + + + +
diff --git a/ui/src/components/view/ListView.vue b/ui/src/components/view/ListView.vue index 8f9d17ff23bf..e5d843d4ce49 100644 --- a/ui/src/components/view/ListView.vue +++ b/ui/src/components/view/ListView.vue @@ -29,10 +29,20 @@ :style="{ 'overflow-y': this.$route.name === 'usage' ? 'hidden' : 'auto' }" >