diff --git a/data/adopters.yaml b/data/adopters.yaml
new file mode 100644
index 0000000..61a4d28
--- /dev/null
+++ b/data/adopters.yaml
@@ -0,0 +1,7 @@
+adopters:
+  project: Kepler
+  description: >
+    Check out everything about Kepler on our [website](https://sustainable-computing.io/).
+  companies:
+    - name: Kepler
+      url: https://sustainable-computing.io/
diff --git a/docs/README.md b/docs/README.md
deleted file mode 100644
index 604d9f9..0000000
--- a/docs/README.md
+++ /dev/null
@@ -1,4 +0,0 @@
----
-title: readme
-slug: /readme
----
\ No newline at end of file
diff --git a/docs/FAQ/FAQ.md b/docs/en/FAQ/FAQ.md similarity index 100% rename from docs/FAQ/FAQ.md rename to docs/en/FAQ/FAQ.md diff --git a/blog/2024-12-18-support-blog-post/index.md b/docs/en/blog/2024-12-18-support-blog-post/index.md similarity index 100% rename from blog/2024-12-18-support-blog-post/index.md rename to docs/en/blog/2024-12-18-support-blog-post/index.md diff --git a/blog/2024-12-31-post/index.md b/docs/en/blog/2024-12-31-post/index.md similarity index 100% rename from blog/2024-12-31-post/index.md rename to docs/en/blog/2024-12-31-post/index.md diff --git a/changelog/source/authors.json b/docs/en/changelog/source/authors.json similarity index 100% rename from changelog/source/authors.json rename to docs/en/changelog/source/authors.json diff --git a/changelog/source/v2.5.0.md b/docs/en/changelog/source/v2.5.0.md similarity index 100% rename from changelog/source/v2.5.0.md rename to docs/en/changelog/source/v2.5.0.md diff --git a/changelog/source/v2.5.1.md b/docs/en/changelog/source/v2.5.1.md similarity index 100% rename from changelog/source/v2.5.1.md rename to docs/en/changelog/source/v2.5.1.md diff --git a/docs/contributor/adopters.md b/docs/en/contributor/adopters.md similarity index 100% rename from docs/contributor/adopters.md rename to docs/en/contributor/adopters.md diff --git a/docs/contributor/cherry-picks.md b/docs/en/contributor/cherry-picks.md similarity index 100% rename from docs/contributor/cherry-picks.md rename to docs/en/contributor/cherry-picks.md diff --git a/docs/contributor/contribute-docs.md b/docs/en/contributor/contribute-docs.md similarity index 100% rename from docs/contributor/contribute-docs.md rename to docs/en/contributor/contribute-docs.md diff --git a/docs/contributor/contributers.md b/docs/en/contributor/contributers.md similarity index 100% rename from docs/contributor/contributers.md rename to docs/en/contributor/contributers.md diff --git a/docs/contributor/contributing.md b/docs/en/contributor/contributing.md similarity index 100% rename from docs/contributor/contributing.md rename to docs/en/contributor/contributing.md diff --git a/docs/contributor/github-workflow.md b/docs/en/contributor/github-workflow.md similarity index 100% rename from docs/contributor/github-workflow.md rename to docs/en/contributor/github-workflow.md diff --git a/docs/contributor/goverance.md b/docs/en/contributor/goverance.md similarity index 100% rename from docs/contributor/goverance.md rename to docs/en/contributor/goverance.md diff --git a/docs/contributor/ladder.md b/docs/en/contributor/ladder.md similarity index 100% rename from docs/contributor/ladder.md rename to docs/en/contributor/ladder.md diff --git a/docs/contributor/lifted.md b/docs/en/contributor/lifted.md similarity index 100% rename from docs/contributor/lifted.md rename to docs/en/contributor/lifted.md diff --git a/docs/core-concepts/architecture.md b/docs/en/core-concepts/architecture.md similarity index 100% rename from
docs/core-concepts/architecture.md rename to docs/en/core-concepts/architecture.md diff --git a/docs/core-concepts/introduction.md b/docs/en/core-concepts/introduction.md similarity index 100% rename from docs/core-concepts/introduction.md rename to docs/en/core-concepts/introduction.md diff --git a/docs/developers/Dynamic-mig.md b/docs/en/developers/Dynamic-mig.md similarity index 100% rename from docs/developers/Dynamic-mig.md rename to docs/en/developers/Dynamic-mig.md diff --git a/docs/developers/HAMi-core-design.md b/docs/en/developers/HAMi-core-design.md similarity index 100% rename from docs/developers/HAMi-core-design.md rename to docs/en/developers/HAMi-core-design.md diff --git a/docs/developers/build.md b/docs/en/developers/build.md similarity index 100% rename from docs/developers/build.md rename to docs/en/developers/build.md diff --git a/docs/developers/kunlunxin-topology.md b/docs/en/developers/kunlunxin-topology.md similarity index 100% rename from docs/developers/kunlunxin-topology.md rename to docs/en/developers/kunlunxin-topology.md diff --git a/docs/developers/mindmap.md b/docs/en/developers/mindmap.md similarity index 100% rename from docs/developers/mindmap.md rename to docs/en/developers/mindmap.md diff --git a/docs/developers/protocol.md b/docs/en/developers/protocol.md similarity index 100% rename from docs/developers/protocol.md rename to docs/en/developers/protocol.md diff --git a/docs/developers/scheduling.md b/docs/en/developers/scheduling.md similarity index 100% rename from docs/developers/scheduling.md rename to docs/en/developers/scheduling.md diff --git a/docs/get-started/deploy-with-helm.md b/docs/en/get-started/deploy-with-helm.md similarity index 100% rename from docs/get-started/deploy-with-helm.md rename to docs/en/get-started/deploy-with-helm.md diff --git a/docs/installation/aws-installation.md b/docs/en/installation/aws-installation.md similarity index 100% rename from docs/installation/aws-installation.md rename to docs/en/installation/aws-installation.md diff --git a/docs/installation/how-to-use-volcano-vgpu.md b/docs/en/installation/how-to-use-volcano-vgpu.md similarity index 100% rename from docs/installation/how-to-use-volcano-vgpu.md rename to docs/en/installation/how-to-use-volcano-vgpu.md diff --git a/docs/installation/offline-installation.md b/docs/en/installation/offline-installation.md similarity index 100% rename from docs/installation/offline-installation.md rename to docs/en/installation/offline-installation.md diff --git a/docs/installation/online-installation.md b/docs/en/installation/online-installation.md similarity index 100% rename from docs/installation/online-installation.md rename to docs/en/installation/online-installation.md diff --git a/docs/installation/prequisities.md b/docs/en/installation/prequisities.md similarity index 100% rename from docs/installation/prequisities.md rename to docs/en/installation/prequisities.md diff --git a/docs/installation/uninstall.md b/docs/en/installation/uninstall.md similarity index 100% rename from docs/installation/uninstall.md rename to docs/en/installation/uninstall.md diff --git a/docs/installation/upgrade.md b/docs/en/installation/upgrade.md similarity index 100% rename from docs/installation/upgrade.md rename to docs/en/installation/upgrade.md diff --git a/docs/installation/webui-installation.md b/docs/en/installation/webui-installation.md similarity index 100% rename from docs/installation/webui-installation.md rename to docs/en/installation/webui-installation.md diff --git 
a/docs/key-features/device-resource-isolation.md b/docs/en/key-features/device-resource-isolation.md similarity index 100% rename from docs/key-features/device-resource-isolation.md rename to docs/en/key-features/device-resource-isolation.md diff --git a/docs/key-features/device-sharing.md b/docs/en/key-features/device-sharing.md similarity index 100% rename from docs/key-features/device-sharing.md rename to docs/en/key-features/device-sharing.md diff --git a/docs/resources/HAMI-VGPU-mind-map-English.png b/docs/en/resources/HAMI-VGPU-mind-map-English.png similarity index 100% rename from docs/resources/HAMI-VGPU-mind-map-English.png rename to docs/en/resources/HAMI-VGPU-mind-map-English.png diff --git a/docs/resources/Karmada-logo-horizontal-color.png b/docs/en/resources/Karmada-logo-horizontal-color.png similarity index 100% rename from docs/resources/Karmada-logo-horizontal-color.png rename to docs/en/resources/Karmada-logo-horizontal-color.png diff --git a/docs/resources/administrator/prometheus/grafana.png b/docs/en/resources/administrator/prometheus/grafana.png similarity index 100% rename from docs/resources/administrator/prometheus/grafana.png rename to docs/en/resources/administrator/prometheus/grafana.png diff --git a/docs/resources/adoptions-ci123-architecture.png b/docs/en/resources/adoptions-ci123-architecture.png similarity index 100% rename from docs/resources/adoptions-ci123-architecture.png rename to docs/en/resources/adoptions-ci123-architecture.png diff --git a/docs/resources/adoptions-ci123-aries.png b/docs/en/resources/adoptions-ci123-aries.png similarity index 100% rename from docs/resources/adoptions-ci123-aries.png rename to docs/en/resources/adoptions-ci123-aries.png diff --git a/docs/resources/adoptions-ci123-automation-cluster-en.png b/docs/en/resources/adoptions-ci123-automation-cluster-en.png similarity index 100% rename from docs/resources/adoptions-ci123-automation-cluster-en.png rename to docs/en/resources/adoptions-ci123-automation-cluster-en.png diff --git a/docs/resources/adoptions-ci123-automation-cluster-zh.png b/docs/en/resources/adoptions-ci123-automation-cluster-zh.png similarity index 100% rename from docs/resources/adoptions-ci123-automation-cluster-zh.png rename to docs/en/resources/adoptions-ci123-automation-cluster-zh.png diff --git a/docs/resources/adoptions-ci123-capability-visualization.png b/docs/en/resources/adoptions-ci123-capability-visualization.png similarity index 100% rename from docs/resources/adoptions-ci123-capability-visualization.png rename to docs/en/resources/adoptions-ci123-capability-visualization.png diff --git a/docs/resources/adoptions-ci123-cluster-inspection.png b/docs/en/resources/adoptions-ci123-cluster-inspection.png similarity index 100% rename from docs/resources/adoptions-ci123-cluster-inspection.png rename to docs/en/resources/adoptions-ci123-cluster-inspection.png diff --git a/docs/resources/adoptions-ci123-gpu-resources.png b/docs/en/resources/adoptions-ci123-gpu-resources.png similarity index 100% rename from docs/resources/adoptions-ci123-gpu-resources.png rename to docs/en/resources/adoptions-ci123-gpu-resources.png diff --git a/docs/resources/adoptions-ci123-msp-multicluster-1.png b/docs/en/resources/adoptions-ci123-msp-multicluster-1.png similarity index 100% rename from docs/resources/adoptions-ci123-msp-multicluster-1.png rename to docs/en/resources/adoptions-ci123-msp-multicluster-1.png diff --git a/docs/resources/adoptions-ci123-msp-multicluster-2.png 
b/docs/en/resources/adoptions-ci123-msp-multicluster-2.png similarity index 100% rename from docs/resources/adoptions-ci123-msp-multicluster-2.png rename to docs/en/resources/adoptions-ci123-msp-multicluster-2.png diff --git a/docs/resources/adoptions-ci123-multicluster-capability.png b/docs/en/resources/adoptions-ci123-multicluster-capability.png similarity index 100% rename from docs/resources/adoptions-ci123-multicluster-capability.png rename to docs/en/resources/adoptions-ci123-multicluster-capability.png diff --git a/docs/resources/adoptions-ci123-override.png b/docs/en/resources/adoptions-ci123-override.png similarity index 100% rename from docs/resources/adoptions-ci123-override.png rename to docs/en/resources/adoptions-ci123-override.png diff --git a/docs/resources/adoptions-ci123-sequence-status.png b/docs/en/resources/adoptions-ci123-sequence-status.png similarity index 100% rename from docs/resources/adoptions-ci123-sequence-status.png rename to docs/en/resources/adoptions-ci123-sequence-status.png diff --git a/docs/resources/adoptions-ci123-unified-view-1.png b/docs/en/resources/adoptions-ci123-unified-view-1.png similarity index 100% rename from docs/resources/adoptions-ci123-unified-view-1.png rename to docs/en/resources/adoptions-ci123-unified-view-1.png diff --git a/docs/resources/adoptions-ci123-unified-view-2.png b/docs/en/resources/adoptions-ci123-unified-view-2.png similarity index 100% rename from docs/resources/adoptions-ci123-unified-view-2.png rename to docs/en/resources/adoptions-ci123-unified-view-2.png diff --git a/docs/resources/adoptions-ci123-velero.png b/docs/en/resources/adoptions-ci123-velero.png similarity index 100% rename from docs/resources/adoptions-ci123-velero.png rename to docs/en/resources/adoptions-ci123-velero.png diff --git a/docs/resources/adoptions-vipkid-architecture-en.png b/docs/en/resources/adoptions-vipkid-architecture-en.png similarity index 100% rename from docs/resources/adoptions-vipkid-architecture-en.png rename to docs/en/resources/adoptions-vipkid-architecture-en.png diff --git a/docs/resources/adoptions-vipkid-architecture-zh.png b/docs/en/resources/adoptions-vipkid-architecture-zh.png similarity index 100% rename from docs/resources/adoptions-vipkid-architecture-zh.png rename to docs/en/resources/adoptions-vipkid-architecture-zh.png diff --git a/docs/resources/architect.jpg b/docs/en/resources/architect.jpg similarity index 100% rename from docs/resources/architect.jpg rename to docs/en/resources/architect.jpg diff --git a/docs/resources/architecture.drawio b/docs/en/resources/architecture.drawio similarity index 100% rename from docs/resources/architecture.drawio rename to docs/en/resources/architecture.drawio diff --git a/docs/resources/architecture.png b/docs/en/resources/architecture.png similarity index 100% rename from docs/resources/architecture.png rename to docs/en/resources/architecture.png diff --git a/docs/resources/argocd-new-app-cluster.png b/docs/en/resources/argocd-new-app-cluster.png similarity index 100% rename from docs/resources/argocd-new-app-cluster.png rename to docs/en/resources/argocd-new-app-cluster.png diff --git a/docs/resources/argocd-new-app-name.png b/docs/en/resources/argocd-new-app-name.png similarity index 100% rename from docs/resources/argocd-new-app-name.png rename to docs/en/resources/argocd-new-app-name.png diff --git a/docs/resources/argocd-new-app-repo.png b/docs/en/resources/argocd-new-app-repo.png similarity index 100% rename from docs/resources/argocd-new-app-repo.png rename to 
docs/en/resources/argocd-new-app-repo.png diff --git a/docs/resources/argocd-new-app.png b/docs/en/resources/argocd-new-app.png similarity index 100% rename from docs/resources/argocd-new-app.png rename to docs/en/resources/argocd-new-app.png diff --git a/docs/resources/argocd-register-karmada.png b/docs/en/resources/argocd-register-karmada.png similarity index 100% rename from docs/resources/argocd-register-karmada.png rename to docs/en/resources/argocd-register-karmada.png diff --git a/docs/resources/argocd-status-aggregated.png b/docs/en/resources/argocd-status-aggregated.png similarity index 100% rename from docs/resources/argocd-status-aggregated.png rename to docs/en/resources/argocd-status-aggregated.png diff --git a/docs/resources/argocd-status-overview.png b/docs/en/resources/argocd-status-overview.png similarity index 100% rename from docs/resources/argocd-status-overview.png rename to docs/en/resources/argocd-status-overview.png diff --git a/docs/resources/argocd-status-resourcebinding.png b/docs/en/resources/argocd-status-resourcebinding.png similarity index 100% rename from docs/resources/argocd-status-resourcebinding.png rename to docs/en/resources/argocd-status-resourcebinding.png diff --git a/docs/resources/argocd-sync-apps.png b/docs/en/resources/argocd-sync-apps.png similarity index 100% rename from docs/resources/argocd-sync-apps.png rename to docs/en/resources/argocd-sync-apps.png diff --git a/docs/resources/binding-controller-process.drawio b/docs/en/resources/binding-controller-process.drawio similarity index 100% rename from docs/resources/binding-controller-process.drawio rename to docs/en/resources/binding-controller-process.drawio diff --git a/docs/resources/binding-controller-process.png b/docs/en/resources/binding-controller-process.png similarity index 100% rename from docs/resources/binding-controller-process.png rename to docs/en/resources/binding-controller-process.png diff --git a/docs/resources/casestudies/ci123/adoptions-ci123-architecture.png b/docs/en/resources/casestudies/ci123/adoptions-ci123-architecture.png similarity index 100% rename from docs/resources/casestudies/ci123/adoptions-ci123-architecture.png rename to docs/en/resources/casestudies/ci123/adoptions-ci123-architecture.png diff --git a/docs/resources/casestudies/ci123/adoptions-ci123-aries.png b/docs/en/resources/casestudies/ci123/adoptions-ci123-aries.png similarity index 100% rename from docs/resources/casestudies/ci123/adoptions-ci123-aries.png rename to docs/en/resources/casestudies/ci123/adoptions-ci123-aries.png diff --git a/docs/resources/casestudies/ci123/adoptions-ci123-automation-cluster-en.png b/docs/en/resources/casestudies/ci123/adoptions-ci123-automation-cluster-en.png similarity index 100% rename from docs/resources/casestudies/ci123/adoptions-ci123-automation-cluster-en.png rename to docs/en/resources/casestudies/ci123/adoptions-ci123-automation-cluster-en.png diff --git a/docs/resources/casestudies/ci123/adoptions-ci123-automation-cluster-zh.png b/docs/en/resources/casestudies/ci123/adoptions-ci123-automation-cluster-zh.png similarity index 100% rename from docs/resources/casestudies/ci123/adoptions-ci123-automation-cluster-zh.png rename to docs/en/resources/casestudies/ci123/adoptions-ci123-automation-cluster-zh.png diff --git a/docs/resources/casestudies/ci123/adoptions-ci123-capability-visualization.png b/docs/en/resources/casestudies/ci123/adoptions-ci123-capability-visualization.png similarity index 100% rename from 
docs/resources/casestudies/ci123/adoptions-ci123-capability-visualization.png rename to docs/en/resources/casestudies/ci123/adoptions-ci123-capability-visualization.png diff --git a/docs/resources/casestudies/ci123/adoptions-ci123-cluster-inspection.png b/docs/en/resources/casestudies/ci123/adoptions-ci123-cluster-inspection.png similarity index 100% rename from docs/resources/casestudies/ci123/adoptions-ci123-cluster-inspection.png rename to docs/en/resources/casestudies/ci123/adoptions-ci123-cluster-inspection.png diff --git a/docs/resources/casestudies/ci123/adoptions-ci123-gpu-resources.png b/docs/en/resources/casestudies/ci123/adoptions-ci123-gpu-resources.png similarity index 100% rename from docs/resources/casestudies/ci123/adoptions-ci123-gpu-resources.png rename to docs/en/resources/casestudies/ci123/adoptions-ci123-gpu-resources.png diff --git a/docs/resources/casestudies/ci123/adoptions-ci123-msp-multicluster-1.png b/docs/en/resources/casestudies/ci123/adoptions-ci123-msp-multicluster-1.png similarity index 100% rename from docs/resources/casestudies/ci123/adoptions-ci123-msp-multicluster-1.png rename to docs/en/resources/casestudies/ci123/adoptions-ci123-msp-multicluster-1.png diff --git a/docs/resources/casestudies/ci123/adoptions-ci123-msp-multicluster-2.png b/docs/en/resources/casestudies/ci123/adoptions-ci123-msp-multicluster-2.png similarity index 100% rename from docs/resources/casestudies/ci123/adoptions-ci123-msp-multicluster-2.png rename to docs/en/resources/casestudies/ci123/adoptions-ci123-msp-multicluster-2.png diff --git a/docs/resources/casestudies/ci123/adoptions-ci123-multicluster-capability.png b/docs/en/resources/casestudies/ci123/adoptions-ci123-multicluster-capability.png similarity index 100% rename from docs/resources/casestudies/ci123/adoptions-ci123-multicluster-capability.png rename to docs/en/resources/casestudies/ci123/adoptions-ci123-multicluster-capability.png diff --git a/docs/resources/casestudies/ci123/adoptions-ci123-override.png b/docs/en/resources/casestudies/ci123/adoptions-ci123-override.png similarity index 100% rename from docs/resources/casestudies/ci123/adoptions-ci123-override.png rename to docs/en/resources/casestudies/ci123/adoptions-ci123-override.png diff --git a/docs/resources/casestudies/ci123/adoptions-ci123-sequence-status.png b/docs/en/resources/casestudies/ci123/adoptions-ci123-sequence-status.png similarity index 100% rename from docs/resources/casestudies/ci123/adoptions-ci123-sequence-status.png rename to docs/en/resources/casestudies/ci123/adoptions-ci123-sequence-status.png diff --git a/docs/resources/casestudies/ci123/adoptions-ci123-unified-view-1.png b/docs/en/resources/casestudies/ci123/adoptions-ci123-unified-view-1.png similarity index 100% rename from docs/resources/casestudies/ci123/adoptions-ci123-unified-view-1.png rename to docs/en/resources/casestudies/ci123/adoptions-ci123-unified-view-1.png diff --git a/docs/resources/casestudies/ci123/adoptions-ci123-unified-view-2.png b/docs/en/resources/casestudies/ci123/adoptions-ci123-unified-view-2.png similarity index 100% rename from docs/resources/casestudies/ci123/adoptions-ci123-unified-view-2.png rename to docs/en/resources/casestudies/ci123/adoptions-ci123-unified-view-2.png diff --git a/docs/resources/casestudies/ci123/adoptions-ci123-velero.png b/docs/en/resources/casestudies/ci123/adoptions-ci123-velero.png similarity index 100% rename from docs/resources/casestudies/ci123/adoptions-ci123-velero.png rename to 
docs/en/resources/casestudies/ci123/adoptions-ci123-velero.png diff --git a/docs/resources/casestudies/vipkid/adoptions-vipkid-architecture.png b/docs/en/resources/casestudies/vipkid/adoptions-vipkid-architecture.png similarity index 100% rename from docs/resources/casestudies/vipkid/adoptions-vipkid-architecture.png rename to docs/en/resources/casestudies/vipkid/adoptions-vipkid-architecture.png diff --git a/docs/resources/cluster-controller-process.drawio b/docs/en/resources/cluster-controller-process.drawio similarity index 100% rename from docs/resources/cluster-controller-process.drawio rename to docs/en/resources/cluster-controller-process.drawio diff --git a/docs/resources/cluster-controller-process.png b/docs/en/resources/cluster-controller-process.png similarity index 100% rename from docs/resources/cluster-controller-process.png rename to docs/en/resources/cluster-controller-process.png diff --git a/docs/resources/cncf-logo.png b/docs/en/resources/cncf-logo.png similarity index 100% rename from docs/resources/cncf-logo.png rename to docs/en/resources/cncf-logo.png diff --git a/docs/resources/contributor/click-next.png b/docs/en/resources/contributor/click-next.png similarity index 100% rename from docs/resources/contributor/click-next.png rename to docs/en/resources/contributor/click-next.png diff --git a/docs/resources/contributor/debug-docs.png b/docs/en/resources/contributor/debug-docs.png similarity index 100% rename from docs/resources/contributor/debug-docs.png rename to docs/en/resources/contributor/debug-docs.png diff --git a/docs/resources/contributor/git_workflow.png b/docs/en/resources/contributor/git_workflow.png similarity index 100% rename from docs/resources/contributor/git_workflow.png rename to docs/en/resources/contributor/git_workflow.png diff --git a/docs/resources/demo-3in1.svg b/docs/en/resources/demo-3in1.svg similarity index 100% rename from docs/resources/demo-3in1.svg rename to docs/en/resources/demo-3in1.svg diff --git a/docs/resources/developers/grafana_metrics.png b/docs/en/resources/developers/grafana_metrics.png similarity index 100% rename from docs/resources/developers/grafana_metrics.png rename to docs/en/resources/developers/grafana_metrics.png diff --git a/docs/resources/device_registration.png b/docs/en/resources/device_registration.png similarity index 100% rename from docs/resources/device_registration.png rename to docs/en/resources/device_registration.png diff --git a/docs/resources/example.png b/docs/en/resources/example.png similarity index 100% rename from docs/resources/example.png rename to docs/en/resources/example.png diff --git a/docs/resources/execution-controller-process.drawio b/docs/en/resources/execution-controller-process.drawio similarity index 100% rename from docs/resources/execution-controller-process.drawio rename to docs/en/resources/execution-controller-process.drawio diff --git a/docs/resources/execution-controller-process.png b/docs/en/resources/execution-controller-process.png similarity index 100% rename from docs/resources/execution-controller-process.png rename to docs/en/resources/execution-controller-process.png diff --git a/docs/resources/general/Karmada-logo-horizontal-color.png b/docs/en/resources/general/Karmada-logo-horizontal-color.png similarity index 100% rename from docs/resources/general/Karmada-logo-horizontal-color.png rename to docs/en/resources/general/Karmada-logo-horizontal-color.png diff --git a/docs/resources/general/architecture.drawio b/docs/en/resources/general/architecture.drawio 
similarity index 100% rename from docs/resources/general/architecture.drawio rename to docs/en/resources/general/architecture.drawio diff --git a/docs/resources/general/architecture.png b/docs/en/resources/general/architecture.png similarity index 100% rename from docs/resources/general/architecture.png rename to docs/en/resources/general/architecture.png diff --git a/docs/resources/general/binding-controller-process.drawio b/docs/en/resources/general/binding-controller-process.drawio similarity index 100% rename from docs/resources/general/binding-controller-process.drawio rename to docs/en/resources/general/binding-controller-process.drawio diff --git a/docs/resources/general/binding-controller-process.png b/docs/en/resources/general/binding-controller-process.png similarity index 100% rename from docs/resources/general/binding-controller-process.png rename to docs/en/resources/general/binding-controller-process.png diff --git a/docs/resources/general/cluster-controller-process.drawio b/docs/en/resources/general/cluster-controller-process.drawio similarity index 100% rename from docs/resources/general/cluster-controller-process.drawio rename to docs/en/resources/general/cluster-controller-process.drawio diff --git a/docs/resources/general/cluster-controller-process.png b/docs/en/resources/general/cluster-controller-process.png similarity index 100% rename from docs/resources/general/cluster-controller-process.png rename to docs/en/resources/general/cluster-controller-process.png diff --git a/docs/resources/general/cncf-logo.png b/docs/en/resources/general/cncf-logo.png similarity index 100% rename from docs/resources/general/cncf-logo.png rename to docs/en/resources/general/cncf-logo.png diff --git a/docs/resources/general/demo-3in1.svg b/docs/en/resources/general/demo-3in1.svg similarity index 100% rename from docs/resources/general/demo-3in1.svg rename to docs/en/resources/general/demo-3in1.svg diff --git a/docs/resources/general/execution-controller-process.drawio b/docs/en/resources/general/execution-controller-process.drawio similarity index 100% rename from docs/resources/general/execution-controller-process.drawio rename to docs/en/resources/general/execution-controller-process.drawio diff --git a/docs/resources/general/execution-controller-process.png b/docs/en/resources/general/execution-controller-process.png similarity index 100% rename from docs/resources/general/execution-controller-process.png rename to docs/en/resources/general/execution-controller-process.png diff --git a/docs/resources/general/karmada-resource-relation.drawio b/docs/en/resources/general/karmada-resource-relation.drawio similarity index 100% rename from docs/resources/general/karmada-resource-relation.drawio rename to docs/en/resources/general/karmada-resource-relation.drawio diff --git a/docs/resources/general/karmada-resource-relation.png b/docs/en/resources/general/karmada-resource-relation.png similarity index 100% rename from docs/resources/general/karmada-resource-relation.png rename to docs/en/resources/general/karmada-resource-relation.png diff --git a/docs/resources/general/object-association-map.drawio b/docs/en/resources/general/object-association-map.drawio similarity index 100% rename from docs/resources/general/object-association-map.drawio rename to docs/en/resources/general/object-association-map.drawio diff --git a/docs/resources/general/object-association-map.png b/docs/en/resources/general/object-association-map.png similarity index 100% rename from 
docs/resources/general/object-association-map.png rename to docs/en/resources/general/object-association-map.png diff --git a/docs/resources/general/policy-controller-process.drawio b/docs/en/resources/general/policy-controller-process.drawio similarity index 100% rename from docs/resources/general/policy-controller-process.drawio rename to docs/en/resources/general/policy-controller-process.drawio diff --git a/docs/resources/general/policy-controller-process.png b/docs/en/resources/general/policy-controller-process.png similarity index 100% rename from docs/resources/general/policy-controller-process.png rename to docs/en/resources/general/policy-controller-process.png diff --git a/docs/resources/general/sample-nginx.svg b/docs/en/resources/general/sample-nginx.svg similarity index 100% rename from docs/resources/general/sample-nginx.svg rename to docs/en/resources/general/sample-nginx.svg diff --git a/docs/resources/git_workflow.png b/docs/en/resources/git_workflow.png similarity index 100% rename from docs/resources/git_workflow.png rename to docs/en/resources/git_workflow.png diff --git a/docs/resources/gpu-scheduler-policy-demo.png b/docs/en/resources/gpu-scheduler-policy-demo.png similarity index 100% rename from docs/resources/gpu-scheduler-policy-demo.png rename to docs/en/resources/gpu-scheduler-policy-demo.png diff --git a/docs/resources/grafana.png b/docs/en/resources/grafana.png similarity index 100% rename from docs/resources/grafana.png rename to docs/en/resources/grafana.png diff --git a/docs/resources/hami-arch.png b/docs/en/resources/hami-arch.png similarity index 100% rename from docs/resources/hami-arch.png rename to docs/en/resources/hami-arch.png diff --git a/docs/resources/hami-core-position.png b/docs/en/resources/hami-core-position.png similarity index 100% rename from docs/resources/hami-core-position.png rename to docs/en/resources/hami-core-position.png diff --git a/docs/resources/hami-dynamic-mig-procedure.png b/docs/en/resources/hami-dynamic-mig-procedure.png similarity index 100% rename from docs/resources/hami-dynamic-mig-procedure.png rename to docs/en/resources/hami-dynamic-mig-procedure.png diff --git a/docs/resources/hami-dynamic-mig-structure.png b/docs/en/resources/hami-dynamic-mig-structure.png similarity index 100% rename from docs/resources/hami-dynamic-mig-structure.png rename to docs/en/resources/hami-dynamic-mig-structure.png diff --git a/docs/resources/hard_limit.jpg b/docs/en/resources/hard_limit.jpg similarity index 100% rename from docs/resources/hard_limit.jpg rename to docs/en/resources/hard_limit.jpg diff --git a/docs/resources/installation/install-binary/generate_cert/csr_config/admin.conf b/docs/en/resources/installation/install-binary/generate_cert/csr_config/admin.conf similarity index 100% rename from docs/resources/installation/install-binary/generate_cert/csr_config/admin.conf rename to docs/en/resources/installation/install-binary/generate_cert/csr_config/admin.conf diff --git a/docs/resources/installation/install-binary/generate_cert/csr_config/etcd/apiserver-etcd-client.conf b/docs/en/resources/installation/install-binary/generate_cert/csr_config/etcd/apiserver-etcd-client.conf similarity index 100% rename from docs/resources/installation/install-binary/generate_cert/csr_config/etcd/apiserver-etcd-client.conf rename to docs/en/resources/installation/install-binary/generate_cert/csr_config/etcd/apiserver-etcd-client.conf diff --git a/docs/resources/installation/install-binary/generate_cert/csr_config/etcd/healthcheck-client.conf 
b/docs/en/resources/installation/install-binary/generate_cert/csr_config/etcd/healthcheck-client.conf similarity index 100% rename from docs/resources/installation/install-binary/generate_cert/csr_config/etcd/healthcheck-client.conf rename to docs/en/resources/installation/install-binary/generate_cert/csr_config/etcd/healthcheck-client.conf diff --git a/docs/resources/installation/install-binary/generate_cert/csr_config/etcd/peer.conf b/docs/en/resources/installation/install-binary/generate_cert/csr_config/etcd/peer.conf similarity index 100% rename from docs/resources/installation/install-binary/generate_cert/csr_config/etcd/peer.conf rename to docs/en/resources/installation/install-binary/generate_cert/csr_config/etcd/peer.conf diff --git a/docs/resources/installation/install-binary/generate_cert/csr_config/etcd/server.conf b/docs/en/resources/installation/install-binary/generate_cert/csr_config/etcd/server.conf similarity index 100% rename from docs/resources/installation/install-binary/generate_cert/csr_config/etcd/server.conf rename to docs/en/resources/installation/install-binary/generate_cert/csr_config/etcd/server.conf diff --git a/docs/resources/installation/install-binary/generate_cert/csr_config/front-proxy-client.conf b/docs/en/resources/installation/install-binary/generate_cert/csr_config/front-proxy-client.conf similarity index 100% rename from docs/resources/installation/install-binary/generate_cert/csr_config/front-proxy-client.conf rename to docs/en/resources/installation/install-binary/generate_cert/csr_config/front-proxy-client.conf diff --git a/docs/resources/installation/install-binary/generate_cert/csr_config/karmada.conf b/docs/en/resources/installation/install-binary/generate_cert/csr_config/karmada.conf similarity index 100% rename from docs/resources/installation/install-binary/generate_cert/csr_config/karmada.conf rename to docs/en/resources/installation/install-binary/generate_cert/csr_config/karmada.conf diff --git a/docs/resources/installation/install-binary/generate_cert/csr_config/kube-apiserver.conf b/docs/en/resources/installation/install-binary/generate_cert/csr_config/kube-apiserver.conf similarity index 100% rename from docs/resources/installation/install-binary/generate_cert/csr_config/kube-apiserver.conf rename to docs/en/resources/installation/install-binary/generate_cert/csr_config/kube-apiserver.conf diff --git a/docs/resources/installation/install-binary/generate_cert/csr_config/kube-controller-manager.conf b/docs/en/resources/installation/install-binary/generate_cert/csr_config/kube-controller-manager.conf similarity index 100% rename from docs/resources/installation/install-binary/generate_cert/csr_config/kube-controller-manager.conf rename to docs/en/resources/installation/install-binary/generate_cert/csr_config/kube-controller-manager.conf diff --git a/docs/resources/installation/install-binary/generate_cert/generate_ca.sh b/docs/en/resources/installation/install-binary/generate_cert/generate_ca.sh similarity index 100% rename from docs/resources/installation/install-binary/generate_cert/generate_ca.sh rename to docs/en/resources/installation/install-binary/generate_cert/generate_ca.sh diff --git a/docs/resources/installation/install-binary/generate_cert/generate_etcd.sh b/docs/en/resources/installation/install-binary/generate_cert/generate_etcd.sh similarity index 100% rename from docs/resources/installation/install-binary/generate_cert/generate_etcd.sh rename to docs/en/resources/installation/install-binary/generate_cert/generate_etcd.sh 
diff --git a/docs/resources/installation/install-binary/generate_cert/generate_leaf.sh b/docs/en/resources/installation/install-binary/generate_cert/generate_leaf.sh similarity index 100% rename from docs/resources/installation/install-binary/generate_cert/generate_leaf.sh rename to docs/en/resources/installation/install-binary/generate_cert/generate_leaf.sh diff --git a/docs/resources/installation/install-binary/generate_cert/util.sh b/docs/en/resources/installation/install-binary/generate_cert/util.sh similarity index 100% rename from docs/resources/installation/install-binary/generate_cert/util.sh rename to docs/en/resources/installation/install-binary/generate_cert/util.sh diff --git a/docs/resources/installation/install-binary/other_scripts/check_status.sh b/docs/en/resources/installation/install-binary/other_scripts/check_status.sh similarity index 100% rename from docs/resources/installation/install-binary/other_scripts/check_status.sh rename to docs/en/resources/installation/install-binary/other_scripts/check_status.sh diff --git a/docs/resources/installation/install-binary/other_scripts/create_kubeconfig_file.sh b/docs/en/resources/installation/install-binary/other_scripts/create_kubeconfig_file.sh similarity index 100% rename from docs/resources/installation/install-binary/other_scripts/create_kubeconfig_file.sh rename to docs/en/resources/installation/install-binary/other_scripts/create_kubeconfig_file.sh diff --git a/docs/resources/istio-on-karmada-different-network.png b/docs/en/resources/istio-on-karmada-different-network.png similarity index 100% rename from docs/resources/istio-on-karmada-different-network.png rename to docs/en/resources/istio-on-karmada-different-network.png diff --git a/docs/resources/istio-on-karmada.png b/docs/en/resources/istio-on-karmada.png similarity index 100% rename from docs/resources/istio-on-karmada.png rename to docs/en/resources/istio-on-karmada.png diff --git a/docs/resources/karmada-resource-relation.drawio b/docs/en/resources/karmada-resource-relation.drawio similarity index 100% rename from docs/resources/karmada-resource-relation.drawio rename to docs/en/resources/karmada-resource-relation.drawio diff --git a/docs/resources/karmada-resource-relation.png b/docs/en/resources/karmada-resource-relation.png similarity index 100% rename from docs/resources/karmada-resource-relation.png rename to docs/en/resources/karmada-resource-relation.png diff --git a/docs/resources/key-features/cluster-failover.png b/docs/en/resources/key-features/cluster-failover.png similarity index 100% rename from docs/resources/key-features/cluster-failover.png rename to docs/en/resources/key-features/cluster-failover.png diff --git a/docs/resources/key-features/overall-relationship.png b/docs/en/resources/key-features/overall-relationship.png similarity index 100% rename from docs/resources/key-features/overall-relationship.png rename to docs/en/resources/key-features/overall-relationship.png diff --git a/docs/resources/key-features/overall-rescheduling.png b/docs/en/resources/key-features/overall-rescheduling.png similarity index 100% rename from docs/resources/key-features/overall-rescheduling.png rename to docs/en/resources/key-features/overall-rescheduling.png diff --git a/docs/resources/key-features/overall-scheduling.png b/docs/en/resources/key-features/overall-scheduling.png similarity index 100% rename from docs/resources/key-features/overall-scheduling.png rename to docs/en/resources/key-features/overall-scheduling.png diff --git 
a/docs/resources/key-features/service-governance.png b/docs/en/resources/key-features/service-governance.png similarity index 100% rename from docs/resources/key-features/service-governance.png rename to docs/en/resources/key-features/service-governance.png diff --git a/docs/resources/key-features/unified-access.png b/docs/en/resources/key-features/unified-access.png similarity index 100% rename from docs/resources/key-features/unified-access.png rename to docs/en/resources/key-features/unified-access.png diff --git a/docs/resources/key-features/unified-operation.png b/docs/en/resources/key-features/unified-operation.png similarity index 100% rename from docs/resources/key-features/unified-operation.png rename to docs/en/resources/key-features/unified-operation.png diff --git a/docs/resources/key-features/unified-resourcequota.png b/docs/en/resources/key-features/unified-resourcequota.png similarity index 100% rename from docs/resources/key-features/unified-resourcequota.png rename to docs/en/resources/key-features/unified-resourcequota.png diff --git a/docs/resources/key-features/unified-search.png b/docs/en/resources/key-features/unified-search.png similarity index 100% rename from docs/resources/key-features/unified-search.png rename to docs/en/resources/key-features/unified-search.png diff --git a/docs/resources/kunlunxin_filter.png b/docs/en/resources/kunlunxin_filter.png similarity index 100% rename from docs/resources/kunlunxin_filter.png rename to docs/en/resources/kunlunxin_filter.png diff --git a/docs/resources/kunlunxin_topo.jpg b/docs/en/resources/kunlunxin_topo.jpg similarity index 100% rename from docs/resources/kunlunxin_topo.jpg rename to docs/en/resources/kunlunxin_topo.jpg diff --git a/docs/resources/metax_binpack.jpg b/docs/en/resources/metax_binpack.jpg similarity index 100% rename from docs/resources/metax_binpack.jpg rename to docs/en/resources/metax_binpack.jpg diff --git a/docs/resources/metax_spread.jpg b/docs/en/resources/metax_spread.jpg similarity index 100% rename from docs/resources/metax_spread.jpg rename to docs/en/resources/metax_spread.jpg diff --git a/docs/resources/metax_topo.jpg b/docs/en/resources/metax_topo.jpg similarity index 100% rename from docs/resources/metax_topo.jpg rename to docs/en/resources/metax_topo.jpg diff --git a/docs/resources/node-shceduler-policy-demo.png b/docs/en/resources/node-shceduler-policy-demo.png similarity index 100% rename from docs/resources/node-shceduler-policy-demo.png rename to docs/en/resources/node-shceduler-policy-demo.png diff --git a/docs/resources/object-association-map.drawio b/docs/en/resources/object-association-map.drawio similarity index 100% rename from docs/resources/object-association-map.drawio rename to docs/en/resources/object-association-map.drawio diff --git a/docs/resources/object-association-map.png b/docs/en/resources/object-association-map.png similarity index 100% rename from docs/resources/object-association-map.png rename to docs/en/resources/object-association-map.png diff --git a/docs/resources/policy-controller-process.drawio b/docs/en/resources/policy-controller-process.drawio similarity index 100% rename from docs/resources/policy-controller-process.drawio rename to docs/en/resources/policy-controller-process.drawio diff --git a/docs/resources/policy-controller-process.png b/docs/en/resources/policy-controller-process.png similarity index 100% rename from docs/resources/policy-controller-process.png rename to docs/en/resources/policy-controller-process.png diff --git 
a/docs/resources/sample-nginx.svg b/docs/en/resources/sample-nginx.svg similarity index 100% rename from docs/resources/sample-nginx.svg rename to docs/en/resources/sample-nginx.svg diff --git a/docs/resources/sample_nvidia-smi.png b/docs/en/resources/sample_nvidia-smi.png similarity index 100% rename from docs/resources/sample_nvidia-smi.png rename to docs/en/resources/sample_nvidia-smi.png diff --git a/docs/resources/scheduler-policy-story.png b/docs/en/resources/scheduler-policy-story.png similarity index 100% rename from docs/resources/scheduler-policy-story.png rename to docs/en/resources/scheduler-policy-story.png diff --git a/docs/resources/task_dispatch.png b/docs/en/resources/task_dispatch.png similarity index 100% rename from docs/resources/task_dispatch.png rename to docs/en/resources/task_dispatch.png diff --git a/docs/resources/userguide/cicd/argocd/argocd-new-app-cluster.png b/docs/en/resources/userguide/cicd/argocd/argocd-new-app-cluster.png similarity index 100% rename from docs/resources/userguide/cicd/argocd/argocd-new-app-cluster.png rename to docs/en/resources/userguide/cicd/argocd/argocd-new-app-cluster.png diff --git a/docs/resources/userguide/cicd/argocd/argocd-new-app-name.png b/docs/en/resources/userguide/cicd/argocd/argocd-new-app-name.png similarity index 100% rename from docs/resources/userguide/cicd/argocd/argocd-new-app-name.png rename to docs/en/resources/userguide/cicd/argocd/argocd-new-app-name.png diff --git a/docs/resources/userguide/cicd/argocd/argocd-new-app-repo.png b/docs/en/resources/userguide/cicd/argocd/argocd-new-app-repo.png similarity index 100% rename from docs/resources/userguide/cicd/argocd/argocd-new-app-repo.png rename to docs/en/resources/userguide/cicd/argocd/argocd-new-app-repo.png diff --git a/docs/resources/userguide/cicd/argocd/argocd-new-app.png b/docs/en/resources/userguide/cicd/argocd/argocd-new-app.png similarity index 100% rename from docs/resources/userguide/cicd/argocd/argocd-new-app.png rename to docs/en/resources/userguide/cicd/argocd/argocd-new-app.png diff --git a/docs/resources/userguide/cicd/argocd/argocd-register-karmada.png b/docs/en/resources/userguide/cicd/argocd/argocd-register-karmada.png similarity index 100% rename from docs/resources/userguide/cicd/argocd/argocd-register-karmada.png rename to docs/en/resources/userguide/cicd/argocd/argocd-register-karmada.png diff --git a/docs/resources/userguide/cicd/argocd/argocd-status-aggregated.png b/docs/en/resources/userguide/cicd/argocd/argocd-status-aggregated.png similarity index 100% rename from docs/resources/userguide/cicd/argocd/argocd-status-aggregated.png rename to docs/en/resources/userguide/cicd/argocd/argocd-status-aggregated.png diff --git a/docs/resources/userguide/cicd/argocd/argocd-status-overview.png b/docs/en/resources/userguide/cicd/argocd/argocd-status-overview.png similarity index 100% rename from docs/resources/userguide/cicd/argocd/argocd-status-overview.png rename to docs/en/resources/userguide/cicd/argocd/argocd-status-overview.png diff --git a/docs/resources/userguide/cicd/argocd/argocd-status-resourcebinding.png b/docs/en/resources/userguide/cicd/argocd/argocd-status-resourcebinding.png similarity index 100% rename from docs/resources/userguide/cicd/argocd/argocd-status-resourcebinding.png rename to docs/en/resources/userguide/cicd/argocd/argocd-status-resourcebinding.png diff --git a/docs/resources/userguide/cicd/argocd/argocd-sync-apps.png b/docs/en/resources/userguide/cicd/argocd/argocd-sync-apps.png similarity index 100% rename from 
docs/resources/userguide/cicd/argocd/argocd-sync-apps.png rename to docs/en/resources/userguide/cicd/argocd/argocd-sync-apps.png diff --git a/docs/resources/userguide/failover/failover-overview.png b/docs/en/resources/userguide/failover/failover-overview.png similarity index 100% rename from docs/resources/userguide/failover/failover-overview.png rename to docs/en/resources/userguide/failover/failover-overview.png diff --git a/docs/resources/userguide/service/istio/istio-on-karmada-different-network.png b/docs/en/resources/userguide/service/istio/istio-on-karmada-different-network.png similarity index 100% rename from docs/resources/userguide/service/istio/istio-on-karmada-different-network.png rename to docs/en/resources/userguide/service/istio/istio-on-karmada-different-network.png diff --git a/docs/resources/userguide/service/istio/istio-on-karmada.png b/docs/en/resources/userguide/service/istio/istio-on-karmada.png similarity index 100% rename from docs/resources/userguide/service/istio/istio-on-karmada.png rename to docs/en/resources/userguide/service/istio/istio-on-karmada.png diff --git a/docs/troubleshooting/troubleshooting.md b/docs/en/troubleshooting/troubleshooting.md similarity index 100% rename from docs/troubleshooting/troubleshooting.md rename to docs/en/troubleshooting/troubleshooting.md diff --git a/docs/userguide/AWSNeuron-device/enable-awsneuron-managing.md b/docs/en/userguide/AWSNeuron-device/enable-awsneuron-managing.md similarity index 100% rename from docs/userguide/AWSNeuron-device/enable-awsneuron-managing.md rename to docs/en/userguide/AWSNeuron-device/enable-awsneuron-managing.md diff --git a/docs/userguide/AWSNeuron-device/examples/allocate-neuron-core.md b/docs/en/userguide/AWSNeuron-device/examples/allocate-neuron-core.md similarity index 100% rename from docs/userguide/AWSNeuron-device/examples/allocate-neuron-core.md rename to docs/en/userguide/AWSNeuron-device/examples/allocate-neuron-core.md diff --git a/docs/userguide/AWSNeuron-device/examples/allocate-neuron-device.md b/docs/en/userguide/AWSNeuron-device/examples/allocate-neuron-device.md similarity index 100% rename from docs/userguide/AWSNeuron-device/examples/allocate-neuron-device.md rename to docs/en/userguide/AWSNeuron-device/examples/allocate-neuron-device.md diff --git a/docs/userguide/Ascend-device/device-template.md b/docs/en/userguide/Ascend-device/device-template.md similarity index 100% rename from docs/userguide/Ascend-device/device-template.md rename to docs/en/userguide/Ascend-device/device-template.md diff --git a/docs/userguide/Ascend-device/enable-ascend-sharing.md b/docs/en/userguide/Ascend-device/enable-ascend-sharing.md similarity index 100% rename from docs/userguide/Ascend-device/enable-ascend-sharing.md rename to docs/en/userguide/Ascend-device/enable-ascend-sharing.md diff --git a/docs/userguide/Ascend-device/examples/allocate-310p.md b/docs/en/userguide/Ascend-device/examples/allocate-310p.md similarity index 100% rename from docs/userguide/Ascend-device/examples/allocate-310p.md rename to docs/en/userguide/Ascend-device/examples/allocate-310p.md diff --git a/docs/userguide/Ascend-device/examples/allocate-910b.md b/docs/en/userguide/Ascend-device/examples/allocate-910b.md similarity index 100% rename from docs/userguide/Ascend-device/examples/allocate-910b.md rename to docs/en/userguide/Ascend-device/examples/allocate-910b.md diff --git a/docs/userguide/Ascend-device/examples/allocate-exclusive.md b/docs/en/userguide/Ascend-device/examples/allocate-exclusive.md similarity index 
100% rename from docs/userguide/Ascend-device/examples/allocate-exclusive.md rename to docs/en/userguide/Ascend-device/examples/allocate-exclusive.md diff --git a/docs/userguide/Cambricon-device/enable-cambricon-mlu-sharing.md b/docs/en/userguide/Cambricon-device/enable-cambricon-mlu-sharing.md similarity index 100% rename from docs/userguide/Cambricon-device/enable-cambricon-mlu-sharing.md rename to docs/en/userguide/Cambricon-device/enable-cambricon-mlu-sharing.md diff --git a/docs/userguide/Cambricon-device/examples/allocate-core-and-memory.md b/docs/en/userguide/Cambricon-device/examples/allocate-core-and-memory.md similarity index 100% rename from docs/userguide/Cambricon-device/examples/allocate-core-and-memory.md rename to docs/en/userguide/Cambricon-device/examples/allocate-core-and-memory.md diff --git a/docs/userguide/Cambricon-device/examples/allocate-exclusive.md b/docs/en/userguide/Cambricon-device/examples/allocate-exclusive.md similarity index 100% rename from docs/userguide/Cambricon-device/examples/allocate-exclusive.md rename to docs/en/userguide/Cambricon-device/examples/allocate-exclusive.md diff --git a/docs/userguide/Cambricon-device/specify-device-core-usage.md b/docs/en/userguide/Cambricon-device/specify-device-core-usage.md similarity index 100% rename from docs/userguide/Cambricon-device/specify-device-core-usage.md rename to docs/en/userguide/Cambricon-device/specify-device-core-usage.md diff --git a/docs/userguide/Cambricon-device/specify-device-memory-usage.md b/docs/en/userguide/Cambricon-device/specify-device-memory-usage.md similarity index 100% rename from docs/userguide/Cambricon-device/specify-device-memory-usage.md rename to docs/en/userguide/Cambricon-device/specify-device-memory-usage.md diff --git a/docs/userguide/Cambricon-device/specify-device-type-to-use.md b/docs/en/userguide/Cambricon-device/specify-device-type-to-use.md similarity index 100% rename from docs/userguide/Cambricon-device/specify-device-type-to-use.md rename to docs/en/userguide/Cambricon-device/specify-device-type-to-use.md diff --git a/docs/userguide/Device-supported.md b/docs/en/userguide/Device-supported.md similarity index 100% rename from docs/userguide/Device-supported.md rename to docs/en/userguide/Device-supported.md diff --git a/docs/userguide/Enflame-device/enable-enflame-gcu-sharing.md b/docs/en/userguide/Enflame-device/enable-enflame-gcu-sharing.md similarity index 100% rename from docs/userguide/Enflame-device/enable-enflame-gcu-sharing.md rename to docs/en/userguide/Enflame-device/enable-enflame-gcu-sharing.md diff --git a/docs/userguide/Hygon-device/enable-hygon-dcu-sharing.md b/docs/en/userguide/Hygon-device/enable-hygon-dcu-sharing.md similarity index 100% rename from docs/userguide/Hygon-device/enable-hygon-dcu-sharing.md rename to docs/en/userguide/Hygon-device/enable-hygon-dcu-sharing.md diff --git a/docs/userguide/Hygon-device/examples/allocate-core-and-memory.md b/docs/en/userguide/Hygon-device/examples/allocate-core-and-memory.md similarity index 100% rename from docs/userguide/Hygon-device/examples/allocate-core-and-memory.md rename to docs/en/userguide/Hygon-device/examples/allocate-core-and-memory.md diff --git a/docs/userguide/Hygon-device/examples/allocate-exclusive.md b/docs/en/userguide/Hygon-device/examples/allocate-exclusive.md similarity index 100% rename from docs/userguide/Hygon-device/examples/allocate-exclusive.md rename to docs/en/userguide/Hygon-device/examples/allocate-exclusive.md diff --git 
a/docs/userguide/Hygon-device/examples/specify-certain-cards.md b/docs/en/userguide/Hygon-device/examples/specify-certain-cards.md similarity index 100% rename from docs/userguide/Hygon-device/examples/specify-certain-cards.md rename to docs/en/userguide/Hygon-device/examples/specify-certain-cards.md diff --git a/docs/userguide/Hygon-device/specify-device-core-usage.md b/docs/en/userguide/Hygon-device/specify-device-core-usage.md similarity index 100% rename from docs/userguide/Hygon-device/specify-device-core-usage.md rename to docs/en/userguide/Hygon-device/specify-device-core-usage.md diff --git a/docs/userguide/Hygon-device/specify-device-memory-usage.md b/docs/en/userguide/Hygon-device/specify-device-memory-usage.md similarity index 100% rename from docs/userguide/Hygon-device/specify-device-memory-usage.md rename to docs/en/userguide/Hygon-device/specify-device-memory-usage.md diff --git a/docs/userguide/Hygon-device/specify-device-uuid-to-use.md b/docs/en/userguide/Hygon-device/specify-device-uuid-to-use.md similarity index 100% rename from docs/userguide/Hygon-device/specify-device-uuid-to-use.md rename to docs/en/userguide/Hygon-device/specify-device-uuid-to-use.md diff --git a/docs/userguide/Iluvatar-device/enable-illuvatar-gpu-sharing.md b/docs/en/userguide/Iluvatar-device/enable-illuvatar-gpu-sharing.md similarity index 100% rename from docs/userguide/Iluvatar-device/enable-illuvatar-gpu-sharing.md rename to docs/en/userguide/Iluvatar-device/enable-illuvatar-gpu-sharing.md diff --git a/docs/userguide/Iluvatar-device/examples/allocate-device-core-and-memory-to-container.md b/docs/en/userguide/Iluvatar-device/examples/allocate-device-core-and-memory-to-container.md similarity index 100% rename from docs/userguide/Iluvatar-device/examples/allocate-device-core-and-memory-to-container.md rename to docs/en/userguide/Iluvatar-device/examples/allocate-device-core-and-memory-to-container.md diff --git a/docs/userguide/Iluvatar-device/examples/allocate-exclusive.md b/docs/en/userguide/Iluvatar-device/examples/allocate-exclusive.md similarity index 100% rename from docs/userguide/Iluvatar-device/examples/allocate-exclusive.md rename to docs/en/userguide/Iluvatar-device/examples/allocate-exclusive.md diff --git a/docs/userguide/Kunlunxin-device/enable-kunlunxin-schedule.md b/docs/en/userguide/Kunlunxin-device/enable-kunlunxin-schedule.md similarity index 100% rename from docs/userguide/Kunlunxin-device/enable-kunlunxin-schedule.md rename to docs/en/userguide/Kunlunxin-device/enable-kunlunxin-schedule.md diff --git a/docs/userguide/Metax-device/Metax-GPU/enable-metax-gpu-schedule.md b/docs/en/userguide/Metax-device/Metax-GPU/enable-metax-gpu-schedule.md similarity index 100% rename from docs/userguide/Metax-device/Metax-GPU/enable-metax-gpu-schedule.md rename to docs/en/userguide/Metax-device/Metax-GPU/enable-metax-gpu-schedule.md diff --git a/docs/userguide/Metax-device/Metax-GPU/examples/allocate-binpack.md b/docs/en/userguide/Metax-device/Metax-GPU/examples/allocate-binpack.md similarity index 100% rename from docs/userguide/Metax-device/Metax-GPU/examples/allocate-binpack.md rename to docs/en/userguide/Metax-device/Metax-GPU/examples/allocate-binpack.md diff --git a/docs/userguide/Metax-device/Metax-GPU/examples/allocate-spread.md b/docs/en/userguide/Metax-device/Metax-GPU/examples/allocate-spread.md similarity index 100% rename from docs/userguide/Metax-device/Metax-GPU/examples/allocate-spread.md rename to docs/en/userguide/Metax-device/Metax-GPU/examples/allocate-spread.md diff --git 
a/docs/userguide/Metax-device/Metax-GPU/examples/default-use.md b/docs/en/userguide/Metax-device/Metax-GPU/examples/default-use.md similarity index 100% rename from docs/userguide/Metax-device/Metax-GPU/examples/default-use.md rename to docs/en/userguide/Metax-device/Metax-GPU/examples/default-use.md diff --git a/docs/userguide/Metax-device/Metax-GPU/specify-binpack-task.md b/docs/en/userguide/Metax-device/Metax-GPU/specify-binpack-task.md similarity index 100% rename from docs/userguide/Metax-device/Metax-GPU/specify-binpack-task.md rename to docs/en/userguide/Metax-device/Metax-GPU/specify-binpack-task.md diff --git a/docs/userguide/Metax-device/Metax-GPU/specify-spread-task.md b/docs/en/userguide/Metax-device/Metax-GPU/specify-spread-task.md similarity index 100% rename from docs/userguide/Metax-device/Metax-GPU/specify-spread-task.md rename to docs/en/userguide/Metax-device/Metax-GPU/specify-spread-task.md diff --git a/docs/userguide/Metax-device/Metax-sGPU/enable-metax-gpu-sharing.md b/docs/en/userguide/Metax-device/Metax-sGPU/enable-metax-gpu-sharing.md similarity index 100% rename from docs/userguide/Metax-device/Metax-sGPU/enable-metax-gpu-sharing.md rename to docs/en/userguide/Metax-device/Metax-sGPU/enable-metax-gpu-sharing.md diff --git a/docs/userguide/Metax-device/Metax-sGPU/examples/allocate-exclusive.md b/docs/en/userguide/Metax-device/Metax-sGPU/examples/allocate-exclusive.md similarity index 100% rename from docs/userguide/Metax-device/Metax-sGPU/examples/allocate-exclusive.md rename to docs/en/userguide/Metax-device/Metax-sGPU/examples/allocate-exclusive.md diff --git a/docs/userguide/Metax-device/Metax-sGPU/examples/allocate-qos-policy.md b/docs/en/userguide/Metax-device/Metax-sGPU/examples/allocate-qos-policy.md similarity index 100% rename from docs/userguide/Metax-device/Metax-sGPU/examples/allocate-qos-policy.md rename to docs/en/userguide/Metax-device/Metax-sGPU/examples/allocate-qos-policy.md diff --git a/docs/userguide/Metax-device/Metax-sGPU/examples/default-use.md b/docs/en/userguide/Metax-device/Metax-sGPU/examples/default-use.md similarity index 100% rename from docs/userguide/Metax-device/Metax-sGPU/examples/default-use.md rename to docs/en/userguide/Metax-device/Metax-sGPU/examples/default-use.md diff --git a/docs/userguide/Mthreads-device/enable-mthreads-gpu-sharing.md b/docs/en/userguide/Mthreads-device/enable-mthreads-gpu-sharing.md similarity index 100% rename from docs/userguide/Mthreads-device/enable-mthreads-gpu-sharing.md rename to docs/en/userguide/Mthreads-device/enable-mthreads-gpu-sharing.md diff --git a/docs/userguide/Mthreads-device/examples/allocate-core-and-memory.md b/docs/en/userguide/Mthreads-device/examples/allocate-core-and-memory.md similarity index 100% rename from docs/userguide/Mthreads-device/examples/allocate-core-and-memory.md rename to docs/en/userguide/Mthreads-device/examples/allocate-core-and-memory.md diff --git a/docs/userguide/Mthreads-device/examples/allocate-exclusive.md b/docs/en/userguide/Mthreads-device/examples/allocate-exclusive.md similarity index 100% rename from docs/userguide/Mthreads-device/examples/allocate-exclusive.md rename to docs/en/userguide/Mthreads-device/examples/allocate-exclusive.md diff --git a/docs/userguide/Mthreads-device/specify-device-core-usage.md b/docs/en/userguide/Mthreads-device/specify-device-core-usage.md similarity index 100% rename from docs/userguide/Mthreads-device/specify-device-core-usage.md rename to docs/en/userguide/Mthreads-device/specify-device-core-usage.md diff --git 
a/docs/userguide/Mthreads-device/specify-device-memory-usage.md b/docs/en/userguide/Mthreads-device/specify-device-memory-usage.md similarity index 100% rename from docs/userguide/Mthreads-device/specify-device-memory-usage.md rename to docs/en/userguide/Mthreads-device/specify-device-memory-usage.md diff --git a/docs/userguide/NVIDIA-device/dynamic-mig-support.md b/docs/en/userguide/NVIDIA-device/dynamic-mig-support.md similarity index 100% rename from docs/userguide/NVIDIA-device/dynamic-mig-support.md rename to docs/en/userguide/NVIDIA-device/dynamic-mig-support.md diff --git a/docs/userguide/NVIDIA-device/examples/allocate-device-core.md b/docs/en/userguide/NVIDIA-device/examples/allocate-device-core.md similarity index 100% rename from docs/userguide/NVIDIA-device/examples/allocate-device-core.md rename to docs/en/userguide/NVIDIA-device/examples/allocate-device-core.md diff --git a/docs/userguide/NVIDIA-device/examples/allocate-device-memory.md b/docs/en/userguide/NVIDIA-device/examples/allocate-device-memory.md similarity index 100% rename from docs/userguide/NVIDIA-device/examples/allocate-device-memory.md rename to docs/en/userguide/NVIDIA-device/examples/allocate-device-memory.md diff --git a/docs/userguide/NVIDIA-device/examples/allocate-device-memory2.md b/docs/en/userguide/NVIDIA-device/examples/allocate-device-memory2.md similarity index 100% rename from docs/userguide/NVIDIA-device/examples/allocate-device-memory2.md rename to docs/en/userguide/NVIDIA-device/examples/allocate-device-memory2.md diff --git a/docs/userguide/NVIDIA-device/examples/dynamic-mig-example.md b/docs/en/userguide/NVIDIA-device/examples/dynamic-mig-example.md similarity index 100% rename from docs/userguide/NVIDIA-device/examples/dynamic-mig-example.md rename to docs/en/userguide/NVIDIA-device/examples/dynamic-mig-example.md diff --git a/docs/userguide/NVIDIA-device/examples/specify-card-type-to-use.md b/docs/en/userguide/NVIDIA-device/examples/specify-card-type-to-use.md similarity index 100% rename from docs/userguide/NVIDIA-device/examples/specify-card-type-to-use.md rename to docs/en/userguide/NVIDIA-device/examples/specify-card-type-to-use.md diff --git a/docs/userguide/NVIDIA-device/examples/specify-certain-card.md b/docs/en/userguide/NVIDIA-device/examples/specify-certain-card.md similarity index 100% rename from docs/userguide/NVIDIA-device/examples/specify-certain-card.md rename to docs/en/userguide/NVIDIA-device/examples/specify-certain-card.md diff --git a/docs/userguide/NVIDIA-device/examples/use-exclusive-card.md b/docs/en/userguide/NVIDIA-device/examples/use-exclusive-card.md similarity index 100% rename from docs/userguide/NVIDIA-device/examples/use-exclusive-card.md rename to docs/en/userguide/NVIDIA-device/examples/use-exclusive-card.md diff --git a/docs/userguide/NVIDIA-device/specify-device-core-usage.md b/docs/en/userguide/NVIDIA-device/specify-device-core-usage.md similarity index 100% rename from docs/userguide/NVIDIA-device/specify-device-core-usage.md rename to docs/en/userguide/NVIDIA-device/specify-device-core-usage.md diff --git a/docs/userguide/NVIDIA-device/specify-device-memory-usage.md b/docs/en/userguide/NVIDIA-device/specify-device-memory-usage.md similarity index 100% rename from docs/userguide/NVIDIA-device/specify-device-memory-usage.md rename to docs/en/userguide/NVIDIA-device/specify-device-memory-usage.md diff --git a/docs/userguide/NVIDIA-device/specify-device-type-to-use.md b/docs/en/userguide/NVIDIA-device/specify-device-type-to-use.md similarity index 100% 
rename from docs/userguide/NVIDIA-device/specify-device-type-to-use.md rename to docs/en/userguide/NVIDIA-device/specify-device-type-to-use.md diff --git a/docs/userguide/NVIDIA-device/specify-device-uuid-to-use.md b/docs/en/userguide/NVIDIA-device/specify-device-uuid-to-use.md similarity index 100% rename from docs/userguide/NVIDIA-device/specify-device-uuid-to-use.md rename to docs/en/userguide/NVIDIA-device/specify-device-uuid-to-use.md diff --git a/docs/userguide/configure.md b/docs/en/userguide/configure.md similarity index 100% rename from docs/userguide/configure.md rename to docs/en/userguide/configure.md diff --git a/docs/userguide/monitoring/device-allocation.md b/docs/en/userguide/monitoring/device-allocation.md similarity index 100% rename from docs/userguide/monitoring/device-allocation.md rename to docs/en/userguide/monitoring/device-allocation.md diff --git a/docs/userguide/monitoring/globalview.md b/docs/en/userguide/monitoring/globalview.md similarity index 100% rename from docs/userguide/monitoring/globalview.md rename to docs/en/userguide/monitoring/globalview.md diff --git a/docs/userguide/monitoring/real-time-device-usage.md b/docs/en/userguide/monitoring/real-time-device-usage.md similarity index 100% rename from docs/userguide/monitoring/real-time-device-usage.md rename to docs/en/userguide/monitoring/real-time-device-usage.md diff --git a/docs/userguide/monitoring/real-time-usage.md b/docs/en/userguide/monitoring/real-time-usage.md similarity index 100% rename from docs/userguide/monitoring/real-time-usage.md rename to docs/en/userguide/monitoring/real-time-usage.md diff --git a/docs/userguide/volcano-vgpu/NVIDIA-GPU/examples/default_use.md b/docs/en/userguide/volcano-vgpu/NVIDIA-GPU/examples/default_use.md similarity index 100% rename from docs/userguide/volcano-vgpu/NVIDIA-GPU/examples/default_use.md rename to docs/en/userguide/volcano-vgpu/NVIDIA-GPU/examples/default_use.md diff --git a/docs/userguide/volcano-vgpu/NVIDIA-GPU/examples/use_exclusive_gpu.md b/docs/en/userguide/volcano-vgpu/NVIDIA-GPU/examples/use_exclusive_gpu.md similarity index 100% rename from docs/userguide/volcano-vgpu/NVIDIA-GPU/examples/use_exclusive_gpu.md rename to docs/en/userguide/volcano-vgpu/NVIDIA-GPU/examples/use_exclusive_gpu.md diff --git a/docs/userguide/volcano-vgpu/NVIDIA-GPU/how-to-use-volcano-vgpu.md b/docs/en/userguide/volcano-vgpu/NVIDIA-GPU/how-to-use-volcano-vgpu.md similarity index 100% rename from docs/userguide/volcano-vgpu/NVIDIA-GPU/how-to-use-volcano-vgpu.md rename to docs/en/userguide/volcano-vgpu/NVIDIA-GPU/how-to-use-volcano-vgpu.md diff --git a/docs/userguide/volcano-vgpu/NVIDIA-GPU/monitor.md b/docs/en/userguide/volcano-vgpu/NVIDIA-GPU/monitor.md similarity index 100% rename from docs/userguide/volcano-vgpu/NVIDIA-GPU/monitor.md rename to docs/en/userguide/volcano-vgpu/NVIDIA-GPU/monitor.md diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..dda7077 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,37 @@ +# HAMi +Open, Device Virtualization, VGPU, Heterogeneous AI Computing + +### What is HAMi + +HAMi (Heterogeneous AI Computing Virtualization Middleware) formerly known as k8s-vGPU-scheduler, is an 'all-in-one' chart designed to manage Heterogeneous AI Computing Devices in a k8s cluster. It can provide the ability to share Heterogeneous AI devices and provide resource isolation among tasks. 
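+
+For example, once HAMi is installed, a container can request a slice of a GPU through ordinary resource limits. The snippet below is only a minimal illustration using the NVIDIA resource names documented elsewhere in these docs (`nvidia.com/gpu`, `nvidia.com/gpumem`, `nvidia.com/gpucores`); the image name is a placeholder.
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: vgpu-demo
+spec:
+  containers:
+    - name: cuda
+      image: nvidia/cuda:12.4.0-base-ubuntu22.04
+      resources:
+        limits:
+          nvidia.com/gpu: 1        # one vGPU
+          nvidia.com/gpumem: 3000  # 3000 MiB of device memory
+          nvidia.com/gpucores: 30  # 30% of the card's compute
+```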
+ +HAMi is committed to improving the utilization rate of heterogeneous computing devices in Kubernetes clusters and providing a unified multiplexing interface for different types of heterogeneous devices. + +### Why HAMi + +#### Kubernetes Native API Compatible + +Zero change upgrade: compatible with default behaviour from Kubernetes. + +#### Open and Neutral + +Jointly initiated by Internet, finance, manufacturing, cloud providers, etc. Target for open governance with CNCF + +#### Avoid Vendor Lock-in + +Integration with mainstream cloud providers; Not tied to proprietary vendor orchestration + +#### Resource Isolation + +Provides hard isolation of resources within containers, task in containers can't use resources that exceed their quota + +#### Supports a variety of heterogeneous computing devices + +Provides device-sharing on GPU, MLU, NPU from a variety of manufacturers + +#### Unified Management + +Unified monitoring system, Configurable scheduling policies(binpack,spread,etc...) + +### How It Works +tbd... \ No newline at end of file diff --git a/docs/logo.svg b/docs/logo.svg new file mode 100644 index 0000000..83b8b26 --- /dev/null +++ b/docs/logo.svg @@ -0,0 +1,7030 @@ + + + diff --git a/docs/zh/FAQ/FAQ.md b/docs/zh/FAQ/FAQ.md new file mode 100644 index 0000000..6beaf87 --- /dev/null +++ b/docs/zh/FAQ/FAQ.md @@ -0,0 +1,194 @@ +--- +title: 常见问题 +--- + + +## 支持的设备厂商及具体型号 + +| **GPU 厂商** | **GPU 型号** | **粒度** | **多 GPU 支持** | +| --- | --- | --- | --- | +| NVIDIA | 几乎所有主流消费级和数据中心 GPU | 核心 1%,内存 1M | 支持。多 GPU 仍可通过虚拟化进行拆分和共享。 | +| 昇腾 | 910A、910B2、910B3、310P | 最小粒度取决于卡类型模板。参考[官方模板](https://www.hiascend.com/document/detail/zh/mindx-dl/50rc1/AVI/cpaug/cpaug_0005.html)。 | 支持,但当 `npu > 1` 时不支持拆分,整卡独占。 | +| 海光 | Z100、Z100L、K100-AI | 核心 1%,内存 1M | 支持,但当 `dcu > 1` 时不支持拆分,整卡独占。 | +| 寒武纪 | 370、590 | 核心 1%,内存 256M | 支持,但当 `mlu > 1` 时不支持拆分,整卡独占。 | +| 天数智芯 | 全部 | 核心 1%,内存 256M | 支持,但当 `gpu > 1` 时不支持拆分,整卡独占。 | +| 摩尔线程 | MTT S4000 | 核心为 1 个核心组,内存 512M | 支持,但当 `gpu > 1` 时不支持拆分,整卡独占。 | +| 魅特思 | MXC500 | 不支持拆分,只能整卡分配。 | 支持,但所有分配均为整卡。 | + +## 什么是 vGPU?为什么看到 10 个 vGPU 却无法在同一张卡上分配两个 vGPU? + +**简要说明** + +vGPU 通过逻辑划分方式提升 GPU 利用率,使多个任务共享同一块物理 GPU。设置 `deviceSplitCount: 10` 表示该 GPU 最多可同时服务 10 个任务,但并不允许一个任务使用该 GPU 上的多个 vGPU。 + +--- + +### vGPU 的概念 + +vGPU 是通过虚拟化在物理 GPU 上创建的逻辑实例,使多个任务可共享同一个物理 GPU。例如配置为 `deviceSplitCount: 10`,表示该物理 GPU 最多可被分配给 10 个任务。这种分配并不会增加物理资源,仅改变逻辑可见性。 + +**为什么无法在同一张卡上分配两个 vGPU?** + +1. **vGPU 的含义** + vGPU 是物理 GPU 的不同任务视图,并非物理资源的划分。当任务请求 `nvidia.com/gpu: 2`,它被理解为需要两张物理 GPU,而非同一张卡上的两个 vGPU。 + +2. **资源分配机制** + vGPU 的设计初衷是让多个任务共享一张 GPU,而不是让单个任务绑定多个 vGPU。`deviceSplitCount: 10` 表示最多有 10 个任务可以并发共享此 GPU,并不支持一个任务使用多个 vGPU。 + +3. **容器与节点视图一致性** + 容器中的 GPU UUID 与节点上的物理 GPU UUID 是一致的,即反映的是同一块 GPU。虽然可见多个 vGPU,但这些是逻辑视图而非独立资源。 + +4. **设计目的** + vGPU 的设计是为了 **让一张 GPU 可供多个任务共享**,而不是 **让一个任务使用多个 vGPU**。vGPU 超售的目标是提升资源利用率,而非扩展单个任务的计算能力。 + +## HAMi 的 `nvidia.com/priority` 字段仅支持两级,如何在资源紧张时实现多级用户自定义优先级的排队调度? + +**简要说明** + +HAMi 的两级优先级用于同一张卡内任务的运行时抢占。若需支持多级用户自定义的任务调度优先级,可将 HAMi 与 **Volcano** 集成,利用其队列调度功能实现多级任务分配与抢占。 + +--- + +HAMi 原生的 `nvidia.com/priority` 字段(0 为高优先级,1 为低/默认)是为 **单卡内运行时抢占场景** 设计的。例如一个低优先级训练任务正在运行,若此时有高优先级的推理任务到来,高优先级任务会暂停低优任务,占用资源,完成后低优任务再恢复。此机制仅适用于单设备上的资源抢占,并非用于调度系统中多个任务队列的优先级排序。 + +若需在资源不足、多个任务排队等待的场景中,按照用户提交的多级优先级进行调度,HAMi 本身不具备此能力。 + +但你仍然可以通过与调度器 **Volcano** 集成来实现: + +1. **Volcano 实现多级调度优先级**: + - Volcano 支持定义多个具有不同优先级的队列; + - 可根据队列优先级决定任务的资源分配顺序,并可对任务间进行抢占,支持 HAMi 管理的 vGPU 资源。 + +2. 
**HAMi 管理 GPU 共享与运行时优先级**: + - HAMi 可通过其 [volcano-vgpu-device-plugin](https://github.com/Project-HAMi/volcano-vgpu-device-plugin) 与 Volcano 集成; + - Volcano 负责任务队列排序,HAMi 则负责实际运行时的 GPU 共享与抢占逻辑。 + +**总结**:HAMi 的优先级机制用于卡内任务的运行时抢占;若要实现多级任务调度优先级,应结合 **Volcano 与 HAMi** 使用。 + +## 与其他开源工具的集成情况 + +**已支持**: + +- **Volcano**:通过 [`volcano-vgpu-device-plugin`](https://github.com/Project-HAMi/volcano-vgpu-device-plugin) 与 Volcano 集成,实现 GPU 资源调度与管理。 +- **Koordinator**:支持与 Koordinator 集成,实现端到端的 GPU 共享。通过在节点部署 HAMi-core 并在 Pod 中配置 label 和资源请求,Koordinator 能够利用 HAMi 的 GPU 隔离能力。 + + 配置说明参见:[Device Scheduling - GPU Share With HAMi](https://koordinator.sh/docs/user-manuals/device-scheduling-gpu-share-with-hami/) + +**暂不支持**: + +- **KubeVirt 与 Kata Containers**:由于它们依赖虚拟化进行资源隔离,而 HAMi 的 GPU 插件依赖直接挂载 GPU,无法兼容。若要支持需重构设备分配逻辑,但会增加性能开销,HAMi 当前优先支持高性能直挂场景。 + +## 为什么我的 Pod 输出中有 `[HAMI-core Warn(...)]` 日志?可以关闭吗? + +这是正常日志,可忽略。如需关闭,可在容器中设置环境变量 `LIBCUDA_LOG_LEVEL=0`。 + +## HAMi 支持多节点、多 GPU 分布式训练吗?支持跨节点和跨 GPU 吗? + +**简要说明** + +HAMi 支持多节点多 GPU 分布式训练,单个 Pod 可使用同节点多个 GPU,跨节点则通过多个 Pod 配合分布式框架实现。 + +--- + +### 多节点多 GPU 分布式训练 + +在 Kubernetes 中,HAMi 支持通过在不同节点运行多个 Pod,结合分布式框架(如 PyTorch、TensorFlow、Horovod),实现多节点多 GPU 协同训练。每个 Pod 使用本地 GPU,通过 NCCL、RDMA 等高性能网络通信。 + +### 跨节点与跨 GPU 场景 + +1. **跨节点**:多个 Pod 分布在不同节点上,节点间通过网络同步梯度和参数; +2. **跨 GPU**:单个 Pod 可使用所在节点内的多个 GPU。 + +**注意**:一个 Pod 无法跨节点。需采用多 Pod 分布式训练,由分布式框架协调。 + +## HAMi 插件、Volcano 插件、NVIDIA 官方插件三者的关系与兼容性 + +**简要说明** + +同一节点只能启用一个 GPU 插件,避免资源冲突。 + +--- + +### 插件关系说明 + +三种插件都用于 GPU 资源管理,但适用场景及资源汇报方式不同: + +- **HAMi 插件** + - 使用扩展资源名 `nvidia.com/gpu`; + - 支持 HAMi 的 GPU 管理能力(如 vGPU 拆分、自定义调度); + - 适用于复杂资源管理场景。 + +- **Volcano 插件** + - 使用扩展资源名 `volcano.sh/vgpu-number`; + - 为 Volcano 提供 vGPU 虚拟化资源; + - 适合分布式任务、细粒度调度场景。 + +- **NVIDIA 官方插件** + - 使用扩展资源名 `nvidia.com/gpu`; + - 提供基本 GPU 分配功能; + - 适合直接使用物理 GPU 的稳定场景。 + +### 是否可共存 + +- **HAMi 与 NVIDIA 插件**:不建议共存,会产生资源冲突; +- **HAMi 与 Volcano 插件**:理论上可共存,但推荐只启用一个; +- **NVIDIA 与 Volcano 插件**:理论上可共存,但不建议混合使用。 + +## 为什么 Node Capacity 中只有 `nvidia.com/gpu` 而没有 `nvidia.com/gpucores` 或 `nvidia.com/gpumem`? + +**简要说明** + +Kubernetes 的 Device Plugin 每次只能上报一种资源类型。HAMi 将核心数和内存信息以 Node 注解方式记录供调度器使用。 + +--- + +### Device Plugin 的设计限制 + +- Device Plugin 接口(如 Registration、ListAndWatch)仅允许每个插件实例上报一个资源; +- 这简化了资源管理,但限制了同时上报多个指标(如核心和内存)。 + +### HAMi 的实现 + +- HAMi 将 GPU 详细信息(如算力、内存、型号)存储为 **节点注解**,供调度器解析; +- 示例: + ```yaml + hami.io/node-nvidia-register: GPU-fc28df76-54d2-c387-e52e-5f0a9495968c,10,49140,100,NVIDIA-NVIDIA L40S,0,true:GPU-b97db201-0442-8531-56d4-367e0c7d6edd,10,49140,100,... + +### 后续问题说明 + +**为什么使用 `volcano-vgpu-device-plugin` 时 Node Capacity 中会出现 `volcano.sh/vgpu-number` 和 `volcano.sh/vgpu-memory`?** + +- `volcano-vgpu-device-plugin` 是通过 Kubernetes API **直接补丁方式**将 `volcano.sh/vgpu-number` 和 `volcano.sh/vgpu-memory` 写入 Node 的 `capacity` 和 `allocatable` 字段中,而不是通过标准的 Device Plugin 接口进行注册。 +- **注意**:通过这种方式注册的资源并不受 kubelet 的标准机制管理,**kubelet 无法自动更新或释放这些资源**。 + +--- + +## 为什么某些国产厂商不需要单独安装运行时? + +某些国产厂商(例如:**海光**、**寒武纪**)的 Device Plugin 插件已内置了设备发现与挂载的能力,因此不再需要额外的运行时组件。 +相比之下,**NVIDIA** 和 **昇腾** 等厂商的插件则依赖运行时来完成以下功能: + +- 环境变量和软件依赖配置; +- 设备节点挂载; +- 高级功能(如拓扑感知、NUMA、性能隔离等)支持。 + +--- + +**简要总结** + +当官方插件无法满足高级功能(如缺少必要信息)或引入配置复杂性时,**HAMi 会选择自研 Device Plugin 插件**,以确保调度器获取完整资源信息。 + +--- + +HAMi 的调度器需要从节点获取足够的 GPU 信息来完成资源调度和设备分配。主要通过以下三种方式: + +1. **Patch 节点注解(Annotations)**; +2. **通过标准 Device Plugin 接口上报资源给 kubelet**; +3. 
**直接修改节点的 `status.capacity` 与 `status.allocatable` 字段**。 + +--- + +**为什么 HAMi 要自研插件?举例如下:** + +- **昇腾插件问题**:官方插件需为每种卡类型部署不同插件,HAMi 将其抽象为统一模板,简化集成; +- **NVIDIA 插件问题**:无法支持如 GPU 核心/内存比例限制、GPU 资源超售、NUMA 感知等高级功能,HAMi 需定制插件实现这些调度优化功能。 \ No newline at end of file diff --git a/docs/zh/blog/2024-12-18-support-blog-post/index.md b/docs/zh/blog/2024-12-18-support-blog-post/index.md new file mode 100644 index 0000000..453278d --- /dev/null +++ b/docs/zh/blog/2024-12-18-support-blog-post/index.md @@ -0,0 +1,54 @@ +--- +title: 介绍 HAMi +--- + +## 什么是 HAMi? + +HAMi(异构 AI 计算虚拟化中间件),之前称为 k8s-vGPU-scheduler,是一种创新解决方案, +旨在管理 Kubernetes 集群内的异构 AI 计算设备。这个一站式中间件能够实现各种 AI 设备的共享, +同时确保不同任务之间的资源隔离。通过提高异构计算设备的利用率, +HAMi 提供了一个统一的复用接口,以满足不同设备类型的需求。 + + + +## 为什么选择 HAMi? + +### Kubernetes 本机 API 兼容性 + +HAMi 的突出特点之一是其与 Kubernetes 原生 API 的兼容性。这意味着用户可以在 +不修改现有配置的情况下升级到 HAMi,从而实现无缝过渡,同时保持 Kubernetes 的默认行为。 + +### 开放和中立 + +HAMi 是一个涉及来自各个领域利益相关者的协作倡议,包括互联网服务、金融、制造业和云服务提供商。 +目标是建立云原生计算基金会(CNCF)下的开放治理,确保 HAMi 对所有用户保持中立和可访问。 + +### 避免供应商锁定 + +使用 HAMi,用户可以与主流云服务提供商集成,而无需绑定到专有供应商的编排。 +这种灵活性允许组织选择他们偏好的云解决方案,同时利用 HAMi 的功能。 + +### 资源隔离 + +HAMi 在容器内提供强大的资源隔离。每个在容器中运行的任务都被限制在其分配的资源范围内, +防止任何任务超出其配额。这种严格的隔离增强了计算环境中的安全性和稳定性。 + +### 支持多种异构计算设备 + +HAMi 在支持各种异构计算设备方面表现出色。无论是来自不同制造商的 GPU、MLU 还是 NPU, +HAMi 都促进了设备共享,并在不同的硬件平台上最大化资源效率。 + +### 统一管理 + +为了简化运营,HAMi 提供了一套统一的监控系统,以及如箱装和扩散的可配置调度策略。 +这种全面的管理方法简化了对资源的监管,并提升了整体系统性能。 + +## 结语 + +总之,HAMi 代表了在 Kubernetes 环境中管理异构 AI 计算资源的重大进步。它与现有系统的兼容性、 +对开放治理的承诺以及强大的资源管理能力,使其成为寻求优化其 AI 计算基础设施的组织不可或缺的工具。 + +加入我们,一起踏上使用 HAMi 实现更高效和灵活的 AI 计算的旅程吧! + +引用: +[1] https://project-hami.io diff --git a/docs/zh/blog/2024-12-31-post/index.md b/docs/zh/blog/2024-12-31-post/index.md new file mode 100644 index 0000000..a358607 --- /dev/null +++ b/docs/zh/blog/2024-12-31-post/index.md @@ -0,0 +1,1799 @@ +--- +layout: post +title: HAMI 项目 GPU Pod 调度流程源码走读 +catalog: true +tag: [Kubernetes, GPU, AI] +author: elrond.wang +--- + +- [调度流程](#调度流程) +- [Pod 调度流程](#pod-调度流程) + - [常见问题排查](#常见问题排查) + - [Pod UnexpectedAdmissionError](#pod-unexpectedadmissionerror) + - [调度问题](#调度问题) + - [MutatingWebhook](#mutatingwebhook) + - [Webhook 配置](#webhook-配置) + - [Webhook Server 实现](#webhook-server-实现) + - [拓展 k8s scheduler](#拓展-k8s-scheduler) + - [KubeSchedulerConfiguration](#kubeschedulerconfiguration) + - [拓展调度器 HTTP Server 启动](#拓展调度器-http-server-启动) + - [filter 实现](#filter-实现) + - [获取节点资源信息](#获取节点资源信息) + - [Node 缓存](#node-缓存) + - [device](#device) + - [根据节点资源信息打分](#根据节点资源信息打分) + - [计算出节点的分数](#计算出节点的分数) + - [计算每个容器对应的设备的分数](#计算每个容器对应的设备的分数) + - [binding 实现](#binding-实现) + - [Node 将设备情况写入 node annotation](#node-将设备情况写入-node-annotation) + - [启动 device-plugin 服务](#启动-device-plugin-服务) + - [启动 plugin](#启动-plugin) + - [nvidia 插件的实现](#nvidia-插件的实现) +- [参考](#参考) + +使用 HAMi 的过程中经常会出现 Pod 被创建出来 Pending 的问题,犹以如下两个问题为著: + +- Pod UnexpectedAdmissionError +- Pod Pending + +介于此,展开这部分代码的粗略走读,旨在说明调度过程中各组件的交互,以及资源的计算方式,其他细节会有所遗漏。 + +## 调度流程 + +看代码之前可以先看下官方文档说明,大体上比较明确: + + + +细节上可以分为三个阶段: + +- 准备阶段: 图上可以看出有一些依赖条件,例如要有 Mutating Webhook、device-plugin 等等。 + 所以这个阶段主要分析下依赖条件的准备,只有在服务首次启动时需要。 + +  + +- Pod 调度阶段: 准备过程完成之后 Pod 进入处理流程,完成调度 +- Pod 启动阶段: Pod 如何与 Node 上的 GPU 进行交互等 + +本文会着重分析准备阶段,主要内容为调度分析。 + +## Pod 调度流程 + +- 用户发送创建 Pod 请求到 kube-apiserver +- 触发 Adminssion Webhook,更新 Pod 中 schedulerName +- kube-apiserver 根据 schedulerName 将请求发送给调度器处理 +- 调度器处理 + - 收集 Node device 信息 -- 通过 node annotation 收集,数据来自 daemonSet `hami-device-plugin` 定时写入 + - 根据设备信息以及 Pod 的 limit 信息进行打分,选出最高分的 node + - 将 Pod 和 node 进行绑定完成绑定,进行 Pod 创建 + +### 常见问题排查 + +#### Pod 
UnexpectedAdmissionError + +Pod 创建状态显示 `UnexpectedAdmissionError` + +了解流程之后,可以知道这个错误代表 kube-apiserver 调用拓展调度器失败,可能有两个原因,其他情况具体排查需要看 kube-apiserver 日志。 + +- 通信异常: 从 kube-apiserver 到拓展调度器的 https 端口不通,有几种可能 + - dns 无法解析 + - 跨节点通信有问题 + - 拓展调度器的服务异常 +- TLS 验证错误: 一般会显示 `webhook x509: certificate signed by unknown authority`,helmchart 部署时有一个 `jobs.batch` `hami-vgpu.admission-pathch`,如果没有运行完成会出现这样的问题 + +#### 调度问题 + +容器一直在 pending 状态,使用 `kubectl describe` 命令可以看到具体原因,主要有以下几个: + +- `card Insufficient remaining memory` +- `calcScore:node not fit pod` + + 主要原因一般是确实资源不足,或者配置错误,配置错误是指 devicememoryscaling 配置未符合预期。 + 有两个地方可以配置,优先级为节点配置大于全局配置,容易发生问题的地方在于 name 需要和 kubectl get node 显示的 nodename 一致才能生效。 + +- 全局配置 `kubectl get cm hami-scheduler-device` + + ```yaml + deviceMemoryScaling: 3 + ``` + +- 节点配置 `kubectl get cm hami-device-plugin` + + ```json + { + "nodeconfig": [ + { + "name": "node1", + "devicememoryscaling": 3, + "devicesplitcount": 10, + "migstrategy": "none", + "filterdevices": { + "uuid": [], + "index": [] + } + } + ] + } + ``` + +### MutatingWebhook + +K8s 提供了 adminssionWebhook 资源, 以 k8s 资源操作为触发器,触发 hook,用途最广泛的为针对 +Pod 创建做拦截,对 Pod 做 YAML 注入,具体的例如增加 init 容器注入文件等等。 + +#### Webhook 配置 + +hami-webhook: + +```bash +kubectl get mutatingwebhookconfigurations.admissionregistration.k8s.io hami-webhook -o yaml +``` + +```yaml +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + annotations: + meta.helm.sh/release-name: hami + meta.helm.sh/release-namespace: kube-system + creationTimestamp: "2024-12-10T03:50:37Z" + generation: 5 + labels: + app.kubernetes.io/managed-by: Helm + name: hami-webhook + resourceVersion: "2307810" + uid: 2cdcebe4-f561-429f-9480-701e65980687 +webhooks: +- admissionReviewVersions: + - v1beta1 + clientConfig: + caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJkakNDQVJ5Z0F3SUJBZ0lSQUxjd2FQMjUrMlphdGhTTlFMcG1qT0V3Q2dZSUtvWkl6ajBFQXdJd0R6RU4KTUFzR0ExVUVDaE1FYm1sc01UQWdGdzB5TkRFeU1EWXdOekV4TVRWYUdBOHlNVEkwTVRFeE1qQTNNVEV4TlZvdwpEekVOTUFzR0ExVUVDaE1FYm1sc01UQlpNQk1HQnlxR1NNNDlBZ0VHQ0NxR1NNNDlBd0VIQTBJQUJDUnlXUDdYCkRmT2N4NEVTMVRYaUs0dnFFU2wrcUFHYjI2YzNrOEdMWlZTL1lHaFpLZVVxaEgydVRhTFdWTW1hZVJFbkxqM0cKSStMVFRVTTR6SVhEUld5alZ6QlZNQTRHQTFVZER3RUIvd1FFQXdJQ0JEQVRCZ05WSFNVRUREQUtCZ2dyQmdFRgpCUWNEQVRBUEJnTlZIUk1CQWY4RUJUQURBUUgvTUIwR0ExVWREZ1FXQkJTcVV4bWpGa29YUlpRK0xXVzBNM1pJCnMzck1wakFLQmdncWhrak9QUVFEQWdOSUFEQkZBaUJSY2VRL2tJVkR2VTV3Vjl0K3NRWm93TmFhTWhIMTV5K2sKT3VrR0FlRGVtQUloQUxDZzFrM0JQZUJBNG8reWY5emxvVjM2VEk2RHUzaGdMT1B3MXhaZkFvcDMKLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo= + service: + name: hami-scheduler + namespace: kube-system + path: /webhook + port: 443 + failurePolicy: Ignore + matchPolicy: Equivalent + name: vgpu.hami.io + namespaceSelector: + matchExpressions: + - key: hami.io/webhook + operator: NotIn + values: + - ignore + objectSelector: + matchExpressions: + - key: hami.io/webhook + operator: NotIn + values: + - ignore + reinvocationPolicy: Never + rules: + - apiGroups: + - "" + apiVersions: + - v1 + operations: + - CREATE + resources: + - pods + scope: '*' + sideEffects: None + timeoutSeconds: 10 +``` + +当 Pod 创建时,调用 `https://hami-scheduler.kube-system:443/webhook` 做 TLS 校验,CA 为 `caBundle` 配置。 +当命名空间有 `hami.io/webhook: ignore` 的标签时不触发。 + +#### Webhook Server 实现 + +需要实现一个 TLS 的 HTTP Server,且提供 `/webhook` 接口。 + +cmd/scheduler/main.go:84 + +```golang +func start() { + ... 
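+	// webhook 与下面的 /filter、/bind 拓展接口由同一个 HTTPS server 对外提供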
+ router.POST("/webhook", routes.WebHookRoute()) +``` + +`WebHookRoute` 需要实现 `sigs.k8s.io/controller-runtime@v0.16.3/pkg/webhook/admission/webhook.go:98` + +pkg/scheduler/webhook.go:52 + +```golang + pod := &corev1.Pod{} + err := h.decoder.Decode(req, pod) + if err != nil { + klog.Errorf("Failed to decode request: %v", err) + return admission.Errored(http.StatusBadRequest, err) + } + if len(pod.Spec.Containers) == 0 { + klog.Warningf(template+" - Denying admission as pod has no containers", req.Namespace, req.Name, req.UID) + return admission.Denied("pod has no containers") + } + klog.Infof(template, req.Namespace, req.Name, req.UID) + hasResource := false + for idx, ctr := range pod.Spec.Containers { + c := &pod.Spec.Containers[idx] + if ctr.SecurityContext != nil { + if ctr.SecurityContext.Privileged != nil && *ctr.SecurityContext.Privileged { + klog.Warningf(template+" - Denying admission as container %s is privileged", req.Namespace, req.Name, req.UID, c.Name) + continue + } + } + for _, val := range device.GetDevices() { + found, err := val.MutateAdmission(c, pod) + if err != nil { + klog.Errorf("validating pod failed:%s", err.Error()) + return admission.Errored(http.StatusInternalServerError, err) + } + hasResource = hasResource || found + } + } + + if !hasResource { + klog.Infof(template+" - Allowing admission for pod: no resource found", req.Namespace, req.Name, req.UID) + //return admission.Allowed("no resource found") + } else if len(config.SchedulerName) > 0 { + pod.Spec.SchedulerName = config.SchedulerName + if pod.Spec.NodeName != "" { + klog.Infof(template+" - Pod already has node assigned", req.Namespace, req.Name, req.UID) + return admission.Denied("pod has node assigned") + } + } + marshaledPod, err := json.Marshal(pod) + if err != nil { + klog.Errorf(template+" - Failed to marshal pod, error: %v", req.Namespace, req.Name, req.UID, err) + return admission.Errored(http.StatusInternalServerError, err) + } + return admission.PatchResponseFromRaw(req.Object.Raw, marshaledPod) +``` + +主要通过 Pod 中容器的 resource 来判断是否要不要走拓展调度器。 + +pkg/device/nvidia/device.go:246 + +```golang +func (dev *NvidiaGPUDevices) MutateAdmission(ctr *corev1.Container, p *corev1.Pod) (bool, error) { + /*gpu related */ + priority, ok := ctr.Resources.Limits[corev1.ResourceName(dev.config.ResourcePriority)] + if ok { + ctr.Env = append(ctr.Env, corev1.EnvVar{ + Name: util.TaskPriority, + Value: fmt.Sprint(priority.Value()), + }) + } + + _, resourceNameOK := ctr.Resources.Limits[corev1.ResourceName(dev.config.ResourceCountName)] + if resourceNameOK { + return resourceNameOK, nil + } + + _, resourceCoresOK := ctr.Resources.Limits[corev1.ResourceName(dev.config.ResourceCoreName)] + _, resourceMemOK := ctr.Resources.Limits[corev1.ResourceName(dev.config.ResourceMemoryName)] + _, resourceMemPercentageOK := ctr.Resources.Limits[corev1.ResourceName(dev.config.ResourceMemoryPercentageName)] + + if resourceCoresOK || resourceMemOK || resourceMemPercentageOK { + if dev.config.DefaultGPUNum > 0 { + ctr.Resources.Limits[corev1.ResourceName(dev.config.ResourceCountName)] = *resource.NewQuantity(int64(dev.config.DefaultGPUNum), resource.BinarySI) + resourceNameOK = true + } + } + + if !resourceNameOK && dev.config.OverwriteEnv { + ctr.Env = append(ctr.Env, corev1.EnvVar{ + Name: "NVIDIA_VISIBLE_DEVICES", + Value: "none", + }) + } + return resourceNameOK, nil +} +``` + +主要比对 Pod 的 Resources Limit 中有没有包含 `device-config.yaml` 的配置,如果有走 hami 调度流程 + +`deivce-config` 以英伟达显卡为例: + +```yaml +nvidia: + resourceCountName: nvidia.com/gpu 
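+  # 这些资源名需与调度器 KubeSchedulerConfiguration 中的 managedResources 列表保持一致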
+ resourceMemoryName: nvidia.com/gpumem + resourceMemoryPercentageName: nvidia.com/gpumem-percentage + resourceCoreName: nvidia.com/gpucores + resourcePriorityName: nvidia.com/priority + overwriteEnv: false + defaultMemory: 0 + defaultCores: 0 + defaultGPUNum: 1 + deviceSplitCount: 10 + deviceMemoryScaling: 3 + deviceCoreScaling: 3 +``` + +确定走 HAMi 调度流程之后,通过 Patch 修改 Pod `schedulerName` 为 HAMi 调度器的名称。 + +### 拓展 k8s scheduler + +[KubeSchedulerConfiguration](https://kubernetes.io/docs/reference/config-api/kube-scheduler-config.v1/) 拓展调度器可以通过实现拓展点进行调度器的拓展。 + +#### KubeSchedulerConfiguration + +```bash +kubectl get cm hami-scheduler-newversion -o yaml +``` + +```yaml +apiVersion: v1 +data: + config.yaml: | + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: KubeSchedulerConfiguration + leaderElection: + leaderElect: false + profiles: + - schedulerName: hami-scheduler + extenders: + - urlPrefix: "https://127.0.0.1:443" + filterVerb: filter + bindVerb: bind + nodeCacheCapable: true + weight: 1 + httpTimeout: 30s + enableHTTPS: true + tlsConfig: + insecure: true + managedResources: + - name: nvidia.com/gpu + ignoredByScheduler: true + - name: nvidia.com/gpumem + ignoredByScheduler: true + - name: nvidia.com/gpucores + ignoredByScheduler: true + - name: nvidia.com/gpumem-percentage + ignoredByScheduler: true + - name: nvidia.com/priority + ignoredByScheduler: true + - name: cambricon.com/vmlu + ignoredByScheduler: true + - name: hygon.com/dcunum + ignoredByScheduler: true + - name: hygon.com/dcumem + ignoredByScheduler: true + - name: hygon.com/dcucores + ignoredByScheduler: true + - name: iluvatar.ai/vgpu + ignoredByScheduler: true +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: hami + meta.helm.sh/release-namespace: kube-system + creationTimestamp: "2024-12-10T03:50:36Z" + labels: + + app.kubernetes.io/component: hami-scheduler + app.kubernetes.io/instance: hami + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: hami + app.kubernetes.io/version: 2.4.1 + helm.sh/chart: hami-2.4.1 + name: hami-scheduler-newversion + namespace: kube-system + resourceVersion: "2316275" + uid: 3a61a72c-0bab-432f-b4d7-5c1ae46ee14d +``` + +拓展调度器通过[拓展点](https://kubernetes.io/docs/reference/scheduling/config/#extension-points)进行拓展, 这里拓展了 filter 和 bind。 + +- filter: 找到最合适的 node +- bind: 为 Pod 创建一个 binding 资源 + +调度时会根据拓展点顺序来调用拓展调度器的实现,这里会先调用 +`https://127.0.0.1:443/filter`,再调用 `https://127.0.0.1:443/bind`。 + +#### 拓展调度器 HTTP Server 启动 + +`cmd/scheduler/main.go:70` + +```golang +func start() { + device.InitDevices() + sher = scheduler.NewScheduler() + sher.Start() + defer sher.Stop() + + // start monitor metrics + go sher.RegisterFromNodeAnnotations() + go initMetrics(config.MetricsBindAddress) + + // start http server + router := httprouter.New() + router.POST("/filter", routes.PredicateRoute(sher)) + router.POST("/bind", routes.Bind(sher)) +``` + +#### filter 实现 + +`pkg/scheduler/routes/route.go:41` + +```golang +func PredicateRoute(s *scheduler.Scheduler) httprouter.Handle { + klog.Infoln("Into Predicate Route outer func") + return func(w http.ResponseWriter, r *http.Request, _ httprouter.Params) { + klog.Infoln("Into Predicate Route inner func") + checkBody(w, r) + + var buf bytes.Buffer + body := io.TeeReader(r.Body, &buf) + + var extenderArgs extenderv1.ExtenderArgs + var extenderFilterResult *extenderv1.ExtenderFilterResult + + if err := json.NewDecoder(body).Decode(&extenderArgs); err != nil { + klog.Errorln("decode error", err.Error()) + extenderFilterResult =
&extenderv1.ExtenderFilterResult{ + Error: err.Error(), + } + } else { + extenderFilterResult, err = s.Filter(extenderArgs) + if err != nil { + klog.Errorf("pod %v filter error, %v", extenderArgs.Pod.Name, err) + extenderFilterResult = &extenderv1.ExtenderFilterResult{ + Error: err.Error(), + } + } + } + + if resultBody, err := json.Marshal(extenderFilterResult); err != nil { + klog.Errorf("Failed to marshal extenderFilterResult: %+v, %+v", + err, extenderFilterResult) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(err.Error())) + } else { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write(resultBody) + } + } +} +``` + +`pkg/scheduler/scheduler.go:430` + +```golang +func (s *Scheduler) Filter(args extenderv1.ExtenderArgs) (*extenderv1.ExtenderFilterResult, error) { + klog.InfoS("begin schedule filter", "pod", args.Pod.Name, "uuid", args.Pod.UID, "namespaces", args.Pod.Namespace) + nums := k8sutil.Resourcereqs(args.Pod) + total := 0 + for _, n := range nums { + for _, k := range n { + total += int(k.Nums) + } + } + if total == 0 { + klog.V(1).Infof("pod %v not find resource", args.Pod.Name) + s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, []string{}, fmt.Errorf("does not request any resource")) + return &extenderv1.ExtenderFilterResult{ + NodeNames: args.NodeNames, + FailedNodes: nil, + Error: "", + }, nil + } + annos := args.Pod.Annotations + s.delPod(args.Pod) + nodeUsage, failedNodes, err := s.getNodesUsage(args.NodeNames, args.Pod) + if err != nil { + s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, []string{}, err) + return nil, err + } + if len(failedNodes) != 0 { + klog.V(5).InfoS("getNodesUsage failed nodes", "nodes", failedNodes) + } + nodeScores, err := s.calcScore(nodeUsage, nums, annos, args.Pod) + if err != nil { + err := fmt.Errorf("calcScore failed %v for pod %v", err, args.Pod.Name) + s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, []string{}, err) + return nil, err + } + if len((*nodeScores).NodeList) == 0 { + klog.V(4).Infof("All node scores do not meet for pod %v", args.Pod.Name) + s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, []string{}, fmt.Errorf("no available node, all node scores do not meet")) + return &extenderv1.ExtenderFilterResult{ + FailedNodes: failedNodes, + }, nil + } + klog.V(4).Infoln("nodeScores_len=", len((*nodeScores).NodeList)) + sort.Sort(nodeScores) + m := (*nodeScores).NodeList[len((*nodeScores).NodeList)-1] + klog.Infof("schedule %v/%v to %v %v", args.Pod.Namespace, args.Pod.Name, m.NodeID, m.Devices) + annotations := make(map[string]string) + annotations[util.AssignedNodeAnnotations] = m.NodeID + annotations[util.AssignedTimeAnnotations] = strconv.FormatInt(time.Now().Unix(), 10) + + for _, val := range device.GetDevices() { + val.PatchAnnotations(&annotations, m.Devices) + } + + //InRequestDevices := util.EncodePodDevices(util.InRequestDevices, m.devices) + //supportDevices := util.EncodePodDevices(util.SupportDevices, m.devices) + //maps.Copy(annotations, InRequestDevices) + //maps.Copy(annotations, supportDevices) + s.addPod(args.Pod, m.NodeID, m.Devices) + err = util.PatchPodAnnotations(args.Pod, annotations) + if err != nil { + s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, []string{}, err) + s.delPod(args.Pod) + return nil, err + } + s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringSucceed, 
[]string{m.NodeID}, nil) + res := extenderv1.ExtenderFilterResult{NodeNames: &[]string{m.NodeID}} + return &res, nil +} +``` + +这里核心逻辑主要有两步, 获取节点资源、根据节点已分配资源与总资源计算分数并选出一个最高分。 + +##### 获取节点资源信息 + +`pkg/scheduler/scheduler.go:241` + +```golang +func (s *Scheduler) getNodesUsage(nodes *[]string, task *corev1.Pod) (*map[string]*NodeUsage, map[string]string, error) { + overallnodeMap := make(map[string]*NodeUsage) + cachenodeMap := make(map[string]*NodeUsage) + failedNodes := make(map[string]string) + //for _, nodeID := range *nodes { + allNodes, err := s.ListNodes() + if err != nil { + return &overallnodeMap, failedNodes, err + } + + for _, node := range allNodes { + nodeInfo := &NodeUsage{} + userGPUPolicy := config.GPUSchedulerPolicy + if task != nil && task.Annotations != nil { + if value, ok := task.Annotations[policy.GPUSchedulerPolicyAnnotationKey]; ok { + userGPUPolicy = value + } + } + nodeInfo.Node = node.Node + nodeInfo.Devices = policy.DeviceUsageList{ + Policy: userGPUPolicy, + DeviceLists: make([]*policy.DeviceListsScore, 0), + } + for _, d := range node.Devices { + nodeInfo.Devices.DeviceLists = append(nodeInfo.Devices.DeviceLists, &policy.DeviceListsScore{ + Score: 0, + Device: &util.DeviceUsage{ + ID: d.ID, + Index: d.Index, + Used: 0, + Count: d.Count, + Usedmem: 0, + Totalmem: d.Devmem, + Totalcore: d.Devcore, + Usedcores: 0, + MigUsage: util.MigInUse{ + Index: 0, + UsageList: make(util.MIGS, 0), + }, + MigTemplate: d.MIGTemplate, + Mode: d.Mode, + Type: d.Type, + Numa: d.Numa, + Health: d.Health, + }, + }) + } + overallnodeMap[node.ID] = nodeInfo + } + + podsInfo := s.ListPodsInfo() + for _, p := range podsInfo { + node, ok := overallnodeMap[p.NodeID] + if !ok { + continue + } + for _, podsingleds := range p.Devices { + for _, ctrdevs := range podsingleds { + for _, udevice := range ctrdevs { + for _, d := range node.Devices.DeviceLists { + deviceID := udevice.UUID + if strings.Contains(deviceID, "[") { + deviceID = strings.Split(deviceID, "[")[0] + } + if d.Device.ID == deviceID { + d.Device.Used++ + d.Device.Usedmem += udevice.Usedmem + d.Device.Usedcores += udevice.Usedcores + if strings.Contains(udevice.UUID, "[") { + tmpIdx, Instance := util.ExtractMigTemplatesFromUUID(udevice.UUID) + if len(d.Device.MigUsage.UsageList) == 0 { + util.PlatternMIG(&d.Device.MigUsage, d.Device.MigTemplate, tmpIdx) + } + d.Device.MigUsage.UsageList[Instance].InUse = true + klog.V(3).Infoln("add mig usage", d.Device.MigUsage, "template=", d.Device.MigTemplate, "uuid=", d.Device.ID) + } + } + } + } + } + } + klog.V(5).Infof("usage: pod %v assigned %v %v", p.Name, p.NodeID, p.Devices) + } + s.overviewstatus = overallnodeMap + for _, nodeID := range *nodes { + node, err := s.GetNode(nodeID) + if err != nil { + // The identified node does not have a gpu device, so the log here has no practical meaning,increase log priority. 
+ klog.V(5).InfoS("node unregistered", "node", nodeID, "error", err) + failedNodes[nodeID] = "node unregistered" + continue + } + cachenodeMap[node.ID] = overallnodeMap[node.ID] + } + s.cachedstatus = cachenodeMap + return &cachenodeMap, failedNodes, nil +} +``` + +获取 Node 总的资源与已分配的资源, 首先获取 Node 信息。 + +`pkg/scheduler/nodes.go:120` + +```golang +func (m *nodeManager) ListNodes() (map[string]*util.NodeInfo, error) { + m.mutex.RLock() + defer m.mutex.RUnlock() + return m.nodes, nil +} +``` + +这里用到了缓存,缓存节点信息,由 `addNode` 添加缓存。 + +###### Node 缓存 + +`pkg/scheduler/nodes.go:46` + +```golang +func (m *nodeManager) addNode(nodeID string, nodeInfo *util.NodeInfo) { + if nodeInfo == nil || len(nodeInfo.Devices) == 0 { + return + } + m.mutex.Lock() + defer m.mutex.Unlock() + _, ok := m.nodes[nodeID] + if ok { + if len(nodeInfo.Devices) > 0 { + tmp := make([]util.DeviceInfo, 0, len(nodeInfo.Devices)) + devices := device.GetDevices() + deviceType := "" + for _, val := range devices { + if strings.Contains(nodeInfo.Devices[0].Type, val.CommonWord()) { + deviceType = val.CommonWord() + } + } + for _, val := range m.nodes[nodeID].Devices { + if !strings.Contains(val.Type, deviceType) { + tmp = append(tmp, val) + } + } + m.nodes[nodeID].Devices = tmp + m.nodes[nodeID].Devices = append(m.nodes[nodeID].Devices, nodeInfo.Devices...) + } + } else { + m.nodes[nodeID] = nodeInfo + } +} +``` + +这里的主要逻辑在于 `device.GetDevices()` 获取设备信息 + +`pkg/device/devices.go:81` + +```golang +func GetDevices() map[string]Devices { + return devices +} +``` + +device 也是个缓存,后面再分析,首先看 Node 缓存是什么时候被调用的。 + +`pkg/scheduler/scheduler.go:155` + +```golang +func (s *Scheduler) RegisterFromNodeAnnotations() { + klog.V(5).Infoln("Scheduler into RegisterFromNodeAnnotations") + ticker := time.NewTicker(time.Second * 15) + for { + select { + case <-s.nodeNotify: + case <-ticker.C: + case <-s.stopCh: + return + } + labelSelector := labels.Everything() + if len(config.NodeLabelSelector) > 0 { + labelSelector = (labels.Set)(config.NodeLabelSelector).AsSelector() + } + rawNodes, err := s.nodeLister.List(labelSelector) + if err != nil { + klog.Errorln("nodes list failed", err.Error()) + continue + } + var nodeNames []string + for _, val := range rawNodes { + nodeNames = append(nodeNames, val.Name) + for devhandsk, devInstance := range device.GetDevices() { + health, needUpdate := devInstance.CheckHealth(devhandsk, val) + klog.V(5).InfoS("device check health", "node", val.Name, "deviceVendor", devhandsk, "health", health, "needUpdate", needUpdate) + if !health { + err := devInstance.NodeCleanUp(val.Name) + // If the device is not healthy, the device is removed from the node. + // At the same time, this node needs to be removed from the cache. 
+ if err != nil { + klog.Errorln("node cleanup failed", err.Error()) + } + info, ok := s.nodes[val.Name] + if ok { + klog.Infof("node %v device %s:%v leave, %v remaining devices:%v", val.Name, devhandsk, info.ID, err, s.nodes[val.Name].Devices) + s.rmNodeDevice(val.Name, info, devhandsk) + continue + } + } + if !needUpdate { + continue + } + _, ok := util.HandshakeAnnos[devhandsk] + if ok { + tmppat := make(map[string]string) + tmppat[util.HandshakeAnnos[devhandsk]] = "Requesting_" + time.Now().Format("2006.01.02 15:04:05") + klog.V(4).InfoS("New timestamp", util.HandshakeAnnos[devhandsk], tmppat[util.HandshakeAnnos[devhandsk]], "nodeName", val.Name) + n, err := util.GetNode(val.Name) + if err != nil { + klog.Errorln("get node failed", err.Error()) + continue + } + util.PatchNodeAnnotations(n, tmppat) + } + + nodeInfo := &util.NodeInfo{} + nodeInfo.ID = val.Name + nodeInfo.Node = val + nodedevices, err := devInstance.GetNodeDevices(*val) + if err != nil { + continue + } + nodeInfo.Devices = make([]util.DeviceInfo, 0) + for _, deviceinfo := range nodedevices { + nodeInfo.Devices = append(nodeInfo.Devices, *deviceinfo) + } + s.addNode(val.Name, nodeInfo) + if s.nodes[val.Name] != nil && len(nodeInfo.Devices) > 0 { + klog.Infof("node %v device %s come node info=%s,%v total=%v", val.Name, devhandsk, nodeInfo.ID, nodeInfo.Devices, s.nodes[val.Name].Devices) + } + } + } + _, _, err = s.getNodesUsage(&nodeNames, nil) + if err != nil { + klog.Errorln("get node usage failed", err.Error()) + } + } +} +``` + +启动了一个 15s 的定时任务,获取 Node 信息维护 Node 缓存。 + +这里的核心逻辑在于 `for devhandsk, devInstance := range device.GetDevices()` 获取所有的 device, +主要是一些根据不同的设备注册了不同的 handler,根据注册的 device 获取显卡的资源信息 `devInstance.GetNodeDevices`。 + +这里会通过注册的 device(此环境为 nvidia),调用到不同显卡的`GetNodeDevices`实现,device 后面再做具体说明。 + +`pkg/device/nvidia/device.go:209` + +```golang +func (dev *NvidiaGPUDevices) GetNodeDevices(n corev1.Node) ([]*util.DeviceInfo, error) { + devEncoded, ok := n.Annotations[RegisterAnnos] + if !ok { + return []*util.DeviceInfo{}, errors.New("annos not found " + RegisterAnnos) + } + nodedevices, err := util.DecodeNodeDevices(devEncoded) + if err != nil { + klog.ErrorS(err, "failed to decode node devices", "node", n.Name, "device annotation", devEncoded) + return []*util.DeviceInfo{}, err + } + if len(nodedevices) == 0 { + klog.InfoS("no nvidia gpu device found", "node", n.Name, "device annotation", devEncoded) + return []*util.DeviceInfo{}, errors.New("no gpu found on node") + } + for _, val := range nodedevices { + if val.Mode == "mig" { + val.MIGTemplate = make([]util.Geometry, 0) + for _, migTemplates := range dev.config.MigGeometriesList { + found := false + for _, migDevices := range migTemplates.Models { + if strings.Contains(val.Type, migDevices) { + found = true + break + } + } + if found { + val.MIGTemplate = append(val.MIGTemplate, migTemplates.Geometries...) + break + } + } + } + } + devDecoded := util.EncodeNodeDevices(nodedevices) + klog.V(5).InfoS("nodes device information", "node", n.Name, "nodedevices", devDecoded) + return nodedevices, nil +} +``` + +看到这里基本逻辑是 scheduler 通过定时器去读取 node 的 annotation 信息并将其维护在 node 缓存中,以供调度时使用。 + +```yaml +apiVersion: v1 +kind: Node +metadata: + annotations: + ...
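+    # 每个以 ':' 分隔的条目对应一块物理卡,字段依次为:UUID、可切分数量、显存、算力核心、卡型号、NUMA、健康状态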
+ hami.io/node-nvidia-register: 'GPU-7aebc545-cbd3-18a0-afce-76cae449702a,10,24576,300,NVIDIA-NVIDIA + GeForce RTX 3090,0,true: +``` + +又调用到了 device,这个我们待会儿再看,继续看谁调用的 `RegisterFromNodeAnnotations`。 + +`cmd/scheduler/main.go:70` + +```golang +func start() { + device.InitDevices() + sher = scheduler.NewScheduler() + sher.Start() + defer sher.Stop() + + // start monitor metrics + go sher.RegisterFromNodeAnnotations() + go initMetrics(config.MetricsBindAddress) +``` + +调度器启动的时候就会调用,这里逻辑明确了,继续看刚刚的 device。 + +###### device + +device 通过 `pkg/device/devices.go:85` 进行初始化。 + +```golang +func InitDevicesWithConfig(config *Config) { + devices = make(map[string]Devices) + DevicesToHandle = []string{} + devices[nvidia.NvidiaGPUDevice] = nvidia.InitNvidiaDevice(config.NvidiaConfig) + devices[cambricon.CambriconMLUDevice] = cambricon.InitMLUDevice(config.CambriconConfig) + devices[hygon.HygonDCUDevice] = hygon.InitDCUDevice(config.HygonConfig) + devices[iluvatar.IluvatarGPUDevice] = iluvatar.InitIluvatarDevice(config.IluvatarConfig) + devices[mthreads.MthreadsGPUDevice] = mthreads.InitMthreadsDevice(config.MthreadsConfig) + devices[metax.MetaxGPUDevice] = metax.InitMetaxDevice(config.MetaxConfig) + + DevicesToHandle = append(DevicesToHandle, nvidia.NvidiaGPUCommonWord) + DevicesToHandle = append(DevicesToHandle, cambricon.CambriconMLUCommonWord) + DevicesToHandle = append(DevicesToHandle, hygon.HygonDCUCommonWord) + DevicesToHandle = append(DevicesToHandle, iluvatar.IluvatarGPUCommonWord) + DevicesToHandle = append(DevicesToHandle, mthreads.MthreadsGPUCommonWord) + DevicesToHandle = append(DevicesToHandle, metax.MetaxGPUCommonWord) + for _, dev := range ascend.InitDevices(config.VNPUs) { + devices[dev.CommonWord()] = dev + DevicesToHandle = append(DevicesToHandle, dev.CommonWord()) + } +} +``` + +这里用的是 nvidia,所以主要看 `InitNvidiaDevice` 即可。 + +`pkg/device/devices.go:42` + +```golang +type Devices interface { + CommonWord() string + MutateAdmission(ctr *corev1.Container, pod *corev1.Pod) (bool, error) + CheckHealth(devType string, n *corev1.Node) (bool, bool) + NodeCleanUp(nn string) error + GetNodeDevices(n corev1.Node) ([]*util.DeviceInfo, error) + CheckType(annos map[string]string, d util.DeviceUsage, n util.ContainerDeviceRequest) (bool, bool, bool) + // CheckUUID is check current device id whether in GPUUseUUID or GPUNoUseUUID set, return true is check success. 
+ CheckUUID(annos map[string]string, d util.DeviceUsage) bool + LockNode(n *corev1.Node, p *corev1.Pod) error + ReleaseNodeLock(n *corev1.Node, p *corev1.Pod) error + GenerateResourceRequests(ctr *corev1.Container) util.ContainerDeviceRequest + PatchAnnotations(annoinput *map[string]string, pd util.PodDevices) map[string]string + CustomFilterRule(allocated *util.PodDevices, request util.ContainerDeviceRequest, toAllicate util.ContainerDevices, device *util.DeviceUsage) bool + ScoreNode(node *corev1.Node, podDevices util.PodSingleDevice, policy string) float32 + AddResourceUsage(n *util.DeviceUsage, ctr *util.ContainerDevice) error + // This should not be associated with a specific device object + //ParseConfig(fs *flag.FlagSet) +} +``` + +这里定义了一些接口,不同的设备进行不同的实现,在 scheduler 启动时进行初始化,以供运行中调用。 + +获取到各个节点的各个设备的资源情况之后开始进行打分。 + +##### 根据节点资源信息打分 + +`pkg/scheduler/scheduler.go:458` + +```golang + nodeScores, err := s.calcScore(nodeUsage, nums, annos, args.Pod) + if err != nil { + err := fmt.Errorf("calcScore failed %v for pod %v", err, args.Pod.Name) + s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, []string{}, err) + return nil, err + } +``` + +`pkg/scheduler/score.go:198` + +```golang +func (s *Scheduler) calcScore(nodes *map[string]*NodeUsage, nums util.PodDeviceRequests, annos map[string]string, task *corev1.Pod) (*policy.NodeScoreList, error) { + userNodePolicy := config.NodeSchedulerPolicy + if annos != nil { + if value, ok := annos[policy.NodeSchedulerPolicyAnnotationKey]; ok { + userNodePolicy = value + } + } + res := policy.NodeScoreList{ + Policy: userNodePolicy, + NodeList: make([]*policy.NodeScore, 0), + } + + //func calcScore(nodes *map[string]*NodeUsage, errMap *map[string]string, nums util.PodDeviceRequests, annos map[string]string, task *corev1.Pod) (*NodeScoreList, error) { + // res := make(NodeScoreList, 0, len(*nodes)) + for nodeID, node := range *nodes { + viewStatus(*node) + score := policy.NodeScore{NodeID: nodeID, Node: node.Node, Devices: make(util.PodDevices), Score: 0} + score.ComputeDefaultScore(node.Devices) + + //This loop is for different container request + ctrfit := false + for ctrid, n := range nums { + sums := 0 + for _, k := range n { + sums += int(k.Nums) + } + + if sums == 0 { + for idx := range score.Devices { + for len(score.Devices[idx]) <= ctrid { + score.Devices[idx] = append(score.Devices[idx], util.ContainerDevices{}) + } + score.Devices[idx][ctrid] = append(score.Devices[idx][ctrid], util.ContainerDevice{}) + continue + } + } + klog.V(5).InfoS("fitInDevices", "pod", klog.KObj(task), "node", nodeID) + fit, _ := fitInDevices(node, n, annos, task, &score.Devices) + ctrfit = fit + if !fit { + klog.InfoS("calcScore:node not fit pod", "pod", klog.KObj(task), "node", nodeID) + break + } + } + + if ctrfit { + res.NodeList = append(res.NodeList, &score) + score.OverrideScore(node.Devices, userNodePolicy) + } + } + return &res, nil +} +``` + +这块逻辑主要分为遍历节点打分,遍历 Pod 的容器计算每个容器对应的设备的分数,返回所有可以承载 limits 所需资源的 node 返回。 + +##### 计算出节点的分数 + +`pkg/scheduler/policy/node_policy.go:68` + +```golang +func (ns *NodeScore) ComputeDefaultScore(devices DeviceUsageList) { + used, usedCore, usedMem := int32(0), int32(0), int32(0) + for _, device := range devices.DeviceLists { + used += device.Device.Used + usedCore += device.Device.Usedcores + usedMem += device.Device.Usedmem + } + klog.V(2).Infof("node %s used %d, usedCore %d, usedMem %d,", ns.NodeID, used, usedCore, usedMem) + + total, totalCore, totalMem := int32(0), int32(0), int32(0) + for _, 
deviceLists := range devices.DeviceLists { + total += deviceLists.Device.Count + totalCore += deviceLists.Device.Totalcore + totalMem += deviceLists.Device.Totalmem + } + useScore := float32(used) / float32(total) + coreScore := float32(usedCore) / float32(totalCore) + memScore := float32(usedMem) / float32(totalMem) + ns.Score = float32(Weight) * (useScore + coreScore + memScore) + klog.V(2).Infof("node %s computer default score is %f", ns.NodeID, ns.Score) +} +``` + +节点打分规则比较简单 + +##### 计算每个容器对应的设备的分数 + +`pkg/scheduler/score.go:149` + +```golang +func fitInDevices(node *NodeUsage, requests util.ContainerDeviceRequests, annos map[string]string, pod *corev1.Pod, devinput *util.PodDevices) (bool, float32) { + //devmap := make(map[string]util.ContainerDevices) + devs := util.ContainerDevices{} + total, totalCore, totalMem := int32(0), int32(0), int32(0) + free, freeCore, freeMem := int32(0), int32(0), int32(0) + sums := 0 + // computer all device score for one node + for index := range node.Devices.DeviceLists { + node.Devices.DeviceLists[index].ComputeScore(requests) + } + //This loop is for requests for different devices + for _, k := range requests { + sums += int(k.Nums) + if int(k.Nums) > len(node.Devices.DeviceLists) { + klog.InfoS("request devices nums cannot exceed the total number of devices on the node.", "pod", klog.KObj(pod), "request devices nums", k.Nums, "node device nums", len(node.Devices.DeviceLists)) + return false, 0 + } + sort.Sort(node.Devices) + fit, tmpDevs := fitInCertainDevice(node, k, annos, pod, devinput) + if fit { + for idx, val := range tmpDevs[k.Type] { + for nidx, v := range node.Devices.DeviceLists { + //bc node.Devices has been sorted, so we should find out the correct device + if v.Device.ID != val.UUID { + continue + } + total += v.Device.Count + totalCore += v.Device.Totalcore + totalMem += v.Device.Totalmem + free += v.Device.Count - v.Device.Used + freeCore += v.Device.Totalcore - v.Device.Usedcores + freeMem += v.Device.Totalmem - v.Device.Usedmem + err := device.GetDevices()[k.Type].AddResourceUsage(node.Devices.DeviceLists[nidx].Device, &tmpDevs[k.Type][idx]) + if err != nil { + klog.Errorf("AddResource failed:%s", err.Error()) + return false, 0 + } + klog.Infoln("After AddResourceUsage:", node.Devices.DeviceLists[nidx].Device) + } + } + devs = append(devs, tmpDevs[k.Type]...) 
+ } else { + return false, 0 + } + (*devinput)[k.Type] = append((*devinput)[k.Type], devs) + } + return true, 0 +} +``` + +主要逻辑为: + +- 给容器对应的每个设备打分、遍历不同的容器对应的 limit 资源,找到可以承载容器 limits 资源的设备 + +`pkg/scheduler/policy/gpu_policy.go:58` + +```golang +func (ds *DeviceListsScore) ComputeScore(requests util.ContainerDeviceRequests) { + request, core, mem := int32(0), int32(0), int32(0) + // Here we are required to use the same type device + for _, container := range requests { + request += container.Nums + core += container.Coresreq + if container.MemPercentagereq != 0 && container.MemPercentagereq != 101 { + mem += ds.Device.Totalmem * (container.MemPercentagereq / 100.0) + continue + } + mem += container.Memreq + } + klog.V(2).Infof("device %s user %d, userCore %d, userMem %d,", ds.Device.ID, ds.Device.Used, ds.Device.Usedcores, ds.Device.Usedmem) + + usedScore := float32(request+ds.Device.Used) / float32(ds.Device.Count) + coreScore := float32(core+ds.Device.Usedcores) / float32(ds.Device.Totalcore) + memScore := float32(mem+ds.Device.Usedmem) / float32(ds.Device.Totalmem) + ds.Score = float32(Weight) * (usedScore + coreScore + memScore) + klog.V(2).Infof("device %s computer score is %f", ds.Device.ID, ds.Score) +} +``` + +打分规则与节点类似。 + +`pkg/scheduler/score.go:65` + +```golang +func fitInCertainDevice(node *NodeUsage, request util.ContainerDeviceRequest, annos map[string]string, pod *corev1.Pod, allocated *util.PodDevices) (bool, map[string]util.ContainerDevices) { + k := request + originReq := k.Nums + prevnuma := -1 + klog.InfoS("Allocating device for container request", "pod", klog.KObj(pod), "card request", k) + var tmpDevs map[string]util.ContainerDevices + tmpDevs = make(map[string]util.ContainerDevices) + for i := len(node.Devices.DeviceLists) - 1; i >= 0; i-- { + klog.InfoS("scoring pod", "pod", klog.KObj(pod), "Memreq", k.Memreq, "MemPercentagereq", k.MemPercentagereq, "Coresreq", k.Coresreq, "Nums", k.Nums, "device index", i, "device", node.Devices.DeviceLists[i].Device.ID) + found, numa := checkType(annos, *node.Devices.DeviceLists[i].Device, k) + if !found { + klog.InfoS("card type mismatch,continuing...", "pod", klog.KObj(pod), (node.Devices.DeviceLists[i].Device).Type, k.Type) + continue + } + if numa && prevnuma != node.Devices.DeviceLists[i].Device.Numa { + klog.InfoS("Numa not fit, resotoreing", "pod", klog.KObj(pod), "k.nums", k.Nums, "numa", numa, "prevnuma", prevnuma, "device numa", node.Devices.DeviceLists[i].Device.Numa) + k.Nums = originReq + prevnuma = node.Devices.DeviceLists[i].Device.Numa + tmpDevs = make(map[string]util.ContainerDevices) + } + if !checkUUID(annos, *node.Devices.DeviceLists[i].Device, k) { + klog.InfoS("card uuid mismatch,", "pod", klog.KObj(pod), "current device info is:", *node.Devices.DeviceLists[i].Device) + continue + } + + memreq := int32(0) + if node.Devices.DeviceLists[i].Device.Count <= node.Devices.DeviceLists[i].Device.Used { + continue + } + if k.Coresreq > 100 { + klog.ErrorS(nil, "core limit can't exceed 100", "pod", klog.KObj(pod)) + k.Coresreq = 100 + //return false, tmpDevs + } + if k.Memreq > 0 { + memreq = k.Memreq + } + if k.MemPercentagereq != 101 && k.Memreq == 0 { + //This incurs an issue + memreq = node.Devices.DeviceLists[i].Device.Totalmem * k.MemPercentagereq / 100 + } + if node.Devices.DeviceLists[i].Device.Totalmem-node.Devices.DeviceLists[i].Device.Usedmem < memreq { + klog.V(5).InfoS("card Insufficient remaining memory", "pod", klog.KObj(pod), "device index", i, "device", node.Devices.DeviceLists[i].Device.ID, "device 
total memory", node.Devices.DeviceLists[i].Device.Totalmem, "device used memory", node.Devices.DeviceLists[i].Device.Usedmem, "request memory", memreq) + continue + } + if node.Devices.DeviceLists[i].Device.Totalcore-node.Devices.DeviceLists[i].Device.Usedcores < k.Coresreq { + klog.V(5).InfoS("card Insufficient remaining cores", "pod", klog.KObj(pod), "device index", i, "device", node.Devices.DeviceLists[i].Device.ID, "device total core", node.Devices.DeviceLists[i].Device.Totalcore, "device used core", node.Devices.DeviceLists[i].Device.Usedcores, "request cores", k.Coresreq) + continue + } + // Coresreq=100 indicates it want this card exclusively + if node.Devices.DeviceLists[i].Device.Totalcore == 100 && k.Coresreq == 100 && node.Devices.DeviceLists[i].Device.Used > 0 { + klog.V(5).InfoS("the container wants exclusive access to an entire card, but the card is already in use", "pod", klog.KObj(pod), "device index", i, "device", node.Devices.DeviceLists[i].Device.ID, "used", node.Devices.DeviceLists[i].Device.Used) + continue + } + // You can't allocate core=0 job to an already full GPU + if node.Devices.DeviceLists[i].Device.Totalcore != 0 && node.Devices.DeviceLists[i].Device.Usedcores == node.Devices.DeviceLists[i].Device.Totalcore && k.Coresreq == 0 { + klog.V(5).InfoS("can't allocate core=0 job to an already full GPU", "pod", klog.KObj(pod), "device index", i, "device", node.Devices.DeviceLists[i].Device.ID) + continue + } + if !device.GetDevices()[k.Type].CustomFilterRule(allocated, request, tmpDevs[k.Type], node.Devices.DeviceLists[i].Device) { + continue + } + if k.Nums > 0 { + klog.InfoS("first fitted", "pod", klog.KObj(pod), "device", node.Devices.DeviceLists[i].Device.ID) + k.Nums-- + tmpDevs[k.Type] = append(tmpDevs[k.Type], util.ContainerDevice{ + Idx: int(node.Devices.DeviceLists[i].Device.Index), + UUID: node.Devices.DeviceLists[i].Device.ID, + Type: k.Type, + Usedmem: memreq, + Usedcores: k.Coresreq, + }) + } + if k.Nums == 0 { + klog.InfoS("device allocate success", "pod", klog.KObj(pod), "allocate device", tmpDevs) + return true, tmpDevs + } + if node.Devices.DeviceLists[i].Device.Mode == "mig" { + i++ + } + } + return false, tmpDevs +} +``` + +遍历设备,主要根据设备资源余量来判断是否够 container 分配,返回所有够分配的设备。 + +`pkg/scheduler/scheduler.go:458` + +```golang + nodeScores, err := s.calcScore(nodeUsage, nums, annos, args.Pod) + if err != nil { + err := fmt.Errorf("calcScore failed %v for pod %v", err, args.Pod.Name) + s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, []string{}, err) + return nil, err + } + if len((*nodeScores).NodeList) == 0 { + klog.V(4).Infof("All node scores do not meet for pod %v", args.Pod.Name) + s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, []string{}, fmt.Errorf("no available node, all node scores do not meet")) + return &extenderv1.ExtenderFilterResult{ + FailedNodes: failedNodes, + }, nil + } + klog.V(4).Infoln("nodeScores_len=", len((*nodeScores).NodeList)) + sort.Sort(nodeScores) + m := (*nodeScores).NodeList[len((*nodeScores).NodeList)-1] + klog.Infof("schedule %v/%v to %v %v", args.Pod.Namespace, args.Pod.Name, m.NodeID, m.Devices) + annotations := make(map[string]string) + annotations[util.AssignedNodeAnnotations] = m.NodeID + annotations[util.AssignedTimeAnnotations] = strconv.FormatInt(time.Now().Unix(), 10) + + for _, val := range device.GetDevices() { + val.PatchAnnotations(&annotations, m.Devices) + } + + //InRequestDevices := util.EncodePodDevices(util.InRequestDevices, m.devices) + //supportDevices := 
util.EncodePodDevices(util.SupportDevices, m.devices) + //maps.Copy(annotations, InRequestDevices) + //maps.Copy(annotations, supportDevices) + s.addPod(args.Pod, m.NodeID, m.Devices) + err = util.PatchPodAnnotations(args.Pod, annotations) + if err != nil { + s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, []string{}, err) + s.delPod(args.Pod) + return nil, err + } + s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringSucceed, []string{m.NodeID}, nil) + res := extenderv1.ExtenderFilterResult{NodeNames: &[]string{m.NodeID}} + return &res, nil +``` + +遍历完成之后选择分数最高的, 给 Pod 打标签。 + +```yaml +apiVersion: v1 +kind: Pod +metadata: + annotations: + hami.io/vgpu-node: node1 + hami.io/vgpu-time: "1733988480" + hami.io/vgpu-devices-allocated: GPU-7aebc545-cbd3-18a0-afce-76cae449702a,NVIDIA,20000,80:; + hami.io/vgpu-devices-to-allocate: ; +``` + +#### binding 实现 + +bind 逻辑比较简单,将 Pod 绑定到 Node。 + +`pkg/scheduler/routes/route.go:82` + +```golang +func Bind(s *scheduler.Scheduler) httprouter.Handle { + return func(w http.ResponseWriter, r *http.Request, ps httprouter.Params) { + var buf bytes.Buffer + body := io.TeeReader(r.Body, &buf) + var extenderBindingArgs extenderv1.ExtenderBindingArgs + var extenderBindingResult *extenderv1.ExtenderBindingResult + + if err := json.NewDecoder(body).Decode(&extenderBindingArgs); err != nil { + klog.ErrorS(err, "Decode extender binding args") + extenderBindingResult = &extenderv1.ExtenderBindingResult{ + Error: err.Error(), + } + } else { + extenderBindingResult, err = s.Bind(extenderBindingArgs) + } + + if response, err := json.Marshal(extenderBindingResult); err != nil { + klog.ErrorS(err, "Marshal binding result", "result", extenderBindingResult) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusInternalServerError) + errMsg := fmt.Sprintf("{'error':'%s'}", err.Error()) + w.Write([]byte(errMsg)) + } else { + klog.V(5).InfoS("Return bind response", "result", extenderBindingResult) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write(response) + } + } +} +``` + +路由处理: + +```golang +func (s *Scheduler) Bind(args extenderv1.ExtenderBindingArgs) (*extenderv1.ExtenderBindingResult, error) { + klog.InfoS("Bind", "pod", args.PodName, "namespace", args.PodNamespace, "podUID", args.PodUID, "node", args.Node) + var err error + var res *extenderv1.ExtenderBindingResult + binding := &corev1.Binding{ + ObjectMeta: metav1.ObjectMeta{Name: args.PodName, UID: args.PodUID}, + Target: corev1.ObjectReference{Kind: "Node", Name: args.Node}, + } + current, err := s.kubeClient.CoreV1().Pods(args.PodNamespace).Get(context.Background(), args.PodName, metav1.GetOptions{}) + if err != nil { + klog.ErrorS(err, "Get pod failed") + } + + node, err := s.kubeClient.CoreV1().Nodes().Get(context.Background(), args.Node, metav1.GetOptions{}) + if err != nil { + klog.ErrorS(err, "Failed to get node", "node", args.Node) + s.recordScheduleBindingResultEvent(current, EventReasonBindingFailed, []string{}, fmt.Errorf("failed to get node %v", args.Node)) + res = &extenderv1.ExtenderBindingResult{ + Error: err.Error(), + } + return res, nil + } + + tmppatch := make(map[string]string) + for _, val := range device.GetDevices() { + err = val.LockNode(node, current) + if err != nil { + goto ReleaseNodeLocks + } + } + + tmppatch[util.DeviceBindPhase] = "allocating" + tmppatch[util.BindTimeAnnotations] = strconv.FormatInt(time.Now().Unix(), 10) + + err = util.PatchPodAnnotations(current, tmppatch) + if err != 
nil { + klog.ErrorS(err, "patch pod annotation failed") + } + if err = s.kubeClient.CoreV1().Pods(args.PodNamespace).Bind(context.Background(), binding, metav1.CreateOptions{}); err != nil { + klog.ErrorS(err, "Failed to bind pod", "pod", args.PodName, "namespace", args.PodNamespace, "podUID", args.PodUID, "node", args.Node) + } + if err == nil { + s.recordScheduleBindingResultEvent(current, EventReasonBindingSucceed, []string{args.Node}, nil) + res = &extenderv1.ExtenderBindingResult{ + Error: "", + } + klog.Infoln("After Binding Process") + return res, nil + } +ReleaseNodeLocks: + klog.InfoS("bind failed", "err", err.Error()) + for _, val := range device.GetDevices() { + val.ReleaseNodeLock(node, current) + } + s.recordScheduleBindingResultEvent(current, EventReasonBindingFailed, []string{}, err) + return &extenderv1.ExtenderBindingResult{ + Error: err.Error(), + }, nil +} +``` + +### Node 将设备情况写入 node annotation + +scheduler 获取 node 的设备信息主要是通过读取 node 的 annotation,主要有如下几步: + +- 启动插件 + +```yaml +apiVersion: v1 +kind: Node +metadata: + annotations: + hami.io/node-handshake: Requesting_2024.12.24 03:31:30 + hami.io/node-handshake-dcu: Deleted_2024.12.06 07:43:49 + hami.io/node-nvidia-register: + "GPU-7aebc545-cbd3-18a0-afce-76cae449702a,10,73728,300,NVIDIA-NVIDIA + GeForce RTX 3090,0,true:" +``` + +#### 启动 device-plugin 服务 + +这里用到了 `github.com/urfave/cli/v2` 作为 command 启动服务,需要注意 -v 不是日志等级而是是否显示版本 + +`cmd/device-plugin/nvidia/main.go:40` + +```golang +func main() { + var configFile string + + c := cli.NewApp() + c.Name = "NVIDIA Device Plugin" + c.Usage = "NVIDIA device plugin for Kubernetes" + c.Version = info.GetVersionString() + c.Action = func(ctx *cli.Context) error { + return start(ctx, c.Flags) + } +``` + +#### 启动 plugin + +这里的 plugin 主要是针对不同厂家的设备需要实现不同的方法,这里定义了 pluigin 的控制器,例如 start、restart、exit 等,这里我们主要关注`plugins, restartPlugins, err := startPlugins(c, flags, restarting)` + +`cmd/device-plugin/nvidia/main.go:156` + +```golang +func start(c *cli.Context, flags []cli.Flag) error { + klog.Info("Starting FS watcher.") + util.NodeName = os.Getenv(util.NodeNameEnvName) + watcher, err := newFSWatcher(kubeletdevicepluginv1beta1.DevicePluginPath) + if err != nil { + return fmt.Errorf("failed to create FS watcher: %v", err) + } + defer watcher.Close() + //device.InitDevices() + + /*Loading config files*/ + klog.Infof("Start working on node %s", util.NodeName) + klog.Info("Starting OS watcher.") + sigs := newOSWatcher(syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT) + + var restarting bool + var restartTimeout <-chan time.Time + var plugins []plugin.Interface +restart: + // If we are restarting, stop plugins from previous run. + if restarting { + err := stopPlugins(plugins) + if err != nil { + return fmt.Errorf("error stopping plugins from previous run: %v", err) + } + } + + klog.Info("Starting Plugins.") + plugins, restartPlugins, err := startPlugins(c, flags, restarting) + if err != nil { + return fmt.Errorf("error starting plugins: %v", err) + } + + if restartPlugins { + klog.Info("Failed to start one or more plugins. Retrying in 30s...") + restartTimeout = time.After(30 * time.Second) + } + + restarting = true + + // Start an infinite loop, waiting for several indicators to either log + // some messages, trigger a restart of the plugins, or exit the program. 
+ for { + select { + // If the restart timeout has expired, then restart the plugins + case <-restartTimeout: + goto restart + + // Detect a kubelet restart by watching for a newly created + // 'kubeletdevicepluginv1beta1.KubeletSocket' file. When this occurs, restart this loop, + // restarting all of the plugins in the process. + case event := <-watcher.Events: + if event.Name == kubeletdevicepluginv1beta1.KubeletSocket && event.Op&fsnotify.Create == fsnotify.Create { + klog.Infof("inotify: %s created, restarting.", kubeletdevicepluginv1beta1.KubeletSocket) + goto restart + } + + // Watch for any other fs errors and log them. + case err := <-watcher.Errors: + klog.Errorf("inotify: %s", err) + + // Watch for any signals from the OS. On SIGHUP, restart this loop, + // restarting all of the plugins in the process. On all other + // signals, exit the loop and exit the program. + case s := <-sigs: + switch s { + case syscall.SIGHUP: + klog.Info("Received SIGHUP, restarting.") + goto restart + default: + klog.Infof("Received signal \"%v\", shutting down.", s) + goto exit + } + } + } +exit: + err = stopPlugins(plugins) + if err != nil { + return fmt.Errorf("error stopping plugins: %v", err) + } + return nil +} +``` + +`cmd/device-plugin/nvidia/main.go:239` + +启动插件,主要方法 `p.Start()` + +```golang +func startPlugins(c *cli.Context, flags []cli.Flag, restarting bool) ([]plugin.Interface, bool, error) { + // Load the configuration file + klog.Info("Loading configuration.") + config, err := loadConfig(c, flags) + if err != nil { + return nil, false, fmt.Errorf("unable to load config: %v", err) + } + disableResourceRenamingInConfig(config) + + /*Loading config files*/ + //fmt.Println("NodeName=", config.NodeName) + devConfig, err := generateDeviceConfigFromNvidia(config, c, flags) + if err != nil { + klog.Errorf("failed to load config file %s", err.Error()) + return nil, false, err + } + + // Update the configuration file with default resources. + klog.Info("Updating config with default resource matching patterns.") + err = rm.AddDefaultResourcesToConfig(&devConfig) + if err != nil { + return nil, false, fmt.Errorf("unable to add default resources to config: %v", err) + } + + // Print the config to the output. + configJSON, err := json.MarshalIndent(devConfig, "", " ") + if err != nil { + return nil, false, fmt.Errorf("failed to marshal config to JSON: %v", err) + } + klog.Infof("\nRunning with config:\n%v", string(configJSON)) + + // Get the set of plugins. + klog.Info("Retrieving plugins.") + pluginManager, err := NewPluginManager(&devConfig) + if err != nil { + return nil, false, fmt.Errorf("error creating plugin manager: %v", err) + } + plugins, err := pluginManager.GetPlugins() + if err != nil { + return nil, false, fmt.Errorf("error getting plugins: %v", err) + } + + // Loop through all plugins, starting them if they have any devices + // to serve. If even one plugin fails to start properly, try + // starting them all again. + started := 0 + for _, p := range plugins { + // Just continue if there are no devices to serve for plugin p. + if len(p.Devices()) == 0 { + continue + } + + // Start the gRPC server for plugin p and connect it with the kubelet. + if err := p.Start(); err != nil { + klog.Error("Could not contact Kubelet. 
Did you enable the device plugin feature gate?") + klog.Error("You can check the prerequisites at: https://github.com/NVIDIA/k8s-device-plugin#prerequisites") + klog.Error("You can learn how to set the runtime at: https://github.com/NVIDIA/k8s-device-plugin#quick-start") + return plugins, true, nil + } + started++ + } + + if started == 0 { + klog.Info("No devices found. Waiting indefinitely.") + } + + return plugins, false, nil +} +``` + +其中 p(plugin) 需要实现几个方法来管理插件。 + +`pkg/device-plugin/nvidiadevice/nvinternal/plugin/api.go:37` + +```golang +type Interface interface { + Devices() rm.Devices + Start() error + Stop() error +} +``` + +同时如果需要 kubelet 能够识别 resource 中的类似 `nvidia.com/gpu: 1` 这样的拓展字段需要启动一个 GRPC +服务挂载 `/var/lib/kubelet/device-plugins/` 且实现如下方法。这块跟调度相关性不大,暂且不展开 +[device-plugins](https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/device-plugins/)。 + +`k8s.io/kubelet@v0.28.3/pkg/apis/deviceplugin/v1beta1/api.pb.go:1419` + +```golang +type DevicePluginServer interface { + // GetDevicePluginOptions returns options to be communicated with Device + // Manager + GetDevicePluginOptions(context.Context, *Empty) (*DevicePluginOptions, error) + // ListAndWatch returns a stream of List of Devices + // Whenever a Device state change or a Device disappears, ListAndWatch + // returns the new list + ListAndWatch(*Empty, DevicePlugin_ListAndWatchServer) error + // GetPreferredAllocation returns a preferred set of devices to allocate + // from a list of available ones. The resulting preferred allocation is not + // guaranteed to be the allocation ultimately performed by the + // devicemanager. It is only designed to help the devicemanager make a more + // informed allocation decision when possible. + GetPreferredAllocation(context.Context, *PreferredAllocationRequest) (*PreferredAllocationResponse, error) + // Allocate is called during container creation so that the Device + // Plugin can run device specific operations and instruct Kubelet + // of the steps to make the Device available in the container + Allocate(context.Context, *AllocateRequest) (*AllocateResponse, error) + // PreStartContainer is called, if indicated by Device Plugin during registeration phase, + // before each container start. 
Device plugin can run device specific operations + // such as resetting the device before making devices available to the container + PreStartContainer(context.Context, *PreStartContainerRequest) (*PreStartContainerResponse, error) +} +``` + +#### nvidia 插件的实现 + +主要看`plugin.WatchAndRegister()` + +`pkg/device-plugin/nvidiadevice/nvinternal/plugin/server.go:196` + +```golang +func (plugin *NvidiaDevicePlugin) Start() error { + plugin.initialize() + + err := plugin.Serve() + if err != nil { + klog.Infof("Could not start device plugin for '%s': %s", plugin.rm.Resource(), err) + plugin.cleanup() + return err + } + klog.Infof("Starting to serve '%s' on %s", plugin.rm.Resource(), plugin.socket) + + err = plugin.Register() + if err != nil { + klog.Infof("Could not register device plugin: %s", err) + plugin.Stop() + return err + } + klog.Infof("Registered device plugin for '%s' with Kubelet", plugin.rm.Resource()) + + if plugin.operatingMode == "mig" { + cmd := exec.Command("nvidia-mig-parted", "export") + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err := cmd.Run() + if err != nil { + klog.Fatalf("nvidia-mig-parted failed with %s\n", err) + } + outStr := stdout.Bytes() + yaml.Unmarshal(outStr, &plugin.migCurrent) + os.WriteFile("/tmp/migconfig.yaml", outStr, os.ModePerm) + if len(plugin.migCurrent.MigConfigs["current"]) == 1 && len(plugin.migCurrent.MigConfigs["current"][0].Devices) == 0 { + idx := 0 + plugin.migCurrent.MigConfigs["current"][0].Devices = make([]int32, 0) + for idx < GetDeviceNums() { + plugin.migCurrent.MigConfigs["current"][0].Devices = append(plugin.migCurrent.MigConfigs["current"][0].Devices, int32(idx)) + idx++ + } + } + klog.Infoln("Mig export", plugin.migCurrent) + } + go func() { + err := plugin.rm.CheckHealth(plugin.stop, plugin.health) + if err != nil { + klog.Infof("Failed to start health check: %v; continuing with health checks disabled", err) + } + }() + + go func() { + plugin.WatchAndRegister() + }() + + return nil +} +``` + +这里是个定时器,每 30s 收集一次该 node 的设备信息,并写入 node annotation。 + +```golang +func (plugin *NvidiaDevicePlugin) WatchAndRegister() { + klog.Info("Starting WatchAndRegister") + errorSleepInterval := time.Second * 5 + successSleepInterval := time.Second * 30 + for { + err := plugin.RegistrInAnnotation() + if err != nil { + klog.Errorf("Failed to register annotation: %v", err) + klog.Infof("Retrying in %v seconds...", errorSleepInterval) + time.Sleep(errorSleepInterval) + } else { + klog.Infof("Successfully registered annotation. 
Next check in %v seconds...", successSleepInterval) + time.Sleep(successSleepInterval) + } + } +} +``` + +```golang +func (plugin *NvidiaDevicePlugin) RegistrInAnnotation() error { + devices := plugin.getAPIDevices() + klog.InfoS("start working on the devices", "devices", devices) + annos := make(map[string]string) + node, err := util.GetNode(util.NodeName) + if err != nil { + klog.Errorln("get node error", err.Error()) + return err + } + encodeddevices := util.EncodeNodeDevices(*devices) + annos[nvidia.HandshakeAnnos] = "Reported " + time.Now().String() + annos[nvidia.RegisterAnnos] = encodeddevices + klog.Infof("patch node with the following annos %v", fmt.Sprintf("%v", annos)) + err = util.PatchNodeAnnotations(node, annos) + + if err != nil { + klog.Errorln("patch node error", err.Error()) + } + return err +} +``` + +具体数据收集逻辑。 + +`pkg/device-plugin/nvidiadevice/nvinternal/plugin/register.go:110` + +```golang +func (plugin *NvidiaDevicePlugin) getAPIDevices() *[]*util.DeviceInfo { + devs := plugin.Devices() + klog.V(5).InfoS("getAPIDevices", "devices", devs) + nvml.Init() + res := make([]*util.DeviceInfo, 0, len(devs)) + for UUID := range devs { + ndev, ret := nvml.DeviceGetHandleByUUID(UUID) + if ret != nvml.SUCCESS { + klog.Errorln("nvml new device by index error uuid=", UUID, "err=", ret) + panic(0) + } + idx, ret := ndev.GetIndex() + if ret != nvml.SUCCESS { + klog.Errorln("nvml get index error ret=", ret) + panic(0) + } + memoryTotal := 0 + memory, ret := ndev.GetMemoryInfo() + if ret == nvml.SUCCESS { + memoryTotal = int(memory.Total) + } else { + klog.Error("nvml get memory error ret=", ret) + panic(0) + } + Model, ret := ndev.GetName() + if ret != nvml.SUCCESS { + klog.Error("nvml get name error ret=", ret) + panic(0) + } + + registeredmem := int32(memoryTotal / 1024 / 1024) + if plugin.schedulerConfig.DeviceMemoryScaling != 1 { + registeredmem = int32(float64(registeredmem) * plugin.schedulerConfig.DeviceMemoryScaling) + } + klog.Infoln("MemoryScaling=", plugin.schedulerConfig.DeviceMemoryScaling, "registeredmem=", registeredmem) + health := true + for _, val := range devs { + if strings.Compare(val.ID, UUID) == 0 { + // when NVIDIA-Tesla P4, the device info is : ID:GPU-e290caca-2f0c-9582-acab-67a142b61ffa,Health:Healthy,Topology:nil, + // it is more reasonable to think of healthy as case-insensitive + if strings.EqualFold(val.Health, "healthy") { + health = true + } else { + health = false + } + break + } + } + numa, err := plugin.getNumaInformation(idx) + if err != nil { + klog.ErrorS(err, "failed to get numa information", "idx", idx) + } + res = append(res, &util.DeviceInfo{ + ID: UUID, + Index: uint(idx), + Count: int32(plugin.schedulerConfig.DeviceSplitCount), + Devmem: registeredmem, + Devcore: int32(plugin.schedulerConfig.DeviceCoreScaling * 100), + Type: fmt.Sprintf("%v-%v", "NVIDIA", Model), + Numa: numa, + Mode: plugin.operatingMode, + Health: health, + }) + klog.Infof("nvml registered device id=%v, memory=%v, type=%v, numa=%v", idx, registeredmem, Model, numa) + } + return &res +} +``` + +这里通过 nvidia 驱动获取设备信息,需要注意的是这里有配置 DeviceMemoryScaling,内存超分配置, +这里是通过命令行启动的 --config-file 参数指定的 schduler 配置和代码中固化的 +`config/config.json` 来取值的,其中 config/config.json 优先级大于 --config-file + +到这里,调度所需的所有东西就准备好了,Pod 可以顺利被分配到合适的节点上。 + +## 参考 + +- [kubernetes 官网](https://kubernetes.io/) +- [自定义 Kubernetes 调度器](https://www.qikqiak.com/post/custom-kube-scheduler/) +- [自定义资源支持:K8s Device Plugin 从原理到实现](https://www.lixueduan.com/posts/kubernetes/21-device-plugin/) diff --git 
a/docs/zh/contributor/adopters.md b/docs/zh/contributor/adopters.md new file mode 100644 index 0000000..6067059 --- /dev/null +++ b/docs/zh/contributor/adopters.md @@ -0,0 +1,32 @@ +--- +title: HAMi 采用者 +--- + +# HAMi 采用者 + +您和您的组织正在使用 HAMi?太棒了!我们很乐意听到您的使用反馈!💖 + +## 添加您的信息 + +[这里](https://github.com/Project-HAMi/website/blob/master/src/pages/adopters.mdx)列出了在生产环境中采用 HAMi 项目的组织。 + +您只需为您的公司添加一个条目,合并后它将自动添加到我们的网站中。 + +要添加您的组织,请按照以下步骤操作: + +1. Fork [HAMi-io/website](https://github.com/Project-HAMi/website) 仓库。 +2. 使用 `git clone https://github.com/<您的-GH-用户名>/website.git` 将其克隆到本地。 +3. (可选) 将您组织的 logo 添加到 `static/img/supporters` 目录。建议将 logo 文件命名为 `<公司名>.png`。 + 这些内容不会用于商业用途。 +4. 编辑 [adopters.mdx](https://github.com/Project-HAMi/website/blob/master/src/pages/adopters.mdx) 中的采用者列表。 + 您可以参考下面的示例表格格式。 + + | 公司名称 | 联系方式 | 环境 | 场景描述 | + | -------- | --------------------------------- | ---- | ------------------------------ | + | 我的公司 | [email](mailto:email@company.com) | 生产 | We use HAMi to manage our GPU. | + +5. 保存文件,然后执行 `git add -A` 并使用 `git commit -s -m "Add MY-ORG to adopters"` 提交。 +6. 使用 `git push origin main` 推送提交。 +7. 向 [HAMi-io/website](https://github.com/Project-HAMi/website) 开启一个拉取请求(Pull Request),预览构建将会出现。 + +非常感谢您成为我们社区的一员 - 我们非常感激! diff --git a/docs/zh/contributor/cherry-picks.md b/docs/zh/contributor/cherry-picks.md new file mode 100644 index 0000000..a336756 --- /dev/null +++ b/docs/zh/contributor/cherry-picks.md @@ -0,0 +1,86 @@ +--- +title: 如何 cherry-pick PRs +translated: true +--- + +本文档解释了如何在 `Project-HAMi/HAMi` 仓库的发布分支上管理 cherry pick。一个常见的用例是将 PR 从 master 分支回移到发布分支。 + +> 本文档摘自 [Kubernetes cherry-pick](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-release/cherry-picks.md)。 + +- [先决条件](#prerequisites) +- [哪些 PR 适合进行 Cherry Pick](#what-kind-of-prs-are-good-for-cherry-picks) +- [发起 Cherry Pick](#initiate-a-cherry-pick) +- [Cherry Pick 审核](#cherry-pick-review) +- [Cherry Pick 故障排除](#troubleshooting-cherry-picks) +- [不支持版本的 Cherry Pick](#cherry-picks-for-unsupported-releases) + +## 先决条件 + +- 一个已合并到 `master` 分支的拉取请求。 +- 发布分支已存在(例如:[`release-2.4`](https://github.com/Project-HAMi/HAMi/releases)) +- 正常配置的 git 和 GitHub shell 环境,用于推送到 GitHub 上的 HAMi `origin` fork,并对配置的远程 `upstream` 提交拉取请求,该 `upstream` 跟踪 `https://github.com/Project-HAMi/HAMi`,包括 `GITHUB_USER`。 +- 按照[安装说明](https://github.com/cli/cli#installation)安装 GitHub CLI (`gh`)。 +- 一个具有 "repo" 和 "read:org" 权限的 GitHub 个人访问令牌。权限是为 [gh auth login](https://cli.github.com/manual/gh_auth_login) 所需,与 cherry-pick 创建过程无关(创建分支和发起 PR)。 + +## 哪些 PR 适合进行 Cherry Pick + +与正常的 master 分支的合并量相比,发布分支的 PR 数量要少一个或两个数量级。这是因为发布分支的审查更为严格。重点在于关键的错误修复,例如: + +- 数据丢失 +- 内存损坏 +- 崩溃、挂起 +- 安全问题 + +仅影响 alpha 功能的功能性问题的错误修复(不是数据丢失或安全问题)不符合关键错误修复的标准。 + +如果您提议进行 cherry pick,但它不是一个明显的关键错误修复,请重新考虑。如果在反思后您仍希望继续,请通过补充您的 PR 来加强您的理由,例如: + +- 详细描述问题的 GitHub issue + +- 变更的范围 + +- 添加变更的风险 + +- 相关回归的风险 + +- 执行的测试,添加的测试用例 + +- 关键利益相关者的审阅者/批准者对变更为必要的回移的信心的证明 + +确保我们的整个社区积极参与项目的增强是至关重要的。如果某个已发布的功能未在特定提供商的平台上启用,这是一个需要在 `master` 分支中解决的社区失误,以便后续发布。这样的启用不会被回移到补丁发布分支。 + +## 发起 Cherry Pick + +- 运行 [cherry pick 脚本][cherry-pick-script] + + 此示例将 master 分支的 PR #1206 应用于远程分支 `upstream/release-1.0`: + + ```shell + hack/cherry_pick_pull.sh upstream/release-1.0 1206 + ``` + + - 请注意,cherry pick 脚本假定您有一个名为 `upstream` 的 git 远程指向 HAMi GitHub 组织。 + + - 您需要为每个想要进行 cherry pick 的补丁发布单独运行 cherry pick 脚本。cherry pick 应应用于所有适用修复的活动发布分支。 + + - 如果未设置 `GITHUB_TOKEN`,您将被要求输入 GitHub 密码:提供 GitHub [个人访问令牌](https://github.com/settings/tokens) 而不是实际的 GitHub 密码。如果您可以安全地将环境变量 `GITHUB_TOKEN` 
设置为您的个人访问令牌,则可以避免交互式提示。参考 [https://github.com/github/hub/issues/2655#issuecomment-735836048](https://github.com/github/hub/issues/2655#issuecomment-735836048) + +## Cherry Pick 审核 + +与其他 PR 一样,代码 OWNERS 会根据需要对 cherry pick PR 进行审核 (`/lgtm`) 和批准 (`/approve`)。 + +与正常的拉取请求相同,发布说明要求适用,除了发布说明部分将自动从发起 cherry pick 的 master 分支拉取请求中填充。 + +## Cherry Pick 故障排除 + +贡献者在发起 cherry pick 时可能会遇到以下一些困难。 + +- cherry pick PR 无法干净地应用于旧的发布分支。在这种情况下,您需要手动修复冲突。 + +- cherry pick PR 包含无法通过 CI 测试的代码。在这种情况下,您需要从您的 fork 中获取自动生成的分支,修改有问题的提交并强制推送到自动生成的分支。或者,您可以创建一个新的 PR,这样会更繁琐。 + +## 不支持版本的 Cherry Pick + +社区支持和补丁的版本需要讨论。 + +[cherry-pick-script]: https://github.com/Project-HAMi/HAMi/blob/master/hack/cherry_pick_pull.sh \ No newline at end of file diff --git a/docs/zh/contributor/contribute-docs.md b/docs/zh/contributor/contribute-docs.md new file mode 100644 index 0000000..cd9bb7f --- /dev/null +++ b/docs/zh/contributor/contribute-docs.md @@ -0,0 +1,174 @@ +--- +title: 如何贡献文档 +translated: true +--- + +从1.3版本开始,社区文档将在HAMi网站上提供。本文件解释了如何向`Project-HAMi/website`仓库贡献文档。 + +## 前提条件 + +- 文档和代码一样,也按版本分类和存储。1.3是我们归档的第一个版本。 +- 文档需要翻译成多种语言,以便来自不同地区的读者阅读。社区现在支持中文和英文。英文是文档的官方语言。 +- 我们的文档使用Markdown。如果您不熟悉Markdown,请参阅https://guides.github.com/features/mastering-markdown/或https://www.markdownguide.org/以获取更详细的信息。 +- 我们通过[Docusaurus 2](https://docusaurus.io/)获得了一些附加功能,这是一个现代静态网站生成器。 + +## 设置 + +您可以通过克隆我们的网站仓库来设置本地环境。 + +```shell +git clone https://github.com/Project-HAMi/website.git +cd website +``` + +我们的网站组织如下: + +``` +website +├── sidebars.json # 当前文档版本的侧边栏 +├── docs # 当前文档版本的文档目录 +│ ├── foo +│ │ └── bar.md # https://mysite.com/docs/next/foo/bar +│ └── hello.md # https://mysite.com/docs/next/hello +├── versions.json # 指示可用版本的文件 +├── versioned_docs +│ ├── version-1.1.0 +│ │ ├── foo +│ │ │ └── bar.md # https://mysite.com/docs/foo/bar +│ │ └── hello.md +│ └── version-1.0.0 +│ ├── foo +│ │ └── bar.md # https://mysite.com/docs/1.0.0/foo/bar +│ └── hello.md +├── versioned_sidebars +│ ├── version-1.1.0-sidebars.json +│ └── version-1.0.0-sidebars.json +├── docusaurus.config.js +└── package.json +``` + +`versions.json`文件是一个版本列表,从最新到最早。下表解释了版本化文件如何映射到其版本和生成的URL。 + +| 路径 | 版本 | URL | +| --------------------------------------- | -------------- | ----------------- | +| `versioned_docs/version-1.0.0/hello.md` | 1.0.0 | /docs/1.0.0/hello | +| `versioned_docs/version-1.1.0/hello.md` | 1.1.0 (最新) | /docs/hello | +| `docs/hello.md` | 当前 | /docs/next/hello | + +:::提示 + +`docs`目录中的文件属于`current`文档版本。 + +`current`文档版本标记为`Next`,托管在`/docs/next/*`下。 + +贡献者主要为当前版本贡献文档。 +::: + +## 撰写文档 + +### 在顶部开始一个标题 + +在Markdown文件的顶部指定有关文章的元数据是很重要的,这个部分称为**Front Matter**。 + +现在,让我们看一个快速示例,它应该解释**Front Matter**中最相关的条目: + +``` +--- +title: 带有标签的文档 +--- + +## 二级标题 +``` + +在两行---之间的顶部部分是Front Matter部分。在这里,我们定义了一些条目,告诉Docusaurus如何处理文章: + +- 标题相当于HTML文档中的`
+
+## 示例
+
+动态 MIG 与 HAMi 任务兼容,如下例所示:
+只需设置 `nvidia.com/gpu` 和 `nvidia.com/gpumem`。
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+ name: gpu-pod1
+spec:
+ containers:
+ - name: ubuntu-container1
+ image: ubuntu:20.04
+ command: ["bash", "-c", "sleep 86400"]
+ resources:
+ limits:
+ nvidia.com/gpu: 2 # 请求 2 个 vGPU
+ nvidia.com/gpumem: 8000 # 每个 vGPU 包含 8000m 设备内存(可选,整数)
+```
+
+任务也可以通过将注解 `nvidia.com/vgpu-mode` 设置为相应的值,指定只使用 `mig` 或 `hami-core` 模式,如下例所示:
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+ name: gpu-pod1
+ annotations:
+ nvidia.com/vgpu-mode: "mig"
+spec:
+ containers:
+ - name: ubuntu-container1
+ image: ubuntu:20.04
+ command: ["bash", "-c", "sleep 86400"]
+ resources:
+ limits:
+ nvidia.com/gpu: 2 # 请求 2 个 vGPU
+ nvidia.com/gpumem: 8000 # 每个 vGPU 包含 8000m 设备内存(可选,整数)
+```
+
+## 流程
+
+使用动态 MIG 的 vGPU 任务的流程如下所示:
+
+
+
+请注意,在提交任务后,deviceshare 插件将遍历 configMap `hami-scheduler-device` 中定义的模板,并找到第一个可用的模板来适配。您可以随时更改该 configMap 的内容,并重新启动 vc-scheduler 进行自定义。
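+
+如需查看或调整这些模板,可以直接查看该 configMap(以下命令仅为示意,命名空间此处假设为 `kube-system`,请以实际部署为准):
+
+```bash
+kubectl -n kube-system get configmap hami-scheduler-device -o yaml
+```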
+
+如果您在空闲的 A100-PCIE-40GB 节点上提交该示例,调度器会选择一块 GPU,并采用以下 MIG 模板:
+
+```yaml
+ 2g.10gb : 3
+ 1g.5gb : 1
+```
+
+然后使用 2 个 2g.10gb 实例启动容器。
\ No newline at end of file
diff --git a/docs/zh/developers/HAMi-core-design.md b/docs/zh/developers/HAMi-core-design.md
new file mode 100644
index 0000000..67fb967
--- /dev/null
+++ b/docs/zh/developers/HAMi-core-design.md
@@ -0,0 +1,30 @@
+---
+title: HAMi-core 设计
+---
+
+HAMi-core 是一个为 CUDA 环境设计的 hook 库,作为容器内的 GPU 资源控制器,已被
+[HAMi](https://github.com/Project-HAMi/HAMi) 和
+[Volcano](https://github.com/volcano-sh/devices) 等项目采用。
+
+
+
+## 功能特性
+
+HAMi-core 提供以下核心功能:
+
+1. 设备显存虚拟化
+
+ 
+
+2. 限制设备使用率
+
+ 通过自定义的时间片机制控制 GPU 使用情况。
+
+3. 实时监控设备使用率
+
+## 设计原理
+
+HAMi-core 通过劫持 CUDA 运行时库(`libcudart.so`)与 CUDA 驱动库(`libcuda.so`)之间的
+API 调用来实现其功能,如下图所示:
+
+
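+作为补充,下面给出在容器内直接试用 HAMi-core 的一个示意(其中 `CUDA_DEVICE_MEMORY_LIMIT`、`CUDA_DEVICE_SM_LIMIT` 为 HAMi-core 提供的资源限制环境变量;库路径与取值仅为示例,具体请以 HAMi-core 仓库文档为准):
+
+```bash
+# 通过 LD_PRELOAD 注入 hook 库,并限制该进程可见的显存与算力(路径与取值仅为示例)
+export LD_PRELOAD=/usr/local/vgpu/libvgpu.so
+export CUDA_DEVICE_MEMORY_LIMIT=4g
+export CUDA_DEVICE_SM_LIMIT=50
+# 之后正常启动 CUDA 程序,即可在该进程内观察到显存与算力被限制
+```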
diff --git a/docs/zh/developers/build.md b/docs/zh/developers/build.md
new file mode 100644
index 0000000..ec31be8
--- /dev/null
+++ b/docs/zh/developers/build.md
@@ -0,0 +1,95 @@
+---
+title: 构建 HAMi
+translated: true
+---
+
+## 制作二进制文件
+
+### 前提条件
+
+需要以下工具:
+
+- go v1.20+
+- make
+
+### 构建
+
+```bash
+make
+```
+
+如果一切构建成功,将打印以下输出:
+
+```
+go build -ldflags '-s -w -X github.com/Project-HAMi/HAMi/pkg/version.version=v0.0.1' -o bin/scheduler ./cmd/scheduler
+go build -ldflags '-s -w -X github.com/Project-HAMi/HAMi/pkg/version.version=v0.0.1' -o bin/vGPUmonitor ./cmd/vGPUmonitor
+go build -ldflags '-s -w -X github.com/Project-HAMi/HAMi/pkg/version.version=v0.0.1' -o bin/nvidia-device-plugin ./cmd/device-plugin/nvidia
+```
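+
+构建完成后,可以确认产物已生成在 `bin/` 目录下(示意,文件名与上面输出中的 `-o` 目标一致):
+
+```bash
+ls bin/
+# nvidia-device-plugin  scheduler  vGPUmonitor
+```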
+
+## 制作镜像
+
+### 前提条件
+
+需要以下工具:
+
+- docker
+- make
+
+### 构建
+
+```bash
+make docker
+```
+
+如果一切构建成功,将打印以下输出:
+
+```
+go build -ldflags '-s -w -X github.com/Project-HAMi/HAMi/pkg/version.version=v0.0.1' -o bin/scheduler ./cmd/scheduler
+go build -ldflags '-s -w -X github.com/Project-HAMi/HAMi/pkg/version.version=v0.0.1' -o bin/vGPUmonitor ./cmd/vGPUmonitor
+go build -ldflags '-s -w -X github.com/Project-HAMi/HAMi/pkg/version.version=v0.0.1' -o bin/nvidia-device-plugin ./cmd/device-plugin/nvidia
+[+] Building 146.4s (28/28)
+FINISHED docker:default
+ => [internal] load build definition from Dockerfile 0.0s
+ => => transferring dockerfile: 1.30kB 0.0s
+ => [internal] load metadata for docker.io/nvidia/cuda:12.2.0-base-ubuntu22.04 5.5s
+ => [internal] load metadata for docker.io/library/golang:1.21-bullseye 4.5s
+ => [internal] load metadata for docker.io/nvidia/cuda:12.2.0-devel-ubuntu20.04 0.0s
+ => [auth] nvidia/cuda:pull token for registry-1.docker.io 0.0s
+ => [auth] library/golang:pull token for registry-1.docker.io 0.0s
+ => [internal] load .dockerignore 0.0s
+ => => transferring context: 2B 0.0s
+ => [internal] load build context 1.3s
+ => => transferring context: 119.90MB 1.3s
+ => [stage-3 1/6] FROM docker.io/nvidia/cuda:12.2.0-base-ubuntu22.04@sha256:ecdf8549dd5f12609e365217a64dedde26ecda26da8f3ff3f82def6749f53051 0.0s
+ => CACHED [gobuild 1/4] FROM docker.io/library/golang:1.21-bullseye@sha256:311468bffa9fa4747a334b94e6ce3681b564126d653675a6adc46698b2b88d35 0.0s
+ => [nvbuild 1/9] FROM docker.io/nvidia/cuda:12.2.0-devel-ubuntu20.04 0.0s
+ => [gobuild 2/4] ADD . /k8s-vgpu 0.8s
+ => [nvbuild 2/9] COPY ./libvgpu /libvgpu 0.3s
+ => [nvbuild 3/9] WORKDIR /libvgpu 0.2s
+ => [nvbuild 4/9] RUN apt-get -y update && apt-get -y install wget 21.9s
+ => [gobuild 3/4] RUN apt-get update && apt-get -y install libhwloc-dev libdrm-dev 18.8s
+ => [gobuild 4/4] RUN cd /k8s-vgpu && make all 83.5s
+ => [nvbuild 5/9] RUN wget https://cmake.org/files/v3.19/cmake-3.19.8-Linux-x86_64.tar.gz 99.8s
+ => CACHED [stage-3 2/6] COPY ./LICENSE /k8s-vgpu/LICENSE 0.0s
+ => [stage-3 3/6] COPY --from=GOBUILD /k8s-vgpu/bin /k8s-vgpu/bin 0.5s
+ => [stage-3 4/6] COPY ./docker/entrypoint.sh /k8s-vgpu/bin/entrypoint.sh 0.2s
+ => [stage-3 5/6] COPY ./lib /k8s-vgpu/lib 0.2s
+ => [nvbuild 6/9] RUN tar -xf cmake-3.19.8-Linux-x86_64.tar.gz 2.1s
+ => [nvbuild 7/9] RUN cp /libvgpu/cmake-3.19.8-Linux-x86_64/bin/cmake /libvgpu/cmake-3.19.8-Linux-x86_64/bin/cmake3 1.3s
+ => [nvbuild 8/9] RUN apt-get -y install openssl libssl-dev 7.7s
+ => [nvbuild 9/9] RUN bash ./build.sh 4.0s
+ => [stage-3 6/6] COPY --from=NVBUILD /libvgpu/build/libvgpu.so /k8s-vgpu/lib/nvidia/ 0.3s
+ => exporting to image 1.8s
+ => => exporting layers 1.8s
+ => => writing image sha256:fc0ce42b41f9a177921c9bfd239babfa06fc77cf9e4087e8f2d959d749e8039f 0.0s
+ => => naming to docker.io/projecthami/hami:master-103b2b677e018a40af6322a56c2e9d5d5c62cccf 0.0s
+The push refers to repository [docker.io/projecthami/hami]
+```
+
+## 制作 HAMi-Core
+
+建议在 nvidia/cuda 镜像中构建 HAMi-Core:
+
+```bash
+git clone https://github.com/Project-HAMi/HAMi-core.git
+docker build . -f dockerfiles/Dockerfile.{arch}
+```
\ No newline at end of file
diff --git a/docs/zh/developers/kunlunxin-topology.md b/docs/zh/developers/kunlunxin-topology.md
new file mode 100644
index 0000000..13eae7f
--- /dev/null
+++ b/docs/zh/developers/kunlunxin-topology.md
@@ -0,0 +1,52 @@
+---
+title: 昆仑芯拓扑感知调度
+---
+
+## 背景
+
+当单个 P800 服务器配置多块 XPU 时,若 GPU 互联或位于同一 NUMA 节点内(如下图所示),可获得最优性能表现。这种配置会在服务器内所有 GPU 之间形成特定的拓扑关系。
+
+
+
+当用户作业申请特定数量的`kunlunxin.com/xpu`资源时,Kubernetes会将pod调度到合适节点以最小化资源碎片并保持高性能。选定节点后,XPU设备会根据以下规则进行细粒度资源分配:
+
+1. 仅允许1、2、4或8卡分配方案
+2. 1/2/4卡分配不得跨NUMA节点
+3. 分配后应最小化资源碎片
+
+## 过滤阶段
+
+过滤阶段识别所有符合分配条件的节点。针对每个节点,系统会筛选最优XPU组合方案并缓存,供评分阶段使用。筛选流程如下图所示:
+
+
+
+## 评分阶段
+
+在评分阶段,所有通过过滤的节点会被评估打分,以选择最优的调度目标。我们引入 **MTF**(最小填满任务数)指标,用于量化节点在分配后容纳未来任务的能力。
+
+下表展示了XPU占用情况与对应MTF值的示例:
+
+| XPU占用状态 | MTF | 说明 |
+|----------------|-----|-------------|
+| 11111111 | 0 | 完全占用,无法调度新任务 |
+| 00000000 | 1 | 可被一个8-XPU任务完全占用 |
+| 00000011 | 2 | 可调度一个4-XPU任务和一个2-XPU任务 |
+| 00000001 | 3 | 可容纳一个4-XPU、一个2-XPU和一个1-XPU任务 |
+| 00010001 | 4 | 可容纳两个2-XPU任务和两个1-XPU任务 |
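+
+下面用一段 Go 示意代码(非 HAMi 源码)演示上表中 MTF 的计算方式。这里假设每 4 张 XPU 构成一个 NUMA 节点、8 卡任务要求整机空闲,并在单个 NUMA 内按 4/2/1 贪心填满剩余卡位:
+
+```golang
+package main
+
+import "fmt"
+
+// computeMTF 返回把一台 8-XPU 服务器填满所需的最少任务数(MTF)。
+func computeMTF(occupied [8]bool) int {
+    freeLeft, freeRight := 0, 0
+    for i := 0; i < 4; i++ {
+        if !occupied[i] {
+            freeLeft++
+        }
+        if !occupied[i+4] {
+            freeRight++
+        }
+    }
+    // 整机空闲时可被一个 8-XPU 任务占满
+    if freeLeft == 4 && freeRight == 4 {
+        return 1
+    }
+    // 单个 NUMA 内按 4/2/1 贪心填满剩余卡位
+    fill := func(free int) int {
+        tasks := 0
+        for _, size := range []int{4, 2, 1} {
+            tasks += free / size
+            free %= size
+        }
+        return tasks
+    }
+    return fill(freeLeft) + fill(freeRight)
+}
+
+func main() {
+    // 对应上表 00000011 的情况:后两张卡已被占用,MTF 应为 2
+    fmt.Println(computeMTF([8]bool{false, false, false, false, false, false, true, true}))
+}
+```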
+
+节点得分基于分配前后的**MTF差值**计算。差值越小表示适配度越高,得分也越高。具体评分逻辑如下:
+
+| MTF差值 | 得分 | 示例 |
+|------------|-------|---------|
+| -1 | 2000 | 00000111->00001111 |
+| 0 | 1000 | 00000111->00110111 |
+| 1 | 0 | 00001111->00011111 |
+| 2 | -1000 | 00000000->00000001 |
+
+## 绑定阶段
+
+在绑定阶段,分配结果会以注解形式注入pod。例如:
+
+```
+BAIDU_COM_DEVICE_IDX=0,1,2,3
+```
diff --git a/docs/zh/developers/mindmap.md b/docs/zh/developers/mindmap.md
new file mode 100644
index 0000000..8b5ea4e
--- /dev/null
+++ b/docs/zh/developers/mindmap.md
@@ -0,0 +1,8 @@
+---
+title: HAMi 路线图
+translated: true
+---
+
+## 思维导图
+
+
\ No newline at end of file
diff --git a/docs/zh/developers/protocol.md b/docs/zh/developers/protocol.md
new file mode 100644
index 0000000..b5e7a95
--- /dev/null
+++ b/docs/zh/developers/protocol.md
@@ -0,0 +1,37 @@
+---
+title: 协议设计
+translated: true
+---
+
+## 协议实现
+
+### 设备注册
+
+为了进行更准确的调度,HAMi 调度器需要在设备注册时感知设备的规格,包括 UUID、显存、计算能力、型号、NUMA 信息等。
+
+然而,device-plugin 的设备注册 API 并不能传递这些参数,因此 HAMi-device-plugin 在注册时将这些补充信息写入节点的注解中,供调度器读取,如下图所示:
+
+
+
+这里需要使用两个注解:一个是时间戳,若其超过指定阈值,则认为对应节点上的设备信息已失效;另一个是设备注册信息。一个具有 2 个 32G V100 GPU 的节点的注册注解如下所示:
+
+```
+hami.io/node-handshake: Requesting_2024.05.14 07:07:33
+hami.io/node-nvidia-register: 'GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec,10,32768,100,NVIDIA-Tesla V100-PCIE-32GB,0,true:GPU-0fc3eda5-e98b-a25b-5b0d-cf5c855d1448,10,32768,100,NVIDIA-Tesla V100-PCIE-32GB,0,true:'
+```
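+
+下面是一段解析 `hami.io/node-nvidia-register` 注解的 Go 示意代码(非 HAMi 源码),字段含义系根据上文示例推断:依次为 UUID、切分数、显存(MB)、算力、型号、NUMA 编号、健康状态,设备之间以 `:` 分隔:
+
+```golang
+package main
+
+import (
+    "fmt"
+    "strconv"
+    "strings"
+)
+
+// RegisteredDevice 表示注解中登记的一块设备(字段划分为根据示例的推断)。
+type RegisteredDevice struct {
+    UUID    string
+    Count   int
+    Memory  int // MB
+    Core    int
+    Type    string
+    Numa    int
+    Healthy bool
+}
+
+func parseRegisterAnnotation(v string) []RegisteredDevice {
+    var devs []RegisteredDevice
+    for _, item := range strings.Split(v, ":") {
+        fields := strings.Split(strings.TrimSpace(item), ",")
+        if len(fields) < 7 {
+            continue // 跳过末尾的空段
+        }
+        count, _ := strconv.Atoi(fields[1])
+        mem, _ := strconv.Atoi(fields[2])
+        core, _ := strconv.Atoi(fields[3])
+        numa, _ := strconv.Atoi(fields[5])
+        devs = append(devs, RegisteredDevice{
+            UUID: fields[0], Count: count, Memory: mem, Core: core,
+            Type: fields[4], Numa: numa, Healthy: fields[6] == "true",
+        })
+    }
+    return devs
+}
+
+func main() {
+    anno := "GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec,10,32768,100,NVIDIA-Tesla V100-PCIE-32GB,0,true:"
+    fmt.Printf("%+v\n", parseRegisterAnnotation(anno))
+}
+```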
+
+### 调度决策
+
+kube-scheduler 在 `bind` 过程中调用 device-plugin 挂载设备,但仅向 device-plugin 提供设备的 `UUID`。因此,在设备共享的场景中,device-plugin 无法获取任务请求的相应设备规格,如 `设备内存` 和 `计算核心`。
+
+因此,需要设计一个协议,让调度器与 device-plugin 之间传递任务调度信息。调度器将调度结果以 patch 方式写入 pod 的注解,再由 device-plugin 读取,如下图所示:
+
+
+
+在此过程中,需要设置 3 个注解,分别是 `时间戳`、`待分配设备` 和 `已分配设备`。注解刚由调度器写入时,`待分配设备` 和 `已分配设备` 的内容相同;device-plugin 会根据 `待分配设备` 的内容确定当前的设备分配情况,分配成功后会把相应设备从该注解中移除。因此,当任务成功运行时,`待分配设备` 的内容为空。
+
+一个请求 3000M 设备显存的 GPU 任务会生成如下注解:
+```
+hami.io/bind-time: 1716199325
+hami.io/vgpu-devices-allocated: GPU-0fc3eda5-e98b-a25b-5b0d-cf5c855d1448,NVIDIA,3000,0:;
+hami.io/vgpu-devices-to-allocate: ;
+```
\ No newline at end of file
diff --git a/docs/zh/developers/scheduling.md b/docs/zh/developers/scheduling.md
new file mode 100644
index 0000000..7c21763
--- /dev/null
+++ b/docs/zh/developers/scheduling.md
@@ -0,0 +1,169 @@
+---
+title: 调度策略
+translated: true
+---
+
+## 摘要
+
+当前,在拥有大量 GPU 节点的集群中,调度器在选择节点时不会进行 `binpack` 或 `spread`;在使用 vGPU 时,对 GPU 卡同样不会进行 `binpack` 或 `spread`。
+
+## 提案
+
+我们在调度器配置中新增 `node-scheduler-policy` 和 `gpu-scheduler-policy` 两个配置项,调度器据此在节点层面和 GPU 层面分别实现 `binpack` 或 `spread`。用户也可以通过 Pod 注解 `hami.io/node-scheduler-policy` 和 `hami.io/gpu-scheduler-policy` 覆盖默认策略。
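+
+例如,下面这个 Pod 通过注解把节点策略覆盖为 `binpack`、GPU 策略覆盖为 `spread`(注解键来自上文,取值与 Pod 其余字段仅为示意,请以实际实现为准):
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: gpu-pod
+  annotations:
+    hami.io/node-scheduler-policy: "binpack"
+    hami.io/gpu-scheduler-policy: "spread"
+spec:
+  containers:
+    - name: cuda-container
+      image: ubuntu:20.04
+      command: ["bash", "-c", "sleep 86400"]
+      resources:
+        limits:
+          nvidia.com/gpu: 1
+```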
+
+### 用户故事
+
+这是一个 GPU 集群,拥有两个节点,以下故事以此集群为前提。
+
+
+
+#### 故事 1
+
+节点 binpack,尽可能使用一个节点的 GPU 卡,例如:
+- 集群资源:
+ - 节点1:GPU 拥有 4 个 GPU 设备
+ - 节点2:GPU 拥有 4 个 GPU 设备
+
+- 请求:
+ - pod1:使用 1 个 GPU
+ - pod2:使用 1 个 GPU
+
+- 调度结果:
+ - pod1:调度到节点1
+ - pod2:调度到节点1
+
+#### 故事 2
+
+节点 spread,尽可能使用来自不同节点的 GPU 卡,例如:
+
+- 集群资源:
+ - 节点1:GPU 拥有 4 个 GPU 设备
+ - 节点2:GPU 拥有 4 个 GPU 设备
+
+- 请求:
+ - pod1:使用 1 个 GPU
+ - pod2:使用 1 个 GPU
+
+- 调度结果:
+ - pod1:调度到节点1
+ - pod2:调度到节点2
+
+#### 故事 3
+
+GPU binpack,尽可能使用同一个 GPU 卡,例如:
+
+- 集群资源:
+ - 节点1:GPU 拥有 4 个 GPU 设备,它们是 GPU1, GPU2, GPU3, GPU4
+
+- 请求:
+ - pod1:使用 1 个 GPU,gpucore 是 20%,gpumem-percentage 是 20%
+ - pod2:使用 1 个 GPU,gpucore 是 20%,gpumem-percentage 是 20%
+
+- 调度结果:
+ - pod1:调度到节点1,选择 GPU1 这个设备
+ - pod2:调度到节点1,选择 GPU1 这个设备
+
+#### 故事 4
+
+GPU spread,尽可能使用不同的 GPU 卡,例如:
+
+- 集群资源:
+ - 节点1:GPU 拥有 4 个 GPU 设备,它们是 GPU1, GPU2, GPU3, GPU4
+
+- 请求:
+ - pod1:使用 1 个 GPU,gpucore 是 20%,gpumem-percentage 是 20%
+ - pod2:使用 1 个 GPU,gpucore 是 20%,gpumem-percentage 是 20%
+
+- 调度结果:
+ - pod1:调度到节点1,选择 GPU1 这个设备
+ - pod2:调度到节点1,选择 GPU2 这个设备
+
+## 设计细节
+
+### Node-scheduler-policy
+
+
+
+#### Binpack
+
+Binpack 主要考虑节点资源使用情况。使用越满,得分越高。
+
+```
+score: ((request + used) / allocatable) * 10
+```
+
+1. 节点1的 Binpack 评分信息如下
+
+```
+Node1 score: ((1+3)/4) * 10 = 10
+```
+
+2. 节点2的 Binpack 评分信息如下
+
+```
+Node2 score: ((1+2)/4) * 10 = 7.5
+```
+
+因此,在 `Binpack` 策略中我们可以选择 `Node1`。
+
+#### Spread
+
+Spread 同样主要考虑节点资源使用情况,按同一公式计算得分,使用越少的节点越优先被选择。
+
+```
+score: ((request + used) / allocatable) * 10
+```
+
+1. 节点1的 Spread 评分信息如下
+```
+Node1 score: ((1+3)/4) * 10 = 10
+```
+
+2. 节点2的 Spread 评分信息如下
+```
+Node2 score: ((1+2)/4) * 10 = 7.5
+```
+
+因此,在 `Spread` 策略中我们可以选择 `Node2`。
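+
+下面用一段 Go 示意代码(非 HAMi 源码)演示上述打分公式,以及两种策略分别如何选择节点:Binpack 选择得分最高(最满)的节点,Spread 选择得分最低(最空)的节点:
+
+```golang
+package main
+
+import "fmt"
+
+type nodeUsage struct {
+    Name        string
+    Request     float64
+    Used        float64
+    Allocatable float64
+}
+
+// score 即上文的 ((request + used) / allocatable) * 10
+func score(n nodeUsage) float64 {
+    return (n.Request + n.Used) / n.Allocatable * 10
+}
+
+func pick(nodes []nodeUsage, policy string) nodeUsage {
+    best := nodes[0]
+    for _, n := range nodes[1:] {
+        if policy == "binpack" && score(n) > score(best) {
+            best = n
+        }
+        if policy == "spread" && score(n) < score(best) {
+            best = n
+        }
+    }
+    return best
+}
+
+func main() {
+    nodes := []nodeUsage{
+        {Name: "Node1", Request: 1, Used: 3, Allocatable: 4}, // 得分 10
+        {Name: "Node2", Request: 1, Used: 2, Allocatable: 4}, // 得分 7.5
+    }
+    fmt.Println("binpack ->", pick(nodes, "binpack").Name) // Node1
+    fmt.Println("spread  ->", pick(nodes, "spread").Name)  // Node2
+}
+```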
+
+### GPU-scheduler-policy
+
+
+
+#### Binpack
+
+Binpack 主要关注每张卡的计算能力和显存使用情况。使用越多,得分越高。
+```
+score: ((request.core + used.core) / allocatable.core + (request.mem + used.mem) / allocatable.mem) * 10
+```
+
+1. GPU1 的 Binpack 评分信息如下
+```
+GPU1 Score: ((20+10)/100 + (1000+2000)/8000) * 10 = 6.75
+```
+
+2. GPU2 的 Binpack 评分信息如下
+```
+GPU2 Score: ((20+70)/100 + (1000+6000)/8000) * 10 = 17.75
+```
+
+因此,在 `Binpack` 策略中我们可以选择 `GPU2`。
+
+#### Spread
+
+Spread 同样关注每张卡的计算能力和显存使用情况,按同一公式计算得分,使用越少的 GPU 越优先被选择。
+```
+score: ((request.core + used.core) / allocatable.core + (request.mem + used.mem) / allocatable.mem) * 10
+```
+
+1. GPU1 的 Spread 评分信息如下
+```
+GPU1 Score: ((20+10)/100 + (1000+2000)/8000) * 10 = 6.75
+```
+
+2. GPU2 的 Spread 评分信息如下
+```
+GPU2 Score: ((20+70)/100 + (1000+6000)/8000) * 10 = 17.75
+```
+
+因此,在 `Spread` 策略中我们可以选择 `GPU1`。
\ No newline at end of file
diff --git a/docs/zh/get-started/deploy-with-helm.md b/docs/zh/get-started/deploy-with-helm.md
new file mode 100644
index 0000000..618d7ba
--- /dev/null
+++ b/docs/zh/get-started/deploy-with-helm.md
@@ -0,0 +1,188 @@
+---
+title: 使用 Helm 部署 HAMi
+---
+
+## 目录 {#toc}
+
+- [先决条件](#prerequisites)
+- [安装步骤](#installation)
+- [演示](#demo)
+
+本指南将涵盖:
+
+- 为每个 GPU 节点配置 NVIDIA 容器运行时
+- 使用 Helm 安装 HAMi
+- 启动 vGPU 任务
+- 验证容器内设备资源是否受限
+
+## 先决条件 {#prerequisites}
+
+- [Helm](https://helm.sh/zh/docs/) v3+
+- [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) v1.16+
+- [CUDA](https://developer.nvidia.com/cuda-toolkit) v10.2+
+- [NVIDIA 驱动](https://www.nvidia.cn/drivers/unix/) v440+
+
+## 安装步骤 {#installation}
+
+### 1. 配置 nvidia-container-toolkit {#configure-nvidia-container-toolkit}
+
++ Copyright Contributors to the HAMi's project. +
++ The Linux Foundation® (TLF) has registered trademarks and uses trademarks. For a list of TLF trademarks, see Trademark Usage. +
++ Footer part can be edit. +
+ + + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..89a458b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,80 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "HAMi-docs" +description = "Documentation for HAMi" +version = "0.4.0" +readme = "README.md" +license = { file = "LICENSE" } +authors = [{ name = "HAMi Contributors" }] +maintainers = [{ name = "HAMi Contributors" }] +keywords = ["kubernetes", "energy", "power", "sustainability", "documentation"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Documentation", + "Topic :: System :: Monitoring", +] +requires-python = ">=3.8" +dependencies = [ + "mkdocs>=1.6", + "mkdocs-material>=9.5", + "mkdocs-static-i18n>=1.2", +] + +[dependency-groups] +# Development tools (additional dependencies beyond the main ones) +dev = [ + # Development utilities could go here + # e.g., "pre-commit", "ruff", etc. +] +ci = [ + # CI/CD specific tools could go here + # Currently handled by GitHub Actions with super-linter +] + +[project.urls] +Homepage = "https://sustainable-computing.io" +Documentation = "https://sustainable-computing.io" +Repository = "https://github.com/sustainable-computing-io/kepler" +"Bug Tracker" = "https://github.com/sustainable-computing-io/kepler/issues" + +[tool.hatch.version] +path = "mkdocs.yml" +pattern = "version: (?P