From 68cdf08bf5431d81472412e00326d0437b38dd7b Mon Sep 17 00:00:00 2001 From: Denis Tarabrin Date: Wed, 12 Nov 2025 19:29:33 +0400 Subject: [PATCH 01/12] cni-switcher init Signed-off-by: Denis Tarabrin --- .gitignore | 2 +- cmd/commands/cni.go | 158 ++++ cmd/commands/kubectl.go | 7 +- cmd/d8/root.go | 1 + .../cni/api/v1alpha1/cni_migration_types.go | 75 ++ .../api/v1alpha1/cni_node_migration_types.go | 63 ++ internal/cni/api/v1alpha1/register.go | 50 ++ .../cni/api/v1alpha1/zz_generated.deepcopy.go | 218 +++++ internal/cni/cleanup.go | 330 +++++++ internal/cni/common.go | 141 +++ internal/cni/prepare.go | 586 +++++++++++++ internal/cni/resources.go | 537 ++++++++++++ internal/cni/rollback.go | 28 + internal/cni/switch.go | 807 ++++++++++++++++++ 14 files changed, 2998 insertions(+), 5 deletions(-) create mode 100644 cmd/commands/cni.go create mode 100644 internal/cni/api/v1alpha1/cni_migration_types.go create mode 100644 internal/cni/api/v1alpha1/cni_node_migration_types.go create mode 100644 internal/cni/api/v1alpha1/register.go create mode 100644 internal/cni/api/v1alpha1/zz_generated.deepcopy.go create mode 100644 internal/cni/cleanup.go create mode 100644 internal/cni/common.go create mode 100644 internal/cni/prepare.go create mode 100644 internal/cni/resources.go create mode 100644 internal/cni/rollback.go create mode 100644 internal/cni/switch.go diff --git a/.gitignore b/.gitignore index 30a43a92..8680e16b 100644 --- a/.gitignore +++ b/.gitignore @@ -37,4 +37,4 @@ dist/ build/ # Entrypoint for the application -!/cmd/d8 \ No newline at end of file +!/cmd/d8 diff --git a/cmd/commands/cni.go b/cmd/commands/cni.go new file mode 100644 index 00000000..229a6b86 --- /dev/null +++ b/cmd/commands/cni.go @@ -0,0 +1,158 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package commands + +import ( + "fmt" + "log" + "strings" + "time" + + "github.com/deckhouse/deckhouse-cli/internal/cni" + "github.com/go-logr/logr" + "github.com/spf13/cobra" + "k8s.io/kubectl/pkg/util/templates" + ctrllog "sigs.k8s.io/controller-runtime/pkg/log" +) + +var ( + cniSwitchLong = templates.LongDesc(` +A group of commands to switch the CNI (Container Network Interface) provider in the Deckhouse cluster. + +This process is divided into several steps: + + - 'd8 cni-switch prepare' - STEP 1. Prepares the cluster for CNI migration. + - 'd8 cni-switch switch' - STEP 2. Performs the actual CNI switch. + - 'd8 cni-switch cleanup' - STEP 3. Cleans up resources if the switch is aborted. 
+ - 'd8 cni-switch rollback' - (Optional) Rollback CNI if the switch is aborted.`) + + cniPrepareExample = templates.Examples(` + # Prepare to switch to Cilium CNI + d8 cni-switch prepare --to-cni cilium`) + + cniSwitchExample = templates.Examples(` + # Perform the CNI switch after the prepare step is complete + d8 cni-switch switch`) + + cniCleanupExample = templates.Examples(` + # Cleanup resources created by the 'prepare' command + d8 cni-switch cleanup`) + + cniRollbackExample = templates.Examples(` + # Rollback changes, restore previous CNI, cleanup resources created by the 'prepare' command + d8 cni-switch rollback`) + + supportedCNIs = []string{"cilium", "flannel", "simple-bridge"} +) + +func NewCniSwitchCommand() *cobra.Command { + log.SetFlags(0) + ctrllog.SetLogger(logr.Discard()) + + cmd := &cobra.Command{ + Use: "cni-switch", + Short: "A group of commands to switch CNI in the cluster", + Long: cniSwitchLong, + } + cmd.PersistentFlags().Duration("timeout", 1*time.Hour, "The timeout for the entire operation (e.g., 40m, 1h)") + cmd.AddCommand(NewCmdCniPrepare()) + cmd.AddCommand(NewCmdCniSwitch()) + cmd.AddCommand(NewCmdCniCleanup()) + cmd.AddCommand(NewCmdCniRollback()) + return cmd +} + +func NewCmdCniPrepare() *cobra.Command { + cmd := &cobra.Command{ + Use: "prepare", + Short: "Prepares the cluster for CNI switching", + Example: cniPrepareExample, + PreRunE: func(cmd *cobra.Command, args []string) error { + targetCNI, _ := cmd.Flags().GetString("to-cni") + for _, supported := range supportedCNIs { + if strings.ToLower(targetCNI) == supported { + return nil + } + } + return fmt.Errorf( + "invalid --to-cni value %q. Supported values are: %s", + targetCNI, + strings.Join(supportedCNIs, ", "), + ) + }, + + Run: func(cmd *cobra.Command, args []string) { + targetCNI, _ := cmd.Flags().GetString("to-cni") + timeout, _ := cmd.Flags().GetDuration("timeout") + if err := cni.RunPrepare(targetCNI, timeout); err != nil { + log.Fatalf("❌ Error running prepare command: %v", err) + } + }, + } + cmd.Flags().String("to-cni", "", fmt.Sprintf( + "Target CNI provider to switch to. Supported values: %s", + strings.Join(supportedCNIs, ", "), + )) + _ = cmd.MarkFlagRequired("to-cni") + + return cmd +} + +func NewCmdCniSwitch() *cobra.Command { + cmd := &cobra.Command{ + Use: "switch", + Short: "Performs the CNI switching", + Example: cniSwitchExample, + Run: func(cmd *cobra.Command, args []string) { + timeout, _ := cmd.Flags().GetDuration("timeout") + if err := cni.RunSwitch(timeout); err != nil { + log.Fatalf("❌ Error running switch command: %v", err) + } + }, + } + return cmd +} + +func NewCmdCniCleanup() *cobra.Command { + cmd := &cobra.Command{ + Use: "cleanup", + Short: "Cleans up resources created during CNI switching", + Example: cniCleanupExample, + Run: func(cmd *cobra.Command, args []string) { + timeout, _ := cmd.Flags().GetDuration("timeout") + if err := cni.RunCleanup(timeout); err != nil { + log.Fatalf("❌ Error running cleanup command: %v", err) + } + }, + } + return cmd +} + +func NewCmdCniRollback() *cobra.Command { // TDEN It needs to be done! 
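+	// Rollback is intended to restore the previously detected CNI and clean up
+	// the resources created by 'prepare'; the work is delegated to cni.RunRollback below.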
+ cmd := &cobra.Command{ + Use: "rollback", + Short: "Rollback all changes and restore previous CNI", + Example: cniCleanupExample, + Run: func(cmd *cobra.Command, args []string) { + timeout, _ := cmd.Flags().GetDuration("timeout") + if err := cni.RunRollback(timeout); err != nil { + log.Fatalf("❌ Error running rollback command: %v", err) + } + }, + } + return cmd +} diff --git a/cmd/commands/kubectl.go b/cmd/commands/kubectl.go index d029fdc0..5a65cfc7 100644 --- a/cmd/commands/kubectl.go +++ b/cmd/commands/kubectl.go @@ -25,6 +25,7 @@ import ( "regexp" "time" + "github.com/deckhouse/deckhouse-cli/internal/cni" "github.com/spf13/cobra" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/cli-runtime/pkg/genericclioptions" @@ -35,9 +36,7 @@ import ( ) const ( - cmNamespace = "d8-system" - cmName = "debug-container" - cmImageKey = "image" + cmImageKey = "debug-container-image" ) var d8CommandRegex = regexp.MustCompile("([\"'`])d8 (\\w+)") @@ -112,7 +111,7 @@ func getDebugImage(cmd *cobra.Command) (string, error) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - configMap, err := kubeCl.CoreV1().ConfigMaps(cmNamespace).Get(ctx, cmName, v1.GetOptions{}) + configMap, err := kubeCl.CoreV1().ConfigMaps(cni.CMDataNameSpace).Get(ctx, cni.CMDataName, v1.GetOptions{}) if err != nil { return "", ErrGenericImageFetch } diff --git a/cmd/d8/root.go b/cmd/d8/root.go index 46ccce7d..946c7936 100644 --- a/cmd/d8/root.go +++ b/cmd/d8/root.go @@ -98,6 +98,7 @@ func (r *RootCommand) registerCommands() { r.cmd.AddCommand(commands.NewKubectlCommand()) r.cmd.AddCommand(commands.NewLoginCommand()) r.cmd.AddCommand(commands.NewStrongholdCommand()) + r.cmd.AddCommand(commands.NewCniSwitchCommand()) r.cmd.AddCommand(commands.NewHelpJSONCommand(r.cmd)) r.cmd.AddCommand(plugins.NewPluginsCommand(r.logger.Named("plugins-command"))) diff --git a/internal/cni/api/v1alpha1/cni_migration_types.go b/internal/cni/api/v1alpha1/cni_migration_types.go new file mode 100644 index 00000000..5cf9a76d --- /dev/null +++ b/internal/cni/api/v1alpha1/cni_migration_types.go @@ -0,0 +1,75 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// +genclient +// +genclient:nonNamespaced +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +// +k8s:openapi-gen=true + +// CNIMigration is the schema for the CNIMigration API. +// It is a cluster-level resource that serves as the "single source of truth" +// for the entire migration process. It defines the goal (targetCNI) +// and tracks the overall progress across all nodes. +type CNIMigration struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + // Spec defines the desired state of CNIMigration. + Spec CNIMigrationSpec `json:"spec,omitempty"` + // Status defines the observed state of CNIMigration. 
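+	// It is updated by the d8 cli, which aggregates progress reported in the
+	// per-node CNINodeMigration resources.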
+ Status CNIMigrationStatus `json:"status,omitempty"` +} + +// CNIMigrationSpec defines the desired state of CNIMigration. +// +k8s:deepcopy-gen=true +type CNIMigrationSpec struct { + // TargetCNI is the CNI to switch to. + // Set by the d8 cli utility when starting Phase 1. + TargetCNI string `json:"targetCNI"` + // Phase is the phase controlled by the d8 cli to command the agents. + // Possible values: Prepare, Migrate, Cleanup, Abort. + Phase string `json:"phase"` +} + +// CNIMigrationStatus defines the observed state of CNIMigration. +// +k8s:deepcopy-gen=true +type CNIMigrationStatus struct { + // CurrentCNI is the detected CNI from which the switch is being made. + CurrentCNI string `json:"currentCNI,omitempty"` + // NodesTotal is the total number of nodes involved in the migration. + NodesTotal int `json:"nodesTotal,omitempty"` + // NodesSucceeded is the number of nodes that have successfully completed the current phase. + NodesSucceeded int `json:"nodesSucceeded,omitempty"` + // NodesFailed is the number of nodes where an error occurred. + NodesFailed int `json:"nodesFailed,omitempty"` + // Conditions reflect the state of the migration as a whole. + // The d8 cli aggregates statuses from all CNINodeMigrations here. + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +// CNIMigrationList contains a list of CNIMigration. +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +type CNIMigrationList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []CNIMigration `json:"items"` +} diff --git a/internal/cni/api/v1alpha1/cni_node_migration_types.go b/internal/cni/api/v1alpha1/cni_node_migration_types.go new file mode 100644 index 00000000..b8536266 --- /dev/null +++ b/internal/cni/api/v1alpha1/cni_node_migration_types.go @@ -0,0 +1,63 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// +genclient +// +genclient:nonNamespaced +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +// +k8s:openapi-gen=true + +// CNINodeMigration is the schema for the CNINodeMigration API. +// This resource is created for each node in the cluster. The Helper +// agent running on the node updates this resource to report its local progress. +// The d8 cli reads these resources to display detailed status. +type CNINodeMigration struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + // Spec can be empty, as all configuration is taken from the parent CNIMigration resource. + Spec CNINodeMigrationSpec `json:"spec,omitempty"` + // Status defines the observed state of CNINodeMigration. + Status CNINodeMigrationStatus `json:"status,omitempty"` +} + +// CNINodeMigrationSpec defines the desired state of CNINodeMigration. +// +k8s:deepcopy-gen=true +type CNINodeMigrationSpec struct { + // The spec can be empty, as all configuration is taken from the parent CNIMigration resource. 
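+	// In particular, the target CNI and the migration phase come from CNIMigration.Spec.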
+} + +// CNINodeMigrationStatus defines the observed state of CNINodeMigration. +// +k8s:deepcopy-gen=true +type CNINodeMigrationStatus struct { + // Phase is the phase of this particular node. + Phase string `json:"phase,omitempty"` + // Conditions are the detailed conditions reflecting the steps performed on the node. + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +// CNINodeMigrationList contains a list of CNINodeMigration. +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +type CNINodeMigrationList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []CNINodeMigration `json:"items"` +} diff --git a/internal/cni/api/v1alpha1/register.go b/internal/cni/api/v1alpha1/register.go new file mode 100644 index 00000000..958d272f --- /dev/null +++ b/internal/cni/api/v1alpha1/register.go @@ -0,0 +1,50 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +const ( + APIGroup = "network.deckhouse.io" + APIVersion = "v1alpha1" +) + +// SchemeGroupVersion is group version used to register these objects +var ( + SchemeGroupVersion = schema.GroupVersion{ + Group: APIGroup, + Version: APIVersion, + } + SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) + AddToScheme = SchemeBuilder.AddToScheme +) + +// Adds the list of known types to Scheme. +func addKnownTypes(scheme *runtime.Scheme) error { + scheme.AddKnownTypes(SchemeGroupVersion, + &CNIMigration{}, + &CNIMigrationList{}, + &CNINodeMigration{}, + &CNINodeMigrationList{}, + ) + metav1.AddToGroupVersion(scheme, SchemeGroupVersion) + return nil +} diff --git a/internal/cni/api/v1alpha1/zz_generated.deepcopy.go b/internal/cni/api/v1alpha1/zz_generated.deepcopy.go new file mode 100644 index 00000000..495df5c2 --- /dev/null +++ b/internal/cni/api/v1alpha1/zz_generated.deepcopy.go @@ -0,0 +1,218 @@ +//go:build !ignore_autogenerated + +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by controller-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *CNIMigration) DeepCopyInto(out *CNIMigration) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.Spec = in.Spec + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CNIMigration. +func (in *CNIMigration) DeepCopy() *CNIMigration { + if in == nil { + return nil + } + out := new(CNIMigration) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *CNIMigration) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CNIMigrationList) DeepCopyInto(out *CNIMigrationList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]CNIMigration, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CNIMigrationList. +func (in *CNIMigrationList) DeepCopy() *CNIMigrationList { + if in == nil { + return nil + } + out := new(CNIMigrationList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *CNIMigrationList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CNIMigrationSpec) DeepCopyInto(out *CNIMigrationSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CNIMigrationSpec. +func (in *CNIMigrationSpec) DeepCopy() *CNIMigrationSpec { + if in == nil { + return nil + } + out := new(CNIMigrationSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CNIMigrationStatus) DeepCopyInto(out *CNIMigrationStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CNIMigrationStatus. +func (in *CNIMigrationStatus) DeepCopy() *CNIMigrationStatus { + if in == nil { + return nil + } + out := new(CNIMigrationStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CNINodeMigration) DeepCopyInto(out *CNINodeMigration) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.Spec = in.Spec + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CNINodeMigration. +func (in *CNINodeMigration) DeepCopy() *CNINodeMigration { + if in == nil { + return nil + } + out := new(CNINodeMigration) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *CNINodeMigration) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CNINodeMigrationList) DeepCopyInto(out *CNINodeMigrationList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]CNINodeMigration, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CNINodeMigrationList. +func (in *CNINodeMigrationList) DeepCopy() *CNINodeMigrationList { + if in == nil { + return nil + } + out := new(CNINodeMigrationList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *CNINodeMigrationList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CNINodeMigrationSpec) DeepCopyInto(out *CNINodeMigrationSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CNINodeMigrationSpec. +func (in *CNINodeMigrationSpec) DeepCopy() *CNINodeMigrationSpec { + if in == nil { + return nil + } + out := new(CNINodeMigrationSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CNINodeMigrationStatus) DeepCopyInto(out *CNINodeMigrationStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CNINodeMigrationStatus. +func (in *CNINodeMigrationStatus) DeepCopy() *CNINodeMigrationStatus { + if in == nil { + return nil + } + out := new(CNINodeMigrationStatus) + in.DeepCopyInto(out) + return out +} diff --git a/internal/cni/cleanup.go b/internal/cni/cleanup.go new file mode 100644 index 00000000..53b15fdc --- /dev/null +++ b/internal/cni/cleanup.go @@ -0,0 +1,330 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package cni + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/deckhouse/deckhouse-cli/internal/cni/api/v1alpha1" + saferequest "github.com/deckhouse/deckhouse-cli/pkg/libsaferequest/client" + admissionregistrationv1 "k8s.io/api/admissionregistration/v1" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// RunCleanup executes the logic for the 'cni-switch cleanup' command. +// It performs a robust cleanup by deleting a known list of resources +// and waiting for them to be fully terminated. +func RunCleanup(timeout time.Duration) error { + startTime := time.Now() + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + fmt.Printf("🚀 Starting CNI switch cleanup (global timeout: %s)\n", timeout) + + // 1. Create Kubernetes client + safeClient, err := saferequest.NewSafeClient() + if err != nil { + return fmt.Errorf("creating safe client: %w", err) + } + + rtClient, err := safeClient.NewRTClient(v1alpha1.AddToScheme) + if err != nil { + return fmt.Errorf("creating runtime client: %w", err) + } + fmt.Printf("✅ Kubernetes client created\n") + + // 2. Delete cluster-scoped resources first, with waiting + fmt.Println("\nDeleting cluster-scoped resources...") + webhookConfig := &admissionregistrationv1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: webhookConfigName}, + } + if err = deleteAndWait(ctx, rtClient, webhookConfig); err != nil { + return err + } + + // 3. Stop active controllers first to prevent reconciliation loops + fmt.Printf(" Stopping active controllers in '%s'...\n", cniSwitchNamespace) + helperDs := &appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{Name: switchHelperDaemonSetName, Namespace: cniSwitchNamespace}, + } + if err = deleteAndWait(ctx, rtClient, helperDs); err != nil { + return err + } + webhookDep := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{Name: webhookDeploymentName, Namespace: cniSwitchNamespace}, + } + if err = deleteAndWait(ctx, rtClient, webhookDep); err != nil { + return err + } + fmt.Println("✅ Active controllers stopped") + + // 4. Delete all CNINodeMigration resources + fmt.Println("\nDeleting all CNINodeMigration resources...") + nodeMigrations := &v1alpha1.CNINodeMigrationList{} + if err = rtClient.List(ctx, nodeMigrations); err != nil && !strings.Contains(err.Error(), "no matches for kind") { + return fmt.Errorf("listing CNINodeMigrations: %w", err) + } + for _, nm := range nodeMigrations.Items { + // Remove finalizers to ensure deletion even if controller is down + if err = removeFinalizers(ctx, rtClient, &nm); err != nil { + fmt.Printf("⚠️ Warning: failed to remove finalizers from %s: %v\n", nm.Name, err) + } + if err = deleteAndWait(ctx, rtClient, &nm); err != nil { + return err + } + } + fmt.Println("✅ All CNINodeMigration resources deleted") + + // 5. 
Delete all CNIMigration resources + fmt.Println("\nDeleting all CNIMigration resources...") + migrations := &v1alpha1.CNIMigrationList{} + if err = rtClient.List(ctx, migrations); err != nil && !strings.Contains(err.Error(), "no matches for kind") { + return fmt.Errorf("listing CNIMigrations: %w", err) + } + for _, m := range migrations.Items { + // Remove finalizers to ensure deletion even if controller is down + if err = removeFinalizers(ctx, rtClient, &m); err != nil { + fmt.Printf("⚠️ Warning: failed to remove finalizers from %s: %v\n", m.Name, err) + } + if err = deleteAndWait(ctx, rtClient, &m); err != nil { + return err + } + } + fmt.Println("✅ All CNIMigration resources deleted") + + // 6. Remove annotations from all pods + if err = removePodAnnotations(ctx, rtClient); err != nil { + // Non-fatal, print a warning + fmt.Printf("⚠️ Warning: failed to remove all pod annotations: %v\n", err) + } + + // 7. Delete RBAC resources + fmt.Println("\nDeleting RBAC resources...") + clusterRoleHelper := &rbacv1.ClusterRole{ + ObjectMeta: metav1.ObjectMeta{Name: switchHelperClusterRoleName}, + } + if err = deleteAndWait(ctx, rtClient, clusterRoleHelper); err != nil { + return err + } + clusterRoleWebhook := &rbacv1.ClusterRole{ + ObjectMeta: metav1.ObjectMeta{Name: webhookClusterRoleName}, + } + if err = deleteAndWait(ctx, rtClient, clusterRoleWebhook); err != nil { + return err + } + clusterRoleBindingHelper := &rbacv1.ClusterRoleBinding{ + ObjectMeta: metav1.ObjectMeta{Name: switchHelperClusterRoleBindingName}, + } + if err = deleteAndWait(ctx, rtClient, clusterRoleBindingHelper); err != nil { + return err + } + clusterRoleBindingWebhook := &rbacv1.ClusterRoleBinding{ + ObjectMeta: metav1.ObjectMeta{Name: webhookClusterRoleBindingName}, + } + if err = deleteAndWait(ctx, rtClient, clusterRoleBindingWebhook); err != nil { + return err + } + fmt.Println("✅ Cluster-scoped RBAC resources deleted") + + // 8. Delete remaining namespaced resources + fmt.Printf("\nDeleting remaining namespaced resources in '%s'...\n", cniSwitchNamespace) + webhookService := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{Name: webhookServiceName, Namespace: cniSwitchNamespace}, + } + if err = deleteAndWait(ctx, rtClient, webhookService); err != nil { + return err + } + webhookSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: webhookSecretName, Namespace: cniSwitchNamespace}, + } + if err = deleteAndWait(ctx, rtClient, webhookSecret); err != nil { + return err + } + helperSA := &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{Name: switchHelperServiceAccountName, Namespace: cniSwitchNamespace}, + } + if err = deleteAndWait(ctx, rtClient, helperSA); err != nil { + return err + } + webhookSA := &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{Name: webhookServiceAccountName, Namespace: cniSwitchNamespace}, + } + if err = deleteAndWait(ctx, rtClient, webhookSA); err != nil { + return err + } + fmt.Println("✅ Remaining namespaced resources deleted") + + // 9. Delete the namespace + fmt.Printf("\nDeleting namespace '%s'...\n", cniSwitchNamespace) + ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: cniSwitchNamespace}} + if err = deleteAndWait(ctx, rtClient, ns); err != nil { + return err + } + fmt.Println("✅ Namespace successfully deleted") + + fmt.Printf("\n🎉 CNI switch cleanup successfully completed (total time: %s)\n", + time.Since(startTime).Round(time.Second), + ) + return nil +} + +// deleteAndWait deletes a resource and waits for it to be fully terminated. 
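+// If the resource does not exist, it returns nil without waiting.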
+func deleteAndWait(ctx context.Context, rtClient client.Client, obj client.Object) error { + if err := deleteResource(ctx, rtClient, obj); err != nil { + // This handles the "already deleted" case, so we don't need to check again. + return err + } + return waitForResourceDeletion(ctx, rtClient, obj) +} + +// deleteResource just initiates deletion for a Kubernetes resource. +func deleteResource(ctx context.Context, rtClient client.Client, obj client.Object) error { + kind := getKind(obj) + name := obj.GetName() + ns := obj.GetNamespace() + + fmt.Printf("- Deleting %s '%s%s'... ", kind, name, func() string { + if ns != "" { + return fmt.Sprintf("' in namespace '%s", ns) + } + return "" + }()) + + err := rtClient.Delete(ctx, obj, client.PropagationPolicy(metav1.DeletePropagationBackground)) + if err != nil { + if errors.IsNotFound(err) { + fmt.Println("already deleted.") + return nil + } + return fmt.Errorf("failed to delete %s '%s': %w", kind, name, err) + } + + fmt.Println("deleted.") + return nil +} + +// waitForResourceDeletion polls until a resource is confirmed to be gone. +func waitForResourceDeletion(ctx context.Context, rtClient client.Client, obj client.Object) error { + key := client.ObjectKey{Name: obj.GetName(), Namespace: obj.GetNamespace()} + kind := getKind(obj) + + // Check if the resource is already gone before starting to wait. + if err := rtClient.Get(ctx, key, obj); err != nil { + if errors.IsNotFound(err) { + // It was already gone, no need to wait. + return nil + } + return fmt.Errorf("getting %s '%s': %w", kind, key.Name, err) + } + + fmt.Printf(" Waiting for %s '%s' to terminate... ", kind, key.Name) + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + fmt.Println("timeout.") + return fmt.Errorf("timed out waiting for %s '%s' to be deleted", kind, key.Name) + case <-ticker.C: + err := rtClient.Get(ctx, key, obj) + if err != nil { + if errors.IsNotFound(err) { + fmt.Println("terminated.") + return nil // Success! + } + if strings.Contains(err.Error(), "no matches for kind") { + fmt.Println("Kind not known, assuming terminated.") + return nil + } + fmt.Printf("error: %v\n", err) + return fmt.Errorf("getting %s '%s': %w", kind, key.Name, err) + } + } + } +} + +func removePodAnnotations(ctx context.Context, rtClient client.Client) error { + fmt.Println("\nRemoving CNI switch annotations from all pods...") + + podList := &corev1.PodList{} + if err := rtClient.List(ctx, podList); err != nil { + return fmt.Errorf("listing all pods: %w", err) + } + + podsPatched := 0 + for _, pod := range podList.Items { + if pod.Spec.HostNetwork { + continue + } + + if _, ok := pod.Annotations[EffectiveCNIAnnotation]; ok { + // Use a merge patch to remove just the one annotation + patch := []byte(fmt.Sprintf(`{"metadata":{"annotations":{"%s":null}}}`, EffectiveCNIAnnotation)) + err := rtClient.Patch(ctx, &pod, client.RawPatch(client.Merge.Type(), patch)) + if err != nil { + if errors.IsNotFound(err) { + // No need to print warning, just continue + continue + } + fmt.Printf("\n⚠️ Warning: failed to patch pod %s/%s: %v", pod.Namespace, pod.Name, err) + continue + } + podsPatched++ + fmt.Printf("\r Patched %d pods...", podsPatched) + } + } + + if podsPatched > 0 { + fmt.Printf("\n✅ Removed annotations from %d pods.\n", podsPatched) + } else { + fmt.Print("✅ No pods with CNI switch annotations were found.\n") + } + + return nil +} + +// getKind extracts a user-friendly kind from a runtime object. 
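+// If the object's GVK kind is not populated, it falls back to the object's Go type name.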
+func getKind(obj client.Object) string { + kind := obj.GetObjectKind().GroupVersionKind().Kind + if kind == "" { + t := fmt.Sprintf("%T", obj) + parts := strings.Split(t, ".") + if len(parts) > 0 { + return parts[len(parts)-1] + } + } + return kind +} + +// removeFinalizers patches the object to remove all finalizers. +func removeFinalizers(ctx context.Context, rtClient client.Client, obj client.Object) error { + if len(obj.GetFinalizers()) == 0 { + return nil + } + + patch := []byte(`{"metadata":{"finalizers":null}}`) + return rtClient.Patch(ctx, obj, client.RawPatch(client.Merge.Type(), patch)) +} diff --git a/internal/cni/common.go b/internal/cni/common.go new file mode 100644 index 00000000..0fc646f3 --- /dev/null +++ b/internal/cni/common.go @@ -0,0 +1,141 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cni + +import ( + "bufio" + "context" + "fmt" + "os" + "strings" + + "github.com/deckhouse/deckhouse-cli/internal/cni/api/v1alpha1" + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + // Image ConfigMap from Deckhouse + CMDataName = "d8-cli-data" + CMDataNameSpace = "d8-system" + cmDataHelperImage = "cni-switch-helper-image" + + // Namespace where the helper and webhook are deployed + cniSwitchNamespace = "cni-switch" + + // Service Account Names + switchHelperServiceAccountName = "cni-switch-helper-sa" + webhookServiceAccountName = "cni-switch-webhook-sa" + + // DaemonSet Names + switchHelperDaemonSetName = "cni-switch-helper" + + // Cluster Role and Binding Names + switchHelperClusterRoleName = "d8:cni-switch-helper" + webhookClusterRoleName = "d8:cni-switch-webhook" + switchHelperClusterRoleBindingName = "d8:cni-switch-helper" + webhookClusterRoleBindingName = "d8:cni-switch-webhook" + + // Webhook Resources + webhookDeploymentName = "cni-switch-webhook" + webhookServiceName = "cni-switch-webhook-service" + webhookSecretName = "cni-switch-webhook-tls" + webhookConfigName = "cni-switch-pod-annotator" + webhookConfigurationName = "annotator.cni-switch.deckhouse.io" + webhookPort = 9443 + + // Annotations + EffectiveCNIAnnotation = "effective-cni.network.deckhouse.io" +) + +var ( + CNIModuleConfigs = []string{"cni-cilium", "cni-flannel", "cni-simple-bridge"} + + moduleConfigGVK = schema.GroupVersionKind{ + Group: "deckhouse.io", + Version: "v1alpha1", + Kind: "ModuleConfig", + } +) + +// AskForConfirmation displays a warning and prompts the user for confirmation. +func AskForConfirmation(commandName string) (bool, error) { + reader := bufio.NewReader(os.Stdin) + + fmt.Println("--------------------------------------------------------------------------------") + fmt.Println("⚠️ IMPORTANT: PLEASE READ CAREFULLY") + fmt.Println("--------------------------------------------------------------------------------") + fmt.Println() + fmt.Printf("You are about to run the '%s' step of the CNI switch process. Please ensure that:\n\n", commandName) + fmt.Println("1. 
External cluster management systems (CI/CD, GitOps like ArgoCD, Flux)") + fmt.Println(" are temporarily disabled. They might interfere with the CNI switch process") + fmt.Println(" by reverting changes made by this tool.") + fmt.Println() + fmt.Println("2. You have sufficient administrative privileges for this cluster to perform") + fmt.Println(" the required actions (modifying ModuleConfigs, deleting pods, etc.).") + fmt.Println() + fmt.Println("3. The utility does not configure CNI modules in the cluster; it only enables/disables") + fmt.Println(" them via ModuleConfig during operation. The user must independently prepare the") + fmt.Println(" ModuleConfig configuration for the target CNI.") + fmt.Println() + fmt.Println("Once the process starts, no active intervention is required from you.") + fmt.Println() + fmt.Print("Do you want to continue? (y/n): ") + + for { + response, err := reader.ReadString('\n') + if err != nil { + return false, err + } + + response = strings.ToLower(strings.TrimSpace(response)) + + switch response { + case "y", "yes": + fmt.Println() + return true, nil + case "n", "no": + fmt.Println() + return false, nil + default: + fmt.Print("Invalid input. Please enter 'y/yes' or 'n/no'): ") + } + } +} + +// FindActiveMigration searches for an existing CNIMigration resource. +// It returns an error if more than one migration is found. +func FindActiveMigration(ctx context.Context, rtClient client.Client) (*v1alpha1.CNIMigration, error) { + migrationList := &v1alpha1.CNIMigrationList{} + if err := rtClient.List(ctx, migrationList); err != nil { + return nil, fmt.Errorf("listing CNIMigration objects: %w", err) + } + + if len(migrationList.Items) == 0 { + return nil, nil // No migration found + } + + if len(migrationList.Items) > 1 { + return nil, fmt.Errorf( + "found %d CNI migration objects, which is an inconsistent state. "+ + "Please run 'd8 cni-switch cleanup' to resolve this", + len(migrationList.Items), + ) + } + + return &migrationList.Items[0], nil +} diff --git a/internal/cni/prepare.go b/internal/cni/prepare.go new file mode 100644 index 00000000..b23d28eb --- /dev/null +++ b/internal/cni/prepare.go @@ -0,0 +1,586 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cni + +import ( + "bytes" + "context" + "crypto/rand" + "crypto/rsa" + "crypto/x509" + "crypto/x509/pkix" + "encoding/pem" + "fmt" + "math/big" + "strings" + "time" + + "github.com/deckhouse/deckhouse-cli/internal/cni/api/v1alpha1" + saferequest "github.com/deckhouse/deckhouse-cli/pkg/libsaferequest/client" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// RunPrepare executes the logic for the 'cni-switch prepare' command. +func RunPrepare(targetCNI string, timeout time.Duration) error { + // 0. 
Ask for user confirmation + confirmed, err := AskForConfirmation("prepare") + if err != nil { + return fmt.Errorf("asking for confirmation: %w", err) + } + if !confirmed { + fmt.Println("Operation cancelled by user.") + return nil + } + + startTime := time.Now() + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + fmt.Printf("🚀 Starting CNI switch preparation for target '%s' (global timeout: %s)\n", + targetCNI, timeout) + + // 1. Create a Kubernetes client + safeClient, err := saferequest.NewSafeClient() + if err != nil { + return fmt.Errorf("creating safe client: %w", err) + } + + rtClient, err := safeClient.NewRTClient(v1alpha1.AddToScheme) + if err != nil { + return fmt.Errorf("creating runtime client: %w", err) + } + fmt.Printf("✅ Kubernetes client created (total elapsed: %s)\n\n", + time.Since(startTime).Round(time.Millisecond)) + + // 2. Find an existing migration or create a new one + activeMigration, err := getOrCreateMigrationForPrepare(ctx, rtClient, targetCNI) + if err != nil { + return err + } + if activeMigration == nil { + // This means preparation is already complete, and the user has been notified. + return nil + } + fmt.Printf( + "✅ Working with CNIMigration '%s' (total elapsed: %s)\n\n", + activeMigration.Name, + time.Since(startTime).Round(time.Millisecond), + ) + + // 3. Detect current CNI and update migration status + if activeMigration.Status.CurrentCNI == "" { + var currentCNI string + currentCNI, err = detectCurrentCNI(rtClient) + if err != nil { + return fmt.Errorf("detecting current CNI: %w", err) + } + fmt.Printf("Detected current CNI: '%s'\n", currentCNI) + + if currentCNI == targetCNI { + return fmt.Errorf("target CNI '%s' is the same as the current CNI. Nothing to do", targetCNI) + } + + activeMigration.Status.CurrentCNI = currentCNI + err = rtClient.Status().Update(ctx, activeMigration) + if err != nil { + return fmt.Errorf("updating migration status with current CNI: %w", err) + } + fmt.Printf( + "✅ Added current CNI to migration status (total elapsed: %s)\n\n", + time.Since(startTime).Round(time.Millisecond), + ) + } + + // 4. Create the dedicated namespace + fmt.Printf("Creating dedicated namespace '%s'...\n", cniSwitchNamespace) + ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: cniSwitchNamespace}} + if err = rtClient.Create(ctx, ns); err != nil && !errors.IsAlreadyExists(err) { + return fmt.Errorf("creating namespace %s: %w", cniSwitchNamespace, err) + } + fmt.Printf("✅ Namespace created (total elapsed: %s)\n\n", + time.Since(startTime).Round(time.Millisecond)) + + // 5. Get the helper image name from the configmap + cm := &corev1.ConfigMap{} + if err = rtClient.Get(ctx, client.ObjectKey{Name: CMDataName, Namespace: CMDataNameSpace}, cm); err != nil { + return fmt.Errorf("getting %s configmap: %w", CMDataName, err) + } + imageName, ok := cm.Data[cmDataHelperImage] + if !ok || imageName == "" { + return fmt.Errorf("%s not found or empty in %s configmap", cmDataHelperImage, CMDataName) + } + + // 6. 
Apply RBAC + fmt.Println("Applying RBAC...") + // Helper RBAC + helperSA := getSwitchHelperServiceAccount(cniSwitchNamespace) + if err = rtClient.Create(ctx, helperSA); err != nil && !errors.IsAlreadyExists(err) { + return fmt.Errorf("creating helper service account: %w", err) + } + fmt.Printf("- Helper's ServiceAccount '%s' created\n", helperSA.Name) + + helperRole := getSwitchHelperClusterRole() + if err = rtClient.Create(ctx, helperRole); err != nil && !errors.IsAlreadyExists(err) { + return fmt.Errorf("creating cluster role: %w", err) + } + fmt.Printf("- Helper's ClusterRole '%s' created\n", helperRole.Name) + + helperBinding := getSwitchHelperClusterRoleBinding(cniSwitchNamespace) + if err = rtClient.Create(ctx, helperBinding); err != nil && !errors.IsAlreadyExists(err) { + return fmt.Errorf("creating cluster role binding: %w", err) + } + fmt.Printf("- Helper's ClusterRoleBinding '%s' created\n", helperBinding.Name) + + // Webhook RBAC + webhookSA := getWebhookServiceAccount(cniSwitchNamespace) + if err = rtClient.Create(ctx, webhookSA); err != nil && !errors.IsAlreadyExists(err) { + return fmt.Errorf("creating webhook service account: %w", err) + } + fmt.Printf("- Webhook's ServiceAccount '%s' created\n", webhookSA.Name) + + webhookRole := getWebhookClusterRole() + if err = rtClient.Create(ctx, webhookRole); err != nil && !errors.IsAlreadyExists(err) { + return fmt.Errorf("creating webhook cluster role: %w", err) + } + fmt.Printf("- Webhook's ClusterRole '%s' created\n", webhookRole.Name) + + webhookBinding := getWebhookClusterRoleBinding(cniSwitchNamespace) + if err = rtClient.Create(ctx, webhookBinding); err != nil && !errors.IsAlreadyExists(err) { + return fmt.Errorf("creating webhook cluster role binding: %w", err) + } + fmt.Printf("- Webhook's ClusterRoleBinding '%s' created\n", webhookBinding.Name) + fmt.Printf("✅ RBAC applied (total elapsed: %s)\n\n", + time.Since(startTime).Round(time.Millisecond)) + + // 7. 
Create and wait for the mutating webhook + fmt.Println("Deploying Mutating Webhook for annotating new pods...") + // Generate certificates + caCert, serverCert, serverKey, err := generateWebhookCertificates(cniSwitchNamespace) + if err != nil { + return fmt.Errorf("generating webhook certificates: %w", err) + } + fmt.Printf("- TLS certificate generated\n") + + // Create TLS secret + tlsSecret := getWebhookTLSSecret(cniSwitchNamespace, serverCert, serverKey) + if err = rtClient.Create(ctx, tlsSecret); err != nil && !errors.IsAlreadyExists(err) { + return fmt.Errorf("creating webhook tls secret: %w", err) + } + fmt.Printf("- Secret with TLS certificate '%s' created\n", tlsSecret.Name) + + // Create Deployment + webhookDeployment := getWebhookDeployment(cniSwitchNamespace, imageName, webhookServiceAccountName) + if err = rtClient.Create(ctx, webhookDeployment); err != nil && !errors.IsAlreadyExists(err) { + return fmt.Errorf("creating webhook deployment: %w", err) + } + + // Wait for Deployment to be ready + if err = waitForDeploymentReady(ctx, rtClient, webhookDeployment); err != nil { + return fmt.Errorf("waiting for webhook deployment ready: %w", err) + } + fmt.Printf("- Webhook Deployment '%s' created\n", webhookDeployment.Name) + + // Create Service + webhookService := getWebhookService(cniSwitchNamespace) + if err = rtClient.Create(ctx, webhookService); err != nil && !errors.IsAlreadyExists(err) { + return fmt.Errorf("creating webhook service: %w", err) + } + fmt.Printf("- Webhook Service '%s' created\n", webhookService.Name) + + // Create MutatingWebhookConfiguration + webhookConfig := getMutatingWebhookConfiguration(cniSwitchNamespace, caCert) + if err = rtClient.Create(ctx, webhookConfig); err != nil && !errors.IsAlreadyExists(err) { + return fmt.Errorf("creating mutating webhook configuration: %w", err) + } + fmt.Printf("✅ Mutating Webhook '%s' is active (total elapsed: %s)\n\n", + webhookConfig.Name, time.Since(startTime).Round(time.Millisecond)) + + // 8. Create and wait for the Helper daemonset + dsKey := client.ObjectKey{Name: switchHelperDaemonSetName, Namespace: cniSwitchNamespace} + ds := &appsv1.DaemonSet{} + if err = rtClient.Get(ctx, dsKey, ds); err != nil { + if !errors.IsNotFound(err) { + return fmt.Errorf("getting helper daemonset: %w", err) + } + fmt.Printf("Creating helper DaemonSet '%s'...\n", switchHelperDaemonSetName) + dsToCreate := getSwitchHelperDaemonSet(cniSwitchNamespace, imageName) + if err = rtClient.Create(ctx, dsToCreate); err != nil { + return fmt.Errorf("creating helper daemonset: %w", err) + } + ds = dsToCreate + } else { + fmt.Printf("Helper DaemonSet '%s' already exists.\n", switchHelperDaemonSetName) + } + + if err = waitForDaemonSetReady(ctx, rtClient, ds); err != nil { + return fmt.Errorf("waiting for daemonset ready: %w", err) + } + fmt.Printf("✅ Helper DaemonSet is ready (total elapsed: %s)\n\n", + time.Since(startTime).Round(time.Millisecond)) + + // 9. Wait for all nodes to be prepared + fmt.Println("Waiting for all nodes to complete the preparation step...") + err = waitForNodesPrepared(ctx, rtClient) + if err != nil { + return fmt.Errorf("waiting for nodes to be prepared: %w", err) + } + fmt.Printf("✅ All CNINodeMigrations are created and all nodes are prepared (total elapsed: %s)\n\n", + time.Since(startTime).Round(time.Millisecond)) + + // 10. 
Update overall status + activeMigration.Status.Conditions = append(activeMigration.Status.Conditions, metav1.Condition{ + Type: "PreparationSucceeded", + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.Now(), + Reason: "AllNodesPrepared", + Message: "All nodes successfully completed the preparation step.", + }) + + err = rtClient.Status().Update(ctx, activeMigration) + if err != nil { + return fmt.Errorf("updating CNIMigration status to prepared: %w", err) + } + + fmt.Printf( + "🎉 Cluster successfully prepared for CNI switch (total time: %s)\n", + time.Since(startTime).Round(time.Second), + ) + fmt.Println("\nYou can now run 'd8 cni-switch switch' to proceed") + + return nil +} + +// generateWebhookCertificates creates a self-signed CA and a server certificate for the webhook. +func generateWebhookCertificates(namespace string) (caCert, serverCert, serverKey []byte, err error) { + caSerialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 128) + caSerialNumber, err := rand.Int(rand.Reader, caSerialNumberLimit) + if err != nil { + return nil, nil, nil, fmt.Errorf("failed to generate CA serial number: %w", err) + } + + // CA configuration + ca := &x509.Certificate{ + SerialNumber: caSerialNumber, + Subject: pkix.Name{ + Organization: []string{"deckhouse.io"}, + }, + NotBefore: time.Now(), + NotAfter: time.Now().AddDate(1, 0, 0), + IsCA: true, + ExtKeyUsage: []x509.ExtKeyUsage{ + x509.ExtKeyUsageClientAuth, + x509.ExtKeyUsageServerAuth, + }, + KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign, + BasicConstraintsValid: true, + } + + caPrivKey, err := rsa.GenerateKey(rand.Reader, 4096) + if err != nil { + return nil, nil, nil, fmt.Errorf("generating CA private key: %w", err) + } + + caBytes, err := x509.CreateCertificate(rand.Reader, ca, ca, &caPrivKey.PublicKey, caPrivKey) + if err != nil { + return nil, nil, nil, fmt.Errorf("creating CA certificate: %w", err) + } + + caPEM := new(bytes.Buffer) + _ = pem.Encode(caPEM, &pem.Block{ + Type: "CERTIFICATE", + Bytes: caBytes, + }) + + serverSerialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 128) + serverSerialNumber, err := rand.Int(rand.Reader, serverSerialNumberLimit) + if err != nil { + return nil, nil, nil, fmt.Errorf("failed to generate server serial number: %w", err) + } + + // Server certificate configuration + commonName := fmt.Sprintf("%s.%s.svc", webhookServiceName, namespace) + cert := &x509.Certificate{ + SerialNumber: serverSerialNumber, + Subject: pkix.Name{ + CommonName: commonName, + Organization: []string{"deckhouse.io"}, + }, + DNSNames: []string{commonName}, + NotBefore: time.Now(), + NotAfter: time.Now().AddDate(1, 0, 0), + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, + KeyUsage: x509.KeyUsageDigitalSignature, + } + + serverPrivKey, err := rsa.GenerateKey(rand.Reader, 4096) + if err != nil { + return nil, nil, nil, fmt.Errorf("generating server private key: %w", err) + } + + serverCertBytes, err := x509.CreateCertificate(rand.Reader, cert, ca, &serverPrivKey.PublicKey, caPrivKey) + if err != nil { + return nil, nil, nil, fmt.Errorf("creating server certificate: %w", err) + } + + serverCertPEM := new(bytes.Buffer) + _ = pem.Encode(serverCertPEM, &pem.Block{ + Type: "CERTIFICATE", + Bytes: serverCertBytes, + }) + + serverPrivKeyPEM := new(bytes.Buffer) + _ = pem.Encode(serverPrivKeyPEM, &pem.Block{ + Type: "RSA PRIVATE KEY", + Bytes: x509.MarshalPKCS1PrivateKey(serverPrivKey), + }) + + return caPEM.Bytes(), serverCertPEM.Bytes(), serverPrivKeyPEM.Bytes(), nil +} + +func 
getOrCreateMigrationForPrepare( + ctx context.Context, rtClient client.Client, targetCNI string, +) (*v1alpha1.CNIMigration, error) { + activeMigration, err := FindActiveMigration(ctx, rtClient) + if err != nil { + return nil, fmt.Errorf("failed to find active migration: %w", err) + } + + if activeMigration != nil { + fmt.Printf("Found active CNIMigration '%s'\n", activeMigration.Name) + + // If an active migration is found, ensure its target CNI matches the requested target CNI. + if activeMigration.Spec.TargetCNI != targetCNI { + return nil, fmt.Errorf( + "an active CNI migration to '%s' is already in progress. "+ + "Cannot prepare for '%s'. To change the target CNI, "+ + "please run 'd8 cni-switch cleanup' first to reset the state", + activeMigration.Spec.TargetCNI, + targetCNI, + ) + } + + // Check if preparation is already done + for _, cond := range activeMigration.Status.Conditions { + if cond.Type == "PreparationSucceeded" && cond.Status == metav1.ConditionTrue { + fmt.Println("🎉 Cluster has already been prepared for CNI switch.") + fmt.Println("\nYou can now run 'd8 cni-switch switch' to proceed.") + return nil, nil // Signal to the caller that we can exit gracefully + } + } + + // Ensure the migration is in the 'Prepare' phase + if activeMigration.Spec.Phase != "Prepare" { + return nil, fmt.Errorf( + "an active migration is already in the '%s' phase. "+ + "Cannot run 'prepare' again. To proceed, run 'd8 cni-switch switch'. "+ + "To start over, run 'd8 cni-switch cleanup'", + activeMigration.Spec.Phase, + ) + } + + return activeMigration, nil + } + migrationName := fmt.Sprintf("cni-migration-%s", time.Now().Format("20060102-150405")) + fmt.Printf("No active migration found. Creating a new one...\n") + + newMigration := &v1alpha1.CNIMigration{ + ObjectMeta: metav1.ObjectMeta{ + Name: migrationName, + }, + Spec: v1alpha1.CNIMigrationSpec{ + TargetCNI: targetCNI, + Phase: "Prepare", + }, + } + + err = rtClient.Create(ctx, newMigration) + if err != nil { + if errors.IsAlreadyExists(err) { + fmt.Println("Migration object was created by another process. Getting it.") + err = rtClient.Get(ctx, client.ObjectKey{Name: migrationName}, newMigration) + if err != nil { + return nil, fmt.Errorf("getting existing CNIMigration object: %w", err) + } + return newMigration, nil + } + return nil, fmt.Errorf("creating new CNIMigration object: %w", err) + } + + fmt.Printf("Successfully created CNIMigration object '%s'\n", newMigration.Name) + return newMigration, nil +} + +func waitForDaemonSetReady(ctx context.Context, rtClient client.Client, ds *appsv1.DaemonSet) error { + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + key := client.ObjectKey{Name: ds.Name, Namespace: ds.Namespace} + err := rtClient.Get(ctx, key, ds) + if err != nil { + fmt.Printf("\n⚠️ Warning: could not get DaemonSet status: %v\n", err) + continue + } + + // This is the exit condition for the loop. + if ds.Status.DesiredNumberScheduled == ds.Status.NumberReady && ds.Status.NumberUnavailable == 0 { + fmt.Printf( + "\r Waiting for DaemonSet... %d/%d pods ready\n", + ds.Status.NumberReady, + ds.Status.DesiredNumberScheduled, + ) + return nil + } + + // This is the progress update. + fmt.Printf( + "\r Waiting for DaemonSet... 
%d/%d pods ready", + ds.Status.NumberReady, + ds.Status.DesiredNumberScheduled, + ) + } + } +} + +func waitForDeploymentReady(ctx context.Context, rtClient client.Client, dep *appsv1.Deployment) error { + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + key := client.ObjectKey{Name: dep.Name, Namespace: dep.Namespace} + err := rtClient.Get(ctx, key, dep) + if err != nil { + fmt.Printf("\n⚠️ Warning: could not get Deployment status: %v\n", err) + continue + } + + // This is the exit condition for the loop. + if dep.Spec.Replicas != nil && dep.Status.ReadyReplicas >= + *dep.Spec.Replicas && dep.Status.UnavailableReplicas == 0 { + fmt.Printf( + "\r Waiting for Deployment... %d/%d replicas ready\n", + dep.Status.ReadyReplicas, + *dep.Spec.Replicas, + ) + return nil + } + + // This is the progress update. + if dep.Spec.Replicas != nil { + fmt.Printf( + "\r Waiting for Deployment... %d/%d replicas ready", + dep.Status.ReadyReplicas, + *dep.Spec.Replicas, + ) + } + } + } +} + +func waitForNodesPrepared(ctx context.Context, rtClient client.Client) error { + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + nodes := &corev1.NodeList{} + if err := rtClient.List(ctx, nodes); err != nil { + fmt.Printf("⚠️ Warning: could not list nodes: %v\n", err) + continue + } + totalNodes := len(nodes.Items) + + migrations := &v1alpha1.CNINodeMigrationList{} + if err := rtClient.List(ctx, migrations); err != nil { + fmt.Printf("⚠️ Warning: could not list node migrations: %v\n", err) + continue + } + + readyNodes := 0 + for _, migration := range migrations.Items { + for _, cond := range migration.Status.Conditions { + if cond.Type == "PreparationSucceeded" && cond.Status == metav1.ConditionTrue { + readyNodes++ + break + } + } + } + + fmt.Printf("\r Progress: %d/%d nodes prepared...", readyNodes, totalNodes) + + if readyNodes >= totalNodes && totalNodes > 0 { + fmt.Printf("\r Progress: %d/%d nodes prepared...\n", readyNodes, totalNodes) + return nil + } + } + } +} + +func detectCurrentCNI(rtClient client.Client) (string, error) { + var enabledCNIs []string + for _, cniModule := range CNIModuleConfigs { + mc := &unstructured.Unstructured{} + mc.SetGroupVersionKind(moduleConfigGVK) + + err := rtClient.Get(context.Background(), client.ObjectKey{Name: cniModule}, mc) + if err != nil { + if errors.IsNotFound(err) { + continue + } + return "", fmt.Errorf("getting module config %s: %w", cniModule, err) + } + + enabled, found, err := unstructured.NestedBool(mc.Object, "spec", "enabled") + if err != nil { + return "", fmt.Errorf("parsing 'spec.enabled' for module config %s: %w", cniModule, err) + } + + if found && enabled { + cniName := strings.TrimPrefix(cniModule, "cni-") + enabledCNIs = append(enabledCNIs, cniName) + } + } + + if len(enabledCNIs) == 0 { + return "", fmt.Errorf("no enabled CNI module found. Looked for: %s", strings.Join(CNIModuleConfigs, ", ")) + } + + if len(enabledCNIs) > 1 { + return "", fmt.Errorf( + "found multiple enabled CNI modules: %s. 
Please disable all but one", + strings.Join(enabledCNIs, ", "), + ) + } + + return enabledCNIs[0], nil +} diff --git a/internal/cni/resources.go b/internal/cni/resources.go new file mode 100644 index 00000000..f7801ccb --- /dev/null +++ b/internal/cni/resources.go @@ -0,0 +1,537 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cni + +import ( + admissionregistrationv1 "k8s.io/api/admissionregistration/v1" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" +) + +// --- Helper Resources --- + +// getSwitchHelperDaemonSet returns a DaemonSet object. +func getSwitchHelperDaemonSet(namespace, imageName string) *appsv1.DaemonSet { + rootID := int64(0) + truePtr := true + terminationGracePeriodSeconds := int64(5) + mountPropagationBidirectional := corev1.MountPropagationBidirectional + hostPathDirectoryOrCreate := corev1.HostPathDirectoryOrCreate + hostPathDirectory := corev1.HostPathDirectory + + return &appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: switchHelperDaemonSetName, + Namespace: namespace, + }, + Spec: appsv1.DaemonSetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": switchHelperDaemonSetName}, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": switchHelperDaemonSetName}, + }, + Spec: corev1.PodSpec{ + ServiceAccountName: switchHelperServiceAccountName, + Containers: []corev1.Container{ + { + Name: "helper", + Image: imageName, + Ports: []corev1.ContainerPort{ + { + Name: "healthz", + ContainerPort: 8081, + Protocol: corev1.ProtocolTCP, + }, + }, + LivenessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/healthz", + Port: intstr.FromString("healthz"), + }, + }, + InitialDelaySeconds: 15, + PeriodSeconds: 20, + }, + ReadinessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/readyz", + Port: intstr.FromString("healthz"), + }, + }, + InitialDelaySeconds: 5, + PeriodSeconds: 10, + }, + Env: []corev1.EnvVar{ + { + Name: "NODE_NAME", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "spec.nodeName", + }, + }, + }, + { + Name: "KUBERNETES_SERVICE_HOST", + Value: "127.0.0.1", + }, + { + Name: "KUBERNETES_SERVICE_PORT", + Value: "6445", + }, + }, + SecurityContext: &corev1.SecurityContext{ + Privileged: &truePtr, + RunAsUser: &rootID, + RunAsGroup: &rootID, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "host-proc", + MountPath: "/host/proc", + ReadOnly: true, + }, + { + Name: "host-sys", + MountPath: "/host/sys", + ReadOnly: true, + }, + { + Name: "cni-net-d", + MountPath: "/etc/cni/net.d", + }, + { + Name: "cni-bin", + MountPath: "/opt/cni/bin", + }, + { + Name: "host-run", + MountPath: "/run", + }, + { + Name: "host-bpf", + MountPath: "/sys/fs/bpf", + MountPropagation: 
&mountPropagationBidirectional, + }, + { + Name: "host-lib-modules", + MountPath: "/lib/modules", + ReadOnly: true, + }, + { + Name: "host-var-lib-cni", + MountPath: "/var/lib/cni", + }, + }, + }, + }, + Tolerations: []corev1.Toleration{ + { + Operator: corev1.TolerationOpExists, + }, + }, + PriorityClassName: "system-node-critical", + HostNetwork: true, + HostPID: true, + HostIPC: true, + TerminationGracePeriodSeconds: &terminationGracePeriodSeconds, + Volumes: []corev1.Volume{ + { + Name: "host-proc", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{Path: "/proc"}, + }, + }, + { + Name: "host-sys", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{Path: "/sys"}, + }, + }, + { + Name: "cni-net-d", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{Path: "/etc/cni/net.d"}, + }, + }, + { + Name: "cni-bin", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{Path: "/opt/cni/bin"}, + }, + }, + { + Name: "host-run", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{Path: "/run"}, + }, + }, + { + Name: "host-bpf", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/sys/fs/bpf", + Type: &hostPathDirectoryOrCreate, + }, + }, + }, + { + Name: "host-lib-modules", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/lib/modules", + Type: &hostPathDirectory, + }, + }, + }, + { + Name: "host-var-lib-cni", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/var/lib/cni", + Type: &hostPathDirectoryOrCreate, + }, + }, + }, + }, + }, + }, + }, + } +} + +// --- Webhook Resources --- + +// getWebhookTLSSecret returns a Secret object containing the webhook's TLS certificates. +func getWebhookTLSSecret(namespace string, cert, key []byte) *corev1.Secret { + return &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: webhookSecretName, + Namespace: namespace, + }, + Type: corev1.SecretTypeTLS, + Data: map[string][]byte{ + corev1.TLSCertKey: cert, + corev1.TLSPrivateKeyKey: key, + }, + } +} + +// getWebhookDeployment returns a Deployment object for the webhook server. 
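+// The Deployment runs two replicas on the host network with required pod
+// anti-affinity on "kubernetes.io/hostname", so it does not depend on the
+// pod network being switched and keeps serving if one of the nodes becomes unavailable.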
+func getWebhookDeployment(namespace, imageName, serviceAccountName string) *appsv1.Deployment { + replicas := int32(2) + terminationGracePeriodSeconds := int64(5) + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: webhookDeploymentName, + Namespace: namespace, + Labels: map[string]string{ + "app": webhookDeploymentName, + }, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &replicas, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": webhookDeploymentName, + }, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": webhookDeploymentName, + }, + }, + Spec: corev1.PodSpec{ + ServiceAccountName: serviceAccountName, + HostNetwork: true, + DNSPolicy: corev1.DNSClusterFirstWithHostNet, + TerminationGracePeriodSeconds: &terminationGracePeriodSeconds, + Affinity: &corev1.Affinity{ + PodAntiAffinity: &corev1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": webhookDeploymentName, + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "webhook", + Image: imageName, + Args: []string{ + "--mode=webhook", + "--health-probe-bind-address=:8082", + }, + Ports: []corev1.ContainerPort{ + { + ContainerPort: webhookPort, + Name: "webhook", + }, + { + Name: "healthz", + ContainerPort: 8082, + Protocol: corev1.ProtocolTCP, + }, + }, + LivenessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/healthz", + Port: intstr.FromString("healthz"), + }, + }, + InitialDelaySeconds: 15, + PeriodSeconds: 20, + }, + ReadinessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/readyz", + Port: intstr.FromString("healthz"), + }, + }, + InitialDelaySeconds: 5, + PeriodSeconds: 10, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "tls-certs", + MountPath: "/etc/tls", + ReadOnly: true, + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "tls-certs", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: webhookSecretName, + }, + }, + }, + }, + }, + }, + }, + } +} + +// getWebhookService returns a Service object for the webhook. +func getWebhookService(namespace string) *corev1.Service { + return &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: webhookServiceName, + Namespace: namespace, + }, + Spec: corev1.ServiceSpec{ + Selector: map[string]string{ + "app": webhookDeploymentName, + }, + Ports: []corev1.ServicePort{ + { + Protocol: corev1.ProtocolTCP, + Port: 443, + TargetPort: intstr.FromInt(webhookPort), + }, + }, + }, + } +} + +// getMutatingWebhookConfiguration returns a MutatingWebhookConfiguration object for annotating pods. 
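+// The webhook intercepts pod CREATE requests in all namespaces except those
+// returned by generateExcludedNamespaces. Its failurePolicy is Ignore, so pod
+// creation is never blocked if the webhook itself becomes unreachable.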
+func getMutatingWebhookConfiguration(namespace string, caBundle []byte) *admissionregistrationv1.MutatingWebhookConfiguration { + path := "/mutate-pod" + failurePolicy := admissionregistrationv1.Ignore + sideEffects := admissionregistrationv1.SideEffectClassNone + + return &admissionregistrationv1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{ + Name: webhookConfigName, + }, + Webhooks: []admissionregistrationv1.MutatingWebhook{ + { + Name: webhookConfigurationName, + AdmissionReviewVersions: []string{"v1"}, + ClientConfig: admissionregistrationv1.WebhookClientConfig{ + Service: &admissionregistrationv1.ServiceReference{ + Name: webhookServiceName, + Namespace: namespace, + Path: &path, + Port: &[]int32{443}[0], + }, + CABundle: caBundle, + }, + Rules: []admissionregistrationv1.RuleWithOperations{ + { + Operations: []admissionregistrationv1.OperationType{admissionregistrationv1.Create}, + Rule: admissionregistrationv1.Rule{ + APIGroups: []string{""}, + APIVersions: []string{"v1"}, + Resources: []string{"pods"}, + }, + }, + }, + NamespaceSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "kubernetes.io/metadata.name", + Operator: metav1.LabelSelectorOpNotIn, + Values: generateExcludedNamespaces(namespace), + }, + }, + }, + FailurePolicy: &failurePolicy, + SideEffects: &sideEffects, + }, + }, + } +} + +// generateExcludedNamespaces creates a list of namespaces to be excluded from webhook processing. +// This includes the webhook's own namespace and CNI module namespaces. +func generateExcludedNamespaces(currentNamespace string) []string { + excluded := []string{currentNamespace} // Exclude the webhook's own namespace (e.g., "cni-switch") + for _, module := range CNIModuleConfigs { + excluded = append(excluded, "d8-"+module) // Exclude "d8-cni-cilium", "d8-cni-flannel", etc. 
+ } + return excluded +} + +// --- RBAC Resources --- + +func getSwitchHelperServiceAccount(namespace string) *corev1.ServiceAccount { + return &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: switchHelperServiceAccountName, + Namespace: namespace, + }, + } +} + +func getWebhookServiceAccount(namespace string) *corev1.ServiceAccount { + return &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: webhookServiceAccountName, + Namespace: namespace, + }, + } +} + +func getSwitchHelperClusterRole() *rbacv1.ClusterRole { + return &rbacv1.ClusterRole{ + ObjectMeta: metav1.ObjectMeta{ + Name: switchHelperClusterRoleName, + }, + Rules: []rbacv1.PolicyRule{ + { + APIGroups: []string{"network.deckhouse.io"}, + Resources: []string{"cnimigrations"}, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"network.deckhouse.io"}, + Resources: []string{"cnimigrations/status"}, + Verbs: []string{"get"}, + }, + { + APIGroups: []string{"network.deckhouse.io"}, + Resources: []string{"cninodemigrations"}, + Verbs: []string{"get", "list", "watch", "create", "update", "patch", "delete"}, + }, + { + APIGroups: []string{"network.deckhouse.io"}, + Resources: []string{"cninodemigrations/status"}, + Verbs: []string{"get", "update", "patch"}, + }, + { + APIGroups: []string{""}, + Resources: []string{"pods"}, + Verbs: []string{"get", "list", "watch", "patch", "update", "delete"}, + }, + }, + } +} + +func getWebhookClusterRole() *rbacv1.ClusterRole { + return &rbacv1.ClusterRole{ + ObjectMeta: metav1.ObjectMeta{ + Name: webhookClusterRoleName, + }, + Rules: []rbacv1.PolicyRule{ + { + APIGroups: []string{"network.deckhouse.io"}, + Resources: []string{"cnimigrations"}, + Verbs: []string{"get", "list", "watch"}, + }, + }, + } +} + +func getSwitchHelperClusterRoleBinding(namespace string) *rbacv1.ClusterRoleBinding { + return &rbacv1.ClusterRoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: switchHelperClusterRoleBindingName, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: "rbac.authorization.k8s.io", + Kind: "ClusterRole", + Name: switchHelperClusterRoleName, + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: switchHelperServiceAccountName, + Namespace: namespace, + }, + }, + } +} + +func getWebhookClusterRoleBinding(namespace string) *rbacv1.ClusterRoleBinding { + return &rbacv1.ClusterRoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: webhookClusterRoleBindingName, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: "rbac.authorization.k8s.io", + Kind: "ClusterRole", + Name: webhookClusterRoleName, + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: webhookServiceAccountName, + Namespace: namespace, + }, + }, + } +} diff --git a/internal/cni/rollback.go b/internal/cni/rollback.go new file mode 100644 index 00000000..bd383eff --- /dev/null +++ b/internal/cni/rollback.go @@ -0,0 +1,28 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package cni + +import ( + "fmt" + "time" +) + +// RunRollback executes the logic for the 'cni-switch rollback' command. +func RunRollback(timeout time.Duration) error { + fmt.Println("Logic for rollback is not implemented yet.") + return nil +} diff --git a/internal/cni/switch.go b/internal/cni/switch.go new file mode 100644 index 00000000..9a7e137b --- /dev/null +++ b/internal/cni/switch.go @@ -0,0 +1,807 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cni + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/deckhouse/deckhouse-cli/internal/cni/api/v1alpha1" + saferequest "github.com/deckhouse/deckhouse-cli/pkg/libsaferequest/client" + admissionregistrationv1 "k8s.io/api/admissionregistration/v1" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// RunSwitch executes the logic for the 'cni-switch switch' command. +func RunSwitch(timeout time.Duration) error { + // 0. Ask for user confirmation + confirmed, err := AskForConfirmation("switch") + if err != nil { + return fmt.Errorf("asking for confirmation: %w", err) + } + if !confirmed { + fmt.Println("Operation cancelled by user.") + return nil + } + + startTime := time.Now() + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + fmt.Printf("🚀 Starting CNI switch (global timeout: %s)\n", timeout) + + // 1. Create a Kubernetes client + safeClient, err := saferequest.NewSafeClient() + if err != nil { + return fmt.Errorf("creating safe client: %w", err) + } + + rtClient, err := safeClient.NewRTClient(v1alpha1.AddToScheme) + if err != nil { + return fmt.Errorf("creating runtime client: %w", err) + } + fmt.Printf("✅ Kubernetes client created (total elapsed: %s)\n\n", + time.Since(startTime).Round(time.Millisecond)) + + // 2. Find the active migration + activeMigration, err := FindActiveMigration(ctx, rtClient) + if err != nil { + return fmt.Errorf("failed to find active migration: %w", err) + } + + if activeMigration == nil { + return fmt.Errorf("no active CNI migration found. Please run 'd8 cni-switch prepare' first") + } + + // Check if the switch is already completed successfully. + for _, cond := range activeMigration.Status.Conditions { + if cond.Type == "Succeeded" && cond.Status == metav1.ConditionTrue { + fmt.Printf("🎉 CNI switch to '%s' is already completed successfully.\n", activeMigration.Spec.TargetCNI) + fmt.Println("\nYou can run 'd8 cni-switch cleanup' to remove auxiliary resources.") + return nil + } + } + + // 3. 
Check if the preparation step was completed successfully + isPrepared := false + for _, cond := range activeMigration.Status.Conditions { + if cond.Type == "PreparationSucceeded" && cond.Status == metav1.ConditionTrue { + isPrepared = true + break + } + } + + if !isPrepared { + return fmt.Errorf("cluster is not ready for switching. " + + "Please ensure the 'prepare' command completed successfully") + } + + // Verify all resources exist + fmt.Println("Verifying all created resources...") + if err = verifyResourcesExist(ctx, rtClient); err != nil { + return fmt.Errorf("verifying resources: %w", err) + } + fmt.Println("- Verified. All necessary resources are present in the cluster") + + fmt.Printf( + "✅ Working with prepared migration '%s' (total elapsed: %s)\n\n", + activeMigration.Name, + time.Since(startTime).Round(time.Millisecond), + ) + + // 4. Enable target CNI + currentCNI := activeMigration.Status.CurrentCNI + targetCNI := activeMigration.Spec.TargetCNI + + // Check if we already allowed the target CNI to start + targetCNIAllowedToStart := false + for _, cond := range activeMigration.Status.Conditions { + if cond.Type == "NodeCleanupSucceeded" && cond.Status == metav1.ConditionTrue { + targetCNIAllowedToStart = true + break + } + } + + fmt.Printf("Enabling target CNI module 'cni-%s'...\n", targetCNI) + if err = toggleModule(ctx, rtClient, "cni-"+strings.ToLower(targetCNI), true); err != nil { + return fmt.Errorf("enabling module '%s': %w", targetCNI, err) + } + + var dsName string + if !targetCNIAllowedToStart { + // Wait for target CNI pods to start initializing + targetModuleName := "cni-" + strings.ToLower(targetCNI) + dsName, err = getDaemonSetNameForCNI(targetModuleName) + if err != nil { + return fmt.Errorf("getting daemonset name for target CNI: %w", err) + } + if err = waitForModulePodsInitializing(ctx, rtClient, targetModuleName, dsName); err != nil { + return fmt.Errorf("waiting for target CNI pods to initialize: %w", err) + } + fmt.Printf("✅ CNI module 'cni-%s' enabled and pods initialized (total elapsed: %s)\n\n", + targetCNI, + time.Since(startTime).Round(time.Millisecond)) + + // 5. Disable current CNI + fmt.Printf("Disabling current CNI module 'cni-%s'...\n", currentCNI) + if err = toggleModule(ctx, rtClient, "cni-"+strings.ToLower(currentCNI), false); err != nil { + return fmt.Errorf("disabling module '%s': %w", currentCNI, err) + } + if err = waitForModule(ctx, rtClient, "cni-"+strings.ToLower(currentCNI), false); err != nil { + return fmt.Errorf("waiting for module '%s' to be disabled: %w", currentCNI, err) + } + + var dsNameCurrent string + dsNameCurrent, err = getDaemonSetNameForCNI("cni-" + strings.ToLower(currentCNI)) + if err != nil { + return fmt.Errorf("getting daemonset name for current CNI: %w", err) + } + if err = waitForModulePodsTermination( + ctx, rtClient, "cni-"+strings.ToLower(currentCNI), dsNameCurrent, + ); err != nil { + return fmt.Errorf("waiting for current CNI pods to terminate: %w", err) + } + + if err = updateCNIMigrationStatus(ctx, rtClient, activeMigration.Name, metav1.Condition{ + Type: "OldCNIDisabled", + Status: metav1.ConditionTrue, + Reason: "ModuleDisabled", + Message: fmt.Sprintf("Module 'cni-%s' was successfully disabled.", currentCNI), + }); err != nil { + return fmt.Errorf("updating CNIMigration status: %w", err) + } + fmt.Printf("✅ CNI module 'cni-%s' disabled (total elapsed: %s)\n\n", + currentCNI, + time.Since(startTime).Round(time.Millisecond)) + + // 6. 
Update phase to Migrate (Triggers cleanup on nodes) + fmt.Println("Updating CNIMigration phase to 'Migrate' to trigger node cleanup...") + if err = updateCNIMigrationPhase(ctx, rtClient, activeMigration.Name, "Migrate"); err != nil { + return fmt.Errorf("updating CNIMigration phase: %w", err) + } + fmt.Printf("✅ CNIMigration phase updated (total elapsed: %s)\n\n", + time.Since(startTime).Round(time.Millisecond)) + + // 7. Wait for nodes to be cleaned up + fmt.Println("Waiting for nodes to be cleaned up by cni-switch-helper...") + if err = waitForNodeConditions(ctx, rtClient, activeMigration, "CleanupSucceeded"); err != nil { + return fmt.Errorf("waiting for node cleanup: %w", err) + } + fmt.Printf("✅ All nodes cleaned up (total elapsed: %s)\n\n", + time.Since(startTime).Round(time.Millisecond)) + + // 8. This status update is CRITICAL. It unblocks the target CNI's init-container. + fmt.Println("Signaling target CNI pods to proceed by updating CNIMigration status...") + if err = updateCNIMigrationStatus(ctx, rtClient, activeMigration.Name, metav1.Condition{ + Type: "NodeCleanupSucceeded", + Status: metav1.ConditionTrue, + Reason: "AllNodesCleanedUp", + Message: "All nodes have been successfully cleaned up from old CNI artifacts.", + }); err != nil { + return fmt.Errorf("updating CNIMigration status: %w", err) + } + fmt.Printf("✅ CNIMigration status updated (total elapsed: %s)\n\n", + time.Since(startTime).Round(time.Millisecond)) + } else { + fmt.Printf("ℹ️ Skipping wait for init, disable old CNI, cleanup as NodeCleanupSucceeded is already True.\n\n") + } + + // 9. Wait for target CNI to be Ready + // Now that NodeCleanupSucceeded is True, the target CNI pods should unblock and become Ready + if err = waitForModule(ctx, rtClient, "cni-"+strings.ToLower(targetCNI), true); err != nil { + return fmt.Errorf("waiting for module '%s' to be ready: %w", targetCNI, err) + } + fmt.Printf("✅ CNI module 'cni-%s' is now Ready (total elapsed: %s)\n\n", + targetCNI, + time.Since(startTime).Round(time.Millisecond)) + + // 10. Delete Mutating Webhook + fmt.Println("Deleting Mutating Webhook...") + if err = deleteMutatingWebhook(ctx, rtClient); err != nil { + return fmt.Errorf("deleting mutating webhook: %w", err) + } + fmt.Printf("✅ Mutating webhook deleted (total elapsed: %s)\n\n", + time.Since(startTime).Round(time.Millisecond)) + + // 11. Signal 'NewCNIEnabled' + fmt.Println("Signaling 'NewCNIEnabled' to proceed with pod restart...") + if err = updateCNIMigrationStatus(ctx, rtClient, activeMigration.Name, metav1.Condition{ + Type: "NewCNIEnabled", + Status: metav1.ConditionTrue, + Reason: "ModuleEnabled", + Message: fmt.Sprintf("Module 'cni-%s' was successfully enabled.", targetCNI), + }); err != nil { + return fmt.Errorf("updating CNIMigration status: %w", err) + } + fmt.Printf("✅ CNIMigration status updated (total elapsed: %s)\n\n", + time.Since(startTime).Round(time.Millisecond)) + + // 12. Wait for pods to be restarted + fmt.Println("Waiting for pods to be restarted on all nodes...") + // We do not hard fail here, as per ADR suggestions. + if err = waitForNodeConditions(ctx, rtClient, activeMigration, "PodsRestarted"); err != nil { + fmt.Printf("⚠️ Warning: Timed out waiting for pods to restart: %v\n", err) + fmt.Println("Please check the cluster status manually. The CNI switch is otherwise complete.") + } else { + fmt.Printf("✅ All pods restarted (total elapsed: %s)\n\n", + time.Since(startTime).Round(time.Millisecond)) + } + + // 13. 
Finalize migration + fmt.Println("Finalizing migration...") + + // Update condition 'Succeeded' using the helper + if err = updateCNIMigrationStatus(ctx, rtClient, activeMigration.Name, metav1.Condition{ + Type: "Succeeded", + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.Now(), + Reason: "MigrationComplete", + Message: "CNI migration completed successfully.", + }); err != nil { + fmt.Printf("\n ⚠️ Warning: Failed to update status (Succeeded): %v. Proceeding...\n", err) + } + + fmt.Printf( + "🎉 CNI switch to '%s' completed successfully! (total time: %s)\n", + targetCNI, + time.Since(startTime).Round(time.Second), + ) + fmt.Println("\nYou can now run 'd8 cni-switch cleanup' to remove auxiliary resources.") + + return nil +} + +func toggleModule(ctx context.Context, cl client.Client, moduleName string, toggle bool) error { + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + mc := &unstructured.Unstructured{} + mc.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "deckhouse.io", + Version: "v1alpha1", + Kind: "ModuleConfig", + }) + + err := cl.Get(ctx, types.NamespacedName{Name: moduleName}, mc) + if err != nil { + fmt.Printf("\r ⚠️ Error getting module config '%s': %v. Retrying...", moduleName, err) + continue + } + + spec, found, err := unstructured.NestedMap(mc.Object, "spec") + if err != nil { + fmt.Printf("\r ⚠️ Error getting spec from module config '%s': %v. Retrying...", moduleName, err) + continue + } + if !found { + spec = make(map[string]any) + } + + // Skip update if value is already correct + if currentToggle, ok := spec["enabled"].(bool); ok && currentToggle == toggle { + return nil + } + + spec["enabled"] = toggle + + if err := unstructured.SetNestedMap(mc.Object, spec, "spec"); err != nil { + fmt.Printf("\r ⚠️ Error setting spec for module config '%s': %v. Retrying...", moduleName, err) + continue + } + + if err := cl.Update(ctx, mc); err != nil { + fmt.Printf("\r ⚠️ Error updating module config '%s': %v. Retrying...", moduleName, err) + continue + } + return nil + } + } +} + +func waitForModule(ctx context.Context, cl client.Client, moduleName string, shouldBeReady bool) error { + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + module := &unstructured.Unstructured{} + module.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "deckhouse.io", + Version: "v1alpha1", + Kind: "Module", + }) + + err := cl.Get(ctx, types.NamespacedName{Name: moduleName}, module) + + if shouldBeReady { + if err != nil { + if errors.IsNotFound(err) { + fmt.Printf("\r Waiting for module '%s': not found yet...", moduleName) + } else { + fmt.Printf("\r ⚠️ Error getting module '%s': %v. Retrying...", moduleName, err) + } + continue + } + + state, found, err := unstructured.NestedString(module.Object, "status", "phase") + if err != nil || !found { + fmt.Printf("\r Waiting for module '%s': status.phase field not found. 
Retrying...", + moduleName) + continue + } + + if state == "Ready" { + fmt.Printf("Module '%s' is Ready.", moduleName) + fmt.Println() + return nil + } + fmt.Printf("\r Waiting for module '%s' to be Ready, current state: %s", moduleName, state) + + } else { // should NOT be ready (disabled) + err := cl.Get(ctx, types.NamespacedName{Name: moduleName}, module) + if err != nil { + if errors.IsNotFound(err) { + fmt.Printf("\r Module '%s' is not found, assuming disabled.", moduleName) + fmt.Println() + return nil + } + fmt.Printf("\r ⚠️ Error getting module '%s': %v. Retrying...", moduleName, err) + continue + } + + // Check conditions to see if it's disabled + conditions, found, err := unstructured.NestedSlice(module.Object, "status", "conditions") + if err != nil || !found { + fmt.Printf("\r Waiting for module '%s' status conditions. Retrying...", moduleName) + continue + } + + isReadyFound := false + for _, c := range conditions { + condition, ok := c.(map[string]any) + if !ok { + continue + } + + condType, found, err := unstructured.NestedString(condition, "type") + if err != nil || !found { + continue + } + + if condType == "IsReady" { + isReadyFound = true + condStatus, _, _ := unstructured.NestedString(condition, "status") + if condStatus == "False" { + fmt.Printf("\r✅ Module '%s' is disabled (IsReady=False).\n", moduleName) + fmt.Println() + return nil + } + } + } + + if !isReadyFound { + fmt.Printf("\r Waiting for module '%s' to be disabled, 'IsReady' condition not found...", + moduleName) + } else { + fmt.Printf("\r Waiting for module '%s' to be disabled (IsReady=False)...", moduleName) + } + } + } + } +} + +func updateCNIMigrationStatus( + ctx context.Context, cl client.Client, migrationName string, newCondition metav1.Condition, +) error { + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + migration := &v1alpha1.CNIMigration{} + err := cl.Get(ctx, types.NamespacedName{Name: migrationName}, migration) + if err != nil { + fmt.Printf("\r ⚠️ Error getting CNIMigration '%s': %v. Retrying...", migrationName, err) + continue + } + + patchedMigration := migration.DeepCopy() + newCondition.LastTransitionTime = metav1.Now() + + found := false + for i, cond := range patchedMigration.Status.Conditions { + if cond.Type == newCondition.Type { + // Check if update is needed + if cond.Status == newCondition.Status && cond.Reason == newCondition.Reason && + cond.Message == newCondition.Message { + return nil // Already up to date + } + patchedMigration.Status.Conditions[i] = newCondition + found = true + break + } + } + + if !found { + patchedMigration.Status.Conditions = append(patchedMigration.Status.Conditions, newCondition) + } + + err = cl.Status().Patch(ctx, patchedMigration, client.MergeFrom(migration)) + if err == nil { + return nil + } + + if !errors.IsConflict(err) { + fmt.Printf("\r ⚠️ Error patching CNIMigration status: %v. 
Retrying...", err) + } + } + } +} + +func waitForNodeConditions( + ctx context.Context, cl client.Client, cniMigration *v1alpha1.CNIMigration, conditionType string, +) error { + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + reportedFailures := make(map[string]struct{}) + + // Helper to update status stats + updateStats := func(total, succeeded, failed int) { + // Fetch fresh object to avoid conflicts + freshMigration := &v1alpha1.CNIMigration{} + if err := cl.Get(ctx, types.NamespacedName{Name: cniMigration.Name}, freshMigration); err != nil { + return // Ignore error, will retry next tick + } + + if freshMigration.Status.NodesTotal == total && + freshMigration.Status.NodesSucceeded == succeeded && + freshMigration.Status.NodesFailed == failed { + return + } + + patched := freshMigration.DeepCopy() + patched.Status.NodesTotal = total + patched.Status.NodesSucceeded = succeeded + patched.Status.NodesFailed = failed + + _ = cl.Status().Patch(ctx, patched, client.MergeFrom(freshMigration)) + } + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + // Get current total nodes + nodeList := &corev1.NodeList{} + if err := cl.List(ctx, nodeList); err != nil { + fmt.Printf("\r ⚠️ Error listing nodes: %v. Retrying...", err) + continue + } + totalNodes := len(nodeList.Items) + + // Get current node migrations + nodeMigrations := &v1alpha1.CNINodeMigrationList{} + if err := cl.List(ctx, nodeMigrations); err != nil { + fmt.Printf("\r ⚠️ Error listing CNINodeMigration resources: %v. Retrying...", err) + continue + } + + succeededNodes := 0 + failedNodes := 0 + + for _, nm := range nodeMigrations.Items { + if !metav1.IsControlledBy(&nm, cniMigration) { + continue + } + + // Check specific condition success + isSucceeded := false + for _, cond := range nm.Status.Conditions { + if cond.Type == conditionType && cond.Status == metav1.ConditionTrue { + succeededNodes++ + isSucceeded = true + + // Check if it was previously failed + if _, wasFailed := reportedFailures[nm.Name]; wasFailed { + fmt.Printf("\r\033[K") // Clear line + fmt.Printf(" ✅ Node '%s' has recovered.\n", nm.Name) + delete(reportedFailures, nm.Name) + } + break + } + } + + if isSucceeded { + continue + } + + // Check failure + for _, cond := range nm.Status.Conditions { + if cond.Type == "Failed" && cond.Status == metav1.ConditionTrue { + failedNodes++ + if _, reported := reportedFailures[nm.Name]; !reported { + // Clear line, print error, then let the progress bar overwrite next + fmt.Printf("\r\033[K") // Clear current line + fmt.Printf(" ❌ Node '%s' failed: %s\n", nm.Name, cond.Message) + reportedFailures[nm.Name] = struct{}{} + } + break + } + } + } + + // Update status in CNIMigration + go updateStats(totalNodes, succeededNodes, failedNodes) + + // Output progress + fmt.Printf("\r Progress: %d/%d nodes completed, %d failed. ", + succeededNodes, totalNodes, failedNodes) + + // 5. 
Check exit condition + if succeededNodes >= totalNodes { + fmt.Println() + return nil + } + } + } +} + +func deleteMutatingWebhook(ctx context.Context, cl client.Client) error { + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + fmt.Printf(" Deleting mutating webhook '%s'...", webhookConfigName) + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + webhook := &admissionregistrationv1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{ + Name: webhookConfigName, + }, + } + if err := cl.Delete(ctx, webhook); err != nil { + if errors.IsNotFound(err) { + fmt.Println("\r Mutating webhook not found, assuming already deleted.") + return nil + } + fmt.Printf("\r ⚠️ Error deleting mutating webhook: %v. Retrying...", err) + continue + } + fmt.Println("- Mutating webhook deleted") + return nil + } + } +} + +// getDaemonSetNameForCNI returns the name of the main DaemonSet for a given CNI module. +func getDaemonSetNameForCNI(cniModule string) (string, error) { + switch cniModule { + case "cni-cilium": + return "agent", nil + case "cni-flannel": + return "flannel", nil + case "cni-simple-bridge": + return "simple-bridge", nil + default: + return "", fmt.Errorf("unknown CNI module: %s", cniModule) + } +} + +// waitForModulePodsInitializing waits for all pods of a module's daemonset to be in the 'Initializing' state, +// specifically waiting in the init container. +func waitForModulePodsInitializing(ctx context.Context, cl client.Client, moduleName string, dsName string) error { + fmt.Printf(" Waiting for pods of module '%s' to enter 'Initializing' state...", moduleName) + + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + // Get the DaemonSet + ds := &appsv1.DaemonSet{} + err := cl.Get(ctx, types.NamespacedName{Name: dsName, Namespace: "d8-" + moduleName}, ds) + if err != nil { + if errors.IsNotFound(err) { + fmt.Printf("\r Waiting for DaemonSet '%s' in namespace 'd8-%s': not found...", dsName, moduleName) + continue + } + fmt.Printf("\r Error getting DaemonSet '%s' in namespace 'd8-%s': %v. Retrying...", + dsName, moduleName, err) + continue + } + + // Check if pods are scheduled + if ds.Status.DesiredNumberScheduled == 0 { + fmt.Printf("\r Waiting for DaemonSet '%s' to schedule pods...", dsName) + continue + } + + // List pods + podList := &corev1.PodList{} + opts := []client.ListOption{ + client.InNamespace("d8-" + moduleName), + client.MatchingLabels(ds.Spec.Selector.MatchLabels), + } + if err := cl.List(ctx, podList, opts...); err != nil { + fmt.Printf("\r Error listing pods for module '%s': %v. 
Retrying...", moduleName, err) + continue + } + + if len(podList.Items) == 0 { + fmt.Printf("\r Waiting for pods of DaemonSet '%s' to be created...", dsName) + continue + } + + initializingPods := 0 + for _, pod := range podList.Items { + // Check pod status + if pod.Status.Phase == corev1.PodPending || pod.Status.Phase == corev1.PodRunning { + for _, initStatus := range pod.Status.InitContainerStatuses { + if initStatus.Name == "cni-switch-init-checker" && + (initStatus.State.Waiting != nil || initStatus.State.Running != nil) { + // Pod is waiting in or running our init-container + initializingPods++ + break + } + } + } + } + + fmt.Printf("\r Progress: %d/%d pods are in 'Initializing' state.", + initializingPods, ds.Status.DesiredNumberScheduled) + + if int32(initializingPods) >= ds.Status.DesiredNumberScheduled { + fmt.Println("\n- All pods for target CNI are correctly waiting in the init-container.") + return nil + } + } + } +} + +// waitForModulePodsTermination waits for all pods of a module's daemonset to be terminated. +func waitForModulePodsTermination(ctx context.Context, cl client.Client, moduleName string, dsName string) error { + fmt.Printf(" Waiting for pods of module '%s' to terminate...", moduleName) + + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + // List pods with label app= in namespace d8- + podList := &corev1.PodList{} + opts := []client.ListOption{ + client.InNamespace("d8-" + moduleName), + client.MatchingLabels(map[string]string{"app": dsName}), + } + if err := cl.List(ctx, podList, opts...); err != nil { + fmt.Printf("\r Error listing pods for module '%s': %v. Retrying...", moduleName, err) + continue + } + + if len(podList.Items) == 0 { + fmt.Println("- All pods for disabled CNI module are terminated.") + return nil + } + + fmt.Printf("\r Waiting for %d pods of module '%s' to terminate...", len(podList.Items), moduleName) + } + } +} + +// verifyResourcesExist checks if all expected resources are present in the cluster. 
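+// It is called by RunSwitch as a fail-fast check: if any auxiliary object
+// created during the 'prepare' step is missing, the switch is aborted before
+// any CNI module is toggled.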
+func verifyResourcesExist(ctx context.Context, rtClient client.Client) error { + resources := []struct { + kind string + name string + namespace string + obj client.Object + }{ + {"Namespace", cniSwitchNamespace, "", &corev1.Namespace{}}, + {"ServiceAccount", switchHelperServiceAccountName, cniSwitchNamespace, &corev1.ServiceAccount{}}, + {"ServiceAccount", webhookServiceAccountName, cniSwitchNamespace, &corev1.ServiceAccount{}}, + {"ClusterRole", switchHelperClusterRoleName, "", &rbacv1.ClusterRole{}}, + {"ClusterRole", webhookClusterRoleName, "", &rbacv1.ClusterRole{}}, + {"ClusterRoleBinding", switchHelperClusterRoleBindingName, "", &rbacv1.ClusterRoleBinding{}}, + {"ClusterRoleBinding", webhookClusterRoleBindingName, "", &rbacv1.ClusterRoleBinding{}}, + {"Secret", webhookSecretName, cniSwitchNamespace, &corev1.Secret{}}, + {"Service", webhookServiceName, cniSwitchNamespace, &corev1.Service{}}, + {"MutatingWebhookConfiguration", webhookConfigName, "", &admissionregistrationv1.MutatingWebhookConfiguration{}}, + {"DaemonSet", switchHelperDaemonSetName, cniSwitchNamespace, &appsv1.DaemonSet{}}, + {"Deployment", webhookDeploymentName, cniSwitchNamespace, &appsv1.Deployment{}}, + } + + for _, r := range resources { + key := client.ObjectKey{Name: r.name, Namespace: r.namespace} + if err := rtClient.Get(ctx, key, r.obj); err != nil { + return fmt.Errorf("resource %s '%s' not found: %w", r.kind, r.name, err) + } + } + return nil +} + +func updateCNIMigrationPhase(ctx context.Context, cl client.Client, migrationName string, phase string) error { + update := func() error { // TDEN переписать по типу других + migration := &v1alpha1.CNIMigration{} + if err := cl.Get(ctx, types.NamespacedName{Name: migrationName}, migration); err != nil { + return fmt.Errorf("getting CNIMigration: %w", err) + } + + if migration.Spec.Phase == phase { + return nil + } + + patchedMigration := migration.DeepCopy() + patchedMigration.Spec.Phase = phase + if err := cl.Patch(ctx, patchedMigration, client.MergeFrom(migration)); err != nil { + return fmt.Errorf("patching CNIMigration: %w", err) + } + return nil + } + + if err := update(); err == nil { + return nil + } else { + fmt.Printf("\r ⚠️ %v. Retrying...", err) + } + + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + if err := update(); err != nil { + fmt.Printf("\r ⚠️ %v. 
Retrying...", err) + continue + } + return nil + } + } +} From 7e3346fdeb54f6abdd15853c4c39f96a10eb557e Mon Sep 17 00:00:00 2001 From: Denis Tarabrin Date: Thu, 11 Dec 2025 11:21:35 +0400 Subject: [PATCH 02/12] ++ Signed-off-by: Denis Tarabrin --- internal/cni/switch.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/cni/switch.go b/internal/cni/switch.go index 9a7e137b..13f3e198 100644 --- a/internal/cni/switch.go +++ b/internal/cni/switch.go @@ -765,7 +765,7 @@ func verifyResourcesExist(ctx context.Context, rtClient client.Client) error { } func updateCNIMigrationPhase(ctx context.Context, cl client.Client, migrationName string, phase string) error { - update := func() error { // TDEN переписать по типу других + update := func() error { migration := &v1alpha1.CNIMigration{} if err := cl.Get(ctx, types.NamespacedName{Name: migrationName}, migration); err != nil { return fmt.Errorf("getting CNIMigration: %w", err) From f03c592c4795c5c606c45603e3619d7c1d77db6a Mon Sep 17 00:00:00 2001 From: Denis Tarabrin Date: Thu, 11 Dec 2025 14:54:34 +0400 Subject: [PATCH 03/12] ++ Signed-off-by: Denis Tarabrin --- internal/cni/switch.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/internal/cni/switch.go b/internal/cni/switch.go index 13f3e198..c3c70eff 100644 --- a/internal/cni/switch.go +++ b/internal/cni/switch.go @@ -404,8 +404,7 @@ func waitForModule(ctx context.Context, cl client.Client, moduleName string, sho isReadyFound = true condStatus, _, _ := unstructured.NestedString(condition, "status") if condStatus == "False" { - fmt.Printf("\r✅ Module '%s' is disabled (IsReady=False).\n", moduleName) - fmt.Println() + fmt.Printf("\r- Module '%s' is disabled (IsReady=False).\n", moduleName) return nil } } @@ -606,7 +605,6 @@ func deleteMutatingWebhook(ctx context.Context, cl client.Client) error { fmt.Printf("\r ⚠️ Error deleting mutating webhook: %v. Retrying...", err) continue } - fmt.Println("- Mutating webhook deleted") return nil } } @@ -724,7 +722,7 @@ func waitForModulePodsTermination(ctx context.Context, cl client.Client, moduleN } if len(podList.Items) == 0 { - fmt.Println("- All pods for disabled CNI module are terminated.") + fmt.Println("\n- All pods for disabled CNI module are terminated.") return nil } From c3fe1afd5ce7e6b659f705d774d09e23f97a4a50 Mon Sep 17 00:00:00 2001 From: Denis Tarabrin Date: Thu, 11 Dec 2025 15:35:38 +0400 Subject: [PATCH 04/12] ++ Signed-off-by: Denis Tarabrin --- internal/cni/switch.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/internal/cni/switch.go b/internal/cni/switch.go index c3c70eff..ccbaefb8 100644 --- a/internal/cni/switch.go +++ b/internal/cni/switch.go @@ -224,7 +224,7 @@ func RunSwitch(timeout time.Duration) error { if err = deleteMutatingWebhook(ctx, rtClient); err != nil { return fmt.Errorf("deleting mutating webhook: %w", err) } - fmt.Printf("✅ Mutating webhook deleted (total elapsed: %s)\n\n", + fmt.Printf("\n✅ Mutating webhook deleted (total elapsed: %s)\n\n", time.Since(startTime).Round(time.Millisecond)) // 11. Signal 'NewCNIEnabled' @@ -253,8 +253,6 @@ func RunSwitch(timeout time.Duration) error { // 13. 
Finalize migration fmt.Println("Finalizing migration...") - - // Update condition 'Succeeded' using the helper if err = updateCNIMigrationStatus(ctx, rtClient, activeMigration.Name, metav1.Condition{ Type: "Succeeded", Status: metav1.ConditionTrue, From ab2dee2cdc5aaff4f798b2415ed3ec19ee463375 Mon Sep 17 00:00:00 2001 From: Denis Tarabrin Date: Thu, 11 Dec 2025 16:56:40 +0400 Subject: [PATCH 05/12] ++ Signed-off-by: Denis Tarabrin --- internal/cni/cleanup.go | 42 +++++++++++++++++++++++------------------ internal/cni/switch.go | 1 - 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/internal/cni/cleanup.go b/internal/cni/cleanup.go index 53b15fdc..0051aea3 100644 --- a/internal/cni/cleanup.go +++ b/internal/cni/cleanup.go @@ -80,24 +80,7 @@ func RunCleanup(timeout time.Duration) error { } fmt.Println("✅ Active controllers stopped") - // 4. Delete all CNINodeMigration resources - fmt.Println("\nDeleting all CNINodeMigration resources...") - nodeMigrations := &v1alpha1.CNINodeMigrationList{} - if err = rtClient.List(ctx, nodeMigrations); err != nil && !strings.Contains(err.Error(), "no matches for kind") { - return fmt.Errorf("listing CNINodeMigrations: %w", err) - } - for _, nm := range nodeMigrations.Items { - // Remove finalizers to ensure deletion even if controller is down - if err = removeFinalizers(ctx, rtClient, &nm); err != nil { - fmt.Printf("⚠️ Warning: failed to remove finalizers from %s: %v\n", nm.Name, err) - } - if err = deleteAndWait(ctx, rtClient, &nm); err != nil { - return err - } - } - fmt.Println("✅ All CNINodeMigration resources deleted") - - // 5. Delete all CNIMigration resources + // 4. Delete all CNIMigration resources fmt.Println("\nDeleting all CNIMigration resources...") migrations := &v1alpha1.CNIMigrationList{} if err = rtClient.List(ctx, migrations); err != nil && !strings.Contains(err.Error(), "no matches for kind") { @@ -114,6 +97,23 @@ func RunCleanup(timeout time.Duration) error { } fmt.Println("✅ All CNIMigration resources deleted") + // 5. Delete all CNINodeMigration resources + fmt.Println("\nDeleting all CNINodeMigration resources...") + nodeMigrations := &v1alpha1.CNINodeMigrationList{} + if err = rtClient.List(ctx, nodeMigrations); err != nil && !strings.Contains(err.Error(), "no matches for kind") { + return fmt.Errorf("listing CNINodeMigrations: %w", err) + } + for _, nm := range nodeMigrations.Items { + // Remove finalizers to ensure deletion even if controller is down + if err = removeFinalizers(ctx, rtClient, &nm); err != nil { + fmt.Printf("⚠️ Warning: failed to remove finalizers from %s: %v\n", nm.Name, err) + } + if err = deleteAndWait(ctx, rtClient, &nm); err != nil { + return err + } + } + fmt.Println("✅ All CNINodeMigration resources deleted") + // 6. Remove annotations from all pods if err = removePodAnnotations(ctx, rtClient); err != nil { // Non-fatal, print a warning @@ -262,6 +262,12 @@ func waitForResourceDeletion(ctx context.Context, rtClient client.Client, obj cl fmt.Printf("error: %v\n", err) return fmt.Errorf("getting %s '%s': %w", kind, key.Name, err) } + + // If the resource is still there, check for finalizers and remove them + // to ensure it doesn't get stuck in Terminating state. + if len(obj.GetFinalizers()) > 0 { + _ = removeFinalizers(ctx, rtClient, obj) + } } } } diff --git a/internal/cni/switch.go b/internal/cni/switch.go index ccbaefb8..2d00055c 100644 --- a/internal/cni/switch.go +++ b/internal/cni/switch.go @@ -242,7 +242,6 @@ func RunSwitch(timeout time.Duration) error { // 12. 
Wait for pods to be restarted fmt.Println("Waiting for pods to be restarted on all nodes...") - // We do not hard fail here, as per ADR suggestions. if err = waitForNodeConditions(ctx, rtClient, activeMigration, "PodsRestarted"); err != nil { fmt.Printf("⚠️ Warning: Timed out waiting for pods to restart: %v\n", err) fmt.Println("Please check the cluster status manually. The CNI switch is otherwise complete.") From 4dac11168481b0f6516c1e8fdeec62652ad8f76f Mon Sep 17 00:00:00 2001 From: Denis Tarabrin Date: Thu, 11 Dec 2025 18:12:02 +0400 Subject: [PATCH 06/12] ++ Signed-off-by: Denis Tarabrin --- internal/cni/common.go | 2 +- internal/cni/resources.go | 14 +++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/internal/cni/common.go b/internal/cni/common.go index 0fc646f3..ad867e64 100644 --- a/internal/cni/common.go +++ b/internal/cni/common.go @@ -56,7 +56,7 @@ const ( webhookSecretName = "cni-switch-webhook-tls" webhookConfigName = "cni-switch-pod-annotator" webhookConfigurationName = "annotator.cni-switch.deckhouse.io" - webhookPort = 9443 + webhookPort = 42443 // Annotations EffectiveCNIAnnotation = "effective-cni.network.deckhouse.io" diff --git a/internal/cni/resources.go b/internal/cni/resources.go index f7801ccb..fe47446c 100644 --- a/internal/cni/resources.go +++ b/internal/cni/resources.go @@ -55,10 +55,13 @@ func getSwitchHelperDaemonSet(namespace, imageName string) *appsv1.DaemonSet { { Name: "helper", Image: imageName, + Args: []string{ + "--health-probe-bind-address=:42281", + }, Ports: []corev1.ContainerPort{ { Name: "healthz", - ContainerPort: 8081, + ContainerPort: 42281, Protocol: corev1.ProtocolTCP, }, }, @@ -267,6 +270,11 @@ func getWebhookDeployment(namespace, imageName, serviceAccountName string) *apps HostNetwork: true, DNSPolicy: corev1.DNSClusterFirstWithHostNet, TerminationGracePeriodSeconds: &terminationGracePeriodSeconds, + Tolerations: []corev1.Toleration{ + { + Operator: corev1.TolerationOpExists, + }, + }, Affinity: &corev1.Affinity{ PodAntiAffinity: &corev1.PodAntiAffinity{ RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ @@ -287,7 +295,7 @@ func getWebhookDeployment(namespace, imageName, serviceAccountName string) *apps Image: imageName, Args: []string{ "--mode=webhook", - "--health-probe-bind-address=:8082", + "--health-probe-bind-address=:42282", }, Ports: []corev1.ContainerPort{ { @@ -296,7 +304,7 @@ func getWebhookDeployment(namespace, imageName, serviceAccountName string) *apps }, { Name: "healthz", - ContainerPort: 8082, + ContainerPort: 42282, Protocol: corev1.ProtocolTCP, }, }, From 525d34ac659ab26bf181c92cd47e75ca79b71812 Mon Sep 17 00:00:00 2001 From: Denis Tarabrin Date: Thu, 11 Dec 2025 19:30:13 +0400 Subject: [PATCH 07/12] ++ Signed-off-by: Denis Tarabrin --- internal/cni/cleanup.go | 2 +- internal/cni/prepare.go | 12 ++++---- internal/cni/switch.go | 68 ++++++++++++++++++++--------------------- 3 files changed, 40 insertions(+), 42 deletions(-) diff --git a/internal/cni/cleanup.go b/internal/cni/cleanup.go index 0051aea3..acce2b42 100644 --- a/internal/cni/cleanup.go +++ b/internal/cni/cleanup.go @@ -299,7 +299,7 @@ func removePodAnnotations(ctx context.Context, rtClient client.Client) error { continue } podsPatched++ - fmt.Printf("\r Patched %d pods...", podsPatched) + fmt.Printf("\r\033[K Patched %d pods...", podsPatched) } } diff --git a/internal/cni/prepare.go b/internal/cni/prepare.go index b23d28eb..7fd01847 100644 --- a/internal/cni/prepare.go +++ b/internal/cni/prepare.go @@ -448,7 +448,7 
@@ func waitForDaemonSetReady(ctx context.Context, rtClient client.Client, ds *apps // This is the exit condition for the loop. if ds.Status.DesiredNumberScheduled == ds.Status.NumberReady && ds.Status.NumberUnavailable == 0 { fmt.Printf( - "\r Waiting for DaemonSet... %d/%d pods ready\n", + "\r\033[K Waiting for DaemonSet... %d/%d pods ready\n", ds.Status.NumberReady, ds.Status.DesiredNumberScheduled, ) @@ -457,7 +457,7 @@ func waitForDaemonSetReady(ctx context.Context, rtClient client.Client, ds *apps // This is the progress update. fmt.Printf( - "\r Waiting for DaemonSet... %d/%d pods ready", + "\r\033[K Waiting for DaemonSet... %d/%d pods ready", ds.Status.NumberReady, ds.Status.DesiredNumberScheduled, ) @@ -485,7 +485,7 @@ func waitForDeploymentReady(ctx context.Context, rtClient client.Client, dep *ap if dep.Spec.Replicas != nil && dep.Status.ReadyReplicas >= *dep.Spec.Replicas && dep.Status.UnavailableReplicas == 0 { fmt.Printf( - "\r Waiting for Deployment... %d/%d replicas ready\n", + "\r\033[K Waiting for Deployment... %d/%d replicas ready\n", dep.Status.ReadyReplicas, *dep.Spec.Replicas, ) @@ -495,7 +495,7 @@ func waitForDeploymentReady(ctx context.Context, rtClient client.Client, dep *ap // This is the progress update. if dep.Spec.Replicas != nil { fmt.Printf( - "\r Waiting for Deployment... %d/%d replicas ready", + "\r\033[K Waiting for Deployment... %d/%d replicas ready", dep.Status.ReadyReplicas, *dep.Spec.Replicas, ) @@ -536,10 +536,10 @@ func waitForNodesPrepared(ctx context.Context, rtClient client.Client) error { } } - fmt.Printf("\r Progress: %d/%d nodes prepared...", readyNodes, totalNodes) + fmt.Printf("\r\033[K Progress: %d/%d nodes prepared...", readyNodes, totalNodes) if readyNodes >= totalNodes && totalNodes > 0 { - fmt.Printf("\r Progress: %d/%d nodes prepared...\n", readyNodes, totalNodes) + fmt.Printf("\r\033[K Progress: %d/%d nodes prepared...\n", readyNodes, totalNodes) return nil } } diff --git a/internal/cni/switch.go b/internal/cni/switch.go index 2d00055c..a66f0a35 100644 --- a/internal/cni/switch.go +++ b/internal/cni/switch.go @@ -290,13 +290,13 @@ func toggleModule(ctx context.Context, cl client.Client, moduleName string, togg err := cl.Get(ctx, types.NamespacedName{Name: moduleName}, mc) if err != nil { - fmt.Printf("\r ⚠️ Error getting module config '%s': %v. Retrying...", moduleName, err) + fmt.Printf("\r\033[K ⚠️ Error getting module config '%s': %v. Retrying...", moduleName, err) continue } spec, found, err := unstructured.NestedMap(mc.Object, "spec") if err != nil { - fmt.Printf("\r ⚠️ Error getting spec from module config '%s': %v. Retrying...", moduleName, err) + fmt.Printf("\r\033[K ⚠️ Error getting spec from module config '%s': %v. Retrying...", moduleName, err) continue } if !found { @@ -311,12 +311,12 @@ func toggleModule(ctx context.Context, cl client.Client, moduleName string, togg spec["enabled"] = toggle if err := unstructured.SetNestedMap(mc.Object, spec, "spec"); err != nil { - fmt.Printf("\r ⚠️ Error setting spec for module config '%s': %v. Retrying...", moduleName, err) + fmt.Printf("\r\033[K ⚠️ Error setting spec for module config '%s': %v. Retrying...", moduleName, err) continue } if err := cl.Update(ctx, mc); err != nil { - fmt.Printf("\r ⚠️ Error updating module config '%s': %v. Retrying...", moduleName, err) + fmt.Printf("\r\033[K ⚠️ Error updating module config '%s': %v. 
Retrying...", moduleName, err) continue } return nil @@ -345,16 +345,16 @@ func waitForModule(ctx context.Context, cl client.Client, moduleName string, sho if shouldBeReady { if err != nil { if errors.IsNotFound(err) { - fmt.Printf("\r Waiting for module '%s': not found yet...", moduleName) + fmt.Printf("\r\033[K Waiting for module '%s': not found yet...", moduleName) } else { - fmt.Printf("\r ⚠️ Error getting module '%s': %v. Retrying...", moduleName, err) + fmt.Printf("\r\033[K ⚠️ Error getting module '%s': %v. Retrying...", moduleName, err) } continue } state, found, err := unstructured.NestedString(module.Object, "status", "phase") if err != nil || !found { - fmt.Printf("\r Waiting for module '%s': status.phase field not found. Retrying...", + fmt.Printf("\r\033[K Waiting for module '%s': status.phase field not found. Retrying...", moduleName) continue } @@ -364,24 +364,24 @@ func waitForModule(ctx context.Context, cl client.Client, moduleName string, sho fmt.Println() return nil } - fmt.Printf("\r Waiting for module '%s' to be Ready, current state: %s", moduleName, state) + fmt.Printf("\r\033[K Waiting for module '%s' to be Ready, current state: %s", moduleName, state) } else { // should NOT be ready (disabled) err := cl.Get(ctx, types.NamespacedName{Name: moduleName}, module) if err != nil { if errors.IsNotFound(err) { - fmt.Printf("\r Module '%s' is not found, assuming disabled.", moduleName) + fmt.Printf("\r\033[K Module '%s' is not found, assuming disabled.", moduleName) fmt.Println() return nil } - fmt.Printf("\r ⚠️ Error getting module '%s': %v. Retrying...", moduleName, err) + fmt.Printf("\r\033[K ⚠️ Error getting module '%s': %v. Retrying...", moduleName, err) continue } // Check conditions to see if it's disabled conditions, found, err := unstructured.NestedSlice(module.Object, "status", "conditions") if err != nil || !found { - fmt.Printf("\r Waiting for module '%s' status conditions. Retrying...", moduleName) + fmt.Printf("\r\033[K Waiting for module '%s' status conditions. Retrying...", moduleName) continue } @@ -401,17 +401,17 @@ func waitForModule(ctx context.Context, cl client.Client, moduleName string, sho isReadyFound = true condStatus, _, _ := unstructured.NestedString(condition, "status") if condStatus == "False" { - fmt.Printf("\r- Module '%s' is disabled (IsReady=False).\n", moduleName) + fmt.Printf("\r\033[K- Module '%s' is disabled (IsReady=False).\n", moduleName) return nil } } } if !isReadyFound { - fmt.Printf("\r Waiting for module '%s' to be disabled, 'IsReady' condition not found...", + fmt.Printf("\r\033[K Waiting for module '%s' to be disabled, 'IsReady' condition not found...", moduleName) } else { - fmt.Printf("\r Waiting for module '%s' to be disabled (IsReady=False)...", moduleName) + fmt.Printf("\r\033[K Waiting for module '%s' to be disabled (IsReady=False)...", moduleName) } } } @@ -432,7 +432,7 @@ func updateCNIMigrationStatus( migration := &v1alpha1.CNIMigration{} err := cl.Get(ctx, types.NamespacedName{Name: migrationName}, migration) if err != nil { - fmt.Printf("\r ⚠️ Error getting CNIMigration '%s': %v. Retrying...", migrationName, err) + fmt.Printf("\r\033[K ⚠️ Error getting CNIMigration '%s': %v. Retrying...", migrationName, err) continue } @@ -463,7 +463,7 @@ func updateCNIMigrationStatus( } if !errors.IsConflict(err) { - fmt.Printf("\r ⚠️ Error patching CNIMigration status: %v. Retrying...", err) + fmt.Printf("\r\033[K ⚠️ Error patching CNIMigration status: %v. 
Retrying...", err) } } } @@ -507,7 +507,7 @@ func waitForNodeConditions( // Get current total nodes nodeList := &corev1.NodeList{} if err := cl.List(ctx, nodeList); err != nil { - fmt.Printf("\r ⚠️ Error listing nodes: %v. Retrying...", err) + fmt.Printf("\r\033[K ⚠️ Error listing nodes: %v. Retrying...", err) continue } totalNodes := len(nodeList.Items) @@ -515,7 +515,7 @@ func waitForNodeConditions( // Get current node migrations nodeMigrations := &v1alpha1.CNINodeMigrationList{} if err := cl.List(ctx, nodeMigrations); err != nil { - fmt.Printf("\r ⚠️ Error listing CNINodeMigration resources: %v. Retrying...", err) + fmt.Printf("\r\033[K ⚠️ Error listing CNINodeMigration resources: %v. Retrying...", err) continue } @@ -536,8 +536,7 @@ func waitForNodeConditions( // Check if it was previously failed if _, wasFailed := reportedFailures[nm.Name]; wasFailed { - fmt.Printf("\r\033[K") // Clear line - fmt.Printf(" ✅ Node '%s' has recovered.\n", nm.Name) + fmt.Printf("\r\033[K ✅ Node '%s' has recovered.\n", nm.Name) delete(reportedFailures, nm.Name) } break @@ -554,8 +553,7 @@ func waitForNodeConditions( failedNodes++ if _, reported := reportedFailures[nm.Name]; !reported { // Clear line, print error, then let the progress bar overwrite next - fmt.Printf("\r\033[K") // Clear current line - fmt.Printf(" ❌ Node '%s' failed: %s\n", nm.Name, cond.Message) + fmt.Printf("\r\033[K ❌ Node '%s' failed: %s\n", nm.Name, cond.Message) reportedFailures[nm.Name] = struct{}{} } break @@ -567,7 +565,7 @@ func waitForNodeConditions( go updateStats(totalNodes, succeededNodes, failedNodes) // Output progress - fmt.Printf("\r Progress: %d/%d nodes completed, %d failed. ", + fmt.Printf("\r\033[K Progress: %d/%d nodes completed, %d failed. ", succeededNodes, totalNodes, failedNodes) // 5. Check exit condition @@ -596,10 +594,10 @@ func deleteMutatingWebhook(ctx context.Context, cl client.Client) error { } if err := cl.Delete(ctx, webhook); err != nil { if errors.IsNotFound(err) { - fmt.Println("\r Mutating webhook not found, assuming already deleted.") + fmt.Println("\r\033[K Mutating webhook not found, assuming already deleted.") return nil } - fmt.Printf("\r ⚠️ Error deleting mutating webhook: %v. Retrying...", err) + fmt.Printf("\r\033[K ⚠️ Error deleting mutating webhook: %v. Retrying...", err) continue } return nil @@ -639,17 +637,17 @@ func waitForModulePodsInitializing(ctx context.Context, cl client.Client, module err := cl.Get(ctx, types.NamespacedName{Name: dsName, Namespace: "d8-" + moduleName}, ds) if err != nil { if errors.IsNotFound(err) { - fmt.Printf("\r Waiting for DaemonSet '%s' in namespace 'd8-%s': not found...", dsName, moduleName) + fmt.Printf("\r\033[K Waiting for DaemonSet '%s' in namespace 'd8-%s': not found...", dsName, moduleName) continue } - fmt.Printf("\r Error getting DaemonSet '%s' in namespace 'd8-%s': %v. Retrying...", + fmt.Printf("\r\033[K Error getting DaemonSet '%s' in namespace 'd8-%s': %v. Retrying...", dsName, moduleName, err) continue } // Check if pods are scheduled if ds.Status.DesiredNumberScheduled == 0 { - fmt.Printf("\r Waiting for DaemonSet '%s' to schedule pods...", dsName) + fmt.Printf("\r\033[K Waiting for DaemonSet '%s' to schedule pods...", dsName) continue } @@ -660,12 +658,12 @@ func waitForModulePodsInitializing(ctx context.Context, cl client.Client, module client.MatchingLabels(ds.Spec.Selector.MatchLabels), } if err := cl.List(ctx, podList, opts...); err != nil { - fmt.Printf("\r Error listing pods for module '%s': %v. 
Retrying...", moduleName, err) + fmt.Printf("\r\033[K Error listing pods for module '%s': %v. Retrying...", moduleName, err) continue } if len(podList.Items) == 0 { - fmt.Printf("\r Waiting for pods of DaemonSet '%s' to be created...", dsName) + fmt.Printf("\r\033[K Waiting for pods of DaemonSet '%s' to be created...", dsName) continue } @@ -684,7 +682,7 @@ func waitForModulePodsInitializing(ctx context.Context, cl client.Client, module } } - fmt.Printf("\r Progress: %d/%d pods are in 'Initializing' state.", + fmt.Printf("\r\033[K Progress: %d/%d pods are in 'Initializing' state.", initializingPods, ds.Status.DesiredNumberScheduled) if int32(initializingPods) >= ds.Status.DesiredNumberScheduled { @@ -714,7 +712,7 @@ func waitForModulePodsTermination(ctx context.Context, cl client.Client, moduleN client.MatchingLabels(map[string]string{"app": dsName}), } if err := cl.List(ctx, podList, opts...); err != nil { - fmt.Printf("\r Error listing pods for module '%s': %v. Retrying...", moduleName, err) + fmt.Printf("\r\033[K Error listing pods for module '%s': %v. Retrying...", moduleName, err) continue } @@ -723,7 +721,7 @@ func waitForModulePodsTermination(ctx context.Context, cl client.Client, moduleN return nil } - fmt.Printf("\r Waiting for %d pods of module '%s' to terminate...", len(podList.Items), moduleName) + fmt.Printf("\r\033[K Waiting for %d pods of module '%s' to terminate...", len(podList.Items), moduleName) } } } @@ -781,7 +779,7 @@ func updateCNIMigrationPhase(ctx context.Context, cl client.Client, migrationNam if err := update(); err == nil { return nil } else { - fmt.Printf("\r ⚠️ %v. Retrying...", err) + fmt.Printf("\r\033[K ⚠️ %v. Retrying...", err) } ticker := time.NewTicker(2 * time.Second) @@ -793,7 +791,7 @@ func updateCNIMigrationPhase(ctx context.Context, cl client.Client, migrationNam return ctx.Err() case <-ticker.C: if err := update(); err != nil { - fmt.Printf("\r ⚠️ %v. Retrying...", err) + fmt.Printf("\r\033[K ⚠️ %v. Retrying...", err) continue } return nil From 1f3de542fc7e5b3041a3f407a9322462258d4269 Mon Sep 17 00:00:00 2001 From: Denis Tarabrin Date: Thu, 11 Dec 2025 19:30:24 +0400 Subject: [PATCH 08/12] ++ Signed-off-by: Denis Tarabrin --- internal/cni/switch.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/cni/switch.go b/internal/cni/switch.go index a66f0a35..b62392fd 100644 --- a/internal/cni/switch.go +++ b/internal/cni/switch.go @@ -637,7 +637,8 @@ func waitForModulePodsInitializing(ctx context.Context, cl client.Client, module err := cl.Get(ctx, types.NamespacedName{Name: dsName, Namespace: "d8-" + moduleName}, ds) if err != nil { if errors.IsNotFound(err) { - fmt.Printf("\r\033[K Waiting for DaemonSet '%s' in namespace 'd8-%s': not found...", dsName, moduleName) + fmt.Printf("\r\033[K Waiting for DaemonSet '%s' in namespace 'd8-%s': not found...", + dsName, moduleName) continue } fmt.Printf("\r\033[K Error getting DaemonSet '%s' in namespace 'd8-%s': %v. Retrying...", From 001b12879aa764cab0983517732e962793208e1f Mon Sep 17 00:00:00 2001 From: Denis Tarabrin Date: Thu, 11 Dec 2025 20:41:24 +0400 Subject: [PATCH 09/12] ++ Signed-off-by: Denis Tarabrin --- internal/cni/switch.go | 44 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/internal/cni/switch.go b/internal/cni/switch.go index b62392fd..11122280 100644 --- a/internal/cni/switch.go +++ b/internal/cni/switch.go @@ -212,8 +212,14 @@ func RunSwitch(timeout time.Duration) error { // 9. 
Wait for target CNI to be Ready // Now that NodeCleanupSucceeded is True, the target CNI pods should unblock and become Ready - if err = waitForModule(ctx, rtClient, "cni-"+strings.ToLower(targetCNI), true); err != nil { - return fmt.Errorf("waiting for module '%s' to be ready: %w", targetCNI, err) + targetModuleName := "cni-" + strings.ToLower(targetCNI) + fmt.Printf("Waiting for pods of module '%s' to become Ready...\n", targetModuleName) + dsName, err = getDaemonSetNameForCNI(targetModuleName) + if err != nil { + return fmt.Errorf("getting daemonset name for target CNI: %w", err) + } + if err = waitForModulePodsReady(ctx, rtClient, targetModuleName, dsName); err != nil { + return fmt.Errorf("waiting for target CNI pods to be ready: %w", err) } fmt.Printf("✅ CNI module 'cni-%s' is now Ready (total elapsed: %s)\n\n", targetCNI, @@ -694,6 +700,40 @@ func waitForModulePodsInitializing(ctx context.Context, cl client.Client, module } } +// waitForModulePodsReady waits for all pods of a module's daemonset to be in the 'Ready' state. +func waitForModulePodsReady(ctx context.Context, cl client.Client, moduleName string, dsName string) error { + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + // Get the DaemonSet + ds := &appsv1.DaemonSet{} + err := cl.Get(ctx, types.NamespacedName{Name: dsName, Namespace: "d8-" + moduleName}, ds) + if err != nil { + fmt.Printf("\r\033[K Error getting DaemonSet '%s': %v. Retrying...", dsName, err) + continue + } + + if ds.Status.DesiredNumberScheduled == 0 { + fmt.Printf("\r\033[K Waiting for DaemonSet '%s' to schedule pods...", dsName) + continue + } + + fmt.Printf("\r\033[K Progress: %d/%d pods are Ready.", + ds.Status.NumberReady, ds.Status.DesiredNumberScheduled) + + if ds.Status.NumberReady >= ds.Status.DesiredNumberScheduled { + fmt.Println("\n- All pods for target CNI are Ready.") + return nil + } + } + } +} + // waitForModulePodsTermination waits for all pods of a module's daemonset to be terminated. func waitForModulePodsTermination(ctx context.Context, cl client.Client, moduleName string, dsName string) error { fmt.Printf(" Waiting for pods of module '%s' to terminate...", moduleName) From ded5e0906bdb2363c04f545b4e658d879495d22c Mon Sep 17 00:00:00 2001 From: Denis Tarabrin Date: Fri, 12 Dec 2025 14:51:58 +0400 Subject: [PATCH 10/12] linter Signed-off-by: Denis Tarabrin --- internal/cni/cleanup.go | 5 +++-- internal/cni/common.go | 3 ++- internal/cni/prepare.go | 7 ++++--- internal/cni/rollback.go | 1 + internal/cni/switch.go | 14 +++++++------- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/internal/cni/cleanup.go b/internal/cni/cleanup.go index acce2b42..4ec771b5 100644 --- a/internal/cni/cleanup.go +++ b/internal/cni/cleanup.go @@ -22,8 +22,6 @@ import ( "strings" "time" - "github.com/deckhouse/deckhouse-cli/internal/cni/api/v1alpha1" - saferequest "github.com/deckhouse/deckhouse-cli/pkg/libsaferequest/client" admissionregistrationv1 "k8s.io/api/admissionregistration/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -31,6 +29,9 @@ import ( "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/deckhouse/deckhouse-cli/internal/cni/api/v1alpha1" + saferequest "github.com/deckhouse/deckhouse-cli/pkg/libsaferequest/client" ) // RunCleanup executes the logic for the 'cni-switch cleanup' command. 
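Note on the helper introduced in the previous patch: waitForModulePodsReady only inspects the DaemonSet's status counters, so it can be exercised without a real cluster. Below is a minimal test sketch using controller-runtime's fake client; it is not part of this series, and the module name "cni-cilium" and DaemonSet name "agent" are placeholders rather than values taken from the patches.

// Sketch only: exercises waitForModulePodsReady against a fake client.
package cni

import (
	"context"
	"testing"
	"time"

	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

func TestWaitForModulePodsReady_AllPodsReady(t *testing.T) {
	// DaemonSet that already reports all desired pods as Ready.
	ds := &appsv1.DaemonSet{
		ObjectMeta: metav1.ObjectMeta{Name: "agent", Namespace: "d8-cni-cilium"},
		Status: appsv1.DaemonSetStatus{
			DesiredNumberScheduled: 3,
			NumberReady:            3,
		},
	}
	cl := fake.NewClientBuilder().WithObjects(ds).Build()

	// The helper polls every 5 seconds, so leave room for at least one tick.
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()

	if err := waitForModulePodsReady(ctx, cl, "cni-cilium", "agent"); err != nil {
		t.Fatalf("expected success, got: %v", err)
	}
}

Because readiness is derived solely from DesiredNumberScheduled and NumberReady, the sketch seeds only those status fields; the first ticker fire should observe readiness and return nil.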
diff --git a/internal/cni/common.go b/internal/cni/common.go index ad867e64..a018f8d8 100644 --- a/internal/cni/common.go +++ b/internal/cni/common.go @@ -23,9 +23,10 @@ import ( "os" "strings" - "github.com/deckhouse/deckhouse-cli/internal/cni/api/v1alpha1" "k8s.io/apimachinery/pkg/runtime/schema" "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/deckhouse/deckhouse-cli/internal/cni/api/v1alpha1" ) const ( diff --git a/internal/cni/prepare.go b/internal/cni/prepare.go index 7fd01847..cb72efe8 100644 --- a/internal/cni/prepare.go +++ b/internal/cni/prepare.go @@ -29,14 +29,15 @@ import ( "strings" "time" - "github.com/deckhouse/deckhouse-cli/internal/cni/api/v1alpha1" - saferequest "github.com/deckhouse/deckhouse-cli/pkg/libsaferequest/client" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/deckhouse/deckhouse-cli/internal/cni/api/v1alpha1" + saferequest "github.com/deckhouse/deckhouse-cli/pkg/libsaferequest/client" ) // RunPrepare executes the logic for the 'cni-switch prepare' command. @@ -270,7 +271,7 @@ func RunPrepare(targetCNI string, timeout time.Duration) error { } // generateWebhookCertificates creates a self-signed CA and a server certificate for the webhook. -func generateWebhookCertificates(namespace string) (caCert, serverCert, serverKey []byte, err error) { +func generateWebhookCertificates(namespace string) ([]byte, []byte, []byte, error) { caSerialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 128) caSerialNumber, err := rand.Int(rand.Reader, caSerialNumberLimit) if err != nil { diff --git a/internal/cni/rollback.go b/internal/cni/rollback.go index bd383eff..52999599 100644 --- a/internal/cni/rollback.go +++ b/internal/cni/rollback.go @@ -24,5 +24,6 @@ import ( // RunRollback executes the logic for the 'cni-switch rollback' command. func RunRollback(timeout time.Duration) error { fmt.Println("Logic for rollback is not implemented yet.") + _ = timeout return nil } diff --git a/internal/cni/switch.go b/internal/cni/switch.go index 11122280..39ebef36 100644 --- a/internal/cni/switch.go +++ b/internal/cni/switch.go @@ -22,8 +22,6 @@ import ( "strings" "time" - "github.com/deckhouse/deckhouse-cli/internal/cni/api/v1alpha1" - saferequest "github.com/deckhouse/deckhouse-cli/pkg/libsaferequest/client" admissionregistrationv1 "k8s.io/api/admissionregistration/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -34,6 +32,9 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/deckhouse/deckhouse-cli/internal/cni/api/v1alpha1" + saferequest "github.com/deckhouse/deckhouse-cli/pkg/libsaferequest/client" ) // RunSwitch executes the logic for the 'cni-switch switch' command. 
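The hunks that follow rework the retry loops to satisfy the linter (no else after a branch that returns). The resulting shape is: one immediate attempt, then ticker-driven retries until success or context cancellation. A generic, self-contained sketch of that pattern, with illustrative names only (retryUntilDone is not a function from this series):

// Sketch of the flattened retry shape used by updateCNIMigrationPhase after the rewrite.
package main

import (
	"context"
	"fmt"
	"time"
)

// retryUntilDone runs op once right away, then every interval until it
// succeeds or ctx is cancelled.
func retryUntilDone(ctx context.Context, interval time.Duration, op func() error) error {
	err := op()
	if err == nil {
		return nil
	}
	fmt.Printf("⚠️ %v. Retrying...\n", err)

	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-ticker.C:
			if err := op(); err != nil {
				fmt.Printf("⚠️ %v. Retrying...\n", err)
				continue
			}
			return nil
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	attempts := 0
	_ = retryUntilDone(ctx, 2*time.Second, func() error {
		attempts++
		if attempts < 3 {
			return fmt.Errorf("attempt %d failed", attempts)
		}
		return nil
	})
	fmt.Println("done after", attempts, "attempts")
}

The behaviour is unchanged by the rewrite; only the control flow is flattened so the early return replaces the else branch.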
@@ -371,8 +372,7 @@ func waitForModule(ctx context.Context, cl client.Client, moduleName string, sho return nil } fmt.Printf("\r\033[K Waiting for module '%s' to be Ready, current state: %s", moduleName, state) - - } else { // should NOT be ready (disabled) + } else { err := cl.Get(ctx, types.NamespacedName{Name: moduleName}, module) if err != nil { if errors.IsNotFound(err) { @@ -817,11 +817,11 @@ func updateCNIMigrationPhase(ctx context.Context, cl client.Client, migrationNam return nil } - if err := update(); err == nil { + err := update() + if err == nil { return nil - } else { - fmt.Printf("\r\033[K ⚠️ %v. Retrying...", err) } + fmt.Printf("\r\033[K ⚠️ %v. Retrying...", err) ticker := time.NewTicker(2 * time.Second) defer ticker.Stop() From 4a5da95863b677390b8c55cc675db0ee7749703b Mon Sep 17 00:00:00 2001 From: Denis Tarabrin Date: Fri, 12 Dec 2025 15:05:10 +0400 Subject: [PATCH 11/12] linter2 Signed-off-by: Denis Tarabrin --- cmd/commands/cni.go | 15 ++++++++------- cmd/commands/kubectl.go | 3 ++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/cmd/commands/cni.go b/cmd/commands/cni.go index 229a6b86..8d1ac8ab 100644 --- a/cmd/commands/cni.go +++ b/cmd/commands/cni.go @@ -22,11 +22,12 @@ import ( "strings" "time" - "github.com/deckhouse/deckhouse-cli/internal/cni" "github.com/go-logr/logr" "github.com/spf13/cobra" "k8s.io/kubectl/pkg/util/templates" ctrllog "sigs.k8s.io/controller-runtime/pkg/log" + + "github.com/deckhouse/deckhouse-cli/internal/cni" ) var ( @@ -81,7 +82,7 @@ func NewCmdCniPrepare() *cobra.Command { Use: "prepare", Short: "Prepares the cluster for CNI switching", Example: cniPrepareExample, - PreRunE: func(cmd *cobra.Command, args []string) error { + PreRunE: func(cmd *cobra.Command, _ []string) error { targetCNI, _ := cmd.Flags().GetString("to-cni") for _, supported := range supportedCNIs { if strings.ToLower(targetCNI) == supported { @@ -95,7 +96,7 @@ func NewCmdCniPrepare() *cobra.Command { ) }, - Run: func(cmd *cobra.Command, args []string) { + Run: func(cmd *cobra.Command, _ []string) { targetCNI, _ := cmd.Flags().GetString("to-cni") timeout, _ := cmd.Flags().GetDuration("timeout") if err := cni.RunPrepare(targetCNI, timeout); err != nil { @@ -117,7 +118,7 @@ func NewCmdCniSwitch() *cobra.Command { Use: "switch", Short: "Performs the CNI switching", Example: cniSwitchExample, - Run: func(cmd *cobra.Command, args []string) { + Run: func(cmd *cobra.Command, _ []string) { timeout, _ := cmd.Flags().GetDuration("timeout") if err := cni.RunSwitch(timeout); err != nil { log.Fatalf("❌ Error running switch command: %v", err) @@ -132,7 +133,7 @@ func NewCmdCniCleanup() *cobra.Command { Use: "cleanup", Short: "Cleans up resources created during CNI switching", Example: cniCleanupExample, - Run: func(cmd *cobra.Command, args []string) { + Run: func(cmd *cobra.Command, _ []string) { timeout, _ := cmd.Flags().GetDuration("timeout") if err := cni.RunCleanup(timeout); err != nil { log.Fatalf("❌ Error running cleanup command: %v", err) @@ -146,8 +147,8 @@ func NewCmdCniRollback() *cobra.Command { // TDEN It needs to be done! 
cmd := &cobra.Command{ Use: "rollback", Short: "Rollback all changes and restore previous CNI", - Example: cniCleanupExample, - Run: func(cmd *cobra.Command, args []string) { + Example: cniRollbackExample, + Run: func(cmd *cobra.Command, _ []string) { timeout, _ := cmd.Flags().GetDuration("timeout") if err := cni.RunRollback(timeout); err != nil { log.Fatalf("❌ Error running rollback command: %v", err) diff --git a/cmd/commands/kubectl.go b/cmd/commands/kubectl.go index 5a65cfc7..5f14c148 100644 --- a/cmd/commands/kubectl.go +++ b/cmd/commands/kubectl.go @@ -25,7 +25,6 @@ import ( "regexp" "time" - "github.com/deckhouse/deckhouse-cli/internal/cni" "github.com/spf13/cobra" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/cli-runtime/pkg/genericclioptions" @@ -33,6 +32,8 @@ import ( cliflag "k8s.io/component-base/cli/flag" "k8s.io/component-base/logs" kubecmd "k8s.io/kubectl/pkg/cmd" + + "github.com/deckhouse/deckhouse-cli/internal/cni" ) const ( From 732b2b60a89acd2895b67a9500ae946b34864991 Mon Sep 17 00:00:00 2001 From: Denis Tarabrin Date: Fri, 12 Dec 2025 15:34:16 +0400 Subject: [PATCH 12/12] message fix Signed-off-by: Denis Tarabrin --- internal/cni/switch.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/cni/switch.go b/internal/cni/switch.go index 39ebef36..20cc89e5 100644 --- a/internal/cni/switch.go +++ b/internal/cni/switch.go @@ -196,7 +196,7 @@ func RunSwitch(timeout time.Duration) error { time.Since(startTime).Round(time.Millisecond)) // 8. This status update is CRITICAL. It unblocks the target CNI's init-container. - fmt.Println("Signaling target CNI pods to proceed by updating CNIMigration status...") + fmt.Println("Signaling 'NodeCleanupSucceeded' to proceed with pod restart...") if err = updateCNIMigrationStatus(ctx, rtClient, activeMigration.Name, metav1.Condition{ Type: "NodeCleanupSucceeded", Status: metav1.ConditionTrue,