diff --git a/Makefile b/Makefile index 45080f7..8d39d26 100644 --- a/Makefile +++ b/Makefile @@ -4,11 +4,11 @@ E2E_CLUSTER_NAME=gatewayapi-plugin-e2e IS_E2E_CLUSTER=$(shell kind get clusters | grep -e "^${E2E_CLUSTER_NAME}$$") # Versions of components used in e2e tests -GATEWAY_API_VERSION=v1.1.0 +GATEWAY_API_VERSION=v1.4.0 # See more versions at https://artifacthub.io/packages/helm/argo/argo-rollouts -ARGO_ROLLOUTS_HELM_VERSION=2.37.2 # Contains Argo Rollouts 1.7.1 +ARGO_ROLLOUTS_HELM_VERSION=2.40.5 # Contains Argo Rollouts 1.8.3 # See more versions at https://artifacthub.io/packages/helm/traefik/traefik -TRAEFIK_HELM_VERSION=31.0.0 # Contains Traefik proxy v3.1.2 +TRAEFIK_HELM_VERSION=37.4.0 # Contains Traefik proxy v3.6.2 @@ -21,9 +21,9 @@ define add_helm_repo endef define setup_cluster - kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/${GATEWAY_API_VERSION}/experimental-install.yaml helm install argo-rollouts argo/argo-rollouts --values ./test/cluster-setup/argo-rollouts-values.yml --version ${ARGO_ROLLOUTS_HELM_VERSION} --wait helm install traefik traefik/traefik --values ./test/cluster-setup/traefik-values.yml --version ${TRAEFIK_HELM_VERSION} --wait + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/${GATEWAY_API_VERSION}/experimental-install.yaml --server-side=true --force-conflicts endef define install_k8s_resources @@ -60,7 +60,7 @@ unit-tests: go test -v -count=1 ./pkg/... .PHONY: setup-e2e-cluster -setup-e2e-cluster: +setup-e2e-cluster: make BIN_NAME=gatewayapi-plugin-linux-amd64 GOOS=linux GOARCH=amd64 gatewayapi-plugin-build ifeq (${IS_E2E_CLUSTER},) kind create cluster --name ${E2E_CLUSTER_NAME} --config ./test/cluster-setup/cluster-config.yml @@ -79,20 +79,20 @@ endif sanity-check-e2e: ./test/cluster-setup/sanity-check.sh -.PHONY: run-e2e-tests +.PHONY: run-e2e-tests run-e2e-tests: sanity-check-e2e go test -v -timeout 5m -count=1 -run ${RUN} ./test/e2e/... 
# Flaky tests usually fail with GitHub actions. You should be able to run them locally though. .PHONY: e2e-tests-flaky -e2e-tests-flaky: setup-e2e-cluster run-e2e-tests-flaky +e2e-tests-flaky: setup-e2e-cluster run-e2e-tests-flaky ifeq (${CLUSTER_DELETE},true) make clear-e2e-cluster endif -.PHONY: run-e2e-tests-flaky +.PHONY: run-e2e-tests-flaky run-e2e-tests-flaky: sanity-check-e2e - go test -tags "flaky" -v -timeout 5m -count=1 -run ${RUN} ./test/e2e/... + go test -tags "flaky" -v -timeout 5m -count=1 -run ${RUN} ./test/e2e/... .PHONY: clear-e2e-cluster clear-e2e-cluster: diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 9b48929..d839e76 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,2 +1,3 @@ * Added support for [TLSRoute](https://rollouts-plugin-trafficrouter-gatewayapi.readthedocs.io/en/latest/features/tls/). -* You can now use [filters with Header based routing](https://github.com/argoproj-labs/rollouts-plugin-trafficrouter-gatewayapi/issues/87). \ No newline at end of file +* You can now use [filters with Header based routing](https://github.com/argoproj-labs/rollouts-plugin-trafficrouter-gatewayapi/issues/87). +* Gateway API routes are labeled while a canary is running to avoid GitOps drift and the label is removed once traffic returns to 100% stable. diff --git a/docs/features/multiple-routes.md b/docs/features/multiple-routes.md index 85292d6..3842c86 100644 --- a/docs/features/multiple-routes.md +++ b/docs/features/multiple-routes.md @@ -23,7 +23,7 @@ spec: parentRefs: - name: eg hostnames: - - backend.example.com + - backend.example.com rules: - matches: - path: @@ -46,7 +46,7 @@ spec: parentRefs: - name: eg hostnames: - - api.example.com + - api.example.com rules: - matches: - path: @@ -106,10 +106,36 @@ spec: - name: http containerPort: 8080 protocol: TCP -``` +``` If you now start a canary deployment both routes will change to 10%, 50% and 100% as the canary progresses to all its steps. 
+## Working with GitOps controllers + +GitOps tools such as Argo CD continuously reconcile Gateway API resources and can revert the temporary weight changes that occur +while a canary is progressing. The plugin automatically adds the label +`rollouts.argoproj.io/gatewayapi-canary=in-progress` to every HTTPRoute/GRPCRoute/TCPRoute/TLSRoute it mutates so that you can +configure your GitOps policy to ignore those resources during a rollout. The label disappears as soon as the stable service +returns to 100% weight. You can customise the key/value or disable the feature altogether with the +`inProgressLabelKey`, `inProgressLabelValue` and `disableInProgressLabel` fields under the plugin configuration. + +### Argo CD `ignoreDifferences` + +When you use Argo CD (either through the Application CRD or its Helm chart), add the following snippet so that Argo CD skips the +temporary rule edits while the `rollouts.argoproj.io/gatewayapi-canary` label is present: + +```yaml +configs: + cm: + resource.customizations.ignoreDifferences.gateway.networking.k8s.io_HTTPRoute: | + jqPathExpressions: + - select(.metadata.labels["rollouts.argoproj.io/gatewayapi-canary"] == "in-progress") | .spec.rules +``` + +Duplicate the block for `GRPCRoute`, `TCPRoute` and `TLSRoute` if you manage those kinds as well. If you have customised the +label key or value on the plugin, update the `jqPathExpressions` condition to match your configuration. The same structure applies +when you configure `resource.customizations` directly on an Application manifest (outside of Helm). + ## Automatic Route Discovery with Label Selectors Instead of explicitly listing each route name, you can use label selectors to automatically discover routes. This is particularly useful when managing many routes or when routes are created dynamically. 
@@ -200,7 +226,7 @@ trafficRouting: The plugin supports selectors for different route types: - `httpRouteSelector`: Discovers HTTPRoutes -- `grpcRouteSelector`: Discovers GRPCRoutes +- `grpcRouteSelector`: Discovers GRPCRoutes - `tcpRouteSelector`: Discovers TCPRoutes You can use multiple selectors simultaneously: @@ -247,4 +273,4 @@ To verify which routes will be discovered by your selector, use kubectl: kubectl get httproutes -n default -l app=my-app,canary-enabled=true ``` -The plugin logs discovered routes during reconciliation, which can help with debugging. \ No newline at end of file +The plugin logs discovered routes during reconciliation, which can help with debugging. diff --git a/docs/quick-start.md b/docs/quick-start.md index 8ed7fe8..d06b53d 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -5,7 +5,7 @@ to control your Http Routes. In this guide we will see how to use [the Rollouts You can find more examples at the [provider status page](provider-status.md). -## Prerequisites +## Prerequisites Get access to a Kubernetes cluster. You can use a cluster on the cloud or on your workstation like [k3s](https://k3s.io/), [k3d](https://k3d.io/) or [Docker for Desktop](https://www.docker.com/products/docker-desktop/). @@ -40,7 +40,7 @@ kubectl wait --timeout=5m -n envoy-gateway-system deployment/envoy-gateway --for !!! note This process needs to happen only once per cluster. The task is normally handled by infrastructure operators. -Create a Gateway +Create a Gateway ```yaml --- @@ -63,7 +63,7 @@ spec: - name: http protocol: HTTP port: 80 -``` +``` Apply the file with kubectl and then verify it works correctly with ``` @@ -109,12 +109,12 @@ subjects: - namespace: argo-rollouts kind: ServiceAccount name: argo-rollouts -``` +``` Apply the file with kubectl. Note that this role is **NOT** to be used in production clusters as it is super permissive. -## Step 4 - Create an HTTP route +## Step 4 - Create an HTTP route !!! 
note This process needs to happen only once per application. The task is normally handled by cluster operators or application developers. @@ -135,7 +135,7 @@ spec: - matches: - path: type: PathPrefix - value: / + value: / backendRefs: - name: argo-rollouts-stable-service kind: Service @@ -148,7 +148,7 @@ spec: Apply the file with kubectl. Verify it with `kubectl get httproutes` -## Step 5 - Create a Rollout +## Step 5 - Create a Rollout !!! note This process needs to happen only once per application. The task is normally handled by cluster operators or application developers. @@ -207,6 +207,10 @@ spec: argoproj-labs/gatewayAPI: httpRoute: argo-rollouts-http-route # our created httproute namespace: default + # Optional: customize or disable the temporary label that marks routes as managed during a canary + # inProgressLabelKey: rollouts.argoproj.io/gatewayapi-canary + # inProgressLabelValue: in-progress + # disableInProgressLabel: false steps: - setWeight: 50 - pause: {} @@ -241,7 +245,7 @@ You should see that all requests return with blue color: ![First deployment](images/quick-start/canary-start.png) -## Daily Task - Perform a Canary +## Daily Task - Perform a Canary !!! note This process happens multiple times per day/week. The task is normally handled by application developers. @@ -260,7 +264,7 @@ At this point each color should get 50% of requests. You can see this visually i You should also inspect the Http Route and verify that Argo Rollouts has changed the weights of the backend services -Run +Run ``` kubectl get httproute -o yaml @@ -268,6 +272,26 @@ kubectl get httproute -o yaml In the response you should see the following information about the weights for each backing service. +!!! info + While the canary is running, the plugin adds the label `rollouts.argoproj.io/gatewayapi-canary=in-progress` to every managed + Gateway API route so that GitOps tools such as Argo CD can be configured to ignore those temporary changes. 
The label is + removed automatically once the stable service goes back to 100% weight. Use `disableInProgressLabel`, `inProgressLabelKey` + or `inProgressLabelValue` if you need to adjust this behaviour. + + **Argo CD example (Helm chart values)** + + ```yaml + configs: + cm: + resource.customizations.ignoreDifferences.gateway.networking.k8s.io_HTTPRoute: | + jqPathExpressions: + - select(.metadata.labels["rollouts.argoproj.io/gatewayapi-canary"] == "in-progress") | .spec.rules + ``` + + Apply the same snippet to `GRPCRoute`, `TCPRoute` and `TLSRoute` kinds if you manage them. If you configure `resource.customizations` + directly inside an Application manifest rather than Helm values, reuse the same structure under `spec.source.plugin` or + `spec.source.helm.values`. + ```yaml [...snip...] spec: @@ -307,4 +331,4 @@ The application should gradually change now to yellow. The deployment has finished. If you change the Rollout image again, the process will start over. -Feel free to learn more about all Rollout options in the [Specification documentation](https://argo-rollouts.readthedocs.io/en/stable/features/specification/). \ No newline at end of file +Feel free to learn more about all Rollout options in the [Specification documentation](https://argo-rollouts.readthedocs.io/en/stable/features/specification/). 
diff --git a/internal/defaults/defaults.go b/internal/defaults/defaults.go index e15c347..f778bef 100644 --- a/internal/defaults/defaults.go +++ b/internal/defaults/defaults.go @@ -1,3 +1,7 @@ package defaults -const ConfigMap = "argo-gatewayapi-configmap" +const ( + ConfigMap = "argo-gatewayapi-configmap" + InProgressLabelKey = "rollouts.argoproj.io/gatewayapi-canary" + InProgressLabelValue = "in-progress" +) diff --git a/pkg/plugin/grpcroute.go b/pkg/plugin/grpcroute.go index 2943323..1557755 100644 --- a/pkg/plugin/grpcroute.go +++ b/pkg/plugin/grpcroute.go @@ -52,6 +52,7 @@ func (r *RpcPlugin) setGRPCRouteWeight(rollout *v1alpha1.Rollout, desiredWeight for _, ref := range stableBackendRefs { ref.Weight = &restWeight } + ensureInProgressLabel(grpcRoute, desiredWeight, gatewayAPIConfig) updatedGRPCRoute, err := grpcRouteClient.Update(ctx, grpcRoute, metav1.UpdateOptions{}) if r.IsTest { r.UpdatedGRPCRouteMock = updatedGRPCRoute @@ -396,6 +397,16 @@ func removeManagedGRPCRouteEntry(managedRouteMap ManagedRouteMap, routeRuleList managedRouteMapKey := managedRouteName + "." 
+ grpcRouteName return nil, fmt.Errorf(ManagedRouteMapEntryDeleteError, managedRouteMapKey, managedRouteMapKey) } + if managedRouteIndex < 0 || managedRouteIndex >= len(routeRuleList) { + // stale or corrupted managed route index; clean references for this route and continue gracefully + for name, managedMap := range managedRouteMap { + delete(managedMap, grpcRouteName) + if len(managedMap) == 0 { + delete(managedRouteMap, name) + } + } + return routeRuleList, nil + } delete(routeManagedRouteMap, grpcRouteName) if len(managedRouteMap[managedRouteName]) == 0 { delete(managedRouteMap, managedRouteName) diff --git a/pkg/plugin/httproute.go b/pkg/plugin/httproute.go index 4033d96..fd89721 100644 --- a/pkg/plugin/httproute.go +++ b/pkg/plugin/httproute.go @@ -56,6 +56,7 @@ func (r *RpcPlugin) setHTTPRouteWeight(rollout *v1alpha1.Rollout, desiredWeight if err != nil { r.LogCtx.Error(err, "Failed to handle experiment services") } + ensureInProgressLabel(httpRoute, desiredWeight, gatewayAPIConfig) updatedHTTPRoute, err := httpRouteClient.Update(ctx, httpRoute, metav1.UpdateOptions{}) if r.IsTest { r.UpdatedHTTPRouteMock = updatedHTTPRoute @@ -395,6 +396,16 @@ func removeManagedHTTPRouteEntry(managedRouteMap ManagedRouteMap, routeRuleList managedRouteMapKey := managedRouteName + "." 
+ httpRouteName return nil, fmt.Errorf(ManagedRouteMapEntryDeleteError, managedRouteMapKey, managedRouteMapKey) } + if managedRouteIndex < 0 || managedRouteIndex >= len(routeRuleList) { + // stale or corrupted managed route index; clean references for this route and continue gracefully + for name, managedMap := range managedRouteMap { + delete(managedMap, httpRouteName) + if len(managedMap) == 0 { + delete(managedRouteMap, name) + } + } + return routeRuleList, nil + } delete(routeManagedRouteMap, httpRouteName) if len(managedRouteMap[managedRouteName]) == 0 { delete(managedRouteMap, managedRouteName) diff --git a/pkg/plugin/labels.go b/pkg/plugin/labels.go new file mode 100644 index 0000000..e51005d --- /dev/null +++ b/pkg/plugin/labels.go @@ -0,0 +1,56 @@ +package plugin + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/argoproj-labs/rollouts-plugin-trafficrouter-gatewayapi/internal/defaults" +) + +func ensureInProgressLabel(obj metav1.Object, desiredWeight int32, config *GatewayAPITrafficRouting) bool { + if obj == nil || config == nil || config.DisableInProgressLabel { + return false + } + + key := config.inProgressLabelKey() + if key == "" { + return false + } + + labels := obj.GetLabels() + if desiredWeight == 0 { + if labels == nil { + return false + } + if _, ok := labels[key]; ok { + delete(labels, key) + obj.SetLabels(labels) + return true + } + return false + } + + value := config.inProgressLabelValue() + if labels == nil { + labels = make(map[string]string) + } + if current, ok := labels[key]; ok && current == value { + return false + } + labels[key] = value + obj.SetLabels(labels) + return true +} + +func (c *GatewayAPITrafficRouting) inProgressLabelKey() string { + if c.InProgressLabelKey != "" { + return c.InProgressLabelKey + } + return defaults.InProgressLabelKey +} + +func (c *GatewayAPITrafficRouting) inProgressLabelValue() string { + if c.InProgressLabelValue != "" { + return c.InProgressLabelValue + } + return 
defaults.InProgressLabelValue +} diff --git a/pkg/plugin/plugin_test.go b/pkg/plugin/plugin_test.go index 0d58fd3..dd1a34d 100644 --- a/pkg/plugin/plugin_test.go +++ b/pkg/plugin/plugin_test.go @@ -6,6 +6,7 @@ import ( "testing" "time" + "github.com/argoproj-labs/rollouts-plugin-trafficrouter-gatewayapi/internal/defaults" "github.com/argoproj-labs/rollouts-plugin-trafficrouter-gatewayapi/internal/utils" "github.com/argoproj-labs/rollouts-plugin-trafficrouter-gatewayapi/pkg/mocks" "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1" @@ -114,6 +115,25 @@ func TestRunSuccessfully(t *testing.T) { assert.Equal(t, 100-desiredWeight, *(rpcPluginImp.UpdatedHTTPRouteMock.Spec.Rules[0].BackendRefs[0].Weight)) assert.Equal(t, desiredWeight, *(rpcPluginImp.UpdatedHTTPRouteMock.Spec.Rules[0].BackendRefs[1].Weight)) }) + t.Run("SetHTTPRouteWeightAddsAndRemovesLabel", func(t *testing.T) { + httpRoute := mocks.CreateHTTPRouteWithLabels(mocks.HTTPRouteName, nil) + rpcPluginImp.HTTPRouteClient = gwFake.NewSimpleClientset(httpRoute).GatewayV1().HTTPRoutes(mocks.RolloutNamespace) + rollout := newRollout(mocks.StableServiceName, mocks.CanaryServiceName, &GatewayAPITrafficRouting{ + Namespace: mocks.RolloutNamespace, + HTTPRoute: mocks.HTTPRouteName, + }) + + err := pluginInstance.SetWeight(rollout, 25, []v1alpha1.WeightDestination{}) + assert.Empty(t, err.Error()) + labels := rpcPluginImp.UpdatedHTTPRouteMock.Labels + assert.Equal(t, defaults.InProgressLabelValue, labels[defaults.InProgressLabelKey]) + + err = pluginInstance.SetWeight(rollout, 0, []v1alpha1.WeightDestination{}) + assert.Empty(t, err.Error()) + labels = rpcPluginImp.UpdatedHTTPRouteMock.Labels + _, exists := labels[defaults.InProgressLabelKey] + assert.False(t, exists) + }) t.Run("SetGRPCRouteWeight", func(t *testing.T) { var desiredWeight int32 = 30 rollout := newRollout(mocks.StableServiceName, mocks.CanaryServiceName, &GatewayAPITrafficRouting{ @@ -126,6 +146,25 @@ func TestRunSuccessfully(t *testing.T) { 
assert.Equal(t, 100-desiredWeight, *(rpcPluginImp.UpdatedGRPCRouteMock.Spec.Rules[0].BackendRefs[0].Weight)) assert.Equal(t, desiredWeight, *(rpcPluginImp.UpdatedGRPCRouteMock.Spec.Rules[0].BackendRefs[1].Weight)) }) + t.Run("SetGRPCRouteWeightAddsAndRemovesLabel", func(t *testing.T) { + grpcRoute := mocks.CreateGRPCRouteWithLabels(mocks.GRPCRouteName, nil) + rpcPluginImp.GRPCRouteClient = gwFake.NewSimpleClientset(grpcRoute).GatewayV1().GRPCRoutes(mocks.RolloutNamespace) + rollout := newRollout(mocks.StableServiceName, mocks.CanaryServiceName, &GatewayAPITrafficRouting{ + Namespace: mocks.RolloutNamespace, + GRPCRoute: mocks.GRPCRouteName, + }) + + err := pluginInstance.SetWeight(rollout, 40, []v1alpha1.WeightDestination{}) + assert.Empty(t, err.Error()) + labels := rpcPluginImp.UpdatedGRPCRouteMock.Labels + assert.Equal(t, defaults.InProgressLabelValue, labels[defaults.InProgressLabelKey]) + + err = pluginInstance.SetWeight(rollout, 0, []v1alpha1.WeightDestination{}) + assert.Empty(t, err.Error()) + labels = rpcPluginImp.UpdatedGRPCRouteMock.Labels + _, exists := labels[defaults.InProgressLabelKey] + assert.False(t, exists) + }) t.Run("SetTCPRouteWeight", func(t *testing.T) { var desiredWeight int32 = 30 rollout := newRollout(mocks.StableServiceName, mocks.CanaryServiceName, @@ -139,6 +178,26 @@ func TestRunSuccessfully(t *testing.T) { assert.Equal(t, 100-desiredWeight, *(rpcPluginImp.UpdatedTCPRouteMock.Spec.Rules[0].BackendRefs[0].Weight)) assert.Equal(t, desiredWeight, *(rpcPluginImp.UpdatedTCPRouteMock.Spec.Rules[0].BackendRefs[1].Weight)) }) + t.Run("SetTCPRouteWeightAddsAndRemovesLabel", func(t *testing.T) { + tcpRoute := mocks.CreateTCPRouteWithLabels(mocks.TCPRouteName, nil) + rpcPluginImp.TCPRouteClient = gwFake.NewSimpleClientset(tcpRoute).GatewayV1alpha2().TCPRoutes(mocks.RolloutNamespace) + rollout := newRollout(mocks.StableServiceName, mocks.CanaryServiceName, + &GatewayAPITrafficRouting{ + Namespace: mocks.RolloutNamespace, + TCPRoute: 
mocks.TCPRouteName, + }) + + err := pluginInstance.SetWeight(rollout, 15, []v1alpha1.WeightDestination{}) + assert.Empty(t, err.Error()) + labels := rpcPluginImp.UpdatedTCPRouteMock.Labels + assert.Equal(t, defaults.InProgressLabelValue, labels[defaults.InProgressLabelKey]) + + err = pluginInstance.SetWeight(rollout, 0, []v1alpha1.WeightDestination{}) + assert.Empty(t, err.Error()) + labels = rpcPluginImp.UpdatedTCPRouteMock.Labels + _, exists := labels[defaults.InProgressLabelKey] + assert.False(t, exists) + }) t.Run("SetTLSRouteWeight", func(t *testing.T) { var desiredWeight int32 = 30 rollout := newRollout(mocks.StableServiceName, mocks.CanaryServiceName, @@ -152,6 +211,26 @@ func TestRunSuccessfully(t *testing.T) { assert.Equal(t, 100-desiredWeight, *(rpcPluginImp.UpdatedTLSRouteMock.Spec.Rules[0].BackendRefs[0].Weight)) assert.Equal(t, desiredWeight, *(rpcPluginImp.UpdatedTLSRouteMock.Spec.Rules[0].BackendRefs[1].Weight)) }) + t.Run("SetTLSRouteWeightAddsAndRemovesLabel", func(t *testing.T) { + tlsRoute := mocks.CreateTLSRouteWithLabels(mocks.TLSRouteName, nil) + rpcPluginImp.TLSRouteClient = gwFake.NewSimpleClientset(tlsRoute).GatewayV1alpha2().TLSRoutes(mocks.RolloutNamespace) + rollout := newRollout(mocks.StableServiceName, mocks.CanaryServiceName, + &GatewayAPITrafficRouting{ + Namespace: mocks.RolloutNamespace, + TLSRoute: mocks.TLSRouteName, + }) + + err := pluginInstance.SetWeight(rollout, 60, []v1alpha1.WeightDestination{}) + assert.Empty(t, err.Error()) + labels := rpcPluginImp.UpdatedTLSRouteMock.Labels + assert.Equal(t, defaults.InProgressLabelValue, labels[defaults.InProgressLabelKey]) + + err = pluginInstance.SetWeight(rollout, 0, []v1alpha1.WeightDestination{}) + assert.Empty(t, err.Error()) + labels = rpcPluginImp.UpdatedTLSRouteMock.Labels + _, exists := labels[defaults.InProgressLabelKey] + assert.False(t, exists) + }) t.Run("SetWeightViaRoutes", func(t *testing.T) { var desiredWeight int32 = 30 rollout := newRollout(mocks.StableServiceName, 
mocks.CanaryServiceName, diff --git a/pkg/plugin/tcproute.go b/pkg/plugin/tcproute.go index 26ada98..f0483dd 100644 --- a/pkg/plugin/tcproute.go +++ b/pkg/plugin/tcproute.go @@ -44,6 +44,7 @@ func (r *RpcPlugin) setTCPRouteWeight(rollout *v1alpha1.Rollout, desiredWeight i for _, ref := range stableBackendRefs { ref.Weight = &restWeight } + ensureInProgressLabel(tcpRoute, desiredWeight, gatewayAPIConfig) updatedTCPRoute, err := tcpRouteClient.Update(ctx, tcpRoute, metav1.UpdateOptions{}) if r.IsTest { r.UpdatedTCPRouteMock = updatedTCPRoute diff --git a/pkg/plugin/tlsroute.go b/pkg/plugin/tlsroute.go index a055cb3..fab58b2 100644 --- a/pkg/plugin/tlsroute.go +++ b/pkg/plugin/tlsroute.go @@ -44,6 +44,7 @@ func (r *RpcPlugin) setTLSRouteWeight(rollout *v1alpha1.Rollout, desiredWeight i for _, ref := range stableBackendRefs { ref.Weight = &restWeight } + ensureInProgressLabel(tlsRoute, desiredWeight, gatewayAPIConfig) updatedTLSRoute, err := tlsRouteClient.Update(ctx, tlsRoute, metav1.UpdateOptions{}) if r.IsTest { r.UpdatedTLSRouteMock = updatedTLSRoute diff --git a/pkg/plugin/types.go b/pkg/plugin/types.go index d1f1e61..c72ee2e 100644 --- a/pkg/plugin/types.go +++ b/pkg/plugin/types.go @@ -73,6 +73,12 @@ type GatewayAPITrafficRouting struct { TCPRouteSelector *metav1.LabelSelector `json:"tcpRouteSelector,omitempty"` // TLSRouteSelector refers to label selector for auto-discovery of TLSRoutes TLSRouteSelector *metav1.LabelSelector `json:"tlsRouteSelector,omitempty"` + // DisableInProgressLabel disables the automatic label that marks routes as managed during canary steps + DisableInProgressLabel bool `json:"disableInProgressLabel,omitempty"` + // InProgressLabelKey overrides the label key used while a canary is running + InProgressLabelKey string `json:"inProgressLabelKey,omitempty"` + // InProgressLabelValue overrides the label value used while a canary is running + InProgressLabelValue string `json:"inProgressLabelValue,omitempty"` // ConfigMapRWMutex refers to the 
RWMutex that we use to enter to the critical section // critical section is config map ConfigMapRWMutex sync.RWMutex diff --git a/test/cluster-setup/argo-rollouts-values.yml b/test/cluster-setup/argo-rollouts-values.yml index 31e637e..12b59e8 100644 --- a/test/cluster-setup/argo-rollouts-values.yml +++ b/test/cluster-setup/argo-rollouts-values.yml @@ -18,9 +18,8 @@ controller: name: gatewayapi-plugin trafficRouterPlugins: - trafficRouterPlugins: |- - - name: "argoproj-labs/gatewayAPI" - location: "file:///argo-rollouts-gatewayapi-plugin/gatewayapi-plugin-linux-amd64" + - name: "argoproj-labs/gatewayAPI" + location: "file:///argo-rollouts-gatewayapi-plugin/gatewayapi-plugin-linux-amd64" providerRBAC: providers: diff --git a/test/cluster-setup/cluster-config.yml b/test/cluster-setup/cluster-config.yml index 7b637af..7e86e1f 100644 --- a/test/cluster-setup/cluster-config.yml +++ b/test/cluster-setup/cluster-config.yml @@ -9,4 +9,4 @@ nodes: - hostPath: ./dist/ containerPath: /Volumes/ - role: worker - - role: worker \ No newline at end of file + - role: worker diff --git a/test/cluster-setup/sanity-check.sh b/test/cluster-setup/sanity-check.sh index 74ce15f..739ce0c 100755 --- a/test/cluster-setup/sanity-check.sh +++ b/test/cluster-setup/sanity-check.sh @@ -4,9 +4,20 @@ set -e echo ">>> Sanity checks for e2e tests. If these tests fail, your e2e tests will also fail. <<<" - -echo "Checking e2egateway class traefik with accepted condition=true ..." -kubectl get gatewayclasses traefik -o jsonpath='{.status.conditions[?(@.type=="Accepted")].status}' | grep -q "True" + +echo "Checking e2egateway class traefik with accepted condition=true (up to 5 attempts) ..." 
+for i in {1..5}; do + if kubectl get gatewayclasses traefik -o jsonpath='{.status.conditions[?(@.type=="Accepted")].status}' | grep -q "True"; then + echo "traefik gatewayclass accepted" + break + fi + if [ "$i" -eq 5 ]; then + echo "gatewayclass traefik not accepted after 5 attempts" + exit 1 + fi + echo "gatewayclass not ready yet, retrying ($i/5)..." + sleep 5 +done echo "Checking e2egateway traefik-gateway with programmed condition=true ..." kubectl get gateway traefik-gateway -o jsonpath='{.status.conditions[?(@.type=="Programmed")].status}' | grep -q "True" diff --git a/test/e2e/constants.go b/test/e2e/constants.go index afee832..4f26fae 100644 --- a/test/e2e/constants.go +++ b/test/e2e/constants.go @@ -35,6 +35,10 @@ const ( GRPC_ROUTE_FILTERS_PATH = "./testdata/grpcroute-filters.yml" GRPC_ROUTE_FILTERS_ROLLOUT_PATH = "./testdata/single-grpcroute-filters-rollout.yml" + // HTTP Route label test paths + HTTP_ROUTE_LABEL_PATH = "./testdata/httproute-basic.yml" + HTTP_ROUTE_LABEL_ROLLOUT_PATH = "./testdata/single-httproute-label-rollout.yml" + ROLLOUT_TEMPLATE_CONTAINERS_FIELD = "spec.template.spec.containers" ROLLOUT_TEMPLATE_FIRST_CONTAINER_FIELD = "spec.template.spec.containers.0" NEW_IMAGE_FIELD_VALUE = "argoproj/rollouts-demo:green" diff --git a/test/e2e/httproute_label_test.go b/test/e2e/httproute_label_test.go new file mode 100644 index 0000000..3ec7b41 --- /dev/null +++ b/test/e2e/httproute_label_test.go @@ -0,0 +1,329 @@ +//go:build !flaky + +package e2e + +import ( + "context" + "encoding/json" + "os" + "strings" + "testing" + + "github.com/argoproj-labs/rollouts-plugin-trafficrouter-gatewayapi/internal/defaults" + "github.com/sirupsen/logrus" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + + "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1" + "sigs.k8s.io/e2e-framework/klient/decoder" + "sigs.k8s.io/e2e-framework/klient/k8s" + 
"sigs.k8s.io/e2e-framework/klient/wait" + "sigs.k8s.io/e2e-framework/klient/wait/conditions" + "sigs.k8s.io/e2e-framework/pkg/envconf" + "sigs.k8s.io/e2e-framework/pkg/features" + gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" +) + +func TestHTTPRouteLabelBehavior(t *testing.T) { + feature := features.New("HTTPRoute label behavior").Setup( + setupEnvironment, + ).Setup( + setupHTTPRouteLabelEnv, + ).Assess( + "Label should not be present when canary weight is 0", + testLabelAbsentWhenWeightZero, + ).Assess( + "Label should be present during canary step", + testLabelPresentDuringCanary, + ).Assess( + "Label should be removed when rollout completes", + testLabelRemovedWhenComplete, + ).Teardown( + teardownHTTPRouteLabelEnv, + ).Feature() + _ = global.Test(t, feature) +} + +func setupHTTPRouteLabelEnv(ctx context.Context, t *testing.T, config *envconf.Config) context.Context { + var httpRoute gatewayv1.HTTPRoute + var rollout v1alpha1.Rollout + clusterResources := config.Client().Resources() + resourcesMap := map[string]*unstructured.Unstructured{} + ctx = context.WithValue(ctx, RESOURCES_MAP_KEY, resourcesMap) + firstHTTPRouteFile, err := os.Open(HTTP_ROUTE_LABEL_PATH) + if err != nil { + logrus.Errorf("file %q opening was failed: %s", HTTP_ROUTE_LABEL_PATH, err) + t.Error() + return ctx + } + defer firstHTTPRouteFile.Close() + logrus.Infof("file %q was opened", HTTP_ROUTE_LABEL_PATH) + rolloutFile, err := os.Open(HTTP_ROUTE_LABEL_ROLLOUT_PATH) + if err != nil { + logrus.Errorf("file %q opening was failed: %s", HTTP_ROUTE_LABEL_ROLLOUT_PATH, err) + t.Error() + return ctx + } + defer rolloutFile.Close() + logrus.Infof("file %q was opened", HTTP_ROUTE_LABEL_ROLLOUT_PATH) + err = decoder.Decode(firstHTTPRouteFile, &httpRoute) + if err != nil { + logrus.Errorf("file %q decoding was failed: %s", HTTP_ROUTE_LABEL_PATH, err) + t.Error() + return ctx + } + logrus.Infof("file %q was decoded", HTTP_ROUTE_LABEL_PATH) + err = decoder.Decode(rolloutFile, &rollout) + if err != nil { 
+ logrus.Errorf("file %q decoding was failed: %s", HTTP_ROUTE_LABEL_ROLLOUT_PATH, err) + t.Error() + return ctx + } + logrus.Infof("file %q was decoded", HTTP_ROUTE_LABEL_ROLLOUT_PATH) + httpRouteObject, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&httpRoute) + if err != nil { + logrus.Errorf("httpRoute %q converting to unstructured was failed: %s", httpRoute.GetName(), err) + t.Error() + return ctx + } + logrus.Infof("httpRoute %q was converted to unstructured", httpRoute.GetName()) + resourcesMap[HTTP_ROUTE_KEY] = &unstructured.Unstructured{ + Object: httpRouteObject, + } + rolloutObject, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&rollout) + if err != nil { + logrus.Errorf("rollout %q converting to unstructured was failed: %s", rollout.GetName(), err) + t.Error() + return ctx + } + logrus.Infof("rollout %q was converted to unstructured", rollout.GetName()) + unstructured.RemoveNestedField(rolloutObject, "spec", "template", "metadata", "creationTimestamp") + resourcesMap[ROLLOUT_KEY] = &unstructured.Unstructured{ + Object: rolloutObject, + } + err = clusterResources.Create(ctx, resourcesMap[HTTP_ROUTE_KEY]) + if err != nil { + logrus.Errorf("httpRoute %q creation was failed: %s", resourcesMap[HTTP_ROUTE_KEY].GetName(), err) + t.Error() + return ctx + } + logrus.Infof("httpRoute %q was created", resourcesMap[HTTP_ROUTE_KEY].GetName()) + err = clusterResources.Create(ctx, resourcesMap[ROLLOUT_KEY]) + if err != nil { + logrus.Errorf("rollout %q creation was failed: %s", resourcesMap[ROLLOUT_KEY].GetName(), err) + t.Error() + return ctx + } + logrus.Infof("rollout %q was created", resourcesMap[ROLLOUT_KEY].GetName()) + waitCondition := conditions.New(clusterResources) + logrus.Infof("waiting for httpRoute %q to connect with rollout %q (expecting canary weight: %d)", resourcesMap[HTTP_ROUTE_KEY].GetName(), resourcesMap[ROLLOUT_KEY].GetName(), FIRST_CANARY_ROUTE_WEIGHT) + err = wait.For( + waitCondition.ResourceMatch( + 
resourcesMap[HTTP_ROUTE_KEY], + getMatchHTTPRouteFetcher(t, FIRST_CANARY_ROUTE_WEIGHT), + ), + wait.WithTimeout(MEDIUM_PERIOD), + wait.WithInterval(SHORT_PERIOD), + ) + if err != nil { + logrus.Errorf("checking httpRoute %q connection with rollout %q was failed: %s", resourcesMap[HTTP_ROUTE_KEY].GetName(), resourcesMap[ROLLOUT_KEY].GetName(), err) + t.Error() + return ctx + } + logrus.Infof("httpRoute %q connected with rollout %q", resourcesMap[HTTP_ROUTE_KEY].GetName(), resourcesMap[ROLLOUT_KEY].GetName()) + return ctx +} + +func testLabelAbsentWhenWeightZero(ctx context.Context, t *testing.T, config *envconf.Config) context.Context { + clusterResources := config.Client().Resources() + resourcesMap, ok := ctx.Value(RESOURCES_MAP_KEY).(map[string]*unstructured.Unstructured) + if !ok { + logrus.Errorf("%q type assertion was failed", RESOURCES_MAP_KEY) + t.Error() + return ctx + } + logrus.Infof("Checking that label is absent when canary weight is 0") + err := wait.For( + conditions.New(clusterResources).ResourceMatch( + resourcesMap[HTTP_ROUTE_KEY], + getMatchHTTPRouteLabelFetcher(t, false), + ), + wait.WithTimeout(MEDIUM_PERIOD), + wait.WithInterval(SHORT_PERIOD), + ) + if err != nil { + logrus.Errorf("httpRoute %q should not have label when weight is 0: %s", resourcesMap[HTTP_ROUTE_KEY].GetName(), err) + t.Error() + return ctx + } + logrus.Infof("httpRoute %q correctly has no label when weight is 0", resourcesMap[HTTP_ROUTE_KEY].GetName()) + return ctx +} + +func testLabelPresentDuringCanary(ctx context.Context, t *testing.T, config *envconf.Config) context.Context { + clusterResources := config.Client().Resources() + resourcesMap, ok := ctx.Value(RESOURCES_MAP_KEY).(map[string]*unstructured.Unstructured) + if !ok { + logrus.Errorf("%q type assertion was failed", RESOURCES_MAP_KEY) + t.Error() + return ctx + } + logrus.Infof("%q was type asserted", RESOURCES_MAP_KEY) + containersObject, isFound, err := 
unstructured.NestedFieldNoCopy(resourcesMap[ROLLOUT_KEY].Object, strings.Split(ROLLOUT_TEMPLATE_CONTAINERS_FIELD, ".")...) + if !isFound { + logrus.Errorf("rollout %q field %q was not found", resourcesMap[ROLLOUT_KEY].GetName(), ROLLOUT_TEMPLATE_CONTAINERS_FIELD) + t.Error() + return ctx + } + if err != nil { + logrus.Errorf("getting rollout %q field %q was failed: %s", resourcesMap[ROLLOUT_KEY].GetName(), ROLLOUT_TEMPLATE_CONTAINERS_FIELD, err) + t.Error() + return ctx + } + logrus.Infof("rollout %q field %q was received", resourcesMap[ROLLOUT_KEY].GetName(), ROLLOUT_TEMPLATE_CONTAINERS_FIELD) + unstructuredContainerList, ok := containersObject.([]interface{}) + if !ok { + logrus.Errorf("rollout %q field %q type assertion was failed", resourcesMap[ROLLOUT_KEY].GetName(), ROLLOUT_TEMPLATE_CONTAINERS_FIELD) + t.Error() + return ctx + } + logrus.Infof("rollout %q field %q was type asserted", resourcesMap[ROLLOUT_KEY].GetName(), ROLLOUT_TEMPLATE_CONTAINERS_FIELD) + unstructuredContainer, ok := unstructuredContainerList[0].(map[string]interface{}) + if !ok { + logrus.Errorf("rollout %q field %q type assertion was failed", resourcesMap[ROLLOUT_KEY].GetName(), ROLLOUT_TEMPLATE_FIRST_CONTAINER_FIELD) + t.Error() + return ctx + } + logrus.Infof("rollout %q field %q was type asserted", resourcesMap[ROLLOUT_KEY].GetName(), ROLLOUT_TEMPLATE_FIRST_CONTAINER_FIELD) + unstructured.RemoveNestedField(resourcesMap[ROLLOUT_KEY].Object, "metadata", "resourceVersion") + unstructuredContainer["image"] = NEW_IMAGE_FIELD_VALUE + serializedRollout, err := json.Marshal(resourcesMap[ROLLOUT_KEY].Object) + if err != nil { + logrus.Errorf("rollout %q serializing was failed: %s", resourcesMap[ROLLOUT_KEY].GetName(), err) + t.Error() + return ctx + } + logrus.Infof("rollout %q was serialized", resourcesMap[ROLLOUT_KEY].GetName()) + rolloutPatch := k8s.Patch{ + PatchType: types.MergePatchType, + Data: serializedRollout, + } + err = clusterResources.Patch(ctx, resourcesMap[ROLLOUT_KEY], 
rolloutPatch) + if err != nil { + logrus.Errorf("rollout %q updating was failed: %s", resourcesMap[ROLLOUT_KEY].GetName(), err) + t.Error() + return ctx + } + logrus.Infof("rollout %q was updated", resourcesMap[ROLLOUT_KEY].GetName()) + waitCondition := conditions.New(clusterResources) + logrus.Infof("waiting for httpRoute %q to have label during canary step (weight: %d)", resourcesMap[HTTP_ROUTE_KEY].GetName(), LAST_CANARY_ROUTE_WEIGHT) + err = wait.For( + waitCondition.ResourceMatch( + resourcesMap[HTTP_ROUTE_KEY], + getMatchHTTPRouteLabelFetcher(t, true), + ), + wait.WithTimeout(LONG_PERIOD), + wait.WithInterval(SHORT_PERIOD), + ) + if err != nil { + logrus.Errorf("httpRoute %q should have label during canary: %s", resourcesMap[HTTP_ROUTE_KEY].GetName(), err) + t.Error() + return ctx + } + logrus.Infof("httpRoute %q correctly has label during canary step", resourcesMap[HTTP_ROUTE_KEY].GetName()) + return ctx +} + +func testLabelRemovedWhenComplete(ctx context.Context, t *testing.T, config *envconf.Config) context.Context { + clusterResources := config.Client().Resources() + resourcesMap, ok := ctx.Value(RESOURCES_MAP_KEY).(map[string]*unstructured.Unstructured) + if !ok { + logrus.Errorf("%q type assertion was failed", RESOURCES_MAP_KEY) + t.Error() + return ctx + } + logrus.Infof("Waiting for rollout to complete and label to be removed") + waitCondition := conditions.New(clusterResources) + err := wait.For( + waitCondition.ResourceMatch( + resourcesMap[HTTP_ROUTE_KEY], + getMatchHTTPRouteFetcher(t, FIRST_CANARY_ROUTE_WEIGHT), + ), + wait.WithTimeout(LONG_PERIOD), + wait.WithInterval(SHORT_PERIOD), + ) + if err != nil { + logrus.Errorf("httpRoute %q weight did not return to 0: %s", resourcesMap[HTTP_ROUTE_KEY].GetName(), err) + t.Error() + return ctx + } + logrus.Infof("httpRoute %q weight returned to 0, checking label removal", resourcesMap[HTTP_ROUTE_KEY].GetName()) + err = wait.For( + waitCondition.ResourceMatch( + resourcesMap[HTTP_ROUTE_KEY], + 
getMatchHTTPRouteLabelFetcher(t, false), + ), + wait.WithTimeout(MEDIUM_PERIOD), + wait.WithInterval(SHORT_PERIOD), + ) + if err != nil { + logrus.Errorf("httpRoute %q label should be removed when rollout completes: %s", resourcesMap[HTTP_ROUTE_KEY].GetName(), err) + t.Error() + return ctx + } + logrus.Infof("httpRoute %q correctly has no label after rollout completion", resourcesMap[HTTP_ROUTE_KEY].GetName()) + return ctx +} + +func teardownHTTPRouteLabelEnv(ctx context.Context, t *testing.T, config *envconf.Config) context.Context { + clusterResources := config.Client().Resources() + resourcesMap, ok := ctx.Value(RESOURCES_MAP_KEY).(map[string]*unstructured.Unstructured) + if !ok { + logrus.Errorf("%q type assertion was failed", RESOURCES_MAP_KEY) + t.Error() + return ctx + } + logrus.Infof("%q was type asserted", RESOURCES_MAP_KEY) + err := clusterResources.Delete(ctx, resourcesMap[ROLLOUT_KEY]) + if err != nil { + logrus.Errorf("deleting rollout %q was failed: %s", resourcesMap[ROLLOUT_KEY].GetName(), err) + t.Error() + return ctx + } + logrus.Infof("rollout %q was deleted", resourcesMap[ROLLOUT_KEY].GetName()) + err = clusterResources.Delete(ctx, resourcesMap[HTTP_ROUTE_KEY]) + if err != nil { + logrus.Errorf("deleting httpRoute %q was failed: %s", resourcesMap[HTTP_ROUTE_KEY].GetName(), err) + t.Error() + return ctx + } + logrus.Infof("httpRoute %q was deleted", resourcesMap[HTTP_ROUTE_KEY].GetName()) + return ctx +} + +func getMatchHTTPRouteLabelFetcher(t *testing.T, expectLabel bool) func(k8s.Object) bool { + return func(obj k8s.Object) bool { + var httpRoute gatewayv1.HTTPRoute + unstructuredHTTPRoute, ok := obj.(*unstructured.Unstructured) + if !ok { + logrus.Error("k8s object type assertion was failed") + t.Error() + return false + } + err := runtime.DefaultUnstructuredConverter.FromUnstructured(unstructuredHTTPRoute.Object, &httpRoute) + if err != nil { + logrus.Errorf("conversion from unstructured httpRoute %q to the typed httpRoute was failed: %s", 
unstructuredHTTPRoute.GetName(), err) + t.Error() + return false + } + labels := httpRoute.GetLabels() + value, ok := labels[defaults.InProgressLabelKey] + if expectLabel { + return ok && value == defaults.InProgressLabelValue + } + // we explicitly expect the label to be absent + return !ok + } +} diff --git a/test/e2e/testdata/grpcroute-filters.yml b/test/e2e/testdata/grpcroute-filters.yml index 4262bac..b7d9c95 100644 --- a/test/e2e/testdata/grpcroute-filters.yml +++ b/test/e2e/testdata/grpcroute-filters.yml @@ -25,7 +25,7 @@ spec: value: grpc-added-value remove: - X-Remove-GRPC-Header - # ResponseHeaderModifier - adds, sets, and removes response headers + # ResponseHeaderModifier - adds, sets, and removes response headers - type: ResponseHeaderModifier responseHeaderModifier: set: diff --git a/test/e2e/testdata/single-httproute-label-rollout.yml b/test/e2e/testdata/single-httproute-label-rollout.yml new file mode 100644 index 0000000..b29f537 --- /dev/null +++ b/test/e2e/testdata/single-httproute-label-rollout.yml @@ -0,0 +1,40 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Rollout +metadata: + name: httproute-label + namespace: default +spec: + replicas: 2 + strategy: + canary: + canaryService: argo-rollouts-canary-service + stableService: argo-rollouts-stable-service + trafficRouting: + plugins: + argoproj-labs/gatewayAPI: + httpRoute: httproute-basic + namespace: default + steps: + - setWeight: 30 + - pause: + duration: 10s + revisionHistoryLimit: 1 + selector: + matchLabels: + app: rollouts-demo + template: + metadata: + labels: + app: rollouts-demo + spec: + containers: + - name: rollouts-demo + image: argoproj/rollouts-demo:red + ports: + - name: http + containerPort: 8080 + protocol: TCP + resources: + requests: + memory: 32Mi + cpu: 5m diff --git a/test/e2e/testdata/single-httproute-rollout.yml b/test/e2e/testdata/single-httproute-rollout.yml index 5d216b1..e8f6d06 100644 --- a/test/e2e/testdata/single-httproute-rollout.yml +++ 
b/test/e2e/testdata/single-httproute-rollout.yml @@ -16,7 +16,7 @@ spec: namespace: default steps: - setWeight: 30 - - pause: { } + - pause: {} revisionHistoryLimit: 1 selector: matchLabels: @@ -36,4 +36,4 @@ spec: resources: requests: memory: 32Mi - cpu: 5m \ No newline at end of file + cpu: 5m