Commit 89b96d0

sets a sensible default of **2 replicas** and updates upgrade e2e test cases
1 parent c85bc55 commit 89b96d0

8 files changed (+73 −32 lines)


helm/olmv1/templates/deployment-olmv1-system-catalogd-controller-manager.yml

Lines changed: 2 additions & 2 deletions
@@ -12,11 +12,11 @@ metadata:
   namespace: {{ .Values.namespaces.olmv1.name }}
 spec:
   minReadySeconds: 5
-  replicas: 1
+  replicas: {{ .Values.options.catalogd.deployment.replicas }}
   strategy:
     type: RollingUpdate
     rollingUpdate:
-      maxSurge: 1 # Allow temporary 2 pods (1 + 1) for zero-downtime updates
+      maxSurge: 1 # Allow temporary extra pod (e.g., 3 pods with 2 replicas) for zero-downtime updates
       maxUnavailable: 0 # Never allow pods to be unavailable during updates
   selector:
     matchLabels:

helm/olmv1/templates/deployment-olmv1-system-operator-controller-controller-manager.yml

Lines changed: 2 additions & 2 deletions
@@ -11,11 +11,11 @@ metadata:
   name: operator-controller-controller-manager
   namespace: {{ .Values.namespaces.olmv1.name }}
 spec:
-  replicas: 1
+  replicas: {{ .Values.options.operatorController.deployment.replicas }}
   strategy:
     type: RollingUpdate
     rollingUpdate:
-      maxSurge: 1 # Allow temporary 2 pods (1 + 1) for zero-downtime updates
+      maxSurge: 1 # Allow temporary extra pod (e.g., 3 pods with 2 replicas) for zero-downtime updates
       maxUnavailable: 0 # Never allow pods to be unavailable during updates
   selector:
     matchLabels:

helm/olmv1/values.yaml

Lines changed: 2 additions & 0 deletions
@@ -8,6 +8,7 @@ options:
     enabled: true
     deployment:
       image: quay.io/operator-framework/operator-controller:devel
+      replicas: 2
       extraArguments: []
     features:
       enabled: []
@@ -19,6 +20,7 @@ options:
     enabled: true
     deployment:
       image: quay.io/operator-framework/catalogd:devel
+      replicas: 2
       extraArguments: []
     features:
       enabled: []
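
With the replica counts surfaced in values.yaml, consumers of the chart can tune them per environment instead of patching rendered manifests. A minimal sketch of an override file (the file name and the single-replica values are illustrative, not part of this commit):

```yaml
# my-values.yaml (hypothetical override file)
options:
  operatorController:
    deployment:
      replicas: 1   # e.g., scale back down for a resource-constrained dev cluster
  catalogd:
    deployment:
      replicas: 1
```

Rendering the chart with something like `helm template ./helm/olmv1 -f my-values.yaml` would then emit `replicas: 1` for both controller-manager Deployments instead of the new default of 2.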

manifests/experimental-e2e.yaml

Lines changed: 4 additions & 4 deletions
@@ -2107,11 +2107,11 @@ metadata:
   namespace: olmv1-system
 spec:
   minReadySeconds: 5
-  replicas: 1
+  replicas: 2
   strategy:
     type: RollingUpdate
     rollingUpdate:
-      maxSurge: 1 # Allow temporary 2 pods (1 + 1) for zero-downtime updates
+      maxSurge: 1 # Allow temporary extra pod (e.g., 3 pods with 2 replicas) for zero-downtime updates
       maxUnavailable: 0 # Never allow pods to be unavailable during updates
   selector:
     matchLabels:
@@ -2258,11 +2258,11 @@ metadata:
   name: operator-controller-controller-manager
   namespace: olmv1-system
 spec:
-  replicas: 1
+  replicas: 2
   strategy:
     type: RollingUpdate
     rollingUpdate:
-      maxSurge: 1 # Allow temporary 2 pods (1 + 1) for zero-downtime updates
+      maxSurge: 1 # Allow temporary extra pod (e.g., 3 pods with 2 replicas) for zero-downtime updates
       maxUnavailable: 0 # Never allow pods to be unavailable during updates
   selector:
     matchLabels:

manifests/experimental.yaml

Lines changed: 4 additions & 4 deletions
@@ -2032,11 +2032,11 @@ metadata:
   namespace: olmv1-system
 spec:
   minReadySeconds: 5
-  replicas: 1
+  replicas: 2
   strategy:
     type: RollingUpdate
     rollingUpdate:
-      maxSurge: 1 # Allow temporary 2 pods (1 + 1) for zero-downtime updates
+      maxSurge: 1 # Allow temporary extra pod (e.g., 3 pods with 2 replicas) for zero-downtime updates
       maxUnavailable: 0 # Never allow pods to be unavailable during updates
   selector:
     matchLabels:
@@ -2170,11 +2170,11 @@ metadata:
   name: operator-controller-controller-manager
   namespace: olmv1-system
 spec:
-  replicas: 1
+  replicas: 2
   strategy:
     type: RollingUpdate
     rollingUpdate:
-      maxSurge: 1 # Allow temporary 2 pods (1 + 1) for zero-downtime updates
+      maxSurge: 1 # Allow temporary extra pod (e.g., 3 pods with 2 replicas) for zero-downtime updates
       maxUnavailable: 0 # Never allow pods to be unavailable during updates
   selector:
     matchLabels:

manifests/standard-e2e.yaml

Lines changed: 4 additions & 4 deletions
@@ -1795,11 +1795,11 @@ metadata:
   namespace: olmv1-system
 spec:
   minReadySeconds: 5
-  replicas: 1
+  replicas: 2
   strategy:
     type: RollingUpdate
     rollingUpdate:
-      maxSurge: 1 # Allow temporary 2 pods (1 + 1) for zero-downtime updates
+      maxSurge: 1 # Allow temporary extra pod (e.g., 3 pods with 2 replicas) for zero-downtime updates
       maxUnavailable: 0 # Never allow pods to be unavailable during updates
   selector:
     matchLabels:
@@ -1945,11 +1945,11 @@ metadata:
   name: operator-controller-controller-manager
   namespace: olmv1-system
 spec:
-  replicas: 1
+  replicas: 2
   strategy:
     type: RollingUpdate
     rollingUpdate:
-      maxSurge: 1 # Allow temporary 2 pods (1 + 1) for zero-downtime updates
+      maxSurge: 1 # Allow temporary extra pod (e.g., 3 pods with 2 replicas) for zero-downtime updates
       maxUnavailable: 0 # Never allow pods to be unavailable during updates
   selector:
     matchLabels:

manifests/standard.yaml

Lines changed: 4 additions & 4 deletions
@@ -1720,11 +1720,11 @@ metadata:
   namespace: olmv1-system
 spec:
   minReadySeconds: 5
-  replicas: 1
+  replicas: 2
   strategy:
     type: RollingUpdate
     rollingUpdate:
-      maxSurge: 1 # Allow temporary 2 pods (1 + 1) for zero-downtime updates
+      maxSurge: 1 # Allow temporary extra pod (e.g., 3 pods with 2 replicas) for zero-downtime updates
       maxUnavailable: 0 # Never allow pods to be unavailable during updates
   selector:
     matchLabels:
@@ -1857,11 +1857,11 @@ metadata:
   name: operator-controller-controller-manager
   namespace: olmv1-system
 spec:
-  replicas: 1
+  replicas: 2
   strategy:
     type: RollingUpdate
     rollingUpdate:
-      maxSurge: 1 # Allow temporary 2 pods (1 + 1) for zero-downtime updates
+      maxSurge: 1 # Allow temporary extra pod (e.g., 3 pods with 2 replicas) for zero-downtime updates
       maxUnavailable: 0 # Never allow pods to be unavailable during updates
   selector:
     matchLabels:

test/upgrade-e2e/post_upgrade_test.go

Lines changed: 51 additions & 12 deletions
@@ -45,23 +45,22 @@ func TestClusterCatalogUnpacking(t *testing.T) {
 		require.Equal(ct, *managerDeployment.Spec.Replicas, managerDeployment.Status.ReadyReplicas)
 	}, time.Minute, time.Second)
 
-	var managerPod corev1.Pod
-	t.Log("Waiting for only one controller-manager pod to remain")
+	t.Log("Waiting for controller-manager pods to match the desired replica count")
 	require.EventuallyWithT(t, func(ct *assert.CollectT) {
 		var managerPods corev1.PodList
 		err := c.List(ctx, &managerPods, client.MatchingLabels(managerLabelSelector))
 		require.NoError(ct, err)
-		require.Len(ct, managerPods.Items, 1)
-		managerPod = managerPods.Items[0]
+		require.Len(ct, managerPods.Items, int(*managerDeployment.Spec.Replicas))
 	}, time.Minute, time.Second)
 
 	t.Log("Waiting for acquired leader election")
 	leaderCtx, leaderCancel := context.WithTimeout(ctx, 3*time.Minute)
 	defer leaderCancel()
-	leaderSubstrings := []string{"successfully acquired lease"}
-	leaderElected, err := watchPodLogsForSubstring(leaderCtx, &managerPod, leaderSubstrings...)
+
+	// When there are multiple replicas, find the leader pod
+	managerPod, err := findLeaderPod(leaderCtx, "catalogd")
 	require.NoError(t, err)
-	require.True(t, leaderElected)
+	require.NotNil(t, managerPod)
 
 	t.Log("Reading logs to make sure that ClusterCatalog was reconciled by catalogdv1")
 	logCtx, cancel := context.WithTimeout(ctx, time.Minute)
@@ -70,7 +69,7 @@ func TestClusterCatalogUnpacking(t *testing.T) {
 		"reconcile ending",
 		fmt.Sprintf(`ClusterCatalog=%q`, testClusterCatalogName),
 	}
-	found, err := watchPodLogsForSubstring(logCtx, &managerPod, substrings...)
+	found, err := watchPodLogsForSubstring(logCtx, managerPod, substrings...)
 	require.NoError(t, err)
 	require.True(t, found)
 
@@ -115,10 +114,13 @@ func TestClusterExtensionAfterOLMUpgrade(t *testing.T) {
 	leaderCtx, leaderCancel := context.WithTimeout(ctx, 3*time.Minute)
 	defer leaderCancel()
 
-	leaderSubstrings := []string{"successfully acquired lease"}
-	leaderElected, err := watchPodLogsForSubstring(leaderCtx, managerPod, leaderSubstrings...)
+	// When there are multiple replicas, find the leader pod
+	leaderPod, err := findLeaderPod(leaderCtx, "operator-controller")
 	require.NoError(t, err)
-	require.True(t, leaderElected)
+	require.NotNil(t, leaderPod)
+
+	// Use the leader pod for subsequent operations
+	managerPod = leaderPod
 
 	t.Log("Reading logs to make sure that ClusterExtension was reconciled by operator-controller before we update it")
 	// Make sure that after we upgrade OLM itself we can still reconcile old objects without any changes
@@ -221,11 +223,48 @@ func waitForDeployment(t *testing.T, ctx context.Context, controlPlaneLabel stri
 	t.Logf("Ensure the number of remaining pods equal the desired number of replicas (%d)", desiredNumReplicas)
 	require.EventuallyWithT(t, func(ct *assert.CollectT) {
 		require.NoError(ct, c.List(ctx, &managerPods, client.MatchingLabelsSelector{Selector: deploymentLabelSelector}))
-		require.Len(ct, managerPods.Items, 1)
+		require.Len(ct, managerPods.Items, int(desiredNumReplicas))
 	}, time.Minute, time.Second)
 	return &managerPods.Items[0]
 }
 
+// findLeaderPod finds the pod that has acquired the leader lease by checking logs of all pods
+func findLeaderPod(ctx context.Context, controlPlaneLabel string) (*corev1.Pod, error) {
+	deploymentLabelSelector := labels.Set{"app.kubernetes.io/name": controlPlaneLabel}.AsSelector()
+
+	var managerPods corev1.PodList
+	if err := c.List(ctx, &managerPods, client.MatchingLabelsSelector{Selector: deploymentLabelSelector}); err != nil {
+		return nil, fmt.Errorf("failed to list pods: %w", err)
+	}
+
+	if len(managerPods.Items) == 0 {
+		return nil, fmt.Errorf("no pods found for label %s", controlPlaneLabel)
+	}
+
+	// If there's only one pod, it must be the leader
+	if len(managerPods.Items) == 1 {
+		return &managerPods.Items[0], nil
+	}
+
+	// Check each pod's logs for leader election message
+	leaderSubstrings := []string{"successfully acquired lease"}
+	for i := range managerPods.Items {
+		pod := &managerPods.Items[i]
+
+		// Check if this pod has acquired the lease
+		isLeader, err := watchPodLogsForSubstring(ctx, pod, leaderSubstrings...)
+		if err != nil {
+			// If we can't read logs from this pod, try the next one
+			continue
+		}
+		if isLeader {
+			return pod, nil
+		}
+	}
+
+	return nil, fmt.Errorf("no leader pod found among %d pods", len(managerPods.Items))
+}
+
 func watchPodLogsForSubstring(ctx context.Context, pod *corev1.Pod, substrings ...string) (bool, error) {
 	podLogOpts := corev1.PodLogOptions{
 		Follow: true,
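
The new findLeaderPod helper identifies the leader by scanning each pod's logs for the "successfully acquired lease" message. An alternative, not part of this commit, is to read the holder of the leader-election Lease object directly; below is a minimal sketch under assumed names (the package name, lease name, and namespace are placeholders, and it assumes a controller-runtime client with coordination/v1 in its scheme):

```go
// Sketch only: lease name/namespace and package name are assumptions, not taken from this commit.
package upgradee2e

import (
	"context"
	"fmt"
	"strings"

	coordinationv1 "k8s.io/api/coordination/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// leaderPodNameFromLease returns the name of the pod currently holding the given
// leader-election Lease. controller-runtime typically sets holderIdentity to
// "<pod-name>_<uuid>", so the pod name is the portion before the first "_".
func leaderPodNameFromLease(ctx context.Context, c client.Client, namespace, leaseName string) (string, error) {
	var lease coordinationv1.Lease
	if err := c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: leaseName}, &lease); err != nil {
		return "", fmt.Errorf("failed to get lease %q: %w", leaseName, err)
	}
	if lease.Spec.HolderIdentity == nil || *lease.Spec.HolderIdentity == "" {
		return "", fmt.Errorf("lease %q currently has no holder", leaseName)
	}
	return strings.SplitN(*lease.Spec.HolderIdentity, "_", 2)[0], nil
}
```

Compared with log scanning, this avoids depending on a specific log line, at the cost of having to know the lease's name up front.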
