Skip to content

Commit 10d56d5

Browse files
pedjakPer Goncalves da Silva
authored andcommitted
WIP Introduce Progressing condition
1 parent b378a52 commit 10d56d5

File tree

5 files changed

+117
-76
lines changed

5 files changed

+117
-76
lines changed

api/v1/clusterextensionrevision_types.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,9 @@ const (
4141
ClusterExtensionRevisionReasonIncomplete = "Incomplete"
4242
ClusterExtensionRevisionReasonProgressing = "Progressing"
4343
ClusterExtensionRevisionReasonArchived = "Archived"
44-
ClusterExtensionRevisionReasonRolloutInProgress = "RolloutInProgress"
44+
ClusterExtensionRevisionReasonRolloutInProgress = "RollingOut"
45+
ClusterExtensionRevisionReasonRolloutError = "RolloutError"
46+
ClusterExtensionRevisionReasonRolledOut = "RolledOut"
4547
)
4648

4749
// ClusterExtensionRevisionSpec defines the desired state of ClusterExtensionRevision.
@@ -152,6 +154,7 @@ type ClusterExtensionRevisionStatus struct {
152154

153155
// ClusterExtensionRevision is the Schema for the clusterextensionrevisions API
154156
// +kubebuilder:printcolumn:name="Available",type=string,JSONPath=`.status.conditions[?(@.type=='Available')].status`
157+
// +kubebuilder:printcolumn:name="Progressing",type=string,JSONPath=`.status.conditions[?(@.type=='Progressing')].status`
155158
// +kubebuilder:printcolumn:name=Age,type=date,JSONPath=`.metadata.creationTimestamp`
156159
type ClusterExtensionRevision struct {
157160
metav1.TypeMeta `json:",inline"`

helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensionrevisions.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ spec:
1919
- jsonPath: .status.conditions[?(@.type=='Available')].status
2020
name: Available
2121
type: string
22+
- jsonPath: .status.conditions[?(@.type=='Progressing')].status
23+
name: Progressing
24+
type: string
2225
- jsonPath: .metadata.creationTimestamp
2326
name: Age
2427
type: date

internal/operator-controller/controllers/clusterextensionrevision_controller.go

Lines changed: 104 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -126,25 +126,29 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
126126
// Reconcile
127127
//
128128
if err := c.ensureFinalizer(ctx, rev, clusterExtensionRevisionTeardownFinalizer); err != nil {
129-
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
130-
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
131-
Status: metav1.ConditionFalse,
132-
Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure,
133-
Message: err.Error(),
134-
ObservedGeneration: rev.Generation,
135-
})
136129
return ctrl.Result{}, fmt.Errorf("error ensuring teardown finalizer: %v", err)
137130
}
138131

139-
if err := c.establishWatch(ctx, rev, revision); err != nil {
132+
inRollout := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable) == nil
133+
if inRollout {
134+
if err := c.establishWatch(ctx, rev, revision); err != nil {
135+
werr := fmt.Errorf("establish watch: %v", err)
136+
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
137+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
138+
Status: metav1.ConditionTrue,
139+
Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure,
140+
Message: werr.Error(),
141+
ObservedGeneration: rev.Generation,
142+
})
143+
return ctrl.Result{}, werr
144+
}
140145
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
141-
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
142-
Status: metav1.ConditionFalse,
143-
Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure,
144-
Message: err.Error(),
146+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
147+
Status: metav1.ConditionTrue,
148+
Reason: ocv1.ClusterExtensionRevisionReasonRolloutInProgress,
149+
Message: "Revision is being rolled out.",
145150
ObservedGeneration: rev.Generation,
146151
})
147-
return ctrl.Result{}, fmt.Errorf("establish watch: %v", err)
148152
}
149153

150154
rres, err := c.RevisionEngine.Reconcile(ctx, *revision, opts...)
@@ -154,13 +158,24 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
154158
} else {
155159
l.Error(err, "revision reconcile failed")
156160
}
157-
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
158-
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
159-
Status: metav1.ConditionFalse,
160-
Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure,
161-
Message: err.Error(),
162-
ObservedGeneration: rev.Generation,
163-
})
161+
if inRollout {
162+
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
163+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
164+
Status: metav1.ConditionTrue,
165+
Reason: ocv1.ClusterExtensionRevisionReasonRolloutError,
166+
Message: err.Error(),
167+
ObservedGeneration: rev.Generation,
168+
})
169+
} else {
170+
// it is a probably transient error, and we do not know if the revision is available or not
171+
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
172+
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
173+
Status: metav1.ConditionUnknown,
174+
Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure,
175+
Message: err.Error(),
176+
ObservedGeneration: rev.Generation,
177+
})
178+
}
164179
return ctrl.Result{}, fmt.Errorf("revision reconcile: %v", err)
165180
}
166181
// Log detailed reconcile reports only in debug mode (V(1)) to reduce verbosity.
@@ -171,32 +186,51 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
171186
if verr := rres.GetValidationError(); verr != nil {
172187
l.Error(fmt.Errorf("%w", verr), "preflight validation failed, retrying after 10s")
173188

174-
// if we take Availability as strictly being "all availability probes pass"
175-
// we should probably be at an Unknown state (or not posted it at all here)
176-
// and post this up in the Progressing condition as False with a failed rollout reason / message
177-
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
178-
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
179-
Status: metav1.ConditionFalse,
180-
Reason: ocv1.ClusterExtensionRevisionReasonRevisionValidationFailure,
181-
Message: fmt.Sprintf("revision validation error: %s", verr),
182-
ObservedGeneration: rev.Generation,
183-
})
189+
if inRollout {
190+
// given that we retry, we are not going to keep Progressing condition True
191+
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
192+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
193+
Status: metav1.ConditionTrue,
194+
Reason: ocv1.ClusterExtensionRevisionReasonRevisionValidationFailure,
195+
Message: fmt.Sprintf("revision validation error: %s", verr),
196+
ObservedGeneration: rev.Generation,
197+
})
198+
} else {
199+
// it is a probably transient error, and we do not know if the revision is available or not
200+
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
201+
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
202+
Status: metav1.ConditionUnknown,
203+
Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure,
204+
Message: fmt.Sprintf("revision validation error: %s", verr),
205+
ObservedGeneration: rev.Generation,
206+
})
207+
}
184208
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
185209
}
186210

187211
for i, pres := range rres.GetPhases() {
188212
if verr := pres.GetValidationError(); verr != nil {
189213
l.Error(fmt.Errorf("%w", verr), "phase preflight validation failed, retrying after 10s", "phase", i)
190214

191-
// we probably want to either eat this error and leave Progressing as True with a rolling out reason and implement timeout
192-
// or surface this error through a Degraded-type condition that can flap as roll out continues
193-
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
194-
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
195-
Status: metav1.ConditionFalse,
196-
Reason: ocv1.ClusterExtensionRevisionReasonPhaseValidationError,
197-
Message: fmt.Sprintf("phase %d validation error: %s", i, verr),
198-
ObservedGeneration: rev.Generation,
199-
})
215+
if inRollout {
216+
// given that we retry, we are not going to keep Progressing condition True
217+
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
218+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
219+
Status: metav1.ConditionTrue,
220+
Reason: ocv1.ClusterExtensionRevisionReasonPhaseValidationError,
221+
Message: fmt.Sprintf("phase %d validation error: %s", i, verr),
222+
ObservedGeneration: rev.Generation,
223+
})
224+
} else {
225+
// it is a probably transient error, and we do not know if the revision is available or not
226+
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
227+
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
228+
Status: metav1.ConditionUnknown,
229+
Reason: ocv1.ClusterExtensionRevisionReasonPhaseValidationError,
230+
Message: fmt.Sprintf("phase %d validation error: %s", i, verr),
231+
ObservedGeneration: rev.Generation,
232+
})
233+
}
200234
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
201235
}
202236

@@ -211,19 +245,32 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
211245
l.Error(fmt.Errorf("object collision detected"), "object collision, retrying after 10s", "phase", i, "collisions", collidingObjs)
212246
// collisions are probably stickier than phase roll out probe failures - so we'd probably want to set
213247
// Progressing to false here due to the collision
214-
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
215-
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
216-
Status: metav1.ConditionFalse,
217-
Reason: ocv1.ClusterExtensionRevisionReasonObjectCollisions,
218-
Message: fmt.Sprintf("revision object collisions in phase %d\n%s", i, strings.Join(collidingObjs, "\n\n")),
219-
ObservedGeneration: rev.Generation,
220-
})
221-
222-
// idk if we want to return yet or check the Availability probes on a best-effort basis
223-
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
248+
if inRollout {
249+
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
250+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
251+
Status: metav1.ConditionFalse,
252+
Reason: ocv1.ClusterExtensionRevisionReasonObjectCollisions,
253+
Message: fmt.Sprintf("revision object collisions in phase %d\n%s", i, strings.Join(collidingObjs, "\n\n")),
254+
ObservedGeneration: rev.Generation,
255+
})
256+
257+
// not sure if we want to retry here - collisions are probably not transient?
258+
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
259+
}
224260
}
225261
}
226262

263+
if !rres.InTransistion() {
264+
// we have rolled out all objects in all phases, not interested in probes here
265+
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
266+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
267+
Status: metav1.ConditionFalse,
268+
Reason: ocv1.ClusterExtensionRevisionReasonRolledOut,
269+
Message: "Revision is rolled out.",
270+
ObservedGeneration: rev.Generation,
271+
})
272+
}
273+
227274
//nolint:nestif
228275
if rres.IsComplete() {
229276
// Archive other revisions.
@@ -237,30 +284,26 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
237284
}
238285
}
239286

240-
// Report status.
241-
// We probably want to set Progressing to False here with a rollout success reason and message
242287
// It would be good to understand from Nico how we can distinguish between progression and availability probes
243288
// and how to best check that all Availability probes are passing
244289
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
245290
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
246291
Status: metav1.ConditionTrue,
247292
Reason: ocv1.ClusterExtensionRevisionReasonAvailable,
248-
Message: "Object is available and passes all probes.",
293+
Message: "Objects are available and passes all probes.",
249294
ObservedGeneration: rev.Generation,
250295
})
251296

252297
// We'll probably only want to remove this once we are done updating the ClusterExtension conditions
253298
// as its one of the interfaces between the revision and the extension. If we still have the Succeeded for now
254299
// that's fine.
255-
if !meta.IsStatusConditionTrue(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) {
256-
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
257-
Type: ocv1.ClusterExtensionRevisionTypeSucceeded,
258-
Status: metav1.ConditionTrue,
259-
Reason: ocv1.ClusterExtensionRevisionReasonRolloutSuccess,
260-
Message: "Revision succeeded rolling out.",
261-
ObservedGeneration: rev.Generation,
262-
})
263-
}
300+
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
301+
Type: ocv1.ClusterExtensionRevisionTypeSucceeded,
302+
Status: metav1.ConditionTrue,
303+
Reason: ocv1.ClusterExtensionRevisionReasonRolloutSuccess,
304+
Message: "Revision succeeded rolling out.",
305+
ObservedGeneration: rev.Generation,
306+
})
264307
} else {
265308
var probeFailureMsgs []string
266309
for _, pres := range rres.GetPhases() {
@@ -296,7 +339,6 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
296339
ObservedGeneration: rev.Generation,
297340
})
298341
} else {
299-
// either to nothing or set the Progressing condition to True with rolling out reason / message
300342
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
301343
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
302344
Status: metav1.ConditionFalse,
@@ -306,19 +348,6 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
306348
})
307349
}
308350
}
309-
if rres.InTransistion() {
310-
// seems to be the right thing XD
311-
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
312-
Type: ocv1.TypeProgressing,
313-
Status: metav1.ConditionTrue,
314-
Reason: ocv1.ClusterExtensionRevisionReasonProgressing,
315-
Message: "Rollout in progress.",
316-
ObservedGeneration: rev.Generation,
317-
})
318-
} else {
319-
// we may not want to remove it but rather set it to False?
320-
meta.RemoveStatusCondition(&rev.Status.Conditions, ocv1.TypeProgressing)
321-
}
322351

323352
return ctrl.Result{}, nil
324353
}

manifests/experimental-e2e.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,9 @@ spec:
610610
- jsonPath: .status.conditions[?(@.type=='Available')].status
611611
name: Available
612612
type: string
613+
- jsonPath: .status.conditions[?(@.type=='Progressing')].status
614+
name: Progressing
615+
type: string
613616
- jsonPath: .metadata.creationTimestamp
614617
name: Age
615618
type: date

manifests/experimental.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,9 @@ spec:
575575
- jsonPath: .status.conditions[?(@.type=='Available')].status
576576
name: Available
577577
type: string
578+
- jsonPath: .status.conditions[?(@.type=='Progressing')].status
579+
name: Progressing
580+
type: string
578581
- jsonPath: .metadata.creationTimestamp
579582
name: Age
580583
type: date

0 commit comments

Comments
 (0)