Skip to content

Commit 8aad837

Browse files
authored
[Feature] Add condition to postpone changing storage class name (#875) (#895)
1 parent 0056d42 commit 8aad837

File tree

14 files changed

+328
-178
lines changed

14 files changed

+328
-178
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
## [master](https://github.com/arangodb/kube-arangodb/tree/master) (N/A)
44
- Do not check License V2 on Community images
55
- Add status.members.<group>.
6+
- Don't replace pod immediately when storage class changes
67
- Define MemberReplacementRequired condition
78
- Remove pod immediately when annotation is turned on
89
- (ARM64) Add support for ARM64 enablement

chart/kube-arangodb/templates/deployment.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,9 @@ spec:
105105
{{ if .Values.operator.features.backup }}
106106
- --operator.backup
107107
{{- end }}
108+
{{- if .Values.operator.debug }}
109+
- --mode.single
110+
{{- end }}
108111
{{ if .Values.operator.features.apps }}
109112
- --operator.apps
110113
{{- end }}

pkg/apis/deployment/v1/server_group_spec.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,9 @@ type ServerGroupSpec struct {
116116
// VolumeClaimTemplate specifies a template for volume claims
117117
VolumeClaimTemplate *core.PersistentVolumeClaim `json:"volumeClaimTemplate,omitempty"`
118118
// VolumeResizeMode specified resize mode for pvc
119-
VolumeResizeMode *PVCResizeMode `json:"pvcResizeMode,omitempty"`
120-
VolumeAllowShrink *bool `json:"volumeAllowShrink,omitempty"`
119+
VolumeResizeMode *PVCResizeMode `json:"pvcResizeMode,omitempty"`
120+
// Deprecated: VolumeAllowShrink allows shrink the volume
121+
VolumeAllowShrink *bool `json:"volumeAllowShrink,omitempty"`
121122
// AntiAffinity specified additional antiAffinity settings in ArangoDB Pod definitions
122123
AntiAffinity *core.PodAntiAffinity `json:"antiAffinity,omitempty"`
123124
// Affinity specified additional affinity settings in ArangoDB Pod definitions
@@ -682,6 +683,7 @@ func (s ServerGroupSpec) ResetImmutableFields(group ServerGroup, fieldPrefix str
682683
return resetFields
683684
}
684685

686+
// Deprecated: GetVolumeAllowShrink returns true when it is possible to shrink the volume.
685687
func (s ServerGroupSpec) GetVolumeAllowShrink() bool {
686688
if s.VolumeAllowShrink == nil {
687689
return false // Default value

pkg/apis/deployment/v2alpha1/server_group_spec.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,9 @@ type ServerGroupSpec struct {
116116
// VolumeClaimTemplate specifies a template for volume claims
117117
VolumeClaimTemplate *core.PersistentVolumeClaim `json:"volumeClaimTemplate,omitempty"`
118118
// VolumeResizeMode specified resize mode for pvc
119-
VolumeResizeMode *PVCResizeMode `json:"pvcResizeMode,omitempty"`
120-
VolumeAllowShrink *bool `json:"volumeAllowShrink,omitempty"`
119+
VolumeResizeMode *PVCResizeMode `json:"pvcResizeMode,omitempty"`
120+
// Deprecated: VolumeAllowShrink allows shrink the volume
121+
VolumeAllowShrink *bool `json:"volumeAllowShrink,omitempty"`
121122
// AntiAffinity specified additional antiAffinity settings in ArangoDB Pod definitions
122123
AntiAffinity *core.PodAntiAffinity `json:"antiAffinity,omitempty"`
123124
// Affinity specified additional affinity settings in ArangoDB Pod definitions
@@ -682,6 +683,7 @@ func (s ServerGroupSpec) ResetImmutableFields(group ServerGroup, fieldPrefix str
682683
return resetFields
683684
}
684685

686+
// Deprecated: GetVolumeAllowShrink returns true when it is possible to shrink the volume.
685687
func (s ServerGroupSpec) GetVolumeAllowShrink() bool {
686688
if s.VolumeAllowShrink == nil {
687689
return false // Default value

pkg/deployment/member/phase_updates.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ func removeMemberConditionsMapFunc(m *api.MemberStatus) {
8686
m.Conditions.Remove(api.ConditionTypeUpdateFailed)
8787
m.Conditions.Remove(api.ConditionTypeCleanedOut)
8888
m.Conditions.Remove(api.ConditionTypeTopologyAware)
89+
m.Conditions.Remove(api.MemberReplacementRequired)
8990

9091
m.RemoveTerminationsBefore(time.Now().Add(-1 * recentTerminationsKeepPeriod))
9192

pkg/deployment/reconcile/action_set_member_condition_v2.go

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -70,26 +70,44 @@ func (a actionSetMemberConditionV2) Start(ctx context.Context) (bool, error) {
7070
as := a.action.Params[setConditionActionV2KeyStatus] == string(core.ConditionTrue)
7171

7272
if err := a.actionCtx.WithStatusUpdateErr(ctx, func(s *api.DeploymentStatus) (bool, error) {
73-
m, _, ok := s.Members.ElementByID(a.action.MemberID)
74-
if !ok {
73+
var changed bool
74+
75+
s.Members.ForServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error {
76+
for i := range members {
77+
if members[i].ID == a.action.MemberID {
78+
changed = members[i].Conditions.UpdateWithHash(api.ConditionType(aa), as, ar, am, ah)
79+
return nil
80+
}
81+
}
82+
7583
a.log.Info().Msg("can not set the condition because the member is gone already")
76-
return false, nil
77-
}
84+
return nil
85+
}, a.action.Group)
7886

79-
return m.Conditions.UpdateWithHash(api.ConditionType(aa), as, ar, am, ah), nil
87+
// If not found then false is returned.
88+
return changed, nil
8089
}); err != nil {
8190
a.log.Warn().Err(err).Msgf("unable to update status")
8291
return true, nil
8392
}
8493
case setConditionActionV2KeyTypeRemove:
8594
if err := a.actionCtx.WithStatusUpdateErr(ctx, func(s *api.DeploymentStatus) (bool, error) {
86-
m, _, ok := s.Members.ElementByID(a.action.MemberID)
87-
if !ok {
88-
a.log.Info().Msg("can not set the condition because the member is gone already")
89-
return false, nil
90-
}
91-
92-
return m.Conditions.Remove(api.ConditionType(aa)), nil
95+
var changed bool
96+
97+
s.Members.ForServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error {
98+
for i := range members {
99+
if members[i].ID == a.action.MemberID {
100+
changed = members[i].Conditions.Remove(api.ConditionType(aa))
101+
return nil
102+
}
103+
}
104+
105+
a.log.Info().Msg("can not remove the condition because the member is gone already")
106+
return nil
107+
}, a.action.Group)
108+
109+
// If not found then false is returned.
110+
return changed, nil
93111
}); err != nil {
94112
a.log.Warn().Err(err).Msgf("unable to update status")
95113
return true, nil

pkg/deployment/reconcile/condition_member_recreation.go

Lines changed: 173 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,18 @@ package reconcile
2222

2323
import (
2424
"context"
25+
"fmt"
2526
"strings"
2627

28+
"github.com/rs/zerolog"
29+
core "k8s.io/api/core/v1"
30+
"k8s.io/apimachinery/pkg/api/resource"
31+
32+
"github.com/arangodb/kube-arangodb/pkg/apis/deployment"
2733
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
34+
"github.com/arangodb/kube-arangodb/pkg/util"
2835
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
2936
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
30-
"github.com/rs/zerolog"
3137
)
3238

3339
func createMemberRecreationConditionsPlan(ctx context.Context,
@@ -37,17 +43,18 @@ func createMemberRecreationConditionsPlan(ctx context.Context,
3743
var p api.Plan
3844

3945
for _, m := range status.Members.AsList() {
40-
resp, recreate := EvaluateMemberRecreationCondition(ctx, log, apiObject, spec, status, m.Group, m.Member, cachedStatus, context)
46+
message, recreate := EvaluateMemberRecreationCondition(ctx, log, apiObject, spec, status, m.Group, m.Member,
47+
cachedStatus, context, isStorageClassChanged, isVolumeSizeChanged)
4148

4249
if !recreate {
4350
if _, ok := m.Member.Conditions.Get(api.MemberReplacementRequired); ok {
4451
// Unset condition
4552
p = append(p, removeMemberConditionActionV2("Member replacement not required", api.MemberReplacementRequired, m.Group, m.Member.ID))
4653
}
4754
} else {
48-
if c, ok := m.Member.Conditions.Get(api.MemberReplacementRequired); !ok || !c.IsTrue() || c.Message != resp {
55+
if c, ok := m.Member.Conditions.Get(api.MemberReplacementRequired); !ok || !c.IsTrue() || c.Message != message {
4956
// Update condition
50-
p = append(p, updateMemberConditionActionV2("Member replacement required", api.MemberReplacementRequired, m.Group, m.Member.ID, true, "Member replacement required", resp, ""))
57+
p = append(p, updateMemberConditionActionV2("Member replacement required", api.MemberReplacementRequired, m.Group, m.Member.ID, true, "Member replacement required", message, ""))
5158
}
5259
}
5360
}
@@ -59,7 +66,7 @@ type MemberRecreationConditionEvaluator func(ctx context.Context,
5966
log zerolog.Logger, apiObject k8sutil.APIObject,
6067
spec api.DeploymentSpec, status api.DeploymentStatus,
6168
group api.ServerGroup, member api.MemberStatus,
62-
cachedStatus inspectorInterface.Inspector, context PlanBuilderContext) (string, bool)
69+
cachedStatus inspectorInterface.Inspector, context PlanBuilderContext) (bool, string, error)
6370

6471
func EvaluateMemberRecreationCondition(ctx context.Context,
6572
log zerolog.Logger, apiObject k8sutil.APIObject,
@@ -69,10 +76,170 @@ func EvaluateMemberRecreationCondition(ctx context.Context,
6976
args := make([]string, 0, len(evaluators))
7077

7178
for _, e := range evaluators {
72-
if s, ok := e(ctx, log, apiObject, spec, status, group, member, cachedStatus, context); ok {
79+
ok, s, err := e(ctx, log, apiObject, spec, status, group, member, cachedStatus, context)
80+
if err != nil {
81+
// When one of an evaluator requires pod's replacement then it should be done.
82+
continue
83+
}
84+
85+
if ok {
7386
args = append(args, s)
7487
}
7588
}
7689

7790
return strings.Join(args, ", "), len(args) > 0
7891
}
92+
93+
// isStorageClassChanged returns true and reason when the member should be replaced.
94+
func isStorageClassChanged(_ context.Context, log zerolog.Logger, apiObject k8sutil.APIObject, spec api.DeploymentSpec,
95+
_ api.DeploymentStatus, group api.ServerGroup, member api.MemberStatus,
96+
cachedStatus inspectorInterface.Inspector, context PlanBuilderContext) (bool, string, error) {
97+
if spec.GetMode() == api.DeploymentModeSingle {
98+
// Storage cannot be changed in single server deployments.
99+
return false, "", nil
100+
}
101+
102+
if member.Phase != api.MemberPhaseCreated {
103+
// Only make changes when phase is created.
104+
return false, "", nil
105+
}
106+
107+
if member.PersistentVolumeClaimName == "" {
108+
// Plan is irrelevant without PVC.
109+
return false, "", nil
110+
}
111+
112+
groupSpec := spec.GetServerGroupSpec(group)
113+
storageClassName := groupSpec.GetStorageClassName()
114+
if storageClassName == "" {
115+
// A storage class is not set.
116+
return false, "", nil
117+
}
118+
119+
// Check if a storage class changed.
120+
if pvc, ok := cachedStatus.PersistentVolumeClaim(member.PersistentVolumeClaimName); !ok {
121+
log.Warn().Str("role", group.AsRole()).Str("id", member.ID).Msg("Failed to get PVC")
122+
return false, "", fmt.Errorf("failed to get PVC %s", member.PersistentVolumeClaimName)
123+
} else {
124+
pvcClassName := util.StringOrDefault(pvc.Spec.StorageClassName)
125+
if pvcClassName == storageClassName {
126+
// A storage class has not been changed.
127+
return false, "", nil
128+
}
129+
if pvcClassName == "" {
130+
// TODO what to do here?
131+
return false, "", nil
132+
}
133+
}
134+
135+
// From here on, it is known that a storage class has changed.
136+
if group != api.ServerGroupDBServers && group != api.ServerGroupAgents {
137+
// Only agents & DB servers are allowed to change their storage class.
138+
context.CreateEvent(k8sutil.NewCannotChangeStorageClassEvent(apiObject, member.ID, group.AsRole(), "Not supported"))
139+
return false, "", nil
140+
}
141+
142+
// From here on it is known that the member requires replacement, so `true` must be returned.
143+
// If pod does not exist then it will try next time.
144+
if pod, ok := cachedStatus.Pod(member.PodName); ok {
145+
if _, ok := pod.GetAnnotations()[deployment.ArangoDeploymentPodReplaceAnnotation]; !ok {
146+
log.Warn().
147+
Str("pod-name", member.PodName).
148+
Str("server-group", group.AsRole()).
149+
Msgf("try changing a storage class name, but %s", getRequiredReplaceMessage(member.PodName))
150+
// No return here.
151+
}
152+
} else {
153+
return false, "", fmt.Errorf("failed to get pod %s", member.PodName)
154+
}
155+
156+
return true, "Storage class has changed", nil
157+
}
158+
159+
// isVolumeSizeChanged returns true and reason when the member should be replaced.
160+
func isVolumeSizeChanged(_ context.Context, log zerolog.Logger, _ k8sutil.APIObject, spec api.DeploymentSpec,
161+
_ api.DeploymentStatus, group api.ServerGroup, member api.MemberStatus,
162+
cachedStatus inspectorInterface.Inspector, _ PlanBuilderContext) (bool, string, error) {
163+
if spec.GetMode() == api.DeploymentModeSingle {
164+
// Storage cannot be changed in single server deployments.
165+
return false, "", nil
166+
}
167+
168+
if member.Phase != api.MemberPhaseCreated {
169+
// Only make changes when phase is created.
170+
return false, "", nil
171+
}
172+
173+
if member.PersistentVolumeClaimName == "" {
174+
// Plan is irrelevant without PVC.
175+
return false, "", nil
176+
}
177+
178+
pvc, ok := cachedStatus.PersistentVolumeClaim(member.PersistentVolumeClaimName)
179+
if !ok {
180+
log.Warn().
181+
Str("role", group.AsRole()).
182+
Str("id", member.ID).
183+
Msg("Failed to get PVC")
184+
185+
return false, "", fmt.Errorf("failed to get PVC %s", member.PersistentVolumeClaimName)
186+
}
187+
188+
groupSpec := spec.GetServerGroupSpec(group)
189+
ok, volumeSize, requestedSize := shouldVolumeResize(groupSpec, pvc)
190+
if !ok {
191+
return false, "", nil
192+
}
193+
194+
if group != api.ServerGroupDBServers {
195+
log.Error().
196+
Str("pvc-storage-size", volumeSize.String()).
197+
Str("requested-size", requestedSize.String()).
198+
Msgf("Volume size should not shrink, because it is not possible for \"%s\"", group.AsRole())
199+
200+
return false, "", nil
201+
}
202+
203+
// From here on it is known that the member requires replacement, so `true` must be returned.
204+
// If pod does not exist then it will try next time.
205+
if pod, ok := cachedStatus.Pod(member.PodName); ok {
206+
if _, ok := pod.GetAnnotations()[deployment.ArangoDeploymentPodReplaceAnnotation]; !ok {
207+
log.Warn().Str("pod-name", member.PodName).
208+
Msgf("try shrinking volume size, but %s", getRequiredReplaceMessage(member.PodName))
209+
// No return here.
210+
}
211+
} else {
212+
return false, "", fmt.Errorf("failed to get pod %s", member.PodName)
213+
}
214+
215+
return true, "Volume is shrunk", nil
216+
}
217+
218+
// shouldVolumeResize returns false when a volume should not resize.
219+
// Currently, it is only possible to shrink a volume size.
220+
// When return true then the actual and required volume size are returned.
221+
func shouldVolumeResize(groupSpec api.ServerGroupSpec,
222+
pvc *core.PersistentVolumeClaim) (bool, resource.Quantity, resource.Quantity) {
223+
var res core.ResourceList
224+
if groupSpec.HasVolumeClaimTemplate() {
225+
res = groupSpec.GetVolumeClaimTemplate().Spec.Resources.Requests
226+
} else {
227+
res = groupSpec.Resources.Requests
228+
}
229+
230+
if requestedSize, ok := res[core.ResourceStorage]; ok {
231+
if volumeSize, ok := pvc.Spec.Resources.Requests[core.ResourceStorage]; ok {
232+
if volumeSize.Cmp(requestedSize) > 0 {
233+
// The actual PVC's volume size is greater than requested size, so it can be shrunk to the requested size.
234+
return true, volumeSize, requestedSize
235+
}
236+
}
237+
}
238+
239+
return false, resource.Quantity{}, resource.Quantity{}
240+
}
241+
242+
func getRequiredReplaceMessage(podName string) string {
243+
return fmt.Sprintf("%s annotation is required to be set on the pod %s",
244+
deployment.ArangoDeploymentPodReplaceAnnotation, podName)
245+
}

pkg/deployment/reconcile/plan_builder_generator.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,9 @@ func (d *Reconciler) generatePlan(ctx context.Context, cachedStatus inspectorInt
8686
action := result.plan[id]
8787
d.context.CreateEvent(k8sutil.NewPlanAppendEvent(d.context.GetAPIObject(), action.Type.String(), action.Group.AsRole(), action.MemberID, action.Reason))
8888
if r := action.Reason; r != "" {
89-
d.log.Info().Str("Action", action.Type.String()).Str("Role", action.Group.AsRole()).Str("Member", action.MemberID).Str("Type", strings.Title(result.planner.Type())).Msgf(r)
89+
d.log.Info().Str("Action", action.Type.String()).
90+
Str("Role", action.Group.AsRole()).Str("Member", action.MemberID).
91+
Str("Type", strings.Title(result.planner.Type())).Msgf(r)
9092
}
9193
}
9294

pkg/deployment/reconcile/plan_builder_normal.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ func createNormalPlan(ctx context.Context, log zerolog.Logger, apiObject k8sutil
7272
ApplySubPlanIfEmpty(createTLSStatusPropagatedFieldUpdate, createCARenewalPlan).
7373
ApplySubPlanIfEmpty(createTLSStatusPropagatedFieldUpdate, createCAAppendPlan).
7474
ApplyIfEmpty(createKeyfileRenewalPlan).
75-
ApplyIfEmpty(createRotateServerStoragePlan).
7675
ApplyIfEmpty(createRotateServerStorageResizePlan).
7776
ApplySubPlanIfEmpty(createTLSStatusPropagatedFieldUpdate, createRotateTLSServerSNIPlan).
7877
ApplyIfEmpty(createRestorePlan).

0 commit comments

Comments
 (0)