-
Notifications
You must be signed in to change notification settings - Fork 161
K8SPSMDB-1296: improve readiness probe #1917
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 34 commits
d6e9d6b
09a63a1
40b7674
07f1be6
d75cca1
6f268bf
52df399
5d322e3
363fc77
ce419af
1c3c592
fb8a8ef
e5ae37d
c896cbe
ae932d3
01dedaa
d541d8e
ad260bb
62650d9
160e3ea
5116da4
8fc44b8
5c63a72
0520bbd
5059ea5
6fa92a8
1141926
c0b21b5
cd834df
49886fe
64dfbf7
b0bedb6
c66e663
b9f688b
aa34abc
335aef1
704adc3
854d35b
02b4bc7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,12 +16,9 @@ package healthcheck | |
|
|
||
| import ( | ||
| "context" | ||
| "encoding/json" | ||
|
|
||
| v "github.com/hashicorp/go-version" | ||
| "github.com/pkg/errors" | ||
| "go.mongodb.org/mongo-driver/bson" | ||
| "go.mongodb.org/mongo-driver/bson/primitive" | ||
| logf "sigs.k8s.io/controller-runtime/pkg/log" | ||
|
|
||
| "github.com/percona/percona-server-mongodb-operator/cmd/mongodb-healthcheck/db" | ||
|
|
@@ -32,6 +29,7 @@ var ErrNoReplsetConfigStr = "(NotYetInitialized) no replset config has been rece | |
|
|
||
| func HealthCheckMongosLiveness(ctx context.Context, cnf *db.Config) (err error) { | ||
| log := logf.FromContext(ctx).WithName("HealthCheckMongosLiveness") | ||
| ctx = logf.IntoContext(ctx, log) | ||
|
|
||
| client, err := db.Dial(ctx, cnf) | ||
| if err != nil { | ||
|
|
@@ -58,6 +56,7 @@ func HealthCheckMongosLiveness(ctx context.Context, cnf *db.Config) (err error) | |
|
|
||
| func HealthCheckMongodLiveness(ctx context.Context, cnf *db.Config, startupDelaySeconds int64) (_ *mongo.MemberState, err error) { | ||
| log := logf.FromContext(ctx).WithName("HealthCheckMongodLiveness") | ||
| ctx = logf.IntoContext(ctx, log) | ||
|
|
||
| client, err := db.Dial(ctx, cnf) | ||
| if err != nil { | ||
|
|
@@ -74,50 +73,14 @@ func HealthCheckMongodLiveness(ctx context.Context, cnf *db.Config, startupDelay | |
| return nil, errors.Wrap(err, "get isMaster response") | ||
| } | ||
|
|
||
| buildInfo, err := client.RSBuildInfo(ctx) | ||
| rsStatus, err := client.RSStatus(ctx) | ||
| if err != nil { | ||
| return nil, errors.Wrap(err, "get buildInfo response") | ||
| } | ||
|
|
||
| replSetStatusCommand := bson.D{{Key: "replSetGetStatus", Value: 1}} | ||
| mongoVersion := v.Must(v.NewVersion(buildInfo.Version)) | ||
| if mongoVersion.Compare(v.Must(v.NewVersion("4.2.1"))) < 0 { | ||
| // https://docs.mongodb.com/manual/reference/command/replSetGetStatus/#syntax | ||
| replSetStatusCommand = append(replSetStatusCommand, primitive.E{Key: "initialSync", Value: 1}) | ||
| } | ||
|
|
||
| res := client.Database("admin").RunCommand(ctx, replSetStatusCommand) | ||
| if res.Err() != nil { | ||
| // if we come this far, it means db connection was successful | ||
| // standalone mongod nodes in an unmanaged cluster doesn't need | ||
| // to die before they added to a replset | ||
| if res.Err().Error() == ErrNoReplsetConfigStr { | ||
| if err.Error() == ErrNoReplsetConfigStr { | ||
| state := mongo.MemberStateUnknown | ||
| log.V(1).Info("replSetGetStatus failed", "err", res.Err().Error(), "state", state) | ||
| log.V(1).Info("replSetGetStatus failed", "err", err.Error(), "state", state) | ||
| return &state, nil | ||
| } | ||
| return nil, errors.Wrap(res.Err(), "get replsetGetStatus response") | ||
| } | ||
|
|
||
| // this is a workaround to fix decoding of empty interfaces | ||
| // https://jira.mongodb.org/browse/GODRIVER-988 | ||
| rsStatus := ReplSetStatus{} | ||
| tempResult := bson.M{} | ||
| err = res.Decode(&tempResult) | ||
| if err != nil { | ||
| return nil, errors.Wrap(err, "decode replsetGetStatus response") | ||
| } | ||
|
|
||
| if err == nil { | ||
| result, err := json.Marshal(tempResult) | ||
| if err != nil { | ||
| return nil, errors.Wrap(err, "marshal temp result") | ||
| } | ||
|
|
||
| err = json.Unmarshal(result, &rsStatus) | ||
| if err != nil { | ||
| return nil, errors.Wrap(err, "unmarshal temp result") | ||
| } | ||
| return nil, errors.Wrap(err, "get replSetGetStatus response") | ||
| } | ||
|
|
||
| oplogRs := OplogRs{} | ||
|
|
@@ -156,14 +119,10 @@ type OplogRs struct { | |
| StorageSize int64 `bson:"storageSize" json:"storageSize"` | ||
| } | ||
|
|
||
| type ReplSetStatus struct { | ||
| InitialSyncStatus InitialSyncStatus `bson:"initialSyncStatus" json:"initialSyncStatus"` | ||
| mongo.Status `bson:",inline"` | ||
| } | ||
|
|
||
| type InitialSyncStatus interface{} | ||
|
|
||
| func CheckState(rs ReplSetStatus, startupDelaySeconds int64, oplogSize int64) error { | ||
| func CheckState(rs mongo.Status, startupDelaySeconds int64, oplogSize int64) error { | ||
| if rs.GetSelf() == nil { | ||
| return errors.New("invalid replset status") | ||
|
||
| } | ||
| uptime := rs.GetSelf().Uptime | ||
|
|
||
| switch rs.MyState { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,7 @@ package healthcheck | |
| import ( | ||
| "context" | ||
| "net" | ||
| "time" | ||
|
|
||
| "github.com/pkg/errors" | ||
| "go.mongodb.org/mongo-driver/bson" | ||
|
|
@@ -27,21 +28,56 @@ import ( | |
| ) | ||
|
|
||
| // MongodReadinessCheck runs a ping on a pmgo.SessionManager to check server readiness | ||
| func MongodReadinessCheck(ctx context.Context, addr string) error { | ||
| func MongodReadinessCheck(ctx context.Context, cnf *db.Config) error { | ||
| log := logf.FromContext(ctx).WithName("MongodReadinessCheck") | ||
| ctx = logf.IntoContext(ctx, log) | ||
|
|
||
| var d net.Dialer | ||
|
|
||
| addr := cnf.Hosts[0] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we ensure that hosts are not empty/nil?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
| log.V(1).Info("Connecting to " + addr) | ||
| conn, err := d.DialContext(ctx, "tcp", addr) | ||
| if err != nil { | ||
| return errors.Wrap(err, "dial") | ||
| } | ||
| return conn.Close() | ||
| if err := conn.Close(); err != nil { | ||
| return err | ||
| } | ||
|
|
||
| s, err := func() (*mongo.Status, error) { | ||
| cnf.Timeout = time.Second | ||
| client, err := db.Dial(ctx, cnf) | ||
pooknull marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if err != nil { | ||
| return nil, nil | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why are we swallowing this error?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
| } | ||
| defer func() { | ||
| if derr := client.Disconnect(ctx); derr != nil && err == nil { | ||
| err = errors.Wrap(derr, "failed to disconnect") | ||
| } | ||
pooknull marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| }() | ||
| rs, err := client.RSStatus(ctx) | ||
| if err != nil { | ||
| if errors.Is(err, mongo.ErrInvalidReplsetConfig) { | ||
| return nil, nil | ||
| } | ||
| return nil, err | ||
| } | ||
| return &rs, nil | ||
| }() | ||
| if err != nil || s == nil { | ||
| return err | ||
|
||
| } | ||
|
|
||
| if err := CheckState(*s, 0, 0); err != nil { | ||
| return errors.Wrap(err, "check state") | ||
| } | ||
pooknull marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| return nil | ||
| } | ||
|
|
||
| func MongosReadinessCheck(ctx context.Context, cnf *db.Config) (err error) { | ||
| log := logf.FromContext(ctx).WithName("MongosReadinessCheck") | ||
| ctx = logf.IntoContext(ctx, log) | ||
|
|
||
| client, err := db.Dial(ctx, cnf) | ||
| if err != nil { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Given that rs.GetSelf is used again on L126, it would be better to assign its result to a variable here, perform the nil check on that variable, and use it in the rest of the function: GetSelf loops through the members, so it does not need to run on every invocation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
704adc3