Skip to content

Commit c2af35b

Browse files
jsmulrow authored and evergreen committed
SERVER-42299 Upgrade/downgrade for config.chunks and config.tags
1 parent f4c495f commit c2af35b

20 files changed

+1151
-50
lines changed
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/**
 * Tests that config.chunks and config.tags documents are correctly modified on FCV
 * upgrade/downgrade.
 */
(function() {
"use strict";

load("jstests/libs/parallelTester.js");  // for Thread.
load("jstests/multiVersion/libs/config_chunks_tags_shared.js");
load("jstests/sharding/libs/sharded_transactions_helpers.js");

// Verifies that chunk operations (split, moveChunk, mergeChunks) are rejected with
// ConflictingOperationInProgress while setFCV is paused, and that the chunk distribution
// is left unchanged after each failed attempt.
//
// Assumes ns has the following chunk layout: [-inf, -50), [-50, 0) on shard0 and [0, inf) on
// shard 1.
function verifyChunkOperationsFailDuringSetFCV(st, ns) {
    assert.commandFailedWithCode(st.s.adminCommand({split: ns, middle: {_id: 50}}),
                                 ErrorCodes.ConflictingOperationInProgress);
    verifyChunkDistribution(st, ns, [2, 1]);

    assert.commandFailedWithCode(
        st.s.adminCommand({moveChunk: ns, find: {_id: 0}, to: st.shard0.shardName}),
        ErrorCodes.ConflictingOperationInProgress);
    verifyChunkDistribution(st, ns, [2, 1]);

    assert.commandFailedWithCode(
        st.s.adminCommand({mergeChunks: ns, bounds: [{_id: MinKey}, {_id: 0}]}),
        ErrorCodes.ConflictingOperationInProgress);
    verifyChunkDistribution(st, ns, [2, 1]);
}

// Verifies that zone operations (updateZoneKeyRange, addShardToZone, removeShardFromZone)
// still succeed while setFCV is paused. Each change is immediately undone so the zone state
// is unchanged on return.
//
// Assumes shard0 is in zone0 which contains [-inf, 0) and is not in zone1.
function verifyZoneOperationsSucceedDuringSetFCV(st, ns) {
    // Remove the zone0 range (zone: null removes the range), then restore it.
    assert.commandWorked(
        st.s.adminCommand({updateZoneKeyRange: ns, min: {_id: MinKey}, max: {_id: 0}, zone: null}));
    assert.commandWorked(st.s.adminCommand(
        {updateZoneKeyRange: ns, min: {_id: MinKey}, max: {_id: 0}, zone: "zone0"}));

    // Add shard0 to zone1, then remove it again.
    assert.commandWorked(st.s.adminCommand({addShardToZone: st.shard0.shardName, zone: "zone1"}));
    assert.commandWorked(
        st.s.adminCommand({removeShardFromZone: st.shard0.shardName, zone: "zone1"}));
}

const dbName = "test";
const chunkNs = dbName + ".chunk_coll";
const zoneNs = dbName + ".zone_coll";

const st = new ShardingTest({shards: 2});
const configPrimary = st.configRS.getPrimary();

assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
st.ensurePrimaryShard(dbName, st.shard0.shardName);

setUpCollectionForChunksTesting(st, chunkNs);
setUpCollectionForZoneTesting(st, zoneNs);

//
// Verify chunk and tag documents are updated by setFeatureCompatibilityVersion.
//

checkFCV(configPrimary.getDB("admin"), latestFCV);

verifyChunksAndTags(st, dbName, chunkNs, zoneNs, {expectNewFormat: true});

jsTestLog("Downgrading FCV to last stable");
assert.commandWorked(st.s.adminCommand({setFeatureCompatibilityVersion: lastStableFCV}));
checkFCV(configPrimary.getDB("admin"), lastStableFCV);

verifyChunksAndTags(st, dbName, chunkNs, zoneNs, {expectNewFormat: false});

jsTestLog("Upgrading FCV to latest");
assert.commandWorked(st.s.adminCommand({setFeatureCompatibilityVersion: latestFCV}));
checkFCV(configPrimary.getDB("admin"), latestFCV);

verifyChunksAndTags(st, dbName, chunkNs, zoneNs, {expectNewFormat: true});

//
// Verify operations during setFeatureCompatibilityVersion use the correct format and that setFCV
// blocks behind in-progress shard collections on shard servers.
//

// Pauses setFCV at the config metadata up/downgrade failpoint, verifies that a concurrent
// shardCollection blocks setFCV until it completes, then exercises chunk/zone/CRUD behavior
// while setFCV is paused, and finally releases the failpoint and asserts the FCV change
// finished successfully.
function runInProgressSetFCVTest(st, {initialFCV, desiredFCV}) {
    // Pick the failpoint matching the direction of the FCV change.
    const pauseInSetFCVFailPointName = desiredFCV === lastStableFCV
        ? "pauseBeforeDowngradingConfigMetadata"
        : "pauseBeforeUpgradingConfigMetadata";

    // waitForFailpoint scans program output, so clear it to avoid matching hits from a
    // previous invocation of this function.
    clearRawMongoProgramOutput();
    checkFCV(configPrimary.getDB("admin"), initialFCV);

    // Pause setFCV to test the in-progress states.
    assert.commandWorked(configPrimary.adminCommand(
        {configureFailPoint: pauseInSetFCVFailPointName, mode: "alwaysOn"}));

    // Start and pause a shard collection, and verify that the setFCV blocks behind it.
    const shardCollDuringSetFCV = new Thread((host, ns) => {
        const mongosConn = new Mongo(host);
        return mongosConn.adminCommand({shardCollection: ns, key: {_id: 1}});
    }, st.s.host, dbName + ".setFCVTo" + desiredFCV);
    assert.commandWorked(st.rs0.getPrimary().adminCommand(
        {configureFailPoint: "pauseShardCollectionBeforeReturning", mode: "alwaysOn"}));
    shardCollDuringSetFCV.start();
    waitForFailpoint("Hit pauseShardCollectionBeforeReturning", 1 /* numTimes */);

    // Assert setFCV can't hit the failpoint until the shard collection completes.
    const changeFCV = new Thread((host, fcv) => {
        const mongosConn = new Mongo(host);
        return mongosConn.adminCommand({setFeatureCompatibilityVersion: fcv});
    }, st.s.host, desiredFCV);
    changeFCV.start();
    // The wait is expected to time out (throw) because setFCV is blocked behind the
    // paused shard collection.
    assert.throws(() => {
        waitForFailpoint("Hit " + pauseInSetFCVFailPointName, 1 /* numTimes */, 3000 /* timeout */);
    });

    // Unpause the shard collection and wait for setFCV to reach the failpoint.
    assert.commandWorked(st.rs0.getPrimary().adminCommand(
        {configureFailPoint: "pauseShardCollectionBeforeReturning", mode: "off"}));
    shardCollDuringSetFCV.join();
    waitForFailpoint("Hit " + pauseInSetFCVFailPointName, 1 /* numTimes */);

    // Verify behavior while setFCV is in progress.
    verifyChunkOperationsFailDuringSetFCV(st, chunkNs);
    verifyZoneOperationsSucceedDuringSetFCV(st, zoneNs);
    testCRUDOperations(st, chunkNs);
    testCRUDOperations(st, zoneNs);

    // A collection can still be sharded during setFCV and should write chunks correctly.
    verifyInitialChunks(
        st, dbName + ".newCollDuringFCV" + desiredFCV, {expectNewFormat: desiredFCV === latestFCV});

    // Unset the setFCV failpoint and allow setFCV to finish.
    assert.commandWorked(
        configPrimary.adminCommand({configureFailPoint: pauseInSetFCVFailPointName, mode: "off"}));
    changeFCV.join();
    assert.commandWorked(changeFCV.returnData());
    checkFCV(configPrimary.getDB("admin"), desiredFCV);

    verifyChunksAndTags(st, dbName, chunkNs, zoneNs, {expectNewFormat: desiredFCV === latestFCV});
}

// Exercise both directions: downgrade while paused, then upgrade while paused.
runInProgressSetFCVTest(st, {initialFCV: latestFCV, desiredFCV: lastStableFCV});
runInProgressSetFCVTest(st, {initialFCV: lastStableFCV, desiredFCV: latestFCV});

st.stop();
}());
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
/**
 * Tests upgrading a cluster from last stable to the latest version and downgrading it back to last
 * stable, verifying the behavior of chunk and zone operations throughout the process.
 */

// Checking UUID consistency uses cached connections, which are not valid across restarts or
// stepdowns.
TestData.skipCheckingUUIDsConsistentAcrossCluster = true;

(function() {
"use strict";

load("jstests/multiVersion/libs/config_chunks_tags_shared.js");
load("jstests/multiVersion/libs/multi_cluster.js");
load("jstests/multiVersion/libs/multi_rs.js");

// Runs commands on the config server that will use its RSM to target both shard primaries until
// they succeed. Bouncing an unrelated database's primary between both shards forces movePrimary
// operations that refresh the config server's replica set monitors; retried until no exception.
function waitForConfigServerShardRSMRetarget(st) {
    assert.soonNoExcept(() => {
        assert.commandWorked(st.s.getDB("unrelated_db").unrelated_coll.insert({x: 1}));
        st.ensurePrimaryShard("unrelated_db", st.shard0.shardName);
        st.ensurePrimaryShard("unrelated_db", st.shard1.shardName);
        st.ensurePrimaryShard("unrelated_db", st.shard0.shardName);
        assert.commandWorked(st.s.getDB("unrelated_db").dropDatabase());
        return true;
    });
}

const dbName = "test";
const chunkNs = dbName + ".chunk_coll";
const zoneNs = dbName + ".zone_coll";

// Start a cluster with two shards at the last stable version and a sharding enabled db.
const st = new ShardingTest({
    shards: 2,
    other: {
        mongosOptions: {binVersion: "last-stable"},
        configOptions: {binVersion: "last-stable"},
        rsOptions: {binVersion: "last-stable"},
    },
    rs: {nodes: 3}  // Use 3 node replica sets to allow binary changes with no downtime.
});
checkFCV(st.configRS.getPrimary().getDB("admin"), lastStableFCV);
assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
st.ensurePrimaryShard(dbName, st.shard0.shardName);

// Set up sharded collections for targeted chunk and zone operation testing.
setUpCollectionForChunksTesting(st, chunkNs);
setUpCollectionForZoneTesting(st, zoneNs);

// Set up another sharded collection on a different database to verify chunks and zones are updated
// for every sharded collection.
setUpExtraShardedCollections(st, "extra_db" /* dbName */);

//
// Upgrade the cluster to the latest version, one node type at a time, verifying chunk and zone
// behavior after each step. The document format only changes once the FCV is raised at the end.
//

verifyChunksAndTags(st, dbName, chunkNs, zoneNs, {expectNewFormat: false});

jsTestLog("Upgrading config servers.");
st.upgradeCluster("latest", {upgradeConfigs: true, upgradeMongos: false, upgradeShards: false});

verifyChunksAndTags(st, dbName, chunkNs, zoneNs, {expectNewFormat: false});

jsTestLog("Upgrading shard servers.");
st.upgradeCluster("latest", {upgradeConfigs: false, upgradeMongos: false, upgradeShards: true});

// Manually moving a chunk will use the config server's replica set monitor to target the primary of
// the source shard. After upgrading the shard servers above, this RSM may be stale, so run
// operations through the config server that will use the same RSM so it picks up the new primary.
waitForConfigServerShardRSMRetarget(st);

verifyChunksAndTags(st, dbName, chunkNs, zoneNs, {expectNewFormat: false});

jsTestLog("Upgrading mongos servers.");
st.upgradeCluster("latest", {upgradeConfigs: false, upgradeMongos: true, upgradeShards: false});
checkFCV(st.configRS.getPrimary().getDB("admin"), lastStableFCV);

verifyChunksAndTags(st, dbName, chunkNs, zoneNs, {expectNewFormat: false});

jsTestLog("Upgrade feature compatibility version to latest");
assert.commandWorked(st.s.getDB("admin").runCommand({setFeatureCompatibilityVersion: latestFCV}));
checkFCV(st.configRS.getPrimary().getDB("admin"), latestFCV);

verifyChunksAndTags(st, dbName, chunkNs, zoneNs, {expectNewFormat: true});

//
// Downgrade back to the last stable version, in the reverse order: FCV first, then mongos,
// shards, and config servers.
//

jsTestLog("Downgrade feature compatibility version to last stable");
assert.commandWorked(st.s.adminCommand({setFeatureCompatibilityVersion: lastStableFCV}));
checkFCV(st.configRS.getPrimary().getDB("admin"), lastStableFCV);

verifyChunksAndTags(st, dbName, chunkNs, zoneNs, {expectNewFormat: false});

jsTestLog("Downgrading mongos servers.");
st.upgradeCluster("last-stable",
                  {upgradeConfigs: false, upgradeMongos: true, upgradeShards: false});

verifyChunksAndTags(st, dbName, chunkNs, zoneNs, {expectNewFormat: false});

jsTestLog("Downgrading shard servers.");
st.upgradeCluster("last-stable",
                  {upgradeConfigs: false, upgradeMongos: false, upgradeShards: true});

// Manually moving a chunk will use the config server's replica set monitor to target the primary of
// the source shard. After downgrading the shard servers above, this RSM may be stale, so run
// operations through the config server that will use the same RSM so it picks up the new primary.
waitForConfigServerShardRSMRetarget(st);

verifyChunksAndTags(st, dbName, chunkNs, zoneNs, {expectNewFormat: false});

jsTestLog("Downgrading config servers.");
st.upgradeCluster("last-stable",
                  {upgradeConfigs: true, upgradeMongos: false, upgradeShards: false});
checkFCV(st.configRS.getPrimary().getDB("admin"), lastStableFCV);

verifyChunksAndTags(st, dbName, chunkNs, zoneNs, {expectNewFormat: false});

st.stop();
})();

0 commit comments

Comments
 (0)