...
Spinoff from SERVER-74679. I tried reproducing this, and then, when trying to fix it by renaming the collection back (which I think succeeded, even though config.shards no longer exists), I may have won a race condition. I still couldn't do anything on the cluster, so I then tried to rename the collection directly on the csrs (functioning as both csrs and shard), which succeeded. Now I'm in a deadlock with myself.

Timeline (all logs from the csrs-but-also-shard):

First rename, "config.shards" -> "config.foo2". As far as I can tell, this completes:

{"t":{"$date":"2023-03-08T13:18:46.716+11:00"},"s":"I", "c":"SHARDING", "id":22080, "ctx":"ShardingDDLCoordinator-3","msg":"About to log metadata event","attr":{"namespace":"changelog","event":{"_id":"M-K4F3614CKP:27019-2023-03-08T13:18:46.716+11:00-6407f0860b0873873c5224e5","server":"M-K4F3614CKP:27019","shard":"config","clientAddr":"","time":{"$date":"2023-03-08T02:18:46.716Z"},"what":"renameCollection.start","ns":"config.shards","details":{"source":"config.shards","destination":"config.foo2"}}}}
...
{"t":{"$date":"2023-03-08T13:18:46.814+11:00"},"s":"I", "c":"SHARDING", "id":5515106, "ctx":"RenameCollectionParticipantService-0","msg":"Collection locally renamed, waiting for CRUD to be unblocked","attr":{"fromNs":"config.shards","toNs":"config.foo2"}}
{"t":{"$date":"2023-03-08T13:18:46.837+11:00"},"s":"I", "c":"SHARDING", "id":22080, "ctx":"conn44","msg":"About to log metadata event","attr":{"namespace":"changelog","event":{"_id":"M-K4F3614CKP:27019-2023-03-08T13:18:46.836+11:00-6407f0860b0873873c5224ff","server":"M-K4F3614CKP:27019","shard":"config","clientAddr":"","time":{"$date":"2023-03-08T02:18:46.836Z"},"what":"renameCollection.metadata","ns":"admin.shards to config.foo2 : dropped target collection.","details":{}}}}
...
{"t":{"$date":"2023-03-08T13:18:46.919+11:00"},"s":"I", "c":"SHARDING", "id":5460504, "ctx":"ShardingDDLCoordinator-1","msg":"Collection renamed","attr":{"namespace":"config.shards"}} {"t":{"$date":"2023-03-08T13:18:46.920+11:00"},"s":"I", "c":"SHARDING", "id":5565601, "ctx":"ShardingDDLCoordinator-2","msg":"Releasing sharding DDL coordinator","attr":{"coordinatorId":{"namespace":"config.shards","operationType":"renameCollection"}}} {"t":{"$date":"2023-03-08T13:18:46.932+11:00"},"s":"I", "c":"SHARDING", "id":6855302, "ctx":"ShardingDDLCoordinator-2","msg":"Released DDL lock","attr":{"resource":"config.foo2","reason":"renameCollection"}} {"t":{"$date":"2023-03-08T13:18:46.933+11:00"},"s":"I", "c":"SHARDING", "id":6855302, "ctx":"ShardingDDLCoordinator-2","msg":"Released DDL lock","attr":{"resource":"config.shards","reason":"renameCollection"}} {"t":{"$date":"2023-03-08T13:18:46.933+11:00"},"s":"I", "c":"SHARDING", "id":6855302, "ctx":"ShardingDDLCoordinator-2","msg":"Released DDL lock","attr":{"resource":"config","reason":"renameCollection"}} {"t":{"$date":"2023-03-08T13:18:46.934+11:00"},"s":"I", "c":"COMMAND", "id":51803, "ctx":"conn69","msg":"Slow query","attr":{"type":"command","ns":"config.$cmd","appName":"MongoDB Shell","command":{"_shardsvrRenameCollection":"shards","to":"config.foo2","dropTarget":false,"stayTemp":false,"allowEncryptedCollectionRename":true,"writeConcern":{"w":"majority","wtimeout":60000},"lsid":{"id":{"$uuid":"c37d658d-be88-4ffc-a4ea-d5ec08523f4a"},"uid":{"$binary":{"base64":"47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=","subType":"0"}}},"$replData":1,"$clusterTime":{"clusterTime":{"$timestamp":{"t":1678241921,"i":7}},"signature":{"hash":{"$binary":{"base64":"AAAAAAAAAAAAAAAAAAAAAAAAAAA=","subType":"0"}},"keyId":0}},"$configTime":{"$timestamp":{"t":1678241921,"i":7}},"$topologyTime":{"$timestamp":{"t":1678168305,"i":2}},"$client":{"application":{"name":"MongoDB Shell"},"driver":{"name":"MongoDB Internal Client","version":"7.0.0-alpha-538-g7cec1b7"},"os":{"type":"Darwin","name":"Mac OS X","architecture":"arm64","version":"21.6.0"},"mongos":{"host":"M-K4F3614CKP:27017","client":"127.0.0.1:52330","version":"7.0.0-alpha-538-g7cec1b7"}},"mayBypassWriteBlocking":false,"$db":"config"},"numYields":0,"reslen":515,"locks":{"Mutex":{"acquireCount":{"r":1}}},"writeConcern":{"w":"majority","wtimeout":60000,"provenance":"clientSupplied"},"remote":"127.0.0.1:62699","protocol":"op_msg","durationMillis":243}} Renaming it back "config.foo2" -> "config.shards" , still via the mongos (which I think did not succeed) {"t":{"$date":"2023-03-08T13:19:14.864+11:00"},"s":"I", "c":"SHARDING", "id":5390510, "ctx":"conn69","msg":"Constructing new sharding DDL coordinator","attr":{"coordinatorDoc":{"_id":{"namespace":"config.foo2","operationType":"renameCollection"},"recoveredFromDisk":false,"forwardableOpMetadata":{"mayBypassWriteBlocking":false}}}} {"t":{"$date":"2023-03-08T13:19:14.865+11:00"},"s":"I", "c":"SHARDING", "id":6855301, "ctx":"ShardingDDLCoordinator-1","msg":"Acquired DDL lock","attr":{"resource":"config","reason":"renameCollection"}} {"t":{"$date":"2023-03-08T13:19:14.866+11:00"},"s":"I", "c":"SHARDING", "id":6855301, "ctx":"ShardingDDLCoordinator-1","msg":"Acquired DDL lock","attr":{"resource":"config.foo2","reason":"renameCollection"}} {"t":{"$date":"2023-03-08T13:19:14.869+11:00"},"s":"I", "c":"SHARDING", "id":6855301, "ctx":"ShardingDDLCoordinator-2","msg":"Acquired DDL lock","attr":{"resource":"config.shards","reason":"renameCollection"}} 
{"t":{"$date":"2023-03-08T13:19:14.884+11:00"},"s":"I", "c":"SHARDING", "id":22080, "ctx":"ShardingDDLCoordinator-3","msg":"About to log metadata event","attr":{"namespace":"changelog","event":{"_id":"M-K4F3614CKP:27019-2023-03-08T13:19:14.884+11:00-6407f0a20b0873873c5225ae","server":"M-K4F3614CKP:27019","shard":"config","clientAddr":"","time":{"$date":"2023-03-08T02:19:14.884Z"},"what":"renameCollection.start","ns":"config.foo2","details":{"source":"config.foo2","destination":"config.shards"}}}} {"t":{"$date":"2023-03-08T13:19:14.912+11:00"},"s":"I", "c":"SHARDING", "id":22080, "ctx":"conn44","msg":"About to log metadata event","attr":{"namespace":"changelog","event":{"_id":"M-K4F3614CKP:27019-2023-03-08T13:19:14.911+11:00-6407f0a20b0873873c5225b8","server":"M-K4F3614CKP:27019","shard":"config","clientAddr":"","time":{"$date":"2023-03-08T02:19:14.911Z"},"what":"renameCollection.metadata","ns":"admin.foo2 to config.shards : dropped target collection.","details":{}}}} {"t":{"$date":"2023-03-08T13:19:14.937+11:00"},"s":"I", "c":"SH_REFR", "id":4619902, "ctx":"CatalogCache-19","msg":"Collection has found to be unsharded after refresh","attr":{"namespace":"config.shards","durationMillis":0}} {"t":{"$date":"2023-03-08T13:19:14.938+11:00"},"s":"I", "c":"SHARDING", "id":22080, "ctx":"ShardingDDLCoordinator-2","msg":"About to log metadata event","attr":{"namespace":"changelog","event":{"_id":"M-K4F3614CKP:27019-2023-03-08T13:19:14.938+11:00-6407f0a20b0873873c5225c1","server":"M-K4F3614CKP:27019","shard":"config","clientAddr":"","time":{"$date":"2023-03-08T02:19:14.938Z"},"what":"renameCollection.end","ns":"config.foo2","details":{"source":"config.foo2","destination":"config.shards"}}}} {"t":{"$date":"2023-03-08T13:19:14.944+11:00"},"s":"I", "c":"SHARDING", "id":5460504, "ctx":"ShardingDDLCoordinator-2","msg":"Collection renamed","attr":{"namespace":"config.foo2"}} {"t":{"$date":"2023-03-08T13:19:14.945+11:00"},"s":"I", "c":"SHARDING", "id":5565601, "ctx":"ShardingDDLCoordinator-1","msg":"Releasing sharding DDL coordinator","attr":{"coordinatorId":{"namespace":"config.foo2","operationType":"renameCollection"}}} {"t":{"$date":"2023-03-08T13:19:14.955+11:00"},"s":"I", "c":"SHARDING", "id":6855302, "ctx":"ShardingDDLCoordinator-1","msg":"Released DDL lock","attr":{"resource":"config.shards","reason":"renameCollection"}} {"t":{"$date":"2023-03-08T13:19:14.956+11:00"},"s":"I", "c":"SHARDING", "id":6855302, "ctx":"ShardingDDLCoordinator-1","msg":"Released DDL lock","attr":{"resource":"config.foo2","reason":"renameCollection"}} {"t":{"$date":"2023-03-08T13:19:14.957+11:00"},"s":"I", "c":"SHARDING", "id":6855302, "ctx":"ShardingDDLCoordinator-1","msg":"Released DDL lock","attr":{"resource":"config","reason":"renameCollection"}} {"t":{"$date":"2023-03-08T13:19:22.512+11:00"},"s":"W", "c":"SHARDING", "id":21852, "ctx":"Balancer","msg":"Unable to enforce zone range policy for collection","attr":{"namespace":"test.foo","error":{"code":70,"codeName":"ShardNotFound","errmsg":"The primary shard for database test does not exist :: caused by :: Shard shard2 not found"}}} Attempt to redo the "config.foo2" -> "config.shards" this time directly on the csrs-but-also-shard {"t":{"$date":"2023-03-08T13:21:56.822+11:00"},"s":"I", "c":"NETWORK", "id":22943, "ctx":"listener","msg":"Connection accepted","attr":{"remote":"127.0.0.1:63567","uuid":{"uuid":{"$uuid":"2709bddd-0ce5-49e1-9450-4313e13849fa"}},"connectionId":112,"connectionCount":14}} {"t":{"$date":"2023-03-08T13:21:56.822+11:00"},"s":"I", "c":"NETWORK", "id":51800, 
"ctx":"conn112","msg":"client metadata","attr":{"remote":"127.0.0.1:63567","client":"conn112","doc":{"application":{"name":"MongoDB Shell"},"driver":{"name":"MongoDB Internal Client","version":"7.0.0-alpha-538-g7cec1b7"},"os":{"type":"Darwin","name":"Mac OS X","architecture":"arm64","version":"21.6.0"}}}} ... {"t":{"$date":"2023-03-08T13:22:15.248+11:00"},"s":"I", "c":"COMMAND", "id":20400, "ctx":"conn112","msg":"renameCollectionForCommand","attr":{"sourceNamespace":"config.foo2","targetNamespace":"config.shards","dropTarget":"no"}} {"t":{"$date":"2023-03-08T13:22:15.248+11:00"},"s":"I", "c":"STORAGE", "id":20319, "ctx":"conn112","msg":"renameCollection","attr":{"uuid":{"uuid":{"$uuid":"870d0320-10ba-4fb4-976c-6242115c9423"}},"fromName":"config.foo2","toName":"config.shards"}} ... {"t":{"$date":"2023-03-08T13:22:58.293+11:00"},"s":"I", "c":"SHARDING", "id":22080, "ctx":"ShardingDDLCoordinator-4","msg":"About to log metadata event","attr":{"namespace":"changelog","event":{"_id":"M-K4F3614CKP:27019-2023-03-08T13:22:58.292+11:00-6407f1820b0873873c522a29","server":"M-K4F3614CKP:27019","shard":"config","clientAddr":"","time":{"$date":"2023-03-08T02:22:58.292Z"},"what":"renameCollection.start","ns":"config.shards","details":{"source":"config.shards","destination":"config.foo2"}}}} {"t":{"$date":"2023-03-08T13:22:58.317+11:00"},"s":"E", "c":"ASSERT", "id":4457000, "ctx":"RenameCollectionParticipantService-2","msg":"Tripwire assertion","attr":{"error":{"code":7032368,"codeName":"Location7032368","errmsg":"Trying to acquire a critical section blocking writes for namespace 'config.shards' and reason '{ command: \"rename\", from: \"config.shards\", to: \"config.foo2\" }' but it is already taken by another operation with different reason '{ command: \"rename\", from: \"config.foo2\", to: \"config.shards\" }'"},"location":"{fileName:\"src/mongo/db/s/sharding_recovery_service.cpp\", line:200, functionName:\"acquireRecoverableCriticalSectionBlockWrites\"}"}} (there is a long BACKTRACE here) This sounds like my first rename (from: \"config.shards\", to: \"config.foo2\") is being blocked by my second rename (from: \"config.foo2\", to: \"config.shards\"). Even though it looks like from the logs the first rename ran to completion, so it's not clear why it's still in progress here.
xgen-internal-githook commented on Thu, 31 Aug 2023 11:41:30 +0000:
Author: {'name': 'Antonio Fuschetto', 'email': 'antonio.fuschetto@mongodb.com', 'username': 'afuschetto'}
Message: SERVER-74734 Test cannot rename system collections
Branch: master
https://github.com/mongodb/mongo/commit/be4715ad86a7474d30b58e6fbb3cdbbc7107e412

JIRAUSER1259062 commented on Thu, 31 Aug 2023 08:59:56 +0000:
joanna.cheng@mongodb.com, after SERVER-74679, collections in the config and admin databases can no longer be renamed. I'm about to add some automated tests to cover these use cases, as that's the only piece missing to close the loop.

joanna.cheng@10gen.com commented on Fri, 10 Mar 2023 00:23:37 +0000:
Follow-up comment copied over too:
After renaming the "config.shards" collection to something else, it looks like further rename operations return success, but the rename is not reflected in listCollections. This is true for collections in databases other than config as well. I suspect that the existence of a "config.shards" collection is required for collection renames to succeed.
At a minimum, we should confirm that the config.shards collection is available/exists when executing a DDL operation that depends on it, and fail the command appropriately when it is not available. An example of where we handle this correctly is trying to create a new database/collection when config.shards doesn't exist:

mongos> use test2
switched to db test2
mongos> db.test.insert({})
WriteCommandError({
    "ok" : 0,
    "errmsg" : "Database test2 could not be created :: caused by :: No shards found",
    "code" : 70,
    "codeName" : "ShardNotFound",
    "$clusterTime" : {
        "clusterTime" : Timestamp(1678292273, 2),
        "signature" : {
            "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
            "keyId" : NumberLong(0)
        }
    },
    "operationTime" : Timestamp(1678292273, 2)
})
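To illustrate the kind of precondition check suggested above, an operator or a test could verify that config.shards still exists before issuing a rename. This is only a sketch against a mongos connection (not the server-side fix itself, which would live in the DDL machinery):

mongos> db.getSiblingDB("config").getCollectionNames().includes("shards")
true

After the rename described in this ticket, the same check would return false, and a DDL operation that depends on config.shards could then fail with a clear error instead of reporting success.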