...
Hi, We recently updated our systems to mongo 4.4, we face a several regression on the performance for the aggregation on the new version. This query: mongo --eval 'db.getCollection("country_esp_static").aggregate([{$match:{$and:[ {"shape":"geoHash"} ,{$or:[{"tags.level0":{$in:["esp"]}}]}]}},{$match:{$and:[\{"data.UN_2010_E":{$gte:1}},\\{"data.UN_2010_E":{$lte:37917}}]}},{$addFields:{"weightFilters":[{$multiply:[{$cond:[{$gte:["$data.UN_2010_E",374.9025723525453]},{$add:[{$multiply:[{$divide:[{$subtract:["$data.UN_2010_E",374.9025723525453]},7508.41948552949]},10]},50]},{$multiply:[{$divide:[{$subtract:["$data.UN_2010_E",1]},74.78051447050906]},10]}]},1]}]}},\{$addFields:{"weight":{$sum:"$weightFilters"}}},{$match:{$and:[\{"weight":{$gte:0}},\\{"weight":{$lte:100}}]}},\{$group:{"_id":"","UN_2010_E":{$sum:"$data.UN_2010_E"},"A18_24B":\{$sum:"$data.A18_24B"},"A25_34B":\{$sum:"$data.A25_34B"},"A35_44B":\{$sum:"$data.A35_44B"},"A45_54B":\{$sum:"$data.A45_54B"},"A55_64B":\{$sum:"$data.A55_64B"},"A65PLUSB":\{$sum:"$data.A65PLUSB"},"F_2010_E":\{$sum:"$data.F_2010_E"},"M_2010_E":\{$sum:"$data.M_2010_E"},"B_AFRICA":\{$sum:"$data.B_AFRICA"},"B_ASIA":\{$sum:"$data.B_ASIA"},"B_CS_AMERICA":\{$sum:"$data.B_CS_AMERICA"},"B_NOT_UE":\{$sum:"$data.B_NOT_UE"},"B_N_AMERICA":\{$sum:"$data.B_N_AMERICA"},"B_OCEANIA":\{$sum:"$data.B_OCEANIA"},"B_SPAIN":\{$sum:"$data.B_SPAIN"},"B_UE":\{$sum:"$data.B_UE"},"NOT_SPANISH":\{$sum:"$data.NOT_SPANISH"},"SPANISH":\{$sum:"$data.SPANISH"},"MS_DIVORCED":\{$sum:"$data.MS_DIVORCED"},"MS_MARRIED":\{$sum:"$data.MS_MARRIED"},"MS_SEPARATED":\{$sum:"$data.MS_SEPARATED"},"MS_SINGLE":\{$sum:"$data.MS_SINGLE"},"MS_WIDOW":\{$sum:"$data.MS_WIDOW"},"SL_ILLITERATE":\{$sum:"$data.SL_ILLITERATE"},"SL_LEVEL1":\{$sum:"$data.SL_LEVEL1"},"SL_LEVEL2":\{$sum:"$data.SL_LEVEL2"},"SL_LEVEL3":\{$sum:"$data.SL_LEVEL3"},"SL_NOTINFO":\{$sum:"$data.SL_NOTINFO"},"SL_UNEDUCATED":\{$sum:"$data.SL_UNEDUCATED"},"IAB1":\{$sum:"$data.IAB1"},"IAB10":\{$sum:"$data.IAB10"},"IAB11":\{$sum:"$data.IAB11"},"IAB12":\{$sum:"$data.IAB12"},"IAB13":\{$sum:"$data.IAB13"},"IAB14":\{$sum:"$data.IAB14"},"IAB15":\{$sum:"$data.IAB15"},"IAB16":\{$sum:"$data.IAB16"},"IAB17":\{$sum:"$data.IAB17"},"IAB18":\{$sum:"$data.IAB18"},"IAB19":\{$sum:"$data.IAB19"},"IAB2":\{$sum:"$data.IAB2"},"IAB20":\{$sum:"$data.IAB20"},"IAB21":\{$sum:"$data.IAB21"},"IAB22":\{$sum:"$data.IAB22"},"IAB23":\{$sum:"$data.IAB23"},"IAB3":\{$sum:"$data.IAB3"},"IAB4":\{$sum:"$data.IAB4"},"IAB5":\{$sum:"$data.IAB5"},"IAB6":\{$sum:"$data.IAB6"},"IAB7":\{$sum:"$data.IAB7"},"IAB8":\{$sum:"$data.IAB8"},"IAB9":\{$sum:"$data.IAB9"},"HOME_1PERSONS":\{$sum:"$data.HOME_1PERSONS"},"HOME_2PERSONS":\{$sum:"$data.HOME_2PERSONS"},"HOME_3PERSONS":\{$sum:"$data.HOME_3PERSONS"},"HOME_4PERSONS":\{$sum:"$data.HOME_4PERSONS"},"HOME_5PERSONS":\{$sum:"$data.HOME_5PERSONS"},"HOME_6PERSONS":\{$sum:"$data.HOME_6PERSONS"},"HOME_TOTAL":\{$sum:"$data.HOME_TOTAL"},"TEN_106_120M":\{$sum:"$data.TEN_106_120M"},"TEN_121_150M":\{$sum:"$data.TEN_121_150M"},"TEN_151_180M":\{$sum:"$data.TEN_151_180M"},"TEN_1ROOM":\{$sum:"$data.TEN_1ROOM"},"TEN_2ROOM":\{$sum:"$data.TEN_2ROOM"},"TEN_30_45M":\{$sum:"$data.TEN_30_45M"},"TEN_3ROOM":\{$sum:"$data.TEN_3ROOM"},"TEN_46_60M":\{$sum:"$data.TEN_46_60M"},"TEN_4ROOM":\{$sum:"$data.TEN_4ROOM"},"TEN_5ROOM":\{$sum:"$data.TEN_5ROOM"},"TEN_61_75M":\{$sum:"$data.TEN_61_75M"},"TEN_6ROOM":\{$sum:"$data.TEN_6ROOM"},"TEN_76_90M":\{$sum:"$data.TEN_76_90M"},"TEN_7ROOM":\{$sum:"$data.TEN_7ROOM"},"TEN_8ROOM":\{$sum:"$data.TEN_8ROOM"},"TEN_91_105M":\{$sum:"$data.TEN_91_105M"},"TEN_EMPTY":\{$sum:"$data.TEN_EMPTY"},"TEN_LESS30M":\{$sum:"$data.TEN_LESS30M"},"TEN_MAIN":\{$sum:"$data.TEN_MAIN"},"TEN_MORE180M":\{$sum:"$data.TEN_MORE180M"},"TEN_MORE8ROOM":\{$sum:"$data.TEN_MORE8ROOM"},"TEN_OWNED":\{$sum:"$data.TEN_OWNED"},"TEN_OWNED_INHERITED":\{$sum:"$data.TEN_OWNED_INHERITED"},"TEN_OWNED_PAYING":\{$sum:"$data.TEN_OWNED_PAYING"},"TEN_REND_LOW_COST":\{$sum:"$data.TEN_REND_LOW_COST"},"TEN_REND_OTHER":\{$sum:"$data.TEN_REND_OTHER"},"TEN_RENT":\{$sum:"$data.TEN_RENT"},"TEN_SECOND":\{$sum:"$data.TEN_SECOND"},"TOTAL_TEN":\{$sum:"$data.TOTAL_TEN"},"digitalAffinity_general_conversion":\{$avg:"$data.digitalAffinity_general_conversion"},"digitalAffinity_general_ctr":\{$avg:"$data.digitalAffinity_general_ctr"},"digitalAffinity_general_digitalConversion":\{$avg:"$data.digitalAffinity_general_digitalConversion"},"digitalAffinity_general_digitalRate":\{$avg:"$data.digitalAffinity_general_digitalRate"},"digitalAffinity_general_displayEngagement":\{$avg:"$data.digitalAffinity_general_displayEngagement"},"digitalAffinity_general_displaySaturation":\{$avg:"$data.digitalAffinity_general_displaySaturation"},"digitalAffinity_general_engagement":\{$avg:"$data.digitalAffinity_general_engagement"},"digitalAffinity_general_saturation":\{$avg:"$data.digitalAffinity_general_saturation"},"digitalAffinity_general_storeVisitConversion":\{$avg:"$data.digitalAffinity_general_storeVisitConversion"},"digitalAffinity_general_videoEngagement":\{$avg:"$data.digitalAffinity_general_videoEngagement"},"digitalAffinity_general_videoSaturation":\{$avg:"$data.digitalAffinity_general_videoSaturation"},"digitalAffinity_general_vtr":\{$avg:"$data.digitalAffinity_general_vtr"},"digitalAffinity_automotive_conversion":\{$avg:"$data.digitalAffinity_automotive_conversion"},"digitalAffinity_automotive_ctr":\{$avg:"$data.digitalAffinity_automotive_ctr"},"digitalAffinity_automotive_digitalConversion":\{$avg:"$data.digitalAffinity_automotive_digitalConversion"},"digitalAffinity_automotive_digitalRate":\{$avg:"$data.digitalAffinity_automotive_digitalRate"},"digitalAffinity_automotive_displayEngagement":\{$avg:"$data.digitalAffinity_automotive_displayEngagement"},"digitalAffinity_automotive_displaySaturation":\{$avg:"$data.digitalAffinity_automotive_displaySaturation"},"digitalAffinity_automotive_engagement":\{$avg:"$data.digitalAffinity_automotive_engagement"},"digitalAffinity_automotive_saturation":\{$avg:"$data.digitalAffinity_automotive_saturation"},"digitalAffinity_automotive_storeVisitConversion":\{$avg:"$data.digitalAffinity_automotive_storeVisitConversion"},"digitalAffinity_automotive_videoEngagement":\{$avg:"$data.digitalAffinity_automotive_videoEngagement"},"digitalAffinity_automotive_videoSaturation":\{$avg:"$data.digitalAffinity_automotive_videoSaturation"},"digitalAffinity_automotive_vtr":\{$avg:"$data.digitalAffinity_automotive_vtr"},"digitalAffinity_consumerPackageGoods_conversion":\{$avg:"$data.digitalAffinity_consumerPackageGoods_conversion"},"digitalAffinity_consumerPackageGoods_ctr":\{$avg:"$data.digitalAffinity_consumerPackageGoods_ctr"},"digitalAffinity_consumerPackageGoods_digitalConversion":\{$avg:"$data.digitalAffinity_consumerPackageGoods_digitalConversion"},"digitalAffinity_consumerPackageGoods_digitalRate":\{$avg:"$data.digitalAffinity_consumerPackageGoods_digitalRate"},"digitalAffinity_consumerPackageGoods_displayEngagement":\{$avg:"$data.digitalAffinity_consumerPackageGoods_displayEngagement"},"digitalAffinity_consumerPackageGoods_displaySaturation":\{$avg:"$data.digitalAffinity_consumerPackageGoods_displaySaturation"},"digitalAffinity_consumerPackageGoods_engagement":\{$avg:"$data.digitalAffinity_consumerPackageGoods_engagement"},"digitalAffinity_consumerPackageGoods_saturation":\{$avg:"$data.digitalAffinity_consumerPackageGoods_saturation"},"digitalAffinity_consumerPackageGoods_storeVisitConversion":\{$avg:"$data.digitalAffinity_consumerPackageGoods_storeVisitConversion"},"digitalAffinity_consumerPackageGoods_videoEngagement":\{$avg:"$data.digitalAffinity_consumerPackageGoods_videoEngagement"},"digitalAffinity_consumerPackageGoods_videoSaturation":\{$avg:"$data.digitalAffinity_consumerPackageGoods_videoSaturation"},"digitalAffinity_consumerPackageGoods_vtr":\{$avg:"$data.digitalAffinity_consumerPackageGoods_vtr"},"digitalAffinity_fashion_conversion":\{$avg:"$data.digitalAffinity_fashion_conversion"},"digitalAffinity_fashion_ctr":\{$avg:"$data.digitalAffinity_fashion_ctr"},"digitalAffinity_fashion_digitalConversion":\{$avg:"$data.digitalAffinity_fashion_digitalConversion"},"digitalAffinity_fashion_digitalRate":\{$avg:"$data.digitalAffinity_fashion_digitalRate"},"digitalAffinity_fashion_displayEngagement":\{$avg:"$data.digitalAffinity_fashion_displayEngagement"},"digitalAffinity_fashion_displaySaturation":\{$avg:"$data.digitalAffinity_fashion_displaySaturation"},"digitalAffinity_fashion_engagement":\{$avg:"$data.digitalAffinity_fashion_engagement"},"digitalAffinity_fashion_saturation":\{$avg:"$data.digitalAffinity_fashion_saturation"},"digitalAffinity_fashion_storeVisitConversion":\{$avg:"$data.digitalAffinity_fashion_storeVisitConversion"},"digitalAffinity_fashion_videoEngagement":\{$avg:"$data.digitalAffinity_fashion_videoEngagement"},"digitalAffinity_fashion_videoSaturation":\{$avg:"$data.digitalAffinity_fashion_videoSaturation"},"digitalAffinity_fashion_vtr":\{$avg:"$data.digitalAffinity_fashion_vtr"},"digitalAffinity_retail_conversion":\{$avg:"$data.digitalAffinity_retail_conversion"},"digitalAffinity_retail_ctr":\{$avg:"$data.digitalAffinity_retail_ctr"},"digitalAffinity_retail_digitalConversion":\{$avg:"$data.digitalAffinity_retail_digitalConversion"},"digitalAffinity_retail_digitalRate":\{$avg:"$data.digitalAffinity_retail_digitalRate"},"digitalAffinity_retail_displayEngagement":\{$avg:"$data.digitalAffinity_retail_displayEngagement"},"digitalAffinity_retail_displaySaturation":\{$avg:"$data.digitalAffinity_retail_displaySaturation"},"digitalAffinity_retail_engagement":\{$avg:"$data.digitalAffinity_retail_engagement"},"digitalAffinity_retail_saturation":\{$avg:"$data.digitalAffinity_retail_saturation"},"digitalAffinity_retail_storeVisitConversion":\{$avg:"$data.digitalAffinity_retail_storeVisitConversion"},"digitalAffinity_retail_videoEngagement":\{$avg:"$data.digitalAffinity_retail_videoEngagement"},"digitalAffinity_retail_videoSaturation":\{$avg:"$data.digitalAffinity_retail_videoSaturation"},"digitalAffinity_retail_vtr":\{$avg:"$data.digitalAffinity_retail_vtr"},"digitalAffinity_techServices_conversion":\{$avg:"$data.digitalAffinity_techServices_conversion"}}}])' sonataDemographyDevelop takes around 5 secs on 4.0 and 4.2, and more than 20 seconds on 4.4, attached are the explain for both systems:
charlie.swanson commented on Tue, 2 Feb 2021 00:12:08 +0000: Hi all, I investigated further and I'm pretty confident that SERVER-42090 is the root cause. I don't think we can or should undo that change though, since it is a stability and predictability improvement. Instead, I'm closing this as a duplicate of SERVER-45364 which I think is the appropriate solution for this issue. SERVER-45364 is a pretty large engineering effort, but something we hope to see in the coming releases. As a different workaround, SERVER-37530 may be done in the meantime or sooner, so I would encourage you to keep your eye on that as well. It's unfortunate that we have regressed in this case, but in cases like this any solution will be temporary and likely unstable until the query planner has a better sense of the costs associated with each predicate. The ordering of predicates today is somewhat arbitrary, but as seen in SERVER-42090, it is important that the mostly-arbitrary order be predictable and stable. charlie.swanson commented on Wed, 13 Jan 2021 21:38:58 +0000: Hi all, I haven't really gotten to the bottom of this in a satisfying way, but I thought I'd pop in with an update. I think I've identified SERVER-42090 as the source of this change. This does make some sense as the culprit, but it does seem intentional and possibly hard to undo. I don't yet have all the details. Generally, my impression is that our documentation promises too much when it says > The $and operator uses short-circuit evaluation. If the first expression (e.g. ) evaluates to false, MongoDB will not evaluate the remaining expressions. I think this is true when we get to actually executing an $and, but I don't think we intend to guarantee that we will not re-order the user's expressions. As an example, this would make it very challenging to correctly use an index and not change semantics. I intend to look deeper as to exactly why we made the change in SERVER-42090, but I suspect there's not a whole lot we can do about this. I think there's a very legitimate feature request for the query planner to take the relative selectivity and computational expense of each predicate into account when deciding which order to execute them in. And from that perspective I think the query planner has regressed performance in this case. Interestingly, that change has probably also (accidentally) made some other queries go faster, so it's not a simple case of "that change was bad for performance." JIRAUSER1257066 commented on Thu, 19 Nov 2020 20:56:25 +0000: Hi torgus@gmail.com, Thank you for your patience while we investigate this issue. We didn't notice anything unusual in the diagnostic data you provided. However, in a reproduction attempt, I was able to observe a regression that you noticed in your pipeline. We noticed that there are differences the ordering of the $and terms and suspect that may be causing the regression. When the query is run in 4.2, the first term is: { "shape" : { "$eq" : "geoHash" } } We believe that this is a more selective term, and therefore short circuits the remaining $and stage if the result isn't found. When the query is run in 4.4, the first term is: { "data.UN_2010_E" : { "$lte" : 37917 } } which may be a broader term, so it continues to check the remaining terms if this one is fulfilled. We're continuing to investigate this issue and why the $and term ordering changes between the two versions. Best, Edwin JIRAUSER1269691 commented on Thu, 12 Nov 2020 16:09:28 +0000: Hi, I added the datas you requested for. Cheers! JIRAUSER1257066 commented on Wed, 11 Nov 2020 21:16:22 +0000: Hi torgus@gmail.com, Thanks for filing this ticket. At the moment, we are unable to conclude a specific cause for this regression. I'd first like to confirm that the 4.0 explain file is actually from 4.2, since the planCacheKey and queryHash field was released in 4.2. To diagnose the query performance issues would you please provide the following for both instances of the query in 4.4 and 4.2: Archive (tar or zip) the $dbpath/diagnostic.data directory (the contents are described here) and attach it to this ticket. The specific timestamps for when you run the query. The mongod.log for both instances of the query. Collect perf during both instances of the query. # record call stack samples and generate text output on test node # note the exact time at which the recording was done in order # to allow correlation with other events perf record -a -g -F 99 sleep 60 perf script >perf.txt Please make sure an exact timestamp is included. If it's not, you can run perf with the --start option. Run the aggregation again but inserting .explain(true) to provide the executionStats of the query, i.e. db.collection.explain(true).aggregate(...) instead of db.collection.aggregate([...], {explain: true}) Kind regards, Edwin