...
I have a collection with a simple array of values and also with a nested collection. If I do a negative look-ahead $regex search on the documents with everything works fine but if I do the same search on documents with a nested collection it doesn't work. For example: sample list collection { "_id": ObjectId("54deb2ed4e5018cf1c737b8d"), "values": [ "apples", "peaches", "bananas" ] } { "_id": ObjectId("54df5b40f546cf4c25013190"), "values": ["grapes", "oranges" ] } db.sampleList.count( { 'values':{$regex:"^(?<!appl).*", $options:'i '} }); // 1 sample nested collection { "_id": ObjectId("54e28b6caaadddb844d1f40a"), "values": [ { "value": " apple", "label": "Choice 1" }, { "value": "peach", "label": "Choice 2" }, { "value": "banana", "label": "Choice 3" }, { "value": "orange", "label": "Choice 4" } ] } { "_id": ObjectId("54eb580ee163c16b23145536"), "values": [ { "value": "orange", "label": "Choice 1" }, { "value": "banana", "label": "Choice 2" }, { "value": "peaches", "label": "Choice 3" }, { "value": "grapes", "label": "Choice 4" } ] db.sampleList.count( { 'values.value':{$regex:"^(?<!appl).*", $options:'i '} }); // 2
max.hirschhorn@10gen.com commented on Fri, 12 Feb 2016 03:34:50 +0000: Hi Jonah, My reading of this ticket suggests that there is some confusion around how matching against an array value works in MongoDB, so I'll attempt to clarify things here. The $not operator evaluates to true if its inner expression evaluates to false, and false if its inner expression evaluates to true. This behavior can be confusing when considering how matching against an array value works in MongoDB. Referring to your example in the gist, the expression {$eq: 'apple'} evaluates to true if any element of the array value is equal to the string "apple". The expression {$not: {$eq: 'apple'}} will then return true if none of the elements in the array value are equal to the string "apple". The expression {$regex: '^(?!apple).*'} evaluates to true if the string doesn't start with "apple". Following the semantics of how predicates work when applied to an array value, the expression {$regex: '^(?!apple).*'} evaluates to true if any element in the array value doesn't start with the string "apple". > db.mycoll.insert({fruits: ['apple', 'orange', 'pear']}); // An apple with other fruits > db.mycoll.insert({fruits: ['orange', 'pear']}); // No apples > db.mycoll.insert({fruits: ['apple', 'apple']}); // Only apples > db.mycoll.find({fruits: {$regex: '^(?!apple).*'}}) { "_id" : ObjectId("56bd36ab5a3dcb40deb78743"), "fruits" : [ "apple", "orange", "pear" ] } { "_id" : ObjectId("56bd36b15a3dcb40deb78744"), "fruits" : [ "orange", "pear" ] } The expression {$regex: '^(?=apple).*'} evaluates to true if the string starts with "apple". Following the semantics of how predicates work when applied to an array value, the expression {$not: /^(?=apple).*/} evaluates to true if none of the elements in the array value start with the string "apple". > db.mycoll.find({fruits: {$not: /\^(?=apple).*/}}); { "_id" : ObjectId("56bd36b15a3dcb40deb78744"), "fruits" : [ "orange", "pear" ] } Given that it is possible to negate the regular expression and use $not to have the array value semantics you are looking for, I don't think a change in MongoDB to inspect the regular expression for a negative look-ahead or negative look-behind and alter the matching behavior is a feature we'd consider. Let me know if you have any other questions. Thanks, Max samk commented on Tue, 5 May 2015 13:57:06 +0000: I was able to reproduce this on 3.0.1 on Linux using this test case. For consistency, here's a more minimal copy of the reproduction: var coll = [{"str": "apple", "nested": [{"value": "apple"}, {"value": "peach"}, {"value": "banana"}, {"value": "orange"}], "arr": ["apple","peach","banana","orange"]}, {"str": "sdf", "nested": [{"value": "sdf"}, {"value": "sdf"}, {"value": "sdf"}, {"value": "sdfsd"}], "arr": ["sdf","sdf","sdf","sdfsd"]}] db.testCollection.drop(); db.testCollection.insert(coll); var testQuery = {$not:{$eq:'apple'}} var lookAheadRegEx = /^(?!apple).*/i; assert.eq(db.testCollection.count({'str':testQuery }), db.testCollection.count({'str':lookAheadRegEx})) assert.eq(db.testCollection.count({'arr':testQuery }), db.testCollection.count({'arr':lookAheadRegEx})) assert.eq(db.testCollection.count({'nested.value':testQuery }), db.testCollection.count({'nested.value':lookAheadRegEx})) jonah@surveyplanet.com commented on Wed, 29 Apr 2015 20:46:52 +0000: I wrote up a test here: https://gist.github.com/jwerre/1f7138fb98b449e43e6d rassi@10gen.com commented on Wed, 29 Apr 2015 19:07:44 +0000: Hi, I can't reproduce your "sample list collection" results. See the below shell session: > db.version() 2.6.5 > db.sampleList.drop() true > db.sampleList.insert({ "_id": ObjectId("54deb2ed4e5018cf1c737b8d"), "values": [ "apples", "peaches", "bananas" ] } ) WriteResult({ "nInserted" : 1 }) > db.sampleList.insert({ "_id": ObjectId("54df5b40f546cf4c25013190"), "values": ["grapes", "oranges" ] } ) WriteResult({ "nInserted" : 1 }) > db.sampleList.count( { 'values':{$regex:"^(?<!appl).*", $options:'i '} }); 2 Could you please provide a similar shell snippet exhibiting the behavior you are observing? ~ Jason Rassi
Do a negative look-ahead $regex search collection with nested collection. I've only tried this on version 2.6.5 on OSX 10.10.3