WL12737: DevAPI: Add overlaps and not_overlaps as operators

isrgomez · isrgomez · commit 81443e38c9e5 · 2019-05-24T17:51:24.000-05:00
This worklog adds support for the JSON_OVERLAPS() function via the
overlaps and not_overlaps operators, which are available through the
following new infix expression operators:

  OVERLAPS
  NOT OVERLAPS
diff --git a/lib/mysqlx/expr.py b/lib/mysqlx/expr.py
@@ -124,6 +124,7 @@ class TokenType(object):
     DAY_MINUTE = 86
     DAY_HOUR = 87
     YEAR_MONTH = 88
+    OVERLAPS = 89
 # pylint: enable=C0103
 
 _INTERVAL_UNITS = set([
@@ -157,6 +158,7 @@ class TokenType(object):
     "not":      TokenType.NOT,
     "like":     TokenType.LIKE,
     "in":       TokenType.IN,
+    "overlaps": TokenType.OVERLAPS,
     "regexp":   TokenType.REGEXP,
     "between":  TokenType.BETWEEN,
     "interval": TokenType.INTERVAL,
@@ -235,6 +237,7 @@ class TokenType(object):
     "<": "<",
     "<=": "<=",
     "&": "&",
+    "&&": "&&",
     "|": "|",
     "<<": "<<",
     ">>": ">>",
@@ -245,7 +248,8 @@ class TokenType(object):
     "~": "~",
     "%": "%",
     "cast": "cast",
-    "cont_in": "cont_in"
+    "cont_in": "cont_in",
+    "overlaps": "overlaps"
 }
 
 _UNARY_OPERATORS = {
@@ -262,7 +266,8 @@ class TokenType(object):
     "regexp": "not_regexp",
     "like": "not_like",
     "in": "not_in",
-    "cont_in": "not_cont_in"
+    "cont_in": "not_cont_in",
+    "overlaps": "not_overlaps",
 }
 
 
@@ -549,7 +554,7 @@ def lex(self):
                     token = Token(TokenType.EQ, "==", 1)
             elif char == "&":
                 if self.next_char_is(i, "&"):
-                    token = Token(TokenType.ANDAND, char, 2)
+                    token = Token(TokenType.ANDAND, "&&", 2)
                 else:
                     token = Token(TokenType.BITAND, char)
             elif char == "^":
@@ -653,10 +658,10 @@ def paren_expr_list(self):
         exprs = []
         self.consume_token(TokenType.LPAREN)
         if not self.cur_token_type_is(TokenType.RPAREN):
-            exprs.append(self.expr().get_message())
+            exprs.append(self._expr().get_message())
             while self.cur_token_type_is(TokenType.COMMA):
                 self.pos += 1
-                exprs.append(self.expr().get_message())
+                exprs.append(self._expr().get_message())
         self.consume_token(TokenType.RPAREN)
         return exprs
 
@@ -848,7 +853,7 @@ def parse_json_array(self):
         msg = Message("Mysqlx.Expr.Array")
         while self.pos < len(self.tokens) and \
             not self.cur_token_type_is(TokenType.RSQBRACKET):
-            msg["value"].extend([self.expr().get_message()])
+            msg["value"].extend([self._expr().get_message()])
             if not self.cur_token_type_is(TokenType.COMMA):
                 break
             self.consume_token(TokenType.COMMA)
@@ -870,7 +875,7 @@ def parse_json_doc(self):
             item = Message("Mysqlx.Expr.Object.ObjectField")
             item["key"] = self.consume_token(TokenType.LSTRING)
             self.consume_token(TokenType.COLON)
-            item["value"] = self.expr().get_message()
+            item["value"] = self._expr().get_message()
             msg["fld"].extend([item.get_message()])
             if not self.cur_token_type_is(TokenType.COMMA):
                 break
@@ -912,7 +917,7 @@ def cast(self):
         """
         operator = Message("Mysqlx.Expr.Operator", name="cast")
         self.consume_token(TokenType.LPAREN)
-        operator["param"].extend([self.expr().get_message()])
+        operator["param"].extend([self._expr().get_message()])
         self.consume_token(TokenType.AS)
 
         type_scalar = build_bytes_scalar(str.encode(self.cast_data_type()))
@@ -991,7 +996,7 @@ def atomic_expr(self):
         elif token.token_type == TokenType.CAST:
             return self.cast()
         elif token.token_type == TokenType.LPAREN:
-            expr = self.expr()
+            expr = self._expr()
             self.expect_token(TokenType.RPAREN)
             return expr
         elif token.token_type in [TokenType.PLUS, TokenType.MINUS]:
@@ -1126,6 +1131,10 @@ def ilri_expr(self):
                 else:
                     op_name = "cont_in"
                     params.append(self.comp_expr().get_message())
+            elif self.cur_token_type_is(TokenType.OVERLAPS):
+                self.consume_token(TokenType.OVERLAPS)
+                params.append(self.comp_expr().get_message())
+
             elif self.cur_token_type_is(TokenType.LIKE):
                 self.consume_token(TokenType.LIKE)
                 params.append(self.comp_expr().get_message())
@@ -1168,7 +1177,7 @@ def or_expr(self):
         return self.parse_left_assoc_binary_op_expr(
             set([TokenType.OR, TokenType.OROR]), self.xor_expr)
 
-    def expr(self, reparse=False):
+    def _expr(self, reparse=False):
         if reparse:
             self.tokens = []
             self.pos = 0
@@ -1177,6 +1186,17 @@ def expr(self, reparse=False):
             self.lex()
         return self.or_expr()
 
+    def expr(self, reparse=False):
+        """Parse an expression; reject leftovers other than 'AS <alias>'."""
+        parsed = self._expr(reparse)
+        total = len(self.tokens)
+        alias = 2 if self.pos_token_type_is(total - 2, TokenType.AS) else 0
+        if self.pos + alias < total:
+            raise ValueError("Unused token types {} found in expression at "
+                             "position: {}".format(self.tokens[self.pos:],
+                                                   self.pos))
+        return parsed
+
     def parse_table_insert_field(self):
         return Message("Mysqlx.Crud.Column",
                        name=self.consume_token(TokenType.IDENT))
@@ -1205,7 +1225,7 @@ def parse_table_select_projection(self):
             if not first:
                 self.consume_token(TokenType.COMMA)
             first = False
-            projection = Message("Mysqlx.Crud.Projection", source=self.expr())
+            projection = Message("Mysqlx.Crud.Projection", source=self._expr())
             if self.cur_token_type_is(TokenType.AS):
                 self.consume_token(TokenType.AS)
                 projection["alias"] = self.consume_token(TokenType.IDENT)
@@ -1222,7 +1242,7 @@ def parse_order_spec(self):
             if not first:
                 self.consume_token(TokenType.COMMA)
             first = False
-            order = Message("Mysqlx.Crud.Order", expr=self.expr())
+            order = Message("Mysqlx.Crud.Order", expr=self._expr())
             if self.cur_token_type_is(TokenType.ORDERBY_ASC):
                 order["direction"] = mysqlxpb_enum(
                     "Mysqlx.Crud.Order.Direction.ASC")
@@ -1241,5 +1261,5 @@ def parse_expr_list(self):
             if not first:
                 self.consume_token(TokenType.COMMA)
             first = False
-            expr_list.append(self.expr().get_message())
+            expr_list.append(self._expr().get_message())
         return expr_list
diff --git a/tests/test_mysqlx_crud.py b/tests/test_mysqlx_crud.py
@@ -762,6 +762,119 @@ def test_cont_in_operator(self):
                                  "result was {}".format(test, result))
         self.schema.drop_collection(collection_name)
 
+    @unittest.skipIf(tests.MYSQL_VERSION < (8, 0, 17),
+                     "OVERLAPS operator unavailable")
+    def test_overlaps_operator(self):
+        collection_name = "{0}.test".format(self.schema_name)
+        collection = self.schema.create_collection(collection_name)
+        collection.add({
+          "_id": "a6f4b93e1a264a108393524f29546a8c",
+          "title": "AFRICAN EGG",
+          "description": "A Fast-Paced Documentary of a Pastry Chef And a "
+                         "Dentist who must Pursue a Forensic Psychologist in "
+                         "The Gulf of Mexico",
+          "releaseyear": 2006,
+          "language": "English",
+          "duration": 130,
+          "rating": "G",
+          "genre": "Science fiction",
+          "actors": [{
+            "name": "MILLA PECK",
+            "country": "Mexico",
+            "birthdate": "12 Jan 1984"
+          }, {
+            "name": "VAL BOLGER",
+            "country": "Botswana",
+            "birthdate": "26 Jul 1975"
+          }, {
+            "name": "SCARLETT BENING",
+            "country": "Syria",
+            "birthdate": "16 Mar 1978"
+          }],
+          "additionalinfo": {
+            "director": "Sharice Legaspi",
+            "writers": ["Rusty Couturier", "Angelic Orduno", "Carin Postell"],
+            "productioncompanies": ["Qvodrill", "Indigoholdings"]
+          }
+        }).execute()
+
+        test_cases = [
+            ("(1+5) overlaps (1, 2, 3, 4, 5)", None),
+            ("(1>5) overlaps (true, false)", None),
+            ("('a'>'b') overlaps (true, false)", None),
+            ("(1>5) overlaps [true, false]", None),
+            ("[1>5] overlaps [true, false]", True),
+            ("[(1+5)] overlaps [1, 2, 3, 4, 5]", False),
+            ("[(1+4)] overlaps [1, 2, 3, 4, 5]", True),
+            ("('a'>'b') overlaps [true, false]", None),
+            ("true overlaps [(1>5), !(false), (true || false), (false && true)]",
+             True),
+            ("true overlaps ((1>5), !(false), (true || false), (false && true))",
+             None),
+            ("{ 'name' : 'MILLA PECK' } overlaps actors", False),
+            ("{\"field\":true} overlaps (\"mystring\", 124, myvar, othervar.jsonobj)",
+             None),
+            ("actor.name overlaps ['a name', null, (1<5-4), myvar.jsonobj.name]",
+             None),
+            ("!false && true overlaps [true]", True),
+            ("1-5/2*2 > 3-2/1*2 overlaps [true, false]", None),
+            ("true IN [1-5/2*2 > 3-2/1*2]", False),
+            ("'African Egg' overlaps ('African Egg', 1, true, NULL, [0,1,2], "
+             "{ 'title' : 'Atomic Firefighter' })", None),
+            ("1 overlaps ('African Egg', 1, true, NULL, [0,1,2], "
+             "{ 'title' : 'Atomic Firefighter' })", None),
+            ("true overlaps ('African Egg', 1, false, NULL, [0,1,2], "
+             "{ 'title' : 'Atomic Firefighter' })", None),
+            ("false overlaps ('African Egg', 1, true, NULL, [0,1,2], "
+             "{ 'title' : 'Atomic Firefighter' })", None),
+            ("false overlaps ('African Egg', 1, true, 'No null', [0,1,2], "
+             "{ 'title' : 'Atomic Firefighter' })", None),
+            ("[0,1,2] overlaps ('African Egg', 1, true, NULL, [0,1,2], "
+             "{ 'title' : 'Atomic Firefighter' })", None),
+            ("{ 'title' : 'Atomic Firefighter' } overlaps ('African Egg', 1, true, "
+             "NULL, [0,1,2], { 'title' : 'Atomic Firefighter' })", None),
+            ("title overlaps ('African Egg', 'The Witcher', 'Jurassic Perk')", None),
+            ("releaseyear overlaps (2006, 2010, 2017)", None),
+            ("'African overlaps' in movietitle", None),
+            ("0 NOT overlaps [1,2,3]", True),
+            ("1 NOT overlaps [1,2,3]", False),
+            ("[0] NOT overlaps [1,2,3]", True),
+            ("[1] NOT overlaps [1,2,3]", False),
+            ("[!false && true] OVERLAPS [true]", True),
+            ("[!false AND true] OVERLAPS [true]", True),
+            ("[!false & true] OVERLAPS [true]", False),
+            ("'' IN title", False),
+            ("title overlaps ('', ' ')", None),
+            ("title overlaps ['', ' ']", False),
+            ("[\"Rusty Couturier\", \"Angelic Orduno\", \"Carin Postell\"] IN "
+             "additionalinfo.writers", True),
+            ("{ \"name\" : \"MILLA PECK\", \"country\" : \"Mexico\", "
+             "\"birthdate\": \"12 Jan 1984\"} IN actors", True),
+            ("releaseyear IN [2006, 2007, 2008]", True),
+            ("true overlaps title", False),
+            ("false overlaps genre", False),
+            ("'Sharice Legaspi' overlaps additionalinfo.director", True),
+            ("'Mexico' overlaps actors[*].country", True),
+            ("'Angelic Orduno' overlaps additionalinfo.writers", True),
+            ("[([1,2] overlaps [1,2])] overlaps [false] invalid [true]", None),
+            ("[([1] overlaps [2])] overlaps [3] invalid [true] as res", None),
+            ("[] []", None),
+            ("[] TRUE as res", None)
+        ]
+
+        # A raised error is only acceptable when the expected value is None.
+        for test in test_cases:
+            fields = "{0} as res".format(test[0])
+            try:
+                result = collection.find().fields(fields).execute().fetch_one()
+            except Exception:
+                self.assertEqual(None, test[1], "For test case {} "
+                                 "exception was not expected.".format(test))
+            else:
+                self.assertEqual(result['res'], test[1], "For test case {} "
+                                 "result was {}".format(test, result))
+        self.schema.drop_collection(collection_name)
+
     def test_ilri_expressions(self):
         collection_name = "{0}.test".format(self.schema_name)
         collection = self.schema.create_collection(collection_name)