Add KQL support for additional ES field types (elastic#1247)

oren-zohar · Jun 11, 2021 · c98398f · c98398f
1 parent 6b45186
commit c98398f
Show file tree

Hide file tree

Showing 3 changed files with 53 additions and 5 deletions.
diff --git a/kql/eql2kql.py b/kql/eql2kql.py
@@ -3,7 +3,6 @@
 # 2.0; you may not use this file except in compliance with the Elastic License
 # 2.0.
 
-#!/usr/bin/env python
 import eql
 from eql import DepthFirstWalker
 

diff --git a/kql/parser.py b/kql/parser.py
@@ -33,16 +33,55 @@ def child_tokens(self):
 
 
 grammar_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "kql.g")
+
 with open(grammar_file, "rt") as f:
     grammar = f.read()
 
 lark_parser = Lark(grammar, propagate_positions=True, tree_class=KvTree, start=['query'], parser='lalr')
 
+
 def wildcard2regex(wc: str) -> re.Pattern:
     parts = wc.split("*")
     return re.compile("^{regex}$".format(regex=".*?".join(re.escape(w) for w in parts)))
 
 
+def elasticsearch_type_family(mapping_type: str) -> str:
+    """Get the family of type for an Elasticsearch mapping type."""
+    # https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-types.html
+    return {
+        # range types
+        "long_range": "range",
+        "double_range": "range",
+        "date_range": "range",
+        "ip_range": "range",
+
+        # text search types
+        "annotated-text": "text",
+        "completion": "text",
+        "search-as_you_type": "text",
+
+        # keyword
+        "constant_keyword": "keyword",
+        "wildcard": "keyword",
+
+        # date
+        "date_nanos": "date",
+
+        # integer
+        "token_count": "integer",
+        "long": "integer",
+        "short": "integer",
+        "byte": "integer",
+        "unsigned_long": "integer",
+
+        # float
+        "double": "float",
+        "half_float": "float",
+        "scaled_float": "float",
+
+    }.get(mapping_type, mapping_type)
+
+
 class BaseKqlParser(Interpreter):
     NON_SPACE_WS = re.compile(r"[^\S ]+")
     ip_regex = re.compile("^" + eql.functions.CidrMatch.ip_re + "(/([0-2]?[0-9]|3[0-2]))?$")
@@ -173,14 +212,16 @@ def convert_value(self, field_name, python_value, value_tree):
                                  f"{field_name} has multiple types {', '.join(field_types)}")
 
         if field_type is not None and field_type != value_type:
-            if field_type in STRING_FIELDS:
+            field_type_family = elasticsearch_type_family(field_type)
+
+            if field_type_family in STRING_FIELDS:
                 return eql.utils.to_unicode(python_value)
-            elif field_type in ("float", "long"):
+            elif field_type_family in ("float", "integer"):
                 try:
-                    return float(python_value) if field_type == "float" else int(python_value)
+                    return float(python_value) if field_type_family == "float" else int(python_value)
                 except ValueError:
                     pass
-            elif field_type == "ip" and value_type == "keyword":
+            elif field_type_family == "ip" and value_type == "keyword":
                 if "::" in python_value or self.ip_regex.match(python_value) is not None:
                     return python_value
 

diff --git a/tests/kuery/test_parser.py b/tests/kuery/test_parser.py
@@ -68,3 +68,11 @@ def test_number_wildcard_fail(self):
 
         with self.assertRaises(kql.KqlParseError):
             kql.parse("foo:wc*", schema={"foo": "long"})
+
+    def test_type_family_success(self):
+        kql.parse("abc : 1.2345", schema={"abc": "scaled_float"})
+        kql.parse("abc : hello", schema={"abc": "annotated-text"})
+
+    def test_type_family_fail(self):
+        with self.assertRaises(kql.KqlParseError):
+            kql.parse('foo : "hello world"', schema={"foo": "scaled_float"})