Skip to content

Commit

Permalink
Add KQL support for additional ES field types (elastic#1247)
Browse files Browse the repository at this point in the history
  • Loading branch information
rw-access authored Jun 11, 2021
1 parent 6b45186 commit c98398f
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 5 deletions.
1 change: 0 additions & 1 deletion kql/eql2kql.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# 2.0; you may not use this file except in compliance with the Elastic License
# 2.0.

#!/usr/bin/env python
import eql
from eql import DepthFirstWalker

Expand Down
49 changes: 45 additions & 4 deletions kql/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,55 @@ def child_tokens(self):


# The grammar lives next to this module, so resolve it relative to __file__
# rather than the current working directory.
grammar_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "kql.g")

# Read with an explicit encoding so the result does not depend on the
# platform's locale default (e.g. cp1252 on Windows).
with open(grammar_file, "rt", encoding="utf-8") as f:
    grammar = f.read()

# Module-level parser built once at import time: LALR parser, entry rule
# 'query', source positions propagated onto nodes, and KvTree as the tree class.
lark_parser = Lark(grammar, propagate_positions=True, tree_class=KvTree, start=['query'], parser='lalr')


def wildcard2regex(wc: str) -> re.Pattern:
    """Compile a ``*`` wildcard expression into an anchored regular expression.

    The input is split on ``*``; every literal piece is regex-escaped and the
    pieces are rejoined with a non-greedy ``.*?``. The result is wrapped in
    ``^`` and ``$`` anchors.
    """
    escaped_literals = [re.escape(literal) for literal in wc.split("*")]
    body = ".*?".join(escaped_literals)
    return re.compile("^" + body + "$")


def elasticsearch_type_family(mapping_type: str) -> str:
    """Get the family of type for an Elasticsearch mapping type.

    Several Elasticsearch mapping types are treated alike for value
    conversion (e.g. ``scaled_float`` and ``double`` both hold floats). This
    collapses a concrete mapping type to a representative family name.

    Args:
        mapping_type: The Elasticsearch mapping type, e.g. ``"scaled_float"``.

    Returns:
        The family name (``"range"``, ``"text"``, ``"keyword"``, ``"date"``,
        ``"integer"`` or ``"float"``) for a listed type. Any type not listed
        here, including ones that already name a family, is returned unchanged.
    """
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-types.html
    return {
        # range types
        "integer_range": "range",
        "float_range": "range",
        "long_range": "range",
        "double_range": "range",
        "date_range": "range",
        "ip_range": "range",

        # text search types
        "annotated-text": "text",
        "completion": "text",
        "match_only_text": "text",
        # NOTE: the real type name uses underscores throughout
        "search_as_you_type": "text",

        # keyword
        "constant_keyword": "keyword",
        "wildcard": "keyword",

        # date
        "date_nanos": "date",

        # integer
        "token_count": "integer",
        "long": "integer",
        "short": "integer",
        "byte": "integer",
        "unsigned_long": "integer",

        # float
        "double": "float",
        "half_float": "float",
        "scaled_float": "float",

    }.get(mapping_type, mapping_type)


class BaseKqlParser(Interpreter):
NON_SPACE_WS = re.compile(r"[^\S ]+")
ip_regex = re.compile("^" + eql.functions.CidrMatch.ip_re + "(/([0-2]?[0-9]|3[0-2]))?$")
Expand Down Expand Up @@ -173,14 +212,16 @@ def convert_value(self, field_name, python_value, value_tree):
f"{field_name} has multiple types {', '.join(field_types)}")

if field_type is not None and field_type != value_type:
if field_type in STRING_FIELDS:
field_type_family = elasticsearch_type_family(field_type)

if field_type_family in STRING_FIELDS:
return eql.utils.to_unicode(python_value)
elif field_type in ("float", "long"):
elif field_type_family in ("float", "integer"):
try:
return float(python_value) if field_type == "float" else int(python_value)
return float(python_value) if field_type_family == "float" else int(python_value)
except ValueError:
pass
elif field_type == "ip" and value_type == "keyword":
elif field_type_family == "ip" and value_type == "keyword":
if "::" in python_value or self.ip_regex.match(python_value) is not None:
return python_value

Expand Down
8 changes: 8 additions & 0 deletions tests/kuery/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,11 @@ def test_number_wildcard_fail(self):

with self.assertRaises(kql.KqlParseError):
kql.parse("foo:wc*", schema={"foo": "long"})

def test_type_family_success(self):
    """Queries against fields typed with family members should parse without error."""
    accepted = (
        ("abc : 1.2345", "scaled_float"),
        ("abc : hello", "annotated-text"),
    )
    for query, mapping_type in accepted:
        kql.parse(query, schema={"abc": mapping_type})

def test_type_family_fail(self):
    """Parsing a quoted string against a ``scaled_float`` field must raise ``KqlParseError``."""
    schema = {"foo": "scaled_float"}
    with self.assertRaises(kql.KqlParseError):
        kql.parse('foo : "hello world"', schema=schema)

0 comments on commit c98398f

Please sign in to comment.