Skip to content

Commit

Permalink
ES|QL JOIN: non existing keys and duplicate keys (#730)
Browse files Browse the repository at this point in the history
  • Loading branch information
luigidellaquila authored Jan 20, 2025
1 parent 17cc202 commit 27cf095
Show file tree
Hide file tree
Showing 6 changed files with 249 additions and 10 deletions.
19 changes: 18 additions & 1 deletion joins/challenges/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,24 @@
},
{% endfor %}

{% for i in range(2, 7) %}
{
"operation": "esql_lookup_join_100k_to_{{idx_suffix[i]}}",
"tags": ["lookup", "join", "limit1000"],
"clients": 1,
"warmup-iterations": 10,
"iterations": 50
},
{% endfor %}

{
"operation": "esql_lookup_join_100k_keys_x10_limit1000",
"tags": ["lookup", "join", "limit1000"],
"clients": 1,
"warmup-iterations": 5,
"iterations": 50
},

{
"operation": "esql_lookup_join_1k_keys_where_no_match",
"tags": ["lookup", "join"],
Expand All @@ -130,6 +148,5 @@
"iterations": 50
}


]
}
151 changes: 151 additions & 0 deletions joins/challenges/large.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
{
"name": "esql-large",
"description": "Performance benchmarks for internal R&D on query languages. This is work in progress",
"default": false,
"schedule": [
{
"operation": "delete-index",
"tags": ["setup"]
},
{
"operation": {
"operation-type": "create-index",
"settings": {%- if index_settings is defined %} {{index_settings | tojson}} {%- else %} {
{# non-serverless-index-settings-marker-start #}{%- if build_flavor != "serverless" or serverless_operator == true -%}
{% if p_include_non_serverless_index_settings %}
"index.translog.flush_threshold_size": "4g",
{% endif %}
{%- endif -%}{# non-serverless-index-settings-marker-end #}
"index.codec": "best_compression",
"index.refresh_interval": "30s"
}{%- endif %}
},
"tags": ["setup"]
},

{
"name": "check-cluster-health",
"operation": {
"operation-type": "cluster-health",
"index": "join_base_idx",
"request-params": {
"wait_for_status": "{{cluster_health | default('green')}}",
"wait_for_no_relocating_shards": "true"
},
"retry-until-success": true
},
"tags": ["setup"]
},

{
"operation": "index-base",
"clients": {{bulk_indexing_clients | default(8)}},
"tags": ["setup"]
},
{
"operation": "index-small-lookup-indices",
"clients": {{bulk_indexing_clients | default(8)}},
"tags": ["setup"]
},
{
"operation": "index-lookup-1m",
"clients": {{bulk_indexing_clients | default(8)}},
"tags": ["setup"]
},
{
"operation": "index-lookup-5m",
"clients": {{bulk_indexing_clients | default(8)}},
"tags": ["setup"]
},
{
"operation": "index-lookup-100m",
"clients": {{bulk_indexing_clients | default(8)}},
"tags": ["setup"]
},
{
"name": "refresh-after-index",
"operation": "refresh",
"tags": ["setup"]
},


{% for i in range(idx_suffix|length) %}
{
"operation": "esql_lookup_join_{{idx_suffix[i]}}_keys_limit1",
"tags": ["lookup", "join", "limit1"],
"clients": 1,
"warmup-iterations": 10,
"iterations": 50
},
{
"operation": "esql_lookup_join_{{idx_suffix[i]}}_keys_limit1000",
"tags": ["lookup", "join", "limit1000"],
"clients": 1,
"warmup-iterations": 10,
"iterations": 50
},
{
"operation": "esql_lookup_join_{{idx_suffix[i]}}_keys_limit10000",
"tags": ["lookup", "join", "limit10000"],
"clients": 1,
"warmup-iterations": 10,
"iterations": 50
},
{
"operation": "esql_lookup_join_{{idx_suffix[i]}}_keys_keep_limit10000",
"tags": ["lookup", "join", "limit10000"],
"clients": 1,
"warmup-iterations": 10,
"iterations": 50
},
{
"operation": "esql_lookup_join_{{idx_suffix[i]}}_keys_sort_limit10000",
"tags": ["lookup", "join", "limit10000"],
"clients": 1,
"warmup-iterations": 5,
"iterations": 20
},
{
"operation": "esql_lookup_join_{{idx_suffix[i]}}_keys_where_no_match",
"tags": ["lookup", "join"],
"clients": 1,
"warmup-iterations": 5,
"iterations": 20
},
{
"operation": "esql_lookup_join_{{idx_suffix[i]}}_keys_where_limit1000",
"tags": ["lookup", "join", "limit1000"],
"clients": 1,
"warmup-iterations": 10,
"iterations": 50
},
{% endfor %}

{% for i in range(2, 7) %}
{
"operation": "esql_lookup_join_100k_to_{{idx_suffix[i]}}",
"tags": ["lookup", "join", "limit1000"],
"clients": 1,
"warmup-iterations": 10,
"iterations": 50
},
{% endfor %}

{
"operation": "esql_lookup_join_100k_keys_x10_limit1000",
"tags": ["lookup", "join", "limit1000"],
"clients": 1,
"warmup-iterations": 5,
"iterations": 50
},

{
"operation": "esql_lookup_join_1k_100k_200k_500k",
"tags": ["lookup", "join", "limit1000"],
"clients": 1,
"warmup-iterations": 10,
"iterations": 50
}

]
}
19 changes: 18 additions & 1 deletion joins/challenges/small.json
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,24 @@
},
{% endfor %}

{% for i in range(2, 6) %}
{
"operation": "esql_lookup_join_100k_to_{{idx_suffix[i]}}",
"tags": ["lookup", "join", "limit1000"],
"clients": 1,
"warmup-iterations": 10,
"iterations": 50
},
{% endfor %}

{
"operation": "esql_lookup_join_100k_keys_x10_limit1000",
"tags": ["lookup", "join", "limit1000"],
"clients": 1,
"warmup-iterations": 5,
"iterations": 50
},

{
"operation": "esql_lookup_join_1k_keys_sort_limit10000",
"tags": ["lookup", "join", "limit10000"],
Expand All @@ -126,6 +144,5 @@
"iterations": 50
}


]
}
21 changes: 21 additions & 0 deletions joins/index-lookup_idx_100000_f10_x10.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{% set p_include_non_serverless_index_settings = (include_non_serverless_index_settings | default(build_flavor != "serverless")) %}

{
"settings": {
"index.mode": "lookup",
"auto_expand_replicas": {{ auto_expand_replicas | default("0-all") | tojson }}
{# non-serverless-index-settings-marker-start #}{%- if build_flavor != "serverless" or serverless_operator == true -%}
, "index.requests.cache.enable": false
{%- endif -%}{# non-serverless-index-settings-marker-end #}
},
"mappings": {
"_source": {
"mode": {{ source_mode | default("stored") | tojson }}
},
"properties": {
"key_100000": {
"type": "keyword"
}
}
}
}
31 changes: 23 additions & 8 deletions joins/operations/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@
"bulk-size": {{bulk_size | default(10000)}},
"ingest-percentage": 100
},
{
"name": "index-lookup-100k_x10",
"operation-type": "bulk",
"indices": ["lookup_idx_100000_f10_x10"],
"bulk-size": {{bulk_size | default(10000)}},
"ingest-percentage": {{ingest_percentage | default(100)}}
},
{
"name": "index-lookup-1m",
"operation-type": "bulk",
Expand Down Expand Up @@ -84,13 +91,21 @@
{% endfor %}


{% for i in range(2, 7) %}
{
"name": "esql_lookup_join_1k_100k_200k_500k",
"operation-type": "esql",
"query": "FROM join_base_idx | lookup join lookup_idx_1000_f10 on key_1000 | rename lookup_keyword_0 as lk_1k | lookup join lookup_idx_100000_f10 on key_100000 | rename lookup_keyword_0 as lk_100k | lookup join lookup_idx_200000_f10 on key_200000 | rename lookup_keyword_0 as lk_200k | lookup join lookup_idx_500000_f10 on key_500000 | rename lookup_keyword_0 as lk_500k | keep id, key_1000, key_100000, key_200000, key_500000, lk_1k, lk_100k, lk_200k, lk_500k | limit 1000"
}




"name": "esql_lookup_join_100k_to_{{idx_suffix[i]}}",
"operation-type": "esql",
"query": "FROM join_base_idx | rename key_100000 as key_{{key_suffix[i]}}| lookup join lookup_idx_{{key_suffix[i]}}_f10 on key_{{key_suffix[i]}} | limit 1000"
},
{% endfor %}

{
"name": "esql_lookup_join_100k_keys_x10_limit1000",
"operation-type": "esql",
"query": "FROM join_base_idx | lookup join lookup_idx_100000_f10_x10 on key_100000 | limit 1000"
},
{
"name": "esql_lookup_join_1k_100k_200k_500k",
"operation-type": "esql",
"query": "FROM join_base_idx | lookup join lookup_idx_1000_f10 on key_1000 | rename lookup_keyword_0 as lk_1k | lookup join lookup_idx_100000_f10 on key_100000 | rename lookup_keyword_0 as lk_100k | lookup join lookup_idx_200000_f10 on key_200000 | rename lookup_keyword_0 as lk_200k | lookup join lookup_idx_500000_f10 on key_500000 | rename lookup_keyword_0 as lk_500k | keep id, key_1000, key_100000, key_200000, key_500000, lk_1k, lk_100k, lk_200k, lk_500k | limit 1000"
}
18 changes: 18 additions & 0 deletions joins/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
"name": "lookup_idx_100000_f10",
"body": "index-lookup_idx_100000_f10.json"
},
{
"name": "lookup_idx_100000_f10_x10",
"body": "index-lookup_idx_100000_f10_x10.json"
},
{
"name": "lookup_idx_200000_f10",
"body": "index-lookup_idx_200000_f10.json"
Expand Down Expand Up @@ -66,6 +70,20 @@
}
]
},
{
"name": "lookup_idx_100000_f10_x10",
"base-url": "https://rally-tracks.elastic.co/joins",
"documents": [
{
"target-index": "lookup_idx_100000_f10_x10",
"source-file": "lookup_idx_100000_f10_x10.json.bz2",
"#COMMENT": "Lookup index with 1M documents and 100k distinct keys (keywords, \"0\"..\"99999\")",
"document-count": 1000000,
"compressed-bytes": 22483295,
"uncompressed-bytes": 412777900
}
]
},
{
"name": "lookup_idx_200000_f10",
"base-url": "https://rally-tracks.elastic.co/joins",
Expand Down

0 comments on commit 27cf095

Please sign in to comment.