Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ES|QL JOIN: non existing keys and duplicate keys #730

Merged
merged 2 commits into from
Jan 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion joins/challenges/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,24 @@
},
{% endfor %}

{% for i in range(2, 7) %}
{
"operation": "esql_lookup_join_100k_to_{{idx_suffix[i]}}",
"tags": ["lookup", "join", "limit1000"],
"clients": 1,
"warmup-iterations": 10,
"iterations": 50
},
{% endfor %}

{
"operation": "esql_lookup_join_100k_keys_x10_limit1000",
"tags": ["lookup", "join", "limit1000"],
"clients": 1,
"warmup-iterations": 5,
"iterations": 50
},

{
"operation": "esql_lookup_join_1k_keys_where_no_match",
"tags": ["lookup", "join"],
Expand All @@ -130,6 +148,5 @@
"iterations": 50
}


]
}
151 changes: 151 additions & 0 deletions joins/challenges/large.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
{# Challenge "esql-large": sets up the join base index and the lookup indices, then runs the ES|QL LOOKUP JOIN query tasks.
   Every setup step carries the "setup" tag; every query task runs on a single client with separate warmup and measured iterations. #}
{
  "name": "esql-large",
  "description": "Performance benchmarks for internal R&D on query languages. This is work in progress",
  "default": false,
  "schedule": [
    {# --- Setup: drop and recreate the indices. --- #}
    {
      "operation": "delete-index",
      "tags": ["setup"]
    },
    {
      "operation": {
        "operation-type": "create-index",
        {# An explicit index_settings track parameter replaces the default settings object below entirely. #}
        "settings": {%- if index_settings is defined %} {{index_settings | tojson}} {%- else %} {
          {# non-serverless-index-settings-marker-start #}{%- if build_flavor != "serverless" or serverless_operator == true -%}
          {% if p_include_non_serverless_index_settings %}
            "index.translog.flush_threshold_size": "4g",
          {% endif %}
          {%- endif -%}{# non-serverless-index-settings-marker-end #}
          "index.codec": "best_compression",
          "index.refresh_interval": "30s"
        }{%- endif %}
      },
      "tags": ["setup"]
    },

    {# Wait for join_base_idx to reach the requested health (green unless cluster_health overrides it) before indexing. #}
    {
      "name": "check-cluster-health",
      "operation": {
        "operation-type": "cluster-health",
        "index": "join_base_idx",
        "request-params": {
          "wait_for_status": "{{cluster_health | default('green')}}",
          "wait_for_no_relocating_shards": "true"
        },
        "retry-until-success": true
      },
      "tags": ["setup"]
    },

    {# --- Setup: bulk-index the base index and every lookup index the query tasks join against. --- #}
    {
      "operation": "index-base",
      "clients": {{bulk_indexing_clients | default(8)}},
      "tags": ["setup"]
    },
    {
      "operation": "index-small-lookup-indices",
      "clients": {{bulk_indexing_clients | default(8)}},
      "tags": ["setup"]
    },
    {# Fills lookup_idx_100000_f10_x10, the index joined by esql_lookup_join_100k_keys_x10_limit1000 further down.
       Without this step that query task would run against an index that was created but never populated. #}
    {
      "operation": "index-lookup-100k_x10",
      "clients": {{bulk_indexing_clients | default(8)}},
      "tags": ["setup"]
    },
    {
      "operation": "index-lookup-1m",
      "clients": {{bulk_indexing_clients | default(8)}},
      "tags": ["setup"]
    },
    {
      "operation": "index-lookup-5m",
      "clients": {{bulk_indexing_clients | default(8)}},
      "tags": ["setup"]
    },
    {
      "operation": "index-lookup-100m",
      "clients": {{bulk_indexing_clients | default(8)}},
      "tags": ["setup"]
    },
    {
      "name": "refresh-after-index",
      "operation": "refresh",
      "tags": ["setup"]
    },

    {# --- Query tasks: one group of seven join variants per entry of idx_suffix. --- #}
    {% for i in range(idx_suffix|length) %}
    {
      "operation": "esql_lookup_join_{{idx_suffix[i]}}_keys_limit1",
      "tags": ["lookup", "join", "limit1"],
      "clients": 1,
      "warmup-iterations": 10,
      "iterations": 50
    },
    {
      "operation": "esql_lookup_join_{{idx_suffix[i]}}_keys_limit1000",
      "tags": ["lookup", "join", "limit1000"],
      "clients": 1,
      "warmup-iterations": 10,
      "iterations": 50
    },
    {
      "operation": "esql_lookup_join_{{idx_suffix[i]}}_keys_limit10000",
      "tags": ["lookup", "join", "limit10000"],
      "clients": 1,
      "warmup-iterations": 10,
      "iterations": 50
    },
    {
      "operation": "esql_lookup_join_{{idx_suffix[i]}}_keys_keep_limit10000",
      "tags": ["lookup", "join", "limit10000"],
      "clients": 1,
      "warmup-iterations": 10,
      "iterations": 50
    },
    {
      "operation": "esql_lookup_join_{{idx_suffix[i]}}_keys_sort_limit10000",
      "tags": ["lookup", "join", "limit10000"],
      "clients": 1,
      "warmup-iterations": 5,
      "iterations": 20
    },
    {
      "operation": "esql_lookup_join_{{idx_suffix[i]}}_keys_where_no_match",
      "tags": ["lookup", "join"],
      "clients": 1,
      "warmup-iterations": 5,
      "iterations": 20
    },
    {
      "operation": "esql_lookup_join_{{idx_suffix[i]}}_keys_where_limit1000",
      "tags": ["lookup", "join", "limit1000"],
      "clients": 1,
      "warmup-iterations": 10,
      "iterations": 50
    },
    {% endfor %}

    {# --- Query tasks: "100k_to_<suffix>" joins for idx_suffix[2] through idx_suffix[6] (range end is exclusive). --- #}
    {% for i in range(2, 7) %}
    {
      "operation": "esql_lookup_join_100k_to_{{idx_suffix[i]}}",
      "tags": ["lookup", "join", "limit1000"],
      "clients": 1,
      "warmup-iterations": 10,
      "iterations": 50
    },
    {% endfor %}

    {# --- Query task: join against the x10 lookup index populated by index-lookup-100k_x10 above. --- #}
    {
      "operation": "esql_lookup_join_100k_keys_x10_limit1000",
      "tags": ["lookup", "join", "limit1000"],
      "clients": 1,
      "warmup-iterations": 5,
      "iterations": 50
    },

    {# --- Query task: the chained multi-index join; last entry, so no trailing comma. --- #}
    {
      "operation": "esql_lookup_join_1k_100k_200k_500k",
      "tags": ["lookup", "join", "limit1000"],
      "clients": 1,
      "warmup-iterations": 10,
      "iterations": 50
    }

  ]
}
19 changes: 18 additions & 1 deletion joins/challenges/small.json
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,24 @@
},
{% endfor %}

{% for i in range(2, 6) %}
{
"operation": "esql_lookup_join_100k_to_{{idx_suffix[i]}}",
"tags": ["lookup", "join", "limit1000"],
"clients": 1,
"warmup-iterations": 10,
"iterations": 50
},
{% endfor %}

{
"operation": "esql_lookup_join_100k_keys_x10_limit1000",
"tags": ["lookup", "join", "limit1000"],
"clients": 1,
"warmup-iterations": 5,
"iterations": 50
},

{
"operation": "esql_lookup_join_1k_keys_sort_limit10000",
"tags": ["lookup", "join", "limit10000"],
Expand All @@ -126,6 +144,5 @@
"iterations": 50
}


]
}
21 changes: 21 additions & 0 deletions joins/index-lookup_idx_100000_f10_x10.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{# Index body (settings + mappings) for a lookup-mode index with a single keyword field, key_100000. #}
{# NOTE(review): p_include_non_serverless_index_settings is assigned below but not referenced anywhere in this file - confirm whether it is still needed here. #}
{% set p_include_non_serverless_index_settings = (include_non_serverless_index_settings | default(build_flavor != "serverless")) %}

{
{# auto_expand_replicas defaults to "0-all" unless the auto_expand_replicas track parameter overrides it. #}
{# The request-cache setting between the markers carries its own leading comma, so the settings object stays valid JSON whether or not the condition emits it. #}
"settings": {
"index.mode": "lookup",
"auto_expand_replicas": {{ auto_expand_replicas | default("0-all") | tojson }}
{# non-serverless-index-settings-marker-start #}{%- if build_flavor != "serverless" or serverless_operator == true -%}
, "index.requests.cache.enable": false
{%- endif -%}{# non-serverless-index-settings-marker-end #}
},
{# _source mode defaults to "stored" unless the source_mode track parameter overrides it. #}
"mappings": {
"_source": {
"mode": {{ source_mode | default("stored") | tojson }}
},
"properties": {
"key_100000": {
"type": "keyword"
}
}
}
}
31 changes: 23 additions & 8 deletions joins/operations/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@
"bulk-size": {{bulk_size | default(10000)}},
"ingest-percentage": 100
},
{
"name": "index-lookup-100k_x10",
"operation-type": "bulk",
"indices": ["lookup_idx_100000_f10_x10"],
"bulk-size": {{bulk_size | default(10000)}},
"ingest-percentage": {{ingest_percentage | default(100)}}
},
{
"name": "index-lookup-1m",
"operation-type": "bulk",
Expand Down Expand Up @@ -84,13 +91,21 @@
{% endfor %}


{% for i in range(2, 7) %}
{
"name": "esql_lookup_join_1k_100k_200k_500k",
"operation-type": "esql",
"query": "FROM join_base_idx | lookup join lookup_idx_1000_f10 on key_1000 | rename lookup_keyword_0 as lk_1k | lookup join lookup_idx_100000_f10 on key_100000 | rename lookup_keyword_0 as lk_100k | lookup join lookup_idx_200000_f10 on key_200000 | rename lookup_keyword_0 as lk_200k | lookup join lookup_idx_500000_f10 on key_500000 | rename lookup_keyword_0 as lk_500k | keep id, key_1000, key_100000, key_200000, key_500000, lk_1k, lk_100k, lk_200k, lk_500k | limit 1000"
}




"name": "esql_lookup_join_100k_to_{{idx_suffix[i]}}",
"operation-type": "esql",
"query": "FROM join_base_idx | rename key_100000 as key_{{key_suffix[i]}}| lookup join lookup_idx_{{key_suffix[i]}}_f10 on key_{{key_suffix[i]}} | limit 1000"
},
{% endfor %}

{
"name": "esql_lookup_join_100k_keys_x10_limit1000",
"operation-type": "esql",
"query": "FROM join_base_idx | lookup join lookup_idx_100000_f10_x10 on key_100000 | limit 1000"
},
{
"name": "esql_lookup_join_1k_100k_200k_500k",
"operation-type": "esql",
"query": "FROM join_base_idx | lookup join lookup_idx_1000_f10 on key_1000 | rename lookup_keyword_0 as lk_1k | lookup join lookup_idx_100000_f10 on key_100000 | rename lookup_keyword_0 as lk_100k | lookup join lookup_idx_200000_f10 on key_200000 | rename lookup_keyword_0 as lk_200k | lookup join lookup_idx_500000_f10 on key_500000 | rename lookup_keyword_0 as lk_500k | keep id, key_1000, key_100000, key_200000, key_500000, lk_1k, lk_100k, lk_200k, lk_500k | limit 1000"
}
18 changes: 18 additions & 0 deletions joins/track.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
"name": "lookup_idx_100000_f10",
"body": "index-lookup_idx_100000_f10.json"
},
{
"name": "lookup_idx_100000_f10_x10",
"body": "index-lookup_idx_100000_f10_x10.json"
},
{
"name": "lookup_idx_200000_f10",
"body": "index-lookup_idx_200000_f10.json"
Expand Down Expand Up @@ -66,6 +70,20 @@
}
]
},
{
"name": "lookup_idx_100000_f10_x10",
"base-url": "https://rally-tracks.elastic.co/joins",
"documents": [
{
"target-index": "lookup_idx_100000_f10_x10",
"source-file": "lookup_idx_100000_f10_x10.json.bz2",
"#COMMENT": "Lookup index with 1M documents and 100k distinct keys (keywords, \"0\"..\"99999\")",
"document-count": 1000000,
"compressed-bytes": 22483295,
"uncompressed-bytes": 412777900
}
]
},
{
"name": "lookup_idx_200000_f10",
"base-url": "https://rally-tracks.elastic.co/joins",
Expand Down
Loading