Skip to content

Commit

Permalink
Sync benchmark folder from main
Browse files Browse the repository at this point in the history
Signed-off-by: Heemin Kim <[email protected]>
  • Loading branch information
heemin32 committed Feb 23, 2024
1 parent 6be0f0c commit 30bb554
Show file tree
Hide file tree
Showing 41 changed files with 860 additions and 328 deletions.
485 changes: 247 additions & 238 deletions benchmarks/osb/README.md

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions benchmarks/osb/params/no-train-params.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
"target_index_bulk_index_data_set_format": "hdf5",
"target_index_bulk_index_data_set_path": "<path to data>",
"target_index_bulk_index_clients": 10,
"target_index_max_num_segments": 10,
"target_index_force_merge_timeout": 45.0,
"hnsw_ef_search": 512,
"hnsw_ef_construction": 512,
"hnsw_m": 16,
Expand Down
2 changes: 2 additions & 0 deletions benchmarks/osb/params/train-params.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
"target_index_bulk_index_data_set_format": "hdf5",
"target_index_bulk_index_data_set_path": "<path to data>",
"target_index_bulk_index_clients": 10,
"target_index_max_num_segments": 10,
"target_index_force_merge_timeout": 45.0,
"ivf_nlists": 10,
"ivf_nprobes": 1,
"pq_code_size": 8,
Expand Down
10 changes: 10 additions & 0 deletions benchmarks/osb/procedures/no-train-test.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,16 @@
"retries": 100
}
},
{
"operation": {
"name": "force-merge",
"operation-type": "force-merge",
"request-timeout": {{ target_index_force_merge_timeout }},
"index": "{{ target_index_name }}",
"mode": "polling",
"max-num-segments": {{ target_index_max_num_segments }}
}
},
{
"operation": {
"name": "knn-query-from-data-set",
Expand Down
10 changes: 10 additions & 0 deletions benchmarks/osb/procedures/train-test.json
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,16 @@
"retries": 100
}
},
{
"operation": {
"name": "force-merge",
"operation-type": "force-merge",
"request-timeout": {{ target_index_force_merge_timeout }},
"index": "{{ target_index_name }}",
"mode": "polling",
"max-num-segments": {{ target_index_max_num_segments }}
}
},
{
"operation": {
"name": "knn-query-from-data-set",
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/perf-tool/add-parent-doc-id-to-dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ def run(self, source_path, target_path) -> None:
possible_colors = ['red', 'green', 'yellow', 'blue', None]
possible_tastes = ['sweet', 'salty', 'sour', 'bitter', None]
max_age = 100
min_field_size = 1000
max_field_size = 10001
min_field_size = 10
max_field_size = 10

# Copy train and test data
for key in in_file.keys():
Expand Down
2 changes: 2 additions & 0 deletions benchmarks/perf-tool/okpt/io/config/parsers/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class TestConfig:
test_id: str
endpoint: str
port: int
timeout: int
num_runs: int
show_runs: bool
setup: List[Step]
Expand Down Expand Up @@ -67,6 +68,7 @@ def parse(self, file_obj: TextIOWrapper) -> TestConfig:
test_config = TestConfig(
endpoint=config_obj['endpoint'],
port=config_obj['port'],
timeout=config_obj['timeout'],
test_name=config_obj['test_name'],
test_id=config_obj['test_id'],
num_runs=config_obj['num_runs'],
Expand Down
3 changes: 3 additions & 0 deletions benchmarks/perf-tool/okpt/io/config/schemas/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ endpoint:
port:
type: integer
default: 9200
timeout:
type: integer
default: 60
test_name:
type: string
test_id:
Expand Down
4 changes: 3 additions & 1 deletion benchmarks/perf-tool/okpt/test/steps/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from okpt.test.steps.steps import CreateIndexStep, DisableRefreshStep, RefreshIndexStep, DeleteIndexStep, \
TrainModelStep, DeleteModelStep, ForceMergeStep, ClearCacheStep, IngestStep, IngestMultiFieldStep, \
IngestNestedFieldStep, QueryStep, QueryWithFilterStep, QueryNestedFieldStep, GetStatsStep
IngestNestedFieldStep, QueryStep, QueryWithFilterStep, QueryNestedFieldStep, GetStatsStep, WarmupStep


def create_step(step_config: StepConfig) -> Step:
Expand Down Expand Up @@ -44,5 +44,7 @@ def create_step(step_config: StepConfig) -> Step:
return ClearCacheStep(step_config)
elif step_config.step_name == GetStatsStep.label:
return GetStatsStep(step_config)
elif step_config.step_name == WarmupStep.label:
return WarmupStep(step_config)

raise ConfigurationError(f'Invalid step {step_config.step_name}')
44 changes: 39 additions & 5 deletions benchmarks/perf-tool/okpt/test/steps/steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@ def __init__(self, step_config: StepConfig):
default_port = 9200 if self.endpoint == 'localhost' else 80
self.port = parse_int_param('port', step_config.config,
step_config.implicit_config, default_port)
self.timeout = parse_int_param('timeout', step_config.config, {}, 60)
self.opensearch = get_opensearch_client(str(self.endpoint),
int(self.port))
int(self.port), int(self.timeout))


class CreateIndexStep(OpenSearchStep):
Expand Down Expand Up @@ -163,6 +164,25 @@ def _get_measures(self) -> List[str]:
return ['took']


class WarmupStep(OpenSearchStep):
    """Step that runs the k-NN warmup request against a single index.

    The target index is taken from the step's ``index_name`` setting.
    """

    label = 'warmup_operation'

    def __init__(self, step_config: StepConfig):
        super().__init__(step_config)
        # Only the step's own config is consulted: the fallback map is empty
        # and the default passed to the parser is None.
        step_settings = step_config.config
        self.index_name = parse_string_param('index_name', step_settings, {},
                                             None)

    def _action(self):
        """Send the warmup request for ``self.index_name``.

        The response of the warmup call is discarded; an empty dict is
        returned as the step result.
        """
        warmup_operation(self.endpoint, self.port, self.index_name)
        return dict()

    def _get_measures(self) -> List[str]:
        """Return the list of measure names for this step (only 'took')."""
        measure_names = ['took']
        return measure_names


class TrainModelStep(OpenSearchStep):
"""See base class."""

Expand Down Expand Up @@ -739,9 +759,6 @@ def get_body(self, vec):
}
}

def get_exclude_fields(self):
    """Return a one-element list: ``self.field_name`` prefixed with 'nested_field.'."""
    nested_path = 'nested_field.' + self.field_name
    return [nested_path]

class GetStatsStep(OpenSearchStep):
"""See base class."""

Expand Down Expand Up @@ -841,6 +858,23 @@ def delete_model(endpoint, port, model_id):
return response.json()


def warmup_operation(endpoint, port, index):
    """
    Calls the k-NN plugin warmup API for one index.

    Issues a GET request to ``/_plugins/_knn/warmup/<index>`` over plain
    HTTP. Warming up loads the native library files of that index so that
    query latencies are reduced.
    Args:
        endpoint: Endpoint OpenSearch is running on
        port: Port OpenSearch is running on
        index: index name
    Returns:
        The decoded JSON body of the response, i.e. the number of shards
        the plugin succeeded and failed to warm up.
    """
    # Build the URL with plain concatenation so that a non-string endpoint
    # or index still fails here instead of being silently formatted.
    host_url = 'http://' + endpoint + ':' + str(port)
    warmup_url = host_url + '/_plugins/_knn/warmup/' + index
    json_headers = {'content-type': 'application/json'}
    warmup_response = requests.get(warmup_url, headers=json_headers)
    return warmup_response.json()


def get_opensearch_client(endpoint: str, port: int, timeout=60):
"""
Get an opensearch client from an endpoint and port
Expand Down Expand Up @@ -947,7 +981,7 @@ def query_index(opensearch: OpenSearch, index_name: str, body: dict,


# Sends `body` as one bulk request against `index_name` through the given
# client and returns whatever the client's bulk() call returns.
def bulk_index(opensearch: OpenSearch, index_name: str, body: List):
# NOTE(review): two return statements follow; as written only the first one
# (with timeout='5m') can ever execute.
return opensearch.bulk(index=index_name, body=body, timeout='5m')
# NOTE(review): unreachable after the return above. Presumably this is the
# replacement line of a diff hunk that drops the per-request timeout='5m';
# confirm which of the two lines the real file keeps.
return opensearch.bulk(index=index_name, body=body)

def get_segment_stats(opensearch: OpenSearch, index_name: str):
    """Return the client's ``indices.segments`` response for ``index_name``, unchanged."""
    indices_client = opensearch.indices
    return indices_client.segments(index=index_name)
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
"index": {
"knn": true,
"number_of_shards": 24,
"number_of_replicas": 1
"number_of_replicas": 1,
"knn.algo_param.ef_search": 100
}
},
"mappings": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,34 +1,40 @@
endpoint: [ENDPOINT]
port: [PORT]
test_name: "Faiss HNSW Relaxed Filter Test"
test_id: "Faiss HNSW Relaxed Filter Test"
num_runs: 10
num_runs: 3
show_runs: false
steps:
- name: delete_index
index_name: target_index
- name: create_index
index_name: target_index
index_spec: [INDEX_SPEC_PATH]/relaxed-filter/index.json
index_spec: release-configs/faiss-hnsw/filtering/relaxed-filter/index.json
- name: ingest_multi_field
index_name: target_index
field_name: target_field
bulk_size: 500
dataset_format: hdf5
dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5
dataset_path: dataset/sift-128-euclidean-with-attr.hdf5
attributes_dataset_name: attributes
attribute_spec: [ { name: 'color', type: 'str' }, { name: 'taste', type: 'str' }, { name: 'age', type: 'int' } ]
- name: refresh_index
index_name: target_index
- name: force_merge
index_name: target_index
max_num_segments: 1
- name: warmup_operation
index_name: target_index
- name: query_with_filter
k: 100
r: 1
calculate_recall: true
index_name: target_index
field_name: target_field
dataset_format: hdf5
dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5
dataset_path: dataset/sift-128-euclidean-with-attr.hdf5
neighbors_format: hdf5
neighbors_path: [DATASET_PATH]/sift-128-euclidean-with-filters-updated.hdf5
neighbors_path: dataset/sift-128-euclidean-with-relaxed-filters.hdf5
neighbors_dataset: neighbors_filter_5
filter_spec: [INDEX_SPEC_PATH]/relaxed-filter-spec.json
filter_spec: release-configs/faiss-hnsw/filtering/relaxed-filter/relaxed-filter-spec.json
filter_type: FILTER
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
"index": {
"knn": true,
"number_of_shards": 24,
"number_of_replicas": 1
"number_of_replicas": 1,
"knn.algo_param.ef_search": 100
}
},
"mappings": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,37 +1,40 @@
endpoint: [ENDPOINT]
port: [PORT]
test_name: "Faiss HNSW Restrictive Filter Test"
test_id: "Faiss HNSW Restrictive Filter Test"
num_runs: 10
num_runs: 3
show_runs: false
steps:
- name: delete_index
index_name: target_index
- name: create_index
index_name: target_index
index_spec: [INDEX_SPEC_PATH]/index.json
index_spec: release-configs/faiss-hnsw/filtering/restrictive-filter/index.json
- name: ingest_multi_field
index_name: target_index
field_name: target_field
bulk_size: 500
dataset_format: hdf5
dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5
dataset_path: dataset/sift-128-euclidean-with-attr.hdf5
attributes_dataset_name: attributes
attribute_spec: [ { name: 'color', type: 'str' }, { name: 'taste', type: 'str' }, { name: 'age', type: 'int' } ]
- name: refresh_index
index_name: target_index
- name: force_merge
index_name: target_index
max_num_segments: 1
- name: warmup_operation
index_name: target_index
- name: query_with_filter
k: 100
r: 1
calculate_recall: true
index_name: target_index
field_name: target_field
dataset_format: hdf5
dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5
dataset_path: dataset/sift-128-euclidean-with-attr.hdf5
neighbors_format: hdf5
neighbors_path: [DATASET_PATH]/sift-128-euclidean-with-filters.hdf5
neighbors_path: dataset/sift-128-euclidean-with-restrictive-filters.hdf5
neighbors_dataset: neighbors_filter_4
filter_spec: [INDEX_SPEC_PATH]/restrictive-filter-spec.json
filter_spec: release-configs/faiss-hnsw/filtering/restrictive-filter/restrictive-filter-spec.json
filter_type: FILTER
3 changes: 2 additions & 1 deletion benchmarks/perf-tool/release-configs/faiss-hnsw/index.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
"index": {
"knn": true,
"number_of_shards": 24,
"number_of_replicas": 1
"number_of_replicas": 1,
"knn.algo_param.ef_search": 100
}
},
"mappings": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
}
},
"mappings": {
"_source": {
"excludes": ["nested_field"]
},
"properties": {
"nested_field": {
"type": "nested",
Expand Down
15 changes: 9 additions & 6 deletions benchmarks/perf-tool/release-configs/faiss-hnsw/test.yml
Original file line number Diff line number Diff line change
@@ -1,32 +1,35 @@
endpoint: localhost
endpoint: [ENDPOINT]
port: [PORT]
test_name: "Faiss HNSW Test"
test_id: "Faiss HNSW Test"
num_runs: 10
num_runs: 3
show_runs: false
steps:
- name: delete_index
index_name: target_index
- name: create_index
index_name: target_index
index_spec: /home/ec2-user/[PATH]/index.json
index_spec: release-configs/faiss-hnsw/index.json
- name: ingest
index_name: target_index
field_name: target_field
bulk_size: 500
dataset_format: hdf5
dataset_path: [DATASET_PATH]/sift-128-euclidean.hdf5
dataset_path: dataset/sift-128-euclidean.hdf5
- name: refresh_index
index_name: target_index
- name: force_merge
index_name: target_index
max_num_segments: 1
- name: warmup_operation
index_name: target_index
- name: query
k: 100
r: 1
calculate_recall: true
index_name: target_index
field_name: target_field
dataset_format: hdf5
dataset_path: [DATASET_PATH]/sift-128-euclidean.hdf5
dataset_path: dataset/sift-128-euclidean.hdf5
neighbors_format: hdf5
neighbors_path: [DATASET_PATH]/sift-128-euclidean.hdf5
neighbors_path: dataset/sift-128-euclidean.hdf5
Loading

0 comments on commit 30bb554

Please sign in to comment.