Deployed cdd347f with MkDocs version: 1.6.1
Showing 16 changed files with 846 additions and 49 deletions.
@@ -0,0 +1,186 @@
general:
  name: "bge-base-en-v1-5"
  model_name: "bge-base-en-v1-5"

# AWS and SageMaker settings
aws:
  region: {region}
  # uncomment and set the Role ARN if not running on SageMaker
  sagemaker_execution_role: {role_arn}
  ## these are the buckets/resources you will create in your account below:
  bucket: {write_bucket} ## the name of your desired bucket

## WRITE BUCKET -- results, data, metrics, endpoint.json and payloads are written under these prefixes in the bucket above
dir_paths:
  data_prefix: data ## prefix for all your data management/storage
  prompts_prefix: prompts
  all_prompts_file: all_prompts.csv
  metrics_dir: metrics
  models_dir: models
  metadata_dir: metadata ## add a file here to dynamically track the metrics dir

## READ BUCKET -- scripts, source data and the tokenizer are read from a separate s3 bucket for read/write segregation
s3_read_data:
  read_bucket: {read_bucket}
  scripts_prefix: scripts ## add your own scripts if you use anything that is not on JumpStart
  script_files:
  - hf_token.txt ## script files you have in s3 (including inference files and serving stacks, if any)
  configs_prefix: configs
  config_files:
  - pricing.yml # config files to download from s3 into the local configs directory
  source_data_prefix: source_data ## source_data folder that stores your raw data at an s3 path you configure
  source_data_files:
  # - rajpurkar/squad_v2.jsonl
  - just_text.jsonl
  tokenizer_prefix: llama3_tokenizer ## add the tokenizer.json and config.json for your specific tokenizer
  prompt_template_dir: prompt_template
  prompt_template_file: prompt_template_bert.txt ## your desired prompt template
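## for illustration (assumed contents; the template file itself is not part of
## this commit): with `text` as the only prompt_template_keys entry below, a
## minimal prompt_template_bert.txt could contain just the interpolated key:
##   {text}
## each source-data record is rendered through this template to build a prompt.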
## section that enables the container to run notebooks and Python scripts automatically
run_steps:
  0_setup.ipynb: yes
  1_generate_data.ipynb: yes
  2_deploy_model.ipynb: yes
  3_run_inference.ipynb: yes
  4_model_metric_analysis.ipynb: yes
  5_cleanup.ipynb: yes

datasets:
  prompt_template_keys:
  - text
  filters:
  - language: en
    min_length_in_tokens: 1
    max_length_in_tokens: 50
    payload_file: payload_en_1-50.jsonl
  - language: en
    min_length_in_tokens: 50
    max_length_in_tokens: 100
    payload_file: payload_en_50-100.jsonl
  - language: en
    min_length_in_tokens: 100
    max_length_in_tokens: 150
    payload_file: payload_en_100-150.jsonl
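## for illustration (assumed request shape, not part of the original file):
## each payload_file is JSONL with one request per line; a line in
## payload_en_50-100.jsonl might look like
##   {"text_inputs": ["<a prompt of 50-100 tokens rendered from the template>"]}
## the exact schema depends on what the deployed model container expects.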
metrics:
  dataset_of_interest: en_50-100

pricing: pricing.yml
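## for illustration (assumed file shape and prices; pricing.yml is not shown in
## this commit): the pricing file would map instance types to an hourly
## on-demand cost used for the cost metrics, e.g.
##   pricing:
##     ml.c5.xlarge: 0.204
##     ml.c5.2xlarge: 0.408
##     ml.c5.4xlarge: 0.816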
inference_parameters:
  embedding:
    top_p: 0.92
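## the `embedding` parameter set above is selected by each experiment below
## via inference_spec.parameter_set.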
experiments:
  - name: bge-base-en-v1-5-c5.xl
    model_id: huggingface-sentencesimilarity-bge-base-en-v1-5
    model_version: "*"
    model_name: bge-base-en-v1.5
    ep_name: bge-base-en-v1-5
    instance_type: "ml.c5.xlarge"
    image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04'
    deploy: yes
    instance_count: 1
    deployment_script: jumpstart.py
    inference_script: sagemaker_predictor.py
    inference_spec:
      parameter_set: embedding
      container_type: huggingface
    payload_files:
    - payload_en_1-50.jsonl
    - payload_en_50-100.jsonl
    - payload_en_100-150.jsonl
    concurrency_levels:
    - 1
    - 2
    - 3
    - 4
    - 6
    - 8
    - 10
    accept_eula: false
    env:
      SAGEMAKER_PROGRAM: "inference.py"
      ENDPOINT_SERVER_TIMEOUT: "1200"

  - name: bge-base-en-v1-5-c5.2xl
    model_id: huggingface-sentencesimilarity-bge-base-en-v1-5
    model_version: "*"
    model_name: bge-base-en-v1.5
    ep_name: bge-base-en-v1-5
    instance_type: "ml.c5.2xlarge"
    image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04'
    deploy: yes
    instance_count: 1
    deployment_script: jumpstart.py
    inference_script: sagemaker_predictor.py
    inference_spec:
      parameter_set: embedding
      container_type: huggingface
    payload_files:
    - payload_en_1-50.jsonl
    - payload_en_50-100.jsonl
    - payload_en_100-150.jsonl
    concurrency_levels:
    - 1
    - 2
    - 3
    - 4
    - 6
    - 8
    - 10
    accept_eula: false
    env:
      SAGEMAKER_PROGRAM: "inference.py"
      ENDPOINT_SERVER_TIMEOUT: "1200"

  - name: bge-base-en-v1-5-c5.4xl
    model_id: huggingface-sentencesimilarity-bge-base-en-v1-5
    model_version: "*"
    model_name: bge-base-en-v1.5
    ep_name: bge-base-en-v1-5
    instance_type: "ml.c5.4xlarge"
    image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04'
    deploy: yes
    instance_count: 1
    deployment_script: jumpstart.py
    inference_script: sagemaker_predictor.py
    inference_spec:
      parameter_set: embedding
      container_type: huggingface
    payload_files:
    - payload_en_1-50.jsonl
    - payload_en_50-100.jsonl
    - payload_en_100-150.jsonl
    concurrency_levels:
    - 1
    - 2
    - 3
    - 4
    - 6
    - 8
    - 10
    accept_eula: false
    env:
      SAGEMAKER_PROGRAM: "inference.py"
      ENDPOINT_SERVER_TIMEOUT: "1200"
      # SM_NUM_GPUS: "1"
      # MODEL_CACHE_ROOT: "/opt/ml/model"
      # SAGEMAKER_ENV: "1"
      # HF_MODEL_ID: "BAAI/bge-base-en-v1.5"
      # MAX_INPUT_LENGTH: "4095"
      # MAX_TOTAL_TOKENS: "4096"
      # # SAGEMAKER_MODEL_SERVER_WORKERS: "2"
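## note (assumed benchmarking behavior, not stated in this file): each
## experiment replays every payload file at every listed concurrency level, so
## the three experiments above sweep 1-10 concurrent requests across the
## c5.xlarge, c5.2xlarge and c5.4xlarge endpoints.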
report:
  latency_budget: 1
  cost_per_10k_txn_budget: 20
  error_rate_budget: 0
  per_inference_request_file: per_inference_request_results.csv
  all_metrics_file: all_metrics.csv
  txn_count_for_showing_cost: 10000
  v_shift_w_single_instance: 0.025
  v_shift_w_gt_one_instance: 0.025
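## worked example (assumed units: latency_budget in seconds, cost budget in
## dollars per txn_count_for_showing_cost transactions): an endpoint billed at
## an assumed $0.204/hour that sustains 10 transactions/second (36,000/hour)
## costs 0.204 / 36000 * 10000 = ~$0.057 per 10,000 transactions, well inside
## the $20 cost_per_10k_txn_budget.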
@@ -0,0 +1,154 @@
general:
  name: "bge-base-en-v1-5"
  model_name: "bge-base-en-v1-5"

# AWS and SageMaker settings
aws:
  region: {region}
  # uncomment and set the Role ARN if not running on SageMaker
  sagemaker_execution_role: {role_arn}
  ## these are the buckets/resources you will create in your account below:
  bucket: {write_bucket} ## the name of your desired bucket

## WRITE BUCKET -- results, data, metrics, endpoint.json and payloads are written under these prefixes in the bucket above
dir_paths:
  data_prefix: data ## prefix for all your data management/storage
  prompts_prefix: prompts
  all_prompts_file: all_prompts.csv
  metrics_dir: metrics
  models_dir: models
  metadata_dir: metadata ## add a file here to dynamically track the metrics dir

## READ BUCKET -- scripts, source data and the tokenizer are read from a separate s3 bucket for read/write segregation
s3_read_data:
  read_bucket: {read_bucket}
  scripts_prefix: scripts ## add your own scripts if you use anything that is not on JumpStart
  script_files:
  - hf_token.txt ## script files you have in s3 (including inference files and serving stacks, if any)
  configs_prefix: configs
  config_files:
  - pricing.yml # config files to download from s3 into the local configs directory
  source_data_prefix: source_data ## source_data folder that stores your raw data at an s3 path you configure
  source_data_files:
  # - rajpurkar/squad_v2.jsonl
  - just_text.jsonl
  tokenizer_prefix: llama3_tokenizer ## add the tokenizer.json and config.json for your specific tokenizer
  prompt_template_dir: prompt_template
  prompt_template_file: prompt_template_bert.txt ## your desired prompt template
## section that enables the container to run notebooks and Python scripts automatically
run_steps:
  0_setup.ipynb: yes
  1_generate_data.ipynb: yes
  2_deploy_model.ipynb: yes
  3_run_inference.ipynb: yes
  4_model_metric_analysis.ipynb: yes
  5_cleanup.ipynb: yes

datasets:
  prompt_template_keys:
  - text
  filters:
  - language: en
    min_length_in_tokens: 1
    max_length_in_tokens: 50
    payload_file: payload_en_1-50.jsonl
  - language: en
    min_length_in_tokens: 50
    max_length_in_tokens: 100
    payload_file: payload_en_50-100.jsonl
  - language: en
    min_length_in_tokens: 100
    max_length_in_tokens: 150
    payload_file: payload_en_100-150.jsonl
metrics:
  dataset_of_interest: en_50-100

pricing: pricing.yml

inference_parameters:
  embedding:
    top_p: 0.92
experiments:
  - name: bge-base-en-v1-5-g5.xl
    model_id: huggingface-sentencesimilarity-bge-base-en-v1-5
    model_version: "*"
    model_name: bge-base-en-v1.5
    ep_name: bge-base-en-v1-5
    instance_type: "ml.g5.xlarge"
    image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04'
    deploy: yes
    instance_count: 1
    deployment_script: jumpstart.py
    inference_script: sagemaker_predictor.py
    inference_spec:
      parameter_set: embedding
      container_type: huggingface
    payload_files:
    - payload_en_1-50.jsonl
    - payload_en_50-100.jsonl
    - payload_en_100-150.jsonl
    concurrency_levels:
    - 1
    - 2
    - 3
    - 4
    - 6
    - 8
    - 10
    accept_eula: false
    env:
      SAGEMAKER_PROGRAM: "inference.py"
      ENDPOINT_SERVER_TIMEOUT: "1200"

  - name: bge-base-en-v1-5-g5.2xl
    model_id: huggingface-sentencesimilarity-bge-base-en-v1-5
    model_version: "*"
    model_name: bge-base-en-v1.5
    ep_name: bge-base-en-v1-5
    instance_type: "ml.g5.2xlarge"
    image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04'
    deploy: yes
    instance_count: 1
    deployment_script: jumpstart.py
    inference_script: sagemaker_predictor.py
    inference_spec:
      parameter_set: embedding
      container_type: huggingface
    payload_files:
    - payload_en_1-50.jsonl
    - payload_en_50-100.jsonl
    - payload_en_100-150.jsonl
    concurrency_levels:
    - 1
    - 2
    - 3
    - 4
    - 6
    - 8
    - 10
    accept_eula: false
    env:
      SAGEMAKER_PROGRAM: "inference.py"
      ENDPOINT_SERVER_TIMEOUT: "1200"
      # SM_NUM_GPUS: "1"
      # MODEL_CACHE_ROOT: "/opt/ml/model"
      # SAGEMAKER_ENV: "1"
      # HF_MODEL_ID: "BAAI/bge-base-en-v1.5"
      # MAX_INPUT_LENGTH: "4095"
      # MAX_TOTAL_TOKENS: "4096"
      # # SAGEMAKER_MODEL_SERVER_WORKERS: "2"

report:
  latency_budget: 1
  cost_per_10k_txn_budget: 20
  error_rate_budget: 0
  per_inference_request_file: per_inference_request_results.csv
  all_metrics_file: all_metrics.csv
  txn_count_for_showing_cost: 10000
  v_shift_w_single_instance: 0.025
  v_shift_w_gt_one_instance: 0.025