Skip to content

Commit

Permalink
Deployed cdd347f with MkDocs version: 1.6.1
Browse files Browse the repository at this point in the history
  • Loading branch information
aarora79 committed Nov 6, 2024
1 parent 2fde31d commit 01c0f92
Show file tree
Hide file tree
Showing 16 changed files with 846 additions and 49 deletions.
186 changes: 186 additions & 0 deletions configs/embeddings/bge-base-en-v1-5-c5-embeddings.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
general:
name: "bge-base-en-v1-5"
model_name: "bge-base-en-v1-5"

# AWS and SageMaker settings
aws:
region: {region}
# uncomment and set the Role ARN if not running on sagemaker
sagemaker_execution_role: {role_arn}
## these are the buckets/resources you will create in your account below:
bucket: {write_bucket} ## add the name of your desired bucket

## WRITE BUCKET -- Write the results, data, metrics, endpoint.json and payloads to this bucket directory
dir_paths:
data_prefix: data ## add the prefix for all your data management/storage
prompts_prefix: prompts
all_prompts_file: all_prompts.csv
metrics_dir: metrics
models_dir: models
metadata_dir: metadata ## add a file here to dynamically track the metrics dir

## READ BUCKET -- Represents the section to read from scripts, source data and tokenizer for a separate s3 bucket for read/write segregation
s3_read_data:
read_bucket: {read_bucket}
scripts_prefix: scripts ## add your own scripts in case you are using anything that is not on jumpstart
script_files:
- hf_token.txt ## add your scripts files you have in s3 (including inference files, serving stacks, if any)
configs_prefix: configs
config_files:
- pricing.yml # mention the name of the config files that you want to be downloaded from s3 into the configs directory locally
source_data_prefix: source_data ## Add a source_data folder to store your raw data in an s3 path configured by you
source_data_files:
# - rajpurkar/squad_v2.jsonl
- just_text.jsonl
tokenizer_prefix: llama3_tokenizer ## add the tokenizer.json and config.json from your specific tokenizer type
prompt_template_dir: prompt_template
prompt_template_file: prompt_template_bert.txt ## add your desired prompt template type

## section that enables container to run notebooks and python scripts automatically
run_steps:
0_setup.ipynb: yes
1_generate_data.ipynb: yes
2_deploy_model.ipynb: yes
3_run_inference.ipynb: yes
4_model_metric_analysis.ipynb: yes
5_cleanup.ipynb: yes


datasets:
prompt_template_keys:
- text
filters:
- language: en
min_length_in_tokens: 1
max_length_in_tokens: 50
payload_file: payload_en_1-50.jsonl
- language: en
min_length_in_tokens: 50
max_length_in_tokens: 100
payload_file: payload_en_50-100.jsonl
- language: en
min_length_in_tokens: 100
max_length_in_tokens: 150
payload_file: payload_en_100-150.jsonl


metrics:
dataset_of_interest: en_50-100

pricing: pricing.yml

inference_parameters:
embedding:
top_p: 0.92

experiments:
- name: bge-base-en-v1-5-c5.xl
model_id: huggingface-sentencesimilarity-bge-base-en-v1-5
model_version: "*"
model_name: bge-base-en-v1.5
ep_name: bge-base-en-v1-5
instance_type: "ml.c5.xlarge"
image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04'
deploy: yes
instance_count: 1
deployment_script: jumpstart.py
inference_script: sagemaker_predictor.py
inference_spec:
parameter_set: embedding
container_type: huggingface
payload_files:
- payload_en_1-50.jsonl
- payload_en_50-100.jsonl
- payload_en_100-150.jsonl
concurrency_levels:
- 1
- 2
- 3
- 4
- 6
- 8
- 10
accept_eula: false
env:
SAGEMAKER_PROGRAM: "inference.py"
ENDPOINT_SERVER_TIMEOUT: "1200"

- name: bge-base-en-v1-5-c5.2xl
model_id: huggingface-sentencesimilarity-bge-base-en-v1-5
model_version: "*"
model_name: bge-base-en-v1.5
ep_name: bge-base-en-v1-5
instance_type: "ml.c5.2xlarge"
image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04'
deploy: yes
instance_count: 1
deployment_script: jumpstart.py
inference_script: sagemaker_predictor.py
inference_spec:
parameter_set: embedding
container_type: huggingface
payload_files:
- payload_en_1-50.jsonl
- payload_en_50-100.jsonl
- payload_en_100-150.jsonl
concurrency_levels:
- 1
- 2
- 3
- 4
- 6
- 8
- 10
accept_eula: false
env:
SAGEMAKER_PROGRAM: "inference.py"
ENDPOINT_SERVER_TIMEOUT: "1200"

- name: bge-base-en-v1-5-c5.4xl
model_id: huggingface-sentencesimilarity-bge-base-en-v1-5
model_version: "*"
model_name: bge-base-en-v1.5
ep_name: bge-base-en-v1-5
instance_type: "ml.c5.4xlarge"
image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04'
deploy: yes
instance_count: 1
deployment_script: jumpstart.py
inference_script: sagemaker_predictor.py
inference_spec:
parameter_set: embedding
container_type: huggingface
payload_files:
- payload_en_1-50.jsonl
- payload_en_50-100.jsonl
- payload_en_100-150.jsonl
concurrency_levels:
- 1
- 2
- 3
- 4
- 6
- 8
- 10

accept_eula: false
env:
SAGEMAKER_PROGRAM: "inference.py"
ENDPOINT_SERVER_TIMEOUT: "1200"
# SM_NUM_GPUS: "1"
# MODEL_CACHE_ROOT: "/opt/ml/model"
# SAGEMAKER_ENV: "1"
# HF_MODEL_ID: "BAAI/bge-base-en-v1.5"
# MAX_INPUT_LENGTH: "4095"
# MAX_TOTAL_TOKENS: "4096"
# # SAGEMAKER_MODEL_SERVER_WORKERS: "2"

report:
latency_budget: 1
cost_per_10k_txn_budget: 20
error_rate_budget: 0
per_inference_request_file: per_inference_request_results.csv
all_metrics_file: all_metrics.csv
txn_count_for_showing_cost: 10000
v_shift_w_single_instance: 0.025
v_shift_w_gt_one_instance: 0.025
154 changes: 154 additions & 0 deletions configs/embeddings/bge-base-en-v1-5-g5-embeddings.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
general:
name: "bge-base-en-v1-5"
model_name: "bge-base-en-v1-5"

# AWS and SageMaker settings
aws:
region: {region}
# uncomment and set the Role ARN if not running on sagemaker
sagemaker_execution_role: {role_arn}
## these are the buckets/resources you will create in your account below:
bucket: {write_bucket} ## add the name of your desired bucket

## WRITE BUCKET -- Write the results, data, metrics, endpoint.json and payloads to this bucket directory
dir_paths:
data_prefix: data ## add the prefix for all your data management/storage
prompts_prefix: prompts
all_prompts_file: all_prompts.csv
metrics_dir: metrics
models_dir: models
metadata_dir: metadata ## add a file here to dynamically track the metrics dir

## READ BUCKET -- Represents the section to read from scripts, source data and tokenizer for a separate s3 bucket for read/write segregation
s3_read_data:
read_bucket: {read_bucket}
scripts_prefix: scripts ## add your own scripts in case you are using anything that is not on jumpstart
script_files:
- hf_token.txt ## add your scripts files you have in s3 (including inference files, serving stacks, if any)
configs_prefix: configs
config_files:
- pricing.yml # mention the name of the config files that you want to be downloaded from s3 into the configs directory locally
source_data_prefix: source_data ## Add a source_data folder to store your raw data in an s3 path configured by you
source_data_files:
# - rajpurkar/squad_v2.jsonl
- just_text.jsonl
tokenizer_prefix: llama3_tokenizer ## add the tokenizer.json and config.json from your specific tokenizer type
prompt_template_dir: prompt_template
prompt_template_file: prompt_template_bert.txt ## add your desired prompt template type

## section that enables container to run notebooks and python scripts automatically
run_steps:
0_setup.ipynb: yes
1_generate_data.ipynb: yes
2_deploy_model.ipynb: yes
3_run_inference.ipynb: yes
4_model_metric_analysis.ipynb: yes
5_cleanup.ipynb: yes


datasets:
prompt_template_keys:
- text
filters:
- language: en
min_length_in_tokens: 1
max_length_in_tokens: 50
payload_file: payload_en_1-50.jsonl
- language: en
min_length_in_tokens: 50
max_length_in_tokens: 100
payload_file: payload_en_50-100.jsonl
- language: en
min_length_in_tokens: 100
max_length_in_tokens: 150
payload_file: payload_en_100-150.jsonl


metrics:
dataset_of_interest: en_50-100

pricing: pricing.yml

inference_parameters:
embedding:
top_p: 0.92

experiments:
- name: bge-base-en-v1-5-g5.xl
model_id: huggingface-sentencesimilarity-bge-base-en-v1-5
model_version: "*"
model_name: bge-base-en-v1.5
ep_name: bge-base-en-v1-5
instance_type: "ml.g5.xlarge"
image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04'
deploy: yes
instance_count: 1
deployment_script: jumpstart.py
inference_script: sagemaker_predictor.py
inference_spec:
parameter_set: embedding
container_type: huggingface
payload_files:
- payload_en_1-50.jsonl
- payload_en_50-100.jsonl
- payload_en_100-150.jsonl
concurrency_levels:
- 1
- 2
- 3
- 4
- 6
- 8
- 10
accept_eula: false
env:
SAGEMAKER_PROGRAM: "inference.py"
ENDPOINT_SERVER_TIMEOUT: "1200"

- name: bge-base-en-v1-5-g5.2xl
model_id: huggingface-sentencesimilarity-bge-base-en-v1-5
model_version: "*"
model_name: bge-base-en-v1.5
ep_name: bge-base-en-v1-5
instance_type: "ml.g5.2xlarge"
image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04'
deploy: yes
instance_count: 1
deployment_script: jumpstart.py
inference_script: sagemaker_predictor.py
inference_spec:
parameter_set: embedding
container_type: huggingface
payload_files:
- payload_en_1-50.jsonl
- payload_en_50-100.jsonl
- payload_en_100-150.jsonl
concurrency_levels:
- 1
- 2
- 3
- 4
- 6
- 8
- 10
accept_eula: false
env:
SAGEMAKER_PROGRAM: "inference.py"
ENDPOINT_SERVER_TIMEOUT: "1200"
# SM_NUM_GPUS: "1"
# MODEL_CACHE_ROOT: "/opt/ml/model"
# SAGEMAKER_ENV: "1"
# HF_MODEL_ID: "BAAI/bge-base-en-v1.5"
# MAX_INPUT_LENGTH: "4095"
# MAX_TOTAL_TOKENS: "4096"
# # SAGEMAKER_MODEL_SERVER_WORKERS: "2"

report:
latency_budget: 1
cost_per_10k_txn_budget: 20
error_rate_budget: 0
per_inference_request_file: per_inference_request_results.csv
all_metrics_file: all_metrics.csv
txn_count_for_showing_cost: 10000
v_shift_w_single_instance: 0.025
v_shift_w_gt_one_instance: 0.025
Loading

0 comments on commit 01c0f92

Please sign in to comment.