Deployed cdd347f with MkDocs version: 1.6.1
Showing 16 changed files with 846 additions and 49 deletions.
@@ -0,0 +1,186 @@
general:
  name: "bge-base-en-v1-5"
  model_name: "bge-base-en-v1-5"

# AWS and SageMaker settings
aws:
  region: {region}
  # uncomment and set the Role ARN if not running on SageMaker
  sagemaker_execution_role: {role_arn}
  ## these are the buckets/resources you will create in your account below:
  bucket: {write_bucket} ## the name of your desired bucket

## WRITE BUCKET -- results, data, metrics, endpoint.json and payloads are written under these prefixes in the bucket above
dir_paths:
  data_prefix: data ## prefix for all your data management/storage
  prompts_prefix: prompts
  all_prompts_file: all_prompts.csv
  metrics_dir: metrics
  models_dir: models
  metadata_dir: metadata ## add a file here to dynamically track the metrics dir

## READ BUCKET -- scripts, source data and the tokenizer are read from a separate s3 bucket for read/write segregation
s3_read_data:
  read_bucket: {read_bucket}
  scripts_prefix: scripts ## add your own scripts if you use anything that is not on JumpStart
  script_files:
  - hf_token.txt ## script files you have in s3 (including inference files and serving stacks, if any)
  configs_prefix: configs
  config_files:
  - pricing.yml # config files to download from s3 into the local configs directory
  source_data_prefix: source_data ## source_data folder that stores your raw data at an s3 path you configure
  source_data_files:
  # - rajpurkar/squad_v2.jsonl
  - just_text.jsonl
  tokenizer_prefix: llama3_tokenizer ## add the tokenizer.json and config.json for your specific tokenizer
  prompt_template_dir: prompt_template
  prompt_template_file: prompt_template_bert.txt ## your desired prompt template
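## for illustration (assumed contents; the template file itself is not part of
## this commit): with `text` as the only prompt_template_keys entry below, a
## minimal prompt_template_bert.txt could contain just the interpolated key:
##   {text}
## each source-data record is rendered through this template to build a prompt.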
## section that enables the container to run notebooks and Python scripts automatically
run_steps:
  0_setup.ipynb: yes
  1_generate_data.ipynb: yes
  2_deploy_model.ipynb: yes
  3_run_inference.ipynb: yes
  4_model_metric_analysis.ipynb: yes
  5_cleanup.ipynb: yes

datasets:
  prompt_template_keys:
  - text
  filters:
  - language: en
    min_length_in_tokens: 1
    max_length_in_tokens: 50
    payload_file: payload_en_1-50.jsonl
  - language: en
    min_length_in_tokens: 50
    max_length_in_tokens: 100
    payload_file: payload_en_50-100.jsonl
  - language: en
    min_length_in_tokens: 100
    max_length_in_tokens: 150
    payload_file: payload_en_100-150.jsonl
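## for illustration (assumed request shape, not part of the original file):
## each payload_file is JSONL with one request per line; a line in
## payload_en_50-100.jsonl might look like
##   {"text_inputs": ["<a prompt of 50-100 tokens rendered from the template>"]}
## the exact schema depends on what the deployed model container expects.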
metrics:
  dataset_of_interest: en_50-100

pricing: pricing.yml
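## for illustration (assumed file shape and prices; pricing.yml is not shown in
## this commit): the pricing file would map instance types to an hourly
## on-demand cost used for the cost metrics, e.g.
##   pricing:
##     ml.c5.xlarge: 0.204
##     ml.c5.2xlarge: 0.408
##     ml.c5.4xlarge: 0.816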
inference_parameters:
  embedding:
    top_p: 0.92
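## the `embedding` parameter set above is selected by each experiment below
## via inference_spec.parameter_set.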
experiments:
  - name: bge-base-en-v1-5-c5.xl
    model_id: huggingface-sentencesimilarity-bge-base-en-v1-5
    model_version: "*"
    model_name: bge-base-en-v1.5
    ep_name: bge-base-en-v1-5
    instance_type: "ml.c5.xlarge"
    image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04'
    deploy: yes
    instance_count: 1
    deployment_script: jumpstart.py
    inference_script: sagemaker_predictor.py
    inference_spec:
      parameter_set: embedding
      container_type: huggingface
    payload_files:
    - payload_en_1-50.jsonl
    - payload_en_50-100.jsonl
    - payload_en_100-150.jsonl
    concurrency_levels:
    - 1
    - 2
    - 3
    - 4
    - 6
    - 8
    - 10
    accept_eula: false
    env:
      SAGEMAKER_PROGRAM: "inference.py"
      ENDPOINT_SERVER_TIMEOUT: "1200"

  - name: bge-base-en-v1-5-c5.2xl
    model_id: huggingface-sentencesimilarity-bge-base-en-v1-5
    model_version: "*"
    model_name: bge-base-en-v1.5
    ep_name: bge-base-en-v1-5
    instance_type: "ml.c5.2xlarge"
    image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04'
    deploy: yes
    instance_count: 1
    deployment_script: jumpstart.py
    inference_script: sagemaker_predictor.py
    inference_spec:
      parameter_set: embedding
      container_type: huggingface
    payload_files:
    - payload_en_1-50.jsonl
    - payload_en_50-100.jsonl
    - payload_en_100-150.jsonl
    concurrency_levels:
    - 1
    - 2
    - 3
    - 4
    - 6
    - 8
    - 10
    accept_eula: false
    env:
      SAGEMAKER_PROGRAM: "inference.py"
      ENDPOINT_SERVER_TIMEOUT: "1200"

  - name: bge-base-en-v1-5-c5.4xl
    model_id: huggingface-sentencesimilarity-bge-base-en-v1-5
    model_version: "*"
    model_name: bge-base-en-v1.5
    ep_name: bge-base-en-v1-5
    instance_type: "ml.c5.4xlarge"
    image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04'
    deploy: yes
    instance_count: 1
    deployment_script: jumpstart.py
    inference_script: sagemaker_predictor.py
    inference_spec:
      parameter_set: embedding
      container_type: huggingface
    payload_files:
    - payload_en_1-50.jsonl
    - payload_en_50-100.jsonl
    - payload_en_100-150.jsonl
    concurrency_levels:
    - 1
    - 2
    - 3
    - 4
    - 6
    - 8
    - 10
    accept_eula: false
    env:
      SAGEMAKER_PROGRAM: "inference.py"
      ENDPOINT_SERVER_TIMEOUT: "1200"
      # SM_NUM_GPUS: "1"
      # MODEL_CACHE_ROOT: "/opt/ml/model"
      # SAGEMAKER_ENV: "1"
      # HF_MODEL_ID: "BAAI/bge-base-en-v1.5"
      # MAX_INPUT_LENGTH: "4095"
      # MAX_TOTAL_TOKENS: "4096"
      # # SAGEMAKER_MODEL_SERVER_WORKERS: "2"
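## note (assumed benchmarking behavior, not stated in this file): each
## experiment replays every payload file at every listed concurrency level, so
## the three experiments above sweep 1-10 concurrent requests across the
## c5.xlarge, c5.2xlarge and c5.4xlarge endpoints.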
report:
  latency_budget: 1
  cost_per_10k_txn_budget: 20
  error_rate_budget: 0
  per_inference_request_file: per_inference_request_results.csv
  all_metrics_file: all_metrics.csv
  txn_count_for_showing_cost: 10000
  v_shift_w_single_instance: 0.025
  v_shift_w_gt_one_instance: 0.025
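## worked example (assumed units: latency_budget in seconds, cost budget in
## dollars per txn_count_for_showing_cost transactions): an endpoint billed at
## an assumed $0.204/hour that sustains 10 transactions/second (36,000/hour)
## costs 0.204 / 36000 * 10000 = ~$0.057 per 10,000 transactions, well inside
## the $20 cost_per_10k_txn_budget.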
@@ -0,0 +1,154 @@
general:
  name: "bge-base-en-v1-5"
  model_name: "bge-base-en-v1-5"

# AWS and SageMaker settings
aws:
  region: {region}
  # uncomment and set the Role ARN if not running on SageMaker
  sagemaker_execution_role: {role_arn}
  ## these are the buckets/resources you will create in your account below:
  bucket: {write_bucket} ## the name of your desired bucket

## WRITE BUCKET -- results, data, metrics, endpoint.json and payloads are written under these prefixes in the bucket above
dir_paths:
  data_prefix: data ## prefix for all your data management/storage
  prompts_prefix: prompts
  all_prompts_file: all_prompts.csv
  metrics_dir: metrics
  models_dir: models
  metadata_dir: metadata ## add a file here to dynamically track the metrics dir

## READ BUCKET -- scripts, source data and the tokenizer are read from a separate s3 bucket for read/write segregation
s3_read_data:
  read_bucket: {read_bucket}
  scripts_prefix: scripts ## add your own scripts if you use anything that is not on JumpStart
  script_files:
  - hf_token.txt ## script files you have in s3 (including inference files and serving stacks, if any)
  configs_prefix: configs
  config_files:
  - pricing.yml # config files to download from s3 into the local configs directory
  source_data_prefix: source_data ## source_data folder that stores your raw data at an s3 path you configure
  source_data_files:
  # - rajpurkar/squad_v2.jsonl
  - just_text.jsonl
  tokenizer_prefix: llama3_tokenizer ## add the tokenizer.json and config.json for your specific tokenizer
  prompt_template_dir: prompt_template
  prompt_template_file: prompt_template_bert.txt ## your desired prompt template
## section that enables the container to run notebooks and Python scripts automatically
run_steps:
  0_setup.ipynb: yes
  1_generate_data.ipynb: yes
  2_deploy_model.ipynb: yes
  3_run_inference.ipynb: yes
  4_model_metric_analysis.ipynb: yes
  5_cleanup.ipynb: yes

datasets:
  prompt_template_keys:
  - text
  filters:
  - language: en
    min_length_in_tokens: 1
    max_length_in_tokens: 50
    payload_file: payload_en_1-50.jsonl
  - language: en
    min_length_in_tokens: 50
    max_length_in_tokens: 100
    payload_file: payload_en_50-100.jsonl
  - language: en
    min_length_in_tokens: 100
    max_length_in_tokens: 150
    payload_file: payload_en_100-150.jsonl
metrics:
  dataset_of_interest: en_50-100

pricing: pricing.yml

inference_parameters:
  embedding:
    top_p: 0.92
experiments:
  - name: bge-base-en-v1-5-g5.xl
    model_id: huggingface-sentencesimilarity-bge-base-en-v1-5
    model_version: "*"
    model_name: bge-base-en-v1.5
    ep_name: bge-base-en-v1-5
    instance_type: "ml.g5.xlarge"
    image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04'
    deploy: yes
    instance_count: 1
    deployment_script: jumpstart.py
    inference_script: sagemaker_predictor.py
    inference_spec:
      parameter_set: embedding
      container_type: huggingface
    payload_files:
    - payload_en_1-50.jsonl
    - payload_en_50-100.jsonl
    - payload_en_100-150.jsonl
    concurrency_levels:
    - 1
    - 2
    - 3
    - 4
    - 6
    - 8
    - 10
    accept_eula: false
    env:
      SAGEMAKER_PROGRAM: "inference.py"
      ENDPOINT_SERVER_TIMEOUT: "1200"

  - name: bge-base-en-v1-5-g5.2xl
    model_id: huggingface-sentencesimilarity-bge-base-en-v1-5
    model_version: "*"
    model_name: bge-base-en-v1.5
    ep_name: bge-base-en-v1-5
    instance_type: "ml.g5.2xlarge"
    image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04'
    deploy: yes
    instance_count: 1
    deployment_script: jumpstart.py
    inference_script: sagemaker_predictor.py
    inference_spec:
      parameter_set: embedding
      container_type: huggingface
    payload_files:
    - payload_en_1-50.jsonl
    - payload_en_50-100.jsonl
    - payload_en_100-150.jsonl
    concurrency_levels:
    - 1
    - 2
    - 3
    - 4
    - 6
    - 8
    - 10
    accept_eula: false
    env:
      SAGEMAKER_PROGRAM: "inference.py"
      ENDPOINT_SERVER_TIMEOUT: "1200"
      # SM_NUM_GPUS: "1"
      # MODEL_CACHE_ROOT: "/opt/ml/model"
      # SAGEMAKER_ENV: "1"
      # HF_MODEL_ID: "BAAI/bge-base-en-v1.5"
      # MAX_INPUT_LENGTH: "4095"
      # MAX_TOTAL_TOKENS: "4096"
      # # SAGEMAKER_MODEL_SERVER_WORKERS: "2"

report:
  latency_budget: 1
  cost_per_10k_txn_budget: 20
  error_rate_budget: 0
  per_inference_request_file: per_inference_request_results.csv
  all_metrics_file: all_metrics.csv
  txn_count_for_showing_cost: 10000
  v_shift_w_single_instance: 0.025
  v_shift_w_gt_one_instance: 0.025