diff --git a/recipes/3p_integrations/README.md b/3p-integrations/README.md
similarity index 100%
rename from recipes/3p_integrations/README.md
rename to 3p-integrations/README.md
diff --git a/recipes/3p_integrations/aws/getting_started_llama_3_on_amazon_bedrock.ipynb b/3p-integrations/aws/getting_started_llama_3_on_amazon_bedrock.ipynb
similarity index 100%
rename from recipes/3p_integrations/aws/getting_started_llama_3_on_amazon_bedrock.ipynb
rename to 3p-integrations/aws/getting_started_llama_3_on_amazon_bedrock.ipynb
diff --git a/recipes/3p_integrations/aws/prompt_engineering_with_llama_2_on_amazon_bedrock.ipynb b/3p-integrations/aws/prompt_engineering_with_llama_2_on_amazon_bedrock.ipynb
similarity index 100%
rename from recipes/3p_integrations/aws/prompt_engineering_with_llama_2_on_amazon_bedrock.ipynb
rename to 3p-integrations/aws/prompt_engineering_with_llama_2_on_amazon_bedrock.ipynb
diff --git a/recipes/3p_integrations/aws/react_llama_3_bedrock_wk.ipynb b/3p-integrations/aws/react_llama_3_bedrock_wk.ipynb
similarity index 100%
rename from recipes/3p_integrations/aws/react_llama_3_bedrock_wk.ipynb
rename to 3p-integrations/aws/react_llama_3_bedrock_wk.ipynb
diff --git a/recipes/3p_integrations/azure/Azure MaaS/azure_api_example.ipynb b/3p-integrations/azure/Azure MaaS/azure_api_example.ipynb
similarity index 100%
rename from recipes/3p_integrations/azure/Azure MaaS/azure_api_example.ipynb
rename to 3p-integrations/azure/Azure MaaS/azure_api_example.ipynb
diff --git a/recipes/3p_integrations/azure/README.md b/3p-integrations/azure/README.md
similarity index 100%
rename from recipes/3p_integrations/azure/README.md
rename to 3p-integrations/azure/README.md
diff --git a/recipes/3p_integrations/crusoe/README.md b/3p-integrations/crusoe/README.md
similarity index 100%
rename from recipes/3p_integrations/crusoe/README.md
rename to 3p-integrations/crusoe/README.md
diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/README.md b/3p-integrations/crusoe/vllm-fp8/README.md
similarity index 100%
rename from recipes/3p_integrations/crusoe/vllm-fp8/README.md
rename to 3p-integrations/crusoe/vllm-fp8/README.md
diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/assets/tpot_vs_qps_chart.png b/3p-integrations/crusoe/vllm-fp8/assets/tpot_vs_qps_chart.png
similarity index 100%
rename from recipes/3p_integrations/crusoe/vllm-fp8/assets/tpot_vs_qps_chart.png
rename to 3p-integrations/crusoe/vllm-fp8/assets/tpot_vs_qps_chart.png
diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/assets/ttft_vs_qps_chart.png b/3p-integrations/crusoe/vllm-fp8/assets/ttft_vs_qps_chart.png
similarity index 100%
rename from recipes/3p_integrations/crusoe/vllm-fp8/assets/ttft_vs_qps_chart.png
rename to 3p-integrations/crusoe/vllm-fp8/assets/ttft_vs_qps_chart.png
diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/benchmarks/backend_request_func.py b/3p-integrations/crusoe/vllm-fp8/benchmarks/backend_request_func.py
similarity index 100%
rename from recipes/3p_integrations/crusoe/vllm-fp8/benchmarks/backend_request_func.py
rename to 3p-integrations/crusoe/vllm-fp8/benchmarks/backend_request_func.py
diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/benchmarks/benchmark_serving.py b/3p-integrations/crusoe/vllm-fp8/benchmarks/benchmark_serving.py
similarity index 100%
rename from recipes/3p_integrations/crusoe/vllm-fp8/benchmarks/benchmark_serving.py
rename to 3p-integrations/crusoe/vllm-fp8/benchmarks/benchmark_serving.py
diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/benchmarks/sonnet.txt b/3p-integrations/crusoe/vllm-fp8/benchmarks/sonnet.txt
similarity index 100%
rename from recipes/3p_integrations/crusoe/vllm-fp8/benchmarks/sonnet.txt
rename to 3p-integrations/crusoe/vllm-fp8/benchmarks/sonnet.txt
diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/convert_hf_to_fp8.py b/3p-integrations/crusoe/vllm-fp8/convert_hf_to_fp8.py
similarity index 100%
rename from recipes/3p_integrations/crusoe/vllm-fp8/convert_hf_to_fp8.py
rename to 3p-integrations/crusoe/vllm-fp8/convert_hf_to_fp8.py
diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/main.tf b/3p-integrations/crusoe/vllm-fp8/main.tf
similarity index 100%
rename from recipes/3p_integrations/crusoe/vllm-fp8/main.tf
rename to 3p-integrations/crusoe/vllm-fp8/main.tf
diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/plot.py b/3p-integrations/crusoe/vllm-fp8/plot.py
similarity index 100%
rename from recipes/3p_integrations/crusoe/vllm-fp8/plot.py
rename to 3p-integrations/crusoe/vllm-fp8/plot.py
diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/pyproject.toml b/3p-integrations/crusoe/vllm-fp8/pyproject.toml
similarity index 100%
rename from recipes/3p_integrations/crusoe/vllm-fp8/pyproject.toml
rename to 3p-integrations/crusoe/vllm-fp8/pyproject.toml
diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/run_benchmark.sh b/3p-integrations/crusoe/vllm-fp8/run_benchmark.sh
similarity index 100%
rename from recipes/3p_integrations/crusoe/vllm-fp8/run_benchmark.sh
rename to 3p-integrations/crusoe/vllm-fp8/run_benchmark.sh
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/Function-Calling-101-Ecommerce.ipynb b/3p-integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/Function-Calling-101-Ecommerce.ipynb
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/Function-Calling-101-Ecommerce.ipynb
rename to 3p-integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/Function-Calling-101-Ecommerce.ipynb
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/customers.csv b/3p-integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/customers.csv
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/customers.csv
rename to 3p-integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/customers.csv
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/orders.csv b/3p-integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/orders.csv
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/orders.csv
rename to 3p-integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/orders.csv
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/products.csv b/3p-integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/products.csv
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/products.csv
rename to 3p-integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/products.csv
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/data/employees.csv b/3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/data/employees.csv
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/data/employees.csv
rename to 3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/data/employees.csv
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/data/purchases.csv b/3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/data/purchases.csv
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/data/purchases.csv
rename to 3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/data/purchases.csv
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/json-mode-function-calling-for-sql.ipynb b/3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/json-mode-function-calling-for-sql.ipynb
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/json-mode-function-calling-for-sql.ipynb
rename to 3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/json-mode-function-calling-for-sql.ipynb
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/employees-without-purchases.yaml b/3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/employees-without-purchases.yaml
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/employees-without-purchases.yaml
rename to 3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/employees-without-purchases.yaml
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/most-expensive-purchase.yaml b/3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/most-expensive-purchase.yaml
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/most-expensive-purchase.yaml
rename to 3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/most-expensive-purchase.yaml
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/most-recent-purchases.yaml b/3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/most-recent-purchases.yaml
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/most-recent-purchases.yaml
rename to 3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/most-recent-purchases.yaml
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/number-of-teslas.yaml b/3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/number-of-teslas.yaml
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/number-of-teslas.yaml
rename to 3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/number-of-teslas.yaml
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/SDOH-Json-mode.ipynb b/3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/SDOH-Json-mode.ipynb
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/SDOH-Json-mode.ipynb
rename to 3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/SDOH-Json-mode.ipynb
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00456321.txt b/3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00456321.txt
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00456321.txt
rename to 3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00456321.txt
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00567289.txt b/3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00567289.txt
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00567289.txt
rename to 3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00567289.txt
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00678934.txt b/3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00678934.txt
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00678934.txt
rename to 3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00678934.txt
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00785642.txt b/3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00785642.txt
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00785642.txt
rename to 3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00785642.txt
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00893247.txt b/3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00893247.txt
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00893247.txt
rename to 3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00893247.txt
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/llama3-stock-market-function-calling/llama3-stock-market-function-calling.ipynb b/3p-integrations/groq/groq-api-cookbook/llama3-stock-market-function-calling/llama3-stock-market-function-calling.ipynb
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/llama3-stock-market-function-calling/llama3-stock-market-function-calling.ipynb
rename to 3p-integrations/groq/groq-api-cookbook/llama3-stock-market-function-calling/llama3-stock-market-function-calling.ipynb
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/parallel-tool-use/parallel-tool-use.ipynb b/3p-integrations/groq/groq-api-cookbook/parallel-tool-use/parallel-tool-use.ipynb
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/parallel-tool-use/parallel-tool-use.ipynb
rename to 3p-integrations/groq/groq-api-cookbook/parallel-tool-use/parallel-tool-use.ipynb
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/parallel-tool-use/requirements.txt b/3p-integrations/groq/groq-api-cookbook/parallel-tool-use/requirements.txt
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/parallel-tool-use/requirements.txt
rename to 3p-integrations/groq/groq-api-cookbook/parallel-tool-use/requirements.txt
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/rag-langchain-presidential-speeches/presidential_speeches.csv b/3p-integrations/groq/groq-api-cookbook/rag-langchain-presidential-speeches/presidential_speeches.csv
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/rag-langchain-presidential-speeches/presidential_speeches.csv
rename to 3p-integrations/groq/groq-api-cookbook/rag-langchain-presidential-speeches/presidential_speeches.csv
diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/rag-langchain-presidential-speeches/rag-langchain-presidential-speeches.ipynb b/3p-integrations/groq/groq-api-cookbook/rag-langchain-presidential-speeches/rag-langchain-presidential-speeches.ipynb
similarity index 100%
rename from recipes/3p_integrations/groq/groq-api-cookbook/rag-langchain-presidential-speeches/rag-langchain-presidential-speeches.ipynb
rename to 3p-integrations/groq/groq-api-cookbook/rag-langchain-presidential-speeches/rag-langchain-presidential-speeches.ipynb
diff --git a/recipes/3p_integrations/groq/groq-example-templates/conversational-chatbot-langchain/README.md b/3p-integrations/groq/groq-example-templates/conversational-chatbot-langchain/README.md
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/conversational-chatbot-langchain/README.md
rename to 3p-integrations/groq/groq-example-templates/conversational-chatbot-langchain/README.md
diff --git a/recipes/3p_integrations/groq/groq-example-templates/conversational-chatbot-langchain/main.py b/3p-integrations/groq/groq-example-templates/conversational-chatbot-langchain/main.py
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/conversational-chatbot-langchain/main.py
rename to 3p-integrations/groq/groq-example-templates/conversational-chatbot-langchain/main.py
diff --git a/recipes/3p_integrations/groq/groq-example-templates/conversational-chatbot-langchain/requirements.txt b/3p-integrations/groq/groq-example-templates/conversational-chatbot-langchain/requirements.txt
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/conversational-chatbot-langchain/requirements.txt
rename to 3p-integrations/groq/groq-example-templates/conversational-chatbot-langchain/requirements.txt
diff --git a/recipes/3p_integrations/groq/groq-example-templates/crewai-agents/README.md b/3p-integrations/groq/groq-example-templates/crewai-agents/README.md
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/crewai-agents/README.md
rename to 3p-integrations/groq/groq-example-templates/crewai-agents/README.md
diff --git a/recipes/3p_integrations/groq/groq-example-templates/crewai-agents/main.py b/3p-integrations/groq/groq-example-templates/crewai-agents/main.py
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/crewai-agents/main.py
rename to 3p-integrations/groq/groq-example-templates/crewai-agents/main.py
diff --git a/recipes/3p_integrations/groq/groq-example-templates/crewai-agents/requirements.txt b/3p-integrations/groq/groq-example-templates/crewai-agents/requirements.txt
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/crewai-agents/requirements.txt
rename to 3p-integrations/groq/groq-example-templates/crewai-agents/requirements.txt
diff --git a/recipes/3p_integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/README.md b/3p-integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/README.md
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/README.md
rename to 3p-integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/README.md
diff --git a/recipes/3p_integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/main.py b/3p-integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/main.py
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/main.py
rename to 3p-integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/main.py
diff --git a/recipes/3p_integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/requirements.txt b/3p-integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/requirements.txt
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/requirements.txt
rename to 3p-integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/requirements.txt
diff --git a/recipes/3p_integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/README.md b/3p-integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/README.md
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/README.md
rename to 3p-integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/README.md
diff --git a/recipes/3p_integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/main.py b/3p-integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/main.py
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/main.py
rename to 3p-integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/main.py
diff --git a/recipes/3p_integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/requirements.txt b/3p-integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/requirements.txt
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/requirements.txt
rename to 3p-integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/requirements.txt
diff --git a/recipes/3p_integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/README.md b/3p-integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/README.md
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/README.md
rename to 3p-integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/README.md
diff --git a/recipes/3p_integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/main.py b/3p-integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/main.py
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/main.py
rename to 3p-integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/main.py
diff --git a/recipes/3p_integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/requirements.txt b/3p-integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/requirements.txt
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/requirements.txt
rename to 3p-integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/requirements.txt
diff --git a/recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/README.md b/3p-integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/README.md
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/README.md
rename to 3p-integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/README.md
diff --git a/recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/main.py b/3p-integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/main.py
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/main.py
rename to 3p-integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/main.py
diff --git a/recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/requirements.txt b/3p-integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/requirements.txt
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/requirements.txt
rename to 3p-integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/requirements.txt
diff --git a/recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/README.md b/3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/README.md
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/README.md
rename to 3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/README.md
diff --git a/recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/data/employees.csv b/3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/data/employees.csv
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/data/employees.csv
rename to 3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/data/employees.csv
diff --git a/recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/data/purchases.csv b/3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/data/purchases.csv
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/data/purchases.csv
rename to 3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/data/purchases.csv
diff --git a/recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/main.py b/3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/main.py
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/main.py
rename to 3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/main.py
diff --git a/recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/prompts/base_prompt.txt b/3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/prompts/base_prompt.txt
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/prompts/base_prompt.txt
rename to 3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/prompts/base_prompt.txt
diff --git a/recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/requirements.txt b/3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/requirements.txt
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/requirements.txt
rename to 3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/requirements.txt
diff --git a/recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/README.md b/3p-integrations/groq/groq-example-templates/verified-sql-function-calling/README.md
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/README.md
rename to 3p-integrations/groq/groq-example-templates/verified-sql-function-calling/README.md
diff --git a/recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/data/employees.csv b/3p-integrations/groq/groq-example-templates/verified-sql-function-calling/data/employees.csv
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/data/employees.csv
rename to 3p-integrations/groq/groq-example-templates/verified-sql-function-calling/data/employees.csv
diff --git a/recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/data/purchases.csv b/3p-integrations/groq/groq-example-templates/verified-sql-function-calling/data/purchases.csv
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/data/purchases.csv
rename to 3p-integrations/groq/groq-example-templates/verified-sql-function-calling/data/purchases.csv
diff --git a/recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/main.py b/3p-integrations/groq/groq-example-templates/verified-sql-function-calling/main.py
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/main.py
rename to 3p-integrations/groq/groq-example-templates/verified-sql-function-calling/main.py
diff --git a/recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/requirements.txt b/3p-integrations/groq/groq-example-templates/verified-sql-function-calling/requirements.txt
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/requirements.txt
rename to 3p-integrations/groq/groq-example-templates/verified-sql-function-calling/requirements.txt
diff --git a/recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/employees-without-purchases.yaml b/3p-integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/employees-without-purchases.yaml
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/employees-without-purchases.yaml
rename to 3p-integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/employees-without-purchases.yaml
diff --git a/recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/most-expensive-purchase.yaml b/3p-integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/most-expensive-purchase.yaml
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/most-expensive-purchase.yaml
rename to 3p-integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/most-expensive-purchase.yaml
diff --git a/recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/most-recent-purchases.yaml b/3p-integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/most-recent-purchases.yaml
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/most-recent-purchases.yaml
rename to 3p-integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/most-recent-purchases.yaml
diff --git a/recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/number-of-teslas.yaml b/3p-integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/number-of-teslas.yaml
similarity index 100%
rename from recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/number-of-teslas.yaml
rename to 3p-integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/number-of-teslas.yaml
diff --git a/recipes/3p_integrations/groq/llama3_cookbook_groq.ipynb b/3p-integrations/groq/llama3_cookbook_groq.ipynb
similarity index 100%
rename from recipes/3p_integrations/groq/llama3_cookbook_groq.ipynb
rename to 3p-integrations/groq/llama3_cookbook_groq.ipynb
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/README.md b/3p-integrations/lamini/text2sql_memory_tuning/README.md
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/README.md
rename to 3p-integrations/lamini/text2sql_memory_tuning/README.md
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/assets/manual_filtering.png b/3p-integrations/lamini/text2sql_memory_tuning/assets/manual_filtering.png
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/assets/manual_filtering.png
rename to 3p-integrations/lamini/text2sql_memory_tuning/assets/manual_filtering.png
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/assets/website.png b/3p-integrations/lamini/text2sql_memory_tuning/assets/website.png
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/assets/website.png
rename to 3p-integrations/lamini/text2sql_memory_tuning/assets/website.png
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/gold-test-set-v2.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/gold-test-set-v2.jsonl
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/gold-test-set-v2.jsonl
rename to 3p-integrations/lamini/text2sql_memory_tuning/data/gold-test-set-v2.jsonl
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/gold-test-set.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/gold-test-set.jsonl
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/gold-test-set.jsonl
rename to 3p-integrations/lamini/text2sql_memory_tuning/data/gold-test-set.jsonl
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_large_filtered_cleaned.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_large_filtered_cleaned.jsonl
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_large_filtered_cleaned.jsonl
rename to 3p-integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_large_filtered_cleaned.jsonl
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_v2_large_filtered_cleaned.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_v2_large_filtered_cleaned.jsonl
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_v2_large_filtered_cleaned.jsonl
rename to 3p-integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_v2_large_filtered_cleaned.jsonl
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries.jsonl
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries.jsonl
rename to 3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries.jsonl
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large.jsonl
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large.jsonl
rename to 3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large.jsonl
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large_filtered.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large_filtered.jsonl
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large_filtered.jsonl
rename to 3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large_filtered.jsonl
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2.jsonl
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2.jsonl
rename to 3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2.jsonl
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large.jsonl
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large.jsonl
rename to 3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large.jsonl
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large_filtered.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large_filtered.jsonl
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large_filtered.jsonl
rename to 3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large_filtered.jsonl
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/meta_lamini.ipynb b/3p-integrations/lamini/text2sql_memory_tuning/meta_lamini.ipynb
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/meta_lamini.ipynb
rename to 3p-integrations/lamini/text2sql_memory_tuning/meta_lamini.ipynb
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/nba_roster.db b/3p-integrations/lamini/text2sql_memory_tuning/nba_roster.db
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/nba_roster.db
rename to 3p-integrations/lamini/text2sql_memory_tuning/nba_roster.db
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/util/get_default_finetune_args.py b/3p-integrations/lamini/text2sql_memory_tuning/util/get_default_finetune_args.py
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/util/get_default_finetune_args.py
rename to 3p-integrations/lamini/text2sql_memory_tuning/util/get_default_finetune_args.py
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/util/get_rubric.py b/3p-integrations/lamini/text2sql_memory_tuning/util/get_rubric.py
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/util/get_rubric.py
rename to 3p-integrations/lamini/text2sql_memory_tuning/util/get_rubric.py
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/util/get_schema.py b/3p-integrations/lamini/text2sql_memory_tuning/util/get_schema.py
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/util/get_schema.py
rename to 3p-integrations/lamini/text2sql_memory_tuning/util/get_schema.py
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/util/load_dataset.py b/3p-integrations/lamini/text2sql_memory_tuning/util/load_dataset.py
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/util/load_dataset.py
rename to 3p-integrations/lamini/text2sql_memory_tuning/util/load_dataset.py
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/util/make_llama_3_prompt.py b/3p-integrations/lamini/text2sql_memory_tuning/util/make_llama_3_prompt.py
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/util/make_llama_3_prompt.py
rename to 3p-integrations/lamini/text2sql_memory_tuning/util/make_llama_3_prompt.py
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/util/parse_arguments.py b/3p-integrations/lamini/text2sql_memory_tuning/util/parse_arguments.py
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/util/parse_arguments.py
rename to 3p-integrations/lamini/text2sql_memory_tuning/util/parse_arguments.py
diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/util/setup_logging.py b/3p-integrations/lamini/text2sql_memory_tuning/util/setup_logging.py
similarity index 100%
rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/util/setup_logging.py
rename to 3p-integrations/lamini/text2sql_memory_tuning/util/setup_logging.py
diff --git a/recipes/3p_integrations/langchain/README.md b/3p-integrations/langchain/README.md
similarity index 100%
rename from recipes/3p_integrations/langchain/README.md
rename to 3p-integrations/langchain/README.md
diff --git a/recipes/3p_integrations/langchain/langgraph_rag_agent.ipynb b/3p-integrations/langchain/langgraph_rag_agent.ipynb
similarity index 100%
rename from recipes/3p_integrations/langchain/langgraph_rag_agent.ipynb
rename to 3p-integrations/langchain/langgraph_rag_agent.ipynb
diff --git a/recipes/3p_integrations/langchain/langgraph_rag_agent_local.ipynb b/3p-integrations/langchain/langgraph_rag_agent_local.ipynb
similarity index 100%
rename from recipes/3p_integrations/langchain/langgraph_rag_agent_local.ipynb
rename to 3p-integrations/langchain/langgraph_rag_agent_local.ipynb
diff --git a/recipes/3p_integrations/langchain/langgraph_tool_calling_agent.ipynb b/3p-integrations/langchain/langgraph_tool_calling_agent.ipynb
similarity index 100%
rename from recipes/3p_integrations/langchain/langgraph_tool_calling_agent.ipynb
rename to 3p-integrations/langchain/langgraph_tool_calling_agent.ipynb
diff --git a/recipes/3p_integrations/llama_on_prem.md b/3p-integrations/llama_on_prem.md
similarity index 100%
rename from recipes/3p_integrations/llama_on_prem.md
rename to 3p-integrations/llama_on_prem.md
diff --git a/recipes/3p_integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L2_Tool_Calling.ipynb b/3p-integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L2_Tool_Calling.ipynb
similarity index 100%
rename from recipes/3p_integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L2_Tool_Calling.ipynb
rename to 3p-integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L2_Tool_Calling.ipynb
diff --git a/recipes/3p_integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L3_Building_an_Agent_Reasoning_Loop.ipynb b/3p-integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L3_Building_an_Agent_Reasoning_Loop.ipynb
similarity index 100%
rename from recipes/3p_integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L3_Building_an_Agent_Reasoning_Loop.ipynb
rename to 3p-integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L3_Building_an_Agent_Reasoning_Loop.ipynb
diff --git a/recipes/3p_integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L4_Building_a_Multi-Document_Agent.ipynb b/3p-integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L4_Building_a_Multi-Document_Agent.ipynb
similarity index 100%
rename from recipes/3p_integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L4_Building_a_Multi-Document_Agent.ipynb
rename to 3p-integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L4_Building_a_Multi-Document_Agent.ipynb
diff --git a/recipes/3p_integrations/llamaindex/dlai_agentic_rag/README.md b/3p-integrations/llamaindex/dlai_agentic_rag/README.md
similarity index 75%
rename from recipes/3p_integrations/llamaindex/dlai_agentic_rag/README.md
rename to 3p-integrations/llamaindex/dlai_agentic_rag/README.md
index ea9c3d849..39ea05f5d 100644
--- a/recipes/3p_integrations/llamaindex/dlai_agentic_rag/README.md
+++ b/3p-integrations/llamaindex/dlai_agentic_rag/README.md
@@ -2,7 +2,7 @@
 The folder here contains the Llama 3 ported notebooks of the DLAI short course [Building Agentic RAG with Llamaindex](https://www.deeplearning.ai/short-courses/building-agentic-rag-with-llamaindex/).
 
-1. [Building Agentic RAG with Llamaindex L1 Router Engine](../../../quickstart/agents/DeepLearningai_Course_Notebooks/Building_Agentic_RAG_with_Llamaindex_L1_Router_Engine.ipynb) shows how to implement a simple agentic RAG, a router that will pick up one of several query tools (question answering or summarization) to execute a query on a single document. Note this notebook is located in the `quickstart` folder.
+1. [Building Agentic RAG with Llamaindex L1 Router Engine](../../../end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/AI_Agents_in_LangGraph_L1_Build_an_Agent_from_Scratch.ipynb) shows how to implement a simple agentic RAG, a router that will pick up one of several query tools (question answering or summarization) to execute a query on a single document. Note this notebook is located in the `end-to-end-use-cases` folder.
 
 2. [Building Agentic RAG with Llamaindex L2 Tool Calling](Building_Agentic_RAG_with_Llamaindex_L2_Tool_Calling.ipynb) shows how to use Llama 3 to not only pick a function to execute, but also infer an argument to pass through the function.
diff --git a/recipes/3p_integrations/llamaindex/llamaindex_cookbook.ipynb b/3p-integrations/llamaindex/llamaindex_cookbook.ipynb
similarity index 100%
rename from recipes/3p_integrations/llamaindex/llamaindex_cookbook.ipynb
rename to 3p-integrations/llamaindex/llamaindex_cookbook.ipynb
diff --git a/recipes/3p_integrations/modal/many-llamas-human-eval/README.md b/3p-integrations/modal/many-llamas-human-eval/README.md
similarity index 94%
rename from recipes/3p_integrations/modal/many-llamas-human-eval/README.md
rename to 3p-integrations/modal/many-llamas-human-eval/README.md
index 1c3c1b661..342949e92 100644
--- a/recipes/3p_integrations/modal/many-llamas-human-eval/README.md
+++ b/3p-integrations/modal/many-llamas-human-eval/README.md
@@ -12,7 +12,7 @@ This experiment built by the team at [Modal](https://modal.com), and is describe
 [Beat GPT-4o at Python by searching with 100 small Llamas](https://modal.com/blog/llama-human-eval)
 
-The experiment has since been upgraded to use the [Llama 3.2 3B Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) model, and runnable end-to-end using the Modal serverless platform.
+The experiment has since been upgraded to use the [Llama 3.2 3B Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) model, and can be run end-to-end using the Modal serverless platform.
 
 ## Run it yourself
@@ -55,7 +55,7 @@ This will execute:
 5. Generating graphs of pass@k and fail@k
 
 ### Results
-
+
 The resulting plots of the evals will be saved locally to:
 - `/tmp/plot-pass-k.jpeg`
 - `/tmp/plot-fail-k.jpeg`
@@ -69,3 +69,4 @@ You'll see that at 100 generations, the Llama model is able to perform on-par wi
 `/tmp/plot-fail-k.jpeg` shows fail@k across a log-scale, showing smooth scaling of this method.
 
 ![plot-fail-k](https://github.com/user-attachments/assets/7286e4ff-5090-4288-bd62-8a078c6dc5a1)
+
diff --git a/recipes/3p_integrations/modal/many-llamas-human-eval/download.py b/3p-integrations/modal/many-llamas-human-eval/download.py
similarity index 100%
rename from recipes/3p_integrations/modal/many-llamas-human-eval/download.py
rename to 3p-integrations/modal/many-llamas-human-eval/download.py
diff --git a/recipes/3p_integrations/modal/many-llamas-human-eval/eval.py b/3p-integrations/modal/many-llamas-human-eval/eval.py
similarity index 100%
rename from recipes/3p_integrations/modal/many-llamas-human-eval/eval.py
rename to 3p-integrations/modal/many-llamas-human-eval/eval.py
diff --git a/recipes/3p_integrations/modal/many-llamas-human-eval/generate.py b/3p-integrations/modal/many-llamas-human-eval/generate.py
similarity index 100%
rename from recipes/3p_integrations/modal/many-llamas-human-eval/generate.py
rename to 3p-integrations/modal/many-llamas-human-eval/generate.py
diff --git a/recipes/3p_integrations/modal/many-llamas-human-eval/inference.py b/3p-integrations/modal/many-llamas-human-eval/inference.py
similarity index 100%
rename from recipes/3p_integrations/modal/many-llamas-human-eval/inference.py
rename to 3p-integrations/modal/many-llamas-human-eval/inference.py
diff --git a/recipes/3p_integrations/modal/many-llamas-human-eval/plot.py b/3p-integrations/modal/many-llamas-human-eval/plot.py
similarity index 100%
rename from recipes/3p_integrations/modal/many-llamas-human-eval/plot.py
rename to 3p-integrations/modal/many-llamas-human-eval/plot.py
diff --git a/recipes/3p_integrations/modal/many-llamas-human-eval/run_e2e.sh b/3p-integrations/modal/many-llamas-human-eval/run_e2e.sh
similarity index 100%
rename from recipes/3p_integrations/modal/many-llamas-human-eval/run_e2e.sh
rename to 3p-integrations/modal/many-llamas-human-eval/run_e2e.sh
diff --git a/recipes/3p_integrations/octoai/MediaGen.ipynb b/3p-integrations/octoai/MediaGen.ipynb
similarity index 100%
rename from recipes/3p_integrations/octoai/MediaGen.ipynb
rename to 3p-integrations/octoai/MediaGen.ipynb
diff --git a/recipes/3p_integrations/octoai/RAG_chatbot_example/RAG_chatbot_example.ipynb b/3p-integrations/octoai/RAG_chatbot_example/RAG_chatbot_example.ipynb
similarity index 100%
rename from recipes/3p_integrations/octoai/RAG_chatbot_example/RAG_chatbot_example.ipynb
rename to 3p-integrations/octoai/RAG_chatbot_example/RAG_chatbot_example.ipynb
diff --git a/recipes/3p_integrations/octoai/RAG_chatbot_example/data/Llama Getting Started Guide.pdf b/3p-integrations/octoai/RAG_chatbot_example/data/Llama Getting Started Guide.pdf
similarity index 100%
rename from recipes/3p_integrations/octoai/RAG_chatbot_example/data/Llama Getting Started Guide.pdf
rename to 3p-integrations/octoai/RAG_chatbot_example/data/Llama Getting Started Guide.pdf
diff --git a/recipes/3p_integrations/octoai/RAG_chatbot_example/requirements.txt b/3p-integrations/octoai/RAG_chatbot_example/requirements.txt
similarity index 100%
rename from recipes/3p_integrations/octoai/RAG_chatbot_example/requirements.txt
rename to 3p-integrations/octoai/RAG_chatbot_example/requirements.txt
diff --git a/recipes/3p_integrations/octoai/RAG_chatbot_example/vectorstore/db_faiss/index.faiss b/3p-integrations/octoai/RAG_chatbot_example/vectorstore/db_faiss/index.faiss
similarity index 100%
rename from recipes/3p_integrations/octoai/RAG_chatbot_example/vectorstore/db_faiss/index.faiss
rename to 3p-integrations/octoai/RAG_chatbot_example/vectorstore/db_faiss/index.faiss
diff --git a/recipes/3p_integrations/octoai/RAG_chatbot_example/vectorstore/db_faiss/index.pkl b/3p-integrations/octoai/RAG_chatbot_example/vectorstore/db_faiss/index.pkl
similarity index 100%
rename from recipes/3p_integrations/octoai/RAG_chatbot_example/vectorstore/db_faiss/index.pkl
rename to 3p-integrations/octoai/RAG_chatbot_example/vectorstore/db_faiss/index.pkl
diff --git a/recipes/3p_integrations/octoai/getting_to_know_llama.ipynb b/3p-integrations/octoai/getting_to_know_llama.ipynb
similarity index 100%
rename from recipes/3p_integrations/octoai/getting_to_know_llama.ipynb
rename to 3p-integrations/octoai/getting_to_know_llama.ipynb
diff --git a/recipes/3p_integrations/octoai/hello_llama_cloud.ipynb b/3p-integrations/octoai/hello_llama_cloud.ipynb
similarity index 100%
rename from recipes/3p_integrations/octoai/hello_llama_cloud.ipynb
rename to 3p-integrations/octoai/hello_llama_cloud.ipynb
diff --git a/recipes/3p_integrations/octoai/live_data.ipynb b/3p-integrations/octoai/live_data.ipynb
similarity index 100%
rename from recipes/3p_integrations/octoai/live_data.ipynb
rename to 3p-integrations/octoai/live_data.ipynb
diff --git a/recipes/3p_integrations/octoai/llama2_gradio.ipynb b/3p-integrations/octoai/llama2_gradio.ipynb
similarity index 100%
rename from recipes/3p_integrations/octoai/llama2_gradio.ipynb
rename to 3p-integrations/octoai/llama2_gradio.ipynb
diff --git a/recipes/3p_integrations/octoai/video_summary.ipynb b/3p-integrations/octoai/video_summary.ipynb
similarity index 100%
rename from recipes/3p_integrations/octoai/video_summary.ipynb
rename to 3p-integrations/octoai/video_summary.ipynb
diff --git a/recipes/3p_integrations/tgi/README.md b/3p-integrations/tgi/README.md
similarity index 100%
rename from recipes/3p_integrations/tgi/README.md
rename to 3p-integrations/tgi/README.md
diff --git a/recipes/3p_integrations/tgi/merge_lora_weights.py b/3p-integrations/tgi/merge_lora_weights.py
similarity index 100%
rename from recipes/3p_integrations/tgi/merge_lora_weights.py
rename to 3p-integrations/tgi/merge_lora_weights.py
diff --git a/recipes/3p_integrations/togetherai/README.md b/3p-integrations/togetherai/README.md
similarity index 100%
rename from recipes/3p_integrations/togetherai/README.md
rename to 3p-integrations/togetherai/README.md
diff --git a/recipes/3p_integrations/togetherai/datasets/movies.json b/3p-integrations/togetherai/datasets/movies.json
similarity index 100%
rename from recipes/3p_integrations/togetherai/datasets/movies.json
rename to 3p-integrations/togetherai/datasets/movies.json
diff --git a/recipes/3p_integrations/togetherai/images/BERTScore.png b/3p-integrations/togetherai/images/BERTScore.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/BERTScore.png
rename to 3p-integrations/togetherai/images/BERTScore.png
diff --git a/recipes/3p_integrations/togetherai/images/CoQA.png b/3p-integrations/togetherai/images/CoQA.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/CoQA.png
rename to 3p-integrations/togetherai/images/CoQA.png
diff --git a/recipes/3p_integrations/togetherai/images/ColPaliMaxSim-1.png b/3p-integrations/togetherai/images/ColPaliMaxSim-1.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/ColPaliMaxSim-1.png
rename to 3p-integrations/togetherai/images/ColPaliMaxSim-1.png
diff --git a/recipes/3p_integrations/togetherai/images/Nvidia_collage.png b/3p-integrations/togetherai/images/Nvidia_collage.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/Nvidia_collage.png
rename to 3p-integrations/togetherai/images/Nvidia_collage.png
diff --git a/recipes/3p_integrations/togetherai/images/UMAP.png b/3p-integrations/togetherai/images/UMAP.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/UMAP.png
rename to 3p-integrations/togetherai/images/UMAP.png
diff --git a/recipes/3p_integrations/togetherai/images/cRAG.png b/3p-integrations/togetherai/images/cRAG.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/cRAG.png
rename to 3p-integrations/togetherai/images/cRAG.png
diff --git a/recipes/3p_integrations/togetherai/images/cRAG_indexing.png b/3p-integrations/togetherai/images/cRAG_indexing.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/cRAG_indexing.png
rename to 3p-integrations/togetherai/images/cRAG_indexing.png
diff --git a/recipes/3p_integrations/togetherai/images/cRAG_querytime.png b/3p-integrations/togetherai/images/cRAG_querytime.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/cRAG_querytime.png
rename to 3p-integrations/togetherai/images/cRAG_querytime.png
diff --git a/recipes/3p_integrations/togetherai/images/cluster.png b/3p-integrations/togetherai/images/cluster.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/cluster.png
rename to 3p-integrations/togetherai/images/cluster.png
diff --git a/recipes/3p_integrations/togetherai/images/colpali_arch.png b/3p-integrations/togetherai/images/colpali_arch.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/colpali_arch.png
rename to 3p-integrations/togetherai/images/colpali_arch.png
diff --git a/recipes/3p_integrations/togetherai/images/conversation.png b/3p-integrations/togetherai/images/conversation.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/conversation.png
rename to 3p-integrations/togetherai/images/conversation.png
diff --git a/recipes/3p_integrations/togetherai/images/deploy_CFT.png b/3p-integrations/togetherai/images/deploy_CFT.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/deploy_CFT.png
rename to 3p-integrations/togetherai/images/deploy_CFT.png
diff --git a/recipes/3p_integrations/togetherai/images/ft_model.png b/3p-integrations/togetherai/images/ft_model.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/ft_model.png
rename to 3p-integrations/togetherai/images/ft_model.png
diff --git a/recipes/3p_integrations/togetherai/images/mmrag_only.png b/3p-integrations/togetherai/images/mmrag_only.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/mmrag_only.png
rename to 3p-integrations/togetherai/images/mmrag_only.png
diff --git a/recipes/3p_integrations/togetherai/images/page_25.png b/3p-integrations/togetherai/images/page_25.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/page_25.png
rename to 3p-integrations/togetherai/images/page_25.png
diff --git a/recipes/3p_integrations/togetherai/images/repetition_task.png b/3p-integrations/togetherai/images/repetition_task.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/repetition_task.png
rename to 3p-integrations/togetherai/images/repetition_task.png
diff --git a/recipes/3p_integrations/togetherai/images/reranking.png b/3p-integrations/togetherai/images/reranking.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/reranking.png
rename to 3p-integrations/togetherai/images/reranking.png
diff --git a/recipes/3p_integrations/togetherai/images/semantic_search.png b/3p-integrations/togetherai/images/semantic_search.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/semantic_search.png
rename to 3p-integrations/togetherai/images/semantic_search.png
diff --git a/recipes/3p_integrations/togetherai/images/simple_RAG.png b/3p-integrations/togetherai/images/simple_RAG.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/simple_RAG.png
rename to 3p-integrations/togetherai/images/simple_RAG.png
diff --git a/recipes/3p_integrations/togetherai/images/structured_text_image.png b/3p-integrations/togetherai/images/structured_text_image.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/structured_text_image.png
rename to 3p-integrations/togetherai/images/structured_text_image.png
diff --git a/recipes/3p_integrations/togetherai/images/summarization.png b/3p-integrations/togetherai/images/summarization.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/summarization.png
rename to 3p-integrations/togetherai/images/summarization.png
diff --git a/recipes/3p_integrations/togetherai/images/summary_task.png b/3p-integrations/togetherai/images/summary_task.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/summary_task.png
rename to 3p-integrations/togetherai/images/summary_task.png
diff --git a/recipes/3p_integrations/togetherai/images/text_RAG.png b/3p-integrations/togetherai/images/text_RAG.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/text_RAG.png
rename to 3p-integrations/togetherai/images/text_RAG.png
diff --git a/recipes/3p_integrations/togetherai/images/together-color.jpg b/3p-integrations/togetherai/images/together-color.jpg
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/together-color.jpg
rename to 3p-integrations/togetherai/images/together-color.jpg
diff --git a/recipes/3p_integrations/togetherai/images/together.gif b/3p-integrations/togetherai/images/together.gif
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/together.gif
rename to 3p-integrations/togetherai/images/together.gif
diff --git a/recipes/3p_integrations/togetherai/images/wandb_model.png b/3p-integrations/togetherai/images/wandb_model.png
similarity index 100%
rename from recipes/3p_integrations/togetherai/images/wandb_model.png
rename to 3p-integrations/togetherai/images/wandb_model.png
diff --git a/recipes/3p_integrations/togetherai/knowledge_graphs_with_structured_outputs.ipynb b/3p-integrations/togetherai/knowledge_graphs_with_structured_outputs.ipynb
similarity index 100%
rename from recipes/3p_integrations/togetherai/knowledge_graphs_with_structured_outputs.ipynb
rename to 3p-integrations/togetherai/knowledge_graphs_with_structured_outputs.ipynb
diff --git a/recipes/3p_integrations/togetherai/llama_contextual_RAG.ipynb b/3p-integrations/togetherai/llama_contextual_RAG.ipynb
similarity index 100%
recipes/3p_integrations/togetherai/llama_contextual_RAG.ipynb rename to 3p-integrations/togetherai/llama_contextual_RAG.ipynb diff --git a/recipes/3p_integrations/togetherai/multimodal_RAG_with_nvidia_investor_slide_deck.ipynb b/3p-integrations/togetherai/multimodal_RAG_with_nvidia_investor_slide_deck.ipynb similarity index 100% rename from recipes/3p_integrations/togetherai/multimodal_RAG_with_nvidia_investor_slide_deck.ipynb rename to 3p-integrations/togetherai/multimodal_RAG_with_nvidia_investor_slide_deck.ipynb diff --git a/recipes/3p_integrations/togetherai/pdf_to_podcast_using_llama_on_together.ipynb b/3p-integrations/togetherai/pdf_to_podcast_using_llama_on_together.ipynb similarity index 100% rename from recipes/3p_integrations/togetherai/pdf_to_podcast_using_llama_on_together.ipynb rename to 3p-integrations/togetherai/pdf_to_podcast_using_llama_on_together.ipynb diff --git a/recipes/3p_integrations/togetherai/structured_text_extraction_from_images.ipynb b/3p-integrations/togetherai/structured_text_extraction_from_images.ipynb similarity index 100% rename from recipes/3p_integrations/togetherai/structured_text_extraction_from_images.ipynb rename to 3p-integrations/togetherai/structured_text_extraction_from_images.ipynb diff --git a/recipes/3p_integrations/togetherai/text_RAG_using_llama_on_together.ipynb b/3p-integrations/togetherai/text_RAG_using_llama_on_together.ipynb similarity index 100% rename from recipes/3p_integrations/togetherai/text_RAG_using_llama_on_together.ipynb rename to 3p-integrations/togetherai/text_RAG_using_llama_on_together.ipynb diff --git a/recipes/3p_integrations/using_externally_hosted_llms.ipynb b/3p-integrations/using_externally_hosted_llms.ipynb similarity index 100% rename from recipes/3p_integrations/using_externally_hosted_llms.ipynb rename to 3p-integrations/using_externally_hosted_llms.ipynb diff --git a/recipes/3p_integrations/vllm/README.md b/3p-integrations/vllm/README.md similarity index 100% rename from recipes/3p_integrations/vllm/README.md rename to 3p-integrations/vllm/README.md diff --git a/recipes/3p_integrations/vllm/inference.py b/3p-integrations/vllm/inference.py similarity index 100% rename from recipes/3p_integrations/vllm/inference.py rename to 3p-integrations/vllm/inference.py diff --git a/README.md b/README.md index 38aaf5846..da232b526 100644 --- a/README.md +++ b/README.md @@ -1,169 +1,50 @@ -# Llama Recipes: Examples to get started using the Llama models from Meta - -The 'llama-recipes' repository is a companion to the [Meta Llama](https://github.com/meta-llama/llama-models) models. We support the latest version, [Llama 3.2 Vision](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md) and [Llama 3.2 Text](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md), in this repository. This repository contains example scripts and notebooks to get started with the models in a variety of use-cases, including fine-tuning for domain adaptation and building LLM-based applications with Llama and other tools in the LLM ecosystem. The examples here use Llama locally, in the cloud, and on-prem. +# Llama Cookbook: The Official Guide to building with Llama Models + +Welcome to the official repository for helping you get started with [inference](./getting-started/inference/), [fine-tuning](./getting-started/finetuning) and [end-to-end use-cases](./end-to-end-use-cases) of building with the Llama Model family. 
+ +This repository covers the most popular community approaches, use-cases and the latest recipes for Llama Text and Vision models. > [!TIP] -> Get started with Llama 3.2 with these new recipes: -> * [Finetune Llama 3.2 Vision](https://github.com/meta-llama/llama-recipes/blob/main/recipes/quickstart/finetuning/finetune_vision_model.md) -> * [Multimodal Inference with Llama 3.2 Vision](https://github.com/meta-llama/llama-recipes/blob/main/recipes/quickstart/inference/local_inference/README.md#multimodal-inference) -> * [Inference on Llama Guard 1B + Multimodal inference on Llama Guard 11B-Vision](https://github.com/meta-llama/llama-recipes/blob/main/recipes/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb) +> Popular getting started links: +> * [Build with Llama Tutorial](./getting-started/build_with_Llama_3_2.ipynb) +> * [Multimodal Inference with Llama 3.2 Vision](./getting-started/inference/local_inference/README.md#multimodal-inference) +> * [Inferencing using Llama Guard (Safety Model)](./end-to-end-use-cases/responsible_ai/llama_guard/) +> [!TIP] +> Popular end to end recipes: +> * [Email Agent](./end-to-end-use-cases/email_agent/) +> * [NotebookLlama](./end-to-end-use-cases/NotebookLlama/) +> * [Text to SQL](./end-to-end-use-cases/coding/text2sql/) - -> [!NOTE] -> Llama 3.2 follows the same prompt template as Llama 3.1, with a new special token `<|image|>` representing the input image for the multimodal models. -> -> More details on the prompt templates for image reasoning, tool-calling and code interpreter can be found [on the documentation website](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_2). - - - -## Table of Contents - -- [Llama Recipes: Examples to get started using the Llama models from Meta](#llama-recipes-examples-to-get-started-using-the-llama-models-from-meta) - - [Table of Contents](#table-of-contents) - - [Getting Started](#getting-started) - - [Prerequisites](#prerequisites) - - [PyTorch Nightlies](#pytorch-nightlies) - - [Installing](#installing) - - [Install with pip](#install-with-pip) - - [Install with optional dependencies](#install-with-optional-dependencies) - - [Install from source](#install-from-source) - - [Getting the Llama models](#getting-the-llama-models) - - [Model conversion to Hugging Face](#model-conversion-to-hugging-face) - - [Repository Organization](#repository-organization) - - [`recipes/`](#recipes) - - [`src/`](#src) - - [Supported Features](#supported-features) - - [Contributing](#contributing) - - [License](#license) - -## Getting Started - -These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system. - -### Prerequisites - -#### PyTorch Nightlies -If you want to use PyTorch nightlies instead of the stable release, go to [this guide](https://pytorch.org/get-started/locally/) to retrieve the right `--extra-index-url URL` parameter for the `pip install` commands on your platform. - -### Installing -Llama-recipes provides a pip distribution for easy install and usage in other projects. Alternatively, it can be installed from source. - -> [!NOTE] -> Ensure you use the correct CUDA version (from `nvidia-smi`) when installing the PyTorch wheels. Here we are using 11.8 as `cu118`. 
-> H100 GPUs work better with CUDA >12.0 - -#### Install with pip -``` -pip install llama-recipes -``` - -#### Install with optional dependencies -Llama-recipes offers the installation of optional packages. There are three optional dependency groups. -To run the unit tests we can install the required dependencies with: -``` -pip install llama-recipes[tests] -``` -For the vLLM example we need additional requirements that can be installed with: -``` -pip install llama-recipes[vllm] -``` -To use the sensitive topics safety checker install with: -``` -pip install llama-recipes[auditnlg] -``` -Some recipes require the presence of langchain. To install the packages follow the recipe description or install with: -``` -pip install llama-recipes[langchain] -``` -Optional dependencies can also be combined with [option1,option2]. - -#### Install from source -To install from source e.g. for development use these commands. We're using hatchling as our build backend which requires an up-to-date pip as well as setuptools package. -``` -git clone git@github.com:meta-llama/llama-recipes.git -cd llama-recipes -pip install -U pip setuptools -pip install -e . -``` -For development and contributing to llama-recipes please install all optional dependencies: -``` -git clone git@github.com:meta-llama/llama-recipes.git -cd llama-recipes -pip install -U pip setuptools -pip install -e .[tests,auditnlg,vllm] -``` - - -### Getting the Llama models -You can find Llama models on Hugging Face hub [here](https://huggingface.co/meta-llama), **where models with `hf` in the name are already converted to Hugging Face checkpoints so no further conversion is needed**. The conversion step below is only for original model weights from Meta that are hosted on Hugging Face model hub as well. - -#### Model conversion to Hugging Face -If you have the model checkpoints downloaded from the Meta website, you can convert it to the Hugging Face format with: - -```bash -## Install Hugging Face Transformers from source -pip freeze | grep transformers ## verify it is version 4.45.0 or higher - -git clone git@github.com:huggingface/transformers.git -cd transformers -pip install protobuf -python src/transformers/models/llama/convert_llama_weights_to_hf.py \ - --input_dir /path/to/downloaded/llama/weights --model_size 3B --output_dir /output/path -``` - - - -## Repository Organization -Most of the code dealing with Llama usage is organized across 2 main folders: `recipes/` and `src/`. - -### `recipes/` - -Contains examples organized in folders by topic: -| Subfolder | Description | -|---|---| -[quickstart](./recipes/quickstart) | The "Hello World" of using Llama, start here if you are new to using Llama. -[use_cases](./recipes/use_cases)|Scripts showing common applications of Meta Llama3 -[3p_integrations](./recipes/3p_integrations)|Partner owned folder showing common applications of Meta Llama3 -[responsible_ai](./recipes/responsible_ai)|Scripts to use PurpleLlama for safeguarding model outputs -[experimental](./recipes/experimental)|Meta Llama implementations of experimental LLM techniques - -### `src/` - -Contains modules which support the example recipes: -| Subfolder | Description | -|---|---| -| [configs](src/llama_recipes/configs/) | Contains the configuration files for PEFT methods, FSDP, Datasets, Weights & Biases experiment tracking. | -| [datasets](src/llama_recipes/datasets/) | Contains individual scripts for each dataset to download and process. 
Note | -| [inference](src/llama_recipes/inference/) | Includes modules for inference for the fine-tuned models. | -| [model_checkpointing](src/llama_recipes/model_checkpointing/) | Contains FSDP checkpoint handlers. | -| [policies](src/llama_recipes/policies/) | Contains FSDP scripts to provide different policies, such as mixed precision, transformer wrapping policy and activation checkpointing along with any precision optimizer (used for running FSDP with pure bf16 mode). | -| [utils](src/llama_recipes/utils/) | Utility files for:
- `train_utils.py` provides training/eval loop and more train utils.
- `dataset_utils.py` to get preprocessed datasets.
- `config_utils.py` to override the configs received from CLI.
- `fsdp_utils.py` provides FSDP wrapping policy for PEFT methods.
- `memory_utils.py` context manager to track different memory stats in train loop. | - -## Supported Features -The recipes and modules in this repository support the following features: - -| Feature | | -| ---------------------------------------------- | - | -| HF support for inference | ✅ | -| HF support for finetuning | ✅ | -| PEFT | ✅ | -| Deferred initialization ( meta init) | ✅ | -| Low CPU mode for multi GPU | ✅ | -| Mixed precision | ✅ | -| Single node quantization | ✅ | -| Flash attention | ✅ | -| Activation checkpointing FSDP | ✅ | -| Hybrid Sharded Data Parallel (HSDP) | ✅ | -| Dataset packing & padding | ✅ | -| BF16 Optimizer (Pure BF16) | ✅ | -| Profiling & MFU tracking | ✅ | -| Gradient accumulation | ✅ | -| CPU offloading | ✅ | -| FSDP checkpoint conversion to HF for inference | ✅ | -| W&B experiment tracker | ✅ | +> Note: We recently did a refactor of the repo; [archive-main](https://github.com/meta-llama/llama-recipes/tree/archive-main) is a snapshot branch from before the refactor. + +## Repository Structure: + +- [3P Integrations](./3p-integrations): Getting Started Recipes and End to End Use-Cases from various Llama providers +- [End to End Use Cases](./end-to-end-use-cases): As the name suggests, spanning various domains and applications +- [Getting Started](./getting-started/): Reference for inferencing, fine-tuning and RAG examples +- [src](./src/): Contains the source code for the original llama-recipes library along with some FAQs for fine-tuning. + +## FAQ: + +- Q: Prompt Template changes for Multi-Modality? + +A: Llama 3.2 follows the same prompt template as Llama 3.1, with a new special token `<|image|>` representing the input image for the multimodal models. + +More details on the prompt templates for image reasoning, tool-calling and code interpreter can be found [on the documentation website](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_2). + +- Q: I have some questions for Fine-Tuning, is there a section to address these? + +A: Check out the Fine-Tuning FAQ [here](./src/docs/) + +- Q: Some links are broken/folders are missing? + +A: We recently did a refactor of the repo; [archive-main](https://github.com/meta-llama/llama-recipes/tree/archive-main) is a snapshot branch from before the refactor. + +- Q: Where can we find details about the latest models? + +A: Official [Llama models website](https://www.llama.com) ## Contributing diff --git a/UPDATES.md b/UPDATES.md index fcd455592..74385fe63 100644 --- a/UPDATES.md +++ b/UPDATES.md @@ -1,19 +1,24 @@ -## System Prompt Update +DIFFLOG: + +Nested Folders rename: +- /recipes/3p_integrations -> /3p-integrations +- /recipes/quickstart -> /getting-started +- /recipes/responsible_ai -> /end-to-end-use-cases/responsible_ai +- /recipes/use_cases -> /end-to-end-use-cases +- /quickstart/agents -> /end-to-end-use-cases/agents +- /quickstart/NotebookLlama -> /end-to-end-use-cases/NotebookLlama +- /quickstart/responsible_ai -> /end-to-end-use-cases/responsible_ai +- /recipes/use_cases/end2end-recipes/RAFT-Chatbot -> /end-to-end-use-cases/RAFT-Chatbot +- /docs -> /src/docs/ +- /dev_requirements.txt -> /src/dev_requirements.txt +- /requirements.txt -> /src/requirements.txt +- /tools -> /end-to-end-use-cases/benchmarks/ +- /recipes/experimental/long_context -> /end-to-end-use-cases/long_context -### Observed Issue -We received feedback from the community on our prompt template and we are providing an update to reduce the false refusal rates seen.
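As a concrete illustration of the `<|image|>` token mentioned in the FAQ above, here is a minimal sketch of building a Llama 3.2 Vision prompt with Hugging Face transformers; the model ID, image file, and message text are assumptions for illustration, not values taken from this repository:

```python
# Minimal sketch, assuming transformers >= 4.45 (Mllama support), a local
# example.jpg, and access to the gated Llama 3.2 Vision checkpoint.
from PIL import Image
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("meta-llama/Llama-3.2-11B-Vision-Instruct")

messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": "Describe this image in one sentence."},
    ]}
]
# apply_chat_template renders the Llama 3.1-style template and inserts the
# special <|image|> token ahead of the user text.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(Image.open("example.jpg"), prompt, return_tensors="pt")
```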
False refusals occur when the model incorrectly refuses to answer a question that it should, for example due to overly broad instructions to be cautious in how it provides responses. -### Updated approach -Based on evaluation and analysis, we recommend the removal of the system prompt as the default setting. Pull request [#626](https://github.com/facebookresearch/llama/pull/626) removes the system prompt as the default option, but still provides an example to help enable experimentation for those using it. - -## Token Sanitization Update - -### Observed Issue -The PyTorch scripts currently provided for tokenization and model inference allow for direct prompt injection via string concatenation. Prompt injections allow for the addition of special system and instruction prompt strings from user-provided prompts. - -As noted in the documentation, these strings are required to use the fine-tuned chat models. However, prompt injections have also been used for manipulating or abusing models by bypassing their safeguards, allowing for the creation of content or behaviors otherwise outside the bounds of acceptable use. - -### Updated approach -We recommend sanitizing [these strings](https://github.com/meta-llama/llama?tab=readme-ov-file#fine-tuned-chat-models) from any user provided prompts. Sanitization of user prompts mitigates malicious or accidental abuse of these strings. The provided scripts have been updated to do this. - -Note: even with this update safety classifiers should still be applied to catch unsafe behaviors or content produced by the model. An [example](./recipes/quickstart/inference/local_inference/inference.py) of how to deploy such a classifier can be found in the llama-recipes repository. +Removed folders: +- /flagged (Empty folder) +- /recipes/quickstart/Running_Llama3_Anywhere (Redundant code) +- /recipes/quickstart/inference/codellama (deprecated model) +- /recipes/quickstart/getting-to-know-llama-3.ipynb + diff --git a/recipes/quickstart/NotebookLlama/README.md b/end-to-end-use-cases/NotebookLlama/README.md similarity index 100% rename from recipes/quickstart/NotebookLlama/README.md rename to end-to-end-use-cases/NotebookLlama/README.md diff --git a/recipes/quickstart/NotebookLlama/Step-1 PDF-Pre-Processing-Logic.ipynb b/end-to-end-use-cases/NotebookLlama/Step-1 PDF-Pre-Processing-Logic.ipynb similarity index 100% rename from recipes/quickstart/NotebookLlama/Step-1 PDF-Pre-Processing-Logic.ipynb rename to end-to-end-use-cases/NotebookLlama/Step-1 PDF-Pre-Processing-Logic.ipynb diff --git a/recipes/quickstart/NotebookLlama/Step-2-Transcript-Writer.ipynb b/end-to-end-use-cases/NotebookLlama/Step-2-Transcript-Writer.ipynb similarity index 100% rename from recipes/quickstart/NotebookLlama/Step-2-Transcript-Writer.ipynb rename to end-to-end-use-cases/NotebookLlama/Step-2-Transcript-Writer.ipynb diff --git a/recipes/quickstart/NotebookLlama/Step-3-Re-Writer.ipynb b/end-to-end-use-cases/NotebookLlama/Step-3-Re-Writer.ipynb similarity index 100% rename from recipes/quickstart/NotebookLlama/Step-3-Re-Writer.ipynb rename to end-to-end-use-cases/NotebookLlama/Step-3-Re-Writer.ipynb diff --git a/recipes/quickstart/NotebookLlama/Step-4-TTS-Workflow.ipynb b/end-to-end-use-cases/NotebookLlama/Step-4-TTS-Workflow.ipynb similarity index 100% rename from recipes/quickstart/NotebookLlama/Step-4-TTS-Workflow.ipynb rename to end-to-end-use-cases/NotebookLlama/Step-4-TTS-Workflow.ipynb diff --git a/recipes/quickstart/NotebookLlama/TTS_Notes.md b/end-to-end-use-cases/NotebookLlama/TTS_Notes.md 
similarity index 100% rename from recipes/quickstart/NotebookLlama/TTS_Notes.md rename to end-to-end-use-cases/NotebookLlama/TTS_Notes.md diff --git a/recipes/quickstart/NotebookLlama/requirements.txt b/end-to-end-use-cases/NotebookLlama/requirements.txt similarity index 100% rename from recipes/quickstart/NotebookLlama/requirements.txt rename to end-to-end-use-cases/NotebookLlama/requirements.txt diff --git a/recipes/quickstart/NotebookLlama/resources/2402.13116v4.pdf b/end-to-end-use-cases/NotebookLlama/resources/2402.13116v4.pdf similarity index 100% rename from recipes/quickstart/NotebookLlama/resources/2402.13116v4.pdf rename to end-to-end-use-cases/NotebookLlama/resources/2402.13116v4.pdf diff --git a/recipes/quickstart/NotebookLlama/resources/Outline.jpg b/end-to-end-use-cases/NotebookLlama/resources/Outline.jpg similarity index 100% rename from recipes/quickstart/NotebookLlama/resources/Outline.jpg rename to end-to-end-use-cases/NotebookLlama/resources/Outline.jpg diff --git a/recipes/quickstart/NotebookLlama/resources/_podcast.mp3 b/end-to-end-use-cases/NotebookLlama/resources/_podcast.mp3 similarity index 100% rename from recipes/quickstart/NotebookLlama/resources/_podcast.mp3 rename to end-to-end-use-cases/NotebookLlama/resources/_podcast.mp3 diff --git a/recipes/quickstart/NotebookLlama/resources/clean_extracted_text.txt b/end-to-end-use-cases/NotebookLlama/resources/clean_extracted_text.txt similarity index 100% rename from recipes/quickstart/NotebookLlama/resources/clean_extracted_text.txt rename to end-to-end-use-cases/NotebookLlama/resources/clean_extracted_text.txt diff --git a/recipes/quickstart/NotebookLlama/resources/data.pkl b/end-to-end-use-cases/NotebookLlama/resources/data.pkl similarity index 100% rename from recipes/quickstart/NotebookLlama/resources/data.pkl rename to end-to-end-use-cases/NotebookLlama/resources/data.pkl diff --git a/recipes/quickstart/NotebookLlama/resources/podcast_ready_data.pkl b/end-to-end-use-cases/NotebookLlama/resources/podcast_ready_data.pkl similarity index 100% rename from recipes/quickstart/NotebookLlama/resources/podcast_ready_data.pkl rename to end-to-end-use-cases/NotebookLlama/resources/podcast_ready_data.pkl diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/README.md b/end-to-end-use-cases/RAFT-Chatbot/README.md similarity index 98% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/README.md rename to end-to-end-use-cases/RAFT-Chatbot/README.md index 50356d509..2f5160da6 100644 --- a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/README.md +++ b/end-to-end-use-cases/RAFT-Chatbot/README.md @@ -124,7 +124,7 @@ export PATH_TO_RAFT_JSON=recipes/use_cases/end2end-recipes/raft/output/raft.json torchrun --nnodes 1 --nproc_per_node 4 recipes/quickstart/finetuning/finetuning.py --enable_fsdp --lr 1e-5 --context_length 8192 --num_epochs 1 --batch_size_training 1 --model_name meta-Llama/Meta-Llama-3-8B-Instruct --dist_checkpoint_root_folder $PATH_TO_ROOT_FOLDER --dist_checkpoint_folder fine-tuned --use_fast_kernels --dataset "custom_dataset" --custom_dataset.test_split "test" --custom_dataset.file "recipes/finetuning/datasets/raft_dataset.py" --use-wandb --run_validation True --custom_dataset.data_path $PATH_TO_RAFT_JSON ``` -For more details on multi-GPU fine-tuning, please refer to the [multigpu_finetuning.md](../../../quickstart/finetuning/multigpu_finetuning.md) in the finetuning recipe. 
+For more details on multi-GPU fine-tuning, please refer to the [multigpu_finetuning.md](../../getting-started/finetuning/multigpu_finetuning.md) in the finetuning recipe. Next, we need to convert the FSDP checkpoint to a HuggingFace checkpoint using the following command: @@ -132,7 +132,7 @@ Next, we need to convert the FSDP checkpoint to a HuggingFace checkpoint using t python src/llama_recipes/inference/checkpoint_converter_fsdp_hf.py --fsdp_checkpoint_path "$PATH_TO_ROOT_FOLDER/fine-tuned-meta-Llama/Meta-Llama-3-8B-Instruct" --consolidated_model_path "$PATH_TO_ROOT_FOLDER" ``` -For more details on FSDP to HuggingFace checkpoint conversion, please refer to the [readme](../../../quickstart/inference/local_inference/README.md) in the inference/local_inference recipe. +For more details on FSDP to HuggingFace checkpoint conversion, please refer to the [readme](../../getting-started/inference/local_inference/README.md) in the inference/local_inference recipe. ## Evaluation Steps Once we have the RAFT model, we need to evaluate its performance. In this tutorial, we'll not only use traditional evaluation methods (e.g., calculating exact match rate or ROUGE score) but also use an LLM as a judge to score model-generated answers. @@ -236,7 +236,7 @@ Once we evaluated and refined our RAFT model, we can deploy it locally to intera python recipes/inference/local_inference/inference.py --model_name raft-8b ``` -For more details,please check [local_inference recipe](../../../quickstart/inference/local_inference/README.md) +For more details, please check [local_inference recipe](../../getting-started/inference/local_inference/README.md) ## Acknowledgement diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/config.py b/end-to-end-use-cases/RAFT-Chatbot/config.py similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/config.py rename to end-to-end-use-cases/RAFT-Chatbot/config.py diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/eval_llama.json b/end-to-end-use-cases/RAFT-Chatbot/eval_llama.json similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/eval_llama.json rename to end-to-end-use-cases/RAFT-Chatbot/eval_llama.json diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/format.py b/end-to-end-use-cases/RAFT-Chatbot/format.py similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/format.py rename to end-to-end-use-cases/RAFT-Chatbot/format.py diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/images/Answers_Precision.png b/end-to-end-use-cases/RAFT-Chatbot/images/Answers_Precision.png similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/images/Answers_Precision.png rename to end-to-end-use-cases/RAFT-Chatbot/images/Answers_Precision.png diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/images/LLM_score_comparison.png b/end-to-end-use-cases/RAFT-Chatbot/images/LLM_score_comparison.png similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/images/LLM_score_comparison.png rename to end-to-end-use-cases/RAFT-Chatbot/images/LLM_score_comparison.png diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/images/Num_of_refusal_comparison.png b/end-to-end-use-cases/RAFT-Chatbot/images/Num_of_refusal_comparison.png similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/images/Num_of_refusal_comparison.png rename to end-to-end-use-cases/RAFT-Chatbot/images/Num_of_refusal_comparison.png diff --git
a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/images/RAFT.png b/end-to-end-use-cases/RAFT-Chatbot/images/RAFT.png similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/images/RAFT.png rename to end-to-end-use-cases/RAFT-Chatbot/images/RAFT.png diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft.py b/end-to-end-use-cases/RAFT-Chatbot/raft.py similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft.py rename to end-to-end-use-cases/RAFT-Chatbot/raft.py diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft.yaml b/end-to-end-use-cases/RAFT-Chatbot/raft.yaml similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft.yaml rename to end-to-end-use-cases/RAFT-Chatbot/raft.yaml diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_eval.py b/end-to-end-use-cases/RAFT-Chatbot/raft_eval.py similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_eval.py rename to end-to-end-use-cases/RAFT-Chatbot/raft_eval.py diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_eval_config.yaml b/end-to-end-use-cases/RAFT-Chatbot/raft_eval_config.yaml similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_eval_config.yaml rename to end-to-end-use-cases/RAFT-Chatbot/raft_eval_config.yaml diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_utils.py b/end-to-end-use-cases/RAFT-Chatbot/raft_utils.py similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_utils.py rename to end-to-end-use-cases/RAFT-Chatbot/raft_utils.py diff --git a/recipes/use_cases/README.md b/end-to-end-use-cases/README.md similarity index 57% rename from recipes/use_cases/README.md rename to end-to-end-use-cases/README.md index 45d08ab8f..68284444e 100644 --- a/recipes/use_cases/README.md +++ b/end-to-end-use-cases/README.md @@ -1,23 +1,57 @@ -## [Automatic Triaging of Github Repositories](./github_triage/walkthrough.ipynb): Use Llama to automatically triage issues in an OSS repository and generate insights to improve community experience +# End to End Use Case Applications using various Llama Models + +## [Agentic Tutorial](./agents/): + +### 101 and 201 tutorials on performing Tool Calling and building an Agentic Workflow using Llama Models +The 101 notebooks show how to apply Llama models and enable tool-calling functionality; the 201 notebook walks you through an end-to-end workflow of building an agent that can search two papers, fetch their details and find their differences. + +## [Benchmarks](./benchmarks/): + +### A folder containing benchmark scripts +The scripts provide a throughput analysis and an introduction to `lm-evaluation-harness`, a tool to evaluate Llama models, including quantized models, with a focus on quality. + +## [Browser Usage](./browser_use/): + +### Demo of how to apply Llama models and use them for browsing the internet and completing tasks + +## [Automatic Triaging of Github Repositories](./github_triage/walkthrough.ipynb): + +### Use Llama to automatically triage issues in an OSS repository and generate insights to improve community experience This tool utilizes an off-the-shelf Llama model to analyze, generate insights, and create a report for a better understanding of the state of a repository. It serves as a reference implementation for using Llama to develop custom reporting and data analytics applications.
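The core of the triage idea above is a single classification call per issue; a rough sketch under stated assumptions (an OpenAI-compatible endpoint such as a local vLLM server, and an illustrative model name, neither taken from the walkthrough itself):

```python
# Hypothetical sketch of triaging one issue; the endpoint, model name, and
# prompt wording are assumptions, not the walkthrough's actual implementation.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

issue = "Finetuning crashes with CUDA OOM on a single 24GB GPU when FSDP is enabled."
resp = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",
    messages=[{
        "role": "user",
        "content": "Classify this GitHub issue as bug/question/feature_request "
                   f"and give a one-line summary, answering in JSON: {issue}",
    }],
)
print(resp.choices[0].message.content)
```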
-## [VideoSummary](video_summary.ipynb): Ask Llama 3 to Summarize a Long YouTube Video (using Replicate or [OctoAI](../3p_integrations/octoai/video_summary.ipynb)) +## [VideoSummary](video_summary.ipynb): + +### Ask Llama 3 to Summarize a Long YouTube Video (using Replicate or [OctoAI](../3p-integrations/octoai/video_summary.ipynb)) This demo app uses Llama 3 to return a text summary of a YouTube video. It shows how to retrieve the caption of a YouTube video and how to ask Llama to summarize the content in different ways, from the simplest naive way that works for short text to more advanced methods of using LangChain's map_reduce and refine to overcome the 8K context length limit of Llama 3. -## [NBA2023-24](./coding/text2sql/structured_llama.ipynb): Ask Llama 3 about Structured Data +## [NBA2023-24](./coding/text2sql/quickstart.ipynb): + +### Ask Llama 3 about Structured Data This demo app shows how to use LangChain and Llama 3 to let users ask questions about **structured** data stored in a SQL DB. As the 2023-24 NBA season is entering the playoffs, we use the NBA roster info saved in a SQLite DB to show you how to ask Llama 3 questions about your favorite teams or players. -## [live_data](live_data.ipynb): Ask Llama 3 about Live Data (using Replicate or [OctoAI](../3p_integrations/octoai/live_data.ipynb)) +## [NotebookLlama](./NotebookLlama/): + +### PDF to Podcast using Llama Models +A workflow showcasing how to use multiple Llama models to go from any PDF to a podcast, using open models to generate the multi-speaker audio. + +## [live_data](live_data.ipynb): + +### Ask Llama 3 about Live Data (using Replicate or [OctoAI](../3p-integrations/octoai/live_data.ipynb)) This demo app shows how to perform live data augmented generation tasks with Llama 3, [LlamaIndex](https://github.com/run-llama/llama_index), another leading open-source framework for building LLM apps, and the [Tavily](https://tavily.com) live search API. -## [WhatsApp Chatbot](./customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md): Building a Llama 3 Enabled WhatsApp Chatbot +## [WhatsApp Chatbot](./customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md): +### Building a Llama 3 Enabled WhatsApp Chatbot This step-by-step tutorial shows how to use the [WhatsApp Business API](https://developers.facebook.com/docs/whatsapp/cloud-api/overview) to build a Llama 3 enabled WhatsApp chatbot. -## [Messenger Chatbot](./customerservice_chatbots/messenger_chatbot/messenger_llama3.md): Building a Llama 3 Enabled Messenger Chatbot +## [Messenger Chatbot](./customerservice_chatbots/messenger_chatbot/messenger_llama3.md): + +### Building a Llama 3 Enabled Messenger Chatbot This step-by-step tutorial shows how to use the [Messenger Platform](https://developers.facebook.com/docs/messenger-platform/overview) to build a Llama 3 enabled Messenger chatbot. -### RAG Chatbot Example (running [locally](./customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb) or on [OctoAI](../3p_integrations/octoai/RAG_chatbot_example/RAG_chatbot_example.ipynb)) +### RAG Chatbot Example (running [locally](./customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb) or on [OctoAI](../3p-integrations/octoai/RAG_chatbot_example/RAG_chatbot_example.ipynb)) A complete example of how to build a Llama 3 chatbot hosted on your browser that can answer questions based on your own data using retrieval augmented generation (RAG).
You can run Llama2 locally if you have a good enough GPU or on OctoAI if you follow the note [here](../README.md#octoai_note). -## [Sales Bot](./customerservice_chatbots/ai_agent_chatbot/SalesBot.ipynb): Sales Bot with Llama3 - A Summarization and RAG Use Case +## [Sales Bot](./customerservice_chatbots/ai_agent_chatbot/SalesBot.ipynb): + +### Sales Bot with Llama3 - A Summarization and RAG Use Case A summarization + RAG use case built around the Amazon product review Kaggle dataset to build a helpful Music Store Sales Bot. The summarization and RAG are built on top of Llama models hosted on OctoAI, and the vector database is hosted on Weaviate Cloud Services. diff --git a/recipes/quickstart/agents/Agents_Tutorial/Tool_Calling_101.ipynb b/end-to-end-use-cases/agents/Agents_Tutorial/Tool_Calling_101.ipynb similarity index 100% rename from recipes/quickstart/agents/Agents_Tutorial/Tool_Calling_101.ipynb rename to end-to-end-use-cases/agents/Agents_Tutorial/Tool_Calling_101.ipynb diff --git a/recipes/quickstart/agents/Agents_Tutorial/Tool_Calling_201.ipynb b/end-to-end-use-cases/agents/Agents_Tutorial/Tool_Calling_201.ipynb similarity index 100% rename from recipes/quickstart/agents/Agents_Tutorial/Tool_Calling_201.ipynb rename to end-to-end-use-cases/agents/Agents_Tutorial/Tool_Calling_201.ipynb diff --git a/recipes/quickstart/agents/DeepLearningai_Course_Notebooks/AI_Agentic_Design_Patterns_with_AutoGen_L4_Tool_Use_and_Conversational_Chess.ipynb b/end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/AI_Agentic_Design_Patterns_with_AutoGen_L4_Tool_Use_and_Conversational_Chess.ipynb similarity index 100% rename from recipes/quickstart/agents/DeepLearningai_Course_Notebooks/AI_Agentic_Design_Patterns_with_AutoGen_L4_Tool_Use_and_Conversational_Chess.ipynb rename to end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/AI_Agentic_Design_Patterns_with_AutoGen_L4_Tool_Use_and_Conversational_Chess.ipynb diff --git a/recipes/quickstart/agents/DeepLearningai_Course_Notebooks/AI_Agents_in_LangGraph_L1_Build_an_Agent_from_Scratch.ipynb b/end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/AI_Agents_in_LangGraph_L1_Build_an_Agent_from_Scratch.ipynb similarity index 100% rename from recipes/quickstart/agents/DeepLearningai_Course_Notebooks/AI_Agents_in_LangGraph_L1_Build_an_Agent_from_Scratch.ipynb rename to end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/AI_Agents_in_LangGraph_L1_Build_an_Agent_from_Scratch.ipynb diff --git a/recipes/quickstart/agents/DeepLearningai_Course_Notebooks/Building_Agentic_RAG_with_Llamaindex_L1_Router_Engine.ipynb b/end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/Building_Agentic_RAG_with_Llamaindex_L1_Router_Engine.ipynb similarity index 100% rename from recipes/quickstart/agents/DeepLearningai_Course_Notebooks/Building_Agentic_RAG_with_Llamaindex_L1_Router_Engine.ipynb rename to end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/Building_Agentic_RAG_with_Llamaindex_L1_Router_Engine.ipynb diff --git a/recipes/quickstart/agents/DeepLearningai_Course_Notebooks/Functions_Tools_and_Agents_with_LangChain_L1_Function_Calling.ipynb b/end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/Functions_Tools_and_Agents_with_LangChain_L1_Function_Calling.ipynb similarity index 100% rename from recipes/quickstart/agents/DeepLearningai_Course_Notebooks/Functions_Tools_and_Agents_with_LangChain_L1_Function_Calling.ipynb rename to
end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/Functions_Tools_and_Agents_with_LangChain_L1_Function_Calling.ipynb diff --git a/recipes/quickstart/agents/DeepLearningai_Course_Notebooks/README.md b/end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/README.md similarity index 100% rename from recipes/quickstart/agents/DeepLearningai_Course_Notebooks/README.md rename to end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/README.md diff --git a/recipes/quickstart/agents/README.md b/end-to-end-use-cases/agents/README.md similarity index 100% rename from recipes/quickstart/agents/README.md rename to end-to-end-use-cases/agents/README.md diff --git a/tools/benchmarks/README.md b/end-to-end-use-cases/benchmarks/README.md similarity index 100% rename from tools/benchmarks/README.md rename to end-to-end-use-cases/benchmarks/README.md diff --git a/tools/benchmarks/inference/README.md b/end-to-end-use-cases/benchmarks/inference/README.md similarity index 100% rename from tools/benchmarks/inference/README.md rename to end-to-end-use-cases/benchmarks/inference/README.md diff --git a/tools/benchmarks/inference/cloud/README.md b/end-to-end-use-cases/benchmarks/inference/cloud/README.md similarity index 100% rename from tools/benchmarks/inference/cloud/README.md rename to end-to-end-use-cases/benchmarks/inference/cloud/README.md diff --git a/tools/benchmarks/inference/cloud/aws/fmbench/README.md b/end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/README.md similarity index 100% rename from tools/benchmarks/inference/cloud/aws/fmbench/README.md rename to end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/README.md diff --git a/tools/benchmarks/inference/cloud/aws/fmbench/config.yml b/end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/config.yml similarity index 100% rename from tools/benchmarks/inference/cloud/aws/fmbench/config.yml rename to end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/config.yml diff --git a/tools/benchmarks/inference/cloud/aws/fmbench/img/CFT.png b/end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/img/CFT.png similarity index 100% rename from tools/benchmarks/inference/cloud/aws/fmbench/img/CFT.png rename to end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/img/CFT.png diff --git a/tools/benchmarks/inference/cloud/aws/fmbench/img/business_summary.png b/end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/img/business_summary.png similarity index 100% rename from tools/benchmarks/inference/cloud/aws/fmbench/img/business_summary.png rename to end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/img/business_summary.png diff --git a/tools/benchmarks/inference/cloud/aws/fmbench/img/instances.png b/end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/img/instances.png similarity index 100% rename from tools/benchmarks/inference/cloud/aws/fmbench/img/instances.png rename to end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/img/instances.png diff --git a/tools/benchmarks/inference/cloud/aws/fmbench/img/latency_vs_tokens.png b/end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/img/latency_vs_tokens.png similarity index 100% rename from tools/benchmarks/inference/cloud/aws/fmbench/img/latency_vs_tokens.png rename to end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/img/latency_vs_tokens.png diff --git a/tools/benchmarks/inference/cloud/azure/chat_azure_api_benchmark.py b/end-to-end-use-cases/benchmarks/inference/cloud/azure/chat_azure_api_benchmark.py 
similarity index 100% rename from tools/benchmarks/inference/cloud/azure/chat_azure_api_benchmark.py rename to end-to-end-use-cases/benchmarks/inference/cloud/azure/chat_azure_api_benchmark.py diff --git a/tools/benchmarks/inference/cloud/azure/input.jsonl b/end-to-end-use-cases/benchmarks/inference/cloud/azure/input.jsonl similarity index 100% rename from tools/benchmarks/inference/cloud/azure/input.jsonl rename to end-to-end-use-cases/benchmarks/inference/cloud/azure/input.jsonl diff --git a/tools/benchmarks/inference/cloud/azure/parameters.json b/end-to-end-use-cases/benchmarks/inference/cloud/azure/parameters.json similarity index 100% rename from tools/benchmarks/inference/cloud/azure/parameters.json rename to end-to-end-use-cases/benchmarks/inference/cloud/azure/parameters.json diff --git a/tools/benchmarks/inference/cloud/azure/pretrained_azure_api_benchmark.py b/end-to-end-use-cases/benchmarks/inference/cloud/azure/pretrained_azure_api_benchmark.py similarity index 100% rename from tools/benchmarks/inference/cloud/azure/pretrained_azure_api_benchmark.py rename to end-to-end-use-cases/benchmarks/inference/cloud/azure/pretrained_azure_api_benchmark.py diff --git a/tools/benchmarks/inference/on_prem/README.md b/end-to-end-use-cases/benchmarks/inference/on_prem/README.md similarity index 96% rename from tools/benchmarks/inference/on_prem/README.md rename to end-to-end-use-cases/benchmarks/inference/on_prem/README.md index afffd6ee5..f9d7c02fc 100644 --- a/tools/benchmarks/inference/on_prem/README.md +++ b/end-to-end-use-cases/benchmarks/inference/on_prem/README.md @@ -7,7 +7,7 @@ We support benchmark on these serving framework: # vLLM - Getting Started -To get started, we first need to deploy containers on-prem as a API host. Follow the guidance [here](../../../../recipes/3p_integrations/llama_on_prem.md#setting-up-vllm-with-llama-3) to deploy vLLM on-prem. +To get started, we first need to deploy containers on-prem as an API host. Follow the guidance [here](../../../../3p-integrations/llama_on_prem.md#setting-up-vllm-with-llama-3) to deploy vLLM on-prem. Note that in the common scenario where overall throughput is important, we suggest you prioritize deploying as many model replicas as possible to reach a higher overall throughput and requests-per-second (RPS), compared to deploying one model container across multiple GPUs for model parallelism. Additionally, when deploying multiple model replicas, a higher-level wrapper is needed to handle the load balancing, which has been simulated in the benchmark scripts. For example, we have an instance from Azure that has 8xA100 80G GPUs, and we want to deploy the Meta Llama 3 70B instruct model, which is around 140GB with FP16.
So for deployment we can do: diff --git a/tools/benchmarks/inference/on_prem/vllm/chat_vllm_benchmark.py b/end-to-end-use-cases/benchmarks/inference/on_prem/vllm/chat_vllm_benchmark.py similarity index 100% rename from tools/benchmarks/inference/on_prem/vllm/chat_vllm_benchmark.py rename to end-to-end-use-cases/benchmarks/inference/on_prem/vllm/chat_vllm_benchmark.py diff --git a/tools/benchmarks/inference/on_prem/vllm/input.jsonl b/end-to-end-use-cases/benchmarks/inference/on_prem/vllm/input.jsonl similarity index 100% rename from tools/benchmarks/inference/on_prem/vllm/input.jsonl rename to end-to-end-use-cases/benchmarks/inference/on_prem/vllm/input.jsonl diff --git a/tools/benchmarks/inference/on_prem/vllm/parameters.json b/end-to-end-use-cases/benchmarks/inference/on_prem/vllm/parameters.json similarity index 100% rename from tools/benchmarks/inference/on_prem/vllm/parameters.json rename to end-to-end-use-cases/benchmarks/inference/on_prem/vllm/parameters.json diff --git a/tools/benchmarks/inference/on_prem/vllm/pretrained_vllm_benchmark.py b/end-to-end-use-cases/benchmarks/inference/on_prem/vllm/pretrained_vllm_benchmark.py similarity index 100% rename from tools/benchmarks/inference/on_prem/vllm/pretrained_vllm_benchmark.py rename to end-to-end-use-cases/benchmarks/inference/on_prem/vllm/pretrained_vllm_benchmark.py diff --git a/tools/benchmarks/inference/requirements.txt b/end-to-end-use-cases/benchmarks/inference/requirements.txt similarity index 100% rename from tools/benchmarks/inference/requirements.txt rename to end-to-end-use-cases/benchmarks/inference/requirements.txt diff --git a/tools/benchmarks/llm_eval_harness/README.md b/end-to-end-use-cases/benchmarks/llm_eval_harness/README.md similarity index 100% rename from tools/benchmarks/llm_eval_harness/README.md rename to end-to-end-use-cases/benchmarks/llm_eval_harness/README.md diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/README.md b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/README.md similarity index 97% rename from tools/benchmarks/llm_eval_harness/meta_eval/README.md rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/README.md index 96e0ae677..edf27bc6d 100644 --- a/tools/benchmarks/llm_eval_harness/meta_eval/README.md +++ b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/README.md @@ -50,7 +50,7 @@ Given the extensive number of tasks available (12 for pretrained models and 30 f - **Tasks for 3.2 pretrained models**: MMLU - **Tasks for 3.2 instruct models**: MMLU, GPQA -These tasks are common evalutions, many of which overlap with the Hugging Face [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard) +These tasks are common evaluations, many of which overlap with the Hugging Face [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard) Here, we aim to get the benchmark numbers on the aforementioned tasks using Hugging Face [leaderboard implementation](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks/leaderboard). Please follow the instructions below to make necessary modifications to use our eval prompts and get more eval metrics. 
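For orientation, the same leaderboard tasks can also be driven from Python via lm-evaluation-harness's `simple_evaluate` API; a minimal sketch, where the task and model names are illustrative rather than the meta_eval configuration:

```python
# Illustrative sketch only: running one leaderboard task programmatically.
# The task list and model are assumptions; the actual meta_eval run is
# configured through eval_config.yaml and prepare_meta_eval.py instead.
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=meta-llama/Llama-3.1-8B-Instruct",
    tasks=["leaderboard_mmlu_pro"],
    batch_size="auto",
)
print(results["results"])
```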
@@ -104,7 +104,7 @@ lm_eval --model vllm --model_args pretrained=meta-llama/Llama-3.1-8B-Instruct,te **NOTE**: As for `add_bos_token=True`, since our prompts in the evals dataset have already included all the special tokens required by the instruct model, such as `<|start_header_id|>user<|end_header_id|>`, we will not use the `--apply_chat_template` argument for instruct models anymore. However, we need to use the `add_bos_token=True` flag to add the BOS_token back during VLLM inference, as the BOS_token is removed by default in [this PR](https://github.com/EleutherAI/lm-evaluation-harness/pull/1465). -**NOTE**: For `meta_math_hard` tasks, some of our internal math ground truth has been converted to scientific notation, e.g. `6\sqrt{7}` has been converted to `1.59e+1`, which will be later handled by our internal math evaluation functions. As the lm-evaluation-harness [math evaluation utils.py](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/leaderboard/math/utils.py) can not fully handle those conversion, we will use the original ground truth from the original dataset [lighteval/MATH-Hard](https://huggingface.co/datasets/lighteval/MATH-Hard) by joining the tables on the original input questions. The `get_math_data` function in the [prepare_meta_eval.py](./prepare_meta_eval.py) will handle this step and produce a local parquet dataset file. +**NOTE**: For `meta_math_hard` tasks, some of our internal math ground truth has been converted to scientific notation, e.g. `6\sqrt{7}` has been converted to `1.59e+1`, which will later be handled by our internal math evaluation functions. As the lm-evaluation-harness [math evaluation utils.py](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/leaderboard/math/utils.py) cannot fully handle those conversions, we will use the original ground truth from the original dataset [lighteval/MATH-Hard](https://www.oxen.ai/lighteval/MATH-Hard) by joining the tables on the original input questions. The `get_math_data` function in [prepare_meta_eval.py](./prepare_meta_eval.py) will handle this step and produce a local parquet dataset file.
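The join described in the NOTE above amounts to matching rows on the question text; a minimal sketch, assuming `problem` and `solution` are the relevant MATH-Hard column names (the real logic lives in `get_math_data` inside prepare_meta_eval.py):

```python
# Sketch of rejoining original MATH-Hard ground truth on the input question.
# Column names and the split are assumptions; see get_math_data() for the
# actual implementation that writes the local parquet dataset.
import pandas as pd
from datasets import load_dataset

orig = load_dataset("lighteval/MATH-Hard", split="test").to_pandas()

# Stand-in for the internal eval prompts whose answers were rewritten into
# scientific notation (e.g. `6\sqrt{7}` -> `1.59e+1`).
eval_df = orig[["problem"]].head(100).copy()

joined = eval_df.merge(orig[["problem", "solution"]], on="problem", how="inner")
joined.to_parquet("meta_math_hard_joined.parquet")
```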
Moreover, we have modified this [math_hard/utils.py](./meta_template/math_hard/utils.py) to address two issues: diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/eval_config.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/eval_config.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/eval_config.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/eval_config.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/bbh/bbh_3shot_cot.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/bbh/bbh_3shot_cot.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/bbh/bbh_3shot_cot.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/bbh/bbh_3shot_cot.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/bbh/utils.py b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/bbh/utils.py similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/bbh/utils.py rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/bbh/utils.py diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa/gpqa_0shot.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa/gpqa_0shot.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa/gpqa_0shot.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa/gpqa_0shot.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa/utils.py b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa/utils.py similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa/utils.py rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa/utils.py diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa_cot/gpqa_0shot_cot.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa_cot/gpqa_0shot_cot.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa_cot/gpqa_0shot_cot.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa_cot/gpqa_0shot_cot.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa_cot/utils.py b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa_cot/utils.py similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa_cot/utils.py rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa_cot/utils.py diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/ifeval/ifeval.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/ifeval/ifeval.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/ifeval/ifeval.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/ifeval/ifeval.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/ifeval/utils.py b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/ifeval/utils.py similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/ifeval/utils.py rename to 
end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/ifeval/utils.py diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/math_4shot_cot.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/math_4shot_cot.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/math_4shot_cot.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/math_4shot_cot.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/math_hard_0shot_cot.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/math_hard_0shot_cot.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/math_hard_0shot_cot.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/math_hard_0shot_cot.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/utils.py b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/utils.py similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/utils.py rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/utils.py diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/meta_instruct.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/meta_instruct.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/meta_instruct.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/meta_instruct.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/meta_pretrain.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/meta_pretrain.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/meta_pretrain.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/meta_pretrain.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu/mmlu.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu/mmlu.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu/mmlu.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu/mmlu.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu/utils.py b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu/utils.py similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu/utils.py rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu/utils.py diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/mmlu_pro_5shot_cot_pretrain.yaml 
b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/mmlu_pro_5shot_cot_pretrain.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/mmlu_pro_5shot_cot_pretrain.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/mmlu_pro_5shot_cot_pretrain.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/utils.py b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/utils.py similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/utils.py rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/utils.py diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/prepare_meta_eval.py b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/prepare_meta_eval.py similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/prepare_meta_eval.py rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/prepare_meta_eval.py diff --git a/recipes/use_cases/browser_use/agent/browser-use-quickstart.ipynb b/end-to-end-use-cases/browser_use/agent/browser-use-quickstart.ipynb similarity index 100% rename from recipes/use_cases/browser_use/agent/browser-use-quickstart.ipynb rename to end-to-end-use-cases/browser_use/agent/browser-use-quickstart.ipynb diff --git a/recipes/use_cases/browser_use/agent/sample_screenshot.png b/end-to-end-use-cases/browser_use/agent/sample_screenshot.png similarity index 100% rename from recipes/use_cases/browser_use/agent/sample_screenshot.png rename to end-to-end-use-cases/browser_use/agent/sample_screenshot.png diff --git a/recipes/use_cases/coding/text2sql/csv2db.py b/end-to-end-use-cases/coding/text2sql/csv2db.py similarity index 100% rename from recipes/use_cases/coding/text2sql/csv2db.py rename to end-to-end-use-cases/coding/text2sql/csv2db.py diff --git a/recipes/use_cases/coding/text2sql/nba.txt b/end-to-end-use-cases/coding/text2sql/nba.txt similarity index 100% rename from recipes/use_cases/coding/text2sql/nba.txt rename to end-to-end-use-cases/coding/text2sql/nba.txt diff --git a/recipes/use_cases/coding/text2sql/nba_roster.db b/end-to-end-use-cases/coding/text2sql/nba_roster.db similarity index 100% rename from recipes/use_cases/coding/text2sql/nba_roster.db rename to end-to-end-use-cases/coding/text2sql/nba_roster.db diff --git a/recipes/use_cases/coding/text2sql/quickstart.ipynb b/end-to-end-use-cases/coding/text2sql/quickstart.ipynb similarity index 100% rename from recipes/use_cases/coding/text2sql/quickstart.ipynb rename to end-to-end-use-cases/coding/text2sql/quickstart.ipynb diff --git a/recipes/use_cases/coding/text2sql/txt2csv.py b/end-to-end-use-cases/coding/text2sql/txt2csv.py similarity index 100% rename from recipes/use_cases/coding/text2sql/txt2csv.py rename to end-to-end-use-cases/coding/text2sql/txt2csv.py diff --git a/recipes/use_cases/customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb b/end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb similarity index 100% rename from recipes/use_cases/customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb rename to end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb diff --git a/recipes/use_cases/customerservice_chatbots/RAG_chatbot/data/Llama Getting Started Guide.pdf b/end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/data/Llama 
Getting Started Guide.pdf similarity index 100% rename from recipes/use_cases/customerservice_chatbots/RAG_chatbot/data/Llama Getting Started Guide.pdf rename to end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/data/Llama Getting Started Guide.pdf diff --git a/recipes/use_cases/customerservice_chatbots/RAG_chatbot/requirements.txt b/end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/requirements.txt similarity index 100% rename from recipes/use_cases/customerservice_chatbots/RAG_chatbot/requirements.txt rename to end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/requirements.txt diff --git a/recipes/use_cases/customerservice_chatbots/RAG_chatbot/vectorstore/db_faiss/index.faiss b/end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/vectorstore/db_faiss/index.faiss similarity index 100% rename from recipes/use_cases/customerservice_chatbots/RAG_chatbot/vectorstore/db_faiss/index.faiss rename to end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/vectorstore/db_faiss/index.faiss diff --git a/recipes/use_cases/customerservice_chatbots/RAG_chatbot/vectorstore/db_faiss/index.pkl b/end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/vectorstore/db_faiss/index.pkl similarity index 100% rename from recipes/use_cases/customerservice_chatbots/RAG_chatbot/vectorstore/db_faiss/index.pkl rename to end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/vectorstore/db_faiss/index.pkl diff --git a/recipes/use_cases/customerservice_chatbots/RAG_chatbot/vectorstore/mongodb/rag_mongodb_llama3_huggingface_open_source.ipynb b/end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/vectorstore/mongodb/rag_mongodb_llama3_huggingface_open_source.ipynb similarity index 100% rename from recipes/use_cases/customerservice_chatbots/RAG_chatbot/vectorstore/mongodb/rag_mongodb_llama3_huggingface_open_source.ipynb rename to end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/vectorstore/mongodb/rag_mongodb_llama3_huggingface_open_source.ipynb diff --git a/recipes/use_cases/customerservice_chatbots/ai_agent_chatbot/SalesBot.ipynb b/end-to-end-use-cases/customerservice_chatbots/ai_agent_chatbot/SalesBot.ipynb similarity index 100% rename from recipes/use_cases/customerservice_chatbots/ai_agent_chatbot/SalesBot.ipynb rename to end-to-end-use-cases/customerservice_chatbots/ai_agent_chatbot/SalesBot.ipynb diff --git a/recipes/use_cases/customerservice_chatbots/ai_agent_chatbot/musical_instruments_reviews.csv b/end-to-end-use-cases/customerservice_chatbots/ai_agent_chatbot/musical_instruments_reviews.csv similarity index 100% rename from recipes/use_cases/customerservice_chatbots/ai_agent_chatbot/musical_instruments_reviews.csv rename to end-to-end-use-cases/customerservice_chatbots/ai_agent_chatbot/musical_instruments_reviews.csv diff --git a/recipes/use_cases/customerservice_chatbots/messenger_chatbot/llama_messenger.py b/end-to-end-use-cases/customerservice_chatbots/messenger_chatbot/llama_messenger.py similarity index 100% rename from recipes/use_cases/customerservice_chatbots/messenger_chatbot/llama_messenger.py rename to end-to-end-use-cases/customerservice_chatbots/messenger_chatbot/llama_messenger.py diff --git a/recipes/use_cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md b/end-to-end-use-cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md similarity index 98% rename from recipes/use_cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md rename to 
end-to-end-use-cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md index b47fbc16b..b7866864f 100644 --- a/recipes/use_cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md +++ b/end-to-end-use-cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md @@ -10,7 +10,7 @@ Messenger from Meta is a messaging service that allows a Facebook business page The diagram below shows the components and overall data flow of the Llama 3 enabled Messenger chatbot demo we built, using an Amazon EC2 instance as an example for running the web server. -![](../../../../docs/img/messenger_llama_arch.jpg) +![](../../../src/docs/img/messenger_llama_arch.jpg) ## Getting Started with Messenger Platform @@ -24,7 +24,7 @@ The diagram below shows the components and overall data flow of the Llama 3 enab 5. Open Messenger's API Settings, as shown in the screenshot below, then in "1. Configure webhooks", set the Callback URL and Verify Token set up in the previous step, and subscribe all message related fields for "Webhook Fields". Finally, in "2. Generate access tokens", connect your Facebook page (see step 1) and copy your page access token for later use. -![](../../../../docs/img/messenger_api_settings.png) +![](../../../src/docs/img/messenger_api_settings.png) ## Writing Llama 3 Enabled Web App diff --git a/recipes/use_cases/customerservice_chatbots/whatsapp_chatbot/llama_chatbot.py b/end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot/llama_chatbot.py similarity index 100% rename from recipes/use_cases/customerservice_chatbots/whatsapp_chatbot/llama_chatbot.py rename to end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot/llama_chatbot.py diff --git a/recipes/use_cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md b/end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md similarity index 98% rename from recipes/use_cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md rename to end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md index 02770257c..1d37a196d 100644 --- a/recipes/use_cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md +++ b/end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md @@ -10,7 +10,7 @@ Businesses of all sizes can use the [WhatsApp Business API](https://developers.f The diagram below shows the components and overall data flow of the Llama 3 enabled WhatsApp chatbot demo we built, using an Amazon EC2 instance as an example for running the web server. -![](../../../../docs/img/whatsapp_llama_arch.jpg) +![](../../../src/docs/img/whatsapp_llama_arch.jpg) ## Getting Started with WhatsApp Business Cloud API @@ -25,7 +25,7 @@ For the last step, you need to further follow the [Sample Callback URL for Webho Now open the [Meta for Developers Apps](https://developers.facebook.com/apps/) page, select the WhatsApp business app, and you should be able to copy the curl command (as shown in the App Dashboard - WhatsApp - API Setup - Step 2 below) and run the command on a Terminal to send a test message to your WhatsApp. -![](../../../../docs/img/whatsapp_dashboard.jpg) +![](../../../src/docs/img/whatsapp_dashboard.jpg) Note down the "Temporary access token", "Phone number ID", and "a recipient phone number" in the API Setup page above, which will be used later.
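Both chatbot guides above describe the same webhook pattern: Meta first verifies the Callback URL with a GET challenge carrying the Verify Token, then POSTs each inbound message to the web app, which generates a Llama reply and sends it back through the Graph API. Below is a minimal sketch of that flow for the WhatsApp Cloud API; it is an illustration, not the repo's llama_chatbot.py, and Flask, the environment-variable names, the Graph API version, and the `llama_reply` helper are assumptions/placeholders.

```python
# Minimal webhook sketch (assumptions: Flask, Graph API v19.0, env var names).
import os

import requests
from flask import Flask, request

app = Flask(__name__)
VERIFY_TOKEN = os.environ["VERIFY_TOKEN"]        # Verify Token set in the webhook config
ACCESS_TOKEN = os.environ["ACCESS_TOKEN"]        # "Temporary access token" from API Setup
PHONE_NUMBER_ID = os.environ["PHONE_NUMBER_ID"]  # "Phone number ID" from API Setup


def llama_reply(text: str) -> str:
    """Placeholder: call your hosted Llama endpoint here."""
    raise NotImplementedError


@app.route("/webhook", methods=["GET"])
def verify():
    # Meta confirms the Callback URL by sending a challenge it expects echoed back.
    if request.args.get("hub.verify_token") == VERIFY_TOKEN:
        return request.args.get("hub.challenge", "")
    return "verification failed", 403


@app.route("/webhook", methods=["POST"])
def handle():
    # Inbound events arrive as entry -> changes -> value; status updates carry no "messages".
    value = request.get_json()["entry"][0]["changes"][0]["value"]
    if "messages" not in value:
        return "ok", 200
    msg = value["messages"][0]  # assumes a plain text message
    requests.post(
        f"https://graph.facebook.com/v19.0/{PHONE_NUMBER_ID}/messages",
        headers={"Authorization": f"Bearer {ACCESS_TOKEN}"},
        json={
            "messaging_product": "whatsapp",
            "to": msg["from"],
            "text": {"body": llama_reply(msg["text"]["body"])},
        },
    )
    return "ok", 200
```

The Messenger variant differs only in the send call (the page access token and the `/me/messages` endpoint) and the inbound payload shape; the GET-challenge handshake is identical.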
diff --git a/recipes/use_cases/email_agent/1.png b/end-to-end-use-cases/email_agent/1.png similarity index 100% rename from recipes/use_cases/email_agent/1.png rename to end-to-end-use-cases/email_agent/1.png diff --git a/recipes/use_cases/email_agent/2.png b/end-to-end-use-cases/email_agent/2.png similarity index 100% rename from recipes/use_cases/email_agent/2.png rename to end-to-end-use-cases/email_agent/2.png diff --git a/recipes/use_cases/email_agent/3.png b/end-to-end-use-cases/email_agent/3.png similarity index 100% rename from recipes/use_cases/email_agent/3.png rename to end-to-end-use-cases/email_agent/3.png diff --git a/recipes/use_cases/email_agent/README.md b/end-to-end-use-cases/email_agent/README.md similarity index 100% rename from recipes/use_cases/email_agent/README.md rename to end-to-end-use-cases/email_agent/README.md diff --git a/recipes/use_cases/email_agent/email_agent.png b/end-to-end-use-cases/email_agent/email_agent.png similarity index 100% rename from recipes/use_cases/email_agent/email_agent.png rename to end-to-end-use-cases/email_agent/email_agent.png diff --git a/recipes/use_cases/email_agent/email_agent.py b/end-to-end-use-cases/email_agent/email_agent.py similarity index 100% rename from recipes/use_cases/email_agent/email_agent.py rename to end-to-end-use-cases/email_agent/email_agent.py diff --git a/recipes/use_cases/email_agent/functions_prompt.py b/end-to-end-use-cases/email_agent/functions_prompt.py similarity index 100% rename from recipes/use_cases/email_agent/functions_prompt.py rename to end-to-end-use-cases/email_agent/functions_prompt.py diff --git a/recipes/use_cases/email_agent/main.py b/end-to-end-use-cases/email_agent/main.py similarity index 100% rename from recipes/use_cases/email_agent/main.py rename to end-to-end-use-cases/email_agent/main.py diff --git a/recipes/use_cases/email_agent/requirements.txt b/end-to-end-use-cases/email_agent/requirements.txt similarity index 100% rename from recipes/use_cases/email_agent/requirements.txt rename to end-to-end-use-cases/email_agent/requirements.txt diff --git a/recipes/use_cases/github_triage/README.md b/end-to-end-use-cases/github_triage/README.md similarity index 100% rename from recipes/use_cases/github_triage/README.md rename to end-to-end-use-cases/github_triage/README.md diff --git a/recipes/use_cases/github_triage/config.yaml b/end-to-end-use-cases/github_triage/config.yaml similarity index 100% rename from recipes/use_cases/github_triage/config.yaml rename to end-to-end-use-cases/github_triage/config.yaml diff --git a/recipes/use_cases/github_triage/llm.py b/end-to-end-use-cases/github_triage/llm.py similarity index 100% rename from recipes/use_cases/github_triage/llm.py rename to end-to-end-use-cases/github_triage/llm.py diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/annotated_issues.csv b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/annotated_issues.csv similarity index 100% rename from recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/annotated_issues.csv rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/annotated_issues.csv diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/challenges.csv b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/challenges.csv similarity index 100% rename from 
recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/challenges.csv rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/challenges.csv diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/overview.csv b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/overview.csv similarity index 100% rename from recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/overview.csv rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/overview.csv diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/commits.png b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/commits.png similarity index 100% rename from recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/commits.png rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/commits.png diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/engagement_sankey.png b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/engagement_sankey.png similarity index 100% rename from recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/engagement_sankey.png rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/engagement_sankey.png diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/expertise.png b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/expertise.png similarity index 100% rename from recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/expertise.png rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/expertise.png diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/sentiment.png b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/sentiment.png similarity index 100% rename from recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/sentiment.png rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/sentiment.png diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/severity.png b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/severity.png similarity index 100% rename from recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/severity.png rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/severity.png diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/themes.png b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/themes.png similarity index 100% rename from recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/themes.png rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/themes.png diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/report.pdf b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/report.pdf similarity 
index 100% rename from recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/report.pdf rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/report.pdf diff --git a/recipes/use_cases/github_triage/pdf_report.py b/end-to-end-use-cases/github_triage/pdf_report.py similarity index 100% rename from recipes/use_cases/github_triage/pdf_report.py rename to end-to-end-use-cases/github_triage/pdf_report.py diff --git a/recipes/use_cases/github_triage/plots.py b/end-to-end-use-cases/github_triage/plots.py similarity index 100% rename from recipes/use_cases/github_triage/plots.py rename to end-to-end-use-cases/github_triage/plots.py diff --git a/recipes/use_cases/github_triage/requirements.txt b/end-to-end-use-cases/github_triage/requirements.txt similarity index 100% rename from recipes/use_cases/github_triage/requirements.txt rename to end-to-end-use-cases/github_triage/requirements.txt diff --git a/recipes/use_cases/github_triage/triage.py b/end-to-end-use-cases/github_triage/triage.py similarity index 100% rename from recipes/use_cases/github_triage/triage.py rename to end-to-end-use-cases/github_triage/triage.py diff --git a/recipes/use_cases/github_triage/utils.py b/end-to-end-use-cases/github_triage/utils.py similarity index 100% rename from recipes/use_cases/github_triage/utils.py rename to end-to-end-use-cases/github_triage/utils.py diff --git a/recipes/use_cases/github_triage/walkthrough.ipynb b/end-to-end-use-cases/github_triage/walkthrough.ipynb similarity index 100% rename from recipes/use_cases/github_triage/walkthrough.ipynb rename to end-to-end-use-cases/github_triage/walkthrough.ipynb diff --git a/recipes/use_cases/live_data.ipynb b/end-to-end-use-cases/live_data.ipynb similarity index 100% rename from recipes/use_cases/live_data.ipynb rename to end-to-end-use-cases/live_data.ipynb diff --git a/recipes/experimental/long_context/H2O/README.md b/end-to-end-use-cases/long_context/H2O/README.md similarity index 100% rename from recipes/experimental/long_context/H2O/README.md rename to end-to-end-use-cases/long_context/H2O/README.md diff --git a/recipes/experimental/long_context/H2O/data/summarization/cnn_dailymail.jsonl b/end-to-end-use-cases/long_context/H2O/data/summarization/cnn_dailymail.jsonl similarity index 100% rename from recipes/experimental/long_context/H2O/data/summarization/cnn_dailymail.jsonl rename to end-to-end-use-cases/long_context/H2O/data/summarization/cnn_dailymail.jsonl diff --git a/recipes/experimental/long_context/H2O/data/summarization/xsum.jsonl b/end-to-end-use-cases/long_context/H2O/data/summarization/xsum.jsonl similarity index 100% rename from recipes/experimental/long_context/H2O/data/summarization/xsum.jsonl rename to end-to-end-use-cases/long_context/H2O/data/summarization/xsum.jsonl diff --git a/recipes/experimental/long_context/H2O/requirements.txt b/end-to-end-use-cases/long_context/H2O/requirements.txt similarity index 100% rename from recipes/experimental/long_context/H2O/requirements.txt rename to end-to-end-use-cases/long_context/H2O/requirements.txt diff --git a/recipes/experimental/long_context/H2O/run_streaming.py b/end-to-end-use-cases/long_context/H2O/run_streaming.py similarity index 100% rename from recipes/experimental/long_context/H2O/run_streaming.py rename to end-to-end-use-cases/long_context/H2O/run_streaming.py diff --git a/recipes/experimental/long_context/H2O/run_summarization.py b/end-to-end-use-cases/long_context/H2O/run_summarization.py similarity index 100% rename from 
recipes/experimental/long_context/H2O/run_summarization.py rename to end-to-end-use-cases/long_context/H2O/run_summarization.py diff --git a/recipes/experimental/long_context/H2O/src/streaming.sh b/end-to-end-use-cases/long_context/H2O/src/streaming.sh similarity index 100% rename from recipes/experimental/long_context/H2O/src/streaming.sh rename to end-to-end-use-cases/long_context/H2O/src/streaming.sh diff --git a/recipes/experimental/long_context/H2O/utils/cache.py b/end-to-end-use-cases/long_context/H2O/utils/cache.py similarity index 100% rename from recipes/experimental/long_context/H2O/utils/cache.py rename to end-to-end-use-cases/long_context/H2O/utils/cache.py diff --git a/recipes/experimental/long_context/H2O/utils/llama.py b/end-to-end-use-cases/long_context/H2O/utils/llama.py similarity index 100% rename from recipes/experimental/long_context/H2O/utils/llama.py rename to end-to-end-use-cases/long_context/H2O/utils/llama.py diff --git a/recipes/experimental/long_context/H2O/utils/streaming.py b/end-to-end-use-cases/long_context/H2O/utils/streaming.py similarity index 100% rename from recipes/experimental/long_context/H2O/utils/streaming.py rename to end-to-end-use-cases/long_context/H2O/utils/streaming.py diff --git a/recipes/use_cases/multilingual/README.md b/end-to-end-use-cases/multilingual/README.md similarity index 97% rename from recipes/use_cases/multilingual/README.md rename to end-to-end-use-cases/multilingual/README.md index 159db54b3..662f7c50b 100644 --- a/recipes/use_cases/multilingual/README.md +++ b/end-to-end-use-cases/multilingual/README.md @@ -119,7 +119,7 @@ phase2_ds.save_to_disk("data/phase2") ``` ### Train -Finally, we can start finetuning Llama2 on these datasets by following the [finetuning recipes](../../quickstart/finetuning/). Remember to pass the new tokenizer path as an argument to the script: `--tokenizer_name=./extended_tokenizer`. +Finally, we can start finetuning Llama2 on these datasets by following the [finetuning recipes](../../getting-started/finetuning/). Remember to pass the new tokenizer path as an argument to the script: `--tokenizer_name=./extended_tokenizer`. OpenHathi was trained on 64 A100 80GB GPUs. 
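One practical detail behind the `--tokenizer_name=./extended_tokenizer` argument: the base model's embedding matrix must match the extended vocabulary before training starts. Here is a minimal sketch of that resize step, under the assumption that the extended tokenizer was saved to `./extended_tokenizer`; the model id and output path are placeholders, and newer versions of the finetuning script may perform the resize automatically.

```python
# Hypothetical pre-finetuning step: grow the embedding table to the extended vocab.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./extended_tokenizer")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")

if len(tokenizer) != model.get_input_embeddings().weight.shape[0]:
    # New rows are randomly initialized and learned during phase 1 training.
    model.resize_token_embeddings(len(tokenizer))

model.save_pretrained("./llama2-extended-vocab")
tokenizer.save_pretrained("./llama2-extended-vocab")
```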
Here are the hyperparameters used and other training details: - maximum learning rate: 2e-4 diff --git a/recipes/use_cases/multilingual/extend_tokenizer.py b/end-to-end-use-cases/multilingual/extend_tokenizer.py similarity index 100% rename from recipes/use_cases/multilingual/extend_tokenizer.py rename to end-to-end-use-cases/multilingual/extend_tokenizer.py diff --git a/recipes/use_cases/multilingual/img/phase1_eval_loss.png b/end-to-end-use-cases/multilingual/img/phase1_eval_loss.png similarity index 100% rename from recipes/use_cases/multilingual/img/phase1_eval_loss.png rename to end-to-end-use-cases/multilingual/img/phase1_eval_loss.png diff --git a/recipes/use_cases/multilingual/img/phase1_train_loss.png b/end-to-end-use-cases/multilingual/img/phase1_train_loss.png similarity index 100% rename from recipes/use_cases/multilingual/img/phase1_train_loss.png rename to end-to-end-use-cases/multilingual/img/phase1_train_loss.png diff --git a/recipes/use_cases/multilingual/img/phase2_eval_loss.png b/end-to-end-use-cases/multilingual/img/phase2_eval_loss.png similarity index 100% rename from recipes/use_cases/multilingual/img/phase2_eval_loss.png rename to end-to-end-use-cases/multilingual/img/phase2_eval_loss.png diff --git a/recipes/use_cases/multilingual/img/phase2_train_loss.png b/end-to-end-use-cases/multilingual/img/phase2_train_loss.png similarity index 100% rename from recipes/use_cases/multilingual/img/phase2_train_loss.png rename to end-to-end-use-cases/multilingual/img/phase2_train_loss.png diff --git a/recipes/use_cases/multilingual/prepare_data.py b/end-to-end-use-cases/multilingual/prepare_data.py similarity index 100% rename from recipes/use_cases/multilingual/prepare_data.py rename to end-to-end-use-cases/multilingual/prepare_data.py diff --git a/recipes/use_cases/multilingual/train_tokenizer.py b/end-to-end-use-cases/multilingual/train_tokenizer.py similarity index 100% rename from recipes/use_cases/multilingual/train_tokenizer.py rename to end-to-end-use-cases/multilingual/train_tokenizer.py diff --git a/recipes/responsible_ai/README.md b/end-to-end-use-cases/responsible_ai/README.md similarity index 100% rename from recipes/responsible_ai/README.md rename to end-to-end-use-cases/responsible_ai/README.md diff --git a/recipes/responsible_ai/code_shield_usage_demo.ipynb b/end-to-end-use-cases/responsible_ai/code_shield_usage_demo.ipynb similarity index 100% rename from recipes/responsible_ai/code_shield_usage_demo.ipynb rename to end-to-end-use-cases/responsible_ai/code_shield_usage_demo.ipynb diff --git a/recipes/responsible_ai/llama_guard/README.md b/end-to-end-use-cases/responsible_ai/llama_guard/README.md similarity index 100% rename from recipes/responsible_ai/llama_guard/README.md rename to end-to-end-use-cases/responsible_ai/llama_guard/README.md diff --git a/recipes/responsible_ai/llama_guard/__init__.py b/end-to-end-use-cases/responsible_ai/llama_guard/__init__.py similarity index 100% rename from recipes/responsible_ai/llama_guard/__init__.py rename to end-to-end-use-cases/responsible_ai/llama_guard/__init__.py diff --git a/recipes/responsible_ai/llama_guard/llama_guard_customization_via_prompting_and_fine_tuning.ipynb b/end-to-end-use-cases/responsible_ai/llama_guard/llama_guard_customization_via_prompting_and_fine_tuning.ipynb similarity index 100% rename from recipes/responsible_ai/llama_guard/llama_guard_customization_via_prompting_and_fine_tuning.ipynb rename to 
end-to-end-use-cases/responsible_ai/llama_guard/llama_guard_customization_via_prompting_and_fine_tuning.ipynb diff --git a/recipes/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb b/end-to-end-use-cases/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb similarity index 100% rename from recipes/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb rename to end-to-end-use-cases/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb diff --git a/recipes/responsible_ai/llama_guard/resources/dog.jpg b/end-to-end-use-cases/responsible_ai/llama_guard/resources/dog.jpg similarity index 100% rename from recipes/responsible_ai/llama_guard/resources/dog.jpg rename to end-to-end-use-cases/responsible_ai/llama_guard/resources/dog.jpg diff --git a/recipes/responsible_ai/llama_guard/resources/pasta.jpeg b/end-to-end-use-cases/responsible_ai/llama_guard/resources/pasta.jpeg similarity index 100% rename from recipes/responsible_ai/llama_guard/resources/pasta.jpeg rename to end-to-end-use-cases/responsible_ai/llama_guard/resources/pasta.jpeg diff --git a/recipes/responsible_ai/prompt_guard/README.md b/end-to-end-use-cases/responsible_ai/prompt_guard/README.md similarity index 100% rename from recipes/responsible_ai/prompt_guard/README.md rename to end-to-end-use-cases/responsible_ai/prompt_guard/README.md diff --git a/recipes/responsible_ai/prompt_guard/__init__.py b/end-to-end-use-cases/responsible_ai/prompt_guard/__init__.py similarity index 100% rename from recipes/responsible_ai/prompt_guard/__init__.py rename to end-to-end-use-cases/responsible_ai/prompt_guard/__init__.py diff --git a/recipes/responsible_ai/prompt_guard/inference.py b/end-to-end-use-cases/responsible_ai/prompt_guard/inference.py similarity index 100% rename from recipes/responsible_ai/prompt_guard/inference.py rename to end-to-end-use-cases/responsible_ai/prompt_guard/inference.py diff --git a/recipes/responsible_ai/prompt_guard/prompt_guard_tutorial.ipynb b/end-to-end-use-cases/responsible_ai/prompt_guard/prompt_guard_tutorial.ipynb similarity index 100% rename from recipes/responsible_ai/prompt_guard/prompt_guard_tutorial.ipynb rename to end-to-end-use-cases/responsible_ai/prompt_guard/prompt_guard_tutorial.ipynb diff --git a/recipes/use_cases/video_summary.ipynb b/end-to-end-use-cases/video_summary.ipynb similarity index 100% rename from recipes/use_cases/video_summary.ipynb rename to end-to-end-use-cases/video_summary.ipynb diff --git a/recipes/quickstart/Prompt_Engineering_with_Llama_3.ipynb b/getting-started/Prompt_Engineering_with_Llama.ipynb similarity index 95% rename from recipes/quickstart/Prompt_Engineering_with_Llama_3.ipynb rename to getting-started/Prompt_Engineering_with_Llama.ipynb index bc90afbe4..bab120bf7 100644 --- a/recipes/quickstart/Prompt_Engineering_with_Llama_3.ipynb +++ b/getting-started/Prompt_Engineering_with_Llama.ipynb @@ -7,11 +7,13 @@ "source": [ "\"Open\n", "\n", - "# Prompt Engineering with Llama 3.1\n", + "# Prompt Engineering with Llama\n", "\n", "Prompt engineering is using natural language to produce a desired response from a large language model (LLM).\n", "\n", - "This interactive guide covers prompt engineering & best practices with Llama 3.1." + "This interactive guide covers prompt engineering & best practices with Llama.\n", + "\n", + "Note: This notebook can be extended to any of the latest Llama models." ] }, { @@ -69,34 +69,6 @@ "1. 
`llama-2-70b-chat` - chat fine-tuned 70 billion parameter model (flagship)\n" ] }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Code Llama is a code-focused LLM built on top of Llama 2 also available in various sizes and finetunes:" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Code Llama\n", - "1. `codellama-7b` - code fine-tuned 7 billion parameter model\n", - "1. `codellama-13b` - code fine-tuned 13 billion parameter model\n", - "1. `codellama-34b` - code fine-tuned 34 billion parameter model\n", - "1. `codellama-70b` - code fine-tuned 70 billion parameter model\n", - "1. `codellama-7b-instruct` - code & instruct fine-tuned 7 billion parameter model\n", - "2. `codellama-13b-instruct` - code & instruct fine-tuned 13 billion parameter model\n", - "3. `codellama-34b-instruct` - code & instruct fine-tuned 34 billion parameter model\n", - "3. `codellama-70b-instruct` - code & instruct fine-tuned 70 billion parameter model\n", - "1. `codellama-7b-python` - Python fine-tuned 7 billion parameter model\n", - "2. `codellama-13b-python` - Python fine-tuned 13 billion parameter model\n", - "3. `codellama-34b-python` - Python fine-tuned 34 billion parameter model\n", - "3. `codellama-70b-python` - Python fine-tuned 70 billion parameter model" - ] - }, { "attachments": {}, "cell_type": "markdown", diff --git a/recipes/quickstart/RAG/hello_llama_cloud.ipynb b/getting-started/RAG/hello_llama_cloud.ipynb similarity index 100% rename from recipes/quickstart/RAG/hello_llama_cloud.ipynb rename to getting-started/RAG/hello_llama_cloud.ipynb diff --git a/recipes/quickstart/README.md b/getting-started/README.md similarity index 71% rename from recipes/quickstart/README.md rename to getting-started/README.md index a48c63436..bfde987b8 100644 --- a/recipes/quickstart/README.md +++ b/getting-started/README.md @@ -1,10 +1,9 @@ -## Llama-Recipes Quickstart +## Llama-Recipes Getting Started If you are new to developing with Meta Llama models, this is where you should start. This folder contains introductory-level notebooks across different techniques relating to Meta Llama. * The [Build_with_Llama 3.2](./build_with_Llama_3_2.ipynb) notebook showcases a comprehensive walkthrough of the new capabilities of Llama 3.2 models, including multimodal use cases, function/tool calling, Llama Stack, and Llama on edge. -* The [Running_Llama_Anywhere](./Running_Llama3_Anywhere/) notebooks demonstrate how to run Llama inference across Linux, Mac and Windows platforms using the appropriate tooling. * The [Prompt_Engineering_with_Llama](./Prompt_Engineering_with_Llama_3.ipynb) notebook showcases the various ways to elicit appropriate outputs from Llama. Take this notebook for a spin to get a feel for how Llama responds to different inputs and generation parameters. -* The [inference](./inference/) folder contains scripts to deploy Llama for inference on server and mobile. See also [3p_integrations/vllm](../3p_integrations/vllm/) and [3p_integrations/tgi](../3p_integrations/tgi/) for hosting Llama on open-source model servers. +* The [inference](./inference/) folder contains scripts to deploy Llama for inference on server and mobile. See also [3p-integrations/vllm](../3p-integrations/vllm/) and [3p-integrations/tgi](../3p-integrations/tgi/) for hosting Llama on open-source model servers. * The [RAG](./RAG/) folder contains a simple Retrieval-Augmented Generation application using Llama. 
-* The [finetuning](./finetuning/) folder contains resources to help you finetune Llama on your custom datasets, for both single- and multi-GPU setups. The scripts use the native llama-recipes finetuning code found in [finetuning.py](../../src/llama_recipes/finetuning.py) which supports these features: +* The [finetuning](./finetuning/) folder contains resources to help you finetune Llama on your custom datasets, for both single- and multi-GPU setups. The scripts use the native llama-recipes finetuning code found in [finetuning.py](../src/llama_recipes/finetuning.py) which supports these features: diff --git a/recipes/quickstart/build_with_Llama_3_2.ipynb b/getting-started/build_with_Llama_3_2.ipynb similarity index 100% rename from recipes/quickstart/build_with_Llama_3_2.ipynb rename to getting-started/build_with_Llama_3_2.ipynb diff --git a/recipes/quickstart/finetuning/LLM_finetuning_overview.md b/getting-started/finetuning/LLM_finetuning_overview.md similarity index 100% rename from recipes/quickstart/finetuning/LLM_finetuning_overview.md rename to getting-started/finetuning/LLM_finetuning_overview.md diff --git a/recipes/quickstart/finetuning/README.md b/getting-started/finetuning/README.md similarity index 87% rename from recipes/quickstart/finetuning/README.md rename to getting-started/finetuning/README.md index c7933474b..ea49fa0cf 100644 --- a/recipes/quickstart/finetuning/README.md +++ b/getting-started/finetuning/README.md @@ -6,7 +6,7 @@ This folder contains instructions to fine-tune Meta Llama 3 on a * [single-GPU setup](./singlegpu_finetuning.md) * [multi-GPU setup](./multigpu_finetuning.md) -using the canonical [finetuning script](../../../src/llama_recipes/finetuning.py) in the llama-recipes package. +using the canonical [finetuning script](../../src/llama_recipes/finetuning.py) in the llama-recipes package. If you are new to fine-tuning techniques, check out [an overview](./LLM_finetuning_overview.md). @@ -17,10 +17,10 @@ If you are new to fine-tuning techniques, check out [an overview](./LLM_finetuni ## How to configure finetuning settings? > [!TIP] -> All the setting defined in [config files](../../../src/llama_recipes/configs/) can be passed as args through CLI when running the script, there is no need to change from config files directly. +> All the settings defined in [config files](../../src/llama_recipes/configs/) can be passed as args through the CLI when running the script; there is no need to change the config files directly. -* [Training config file](../../../src/llama_recipes/configs/training.py) is the main config file that helps to specify the settings for our run and can be found in [configs folder](../../../src/llama_recipes/configs/) +* [Training config file](../../src/llama_recipes/configs/training.py) is the main config file that helps to specify the settings for our run and can be found in [configs folder](../../src/llama_recipes/configs/) It lets us specify the training settings for everything from `model_name` to `dataset_name`, `batch_size` and so on. Below is the list of supported settings: @@ -71,11 +71,11 @@ It lets us specify the training settings for everything from `model_name` to `da ``` -* [Datasets config file](../../../src/llama_recipes/configs/datasets.py) provides the available options for datasets. 
-* [peft config file](../../../src/llama_recipes/configs/peft.py) provides the supported PEFT methods and respective settings that can be modified. We currently support LoRA and Llama-Adapter. Please note that LoRA is the only technique which is supported in combination with FSDP. +* [peft config file](../../src/llama_recipes/configs/peft.py) provides the supported PEFT methods and respective settings that can be modified. We currently support LoRA and Llama-Adapter. Please note that LoRA is the only technique which is supported in combination with FSDP. -* [FSDP config file](../../../src/llama_recipes/configs/fsdp.py) provides FSDP settings such as: +* [FSDP config file](../../src/llama_recipes/configs/fsdp.py) provides FSDP settings such as: * `mixed_precision` boolean flag to specify using mixed precision, defaults to true. diff --git a/recipes/quickstart/finetuning/datasets/README.md b/getting-started/finetuning/datasets/README.md similarity index 94% rename from recipes/quickstart/finetuning/datasets/README.md rename to getting-started/finetuning/datasets/README.md index 8795ca96d..3543ee776 100644 --- a/recipes/quickstart/finetuning/datasets/README.md +++ b/getting-started/finetuning/datasets/README.md @@ -48,17 +48,17 @@ python -m llama_recipes.finetuning --dataset "custom_dataset" --custom_dataset.f This will call the function `get_foo` instead of `get_custom_dataset` when retrieving the dataset. ### Adding new dataset -Each dataset has a corresponding configuration (dataclass) in [configs/datasets.py](../../../../src/llama_recipes/configs/datasets.py) which contains the dataset name, training/validation split names, as well as optional parameters like datafiles etc. +Each dataset has a corresponding configuration (dataclass) in [configs/datasets.py](../../../src/llama_recipes/configs/datasets.py) which contains the dataset name, training/validation split names, as well as optional parameters like datafiles etc. -Additionally, there is a preprocessing function for each dataset in the [datasets](../../../../src/llama_recipes/datasets) folder. +Additionally, there is a preprocessing function for each dataset in the [datasets](../../../src/llama_recipes/datasets) folder. The returned data of the dataset needs to be consumable by the forward method of the fine-tuned model by calling ```model(**data)```. For CausalLM models this usually means that the data needs to be in the form of a dictionary with "input_ids", "attention_mask" and "labels" fields. To add a custom dataset the following steps need to be performed. -1. Create a dataset configuration after the schema described above. Examples can be found in [configs/datasets.py](../../../../src/llama_recipes/configs/datasets.py). 2. Create a preprocessing routine which loads the data and returns a PyTorch style dataset. The signature for the preprocessing function needs to be (dataset_config, tokenizer, split_name) where split_name will be the string for train/validation split as defined in the dataclass. -3. Register the dataset name and preprocessing function by inserting it as key and value into the DATASET_PREPROC dictionary in [datasets/__init__.py](../../../../src/llama_recipes/datasets/__init__.py) +3. 
Register the dataset name and preprocessing function by inserting it as key and value into the DATASET_PREPROC dictionary in [datasets/__init__.py](../../../src/llama_recipes/datasets/__init__.py) 4. Set dataset field in training config to dataset name or use --dataset option of the `llama_recipes.finetuning` module or examples/finetuning.py training script. ## Application diff --git a/recipes/quickstart/finetuning/datasets/custom_dataset.py b/getting-started/finetuning/datasets/custom_dataset.py similarity index 100% rename from recipes/quickstart/finetuning/datasets/custom_dataset.py rename to getting-started/finetuning/datasets/custom_dataset.py diff --git a/recipes/quickstart/finetuning/datasets/ocrvqa_dataset.py b/getting-started/finetuning/datasets/ocrvqa_dataset.py similarity index 99% rename from recipes/quickstart/finetuning/datasets/ocrvqa_dataset.py rename to getting-started/finetuning/datasets/ocrvqa_dataset.py index f5948e151..9597cac62 100644 --- a/recipes/quickstart/finetuning/datasets/ocrvqa_dataset.py +++ b/getting-started/finetuning/datasets/ocrvqa_dataset.py @@ -137,4 +137,3 @@ def __call__(self, samples): def get_data_collator(processor): return OCRVQADataCollator(processor) - diff --git a/recipes/quickstart/finetuning/datasets/raft_dataset.py b/getting-started/finetuning/datasets/raft_dataset.py similarity index 100% rename from recipes/quickstart/finetuning/datasets/raft_dataset.py rename to getting-started/finetuning/datasets/raft_dataset.py diff --git a/recipes/quickstart/finetuning/finetune_vision_model.md b/getting-started/finetuning/finetune_vision_model.md similarity index 100% rename from recipes/quickstart/finetuning/finetune_vision_model.md rename to getting-started/finetuning/finetune_vision_model.md diff --git a/recipes/quickstart/finetuning/finetuning.py b/getting-started/finetuning/finetuning.py similarity index 100% rename from recipes/quickstart/finetuning/finetuning.py rename to getting-started/finetuning/finetuning.py diff --git a/recipes/quickstart/finetuning/multi_node.slurm b/getting-started/finetuning/multi_node.slurm similarity index 100% rename from recipes/quickstart/finetuning/multi_node.slurm rename to getting-started/finetuning/multi_node.slurm diff --git a/recipes/quickstart/finetuning/multigpu_finetuning.md b/getting-started/finetuning/multigpu_finetuning.md similarity index 90% rename from recipes/quickstart/finetuning/multigpu_finetuning.md rename to getting-started/finetuning/multigpu_finetuning.md index 0dbf99b8f..43a818d18 100644 --- a/recipes/quickstart/finetuning/multigpu_finetuning.md +++ b/getting-started/finetuning/multigpu_finetuning.md @@ -96,14 +96,14 @@ srun torchrun --nproc_per_node 8 --rdzv_id $RANDOM --rdzv_backend c10d --rdzv_e Do not forget to adjust the number of nodes, ntasks and gpus-per-task in the top. ## Running with different datasets -Currently 3 open source datasets are supported that can be found in [Datasets config file](../../../src/llama_recipes/configs/datasets.py). You can also use your custom dataset (more info [here](./datasets/README.md)). +Currently 3 open source datasets are supported that can be found in [Datasets config file](../../src/llama_recipes/configs/datasets.py). You can also use your custom dataset (more info [here](./datasets/README.md)). -* `grammar_dataset` : use this [notebook](../../../src/llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) to pull and process the Jfleg and C4 200M datasets for grammar checking. 
+* `grammar_dataset` : use this [notebook](../../src/llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) to pull and process the Jfleg and C4 200M datasets for grammar checking. * `alpaca_dataset` : to get this open source data please download the `alpaca.json` to `dataset` folder. ```bash -wget -P ../../../src/llama_recipes/datasets https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json +wget -P ../../src/llama_recipes/datasets https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json ``` * `samsum_dataset` @@ -132,7 +132,7 @@ In case you are dealing with slower interconnect network between nodes, to reduc HSDP (Hybrid sharding Data Parallel) helps to define a hybrid sharding strategy where you can have FSDP within `sharding_group_size` which can be the minimum number of GPUs you can fit your model and DDP between the replicas of the model specified by `replica_group_size`. -This will require to set the Sharding strategy in [fsdp config](../../../src/llama_recipes/configs/fsdp.py) to `ShardingStrategy.HYBRID_SHARD` and specify two additional settings, `sharding_group_size` and `replica_group_size` where former specifies the sharding group size, number of GPUs that you model can fit into to form a replica of a model and latter specifies the replica group size, which is world_size/sharding_group_size. +This requires setting the sharding strategy in [fsdp config](../../src/llama_recipes/configs/fsdp.py) to `ShardingStrategy.HYBRID_SHARD` and specifying two additional settings, `sharding_group_size` and `replica_group_size`, where the former is the sharding group size (the minimum number of GPUs your model fits into, forming one replica of the model) and the latter is the replica group size, which is world_size/sharding_group_size. ```bash diff --git a/recipes/quickstart/finetuning/quickstart_peft_finetuning.ipynb b/getting-started/finetuning/quickstart_peft_finetuning.ipynb similarity index 100% rename from recipes/quickstart/finetuning/quickstart_peft_finetuning.ipynb rename to getting-started/finetuning/quickstart_peft_finetuning.ipynb diff --git a/recipes/quickstart/finetuning/singlegpu_finetuning.md b/getting-started/finetuning/singlegpu_finetuning.md similarity index 83% rename from recipes/quickstart/finetuning/singlegpu_finetuning.md rename to getting-started/finetuning/singlegpu_finetuning.md index 1b054be18..80689d4ea 100644 --- a/recipes/quickstart/finetuning/singlegpu_finetuning.md +++ b/getting-started/finetuning/singlegpu_finetuning.md @@ -1,12 +1,12 @@ # Fine-tuning with Single GPU This recipe steps you through how to finetune a Meta Llama 3 model on the text summarization task using the [samsum](https://huggingface.co/datasets/samsum) dataset on a single GPU. -These are the instructions for using the canonical [finetuning script](../../../src/llama_recipes/finetuning.py) in the llama-recipes package. +These are the instructions for using the canonical [finetuning script](../../src/llama_recipes/finetuning.py) in the llama-recipes package. ## Requirements -Ensure that you have installed the llama-recipes package ([details](../../../README.md#installing)). +Ensure that you have installed the llama-recipes package. To run fine-tuning on a single GPU, we will make use of two packages: 1. [PEFT](https://github.com/huggingface/peft) to use parameter-efficient finetuning. @@ -33,15 +33,15 @@ The args used in the command above are: ### How to run with different datasets? 
-Currently 3 open source datasets are supported that can be found in [Datasets config file](../../../src/llama_recipes/configs/datasets.py). You can also use your custom dataset (more info [here](./datasets/README.md)). +Currently 3 open source datasets are supported that can be found in [Datasets config file](../../src/llama_recipes/configs/datasets.py). You can also use your custom dataset (more info [here](./datasets/README.md)). -* `grammar_dataset` : use this [notebook](../../../src/llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) to pull and process the Jfleg and C4 200M datasets for grammar checking. +* `grammar_dataset` : use this [notebook](../../src/llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) to pull and process the Jfleg and C4 200M datasets for grammar checking. * `alpaca_dataset` : to get this open source data please download the `alpaca.json` to `dataset` folder. ```bash -wget -P ../../../src/llama_recipes/datasets https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json +wget -P ../../src/llama_recipes/datasets https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json ``` * `samsum_dataset` diff --git a/recipes/quickstart/inference/README.md b/getting-started/inference/README.md similarity index 50% rename from recipes/quickstart/inference/README.md rename to getting-started/inference/README.md index 7f48aba70..afa6ffdf3 100644 --- a/recipes/quickstart/inference/README.md +++ b/getting-started/inference/README.md @@ -2,7 +2,5 @@ This folder contains scripts to get you started with inference on Meta Llama models. -* [Code Llama](./code_llama/) contains scripts for tasks relating to code generation using CodeLlama * [Local Inference](./local_inference/) contains scripts to do memory efficient inference on servers and local machines -* [Mobile Inference](./mobile_inference/) has scripts using MLC to serve Llama on Android (h/t to OctoAI for the contribution!) -* [Model Update Example](./modelUpgradeExample.py) shows an example of replacing a Llama 3 model with a Llama 3.1 model. \ No newline at end of file +* [Mobile Inference](./mobile_inference/) has scripts using MLC to serve Llama on Android (h/t to OctoAI for the contribution!) \ No newline at end of file diff --git a/recipes/quickstart/inference/local_inference/README.md b/getting-started/inference/local_inference/README.md similarity index 96% rename from recipes/quickstart/inference/local_inference/README.md rename to getting-started/inference/local_inference/README.md index 40f2e5015..60d025d80 100644 --- a/recipes/quickstart/inference/local_inference/README.md +++ b/getting-started/inference/local_inference/README.md @@ -105,7 +105,7 @@ python inference.py --model_name --peft_model --prompt_file B(Applications e.g. mobile, web)\n", - " B --> |Hosted API|C(Platforms e.g. Custom, HuggingFace, Replicate)\n", - " B -- optional --> E(Frameworks e.g. LangChain)\n", - " C-->|User Input|D[Llama 3]\n", - " D-->|Model Output|C\n", - " E --> C\n", - " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", - " \"\"\")\n", - "\n", - "def rag_arch():\n", - " mm(\"\"\"\n", - " flowchart TD\n", - " A[User Prompts] --> B(Frameworks e.g. 
LangChain)\n", - " B <--> |Database, Docs, XLS|C[fa:fa-database External Data]\n", - " B -->|API|D[Llama 3]\n", - " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", - " \"\"\")\n", - "\n", - "def llama2_family():\n", - " mm(\"\"\"\n", - " graph LR;\n", - " llama-2 --> llama-2-7b\n", - " llama-2 --> llama-2-13b\n", - " llama-2 --> llama-2-70b\n", - " llama-2-7b --> llama-2-7b-chat\n", - " llama-2-13b --> llama-2-13b-chat\n", - " llama-2-70b --> llama-2-70b-chat\n", - " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", - " \"\"\")\n", - "\n", - "def llama3_family():\n", - " mm(\"\"\"\n", - " graph LR;\n", - " llama-3 --> llama-3-8b\n", - " llama-3 --> llama-3-70b\n", - " llama-3-8b --> llama-3-8b\n", - " llama-3-8b --> llama-3-8b-instruct\n", - " llama-3-70b --> llama-3-70b\n", - " llama-3-70b --> llama-3-70b-instruct\n", - " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", - " \"\"\")\n", - " \n", - "def llama3_1_family():\n", - " mm(\"\"\"\n", - " graph LR;\n", - " llama-3-1 --> llama-3-8b\n", - " llama-3-1 --> llama-3-70b\n", - " llama-3-1 --> llama-3-4050b\n", - " llama-3-1-8b --> llama-3-1-8b\n", - " llama-3-1-8b --> llama-3-1-8b-instruct\n", - " llama-3-1-70b --> llama-3-1-70b\n", - " llama-3-1-70b --> llama-3-1-70b-instruct\n", - " llama-3-1-405b --> llama-3-1-405b-instruct\n", - " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", - " \"\"\")\n", - "\n", - "import ipywidgets as widgets\n", - "from IPython.display import display, Markdown\n", - "\n", - "# Create a text widget\n", - "API_KEY = widgets.Password(\n", - " value='',\n", - " placeholder='',\n", - " description='API_KEY:',\n", - " disabled=False\n", - ")\n", - "\n", - "def md(t):\n", - " display(Markdown(t))\n", - "\n", - "def bot_arch():\n", - " mm(\"\"\"\n", - " graph LR;\n", - " user --> prompt\n", - " prompt --> i_safety\n", - " i_safety --> context\n", - " context --> Llama_3\n", - " Llama_3 --> output\n", - " output --> o_safety\n", - " i_safety --> memory\n", - " o_safety --> memory\n", - " memory --> context\n", - " o_safety --> user\n", - " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", - " \"\"\")\n", - "\n", - "def fine_tuned_arch():\n", - " mm(\"\"\"\n", - " graph LR;\n", - " Custom_Dataset --> Pre-trained_Llama\n", - " Pre-trained_Llama --> Fine-tuned_Llama\n", - " Fine-tuned_Llama --> RLHF\n", - " RLHF --> |Loss:Cross-Entropy|Fine-tuned_Llama\n", - " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", - " \"\"\")\n", - "\n", - "def load_data_faiss_arch():\n", - " mm(\"\"\"\n", - " graph LR;\n", - " documents --> textsplitter\n", - " textsplitter --> embeddings\n", - " embeddings --> vectorstore\n", - " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", - " \"\"\")\n", - "\n", - "def mem_context():\n", - " mm(\"\"\"\n", - " graph LR\n", - " context(text)\n", - " user_prompt --> context\n", - " instruction --> context\n", - " examples --> context\n", - " memory --> context\n", - " context --> tokenizer\n", - " tokenizer --> embeddings\n", - " embeddings --> LLM\n", - " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", - " \"\"\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "i4Np_l_KtIno" - }, - "source": [ - "### **1 - Understanding Llama 3.1**" - ] - }, - { - 
"cell_type": "markdown", - "metadata": { - "id": "PGPSI3M5PGTi" - }, - "source": [ - "### **1.1 - What is Llama 3.1?**\n", - "\n", - "* State of the art (SOTA), Open Source LLM\n", - "* 8B, 70B, 405B - base and instruct models\n", - "* Choosing model: Size, Quality, Cost, Speed\n", - "* Pretrained + Chat\n", - "* [Meta Llama 3.1 Blog](https://ai.meta.com/blog/meta-llama-3-1/)\n", - "* [Getting Started with Meta Llama](https://llama.meta.com/docs/get-started)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 240 - }, - "executionInfo": { - "elapsed": 248, - "status": "ok", - "timestamp": 1695832233087, - "user": { - "displayName": "Amit Sangani", - "userId": "11552178012079240149" - }, - "user_tz": 420 - }, - "id": "OXRCC7wexZXd", - "outputId": "1feb1918-df4b-4cec-d09e-ffe55c12090b" - }, - "outputs": [], - "source": [ - "llama2_family()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "llama3_family()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "llama3_1_family()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aYeHVVh45bdT" - }, - "source": [ - "### **1.2 - Accessing Llama 3.1**\n", - "* Download + Self Host (i.e. [download Llama](https://ai.meta.com/resources/models-and-libraries/llama-downloads))\n", - "* Hosted API Platform (e.g. [Groq](https://console.groq.com/), [Replicate](https://replicate.com/meta/meta-llama-3-8b-instruct), [Together](https://api.together.xyz/playground/language/meta-llama/Llama-3-8b-hf), [Anyscale](https://app.endpoints.anyscale.com/playground))\n", - "\n", - "* Hosted Container Platform (e.g. [Azure](https://techcommunity.microsoft.com/t5/ai-machine-learning-blog/introducing-llama-2-on-azure/ba-p/3881233), [AWS](https://aws.amazon.com/blogs/machine-learning/llama-2-foundation-models-from-meta-are-now-available-in-amazon-sagemaker-jumpstart/), [GCP](https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/139))\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kBuSay8vtzL4" - }, - "source": [ - "### **1.3 - Use Cases of Llama 3.1**\n", - "* Content Generation\n", - "* Summarization\n", - "* General Chatbots\n", - "* RAG (Retrieval Augmented Generation): Chat about Your Own Data\n", - "* Fine-tuning\n", - "* Agents" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sd54g0OHuqBY" - }, - "source": [ - "## **2 - Using and Comparing Llama 3 and Llama 2**\n", - "\n", - "We will be using Llama 2 7b & 70b chat and Llama 3 8b & 70b instruct models hosted on [Replicate](https://replicate.com/search?query=llama) to run the examples here. You will need to first sign in with Replicate with your github account, then create a free API token [here](https://replicate.com/account/api-tokens) that you can use for a while. 
You can also use other Llama 3 cloud providers such as [Groq](https://console.groq.com/), [Together](https://api.together.xyz/playground/language/meta-llama/Llama-3-8b-hf), or [Anyscale](https://app.endpoints.anyscale.com/playground).\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "h3YGMDJidHtH" - }, - "source": [ - "### **2.1 - Install dependencies**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VhN6hXwx7FCp" - }, - "outputs": [], - "source": [ - "!pip install replicate" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### **2.2 - Create helpers for Llama 2 and Llama 3**\n", - "First, set your Replicate API token as environment variables.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8hkWpqWD28ho" - }, - "outputs": [], - "source": [ - "import os\n", - "from getpass import getpass\n", - "\n", - "REPLICATE_API_TOKEN = getpass()\n", - "\n", - "os.environ[\"REPLICATE_API_TOKEN\"] = REPLICATE_API_TOKEN" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create Llama 2 and Llama 3 helper functions - for chatbot type of apps, we'll use Llama 3 instruct and Llama 2 chat models, not the base models." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bVCHZmETk36v" - }, - "outputs": [], - "source": [ - "import replicate\n", - "\n", - "def llama2_7b(prompt):\n", - " output = replicate.run(\n", - " \"meta/llama-2-7b-chat\",\n", - " input={\"prompt\": prompt}\n", - " )\n", - " return ''.join(output)\n", - "\n", - "def llama2_70b(prompt):\n", - " output = replicate.run(\n", - " \"meta/llama-2-70b-chat\",\n", - " input={\"prompt\": prompt}\n", - " )\n", - " return ''.join(output)\n", - "\n", - "def llama3_8b(prompt):\n", - " output = replicate.run(\n", - " \"meta/meta-llama-3-8b-instruct\",\n", - " input={\"prompt\": prompt}\n", - " )\n", - " return ''.join(output)\n", - "\n", - "def llama3_70b(prompt):\n", - " output = replicate.run(\n", - " \"meta/meta-llama-3-70b-instruct\",\n", - " input={\"prompt\": prompt}\n", - " )\n", - " return ''.join(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5Jxq0pmf6L73" - }, - "source": [ - "### **2.3 - Basic QA with Llama 2 and 3**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "H93zZBIk6tNU" - }, - "outputs": [], - "source": [ - "prompt = \"The typical color of a llama is: \"\n", - "output = llama2_7b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama3_8b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama2_7b(\"The typical color of a llama is what? Answer in one word.\")\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama3_8b(\"The typical color of a llama is what? 
Answer in one word.\")\n", - "md(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note: Llama 3 follows instructions better than Llama 2 in single-turn chat.**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cWs_s9y-avIT" - }, - "source": [ - "## **3 - Chat conversation**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "r4DyTLD5ys6t" - }, - "source": [ - "### **3.1 - Single-turn chat**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EMM_egWMys6u" - }, - "outputs": [], - "source": [ - "prompt_chat = \"What is the average lifespan of a Llama? Answer the question in few words.\"\n", - "output = llama2_7b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "sZ7uVKDYucgi" - }, - "outputs": [], - "source": [ - "output = llama3_8b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "WQl3wmfbyBQ1" - }, - "outputs": [], - "source": [ - "# example without previous context. LLM's are stateless and cannot understand \"they\" without previous context\n", - "prompt_chat = \"What animal family are they? Answer the question in few words.\"\n", - "output = llama2_7b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama3_8b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama2_70b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama3_70b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note: Llama 3 70b doesn't hallucinate.**" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### **3.2 - Multi-turn chat**\n", - "Chat app requires us to send in previous context to LLM to get in valid responses. Below is an example of Multi-turn chat." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "t7SZe5fT3HG3" - }, - "outputs": [], - "source": [ - "# example of multi-turn chat, with storing previous context\n", - "prompt_chat = \"\"\"\n", - "User: What is the average lifespan of a Llama?\n", - "Assistant: 15-20 years.\n", - "User: What animal family are they?\n", - "\"\"\"\n", - "output = llama2_7b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama3_8b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note: Llama 2 and 3 both behave well for using the chat history for follow up questions.**" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### **3.3 - Multi-turn chat with more instruction**\n", - "Adding the instructon \"Answer the question with one word\" to see the difference of Llama 2 and 3." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# example of multi-turn chat, with storing previous context\n", - "prompt_chat = \"\"\"\n", - "User: What is the average lifespan of a Llama?\n", - "Assistant: Sure! 
The average lifespan of a llama is around 20-30 years.\n", - "User: What animal family are they?\n", - "\n", - "Answer the question with one word.\n", - "\"\"\"\n", - "output = llama2_7b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama2_70b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama3_8b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Both Llama 3 8b and Llama 2 70b follow instructions (e.g. \"Answer the question with one word\") better than Llama 2 7b in multi-turn chat.**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "moXnmJ_xyD10" - }, - "source": [ - "### **4.2 - Prompt Engineering**\n", - "* Prompt engineering refers to the science of designing effective prompts to get desired responses\n", - "\n", - "* Helps reduce hallucination\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "t-v-FeZ4ztTB" - }, - "source": [ - "#### **4.2.1 - In-Context Learning (e.g. Zero-shot, Few-shot)**\n", - " * In-context learning - a specific method of prompt engineering where demonstrations of the task are provided as part of the prompt.\n", - " 1. Zero-shot learning - the model performs tasks without any input examples.\n", - " 2. Few or “N-Shot” Learning - the model performs the task based on input examples in the user's prompt." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6W71MFNZyRkQ" - }, - "outputs": [], - "source": [ - "# Zero-shot example. No examples are given in the prompt; we simply ask for the sentiment\n", - "prompt = '''\n", - "Classify: I saw a Gecko.\n", - "Sentiment: ?\n", - "\n", - "Give one word response.\n", - "'''\n", - "output = llama2_7b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "MCQRjf1Y1RYJ" - }, - "outputs": [], - "source": [ - "output = llama3_8b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note: Llama 3 may answer differently than Llama 2.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8UmdlTmpDZxA" - }, - "outputs": [], - "source": [ - "# By giving examples to Llama, it understands the expected output format.\n", - "\n", - "prompt = '''\n", - "Classify: I love Llamas!\n", - "Sentiment: Positive\n", - "Classify: I don't like Snakes.\n", - "Sentiment: Negative\n", - "Classify: I saw a Gecko.\n", - "Sentiment:\n", - "\n", - "Give one word response.\n", - "'''\n", - "\n", - "output = llama2_7b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "M_EcsUo1zqFD" - }, - "outputs": [], - "source": [ - "output = llama3_8b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note: Llama 2, with few shots, has the same output \"Neutral\" as Llama 3, but Llama 2 doesn't follow instructions (Give one word response) well.**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mbr124Y197xl" - }, - "source": [ - "#### **4.2.2 - Chain of Thought**\n", - "\"Chain of thought\" enables complex reasoning through logical step-by-step thinking and generates meaningful and contextually relevant responses." 
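For models that don't reason step by step on their own, a common technique is to append an explicit step-by-step trigger to the prompt. Below is a minimal sketch of this, assuming the `llama2_7b` and `md` helpers defined earlier in this notebook; the trigger phrase is one common choice, not the only one.

```python
# A minimal sketch: explicitly eliciting chain-of-thought by appending a
# step-by-step trigger to the prompt. Assumes the llama2_7b and md helpers
# defined earlier in this notebook.
def with_cot(prompt):
    # "Let's think step by step." is a widely used zero-shot CoT trigger.
    return f"{prompt.rstrip()}\n\nLet's think step by step."

prompt = (
    "Llama started with 5 tennis balls. It buys 2 more cans of tennis balls. "
    "Each can has 3 tennis balls. How many tennis balls does Llama have?"
)
output = llama2_7b(with_cot(prompt))
md(output)
```

As the cells below show, Llama 3 typically produces this kind of step-by-step answer without any trigger.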
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Xn8zmLBQzpgj" - }, - "outputs": [], - "source": [ - "# Standard prompting\n", - "prompt = '''\n", - "Llama started with 5 tennis balls. It buys 2 more cans of tennis balls. Each can has 3 tennis balls.\n", - "How many tennis balls does Llama have?\n", - "\n", - "Answer in one word.\n", - "'''\n", - "\n", - "output = llama3_8b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lKNOj79o1Kwu" - }, - "outputs": [], - "source": [ - "output = llama3_70b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note: Llama 3 8b did not get the right answer because it was asked to answer in one word.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# By default, Llama 3 models produce \"Chain-Of-Thought\" style step-by-step answers\n", - "prompt = '''\n", - "Llama started with 5 tennis balls. It buys 2 more cans of tennis balls. Each can has 3 tennis balls.\n", - "How many tennis balls does Llama have?\n", - "'''\n", - "\n", - "output = llama3_8b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama3_70b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note: By default, Llama 3 models identify word problems and solve them step by step!**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "prompt = \"\"\"\n", - "15 of us want to go to a restaurant.\n", - "Two of them have cars\n", - "Each car can seat 5 people.\n", - "Two of us have motorcycles.\n", - "Each motorcycle can fit 2 people.\n", - "Can we all get to the restaurant by car or motorcycle?\n", - "Think step by step.\n", - "Provide the answer as a single yes/no answer first.\n", - "Then explain each intermediate step.\n", - "\"\"\"\n", - "output = llama3_8b(prompt)\n", - "print(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama3_70b(prompt)\n", - "print(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note: The Llama 3 70b model works correctly in this example.**" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Summary: Llama 2 often needs encouragement for step-by-step thinking to reason correctly. 
Llama 3 understands, reasons and explains better, making chain of thought unnecessary in the cases above.**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "C7tDW-AH770Y" - }, - "source": [ - "### **4.3 - Retrieval Augmented Generation (RAG)**\n", - "* Prompt Eng Limitations - Knowledge cutoff & lack of specialized data\n", - "\n", - "* Retrieval Augmented Generation (RAG) allows us to retrieve snippets of information from external data sources and augment the user's prompt with them to get tailored responses from Llama.\n", - "\n", - "For our demo, we are going to load an external web page from a URL and query against its content to get contextually relevant information back with the help of Llama!\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 259 - }, - "executionInfo": { - "elapsed": 329, - "status": "ok", - "timestamp": 1695832267093, - "user": { - "displayName": "Amit Sangani", - "userId": "11552178012079240149" - }, - "user_tz": 420 - }, - "id": "Fl1LPltpRQD9", - "outputId": "4410c9bf-3559-4a05-cebb-a5731bb094c1" - }, - "outputs": [], - "source": [ - "rag_arch()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JJaGMLl_4vYm" - }, - "source": [ - "#### **4.3.1 - LangChain**\n", - "LangChain is a framework that makes it easier to implement RAG." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install langchain\n", - "!pip install langchain-community\n", - "!pip install sentence-transformers\n", - "!pip install faiss-cpu\n", - "!pip install bs4\n", - "!pip install langchain-groq" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### **4.3.2 - LangChain Q&A Retriever**\n", - "* ConversationalRetrievalChain\n", - "\n", - "* Query the Source documents\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "gAV2EkZqcruF" - }, - "outputs": [], - "source": [ - "from langchain_community.embeddings import HuggingFaceEmbeddings\n", - "from langchain_community.vectorstores import FAISS\n", - "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", - "from langchain_community.document_loaders import WebBaseLoader\n", - "import bs4\n", - "\n", - "# Step 1: Load the document from a web URL\n", - "loader = WebBaseLoader([\"https://huggingface.co/blog/llama31\"])\n", - "documents = loader.load()\n", - "\n", - "# Step 2: Split the document into chunks with a specified chunk size\n", - "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)\n", - "all_splits = text_splitter.split_documents(documents)\n", - "\n", - "# Step 3: Store the document into a vector store with a specific embedding model\n", - "vectorstore = FAISS.from_documents(all_splits, HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-mpnet-base-v2\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You'll need to first sign in at [Groq](https://console.groq.com/login) with your GitHub or Gmail account, then get an API token to try Groq out for free." 
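Before wiring the vector store into a chain, it can help to sanity-check retrieval directly. Below is a small sketch, assuming the `vectorstore` built in the cell above; the query string is just an example.

```python
# Sanity-check retrieval before building the chain: fetch the chunks most
# similar to a test query from the FAISS vector store built above.
docs = vectorstore.similarity_search("What's new with Llama 3?", k=3)
for i, doc in enumerate(docs):
    print(f"--- chunk {i} ---")
    print(doc.page_content[:300])
```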
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from getpass import getpass\n", - "\n", - "GROQ_API_TOKEN = getpass()\n", - "\n", - "os.environ[\"GROQ_API_KEY\"] = GROQ_API_TOKEN" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_groq import ChatGroq\n", - "llm = ChatGroq(temperature=0, model_name=\"llama3-8b-8192\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.chains import ConversationalRetrievalChain\n", - "\n", - "# Query against your own data\n", - "chain = ConversationalRetrievalChain.from_llm(llm,\n", - " vectorstore.as_retriever(),\n", - " return_source_documents=True)\n", - "\n", - "# no chat history passed\n", - "result = chain({\"question\": \"What’s new with Llama 3?\", \"chat_history\": []})\n", - "md(result['answer'])\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "CelLHIvoy2Ke" - }, - "outputs": [], - "source": [ - "# This time your previous question and answer will be included as a chat history which will enable the ability\n", - "# to ask follow-up questions.\n", - "query = \"What two sizes?\"\n", - "chat_history = [(query, result[\"answer\"])]\n", - "result = chain({\"question\": query, \"chat_history\": chat_history})\n", - "md(result['answer'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TEvefAWIJONx" - }, - "source": [ - "## **5 - Fine-Tuning Models**\n", - "\n", - "* Limitations of Prompt Eng and RAG\n", - "* Fine-Tuning Arch\n", - "* Types (PEFT, LoRA, QLoRA)\n", - "* Using PyTorch for Pre-Training & Fine-Tuning\n", - "\n", - "* Evals + Quality\n", - "\n", - "Examples of Fine-Tuning:\n", - "* [Meta Llama Recipes](https://github.com/meta-llama/llama-recipes/tree/main/recipes/finetuning)\n", - "* [Hugging Face fine-tuning with Llama 3](https://huggingface.co/blog/llama3#fine-tuning-with-%F0%9F%A4%97-trl)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_8lcgdZa8onC" - }, - "source": [ - "## **6 - Responsible AI**\n", - "\n", - "* Power + Responsibility\n", - "* Hallucinations\n", - "* Input & Output Safety\n", - "* Red-teaming (simulating real-world cyber attackers)\n", - "* [Responsible Use Guide](https://ai.meta.com/llama/responsible-use-guide/)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pbqb006R-T_k" - }, - "source": [ - "## **7 - Conclusion**\n", - "* Active research on LLMs and Llama\n", - "* Leverage the power of Llama and its open community\n", - "* Safety and responsible use are paramount!\n", - "\n", - "* Call-To-Action\n", - " * [Replicate Free Credits](https://replicate.fyi/connect2023) for Connect attendees!\n", - " * This notebook is available through Llama GitHub recipes\n", - " * Use Llama in your projects and give us feedback\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gSz5dTMxp7xo" - }, - "source": [ - "#### **Resources**\n", - "- [Meta Llama 3.1 Blog](https://ai.meta.com/blog/meta-llama-3-1/)\n", - "- [Getting Started with Meta Llama](https://llama.meta.com/docs/get-started)\n", - "- [Llama 3 repo](https://github.com/meta-llama/llama3)\n", - "- [Llama 3 model card](https://github.com/meta-llama/llama3/blob/main/MODEL_CARD.md)\n", - "- [Llama 3 Recipes repo](https://github.com/meta-llama/llama-recipes)\n", - "- [Responsible Use 
Guide](https://ai.meta.com/llama/responsible-use-guide/)\n", - "- [Acceptable Use Policy](https://ai.meta.com/llama/use-policy/)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "ioVMNcTesSEk" - ], - "machine_shape": "hm", - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.14" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/recipes/quickstart/Running_Llama3_Anywhere/Running_Llama_on_HF_transformers.ipynb b/recipes/quickstart/Running_Llama3_Anywhere/Running_Llama_on_HF_transformers.ipynb deleted file mode 100644 index 06f0e4094..000000000 --- a/recipes/quickstart/Running_Llama3_Anywhere/Running_Llama_on_HF_transformers.ipynb +++ /dev/null @@ -1,336 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Running Meta Llama 3.1 on Google Colab using Hugging Face transformers library\n", - "This notebook goes over how you can set up and run Llama 3.1 using the Hugging Face transformers library\n", - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Steps at a glance:\n", - "This demo showcases how to run the example with already converted Llama 3.1 weights on [Hugging Face](https://huggingface.co/meta-llama). Please Note: To use the downloads on Hugging Face, you must first request a download as shown in the steps below, making sure that you are using the same email address as your Hugging Face account.\n", - "\n", - "To use already converted weights, start here:\n", - "1. Request download of model weights from the Llama website\n", - "2. Log in to Hugging Face from your terminal using the same email address as (1). Follow the instructions [here](https://huggingface.co/docs/huggingface_hub/en/quick-start). \n", - "3. Run the example\n", - "\n", - "\n", - "Otherwise, if you'd like to download the models locally and convert them to the HF format, follow the steps below to convert the weights:\n", - "1. Request download of model weights from the Llama website\n", - "2. Clone the llama repo and get the weights\n", - "3. Convert the model weights\n", - "4. Prepare the script\n", - "5. Run the example" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Using already converted weights" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 1. Request download of model weights from the Llama website\n", - "Before you can run the model locally, you will need to get the model weights. To get the model weights, visit the [Llama website](https://llama.meta.com/) and click on “download models”. \n", - "\n", - "Fill in the required information, select the models “Meta Llama 3.1” and accept the terms & conditions. You will receive a URL in your email in a short time." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 2. 
Prepare the script\n", - "\n", - "We will install the Transformers library and Accelerate library for our demo.\n", - "\n", - "The `Transformers` library provides many models to perform tasks on texts such as classification, question answering, text generation, etc.\n", - "The `accelerate` library enables the same PyTorch code to be run across any distributed configuration of GPUs and CPUs.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install transformers\n", - "!pip install accelerate" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, we will import AutoTokenizer, which is a class from the transformers library that automatically chooses the correct tokenizer for a given pre-trained model, and import the transformers library and torch for PyTorch.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from transformers import AutoTokenizer\n", - "import transformers\n", - "import torch" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Then, we will set the model variable to a specific model we’d like to use. In this demo, we will use the 8B instruct model `meta-llama/Meta-Llama-3.1-8B-Instruct`. Using Meta models from Hugging Face requires you to\n", - "\n", - "1. Accept Terms of Service for Meta Llama 3.1 on Meta [website](https://llama.meta.com/llama-downloads).\n", - "2. Use the same email address from Step (1) to log in to Hugging Face.\n", - "\n", - "Follow the instructions on this Hugging Face page to log in from your [terminal](https://huggingface.co/docs/huggingface_hub/en/quick-start). " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pip install --upgrade huggingface_hub" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from huggingface_hub import login\n", - "login()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n", - "tokenizer = AutoTokenizer.from_pretrained(model)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Above, the `from_pretrained` method of `AutoTokenizer` downloads and caches the pre-trained tokenizer and returns an instance of the appropriate tokenizer class. Next, we create the text-generation pipeline.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pipeline = transformers.pipeline(\n", - " \"text-generation\",\n", - " model=model,\n", - " torch_dtype=torch.float16,\n", - " device_map=\"auto\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 3. Run the example\n", - "\n", - "Above, we created the pipeline for text generation. We also set the device_map argument to `auto`, which means the pipeline will automatically use a GPU if one is available.\n", - "\n", - "Let’s now generate a text sequence based on the input that we provide. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sequences = pipeline(\n", - " 'I have tomatoes, basil and cheese at home. 
What can I cook for dinner?\\n',\n", - " do_sample=True,\n", - " top_k=10,\n", - " num_return_sequences=1,\n", - " eos_token_id=tokenizer.eos_token_id,\n", - " truncation=True,\n", - " max_length=400,\n", - ")\n", - "\n", - "for seq in sequences:\n", - " print(f\"Result: {seq['generated_text']}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "
\n", - "\n", - "### Downloading and converting weights to Hugging Face format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 1. Request download of model weights from the Llama website\n", - "Before you can run the model locally, you will need to get the model weights. To get the model weights, visit the [Llama website](https://llama.meta.com/) and click on “download models”. \n", - "\n", - "Fill in the required information, select the models \"Meta Llama 3\" and accept the terms & conditions. You will receive a URL in your email in a short time." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 2. Clone the llama repo and get the weights\n", - "Git clone the [Meta Llama 3 repo](https://github.com/meta-llama/llama3). Run the `download.sh` script and follow the instructions. This will download the model checkpoints and tokenizer.\n", - "\n", - "This example demonstrates the Meta Llama 3.1 8B-Instruct model, but the steps we follow would be similar for other Llama models and other parameter sizes." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 3. Convert the model weights using Hugging Face transformers from source\n", - "\n", - "* `python3 -m venv hf-convertor`\n", - "* `source hf-convertor/bin/activate`\n", - "* `git clone https://github.com/huggingface/transformers.git`\n", - "* `cd transformers`\n", - "* `pip install -e .`\n", - "* `pip install torch tiktoken blobfile accelerate`\n", - "* `python3 src/transformers/models/llama/convert_llama_weights_to_hf.py --input_dir ${path_to_meta_downloaded_model} --output_dir ${path_to_save_converted_hf_model} --model_size 8B --llama_version 3.1`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "#### 4. Prepare the script\n", - "Import the following necessary modules in your script: \n", - "* `AutoModelForCausalLM` is the Llama 3 model class\n", - "* `AutoTokenizer` prepares your prompt for the model to process\n", - "* `pipeline` is an abstraction to generate model outputs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "import transformers\n", - "from transformers import AutoModelForCausalLM, AutoTokenizer\n", - "\n", - "model_dir = \"${path_the_converted_hf_model}\"\n", - "model = AutoModelForCausalLM.from_pretrained(\n", - " model_dir,\n", - " device_map=\"auto\",\n", - " )\n", - "tokenizer = AutoTokenizer.from_pretrained(model_dir)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We need a way to use our model for inference. Pipeline lets us specify the type of task the pipeline needs to run (`text-generation`), the model that the pipeline should use to make predictions (`model`), the precision to use with this model (`torch.float16`), and the device on which the pipeline should run (`device_map`), among various other options. 
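One refinement worth noting before creating the pipeline below: the Instruct variants are trained on a chat format, so formatting the input with the tokenizer's chat template, rather than passing a raw string, generally gives better results. Here is a minimal sketch, assuming the `tokenizer` loaded above from the converted checkpoint:

```python
# A sketch of formatting a prompt with the model's chat template before
# generation. Assumes the tokenizer loaded above from the converted checkpoint.
messages = [
    {"role": "user", "content": "I have tomatoes, basil and cheese at home. What can I cook for dinner?"},
]
chat_prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,              # return a formatted string rather than token ids
    add_generation_prompt=True,  # append the assistant header so the model responds
)
print(chat_prompt)
```

The resulting string can be passed to the pipeline in place of the raw prompt.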
\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "pipeline = transformers.pipeline(\n", - " \"text-generation\",\n", - " model=model,\n", - " tokenizer=tokenizer,\n", - " torch_dtype=torch.float16,\n", - " device_map=\"auto\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now that we have our pipeline defined, we need to provide some text prompts as inputs to our pipeline to use when it runs to generate responses (`sequences`). The pipeline shown in the example below sets `do_sample` to True, which allows us to specify the decoding strategy we’d like to use to select the next token from the probability distribution over the entire vocabulary. In our example, we are using top_k sampling. \n", - "\n", - "By changing `max_length`, you can specify how long you’d like the generated response to be. \n", - "Setting the `num_return_sequences` parameter to greater than one will let you generate more than one output.\n", - "\n", - "In your script, add the following to provide the input and information on how to run the pipeline:\n", - "\n", - "\n", - "#### 5. Run the example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sequences = pipeline(\n", - " 'I have tomatoes, basil and cheese at home. What can I cook for dinner?\\n',\n", - " do_sample=True,\n", - " top_k=10,\n", - " num_return_sequences=1,\n", - " eos_token_id=tokenizer.eos_token_id,\n", - " max_length=400,\n", - ")\n", - "for seq in sequences:\n", - " print(f\"{seq['generated_text']}\")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/recipes/quickstart/Running_Llama3_Anywhere/Running_Llama_on_Mac_Windows_Linux.ipynb b/recipes/quickstart/Running_Llama3_Anywhere/Running_Llama_on_Mac_Windows_Linux.ipynb deleted file mode 100644 index 0a5f43059..000000000 --- a/recipes/quickstart/Running_Llama3_Anywhere/Running_Llama_on_Mac_Windows_Linux.ipynb +++ /dev/null @@ -1,166 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Running Llama 3.1 on Mac, Windows or Linux\n", - "This notebook goes over how you can set up and run Llama 3.1 locally on a Mac, Windows or Linux using [Ollama](https://ollama.com/)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Steps at a glance:\n", - "1. Download and install Ollama.\n", - "2. Download and test run Llama 3.1\n", - "3. Use local Llama 3.1 via Python.\n", - "4. Use local Llama 3.1 via LangChain.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 1. Download and install Ollama\n", - "\n", - "On Mac or Windows, go to the Ollama download page [here](https://ollama.com/download) and select your platform to download it, then double-click the downloaded file to install Ollama.\n", - "\n", - "On Linux, you can simply run `curl -fsSL https://ollama.com/install.sh | sh` in a terminal to download and install Ollama." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 2. 
Download and test run Llama 3.1\n", - "\n", - "On a terminal or console, run `ollama pull llama3.1` to download the Llama 3.1 8b chat model, in the 4-bit quantized format with a size of about 4.7 GB.\n", - "\n", - "Run `ollama pull llama3.1:70b` to download the Llama 3.1 70b chat model, also in the 4-bit quantized format, with a size of about 39 GB.\n", - "\n", - "Then you can run `ollama run llama3.1` and ask Llama 3.1 questions such as \"who wrote the book godfather?\" or \"who wrote the book godfather? answer in one sentence.\" You can also try `ollama run llama3.1:70b`, but the inference speed will most likely be too slow - for example, on an Apple M1 Pro with 32GB RAM, it takes over 10 seconds to generate one token using Llama 3.1 70b chat (vs over 10 tokens per second with Llama 3.1 8b chat).\n", - "\n", - "You can also run the following command to test Llama 3.1 8b chat:\n", - "```\n", - " curl http://localhost:11434/api/chat -d '{\n", - " \"model\": \"llama3.1\",\n", - " \"messages\": [\n", - " {\n", - " \"role\": \"user\",\n", - " \"content\": \"who wrote the book godfather?\"\n", - " }\n", - " ],\n", - " \"stream\": false\n", - "}'\n", - "```\n", - "\n", - "The complete Ollama API doc is [here](https://github.com/ollama/ollama/blob/main/docs/api.md)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 3. Use local Llama 3.1 via Python\n", - "\n", - "The Python code below is a port of the curl command above." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import requests\n", - "import json\n", - "\n", - "url = \"http://localhost:11434/api/chat\"\n", - "\n", - "def llama3(prompt):\n", - " data = {\n", - " \"model\": \"llama3.1\",\n", - " \"messages\": [\n", - " {\n", - " \"role\": \"user\",\n", - " \"content\": prompt\n", - " }\n", - " ],\n", - " \"stream\": False\n", - " }\n", - " \n", - " headers = {\n", - " 'Content-Type': 'application/json'\n", - " }\n", - " \n", - " response = requests.post(url, headers=headers, json=data)\n", - " \n", - " return response.json()['message']['content']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "response = llama3(\"who wrote the book godfather\")\n", - "print(response)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 4. Use local Llama 3.1 via LangChain\n", - "\n", - "The code below uses LangChain with Ollama to query Llama 3.1 running locally. For a more advanced example of using local Llama 3 with LangChain and agent-powered RAG, see [this](https://github.com/langchain-ai/langgraph/blob/main/examples/rag/langgraph_rag_agent_llama3_local.ipynb)." 
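One more note on the raw API used in section 3 before the LangChain example: the examples above set `"stream": false` for simplicity, but Ollama can also stream the response, sending one JSON object per line. Below is a sketch of consuming the stream with `requests`, against the same local endpoint as above:

```python
import json
import requests

# A sketch of streaming tokens from the local Ollama server: with "stream": True,
# each line of the HTTP response is a JSON object carrying one chunk of output.
def llama3_stream(prompt):
    data = {
        "model": "llama3.1",
        "messages": [{"role": "user", "content": prompt}],
        "stream": True,
    }
    with requests.post("http://localhost:11434/api/chat", json=data, stream=True) as response:
        for line in response.iter_lines():
            if line:
                chunk = json.loads(line)
                # The final chunk has "done": true and an empty content field.
                print(chunk["message"]["content"], end="", flush=True)
    print()

llama3_stream("who wrote the book godfather? answer in one sentence.")
```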
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install langchain" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_community.chat_models import ChatOllama\n", - "\n", - "llm = ChatOllama(model=\"llama3.1\", temperature=0)\n", - "response = llm.invoke(\"who wrote the book godfather?\")\n", - "print(response.content)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/recipes/quickstart/inference/code_llama/README.md b/recipes/quickstart/inference/code_llama/README.md deleted file mode 100644 index ef1be5e83..000000000 --- a/recipes/quickstart/inference/code_llama/README.md +++ /dev/null @@ -1,39 +0,0 @@ -# Code Llama - -Code Llama was recently released in three flavors: a base model that supports multiple programming languages, a Python fine-tuned model, and an instruction fine-tuned and aligned variation of Code Llama; please read more [here](https://ai.meta.com/blog/code-llama-large-language-model-coding/). Also note that the Python fine-tuned model and the 34B models are not trained on the infilling objective, and hence cannot be used for the infilling use case. - -Here you can find the scripts to run Code Llama, with two examples of running code completion and infilling. - -**Note** Please find the right model on HF [here](https://huggingface.co/models?search=meta-llama%20codellama). - -Make sure to install Transformers from source for now - -```bash - -pip install git+https://github.com/huggingface/transformers - -``` - -To run the code completion example: - -```bash - -python code_completion_example.py --model_name MODEL_NAME --prompt_file code_completion_prompt.txt --temperature 0.2 --top_p 0.9 - -``` - -To run the code infilling example: - -```bash - -python code_infilling_example.py --model_name MODEL_NAME --prompt_file code_infilling_prompt.txt --temperature 0.2 --top_p 0.9 - -``` -To run the 70B Instruct model example, run the following (you'll need to enter the system and user prompts to instruct the model): - -```bash - -python code_instruct_example.py --model_name codellama/CodeLlama-70b-Instruct-hf --temperature 0.2 --top_p 0.9 - -``` -You can learn more about the chat prompt template [on HF](https://huggingface.co/meta-llama/CodeLlama-70b-Instruct-hf#chat-prompt) and in the [original Code Llama repository](https://github.com/meta-llama/codellama/blob/main/README.md#fine-tuned-instruction-models). The HF tokenizer has already taken care of the chat template, as shown in this example. diff --git a/recipes/quickstart/inference/code_llama/code_completion_example.py b/recipes/quickstart/inference/code_llama/code_completion_example.py deleted file mode 100644 index 201f8df8b..000000000 --- a/recipes/quickstart/inference/code_llama/code_completion_example.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement. 
- -# from accelerate import init_empty_weights, load_checkpoint_and_dispatch - -import fire -import os -import sys -import time - -import torch -from transformers import AutoTokenizer - -from llama_recipes.inference.safety_utils import get_safety_checker -from llama_recipes.inference.model_utils import load_model, load_peft_model - - -def main( - model_name, - peft_model: str=None, - quantization: bool=False, - max_new_tokens =100, #The maximum number of tokens to generate - prompt_file: str=None, - seed: int=42, #seed value for reproducibility - do_sample: bool=True, #Whether or not to use sampling; use greedy decoding otherwise. - min_length: int=None, #The minimum length of the sequence to be generated, input prompt + min_new_tokens - use_cache: bool=True, #[optional] Whether or not the model should use the past key/values attentions (if applicable to the model) to speed up decoding. - top_p: float=0.9, # [optional] If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: float=0.6, # [optional] The value used to modulate the next token probabilities. - top_k: int=50, # [optional] The number of highest probability vocabulary tokens to keep for top-k-filtering. - repetition_penalty: float=1.0, #The parameter for repetition penalty. 1.0 means no penalty. - length_penalty: int=1, #[optional] Exponential penalty to the length that is used with beam-based generation. - enable_azure_content_safety: bool=False, # Enable safety check with Azure content safety api - enable_sensitive_topics: bool=False, # Enable check for sensitive topics using AuditNLG APIs - enable_salesforce_content_safety: bool=True, # Enable safety check with Salesforce safety flan t5 - enable_llamaguard_content_safety: bool=False, # Enable safety check with Llama-Guard - use_fast_kernels: bool = True, # Enable using SDPA from PyTorch Accelerated Transformers, making use of Flash Attention and Xformers memory-efficient kernels - **kwargs -): - if prompt_file is not None: - assert os.path.exists( - prompt_file - ), f"Provided Prompt file does not exist {prompt_file}" - with open(prompt_file, "r") as f: - user_prompt = f.read() - else: - print("No user prompt provided. 
Exiting.") - sys.exit(1) - - # Set the seeds for reproducibility - torch.cuda.manual_seed(seed) - torch.manual_seed(seed) - - model = load_model(model_name, quantization, use_fast_kernels) - if peft_model: - model = load_peft_model(model, peft_model) - - model.eval() - - tokenizer = AutoTokenizer.from_pretrained(model_name) - safety_checker = get_safety_checker(enable_azure_content_safety, - enable_sensitive_topics, - enable_salesforce_content_safety, - enable_llamaguard_content_safety, - ) - - # Safety check of the user prompt - safety_results = [check(user_prompt) for check in safety_checker] - are_safe = all([r[1] for r in safety_results]) - if are_safe: - print("User prompt deemed safe.") - print(f"User prompt:\n{user_prompt}") - else: - print("User prompt deemed unsafe.") - for method, is_safe, report in safety_results: - if not is_safe: - print(method) - print(report) - print("Skipping the inference as the prompt is not safe.") - sys.exit(1) # Exit the program with an error status - - batch = tokenizer(user_prompt, return_tensors="pt") - - batch = {k: v.to("cuda") for k, v in batch.items()} - start = time.perf_counter() - with torch.no_grad(): - outputs = model.generate( - **batch, - max_new_tokens=max_new_tokens, - do_sample=do_sample, - top_p=top_p, - temperature=temperature, - min_length=min_length, - use_cache=use_cache, - top_k=top_k, - repetition_penalty=repetition_penalty, - length_penalty=length_penalty, - **kwargs - ) - e2e_inference_time = (time.perf_counter()-start)*1000 - print(f"the inference time is {e2e_inference_time} ms") - output_text = tokenizer.decode(outputs[0], skip_special_tokens=True) - - # Safety check of the model output - safety_results = [check(output_text) for check in safety_checker] - are_safe = all([r[1] for r in safety_results]) - if are_safe: - print("User input and model output deemed safe.") - print(f"Model output:\n{output_text}") - else: - print("Model output deemed unsafe.") - for method, is_safe, report in safety_results: - if not is_safe: - print(method) - print(report) - - -if __name__ == "__main__": - fire.Fire(main) diff --git a/recipes/quickstart/inference/code_llama/code_completion_prompt.txt b/recipes/quickstart/inference/code_llama/code_completion_prompt.txt deleted file mode 100644 index 8e184e2fe..000000000 --- a/recipes/quickstart/inference/code_llama/code_completion_prompt.txt +++ /dev/null @@ -1,7 +0,0 @@ -import argparse - -def main(string: str): - print(string) - print(string[::-1]) - -if __name__ == "__main__": \ No newline at end of file diff --git a/recipes/quickstart/inference/code_llama/code_infilling_example.py b/recipes/quickstart/inference/code_llama/code_infilling_example.py deleted file mode 100644 index a955eb5ce..000000000 --- a/recipes/quickstart/inference/code_llama/code_infilling_example.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement. 
- -# from accelerate import init_empty_weights, load_checkpoint_and_dispatch - -import fire -import torch -import os -import sys -import time - -from transformers import AutoTokenizer - -from llama_recipes.inference.safety_utils import get_safety_checker -from llama_recipes.inference.model_utils import load_model, load_peft_model - -def main( - model_name, - peft_model: str=None, - quantization: bool=False, - max_new_tokens =100, #The maximum number of tokens to generate - prompt_file: str=None, - seed: int=42, #seed value for reproducibility - do_sample: bool=True, #Whether or not to use sampling; use greedy decoding otherwise. - min_length: int=None, #The minimum length of the sequence to be generated, input prompt + min_new_tokens - use_cache: bool=True, #[optional] Whether or not the model should use the past key/values attentions (if applicable to the model) to speed up decoding. - top_p: float=0.9, # [optional] If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: float=0.6, # [optional] The value used to modulate the next token probabilities. - top_k: int=50, # [optional] The number of highest probability vocabulary tokens to keep for top-k-filtering. - repetition_penalty: float=1.0, #The parameter for repetition penalty. 1.0 means no penalty. - length_penalty: int=1, #[optional] Exponential penalty to the length that is used with beam-based generation. - enable_azure_content_safety: bool=False, # Enable safety check with Azure content safety api - enable_sensitive_topics: bool=False, # Enable check for sensitive topics using AuditNLG APIs - enable_salesforce_content_safety: bool=True, # Enable safety check with Salesforce safety flan t5 - enable_llamaguard_content_safety: bool=False, # Enable safety check with Llama-Guard - use_fast_kernels: bool = True, # Enable using SDPA from PyTorch Accelerated Transformers, making use of Flash Attention and Xformers memory-efficient kernels - **kwargs -): - if prompt_file is not None: - assert os.path.exists( - prompt_file - ), f"Provided Prompt file does not exist {prompt_file}" - with open(prompt_file, "r") as f: - user_prompt = f.read() - else: - print("No user prompt provided. 
Exiting.") - sys.exit(1) - # Set the seeds for reproducibility - torch.cuda.manual_seed(seed) - torch.manual_seed(seed) - - model = load_model(model_name, quantization, use_fast_kernels) - model.config.tp_size=1 - if peft_model: - model = load_peft_model(model, peft_model) - - model.eval() - - tokenizer = AutoTokenizer.from_pretrained(model_name) - - safety_checker = get_safety_checker(enable_azure_content_safety, - enable_sensitive_topics, - enable_salesforce_content_safety, - enable_llamaguard_content_safety, - ) - - # Safety check of the user prompt - safety_results = [check(user_prompt) for check in safety_checker] - are_safe = all([r[1] for r in safety_results]) - if are_safe: - print("User prompt deemed safe.") - print(f"User prompt:\n{user_prompt}") - else: - print("User prompt deemed unsafe.") - for method, is_safe, report in safety_results: - if not is_safe: - print(method) - print(report) - print("Skipping the inference as the prompt is not safe.") - sys.exit(1) # Exit the program with an error status - - batch = tokenizer(user_prompt, return_tensors="pt") - batch = {k: v.to("cuda") for k, v in batch.items()} - - start = time.perf_counter() - with torch.no_grad(): - outputs = model.generate( - **batch, - max_new_tokens=max_new_tokens, - do_sample=do_sample, - top_p=top_p, - temperature=temperature, - min_length=min_length, - use_cache=use_cache, - top_k=top_k, - repetition_penalty=repetition_penalty, - length_penalty=length_penalty, - **kwargs - ) - e2e_inference_time = (time.perf_counter()-start)*1000 - print(f"the inference time is {e2e_inference_time} ms") - filling = tokenizer.batch_decode(outputs[:, batch["input_ids"].shape[1]:], skip_special_tokens=True)[0] - # Safety check of the model output - safety_results = [check(filling) for check in safety_checker] - are_safe = all([r[1] for r in safety_results]) - if are_safe: - print("User input and model output deemed safe.") - print(user_prompt.replace("<FILL_ME>", filling)) - else: - print("Model output deemed unsafe.") - for method, is_safe, report in safety_results: - if not is_safe: - print(method) - print(report) - - -if __name__ == "__main__": - fire.Fire(main) diff --git a/recipes/quickstart/inference/code_llama/code_infilling_prompt.txt b/recipes/quickstart/inference/code_llama/code_infilling_prompt.txt deleted file mode 100644 index 3fe94b7a5..000000000 --- a/recipes/quickstart/inference/code_llama/code_infilling_prompt.txt +++ /dev/null @@ -1,3 +0,0 @@ -def remove_non_ascii(s: str) -> str: - """ <FILL_ME> - return result diff --git a/recipes/quickstart/inference/code_llama/code_instruct_example.py b/recipes/quickstart/inference/code_llama/code_instruct_example.py deleted file mode 100644 index d7b98f088..000000000 --- a/recipes/quickstart/inference/code_llama/code_instruct_example.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement. - -import fire -import os -import sys -import time - -import torch -from transformers import AutoTokenizer - -from llama_recipes.inference.safety_utils import get_safety_checker -from llama_recipes.inference.model_utils import load_model, load_peft_model - - -def handle_safety_check(are_safe_user_prompt, user_prompt, safety_results_user_prompt, are_safe_system_prompt, system_prompt, safety_results_system_prompt): - """ - Handles the output based on the safety check of both user and system prompts. 
- - Parameters: - - are_safe_user_prompt (bool): Indicates whether the user prompt is safe. - - user_prompt (str): The user prompt that was checked for safety. - - safety_results_user_prompt (list of tuples): A list of tuples for the user prompt containing the method, safety status, and safety report. - - are_safe_system_prompt (bool): Indicates whether the system prompt is safe. - - system_prompt (str): The system prompt that was checked for safety. - - safety_results_system_prompt (list of tuples): A list of tuples for the system prompt containing the method, safety status, and safety report. - """ - def print_safety_results(are_safe_prompt, prompt, safety_results, prompt_type="User"): - """ - Prints the safety results for a prompt. - - Parameters: - - are_safe_prompt (bool): Indicates whether the prompt is safe. - - prompt (str): The prompt that was checked for safety. - - safety_results (list of tuples): A list of tuples containing the method, safety status, and safety report. - - prompt_type (str): The type of prompt (User/System). - """ - if are_safe_prompt: - print(f"{prompt_type} prompt deemed safe.") - print(f"{prompt_type} prompt:\n{prompt}") - else: - print(f"{prompt_type} prompt deemed unsafe.") - for method, is_safe, report in safety_results: - if not is_safe: - print(method) - print(report) - print(f"Skipping the inference as the {prompt_type.lower()} prompt is not safe.") - sys.exit(1) - - # Check user prompt - print_safety_results(are_safe_user_prompt, user_prompt, safety_results_user_prompt, "User") - - # Check system prompt - print_safety_results(are_safe_system_prompt, system_prompt, safety_results_system_prompt, "System") - -def main( - model_name, - peft_model: str=None, - quantization: bool=False, - max_new_tokens =100, #The maximum number of tokens to generate - seed: int=42, #seed value for reproducibility - do_sample: bool=True, #Whether or not to use sampling; use greedy decoding otherwise. - min_length: int=None, #The minimum length of the sequence to be generated, input prompt + min_new_tokens - use_cache: bool=False, #[optional] Whether or not the model should use the past key/values attentions (if applicable to the model) to speed up decoding. - top_p: float=0.9, # [optional] If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: float=0.6, # [optional] The value used to modulate the next token probabilities. - top_k: int=50, # [optional] The number of highest probability vocabulary tokens to keep for top-k-filtering. - repetition_penalty: float=1.0, #The parameter for repetition penalty. 1.0 means no penalty. - length_penalty: int=1, #[optional] Exponential penalty to the length that is used with beam-based generation. 
- enable_azure_content_safety: bool=False, # Enable safety check with Azure content safety api - enable_sensitive_topics: bool=False, # Enable check for sensitive topics using AuditNLG APIs - enable_salesforce_content_safety: bool=True, # Enable safety check with Salesforce safety flan t5 - enable_llamaguard_content_safety: bool=False, # Enable safety check with Llama-Guard - use_fast_kernels: bool = True, # Enable using SDPA from PyTorch Accelerated Transformers, making use of Flash Attention and Xformers memory-efficient kernels - **kwargs -): - system_prompt = input("Please insert your system prompt: ") - user_prompt = input("Please insert your prompt: ") - chat = [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_prompt}, - ] - # Set the seeds for reproducibility - torch.cuda.manual_seed(seed) - torch.manual_seed(seed) - - model = load_model(model_name, quantization, use_fast_kernels) - if peft_model: - model = load_peft_model(model, peft_model) - - model.eval() - - tokenizer = AutoTokenizer.from_pretrained(model_name) - safety_checker = get_safety_checker(enable_azure_content_safety, - enable_sensitive_topics, - enable_salesforce_content_safety, - enable_llamaguard_content_safety, - ) - - # Safety check of the user prompt - safety_results_user_prompt = [check(user_prompt) for check in safety_checker] - safety_results_system_prompt = [check(system_prompt) for check in safety_checker] - are_safe_user_prompt = all([r[1] for r in safety_results_user_prompt]) - are_safe_system_prompt = all([r[1] for r in safety_results_system_prompt]) - handle_safety_check(are_safe_user_prompt, user_prompt, safety_results_user_prompt, are_safe_system_prompt, system_prompt, safety_results_system_prompt) - - inputs = tokenizer.apply_chat_template(chat, return_tensors="pt").to("cuda") - - start = time.perf_counter() - with torch.no_grad(): - outputs = model.generate( - input_ids=inputs, - max_new_tokens=max_new_tokens, - do_sample=do_sample, - top_p=top_p, - temperature=temperature, - min_length=min_length, - use_cache=use_cache, - top_k=top_k, - repetition_penalty=repetition_penalty, - length_penalty=length_penalty, - **kwargs - ) - e2e_inference_time = (time.perf_counter()-start)*1000 - print(f"the inference time is {e2e_inference_time} ms") - output_text = tokenizer.decode(outputs[0], skip_special_tokens=True) - - # Safety check of the model output - safety_results = [check(output_text) for check in safety_checker] - are_safe = all([r[1] for r in safety_results]) - if are_safe: - print("User input and model output deemed safe.") - print(f"Model output:\n{output_text}") - else: - print("Model output deemed unsafe.") - for method, is_safe, report in safety_results: - if not is_safe: - print(method) - print(report) - - -if __name__ == "__main__": - fire.Fire(main) diff --git a/recipes/quickstart/inference/modelUpgradeExample.py b/recipes/quickstart/inference/modelUpgradeExample.py deleted file mode 100644 index f2fa19cd1..000000000 --- a/recipes/quickstart/inference/modelUpgradeExample.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement. - -# Running the script without any arguments "python modelUpgradeExample.py" performs inference with the Llama 3 8B Instruct model. -# Passing --model-id "meta-llama/Meta-Llama-3.1-8B-Instruct" to the script will switch it to using the Llama 3.1 version of the same model. 
-# The script also shows the input tokens to confirm that the models are responding to the same input - -import fire -from transformers import AutoTokenizer, AutoModelForCausalLM -import torch - -def main(model_id = "meta-llama/Meta-Llama-3-8B-Instruct"): - tokenizer = AutoTokenizer.from_pretrained(model_id) - model = AutoModelForCausalLM.from_pretrained( - model_id, - torch_dtype=torch.bfloat16, - device_map="auto", - ) - - messages = [ - {"role": "system", "content": "You are a helpful chatbot"}, - {"role": "user", "content": "Why is the sky blue?"}, - {"role": "assistant", "content": "Because the light is scattered"}, - {"role": "user", "content": "Please tell me more about that"}, - ] - - input_ids = tokenizer.apply_chat_template( - messages, - add_generation_prompt=True, - return_tensors="pt", - ).to(model.device) - - print("Input tokens:") - print(input_ids) - - attention_mask = torch.ones_like(input_ids) - outputs = model.generate( - input_ids, - max_new_tokens=400, - eos_token_id=tokenizer.eos_token_id, - do_sample=True, - temperature=0.6, - top_p=0.9, - attention_mask=attention_mask, - ) - response = outputs[0][input_ids.shape[-1]:] - print("\nOutput:\n") - print(tokenizer.decode(response, skip_special_tokens=True)) - -if __name__ == "__main__": - fire.Fire(main) \ No newline at end of file diff --git a/src/README.md b/src/README.md new file mode 100644 index 000000000..5fad6150d --- /dev/null +++ b/src/README.md @@ -0,0 +1,74 @@ +## Getting Started + +These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system. + +### Prerequisites + +#### PyTorch Nightlies +If you want to use PyTorch nightlies instead of the stable release, go to [this guide](https://pytorch.org/get-started/locally/) to retrieve the right `--extra-index-url URL` parameter for the `pip install` commands on your platform. + +### Installing +Llama-recipes provides a pip distribution for easy install and usage in other projects. Alternatively, it can be installed from source. + +> [!NOTE] +> Ensure you use the correct CUDA version (from `nvidia-smi`) when installing the PyTorch wheels. Here we are using 11.8 as `cu118`. +> H100 GPUs work better with CUDA >12.0 + +#### Install with pip +``` +pip install llama-recipes +``` + +#### Install with optional dependencies +Llama-recipes offers the installation of optional packages. There are four optional dependency groups. +To run the unit tests we can install the required dependencies with: +``` +pip install llama-recipes[tests] +``` +For the vLLM example we need additional requirements that can be installed with: +``` +pip install llama-recipes[vllm] +``` +To use the sensitive topics safety checker install with: +``` +pip install llama-recipes[auditnlg] +``` +Some recipes require the presence of langchain. To install the packages follow the recipe description or install with: +``` +pip install llama-recipes[langchain] +``` +Optional dependencies can also be combined with [option1,option2]. + +#### Install from source +To install from source e.g. for development use these commands. We're using hatchling as our build backend, which requires up-to-date pip and setuptools packages. +``` +git clone git@github.com:meta-llama/llama-recipes.git +cd llama-recipes +pip install -U pip setuptools +pip install -e . 
+``` +For development and contributing to llama-recipes please install all optional dependencies: +``` +git clone git@github.com:meta-llama/llama-recipes.git +cd llama-recipes +pip install -U pip setuptools +pip install -e .[tests,auditnlg,vllm] +``` + + +### Getting the Llama models +You can find Llama models on Hugging Face hub [here](https://huggingface.co/meta-llama), **where models with `hf` in the name are already converted to Hugging Face checkpoints so no further conversion is needed**. The conversion step below is only for original model weights from Meta that are hosted on Hugging Face model hub as well. + +#### Model conversion to Hugging Face +If you have the model checkpoints downloaded from the Meta website, you can convert it to the Hugging Face format with: + +```bash +## Install Hugging Face Transformers from source +pip freeze | grep transformers ## verify it is version 4.45.0 or higher + +git clone git@github.com:huggingface/transformers.git +cd transformers +pip install protobuf +python src/transformers/models/llama/convert_llama_weights_to_hf.py \ + --input_dir /path/to/downloaded/llama/weights --model_size 3B --output_dir /output/path +``` \ No newline at end of file diff --git a/docs/FAQ.md b/src/docs/FAQ.md similarity index 98% rename from docs/FAQ.md rename to src/docs/FAQ.md index fa5e7de82..1cdd4eba9 100644 --- a/docs/FAQ.md +++ b/src/docs/FAQ.md @@ -16,7 +16,7 @@ Here we discuss frequently asked questions that may occur and we found useful al 4. Can I add custom datasets? - Yes, you can find more information on how to do that [here](../recipes/quickstart/finetuning/datasets/README.md). + Yes, you can find more information on how to do that [here](../../getting-started/finetuning/datasets/README.md). 5. What are the hardware SKU requirements for deploying these models? 
diff --git a/docs/LLM_finetuning.md b/src/docs/LLM_finetuning.md similarity index 100% rename from docs/LLM_finetuning.md rename to src/docs/LLM_finetuning.md diff --git a/docs/img/a_colorful_llama_doing_ai_programming.jpeg b/src/docs/img/a_colorful_llama_doing_ai_programming.jpeg similarity index 100% rename from docs/img/a_colorful_llama_doing_ai_programming.jpeg rename to src/docs/img/a_colorful_llama_doing_ai_programming.jpeg diff --git a/docs/img/cat.jpeg b/src/docs/img/cat.jpeg similarity index 100% rename from docs/img/cat.jpeg rename to src/docs/img/cat.jpeg diff --git a/docs/img/feature_based_fn.png b/src/docs/img/feature_based_fn.png similarity index 100% rename from docs/img/feature_based_fn.png rename to src/docs/img/feature_based_fn.png diff --git a/docs/img/feature_based_fn_2.png b/src/docs/img/feature_based_fn_2.png similarity index 100% rename from docs/img/feature_based_fn_2.png rename to src/docs/img/feature_based_fn_2.png diff --git a/docs/img/full_param_fn.png b/src/docs/img/full_param_fn.png similarity index 100% rename from docs/img/full_param_fn.png rename to src/docs/img/full_param_fn.png diff --git a/docs/img/gnocchi_alla_romana.jpeg b/src/docs/img/gnocchi_alla_romana.jpeg similarity index 100% rename from docs/img/gnocchi_alla_romana.jpeg rename to src/docs/img/gnocchi_alla_romana.jpeg diff --git a/docs/img/grocery_shopping_bascket_with_salmon_in_package.jpeg b/src/docs/img/grocery_shopping_bascket_with_salmon_in_package.jpeg similarity index 100% rename from docs/img/grocery_shopping_bascket_with_salmon_in_package.jpeg rename to src/docs/img/grocery_shopping_bascket_with_salmon_in_package.jpeg diff --git a/docs/img/llama-mobile-confirmed.png b/src/docs/img/llama-mobile-confirmed.png similarity index 100% rename from docs/img/llama-mobile-confirmed.png rename to src/docs/img/llama-mobile-confirmed.png diff --git a/docs/img/llama-recipes.png b/src/docs/img/llama-recipes.png similarity index 100% rename from docs/img/llama-recipes.png rename to src/docs/img/llama-recipes.png diff --git a/docs/img/llama2_gradio.png b/src/docs/img/llama2_gradio.png similarity index 100% rename from docs/img/llama2_gradio.png rename to src/docs/img/llama2_gradio.png diff --git a/docs/img/llama2_streamlit.png b/src/docs/img/llama2_streamlit.png similarity index 100% rename from docs/img/llama2_streamlit.png rename to src/docs/img/llama2_streamlit.png diff --git a/docs/img/llama2_streamlit2.png b/src/docs/img/llama2_streamlit2.png similarity index 100% rename from docs/img/llama2_streamlit2.png rename to src/docs/img/llama2_streamlit2.png diff --git a/docs/img/llama_stack.png b/src/docs/img/llama_stack.png similarity index 100% rename from docs/img/llama_stack.png rename to src/docs/img/llama_stack.png diff --git a/docs/img/messenger_api_settings.png b/src/docs/img/messenger_api_settings.png similarity index 100% rename from docs/img/messenger_api_settings.png rename to src/docs/img/messenger_api_settings.png diff --git a/docs/img/messenger_llama_arch.jpg b/src/docs/img/messenger_llama_arch.jpg similarity index 100% rename from docs/img/messenger_llama_arch.jpg rename to src/docs/img/messenger_llama_arch.jpg diff --git a/docs/img/meta_release.png b/src/docs/img/meta_release.png similarity index 100% rename from docs/img/meta_release.png rename to src/docs/img/meta_release.png diff --git a/docs/img/resized_image.jpg b/src/docs/img/resized_image.jpg similarity index 100% rename from docs/img/resized_image.jpg rename to src/docs/img/resized_image.jpg diff --git 
diff --git a/docs/img/thumbnail_IMG_1329.jpg b/src/docs/img/thumbnail_IMG_1329.jpg
similarity index 100%
rename from docs/img/thumbnail_IMG_1329.jpg
rename to src/docs/img/thumbnail_IMG_1329.jpg
diff --git a/docs/img/thumbnail_IMG_1440.jpg b/src/docs/img/thumbnail_IMG_1440.jpg
similarity index 100%
rename from docs/img/thumbnail_IMG_1440.jpg
rename to src/docs/img/thumbnail_IMG_1440.jpg
diff --git a/docs/img/thumbnail_IMG_6385.jpg b/src/docs/img/thumbnail_IMG_6385.jpg
similarity index 100%
rename from docs/img/thumbnail_IMG_6385.jpg
rename to src/docs/img/thumbnail_IMG_6385.jpg
diff --git a/docs/img/wandb_screenshot.png b/src/docs/img/wandb_screenshot.png
similarity index 100%
rename from docs/img/wandb_screenshot.png
rename to src/docs/img/wandb_screenshot.png
diff --git a/docs/img/whatsapp_dashboard.jpg b/src/docs/img/whatsapp_dashboard.jpg
similarity index 100%
rename from docs/img/whatsapp_dashboard.jpg
rename to src/docs/img/whatsapp_dashboard.jpg
diff --git a/docs/img/whatsapp_llama_arch.jpg b/src/docs/img/whatsapp_llama_arch.jpg
similarity index 100%
rename from docs/img/whatsapp_llama_arch.jpg
rename to src/docs/img/whatsapp_llama_arch.jpg
diff --git a/docs/multi_gpu.md b/src/docs/multi_gpu.md
similarity index 93%
rename from docs/multi_gpu.md
rename to src/docs/multi_gpu.md
index 7c797ddc8..b7862bee6 100644
--- a/docs/multi_gpu.md
+++ b/src/docs/multi_gpu.md
@@ -10,7 +10,7 @@ Given the combination of PEFT and FSDP, we would be able to fine tune a Meta Lla
 For big models like 405B we will need to fine-tune in a multi-node setup even if 4bit quantization is enabled.
 
 ## Requirements
-To run the examples, make sure to install the llama-recipes package and clone the github repository in order to use the provided [`finetuning.py`](../recipes/quickstart/finetuning/finetuning.py) script with torchrun (See [README.md](../README.md) for details).
+To run the examples, make sure to install the llama-recipes package and clone the GitHub repository in order to use the provided [`finetuning.py`](../../getting-started/finetuning/finetuning.py) script with torchrun (see [README.md](../README.md) for details).
 
 ## How to run it
 
@@ -86,9 +86,9 @@ sbatch recipes/quickstart/finetuning/multi_node.slurm
 ## How to run with different datasets?
 
-Currently 4 datasets are supported that can be found in [Datasets config file](../src/llama_recipes/configs/datasets.py).
+Currently 4 datasets are supported; they can be found in the [Datasets config file](../llama_recipes/configs/datasets.py).
 
-* `grammar_dataset` : use this [notebook](../src/llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) to pull and process theJfleg and C4 200M datasets for grammar checking.
+* `grammar_dataset` : use this [notebook](../llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) to pull and process the Jfleg and C4 200M datasets for grammar checking.
 
 * `alpaca_dataset` : to get this open source data please download the `alpaca.json` to the `dataset` folder.
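The hunk above lists the supported dataset options; to make switching between them concrete, here is an illustrative launch command. It reuses the torchrun invocation referenced in this doc, but the `--dataset` flag and the placeholder paths are assumptions to verify against `configs/datasets.py` and `finetuning.py`:

```bash
# Sketch: fine-tune with grammar_dataset instead of the default dataset.
# Flag names and paths are illustrative; check the config files for the full list.
torchrun --nnodes 1 --nproc_per_node 4 recipes/quickstart/finetuning/finetuning.py \
  --enable_fsdp --use_peft --peft_method lora \
  --dataset grammar_dataset \
  --model_name /path/to/model/checkpoints \
  --output_dir /path/to/save/peft/model
```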
@@ -117,7 +117,7 @@ torchrun --nnodes 1 --nproc_per_node 4 recipes/quickstart/finetuning/finetuning
 ## Where to configure settings?
 
-* [Training config file](../src/llama_recipes/configs/training.py) is the main config file that helps to specify the settings for our run and can be found in [configs folder](../src/llama_recipes/configs/)
+* [Training config file](../llama_recipes/configs/training.py) is the main config file that helps to specify the settings for our run and can be found in the [configs folder](../llama_recipes/configs/)
 
 It lets us specify the training settings for everything from `model_name` to `dataset_name`, `batch_size` and so on. Below is the list of supported settings:
 
@@ -166,11 +166,11 @@ It lets us specify the training settings for everything from `model_name` to `da
     profiler_dir: str = "PATH/to/save/profiler/results" # will be used if using profiler
 ```
 
-* [Datasets config file](../src/llama_recipes/configs/datasets.py) provides the available options for datasets.
+* [Datasets config file](../llama_recipes/configs/datasets.py) provides the available options for datasets.
 
-* [peft config file](../src/llama_recipes/configs/peft.py) provides the supported PEFT methods and respective settings that can be modified.
+* [peft config file](../llama_recipes/configs/peft.py) provides the supported PEFT methods and respective settings that can be modified.
 
-* [FSDP config file](../src/llama_recipes/configs/fsdp.py) provides FSDP settings such as:
+* [FSDP config file](../llama_recipes/configs/fsdp.py) provides FSDP settings such as:
 
     * `mixed_precision` boolean flag to specify using mixed precision, defaults to true.
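Since the training, datasets, peft, and FSDP configs referenced above are plain Python dataclasses, their fields can typically be overridden directly on the command line at launch. A hedged sketch, assuming the usual llama-recipes convention that a field in `configs/training.py` (for example `batch_size_training` or `lr`) maps to a CLI flag of the same name:

```bash
# Sketch: override training-config fields at launch time.
# The field-to-flag mapping is assumed; confirm names against configs/training.py.
torchrun --nnodes 1 --nproc_per_node 4 recipes/quickstart/finetuning/finetuning.py \
  --enable_fsdp --use_peft --peft_method lora \
  --model_name /path/to/model/checkpoints \
  --batch_size_training 2 --lr 1e-4 --num_epochs 3 \
  --output_dir /path/to/save/peft/model
```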
diff --git a/docs/single_gpu.md b/src/docs/single_gpu.md
similarity index 90%
rename from docs/single_gpu.md
rename to src/docs/single_gpu.md
index 3f6834ef8..cff6b8399 100644
--- a/docs/single_gpu.md
+++ b/src/docs/single_gpu.md
@@ -35,9 +35,9 @@ The args used in the command above are:
 ## How to run with different datasets?
 
-Currently 4 datasets are supported that can be found in [Datasets config file](../src/llama_recipes/configs/datasets.py).
+Currently 4 datasets are supported; they can be found in the [Datasets config file](../llama_recipes/configs/datasets.py).
 
-* `grammar_dataset` : use this [notebook](../src/llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) to pull and process theJfleg and C4 200M datasets for grammar checking.
+* `grammar_dataset` : use this [notebook](../llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) to pull and process the Jfleg and C4 200M datasets for grammar checking.
 
 * `alpaca_dataset` : to get this open source data please download the `alpaca.json` to the `ft_dataset` folder.
 
@@ -67,7 +67,7 @@ python -m llama_recipes.finetuning --use_peft --peft_method lora --quantization
 ## Where to configure settings?
 
-* [Training config file](../src/llama_recipes/configs/training.py) is the main config file that help to specify the settings for our run can be found in
+* [Training config file](../llama_recipes/configs/training.py) is the main config file that helps to specify the settings for our run; it can be found in the [configs folder](../llama_recipes/configs/).
 
 It lets us specify the training settings; everything from `model_name` to `dataset_name`, `batch_size` etc. can be set here. Below is the list of supported settings:
 
@@ -117,9 +117,9 @@ It let us specify the training settings, everything from `model_name` to `datase
 ```
 
-* [Datasets config file](../src/llama_recipes/configs/datasets.py) provides the available options for datasets.
+* [Datasets config file](../llama_recipes/configs/datasets.py) provides the available options for datasets.
 
-* [peft config file](../src/llama_recipes/configs/peft.py) provides the supported PEFT methods and respective settings that can be modified.
+* [peft config file](../llama_recipes/configs/peft.py) provides the supported PEFT methods and respective settings that can be modified.
 
 ## FLOPS Counting and Pytorch Profiling
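The training config shown earlier includes a `profiler_dir` field that is only used when profiling is enabled, which suggests how a profiled single-GPU run is launched. This is a hedged sketch: `--use_profiler` and its pairing with `--profiler_dir` are assumptions to verify against `configs/training.py`, and the base command follows the single_gpu.md example above:

```bash
# Sketch: enable the PyTorch profiler for a run and write traces to profiler_dir.
# --use_profiler and --profiler_dir are assumed from the training config fields.
python -m llama_recipes.finetuning --use_peft --peft_method lora --quantization \
  --model_name /path/to/model/checkpoints \
  --use_profiler --profiler_dir ./profiler_results \
  --output_dir /path/to/save/peft/model
```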