From 731bef06d7305889f782e49328d8598aa2890f6e Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Thu, 9 Jan 2025 08:51:04 -0800 Subject: [PATCH 01/23] move3p to home --- .../3p_integrations => 3p-integrations}/README.md | 0 .../getting_started_llama_3_on_amazon_bedrock.ipynb | 0 ...engineering_with_llama_2_on_amazon_bedrock.ipynb | 0 .../aws/react_llama_3_bedrock_wk.ipynb | 0 .../azure/Azure MaaS/azure_api_example.ipynb | 0 .../azure/README.md | 0 .../crusoe/README.md | 0 .../crusoe/vllm-fp8/README.md | 0 .../crusoe/vllm-fp8/assets/tpot_vs_qps_chart.png | Bin .../crusoe/vllm-fp8/assets/ttft_vs_qps_chart.png | Bin .../vllm-fp8/benchmarks/backend_request_func.py | 0 .../crusoe/vllm-fp8/benchmarks/benchmark_serving.py | 0 .../crusoe/vllm-fp8/benchmarks/sonnet.txt | 0 .../crusoe/vllm-fp8/convert_hf_to_fp8.py | 0 .../crusoe/vllm-fp8/main.tf | 0 .../crusoe/vllm-fp8/plot.py | 0 .../crusoe/vllm-fp8/pyproject.toml | 0 .../crusoe/vllm-fp8/run_benchmark.sh | 0 .../Function-Calling-101-Ecommerce.ipynb | 0 .../function-calling-101-ecommerce/customers.csv | 0 .../function-calling-101-ecommerce/orders.csv | 0 .../function-calling-101-ecommerce/products.csv | 0 .../data/employees.csv | 0 .../data/purchases.csv | 0 .../json-mode-function-calling-for-sql.ipynb | 0 .../employees-without-purchases.yaml | 0 .../verified-queries/most-expensive-purchase.yaml | 0 .../verified-queries/most-recent-purchases.yaml | 0 .../verified-queries/number-of-teslas.yaml | 0 .../SDOH-Json-mode.ipynb | 0 .../clinical_notes/00456321.txt | 0 .../clinical_notes/00567289.txt | 0 .../clinical_notes/00678934.txt | 0 .../clinical_notes/00785642.txt | 0 .../clinical_notes/00893247.txt | 0 .../llama3-stock-market-function-calling.ipynb | 0 .../parallel-tool-use/parallel-tool-use.ipynb | 0 .../parallel-tool-use/requirements.txt | 0 .../presidential_speeches.csv | 0 .../rag-langchain-presidential-speeches.ipynb | 0 .../conversational-chatbot-langchain/README.md | 0 .../conversational-chatbot-langchain/main.py | 0 
.../requirements.txt | 0 .../groq-example-templates/crewai-agents/README.md | 0 .../groq-example-templates/crewai-agents/main.py | 0 .../crewai-agents/requirements.txt | 0 .../README.md | 0 .../groq-quickstart-conversational-chatbot/main.py | 0 .../requirements.txt | 0 .../README.md | 0 .../main.py | 0 .../requirements.txt | 0 .../README.md | 0 .../main.py | 0 .../requirements.txt | 0 .../README.md | 0 .../presidential-speeches-rag-with-pinecone/main.py | 0 .../requirements.txt | 0 .../text-to-sql-json-mode/README.md | 0 .../text-to-sql-json-mode/data/employees.csv | 0 .../text-to-sql-json-mode/data/purchases.csv | 0 .../text-to-sql-json-mode/main.py | 0 .../text-to-sql-json-mode/prompts/base_prompt.txt | 0 .../text-to-sql-json-mode/requirements.txt | 0 .../verified-sql-function-calling/README.md | 0 .../data/employees.csv | 0 .../data/purchases.csv | 0 .../verified-sql-function-calling/main.py | 0 .../verified-sql-function-calling/requirements.txt | 0 .../employees-without-purchases.yaml | 0 .../verified-queries/most-expensive-purchase.yaml | 0 .../verified-queries/most-recent-purchases.yaml | 0 .../verified-queries/number-of-teslas.yaml | 0 .../groq/llama3_cookbook_groq.ipynb | 0 .../lamini/text2sql_memory_tuning/README.md | 0 .../assets/manual_filtering.png | Bin .../text2sql_memory_tuning/assets/website.png | Bin .../data/gold-test-set-v2.jsonl | 0 .../text2sql_memory_tuning/data/gold-test-set.jsonl | 0 .../generated_queries_large_filtered_cleaned.jsonl | 0 ...enerated_queries_v2_large_filtered_cleaned.jsonl | 0 .../data/training_data/generated_queries.jsonl | 0 .../training_data/generated_queries_large.jsonl | 0 .../generated_queries_large_filtered.jsonl | 0 .../data/training_data/generated_queries_v2.jsonl | 0 .../training_data/generated_queries_v2_large.jsonl | 0 .../generated_queries_v2_large_filtered.jsonl | 0 .../lamini/text2sql_memory_tuning/meta_lamini.ipynb | 0 .../lamini/text2sql_memory_tuning/nba_roster.db | Bin .../util/get_default_finetune_args.py 
| 0 .../text2sql_memory_tuning/util/get_rubric.py | 0 .../text2sql_memory_tuning/util/get_schema.py | 0 .../text2sql_memory_tuning/util/load_dataset.py | 0 .../util/make_llama_3_prompt.py | 0 .../text2sql_memory_tuning/util/parse_arguments.py | 0 .../text2sql_memory_tuning/util/setup_logging.py | 0 .../langchain/README.md | 0 .../langchain/langgraph_rag_agent.ipynb | 0 .../langchain/langgraph_rag_agent_local.ipynb | 0 .../langchain/langgraph_tool_calling_agent.ipynb | 0 .../llama_on_prem.md | 0 ...gentic_RAG_with_Llamaindex_L2_Tool_Calling.ipynb | 0 ...aindex_L3_Building_an_Agent_Reasoning_Loop.ipynb | 0 ...maindex_L4_Building_a_Multi-Document_Agent.ipynb | 0 .../llamaindex/dlai_agentic_rag/README.md | 0 .../llamaindex/llamaindex_cookbook.ipynb | 0 .../modal/many-llamas-human-eval/README.md | 0 .../modal/many-llamas-human-eval/download.py | 0 .../modal/many-llamas-human-eval/eval.py | 0 .../modal/many-llamas-human-eval/generate.py | 0 .../modal/many-llamas-human-eval/inference.py | 0 .../modal/many-llamas-human-eval/plot.py | 0 .../modal/many-llamas-human-eval/run_e2e.sh | 0 .../octoai/MediaGen.ipynb | 0 .../RAG_chatbot_example/RAG_chatbot_example.ipynb | 0 .../data/Llama Getting Started Guide.pdf | Bin .../octoai/RAG_chatbot_example/requirements.txt | 0 .../vectorstore/db_faiss/index.faiss | Bin .../vectorstore/db_faiss/index.pkl | Bin .../octoai/getting_to_know_llama.ipynb | 0 .../octoai/hello_llama_cloud.ipynb | 0 .../octoai/live_data.ipynb | 0 .../octoai/llama2_gradio.ipynb | 0 .../octoai/video_summary.ipynb | 0 .../tgi/README.md | 0 .../tgi/merge_lora_weights.py | 0 .../togetherai/README.md | 0 .../togetherai/datasets/movies.json | 0 .../togetherai/images/BERTScore.png | Bin .../togetherai/images/CoQA.png | Bin .../togetherai/images/ColPaliMaxSim-1.png | Bin .../togetherai/images/Nvidia_collage.png | Bin .../togetherai/images/UMAP.png | Bin .../togetherai/images/cRAG.png | Bin .../togetherai/images/cRAG_indexing.png | Bin 
.../togetherai/images/cRAG_querytime.png | Bin .../togetherai/images/cluster.png | Bin .../togetherai/images/colpali_arch.png | Bin .../togetherai/images/conversation.png | Bin .../togetherai/images/deploy_CFT.png | Bin .../togetherai/images/ft_model.png | Bin .../togetherai/images/mmrag_only.png | Bin .../togetherai/images/page_25.png | Bin .../togetherai/images/repetition_task.png | Bin .../togetherai/images/reranking.png | Bin .../togetherai/images/semantic_search.png | Bin .../togetherai/images/simple_RAG.png | Bin .../togetherai/images/structured_text_image.png | Bin .../togetherai/images/summarization.png | Bin .../togetherai/images/summary_task.png | Bin .../togetherai/images/text_RAG.png | Bin .../togetherai/images/together-color.jpg | Bin .../togetherai/images/together.gif | Bin .../togetherai/images/wandb_model.png | Bin .../knowledge_graphs_with_structured_outputs.ipynb | 0 .../togetherai/llama_contextual_RAG.ipynb | 0 ...imodal_RAG_with_nvidia_investor_slide_deck.ipynb | 0 .../pdf_to_podcast_using_llama_on_together.ipynb | 0 .../structured_text_extraction_from_images.ipynb | 0 .../text_RAG_using_llama_on_together.ipynb | 0 .../using_externally_hosted_llms.ipynb | 0 .../vllm/README.md | 0 .../vllm/inference.py | 0 163 files changed, 0 insertions(+), 0 deletions(-) rename {recipes/3p_integrations => 3p-integrations}/README.md (100%) rename {recipes/3p_integrations => 3p-integrations}/aws/getting_started_llama_3_on_amazon_bedrock.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/aws/prompt_engineering_with_llama_2_on_amazon_bedrock.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/aws/react_llama_3_bedrock_wk.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/azure/Azure MaaS/azure_api_example.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/azure/README.md (100%) rename {recipes/3p_integrations => 3p-integrations}/crusoe/README.md (100%) rename {recipes/3p_integrations => 
3p-integrations}/crusoe/vllm-fp8/README.md (100%) rename {recipes/3p_integrations => 3p-integrations}/crusoe/vllm-fp8/assets/tpot_vs_qps_chart.png (100%) rename {recipes/3p_integrations => 3p-integrations}/crusoe/vllm-fp8/assets/ttft_vs_qps_chart.png (100%) rename {recipes/3p_integrations => 3p-integrations}/crusoe/vllm-fp8/benchmarks/backend_request_func.py (100%) rename {recipes/3p_integrations => 3p-integrations}/crusoe/vllm-fp8/benchmarks/benchmark_serving.py (100%) rename {recipes/3p_integrations => 3p-integrations}/crusoe/vllm-fp8/benchmarks/sonnet.txt (100%) rename {recipes/3p_integrations => 3p-integrations}/crusoe/vllm-fp8/convert_hf_to_fp8.py (100%) rename {recipes/3p_integrations => 3p-integrations}/crusoe/vllm-fp8/main.tf (100%) rename {recipes/3p_integrations => 3p-integrations}/crusoe/vllm-fp8/plot.py (100%) rename {recipes/3p_integrations => 3p-integrations}/crusoe/vllm-fp8/pyproject.toml (100%) rename {recipes/3p_integrations => 3p-integrations}/crusoe/vllm-fp8/run_benchmark.sh (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/function-calling-101-ecommerce/Function-Calling-101-Ecommerce.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/function-calling-101-ecommerce/customers.csv (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/function-calling-101-ecommerce/orders.csv (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/function-calling-101-ecommerce/products.csv (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/json-mode-function-calling-for-sql/data/employees.csv (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/json-mode-function-calling-for-sql/data/purchases.csv (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/json-mode-function-calling-for-sql/json-mode-function-calling-for-sql.ipynb (100%) rename 
{recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/employees-without-purchases.yaml (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/most-expensive-purchase.yaml (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/most-recent-purchases.yaml (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/number-of-teslas.yaml (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/json-mode-social-determinants-of-health/SDOH-Json-mode.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00456321.txt (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00567289.txt (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00678934.txt (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00785642.txt (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00893247.txt (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/llama3-stock-market-function-calling/llama3-stock-market-function-calling.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/parallel-tool-use/parallel-tool-use.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/parallel-tool-use/requirements.txt (100%) rename {recipes/3p_integrations => 
3p-integrations}/groq/groq-api-cookbook/rag-langchain-presidential-speeches/presidential_speeches.csv (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-api-cookbook/rag-langchain-presidential-speeches/rag-langchain-presidential-speeches.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/conversational-chatbot-langchain/README.md (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/conversational-chatbot-langchain/main.py (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/conversational-chatbot-langchain/requirements.txt (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/crewai-agents/README.md (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/crewai-agents/main.py (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/crewai-agents/requirements.txt (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/groq-quickstart-conversational-chatbot/README.md (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/groq-quickstart-conversational-chatbot/main.py (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/groq-quickstart-conversational-chatbot/requirements.txt (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/README.md (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/main.py (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/requirements.txt (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/README.md 
(100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/main.py (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/requirements.txt (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/README.md (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/main.py (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/requirements.txt (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/text-to-sql-json-mode/README.md (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/text-to-sql-json-mode/data/employees.csv (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/text-to-sql-json-mode/data/purchases.csv (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/text-to-sql-json-mode/main.py (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/text-to-sql-json-mode/prompts/base_prompt.txt (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/text-to-sql-json-mode/requirements.txt (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/verified-sql-function-calling/README.md (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/verified-sql-function-calling/data/employees.csv (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/verified-sql-function-calling/data/purchases.csv (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/verified-sql-function-calling/main.py (100%) 
rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/verified-sql-function-calling/requirements.txt (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/verified-sql-function-calling/verified-queries/employees-without-purchases.yaml (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/verified-sql-function-calling/verified-queries/most-expensive-purchase.yaml (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/verified-sql-function-calling/verified-queries/most-recent-purchases.yaml (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/groq-example-templates/verified-sql-function-calling/verified-queries/number-of-teslas.yaml (100%) rename {recipes/3p_integrations => 3p-integrations}/groq/llama3_cookbook_groq.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/README.md (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/assets/manual_filtering.png (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/assets/website.png (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/data/gold-test-set-v2.jsonl (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/data/gold-test-set.jsonl (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_large_filtered_cleaned.jsonl (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_v2_large_filtered_cleaned.jsonl (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/data/training_data/generated_queries.jsonl (100%) rename {recipes/3p_integrations => 
3p-integrations}/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large.jsonl (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large_filtered.jsonl (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2.jsonl (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large.jsonl (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large_filtered.jsonl (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/meta_lamini.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/nba_roster.db (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/util/get_default_finetune_args.py (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/util/get_rubric.py (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/util/get_schema.py (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/util/load_dataset.py (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/util/make_llama_3_prompt.py (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/util/parse_arguments.py (100%) rename {recipes/3p_integrations => 3p-integrations}/lamini/text2sql_memory_tuning/util/setup_logging.py (100%) rename {recipes/3p_integrations => 3p-integrations}/langchain/README.md (100%) rename {recipes/3p_integrations => 3p-integrations}/langchain/langgraph_rag_agent.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/langchain/langgraph_rag_agent_local.ipynb (100%) rename {recipes/3p_integrations => 
3p-integrations}/langchain/langgraph_tool_calling_agent.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/llama_on_prem.md (100%) rename {recipes/3p_integrations => 3p-integrations}/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L2_Tool_Calling.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L3_Building_an_Agent_Reasoning_Loop.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L4_Building_a_Multi-Document_Agent.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/llamaindex/dlai_agentic_rag/README.md (100%) rename {recipes/3p_integrations => 3p-integrations}/llamaindex/llamaindex_cookbook.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/modal/many-llamas-human-eval/README.md (100%) rename {recipes/3p_integrations => 3p-integrations}/modal/many-llamas-human-eval/download.py (100%) rename {recipes/3p_integrations => 3p-integrations}/modal/many-llamas-human-eval/eval.py (100%) rename {recipes/3p_integrations => 3p-integrations}/modal/many-llamas-human-eval/generate.py (100%) rename {recipes/3p_integrations => 3p-integrations}/modal/many-llamas-human-eval/inference.py (100%) rename {recipes/3p_integrations => 3p-integrations}/modal/many-llamas-human-eval/plot.py (100%) rename {recipes/3p_integrations => 3p-integrations}/modal/many-llamas-human-eval/run_e2e.sh (100%) rename {recipes/3p_integrations => 3p-integrations}/octoai/MediaGen.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/octoai/RAG_chatbot_example/RAG_chatbot_example.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/octoai/RAG_chatbot_example/data/Llama Getting Started Guide.pdf (100%) rename {recipes/3p_integrations => 3p-integrations}/octoai/RAG_chatbot_example/requirements.txt (100%) rename {recipes/3p_integrations => 
3p-integrations}/octoai/RAG_chatbot_example/vectorstore/db_faiss/index.faiss (100%) rename {recipes/3p_integrations => 3p-integrations}/octoai/RAG_chatbot_example/vectorstore/db_faiss/index.pkl (100%) rename {recipes/3p_integrations => 3p-integrations}/octoai/getting_to_know_llama.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/octoai/hello_llama_cloud.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/octoai/live_data.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/octoai/llama2_gradio.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/octoai/video_summary.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/tgi/README.md (100%) rename {recipes/3p_integrations => 3p-integrations}/tgi/merge_lora_weights.py (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/README.md (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/datasets/movies.json (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/BERTScore.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/CoQA.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/ColPaliMaxSim-1.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/Nvidia_collage.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/UMAP.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/cRAG.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/cRAG_indexing.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/cRAG_querytime.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/cluster.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/colpali_arch.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/conversation.png (100%) rename 
{recipes/3p_integrations => 3p-integrations}/togetherai/images/deploy_CFT.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/ft_model.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/mmrag_only.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/page_25.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/repetition_task.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/reranking.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/semantic_search.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/simple_RAG.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/structured_text_image.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/summarization.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/summary_task.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/text_RAG.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/together-color.jpg (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/together.gif (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/images/wandb_model.png (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/knowledge_graphs_with_structured_outputs.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/llama_contextual_RAG.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/multimodal_RAG_with_nvidia_investor_slide_deck.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/pdf_to_podcast_using_llama_on_together.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/togetherai/structured_text_extraction_from_images.ipynb (100%) rename {recipes/3p_integrations => 
3p-integrations}/togetherai/text_RAG_using_llama_on_together.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/using_externally_hosted_llms.ipynb (100%) rename {recipes/3p_integrations => 3p-integrations}/vllm/README.md (100%) rename {recipes/3p_integrations => 3p-integrations}/vllm/inference.py (100%) diff --git a/recipes/3p_integrations/README.md b/3p-integrations/README.md similarity index 100% rename from recipes/3p_integrations/README.md rename to 3p-integrations/README.md diff --git a/recipes/3p_integrations/aws/getting_started_llama_3_on_amazon_bedrock.ipynb b/3p-integrations/aws/getting_started_llama_3_on_amazon_bedrock.ipynb similarity index 100% rename from recipes/3p_integrations/aws/getting_started_llama_3_on_amazon_bedrock.ipynb rename to 3p-integrations/aws/getting_started_llama_3_on_amazon_bedrock.ipynb diff --git a/recipes/3p_integrations/aws/prompt_engineering_with_llama_2_on_amazon_bedrock.ipynb b/3p-integrations/aws/prompt_engineering_with_llama_2_on_amazon_bedrock.ipynb similarity index 100% rename from recipes/3p_integrations/aws/prompt_engineering_with_llama_2_on_amazon_bedrock.ipynb rename to 3p-integrations/aws/prompt_engineering_with_llama_2_on_amazon_bedrock.ipynb diff --git a/recipes/3p_integrations/aws/react_llama_3_bedrock_wk.ipynb b/3p-integrations/aws/react_llama_3_bedrock_wk.ipynb similarity index 100% rename from recipes/3p_integrations/aws/react_llama_3_bedrock_wk.ipynb rename to 3p-integrations/aws/react_llama_3_bedrock_wk.ipynb diff --git a/recipes/3p_integrations/azure/Azure MaaS/azure_api_example.ipynb b/3p-integrations/azure/Azure MaaS/azure_api_example.ipynb similarity index 100% rename from recipes/3p_integrations/azure/Azure MaaS/azure_api_example.ipynb rename to 3p-integrations/azure/Azure MaaS/azure_api_example.ipynb diff --git a/recipes/3p_integrations/azure/README.md b/3p-integrations/azure/README.md similarity index 100% rename from recipes/3p_integrations/azure/README.md rename to 
3p-integrations/azure/README.md diff --git a/recipes/3p_integrations/crusoe/README.md b/3p-integrations/crusoe/README.md similarity index 100% rename from recipes/3p_integrations/crusoe/README.md rename to 3p-integrations/crusoe/README.md diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/README.md b/3p-integrations/crusoe/vllm-fp8/README.md similarity index 100% rename from recipes/3p_integrations/crusoe/vllm-fp8/README.md rename to 3p-integrations/crusoe/vllm-fp8/README.md diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/assets/tpot_vs_qps_chart.png b/3p-integrations/crusoe/vllm-fp8/assets/tpot_vs_qps_chart.png similarity index 100% rename from recipes/3p_integrations/crusoe/vllm-fp8/assets/tpot_vs_qps_chart.png rename to 3p-integrations/crusoe/vllm-fp8/assets/tpot_vs_qps_chart.png diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/assets/ttft_vs_qps_chart.png b/3p-integrations/crusoe/vllm-fp8/assets/ttft_vs_qps_chart.png similarity index 100% rename from recipes/3p_integrations/crusoe/vllm-fp8/assets/ttft_vs_qps_chart.png rename to 3p-integrations/crusoe/vllm-fp8/assets/ttft_vs_qps_chart.png diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/benchmarks/backend_request_func.py b/3p-integrations/crusoe/vllm-fp8/benchmarks/backend_request_func.py similarity index 100% rename from recipes/3p_integrations/crusoe/vllm-fp8/benchmarks/backend_request_func.py rename to 3p-integrations/crusoe/vllm-fp8/benchmarks/backend_request_func.py diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/benchmarks/benchmark_serving.py b/3p-integrations/crusoe/vllm-fp8/benchmarks/benchmark_serving.py similarity index 100% rename from recipes/3p_integrations/crusoe/vllm-fp8/benchmarks/benchmark_serving.py rename to 3p-integrations/crusoe/vllm-fp8/benchmarks/benchmark_serving.py diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/benchmarks/sonnet.txt b/3p-integrations/crusoe/vllm-fp8/benchmarks/sonnet.txt similarity index 100% rename from 
recipes/3p_integrations/crusoe/vllm-fp8/benchmarks/sonnet.txt rename to 3p-integrations/crusoe/vllm-fp8/benchmarks/sonnet.txt diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/convert_hf_to_fp8.py b/3p-integrations/crusoe/vllm-fp8/convert_hf_to_fp8.py similarity index 100% rename from recipes/3p_integrations/crusoe/vllm-fp8/convert_hf_to_fp8.py rename to 3p-integrations/crusoe/vllm-fp8/convert_hf_to_fp8.py diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/main.tf b/3p-integrations/crusoe/vllm-fp8/main.tf similarity index 100% rename from recipes/3p_integrations/crusoe/vllm-fp8/main.tf rename to 3p-integrations/crusoe/vllm-fp8/main.tf diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/plot.py b/3p-integrations/crusoe/vllm-fp8/plot.py similarity index 100% rename from recipes/3p_integrations/crusoe/vllm-fp8/plot.py rename to 3p-integrations/crusoe/vllm-fp8/plot.py diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/pyproject.toml b/3p-integrations/crusoe/vllm-fp8/pyproject.toml similarity index 100% rename from recipes/3p_integrations/crusoe/vllm-fp8/pyproject.toml rename to 3p-integrations/crusoe/vllm-fp8/pyproject.toml diff --git a/recipes/3p_integrations/crusoe/vllm-fp8/run_benchmark.sh b/3p-integrations/crusoe/vllm-fp8/run_benchmark.sh similarity index 100% rename from recipes/3p_integrations/crusoe/vllm-fp8/run_benchmark.sh rename to 3p-integrations/crusoe/vllm-fp8/run_benchmark.sh diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/Function-Calling-101-Ecommerce.ipynb b/3p-integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/Function-Calling-101-Ecommerce.ipynb similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/Function-Calling-101-Ecommerce.ipynb rename to 3p-integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/Function-Calling-101-Ecommerce.ipynb diff --git 
a/recipes/3p_integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/customers.csv b/3p-integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/customers.csv similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/customers.csv rename to 3p-integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/customers.csv diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/orders.csv b/3p-integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/orders.csv similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/orders.csv rename to 3p-integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/orders.csv diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/products.csv b/3p-integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/products.csv similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/products.csv rename to 3p-integrations/groq/groq-api-cookbook/function-calling-101-ecommerce/products.csv diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/data/employees.csv b/3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/data/employees.csv similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/data/employees.csv rename to 3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/data/employees.csv diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/data/purchases.csv b/3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/data/purchases.csv similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/data/purchases.csv 
rename to 3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/data/purchases.csv diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/json-mode-function-calling-for-sql.ipynb b/3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/json-mode-function-calling-for-sql.ipynb similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/json-mode-function-calling-for-sql.ipynb rename to 3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/json-mode-function-calling-for-sql.ipynb diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/employees-without-purchases.yaml b/3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/employees-without-purchases.yaml similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/employees-without-purchases.yaml rename to 3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/employees-without-purchases.yaml diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/most-expensive-purchase.yaml b/3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/most-expensive-purchase.yaml similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/most-expensive-purchase.yaml rename to 3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/most-expensive-purchase.yaml diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/most-recent-purchases.yaml 
b/3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/most-recent-purchases.yaml similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/most-recent-purchases.yaml rename to 3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/most-recent-purchases.yaml diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/number-of-teslas.yaml b/3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/number-of-teslas.yaml similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/number-of-teslas.yaml rename to 3p-integrations/groq/groq-api-cookbook/json-mode-function-calling-for-sql/verified-queries/number-of-teslas.yaml diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/SDOH-Json-mode.ipynb b/3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/SDOH-Json-mode.ipynb similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/SDOH-Json-mode.ipynb rename to 3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/SDOH-Json-mode.ipynb diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00456321.txt b/3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00456321.txt similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00456321.txt rename to 3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00456321.txt diff --git 
a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00567289.txt b/3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00567289.txt similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00567289.txt rename to 3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00567289.txt diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00678934.txt b/3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00678934.txt similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00678934.txt rename to 3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00678934.txt diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00785642.txt b/3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00785642.txt similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00785642.txt rename to 3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00785642.txt diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00893247.txt b/3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00893247.txt similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00893247.txt rename to 
3p-integrations/groq/groq-api-cookbook/json-mode-social-determinants-of-health/clinical_notes/00893247.txt diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/llama3-stock-market-function-calling/llama3-stock-market-function-calling.ipynb b/3p-integrations/groq/groq-api-cookbook/llama3-stock-market-function-calling/llama3-stock-market-function-calling.ipynb similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/llama3-stock-market-function-calling/llama3-stock-market-function-calling.ipynb rename to 3p-integrations/groq/groq-api-cookbook/llama3-stock-market-function-calling/llama3-stock-market-function-calling.ipynb diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/parallel-tool-use/parallel-tool-use.ipynb b/3p-integrations/groq/groq-api-cookbook/parallel-tool-use/parallel-tool-use.ipynb similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/parallel-tool-use/parallel-tool-use.ipynb rename to 3p-integrations/groq/groq-api-cookbook/parallel-tool-use/parallel-tool-use.ipynb diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/parallel-tool-use/requirements.txt b/3p-integrations/groq/groq-api-cookbook/parallel-tool-use/requirements.txt similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/parallel-tool-use/requirements.txt rename to 3p-integrations/groq/groq-api-cookbook/parallel-tool-use/requirements.txt diff --git a/recipes/3p_integrations/groq/groq-api-cookbook/rag-langchain-presidential-speeches/presidential_speeches.csv b/3p-integrations/groq/groq-api-cookbook/rag-langchain-presidential-speeches/presidential_speeches.csv similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/rag-langchain-presidential-speeches/presidential_speeches.csv rename to 3p-integrations/groq/groq-api-cookbook/rag-langchain-presidential-speeches/presidential_speeches.csv diff --git 
a/recipes/3p_integrations/groq/groq-api-cookbook/rag-langchain-presidential-speeches/rag-langchain-presidential-speeches.ipynb b/3p-integrations/groq/groq-api-cookbook/rag-langchain-presidential-speeches/rag-langchain-presidential-speeches.ipynb similarity index 100% rename from recipes/3p_integrations/groq/groq-api-cookbook/rag-langchain-presidential-speeches/rag-langchain-presidential-speeches.ipynb rename to 3p-integrations/groq/groq-api-cookbook/rag-langchain-presidential-speeches/rag-langchain-presidential-speeches.ipynb diff --git a/recipes/3p_integrations/groq/groq-example-templates/conversational-chatbot-langchain/README.md b/3p-integrations/groq/groq-example-templates/conversational-chatbot-langchain/README.md similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/conversational-chatbot-langchain/README.md rename to 3p-integrations/groq/groq-example-templates/conversational-chatbot-langchain/README.md diff --git a/recipes/3p_integrations/groq/groq-example-templates/conversational-chatbot-langchain/main.py b/3p-integrations/groq/groq-example-templates/conversational-chatbot-langchain/main.py similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/conversational-chatbot-langchain/main.py rename to 3p-integrations/groq/groq-example-templates/conversational-chatbot-langchain/main.py diff --git a/recipes/3p_integrations/groq/groq-example-templates/conversational-chatbot-langchain/requirements.txt b/3p-integrations/groq/groq-example-templates/conversational-chatbot-langchain/requirements.txt similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/conversational-chatbot-langchain/requirements.txt rename to 3p-integrations/groq/groq-example-templates/conversational-chatbot-langchain/requirements.txt diff --git a/recipes/3p_integrations/groq/groq-example-templates/crewai-agents/README.md b/3p-integrations/groq/groq-example-templates/crewai-agents/README.md similarity index 
100% rename from recipes/3p_integrations/groq/groq-example-templates/crewai-agents/README.md rename to 3p-integrations/groq/groq-example-templates/crewai-agents/README.md diff --git a/recipes/3p_integrations/groq/groq-example-templates/crewai-agents/main.py b/3p-integrations/groq/groq-example-templates/crewai-agents/main.py similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/crewai-agents/main.py rename to 3p-integrations/groq/groq-example-templates/crewai-agents/main.py diff --git a/recipes/3p_integrations/groq/groq-example-templates/crewai-agents/requirements.txt b/3p-integrations/groq/groq-example-templates/crewai-agents/requirements.txt similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/crewai-agents/requirements.txt rename to 3p-integrations/groq/groq-example-templates/crewai-agents/requirements.txt diff --git a/recipes/3p_integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/README.md b/3p-integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/README.md similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/README.md rename to 3p-integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/README.md diff --git a/recipes/3p_integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/main.py b/3p-integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/main.py similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/main.py rename to 3p-integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/main.py diff --git a/recipes/3p_integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/requirements.txt b/3p-integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/requirements.txt 
similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/requirements.txt rename to 3p-integrations/groq/groq-example-templates/groq-quickstart-conversational-chatbot/requirements.txt diff --git a/recipes/3p_integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/README.md b/3p-integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/README.md similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/README.md rename to 3p-integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/README.md diff --git a/recipes/3p_integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/main.py b/3p-integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/main.py similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/main.py rename to 3p-integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/main.py diff --git a/recipes/3p_integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/requirements.txt b/3p-integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/requirements.txt similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/requirements.txt rename to 3p-integrations/groq/groq-example-templates/groqing-the-stock-market-function-calling-llama3/requirements.txt diff --git a/recipes/3p_integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/README.md b/3p-integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/README.md similarity index 100% rename from 
recipes/3p_integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/README.md rename to 3p-integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/README.md diff --git a/recipes/3p_integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/main.py b/3p-integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/main.py similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/main.py rename to 3p-integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/main.py diff --git a/recipes/3p_integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/requirements.txt b/3p-integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/requirements.txt similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/requirements.txt rename to 3p-integrations/groq/groq-example-templates/llamachat-conversational-chatbot-with-llamaIndex/requirements.txt diff --git a/recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/README.md b/3p-integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/README.md similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/README.md rename to 3p-integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/README.md diff --git a/recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/main.py b/3p-integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/main.py similarity index 100% rename from 
recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/main.py rename to 3p-integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/main.py diff --git a/recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/requirements.txt b/3p-integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/requirements.txt similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/requirements.txt rename to 3p-integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/requirements.txt diff --git a/recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/README.md b/3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/README.md similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/README.md rename to 3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/README.md diff --git a/recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/data/employees.csv b/3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/data/employees.csv similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/data/employees.csv rename to 3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/data/employees.csv diff --git a/recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/data/purchases.csv b/3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/data/purchases.csv similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/data/purchases.csv rename to 3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/data/purchases.csv diff --git a/recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/main.py 
b/3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/main.py similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/main.py rename to 3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/main.py diff --git a/recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/prompts/base_prompt.txt b/3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/prompts/base_prompt.txt similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/prompts/base_prompt.txt rename to 3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/prompts/base_prompt.txt diff --git a/recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/requirements.txt b/3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/requirements.txt similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/text-to-sql-json-mode/requirements.txt rename to 3p-integrations/groq/groq-example-templates/text-to-sql-json-mode/requirements.txt diff --git a/recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/README.md b/3p-integrations/groq/groq-example-templates/verified-sql-function-calling/README.md similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/README.md rename to 3p-integrations/groq/groq-example-templates/verified-sql-function-calling/README.md diff --git a/recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/data/employees.csv b/3p-integrations/groq/groq-example-templates/verified-sql-function-calling/data/employees.csv similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/data/employees.csv rename to 3p-integrations/groq/groq-example-templates/verified-sql-function-calling/data/employees.csv diff --git 
a/recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/data/purchases.csv b/3p-integrations/groq/groq-example-templates/verified-sql-function-calling/data/purchases.csv similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/data/purchases.csv rename to 3p-integrations/groq/groq-example-templates/verified-sql-function-calling/data/purchases.csv diff --git a/recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/main.py b/3p-integrations/groq/groq-example-templates/verified-sql-function-calling/main.py similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/main.py rename to 3p-integrations/groq/groq-example-templates/verified-sql-function-calling/main.py diff --git a/recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/requirements.txt b/3p-integrations/groq/groq-example-templates/verified-sql-function-calling/requirements.txt similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/requirements.txt rename to 3p-integrations/groq/groq-example-templates/verified-sql-function-calling/requirements.txt diff --git a/recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/employees-without-purchases.yaml b/3p-integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/employees-without-purchases.yaml similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/employees-without-purchases.yaml rename to 3p-integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/employees-without-purchases.yaml diff --git a/recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/most-expensive-purchase.yaml 
b/3p-integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/most-expensive-purchase.yaml similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/most-expensive-purchase.yaml rename to 3p-integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/most-expensive-purchase.yaml diff --git a/recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/most-recent-purchases.yaml b/3p-integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/most-recent-purchases.yaml similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/most-recent-purchases.yaml rename to 3p-integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/most-recent-purchases.yaml diff --git a/recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/number-of-teslas.yaml b/3p-integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/number-of-teslas.yaml similarity index 100% rename from recipes/3p_integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/number-of-teslas.yaml rename to 3p-integrations/groq/groq-example-templates/verified-sql-function-calling/verified-queries/number-of-teslas.yaml diff --git a/recipes/3p_integrations/groq/llama3_cookbook_groq.ipynb b/3p-integrations/groq/llama3_cookbook_groq.ipynb similarity index 100% rename from recipes/3p_integrations/groq/llama3_cookbook_groq.ipynb rename to 3p-integrations/groq/llama3_cookbook_groq.ipynb diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/README.md b/3p-integrations/lamini/text2sql_memory_tuning/README.md similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/README.md rename to 
3p-integrations/lamini/text2sql_memory_tuning/README.md diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/assets/manual_filtering.png b/3p-integrations/lamini/text2sql_memory_tuning/assets/manual_filtering.png similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/assets/manual_filtering.png rename to 3p-integrations/lamini/text2sql_memory_tuning/assets/manual_filtering.png diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/assets/website.png b/3p-integrations/lamini/text2sql_memory_tuning/assets/website.png similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/assets/website.png rename to 3p-integrations/lamini/text2sql_memory_tuning/assets/website.png diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/gold-test-set-v2.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/gold-test-set-v2.jsonl similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/gold-test-set-v2.jsonl rename to 3p-integrations/lamini/text2sql_memory_tuning/data/gold-test-set-v2.jsonl diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/gold-test-set.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/gold-test-set.jsonl similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/gold-test-set.jsonl rename to 3p-integrations/lamini/text2sql_memory_tuning/data/gold-test-set.jsonl diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_large_filtered_cleaned.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_large_filtered_cleaned.jsonl similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_large_filtered_cleaned.jsonl rename to 
3p-integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_large_filtered_cleaned.jsonl diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_v2_large_filtered_cleaned.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_v2_large_filtered_cleaned.jsonl similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_v2_large_filtered_cleaned.jsonl rename to 3p-integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_v2_large_filtered_cleaned.jsonl diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries.jsonl similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries.jsonl rename to 3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries.jsonl diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large.jsonl similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large.jsonl rename to 3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large.jsonl diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large_filtered.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large_filtered.jsonl similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large_filtered.jsonl rename to 
3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large_filtered.jsonl diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2.jsonl similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2.jsonl rename to 3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2.jsonl diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large.jsonl similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large.jsonl rename to 3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large.jsonl diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large_filtered.jsonl b/3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large_filtered.jsonl similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large_filtered.jsonl rename to 3p-integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large_filtered.jsonl diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/meta_lamini.ipynb b/3p-integrations/lamini/text2sql_memory_tuning/meta_lamini.ipynb similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/meta_lamini.ipynb rename to 3p-integrations/lamini/text2sql_memory_tuning/meta_lamini.ipynb diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/nba_roster.db b/3p-integrations/lamini/text2sql_memory_tuning/nba_roster.db 
similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/nba_roster.db rename to 3p-integrations/lamini/text2sql_memory_tuning/nba_roster.db diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/util/get_default_finetune_args.py b/3p-integrations/lamini/text2sql_memory_tuning/util/get_default_finetune_args.py similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/util/get_default_finetune_args.py rename to 3p-integrations/lamini/text2sql_memory_tuning/util/get_default_finetune_args.py diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/util/get_rubric.py b/3p-integrations/lamini/text2sql_memory_tuning/util/get_rubric.py similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/util/get_rubric.py rename to 3p-integrations/lamini/text2sql_memory_tuning/util/get_rubric.py diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/util/get_schema.py b/3p-integrations/lamini/text2sql_memory_tuning/util/get_schema.py similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/util/get_schema.py rename to 3p-integrations/lamini/text2sql_memory_tuning/util/get_schema.py diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/util/load_dataset.py b/3p-integrations/lamini/text2sql_memory_tuning/util/load_dataset.py similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/util/load_dataset.py rename to 3p-integrations/lamini/text2sql_memory_tuning/util/load_dataset.py diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/util/make_llama_3_prompt.py b/3p-integrations/lamini/text2sql_memory_tuning/util/make_llama_3_prompt.py similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/util/make_llama_3_prompt.py rename to 3p-integrations/lamini/text2sql_memory_tuning/util/make_llama_3_prompt.py diff --git 
a/recipes/3p_integrations/lamini/text2sql_memory_tuning/util/parse_arguments.py b/3p-integrations/lamini/text2sql_memory_tuning/util/parse_arguments.py similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/util/parse_arguments.py rename to 3p-integrations/lamini/text2sql_memory_tuning/util/parse_arguments.py diff --git a/recipes/3p_integrations/lamini/text2sql_memory_tuning/util/setup_logging.py b/3p-integrations/lamini/text2sql_memory_tuning/util/setup_logging.py similarity index 100% rename from recipes/3p_integrations/lamini/text2sql_memory_tuning/util/setup_logging.py rename to 3p-integrations/lamini/text2sql_memory_tuning/util/setup_logging.py diff --git a/recipes/3p_integrations/langchain/README.md b/3p-integrations/langchain/README.md similarity index 100% rename from recipes/3p_integrations/langchain/README.md rename to 3p-integrations/langchain/README.md diff --git a/recipes/3p_integrations/langchain/langgraph_rag_agent.ipynb b/3p-integrations/langchain/langgraph_rag_agent.ipynb similarity index 100% rename from recipes/3p_integrations/langchain/langgraph_rag_agent.ipynb rename to 3p-integrations/langchain/langgraph_rag_agent.ipynb diff --git a/recipes/3p_integrations/langchain/langgraph_rag_agent_local.ipynb b/3p-integrations/langchain/langgraph_rag_agent_local.ipynb similarity index 100% rename from recipes/3p_integrations/langchain/langgraph_rag_agent_local.ipynb rename to 3p-integrations/langchain/langgraph_rag_agent_local.ipynb diff --git a/recipes/3p_integrations/langchain/langgraph_tool_calling_agent.ipynb b/3p-integrations/langchain/langgraph_tool_calling_agent.ipynb similarity index 100% rename from recipes/3p_integrations/langchain/langgraph_tool_calling_agent.ipynb rename to 3p-integrations/langchain/langgraph_tool_calling_agent.ipynb diff --git a/recipes/3p_integrations/llama_on_prem.md b/3p-integrations/llama_on_prem.md similarity index 100% rename from recipes/3p_integrations/llama_on_prem.md rename to 
3p-integrations/llama_on_prem.md diff --git a/recipes/3p_integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L2_Tool_Calling.ipynb b/3p-integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L2_Tool_Calling.ipynb similarity index 100% rename from recipes/3p_integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L2_Tool_Calling.ipynb rename to 3p-integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L2_Tool_Calling.ipynb diff --git a/recipes/3p_integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L3_Building_an_Agent_Reasoning_Loop.ipynb b/3p-integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L3_Building_an_Agent_Reasoning_Loop.ipynb similarity index 100% rename from recipes/3p_integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L3_Building_an_Agent_Reasoning_Loop.ipynb rename to 3p-integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L3_Building_an_Agent_Reasoning_Loop.ipynb diff --git a/recipes/3p_integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L4_Building_a_Multi-Document_Agent.ipynb b/3p-integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L4_Building_a_Multi-Document_Agent.ipynb similarity index 100% rename from recipes/3p_integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L4_Building_a_Multi-Document_Agent.ipynb rename to 3p-integrations/llamaindex/dlai_agentic_rag/Building_Agentic_RAG_with_Llamaindex_L4_Building_a_Multi-Document_Agent.ipynb diff --git a/recipes/3p_integrations/llamaindex/dlai_agentic_rag/README.md b/3p-integrations/llamaindex/dlai_agentic_rag/README.md similarity index 100% rename from recipes/3p_integrations/llamaindex/dlai_agentic_rag/README.md rename to 3p-integrations/llamaindex/dlai_agentic_rag/README.md diff --git 
a/recipes/3p_integrations/llamaindex/llamaindex_cookbook.ipynb b/3p-integrations/llamaindex/llamaindex_cookbook.ipynb similarity index 100% rename from recipes/3p_integrations/llamaindex/llamaindex_cookbook.ipynb rename to 3p-integrations/llamaindex/llamaindex_cookbook.ipynb diff --git a/recipes/3p_integrations/modal/many-llamas-human-eval/README.md b/3p-integrations/modal/many-llamas-human-eval/README.md similarity index 100% rename from recipes/3p_integrations/modal/many-llamas-human-eval/README.md rename to 3p-integrations/modal/many-llamas-human-eval/README.md diff --git a/recipes/3p_integrations/modal/many-llamas-human-eval/download.py b/3p-integrations/modal/many-llamas-human-eval/download.py similarity index 100% rename from recipes/3p_integrations/modal/many-llamas-human-eval/download.py rename to 3p-integrations/modal/many-llamas-human-eval/download.py diff --git a/recipes/3p_integrations/modal/many-llamas-human-eval/eval.py b/3p-integrations/modal/many-llamas-human-eval/eval.py similarity index 100% rename from recipes/3p_integrations/modal/many-llamas-human-eval/eval.py rename to 3p-integrations/modal/many-llamas-human-eval/eval.py diff --git a/recipes/3p_integrations/modal/many-llamas-human-eval/generate.py b/3p-integrations/modal/many-llamas-human-eval/generate.py similarity index 100% rename from recipes/3p_integrations/modal/many-llamas-human-eval/generate.py rename to 3p-integrations/modal/many-llamas-human-eval/generate.py diff --git a/recipes/3p_integrations/modal/many-llamas-human-eval/inference.py b/3p-integrations/modal/many-llamas-human-eval/inference.py similarity index 100% rename from recipes/3p_integrations/modal/many-llamas-human-eval/inference.py rename to 3p-integrations/modal/many-llamas-human-eval/inference.py diff --git a/recipes/3p_integrations/modal/many-llamas-human-eval/plot.py b/3p-integrations/modal/many-llamas-human-eval/plot.py similarity index 100% rename from recipes/3p_integrations/modal/many-llamas-human-eval/plot.py 
rename to 3p-integrations/modal/many-llamas-human-eval/plot.py diff --git a/recipes/3p_integrations/modal/many-llamas-human-eval/run_e2e.sh b/3p-integrations/modal/many-llamas-human-eval/run_e2e.sh similarity index 100% rename from recipes/3p_integrations/modal/many-llamas-human-eval/run_e2e.sh rename to 3p-integrations/modal/many-llamas-human-eval/run_e2e.sh diff --git a/recipes/3p_integrations/octoai/MediaGen.ipynb b/3p-integrations/octoai/MediaGen.ipynb similarity index 100% rename from recipes/3p_integrations/octoai/MediaGen.ipynb rename to 3p-integrations/octoai/MediaGen.ipynb diff --git a/recipes/3p_integrations/octoai/RAG_chatbot_example/RAG_chatbot_example.ipynb b/3p-integrations/octoai/RAG_chatbot_example/RAG_chatbot_example.ipynb similarity index 100% rename from recipes/3p_integrations/octoai/RAG_chatbot_example/RAG_chatbot_example.ipynb rename to 3p-integrations/octoai/RAG_chatbot_example/RAG_chatbot_example.ipynb diff --git a/recipes/3p_integrations/octoai/RAG_chatbot_example/data/Llama Getting Started Guide.pdf b/3p-integrations/octoai/RAG_chatbot_example/data/Llama Getting Started Guide.pdf similarity index 100% rename from recipes/3p_integrations/octoai/RAG_chatbot_example/data/Llama Getting Started Guide.pdf rename to 3p-integrations/octoai/RAG_chatbot_example/data/Llama Getting Started Guide.pdf diff --git a/recipes/3p_integrations/octoai/RAG_chatbot_example/requirements.txt b/3p-integrations/octoai/RAG_chatbot_example/requirements.txt similarity index 100% rename from recipes/3p_integrations/octoai/RAG_chatbot_example/requirements.txt rename to 3p-integrations/octoai/RAG_chatbot_example/requirements.txt diff --git a/recipes/3p_integrations/octoai/RAG_chatbot_example/vectorstore/db_faiss/index.faiss b/3p-integrations/octoai/RAG_chatbot_example/vectorstore/db_faiss/index.faiss similarity index 100% rename from recipes/3p_integrations/octoai/RAG_chatbot_example/vectorstore/db_faiss/index.faiss rename to 
3p-integrations/octoai/RAG_chatbot_example/vectorstore/db_faiss/index.faiss diff --git a/recipes/3p_integrations/octoai/RAG_chatbot_example/vectorstore/db_faiss/index.pkl b/3p-integrations/octoai/RAG_chatbot_example/vectorstore/db_faiss/index.pkl similarity index 100% rename from recipes/3p_integrations/octoai/RAG_chatbot_example/vectorstore/db_faiss/index.pkl rename to 3p-integrations/octoai/RAG_chatbot_example/vectorstore/db_faiss/index.pkl diff --git a/recipes/3p_integrations/octoai/getting_to_know_llama.ipynb b/3p-integrations/octoai/getting_to_know_llama.ipynb similarity index 100% rename from recipes/3p_integrations/octoai/getting_to_know_llama.ipynb rename to 3p-integrations/octoai/getting_to_know_llama.ipynb diff --git a/recipes/3p_integrations/octoai/hello_llama_cloud.ipynb b/3p-integrations/octoai/hello_llama_cloud.ipynb similarity index 100% rename from recipes/3p_integrations/octoai/hello_llama_cloud.ipynb rename to 3p-integrations/octoai/hello_llama_cloud.ipynb diff --git a/recipes/3p_integrations/octoai/live_data.ipynb b/3p-integrations/octoai/live_data.ipynb similarity index 100% rename from recipes/3p_integrations/octoai/live_data.ipynb rename to 3p-integrations/octoai/live_data.ipynb diff --git a/recipes/3p_integrations/octoai/llama2_gradio.ipynb b/3p-integrations/octoai/llama2_gradio.ipynb similarity index 100% rename from recipes/3p_integrations/octoai/llama2_gradio.ipynb rename to 3p-integrations/octoai/llama2_gradio.ipynb diff --git a/recipes/3p_integrations/octoai/video_summary.ipynb b/3p-integrations/octoai/video_summary.ipynb similarity index 100% rename from recipes/3p_integrations/octoai/video_summary.ipynb rename to 3p-integrations/octoai/video_summary.ipynb diff --git a/recipes/3p_integrations/tgi/README.md b/3p-integrations/tgi/README.md similarity index 100% rename from recipes/3p_integrations/tgi/README.md rename to 3p-integrations/tgi/README.md diff --git a/recipes/3p_integrations/tgi/merge_lora_weights.py 
b/3p-integrations/tgi/merge_lora_weights.py similarity index 100% rename from recipes/3p_integrations/tgi/merge_lora_weights.py rename to 3p-integrations/tgi/merge_lora_weights.py diff --git a/recipes/3p_integrations/togetherai/README.md b/3p-integrations/togetherai/README.md similarity index 100% rename from recipes/3p_integrations/togetherai/README.md rename to 3p-integrations/togetherai/README.md diff --git a/recipes/3p_integrations/togetherai/datasets/movies.json b/3p-integrations/togetherai/datasets/movies.json similarity index 100% rename from recipes/3p_integrations/togetherai/datasets/movies.json rename to 3p-integrations/togetherai/datasets/movies.json diff --git a/recipes/3p_integrations/togetherai/images/BERTScore.png b/3p-integrations/togetherai/images/BERTScore.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/BERTScore.png rename to 3p-integrations/togetherai/images/BERTScore.png diff --git a/recipes/3p_integrations/togetherai/images/CoQA.png b/3p-integrations/togetherai/images/CoQA.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/CoQA.png rename to 3p-integrations/togetherai/images/CoQA.png diff --git a/recipes/3p_integrations/togetherai/images/ColPaliMaxSim-1.png b/3p-integrations/togetherai/images/ColPaliMaxSim-1.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/ColPaliMaxSim-1.png rename to 3p-integrations/togetherai/images/ColPaliMaxSim-1.png diff --git a/recipes/3p_integrations/togetherai/images/Nvidia_collage.png b/3p-integrations/togetherai/images/Nvidia_collage.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/Nvidia_collage.png rename to 3p-integrations/togetherai/images/Nvidia_collage.png diff --git a/recipes/3p_integrations/togetherai/images/UMAP.png b/3p-integrations/togetherai/images/UMAP.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/UMAP.png rename to 
3p-integrations/togetherai/images/UMAP.png diff --git a/recipes/3p_integrations/togetherai/images/cRAG.png b/3p-integrations/togetherai/images/cRAG.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/cRAG.png rename to 3p-integrations/togetherai/images/cRAG.png diff --git a/recipes/3p_integrations/togetherai/images/cRAG_indexing.png b/3p-integrations/togetherai/images/cRAG_indexing.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/cRAG_indexing.png rename to 3p-integrations/togetherai/images/cRAG_indexing.png diff --git a/recipes/3p_integrations/togetherai/images/cRAG_querytime.png b/3p-integrations/togetherai/images/cRAG_querytime.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/cRAG_querytime.png rename to 3p-integrations/togetherai/images/cRAG_querytime.png diff --git a/recipes/3p_integrations/togetherai/images/cluster.png b/3p-integrations/togetherai/images/cluster.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/cluster.png rename to 3p-integrations/togetherai/images/cluster.png diff --git a/recipes/3p_integrations/togetherai/images/colpali_arch.png b/3p-integrations/togetherai/images/colpali_arch.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/colpali_arch.png rename to 3p-integrations/togetherai/images/colpali_arch.png diff --git a/recipes/3p_integrations/togetherai/images/conversation.png b/3p-integrations/togetherai/images/conversation.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/conversation.png rename to 3p-integrations/togetherai/images/conversation.png diff --git a/recipes/3p_integrations/togetherai/images/deploy_CFT.png b/3p-integrations/togetherai/images/deploy_CFT.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/deploy_CFT.png rename to 3p-integrations/togetherai/images/deploy_CFT.png diff --git 
a/recipes/3p_integrations/togetherai/images/ft_model.png b/3p-integrations/togetherai/images/ft_model.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/ft_model.png rename to 3p-integrations/togetherai/images/ft_model.png diff --git a/recipes/3p_integrations/togetherai/images/mmrag_only.png b/3p-integrations/togetherai/images/mmrag_only.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/mmrag_only.png rename to 3p-integrations/togetherai/images/mmrag_only.png diff --git a/recipes/3p_integrations/togetherai/images/page_25.png b/3p-integrations/togetherai/images/page_25.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/page_25.png rename to 3p-integrations/togetherai/images/page_25.png diff --git a/recipes/3p_integrations/togetherai/images/repetition_task.png b/3p-integrations/togetherai/images/repetition_task.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/repetition_task.png rename to 3p-integrations/togetherai/images/repetition_task.png diff --git a/recipes/3p_integrations/togetherai/images/reranking.png b/3p-integrations/togetherai/images/reranking.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/reranking.png rename to 3p-integrations/togetherai/images/reranking.png diff --git a/recipes/3p_integrations/togetherai/images/semantic_search.png b/3p-integrations/togetherai/images/semantic_search.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/semantic_search.png rename to 3p-integrations/togetherai/images/semantic_search.png diff --git a/recipes/3p_integrations/togetherai/images/simple_RAG.png b/3p-integrations/togetherai/images/simple_RAG.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/simple_RAG.png rename to 3p-integrations/togetherai/images/simple_RAG.png diff --git a/recipes/3p_integrations/togetherai/images/structured_text_image.png 
b/3p-integrations/togetherai/images/structured_text_image.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/structured_text_image.png rename to 3p-integrations/togetherai/images/structured_text_image.png diff --git a/recipes/3p_integrations/togetherai/images/summarization.png b/3p-integrations/togetherai/images/summarization.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/summarization.png rename to 3p-integrations/togetherai/images/summarization.png diff --git a/recipes/3p_integrations/togetherai/images/summary_task.png b/3p-integrations/togetherai/images/summary_task.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/summary_task.png rename to 3p-integrations/togetherai/images/summary_task.png diff --git a/recipes/3p_integrations/togetherai/images/text_RAG.png b/3p-integrations/togetherai/images/text_RAG.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/text_RAG.png rename to 3p-integrations/togetherai/images/text_RAG.png diff --git a/recipes/3p_integrations/togetherai/images/together-color.jpg b/3p-integrations/togetherai/images/together-color.jpg similarity index 100% rename from recipes/3p_integrations/togetherai/images/together-color.jpg rename to 3p-integrations/togetherai/images/together-color.jpg diff --git a/recipes/3p_integrations/togetherai/images/together.gif b/3p-integrations/togetherai/images/together.gif similarity index 100% rename from recipes/3p_integrations/togetherai/images/together.gif rename to 3p-integrations/togetherai/images/together.gif diff --git a/recipes/3p_integrations/togetherai/images/wandb_model.png b/3p-integrations/togetherai/images/wandb_model.png similarity index 100% rename from recipes/3p_integrations/togetherai/images/wandb_model.png rename to 3p-integrations/togetherai/images/wandb_model.png diff --git a/recipes/3p_integrations/togetherai/knowledge_graphs_with_structured_outputs.ipynb 
b/3p-integrations/togetherai/knowledge_graphs_with_structured_outputs.ipynb similarity index 100% rename from recipes/3p_integrations/togetherai/knowledge_graphs_with_structured_outputs.ipynb rename to 3p-integrations/togetherai/knowledge_graphs_with_structured_outputs.ipynb diff --git a/recipes/3p_integrations/togetherai/llama_contextual_RAG.ipynb b/3p-integrations/togetherai/llama_contextual_RAG.ipynb similarity index 100% rename from recipes/3p_integrations/togetherai/llama_contextual_RAG.ipynb rename to 3p-integrations/togetherai/llama_contextual_RAG.ipynb diff --git a/recipes/3p_integrations/togetherai/multimodal_RAG_with_nvidia_investor_slide_deck.ipynb b/3p-integrations/togetherai/multimodal_RAG_with_nvidia_investor_slide_deck.ipynb similarity index 100% rename from recipes/3p_integrations/togetherai/multimodal_RAG_with_nvidia_investor_slide_deck.ipynb rename to 3p-integrations/togetherai/multimodal_RAG_with_nvidia_investor_slide_deck.ipynb diff --git a/recipes/3p_integrations/togetherai/pdf_to_podcast_using_llama_on_together.ipynb b/3p-integrations/togetherai/pdf_to_podcast_using_llama_on_together.ipynb similarity index 100% rename from recipes/3p_integrations/togetherai/pdf_to_podcast_using_llama_on_together.ipynb rename to 3p-integrations/togetherai/pdf_to_podcast_using_llama_on_together.ipynb diff --git a/recipes/3p_integrations/togetherai/structured_text_extraction_from_images.ipynb b/3p-integrations/togetherai/structured_text_extraction_from_images.ipynb similarity index 100% rename from recipes/3p_integrations/togetherai/structured_text_extraction_from_images.ipynb rename to 3p-integrations/togetherai/structured_text_extraction_from_images.ipynb diff --git a/recipes/3p_integrations/togetherai/text_RAG_using_llama_on_together.ipynb b/3p-integrations/togetherai/text_RAG_using_llama_on_together.ipynb similarity index 100% rename from recipes/3p_integrations/togetherai/text_RAG_using_llama_on_together.ipynb rename to 
3p-integrations/togetherai/text_RAG_using_llama_on_together.ipynb diff --git a/recipes/3p_integrations/using_externally_hosted_llms.ipynb b/3p-integrations/using_externally_hosted_llms.ipynb similarity index 100% rename from recipes/3p_integrations/using_externally_hosted_llms.ipynb rename to 3p-integrations/using_externally_hosted_llms.ipynb diff --git a/recipes/3p_integrations/vllm/README.md b/3p-integrations/vllm/README.md similarity index 100% rename from recipes/3p_integrations/vllm/README.md rename to 3p-integrations/vllm/README.md diff --git a/recipes/3p_integrations/vllm/inference.py b/3p-integrations/vllm/inference.py similarity index 100% rename from recipes/3p_integrations/vllm/inference.py rename to 3p-integrations/vllm/inference.py From ae010af7d86d2d4589250ba57ed519356f8e5d25 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Thu, 9 Jan 2025 09:06:53 -0800 Subject: [PATCH 02/23] move and add Difflog --- UPDATES.md | 32 ++++++++++-------- .../NotebookLlama/README.md | 0 .../Step-1 PDF-Pre-Processing-Logic.ipynb | 0 .../Step-2-Transcript-Writer.ipynb | 0 .../NotebookLlama/Step-3-Re-Writer.ipynb | 0 .../NotebookLlama/Step-4-TTS-Workflow.ipynb | 0 .../NotebookLlama/TTS_Notes.md | 0 .../NotebookLlama/requirements.txt | 0 .../NotebookLlama/resources/2402.13116v4.pdf | Bin .../NotebookLlama/resources/Outline.jpg | Bin .../NotebookLlama/resources/_podcast.mp3 | Bin .../resources/clean_extracted_text.txt | 0 .../NotebookLlama/resources/data.pkl | Bin .../resources/podcast_ready_data.pkl | Bin .../RAFT-Chatbot/README.md | 0 .../RAFT-Chatbot/config.py | 0 .../RAFT-Chatbot/eval_llama.json | 0 .../RAFT-Chatbot/format.py | 0 .../RAFT-Chatbot/images/Answers_Precision.png | Bin .../images/LLM_score_comparison.png | Bin .../images/Num_of_refusal_comparison.png | Bin .../RAFT-Chatbot/images/RAFT.png | Bin .../RAFT-Chatbot/raft.py | 0 .../RAFT-Chatbot/raft.yaml | 0 .../RAFT-Chatbot/raft_eval.py | 0 .../RAFT-Chatbot/raft_eval_config.yaml | 0 
.../RAFT-Chatbot/raft_utils.py | 0 .../README.md | 0 .../Agents_Tutorial/Tool_Calling_101.ipynb | 0 .../Agents_Tutorial/Tool_Calling_201.ipynb | 0 ...L4_Tool_Use_and_Conversational_Chess.ipynb | 0 ...Graph_L1_Build_an_Agent_from_Scratch.ipynb | 0 ...RAG_with_Llamaindex_L1_Router_Engine.ipynb | 0 ...s_with_LangChain_L1_Function_Calling.ipynb | 0 .../DeepLearningai_Course_Notebooks/README.md | 0 .../agents/README.md | 0 .../benchmarks/README.md | 0 .../benchmarks/inference/README.md | 0 .../benchmarks/inference/cloud/README.md | 0 .../inference/cloud/aws/fmbench/README.md | 0 .../inference/cloud/aws/fmbench/config.yml | 0 .../inference/cloud/aws/fmbench/img/CFT.png | Bin .../aws/fmbench/img/business_summary.png | Bin .../cloud/aws/fmbench/img/instances.png | Bin .../aws/fmbench/img/latency_vs_tokens.png | Bin .../cloud/azure/chat_azure_api_benchmark.py | 0 .../inference/cloud/azure/input.jsonl | 0 .../inference/cloud/azure/parameters.json | 0 .../azure/pretrained_azure_api_benchmark.py | 0 .../benchmarks/inference/on_prem/README.md | 0 .../on_prem/vllm/chat_vllm_benchmark.py | 0 .../inference/on_prem/vllm/input.jsonl | 0 .../inference/on_prem/vllm/parameters.json | 0 .../on_prem/vllm/pretrained_vllm_benchmark.py | 0 .../benchmarks/inference/requirements.txt | 0 .../benchmarks/llm_eval_harness/README.md | 0 .../llm_eval_harness/meta_eval/README.md | 0 .../meta_eval/eval_config.yaml | 0 .../meta_template/bbh/bbh_3shot_cot.yaml | 0 .../meta_eval/meta_template/bbh/utils.py | 0 .../meta_template/gpqa/gpqa_0shot.yaml | 0 .../meta_eval/meta_template/gpqa/utils.py | 0 .../gpqa_cot/gpqa_0shot_cot.yaml | 0 .../meta_eval/meta_template/gpqa_cot/utils.py | 0 .../meta_template/ifeval/ifeval.yaml | 0 .../meta_eval/meta_template/ifeval/utils.py | 0 .../math_hard/math_4shot_cot.yaml | 0 .../math_hard/math_hard_0shot_cot.yaml | 0 .../meta_template/math_hard/utils.py | 0 .../meta_template/meta_instruct.yaml | 0 .../meta_template/meta_pretrain.yaml | 0 
.../meta_eval/meta_template/mmlu/mmlu.yaml | 0 .../meta_eval/meta_template/mmlu/utils.py | 0 .../mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml | 0 .../mmlu_pro/mmlu_pro_5shot_cot_pretrain.yaml | 0 .../meta_eval/meta_template/mmlu_pro/utils.py | 0 .../meta_eval/prepare_meta_eval.py | 0 .../agent/browser-use-quickstart.ipynb | 0 .../browser_use/agent/sample_screenshot.png | Bin .../coding/text2sql/csv2db.py | 0 .../coding/text2sql/nba.txt | 0 .../coding/text2sql/nba_roster.db | Bin .../coding/text2sql/quickstart.ipynb | 0 .../coding/text2sql/txt2csv.py | 0 .../RAG_chatbot/RAG_Chatbot_Example.ipynb | 0 .../data/Llama Getting Started Guide.pdf | Bin .../RAG_chatbot/requirements.txt | 0 .../vectorstore/db_faiss/index.faiss | Bin .../vectorstore/db_faiss/index.pkl | Bin ...ngodb_llama3_huggingface_open_source.ipynb | 0 .../ai_agent_chatbot/SalesBot.ipynb | 0 .../musical_instruments_reviews.csv | 0 .../messenger_chatbot/llama_messenger.py | 0 .../messenger_chatbot/messenger_llama3.md | 0 .../whatsapp_chatbot/llama_chatbot.py | 0 .../whatsapp_chatbot/whatsapp_llama3.md | 0 .../email_agent/1.png | Bin .../email_agent/2.png | Bin .../email_agent/3.png | Bin .../email_agent/README.md | 0 .../email_agent/email_agent.png | Bin .../email_agent/email_agent.py | 0 .../email_agent/functions_prompt.py | 0 .../email_agent/main.py | 0 .../email_agent/requirements.txt | 0 .../github_triage/README.md | 0 .../github_triage/config.yaml | 0 .../github_triage/llm.py | 0 .../annotated_issues.csv | 0 .../2024-08-28_2024-08-28/challenges.csv | 0 .../2024-08-28_2024-08-28/overview.csv | 0 .../2024-08-28_2024-08-28/plots/commits.png | Bin .../plots/engagement_sankey.png | Bin .../2024-08-28_2024-08-28/plots/expertise.png | Bin .../2024-08-28_2024-08-28/plots/sentiment.png | Bin .../2024-08-28_2024-08-28/plots/severity.png | Bin .../2024-08-28_2024-08-28/plots/themes.png | Bin .../pytorch/2024-08-28_2024-08-28/report.pdf | Bin .../github_triage/pdf_report.py | 0 .../github_triage/plots.py | 0 
.../github_triage/requirements.txt | 0 .../github_triage/triage.py | 0 .../github_triage/utils.py | 0 .../github_triage/walkthrough.ipynb | 0 .../live_data.ipynb | 0 .../multilingual/README.md | 0 .../multilingual/extend_tokenizer.py | 0 .../multilingual/img/phase1_eval_loss.png | Bin .../multilingual/img/phase1_train_loss.png | Bin .../multilingual/img/phase2_eval_loss.png | Bin .../multilingual/img/phase2_train_loss.png | Bin .../multilingual/prepare_data.py | 0 .../multilingual/train_tokenizer.py | 0 .../responsible_ai/README.md | 0 .../code_shield_usage_demo.ipynb | 0 .../responsible_ai/llama_guard/README.md | 0 .../responsible_ai/llama_guard/__init__.py | 0 ...zation_via_prompting_and_fine_tuning.ipynb | 0 ...lama_guard_text_and_vision_inference.ipynb | 0 .../llama_guard/resources/dog.jpg | Bin .../llama_guard/resources/pasta.jpeg | Bin .../responsible_ai/prompt_guard/README.md | 0 .../responsible_ai/prompt_guard/__init__.py | 0 .../responsible_ai/prompt_guard/inference.py | 0 .../prompt_guard/prompt_guard_tutorial.ipynb | 0 .../video_summary.ipynb | 0 .../Getting_to_know_Llama.ipynb | 0 .../Prompt_Engineering_with_Llama_3.ipynb | 0 .../RAG/hello_llama_cloud.ipynb | 0 .../quickstart => getting-started}/README.md | 0 .../Running_Llama_on_HF_transformers.ipynb | 0 .../Running_Llama_on_Mac_Windows_Linux.ipynb | 0 .../build_with_Llama_3_2.ipynb | 0 .../finetuning/LLM_finetuning_overview.md | 0 .../finetuning/README.md | 0 .../finetuning/datasets/README.md | 0 .../finetuning/datasets/custom_dataset.py | 0 .../finetuning/datasets/ocrvqa_dataset.py | 0 .../finetuning/datasets/raft_dataset.py | 0 .../finetuning/finetune_vision_model.md | 0 .../finetuning/finetuning.py | 0 .../finetuning/multi_node.slurm | 0 .../finetuning/multigpu_finetuning.md | 0 .../quickstart_peft_finetuning.ipynb | 0 .../finetuning/singlegpu_finetuning.md | 0 .../inference/README.md | 0 .../inference/code_llama/README.md | 0 .../code_llama/code_completion_example.py | 0 
.../code_llama/code_completion_prompt.txt | 0 .../code_llama/code_infilling_example.py | 0 .../code_llama/code_infilling_prompt.txt | 0 .../code_llama/code_instruct_example.py | 0 .../inference/local_inference/README.md | 0 .../chat_completion/chat_completion.py | 0 .../chat_completion/chats.json | 0 .../inference/local_inference/inference.py | 0 .../local_inference/multi_modal_infer.py | 0 .../local_inference/samsum_prompt.txt | 0 .../android_inference/README.md | 0 .../android_inference/mlc-package-config.json | 0 .../android_inference/requirements.txt | 0 .../inference/modelUpgradeExample.py | 0 .../dev_requirements.txt | 0 {docs => src/docs}/FAQ.md | 0 {docs => src/docs}/LLM_finetuning.md | 0 ...a_colorful_llama_doing_ai_programming.jpeg | Bin {docs => src/docs}/img/cat.jpeg | Bin {docs => src/docs}/img/feature_based_fn.png | Bin {docs => src/docs}/img/feature_based_fn_2.png | Bin {docs => src/docs}/img/full_param_fn.png | Bin .../docs}/img/gnocchi_alla_romana.jpeg | Bin ...opping_bascket_with_salmon_in_package.jpeg | Bin .../docs}/img/llama-mobile-confirmed.png | Bin {docs => src/docs}/img/llama-recipes.png | Bin {docs => src/docs}/img/llama2_gradio.png | Bin {docs => src/docs}/img/llama2_streamlit.png | Bin {docs => src/docs}/img/llama2_streamlit2.png | Bin {docs => src/docs}/img/llama_stack.png | Bin .../docs}/img/messenger_api_settings.png | Bin .../docs}/img/messenger_llama_arch.jpg | Bin {docs => src/docs}/img/meta_release.png | Bin {docs => src/docs}/img/resized_image.jpg | Bin {docs => src/docs}/img/thumbnail_IMG_1329.jpg | Bin {docs => src/docs}/img/thumbnail_IMG_1440.jpg | Bin {docs => src/docs}/img/thumbnail_IMG_6385.jpg | Bin {docs => src/docs}/img/wandb_screenshot.png | Bin {docs => src/docs}/img/whatsapp_dashboard.jpg | Bin .../docs}/img/whatsapp_llama_arch.jpg | Bin {docs => src/docs}/multi_gpu.md | 0 {docs => src/docs}/single_gpu.md | 0 requirements.txt => src/requirements.txt | 0 211 files changed, 17 insertions(+), 15 deletions(-) rename 
{recipes/quickstart => end-to-end-use-cases}/NotebookLlama/README.md (100%) rename {recipes/quickstart => end-to-end-use-cases}/NotebookLlama/Step-1 PDF-Pre-Processing-Logic.ipynb (100%) rename {recipes/quickstart => end-to-end-use-cases}/NotebookLlama/Step-2-Transcript-Writer.ipynb (100%) rename {recipes/quickstart => end-to-end-use-cases}/NotebookLlama/Step-3-Re-Writer.ipynb (100%) rename {recipes/quickstart => end-to-end-use-cases}/NotebookLlama/Step-4-TTS-Workflow.ipynb (100%) rename {recipes/quickstart => end-to-end-use-cases}/NotebookLlama/TTS_Notes.md (100%) rename {recipes/quickstart => end-to-end-use-cases}/NotebookLlama/requirements.txt (100%) rename {recipes/quickstart => end-to-end-use-cases}/NotebookLlama/resources/2402.13116v4.pdf (100%) rename {recipes/quickstart => end-to-end-use-cases}/NotebookLlama/resources/Outline.jpg (100%) rename {recipes/quickstart => end-to-end-use-cases}/NotebookLlama/resources/_podcast.mp3 (100%) rename {recipes/quickstart => end-to-end-use-cases}/NotebookLlama/resources/clean_extracted_text.txt (100%) rename {recipes/quickstart => end-to-end-use-cases}/NotebookLlama/resources/data.pkl (100%) rename {recipes/quickstart => end-to-end-use-cases}/NotebookLlama/resources/podcast_ready_data.pkl (100%) rename {recipes/use_cases/end2end-recipes => end-to-end-use-cases}/RAFT-Chatbot/README.md (100%) rename {recipes/use_cases/end2end-recipes => end-to-end-use-cases}/RAFT-Chatbot/config.py (100%) rename {recipes/use_cases/end2end-recipes => end-to-end-use-cases}/RAFT-Chatbot/eval_llama.json (100%) rename {recipes/use_cases/end2end-recipes => end-to-end-use-cases}/RAFT-Chatbot/format.py (100%) rename {recipes/use_cases/end2end-recipes => end-to-end-use-cases}/RAFT-Chatbot/images/Answers_Precision.png (100%) rename {recipes/use_cases/end2end-recipes => end-to-end-use-cases}/RAFT-Chatbot/images/LLM_score_comparison.png (100%) rename {recipes/use_cases/end2end-recipes => 
end-to-end-use-cases}/RAFT-Chatbot/images/Num_of_refusal_comparison.png (100%) rename {recipes/use_cases/end2end-recipes => end-to-end-use-cases}/RAFT-Chatbot/images/RAFT.png (100%) rename {recipes/use_cases/end2end-recipes => end-to-end-use-cases}/RAFT-Chatbot/raft.py (100%) rename {recipes/use_cases/end2end-recipes => end-to-end-use-cases}/RAFT-Chatbot/raft.yaml (100%) rename {recipes/use_cases/end2end-recipes => end-to-end-use-cases}/RAFT-Chatbot/raft_eval.py (100%) rename {recipes/use_cases/end2end-recipes => end-to-end-use-cases}/RAFT-Chatbot/raft_eval_config.yaml (100%) rename {recipes/use_cases/end2end-recipes => end-to-end-use-cases}/RAFT-Chatbot/raft_utils.py (100%) rename {recipes/use_cases => end-to-end-use-cases}/README.md (100%) rename {recipes/quickstart => end-to-end-use-cases}/agents/Agents_Tutorial/Tool_Calling_101.ipynb (100%) rename {recipes/quickstart => end-to-end-use-cases}/agents/Agents_Tutorial/Tool_Calling_201.ipynb (100%) rename {recipes/quickstart => end-to-end-use-cases}/agents/DeepLearningai_Course_Notebooks/AI_Agentic_Design_Patterns_with_AutoGen_L4_Tool_Use_and_Conversational_Chess.ipynb (100%) rename {recipes/quickstart => end-to-end-use-cases}/agents/DeepLearningai_Course_Notebooks/AI_Agents_in_LangGraph_L1_Build_an_Agent_from_Scratch.ipynb (100%) rename {recipes/quickstart => end-to-end-use-cases}/agents/DeepLearningai_Course_Notebooks/Building_Agentic_RAG_with_Llamaindex_L1_Router_Engine.ipynb (100%) rename {recipes/quickstart => end-to-end-use-cases}/agents/DeepLearningai_Course_Notebooks/Functions_Tools_and_Agents_with_LangChain_L1_Function_Calling.ipynb (100%) rename {recipes/quickstart => end-to-end-use-cases}/agents/DeepLearningai_Course_Notebooks/README.md (100%) rename {recipes/quickstart => end-to-end-use-cases}/agents/README.md (100%) rename {tools => end-to-end-use-cases}/benchmarks/README.md (100%) rename {tools => end-to-end-use-cases}/benchmarks/inference/README.md (100%) rename {tools => 
end-to-end-use-cases}/benchmarks/inference/cloud/README.md (100%) rename {tools => end-to-end-use-cases}/benchmarks/inference/cloud/aws/fmbench/README.md (100%) rename {tools => end-to-end-use-cases}/benchmarks/inference/cloud/aws/fmbench/config.yml (100%) rename {tools => end-to-end-use-cases}/benchmarks/inference/cloud/aws/fmbench/img/CFT.png (100%) rename {tools => end-to-end-use-cases}/benchmarks/inference/cloud/aws/fmbench/img/business_summary.png (100%) rename {tools => end-to-end-use-cases}/benchmarks/inference/cloud/aws/fmbench/img/instances.png (100%) rename {tools => end-to-end-use-cases}/benchmarks/inference/cloud/aws/fmbench/img/latency_vs_tokens.png (100%) rename {tools => end-to-end-use-cases}/benchmarks/inference/cloud/azure/chat_azure_api_benchmark.py (100%) rename {tools => end-to-end-use-cases}/benchmarks/inference/cloud/azure/input.jsonl (100%) rename {tools => end-to-end-use-cases}/benchmarks/inference/cloud/azure/parameters.json (100%) rename {tools => end-to-end-use-cases}/benchmarks/inference/cloud/azure/pretrained_azure_api_benchmark.py (100%) rename {tools => end-to-end-use-cases}/benchmarks/inference/on_prem/README.md (100%) rename {tools => end-to-end-use-cases}/benchmarks/inference/on_prem/vllm/chat_vllm_benchmark.py (100%) rename {tools => end-to-end-use-cases}/benchmarks/inference/on_prem/vllm/input.jsonl (100%) rename {tools => end-to-end-use-cases}/benchmarks/inference/on_prem/vllm/parameters.json (100%) rename {tools => end-to-end-use-cases}/benchmarks/inference/on_prem/vllm/pretrained_vllm_benchmark.py (100%) rename {tools => end-to-end-use-cases}/benchmarks/inference/requirements.txt (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/README.md (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/README.md (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/eval_config.yaml (100%) rename {tools => 
end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/meta_template/bbh/bbh_3shot_cot.yaml (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/meta_template/bbh/utils.py (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa/gpqa_0shot.yaml (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa/utils.py (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa_cot/gpqa_0shot_cot.yaml (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa_cot/utils.py (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/meta_template/ifeval/ifeval.yaml (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/meta_template/ifeval/utils.py (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/math_4shot_cot.yaml (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/math_hard_0shot_cot.yaml (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/utils.py (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/meta_template/meta_instruct.yaml (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/meta_template/meta_pretrain.yaml (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu/mmlu.yaml (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu/utils.py (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml (100%) rename {tools => 
end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/mmlu_pro_5shot_cot_pretrain.yaml (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/utils.py (100%) rename {tools => end-to-end-use-cases}/benchmarks/llm_eval_harness/meta_eval/prepare_meta_eval.py (100%) rename {recipes/use_cases => end-to-end-use-cases}/browser_use/agent/browser-use-quickstart.ipynb (100%) rename {recipes/use_cases => end-to-end-use-cases}/browser_use/agent/sample_screenshot.png (100%) rename {recipes/use_cases => end-to-end-use-cases}/coding/text2sql/csv2db.py (100%) rename {recipes/use_cases => end-to-end-use-cases}/coding/text2sql/nba.txt (100%) rename {recipes/use_cases => end-to-end-use-cases}/coding/text2sql/nba_roster.db (100%) rename {recipes/use_cases => end-to-end-use-cases}/coding/text2sql/quickstart.ipynb (100%) rename {recipes/use_cases => end-to-end-use-cases}/coding/text2sql/txt2csv.py (100%) rename {recipes/use_cases => end-to-end-use-cases}/customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb (100%) rename {recipes/use_cases => end-to-end-use-cases}/customerservice_chatbots/RAG_chatbot/data/Llama Getting Started Guide.pdf (100%) rename {recipes/use_cases => end-to-end-use-cases}/customerservice_chatbots/RAG_chatbot/requirements.txt (100%) rename {recipes/use_cases => end-to-end-use-cases}/customerservice_chatbots/RAG_chatbot/vectorstore/db_faiss/index.faiss (100%) rename {recipes/use_cases => end-to-end-use-cases}/customerservice_chatbots/RAG_chatbot/vectorstore/db_faiss/index.pkl (100%) rename {recipes/use_cases => end-to-end-use-cases}/customerservice_chatbots/RAG_chatbot/vectorstore/mongodb/rag_mongodb_llama3_huggingface_open_source.ipynb (100%) rename {recipes/use_cases => end-to-end-use-cases}/customerservice_chatbots/ai_agent_chatbot/SalesBot.ipynb (100%) rename {recipes/use_cases => end-to-end-use-cases}/customerservice_chatbots/ai_agent_chatbot/musical_instruments_reviews.csv 
(100%) rename {recipes/use_cases => end-to-end-use-cases}/customerservice_chatbots/messenger_chatbot/llama_messenger.py (100%) rename {recipes/use_cases => end-to-end-use-cases}/customerservice_chatbots/messenger_chatbot/messenger_llama3.md (100%) rename {recipes/use_cases => end-to-end-use-cases}/customerservice_chatbots/whatsapp_chatbot/llama_chatbot.py (100%) rename {recipes/use_cases => end-to-end-use-cases}/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md (100%) rename {recipes/use_cases => end-to-end-use-cases}/email_agent/1.png (100%) rename {recipes/use_cases => end-to-end-use-cases}/email_agent/2.png (100%) rename {recipes/use_cases => end-to-end-use-cases}/email_agent/3.png (100%) rename {recipes/use_cases => end-to-end-use-cases}/email_agent/README.md (100%) rename {recipes/use_cases => end-to-end-use-cases}/email_agent/email_agent.png (100%) rename {recipes/use_cases => end-to-end-use-cases}/email_agent/email_agent.py (100%) rename {recipes/use_cases => end-to-end-use-cases}/email_agent/functions_prompt.py (100%) rename {recipes/use_cases => end-to-end-use-cases}/email_agent/main.py (100%) rename {recipes/use_cases => end-to-end-use-cases}/email_agent/requirements.txt (100%) rename {recipes/use_cases => end-to-end-use-cases}/github_triage/README.md (100%) rename {recipes/use_cases => end-to-end-use-cases}/github_triage/config.yaml (100%) rename {recipes/use_cases => end-to-end-use-cases}/github_triage/llm.py (100%) rename {recipes/use_cases => end-to-end-use-cases}/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/annotated_issues.csv (100%) rename {recipes/use_cases => end-to-end-use-cases}/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/challenges.csv (100%) rename {recipes/use_cases => end-to-end-use-cases}/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/overview.csv (100%) rename {recipes/use_cases => end-to-end-use-cases}/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/commits.png (100%) 
rename {recipes/use_cases => end-to-end-use-cases}/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/engagement_sankey.png (100%) rename {recipes/use_cases => end-to-end-use-cases}/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/expertise.png (100%) rename {recipes/use_cases => end-to-end-use-cases}/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/sentiment.png (100%) rename {recipes/use_cases => end-to-end-use-cases}/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/severity.png (100%) rename {recipes/use_cases => end-to-end-use-cases}/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/themes.png (100%) rename {recipes/use_cases => end-to-end-use-cases}/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/report.pdf (100%) rename {recipes/use_cases => end-to-end-use-cases}/github_triage/pdf_report.py (100%) rename {recipes/use_cases => end-to-end-use-cases}/github_triage/plots.py (100%) rename {recipes/use_cases => end-to-end-use-cases}/github_triage/requirements.txt (100%) rename {recipes/use_cases => end-to-end-use-cases}/github_triage/triage.py (100%) rename {recipes/use_cases => end-to-end-use-cases}/github_triage/utils.py (100%) rename {recipes/use_cases => end-to-end-use-cases}/github_triage/walkthrough.ipynb (100%) rename {recipes/use_cases => end-to-end-use-cases}/live_data.ipynb (100%) rename {recipes/use_cases => end-to-end-use-cases}/multilingual/README.md (100%) rename {recipes/use_cases => end-to-end-use-cases}/multilingual/extend_tokenizer.py (100%) rename {recipes/use_cases => end-to-end-use-cases}/multilingual/img/phase1_eval_loss.png (100%) rename {recipes/use_cases => end-to-end-use-cases}/multilingual/img/phase1_train_loss.png (100%) rename {recipes/use_cases => end-to-end-use-cases}/multilingual/img/phase2_eval_loss.png (100%) rename {recipes/use_cases => end-to-end-use-cases}/multilingual/img/phase2_train_loss.png (100%) rename {recipes/use_cases => 
end-to-end-use-cases}/multilingual/prepare_data.py (100%) rename {recipes/use_cases => end-to-end-use-cases}/multilingual/train_tokenizer.py (100%) rename {recipes => end-to-end-use-cases}/responsible_ai/README.md (100%) rename {recipes => end-to-end-use-cases}/responsible_ai/code_shield_usage_demo.ipynb (100%) rename {recipes => end-to-end-use-cases}/responsible_ai/llama_guard/README.md (100%) rename {recipes => end-to-end-use-cases}/responsible_ai/llama_guard/__init__.py (100%) rename {recipes => end-to-end-use-cases}/responsible_ai/llama_guard/llama_guard_customization_via_prompting_and_fine_tuning.ipynb (100%) rename {recipes => end-to-end-use-cases}/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb (100%) rename {recipes => end-to-end-use-cases}/responsible_ai/llama_guard/resources/dog.jpg (100%) rename {recipes => end-to-end-use-cases}/responsible_ai/llama_guard/resources/pasta.jpeg (100%) rename {recipes => end-to-end-use-cases}/responsible_ai/prompt_guard/README.md (100%) rename {recipes => end-to-end-use-cases}/responsible_ai/prompt_guard/__init__.py (100%) rename {recipes => end-to-end-use-cases}/responsible_ai/prompt_guard/inference.py (100%) rename {recipes => end-to-end-use-cases}/responsible_ai/prompt_guard/prompt_guard_tutorial.ipynb (100%) rename {recipes/use_cases => end-to-end-use-cases}/video_summary.ipynb (100%) rename {recipes/quickstart => getting-started}/Getting_to_know_Llama.ipynb (100%) rename {recipes/quickstart => getting-started}/Prompt_Engineering_with_Llama_3.ipynb (100%) rename {recipes/quickstart => getting-started}/RAG/hello_llama_cloud.ipynb (100%) rename {recipes/quickstart => getting-started}/README.md (100%) rename {recipes/quickstart => getting-started}/Running_Llama3_Anywhere/Running_Llama_on_HF_transformers.ipynb (100%) rename {recipes/quickstart => getting-started}/Running_Llama3_Anywhere/Running_Llama_on_Mac_Windows_Linux.ipynb (100%) rename {recipes/quickstart => 
getting-started}/build_with_Llama_3_2.ipynb (100%) rename {recipes/quickstart => getting-started}/finetuning/LLM_finetuning_overview.md (100%) rename {recipes/quickstart => getting-started}/finetuning/README.md (100%) rename {recipes/quickstart => getting-started}/finetuning/datasets/README.md (100%) rename {recipes/quickstart => getting-started}/finetuning/datasets/custom_dataset.py (100%) rename {recipes/quickstart => getting-started}/finetuning/datasets/ocrvqa_dataset.py (100%) rename {recipes/quickstart => getting-started}/finetuning/datasets/raft_dataset.py (100%) rename {recipes/quickstart => getting-started}/finetuning/finetune_vision_model.md (100%) rename {recipes/quickstart => getting-started}/finetuning/finetuning.py (100%) rename {recipes/quickstart => getting-started}/finetuning/multi_node.slurm (100%) rename {recipes/quickstart => getting-started}/finetuning/multigpu_finetuning.md (100%) rename {recipes/quickstart => getting-started}/finetuning/quickstart_peft_finetuning.ipynb (100%) rename {recipes/quickstart => getting-started}/finetuning/singlegpu_finetuning.md (100%) rename {recipes/quickstart => getting-started}/inference/README.md (100%) rename {recipes/quickstart => getting-started}/inference/code_llama/README.md (100%) rename {recipes/quickstart => getting-started}/inference/code_llama/code_completion_example.py (100%) rename {recipes/quickstart => getting-started}/inference/code_llama/code_completion_prompt.txt (100%) rename {recipes/quickstart => getting-started}/inference/code_llama/code_infilling_example.py (100%) rename {recipes/quickstart => getting-started}/inference/code_llama/code_infilling_prompt.txt (100%) rename {recipes/quickstart => getting-started}/inference/code_llama/code_instruct_example.py (100%) rename {recipes/quickstart => getting-started}/inference/local_inference/README.md (100%) rename {recipes/quickstart => getting-started}/inference/local_inference/chat_completion/chat_completion.py (100%) rename {recipes/quickstart 
=> getting-started}/inference/local_inference/chat_completion/chats.json (100%) rename {recipes/quickstart => getting-started}/inference/local_inference/inference.py (100%) rename {recipes/quickstart => getting-started}/inference/local_inference/multi_modal_infer.py (100%) rename {recipes/quickstart => getting-started}/inference/local_inference/samsum_prompt.txt (100%) rename {recipes/quickstart => getting-started}/inference/mobile_inference/android_inference/README.md (100%) rename {recipes/quickstart => getting-started}/inference/mobile_inference/android_inference/mlc-package-config.json (100%) rename {recipes/quickstart => getting-started}/inference/mobile_inference/android_inference/requirements.txt (100%) rename {recipes/quickstart => getting-started}/inference/modelUpgradeExample.py (100%) rename dev_requirements.txt => src/dev_requirements.txt (100%) rename {docs => src/docs}/FAQ.md (100%) rename {docs => src/docs}/LLM_finetuning.md (100%) rename {docs => src/docs}/img/a_colorful_llama_doing_ai_programming.jpeg (100%) rename {docs => src/docs}/img/cat.jpeg (100%) rename {docs => src/docs}/img/feature_based_fn.png (100%) rename {docs => src/docs}/img/feature_based_fn_2.png (100%) rename {docs => src/docs}/img/full_param_fn.png (100%) rename {docs => src/docs}/img/gnocchi_alla_romana.jpeg (100%) rename {docs => src/docs}/img/grocery_shopping_bascket_with_salmon_in_package.jpeg (100%) rename {docs => src/docs}/img/llama-mobile-confirmed.png (100%) rename {docs => src/docs}/img/llama-recipes.png (100%) rename {docs => src/docs}/img/llama2_gradio.png (100%) rename {docs => src/docs}/img/llama2_streamlit.png (100%) rename {docs => src/docs}/img/llama2_streamlit2.png (100%) rename {docs => src/docs}/img/llama_stack.png (100%) rename {docs => src/docs}/img/messenger_api_settings.png (100%) rename {docs => src/docs}/img/messenger_llama_arch.jpg (100%) rename {docs => src/docs}/img/meta_release.png (100%) rename {docs => src/docs}/img/resized_image.jpg (100%) rename 
{docs => src/docs}/img/thumbnail_IMG_1329.jpg (100%) rename {docs => src/docs}/img/thumbnail_IMG_1440.jpg (100%) rename {docs => src/docs}/img/thumbnail_IMG_6385.jpg (100%) rename {docs => src/docs}/img/wandb_screenshot.png (100%) rename {docs => src/docs}/img/whatsapp_dashboard.jpg (100%) rename {docs => src/docs}/img/whatsapp_llama_arch.jpg (100%) rename {docs => src/docs}/multi_gpu.md (100%) rename {docs => src/docs}/single_gpu.md (100%) rename requirements.txt => src/requirements.txt (100%) diff --git a/UPDATES.md b/UPDATES.md index fcd455592..56739b392 100644 --- a/UPDATES.md +++ b/UPDATES.md @@ -1,19 +1,21 @@ -## System Prompt Update +DIFFLOG: -### Observed Issue -We received feedback from the community on our prompt template and we are providing an update to reduce the false refusal rates seen. False refusals occur when the model incorrectly refuses to answer a question that it should, for example due to overly broad instructions to be cautious in how it provides responses. +Nested folder renames: +- /recipes/3p_integrations -> /3p-integrations +- /recipes/quickstart -> /getting-started +- /recipes/responsible_ai -> /end-to-end-use-cases/responsible_ai +- /recipes/use_cases -> /end-to-end-use-cases +- /quickstart/agents -> /end-to-end-use-cases/agents +- /quickstart/NotebookLlama -> /end-to-end-use-cases/NotebookLlama +- /quickstart/responsible_ai -> /end-to-end-use-cases/responsible_ai +- /recipes/use_cases/end2end-recipes/RAFT-Chatbot -> /end-to-end-use-cases/RAFT-Chatbot +- /docs -> /src/docs/ +- /dev_requirements.txt -> /src/dev_requirements.txt +- /requirements.txt -> /src/requirements.txt +- /tools -> /end-to-end-use-cases/benchmarks/ -### Updated approach -Based on evaluation and analysis, we recommend the removal of the system prompt as the default setting. Pull request [#626](https://github.com/facebookresearch/llama/pull/626) removes the system prompt as the default option, but still provides an example to help enable experimentation for those using it. 
-## Token Sanitization Update +Removed folders: +- /flagged (Empty folder) +- /recipes/quickstart/Running_Llama3_Anywhere (Redundant code) -### Observed Issue -The PyTorch scripts currently provided for tokenization and model inference allow for direct prompt injection via string concatenation. Prompt injections allow for the addition of special system and instruction prompt strings from user-provided prompts. - -As noted in the documentation, these strings are required to use the fine-tuned chat models. However, prompt injections have also been used for manipulating or abusing models by bypassing their safeguards, allowing for the creation of content or behaviors otherwise outside the bounds of acceptable use. - -### Updated approach -We recommend sanitizing [these strings](https://github.com/meta-llama/llama?tab=readme-ov-file#fine-tuned-chat-models) from any user provided prompts. Sanitization of user prompts mitigates malicious or accidental abuse of these strings. The provided scripts have been updated to do this. - -Note: even with this update safety classifiers should still be applied to catch unsafe behaviors or content produced by the model. An [example](./recipes/quickstart/inference/local_inference/inference.py) of how to deploy such a classifier can be found in the llama-recipes repository. 
diff --git a/recipes/quickstart/NotebookLlama/README.md b/end-to-end-use-cases/NotebookLlama/README.md similarity index 100% rename from recipes/quickstart/NotebookLlama/README.md rename to end-to-end-use-cases/NotebookLlama/README.md diff --git a/recipes/quickstart/NotebookLlama/Step-1 PDF-Pre-Processing-Logic.ipynb b/end-to-end-use-cases/NotebookLlama/Step-1 PDF-Pre-Processing-Logic.ipynb similarity index 100% rename from recipes/quickstart/NotebookLlama/Step-1 PDF-Pre-Processing-Logic.ipynb rename to end-to-end-use-cases/NotebookLlama/Step-1 PDF-Pre-Processing-Logic.ipynb diff --git a/recipes/quickstart/NotebookLlama/Step-2-Transcript-Writer.ipynb b/end-to-end-use-cases/NotebookLlama/Step-2-Transcript-Writer.ipynb similarity index 100% rename from recipes/quickstart/NotebookLlama/Step-2-Transcript-Writer.ipynb rename to end-to-end-use-cases/NotebookLlama/Step-2-Transcript-Writer.ipynb diff --git a/recipes/quickstart/NotebookLlama/Step-3-Re-Writer.ipynb b/end-to-end-use-cases/NotebookLlama/Step-3-Re-Writer.ipynb similarity index 100% rename from recipes/quickstart/NotebookLlama/Step-3-Re-Writer.ipynb rename to end-to-end-use-cases/NotebookLlama/Step-3-Re-Writer.ipynb diff --git a/recipes/quickstart/NotebookLlama/Step-4-TTS-Workflow.ipynb b/end-to-end-use-cases/NotebookLlama/Step-4-TTS-Workflow.ipynb similarity index 100% rename from recipes/quickstart/NotebookLlama/Step-4-TTS-Workflow.ipynb rename to end-to-end-use-cases/NotebookLlama/Step-4-TTS-Workflow.ipynb diff --git a/recipes/quickstart/NotebookLlama/TTS_Notes.md b/end-to-end-use-cases/NotebookLlama/TTS_Notes.md similarity index 100% rename from recipes/quickstart/NotebookLlama/TTS_Notes.md rename to end-to-end-use-cases/NotebookLlama/TTS_Notes.md diff --git a/recipes/quickstart/NotebookLlama/requirements.txt b/end-to-end-use-cases/NotebookLlama/requirements.txt similarity index 100% rename from recipes/quickstart/NotebookLlama/requirements.txt rename to end-to-end-use-cases/NotebookLlama/requirements.txt 
diff --git a/recipes/quickstart/NotebookLlama/resources/2402.13116v4.pdf b/end-to-end-use-cases/NotebookLlama/resources/2402.13116v4.pdf similarity index 100% rename from recipes/quickstart/NotebookLlama/resources/2402.13116v4.pdf rename to end-to-end-use-cases/NotebookLlama/resources/2402.13116v4.pdf diff --git a/recipes/quickstart/NotebookLlama/resources/Outline.jpg b/end-to-end-use-cases/NotebookLlama/resources/Outline.jpg similarity index 100% rename from recipes/quickstart/NotebookLlama/resources/Outline.jpg rename to end-to-end-use-cases/NotebookLlama/resources/Outline.jpg diff --git a/recipes/quickstart/NotebookLlama/resources/_podcast.mp3 b/end-to-end-use-cases/NotebookLlama/resources/_podcast.mp3 similarity index 100% rename from recipes/quickstart/NotebookLlama/resources/_podcast.mp3 rename to end-to-end-use-cases/NotebookLlama/resources/_podcast.mp3 diff --git a/recipes/quickstart/NotebookLlama/resources/clean_extracted_text.txt b/end-to-end-use-cases/NotebookLlama/resources/clean_extracted_text.txt similarity index 100% rename from recipes/quickstart/NotebookLlama/resources/clean_extracted_text.txt rename to end-to-end-use-cases/NotebookLlama/resources/clean_extracted_text.txt diff --git a/recipes/quickstart/NotebookLlama/resources/data.pkl b/end-to-end-use-cases/NotebookLlama/resources/data.pkl similarity index 100% rename from recipes/quickstart/NotebookLlama/resources/data.pkl rename to end-to-end-use-cases/NotebookLlama/resources/data.pkl diff --git a/recipes/quickstart/NotebookLlama/resources/podcast_ready_data.pkl b/end-to-end-use-cases/NotebookLlama/resources/podcast_ready_data.pkl similarity index 100% rename from recipes/quickstart/NotebookLlama/resources/podcast_ready_data.pkl rename to end-to-end-use-cases/NotebookLlama/resources/podcast_ready_data.pkl diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/README.md b/end-to-end-use-cases/RAFT-Chatbot/README.md similarity index 100% rename from 
recipes/use_cases/end2end-recipes/RAFT-Chatbot/README.md rename to end-to-end-use-cases/RAFT-Chatbot/README.md diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/config.py b/end-to-end-use-cases/RAFT-Chatbot/config.py similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/config.py rename to end-to-end-use-cases/RAFT-Chatbot/config.py diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/eval_llama.json b/end-to-end-use-cases/RAFT-Chatbot/eval_llama.json similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/eval_llama.json rename to end-to-end-use-cases/RAFT-Chatbot/eval_llama.json diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/format.py b/end-to-end-use-cases/RAFT-Chatbot/format.py similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/format.py rename to end-to-end-use-cases/RAFT-Chatbot/format.py diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/images/Answers_Precision.png b/end-to-end-use-cases/RAFT-Chatbot/images/Answers_Precision.png similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/images/Answers_Precision.png rename to end-to-end-use-cases/RAFT-Chatbot/images/Answers_Precision.png diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/images/LLM_score_comparison.png b/end-to-end-use-cases/RAFT-Chatbot/images/LLM_score_comparison.png similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/images/LLM_score_comparison.png rename to end-to-end-use-cases/RAFT-Chatbot/images/LLM_score_comparison.png diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/images/Num_of_refusal_comparison.png b/end-to-end-use-cases/RAFT-Chatbot/images/Num_of_refusal_comparison.png similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/images/Num_of_refusal_comparison.png rename to end-to-end-use-cases/RAFT-Chatbot/images/Num_of_refusal_comparison.png diff --git 
a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/images/RAFT.png b/end-to-end-use-cases/RAFT-Chatbot/images/RAFT.png similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/images/RAFT.png rename to end-to-end-use-cases/RAFT-Chatbot/images/RAFT.png diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft.py b/end-to-end-use-cases/RAFT-Chatbot/raft.py similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft.py rename to end-to-end-use-cases/RAFT-Chatbot/raft.py diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft.yaml b/end-to-end-use-cases/RAFT-Chatbot/raft.yaml similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft.yaml rename to end-to-end-use-cases/RAFT-Chatbot/raft.yaml diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_eval.py b/end-to-end-use-cases/RAFT-Chatbot/raft_eval.py similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_eval.py rename to end-to-end-use-cases/RAFT-Chatbot/raft_eval.py diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_eval_config.yaml b/end-to-end-use-cases/RAFT-Chatbot/raft_eval_config.yaml similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_eval_config.yaml rename to end-to-end-use-cases/RAFT-Chatbot/raft_eval_config.yaml diff --git a/recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_utils.py b/end-to-end-use-cases/RAFT-Chatbot/raft_utils.py similarity index 100% rename from recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_utils.py rename to end-to-end-use-cases/RAFT-Chatbot/raft_utils.py diff --git a/recipes/use_cases/README.md b/end-to-end-use-cases/README.md similarity index 100% rename from recipes/use_cases/README.md rename to end-to-end-use-cases/README.md diff --git a/recipes/quickstart/agents/Agents_Tutorial/Tool_Calling_101.ipynb b/end-to-end-use-cases/agents/Agents_Tutorial/Tool_Calling_101.ipynb similarity index 100% rename 
from recipes/quickstart/agents/Agents_Tutorial/Tool_Calling_101.ipynb rename to end-to-end-use-cases/agents/Agents_Tutorial/Tool_Calling_101.ipynb diff --git a/recipes/quickstart/agents/Agents_Tutorial/Tool_Calling_201.ipynb b/end-to-end-use-cases/agents/Agents_Tutorial/Tool_Calling_201.ipynb similarity index 100% rename from recipes/quickstart/agents/Agents_Tutorial/Tool_Calling_201.ipynb rename to end-to-end-use-cases/agents/Agents_Tutorial/Tool_Calling_201.ipynb diff --git a/recipes/quickstart/agents/DeepLearningai_Course_Notebooks/AI_Agentic_Design_Patterns_with_AutoGen_L4_Tool_Use_and_Conversational_Chess.ipynb b/end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/AI_Agentic_Design_Patterns_with_AutoGen_L4_Tool_Use_and_Conversational_Chess.ipynb similarity index 100% rename from recipes/quickstart/agents/DeepLearningai_Course_Notebooks/AI_Agentic_Design_Patterns_with_AutoGen_L4_Tool_Use_and_Conversational_Chess.ipynb rename to end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/AI_Agentic_Design_Patterns_with_AutoGen_L4_Tool_Use_and_Conversational_Chess.ipynb diff --git a/recipes/quickstart/agents/DeepLearningai_Course_Notebooks/AI_Agents_in_LangGraph_L1_Build_an_Agent_from_Scratch.ipynb b/end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/AI_Agents_in_LangGraph_L1_Build_an_Agent_from_Scratch.ipynb similarity index 100% rename from recipes/quickstart/agents/DeepLearningai_Course_Notebooks/AI_Agents_in_LangGraph_L1_Build_an_Agent_from_Scratch.ipynb rename to end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/AI_Agents_in_LangGraph_L1_Build_an_Agent_from_Scratch.ipynb diff --git a/recipes/quickstart/agents/DeepLearningai_Course_Notebooks/Building_Agentic_RAG_with_Llamaindex_L1_Router_Engine.ipynb b/end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/Building_Agentic_RAG_with_Llamaindex_L1_Router_Engine.ipynb similarity index 100% rename from 
recipes/quickstart/agents/DeepLearningai_Course_Notebooks/Building_Agentic_RAG_with_Llamaindex_L1_Router_Engine.ipynb rename to end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/Building_Agentic_RAG_with_Llamaindex_L1_Router_Engine.ipynb diff --git a/recipes/quickstart/agents/DeepLearningai_Course_Notebooks/Functions_Tools_and_Agents_with_LangChain_L1_Function_Calling.ipynb b/end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/Functions_Tools_and_Agents_with_LangChain_L1_Function_Calling.ipynb similarity index 100% rename from recipes/quickstart/agents/DeepLearningai_Course_Notebooks/Functions_Tools_and_Agents_with_LangChain_L1_Function_Calling.ipynb rename to end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/Functions_Tools_and_Agents_with_LangChain_L1_Function_Calling.ipynb diff --git a/recipes/quickstart/agents/DeepLearningai_Course_Notebooks/README.md b/end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/README.md similarity index 100% rename from recipes/quickstart/agents/DeepLearningai_Course_Notebooks/README.md rename to end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/README.md diff --git a/recipes/quickstart/agents/README.md b/end-to-end-use-cases/agents/README.md similarity index 100% rename from recipes/quickstart/agents/README.md rename to end-to-end-use-cases/agents/README.md diff --git a/tools/benchmarks/README.md b/end-to-end-use-cases/benchmarks/README.md similarity index 100% rename from tools/benchmarks/README.md rename to end-to-end-use-cases/benchmarks/README.md diff --git a/tools/benchmarks/inference/README.md b/end-to-end-use-cases/benchmarks/inference/README.md similarity index 100% rename from tools/benchmarks/inference/README.md rename to end-to-end-use-cases/benchmarks/inference/README.md diff --git a/tools/benchmarks/inference/cloud/README.md b/end-to-end-use-cases/benchmarks/inference/cloud/README.md similarity index 100% rename from tools/benchmarks/inference/cloud/README.md rename to 
end-to-end-use-cases/benchmarks/inference/cloud/README.md diff --git a/tools/benchmarks/inference/cloud/aws/fmbench/README.md b/end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/README.md similarity index 100% rename from tools/benchmarks/inference/cloud/aws/fmbench/README.md rename to end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/README.md diff --git a/tools/benchmarks/inference/cloud/aws/fmbench/config.yml b/end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/config.yml similarity index 100% rename from tools/benchmarks/inference/cloud/aws/fmbench/config.yml rename to end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/config.yml diff --git a/tools/benchmarks/inference/cloud/aws/fmbench/img/CFT.png b/end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/img/CFT.png similarity index 100% rename from tools/benchmarks/inference/cloud/aws/fmbench/img/CFT.png rename to end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/img/CFT.png diff --git a/tools/benchmarks/inference/cloud/aws/fmbench/img/business_summary.png b/end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/img/business_summary.png similarity index 100% rename from tools/benchmarks/inference/cloud/aws/fmbench/img/business_summary.png rename to end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/img/business_summary.png diff --git a/tools/benchmarks/inference/cloud/aws/fmbench/img/instances.png b/end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/img/instances.png similarity index 100% rename from tools/benchmarks/inference/cloud/aws/fmbench/img/instances.png rename to end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/img/instances.png diff --git a/tools/benchmarks/inference/cloud/aws/fmbench/img/latency_vs_tokens.png b/end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/img/latency_vs_tokens.png similarity index 100% rename from tools/benchmarks/inference/cloud/aws/fmbench/img/latency_vs_tokens.png rename to 
end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/img/latency_vs_tokens.png diff --git a/tools/benchmarks/inference/cloud/azure/chat_azure_api_benchmark.py b/end-to-end-use-cases/benchmarks/inference/cloud/azure/chat_azure_api_benchmark.py similarity index 100% rename from tools/benchmarks/inference/cloud/azure/chat_azure_api_benchmark.py rename to end-to-end-use-cases/benchmarks/inference/cloud/azure/chat_azure_api_benchmark.py diff --git a/tools/benchmarks/inference/cloud/azure/input.jsonl b/end-to-end-use-cases/benchmarks/inference/cloud/azure/input.jsonl similarity index 100% rename from tools/benchmarks/inference/cloud/azure/input.jsonl rename to end-to-end-use-cases/benchmarks/inference/cloud/azure/input.jsonl diff --git a/tools/benchmarks/inference/cloud/azure/parameters.json b/end-to-end-use-cases/benchmarks/inference/cloud/azure/parameters.json similarity index 100% rename from tools/benchmarks/inference/cloud/azure/parameters.json rename to end-to-end-use-cases/benchmarks/inference/cloud/azure/parameters.json diff --git a/tools/benchmarks/inference/cloud/azure/pretrained_azure_api_benchmark.py b/end-to-end-use-cases/benchmarks/inference/cloud/azure/pretrained_azure_api_benchmark.py similarity index 100% rename from tools/benchmarks/inference/cloud/azure/pretrained_azure_api_benchmark.py rename to end-to-end-use-cases/benchmarks/inference/cloud/azure/pretrained_azure_api_benchmark.py diff --git a/tools/benchmarks/inference/on_prem/README.md b/end-to-end-use-cases/benchmarks/inference/on_prem/README.md similarity index 100% rename from tools/benchmarks/inference/on_prem/README.md rename to end-to-end-use-cases/benchmarks/inference/on_prem/README.md diff --git a/tools/benchmarks/inference/on_prem/vllm/chat_vllm_benchmark.py b/end-to-end-use-cases/benchmarks/inference/on_prem/vllm/chat_vllm_benchmark.py similarity index 100% rename from tools/benchmarks/inference/on_prem/vllm/chat_vllm_benchmark.py rename to 
end-to-end-use-cases/benchmarks/inference/on_prem/vllm/chat_vllm_benchmark.py diff --git a/tools/benchmarks/inference/on_prem/vllm/input.jsonl b/end-to-end-use-cases/benchmarks/inference/on_prem/vllm/input.jsonl similarity index 100% rename from tools/benchmarks/inference/on_prem/vllm/input.jsonl rename to end-to-end-use-cases/benchmarks/inference/on_prem/vllm/input.jsonl diff --git a/tools/benchmarks/inference/on_prem/vllm/parameters.json b/end-to-end-use-cases/benchmarks/inference/on_prem/vllm/parameters.json similarity index 100% rename from tools/benchmarks/inference/on_prem/vllm/parameters.json rename to end-to-end-use-cases/benchmarks/inference/on_prem/vllm/parameters.json diff --git a/tools/benchmarks/inference/on_prem/vllm/pretrained_vllm_benchmark.py b/end-to-end-use-cases/benchmarks/inference/on_prem/vllm/pretrained_vllm_benchmark.py similarity index 100% rename from tools/benchmarks/inference/on_prem/vllm/pretrained_vllm_benchmark.py rename to end-to-end-use-cases/benchmarks/inference/on_prem/vllm/pretrained_vllm_benchmark.py diff --git a/tools/benchmarks/inference/requirements.txt b/end-to-end-use-cases/benchmarks/inference/requirements.txt similarity index 100% rename from tools/benchmarks/inference/requirements.txt rename to end-to-end-use-cases/benchmarks/inference/requirements.txt diff --git a/tools/benchmarks/llm_eval_harness/README.md b/end-to-end-use-cases/benchmarks/llm_eval_harness/README.md similarity index 100% rename from tools/benchmarks/llm_eval_harness/README.md rename to end-to-end-use-cases/benchmarks/llm_eval_harness/README.md diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/README.md b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/README.md similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/README.md rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/README.md diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/eval_config.yaml 
b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/eval_config.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/eval_config.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/eval_config.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/bbh/bbh_3shot_cot.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/bbh/bbh_3shot_cot.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/bbh/bbh_3shot_cot.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/bbh/bbh_3shot_cot.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/bbh/utils.py b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/bbh/utils.py similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/bbh/utils.py rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/bbh/utils.py diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa/gpqa_0shot.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa/gpqa_0shot.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa/gpqa_0shot.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa/gpqa_0shot.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa/utils.py b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa/utils.py similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa/utils.py rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa/utils.py diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa_cot/gpqa_0shot_cot.yaml 
b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa_cot/gpqa_0shot_cot.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa_cot/gpqa_0shot_cot.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa_cot/gpqa_0shot_cot.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa_cot/utils.py b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa_cot/utils.py similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa_cot/utils.py rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/gpqa_cot/utils.py diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/ifeval/ifeval.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/ifeval/ifeval.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/ifeval/ifeval.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/ifeval/ifeval.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/ifeval/utils.py b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/ifeval/utils.py similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/ifeval/utils.py rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/ifeval/utils.py diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/math_4shot_cot.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/math_4shot_cot.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/math_4shot_cot.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/math_4shot_cot.yaml diff --git 
a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/math_hard_0shot_cot.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/math_hard_0shot_cot.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/math_hard_0shot_cot.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/math_hard_0shot_cot.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/utils.py b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/utils.py similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/utils.py rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/math_hard/utils.py diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/meta_instruct.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/meta_instruct.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/meta_instruct.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/meta_instruct.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/meta_pretrain.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/meta_pretrain.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/meta_pretrain.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/meta_pretrain.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu/mmlu.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu/mmlu.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu/mmlu.yaml rename to 
end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu/mmlu.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu/utils.py b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu/utils.py similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu/utils.py rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu/utils.py diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/mmlu_pro_5shot_cot_instruct.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/mmlu_pro_5shot_cot_pretrain.yaml b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/mmlu_pro_5shot_cot_pretrain.yaml similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/mmlu_pro_5shot_cot_pretrain.yaml rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/mmlu_pro_5shot_cot_pretrain.yaml diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/utils.py b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/utils.py similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/utils.py rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/meta_template/mmlu_pro/utils.py diff --git a/tools/benchmarks/llm_eval_harness/meta_eval/prepare_meta_eval.py b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/prepare_meta_eval.py 
similarity index 100% rename from tools/benchmarks/llm_eval_harness/meta_eval/prepare_meta_eval.py rename to end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/prepare_meta_eval.py diff --git a/recipes/use_cases/browser_use/agent/browser-use-quickstart.ipynb b/end-to-end-use-cases/browser_use/agent/browser-use-quickstart.ipynb similarity index 100% rename from recipes/use_cases/browser_use/agent/browser-use-quickstart.ipynb rename to end-to-end-use-cases/browser_use/agent/browser-use-quickstart.ipynb diff --git a/recipes/use_cases/browser_use/agent/sample_screenshot.png b/end-to-end-use-cases/browser_use/agent/sample_screenshot.png similarity index 100% rename from recipes/use_cases/browser_use/agent/sample_screenshot.png rename to end-to-end-use-cases/browser_use/agent/sample_screenshot.png diff --git a/recipes/use_cases/coding/text2sql/csv2db.py b/end-to-end-use-cases/coding/text2sql/csv2db.py similarity index 100% rename from recipes/use_cases/coding/text2sql/csv2db.py rename to end-to-end-use-cases/coding/text2sql/csv2db.py diff --git a/recipes/use_cases/coding/text2sql/nba.txt b/end-to-end-use-cases/coding/text2sql/nba.txt similarity index 100% rename from recipes/use_cases/coding/text2sql/nba.txt rename to end-to-end-use-cases/coding/text2sql/nba.txt diff --git a/recipes/use_cases/coding/text2sql/nba_roster.db b/end-to-end-use-cases/coding/text2sql/nba_roster.db similarity index 100% rename from recipes/use_cases/coding/text2sql/nba_roster.db rename to end-to-end-use-cases/coding/text2sql/nba_roster.db diff --git a/recipes/use_cases/coding/text2sql/quickstart.ipynb b/end-to-end-use-cases/coding/text2sql/quickstart.ipynb similarity index 100% rename from recipes/use_cases/coding/text2sql/quickstart.ipynb rename to end-to-end-use-cases/coding/text2sql/quickstart.ipynb diff --git a/recipes/use_cases/coding/text2sql/txt2csv.py b/end-to-end-use-cases/coding/text2sql/txt2csv.py similarity index 100% rename from recipes/use_cases/coding/text2sql/txt2csv.py 
rename to end-to-end-use-cases/coding/text2sql/txt2csv.py diff --git a/recipes/use_cases/customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb b/end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb similarity index 100% rename from recipes/use_cases/customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb rename to end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb diff --git a/recipes/use_cases/customerservice_chatbots/RAG_chatbot/data/Llama Getting Started Guide.pdf b/end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/data/Llama Getting Started Guide.pdf similarity index 100% rename from recipes/use_cases/customerservice_chatbots/RAG_chatbot/data/Llama Getting Started Guide.pdf rename to end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/data/Llama Getting Started Guide.pdf diff --git a/recipes/use_cases/customerservice_chatbots/RAG_chatbot/requirements.txt b/end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/requirements.txt similarity index 100% rename from recipes/use_cases/customerservice_chatbots/RAG_chatbot/requirements.txt rename to end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/requirements.txt diff --git a/recipes/use_cases/customerservice_chatbots/RAG_chatbot/vectorstore/db_faiss/index.faiss b/end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/vectorstore/db_faiss/index.faiss similarity index 100% rename from recipes/use_cases/customerservice_chatbots/RAG_chatbot/vectorstore/db_faiss/index.faiss rename to end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/vectorstore/db_faiss/index.faiss diff --git a/recipes/use_cases/customerservice_chatbots/RAG_chatbot/vectorstore/db_faiss/index.pkl b/end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/vectorstore/db_faiss/index.pkl similarity index 100% rename from recipes/use_cases/customerservice_chatbots/RAG_chatbot/vectorstore/db_faiss/index.pkl rename to 
end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/vectorstore/db_faiss/index.pkl diff --git a/recipes/use_cases/customerservice_chatbots/RAG_chatbot/vectorstore/mongodb/rag_mongodb_llama3_huggingface_open_source.ipynb b/end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/vectorstore/mongodb/rag_mongodb_llama3_huggingface_open_source.ipynb similarity index 100% rename from recipes/use_cases/customerservice_chatbots/RAG_chatbot/vectorstore/mongodb/rag_mongodb_llama3_huggingface_open_source.ipynb rename to end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/vectorstore/mongodb/rag_mongodb_llama3_huggingface_open_source.ipynb diff --git a/recipes/use_cases/customerservice_chatbots/ai_agent_chatbot/SalesBot.ipynb b/end-to-end-use-cases/customerservice_chatbots/ai_agent_chatbot/SalesBot.ipynb similarity index 100% rename from recipes/use_cases/customerservice_chatbots/ai_agent_chatbot/SalesBot.ipynb rename to end-to-end-use-cases/customerservice_chatbots/ai_agent_chatbot/SalesBot.ipynb diff --git a/recipes/use_cases/customerservice_chatbots/ai_agent_chatbot/musical_instruments_reviews.csv b/end-to-end-use-cases/customerservice_chatbots/ai_agent_chatbot/musical_instruments_reviews.csv similarity index 100% rename from recipes/use_cases/customerservice_chatbots/ai_agent_chatbot/musical_instruments_reviews.csv rename to end-to-end-use-cases/customerservice_chatbots/ai_agent_chatbot/musical_instruments_reviews.csv diff --git a/recipes/use_cases/customerservice_chatbots/messenger_chatbot/llama_messenger.py b/end-to-end-use-cases/customerservice_chatbots/messenger_chatbot/llama_messenger.py similarity index 100% rename from recipes/use_cases/customerservice_chatbots/messenger_chatbot/llama_messenger.py rename to end-to-end-use-cases/customerservice_chatbots/messenger_chatbot/llama_messenger.py diff --git a/recipes/use_cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md 
b/end-to-end-use-cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md similarity index 100% rename from recipes/use_cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md rename to end-to-end-use-cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md diff --git a/recipes/use_cases/customerservice_chatbots/whatsapp_chatbot/llama_chatbot.py b/end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot/llama_chatbot.py similarity index 100% rename from recipes/use_cases/customerservice_chatbots/whatsapp_chatbot/llama_chatbot.py rename to end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot/llama_chatbot.py diff --git a/recipes/use_cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md b/end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md similarity index 100% rename from recipes/use_cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md rename to end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md diff --git a/recipes/use_cases/email_agent/1.png b/end-to-end-use-cases/email_agent/1.png similarity index 100% rename from recipes/use_cases/email_agent/1.png rename to end-to-end-use-cases/email_agent/1.png diff --git a/recipes/use_cases/email_agent/2.png b/end-to-end-use-cases/email_agent/2.png similarity index 100% rename from recipes/use_cases/email_agent/2.png rename to end-to-end-use-cases/email_agent/2.png diff --git a/recipes/use_cases/email_agent/3.png b/end-to-end-use-cases/email_agent/3.png similarity index 100% rename from recipes/use_cases/email_agent/3.png rename to end-to-end-use-cases/email_agent/3.png diff --git a/recipes/use_cases/email_agent/README.md b/end-to-end-use-cases/email_agent/README.md similarity index 100% rename from recipes/use_cases/email_agent/README.md rename to end-to-end-use-cases/email_agent/README.md diff --git a/recipes/use_cases/email_agent/email_agent.png 
b/end-to-end-use-cases/email_agent/email_agent.png similarity index 100% rename from recipes/use_cases/email_agent/email_agent.png rename to end-to-end-use-cases/email_agent/email_agent.png diff --git a/recipes/use_cases/email_agent/email_agent.py b/end-to-end-use-cases/email_agent/email_agent.py similarity index 100% rename from recipes/use_cases/email_agent/email_agent.py rename to end-to-end-use-cases/email_agent/email_agent.py diff --git a/recipes/use_cases/email_agent/functions_prompt.py b/end-to-end-use-cases/email_agent/functions_prompt.py similarity index 100% rename from recipes/use_cases/email_agent/functions_prompt.py rename to end-to-end-use-cases/email_agent/functions_prompt.py diff --git a/recipes/use_cases/email_agent/main.py b/end-to-end-use-cases/email_agent/main.py similarity index 100% rename from recipes/use_cases/email_agent/main.py rename to end-to-end-use-cases/email_agent/main.py diff --git a/recipes/use_cases/email_agent/requirements.txt b/end-to-end-use-cases/email_agent/requirements.txt similarity index 100% rename from recipes/use_cases/email_agent/requirements.txt rename to end-to-end-use-cases/email_agent/requirements.txt diff --git a/recipes/use_cases/github_triage/README.md b/end-to-end-use-cases/github_triage/README.md similarity index 100% rename from recipes/use_cases/github_triage/README.md rename to end-to-end-use-cases/github_triage/README.md diff --git a/recipes/use_cases/github_triage/config.yaml b/end-to-end-use-cases/github_triage/config.yaml similarity index 100% rename from recipes/use_cases/github_triage/config.yaml rename to end-to-end-use-cases/github_triage/config.yaml diff --git a/recipes/use_cases/github_triage/llm.py b/end-to-end-use-cases/github_triage/llm.py similarity index 100% rename from recipes/use_cases/github_triage/llm.py rename to end-to-end-use-cases/github_triage/llm.py diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/annotated_issues.csv 
b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/annotated_issues.csv similarity index 100% rename from recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/annotated_issues.csv rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/annotated_issues.csv diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/challenges.csv b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/challenges.csv similarity index 100% rename from recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/challenges.csv rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/challenges.csv diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/overview.csv b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/overview.csv similarity index 100% rename from recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/overview.csv rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/overview.csv diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/commits.png b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/commits.png similarity index 100% rename from recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/commits.png rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/commits.png diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/engagement_sankey.png b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/engagement_sankey.png similarity index 100% rename from 
recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/engagement_sankey.png rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/engagement_sankey.png diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/expertise.png b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/expertise.png similarity index 100% rename from recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/expertise.png rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/expertise.png diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/sentiment.png b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/sentiment.png similarity index 100% rename from recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/sentiment.png rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/sentiment.png diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/severity.png b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/severity.png similarity index 100% rename from recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/severity.png rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/severity.png diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/themes.png b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/themes.png similarity index 100% rename from recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/themes.png rename to 
end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/themes.png diff --git a/recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/report.pdf b/end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/report.pdf similarity index 100% rename from recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/report.pdf rename to end-to-end-use-cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/report.pdf diff --git a/recipes/use_cases/github_triage/pdf_report.py b/end-to-end-use-cases/github_triage/pdf_report.py similarity index 100% rename from recipes/use_cases/github_triage/pdf_report.py rename to end-to-end-use-cases/github_triage/pdf_report.py diff --git a/recipes/use_cases/github_triage/plots.py b/end-to-end-use-cases/github_triage/plots.py similarity index 100% rename from recipes/use_cases/github_triage/plots.py rename to end-to-end-use-cases/github_triage/plots.py diff --git a/recipes/use_cases/github_triage/requirements.txt b/end-to-end-use-cases/github_triage/requirements.txt similarity index 100% rename from recipes/use_cases/github_triage/requirements.txt rename to end-to-end-use-cases/github_triage/requirements.txt diff --git a/recipes/use_cases/github_triage/triage.py b/end-to-end-use-cases/github_triage/triage.py similarity index 100% rename from recipes/use_cases/github_triage/triage.py rename to end-to-end-use-cases/github_triage/triage.py diff --git a/recipes/use_cases/github_triage/utils.py b/end-to-end-use-cases/github_triage/utils.py similarity index 100% rename from recipes/use_cases/github_triage/utils.py rename to end-to-end-use-cases/github_triage/utils.py diff --git a/recipes/use_cases/github_triage/walkthrough.ipynb b/end-to-end-use-cases/github_triage/walkthrough.ipynb similarity index 100% rename from recipes/use_cases/github_triage/walkthrough.ipynb rename to end-to-end-use-cases/github_triage/walkthrough.ipynb diff --git 
a/recipes/use_cases/live_data.ipynb b/end-to-end-use-cases/live_data.ipynb similarity index 100% rename from recipes/use_cases/live_data.ipynb rename to end-to-end-use-cases/live_data.ipynb diff --git a/recipes/use_cases/multilingual/README.md b/end-to-end-use-cases/multilingual/README.md similarity index 100% rename from recipes/use_cases/multilingual/README.md rename to end-to-end-use-cases/multilingual/README.md diff --git a/recipes/use_cases/multilingual/extend_tokenizer.py b/end-to-end-use-cases/multilingual/extend_tokenizer.py similarity index 100% rename from recipes/use_cases/multilingual/extend_tokenizer.py rename to end-to-end-use-cases/multilingual/extend_tokenizer.py diff --git a/recipes/use_cases/multilingual/img/phase1_eval_loss.png b/end-to-end-use-cases/multilingual/img/phase1_eval_loss.png similarity index 100% rename from recipes/use_cases/multilingual/img/phase1_eval_loss.png rename to end-to-end-use-cases/multilingual/img/phase1_eval_loss.png diff --git a/recipes/use_cases/multilingual/img/phase1_train_loss.png b/end-to-end-use-cases/multilingual/img/phase1_train_loss.png similarity index 100% rename from recipes/use_cases/multilingual/img/phase1_train_loss.png rename to end-to-end-use-cases/multilingual/img/phase1_train_loss.png diff --git a/recipes/use_cases/multilingual/img/phase2_eval_loss.png b/end-to-end-use-cases/multilingual/img/phase2_eval_loss.png similarity index 100% rename from recipes/use_cases/multilingual/img/phase2_eval_loss.png rename to end-to-end-use-cases/multilingual/img/phase2_eval_loss.png diff --git a/recipes/use_cases/multilingual/img/phase2_train_loss.png b/end-to-end-use-cases/multilingual/img/phase2_train_loss.png similarity index 100% rename from recipes/use_cases/multilingual/img/phase2_train_loss.png rename to end-to-end-use-cases/multilingual/img/phase2_train_loss.png diff --git a/recipes/use_cases/multilingual/prepare_data.py b/end-to-end-use-cases/multilingual/prepare_data.py similarity index 100% rename from 
recipes/use_cases/multilingual/prepare_data.py rename to end-to-end-use-cases/multilingual/prepare_data.py diff --git a/recipes/use_cases/multilingual/train_tokenizer.py b/end-to-end-use-cases/multilingual/train_tokenizer.py similarity index 100% rename from recipes/use_cases/multilingual/train_tokenizer.py rename to end-to-end-use-cases/multilingual/train_tokenizer.py diff --git a/recipes/responsible_ai/README.md b/end-to-end-use-cases/responsible_ai/README.md similarity index 100% rename from recipes/responsible_ai/README.md rename to end-to-end-use-cases/responsible_ai/README.md diff --git a/recipes/responsible_ai/code_shield_usage_demo.ipynb b/end-to-end-use-cases/responsible_ai/code_shield_usage_demo.ipynb similarity index 100% rename from recipes/responsible_ai/code_shield_usage_demo.ipynb rename to end-to-end-use-cases/responsible_ai/code_shield_usage_demo.ipynb diff --git a/recipes/responsible_ai/llama_guard/README.md b/end-to-end-use-cases/responsible_ai/llama_guard/README.md similarity index 100% rename from recipes/responsible_ai/llama_guard/README.md rename to end-to-end-use-cases/responsible_ai/llama_guard/README.md diff --git a/recipes/responsible_ai/llama_guard/__init__.py b/end-to-end-use-cases/responsible_ai/llama_guard/__init__.py similarity index 100% rename from recipes/responsible_ai/llama_guard/__init__.py rename to end-to-end-use-cases/responsible_ai/llama_guard/__init__.py diff --git a/recipes/responsible_ai/llama_guard/llama_guard_customization_via_prompting_and_fine_tuning.ipynb b/end-to-end-use-cases/responsible_ai/llama_guard/llama_guard_customization_via_prompting_and_fine_tuning.ipynb similarity index 100% rename from recipes/responsible_ai/llama_guard/llama_guard_customization_via_prompting_and_fine_tuning.ipynb rename to end-to-end-use-cases/responsible_ai/llama_guard/llama_guard_customization_via_prompting_and_fine_tuning.ipynb diff --git a/recipes/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb 
b/end-to-end-use-cases/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb similarity index 100% rename from recipes/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb rename to end-to-end-use-cases/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb diff --git a/recipes/responsible_ai/llama_guard/resources/dog.jpg b/end-to-end-use-cases/responsible_ai/llama_guard/resources/dog.jpg similarity index 100% rename from recipes/responsible_ai/llama_guard/resources/dog.jpg rename to end-to-end-use-cases/responsible_ai/llama_guard/resources/dog.jpg diff --git a/recipes/responsible_ai/llama_guard/resources/pasta.jpeg b/end-to-end-use-cases/responsible_ai/llama_guard/resources/pasta.jpeg similarity index 100% rename from recipes/responsible_ai/llama_guard/resources/pasta.jpeg rename to end-to-end-use-cases/responsible_ai/llama_guard/resources/pasta.jpeg diff --git a/recipes/responsible_ai/prompt_guard/README.md b/end-to-end-use-cases/responsible_ai/prompt_guard/README.md similarity index 100% rename from recipes/responsible_ai/prompt_guard/README.md rename to end-to-end-use-cases/responsible_ai/prompt_guard/README.md diff --git a/recipes/responsible_ai/prompt_guard/__init__.py b/end-to-end-use-cases/responsible_ai/prompt_guard/__init__.py similarity index 100% rename from recipes/responsible_ai/prompt_guard/__init__.py rename to end-to-end-use-cases/responsible_ai/prompt_guard/__init__.py diff --git a/recipes/responsible_ai/prompt_guard/inference.py b/end-to-end-use-cases/responsible_ai/prompt_guard/inference.py similarity index 100% rename from recipes/responsible_ai/prompt_guard/inference.py rename to end-to-end-use-cases/responsible_ai/prompt_guard/inference.py diff --git a/recipes/responsible_ai/prompt_guard/prompt_guard_tutorial.ipynb b/end-to-end-use-cases/responsible_ai/prompt_guard/prompt_guard_tutorial.ipynb similarity index 100% rename from recipes/responsible_ai/prompt_guard/prompt_guard_tutorial.ipynb 
rename to end-to-end-use-cases/responsible_ai/prompt_guard/prompt_guard_tutorial.ipynb diff --git a/recipes/use_cases/video_summary.ipynb b/end-to-end-use-cases/video_summary.ipynb similarity index 100% rename from recipes/use_cases/video_summary.ipynb rename to end-to-end-use-cases/video_summary.ipynb diff --git a/recipes/quickstart/Getting_to_know_Llama.ipynb b/getting-started/Getting_to_know_Llama.ipynb similarity index 100% rename from recipes/quickstart/Getting_to_know_Llama.ipynb rename to getting-started/Getting_to_know_Llama.ipynb diff --git a/recipes/quickstart/Prompt_Engineering_with_Llama_3.ipynb b/getting-started/Prompt_Engineering_with_Llama_3.ipynb similarity index 100% rename from recipes/quickstart/Prompt_Engineering_with_Llama_3.ipynb rename to getting-started/Prompt_Engineering_with_Llama_3.ipynb diff --git a/recipes/quickstart/RAG/hello_llama_cloud.ipynb b/getting-started/RAG/hello_llama_cloud.ipynb similarity index 100% rename from recipes/quickstart/RAG/hello_llama_cloud.ipynb rename to getting-started/RAG/hello_llama_cloud.ipynb diff --git a/recipes/quickstart/README.md b/getting-started/README.md similarity index 100% rename from recipes/quickstart/README.md rename to getting-started/README.md diff --git a/recipes/quickstart/Running_Llama3_Anywhere/Running_Llama_on_HF_transformers.ipynb b/getting-started/Running_Llama3_Anywhere/Running_Llama_on_HF_transformers.ipynb similarity index 100% rename from recipes/quickstart/Running_Llama3_Anywhere/Running_Llama_on_HF_transformers.ipynb rename to getting-started/Running_Llama3_Anywhere/Running_Llama_on_HF_transformers.ipynb diff --git a/recipes/quickstart/Running_Llama3_Anywhere/Running_Llama_on_Mac_Windows_Linux.ipynb b/getting-started/Running_Llama3_Anywhere/Running_Llama_on_Mac_Windows_Linux.ipynb similarity index 100% rename from recipes/quickstart/Running_Llama3_Anywhere/Running_Llama_on_Mac_Windows_Linux.ipynb rename to 
getting-started/Running_Llama3_Anywhere/Running_Llama_on_Mac_Windows_Linux.ipynb diff --git a/recipes/quickstart/build_with_Llama_3_2.ipynb b/getting-started/build_with_Llama_3_2.ipynb similarity index 100% rename from recipes/quickstart/build_with_Llama_3_2.ipynb rename to getting-started/build_with_Llama_3_2.ipynb diff --git a/recipes/quickstart/finetuning/LLM_finetuning_overview.md b/getting-started/finetuning/LLM_finetuning_overview.md similarity index 100% rename from recipes/quickstart/finetuning/LLM_finetuning_overview.md rename to getting-started/finetuning/LLM_finetuning_overview.md diff --git a/recipes/quickstart/finetuning/README.md b/getting-started/finetuning/README.md similarity index 100% rename from recipes/quickstart/finetuning/README.md rename to getting-started/finetuning/README.md diff --git a/recipes/quickstart/finetuning/datasets/README.md b/getting-started/finetuning/datasets/README.md similarity index 100% rename from recipes/quickstart/finetuning/datasets/README.md rename to getting-started/finetuning/datasets/README.md diff --git a/recipes/quickstart/finetuning/datasets/custom_dataset.py b/getting-started/finetuning/datasets/custom_dataset.py similarity index 100% rename from recipes/quickstart/finetuning/datasets/custom_dataset.py rename to getting-started/finetuning/datasets/custom_dataset.py diff --git a/recipes/quickstart/finetuning/datasets/ocrvqa_dataset.py b/getting-started/finetuning/datasets/ocrvqa_dataset.py similarity index 100% rename from recipes/quickstart/finetuning/datasets/ocrvqa_dataset.py rename to getting-started/finetuning/datasets/ocrvqa_dataset.py diff --git a/recipes/quickstart/finetuning/datasets/raft_dataset.py b/getting-started/finetuning/datasets/raft_dataset.py similarity index 100% rename from recipes/quickstart/finetuning/datasets/raft_dataset.py rename to getting-started/finetuning/datasets/raft_dataset.py diff --git a/recipes/quickstart/finetuning/finetune_vision_model.md 
b/getting-started/finetuning/finetune_vision_model.md similarity index 100% rename from recipes/quickstart/finetuning/finetune_vision_model.md rename to getting-started/finetuning/finetune_vision_model.md diff --git a/recipes/quickstart/finetuning/finetuning.py b/getting-started/finetuning/finetuning.py similarity index 100% rename from recipes/quickstart/finetuning/finetuning.py rename to getting-started/finetuning/finetuning.py diff --git a/recipes/quickstart/finetuning/multi_node.slurm b/getting-started/finetuning/multi_node.slurm similarity index 100% rename from recipes/quickstart/finetuning/multi_node.slurm rename to getting-started/finetuning/multi_node.slurm diff --git a/recipes/quickstart/finetuning/multigpu_finetuning.md b/getting-started/finetuning/multigpu_finetuning.md similarity index 100% rename from recipes/quickstart/finetuning/multigpu_finetuning.md rename to getting-started/finetuning/multigpu_finetuning.md diff --git a/recipes/quickstart/finetuning/quickstart_peft_finetuning.ipynb b/getting-started/finetuning/quickstart_peft_finetuning.ipynb similarity index 100% rename from recipes/quickstart/finetuning/quickstart_peft_finetuning.ipynb rename to getting-started/finetuning/quickstart_peft_finetuning.ipynb diff --git a/recipes/quickstart/finetuning/singlegpu_finetuning.md b/getting-started/finetuning/singlegpu_finetuning.md similarity index 100% rename from recipes/quickstart/finetuning/singlegpu_finetuning.md rename to getting-started/finetuning/singlegpu_finetuning.md diff --git a/recipes/quickstart/inference/README.md b/getting-started/inference/README.md similarity index 100% rename from recipes/quickstart/inference/README.md rename to getting-started/inference/README.md diff --git a/recipes/quickstart/inference/code_llama/README.md b/getting-started/inference/code_llama/README.md similarity index 100% rename from recipes/quickstart/inference/code_llama/README.md rename to getting-started/inference/code_llama/README.md diff --git 
a/recipes/quickstart/inference/code_llama/code_completion_example.py b/getting-started/inference/code_llama/code_completion_example.py similarity index 100% rename from recipes/quickstart/inference/code_llama/code_completion_example.py rename to getting-started/inference/code_llama/code_completion_example.py diff --git a/recipes/quickstart/inference/code_llama/code_completion_prompt.txt b/getting-started/inference/code_llama/code_completion_prompt.txt similarity index 100% rename from recipes/quickstart/inference/code_llama/code_completion_prompt.txt rename to getting-started/inference/code_llama/code_completion_prompt.txt diff --git a/recipes/quickstart/inference/code_llama/code_infilling_example.py b/getting-started/inference/code_llama/code_infilling_example.py similarity index 100% rename from recipes/quickstart/inference/code_llama/code_infilling_example.py rename to getting-started/inference/code_llama/code_infilling_example.py diff --git a/recipes/quickstart/inference/code_llama/code_infilling_prompt.txt b/getting-started/inference/code_llama/code_infilling_prompt.txt similarity index 100% rename from recipes/quickstart/inference/code_llama/code_infilling_prompt.txt rename to getting-started/inference/code_llama/code_infilling_prompt.txt diff --git a/recipes/quickstart/inference/code_llama/code_instruct_example.py b/getting-started/inference/code_llama/code_instruct_example.py similarity index 100% rename from recipes/quickstart/inference/code_llama/code_instruct_example.py rename to getting-started/inference/code_llama/code_instruct_example.py diff --git a/recipes/quickstart/inference/local_inference/README.md b/getting-started/inference/local_inference/README.md similarity index 100% rename from recipes/quickstart/inference/local_inference/README.md rename to getting-started/inference/local_inference/README.md diff --git a/recipes/quickstart/inference/local_inference/chat_completion/chat_completion.py 
b/getting-started/inference/local_inference/chat_completion/chat_completion.py similarity index 100% rename from recipes/quickstart/inference/local_inference/chat_completion/chat_completion.py rename to getting-started/inference/local_inference/chat_completion/chat_completion.py diff --git a/recipes/quickstart/inference/local_inference/chat_completion/chats.json b/getting-started/inference/local_inference/chat_completion/chats.json similarity index 100% rename from recipes/quickstart/inference/local_inference/chat_completion/chats.json rename to getting-started/inference/local_inference/chat_completion/chats.json diff --git a/recipes/quickstart/inference/local_inference/inference.py b/getting-started/inference/local_inference/inference.py similarity index 100% rename from recipes/quickstart/inference/local_inference/inference.py rename to getting-started/inference/local_inference/inference.py diff --git a/recipes/quickstart/inference/local_inference/multi_modal_infer.py b/getting-started/inference/local_inference/multi_modal_infer.py similarity index 100% rename from recipes/quickstart/inference/local_inference/multi_modal_infer.py rename to getting-started/inference/local_inference/multi_modal_infer.py diff --git a/recipes/quickstart/inference/local_inference/samsum_prompt.txt b/getting-started/inference/local_inference/samsum_prompt.txt similarity index 100% rename from recipes/quickstart/inference/local_inference/samsum_prompt.txt rename to getting-started/inference/local_inference/samsum_prompt.txt diff --git a/recipes/quickstart/inference/mobile_inference/android_inference/README.md b/getting-started/inference/mobile_inference/android_inference/README.md similarity index 100% rename from recipes/quickstart/inference/mobile_inference/android_inference/README.md rename to getting-started/inference/mobile_inference/android_inference/README.md diff --git a/recipes/quickstart/inference/mobile_inference/android_inference/mlc-package-config.json 
b/getting-started/inference/mobile_inference/android_inference/mlc-package-config.json similarity index 100% rename from recipes/quickstart/inference/mobile_inference/android_inference/mlc-package-config.json rename to getting-started/inference/mobile_inference/android_inference/mlc-package-config.json diff --git a/recipes/quickstart/inference/mobile_inference/android_inference/requirements.txt b/getting-started/inference/mobile_inference/android_inference/requirements.txt similarity index 100% rename from recipes/quickstart/inference/mobile_inference/android_inference/requirements.txt rename to getting-started/inference/mobile_inference/android_inference/requirements.txt diff --git a/recipes/quickstart/inference/modelUpgradeExample.py b/getting-started/inference/modelUpgradeExample.py similarity index 100% rename from recipes/quickstart/inference/modelUpgradeExample.py rename to getting-started/inference/modelUpgradeExample.py diff --git a/dev_requirements.txt b/src/dev_requirements.txt similarity index 100% rename from dev_requirements.txt rename to src/dev_requirements.txt diff --git a/docs/FAQ.md b/src/docs/FAQ.md similarity index 100% rename from docs/FAQ.md rename to src/docs/FAQ.md diff --git a/docs/LLM_finetuning.md b/src/docs/LLM_finetuning.md similarity index 100% rename from docs/LLM_finetuning.md rename to src/docs/LLM_finetuning.md diff --git a/docs/img/a_colorful_llama_doing_ai_programming.jpeg b/src/docs/img/a_colorful_llama_doing_ai_programming.jpeg similarity index 100% rename from docs/img/a_colorful_llama_doing_ai_programming.jpeg rename to src/docs/img/a_colorful_llama_doing_ai_programming.jpeg diff --git a/docs/img/cat.jpeg b/src/docs/img/cat.jpeg similarity index 100% rename from docs/img/cat.jpeg rename to src/docs/img/cat.jpeg diff --git a/docs/img/feature_based_fn.png b/src/docs/img/feature_based_fn.png similarity index 100% rename from docs/img/feature_based_fn.png rename to src/docs/img/feature_based_fn.png diff --git 
a/docs/img/feature_based_fn_2.png b/src/docs/img/feature_based_fn_2.png similarity index 100% rename from docs/img/feature_based_fn_2.png rename to src/docs/img/feature_based_fn_2.png diff --git a/docs/img/full_param_fn.png b/src/docs/img/full_param_fn.png similarity index 100% rename from docs/img/full_param_fn.png rename to src/docs/img/full_param_fn.png diff --git a/docs/img/gnocchi_alla_romana.jpeg b/src/docs/img/gnocchi_alla_romana.jpeg similarity index 100% rename from docs/img/gnocchi_alla_romana.jpeg rename to src/docs/img/gnocchi_alla_romana.jpeg diff --git a/docs/img/grocery_shopping_bascket_with_salmon_in_package.jpeg b/src/docs/img/grocery_shopping_bascket_with_salmon_in_package.jpeg similarity index 100% rename from docs/img/grocery_shopping_bascket_with_salmon_in_package.jpeg rename to src/docs/img/grocery_shopping_bascket_with_salmon_in_package.jpeg diff --git a/docs/img/llama-mobile-confirmed.png b/src/docs/img/llama-mobile-confirmed.png similarity index 100% rename from docs/img/llama-mobile-confirmed.png rename to src/docs/img/llama-mobile-confirmed.png diff --git a/docs/img/llama-recipes.png b/src/docs/img/llama-recipes.png similarity index 100% rename from docs/img/llama-recipes.png rename to src/docs/img/llama-recipes.png diff --git a/docs/img/llama2_gradio.png b/src/docs/img/llama2_gradio.png similarity index 100% rename from docs/img/llama2_gradio.png rename to src/docs/img/llama2_gradio.png diff --git a/docs/img/llama2_streamlit.png b/src/docs/img/llama2_streamlit.png similarity index 100% rename from docs/img/llama2_streamlit.png rename to src/docs/img/llama2_streamlit.png diff --git a/docs/img/llama2_streamlit2.png b/src/docs/img/llama2_streamlit2.png similarity index 100% rename from docs/img/llama2_streamlit2.png rename to src/docs/img/llama2_streamlit2.png diff --git a/docs/img/llama_stack.png b/src/docs/img/llama_stack.png similarity index 100% rename from docs/img/llama_stack.png rename to src/docs/img/llama_stack.png diff --git 
a/docs/img/messenger_api_settings.png b/src/docs/img/messenger_api_settings.png similarity index 100% rename from docs/img/messenger_api_settings.png rename to src/docs/img/messenger_api_settings.png diff --git a/docs/img/messenger_llama_arch.jpg b/src/docs/img/messenger_llama_arch.jpg similarity index 100% rename from docs/img/messenger_llama_arch.jpg rename to src/docs/img/messenger_llama_arch.jpg diff --git a/docs/img/meta_release.png b/src/docs/img/meta_release.png similarity index 100% rename from docs/img/meta_release.png rename to src/docs/img/meta_release.png diff --git a/docs/img/resized_image.jpg b/src/docs/img/resized_image.jpg similarity index 100% rename from docs/img/resized_image.jpg rename to src/docs/img/resized_image.jpg diff --git a/docs/img/thumbnail_IMG_1329.jpg b/src/docs/img/thumbnail_IMG_1329.jpg similarity index 100% rename from docs/img/thumbnail_IMG_1329.jpg rename to src/docs/img/thumbnail_IMG_1329.jpg diff --git a/docs/img/thumbnail_IMG_1440.jpg b/src/docs/img/thumbnail_IMG_1440.jpg similarity index 100% rename from docs/img/thumbnail_IMG_1440.jpg rename to src/docs/img/thumbnail_IMG_1440.jpg diff --git a/docs/img/thumbnail_IMG_6385.jpg b/src/docs/img/thumbnail_IMG_6385.jpg similarity index 100% rename from docs/img/thumbnail_IMG_6385.jpg rename to src/docs/img/thumbnail_IMG_6385.jpg diff --git a/docs/img/wandb_screenshot.png b/src/docs/img/wandb_screenshot.png similarity index 100% rename from docs/img/wandb_screenshot.png rename to src/docs/img/wandb_screenshot.png diff --git a/docs/img/whatsapp_dashboard.jpg b/src/docs/img/whatsapp_dashboard.jpg similarity index 100% rename from docs/img/whatsapp_dashboard.jpg rename to src/docs/img/whatsapp_dashboard.jpg diff --git a/docs/img/whatsapp_llama_arch.jpg b/src/docs/img/whatsapp_llama_arch.jpg similarity index 100% rename from docs/img/whatsapp_llama_arch.jpg rename to src/docs/img/whatsapp_llama_arch.jpg diff --git a/docs/multi_gpu.md b/src/docs/multi_gpu.md similarity index 100% rename 
from docs/multi_gpu.md rename to src/docs/multi_gpu.md diff --git a/docs/single_gpu.md b/src/docs/single_gpu.md similarity index 100% rename from docs/single_gpu.md rename to src/docs/single_gpu.md diff --git a/requirements.txt b/src/requirements.txt similarity index 100% rename from requirements.txt rename to src/requirements.txt From bfa390fcd87ee85cc0c36fb99ccbce676440100e Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Thu, 9 Jan 2025 09:12:27 -0800 Subject: [PATCH 03/23] Update UPDATES.md --- UPDATES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/UPDATES.md b/UPDATES.md index 56739b392..9ff4b961d 100644 --- a/UPDATES.md +++ b/UPDATES.md @@ -18,4 +18,5 @@ Nested Folders rename: Removed folders: - /flagged (Empty folder) - /recipes/quickstart/Running_Llama3_Anywhere (Redundant code) +- /recipes/quickstart/codellama (deprecated model) From 83e353178c1c9ee28174a354d3ff366a259904ce Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Thu, 9 Jan 2025 09:18:24 -0800 Subject: [PATCH 04/23] rewrite readme --- README.md | 39 ++ UPDATES.md | 3 +- .../long_context/H2O/README.md | 0 .../data/summarization/cnn_dailymail.jsonl | 0 .../H2O/data/summarization/xsum.jsonl | 0 .../long_context/H2O/requirements.txt | 0 .../long_context/H2O/run_streaming.py | 0 .../long_context/H2O/run_summarization.py | 0 .../long_context/H2O/src/streaming.sh | 0 .../long_context/H2O/utils/cache.py | 0 .../long_context/H2O/utils/llama.py | 0 .../long_context/H2O/utils/streaming.py | 0 .../Running_Llama_on_HF_transformers.ipynb | 336 ------------------ .../Running_Llama_on_Mac_Windows_Linux.ipynb | 166 --------- .../inference/code_llama/README.md | 39 -- .../code_llama/code_completion_example.py | 119 ------- .../code_llama/code_completion_prompt.txt | 7 - .../code_llama/code_infilling_example.py | 118 ------ .../code_llama/code_infilling_prompt.txt | 3 - .../code_llama/code_instruct_example.py | 143 -------- .../inference/modelUpgradeExample.py | 51 --- recipes/README.md | 11 - 22 files 
changed, 41 insertions(+), 994 deletions(-) rename {recipes/experimental => end-to-end-use-cases}/long_context/H2O/README.md (100%) rename {recipes/experimental => end-to-end-use-cases}/long_context/H2O/data/summarization/cnn_dailymail.jsonl (100%) rename {recipes/experimental => end-to-end-use-cases}/long_context/H2O/data/summarization/xsum.jsonl (100%) rename {recipes/experimental => end-to-end-use-cases}/long_context/H2O/requirements.txt (100%) rename {recipes/experimental => end-to-end-use-cases}/long_context/H2O/run_streaming.py (100%) rename {recipes/experimental => end-to-end-use-cases}/long_context/H2O/run_summarization.py (100%) rename {recipes/experimental => end-to-end-use-cases}/long_context/H2O/src/streaming.sh (100%) rename {recipes/experimental => end-to-end-use-cases}/long_context/H2O/utils/cache.py (100%) rename {recipes/experimental => end-to-end-use-cases}/long_context/H2O/utils/llama.py (100%) rename {recipes/experimental => end-to-end-use-cases}/long_context/H2O/utils/streaming.py (100%) delete mode 100644 getting-started/Running_Llama3_Anywhere/Running_Llama_on_HF_transformers.ipynb delete mode 100644 getting-started/Running_Llama3_Anywhere/Running_Llama_on_Mac_Windows_Linux.ipynb delete mode 100644 getting-started/inference/code_llama/README.md delete mode 100644 getting-started/inference/code_llama/code_completion_example.py delete mode 100644 getting-started/inference/code_llama/code_completion_prompt.txt delete mode 100644 getting-started/inference/code_llama/code_infilling_example.py delete mode 100644 getting-started/inference/code_llama/code_infilling_prompt.txt delete mode 100644 getting-started/inference/code_llama/code_instruct_example.py delete mode 100644 getting-started/inference/modelUpgradeExample.py delete mode 100644 recipes/README.md diff --git a/README.md b/README.md index 38aaf5846..d38632fc0 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,44 @@ # Llama Recipes: Examples to get started using the Llama models from Meta + 
+> Note: We recently did a refactor of the repo, [archive-main](https://github.com/meta-llama/llama-recipes/tree/archive-main) is a snapshot branch from before the refactor + +Welcome to the official repository for helping you get started with [inference](https://github.com/meta-llama/llama-recipes/tree/main/getting-started/inference), [fine-tuning](https://github.com/init27/llama-recipes/tree/main/getting-started/finetuning) and [end-to-end use-cases](https://github.com/meta-llama/llama-recipes/tree/main/end-to-end-use-cases) of building with the Llama Model family. + +The examples cover the most popular community approaches, popular use-cases and the latest Llama 3.2 Vision and Llama 3.2 Text, in this repository. + +> [!TIP] +> Repository Structure: +> * [Start building with the Llama 3.2 models](./getting-started/) +> * [End to End Use cases with Llama model family](https://github.com/meta-llama/llama-recipes/tree/main/end-to-end-use-cases) +> * [Examples of building with 3rd Party Llama Providers](https://github.com/meta-llama/llama-recipes/tree/main/3p-integrations) +> * [Model Benchmarks](https://github.com/meta-llama/llama-recipes/tree/main/benchmarks) + +> [!TIP] +> Get started with Llama 3.2 with these new recipes: +> * [Finetune Llama 3.2 Vision](https://github.com/meta-llama/llama-recipes/blob/main/recipes/getting-started/finetuning/finetune_vision_model.md) +> * [Multimodal Inference with Llama 3.2 Vision](https://github.com/meta-llama/llama-recipes/blob/main/recipes/getting-started/inference/local_inference/README.md#multimodal-inference) +> * [Inference on Llama Guard 1B + Multimodal inference on Llama Guard 11B-Vision](https://github.com/meta-llama/llama-recipes/blob/main/recipes/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb) + + +> [!NOTE] +> Llama 3.2 follows the same prompt template as Llama 3.1, with a new special token `<|image|>` representing the input image for the multimodal models. 
+> +> More details on the prompt templates for image reasoning, tool-calling and code interpreter can be found [on the documentation website](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_2). + + +## Repository Structure: + +- [3P Integrations](https://github.com/init27/llama-recipes/tree/main/3p-integrations): Getting Started Recipes and End to End Use-Cases from various Llama providers +- [End to End Use Cases](https://github.com/init27/llama-recipes/tree/main/end-to-end-use-cases): As the name suggests, spanning various domains and applications +- [Getting Started](https://github.com/init27/llama-recipes/tree/main/getting-started/): Reference for inferencing, fine-tuning and RAG examples +- [Benchmarks](https://github.com/init27/llama-recipes/tree/main/benchmarks): + + +## FAQ: + + + The 'llama-recipes' repository is a companion to the [Meta Llama](https://github.com/meta-llama/llama-models) models. We support the latest version, [Llama 3.2 Vision](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md) and [Llama 3.2 Text](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md), in this repository. This repository contains example scripts and notebooks to get started with the models in a variety of use-cases, including fine-tuning for domain adaptation and building LLM-based applications with Llama and other tools in the LLM ecosystem. The examples here use Llama locally, in the cloud, and on-prem. 
> [!TIP] diff --git a/UPDATES.md b/UPDATES.md index 9ff4b961d..f4dc5cef2 100644 --- a/UPDATES.md +++ b/UPDATES.md @@ -13,10 +13,11 @@ Nested Folders rename: - /dev_requirements.txt -> /src/dev_requirements.txt - /requirements.txt -> /src/requirements.txt - /tools -> /end-to-end-use-cases/benchmarks/ +- /recipes/experimental/long_context -> /end-to-end-use-cases/long_context Removed folders: - /flagged (Empty folder) - /recipes/quickstart/Running_Llama3_Anywhere (Redundant code) -- /recipes/quickstart/codellama (deprecated model) +- /recipes/quickstart/inference/codellama (deprecated model) diff --git a/recipes/experimental/long_context/H2O/README.md b/end-to-end-use-cases/long_context/H2O/README.md similarity index 100% rename from recipes/experimental/long_context/H2O/README.md rename to end-to-end-use-cases/long_context/H2O/README.md diff --git a/recipes/experimental/long_context/H2O/data/summarization/cnn_dailymail.jsonl b/end-to-end-use-cases/long_context/H2O/data/summarization/cnn_dailymail.jsonl similarity index 100% rename from recipes/experimental/long_context/H2O/data/summarization/cnn_dailymail.jsonl rename to end-to-end-use-cases/long_context/H2O/data/summarization/cnn_dailymail.jsonl diff --git a/recipes/experimental/long_context/H2O/data/summarization/xsum.jsonl b/end-to-end-use-cases/long_context/H2O/data/summarization/xsum.jsonl similarity index 100% rename from recipes/experimental/long_context/H2O/data/summarization/xsum.jsonl rename to end-to-end-use-cases/long_context/H2O/data/summarization/xsum.jsonl diff --git a/recipes/experimental/long_context/H2O/requirements.txt b/end-to-end-use-cases/long_context/H2O/requirements.txt similarity index 100% rename from recipes/experimental/long_context/H2O/requirements.txt rename to end-to-end-use-cases/long_context/H2O/requirements.txt diff --git a/recipes/experimental/long_context/H2O/run_streaming.py b/end-to-end-use-cases/long_context/H2O/run_streaming.py similarity index 100% rename from 
recipes/experimental/long_context/H2O/run_streaming.py rename to end-to-end-use-cases/long_context/H2O/run_streaming.py diff --git a/recipes/experimental/long_context/H2O/run_summarization.py b/end-to-end-use-cases/long_context/H2O/run_summarization.py similarity index 100% rename from recipes/experimental/long_context/H2O/run_summarization.py rename to end-to-end-use-cases/long_context/H2O/run_summarization.py diff --git a/recipes/experimental/long_context/H2O/src/streaming.sh b/end-to-end-use-cases/long_context/H2O/src/streaming.sh similarity index 100% rename from recipes/experimental/long_context/H2O/src/streaming.sh rename to end-to-end-use-cases/long_context/H2O/src/streaming.sh diff --git a/recipes/experimental/long_context/H2O/utils/cache.py b/end-to-end-use-cases/long_context/H2O/utils/cache.py similarity index 100% rename from recipes/experimental/long_context/H2O/utils/cache.py rename to end-to-end-use-cases/long_context/H2O/utils/cache.py diff --git a/recipes/experimental/long_context/H2O/utils/llama.py b/end-to-end-use-cases/long_context/H2O/utils/llama.py similarity index 100% rename from recipes/experimental/long_context/H2O/utils/llama.py rename to end-to-end-use-cases/long_context/H2O/utils/llama.py diff --git a/recipes/experimental/long_context/H2O/utils/streaming.py b/end-to-end-use-cases/long_context/H2O/utils/streaming.py similarity index 100% rename from recipes/experimental/long_context/H2O/utils/streaming.py rename to end-to-end-use-cases/long_context/H2O/utils/streaming.py diff --git a/getting-started/Running_Llama3_Anywhere/Running_Llama_on_HF_transformers.ipynb b/getting-started/Running_Llama3_Anywhere/Running_Llama_on_HF_transformers.ipynb deleted file mode 100644 index 06f0e4094..000000000 --- a/getting-started/Running_Llama3_Anywhere/Running_Llama_on_HF_transformers.ipynb +++ /dev/null @@ -1,336 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Running Meta Llama 3.1 on Google Colab using 
Hugging Face transformers library\n", - "This notebook goes over how you can set up and run Llama 3.1 using Hugging Face transformers library\n", - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Steps at a glance:\n", - "This demo showcases how to run the example with already converted Llama 3.1 weights on [Hugging Face](https://huggingface.co/meta-llama). Please Note: To use the downloads on Hugging Face, you must first request a download as shown in the steps below making sure that you are using the same email address as your Hugging Face account.\n", - "\n", - "To use already converted weights, start here:\n", - "1. Request download of model weights from the Llama website\n", - "2. Login to Hugging Face from your terminal using the same email address as (1). Follow the instructions [here](https://huggingface.co/docs/huggingface_hub/en/quick-start). \n", - "3. Run the example\n", - "\n", - "\n", - "Else, if you'd like to download the models locally and convert them to the HF format, follow the steps below to convert the weights:\n", - "1. Request download of model weights from the Llama website\n", - "2. Clone the llama repo and get the weights\n", - "3. Convert the model weights\n", - "4. Prepare the script\n", - "5. Run the example" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Using already converted weights" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 1. Request download of model weights from the Llama website\n", - "Request download of model weights from the Llama website\n", - "Before you can run the model locally, you will need to get the model weights. To get the model weights, visit the [Llama website](https://llama.meta.com/) and click on “download models”. \n", - "\n", - "Fill the required information, select the models “Meta Llama 3.1” and accept the terms & conditions. You will receive a URL in your email in a short time." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 2. Prepare the script\n", - "\n", - "We will install the Transformers library and Accelerate library for our demo.\n", - "\n", - "The `Transformers` library provides many models to perform tasks on texts such as classification, question answering, text generation, etc.\n", - "The `accelerate` library enables the same PyTorch code to be run across any distributed configuration of GPUs and CPUs.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install transformers\n", - "!pip install accelerate" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, we will import AutoTokenizer, which is a class from the transformers library that automatically chooses the correct tokenizer for a given pre-trained model, import transformers library and torch for PyTorch.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from transformers import AutoTokenizer\n", - "import transformers\n", - "import torch" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Then, we will set the model variable to a specific model we’d like to use. In this demo, we will use the 8b chat model `meta-llama/Meta-Llama-3.1-8B-Instruct`. Using Meta models from Hugging Face requires you to\n", - "\n", - "1. Accept Terms of Service for Meta Llama 3.1 on Meta [website](https://llama.meta.com/llama-downloads).\n", - "2. Use the same email address from Step (1) to login into Hugging Face.\n", - "\n", - "Follow the instructions on this Hugging Face page to login from your [terminal](https://huggingface.co/docs/huggingface_hub/en/quick-start). 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pip install --upgrade huggingface_hub" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from huggingface_hub import login\n", - "login()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n", - "tokenizer = AutoTokenizer.from_pretrained(model)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, we will use the `from_pretrained` method of `AutoTokenizer` to create a tokenizer. This will download and cache the pre-trained tokenizer and return an instance of the appropriate tokenizer class.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pipeline = transformers.pipeline(\n", - "\"text-generation\",\n", - " model=model,\n", - " torch_dtype=torch.float16,\n", - " device_map=\"auto\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 3. Run the example\n", - "\n", - "Now, let’s create the pipeline for text generation. We’ll also set the device_map argument to `auto`, which means the pipeline will automatically use a GPU if one is available.\n", - "\n", - "Let’s also generate a text sequence based on the input that we provide. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sequences = pipeline(\n", - " 'I have tomatoes, basil and cheese at home. 
What can I cook for dinner?\\n',\n", - " do_sample=True,\n", - " top_k=10,\n", - " num_return_sequences=1,\n", - " eos_token_id=tokenizer.eos_token_id,\n", - " truncation = True,\n", - " max_length=400,\n", - ")\n", - "\n", - "for seq in sequences:\n", - " print(f\"Result: {seq['generated_text']}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "
\n", - "\n", - "### Downloading and converting weights to Hugging Face format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 1. Request download of model weights from the Llama website\n", - "Request download of model weights from the Llama website\n", - "Before you can run the model locally, you will need to get the model weights. To get the model weights, visit the [Llama website](https://llama.meta.com/) and click on “download models”. \n", - "\n", - "Fill the required information, select the models \"Meta Llama 3\" and accept the terms & conditions. You will receive a URL in your email in a short time." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 2. Clone the llama repo and get the weights\n", - "Git clone the [Meta Llama 3 repo](https://github.com/meta-llama/llama3). Run the `download.sh` script and follow the instructions. This will download the model checkpoints and tokenizer.\n", - "\n", - "This example demonstrates a Meta Llama 3.1 model with 8B-instruct parameters, but the steps we follow would be similar for other llama models, as well as for other parameter models." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 3. Convert the model weights using Hugging Face transformer from source\n", - "\n", - "* `python3 -m venv hf-convertor`\n", - "* `source hf-convertor/bin/activate`\n", - "* `git clone https://github.com/huggingface/transformers.git`\n", - "* `cd transformers`\n", - "* `pip install -e .`\n", - "* `pip install torch tiktoken blobfile accelerate`\n", - "* `python3 src/transformers/models/llama/convert_llama_weights_to_hf.py --input_dir ${path_to_meta_downloaded_model} --output_dir ${path_to_save_converted_hf_model} --model_size 8B --llama_version 3.1`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "#### 4. 
Prepare the script\n", - "Import the following necessary modules in your script: \n", - "* `AutoModel` is the Llama 3 model class\n", - "* `AutoTokenizer` prepares your prompt for the model to process\n", - "* `pipeline` is an abstraction to generate model outputs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "import transformers\n", - "from transformers import AutoModelForCausalLM, AutoTokenizer\n", - "\n", - "model_dir = \"${path_the_converted_hf_model}\"\n", - "model = AutoModelForCausalLM.from_pretrained(\n", - " model_dir,\n", - " device_map=\"auto\",\n", - " )\n", - "tokenizer = AutoTokenizer.from_pretrained(model_dir)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We need a way to use our model for inference. Pipeline allows us to specify which type of task the pipeline needs to run (`text-generation`), specify the model that the pipeline should use to make predictions (`model`), define the precision to use this model (`torch.float16`), device on which the pipeline should run (`device_map`) among various other options. \n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "pipeline = transformers.pipeline(\n", - " \"text-generation\",\n", - " model=model,\n", - " tokenizer=tokenizer,\n", - " torch_dtype=torch.float16,\n", - " device_map=\"auto\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we have our pipeline defined, and we need to provide some text prompts as inputs to our pipeline to use when it runs to generate responses (`sequences`). The pipeline shown in the example below sets `do_sample` to True, which allows us to specify the decoding strategy we’d like to use to select the next token from the probability distribution over the entire vocabulary. In our example, we are using top_k sampling. 
\n", - "\n", - "By changing `max_length`, you can specify how long you’d like the generated response to be. \n", - "Setting the `num_return_sequences` parameter to greater than one will let you generate more than one output.\n", - "\n", - "In your script, add the following to provide input, and information on how to run the pipeline:\n", - "\n", - "\n", - "#### 5. Run the example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sequences = pipeline(\n", - " 'I have tomatoes, basil and cheese at home. What can I cook for dinner?\\n',\n", - " do_sample=True,\n", - " top_k=10,\n", - " num_return_sequences=1,\n", - " eos_token_id=tokenizer.eos_token_id,\n", - " max_length=400,\n", - ")\n", - "for seq in sequences:\n", - " print(f\"{seq['generated_text']}\")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/getting-started/Running_Llama3_Anywhere/Running_Llama_on_Mac_Windows_Linux.ipynb b/getting-started/Running_Llama3_Anywhere/Running_Llama_on_Mac_Windows_Linux.ipynb deleted file mode 100644 index 0a5f43059..000000000 --- a/getting-started/Running_Llama3_Anywhere/Running_Llama_on_Mac_Windows_Linux.ipynb +++ /dev/null @@ -1,166 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Running Llama 3 on Mac, Windows or Linux\n", - "This notebook goes over how you can set up and run Llama 3.1 locally on a Mac, Windows or Linux using [Ollama](https://ollama.com/)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Steps at a glance:\n", - "1. 
Download and install Ollama.\n", - "2. Download and test run Llama 3.1\n", - "3. Use local Llama 3.1 via Python.\n", - "4. Use local Llama 3.1 via LangChain.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 1. Download and install Ollama\n", - "\n", - "On Mac or Windows, go to the Ollama download page [here](https://ollama.com/download) and select your platform to download it, then double click the downloaded file to install Ollama.\n", - "\n", - "On Linux, you can simply run on a terminal `curl -fsSL https://ollama.com/install.sh | sh` to download and install Ollama." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 2. Download and test run Llama 3\n", - "\n", - "On a terminal or console, run `ollama pull llama3.1` to download the Llama 3.1 8b chat model, in the 4-bit quantized format with size about 4.7 GB.\n", - "\n", - "Run `ollama pull llama3.1:70b` to download the Llama 3.1 70b chat model, also in the 4-bit quantized format with size 39GB.\n", - "\n", - "Then you can run `ollama run llama3.1` and ask Llama 3.1 questions such as \"who wrote the book godfather?\" or \"who wrote the book godfather? answer in one sentence.\" You can also try `ollama run llama3.1:70b`, but the inference speed will most likely be too slow - for example, on an Apple M1 Pro with 32GB RAM, it takes over 10 seconds to generate one token using Llama 3.1 70b chat (vs over 10 tokens per second with Llama 3.1 8b chat).\n", - "\n", - "You can also run the following command to test Llama 3.1 8b chat:\n", - "```\n", - " curl http://localhost:11434/api/chat -d '{\n", - " \"model\": \"llama3.1\",\n", - " \"messages\": [\n", - " {\n", - " \"role\": \"user\",\n", - " \"content\": \"who wrote the book godfather?\"\n", - " }\n", - " ],\n", - " \"stream\": false\n", - "}'\n", - "```\n", - "\n", - "The complete Ollama API doc is [here](https://github.com/ollama/ollama/blob/main/docs/api.md)." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 3. Use local Llama 3.1 via Python\n", - "\n", - "The Python code below is the port of the curl command above." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import requests\n", - "import json\n", - "\n", - "url = \"http://localhost:11434/api/chat\"\n", - "\n", - "def llama3(prompt):\n", - " data = {\n", - " \"model\": \"llama3.1\",\n", - " \"messages\": [\n", - " {\n", - " \"role\": \"user\",\n", - " \"content\": prompt\n", - " }\n", - " ],\n", - " \"stream\": False\n", - " }\n", - " \n", - " headers = {\n", - " 'Content-Type': 'application/json'\n", - " }\n", - " \n", - " response = requests.post(url, headers=headers, json=data)\n", - " \n", - " return(response.json()['message']['content'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "response = llama3(\"who wrote the book godfather\")\n", - "print(response)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 4. Use local Llama 3.1 via LangChain\n", - "\n", - "Code below use LangChain with Ollama to query Llama 3 running locally. For a more advanced example of using local Llama 3 with LangChain and agent-powered RAG, see [this](https://github.com/langchain-ai/langgraph/blob/main/examples/rag/langgraph_rag_agent_llama3_local.ipynb)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install langchain" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_community.chat_models import ChatOllama\n", - "\n", - "llm = ChatOllama(model=\"llama3.1\", temperature=0)\n", - "response = llm.invoke(\"who wrote the book godfather?\")\n", - "print(response.content)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/getting-started/inference/code_llama/README.md b/getting-started/inference/code_llama/README.md deleted file mode 100644 index ef1be5e83..000000000 --- a/getting-started/inference/code_llama/README.md +++ /dev/null @@ -1,39 +0,0 @@ -# Code Llama - -Code llama was recently released with three flavors, base-model that support multiple programming languages, Python fine-tuned model and an instruction fine-tuned and aligned variation of Code Llama, please read more [here](https://ai.meta.com/blog/code-llama-large-language-model-coding/). Also note that the Python fine-tuned model and 34B models are not trained on infilling objective, hence can not be used for infilling use-case. - -Find the scripts to run Code Llama, where there are two examples of running code completion and infilling. - -**Note** Please find the right model on HF [here](https://huggingface.co/models?search=meta-llama%20codellama). 
- -Make sure to install Transformers from source for now - -```bash - -pip install git+https://github.com/huggingface/transformers - -``` - -To run the code completion example: - -```bash - -python code_completion_example.py --model_name MODEL_NAME --prompt_file code_completion_prompt.txt --temperature 0.2 --top_p 0.9 - -``` - -To run the code infilling example: - -```bash - -python code_infilling_example.py --model_name MODEL_NAME --prompt_file code_infilling_prompt.txt --temperature 0.2 --top_p 0.9 - -``` -To run the 70B Instruct model example run the following (you'll need to enter the system and user prompts to instruct the model): - -```bash - -python code_instruct_example.py --model_name codellama/CodeLlama-70b-Instruct-hf --temperature 0.2 --top_p 0.9 - -``` -You can learn more about the chat prompt template [on HF](https://huggingface.co/meta-llama/CodeLlama-70b-Instruct-hf#chat-prompt) and [original Code Llama repository](https://github.com/meta-llama/codellama/blob/main/README.md#fine-tuned-instruction-models). HF tokenizer has already taken care of the chat template as shown in this example. diff --git a/getting-started/inference/code_llama/code_completion_example.py b/getting-started/inference/code_llama/code_completion_example.py deleted file mode 100644 index 201f8df8b..000000000 --- a/getting-started/inference/code_llama/code_completion_example.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement. 
- -# from accelerate import init_empty_weights, load_checkpoint_and_dispatch - -import fire -import os -import sys -import time - -import torch -from transformers import AutoTokenizer - -from llama_recipes.inference.safety_utils import get_safety_checker -from llama_recipes.inference.model_utils import load_model, load_peft_model - - -def main( - model_name, - peft_model: str=None, - quantization: bool=False, - max_new_tokens =100, #The maximum numbers of tokens to generate - prompt_file: str=None, - seed: int=42, #seed value for reproducibility - do_sample: bool=True, #Whether or not to use sampling ; use greedy decoding otherwise. - min_length: int=None, #The minimum length of the sequence to be generated, input prompt + min_new_tokens - use_cache: bool=True, #[optional] Whether or not the model should use the past last key/values attentions Whether or not the model should use the past last key/values attentions (if applicable to the model) to speed up decoding. - top_p: float=0.9, # [optional] If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: float=0.6, # [optional] The value used to modulate the next token probabilities. - top_k: int=50, # [optional] The number of highest probability vocabulary tokens to keep for top-k-filtering. - repetition_penalty: float=1.0, #The parameter for repetition penalty. 1.0 means no penalty. - length_penalty: int=1, #[optional] Exponential penalty to the length that is used with beam-based generation. 
- enable_azure_content_safety: bool=False, # Enable safety check with Azure content safety api - enable_sensitive_topics: bool=False, # Enable check for sensitive topics using AuditNLG APIs - enable_salesforce_content_safety: bool=True, # Enable safety check with Salesforce safety flan t5 - enable_llamaguard_content_safety: bool=False, # Enable safety check with Llama-Guard - use_fast_kernels: bool = True, # Enable using SDPA from PyTroch Accelerated Transformers, make use Flash Attention and Xformer memory-efficient kernels - **kwargs -): - if prompt_file is not None: - assert os.path.exists( - prompt_file - ), f"Provided Prompt file does not exist {prompt_file}" - with open(prompt_file, "r") as f: - user_prompt = f.read() - else: - print("No user prompt provided. Exiting.") - sys.exit(1) - - # Set the seeds for reproducibility - torch.cuda.manual_seed(seed) - torch.manual_seed(seed) - - model = load_model(model_name, quantization, use_fast_kernels) - if peft_model: - model = load_peft_model(model, peft_model) - - model.eval() - - tokenizer = AutoTokenizer.from_pretrained(model_name) - safety_checker = get_safety_checker(enable_azure_content_safety, - enable_sensitive_topics, - enable_salesforce_content_safety, - enable_llamaguard_content_safety, - ) - - # Safety check of the user prompt - safety_results = [check(user_prompt) for check in safety_checker] - are_safe = all([r[1] for r in safety_results]) - if are_safe: - print("User prompt deemed safe.") - print(f"User prompt:\n{user_prompt}") - else: - print("User prompt deemed unsafe.") - for method, is_safe, report in safety_results: - if not is_safe: - print(method) - print(report) - print("Skipping the inference as the prompt is not safe.") - sys.exit(1) # Exit the program with an error status - - batch = tokenizer(user_prompt, return_tensors="pt") - - batch = {k: v.to("cuda") for k, v in batch.items()} - start = time.perf_counter() - with torch.no_grad(): - outputs = model.generate( - **batch, - 
max_new_tokens=max_new_tokens, - do_sample=do_sample, - top_p=top_p, - temperature=temperature, - min_length=min_length, - use_cache=use_cache, - top_k=top_k, - repetition_penalty=repetition_penalty, - length_penalty=length_penalty, - **kwargs - ) - e2e_inference_time = (time.perf_counter()-start)*1000 - print(f"the inference time is {e2e_inference_time} ms") - output_text = tokenizer.decode(outputs[0], skip_special_tokens=True) - - # Safety check of the model output - safety_results = [check(output_text) for check in safety_checker] - are_safe = all([r[1] for r in safety_results]) - if are_safe: - print("User input and model output deemed safe.") - print(f"Model output:\n{output_text}") - else: - print("Model output deemed unsafe.") - for method, is_safe, report in safety_results: - if not is_safe: - print(method) - print(report) - - -if __name__ == "__main__": - fire.Fire(main) diff --git a/getting-started/inference/code_llama/code_completion_prompt.txt b/getting-started/inference/code_llama/code_completion_prompt.txt deleted file mode 100644 index 8e184e2fe..000000000 --- a/getting-started/inference/code_llama/code_completion_prompt.txt +++ /dev/null @@ -1,7 +0,0 @@ -import argparse - -def main(string: str): - print(string) - print(string[::-1]) - -if __name__ == "__main__": \ No newline at end of file diff --git a/getting-started/inference/code_llama/code_infilling_example.py b/getting-started/inference/code_llama/code_infilling_example.py deleted file mode 100644 index a955eb5ce..000000000 --- a/getting-started/inference/code_llama/code_infilling_example.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement. 
- -# from accelerate import init_empty_weights, load_checkpoint_and_dispatch - -import fire -import torch -import os -import sys -import time - -from transformers import AutoTokenizer - -from llama_recipes.inference.safety_utils import get_safety_checker -from llama_recipes.inference.model_utils import load_model, load_peft_model - -def main( - model_name, - peft_model: str=None, - quantization: bool=False, - max_new_tokens =100, #The maximum numbers of tokens to generate - prompt_file: str=None, - seed: int=42, #seed value for reproducibility - do_sample: bool=True, #Whether or not to use sampling ; use greedy decoding otherwise. - min_length: int=None, #The minimum length of the sequence to be generated, input prompt + min_new_tokens - use_cache: bool=True, #[optional] Whether or not the model should use the past last key/values attentions Whether or not the model should use the past last key/values attentions (if applicable to the model) to speed up decoding. - top_p: float=0.9, # [optional] If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: float=0.6, # [optional] The value used to modulate the next token probabilities. - top_k: int=50, # [optional] The number of highest probability vocabulary tokens to keep for top-k-filtering. - repetition_penalty: float=1.0, #The parameter for repetition penalty. 1.0 means no penalty. - length_penalty: int=1, #[optional] Exponential penalty to the length that is used with beam-based generation. 
- enable_azure_content_safety: bool=False, # Enable safety check with Azure content safety api - enable_sensitive_topics: bool=False, # Enable check for sensitive topics using AuditNLG APIs - enable_salesforce_content_safety: bool=True, # Enable safety check with Salesforce safety flan t5 - enable_llamaguard_content_safety: bool=False, # Enable safety check with Llama-Guard - use_fast_kernels: bool = True, # Enable using SDPA from PyTroch Accelerated Transformers, make use Flash Attention and Xformer memory-efficient kernels - **kwargs -): - if prompt_file is not None: - assert os.path.exists( - prompt_file - ), f"Provided Prompt file does not exist {prompt_file}" - with open(prompt_file, "r") as f: - user_prompt = f.read() - else: - print("No user prompt provided. Exiting.") - sys.exit(1) - # Set the seeds for reproducibility - torch.cuda.manual_seed(seed) - torch.manual_seed(seed) - - model = load_model(model_name, quantization, use_fast_kernels) - model.config.tp_size=1 - if peft_model: - model = load_peft_model(model, peft_model) - - model.eval() - - tokenizer = AutoTokenizer.from_pretrained(model_name) - - safety_checker = get_safety_checker(enable_azure_content_safety, - enable_sensitive_topics, - enable_salesforce_content_safety, - enable_llamaguard_content_safety, - ) - - # Safety check of the user prompt - safety_results = [check(user_prompt) for check in safety_checker] - are_safe = all([r[1] for r in safety_results]) - if are_safe: - print("User prompt deemed safe.") - print(f"User prompt:\n{user_prompt}") - else: - print("User prompt deemed unsafe.") - for method, is_safe, report in safety_results: - if not is_safe: - print(method) - print(report) - print("Skipping the inference as the prompt is not safe.") - sys.exit(1) # Exit the program with an error status - - batch = tokenizer(user_prompt, return_tensors="pt") - batch = {k: v.to("cuda") for k, v in batch.items()} - - start = time.perf_counter() - with torch.no_grad(): - outputs = model.generate( - 
**batch, - max_new_tokens=max_new_tokens, - do_sample=do_sample, - top_p=top_p, - temperature=temperature, - min_length=min_length, - use_cache=use_cache, - top_k=top_k, - repetition_penalty=repetition_penalty, - length_penalty=length_penalty, - **kwargs - ) - e2e_inference_time = (time.perf_counter()-start)*1000 - print(f"the inference time is {e2e_inference_time} ms") - filling = tokenizer.batch_decode(outputs[:, batch["input_ids"].shape[1]:], skip_special_tokens=True)[0] - # Safety check of the model output - safety_results = [check(filling) for check in safety_checker] - are_safe = all([r[1] for r in safety_results]) - if are_safe: - print("User input and model output deemed safe.") - print(user_prompt.replace("", filling)) - else: - print("Model output deemed unsafe.") - for method, is_safe, report in safety_results: - if not is_safe: - print(method) - print(report) - - -if __name__ == "__main__": - fire.Fire(main) diff --git a/getting-started/inference/code_llama/code_infilling_prompt.txt b/getting-started/inference/code_llama/code_infilling_prompt.txt deleted file mode 100644 index 3fe94b7a5..000000000 --- a/getting-started/inference/code_llama/code_infilling_prompt.txt +++ /dev/null @@ -1,3 +0,0 @@ -def remove_non_ascii(s: str) -> str: - """ - return result diff --git a/getting-started/inference/code_llama/code_instruct_example.py b/getting-started/inference/code_llama/code_instruct_example.py deleted file mode 100644 index d7b98f088..000000000 --- a/getting-started/inference/code_llama/code_instruct_example.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement. 
- -import fire -import os -import sys -import time - -import torch -from transformers import AutoTokenizer - -from llama_recipes.inference.safety_utils import get_safety_checker -from llama_recipes.inference.model_utils import load_model, load_peft_model - - -def handle_safety_check(are_safe_user_prompt, user_prompt, safety_results_user_prompt, are_safe_system_prompt, system_prompt, safety_results_system_prompt): - """ - Handles the output based on the safety check of both user and system prompts. - - Parameters: - - are_safe_user_prompt (bool): Indicates whether the user prompt is safe. - - user_prompt (str): The user prompt that was checked for safety. - - safety_results_user_prompt (list of tuples): A list of tuples for the user prompt containing the method, safety status, and safety report. - - are_safe_system_prompt (bool): Indicates whether the system prompt is safe. - - system_prompt (str): The system prompt that was checked for safety. - - safety_results_system_prompt (list of tuples): A list of tuples for the system prompt containing the method, safety status, and safety report. - """ - def print_safety_results(are_safe_prompt, prompt, safety_results, prompt_type="User"): - """ - Prints the safety results for a prompt. - - Parameters: - - are_safe_prompt (bool): Indicates whether the prompt is safe. - - prompt (str): The prompt that was checked for safety. - - safety_results (list of tuples): A list of tuples containing the method, safety status, and safety report. - - prompt_type (str): The type of prompt (User/System). 
- """ - if are_safe_prompt: - print(f"{prompt_type} prompt deemed safe.") - print(f"{prompt_type} prompt:\n{prompt}") - else: - print(f"{prompt_type} prompt deemed unsafe.") - for method, is_safe, report in safety_results: - if not is_safe: - print(method) - print(report) - print(f"Skipping the inference as the {prompt_type.lower()} prompt is not safe.") - sys.exit(1) - - # Check user prompt - print_safety_results(are_safe_user_prompt, user_prompt, safety_results_user_prompt, "User") - - # Check system prompt - print_safety_results(are_safe_system_prompt, system_prompt, safety_results_system_prompt, "System") - -def main( - model_name, - peft_model: str=None, - quantization: bool=False, - max_new_tokens =100, #The maximum numbers of tokens to generate - seed: int=42, #seed value for reproducibility - do_sample: bool=True, #Whether or not to use sampling ; use greedy decoding otherwise. - min_length: int=None, #The minimum length of the sequence to be generated, input prompt + min_new_tokens - use_cache: bool=False, #[optional] Whether or not the model should use the past last key/values attentions Whether or not the model should use the past last key/values attentions (if applicable to the model) to speed up decoding. - top_p: float=0.9, # [optional] If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. - temperature: float=0.6, # [optional] The value used to modulate the next token probabilities. - top_k: int=50, # [optional] The number of highest probability vocabulary tokens to keep for top-k-filtering. - repetition_penalty: float=1.0, #The parameter for repetition penalty. 1.0 means no penalty. - length_penalty: int=1, #[optional] Exponential penalty to the length that is used with beam-based generation. 
- enable_azure_content_safety: bool=False, # Enable safety check with Azure content safety api - enable_sensitive_topics: bool=False, # Enable check for sensitive topics using AuditNLG APIs - enable_salesforce_content_safety: bool=True, # Enable safety check with Salesforce safety flan t5 - enable_llamaguard_content_safety: bool=False, # Enable safety check with Llama-Guard - use_fast_kernels: bool = True, # Enable using SDPA from PyTroch Accelerated Transformers, make use Flash Attention and Xformer memory-efficient kernels - **kwargs -): - system_prompt = input("Please insert your system prompt: ") - user_prompt = input("Please insert your prompt: ") - chat = [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_prompt}, - ] - # Set the seeds for reproducibility - torch.cuda.manual_seed(seed) - torch.manual_seed(seed) - - model = load_model(model_name, quantization, use_fast_kernels) - if peft_model: - model = load_peft_model(model, peft_model) - - model.eval() - - tokenizer = AutoTokenizer.from_pretrained(model_name) - safety_checker = get_safety_checker(enable_azure_content_safety, - enable_sensitive_topics, - enable_salesforce_content_safety, - enable_llamaguard_content_safety, - ) - - # Safety check of the user prompt - safety_results_user_prompt = [check(user_prompt) for check in safety_checker] - safety_results_system_prompt = [check(system_prompt) for check in safety_checker] - are_safe_user_prompt = all([r[1] for r in safety_results_user_prompt]) - are_safe_system_prompt = all([r[1] for r in safety_results_system_prompt]) - handle_safety_check(are_safe_user_prompt, user_prompt, safety_results_user_prompt, are_safe_system_prompt, system_prompt, safety_results_system_prompt) - - inputs = tokenizer.apply_chat_template(chat, return_tensors="pt").to("cuda") - - start = time.perf_counter() - with torch.no_grad(): - outputs = model.generate( - input_ids=inputs, - max_new_tokens=max_new_tokens, - do_sample=do_sample, - top_p=top_p, 
- temperature=temperature, - min_length=min_length, - use_cache=use_cache, - top_k=top_k, - repetition_penalty=repetition_penalty, - length_penalty=length_penalty, - **kwargs - ) - e2e_inference_time = (time.perf_counter()-start)*1000 - print(f"the inference time is {e2e_inference_time} ms") - output_text = tokenizer.decode(outputs[0], skip_special_tokens=True) - - # Safety check of the model output - safety_results = [check(output_text) for check in safety_checker] - are_safe = all([r[1] for r in safety_results]) - if are_safe: - print("User input and model output deemed safe.") - print(f"Model output:\n{output_text}") - else: - print("Model output deemed unsafe.") - for method, is_safe, report in safety_results: - if not is_safe: - print(method) - print(report) - - -if __name__ == "__main__": - fire.Fire(main) diff --git a/getting-started/inference/modelUpgradeExample.py b/getting-started/inference/modelUpgradeExample.py deleted file mode 100644 index f2fa19cd1..000000000 --- a/getting-started/inference/modelUpgradeExample.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement. - -# Running the script without any arguments "python modelUpgradeExample.py" performs inference with the Llama 3 8B Instruct model. -# Passing --model-id "meta-llama/Meta-Llama-3.1-8B-Instruct" to the script will switch it to using the Llama 3.1 version of the same model. 
-# The script also shows the input tokens to confirm that the models are responding to the same input - -import fire -from transformers import AutoTokenizer, AutoModelForCausalLM -import torch - -def main(model_id = "meta-llama/Meta-Llama-3-8B-Instruct"): - tokenizer = AutoTokenizer.from_pretrained(model_id) - model = AutoModelForCausalLM.from_pretrained( - model_id, - torch_dtype=torch.bfloat16, - device_map="auto", - ) - - messages = [ - {"role": "system", "content": "You are a helpful chatbot"}, - {"role": "user", "content": "Why is the sky blue?"}, - {"role": "assistant", "content": "Because the light is scattered"}, - {"role": "user", "content": "Please tell me more about that"}, - ] - - input_ids = tokenizer.apply_chat_template( - messages, - add_generation_prompt=True, - return_tensors="pt", - ).to(model.device) - - print("Input tokens:") - print(input_ids) - - attention_mask = torch.ones_like(input_ids) - outputs = model.generate( - input_ids, - max_new_tokens=400, - eos_token_id=tokenizer.eos_token_id, - do_sample=True, - temperature=0.6, - top_p=0.9, - attention_mask=attention_mask, - ) - response = outputs[0][input_ids.shape[-1]:] - print("\nOutput:\n") - print(tokenizer.decode(response, skip_special_tokens=True)) - -if __name__ == "__main__": - fire.Fire(main) \ No newline at end of file diff --git a/recipes/README.md b/recipes/README.md deleted file mode 100644 index 86d90b7e0..000000000 --- a/recipes/README.md +++ /dev/null @@ -1,11 +0,0 @@ -## Llama-Recipes - -This folder contains examples organized by topic: - -| Subfolder | Description | -|---|---| -[quickstart](./quickstart)|The "Hello World" of using Llama, start here if you are new to using Llama -[use_cases](./use_cases)|Scripts showing common applications of Llama -[3p_integrations](./3p_integrations)|Partner-owned folder showing Llama usage along with third-party tools -[responsible_ai](./responsible_ai)|Scripts to use PurpleLlama for safeguarding model outputs 
-[experimental](./experimental)| Llama implementations of experimental LLM techniques From 166792c24b6643f41cfa151e0cab360e8d6d3434 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Thu, 9 Jan 2025 09:20:52 -0800 Subject: [PATCH 05/23] Update README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index d38632fc0..5cc4d1505 100644 --- a/README.md +++ b/README.md @@ -3,16 +3,16 @@ > Note: We recently did a refactor of the repo, [archive-main](https://github.com/meta-llama/llama-recipes/tree/archive-main) is a snapshot branch from before the refactor -Welcome to the official repository for helping you get started with [inference](https://github.com/meta-llama/llama-recipes/tree/main/getting-started/inference), [fine-tuning](https://github.com/init27/llama-recipes/tree/main/getting-started/finetuning) and [end-to-end use-cases](https://github.com/meta-llama/llama-recipes/tree/main/end-to-end-use-cases) of building with the Llama Model family. +Welcome to the official repository for helping you get started with [inference](./getting-started/inference/), [fine-tuning](./getting-started/finetuning) and [end-to-end use-cases](./end-to-end-use-cases) of building with the Llama Model family. The examples cover the most popular community approaches, popular use-cases and the latest Llama 3.2 Vision and Llama 3.2 Text, in this repository. 
> [!TIP] > Repository Structure: > * [Start building with the Llama 3.2 models](./getting-started/) -> * [End to End Use cases with Llama model family](https://github.com/meta-llama/llama-recipes/tree/main/end-to-end-use-cases) -> * [Examples of building with 3rd Party Llama Providers](https://github.com/meta-llama/llama-recipes/tree/main/3p-integrations) -> * [Model Benchmarks](https://github.com/meta-llama/llama-recipes/tree/main/benchmarks) +> * [End to End Use cases with Llama model family](./end-to-end-use-cases) +> * [Examples of building with 3rd Party Llama Providers](./3p-integrations) +> * [Model Benchmarks](./benchmarks) > [!TIP] > Get started with Llama 3.2 with these new recipes: From f6078ff011d5e47ef778fe429e300ebfdc4da962 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Thu, 9 Jan 2025 09:24:15 -0800 Subject: [PATCH 06/23] update readme --- README.md | 53 +++----------- src/README.md | 190 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 201 insertions(+), 42 deletions(-) create mode 100644 src/README.md diff --git a/README.md b/README.md index 5cc4d1505..459543d03 100644 --- a/README.md +++ b/README.md @@ -16,9 +16,9 @@ The examples cover the most popular community approaches, popular use-cases and > [!TIP] > Get started with Llama 3.2 with these new recipes: -> * [Finetune Llama 3.2 Vision](https://github.com/meta-llama/llama-recipes/blob/main/recipes/getting-started/finetuning/finetune_vision_model.md) -> * [Multimodal Inference with Llama 3.2 Vision](https://github.com/meta-llama/llama-recipes/blob/main/recipes/getting-started/inference/local_inference/README.md#multimodal-inference) -> * [Inference on Llama Guard 1B + Multimodal inference on Llama Guard 11B-Vision](https://github.com/meta-llama/llama-recipes/blob/main/recipes/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb) +> * [Finetune Llama 3.2 Vision](./getting-started/finetuning/finetune_vision_model.md) +> * [Multimodal Inference with Llama 
3.2 Vision](./getting-started/inference/local_inference/README.md#multimodal-inference) +> * [Inference on Llama Guard 1B + Multimodal inference on Llama Guard 11B-Vision](./end-to-end-use-cases/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb) > [!NOTE] @@ -29,52 +29,21 @@ The examples cover the most popular community approaches, popular use-cases and ## Repository Structure: -- [3P Integrations](https://github.com/init27/llama-recipes/tree/main/3p-integrations): Getting Started Recipes and End to End Use-Cases from various Llama providers -- [End to End Use Cases](https://github.com/init27/llama-recipes/tree/main/end-to-end-use-cases): As the name suggests, spanning various domains and applications -- [Getting Started](https://github.com/init27/llama-recipes/tree/main/getting-started/): Reference for inferencing, fine-tuning and RAG examples -- [Benchmarks](https://github.com/init27/llama-recipes/tree/main/benchmarks): +- [3P Integrations](./3p-integrations): Getting Started Recipes and End to End Use-Cases from various Llama providers +- [End to End Use Cases](./end-to-end-use-cases): As the name suggests, spanning various domains and applications +- [Getting Started](./getting-started/): Reference for inferencing, fine-tuning and RAG examples +- [Benchmarks](./benchmarks): Reference implementation for some benchmarks ## FAQ: +- Q: Some links are broken/folders are missing: +A: We recently did a refactor of the repo, [archive-main](https://github.com/meta-llama/llama-recipes/tree/archive-main) is a snapshot branch from before the refactor -The 'llama-recipes' repository is a companion to the [Meta Llama](https://github.com/meta-llama/llama-models) models. We support the latest version, [Llama 3.2 Vision](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md) and [Llama 3.2 Text](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md), in this repository. 
This repository contains example scripts and notebooks to get started with the models in a variety of use-cases, including fine-tuning for domain adaptation and building LLM-based applications with Llama and other tools in the LLM ecosystem. The examples here use Llama locally, in the cloud, and on-prem. +- Where can we find details about the latest models? -> [!TIP] -> Get started with Llama 3.2 with these new recipes: -> * [Finetune Llama 3.2 Vision](https://github.com/meta-llama/llama-recipes/blob/main/recipes/quickstart/finetuning/finetune_vision_model.md) -> * [Multimodal Inference with Llama 3.2 Vision](https://github.com/meta-llama/llama-recipes/blob/main/recipes/quickstart/inference/local_inference/README.md#multimodal-inference) -> * [Inference on Llama Guard 1B + Multimodal inference on Llama Guard 11B-Vision](https://github.com/meta-llama/llama-recipes/blob/main/recipes/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb) - - - -> [!NOTE] -> Llama 3.2 follows the same prompt template as Llama 3.1, with a new special token `<|image|>` representing the input image for the multimodal models. -> -> More details on the prompt templates for image reasoning, tool-calling and code interpreter can be found [on the documentation website](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_2). 
- - - -## Table of Contents - -- [Llama Recipes: Examples to get started using the Llama models from Meta](#llama-recipes-examples-to-get-started-using-the-llama-models-from-meta) - - [Table of Contents](#table-of-contents) - - [Getting Started](#getting-started) - - [Prerequisites](#prerequisites) - - [PyTorch Nightlies](#pytorch-nightlies) - - [Installing](#installing) - - [Install with pip](#install-with-pip) - - [Install with optional dependencies](#install-with-optional-dependencies) - - [Install from source](#install-from-source) - - [Getting the Llama models](#getting-the-llama-models) - - [Model conversion to Hugging Face](#model-conversion-to-hugging-face) - - [Repository Organization](#repository-organization) - - [`recipes/`](#recipes) - - [`src/`](#src) - - [Supported Features](#supported-features) - - [Contributing](#contributing) - - [License](#license) +A: Official [Llama models website](https://www.llama.com) ## Getting Started diff --git a/src/README.md b/src/README.md new file mode 100644 index 000000000..459543d03 --- /dev/null +++ b/src/README.md @@ -0,0 +1,190 @@ +# Llama Recipes: Examples to get started using the Llama models from Meta + + +> Note: We recently did a refactor of the repo, [archive-main](https://github.com/meta-llama/llama-recipes/tree/archive-main) is a snapshot branch from before the refactor + +Welcome to the official repository for helping you get started with [inference](./getting-started/inference/), [fine-tuning](./getting-started/finetuning) and [end-to-end use-cases](./end-to-end-use-cases) of building with the Llama Model family. + +The examples cover the most popular community approaches, popular use-cases and the latest Llama 3.2 Vision and Llama 3.2 Text, in this repository. 
+ +> [!TIP] +> Repository Structure: +> * [Start building with the Llama 3.2 models](./getting-started/) +> * [End to End Use cases with Llama model family](./end-to-end-use-cases) +> * [Examples of building with 3rd Party Llama Providers](./3p-integrations) +> * [Model Benchmarks](./benchmarks) + +> [!TIP] +> Get started with Llama 3.2 with these new recipes: +> * [Finetune Llama 3.2 Vision](./getting-started/finetuning/finetune_vision_model.md) +> * [Multimodal Inference with Llama 3.2 Vision](./getting-started/inference/local_inference/README.md#multimodal-inference) +> * [Inference on Llama Guard 1B + Multimodal inference on Llama Guard 11B-Vision](./end-to-end-use-cases/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb) + + +> [!NOTE] +> Llama 3.2 follows the same prompt template as Llama 3.1, with a new special token `<|image|>` representing the input image for the multimodal models. +> +> More details on the prompt templates for image reasoning, tool-calling and code interpreter can be found [on the documentation website](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_2). + + +## Repository Structure: + +- [3P Integrations](./3p-integrations): Getting Started Recipes and End to End Use-Cases from various Llama providers +- [End to End Use Cases](./end-to-end-use-cases): As the name suggests, spanning various domains and applications +- [Getting Started](./getting-started/): Reference for inferencing, fine-tuning and RAG examples +- [Benchmarks](./benchmarks): Reference implementation for some benchmarks + + +## FAQ: + +- Q: Some links are broken/folders are missing: + +A: We recently did a refactor of the repo, [archive-main](https://github.com/meta-llama/llama-recipes/tree/archive-main) is a snapshot branch from before the refactor + +- Where can we find details about the latest models? 
+ +A: Official [Llama models website](https://www.llama.com) + +## Getting Started + +These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system. + +### Prerequisites + +#### PyTorch Nightlies +If you want to use PyTorch nightlies instead of the stable release, go to [this guide](https://pytorch.org/get-started/locally/) to retrieve the right `--extra-index-url URL` parameter for the `pip install` commands on your platform. + +### Installing +Llama-recipes provides a pip distribution for easy install and usage in other projects. Alternatively, it can be installed from source. + +> [!NOTE] +> Ensure you use the correct CUDA version (from `nvidia-smi`) when installing the PyTorch wheels. Here we are using 11.8 as `cu118`. +> H100 GPUs work better with CUDA >12.0 + +#### Install with pip +``` +pip install llama-recipes +``` + +#### Install with optional dependencies +Llama-recipes offers the installation of optional packages. There are three optional dependency groups. +To run the unit tests we can install the required dependencies with: +``` +pip install llama-recipes[tests] +``` +For the vLLM example we need additional requirements that can be installed with: +``` +pip install llama-recipes[vllm] +``` +To use the sensitive topics safety checker install with: +``` +pip install llama-recipes[auditnlg] +``` +Some recipes require the presence of langchain. To install the packages follow the recipe description or install with: +``` +pip install llama-recipes[langchain] +``` +Optional dependencies can also be combined with [option1,option2]. + +#### Install from source +To install from source e.g. for development use these commands. We're using hatchling as our build backend which requires an up-to-date pip as well as setuptools package. 
+``` +git clone git@github.com:meta-llama/llama-recipes.git +cd llama-recipes +pip install -U pip setuptools +pip install -e . +``` +For development and contributing to llama-recipes please install all optional dependencies: +``` +git clone git@github.com:meta-llama/llama-recipes.git +cd llama-recipes +pip install -U pip setuptools +pip install -e .[tests,auditnlg,vllm] +``` + + +### Getting the Llama models +You can find Llama models on Hugging Face hub [here](https://huggingface.co/meta-llama), **where models with `hf` in the name are already converted to Hugging Face checkpoints so no further conversion is needed**. The conversion step below is only for original model weights from Meta that are hosted on Hugging Face model hub as well. + +#### Model conversion to Hugging Face +If you have the model checkpoints downloaded from the Meta website, you can convert it to the Hugging Face format with: + +```bash +## Install Hugging Face Transformers from source +pip freeze | grep transformers ## verify it is version 4.45.0 or higher + +git clone git@github.com:huggingface/transformers.git +cd transformers +pip install protobuf +python src/transformers/models/llama/convert_llama_weights_to_hf.py \ + --input_dir /path/to/downloaded/llama/weights --model_size 3B --output_dir /output/path +``` + + + +## Repository Organization +Most of the code dealing with Llama usage is organized across 2 main folders: `recipes/` and `src/`. + +### `recipes/` + +Contains examples organized in folders by topic: +| Subfolder | Description | +|---|---| +[quickstart](./recipes/quickstart) | The "Hello World" of using Llama, start here if you are new to using Llama. 
+[use_cases](./recipes/use_cases)|Scripts showing common applications of Meta Llama3 +[3p_integrations](./recipes/3p_integrations)|Partner owned folder showing common applications of Meta Llama3 +[responsible_ai](./recipes/responsible_ai)|Scripts to use PurpleLlama for safeguarding model outputs +[experimental](./recipes/experimental)|Meta Llama implementations of experimental LLM techniques + +### `src/` + +Contains modules which support the example recipes: +| Subfolder | Description | +|---|---| +| [configs](src/llama_recipes/configs/) | Contains the configuration files for PEFT methods, FSDP, Datasets, Weights & Biases experiment tracking. | +| [datasets](src/llama_recipes/datasets/) | Contains individual scripts for each dataset to download and process. Note | +| [inference](src/llama_recipes/inference/) | Includes modules for inference for the fine-tuned models. | +| [model_checkpointing](src/llama_recipes/model_checkpointing/) | Contains FSDP checkpoint handlers. | +| [policies](src/llama_recipes/policies/) | Contains FSDP scripts to provide different policies, such as mixed precision, transformer wrapping policy and activation checkpointing along with any precision optimizer (used for running FSDP with pure bf16 mode). | +| [utils](src/llama_recipes/utils/) | Utility files for:
- `train_utils.py` provides training/eval loop and more train utils.
- `dataset_utils.py` to get preprocessed datasets.
- `config_utils.py` to override the configs received from CLI.
- `fsdp_utils.py` provides FSDP wrapping policy for PEFT methods.
- `memory_utils.py` context manager to track different memory stats in train loop. | + + +## Supported Features +The recipes and modules in this repository support the following features: + +| Feature | | +| ---------------------------------------------- | - | +| HF support for inference | ✅ | +| HF support for finetuning | ✅ | +| PEFT | ✅ | +| Deferred initialization ( meta init) | ✅ | +| Low CPU mode for multi GPU | ✅ | +| Mixed precision | ✅ | +| Single node quantization | ✅ | +| Flash attention | ✅ | +| Activation checkpointing FSDP | ✅ | +| Hybrid Sharded Data Parallel (HSDP) | ✅ | +| Dataset packing & padding | ✅ | +| BF16 Optimizer (Pure BF16) | ✅ | +| Profiling & MFU tracking | ✅ | +| Gradient accumulation | ✅ | +| CPU offloading | ✅ | +| FSDP checkpoint conversion to HF for inference | ✅ | +| W&B experiment tracker | ✅ | + + +## Contributing + +Please read [CONTRIBUTING.md](CONTRIBUTING.md) for details on our code of conduct, and the process for submitting pull requests to us. + +## License + + +See the License file for Meta Llama 3.2 [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE) and Acceptable Use Policy [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/USE_POLICY.md) + +See the License file for Meta Llama 3.1 [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE) and Acceptable Use Policy [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/USE_POLICY.md) + +See the License file for Meta Llama 3 [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3/LICENSE) and Acceptable Use Policy [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3/USE_POLICY.md) + +See the License file for Meta Llama 2 [here](https://github.com/meta-llama/llama-models/blob/main/models/llama2/LICENSE) and Acceptable Use Policy [here](https://github.com/meta-llama/llama-models/blob/main/models/llama2/USE_POLICY.md) + From 
d2c51d8077af0cff7c48ef72731a1f895e981e20 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Thu, 9 Jan 2025 09:27:27 -0800 Subject: [PATCH 07/23] move readme --- README.md | 133 +++++--------------------------------------------- src/README.md | 118 +------------------------------------------- 2 files changed, 12 insertions(+), 239 deletions(-) diff --git a/README.md b/README.md index 459543d03..89a7e5cb0 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Llama Recipes: Examples to get started using the Llama models from Meta +# Llama Cookbook: The Official Guide to building with Llama Models > Note: We recently did a refactor of the repo, [archive-main](https://github.com/meta-llama/llama-recipes/tree/archive-main) is a snapshot branch from before the refactor @@ -45,132 +45,21 @@ A: We recently did a refactor of the repo, [archive-main](https://github.com/met A: Official [Llama models website](https://www.llama.com) -## Getting Started +## Contributing -These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system. +Please read [CONTRIBUTING.md](CONTRIBUTING.md) for details on our code of conduct, and the process for submitting pull requests to us. -### Prerequisites +## License + -#### PyTorch Nightlies -If you want to use PyTorch nightlies instead of the stable release, go to [this guide](https://pytorch.org/get-started/locally/) to retrieve the right `--extra-index-url URL` parameter for the `pip install` commands on your platform. +See the License file for Meta Llama 3.2 [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE) and Acceptable Use Policy [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/USE_POLICY.md) -### Installing -Llama-recipes provides a pip distribution for easy install and usage in other projects. 
Alternatively, it can be installed from source. +See the License file for Meta Llama 3.1 [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE) and Acceptable Use Policy [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/USE_POLICY.md) -> [!NOTE] -> Ensure you use the correct CUDA version (from `nvidia-smi`) when installing the PyTorch wheels. Here we are using 11.8 as `cu118`. -> H100 GPUs work better with CUDA >12.0 - -#### Install with pip -``` -pip install llama-recipes -``` - -#### Install with optional dependencies -Llama-recipes offers the installation of optional packages. There are three optional dependency groups. -To run the unit tests we can install the required dependencies with: -``` -pip install llama-recipes[tests] -``` -For the vLLM example we need additional requirements that can be installed with: -``` -pip install llama-recipes[vllm] -``` -To use the sensitive topics safety checker install with: -``` -pip install llama-recipes[auditnlg] -``` -Some recipes require the presence of langchain. To install the packages follow the recipe description or install with: -``` -pip install llama-recipes[langchain] -``` -Optional dependencies can also be combined with [option1,option2]. - -#### Install from source -To install from source e.g. for development use these commands. We're using hatchling as our build backend which requires an up-to-date pip as well as setuptools package. -``` -git clone git@github.com:meta-llama/llama-recipes.git -cd llama-recipes -pip install -U pip setuptools -pip install -e . 
-``` -For development and contributing to llama-recipes please install all optional dependencies: -``` -git clone git@github.com:meta-llama/llama-recipes.git -cd llama-recipes -pip install -U pip setuptools -pip install -e .[tests,auditnlg,vllm] -``` - - -### Getting the Llama models -You can find Llama models on Hugging Face hub [here](https://huggingface.co/meta-llama), **where models with `hf` in the name are already converted to Hugging Face checkpoints so no further conversion is needed**. The conversion step below is only for original model weights from Meta that are hosted on Hugging Face model hub as well. - -#### Model conversion to Hugging Face -If you have the model checkpoints downloaded from the Meta website, you can convert it to the Hugging Face format with: - -```bash -## Install Hugging Face Transformers from source -pip freeze | grep transformers ## verify it is version 4.45.0 or higher - -git clone git@github.com:huggingface/transformers.git -cd transformers -pip install protobuf -python src/transformers/models/llama/convert_llama_weights_to_hf.py \ - --input_dir /path/to/downloaded/llama/weights --model_size 3B --output_dir /output/path -``` - - - -## Repository Organization -Most of the code dealing with Llama usage is organized across 2 main folders: `recipes/` and `src/`. - -### `recipes/` - -Contains examples organized in folders by topic: -| Subfolder | Description | -|---|---| -[quickstart](./recipes/quickstart) | The "Hello World" of using Llama, start here if you are new to using Llama. 
-[use_cases](./recipes/use_cases)|Scripts showing common applications of Meta Llama3 -[3p_integrations](./recipes/3p_integrations)|Partner owned folder showing common applications of Meta Llama3 -[responsible_ai](./recipes/responsible_ai)|Scripts to use PurpleLlama for safeguarding model outputs -[experimental](./recipes/experimental)|Meta Llama implementations of experimental LLM techniques - -### `src/` - -Contains modules which support the example recipes: -| Subfolder | Description | -|---|---| -| [configs](src/llama_recipes/configs/) | Contains the configuration files for PEFT methods, FSDP, Datasets, Weights & Biases experiment tracking. | -| [datasets](src/llama_recipes/datasets/) | Contains individual scripts for each dataset to download and process. Note | -| [inference](src/llama_recipes/inference/) | Includes modules for inference for the fine-tuned models. | -| [model_checkpointing](src/llama_recipes/model_checkpointing/) | Contains FSDP checkpoint handlers. | -| [policies](src/llama_recipes/policies/) | Contains FSDP scripts to provide different policies, such as mixed precision, transformer wrapping policy and activation checkpointing along with any precision optimizer (used for running FSDP with pure bf16 mode). | -| [utils](src/llama_recipes/utils/) | Utility files for:
- `train_utils.py` provides training/eval loop and more train utils.
- `dataset_utils.py` to get preprocessed datasets.
- `config_utils.py` to override the configs received from CLI.
- `fsdp_utils.py` provides FSDP wrapping policy for PEFT methods.
- `memory_utils.py` context manager to track different memory stats in train loop. | - - -## Supported Features -The recipes and modules in this repository support the following features: - -| Feature | | -| ---------------------------------------------- | - | -| HF support for inference | ✅ | -| HF support for finetuning | ✅ | -| PEFT | ✅ | -| Deferred initialization ( meta init) | ✅ | -| Low CPU mode for multi GPU | ✅ | -| Mixed precision | ✅ | -| Single node quantization | ✅ | -| Flash attention | ✅ | -| Activation checkpointing FSDP | ✅ | -| Hybrid Sharded Data Parallel (HSDP) | ✅ | -| Dataset packing & padding | ✅ | -| BF16 Optimizer (Pure BF16) | ✅ | -| Profiling & MFU tracking | ✅ | -| Gradient accumulation | ✅ | -| CPU offloading | ✅ | -| FSDP checkpoint conversion to HF for inference | ✅ | -| W&B experiment tracker | ✅ | +See the License file for Meta Llama 3 [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3/LICENSE) and Acceptable Use Policy [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3/USE_POLICY.md) + +See the License file for Meta Llama 2 [here](https://github.com/meta-llama/llama-models/blob/main/models/llama2/LICENSE) and Acceptable Use Policy [here](https://github.com/meta-llama/llama-models/blob/main/models/llama2/USE_POLICY.md) + ## Contributing diff --git a/src/README.md b/src/README.md index 459543d03..5fad6150d 100644 --- a/src/README.md +++ b/src/README.md @@ -1,50 +1,3 @@ -# Llama Recipes: Examples to get started using the Llama models from Meta - - -> Note: We recently did a refactor of the repo, [archive-main](https://github.com/meta-llama/llama-recipes/tree/archive-main) is a snapshot branch from before the refactor - -Welcome to the official repository for helping you get started with [inference](./getting-started/inference/), [fine-tuning](./getting-started/finetuning) and [end-to-end use-cases](./end-to-end-use-cases) of building with the Llama Model family. 
- -The examples cover the most popular community approaches, popular use-cases and the latest Llama 3.2 Vision and Llama 3.2 Text, in this repository. - -> [!TIP] -> Repository Structure: -> * [Start building with the Llama 3.2 models](./getting-started/) -> * [End to End Use cases with Llama model family](./end-to-end-use-cases) -> * [Examples of building with 3rd Party Llama Providers](./3p-integrations) -> * [Model Benchmarks](./benchmarks) - -> [!TIP] -> Get started with Llama 3.2 with these new recipes: -> * [Finetune Llama 3.2 Vision](./getting-started/finetuning/finetune_vision_model.md) -> * [Multimodal Inference with Llama 3.2 Vision](./getting-started/inference/local_inference/README.md#multimodal-inference) -> * [Inference on Llama Guard 1B + Multimodal inference on Llama Guard 11B-Vision](./end-to-end-use-cases/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb) - - -> [!NOTE] -> Llama 3.2 follows the same prompt template as Llama 3.1, with a new special token `<|image|>` representing the input image for the multimodal models. -> -> More details on the prompt templates for image reasoning, tool-calling and code interpreter can be found [on the documentation website](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_2). 
- - -## Repository Structure: - -- [3P Integrations](./3p-integrations): Getting Started Recipes and End to End Use-Cases from various Llama providers -- [End to End Use Cases](./end-to-end-use-cases): As the name suggests, spanning various domains and applications -- [Getting Started](./getting-started/): Reference for inferencing, fine-tuning and RAG examples -- [Benchmarks](./benchmarks): Reference implementation for some benchmarks - - -## FAQ: - -- Q: Some links are broken/folders are missing: - -A: We recently did a refactor of the repo, [archive-main](https://github.com/meta-llama/llama-recipes/tree/archive-main) is a snapshot branch from before the refactor - -- Where can we find details about the latest models? - -A: Official [Llama models website](https://www.llama.com) - ## Getting Started These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system. @@ -118,73 +71,4 @@ cd transformers pip install protobuf python src/transformers/models/llama/convert_llama_weights_to_hf.py \ --input_dir /path/to/downloaded/llama/weights --model_size 3B --output_dir /output/path -``` - - - -## Repository Organization -Most of the code dealing with Llama usage is organized across 2 main folders: `recipes/` and `src/`. - -### `recipes/` - -Contains examples organized in folders by topic: -| Subfolder | Description | -|---|---| -[quickstart](./recipes/quickstart) | The "Hello World" of using Llama, start here if you are new to using Llama. 
-[use_cases](./recipes/use_cases)|Scripts showing common applications of Meta Llama3 -[3p_integrations](./recipes/3p_integrations)|Partner owned folder showing common applications of Meta Llama3 -[responsible_ai](./recipes/responsible_ai)|Scripts to use PurpleLlama for safeguarding model outputs -[experimental](./recipes/experimental)|Meta Llama implementations of experimental LLM techniques - -### `src/` - -Contains modules which support the example recipes: -| Subfolder | Description | -|---|---| -| [configs](src/llama_recipes/configs/) | Contains the configuration files for PEFT methods, FSDP, Datasets, Weights & Biases experiment tracking. | -| [datasets](src/llama_recipes/datasets/) | Contains individual scripts for each dataset to download and process. Note | -| [inference](src/llama_recipes/inference/) | Includes modules for inference for the fine-tuned models. | -| [model_checkpointing](src/llama_recipes/model_checkpointing/) | Contains FSDP checkpoint handlers. | -| [policies](src/llama_recipes/policies/) | Contains FSDP scripts to provide different policies, such as mixed precision, transformer wrapping policy and activation checkpointing along with any precision optimizer (used for running FSDP with pure bf16 mode). | -| [utils](src/llama_recipes/utils/) | Utility files for:
- `train_utils.py` provides training/eval loop and more train utils.
- `dataset_utils.py` to get preprocessed datasets.
- `config_utils.py` to override the configs received from CLI.
- `fsdp_utils.py` provides FSDP wrapping policy for PEFT methods.
- `memory_utils.py` context manager to track different memory stats in train loop. | - - -## Supported Features -The recipes and modules in this repository support the following features: - -| Feature | | -| ---------------------------------------------- | - | -| HF support for inference | ✅ | -| HF support for finetuning | ✅ | -| PEFT | ✅ | -| Deferred initialization ( meta init) | ✅ | -| Low CPU mode for multi GPU | ✅ | -| Mixed precision | ✅ | -| Single node quantization | ✅ | -| Flash attention | ✅ | -| Activation checkpointing FSDP | ✅ | -| Hybrid Sharded Data Parallel (HSDP) | ✅ | -| Dataset packing & padding | ✅ | -| BF16 Optimizer (Pure BF16) | ✅ | -| Profiling & MFU tracking | ✅ | -| Gradient accumulation | ✅ | -| CPU offloading | ✅ | -| FSDP checkpoint conversion to HF for inference | ✅ | -| W&B experiment tracker | ✅ | - - -## Contributing - -Please read [CONTRIBUTING.md](CONTRIBUTING.md) for details on our code of conduct, and the process for submitting pull requests to us. - -## License - - -See the License file for Meta Llama 3.2 [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE) and Acceptable Use Policy [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/USE_POLICY.md) - -See the License file for Meta Llama 3.1 [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE) and Acceptable Use Policy [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/USE_POLICY.md) - -See the License file for Meta Llama 3 [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3/LICENSE) and Acceptable Use Policy [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3/USE_POLICY.md) - -See the License file for Meta Llama 2 [here](https://github.com/meta-llama/llama-models/blob/main/models/llama2/LICENSE) and Acceptable Use Policy [here](https://github.com/meta-llama/llama-models/blob/main/models/llama2/USE_POLICY.md) - +``` \ No 
newline at end of file From c1909a631cd369dc3869ac2ab8ae32312cd699f7 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Thu, 9 Jan 2025 09:28:49 -0800 Subject: [PATCH 08/23] Update README.md --- README.md | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/README.md b/README.md index 89a7e5cb0..1b556644f 100644 --- a/README.md +++ b/README.md @@ -60,20 +60,3 @@ See the License file for Meta Llama 3 [here](https://github.com/meta-llama/llama See the License file for Meta Llama 2 [here](https://github.com/meta-llama/llama-models/blob/main/models/llama2/LICENSE) and Acceptable Use Policy [here](https://github.com/meta-llama/llama-models/blob/main/models/llama2/USE_POLICY.md) - - -## Contributing - -Please read [CONTRIBUTING.md](CONTRIBUTING.md) for details on our code of conduct, and the process for submitting pull requests to us. - -## License - - -See the License file for Meta Llama 3.2 [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE) and Acceptable Use Policy [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/USE_POLICY.md) - -See the License file for Meta Llama 3.1 [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE) and Acceptable Use Policy [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/USE_POLICY.md) - -See the License file for Meta Llama 3 [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3/LICENSE) and Acceptable Use Policy [here](https://github.com/meta-llama/llama-models/blob/main/models/llama3/USE_POLICY.md) - -See the License file for Meta Llama 2 [here](https://github.com/meta-llama/llama-models/blob/main/models/llama2/LICENSE) and Acceptable Use Policy [here](https://github.com/meta-llama/llama-models/blob/main/models/llama2/USE_POLICY.md) - From e07143133d0b021dc53272fe8ad4f9ccc5d05ea7 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Thu, 9 Jan 2025 11:13:07 -0800 Subject: [PATCH 09/23] 
Update README.md --- end-to-end-use-cases/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/end-to-end-use-cases/README.md b/end-to-end-use-cases/README.md index 45d08ab8f..ce5152a8c 100644 --- a/end-to-end-use-cases/README.md +++ b/end-to-end-use-cases/README.md @@ -1,3 +1,5 @@ +## [Agentic Tutorial](./agents/): 101 and 201 tutorials on performing Tool Calling and building an Agentic Workflow using Llama Models + ## [Automatic Triaging of Github Repositories](./github_triage/walkthrough.ipynb): Use Llama to automatically triage issues in an OSS repository and generate insights to improve community experience This tool utilizes an off-the-shelf Llama model to analyze, generate insights, and create a report for better understanding of the state of a repository. It serves as a reference implementation for using Llama to develop custom reporting and data analytics applications. From e81bf1edf30aed6561ace7240aa94f5e82e14531 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Thu, 9 Jan 2025 15:50:56 -0800 Subject: [PATCH 10/23] Update README.md --- end-to-end-use-cases/README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/end-to-end-use-cases/README.md b/end-to-end-use-cases/README.md index ce5152a8c..9bc7c2182 100644 --- a/end-to-end-use-cases/README.md +++ b/end-to-end-use-cases/README.md @@ -1,5 +1,11 @@ ## [Agentic Tutorial](./agents/): 101 and 201 tutorials on performing Tool Calling and building an Agentic Workflow using Llama Models +## [Benchmarks](./benchmarks/): a folder contains benchmark scripts that apply a throughput analysis and introduction to `lm-evaluation-harness`, a tool to evaluate Llama models including quantized models focusing on quality. 
+ +## [Browser Usage](./browser_use/): Demo of how to apply Llama models and use them for browsing the internet and completing tasks + +## []() + ## [Automatic Triaging of Github Repositories](./github_triage/walkthrough.ipynb): Use Llama to automatically triage issues in an OSS repository and generate insights to improve community experience This tool utilizes an off-the-shelf Llama model to analyze, generate insights, and create a report for better understanding of the state of a repository. It serves as a reference implementation for using Llama to develop custom reporting and data analytics applications. From 8d115193ffe3377ad242e4f90cbcaec7b065c1b9 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Thu, 9 Jan 2025 15:54:25 -0800 Subject: [PATCH 11/23] Update README.md --- end-to-end-use-cases/README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/end-to-end-use-cases/README.md b/end-to-end-use-cases/README.md index 9bc7c2182..f5c66a0b8 100644 --- a/end-to-end-use-cases/README.md +++ b/end-to-end-use-cases/README.md @@ -1,11 +1,11 @@ ## [Agentic Tutorial](./agents/): 101 and 201 tutorials on performing Tool Calling and building an Agentic Workflow using Llama Models +101 notebooks show how to apply Llama models and enable tool calling functionality, 201 notebook walks you through an end to end workflow of building an agent that can search two papers, fetch their details and find their differences. -## [Benchmarks](./benchmarks/): a folder contains benchmark scripts that apply a throughput analysis and introduction to `lm-evaluation-harness`, a tool to evaluate Llama models including quantized models focusing on quality. 
+## [Benchmarks](./benchmarks/): a folder contains benchmark scripts +The scripts apply a throughput analysis and introduction to `lm-evaluation-harness`, a tool to evaluate Llama models including quantized models focusing on quality ## [Browser Usage](./browser_use/): Demo of how to apply Llama models and use them for browsing the internet and completing tasks -## []() - ## [Automatic Triaging of Github Repositories](./github_triage/walkthrough.ipynb): Use Llama to automatically triage issues in an OSS repository and generate insights to improve community experience This tool utilizes an off-the-shelf Llama model to analyze, generate insights, and create a report for better understanding of the state of a repository. It serves as a reference implementation for using Llama to develop custom reporting and data analytics applications. @@ -15,6 +15,9 @@ This demo app uses Llama 3 to return a text summary of a YouTube video. It shows ## [NBA2023-24](./coding/text2sql/structured_llama.ipynb): Ask Llama 3 about Structured Data This demo app shows how to use LangChain and Llama 3 to let users ask questions about **structured** data stored in a SQL DB. As the 2023-24 NBA season is entering the playoff, we use the NBA roster info saved in a SQLite DB to show you how to ask Llama 3 questions about your favorite teams or players. +## [NotebookLlama](./NotebookLlama/): PDF to Podcast using Llama Models +Workflow showcasing how to use multiple Llama models to go from any PDF to a Podcast and using open models to generate a multi-speaker podcast + ## [live_data](live_data.ipynb): Ask Llama 3 about Live Data (using Replicate or [OctoAI](../3p_integrations/octoai/live_data.ipynb)) This demo app shows how to perform live data augmented generation tasks with Llama 3, [LlamaIndex](https://github.com/run-llama/llama_index), another leading open-source framework for building LLM apps, and the [Tavily](https://tavily.com) live search API. 
From 9a3546910286196389528d7ba700745dca64cd14 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Thu, 9 Jan 2025 15:56:01 -0800 Subject: [PATCH 12/23] add details --- README.md | 4 ---- getting-started/README.md | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/README.md b/README.md index 1b556644f..1087438a3 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,6 @@ The examples cover the most popular community approaches, popular use-cases and > * [Start building with the Llama 3.2 models](./getting-started/) > * [End to End Use cases with Llama model family](./end-to-end-use-cases) > * [Examples of building with 3rd Party Llama Providers](./3p-integrations) -> * [Model Benchmarks](./benchmarks) - > [!TIP] > Get started with Llama 3.2 with these new recipes: > * [Finetune Llama 3.2 Vision](./getting-started/finetuning/finetune_vision_model.md) @@ -32,8 +30,6 @@ The examples cover the most popular community approaches, popular use-cases and - [3P Integrations](./3p-integrations): Getting Started Recipes and End to End Use-Cases from various Llama providers - [End to End Use Cases](./end-to-end-use-cases): As the name suggests, spanning various domains and applications - [Getting Started](./getting-started/): Reference for inferencing, fine-tuning and RAG examples -- [Benchmarks](./benchmarks): Reference implementation for some benchmarks - ## FAQ: diff --git a/getting-started/README.md b/getting-started/README.md index a48c63436..523135c0b 100644 --- a/getting-started/README.md +++ b/getting-started/README.md @@ -1,4 +1,4 @@ -## Llama-Recipes Quickstart +## Llama-Recipes Getting Started If you are new to developing with Meta Llama models, this is where you should start. This folder contains introductory-level notebooks across different techniques relating to Meta Llama. 
From 2d483aeb9debd625078d0e213f86320959e05a0f Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Thu, 9 Jan 2025 16:13:49 -0800 Subject: [PATCH 13/23] fix some spells --- 3p-integrations/modal/many-llamas-human-eval/README.md | 5 +++-- README.md | 2 -- UPDATES.md | 4 ++-- .../benchmarks/llm_eval_harness/meta_eval/README.md | 2 +- end-to-end-use-cases/multilingual/README.md | 2 +- 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/3p-integrations/modal/many-llamas-human-eval/README.md b/3p-integrations/modal/many-llamas-human-eval/README.md index 1c3c1b661..342949e92 100644 --- a/3p-integrations/modal/many-llamas-human-eval/README.md +++ b/3p-integrations/modal/many-llamas-human-eval/README.md @@ -12,7 +12,7 @@ This experiment built by the team at [Modal](https://modal.com), and is describe [Beat GPT-4o at Python by searching with 100 small Llamas](https://modal.com/blog/llama-human-eval) -The experiment has since been upgraded to use the [Llama 3.2 3B Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) model, and runnable end-to-end using the Modal serverless platform. +The experiment has since been upgraded to use the [Llama 3.2 3B Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) model, and run end-to-end using the Modal serverless platform. ## Run it yourself @@ -55,7 +55,7 @@ This will execute: 5. Generating graphs of pass@k and fail@k ### Results - + The resulting plots of the evals will be saved locally to: - `/tmp/plot-pass-k.jpeg` - `/tmp/plot-fail-k.jpeg` @@ -69,3 +69,4 @@ You'll see that at 100 generations, the Llama model is able to perform on-par wi `/tmp/plot-fail-k.jpeg` shows fail@k across a log-scale, showing smooth scaling of this method. 
![plot-fail-k](https://github.com/user-attachments/assets/7286e4ff-5090-4288-bd62-8a078c6dc5a1) + diff --git a/README.md b/README.md index 1087438a3..c264d4145 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ # Llama Cookbook: The Official Guide to building with Llama Models - > Note: We recently did a refactor of the repo, [archive-main](https://github.com/meta-llama/llama-recipes/tree/archive-main) is a snapshot branch from before the refactor @@ -18,7 +17,6 @@ The examples cover the most popular community approaches, popular use-cases and > * [Multimodal Inference with Llama 3.2 Vision](./getting-started/inference/local_inference/README.md#multimodal-inference) > * [Inference on Llama Guard 1B + Multimodal inference on Llama Guard 11B-Vision](./end-to-end-use-cases/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb) - > [!NOTE] > Llama 3.2 follows the same prompt template as Llama 3.1, with a new special token `<|image|>` representing the input image for the multimodal models. 
> diff --git a/UPDATES.md b/UPDATES.md index f4dc5cef2..0281eb309 100644 --- a/UPDATES.md +++ b/UPDATES.md @@ -1,5 +1,5 @@ DIFFLOG: - + Nested Folders rename: - /recipes/3p_integrations -> /3p-integrations - /recipes/quickstart -> /getting-started @@ -20,4 +20,4 @@ Removed folders: - /flagged (Empty folder) - /recipes/quickstart/Running_Llama3_Anywhere (Redundant code) - /recipes/quickstart/inference/codellama (deprecated model) - + diff --git a/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/README.md b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/README.md index 96e0ae677..643197849 100644 --- a/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/README.md +++ b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/README.md @@ -50,7 +50,7 @@ Given the extensive number of tasks available (12 for pretrained models and 30 f - **Tasks for 3.2 pretrained models**: MMLU - **Tasks for 3.2 instruct models**: MMLU, GPQA -These tasks are common evalutions, many of which overlap with the Hugging Face [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard) +These tasks are common evaluations, many of which overlap with the Hugging Face [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard) Here, we aim to get the benchmark numbers on the aforementioned tasks using Hugging Face [leaderboard implementation](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks/leaderboard). Please follow the instructions below to make necessary modifications to use our eval prompts and get more eval metrics. 
diff --git a/end-to-end-use-cases/multilingual/README.md b/end-to-end-use-cases/multilingual/README.md index 159db54b3..e8a678b3f 100644 --- a/end-to-end-use-cases/multilingual/README.md +++ b/end-to-end-use-cases/multilingual/README.md @@ -119,7 +119,7 @@ phase2_ds.save_to_disk("data/phase2") ``` ### Train -Finally, we can start finetuning Llama2 on these datasets by following the [finetuning recipes](../../quickstart/finetuning/). Remember to pass the new tokenizer path as an argument to the script: `--tokenizer_name=./extended_tokenizer`. +Finally, we can start finetuning Llama2 on these datasets by following the [finetuning recipes](../getting-started/finetuning/). Remember to pass the new tokenizer path as an argument to the script: `--tokenizer_name=./extended_tokenizer`. OpenHathi was trained on 64 A100 80GB GPUs. Here are the hyperparameters used and other training details: - maximum learning rate: 2e-4 From 9703ce7d485adfe298e00a5dbf481b3aa44913ed Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Thu, 9 Jan 2025 16:16:58 -0800 Subject: [PATCH 14/23] move req to home --- src/requirements.txt => requirements.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/requirements.txt => requirements.txt (100%) diff --git a/src/requirements.txt b/requirements.txt similarity index 100% rename from src/requirements.txt rename to requirements.txt From 7e9cab0c43bcb6bf466f188de19acef9ffb6ffa6 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Thu, 9 Jan 2025 17:08:06 -0800 Subject: [PATCH 15/23] fix src links --- src/docs/multi_gpu.md | 12 ++++++------ src/docs/single_gpu.md | 11 ++++++----- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/docs/multi_gpu.md b/src/docs/multi_gpu.md index 820595dcf..aecafd930 100644 --- a/src/docs/multi_gpu.md +++ b/src/docs/multi_gpu.md @@ -86,9 +86,9 @@ sbatch recipes/quickstart/finetuning/multi_node.slurm ## How to run with different datasets? 
-Currently 4 datasets are supported that can be found in [Datasets config file](../src/llama_recipes/configs/datasets.py). +Currently 4 datasets are supported that can be found in [Datasets config file](../llama_recipes/configs/datasets.py). -* `grammar_dataset` : use this [notebook](../src/llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) to pull and process theJfleg and C4 200M datasets for grammar checking. +* `grammar_dataset` : use this [notebook](../llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) to pull and process theJfleg and C4 200M datasets for grammar checking. * `alpaca_dataset` : to get this open source data please download the `aplaca.json` to `dataset` folder. @@ -117,7 +117,7 @@ torchrun --nnodes 1 --nproc_per_node 4 recipes/quickstart/finetuning/finetuning ## Where to configure settings? -* [Training config file](../src/llama_recipes/configs/training.py) is the main config file that helps to specify the settings for our run and can be found in [configs folder](../src/llama_recipes/configs/) +* [Training config file](../llama_recipes/configs/training.py) is the main config file that helps to specify the settings for our run and can be found in [configs folder](../src/llama_recipes/configs/) It lets us specify the training settings for everything from `model_name` to `dataset_name`, `batch_size` and so on. Below is the list of supported settings: @@ -166,11 +166,11 @@ It lets us specify the training settings for everything from `model_name` to `da profiler_dir: str = "PATH/to/save/profiler/results" # will be used if using profiler ``` -* [Datasets config file](../src/llama_recipes/configs/datasets.py) provides the available options for datasets. +* [Datasets config file](../llama_recipes/configs/datasets.py) provides the available options for datasets. -* [peft config file](../src/llama_recipes/configs/peft.py) provides the supported PEFT methods and respective settings that can be modified. 
+* [peft config file](../llama_recipes/configs/peft.py) provides the supported PEFT methods and respective settings that can be modified. -* [FSDP config file](../src/llama_recipes/configs/fsdp.py) provides FSDP settings such as: +* [FSDP config file](../llama_recipes/configs/fsdp.py) provides FSDP settings such as: * `mixed_precision` boolean flag to specify using mixed precision, defatults to true. diff --git a/src/docs/single_gpu.md b/src/docs/single_gpu.md index 3f6834ef8..cff6b8399 100644 --- a/src/docs/single_gpu.md +++ b/src/docs/single_gpu.md @@ -35,9 +35,9 @@ The args used in the command above are: ## How to run with different datasets? -Currently 4 datasets are supported that can be found in [Datasets config file](../src/llama_recipes/configs/datasets.py). +Currently 4 datasets are supported that can be found in [Datasets config file](../llama_recipes/configs/datasets.py). -* `grammar_dataset` : use this [notebook](../src/llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) to pull and process theJfleg and C4 200M datasets for grammar checking. +* `grammar_dataset` : use this [notebook](../llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) to pull and process theJfleg and C4 200M datasets for grammar checking. * `alpaca_dataset` : to get this open source data please download the `aplaca.json` to `ft_dataset` folder. @@ -67,7 +67,7 @@ python -m llama_recipes.finetuning --use_peft --peft_method lora --quantization ## Where to configure settings? -* [Training config file](../src/llama_recipes/configs/training.py) is the main config file that help to specify the settings for our run can be found in +* [Training config file](../llama_recipes/configs/training.py) is the main config file that help to specify the settings for our run can be found in It let us specify the training settings, everything from `model_name` to `dataset_name`, `batch_size` etc. can be set here. 
Below is the list of supported settings: @@ -117,9 +117,10 @@ It let us specify the training settings, everything from `model_name` to `datase ``` -* [Datasets config file](../src/llama_recipes/configs/datasets.py) provides the available options for datasets. +* [Datasets config file](../llama_recipes/configs/datasets.py) + ../src/llama_recipes/configs/datasets.py) provides the available options for datasets. -* [peft config file](../src/llama_recipes/configs/peft.py) provides the supported PEFT methods and respective settings that can be modified. +* [peft config file](../llama_recipes/configs/peft.py) provides the supported PEFT methods and respective settings that can be modified. ## FLOPS Counting and Pytorch Profiling From bd210b105df1b09922ebb8bea9fb81878e3fe658 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Thu, 9 Jan 2025 17:41:07 -0800 Subject: [PATCH 16/23] Fix A LOT of links --- .../llamaindex/dlai_agentic_rag/README.md | 2 +- end-to-end-use-cases/RAFT-Chatbot/README.md | 4 ++-- end-to-end-use-cases/README.md | 2 +- .../benchmarks/inference/on_prem/README.md | 2 +- .../benchmarks/llm_eval_harness/meta_eval/README.md | 2 +- .../whatsapp_chatbot/whatsapp_llama3.md | 4 ++-- end-to-end-use-cases/multilingual/README.md | 2 +- getting-started/README.md | 4 ++-- getting-started/finetuning/README.md | 12 ++++++------ getting-started/finetuning/datasets/README.md | 8 ++++---- getting-started/finetuning/multigpu_finetuning.md | 8 ++++---- getting-started/finetuning/singlegpu_finetuning.md | 6 +++--- getting-started/inference/local_inference/README.md | 4 ++-- .../mobile_inference/android_inference/README.md | 2 +- src/docs/FAQ.md | 6 +++--- src/docs/multi_gpu.md | 4 ++-- 16 files changed, 36 insertions(+), 36 deletions(-) diff --git a/3p-integrations/llamaindex/dlai_agentic_rag/README.md b/3p-integrations/llamaindex/dlai_agentic_rag/README.md index deeee9a9c..b61a6b772 100644 --- a/3p-integrations/llamaindex/dlai_agentic_rag/README.md +++ 
b/3p-integrations/llamaindex/dlai_agentic_rag/README.md @@ -2,7 +2,7 @@ The folder here containts the Llama 3 ported notebooks of the DLAI short course [Building Agentic RAG with Llamaindex](https://www.deeplearning.ai/short-courses/building-agentic-rag-with-llamaindex/). -1. [Building Agentic RAG with Llamaindex L1 Router Engine](../../../quickstart/agents/DeepLearningai_Course_Notebooks/Building_Agentic_RAG_with_Llamaindex_L1_Router_Engine.ipynb) shows how to implement a simple agentic RAG, a router that will pick up one of several query tools (question answering or summarization) to execute a query on a single document. Note this notebook is located in the `quickstart` folder. +1. [Building Agentic RAG with Llamaindex L1 Router Engine](../../../end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/AI_Agents_in_LangGraph_L1_Build_an_Agent_from_Scratch.ipynb) shows how to implement a simple agentic RAG, a router that will pick up one of several query tools (question answering or summarization) to execute a query on a single document. Note this notebook is located in the `quickstart` folder. 2. [Building Agentic RAG with Llamaindex L2 Tool Calling](Building_Agentic_RAG_with_Llamaindex_L2_Tool_Calling.ipynb) shows how to use Llama 3 to not only pick a function to execute, but also infer an argument to pass through the function. 
diff --git a/end-to-end-use-cases/RAFT-Chatbot/README.md b/end-to-end-use-cases/RAFT-Chatbot/README.md index 50356d509..b500944a2 100644 --- a/end-to-end-use-cases/RAFT-Chatbot/README.md +++ b/end-to-end-use-cases/RAFT-Chatbot/README.md @@ -124,7 +124,7 @@ export PATH_TO_RAFT_JSON=recipes/use_cases/end2end-recipes/raft/output/raft.json torchrun --nnodes 1 --nproc_per_node 4 recipes/quickstart/finetuning/finetuning.py --enable_fsdp --lr 1e-5 --context_length 8192 --num_epochs 1 --batch_size_training 1 --model_name meta-Llama/Meta-Llama-3-8B-Instruct --dist_checkpoint_root_folder $PATH_TO_ROOT_FOLDER --dist_checkpoint_folder fine-tuned --use_fast_kernels --dataset "custom_dataset" --custom_dataset.test_split "test" --custom_dataset.file "recipes/finetuning/datasets/raft_dataset.py" --use-wandb --run_validation True --custom_dataset.data_path $PATH_TO_RAFT_JSON ``` -For more details on multi-GPU fine-tuning, please refer to the [multigpu_finetuning.md](../../../quickstart/finetuning/multigpu_finetuning.md) in the finetuning recipe. +For more details on multi-GPU fine-tuning, please refer to the [multigpu_finetuning.md](../../getting-started/finetuning/multigpu_finetuning.md) in the finetuning recipe. Next, we need to convert the FSDP checkpoint to a HuggingFace checkpoint using the following command: @@ -132,7 +132,7 @@ Next, we need to convert the FSDP checkpoint to a HuggingFace checkpoint using t python src/llama_recipes/inference/checkpoint_converter_fsdp_hf.py --fsdp_checkpoint_path "$PATH_TO_ROOT_FOLDER/fine-tuned-meta-Llama/Meta-Llama-3-8B-Instruct" --consolidated_model_path "$PATH_TO_ROOT_FOLDER" ``` -For more details on FSDP to HuggingFace checkpoint conversion, please refer to the [readme](../../../quickstart/inference/local_inference/README.md) in the inference/local_inference recipe. 
+For more details on FSDP to HuggingFace checkpoint conversion, please refer to the [readme](../../getting-started/finetuning/multigpu_finetuning.md) in the inference/local_inference recipe. ## Evaluation Steps Once we have the RAFT model, we need to evaluate its performance. In this tutorial, we'll not only use traditional evaluation methods (e.g., calculating exact match rate or ROUGE score) but also use LLM as a judge to score model-generated answers. diff --git a/end-to-end-use-cases/README.md b/end-to-end-use-cases/README.md index f5c66a0b8..24b868c5e 100644 --- a/end-to-end-use-cases/README.md +++ b/end-to-end-use-cases/README.md @@ -18,7 +18,7 @@ This demo app shows how to use LangChain and Llama 3 to let users ask questions ## [NotebookLlama](./NotebookLlama/): PDF to Podcast using Llama Models Workflow showcasing how to use multiple Llama models to go from any PDF to a Podcast and using open models to generate a multi-speaker podcast -## [live_data](live_data.ipynb): Ask Llama 3 about Live Data (using Replicate or [OctoAI](../3p_integrations/octoai/live_data.ipynb)) +## [live_data](live_data.ipynb): Ask Llama 3 about Live Data (using Replicate or [OctoAI](../3pintegrations/octoai/live_data.ipynb)) This demo app shows how to perform live data augmented generation tasks with Llama 3, [LlamaIndex](https://github.com/run-llama/llama_index), another leading open-source framework for building LLM apps, and the [Tavily](https://tavily.com) live search API. 
## [WhatsApp Chatbot](./customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md): Building a Llama 3 Enabled WhatsApp Chatbot diff --git a/end-to-end-use-cases/benchmarks/inference/on_prem/README.md b/end-to-end-use-cases/benchmarks/inference/on_prem/README.md index afffd6ee5..f9d7c02fc 100644 --- a/end-to-end-use-cases/benchmarks/inference/on_prem/README.md +++ b/end-to-end-use-cases/benchmarks/inference/on_prem/README.md @@ -7,7 +7,7 @@ We support benchmark on these serving framework: # vLLM - Getting Started -To get started, we first need to deploy containers on-prem as a API host. Follow the guidance [here](../../../../recipes/3p_integrations/llama_on_prem.md#setting-up-vllm-with-llama-3) to deploy vLLM on-prem. +To get started, we first need to deploy containers on-prem as a API host. Follow the guidance [here](../../../../3p-integrations/llama_on_prem.md#setting-up-vllm-with-llama-3) to deploy vLLM on-prem. Note that in common scenario which overall throughput is important, we suggest you prioritize deploying as many model replicas as possible to reach higher overall throughput and request-per-second (RPS), comparing to deploy one model container among multiple GPUs for model parallelism. Additionally, as deploying multiple model replicas, there is a need for a higher level wrapper to handle the load balancing which here has been simulated in the benchmark scripts. For example, we have an instance from Azure that has 8xA100 80G GPUs, and we want to deploy the Meta Llama 3 70B instruct model, which is around 140GB with FP16. 
So for deployment we can do: diff --git a/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/README.md b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/README.md index 643197849..edf27bc6d 100644 --- a/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/README.md +++ b/end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/README.md @@ -104,7 +104,7 @@ lm_eval --model vllm --model_args pretrained=meta-llama/Llama-3.1-8B-Instruct,te **NOTE**: As for `add_bos_token=True`, since our prompts in the evals dataset has already included all the special tokens required by instruct model, such as `<|start_header_id|>user<|end_header_id|>`, we will not use `--apply_chat_template` argument for instruct models anymore. However, we need to use `add_bos_token=True` flag to add the BOS_token back during VLLM inference, as the BOS_token is removed by default in [this PR](https://github.com/EleutherAI/lm-evaluation-harness/pull/1465). -**NOTE**: For `meta_math_hard` tasks, some of our internal math ground truth has been converted to scientific notation, e.g. `6\sqrt{7}` has been converted to `1.59e+1`, which will be later handled by our internal math evaluation functions. As the lm-evaluation-harness [math evaluation utils.py](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/leaderboard/math/utils.py) can not fully handle those conversion, we will use the original ground truth from the original dataset [lighteval/MATH-Hard](https://huggingface.co/datasets/lighteval/MATH-Hard) by joining the tables on the original input questions. The `get_math_data` function in the [prepare_meta_eval.py](./prepare_meta_eval.py) will handle this step and produce a local parquet dataset file. +**NOTE**: For `meta_math_hard` tasks, some of our internal math ground truth has been converted to scientific notation, e.g. `6\sqrt{7}` has been converted to `1.59e+1`, which will be later handled by our internal math evaluation functions. 
As the lm-evaluation-harness [math evaluation utils.py](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/leaderboard/math/utils.py) can not fully handle those conversion, we will use the original ground truth from the original dataset [lighteval/MATH-Hard](https://www.oxen.ai/lighteval/MATH-Hard) by joining the tables on the original input questions. The `get_math_data` function in the [prepare_meta_eval.py](./prepare_meta_eval.py) will handle this step and produce a local parquet dataset file. Moreover, we have modified this [math_hard/utils.py](./meta_template/math_hard/utils.py) to address two issues: diff --git a/end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md b/end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md index 9b022785b..8d1c136d1 100644 --- a/end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md +++ b/end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md @@ -10,7 +10,7 @@ Businesses of all sizes can use the [WhatsApp Business API](https://developers.f The diagram below shows the components and overall data flow of the Llama 3 enabled WhatsApp chatbot demo we built, using Amazon EC2 instance as an example for running the web server. -![](../../../../docs/img/whatsapp_llama_arch.jpg) +![](../../../src/docs/img/whatsapp_llama_arch.jpg) ## Getting Started with WhatsApp Business Cloud API @@ -25,7 +25,7 @@ For the last step, you need to further follow the [Sample Callback URL for Webho Now open the [Meta for Develops Apps](https://developers.facebook.com/apps/) page and select the WhatsApp business app and you should be able to copy the curl command (as shown in the App Dashboard - WhatsApp - API Setup - Step 2 below) and run the command on a Terminal to send a test message to your WhatsApp. 
-![](../../../../docs/img/whatsapp_dashboard.jpg) +![](../../../src/docs/img/whatsapp_dashboard.jpg) Note down the "Temporary access token", "Phone number ID", and "a recipient phone number" in the API Setup page above, which will be used later. diff --git a/end-to-end-use-cases/multilingual/README.md b/end-to-end-use-cases/multilingual/README.md index e8a678b3f..662f7c50b 100644 --- a/end-to-end-use-cases/multilingual/README.md +++ b/end-to-end-use-cases/multilingual/README.md @@ -119,7 +119,7 @@ phase2_ds.save_to_disk("data/phase2") ``` ### Train -Finally, we can start finetuning Llama2 on these datasets by following the [finetuning recipes](../getting-started/finetuning/). Remember to pass the new tokenizer path as an argument to the script: `--tokenizer_name=./extended_tokenizer`. +Finally, we can start finetuning Llama2 on these datasets by following the [finetuning recipes](../../getting-started/finetuning/). Remember to pass the new tokenizer path as an argument to the script: `--tokenizer_name=./extended_tokenizer`. OpenHathi was trained on 64 A100 80GB GPUs. Here are the hyperparameters used and other training details: - maximum learning rate: 2e-4 diff --git a/getting-started/README.md b/getting-started/README.md index 523135c0b..c09ac3d53 100644 --- a/getting-started/README.md +++ b/getting-started/README.md @@ -5,6 +5,6 @@ If you are new to developing with Meta Llama models, this is where you should st * The [Build_with_Llama 3.2](./build_with_Llama_3_2.ipynb) notebook showcases a comprehensive walkthrough of the new capabilities of Llama 3.2 models, including multimodal use cases, function/tool calling, Llama Stack, and Llama on edge. * The [Running_Llama_Anywhere](./Running_Llama3_Anywhere/) notebooks demonstrate how to run Llama inference across Linux, Mac and Windows platforms using the appropriate tooling. 
* The [Prompt_Engineering_with_Llama](./Prompt_Engineering_with_Llama_3.ipynb) notebook showcases the various ways to elicit appropriate outputs from Llama. Take this notebook for a spin to get a feel for how Llama responds to different inputs and generation parameters. -* The [inference](./inference/) folder contains scripts to deploy Llama for inference on server and mobile. See also [3p_integrations/vllm](../3p_integrations/vllm/) and [3p_integrations/tgi](../3p_integrations/tgi/) for hosting Llama on open-source model servers. +* The [inference](./inference/) folder contains scripts to deploy Llama for inference on server and mobile. See also [3p_integrations/vllm](../3p-integrations/vllm/) and [3p_integrations/tgi](../3p-integrations/tgi/) for hosting Llama on open-source model servers. * The [RAG](./RAG/) folder contains a simple Retrieval-Augmented Generation application using Llama. -* The [finetuning](./finetuning/) folder contains resources to help you finetune Llama on your custom datasets, for both single- and multi-GPU setups. The scripts use the native llama-recipes finetuning code found in [finetuning.py](../../src/llama_recipes/finetuning.py) which supports these features: +* The [finetuning](./finetuning/) folder contains resources to help you finetune Llama on your custom datasets, for both single- and multi-GPU setups. 
The scripts use the native llama-recipes finetuning code found in [finetuning.py](../src/llama_recipes/finetuning.py) which supports these features: diff --git a/getting-started/finetuning/README.md b/getting-started/finetuning/README.md index 46d58aa6c..ca2b67578 100644 --- a/getting-started/finetuning/README.md +++ b/getting-started/finetuning/README.md @@ -6,7 +6,7 @@ This folder contains instructions to fine-tune Meta Llama 3 on a * [single-GPU setup](./singlegpu_finetuning.md) * [multi-GPU setup](./multigpu_finetuning.md) -using the canonical [finetuning script](../../../src/llama_recipes/finetuning.py) in the llama-recipes package. +using the canonical [finetuning script](../../src/llama_recipes/finetuning.py) in the llama-recipes package. If you are new to fine-tuning techniques, check out [an overview](./LLM_finetuning_overview.md). @@ -17,10 +17,10 @@ If you are new to fine-tuning techniques, check out [an overview](./LLM_finetuni ## How to configure finetuning settings? > [!TIP] -> All the setting defined in [config files](../../../src/llama_recipes/configs/) can be passed as args through CLI when running the script, there is no need to change from config files directly. +> All the setting defined in [config files](../../src/llama_recipes/configs/) can be passed as args through CLI when running the script, there is no need to change from config files directly. -* [Training config file](../../../src/llama_recipes/configs/training.py) is the main config file that helps to specify the settings for our run and can be found in [configs folder](../../../src/llama_recipes/configs/) +* [Training config file](../../src/llama_recipes/configs/training.py) is the main config file that helps to specify the settings for our run and can be found in [configs folder](../../src/llama_recipes/configs/) It lets us specify the training settings for everything from `model_name` to `dataset_name`, `batch_size` and so on. 
Below is the list of supported settings: @@ -71,11 +71,11 @@ It lets us specify the training settings for everything from `model_name` to `da ``` -* [Datasets config file](../../../src/llama_recipes/configs/datasets.py) provides the available options for datasets. +* [Datasets config file](../../src/llama_recipes/configs/datasets.py) provides the available options for datasets. -* [peft config file](../../../src/llama_recipes/configs/peft.py) provides the supported PEFT methods and respective settings that can be modified. We currently support LoRA and Llama-Adapter. Please note that LoRA is the only technique which is supported in combination with FSDP. +* [peft config file](../../src/llama_recipes/configs/peft.py) provides the supported PEFT methods and respective settings that can be modified. We currently support LoRA and Llama-Adapter. Please note that LoRA is the only technique which is supported in combination with FSDP. -* [FSDP config file](../../../src/llama_recipes/configs/fsdp.py) provides FSDP settings such as: +* [FSDP config file](../../src/llama_recipes/configs/fsdp.py) provides FSDP settings such as: * `mixed_precision` boolean flag to specify using mixed precision, defatults to true. diff --git a/getting-started/finetuning/datasets/README.md b/getting-started/finetuning/datasets/README.md index 8795ca96d..3543ee776 100644 --- a/getting-started/finetuning/datasets/README.md +++ b/getting-started/finetuning/datasets/README.md @@ -48,17 +48,17 @@ python -m llama_recipes.finetuning --dataset "custom_dataset" --custom_dataset.f This will call the function `get_foo` instead of `get_custom_dataset` when retrieving the dataset. ### Adding new dataset -Each dataset has a corresponding configuration (dataclass) in [configs/datasets.py](../../../../src/llama_recipes/configs/datasets.py) which contains the dataset name, training/validation split names, as well as optional parameters like datafiles etc. 
+Each dataset has a corresponding configuration (dataclass) in [configs/datasets.py](../../../src/llama_recipes/configs/datasets.py) which contains the dataset name, training/validation split names, as well as optional parameters like datafiles etc. -Additionally, there is a preprocessing function for each dataset in the [datasets](../../../../src/llama_recipes/datasets) folder. +Additionally, there is a preprocessing function for each dataset in the [datasets](../../../src/llama_recipes/datasets) folder. The returned data of the dataset needs to be consumable by the forward method of the fine-tuned model by calling ```model(**data)```. For CausalLM models this usually means that the data needs to be in the form of a dictionary with "input_ids", "attention_mask" and "labels" fields. To add a custom dataset the following steps need to be performed. -1. Create a dataset configuration after the schema described above. Examples can be found in [configs/datasets.py](../../../../src/llama_recipes/configs/datasets.py). +1. Create a dataset configuration after the schema described above. Examples can be found in [configs/datasets.py](../../../src/llama_recipes/configs/datasets.py). 2. Create a preprocessing routine which loads the data and returns a PyTorch style dataset. The signature for the preprocessing function needs to be (dataset_config, tokenizer, split_name) where split_name will be the string for train/validation split as defined in the dataclass. -3. Register the dataset name and preprocessing function by inserting it as key and value into the DATASET_PREPROC dictionary in [datasets/__init__.py](../../../../src/llama_recipes/datasets/__init__.py) +3. Register the dataset name and preprocessing function by inserting it as key and value into the DATASET_PREPROC dictionary in [datasets/__init__.py](../../../src/llama_recipes/datasets/__init__.py) 4. 
Set dataset field in training config to dataset name or use --dataset option of the `llama_recipes.finetuning` module or examples/finetuning.py training script. ## Application diff --git a/getting-started/finetuning/multigpu_finetuning.md b/getting-started/finetuning/multigpu_finetuning.md index 0dbf99b8f..43a818d18 100644 --- a/getting-started/finetuning/multigpu_finetuning.md +++ b/getting-started/finetuning/multigpu_finetuning.md @@ -96,14 +96,14 @@ srun torchrun --nproc_per_node 8 --rdzv_id $RANDOM --rdzv_backend c10d --rdzv_e Do not forget to adjust the number of nodes, ntasks and gpus-per-task in the top. ## Running with different datasets -Currently 3 open source datasets are supported that can be found in [Datasets config file](../../../src/llama_recipes/configs/datasets.py). You can also use your custom dataset (more info [here](./datasets/README.md)). +Currently 3 open source datasets are supported that can be found in [Datasets config file](../../src/llama_recipes/configs/datasets.py). You can also use your custom dataset (more info [here](./datasets/README.md)). -* `grammar_dataset` : use this [notebook](../../../src/llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) to pull and process the Jfleg and C4 200M datasets for grammar checking. +* `grammar_dataset` : use this [notebook](../../src/llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) to pull and process the Jfleg and C4 200M datasets for grammar checking. * `alpaca_dataset` : to get this open source data please download the `aplaca.json` to `dataset` folder. 
```bash -wget -P ../../../src/llama_recipes/datasets https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json +wget -P ../../src/llama_recipes/datasets https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json ``` * `samsum_dataset` @@ -132,7 +132,7 @@ In case you are dealing with slower interconnect network between nodes, to reduc HSDP (Hybrid sharding Data Parallel) helps to define a hybrid sharding strategy where you can have FSDP within `sharding_group_size` which can be the minimum number of GPUs you can fit your model and DDP between the replicas of the model specified by `replica_group_size`. -This will require to set the Sharding strategy in [fsdp config](../../../src/llama_recipes/configs/fsdp.py) to `ShardingStrategy.HYBRID_SHARD` and specify two additional settings, `sharding_group_size` and `replica_group_size` where former specifies the sharding group size, number of GPUs that you model can fit into to form a replica of a model and latter specifies the replica group size, which is world_size/sharding_group_size. +This will require to set the Sharding strategy in [fsdp config](../../src/llama_recipes/configs/fsdp.py) to `ShardingStrategy.HYBRID_SHARD` and specify two additional settings, `sharding_group_size` and `replica_group_size` where former specifies the sharding group size, number of GPUs that you model can fit into to form a replica of a model and latter specifies the replica group size, which is world_size/sharding_group_size. 
```bash diff --git a/getting-started/finetuning/singlegpu_finetuning.md b/getting-started/finetuning/singlegpu_finetuning.md index 1b054be18..8ab3d8a98 100644 --- a/getting-started/finetuning/singlegpu_finetuning.md +++ b/getting-started/finetuning/singlegpu_finetuning.md @@ -1,7 +1,7 @@ # Fine-tuning with Single GPU This recipe steps you through how to finetune a Meta Llama 3 model on the text summarization task using the [samsum](https://huggingface.co/datasets/samsum) dataset on a single GPU. -These are the instructions for using the canonical [finetuning script](../../../src/llama_recipes/finetuning.py) in the llama-recipes package. +These are the instructions for using the canonical [finetuning script](../../src/llama_recipes/finetuning.py) in the llama-recipes package. ## Requirements @@ -35,13 +35,13 @@ The args used in the command above are: Currently 3 open source datasets are supported that can be found in [Datasets config file](../../../src/llama_recipes/configs/datasets.py). You can also use your custom dataset (more info [here](./datasets/README.md)). -* `grammar_dataset` : use this [notebook](../../../src/llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) to pull and process the Jfleg and C4 200M datasets for grammar checking. +* `grammar_dataset` : use this [notebook](../../src/llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) to pull and process the Jfleg and C4 200M datasets for grammar checking. * `alpaca_dataset` : to get this open source data please download the `alpaca.json` to `dataset` folder. 
```bash -wget -P ../../../src/llama_recipes/datasets https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json +wget -P ../../src/llama_recipes/datasets https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json ``` * `samsum_dataset` diff --git a/getting-started/inference/local_inference/README.md b/getting-started/inference/local_inference/README.md index 8e27304a2..a31ee9522 100644 --- a/getting-started/inference/local_inference/README.md +++ b/getting-started/inference/local_inference/README.md @@ -105,7 +105,7 @@ python inference.py --model_name --peft_model --prompt_file Date: Thu, 9 Jan 2025 17:53:37 -0800 Subject: [PATCH 17/23] Fix some more links --- end-to-end-use-cases/RAFT-Chatbot/README.md | 2 +- .../messenger_chatbot/messenger_llama3.md | 4 ++-- getting-started/README.md | 1 - getting-started/finetuning/singlegpu_finetuning.md | 4 ++-- getting-started/inference/README.md | 4 +--- 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/end-to-end-use-cases/RAFT-Chatbot/README.md b/end-to-end-use-cases/RAFT-Chatbot/README.md index b500944a2..2f5160da6 100644 --- a/end-to-end-use-cases/RAFT-Chatbot/README.md +++ b/end-to-end-use-cases/RAFT-Chatbot/README.md @@ -236,7 +236,7 @@ Once we evaluated and refined our RAFT model, we can deploy it locally to intera python recipes/inference/local_inference/inference.py --model_name raft-8b ``` -For more details,please check [local_inference recipe](../../../quickstart/inference/local_inference/README.md) +For more details,please check [local_inference recipe](../../getting-started/inference/local_inference/README.md) ## Acknowledgement diff --git a/end-to-end-use-cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md b/end-to-end-use-cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md index a410a5647..f085a9911 100644 --- a/end-to-end-use-cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md +++ 
b/end-to-end-use-cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md @@ -10,7 +10,7 @@ Messenger from Meta is a messaging service that allows a Facebook business page The diagram below shows the components and overall data flow of the Llama 3 enabled Messenger chatbot demo we built, using an Amazon EC2 instance as an example for running the web server. -![](../../../../docs/img/messenger_llama_arch.jpg) +![](../../../src/docs/img/messenger_llama_arch.jpg) ## Getting Started with Messenger Platform @@ -24,7 +24,7 @@ The diagram below shows the components and overall data flow of the Llama 3 enab 5. Open Messenger's API Settings, as shown in the screenshot below, then in "1. Configure webhooks", set the Callback URL and Verify Token set up in the previous step, and subscribe all message related fields for "Webhook Fields". Finally, in "2. Generate access tokens", connect your Facebook page (see step 1) and copy your page access token for later use. -![](../../../../docs/img/messenger_api_settings.png) +![](../../../src/docs/img/messenger_api_settings.png) ## Writing Llama 3 Enabled Web App diff --git a/getting-started/README.md b/getting-started/README.md index c09ac3d53..bfde987b8 100644 --- a/getting-started/README.md +++ b/getting-started/README.md @@ -3,7 +3,6 @@ If you are new to developing with Meta Llama models, this is where you should start. This folder contains introductory-level notebooks across different techniques relating to Meta Llama. * The [Build_with_Llama 3.2](./build_with_Llama_3_2.ipynb) notebook showcases a comprehensive walkthrough of the new capabilities of Llama 3.2 models, including multimodal use cases, function/tool calling, Llama Stack, and Llama on edge. -* The [Running_Llama_Anywhere](./Running_Llama3_Anywhere/) notebooks demonstrate how to run Llama inference across Linux, Mac and Windows platforms using the appropriate tooling. 
* The [Prompt_Engineering_with_Llama](./Prompt_Engineering_with_Llama_3.ipynb) notebook showcases the various ways to elicit appropriate outputs from Llama. Take this notebook for a spin to get a feel for how Llama responds to different inputs and generation parameters. * The [inference](./inference/) folder contains scripts to deploy Llama for inference on server and mobile. See also [3p_integrations/vllm](../3p-integrations/vllm/) and [3p_integrations/tgi](../3p-integrations/tgi/) for hosting Llama on open-source model servers. * The [RAG](./RAG/) folder contains a simple Retrieval-Augmented Generation application using Llama. diff --git a/getting-started/finetuning/singlegpu_finetuning.md b/getting-started/finetuning/singlegpu_finetuning.md index 8ab3d8a98..80689d4ea 100644 --- a/getting-started/finetuning/singlegpu_finetuning.md +++ b/getting-started/finetuning/singlegpu_finetuning.md @@ -6,7 +6,7 @@ These are the instructions for using the canonical [finetuning script](../../src ## Requirements -Ensure that you have installed the llama-recipes package ([details](../../../README.md#installing)). +Ensure that you have installed the llama-recipes package. To run fine-tuning on a single GPU, we will make use of two packages: 1. [PEFT](https://github.com/huggingface/peft) to use parameter-efficient finetuning. @@ -33,7 +33,7 @@ The args used in the command above are: ### How to run with different datasets? -Currently 3 open source datasets are supported that can be found in [Datasets config file](../../../src/llama_recipes/configs/datasets.py). You can also use your custom dataset (more info [here](./datasets/README.md)). +Currently 3 open source datasets are supported that can be found in [Datasets config file](../../src/llama_recipes/configs/datasets.py). You can also use your custom dataset (more info [here](./datasets/README.md)). 
* `grammar_dataset` : use this [notebook](../../src/llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) to pull and process the Jfleg and C4 200M datasets for grammar checking. diff --git a/getting-started/inference/README.md b/getting-started/inference/README.md index 7f48aba70..afa6ffdf3 100644 --- a/getting-started/inference/README.md +++ b/getting-started/inference/README.md @@ -2,7 +2,5 @@ This folder contains scripts to get you started with inference on Meta Llama models. -* [Code Llama](./code_llama/) contains scripts for tasks relating to code generation using CodeLlama * [Local Inference](./local_inference/) contains scripts to do memory efficient inference on servers and local machines -* [Mobile Inference](./mobile_inference/) has scripts using MLC to serve Llama on Android (h/t to OctoAI for the contribution!) -* [Model Update Example](./modelUpgradeExample.py) shows an example of replacing a Llama 3 model with a Llama 3.1 model. \ No newline at end of file +* [Mobile Inference](./mobile_inference/) has scripts using MLC to serve Llama on Android (h/t to OctoAI for the contribution!) 
\ No newline at end of file From 5112b86219af0ce305978351fb00e71d1cf30f69 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Thu, 9 Jan 2025 18:04:13 -0800 Subject: [PATCH 18/23] Fix even more links :) --- end-to-end-use-cases/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/end-to-end-use-cases/README.md b/end-to-end-use-cases/README.md index 24b868c5e..653088010 100644 --- a/end-to-end-use-cases/README.md +++ b/end-to-end-use-cases/README.md @@ -9,16 +9,16 @@ The scripts apply a throughput analysis and introduction to `lm-evaluation-harne ## [Automatic Triaging of Github Repositories](./github_triage/walkthrough.ipynb): Use Llama to automatically triage issues in an OSS repository and generate insights to improve community experience This tool utilizes an off-the-shelf Llama model to analyze, generate insights, and create a report for better understanding of the state of a repository. It serves as a reference implementation for using Llama to develop custom reporting and data analytics applications. -## [VideoSummary](video_summary.ipynb): Ask Llama 3 to Summarize a Long YouTube Video (using Replicate or [OctoAI](../3p_integrations/octoai/video_summary.ipynb)) +## [VideoSummary](video_summary.ipynb): Ask Llama 3 to Summarize a Long YouTube Video (using Replicate or [OctoAI](../3p-integrations/octoai/video_summary.ipynb)) This demo app uses Llama 3 to return a text summary of a YouTube video. It shows how to retrieve the caption of a YouTube video and how to ask Llama to summarize the content in different ways, from the simplest naive way that works for short text to more advanced methods of using LangChain's map_reduce and refine to overcome the 8K context length limit of Llama 3. 
-## [NBA2023-24](./coding/text2sql/structured_llama.ipynb): Ask Llama 3 about Structured Data +## [NBA2023-24](./coding/text2sql/quickstart.ipynb): Ask Llama 3 about Structured Data This demo app shows how to use LangChain and Llama 3 to let users ask questions about **structured** data stored in a SQL DB. As the 2023-24 NBA season is entering the playoff, we use the NBA roster info saved in a SQLite DB to show you how to ask Llama 3 questions about your favorite teams or players. ## [NotebookLlama](./NotebookLlama/): PDF to Podcast using Llama Models Workflow showcasing how to use multiple Llama models to go from any PDF to a Podcast and using open models to generate a multi-speaker podcast -## [live_data](live_data.ipynb): Ask Llama 3 about Live Data (using Replicate or [OctoAI](../3pintegrations/octoai/live_data.ipynb)) +## [live_data](live_data.ipynb): Ask Llama 3 about Live Data (using Replicate or [OctoAI](../3p-integrations/octoai/live_data.ipynb)) This demo app shows how to perform live data augmented generation tasks with Llama 3, [LlamaIndex](https://github.com/run-llama/llama_index), another leading open-source framework for building LLM apps, and the [Tavily](https://tavily.com) live search API. ## [WhatsApp Chatbot](./customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md): Building a Llama 3 Enabled WhatsApp Chatbot @@ -27,7 +27,7 @@ This step-by-step tutorial shows how to use the [WhatsApp Business API](https:// ## [Messenger Chatbot](./customerservice_chatbots/messenger_chatbot/messenger_llama3.md): Building a Llama 3 Enabled Messenger Chatbot This step-by-step tutorial shows how to use the [Messenger Platform](https://developers.facebook.com/docs/messenger-platform/overview) to build a Llama 3 enabled Messenger chatbot. 
-### RAG Chatbot Example (running [locally](./customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb) or on [OctoAI](../3p_integrations/octoai/RAG_chatbot_example/RAG_chatbot_example.ipynb)) +### RAG Chatbot Example (running [locally](./customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb) or on [OctoAI](../3p-integrations/octoai/RAG_chatbot_example/RAG_chatbot_example.ipynb)) A complete example of how to build a Llama 3 chatbot hosted on your browser that can answer questions based on your own data using retrieval augmented generation (RAG). You can run Llama2 locally if you have a good enough GPU or on OctoAI if you follow the note [here](../README.md#octoai_note). ## [Sales Bot](./customerservice_chatbots/ai_agent_chatbot/SalesBot.ipynb): Sales Bot with Llama3 - A Summarization and RAG Use Case From 3328d727556a34aad9dd91045bc9b44f4a41d09c Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Thu, 9 Jan 2025 18:09:48 -0800 Subject: [PATCH 19/23] refactor --- src/dev_requirements.txt => dev_requirements.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/dev_requirements.txt => dev_requirements.txt (100%) diff --git a/src/dev_requirements.txt b/dev_requirements.txt similarity index 100% rename from src/dev_requirements.txt rename to dev_requirements.txt From 84c4def4ef903202470cb033c481827c59a12cf9 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 13 Jan 2025 14:33:45 -0800 Subject: [PATCH 20/23] move files --- README.md | 15 +- getting-started/Getting_to_know_Llama.ipynb | 1150 ----------------- ...nb => Prompt_Engineering_with_Llama.ipynb} | 34 +- 3 files changed, 11 insertions(+), 1188 deletions(-) delete mode 100644 getting-started/Getting_to_know_Llama.ipynb rename getting-started/{Prompt_Engineering_with_Llama_3.ipynb => Prompt_Engineering_with_Llama.ipynb} (95%) diff --git a/README.md b/README.md index c264d4145..b0d14cd73 100644 --- a/README.md +++ b/README.md @@ -7,15 +7,14 @@ Welcome to the official repository for 
helping you get started with [inference]( The examples cover the most popular community approaches, popular use-cases and the latest Llama 3.2 Vision and Llama 3.2 Text, in this repository. > [!TIP] -> Repository Structure: -> * [Start building with the Llama 3.2 models](./getting-started/) -> * [End to End Use cases with Llama model family](./end-to-end-use-cases) -> * [Examples of building with 3rd Party Llama Providers](./3p-integrations) -> [!TIP] -> Get started with Llama 3.2 with these new recipes: -> * [Finetune Llama 3.2 Vision](./getting-started/finetuning/finetune_vision_model.md) +> Popular getting started links: +> * [Build with Llama Notebook](./getting-started/build_with_Llama_3_2.ipynb) > * [Multimodal Inference with Llama 3.2 Vision](./getting-started/inference/local_inference/README.md#multimodal-inference) -> * [Inference on Llama Guard 1B + Multimodal inference on Llama Guard 11B-Vision](./end-to-end-use-cases/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb) +> * [Inference on Llama Guard 1B + Multimodal inference on Llama Guard 11B-Vision](./end-to-end-use-cases/responsible_ai/llama_guard/llama_guard_text_and_vision_inference.ipynb) + +> [!TIP] +> Popular end to end recipes: +> * [Finetune Llama 3.2 Vision](./getting-started/finetuning/finetune_vision_model.md) > [!NOTE] > Llama 3.2 follows the same prompt template as Llama 3.1, with a new special token `<|image|>` representing the input image for the multimodal models. 
diff --git a/getting-started/Getting_to_know_Llama.ipynb b/getting-started/Getting_to_know_Llama.ipynb deleted file mode 100644 index caecb672e..000000000 --- a/getting-started/Getting_to_know_Llama.ipynb +++ /dev/null @@ -1,1150 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "RJSnI0Xy-kCm" - }, - "source": [ - "![Meta---Logo@1x.jpg](data:image/jpeg;base64,/9j/4QAYRXhpZgAASUkqAAgAAAAAAAAAAAAAAP/sABFEdWNreQABAAQAAABkAAD/4QMxaHR0cDovL25zLmFkb2JlLmNvbS94YXAvMS4wLwA8P3hwYWNrZXQgYmVnaW49Iu+7vyIgaWQ9Ilc1TTBNcENlaGlIenJlU3pOVGN6a2M5ZCI/PiA8eDp4bXBtZXRhIHhtbG5zOng9ImFkb2JlOm5zOm1ldGEvIiB4OnhtcHRrPSJBZG9iZSBYTVAgQ29yZSA5LjAtYzAwMCA3OS5kYTRhN2U1ZWYsIDIwMjIvMTEvMjItMTM6NTA6MDcgICAgICAgICI+IDxyZGY6UkRGIHhtbG5zOnJkZj0iaHR0cDovL3d3dy53My5vcmcvMTk5OS8wMi8yMi1yZGYtc3ludGF4LW5zIyI+IDxyZGY6RGVzY3JpcHRpb24gcmRmOmFib3V0PSIiIHhtbG5zOnhtcD0iaHR0cDovL25zLmFkb2JlLmNvbS94YXAvMS4wLyIgeG1sbnM6eG1wTU09Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9tbS8iIHhtbG5zOnN0UmVmPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvc1R5cGUvUmVzb3VyY2VSZWYjIiB4bXA6Q3JlYXRvclRvb2w9IkFkb2JlIFBob3Rvc2hvcCAyNC4xIChNYWNpbnRvc2gpIiB4bXBNTTpJbnN0YW5jZUlEPSJ4bXAuaWlkOjlDN0Y5QzBDNEIxRDExRUU5MjgwQUNGNjU1QzlDQjREIiB4bXBNTTpEb2N1bWVudElEPSJ4bXAuZGlkOjlDN0Y5QzBENEIxRDExRUU5MjgwQUNGNjU1QzlDQjREIj4gPHhtcE1NOkRlcml2ZWRGcm9tIHN0UmVmOmluc3RhbmNlSUQ9InhtcC5paWQ6OUM3RjlDMEE0QjFEMTFFRTkyODBBQ0Y2NTVDOUNCNEQiIHN0UmVmOmRvY3VtZW50SUQ9InhtcC5kaWQ6OUM3RjlDMEI0QjFEMTFFRTkyODBBQ0Y2NTVDOUNCNEQiLz4gPC9yZGY6RGVzY3JpcHRpb24+IDwvcmRmOlJERj4gPC94OnhtcG1ldGE+IDw/eHBhY2tldCBlbmQ9InIiPz7/7gAOQWRvYmUAZMAAAAAB/9sAhAABAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAgICAgICAgICAgIDAwMDAwMDAwMDAQEBAQEBAQIBAQICAgECAgMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwP/wAARCAA1APADAREAAhEBAxEB/8QAwQAAAgIDAQEBAAAAAAAAAAAACQoACwYHCAUDBAEAAQQDAQEBAAAAAAAAAAAABgAFCAkBAwQCBwoQAAAGAQEGBAMDCAYGCwAAAAECAwQFBgcIABESExQJIRUWFyIYCjEjJEFhMyW3eBkaUTK0djg5lLU2d9dYcYGhQkQ1JrY3RygRAAIBAgMEBAsGBAcAAwAAAAECAxEEABIFIRMGBzFBFAhRYXGBkbEiM
nI0FaHB0UJSM/DhIxbxYqIkFxgJU3NU/9oADAMBAAIRAxEAPwB/jZYWNCaj9TWF9J2NZHK2cbi0qVXZqdGwR5aj6ds00oiqs0rtWhGwGezU09KiYSpkAE0kymVXOkgRRUhzy95ccYc0eIo+GOC7R7rUnGZjULHDGCA0s0h9mONaipO1iQiKzsqkU4y424a4B0V9e4ouVt7FTRR7zyPQkRxINruadA2AVZiqgsFTtS31DeerpPqIaZKohhmqslTJM5G1I1S8WSdQAxhK8lYuSrT+Jg3CoDu6ds5dETAP0xx3jtZ9y67g3A2j2IfmPdNrGqOKssBntoYz+lHSZXkA/U6IT+gdGIGca977ivUrsrwTANNsFNA0oinkcfqZWjZEJ/SrMB+o4zvSr9RJfa7JtYLVpRXOQYB84STd3+iBXIWwwCZlClM4JSmkFCRE42KQwioQHzZYALvIJx+AWTmf3AtD1C2a95WXq2F8ikra3O9kilNOjtDSSSRnwHduu3bTpDrwH3wdVs51teP7Vru0cis8G7SSPx7kIiOPCM6nwV6MNP4ZzXizUJjyCyphu6RF7oliTOaOnIhRTcRwgIFdxsmxcpt5GGmY9QeBwzdpIuUDeByF3htWTxfwdxNwFr8/DHF1nLY63bkZ45ANoPuujAlJI2G1JEZkYdBOJ2cN8TaFxfo8WvcOXMd1pUw9l0r0jpVlIDI69DI4DKekDGstVOrzC2j6heuMuTyiK7/qW9TpsMRJ9cLrJNkyHVYwEYos3TBFuChBcPHKiDJqBygoqU6iZDmXKLkvx1zq4h+gcGW4aOPKbi5lJS2tUY0DzSAE1NDkjRXlehyoQrFQ3mpze4L5P6D9c4unIkkqILeMBri5cCpWJCQKCozyOVjSozMCyhlocw98zVDbLctI4haQ2JqemsJWldeR9XvL5w1THhIq+l5qppqpOnBA4lCpBwEMYQKIgACNpnBXcC5TaPoy23Gjz6zrRX2plee1QMekJHFcEFVOwFtpAqaE0xWjxh35eaGraubjhBIdJ0cN7MLJBdMVHQWkkgBDHpIXYCaCo24710f98ah3V9D0DVDCHx3MvFE2TXLDN02fUx47VMQiQ2uNZxUWvUUTqGEvVJEdMybwMuLdMplAjzzp7g3EOhW8/EfKecalYoCzaeyslyqipPZ3aSQXBA27tjHIeiPeMQuPvXJ/vxaDrc8PD/NCA6deuQq36srWzMaU36LGhtwTszqHjHS+7UFsMAtXTZ82bvWThB4zeIIumjtqsm4bOmzhMqqDhuukY6S6C6RwMQ5REpiiAgIgO1cssUtvK0E6sk6MVZWBDKwNCrA7QQdhB2g7Dif8UsU8SzQsrwuoZWUgqykVBBGwgjaCNhG0Y++2vGzE2WFhVLN31UmDsJZny5hmU0m5Ym5LEmTr5jKQmWV+p7ZnLvaHaZWrOpRo2WjlFm7WQXijKppnMY5CHABHeA7OqaU7oHzjaAejw4ZZNZjjkaMo1VJHSOrBpu2z3F8Rdy/AC2b8XRMpTn8DbJalXzHFifsJCx0ueYgk9jercx4JoP4uwwDxu8aOiJkTOJ1UP0rdYC8VzbPbSZG2ilQfDhwtLuO7i3ibCDQjwYIPtz46sTZYWNN6hs7490xYQyhqAytKeUY/xNTpe42NynyjPHKEaj+DholFZVFN5PWGTUQYR7fjKLl85SSAd5w29xxtK4jT3ica5ZEhjMr+6orhWYfq88Abh3aOcwiPjuAci0oAH+jeIRQ7t/5ft3fn2dPpEn6x6Dhm+uxf/G3pGGwcWXpvlHGOOcmNI1zDNci0OoXptDvVkHLyKb26vx9gRjXbhqItl3LFOQBJQ6Y8BjEES+Ahs1MuVivgNMPaNnQP0VAPpxnm3nHrE2WFibLCxNlhY8iwT0TVoGbs888LHwVciJKemn501liMYmIZLSEi8Mi2TWcKlbM25ziVMhzmAu4oCO4NsgEmg6TjBIUFj0DAxcQd7DtkZ6ybRsO4o1PRl
syRkifZ1im1pPHOXotWXnX4HFow6+boEbFMjLCmIAdwukmBtwCYN+3S9lcxqXdaKOnaPxxxx6jZyuI0erk7Nh/DBUduXHbibLCxNlhYmywsTZYWJssLHiWWyQVNrlgt9olGkHWarCStjsU0/U5TGIgoNivJy0o9V3Dy2jBg1UVUNuHcQgjt2adp97q+oQaVpkTzajdTJFFGoq0kkjBERR1szEKB4Tjmvb2106zm1C+kWKygiaSR22KiIpZ2J6gqgk+IYrue4drdu2vDUNM358pJs8dwLp7WcL0RQ6gpVun9WUiDxZgkdREbbbzoJPJVUvMOZYU2xTmbtW5SX7cg+TWjckeAodChEb6/OqzahcilZZ8u1QxodxBUxwqaALmkKiSSQmn7m/zN1PmpxfJq0pddHiZo7ODqjhrsJUVG9loHlO0k0QEoiAG30QfT5Vuw49hciazrFdYiz2eOSkmOG6U7Y19zUWTxMirMl4sLxhKvHFkMgcDLx7RJsVgp92osspxkThvzm7+Wo6fr03D/ACgt7OXTbaQo1/cK0onZTRuzRKyKIqiiyuXMo9pURaM0muWPdGsrzSItY5kTXMd9OgZbOErGYgdo38hVyZKe9GoURnYzMagas1+9g59iSlzWXtINgtmRYSttXMracRWwrOTvDaGap853KUeYh2EcnaTMEimUUi1Wib4yJBFBV0sJUBJ+RXfmh4q1iHhTmxBa6fe3DBIb6DMlsZGNFS5jkZzDmNAJlcxhiM6xpVwxc2e6hLw/psvEPLya4vLWFS0tpLRpwgFS0Doq73KKkxFQ9B7DO1FwMft1dwTI2gnKnn8aWRteIbWok2yji8r3kt5xsmmZJpYoIXHG1jLjBiYDIL8IA5Q42yo8BynTkj3gOQ/D3PHhjsNyY7Xiu1qbO8y1aIk1aKQCjPBJ+ZK1VqSJ7QIb4hyd5t6zyp17tUGe44fuNlza5qLJsosiE7ElQ0o9KFao2wgr17Qa3qA7w+r99MTMspHQzoiUrP2BNNw/qWHMTt3igRUDX2ih0EnDw4LHRYteJJaTklFnLgxQ6twm365rfLXuYck4rbTIlnuKFbeOoSfU75lGeaZgCQuwNLJRlghVIYwSIY2CtL0LmP3tucs0mrO1vGrVuHoWh02zRiFhiUkAttKxJUGeVmmcgGWRWjMYdtTRRi6ltqY0wHQrkBWhW8nZ8jQMbdrbNr7gFd88mZlqudkquoHECTEjRskPgkkQA3bVP8Wd6Tntxbrr65NxFqNj7dY4LKV7W3iHUixRMAwA2ZpTI7fnZjizvhfu1clOF9FXRYtAsL32KPNeRJc3Ep62aSRTlJO3LEI0X8qqMBO7o/agrGHKhKajNMkY/ZUmEOLrJ2MRdO5YlXjnK4F9YVFw8O4kvTzJZUpZBkqosLJI3UJGK2IqRGd3dM74OrcbazDyy5qyxya9OMtjfZVjM7qPlrgKFTfMATDKqrvWG7cGVkLwn70fdQ0vg7SJeY3LKKRNEgOa9sszSCBCfmLcsS+6UkCWNi27U7xSIlYJtPsha45OWWU0cZNmln52ca+msGSsk4FV0mwi0TvbDjbnKGMqs3j2CaklFEHf07ZF2hxAkRqkQR7+nIK0s0HO3hSBY1eVItVjQUUvIQsN7QbAzuVhuD+d2hkpnaV2Ku5Dzxurtzyc4nmMjJG0mmSOasFQFpbOp2kIgM0A/IiypXKsSBkrar3FkmJssLFP5r4SUW14azkUUzqrK6s9QySSSRDKKKqKZetxSJpkKAmOc5hAAAAEREdi+D9hPgHqwC3XzUnxt68EJ7EHcEd9vrXFEwuRZNzAYKz05jsQ5uZSxlWLOpSgSayFGyJJtnAogzcY/sz1VB8osG9tDSMiPAKgEAOe+t+0QVXbIu0fePP66Y6tNuuy3NH2RtsPi8B83qriz62GMGGJssLCNv1UfcR9Q2ipduzGU4Iw9NWhcnajXEe4HgfWx4yK/wAaY3eGSMQToV6GfBPv0D81FVy+j
DBwrMjAD5pdvQG4bpOwfefu9OBzWrqrC1ToG1vL1D7/AEYTgfR7+Lcizk2LyOdlSbODNXzZZo5BB62Res1hQcETVBJ2zcJrJG3blEjlMXeUwCLxWvRhhII6cXGGkz/Ctpn/AHfsNfs5rewfN+63xH14PIP2U+EerHQO2vG3Gj8mam9N+FnfQZh1AYUxVICRNQI/I2U6PSX5k1SlOkcjKyTka6OVQhwEogQd4CAh4be1ikf3FY+QE41vNFGaSMqnxkDHv41zhhbM7Vd9h/L2MMrMmpCqOneN79VLw2bEOJSlM4WrEtKJoFMYwAHGIeI7tsMjp74I8opjKSRybY2Vh4iD6sbR284940Rn+zVr2SzawGxQJHoYryQ1M1UmY1Ncjn0hMpigomo5KZNUqngIG3CA/btsjB3i+UY1ykbtto6D6sVdnZpWQbd0jRC4croNm6GdK8qs4crJN0Ek02siY51FljkTIUCh+UfEfD7die9+Vf4cBth85HX9WLWYblTygJjWutgUAEREZ2LAAAPERERdbgAA2FaHwYNcy+EYyFNRNVMiqRyKpKkKomomYp01EzlAxDkOURKchyiAgIDuENsYzj8UtLxUDGvJick4+GiI5AzmQlZZ62jo1i2Ju43Dx88URbNkCb/E5zFKH9O2QCTQdOMEgCp6Mc2sNcGi6VsAVOM1daY5G0GVK3LXmOecWO5o7gxgIDZONQtSjxRwJx3cspBPv/Jts3EwFSjU8hxqFxbk5RImb4h+OOlVZKOQYeaLv2SMZyU3PmKrpBNh06oFFJx1Z1Ab8lUDlEp+LhNvDcPjtqoejrxuqKV6sfiZ2SuyLgrSPnoV86OBjEbM5Ri6cHApTHMJUUFzqGApCiI7g8AAR2zQ4xUHYDgLfftz4+xXoySxhAvTM5/UJb2tMdnROKbktEryRbLcTInKIG4HrlCNjly7hA7WQVKPgO01O4rwBDxbzfbiS+TPYaBZtcCu0dplO5twfGoM0qnqeJT1Yit3u+Nn4X5aJolq+W91m5EJpsO4jG9mI8pEUbeFZGGAK9jjSbH5/wBY7O9W2NTkaHp2iW+S3rR0kVZlIXlV8DDG8c6IYogPSyqbiYIA/Cc8PwG3lMIDODvr8y7jl/ykbRdLkMet8QSmzVgaMtsFzXbqfGhSA9YFxUbRURU7qvBcHGvMZdUv0D6Vo0YuWB2q05bLbKfI4aYdRMNDsNMPUbUlYtVxNlhYr3e6OTA77WPl6w6b40jHHTuwqNJ5Rgo2NW3uSUTrEuc1TkGqZUmlSmpkqhm4FOoiq5Kss3ErZZBJO/zu66XzC0rkxoicyJN5rbW4MYYMJorVgDaxXJY1adYqZqhWUZY5Kyq7NTTze4k4D1zm1rNrwImTT4ptrKQYZ5lqLqS3A2CIS1oASrCssdI2VQSX6f3WRWsbZEtOky6toSJbZllC2bHNuFq3aSTi+xcaKDmjTUqbhO7j5yIbGVhklDlK3kiLIpFOrIlAsZO/byn1TiPh605naTJPMdGiMNzb5maNLaR83aYo+hWSRqXBAq8RR2IW3NZEd0rj/TdD1u64H1COCJ9VdZIZwoWR540yiCWTpZWjH9AE+zIHVatNhv3ap7Fh2PMmoaKscNLV6dj2stBz0Y/hpmKfJFXZScVKNVWMjHvEDgJFmrxoudNQg+BiGEB26rG+vNMvYdS0+R4b+3lSWKRDRkkjYMjqR0MrAMD1EA45r2ytNRs5tPv41lsZ4mjkRhVXjdSrow61ZSQR1g4QiyZCWbQprasEdWl3JZXAeY0JiqLrKnSWlK4wkm1grAPzgG86VhqTtuR0XcYh03ByjxFHx/Q9wtqOld4HkRbXOqKps+ItEMdwAARHM6NDPk8BhuFcxnYQUU7CMUJ8S6fqfIvnZcW+mswutA1kSQEkgvCrrLDm8UsDIHHQQ7DaDh8+o2eLu1UrFyg1RWhbbXoWzw6xgADKxc9GtpWPVMACIAKjR2QR3CIeO356dZ0q70LWLvRL8
Zb6zuZYJB4JIXaNx5mU4vl0jU7XWtKtdZsTmsru3jmjPhSVA6HzqwxkOzbhwxT+69znT146zVEznTUJq01CnIoQwkOQ5cv24xTkOUQMU5TBvAQ8QHYvg/YT4B6sAt181J8bes4NN9SNoBd4IzpRtaNHhio4r1axkW6vPl7MjeNq+oRrXWz6zJKFRIVJsTKES2PPIcRjKOJJGXMPCQhA24tOuN4hhb3k6PJ/Lo9GHDVrXdyC4X3H6fi/n0+nDLf07vcPHWnoxYYvv86Mjn3SuhB44uSj5wZWVtuPDNV0cWX5U6xjrvXK8PGKw8ksc6q6sjFHdLCUXiYC2ahb7mbMv7b7R5esYdtKuu0W+Rj/AFU2HxjqP3ebBONf2sak6DNJ2W9TF06V4ekwRmtJrDhcUVLxkmdEYyi09uCZyujJSs6smZ6oiB1GcYi5dCUSIH3c1vC08oiXr6fEOvHZdTrbQNM3UNnjPUMVxfbN0mZH7unccbkyu+lbPXZm3zeoLVXdlTqIKOqp6iTlrDFpu0TJFYSmQrDJowrFNAQOzTeHcJJiizUApHcyraW3sbDSij+PB04FLSB7679vaCczHxfz6Mao7xBSp90DW42SImi2YZ3tMYxbIJJoN2cbFkZx0awaoJFIkg0YMGqaKSZQApEyFKAAAberP5VPhx4v/nJPiOLRPSZ/hW0z/u/Ya/ZzW9hib91viPrwYwfsp8I9WFMe/t33sjY+yNbdDeii5OKVJ0xRWB1AZ0rboE7W2tXCQX+LcbTDc4nrStaA3JnZZAxZIslxsW5motHB3LtYWKsonnFa9A+8/dhk1PUnVzbW5oR7xHTXwDweM4BhpN7HXcm19U1HPFXp8RW6PdTnloTJefbq9rS+QSOBMc9gh2gx1mu05FvB3GSlFWJWbwDcSK6oAYQ7pb62tzuyfaHUB0fdhtg067uV3qiinrY9P34wHU925u5F2j7TT8yW2MsOOG7eZatqdqFwZd3j+tMLIYy7ltCL2qCNFzdalHqTA502ko1ZlfpEOCQLFIqUnqK4trsFBQ+IjHma0u7EiRqjwMD9+HS+wj3eZXuLYqsuJs5uYpLVVhCKjn1hko5u3jW2XMdOXCUUyyS1h24Ebxs/FSqiLGwoNyEZFdOmjlAqRHvStWW/tBbuHT9pvsPg/DBBpl8btCkn7y/aPD+P88LTa2ewX3NrjqP1b55gML1I+MrRmnPGW4aXWzLi5ByvRpm7Wq4sJJSLcWhOTbrrwLkqotlEirJmHlmKAhu2coL+2EaRljmCgdB6aYaLjTLxpnkCjIWY9I6Kk+HABsE4SyJqQy/j/BeJoppOZIydYW1Xp8S+lo6DaP5l0mqqi3Xl5dy0jWBDEQMPMWUIQN27fvENnB3WNC7+6BhsijeaQRptcnZgxQ/TXd3IAEfYWljuD7Aznh7eP5g33EA3jtx/UrT9R9B/DHf9Jvv0D0j8cP6qZZqegPt7U7JOoxYlVidN2mvGcff46PeMpZ0e01ekVuqkpdddJuE4+am5+4FSiY0SqlQdO3CX3hUzCcGDIbi4Kx7SzGnp6fRgmzrbWoeXYEQV8oHR6dmK3HXB3HNafdgze2hptzcZOu2G0+VYX0tYwLOStbieseiSvxbKrxCQusgX1VPgBxLOmyrxwvxcgjVty2qRHBbQ2iVFK02sf42DAnc3dxeyUNaE7FH8bT48dT1D6ajuu2yoNbWvifHtRcvWZXren2/LVSj7eBFCcxJB0yj15WKjXihd29Fy8RUSEeFQCGAQDUdStA1Kk+OmzG9dIvWXNlA8RIrjRl5z13D+35hfUL20tVlQv8di7M1GjoyLxrk+Rdu46iyEHbIGyQGQsIWtstMwr2sjJVwWr1nEu1oR/wAagG5btLmE2LHb3DrcxEZlPSOvxH+K41NLdWsb2k4ORh0Hq21qD+GzG7fpqyFN3Z8GHEB4k6pmnhHeYADjwzfQNvKA8I7wD8oDu216l8o3lHrGNukfOr5D6jg4ff8Ac
kvrhqSpOMxVMZjiivPToIcQimVe9w9JnHCok/qgocrUgb92/cUNraP/AD+4Ti0vlpe8UKv9bVrhQT4rWS5iA8gzH04rd77PFTX3MC04cZv6WmwMQPHcR28hPlIA9GCHfTz44b13TJl7IhkkiyV9zEaDMqUoc08PRarDHjyKH3bxAkna34gH2Bxfn2jn/wCh2vSXfM/SOGwT2ew0YS06t5dTyBiPKkEWPvHcc0lIeXep6+ab681Ux168lvDHl/1zSYYA2r+xNjAe+8BraHTXhUMTUOX6XM2bI2QjWbhmvwP6Zjw3Mj7JbCnSEVmclKiY8bFqfdmBUzhwkcFGW4Zq9yzkOOaPHX948Qw5+B9BlR2DCqXN5seC327GSPZPONoyiKN1yz1EPu+BztPLXgr+09BmycZ63E6KVNHtrTak0+zarybYYDsOYySI2aGmAydqLt31vV85yneszRDxxhiBrs1j+HKkdRqrNZGs0OZIJKLdEHcC+OIp8nIFEwbiyLpiYOMpFibTa75XeMv+UdhpnCnBsyDjO8njupagMIrKCUHK6n/9kqGLZt3Mc49ksjYh93P+Q9tzK1W+4w4ojf8AtWwikt4aVXe3k0ZUlSOkWsTiQg7N7JAfaCuuA5Z6w1k7RrqMteL5948hb9iK5NXletMSK8ed8kxctpylXyuLgcV2yEqxFrINTAbmtzHAh+FVM5Q+58EcXcOc3eX9rxLYok2h6raFZYXo+UsDHcW0o6CUbPE4plYCoqrAlm4q4c1vlzxhPol0zRarp9wDHKtVqFIeGeM9IDLlkXbVSaGjAjD3vbn1kw+trTPUsmmWYt8iwZU6fl+vteBEYm+xLVDrJFuyLuFvCWxoonJsQDjTTScGb8ZlW6u6krvAco7vk3zFuuHArtw/NWexlapz2zk5ULdckDAwydBJUSZQsi1tI5PcxrbmZwXBrdUGsRf0buMbMk6AVYDqSUUkTpADFKlkand+3xLH1PCcnfVp6Vd1rs7AggCQX3D1IsDtUAAOpfxchZKec5t3iYU4+ttSbx/IUA/Jtdl/5/60+pcin06Rq/TtauoVH6UkSG5A87zufPinfvz6Qmn86k1BFp2/R7aVj4XR5revmSFB5sMYdsu0r3DQdpnlnKhlVmmPgrHEcwmMCVJnJimtiCIiI/A1gSAH5gDas3vUaRHoveE4qs4gAj6lv/PdRR3LelpTixTuz6pJrHIjhm7lJLpp+581tLJbr/piGO69o/4+6Yp+9fX+O/Wf+9lqH/a9bti+D9hPgHqwC3XzMnxt6zi0Z1caP6Nrt0N2rTPeitmqd7xpBKVCyLN+etSMiw0Szk6Lc2nAUXAeSWBBEXSaRiHeR53DUxgTXOAi8UzQT71eo+kdYwYzwLc2xhbrGzxHqOK4rt/an8q9oPuNMZnI8RMQKWP7pP4L1P0EnGuvIURWcTh7qg3RQMCcu8rEjGt56HOkcEXrqOb8Kgt1jCYjuIkvLai9Yqp8fV+BwKWsz2N3V6ihow8XX6OkYIT9St3IorVhqMrGmfDtujrNgDTq3bTDyfrMuzmKxkbMFshG7uRsUfIxjlwwlYqj1mRTh2KgDxJPVpXhMZNYg7c+m2xijMrikjfYP5/hjq1e7E8ohjNYk8HQSfw6PThpDsF9u8ug/RTBTN4gxjtQeo4kPlLLfWN+TLVmKWYqGx1jJwByprIDTq/IKOXqCheYjNyj9MTGTIlwtd/cb+ai/trsH3nz+rDzplr2a3BYf1X2n7h5vWThDPvGf5o2uf8AeFu/9pS2fbP5WP4Rgav/AJyT4ziy0p2ST4a7blUy8mmksrivRFA5GSRXDeiurScENLKkiqXeXiTWUjAKIbw3gOw2y57kp4Xp6TguV93aCT9MdfQMVZWmSVw7fNX+K7RrLuj9jhqey80u+oG2OIydsclOQXmy9qtrV0xrTKQn3bq8O0jsFVWyCiqRnwrbtxBECiUOsJEI9ulB/HiwGQmN51Nwf6ZarH7T6cWHs
d9RZ2dIiPYxMTqJkIyLjGbaOjY2OwFnBlHx0eyRI2ZsWLNtjZJu0ZtG6RU0kkylImQoFKAAABsPHTrwmpXb5R+OCkarYAUD7Phb8MaG1Zd7vssaqtNWbtPN01CP5WEyvjmzVUib7A+cVSx065j1V6pYWgr47IkhLVe0N2ciyWES8l21TPvDh22RWV7FKsirtB8I/HGqfUdPnhaJm2MP0nzdXUcKK9hfMU5hrur6UnkS8VQj8i22Tw5aGZDmIhMQeSoGSgWzN2UBLzEWVnPHSCZR8OoZJjuHdu2d79A9q9eoV9GGPTZDHepToJp6cWb2oD/4Gzb/ALo8k/8As2Z2GY/3F+IevBhL+23wn1Yq1uzJ/mm6HP8AfxWv7PIbFF78q/w4DdP+dj+LFsLsKYNcKR/VvZnm6vpk0xYMi3qrSMy5lu13SzJIH4BkY/ElcjG8ZGPADxUYnmcipO+AfAXDFI32kDZ20lAZWc9IFPT/AIYY9ckKwpGOhmJPm/xxzR9JRpRpc251F6zLLEspe302YisHYsdu0E1z1I8nAls2SpiPBYpwbS8zES8RHpOkuBZJkd6hxCm7VKO3VpWGWEdB2n7satDgU57g+8Ng8XWfu+3DuezJghwAv6kvBGM8pdrzLuSrdX27q96fpah3fFtoRSQJLwElZciU2hWaNB6KYuT1+xVyxKleMwOCKzls0XMUVGqIl79Ndlugo91qg+gnDZq0aPZs7D2loR6QDhSX6ar/ADZMH/3UzP8Asav2ztqXyjeUesYZNI+dXyH1HBk++HSZeI1pzdseoKJxV2rtXPCrHIIJuArtNqcTIckwhuMCTr4TbvsHa5buG6zZX/I6DSIGBu7G5nEo6131zcSJXyrtGKpu+tpl5Yc45tTmUi1vLeExnqO6t4EenkbYcF+7ClrhJTR9a6i0dJDO07MllUmWG8oOEWdkgq2/hpA6YCJumfi1cpJmHdxHaKAH9XaGf/oRot9Y857PWJkP0++0SERP1FoZZklSv6kzIxHUJFPXiWfcV1myv+Ul1pcTjt9nrE28TrCzRQtG9P0tR1B6yjDqwVvPec8facMU27MGTJUkZWapHqOOSQyYyU9LKFMSIrUE2UOTrZyde8KDdPeBAEwqKGIiRRQkRuXfAHEnM/i+y4L4VhMuq3koWprkijG2SeVgDliiWrudpIGVQzsqmUfHvHPD/LjhS74w4mlEWmWkZNBTPLIdkcMQJGaWVqKg2DbmYqiswRpu9tzT3DdWQyBWgymSM0W9pB1evJOHCkNU4MgCjFQ7dYUjGZVimwDcyztzygHlIOHiwCodUxr+NB0bgXu18nezF9zwvoVk0s8xAEtxKdskhFfanuZmCxpm95o4UIVUAo11vVuNe8NzZ7QE3vEmtXixQRAkxwRDZHGDT2YbeIFpHp7qyTOCxYl4jTfgeo6Z8KUDCtLIB4mlQqTR1JnRIg7sM86Od9YrK/IUx+F5OzLhZwYnEYqJTlSIIJpkAKD+Z/MLWeafHeo8da6aXl/OWWOtVhhUBIYEOz2YolVAaAsQXb2mJN4fLfgPSOWfBOn8FaKK2llAFZ6UaaViWmmcbfalkLORUhQQo9lQMB+76mhc+dcOtNTWO4UXeU8FRDklvaMUBO/tmHSrLSMn8JQEXDzHbxdeURDeX9XryH9c4IE2lP3JudK8FcXty44gmycM63KNwzGiwX9AieRbpQsLdP8AVWD3VznHwfvT8sH4n4aHG+jRZtd0qM74KPals6lm8ptyWlHR/TM3Scgwvd2wNbkjoh1Ex1kmHDxbDmQisajmGFbAsvwQguTmibkyZJcfPmqO9cncpgUh1VmSrtsTcZwByz/7yfJODnRy+k06zVF4vsM09hIaD+pT27dmPRHcqAhqQFkWKRqiOhhlyQ5tS8reNEvbpmPDV5lhvEFTRK+zMFHS8DEsNhLIZEFC9Q/dCTcPZYaJsVelGE3AT0YxmYSZi3SL6MlomTapPY6SjnrY6jd2xfM1y
KpKkMYihDAYBEB2okvbK7028l07UIpIL+CRo5I3Uq8ciMVdHU0KsrAqykAggg4t1tLu2v7WO+spEls5o1eN0IZXRwGVlYVBVlIII2EGowo137LZETWrukV2PXTXfUzCVcj50E1CHFnIzFpuE+2YrlKYTpLhDyDZxwmABFNyQweA7XK/+eekXlhyZv8AUrlStvfa9M8VQRmSOC2hZx4RvEdKj8yMOrFSPfy1S1vubllp9uwaey0SFJaEey8k9xKFPgO7dHoepwevB7O1LCO4Ht/acmr0h013les02UhwEB6Sfv1rmY5Qu8AHgXjnySgfmNtXp3vb+HUe8ZxNNAQY0uYIqj9UNpbxOPM6MPNiePdUsZrDkDw5FOCHe3mkof0y3U8iHzoynz4IbtGzEhcU/evr/HfrP/ey1D/tet2xfB+wnwD1YBbr5mT429Zxbs0T/Yem/wB1K7/qhnsJN7x8uDhfdHkwn59SF2gcsZuynQtZGkPEdmyddbui0x7n2iY/hlJewO5KBjeCh5STimZTu3pFoBiaEllg3EblYRhgKIqrqA76beIiGGYgKNoJ+0ff6cMWrWLyOLiBSzHYwH2H7j5sDt7MnY61K3LWvR73rM07ZDxNgzBwtspvmOTqu6gWmTLpByDY1EorJrIFDzSP8+AknLEMkq1Ujo5Rotwi8T39F5fRCArCwLts2dQ6zjl0/TpmuA1whWNdu0dJ6h95/nixA2HsFOKmzvGf5o2uf94W7/2lLYrs/lY/hGAm/wDnJPjOLJVDHkll3tbNsVQqIuJrJWgdrQ4ZAo7jKy9t09pwMYmUd4eJnz9MNhzMEus56BJX7cFmQyWWQdJip6VxVoaT8eYhyNqgwvirUdabNjXEt2yRC0TIVwgDxUdPUptPvBgkJpZayMJCLjWULOOm6kio5bqAgyTXNw8RQ2KJWdYmeMAuBUePAbAkbzKkpIQmhPgw7p/KP6KP+ZHVL/peJv8AhtsyfVp/0p9v44Ivodv+t/s/DE/lH9FH/Mjql/0vE3/DbZfVp/0p9v44X0O3/W/2fhjdOnH6Y/SVpoz5h3UHUc+6jZuz4YyLVckQUNYHONDQcrJ1OWby7OPlgjaEwfjHO1mwEWBFZNQUxECmAfEPEmpyyxmMqtGFOv8AHGyLR4IZVlVnqpB6urzYPrn4pj4JzWQhRMc+JMjlKUobzGManTIFKUA8RERHw24I/wBxfKPXhzl/bb4T6sVZ3Zrct2ndK0NKuVk0Ez5/qLYp1DAUpnD3q2bREBH7VHDpciZA/KYwB+XYovPlX+HAZYbL2P4hi2M2FMG2E9Pq8sbTEphXRxlxo1WVhKXkzJ2P5p0QhzpNn2RaxW5+AKsYoCVIFk8avwAR3AJgAPt3bPGkMA7p1kA+j/HDFrqExxv1Aken/DHlfSKZvqrjFurHTcu/bNrvEX+tZvi4xVUpXk1VbHXY6hzr9gjvE6rasS9Wjk3ZtwAmaXbB48fgtXQ50k/LSn34xoci5Hh/NWvm6P48uHINmfD9gIn1FF1qdS7SOpiNss8xh5C+usUUylsnSnC6stqNlql2jyOKSDeZw9TrdYkX5wDwI1ZLKD4EHbt05SbtSOqpPoOG7VWVbFwTtNAPLUH7sJ2fTVf5smD/AO6mZ/2NX7Z41L5RvKPWMMWkfOr5D6jh1bvB6M7DqhwTDXPG0OpNZVwk9lZ6LgmLcV5a3U2abNUrbXYpFIAVeTaB4tm/Zo/GdbpFW6JDLOCAMqe5Xzv03lPzBn0PiiYQcIa9HHFJKzUjt7mJmNvNITsWI7ySKRtgXeJI7BI2OI6d8Dk5qPM/gSHWeGoTPxVojvKkSislxbyBRPDGBtaQZI5Y12lt28aAvIowqbp61O510lXWQtmGbc9p0y9b+T2WIeMW0lCTrVqsoJGFirssguydLR7gxxRUMQjpqc5+Uonxn4re+ZPKjl/zj0KPRuOLKO9sY23kEiuySxMwFXhmjIZQ4pmUExyALnVsq0ql5e8z+
O+U2tyatwbePZ3rru5o2VXjlVSfZmhkBVihrlNA6EtlZatXI9Q2rvUprGn4BPLdzk7iZi7TaVKlQMW3i4BnJyBisyDEVaCbJIvZyQOoCQLqEcPVAMCQH4OEgNnLbkxyu5JadctwbYxWQkQtcXUshkmaNPaO8nlYlYkAzZAUiFM5WtThx5h83eZXOO/t14uvZbwxuFt7aJAkSu/sjdwRABpXrlzENIa5Q1KDDMfaW7dTvTDV1835jiE0c632IKzi4F0Qiq+Lqa8FJypFK+JiI3CwmTTPImAROzQIRoUSGF2ClWHfG7zEPNfVl4C4JmLcv9OmzSTLUC/uVqokHWbaGpEI6JHLTEECErZh3S+7rLyw0tuN+MYQvHV/DlSJtpsbdqExnqFxLQGY9MahYgQTKGNJtBjE0sfNVJJdJRFZNNZFZM6SqSpCqJKpKFEiiaiZwEp0zlEQEBAQEB29KzIwdCQ4NQRsII6CD1EYwyq6lWAKkUIPQR4DhJ3uxdsue0rZEl8yYhrTp7pqvMod8mnEtVHCeH7DIqmO5qUwmiU5mlScujiMK9MAJJkODFUQWSSUdXS91DvJafzT0CHg7i25VOZNlFlJcgG/iQUE8ZPvTquy4jFWJBnUZGdYqp+8lyLvuXesS8U8OQM/Ad3Jm9gE9ilY7YXp7sJP7Eh2AHdMcyqZOdNO3cx1j6ZKF7Z4xyiX0Q2Kp5FA2uvwtub1MzlVddwFXVm2blzFNVXLgyotOM7IFRMcEQMc4m+r8wO7FyZ5m66OJuKNLP1tqb2WCaW3M9AAN+ImUOwUBd5QSZaLnoFp8m4N7xHNfl9o50HhzUR9IWu7jmijnENSSdyZFJQEktkqY81TkqTXEcPY3znr11GtK83kJq7ZHyXYPOLveZgqr5GCiTLIEm7hZHCYJIMYSAYcJUkScog8KLNqTjOgkJtxbxZwD3fuWL6lLHBY8M6Xbbu1tY6IZZKExW0INS0sr1LMcx2vNK2VZHx884c4T44548x00+KSa94h1K43lzcyVYRR1AkuJiKBY4loAoyjYkMQqUTD92PKNA4xoVKxxV0BbVuh1Sv0+CRNwcwkTXIprEMOcKZCEOuZs0KKhgAOI4iP5dvzy8Sa/qHFXEN9xNqzZtT1C8muZTtoZJpGkelakDMxoK7BQYvd4e0Ox4Z0Gy4d0tcunWFrFbxDrCQosa1pTbRRU9ZqcZjsy4eMU/WvkQ+e7WgO8N3zZah/Hf4eGXrfv8fzbF8H7CfAPVgFuvmZPjb1nFu1RP8AYem/3Urv+qGewi3vHy4OF90eTGV7Yx6xNlhYmywsVNneLEB7o2ufcO//APQ14Dw/pB0kAh/1CGxXZ/Kx/CMBN/8AOSfGcWiOksQHStpnEB3gOn3DIgIeICA45re4QHYYm/db4j68GMH7KfCPVhFz6gfsyZDwBmDIWtTTrTZK16bcpTUleMnwtZjnD59gm+TbpaQtT2TjGRFlUMXWWVWUftJBMhGkS4cqMFit0iMjuXzT7xZEEMhpINg8Y/HA5qmntFIbiIViY1PiPX5vV0eDGv8AQr9TZqz0p41ruHsxY9reqak02NZQlQnbJaZSk5SiIJgQG7KGk7q3irSxtbGMZEKk1O9jRflIQCqO1CgUC+p9MhlYuhKMfOPRjzbaxPCgjkAdR0baH07a46Cz19WxqXuVYk4LT9ptxrhCafomboXi2WySzBMw4H4d72GhFq3SKySSS3CBBft5Nr47zIG+zbXHpMQNZGLDwdH442Sa5MwpEgU+Emv4Y6v+mhz33MsoZQzDMZXgr5lnSPlqWsd+tuccqzL5j6bzSLZMDOsWvZVqsa7pWnp0GEvDRxU42KTSQdFWaGRFpIatSjtlRQlBKNlB4PH4PL/A36RLeO7FwWgY1JPh8Xh8Y6vW5TIMGkowexj9AjljItHLB62UDem4aPETt3KCgflIqioYo/mHZm6NuH8iooejFSZrH01Zx7X+uGw0RUs5TbTiLJTPIuB8hJt1E
0rFVIizDO4syRWnrhJRpIfDHodQUorFaSbZw0W+9QVKBbDKlzAG6QRQj1jAPPDJZ3BXaGU1B8XUcH2qv1dmoWOpkdGW/SJiSz3ttHJN39uichWyr1+SkE0gIaSGmKQdgdMyuFA4zoJy/CAiIEEhdwA3nSIy1VchfJ9+HNdclC0aNS3hqfV/PDVOoLTtW+6d23mGNspIx1UldQeEMbZJiJiITcSLLG+VZOrwl5rE/DA6OhIPomv2ZwVFZEVEVn8UddsZQnPMYGuOQ2tzmTaFYjyjow9SxC8tMj7C6g+Q9P8AHixWuScRra7QGsVE6pLFgzUJiWUcniJhJt11XulZeGWZHkIlV+1GCyHjK5MSHIPEmogsXiTVIk6RMRIkBgvIepoz9n4EYEiLiwn61lX0H8QcHxrX1depJjTm8datJWF7De0WSaC1rirrdK3XHT0iYEM+VpazSfepkVOHEZJOZIG8RApihuAOA6RHXY7ZfIPX/LDmNcmC0ZFLeGp9X88Ck1OZ37ifeFr+Z9VuXXLVLAGkeqqWB+zh2EpVcI44c2icgICNplKYnNNL2HJtvfS7PjO8dO5EzFHmOHKTVJAm3VFHb2ZWJP3HPnPjPixxTS3d+Gmf9pB5APEPGcbd+mrMUO7Lg0omADGqmaOEoiG827DN+37g+0d2/wAdvGpfKN5R6xjZpHzq+Q+o4sz9hrBdgC/ch/hWes1vfPqPdXqVPVny9e3fuD1/Efi9fdV+I803fb1X4nh4eLw3bWGd2D/t19DX+wMv9oZB2f6x2zseTZ8pl9nd/wD1+xWtNtcQM7yH/Vb603985v7qzHf/AEnsna8235rNtz/H7dKV6sZN20P4YfqFf5deP3X3H8k98/QHuvyOX+N9FdD+N4eV+n6X77l79/3fFs1d6f8A7XfTF/5Mp/Z+ze/Su2fT619ntWf2en3N57OalPaphz7tH/WH6i3/AB1X+69u7+p9l7dSntdmy+10e9k9qla+zXBwtoEYnBibLCxNlhY8ax+nvIJr1b5N6W8rfeovUfQ+QeS9Mp5n515n+rvK+j4+fz/uuXv4/h37dunfUfqEH0jffVN6u53Obe7yoybvJ7efNTLl9qtKbccl/wBh7FN9T3X07dtvd7l3e7oc+8z+zky1zZvZpWuzCpupH+CF7sS2/wB2+Z15ud8uHtj7V83nm5nlvH8HR8e/9H8HD/V8N21r/LT/ALxf2nFT6Rl3ez6x23t1KbM/+by7a9O3FaXML/p7/csub6pXPt+ldk7HWu3JX8vk2eDZg8Wgf5NPaJL5PPRHkn4X1d5P6W9feZ8KnR+5PkH4rzbp9/I6j4OXv5f/AHtoF94D/mn+7z/zL27tvtdn3m/7Jk2Zuxb32d3X3sm3N73ViaHJH/iT+1h/xR2Psns7/d7ntOfbl7Xuvaz093Nsp7vXjunb4Pj7RibLCwvHlb+XS9z8le7HyKe6fuBbvcr1J7U+pPX/AKhfesfPet/Geceoup6vnfe8/j4/i37OKfUcgyZ8lBTp6MNb/Ss5z7rPXb0Vr14YMifLvKozyfp/KfL2XlfScHS+XdMn0PTcv4On6bh4N3hw7t2zcenb04cxSmzox6GyxnE2WFibLCwAbUR/L8+9mW/mK+Sn3z9YzXur639r/WnrT4POvOvNP1l5v1G/m877zncXF47d8f1Ddjd58lNnThsl+mbxt7u95XbWla4OTjz0Z6Ao3tz5T7e+j6z6D8g6fyL0Z5Ky9L+S9J+E8p8k5HTcr7vk8PD4btuFs2Y5vert8uHFMuUZPcps8nVjKXXTdM463kdHyFur6rl9N03LNz+o5v3XI5W/j4vh4d+/w2xj1hNzuffy4vuPIe4HN9d9a59WfIf7IcHnnH+P9T9N+rvPOo4up4fvOdxcz49+zza/Ucvs+7/mrhgvPpOf2ve68lPtxiPbo/lqfcNh5J1/n/VN/JPn59kPTHmnGHQ9L1X6o6vquHl9T91zOHf4bZufqWXb0f5K4
xafSc+zp/z5aYc9rfpz0/C+kPJPSvlbH076b6H0/wCS9On5b5L5X+rvK+k4eRyPuuXu4fDdszGtdvTh/FKDLTLj29sYzgbfc2/h3exh/wCIX7P+kN7/ANDev/Q3r7zvko9d7R+rfx/qLpuDndF4crdzvh3bdNr2jef7eubrpWnnxyXnZd3/ALrLl6q0r5sKP4q/llPdBpzPmm4PM/H3V9nPa/dzf/F9N+I8s/6PHg2dn+p5fy+atcMafSM/5/PSmHzMY+hPbbHvtd5N7Z+h6n7denOm9PehPIWHpHyHovwfk3p/p+l5X3XI4eH4d2zE2bMc3vV2+XBKmXIMlMlBTydWOJe5P/D39jHH8Qb2Z9Ebn3o/3K9CesvO+Wj1XtP6v/Hep+RwcfQfFyv0vwbbrbtG8/2+bN4q/bTHPd9l3f8AusuXqrSvmrhPjE38sr7zRvH83fL86/8Atn2Z9md3ON/5l0/4nyX/ALeDds8P9Tyfk81a4Yo/pG8/P56Uw5Faf4dfyMS/XfLZ8gfpyF8w9O+gfYLyL1LCeTb/ACv/ANHb/VvQ7uP7zzDg4/vtmcdo3+zN2ivjrh+bsvZtuTs1PFl/DpxyJoz/AIHvzB1L5Lfk++Yfy+zejvab259c9B6amPVXk/p/9a8r0v1fVcvw6bj4vh37bZu3bs77Pu/HWmNFv9O3o7Pu971UpXx4/9k=)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LERqQn5v8-ak" - }, - "source": [ - "# **Getting to know Llama 3.1: Everything you need to start building**\n", - "Our goal in this session is to provide a guided tour of Llama 3.1 with comparison with Llama 2, including understanding different Llama 3.1 models, how and where to access them, Generative AI and Chatbot architectures, prompt engineering, RAG (Retrieval Augmented Generation), Fine-tuning and more. All this is implemented with a starter code for you to take it and use it in your Llama 3.1 projects." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ioVMNcTesSEk" - }, - "source": [ - "### **0 - Prerequisites**\n", - "* Basic understanding of Large Language Models\n", - "* Basic understanding of Python" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install matplotlib ipywidgets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "executionInfo": { - "elapsed": 248, - "status": "ok", - "timestamp": 1695832228254, - "user": { - "displayName": "Amit Sangani", - "userId": "11552178012079240149" - }, - "user_tz": 420 - }, - "id": "ktEA7qXmwdUM" - }, - "outputs": [], - "source": [ - "# presentation layer code\n", - "\n", - "import base64\n", - "from IPython.display import Image, display\n", - "import matplotlib.pyplot as plt\n", - "\n", - "def mm(graph):\n", - " graphbytes = graph.encode(\"ascii\")\n", - " base64_bytes = base64.b64encode(graphbytes)\n", - " base64_string = base64_bytes.decode(\"ascii\")\n", - " display(Image(url=\"https://mermaid.ink/img/\" + base64_string))\n", - "\n", - "def genai_app_arch():\n", - " mm(\"\"\"\n", - " flowchart TD\n", - " A[Users] --> B(Applications e.g. mobile, web)\n", - " B --> |Hosted API|C(Platforms e.g. Custom, HuggingFace, Replicate)\n", - " B -- optional --> E(Frameworks e.g. LangChain)\n", - " C-->|User Input|D[Llama 3]\n", - " D-->|Model Output|C\n", - " E --> C\n", - " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", - " \"\"\")\n", - "\n", - "def rag_arch():\n", - " mm(\"\"\"\n", - " flowchart TD\n", - " A[User Prompts] --> B(Frameworks e.g. 
LangChain)\n", - " B <--> |Database, Docs, XLS|C[fa:fa-database External Data]\n", - " B -->|API|D[Llama 3]\n", - " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", - " \"\"\")\n", - "\n", - "def llama2_family():\n", - " mm(\"\"\"\n", - " graph LR;\n", - " llama-2 --> llama-2-7b\n", - " llama-2 --> llama-2-13b\n", - " llama-2 --> llama-2-70b\n", - " llama-2-7b --> llama-2-7b-chat\n", - " llama-2-13b --> llama-2-13b-chat\n", - " llama-2-70b --> llama-2-70b-chat\n", - " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", - " \"\"\")\n", - "\n", - "def llama3_family():\n", - " mm(\"\"\"\n", - " graph LR;\n", - " llama-3 --> llama-3-8b\n", - " llama-3 --> llama-3-70b\n", - " llama-3-8b --> llama-3-8b\n", - " llama-3-8b --> llama-3-8b-instruct\n", - " llama-3-70b --> llama-3-70b\n", - " llama-3-70b --> llama-3-70b-instruct\n", - " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", - " \"\"\")\n", - " \n", - "def llama3_1_family():\n", - " mm(\"\"\"\n", - " graph LR;\n", - " llama-3-1 --> llama-3-8b\n", - " llama-3-1 --> llama-3-70b\n", - " llama-3-1 --> llama-3-4050b\n", - " llama-3-1-8b --> llama-3-1-8b\n", - " llama-3-1-8b --> llama-3-1-8b-instruct\n", - " llama-3-1-70b --> llama-3-1-70b\n", - " llama-3-1-70b --> llama-3-1-70b-instruct\n", - " llama-3-1-405b --> llama-3-1-405b-instruct\n", - " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", - " \"\"\")\n", - "\n", - "import ipywidgets as widgets\n", - "from IPython.display import display, Markdown\n", - "\n", - "# Create a text widget\n", - "API_KEY = widgets.Password(\n", - " value='',\n", - " placeholder='',\n", - " description='API_KEY:',\n", - " disabled=False\n", - ")\n", - "\n", - "def md(t):\n", - " display(Markdown(t))\n", - "\n", - "def bot_arch():\n", - " mm(\"\"\"\n", - " graph LR;\n", - " user --> prompt\n", - " prompt --> i_safety\n", - " 
i_safety --> context\n", - " context --> Llama_3\n", - " Llama_3 --> output\n", - " output --> o_safety\n", - " i_safety --> memory\n", - " o_safety --> memory\n", - " memory --> context\n", - " o_safety --> user\n", - " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", - " \"\"\")\n", - "\n", - "def fine_tuned_arch():\n", - " mm(\"\"\"\n", - " graph LR;\n", - " Custom_Dataset --> Pre-trained_Llama\n", - " Pre-trained_Llama --> Fine-tuned_Llama\n", - " Fine-tuned_Llama --> RLHF\n", - " RLHF --> |Loss:Cross-Entropy|Fine-tuned_Llama\n", - " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", - " \"\"\")\n", - "\n", - "def load_data_faiss_arch():\n", - " mm(\"\"\"\n", - " graph LR;\n", - " documents --> textsplitter\n", - " textsplitter --> embeddings\n", - " embeddings --> vectorstore\n", - " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", - " \"\"\")\n", - "\n", - "def mem_context():\n", - " mm(\"\"\"\n", - " graph LR\n", - " context(text)\n", - " user_prompt --> context\n", - " instruction --> context\n", - " examples --> context\n", - " memory --> context\n", - " context --> tokenizer\n", - " tokenizer --> embeddings\n", - " embeddings --> LLM\n", - " classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;\n", - " \"\"\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "i4Np_l_KtIno" - }, - "source": [ - "### **1 - Understanding Llama 3.1**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PGPSI3M5PGTi" - }, - "source": [ - "### **1.1 - What is Llama 3.1?**\n", - "\n", - "* State of the art (SOTA), Open Source LLM\n", - "* 8B, 70B, 405B - base and instruct models\n", - "* Choosing model: Size, Quality, Cost, Speed\n", - "* Pretrained + Chat\n", - "* [Meta Llama 3.1 Blog](https://ai.meta.com/blog/meta-llama-3-1/)\n", - "* [Getting Started with Meta 
Llama](https://llama.meta.com/docs/get-started)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 240 - }, - "executionInfo": { - "elapsed": 248, - "status": "ok", - "timestamp": 1695832233087, - "user": { - "displayName": "Amit Sangani", - "userId": "11552178012079240149" - }, - "user_tz": 420 - }, - "id": "OXRCC7wexZXd", - "outputId": "1feb1918-df4b-4cec-d09e-ffe55c12090b" - }, - "outputs": [], - "source": [ - "llama2_family()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "llama3_family()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "llama3_1_family()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aYeHVVh45bdT" - }, - "source": [ - "### **1.2 - Accessing Llama 3.1**\n", - "* Download + Self Host (i.e. [download Llama](https://ai.meta.com/resources/models-and-libraries/llama-downloads))\n", - "* Hosted API Platform (e.g. [Groq](https://console.groq.com/), [Replicate](https://replicate.com/meta/meta-llama-3-8b-instruct), [Together](https://api.together.xyz/playground/language/meta-llama/Llama-3-8b-hf), [Anyscale](https://app.endpoints.anyscale.com/playground))\n", - "\n", - "* Hosted Container Platform (e.g. 
[Azure](https://techcommunity.microsoft.com/t5/ai-machine-learning-blog/introducing-llama-2-on-azure/ba-p/3881233), [AWS](https://aws.amazon.com/blogs/machine-learning/llama-2-foundation-models-from-meta-are-now-available-in-amazon-sagemaker-jumpstart/), [GCP](https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/139))\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kBuSay8vtzL4" - }, - "source": [ - "### **1.3 - Use Cases of Llama 3.1**\n", - "* Content Generation\n", - "* Summarization\n", - "* General Chatbots\n", - "* RAG (Retrieval Augmented Generation): Chat about Your Own Data\n", - "* Fine-tuning\n", - "* Agents" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sd54g0OHuqBY" - }, - "source": [ - "## **2 - Using and Comparing Llama 3 and Llama 2**\n", - "\n", - "We will be using Llama 2 7b & 70b chat and Llama 3 8b & 70b instruct models hosted on [Replicate](https://replicate.com/search?query=llama) to run the examples here. You will need to first sign in with Replicate with your github account, then create a free API token [here](https://replicate.com/account/api-tokens) that you can use for a while. 
You can also use other Llama 3 cloud providers such as [Groq](https://console.groq.com/), [Together](https://api.together.xyz/playground/language/meta-llama/Llama-3-8b-hf), or [Anyscale](https://app.endpoints.anyscale.com/playground).\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "h3YGMDJidHtH" - }, - "source": [ - "### **2.1 - Install dependencies**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VhN6hXwx7FCp" - }, - "outputs": [], - "source": [ - "!pip install replicate" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### **2.2 - Create helpers for Llama 2 and Llama 3**\n", - "First, set your Replicate API token as environment variables.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8hkWpqWD28ho" - }, - "outputs": [], - "source": [ - "import os\n", - "from getpass import getpass\n", - "\n", - "REPLICATE_API_TOKEN = getpass()\n", - "\n", - "os.environ[\"REPLICATE_API_TOKEN\"] = REPLICATE_API_TOKEN" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create Llama 2 and Llama 3 helper functions - for chatbot type of apps, we'll use Llama 3 instruct and Llama 2 chat models, not the base models." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bVCHZmETk36v" - }, - "outputs": [], - "source": [ - "import replicate\n", - "\n", - "def llama2_7b(prompt):\n", - " output = replicate.run(\n", - " \"meta/llama-2-7b-chat\",\n", - " input={\"prompt\": prompt}\n", - " )\n", - " return ''.join(output)\n", - "\n", - "def llama2_70b(prompt):\n", - " output = replicate.run(\n", - " \"meta/llama-2-70b-chat\",\n", - " input={\"prompt\": prompt}\n", - " )\n", - " return ''.join(output)\n", - "\n", - "def llama3_8b(prompt):\n", - " output = replicate.run(\n", - " \"meta/meta-llama-3-8b-instruct\",\n", - " input={\"prompt\": prompt}\n", - " )\n", - " return ''.join(output)\n", - "\n", - "def llama3_70b(prompt):\n", - " output = replicate.run(\n", - " \"meta/meta-llama-3-70b-instruct\",\n", - " input={\"prompt\": prompt}\n", - " )\n", - " return ''.join(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5Jxq0pmf6L73" - }, - "source": [ - "### **2.3 - Basic QA with Llama 2 and 3**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "H93zZBIk6tNU" - }, - "outputs": [], - "source": [ - "prompt = \"The typical color of a llama is: \"\n", - "output = llama2_7b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama3_8b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama2_7b(\"The typical color of a llama is what? Answer in one word.\")\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama3_8b(\"The typical color of a llama is what? 
Answer in one word.\")\n", - "md(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note: Llama 3 follows instructions better than Llama 2 in single-turn chat.**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cWs_s9y-avIT" - }, - "source": [ - "## **3 - Chat conversation**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "r4DyTLD5ys6t" - }, - "source": [ - "### **3.1 - Single-turn chat**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EMM_egWMys6u" - }, - "outputs": [], - "source": [ - "prompt_chat = \"What is the average lifespan of a Llama? Answer the question in few words.\"\n", - "output = llama2_7b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "sZ7uVKDYucgi" - }, - "outputs": [], - "source": [ - "output = llama3_8b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "WQl3wmfbyBQ1" - }, - "outputs": [], - "source": [ - "# example without previous context. LLM's are stateless and cannot understand \"they\" without previous context\n", - "prompt_chat = \"What animal family are they? 
Answer the question in few words.\"\n", - "output = llama2_7b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama3_8b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama2_70b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama3_70b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note: Llama 3 70b doesn't hallucinate.**" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### **3.2 - Multi-turn chat**\n", - "Chat app requires us to send in previous context to LLM to get in valid responses. Below is an example of Multi-turn chat." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "t7SZe5fT3HG3" - }, - "outputs": [], - "source": [ - "# example of multi-turn chat, with storing previous context\n", - "prompt_chat = \"\"\"\n", - "User: What is the average lifespan of a Llama?\n", - "Assistant: 15-20 years.\n", - "User: What animal family are they?\n", - "\"\"\"\n", - "output = llama2_7b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama3_8b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note: Llama 2 and 3 both behave well for using the chat history for follow up questions.**" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### **3.3 - Multi-turn chat with more instruction**\n", - "Adding the instructon \"Answer the question with one word\" to see the difference of Llama 2 and 3." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# example of multi-turn chat, with storing previous context\n", - "prompt_chat = \"\"\"\n", - "User: What is the average lifespan of a Llama?\n", - "Assistant: Sure! The average lifespan of a llama is around 20-30 years.\n", - "User: What animal family are they?\n", - "\n", - "Answer the question with one word.\n", - "\"\"\"\n", - "output = llama2_7b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama2_70b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama3_8b(prompt_chat)\n", - "md(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Both Llama 3 8b and Llama 2 70b follows instructions (e.g. \"Answer the question with one word\") better than Llama 2 7b in multi-turn chat.**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "moXnmJ_xyD10" - }, - "source": [ - "### **4.2 - Prompt Engineering**\n", - "* Prompt engineering refers to the science of designing effective prompts to get desired responses\n", - "\n", - "* Helps reduce hallucination\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "t-v-FeZ4ztTB" - }, - "source": [ - "#### **4.2.1 - In-Context Learning (e.g. Zero-shot, Few-shot)**\n", - " * In-context learning - specific method of prompt engineering where demonstration of task are provided as part of prompt.\n", - " 1. Zero-shot learning - model is performing tasks without any\n", - "input examples.\n", - " 2. Few or “N-Shot” Learning - model is performing and behaving based on input examples in user's prompt." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6W71MFNZyRkQ" - }, - "outputs": [], - "source": [ - "# Zero-shot example. To get positive/negative/neutral sentiment, we need to give examples in the prompt\n", - "prompt = '''\n", - "Classify: I saw a Gecko.\n", - "Sentiment: ?\n", - "\n", - "Give one word response.\n", - "'''\n", - "output = llama2_7b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "MCQRjf1Y1RYJ" - }, - "outputs": [], - "source": [ - "output = llama3_8b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note: Llama 3 has different opinions than Llama 2.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8UmdlTmpDZxA" - }, - "outputs": [], - "source": [ - "# By giving examples to Llama, it understands the expected output format.\n", - "\n", - "prompt = '''\n", - "Classify: I love Llamas!\n", - "Sentiment: Positive\n", - "Classify: I dont like Snakes.\n", - "Sentiment: Negative\n", - "Classify: I saw a Gecko.\n", - "Sentiment:\n", - "\n", - "Give one word response.\n", - "'''\n", - "\n", - "output = llama2_7b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "M_EcsUo1zqFD" - }, - "outputs": [], - "source": [ - "output = llama3_8b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note: Llama 2, with few shots, has the same output \"Neutral\" as Llama 3, but Llama 2 doesn't follow instructions (Give one word response) well.**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mbr124Y197xl" - }, - "source": [ - "#### **4.2.2 - Chain of Thought**\n", - "\"Chain of thought\" enables complex reasoning through logical step by step thinking and generates meaningful and contextually relevant responses." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Xn8zmLBQzpgj" - }, - "outputs": [], - "source": [ - "# Standard prompting\n", - "prompt = '''\n", - "Llama started with 5 tennis balls. It buys 2 more cans of tennis balls. Each can has 3 tennis balls.\n", - "How many tennis balls does Llama have?\n", - "\n", - "Answer in one word.\n", - "'''\n", - "\n", - "output = llama3_8b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lKNOj79o1Kwu" - }, - "outputs": [], - "source": [ - "output = llama3_70b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note: Llama 3-8b did not get the right answer because it was asked to answer in one word.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# By default, Llama 3 models follow \"Chain-Of-Thought\" prompting\n", - "prompt = '''\n", - "Llama started with 5 tennis balls. It buys 2 more cans of tennis balls. 
Each can has 3 tennis balls.\n", - "How many tennis balls does Llama have?\n", - "'''\n", - "\n", - "output = llama3_8b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama3_70b(prompt)\n", - "md(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note: By default, Llama 3 models identify word problems and solves it step by step!**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "prompt = \"\"\"\n", - "15 of us want to go to a restaurant.\n", - "Two of them have cars\n", - "Each car can seat 5 people.\n", - "Two of us have motorcycles.\n", - "Each motorcycle can fit 2 people.\n", - "Can we all get to the restaurant by car or motorcycle?\n", - "Think step by step.\n", - "Provide the answer as a single yes/no answer first.\n", - "Then explain each intermediate step.\n", - "\"\"\"\n", - "output = llama3_8b(prompt)\n", - "print(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output = llama3_70b(prompt)\n", - "print(output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note: Llama 3 70b model works correctly in this example.**" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Summary: Llama 2 often needs encouragement for step by step thinking to correctly reasoning. 
Llama 3 understands, reasons and explains better, making chain of thought unnecessary in the cases above.**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "C7tDW-AH770Y" - }, - "source": [ - "### **4.3 - Retrieval Augmented Generation (RAG)**\n", - "* Prompt Eng Limitations - Knowledge cutoff & lack of specialized data\n", - "\n", - "* Retrieval Augmented Generation(RAG) allows us to retrieve snippets of information from external data sources and augment it to the user's prompt to get tailored responses from Llama 2.\n", - "\n", - "For our demo, we are going to download an external PDF file from a URL and query against the content in the pdf file to get contextually relevant information back with the help of Llama!\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 259 - }, - "executionInfo": { - "elapsed": 329, - "status": "ok", - "timestamp": 1695832267093, - "user": { - "displayName": "Amit Sangani", - "userId": "11552178012079240149" - }, - "user_tz": 420 - }, - "id": "Fl1LPltpRQD9", - "outputId": "4410c9bf-3559-4a05-cebb-a5731bb094c1" - }, - "outputs": [], - "source": [ - "rag_arch()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JJaGMLl_4vYm" - }, - "source": [ - "#### **4.3.1 - LangChain**\n", - "LangChain is a framework that helps make it easier to implement RAG." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install langchain\n", - "!pip install langchain-community\n", - "!pip install sentence-transformers\n", - "!pip install faiss-cpu\n", - "!pip install bs4\n", - "!pip install langchain-groq" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### **4.3.2 - LangChain Q&A Retriever**\n", - "* ConversationalRetrievalChain\n", - "\n", - "* Query the Source documents\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "gAV2EkZqcruF" - }, - "outputs": [], - "source": [ - "from langchain_community.embeddings import HuggingFaceEmbeddings\n", - "from langchain_community.vectorstores import FAISS\n", - "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", - "from langchain_community.document_loaders import WebBaseLoader\n", - "import bs4\n", - "\n", - "# Step 1: Load the document from a web url\n", - "loader = WebBaseLoader([\"https://huggingface.co/blog/llama31\"])\n", - "documents = loader.load()\n", - "\n", - "# Step 2: Split the document into chunks with a specified chunk size\n", - "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)\n", - "all_splits = text_splitter.split_documents(documents)\n", - "\n", - "# Step 3: Store the document into a vector store with a specific embedding model\n", - "vectorstore = FAISS.from_documents(all_splits, HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-mpnet-base-v2\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You'll need to first sign in at [Groq](https://console.groq.com/login) with your github or gmail account, then get an API token to try Groq out for free." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from getpass import getpass\n", - "\n", - "GROQ_API_TOKEN = getpass()\n", - "\n", - "os.environ[\"GROQ_API_KEY\"] = GROQ_API_TOKEN" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_groq import ChatGroq\n", - "llm = ChatGroq(temperature=0, model_name=\"llama3-8b-8192\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.chains import ConversationalRetrievalChain\n", - "\n", - "# Query against your own data\n", - "chain = ConversationalRetrievalChain.from_llm(llm,\n", - " vectorstore.as_retriever(),\n", - " return_source_documents=True)\n", - "\n", - "# no chat history passed\n", - "result = chain({\"question\": \"What’s new with Llama 3?\", \"chat_history\": []})\n", - "md(result['answer'])\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "CelLHIvoy2Ke" - }, - "outputs": [], - "source": [ - "# This time your previous question and answer will be included as a chat history which will enable the ability\n", - "# to ask follow up questions.\n", - "query = \"What two sizes?\"\n", - "chat_history = [(query, result[\"answer\"])]\n", - "result = chain({\"question\": query, \"chat_history\": chat_history})\n", - "md(result['answer'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TEvefAWIJONx" - }, - "source": [ - "## **5 - Fine-Tuning Models**\n", - "\n", - "* Limitatons of Prompt Eng and RAG\n", - "* Fine-Tuning Arch\n", - "* Types (PEFT, LoRA, QLoRA)\n", - "* Using PyTorch for Pre-Training & Fine-Tuning\n", - "\n", - "* Evals + Quality\n", - "\n", - "Examples of Fine-Tuning:\n", - "* [Meta Llama Recipes](https://github.com/meta-llama/llama-recipes/tree/main/recipes/finetuning)\n", - "* [Hugging Face fine-tuning with Llama 
3](https://huggingface.co/blog/llama3#fine-tuning-with-%F0%9F%A4%97-trl)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_8lcgdZa8onC" - }, - "source": [ - "## **6 - Responsible AI**\n", - "\n", - "* Power + Responsibility\n", - "* Hallucinations\n", - "* Input & Output Safety\n", - "* Red-teaming (simulating real-world cyber attackers)\n", - "* [Responsible Use Guide](https://ai.meta.com/llama/responsible-use-guide/)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pbqb006R-T_k" - }, - "source": [ - "## **7 - Conclusion**\n", - "* Active research on LLMs and Llama\n", - "* Leverage the power of Llama and its open community\n", - "* Safety and responsible use is paramount!\n", - "\n", - "* Call-To-Action\n", - " * [Replicate Free Credits](https://replicate.fyi/connect2023) for Connect attendees!\n", - " * This notebook is available through Llama Github recipes\n", - " * Use Llama in your projects and give us feedback\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gSz5dTMxp7xo" - }, - "source": [ - "#### **Resources**\n", - "- [Meta Llama 3.1 Blog](https://ai.meta.com/blog/meta-llama-3-1/)\n", - "- [Getting Started with Meta Llama](https://llama.meta.com/docs/get-started)\n", - "- [Llama 3 repo](https://github.com/meta-llama/llama3)\n", - "- [Llama 3 model card](https://github.com/meta-llama/llama3/blob/main/MODEL_CARD.md)\n", - "- [LLama 3 Recipes repo](https://github.com/meta-llama/llama-recipes)\n", - "- [Responsible Use Guide](https://ai.meta.com/llama/responsible-use-guide/)\n", - "- [Acceptable Use Policy](https://ai.meta.com/llama/use-policy/)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "ioVMNcTesSEk" - ], - "machine_shape": "hm", - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - 
"language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.14" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/getting-started/Prompt_Engineering_with_Llama_3.ipynb b/getting-started/Prompt_Engineering_with_Llama.ipynb similarity index 95% rename from getting-started/Prompt_Engineering_with_Llama_3.ipynb rename to getting-started/Prompt_Engineering_with_Llama.ipynb index bc90afbe4..bab120bf7 100644 --- a/getting-started/Prompt_Engineering_with_Llama_3.ipynb +++ b/getting-started/Prompt_Engineering_with_Llama.ipynb @@ -7,11 +7,13 @@ "source": [ "\"Open\n", "\n", - "# Prompt Engineering with Llama 3.1\n", + "# Prompt Engineering with Llama\n", "\n", "Prompt engineering is using natural language to produce a desired response from a large language model (LLM).\n", "\n", - "This interactive guide covers prompt engineering & best practices with Llama 3.1." + "This interactive guide covers prompt engineering & best practices with Llama.\n", + "\n", + "Note: The notebook can be extended to any (latest) Llama models." ] }, { @@ -69,34 +71,6 @@ "1. `llama-2-70b-chat` - chat fine-tuned 70 billion parameter model (flagship)\n" ] }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Code Llama is a code-focused LLM built on top of Llama 2 also available in various sizes and finetunes:" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Code Llama\n", - "1. `codellama-7b` - code fine-tuned 7 billion parameter model\n", - "1. `codellama-13b` - code fine-tuned 13 billion parameter model\n", - "1. `codellama-34b` - code fine-tuned 34 billion parameter model\n", - "1. `codellama-70b` - code fine-tuned 70 billion parameter model\n", - "1. 
`codellama-7b-instruct` - code & instruct fine-tuned 7 billion parameter model\n", - "2. `codellama-13b-instruct` - code & instruct fine-tuned 13 billion parameter model\n", - "3. `codellama-34b-instruct` - code & instruct fine-tuned 34 billion parameter model\n", - "3. `codellama-70b-instruct` - code & instruct fine-tuned 70 billion parameter model\n", - "1. `codellama-7b-python` - Python fine-tuned 7 billion parameter model\n", - "2. `codellama-13b-python` - Python fine-tuned 13 billion parameter model\n", - "3. `codellama-34b-python` - Python fine-tuned 34 billion parameter model\n", - "3. `codellama-70b-python` - Python fine-tuned 70 billion parameter model" - ] - }, { "attachments": {}, "cell_type": "markdown", From 27504cd68d0403c6cb404cab1c6b755cf6022d1b Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Mon, 13 Jan 2025 14:41:14 -0800 Subject: [PATCH 21/23] restructure --- README.md | 24 ++++++++++++------ UPDATES.md | 1 + end-to-end-use-cases/README.md | 45 +++++++++++++++++++++++++--------- 3 files changed, 51 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index b0d14cd73..148f0c560 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,5 @@ # Llama Cookbook: The Official Guide to building with Llama Models -> Note: We recently did a refactor of the repo, [archive-main](https://github.com/meta-llama/llama-recipes/tree/archive-main) is a snapshot branch from before the refactor - Welcome to the official repository for helping you get started with [inference](./getting-started/inference/), [fine-tuning](./getting-started/finetuning) and [end-to-end use-cases](./end-to-end-use-cases) of building with the Llama Model family. The examples cover the most popular community approaches, popular use-cases and the latest Llama 3.2 Vision and Llama 3.2 Text, in this repository. 
@@ -10,26 +8,36 @@ The examples cover the most popular community approaches, popular use-cases and > Popular getting started links: > * [Build with Llama Notebook](./getting-started/build_with_Llama_3_2.ipynb) > * [Multimodal Inference with Llama 3.2 Vision](./getting-started/inference/local_inference/README.md#multimodal-inference) -> * [Inference on Llama Guard 1B + Multimodal inference on Llama Guard 11B-Vision](./end-to-end-use-cases/responsible_ai/llama_guard/ +> * [Inference on Llama Guard 1B + Multimodal inference on Llama Guard 11B-Vision](./end-to-end-use-cases/responsible_ai/llama_guard/) > [!TIP] > Popular end to end recipes: -> * [Finetune Llama 3.2 Vision](./getting-started/finetuning/finetune_vision_model.md)llama_guard_text_and_vision_inference.ipynb) +> * [Agentic Examples](./end-to-end-use-cases/agents/) +> * [NotebookLlama](./end-to-end-use-cases/NotebookLlama/) +> * [Browser Use with Llama](./end-to-end-use-cases/browser_use/) -> [!NOTE] -> Llama 3.2 follows the same prompt template as Llama 3.1, with a new special token `<|image|>` representing the input image for the multimodal models. -> -> More details on the prompt templates for image reasoning, tool-calling and code interpreter can be found [on the documentation website](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_2). +> Note: We recently did a refactor of the repo, [archive-main](https://github.com/meta-llama/llama-recipes/tree/archive-main) is a snapshot branch from before the refactor ## Repository Structure: - [3P Integrations](./3p-integrations): Getting Started Recipes and End to End Use-Cases from various Llama providers - [End to End Use Cases](./end-to-end-use-cases): As the name suggests, spanning various domains and applications - [Getting Started](./getting-started/): Reference for inferencing, fine-tuning and RAG examples +- [src](./src/): Contains the src for the original llama-recipes library along with some FAQs for fine-tuning. 
## FAQ: +- Q: Prompt Template changes for Multi-Modality? + +A: Llama 3.2 follows the same prompt template as Llama 3.1, with a new special token `<|image|>` representing the input image for the multimodal models. + +More details on the prompt templates for image reasoning, tool-calling and code interpreter can be found [on the documentation website](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_2). + +- Q: I have some questions for Fine-Tuning, is there a section to address these? + +A: Checkout the Fine-Tuning FAQ [here](./src/docs/) + - Q: Some links are broken/folders are missing: A: We recently did a refactor of the repo, [archive-main](https://github.com/meta-llama/llama-recipes/tree/archive-main) is a snapshot branch from before the refactor diff --git a/UPDATES.md b/UPDATES.md index 0281eb309..74385fe63 100644 --- a/UPDATES.md +++ b/UPDATES.md @@ -20,4 +20,5 @@ Removed folders: - /flagged (Empty folder) - /recipes/quickstart/Running_Llama3_Anywhere (Redundant code) - /recipes/quickstart/inference/codellama (deprecated model) +- /recipes/quickstart/getting-to-know-llama-3.ipynb diff --git a/end-to-end-use-cases/README.md b/end-to-end-use-cases/README.md index 653088010..68284444e 100644 --- a/end-to-end-use-cases/README.md +++ b/end-to-end-use-cases/README.md @@ -1,34 +1,57 @@ -## [Agentic Tutorial](./agents/): 101 and 201 tutorials on performing Tool Calling and building an Agentic Workflow using Llama Models +# End to End Use Applications using various Llama Models + +## [Agentic Tutorial](./agents/): + +### 101 and 201 tutorials on performing Tool Calling and building an Agentic Workflow using Llama Models 101 notebooks show how to apply Llama models and enable tool calling functionality, 201 notebook walks you through an end to end workflow of building an agent that can search two papers, fetch their details and find their differences. 
-## [Benchmarks](./benchmarks/): a folder contains benchmark scripts +## [Benchmarks](./benchmarks/): + +### A folder contains benchmark scripts The scripts apply a throughput analysis and introduction to `lm-evaluation-harness`, a tool to evaluate Llama models including quantized models focusing on quality -## [Browser Usage](./browser_use/): Demo of how to apply Llama models and use them for browsing the internet and completing tasks +## [Browser Usage](./browser_use/): -## [Automatic Triaging of Github Repositories](./github_triage/walkthrough.ipynb): Use Llama to automatically triage issues in an OSS repository and generate insights to improve community experience +### Demo of how to apply Llama models and use them for browsing the internet and completing tasks + +## [Automatic Triaging of Github Repositories](./github_triage/walkthrough.ipynb): + +### Use Llama to automatically triage issues in an OSS repository and generate insights to improve community experience This tool utilizes an off-the-shelf Llama model to analyze, generate insights, and create a report for better understanding of the state of a repository. It serves as a reference implementation for using Llama to develop custom reporting and data analytics applications. -## [VideoSummary](video_summary.ipynb): Ask Llama 3 to Summarize a Long YouTube Video (using Replicate or [OctoAI](../3p-integrations/octoai/video_summary.ipynb)) +## [VideoSummary](video_summary.ipynb): + +### Ask Llama 3 to Summarize a Long YouTube Video (using Replicate or [OctoAI](../3p-integrations/octoai/video_summary.ipynb)) This demo app uses Llama 3 to return a text summary of a YouTube video. It shows how to retrieve the caption of a YouTube video and how to ask Llama to summarize the content in different ways, from the simplest naive way that works for short text to more advanced methods of using LangChain's map_reduce and refine to overcome the 8K context length limit of Llama 3. 
-## [NBA2023-24](./coding/text2sql/quickstart.ipynb): Ask Llama 3 about Structured Data +## [NBA2023-24](./coding/text2sql/quickstart.ipynb): + +### Ask Llama 3 about Structured Data This demo app shows how to use LangChain and Llama 3 to let users ask questions about **structured** data stored in a SQL DB. As the 2023-24 NBA season is entering the playoff, we use the NBA roster info saved in a SQLite DB to show you how to ask Llama 3 questions about your favorite teams or players. -## [NotebookLlama](./NotebookLlama/): PDF to Podcast using Llama Models +## [NotebookLlama](./NotebookLlama/): + +### PDF to Podcast using Llama Models Workflow showcasing how to use multiple Llama models to go from any PDF to a Podcast and using open models to generate a multi-speaker podcast -## [live_data](live_data.ipynb): Ask Llama 3 about Live Data (using Replicate or [OctoAI](../3p-integrations/octoai/live_data.ipynb)) +## [live_data](live_data.ipynb): + +### Ask Llama 3 about Live Data (using Replicate or [OctoAI](../3p-integrations/octoai/live_data.ipynb)) This demo app shows how to perform live data augmented generation tasks with Llama 3, [LlamaIndex](https://github.com/run-llama/llama_index), another leading open-source framework for building LLM apps, and the [Tavily](https://tavily.com) live search API. -## [WhatsApp Chatbot](./customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md): Building a Llama 3 Enabled WhatsApp Chatbot +## [WhatsApp Chatbot](./customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md): +### Building a Llama 3 Enabled WhatsApp Chatbot This step-by-step tutorial shows how to use the [WhatsApp Business API](https://developers.facebook.com/docs/whatsapp/cloud-api/overview) to build a Llama 3 enabled WhatsApp chatbot. 
-## [Messenger Chatbot](./customerservice_chatbots/messenger_chatbot/messenger_llama3.md): Building a Llama 3 Enabled Messenger Chatbot +## [Messenger Chatbot](./customerservice_chatbots/messenger_chatbot/messenger_llama3.md): + +### Building a Llama 3 Enabled Messenger Chatbot This step-by-step tutorial shows how to use the [Messenger Platform](https://developers.facebook.com/docs/messenger-platform/overview) to build a Llama 3 enabled Messenger chatbot. ### RAG Chatbot Example (running [locally](./customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb) or on [OctoAI](../3p-integrations/octoai/RAG_chatbot_example/RAG_chatbot_example.ipynb)) A complete example of how to build a Llama 3 chatbot hosted on your browser that can answer questions based on your own data using retrieval augmented generation (RAG). You can run Llama2 locally if you have a good enough GPU or on OctoAI if you follow the note [here](../README.md#octoai_note). -## [Sales Bot](./customerservice_chatbots/ai_agent_chatbot/SalesBot.ipynb): Sales Bot with Llama3 - A Summarization and RAG Use Case +## [Sales Bot](./customerservice_chatbots/ai_agent_chatbot/SalesBot.ipynb): + +### Sales Bot with Llama3 - A Summarization and RAG Use Case An summarization + RAG use case built around the Amazon product review Kaggle dataset to build a helpful Music Store Sales Bot. The summarization and RAG are built on top of Llama models hosted on OctoAI, and the vector database is hosted on Weaviate Cloud Services. 
From eeea0aea41b396c99ee5189b62b2f0c6ffd0840e Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Tue, 14 Jan 2025 13:26:00 -0800 Subject: [PATCH 22/23] Update README.md --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 148f0c560..da232b526 100644 --- a/README.md +++ b/README.md @@ -2,19 +2,19 @@ Welcome to the official repository for helping you get started with [inference](./getting-started/inference/), [fine-tuning](./getting-started/finetuning) and [end-to-end use-cases](./end-to-end-use-cases) of building with the Llama Model family. -The examples cover the most popular community approaches, popular use-cases and the latest Llama 3.2 Vision and Llama 3.2 Text, in this repository. +This repository covers the most popular community approaches, use-cases and the latest recipes for Llama Text and Vision models. > [!TIP] > Popular getting started links: -> * [Build with Llama Notebook](./getting-started/build_with_Llama_3_2.ipynb) +> * [Build with Llama Tutorial](./getting-started/build_with_Llama_3_2.ipynb) > * [Multimodal Inference with Llama 3.2 Vision](./getting-started/inference/local_inference/README.md#multimodal-inference) -> * [Inference on Llama Guard 1B + Multimodal inference on Llama Guard 11B-Vision](./end-to-end-use-cases/responsible_ai/llama_guard/) +> * [Inferencing using Llama Guard (Safety Model)](./end-to-end-use-cases/responsible_ai/llama_guard/) > [!TIP] > Popular end to end recipes: -> * [Agentic Examples](./end-to-end-use-cases/agents/) +> * [Email Agent](./end-to-end-use-cases/email_agent/) > * [NotebookLlama](./end-to-end-use-cases/NotebookLlama/) -> * [Browser Use with Llama](./end-to-end-use-cases/browser_use/) +> * [Text to SQL](./end-to-end-use-cases/coding/text2sql/) > Note: We recently did a refactor of the repo, [archive-main](https://github.com/meta-llama/llama-recipes/tree/archive-main) is a snapshot branch from before the refactor From 
526ee789ccd4ed0e0b59b22dcde5377a0735e3a2 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Tue, 14 Jan 2025 13:43:31 -0800 Subject: [PATCH 23/23] update --- .github/scripts/spellcheck_conf/wordlist.txt | 4 + .../finetuning/datasets/ocrvqa_dataset.py | 113 ++++++++++---- .../local_inference/multi_modal_infer.py | 142 +++++++++++------- 3 files changed, 174 insertions(+), 85 deletions(-) diff --git a/.github/scripts/spellcheck_conf/wordlist.txt b/.github/scripts/spellcheck_conf/wordlist.txt index ceb26b3ad..98fac7ed8 100644 --- a/.github/scripts/spellcheck_conf/wordlist.txt +++ b/.github/scripts/spellcheck_conf/wordlist.txt @@ -1522,3 +1522,7 @@ globals gmail multiagent yyy +jpeg +toend +codellama +DIFFLOG diff --git a/getting-started/finetuning/datasets/ocrvqa_dataset.py b/getting-started/finetuning/datasets/ocrvqa_dataset.py index 19ce2262b..9597cac62 100644 --- a/getting-started/finetuning/datasets/ocrvqa_dataset.py +++ b/getting-started/finetuning/datasets/ocrvqa_dataset.py @@ -3,46 +3,60 @@ import copy -from datasets import load_dataset import itertools + import torch +from datasets import load_dataset + # check system prompt token seq or user prompt token seq is in the current token list -def check_header(targets,seq): - for i in range(len(seq)-3): - if seq[i:i+3] in targets: +def check_header(targets, seq): + for i in range(len(seq) - 3): + if seq[i : i + 3] in targets: return True return False -def replace_target(target,seq): - for i in range(len(seq)-3): - if seq[i:i+3] == target: - seq[i],seq[i+1],seq[i+2] = -100,-100,-100 + + +def replace_target(target, seq): + for i in range(len(seq) - 3): + if seq[i : i + 3] == target: + seq[i], seq[i + 1], seq[i + 2] = -100, -100, -100 return seq + + def tokenize_dialogs(dialogs, images, processor): text_prompt = processor.apply_chat_template(dialogs) - batch = processor(images=images, text=text_prompt,padding = True, return_tensors="pt") + text_prompt = [prompt.replace('<|begin_of_text|>','') for prompt in 
text_prompt] + batch = processor( + images=images, + text=text_prompt, + padding=True, + return_tensors="pt", + ) label_list = [] for i in range(len(batch["input_ids"])): dialog_tokens = batch["input_ids"][i].tolist() labels = copy.copy(dialog_tokens) - eot_indices = [i for i,n in enumerate(labels) if n == 128009] + eot_indices = [i for i, n in enumerate(labels) if n == 128009] last_idx = 0 # system prompt header "<|start_header_id|>system<|end_header_id|>" has been tokenized to [128006, 9125, 128007] # user prompt header "<|start_header_id|>user<|end_header_id|>" has been tokenized to [128006, 882, 128007] - prompt_header_seqs = [[128006, 9125, 128007],[128006, 882, 128007]] + prompt_header_seqs = [[128006, 9125, 128007], [128006, 882, 128007]] for n, idx in enumerate(eot_indices): - current_seq = labels[last_idx:idx+1] - if check_header(prompt_header_seqs,current_seq): + current_seq = labels[last_idx : idx + 1] + if check_header(prompt_header_seqs, current_seq): # found prompt header, indicating that this seq should be masked - labels[last_idx:idx+1] = [-100] * (idx-last_idx+1) + labels[last_idx : idx + 1] = [-100] * (idx - last_idx + 1) else: - last_idx = idx+1 + last_idx = idx + 1 # Mask all the assistant header prompt <|start_header_id|>assistant<|end_header_id|>, which has been tokenized to [128006, 78191, 128007] assistant_header_seq = [128006, 78191, 128007] - labels = replace_target(assistant_header_seq,labels) - # Mask the padding token and image token 128256 + labels = replace_target(assistant_header_seq, labels) + # Mask the padding token and image token 128256 for i in range(len(labels)): - if labels[i] == processor.tokenizer.pad_token_id or labels[i] == 128256: # 128256 is image token index + if ( + labels[i] == processor.tokenizer.pad_token_id or labels[i] == 128256 + ): # 128256 is image token index labels[i] = -100 label_list.append(labels) batch["labels"] = torch.tensor(label_list) @@ -52,39 +66,74 @@ def tokenize_dialogs(dialogs, images, 
processor): def get_custom_dataset(dataset_config, processor, split, split_ratio=0.9): # load_dataset will return DatasetDict that contains all the data in the train set dataset_dict = load_dataset("HuggingFaceM4/the_cauldron", name="ocrvqa") - dataset = dataset_dict['train'] + dataset = dataset_dict["train"] # Comment out the following line to use the full dataset, for quick testing only use 2000 samples dataset = dataset.select(range(2000)) - dataset = dataset.train_test_split(test_size=1-split_ratio, shuffle=True, seed=42)[split] + dataset = dataset.train_test_split( + test_size=1 - split_ratio, shuffle=True, seed=42 + )[split] return dataset + class OCRVQADataCollator: def __init__(self, processor): self.processor = processor - self.processor.tokenizer.padding_side = "right" # during training, one always uses padding on the right + self.processor.tokenizer.padding_side = ( + "right" # during training, one always uses padding on the right + ) + def __call__(self, samples): - dialogs,images = [],[] + dialogs, images = [], [] for sample in samples: - image_list,sample_list = sample["images"],sample["texts"] + image_list, sample_list = sample["images"], sample["texts"] if len(image_list) > 1: raise ValueError("Only support one image per sample") - image = image_list[0].convert("RGB") # only use the first image + image = image_list[0].convert("RGB") # only use the first image dialog = [] for sample_dict in sample_list: if not dialog: # only append image to the first sentence dialog += [ - {"role":"user","content":[{"type": "image"},{"type": "text", "text": sample_dict["user"].strip()}]}, - {"role":"assistant","content":[{"type": "text", "text": sample_dict["assistant"].strip()}]} - ] - + { + "role": "user", + "content": [ + {"type": "image"}, + {"type": "text", "text": sample_dict["user"].strip()}, + ], + }, + { + "role": "assistant", + "content": [ + { + "type": "text", + "text": sample_dict["assistant"].strip(), + } + ], + }, + ] + else: dialog += [ - 
{"role":"user","content":[{"type": "text", "text": sample_dict["user"].strip()}]}, - {"role":"assistant","content":[{"type": "text", "text": sample_dict["assistant"].strip()}]} - ] + { + "role": "user", + "content": [ + {"type": "text", "text": sample_dict["user"].strip()} + ], + }, + { + "role": "assistant", + "content": [ + { + "type": "text", + "text": sample_dict["assistant"].strip(), + } + ], + }, + ] dialogs.append(dialog) images.append([image]) - return tokenize_dialogs(dialogs,images, self.processor) + return tokenize_dialogs(dialogs, images, self.processor) + + def get_data_collator(processor): return OCRVQADataCollator(processor) diff --git a/getting-started/inference/local_inference/multi_modal_infer.py b/getting-started/inference/local_inference/multi_modal_infer.py index 071dc8683..8bbd0f29d 100644 --- a/getting-started/inference/local_inference/multi_modal_infer.py +++ b/getting-started/inference/local_inference/multi_modal_infer.py @@ -1,13 +1,15 @@ import argparse import os import sys + +import gradio as gr import torch from accelerate import Accelerator +from huggingface_hub import HfFolder +from peft import PeftModel from PIL import Image as PIL_Image from transformers import MllamaForConditionalGeneration, MllamaProcessor -from peft import PeftModel -import gradio as gr -from huggingface_hub import HfFolder + # Initialize accelerator accelerator = Accelerator() device = accelerator.device @@ -43,24 +45,24 @@ def load_model_and_processor(model_name: str, finetuning_path: str = None): torch_dtype=torch.bfloat16, use_safetensors=True, device_map=device, - token=hf_token + token=hf_token, + ) + processor = MllamaProcessor.from_pretrained( + model_name, token=hf_token, use_safetensors=True ) - processor = MllamaProcessor.from_pretrained(model_name, token=hf_token, use_safetensors=True) if finetuning_path and os.path.exists(finetuning_path): print(f"Loading LoRA adapter from '{finetuning_path}'...") model = PeftModel.from_pretrained( - model, - 
finetuning_path, - is_adapter=True, - torch_dtype=torch.bfloat16 + model, finetuning_path, is_adapter=True, torch_dtype=torch.bfloat16 ) print("LoRA adapter merged successfully") - + model, processor = accelerator.prepare(model, processor) return model, processor -def process_image(image_path: str = None, image = None) -> PIL_Image.Image: + +def process_image(image_path: str = None, image=None) -> PIL_Image.Image: """Process and validate image input""" if image is not None: return image.convert("RGB") @@ -68,29 +70,44 @@ def process_image(image_path: str = None, image = None) -> PIL_Image.Image: return PIL_Image.open(image_path).convert("RGB") raise ValueError("No valid image provided") -def generate_text_from_image(model, processor, image, prompt_text: str, temperature: float, top_p: float): + +def generate_text_from_image( + model, processor, image, prompt_text: str, temperature: float, top_p: float +): """Generate text from image using model""" conversation = [ - {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]} + { + "role": "user", + "content": [{"type": "image"}, {"type": "text", "text": prompt_text}], + } ] - prompt = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False) - inputs = processor(image, prompt, return_tensors="pt").to(device) - output = model.generate(**inputs, temperature=temperature, top_p=top_p, max_new_tokens=MAX_OUTPUT_TOKENS) - return processor.decode(output[0])[len(prompt):] + prompt = processor.apply_chat_template( + conversation, add_generation_prompt=True, tokenize=False + ) + inputs = processor( + image, prompt, text_kwargs={"add_special_tokens": False}, return_tensors="pt" + ).to(device) + print("Input Prompt:\n", processor.tokenizer.decode(inputs.input_ids[0])) + output = model.generate( + **inputs, temperature=temperature, top_p=top_p, max_new_tokens=MAX_OUTPUT_TOKENS + ) + return processor.decode(output[0])[len(prompt) :] + def gradio_interface(model_name: 
str): """Create Gradio UI with LoRA support""" # Initialize model state current_model = {"model": None, "processor": None} - + def load_or_reload_model(enable_lora: bool, lora_path: str = None): current_model["model"], current_model["processor"] = load_model_and_processor( - model_name, - lora_path if enable_lora else None + model_name, lora_path if enable_lora else None ) return "Model loaded successfully" + (" with LoRA" if enable_lora else "") - def describe_image(image, user_prompt, temperature, top_k, top_p, max_tokens, history): + def describe_image( + image, user_prompt, temperature, top_k, top_p, max_tokens, history + ): if image is not None: try: processed_image = process_image(image=image) @@ -100,7 +117,7 @@ def describe_image(image, user_prompt, temperature, top_k, top_p, max_tokens, hi processed_image, user_prompt, temperature, - top_p + top_p, ) history.append((user_prompt, result)) except Exception as e: @@ -112,7 +129,7 @@ def clear_chat(): with gr.Blocks() as demo: gr.HTML("

Llama Vision Model Interface

") - + with gr.Row(): with gr.Column(scale=1): # Model loading controls @@ -121,58 +138,74 @@ def clear_chat(): lora_path = gr.Textbox( label="LoRA Weights Path", placeholder="Path to LoRA weights folder", - visible=False + visible=False, ) load_status = gr.Textbox(label="Load Status", interactive=False) load_button = gr.Button("Load/Reload Model") # Image and parameter controls - image_input = gr.Image(label="Image", type="pil", image_mode="RGB", height=512, width=512) - temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.6, step=0.1) - top_k = gr.Slider(label="Top-k", minimum=1, maximum=100, value=50, step=1) - top_p = gr.Slider(label="Top-p", minimum=0.1, maximum=1.0, value=0.9, step=0.1) - max_tokens = gr.Slider(label="Max Tokens", minimum=50, maximum=MAX_OUTPUT_TOKENS, value=100, step=50) + image_input = gr.Image( + label="Image", type="pil", image_mode="RGB", height=512, width=512 + ) + temperature = gr.Slider( + label="Temperature", minimum=0.1, maximum=1.0, value=0.6, step=0.1 + ) + top_k = gr.Slider( + label="Top-k", minimum=1, maximum=100, value=50, step=1 + ) + top_p = gr.Slider( + label="Top-p", minimum=0.1, maximum=1.0, value=0.9, step=0.1 + ) + max_tokens = gr.Slider( + label="Max Tokens", + minimum=50, + maximum=MAX_OUTPUT_TOKENS, + value=100, + step=50, + ) with gr.Column(scale=2): chat_history = gr.Chatbot(label="Chat", height=512) user_prompt = gr.Textbox( - show_label=False, - placeholder="Enter your prompt", - lines=2 + show_label=False, placeholder="Enter your prompt", lines=2 ) - + with gr.Row(): generate_button = gr.Button("Generate") clear_button = gr.Button("Clear") # Event handlers enable_lora.change( - fn=lambda x: gr.update(visible=x), - inputs=[enable_lora], - outputs=[lora_path] + fn=lambda x: gr.update(visible=x), inputs=[enable_lora], outputs=[lora_path] ) - + load_button.click( fn=load_or_reload_model, inputs=[enable_lora, lora_path], - outputs=[load_status] + outputs=[load_status], ) 
generate_button.click( fn=describe_image, inputs=[ - image_input, user_prompt, temperature, - top_k, top_p, max_tokens, chat_history + image_input, + user_prompt, + temperature, + top_k, + top_p, + max_tokens, + chat_history, ], - outputs=[chat_history] + outputs=[chat_history], ) - + clear_button.click(fn=clear_chat, outputs=[chat_history]) # Initial model load load_or_reload_model(False) return demo + def main(args): """Main execution flow""" if args.gradio_ui: @@ -180,27 +213,30 @@ def main(args): demo.launch() else: model, processor = load_model_and_processor( - args.model_name, - args.finetuning_path + args.model_name, args.finetuning_path ) image = process_image(image_path=args.image_path) result = generate_text_from_image( - model, processor, image, - args.prompt_text, - args.temperature, - args.top_p + model, processor, image, args.prompt_text, args.temperature, args.top_p ) print("Generated Text:", result) + if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Multi-modal inference with optional Gradio UI and LoRA support") + parser = argparse.ArgumentParser( + description="Multi-modal inference with optional Gradio UI and LoRA support" + ) parser.add_argument("--image_path", type=str, help="Path to the input image") parser.add_argument("--prompt_text", type=str, help="Prompt text for the image") - parser.add_argument("--temperature", type=float, default=0.7, help="Sampling temperature") + parser.add_argument( + "--temperature", type=float, default=0.7, help="Sampling temperature" + ) parser.add_argument("--top_p", type=float, default=0.9, help="Top-p sampling") - parser.add_argument("--model_name", type=str, default=DEFAULT_MODEL, help="Model name") + parser.add_argument( + "--model_name", type=str, default=DEFAULT_MODEL, help="Model name" + ) parser.add_argument("--finetuning_path", type=str, help="Path to LoRA weights") parser.add_argument("--gradio_ui", action="store_true", help="Launch Gradio UI") - + args = parser.parse_args() - 
main(args) + main(args) \ No newline at end of file