From 6cb5afbac61ec10dc1286e28e484bea866a6c585 Mon Sep 17 00:00:00 2001 From: Jaco Pieterse Date: Thu, 28 Nov 2024 13:52:44 +0200 Subject: [PATCH] Ollama streaming solution --- .../week1 EXERCISE ollama streaming.ipynb | 415 ++++++++++++++++++ 1 file changed, 415 insertions(+) create mode 100644 week1/solutions/week1 EXERCISE ollama streaming.ipynb diff --git a/week1/solutions/week1 EXERCISE ollama streaming.ipynb b/week1/solutions/week1 EXERCISE ollama streaming.ipynb new file mode 100644 index 00000000..0d27dc50 --- /dev/null +++ b/week1/solutions/week1 EXERCISE ollama streaming.ipynb @@ -0,0 +1,415 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fe12c203-e6a6-452c-a655-afb8a03a4ff5", + "metadata": {}, + "source": [ + "# End of week 1 exercise\n", + "\n", + "To demonstrate your familiarity with OpenAI API, and also Ollama, build a tool that takes a technical question, \n", + "and responds with an explanation. This is a tool that you will be able to use yourself during the course!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "c1070317-3ed9-4659-abe3-828943230e03", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "import os\n", + "from dotenv import load_dotenv\n", + "from IPython.display import Markdown, display, update_display\n", + "from openai import OpenAI\n", + "import ollama\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "4a456906-915a-4bfd-bb9d-57e505c5093f", + "metadata": {}, + "outputs": [], + "source": [ + "# constants\n", + "MODEL_GPT = 'gpt-4o-mini'\n", + "MODEL_LLAMA = 'llama3.2'\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "a8d7923c-5f28-4c30-8556-342d7c8497c1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "API key found and looks good so far!\n" + ] + } + ], + "source": [ + "# set up environment\n", + "load_dotenv()\n", + "api_key = os.getenv('OPENAI_API_KEY')\n", + "\n", + "# Check the key\n", + "\n", + "if not api_key:\n", + " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n", + "elif not api_key.startswith(\"sk-proj-\"):\n", + " print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n", + "elif api_key.strip() != api_key:\n", + " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n", + "else:\n", + " print(\"API key found and looks good so far!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "3f0d0137-52b0-47a8-81a8-11a90a010798", + "metadata": {}, + "outputs": [], + "source": [ + "# here is the question; type over this to ask something new\n", + "\n", + "myQuestion = \"\"\"\n", + "Please explain what this code does and why:\n", + "yield from {book.get(\"author\") for book in books if 
book.get(\"author\")}\n", + "\"\"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "dccf6cbd-38ab-4333-a93b-6f27d1ce684f", + "metadata": {}, + "outputs": [], + "source": [ + "# Define our system prompt\n", + "system_prompt = \"\"\"You are an assistant that analyzes the code and explains it in a clear and easy-to-understand manner.\n", + "Example: print(\"Hello world\")\n", + "Respond in markdown: Sure! Let's break it down step by step:\n", + "print(\"Hello world\")\n", + "This is a simple Python statement that does one thing: it shows the text \"Hello world\" on the screen.\n", + "1. print:\n", + "The word print is a command in Python.\n", + "It tells the computer: 'Display something for me.'\n", + "2. \"Hello world\":\n", + "The words \"Hello world\" are inside quotes.\n", + "Anything inside quotes is called a string in Python, which means it's just text.\n", + "Together:\n", + "The print command looks at what's inside the parentheses ( ) and displays it on the screen.\n", + "In this case, it displays the words Hello world.\n", + "Output:\n", + "When you run the code, the computer will show:\n", + "Hello world\n", + "It’s like saying to the computer:\n", + "\"Hey, computer, please write 'Hello world' so people can see it!\" 😊\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "2e3c2aa7-99fa-4e97-90d6-25a1a805d52c", + "metadata": {}, + "outputs": [], + "source": [ + "def get_user_prompt(question):\n", + " user_prompt = f\"Explain the question {question}\\n\"\n", + " user_prompt += \"In a clear and easy to understand manner in markdown.\\n\"\n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "f35b37a6-ffb7-4a8a-af5d-51491012d0be", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Explain the question \n", + "Please explain what this code does and why:\n", + "yield from {book.get(\"author\") for book in books if 
book.get(\"author\")}\n", + "\n", + "In a clear and easy to understand manner in markdown.\n", + "\n" + ] + } + ], + "source": [ + "print(get_user_prompt(question))" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "60ce7000-a4a5-4cce-a261-e75ef45063b4", + "metadata": {}, + "outputs": [], + "source": [ + "# Get gpt-4o-mini to answer, with streaming\n", + "openai = OpenAI()\n", + "def answer_question_with_streaming_OpenAI(question):\n", + " stream = openai.chat.completions.create(\n", + " model=MODEL_GPT,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": get_user_prompt(question, )}\n", + " ],\n", + " stream=True\n", + " )\n", + "\n", + " response = \"\"\n", + " display_handle = display(Markdown(\"\"), display_id=True)\n", + " for chunk in stream:\n", + " response += chunk.choices[0].delta.content or ''\n", + " response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n", + " update_display(Markdown(response), display_id=display_handle.display_id)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "213a2773-7e22-45a4-a2bd-03a98fbef335", + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "Sure! Let's break down this code step by step:\n", + "\n", + "python\n", + "yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", + "\n", + "\n", + "This line of code is a generator expression that yields unique authors from a collection of books. Here's a detailed explanation:\n", + "\n", + "1. **yield from**:\n", + " - The keyword `yield` is used in Python to create a generator, which can be paused and resumed, returning values one at a time.\n", + " - The `from` keyword indicates that you want to yield items from an iterable (like a list or a set). In this case, it’s yielding from a set comprehension.\n", + "\n", + "2. **{...}**:\n", + " - The curly braces `{}` indicate that we are creating a set. 
A set is a collection of unique items in Python, which means it will automatically remove any duplicate entries.\n", + "\n", + "3. **book.get(\"author\")**:\n", + " - This part accesses the value associated with the key \"author\" in each `book` dictionary. Here, `book` represents each individual item in the `books` collection.\n", + " - The `get` method is used for dictionaries to safely retrieve the value for a given key. If the key doesn't exist, it will return `None` instead of throwing an error.\n", + "\n", + "4. **for book in books**:\n", + " - This part of the code iterates over each `book` in a collection called `books`. It assumes that `books` is a list (or any iterable) of dictionaries.\n", + "\n", + "5. **if book.get(\"author\")**:\n", + " - This is a condition that filters the books. It ensures that only books with a valid \"author\" (i.e., not `None` or an empty string) are included in the set comprehension. If `book.get(\"author\")` returns a valid author, the `if` clause evaluates to `True`.\n", + "\n", + "### Together:\n", + "\n", + "- The entire line of code creates a set of unique authors from the list of books, while ignoring any books that do not have an author specified.\n", + "- The `yield from` statement then yields each author one at a time, allowing you to iterate over them or collect them in another structure without creating a full list in memory.\n", + "\n", + "### Example:\n", + "\n", + "If you have the following books:\n", + "python\n", + "books = [\n", + " {\"title\": \"Book One\", \"author\": \"Alice\"},\n", + " {\"title\": \"Book Two\", \"author\": \"Bob\"},\n", + " {\"title\": \"Book Three\", \"author\": \"Alice\"},\n", + " {\"title\": \"Book Four\", \"author\": None},\n", + "]\n", + "\n", + "\n", + "When you run the line of code, the output will yield:\n", + "\n", + "Alice\n", + "Bob\n", + "\n", + "Output:\n", + "- The generator will yield unique authors, which means even though \"Alice\" is listed twice, it will only show up 
once.\n", + "\n", + "### Conclusion:\n", + "\n", + "Essentially, this code is a convenient way to extract a unique list of authors from a dataset of books, making it easy to work with authors throughout your program. It performs this function efficiently without needing extra memory for duplicates. 😊" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "answer_question_with_streaming(question)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "8f7c8ea8-4082-4ad0-8751-3301adcf6538", + "metadata": {}, + "outputs": [], + "source": [ + "# Get Llama 3.2 to answer\n", + "\n", + "def answer_question_with_streaming_Ollama(question):\n", + " print(question)\n", + " stream = ollama.chat(\n", + " model=MODEL_LLAMA,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": get_user_prompt(question)}\n", + " ],\n", + " stream=True\n", + " )\n", + "\n", + " response = \"\"\n", + " display_handle = display(Markdown(\"\"), display_id=True)\n", + "\n", + " chunk_accumulator = [] # To batch small chunks\n", + " for chunk in stream:\n", + " # Debug: print the full chunk\n", + " #print(chunk)\n", + "\n", + " if 'message' in chunk and 'content' in chunk['message']:\n", + " content = chunk['message']['content']\n", + " chunk_accumulator.append(content)\n", + "\n", + " # Accumulate the response\n", + " response += content\n", + "\n", + " # Update the display periodically (e.g., every 5 chunks)\n", + " if len(chunk_accumulator) >= 5:\n", + " display_content = response.replace(\"```\", \"\").replace(\"markdown\", \"\")\n", + " update_display(Markdown(display_content), display_id=display_handle.display_id)\n", + " chunk_accumulator = [] # Reset the accumulator\n", + "\n", + " # Final update to display any remaining chunks\n", + " display_content = response.replace(\"```\", \"\").replace(\"markdown\", \"\")\n", + " 
update_display(Markdown(display_content), display_id=display_handle.display_id)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "02644168-1b06-4908-afd1-7ed3aaf7852c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Please explain what this code does and why:\n", + "yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", + "\n" + ] + }, + { + "data": { + "text/markdown": [ + "Let's break down this complex code step by step:\n", + "\n", + "### Understanding the Code\n", + "python\n", + "yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", + "\n", + "This line of code is using several advanced features of Python, so let's break it down into smaller parts.\n", + "\n", + "**1. `yield from`**\n", + "The word `yield` is used to create a generator function in Python.\n", + "A generator function is like a normal function, but instead of returning all the values at once (like a regular list), it returns them one by one.\n", + "When you use `yield`, the function doesn't finish executing until it reaches the `yield` keyword.\n", + "\n", + "**2. `{}`**\n", + "These curly brackets are used to create an expression in Python called a **dictionary comprehension**.\n", + "A dictionary comprehension is like a regular dictionary, but instead of using the `dict()` function or `{key: value}`, you use this syntax.\n", + "\n", + "**3. 
`.get(\"author\") for book in books`**\n", + "This part of the code is using another advanced feature called a **generator expression**.\n", + "It's similar to a list comprehension, but instead of creating a new list, it creates an iterator that yields each value one by one.\n", + "\n", + "* `.get(\"author\")`: This method calls the `get()` function on the `book` object and returns the value associated with the key `\"author\"`.\n", + "* `for book in books`: This part loops over each item (`book`) in a collection (`books`).\n", + "\n", + "So, this generator expression takes each book in the `books` list, gets its author (if it exists), and yields that author.\n", + "\n", + "**4. `yield from {...}`**\n", + "When you use `yield from`, it's like saying \"yield all these values, one by one\".\n", + "\n", + "In this case, the dictionary comprehension `{book.get(\"author\") for book in books if book.get(\"author\")}` is yielding a dictionary with authors as values.\n", + "By using `yield from`, we're essentially saying \"yield each author, one by one\".\n", + "\n", + "**Putting it All Together**\n", + "So, what does this code do?\n", + "It creates an iterator that yields each author's name from the `books` list, one by one. The resulting iterator is like a stream of authors.\n", + "\n", + "Here's an example:\n", + "python\n", + "books = [\n", + " {\"title\": \"Book 1\", \"author\": \"Author A\"},\n", + " {\"title\": \"Book 2\", \"author\": \"Author B\"},\n", + " {\"title\": \"Book 3\"}\n", + "]\n", + "\n", + "for author in yield from {book.get(\"author\") for book in books if book.get(\"author\")}:\n", + " print(author)\n", + "\n", + "Output:\n", + "\n", + "Author A\n", + "Author B\n", + "\n", + "Note that this code uses a clever way to iterate over the authors, but it's still just printing each author's name one by one." 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "answer_question_with_streaming_Ollama(myQuestion)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1ef6be37-de9a-4cd2-8007-cab3f2abdb67", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}