From 6cb5afbac61ec10dc1286e28e484bea866a6c585 Mon Sep 17 00:00:00 2001
From: Jaco Pieterse <jaco@MacBookAir.home>
Date: Thu, 28 Nov 2024 13:52:44 +0200
Subject: [PATCH] Ollama streaming solution

---
 .../week1 EXERCISE ollama streaming.ipynb     | 415 ++++++++++++++++++
 1 file changed, 415 insertions(+)
 create mode 100644 week1/solutions/week1 EXERCISE ollama streaming.ipynb

diff --git a/week1/solutions/week1 EXERCISE ollama streaming.ipynb b/week1/solutions/week1 EXERCISE ollama streaming.ipynb
new file mode 100644
index 00000000..0d27dc50
--- /dev/null
+++ b/week1/solutions/week1 EXERCISE ollama streaming.ipynb	
@@ -0,0 +1,415 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "fe12c203-e6a6-452c-a655-afb8a03a4ff5",
+   "metadata": {},
+   "source": [
+    "# End of week 1 exercise\n",
+    "\n",
+    "To demonstrate your familiarity with the OpenAI API, and also Ollama, build a tool that takes a technical question,  \n",
+    "and responds with an explanation. This is a tool that you will be able to use yourself during the course!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "c1070317-3ed9-4659-abe3-828943230e03",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# imports\n",
+    "import os\n",
+    "from dotenv import load_dotenv\n",
+    "from IPython.display import Markdown, display, update_display\n",
+    "from openai import OpenAI\n",
+    "import ollama\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "4a456906-915a-4bfd-bb9d-57e505c5093f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# constants\n",
+    "MODEL_GPT = 'gpt-4o-mini'\n",
+    "MODEL_LLAMA = 'llama3.2'\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "a8d7923c-5f28-4c30-8556-342d7c8497c1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "API key found and looks good so far!\n"
+     ]
+    }
+   ],
+   "source": [
+    "# set up environment\n",
+    "load_dotenv()\n",
+    "api_key = os.getenv('OPENAI_API_KEY')\n",
+    "\n",
+    "# Check the key\n",
+    "\n",
+    "if not api_key:\n",
+    "    print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
+    "elif not api_key.startswith(\"sk-proj-\"):\n",
+    "    print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
+    "elif api_key.strip() != api_key:\n",
+    "    print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
+    "else:\n",
+    "    print(\"API key found and looks good so far!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "id": "3f0d0137-52b0-47a8-81a8-11a90a010798",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# here is the question; type over this to ask something new\n",
+    "\n",
+    "myQuestion = \"\"\"\n",
+    "Please explain what this code does and why:\n",
+    "yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n",
+    "\"\"\"\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "id": "dccf6cbd-38ab-4333-a93b-6f27d1ce684f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define our system prompt\n",
+    "system_prompt = \"\"\"You are an assistant that analyzes the code and explains it in a clear and easy-to-understand manner.\n",
+    "Example: print(\"Hello world\")\n",
+    "Respond in markdown: Sure! Let's break it down step by step:\n",
+    "print(\"Hello world\")\n",
+    "This is a simple Python statement that does one thing: it shows the text \"Hello world\" on the screen.\n",
+    "1. print:\n",
+    "The word print is a command in Python.\n",
+    "It tells the computer: 'Display something for me.'\n",
+    "2. \"Hello world\":\n",
+    "The words \"Hello world\" are inside quotes.\n",
+    "Anything inside quotes is called a string in Python, which means it's just text.\n",
+    "Together:\n",
+    "The print command looks at what's inside the parentheses ( ) and displays it on the screen.\n",
+    "In this case, it displays the words Hello world.\n",
+    "Output:\n",
+    "When you run the code, the computer will show:\n",
+    "Hello world\n",
+    "It’s like saying to the computer:\n",
+    "\"Hey, computer, please write 'Hello world' so people can see it!\" 😊\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "id": "2e3c2aa7-99fa-4e97-90d6-25a1a805d52c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_user_prompt(question):\n",
+    "    user_prompt = f\"Explain the question {question}\\n\"\n",
+    "    user_prompt += \"In a clear and easy to understand manner in markdown.\\n\"\n",
+    "    return user_prompt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "id": "f35b37a6-ffb7-4a8a-af5d-51491012d0be",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Explain the question \n",
+      "Please explain what this code does and why:\n",
+      "yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n",
+      "\n",
+      "In a clear and easy to understand manner in markdown.\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(get_user_prompt(myQuestion))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "id": "60ce7000-a4a5-4cce-a261-e75ef45063b4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get gpt-4o-mini to answer, with streaming\n",
+    "openai = OpenAI()\n",
+    "def answer_question_with_streaming_OpenAI(question):\n",
+    "    stream = openai.chat.completions.create(\n",
+    "        model=MODEL_GPT,\n",
+    "        messages=[\n",
+    "            {\"role\": \"system\", \"content\": system_prompt},\n",
+    "            {\"role\": \"user\", \"content\": get_user_prompt(question, )}\n",
+    "          ],\n",
+    "        stream=True\n",
+    "    )\n",
+    "\n",
+    "    response = \"\"\n",
+    "    display_handle = display(Markdown(\"\"), display_id=True)\n",
+    "    for chunk in stream:\n",
+    "        response += chunk.choices[0].delta.content or ''\n",
+    "        response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
+    "        update_display(Markdown(response), display_id=display_handle.display_id)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "id": "213a2773-7e22-45a4-a2bd-03a98fbef335",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/markdown": [
+       "Sure! Let's break down this code step by step:\n",
+       "\n",
+       "python\n",
+       "yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n",
+       "\n",
+       "\n",
+       "This line of code is a generator expression that yields unique authors from a collection of books. Here's a detailed explanation:\n",
+       "\n",
+       "1. **yield from**:\n",
+       "   - The keyword `yield` is used in Python to create a generator, which can be paused and resumed, returning values one at a time.\n",
+       "   - The `from` keyword indicates that you want to yield items from an iterable (like a list or a set). In this case, it’s yielding from a set comprehension.\n",
+       "\n",
+       "2. **{...}**:\n",
+       "   - The curly braces `{}` indicate that we are creating a set. A set is a collection of unique items in Python, which means it will automatically remove any duplicate entries.\n",
+       "\n",
+       "3. **book.get(\"author\")**:\n",
+       "   - This part accesses the value associated with the key \"author\" in each `book` dictionary. Here, `book` represents each individual item in the `books` collection.\n",
+       "   - The `get` method is used for dictionaries to safely retrieve the value for a given key. If the key doesn't exist, it will return `None` instead of throwing an error.\n",
+       "\n",
+       "4. **for book in books**:\n",
+       "   - This part of the code iterates over each `book` in a collection called `books`. It assumes that `books` is a list (or any iterable) of dictionaries.\n",
+       "\n",
+       "5. **if book.get(\"author\")**:\n",
+       "   - This is a condition that filters the books. It ensures that only books with a valid \"author\" (i.e., not `None` or an empty string) are included in the set comprehension. If `book.get(\"author\")` returns a valid author, the `if` clause evaluates to `True`.\n",
+       "\n",
+       "### Together:\n",
+       "\n",
+       "- The entire line of code creates a set of unique authors from the list of books, while ignoring any books that do not have an author specified.\n",
+       "- The `yield from` statement then yields each author one at a time, allowing you to iterate over them or collect them in another structure without creating a full list in memory.\n",
+       "\n",
+       "### Example:\n",
+       "\n",
+       "If you have the following books:\n",
+       "python\n",
+       "books = [\n",
+       "    {\"title\": \"Book One\", \"author\": \"Alice\"},\n",
+       "    {\"title\": \"Book Two\", \"author\": \"Bob\"},\n",
+       "    {\"title\": \"Book Three\", \"author\": \"Alice\"},\n",
+       "    {\"title\": \"Book Four\", \"author\": None},\n",
+       "]\n",
+       "\n",
+       "\n",
+       "When you run the line of code, the output will yield:\n",
+       "\n",
+       "Alice\n",
+       "Bob\n",
+       "\n",
+       "Output:\n",
+       "- The generator will yield unique authors, which means even though \"Alice\" is listed twice, it will only show up once.\n",
+       "\n",
+       "### Conclusion:\n",
+       "\n",
+       "Essentially, this code is a convenient way to extract a unique list of authors from a dataset of books, making it easy to work with authors throughout your program. It performs this function efficiently without needing extra memory for duplicates. 😊"
+      ],
+      "text/plain": [
+       "<IPython.core.display.Markdown object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "answer_question_with_streaming_OpenAI(myQuestion)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "id": "8f7c8ea8-4082-4ad0-8751-3301adcf6538",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get Llama 3.2 to answer\n",
+    "\n",
+    "def answer_question_with_streaming_Ollama(question):\n",
+    "    print(question)\n",
+    "    stream = ollama.chat(\n",
+    "        model=MODEL_LLAMA,\n",
+    "        messages=[\n",
+    "            {\"role\": \"system\", \"content\": system_prompt},\n",
+    "            {\"role\": \"user\", \"content\": get_user_prompt(question)}\n",
+    "        ],\n",
+    "        stream=True\n",
+    "    )\n",
+    "\n",
+    "    response = \"\"\n",
+    "    display_handle = display(Markdown(\"\"), display_id=True)\n",
+    "\n",
+    "    chunk_accumulator = []  # To batch small chunks\n",
+    "    for chunk in stream:\n",
+    "        # Debug: print the full chunk\n",
+    "        #print(chunk)\n",
+    "\n",
+    "        if 'message' in chunk and 'content' in chunk['message']:\n",
+    "            content = chunk['message']['content']\n",
+    "            chunk_accumulator.append(content)\n",
+    "\n",
+    "            # Accumulate the response\n",
+    "            response += content\n",
+    "\n",
+    "            # Update the display periodically (e.g., every 5 chunks)\n",
+    "            if len(chunk_accumulator) >= 5:\n",
+    "                display_content = response.replace(\"```\", \"\").replace(\"markdown\", \"\")\n",
+    "                update_display(Markdown(display_content), display_id=display_handle.display_id)\n",
+    "                chunk_accumulator = []  # Reset the accumulator\n",
+    "\n",
+    "    # Final update to display any remaining chunks\n",
+    "    display_content = response.replace(\"```\", \"\").replace(\"markdown\", \"\")\n",
+    "    update_display(Markdown(display_content), display_id=display_handle.display_id)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "id": "02644168-1b06-4908-afd1-7ed3aaf7852c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Please explain what this code does and why:\n",
+      "yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/markdown": [
+       "Let's break down this complex code step by step:\n",
+       "\n",
+       "### Understanding the Code\n",
+       "python\n",
+       "yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n",
+       "\n",
+       "This line of code is using several advanced features of Python, so let's break it down into smaller parts.\n",
+       "\n",
+       "**1. `yield from`**\n",
+       "The word `yield` is used to create a generator function in Python.\n",
+       "A generator function is like a normal function, but instead of returning all the values at once (like a regular list), it returns them one by one.\n",
+       "When you use `yield`, the function doesn't finish executing until it reaches the `yield` keyword.\n",
+       "\n",
+       "**2. `{}`**\n",
+       "These curly brackets are used to create an expression in Python called a **dictionary comprehension**.\n",
+       "A dictionary comprehension is like a regular dictionary, but instead of using the `dict()` function or `{key: value}`, you use this syntax.\n",
+       "\n",
+       "**3. `.get(\"author\") for book in books`**\n",
+       "This part of the code is using another advanced feature called a **generator expression**.\n",
+       "It's similar to a list comprehension, but instead of creating a new list, it creates an iterator that yields each value one by one.\n",
+       "\n",
+       "* `.get(\"author\")`: This method calls the `get()` function on the `book` object and returns the value associated with the key `\"author\"`.\n",
+       "* `for book in books`: This part loops over each item (`book`) in a collection (`books`).\n",
+       "\n",
+       "So, this generator expression takes each book in the `books` list, gets its author (if it exists), and yields that author.\n",
+       "\n",
+       "**4. `yield from {...}`**\n",
+       "When you use `yield from`, it's like saying \"yield all these values, one by one\".\n",
+       "\n",
+       "In this case, the dictionary comprehension `{book.get(\"author\") for book in books if book.get(\"author\")}` is yielding a dictionary with authors as values.\n",
+       "By using `yield from`, we're essentially saying \"yield each author, one by one\".\n",
+       "\n",
+       "**Putting it All Together**\n",
+       "So, what does this code do?\n",
+       "It creates an iterator that yields each author's name from the `books` list, one by one. The resulting iterator is like a stream of authors.\n",
+       "\n",
+       "Here's an example:\n",
+       "python\n",
+       "books = [\n",
+       "    {\"title\": \"Book 1\", \"author\": \"Author A\"},\n",
+       "    {\"title\": \"Book 2\", \"author\": \"Author B\"},\n",
+       "    {\"title\": \"Book 3\"}\n",
+       "]\n",
+       "\n",
+       "for author in yield from {book.get(\"author\") for book in books if book.get(\"author\")}:\n",
+       "    print(author)\n",
+       "\n",
+       "Output:\n",
+       "\n",
+       "Author A\n",
+       "Author B\n",
+       "\n",
+       "Note that this code uses a clever way to iterate over the authors, but it's still just printing each author's name one by one."
+      ],
+      "text/plain": [
+       "<IPython.core.display.Markdown object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "answer_question_with_streaming_Ollama(myQuestion)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1ef6be37-de9a-4cd2-8007-cab3f2abdb67",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}