Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

week1-day2 exercise #18

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
70 changes: 58 additions & 12 deletions week1/Guide to Jupyter.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "33d37cd8-55c9-4e03-868c-34aa9cab2c80",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"4"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Click anywhere in this cell and press Shift + Return\n",
"\n",
Expand All @@ -54,7 +65,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "585eb9c1-85ee-4c27-8dc2-b4d8d022eda0",
"metadata": {},
"outputs": [],
Expand All @@ -66,10 +77,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "07792faa-761d-46cb-b9b7-2bbf70bb1628",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"'bananas'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The result of the last statement is shown after you run it\n",
"\n",
Expand All @@ -78,10 +100,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"id": "a067d2b1-53d5-4aeb-8a3c-574d39ff654a",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"My favorite fruit is anything but anything but bananas\n"
]
}
],
"source": [
"# Use the variable\n",
"\n",
Expand All @@ -90,7 +120,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"id": "4c5a4e60-b7f4-4953-9e80-6d84ba4664ad",
"metadata": {},
"outputs": [],
Expand All @@ -116,10 +146,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"id": "8e5ec81d-7c5b-4025-bd2e-468d67b581b6",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"My favorite fruit is anything but anything but bananas\n"
]
}
],
"source": [
"# Then run this cell twice, and see if you understand what's going on\n",
"\n",
Expand All @@ -144,10 +182,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"id": "84b1e410-5eda-4e2c-97ce-4eebcff816c5",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"My favorite fruit is apples\n"
]
}
],
"source": [
"print(f\"My favorite fruit is {favorite_fruit}\")"
]
Expand Down
20 changes: 15 additions & 5 deletions week1/day1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,8 @@
" \"\"\"\n",
" self.url = url\n",
" response = requests.get(url)\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" \n",
" \n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
]
},
Expand All @@ -178,7 +176,8 @@
"source": [
"# Let's try one out. Change the website and add print statements to follow along.\n",
"\n",
"ed = Website(\"https://edwarddonner.com\")\n",
"ed = Website(\"https://jaivikhimalay.com\")\n",
"#ed = Website(\"https://edwarddonner.com\")\n",
"print(ed.title)\n",
"print(ed.text)"
]
Expand Down Expand Up @@ -308,6 +307,7 @@
"def summarize(url):\n",
" website = Website(url)\n",
" response = openai.chat.completions.create(\n",
" \n",
" model = \"gpt-4o-mini\",\n",
" messages = messages_for(website)\n",
" )\n",
Expand Down Expand Up @@ -474,6 +474,16 @@
"id": "682eff74-55c4-4d4b-b267-703edbc293c7",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://jaivikhimalay.com\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2df0c85f-3815-4c7b-bc17-3fb61aeb75f6",
"metadata": {},
"outputs": [],
"source": []
}
],
Expand Down
172 changes: 172 additions & 0 deletions week1/day2 EXERCISE.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,178 @@
"\n",
"Take the code from day1 and incorporate it here, to build a website summarizer that uses Llama 3.2 running locally instead of OpenAI"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "00496450-b078-469c-8072-4cb633df9928",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "37aea1a6-2d35-4c8b-9264-43da5884d069",
"metadata": {},
"outputs": [],
"source": [
"# Constants\n",
"MODEL = \"llama3.2\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "383f8e2a-c98d-48bc-9c3f-ece4ef077ec9",
"metadata": {},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"\n",
"class Website:\n",
" \"\"\"\n",
" A utility class to represent a Website that we have scraped\n",
" \"\"\"\n",
" url: str\n",
" title: str\n",
" text: str\n",
"\n",
" def __init__(self, url):\n",
" \"\"\"\n",
" Create this Website object from the given url using the BeautifulSoup library\n",
" \"\"\"\n",
" self.url = url\n",
" response = requests.get(url)\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36e99eba-5758-4471-8860-2f5a42771e6b",
"metadata": {},
"outputs": [],
"source": [
"ed = Website(\"https://jaivikhimalay.com\")\n",
"print(ed.url)\n",
"print(ed.title)\n",
"print(ed.text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "793f8c35-fbde-4160-90b5-a2fd8ea5f5e6",
"metadata": {},
"outputs": [],
"source": [
"# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.'\n",
"\n",
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
"and provides a short summary, ignoring text that might be navigation related. \\\n",
"Respond in markdown.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ecbfa0e2-39c2-4467-989b-4ab9810de72d",
"metadata": {},
"outputs": [],
"source": [
"# A function that writes a User Prompt that asks for summaries of websites:\n",
"\n",
"def user_prompt_for(website):\n",
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
" user_prompt += \"The contents of this website is as follows; \\\n",
"please provide a short summary of this website in markdown. \\\n",
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
" user_prompt += website.text\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf5a1da8-1628-4bbb-a2f8-e06409fe4e45",
"metadata": {},
"outputs": [],
"source": [
"# See how this function creates exactly the format above\n",
"\n",
"def messages_for(website):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fc9040a8-0a62-4658-94d9-0674b9045986",
"metadata": {},
"outputs": [],
"source": [
"# And now: call the Ollama function instead of OpenAI\n",
"\n",
"def summarize(url):\n",
" website = Website(url)\n",
" messages = messages_for(website)\n",
" response = ollama.chat(model=MODEL, messages=messages)\n",
" return response['message']['content']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fc2aff45-ba1d-44f8-8b5d-f34934eb1ee8",
"metadata": {},
"outputs": [],
"source": [
"summarize(\"https://jaivikhimalay.com\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef0e940d-fed4-417b-8d95-4a0fd3b9b3d7",
"metadata": {},
"outputs": [],
"source": [
"def display_summary(url):\n",
" summary = summarize(url)\n",
" display(Markdown(summary))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "086296f1-c748-4747-93d2-554732392400",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://jaivikhimalay.com\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "83e150e1-5b2c-45b7-9154-aae5b55943af",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
Loading