From 9243de9ee6e5f279fcb3a7f9892dbe12c0c51e1e Mon Sep 17 00:00:00 2001 From: Lavanya Bhargava <162597096+lvb05@users.noreply.github.com> Date: Sun, 10 Nov 2024 12:45:51 +0530 Subject: [PATCH] Added publication date and error handling - Modified it to include the publication date for each article (if available). - Added error handling for failed requests using `requests.exceptions.RequestException`. - Enhanced the HTML output to display the publication date of each article alongside the title, link, and summary. - The HTML document is saved with a timestamped filename to ensure uniqueness. --- Scripts/techStream/techStream.py | 36 ++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/Scripts/techStream/techStream.py b/Scripts/techStream/techStream.py index 727448a..e4bd586 100644 --- a/Scripts/techStream/techStream.py +++ b/Scripts/techStream/techStream.py @@ -11,19 +11,29 @@ # Loop through each URL and scrape the latest articles for url in urls: - response = requests.get(url) - soup = BeautifulSoup(response.content, features="xml") - - # Extract the title, link, and summary of the latest articles - articles = soup.find_all("item")[:5] - for article in articles: - title = article.find("title").text - link = article.find("link").text - summary = article.find("description").text - - # Add the title, link, and summary to the HTML document - html += "

" + title + "

" - html += "

" + summary + "

" + try: + response = requests.get(url) + response.raise_for_status() #check for successful request + soup = BeautifulSoup(response.content, features="xml") + # Extract the title, link, and summary of the latest articles + articles = soup.find_all("item")[:5] #latest 5 articles + for article in articles: + title = article.find("title").text + link = article.find("link").text + summary = article.find("description").text + + # extracting publication date (if present) + pub_date=article.find("pubDate") + pub_date = pub_date.text if pub_date else "No Date Available" + + # Add the title, link, summary and publication date to the HTML document + html += f"

{title}

" + html += f"

Published on: {pub_date}

" + html += f"

{summary}

" + except requests.exceptions.RequestException as e: + print(f"Error fetching {url}: {e}") + + # Close the HTML document html += ""