Merge pull request #524 from bounswe/feature/BE-enhance-news
feat: Add enhanced news implementation
rukiyeaslan authored Dec 16, 2024
2 parents 261818f + 07458ee commit b883426
Showing 1 changed file with 111 additions and 23 deletions.
134 changes: 111 additions & 23 deletions backend/news/views.py
@@ -3,6 +3,20 @@
from rest_framework.response import Response
from .serializers import NewsSerializer
from bs4 import BeautifulSoup
from html.parser import HTMLParser


class HTMLContentParser(HTMLParser):
    def __init__(self):
        super().__init__()
        self.image_url = None

    def handle_starttag(self, tag, attrs):
        if tag == "img" and self.image_url is None:
            for attr in attrs:
                if attr[0] == "src":
                    self.image_url = attr[1]
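
# Illustration (hypothetical input, not part of this commit): feeding
# '<p>intro</p><img src="https://example.com/a.jpg"><img src="https://example.com/b.jpg">'
# through HTMLContentParser leaves image_url == "https://example.com/a.jpg";
# only the first <img> src is captured, later ones are ignored.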


class NewsView(generics.CreateAPIView):
    serializer_class = NewsSerializer
@@ -14,52 +28,126 @@ def post(self, request):

        urls = {
            "financial times": "https://www.ft.com/rss/home",
            "cryptocurrency": "https://cointelegraph.com/rss",
            "comprehensive financial news": "http://feeds.benzinga.com/benzinga",
            "financeasia": "https://www.financeasia.com/rss/latest",
            "expert analysis": "https://moneyweek.com/feed/all",
            "turkey": "https://www.ntv.com.tr/ekonomi.rss",
        }

        if feed_name not in urls:
            return Response(
                {"error": f"Feed name not found. Available options are: {', '.join(urls.keys())}."},
                status=status.HTTP_404_NOT_FOUND,
            )

        url = urls[feed_name]
        try:
            # Note: feedparser usually reports fetch/parse problems via feed.bozo
            # instead of raising, so this mainly guards against unexpected errors.
            feed = feedparser.parse(url)
            num_entries = 30
            entries = feed.entries[:num_entries] if num_entries else feed.entries
        except Exception as e:
            return Response(
                {"error": f"Failed to fetch feed: {str(e)}"},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )

        response = []
        for entry in entries:
            try:
                if feed_name == "financial times":
                    response_entry = {
                        "title": entry.get("title", "No title available"),
                        "link": entry.get("link", "#"),
                        "author": entry.get("author", "Financial Times"),
                        "published": entry.get("published", "No publish date available"),
                        "description": entry.get("summary", "No summary available"),
                        "image": entry.get("media_thumbnail")[0]["url"]
                        if entry.get("media_thumbnail") and len(entry.get("media_thumbnail")) > 0
                        else "",
                    }

                elif feed_name == "cryptocurrency":
                    # The summary arrives as HTML; take the second <p>, if any,
                    # as the description text.
                    html_content = entry["summary_detail"]["value"]
                    soup = BeautifulSoup(html_content, "html.parser")
                    paragraphs = soup.find_all("p")
                    summary_text = paragraphs[1].get_text(strip=True) if len(paragraphs) > 1 else ""
                    response_entry = {
                        "title": entry.get("title", "No title available"),
                        "link": entry.get("link", "#"),
                        "author": entry.get("author", "Unknown"),
                        "published": entry.get("published", "No publish date available"),
                        "description": summary_text,
                        "image": entry.get("media_content")[0]["url"]
                        if entry.get("media_content") and len(entry.get("media_content")) > 0
                        else "",
                    }
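
                    # Illustrative only (made-up markup): a summary_detail value of
                    #   "<p><img src='https://example.com/x.jpg'></p><p>Bitcoin climbs 5%.</p>"
                    # yields description == "Bitcoin climbs 5%." here.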

                elif feed_name == "comprehensive financial news":
                    media_content = entry.get("media_content", [])
                    image_url = media_content[0]["url"] if media_content and isinstance(media_content, list) else ""
                    response_entry = {
                        "title": entry.get("title", "No title available"),
                        "link": entry.get("link", "#"),
                        # feedparser normalizes dc:creator to "author" and pubDate to
                        # "published", so look those up rather than the raw RSS names.
                        "author": entry.get("author", "Unknown"),
                        "published": entry.get("published", "No publish date available"),
                        "description": BeautifulSoup(entry.get("description", ""), "html.parser").get_text(strip=True),
                        "image": image_url,
                    }

                elif feed_name == "financeasia":
                    raw_description = entry.get("description", "")
                    soup = BeautifulSoup(raw_description, "html.parser")
                    image_tag = soup.find("img")
                    image_url = image_tag.get("src", "") if image_tag else ""
                    clean_description = soup.get_text(strip=True)
                    response_entry = {
                        "title": entry.get("title", "No title available"),
                        "link": entry.get("link", "#"),
                        "author": entry.get("author", "Unknown"),
                        "published": entry.get("published", "No publish date available"),
                        "description": clean_description,
                        "image": image_url,
                    }

                elif feed_name == "expert analysis":
                    response_entry = {
                        "title": entry.title,
                        "link": entry.link,
                        "author": entry.get("author", "Unknown"),
                        "published": entry.published if "published" in entry else "Unknown",
                        "description": entry.description if "description" in entry else "",
                        "image": "",
                    }
                    # Use the first image-typed RSS enclosure, if any, as the thumbnail.
                    if "enclosures" in entry:
                        for enclosure in entry.enclosures:
                            if enclosure.get("type", "").startswith("image/"):
                                response_entry["image"] = enclosure["url"]
                                break

                elif feed_name == "turkey":
                    content_list = entry.get("content", [])
                    content_html = content_list[0].get("value", "") if content_list and isinstance(content_list[0], dict) else ""
                    # Grab the first <img> src with HTMLContentParser, then strip all
                    # tags with BeautifulSoup to get the plain-text description.
                    parser = HTMLContentParser()
                    parser.feed(content_html)
                    image_url = parser.image_url if parser.image_url else ""
                    soup = BeautifulSoup(content_html, "html.parser")
                    description_text = soup.get_text(separator="\n").strip()
                    response_entry = {
                        "title": entry.get("title", "No title available"),
                        "link": entry.get("link", "#"),
                        "author": entry.get("author", "NTV"),
                        "published": entry.get("published", "No publish date available"),
                        "description": description_text,
                        "image": image_url,
                    }

                else:
                    continue

                response.append(response_entry)

            except Exception as e:
                # Skip-and-report: a malformed entry should not fail the whole feed.
                response.append({
                    "title": "Error parsing entry",
                    "error": str(e),
                })

        return Response(response, status=status.HTTP_200_OK)
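
For reference, a minimal sketch of how a client might exercise the new endpoint, assuming the view is routed at a hypothetical /news/ path and reads feed_name from the POST body (that part of the handler sits outside this hunk):

    import requests

    # Hypothetical host and route; the project's urls.py defines the real path.
    resp = requests.post("http://localhost:8000/news/", data={"feed_name": "turkey"})
    resp.raise_for_status()
    for item in resp.json():
        print(item.get("title"), item.get("image", ""))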
