Incorporate external context from DuckDuckGo (DDG) News
This commit is contained in:
+34
-18
@@ -9,6 +9,8 @@ import signal
|
||||
import sys
|
||||
import re
|
||||
import email.utils
|
||||
import feedparser
|
||||
from duckduckgo_search import DDGS
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from bs4 import BeautifulSoup
|
||||
from openai import OpenAI
|
||||
@@ -136,6 +138,7 @@ def fetch_rss_feeds():
|
||||
logging.error("RSS_FEEDS is empty in foodie_config.py")
|
||||
return articles
|
||||
|
||||
logging.info(f"Processing feeds: {RSS_FEEDS}")
|
||||
for feed_url in RSS_FEEDS:
|
||||
logging.info(f"Processing feed: {feed_url}")
|
||||
try:
|
||||
@@ -182,6 +185,30 @@ def fetch_rss_feeds():
|
||||
logging.info(f"Total RSS articles fetched: {len(articles)}")
|
||||
return articles
|
||||
|
||||
def fetch_duckduckgo_news_context(title, hours=24):
    """Build extra scoring context from recent DuckDuckGo News headlines.

    Queries DuckDuckGo News for ``"<title> news"`` and joins the lowercased
    headlines of every result published within the last ``hours`` hours.

    Args:
        title: Article title used to build the search query.
        hours: Maximum age, in hours, of a result for it to be kept.
            NOTE(review): the search is issued with ``timelimit="d"``, which
            presumably restricts results to the past day on the DuckDuckGo
            side, so values above 24 may not yield more results -- confirm
            against the duckduckgo_search documentation.

    Returns:
        A space-joined string of lowercased headlines; a "No recent news
        found" sentence when no result qualifies; or ``title`` unchanged when
        the lookup itself fails (best-effort: this never raises).
    """
    try:
        with DDGS() as ddgs:
            results = ddgs.news(f"{title} news", timelimit="d", max_results=5)
            # Loop-invariant: compute the age cutoff once, honouring `hours`.
            cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
            titles = []
            for r in results:
                date_str = r.get("date")
                try:
                    # fromisoformat() only accepts a trailing "Z" on
                    # Python 3.11+, so normalise it to an explicit offset.
                    if date_str.endswith("Z"):
                        iso_str = date_str[:-1] + "+00:00"
                    else:
                        iso_str = date_str
                    dt = datetime.fromisoformat(iso_str)
                    # Treat offset-less timestamps as UTC so the comparison
                    # below never mixes naive and aware datetimes.
                    if dt.tzinfo is None:
                        dt = dt.replace(tzinfo=timezone.utc)
                except (AttributeError, TypeError, ValueError) as e:
                    # A missing or malformed date skips this result only,
                    # instead of discarding every headline collected so far.
                    logging.warning(f"Date parsing failed for '{date_str}': {e}")
                    continue
                headline = r.get("title")
                if headline and dt > cutoff:
                    titles.append(headline.lower())
        context = " ".join(titles) if titles else f"No recent news found within {hours} hours"
        logging.info(f"DuckDuckGo News context for '{title}': {context}")
        return context
    except Exception as e:
        # Deliberate best-effort boundary around a network call: any failure
        # falls back to the bare title so scoring can still proceed.
        logging.warning(f"DuckDuckGo News context fetch failed for '{title}': {e}")
        return title
|
||||
|
||||
def curate_from_rss():
|
||||
articles = fetch_rss_feeds()
|
||||
if not articles:
|
||||
@@ -216,10 +243,12 @@ def curate_from_rss():
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
scoring_content = f"{title}\n\n{summary}\n\nContent: {content}"
|
||||
# Fetch additional context via DDG
|
||||
ddg_context = fetch_duckduckgo_news_context(title)
|
||||
scoring_content = f"{title}\n\n{summary}\n\nContent: {content}\n\nAdditional Context: {ddg_context}"
|
||||
interest_score = is_interesting(scoring_content)
|
||||
logging.info(f"Interest score for '{title}': {interest_score}")
|
||||
if interest_score < 6:
|
||||
if interest_score < 7:
|
||||
print(f"RSS Interest Too Low: {interest_score}")
|
||||
logging.info(f"RSS Interest Too Low: {interest_score}")
|
||||
attempts += 1
|
||||
@@ -228,8 +257,9 @@ def curate_from_rss():
|
||||
num_paragraphs = determine_paragraph_count(interest_score)
|
||||
extra_prompt = (
|
||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
|
||||
f"Do NOT introduce unrelated concepts.\n"
|
||||
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
||||
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
||||
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
|
||||
f"Expand on the core idea with relevant context about its appeal or significance.\n"
|
||||
f"Do not include emojis in the summary."
|
||||
)
|
||||
@@ -246,20 +276,6 @@ def curate_from_rss():
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
# Remove the original title from the summary while preserving paragraphs
|
||||
title_pattern = re.compile(
|
||||
r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title),
|
||||
re.IGNORECASE
|
||||
)
|
||||
paragraphs = final_summary.split('\n')
|
||||
cleaned_paragraphs = []
|
||||
for para in paragraphs:
|
||||
if para.strip():
|
||||
cleaned_para = title_pattern.sub('', para).strip()
|
||||
cleaned_para = re.sub(r'\s+', ' ', cleaned_para)
|
||||
cleaned_paragraphs.append(cleaned_para)
|
||||
final_summary = '\n'.join(cleaned_paragraphs)
|
||||
|
||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
|
||||
if not post_data:
|
||||
|
||||
Reference in New Issue
Block a user