incorporate external context from DDG

This commit is contained in:
2025-05-04 09:07:45 +10:00
parent 427a5cb919
commit e5ebd000fe
4 changed files with 79 additions and 37 deletions
+34 -10
View File
@@ -8,6 +8,7 @@ import json
import signal
import sys
import re
from duckduckgo_search import DDGS
from datetime import datetime, timedelta, timezone
from openai import OpenAI
from urllib.parse import quote
@@ -168,6 +169,30 @@ def get_top_comments(post_url, reddit, limit=3):
except Exception as e:
logging.error(f"Failed to fetch comments for {post_url}: {e}")
return []
def fetch_duckduckgo_news_context(title, hours=24):
    """Return recent DuckDuckGo News headlines related to ``title``.

    Searches DuckDuckGo News for ``"<title> news"`` and keeps the headlines
    published within the last ``hours`` hours.

    Args:
        title: Post title used to build the news query.
        hours: Size of the recency window, in hours. Defaults to 24.

    Returns:
        The matching headlines, lower-cased and joined with single spaces.
        If no headline falls inside the window, a "No recent news found ..."
        placeholder string. If the lookup itself fails for any reason, the
        original ``title`` is returned unchanged (best-effort enrichment).
    """
    try:
        # DDGS.news only understands coarse windows (day / week / month), so
        # pick the smallest one that still covers the requested hours; the
        # exact cutoff is enforced below on each result's timestamp.
        if hours <= 24:
            timelimit = "d"
        elif hours <= 24 * 7:
            timelimit = "w"
        else:
            timelimit = "m"

        with DDGS() as ddgs:
            results = ddgs.news(f"{title} news", timelimit=timelimit, max_results=5)
            cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
            titles = []
            for r in results:
                date_str = r.get("date")
                headline = r.get("title")
                if not date_str or not headline:
                    # One malformed result should not discard the others.
                    logging.warning(f"Skipping DuckDuckGo News result without date/title: {r}")
                    continue
                try:
                    # datetime.fromisoformat rejects a trailing "Z" before
                    # Python 3.11, so rewrite it as an explicit UTC offset.
                    if date_str.endswith("Z"):
                        iso_str = date_str[:-1] + "+00:00"
                    else:
                        iso_str = date_str
                    dt = datetime.fromisoformat(iso_str)
                except ValueError as e:
                    logging.warning(f"Date parsing failed for '{date_str}': {e}")
                    continue
                if dt.tzinfo is None:
                    # Treat naive timestamps as UTC so the comparison with
                    # the timezone-aware cutoff is valid.
                    dt = dt.replace(tzinfo=timezone.utc)
                if dt > cutoff:
                    titles.append(headline.lower())
            context = " ".join(titles) if titles else f"No recent news found within {hours} hours"
            logging.info(f"DuckDuckGo News context for '{title}': {context}")
            return context
    except Exception as e:
        # Extra context is optional: never let a search failure break curation.
        logging.warning(f"DuckDuckGo News context fetch failed for '{title}': {e}")
        return title
def fetch_reddit_posts():
reddit = praw.Reddit(
@@ -211,7 +236,7 @@ def curate_from_reddit():
if not articles:
print("No Reddit posts available")
logging.info("No Reddit posts available")
return None, None, None
return None, None, random.randint(600, 1800)
articles.sort(key=lambda x: x["upvotes"], reverse=True)
@@ -249,6 +274,9 @@ def curate_from_reddit():
continue
top_comments = get_top_comments(link, reddit, limit=3)
# Fetch additional context via DDG
ddg_context = fetch_duckduckgo_news_context(title)
content_to_summarize = f"{title}\n\n{summary}\n\nTop Comments:\n{'\n'.join(top_comments) if top_comments else 'None'}\n\nAdditional Context: {ddg_context}"
interest_score = is_interesting_reddit(
title,
summary,
@@ -266,15 +294,13 @@ def curate_from_reddit():
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
f"Do NOT introduce unrelated concepts unless in the content or comments.\n"
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
f"Do NOT introduce unrelated concepts unless in the content, comments, or additional context.\n"
f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = f"{title}\n\n{summary}"
if top_comments:
content_to_summarize += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
final_summary = summarize_with_gpt4o(
content_to_summarize,
@@ -300,7 +326,6 @@ def curate_from_reddit():
image_url, image_source, uploader, page_url = get_image(image_query)
hook = get_dynamic_hook(post_data["title"]).strip()
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=None)
# Generate viral share prompt
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
@@ -309,7 +334,7 @@ def curate_from_reddit():
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
)
post_data["content"] = f"{final_summary}\n\n{share_links_template}" # Removed cta from content
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
global is_posting
is_posting = True
@@ -335,8 +360,7 @@ def curate_from_reddit():
share_text_encoded = quote(share_text)
post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
post_data["content"] = f"{final_summary}\n\n{share_links}"
is_posting = True
try:
post_to_wp(