@@ -8,6 +8,7 @@ import json
 import signal
 import sys
 import re
+from duckduckgo_search import DDGS
 from datetime import datetime, timedelta, timezone
 from openai import OpenAI
 from urllib.parse import quote
@@ -169,6 +170,30 @@ def get_top_comments(post_url, reddit, limit=3):
         logging.error(f"Failed to fetch comments for {post_url}: {e}")
         return []
 
+def fetch_duckduckgo_news_context(title, hours=24):
+    try:
+        with DDGS() as ddgs:
+            results = ddgs.news(f"{title} news", timelimit="d", max_results=5)
+            titles = []
+            for r in results:
+                try:
+                    date_str = r["date"]
+                    if '+00:00' in date_str:
+                        dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc)
+                    else:
+                        dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
+                    if dt > (datetime.now(timezone.utc) - timedelta(hours=24)):
+                        titles.append(r["title"].lower())
+                except ValueError as e:
+                    logging.warning(f"Date parsing failed for '{date_str}': {e}")
+                    continue
+            context = " ".join(titles) if titles else "No recent news found within 24 hours"
+            logging.info(f"DuckDuckGo News context for '{title}': {context}")
+            return context
+    except Exception as e:
+        logging.warning(f"DuckDuckGo News context fetch failed for '{title}': {e}")
+        return title
+
 def fetch_reddit_posts():
     reddit = praw.Reddit(
         client_id=REDDIT_CLIENT_ID,
@@ -211,7 +236,7 @@ def curate_from_reddit():
     if not articles:
         print("No Reddit posts available")
         logging.info("No Reddit posts available")
-        return None, None, None
+        return None, None, random.randint(600, 1800)
 
     articles.sort(key=lambda x: x["upvotes"], reverse=True)
 
@@ -249,6 +274,9 @@ def curate_from_reddit():
             continue
 
         top_comments = get_top_comments(link, reddit, limit=3)
+        # Fetch additional context via DDG
+        ddg_context = fetch_duckduckgo_news_context(title)
+        content_to_summarize = f"{title}\n\n{summary}\n\nTop Comments:\n{'\n'.join(top_comments) if top_comments else 'None'}\n\nAdditional Context: {ddg_context}"
         interest_score = is_interesting_reddit(
             title,
             summary,
@@ -266,15 +294,13 @@ def curate_from_reddit():
         num_paragraphs = determine_paragraph_count(interest_score)
         extra_prompt = (
             f"Generate exactly {num_paragraphs} paragraphs.\n"
-            f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
+            f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
             f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
-            f"Do NOT introduce unrelated concepts unless in the content or comments.\n"
+            f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
+            f"Do NOT introduce unrelated concepts unless in the content, comments, or additional context.\n"
             f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
             f"Do not include emojis in the summary."
         )
-        content_to_summarize = f"{title}\n\n{summary}"
-        if top_comments:
-            content_to_summarize += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
 
         final_summary = summarize_with_gpt4o(
             content_to_summarize,
@@ -300,7 +326,6 @@ def curate_from_reddit():
         image_url, image_source, uploader, page_url = get_image(image_query)
 
         hook = get_dynamic_hook(post_data["title"]).strip()
-        # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=None)
 
         # Generate viral share prompt
         share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
@@ -309,7 +334,7 @@ def curate_from_reddit():
             f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
             f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
         )
-        post_data["content"] = f"{final_summary}\n\n{share_links_template}" # Removed cta from content
+        post_data["content"] = f"{final_summary}\n\n{share_links_template}"
 
         global is_posting
         is_posting = True
@@ -335,8 +360,7 @@ def curate_from_reddit():
             share_text_encoded = quote(share_text)
             post_url_encoded = quote(post_url)
             share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
-            # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
-            post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
+            post_data["content"] = f"{final_summary}\n\n{share_links}"
             is_posting = True
             try:
                 post_to_wp(