Incorporate external news context from DuckDuckGo (DDG)
This commit is contained in:
@@ -215,7 +215,9 @@ def curate_from_google_trends(geo_list=['US']):
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
scoring_content = f"{title}\n\n{summary}"
|
||||
# Fetch additional context via DDG
|
||||
ddg_context = fetch_duckduckgo_news_context(title)
|
||||
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
|
||||
interest_score = is_interesting(scoring_content)
|
||||
logging.info(f"Interest score for '{title}': {interest_score}")
|
||||
if interest_score < 6:
|
||||
@@ -227,8 +229,9 @@ def curate_from_google_trends(geo_list=['US']):
|
||||
num_paragraphs = determine_paragraph_count(interest_score)
|
||||
extra_prompt = (
|
||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
|
||||
f"Do NOT introduce unrelated concepts.\n"
|
||||
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
||||
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
||||
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
|
||||
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
|
||||
f"Do not include emojis in the summary."
|
||||
)
|
||||
@@ -291,8 +294,7 @@ def curate_from_google_trends(geo_list=['US']):
|
||||
share_text_encoded = quote(share_text)
|
||||
post_url_encoded = quote(post_url)
|
||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
||||
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||
is_posting = True
|
||||
try:
|
||||
post_to_wp(
|
||||
|
||||
+34
-10
@@ -8,6 +8,7 @@ import json
|
||||
import signal
|
||||
import sys
|
||||
import re
|
||||
from duckduckgo_search import DDGS
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from openai import OpenAI
|
||||
from urllib.parse import quote
|
||||
@@ -169,6 +170,30 @@ def get_top_comments(post_url, reddit, limit=3):
|
||||
logging.error(f"Failed to fetch comments for {post_url}: {e}")
|
||||
return []
|
||||
|
||||
def fetch_duckduckgo_news_context(title, hours=24):
    """Collect recent DuckDuckGo News headlines related to ``title``.

    Searches DuckDuckGo News for ``"<title> news"`` (at most 5 results) and
    keeps the lower-cased headlines published within the last ``hours`` hours.

    Args:
        title: Headline or topic to search for.
        hours: Size of the recency window in hours. Defaults to 24.

    Returns:
        The matching headlines joined by single spaces, or the sentence
        "No recent news found within <hours> hours" when nothing qualifies.
        If the search itself fails for any reason, ``title`` is returned
        unchanged so callers always receive a usable string.
    """
    try:
        # DuckDuckGo only filters by day/week/month, so request the narrowest
        # server-side window that still covers ``hours``; the exact cut-off
        # is enforced client-side below.
        if hours <= 24:
            timelimit = "d"
        elif hours <= 24 * 7:
            timelimit = "w"
        else:
            timelimit = "m"

        with DDGS() as ddgs:
            results = ddgs.news(f"{title} news", timelimit=timelimit, max_results=5)
            # Compute the cut-off once so every result is judged against the
            # same instant.
            cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
            titles = []
            for r in results:
                try:
                    date_str = r["date"]
                    headline = r["title"]
                except KeyError as e:
                    # One malformed result must not discard the others.
                    logging.warning(f"DuckDuckGo News result missing field {e} for '{title}'")
                    continue
                try:
                    # Normalise a trailing "Z" so ISO-8601 parsing also works
                    # on interpreters whose fromisoformat() rejects it.
                    dt = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
                except (AttributeError, TypeError, ValueError) as e:
                    logging.warning(f"Date parsing failed for '{date_str}': {e}")
                    continue
                if dt.tzinfo is None:
                    # Timestamps without an offset are treated as UTC.
                    dt = dt.replace(tzinfo=timezone.utc)
                if dt > cutoff:
                    titles.append(headline.lower())
            context = " ".join(titles) if titles else f"No recent news found within {hours} hours"
            logging.info(f"DuckDuckGo News context for '{title}': {context}")
            return context
    except Exception as e:
        # Best-effort enrichment: on any failure fall back to the title itself.
        logging.warning(f"DuckDuckGo News context fetch failed for '{title}': {e}")
        return title
|
||||
|
||||
def fetch_reddit_posts():
|
||||
reddit = praw.Reddit(
|
||||
client_id=REDDIT_CLIENT_ID,
|
||||
@@ -211,7 +236,7 @@ def curate_from_reddit():
|
||||
if not articles:
|
||||
print("No Reddit posts available")
|
||||
logging.info("No Reddit posts available")
|
||||
return None, None, None
|
||||
return None, None, random.randint(600, 1800)
|
||||
|
||||
articles.sort(key=lambda x: x["upvotes"], reverse=True)
|
||||
|
||||
@@ -249,6 +274,9 @@ def curate_from_reddit():
|
||||
continue
|
||||
|
||||
top_comments = get_top_comments(link, reddit, limit=3)
|
||||
# Fetch additional context via DDG
|
||||
ddg_context = fetch_duckduckgo_news_context(title)
|
||||
content_to_summarize = f"{title}\n\n{summary}\n\nTop Comments:\n{'\n'.join(top_comments) if top_comments else 'None'}\n\nAdditional Context: {ddg_context}"
|
||||
interest_score = is_interesting_reddit(
|
||||
title,
|
||||
summary,
|
||||
@@ -266,15 +294,13 @@ def curate_from_reddit():
|
||||
num_paragraphs = determine_paragraph_count(interest_score)
|
||||
extra_prompt = (
|
||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
|
||||
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
||||
f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
|
||||
f"Do NOT introduce unrelated concepts unless in the content or comments.\n"
|
||||
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
||||
f"Do NOT introduce unrelated concepts unless in the content, comments, or additional context.\n"
|
||||
f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
|
||||
f"Do not include emojis in the summary."
|
||||
)
|
||||
content_to_summarize = f"{title}\n\n{summary}"
|
||||
if top_comments:
|
||||
content_to_summarize += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
|
||||
|
||||
final_summary = summarize_with_gpt4o(
|
||||
content_to_summarize,
|
||||
@@ -300,7 +326,6 @@ def curate_from_reddit():
|
||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||
|
||||
hook = get_dynamic_hook(post_data["title"]).strip()
|
||||
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=None)
|
||||
|
||||
# Generate viral share prompt
|
||||
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
||||
@@ -309,7 +334,7 @@ def curate_from_reddit():
|
||||
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
||||
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
||||
)
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links_template}" # Removed cta from content
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
|
||||
|
||||
global is_posting
|
||||
is_posting = True
|
||||
@@ -335,8 +360,7 @@ def curate_from_reddit():
|
||||
share_text_encoded = quote(share_text)
|
||||
post_url_encoded = quote(post_url)
|
||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
||||
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||
is_posting = True
|
||||
try:
|
||||
post_to_wp(
|
||||
|
||||
+34
-18
@@ -9,6 +9,8 @@ import signal
|
||||
import sys
|
||||
import re
|
||||
import email.utils
|
||||
import feedparser
|
||||
from duckduckgo_search import DDGS
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from bs4 import BeautifulSoup
|
||||
from openai import OpenAI
|
||||
@@ -136,6 +138,7 @@ def fetch_rss_feeds():
|
||||
logging.error("RSS_FEEDS is empty in foodie_config.py")
|
||||
return articles
|
||||
|
||||
logging.info(f"Processing feeds: {RSS_FEEDS}")
|
||||
for feed_url in RSS_FEEDS:
|
||||
logging.info(f"Processing feed: {feed_url}")
|
||||
try:
|
||||
@@ -182,6 +185,30 @@ def fetch_rss_feeds():
|
||||
logging.info(f"Total RSS articles fetched: {len(articles)}")
|
||||
return articles
|
||||
|
||||
def fetch_duckduckgo_news_context(title, hours=24):
    """Collect recent DuckDuckGo News headlines related to ``title``.

    Searches DuckDuckGo News for ``"<title> news"`` (at most 5 results) and
    keeps the lower-cased headlines published within the last ``hours`` hours.

    Args:
        title: Headline or topic to search for.
        hours: Size of the recency window in hours. Defaults to 24.

    Returns:
        The matching headlines joined by single spaces, or the sentence
        "No recent news found within <hours> hours" when nothing qualifies.
        If the search itself fails for any reason, ``title`` is returned
        unchanged so callers always receive a usable string.
    """
    try:
        # DuckDuckGo only filters by day/week/month, so request the narrowest
        # server-side window that still covers ``hours``; the exact cut-off
        # is enforced client-side below.
        if hours <= 24:
            timelimit = "d"
        elif hours <= 24 * 7:
            timelimit = "w"
        else:
            timelimit = "m"

        with DDGS() as ddgs:
            results = ddgs.news(f"{title} news", timelimit=timelimit, max_results=5)
            # Compute the cut-off once so every result is judged against the
            # same instant.
            cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
            titles = []
            for r in results:
                try:
                    date_str = r["date"]
                    headline = r["title"]
                except KeyError as e:
                    # One malformed result must not discard the others.
                    logging.warning(f"DuckDuckGo News result missing field {e} for '{title}'")
                    continue
                try:
                    # Normalise a trailing "Z" so ISO-8601 parsing also works
                    # on interpreters whose fromisoformat() rejects it.
                    dt = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
                except (AttributeError, TypeError, ValueError) as e:
                    logging.warning(f"Date parsing failed for '{date_str}': {e}")
                    continue
                if dt.tzinfo is None:
                    # Timestamps without an offset are treated as UTC.
                    dt = dt.replace(tzinfo=timezone.utc)
                if dt > cutoff:
                    titles.append(headline.lower())
            context = " ".join(titles) if titles else f"No recent news found within {hours} hours"
            logging.info(f"DuckDuckGo News context for '{title}': {context}")
            return context
    except Exception as e:
        # Best-effort enrichment: on any failure fall back to the title itself.
        logging.warning(f"DuckDuckGo News context fetch failed for '{title}': {e}")
        return title
|
||||
|
||||
def curate_from_rss():
|
||||
articles = fetch_rss_feeds()
|
||||
if not articles:
|
||||
@@ -216,10 +243,12 @@ def curate_from_rss():
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
scoring_content = f"{title}\n\n{summary}\n\nContent: {content}"
|
||||
# Fetch additional context via DDG
|
||||
ddg_context = fetch_duckduckgo_news_context(title)
|
||||
scoring_content = f"{title}\n\n{summary}\n\nContent: {content}\n\nAdditional Context: {ddg_context}"
|
||||
interest_score = is_interesting(scoring_content)
|
||||
logging.info(f"Interest score for '{title}': {interest_score}")
|
||||
if interest_score < 6:
|
||||
if interest_score < 7:
|
||||
print(f"RSS Interest Too Low: {interest_score}")
|
||||
logging.info(f"RSS Interest Too Low: {interest_score}")
|
||||
attempts += 1
|
||||
@@ -228,8 +257,9 @@ def curate_from_rss():
|
||||
num_paragraphs = determine_paragraph_count(interest_score)
|
||||
extra_prompt = (
|
||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
|
||||
f"Do NOT introduce unrelated concepts.\n"
|
||||
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
||||
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
||||
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
|
||||
f"Expand on the core idea with relevant context about its appeal or significance.\n"
|
||||
f"Do not include emojis in the summary."
|
||||
)
|
||||
@@ -246,20 +276,6 @@ def curate_from_rss():
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
# Remove the original title from the summary while preserving paragraphs
|
||||
title_pattern = re.compile(
|
||||
r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title),
|
||||
re.IGNORECASE
|
||||
)
|
||||
paragraphs = final_summary.split('\n')
|
||||
cleaned_paragraphs = []
|
||||
for para in paragraphs:
|
||||
if para.strip():
|
||||
cleaned_para = title_pattern.sub('', para).strip()
|
||||
cleaned_para = re.sub(r'\s+', ' ', cleaned_para)
|
||||
cleaned_paragraphs.append(cleaned_para)
|
||||
final_summary = '\n'.join(cleaned_paragraphs)
|
||||
|
||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
|
||||
if not post_data:
|
||||
|
||||
+4
-4
@@ -612,10 +612,10 @@ def insert_link_naturally(summary, source_name, source_url):
|
||||
target_para = random.choice([p for p in paragraphs if p.strip()])
|
||||
link_pattern = f'<a href="{source_url}">{source_name}</a>'
|
||||
phrases = [
|
||||
f"Learn more from {link_pattern}",
|
||||
f"{link_pattern} shares this insight",
|
||||
f"Discover more at {link_pattern}",
|
||||
f"Check out {link_pattern} for details"
|
||||
f"According to {link_pattern}", # Changed to a more neutral phrasing
|
||||
f"{link_pattern} notes this insight", # Adjusted phrasing
|
||||
f"Details shared by {link_pattern}", # Adjusted phrasing
|
||||
f"Source: {link_pattern}" # Simple attribution
|
||||
]
|
||||
insertion_phrase = random.choice(phrases)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user