incorporate external context from DDG

Branch: main
Shane authored 7 months ago
Parent: 427a5cb919
Commit: e5ebd000fe
1. foodie_automator_google.py (12 lines changed)
2. foodie_automator_reddit.py (44 lines changed)
3. foodie_automator_rss.py (52 lines changed)
4. foodie_utils.py (8 lines changed)
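All four files apply the same pattern: a new fetch_duckduckgo_news_context helper pulls recent DuckDuckGo News headlines for an article's title, and that text is appended as "Additional Context" to the content used for interest scoring and summarization. A rough sketch of the shared flow, using the helper names from the diff below (is_interesting and summarize_with_gpt4o are the project's existing helpers; their exact signatures are assumed here):

```python
# Sketch of the pattern this commit applies in each automator; the helper
# bodies appear in the diff below, and the scoring threshold varies by source.
def curate(title, summary):
    ddg_context = fetch_duckduckgo_news_context(title)  # recent headlines, or the title on failure
    scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
    if is_interesting(scoring_content) < 6:  # the RSS automator now requires >= 7
        return None
    return summarize_with_gpt4o(scoring_content)  # extra prompt arguments omitted
```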

@@ -215,7 +215,9 @@ def curate_from_google_trends(geo_list=['US']):
             attempts += 1
             continue
-        scoring_content = f"{title}\n\n{summary}"
+        # Fetch additional context via DDG
+        ddg_context = fetch_duckduckgo_news_context(title)
+        scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
         interest_score = is_interesting(scoring_content)
         logging.info(f"Interest score for '{title}': {interest_score}")
         if interest_score < 6:
@@ -227,8 +229,9 @@ def curate_from_google_trends(geo_list=['US']):
         num_paragraphs = determine_paragraph_count(interest_score)
         extra_prompt = (
             f"Generate exactly {num_paragraphs} paragraphs.\n"
-            f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
-            f"Do NOT introduce unrelated concepts.\n"
+            f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
+            f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
+            f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
             f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
             f"Do not include emojis in the summary."
         )
@@ -291,8 +294,7 @@ def curate_from_google_trends(geo_list=['US']):
         share_text_encoded = quote(share_text)
         post_url_encoded = quote(post_url)
         share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
-        # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
-        post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
+        post_data["content"] = f"{final_summary}\n\n{share_links}"
         is_posting = True
         try:
             post_to_wp(

@@ -8,6 +8,7 @@ import json
 import signal
 import sys
 import re
+from duckduckgo_search import DDGS
 from datetime import datetime, timedelta, timezone
 from openai import OpenAI
 from urllib.parse import quote
@@ -169,6 +170,30 @@ def get_top_comments(post_url, reddit, limit=3):
         logging.error(f"Failed to fetch comments for {post_url}: {e}")
         return []
 
+def fetch_duckduckgo_news_context(title, hours=24):
+    try:
+        with DDGS() as ddgs:
+            results = ddgs.news(f"{title} news", timelimit="d", max_results=5)
+            titles = []
+            for r in results:
+                try:
+                    date_str = r["date"]
+                    if '+00:00' in date_str:
+                        dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc)
+                    else:
+                        dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
+                    if dt > (datetime.now(timezone.utc) - timedelta(hours=24)):
+                        titles.append(r["title"].lower())
+                except ValueError as e:
+                    logging.warning(f"Date parsing failed for '{date_str}': {e}")
+                    continue
+            context = " ".join(titles) if titles else "No recent news found within 24 hours"
+            logging.info(f"DuckDuckGo News context for '{title}': {context}")
+            return context
+    except Exception as e:
+        logging.warning(f"DuckDuckGo News context fetch failed for '{title}': {e}")
+        return title
+
 def fetch_reddit_posts():
     reddit = praw.Reddit(
         client_id=REDDIT_CLIENT_ID,
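A side note on the date handling above: the hours parameter is accepted but the body hard-codes a 24-hour window, and the two strptime branches only cover two timestamp shapes. A minimal alternative sketch (not the committed code) that honors hours and uses datetime.fromisoformat, which parses any "+HH:MM" offset and, with a small "Z" replacement, works back to Python 3.7:

```python
from datetime import datetime, timedelta, timezone

def is_recent(date_str: str, hours: int = 24) -> bool:
    """True if an ISO-8601 timestamp falls within the last `hours` hours."""
    # fromisoformat() handles arbitrary UTC offsets; mapping "Z" -> "+00:00"
    # keeps this working on interpreters older than 3.11.
    dt = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)  # assume UTC for naive timestamps
    return dt > datetime.now(timezone.utc) - timedelta(hours=hours)

print(is_recent("2024-01-01T12:00:00Z"))  # False: well outside any recent window
```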
@@ -211,7 +236,7 @@ def curate_from_reddit():
     if not articles:
         print("No Reddit posts available")
         logging.info("No Reddit posts available")
-        return None, None, None
+        return None, None, random.randint(600, 1800)
 
     articles.sort(key=lambda x: x["upvotes"], reverse=True)
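The empty-result path now returns random.randint(600, 1800) in the third slot instead of None, which reads as a randomized retry delay in seconds (10 to 30 minutes). A hypothetical caller, assuming the third return value is treated as a sleep interval; the actual scheduler loop is not part of this diff:

```python
import time

# Hypothetical retry loop around curate_from_reddit() (variable names assumed):
while True:
    post, link, delay = curate_from_reddit()
    if post is None:
        time.sleep(delay)  # randomized 10-30 minute back-off before retrying
        continue
    break  # a post was curated; proceed with publishing
```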
@@ -249,6 +274,9 @@ def curate_from_reddit():
             continue
 
         top_comments = get_top_comments(link, reddit, limit=3)
+        # Fetch additional context via DDG
+        ddg_context = fetch_duckduckgo_news_context(title)
+        content_to_summarize = f"{title}\n\n{summary}\n\nTop Comments:\n{'\n'.join(top_comments) if top_comments else 'None'}\n\nAdditional Context: {ddg_context}"
         interest_score = is_interesting_reddit(
             title,
             summary,
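One portability note on the new content_to_summarize line: it places '\n'.join(...) inside an f-string expression, and backslashes there are a SyntaxError before Python 3.12. An equivalent that also parses on Python 3.8 through 3.11 joins the comments outside the f-string:

```python
# Equivalent construction without a backslash inside the f-string expression
# (illustrative values; title, summary, etc. come from the Reddit post):
title, summary, ddg_context = "Example title", "Example summary", "recent headline text"
top_comments = ["Great recipe", "Tried it last night"]

comments_block = "\n".join(top_comments) if top_comments else "None"
content_to_summarize = (
    f"{title}\n\n{summary}\n\n"
    f"Top Comments:\n{comments_block}\n\n"
    f"Additional Context: {ddg_context}"
)
print(content_to_summarize)
```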
@@ -266,15 +294,13 @@ def curate_from_reddit():
         num_paragraphs = determine_paragraph_count(interest_score)
         extra_prompt = (
             f"Generate exactly {num_paragraphs} paragraphs.\n"
-            f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
+            f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
             f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
-            f"Do NOT introduce unrelated concepts unless in the content or comments.\n"
+            f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
+            f"Do NOT introduce unrelated concepts unless in the content, comments, or additional context.\n"
             f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
             f"Do not include emojis in the summary."
         )
-        content_to_summarize = f"{title}\n\n{summary}"
-        if top_comments:
-            content_to_summarize += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
 
         final_summary = summarize_with_gpt4o(
             content_to_summarize,
@@ -300,7 +326,6 @@ def curate_from_reddit():
         image_url, image_source, uploader, page_url = get_image(image_query)
 
         hook = get_dynamic_hook(post_data["title"]).strip()
-        # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=None)
 
         # Generate viral share prompt
         share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
@@ -309,7 +334,7 @@ def curate_from_reddit():
             f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
             f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
         )
-        post_data["content"] = f"{final_summary}\n\n{share_links_template}" # Removed cta from content
+        post_data["content"] = f"{final_summary}\n\n{share_links_template}"
 
         global is_posting
         is_posting = True
@@ -335,8 +360,7 @@ def curate_from_reddit():
         share_text_encoded = quote(share_text)
         post_url_encoded = quote(post_url)
         share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
-        # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
-        post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
+        post_data["content"] = f"{final_summary}\n\n{share_links}"
         is_posting = True
         try:
             post_to_wp(

@@ -9,6 +9,8 @@ import signal
 import sys
 import re
 import email.utils
+import feedparser
+from duckduckgo_search import DDGS
 from datetime import datetime, timedelta, timezone
 from bs4 import BeautifulSoup
 from openai import OpenAI
@@ -136,6 +138,7 @@ def fetch_rss_feeds():
         logging.error("RSS_FEEDS is empty in foodie_config.py")
         return articles
 
+    logging.info(f"Processing feeds: {RSS_FEEDS}")
     for feed_url in RSS_FEEDS:
         logging.info(f"Processing feed: {feed_url}")
         try:
@@ -182,6 +185,30 @@ def fetch_rss_feeds():
     logging.info(f"Total RSS articles fetched: {len(articles)}")
     return articles
 
+def fetch_duckduckgo_news_context(title, hours=24):
+    try:
+        with DDGS() as ddgs:
+            results = ddgs.news(f"{title} news", timelimit="d", max_results=5)
+            titles = []
+            for r in results:
+                try:
+                    date_str = r["date"]
+                    if '+00:00' in date_str:
+                        dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc)
+                    else:
+                        dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
+                    if dt > (datetime.now(timezone.utc) - timedelta(hours=24)):
+                        titles.append(r["title"].lower())
+                except ValueError as e:
+                    logging.warning(f"Date parsing failed for '{date_str}': {e}")
+                    continue
+            context = " ".join(titles) if titles else "No recent news found within 24 hours"
+            logging.info(f"DuckDuckGo News context for '{title}': {context}")
+            return context
+    except Exception as e:
+        logging.warning(f"DuckDuckGo News context fetch failed for '{title}': {e}")
+        return title
+
 def curate_from_rss():
     articles = fetch_rss_feeds()
     if not articles:
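This helper is an exact copy of the one added to foodie_automator_reddit.py. A hypothetical follow-up (not part of this commit) would keep a single definition in foodie_utils.py and import it from both automators:

```python
# foodie_utils.py - single shared definition (body as shown in the diff above)
def fetch_duckduckgo_news_context(title, hours=24):
    ...

# foodie_automator_reddit.py / foodie_automator_rss.py
from foodie_utils import fetch_duckduckgo_news_context
```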
@@ -216,10 +243,12 @@ def curate_from_rss():
             attempts += 1
             continue
-        scoring_content = f"{title}\n\n{summary}\n\nContent: {content}"
+        # Fetch additional context via DDG
+        ddg_context = fetch_duckduckgo_news_context(title)
+        scoring_content = f"{title}\n\n{summary}\n\nContent: {content}\n\nAdditional Context: {ddg_context}"
         interest_score = is_interesting(scoring_content)
         logging.info(f"Interest score for '{title}': {interest_score}")
-        if interest_score < 6:
+        if interest_score < 7:
             print(f"RSS Interest Too Low: {interest_score}")
             logging.info(f"RSS Interest Too Low: {interest_score}")
             attempts += 1
@@ -228,8 +257,9 @@ def curate_from_rss():
         num_paragraphs = determine_paragraph_count(interest_score)
         extra_prompt = (
             f"Generate exactly {num_paragraphs} paragraphs.\n"
-            f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
-            f"Do NOT introduce unrelated concepts.\n"
+            f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
+            f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
+            f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
             f"Expand on the core idea with relevant context about its appeal or significance.\n"
             f"Do not include emojis in the summary."
         )
@@ -246,20 +276,6 @@ def curate_from_rss():
             attempts += 1
             continue
 
-        # Remove the original title from the summary while preserving paragraphs
-        title_pattern = re.compile(
-            r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title),
-            re.IGNORECASE
-        )
-        paragraphs = final_summary.split('\n')
-        cleaned_paragraphs = []
-        for para in paragraphs:
-            if para.strip():
-                cleaned_para = title_pattern.sub('', para).strip()
-                cleaned_para = re.sub(r'\s+', ' ', cleaned_para)
-                cleaned_paragraphs.append(cleaned_para)
-        final_summary = '\n'.join(cleaned_paragraphs)
 
         final_summary = insert_link_naturally(final_summary, source_name, link)
         post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
         if not post_data:

@@ -612,10 +612,10 @@ def insert_link_naturally(summary, source_name, source_url):
     target_para = random.choice([p for p in paragraphs if p.strip()])
     link_pattern = f'<a href="{source_url}">{source_name}</a>'
     phrases = [
-        f"Learn more from {link_pattern}",
-        f"{link_pattern} shares this insight",
-        f"Discover more at {link_pattern}",
-        f"Check out {link_pattern} for details"
+        f"According to {link_pattern}",  # Changed to a more neutral phrasing
+        f"{link_pattern} notes this insight",  # Adjusted phrasing
+        f"Details shared by {link_pattern}",  # Adjusted phrasing
+        f"Source: {link_pattern}"  # Simple attribution
     ]
     insertion_phrase = random.choice(phrases)
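With illustrative values, the new phrases render as plain source attributions rather than calls to action:

```python
# Illustrative values only; source_name and source_url come from the feed entry.
source_name, source_url = "Example Feed", "https://example.com/article"
link_pattern = f'<a href="{source_url}">{source_name}</a>'
print(f"According to {link_pattern}")
# -> According to <a href="https://example.com/article">Example Feed</a>
print(f"Source: {link_pattern}")
# -> Source: <a href="https://example.com/article">Example Feed</a>
```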
