incorporate external context from DDG
This commit is contained in:
@@ -215,7 +215,9 @@ def curate_from_google_trends(geo_list=['US']):
|
|||||||
attempts += 1
|
attempts += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
scoring_content = f"{title}\n\n{summary}"
|
# Fetch additional context via DDG
|
||||||
|
ddg_context = fetch_duckduckgo_news_context(title)
|
||||||
|
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
|
||||||
interest_score = is_interesting(scoring_content)
|
interest_score = is_interesting(scoring_content)
|
||||||
logging.info(f"Interest score for '{title}': {interest_score}")
|
logging.info(f"Interest score for '{title}': {interest_score}")
|
||||||
if interest_score < 6:
|
if interest_score < 6:
|
||||||
@@ -227,8 +229,9 @@ def curate_from_google_trends(geo_list=['US']):
|
|||||||
num_paragraphs = determine_paragraph_count(interest_score)
|
num_paragraphs = determine_paragraph_count(interest_score)
|
||||||
extra_prompt = (
|
extra_prompt = (
|
||||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||||
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
|
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
||||||
f"Do NOT introduce unrelated concepts.\n"
|
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
||||||
|
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
|
||||||
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
|
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
|
||||||
f"Do not include emojis in the summary."
|
f"Do not include emojis in the summary."
|
||||||
)
|
)
|
||||||
@@ -291,8 +294,7 @@ def curate_from_google_trends(geo_list=['US']):
|
|||||||
share_text_encoded = quote(share_text)
|
share_text_encoded = quote(share_text)
|
||||||
post_url_encoded = quote(post_url)
|
post_url_encoded = quote(post_url)
|
||||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
||||||
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
|
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
|
|
||||||
is_posting = True
|
is_posting = True
|
||||||
try:
|
try:
|
||||||
post_to_wp(
|
post_to_wp(
|
||||||
|
|||||||
+34
-10
@@ -8,6 +8,7 @@ import json
|
|||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
|
from duckduckgo_search import DDGS
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
@@ -168,6 +169,30 @@ def get_top_comments(post_url, reddit, limit=3):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Failed to fetch comments for {post_url}: {e}")
|
logging.error(f"Failed to fetch comments for {post_url}: {e}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
def fetch_duckduckgo_news_context(title, hours=24):
    """Return recent DuckDuckGo News headlines about *title* as one string.

    Queries DuckDuckGo News for ``"<title> news"`` (at most 5 results) and
    keeps the headlines whose publication date falls within the last
    ``hours`` hours.

    Args:
        title: Headline or topic to look up.
        hours: Size of the look-back window in hours. Defaults to 24.

    Returns:
        The matching headlines, lower-cased and joined by single spaces.
        If nothing recent is found, the sentence
        ``"No recent news found within <hours> hours"``.
        If the lookup itself fails, ``title`` is returned unchanged so the
        caller always has some string to embed in its prompt.
    """
    # DDG only offers coarse day/week/month windows; pick the smallest one
    # that still covers the requested number of hours.
    if hours <= 24:
        timelimit = "d"
    elif hours <= 24 * 7:
        timelimit = "w"
    else:
        timelimit = "m"

    try:
        with DDGS() as ddgs:
            results = ddgs.news(f"{title} news", timelimit=timelimit, max_results=5)
            # Compute the cutoff once so every result is judged against the
            # same instant.
            cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
            titles = []
            for r in results or []:
                date_str = r.get("date")
                headline = r.get("title")
                # Skip incomplete entries instead of letting a KeyError
                # discard every headline collected so far.
                if not isinstance(date_str, str) or not headline:
                    continue
                try:
                    # fromisoformat() only accepts a trailing "Z" on newer
                    # Python versions, so normalise it to an explicit offset.
                    if date_str.endswith("Z"):
                        dt = datetime.fromisoformat(date_str[:-1] + "+00:00")
                    else:
                        dt = datetime.fromisoformat(date_str)
                except ValueError as e:
                    logging.warning(f"Date parsing failed for '{date_str}': {e}")
                    continue
                # Timestamps without an offset are treated as UTC.
                if dt.tzinfo is None:
                    dt = dt.replace(tzinfo=timezone.utc)
                if dt > cutoff:
                    titles.append(str(headline).lower())
            context = " ".join(titles) if titles else f"No recent news found within {hours} hours"
            logging.info(f"DuckDuckGo News context for '{title}': {context}")
            return context
    except Exception as e:
        # Best-effort enrichment: a failed lookup must never abort curation.
        logging.warning(f"DuckDuckGo News context fetch failed for '{title}': {e}")
        return title
|
||||||
|
|
||||||
def fetch_reddit_posts():
|
def fetch_reddit_posts():
|
||||||
reddit = praw.Reddit(
|
reddit = praw.Reddit(
|
||||||
@@ -211,7 +236,7 @@ def curate_from_reddit():
|
|||||||
if not articles:
|
if not articles:
|
||||||
print("No Reddit posts available")
|
print("No Reddit posts available")
|
||||||
logging.info("No Reddit posts available")
|
logging.info("No Reddit posts available")
|
||||||
return None, None, None
|
return None, None, random.randint(600, 1800)
|
||||||
|
|
||||||
articles.sort(key=lambda x: x["upvotes"], reverse=True)
|
articles.sort(key=lambda x: x["upvotes"], reverse=True)
|
||||||
|
|
||||||
@@ -249,6 +274,9 @@ def curate_from_reddit():
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
top_comments = get_top_comments(link, reddit, limit=3)
|
top_comments = get_top_comments(link, reddit, limit=3)
|
||||||
|
# Fetch additional context via DDG
|
||||||
|
ddg_context = fetch_duckduckgo_news_context(title)
|
||||||
|
content_to_summarize = f"{title}\n\n{summary}\n\nTop Comments:\n{'\n'.join(top_comments) if top_comments else 'None'}\n\nAdditional Context: {ddg_context}"
|
||||||
interest_score = is_interesting_reddit(
|
interest_score = is_interesting_reddit(
|
||||||
title,
|
title,
|
||||||
summary,
|
summary,
|
||||||
@@ -266,15 +294,13 @@ def curate_from_reddit():
|
|||||||
num_paragraphs = determine_paragraph_count(interest_score)
|
num_paragraphs = determine_paragraph_count(interest_score)
|
||||||
extra_prompt = (
|
extra_prompt = (
|
||||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||||
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
|
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
||||||
f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
|
f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
|
||||||
f"Do NOT introduce unrelated concepts unless in the content or comments.\n"
|
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
||||||
|
f"Do NOT introduce unrelated concepts unless in the content, comments, or additional context.\n"
|
||||||
f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
|
f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
|
||||||
f"Do not include emojis in the summary."
|
f"Do not include emojis in the summary."
|
||||||
)
|
)
|
||||||
content_to_summarize = f"{title}\n\n{summary}"
|
|
||||||
if top_comments:
|
|
||||||
content_to_summarize += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
|
|
||||||
|
|
||||||
final_summary = summarize_with_gpt4o(
|
final_summary = summarize_with_gpt4o(
|
||||||
content_to_summarize,
|
content_to_summarize,
|
||||||
@@ -300,7 +326,6 @@ def curate_from_reddit():
|
|||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||||
|
|
||||||
hook = get_dynamic_hook(post_data["title"]).strip()
|
hook = get_dynamic_hook(post_data["title"]).strip()
|
||||||
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=None)
|
|
||||||
|
|
||||||
# Generate viral share prompt
|
# Generate viral share prompt
|
||||||
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
||||||
@@ -309,7 +334,7 @@ def curate_from_reddit():
|
|||||||
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
||||||
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
||||||
)
|
)
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links_template}" # Removed cta from content
|
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
|
||||||
|
|
||||||
global is_posting
|
global is_posting
|
||||||
is_posting = True
|
is_posting = True
|
||||||
@@ -335,8 +360,7 @@ def curate_from_reddit():
|
|||||||
share_text_encoded = quote(share_text)
|
share_text_encoded = quote(share_text)
|
||||||
post_url_encoded = quote(post_url)
|
post_url_encoded = quote(post_url)
|
||||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
||||||
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
|
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
|
|
||||||
is_posting = True
|
is_posting = True
|
||||||
try:
|
try:
|
||||||
post_to_wp(
|
post_to_wp(
|
||||||
|
|||||||
+34
-18
@@ -9,6 +9,8 @@ import signal
|
|||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
import email.utils
|
import email.utils
|
||||||
|
import feedparser
|
||||||
|
from duckduckgo_search import DDGS
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
@@ -136,6 +138,7 @@ def fetch_rss_feeds():
|
|||||||
logging.error("RSS_FEEDS is empty in foodie_config.py")
|
logging.error("RSS_FEEDS is empty in foodie_config.py")
|
||||||
return articles
|
return articles
|
||||||
|
|
||||||
|
logging.info(f"Processing feeds: {RSS_FEEDS}")
|
||||||
for feed_url in RSS_FEEDS:
|
for feed_url in RSS_FEEDS:
|
||||||
logging.info(f"Processing feed: {feed_url}")
|
logging.info(f"Processing feed: {feed_url}")
|
||||||
try:
|
try:
|
||||||
@@ -182,6 +185,30 @@ def fetch_rss_feeds():
|
|||||||
logging.info(f"Total RSS articles fetched: {len(articles)}")
|
logging.info(f"Total RSS articles fetched: {len(articles)}")
|
||||||
return articles
|
return articles
|
||||||
|
|
||||||
|
def fetch_duckduckgo_news_context(title, hours=24):
    """Return recent DuckDuckGo News headlines about *title* as one string.

    Queries DuckDuckGo News for ``"<title> news"`` (at most 5 results) and
    keeps the headlines whose publication date falls within the last
    ``hours`` hours.

    Args:
        title: Headline or topic to look up.
        hours: Size of the look-back window in hours. Defaults to 24.

    Returns:
        The matching headlines, lower-cased and joined by single spaces.
        If nothing recent is found, the sentence
        ``"No recent news found within <hours> hours"``.
        If the lookup itself fails, ``title`` is returned unchanged so the
        caller always has some string to embed in its prompt.
    """
    # DDG only offers coarse day/week/month windows; pick the smallest one
    # that still covers the requested number of hours.
    if hours <= 24:
        timelimit = "d"
    elif hours <= 24 * 7:
        timelimit = "w"
    else:
        timelimit = "m"

    try:
        with DDGS() as ddgs:
            results = ddgs.news(f"{title} news", timelimit=timelimit, max_results=5)
            # Compute the cutoff once so every result is judged against the
            # same instant.
            cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
            titles = []
            for r in results or []:
                date_str = r.get("date")
                headline = r.get("title")
                # Skip incomplete entries instead of letting a KeyError
                # discard every headline collected so far.
                if not isinstance(date_str, str) or not headline:
                    continue
                try:
                    # fromisoformat() only accepts a trailing "Z" on newer
                    # Python versions, so normalise it to an explicit offset.
                    if date_str.endswith("Z"):
                        dt = datetime.fromisoformat(date_str[:-1] + "+00:00")
                    else:
                        dt = datetime.fromisoformat(date_str)
                except ValueError as e:
                    logging.warning(f"Date parsing failed for '{date_str}': {e}")
                    continue
                # Timestamps without an offset are treated as UTC.
                if dt.tzinfo is None:
                    dt = dt.replace(tzinfo=timezone.utc)
                if dt > cutoff:
                    titles.append(str(headline).lower())
            context = " ".join(titles) if titles else f"No recent news found within {hours} hours"
            logging.info(f"DuckDuckGo News context for '{title}': {context}")
            return context
    except Exception as e:
        # Best-effort enrichment: a failed lookup must never abort curation.
        logging.warning(f"DuckDuckGo News context fetch failed for '{title}': {e}")
        return title
|
||||||
|
|
||||||
def curate_from_rss():
|
def curate_from_rss():
|
||||||
articles = fetch_rss_feeds()
|
articles = fetch_rss_feeds()
|
||||||
if not articles:
|
if not articles:
|
||||||
@@ -216,10 +243,12 @@ def curate_from_rss():
|
|||||||
attempts += 1
|
attempts += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
scoring_content = f"{title}\n\n{summary}\n\nContent: {content}"
|
# Fetch additional context via DDG
|
||||||
|
ddg_context = fetch_duckduckgo_news_context(title)
|
||||||
|
scoring_content = f"{title}\n\n{summary}\n\nContent: {content}\n\nAdditional Context: {ddg_context}"
|
||||||
interest_score = is_interesting(scoring_content)
|
interest_score = is_interesting(scoring_content)
|
||||||
logging.info(f"Interest score for '{title}': {interest_score}")
|
logging.info(f"Interest score for '{title}': {interest_score}")
|
||||||
if interest_score < 6:
|
if interest_score < 7:
|
||||||
print(f"RSS Interest Too Low: {interest_score}")
|
print(f"RSS Interest Too Low: {interest_score}")
|
||||||
logging.info(f"RSS Interest Too Low: {interest_score}")
|
logging.info(f"RSS Interest Too Low: {interest_score}")
|
||||||
attempts += 1
|
attempts += 1
|
||||||
@@ -228,8 +257,9 @@ def curate_from_rss():
|
|||||||
num_paragraphs = determine_paragraph_count(interest_score)
|
num_paragraphs = determine_paragraph_count(interest_score)
|
||||||
extra_prompt = (
|
extra_prompt = (
|
||||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||||
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
|
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
||||||
f"Do NOT introduce unrelated concepts.\n"
|
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
||||||
|
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
|
||||||
f"Expand on the core idea with relevant context about its appeal or significance.\n"
|
f"Expand on the core idea with relevant context about its appeal or significance.\n"
|
||||||
f"Do not include emojis in the summary."
|
f"Do not include emojis in the summary."
|
||||||
)
|
)
|
||||||
@@ -246,20 +276,6 @@ def curate_from_rss():
|
|||||||
attempts += 1
|
attempts += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Remove the original title from the summary while preserving paragraphs
|
|
||||||
title_pattern = re.compile(
|
|
||||||
r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title),
|
|
||||||
re.IGNORECASE
|
|
||||||
)
|
|
||||||
paragraphs = final_summary.split('\n')
|
|
||||||
cleaned_paragraphs = []
|
|
||||||
for para in paragraphs:
|
|
||||||
if para.strip():
|
|
||||||
cleaned_para = title_pattern.sub('', para).strip()
|
|
||||||
cleaned_para = re.sub(r'\s+', ' ', cleaned_para)
|
|
||||||
cleaned_paragraphs.append(cleaned_para)
|
|
||||||
final_summary = '\n'.join(cleaned_paragraphs)
|
|
||||||
|
|
||||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||||
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
|
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
|
||||||
if not post_data:
|
if not post_data:
|
||||||
|
|||||||
+4
-4
@@ -612,10 +612,10 @@ def insert_link_naturally(summary, source_name, source_url):
|
|||||||
target_para = random.choice([p for p in paragraphs if p.strip()])
|
target_para = random.choice([p for p in paragraphs if p.strip()])
|
||||||
link_pattern = f'<a href="{source_url}">{source_name}</a>'
|
link_pattern = f'<a href="{source_url}">{source_name}</a>'
|
||||||
phrases = [
|
phrases = [
|
||||||
f"Learn more from {link_pattern}",
|
f"According to {link_pattern}", # Changed to a more neutral phrasing
|
||||||
f"{link_pattern} shares this insight",
|
f"{link_pattern} notes this insight", # Adjusted phrasing
|
||||||
f"Discover more at {link_pattern}",
|
f"Details shared by {link_pattern}", # Adjusted phrasing
|
||||||
f"Check out {link_pattern} for details"
|
f"Source: {link_pattern}" # Simple attribution
|
||||||
]
|
]
|
||||||
insertion_phrase = random.choice(phrases)
|
insertion_phrase = random.choice(phrases)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user