Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4116d5f742 | |||
| 2ecab209c5 | |||
| 3d0d320648 | |||
| 504d7f6349 | |||
| ccddefbc8b | |||
| d2022222c3 | |||
| 7fba0fe96a | |||
| 6be8493878 | |||
| e445b6ef33 | |||
| 5554abdc4a | |||
| 64d17d5599 | |||
| aa0f3364d5 | |||
| e5ebd000fe |
+11
-11
@@ -208,14 +208,15 @@ def curate_from_google_trends(geo_list=['US']):
|
|||||||
print(f"Trying Google Trend: {title} from {source_name}")
|
print(f"Trying Google Trend: {title} from {source_name}")
|
||||||
logging.info(f"Trying Google Trend: {title} from {source_name}")
|
logging.info(f"Trying Google Trend: {title} from {source_name}")
|
||||||
|
|
||||||
image_query, relevance_keywords, skip = smart_image_and_filter(title, summary)
|
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
||||||
if skip:
|
if skip:
|
||||||
print(f"Skipping filtered Google Trend: {title}")
|
print(f"Skipping filtered Google Trend: {title}")
|
||||||
logging.info(f"Skipping filtered Google Trend: {title}")
|
logging.info(f"Skipping filtered Google Trend: {title}")
|
||||||
attempts += 1
|
attempts += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
scoring_content = f"{title}\n\n{summary}"
|
ddg_context = fetch_duckduckgo_news_context(title)
|
||||||
|
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
|
||||||
interest_score = is_interesting(scoring_content)
|
interest_score = is_interesting(scoring_content)
|
||||||
logging.info(f"Interest score for '{title}': {interest_score}")
|
logging.info(f"Interest score for '{title}': {interest_score}")
|
||||||
if interest_score < 6:
|
if interest_score < 6:
|
||||||
@@ -227,8 +228,9 @@ def curate_from_google_trends(geo_list=['US']):
|
|||||||
num_paragraphs = determine_paragraph_count(interest_score)
|
num_paragraphs = determine_paragraph_count(interest_score)
|
||||||
extra_prompt = (
|
extra_prompt = (
|
||||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||||
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
|
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
||||||
f"Do NOT introduce unrelated concepts.\n"
|
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
||||||
|
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
|
||||||
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
|
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
|
||||||
f"Do not include emojis in the summary."
|
f"Do not include emojis in the summary."
|
||||||
)
|
)
|
||||||
@@ -247,18 +249,17 @@ def curate_from_google_trends(geo_list=['US']):
|
|||||||
|
|
||||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||||
|
|
||||||
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
|
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic)
|
||||||
if not post_data:
|
if not post_data:
|
||||||
attempts += 1
|
attempts += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
|
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
||||||
if not image_url:
|
if not image_url:
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||||
|
|
||||||
hook = get_dynamic_hook(post_data["title"]).strip()
|
hook = get_dynamic_hook(post_data["title"]).strip()
|
||||||
|
|
||||||
# Generate viral share prompt
|
|
||||||
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
||||||
share_links_template = (
|
share_links_template = (
|
||||||
f'<p>{share_prompt} '
|
f'<p>{share_prompt} '
|
||||||
@@ -279,7 +280,7 @@ def curate_from_google_trends(geo_list=['US']):
|
|||||||
original_source=original_source,
|
original_source=original_source,
|
||||||
image_source=image_source,
|
image_source=image_source,
|
||||||
uploader=uploader,
|
uploader=uploader,
|
||||||
pixabay_url=pixabay_url,
|
page_url=page_url,
|
||||||
interest_score=interest_score,
|
interest_score=interest_score,
|
||||||
should_post_tweet=True
|
should_post_tweet=True
|
||||||
)
|
)
|
||||||
@@ -291,8 +292,7 @@ def curate_from_google_trends(geo_list=['US']):
|
|||||||
share_text_encoded = quote(share_text)
|
share_text_encoded = quote(share_text)
|
||||||
post_url_encoded = quote(post_url)
|
post_url_encoded = quote(post_url)
|
||||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
||||||
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
|
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
|
|
||||||
is_posting = True
|
is_posting = True
|
||||||
try:
|
try:
|
||||||
post_to_wp(
|
post_to_wp(
|
||||||
@@ -304,7 +304,7 @@ def curate_from_google_trends(geo_list=['US']):
|
|||||||
original_source=original_source,
|
original_source=original_source,
|
||||||
image_source=image_source,
|
image_source=image_source,
|
||||||
uploader=uploader,
|
uploader=uploader,
|
||||||
pixabay_url=pixabay_url,
|
page_url=page_url,
|
||||||
interest_score=interest_score,
|
interest_score=interest_score,
|
||||||
post_id=post_id,
|
post_id=post_id,
|
||||||
should_post_tweet=False
|
should_post_tweet=False
|
||||||
|
|||||||
+38
-16
@@ -8,6 +8,7 @@ import json
|
|||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
|
from duckduckgo_search import DDGS
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
@@ -169,6 +170,30 @@ def get_top_comments(post_url, reddit, limit=3):
|
|||||||
logging.error(f"Failed to fetch comments for {post_url}: {e}")
|
logging.error(f"Failed to fetch comments for {post_url}: {e}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
def fetch_duckduckgo_news_context(title, hours=24):
    """Build a short news-context string for *title* from DuckDuckGo News.

    Searches DuckDuckGo News for ``"<title> news"``, keeps the results whose
    publication date falls within the last *hours* hours, and joins their
    lower-cased headlines with single spaces.

    Args:
        title: Headline to look up.
        hours: Size of the recency window, in hours.

    Returns:
        The joined recent headlines; a "No recent news found within N hours"
        message when no result falls inside the window; or *title* itself
        when the lookup fails for any reason (best-effort fallback).
    """
    # The search API only offers coarse limits (day / week / month), so pick
    # the narrowest one that still covers the requested window; the exact
    # cutoff is enforced below.
    if hours <= 24:
        timelimit = "d"
    elif hours <= 24 * 7:
        timelimit = "w"
    else:
        timelimit = "m"

    try:
        with DDGS() as ddgs:
            results = ddgs.news(f"{title} news", timelimit=timelimit, max_results=5)
            # Computed once so every result is compared against the same instant.
            cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
            titles = []
            for r in results:
                date_str = r.get("date")
                headline = r.get("title")
                if not date_str or not headline:
                    # One malformed result must not discard the others.
                    logging.warning(f"Skipping DuckDuckGo result without date/title for '{title}'")
                    continue
                try:
                    # A trailing "Z" is rewritten so fromisoformat accepts it
                    # on every supported Python version.
                    if date_str.endswith("Z"):
                        date_str = date_str[:-1] + "+00:00"
                    dt = datetime.fromisoformat(date_str)
                except (AttributeError, TypeError, ValueError) as e:
                    logging.warning(f"Date parsing failed for '{date_str}': {e}")
                    continue
                if dt.tzinfo is None:
                    # Timestamps without an offset are treated as UTC.
                    dt = dt.replace(tzinfo=timezone.utc)
                if dt > cutoff:
                    titles.append(headline.lower())
            context = " ".join(titles) if titles else f"No recent news found within {hours} hours"
            logging.info(f"DuckDuckGo News context for '{title}': {context}")
            return context
    except Exception as e:
        # Deliberate best effort: context enrichment must never abort curation.
        logging.warning(f"DuckDuckGo News context fetch failed for '{title}': {e}")
        return title
|
||||||
|
|
||||||
def fetch_reddit_posts():
|
def fetch_reddit_posts():
|
||||||
reddit = praw.Reddit(
|
reddit = praw.Reddit(
|
||||||
client_id=REDDIT_CLIENT_ID,
|
client_id=REDDIT_CLIENT_ID,
|
||||||
@@ -211,7 +236,7 @@ def curate_from_reddit():
|
|||||||
if not articles:
|
if not articles:
|
||||||
print("No Reddit posts available")
|
print("No Reddit posts available")
|
||||||
logging.info("No Reddit posts available")
|
logging.info("No Reddit posts available")
|
||||||
return None, None, None
|
return None, None, random.randint(600, 1800)
|
||||||
|
|
||||||
articles.sort(key=lambda x: x["upvotes"], reverse=True)
|
articles.sort(key=lambda x: x["upvotes"], reverse=True)
|
||||||
|
|
||||||
@@ -241,7 +266,7 @@ def curate_from_reddit():
|
|||||||
print(f"Trying Reddit Post: {title} from {source_name}")
|
print(f"Trying Reddit Post: {title} from {source_name}")
|
||||||
logging.info(f"Trying Reddit Post: {title} from {source_name}")
|
logging.info(f"Trying Reddit Post: {title} from {source_name}")
|
||||||
|
|
||||||
image_query, relevance_keywords, skip = smart_image_and_filter(title, summary)
|
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
||||||
if skip or any(keyword in title.lower() or keyword in raw_title.lower() for keyword in RECIPE_KEYWORDS + ["homemade"]):
|
if skip or any(keyword in title.lower() or keyword in raw_title.lower() for keyword in RECIPE_KEYWORDS + ["homemade"]):
|
||||||
print(f"Skipping filtered Reddit post: {title}")
|
print(f"Skipping filtered Reddit post: {title}")
|
||||||
logging.info(f"Skipping filtered Reddit post: {title}")
|
logging.info(f"Skipping filtered Reddit post: {title}")
|
||||||
@@ -249,6 +274,8 @@ def curate_from_reddit():
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
top_comments = get_top_comments(link, reddit, limit=3)
|
top_comments = get_top_comments(link, reddit, limit=3)
|
||||||
|
ddg_context = fetch_duckduckgo_news_context(title)
|
||||||
|
content_to_summarize = f"{title}\n\n{summary}\n\nTop Comments:\n{'\n'.join(top_comments) if top_comments else 'None'}\n\nAdditional Context: {ddg_context}"
|
||||||
interest_score = is_interesting_reddit(
|
interest_score = is_interesting_reddit(
|
||||||
title,
|
title,
|
||||||
summary,
|
summary,
|
||||||
@@ -266,15 +293,13 @@ def curate_from_reddit():
|
|||||||
num_paragraphs = determine_paragraph_count(interest_score)
|
num_paragraphs = determine_paragraph_count(interest_score)
|
||||||
extra_prompt = (
|
extra_prompt = (
|
||||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||||
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
|
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
||||||
f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
|
f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
|
||||||
f"Do NOT introduce unrelated concepts unless in the content or comments.\n"
|
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
||||||
|
f"Do NOT introduce unrelated concepts unless in the content, comments, or additional context.\n"
|
||||||
f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
|
f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
|
||||||
f"Do not include emojis in the summary."
|
f"Do not include emojis in the summary."
|
||||||
)
|
)
|
||||||
content_to_summarize = f"{title}\n\n{summary}"
|
|
||||||
if top_comments:
|
|
||||||
content_to_summarize += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
|
|
||||||
|
|
||||||
final_summary = summarize_with_gpt4o(
|
final_summary = summarize_with_gpt4o(
|
||||||
content_to_summarize,
|
content_to_summarize,
|
||||||
@@ -290,26 +315,24 @@ def curate_from_reddit():
|
|||||||
|
|
||||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||||
|
|
||||||
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
|
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic)
|
||||||
if not post_data:
|
if not post_data:
|
||||||
attempts += 1
|
attempts += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
|
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
||||||
if not image_url:
|
if not image_url:
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||||
|
|
||||||
hook = get_dynamic_hook(post_data["title"]).strip()
|
hook = get_dynamic_hook(post_data["title"]).strip()
|
||||||
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=None)
|
|
||||||
|
|
||||||
# Generate viral share prompt
|
|
||||||
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
||||||
share_links_template = (
|
share_links_template = (
|
||||||
f'<p>{share_prompt} '
|
f'<p>{share_prompt} '
|
||||||
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
||||||
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
||||||
)
|
)
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links_template}" # Removed cta from content
|
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
|
||||||
|
|
||||||
global is_posting
|
global is_posting
|
||||||
is_posting = True
|
is_posting = True
|
||||||
@@ -323,7 +346,7 @@ def curate_from_reddit():
|
|||||||
original_source=original_source,
|
original_source=original_source,
|
||||||
image_source=image_source,
|
image_source=image_source,
|
||||||
uploader=uploader,
|
uploader=uploader,
|
||||||
pixabay_url=pixabay_url,
|
page_url=page_url,
|
||||||
interest_score=interest_score,
|
interest_score=interest_score,
|
||||||
should_post_tweet=True
|
should_post_tweet=True
|
||||||
)
|
)
|
||||||
@@ -335,8 +358,7 @@ def curate_from_reddit():
|
|||||||
share_text_encoded = quote(share_text)
|
share_text_encoded = quote(share_text)
|
||||||
post_url_encoded = quote(post_url)
|
post_url_encoded = quote(post_url)
|
||||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
||||||
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
|
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
|
|
||||||
is_posting = True
|
is_posting = True
|
||||||
try:
|
try:
|
||||||
post_to_wp(
|
post_to_wp(
|
||||||
@@ -348,7 +370,7 @@ def curate_from_reddit():
|
|||||||
original_source=original_source,
|
original_source=original_source,
|
||||||
image_source=image_source,
|
image_source=image_source,
|
||||||
uploader=uploader,
|
uploader=uploader,
|
||||||
pixabay_url=pixabay_url,
|
page_url=page_url,
|
||||||
interest_score=interest_score,
|
interest_score=interest_score,
|
||||||
post_id=post_id,
|
post_id=post_id,
|
||||||
should_post_tweet=False
|
should_post_tweet=False
|
||||||
|
|||||||
+44
-37
@@ -9,6 +9,8 @@ import signal
|
|||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
import email.utils
|
import email.utils
|
||||||
|
import feedparser
|
||||||
|
from duckduckgo_search import DDGS
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
@@ -136,6 +138,7 @@ def fetch_rss_feeds():
|
|||||||
logging.error("RSS_FEEDS is empty in foodie_config.py")
|
logging.error("RSS_FEEDS is empty in foodie_config.py")
|
||||||
return articles
|
return articles
|
||||||
|
|
||||||
|
logging.info(f"Processing feeds: {RSS_FEEDS}")
|
||||||
for feed_url in RSS_FEEDS:
|
for feed_url in RSS_FEEDS:
|
||||||
logging.info(f"Processing feed: {feed_url}")
|
logging.info(f"Processing feed: {feed_url}")
|
||||||
try:
|
try:
|
||||||
@@ -182,8 +185,32 @@ def fetch_rss_feeds():
|
|||||||
logging.info(f"Total RSS articles fetched: {len(articles)}")
|
logging.info(f"Total RSS articles fetched: {len(articles)}")
|
||||||
return articles
|
return articles
|
||||||
|
|
||||||
|
def fetch_duckduckgo_news_context(title, hours=24):
    """Build a short news-context string for *title* from DuckDuckGo News.

    Searches DuckDuckGo News for ``"<title> news"``, keeps the results whose
    publication date falls within the last *hours* hours, and joins their
    lower-cased headlines with single spaces.

    Args:
        title: Headline to look up.
        hours: Size of the recency window, in hours.

    Returns:
        The joined recent headlines; a "No recent news found within N hours"
        message when no result falls inside the window; or *title* itself
        when the lookup fails for any reason (best-effort fallback).
    """
    # The search API only offers coarse limits (day / week / month), so pick
    # the narrowest one that still covers the requested window; the exact
    # cutoff is enforced below.
    if hours <= 24:
        timelimit = "d"
    elif hours <= 24 * 7:
        timelimit = "w"
    else:
        timelimit = "m"

    try:
        with DDGS() as ddgs:
            results = ddgs.news(f"{title} news", timelimit=timelimit, max_results=5)
            # Computed once so every result is compared against the same instant.
            cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
            titles = []
            for r in results:
                date_str = r.get("date")
                headline = r.get("title")
                if not date_str or not headline:
                    # One malformed result must not discard the others.
                    logging.warning(f"Skipping DuckDuckGo result without date/title for '{title}'")
                    continue
                try:
                    # A trailing "Z" is rewritten so fromisoformat accepts it
                    # on every supported Python version.
                    if date_str.endswith("Z"):
                        date_str = date_str[:-1] + "+00:00"
                    dt = datetime.fromisoformat(date_str)
                except (AttributeError, TypeError, ValueError) as e:
                    logging.warning(f"Date parsing failed for '{date_str}': {e}")
                    continue
                if dt.tzinfo is None:
                    # Timestamps without an offset are treated as UTC.
                    dt = dt.replace(tzinfo=timezone.utc)
                if dt > cutoff:
                    titles.append(headline.lower())
            context = " ".join(titles) if titles else f"No recent news found within {hours} hours"
            logging.info(f"DuckDuckGo News context for '{title}': {context}")
            return context
    except Exception as e:
        # Deliberate best effort: context enrichment must never abort curation.
        logging.warning(f"DuckDuckGo News context fetch failed for '{title}': {e}")
        return title
|
||||||
|
|
||||||
def curate_from_rss():
|
def curate_from_rss():
|
||||||
articles = fetch_rss_feeds()
|
articles = fetch_rss_feeds() # Corrected from fetch_rss_articles to fetch_rss_feeds
|
||||||
if not articles:
|
if not articles:
|
||||||
print("No RSS articles available")
|
print("No RSS articles available")
|
||||||
logging.info("No RSS articles available")
|
logging.info("No RSS articles available")
|
||||||
@@ -195,9 +222,8 @@ def curate_from_rss():
|
|||||||
article = articles.pop(0)
|
article = articles.pop(0)
|
||||||
title = article["title"]
|
title = article["title"]
|
||||||
link = article["link"]
|
link = article["link"]
|
||||||
summary = article["summary"]
|
summary = article.get("summary", "")
|
||||||
content = article["content"]
|
source_name = article.get("feed_title", "Unknown Source") # Adjusted to match fetch_rss_feeds output
|
||||||
source_name = article["feed_title"]
|
|
||||||
original_source = f'<a href="{link}">{source_name}</a>'
|
original_source = f'<a href="{link}">{source_name}</a>'
|
||||||
|
|
||||||
if title in posted_titles:
|
if title in posted_titles:
|
||||||
@@ -209,14 +235,15 @@ def curate_from_rss():
|
|||||||
print(f"Trying RSS Article: {title} from {source_name}")
|
print(f"Trying RSS Article: {title} from {source_name}")
|
||||||
logging.info(f"Trying RSS Article: {title} from {source_name}")
|
logging.info(f"Trying RSS Article: {title} from {source_name}")
|
||||||
|
|
||||||
image_query, relevance_keywords, skip = smart_image_and_filter(title, summary)
|
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
||||||
if skip:
|
if skip:
|
||||||
print(f"Skipping filtered RSS article: {title}")
|
print(f"Skipping filtered RSS article: {title}")
|
||||||
logging.info(f"Skipping filtered RSS article: {title}")
|
logging.info(f"Skipping filtered RSS article: {title}")
|
||||||
attempts += 1
|
attempts += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
scoring_content = f"{title}\n\n{summary}\n\nContent: {content}"
|
ddg_context = fetch_duckduckgo_news_context(title)
|
||||||
|
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
|
||||||
interest_score = is_interesting(scoring_content)
|
interest_score = is_interesting(scoring_content)
|
||||||
logging.info(f"Interest score for '{title}': {interest_score}")
|
logging.info(f"Interest score for '{title}': {interest_score}")
|
||||||
if interest_score < 6:
|
if interest_score < 6:
|
||||||
@@ -228,9 +255,10 @@ def curate_from_rss():
|
|||||||
num_paragraphs = determine_paragraph_count(interest_score)
|
num_paragraphs = determine_paragraph_count(interest_score)
|
||||||
extra_prompt = (
|
extra_prompt = (
|
||||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||||
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
|
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
||||||
f"Do NOT introduce unrelated concepts.\n"
|
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
||||||
f"Expand on the core idea with relevant context about its appeal or significance.\n"
|
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
|
||||||
|
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
|
||||||
f"Do not include emojis in the summary."
|
f"Do not include emojis in the summary."
|
||||||
)
|
)
|
||||||
content_to_summarize = scoring_content
|
content_to_summarize = scoring_content
|
||||||
@@ -246,46 +274,26 @@ def curate_from_rss():
|
|||||||
attempts += 1
|
attempts += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Remove the original title from the summary while preserving paragraphs
|
|
||||||
title_pattern = re.compile(
|
|
||||||
r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title),
|
|
||||||
re.IGNORECASE
|
|
||||||
)
|
|
||||||
paragraphs = final_summary.split('\n')
|
|
||||||
cleaned_paragraphs = []
|
|
||||||
for para in paragraphs:
|
|
||||||
if para.strip():
|
|
||||||
cleaned_para = title_pattern.sub('', para).strip()
|
|
||||||
cleaned_para = re.sub(r'\s+', ' ', cleaned_para)
|
|
||||||
cleaned_paragraphs.append(cleaned_para)
|
|
||||||
final_summary = '\n'.join(cleaned_paragraphs)
|
|
||||||
|
|
||||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||||
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
|
|
||||||
|
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic)
|
||||||
if not post_data:
|
if not post_data:
|
||||||
attempts += 1
|
attempts += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Fetch image
|
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
||||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
|
|
||||||
if not image_url:
|
if not image_url:
|
||||||
logging.info(f"Flickr fetch failed for '{image_query}'. Falling back to Pixabay.")
|
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||||
if not image_url:
|
|
||||||
logging.info(f"Pixabay fetch failed for '{image_query}'. Skipping article '{title}'.")
|
|
||||||
attempts += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
hook = get_dynamic_hook(post_data["title"]).strip()
|
hook = get_dynamic_hook(post_data["title"]).strip()
|
||||||
|
|
||||||
# Generate viral share prompt
|
|
||||||
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
||||||
share_links_template = (
|
share_links_template = (
|
||||||
f'<p>{share_prompt} '
|
f'<p>{share_prompt} '
|
||||||
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
||||||
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
||||||
)
|
)
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links_template}" # Removed cta from content
|
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
|
||||||
|
|
||||||
global is_posting
|
global is_posting
|
||||||
is_posting = True
|
is_posting = True
|
||||||
@@ -299,7 +307,7 @@ def curate_from_rss():
|
|||||||
original_source=original_source,
|
original_source=original_source,
|
||||||
image_source=image_source,
|
image_source=image_source,
|
||||||
uploader=uploader,
|
uploader=uploader,
|
||||||
pixabay_url=pixabay_url,
|
page_url=page_url,
|
||||||
interest_score=interest_score,
|
interest_score=interest_score,
|
||||||
should_post_tweet=True
|
should_post_tweet=True
|
||||||
)
|
)
|
||||||
@@ -311,8 +319,7 @@ def curate_from_rss():
|
|||||||
share_text_encoded = quote(share_text)
|
share_text_encoded = quote(share_text)
|
||||||
post_url_encoded = quote(post_url)
|
post_url_encoded = quote(post_url)
|
||||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
||||||
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
|
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
|
|
||||||
is_posting = True
|
is_posting = True
|
||||||
try:
|
try:
|
||||||
post_to_wp(
|
post_to_wp(
|
||||||
@@ -324,7 +331,7 @@ def curate_from_rss():
|
|||||||
original_source=original_source,
|
original_source=original_source,
|
||||||
image_source=image_source,
|
image_source=image_source,
|
||||||
uploader=uploader,
|
uploader=uploader,
|
||||||
pixabay_url=pixabay_url,
|
page_url=page_url,
|
||||||
interest_score=interest_score,
|
interest_score=interest_score,
|
||||||
post_id=post_id,
|
post_id=post_id,
|
||||||
should_post_tweet=False
|
should_post_tweet=False
|
||||||
|
|||||||
+7
-2
@@ -245,7 +245,7 @@ RSS_FEED_NAMES = {
|
|||||||
"https://www.eater.com/rss/full.xml": ("Eater", "https://www.eater.com/"),
|
"https://www.eater.com/rss/full.xml": ("Eater", "https://www.eater.com/"),
|
||||||
"https://www.nrn.com/rss.xml": ("Nation's Restaurant News", "https://www.nrn.com/"),
|
"https://www.nrn.com/rss.xml": ("Nation's Restaurant News", "https://www.nrn.com/"),
|
||||||
"https://rss.nytimes.com/services/xml/rss/nyt/DiningandWine.xml": ("The New York Times", "https://www.nytimes.com/section/food"),
|
"https://rss.nytimes.com/services/xml/rss/nyt/DiningandWine.xml": ("The New York Times", "https://www.nytimes.com/section/food"),
|
||||||
"https://www.theguardian.com/food/rss": ("The Guardian Food", "https://www.theguardian.com/food")
|
"https://www.theguardian.com/food/rss": ("The Guardian", "https://www.theguardian.com/food")
|
||||||
}
|
}
|
||||||
|
|
||||||
RECIPE_KEYWORDS = ["recipe", "cook", "bake", "baking", "cooking", "ingredient", "method", "mix", "stir", "preheat", "dinners", "make", "dish", "healthy"]
|
RECIPE_KEYWORDS = ["recipe", "cook", "bake", "baking", "cooking", "ingredient", "method", "mix", "stir", "preheat", "dinners", "make", "dish", "healthy"]
|
||||||
@@ -264,7 +264,12 @@ REDDIT_USER_AGENT = os.getenv("REDDIT_USER_AGENT")
|
|||||||
REDDIT_SUBREDDITS = [
|
REDDIT_SUBREDDITS = [
|
||||||
"food",
|
"food",
|
||||||
"FoodPorn",
|
"FoodPorn",
|
||||||
"spicy"
|
"spicy",
|
||||||
|
"KoreanFood",
|
||||||
|
"JapaneseFood",
|
||||||
|
"DessertPorn",
|
||||||
|
"ChineseFood",
|
||||||
|
"IndianFood"
|
||||||
]
|
]
|
||||||
FAST_FOOD_KEYWORDS = [
|
FAST_FOOD_KEYWORDS = [
|
||||||
"mcdonald", "burger king", "wendy", "taco bell", "kfc",
|
"mcdonald", "burger king", "wendy", "taco bell", "kfc",
|
||||||
|
|||||||
+179
-150
@@ -29,6 +29,8 @@ from foodie_config import (
|
|||||||
load_dotenv()
|
load_dotenv()
|
||||||
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
|
|
||||||
|
IMAGE_EXPIRATION_DAYS = 7 # 7 days, consistent with foodie_automator_rss.py
|
||||||
|
|
||||||
def load_json_file(file_path, expiration_hours):
|
def load_json_file(file_path, expiration_hours):
|
||||||
entries = []
|
entries = []
|
||||||
cutoff = datetime.now(timezone.utc) - timedelta(hours=expiration_hours)
|
cutoff = datetime.now(timezone.utc) - timedelta(hours=expiration_hours)
|
||||||
@@ -341,9 +343,10 @@ def smart_image_and_filter(title, summary):
|
|||||||
prompt = (
|
prompt = (
|
||||||
"Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) "
|
"Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) "
|
||||||
"for an image search about food industry trends or viral content. Prioritize specific terms if present, "
|
"for an image search about food industry trends or viral content. Prioritize specific terms if present, "
|
||||||
"otherwise focus on the main theme. "
|
"otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). "
|
||||||
"Return 'SKIP' if the article is about home appliances, recipes, promotions, or contains 'homemade', else 'KEEP'. "
|
"Return 'SKIP' if the article is about home appliances, recipes, promotions, or contains 'homemade', else 'KEEP'. "
|
||||||
"Return as JSON with double quotes for all property names and string values (e.g., {\"image_query\": \"specific term\", \"relevance\": [\"keyword1\", \"keyword2\"], \"action\": \"KEEP\" or \"SKIP\"})."
|
"Return as JSON with double quotes for all property names and string values (e.g., "
|
||||||
|
"{\"image_query\": \"specific term\", \"relevance\": [\"keyword1\", \"keyword2\"], \"main_topic\": \"main food item\", \"action\": \"KEEP\" or \"SKIP\"})."
|
||||||
)
|
)
|
||||||
|
|
||||||
response = client.chat.completions.create(
|
response = client.chat.completions.create(
|
||||||
@@ -357,38 +360,54 @@ def smart_image_and_filter(title, summary):
|
|||||||
raw_result = response.choices[0].message.content.strip()
|
raw_result = response.choices[0].message.content.strip()
|
||||||
logging.info(f"Raw GPT smart image/filter response: '{raw_result}'")
|
logging.info(f"Raw GPT smart image/filter response: '{raw_result}'")
|
||||||
|
|
||||||
# Remove ```json markers and fix single quotes in JSON structure
|
|
||||||
cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip()
|
cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip()
|
||||||
# Replace single quotes with double quotes, but preserve single quotes within string values
|
|
||||||
fixed_result = re.sub(r"(?<!\\)'(?=\s*[\w\s]*\])|(?<=\[|\{|\s)'|'(?=\s*[\]\},:])|(?<=\w)'(?=\s*:)", '"', cleaned_result)
|
fixed_result = re.sub(r"(?<!\\)'(?=\s*[\w\s]*\])|(?<=\[|\{|\s)'|'(?=\s*[\]\},:])|(?<=\w)'(?=\s*:)", '"', cleaned_result)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = json.loads(fixed_result)
|
result = json.loads(fixed_result)
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
logging.warning(f"JSON parsing failed: {e}, raw: '{fixed_result}'. Using fallback.")
|
logging.warning(f"JSON parsing failed: {e}, raw: '{fixed_result}'. Using fallback.")
|
||||||
return "food trends", ["cuisine", "dining"], False
|
# Fallback: Extract main topic using simple keyword matching
|
||||||
|
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
||||||
|
return main_topic, [main_topic, "food"], False
|
||||||
|
|
||||||
if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result:
|
if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result:
|
||||||
logging.warning(f"Invalid GPT response format: {result}, using fallback")
|
logging.warning(f"Invalid GPT response format: {result}, using fallback")
|
||||||
return "food trends", ["cuisine", "dining"], False
|
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
||||||
|
return main_topic, [main_topic, "food"], False
|
||||||
|
|
||||||
image_query = result["image_query"]
|
image_query = result["image_query"]
|
||||||
relevance_keywords = result["relevance"]
|
relevance_keywords = result["relevance"]
|
||||||
|
main_topic = result.get("main_topic", extract_main_topic(title.lower() + " " + summary.lower()))
|
||||||
skip_flag = result["action"] == "SKIP" or "homemade" in title.lower() or "homemade" in summary.lower()
|
skip_flag = result["action"] == "SKIP" or "homemade" in title.lower() or "homemade" in summary.lower()
|
||||||
|
|
||||||
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Skip: {skip_flag}")
|
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}")
|
||||||
|
|
||||||
if not image_query or len(image_query.split()) < 2:
|
specific_single_words = ["kimchi", "sushi", "pizza", "taco", "burger"]
|
||||||
|
if not image_query:
|
||||||
|
logging.warning(f"Image query is empty, using fallback")
|
||||||
|
return main_topic, [main_topic, "food"], skip_flag
|
||||||
|
if len(image_query.split()) < 2 and image_query.lower() not in specific_single_words:
|
||||||
logging.warning(f"Image query '{image_query}' too vague, using fallback")
|
logging.warning(f"Image query '{image_query}' too vague, using fallback")
|
||||||
return "food trends", ["cuisine", "dining"], skip_flag
|
return main_topic, [main_topic, "food"], skip_flag
|
||||||
|
|
||||||
return image_query, relevance_keywords, skip_flag
|
return image_query, relevance_keywords, main_topic, skip_flag
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Smart image/filter failed: {e}, using fallback")
|
logging.error(f"Smart image/filter failed: {e}, using fallback")
|
||||||
return "food trends", ["cuisine", "dining"], False
|
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
||||||
|
return main_topic, [main_topic, "food"], False
|
||||||
|
|
||||||
def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_password, image_source="Pixabay", uploader=None, pixabay_url=None):
|
def extract_main_topic(text):
    """Return the first known food keyword contained in *text*.

    Keyword-based fallback for when no main topic is available from the
    model response. Matching is a case-insensitive substring test, so
    compound words still hit (e.g. "hamburger" yields "burger").

    Args:
        text: Free-form text to scan, typically a title plus a summary.
            Empty or non-string input is tolerated.

    Returns:
        The first entry of the keyword list (in list order, not in order
        of appearance in *text*) that occurs in *text*, or the generic
        term "food trends" when nothing matches.
    """
    # Common food-related keywords (expand as needed). Order is significant:
    # earlier entries win when several keywords are present.
    food_keywords = (
        "kimchi", "sushi", "pizza", "taco", "burger",
        "ramen", "curry", "pasta", "salad", "soup",
    )
    fallback = "food trends"

    # Guard against None / empty / non-string input instead of raising.
    if not isinstance(text, str) or not text:
        return fallback

    # Normalise case here so callers are not required to pre-lowercase.
    haystack = text.lower()
    return next((kw for kw in food_keywords if kw in haystack), fallback)
|
||||||
|
|
||||||
|
def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_password, image_source="Pixabay", uploader=None, page_url=None):
|
||||||
try:
|
try:
|
||||||
safe_title = post_title.encode('ascii', 'ignore').decode('ascii').replace(' ', '_')[:50]
|
safe_title = post_title.encode('ascii', 'ignore').decode('ascii').replace(' ', '_')[:50]
|
||||||
headers = {
|
headers = {
|
||||||
@@ -401,12 +420,11 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw
|
|||||||
}
|
}
|
||||||
logging.info(f"Fetching image from {image_url} for '{post_title}'")
|
logging.info(f"Fetching image from {image_url} for '{post_title}'")
|
||||||
|
|
||||||
# Add rate limit handling for image download
|
|
||||||
for attempt in range(3):
|
for attempt in range(3):
|
||||||
try:
|
try:
|
||||||
image_response = requests.get(image_url, headers=image_headers, timeout=10)
|
image_response = requests.get(image_url, headers=image_headers, timeout=10)
|
||||||
if image_response.status_code == 429:
|
if image_response.status_code == 429:
|
||||||
wait_time = 10 * (2 ** attempt) # 10s, 20s, 40s
|
wait_time = 10 * (2 ** attempt)
|
||||||
logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).")
|
logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).")
|
||||||
time.sleep(wait_time)
|
time.sleep(wait_time)
|
||||||
continue
|
continue
|
||||||
@@ -431,7 +449,12 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw
|
|||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
image_id = response.json()["id"]
|
image_id = response.json()["id"]
|
||||||
caption = f'<a href="{pixabay_url}">{image_source}</a> by {uploader}' if pixabay_url and uploader else image_source
|
if page_url and uploader:
|
||||||
|
caption = f'<a href="{page_url}">{image_source}</a> by {uploader}'
|
||||||
|
elif page_url:
|
||||||
|
caption = f'<a href="{page_url}">{image_source}</a>'
|
||||||
|
else:
|
||||||
|
caption = image_source
|
||||||
requests.post(
|
requests.post(
|
||||||
f"{wp_base_url}/media/{image_id}",
|
f"{wp_base_url}/media/{image_id}",
|
||||||
headers={"Authorization": headers["Authorization"], "Content-Type": "application/json"},
|
headers={"Authorization": headers["Authorization"], "Content-Type": "application/json"},
|
||||||
@@ -565,76 +588,50 @@ def insert_link_naturally(summary, source_name, source_url):
|
|||||||
try:
|
try:
|
||||||
logging.info(f"Input summary to insert_link_naturally: {summary!r}")
|
logging.info(f"Input summary to insert_link_naturally: {summary!r}")
|
||||||
|
|
||||||
prompt = (
|
paragraphs = summary.split('\n')
|
||||||
"Take this summary and insert a single HTML link naturally into one paragraph (randomly chosen). "
|
if not paragraphs or all(not p.strip() for p in paragraphs):
|
||||||
"Use the format '<a href=\"{source_url}\">{source_name}</a>' and weave it into the text seamlessly, "
|
logging.error("No valid paragraphs to insert link.")
|
||||||
"e.g., 'The latest scoop from {source_name} reveals...' or '{source_name} shares this insight.' "
|
return summary
|
||||||
"Vary the phrasing creatively to avoid repetition (don’t always use 'dives into'). "
|
|
||||||
"Place the link at a sentence boundary (after a period, not within numbers like '6.30am' or '1.5'). "
|
|
||||||
"Maintain the original tone, flow, and paragraph structure, preserving all existing newlines exactly as they are. "
|
|
||||||
"Each paragraph in the input summary is separated by a single \\n; ensure the output maintains this exact separation. "
|
|
||||||
"Do not add or remove newlines beyond the original summary structure. "
|
|
||||||
"Return the modified summary with exactly one link.\n\n"
|
|
||||||
"Summary:\n{summary}\n\n"
|
|
||||||
"Source Name: {source_name}\nSource URL: {source_url}"
|
|
||||||
).format(summary=summary, source_name=source_name, source_url=source_url)
|
|
||||||
|
|
||||||
response = client.chat.completions.create(
|
eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2]
|
||||||
model=LIGHT_TASK_MODEL,
|
if not eligible_paragraphs:
|
||||||
messages=[
|
logging.warning("No paragraph with multiple sentences found, appending to last paragraph.")
|
||||||
{"role": "system", "content": prompt},
|
target_para = paragraphs[-1].strip()
|
||||||
{"role": "user", "content": "Insert the link naturally into the summary."}
|
link_pattern = f'<a href="{source_url}">{source_name}</a>'
|
||||||
],
|
new_para = f"{target_para} Source: {link_pattern}."
|
||||||
max_tokens=1000,
|
paragraphs[-1] = new_para
|
||||||
temperature=0.7
|
|
||||||
)
|
|
||||||
new_summary = response.choices[0].message.content.strip()
|
|
||||||
link_pattern = f'<a href="{source_url}">{source_name}</a>'
|
|
||||||
if new_summary and new_summary.count(link_pattern) == 1:
|
|
||||||
paragraphs = new_summary.split('\n')
|
|
||||||
paragraphs = [p.strip() for p in paragraphs]
|
|
||||||
new_summary = '\n'.join(paragraphs)
|
new_summary = '\n'.join(paragraphs)
|
||||||
logging.info(f"Summary with naturally embedded link (normalized): {new_summary!r}")
|
logging.info(f"Appended link to summary: {new_summary!r}")
|
||||||
return new_summary
|
return new_summary
|
||||||
|
|
||||||
logging.warning(f"GPT failed to insert link correctly: {new_summary}. Using fallback.")
|
target_para = random.choice(eligible_paragraphs)
|
||||||
|
sentences = re.split(r'(?<=[.!?])\s+', target_para.strip())
|
||||||
|
|
||||||
|
eligible_sentences = [(i, s) for i, s in enumerate(sentences) if s.strip()]
|
||||||
|
if not eligible_sentences:
|
||||||
|
logging.error("No eligible sentences found for link insertion.")
|
||||||
|
return summary
|
||||||
|
|
||||||
|
sentence_idx, sentence = random.choice(eligible_sentences)
|
||||||
|
link_pattern = f'<a href="{source_url}">{source_name}</a>'
|
||||||
|
|
||||||
|
# Insert the link at the end of the sentence
|
||||||
|
new_sentence = f"{sentence.rstrip('.')} according to {link_pattern}."
|
||||||
|
|
||||||
|
sentences[sentence_idx] = new_sentence
|
||||||
|
new_para = ' '.join(sentences)
|
||||||
|
paragraphs[paragraphs.index(target_para)] = new_para
|
||||||
|
|
||||||
|
new_summary = '\n'.join(paragraphs)
|
||||||
|
logging.info(f"Summary with naturally embedded link: {new_summary!r}")
|
||||||
|
return new_summary
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Link insertion failed: {e}")
|
logging.error(f"Link insertion failed: {e}")
|
||||||
|
link_pattern = f'<a href="{source_url}">{source_name}</a>'
|
||||||
# Fallback path
|
new_summary = f"{summary}\n\nSource: {link_pattern}."
|
||||||
time_pattern = r'\b\d{1,2}\.\d{2}(?:am|pm)\b'
|
logging.info(f"Fallback summary with link: {new_summary!r}")
|
||||||
protected_summary = re.sub(time_pattern, lambda m: m.group(0).replace('.', '@'), summary)
|
return new_summary
|
||||||
paragraphs = protected_summary.split('\n')
|
|
||||||
if not paragraphs or all(not p.strip() for p in paragraphs):
|
|
||||||
logging.error("No valid paragraphs to insert link.")
|
|
||||||
return summary
|
|
||||||
|
|
||||||
target_para = random.choice([p for p in paragraphs if p.strip()])
|
|
||||||
link_pattern = f'<a href="{source_url}">{source_name}</a>'
|
|
||||||
phrases = [
|
|
||||||
f"Learn more from {link_pattern}",
|
|
||||||
f"{link_pattern} shares this insight",
|
|
||||||
f"Discover more at {link_pattern}",
|
|
||||||
f"Check out {link_pattern} for details"
|
|
||||||
]
|
|
||||||
insertion_phrase = random.choice(phrases)
|
|
||||||
|
|
||||||
sentences = re.split(r'(?<=[.!?])\s+', target_para)
|
|
||||||
insertion_point = -1
|
|
||||||
for i, sent in enumerate(sentences):
|
|
||||||
if sent.strip() and '@' not in sent:
|
|
||||||
insertion_point = sum(len(s) + 1 for s in sentences[:i+1])
|
|
||||||
break
|
|
||||||
if insertion_point == -1:
|
|
||||||
insertion_point = len(target_para)
|
|
||||||
|
|
||||||
new_para = f"{target_para[:insertion_point]} {insertion_phrase}. {target_para[insertion_point:]}".strip()
|
|
||||||
paragraphs[paragraphs.index(target_para)] = new_para
|
|
||||||
new_summary = '\n'.join(paragraphs)
|
|
||||||
|
|
||||||
new_summary = new_summary.replace('@', '.')
|
|
||||||
logging.info(f"Fallback summary with link: {new_summary!r}")
|
|
||||||
return new_summary
|
|
||||||
|
|
||||||
def generate_category_from_summary(summary):
|
def generate_category_from_summary(summary):
|
||||||
try:
|
try:
|
||||||
@@ -702,7 +699,7 @@ def get_wp_tag_id(tag_name, wp_base_url, wp_username, wp_password):
|
|||||||
logging.error(f"Failed to get WP tag ID for '{tag_name}': {e}")
|
logging.error(f"Failed to get WP tag ID for '{tag_name}': {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def post_to_wp(post_data, category, link, author, image_url, original_source, image_source="Pixabay", uploader=None, pixabay_url=None, interest_score=4, post_id=None, should_post_tweet=True):
|
def post_to_wp(post_data, category, link, author, image_url, original_source, image_source="Pixabay", uploader=None, page_url=None, interest_score=4, post_id=None, should_post_tweet=True):
|
||||||
wp_base_url = "https://insiderfoodie.com/wp-json/wp/v2"
|
wp_base_url = "https://insiderfoodie.com/wp-json/wp/v2"
|
||||||
logging.info(f"Starting post_to_wp for '{post_data['title']}', image_source: {image_source}")
|
logging.info(f"Starting post_to_wp for '{post_data['title']}', image_source: {image_source}")
|
||||||
|
|
||||||
@@ -749,6 +746,8 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im
|
|||||||
content = "Content unavailable. Check the original source for details."
|
content = "Content unavailable. Check the original source for details."
|
||||||
formatted_content = "\n".join(f"<p>{para}</p>" for para in content.split('\n') if para.strip())
|
formatted_content = "\n".join(f"<p>{para}</p>" for para in content.split('\n') if para.strip())
|
||||||
|
|
||||||
|
# Removed the block that appends image attribution to the content
|
||||||
|
|
||||||
author_id_map = {
|
author_id_map = {
|
||||||
"owenjohnson": 10,
|
"owenjohnson": 10,
|
||||||
"javiermorales": 2,
|
"javiermorales": 2,
|
||||||
@@ -759,17 +758,16 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im
|
|||||||
}
|
}
|
||||||
author_id = author_id_map.get(author["username"], 5)
|
author_id = author_id_map.get(author["username"], 5)
|
||||||
|
|
||||||
# Handle image upload
|
|
||||||
image_id = None
|
image_id = None
|
||||||
if image_url:
|
if image_url:
|
||||||
logging.info(f"Attempting image upload for '{post_data['title']}', URL: {image_url}, source: {image_source}")
|
logging.info(f"Attempting image upload for '{post_data['title']}', URL: {image_url}, source: {image_source}")
|
||||||
image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, pixabay_url)
|
image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, page_url)
|
||||||
if not image_id:
|
if not image_id:
|
||||||
logging.info(f"Flickr upload failed for '{post_data['title']}', falling back to Pixabay")
|
logging.info(f"Flickr upload failed for '{post_data['title']}', falling back to Pixabay")
|
||||||
pixabay_query = post_data["title"][:50]
|
pixabay_query = post_data["title"][:50]
|
||||||
image_url, image_source, uploader, pixabay_url = get_image(pixabay_query)
|
image_url, image_source, uploader, page_url = get_image(pixabay_query)
|
||||||
if image_url:
|
if image_url:
|
||||||
image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, pixabay_url)
|
image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, page_url)
|
||||||
if not image_id:
|
if not image_id:
|
||||||
logging.warning(f"All image uploads failed for '{post_data['title']}' - posting without image")
|
logging.warning(f"All image uploads failed for '{post_data['title']}' - posting without image")
|
||||||
|
|
||||||
@@ -808,11 +806,9 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im
|
|||||||
post_id = post_info["id"]
|
post_id = post_info["id"]
|
||||||
post_url = post_info["link"]
|
post_url = post_info["link"]
|
||||||
|
|
||||||
# Save to recent_posts.json
|
|
||||||
timestamp = datetime.now(timezone.utc).isoformat()
|
timestamp = datetime.now(timezone.utc).isoformat()
|
||||||
save_post_to_recent(post_data["title"], post_url, author["username"], timestamp)
|
save_post_to_recent(post_data["title"], post_url, author["username"], timestamp)
|
||||||
|
|
||||||
# Post article tweet to X only if should_post_tweet is True
|
|
||||||
if should_post_tweet:
|
if should_post_tweet:
|
||||||
try:
|
try:
|
||||||
post = {"title": post_data["title"], "url": post_url}
|
post = {"title": post_data["title"], "url": post_url}
|
||||||
@@ -864,42 +860,39 @@ used_images = set()
|
|||||||
# Load used images from file if it exists
|
# Load used images from file if it exists
|
||||||
if os.path.exists(used_images_file):
|
if os.path.exists(used_images_file):
|
||||||
try:
|
try:
|
||||||
with open(used_images_file, 'r') as f:
|
entries = load_json_file(used_images_file, IMAGE_EXPIRATION_DAYS * 24) # Use load_json_file for consistency
|
||||||
content = f.read().strip()
|
for entry in entries:
|
||||||
if not content:
|
if isinstance(entry, dict) and "title" in entry and entry["title"].startswith('https://'):
|
||||||
logging.warning(f"Used images file {used_images_file} is empty. Resetting to empty list.")
|
used_images.add(entry["title"])
|
||||||
data = []
|
|
||||||
else:
|
else:
|
||||||
data = json.loads(content)
|
logging.warning(f"Skipping invalid entry in {used_images_file}: {entry}")
|
||||||
if not isinstance(data, list):
|
|
||||||
logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Converting to list.")
|
|
||||||
if isinstance(data, dict):
|
|
||||||
# If it's a dict, try to extract URLs from values
|
|
||||||
data = [v for v in data.values() if isinstance(v, str) and v.startswith('https://')]
|
|
||||||
else:
|
|
||||||
logging.warning(f"Cannot convert {type(data)} to list. Resetting to empty list.")
|
|
||||||
data = []
|
|
||||||
# Filter out non-string or non-URL entries
|
|
||||||
data = [item for item in data if isinstance(item, str) and item.startswith('https://')]
|
|
||||||
used_images.update(data)
|
|
||||||
logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}")
|
logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting to empty set.")
|
logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting to empty set.")
|
||||||
used_images = set()
|
used_images = set()
|
||||||
with open(used_images_file, 'w') as f:
|
with open(used_images_file, 'w') as f:
|
||||||
json.dump([], f)
|
f.write("")
|
||||||
|
|
||||||
# Function to save used_images to file
|
# Function to save used_images to file
|
||||||
def save_used_images():
    """Persist the in-memory ``used_images`` set to ``used_images_file``.

    The file is written as JSON Lines: one
    ``{"title": <url>, "timestamp": <iso-8601>}`` object per line.
    Entries that ``load_json_file`` still returns for a URL are written
    back unchanged so their original timestamp is kept; URLs with no entry
    on disk are stamped with the current UTC time.

    Only ``https://`` strings are written, which is the same condition the
    module-level loader uses to accept an entry. Anything else is dropped
    with a warning rather than written and rejected on the next load.

    Any failure is logged as a warning and swallowed; nothing is raised.
    """
    try:
        # Load existing entries to preserve timestamps.
        # NOTE(review): load_json_file presumably drops entries older than
        # the expiration window - confirm against its implementation.
        entries = load_json_file(used_images_file, IMAGE_EXPIRATION_DAYS * 24)
        existing_entries = {
            entry["title"]: entry
            for entry in entries
            if isinstance(entry, dict) and "title" in entry
        }

        # Keep only well-formed URLs; sort so the file content is stable
        # between runs instead of following set iteration order.
        valid_urls = sorted(
            url for url in used_images
            if isinstance(url, str) and url.startswith('https://')
        )
        invalid_count = len(used_images) - len(valid_urls)
        if invalid_count:
            logging.warning(f"Found {invalid_count} invalid URLs in used_images set")

        # Reuse the stored entry when there is one, otherwise stamp it now.
        timestamp = datetime.now(timezone.utc).isoformat()
        updated_entries = [
            existing_entries.get(url, {"title": url, "timestamp": timestamp})
            for url in valid_urls
        ]

        # Serialize everything before opening the file: opening with 'w'
        # truncates it, so a serialization error must not happen afterwards.
        payload = "".join(json.dumps(entry) + '\n' for entry in updated_entries)
        with open(used_images_file, 'w') as f:
            f.write(payload)
        logging.info(f"Saved {len(updated_entries)} used image URLs to {used_images_file}")
    except Exception as e:
        logging.warning(f"Failed to save used images to {used_images_file}: {e}")
|
||||||
|
|
||||||
@@ -930,8 +923,18 @@ def process_photo(photo, search_query):
|
|||||||
logging.warning(f"Medium size not available for photo {photo.id}: {e}")
|
logging.warning(f"Medium size not available for photo {photo.id}: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if not img_url or img_url in used_images:
|
if not img_url:
|
||||||
logging.info(f"Image URL invalid or already used for photo {photo.id}: {img_url}")
|
logging.info(f"Image URL invalid for photo {photo.id}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Check if the image is highly relevant to the query
|
||||||
|
query_keywords = set(search_query.lower().split())
|
||||||
|
photo_keywords = set(tags + title.split())
|
||||||
|
is_relevant = bool(query_keywords & photo_keywords) # Check if any query keyword is in tags or title
|
||||||
|
|
||||||
|
# Allow reuse of highly relevant images
|
||||||
|
if img_url in used_images and not is_relevant:
|
||||||
|
logging.info(f"Image already used and not highly relevant for photo {photo.id}: {img_url}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
uploader = photo.owner.username
|
uploader = photo.owner.username
|
||||||
@@ -1037,14 +1040,13 @@ def classify_keywords(keywords):
|
|||||||
logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
|
logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
|
||||||
return {kw: "specific" for kw in keywords}
|
return {kw: "specific" for kw in keywords}
|
||||||
|
|
||||||
def get_flickr_image(search_query, relevance_keywords):
|
def get_flickr_image(search_query, relevance_keywords, main_topic):
|
||||||
global last_flickr_request_time, flickr_request_count
|
global last_flickr_request_time, flickr_request_count
|
||||||
|
|
||||||
reset_flickr_request_count()
|
reset_flickr_request_count()
|
||||||
flickr_request_count += 1
|
flickr_request_count += 1
|
||||||
logging.info(f"Flickr request count: {flickr_request_count}/3600")
|
logging.info(f"Flickr request count: {flickr_request_count}/3600")
|
||||||
|
|
||||||
# Enforce a minimum delay of 10 seconds between Flickr requests
|
|
||||||
current_time = time.time()
|
current_time = time.time()
|
||||||
time_since_last_request = current_time - last_flickr_request_time
|
time_since_last_request = current_time - last_flickr_request_time
|
||||||
if time_since_last_request < 10:
|
if time_since_last_request < 10:
|
||||||
@@ -1052,7 +1054,15 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
|
|
||||||
last_flickr_request_time = time.time()
|
last_flickr_request_time = time.time()
|
||||||
|
|
||||||
# Step 1: Search DDG to find Flickr photo IDs
|
# Step 1: Search Flickr directly with the original query
|
||||||
|
logging.info(f"Searching Flickr directly with query: '{search_query}'")
|
||||||
|
photos = search_flickr(search_query)
|
||||||
|
for photo in photos:
|
||||||
|
result = process_photo(photo, search_query)
|
||||||
|
if result:
|
||||||
|
return result
|
||||||
|
|
||||||
|
# Step 2: Search DDG to find Flickr photo IDs
|
||||||
logging.info(f"Searching DDG with query: '{search_query} site:flickr.com'")
|
logging.info(f"Searching DDG with query: '{search_query} site:flickr.com'")
|
||||||
photo_ids = search_ddg_for_flickr(search_query)
|
photo_ids = search_ddg_for_flickr(search_query)
|
||||||
if photo_ids:
|
if photo_ids:
|
||||||
@@ -1063,13 +1073,12 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# Step 2: Break down the query into keywords and classify them for direct Flickr API search
|
# Step 3: Break down the query into keywords and classify them
|
||||||
keywords = search_query.lower().split()
|
keywords = search_query.lower().split()
|
||||||
if len(keywords) > 1:
|
if len(keywords) > 1:
|
||||||
classifications = classify_keywords(keywords)
|
classifications = classify_keywords(keywords)
|
||||||
logging.info(f"Keyword classifications: {classifications}")
|
logging.info(f"Keyword classifications: {classifications}")
|
||||||
|
|
||||||
# Prioritize specific keywords
|
|
||||||
specific_keywords = [kw for kw, classification in classifications.items() if classification == "specific"]
|
specific_keywords = [kw for kw, classification in classifications.items() if classification == "specific"]
|
||||||
if specific_keywords:
|
if specific_keywords:
|
||||||
for keyword in specific_keywords:
|
for keyword in specific_keywords:
|
||||||
@@ -1080,9 +1089,17 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# Step 3: Final fallback using relevance keywords
|
# Step 4: Fallback using main topic
|
||||||
|
logging.info(f"No results found. Falling back to main topic: '{main_topic}'")
|
||||||
|
photos = search_flickr(main_topic)
|
||||||
|
for photo in photos:
|
||||||
|
result = process_photo(photo, main_topic)
|
||||||
|
if result:
|
||||||
|
return result
|
||||||
|
|
||||||
|
# Step 5: Final fallback using relevance keywords
|
||||||
fallback_query = " ".join(relevance_keywords) if isinstance(relevance_keywords, list) else relevance_keywords
|
fallback_query = " ".join(relevance_keywords) if isinstance(relevance_keywords, list) else relevance_keywords
|
||||||
logging.info(f"No results found. Falling back to generic query: '{fallback_query}'")
|
logging.info(f"No results with main topic. Falling back to relevance keywords: '{fallback_query}'")
|
||||||
photos = search_flickr(fallback_query)
|
photos = search_flickr(fallback_query)
|
||||||
for photo in photos:
|
for photo in photos:
|
||||||
result = process_photo(photo, search_query)
|
result = process_photo(photo, search_query)
|
||||||
@@ -1116,46 +1133,58 @@ def select_best_author(summary):
|
|||||||
logging.error(f"Author selection failed: {e}")
|
logging.error(f"Author selection failed: {e}")
|
||||||
return "owenjohnson"
|
return "owenjohnson"
|
||||||
|
|
||||||
def prepare_post_data(final_summary, original_title, context_info=""):
|
def prepare_post_data(summary, title, main_topic=None):
|
||||||
innovative_title = generate_title_from_summary(final_summary)
|
try:
|
||||||
if not innovative_title:
|
logging.info(f"Preparing post data for summary: {summary[:100]}...")
|
||||||
logging.info(f"Title generation failed for '{original_title}' {context_info}")
|
|
||||||
return None, None, None, None, None, None, None
|
|
||||||
|
|
||||||
# Pass innovative_title and final_summary as separate arguments
|
# Use the original generate_title_from_summary function to generate the title
|
||||||
search_query, relevance_keywords, _ = generate_image_query(innovative_title, final_summary)
|
new_title = generate_title_from_summary(summary)
|
||||||
if not search_query:
|
if not new_title:
|
||||||
logging.info(f"Image query generation failed for '{innovative_title}' {context_info}")
|
logging.warning("Title generation failed, using fallback title")
|
||||||
return None, None, None, None, None, None, None
|
new_title = "A Tasty Food Discovery Awaits You"
|
||||||
|
logging.info(f"Generated new title: '{new_title}'")
|
||||||
|
|
||||||
logging.info(f"Fetching Flickr image for query: '{search_query}' {context_info}")
|
# Update to unpack four values
|
||||||
image_url, image_source, uploader, page_url = get_flickr_image(search_query, relevance_keywords)
|
search_query, relevance_keywords, generated_main_topic, skip_flag = smart_image_and_filter(new_title, summary)
|
||||||
|
if skip_flag:
|
||||||
|
logging.info("Summary filtered out during post preparation")
|
||||||
|
return None, None, None, None, None, None, None
|
||||||
|
|
||||||
if not image_url:
|
# Use the provided main_topic if available, otherwise use the generated one
|
||||||
logging.info(f"Flickr fetch failed for '{search_query}' - falling back to Pixabay {context_info}")
|
effective_main_topic = main_topic if main_topic else generated_main_topic
|
||||||
# Use the same title and summary for fallback
|
|
||||||
image_query, _, _ = generate_image_query(innovative_title, final_summary)
|
image_url, image_source, uploader, page_url = get_flickr_image(search_query, relevance_keywords, effective_main_topic)
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
|
||||||
if not image_url:
|
if not image_url:
|
||||||
logging.info(f"Pixabay fetch failed for title '{innovative_title}' - falling back to summary {context_info}")
|
image_url, image_source, uploader, page_url = get_image(search_query)
|
||||||
image_query, _, _ = generate_image_query(final_summary, final_summary) # Using summary as both title and summary for fallback
|
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
|
||||||
if not image_url:
|
|
||||||
logging.info(f"Image fetch failed again for '{original_title}' - proceeding without image {context_info}")
|
|
||||||
|
|
||||||
post_data = {"title": innovative_title, "content": final_summary}
|
if not image_url:
|
||||||
selected_username = select_best_author(final_summary)
|
logging.warning("No image found for post, skipping")
|
||||||
author = next((a for a in AUTHORS if a["username"] == selected_username), None)
|
return None, None, None, None, None, None, None
|
||||||
if not author:
|
|
||||||
logging.error(f"Author '{selected_username}' not found in AUTHORS, defaulting to owenjohnson")
|
|
||||||
author = {"username": "owenjohnson", "password": "rfjk xhn6 2RPy FuQ9 cGlU K8mC"}
|
|
||||||
category = generate_category_from_summary(final_summary)
|
|
||||||
|
|
||||||
return post_data, author, category, image_url, image_source, uploader, page_url
|
# Select a full author dictionary from AUTHORS (already imported from foodie_config)
|
||||||
|
author = random.choice(AUTHORS)
|
||||||
|
|
||||||
|
categories = ["Food", "Trends", "Eats", "Culture"]
|
||||||
|
category = random.choice(categories)
|
||||||
|
|
||||||
|
post_data = {
|
||||||
|
"title": new_title,
|
||||||
|
"content": summary,
|
||||||
|
"status": "publish",
|
||||||
|
"author": author["username"], # Use the username in post_data
|
||||||
|
"categories": [category]
|
||||||
|
}
|
||||||
|
|
||||||
|
logging.info(f"Post data prepared: Title: '{new_title}', Category: {category}, Author: {author['username']}")
|
||||||
|
return post_data, author, category, image_url, image_source, uploader, page_url
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Failed to prepare post data: {e}")
|
||||||
|
return None, None, None, None, None, None, None
|
||||||
|
|
||||||
def save_post_to_recent(post_title, post_url, author_username, timestamp):
|
def save_post_to_recent(post_title, post_url, author_username, timestamp):
|
||||||
try:
|
try:
|
||||||
recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json')
|
recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json', 24) # Added expiration_hours
|
||||||
entry = {
|
entry = {
|
||||||
"title": post_title,
|
"title": post_title,
|
||||||
"url": post_url,
|
"url": post_url,
|
||||||
|
|||||||
Reference in New Issue
Block a user