Merge X posting into main files

my-fix-branch
Shane 7 months ago
parent a1d2ce4215
commit ea7d36a22b
  1. author_backgrounds.json (44)
  2. foodie_automator_google.py (13)
  3. foodie_automator_reddit.py (38)
  4. foodie_automator_rss.py (50)
  5. foodie_config.py (198)
  6. foodie_utils.py (299)
  7. foodie_x_poster.py (166)

@ -0,0 +1,44 @@
[
{
"username": "owenjohnson",
"hometown": "New York, NY",
"cultural_influences": "Global dining enthusiast with a focus on innovative trends",
"career_path": "Editor-in-chief, shaping food narratives with a worldwide lens",
"engagement_themes": ["global cuisines", "dining innovations", "food tech"]
},
{
"username": "javiermorales",
"hometown": "Los Angeles, CA",
"cultural_influences": "Latin-American roots with a critical eye on culinary arts",
"career_path": "Food critic known for incisive reviews and bold takes",
"engagement_themes": ["restaurant experiences", "dish quality", "chef innovations"]
},
{
"username": "aishapatel",
"hometown": "Sunnyvale, CA",
"cultural_influences": "Indian-American heritage with a focus on innovative cuisines",
"career_path": "Food blogger turned trend analyst, predicting food movements",
"engagement_themes": ["emerging food trends", "innovative cuisines", "sustainable dining"]
},
{
"username": "trangnguyen",
"hometown": "Seattle, WA",
"cultural_influences": "Vietnamese heritage with a passion for cultural storytelling",
"career_path": "Food writer weaving history and tradition into modern narratives",
"engagement_themes": ["culinary traditions", "cultural dishes", "food history"]
},
{
"username": "keishareid",
"hometown": "Atlanta, GA",
"cultural_influences": "African-American roots with a focus on soul food heritage",
"career_path": "Culinary storyteller celebrating resilience and flavor",
"engagement_themes": ["soul food classics", "cultural heritage", "comfort foods"]
},
{
"username": "lilamoreau",
"hometown": "Miami, FL",
"cultural_influences": "Global traveler with a love for street food diversity",
"career_path": "Food adventurer documenting street eats worldwide",
"engagement_themes": ["street food finds", "global flavors", "food trucks"]
}
]
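A minimal sketch of how these profiles could be looked up at post time; the helper below and the calling pattern are assumptions, not part of the commit, though the path matches AUTHOR_BACKGROUNDS_FILE in foodie_config.py:

    import json

    AUTHOR_BACKGROUNDS_FILE = '/home/shane/foodie_automator/author_backgrounds.json'

    def get_author_background(username):
        # The file is a plain JSON array, so json.load is enough here;
        # it is not the line-per-record format used by load_json_file in foodie_utils.
        with open(AUTHOR_BACKGROUNDS_FILE) as f:
            backgrounds = json.load(f)
        return next((b for b in backgrounds if b["username"] == username), None)

    # Example: pull the themes that could flavor a tweet or summary.
    background = get_author_background("aishapatel")
    themes = background["engagement_themes"] if background else []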

@ -1,3 +1,4 @@
# foodie_automator_google.py
import requests
import random
import time
@ -19,18 +20,20 @@ from selenium.common.exceptions import TimeoutException
from duckduckgo_search import DDGS
from foodie_config import (
AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS,
PERSONA_CONFIGS, CATEGORIES, CTAS, get_clean_source_name, X_API_CREDENTIALS
)
from foodie_utils import (
load_json_file, save_json_file, get_image, generate_image_query,
upload_image_to_wp, select_best_persona, determine_paragraph_count,
is_interesting, generate_title_from_summary, summarize_with_gpt4o,
generate_category_from_summary, post_to_wp, prepare_post_data,
smart_image_and_filter, insert_link_naturally, get_flickr_image_via_ddg
)
from foodie_hooks import get_dynamic_hook, select_best_cta
from dotenv import load_dotenv
load_dotenv()
# Flag to indicate if we're in the middle of posting
is_posting = False
@ -187,7 +190,7 @@ def curate_from_google_trends(geo_list=['US']):
attempts = 0
max_attempts = 10
while attempts < max_attempts and trends:
trend = trends.pop(0)
title = trend["title"]
link = trend["link"]
search_volume = trend["search_volume"]

@ -1,3 +1,4 @@
# foodie_automator_reddit.py
import requests
import random
import time
@ -15,14 +16,16 @@ from requests.adapters import HTTPAdapter
import praw
from foodie_config import (
AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS,
PERSONA_CONFIGS, CATEGORIES, CTAS, get_clean_source_name,
REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USER_AGENT, LIGHT_TASK_MODEL,
X_API_CREDENTIALS
)
from foodie_utils import (
load_json_file, save_json_file, get_image, generate_image_query,
upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
summarize_with_gpt4o, generate_category_from_summary, post_to_wp,
prepare_post_data, select_best_author, smart_image_and_filter,
get_flickr_image_via_ddg
)
from foodie_hooks import get_dynamic_hook, select_best_cta
@ -48,7 +51,6 @@ def setup_logging():
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
# Group lines into log entries based on timestamp pattern
log_entries = []
current_entry = []
timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}')
@ -105,8 +107,6 @@ used_images = set(entry["title"] for entry in used_images_data if "title" in ent
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
def clean_reddit_title(title):
"""Remove Reddit flairs like [pro/chef] or [homemade] from the title."""
# Match patterns like [pro/chef], [homemade], etc. at the start of the title
cleaned_title = re.sub(r'^\[.*?\]\s*', '', title).strip()
logging.info(f"Cleaned Reddit title from '{title}' to '{cleaned_title}'")
return cleaned_title
@ -158,7 +158,7 @@ def is_interesting_reddit(title, summary, upvotes, comment_count, top_comments):
def get_top_comments(post_url, reddit, limit=3):
try:
submission = reddit.submission(url=post_url)
submission.comment_sort = 'top'
submission.comments.replace_more(limit=0)
top_comments = [comment.body for comment in submission.comments[:limit] if not comment.body.startswith('[deleted]')]
logging.info(f"Fetched {len(top_comments)} top comments for {post_url}")
@ -186,11 +186,10 @@ def fetch_reddit_posts():
if pub_date < cutoff_date:
logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
continue
# Clean the title before storing
cleaned_title = clean_reddit_title(submission.title)
articles.append({
"title": cleaned_title,
"raw_title": submission.title,
"link": f"https://www.reddit.com{submission.permalink}",
"summary": submission.selftext,
"feed_title": get_clean_source_name(subreddit_name),
@ -212,7 +211,6 @@ def curate_from_reddit():
logging.info("No Reddit posts available")
return None, None, None
# Sort by upvotes descending
articles.sort(key=lambda x: x["upvotes"], reverse=True)
reddit = praw.Reddit(
@ -224,15 +222,15 @@ def curate_from_reddit():
attempts = 0
max_attempts = 10
while attempts < max_attempts and articles:
article = articles.pop(0)
title = article["title"]
raw_title = article["raw_title"]
link = article["link"]
summary = article["summary"]
source_name = "Reddit"
original_source = '<a href="https://www.reddit.com/">Reddit</a>'
if raw_title in posted_titles:
print(f"Skipping already posted post: {raw_title}")
logging.info(f"Skipping already posted post: {raw_title}")
attempts += 1
@ -250,7 +248,7 @@ def curate_from_reddit():
top_comments = get_top_comments(link, reddit, limit=3)
interest_score = is_interesting_reddit(
title,
summary,
article["upvotes"],
article["comment_count"],
@ -272,7 +270,7 @@ def curate_from_reddit():
"If brief, expand on the core idea with relevant context about its appeal or significance. "
"Do not include emojis in the summary."
)
content_to_summarize = f"{title}\n\n{summary}"
if top_comments:
content_to_summarize += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
@ -290,7 +288,7 @@ def curate_from_reddit():
final_summary = insert_link_naturally(final_summary, source_name, link)
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
if not post_data:
attempts += 1
continue
@ -345,8 +343,8 @@ def curate_from_reddit():
is_posting = False
timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, raw_title, timestamp)
posted_titles.add(raw_title)
logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}")
if image_url:

@ -1,3 +1,4 @@
# foodie_automator_rss.py
import requests
import random
import time
@ -13,12 +14,17 @@ from openai import OpenAI
from urllib.parse import quote
from requests.packages.urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
from foodie_config import (
RSS_FEEDS, RSS_FEED_NAMES, AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS,
HOME_KEYWORDS, PRODUCT_KEYWORDS, PERSONA_CONFIGS, CATEGORIES,
get_clean_source_name, X_API_CREDENTIALS
)
from foodie_utils import (
load_json_file, save_json_file, get_image, generate_image_query,
upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
is_interesting, generate_title_from_summary, summarize_with_gpt4o,
generate_category_from_summary, post_to_wp, prepare_post_data,
select_best_author, smart_image_and_filter
)
from foodie_hooks import get_dynamic_hook, select_best_cta
import feedparser
@ -27,6 +33,7 @@ from typing import List, Dict, Any, Optional
from dotenv import load_dotenv
load_dotenv()
# Flag to indicate if we're in the middle of posting
is_posting = False
@ -43,10 +50,10 @@ signal.signal(signal.SIGINT, signal_handler)
LOG_FILE = "/home/shane/foodie_automator/foodie_automator_rss.log"
LOG_PRUNE_DAYS = 30
MAX_WORKERS = 5
RATE_LIMIT_DELAY = 1
FEED_TIMEOUT = 30
MAX_RETRIES = 3
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json'
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
@ -58,7 +65,6 @@ posted_titles = set(entry["title"] for entry in posted_titles_data)
used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
def setup_logging():
"""Configure logging with rotation and cleanup."""
if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
@ -81,9 +87,14 @@ def setup_logging():
format="%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.info("Logging initialized for foodie_automator_rss.py")
setup_logging()
def create_http_session() -> requests.Session:
"""Create and configure an HTTP session with retry logic."""
session = requests.Session()
retry_strategy = Retry(
total=MAX_RETRIES,
@ -101,7 +112,6 @@ def create_http_session() -> requests.Session:
return session
def fetch_feed(feed_url: str, session: requests.Session) -> Optional[feedparser.FeedParserDict]:
"""Fetch and parse an RSS feed with error handling and retries."""
try:
response = session.get(feed_url, timeout=FEED_TIMEOUT)
response.raise_for_status()
@ -117,20 +127,14 @@ def fetch_feed(feed_url: str, session: requests.Session) -> Optional[feedparser.
return None
def is_interesting_rss(title: str, summary: str, pub_date: datetime) -> bool:
"""Enhanced content filtering with improved scoring."""
try:
# Basic validation
if not title or not summary:
return False
# Check if content is too old
if datetime.now(timezone.utc) - pub_date > timedelta(days=7):
return False
# Calculate interest score
score = 0
# Title analysis
title_lower = title.lower()
if any(keyword in title_lower for keyword in RECIPE_KEYWORDS):
score += 3
@ -139,7 +143,6 @@ def is_interesting_rss(title: str, summary: str, pub_date: datetime) -> bool:
if any(keyword in title_lower for keyword in HOME_KEYWORDS):
score += 1
# Content analysis
summary_lower = summary.lower()
if len(summary.split()) < 100:
score -= 2
@ -152,7 +155,6 @@ def is_interesting_rss(title: str, summary: str, pub_date: datetime) -> bool:
return False
def fetch_rss_feeds() -> List[Dict[str, Any]]:
"""Fetch RSS feeds with parallel processing and improved error handling."""
session = create_http_session()
articles = []
@ -177,7 +179,6 @@ def fetch_rss_feeds() -> List[Dict[str, Any]]:
return []
def process_feed(feed_url: str, session: requests.Session) -> List[Dict[str, Any]]:
"""Process a single RSS feed and extract articles."""
try:
feed = fetch_feed(feed_url, session)
if not feed:
@ -192,7 +193,8 @@ def process_feed(feed_url: str, session: requests.Session) -> List[Dict[str, Any
"title": entry.title,
"link": entry.link,
"summary": entry.summary if hasattr(entry, 'summary') else entry.description,
"content": getattr(entry, 'content', [{'value': ''}])[0].value,
"feed_title": get_clean_source_name(feed_url),
"pub_date": pub_date
}
@ -229,13 +231,12 @@ def curate_from_rss():
attempts = 0
max_attempts = 10
while attempts < max_attempts and articles:
article = articles.pop(0)
title = article["title"]
link = article["link"]
summary = article["summary"]
content = article["content"]
source_name = article["feed_title"]
original_source = f'<a href="{link}">{source_name}</a>'
if title in posted_titles:
@ -254,7 +255,6 @@ def curate_from_rss():
attempts += 1
continue
# Score using title, summary, and content
scoring_content = f"{title}\n\n{summary}\n\nContent: {content}"
interest_score = is_interesting(scoring_content)
logging.info(f"Interest score for '{title}': {interest_score}")

@ -58,114 +58,212 @@ AUTHORS = [
}
]
X_API_CREDENTIALS = [
{
"username": "owenjohnson",
"x_username": "@insiderfoodieowen",
"api_key": os.getenv("OWENJOHNSON_X_API_KEY"),
"api_secret": os.getenv("OWENJOHNSON_X_API_SECRET"),
"access_token": os.getenv("OWENJOHNSON_X_ACCESS_TOKEN"),
"access_token_secret": os.getenv("OWENJOHNSON_X_ACCESS_TOKEN_SECRET"),
"client_secret": os.getenv("OWENJOHNSON_X_CLIENT_SECRET")
},
{
"username": "javiermorales",
"x_username": "@insiderfoodiejavier",
"api_key": os.getenv("JAVIERMORALES_X_API_KEY"),
"api_secret": os.getenv("JAVIERMORALES_X_API_SECRET"),
"access_token": os.getenv("JAVIERMORALES_X_ACCESS_TOKEN"),
"access_token_secret": os.getenv("JAVIERMORALES_X_ACCESS_TOKEN_SECRET"),
"client_secret": os.getenv("JAVIERMORALES_X_CLIENT_SECRET")
},
{
"username": "aishapatel",
"x_username": "@insiderfoodieaisha",
"api_key": os.getenv("AISHAPATEL_X_API_KEY"),
"api_secret": os.getenv("AISHAPATEL_X_API_SECRET"),
"access_token": os.getenv("AISHAPATEL_X_ACCESS_TOKEN"),
"access_token_secret": os.getenv("AISHAPATEL_X_ACCESS_TOKEN_SECRET"),
"client_secret": os.getenv("AISHAPATEL_X_CLIENT_SECRET")
},
{
"username": "trangnguyen",
"x_username": "@insiderfoodietrang",
"api_key": os.getenv("TRANGNGUYEN_X_API_KEY"),
"api_secret": os.getenv("TRANGNGUYEN_X_API_SECRET"),
"access_token": os.getenv("TRANGNGUYEN_X_ACCESS_TOKEN"),
"access_token_secret": os.getenv("TRANGNGUYEN_X_ACCESS_TOKEN_SECRET"),
"client_secret": os.getenv("TRANGNGUYEN_X_CLIENT_SECRET")
},
{
"username": "keishareid",
"x_username": "@insiderfoodiekeisha",
"api_key": os.getenv("KEISHAREID_X_API_KEY"),
"api_secret": os.getenv("KEISHAREID_X_API_SECRET"),
"access_token": os.getenv("KEISHAREID_X_ACCESS_TOKEN"),
"access_token_secret": os.getenv("KEISHAREID_X_ACCESS_TOKEN_SECRET"),
"client_secret": os.getenv("KEISHAREID_X_CLIENT_SECRET")
},
{
"username": "lilamoreau",
"x_username": "@insiderfoodielila",
"api_key": os.getenv("LILAMOREAU_X_API_KEY"),
"api_secret": os.getenv("LILAMOREAU_X_API_SECRET"),
"access_token": os.getenv("LILAMOREAU_X_ACCESS_TOKEN"),
"access_token_secret": os.getenv("LILAMOREAU_X_ACCESS_TOKEN_SECRET"),
"client_secret": os.getenv("LILAMOREAU_X_CLIENT_SECRET")
}
]
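Every field except username and x_username is read from the environment, so an unset variable silently becomes None. A small startup check along these lines (an assumption, not part of the commit) makes incomplete .env entries visible:

    from foodie_config import X_API_CREDENTIALS

    def missing_x_credentials():
        # Hypothetical sanity check: (username, field) pairs whose env var is unset or empty.
        missing = []
        for cred in X_API_CREDENTIALS:
            for field, value in cred.items():
                if field not in ("username", "x_username") and not value:
                    missing.append((cred["username"], field))
        return missing

    if __name__ == "__main__":
        for username, field in missing_x_credentials():
            print(f"{username}: {field} is not set")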
PERSONA_CONFIGS = {
"Visionary Editor": {
"description": "a commanding food editor with a borderless view",
"tone": "a polished and insightful tone, like 'This redefines culinary excellence.'",
"article_prompt": (
"You’re {description}. Summarize this article in {tone}. "
"Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
"Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
"Add a bold take and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
),
"x_prompt": (
"Craft a tweet as {description}. Keep it under 280 characters, using {tone}. "
"For article tweets, include the article title, a quirky hook, and the URL. "
"For engagement tweets, ask a question about food trends, foods, or articles to engage the public. "
"Avoid emojis and clichés like 'game-changer'. Return only the tweet text."
)
},
"Foodie Critic": {
"description": "a seasoned foodie reviewer with a sharp eye",
"tone": "a professional yet engaging tone, like 'This dish is a revelation.'",
"article_prompt": (
"You’re {description}. Summarize this article in {tone}. "
"Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
"Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
"Add a subtle opinion and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
),
"x_prompt": (
"Craft a tweet as {description}. Keep it under 280 characters, using {tone}. "
"For article tweets, include the article title, a quirky hook, and the URL. "
"For engagement tweets, ask a question about food trends, foods, or articles to engage the public. "
"Avoid emojis and clichés like 'game-changer'. Return only the tweet text."
)
},
"Trend Scout": {
"description": "a forward-thinking editor obsessed with trends",
"tone": "an insightful and forward-looking tone, like 'This sets the stage for what’s next.'",
"article_prompt": (
"You’re {description}. Summarize this article in {tone}. "
"Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
"Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
"Predict what’s next and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
),
"x_prompt": (
"Craft a tweet as {description}. Keep it under 280 characters, using {tone}. "
"For article tweets, include the article title, a quirky hook, and the URL. "
"For engagement tweets, ask a question about food trends, foods, or articles to engage the public. "
"Avoid emojis and clichés like 'game-changer'. Return only the tweet text."
)
},
"Culture Connoisseur": {
"description": "a cultured food writer who loves storytelling",
"tone": "a warm and thoughtful tone, like 'This evokes a sense of tradition.'",
"article_prompt": (
"You’re {description}. Summarize this article in {tone}. "
"Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
"Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
"Add a thoughtful observation and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
),
"x_prompt": (
"Craft a tweet as {description}. Keep it under 280 characters, using {tone}. "
"For article tweets, include the article title, a quirky hook, and the URL. "
"For engagement tweets, ask a question about food trends, foods, or articles to engage the public. "
"Avoid emojis and clichés like 'game-changer'. Return only the tweet text."
)
},
"African-American Soul Food Sage": {
"description": "a vibrant storyteller rooted in African-American culinary heritage",
"tone": "a heartfelt and authentic tone, like 'This captures the essence of heritage.'",
"article_prompt": (
"You’re {description}. Summarize this article in {tone}. "
"Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
"Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
"Add a heritage twist and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
),
"x_prompt": (
"Craft a tweet as {description}. Keep it under 280 characters, using {tone}. "
"For article tweets, include the article title, a quirky hook, and the URL. "
"For engagement tweets, ask a question about food trends, foods, or articles to engage the public. "
"Avoid emojis and clichés like 'game-changer'. Return only the tweet text."
)
},
"Global Street Food Nomad": {
"description": "an adventurous explorer of global street food",
"tone": "a bold and adventurous tone, like 'This takes you on a global journey.'",
"article_prompt": (
"You’re {description}. Summarize this article in {tone}. "
"Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
"Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
"Drop a street-level insight and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
),
"x_prompt": (
"Craft a tweet as {description}. Keep it under 280 characters, using {tone}. "
"For article tweets, include the article title, a quirky hook, and the URL. "
"For engagement tweets, ask a question about food trends, foods, or articles to engage the public. "
"Avoid emojis and clichés like 'game-changer'. Return only the tweet text."
)
}
}
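For reference, the placeholders in article_prompt line up with the keys above; a minimal rendering sketch (the persona name and paragraph count here are arbitrary examples, not part of the commit) looks like this:

    from foodie_config import PERSONA_CONFIGS

    cfg = PERSONA_CONFIGS["Trend Scout"]
    prompt = cfg["article_prompt"].format(
        description=cfg["description"],
        tone=cfg["tone"],
        num_paragraphs=3,  # normally supplied by determine_paragraph_count(interest_score)
    )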
# File paths
POSTED_RSS_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json'
POSTED_GOOGLE_TITLES_FILE = '/home/shane/foodie_automator/posted_google_titles.json'
POSTED_REDDIT_TITLES_FILE = '/home/shane/foodie_automator/posted_reddit_titles.json'
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
AUTHOR_BACKGROUNDS_FILE = '/home/shane/foodie_automator/author_backgrounds.json'
X_POST_COUNTS_FILE = '/home/shane/foodie_automator/x_post_counts.json'
RECENT_POSTS_FILE = '/home/shane/foodie_automator/recent_posts.json'
EXPIRATION_DAYS = 3
IMAGE_EXPIRATION_DAYS = 7
RSS_FEEDS = [
"https://www.eater.com/rss/full.xml",
"https://modernrestaurantmanagement.com/feed/",
"https://thespoon.tech/feed/",
"https://www.nrn.com/rss.xml",
"https://rss.nytimes.com/services/xml/rss/nyt/DiningandWine.xml",
"https://www.bakingbusiness.com/rss/articles",
"https://www.theguardian.com/food/rss"
]
RSS_FEED_NAMES = {
"https://www.eater.com/rss/full.xml": ("Eater", "https://www.eater.com/"),
"https://modernrestaurantmanagement.com/feed/": ("Modern Restaurant Management", "https://modernrestaurantmanagement.com/"),
"https://thespoon.tech/feed/": ("The Spoon", "https://thespoon.tech/"),
"https://www.nrn.com/rss.xml": ("Nation's Restaurant News", "https://www.nrn.com/"),
"https://rss.nytimes.com/services/xml/rss/nyt/DiningandWine.xml": ("The New York Times", "https://www.nytimes.com/section/food"),
"https://www.bakingbusiness.com/rss/articles": ("Baking Business", "https://www.bakingbusiness.com/"),
"https://www.theguardian.com/food/rss": ("The Guardian Food", "https://www.theguardian.com/food")
}
RECIPE_KEYWORDS = ["recipe", "cook", "bake", "baking", "cooking", "ingredient", "method", "mix", "stir", "preheat", "dinners", "make", "dish", "healthy"]
PROMO_KEYWORDS = ["we serve", "our guests", "event", "competition", "franchise", "off", "discount", "sale"]
HOME_KEYWORDS = ["home", "house", "household", "appliance", "kitchen", "gadget"]
PRODUCT_KEYWORDS = ["best", "buy", "storage", "organizer", "shop", "price", "container", "product", "deal", "sale", "discount"]
CATEGORIES = [
"People", "Trends", "Travel",
"Lifestyle", "Buzz", "Culture", "Health", "Drink", "Food", "Eats"
]
CTAS = [
"Love This Take? Share It On <a href='{share_url}'><i class=\"tsi tsi-twitter\"></i></a>!",
"Dig This Scoop? Post It On <a href='{share_url}'><i class=\"tsi tsi-twitter\"></i></a>!",
"Wild For This? Spread It On <a href='{share_url}'><i class=\"tsi tsi-twitter\"></i></a>!",
"Crave This Read? Tweet It On <a href='{share_url}'><i class=\"tsi tsi-twitter\"></i></a>!",
"Buzzing Over This? Share On <a href='{share_url}'><i class=\"tsi tsi-twitter\"></i></a>!"
]
REDDIT_CLIENT_ID = os.getenv("REDDIT_CLIENT_ID")
REDDIT_CLIENT_SECRET = os.getenv("REDDIT_CLIENT_SECRET")
REDDIT_USER_AGENT = os.getenv("REDDIT_USER_AGENT")

@ -11,7 +11,6 @@ import tempfile
import requests
import time
from dotenv import load_dotenv
import os
from datetime import datetime, timezone, timedelta
from openai import OpenAI
from urllib.parse import quote
@ -19,10 +18,12 @@ from duckduckgo_search import DDGS
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import tweepy
from foodie_config import (
RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, PERSONA_CONFIGS,
get_clean_source_name, AUTHORS, LIGHT_TASK_MODEL, SUMMARY_MODEL, X_API_CREDENTIALS
)
load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
@ -43,7 +44,7 @@ def load_json_file(filename, expiration_days=None):
except json.JSONDecodeError as e:
logging.warning(f"Skipping invalid JSON line in {filename} at line {i}: {e}")
if expiration_days:
cutoff = (datetime.now(timezone.utc) - timedelta(days=expiration_days)).isoformat()
data = [entry for entry in data if entry["timestamp"] > cutoff]
logging.info(f"Loaded {len(data)} entries from {filename}, {len(data)} valid after expiration check")
except Exception as e:
@ -70,6 +71,95 @@ def save_json_file(filename, key, value):
except Exception as e:
logging.error(f"Failed to save or prune {filename}: {e}")
def load_post_counts():
counts = load_json_file('/home/shane/foodie_automator/x_post_counts.json')
if not counts:
counts = [{
"username": author["username"],
"month": datetime.now(timezone.utc).strftime("%Y-%m"),
"monthly_count": 0,
"day": datetime.now(timezone.utc).strftime("%Y-%m-%d"),
"daily_count": 0
} for author in AUTHORS]
current_month = datetime.now(timezone.utc).strftime("%Y-%m")
current_day = datetime.now(timezone.utc).strftime("%Y-%m-%d")
for entry in counts:
if entry["month"] != current_month:
entry["month"] = current_month
entry["monthly_count"] = 0
if entry["day"] != current_day:
entry["day"] = current_day
entry["daily_count"] = 0
return counts
def save_post_counts(counts):
with open('/home/shane/foodie_automator/x_post_counts.json', 'w') as f:
for item in counts:
json.dump(item, f)
f.write('\n')
logging.info("Saved post counts to x_post_counts.json")
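Each author gets one JSON object per line in x_post_counts.json; the record shape that load_post_counts expects and resets on month/day rollover looks roughly like this (values are made-up examples, not from the commit):

    example_record = {
        "username": "owenjohnson",   # matches an AUTHORS entry
        "month": "2025-05",          # monthly_count resets when this rolls over
        "monthly_count": 12,
        "day": "2025-05-14",         # daily_count resets when this rolls over
        "daily_count": 2,
    }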
def generate_article_tweet(author, post, persona):
persona_config = PERSONA_CONFIGS[persona]
base_prompt = persona_config["x_prompt"].format(
description=persona_config["description"],
tone=persona_config["tone"]
)
prompt = base_prompt.replace(
"For article tweets, include the article title, a quirky hook, and the URL.",
f"Generate an article tweet including the title '{post['title']}', a quirky hook, and the URL '{post['url']}'."
)
try:
response = client.chat.completions.create(
model=LIGHT_TASK_MODEL,
messages=[
{"role": "system", "content": prompt},
{"role": "user", "content": f"Generate tweet for {post['title']}."}
],
max_tokens=100,
temperature=0.9
)
tweet = response.choices[0].message.content.strip()
if len(tweet) > 280:
tweet = tweet[:277] + "..."
logging.info(f"Generated article tweet for {author['username']}: {tweet}")
return tweet
except Exception as e:
logging.error(f"Failed to generate article tweet for {author['username']}: {e}")
return f"This trend is fire! Check out {post['title']} at {post['url']} #Foodie"
def post_tweet(author, tweet):
credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None)
if not credentials:
logging.error(f"No X credentials found for {author['username']}")
return False
post_counts = load_post_counts()
author_count = next((entry for entry in post_counts if entry["username"] == author["username"]), None)
if author_count["monthly_count"] >= 500:
logging.warning(f"Monthly post limit (500) reached for {author['username']}")
return False
if author_count["daily_count"] >= 20:
logging.warning(f"Daily post limit (20) reached for {author['username']}")
return False
try:
client = tweepy.Client(
consumer_key=credentials["api_key"],
consumer_secret=credentials["api_secret"],
access_token=credentials["access_token"],
access_token_secret=credentials["access_token_secret"]
)
response = client.create_tweet(text=tweet)
author_count["monthly_count"] += 1
author_count["daily_count"] += 1
save_post_counts(post_counts)
logging.info(f"Posted tweet for {author['username']}: {tweet}")
return True
except Exception as e:
logging.error(f"Failed to post tweet for {author['username']}: {e}")
return False
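A minimal usage sketch for the two helpers above, assuming the author dict carries "username" and "persona" keys as it does where post_to_wp calls them; the title and URL below are placeholders:

    import logging
    from foodie_utils import generate_article_tweet, post_tweet

    author = {"username": "owenjohnson", "persona": "Visionary Editor"}
    post = {"title": "Ghost kitchens are quietly taking over", "url": "https://example.com/post"}

    tweet = generate_article_tweet(author, post, author["persona"])
    if not post_tweet(author, tweet):
        logging.warning("Tweet skipped (missing credentials or rate limit) or post failed")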
def select_best_persona(interest_score, content=""):
logging.info("Using select_best_persona with interest_score and content")
personas = ["Visionary Editor", "Foodie Critic", "Trend Scout", "Culture Connoisseur"]
@ -206,7 +296,6 @@ def smart_image_and_filter(title, summary):
raw_result = response.choices[0].message.content.strip()
logging.info(f"Raw GPT smart image/filter response: '{raw_result}'")
# Clean and parse JSON
cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip()
try:
result = json.loads(cleaned_result)
@ -339,14 +428,12 @@ def generate_title_from_summary(summary):
def summarize_with_gpt4o(content, source_name, link, interest_score=0, extra_prompt=""):
try:
persona = select_best_persona(interest_score, content)
persona_config = PERSONA_CONFIGS.get(persona, {
"article_prompt": "Write a concise, engaging summary that captures the essence of the content for food lovers.",
"description": "a generic food writer",
"tone": "an engaging tone"
})
prompt = persona_config["article_prompt"].format(
description=persona_config["description"],
tone=persona_config["tone"],
num_paragraphs=determine_paragraph_count(interest_score)
@ -379,166 +466,7 @@ def summarize_with_gpt4o(content, source_name, link, interest_score=0, extra_pro
logging.error(f"Summary generation failed with model {SUMMARY_MODEL}: {e}")
return None
def smart_image_and_filter(title, summary):
try:
content = f"{title}\n\n{summary}"
prompt = (
'Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) '
'for an image search about food industry trends or viral content. Prioritize specific terms if present, '
'otherwise focus on the main theme. '
'Return "SKIP" if the article is about home appliances, recipes, promotions, or contains "homemade", else "KEEP". '
'Return as JSON with double quotes: {"image_query": "specific term", "relevance": ["keyword1", "keyword2"], "action": "KEEP" or "SKIP"}'
)
response = client.chat.completions.create(
model=LIGHT_TASK_MODEL,
messages=[
{"role": "system", "content": prompt},
{"role": "user", "content": content}
],
max_tokens=100
)
raw_result = response.choices[0].message.content.strip()
logging.info(f"Raw GPT smart image/filter response: '{raw_result}'")
cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip()
try:
result = json.loads(cleaned_result)
except json.JSONDecodeError as e:
logging.warning(f"JSON parsing failed: {e}, raw: '{cleaned_result}'. Using fallback.")
return "food trends", ["cuisine", "dining"], False
if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result:
logging.warning(f"Invalid GPT response format: {result}, using fallback")
return "food trends", ["cuisine", "dining"], False
image_query = result["image_query"]
relevance_keywords = result["relevance"]
skip_flag = result["action"] == "SKIP" or "homemade" in title.lower() or "homemade" in summary.lower()
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Skip: {skip_flag}")
if not image_query or len(image_query.split()) < 2:
logging.warning(f"Image query '{image_query}' too vague, using fallback")
return "food trends", ["cuisine", "dining"], skip_flag
return image_query, relevance_keywords, skip_flag
except Exception as e:
logging.error(f"Smart image/filter failed: {e}, using fallback")
return "food trends", ["cuisine", "dining"], False
def is_interesting(summary):
try:
response = client.chat.completions.create(
model=LIGHT_TASK_MODEL,
messages=[
{"role": "system", "content": (
"Rate this content from 0-10 based on its rarity, buzzworthiness, and engagement potential for food lovers, covering a wide range of food topics (skip recipes). "
"Score 8-10 for rare, highly shareable ideas that grab attention. "
"Score 5-7 for fresh, engaging updates with broad appeal. Score below 5 for common or unremarkable content. "
"Return only a number."
)},
{"role": "user", "content": f"Content: {summary}"}
],
max_tokens=5
)
raw_score = response.choices[0].message.content.strip()
score = int(raw_score) if raw_score.isdigit() else 0
print(f"Interest Score for '{summary[:50]}...': {score} (raw: {raw_score})")
logging.info(f"Interest Score: {score} (raw: {raw_score})")
return score
except Exception as e:
logging.error(f"Interestingness scoring failed with model {LIGHT_TASK_MODEL}: {e}")
print(f"Interest Error: {e}")
return 0
def select_paragraphs(paragraphs, target_count, persona, original_content):
"""Select or generate paragraphs to match target_count, preserving key content."""
if len(paragraphs) == target_count and all(60 <= len(p.split()) <= 80 for p in paragraphs):
return paragraphs
# Score paragraphs by food-related keywords
keywords = ["food", "dish", "trend", "menu", "cuisine", "flavor", "taste", "eat", "dining", "restaurant"]
scores = []
for para in paragraphs:
score = sum(para.lower().count(kw) for kw in keywords)
word_count = len(para.split())
# Penalize paragraphs outside word range
score -= abs(word_count - 70) # Favor ~70 words
scores.append(score)
# Handle too many paragraphs
if len(paragraphs) > target_count:
# Keep last paragraph unless it's low-scoring
if scores[-1] >= min(scores[:-1]) or len(paragraphs) == target_count + 1:
selected_indices = sorted(range(len(paragraphs)-1), key=lambda i: scores[i], reverse=True)[:target_count-1] + [len(paragraphs)-1]
else:
selected_indices = sorted(range(len(paragraphs)), key=lambda i: scores[i], reverse=True)[:target_count]
selected = [paragraphs[i] for i in sorted(selected_indices)]
else:
selected = paragraphs[:]
# Handle word count adjustments or too few paragraphs
adjusted = []
for para in selected:
word_count = len(para.split())
if word_count < 60 or word_count > 80:
# Rephrase to fit 60-80 words
rephrase_prompt = (
f"Rephrase this paragraph to exactly 60-80 words, keeping the same tone as a {persona} and all key ideas: '{para}'"
)
try:
response = client.chat.completions.create(
model=SUMMARY_MODEL,
messages=[
{"role": "system", "content": rephrase_prompt},
{"role": "user", "content": para}
],
max_tokens=150,
temperature=0.7
)
new_para = response.choices[0].message.content.strip()
if 60 <= len(new_para.split()) <= 80:
adjusted.append(new_para)
else:
adjusted.append(para) # Fallback to original if rephrase fails
except Exception as e:
logging.warning(f"Rephrasing failed for paragraph: {e}")
adjusted.append(para)
else:
adjusted.append(para)
# Generate additional paragraphs if needed
while len(adjusted) < target_count:
extra_prompt = (
f"Generate one additional paragraph (60-80 words) in the style of a {persona}, "
f"based on this content: '{original_content[:200]}...'. Match the tone of: '{adjusted[-1] if adjusted else 'This trend is fire!'}'"
)
try:
response = client.chat.completions.create(
model=SUMMARY_MODEL,
messages=[
{"role": "system", "content": extra_prompt},
{"role": "user", "content": original_content}
],
max_tokens=150,
temperature=0.7
)
new_para = response.choices[0].message.content.strip()
if 60 <= len(new_para.split()) <= 80:
adjusted.append(new_para)
else:
adjusted.append("This trend is sparking buzz across menus!") # Fallback
except Exception as e:
logging.warning(f"Extra paragraph generation failed: {e}")
adjusted.append("This vibe is shaking up the food scene!")
return adjusted[:target_count]
def insert_link_naturally(summary, source_name, source_url):
import re
try:
prompt = (
"Take this summary and insert a single HTML link naturally into one paragraph (randomly chosen). "
@ -571,8 +499,7 @@ def insert_link_naturally(summary, source_name, source_url):
except Exception as e:
logging.error(f"Link insertion failed: {e}")
# Fallback: Protect times and insert at sentence end
time_pattern = r'\b\d{1,2}\.\d{2}(?:am|pm)\b'
protected_summary = re.sub(time_pattern, lambda m: m.group(0).replace('.', '@'), summary)
paragraphs = protected_summary.split('\n')
if not paragraphs or all(not p.strip() for p in paragraphs):
@ -588,22 +515,19 @@ def insert_link_naturally(summary, source_name, source_url):
]
insertion_phrase = random.choice(phrases)
# Find sentence boundary, avoiding protected times
sentences = re.split(r'(?<=[.!?])\s+', target_para)
insertion_point = -1
for i, sent in enumerate(sentences):
if sent.strip() and '@' not in sent:
insertion_point = sum(len(s) + 1 for s in sentences[:i+1])
break
if insertion_point == -1:
insertion_point = len(target_para)
# Add space after insertion phrase
new_para = f"{target_para[:insertion_point]} {insertion_phrase}. {target_para[insertion_point:]}".strip()
paragraphs[paragraphs.index(target_para)] = new_para
new_summary = '\n'.join(paragraphs)
# Restore periods in times
new_summary = new_summary.replace('@', '.')
logging.info(f"Fallback summary with link: {new_summary}")
return new_summary
@ -759,7 +683,7 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im
logging.warning(f"All image uploads failed for '{post_data['title']}' - posting without image")
endpoint = f"{wp_base_url}/posts/{post_id}" if post_id else f"{wp_base_url}/posts"
method = requests.post
logging.debug(f"Sending WP request to {endpoint} with payload: {json.dumps(payload, indent=2)}")
@ -775,12 +699,20 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im
post_id = post_info["id"]
post_url = post_info["link"]
# Save to recent_posts.json
timestamp = datetime.now(timezone.utc).isoformat()
save_post_to_recent(post_data["title"], post_url, author["username"], timestamp)
# Post article tweet to X
try:
post = {"title": post_data["title"], "url": post_url}
tweet = generate_article_tweet(author, post, author["persona"])
if post_tweet(author, tweet):
logging.info(f"Successfully posted article tweet for {author['username']} on X")
else:
logging.warning(f"Failed to post article tweet for {author['username']} on X")
except Exception as e:
logging.error(f"Error posting article tweet for {author['username']}: {e}")
logging.info(f"Posted/Updated by {author['username']}: {post_data['title']} (ID: {post_id})") logging.info(f"Posted/Updated by {author['username']}: {post_data['title']} (ID: {post_id})")
return post_id, post_url return post_id, post_url
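
The relocated generate_article_tweet is not shown in this hunk; condensed from the foodie_x_poster version removed further down, the pattern it follows is roughly:

from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

def article_tweet(author, post, system_prompt, model="gpt-4o-mini"):
    # Ask the model for a hook + title + URL, then enforce X's 280-char limit.
    try:
        resp = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Generate tweet for {post['title']}."},
            ],
            max_tokens=100,
            temperature=0.9,
        )
        tweet = resp.choices[0].message.content.strip()
    except Exception:
        tweet = f"Check out {post['title']} at {post['url']} #Foodie"
    return tweet if len(tweet) <= 280 else tweet[:277] + "..."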
@ -860,7 +792,6 @@ def get_flickr_image_via_ddg(search_query, relevance_keywords):
result = random.choice(candidates) result = random.choice(candidates)
image_url = result["image_url"] image_url = result["image_url"]
# OCR check on the selected image
temp_file = None temp_file = None
try: try:
img_response = requests.get(image_url, headers=headers, timeout=10) img_response = requests.get(image_url, headers=headers, timeout=10)
@ -876,9 +807,8 @@ def get_flickr_image_via_ddg(search_query, relevance_keywords):
if char_count > 200: if char_count > 200:
logging.info(f"Skipping text-heavy image (OCR): {image_url} (char_count: {char_count})") logging.info(f"Skipping text-heavy image (OCR): {image_url} (char_count: {char_count})")
return None, None, None, None # Fall back to Pixabay return None, None, None, None
# Success: Save and return
flickr_data = { flickr_data = {
"title": search_query, "title": search_query,
"image_url": image_url, "image_url": image_url,
@ -945,7 +875,6 @@ def prepare_post_data(final_summary, original_title, context_info=""):
logging.info(f"Title generation failed for '{original_title}' {context_info}") logging.info(f"Title generation failed for '{original_title}' {context_info}")
return None, None, None, None, None, None, None return None, None, None, None, None, None, None
# Note: This function still uses generate_image_query, but curate_from_rss overrides it with smart_image_and_filter
search_query, relevance_keywords = generate_image_query(f"{innovative_title}\n\n{final_summary}") search_query, relevance_keywords = generate_image_query(f"{innovative_title}\n\n{final_summary}")
if not search_query: if not search_query:
logging.info(f"Image query generation failed for '{innovative_title}' {context_info}") logging.info(f"Image query generation failed for '{innovative_title}' {context_info}")
@ -976,7 +905,6 @@ def prepare_post_data(final_summary, original_title, context_info=""):
return post_data, author, category, image_url, image_source, uploader, page_url return post_data, author, category, image_url, image_source, uploader, page_url
def save_post_to_recent(post_title, post_url, author_username, timestamp): def save_post_to_recent(post_title, post_url, author_username, timestamp):
"""Save post details to recent_posts.json."""
try: try:
recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json') recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json')
entry = { entry = {
@ -995,7 +923,6 @@ def save_post_to_recent(post_title, post_url, author_username, timestamp):
logging.error(f"Failed to save post to recent_posts.json: {e}") logging.error(f"Failed to save post to recent_posts.json: {e}")
def prune_recent_posts(): def prune_recent_posts():
"""Prune recent_posts.json to keep only entries from the last 24 hours."""
try: try:
cutoff = (datetime.now(timezone.utc) - timedelta(hours=24)).isoformat() cutoff = (datetime.now(timezone.utc) - timedelta(hours=24)).isoformat()
recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json') recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json')
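
save_post_to_recent and prune_recent_posts together keep a rolling 24-hour window of published posts. A compressed sketch of that bookkeeping, assuming the file holds a JSON list and using the field names implied by the callers above (title, url, author_username, timestamp); the repo's own load_json_file/save_json_file helpers are simplified away here:

import json
from datetime import datetime, timedelta, timezone

RECENT_POSTS_PATH = "/home/shane/foodie_automator/recent_posts.json"

def save_recent(title, url, username):
    entries = _load(RECENT_POSTS_PATH)
    entries.append({
        "title": title,
        "url": url,
        "author_username": username,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    })
    _dump(RECENT_POSTS_PATH, entries)

def prune_recent():
    # Drop anything older than 24 hours; ISO strings compare correctly here
    # because every timestamp is written in the same UTC format.
    cutoff = (datetime.now(timezone.utc) - timedelta(hours=24)).isoformat()
    entries = [e for e in _load(RECENT_POSTS_PATH) if e["timestamp"] >= cutoff]
    _dump(RECENT_POSTS_PATH, entries)

def _load(path):
    try:
        with open(path) as f:
            return json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        return []

def _dump(path, entries):
    with open(path, "w") as f:
        json.dump(entries, f, indent=2)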

@ -6,12 +6,10 @@ import time
import sys import sys
import signal import signal
import os import os
from datetime import datetime, timedelta, timezone from datetime import datetime, timezone
from openai import OpenAI from openai import OpenAI
import tweepy from foodie_config import OPENAI_API_KEY, AUTHORS, LIGHT_TASK_MODEL, PERSONA_CONFIGS, AUTHOR_BACKGROUNDS_FILE
from foodie_config import OPENAI_API_KEY, AUTHORS, LIGHT_TASK_MODEL from foodie_utils import load_json_file, post_tweet
from foodie_utils import load_json_file
from foodie_x_config import X_API_CREDENTIALS, X_PERSONA_PROMPTS, AUTHOR_BACKGROUNDS_FILE, X_POST_COUNTS_FILE, RECENT_POSTS_FILE
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
@ -49,24 +47,6 @@ except Exception as e:
logging.error(f"Failed to load author_backgrounds.json: {e}") logging.error(f"Failed to load author_backgrounds.json: {e}")
sys.exit(1) sys.exit(1)
def load_post_counts():
counts = load_json_file(X_POST_COUNTS_FILE)
if not counts:
counts = [{"username": author["username"], "count": 0, "month": datetime.now(timezone.utc).strftime("%Y-%m")} for author in AUTHORS]
current_month = datetime.now(timezone.utc).strftime("%Y-%m")
for entry in counts:
if entry["month"] != current_month:
entry["count"] = 0
entry["month"] = current_month
return counts
def save_post_counts(counts):
with open(X_POST_COUNTS_FILE, 'w') as f:
for item in counts:
json.dump(item, f)
f.write('\n')
logging.info(f"Saved post counts to {X_POST_COUNTS_FILE}")
is_posting = False is_posting = False
def signal_handler(sig, frame): def signal_handler(sig, frame):
@ -80,87 +60,28 @@ def signal_handler(sig, frame):
signal.signal(signal.SIGTERM, signal_handler) signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGINT, signal_handler)
def get_recent_posts_for_author(username): def generate_engagement_tweet(author, persona):
posts = load_json_file(RECENT_POSTS_FILE)
return [post for post in posts if post["author_username"] == username]
def delete_used_post(post_title):
posts = load_json_file(RECENT_POSTS_FILE)
posts = [post for post in posts if post["title"] != post_title]
with open(RECENT_POSTS_FILE, 'w') as f:
for item in posts:
json.dump(item, f)
f.write('\n')
logging.info(f"Deleted post '{post_title}' from recent_posts.json")
def generate_article_tweet(author, post, persona):
# Format the prompt using description and tone
persona_config = X_PERSONA_PROMPTS[persona]
base_prompt = persona_config["prompt"].format(
description=persona_config["description"],
tone=persona_config["tone"]
)
prompt = base_prompt.replace(
"For article tweets, include the article title, a quirky hook, and the URL.",
f"Generate an article tweet including the title '{post['title']}', a quirky hook, and the URL '{post['url']}'."
)
try:
response = client.chat.completions.create(
model=LIGHT_TASK_MODEL,
messages=[
{"role": "system", "content": prompt},
{"role": "user", "content": f"Generate tweet for {post['title']}."}
],
max_tokens=100,
temperature=0.9
)
tweet = response.choices[0].message.content.strip()
if len(tweet) > 280:
tweet = tweet[:277] + "..."
logging.info(f"Generated article tweet for {author['username']}: {tweet}")
return tweet
except Exception as e:
logging.error(f"Failed to generate article tweet for {author['username']}: {e}")
return f"This trend is fire! Check out {post['title']} at {post['url']} #Foodie"
def generate_personal_tweet(author, persona):
background = next((bg for bg in AUTHOR_BACKGROUNDS if bg["username"] == author["username"]), {}) background = next((bg for bg in AUTHOR_BACKGROUNDS if bg["username"] == author["username"]), {})
if not background: if not background or "engagement_themes" not in background:
logging.warning(f"No background found for {author['username']}") logging.warning(f"No background or engagement themes found for {author['username']}")
return f"Loving my gig at InsiderFoodie, dishing out food trends! #FoodieLife" return "What food trends are you loving right now? Share your thoughts! #FoodieTrends"
# Get DOB and calculate age
dob = author.get('dob', '1980-01-01')
current_year = datetime.now().year
birth_year = int(dob.split('-')[0])
age = current_year - birth_year
is_role_reflection = random.choice([True, False])
if is_role_reflection:
content = f"Reflect on your role at InsiderFoodie as {author['persona']}. Mention you're {age} years old."
else:
content = (
f"Share a personal story about your background, considering you were born on {dob} and are {age} years old. "
f"Hometown: {background['hometown']}, Cultural influences: {background['cultural_influences']}, "
f"Early memory: {background['early_memory']}, Career path: {background['career_path']}."
)
# Format the prompt using description and tone theme = random.choice(background["engagement_themes"])
persona_config = X_PERSONA_PROMPTS[persona] persona_config = PERSONA_CONFIGS[persona]
base_prompt = persona_config["prompt"].format( base_prompt = persona_config["x_prompt"].format(
description=persona_config["description"], description=persona_config["description"],
tone=persona_config["tone"] tone=persona_config["tone"]
) )
prompt = base_prompt.replace( prompt = base_prompt.replace(
"For personal tweets, reflect on your role at InsiderFoodie or background.", "For engagement tweets, ask a question about food trends, foods, or articles to engage the public.",
content f"Generate an engagement tweet asking a question about {theme} to engage the public."
) )
try: try:
response = client.chat.completions.create( response = client.chat.completions.create(
model="gpt-4o-mini", model=LIGHT_TASK_MODEL,
messages=[ messages=[
{"role": "system", "content": prompt}, {"role": "system", "content": prompt},
{"role": "user", "content": f"Generate personal tweet for {author['username']}."} {"role": "user", "content": f"Generate engagement tweet for {author['username']} about {theme}."}
], ],
max_tokens=100, max_tokens=100,
temperature=0.9 temperature=0.9
@ -168,64 +89,21 @@ def generate_personal_tweet(author, persona):
tweet = response.choices[0].message.content.strip() tweet = response.choices[0].message.content.strip()
if len(tweet) > 280: if len(tweet) > 280:
tweet = tweet[:277] + "..." tweet = tweet[:277] + "..."
logging.info(f"Generated personal tweet for {author['username']}: {tweet}") logging.info(f"Generated engagement tweet for {author['username']}: {tweet}")
return tweet return tweet
except Exception as e: except Exception as e:
logging.error(f"Failed to generate personal tweet for {author['username']}: {e}") logging.error(f"Failed to generate engagement tweet for {author['username']}: {e}")
return f"Loving my gig at InsiderFoodie, dishing out food trends! #FoodieLife" return f"What’s your take on {theme}? Let’s talk! #FoodieTrends"
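
generate_engagement_tweet expects each PERSONA_CONFIGS entry to expose a description, a tone, and an x_prompt template containing the exact engagement sentence it swaps out. That structure is not part of this diff; a hypothetical entry consistent with the calls above would look like:

# Hypothetical PERSONA_CONFIGS entry (foodie_config.py); only the keys
# "description", "tone", and "x_prompt" are actually implied by this diff.
PERSONA_CONFIGS = {
    "trend_analyst": {
        "description": "a food-trend analyst who spots what's next",
        "tone": "playful but sharp",
        "x_prompt": (
            "You are {description}. Write in a {tone} voice. "
            "For engagement tweets, ask a question about food trends, foods, "
            "or articles to engage the public."
        ),
    },
}

config = PERSONA_CONFIGS["trend_analyst"]
base = config["x_prompt"].format(description=config["description"], tone=config["tone"])
prompt = base.replace(
    "For engagement tweets, ask a question about food trends, foods, or articles to engage the public.",
    "Generate an engagement tweet asking a question about food trucks to engage the public.",
)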
def post_tweet(author, tweet):
global is_posting
credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None)
if not credentials:
logging.error(f"No X credentials found for {author['username']}")
return False
post_counts = load_post_counts()
author_count = next((entry for entry in post_counts if entry["username"] == author["username"]), None)
if author_count["count"] >= 450:
logging.warning(f"Post limit reached for {author['username']} this month")
return False
try:
client = tweepy.Client(
consumer_key=credentials["api_key"],
consumer_secret=credentials["api_secret"],
access_token=credentials["access_token"],
access_token_secret=credentials["access_token_secret"]
)
is_posting = True
response = client.create_tweet(text=tweet)
is_posting = False
author_count["count"] += 1
save_post_counts(post_counts)
logging.info(f"Posted tweet for {author['username']}: {tweet}")
return True
except Exception as e:
is_posting = False
logging.error(f"Failed to post tweet for {author['username']}: {e}")
return False
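
post_tweet now comes from foodie_utils, and its new body is not in this diff. Stripped of the monthly-count bookkeeping removed above, the core tweepy call it presumably still makes is:

import logging

import tweepy

def post_tweet_minimal(credentials, tweet):
    # credentials: one X_API_CREDENTIALS entry for the author.
    try:
        client = tweepy.Client(
            consumer_key=credentials["api_key"],
            consumer_secret=credentials["api_secret"],
            access_token=credentials["access_token"],
            access_token_secret=credentials["access_token_secret"],
        )
        client.create_tweet(text=tweet)
        return True
    except Exception as e:
        logging.error(f"Failed to post tweet: {e}")
        return False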
def main(): def main():
global is_posting
logging.info("***** X Poster Launched *****") logging.info("***** X Poster Launched *****")
for author in AUTHORS: for author in AUTHORS:
posts = get_recent_posts_for_author(author["username"]) is_posting = True
if not posts: tweet = generate_engagement_tweet(author, author["persona"])
logging.info(f"No recent posts for {author['username']}, skipping")
continue
article_tweets = 0
for post in posts[:2]:
tweet = generate_article_tweet(author, post, author["persona"])
if post_tweet(author, tweet):
delete_used_post(post["title"])
article_tweets += 1
time.sleep(random.uniform(3600, 7200))
if article_tweets >= 2:
break
tweet = generate_personal_tweet(author, author["persona"])
post_tweet(author, tweet) post_tweet(author, tweet)
is_posting = False
time.sleep(random.uniform(3600, 7200))
logging.info("X posting completed") logging.info("X posting completed")
return random.randint(600, 1800) return random.randint(600, 1800)
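
Worth noting on the rewritten main(): the global is_posting declaration was dropped while the assignments stayed, so is_posting becomes a function local and the signal handler keeps reading the untouched module-level flag. A sketch of the loop with the declaration restored (my reading of the intent, not code from this commit); AUTHORS, generate_engagement_tweet, post_tweet, and is_posting are the module's own names:

import logging
import random
import time

def main():
    global is_posting  # keep updating the flag the SIGTERM/SIGINT handler checks
    logging.info("***** X Poster Launched *****")
    for author in AUTHORS:
        is_posting = True
        tweet = generate_engagement_tweet(author, author["persona"])
        post_tweet(author, tweet)
        is_posting = False
        time.sleep(random.uniform(3600, 7200))
    logging.info("X posting completed")
    return random.randint(600, 1800)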
