From ea7d36a22b0f4f4e9d1d1ec3ce5ee34ded4b6e64 Mon Sep 17 00:00:00 2001 From: Shane Date: Mon, 28 Apr 2025 21:23:12 +1000 Subject: [PATCH] merge posting x into main files --- author_backgrounds.json | 44 ++++++ foodie_automator_google.py | 13 +- foodie_automator_reddit.py | 38 +++-- foodie_automator_rss.py | 50 +++--- foodie_config.py | 198 ++++++++++++++++++------ foodie_utils.py | 301 ++++++++++++++----------------------- foodie_x_poster.py | 166 +++----------------- 7 files changed, 379 insertions(+), 431 deletions(-) create mode 100644 author_backgrounds.json diff --git a/author_backgrounds.json b/author_backgrounds.json new file mode 100644 index 0000000..a9a6f87 --- /dev/null +++ b/author_backgrounds.json @@ -0,0 +1,44 @@ +[ + { + "username": "owenjohnson", + "hometown": "New York, NY", + "cultural_influences": "Global dining enthusiast with a focus on innovative trends", + "career_path": "Editor-in-chief, shaping food narratives with a worldwide lens", + "engagement_themes": ["global cuisines", "dining innovations", "food tech"] + }, + { + "username": "javiermorales", + "hometown": "Los Angeles, CA", + "cultural_influences": "Latin-American roots with a critical eye on culinary arts", + "career_path": "Food critic known for incisive reviews and bold takes", + "engagement_themes": ["restaurant experiences", "dish quality", "chef innovations"] + }, + { + "username": "aishapatel", + "hometown": "Sunnyvale, CA", + "cultural_influences": "Indian-American heritage with a focus on innovative cuisines", + "career_path": "Food blogger turned trend analyst, predicting food movements", + "engagement_themes": ["emerging food trends", "innovative cuisines", "sustainable dining"] + }, + { + "username": "trangnguyen", + "hometown": "Seattle, WA", + "cultural_influences": "Vietnamese heritage with a passion for cultural storytelling", + "career_path": "Food writer weaving history and tradition into modern narratives", + "engagement_themes": ["culinary traditions", "cultural dishes", "food history"] + }, + { + "username": "keishareid", + "hometown": "Atlanta, GA", + "cultural_influences": "African-American roots with a focus on soul food heritage", + "career_path": "Culinary storyteller celebrating resilience and flavor", + "engagement_themes": ["soul food classics", "cultural heritage", "comfort foods"] + }, + { + "username": "lilamoreau", + "hometown": "Miami, FL", + "cultural_influences": "Global traveler with a love for street food diversity", + "career_path": "Food adventurer documenting street eats worldwide", + "engagement_themes": ["street food finds", "global flavors", "food trucks"] + } +] \ No newline at end of file diff --git a/foodie_automator_google.py b/foodie_automator_google.py index 8bd9542..9ee5d8a 100644 --- a/foodie_automator_google.py +++ b/foodie_automator_google.py @@ -1,3 +1,4 @@ +# foodie_automator_google.py import requests import random import time @@ -19,18 +20,20 @@ from selenium.common.exceptions import TimeoutException from duckduckgo_search import DDGS from foodie_config import ( AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, - SUMMARY_PERSONA_PROMPTS, CATEGORIES, CTAS, get_clean_source_name + PERSONA_CONFIGS, CATEGORIES, CTAS, get_clean_source_name, X_API_CREDENTIALS ) from foodie_utils import ( load_json_file, save_json_file, get_image, generate_image_query, - upload_image_to_wp, select_best_persona, determine_paragraph_count, is_interesting, - generate_title_from_summary, summarize_with_gpt4o, generate_category_from_summary, post_to_wp, - 
prepare_post_data, smart_image_and_filter, insert_link_naturally, get_flickr_image_via_ddg + upload_image_to_wp, select_best_persona, determine_paragraph_count, + is_interesting, generate_title_from_summary, summarize_with_gpt4o, + generate_category_from_summary, post_to_wp, prepare_post_data, + smart_image_and_filter, insert_link_naturally, get_flickr_image_via_ddg ) from foodie_hooks import get_dynamic_hook, select_best_cta from dotenv import load_dotenv load_dotenv() + # Flag to indicate if we're in the middle of posting is_posting = False @@ -187,7 +190,7 @@ def curate_from_google_trends(geo_list=['US']): attempts = 0 max_attempts = 10 while attempts < max_attempts and trends: - trend = trends.pop(0) # Take highest-volume trend + trend = trends.pop(0) title = trend["title"] link = trend["link"] search_volume = trend["search_volume"] diff --git a/foodie_automator_reddit.py b/foodie_automator_reddit.py index f4caa65..386a693 100644 --- a/foodie_automator_reddit.py +++ b/foodie_automator_reddit.py @@ -1,3 +1,4 @@ +# foodie_automator_reddit.py import requests import random import time @@ -15,14 +16,16 @@ from requests.adapters import HTTPAdapter import praw from foodie_config import ( AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, - SUMMARY_PERSONA_PROMPTS, CATEGORIES, CTAS, get_clean_source_name, - REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USER_AGENT, LIGHT_TASK_MODEL + PERSONA_CONFIGS, CATEGORIES, CTAS, get_clean_source_name, + REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USER_AGENT, LIGHT_TASK_MODEL, + X_API_CREDENTIALS ) from foodie_utils import ( load_json_file, save_json_file, get_image, generate_image_query, upload_image_to_wp, determine_paragraph_count, insert_link_naturally, summarize_with_gpt4o, generate_category_from_summary, post_to_wp, - prepare_post_data, select_best_author, smart_image_and_filter, get_flickr_image_via_ddg + prepare_post_data, select_best_author, smart_image_and_filter, + get_flickr_image_via_ddg ) from foodie_hooks import get_dynamic_hook, select_best_cta @@ -48,7 +51,6 @@ def setup_logging(): with open(LOG_FILE, 'r') as f: lines = f.readlines() - # Group lines into log entries based on timestamp pattern log_entries = [] current_entry = [] timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}') @@ -105,8 +107,6 @@ used_images = set(entry["title"] for entry in used_images_data if "title" in ent client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) def clean_reddit_title(title): - """Remove Reddit flairs like [pro/chef] or [homemade] from the title.""" - # Match patterns like [pro/chef], [homemade], etc. 
at the start of the title cleaned_title = re.sub(r'^\[.*?\]\s*', '', title).strip() logging.info(f"Cleaned Reddit title from '{title}' to '{cleaned_title}'") return cleaned_title @@ -158,7 +158,7 @@ def is_interesting_reddit(title, summary, upvotes, comment_count, top_comments): def get_top_comments(post_url, reddit, limit=3): try: submission = reddit.submission(url=post_url) - submission.comment_sort = 'top' # Move this line up + submission.comment_sort = 'top' submission.comments.replace_more(limit=0) top_comments = [comment.body for comment in submission.comments[:limit] if not comment.body.startswith('[deleted]')] logging.info(f"Fetched {len(top_comments)} top comments for {post_url}") @@ -186,11 +186,10 @@ def fetch_reddit_posts(): if pub_date < cutoff_date: logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})") continue - # Clean the title before storing cleaned_title = clean_reddit_title(submission.title) articles.append({ - "title": cleaned_title, # Use cleaned title - "raw_title": submission.title, # Store raw title for reference if needed + "title": cleaned_title, + "raw_title": submission.title, "link": f"https://www.reddit.com{submission.permalink}", "summary": submission.selftext, "feed_title": get_clean_source_name(subreddit_name), @@ -212,7 +211,6 @@ def curate_from_reddit(): logging.info("No Reddit posts available") return None, None, None - # Sort by upvotes descending articles.sort(key=lambda x: x["upvotes"], reverse=True) reddit = praw.Reddit( @@ -224,15 +222,15 @@ def curate_from_reddit(): attempts = 0 max_attempts = 10 while attempts < max_attempts and articles: - article = articles.pop(0) # Take highest-upvote post - title = article["title"] # Use cleaned title - raw_title = article["raw_title"] # Use raw title for deduplication + article = articles.pop(0) + title = article["title"] + raw_title = article["raw_title"] link = article["link"] summary = article["summary"] source_name = "Reddit" original_source = 'Reddit' - if raw_title in posted_titles: # Check against raw title + if raw_title in posted_titles: print(f"Skipping already posted post: {raw_title}") logging.info(f"Skipping already posted post: {raw_title}") attempts += 1 @@ -250,7 +248,7 @@ def curate_from_reddit(): top_comments = get_top_comments(link, reddit, limit=3) interest_score = is_interesting_reddit( - title, # Use cleaned title + title, summary, article["upvotes"], article["comment_count"], @@ -272,7 +270,7 @@ def curate_from_reddit(): "If brief, expand on the core idea with relevant context about its appeal or significance. " "Do not include emojis in the summary." 
) - content_to_summarize = f"{title}\n\n{summary}" # Use cleaned title + content_to_summarize = f"{title}\n\n{summary}" if top_comments: content_to_summarize += f"\n\nTop Comments:\n{'\n'.join(top_comments)}" @@ -290,7 +288,7 @@ def curate_from_reddit(): final_summary = insert_link_naturally(final_summary, source_name, link) - post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title) # Use cleaned title + post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title) if not post_data: attempts += 1 continue @@ -345,8 +343,8 @@ def curate_from_reddit(): is_posting = False timestamp = datetime.now(timezone.utc).isoformat() - save_json_file(POSTED_TITLES_FILE, raw_title, timestamp) # Save raw title - posted_titles.add(raw_title) # Add raw title to set + save_json_file(POSTED_TITLES_FILE, raw_title, timestamp) + posted_titles.add(raw_title) logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}") if image_url: diff --git a/foodie_automator_rss.py b/foodie_automator_rss.py index b1d76eb..8525ded 100644 --- a/foodie_automator_rss.py +++ b/foodie_automator_rss.py @@ -1,3 +1,4 @@ +# foodie_automator_rss.py import requests import random import time @@ -13,12 +14,17 @@ from openai import OpenAI from urllib.parse import quote from requests.packages.urllib3.util.retry import Retry from requests.adapters import HTTPAdapter -from foodie_config import RSS_FEEDS, RSS_FEED_NAMES, AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, SUMMARY_PERSONA_PROMPTS, CATEGORIES, get_clean_source_name +from foodie_config import ( + RSS_FEEDS, RSS_FEED_NAMES, AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS, + HOME_KEYWORDS, PRODUCT_KEYWORDS, PERSONA_CONFIGS, CATEGORIES, + get_clean_source_name, X_API_CREDENTIALS +) from foodie_utils import ( load_json_file, save_json_file, get_image, generate_image_query, - upload_image_to_wp, determine_paragraph_count, insert_link_naturally, is_interesting, - generate_title_from_summary, summarize_with_gpt4o, generate_category_from_summary, post_to_wp, - prepare_post_data, select_best_author, smart_image_and_filter + upload_image_to_wp, determine_paragraph_count, insert_link_naturally, + is_interesting, generate_title_from_summary, summarize_with_gpt4o, + generate_category_from_summary, post_to_wp, prepare_post_data, + select_best_author, smart_image_and_filter ) from foodie_hooks import get_dynamic_hook, select_best_cta import feedparser @@ -27,6 +33,7 @@ from typing import List, Dict, Any, Optional from dotenv import load_dotenv load_dotenv() + # Flag to indicate if we're in the middle of posting is_posting = False @@ -43,10 +50,10 @@ signal.signal(signal.SIGINT, signal_handler) LOG_FILE = "/home/shane/foodie_automator/foodie_automator_rss.log" LOG_PRUNE_DAYS = 30 -MAX_WORKERS = 5 # Number of concurrent workers for parallel processing -RATE_LIMIT_DELAY = 1 # Delay between API calls in seconds -FEED_TIMEOUT = 30 # Timeout for feed requests in seconds -MAX_RETRIES = 3 # Maximum number of retries for failed requests +MAX_WORKERS = 5 +RATE_LIMIT_DELAY = 1 +FEED_TIMEOUT = 30 +MAX_RETRIES = 3 POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json' USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json' @@ -58,7 +65,6 @@ posted_titles = set(entry["title"] for entry in posted_titles_data) used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) 
if "title" in entry) def setup_logging(): - """Configure logging with rotation and cleanup.""" if os.path.exists(LOG_FILE): with open(LOG_FILE, 'r') as f: lines = f.readlines() @@ -81,9 +87,14 @@ def setup_logging(): format="%(asctime)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S" ) + console_handler = logging.StreamHandler() + console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) + logging.getLogger().addHandler(console_handler) + logging.info("Logging initialized for foodie_automator_rss.py") + +setup_logging() def create_http_session() -> requests.Session: - """Create and configure an HTTP session with retry logic.""" session = requests.Session() retry_strategy = Retry( total=MAX_RETRIES, @@ -101,7 +112,6 @@ def create_http_session() -> requests.Session: return session def fetch_feed(feed_url: str, session: requests.Session) -> Optional[feedparser.FeedParserDict]: - """Fetch and parse an RSS feed with error handling and retries.""" try: response = session.get(feed_url, timeout=FEED_TIMEOUT) response.raise_for_status() @@ -117,20 +127,14 @@ def fetch_feed(feed_url: str, session: requests.Session) -> Optional[feedparser. return None def is_interesting_rss(title: str, summary: str, pub_date: datetime) -> bool: - """Enhanced content filtering with improved scoring.""" try: - # Basic validation if not title or not summary: return False - # Check if content is too old if datetime.now(timezone.utc) - pub_date > timedelta(days=7): return False - # Calculate interest score score = 0 - - # Title analysis title_lower = title.lower() if any(keyword in title_lower for keyword in RECIPE_KEYWORDS): score += 3 @@ -139,7 +143,6 @@ def is_interesting_rss(title: str, summary: str, pub_date: datetime) -> bool: if any(keyword in title_lower for keyword in HOME_KEYWORDS): score += 1 - # Content analysis summary_lower = summary.lower() if len(summary.split()) < 100: score -= 2 @@ -152,7 +155,6 @@ def is_interesting_rss(title: str, summary: str, pub_date: datetime) -> bool: return False def fetch_rss_feeds() -> List[Dict[str, Any]]: - """Fetch RSS feeds with parallel processing and improved error handling.""" session = create_http_session() articles = [] @@ -177,7 +179,6 @@ def fetch_rss_feeds() -> List[Dict[str, Any]]: return [] def process_feed(feed_url: str, session: requests.Session) -> List[Dict[str, Any]]: - """Process a single RSS feed and extract articles.""" try: feed = fetch_feed(feed_url, session) if not feed: @@ -192,7 +193,8 @@ def process_feed(feed_url: str, session: requests.Session) -> List[Dict[str, Any "title": entry.title, "link": entry.link, "summary": entry.summary if hasattr(entry, 'summary') else entry.description, - "feed_title": get_clean_source_name(feed.feed.title), + "content": getattr(entry, 'content', [{'value': ''}])[0].value, + "feed_title": get_clean_source_name(feed_url), "pub_date": pub_date } @@ -229,13 +231,12 @@ def curate_from_rss(): attempts = 0 max_attempts = 10 while attempts < max_attempts and articles: - article = articles.pop(0) # Take newest article + article = articles.pop(0) title = article["title"] link = article["link"] summary = article["summary"] content = article["content"] - feed_url = article["feed_title"] - source_name = feed_url[0] if isinstance(feed_url, tuple) and len(feed_url) > 0 else feed_url + source_name = article["feed_title"] original_source = f'{source_name}' if title in posted_titles: @@ -254,7 +255,6 @@ def curate_from_rss(): attempts += 1 continue - # Score using title, summary, and 
content scoring_content = f"{title}\n\n{summary}\n\nContent: {content}" interest_score = is_interesting(scoring_content) logging.info(f"Interest score for '{title}': {interest_score}") diff --git a/foodie_config.py b/foodie_config.py index b834b26..fe77b65 100644 --- a/foodie_config.py +++ b/foodie_config.py @@ -58,114 +58,212 @@ AUTHORS = [ } ] -POSTED_RSS_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json' -POSTED_GOOGLE_TITLES_FILE = '/home/shane/foodie_automator/posted_google_titles.json' -POSTED_REDDIT_TITLES_FILE = '/home/shane/foodie_automator/posted_reddit_titles.json' -USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json' -EXPIRATION_DAYS = 3 -IMAGE_EXPIRATION_DAYS = 7 - -RSS_FEEDS = [ - "https://www.eater.com/rss/full.xml", - "https://modernrestaurantmanagement.com/feed/", - "https://thespoon.tech/feed/", - "https://www.nrn.com/rss.xml", - "https://rss.nytimes.com/services/xml/rss/nyt/DiningandWine.xml", - "https://www.bakingbusiness.com/rss/articles", - "https://www.theguardian.com/food/rss" -] - -RSS_FEED_NAMES = { - "https://www.eater.com/rss/full.xml": ("Eater", "https://www.eater.com/"), - "https://modernrestaurantmanagement.com/feed/": ("Modern Restaurant Management", "https://modernrestaurantmanagement.com/"), - "https://thespoon.tech/feed/": ("The Spoon", "https://thespoon.tech/"), - "https://www.nrn.com/rss.xml": ("Nation's Restaurant News", "https://www.nrn.com/"), - "https://rss.nytimes.com/services/xml/rss/nyt/DiningandWine.xml": ("The New York Times", "https://www.nytimes.com/section/food"), - "https://www.bakingbusiness.com/rss/articles": ("Baking Business", "https://www.bakingbusiness.com/"), - "https://www.theguardian.com/food/rss": ("The Guardian Food", "https://www.theguardian.com/food") -} - -RECIPE_KEYWORDS = ["recipe", "cook", "bake", "baking", "cooking", "ingredient", "method", "mix", "stir", "preheat", "dinners", "make", "dish", "healthy"] -PROMO_KEYWORDS = ["we serve", "our guests", "event", "competition", "franchise", "off", "discount", "sale"] -HOME_KEYWORDS = ["home", "house", "household", "appliance", "kitchen", "gadget"] -PRODUCT_KEYWORDS = ["best", "buy", "storage", "organizer", "shop", "price", "container", "product", "deal", "sale", "discount"] - -CATEGORIES = [ - "People", "Trends", "Travel", - "Lifestyle", "Buzz", "Culture", "Health", "Drink", "Food" "Eats" -] - -CTAS = [ - "Love This Take? Share It On !", - "Dig This Scoop? Post It On !", - "Wild For This? Spread It On !", - "Crave This Read? Tweet It On !", - "Buzzing Over This? Share On !" 
+X_API_CREDENTIALS = [ + { + "username": "owenjohnson", + "x_username": "@insiderfoodieowen", + "api_key": os.getenv("OWENJOHNSON_X_API_KEY"), + "api_secret": os.getenv("OWENJOHNSON_X_API_SECRET"), + "access_token": os.getenv("OWENJOHNSON_X_ACCESS_TOKEN"), + "access_token_secret": os.getenv("OWENJOHNSON_X_ACCESS_TOKEN_SECRET"), + "client_secret": os.getenv("OWENJOHNSON_X_CLIENT_SECRET") + }, + { + "username": "javiermorales", + "x_username": "@insiderfoodiejavier", + "api_key": os.getenv("JAVIERMORALES_X_API_KEY"), + "api_secret": os.getenv("JAVIERMORALES_X_API_SECRET"), + "access_token": os.getenv("JAVIERMORALES_X_ACCESS_TOKEN"), + "access_token_secret": os.getenv("JAVIERMORALES_X_ACCESS_TOKEN_SECRET"), + "client_secret": os.getenv("JAVIERMORALES_X_CLIENT_SECRET") + }, + { + "username": "aishapatel", + "x_username": "@insiderfoodieaisha", + "api_key": os.getenv("AISHAPATEL_X_API_KEY"), + "api_secret": os.getenv("AISHAPATEL_X_API_SECRET"), + "access_token": os.getenv("AISHAPATEL_X_ACCESS_TOKEN"), + "access_token_secret": os.getenv("AISHAPATEL_X_ACCESS_TOKEN_SECRET"), + "client_secret": os.getenv("AISHAPATEL_X_CLIENT_SECRET") + }, + { + "username": "trangnguyen", + "x_username": "@insiderfoodietrang", + "api_key": os.getenv("TRANGNGUYEN_X_API_KEY"), + "api_secret": os.getenv("TRANGNGUYEN_X_API_SECRET"), + "access_token": os.getenv("TRANGNGUYEN_X_ACCESS_TOKEN"), + "access_token_secret": os.getenv("TRANGNGUYEN_X_ACCESS_TOKEN_SECRET"), + "client_secret": os.getenv("TRANGNGUYEN_X_CLIENT_SECRET") + }, + { + "username": "keishareid", + "x_username": "@insiderfoodiekeisha", + "api_key": os.getenv("KEISHAREID_X_API_KEY"), + "api_secret": os.getenv("KEISHAREID_X_API_SECRET"), + "access_token": os.getenv("KEISHAREID_X_ACCESS_TOKEN"), + "access_token_secret": os.getenv("KEISHAREID_X_ACCESS_TOKEN_SECRET"), + "client_secret": os.getenv("KEISHAREID_X_CLIENT_SECRET") + }, + { + "username": "lilamoreau", + "x_username": "@insiderfoodielila", + "api_key": os.getenv("LILAMOREAU_X_API_KEY"), + "api_secret": os.getenv("LILAMOREAU_X_API_SECRET"), + "access_token": os.getenv("LILAMOREAU_X_ACCESS_TOKEN"), + "access_token_secret": os.getenv("LILAMOREAU_X_ACCESS_TOKEN_SECRET"), + "client_secret": os.getenv("LILAMOREAU_X_CLIENT_SECRET") + } ] -SUMMARY_PERSONA_PROMPTS = { +PERSONA_CONFIGS = { "Visionary Editor": { "description": "a commanding food editor with a borderless view", "tone": "a polished and insightful tone, like 'This redefines culinary excellence.'", - "prompt": ( + "article_prompt": ( "You’re {description}. Summarize this article in {tone}. " "Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. " "Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. " "Add a bold take and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary." + ), + "x_prompt": ( + "Craft a tweet as {description}. Keep it under 280 characters, using {tone}. " + "For article tweets, include the article title, a quirky hook, and the URL. " + "For engagement tweets, ask a question about food trends, foods, or articles to engage the public. " + "Avoid emojis and clichés like 'game-changer'. Return only the tweet text." 
) }, "Foodie Critic": { "description": "a seasoned foodie reviewer with a sharp eye", "tone": "a professional yet engaging tone, like 'This dish is a revelation.'", - "prompt": ( + "article_prompt": ( "You’re {description}. Summarize this article in {tone}. " "Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. " "Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. " "Add a subtle opinion and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary." + ), + "x_prompt": ( + "Craft a tweet as {description}. Keep it under 280 characters, using {tone}. " + "For article tweets, include the article title, a quirky hook, and the URL. " + "For engagement tweets, ask a question about food trends, foods, or articles to engage the public. " + "Avoid emojis and clichés like 'game-changer'. Return only the tweet text." ) }, "Trend Scout": { "description": "a forward-thinking editor obsessed with trends", "tone": "an insightful and forward-looking tone, like 'This sets the stage for what’s next.'", - "prompt": ( + "article_prompt": ( "You’re {description}. Summarize this article in {tone}. " "Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. " "Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. " "Predict what’s next and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary." + ), + "x_prompt": ( + "Craft a tweet as {description}. Keep it under 280 characters, using {tone}. " + "For article tweets, include the article title, a quirky hook, and the URL. " + "For engagement tweets, ask a question about food trends, foods, or articles to engage the public. " + "Avoid emojis and clichés like 'game-changer'. Return only the tweet text." ) }, "Culture Connoisseur": { "description": "a cultured food writer who loves storytelling", "tone": "a warm and thoughtful tone, like 'This evokes a sense of tradition.'", - "prompt": ( + "article_prompt": ( "You’re {description}. Summarize this article in {tone}. " "Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. " "Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. " "Add a thoughtful observation and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary." + ), + "x_prompt": ( + "Craft a tweet as {description}. Keep it under 280 characters, using {tone}. " + "For article tweets, include the article title, a quirky hook, and the URL. " + "For engagement tweets, ask a question about food trends, foods, or articles to engage the public. " + "Avoid emojis and clichés like 'game-changer'. Return only the tweet text." 
) }, "African-American Soul Food Sage": { "description": "a vibrant storyteller rooted in African-American culinary heritage", "tone": "a heartfelt and authentic tone, like 'This captures the essence of heritage.'", - "prompt": ( + "article_prompt": ( "You’re {description}. Summarize this article in {tone}. " "Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. " "Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. " "Add a heritage twist and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary." + ), + "x_prompt": ( + "Craft a tweet as {description}. Keep it under 280 characters, using {tone}. " + "For article tweets, include the article title, a quirky hook, and the URL. " + "For engagement tweets, ask a question about food trends, foods, or articles to engage the public. " + "Avoid emojis and clichés like 'game-changer'. Return only the tweet text." ) }, "Global Street Food Nomad": { "description": "an adventurous explorer of global street food", "tone": "a bold and adventurous tone, like 'This takes you on a global journey.'", - "prompt": ( + "article_prompt": ( "You’re {description}. Summarize this article in {tone}. " "Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. " "Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. " "Drop a street-level insight and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary." + ), + "x_prompt": ( + "Craft a tweet as {description}. Keep it under 280 characters, using {tone}. " + "For article tweets, include the article title, a quirky hook, and the URL. " + "For engagement tweets, ask a question about food trends, foods, or articles to engage the public. " + "Avoid emojis and clichés like 'game-changer'. Return only the tweet text." 
) } } +# File paths +POSTED_RSS_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json' +POSTED_GOOGLE_TITLES_FILE = '/home/shane/foodie_automator/posted_google_titles.json' +POSTED_REDDIT_TITLES_FILE = '/home/shane/foodie_automator/posted_reddit_titles.json' +USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json' +AUTHOR_BACKGROUNDS_FILE = '/home/shane/foodie_automator/author_backgrounds.json' +X_POST_COUNTS_FILE = '/home/shane/foodie_automator/x_post_counts.json' +RECENT_POSTS_FILE = '/home/shane/foodie_automator/recent_posts.json' + +EXPIRATION_DAYS = 3 +IMAGE_EXPIRATION_DAYS = 7 + +RSS_FEEDS = [ + "https://www.eater.com/rss/full.xml", + "https://modernrestaurantmanagement.com/feed/", + "https://thespoon.tech/feed/", + "https://www.nrn.com/rss.xml", + "https://rss.nytimes.com/services/xml/rss/nyt/DiningandWine.xml", + "https://www.bakingbusiness.com/rss/articles", + "https://www.theguardian.com/food/rss" +] + +RSS_FEED_NAMES = { + "https://www.eater.com/rss/full.xml": ("Eater", "https://www.eater.com/"), + "https://modernrestaurantmanagement.com/feed/": ("Modern Restaurant Management", "https://modernrestaurantmanagement.com/"), + "https://thespoon.tech/feed/": ("The Spoon", "https://thespoon.tech/"), + "https://www.nrn.com/rss.xml": ("Nation's Restaurant News", "https://www.nrn.com/"), + "https://rss.nytimes.com/services/xml/rss/nyt/DiningandWine.xml": ("The New York Times", "https://www.nytimes.com/section/food"), + "https://www.bakingbusiness.com/rss/articles": ("Baking Business", "https://www.bakingbusiness.com/"), + "https://www.theguardian.com/food/rss": ("The Guardian Food", "https://www.theguardian.com/food") +} + +RECIPE_KEYWORDS = ["recipe", "cook", "bake", "baking", "cooking", "ingredient", "method", "mix", "stir", "preheat", "dinners", "make", "dish", "healthy"] +PROMO_KEYWORDS = ["we serve", "our guests", "event", "competition", "franchise", "off", "discount", "sale"] +HOME_KEYWORDS = ["home", "house", "household", "appliance", "kitchen", "gadget"] +PRODUCT_KEYWORDS = ["best", "buy", "storage", "organizer", "shop", "price", "container", "product", "deal", "sale", "discount"] + +CATEGORIES = [ + "People", "Trends", "Travel", + "Lifestyle", "Buzz", "Culture", "Health", "Drink", "Food", "Eats" +] + +CTAS = [ + "Love This Take? Share It On !", + "Dig This Scoop? Post It On !", + "Wild For This? Spread It On !", + "Crave This Read? Tweet It On !", + "Buzzing Over This? Share On !" 
+] + REDDIT_CLIENT_ID = os.getenv("REDDIT_CLIENT_ID") REDDIT_CLIENT_SECRET = os.getenv("REDDIT_CLIENT_SECRET") REDDIT_USER_AGENT = os.getenv("REDDIT_USER_AGENT") diff --git a/foodie_utils.py b/foodie_utils.py index a87d6c4..e9f8a52 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -11,7 +11,6 @@ import tempfile import requests import time from dotenv import load_dotenv -import os from datetime import datetime, timezone, timedelta from openai import OpenAI from urllib.parse import quote @@ -19,10 +18,12 @@ from duckduckgo_search import DDGS from bs4 import BeautifulSoup from requests.adapters import HTTPAdapter from requests.packages.urllib3.util.retry import Retry +import tweepy from foodie_config import ( - RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, SUMMARY_PERSONA_PROMPTS, - get_clean_source_name, AUTHORS, LIGHT_TASK_MODEL, SUMMARY_MODEL + RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, PERSONA_CONFIGS, + get_clean_source_name, AUTHORS, LIGHT_TASK_MODEL, SUMMARY_MODEL, X_API_CREDENTIALS ) + load_dotenv() client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) @@ -43,7 +44,7 @@ def load_json_file(filename, expiration_days=None): except json.JSONDecodeError as e: logging.warning(f"Skipping invalid JSON line in {filename} at line {i}: {e}") if expiration_days: - cutoff = (datetime.now() - timedelta(days=expiration_days)).isoformat() + cutoff = (datetime.now(timezone.utc) - timedelta(days=expiration_days)).isoformat() data = [entry for entry in data if entry["timestamp"] > cutoff] logging.info(f"Loaded {len(data)} entries from {filename}, {len(data)} valid after expiration check") except Exception as e: @@ -70,6 +71,95 @@ def save_json_file(filename, key, value): except Exception as e: logging.error(f"Failed to save or prune {filename}: {e}") +def load_post_counts(): + counts = load_json_file('/home/shane/foodie_automator/x_post_counts.json') + if not counts: + counts = [{ + "username": author["username"], + "month": datetime.now(timezone.utc).strftime("%Y-%m"), + "monthly_count": 0, + "day": datetime.now(timezone.utc).strftime("%Y-%m-%d"), + "daily_count": 0 + } for author in AUTHORS] + current_month = datetime.now(timezone.utc).strftime("%Y-%m") + current_day = datetime.now(timezone.utc).strftime("%Y-%m-%d") + for entry in counts: + if entry["month"] != current_month: + entry["month"] = current_month + entry["monthly_count"] = 0 + if entry["day"] != current_day: + entry["day"] = current_day + entry["daily_count"] = 0 + return counts + +def save_post_counts(counts): + with open('/home/shane/foodie_automator/x_post_counts.json', 'w') as f: + for item in counts: + json.dump(item, f) + f.write('\n') + logging.info("Saved post counts to x_post_counts.json") + +def generate_article_tweet(author, post, persona): + persona_config = PERSONA_CONFIGS[persona] + base_prompt = persona_config["x_prompt"].format( + description=persona_config["description"], + tone=persona_config["tone"] + ) + prompt = base_prompt.replace( + "For article tweets, include the article title, a quirky hook, and the URL.", + f"Generate an article tweet including the title '{post['title']}', a quirky hook, and the URL '{post['url']}'." + ) + try: + response = client.chat.completions.create( + model=LIGHT_TASK_MODEL, + messages=[ + {"role": "system", "content": prompt}, + {"role": "user", "content": f"Generate tweet for {post['title']}."} + ], + max_tokens=100, + temperature=0.9 + ) + tweet = response.choices[0].message.content.strip() + if len(tweet) > 280: + tweet = tweet[:277] + "..." 
+ logging.info(f"Generated article tweet for {author['username']}: {tweet}") + return tweet + except Exception as e: + logging.error(f"Failed to generate article tweet for {author['username']}: {e}") + return f"This trend is fire! Check out {post['title']} at {post['url']} #Foodie" + +def post_tweet(author, tweet): + credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None) + if not credentials: + logging.error(f"No X credentials found for {author['username']}") + return False + + post_counts = load_post_counts() + author_count = next((entry for entry in post_counts if entry["username"] == author["username"]), None) + if author_count["monthly_count"] >= 500: + logging.warning(f"Monthly post limit (500) reached for {author['username']}") + return False + if author_count["daily_count"] >= 20: + logging.warning(f"Daily post limit (20) reached for {author['username']}") + return False + + try: + client = tweepy.Client( + consumer_key=credentials["api_key"], + consumer_secret=credentials["api_secret"], + access_token=credentials["access_token"], + access_token_secret=credentials["access_token_secret"] + ) + response = client.create_tweet(text=tweet) + author_count["monthly_count"] += 1 + author_count["daily_count"] += 1 + save_post_counts(post_counts) + logging.info(f"Posted tweet for {author['username']}: {tweet}") + return True + except Exception as e: + logging.error(f"Failed to post tweet for {author['username']}: {e}") + return False + def select_best_persona(interest_score, content=""): logging.info("Using select_best_persona with interest_score and content") personas = ["Visionary Editor", "Foodie Critic", "Trend Scout", "Culture Connoisseur"] @@ -206,7 +296,6 @@ def smart_image_and_filter(title, summary): raw_result = response.choices[0].message.content.strip() logging.info(f"Raw GPT smart image/filter response: '{raw_result}'") - # Clean and parse JSON cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip() try: result = json.loads(cleaned_result) @@ -339,14 +428,12 @@ def generate_title_from_summary(summary): def summarize_with_gpt4o(content, source_name, link, interest_score=0, extra_prompt=""): try: persona = select_best_persona(interest_score, content) - # Access the persona configuration - persona_config = SUMMARY_PERSONA_PROMPTS.get(persona, { - "prompt": "Write a concise, engaging summary that captures the essence of the content for food lovers.", + persona_config = PERSONA_CONFIGS.get(persona, { + "article_prompt": "Write a concise, engaging summary that captures the essence of the content for food lovers.", "description": "a generic food writer", "tone": "an engaging tone" }) - # Format the prompt using description and tone - prompt = persona_config["prompt"].format( + prompt = persona_config["article_prompt"].format( description=persona_config["description"], tone=persona_config["tone"], num_paragraphs=determine_paragraph_count(interest_score) @@ -379,166 +466,7 @@ def summarize_with_gpt4o(content, source_name, link, interest_score=0, extra_pro logging.error(f"Summary generation failed with model {SUMMARY_MODEL}: {e}") return None -def smart_image_and_filter(title, summary): - try: - content = f"{title}\n\n{summary}" - - prompt = ( - 'Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) ' - 'for an image search about food industry trends or viral content. Prioritize specific terms if present, ' - 'otherwise focus on the main theme. 
' - 'Return "SKIP" if the article is about home appliances, recipes, promotions, or contains "homemade", else "KEEP". ' - 'Return as JSON with double quotes: {"image_query": "specific term", "relevance": ["keyword1", "keyword2"], "action": "KEEP" or "SKIP"}' - ) - - response = client.chat.completions.create( - model=LIGHT_TASK_MODEL, - messages=[ - {"role": "system", "content": prompt}, - {"role": "user", "content": content} - ], - max_tokens=100 - ) - raw_result = response.choices[0].message.content.strip() - logging.info(f"Raw GPT smart image/filter response: '{raw_result}'") - - cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip() - try: - result = json.loads(cleaned_result) - except json.JSONDecodeError as e: - logging.warning(f"JSON parsing failed: {e}, raw: '{cleaned_result}'. Using fallback.") - return "food trends", ["cuisine", "dining"], False - - if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result: - logging.warning(f"Invalid GPT response format: {result}, using fallback") - return "food trends", ["cuisine", "dining"], False - - image_query = result["image_query"] - relevance_keywords = result["relevance"] - skip_flag = result["action"] == "SKIP" or "homemade" in title.lower() or "homemade" in summary.lower() - - logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Skip: {skip_flag}") - - if not image_query or len(image_query.split()) < 2: - logging.warning(f"Image query '{image_query}' too vague, using fallback") - return "food trends", ["cuisine", "dining"], skip_flag - - return image_query, relevance_keywords, skip_flag - - except Exception as e: - logging.error(f"Smart image/filter failed: {e}, using fallback") - return "food trends", ["cuisine", "dining"], False - -def is_interesting(summary): - try: - response = client.chat.completions.create( - model=LIGHT_TASK_MODEL, - messages=[ - {"role": "system", "content": ( - "Rate this content from 0-10 based on its rarity, buzzworthiness, and engagement potential for food lovers, covering a wide range of food topics (skip recipes). " - "Score 8-10 for rare, highly shareable ideas that grab attention. " - "Score 5-7 for fresh, engaging updates with broad appeal. Score below 5 for common or unremarkable content. " - "Return only a number." 
- )}, - {"role": "user", "content": f"Content: {summary}"} - ], - max_tokens=5 - ) - raw_score = response.choices[0].message.content.strip() - score = int(raw_score) if raw_score.isdigit() else 0 - print(f"Interest Score for '{summary[:50]}...': {score} (raw: {raw_score})") - logging.info(f"Interest Score: {score} (raw: {raw_score})") - return score - except Exception as e: - logging.error(f"Interestingness scoring failed with model {LIGHT_TASK_MODEL}: {e}") - print(f"Interest Error: {e}") - return 0 - -def select_paragraphs(paragraphs, target_count, persona, original_content): - """Select or generate paragraphs to match target_count, preserving key content.""" - if len(paragraphs) == target_count and all(60 <= len(p.split()) <= 80 for p in paragraphs): - return paragraphs - - # Score paragraphs by food-related keywords - keywords = ["food", "dish", "trend", "menu", "cuisine", "flavor", "taste", "eat", "dining", "restaurant"] - scores = [] - for para in paragraphs: - score = sum(para.lower().count(kw) for kw in keywords) - word_count = len(para.split()) - # Penalize paragraphs outside word range - score -= abs(word_count - 70) # Favor ~70 words - scores.append(score) - - # Handle too many paragraphs - if len(paragraphs) > target_count: - # Keep last paragraph unless it's low-scoring - if scores[-1] >= min(scores[:-1]) or len(paragraphs) == target_count + 1: - selected_indices = sorted(range(len(paragraphs)-1), key=lambda i: scores[i], reverse=True)[:target_count-1] + [len(paragraphs)-1] - else: - selected_indices = sorted(range(len(paragraphs)), key=lambda i: scores[i], reverse=True)[:target_count] - selected = [paragraphs[i] for i in sorted(selected_indices)] - else: - selected = paragraphs[:] - - # Handle word count adjustments or too few paragraphs - adjusted = [] - for para in selected: - word_count = len(para.split()) - if word_count < 60 or word_count > 80: - # Rephrase to fit 60-80 words - rephrase_prompt = ( - f"Rephrase this paragraph to exactly 60-80 words, keeping the same tone as a {persona} and all key ideas: '{para}'" - ) - try: - response = client.chat.completions.create( - model=SUMMARY_MODEL, - messages=[ - {"role": "system", "content": rephrase_prompt}, - {"role": "user", "content": para} - ], - max_tokens=150, - temperature=0.7 - ) - new_para = response.choices[0].message.content.strip() - if 60 <= len(new_para.split()) <= 80: - adjusted.append(new_para) - else: - adjusted.append(para) # Fallback to original if rephrase fails - except Exception as e: - logging.warning(f"Rephrasing failed for paragraph: {e}") - adjusted.append(para) - else: - adjusted.append(para) - - # Generate additional paragraphs if needed - while len(adjusted) < target_count: - extra_prompt = ( - f"Generate one additional paragraph (60-80 words) in the style of a {persona}, " - f"based on this content: '{original_content[:200]}...'. 
Match the tone of: '{adjusted[-1] if adjusted else 'This trend is fire!'}'" - ) - try: - response = client.chat.completions.create( - model=SUMMARY_MODEL, - messages=[ - {"role": "system", "content": extra_prompt}, - {"role": "user", "content": original_content} - ], - max_tokens=150, - temperature=0.7 - ) - new_para = response.choices[0].message.content.strip() - if 60 <= len(new_para.split()) <= 80: - adjusted.append(new_para) - else: - adjusted.append("This trend is sparking buzz across menus!") # Fallback - except Exception as e: - logging.warning(f"Extra paragraph generation failed: {e}") - adjusted.append("This vibe is shaking up the food scene!") - - return adjusted[:target_count] - def insert_link_naturally(summary, source_name, source_url): - import re try: prompt = ( "Take this summary and insert a single HTML link naturally into one paragraph (randomly chosen). " @@ -571,8 +499,7 @@ def insert_link_naturally(summary, source_name, source_url): except Exception as e: logging.error(f"Link insertion failed: {e}") - # Fallback: Protect times and insert at sentence end - time_pattern = r'\b\d{1,2}\.\d{2}(?:am|pm)\b' # Matches 6.30am, 12.15pm + time_pattern = r'\b\d{1,2}\.\d{2}(?:am|pm)\b' protected_summary = re.sub(time_pattern, lambda m: m.group(0).replace('.', '@'), summary) paragraphs = protected_summary.split('\n') if not paragraphs or all(not p.strip() for p in paragraphs): @@ -588,22 +515,19 @@ def insert_link_naturally(summary, source_name, source_url): ] insertion_phrase = random.choice(phrases) - # Find sentence boundary, avoiding protected times sentences = re.split(r'(?<=[.!?])\s+', target_para) insertion_point = -1 for i, sent in enumerate(sentences): - if sent.strip() and '@' not in sent: # Avoid sentences with protected times + if sent.strip() and '@' not in sent: insertion_point = sum(len(s) + 1 for s in sentences[:i+1]) break if insertion_point == -1: - insertion_point = len(target_para) # Append if no good boundary + insertion_point = len(target_para) - # Add space after insertion phrase new_para = f"{target_para[:insertion_point]} {insertion_phrase}. 
{target_para[insertion_point:]}".strip() paragraphs[paragraphs.index(target_para)] = new_para new_summary = '\n'.join(paragraphs) - # Restore periods in times new_summary = new_summary.replace('@', '.') logging.info(f"Fallback summary with link: {new_summary}") return new_summary @@ -759,7 +683,7 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im logging.warning(f"All image uploads failed for '{post_data['title']}' - posting without image") endpoint = f"{wp_base_url}/posts/{post_id}" if post_id else f"{wp_base_url}/posts" - method = requests.post # Use POST for both create and update (WP API handles it) + method = requests.post logging.debug(f"Sending WP request to {endpoint} with payload: {json.dumps(payload, indent=2)}") @@ -775,13 +699,21 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im post_id = post_info["id"] post_url = post_info["link"] - # Save to recent_posts.json + # Save to recent_posts.json timestamp = datetime.now(timezone.utc).isoformat() save_post_to_recent(post_data["title"], post_url, author["username"], timestamp) - logging.info(f"Posted/Updated by {author['username']}: {post_data['title']} (ID: {post_id})") - return post_id, post_url - + # Post article tweet to X + try: + post = {"title": post_data["title"], "url": post_url} + tweet = generate_article_tweet(author, post, author["persona"]) + if post_tweet(author, tweet): + logging.info(f"Successfully posted article tweet for {author['username']} on X") + else: + logging.warning(f"Failed to post article tweet for {author['username']} on X") + except Exception as e: + logging.error(f"Error posting article tweet for {author['username']}: {e}") + logging.info(f"Posted/Updated by {author['username']}: {post_data['title']} (ID: {post_id})") return post_id, post_url @@ -860,7 +792,6 @@ def get_flickr_image_via_ddg(search_query, relevance_keywords): result = random.choice(candidates) image_url = result["image_url"] - # OCR check on the selected image temp_file = None try: img_response = requests.get(image_url, headers=headers, timeout=10) @@ -876,9 +807,8 @@ def get_flickr_image_via_ddg(search_query, relevance_keywords): if char_count > 200: logging.info(f"Skipping text-heavy image (OCR): {image_url} (char_count: {char_count})") - return None, None, None, None # Fall back to Pixabay + return None, None, None, None - # Success: Save and return flickr_data = { "title": search_query, "image_url": image_url, @@ -945,7 +875,6 @@ def prepare_post_data(final_summary, original_title, context_info=""): logging.info(f"Title generation failed for '{original_title}' {context_info}") return None, None, None, None, None, None, None - # Note: This function still uses generate_image_query, but curate_from_rss overrides it with smart_image_and_filter search_query, relevance_keywords = generate_image_query(f"{innovative_title}\n\n{final_summary}") if not search_query: logging.info(f"Image query generation failed for '{innovative_title}' {context_info}") @@ -976,7 +905,6 @@ def prepare_post_data(final_summary, original_title, context_info=""): return post_data, author, category, image_url, image_source, uploader, page_url def save_post_to_recent(post_title, post_url, author_username, timestamp): - """Save post details to recent_posts.json.""" try: recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json') entry = { @@ -995,7 +923,6 @@ def save_post_to_recent(post_title, post_url, author_username, timestamp): logging.error(f"Failed to save post to 
recent_posts.json: {e}") def prune_recent_posts(): - """Prune recent_posts.json to keep only entries from the last 24 hours.""" try: cutoff = (datetime.now(timezone.utc) - timedelta(hours=24)).isoformat() recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json') diff --git a/foodie_x_poster.py b/foodie_x_poster.py index 393ba0b..94964d5 100644 --- a/foodie_x_poster.py +++ b/foodie_x_poster.py @@ -6,12 +6,10 @@ import time import sys import signal import os -from datetime import datetime, timedelta, timezone +from datetime import datetime, timezone from openai import OpenAI -import tweepy -from foodie_config import OPENAI_API_KEY, AUTHORS, LIGHT_TASK_MODEL -from foodie_utils import load_json_file -from foodie_x_config import X_API_CREDENTIALS, X_PERSONA_PROMPTS, AUTHOR_BACKGROUNDS_FILE, X_POST_COUNTS_FILE, RECENT_POSTS_FILE +from foodie_config import OPENAI_API_KEY, AUTHORS, LIGHT_TASK_MODEL, PERSONA_CONFIGS, AUTHOR_BACKGROUNDS_FILE +from foodie_utils import load_json_file, post_tweet from dotenv import load_dotenv load_dotenv() @@ -49,24 +47,6 @@ except Exception as e: logging.error(f"Failed to load author_backgrounds.json: {e}") sys.exit(1) -def load_post_counts(): - counts = load_json_file(X_POST_COUNTS_FILE) - if not counts: - counts = [{"username": author["username"], "count": 0, "month": datetime.now(timezone.utc).strftime("%Y-%m")} for author in AUTHORS] - current_month = datetime.now(timezone.utc).strftime("%Y-%m") - for entry in counts: - if entry["month"] != current_month: - entry["count"] = 0 - entry["month"] = current_month - return counts - -def save_post_counts(counts): - with open(X_POST_COUNTS_FILE, 'w') as f: - for item in counts: - json.dump(item, f) - f.write('\n') - logging.info(f"Saved post counts to {X_POST_COUNTS_FILE}") - is_posting = False def signal_handler(sig, frame): @@ -80,87 +60,28 @@ def signal_handler(sig, frame): signal.signal(signal.SIGTERM, signal_handler) signal.signal(signal.SIGINT, signal_handler) -def get_recent_posts_for_author(username): - posts = load_json_file(RECENT_POSTS_FILE) - return [post for post in posts if post["author_username"] == username] - -def delete_used_post(post_title): - posts = load_json_file(RECENT_POSTS_FILE) - posts = [post for post in posts if post["title"] != post_title] - with open(RECENT_POSTS_FILE, 'w') as f: - for item in posts: - json.dump(item, f) - f.write('\n') - logging.info(f"Deleted post '{post_title}' from recent_posts.json") - -def generate_article_tweet(author, post, persona): - # Format the prompt using description and tone - persona_config = X_PERSONA_PROMPTS[persona] - base_prompt = persona_config["prompt"].format( - description=persona_config["description"], - tone=persona_config["tone"] - ) - prompt = base_prompt.replace( - "For article tweets, include the article title, a quirky hook, and the URL.", - f"Generate an article tweet including the title '{post['title']}', a quirky hook, and the URL '{post['url']}'." - ) - try: - response = client.chat.completions.create( - model=LIGHT_TASK_MODEL, - messages=[ - {"role": "system", "content": prompt}, - {"role": "user", "content": f"Generate tweet for {post['title']}."} - ], - max_tokens=100, - temperature=0.9 - ) - tweet = response.choices[0].message.content.strip() - if len(tweet) > 280: - tweet = tweet[:277] + "..." 
- logging.info(f"Generated article tweet for {author['username']}: {tweet}") - return tweet - except Exception as e: - logging.error(f"Failed to generate article tweet for {author['username']}: {e}") - return f"This trend is fire! Check out {post['title']} at {post['url']} #Foodie" - -def generate_personal_tweet(author, persona): +def generate_engagement_tweet(author, persona): background = next((bg for bg in AUTHOR_BACKGROUNDS if bg["username"] == author["username"]), {}) - if not background: - logging.warning(f"No background found for {author['username']}") - return f"Loving my gig at InsiderFoodie, dishing out food trends! #FoodieLife" + if not background or "engagement_themes" not in background: + logging.warning(f"No background or engagement themes found for {author['username']}") + return "What food trends are you loving right now? Share your thoughts! #FoodieTrends" - # Get DOB and calculate age - dob = author.get('dob', '1980-01-01') - current_year = datetime.now().year - birth_year = int(dob.split('-')[0]) - age = current_year - birth_year - - is_role_reflection = random.choice([True, False]) - if is_role_reflection: - content = f"Reflect on your role at InsiderFoodie as {author['persona']}. Mention you're {age} years old." - else: - content = ( - f"Share a personal story about your background, considering you were born on {dob} and are {age} years old. " - f"Hometown: {background['hometown']}, Cultural influences: {background['cultural_influences']}, " - f"Early memory: {background['early_memory']}, Career path: {background['career_path']}." - ) - - # Format the prompt using description and tone - persona_config = X_PERSONA_PROMPTS[persona] - base_prompt = persona_config["prompt"].format( + theme = random.choice(background["engagement_themes"]) + persona_config = PERSONA_CONFIGS[persona] + base_prompt = persona_config["x_prompt"].format( description=persona_config["description"], tone=persona_config["tone"] ) prompt = base_prompt.replace( - "For personal tweets, reflect on your role at InsiderFoodie or background.", - content + "For engagement tweets, ask a question about food trends, foods, or articles to engage the public.", + f"Generate an engagement tweet asking a question about {theme} to engage the public." ) try: response = client.chat.completions.create( - model="gpt-4o-mini", + model=LIGHT_TASK_MODEL, messages=[ {"role": "system", "content": prompt}, - {"role": "user", "content": f"Generate personal tweet for {author['username']}."} + {"role": "user", "content": f"Generate engagement tweet for {author['username']} about {theme}."} ], max_tokens=100, temperature=0.9 @@ -168,64 +89,21 @@ def generate_personal_tweet(author, persona): tweet = response.choices[0].message.content.strip() if len(tweet) > 280: tweet = tweet[:277] + "..." - logging.info(f"Generated personal tweet for {author['username']}: {tweet}") + logging.info(f"Generated engagement tweet for {author['username']}: {tweet}") return tweet except Exception as e: - logging.error(f"Failed to generate personal tweet for {author['username']}: {e}") - return f"Loving my gig at InsiderFoodie, dishing out food trends! 
#FoodieLife" - -def post_tweet(author, tweet): - global is_posting - credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None) - if not credentials: - logging.error(f"No X credentials found for {author['username']}") - return False - - post_counts = load_post_counts() - author_count = next((entry for entry in post_counts if entry["username"] == author["username"]), None) - if author_count["count"] >= 450: - logging.warning(f"Post limit reached for {author['username']} this month") - return False - - try: - client = tweepy.Client( - consumer_key=credentials["api_key"], - consumer_secret=credentials["api_secret"], - access_token=credentials["access_token"], - access_token_secret=credentials["access_token_secret"] - ) - is_posting = True - response = client.create_tweet(text=tweet) - is_posting = False - author_count["count"] += 1 - save_post_counts(post_counts) - logging.info(f"Posted tweet for {author['username']}: {tweet}") - return True - except Exception as e: - is_posting = False - logging.error(f"Failed to post tweet for {author['username']}: {e}") - return False + logging.error(f"Failed to generate engagement tweet for {author['username']}: {e}") + return f"What’s your take on {theme}? Let’s talk! #FoodieTrends" def main(): + global is_posting logging.info("***** X Poster Launched *****") for author in AUTHORS: - posts = get_recent_posts_for_author(author["username"]) - if not posts: - logging.info(f"No recent posts for {author['username']}, skipping") - continue - - article_tweets = 0 - for post in posts[:2]: - tweet = generate_article_tweet(author, post, author["persona"]) - if post_tweet(author, tweet): - delete_used_post(post["title"]) - article_tweets += 1 - time.sleep(random.uniform(3600, 7200)) - if article_tweets >= 2: - break - - tweet = generate_personal_tweet(author, author["persona"]) + is_posting = True + tweet = generate_engagement_tweet(author, author["persona"]) post_tweet(author, tweet) + is_posting = False + time.sleep(random.uniform(3600, 7200)) logging.info("X posting completed") return random.randint(600, 1800)