Compare commits


No commits in common. 'main' and 'my-fix-branch' have entirely different histories.

  1. check_rate_limits.py (129 lines changed)
  2. check_x_capacity.py (191 lines changed)
  3. foodie_automator_google.py (532 lines changed)
  4. foodie_automator_reddit.py (754 lines changed)
  5. foodie_automator_rss.py (600 lines changed)
  6. foodie_config.py (76 lines changed)
  7. foodie_engagement_generator.py (236 lines changed)
  8. foodie_engagement_tweet.py (370 lines changed)
  9. foodie_utils.py (1763 lines changed)
  10. foodie_weekly_thread.py (448 lines changed)
  11. foodie_x_poster.py (32 lines changed)
  12. manage_scripts.sh (188 lines changed)
  13. requirements.txt (8 lines changed)

@@ -1,129 +0,0 @@
import logging

logging.basicConfig(
    filename='/home/shane/foodie_automator/logs/check_x_capacity.log',
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

import requests
from requests_oauthlib import OAuth1
from datetime import datetime, timezone
from dotenv import load_dotenv
import os
import time
from foodie_config import X_API_CREDENTIALS

# Load environment variables from .env file
load_dotenv()

# Function to delete a tweet
def delete_tweet(tweet_id, auth):
    try:
        response = requests.delete(f"https://api.x.com/2/tweets/{tweet_id}", auth=auth)
        response.raise_for_status()
        logging.info(f"Successfully deleted tweet {tweet_id}")
        return True
    except Exception as e:
        logging.error(f"Failed to delete tweet {tweet_id}: {e}")
        return False

# Function to check rate limits for a given author
def check_rate_limits_for_author(username, credentials, retry=False):
    logging.info(f"{'Retrying' if retry else 'Checking'} rate limits for {username} (handle: {credentials['x_username']})")

    # Retrieve OAuth 1.0a credentials for the author
    consumer_key = credentials["api_key"]
    consumer_secret = credentials["api_secret"]
    access_token = credentials["access_token"]
    access_token_secret = credentials["access_token_secret"]

    # Validate credentials
    if not all([consumer_key, consumer_secret, access_token, access_token_secret]):
        logging.error(f"Missing OAuth credentials for {username} in X_API_CREDENTIALS.")
        return None

    # Set up OAuth 1.0a authentication
    auth = OAuth1(consumer_key, consumer_secret, access_token, access_token_secret)

    # Add delay to avoid IP-based rate limiting
    logging.info(f"Waiting 5 seconds before attempting to post for {username}")
    time.sleep(5)

    # Try posting a test tweet to get v2 rate limit headers
    tweet_id = None
    try:
        tweet_data = {"text": f"Test tweet to check rate limits for {username} - please ignore"}
        response = requests.post("https://api.x.com/2/tweets", json=tweet_data, auth=auth)
        response.raise_for_status()
        tweet_id = response.json()['data']['id']
        logging.info("Successfully posted test tweet for %s: %s", username, response.json())
        logging.info("Response Headers for %s: %s", username, response.headers)
        # Extract rate limit headers if present
        app_limit = response.headers.get('x-app-limit-24hour-limit', 'N/A')
        app_remaining = response.headers.get('x-app-limit-24hour-remaining', 'N/A')
        app_reset = response.headers.get('x-app-limit-24hour-reset', 'N/A')
        logging.info("App 24-Hour Tweet Limit for %s: %s", username, app_limit)
        logging.info("App 24-Hour Tweets Remaining for %s: %s", username, app_remaining)
        if app_reset != 'N/A':
            reset_time = datetime.fromtimestamp(int(app_reset), timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
            logging.info("App 24-Hour Reset (Readable) for %s: %s", username, reset_time)
        return tweet_id
    except requests.exceptions.HTTPError as e:
        logging.info("Test Tweet Response Status Code for %s: %s", username, e.response.status_code)
        logging.info("Test Tweet Response Headers for %s: %s", username, e.response.headers)
        if e.response.status_code == 429:
            logging.info("Rate Limit Exceeded for /2/tweets for %s", username)
        # Extract user-specific 24-hour limits
        user_limit = e.response.headers.get('x-user-limit-24hour-limit', 'N/A')
        user_remaining = e.response.headers.get('x-user-limit-24hour-remaining', 'N/A')
        user_reset = e.response.headers.get('x-user-limit-24hour-reset', 'N/A')
        logging.info("User 24-Hour Tweet Limit for %s: %s", username, user_limit)
        logging.info("User 24-Hour Tweets Remaining for %s: %s", username, user_remaining)
        logging.info("User 24-Hour Reset (Timestamp) for %s: %s", username, user_reset)
        if user_reset != 'N/A':
            reset_time = datetime.fromtimestamp(int(user_reset), timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
            logging.info("User 24-Hour Reset (Readable) for %s: %s", username, reset_time)
        # Extract app-specific 24-hour limits
        app_limit = e.response.headers.get('x-app-limit-24hour-limit', 'N/A')
        app_remaining = e.response.headers.get('x-app-limit-24hour-remaining', 'N/A')
        app_reset = e.response.headers.get('x-app-limit-24hour-reset', 'N/A')
        logging.info("App 24-Hour Tweet Limit for %s: %s", username, app_limit)
        logging.info("App 24-Hour Tweets Remaining for %s: %s", username, app_remaining)
        logging.info("App 24-Hour Reset (Timestamp) for %s: %s", username, app_reset)
        if app_reset != 'N/A':
            reset_time = datetime.fromtimestamp(int(app_reset), timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
            logging.info("App 24-Hour Reset (Readable) for %s: %s", username, reset_time)
        return None
    except Exception as e:
        logging.error("Failed to post test tweet for %s: %s", username, e)
        return None

# Main loop to check rate limits for all authors
if __name__ == "__main__":
    # First pass: Attempt to post for all authors
    successful_tweets = {}
    for username, credentials in X_API_CREDENTIALS.items():
        tweet_id = check_rate_limits_for_author(username, credentials)
        if tweet_id:
            successful_tweets[username] = (tweet_id, credentials)
        logging.info("-" * 50)

    # Delete successful tweets to free up quota
    for username, (tweet_id, credentials) in successful_tweets.items():
        auth = OAuth1(
            credentials["api_key"],
            credentials["api_secret"],
            credentials["access_token"],
            credentials["access_token_secret"]
        )
        delete_tweet(tweet_id, auth)

    # Second pass: Retry for authors that failed
    logging.info("Retrying for authors that initially failed...")
    for username, credentials in X_API_CREDENTIALS.items():
        if username not in successful_tweets:
            check_rate_limits_for_author(username, credentials, retry=True)
            logging.info("-" * 50)

@@ -1,191 +0,0 @@
#!/usr/bin/env python3
import logging
import pytz

logging.basicConfig(
    filename='/home/shane/foodie_automator/logs/check_x_capacity.log',
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logging.info("TEST: Logging is configured and working.")
logger = logging.getLogger(__name__)

from datetime import datetime, timezone
from foodie_utils import (
    AUTHORS, check_author_rate_limit, load_json_file,
    get_x_rate_limit_status, update_system_activity, is_any_script_running,
    save_json_file
)
import time
import sys
import os
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from foodie_config import EMAIL_CONFIG

# File to track sent notifications
NOTIFICATION_FILE = '/home/shane/foodie_automator/notification_tracking.json'

def load_notification_tracking():
    """Load notification tracking data as a dict. If not a dict, reset to {}."""
    data = load_json_file(NOTIFICATION_FILE, default={})
    if not isinstance(data, dict):
        logging.warning(f"notification_tracking.json was not a dict, resetting to empty dict.")
        data = {}
        save_json_file(NOTIFICATION_FILE, data)
    return data

def save_notification_tracking(tracking_data):
    """Save notification tracking data as a dict."""
    if not isinstance(tracking_data, dict):
        logging.warning(f"Attempted to save non-dict to notification_tracking.json, resetting to empty dict.")
        tracking_data = {}
    save_json_file(NOTIFICATION_FILE, tracking_data)

def should_send_notification(username, reset_time):
    """Check if we should send a notification for this author."""
    tracking = load_notification_tracking()
    author_data = tracking.get(username, {})
    reset_time_str = str(reset_time)
    logging.debug(f"[DEBUG] should_send_notification: username={username}, reset_time_str={reset_time_str}, author_data={author_data}")
    if not author_data or str(author_data.get('reset_time')) != reset_time_str:
        logging.info(f"[DEBUG] Sending notification for {username}. Previous reset_time: {author_data.get('reset_time')}, New reset_time: {reset_time_str}")
        tracking[username] = {
            'last_notification': datetime.now(timezone.utc).isoformat(),
            'reset_time': reset_time_str
        }
        save_notification_tracking(tracking)
        return True
    logging.info(f"[DEBUG] Skipping notification for {username}. Already notified for reset_time: {reset_time_str}")
    return False

def send_capacity_alert(username, remaining, reset_time):
    """Send email alert when an author's tweet capacity is full."""
    # Always use string for reset_time
    reset_time_str = str(reset_time)
    logging.debug(f"[DEBUG] send_capacity_alert: username={username}, remaining={remaining}, reset_time_str={reset_time_str}")
    if not should_send_notification(username, reset_time_str):
        logger.info(f"Skipping duplicate notification for {username}")
        return
    try:
        msg = MIMEMultipart()
        msg['From'] = EMAIL_CONFIG['from_email']
        msg['To'] = EMAIL_CONFIG['to_email']
        msg['Subject'] = f" X Capacity Alert: {username}"
        body = f"""
X Tweet Capacity Alert!
Username: {username}
Time: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}
Remaining Tweets: {remaining}/17
Reset Time: {reset_time_str}
This author has reached their daily tweet limit.
The quota will reset at the time shown above.
This is an automated alert from your foodie_automator system.
"""
        msg.attach(MIMEText(body, 'plain'))
        with smtplib.SMTP(EMAIL_CONFIG['smtp_server'], EMAIL_CONFIG['smtp_port']) as server:
            server.starttls()
            server.login(EMAIL_CONFIG['smtp_username'], EMAIL_CONFIG['smtp_password'])
            server.send_message(msg)
        logger.info(f"Sent capacity alert email for {username}")
    except Exception as e:
        logger.error(f"Failed to send capacity alert email: {e}")

def display_author_status(author):
    """Display detailed status for a single author."""
    username = author['username']
    can_post, remaining, reset = check_author_rate_limit(author)
    reset_time_utc = datetime.fromtimestamp(reset, tz=timezone.utc)
    reset_time_str = reset_time_utc.strftime('%Y-%m-%d %H:%M:%S UTC')
    # Convert to Sydney time
    try:
        sydney_tz = pytz.timezone('Australia/Sydney')
        reset_time_sydney = reset_time_utc.astimezone(sydney_tz)
        reset_time_sydney_str = reset_time_sydney.strftime('%Y-%m-%d %H:%M:%S %Z')
    except Exception as e:
        reset_time_sydney_str = 'N/A'
    status = "" if can_post else ""
    print(f"\n{status} {username}:")
    print(f" • Remaining tweets: {remaining}/17")
    print(f" • Reset time (UTC): {reset_time_str}")
    print(f" • Reset time (Sydney): {reset_time_sydney_str}")
    print(f" • Can post: {'Yes' if can_post else 'No'}")
    # Send alert if capacity is full
    if remaining == 0:
        send_capacity_alert(username, remaining, reset_time_str)
    # Show API status for verification
    if not is_any_script_running():
        api_remaining, api_reset = get_x_rate_limit_status(author)
        if api_remaining is not None:
            api_reset_time = datetime.fromtimestamp(api_reset, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
            print(f" • API Status: {api_remaining} remaining, resets at {api_reset_time}")

def display_total_capacity():
    """Display total capacity across all authors."""
    total_capacity = len(AUTHORS) * 17
    total_used = 0
    available_authors = 0
    print("\n=== X Posting Capacity Status ===")
    print(f"Total daily capacity: {total_capacity} tweets ({len(AUTHORS)} authors × 17 tweets)")
    print("\nAuthor Status:")
    for author in AUTHORS:
        can_post, remaining, _ = check_author_rate_limit(author)
        # Only check API if no scripts are running
        if not is_any_script_running():
            api_remaining, _ = get_x_rate_limit_status(author)
            if api_remaining is not None:
                remaining = api_remaining
                can_post = remaining > 0
        used = 17 - remaining
        total_used += used
        if can_post:
            available_authors += 1
        display_author_status(author)
    print("\n=== Summary ===")
    print(f"Total tweets used today: {total_used}")
    print(f"Total tweets remaining: {total_capacity - total_used}")
    print(f"Authors available to post: {available_authors}/{len(AUTHORS)}")
    # Calculate percentage used
    percent_used = (total_used / total_capacity) * 100
    print(f"Capacity used: {percent_used:.1f}%")
    if percent_used > 80:
        print("\n Warning: High capacity usage! Consider adding more authors.")
    elif percent_used > 60:
        print("\n Note: Moderate capacity usage. Monitor usage patterns.")

def main():
    try:
        # Update system activity
        update_system_activity("check_x_capacity", "running", os.getpid())
        # Display capacity status
        display_total_capacity()
        # Update system activity
        update_system_activity("check_x_capacity", "stopped")
    except KeyboardInterrupt:
        print("\nScript interrupted by user")
        update_system_activity("check_x_capacity", "stopped")
        sys.exit(0)
    except Exception as e:
        logger.error(f"Error: {e}")
        update_system_activity("check_x_capacity", "stopped")
        sys.exit(1)

if __name__ == "__main__":
    main()
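
check_x_capacity.py leans on two structures defined elsewhere in the repo: EMAIL_CONFIG (from foodie_config.py) and the notification_tracking.json file it writes. The sketch below shows the shapes implied by the lookups in send_capacity_alert() and should_send_notification(); every value is a placeholder.

# Assumed shape of EMAIL_CONFIG, inferred from send_capacity_alert();
# values are placeholders, not real settings.
EMAIL_CONFIG = {
    "from_email": "alerts@example.com",
    "to_email": "you@example.com",
    "smtp_server": "smtp.example.com",
    "smtp_port": 587,
    "smtp_username": "alerts@example.com",
    "smtp_password": "app-specific-password",
}

# notification_tracking.json, as written by should_send_notification(),
# ends up keyed by author username:
# {
#     "author_one": {
#         "last_notification": "2025-01-01T00:00:00+00:00",
#         "reset_time": "1735689600"
#     }
# }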

@@ -24,37 +24,20 @@ from foodie_config import (
)
from foodie_utils import (
load_json_file, save_json_file, get_image, generate_image_query,
upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
upload_image_to_wp, select_best_persona, determine_paragraph_count,
is_interesting, generate_title_from_summary, summarize_with_gpt4o,
generate_category_from_summary, post_to_wp, prepare_post_data,
select_best_author, smart_image_and_filter, get_flickr_image,
get_next_author_round_robin, check_author_rate_limit, update_system_activity
smart_image_and_filter, insert_link_naturally, get_flickr_image
)
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt # Removed select_best_cta import
from dotenv import load_dotenv
import fcntl
load_dotenv()
# Define constants at the top
SCRIPT_NAME = "foodie_automator_google"
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_google_titles.json'
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
EXPIRATION_HOURS = 24
IMAGE_EXPIRATION_DAYS = 7
is_posting = False
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_google.lock"
# Load JSON files after constants are defined
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
posted_titles = set(entry["title"] for entry in posted_titles_data if "title" in entry)
used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
def signal_handler(sig, frame):
logging.info("Received termination signal, marking script as stopped...")
update_system_activity(SCRIPT_NAME, "stopped")
logging.info("Received termination signal, checking if safe to exit...")
if is_posting:
logging.info("Currently posting, will exit after completion.")
else:
@@ -64,104 +47,26 @@ def signal_handler(sig, frame):
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_automator_google.log"
LOG_PRUNE_DAYS = 30
MAX_RETRIES = 3
RETRY_BACKOFF = 2
def setup_logging():
try:
# Ensure log directory exists
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}")
# Check write permissions
if not os.access(os.path.dirname(LOG_FILE), os.W_OK):
raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}")
# Test write to log file
try:
with open(LOG_FILE, 'a') as f:
f.write("")
logging.debug(f"Confirmed write access to {LOG_FILE}")
except Exception as e:
raise PermissionError(f"Cannot write to {LOG_FILE}: {e}")
# Prune old logs
if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
log_entries = []
current_entry = []
timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')
for line in lines:
if timestamp_pattern.match(line):
if current_entry:
log_entries.append(''.join(current_entry))
current_entry = [line]
else:
current_entry.append(line)
if current_entry:
log_entries.append(''.join(current_entry))
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
pruned_entries = []
for entry in log_entries:
try:
timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
if timestamp > cutoff:
pruned_entries.append(entry)
except ValueError:
logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...")
continue
with open(LOG_FILE, 'w') as f:
f.writelines(pruned_entries)
logging.debug(f"Log file pruned: {LOG_FILE}")
# Configure logging
logging.basicConfig(
filename=LOG_FILE,
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
force=True # Ensure this config takes precedence
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.info("Logging initialized for foodie_automator_google.py")
except Exception as e:
# Fallback to console logging if file logging fails
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
force=True
)
logging.error(f"Failed to setup file logging for {LOG_FILE}: {e}. Using console logging.")
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.info("Console logging initialized as fallback for foodie_automator_google.py")
logger = logging.getLogger()
logger.setLevel(logging.INFO)
file_handler = logging.FileHandler('/home/shane/foodie_automator/foodie_automator_google.log', mode='a')
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logger.addHandler(file_handler)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logger.addHandler(console_handler)
logging.info("Logging initialized for foodie_automator_google.py")
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
def acquire_lock():
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
lock_fd = open(LOCK_FILE, 'w')
try:
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
lock_fd.write(str(os.getpid()))
lock_fd.flush()
return lock_fd
except IOError:
logging.info("Another instance of foodie_automator_google.py is running")
sys.exit(0)
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_google_titles.json'
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
EXPIRATION_HOURS = 24
IMAGE_EXPIRATION_DAYS = 7
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
posted_titles = set(entry["title"] for entry in posted_titles_data)
used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
def parse_search_volume(volume_text):
try:
@@ -184,11 +89,10 @@ def scrape_google_trends(geo='US'):
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/125.0.0.0 Safari/537.36")
driver = None
driver = webdriver.Chrome(options=chrome_options)
try:
for attempt in range(MAX_RETRIES):
for attempt in range(3):
try:
driver = webdriver.Chrome(options=chrome_options)
time.sleep(random.uniform(2, 5))
url = f"https://trends.google.com/trending?geo={geo}&hours=24&sort=search-volume&category=5"
logging.info(f"Navigating to {url} (attempt {attempt + 1})")
@@ -201,13 +105,10 @@ def scrape_google_trends(geo='US'):
break
except TimeoutException:
logging.warning(f"Timeout on attempt {attempt + 1} for geo={geo}")
if attempt == MAX_RETRIES - 1:
logging.error(f"Failed after {MAX_RETRIES} attempts for geo={geo}")
if attempt == 2:
logging.error(f"Failed after 3 attempts for geo={geo}")
return []
time.sleep(RETRY_BACKOFF * (2 ** attempt))
if driver:
driver.quit()
continue
time.sleep(5)
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(2)
@@ -244,186 +145,157 @@ def scrape_google_trends(geo='US'):
if trends:
trends.sort(key=lambda x: x["search_volume"], reverse=True)
logging.info(f"Extracted {len(trends)} trends for geo={geo}: {[t['title'] for t in trends]}")
print(f"Raw trends fetched for geo={geo}: {[t['title'] for t in trends]}")
else:
logging.warning(f"No valid trends found with search volume >= 20K for geo={geo}")
return trends
except Exception as e:
logging.error(f"Unexpected error in scrape_google_trends: {e}", exc_info=True)
return []
finally:
if driver:
driver.quit()
logging.info(f"Chrome driver closed for geo={geo}")
driver.quit()
logging.info(f"Chrome driver closed for geo={geo}")
def fetch_duckduckgo_news_context(trend_title, hours=24):
for attempt in range(MAX_RETRIES):
try:
with DDGS() as ddgs:
results = ddgs.news(f"{trend_title} news", timelimit="d", max_results=5)
titles = []
for r in results:
try:
date_str = r["date"]
# Handle both ISO formats with and without timezone
if '+00:00' in date_str:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc)
else:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
if dt > (datetime.now(timezone.utc) - timedelta(hours=24)):
titles.append(r["title"].lower())
except ValueError as e:
logging.warning(f"Date parsing failed for '{date_str}': {e}")
continue
context = " ".join(titles) if titles else "No recent news found within 24 hours"
logging.info(f"DuckDuckGo News context for '{trend_title}': {context}")
return context
except Exception as e:
logging.warning(f"DuckDuckGo News context fetch failed for '{trend_title}' (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
try:
with DDGS() as ddgs:
results = ddgs.news(f"{trend_title} news", timelimit="d", max_results=5)
titles = []
for r in results:
try:
date_str = r["date"]
if '+00:00' in date_str:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc)
else:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
if dt > (datetime.now(timezone.utc) - timedelta(hours=24)):
titles.append(r["title"].lower())
except ValueError as e:
logging.warning(f"Date parsing failed for '{date_str}': {e}")
continue
context = " ".join(titles) if titles else "No recent news found within 24 hours"
logging.info(f"DuckDuckGo News context for '{trend_title}': {context}")
return context
except Exception as e:
logging.warning(f"DuckDuckGo News context fetch failed for '{trend_title}': {e}")
return trend_title
def curate_from_google_trends(geo_list=['US']):
all_trends = []
for geo in geo_list:
trends = scrape_google_trends(geo=geo)
if trends:
all_trends.extend(trends)
if not all_trends:
print("No Google Trends data available")
logging.info("No Google Trends data available")
return None, None, random.randint(600, 1800)
attempts = 0
max_attempts = 10
while attempts < max_attempts and all_trends:
trend = all_trends.pop(0)
title = trend["title"]
link = trend.get("link", "https://trends.google.com/")
summary = trend.get("summary", "")
source_name = "Google Trends"
original_source = f'<a href="{link}">{source_name}</a>'
if title in posted_titles:
print(f"Skipping already posted trend: {title}")
logging.info(f"Skipping already posted trend: {title}")
attempts += 1
continue
logging.error(f"Failed to fetch DuckDuckGo News context for '{trend_title}' after {MAX_RETRIES} attempts")
return trend_title
def curate_from_google_trends(posted_titles_data, posted_titles, used_images_data, used_images):
try:
logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")
regions = ['US', 'GB', 'AU']
all_trends = []
for geo in regions:
logging.info(f"Scraping Google Trends for geo={geo}")
trends = scrape_google_trends(geo=geo)
if trends:
logging.info(f"Collected {len(trends)} trends for geo={geo}")
all_trends.extend(trends)
else:
logging.warning(f"No trends collected for geo={geo}")
unique_trends = []
seen_titles = set()
for trend in all_trends:
if trend["title"] not in seen_titles:
unique_trends.append(trend)
seen_titles.add(trend["title"])
if not unique_trends:
logging.info("No Google Trends data available across regions")
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
unique_trends.sort(key=lambda x: x["search_volume"], reverse=True)
logging.info(f"Total unique trends collected: {len(unique_trends)}")
attempts = 0
max_attempts = 10
while attempts < max_attempts and unique_trends:
trend = unique_trends.pop(0)
title = trend["title"]
link = trend.get("link", "")
summary = trend.get("summary", "")
source_name = trend.get("source", "Google Trends")
original_source = f'<a href="{link}">{source_name}</a>'
original_url = link
if title in posted_titles:
logging.info(f"Skipping already posted trend: {title}")
attempts += 1
continue
print(f"Trying Google Trend: {title} from {source_name}")
logging.info(f"Trying Google Trend: {title} from {source_name}")
author = get_next_author_round_robin()
if not author:
logging.info(f"Skipping trend '{title}' due to tweet rate limits for all authors")
attempts += 1
continue
image_query, relevance_keywords, skip = smart_image_and_filter(title, summary)
if skip:
print(f"Skipping filtered Google Trend: {title}")
logging.info(f"Skipping filtered Google Trend: {title}")
attempts += 1
continue
author_username = author["username"]
logging.info(f"Selected author via round-robin: {author_username}")
scoring_content = f"{title}\n\n{summary}"
interest_score = is_interesting(scoring_content)
logging.info(f"Interest score for '{title}': {interest_score}")
if interest_score < 6:
print(f"Google Trends Interest Too Low: {interest_score}")
logging.info(f"Google Trends Interest Too Low: {interest_score}")
attempts += 1
continue
logging.info(f"Trying Google Trend: {title} from {source_name}")
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
f"Do NOT introduce unrelated concepts.\n"
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = scoring_content
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
interest_score=interest_score,
extra_prompt=extra_prompt
)
if not final_summary:
logging.info(f"Summary failed for '{title}'")
attempts += 1
continue
# Fetch DuckDuckGo context early to enhance smart_image_and_filter
ddg_context = fetch_duckduckgo_news_context(title)
enhanced_summary = summary + "\n\nAdditional Context: " + ddg_context if summary else ddg_context
final_summary = insert_link_naturally(final_summary, source_name, link)
try:
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, enhanced_summary)
except Exception as e:
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1
continue
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
if not post_data:
attempts += 1
continue
if skip:
logging.info(f"Skipping filtered trend: {title}")
attempts += 1
continue
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
if not image_url:
image_url, image_source, uploader, page_url = get_image(image_query)
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
interest_score = is_interesting(scoring_content)
logging.info(f"Interest score for '{title}': {interest_score}")
if interest_score < 6:
logging.info(f"Trend Interest Too Low: {interest_score}")
attempts += 1
continue
hook = get_dynamic_hook(post_data["title"]).strip()
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = scoring_content
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
# Generate viral share prompt
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
share_links_template = (
f'<p>{share_prompt} '
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
)
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
global is_posting
is_posting = True
try:
post_id, post_url = post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=image_url,
original_source=original_source,
image_source=image_source,
uploader=uploader,
pixabay_url=pixabay_url,
interest_score=interest_score,
extra_prompt=extra_prompt
should_post_tweet=True
)
if not final_summary:
logging.info(f"Summary failed for '{title}'")
attempts += 1
continue
finally:
is_posting = False
final_summary = insert_link_naturally(final_summary, source_name, link)
post_data = {
"title": generate_title_from_summary(final_summary),
"content": final_summary,
"status": "publish",
"author": author_username,
"categories": [generate_category_from_summary(final_summary)]
}
category = post_data["categories"][0]
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
if not image_url:
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
if not image_url:
logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None
uploader = None
page_url = None
hook = get_dynamic_hook(post_data["title"]).strip()
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
if post_id:
share_text = f"Check out this foodie gem! {post_data['title']}"
share_text_encoded = quote(share_text)
share_links_template = (
f'<p>{share_prompt} '
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
)
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
global is_posting
post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
is_posting = True
try:
post_id, post_url = post_to_wp(
post_to_wp(
post_data=post_data,
category=category,
link=link,
@@ -432,39 +304,11 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
pixabay_url=pixabay_url,
interest_score=interest_score,
should_post_tweet=True,
summary=final_summary
post_id=post_id,
should_post_tweet=False
)
if not post_id:
logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}")
post_url = original_url
else:
logging.info(f"Posted to WordPress for {author_username}: {post_url}")
post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded)
post_data["content"] = f"{final_summary}\n\n{share_links}"
post_data["post_id"] = post_id
if post_id:
post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=None,
original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
interest_score=interest_score,
post_id=post_id,
should_post_tweet=False
)
except Exception as e:
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
post_url = original_url
finally:
is_posting = False
@@ -478,51 +322,27 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from Google Trends *****")
sleep_time = random.randint(1200, 1800)
return post_data, category, sleep_time
print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
return post_data, category, random.randint(0, 1800)
logging.info("No interesting Google Trend found after attempts")
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
except Exception as e:
logging.error(f"Unexpected error in curate_from_google_trends: {e}", exc_info=True)
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
attempts += 1
logging.info(f"WP posting failed for '{post_data['title']}'")
print("No interesting Google Trend found after attempts")
logging.info("No interesting Google Trend found after attempts")
return None, None, random.randint(600, 1800)
def run_google_trends_automator():
lock_fd = None
try:
lock_fd = acquire_lock()
update_system_activity(SCRIPT_NAME, "running", os.getpid()) # Record start
logging.info("***** Google Trends Automator Launched *****")
# Load JSON files once
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
posted_titles = set(entry["title"] for entry in posted_titles_data)
used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
post_data, category, sleep_time = curate_from_google_trends(posted_titles_data, posted_titles, used_images_data, used_images)
if not post_data:
logging.info("No postable Google Trend found")
logging.info("Completed Google Trends run")
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
# Sleep while still marked as running
time.sleep(sleep_time)
update_system_activity(SCRIPT_NAME, "stopped") # Record stop after sleep
return post_data, category, sleep_time
except Exception as e:
logging.error(f"Fatal error in run_google_trends_automator: {e}", exc_info=True)
update_system_activity(SCRIPT_NAME, "stopped") # Record stop on error
sleep_time = random.randint(1200, 1800) # 20–30 minutes
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
return None, None, sleep_time
finally:
if lock_fd:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None
logging.info("***** Google Trends Automator Launched *****")
geo_list = ['US', 'GB', 'AU']
post_data, category, sleep_time = curate_from_google_trends(geo_list=geo_list)
if sleep_time is None:
sleep_time = random.randint(600, 1800)
print(f"Sleeping for {sleep_time}s")
logging.info(f"Completed run with sleep time: {sleep_time} seconds")
time.sleep(sleep_time)
return post_data, category, sleep_time
if __name__ == "__main__":
setup_logging()
post_data, category, sleep_time = run_google_trends_automator()
# logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
run_google_trends_automator()
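
Both automator scripts guard against concurrent runs with the non-blocking fcntl lock-file pattern shown in acquire_lock() above. A self-contained sketch of that pattern follows; the path and the release_lock helper are illustrative, not taken from the repo.

import fcntl
import os
import sys

LOCK_FILE = "/tmp/example_single_instance.lock"  # illustrative path

def acquire_lock():
    os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
    lock_fd = open(LOCK_FILE, "w")
    try:
        # LOCK_NB makes flock() fail immediately if another process holds the lock
        fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        lock_fd.write(str(os.getpid()))
        lock_fd.flush()
        return lock_fd
    except IOError:
        print("Another instance is already running")
        sys.exit(0)

def release_lock(lock_fd):
    fcntl.flock(lock_fd, fcntl.LOCK_UN)
    lock_fd.close()
    if os.path.exists(LOCK_FILE):
        os.remove(LOCK_FILE)

if __name__ == "__main__":
    fd = acquire_lock()
    try:
        pass  # single-instance work goes here
    finally:
        release_lock(fd)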

@@ -8,7 +8,6 @@ import json
import signal
import sys
import re
from duckduckgo_search import DDGS
from datetime import datetime, timedelta, timezone
from openai import OpenAI
from urllib.parse import quote
@@ -25,24 +24,18 @@ from foodie_config import (
from foodie_utils import (
load_json_file, save_json_file, get_image, generate_image_query,
upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
is_interesting, generate_title_from_summary, summarize_with_gpt4o,
generate_category_from_summary, post_to_wp, prepare_post_data,
select_best_author, smart_image_and_filter, get_flickr_image,
get_next_author_round_robin, check_author_rate_limit, update_system_activity
summarize_with_gpt4o, generate_category_from_summary, post_to_wp,
prepare_post_data, select_best_author, smart_image_and_filter,
get_flickr_image
)
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
import fcntl
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt # Removed select_best_cta import
load_dotenv()
SCRIPT_NAME = "foodie_automator_reddit"
is_posting = False
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_reddit.lock"
def signal_handler(sig, frame):
logging.info("Received termination signal, marking script as stopped...")
update_system_activity(SCRIPT_NAME, "stopped")
logging.info("Received termination signal, checking if safe to exit...")
if is_posting:
logging.info("Currently posting, will exit after completion.")
else:
@@ -52,10 +45,56 @@ def signal_handler(sig, frame):
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_automator_reddit.log"
LOG_FILE = "/home/shane/foodie_automator/foodie_automator_reddit.log"
LOG_PRUNE_DAYS = 30
MAX_RETRIES = 3
RETRY_BACKOFF = 2
def setup_logging():
if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
log_entries = []
current_entry = []
timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}')
for line in lines:
if timestamp_pattern.match(line):
if current_entry:
log_entries.append(''.join(current_entry))
current_entry = [line]
else:
current_entry.append(line)
if current_entry:
log_entries.append(''.join(current_entry))
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
pruned_entries = []
for entry in log_entries:
try:
timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
if timestamp > cutoff:
pruned_entries.append(entry)
except ValueError:
logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...")
continue
with open(LOG_FILE, 'w') as f:
f.writelines(pruned_entries)
logging.basicConfig(
filename=LOG_FILE,
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s"
)
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("prawcore").setLevel(logging.WARNING)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.info("Logging initialized for foodie_automator_reddit.py")
setup_logging()
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_reddit_titles.json'
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
@@ -69,405 +108,238 @@ used_images = set(entry["title"] for entry in used_images_data if "title" in ent
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
def setup_logging():
try:
# Ensure log directory exists
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}")
# Check write permissions
if not os.access(os.path.dirname(LOG_FILE), os.W_OK):
raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}")
# Test write to log file
try:
with open(LOG_FILE, 'a') as f:
f.write("")
logging.debug(f"Confirmed write access to {LOG_FILE}")
except Exception as e:
raise PermissionError(f"Cannot write to {LOG_FILE}: {e}")
# Prune old logs
if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
log_entries = []
current_entry = []
timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}')
for line in lines:
if timestamp_pattern.match(line):
if current_entry:
log_entries.append(''.join(current_entry))
current_entry = [line]
else:
current_entry.append(line)
if current_entry:
log_entries.append(''.join(current_entry))
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
pruned_entries = []
for entry in log_entries:
try:
timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
if timestamp > cutoff:
pruned_entries.append(entry)
except ValueError:
logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...")
continue
with open(LOG_FILE, 'w') as f:
f.writelines(pruned_entries)
logging.debug(f"Log file pruned: {LOG_FILE}")
# Configure logging
logging.basicConfig(
filename=LOG_FILE,
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
force=True # Ensure this config takes precedence
)
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("prawcore").setLevel(logging.WARNING)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.info("Logging initialized for foodie_automator_reddit.py")
except Exception as e:
# Fallback to console logging if file logging fails
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
force=True
)
logging.error(f"Failed to setup file logging for {LOG_FILE}: {e}. Using console logging.")
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.info("Console logging initialized as fallback for foodie_automator_reddit.py")
def acquire_lock():
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
lock_fd = open(LOCK_FILE, 'w')
try:
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
lock_fd.write(str(os.getpid()))
lock_fd.flush()
return lock_fd
except IOError:
logging.info("Another instance of foodie_automator_reddit.py is running")
sys.exit(0)
def clean_reddit_title(title):
"""Clean Reddit post title by removing prefixes, newlines, and special characters."""
if not title or not isinstance(title, str):
logging.warning(f"Invalid title received: {title}")
return ""
# Remove [prefixes], newlines, and excessive whitespace
cleaned_title = re.sub(r'^\[.*?\]\s*', '', title) # Remove [prefix]
cleaned_title = re.sub(r'\n+', ' ', cleaned_title) # Replace newlines with space
cleaned_title = re.sub(r'\s+', ' ', cleaned_title).strip() # Normalize spaces
# Remove special characters (keep alphanumeric, spaces, and basic punctuation)
cleaned_title = re.sub(r'[^\w\s.,!?-]', '', cleaned_title)
cleaned_title = re.sub(r'^\[.*?\]\s*', '', title).strip()
logging.info(f"Cleaned Reddit title from '{title}' to '{cleaned_title}'")
return cleaned_title
def is_interesting_reddit(title, summary, upvotes, comment_count, top_comments):
for attempt in range(MAX_RETRIES):
try:
content = f"Title: {title}\n\nContent: {summary}"
if top_comments:
content += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
response = client.chat.completions.create(
model=LIGHT_TASK_MODEL,
messages=[
{"role": "system", "content": (
"Rate this Reddit post from 0-10 based on rarity, buzzworthiness, and engagement potential for food lovers, covering food topics (skip recipes). "
"Score 8-10 for rare, highly shareable ideas (e.g., unique dishes or restaurant trends). "
"Score 5-7 for fresh, engaging updates with broad appeal. Score below 5 for common or unremarkable content. "
"Consider comments for added context (e.g., specific locations or unique details). "
"Return only a number"
)},
{"role": "user", "content": content}
],
max_tokens=5
)
base_score = int(response.choices[0].message.content.strip()) if response.choices[0].message.content.strip().isdigit() else 0
engagement_boost = 0
if upvotes >= 500:
engagement_boost += 3
elif upvotes >= 100:
engagement_boost += 2
elif upvotes >= 50:
engagement_boost += 1
if comment_count >= 100:
engagement_boost += 2
elif comment_count >= 20:
engagement_boost += 1
final_score = min(base_score + engagement_boost, 10)
logging.info(f"Reddit Interest Score: {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count}, top_comments: {len(top_comments)}) for '{title}'")
return final_score
except Exception as e:
logging.warning(f"Reddit interestingness scoring failed (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
continue
logging.error(f"Failed to score Reddit post '{title}' after {MAX_RETRIES} attempts")
return 0
try:
content = f"Title: {title}\n\nContent: {summary}"
if top_comments:
content += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
response = client.chat.completions.create(
model=LIGHT_TASK_MODEL,
messages=[
{"role": "system", "content": (
"Rate this Reddit post from 0-10 based on rarity, buzzworthiness, and engagement potential for food lovers, covering food topics (skip recipes). "
"Score 8-10 for rare, highly shareable ideas (e.g., unique dishes or restaurant trends). "
"Score 5-7 for fresh, engaging updates with broad appeal. Score below 5 for common or unremarkable content. "
"Consider comments for added context (e.g., specific locations or unique details). "
"Return only a number."
)},
{"role": "user", "content": content}
],
max_tokens=5
)
base_score = int(response.choices[0].message.content.strip()) if response.choices[0].message.content.strip().isdigit() else 0
engagement_boost = 0
if upvotes >= 500:
engagement_boost += 3
elif upvotes >= 100:
engagement_boost += 2
elif upvotes >= 50:
engagement_boost += 1
if comment_count >= 100:
engagement_boost += 2
elif comment_count >= 20:
engagement_boost += 1
final_score = min(base_score + engagement_boost, 10)
logging.info(f"Reddit Interest Score: {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count}, top_comments: {len(top_comments)}) for '{title}'")
print(f"Interest Score for '{title[:50]}...': {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count})")
return final_score
except Exception as e:
logging.error(f"Reddit interestingness scoring failed: {e}")
print(f"Reddit Interest Error: {e}")
return 0
def get_top_comments(post_url, reddit, limit=3):
for attempt in range(MAX_RETRIES):
try:
submission = reddit.submission(url=post_url)
submission.comment_sort = 'top'
submission.comments.replace_more(limit=0)
top_comments = [comment.body for comment in submission.comments[:limit] if not comment.body.startswith('[deleted]')]
logging.info(f"Fetched {len(top_comments)} top comments for {post_url}")
return top_comments
except Exception as e:
logging.warning(f"Failed to fetch comments for {post_url} (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
continue
logging.error(f"Failed to fetch comments for {post_url} after {MAX_RETRIES} attempts")
return []
try:
submission = reddit.submission(url=post_url)
submission.comment_sort = 'top'
submission.comments.replace_more(limit=0)
top_comments = [comment.body for comment in submission.comments[:limit] if not comment.body.startswith('[deleted]')]
logging.info(f"Fetched {len(top_comments)} top comments for {post_url}")
return top_comments
except Exception as e:
logging.error(f"Failed to fetch comments for {post_url}: {e}")
return []
def fetch_duckduckgo_news_context(title, hours=24):
for attempt in range(MAX_RETRIES):
def fetch_reddit_posts():
reddit = praw.Reddit(
client_id=REDDIT_CLIENT_ID,
client_secret=REDDIT_CLIENT_SECRET,
user_agent=REDDIT_USER_AGENT
)
feeds = ['FoodPorn', 'restaurant', 'FoodIndustry', 'food']
articles = []
cutoff_date = datetime.now(timezone.utc) - timedelta(hours=EXPIRATION_HOURS)
logging.info(f"Starting fetch with cutoff date: {cutoff_date}")
for subreddit_name in feeds:
try:
with DDGS() as ddgs:
results = ddgs.news(f"{title} news", timelimit="d", max_results=5)
titles = []
for r in results:
try:
date_str = r["date"]
if '+00:00' in date_str:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc)
else:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
if dt > (datetime.now(timezone.utc) - timedelta(hours=24)):
titles.append(r["title"].lower())
except ValueError as e:
logging.warning(f"Date parsing failed for '{date_str}': {e}")
continue
context = " ".join(titles) if titles else "No recent news found within 24 hours"
logging.info(f"DuckDuckGo News context for '{title}': {context}")
return context
subreddit = reddit.subreddit(subreddit_name)
for submission in subreddit.top(time_filter='day', limit=100):
pub_date = datetime.fromtimestamp(submission.created_utc, tz=timezone.utc)
if pub_date < cutoff_date:
logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
continue
cleaned_title = clean_reddit_title(submission.title)
articles.append({
"title": cleaned_title,
"raw_title": submission.title,
"link": f"https://www.reddit.com{submission.permalink}",
"summary": submission.selftext,
"feed_title": get_clean_source_name(subreddit_name),
"pub_date": pub_date,
"upvotes": submission.score,
"comment_count": submission.num_comments
})
logging.info(f"Fetched {len(articles)} posts from r/{subreddit_name}")
except Exception as e:
logging.warning(f"DuckDuckGo News context fetch failed for '{title}' (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
logging.error(f"Failed to fetch Reddit feed r/{subreddit_name}: {e}")
logging.info(f"Total Reddit posts fetched: {len(articles)}")
return articles
def curate_from_reddit():
articles = fetch_reddit_posts()
if not articles:
print("No Reddit posts available")
logging.info("No Reddit posts available")
return None, None, None
articles.sort(key=lambda x: x["upvotes"], reverse=True)
reddit = praw.Reddit(
client_id=REDDIT_CLIENT_ID,
client_secret=REDDIT_CLIENT_SECRET,
user_agent=REDDIT_USER_AGENT
)
attempts = 0
max_attempts = 10
while attempts < max_attempts and articles:
article = articles.pop(0)
title = article["title"]
raw_title = article["raw_title"]
link = article["link"]
summary = article["summary"]
source_name = "Reddit"
original_source = '<a href="https://www.reddit.com/">Reddit</a>'
if raw_title in posted_titles:
print(f"Skipping already posted post: {raw_title}")
logging.info(f"Skipping already posted post: {raw_title}")
attempts += 1
continue
logging.error(f"Failed to fetch DuckDuckGo News context for '{title}' after {MAX_RETRIES} attempts")
return title
def fetch_reddit_posts():
"""Fetch Reddit posts from specified subreddits, filtering low-quality and [homemade] posts."""
try:
reddit = praw.Reddit(
client_id=REDDIT_CLIENT_ID,
client_secret=REDDIT_CLIENT_SECRET,
user_agent=REDDIT_USER_AGENT
)
feeds = [
"food",
"FoodPorn",
"spicy",
"KoreanFood",
"JapaneseFood",
"DessertPorn",
"ChineseFood",
"IndianFood"
]
articles = []
cutoff_date = datetime.now(timezone.utc) - timedelta(hours=EXPIRATION_HOURS)
logging.info(f"Starting fetch with cutoff date: {cutoff_date}")
for subreddit_name in feeds:
for attempt in range(MAX_RETRIES):
try:
subreddit = reddit.subreddit(subreddit_name)
for submission in subreddit.top(time_filter='day', limit=100):
pub_date = datetime.fromtimestamp(submission.created_utc, tz=timezone.utc)
if pub_date < cutoff_date:
logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
continue
if "[homemade]" in submission.title.lower():
logging.info(f"Skipping homemade post: {submission.title}")
continue
cleaned_title = clean_reddit_title(submission.title)
if not cleaned_title or len(cleaned_title) < 5:
logging.info(f"Skipping post with invalid or short title: {submission.title}")
continue
# Filter out posts with empty or very short summaries
summary = submission.selftext.strip() if submission.selftext else ""
if len(summary) < 20 and not submission.url.endswith(('.jpg', '.jpeg', '.png', '.gif')):
logging.info(f"Skipping post with insufficient summary: {cleaned_title}")
continue
# Fetch top comments for additional context
top_comments = get_top_comments(f"https://www.reddit.com{submission.permalink}", reddit)
articles.append({
"title": cleaned_title,
"raw_title": submission.title,
"link": f"https://www.reddit.com{submission.permalink}",
"summary": summary,
"feed_title": get_clean_source_name(subreddit_name),
"pub_date": pub_date,
"upvotes": submission.score,
"comment_count": submission.num_comments,
"top_comments": top_comments
})
logging.info(f"Fetched {len(articles)} posts from r/{subreddit_name}")
break
except Exception as e:
logging.error(f"Failed to fetch Reddit feed r/{subreddit_name} (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
continue
logging.info(f"Total Reddit posts fetched: {len(articles)}")
return articles
except Exception as e:
logging.error(f"Unexpected error in fetch_reddit_posts: {e}", exc_info=True)
return []
def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used_images):
try:
logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")
posts = fetch_reddit_posts()
if not posts:
logging.info("No Reddit posts available")
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
attempts = 0
max_attempts = 10
while attempts < max_attempts and posts:
post = posts.pop(0)
title = post["title"]
link = post.get("link", "")
summary = post.get("summary", "")
source_name = "Reddit"
original_source = f'<a href="{link}">{source_name}</a>'
original_url = link
upvotes = post.get("upvotes", 0)
comment_count = post.get("comment_count", 0)
top_comments = post.get("top_comments", [])
if title in posted_titles:
logging.info(f"Skipping already posted Reddit post: {title}")
attempts += 1
continue
print(f"Trying Reddit Post: {title} from {source_name}")
logging.info(f"Trying Reddit Post: {title} from {source_name}")
if upvotes < 300:
logging.info(f"Skipping post '{title}' due to insufficient upvotes ({upvotes} < 300)")
attempts += 1
continue
image_query, relevance_keywords, skip = smart_image_and_filter(title, summary)
if skip or any(keyword in title.lower() or keyword in raw_title.lower() for keyword in RECIPE_KEYWORDS + ["homemade"]):
print(f"Skipping filtered Reddit post: {title}")
logging.info(f"Skipping filtered Reddit post: {title}")
attempts += 1
continue
author = get_next_author_round_robin()
if not author:
logging.info(f"Skipping post '{title}' due to tweet rate limits for all authors")
attempts += 1
continue
top_comments = get_top_comments(link, reddit, limit=3)
interest_score = is_interesting_reddit(
title,
summary,
article["upvotes"],
article["comment_count"],
top_comments
)
logging.info(f"Interest Score: {interest_score} for '{title}'")
if interest_score < 6:
print(f"Reddit Interest Too Low: {interest_score}")
logging.info(f"Reddit Interest Too Low: {interest_score}")
attempts += 1
continue
author_username = author["username"]
logging.info(f"Selected author via round-robin: {author_username}")
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
f"Do NOT introduce unrelated concepts unless in the content or comments.\n"
f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = f"{title}\n\n{summary}"
if top_comments:
content_to_summarize += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
interest_score=interest_score,
extra_prompt=extra_prompt
)
if not final_summary:
logging.info(f"Summary failed for '{title}'")
attempts += 1
continue
logging.info(f"Trying Reddit Post: {title} from {source_name}")
final_summary = insert_link_naturally(final_summary, source_name, link)
# Combine summary and top comments for smart_image_and_filter
enhanced_summary = summary
if top_comments:
enhanced_summary += "\n\nTop Comments:\n" + "\n".join(top_comments)
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
if not post_data:
attempts += 1
continue
try:
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, enhanced_summary)
except Exception as e:
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1
continue
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
if not image_url:
image_url, image_source, uploader, page_url = get_image(image_query)
if skip:
logging.info(f"Skipping filtered Reddit post: {title}")
attempts += 1
continue
hook = get_dynamic_hook(post_data["title"]).strip()
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=None)
ddg_context = fetch_duckduckgo_news_context(title)
scoring_content = f"Title: {title}\n\nContent: {summary}\n\nTop Comments: {top_comments}\n\nAdditional Context: {ddg_context}"
logging.debug(f"Scoring content for '{title}': {scoring_content}")
interest_score = is_interesting_reddit(title, summary, upvotes, comment_count, top_comments)
logging.info(f"Interest score for '{title}': {interest_score}")
if interest_score < 6:
logging.info(f"Reddit Interest Too Low: {interest_score}")
attempts += 1
continue
# Generate viral share prompt
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
share_links_template = (
f'<p>{share_prompt} '
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
)
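# The {post_url} and {share_text} placeholders are left unformatted here; they are filled in
# via share_links_template.format(...) once the WordPress post URL is known.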
post_data["content"] = f"{final_summary}\n\n{share_links_template}" # Removed cta from content
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = scoring_content
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
global is_posting
is_posting = True
try:
post_id, post_url = post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=image_url,
original_source=original_source,
image_source=image_source,
uploader=uploader,
pixabay_url=pixabay_url,
interest_score=interest_score,
extra_prompt=extra_prompt
should_post_tweet=True
)
if not final_summary:
logging.info(f"Summary failed for '{title}'")
attempts += 1
continue
finally:
is_posting = False
final_summary = insert_link_naturally(final_summary, source_name, link)
post_data = {
"title": generate_title_from_summary(final_summary),
"content": final_summary,
"status": "publish",
"author": author_username,
"categories": [generate_category_from_summary(final_summary)]
}
category = post_data["categories"][0]
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
if not image_url:
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
if not image_url:
logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None
uploader = None
page_url = None
hook = get_dynamic_hook(post_data["title"]).strip()
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
if post_id:
share_text = f"Check out this foodie gem! {post_data['title']}"
share_text_encoded = quote(share_text)
share_links_template = (
f'<p>{share_prompt} '
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
)
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
global is_posting
post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
is_posting = True
try:
post_id, post_url = post_to_wp(
post_to_wp(
post_data=post_data,
category=category,
link=link,
@ -476,97 +348,51 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
pixabay_url=pixabay_url,
interest_score=interest_score,
should_post_tweet=True,
summary=final_summary
post_id=post_id,
should_post_tweet=False
)
if not post_id:
logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}")
post_url = original_url
else:
logging.info(f"Posted to WordPress for {author_username}: {post_url}")
post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
post_data["content"] = f"{final_summary}\n\n{share_links}"
post_data["post_id"] = post_id
if post_id:
post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=None,
original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
interest_score=interest_score,
post_id=post_id,
should_post_tweet=False
)
except Exception as e:
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
post_url = original_url
finally:
is_posting = False
timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, title, timestamp)
posted_titles.add(title)
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
save_json_file(POSTED_TITLES_FILE, raw_title, timestamp)
posted_titles.add(raw_title)
logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}")
if image_url:
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE} with timestamp {timestamp}")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from Reddit *****")
sleep_time = random.randint(1200, 1800)
return post_data, category, sleep_time
print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
print(f"Actual post URL: {post_url}")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
logging.info(f"Actual post URL: {post_url}")
return post_data, category, random.randint(0, 1800)
logging.info("No interesting Reddit post found after attempts")
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
except Exception as e:
logging.error(f"Unexpected error in curate_from_reddit: {e}", exc_info=True)
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
attempts += 1
logging.info(f"WP posting failed for '{post_data['title']}'")
print("No interesting Reddit post found after attempts")
logging.info("No interesting Reddit post found after attempts")
return None, None, random.randint(600, 1800)
def run_reddit_automator():
lock_fd = None
try:
lock_fd = acquire_lock()
update_system_activity(SCRIPT_NAME, "running", os.getpid()) # Record start
logging.info("***** Reddit Automator Launched *****")
# Load JSON files once
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
posted_titles = set(entry["title"] for entry in posted_titles_data)
used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
post_data, category, sleep_time = curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used_images)
if not post_data:
logging.info("No postable Reddit article found")
logging.info("Completed Reddit run")
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
# Sleep while still marked as running
time.sleep(sleep_time)
update_system_activity(SCRIPT_NAME, "stopped") # Record stop after sleep
return post_data, category, sleep_time
except Exception as e:
logging.error(f"Fatal error in run_reddit_automator: {e}", exc_info=True)
update_system_activity(SCRIPT_NAME, "stopped") # Record stop on error
sleep_time = random.randint(1200, 1800) # 20–30 minutes
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
return None, None, sleep_time
finally:
if lock_fd:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
if os.path.exists(LOCK_FILE): os.remove(LOCK_FILE)
print(f"{datetime.now(timezone.utc)} - INFO - ***** Reddit Automator Launched *****")
logging.info("***** Reddit Automator Launched *****")
post_data, category, sleep_time = curate_from_reddit()
if not post_data:
print(f"No postable Reddit article found - sleeping for {sleep_time} seconds")
logging.info(f"No postable Reddit article found - sleeping for {sleep_time} seconds")
else:
print(f"Completed Reddit run with sleep time: {sleep_time} seconds")
logging.info(f"Completed Reddit run with sleep time: {sleep_time} seconds")
print(f"Sleeping for {sleep_time}s")
time.sleep(sleep_time)
return post_data, category, sleep_time
if __name__ == "__main__":
setup_logging()
post_data, category, sleep_time = run_reddit_automator()
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
run_reddit_automator()

@ -9,8 +9,6 @@ import signal
import sys
import re
import email.utils
import feedparser
from duckduckgo_search import DDGS
from datetime import datetime, timedelta, timezone
from bs4 import BeautifulSoup
from openai import OpenAI
@ -27,127 +25,90 @@ from foodie_utils import (
upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
is_interesting, generate_title_from_summary, summarize_with_gpt4o,
generate_category_from_summary, post_to_wp, prepare_post_data,
select_best_author, smart_image_and_filter, get_flickr_image,
get_next_author_round_robin, check_author_rate_limit, update_system_activity
select_best_author, smart_image_and_filter, get_flickr_image
)
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
from dotenv import load_dotenv
import fcntl
load_dotenv()
is_posting = False
SCRIPT_NAME = "foodie_automator_rss"
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_rss.lock"
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_automator_rss.log"
def signal_handler(sig, frame):
logging.info("Received termination signal, checking if safe to exit...")
if is_posting:
logging.info("Currently posting, will exit after completion.")
else:
logging.info("Safe to exit immediately.")
sys.exit(0)
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
LOG_FILE = "/home/shane/foodie_automator/foodie_automator_rss.log"
LOG_PRUNE_DAYS = 30
FEED_TIMEOUT = 15
MAX_RETRIES = 3
RETRY_BACKOFF = 2
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json'
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
EXPIRATION_HOURS = 24
IMAGE_EXPIRATION_DAYS = 7
def setup_logging():
"""Initialize logging with pruning of old logs."""
try:
logging.debug("Attempting to set up logging")
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}")
if not os.access(os.path.dirname(LOG_FILE), os.W_OK):
raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}")
# Test write to log file
try:
with open(LOG_FILE, 'a') as f:
f.write("")
logging.debug(f"Confirmed write access to {LOG_FILE}")
except Exception as e:
raise PermissionError(f"Cannot write to {LOG_FILE}: {e}")
if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
pruned_lines = []
malformed_count = 0
for line in lines:
if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit():
malformed_count += 1
continue
try:
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
if timestamp > cutoff:
pruned_lines.append(line)
except ValueError:
malformed_count += 1
continue
if malformed_count > 0:
logging.info(f"Skipped {malformed_count} malformed log lines during pruning")
with open(LOG_FILE, 'w') as f:
f.writelines(pruned_lines)
logging.debug(f"Log file pruned: {LOG_FILE}")
logging.basicConfig(
filename=LOG_FILE,
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
force=True
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("openai").setLevel(logging.WARNING)
logging.info("Logging initialized for foodie_automator_rss.py")
except Exception as e:
print(f"Failed to setup logging: {e}")
sys.exit(1)
# Call setup_logging immediately
setup_logging()
check_author_rate_limit.script_run_id = int(time.time())
logging.info(f"Set script_run_id to {check_author_rate_limit.script_run_id}")
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
posted_titles = set(entry["title"] for entry in posted_titles_data)
used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
def acquire_lock():
try:
logging.debug("Attempting to acquire lock")
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
lock_fd = open(LOCK_FILE, 'w')
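# Exclusive, non-blocking lock: if another instance already holds it, flock raises IOError
# and this process exits below instead of waiting.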
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
lock_fd.write(str(os.getpid()))
lock_fd.flush()
logging.debug(f"Lock acquired: {LOCK_FILE}")
return lock_fd
except IOError:
logging.info("Another instance of foodie_automator_rss.py is running")
sys.exit(0)
def signal_handler(sig, frame):
logging.info("Received termination signal, marking script as stopped...")
update_system_activity(SCRIPT_NAME, "stopped")
sys.exit(0)
def setup_logging():
if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
pruned_lines = []
malformed_count = 0
for line in lines:
if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit():
malformed_count += 1
continue
try:
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
if timestamp > cutoff:
pruned_lines.append(line)
except ValueError:
malformed_count += 1
continue
if malformed_count > 0:
logging.info(f"Skipped {malformed_count} malformed log lines during pruning")
with open(LOG_FILE, 'w') as f:
f.writelines(pruned_lines)
logging.basicConfig(
filename=LOG_FILE,
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.getLogger("requests").setLevel(logging.WARNING)
logging.info("Logging initialized for foodie_automator_rss.py")
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
setup_logging()
def create_http_session() -> requests.Session:
session = requests.Session()
retry_strategy = Retry(
total=MAX_RETRIES,
backoff_factor=RETRY_BACKOFF,
backoff_factor=2,
status_forcelist=[403, 429, 500, 502, 503, 504],
allowed_methods=["GET", "POST"]
)
adapter = HTTPAdapter(max_retries=retry_strategy)
adapter = HTTPAdapter(
max_retries=retry_strategy,
pool_connections=10,
pool_maxsize=10
)
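# The adapter retries transient failures (403/429/5xx) up to MAX_RETRIES times with
# exponential backoff and reuses up to 10 pooled connections per host across feed fetches.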
session.mount("http://", adapter)
session.mount("https://", adapter)
session.headers.update({
@ -175,201 +136,186 @@ def fetch_rss_feeds():
logging.error("RSS_FEEDS is empty in foodie_config.py")
return articles
logging.info(f"Processing feeds: {RSS_FEEDS}")
for feed_url in RSS_FEEDS:
for attempt in range(MAX_RETRIES):
logging.info(f"Processing feed: {feed_url} (attempt {attempt + 1})")
try:
response = session.get(feed_url, timeout=FEED_TIMEOUT)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'xml')
items = soup.find_all('item')
feed_title = RSS_FEED_NAMES.get(feed_url, (get_clean_source_name(feed_url), feed_url))
for item in items:
try:
title = item.find('title').text.strip() if item.find('title') else "Untitled"
link = item.find('link').text.strip() if item.find('link') else ""
pub_date = item.find('pubDate')
pub_date = parse_date(pub_date.text) if pub_date else datetime.now(timezone.utc)
if pub_date < cutoff_date:
logging.info(f"Skipping old article: {title} (Published: {pub_date})")
continue
description = item.find('description')
summary = BeautifulSoup(description.text, 'html.parser').get_text().strip() if description else ""
content = item.find('content:encoded')
content_text = BeautifulSoup(content.text, 'html.parser').get_text().strip() if content else summary
articles.append({
"title": title,
"link": link,
"summary": summary,
"content": content_text,
"feed_title": feed_title[0] if isinstance(feed_title, tuple) else feed_title,
"pub_date": pub_date
})
logging.debug(f"Processed article: {title}")
except Exception as e:
logging.warning(f"Error processing entry in {feed_url}: {e}")
logging.info(f"Processing feed: {feed_url}")
try:
response = session.get(feed_url, timeout=FEED_TIMEOUT)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'xml')
items = soup.find_all('item')
feed_title = RSS_FEED_NAMES.get(feed_url, (get_clean_source_name(feed_url), feed_url))
for item in items:
try:
title = item.find('title').text.strip() if item.find('title') else "Untitled"
link = item.find('link').text.strip() if item.find('link') else ""
pub_date = item.find('pubDate')
pub_date = parse_date(pub_date.text) if pub_date else datetime.now(timezone.utc)
if pub_date < cutoff_date:
logging.info(f"Skipping old article: {title} (Published: {pub_date})")
continue
logging.info(f"Filtered to {len(articles)} articles from {feed_url}")
break
except Exception as e:
logging.error(f"Failed to fetch RSS feed {feed_url}: {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
continue
description = item.find('description')
summary = BeautifulSoup(description.text, 'html.parser').get_text().strip() if description else ""
content = item.find('content:encoded')
content_text = BeautifulSoup(content.text, 'html.parser').get_text().strip() if content else summary
articles.append({
"title": title,
"link": link,
"summary": summary,
"content": content_text,
"feed_title": feed_title[0] if isinstance(feed_title, tuple) else feed_title,
"pub_date": pub_date
})
logging.debug(f"Processed article: {title}")
except Exception as e:
logging.warning(f"Error processing entry in {feed_url}: {e}")
continue
logging.info(f"Filtered to {len(articles)} articles from {feed_url}")
except Exception as e:
logging.error(f"Failed to fetch RSS feed {feed_url}: {e}")
continue
articles.sort(key=lambda x: x["pub_date"], reverse=True)
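# Newest first, so the curation loop always tries the freshest article when it pops from the front of the list.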
logging.info(f"Total RSS articles fetched: {len(articles)}")
return articles
def fetch_duckduckgo_news_context(title, hours=24):
for attempt in range(MAX_RETRIES):
try:
with DDGS() as ddgs:
results = ddgs.news(f"{title} news", timelimit="d", max_results=5)
titles = []
for r in results:
try:
date_str = r["date"]
if '+00:00' in date_str:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc)
else:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
if dt > (datetime.now(timezone.utc) - timedelta(hours=24)):
titles.append(r["title"].lower())
except ValueError as e:
logging.warning(f"Date parsing failed for '{date_str}': {e}")
continue
context = " ".join(titles) if titles else "No recent news found within 24 hours"
logging.info(f"DuckDuckGo News context for '{title}': {context}")
return context
except Exception as e:
logging.warning(f"DuckDuckGo News context fetch failed for '{title}' (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
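# Exponential backoff between DuckDuckGo retries: RETRY_BACKOFF * 2**attempt seconds.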
def curate_from_rss():
articles = fetch_rss_feeds()
if not articles:
print("No RSS articles available")
logging.info("No RSS articles available")
return None, None, random.randint(600, 1800)
attempts = 0
max_attempts = 10
while attempts < max_attempts and articles:
article = articles.pop(0)
title = article["title"]
link = article["link"]
summary = article["summary"]
content = article["content"]
source_name = article["feed_title"]
original_source = f'<a href="{link}">{source_name}</a>'
if title in posted_titles:
print(f"Skipping already posted article: {title}")
logging.info(f"Skipping already posted article: {title}")
attempts += 1
continue
logging.error(f"Failed to fetch DuckDuckGo News context for '{title}' after {MAX_RETRIES} attempts")
return title
def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_images):
try:
logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")
articles = fetch_rss_feeds()
if not articles:
logging.info("No RSS articles available")
sleep_time = random.randint(1200, 1800) # 20–30 minutes
return None, None, sleep_time
attempts = 0
max_attempts = 10
while attempts < max_attempts and articles:
article = articles.pop(0)
title = article["title"]
link = article["link"]
summary = article.get("summary", "")
source_name = article.get("feed_title", "Unknown Source")
original_source = f'<a href="{link}">{source_name}</a>'
original_url = link # Store for fallback
if title in posted_titles:
logging.info(f"Skipping already posted article: {title}")
attempts += 1
continue
print(f"Trying RSS Article: {title} from {source_name}")
logging.info(f"Trying RSS Article: {title} from {source_name}")
# Select author
author = get_next_author_round_robin()
if not author:
logging.info(f"Skipping article '{title}' due to tweet rate limits for all authors")
attempts += 1
continue
author_username = author["username"]
logging.info(f"Selected author via round-robin: {author_username}")
image_query, relevance_keywords, skip = smart_image_and_filter(title, summary)
if skip:
print(f"Skipping filtered RSS article: {title}")
logging.info(f"Skipping filtered RSS article: {title}")
attempts += 1
continue
logging.info(f"Trying RSS Article: {title} from {source_name}")
scoring_content = f"{title}\n\n{summary}\n\nContent: {content}"
interest_score = is_interesting(scoring_content)
logging.info(f"Interest score for '{title}': {interest_score}")
if interest_score < 6:
print(f"RSS Interest Too Low: {interest_score}")
logging.info(f"RSS Interest Too Low: {interest_score}")
attempts += 1
continue
try:
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
except Exception as e:
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1
continue
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
f"Do NOT introduce unrelated concepts.\n"
f"Expand on the core idea with relevant context about its appeal or significance.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = scoring_content
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
interest_score=interest_score,
extra_prompt=extra_prompt
)
if not final_summary:
logging.info(f"Summary failed for '{title}'")
attempts += 1
continue
if skip:
logging.info(f"Skipping filtered RSS article: {title}")
attempts += 1
continue
# Remove the original title from the summary while preserving paragraphs
title_pattern = re.compile(
r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title),
re.IGNORECASE
)
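# Matches the article title case-insensitively, with or without surrounding ** bold markers,
# so it can be stripped from the generated summary.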
paragraphs = final_summary.split('\n')
cleaned_paragraphs = []
for para in paragraphs:
if para.strip():
cleaned_para = title_pattern.sub('', para).strip()
cleaned_para = re.sub(r'\s+', ' ', cleaned_para)
cleaned_paragraphs.append(cleaned_para)
final_summary = '\n'.join(cleaned_paragraphs)
final_summary = insert_link_naturally(final_summary, source_name, link)
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
if not post_data:
attempts += 1
continue
ddg_context = fetch_duckduckgo_news_context(title)
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
interest_score = is_interesting(scoring_content)
logging.info(f"Interest score for '{title}': {interest_score}")
if interest_score < 6:
logging.info(f"RSS Interest Too Low: {interest_score}")
# Fetch image
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
if not image_url:
logging.info(f"Flickr fetch failed for '{image_query}'. Falling back to Pixabay.")
image_url, image_source, uploader, page_url = get_image(image_query)
if not image_url:
logging.info(f"Pixabay fetch failed for '{image_query}'. Skipping article '{title}'.")
attempts += 1
continue
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = scoring_content
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
hook = get_dynamic_hook(post_data["title"]).strip()
# Generate viral share prompt
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
share_links_template = (
f'<p>{share_prompt} '
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
)
post_data["content"] = f"{final_summary}\n\n{share_links_template}" # Removed cta from content
global is_posting
is_posting = True
try:
post_id, post_url = post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=image_url,
original_source=original_source,
image_source=image_source,
uploader=uploader,
pixabay_url=pixabay_url,
interest_score=interest_score,
extra_prompt=extra_prompt
should_post_tweet=True
)
if not final_summary:
logging.info(f"Summary failed for '{title}'")
attempts += 1
continue
finally:
is_posting = False
final_summary = insert_link_naturally(final_summary, source_name, link)
post_data = {
"title": generate_title_from_summary(final_summary),
"content": final_summary,
"status": "publish",
"author": author_username,
"categories": [generate_category_from_summary(final_summary)]
}
category = post_data["categories"][0]
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
if not image_url:
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
if not image_url:
logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None
uploader = None
page_url = None
hook = get_dynamic_hook(post_data["title"]).strip()
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
if post_id:
share_text = f"Check out this foodie gem! {post_data['title']}"
share_text_encoded = quote(share_text)
share_links_template = (
f'<p>{share_prompt} '
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
)
post_data["content"] = f"{final_summary}\n\n{share_links_template.format(post_url='{post_url}', share_text=share_text_encoded)}"
global is_posting
post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
is_posting = True
try:
post_id, post_url = post_to_wp(
post_to_wp(
post_data=post_data,
category=category,
link=link,
@ -378,103 +324,43 @@ def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_im
original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
pixabay_url=pixabay_url,
interest_score=interest_score,
should_post_tweet=True,
summary=final_summary
post_id=post_id,
should_post_tweet=False
)
if not post_id:
logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}")
post_url = original_url
else:
logging.info(f"Posted to WordPress for {author_username}: {post_url}")
post_url_encoded = quote(post_url)
post_data["content"] = f"{final_summary}\n\n{share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)}"
if post_id:
post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=None, # Skip image re-upload
original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
interest_score=interest_score,
post_id=post_id,
should_post_tweet=False
)
timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, title, timestamp)
posted_titles.add(title)
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
if image_url:
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from RSS *****")
sleep_time = random.randint(1200, 1800) # 20–30 minutes
return post_data, category, sleep_time
except Exception as e:
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
post_url = original_url
timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, title, timestamp)
posted_titles.add(title)
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
if image_url:
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
attempts += 1
finally:
is_posting = False
logging.info("No interesting RSS article found after attempts")
sleep_time = random.randint(1200, 1800) # 20–30 minutes
return None, None, sleep_time
except Exception as e:
logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True)
sleep_time = random.randint(1200, 1800) # 20–30 minutes
return None, None, sleep_time
timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, title, timestamp)
posted_titles.add(title)
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
if image_url:
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
return post_data, category, random.randint(0, 1800)
attempts += 1
logging.info(f"WP posting failed for '{post_data['title']}'")
print("No interesting RSS article found after attempts")
logging.info("No interesting RSS article found after attempts")
return None, None, random.randint(600, 1800)
def run_rss_automator():
lock_fd = None
try:
lock_fd = acquire_lock()
update_system_activity(SCRIPT_NAME, "running", os.getpid()) # Record start
logging.info("***** RSS Automator Launched *****")
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
posted_titles = set(entry["title"] for entry in posted_titles_data)
used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
post_data, category, sleep_time = curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_images)
if not post_data:
logging.info("No postable RSS article found")
logging.info("Completed RSS run")
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
# Sleep while still marked as running
time.sleep(sleep_time)
update_system_activity(SCRIPT_NAME, "stopped") # Record stop after sleep
return post_data, category, sleep_time
except Exception as e:
logging.error(f"Fatal error in run_rss_automator: {e}", exc_info=True)
update_system_activity(SCRIPT_NAME, "stopped") # Record stop on error
sleep_time = random.randint(1200, 1800) # Fixed to 20–30 minutes
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
return None, None, sleep_time
finally:
if lock_fd:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
if os.path.exists(LOCK_FILE): os.remove(LOCK_FILE)
print(f"{datetime.now(timezone.utc)} - INFO - ***** RSS Automator Launched *****")
logging.info("***** RSS Automator Launched *****")
post_data, category, sleep_time = curate_from_rss()
print(f"Sleeping for {sleep_time}s")
logging.info(f"Completed run with sleep time: {sleep_time} seconds")
time.sleep(sleep_time)
return post_data, category, sleep_time
if __name__ == "__main__":
post_data, category, sleep_time = run_rss_automator()
# logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
run_rss_automator()

@ -31,7 +31,7 @@ AUTHORS = [
"username": "aishapatel",
"password": os.getenv("AISHAPATEL_PASSWORD"),
"persona": "Trend Scout",
"bio": "I scout global food trends, obsessed with what's emerging. My sharp predictions map the industry's path—always one step ahead.",
"bio": "I scout global food trends, obsessed with what’s emerging. My sharp predictions map the industry’s path—always one step ahead.",
"dob": "1999-03-15"
},
{
@ -47,7 +47,7 @@ AUTHORS = [
"username": "keishareid",
"password": os.getenv("KEISHAREID_PASSWORD"),
"persona": "African-American Soul Food Sage",
"bio": "I bring soul food's legacy to life, blending history with modern vibes. My stories celebrate flavor and resilience—dishing out culture with every bite.",
"bio": "I bring soul foods legacy to life, blending history with modern vibes. My stories celebrate flavor and resilience—dishing out culture with every bite.",
"dob": "1994-06-10"
},
{
@ -60,63 +60,69 @@ AUTHORS = [
}
]
X_API_CREDENTIALS = {
"owenjohnson": {
"x_username": "@mrowenjohnson",
X_API_CREDENTIALS = [
{
"username": "owenjohnson",
"x_username": "@insiderfoodieowen",
"api_key": os.getenv("OWENJOHNSON_X_API_KEY"),
"api_secret": os.getenv("OWENJOHNSON_X_API_SECRET"),
"access_token": os.getenv("OWENJOHNSON_X_ACCESS_TOKEN"),
"access_token_secret": os.getenv("OWENJOHNSON_X_ACCESS_TOKEN_SECRET"),
"client_secret": os.getenv("OWENJOHNSON_X_CLIENT_SECRET")
},
"javiermorales": {
"x_username": "@mrjaviermorales",
{
"username": "javiermorales",
"x_username": "@insiderfoodiejavier",
"api_key": os.getenv("JAVIERMORALES_X_API_KEY"),
"api_secret": os.getenv("JAVIERMORALES_X_API_SECRET"),
"access_token": os.getenv("JAVIERMORALES_X_ACCESS_TOKEN"),
"access_token_secret": os.getenv("JAVIERMORALES_X_ACCESS_TOKEN_SECRET"),
"client_secret": os.getenv("JAVIERMORALES_X_CLIENT_SECRET")
},
"aishapatel": {
"x_username": "@missaishapatel",
{
"username": "aishapatel",
"x_username": "@insiderfoodieaisha",
"api_key": os.getenv("AISHAPATEL_X_API_KEY"),
"api_secret": os.getenv("AISHAPATEL_X_API_SECRET"),
"access_token": os.getenv("AISHAPATEL_X_ACCESS_TOKEN"),
"access_token_secret": os.getenv("AISHAPATEL_X_ACCESS_TOKEN_SECRET"),
"client_secret": os.getenv("AISHAPATEL_X_CLIENT_SECRET")
},
"trangnguyen": {
"x_username": "@mrtrangnguyen",
{
"username": "trangnguyen",
"x_username": "@insiderfoodietrang",
"api_key": os.getenv("TRANGNGUYEN_X_API_KEY"),
"api_secret": os.getenv("TRANGNGUYEN_X_API_SECRET"),
"access_token": os.getenv("TRANGNGUYEN_X_ACCESS_TOKEN"),
"access_token_secret": os.getenv("TRANGNGUYEN_X_ACCESS_TOKEN_SECRET"),
"client_secret": os.getenv("TRANGNGUYEN_X_CLIENT_SECRET")
},
"keishareid": {
"x_username": "@misskeishareid",
{
"username": "keishareid",
"x_username": "@insiderfoodiekeisha",
"api_key": os.getenv("KEISHAREID_X_API_KEY"),
"api_secret": os.getenv("KEISHAREID_X_API_SECRET"),
"access_token": os.getenv("KEISHAREID_X_ACCESS_TOKEN"),
"access_token_secret": os.getenv("KEISHAREID_X_ACCESS_TOKEN_SECRET"),
"client_secret": os.getenv("KEISHAREID_X_CLIENT_SECRET")
},
"lilamoreau": {
"x_username": "@misslilamoreau",
{
"username": "lilamoreau",
"x_username": "@insiderfoodielila",
"api_key": os.getenv("LILAMOREAU_X_API_KEY"),
"api_secret": os.getenv("LILAMOREAU_X_API_SECRET"),
"access_token": os.getenv("LILAMOREAU_X_ACCESS_TOKEN"),
"access_token_secret": os.getenv("LILAMOREAU_X_ACCESS_TOKEN_SECRET"),
"client_secret": os.getenv("LILAMOREAU_X_CLIENT_SECRET")
}
}
]
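# Note: X_API_CREDENTIALS is now a list of per-author dicts rather than a dict keyed by
# username, so dict-style lookups like X_API_CREDENTIALS.get(username) no longer apply.
# A minimal lookup sketch under that assumption; find_x_credentials is illustrative only
# and not part of the real config:
def find_x_credentials(username):
    """Return the credential dict for a username, or None if it is not configured."""
    return next((cred for cred in X_API_CREDENTIALS if cred["username"] == username), None)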
PERSONA_CONFIGS = {
"Visionary Editor": {
"description": "a commanding food editor with a borderless view",
"tone": "a polished and insightful tone, like 'This redefines culinary excellence.'",
"article_prompt": (
"You're {description}. Summarize this article in {tone}. "
"Youre {description}. Summarize this article in {tone}. "
"Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
"Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
"Add a bold take and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
@ -133,7 +139,7 @@ PERSONA_CONFIGS = {
"description": "a seasoned foodie reviewer with a sharp eye",
"tone": "a professional yet engaging tone, like 'This dish is a revelation.'",
"article_prompt": (
"You're {description}. Summarize this article in {tone}. "
"Youre {description}. Summarize this article in {tone}. "
"Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
"Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
"Add a subtle opinion and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
@ -148,12 +154,12 @@ PERSONA_CONFIGS = {
},
"Trend Scout": {
"description": "a forward-thinking editor obsessed with trends",
"tone": "an insightful and forward-looking tone, like 'This sets the stage for what's next.'",
"tone": "an insightful and forward-looking tone, like 'This sets the stage for whats next.'",
"article_prompt": (
"You're {description}. Summarize this article in {tone}. "
"Youre {description}. Summarize this article in {tone}. "
"Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
"Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
"Predict what's next and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
"Predict whats next and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
),
"x_prompt": (
"Craft a tweet as {description}. Keep it under 280 characters, using {tone}. "
@ -167,7 +173,7 @@ PERSONA_CONFIGS = {
"description": "a cultured food writer who loves storytelling",
"tone": "a warm and thoughtful tone, like 'This evokes a sense of tradition.'",
"article_prompt": (
"You're {description}. Summarize this article in {tone}. "
"Youre {description}. Summarize this article in {tone}. "
"Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
"Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
"Add a thoughtful observation and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
@ -184,7 +190,7 @@ PERSONA_CONFIGS = {
"description": "a vibrant storyteller rooted in African-American culinary heritage",
"tone": "a heartfelt and authentic tone, like 'This captures the essence of heritage.'",
"article_prompt": (
"You're {description}. Summarize this article in {tone}. "
"Youre {description}. Summarize this article in {tone}. "
"Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
"Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
"Add a heritage twist and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
@ -201,7 +207,7 @@ PERSONA_CONFIGS = {
"description": "an adventurous explorer of global street food",
"tone": "a bold and adventurous tone, like 'This takes you on a global journey.'",
"article_prompt": (
"You're {description}. Summarize this article in {tone}. "
"Youre {description}. Summarize this article in {tone}. "
"Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
"Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
"Drop a street-level insight and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
@ -239,7 +245,7 @@ RSS_FEED_NAMES = {
"https://www.eater.com/rss/full.xml": ("Eater", "https://www.eater.com/"),
"https://www.nrn.com/rss.xml": ("Nation's Restaurant News", "https://www.nrn.com/"),
"https://rss.nytimes.com/services/xml/rss/nyt/DiningandWine.xml": ("The New York Times", "https://www.nytimes.com/section/food"),
"https://www.theguardian.com/food/rss": ("The Guardian", "https://www.theguardian.com/food")
"https://www.theguardian.com/food/rss": ("The Guardian Food", "https://www.theguardian.com/food")
}
RECIPE_KEYWORDS = ["recipe", "cook", "bake", "baking", "cooking", "ingredient", "method", "mix", "stir", "preheat", "dinners", "make", "dish", "healthy"]
@ -248,7 +254,8 @@ HOME_KEYWORDS = ["home", "house", "household", "appliance", "kitchen", "gadget"]
PRODUCT_KEYWORDS = ["best", "buy", "storage", "organizer", "shop", "price", "container", "product", "deal", "sale", "discount"]
CATEGORIES = [
"Buzz", "Trends", "Lifestyle", "Culture", "Health", "Drink", "Food", "Eats"
"People", "Trends", "Travel",
"Lifestyle", "Buzz", "Culture", "Health", "Drink", "Food", "Eats"
]
REDDIT_CLIENT_ID = os.getenv("REDDIT_CLIENT_ID")
@ -257,12 +264,7 @@ REDDIT_USER_AGENT = os.getenv("REDDIT_USER_AGENT")
REDDIT_SUBREDDITS = [
"food",
"FoodPorn",
"spicy",
"KoreanFood",
"JapaneseFood",
"DessertPorn",
"ChineseFood",
"IndianFood"
"spicy"
]
FAST_FOOD_KEYWORDS = [
"mcdonald", "burger king", "wendy", "taco bell", "kfc",
@ -283,13 +285,3 @@ def get_clean_source_name(source_name):
if feed_url == source_name:
return clean_name
return source_name
# Email configuration for alerts
EMAIL_CONFIG = {
'from_email': 'systemalerts@insiderfoodie.com', # System alerts email
'to_email': 'systemalerts@insiderfoodie.com', # Same email for receiving alerts
'smtp_server': 'mail.insiderfoodie.com', # Your SMTP server
'smtp_port': 587, # STARTTLS port
'smtp_username': 'systemalerts', # SMTP username
'smtp_password': os.getenv('INSIDERFOODIE_EMAIL_PASSWORD') # Store password in .env
}

@ -1,236 +0,0 @@
# foodie_engagement_generator.py
import json
import logging
import random
import signal
import sys
import fcntl
import os
import time
from datetime import datetime, timedelta, timezone
from openai import OpenAI
from foodie_utils import AUTHORS, SUMMARY_MODEL, load_json_file, save_json_file, update_system_activity
from foodie_config import X_API_CREDENTIALS, AUTHOR_BACKGROUNDS_FILE
from dotenv import load_dotenv
load_dotenv()
SCRIPT_NAME = "foodie_engagement_generator"
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_engagement_generator.lock"
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_engagement_generator.log"
ENGAGEMENT_TWEETS_FILE = "/home/shane/foodie_automator/engagement_tweets.json"
LOG_PRUNE_DAYS = 30
MAX_RETRIES = 3
RETRY_BACKOFF = 2
def setup_logging():
"""Initialize logging with pruning of old logs."""
try:
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
pruned_lines = []
malformed_count = 0
for line in lines:
if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit():
malformed_count += 1
continue
try:
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
if timestamp > cutoff:
pruned_lines.append(line)
except ValueError:
malformed_count += 1
continue
if malformed_count > 0:
logging.info(f"Skipped {malformed_count} malformed log lines during pruning")
with open(LOG_FILE, 'w') as f:
f.writelines(pruned_lines)
logging.basicConfig(
filename=LOG_FILE,
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.getLogger("openai").setLevel(logging.WARNING)
logging.info("Logging initialized for foodie_engagement_generator.py")
except Exception as e:
print(f"Failed to setup logging: {e}")
sys.exit(1)
def acquire_lock():
"""Acquire a lock to prevent concurrent runs."""
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
lock_fd = open(LOCK_FILE, 'w')
try:
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
lock_fd.write(str(os.getpid()))
lock_fd.flush()
return lock_fd
except IOError:
logging.info("Another instance of foodie_engagement_generator.py is running")
sys.exit(0)
def signal_handler(sig, frame):
"""Handle termination signals gracefully."""
logging.info("Received termination signal, marking script as stopped...")
update_system_activity(SCRIPT_NAME, "stopped")
sys.exit(0)
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
# Initialize OpenAI client
try:
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
if not os.getenv("OPENAI_API_KEY"):
logging.error("OPENAI_API_KEY is not set in environment variables")
raise ValueError("OPENAI_API_KEY is required")
except Exception as e:
logging.error(f"Failed to initialize OpenAI client: {e}", exc_info=True)
sys.exit(1)
# Load author backgrounds
try:
with open(AUTHOR_BACKGROUNDS_FILE, 'r') as f:
AUTHOR_BACKGROUNDS = json.load(f)
except Exception as e:
logging.error(f"Failed to load author_backgrounds.json: {e}", exc_info=True)
sys.exit(1)
def generate_engagement_tweet(author):
"""Generate an engagement tweet using author background themes."""
credentials = X_API_CREDENTIALS.get(author["username"])
if not credentials:
logging.error(f"No X credentials found for {author['username']}")
return None
author_handle = credentials["x_username"]
background = next((bg for bg in AUTHOR_BACKGROUNDS if bg["username"] == author["username"]), {})
if not background or "engagement_themes" not in background:
logging.warning(f"No background or engagement themes found for {author['username']}")
theme = "food trends"
else:
theme = random.choice(background["engagement_themes"])
prompt = (
f"Generate a concise tweet (under 230 characters) for {author_handle}. "
f"Create an engaging question or statement about {theme} to spark interaction. "
f"Include a call to action to follow {author_handle} or like the tweet, and mention InsiderFoodie.com with a link to https://insiderfoodie.com. "
f"Avoid using the word 'elevate'—use more humanized language like 'level up' or 'bring to life'. "
f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways)."
)
for attempt in range(MAX_RETRIES):
try:
response = client.chat.completions.create(
model=SUMMARY_MODEL,
messages=[
{"role": "system", "content": "You are a social media expert crafting engaging tweets."},
{"role": "user", "content": prompt}
],
max_tokens=100,
temperature=0.7
)
tweet = response.choices[0].message.content.strip()
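# Trim to X's 280-character limit if needed, reserving three characters for the trailing ellipsis.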
if len(tweet) > 280:
tweet = tweet[:277] + "..."
logging.debug(f"Generated engagement tweet: {tweet}")
return tweet
except Exception as e:
logging.warning(f"Failed to generate engagement tweet for {author['username']} (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
else:
logging.error(f"Failed to generate engagement tweet after {MAX_RETRIES} attempts")
engagement_templates = [
f"What's the most mouthwatering {theme} you've seen this week? Share below and follow {author_handle} for more on InsiderFoodie.com! Link: https://insiderfoodie.com",
f"{theme.capitalize()} lovers unite! What's your go-to pick? Tell us and like this tweet for more from {author_handle} on InsiderFoodie.com! Link: https://insiderfoodie.com",
f"Ever tried a {theme} that blew your mind? Share your favorites and follow {author_handle} for more on InsiderFoodie.com! Link: https://insiderfoodie.com",
f"What {theme} trend are you loving right now? Let us know and like this tweet to keep up with {author_handle} on InsiderFoodie.com! Link: https://insiderfoodie.com"
]
template = random.choice(engagement_templates)
logging.info(f"Using fallback engagement tweet: {template}")
return template
def generate_engagement_tweets():
"""Generate engagement tweets for authors and save to file."""
try:
logging.info("Starting foodie_engagement_generator.py")
tweets = []
timestamp = datetime.now(timezone.utc).isoformat()
for author in AUTHORS:
try:
tweet = generate_engagement_tweet(author)
if not tweet:
logging.error(f"Failed to generate engagement tweet for {author['username']}, skipping")
continue
# Collect tweet data
tweet_data = {
"username": author["username"],
"x_handle": X_API_CREDENTIALS[author["username"]]["x_username"],
"tweet": tweet,
"timestamp": timestamp
}
tweets.append(tweet_data)
logging.info(f"Generated engagement tweet for {author['username']}: {tweet}")
except Exception as e:
logging.error(f"Error generating engagement tweet for {author['username']}: {e}", exc_info=True)
continue
# Save tweets to file, overwriting any existing content
if tweets:
try:
tweet_data = {
"timestamp": timestamp,
"tweets": tweets
}
save_json_file(ENGAGEMENT_TWEETS_FILE, tweet_data)
logging.info(f"Saved {len(tweets)} engagement tweets to {ENGAGEMENT_TWEETS_FILE}")
except Exception as e:
logging.error(f"Failed to save engagement tweets to {ENGAGEMENT_TWEETS_FILE}: {e}")
else:
logging.warning("No engagement tweets generated, nothing to save")
logging.info("Completed foodie_engagement_generator.py")
sleep_time = random.randint(82800, 86400) # ~23–24 hours
return True, sleep_time
except Exception as e:
logging.error(f"Unexpected error in generate_engagement_tweets: {e}", exc_info=True)
sleep_time = random.randint(82800, 86400) # ~23–24 hours
return False, sleep_time
def main():
"""Main function to run the script."""
lock_fd = None
try:
lock_fd = acquire_lock()
setup_logging()
update_system_activity(SCRIPT_NAME, "running", os.getpid()) # Record start
success, sleep_time = generate_engagement_tweets()
update_system_activity(SCRIPT_NAME, "stopped") # Record stop
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
return success, sleep_time
except Exception as e:
logging.error(f"Fatal error in main: {e}", exc_info=True)
print(f"Fatal error: {e}")
update_system_activity(SCRIPT_NAME, "stopped") # Record stop on error
sleep_time = random.randint(82800, 86400) # ~23–24 hours
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
return False, sleep_time
finally:
if lock_fd:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
if os.path.exists(LOCK_FILE): os.remove(LOCK_FILE)
if __name__ == "__main__":
success, sleep_time = main()

@ -1,324 +1,76 @@
# foodie_engagement_tweet.py
import json
import logging
import random
import signal
import sys
import fcntl
import os
import time
import logging
from datetime import datetime, timedelta, timezone
from openai import OpenAI
from foodie_utils import (
post_tweet,
AUTHORS,
SUMMARY_MODEL,
check_author_rate_limit,
load_json_file,
save_json_file, # Add this
update_system_activity,
get_next_author_round_robin
)
from foodie_config import X_API_CREDENTIALS, AUTHOR_BACKGROUNDS_FILE
from dotenv import load_dotenv
from openai import OpenAI # Add this import
from foodie_utils import post_tweet, AUTHORS, SUMMARY_MODEL
from dotenv import load_dotenv # Add this import
print("Loading environment variables")
load_dotenv()
print(f"Environment variables loaded: OPENAI_API_KEY={bool(os.getenv('OPENAI_API_KEY'))}")
# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
SCRIPT_NAME = "foodie_engagement_tweet"
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_engagement_tweet.lock"
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_engagement_tweet.log"
LOG_PRUNE_DAYS = 30
MAX_RETRIES = 3
RETRY_BACKOFF = 2
def setup_logging():
"""Initialize logging with pruning of old logs."""
print("Entering setup_logging")
try:
log_dir = os.path.dirname(LOG_FILE)
print(f"Ensuring log directory exists: {log_dir}")
os.makedirs(log_dir, exist_ok=True)
print(f"Log directory permissions: {os.stat(log_dir).st_mode & 0o777}, owner: {os.stat(log_dir).st_uid}")
if os.path.exists(LOG_FILE):
print(f"Pruning old logs in {LOG_FILE}")
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
pruned_lines = []
malformed_count = 0
for line in lines:
if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit():
malformed_count += 1
continue
try:
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
if timestamp > cutoff:
pruned_lines.append(line)
except ValueError:
malformed_count += 1
continue
print(f"Skipped {malformed_count} malformed log lines during pruning")
with open(LOG_FILE, 'w') as f:
f.writelines(pruned_lines)
print(f"Log file pruned, new size: {os.path.getsize(LOG_FILE)} bytes")
print(f"Configuring logging to {LOG_FILE}")
logging.basicConfig(
filename=LOG_FILE,
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.getLogger("openai").setLevel(logging.WARNING)
logging.info("Logging initialized for foodie_engagement_tweet.py")
print("Logging setup complete")
except Exception as e:
print(f"Failed to setup logging: {e}")
sys.exit(1)
def acquire_lock():
"""Acquire a lock to prevent concurrent runs."""
print("Entering acquire_lock")
try:
lock_dir = os.path.dirname(LOCK_FILE)
print(f"Ensuring lock directory exists: {lock_dir}")
os.makedirs(lock_dir, exist_ok=True)
print(f"Opening lock file: {LOCK_FILE}")
lock_fd = open(LOCK_FILE, 'w')
print(f"Attempting to acquire lock on {LOCK_FILE}")
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
lock_fd.write(str(os.getpid()))
lock_fd.flush()
print(f"Lock acquired, PID: {os.getpid()}")
return lock_fd
except IOError as e:
print(f"Failed to acquire lock, another instance is running: {e}")
logging.info("Another instance of foodie_engagement_tweet.py is running")
sys.exit(0)
except Exception as e:
print(f"Unexpected error in acquire_lock: {e}")
sys.exit(1)
def signal_handler(sig, frame):
"""Handle termination signals gracefully."""
print(f"Received signal: {sig}")
logging.info("Received termination signal, marking script as stopped...")
update_system_activity(SCRIPT_NAME, "stopped")
sys.exit(0)
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
# Initialize OpenAI client (environment variables were already loaded above)
print("Initializing OpenAI client")
try:
    if not os.getenv("OPENAI_API_KEY"):
        print("OPENAI_API_KEY is not set")
        logging.error("OPENAI_API_KEY is not set in environment variables")
        raise ValueError("OPENAI_API_KEY is required")
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    print("OpenAI client initialized")
except Exception as e:
    print(f"Failed to initialize OpenAI client: {e}")
    logging.error(f"Failed to initialize OpenAI client: {e}", exc_info=True)
    sys.exit(1)
# Load author backgrounds
print(f"Loading author backgrounds from {AUTHOR_BACKGROUNDS_FILE}")
try:
with open(AUTHOR_BACKGROUNDS_FILE, 'r') as f:
AUTHOR_BACKGROUNDS = json.load(f)
print(f"Author backgrounds loaded: {len(AUTHOR_BACKGROUNDS)} entries")
except Exception as e:
print(f"Failed to load author_backgrounds.json: {e}")
logging.error(f"Failed to load author_backgrounds.json: {e}", exc_info=True)
sys.exit(1)
def generate_engagement_tweet(author):
    """Generate an engagement tweet using author background themes and persona."""
    print(f"Generating tweet for author: {author['username']}")
    try:
        credentials = X_API_CREDENTIALS.get(author["username"])
        if not credentials:
            print(f"No X credentials found for {author['username']}")
            logging.error(f"No X credentials found for {author['username']}")
            return None
        author_handle = credentials["x_username"]
        print(f"Author handle: {author_handle}")
        # Pick a theme from the author's background, falling back to a generic one
        background = next((bg for bg in AUTHOR_BACKGROUNDS if bg["username"] == author["username"]), {})
        if not background or "engagement_themes" not in background:
            print(f"No background or themes for {author['username']}, using default theme")
            logging.warning(f"No background or engagement themes found for {author['username']}")
            theme = "food trends"
        else:
            theme = random.choice(background["engagement_themes"])
        print(f"Selected theme: {theme}")
        # Get the author's persona from AUTHORS
        persona = next((a["persona"] for a in AUTHORS if a["username"] == author["username"]), "Unknown")
        prompt = (
            f"Generate a concise tweet (under 230 characters) for {author_handle} as a {persona}. "
            f"Create an engaging, specific question about {theme} to spark interaction (e.g., 'What's your go-to sushi spot in Tokyo?'). "
            f"Include a call to action to follow {author_handle} or like the tweet, and mention InsiderFoodie.com with a link to https://insiderfoodie.com. "
            f"Avoid using the word 'elevate'—use more humanized language like 'level up' or 'bring to life'. "
            f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways)."
        )
        print(f"OpenAI prompt: {prompt}")
        for attempt in range(MAX_RETRIES):
            print(f"Attempt {attempt + 1} to generate tweet")
            try:
                response = client.chat.completions.create(
                    model=SUMMARY_MODEL,
                    messages=[
                        {"role": "system", "content": "You are a social media expert crafting engaging tweets."},
                        {"role": "user", "content": prompt}
                    ],
                    max_tokens=100,
                    temperature=0.7
                )
                tweet = response.choices[0].message.content.strip()
                if len(tweet) > 280:
                    tweet = tweet[:277] + "..."
                print(f"Generated tweet: {tweet}")
                logging.debug(f"Generated engagement tweet: {tweet}")
                return tweet
            except Exception as e:
                print(f"Failed to generate tweet (attempt {attempt + 1}): {e}")
                logging.warning(f"Failed to generate engagement tweet for {author['username']} (attempt {attempt + 1}): {e}")
                if attempt < MAX_RETRIES - 1:
                    time.sleep(RETRY_BACKOFF * (2 ** attempt))
        # Retries exhausted: fall back to a templated tweet
        print(f"Exhausted retries for {author['username']}")
        logging.error(f"Failed to generate engagement tweet after {MAX_RETRIES} attempts")
        engagement_templates = [
            f"What's your favorite {theme} dish? Share below and follow {author_handle} for more on InsiderFoodie.com! Link: https://insiderfoodie.com",
            f"Which {theme} spot is a must-visit? Tell us and like this tweet for more from {author_handle} on InsiderFoodie.com! Link: https://insiderfoodie.com",
            f"Got a {theme} hidden gem? Share it and follow {author_handle} for more on InsiderFoodie.com! Link: https://insiderfoodie.com",
            f"What's the best {theme} you've tried? Let us know and like this tweet to keep up with {author_handle} on InsiderFoodie.com! Link: https://insiderfoodie.com"
        ]
        template = random.choice(engagement_templates)
        print(f"Using fallback tweet: {template}")
        logging.info(f"Using fallback engagement tweet: {template}")
        return template
    except Exception as e:
        print(f"Error in generate_engagement_tweet for {author['username']}: {e}")
        logging.error(f"Error in generate_engagement_tweet for {author['username']}: {e}", exc_info=True)
        return None
def post_engagement_tweet():
"""Post engagement tweets for all authors with a delay between posts."""
print("Entering post_engagement_tweet")
try:
logging.info("Starting foodie_engagement_tweet.py")
posted = False
state_file = '/home/shane/foodie_automator/author_state.json'
state = load_json_file(state_file, default={'last_author_index': -1})
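        # last_author_index mirrors the round-robin state used by the other posters, so engagement
        # posts keep the author rotation in sync (assumption: foodie_utils reads the same state file).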
delay_seconds = 30 # Delay between posts to avoid rate limits and spread engagement
# Iterate through all authors
for index, author in enumerate(AUTHORS):
username = author['username']
print(f"Processing author: {username}")
logging.info(f"Processing author: {username}")
try:
print("Checking rate limit")
if not check_author_rate_limit(author):
print(f"Rate limit exceeded for {username}, skipping")
logging.info(f"Rate limit exceeded for {username}, skipping")
continue
print("Generating tweet")
tweet = generate_engagement_tweet(author)
if not tweet:
print(f"Failed to generate tweet for {username}, skipping")
logging.error(f"Failed to generate engagement tweet for {username}, skipping")
continue
print(f"Posting tweet: {tweet}")
logging.info(f"Posting engagement tweet for {username}: {tweet}")
if post_tweet(author, tweet):
print(f"Successfully posted tweet for {username}")
logging.info(f"Successfully posted engagement tweet for {username}")
posted = True
# Update last_author_index to maintain round-robin consistency
state['last_author_index'] = index
save_json_file(state_file, state)
else:
print(f"Failed to post tweet for {username}")
logging.warning(f"Failed to post tweet for {username}")
# Add delay between posts (except for the last author)
if index < len(AUTHORS) - 1:
print(f"Waiting {delay_seconds} seconds before next post")
logging.info(f"Waiting {delay_seconds} seconds before next post")
time.sleep(delay_seconds)
except Exception as e:
print(f"Error posting tweet for {username}: {e}")
logging.error(f"Error posting tweet for {username}: {e}", exc_info=True)
continue
print("Completed post_engagement_tweet")
logging.info("Completed foodie_engagement_tweet.py")
sleep_time = 86400 # 1 day for cron
return posted, sleep_time
except Exception as e:
print(f"Unexpected error in post_engagement_tweet: {e}")
logging.error(f"Unexpected error in post_engagement_tweet: {e}", exc_info=True)
sleep_time = 86400 # 1 day
return False, sleep_time
def main():
"""Main function to run the script."""
print("Starting main")
lock_fd = None
try:
print("Acquiring lock")
lock_fd = acquire_lock()
print("Setting up logging")
setup_logging()
print("Updating system activity to running")
update_system_activity(SCRIPT_NAME, "running", os.getpid())
print("Checking author state file")
author_state_file = "/home/shane/foodie_automator/author_state.json"
if not os.path.exists(author_state_file):
print(f"Author state file not found: {author_state_file}")
logging.error(f"Author state file not found: {author_state_file}")
raise FileNotFoundError(f"Author state file not found: {author_state_file}")
print(f"Author state file exists: {author_state_file}")
print("Posting engagement tweet")
posted, sleep_time = post_engagement_tweet()
print("Updating system activity to stopped")
update_system_activity(SCRIPT_NAME, "stopped")
print(f"Run completed, posted: {posted}, sleep_time: {sleep_time}")
logging.info(f"Run completed, posted: {posted}, sleep_time: {sleep_time} seconds")
return posted, sleep_time
except Exception as e:
print(f"Exception in main: {e}")
logging.error(f"Fatal error in main: {e}", exc_info=True)
print(f"Fatal error: {e}")
update_system_activity(SCRIPT_NAME, "stopped")
sleep_time = 86400 # 1 day for cron
print(f"Run completed, sleep_time: {sleep_time}")
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
return False, sleep_time
finally:
if lock_fd:
print("Releasing lock")
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
            if os.path.exists(LOCK_FILE):
                os.remove(LOCK_FILE)
                print(f"Lock file removed: {LOCK_FILE}")
# Reference date for the 2-day posting interval (starting from April 29, 2025)
REFERENCE_DATE = datetime(2025, 4, 29, tzinfo=timezone.utc)

if __name__ == "__main__":
    days_since_reference = (datetime.now(timezone.utc) - REFERENCE_DATE).days
    # Post only every second day relative to the reference date
    if days_since_reference % 2 == 0:
        print("Today is an engagement tweet day (every 2 days). Posting...")
        posted, sleep_time = main()
    else:
        print("Today is not an engagement tweet day (every 2 days). Skipping...")

File diff suppressed because it is too large

@ -1,395 +1,133 @@
# foodie_weekly_thread.py
import json
import os
import logging
import random
import signal
import sys
import fcntl
import time
import re
import shutil
from datetime import datetime, timedelta, timezone
from openai import OpenAI
from dotenv import load_dotenv
from foodie_utils import (
    post_tweet,
    AUTHORS,
    SUMMARY_MODEL,
    load_json_file,
    save_json_file,
    update_system_activity
)
from foodie_config import X_API_CREDENTIALS, RECENT_POSTS_FILE
load_dotenv()
SCRIPT_NAME = "foodie_weekly_thread"
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_weekly_thread.lock"
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_weekly_thread.log"
WEEKLY_THREADS_FILE = "/home/shane/foodie_automator/weekly_threads.json"
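# WEEKLY_THREADS_FILE ends up holding a structure like (illustrative values only):
# {"week_start": "...", "week_end": "...", "timestamp": "...",
#  "threads": [{"username": "...", "x_handle": "...", "intro_tweet": "...",
#               "thread_tweets": ["..."], "final_cta": "...", "timestamp": "..."}]}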
LOG_PRUNE_DAYS = 30
MAX_RETRIES = 3
RETRY_BACKOFF = 2
def setup_logging():
"""Initialize logging with pruning of old logs."""
try:
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
pruned_lines = []
malformed_count = 0
for line in lines:
if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit():
malformed_count += 1
continue
try:
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
if timestamp > cutoff:
pruned_lines.append(line)
except ValueError:
malformed_count += 1
continue
if malformed_count > 0:
logging.info(f"Skipped {malformed_count} malformed log lines during pruning")
with open(LOG_FILE, 'w') as f:
f.writelines(pruned_lines)
logging.basicConfig(
filename=LOG_FILE,
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.getLogger("openai").setLevel(logging.WARNING)
logging.info("Logging initialized for foodie_weekly_thread.py")
except Exception as e:
print(f"Failed to setup logging: {e}")
sys.exit(1)
def acquire_lock():
"""Acquire a lock to prevent concurrent runs."""
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
lock_fd = open(LOCK_FILE, 'w')
try:
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
lock_fd.write(str(os.getpid()))
lock_fd.flush()
return lock_fd
except IOError:
logging.info("Another instance of foodie_weekly_thread.py is running")
sys.exit(0)
def signal_handler(sig, frame):
"""Handle termination signals gracefully."""
logging.info("Received termination signal, marking script as stopped...")
update_system_activity(SCRIPT_NAME, "stopped")
sys.exit(0)
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
# Logging is configured in setup_logging(); configuring it here would make the later
# file-based basicConfig() a no-op.
# Initialize OpenAI client
try:
    if not os.getenv("OPENAI_API_KEY"):
        logging.error("OPENAI_API_KEY is not set in environment variables")
        raise ValueError("OPENAI_API_KEY is required")
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
except Exception as e:
    logging.error(f"Failed to initialize OpenAI client: {e}", exc_info=True)
    sys.exit(1)
def generate_intro_tweet(author):
"""Generate an intro tweet for the weekly thread."""
credentials = X_API_CREDENTIALS.get(author["username"])
if not credentials:
logging.error(f"No X credentials found for {author['username']}")
return None
author_handle = credentials["x_username"]
logging.debug(f"Generating intro tweet for {author_handle}")
prompt = (
f"Generate a concise tweet (under 200 characters) for {author_handle}. "
f"Introduce a thread of their top 10 foodie posts of the week on InsiderFoodie.com. "
f"Make it engaging, create curiosity, and include a call to action to visit InsiderFoodie.com or follow {author_handle}. "
f"Avoid using the word 'elevate'—use humanized language like 'level up' or 'bring to life'. "
f"Strictly exclude emojis, hashtags, or reward-driven incentives (e.g., giveaways)."
)
for attempt in range(MAX_RETRIES):
try:
response = client.chat.completions.create(
model=SUMMARY_MODEL,
messages=[
{"role": "system", "content": "You are a social media expert crafting engaging tweets."},
{"role": "user", "content": prompt}
],
max_tokens=150,
temperature=0.7
)
tweet = response.choices[0].message.content.strip()
tweet = re.sub(r'[\U0001F000-\U0001FFFF]', '', tweet) # Remove emojis
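            # Note: \U0001F000-\U0001FFFF only covers emoji in the Supplementary Multilingual Plane;
            # older pictographs such as U+2615 (hot beverage) fall outside this range.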
if len(tweet) > 280:
tweet = tweet[:277] + "..."
logging.debug(f"Generated intro tweet: {tweet}")
return tweet
except Exception as e:
logging.warning(f"Failed to generate intro tweet for {author['username']} (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
else:
logging.error(f"Failed to generate intro tweet after {MAX_RETRIES} attempts")
fallback = (
f"Top 10 foodie posts this week by {author_handle}! Visit InsiderFoodie.com and follow {author_handle} for more."
)
logging.info(f"Using fallback intro tweet: {fallback}")
return fallback
def generate_final_cta(author):
"""Generate a final CTA tweet for the weekly thread using GPT."""
credentials = X_API_CREDENTIALS.get(author["username"])
if not credentials:
logging.error(f"No X credentials found for {author['username']}")
return None
author_handle = credentials["x_username"]
logging.debug(f"Generating final CTA tweet for {author_handle}")
prompt = (
f"Generate a concise tweet (under 200 characters) for {author_handle}. "
f"Conclude a thread of their top 10 foodie posts of the week on InsiderFoodie.com. "
f"Make it engaging, value-driven, in the style of Neil Patel. "
f"Include a call to action to visit InsiderFoodie.com and follow {author_handle}. "
f"Mention that top 10 foodie trends are shared every Monday. "
f"Avoid using the word 'elevate'—use humanized language like 'level up' or 'bring to life'. "
f"Strictly exclude emojis, hashtags, or reward-driven incentives (e.g., giveaways)."
)
for attempt in range(MAX_RETRIES):
try:
response = client.chat.completions.create(
model=SUMMARY_MODEL,
messages=[
{"role": "system", "content": "You are a social media expert crafting engaging tweets."},
{"role": "user", "content": prompt}
],
max_tokens=150,
temperature=0.7
)
tweet = response.choices[0].message.content.strip()
tweet = re.sub(r'[\U0001F000-\U0001FFFF]', '', tweet) # Remove emojis
if len(tweet) > 280:
tweet = tweet[:277] + "..."
logging.debug(f"Generated final CTA tweet: {tweet}")
return tweet
except Exception as e:
logging.warning(f"Failed to generate final CTA tweet for {author['username']} (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
else:
logging.error(f"Failed to generate final CTA tweet after {MAX_RETRIES} attempts")
fallback = (
f"Want more foodie insights? Visit insiderfoodie.com and follow {author_handle} "
f"for top 10 foodie trends every Monday."
)
logging.info(f"Using fallback final CTA tweet: {fallback}")
return fallback
def load_recent_posts():
"""Load and deduplicate posts from recent_posts.json."""
logging.debug(f"Attempting to load posts from {RECENT_POSTS_FILE}")
posts = load_json_file(RECENT_POSTS_FILE)
if not posts:
logging.warning(f"No valid posts loaded from {RECENT_POSTS_FILE}")
return []
# Deduplicate posts
unique_posts = {}
for post in posts:
try:
required_fields = ["title", "url", "author_username", "timestamp"]
if not all(key in post for key in required_fields):
logging.warning(f"Skipping invalid post: missing fields {post}")
continue
datetime.fromisoformat(post["timestamp"].replace('Z', '+00:00'))
key = (post["title"], post["url"], post["author_username"])
if key not in unique_posts:
unique_posts[key] = post
else:
logging.debug(f"Skipping duplicate post: {post['title']}")
except (KeyError, ValueError) as e:
logging.warning(f"Skipping post due to invalid format: {e}")
continue
deduped_posts = list(unique_posts.values())
logging.info(f"Loaded {len(deduped_posts)} unique posts from {RECENT_POSTS_FILE}")
return deduped_posts
def filter_posts_for_week(posts, start_date, end_date):
"""Filter posts within the given week range."""
filtered_posts = []
for post in posts:
try:
post_date = datetime.fromisoformat(post["timestamp"])
logging.debug(f"Checking post: title={post['title']}, timestamp={post_date}, in range {start_date} to {end_date}")
if start_date <= post_date <= end_date:
filtered_posts.append(post)
logging.debug(f"Included post: {post['title']}")
else:
logging.debug(f"Excluded post: {post['title']} (timestamp {post_date} outside range)")
except (KeyError, ValueError) as e:
logging.warning(f"Skipping post due to invalid format: {e}")
continue
logging.info(f"Filtered to {len(filtered_posts)} posts for the week")
return filtered_posts
def generate_weekly_thread():
    """Generate weekly thread content for each author and save to file on Mondays."""
    logging.info("Starting foodie_weekly_thread.py")
    # Run only on Mondays
    today = datetime.now(timezone.utc)
    if today.weekday() != 0:  # 0 = Monday
        logging.info(f"Today is not Monday (weekday: {today.weekday()}), skipping weekly thread")
        return
    # Date range: the 7 days prior to the run date
    start_date = (today - timedelta(days=7)).replace(hour=0, minute=0, second=0, microsecond=0)
    end_date = (today - timedelta(days=1)).replace(hour=23, minute=59, second=59, microsecond=999999)
    logging.info(f"Fetching posts from {start_date} to {end_date}")
    # Load, deduplicate, and filter posts
    all_posts = load_recent_posts()
    weekly_posts = filter_posts_for_week(all_posts, start_date, end_date)
    if not weekly_posts:
        logging.warning(f"No posts found within the week range {start_date} to {end_date}, exiting generate_weekly_thread")
        return
    # Group posts by author
    posts_by_author = {author["username"]: [] for author in AUTHORS}
    for post in weekly_posts:
        username = post["author_username"]
        if username in posts_by_author:
            posts_by_author[username].append(post)
    # Generate thread content for each author
    thread_content = []
    timestamp = datetime.now(timezone.utc).isoformat()
    for author in AUTHORS:
        username = author["username"]
        author_posts = posts_by_author.get(username, [])
        if not author_posts:
            logging.info(f"No posts found for {username}, skipping")
            continue
        # Select the two most recent posts (to fit within a 3-tweet limit: intro + 2 posts)
        author_posts = sorted(author_posts, key=lambda x: datetime.fromisoformat(x["timestamp"]), reverse=True)
        selected_posts = author_posts[:2]
        logging.info(f"Found {len(author_posts)} posts for {username}, selected {len(selected_posts)}")
        try:
            # Generate intro tweet
            intro_tweet = generate_intro_tweet(author)
            if not intro_tweet:
                logging.error(f"Failed to generate intro tweet for {username}, skipping")
                continue
            # Generate thread tweets (up to 2)
            thread_tweets = []
            for i, post in enumerate(selected_posts, 1):
                thread_tweet = (
                    f"{i}. {post['title']} "
                    f"Read more: {post['url']}"
                )
                if len(thread_tweet) > 280:
                    thread_tweet = f"{i}. {post['title'][:200]}... Read more: {post['url']}"
                thread_tweets.append(thread_tweet)
                logging.info(f"Generated thread tweet {i} for {username}: {thread_tweet}")
            # Generate final CTA tweet
            final_cta = generate_final_cta(author)
            if not final_cta:
                logging.error(f"Failed to generate final CTA tweet for {username}, using fallback")
                final_cta = (
                    f"Want more foodie insights? Visit insiderfoodie.com and follow {X_API_CREDENTIALS[username]['x_username']} "
                    f"for top 10 foodie trends every Monday."
                )
            # Collect thread content for this author
            author_thread = {
                "username": username,
                "x_handle": X_API_CREDENTIALS[username]["x_username"],
                "intro_tweet": intro_tweet,
                "thread_tweets": thread_tweets,
                "final_cta": final_cta,
                "timestamp": timestamp
            }
            thread_content.append(author_thread)
            logging.info(f"Generated thread content for {username}")
        except Exception as e:
            logging.error(f"Error generating thread content for {username}: {e}", exc_info=True)
            continue
    # Save thread content to file, overwriting any existing content
    if thread_content:
        try:
            # Back up the existing file before overwriting
            if os.path.exists(WEEKLY_THREADS_FILE):
                backup_dir = "/home/shane/foodie_automator/backups"
                os.makedirs(backup_dir, exist_ok=True)
                backup_file = f"{backup_dir}/weekly_threads_{timestamp.replace(':', '-')}.json"
                shutil.copy(WEEKLY_THREADS_FILE, backup_file)
                logging.info(f"Backed up existing {WEEKLY_THREADS_FILE} to {backup_file}")
            thread_data = {
                "week_start": start_date.isoformat(),
                "week_end": end_date.isoformat(),
                "timestamp": timestamp,
                "threads": thread_content
            }
            save_json_file(WEEKLY_THREADS_FILE, thread_data)
            logging.info(f"Saved thread content for {len(thread_content)} authors to {WEEKLY_THREADS_FILE}")
        except Exception as e:
            logging.error(f"Failed to save thread content to {WEEKLY_THREADS_FILE}: {e}")
    else:
        logging.warning("No thread content generated, nothing to save")
    logging.info("Completed foodie_weekly_thread.py")
def main():
"""Main function to run the script."""
lock_fd = None
try:
lock_fd = acquire_lock()
setup_logging()
update_system_activity(SCRIPT_NAME, "running", os.getpid()) # Record start
generate_weekly_thread()
update_system_activity(SCRIPT_NAME, "stopped") # Record stop
except Exception as e:
logging.error(f"Fatal error in main: {e}", exc_info=True)
print(f"Fatal error: {e}")
update_system_activity(SCRIPT_NAME, "stopped") # Record stop on error
sys.exit(1)
finally:
if lock_fd:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
            if os.path.exists(LOCK_FILE):
                os.remove(LOCK_FILE)

if __name__ == "__main__":
    main()

@ -9,7 +9,7 @@ import os
from datetime import datetime, timezone, timedelta
from openai import OpenAI
from foodie_config import OPENAI_API_KEY, AUTHORS, LIGHT_TASK_MODEL, PERSONA_CONFIGS, AUTHOR_BACKGROUNDS_FILE
from foodie_utils import load_json_file, post_tweet
from dotenv import load_dotenv
load_dotenv()
@ -93,37 +93,17 @@ def generate_engagement_tweet(author, persona):
return tweet
except Exception as e:
logging.error(f"Failed to generate engagement tweet for {author['username']}: {e}")
return f"What's your take on {theme}? Let's talk!"
def get_next_author_round_robin():
for author in AUTHORS:
# Check if the author can post before generating the tweet
can_post, remaining, reset = check_author_rate_limit(author)
if can_post:
return author
return None
return f"What’s your take on {theme}? Let’s talk! #FoodieTrends"
def main():
    global is_posting
    logging.info("***** X Poster Launched *****")
    for author in AUTHORS:
        is_posting = True
        try:
            tweet = generate_engagement_tweet(author, author["persona"])
            if post_tweet(author, tweet):
                logging.info(f"Successfully posted engagement tweet for {author['username']}")
            else:
                logging.warning(f"Failed to post engagement tweet for {author['username']}")
        except Exception as e:
            logging.error(f"Error posting engagement tweet for {author['username']}: {e}", exc_info=True)
        finally:
            is_posting = False
        time.sleep(random.uniform(3600, 7200))
    logging.info("X posting completed")
    return random.randint(600, 1800)

@ -1,195 +1,95 @@
#!/bin/bash
# Directory to monitor
BASE_DIR="/home/shane/foodie_automator"
LOG_DIR="$BASE_DIR/logs"
LOCK_DIR="$BASE_DIR/locks"
LOG_FILE="$LOG_DIR/manage_scripts.log"
VENV_PYTHON="$BASE_DIR/venv/bin/python"
CHECKSUM_FILE="$BASE_DIR/.file_checksum"
LOG_FILE="$BASE_DIR/manage_scripts.log"
mkdir -p "$LOG_DIR" "$LOCK_DIR" || { echo "Error: Failed to create directories"; exit 1; }
# Log function
log() {
echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "$LOG_FILE"
echo "$1"
}
# Calculate checksum of files (excluding logs, JSON files, and venv)
calculate_checksum() {
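    # Hash every tracked file, sort the per-file digests, then hash the sorted list so the
    # aggregate checksum is stable regardless of find's traversal order.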
find "$BASE_DIR" -type f \
-not -path "$BASE_DIR/logs/*" \
-not -path "$BASE_DIR/*.log" \
-not -path "$BASE_DIR/*.json" \
-not -path "$BASE_DIR/.file_checksum" \
-not -path "$BASE_DIR/venv/*" \
-not -path "$BASE_DIR/locks/*" \
-exec sha256sum {} \; | sort | sha256sum | awk '{print $1}'
}
# Check if scripts are running
check_running() {
local script_name="$1"
local lock_file="$LOCK_DIR/${script_name}.lock"
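    # The lock file stores the PID of the running instance; if that PID is gone, the lock is stale.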
if [ -f "$lock_file" ]; then
local pid=$(cat "$lock_file")
if ps -p "$pid" > /dev/null; then
log "$script_name is already running (PID: $pid)"
return 0
else
log "Stale lock file for $script_name, removing"
rm -f "$lock_file"
fi
fi
return 1
}
run_script() {
local script="$1"
local script_name="${script%.py}"
local script_log="$LOG_DIR/${script_name}.log"
if check_running "$script_name"; then
echo "0" # Skip sleep
return 1
fi
log "Running $script..."
"$VENV_PYTHON" "$BASE_DIR/$script" >> "$script_log" 2>&1 &
local pid=$!
echo "$pid" > "$LOCK_DIR/${script_name}.lock"
wait "$pid"
local exit_code=$?
if [ $exit_code -eq 0 ]; then
log "$script completed successfully"
else
log "$script failed with exit code $exit_code"
fi
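    # Scrape the script's reported "sleep_time: N" from its log; fall back to a random 1200-1800s window.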
sleep_time=$(grep "sleep_time:" "$script_log" | tail -n 1 | grep -oP 'sleep_time: \K[0-9]+' || echo $((RANDOM % 601 + 1200)))
log "$script completed, sleep_time: $sleep_time seconds"
rm -f "$LOCK_DIR/${script_name}.lock"
echo "$sleep_time"
}
# Stop scripts
stop_scripts() {
    log "Stopping scripts..."
    pkill -TERM -f "python3.*foodie_automator" || true
    sleep 10
    pkill -9 -f "python3.*foodie_automator" || true
    log "Scripts stopped."
}
# Start scripts
start_scripts() {
log "Starting scripts..."
cd "$BASE_DIR"
source venv/bin/activate
# Find all foodie_automator_*.py scripts and start them
for script in foodie_automator_*.py; do
if [ -f "$script" ]; then
local script_name="${script%.py}"
if pkill -TERM -f "$VENV_PYTHON.*$script_name"; then
log "Sent TERM to $script_name"
sleep 2
pkill -9 -f "$VENV_PYTHON.*$script_name" || true
else
log "No running $script_name found"
fi
rm -f "$LOCK_DIR/${script_name}.lock"
log "Removed lock file for $script_name"
log "Starting $script..."
            nohup "$VENV_PYTHON" "$script" >> "$LOG_DIR/${script%.py}.log" 2>&1 &
        fi
    done
log "All scripts started."
}
# Update dependencies
update_dependencies() {
log "Updating dependencies..."
cd "$BASE_DIR" || { log "Failed to change to $BASE_DIR"; exit 1; }
cd "$BASE_DIR"
# Create venv if it doesn't exist
if [ ! -d "venv" ]; then
python3 -m venv venv
log "Created new virtual environment"
fi
source "$BASE_DIR/venv/bin/activate"
pip install --upgrade pip
pip install -r requirements.txt || (pip install requests openai beautifulsoup4 feedparser praw duckduckgo_search selenium Pillow pytesseract webdriver-manager && log "Fallback: Installed core dependencies")
log "Dependencies updated."
}
if [ "$1" == "stop" ]; then
log "Received stop command, stopping all scripts..."
stop_scripts
for script in foodie_engagement_generator.py foodie_weekly_thread.py; do
        script_name="${script%.py}"
if pkill -TERM -f "$VENV_PYTHON.*$script_name"; then
log "Sent TERM to $script_name"
sleep 2
pkill -9 -f "$VENV_PYTHON.*$script_name" || true
else
log "No running $script_name found"
fi
rm -f "$LOCK_DIR/$script_name.lock"
log "Stopped $script_name"
done
log "All scripts stopped. Reminder: Disable cron jobs (crontab -e)."
exit 0
fi
if [ "$1" == "start" ]; then
log "Received start command, starting all scripts..."
cd "$BASE_DIR" || { log "Failed to change to $BASE_DIR"; exit 1; }
source "$BASE_DIR/venv/bin/activate"
if [ -f "$BASE_DIR/.env" ]; then
while IFS='=' read -r key value; do
if [[ ! -z "$key" && ! "$key" =~ ^# ]]; then
export "$key=$value"
fi
done < <(grep -v '^#' "$BASE_DIR/.env")
log ".env variables loaded"
else
log "Error: .env file not found"
exit 1
fi
for script in foodie_automator_rss.py foodie_automator_reddit.py foodie_automator_google.py; do
if [ -f "$script" ]; then
sleep_time=$(run_script "$script" | tail -n 1)
if [ "$sleep_time" != "0" ]; then
log "Sleeping for $sleep_time seconds after $script"
sleep "$sleep_time"
fi
else
log "Script $script not found"
fi
done
if [ -f "foodie_engagement_generator.py" ]; then
if ! check_running "foodie_engagement_generator"; then
log "Running foodie_engagement_generator.py..."
"$VENV_PYTHON" "foodie_engagement_generator.py" >> "$LOG_DIR/foodie_engagement_generator.log" 2>&1
log "foodie_engagement_generator.py completed"
fi
fi
log "All scripts started. Ensure cron jobs are enabled (crontab -l)."
exit 0
fi
# Main logic
log "Checking for file changes..."
CURRENT_CHECKSUM=$(calculate_checksum)
if [ -f "$CHECKSUM_FILE" ]; then
    PREVIOUS_CHECKSUM=$(cat "$CHECKSUM_FILE")
else
    PREVIOUS_CHECKSUM=""
fi
if [ "$CURRENT_CHECKSUM" != "$PREVIOUS_CHECKSUM" ]; then
    log "File changes detected. Previous checksum: $PREVIOUS_CHECKSUM, Current checksum: $CURRENT_CHECKSUM"
    # Stop scripts if any are running
    if pgrep -f "$VENV_PYTHON.*foodie_automator" > /dev/null; then
        stop_scripts
    fi
    # Update dependencies
    update_dependencies
    # Start scripts
    start_scripts
    # Update checksum
    echo "$CURRENT_CHECKSUM" > "$CHECKSUM_FILE"
    log "Checksum updated."
else
    log "No file changes detected."
fi
log "All scripts processed."
exit 0

@ -8,8 +8,6 @@ Pillow==11.1.0
pytesseract==0.3.13
feedparser==6.0.11
webdriver-manager==4.0.2
tweepy==4.14.0
python-dotenv==1.0.1
flickr-api==0.7.1
filelock==3.16.1
requests-oauthlib==2.0.0