Compare commits

..

142 Commits

Author SHA1 Message Date
Shane 23c6c42c51 fix 7 months ago
Shane e2e5adbff5 update to post all authors 7 months ago
Shane 3fc99a8a28 try 7 months ago
Shane eb27a036c4 fix 7 months ago
Shane 3cd0a9cfb6 add debug 7 months ago
Shane f28d529ac9 remove foodie_engagement_tweet 7 months ago
Shane b0f11666d3 try fix running status 7 months ago
Shane 889254d151 fix error 7 months ago
Shane e53f3abc1d try 7 months ago
Shane 055544b111 add better category selection 7 months ago
Shane e9913ab659 test 7 months ago
Shane 555fe4799f add reddit threshold 7 months ago
Shane 0a333f5be5 add watermark url 7 months ago
Shane f98340bff6 fix running instead of sleep for X 7 months ago
Shane fc47142a2c try 7 months ago
Shane 470c775d7a try 7 months ago
Shane a193dbacd7 try 7 months ago
Shane b95952563f try 7 months ago
Shane c30fa1108d try 7 months ago
Shane b77212d88f change email alerts 7 months ago
Shane 827adb4730 convert to sydney time 7 months ago
Shane f0c84f8660 try 7 months ago
Shane 4ffcebd288 try 7 months ago
Shane 491dcc8883 try 7 months ago
Shane 46c86fc82d try 7 months ago
Shane 04a219ed8a try 7 months ago
Shane b8ab6dded7 try 7 months ago
Shane 1010a8cb2a try 7 months ago
Shane 10f918ae24 try 7 months ago
Shane 49835f351c add debug 7 months ago
Shane e72a3673fa try 7 months ago
Shane 7c7c9a7b0a try 7 months ago
Shane eff6f585bb fix 7 months ago
Shane 05f2dfed06 try 7 months ago
Shane c7ccf8aed4 try 7 months ago
Shane 54314609d8 fix rate limit getting info 7 months ago
Shane 66fab42c73 add full capacity email X posting 7 months ago
Shane 173897d6eb use file instead of API checks for rate limit X 7 months ago
Shane 7833cf443a add email alert for low rate limit X 7 months ago
Shane d9da9af095 check 7 months ago
Shane 33287c8a4e add new X post check 7 months ago
Shane f7b84c5de8 update stock images watermark urls 7 months ago
Shane 69eaed4464 try 7 months ago
Shane 12b389fe2b update OCR to black n white 7 months ago
Shane 9c15c1b658 remove requirements install 7 months ago
Shane 12383c6d4e update 7 months ago
Shane 964e6d1816 update 7 months ago
Shane 599d352cbd fix image error, and requirements file 7 months ago
Shane f47a9f1249 comment out weekly tweet start 7 months ago
Shane a7e7a5dad4 update manage_scripts.sh to random run scripts at different times 7 months ago
Shane 7d2b4938d0 improve insert link naturally with cursor 7 months ago
Shane 3edc8135f3 use cursor to fix insert_link_naturally 7 months ago
Shane 5f38374abd OCR images url filter 7 months ago
Shane 6e0f8b4759 fix better images 7 months ago
Shane 9870d276a3 new specific_term functionality 7 months ago
Shane 3b1b030025 fix rss script pixabay 7 months ago
Shane 5cd45cf67f try 7 months ago
Shane a130c65edf try 7 months ago
Shane c36eac7587 try 7 months ago
Shane d54e640644 try 7 months ago
Shane 2554693895 update title filter for reddit homemade 7 months ago
Shane c89a9df6e2 update insert link naturally 7 months ago
Shane 692811190e update categories to add Buzz 7 months ago
Shane 37f9fdcc44 remove emojies 7 months ago
Shane aa27d344af try 7 months ago
Shane e974bd1262 try 7 months ago
Shane 8a24a93878 add engagement manual posting to X 7 months ago
Shane 6346e29b8f update 7 months ago
Shane ba8d54e0fe change weekly to manual posting for X 7 months ago
Shane 4be19ef116 update reddit link in post 7 months ago
Shane 68b2459da4 update reddit feeds 7 months ago
Shane 8c7049fa4c fix 7 months ago
Shane e972714ada fix 7 months ago
Shane 941fe12ec5 add new system_activity.json for rate limit X posts 7 months ago
Shane 765967fb8c skip youtube 7 months ago
Shane eeff0d9861 watermark detection 7 months ago
Shane 07a68837a3 Minimum Resolution Filter 1280px 7 months ago
Shane b5417f3397 try 7 months ago
Shane 677c9b646d try to fix the image upload 7 months ago
Shane ee21e5bf6b fix 7 months ago
Shane 55d2cf81e4 fix 7 months ago
Shane 071726f016 stop using flickr API for images 7 months ago
Shane 7c69b4a451 updage google regions 7 months ago
Shane 7dafac8615 Rate Limit Handling 7 months ago
Shane 903dbf21d0 try 7 months ago
Shane 9806ecfa25 try 7 months ago
Shane e2fec73a72 try 7 months ago
Shane 7950ddd0d8 fix 7 months ago
Shane 1d4fe844c3 check all authors on start rate limit X 7 months ago
Shane 5561516481 update write to rate limit file 7 months ago
Shane fb3adcdc4e adjust tweet length 7 months ago
Shane 99403e7cfe fix 7 months ago
Shane aa7d3aacbd replace the existing post_to_wp function 7 months ago
Shane d7593f7fa7 update better tweet for article summary 7 months ago
Shane 00e6354cff add check once for rate limiting X 7 months ago
Shane 532dd30f65 fix author X post to use persona 7 months ago
Shane ac50299b94 try 7 months ago
Shane 83e69a35b7 try 7 months ago
Shane 5ea9f20dd8 try 7 months ago
Shane 82f4a1d8b1 try 7 months ago
Shane ae194b502f fix 7 months ago
Shane c97425f5e2 fix 7 months ago
Shane 447bfb0087 update real time rate limiting checks for X 7 months ago
Shane 01bab56eb6 update google & reddit rate limiting realtime 7 months ago
Shane bfeec7a560 try 7 months ago
Shane 5f03aabde4 update 7 months ago
Shane 753934db4f update 7 months ago
Shane 167506ef30 update realtime rate limit for X 7 months ago
Shane 3405572ab0 fix 7 months ago
Shane 2158c780ca add max tweet to author limit 7 months ago
Shane 4adaa3442c fix 7 months ago
Shane e7a06e3375 fix all json formatting 7 months ago
Shane a407ece36b fix 7 months ago
Shane dd4eeaed10 fix 7 months ago
Shane 77743121b5 number of entries and each entry’s timestamp 7 months ago
Shane 2041084962 dictionary structure of X_API_CREDENTIALS 7 months ago
Shane 8825d7a9f8 update username X 7 months ago
Shane f6ab7e78d3 balanced author selection logic 7 months ago
Shane 4da83f1d4b fix 7 months ago
Shane ad21bac601 fix 7 months ago
Shane dbe76795c2 fix 7 months ago
Shane 4368bf68a5 fix image upload issue 7 months ago
Shane 028dfc3fc8 add lock files and update weekly tweet to include last tweet to follow 7 months ago
Shane 331979ca9e add logging 7 months ago
Shane 1091ed34c2 test 7 months ago
Shane 9a091a4fa4 test new script 7 months ago
Shane 79f357269d test weekly twet 7 months ago
Shane 256b6c8bad import os 7 months ago
Shane 61b3de52a2 update double tweet & other scripts to new code 7 months ago
Shane 4116d5f742 add more subreddits 7 months ago
Shane 2ecab209c5 fix insert link naturally 7 months ago
Shane 3d0d320648 fix 7 months ago
Shane 504d7f6349 fix 7 months ago
Shane ccddefbc8b try 7 months ago
Shane d2022222c3 try 7 months ago
Shane 7fba0fe96a fix 7 months ago
Shane 6be8493878 fix 7 months ago
Shane e445b6ef33 fix 7 months ago
Shane 5554abdc4a fix 7 months ago
Shane 64d17d5599 try 7 months ago
Shane aa0f3364d5 fix image swap 7 months ago
Shane e5ebd000fe incorporate external context from DDG 7 months ago
13 changed files:

  1. check_rate_limits.py (129)
  2. check_x_capacity.py (191)
  3. foodie_automator_google.py (532)
  4. foodie_automator_reddit.py (774)
  5. foodie_automator_rss.py (600)
  6. foodie_config.py (78)
  7. foodie_engagement_generator.py (236)
  8. foodie_engagement_tweet.py (370)
  9. foodie_utils.py (1785)
  10. foodie_weekly_thread.py (458)
  11. foodie_x_poster.py (32)
  12. manage_scripts.sh (190)
  13. requirements.txt (8)

@@ -0,0 +1,129 @@
import logging

logging.basicConfig(
    filename='/home/shane/foodie_automator/logs/check_x_capacity.log',
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

import requests
from requests_oauthlib import OAuth1
from datetime import datetime, timezone
from dotenv import load_dotenv
import os
import time
from foodie_config import X_API_CREDENTIALS

# Load environment variables from .env file
load_dotenv()

# Function to delete a tweet
def delete_tweet(tweet_id, auth):
    try:
        response = requests.delete(f"https://api.x.com/2/tweets/{tweet_id}", auth=auth)
        response.raise_for_status()
        logging.info(f"Successfully deleted tweet {tweet_id}")
        return True
    except Exception as e:
        logging.error(f"Failed to delete tweet {tweet_id}: {e}")
        return False

# Function to check rate limits for a given author
def check_rate_limits_for_author(username, credentials, retry=False):
    logging.info(f"{'Retrying' if retry else 'Checking'} rate limits for {username} (handle: {credentials['x_username']})")

    # Retrieve OAuth 1.0a credentials for the author
    consumer_key = credentials["api_key"]
    consumer_secret = credentials["api_secret"]
    access_token = credentials["access_token"]
    access_token_secret = credentials["access_token_secret"]

    # Validate credentials
    if not all([consumer_key, consumer_secret, access_token, access_token_secret]):
        logging.error(f"Missing OAuth credentials for {username} in X_API_CREDENTIALS.")
        return None

    # Set up OAuth 1.0a authentication
    auth = OAuth1(consumer_key, consumer_secret, access_token, access_token_secret)

    # Add delay to avoid IP-based rate limiting
    logging.info(f"Waiting 5 seconds before attempting to post for {username}")
    time.sleep(5)

    # Try posting a test tweet to get v2 rate limit headers
    tweet_id = None
    try:
        tweet_data = {"text": f"Test tweet to check rate limits for {username} - please ignore"}
        response = requests.post("https://api.x.com/2/tweets", json=tweet_data, auth=auth)
        response.raise_for_status()
        tweet_id = response.json()['data']['id']
        logging.info("Successfully posted test tweet for %s: %s", username, response.json())
        logging.info("Response Headers for %s: %s", username, response.headers)
        # Extract rate limit headers if present
        app_limit = response.headers.get('x-app-limit-24hour-limit', 'N/A')
        app_remaining = response.headers.get('x-app-limit-24hour-remaining', 'N/A')
        app_reset = response.headers.get('x-app-limit-24hour-reset', 'N/A')
        logging.info("App 24-Hour Tweet Limit for %s: %s", username, app_limit)
        logging.info("App 24-Hour Tweets Remaining for %s: %s", username, app_remaining)
        if app_reset != 'N/A':
            reset_time = datetime.fromtimestamp(int(app_reset), timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
            logging.info("App 24-Hour Reset (Readable) for %s: %s", username, reset_time)
        return tweet_id
    except requests.exceptions.HTTPError as e:
        logging.info("Test Tweet Response Status Code for %s: %s", username, e.response.status_code)
        logging.info("Test Tweet Response Headers for %s: %s", username, e.response.headers)
        if e.response.status_code == 429:
            logging.info("Rate Limit Exceeded for /2/tweets for %s", username)
            # Extract user-specific 24-hour limits
            user_limit = e.response.headers.get('x-user-limit-24hour-limit', 'N/A')
            user_remaining = e.response.headers.get('x-user-limit-24hour-remaining', 'N/A')
            user_reset = e.response.headers.get('x-user-limit-24hour-reset', 'N/A')
            logging.info("User 24-Hour Tweet Limit for %s: %s", username, user_limit)
            logging.info("User 24-Hour Tweets Remaining for %s: %s", username, user_remaining)
            logging.info("User 24-Hour Reset (Timestamp) for %s: %s", username, user_reset)
            if user_reset != 'N/A':
                reset_time = datetime.fromtimestamp(int(user_reset), timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
                logging.info("User 24-Hour Reset (Readable) for %s: %s", username, reset_time)
            # Extract app-specific 24-hour limits
            app_limit = e.response.headers.get('x-app-limit-24hour-limit', 'N/A')
            app_remaining = e.response.headers.get('x-app-limit-24hour-remaining', 'N/A')
            app_reset = e.response.headers.get('x-app-limit-24hour-reset', 'N/A')
            logging.info("App 24-Hour Tweet Limit for %s: %s", username, app_limit)
            logging.info("App 24-Hour Tweets Remaining for %s: %s", username, app_remaining)
            logging.info("App 24-Hour Reset (Timestamp) for %s: %s", username, app_reset)
            if app_reset != 'N/A':
                reset_time = datetime.fromtimestamp(int(app_reset), timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
                logging.info("App 24-Hour Reset (Readable) for %s: %s", username, reset_time)
        return None
    except Exception as e:
        logging.error("Failed to post test tweet for %s: %s", username, e)
        return None

# Main loop to check rate limits for all authors
if __name__ == "__main__":
    # First pass: Attempt to post for all authors
    successful_tweets = {}
    for username, credentials in X_API_CREDENTIALS.items():
        tweet_id = check_rate_limits_for_author(username, credentials)
        if tweet_id:
            successful_tweets[username] = (tweet_id, credentials)
        logging.info("-" * 50)

    # Delete successful tweets to free up quota
    for username, (tweet_id, credentials) in successful_tweets.items():
        auth = OAuth1(
            credentials["api_key"],
            credentials["api_secret"],
            credentials["access_token"],
            credentials["access_token_secret"]
        )
        delete_tweet(tweet_id, auth)

    # Second pass: Retry for authors that failed
    logging.info("Retrying for authors that initially failed...")
    for username, credentials in X_API_CREDENTIALS.items():
        if username not in successful_tweets:
            check_rate_limits_for_author(username, credentials, retry=True)
            logging.info("-" * 50)

@@ -0,0 +1,191 @@
#!/usr/bin/env python3
import logging
import pytz

logging.basicConfig(
    filename='/home/shane/foodie_automator/logs/check_x_capacity.log',
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logging.info("TEST: Logging is configured and working.")
logger = logging.getLogger(__name__)

from datetime import datetime, timezone
from foodie_utils import (
    AUTHORS, check_author_rate_limit, load_json_file,
    get_x_rate_limit_status, update_system_activity, is_any_script_running,
    save_json_file
)
import time
import sys
import os
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from foodie_config import EMAIL_CONFIG

# File to track sent notifications
NOTIFICATION_FILE = '/home/shane/foodie_automator/notification_tracking.json'

def load_notification_tracking():
    """Load notification tracking data as a dict. If not a dict, reset to {}."""
    data = load_json_file(NOTIFICATION_FILE, default={})
    if not isinstance(data, dict):
        logging.warning(f"notification_tracking.json was not a dict, resetting to empty dict.")
        data = {}
        save_json_file(NOTIFICATION_FILE, data)
    return data

def save_notification_tracking(tracking_data):
    """Save notification tracking data as a dict."""
    if not isinstance(tracking_data, dict):
        logging.warning(f"Attempted to save non-dict to notification_tracking.json, resetting to empty dict.")
        tracking_data = {}
    save_json_file(NOTIFICATION_FILE, tracking_data)

def should_send_notification(username, reset_time):
    """Check if we should send a notification for this author."""
    tracking = load_notification_tracking()
    author_data = tracking.get(username, {})
    reset_time_str = str(reset_time)
    logging.debug(f"[DEBUG] should_send_notification: username={username}, reset_time_str={reset_time_str}, author_data={author_data}")
    if not author_data or str(author_data.get('reset_time')) != reset_time_str:
        logging.info(f"[DEBUG] Sending notification for {username}. Previous reset_time: {author_data.get('reset_time')}, New reset_time: {reset_time_str}")
        tracking[username] = {
            'last_notification': datetime.now(timezone.utc).isoformat(),
            'reset_time': reset_time_str
        }
        save_notification_tracking(tracking)
        return True
    logging.info(f"[DEBUG] Skipping notification for {username}. Already notified for reset_time: {reset_time_str}")
    return False

def send_capacity_alert(username, remaining, reset_time):
    """Send email alert when an author's tweet capacity is full."""
    # Always use string for reset_time
    reset_time_str = str(reset_time)
    logging.debug(f"[DEBUG] send_capacity_alert: username={username}, remaining={remaining}, reset_time_str={reset_time_str}")
    if not should_send_notification(username, reset_time_str):
        logger.info(f"Skipping duplicate notification for {username}")
        return
    try:
        msg = MIMEMultipart()
        msg['From'] = EMAIL_CONFIG['from_email']
        msg['To'] = EMAIL_CONFIG['to_email']
        msg['Subject'] = f"X Capacity Alert: {username}"
        body = f"""
X Tweet Capacity Alert!
Username: {username}
Time: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}
Remaining Tweets: {remaining}/17
Reset Time: {reset_time_str}
This author has reached their daily tweet limit.
The quota will reset at the time shown above.
This is an automated alert from your foodie_automator system.
"""
        msg.attach(MIMEText(body, 'plain'))
        with smtplib.SMTP(EMAIL_CONFIG['smtp_server'], EMAIL_CONFIG['smtp_port']) as server:
            server.starttls()
            server.login(EMAIL_CONFIG['smtp_username'], EMAIL_CONFIG['smtp_password'])
            server.send_message(msg)
        logger.info(f"Sent capacity alert email for {username}")
    except Exception as e:
        logger.error(f"Failed to send capacity alert email: {e}")

def display_author_status(author):
    """Display detailed status for a single author."""
    username = author['username']
    can_post, remaining, reset = check_author_rate_limit(author)
    reset_time_utc = datetime.fromtimestamp(reset, tz=timezone.utc)
    reset_time_str = reset_time_utc.strftime('%Y-%m-%d %H:%M:%S UTC')
    # Convert to Sydney time
    try:
        sydney_tz = pytz.timezone('Australia/Sydney')
        reset_time_sydney = reset_time_utc.astimezone(sydney_tz)
        reset_time_sydney_str = reset_time_sydney.strftime('%Y-%m-%d %H:%M:%S %Z')
    except Exception as e:
        reset_time_sydney_str = 'N/A'
    status = "" if can_post else ""
    print(f"\n{status} {username}:")
    print(f" • Remaining tweets: {remaining}/17")
    print(f" • Reset time (UTC): {reset_time_str}")
    print(f" • Reset time (Sydney): {reset_time_sydney_str}")
    print(f" • Can post: {'Yes' if can_post else 'No'}")
    # Send alert if capacity is full
    if remaining == 0:
        send_capacity_alert(username, remaining, reset_time_str)
    # Show API status for verification
    if not is_any_script_running():
        api_remaining, api_reset = get_x_rate_limit_status(author)
        if api_remaining is not None:
            api_reset_time = datetime.fromtimestamp(api_reset, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
            print(f" • API Status: {api_remaining} remaining, resets at {api_reset_time}")

def display_total_capacity():
    """Display total capacity across all authors."""
    total_capacity = len(AUTHORS) * 17
    total_used = 0
    available_authors = 0
    print("\n=== X Posting Capacity Status ===")
    print(f"Total daily capacity: {total_capacity} tweets ({len(AUTHORS)} authors × 17 tweets)")
    print("\nAuthor Status:")
    for author in AUTHORS:
        can_post, remaining, _ = check_author_rate_limit(author)
        # Only check API if no scripts are running
        if not is_any_script_running():
            api_remaining, _ = get_x_rate_limit_status(author)
            if api_remaining is not None:
                remaining = api_remaining
                can_post = remaining > 0
        used = 17 - remaining
        total_used += used
        if can_post:
            available_authors += 1
        display_author_status(author)
    print("\n=== Summary ===")
    print(f"Total tweets used today: {total_used}")
    print(f"Total tweets remaining: {total_capacity - total_used}")
    print(f"Authors available to post: {available_authors}/{len(AUTHORS)}")
    # Calculate percentage used
    percent_used = (total_used / total_capacity) * 100
    print(f"Capacity used: {percent_used:.1f}%")
    if percent_used > 80:
        print("\nWarning: High capacity usage! Consider adding more authors.")
    elif percent_used > 60:
        print("\nNote: Moderate capacity usage. Monitor usage patterns.")

def main():
    try:
        # Update system activity
        update_system_activity("check_x_capacity", "running", os.getpid())
        # Display capacity status
        display_total_capacity()
        # Update system activity
        update_system_activity("check_x_capacity", "stopped")
    except KeyboardInterrupt:
        print("\nScript interrupted by user")
        update_system_activity("check_x_capacity", "stopped")
        sys.exit(0)
    except Exception as e:
        logger.error(f"Error: {e}")
        update_system_activity("check_x_capacity", "stopped")
        sys.exit(1)

if __name__ == "__main__":
    main()

@@ -24,20 +24,37 @@ from foodie_config import (
)
from foodie_utils import (
load_json_file, save_json_file, get_image, generate_image_query,
upload_image_to_wp, select_best_persona, determine_paragraph_count,
upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
is_interesting, generate_title_from_summary, summarize_with_gpt4o,
generate_category_from_summary, post_to_wp, prepare_post_data,
smart_image_and_filter, insert_link_naturally, get_flickr_image
select_best_author, smart_image_and_filter, get_flickr_image,
get_next_author_round_robin, check_author_rate_limit, update_system_activity
)
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt # Removed select_best_cta import
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
from dotenv import load_dotenv
import fcntl
load_dotenv()
# Define constants at the top
SCRIPT_NAME = "foodie_automator_google"
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_google_titles.json'
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
EXPIRATION_HOURS = 24
IMAGE_EXPIRATION_DAYS = 7
is_posting = False
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_google.lock"
# Load JSON files after constants are defined
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
posted_titles = set(entry["title"] for entry in posted_titles_data if "title" in entry)
used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
def signal_handler(sig, frame):
logging.info("Received termination signal, checking if safe to exit...")
logging.info("Received termination signal, marking script as stopped...")
update_system_activity(SCRIPT_NAME, "stopped")
if is_posting:
logging.info("Currently posting, will exit after completion.")
else:
@@ -47,26 +64,104 @@ def signal_handler(sig, frame):
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
logger = logging.getLogger()
logger.setLevel(logging.INFO)
file_handler = logging.FileHandler('/home/shane/foodie_automator/foodie_automator_google.log', mode='a')
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logger.addHandler(file_handler)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logger.addHandler(console_handler)
logging.info("Logging initialized for foodie_automator_google.py")
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_automator_google.log"
LOG_PRUNE_DAYS = 30
MAX_RETRIES = 3
RETRY_BACKOFF = 2
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
def setup_logging():
try:
# Ensure log directory exists
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}")
# Check write permissions
if not os.access(os.path.dirname(LOG_FILE), os.W_OK):
raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}")
# Test write to log file
try:
with open(LOG_FILE, 'a') as f:
f.write("")
logging.debug(f"Confirmed write access to {LOG_FILE}")
except Exception as e:
raise PermissionError(f"Cannot write to {LOG_FILE}: {e}")
# Prune old logs
if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
log_entries = []
current_entry = []
timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')
for line in lines:
if timestamp_pattern.match(line):
if current_entry:
log_entries.append(''.join(current_entry))
current_entry = [line]
else:
current_entry.append(line)
if current_entry:
log_entries.append(''.join(current_entry))
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
pruned_entries = []
for entry in log_entries:
try:
timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
if timestamp > cutoff:
pruned_entries.append(entry)
except ValueError:
logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...")
continue
with open(LOG_FILE, 'w') as f:
f.writelines(pruned_entries)
logging.debug(f"Log file pruned: {LOG_FILE}")
# Configure logging
logging.basicConfig(
filename=LOG_FILE,
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
force=True # Ensure this config takes precedence
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.info("Logging initialized for foodie_automator_google.py")
except Exception as e:
# Fallback to console logging if file logging fails
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
force=True
)
logging.error(f"Failed to setup file logging for {LOG_FILE}: {e}. Using console logging.")
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.info("Console logging initialized as fallback for foodie_automator_google.py")
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_google_titles.json'
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
EXPIRATION_HOURS = 24
IMAGE_EXPIRATION_DAYS = 7
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
posted_titles = set(entry["title"] for entry in posted_titles_data)
used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
def acquire_lock():
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
lock_fd = open(LOCK_FILE, 'w')
try:
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
lock_fd.write(str(os.getpid()))
lock_fd.flush()
return lock_fd
except IOError:
logging.info("Another instance of foodie_automator_google.py is running")
sys.exit(0)
def parse_search_volume(volume_text):
try:
@@ -89,10 +184,11 @@ def scrape_google_trends(geo='US'):
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/125.0.0.0 Safari/537.36")
driver = webdriver.Chrome(options=chrome_options)
driver = None
try:
for attempt in range(3):
for attempt in range(MAX_RETRIES):
try:
driver = webdriver.Chrome(options=chrome_options)
time.sleep(random.uniform(2, 5))
url = f"https://trends.google.com/trending?geo={geo}&hours=24&sort=search-volume&category=5"
logging.info(f"Navigating to {url} (attempt {attempt + 1})")
@@ -105,10 +201,13 @@ def scrape_google_trends(geo='US'):
break
except TimeoutException:
logging.warning(f"Timeout on attempt {attempt + 1} for geo={geo}")
if attempt == 2:
logging.error(f"Failed after 3 attempts for geo={geo}")
if attempt == MAX_RETRIES - 1:
logging.error(f"Failed after {MAX_RETRIES} attempts for geo={geo}")
return []
time.sleep(5)
time.sleep(RETRY_BACKOFF * (2 ** attempt))
if driver:
driver.quit()
continue
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(2)
@@ -145,157 +244,186 @@ def scrape_google_trends(geo='US'):
if trends:
trends.sort(key=lambda x: x["search_volume"], reverse=True)
logging.info(f"Extracted {len(trends)} trends for geo={geo}: {[t['title'] for t in trends]}")
print(f"Raw trends fetched for geo={geo}: {[t['title'] for t in trends]}")
else:
logging.warning(f"No valid trends found with search volume >= 20K for geo={geo}")
return trends
except Exception as e:
logging.error(f"Unexpected error in scrape_google_trends: {e}", exc_info=True)
return []
finally:
driver.quit()
logging.info(f"Chrome driver closed for geo={geo}")
if driver:
driver.quit()
logging.info(f"Chrome driver closed for geo={geo}")
def fetch_duckduckgo_news_context(trend_title, hours=24):
try:
with DDGS() as ddgs:
results = ddgs.news(f"{trend_title} news", timelimit="d", max_results=5)
titles = []
for r in results:
try:
date_str = r["date"]
if '+00:00' in date_str:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc)
else:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
if dt > (datetime.now(timezone.utc) - timedelta(hours=24)):
titles.append(r["title"].lower())
except ValueError as e:
logging.warning(f"Date parsing failed for '{date_str}': {e}")
continue
context = " ".join(titles) if titles else "No recent news found within 24 hours"
logging.info(f"DuckDuckGo News context for '{trend_title}': {context}")
return context
except Exception as e:
logging.warning(f"DuckDuckGo News context fetch failed for '{trend_title}': {e}")
return trend_title
def curate_from_google_trends(geo_list=['US']):
all_trends = []
for geo in geo_list:
trends = scrape_google_trends(geo=geo)
if trends:
all_trends.extend(trends)
if not all_trends:
print("No Google Trends data available")
logging.info("No Google Trends data available")
return None, None, random.randint(600, 1800)
attempts = 0
max_attempts = 10
while attempts < max_attempts and all_trends:
trend = all_trends.pop(0)
title = trend["title"]
link = trend.get("link", "https://trends.google.com/")
summary = trend.get("summary", "")
source_name = "Google Trends"
original_source = f'<a href="{link}">{source_name}</a>'
if title in posted_titles:
print(f"Skipping already posted trend: {title}")
logging.info(f"Skipping already posted trend: {title}")
attempts += 1
for attempt in range(MAX_RETRIES):
try:
with DDGS() as ddgs:
results = ddgs.news(f"{trend_title} news", timelimit="d", max_results=5)
titles = []
for r in results:
try:
date_str = r["date"]
# Handle both ISO formats with and without timezone
if '+00:00' in date_str:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc)
else:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
if dt > (datetime.now(timezone.utc) - timedelta(hours=24)):
titles.append(r["title"].lower())
except ValueError as e:
logging.warning(f"Date parsing failed for '{date_str}': {e}")
continue
context = " ".join(titles) if titles else "No recent news found within 24 hours"
logging.info(f"DuckDuckGo News context for '{trend_title}': {context}")
return context
except Exception as e:
logging.warning(f"DuckDuckGo News context fetch failed for '{trend_title}' (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
continue
logging.error(f"Failed to fetch DuckDuckGo News context for '{trend_title}' after {MAX_RETRIES} attempts")
return trend_title
print(f"Trying Google Trend: {title} from {source_name}")
logging.info(f"Trying Google Trend: {title} from {source_name}")
def curate_from_google_trends(posted_titles_data, posted_titles, used_images_data, used_images):
try:
logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")
regions = ['US', 'GB', 'AU']
all_trends = []
for geo in regions:
logging.info(f"Scraping Google Trends for geo={geo}")
trends = scrape_google_trends(geo=geo)
if trends:
logging.info(f"Collected {len(trends)} trends for geo={geo}")
all_trends.extend(trends)
else:
logging.warning(f"No trends collected for geo={geo}")
unique_trends = []
seen_titles = set()
for trend in all_trends:
if trend["title"] not in seen_titles:
unique_trends.append(trend)
seen_titles.add(trend["title"])
if not unique_trends:
logging.info("No Google Trends data available across regions")
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
unique_trends.sort(key=lambda x: x["search_volume"], reverse=True)
logging.info(f"Total unique trends collected: {len(unique_trends)}")
attempts = 0
max_attempts = 10
while attempts < max_attempts and unique_trends:
trend = unique_trends.pop(0)
title = trend["title"]
link = trend.get("link", "")
summary = trend.get("summary", "")
source_name = trend.get("source", "Google Trends")
original_source = f'<a href="{link}">{source_name}</a>'
original_url = link
if title in posted_titles:
logging.info(f"Skipping already posted trend: {title}")
attempts += 1
continue
image_query, relevance_keywords, skip = smart_image_and_filter(title, summary)
if skip:
print(f"Skipping filtered Google Trend: {title}")
logging.info(f"Skipping filtered Google Trend: {title}")
attempts += 1
continue
author = get_next_author_round_robin()
if not author:
logging.info(f"Skipping trend '{title}' due to tweet rate limits for all authors")
attempts += 1
continue
scoring_content = f"{title}\n\n{summary}"
interest_score = is_interesting(scoring_content)
logging.info(f"Interest score for '{title}': {interest_score}")
if interest_score < 6:
print(f"Google Trends Interest Too Low: {interest_score}")
logging.info(f"Google Trends Interest Too Low: {interest_score}")
attempts += 1
continue
author_username = author["username"]
logging.info(f"Selected author via round-robin: {author_username}")
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
f"Do NOT introduce unrelated concepts.\n"
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = scoring_content
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
interest_score=interest_score,
extra_prompt=extra_prompt
)
if not final_summary:
logging.info(f"Summary failed for '{title}'")
attempts += 1
continue
logging.info(f"Trying Google Trend: {title} from {source_name}")
final_summary = insert_link_naturally(final_summary, source_name, link)
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
if not post_data:
attempts += 1
continue
# Fetch DuckDuckGo context early to enhance smart_image_and_filter
ddg_context = fetch_duckduckgo_news_context(title)
enhanced_summary = summary + "\n\nAdditional Context: " + ddg_context if summary else ddg_context
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
if not image_url:
image_url, image_source, uploader, page_url = get_image(image_query)
try:
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, enhanced_summary)
except Exception as e:
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1
continue
hook = get_dynamic_hook(post_data["title"]).strip()
if skip:
logging.info(f"Skipping filtered trend: {title}")
attempts += 1
continue
# Generate viral share prompt
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
share_links_template = (
f'<p>{share_prompt} '
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
)
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
interest_score = is_interesting(scoring_content)
logging.info(f"Interest score for '{title}': {interest_score}")
if interest_score < 6:
logging.info(f"Trend Interest Too Low: {interest_score}")
attempts += 1
continue
global is_posting
is_posting = True
try:
post_id, post_url = post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=image_url,
original_source=original_source,
image_source=image_source,
uploader=uploader,
pixabay_url=pixabay_url,
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = scoring_content
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
interest_score=interest_score,
should_post_tweet=True
extra_prompt=extra_prompt
)
finally:
is_posting = False
if not final_summary:
logging.info(f"Summary failed for '{title}'")
attempts += 1
continue
if post_id:
final_summary = insert_link_naturally(final_summary, source_name, link)
post_data = {
"title": generate_title_from_summary(final_summary),
"content": final_summary,
"status": "publish",
"author": author_username,
"categories": [generate_category_from_summary(final_summary)]
}
category = post_data["categories"][0]
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
if not image_url:
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
if not image_url:
logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None
uploader = None
page_url = None
hook = get_dynamic_hook(post_data["title"]).strip()
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
share_text = f"Check out this foodie gem! {post_data['title']}"
share_text_encoded = quote(share_text)
post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
share_links_template = (
f'<p>{share_prompt} '
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
)
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
global is_posting
is_posting = True
try:
post_to_wp(
post_id, post_url = post_to_wp(
post_data=post_data,
category=category,
link=link,
@@ -304,11 +432,39 @@ def curate_from_google_trends(geo_list=['US']):
original_source=original_source,
image_source=image_source,
uploader=uploader,
pixabay_url=pixabay_url,
page_url=page_url,
interest_score=interest_score,
post_id=post_id,
should_post_tweet=False
should_post_tweet=True,
summary=final_summary
)
if not post_id:
logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}")
post_url = original_url
else:
logging.info(f"Posted to WordPress for {author_username}: {post_url}")
post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded)
post_data["content"] = f"{final_summary}\n\n{share_links}"
post_data["post_id"] = post_id
if post_id:
post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=None,
original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
interest_score=interest_score,
post_id=post_id,
should_post_tweet=False
)
except Exception as e:
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
post_url = original_url
finally:
is_posting = False
@@ -322,27 +478,51 @@ def curate_from_google_trends(geo_list=['US']):
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
return post_data, category, random.randint(0, 1800)
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from Google Trends *****")
sleep_time = random.randint(1200, 1800)
return post_data, category, sleep_time
attempts += 1
logging.info(f"WP posting failed for '{post_data['title']}'")
print("No interesting Google Trend found after attempts")
logging.info("No interesting Google Trend found after attempts")
return None, None, random.randint(600, 1800)
logging.info("No interesting Google Trend found after attempts")
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
except Exception as e:
logging.error(f"Unexpected error in curate_from_google_trends: {e}", exc_info=True)
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
def run_google_trends_automator():
logging.info("***** Google Trends Automator Launched *****")
geo_list = ['US', 'GB', 'AU']
post_data, category, sleep_time = curate_from_google_trends(geo_list=geo_list)
if sleep_time is None:
sleep_time = random.randint(600, 1800)
print(f"Sleeping for {sleep_time}s")
logging.info(f"Completed run with sleep time: {sleep_time} seconds")
time.sleep(sleep_time)
return post_data, category, sleep_time
lock_fd = None
try:
lock_fd = acquire_lock()
update_system_activity(SCRIPT_NAME, "running", os.getpid()) # Record start
logging.info("***** Google Trends Automator Launched *****")
# Load JSON files once
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
posted_titles = set(entry["title"] for entry in posted_titles_data)
used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
post_data, category, sleep_time = curate_from_google_trends(posted_titles_data, posted_titles, used_images_data, used_images)
if not post_data:
logging.info("No postable Google Trend found")
logging.info("Completed Google Trends run")
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
# Sleep while still marked as running
time.sleep(sleep_time)
update_system_activity(SCRIPT_NAME, "stopped") # Record stop after sleep
return post_data, category, sleep_time
except Exception as e:
logging.error(f"Fatal error in run_google_trends_automator: {e}", exc_info=True)
update_system_activity(SCRIPT_NAME, "stopped") # Record stop on error
sleep_time = random.randint(1200, 1800) # 20–30 minutes
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
return None, None, sleep_time
finally:
if lock_fd:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None
if __name__ == "__main__":
run_google_trends_automator()
setup_logging()
post_data, category, sleep_time = run_google_trends_automator()
# logging.info(f"Run completed, sleep_time: {sleep_time} seconds")

@@ -8,6 +8,7 @@ import json
import signal
import sys
import re
from duckduckgo_search import DDGS
from datetime import datetime, timedelta, timezone
from openai import OpenAI
from urllib.parse import quote
@@ -24,18 +25,24 @@ from foodie_config import (
from foodie_utils import (
load_json_file, save_json_file, get_image, generate_image_query,
upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
summarize_with_gpt4o, generate_category_from_summary, post_to_wp,
prepare_post_data, select_best_author, smart_image_and_filter,
get_flickr_image
is_interesting, generate_title_from_summary, summarize_with_gpt4o,
generate_category_from_summary, post_to_wp, prepare_post_data,
select_best_author, smart_image_and_filter, get_flickr_image,
get_next_author_round_robin, check_author_rate_limit, update_system_activity
)
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt # Removed select_best_cta import
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
import fcntl
load_dotenv()
SCRIPT_NAME = "foodie_automator_reddit"
is_posting = False
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_reddit.lock"
def signal_handler(sig, frame):
logging.info("Received termination signal, checking if safe to exit...")
logging.info("Received termination signal, marking script as stopped...")
update_system_activity(SCRIPT_NAME, "stopped")
if is_posting:
logging.info("Currently posting, will exit after completion.")
else:
@@ -45,56 +52,10 @@ def signal_handler(sig, frame):
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
LOG_FILE = "/home/shane/foodie_automator/foodie_automator_reddit.log"
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_automator_reddit.log"
LOG_PRUNE_DAYS = 30
def setup_logging():
if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
log_entries = []
current_entry = []
timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}')
for line in lines:
if timestamp_pattern.match(line):
if current_entry:
log_entries.append(''.join(current_entry))
current_entry = [line]
else:
current_entry.append(line)
if current_entry:
log_entries.append(''.join(current_entry))
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
pruned_entries = []
for entry in log_entries:
try:
timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
if timestamp > cutoff:
pruned_entries.append(entry)
except ValueError:
logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...")
continue
with open(LOG_FILE, 'w') as f:
f.writelines(pruned_entries)
logging.basicConfig(
filename=LOG_FILE,
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s"
)
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("prawcore").setLevel(logging.WARNING)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.info("Logging initialized for foodie_automator_reddit.py")
setup_logging()
MAX_RETRIES = 3
RETRY_BACKOFF = 2
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_reddit_titles.json'
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
@@ -108,238 +69,405 @@ used_images = set(entry["title"] for entry in used_images_data if "title" in ent
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
def clean_reddit_title(title):
cleaned_title = re.sub(r'^\[.*?\]\s*', '', title).strip()
logging.info(f"Cleaned Reddit title from '{title}' to '{cleaned_title}'")
return cleaned_title
def is_interesting_reddit(title, summary, upvotes, comment_count, top_comments):
def setup_logging():
try:
content = f"Title: {title}\n\nContent: {summary}"
if top_comments:
content += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
# Ensure log directory exists
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}")
response = client.chat.completions.create(
model=LIGHT_TASK_MODEL,
messages=[
{"role": "system", "content": (
"Rate this Reddit post from 0-10 based on rarity, buzzworthiness, and engagement potential for food lovers, covering food topics (skip recipes). "
"Score 8-10 for rare, highly shareable ideas (e.g., unique dishes or restaurant trends). "
"Score 5-7 for fresh, engaging updates with broad appeal. Score below 5 for common or unremarkable content. "
"Consider comments for added context (e.g., specific locations or unique details). "
"Return only a number."
)},
{"role": "user", "content": content}
],
max_tokens=5
)
base_score = int(response.choices[0].message.content.strip()) if response.choices[0].message.content.strip().isdigit() else 0
engagement_boost = 0
if upvotes >= 500:
engagement_boost += 3
elif upvotes >= 100:
engagement_boost += 2
elif upvotes >= 50:
engagement_boost += 1
# Check write permissions
if not os.access(os.path.dirname(LOG_FILE), os.W_OK):
raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}")
# Test write to log file
try:
with open(LOG_FILE, 'a') as f:
f.write("")
logging.debug(f"Confirmed write access to {LOG_FILE}")
except Exception as e:
raise PermissionError(f"Cannot write to {LOG_FILE}: {e}")
# Prune old logs
if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
log_entries = []
current_entry = []
timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}')
for line in lines:
if timestamp_pattern.match(line):
if current_entry:
log_entries.append(''.join(current_entry))
current_entry = [line]
else:
current_entry.append(line)
if current_entry:
log_entries.append(''.join(current_entry))
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
pruned_entries = []
for entry in log_entries:
try:
timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
if timestamp > cutoff:
pruned_entries.append(entry)
except ValueError:
logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...")
continue
with open(LOG_FILE, 'w') as f:
f.writelines(pruned_entries)
logging.debug(f"Log file pruned: {LOG_FILE}")
if comment_count >= 100:
engagement_boost += 2
elif comment_count >= 20:
engagement_boost += 1
final_score = min(base_score + engagement_boost, 10)
logging.info(f"Reddit Interest Score: {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count}, top_comments: {len(top_comments)}) for '{title}'")
print(f"Interest Score for '{title[:50]}...': {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count})")
return final_score
# Configure logging
logging.basicConfig(
filename=LOG_FILE,
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
force=True # Ensure this config takes precedence
)
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("prawcore").setLevel(logging.WARNING)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.info("Logging initialized for foodie_automator_reddit.py")
except Exception as e:
logging.error(f"Reddit interestingness scoring failed: {e}")
print(f"Reddit Interest Error: {e}")
return 0
# Fallback to console logging if file logging fails
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
force=True
)
logging.error(f"Failed to setup file logging for {LOG_FILE}: {e}. Using console logging.")
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.info("Console logging initialized as fallback for foodie_automator_reddit.py")
def get_top_comments(post_url, reddit, limit=3):
def acquire_lock():
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
lock_fd = open(LOCK_FILE, 'w')
try:
submission = reddit.submission(url=post_url)
submission.comment_sort = 'top'
submission.comments.replace_more(limit=0)
top_comments = [comment.body for comment in submission.comments[:limit] if not comment.body.startswith('[deleted]')]
logging.info(f"Fetched {len(top_comments)} top comments for {post_url}")
return top_comments
except Exception as e:
logging.error(f"Failed to fetch comments for {post_url}: {e}")
return []
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
lock_fd.write(str(os.getpid()))
lock_fd.flush()
return lock_fd
except IOError:
logging.info("Another instance of foodie_automator_reddit.py is running")
sys.exit(0)
def fetch_reddit_posts():
reddit = praw.Reddit(
client_id=REDDIT_CLIENT_ID,
client_secret=REDDIT_CLIENT_SECRET,
user_agent=REDDIT_USER_AGENT
)
feeds = ['FoodPorn', 'restaurant', 'FoodIndustry', 'food']
articles = []
cutoff_date = datetime.now(timezone.utc) - timedelta(hours=EXPIRATION_HOURS)
logging.info(f"Starting fetch with cutoff date: {cutoff_date}")
for subreddit_name in feeds:
def clean_reddit_title(title):
"""Clean Reddit post title by removing prefixes, newlines, and special characters."""
if not title or not isinstance(title, str):
logging.warning(f"Invalid title received: {title}")
return ""
# Remove [prefixes], newlines, and excessive whitespace
cleaned_title = re.sub(r'^\[.*?\]\s*', '', title) # Remove [prefix]
cleaned_title = re.sub(r'\n+', ' ', cleaned_title) # Replace newlines with space
cleaned_title = re.sub(r'\s+', ' ', cleaned_title).strip() # Normalize spaces
# Remove special characters (keep alphanumeric, spaces, and basic punctuation)
cleaned_title = re.sub(r'[^\w\s.,!?-]', '', cleaned_title)
logging.info(f"Cleaned Reddit title from '{title}' to '{cleaned_title}'")
return cleaned_title
def is_interesting_reddit(title, summary, upvotes, comment_count, top_comments):
for attempt in range(MAX_RETRIES):
try:
subreddit = reddit.subreddit(subreddit_name)
for submission in subreddit.top(time_filter='day', limit=100):
pub_date = datetime.fromtimestamp(submission.created_utc, tz=timezone.utc)
if pub_date < cutoff_date:
logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
continue
cleaned_title = clean_reddit_title(submission.title)
articles.append({
"title": cleaned_title,
"raw_title": submission.title,
"link": f"https://www.reddit.com{submission.permalink}",
"summary": submission.selftext,
"feed_title": get_clean_source_name(subreddit_name),
"pub_date": pub_date,
"upvotes": submission.score,
"comment_count": submission.num_comments
})
logging.info(f"Fetched {len(articles)} posts from r/{subreddit_name}")
except Exception as e:
logging.error(f"Failed to fetch Reddit feed r/{subreddit_name}: {e}")
logging.info(f"Total Reddit posts fetched: {len(articles)}")
return articles
content = f"Title: {title}\n\nContent: {summary}"
if top_comments:
content += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
response = client.chat.completions.create(
model=LIGHT_TASK_MODEL,
messages=[
{"role": "system", "content": (
"Rate this Reddit post from 0-10 based on rarity, buzzworthiness, and engagement potential for food lovers, covering food topics (skip recipes). "
"Score 8-10 for rare, highly shareable ideas (e.g., unique dishes or restaurant trends). "
"Score 5-7 for fresh, engaging updates with broad appeal. Score below 5 for common or unremarkable content. "
"Consider comments for added context (e.g., specific locations or unique details). "
"Return only a number"
)},
{"role": "user", "content": content}
],
max_tokens=5
)
base_score = int(response.choices[0].message.content.strip()) if response.choices[0].message.content.strip().isdigit() else 0
def curate_from_reddit():
articles = fetch_reddit_posts()
if not articles:
print("No Reddit posts available")
logging.info("No Reddit posts available")
return None, None, None
engagement_boost = 0
if upvotes >= 500:
engagement_boost += 3
elif upvotes >= 100:
engagement_boost += 2
elif upvotes >= 50:
engagement_boost += 1
if comment_count >= 100:
engagement_boost += 2
elif comment_count >= 20:
engagement_boost += 1
articles.sort(key=lambda x: x["upvotes"], reverse=True)
reddit = praw.Reddit(
client_id=REDDIT_CLIENT_ID,
client_secret=REDDIT_CLIENT_SECRET,
user_agent=REDDIT_USER_AGENT
)
attempts = 0
max_attempts = 10
while attempts < max_attempts and articles:
article = articles.pop(0)
title = article["title"]
raw_title = article["raw_title"]
link = article["link"]
summary = article["summary"]
source_name = "Reddit"
original_source = '<a href="https://www.reddit.com/">Reddit</a>'
if raw_title in posted_titles:
print(f"Skipping already posted post: {raw_title}")
logging.info(f"Skipping already posted post: {raw_title}")
attempts += 1
continue
print(f"Trying Reddit Post: {title} from {source_name}")
logging.info(f"Trying Reddit Post: {title} from {source_name}")
image_query, relevance_keywords, skip = smart_image_and_filter(title, summary)
if skip or any(keyword in title.lower() or keyword in raw_title.lower() for keyword in RECIPE_KEYWORDS + ["homemade"]):
print(f"Skipping filtered Reddit post: {title}")
logging.info(f"Skipping filtered Reddit post: {title}")
attempts += 1
continue
top_comments = get_top_comments(link, reddit, limit=3)
interest_score = is_interesting_reddit(
title,
summary,
article["upvotes"],
article["comment_count"],
top_comments
)
logging.info(f"Interest Score: {interest_score} for '{title}'")
if interest_score < 6:
print(f"Reddit Interest Too Low: {interest_score}")
logging.info(f"Reddit Interest Too Low: {interest_score}")
attempts += 1
final_score = min(base_score + engagement_boost, 10)
logging.info(f"Reddit Interest Score: {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count}, top_comments: {len(top_comments)}) for '{title}'")
return final_score
except Exception as e:
logging.warning(f"Reddit interestingness scoring failed (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
continue
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
f"Do NOT introduce unrelated concepts unless in the content or comments.\n"
f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = f"{title}\n\n{summary}"
if top_comments:
content_to_summarize += "\n\nTop Comments:\n" + "\n".join(top_comments)
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
interest_score=interest_score,
extra_prompt=extra_prompt
)
if not final_summary:
logging.info(f"Summary failed for '{title}'")
attempts += 1
logging.error(f"Failed to score Reddit post '{title}' after {MAX_RETRIES} attempts")
return 0
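# Illustrative sketch (added for clarity, not part of the original script): how the
# final interest score is assembled, combining the model's 0-10 rating with the
# upvote/comment boost shown above and capping the result at 10. Thresholds mirror the code.
def example_combined_interest_score(base_score: int, upvotes: int, comment_count: int) -> int:
    boost = 0
    if upvotes >= 500:
        boost += 3
    elif upvotes >= 100:
        boost += 2
    elif upvotes >= 50:
        boost += 1
    if comment_count >= 100:
        boost += 2
    elif comment_count >= 20:
        boost += 1
    return min(base_score + boost, 10)
# Example: a base score of 6 with 650 upvotes and 40 comments yields min(6 + 3 + 1, 10) = 10.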
def get_top_comments(post_url, reddit, limit=3):
for attempt in range(MAX_RETRIES):
try:
submission = reddit.submission(url=post_url)
submission.comment_sort = 'top'
submission.comments.replace_more(limit=0)
top_comments = [comment.body for comment in submission.comments[:limit] if not comment.body.startswith('[deleted]')]
logging.info(f"Fetched {len(top_comments)} top comments for {post_url}")
return top_comments
except Exception as e:
logging.warning(f"Failed to fetch comments for {post_url} (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
continue
final_summary = insert_link_naturally(final_summary, source_name, link)
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
if not post_data:
attempts += 1
logging.error(f"Failed to fetch comments for {post_url} after {MAX_RETRIES} attempts")
return []
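# Illustrative usage (added for clarity, not part of the original script): calling
# get_top_comments with an initialised PRAW client. The post URL is a hypothetical
# placeholder; credentials come from the REDDIT_* settings used elsewhere in this file.
def example_fetch_comments():
    reddit = praw.Reddit(
        client_id=REDDIT_CLIENT_ID,
        client_secret=REDDIT_CLIENT_SECRET,
        user_agent=REDDIT_USER_AGENT
    )
    url = "https://www.reddit.com/r/food/comments/abc123/example_post/"
    comments = get_top_comments(url, reddit, limit=3)
    logging.info(f"Example fetched {len(comments)} comments")
    return comments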
def fetch_duckduckgo_news_context(title, hours=24):
for attempt in range(MAX_RETRIES):
try:
with DDGS() as ddgs:
results = ddgs.news(f"{title} news", timelimit="d", max_results=5)
titles = []
for r in results:
try:
date_str = r["date"]
if '+00:00' in date_str:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc)
else:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
if dt > (datetime.now(timezone.utc) - timedelta(hours=24)):
titles.append(r["title"].lower())
except ValueError as e:
logging.warning(f"Date parsing failed for '{date_str}': {e}")
continue
context = " ".join(titles) if titles else "No recent news found within 24 hours"
logging.info(f"DuckDuckGo News context for '{title}': {context}")
return context
except Exception as e:
logging.warning(f"DuckDuckGo News context fetch failed for '{title}' (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
continue
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
if not image_url:
image_url, image_source, uploader, page_url = get_image(image_query)
hook = get_dynamic_hook(post_data["title"]).strip()
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=None)
# Generate viral share prompt
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
share_links_template = (
f'<p>{share_prompt} '
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
logging.error(f"Failed to fetch DuckDuckGo News context for '{title}' after {MAX_RETRIES} attempts")
return title
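# Illustrative sketch (added for clarity, not part of the original script): the bare
# DDGS.news() call that fetch_duckduckgo_news_context wraps, without the retry and
# date-filtering logic. The query string is a hypothetical placeholder.
def example_ddg_headlines(query: str, max_results: int = 5):
    with DDGS() as ddgs:
        results = ddgs.news(f"{query} news", timelimit="d", max_results=max_results)
        return [r["title"] for r in results]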
def fetch_reddit_posts():
"""Fetch Reddit posts from specified subreddits, filtering low-quality and [homemade] posts."""
try:
reddit = praw.Reddit(
client_id=REDDIT_CLIENT_ID,
client_secret=REDDIT_CLIENT_SECRET,
user_agent=REDDIT_USER_AGENT
)
post_data["content"] = f"{final_summary}\n\n{share_links_template}" # Removed cta from content
feeds = [
"food",
"FoodPorn",
"spicy",
"KoreanFood",
"JapaneseFood",
"DessertPorn",
"ChineseFood",
"IndianFood"
]
articles = []
cutoff_date = datetime.now(timezone.utc) - timedelta(hours=EXPIRATION_HOURS)
global is_posting
is_posting = True
try:
post_id, post_url = post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=image_url,
original_source=original_source,
image_source=image_source,
uploader=uploader,
pixabay_url=pixabay_url,
logging.info(f"Starting fetch with cutoff date: {cutoff_date}")
for subreddit_name in feeds:
for attempt in range(MAX_RETRIES):
try:
subreddit = reddit.subreddit(subreddit_name)
for submission in subreddit.top(time_filter='day', limit=100):
pub_date = datetime.fromtimestamp(submission.created_utc, tz=timezone.utc)
if pub_date < cutoff_date:
logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
continue
if "[homemade]" in submission.title.lower():
logging.info(f"Skipping homemade post: {submission.title}")
continue
cleaned_title = clean_reddit_title(submission.title)
if not cleaned_title or len(cleaned_title) < 5:
logging.info(f"Skipping post with invalid or short title: {submission.title}")
continue
# Filter out posts with empty or very short summaries
summary = submission.selftext.strip() if submission.selftext else ""
if len(summary) < 20 and not submission.url.endswith(('.jpg', '.jpeg', '.png', '.gif')):
logging.info(f"Skipping post with insufficient summary: {cleaned_title}")
continue
# Fetch top comments for additional context
top_comments = get_top_comments(f"https://www.reddit.com{submission.permalink}", reddit)
articles.append({
"title": cleaned_title,
"raw_title": submission.title,
"link": f"https://www.reddit.com{submission.permalink}",
"summary": summary,
"feed_title": get_clean_source_name(subreddit_name),
"pub_date": pub_date,
"upvotes": submission.score,
"comment_count": submission.num_comments,
"top_comments": top_comments
})
logging.info(f"Fetched {len(articles)} posts from r/{subreddit_name}")
break
except Exception as e:
logging.error(f"Failed to fetch Reddit feed r/{subreddit_name} (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
continue
logging.info(f"Total Reddit posts fetched: {len(articles)}")
return articles
except Exception as e:
logging.error(f"Unexpected error in fetch_reddit_posts: {e}", exc_info=True)
return []
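# Illustrative sketch (added for clarity, not part of the original script): the
# freshness check used above, converting a submission's created_utc epoch value to an
# aware datetime and comparing it against the EXPIRATION_HOURS cutoff.
def example_is_fresh(created_utc: float, hours: int = EXPIRATION_HOURS) -> bool:
    pub_date = datetime.fromtimestamp(created_utc, tz=timezone.utc)
    cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
    return pub_date >= cutoff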
def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used_images):
try:
logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")
posts = fetch_reddit_posts()
if not posts:
logging.info("No Reddit posts available")
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
attempts = 0
max_attempts = 10
while attempts < max_attempts and posts:
post = posts.pop(0)
title = post["title"]
link = post.get("link", "")
summary = post.get("summary", "")
source_name = "Reddit"
original_source = f'<a href="{link}">{source_name}</a>'
original_url = link
upvotes = post.get("upvotes", 0)
comment_count = post.get("comment_count", 0)
top_comments = post.get("top_comments", [])
if title in posted_titles:
logging.info(f"Skipping already posted Reddit post: {title}")
attempts += 1
continue
if upvotes < 300:
logging.info(f"Skipping post '{title}' due to insufficient upvotes ({upvotes} < 300)")
attempts += 1
continue
author = get_next_author_round_robin()
if not author:
logging.info(f"Skipping post '{title}' due to tweet rate limits for all authors")
attempts += 1
continue
author_username = author["username"]
logging.info(f"Selected author via round-robin: {author_username}")
logging.info(f"Trying Reddit Post: {title} from {source_name}")
# Combine summary and top comments for smart_image_and_filter
enhanced_summary = summary
if top_comments:
enhanced_summary += "\n\nTop Comments:\n" + "\n".join(top_comments)
try:
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, enhanced_summary)
except Exception as e:
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1
continue
if skip:
logging.info(f"Skipping filtered Reddit post: {title}")
attempts += 1
continue
ddg_context = fetch_duckduckgo_news_context(title)
scoring_content = f"Title: {title}\n\nContent: {summary}\n\nTop Comments: {top_comments}\n\nAdditional Context: {ddg_context}"
logging.debug(f"Scoring content for '{title}': {scoring_content}")
interest_score = is_interesting_reddit(title, summary, upvotes, comment_count, top_comments)
logging.info(f"Interest score for '{title}': {interest_score}")
if interest_score < 6:
logging.info(f"Reddit Interest Too Low: {interest_score}")
attempts += 1
continue
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = scoring_content
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
interest_score=interest_score,
should_post_tweet=True
extra_prompt=extra_prompt
)
finally:
is_posting = False
if not final_summary:
logging.info(f"Summary failed for '{title}'")
attempts += 1
continue
final_summary = insert_link_naturally(final_summary, source_name, link)
if post_id:
post_data = {
"title": generate_title_from_summary(final_summary),
"content": final_summary,
"status": "publish",
"author": author_username,
"categories": [generate_category_from_summary(final_summary)]
}
category = post_data["categories"][0]
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
if not image_url:
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
if not image_url:
logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None
uploader = None
page_url = None
hook = get_dynamic_hook(post_data["title"]).strip()
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
share_text = f"Check out this foodie gem! {post_data['title']}"
share_text_encoded = quote(share_text)
post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
share_links_template = (
f'<p>{share_prompt} '
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
)
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
global is_posting
is_posting = True
try:
post_to_wp(
post_id, post_url = post_to_wp(
post_data=post_data,
category=category,
link=link,
@@ -348,51 +476,97 @@ def curate_from_reddit():
original_source=original_source,
image_source=image_source,
uploader=uploader,
pixabay_url=pixabay_url,
page_url=page_url,
interest_score=interest_score,
post_id=post_id,
should_post_tweet=False
should_post_tweet=True,
summary=final_summary
)
if not post_id:
logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}")
post_url = original_url
else:
logging.info(f"Posted to WordPress for {author_username}: {post_url}")
post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded)
post_data["content"] = f"{final_summary}\n\n{share_links}"
post_data["post_id"] = post_id
if post_id:
post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=None,
original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
interest_score=interest_score,
post_id=post_id,
should_post_tweet=False
)
except Exception as e:
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
post_url = original_url
finally:
is_posting = False
timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, raw_title, timestamp)
posted_titles.add(raw_title)
logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}")
save_json_file(POSTED_TITLES_FILE, title, timestamp)
posted_titles.add(title)
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
if image_url:
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE} with timestamp {timestamp}")
print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
print(f"Actual post URL: {post_url}")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
logging.info(f"Actual post URL: {post_url}")
return post_data, category, random.randint(0, 1800)
attempts += 1
logging.info(f"WP posting failed for '{post_data['title']}'")
print("No interesting Reddit post found after attempts")
logging.info("No interesting Reddit post found after attempts")
return None, None, random.randint(600, 1800)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from Reddit *****")
sleep_time = random.randint(1200, 1800)
return post_data, category, sleep_time
logging.info("No interesting Reddit post found after attempts")
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
except Exception as e:
logging.error(f"Unexpected error in curate_from_reddit: {e}", exc_info=True)
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
def run_reddit_automator():
print(f"{datetime.now(timezone.utc)} - INFO - ***** Reddit Automator Launched *****")
logging.info("***** Reddit Automator Launched *****")
post_data, category, sleep_time = curate_from_reddit()
if not post_data:
print(f"No postable Reddit article found - sleeping for {sleep_time} seconds")
logging.info(f"No postable Reddit article found - sleeping for {sleep_time} seconds")
else:
print(f"Completed Reddit run with sleep time: {sleep_time} seconds")
logging.info(f"Completed Reddit run with sleep time: {sleep_time} seconds")
print(f"Sleeping for {sleep_time}s")
time.sleep(sleep_time)
return post_data, category, sleep_time
lock_fd = None
try:
lock_fd = acquire_lock()
update_system_activity(SCRIPT_NAME, "running", os.getpid()) # Record start
logging.info("***** Reddit Automator Launched *****")
# Load JSON files once
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
posted_titles = set(entry["title"] for entry in posted_titles_data)
used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
post_data, category, sleep_time = curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used_images)
if not post_data:
logging.info("No postable Reddit article found")
logging.info("Completed Reddit run")
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
# Sleep while still marked as running
time.sleep(sleep_time)
update_system_activity(SCRIPT_NAME, "stopped") # Record stop after sleep
return post_data, category, sleep_time
except Exception as e:
logging.error(f"Fatal error in run_reddit_automator: {e}", exc_info=True)
update_system_activity(SCRIPT_NAME, "stopped") # Record stop on error
sleep_time = random.randint(1200, 1800) # 20–30 minutes
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
return None, None, sleep_time
finally:
if lock_fd:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None
if __name__ == "__main__":
run_reddit_automator()
setup_logging()
post_data, category, sleep_time = run_reddit_automator()
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")

@@ -9,6 +9,8 @@ import signal
import sys
import re
import email.utils
import feedparser
from duckduckgo_search import DDGS
from datetime import datetime, timedelta, timezone
from bs4 import BeautifulSoup
from openai import OpenAI
@@ -25,90 +27,127 @@ from foodie_utils import (
upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
is_interesting, generate_title_from_summary, summarize_with_gpt4o,
generate_category_from_summary, post_to_wp, prepare_post_data,
select_best_author, smart_image_and_filter, get_flickr_image
select_best_author, smart_image_and_filter, get_flickr_image,
get_next_author_round_robin, check_author_rate_limit, update_system_activity
)
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
from dotenv import load_dotenv
import fcntl
load_dotenv()
is_posting = False
def signal_handler(sig, frame):
logging.info("Received termination signal, checking if safe to exit...")
if is_posting:
logging.info("Currently posting, will exit after completion.")
else:
logging.info("Safe to exit immediately.")
sys.exit(0)
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
LOG_FILE = "/home/shane/foodie_automator/foodie_automator_rss.log"
SCRIPT_NAME = "foodie_automator_rss"
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_rss.lock"
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_automator_rss.log"
LOG_PRUNE_DAYS = 30
FEED_TIMEOUT = 15
MAX_RETRIES = 3
RETRY_BACKOFF = 2
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json'
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
EXPIRATION_HOURS = 24
IMAGE_EXPIRATION_DAYS = 7
def setup_logging():
"""Initialize logging with pruning of old logs."""
try:
logging.debug("Attempting to set up logging")
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}")
if not os.access(os.path.dirname(LOG_FILE), os.W_OK):
raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}")
# Test write to log file
try:
with open(LOG_FILE, 'a') as f:
f.write("")
logging.debug(f"Confirmed write access to {LOG_FILE}")
except Exception as e:
raise PermissionError(f"Cannot write to {LOG_FILE}: {e}")
if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
pruned_lines = []
malformed_count = 0
for line in lines:
if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit():
malformed_count += 1
continue
try:
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
if timestamp > cutoff:
pruned_lines.append(line)
except ValueError:
malformed_count += 1
continue
if malformed_count > 0:
logging.info(f"Skipped {malformed_count} malformed log lines during pruning")
with open(LOG_FILE, 'w') as f:
f.writelines(pruned_lines)
logging.debug(f"Log file pruned: {LOG_FILE}")
logging.basicConfig(
filename=LOG_FILE,
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
force=True
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("openai").setLevel(logging.WARNING)
logging.info("Logging initialized for foodie_automator_rss.py")
except Exception as e:
print(f"Failed to setup logging: {e}")
sys.exit(1)
# Call setup_logging immediately
setup_logging()
check_author_rate_limit.script_run_id = int(time.time())
logging.info(f"Set script_run_id to {check_author_rate_limit.script_run_id}")
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
posted_titles = set(entry["title"] for entry in posted_titles_data)
used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
def setup_logging():
if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
pruned_lines = []
malformed_count = 0
for line in lines:
if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit():
malformed_count += 1
continue
try:
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
if timestamp > cutoff:
pruned_lines.append(line)
except ValueError:
malformed_count += 1
continue
if malformed_count > 0:
logging.info(f"Skipped {malformed_count} malformed log lines during pruning")
with open(LOG_FILE, 'w') as f:
f.writelines(pruned_lines)
logging.basicConfig(
filename=LOG_FILE,
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.getLogger("requests").setLevel(logging.WARNING)
logging.info("Logging initialized for foodie_automator_rss.py")
def acquire_lock():
try:
logging.debug("Attempting to acquire lock")
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
lock_fd = open(LOCK_FILE, 'w')
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
lock_fd.write(str(os.getpid()))
lock_fd.flush()
logging.debug(f"Lock acquired: {LOCK_FILE}")
return lock_fd
except IOError:
logging.info("Another instance of foodie_automator_rss.py is running")
sys.exit(0)
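# Illustrative sketch (added for clarity, not part of the original script): the lock
# lifecycle the automator follows, acquiring LOCK_FILE before a run and releasing it in
# a finally block, mirroring run_rss_automator below.
def example_locked_run(work):
    lock_fd = acquire_lock()  # exits immediately if another instance holds the lock
    try:
        return work()
    finally:
        fcntl.flock(lock_fd, fcntl.LOCK_UN)
        lock_fd.close()
        if os.path.exists(LOCK_FILE):
            os.remove(LOCK_FILE)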
setup_logging()
def signal_handler(sig, frame):
logging.info("Received termination signal, marking script as stopped...")
update_system_activity(SCRIPT_NAME, "stopped")
sys.exit(0)
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
def create_http_session() -> requests.Session:
session = requests.Session()
retry_strategy = Retry(
total=MAX_RETRIES,
backoff_factor=2,
backoff_factor=RETRY_BACKOFF,
status_forcelist=[403, 429, 500, 502, 503, 504],
allowed_methods=["GET", "POST"]
)
adapter = HTTPAdapter(
max_retries=retry_strategy,
pool_connections=10,
pool_maxsize=10
)
adapter = HTTPAdapter(max_retries=retry_strategy)
session.mount("http://", adapter)
session.mount("https://", adapter)
session.headers.update({
@@ -136,186 +175,201 @@ def fetch_rss_feeds():
logging.error("RSS_FEEDS is empty in foodie_config.py")
return articles
logging.info(f"Processing feeds: {RSS_FEEDS}")
for feed_url in RSS_FEEDS:
logging.info(f"Processing feed: {feed_url}")
try:
response = session.get(feed_url, timeout=FEED_TIMEOUT)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'xml')
items = soup.find_all('item')
feed_title = RSS_FEED_NAMES.get(feed_url, (get_clean_source_name(feed_url), feed_url))
for item in items:
try:
title = item.find('title').text.strip() if item.find('title') else "Untitled"
link = item.find('link').text.strip() if item.find('link') else ""
pub_date = item.find('pubDate')
pub_date = parse_date(pub_date.text) if pub_date else datetime.now(timezone.utc)
if pub_date < cutoff_date:
logging.info(f"Skipping old article: {title} (Published: {pub_date})")
for attempt in range(MAX_RETRIES):
logging.info(f"Processing feed: {feed_url} (attempt {attempt + 1})")
try:
response = session.get(feed_url, timeout=FEED_TIMEOUT)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'xml')
items = soup.find_all('item')
feed_title = RSS_FEED_NAMES.get(feed_url, (get_clean_source_name(feed_url), feed_url))
for item in items:
try:
title = item.find('title').text.strip() if item.find('title') else "Untitled"
link = item.find('link').text.strip() if item.find('link') else ""
pub_date = item.find('pubDate')
pub_date = parse_date(pub_date.text) if pub_date else datetime.now(timezone.utc)
if pub_date < cutoff_date:
logging.info(f"Skipping old article: {title} (Published: {pub_date})")
continue
description = item.find('description')
summary = BeautifulSoup(description.text, 'html.parser').get_text().strip() if description else ""
content = item.find('content:encoded')
content_text = BeautifulSoup(content.text, 'html.parser').get_text().strip() if content else summary
articles.append({
"title": title,
"link": link,
"summary": summary,
"content": content_text,
"feed_title": feed_title[0] if isinstance(feed_title, tuple) else feed_title,
"pub_date": pub_date
})
logging.debug(f"Processed article: {title}")
except Exception as e:
logging.warning(f"Error processing entry in {feed_url}: {e}")
continue
description = item.find('description')
summary = BeautifulSoup(description.text, 'html.parser').get_text().strip() if description else ""
content = item.find('content:encoded')
content_text = BeautifulSoup(content.text, 'html.parser').get_text().strip() if content else summary
articles.append({
"title": title,
"link": link,
"summary": summary,
"content": content_text,
"feed_title": feed_title[0] if isinstance(feed_title, tuple) else feed_title,
"pub_date": pub_date
})
logging.debug(f"Processed article: {title}")
except Exception as e:
logging.warning(f"Error processing entry in {feed_url}: {e}")
continue
logging.info(f"Filtered to {len(articles)} articles from {feed_url}")
except Exception as e:
logging.error(f"Failed to fetch RSS feed {feed_url}: {e}")
continue
logging.info(f"Filtered to {len(articles)} articles from {feed_url}")
break
except Exception as e:
logging.error(f"Failed to fetch RSS feed {feed_url}: {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
continue
articles.sort(key=lambda x: x["pub_date"], reverse=True)
logging.info(f"Total RSS articles fetched: {len(articles)}")
return articles
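# Illustrative sketch (added for clarity, not part of the original script): parse_date
# is called above but not shown in this hunk. A minimal equivalent using email.utils,
# falling back to the current time for unparsable pubDate strings, might look like this.
def example_parse_date(value: str) -> datetime:
    try:
        dt = email.utils.parsedate_to_datetime(value)
        return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
    except (TypeError, ValueError):
        return datetime.now(timezone.utc)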
def curate_from_rss():
articles = fetch_rss_feeds()
if not articles:
print("No RSS articles available")
logging.info("No RSS articles available")
return None, None, random.randint(600, 1800)
attempts = 0
max_attempts = 10
while attempts < max_attempts and articles:
article = articles.pop(0)
title = article["title"]
link = article["link"]
summary = article["summary"]
content = article["content"]
source_name = article["feed_title"]
original_source = f'<a href="{link}">{source_name}</a>'
if title in posted_titles:
print(f"Skipping already posted article: {title}")
logging.info(f"Skipping already posted article: {title}")
attempts += 1
continue
print(f"Trying RSS Article: {title} from {source_name}")
logging.info(f"Trying RSS Article: {title} from {source_name}")
image_query, relevance_keywords, skip = smart_image_and_filter(title, summary)
if skip:
print(f"Skipping filtered RSS article: {title}")
logging.info(f"Skipping filtered RSS article: {title}")
attempts += 1
def fetch_duckduckgo_news_context(title, hours=24):
for attempt in range(MAX_RETRIES):
try:
with DDGS() as ddgs:
results = ddgs.news(f"{title} news", timelimit="d", max_results=5)
titles = []
for r in results:
try:
date_str = r["date"]
if '+00:00' in date_str:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc)
else:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
if dt > (datetime.now(timezone.utc) - timedelta(hours=24)):
titles.append(r["title"].lower())
except ValueError as e:
logging.warning(f"Date parsing failed for '{date_str}': {e}")
continue
context = " ".join(titles) if titles else "No recent news found within 24 hours"
logging.info(f"DuckDuckGo News context for '{title}': {context}")
return context
except Exception as e:
logging.warning(f"DuckDuckGo News context fetch failed for '{title}' (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
continue
logging.error(f"Failed to fetch DuckDuckGo News context for '{title}' after {MAX_RETRIES} attempts")
return title
scoring_content = f"{title}\n\n{summary}\n\nContent: {content}"
interest_score = is_interesting(scoring_content)
logging.info(f"Interest score for '{title}': {interest_score}")
if interest_score < 6:
print(f"RSS Interest Too Low: {interest_score}")
logging.info(f"RSS Interest Too Low: {interest_score}")
attempts += 1
continue
def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_images):
try:
logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")
articles = fetch_rss_feeds()
if not articles:
logging.info("No RSS articles available")
sleep_time = random.randint(1200, 1800) # 20–30 minutes
return None, None, sleep_time
attempts = 0
max_attempts = 10
while attempts < max_attempts and articles:
article = articles.pop(0)
title = article["title"]
link = article["link"]
summary = article.get("summary", "")
source_name = article.get("feed_title", "Unknown Source")
original_source = f'<a href="{link}">{source_name}</a>'
original_url = link # Store for fallback
if title in posted_titles:
logging.info(f"Skipping already posted article: {title}")
attempts += 1
continue
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
f"Do NOT introduce unrelated concepts.\n"
f"Expand on the core idea with relevant context about its appeal or significance.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = scoring_content
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
interest_score=interest_score,
extra_prompt=extra_prompt
)
if not final_summary:
logging.info(f"Summary failed for '{title}'")
attempts += 1
continue
# Select author
author = get_next_author_round_robin()
if not author:
logging.info(f"Skipping article '{title}' due to tweet rate limits for all authors")
attempts += 1
continue
author_username = author["username"]
logging.info(f"Selected author via round-robin: {author_username}")
# Remove the original title from the summary while preserving paragraphs
title_pattern = re.compile(
r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title),
re.IGNORECASE
)
paragraphs = final_summary.split('\n')
cleaned_paragraphs = []
for para in paragraphs:
if para.strip():
cleaned_para = title_pattern.sub('', para).strip()
cleaned_para = re.sub(r'\s+', ' ', cleaned_para)
cleaned_paragraphs.append(cleaned_para)
final_summary = '\n'.join(cleaned_paragraphs)
final_summary = insert_link_naturally(final_summary, source_name, link)
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
if not post_data:
attempts += 1
continue
logging.info(f"Trying RSS Article: {title} from {source_name}")
# Fetch image
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
if not image_url:
logging.info(f"Flickr fetch failed for '{image_query}'. Falling back to Pixabay.")
image_url, image_source, uploader, page_url = get_image(image_query)
if not image_url:
logging.info(f"Pixabay fetch failed for '{image_query}'. Skipping article '{title}'.")
try:
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
except Exception as e:
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1
continue
hook = get_dynamic_hook(post_data["title"]).strip()
if skip:
logging.info(f"Skipping filtered RSS article: {title}")
attempts += 1
continue
# Generate viral share prompt
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
share_links_template = (
f'<p>{share_prompt} '
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
)
post_data["content"] = f"{final_summary}\n\n{share_links_template}" # Removed cta from content
ddg_context = fetch_duckduckgo_news_context(title)
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
interest_score = is_interesting(scoring_content)
logging.info(f"Interest score for '{title}': {interest_score}")
if interest_score < 6:
logging.info(f"RSS Interest Too Low: {interest_score}")
attempts += 1
continue
global is_posting
is_posting = True
try:
post_id, post_url = post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=image_url,
original_source=original_source,
image_source=image_source,
uploader=uploader,
pixabay_url=pixabay_url,
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = scoring_content
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
interest_score=interest_score,
should_post_tweet=True
extra_prompt=extra_prompt
)
finally:
is_posting = False
if not final_summary:
logging.info(f"Summary failed for '{title}'")
attempts += 1
continue
if post_id:
final_summary = insert_link_naturally(final_summary, source_name, link)
post_data = {
"title": generate_title_from_summary(final_summary),
"content": final_summary,
"status": "publish",
"author": author_username,
"categories": [generate_category_from_summary(final_summary)]
}
category = post_data["categories"][0]
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
if not image_url:
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
if not image_url:
logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None
uploader = None
page_url = None
hook = get_dynamic_hook(post_data["title"]).strip()
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
share_text = f"Check out this foodie gem! {post_data['title']}"
share_text_encoded = quote(share_text)
post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
share_links_template = (
f'<p>{share_prompt} '
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
)
post_data["content"] = f"{final_summary}\n\n{share_links_template.format(post_url='{post_url}', share_text=share_text_encoded)}"
global is_posting
is_posting = True
try:
post_to_wp(
post_id, post_url = post_to_wp(
post_data=post_data,
category=category,
link=link,
@@ -324,43 +378,103 @@ def curate_from_rss():
original_source=original_source,
image_source=image_source,
uploader=uploader,
pixabay_url=pixabay_url,
page_url=page_url,
interest_score=interest_score,
post_id=post_id,
should_post_tweet=False
should_post_tweet=True,
summary=final_summary
)
if not post_id:
logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}")
post_url = original_url
else:
logging.info(f"Posted to WordPress for {author_username}: {post_url}")
post_url_encoded = quote(post_url)
post_data["content"] = f"{final_summary}\n\n{share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)}"
if post_id:
post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=None, # Skip image re-upload
original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
interest_score=interest_score,
post_id=post_id,
should_post_tweet=False
)
timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, title, timestamp)
posted_titles.add(title)
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
if image_url:
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from RSS *****")
sleep_time = random.randint(1200, 1800) # 20–30 minutes
return post_data, category, sleep_time
except Exception as e:
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
post_url = original_url
timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, title, timestamp)
posted_titles.add(title)
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
if image_url:
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
attempts += 1
finally:
is_posting = False
timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, title, timestamp)
posted_titles.add(title)
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
if image_url:
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
return post_data, category, random.randint(0, 1800)
attempts += 1
logging.info(f"WP posting failed for '{post_data['title']}'")
print("No interesting RSS article found after attempts")
logging.info("No interesting RSS article found after attempts")
return None, None, random.randint(600, 1800)
logging.info("No interesting RSS article found after attempts")
sleep_time = random.randint(1200, 1800) # 20–30 minutes
return None, None, sleep_time
except Exception as e:
logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True)
sleep_time = random.randint(1200, 1800) # 20–30 minutes
return None, None, sleep_time
def run_rss_automator():
print(f"{datetime.now(timezone.utc)} - INFO - ***** RSS Automator Launched *****")
logging.info("***** RSS Automator Launched *****")
post_data, category, sleep_time = curate_from_rss()
print(f"Sleeping for {sleep_time}s")
logging.info(f"Completed run with sleep time: {sleep_time} seconds")
time.sleep(sleep_time)
return post_data, category, sleep_time
lock_fd = None
try:
lock_fd = acquire_lock()
update_system_activity(SCRIPT_NAME, "running", os.getpid()) # Record start
logging.info("***** RSS Automator Launched *****")
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
posted_titles = set(entry["title"] for entry in posted_titles_data)
used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
post_data, category, sleep_time = curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_images)
if not post_data:
logging.info("No postable RSS article found")
logging.info("Completed RSS run")
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
# Sleep while still marked as running
time.sleep(sleep_time)
update_system_activity(SCRIPT_NAME, "stopped") # Record stop after sleep
return post_data, category, sleep_time
except Exception as e:
logging.error(f"Fatal error in run_rss_automator: {e}", exc_info=True)
update_system_activity(SCRIPT_NAME, "stopped") # Record stop on error
sleep_time = random.randint(1200, 1800) # Fixed to 20–30 minutes
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
return None, None, sleep_time
finally:
if lock_fd:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None
if __name__ == "__main__":
run_rss_automator()
post_data, category, sleep_time = run_rss_automator()
# logging.info(f"Run completed, sleep_time: {sleep_time} seconds")

@@ -31,7 +31,7 @@ AUTHORS = [
"username": "aishapatel",
"password": os.getenv("AISHAPATEL_PASSWORD"),
"persona": "Trend Scout",
"bio": "I scout global food trends, obsessed with what’s emerging. My sharp predictions map the industry’s path—always one step ahead.",
"bio": "I scout global food trends, obsessed with what's emerging. My sharp predictions map the industry's path—always one step ahead.",
"dob": "1999-03-15"
},
{
@@ -47,7 +47,7 @@ AUTHORS = [
"username": "keishareid",
"password": os.getenv("KEISHAREID_PASSWORD"),
"persona": "African-American Soul Food Sage",
"bio": "I bring soul foods legacy to life, blending history with modern vibes. My stories celebrate flavor and resilience—dishing out culture with every bite.",
"bio": "I bring soul food's legacy to life, blending history with modern vibes. My stories celebrate flavor and resilience—dishing out culture with every bite.",
"dob": "1994-06-10"
},
{
@@ -60,69 +60,63 @@
}
]
X_API_CREDENTIALS = [
{
"username": "owenjohnson",
"x_username": "@insiderfoodieowen",
X_API_CREDENTIALS = {
"owenjohnson": {
"x_username": "@mrowenjohnson",
"api_key": os.getenv("OWENJOHNSON_X_API_KEY"),
"api_secret": os.getenv("OWENJOHNSON_X_API_SECRET"),
"access_token": os.getenv("OWENJOHNSON_X_ACCESS_TOKEN"),
"access_token_secret": os.getenv("OWENJOHNSON_X_ACCESS_TOKEN_SECRET"),
"client_secret": os.getenv("OWENJOHNSON_X_CLIENT_SECRET")
},
{
"username": "javiermorales",
"x_username": "@insiderfoodiejavier",
"javiermorales": {
"x_username": "@mrjaviermorales",
"api_key": os.getenv("JAVIERMORALES_X_API_KEY"),
"api_secret": os.getenv("JAVIERMORALES_X_API_SECRET"),
"access_token": os.getenv("JAVIERMORALES_X_ACCESS_TOKEN"),
"access_token_secret": os.getenv("JAVIERMORALES_X_ACCESS_TOKEN_SECRET"),
"client_secret": os.getenv("JAVIERMORALES_X_CLIENT_SECRET")
},
{
"username": "aishapatel",
"x_username": "@insiderfoodieaisha",
"aishapatel": {
"x_username": "@missaishapatel",
"api_key": os.getenv("AISHAPATEL_X_API_KEY"),
"api_secret": os.getenv("AISHAPATEL_X_API_SECRET"),
"access_token": os.getenv("AISHAPATEL_X_ACCESS_TOKEN"),
"access_token_secret": os.getenv("AISHAPATEL_X_ACCESS_TOKEN_SECRET"),
"client_secret": os.getenv("AISHAPATEL_X_CLIENT_SECRET")
},
{
"username": "trangnguyen",
"x_username": "@insiderfoodietrang",
"trangnguyen": {
"x_username": "@mrtrangnguyen",
"api_key": os.getenv("TRANGNGUYEN_X_API_KEY"),
"api_secret": os.getenv("TRANGNGUYEN_X_API_SECRET"),
"access_token": os.getenv("TRANGNGUYEN_X_ACCESS_TOKEN"),
"access_token_secret": os.getenv("TRANGNGUYEN_X_ACCESS_TOKEN_SECRET"),
"client_secret": os.getenv("TRANGNGUYEN_X_CLIENT_SECRET")
},
{
"username": "keishareid",
"x_username": "@insiderfoodiekeisha",
"keishareid": {
"x_username": "@misskeishareid",
"api_key": os.getenv("KEISHAREID_X_API_KEY"),
"api_secret": os.getenv("KEISHAREID_X_API_SECRET"),
"access_token": os.getenv("KEISHAREID_X_ACCESS_TOKEN"),
"access_token_secret": os.getenv("KEISHAREID_X_ACCESS_TOKEN_SECRET"),
"client_secret": os.getenv("KEISHAREID_X_CLIENT_SECRET")
},
{
"username": "lilamoreau",
"x_username": "@insiderfoodielila",
"lilamoreau": {
"x_username": "@misslilamoreau",
"api_key": os.getenv("LILAMOREAU_X_API_KEY"),
"api_secret": os.getenv("LILAMOREAU_X_API_SECRET"),
"access_token": os.getenv("LILAMOREAU_X_ACCESS_TOKEN"),
"access_token_secret": os.getenv("LILAMOREAU_X_ACCESS_TOKEN_SECRET"),
"client_secret": os.getenv("LILAMOREAU_X_CLIENT_SECRET")
}
]
}
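# Illustrative sketch (added for clarity, not part of the original config): credentials
# are now keyed by author username, so the poster can look them up directly. The real
# tweet-sending helper lives in foodie_utils and is not shown here; assuming the Tweepy
# v2 client, a minimal stand-in might look like the commented sketch below.
# import tweepy
# def example_send_tweet(username, text):
#     creds = X_API_CREDENTIALS[username]
#     client = tweepy.Client(
#         consumer_key=creds["api_key"],
#         consumer_secret=creds["api_secret"],
#         access_token=creds["access_token"],
#         access_token_secret=creds["access_token_secret"]
#     )
#     return client.create_tweet(text=text[:280])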
PERSONA_CONFIGS = {
"Visionary Editor": {
"description": "a commanding food editor with a borderless view",
"tone": "a polished and insightful tone, like 'This redefines culinary excellence.'",
"article_prompt": (
"Youre {description}. Summarize this article in {tone}. "
"You're {description}. Summarize this article in {tone}. "
"Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
"Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
"Add a bold take and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
@@ -139,7 +133,7 @@ PERSONA_CONFIGS = {
"description": "a seasoned foodie reviewer with a sharp eye",
"tone": "a professional yet engaging tone, like 'This dish is a revelation.'",
"article_prompt": (
"Youre {description}. Summarize this article in {tone}. "
"You're {description}. Summarize this article in {tone}. "
"Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
"Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
"Add a subtle opinion and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
@@ -154,12 +148,12 @@ PERSONA_CONFIGS = {
},
"Trend Scout": {
"description": "a forward-thinking editor obsessed with trends",
"tone": "an insightful and forward-looking tone, like 'This sets the stage for whats next.'",
"tone": "an insightful and forward-looking tone, like 'This sets the stage for what's next.'",
"article_prompt": (
"Youre {description}. Summarize this article in {tone}. "
"You're {description}. Summarize this article in {tone}. "
"Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
"Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
"Predict whats next and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
"Predict what's next and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
),
"x_prompt": (
"Craft a tweet as {description}. Keep it under 280 characters, using {tone}. "
@@ -173,7 +167,7 @@ PERSONA_CONFIGS = {
"description": "a cultured food writer who loves storytelling",
"tone": "a warm and thoughtful tone, like 'This evokes a sense of tradition.'",
"article_prompt": (
"Youre {description}. Summarize this article in {tone}. "
"You're {description}. Summarize this article in {tone}. "
"Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
"Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
"Add a thoughtful observation and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
@@ -190,7 +184,7 @@ PERSONA_CONFIGS = {
"description": "a vibrant storyteller rooted in African-American culinary heritage",
"tone": "a heartfelt and authentic tone, like 'This captures the essence of heritage.'",
"article_prompt": (
"Youre {description}. Summarize this article in {tone}. "
"You're {description}. Summarize this article in {tone}. "
"Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
"Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
"Add a heritage twist and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
@@ -207,7 +201,7 @@ PERSONA_CONFIGS = {
"description": "an adventurous explorer of global street food",
"tone": "a bold and adventurous tone, like 'This takes you on a global journey.'",
"article_prompt": (
"Youre {description}. Summarize this article in {tone}. "
"You're {description}. Summarize this article in {tone}. "
"Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
"Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
"Drop a street-level insight and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
@@ -245,7 +239,7 @@ RSS_FEED_NAMES = {
"https://www.eater.com/rss/full.xml": ("Eater", "https://www.eater.com/"),
"https://www.nrn.com/rss.xml": ("Nation's Restaurant News", "https://www.nrn.com/"),
"https://rss.nytimes.com/services/xml/rss/nyt/DiningandWine.xml": ("The New York Times", "https://www.nytimes.com/section/food"),
"https://www.theguardian.com/food/rss": ("The Guardian Food", "https://www.theguardian.com/food")
"https://www.theguardian.com/food/rss": ("The Guardian", "https://www.theguardian.com/food")
}
RECIPE_KEYWORDS = ["recipe", "cook", "bake", "baking", "cooking", "ingredient", "method", "mix", "stir", "preheat", "dinners", "make", "dish", "healthy"]
@@ -254,8 +248,7 @@ HOME_KEYWORDS = ["home", "house", "household", "appliance", "kitchen", "gadget"]
PRODUCT_KEYWORDS = ["best", "buy", "storage", "organizer", "shop", "price", "container", "product", "deal", "sale", "discount"]
CATEGORIES = [
"People", "Trends", "Travel",
"Lifestyle", "Buzz", "Culture", "Health", "Drink", "Food", "Eats"
"Buzz", "Trends", "Lifestyle", "Culture", "Health", "Drink", "Food", "Eats"
]
REDDIT_CLIENT_ID = os.getenv("REDDIT_CLIENT_ID")
@@ -264,7 +257,12 @@ REDDIT_USER_AGENT = os.getenv("REDDIT_USER_AGENT")
REDDIT_SUBREDDITS = [
"food",
"FoodPorn",
"spicy"
"spicy",
"KoreanFood",
"JapaneseFood",
"DessertPorn",
"ChineseFood",
"IndianFood"
]
FAST_FOOD_KEYWORDS = [
"mcdonald", "burger king", "wendy", "taco bell", "kfc",
@@ -284,4 +282,14 @@ def get_clean_source_name(source_name):
for feed_url, (clean_name, _) in RSS_FEED_NAMES.items():
if feed_url == source_name:
return clean_name
return source_name
return source_name
# Email configuration for alerts
EMAIL_CONFIG = {
'from_email': 'systemalerts@insiderfoodie.com', # System alerts email
'to_email': 'systemalerts@insiderfoodie.com', # Same email for receiving alerts
'smtp_server': 'mail.insiderfoodie.com', # Your SMTP server
'smtp_port': 587, # STARTTLS port
'smtp_username': 'systemalerts', # SMTP username
'smtp_password': os.getenv('INSIDERFOODIE_EMAIL_PASSWORD') # Store password in .env
}
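# Illustrative sketch (added for clarity, not part of the original config): sending a
# system alert with EMAIL_CONFIG over STARTTLS. The real alert helper lives elsewhere;
# this commented stand-in uses only the standard library.
# import smtplib
# from email.message import EmailMessage
# def example_send_alert(subject, body):
#     msg = EmailMessage()
#     msg["From"] = EMAIL_CONFIG["from_email"]
#     msg["To"] = EMAIL_CONFIG["to_email"]
#     msg["Subject"] = subject
#     msg.set_content(body)
#     with smtplib.SMTP(EMAIL_CONFIG["smtp_server"], EMAIL_CONFIG["smtp_port"]) as server:
#         server.starttls()
#         server.login(EMAIL_CONFIG["smtp_username"], EMAIL_CONFIG["smtp_password"])
#         server.send_message(msg)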

@@ -0,0 +1,236 @@
# foodie_engagement_generator.py
import json
import logging
import random
import signal
import sys
import fcntl
import os
import time
from datetime import datetime, timedelta, timezone
from openai import OpenAI
from foodie_utils import AUTHORS, SUMMARY_MODEL, load_json_file, save_json_file, update_system_activity
from foodie_config import X_API_CREDENTIALS, AUTHOR_BACKGROUNDS_FILE
from dotenv import load_dotenv
load_dotenv()
SCRIPT_NAME = "foodie_engagement_generator"
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_engagement_generator.lock"
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_engagement_generator.log"
ENGAGEMENT_TWEETS_FILE = "/home/shane/foodie_automator/engagement_tweets.json"
LOG_PRUNE_DAYS = 30
MAX_RETRIES = 3
RETRY_BACKOFF = 2
def setup_logging():
"""Initialize logging with pruning of old logs."""
try:
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
pruned_lines = []
malformed_count = 0
for line in lines:
if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit():
malformed_count += 1
continue
try:
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
if timestamp > cutoff:
pruned_lines.append(line)
except ValueError:
malformed_count += 1
continue
if malformed_count > 0:
logging.info(f"Skipped {malformed_count} malformed log lines during pruning")
with open(LOG_FILE, 'w') as f:
f.writelines(pruned_lines)
logging.basicConfig(
filename=LOG_FILE,
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.getLogger("openai").setLevel(logging.WARNING)
logging.info("Logging initialized for foodie_engagement_generator.py")
except Exception as e:
print(f"Failed to setup logging: {e}")
sys.exit(1)
def acquire_lock():
"""Acquire a lock to prevent concurrent runs."""
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
lock_fd = open(LOCK_FILE, 'w')
try:
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
lock_fd.write(str(os.getpid()))
lock_fd.flush()
return lock_fd
except IOError:
logging.info("Another instance of foodie_engagement_generator.py is running")
sys.exit(0)
def signal_handler(sig, frame):
"""Handle termination signals gracefully."""
logging.info("Received termination signal, marking script as stopped...")
update_system_activity(SCRIPT_NAME, "stopped")
sys.exit(0)
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
# Initialize OpenAI client
try:
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
if not os.getenv("OPENAI_API_KEY"):
logging.error("OPENAI_API_KEY is not set in environment variables")
raise ValueError("OPENAI_API_KEY is required")
except Exception as e:
logging.error(f"Failed to initialize OpenAI client: {e}", exc_info=True)
sys.exit(1)
# Load author backgrounds
try:
with open(AUTHOR_BACKGROUNDS_FILE, 'r') as f:
AUTHOR_BACKGROUNDS = json.load(f)
except Exception as e:
logging.error(f"Failed to load author_backgrounds.json: {e}", exc_info=True)
sys.exit(1)
def generate_engagement_tweet(author):
"""Generate an engagement tweet using author background themes."""
credentials = X_API_CREDENTIALS.get(author["username"])
if not credentials:
logging.error(f"No X credentials found for {author['username']}")
return None
author_handle = credentials["x_username"]
background = next((bg for bg in AUTHOR_BACKGROUNDS if bg["username"] == author["username"]), {})
if not background or "engagement_themes" not in background:
logging.warning(f"No background or engagement themes found for {author['username']}")
theme = "food trends"
else:
theme = random.choice(background["engagement_themes"])
prompt = (
f"Generate a concise tweet (under 230 characters) for {author_handle}. "
f"Create an engaging question or statement about {theme} to spark interaction. "
f"Include a call to action to follow {author_handle} or like the tweet, and mention InsiderFoodie.com with a link to https://insiderfoodie.com. "
f"Avoid using the word 'elevate'—use more humanized language like 'level up' or 'bring to life'. "
f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways)."
)
for attempt in range(MAX_RETRIES):
try:
response = client.chat.completions.create(
model=SUMMARY_MODEL,
messages=[
{"role": "system", "content": "You are a social media expert crafting engaging tweets."},
{"role": "user", "content": prompt}
],
max_tokens=100,
temperature=0.7
)
tweet = response.choices[0].message.content.strip()
if len(tweet) > 280:
tweet = tweet[:277] + "..."
logging.debug(f"Generated engagement tweet: {tweet}")
return tweet
except Exception as e:
logging.warning(f"Failed to generate engagement tweet for {author['username']} (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
else:
logging.error(f"Failed to generate engagement tweet after {MAX_RETRIES} attempts")
engagement_templates = [
f"What's the most mouthwatering {theme} you've seen this week? Share below and follow {author_handle} for more on InsiderFoodie.com! Link: https://insiderfoodie.com",
f"{theme.capitalize()} lovers unite! What's your go-to pick? Tell us and like this tweet for more from {author_handle} on InsiderFoodie.com! Link: https://insiderfoodie.com",
f"Ever tried a {theme} that blew your mind? Share your favorites and follow {author_handle} for more on InsiderFoodie.com! Link: https://insiderfoodie.com",
f"What {theme} trend are you loving right now? Let us know and like this tweet to keep up with {author_handle} on InsiderFoodie.com! Link: https://insiderfoodie.com"
]
template = random.choice(engagement_templates)
logging.info(f"Using fallback engagement tweet: {template}")
return template
def generate_engagement_tweets():
"""Generate engagement tweets for authors and save to file."""
try:
logging.info("Starting foodie_engagement_generator.py")
tweets = []
timestamp = datetime.now(timezone.utc).isoformat()
for author in AUTHORS:
try:
tweet = generate_engagement_tweet(author)
if not tweet:
logging.error(f"Failed to generate engagement tweet for {author['username']}, skipping")
continue
# Collect tweet data
tweet_data = {
"username": author["username"],
"x_handle": X_API_CREDENTIALS[author["username"]]["x_username"],
"tweet": tweet,
"timestamp": timestamp
}
tweets.append(tweet_data)
logging.info(f"Generated engagement tweet for {author['username']}: {tweet}")
except Exception as e:
logging.error(f"Error generating engagement tweet for {author['username']}: {e}", exc_info=True)
continue
# Save tweets to file, overwriting any existing content
if tweets:
try:
tweet_data = {
"timestamp": timestamp,
"tweets": tweets
}
save_json_file(ENGAGEMENT_TWEETS_FILE, tweet_data)
logging.info(f"Saved {len(tweets)} engagement tweets to {ENGAGEMENT_TWEETS_FILE}")
except Exception as e:
logging.error(f"Failed to save engagement tweets to {ENGAGEMENT_TWEETS_FILE}: {e}")
else:
logging.warning("No engagement tweets generated, nothing to save")
logging.info("Completed foodie_engagement_generator.py")
sleep_time = random.randint(82800, 86400) # ~23–24 hours
return True, sleep_time
except Exception as e:
logging.error(f"Unexpected error in generate_engagement_tweets: {e}", exc_info=True)
sleep_time = random.randint(82800, 86400) # ~23–24 hours
return False, sleep_time
def main():
"""Main function to run the script."""
lock_fd = None
try:
lock_fd = acquire_lock()
setup_logging()
update_system_activity(SCRIPT_NAME, "running", os.getpid()) # Record start
success, sleep_time = generate_engagement_tweets()
update_system_activity(SCRIPT_NAME, "stopped") # Record stop
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
return success, sleep_time
except Exception as e:
logging.error(f"Fatal error in main: {e}", exc_info=True)
print(f"Fatal error: {e}")
update_system_activity(SCRIPT_NAME, "stopped") # Record stop on error
sleep_time = random.randint(82800, 86400) # ~23–24 hours
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
return False, sleep_time
finally:
if lock_fd:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None
if __name__ == "__main__":
success, sleep_time = main()
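
The generator above (and the scripts that follow) leans on save_json_file, load_json_file and update_system_activity from foodie_utils, which this comparison does not include. A minimal sketch of what those helpers are assumed to do (an atomic JSON write plus a per-script status record); the actual foodie_utils.py may use different paths, field names and error handling.

# Hypothetical sketch of the foodie_utils helpers used above; the path and
# field names are assumptions, not the repo's actual implementation.
import json
import os
import tempfile
from datetime import datetime, timezone

SYSTEM_ACTIVITY_FILE = "/home/shane/foodie_automator/system_activity.json"  # assumed path

def load_json_file(path, default=None):
    """Return parsed JSON from path, or default when the file is missing/invalid."""
    try:
        with open(path, "r") as f:
            return json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        return default if default is not None else []

def save_json_file(path, data):
    """Write JSON atomically: dump to a temp file, then rename over the target."""
    os.makedirs(os.path.dirname(path), exist_ok=True)
    fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(path))
    with os.fdopen(fd, "w") as f:
        json.dump(data, f, indent=2)
    os.replace(tmp_path, path)

def update_system_activity(script_name, status, pid=None):
    """Record a script's running/stopped status in the activity file."""
    activity = load_json_file(SYSTEM_ACTIVITY_FILE, default={})
    activity[script_name] = {
        "status": status,
        "pid": pid,
        "updated": datetime.now(timezone.utc).isoformat(),
    }
    save_json_file(SYSTEM_ACTIVITY_FILE, activity)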

@@ -1,76 +1,324 @@
import random
# foodie_engagement_tweet.py
import json
import logging
import random
import signal
import sys
import fcntl
import os
import time
from datetime import datetime, timedelta, timezone
from openai import OpenAI # Add this import
from foodie_utils import post_tweet, AUTHORS, SUMMARY_MODEL
from dotenv import load_dotenv # Add this import
# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
from openai import OpenAI
from foodie_utils import (
post_tweet,
AUTHORS,
SUMMARY_MODEL,
check_author_rate_limit,
load_json_file,
save_json_file, # Add this
update_system_activity,
get_next_author_round_robin
)
from foodie_config import X_API_CREDENTIALS, AUTHOR_BACKGROUNDS_FILE
from dotenv import load_dotenv
# Load environment variables
print("Loading environment variables")
load_dotenv()
print(f"Environment variables loaded: OPENAI_API_KEY={bool(os.getenv('OPENAI_API_KEY'))}")
SCRIPT_NAME = "foodie_engagement_tweet"
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_engagement_tweet.lock"
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_engagement_tweet.log"
LOG_PRUNE_DAYS = 30
MAX_RETRIES = 3
RETRY_BACKOFF = 2
def setup_logging():
"""Initialize logging with pruning of old logs."""
print("Entering setup_logging")
try:
log_dir = os.path.dirname(LOG_FILE)
print(f"Ensuring log directory exists: {log_dir}")
os.makedirs(log_dir, exist_ok=True)
print(f"Log directory permissions: {os.stat(log_dir).st_mode & 0o777}, owner: {os.stat(log_dir).st_uid}")
if os.path.exists(LOG_FILE):
print(f"Pruning old logs in {LOG_FILE}")
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
pruned_lines = []
malformed_count = 0
for line in lines:
if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit():
malformed_count += 1
continue
try:
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
if timestamp > cutoff:
pruned_lines.append(line)
except ValueError:
malformed_count += 1
continue
print(f"Skipped {malformed_count} malformed log lines during pruning")
with open(LOG_FILE, 'w') as f:
f.writelines(pruned_lines)
print(f"Log file pruned, new size: {os.path.getsize(LOG_FILE)} bytes")
print(f"Configuring logging to {LOG_FILE}")
logging.basicConfig(
filename=LOG_FILE,
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.getLogger("openai").setLevel(logging.WARNING)
logging.info("Logging initialized for foodie_engagement_tweet.py")
print("Logging setup complete")
except Exception as e:
print(f"Failed to setup logging: {e}")
sys.exit(1)
def acquire_lock():
"""Acquire a lock to prevent concurrent runs."""
print("Entering acquire_lock")
try:
lock_dir = os.path.dirname(LOCK_FILE)
print(f"Ensuring lock directory exists: {lock_dir}")
os.makedirs(lock_dir, exist_ok=True)
print(f"Opening lock file: {LOCK_FILE}")
lock_fd = open(LOCK_FILE, 'w')
print(f"Attempting to acquire lock on {LOCK_FILE}")
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
lock_fd.write(str(os.getpid()))
lock_fd.flush()
print(f"Lock acquired, PID: {os.getpid()}")
return lock_fd
except IOError as e:
print(f"Failed to acquire lock, another instance is running: {e}")
logging.info("Another instance of foodie_engagement_tweet.py is running")
sys.exit(0)
except Exception as e:
print(f"Unexpected error in acquire_lock: {e}")
sys.exit(1)
def signal_handler(sig, frame):
"""Handle termination signals gracefully."""
print(f"Received signal: {sig}")
logging.info("Received termination signal, marking script as stopped...")
update_system_activity(SCRIPT_NAME, "stopped")
sys.exit(0)
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
# Initialize OpenAI client
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
print("Initializing OpenAI client")
try:
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
if not os.getenv("OPENAI_API_KEY"):
print("OPENAI_API_KEY is not set")
logging.error("OPENAI_API_KEY is not set in environment variables")
raise ValueError("OPENAI_API_KEY is required")
print("OpenAI client initialized")
except Exception as e:
print(f"Failed to initialize OpenAI client: {e}")
logging.error(f"Failed to initialize OpenAI client: {e}", exc_info=True)
sys.exit(1)
# Load author backgrounds
print(f"Loading author backgrounds from {AUTHOR_BACKGROUNDS_FILE}")
try:
with open(AUTHOR_BACKGROUNDS_FILE, 'r') as f:
AUTHOR_BACKGROUNDS = json.load(f)
print(f"Author backgrounds loaded: {len(AUTHOR_BACKGROUNDS)} entries")
except Exception as e:
print(f"Failed to load author_backgrounds.json: {e}")
logging.error(f"Failed to load author_backgrounds.json: {e}", exc_info=True)
sys.exit(1)
def generate_engagement_tweet(author):
author_handle = author["x_username"] # Updated to use x_username from X_API_CREDENTIALS
prompt = (
f"Generate a concise tweet (under 280 characters) for {author_handle}. "
f"Create an engaging food-related question or statement to spark interaction. "
f"Include a call to action to follow {author_handle} or like the tweet, and mention InsiderFoodie.com with a link to https://insiderfoodie.com. "
f"Avoid using the word 'elevate'—use more humanized language like 'level up' or 'bring to life'. "
f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways)."
)
"""Generate an engagement tweet using author background themes and persona."""
print(f"Generating tweet for author: {author['username']}")
try:
response = client.chat.completions.create(
model=SUMMARY_MODEL,
messages=[
{"role": "system", "content": "You are a social media expert crafting engaging tweets."},
{"role": "user", "content": prompt}
],
max_tokens=100,
temperature=0.7
credentials = X_API_CREDENTIALS.get(author["username"])
if not credentials:
print(f"No X credentials found for {author['username']}")
logging.error(f"No X credentials found for {author['username']}")
return None
author_handle = credentials["x_username"]
print(f"Author handle: {author_handle}")
background = next((bg for bg in AUTHOR_BACKGROUNDS if bg["username"] == author["username"]), {})
if not background or "engagement_themes" not in background:
print(f"No background or themes for {author['username']}, using default theme")
logging.warning(f"No background or engagement themes found for {author['username']}")
theme = "food trends"
else:
theme = random.choice(background["engagement_themes"])
print(f"Selected theme: {theme}")
# Get the author's persona from AUTHORS
persona = next((a["persona"] for a in AUTHORS if a["username"] == author["username"]), "Unknown")
prompt = (
f"Generate a concise tweet (under 230 characters) for {author_handle} as a {persona}. "
f"Create an engaging, specific question about {theme} to spark interaction (e.g., 'What's your go-to sushi spot in Tokyo?'). "
f"Include a call to action to follow {author_handle} or like the tweet, and mention InsiderFoodie.com with a link to https://insiderfoodie.com. "
f"Avoid using the word 'elevate'—use more humanized language like 'level up' or 'bring to life'. "
f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways)."
)
tweet = response.choices[0].message.content.strip()
if len(tweet) > 280:
tweet = tweet[:277] + "..."
return tweet
print(f"OpenAI prompt: {prompt}")
for attempt in range(MAX_RETRIES):
print(f"Attempt {attempt + 1} to generate tweet")
try:
response = client.chat.completions.create(
model=SUMMARY_MODEL,
messages=[
{"role": "system", "content": "You are a social media expert crafting engaging tweets."},
{"role": "user", "content": prompt}
],
max_tokens=100,
temperature=0.7
)
tweet = response.choices[0].message.content.strip()
if len(tweet) > 280:
tweet = tweet[:277] + "..."
print(f"Generated tweet: {tweet}")
logging.debug(f"Generated engagement tweet: {tweet}")
return tweet
except Exception as e:
print(f"Failed to generate tweet (attempt {attempt + 1}): {e}")
logging.warning(f"Failed to generate engagement tweet for {author['username']} (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
else:
print(f"Exhausted retries for {author['username']}")
logging.error(f"Failed to generate engagement tweet after {MAX_RETRIES} attempts")
engagement_templates = [
f"What's your favorite {theme} dish? Share below and follow {author_handle} for more on InsiderFoodie.com! Link: https://insiderfoodie.com",
f"Which {theme} spot is a must-visit? Tell us and like this tweet for more from {author_handle} on InsiderFoodie.com! Link: https://insiderfoodie.com",
f"Got a {theme} hidden gem? Share it and follow {author_handle} for more on InsiderFoodie.com! Link: https://insiderfoodie.com",
f"What's the best {theme} you've tried? Let us know and like this tweet to keep up with {author_handle} on InsiderFoodie.com! Link: https://insiderfoodie.com"
]
template = random.choice(engagement_templates)
print(f"Using fallback tweet: {template}")
logging.info(f"Using fallback engagement tweet: {template}")
return template
except Exception as e:
logging.warning(f"Failed to generate engagement tweet for {author['username']}: {e}")
# Fallback templates
engagement_templates = [
f"Whats the most mouthwatering dish youve seen this week Share below and follow {author_handle} for more foodie ideas on InsiderFoodie.com Link: https://insiderfoodie.com",
f"Food lovers unite Whats your go to comfort food Tell us and like this tweet for more tasty ideas from {author_handle} on InsiderFoodie.com Link: https://insiderfoodie.com",
f"Ever tried a dish that looked too good to eat Share your favorites and follow {author_handle} for more culinary trends on InsiderFoodie.com Link: https://insiderfoodie.com",
f"What food trend are you loving right now Let us know and like this tweet to keep up with {author_handle} on InsiderFoodie.com Link: https://insiderfoodie.com"
]
template = random.choice(engagement_templates)
return template
print(f"Error in generate_engagement_tweet for {author['username']}: {e}")
logging.error(f"Error in generate_engagement_tweet for {author['username']}: {e}", exc_info=True)
return None
def post_engagement_tweet():
# Reference date for calculating the 2-day interval
reference_date = datetime(2025, 4, 29, tzinfo=timezone.utc) # Starting from April 29, 2025
current_date = datetime.now(timezone.utc)
# Calculate the number of days since the reference date
days_since_reference = (current_date - reference_date).days
# Post only if the number of days since the reference date is divisible by 2
if days_since_reference % 2 == 0:
logging.info("Today is an engagement tweet day (every 2 days). Posting...")
for author in AUTHORS:
tweet = generate_engagement_tweet(author)
logging.info(f"Posting engagement tweet for {author['username']}: {tweet}")
if post_tweet(author, tweet):
logging.info(f"Successfully posted engagement tweet for {author['username']}")
else:
logging.warning(f"Failed to post engagement tweet for {author['username']}")
else:
logging.info("Today is not an engagement tweet day (every 2 days). Skipping...")
"""Post engagement tweets for all authors with a delay between posts."""
print("Entering post_engagement_tweet")
try:
logging.info("Starting foodie_engagement_tweet.py")
posted = False
state_file = '/home/shane/foodie_automator/author_state.json'
state = load_json_file(state_file, default={'last_author_index': -1})
delay_seconds = 30 # Delay between posts to avoid rate limits and spread engagement
# Iterate through all authors
for index, author in enumerate(AUTHORS):
username = author['username']
print(f"Processing author: {username}")
logging.info(f"Processing author: {username}")
try:
print("Checking rate limit")
if not check_author_rate_limit(author):
print(f"Rate limit exceeded for {username}, skipping")
logging.info(f"Rate limit exceeded for {username}, skipping")
continue
print("Generating tweet")
tweet = generate_engagement_tweet(author)
if not tweet:
print(f"Failed to generate tweet for {username}, skipping")
logging.error(f"Failed to generate engagement tweet for {username}, skipping")
continue
print(f"Posting tweet: {tweet}")
logging.info(f"Posting engagement tweet for {username}: {tweet}")
if post_tweet(author, tweet):
print(f"Successfully posted tweet for {username}")
logging.info(f"Successfully posted engagement tweet for {username}")
posted = True
# Update last_author_index to maintain round-robin consistency
state['last_author_index'] = index
save_json_file(state_file, state)
else:
print(f"Failed to post tweet for {username}")
logging.warning(f"Failed to post tweet for {username}")
# Add delay between posts (except for the last author)
if index < len(AUTHORS) - 1:
print(f"Waiting {delay_seconds} seconds before next post")
logging.info(f"Waiting {delay_seconds} seconds before next post")
time.sleep(delay_seconds)
except Exception as e:
print(f"Error posting tweet for {username}: {e}")
logging.error(f"Error posting tweet for {username}: {e}", exc_info=True)
continue
print("Completed post_engagement_tweet")
logging.info("Completed foodie_engagement_tweet.py")
sleep_time = 86400 # 1 day for cron
return posted, sleep_time
except Exception as e:
print(f"Unexpected error in post_engagement_tweet: {e}")
logging.error(f"Unexpected error in post_engagement_tweet: {e}", exc_info=True)
sleep_time = 86400 # 1 day
return False, sleep_time
def main():
"""Main function to run the script."""
print("Starting main")
lock_fd = None
try:
print("Acquiring lock")
lock_fd = acquire_lock()
print("Setting up logging")
setup_logging()
print("Updating system activity to running")
update_system_activity(SCRIPT_NAME, "running", os.getpid())
print("Checking author state file")
author_state_file = "/home/shane/foodie_automator/author_state.json"
if not os.path.exists(author_state_file):
print(f"Author state file not found: {author_state_file}")
logging.error(f"Author state file not found: {author_state_file}")
raise FileNotFoundError(f"Author state file not found: {author_state_file}")
print(f"Author state file exists: {author_state_file}")
print("Posting engagement tweet")
posted, sleep_time = post_engagement_tweet()
print("Updating system activity to stopped")
update_system_activity(SCRIPT_NAME, "stopped")
print(f"Run completed, posted: {posted}, sleep_time: {sleep_time}")
logging.info(f"Run completed, posted: {posted}, sleep_time: {sleep_time} seconds")
return posted, sleep_time
except Exception as e:
print(f"Exception in main: {e}")
logging.error(f"Fatal error in main: {e}", exc_info=True)
print(f"Fatal error: {e}")
update_system_activity(SCRIPT_NAME, "stopped")
sleep_time = 86400 # 1 day for cron
print(f"Run completed, sleep_time: {sleep_time}")
logging.info(f"Run completed, sleep_time: {sleep_time} seconds")
return False, sleep_time
finally:
if lock_fd:
print("Releasing lock")
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None
print(f"Lock file removed: {LOCK_FILE}")
if __name__ == "__main__":
post_engagement_tweet()
posted, sleep_time = main()
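
The posting loop above gates each author on check_author_rate_limit, another foodie_utils helper that is not part of this comparison. A rough sketch of a file-backed check under assumed names (x_rate_limits.json, a fixed daily cap); it returns the (can_post, remaining, reset) tuple that the later poster diff unpacks, and it assumes the per-post counting is recorded elsewhere.

# Hypothetical file-backed rate-limit check; the file path, cap and field
# names are assumptions, and incrementing the counter after each post is
# assumed to happen elsewhere.
import json
from datetime import datetime, timedelta, timezone

RATE_LIMIT_FILE = "/home/shane/foodie_automator/x_rate_limits.json"  # assumed path
DAILY_CAP = 17  # assumed per-author daily post cap

def check_author_rate_limit(author):
    """Return (can_post, remaining, reset_time) for the author's X account."""
    try:
        with open(RATE_LIMIT_FILE, "r") as f:
            limits = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        limits = {}
    now = datetime.now(timezone.utc)
    entry = limits.get(author["username"], {})
    count = entry.get("count", 0)
    reset_raw = entry.get("reset")
    reset_time = datetime.fromisoformat(reset_raw) if reset_raw else now
    if now >= reset_time:
        # The previous window has ended, so the counter starts fresh.
        count = 0
        reset_time = now + timedelta(days=1)
    remaining = max(DAILY_CAP - count, 0)
    return remaining > 0, remaining, reset_time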

File diff suppressed because it is too large

@@ -1,133 +1,395 @@
# foodie_weekly_thread.py
import json
from datetime import datetime, timedelta
import os
import logging
import random
from openai import OpenAI # Add this import
from foodie_utils import post_tweet, AUTHORS, SUMMARY_MODEL
import signal
import sys
import fcntl
import time
import re
from datetime import datetime, timedelta, timezone
from openai import OpenAI
from foodie_utils import AUTHORS, SUMMARY_MODEL, load_json_file, save_json_file, update_system_activity
from foodie_config import X_API_CREDENTIALS, RECENT_POSTS_FILE
from dotenv import load_dotenv
import shutil
# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
load_dotenv()
SCRIPT_NAME = "foodie_weekly_thread"
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_weekly_thread.lock"
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_weekly_thread.log"
WEEKLY_THREADS_FILE = "/home/shane/foodie_automator/weekly_threads.json"
LOG_PRUNE_DAYS = 30
MAX_RETRIES = 3
RETRY_BACKOFF = 2
def setup_logging():
"""Initialize logging with pruning of old logs."""
try:
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
pruned_lines = []
malformed_count = 0
for line in lines:
if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit():
malformed_count += 1
continue
try:
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
if timestamp > cutoff:
pruned_lines.append(line)
except ValueError:
malformed_count += 1
continue
if malformed_count > 0:
logging.info(f"Skipped {malformed_count} malformed log lines during pruning")
with open(LOG_FILE, 'w') as f:
f.writelines(pruned_lines)
logging.basicConfig(
filename=LOG_FILE,
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.getLogger("openai").setLevel(logging.WARNING)
logging.info("Logging initialized for foodie_weekly_thread.py")
except Exception as e:
print(f"Failed to setup logging: {e}")
sys.exit(1)
def acquire_lock():
"""Acquire a lock to prevent concurrent runs."""
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
lock_fd = open(LOCK_FILE, 'w')
try:
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
lock_fd.write(str(os.getpid()))
lock_fd.flush()
return lock_fd
except IOError:
logging.info("Another instance of foodie_weekly_thread.py is running")
sys.exit(0)
def signal_handler(sig, frame):
"""Handle termination signals gracefully."""
logging.info("Received termination signal, marking script as stopped...")
update_system_activity(SCRIPT_NAME, "stopped")
sys.exit(0)
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
# Initialize OpenAI client
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
try:
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
if not os.getenv("OPENAI_API_KEY"):
logging.error("OPENAI_API_KEY is not set in environment variables")
raise ValueError("OPENAI_API_KEY is required")
except Exception as e:
logging.error(f"Failed to initialize OpenAI client: {e}", exc_info=True)
sys.exit(1)
def generate_intro_tweet(author):
"""Generate an intro tweet for the weekly thread."""
credentials = X_API_CREDENTIALS.get(author["username"])
if not credentials:
logging.error(f"No X credentials found for {author['username']}")
return None
author_handle = credentials["x_username"]
logging.debug(f"Generating intro tweet for {author_handle}")
prompt = (
f"Generate a concise tweet (under 200 characters) for {author_handle}. "
f"Introduce a thread of their top 10 foodie posts of the week on InsiderFoodie.com. "
f"Make it engaging, create curiosity, and include a call to action to visit InsiderFoodie.com or follow {author_handle}. "
f"Avoid using the word 'elevate'—use humanized language like 'level up' or 'bring to life'. "
f"Strictly exclude emojis, hashtags, or reward-driven incentives (e.g., giveaways)."
)
for attempt in range(MAX_RETRIES):
try:
response = client.chat.completions.create(
model=SUMMARY_MODEL,
messages=[
{"role": "system", "content": "You are a social media expert crafting engaging tweets."},
{"role": "user", "content": prompt}
],
max_tokens=150,
temperature=0.7
)
tweet = response.choices[0].message.content.strip()
tweet = re.sub(r'[\U0001F000-\U0001FFFF]', '', tweet) # Remove emojis
if len(tweet) > 280:
tweet = tweet[:277] + "..."
logging.debug(f"Generated intro tweet: {tweet}")
return tweet
except Exception as e:
logging.warning(f"Failed to generate intro tweet for {author['username']} (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
else:
logging.error(f"Failed to generate intro tweet after {MAX_RETRIES} attempts")
fallback = (
f"Top 10 foodie posts this week by {author_handle}! Visit InsiderFoodie.com and follow {author_handle} for more."
)
logging.info(f"Using fallback intro tweet: {fallback}")
return fallback
RECENT_POSTS_FILE = "/home/shane/foodie_automator/recent_posts.json"
def generate_final_cta(author):
"""Generate a final CTA tweet for the weekly thread using GPT."""
credentials = X_API_CREDENTIALS.get(author["username"])
if not credentials:
logging.error(f"No X credentials found for {author['username']}")
return None
author_handle = credentials["x_username"]
logging.debug(f"Generating final CTA tweet for {author_handle}")
prompt = (
f"Generate a concise tweet (under 200 characters) for {author_handle}. "
f"Conclude a thread of their top 10 foodie posts of the week on InsiderFoodie.com. "
f"Make it engaging, value-driven, in the style of Neil Patel. "
f"Include a call to action to visit InsiderFoodie.com and follow {author_handle}. "
f"Mention that top 10 foodie trends are shared every Monday. "
f"Avoid using the word 'elevate'—use humanized language like 'level up' or 'bring to life'. "
f"Strictly exclude emojis, hashtags, or reward-driven incentives (e.g., giveaways)."
)
for attempt in range(MAX_RETRIES):
try:
response = client.chat.completions.create(
model=SUMMARY_MODEL,
messages=[
{"role": "system", "content": "You are a social media expert crafting engaging tweets."},
{"role": "user", "content": prompt}
],
max_tokens=150,
temperature=0.7
)
tweet = response.choices[0].message.content.strip()
tweet = re.sub(r'[\U0001F000-\U0001FFFF]', '', tweet) # Remove emojis
if len(tweet) > 280:
tweet = tweet[:277] + "..."
logging.debug(f"Generated final CTA tweet: {tweet}")
return tweet
except Exception as e:
logging.warning(f"Failed to generate final CTA tweet for {author['username']} (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
else:
logging.error(f"Failed to generate final CTA tweet after {MAX_RETRIES} attempts")
fallback = (
f"Want more foodie insights? Visit insiderfoodie.com and follow {author_handle} "
f"for top 10 foodie trends every Monday."
)
logging.info(f"Using fallback final CTA tweet: {fallback}")
return fallback
def load_recent_posts():
posts = []
if not os.path.exists(RECENT_POSTS_FILE):
return posts
"""Load and deduplicate posts from recent_posts.json."""
logging.debug(f"Attempting to load posts from {RECENT_POSTS_FILE}")
posts = load_json_file(RECENT_POSTS_FILE)
with open(RECENT_POSTS_FILE, 'r') as f:
for line in f:
if line.strip():
try:
entry = json.loads(line.strip())
posts.append(entry)
except json.JSONDecodeError as e:
logging.warning(f"Skipping invalid JSON line in {RECENT_POSTS_FILE}: {e}")
if not posts:
logging.warning(f"No valid posts loaded from {RECENT_POSTS_FILE}")
return []
# Deduplicate posts
unique_posts = {}
for post in posts:
try:
required_fields = ["title", "url", "author_username", "timestamp"]
if not all(key in post for key in required_fields):
logging.warning(f"Skipping invalid post: missing fields {post}")
continue
datetime.fromisoformat(post["timestamp"].replace('Z', '+00:00'))
key = (post["title"], post["url"], post["author_username"])
if key not in unique_posts:
unique_posts[key] = post
else:
logging.debug(f"Skipping duplicate post: {post['title']}")
except (KeyError, ValueError) as e:
logging.warning(f"Skipping post due to invalid format: {e}")
continue
return posts
deduped_posts = list(unique_posts.values())
logging.info(f"Loaded {len(deduped_posts)} unique posts from {RECENT_POSTS_FILE}")
return deduped_posts
def filter_posts_for_week(posts, start_date, end_date):
"""Filter posts within the given week range."""
filtered_posts = []
for post in posts:
timestamp = datetime.fromisoformat(post["timestamp"])
if start_date <= timestamp <= end_date:
filtered_posts.append(post)
try:
post_date = datetime.fromisoformat(post["timestamp"])
logging.debug(f"Checking post: title={post['title']}, timestamp={post_date}, in range {start_date} to {end_date}")
if start_date <= post_date <= end_date:
filtered_posts.append(post)
logging.debug(f"Included post: {post['title']}")
else:
logging.debug(f"Excluded post: {post['title']} (timestamp {post_date} outside range)")
except (KeyError, ValueError) as e:
logging.warning(f"Skipping post due to invalid format: {e}")
continue
logging.info(f"Filtered to {len(filtered_posts)} posts for the week")
return filtered_posts
def generate_intro_tweet(author):
author_handle = author["handle"]
prompt = (
f"Generate a concise tweet (under 280 characters) for {author_handle}. "
f"Introduce a thread of their top 10 foodie posts of the week on InsiderFoodie.com. "
f"Make it engaging, create curiosity, and include a call to action to visit InsiderFoodie.com, follow {author_handle}, or like the thread. "
f"Avoid using the word 'elevate'—use more humanized language like 'level up' or 'bring to life'. "
f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways)."
)
def generate_weekly_thread():
"""Generate weekly thread content for each author and save to file on Mondays."""
logging.info("Starting foodie_weekly_thread.py")
try:
response = client.chat.completions.create(
model=SUMMARY_MODEL,
messages=[
{"role": "system", "content": "You are a social media expert crafting engaging tweets."},
{"role": "user", "content": prompt}
],
max_tokens=100,
temperature=0.7
)
tweet = response.choices[0].message.content.strip()
if len(tweet) > 280:
tweet = tweet[:277] + "..."
return tweet
except Exception as e:
logging.warning(f"Failed to generate intro tweet for {author['username']}: {e}")
# Fallback template
return (
f"This weeks top 10 foodie finds by {author_handle} Check out the best on InsiderFoodie.com "
f"Follow {author_handle} for more and like this thread to stay in the loop Visit us at https://insiderfoodie.com"
)
def post_weekly_thread():
# Determine the date range (Monday to Sunday of the past week)
# Check if today is Monday
today = datetime.now(timezone.utc)
days_since_monday = (today.weekday() + 1) % 7 + 7 # Go back to previous Monday
start_date = (today - timedelta(days=days_since_monday)).replace(hour=0, minute=0, second=0, microsecond=0)
end_date = start_date + timedelta(days=6, hours=23, minutes=59, seconds=59)
if today.weekday() != 0: # 0 = Monday
logging.info(f"Today is not Monday (weekday: {today.weekday()}), skipping weekly thread")
return
# Calculate date range: 7 days prior to run date
start_date = (today - timedelta(days=7)).replace(hour=0, minute=0, second=0, microsecond=0)
end_date = (today - timedelta(days=1)).replace(hour=23, minute=59, second=59, microsecond=999999)
logging.info(f"Fetching posts from {start_date} to {end_date}")
# Load and filter posts
all_posts = load_recent_posts()
weekly_posts = filter_posts_for_week(all_posts, start_date, end_date)
recent_posts = load_json_file(RECENT_POSTS_FILE)
logging.info(f"Loaded {len(recent_posts)} posts from {RECENT_POSTS_FILE}")
# Deduplicate posts
seen = set()
deduped_posts = []
for post in recent_posts:
key = (post["title"], post["url"], post["author_username"])
if key not in seen:
seen.add(key)
deduped_posts.append(post)
logging.info(f"Filtered to {len(deduped_posts)} unique posts after deduplication")
weekly_posts = filter_posts_for_week(deduped_posts, start_date, end_date)
if not weekly_posts:
logging.warning(f"No posts found within the week range {start_date} to {end_date}, exiting generate_weekly_thread")
return
# Group posts by author
posts_by_author = {}
posts_by_author = {author["username"]: [] for author in AUTHORS}
for post in weekly_posts:
author = post["author_username"] # Updated to match the key in recent_posts.json
if author not in posts_by_author:
posts_by_author[author] = []
posts_by_author[author].append(post)
username = post["author_username"]
if username in posts_by_author:
posts_by_author[username].append(post)
# Generate thread content for each author
thread_content = []
timestamp = datetime.now(timezone.utc).isoformat()
# For each author, post a thread
for author in AUTHORS:
author_posts = posts_by_author.get(author["username"], [])
username = author["username"]
author_posts = posts_by_author.get(username, [])
if not author_posts:
logging.info(f"No posts found for {author['username']} this week")
logging.info(f"No posts found for {username}, skipping")
continue
# Sort by timestamp (as a proxy for interest_score) and take top 10
author_posts.sort(key=lambda x: x.get("timestamp", ""), reverse=True)
top_posts = author_posts[:10]
# Select top 2 posts (to fit within 3-tweet limit: lead + 2 posts)
author_posts = sorted(author_posts, key=lambda x: datetime.fromisoformat(x["timestamp"]), reverse=True)
selected_posts = author_posts[:2]
logging.info(f"Found {len(author_posts)} posts for {username}, selected {len(selected_posts)}")
if not top_posts:
# Generate thread content
try:
# Generate intro tweet
intro_tweet = generate_intro_tweet(author)
if not intro_tweet:
logging.error(f"Failed to generate intro tweet for {username}, skipping")
continue
# Generate thread tweets (up to 2)
thread_tweets = []
for i, post in enumerate(selected_posts, 1):
thread_tweet = (
f"{i}. {post['title']} "
f"Read more: {post['url']}"
)
if len(thread_tweet) > 280:
thread_tweet = f"{i}. {post['title'][:200]}... Read more: {post['url']}"
thread_tweets.append(thread_tweet)
logging.info(f"Generated thread tweet {i} for {username}: {thread_tweet}")
# Generate final CTA tweet
final_cta = generate_final_cta(author)
if not final_cta:
logging.error(f"Failed to generate final CTA tweet for {username}, using fallback")
final_cta = (
f"Want more foodie insights? Visit insiderfoodie.com and follow {X_API_CREDENTIALS[username]['x_username']} "
f"for top 10 foodie trends every Monday."
)
# Collect thread content for this author
author_thread = {
"username": username,
"x_handle": X_API_CREDENTIALS[username]["x_username"],
"intro_tweet": intro_tweet,
"thread_tweets": thread_tweets,
"final_cta": final_cta,
"timestamp": timestamp
}
thread_content.append(author_thread)
logging.info(f"Generated thread content for {username}")
except Exception as e:
logging.error(f"Error generating thread content for {username}: {e}", exc_info=True)
continue
# First tweet: Intro with CTA (generated by GPT)
intro_tweet = generate_intro_tweet(author)
logging.info(f"Posting intro tweet for {author['username']}: {intro_tweet}")
intro_response = post_tweet(author, intro_tweet)
if not intro_response:
logging.warning(f"Failed to post intro tweet for {author['username']}")
continue
intro_tweet_id = intro_response.get("id")
# Post each top post as a reply in the thread
for i, post in enumerate(top_posts, 1):
post_tweet_content = (
f"{i}. {post['title']} Link: {post['url']}"
)
logging.info(f"Posting thread reply {i} for {author['username']}: {post_tweet_content}")
post_tweet(author, post_tweet_content, reply_to_id=intro_tweet_id)
logging.info(f"Successfully posted weekly thread for {author['username']}")
# Save thread content to file, overwriting any existing content
if thread_content:
try:
# Backup existing file before overwriting
if os.path.exists(WEEKLY_THREADS_FILE):
backup_dir = "/home/shane/foodie_automator/backups"
os.makedirs(backup_dir, exist_ok=True)
backup_file = f"{backup_dir}/weekly_threads_{timestamp.replace(':', '-')}.json"
shutil.copy(WEEKLY_THREADS_FILE, backup_file)
logging.info(f"Backed up existing {WEEKLY_THREADS_FILE} to {backup_file}")
# Save new thread content, overwriting the file
thread_data = {
"week_start": start_date.isoformat(),
"week_end": end_date.isoformat(),
"timestamp": timestamp,
"threads": thread_content
}
save_json_file(WEEKLY_THREADS_FILE, thread_data)
logging.info(f"Saved thread content for {len(thread_content)} authors to {WEEKLY_THREADS_FILE}")
except Exception as e:
logging.error(f"Failed to save thread content to {WEEKLY_THREADS_FILE}: {e}")
else:
logging.warning("No thread content generated, nothing to save")
logging.info("Completed foodie_weekly_thread.py")
def main():
"""Main function to run the script."""
lock_fd = None
try:
lock_fd = acquire_lock()
setup_logging()
update_system_activity(SCRIPT_NAME, "running", os.getpid()) # Record start
generate_weekly_thread()
update_system_activity(SCRIPT_NAME, "stopped") # Record stop
except Exception as e:
logging.error(f"Fatal error in main: {e}", exc_info=True)
print(f"Fatal error: {e}")
update_system_activity(SCRIPT_NAME, "stopped") # Record stop on error
sys.exit(1)
finally:
if lock_fd:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None
if __name__ == "__main__":
# Run only on Sundays
if datetime.now(timezone.utc).weekday() == 6: # Sunday (0 = Monday, 6 = Sunday)
post_weekly_thread()
else:
logging.info("Not Sunday - skipping weekly thread posting")
main()
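
In this revision generate_weekly_thread only writes the intro, thread tweets and final CTA to weekly_threads.json instead of posting them, so some other process presumably consumes that file. A minimal sketch of such a consumer, assuming post_tweet returns a dict containing the new tweet's id (as the removed code above used with reply_to_id); the real posting script is not part of this comparison.

# Hypothetical consumer of weekly_threads.json; assumes post_tweet returns a
# dict with an "id" field so replies can be chained into a thread.
import json
import time

from foodie_utils import post_tweet, AUTHORS

WEEKLY_THREADS_FILE = "/home/shane/foodie_automator/weekly_threads.json"

def post_saved_threads(delay_seconds=30):
    with open(WEEKLY_THREADS_FILE, "r") as f:
        data = json.load(f)
    authors_by_name = {a["username"]: a for a in AUTHORS}
    for thread in data.get("threads", []):
        author = authors_by_name.get(thread["username"])
        if not author:
            continue
        intro = post_tweet(author, thread["intro_tweet"])
        if not intro:
            continue
        reply_to = intro.get("id")
        for tweet in thread["thread_tweets"]:
            reply = post_tweet(author, tweet, reply_to_id=reply_to)
            if reply:
                reply_to = reply.get("id")  # keep replying to the latest tweet
            time.sleep(delay_seconds)
        post_tweet(author, thread["final_cta"], reply_to_id=reply_to)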

@@ -9,7 +9,7 @@ import os
from datetime import datetime, timezone, timedelta
from openai import OpenAI
from foodie_config import OPENAI_API_KEY, AUTHORS, LIGHT_TASK_MODEL, PERSONA_CONFIGS, AUTHOR_BACKGROUNDS_FILE
from foodie_utils import load_json_file, post_tweet
from foodie_utils import load_json_file, post_tweet, check_author_rate_limit
from dotenv import load_dotenv
load_dotenv()
@@ -93,17 +93,37 @@ def generate_engagement_tweet(author, persona):
return tweet
except Exception as e:
logging.error(f"Failed to generate engagement tweet for {author['username']}: {e}")
return f"What’s your take on {theme}? Let’s talk! #FoodieTrends"
return f"What's your take on {theme}? Let's talk!"
def get_next_author_round_robin():
for author in AUTHORS:
# Check if the author can post before generating the tweet
can_post, remaining, reset = check_author_rate_limit(author)
if can_post:
return author
return None
def main():
global is_posting
logging.info("***** X Poster Launched *****")
for author in AUTHORS:
is_posting = True
# Get next available author using round-robin
author = get_next_author_round_robin()
if not author:
logging.info("No authors available due to rate limits")
return random.randint(600, 1800)
is_posting = True
try:
tweet = generate_engagement_tweet(author, author["persona"])
post_tweet(author, tweet)
if post_tweet(author, tweet):
logging.info(f"Successfully posted engagement tweet for {author['username']}")
else:
logging.warning(f"Failed to post engagement tweet for {author['username']}")
except Exception as e:
logging.error(f"Error posting engagement tweet for {author['username']}: {e}", exc_info=True)
finally:
is_posting = False
time.sleep(random.uniform(3600, 7200))
logging.info("X posting completed")
return random.randint(600, 1800)
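
The get_next_author_round_robin added here always scans AUTHORS from the top, so whichever author clears the rate limit first is picked on every run. If true rotation is wanted, the last index could be persisted in author_state.json the way the engagement script does; a rough sketch under that assumption, not what this diff implements.

# Hypothetical state-aware rotation; assumes author_state.json stores
# {"last_author_index": N} as in the engagement script above.
from foodie_utils import AUTHORS, check_author_rate_limit, load_json_file, save_json_file

STATE_FILE = "/home/shane/foodie_automator/author_state.json"

def get_next_author_round_robin():
    state = load_json_file(STATE_FILE, default={"last_author_index": -1})
    start = (state.get("last_author_index", -1) + 1) % len(AUTHORS)
    for offset in range(len(AUTHORS)):
        index = (start + offset) % len(AUTHORS)
        author = AUTHORS[index]
        can_post, remaining, reset = check_author_rate_limit(author)
        if can_post:
            state["last_author_index"] = index
            save_json_file(STATE_FILE, state)
            return author
    return None  # every author is currently rate-limited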

@@ -1,95 +1,195 @@
#!/bin/bash
# Directory to monitor
BASE_DIR="/home/shane/foodie_automator"
LOG_DIR="$BASE_DIR/logs"
LOCK_DIR="$BASE_DIR/locks"
LOG_FILE="$LOG_DIR/manage_scripts.log"
VENV_PYTHON="$BASE_DIR/venv/bin/python"
CHECKSUM_FILE="$BASE_DIR/.file_checksum"
LOG_FILE="$BASE_DIR/manage_scripts.log"
# Log function
mkdir -p "$LOG_DIR" "$LOCK_DIR" || { echo "Error: Failed to create directories"; exit 1; }
log() {
echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "$LOG_FILE"
echo "$1"
}
# Calculate checksum of files (excluding logs, JSON files, and venv)
calculate_checksum() {
find "$BASE_DIR" -type f \
-not -path "$BASE_DIR/*.log" \
-not -path "$BASE_DIR/logs/*" \
-not -path "$BASE_DIR/*.json" \
-not -path "$BASE_DIR/.file_checksum" \
-not -path "$BASE_DIR/venv/*" \
-not -path "$BASE_DIR/locks/*" \
-exec sha256sum {} \; | sort | sha256sum | awk '{print $1}'
}
# Check if scripts are running
check_running() {
pgrep -f "python3.*foodie_automator" > /dev/null
local script_name="$1"
local lock_file="$LOCK_DIR/${script_name}.lock"
if [ -f "$lock_file" ]; then
local pid=$(cat "$lock_file")
if ps -p "$pid" > /dev/null; then
log "$script_name is already running (PID: $pid)"
return 0
else
log "Stale lock file for $script_name, removing"
rm -f "$lock_file"
fi
fi
return 1
}
# Stop scripts
stop_scripts() {
log "Stopping scripts..."
pkill -TERM -f "python3.*foodie_automator" || true
sleep 10
pkill -9 -f "python3.*foodie_automator" || true
log "Scripts stopped."
run_script() {
local script="$1"
local script_name="${script%.py}"
local script_log="$LOG_DIR/${script_name}.log"
if check_running "$script_name"; then
echo "0" # Skip sleep
return 1
fi
log "Running $script..."
"$VENV_PYTHON" "$BASE_DIR/$script" >> "$script_log" 2>&1 &
local pid=$!
echo "$pid" > "$LOCK_DIR/${script_name}.lock"
wait "$pid"
local exit_code=$?
if [ $exit_code -eq 0 ]; then
log "$script completed successfully"
else
log "$script failed with exit code $exit_code"
fi
sleep_time=$(grep "sleep_time:" "$script_log" | tail -n 1 | grep -oP 'sleep_time: \K[0-9]+' || echo $((RANDOM % 601 + 1200)))
log "$script completed, sleep_time: $sleep_time seconds"
rm -f "$LOCK_DIR/${script_name}.lock"
echo "$sleep_time"
}
# Start scripts
start_scripts() {
log "Starting scripts..."
cd "$BASE_DIR"
source venv/bin/activate
# Find all foodie_automator_*.py scripts and start them
for script in foodie_automator_*.py; do
stop_scripts() {
log "Stopping scripts..."
for script in foodie_automator_rss.py foodie_automator_reddit.py foodie_automator_google.py; do
if [ -f "$script" ]; then
log "Starting $script..."
nohup python3 "$script" >> "${script%.py}.log" 2>&1 &
local script_name="${script%.py}"
if pkill -TERM -f "$VENV_PYTHON.*$script_name"; then
log "Sent TERM to $script_name"
sleep 2
pkill -9 -f "$VENV_PYTHON.*$script_name" || true
else
log "No running $script_name found"
fi
rm -f "$LOCK_DIR/${script_name}.lock"
log "Removed lock file for $script_name"
fi
done
log "All scripts started."
log "Scripts stopped."
}
# Update dependencies
update_dependencies() {
log "Updating dependencies..."
cd "$BASE_DIR"
# Create venv if it doesn't exist
cd "$BASE_DIR" || { log "Failed to change to $BASE_DIR"; exit 1; }
if [ ! -d "venv" ]; then
python3 -m venv venv
log "Created new virtual environment"
fi
source venv/bin/activate
pip install --upgrade pip
pip install -r requirements.txt || (pip install requests openai beautifulsoup4 feedparser praw duckduckgo_search selenium Pillow pytesseract webdriver-manager && log "Fallback: Installed core dependencies")
source "$BASE_DIR/venv/bin/activate"
log "Dependencies updated."
}
# Main logic
if [ "$1" == "stop" ]; then
log "Received stop command, stopping all scripts..."
stop_scripts
for script in foodie_engagement_generator.py foodie_weekly_thread.py; do
script_name="${script%.py}"
if pkill -TERM -f "$VENV_PYTHON.*$script_name"; then
log "Sent TERM to $script_name"
sleep 2
pkill -9 -f "$VENV_PYTHON.*$script_name" || true
else
log "No running $script_name found"
fi
rm -f "$LOCK_DIR/$script_name.lock"
log "Stopped $script_name"
done
log "All scripts stopped. Reminder: Disable cron jobs (crontab -e)."
exit 0
fi
if [ "$1" == "start" ]; then
log "Received start command, starting all scripts..."
cd "$BASE_DIR" || { log "Failed to change to $BASE_DIR"; exit 1; }
source "$BASE_DIR/venv/bin/activate"
if [ -f "$BASE_DIR/.env" ]; then
while IFS='=' read -r key value; do
if [[ ! -z "$key" && ! "$key" =~ ^# ]]; then
export "$key=$value"
fi
done < <(grep -v '^#' "$BASE_DIR/.env")
log ".env variables loaded"
else
log "Error: .env file not found"
exit 1
fi
for script in foodie_automator_rss.py foodie_automator_reddit.py foodie_automator_google.py; do
if [ -f "$script" ]; then
sleep_time=$(run_script "$script" | tail -n 1)
if [ "$sleep_time" != "0" ]; then
log "Sleeping for $sleep_time seconds after $script"
sleep "$sleep_time"
fi
else
log "Script $script not found"
fi
done
if [ -f "foodie_engagement_generator.py" ]; then
if ! check_running "foodie_engagement_generator"; then
log "Running foodie_engagement_generator.py..."
"$VENV_PYTHON" "foodie_engagement_generator.py" >> "$LOG_DIR/foodie_engagement_generator.log" 2>&1
log "foodie_engagement_generator.py completed"
fi
fi
log "All scripts started. Ensure cron jobs are enabled (crontab -l)."
exit 0
fi
log "Checking for file changes..."
CURRENT_CHECKSUM=$(calculate_checksum)
if [ -f "$CHECKSUM_FILE" ]; then
PREVIOUS_CHECKSUM=$(cat "$CHECKSUM_FILE")
else
PREVIOUS_CHECKSUM=""
fi
if [ "$CURRENT_CHECKSUM" != "$PREVIOUS_CHECKSUM" ]; then
log "File changes detected. Previous checksum: $PREVIOUS_CHECKSUM, Current checksum: $CURRENT_CHECKSUM"
# Stop scripts if running
if check_running; then
if pgrep -f "$VENV_PYTHON.*foodie_automator" > /dev/null; then
stop_scripts
fi
# Update dependencies
update_dependencies
# Start scripts
start_scripts
# Update checksum
echo "$CURRENT_CHECKSUM" > "$CHECKSUM_FILE"
log "Checksum updated."
fi
cd "$BASE_DIR"
source "$BASE_DIR/venv/bin/activate"
if [ -f "$BASE_DIR/.env" ]; then
while IFS='=' read -r key value; do
if [[ ! -z "$key" && ! "$key" =~ ^# ]]; then
export "$key=$value"
fi
done < <(grep -v '^#' "$BASE_DIR/.env")
log ".env variables loaded"
else
log "No file changes detected."
fi
log "Error: .env file not found"
exit 1
fi
for script in foodie_automator_rss.py foodie_automator_reddit.py foodie_automator_google.py; do
if [ -f "$script" ]; then
sleep_time=$(run_script "$script" | tail -n 1)
if [ "$sleep_time" != "0" ]; then
log "Sleeping for $sleep_time seconds after $script"
sleep "$sleep_time"
fi
else
log "Script $script not found"
fi
done
log "All scripts processed."
exit 0

@@ -8,6 +8,8 @@ Pillow==11.1.0
pytesseract==0.3.13
feedparser==6.0.11
webdriver-manager==4.0.2
tweepy==4.14.0
python-dotenv==1.0.1
flickr-api==0.7.1
tweepy==4.15.0
python-dotenv==1.1.0
flickr-api==0.7.7
filelock==3.16.1
requests-oauthlib==2.0.0
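
filelock is pinned here even though the scripts in this comparison lock with fcntl.flock directly. If the intent is to switch to the library, a minimal sketch of the same acquire-or-exit behaviour with an assumed lock path; this is not what the current scripts do.

# Hypothetical filelock-based equivalent of acquire_lock(); the scripts in
# this repo currently use fcntl.flock instead.
import sys
from filelock import FileLock, Timeout

LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_engagement_tweet.lock"  # example path

def acquire_lock():
    lock = FileLock(LOCK_FILE + ".flock")
    try:
        lock.acquire(timeout=0)  # fail immediately if another instance holds it
        return lock
    except Timeout:
        print("Another instance is running, exiting")
        sys.exit(0)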