diff --git a/foodie_automator_rss.py b/foodie_automator_rss.py
index 62968db..246b990 100644
--- a/foodie_automator_rss.py
+++ b/foodie_automator_rss.py
@@ -336,7 +336,7 @@ def curate_from_rss():
         final_summary = insert_link_naturally(final_summary, source_name, link)
 
-        # Use round-robin author selection
+        # Select author
         author = get_next_author_round_robin()
         author_username = author["username"]
         logging.info(f"Selected author via round-robin: {author_username}")
@@ -362,13 +362,16 @@ def curate_from_rss():
         page_url = None
         hook = get_dynamic_hook(post_data["title"]).strip()
-        share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
+        share_text = f"Check out this foodie gem! {post_data['title']}"
+        share_text_encoded = quote(share_text)
         share_links_template = (
            f'…'
        )
+
+        # Prepare post content with share links (to be updated later with post_url)
         post_data["content"] = f"{final_summary}\n\n{share_links_template}"
 
         global is_posting
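A note on the template above (its markup is elided in this diff view): `share_text_encoded` is interpolated immediately when the f-string is built, while `{post_url}` is deliberately deferred to a later `.format()` call once the post URL exists. If the elided markup is an f-string, the deferred placeholder must be written with doubled braces, or the first interpolation raises a `NameError`. A minimal sketch with hypothetical anchor markup:

```python
from urllib.parse import quote

share_text_encoded = quote("Check out this foodie gem! Example Title")

# Hypothetical stand-in for the elided share markup. The doubled braces in
# {{post_url}} survive f-string interpolation as the literal {post_url}
# placeholder, ready for the later .format() call.
share_links_template = (
    f'<a href="https://twitter.com/intent/tweet?'
    f'text={share_text_encoded}&url={{post_url}}">Share on X</a>'
)

# Once the post URL is known:
share_links = share_links_template.format(
    post_url=quote("https://example.com/?p=123", safe="")
)
print(share_links)
```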
@@ -392,6 +395,26 @@ def curate_from_rss():
                logging.warning(f"Failed to post to WordPress for '{title}'")
                attempts += 1
                continue
+
+            # Update content with actual post_url
+            post_url_encoded = quote(post_url)
+            share_links = share_links_template.format(post_url=post_url_encoded)
+            post_data["content"] = f"{final_summary}\n\n{share_links}"
+            post_data["post_id"] = post_id  # For update
+            post_to_wp(
+                post_data=post_data,
+                category=category,
+                link=link,
+                author=author,
+                image_url=None,  # No need to re-upload image
+                original_source=original_source,
+                image_source=image_source,
+                uploader=uploader,
+                page_url=page_url,
+                interest_score=interest_score,
+                post_id=post_id,
+                should_post_tweet=False
+            )
         except Exception as e:
             print(f"WordPress posting error for '{title}': {e}")
             logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
@@ -400,49 +423,21 @@ def curate_from_rss():
         finally:
             is_posting = False
 
-        if post_id:
-            share_text = f"Check out this foodie gem! {post_data['title']}"
-            share_text_encoded = quote(share_text)
-            post_url_encoded = quote(post_url)
-            share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
-            post_data["content"] = f"{final_summary}\n\n{share_links}"
-            is_posting = True
-            try:
-                post_to_wp(
-                    post_data=post_data,
-                    category=category,
-                    link=link,
-                    author=author,
-                    image_url=image_url,
-                    original_source=original_source,
-                    image_source=image_source,
-                    uploader=uploader,
-                    page_url=page_url,
-                    interest_score=interest_score,
-                    post_id=post_id,
-                    should_post_tweet=False
-                )
-            except Exception as e:
-                print(f"Failed to update WordPress post '{title}' with share links: {e}")
-                logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
-            finally:
-                is_posting = False
-
-            timestamp = datetime.now(timezone.utc).isoformat()
-            save_json_file(POSTED_TITLES_FILE, title, timestamp)
-            posted_titles.add(title)
-            print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
-            logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
-
-            if image_url:
-                save_json_file(USED_IMAGES_FILE, image_url, timestamp)
-                used_images.add(image_url)
-                print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
-                logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
-
-            print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
-            logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
-            return post_data, category, random.randint(0, 1800)
+        timestamp = datetime.now(timezone.utc).isoformat()
+        save_json_file(POSTED_TITLES_FILE, title, timestamp)
+        posted_titles.add(title)
+        print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
+        logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
+
+        if image_url:
+            save_json_file(USED_IMAGES_FILE, image_url, timestamp)
+            used_images.add(image_url)
+            print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
+            logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
+
+        print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
+        logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
+        return post_data, category, random.randint(0, 1800)
 
         attempts += 1
         print(f"WP posting failed for '{post_data['title']}'")
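The hunk above replaces the old conditional second pass with an unconditional update inside the main try block: post once to learn the permalink, then update the same post with share links built from it. A minimal sketch of that create-then-update flow, with a stub standing in for `post_to_wp` (its `(post_id, post_url)` return shape is assumed from the surrounding code):

```python
from urllib.parse import quote

def post_to_wp_stub(post_data, post_id=None):
    """Stand-in for post_to_wp: create on the first call, update when post_id is given."""
    if post_id is None:
        return 123, "https://example.com/?p=123"   # pretend WordPress assigned these
    return post_id, f"https://example.com/?p={post_id}"

share_links_template = "Share: https://twitter.com/intent/tweet?url={post_url}"
post_data = {"title": "Example", "content": "summary"}

# Pass 1: create the post to learn its real URL.
post_id, post_url = post_to_wp_stub(post_data)

# Pass 2: rebuild the body around the real URL and update the same post.
post_data["content"] = "summary\n\n" + share_links_template.format(
    post_url=quote(post_url, safe="")
)
post_data["post_id"] = post_id
post_to_wp_stub(post_data, post_id=post_id)
```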
@@ -460,18 +455,14 @@ def run_rss_automator():
     lock_fd = None
     try:
         lock_fd = acquire_lock()
-        print(f"{datetime.now(timezone.utc)} - INFO - ***** RSS Automator Launched *****")
         logging.info("***** RSS Automator Launched *****")
         post_data, category, sleep_time = curate_from_rss()
         if not post_data:
-            print("No postable RSS article found")
             logging.info("No postable RSS article found")
-        print(f"Sleeping for {sleep_time}s")
         logging.info(f"Completed run with sleep time: {sleep_time} seconds")
         time.sleep(sleep_time)
         return post_data, category, sleep_time
     except Exception as e:
-        print(f"Fatal error in run_rss_automator: {e}")
         logging.error(f"Fatal error in run_rss_automator: {e}", exc_info=True)
         return None, None, random.randint(600, 1800)
     finally:
diff --git a/foodie_utils.py b/foodie_utils.py
index 8c53235..8f755b3 100644
--- a/foodie_utils.py
+++ b/foodie_utils.py
@@ -8,6 +8,7 @@ from PIL import Image
 import pytesseract
 import io
 import tempfile
+import shutil
 import requests
 import time
 import openai
@@ -28,6 +29,8 @@ from foodie_config import (
 )
 
 last_author_index = -1
+# Global to track round-robin index
+round_robin_index = 0
 
 load_dotenv()
 client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
@@ -36,86 +39,73 @@ IMAGE_UPLOAD_TIMEOUT = 30  # Added to fix NameError
 IMAGE_EXPIRATION_DAYS = 7  # 7 days, consistent with foodie_automator_rss.py
 
-def load_json_file(file_path, expiration_hours=None):
-    """Load JSON file and return its contents as a list."""
+def load_json_file(file_path, expiration_hours=None, default=None):
+    """
+    Load JSON file, optionally filtering out expired entries.
+    """
+    logger = logging.getLogger(__name__)
+    default = [] if default is None else default
+
+    if not os.path.exists(file_path):
+        logger.info(f"File {file_path} does not exist. Returning default: {default}")
+        return default
+
     try:
-        if not os.path.exists(file_path):
-            logging.info(f"File {file_path} does not exist, initializing with empty list")
-            with open(file_path, 'w') as f:
-                json.dump([], f)
-            return []
-
         with open(file_path, 'r') as f:
             data = json.load(f)
-
-        if not isinstance(data, list):
-            logging.warning(f"Data in {file_path} is not a list, resetting to empty list")
-            with open(file_path, 'w') as f:
-                json.dump([], f)
-            return []
-
-        valid_entries = []
-        if expiration_hours:
+
+        if expiration_hours is not None:
             cutoff = datetime.now(timezone.utc) - timedelta(hours=expiration_hours)
-            for entry in data:
-                try:
-                    timestamp_str = entry.get("timestamp")
-                    if timestamp_str:
-                        timestamp = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
-                        if timestamp < cutoff:
-                            continue
-                    valid_entries.append(entry)
-                except (ValueError, TypeError) as e:
-                    logging.warning(f"Skipping malformed entry in {file_path}: {e}")
-                    continue
-        else:
-            valid_entries = data
-
-        logging.info(f"Loaded {len(valid_entries)} valid entries from {file_path}")
-        return valid_entries
+            filtered_data = [
+                entry for entry in data
+                if datetime.fromisoformat(entry['timestamp'].replace('Z', '+00:00')) > cutoff
+            ]
+            if len(filtered_data) < len(data):
+                logger.info(f"Filtered {len(data) - len(filtered_data)} expired entries from {file_path}")
+                save_json_file(file_path, filtered_data)  # Save filtered data
+            data = filtered_data
+
+        logger.info(f"Loaded {len(data)} valid entries from {file_path}")
+        return data
     except json.JSONDecodeError as e:
-        logging.error(f"Invalid JSON in {file_path}: {e}. Resetting to empty list.")
-        with open(file_path, 'w') as f:
-            json.dump([], f)
-        return []
+        logger.error(f"Invalid JSON in {file_path}: {str(e)}. Resetting to default.")
+        save_json_file(file_path, default)
+        return default
     except Exception as e:
-        logging.error(f"Failed to load JSON file {file_path}: {e}")
-        return []
+        logger.error(f"Failed to load {file_path}: {str(e)}. Returning default.")
+        return default
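The rewritten loader prunes expired entries at read time by comparing stored ISO-8601 timestamps against a cutoff. A self-contained sketch of just that filtering step (titles and hours are illustrative):

```python
from datetime import datetime, timedelta, timezone

entries = [
    {"title": "stale", "timestamp": (datetime.now(timezone.utc) - timedelta(hours=200)).isoformat()},
    {"title": "fresh", "timestamp": datetime.now(timezone.utc).isoformat()},
]

cutoff = datetime.now(timezone.utc) - timedelta(hours=72)
fresh = [
    entry for entry in entries
    if datetime.fromisoformat(entry["timestamp"].replace("Z", "+00:00")) > cutoff
]
print([entry["title"] for entry in fresh])  # ['fresh']
```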
Resetting to default.") + save_json_file(file_path, default) + return default except Exception as e: - logging.error(f"Failed to load JSON file {file_path}: {e}") - return [] - -def save_json_file(filename, data): - """Save data to a JSON file with locking to prevent corruption, without resetting on error.""" - lock = FileLock(f"{filename}.lock") + logger.error(f"Failed to load {file_path}: {str(e)}. Returning default.") + return default + +def save_json_file(file_path, data, timestamp=None): + """ + Save data to JSON file atomically. If timestamp is provided, append as an entry. + """ + logger = logging.getLogger(__name__) try: - with lock: - # Read existing data - existing_data = [] - try: - if os.path.exists(filename): - with open(filename, 'r') as f: - existing_data = json.load(f) - if not isinstance(existing_data, list): - logging.warning(f"Data in {filename} is not a list. Resetting to empty list.") - existing_data = [] - except (json.JSONDecodeError, FileNotFoundError) as e: - # If the file is corrupted, log the error and skip writing to preserve existing data - if isinstance(e, json.JSONDecodeError): - logging.error(f"Invalid JSON in {filename}: {e}. Skipping write to preserve existing data.") - return - logging.warning(f"File {filename} not found: {e}. Starting with empty list.") - - # Append new data if it's not already present - if isinstance(data, list): - existing_data.extend([item for item in data if item not in existing_data]) + # If timestamp is provided, append as a new entry + if timestamp: + current_data = load_json_file(file_path) + new_entry = {'title': data, 'timestamp': timestamp} + if new_entry not in current_data: # Avoid duplicates + current_data.append(new_entry) + data = current_data else: - if data not in existing_data: - existing_data.append(data) - - # Write back to file - with open(filename, 'w') as f: - json.dump(existing_data, f, indent=2) - logging.info(f"Saved data to {filename}") - except Exception as e: - logging.error(f"Failed to save to {filename}: {e}", exc_info=True) - raise + logger.info(f"Entry {data} already exists in {file_path}") + return True + + # Validate JSON + json.dumps(data) + + # Write to temp file + temp_file = tempfile.NamedTemporaryFile('w', delete=False, encoding='utf-8') + with open(temp_file.name, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=2) + + # Atomically move to target + shutil.move(temp_file.name, file_path) + logger.info(f"Saved data to {file_path}") + return True + except (json.JSONDecodeError, IOError) as e: + logger.error(f"Failed to save {file_path}: {str(e)}") + return False def generate_article_tweet(author, post, persona): title = post["title"] @@ -1136,64 +1126,61 @@ def check_rate_limit(response): logging.warning(f"Failed to parse rate limit headers: {e}") return None, None -def check_author_rate_limit(author): - """Check if the author can post based on Twitter API rate limits.""" - from foodie_config import X_API_CREDENTIALS - import tweepy - - credentials = X_API_CREDENTIALS.get(author["username"]) - if not credentials: - logging.error(f"No X credentials found for {author['username']}") - return False, 0, 0 - - try: - client = tweepy.Client( - consumer_key=credentials["api_key"], - consumer_secret=credentials["api_secret"], - access_token=credentials["access_token"], - access_token_secret=credentials["access_token_secret"] - ) - # Make a lightweight API call to check rate limits - response = client.get_me() - remaining, reset = check_rate_limit(response) - if remaining is None or reset is None: - 
logging.warning(f"Could not determine rate limits for {author['username']}. Assuming rate-limited.") - return False, 0, 0 - can_post = remaining > 0 - if not can_post: - logging.info(f"Author {author['username']} is rate-limited. Remaining: {remaining}, Reset at: {reset}") - return can_post, remaining, reset - except tweepy.TweepyException as e: - logging.error(f"Failed to check rate limits for {author['username']}: {e}") - return False, 0, 0 - except Exception as e: - logging.error(f"Unexpected error checking rate limits for {author['username']}: {e}", exc_info=True) - return False, 0, 0 +def check_author_rate_limit(author, max_requests=10, window_seconds=3600): + """ + Check if an author is rate-limited. + """ + logger = logging.getLogger(__name__) + rate_limit_file = '/home/shane/foodie_automator/rate_limit_info.json' + rate_limit_info = load_json_file(rate_limit_file, default={}) + + if author['username'] not in rate_limit_info: + rate_limit_info[author['username']] = { + 'remaining': max_requests, + 'reset': time.time() + } + + info = rate_limit_info[author['username']] + current_time = time.time() + + # Reset if window expired + if current_time >= info['reset']: + info['remaining'] = max_requests + info['reset'] = current_time + window_seconds + logger.info(f"Reset rate limit for {author['username']}: {max_requests} requests available") + save_json_file(rate_limit_file, rate_limit_info) + + if info['remaining'] <= 0: + reset_time = datetime.fromtimestamp(info['reset'], tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S') + logger.info(f"Author {author['username']} is rate-limited. Remaining: {info['remaining']}, Reset at: {reset_time}") + return True + + # Decrement remaining requests + info['remaining'] -= 1 + save_json_file(rate_limit_file, rate_limit_info) + logger.info(f"Updated rate limit for {author['username']}: {info['remaining']} requests remaining") + return False def get_next_author_round_robin(): - """Select the next author in round-robin fashion, respecting rate limits.""" - global last_author_index - authors = AUTHORS - num_authors = len(authors) - if num_authors == 0: - logging.error("No authors available in AUTHORS list.") - return None - - # Try each author in round-robin order - for i in range(num_authors): - last_author_index = (last_author_index + 1) % num_authors - author = authors[last_author_index] - can_post, remaining, reset = check_author_rate_limit(author) - if can_post: - logging.info(f"Author {author['username']} can post") + """ + Select the next author using round-robin, respecting rate limits. + """ + from foodie_config import AUTHORS + global round_robin_index + logger = logging.getLogger(__name__) + + for _ in range(len(AUTHORS)): + author = AUTHORS[round_robin_index % len(AUTHORS)] + round_robin_index = (round_robin_index + 1) % len(AUTHORS) + + if not check_author_rate_limit(author): + logger.info(f"Selected author via round-robin: {author['username']}") return author - else: - reset_time = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(reset)) if reset else "Unknown" - logging.info(f"Skipping author {author['username']} due to rate limit. Remaining: {remaining}, Reset at: {reset_time}") - - # If no authors are available, return None instead of falling back - logging.warning("No authors available due to rate limits. Skipping posting.") - return None + + logger.warning("No authors available due to rate limits. 
 
 def prepare_post_data(summary, title, main_topic=None):
     try: