@@ -8,6 +8,7 @@ from PIL import Image
 import pytesseract
 import io
 import tempfile
 import shutil
 import requests
 import time
 import openai
@@ -28,6 +29,8 @@ from foodie_config import (
 )
 
 last_author_index = -1
+# Global to track round-robin index
+round_robin_index = 0
 
 load_dotenv()
 client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
@@ -36,86 +39,73 @@ IMAGE_UPLOAD_TIMEOUT = 30 # Added to fix NameError
 IMAGE_EXPIRATION_DAYS = 7 # 7 days, consistent with foodie_automator_rss.py
 
 def load_json_file(file_path, expiration_hours=None):
-    """Load JSON file and return its contents as a list."""
+    """
+    Load JSON file, optionally filtering out expired entries.
+    """
+    logger = logging.getLogger(__name__)
+    default = []
+
+    if not os.path.exists(file_path):
+        logger.info(f"File {file_path} does not exist. Returning default: {default}")
+        return default
+
     try:
-        if not os.path.exists(file_path):
-            logging.info(f"File {file_path} does not exist, initializing with empty list")
-            with open(file_path, 'w') as f:
-                json.dump([], f)
-            return []
-
         with open(file_path, 'r') as f:
             data = json.load(f)
 
-        if not isinstance(data, list):
-            logging.warning(f"Data in {file_path} is not a list, resetting to empty list")
-            with open(file_path, 'w') as f:
-                json.dump([], f)
-            return []
-
-        valid_entries = []
-        if expiration_hours:
+        if expiration_hours is not None:
             cutoff = datetime.now(timezone.utc) - timedelta(hours=expiration_hours)
-            for entry in data:
-                try:
-                    timestamp_str = entry.get("timestamp")
-                    if timestamp_str:
-                        timestamp = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
-                        if timestamp < cutoff:
-                            continue
-                    valid_entries.append(entry)
-                except (ValueError, TypeError) as e:
-                    logging.warning(f"Skipping malformed entry in {file_path}: {e}")
-                    continue
-        else:
-            valid_entries = data
-
-        logging.info(f"Loaded {len(valid_entries)} valid entries from {file_path}")
-        return valid_entries
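+            # Keep only entries whose ISO-format 'timestamp' is newer than the cutoff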
+            filtered_data = [
+                entry for entry in data
+                if datetime.fromisoformat(entry['timestamp']) > cutoff
+            ]
+            if len(filtered_data) < len(data):
+                logger.info(f"Filtered {len(data) - len(filtered_data)} expired entries from {file_path}")
+                save_json_file(file_path, filtered_data) # Save filtered data
+            data = filtered_data
+
+        logger.info(f"Loaded {len(data)} valid entries from {file_path}")
+        return data
     except json.JSONDecodeError as e:
-        logging.error(f"Invalid JSON in {file_path}: {e}. Resetting to empty list.")
-        with open(file_path, 'w') as f:
-            json.dump([], f)
-        return []
+        logger.error(f"Invalid JSON in {file_path}: {str(e)}. Resetting to default.")
+        save_json_file(file_path, default)
+        return default
     except Exception as e:
-        logging.error(f"Failed to load JSON file {file_path}: {e}")
-        return []
-
-def save_json_file(filename, data):
-    """Save data to a JSON file with locking to prevent corruption, without resetting on error."""
-    lock = FileLock(f"{filename}.lock")
+        logger.error(f"Failed to load {file_path}: {str(e)}. Returning default.")
+        return default
+
+def save_json_file(file_path, data, timestamp=None):
+    """
+    Save data to JSON file atomically. If timestamp is provided, append as an entry.
+    """
+    logger = logging.getLogger(__name__)
     try:
-        with lock:
-            # Read existing data
-            existing_data = []
-            try:
-                if os.path.exists(filename):
-                    with open(filename, 'r') as f:
-                        existing_data = json.load(f)
-                    if not isinstance(existing_data, list):
-                        logging.warning(f"Data in {filename} is not a list. Resetting to empty list.")
-                        existing_data = []
-            except (json.JSONDecodeError, FileNotFoundError) as e:
-                # If the file is corrupted, log the error and skip writing to preserve existing data
-                if isinstance(e, json.JSONDecodeError):
-                    logging.error(f"Invalid JSON in {filename}: {e}. Skipping write to preserve existing data.")
-                    return
-                logging.warning(f"File {filename} not found: {e}. Starting with empty list.")
-
-            # Append new data if it's not already present
-            if isinstance(data, list):
-                existing_data.extend([item for item in data if item not in existing_data])
+        # If timestamp is provided, append as a new entry
+        if timestamp:
+            current_data = load_json_file(file_path)
+            new_entry = {'title': data, 'timestamp': timestamp}
+            if new_entry not in current_data: # Avoid duplicates
+                current_data.append(new_entry)
+                data = current_data
             else:
-                if data not in existing_data:
-                    existing_data.append(data)
-
-            # Write back to file
-            with open(filename, 'w') as f:
-                json.dump(existing_data, f, indent=2)
-            logging.info(f"Saved data to {filename}")
-    except Exception as e:
-        logging.error(f"Failed to save to {filename}: {e}", exc_info=True)
-        raise
+                logger.info(f"Entry {data} already exists in {file_path}")
+                return True
+
+        # Validate JSON
+        json.dumps(data)
+
+        # Write to temp file
+        temp_file = tempfile.NamedTemporaryFile('w', delete=False, encoding='utf-8')
+        with open(temp_file.name, 'w', encoding='utf-8') as f:
+            json.dump(data, f, indent=2)
+
+        # Atomically move to target
+        shutil.move(temp_file.name, file_path)
+        logger.info(f"Saved data to {file_path}")
+        return True
+    except (json.JSONDecodeError, IOError) as e:
+        logger.error(f"Failed to save {file_path}: {str(e)}")
+        return False
 
 def generate_article_tweet(author, post, persona):
     title = post["title"]
@@ -1136,64 +1126,61 @@ def check_rate_limit(response):
         logging.warning(f"Failed to parse rate limit headers: {e}")
         return None, None
 
-def check_author_rate_limit(author):
-    """Check if the author can post based on Twitter API rate limits."""
-    from foodie_config import X_API_CREDENTIALS
-    import tweepy
-
-    credentials = X_API_CREDENTIALS.get(author["username"])
-    if not credentials:
-        logging.error(f"No X credentials found for {author['username']}")
-        return False, 0, 0
-
-    try:
-        client = tweepy.Client(
-            consumer_key=credentials["api_key"],
-            consumer_secret=credentials["api_secret"],
-            access_token=credentials["access_token"],
-            access_token_secret=credentials["access_token_secret"]
-        )
-        # Make a lightweight API call to check rate limits
-        response = client.get_me()
-        remaining, reset = check_rate_limit(response)
-        if remaining is None or reset is None:
-            logging.warning(f"Could not determine rate limits for {author['username']}. Assuming rate-limited.")
-            return False, 0, 0
-        can_post = remaining > 0
-        if not can_post:
-            logging.info(f"Author {author['username']} is rate-limited. Remaining: {remaining}, Reset at: {reset}")
-        return can_post, remaining, reset
-    except tweepy.TweepyException as e:
-        logging.error(f"Failed to check rate limits for {author['username']}: {e}")
-        return False, 0, 0
-    except Exception as e:
-        logging.error(f"Unexpected error checking rate limits for {author['username']}: {e}", exc_info=True)
-        return False, 0, 0
+def check_author_rate_limit(author, max_requests=10, window_seconds=3600):
+    """
+    Check if an author is rate-limited.
+    """
+    logger = logging.getLogger(__name__)
+    rate_limit_file = '/home/shane/foodie_automator/rate_limit_info.json'
+    rate_limit_info = load_json_file(rate_limit_file) or {}  # load_json_file takes no default argument; fall back to an empty dict
+
+    if author['username'] not in rate_limit_info:
+        rate_limit_info[author['username']] = {
+            'remaining': max_requests,
+            'reset': time.time()
+        }
+
+    info = rate_limit_info[author['username']]
+    current_time = time.time()
+
+    # Reset if window expired
+    if current_time >= info['reset']:
+        info['remaining'] = max_requests
+        info['reset'] = current_time + window_seconds
+        logger.info(f"Reset rate limit for {author['username']}: {max_requests} requests available")
+        save_json_file(rate_limit_file, rate_limit_info)
+
+    if info['remaining'] <= 0:
+        reset_time = datetime.fromtimestamp(info['reset'], tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
+        logger.info(f"Author {author['username']} is rate-limited. Remaining: {info['remaining']}, Reset at: {reset_time}")
+        return True
+
+    # Decrement remaining requests
+    info['remaining'] -= 1
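+    # Persist the updated counter so later calls in this window see the decrement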
+    save_json_file(rate_limit_file, rate_limit_info)
+    logger.info(f"Updated rate limit for {author['username']}: {info['remaining']} requests remaining")
+    return False
 
 def get_next_author_round_robin():
-    """Select the next author in round-robin fashion, respecting rate limits."""
-    global last_author_index
-    authors = AUTHORS
-    num_authors = len(authors)
-    if num_authors == 0:
-        logging.error("No authors available in AUTHORS list.")
-        return None
-
-    # Try each author in round-robin order
-    for i in range(num_authors):
-        last_author_index = (last_author_index + 1) % num_authors
-        author = authors[last_author_index]
-        can_post, remaining, reset = check_author_rate_limit(author)
-        if can_post:
-            logging.info(f"Author {author['username']} can post")
+    """
+    Select the next author using round-robin, respecting rate limits.
+    """
+    from foodie_config import AUTHORS
+    global round_robin_index
+    logger = logging.getLogger(__name__)
+
+    for _ in range(len(AUTHORS)):
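+        # Try each author at most once, skipping any that are currently rate-limited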
+        author = AUTHORS[round_robin_index % len(AUTHORS)]
+        round_robin_index = (round_robin_index + 1) % len(AUTHORS)
+
+        if not check_author_rate_limit(author):
+            logger.info(f"Selected author via round-robin: {author['username']}")
             return author
-        else:
-            reset_time = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(reset)) if reset else "Unknown"
-            logging.info(f"Skipping author {author['username']} due to rate limit. Remaining: {remaining}, Reset at: {reset_time}")
-
-    # If no authors are available, return None instead of falling back
-    logging.warning("No authors available due to rate limits. Skipping posting.")
-    return None
+
+    logger.warning("No authors available due to rate limits. Selecting a random author as fallback.")
+    author = random.choice(AUTHORS)
+    logger.info(f"Selected author via random fallback: {author['username']}")
+    return author
 
 def prepare_post_data(summary, title, main_topic=None):
     try:
|