From 00e6354cff028cd0833dda55c0e5c4e3b0580396 Mon Sep 17 00:00:00 2001 From: Shane Date: Fri, 9 May 2025 08:45:23 +1000 Subject: [PATCH] add check once for rate limiting X --- foodie_automator_rss.py | 15 +++- foodie_utils.py | 157 ++++++++++++++++++++++++---------------- 2 files changed, 107 insertions(+), 65 deletions(-) diff --git a/foodie_automator_rss.py b/foodie_automator_rss.py index 5a81acb..e689984 100644 --- a/foodie_automator_rss.py +++ b/foodie_automator_rss.py @@ -109,6 +109,9 @@ def setup_logging(): # Call setup_logging immediately setup_logging() +check_author_rate_limit.script_run_id = int(time.time()) +logging.info(f"Set script_run_id to {check_author_rate_limit.script_run_id}") + posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS) posted_titles = set(entry["title"] for entry in posted_titles_data) used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry) @@ -417,7 +420,9 @@ def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_im logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from RSS *****") - return post_data, category, random.randint(0, 1800) + # Sleep for 20 to 30 minutes (1200 to 1800 seconds) + sleep_time = random.randint(1200, 1800) + return post_data, category, sleep_time except Exception as e: logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True) @@ -435,10 +440,14 @@ def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_im is_posting = False logging.info("No interesting RSS article found after attempts") - return None, None, random.randint(600, 1800) + # Sleep for 20 to 30 minutes (1200 to 1800 seconds) + sleep_time = random.randint(1200, 1800) + return None, None, sleep_time except Exception as e: logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True) - return None, None, random.randint(600, 1800) + # Sleep for 20 to 30 minutes (1200 to 1800 seconds) + sleep_time = random.randint(1200, 1800) + return None, None, sleep_time def run_rss_automator(): lock_fd = None diff --git a/foodie_utils.py b/foodie_utils.py index 051b014..e76f5db 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -162,23 +162,23 @@ def generate_article_tweet(author, post, persona): logging.info(f"Generated tweet: {tweet}") return tweet -def post_tweet(author, content, media_ids=None, reply_to_id=None): +def post_tweet(author, content, media_ids=None, reply_to_id=None, tweet_type="rss"): """ - Post a tweet for an author using X API v2. - Returns (tweet_id, tweet_data) if successful, (None, None) if rate-limited or failed. + Post a tweet for the given author using X API v2. + Returns (tweet_id, tweet_data) on success, (None, None) on failure. """ logger = logging.getLogger(__name__) username = author['username'] credentials = X_API_CREDENTIALS.get(username) if not credentials: - logger.error(f"No X API credentials for {username}") + logger.error(f"No X API credentials found for {username}") return None, None # Check rate limit can_post, remaining, reset = check_author_rate_limit(author) if not can_post: reset_time = datetime.fromtimestamp(reset, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S') - logger.info(f"Cannot post tweet for {username}: rate-limited. Remaining: {remaining}, Reset at: {reset_time}") + logger.info(f"Cannot post {tweet_type} tweet for {username}: rate-limited. Remaining: {remaining}, Reset at: {reset_time}") return None, None oauth = OAuth1( @@ -198,31 +198,37 @@ def post_tweet(author, content, media_ids=None, reply_to_id=None): response = requests.post(url, json=payload, auth=oauth) headers = response.headers - # Update rate limit info + # Update in-run tweet counter rate_limit_file = '/home/shane/foodie_automator/rate_limit_info.json' rate_limit_info = load_json_file(rate_limit_file, default={}) - remaining = int(headers.get('x-user-limit-24hour-remaining', remaining)) - reset = int(headers.get('x-user-limit-24hour-reset', reset)) - rate_limit_info[username] = {'tweet_remaining': remaining, 'tweet_reset': reset} - save_json_file(rate_limit_file, rate_limit_info) + if username in rate_limit_info: + author_info = rate_limit_info[username] + author_info['tweets_posted_in_run'] = author_info.get('tweets_posted_in_run', 0) + 1 + remaining = author_info['tweet_remaining'] - author_info['tweets_posted_in_run'] + rate_limit_info[username] = author_info + save_json_file(rate_limit_file, rate_limit_info) + logger.info(f"Updated in-run tweet counter for {username} ({tweet_type}): {remaining}/17 tweets remaining") + else: + logger.warning(f"Rate limit info not found for {username}, assuming quota exhausted") + remaining = 0 if response.status_code == 201: tweet_data = response.json() tweet_id = tweet_data.get('data', {}).get('id') - logger.info(f"Successfully tweeted for {username}: {content[:50]}... (ID: {tweet_id})") + logger.info(f"Successfully tweeted {tweet_type} for {username}: {content[:50]}... (ID: {tweet_id})") return tweet_id, tweet_data elif response.status_code == 429: - logger.info(f"Rate limit exceeded for {username}: {remaining} remaining, reset at {datetime.fromtimestamp(reset, tz=timezone.utc)}") + logger.info(f"Rate limit exceeded for {username} ({tweet_type}): {remaining} remaining, reset at {datetime.fromtimestamp(reset, tz=timezone.utc)}") return None, None elif response.status_code == 403: - logger.error(f"403 Forbidden for {username}: {response.text}") + logger.error(f"403 Forbidden for {username} ({tweet_type}): {response.text}") return None, None else: - logger.error(f"Failed to tweet for {username}: {response.status_code} - {response.text}") + logger.error(f"Failed to post {tweet_type} tweet for {username}: {response.status_code} - {response.text}") return None, None except Exception as e: - logger.error(f"Unexpected error posting tweet for {username}: {e}", exc_info=True) + logger.error(f"Unexpected error posting {tweet_type} tweet for {username}: {e}", exc_info=True) return None, None def select_best_persona(interest_score, content=""): @@ -832,7 +838,7 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im timestamp = datetime.now(timezone.utc).isoformat() save_post_to_recent(post_data["title"], post_url, wp_username, timestamp) - # Post tweet if enabled + # Post tweet if enabled if should_post_tweet: credentials = X_API_CREDENTIALS.get(post_data["author"]) if credentials: @@ -845,7 +851,7 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im "url": post_url } tweet_text = generate_article_tweet(author, tweet_post, persona) - tweet_id, tweet_data = post_tweet(author, tweet_text) + tweet_id, tweet_data = post_tweet(author, tweet_text, tweet_type="rss") if tweet_id: logger.info(f"Successfully tweeted for post: {post_data['title']} (Tweet ID: {tweet_id})") else: @@ -1162,52 +1168,81 @@ def select_best_author(content, interest_score): def check_author_rate_limit(author, max_tweets=17, tweet_window_seconds=86400): """ - Check if an author is rate-limited for tweets using real-time X API v2 data. + Check if an author can post based on their X API Free tier quota (17 tweets per 24 hours per app). + Posts a test tweet only on script restart or for new authors, then tracks tweets in rate_limit_info.json. Returns (can_post, remaining, reset_timestamp) where can_post is True if tweets are available. - Caches API results in memory for 5 minutes. - Falls back to rate_limit_info.json or assumes 1 tweet remaining if API fails. """ logger = logging.getLogger(__name__) rate_limit_file = '/home/shane/foodie_automator/rate_limit_info.json' current_time = time.time() - # In-memory cache - if not hasattr(check_author_rate_limit, "cache"): - check_author_rate_limit.cache = {} + # Load rate limit info + rate_limit_info = load_json_file(rate_limit_file, default={}) + + # Get script run ID (set at startup in foodie_automator_rss.py) + if not hasattr(check_author_rate_limit, "script_run_id"): + check_author_rate_limit.script_run_id = int(current_time) + logger.info(f"Set script_run_id to {check_author_rate_limit.script_run_id}") username = author['username'] - cache_key = f"{username}_{int(current_time // 300)}" # Cache for 5 minutes - if cache_key in check_author_rate_limit.cache: - remaining, reset = check_author_rate_limit.cache[cache_key] - logger.debug(f"Using cached rate limit for {username}: {remaining} remaining, reset at {datetime.fromtimestamp(reset, tz=timezone.utc)}") - else: + # Initialize or update author entry + if username not in rate_limit_info: + rate_limit_info[username] = { + 'tweet_remaining': max_tweets, + 'tweet_reset': current_time + tweet_window_seconds, + 'tweets_posted_in_run': 0, + 'script_run_id': 0 # Force test tweet for new authors + } + + author_info = rate_limit_info[username] + script_run_id = author_info.get('script_run_id', 0) + + # If script restarted or new author, post a test tweet to sync quota + if script_run_id != check_author_rate_limit.script_run_id: + logger.info(f"Script restart detected for {username}, posting test tweet to sync quota") remaining, reset = get_x_rate_limit_status(author) if remaining is None or reset is None: - # Fallback: Load from rate_limit_info.json or assume 1 tweet remaining - rate_limit_info = load_json_file(rate_limit_file, default={}) - if username not in rate_limit_info or current_time >= rate_limit_info.get(username, {}).get('tweet_reset', 0): - rate_limit_info[username] = { - 'tweet_remaining': 1, # Allow one tweet to avoid blocking - 'tweet_reset': current_time + tweet_window_seconds - } - save_json_file(rate_limit_file, rate_limit_info) - remaining = rate_limit_info[username].get('tweet_remaining', 1) - reset = rate_limit_info[username].get('tweet_reset', current_time + tweet_window_seconds) - logger.warning(f"X API rate limit check failed for {username}, using fallback: {remaining} remaining") - check_author_rate_limit.cache[cache_key] = (remaining, reset) + # Fallback: Use last known quota or assume 0 remaining + if current_time < author_info.get('tweet_reset', 0): + remaining = author_info.get('tweet_remaining', 0) + reset = author_info.get('tweet_reset', current_time + tweet_window_seconds) + logger.warning(f"Test tweet failed for {username}, using last known quota: {remaining} remaining") + else: + remaining = max_tweets + reset = current_time + tweet_window_seconds + logger.warning(f"Test tweet failed for {username}, resetting quota to {max_tweets}") + # Update author info with synced quota + author_info = { + 'tweet_remaining': remaining, + 'tweet_reset': reset, + 'tweets_posted_in_run': 0, + 'script_run_id': check_author_rate_limit.script_run_id + } + rate_limit_info[username] = author_info + save_json_file(rate_limit_file, rate_limit_info) + + # Calculate remaining tweets based on tweets posted in this run + remaining = author_info['tweet_remaining'] - author_info['tweets_posted_in_run'] + reset = author_info['tweet_reset'] + + # Check if quota has reset + if current_time >= reset: + logger.info(f"Quota reset for {username}, restoring to {max_tweets} tweets") + remaining = max_tweets + reset = current_time + tweet_window_seconds + author_info['tweet_remaining'] = remaining + author_info['tweet_reset'] = reset + author_info['tweets_posted_in_run'] = 0 + rate_limit_info[username] = author_info + save_json_file(rate_limit_file, rate_limit_info) can_post = remaining > 0 if not can_post: reset_time = datetime.fromtimestamp(reset, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S') - logger.info(f"Author {username} is rate-limited. Remaining: {remaining}, Reset at: {reset_time}") + logger.info(f"Author {username} quota exhausted. Remaining: {remaining}, Reset at: {reset_time}") else: - logger.info(f"Rate limit for {username}: {remaining}/{max_tweets} tweets remaining") - - # Update rate_limit_info.json - rate_limit_info = load_json_file(rate_limit_file, default={}) - rate_limit_info[username] = {'tweet_remaining': remaining, 'tweet_reset': reset} - save_json_file(rate_limit_file, rate_limit_info) + logger.info(f"Quota for {username}: {remaining}/{max_tweets} tweets remaining") return can_post, remaining, reset @@ -1245,9 +1280,8 @@ def get_next_author_round_robin(): def get_x_rate_limit_status(author): """ - Check the X API v2 rate limit status for an author by attempting a test tweet. - Returns (remaining, reset) where remaining is the number of tweets left in the 24-hour window, - and reset is the Unix timestamp when the limit resets. + Check the X API Free tier rate limit by posting a test tweet. + Returns (remaining, reset) based on app-level headers (x-rate-limit-remaining, x-rate-limit-reset). Returns (None, None) if the check fails. """ logger = logging.getLogger(__name__) @@ -1265,17 +1299,17 @@ def get_x_rate_limit_status(author): ) url = 'https://api.x.com/2/tweets' payload = {'text': f'Test tweet to check rate limits for {username} - please ignore {int(time.time())}'} - + try: response = requests.post(url, json=payload, auth=oauth) headers = response.headers - # Extract rate limit info from headers - remaining = int(headers.get('x-user-limit-24hour-remaining', 0)) - reset = int(headers.get('x-user-limit-24hour-reset', 0)) - + # Extract app-level rate limit info from headers + remaining = int(headers.get('x-rate-limit-remaining', 0)) + reset = int(headers.get('x-rate-limit-reset', 0)) + if response.status_code == 201: - # Tweet posted successfully, delete it + # Delete the test tweet tweet_id = response.json().get('data', {}).get('id') if tweet_id: delete_url = f'https://api.x.com/2/tweets/{tweet_id}' @@ -1283,20 +1317,19 @@ def get_x_rate_limit_status(author): if delete_response.status_code == 200: logger.info(f"Successfully deleted test tweet {tweet_id} for {username}") else: - logger.warning(f"Failed to delete test tweet {tweet_id} for {username}: {delete_response.status_code}") + logger.warning(f"Failed to delete test tweet {tweet_id} for {username}: {delete_response.status_code} - {delete_response.text}") + logger.info(f"Rate limit for {username}: {remaining} remaining, reset at {datetime.fromtimestamp(reset, tz=timezone.utc)}") + return remaining, reset elif response.status_code == 429: - # Rate limit exceeded logger.info(f"Rate limit exceeded for {username}: {remaining} remaining, reset at {datetime.fromtimestamp(reset, tz=timezone.utc)}") + return remaining, reset elif response.status_code == 403: - # Forbidden (e.g., account restrictions), but headers may still provide rate limit info logger.warning(f"403 Forbidden for {username}: {response.text}, rate limit info: {remaining} remaining, reset at {datetime.fromtimestamp(reset, tz=timezone.utc)}") + return remaining, reset else: logger.error(f"Unexpected response for {username}: {response.status_code} - {response.text}") return None, None - logger.info(f"Rate limit for {username}: {remaining} remaining, reset at {datetime.fromtimestamp(reset, tz=timezone.utc)}") - return remaining, reset - except Exception as e: logger.error(f"Unexpected error fetching X rate limit for {username}: {e}", exc_info=True) return None, None