Rate Limit Handling
This commit is contained in:
+12
-13
@@ -36,9 +36,21 @@ import fcntl
|
|||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
|
# Define constants at the top
|
||||||
|
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_google_titles.json'
|
||||||
|
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
|
||||||
|
EXPIRATION_HOURS = 24
|
||||||
|
IMAGE_EXPIRATION_DAYS = 7
|
||||||
|
|
||||||
is_posting = False
|
is_posting = False
|
||||||
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_google.lock"
|
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_google.lock"
|
||||||
|
|
||||||
|
# Load JSON files after constants are defined
|
||||||
|
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
|
||||||
|
posted_titles = set(entry["title"] for entry in posted_titles_data if "title" in entry)
|
||||||
|
used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
|
||||||
|
used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
|
||||||
|
|
||||||
def signal_handler(sig, frame):
|
def signal_handler(sig, frame):
|
||||||
logging.info("Received termination signal, checking if safe to exit...")
|
logging.info("Received termination signal, checking if safe to exit...")
|
||||||
if is_posting:
|
if is_posting:
|
||||||
@@ -55,10 +67,6 @@ LOG_PRUNE_DAYS = 30
|
|||||||
MAX_RETRIES = 3
|
MAX_RETRIES = 3
|
||||||
RETRY_BACKOFF = 2
|
RETRY_BACKOFF = 2
|
||||||
|
|
||||||
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
|
|
||||||
posted_titles = set(entry["title"] for entry in posted_titles_data)
|
|
||||||
used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
|
|
||||||
|
|
||||||
def setup_logging():
|
def setup_logging():
|
||||||
if os.path.exists(LOG_FILE):
|
if os.path.exists(LOG_FILE):
|
||||||
with open(LOG_FILE, 'r') as f:
|
with open(LOG_FILE, 'r') as f:
|
||||||
@@ -105,15 +113,6 @@ def setup_logging():
|
|||||||
|
|
||||||
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
|
|
||||||
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_google_titles.json'
|
|
||||||
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
|
|
||||||
EXPIRATION_HOURS = 24
|
|
||||||
IMAGE_EXPIRATION_DAYS = 7
|
|
||||||
|
|
||||||
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
|
|
||||||
posted_titles = set(entry["title"] for entry in posted_titles_data)
|
|
||||||
used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
|
|
||||||
|
|
||||||
def acquire_lock():
|
def acquire_lock():
|
||||||
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
|
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
|
||||||
lock_fd = open(LOCK_FILE, 'w')
|
lock_fd = open(LOCK_FILE, 'w')
|
||||||
|
|||||||
@@ -125,7 +125,16 @@ def acquire_lock():
|
|||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
def clean_reddit_title(title):
    """Clean a Reddit post title by removing prefixes, newlines, and special characters.

    Processing steps, in order:
      1. Remove one leading ``[prefix]`` tag (and the whitespace after it).
      2. Drop every character that is not a word character, whitespace,
         or one of ``. , ! ? -``.
      3. Collapse every whitespace run (newlines included) to a single
         space and trim both ends.

    Args:
        title: Raw submission title.

    Returns:
        The cleaned title, or an empty string when ``title`` is empty or
        not a ``str`` (a warning is logged in that case).
    """
    if not title or not isinstance(title, str):
        logging.warning(f"Invalid title received: {title}")
        return ""

    # Remove a leading [prefix] tag such as "[Homemade]".
    cleaned_title = re.sub(r'^\[.*?\]\s*', '', title)

    # Remove special characters (keep alphanumeric, spaces, and basic punctuation).
    # This runs BEFORE whitespace normalization: deleting a character that sat
    # between two spaces ("Tacos & Beer") or at an edge ("🍕 Pizza") would
    # otherwise leave a double space or a dangling blank in the result.
    cleaned_title = re.sub(r'[^\w\s.,!?-]', '', cleaned_title)

    # Normalize whitespace last; \s+ also covers newlines, so no separate
    # newline pass is needed.
    cleaned_title = re.sub(r'\s+', ' ', cleaned_title).strip()

    logging.info(f"Cleaned Reddit title from '{title}' to '{cleaned_title}'")
    return cleaned_title
|
||||||
|
|
||||||
@@ -223,6 +232,7 @@ def fetch_duckduckgo_news_context(title, hours=24):
|
|||||||
return title
|
return title
|
||||||
|
|
||||||
def fetch_reddit_posts():
|
def fetch_reddit_posts():
|
||||||
|
"""Fetch Reddit posts from specified subreddits, filtering low-quality posts."""
|
||||||
try:
|
try:
|
||||||
reddit = praw.Reddit(
|
reddit = praw.Reddit(
|
||||||
client_id=REDDIT_CLIENT_ID,
|
client_id=REDDIT_CLIENT_ID,
|
||||||
@@ -244,15 +254,26 @@ def fetch_reddit_posts():
|
|||||||
logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
|
logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
|
||||||
continue
|
continue
|
||||||
cleaned_title = clean_reddit_title(submission.title)
|
cleaned_title = clean_reddit_title(submission.title)
|
||||||
|
if not cleaned_title or len(cleaned_title) < 5:
|
||||||
|
logging.info(f"Skipping post with invalid or short title: {submission.title}")
|
||||||
|
continue
|
||||||
|
# Filter out posts with empty or very short summaries
|
||||||
|
summary = submission.selftext.strip() if submission.selftext else ""
|
||||||
|
if len(summary) < 20 and not submission.url.endswith(('.jpg', '.jpeg', '.png', '.gif')):
|
||||||
|
logging.info(f"Skipping post with insufficient summary: {cleaned_title}")
|
||||||
|
continue
|
||||||
|
# Fetch top comments for additional context
|
||||||
|
top_comments = get_top_comments(f"https://www.reddit.com{submission.permalink}", reddit)
|
||||||
articles.append({
|
articles.append({
|
||||||
"title": cleaned_title,
|
"title": cleaned_title,
|
||||||
"raw_title": submission.title,
|
"raw_title": submission.title,
|
||||||
"link": f"https://www.reddit.com{submission.permalink}",
|
"link": f"https://www.reddit.com{submission.permalink}",
|
||||||
"summary": submission.selftext,
|
"summary": summary,
|
||||||
"feed_title": get_clean_source_name(subreddit_name),
|
"feed_title": get_clean_source_name(subreddit_name),
|
||||||
"pub_date": pub_date,
|
"pub_date": pub_date,
|
||||||
"upvotes": submission.score,
|
"upvotes": submission.score,
|
||||||
"comment_count": submission.num_comments
|
"comment_count": submission.num_comments,
|
||||||
|
"top_comments": top_comments
|
||||||
})
|
})
|
||||||
logging.info(f"Fetched {len(articles)} posts from r/{subreddit_name}")
|
logging.info(f"Fetched {len(articles)} posts from r/{subreddit_name}")
|
||||||
break
|
break
|
||||||
@@ -283,16 +304,18 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
|
|||||||
title = post["title"]
|
title = post["title"]
|
||||||
link = post.get("link", "")
|
link = post.get("link", "")
|
||||||
summary = post.get("summary", "")
|
summary = post.get("summary", "")
|
||||||
source_name = post.get("source", "Reddit")
|
source_name = post.get("feed_title", "Reddit")
|
||||||
original_source = f'<a href="{link}">{source_name}</a>'
|
original_source = f'<a href="{link}">{source_name}</a>'
|
||||||
original_url = link # Store for fallback
|
original_url = link
|
||||||
|
upvotes = post.get("upvotes", 0)
|
||||||
|
comment_count = post.get("comment_count", 0)
|
||||||
|
top_comments = post.get("top_comments", [])
|
||||||
|
|
||||||
if title in posted_titles:
|
if title in posted_titles:
|
||||||
logging.info(f"Skipping already posted Reddit post: {title}")
|
logging.info(f"Skipping already posted Reddit post: {title}")
|
||||||
attempts += 1
|
attempts += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check author availability before GPT calls
|
|
||||||
author = get_next_author_round_robin()
|
author = get_next_author_round_robin()
|
||||||
if not author:
|
if not author:
|
||||||
logging.info(f"Skipping post '{title}' due to tweet rate limits for all authors")
|
logging.info(f"Skipping post '{title}' due to tweet rate limits for all authors")
|
||||||
@@ -317,8 +340,10 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
ddg_context = fetch_duckduckgo_news_context(title)
|
ddg_context = fetch_duckduckgo_news_context(title)
|
||||||
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
|
# Log full scoring content for debugging
|
||||||
interest_score = is_interesting(scoring_content)
|
scoring_content = f"Title: {title}\n\nContent: {summary}\n\nTop Comments: {top_comments}\n\nAdditional Context: {ddg_context}"
|
||||||
|
logging.debug(f"Scoring content for '{title}': {scoring_content}")
|
||||||
|
interest_score = is_interesting_reddit(title, summary, upvotes, comment_count, top_comments)
|
||||||
logging.info(f"Interest score for '{title}': {interest_score}")
|
logging.info(f"Interest score for '{title}': {interest_score}")
|
||||||
if interest_score < 6:
|
if interest_score < 6:
|
||||||
logging.info(f"Reddit Interest Too Low: {interest_score}")
|
logging.info(f"Reddit Interest Too Low: {interest_score}")
|
||||||
|
|||||||
+8
-15
@@ -1324,31 +1324,20 @@ def check_author_rate_limit(author, max_tweets=17, tweet_window_seconds=86400):
|
|||||||
author_info = rate_limit_info[username]
|
author_info = rate_limit_info[username]
|
||||||
script_run_id = author_info.get('script_run_id', 0)
|
script_run_id = author_info.get('script_run_id', 0)
|
||||||
|
|
||||||
# Check if quota has reset based on previous reset time
|
|
||||||
reset = author_info.get('tweet_reset', current_time + tweet_window_seconds)
|
|
||||||
if current_time >= reset:
|
|
||||||
logger.info(f"Quota reset for {username}, restoring to {max_tweets} tweets")
|
|
||||||
author_info['tweet_remaining'] = max_tweets
|
|
||||||
author_info['tweet_reset'] = current_time + tweet_window_seconds
|
|
||||||
author_info['tweets_posted_in_run'] = 0
|
|
||||||
author_info['script_run_id'] = check_author_rate_limit.script_run_id
|
|
||||||
rate_limit_info[username] = author_info
|
|
||||||
save_json_file(rate_limit_file, rate_limit_info)
|
|
||||||
|
|
||||||
# If script restarted or new author, post a test tweet to sync quota
|
# If script restarted or new author, post a test tweet to sync quota
|
||||||
if script_run_id != check_author_rate_limit.script_run_id:
|
if script_run_id != check_author_rate_limit.script_run_id:
|
||||||
logger.info(f"Script restart detected for {username}, posting test tweet to sync quota")
|
logger.info(f"Script restart detected for {username}, posting test tweet to sync quota")
|
||||||
remaining, api_reset = get_x_rate_limit_status(author)
|
remaining, api_reset = get_x_rate_limit_status(author)
|
||||||
if remaining is None or api_reset is None:
|
if remaining is None or api_reset is None:
|
||||||
# Fallback: Use last known quota or assume 0 remaining
|
# Fallback: Use last known quota or assume 0 remaining
|
||||||
if current_time < author_info.get('tweet_reset', 0):
|
if current_time < author_info.get('tweet_reset', current_time + tweet_window_seconds):
|
||||||
remaining = author_info.get('tweet_remaining', 0)
|
remaining = author_info.get('tweet_remaining', 0)
|
||||||
reset = author_info.get('tweet_reset', current_time + tweet_window_seconds)
|
reset = author_info.get('tweet_reset', current_time + tweet_window_seconds)
|
||||||
logger.warning(f"Test tweet failed for {username}, using last known quota: {remaining} remaining")
|
logger.warning(f"Test tweet failed for {username}, using last known quota: {remaining} remaining")
|
||||||
else:
|
else:
|
||||||
remaining = max_tweets
|
remaining = 0 # Assume exhausted if API fails and reset time has passed
|
||||||
reset = current_time + tweet_window_seconds
|
reset = current_time + tweet_window_seconds
|
||||||
logger.warning(f"Test tweet failed for {username}, resetting quota to {max_tweets}")
|
logger.warning(f"Test tweet failed for {username}, assuming quota exhausted")
|
||||||
else:
|
else:
|
||||||
remaining = min(remaining, max_tweets) # Ensure within Free tier limit
|
remaining = min(remaining, max_tweets) # Ensure within Free tier limit
|
||||||
reset = api_reset
|
reset = api_reset
|
||||||
@@ -1360,9 +1349,13 @@ def check_author_rate_limit(author, max_tweets=17, tweet_window_seconds=86400):
|
|||||||
author_info['script_run_id'] = check_author_rate_limit.script_run_id
|
author_info['script_run_id'] = check_author_rate_limit.script_run_id
|
||||||
rate_limit_info[username] = author_info
|
rate_limit_info[username] = author_info
|
||||||
save_json_file(rate_limit_file, rate_limit_info)
|
save_json_file(rate_limit_file, rate_limit_info)
|
||||||
|
else:
|
||||||
|
# Use existing quota without resetting
|
||||||
|
remaining = author_info.get('tweet_remaining', max_tweets)
|
||||||
|
reset = author_info.get('tweet_reset', current_time + tweet_window_seconds)
|
||||||
|
|
||||||
# Calculate remaining tweets
|
# Calculate remaining tweets
|
||||||
remaining = author_info['tweet_remaining'] - author_info['tweets_posted_in_run']
|
remaining = remaining - author_info.get('tweets_posted_in_run', 0)
|
||||||
|
|
||||||
can_post = remaining > 0
|
can_post = remaining > 0
|
||||||
if not can_post:
|
if not can_post:
|
||||||
|
|||||||
Reference in New Issue
Block a user