From 01bab56eb6b55eb2a60846e0170414f650250590 Mon Sep 17 00:00:00 2001
From: Shane
Date: Thu, 8 May 2025 16:02:23 +1000
Subject: [PATCH] Update Google & Reddit rate limiting in real time

---
 foodie_automator_google.py | 121 ++++++++++++++-----------------------
 foodie_automator_reddit.py | 118 ++++++++++++++----------------------
 foodie_automator_rss.py    |  32 +++-------
 foodie_utils.py            |  37 ++++++------
 4 files changed, 117 insertions(+), 191 deletions(-)

diff --git a/foodie_automator_google.py b/foodie_automator_google.py
index 2f500bf..822fe5d 100644
--- a/foodie_automator_google.py
+++ b/foodie_automator_google.py
@@ -248,17 +248,12 @@ def fetch_duckduckgo_news_context(trend_title, hours=24):
         logging.error(f"Failed to fetch DuckDuckGo News context for '{trend_title}' after {MAX_RETRIES} attempts")
         return trend_title
 
-def curate_from_google_trends():
+def curate_from_google_trends(posted_titles_data, posted_titles, used_images_data, used_images):
     try:
-        global posted_titles_data, posted_titles, used_images
-        posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
-        posted_titles = set(entry["title"] for entry in posted_titles_data)
-        used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
-        logging.debug(f"Loaded {len(posted_titles)} posted titles and {len(used_images)} used images")
+        logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")
 
-        trends = fetch_google_trends()
+        trends = scrape_google_trends()
         if not trends:
-            print("No Google Trends data available")
             logging.info("No Google Trends data available")
             return None, None, False
 
@@ -273,24 +268,20 @@ def curate_from_google_trends():
             original_source = f'{source_name}'
 
             if title in posted_titles:
-                print(f"Skipping already posted trend: {title}")
                 logging.info(f"Skipping already posted trend: {title}")
                 attempts += 1
                 continue
 
-            print(f"Trying Google Trend: {title} from {source_name}")
             logging.info(f"Trying Google Trend: {title} from {source_name}")
 
             try:
                 image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
             except Exception as e:
-                print(f"Smart image/filter error for '{title}': {e}")
                 logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
                 attempts += 1
                 continue
 
             if skip:
-                print(f"Skipping filtered trend: {title}")
                 logging.info(f"Skipping filtered trend: {title}")
                 attempts += 1
                 continue
@@ -298,10 +289,8 @@ def curate_from_google_trends():
             ddg_context = fetch_duckduckgo_news_context(title)
             scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
             interest_score = is_interesting(scoring_content)
-            print(f"Interest Score for '{title[:50]}...': {interest_score}")
             logging.info(f"Interest score for '{title}': {interest_score}")
             if interest_score < 6:
-                print(f"Trend Interest Too Low: {interest_score}")
                 logging.info(f"Trend Interest Too Low: {interest_score}")
                 attempts += 1
                 continue
@@ -324,14 +313,12 @@ def curate_from_google_trends():
                 extra_prompt=extra_prompt
             )
             if not final_summary:
-                print(f"Summary failed for '{title}'")
                 logging.info(f"Summary failed for '{title}'")
                 attempts += 1
                 continue
 
             final_summary = insert_link_naturally(final_summary, source_name, link)
 
-            # Use round-robin author selection
             author = get_next_author_round_robin()
             author_username = author["username"]
             logging.info(f"Selected author via round-robin: {author_username}")
@@ -346,11 +333,9 @@ def curate_from_google_trends():
             category = post_data["categories"][0]
             image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
             if not image_url:
-                print(f"Flickr image fetch failed for '{image_query}', trying fallback")
                 logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
                 image_url, image_source, uploader, page_url = get_image(image_query)
             if not image_url:
-                print(f"All image uploads failed for '{title}' - posting without image")
                 logging.warning(f"All image uploads failed for '{title}' - posting without image")
                 image_source = None
                 uploader = None
@@ -358,9 +343,11 @@ def curate_from_google_trends():
             hook = get_dynamic_hook(post_data["title"]).strip()
             share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
+            share_text = f"Check out this foodie gem! {post_data['title']}"
+            share_text_encoded = quote(share_text)
             share_links_template = (
                 f'{share_prompt} '
-                f' '
+                f' '
                 f' '
             )
             post_data["content"] = f"{final_summary}\n\n{share_links_template}"
 
@@ -382,72 +369,53 @@ def curate_from_google_trends():
                     should_post_tweet=True
                 )
                 if not post_id:
-                    print(f"Failed to post to WordPress for '{title}'")
                     logging.warning(f"Failed to post to WordPress for '{title}'")
                     attempts += 1
                     continue
+
+                # Update post with actual post_url
+                post_url_encoded = quote(post_url)
+                share_links = share_links_template.format(post_url=post_url_encoded)
+                post_data["content"] = f"{final_summary}\n\n{share_links}"
+                post_data["post_id"] = post_id
+                post_to_wp(
+                    post_data=post_data,
+                    category=category,
+                    link=link,
+                    author=author,
+                    image_url=None,  # Skip image re-upload
+                    original_source=original_source,
+                    image_source=image_source,
+                    uploader=uploader,
+                    page_url=page_url,
+                    interest_score=interest_score,
+                    post_id=post_id,
+                    should_post_tweet=False
+                )
             except Exception as e:
-                print(f"WordPress posting error for '{title}': {e}")
                 logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
                 attempts += 1
                 continue
             finally:
                 is_posting = False
 
-            if post_id:
-                share_text = f"Check out this foodie gem! {post_data['title']}"
-                share_text_encoded = quote(share_text)
-                post_url_encoded = quote(post_url)
-                share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
-                post_data["content"] = f"{final_summary}\n\n{share_links}"
-                is_posting = True
-                try:
-                    post_to_wp(
-                        post_data=post_data,
-                        category=category,
-                        link=link,
-                        author=author,
-                        image_url=image_url,
-                        original_source=original_source,
-                        image_source=image_source,
-                        uploader=uploader,
-                        page_url=page_url,
-                        interest_score=interest_score,
-                        post_id=post_id,
-                        should_post_tweet=False
-                    )
-                except Exception as e:
-                    print(f"Failed to update WordPress post '{title}' with share links: {e}")
-                    logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
-                finally:
-                    is_posting = False
-
-                timestamp = datetime.now(timezone.utc).isoformat()
-                save_json_file(POSTED_TITLES_FILE, title, timestamp)
-                posted_titles.add(title)
-                print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
-                logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
-
-                if image_url:
-                    save_json_file(USED_IMAGES_FILE, image_url, timestamp)
-                    used_images.add(image_url)
-                    print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
-                    logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
-
-                print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
-                logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
-                return post_data, category, True
-
-            attempts += 1
-            print(f"WP posting failed for '{post_data['title']}'")
-            logging.info(f"WP posting failed for '{post_data['title']}'")
-
-        print("No interesting Google Trend found after attempts")
+            timestamp = datetime.now(timezone.utc).isoformat()
+            save_json_file(POSTED_TITLES_FILE, title, timestamp)
+            posted_titles.add(title)
+            logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
+
+            if image_url:
+                save_json_file(USED_IMAGES_FILE, image_url, timestamp)
+                used_images.add(image_url)
+                logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
+
+            logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
+            return post_data, category, True
 
         logging.info("No interesting Google Trend found after attempts")
         return None, None, False
     except Exception as e:
logging.error(f"Unexpected error in curate_from_google_trends: {e}", exc_info=True) - print(f"Unexpected error in curate_from_google_trends: {e}") return None, None, False def run_google_trends_automator(): @@ -455,12 +423,15 @@ def run_google_trends_automator(): try: lock_fd = acquire_lock() logging.info("***** Google Trends Automator Launched *****") - geo_list = ['US', 'GB', 'AU'] - post_data, category, should_continue = curate_from_google_trends(geo_list=geo_list) + # Load JSON files once + posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS) + posted_titles = set(entry["title"] for entry in posted_titles_data) + used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) + used_images = set(entry["title"] for entry in used_images_data if "title" in entry) + post_data, category, should_continue = curate_from_google_trends(posted_titles_data, posted_titles, used_images_data, used_images) if not post_data: logging.info("No postable Google Trend found") - else: - logging.info("Completed Google Trends run") + logging.info("Completed Google Trends run") return post_data, category, should_continue except Exception as e: logging.error(f"Fatal error in run_google_trends_automator: {e}", exc_info=True) diff --git a/foodie_automator_reddit.py b/foodie_automator_reddit.py index 9313dac..a619739 100644 --- a/foodie_automator_reddit.py +++ b/foodie_automator_reddit.py @@ -268,17 +268,12 @@ def fetch_reddit_posts(): logging.error(f"Unexpected error in fetch_reddit_posts: {e}", exc_info=True) return [] -def curate_from_reddit(): +def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used_images): try: - global posted_titles_data, posted_titles, used_images - posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS) - posted_titles = set(entry["title"] for entry in posted_titles_data) - used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry) - logging.debug(f"Loaded {len(posted_titles)} posted titles and {len(used_images)} used images") + logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images") posts = fetch_reddit_posts() if not posts: - print("No Reddit posts available") logging.info("No Reddit posts available") return None, None, False @@ -293,24 +288,20 @@ def curate_from_reddit(): original_source = f'{source_name}' if title in posted_titles: - print(f"Skipping already posted Reddit post: {title}") logging.info(f"Skipping already posted Reddit post: {title}") attempts += 1 continue - print(f"Trying Reddit Post: {title} from {source_name}") logging.info(f"Trying Reddit Post: {title} from {source_name}") try: image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) except Exception as e: - print(f"Smart image/filter error for '{title}': {e}") logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}") attempts += 1 continue if skip: - print(f"Skipping filtered Reddit post: {title}") logging.info(f"Skipping filtered Reddit post: {title}") attempts += 1 continue @@ -318,10 +309,8 @@ def curate_from_reddit(): ddg_context = fetch_duckduckgo_news_context(title) scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}" interest_score = is_interesting(scoring_content) - print(f"Interest Score for '{title[:50]}...': {interest_score}") logging.info(f"Interest score for '{title}': {interest_score}") if interest_score < 6: - print(f"Reddit Interest Too Low: 
{interest_score}") logging.info(f"Reddit Interest Too Low: {interest_score}") attempts += 1 continue @@ -344,14 +333,12 @@ def curate_from_reddit(): extra_prompt=extra_prompt ) if not final_summary: - print(f"Summary failed for '{title}'") logging.info(f"Summary failed for '{title}'") attempts += 1 continue final_summary = insert_link_naturally(final_summary, source_name, link) - # Use round-robin author selection author = get_next_author_round_robin() author_username = author["username"] logging.info(f"Selected author via round-robin: {author_username}") @@ -366,11 +353,9 @@ def curate_from_reddit(): category = post_data["categories"][0] image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic) if not image_url: - print(f"Flickr image fetch failed for '{image_query}', trying fallback") logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback") image_url, image_source, uploader, page_url = get_image(image_query) if not image_url: - print(f"All image uploads failed for '{title}' - posting without image") logging.warning(f"All image uploads failed for '{title}' - posting without image") image_source = None uploader = None @@ -378,9 +363,11 @@ def curate_from_reddit(): hook = get_dynamic_hook(post_data["title"]).strip() share_prompt = get_viral_share_prompt(post_data["title"], final_summary) + share_text = f"Check out this foodie gem! {post_data['title']}" + share_text_encoded = quote(share_text) share_links_template = ( f'

{share_prompt} ' - f' ' + f' ' f'

' ) post_data["content"] = f"{final_summary}\n\n{share_links_template}" @@ -402,72 +389,53 @@ def curate_from_reddit(): should_post_tweet=True ) if not post_id: - print(f"Failed to post to WordPress for '{title}'") logging.warning(f"Failed to post to WordPress for '{title}'") attempts += 1 continue + + # Update post with actual post_url + post_url_encoded = quote(post_url) + share_links = share_links_template.format(post_url=post_url_encoded) + post_data["content"] = f"{final_summary}\n\n{share_links}" + post_data["post_id"] = post_id + post_to_wp( + post_data=post_data, + category=category, + link=link, + author=author, + image_url=None, # Skip image re-upload + original_source=original_source, + image_source=image_source, + uploader=uploader, + page_url=page_url, + interest_score=interest_score, + post_id=post_id, + should_post_tweet=False + ) except Exception as e: - print(f"WordPress posting error for '{title}': {e}") logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True) attempts += 1 continue finally: is_posting = False - if post_id: - share_text = f"Check out this foodie gem! {post_data['title']}" - share_text_encoded = quote(share_text) - post_url_encoded = quote(post_url) - share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded) - post_data["content"] = f"{final_summary}\n\n{share_links}" - is_posting = True - try: - post_to_wp( - post_data=post_data, - category=category, - link=link, - author=author, - image_url=image_url, - original_source=original_source, - image_source=image_source, - uploader=uploader, - page_url=page_url, - interest_score=interest_score, - post_id=post_id, - should_post_tweet=False - ) - except Exception as e: - print(f"Failed to update WordPress post '{title}' with share links: {e}") - logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True) - finally: - is_posting = False - - timestamp = datetime.now(timezone.utc).isoformat() - save_json_file(POSTED_TITLES_FILE, title, timestamp) - posted_titles.add(title) - print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") - logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") - - if image_url: - save_json_file(USED_IMAGES_FILE, image_url, timestamp) - used_images.add(image_url) - print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") - logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") - - print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****") - logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****") - return post_data, category, True - - attempts += 1 - print(f"WP posting failed for '{post_data['title']}'") - logging.info(f"WP posting failed for '{post_data['title']}'") - - print("No interesting Reddit post found after attempts") + timestamp = datetime.now(timezone.utc).isoformat() + save_json_file(POSTED_TITLES_FILE, title, timestamp) + posted_titles.add(title) + logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") + + if image_url: + save_json_file(USED_IMAGES_FILE, image_url, timestamp) + used_images.add(image_url) + logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") + + logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****") + return post_data, category, True + logging.info("No interesting Reddit post found after attempts") return None, None, False except Exception as e: logging.error(f"Unexpected error in 
curate_from_reddit: {e}", exc_info=True) - print(f"Unexpected error in curate_from_reddit: {e}") return None, None, False def run_reddit_automator(): @@ -475,11 +443,15 @@ def run_reddit_automator(): try: lock_fd = acquire_lock() logging.info("***** Reddit Automator Launched *****") - post_data, category, should_continue = curate_from_reddit() + # Load JSON files once + posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS) + posted_titles = set(entry["title"] for entry in posted_titles_data) + used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) + used_images = set(entry["title"] for entry in used_images_data if "title" in entry) + post_data, category, should_continue = curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used_images) if not post_data: logging.info("No postable Reddit article found") - else: - logging.info("Completed Reddit run") + logging.info("Completed Reddit run") return post_data, category, should_continue except Exception as e: logging.error(f"Fatal error in run_reddit_automator: {e}", exc_info=True) diff --git a/foodie_automator_rss.py b/foodie_automator_rss.py index 246b990..05643c7 100644 --- a/foodie_automator_rss.py +++ b/foodie_automator_rss.py @@ -256,14 +256,15 @@ def fetch_duckduckgo_news_context(title, hours=24): def curate_from_rss(): try: global posted_titles_data, posted_titles, used_images + # Load JSON files once posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS) posted_titles = set(entry["title"] for entry in posted_titles_data) - used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry) + used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) + used_images = set(entry["title"] for entry in used_images_data if "title" in entry) logging.debug(f"Loaded {len(posted_titles)} posted titles and {len(used_images)} used images") articles = fetch_rss_feeds() if not articles: - print("No RSS articles available") logging.info("No RSS articles available") return None, None, random.randint(600, 1800) @@ -278,24 +279,20 @@ def curate_from_rss(): original_source = f'{source_name}' if title in posted_titles: - print(f"Skipping already posted article: {title}") logging.info(f"Skipping already posted article: {title}") attempts += 1 continue - print(f"Trying RSS Article: {title} from {source_name}") logging.info(f"Trying RSS Article: {title} from {source_name}") try: image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) except Exception as e: - print(f"Smart image/filter error for '{title}': {e}") logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}") attempts += 1 continue if skip: - print(f"Skipping filtered RSS article: {title}") logging.info(f"Skipping filtered RSS article: {title}") attempts += 1 continue @@ -303,10 +300,8 @@ def curate_from_rss(): ddg_context = fetch_duckduckgo_news_context(title) scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}" interest_score = is_interesting(scoring_content) - print(f"Interest Score for '{title[:50]}...': {interest_score}") logging.info(f"Interest score for '{title}': {interest_score}") if interest_score < 6: - print(f"RSS Interest Too Low: {interest_score}") logging.info(f"RSS Interest Too Low: {interest_score}") attempts += 1 continue @@ -329,7 +324,6 @@ def curate_from_rss(): extra_prompt=extra_prompt ) if not final_summary: - print(f"Summary failed for '{title}'") 
logging.info(f"Summary failed for '{title}'") attempts += 1 continue @@ -351,11 +345,9 @@ def curate_from_rss(): category = post_data["categories"][0] image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic) if not image_url: - print(f"Flickr image fetch failed for '{image_query}', trying fallback") logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback") image_url, image_source, uploader, page_url = get_image(image_query) if not image_url: - print(f"All image uploads failed for '{title}' - posting without image") logging.warning(f"All image uploads failed for '{title}' - posting without image") image_source = None uploader = None @@ -371,7 +363,7 @@ def curate_from_rss(): f'

' ) - # Prepare post content with share links (to be updated later with post_url) + # Prepare post content with share links placeholder post_data["content"] = f"{final_summary}\n\n{share_links_template}" global is_posting @@ -391,22 +383,21 @@ def curate_from_rss(): should_post_tweet=True ) if not post_id: - print(f"Failed to post to WordPress for '{title}'") logging.warning(f"Failed to post to WordPress for '{title}'") attempts += 1 continue - # Update content with actual post_url + # Update post with actual post_url post_url_encoded = quote(post_url) share_links = share_links_template.format(post_url=post_url_encoded) post_data["content"] = f"{final_summary}\n\n{share_links}" - post_data["post_id"] = post_id # For update + post_data["post_id"] = post_id post_to_wp( post_data=post_data, category=category, link=link, author=author, - image_url=None, # No need to re-upload image + image_url=None, # Skip image re-upload original_source=original_source, image_source=image_source, uploader=uploader, @@ -416,7 +407,6 @@ def curate_from_rss(): should_post_tweet=False ) except Exception as e: - print(f"WordPress posting error for '{title}': {e}") logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True) attempts += 1 continue @@ -426,29 +416,23 @@ def curate_from_rss(): timestamp = datetime.now(timezone.utc).isoformat() save_json_file(POSTED_TITLES_FILE, title, timestamp) posted_titles.add(title) - print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") if image_url: save_json_file(USED_IMAGES_FILE, image_url, timestamp) used_images.add(image_url) - print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") - print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****") logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****") return post_data, category, random.randint(0, 1800) attempts += 1 - print(f"WP posting failed for '{post_data['title']}'") logging.info(f"WP posting failed for '{post_data['title']}'") - print("No interesting RSS article found after attempts") logging.info("No interesting RSS article found after attempts") return None, None, random.randint(600, 1800) except Exception as e: logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True) - print(f"Unexpected error in curate_from_rss: {e}") return None, None, random.randint(600, 1800) def run_rss_automator(): @@ -461,7 +445,7 @@ def run_rss_automator(): logging.info("No postable RSS article found") logging.info(f"Completed run with sleep time: {sleep_time} seconds") time.sleep(sleep_time) - return post_data, category, sleep_time + return post_data, category, fixes except Exception as e: logging.error(f"Fatal error in run_rss_automator: {e}", exc_info=True) return None, None, random.randint(600, 1800) diff --git a/foodie_utils.py b/foodie_utils.py index 68803bf..4b1b7d1 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -857,24 +857,22 @@ if os.path.exists(used_images_file): # Function to save used_images to file def save_used_images(): + """ + Save used_images to used_images.json as a JSON array, preserving timestamps. 
+ """ try: - # Load existing entries to preserve timestamps - entries = load_json_file(used_images_file, IMAGE_EXPIRATION_DAYS * 24) - existing_entries = {entry["title"]: entry for entry in entries if isinstance(entry, dict) and "title" in entry} - - # Create new entries for used_images + # Create entries for used_images timestamp = datetime.now(timezone.utc).isoformat() - updated_entries = [] - for url in used_images: - if url in existing_entries: - updated_entries.append(existing_entries[url]) - else: - updated_entries.append({"title": url, "timestamp": timestamp}) - - with open(used_images_file, 'w') as f: - for entry in updated_entries: - f.write(json.dumps(entry) + '\n') - logging.info(f"Saved {len(updated_entries)} used image URLs to {used_images_file}") + entries = [ + {"title": url, "timestamp": entry.get("timestamp", timestamp)} + for url, entry in [ + (url, next((e for e in load_json_file(used_images_file, IMAGE_EXPIRATION_DAYS * 24) if e["title"] == url), {})) + for url in used_images + ] + ] + # Use save_json_file for atomic write + save_json_file(used_images_file, entries) + logging.info(f"Saved {len(entries)} used image URLs to {used_images_file}") except Exception as e: logging.warning(f"Failed to save used images to {used_images_file}: {e}") @@ -1136,17 +1134,18 @@ def check_author_rate_limit(author, max_requests=10, window_seconds=3600): rate_limit_info = load_json_file(rate_limit_file, default={}) username = author['username'] - if username not in rate_limit_info: + if username not in rate_limit_info or not isinstance(rate_limit_info[username].get('reset'), (int, float)): rate_limit_info[username] = { 'remaining': max_requests, 'reset': time.time() } + logger.info(f"Initialized rate limit for {username}: {max_requests} requests available") info = rate_limit_info[username] current_time = time.time() - # Reset if window expired - if current_time >= info['reset']: + # Reset if window expired or timestamp is invalid (e.g., 1970) + if current_time >= info['reset'] or info['reset'] < 1000000000: # 1000000000 is ~2001 info['remaining'] = max_requests info['reset'] = current_time + window_seconds logger.info(f"Reset rate limit for {username}: {max_requests} requests available")