update google & reddit rate limiting realtime

2025-05-08 16:02:23 +10:00
parent bfeec7a560
commit 01bab56eb6
4 changed files with 111 additions and 185 deletions
@@ -248,17 +248,12 @@ def fetch_duckduckgo_news_context(trend_title, hours=24):
    logging.error(f"Failed to fetch DuckDuckGo News context for '{trend_title}' after {MAX_RETRIES} attempts")
    return trend_title

-def curate_from_google_trends():
+def curate_from_google_trends(posted_titles_data, posted_titles, used_images_data, used_images):
    try:
-        global posted_titles_data, posted_titles, used_images
-        posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
-        posted_titles = set(entry["title"] for entry in posted_titles_data)
-        used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
-        logging.debug(f"Loaded {len(posted_titles)} posted titles and {len(used_images)} used images")
+        logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")

-        trends = fetch_google_trends()
+        trends = scrape_google_trends()
        if not trends:
-            print("No Google Trends data available")
            logging.info("No Google Trends data available")
            return None, None, False

@@ -273,24 +268,20 @@ def curate_from_google_trends():
            original_source = f'<a href="{link}">{source_name}</a>'

            if title in posted_titles:
-                print(f"Skipping already posted trend: {title}")
                logging.info(f"Skipping already posted trend: {title}")
                attempts += 1
                continue

-            print(f"Trying Google Trend: {title} from {source_name}")
            logging.info(f"Trying Google Trend: {title} from {source_name}")

            try:
                image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
            except Exception as e:
-                print(f"Smart image/filter error for '{title}': {e}")
                logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
                attempts += 1
                continue

            if skip:
-                print(f"Skipping filtered trend: {title}")
                logging.info(f"Skipping filtered trend: {title}")
                attempts += 1
                continue
@@ -298,10 +289,8 @@ def curate_from_google_trends():
            ddg_context = fetch_duckduckgo_news_context(title)
            scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
            interest_score = is_interesting(scoring_content)
-            print(f"Interest Score for '{title[:50]}...': {interest_score}")
            logging.info(f"Interest score for '{title}': {interest_score}")
            if interest_score < 6:
-                print(f"Trend Interest Too Low: {interest_score}")
                logging.info(f"Trend Interest Too Low: {interest_score}")
                attempts += 1
                continue
@@ -324,14 +313,12 @@ def curate_from_google_trends():
                extra_prompt=extra_prompt
            )
            if not final_summary:
-                print(f"Summary failed for '{title}'")
                logging.info(f"Summary failed for '{title}'")
                attempts += 1
                continue

            final_summary = insert_link_naturally(final_summary, source_name, link)

-            # Use round-robin author selection
            author = get_next_author_round_robin()
            author_username = author["username"]
            logging.info(f"Selected author via round-robin: {author_username}")
@@ -346,11 +333,9 @@ def curate_from_google_trends():
            category = post_data["categories"][0]
            image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
            if not image_url:
-                print(f"Flickr image fetch failed for '{image_query}', trying fallback")
                logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
                image_url, image_source, uploader, page_url = get_image(image_query)
                if not image_url:
-                    print(f"All image uploads failed for '{title}' - posting without image")
                    logging.warning(f"All image uploads failed for '{title}' - posting without image")
                    image_source = None
                    uploader = None
@@ -358,9 +343,11 @@ def curate_from_google_trends():

            hook = get_dynamic_hook(post_data["title"]).strip()
            share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
+            share_text = f"Check out this foodie gem! {post_data['title']}"
+            share_text_encoded = quote(share_text)
            share_links_template = (
                f'<p>{share_prompt} '
-                f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
+                f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
                f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
            )
            post_data["content"] = f"{final_summary}\n\n{share_links_template}"
@@ -382,32 +369,21 @@ def curate_from_google_trends():
                    should_post_tweet=True
                )
                if not post_id:
-                    print(f"Failed to post to WordPress for '{title}'")
                    logging.warning(f"Failed to post to WordPress for '{title}'")
                    attempts += 1
                    continue
-            except Exception as e:
-                print(f"WordPress posting error for '{title}': {e}")
-                logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
-                attempts += 1
-                continue
-            finally:
-                is_posting = False

-            if post_id:
-                share_text = f"Check out this foodie gem! {post_data['title']}"
-                share_text_encoded = quote(share_text)
+                # Update post with actual post_url
                post_url_encoded = quote(post_url)
-                share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
+                share_links = share_links_template.format(post_url=post_url_encoded)
                post_data["content"] = f"{final_summary}\n\n{share_links}"
-                is_posting = True
-                try:
+                post_data["post_id"] = post_id
                post_to_wp(
                    post_data=post_data,
                    category=category,
                    link=link,
                    author=author,
-                        image_url=image_url,
+                    image_url=None,  # Skip image re-upload
                    original_source=original_source,
                    image_source=image_source,
                    uploader=uploader,
@@ -417,37 +393,29 @@ def curate_from_google_trends():
                    should_post_tweet=False
                )
            except Exception as e:
-                    print(f"Failed to update WordPress post '{title}' with share links: {e}")
-                    logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
+                logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
+                attempts += 1
+                continue
            finally:
                is_posting = False

            timestamp = datetime.now(timezone.utc).isoformat()
            save_json_file(POSTED_TITLES_FILE, title, timestamp)
            posted_titles.add(title)
-                print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
            logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")

            if image_url:
                save_json_file(USED_IMAGES_FILE, image_url, timestamp)
                used_images.add(image_url)
-                    print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
                logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")

-                print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
            logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
            return post_data, category, True

-            attempts += 1
-            print(f"WP posting failed for '{post_data['title']}'")
-            logging.info(f"WP posting failed for '{post_data['title']}'")
-
-        print("No interesting Google Trend found after attempts")
        logging.info("No interesting Google Trend found after attempts")
        return None, None, False
    except Exception as e:
        logging.error(f"Unexpected error in curate_from_google_trends: {e}", exc_info=True)
-        print(f"Unexpected error in curate_from_google_trends: {e}")
        return None, None, False

 def run_google_trends_automator():
@@ -455,11 +423,14 @@ def run_google_trends_automator():
    try:
        lock_fd = acquire_lock()
        logging.info("***** Google Trends Automator Launched *****")
-        geo_list = ['US', 'GB', 'AU']
-        post_data, category, should_continue = curate_from_google_trends(geo_list=geo_list)
+        # Load JSON files once
+        posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
+        posted_titles = set(entry["title"] for entry in posted_titles_data)
+        used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
+        used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
+        post_data, category, should_continue = curate_from_google_trends(posted_titles_data, posted_titles, used_images_data, used_images)
        if not post_data:
            logging.info("No postable Google Trend found")
-        else:
        logging.info("Completed Google Trends run")
        return post_data, category, should_continue
    except Exception as e:
@@ -268,17 +268,12 @@ def fetch_reddit_posts():
        logging.error(f"Unexpected error in fetch_reddit_posts: {e}", exc_info=True)
        return []

-def curate_from_reddit():
+def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used_images):
    try:
-        global posted_titles_data, posted_titles, used_images
-        posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
-        posted_titles = set(entry["title"] for entry in posted_titles_data)
-        used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
-        logging.debug(f"Loaded {len(posted_titles)} posted titles and {len(used_images)} used images")
+        logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")

        posts = fetch_reddit_posts()
        if not posts:
-            print("No Reddit posts available")
            logging.info("No Reddit posts available")
            return None, None, False

@@ -293,24 +288,20 @@ def curate_from_reddit():
            original_source = f'<a href="{link}">{source_name}</a>'

            if title in posted_titles:
-                print(f"Skipping already posted Reddit post: {title}")
                logging.info(f"Skipping already posted Reddit post: {title}")
                attempts += 1
                continue

-            print(f"Trying Reddit Post: {title} from {source_name}")
            logging.info(f"Trying Reddit Post: {title} from {source_name}")

            try:
                image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
            except Exception as e:
-                print(f"Smart image/filter error for '{title}': {e}")
                logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
                attempts += 1
                continue

            if skip:
-                print(f"Skipping filtered Reddit post: {title}")
                logging.info(f"Skipping filtered Reddit post: {title}")
                attempts += 1
                continue
@@ -318,10 +309,8 @@ def curate_from_reddit():
            ddg_context = fetch_duckduckgo_news_context(title)
            scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
            interest_score = is_interesting(scoring_content)
-            print(f"Interest Score for '{title[:50]}...': {interest_score}")
            logging.info(f"Interest score for '{title}': {interest_score}")
            if interest_score < 6:
-                print(f"Reddit Interest Too Low: {interest_score}")
                logging.info(f"Reddit Interest Too Low: {interest_score}")
                attempts += 1
                continue
@@ -344,14 +333,12 @@ def curate_from_reddit():
                extra_prompt=extra_prompt
            )
            if not final_summary:
-                print(f"Summary failed for '{title}'")
                logging.info(f"Summary failed for '{title}'")
                attempts += 1
                continue

            final_summary = insert_link_naturally(final_summary, source_name, link)

-            # Use round-robin author selection
            author = get_next_author_round_robin()
            author_username = author["username"]
            logging.info(f"Selected author via round-robin: {author_username}")
@@ -366,11 +353,9 @@ def curate_from_reddit():
            category = post_data["categories"][0]
            image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
            if not image_url:
-                print(f"Flickr image fetch failed for '{image_query}', trying fallback")
                logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
                image_url, image_source, uploader, page_url = get_image(image_query)
                if not image_url:
-                    print(f"All image uploads failed for '{title}' - posting without image")
                    logging.warning(f"All image uploads failed for '{title}' - posting without image")
                    image_source = None
                    uploader = None
@@ -378,9 +363,11 @@ def curate_from_reddit():

            hook = get_dynamic_hook(post_data["title"]).strip()
            share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
+            share_text = f"Check out this foodie gem! {post_data['title']}"
+            share_text_encoded = quote(share_text)
            share_links_template = (
                f'<p>{share_prompt} '
-                f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
+                f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
                f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
            )
            post_data["content"] = f"{final_summary}\n\n{share_links_template}"
@@ -402,32 +389,21 @@ def curate_from_reddit():
                    should_post_tweet=True
                )
                if not post_id:
-                    print(f"Failed to post to WordPress for '{title}'")
                    logging.warning(f"Failed to post to WordPress for '{title}'")
                    attempts += 1
                    continue
-            except Exception as e:
-                print(f"WordPress posting error for '{title}': {e}")
-                logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
-                attempts += 1
-                continue
-            finally:
-                is_posting = False

-            if post_id:
-                share_text = f"Check out this foodie gem! {post_data['title']}"
-                share_text_encoded = quote(share_text)
+                # Update post with actual post_url
                post_url_encoded = quote(post_url)
-                share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
+                share_links = share_links_template.format(post_url=post_url_encoded)
                post_data["content"] = f"{final_summary}\n\n{share_links}"
-                is_posting = True
-                try:
+                post_data["post_id"] = post_id
                post_to_wp(
                    post_data=post_data,
                    category=category,
                    link=link,
                    author=author,
-                        image_url=image_url,
+                    image_url=None,  # Skip image re-upload
                    original_source=original_source,
                    image_source=image_source,
                    uploader=uploader,
@@ -437,37 +413,29 @@ def curate_from_reddit():
                    should_post_tweet=False
                )
            except Exception as e:
-                    print(f"Failed to update WordPress post '{title}' with share links: {e}")
-                    logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
+                logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
+                attempts += 1
+                continue
            finally:
                is_posting = False

            timestamp = datetime.now(timezone.utc).isoformat()
            save_json_file(POSTED_TITLES_FILE, title, timestamp)
            posted_titles.add(title)
-                print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
            logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")

            if image_url:
                save_json_file(USED_IMAGES_FILE, image_url, timestamp)
                used_images.add(image_url)
-                    print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
                logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")

-                print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
            logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
            return post_data, category, True

-            attempts += 1
-            print(f"WP posting failed for '{post_data['title']}'")
-            logging.info(f"WP posting failed for '{post_data['title']}'")
-
-        print("No interesting Reddit post found after attempts")
        logging.info("No interesting Reddit post found after attempts")
        return None, None, False
    except Exception as e:
        logging.error(f"Unexpected error in curate_from_reddit: {e}", exc_info=True)
-        print(f"Unexpected error in curate_from_reddit: {e}")
        return None, None, False

 def run_reddit_automator():
@@ -475,10 +443,14 @@ def run_reddit_automator():
    try:
        lock_fd = acquire_lock()
        logging.info("***** Reddit Automator Launched *****")
-        post_data, category, should_continue = curate_from_reddit()
+        # Load JSON files once
+        posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
+        posted_titles = set(entry["title"] for entry in posted_titles_data)
+        used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
+        used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
+        post_data, category, should_continue = curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used_images)
        if not post_data:
            logging.info("No postable Reddit article found")
-        else:
        logging.info("Completed Reddit run")
        return post_data, category, should_continue
    except Exception as e:
@@ -256,14 +256,15 @@ def fetch_duckduckgo_news_context(title, hours=24):
 def curate_from_rss():
    try:
        global posted_titles_data, posted_titles, used_images
+        # Load JSON files once
        posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
        posted_titles = set(entry["title"] for entry in posted_titles_data)
-        used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
+        used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
+        used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
        logging.debug(f"Loaded {len(posted_titles)} posted titles and {len(used_images)} used images")

        articles = fetch_rss_feeds()
        if not articles:
-            print("No RSS articles available")
            logging.info("No RSS articles available")
            return None, None, random.randint(600, 1800)

@@ -278,24 +279,20 @@ def curate_from_rss():
            original_source = f'<a href="{link}">{source_name}</a>'

            if title in posted_titles:
-                print(f"Skipping already posted article: {title}")
                logging.info(f"Skipping already posted article: {title}")
                attempts += 1
                continue

-            print(f"Trying RSS Article: {title} from {source_name}")
            logging.info(f"Trying RSS Article: {title} from {source_name}")

            try:
                image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
            except Exception as e:
-                print(f"Smart image/filter error for '{title}': {e}")
                logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
                attempts += 1
                continue

            if skip:
-                print(f"Skipping filtered RSS article: {title}")
                logging.info(f"Skipping filtered RSS article: {title}")
                attempts += 1
                continue
@@ -303,10 +300,8 @@ def curate_from_rss():
            ddg_context = fetch_duckduckgo_news_context(title)
            scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
            interest_score = is_interesting(scoring_content)
-            print(f"Interest Score for '{title[:50]}...': {interest_score}")
            logging.info(f"Interest score for '{title}': {interest_score}")
            if interest_score < 6:
-                print(f"RSS Interest Too Low: {interest_score}")
                logging.info(f"RSS Interest Too Low: {interest_score}")
                attempts += 1
                continue
@@ -329,7 +324,6 @@ def curate_from_rss():
                extra_prompt=extra_prompt
            )
            if not final_summary:
-                print(f"Summary failed for '{title}'")
                logging.info(f"Summary failed for '{title}'")
                attempts += 1
                continue
@@ -351,11 +345,9 @@ def curate_from_rss():
            category = post_data["categories"][0]
            image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
            if not image_url:
-                print(f"Flickr image fetch failed for '{image_query}', trying fallback")
                logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
                image_url, image_source, uploader, page_url = get_image(image_query)
                if not image_url:
-                    print(f"All image uploads failed for '{title}' - posting without image")
                    logging.warning(f"All image uploads failed for '{title}' - posting without image")
                    image_source = None
                    uploader = None
@@ -371,7 +363,7 @@ def curate_from_rss():
                f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
            )

-            # Prepare post content with share links (to be updated later with post_url)
+            # Prepare post content with share links placeholder
            post_data["content"] = f"{final_summary}\n\n{share_links_template}"

            global is_posting
@@ -391,22 +383,21 @@ def curate_from_rss():
                    should_post_tweet=True
                )
                if not post_id:
-                    print(f"Failed to post to WordPress for '{title}'")
                    logging.warning(f"Failed to post to WordPress for '{title}'")
                    attempts += 1
                    continue

-                # Update content with actual post_url
+                # Update post with actual post_url
                post_url_encoded = quote(post_url)
                share_links = share_links_template.format(post_url=post_url_encoded)
                post_data["content"] = f"{final_summary}\n\n{share_links}"
-                post_data["post_id"] = post_id  # For update
+                post_data["post_id"] = post_id
                post_to_wp(
                    post_data=post_data,
                    category=category,
                    link=link,
                    author=author,
-                    image_url=None,  # No need to re-upload image
+                    image_url=None,  # Skip image re-upload
                    original_source=original_source,
                    image_source=image_source,
                    uploader=uploader,
@@ -416,7 +407,6 @@ def curate_from_rss():
                    should_post_tweet=False
                )
            except Exception as e:
-                print(f"WordPress posting error for '{title}': {e}")
                logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
                attempts += 1
                continue
@@ -426,29 +416,23 @@ def curate_from_rss():
            timestamp = datetime.now(timezone.utc).isoformat()
            save_json_file(POSTED_TITLES_FILE, title, timestamp)
            posted_titles.add(title)
-            print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
            logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")

            if image_url:
                save_json_file(USED_IMAGES_FILE, image_url, timestamp)
                used_images.add(image_url)
-                print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
                logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")

-            print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
            logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
            return post_data, category, random.randint(0, 1800)

            attempts += 1
-            print(f"WP posting failed for '{post_data['title']}'")
            logging.info(f"WP posting failed for '{post_data['title']}'")

-        print("No interesting RSS article found after attempts")
        logging.info("No interesting RSS article found after attempts")
        return None, None, random.randint(600, 1800)
    except Exception as e:
        logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True)
-        print(f"Unexpected error in curate_from_rss: {e}")
        return None, None, random.randint(600, 1800)

 def run_rss_automator():
@@ -461,7 +445,7 @@ def run_rss_automator():
            logging.info("No postable RSS article found")
        logging.info(f"Completed run with sleep time: {sleep_time} seconds")
        time.sleep(sleep_time)
        return post_data, category, sleep_time
    except Exception as e:
        logging.error(f"Fatal error in run_rss_automator: {e}", exc_info=True)
        return None, None, random.randint(600, 1800)
@@ -857,24 +857,25 @@ if os.path.exists(used_images_file):

 # Function to save used_images to file
 def save_used_images():
+    """
+    Save used_images to used_images.json as a JSON array, preserving timestamps.
+    """
    try:
-        # Load existing entries to preserve timestamps
-        entries = load_json_file(used_images_file, IMAGE_EXPIRATION_DAYS * 24)
-        existing_entries = {entry["title"]: entry for entry in entries if isinstance(entry, dict) and "title" in entry}
-        
-        # Create new entries for used_images
+        # Create entries for used_images
        timestamp = datetime.now(timezone.utc).isoformat()
-        updated_entries = []
-        for url in used_images:
-            if url in existing_entries:
-                updated_entries.append(existing_entries[url])
-            else:
-                updated_entries.append({"title": url, "timestamp": timestamp})
-        
-        with open(used_images_file, 'w') as f:
-            for entry in updated_entries:
-                f.write(json.dumps(entry) + '\n')
-        logging.info(f"Saved {len(updated_entries)} used image URLs to {used_images_file}")
+        # Load the existing file once (not once per URL) and keep the first
+        # timestamp recorded for each URL; malformed entries are skipped
+        known_timestamps = {}
+        for e in load_json_file(used_images_file, IMAGE_EXPIRATION_DAYS * 24):
+            if isinstance(e, dict) and "title" in e:
+                known_timestamps.setdefault(e["title"], e.get("timestamp", timestamp))
+        entries = [
+            {"title": url, "timestamp": known_timestamps.get(url, timestamp)}
+            for url in used_images
+        ]
+        # Use save_json_file for atomic write
+        save_json_file(used_images_file, entries)
+        logging.info(f"Saved {len(entries)} used image URLs to {used_images_file}")
    except Exception as e:
        logging.warning(f"Failed to save used images to {used_images_file}: {e}")

@@ -1136,17 +1134,18 @@ def check_author_rate_limit(author, max_requests=10, window_seconds=3600):
    rate_limit_info = load_json_file(rate_limit_file, default={})
    
    username = author['username']
-    if username not in rate_limit_info:
+    if username not in rate_limit_info or not isinstance(rate_limit_info[username].get('reset'), (int, float)):
        rate_limit_info[username] = {
            'remaining': max_requests,
            'reset': time.time()
        }
+        logger.info(f"Initialized rate limit for {username}: {max_requests} requests available")
    
    info = rate_limit_info[username]
    current_time = time.time()
    
-    # Reset if window expired
-    if current_time >= info['reset']:
+    # Reset if window expired or timestamp is invalid (e.g., 1970)
+    if current_time >= info['reset'] or info['reset'] < 1000000000:  # 1000000000 is ~2001
        info['remaining'] = max_requests
        info['reset'] = current_time + window_seconds
        logger.info(f"Reset rate limit for {username}: {max_requests} requests available")