update realtime rate limit for X

2025-05-08 13:35:41 +10:00
parent 3405572ab0
commit 167506ef30
7 changed files with 222 additions and 145 deletions
@@ -25,9 +25,11 @@ from foodie_config import (
 from foodie_utils import (
    load_json_file, save_json_file, get_image, generate_image_query,
    upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
-    summarize_with_gpt4o, generate_category_from_summary, post_to_wp,
-    prepare_post_data, select_best_author, smart_image_and_filter,
-    get_flickr_image
+    is_interesting, generate_title_from_summary, summarize_with_gpt4o,
+    generate_category_from_summary, post_to_wp, prepare_post_data,
+    select_best_author, smart_image_and_filter, get_flickr_image,
+    get_next_author_round_robin, fetch_duckduckgo_news_context,
+    check_author_rate_limit
 )
 from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
 import fcntl
@@ -268,70 +270,72 @@ def fetch_reddit_posts():

 def curate_from_reddit():
    try:
-        articles = fetch_reddit_posts()
-        if not articles:
+        global posted_titles_data, posted_titles, used_images
+        posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
+        posted_titles = set(entry["title"] for entry in posted_titles_data)
+        used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
+        logging.debug(f"Loaded {len(posted_titles)} posted titles and {len(used_images)} used images")
+
+        posts = fetch_reddit_posts()
+        if not posts:
+            print("No Reddit posts available")
            logging.info("No Reddit posts available")
            return None, None, False

-        articles.sort(key=lambda x: x["upvotes"], reverse=True)
-        
-        reddit = praw.Reddit(
-            client_id=REDDIT_CLIENT_ID,
-            client_secret=REDDIT_CLIENT_SECRET,
-            user_agent=REDDIT_USER_AGENT
-        )
-        
        attempts = 0
        max_attempts = 10
-        while attempts < max_attempts and articles:
-            article = articles.pop(0)
-            title = article["title"]
-            raw_title = article["raw_title"]
-            link = article["link"]
-            summary = article["summary"]
-            source_name = "Reddit"
-            original_source = '<a href="https://www.reddit.com/">Reddit</a>'
-            
-            if raw_title in posted_titles:
-                logging.info(f"Skipping already posted post: {raw_title}")
+        while attempts < max_attempts and posts:
+            post = posts.pop(0)
+            title = post["title"]
+            link = post.get("link", "")
+            summary = post.get("summary", "")
+            source_name = post.get("source", "Reddit")
+            original_source = f'<a href="{link}">{source_name}</a>'
+
+            if title in posted_titles:
+                print(f"Skipping already posted Reddit post: {title}")
+                logging.info(f"Skipping already posted Reddit post: {title}")
                attempts += 1
                continue
-            
+
+            print(f"Trying Reddit Post: {title} from {source_name}")
            logging.info(f"Trying Reddit Post: {title} from {source_name}")
-            
-            image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
-            if skip or any(keyword in title.lower() or keyword in raw_title.lower() for keyword in RECIPE_KEYWORDS + ["homemade"]):
+
+            try:
+                image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
+            except Exception as e:
+                print(f"Smart image/filter error for '{title}': {e}")
+                logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
+                attempts += 1
+                continue
+
+            if skip:
+                print(f"Skipping filtered Reddit post: {title}")
                logging.info(f"Skipping filtered Reddit post: {title}")
                attempts += 1
                continue
-            
-            top_comments = get_top_comments(link, reddit, limit=3)
+
            ddg_context = fetch_duckduckgo_news_context(title)
-            content_to_summarize = f"{title}\n\n{summary}\n\nTop Comments:\n{'\n'.join(top_comments) if top_comments else 'None'}\n\nAdditional Context: {ddg_context}"
-            interest_score = is_interesting_reddit(
-                title,
-                summary,
-                article["upvotes"],
-                article["comment_count"],
-                top_comments
-            )
-            logging.info(f"Interest Score: {interest_score} for '{title}'")
+            scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
+            interest_score = is_interesting(scoring_content)
+            print(f"Interest Score for '{title[:50]}...': {interest_score}")
+            logging.info(f"Interest score for '{title}': {interest_score}")
            if interest_score < 6:
+                print(f"Reddit Interest Too Low: {interest_score}")
                logging.info(f"Reddit Interest Too Low: {interest_score}")
                attempts += 1
                continue
-            
+
            num_paragraphs = determine_paragraph_count(interest_score)
            extra_prompt = (
                f"Generate exactly {num_paragraphs} paragraphs.\n"
                f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
-                f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
                f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
-                f"Do NOT introduce unrelated concepts unless in the content, comments, or additional context.\n"
-                f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
+                f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
+                f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
                f"Do not include emojis in the summary."
            )
-            
+            content_to_summarize = scoring_content
            final_summary = summarize_with_gpt4o(
                content_to_summarize,
                source_name,
@@ -340,12 +344,13 @@ def curate_from_reddit():
                extra_prompt=extra_prompt
            )
            if not final_summary:
+                print(f"Summary failed for '{title}'")
                logging.info(f"Summary failed for '{title}'")
                attempts += 1
                continue
-            
+
            final_summary = insert_link_naturally(final_summary, source_name, link)
-            
+
            # Use round-robin author selection
            author = get_next_author_round_robin()
            author_username = author["username"]
@@ -361,15 +366,17 @@ def curate_from_reddit():
            category = post_data["categories"][0]
            image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
            if not image_url:
+                print(f"Flickr image fetch failed for '{image_query}', trying fallback")
+                logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
                image_url, image_source, uploader, page_url = get_image(image_query)
                if not image_url:
+                    print(f"All image uploads failed for '{title}' - posting without image")
                    logging.warning(f"All image uploads failed for '{title}' - posting without image")
                    image_source = None
                    uploader = None
                    page_url = None

            hook = get_dynamic_hook(post_data["title"]).strip()
-            
            share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
            share_links_template = (
                f'<p>{share_prompt} '
@@ -377,7 +384,7 @@ def curate_from_reddit():
                f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
            )
            post_data["content"] = f"{final_summary}\n\n{share_links_template}"
-            
+
            global is_posting
            is_posting = True
            try:
@@ -394,7 +401,13 @@ def curate_from_reddit():
                    interest_score=interest_score,
                    should_post_tweet=True
                )
+                if not post_id:
+                    print(f"Failed to post to WordPress for '{title}'")
+                    logging.warning(f"Failed to post to WordPress for '{title}'")
+                    attempts += 1
+                    continue
            except Exception as e:
+                print(f"WordPress posting error for '{title}': {e}")
                logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
                attempts += 1
                continue
@@ -424,29 +437,37 @@ def curate_from_reddit():
                        should_post_tweet=False
                    )
                except Exception as e:
+                    print(f"Failed to update WordPress post '{title}' with share links: {e}")
                    logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
                finally:
                    is_posting = False
-                
+
                timestamp = datetime.now(timezone.utc).isoformat()
-                save_json_file(POSTED_TITLES_FILE, raw_title, timestamp)
-                posted_titles.add(raw_title)
-                logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE}")
-                
+                save_json_file(POSTED_TITLES_FILE, title, timestamp)
+                posted_titles.add(title)
+                print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
+                logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
+
                if image_url:
                    save_json_file(USED_IMAGES_FILE, image_url, timestamp)
                    used_images.add(image_url)
+                    print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
                    logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
-                
+
+                print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
                logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
                return post_data, category, True
+
            attempts += 1
+            print(f"WP posting failed for '{post_data['title']}'")
            logging.info(f"WP posting failed for '{post_data['title']}'")
-        
+
+        print("No interesting Reddit post found after attempts")
        logging.info("No interesting Reddit post found after attempts")
        return None, None, False
    except Exception as e:
        logging.error(f"Unexpected error in curate_from_reddit: {e}", exc_info=True)
+        print(f"Unexpected error in curate_from_reddit: {e}")
        return None, None, False

 def run_reddit_automator():