try

2025-05-03 16:46:09 +10:00
parent 73e0ef4f53
commit b265b5aa30
2 changed files with 78 additions and 23 deletions
@@ -27,7 +27,8 @@ from foodie_utils import (
    upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
    is_interesting, generate_title_from_summary, summarize_with_gpt4o,
    generate_category_from_summary, post_to_wp, prepare_post_data,
-    select_best_author, smart_image_and_filter, get_flickr_image
+    select_best_author, smart_image_and_filter, get_flickr_image,
+    select_best_persona
 )
 from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
 from dotenv import load_dotenv
@@ -248,29 +249,52 @@ class RSSScraper:
            num_paragraphs = determine_paragraph_count(interest_score)
            extra_prompt = (
                f"Generate exactly {num_paragraphs} paragraphs.\n"
-                f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
-                f"Do NOT introduce unrelated concepts.\n"
-                f"Expand on the core idea with relevant context about its appeal or significance.\n"
-                f"Do not include emojis in the summary."
+                f"Focus on the most interesting aspects of the content.\n"
+                f"Use a {select_best_persona(interest_score, content)} tone.\n"
+                f"Make it engaging and shareable."
            )
            
-            final_summary = summarize_with_gpt4o(
-                scoring_content,
-                source_name,
-                link,
-                interest_score=interest_score,
-                extra_prompt=extra_prompt
-            )
-
-            if not final_summary:
-                logger.info(f"Summary failed for '{title}'")
+            summary = summarize_with_gpt4o(content, source_name, link, interest_score, extra_prompt)
+            if not summary:
+                logger.warning(f"Failed to generate summary for '{title}'")
                continue
            
-            final_summary = insert_link_naturally(final_summary, source_name, link)
-            post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
+            summary = insert_link_naturally(summary, source_name, link)
+            if not summary:
+                logger.warning(f"Failed to insert link for '{title}'")
+                continue
            
-            if post_data and author:
-                return post_data, author, random.randint(600, 1800)
+            post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(
+                summary, title, f"RSS: {source_name}"
+            )
+            
+            if not post_data or not author:
+                logger.warning(f"Failed to prepare post data for '{title}'")
+                continue
+            
+            try:
+                post_id, post_url = post_to_wp(
+                    post_data=post_data,
+                    category=category,
+                    link=link,
+                    author=author,
+                    image_url=image_url,
+                    original_source=source_name,
+                    image_source=image_source,
+                    uploader=uploader,
+                    pixabay_url=page_url,
+                    interest_score=interest_score
+                )
+                
+                if post_id and post_url:
+                    logger.info(f"Successfully posted '{title}' to WordPress (ID: {post_id})")
+                    self.posted_titles.add(title)
+                    save_json_file(FILE_PATHS["posted_rss_titles"], title, datetime.now(timezone.utc).isoformat())
+                    return post_data, author["username"], random.randint(600, 1800)
+                
+            except Exception as e:
+                logger.error(f"Error in RSS automator: {e}")
+                continue
        
        return None, None, random.randint(600, 1800)

@@ -46,6 +46,11 @@ logger = logging.getLogger(__name__)
 load_dotenv()
 client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

+# Module-level state: the used-image URL set and one rate limiter per image service
+used_images = set()  # image URLs already used; reassigned by load_used_images()
+pixabay_rate_limiter = RateLimiter(max_requests=100, time_window=3600)  # 100 requests per hour
+flickr_rate_limiter = RateLimiter(max_requests=3600, time_window=3600)  # 3600 requests per hour
+
 def validate_json_entry(entry: Dict[str, Any]) -> bool:
    """Validate the structure of a JSON entry."""
    required_fields = {"title", "timestamp"}
@@ -862,6 +867,32 @@ def prune_recent_posts():
    except Exception as e:
        logger.error(f"Failed to prune recent_posts.json: {e}")

+def load_used_images():
+    """Reload the global `used_images` set from USED_IMAGES_FILE (one JSON object per line; only its 'url' value is kept)."""
+    global used_images
+    try:
+        if os.path.exists(USED_IMAGES_FILE):  # a missing file leaves used_images unchanged
+            with open(USED_IMAGES_FILE, 'r') as f:
+                used_images = set(json.loads(line.strip())['url'] for line in f if line.strip())  # blank lines are skipped
+            logger.info(f"Loaded {len(used_images)} used images from {USED_IMAGES_FILE}")
+    except Exception as e:
+        logger.error(f"Failed to load used images: {e}")
+        used_images = set()  # any failure, even a single malformed line, resets the whole set to empty
+
+def save_used_images():
+    """Rewrite USED_IMAGES_FILE with one JSON line per URL in `used_images`; any Exception is logged instead of raised."""
+    try:
+        with open(USED_IMAGES_FILE, 'w') as f:  # 'w' truncates, so the file is rewritten in full on every call
+            for url in used_images:
+                json.dump({'url': url, 'timestamp': datetime.now(timezone.utc).isoformat()}, f)  # timestamp is the current UTC time, regenerated on each save
+                f.write('\n')
+        logger.info(f"Saved {len(used_images)} used images to {USED_IMAGES_FILE}")
+    except Exception as e:
+        logger.error(f"Failed to save used images: {e}")
+
+# Populate used_images when this module's top-level code runs
+load_used_images()
+
 def get_image(search_query: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """Get an image with improved rate limiting and error handling."""
    headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}