try

2025-05-03 16:46:09 +10:00
parent 73e0ef4f53
commit b265b5aa30
2 changed files with 78 additions and 23 deletions
@@ -27,7 +27,8 @@ from foodie_utils import (
    upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
    is_interesting, generate_title_from_summary, summarize_with_gpt4o,
    generate_category_from_summary, post_to_wp, prepare_post_data,
-    select_best_author, smart_image_and_filter, get_flickr_image
+    select_best_author, smart_image_and_filter, get_flickr_image,
    select_best_persona
 )
 from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
 from dotenv import load_dotenv
@@ -248,29 +249,52 @@ class RSSScraper:
            num_paragraphs = determine_paragraph_count(interest_score)
            extra_prompt = (
                f"Generate exactly {num_paragraphs} paragraphs.\n"
-                f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
+                f"Focus on the most interesting aspects of the content.\n"
-                f"Do NOT introduce unrelated concepts.\n"
+                f"Use a {select_best_persona(interest_score, content)} tone.\n"
-                f"Expand on the core idea with relevant context about its appeal or significance.\n"
+                f"Make it engaging and shareable."
                f"Do not include emojis in the summary."
            )
-            final_summary = summarize_with_gpt4o(
+            summary = summarize_with_gpt4o(content, source_name, link, interest_score, extra_prompt)
-                scoring_content,
+            if not summary:
-                source_name,
+                logger.warning(f"Failed to generate summary for '{title}'")
                link,
                interest_score=interest_score,
                extra_prompt=extra_prompt
            )
            if not final_summary:
                logger.info(f"Summary failed for '{title}'")
                continue
-            final_summary = insert_link_naturally(final_summary, source_name, link)
+            summary = insert_link_naturally(summary, source_name, link)
-            post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
+            if not summary:
                logger.warning(f"Failed to insert link for '{title}'")
                continue
-            if post_data and author:
+            post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(
-                return post_data, author, random.randint(600, 1800)
+                summary, title, f"RSS: {source_name}"
            )
            if not post_data or not author:
                logger.warning(f"Failed to prepare post data for '{title}'")
                continue
            try:
                post_id, post_url = post_to_wp(
                    post_data=post_data,
                    category=category,
                    link=link,
                    author=author,
                    image_url=image_url,
                    original_source=source_name,
                    image_source=image_source,
                    uploader=uploader,
                    pixabay_url=page_url,
                    interest_score=interest_score
                )
                if post_id and post_url:
                    logger.info(f"Successfully posted '{title}' to WordPress (ID: {post_id})")
                    self.posted_titles.add(title)
                    save_json_file(FILE_PATHS["posted_rss_titles"], title, datetime.now(timezone.utc).isoformat())
                    return post_data, author["username"], random.randint(600, 1800)
            except Exception as e:
                logger.error(f"Error in RSS automator: {e}")
                continue
        return None, None, random.randint(600, 1800)
@@ -46,6 +46,11 @@ logger = logging.getLogger(__name__)
 # Load variables from a .env file into the environment before they are read below.
 load_dotenv()
 # Shared OpenAI client; the API key is read from the OPENAI_API_KEY environment
 # variable (os.getenv returns None when it is unset).
 client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 # Initialize global variables
 # URLs of images that have already been used. Starts empty here and is rebound
 # by load_used_images() from USED_IMAGES_FILE.
 used_images = set()
 pixabay_rate_limiter = RateLimiter(max_requests=100, time_window=3600)  # 100 requests per hour
 flickr_rate_limiter = RateLimiter(max_requests=3600, time_window=3600)  # 3600 requests per hour
 def validate_json_entry(entry: Dict[str, Any]) -> bool:
    """Validate the structure of a JSON entry."""
    required_fields = {"title", "timestamp"}
@@ -862,6 +867,32 @@ def prune_recent_posts():
    except Exception as e:
        logger.error(f"Failed to prune recent_posts.json: {e}")
 def load_used_images():
    """Load the set of used image URLs from file.

    Reads USED_IMAGES_FILE line by line, parsing each non-blank line as a
    JSON object and collecting its ``'url'`` value. The module-level
    ``used_images`` set is rebound to the collected URLs.

    If USED_IMAGES_FILE does not exist, ``used_images`` is left unchanged
    and nothing is logged. Any exception (unreadable file, malformed JSON,
    a record without a ``'url'`` key) is logged and ``used_images`` is
    reset to an empty set; nothing is re-raised.
    """
    global used_images
    try:
        if os.path.exists(USED_IMAGES_FILE):
            with open(USED_IMAGES_FILE, 'r') as f:
                # One JSON object per line; blank lines are skipped.
                # NOTE(review): a single bad line raises out of this expression,
                # so the whole load is abandoned and the except branch below
                # clears the set rather than keeping the valid entries.
                used_images = set(json.loads(line.strip())['url'] for line in f if line.strip())
            logger.info(f"Loaded {len(used_images)} used images from {USED_IMAGES_FILE}")
    except Exception as e:
        logger.error(f"Failed to load used images: {e}")
        # Fall back to an empty set so later code always sees a set.
        used_images = set()
 def save_used_images():
    """Save the set of used image URLs to file.

    Overwrites USED_IMAGES_FILE with one JSON object per line, each holding
    a ``'url'`` from the module-level ``used_images`` set and a
    ``'timestamp'`` (current UTC time in ISO 8601 format). This is the same
    line-per-record layout that load_used_images() reads back.

    Any exception while writing is logged and not re-raised.
    """
    try:
        # Mode 'w' truncates the file: every save rewrites the full set.
        with open(USED_IMAGES_FILE, 'w') as f:
            for url in used_images:
                # NOTE(review): the timestamp is taken at save time, so every
                # record's timestamp is regenerated on each save; it does not
                # record when the image was first used.
                json.dump({'url': url, 'timestamp': datetime.now(timezone.utc).isoformat()}, f)
                f.write('\n')
        logger.info(f"Saved {len(used_images)} used images to {USED_IMAGES_FILE}")
    except Exception as e:
        logger.error(f"Failed to save used images: {e}")
 # Load used images on startup
 load_used_images()
 def get_image(search_query: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """Get an image with improved rate limiting and error handling."""
    headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}