From b265b5aa303263fa14d873b1b60e995c86d16d59 Mon Sep 17 00:00:00 2001 From: Shane Date: Sat, 3 May 2025 16:46:09 +1000 Subject: [PATCH] try --- foodie_automator_rss.py | 70 +++++++++++++++++++++++++++-------------- foodie_utils.py | 31 ++++++++++++++++++ 2 files changed, 78 insertions(+), 23 deletions(-) diff --git a/foodie_automator_rss.py b/foodie_automator_rss.py index 7ba1e24..0682f6a 100644 --- a/foodie_automator_rss.py +++ b/foodie_automator_rss.py @@ -27,7 +27,8 @@ from foodie_utils import ( upload_image_to_wp, determine_paragraph_count, insert_link_naturally, is_interesting, generate_title_from_summary, summarize_with_gpt4o, generate_category_from_summary, post_to_wp, prepare_post_data, - select_best_author, smart_image_and_filter, get_flickr_image + select_best_author, smart_image_and_filter, get_flickr_image, + select_best_persona ) from foodie_hooks import get_dynamic_hook, get_viral_share_prompt from dotenv import load_dotenv @@ -248,30 +249,53 @@ class RSSScraper: num_paragraphs = determine_paragraph_count(interest_score) extra_prompt = ( f"Generate exactly {num_paragraphs} paragraphs.\n" - f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n" - f"Do NOT introduce unrelated concepts.\n" - f"Expand on the core idea with relevant context about its appeal or significance.\n" - f"Do not include emojis in the summary." + f"Focus on the most interesting aspects of the content.\n" + f"Use a {select_best_persona(interest_score, content)} tone.\n" + f"Make it engaging and shareable." ) - - final_summary = summarize_with_gpt4o( - scoring_content, - source_name, - link, - interest_score=interest_score, - extra_prompt=extra_prompt - ) - - if not final_summary: - logger.info(f"Summary failed for '{title}'") + + summary = summarize_with_gpt4o(content, source_name, link, interest_score, extra_prompt) + if not summary: + logger.warning(f"Failed to generate summary for '{title}'") continue - - final_summary = insert_link_naturally(final_summary, source_name, link) - post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title) - - if post_data and author: - return post_data, author, random.randint(600, 1800) - + + summary = insert_link_naturally(summary, source_name, link) + if not summary: + logger.warning(f"Failed to insert link for '{title}'") + continue + + post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data( + summary, title, f"RSS: {source_name}" + ) + + if not post_data or not author: + logger.warning(f"Failed to prepare post data for '{title}'") + continue + + try: + post_id, post_url = post_to_wp( + post_data=post_data, + category=category, + link=link, + author=author, + image_url=image_url, + original_source=source_name, + image_source=image_source, + uploader=uploader, + pixabay_url=page_url, + interest_score=interest_score + ) + + if post_id and post_url: + logger.info(f"Successfully posted '{title}' to WordPress (ID: {post_id})") + self.posted_titles.add(title) + save_json_file(FILE_PATHS["posted_rss_titles"], title, datetime.now(timezone.utc).isoformat()) + return post_data, author["username"], random.randint(600, 1800) + + except Exception as e: + logger.error(f"Error in RSS automator: {e}") + continue + return None, None, random.randint(600, 1800) def run_rss_automator(): diff --git a/foodie_utils.py b/foodie_utils.py index 5c0b15a..d6ef4aa 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -46,6 +46,11 @@ logger = logging.getLogger(__name__) load_dotenv() client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) +# Initialize global variables +used_images = set() +pixabay_rate_limiter = RateLimiter(max_requests=100, time_window=3600) # 100 requests per hour +flickr_rate_limiter = RateLimiter(max_requests=3600, time_window=3600) # 3600 requests per hour + def validate_json_entry(entry: Dict[str, Any]) -> bool: """Validate the structure of a JSON entry.""" required_fields = {"title", "timestamp"} @@ -862,6 +867,32 @@ def prune_recent_posts(): except Exception as e: logger.error(f"Failed to prune recent_posts.json: {e}") +def load_used_images(): + """Load the set of used image URLs from file.""" + global used_images + try: + if os.path.exists(USED_IMAGES_FILE): + with open(USED_IMAGES_FILE, 'r') as f: + used_images = set(json.loads(line.strip())['url'] for line in f if line.strip()) + logger.info(f"Loaded {len(used_images)} used images from {USED_IMAGES_FILE}") + except Exception as e: + logger.error(f"Failed to load used images: {e}") + used_images = set() + +def save_used_images(): + """Save the set of used image URLs to file.""" + try: + with open(USED_IMAGES_FILE, 'w') as f: + for url in used_images: + json.dump({'url': url, 'timestamp': datetime.now(timezone.utc).isoformat()}, f) + f.write('\n') + logger.info(f"Saved {len(used_images)} used images to {USED_IMAGES_FILE}") + except Exception as e: + logger.error(f"Failed to save used images: {e}") + +# Load used images on startup +load_used_images() + def get_image(search_query: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]: """Get an image with improved rate limiting and error handling.""" headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}