This commit is contained in:
2025-05-03 16:46:09 +10:00
parent 73e0ef4f53
commit b265b5aa30
2 changed files with 78 additions and 23 deletions
+43 -19
View File
@@ -27,7 +27,8 @@ from foodie_utils import (
upload_image_to_wp, determine_paragraph_count, insert_link_naturally, upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
is_interesting, generate_title_from_summary, summarize_with_gpt4o, is_interesting, generate_title_from_summary, summarize_with_gpt4o,
generate_category_from_summary, post_to_wp, prepare_post_data, generate_category_from_summary, post_to_wp, prepare_post_data,
select_best_author, smart_image_and_filter, get_flickr_image select_best_author, smart_image_and_filter, get_flickr_image,
select_best_persona
) )
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
from dotenv import load_dotenv from dotenv import load_dotenv
@@ -248,29 +249,52 @@ class RSSScraper:
num_paragraphs = determine_paragraph_count(interest_score) num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = ( extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n" f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n" f"Focus on the most interesting aspects of the content.\n"
f"Do NOT introduce unrelated concepts.\n" f"Use a {select_best_persona(interest_score, content)} tone.\n"
f"Expand on the core idea with relevant context about its appeal or significance.\n" f"Make it engaging and shareable."
f"Do not include emojis in the summary."
) )
final_summary = summarize_with_gpt4o( summary = summarize_with_gpt4o(content, source_name, link, interest_score, extra_prompt)
scoring_content, if not summary:
source_name, logger.warning(f"Failed to generate summary for '{title}'")
link,
interest_score=interest_score,
extra_prompt=extra_prompt
)
if not final_summary:
logger.info(f"Summary failed for '{title}'")
continue continue
final_summary = insert_link_naturally(final_summary, source_name, link) summary = insert_link_naturally(summary, source_name, link)
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title) if not summary:
logger.warning(f"Failed to insert link for '{title}'")
continue
if post_data and author: post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(
return post_data, author, random.randint(600, 1800) summary, title, f"RSS: {source_name}"
)
if not post_data or not author:
logger.warning(f"Failed to prepare post data for '{title}'")
continue
try:
post_id, post_url = post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=image_url,
original_source=source_name,
image_source=image_source,
uploader=uploader,
pixabay_url=page_url,
interest_score=interest_score
)
if post_id and post_url:
logger.info(f"Successfully posted '{title}' to WordPress (ID: {post_id})")
self.posted_titles.add(title)
save_json_file(FILE_PATHS["posted_rss_titles"], title, datetime.now(timezone.utc).isoformat())
return post_data, author["username"], random.randint(600, 1800)
except Exception as e:
logger.error(f"Error in RSS automator: {e}")
continue
return None, None, random.randint(600, 1800) return None, None, random.randint(600, 1800)
+31
View File
@@ -46,6 +46,11 @@ logger = logging.getLogger(__name__)
load_dotenv() load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# Shared module state: URLs of images already used, plus one rate limiter per image API.
used_images = set()
# Pixabay budget: 100 requests per hour
pixabay_rate_limiter = RateLimiter(
    max_requests=100,
    time_window=3600,
)
# Flickr budget: 3600 requests per hour
flickr_rate_limiter = RateLimiter(
    max_requests=3600,
    time_window=3600,
)
def validate_json_entry(entry: Dict[str, Any]) -> bool: def validate_json_entry(entry: Dict[str, Any]) -> bool:
"""Validate the structure of a JSON entry.""" """Validate the structure of a JSON entry."""
required_fields = {"title", "timestamp"} required_fields = {"title", "timestamp"}
@@ -862,6 +867,32 @@ def prune_recent_posts():
except Exception as e: except Exception as e:
logger.error(f"Failed to prune recent_posts.json: {e}") logger.error(f"Failed to prune recent_posts.json: {e}")
def load_used_images():
    """Load the set of used image URLs from USED_IMAGES_FILE.

    The file is read as JSON Lines: each non-blank line is expected to be a
    JSON object with a 'url' key. On success the module-level ``used_images``
    set is replaced by the URLs that were read.

    A line that is not valid JSON, is not an object, or has no usable 'url'
    is logged and skipped, so one damaged record (for example a partially
    written last line) does not discard the rest of the history.

    If the file does not exist, ``used_images`` is left untouched. Any other
    failure (e.g. the file cannot be opened or decoded) is logged and
    ``used_images`` is reset to an empty set; the exception is not re-raised.
    """
    global used_images
    try:
        if not os.path.exists(USED_IMAGES_FILE):
            return
        loaded = set()
        with open(USED_IMAGES_FILE, 'r', encoding='utf-8') as f:
            for line_number, raw_line in enumerate(f, start=1):
                record = raw_line.strip()
                if not record:
                    continue
                try:
                    loaded.add(json.loads(record)['url'])
                except (ValueError, KeyError, TypeError) as e:
                    # ValueError: invalid JSON; KeyError: no 'url' field;
                    # TypeError: record is not an object or the url is unhashable.
                    logger.warning(
                        f"Skipping malformed used-image entry at line {line_number} "
                        f"of {USED_IMAGES_FILE}: {e}"
                    )
        used_images = loaded
        logger.info(f"Loaded {len(used_images)} used images from {USED_IMAGES_FILE}")
    except Exception as e:
        # Best-effort load: never let a bad file break the caller.
        logger.error(f"Failed to load used images: {e}")
        used_images = set()
def save_used_images():
    """Save the set of used image URLs to USED_IMAGES_FILE.

    Writes one JSON object per line, each with the 'url' and a 'timestamp'
    holding the UTC time of this save in ISO 8601 form (the same value for
    every entry written by one call).

    The data is written to a sibling temporary file first and then moved over
    USED_IMAGES_FILE with os.replace, so a failure part-way through the write
    leaves the previous file intact instead of truncated.

    Any failure is logged and swallowed; nothing is returned.
    """
    temp_path = f"{USED_IMAGES_FILE}.tmp"
    try:
        # One timestamp per save; it does not vary per URL.
        saved_at = datetime.now(timezone.utc).isoformat()
        with open(temp_path, 'w', encoding='utf-8') as f:
            f.writelines(
                json.dumps({'url': url, 'timestamp': saved_at}) + '\n'
                for url in used_images
            )
        os.replace(temp_path, USED_IMAGES_FILE)
        logger.info(f"Saved {len(used_images)} used images to {USED_IMAGES_FILE}")
    except Exception as e:
        logger.error(f"Failed to save used images: {e}")
        # Do not leave a half-written temporary file behind.
        try:
            os.remove(temp_path)
        except OSError:
            pass
# Populate used_images at import time from USED_IMAGES_FILE.
# load_used_images() logs its own failures rather than raising them.
load_used_images()
def get_image(search_query: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]: def get_image(search_query: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
"""Get an image with improved rate limiting and error handling.""" """Get an image with improved rate limiting and error handling."""
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'} headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}