This commit is contained in:
2025-05-03 16:46:09 +10:00
parent 73e0ef4f53
commit b265b5aa30
2 changed files with 78 additions and 23 deletions
+43 -19
View File
@@ -27,7 +27,8 @@ from foodie_utils import (
upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
is_interesting, generate_title_from_summary, summarize_with_gpt4o,
generate_category_from_summary, post_to_wp, prepare_post_data,
select_best_author, smart_image_and_filter, get_flickr_image
select_best_author, smart_image_and_filter, get_flickr_image,
select_best_persona
)
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
from dotenv import load_dotenv
@@ -248,29 +249,52 @@ class RSSScraper:
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
f"Do NOT introduce unrelated concepts.\n"
f"Expand on the core idea with relevant context about its appeal or significance.\n"
f"Do not include emojis in the summary."
f"Focus on the most interesting aspects of the content.\n"
f"Use a {select_best_persona(interest_score, content)} tone.\n"
f"Make it engaging and shareable."
)
final_summary = summarize_with_gpt4o(
scoring_content,
source_name,
link,
interest_score=interest_score,
extra_prompt=extra_prompt
)
if not final_summary:
logger.info(f"Summary failed for '{title}'")
summary = summarize_with_gpt4o(content, source_name, link, interest_score, extra_prompt)
if not summary:
logger.warning(f"Failed to generate summary for '{title}'")
continue
final_summary = insert_link_naturally(final_summary, source_name, link)
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
summary = insert_link_naturally(summary, source_name, link)
if not summary:
logger.warning(f"Failed to insert link for '{title}'")
continue
if post_data and author:
return post_data, author, random.randint(600, 1800)
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(
summary, title, f"RSS: {source_name}"
)
if not post_data or not author:
logger.warning(f"Failed to prepare post data for '{title}'")
continue
try:
post_id, post_url = post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=image_url,
original_source=source_name,
image_source=image_source,
uploader=uploader,
pixabay_url=page_url,
interest_score=interest_score
)
if post_id and post_url:
logger.info(f"Successfully posted '{title}' to WordPress (ID: {post_id})")
self.posted_titles.add(title)
save_json_file(FILE_PATHS["posted_rss_titles"], title, datetime.now(timezone.utc).isoformat())
return post_data, author["username"], random.randint(600, 1800)
except Exception as e:
logger.error(f"Error in RSS automator: {e}")
continue
return None, None, random.randint(600, 1800)
+31
View File
@@ -46,6 +46,11 @@ logger = logging.getLogger(__name__)
# Load variables from a .env file into the process environment. This has to run
# before the os.getenv() call below so a key defined there is visible to it.
load_dotenv()
# Module-wide OpenAI client. The API key is read from the OPENAI_API_KEY
# environment variable; os.getenv() returns None when it is unset.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# Initialize global variables
# Set of image URLs that have already been used. load_used_images() rebinds it
# from USED_IMAGES_FILE and save_used_images() writes it back out.
used_images = set()
# One limiter per image provider.
# NOTE(review): time_window is presumably in seconds (3600 == one hour, matching
# the trailing comments) -- confirm against the RateLimiter definition.
pixabay_rate_limiter = RateLimiter(max_requests=100, time_window=3600) # 100 requests per hour
flickr_rate_limiter = RateLimiter(max_requests=3600, time_window=3600) # 3600 requests per hour
def validate_json_entry(entry: Dict[str, Any]) -> bool:
"""Validate the structure of a JSON entry."""
required_fields = {"title", "timestamp"}
@@ -862,6 +867,32 @@ def prune_recent_posts():
except Exception as e:
logger.error(f"Failed to prune recent_posts.json: {e}")
def load_used_images():
    """Load the set of used image URLs from file.

    Reads ``USED_IMAGES_FILE`` as JSON Lines (one JSON object carrying a
    ``'url'`` key per non-blank line) and rebinds the module-level
    ``used_images`` set to the URLs found. When the file does not exist,
    ``used_images`` is left untouched.

    A line that is not valid JSON, or that has no usable ``'url'`` entry, is
    skipped with a warning so that one corrupt record does not throw away
    every valid one. Any other failure (for example the file cannot be read
    or decoded) is logged and resets ``used_images`` to an empty set.

    Returns:
        None. The result is published through the ``used_images`` global.
    """
    global used_images
    try:
        if os.path.exists(USED_IMAGES_FILE):
            loaded = set()
            with open(USED_IMAGES_FILE, 'r', encoding='utf-8') as f:
                for line_number, line in enumerate(f, start=1):
                    record = line.strip()
                    if not record:
                        continue
                    try:
                        loaded.add(json.loads(record)['url'])
                    except (ValueError, KeyError, TypeError) as e:
                        # ValueError covers json.JSONDecodeError; KeyError and
                        # TypeError cover records without a usable 'url' entry.
                        logger.warning(
                            f"Skipping malformed entry on line {line_number} of {USED_IMAGES_FILE}: {e}"
                        )
            # Rebind only after the whole file has been read successfully.
            used_images = loaded
            logger.info(f"Loaded {len(used_images)} used images from {USED_IMAGES_FILE}")
    except Exception as e:
        logger.error(f"Failed to load used images: {e}")
        used_images = set()
def save_used_images():
    """Save the set of used image URLs to file.

    Writes every URL in the module-level ``used_images`` set to
    ``USED_IMAGES_FILE`` as JSON Lines: one ``{'url': ..., 'timestamp': ...}``
    object per line, where the timestamp is the UTC time of this save in
    ISO 8601 form.

    The data is first written to a sibling ``.tmp`` file and then moved over
    the real file with ``os.replace``. A failure part-way through the write
    therefore leaves the previous file intact instead of a truncated one.

    Errors are logged and swallowed; the function never raises.

    Returns:
        None.
    """
    try:
        # A single timestamp for the whole save; it does not vary per URL.
        timestamp = datetime.now(timezone.utc).isoformat()
        tmp_path = f"{USED_IMAGES_FILE}.tmp"
        with open(tmp_path, 'w', encoding='utf-8') as f:
            for url in used_images:
                json.dump({'url': url, 'timestamp': timestamp}, f)
                f.write('\n')
        # Swap the fully written file into place in one step.
        os.replace(tmp_path, USED_IMAGES_FILE)
        logger.info(f"Saved {len(used_images)} used images to {USED_IMAGES_FILE}")
    except Exception as e:
        logger.error(f"Failed to save used images: {e}")
# Load used images on startup.
# NOTE: this call is a module-level statement, so it runs as a side effect of
# importing this module and touches the filesystem at import time.
load_used_images()
def get_image(search_query: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
"""Get an image with improved rate limiting and error handling."""
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}