try
This commit is contained in:
+47
-23
@@ -27,7 +27,8 @@ from foodie_utils import (
|
|||||||
upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
|
upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
|
||||||
is_interesting, generate_title_from_summary, summarize_with_gpt4o,
|
is_interesting, generate_title_from_summary, summarize_with_gpt4o,
|
||||||
generate_category_from_summary, post_to_wp, prepare_post_data,
|
generate_category_from_summary, post_to_wp, prepare_post_data,
|
||||||
select_best_author, smart_image_and_filter, get_flickr_image
|
select_best_author, smart_image_and_filter, get_flickr_image,
|
||||||
|
select_best_persona
|
||||||
)
|
)
|
||||||
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
|
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
@@ -248,30 +249,53 @@ class RSSScraper:
|
|||||||
num_paragraphs = determine_paragraph_count(interest_score)
|
num_paragraphs = determine_paragraph_count(interest_score)
|
||||||
extra_prompt = (
|
extra_prompt = (
|
||||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||||
f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
|
f"Focus on the most interesting aspects of the content.\n"
|
||||||
f"Do NOT introduce unrelated concepts.\n"
|
f"Use a {select_best_persona(interest_score, content)} tone.\n"
|
||||||
f"Expand on the core idea with relevant context about its appeal or significance.\n"
|
f"Make it engaging and shareable."
|
||||||
f"Do not include emojis in the summary."
|
|
||||||
)
|
)
|
||||||
|
|
||||||
final_summary = summarize_with_gpt4o(
|
summary = summarize_with_gpt4o(content, source_name, link, interest_score, extra_prompt)
|
||||||
scoring_content,
|
if not summary:
|
||||||
source_name,
|
logger.warning(f"Failed to generate summary for '{title}'")
|
||||||
link,
|
|
||||||
interest_score=interest_score,
|
|
||||||
extra_prompt=extra_prompt
|
|
||||||
)
|
|
||||||
|
|
||||||
if not final_summary:
|
|
||||||
logger.info(f"Summary failed for '{title}'")
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
summary = insert_link_naturally(summary, source_name, link)
|
||||||
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
|
if not summary:
|
||||||
|
logger.warning(f"Failed to insert link for '{title}'")
|
||||||
if post_data and author:
|
continue
|
||||||
return post_data, author, random.randint(600, 1800)
|
|
||||||
|
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(
|
||||||
|
summary, title, f"RSS: {source_name}"
|
||||||
|
)
|
||||||
|
|
||||||
|
if not post_data or not author:
|
||||||
|
logger.warning(f"Failed to prepare post data for '{title}'")
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
post_id, post_url = post_to_wp(
|
||||||
|
post_data=post_data,
|
||||||
|
category=category,
|
||||||
|
link=link,
|
||||||
|
author=author,
|
||||||
|
image_url=image_url,
|
||||||
|
original_source=source_name,
|
||||||
|
image_source=image_source,
|
||||||
|
uploader=uploader,
|
||||||
|
pixabay_url=page_url,
|
||||||
|
interest_score=interest_score
|
||||||
|
)
|
||||||
|
|
||||||
|
if post_id and post_url:
|
||||||
|
logger.info(f"Successfully posted '{title}' to WordPress (ID: {post_id})")
|
||||||
|
self.posted_titles.add(title)
|
||||||
|
save_json_file(FILE_PATHS["posted_rss_titles"], title, datetime.now(timezone.utc).isoformat())
|
||||||
|
return post_data, author["username"], random.randint(600, 1800)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in RSS automator: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
return None, None, random.randint(600, 1800)
|
return None, None, random.randint(600, 1800)
|
||||||
|
|
||||||
def run_rss_automator():
|
def run_rss_automator():
|
||||||
|
|||||||
@@ -46,6 +46,11 @@ logger = logging.getLogger(__name__)
|
|||||||
load_dotenv()
|
load_dotenv()
|
||||||
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
|
|
||||||
|
# Initialize global variables
|
||||||
|
used_images = set()
|
||||||
|
pixabay_rate_limiter = RateLimiter(max_requests=100, time_window=3600) # 100 requests per hour
|
||||||
|
flickr_rate_limiter = RateLimiter(max_requests=3600, time_window=3600) # 3600 requests per hour
|
||||||
|
|
||||||
def validate_json_entry(entry: Dict[str, Any]) -> bool:
|
def validate_json_entry(entry: Dict[str, Any]) -> bool:
|
||||||
"""Validate the structure of a JSON entry."""
|
"""Validate the structure of a JSON entry."""
|
||||||
required_fields = {"title", "timestamp"}
|
required_fields = {"title", "timestamp"}
|
||||||
@@ -862,6 +867,32 @@ def prune_recent_posts():
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to prune recent_posts.json: {e}")
|
logger.error(f"Failed to prune recent_posts.json: {e}")
|
||||||
|
|
||||||
|
def load_used_images():
    """Load the set of used image URLs from USED_IMAGES_FILE.

    The file is read one line at a time; every non-blank line is parsed as a
    JSON object and its ``'url'`` value is collected. The resulting set
    replaces the module-level ``used_images``.

    A line that is not valid JSON, or that has no usable ``'url'`` entry, is
    logged and skipped so that a single corrupt record does not discard all
    other entries. If the file does not exist, ``used_images`` is left
    untouched. Any other failure (for example an unreadable file) is logged
    and ``used_images`` is reset to an empty set.
    """
    global used_images
    try:
        if os.path.exists(USED_IMAGES_FILE):
            loaded = set()
            with open(USED_IMAGES_FILE, 'r', encoding='utf-8') as f:
                for line_number, raw_line in enumerate(f, start=1):
                    line = raw_line.strip()
                    if not line:
                        continue
                    try:
                        loaded.add(json.loads(line)['url'])
                    except (ValueError, KeyError, TypeError) as e:
                        # ValueError covers json.JSONDecodeError; KeyError and
                        # TypeError cover records that are valid JSON but have
                        # no 'url' key, are not objects, or hold an unhashable
                        # value.
                        logger.warning(
                            f"Skipping invalid entry on line {line_number} of {USED_IMAGES_FILE}: {e}"
                        )
            # Rebind only after the whole file was read, so a failure part-way
            # through never leaves a half-populated set behind.
            used_images = loaded
            logger.info(f"Loaded {len(used_images)} used images from {USED_IMAGES_FILE}")
    except Exception as e:
        logger.error(f"Failed to load used images: {e}")
        used_images = set()
|
||||||
|
|
||||||
|
def save_used_images():
    """Save the set of used image URLs to USED_IMAGES_FILE.

    Each URL in the module-level ``used_images`` set is written as one JSON
    object per line with the keys ``'url'`` and ``'timestamp'``. The timestamp
    is the current UTC time in ISO 8601 format, taken once per save and shared
    by every record.

    The records are first written to a temporary file next to the destination
    and then moved into place with ``os.replace``, so a failure while writing
    does not truncate the previously saved file. Errors are logged, not
    raised.
    """
    temp_path = f"{USED_IMAGES_FILE}.tmp"
    try:
        # Loop-invariant: one timestamp for the whole save.
        timestamp = datetime.now(timezone.utc).isoformat()
        with open(temp_path, 'w', encoding='utf-8') as f:
            f.writelines(
                json.dumps({'url': url, 'timestamp': timestamp}) + '\n'
                for url in used_images
            )
        os.replace(temp_path, USED_IMAGES_FILE)
        logger.info(f"Saved {len(used_images)} used images to {USED_IMAGES_FILE}")
    except Exception as e:
        logger.error(f"Failed to save used images: {e}")
        # Best-effort cleanup of a partially written temporary file.
        try:
            os.remove(temp_path)
        except OSError:
            pass
|
||||||
|
|
||||||
|
# Load used images on startup
|
||||||
|
load_used_images()
|
||||||
|
|
||||||
def get_image(search_query: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
|
def get_image(search_query: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
|
||||||
"""Get an image with improved rate limiting and error handling."""
|
"""Get an image with improved rate limiting and error handling."""
|
||||||
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
||||||
|
|||||||
Reference in New Issue
Block a user