diff --git a/foodie_automator_google.py b/foodie_automator_google.py index 9ddb4c1..30fccac 100644 --- a/foodie_automator_google.py +++ b/foodie_automator_google.py @@ -285,210 +285,98 @@ def fetch_duckduckgo_news_context(trend_title, hours=24): logging.error(f"Failed to fetch DuckDuckGo News context for '{trend_title}' after {MAX_RETRIES} attempts") return trend_title -def curate_from_google_trends(posted_titles_data, posted_titles, used_images_data, used_images): +def curate_from_google(item, original_source, source_name, link, page_url): + logger = logging.getLogger(__name__) try: - logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images") + content = item.get('snippet', '') + if not content: + logger.info(f"No content for Google item: {item.get('title', 'unknown')}") + return None, None - regions = ['US', 'GB', 'AU'] - all_trends = [] + interest_score = is_interesting(content) + if interest_score < 4: + logger.info(f"Google item '{item.get('title', 'unknown')}' not interesting enough: score {interest_score}") + return None, None - for geo in regions: - logging.info(f"Scraping Google Trends for geo={geo}") - trends = scrape_google_trends(geo=geo) - if trends: - logging.info(f"Collected {len(trends)} trends for geo={geo}") - all_trends.extend(trends) - else: - logging.warning(f"No trends collected for geo={geo}") + summary = summarize_with_gpt4o(content, source_name, link, interest_score=interest_score) + if not summary: + logger.warning(f"Failed to summarize Google item: {item.get('title', 'unknown')}") + return None, None - unique_trends = [] - seen_titles = set() - for trend in all_trends: - if trend["title"] not in seen_titles: - unique_trends.append(trend) - seen_titles.add(trend["title"]) - - if not unique_trends: - logging.info("No Google Trends data available across regions") - sleep_time = random.randint(1200, 1800) - return None, None, sleep_time + # Remove the original title from the summary if present + if item.get('title', '') in summary: + summary = summary.replace(item.get('title', ''), "").strip() + while "\n\n\n" in summary: + summary = summary.replace("\n\n\n", "\n\n") - unique_trends.sort(key=lambda x: x["search_volume"], reverse=True) - logging.info(f"Total unique trends collected: {len(unique_trends)}") + final_summary = insert_link_naturally(summary, source_name, link) + if not final_summary: + logger.warning(f"Failed to insert link for Google item: {item.get('title', 'unknown')}") + return None, None - attempts = 0 - max_attempts = 10 - while attempts < max_attempts and unique_trends: - trend = unique_trends.pop(0) - title = trend["title"] - link = trend.get("link", "") - summary = trend.get("summary", "") - source_name = trend.get("source", "Google Trends") - original_source = f'{source_name}' - original_url = link + post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, item.get('title', 'unknown')) + if not post_data: + logger.info(f"Post preparation failed for Google item: {item.get('title', 'unknown')}") + return None, None - if title in posted_titles: - logging.info(f"Skipping already posted trend: {title}") - attempts += 1 - continue + share_text = f"Check out this tasty find: {post_data['title']}" + share_text_encoded = quote(share_text) + share_links_template = ( + "Share this post: " + 'X | ' + 'Facebook' + ) - author = get_next_author_round_robin() - if not author: - logging.info(f"Skipping trend '{title}' due to tweet rate limits for all authors") - attempts += 1 - continue + # First call: Post without share links + post_data["content"] = final_summary + post_id, post_url = post_to_wp( + post_data=post_data, + category=category, + link=link, + author=author, + image_url=image_url, + original_source=original_source, + image_source=image_source, + uploader=uploader, + page_url=page_url, + interest_score=interest_score, + should_post_tweet=True, + summary=final_summary + ) - author_username = author["username"] - logging.info(f"Selected author via round-robin: {author_username}") + if not post_id: + logger.warning(f"Failed to post Google item to WP: {post_data['title']}") + return None, None - logging.info(f"Trying Google Trend: {title} from {source_name}") + # Second call: Update with share links + post_url_encoded = quote(post_url) + share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded) + post_data["content"] = f"{final_summary}\n\n{share_links}" + post_id, post_url = post_to_wp( + post_data=post_data, + category=category, + link=link, + author=author, + image_url=None, + original_source=original_source, + image_source=image_source, + uploader=uploader, + page_url=page_url, + interest_score=interest_score, + post_id=post_id, + should_post_tweet=False + ) - # Fetch DuckDuckGo context early to enhance smart_image_and_filter - ddg_context = fetch_duckduckgo_news_context(title) - enhanced_summary = summary + "\n\nAdditional Context: " + ddg_context if summary else ddg_context + if post_id: + logger.info(f"Successfully curated and posted Google item: {post_data['title']} (URL: {post_url})") + return post_id, post_url + else: + logger.warning(f"Failed to update Google post with share links: {post_data['title']}") + return None, None - try: - image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, enhanced_summary) - except Exception as e: - logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}") - attempts += 1 - continue - - if skip: - logging.info(f"Skipping filtered trend: {title}") - attempts += 1 - continue - - scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}" - interest_score = is_interesting(scoring_content) - logging.info(f"Interest score for '{title}': {interest_score}") - if interest_score < 6: - logging.info(f"Trend Interest Too Low: {interest_score}") - attempts += 1 - continue - - num_paragraphs = determine_paragraph_count(interest_score) - extra_prompt = ( - f"Generate exactly {num_paragraphs} paragraphs.\n" - f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n" - f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n" - f"Do NOT introduce unrelated concepts unless in the content or additional context.\n" - f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n" - f"Do not include emojis in the summary." - ) - content_to_summarize = scoring_content - final_summary = summarize_with_gpt4o( - content_to_summarize, - source_name, - link, - interest_score=interest_score, - extra_prompt=extra_prompt - ) - if not final_summary: - logging.info(f"Summary failed for '{title}'") - attempts += 1 - continue - - final_summary = insert_link_naturally(final_summary, source_name, link) - - post_data = { - "title": generate_title_from_summary(final_summary), - "content": final_summary, - "status": "publish", - "author": author_username, - "categories": [generate_category_from_summary(final_summary)] - } - category = post_data["categories"][0] - image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term) - if not image_url: - logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback") - image_url, image_source, uploader, page_url = get_image(image_query, specific_term) - if not image_url: - logging.warning(f"All image uploads failed for '{title}' - posting without image") - image_source = None - uploader = None - page_url = None - - hook = get_dynamic_hook(post_data["title"]).strip() - share_prompt = get_viral_share_prompt(post_data["title"], final_summary) - share_text = f"Check out this foodie gem! {post_data['title']}" - share_text_encoded = quote(share_text) - share_links_template = ( - f'

{share_prompt} ' - f' ' - f'

' - ) - post_data["content"] = f"{final_summary}\n\n{share_links_template}" - - global is_posting - is_posting = True - try: - post_id, post_url = post_to_wp( - post_data=post_data, - category=category, - link=link, - author=author, - image_url=image_url, - original_source=original_source, - image_source=image_source, - uploader=uploader, - page_url=page_url, - interest_score=interest_score, - should_post_tweet=True, - summary=final_summary - ) - if not post_id: - logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}") - post_url = original_url - else: - logging.info(f"Posted to WordPress for {author_username}: {post_url}") - - post_url_encoded = quote(post_url) - share_links = share_links_template.format(post_url=post_url_encoded) - post_data["content"] = f"{final_summary}\n\n{share_links}" - post_data["post_id"] = post_id - if post_id: - post_to_wp( - post_data=post_data, - category=category, - link=link, - author=author, - image_url=None, - original_source=original_source, - image_source=image_source, - uploader=uploader, - page_url=page_url, - interest_score=interest_score, - post_id=post_id, - should_post_tweet=False - ) - except Exception as e: - logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True) - post_url = original_url - finally: - is_posting = False - - timestamp = datetime.now(timezone.utc).isoformat() - save_json_file(POSTED_TITLES_FILE, title, timestamp) - posted_titles.add(title) - logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") - - if image_url: - save_json_file(USED_IMAGES_FILE, image_url, timestamp) - used_images.add(image_url) - logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") - - logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from Google Trends *****") - sleep_time = random.randint(1200, 1800) - return post_data, category, sleep_time - - logging.info("No interesting Google Trend found after attempts") - sleep_time = random.randint(1200, 1800) - return None, None, sleep_time except Exception as e: - logging.error(f"Unexpected error in curate_from_google_trends: {e}", exc_info=True) - sleep_time = random.randint(1200, 1800) - return None, None, sleep_time + logger.error(f"Error curating Google item '{item.get('title', 'unknown')}': {e}") + return None, None def run_google_trends_automator(): lock_fd = None diff --git a/foodie_automator_reddit.py b/foodie_automator_reddit.py index 9d0aed4..c2a5747 100644 --- a/foodie_automator_reddit.py +++ b/foodie_automator_reddit.py @@ -339,195 +339,98 @@ def fetch_reddit_posts(): logging.error(f"Unexpected error in fetch_reddit_posts: {e}", exc_info=True) return [] -def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used_images): +def curate_from_reddit(post, original_source, source_name, link, page_url): + logger = logging.getLogger(__name__) try: - logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images") + content = post.selftext if post.selftext else post.url + if not content: + logger.info(f"No content for Reddit post: {post.title}") + return None, None - posts = fetch_reddit_posts() - if not posts: - logging.info("No Reddit posts available") - sleep_time = random.randint(1200, 1800) - return None, None, sleep_time + interest_score = is_interesting(content) + if interest_score < 4: + logger.info(f"Reddit post '{post.title}' not interesting enough: score {interest_score}") + return None, None - attempts = 0 - max_attempts = 10 - while attempts < max_attempts and posts: - post = posts.pop(0) - title = post["title"] - link = post.get("link", "") - summary = post.get("summary", "") - source_name = "Reddit" - original_source = f'{source_name}' - original_url = link - upvotes = post.get("upvotes", 0) - comment_count = post.get("comment_count", 0) - top_comments = post.get("top_comments", []) + summary = summarize_with_gpt4o(content, source_name, link, interest_score=interest_score) + if not summary: + logger.warning(f"Failed to summarize Reddit post: {post.title}") + return None, None - if title in posted_titles: - logging.info(f"Skipping already posted Reddit post: {title}") - attempts += 1 - continue + # Remove the original title from the summary if present + if post.title in summary: + summary = summary.replace(post.title, "").strip() + while "\n\n\n" in summary: + summary = summary.replace("\n\n\n", "\n\n") - author = get_next_author_round_robin() - if not author: - logging.info(f"Skipping post '{title}' due to tweet rate limits for all authors") - attempts += 1 - continue + final_summary = insert_link_naturally(summary, source_name, link) + if not final_summary: + logger.warning(f"Failed to insert link for Reddit post: {post.title}") + return None, None - author_username = author["username"] - logging.info(f"Selected author via round-robin: {author_username}") + post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, post.title) + if not post_data: + logger.info(f"Post preparation failed for Reddit post: {post.title}") + return None, None - logging.info(f"Trying Reddit Post: {title} from {source_name}") + share_text = f"Check out this tasty find: {post_data['title']}" + share_text_encoded = quote(share_text) + share_links_template = ( + "Share this post: " + 'X | ' + 'Facebook' + ) - # Combine summary and top comments for smart_image_and_filter - enhanced_summary = summary - if top_comments: - enhanced_summary += "\n\nTop Comments:\n" + "\n".join(top_comments) + # First call: Post without share links + post_data["content"] = final_summary + post_id, post_url = post_to_wp( + post_data=post_data, + category=category, + link=link, + author=author, + image_url=image_url, + original_source=original_source, + image_source=image_source, + uploader=uploader, + page_url=page_url, + interest_score=interest_score, + should_post_tweet=True, + summary=final_summary + ) - try: - image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, enhanced_summary) - except Exception as e: - logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}") - attempts += 1 - continue + if not post_id: + logger.warning(f"Failed to post Reddit post to WP: {post_data['title']}") + return None, None - if skip: - logging.info(f"Skipping filtered Reddit post: {title}") - attempts += 1 - continue + # Second call: Update with share links + post_url_encoded = quote(post_url) + share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded) + post_data["content"] = f"{final_summary}\n\n{share_links}" + post_id, post_url = post_to_wp( + post_data=post_data, + category=category, + link=link, + author=author, + image_url=None, + original_source=original_source, + image_source=image_source, + uploader=uploader, + page_url=page_url, + interest_score=interest_score, + post_id=post_id, + should_post_tweet=False + ) - ddg_context = fetch_duckduckgo_news_context(title) - scoring_content = f"Title: {title}\n\nContent: {summary}\n\nTop Comments: {top_comments}\n\nAdditional Context: {ddg_context}" - logging.debug(f"Scoring content for '{title}': {scoring_content}") - interest_score = is_interesting_reddit(title, summary, upvotes, comment_count, top_comments) - logging.info(f"Interest score for '{title}': {interest_score}") - if interest_score < 6: - logging.info(f"Reddit Interest Too Low: {interest_score}") - attempts += 1 - continue + if post_id: + logger.info(f"Successfully curated and posted Reddit post: {post_data['title']} (URL: {post_url})") + return post_id, post_url + else: + logger.warning(f"Failed to update Reddit post with share links: {post_data['title']}") + return None, None - num_paragraphs = determine_paragraph_count(interest_score) - extra_prompt = ( - f"Generate exactly {num_paragraphs} paragraphs.\n" - f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n" - f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n" - f"Do NOT introduce unrelated concepts unless in the content or additional context.\n" - f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n" - f"Do not include emojis in the summary." - ) - content_to_summarize = scoring_content - final_summary = summarize_with_gpt4o( - content_to_summarize, - source_name, - link, - interest_score=interest_score, - extra_prompt=extra_prompt - ) - if not final_summary: - logging.info(f"Summary failed for '{title}'") - attempts += 1 - continue - - final_summary = insert_link_naturally(final_summary, source_name, link) - - post_data = { - "title": generate_title_from_summary(final_summary), - "content": final_summary, - "status": "publish", - "author": author_username, - "categories": [generate_category_from_summary(final_summary)] - } - category = post_data["categories"][0] - image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term) - if not image_url: - logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback") - image_url, image_source, uploader, page_url = get_image(image_query, specific_term) - if not image_url: - logging.warning(f"All image uploads failed for '{title}' - posting without image") - image_source = None - uploader = None - page_url = None - - hook = get_dynamic_hook(post_data["title"]).strip() - share_prompt = get_viral_share_prompt(post_data["title"], final_summary) - share_text = f"Check out this foodie gem! {post_data['title']}" - share_text_encoded = quote(share_text) - share_links_template = ( - f'

{share_prompt} ' - f' ' - f'

' - ) - post_data["content"] = f"{final_summary}\n\n{share_links_template}" - - global is_posting - is_posting = True - try: - post_id, post_url = post_to_wp( - post_data=post_data, - category=category, - link=link, - author=author, - image_url=image_url, - original_source=original_source, - image_source=image_source, - uploader=uploader, - page_url=page_url, - interest_score=interest_score, - should_post_tweet=True, - summary=final_summary - ) - if not post_id: - logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}") - post_url = original_url - else: - logging.info(f"Posted to WordPress for {author_username}: {post_url}") - - post_url_encoded = quote(post_url) - share_links = share_links_template.format(post_url=post_url_encoded) - post_data["content"] = f"{final_summary}\n\n{share_links}" - post_data["post_id"] = post_id - if post_id: - post_to_wp( - post_data=post_data, - category=category, - link=link, - author=author, - image_url=None, - original_source=original_source, - image_source=image_source, - uploader=uploader, - page_url=page_url, - interest_score=interest_score, - post_id=post_id, - should_post_tweet=False - ) - except Exception as e: - logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True) - post_url = original_url - finally: - is_posting = False - - timestamp = datetime.now(timezone.utc).isoformat() - save_json_file(POSTED_TITLES_FILE, title, timestamp) - posted_titles.add(title) - logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") - - if image_url: - save_json_file(USED_IMAGES_FILE, image_url, timestamp) - used_images.add(image_url) - logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") - - logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from Reddit *****") - sleep_time = random.randint(1200, 1800) - return post_data, category, sleep_time - - logging.info("No interesting Reddit post found after attempts") - sleep_time = random.randint(1200, 1800) - return None, None, sleep_time except Exception as e: - logging.error(f"Unexpected error in curate_from_reddit: {e}", exc_info=True) - sleep_time = random.randint(1200, 1800) - return None, None, sleep_time + logger.error(f"Error curating Reddit post '{post.get('title', 'unknown')}': {e}") + return None, None def run_reddit_automator(): lock_fd = None diff --git a/foodie_automator_rss.py b/foodie_automator_rss.py index 0893281..c48be50 100644 --- a/foodie_automator_rss.py +++ b/foodie_automator_rss.py @@ -254,195 +254,98 @@ def fetch_duckduckgo_news_context(title, hours=24): logging.error(f"Failed to fetch DuckDuckGo News context for '{title}' after {MAX_RETRIES} attempts") return title -def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_images): +def curate_from_rss(entry, original_source, source_name, link, page_url): + logger = logging.getLogger(__name__) try: - logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images") + content = entry.summary + if not content: + logger.info(f"No content for RSS entry: {entry.title}") + return None, None - articles = fetch_rss_feeds() - if not articles: - logging.info("No RSS articles available") - sleep_time = random.randint(1200, 1800) # 20–30 minutes - return None, None, sleep_time + interest_score = is_interesting(content) + if interest_score < 4: + logger.info(f"RSS entry '{entry.title}' not interesting enough: score {interest_score}") + return None, None - attempts = 0 - max_attempts = 10 - while attempts < max_attempts and articles: - article = articles.pop(0) - title = article["title"] - link = article["link"] - summary = article.get("summary", "") - source_name = article.get("feed_title", "Unknown Source") - original_source = f'{source_name}' - original_url = link # Store for fallback + summary = summarize_with_gpt4o(content, source_name, link, interest_score=interest_score) + if not summary: + logger.warning(f"Failed to summarize RSS entry: {entry.title}") + return None, None - if title in posted_titles: - logging.info(f"Skipping already posted article: {title}") - attempts += 1 - continue + # Remove the original title from the summary if present + if entry.title in summary: + summary = summary.replace(entry.title, "").strip() + while "\n\n\n" in summary: + summary = summary.replace("\n\n\n", "\n\n") - # Select author - author = get_next_author_round_robin() - if not author: - logging.info(f"Skipping article '{title}' due to tweet rate limits for all authors") - attempts += 1 - continue - author_username = author["username"] - logging.info(f"Selected author via round-robin: {author_username}") + final_summary = insert_link_naturally(summary, source_name, link) + if not final_summary: + logger.warning(f"Failed to insert link for RSS entry: {entry.title}") + return None, None - logging.info(f"Trying RSS Article: {title} from {source_name}") + post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, entry.title) + if not post_data: + logger.info(f"Post preparation failed for RSS entry: {entry.title}") + return None, None - try: - image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary) - except Exception as e: - logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}") - attempts += 1 - continue + share_text = f"Check out this tasty find: {post_data['title']}" + share_text_encoded = quote(share_text) + share_links_template = ( + "Share this post: " + 'X | ' + 'Facebook' + ) - if skip: - logging.info(f"Skipping filtered RSS article: {title}") - attempts += 1 - continue + # First call: Post without share links + post_data["content"] = final_summary + post_id, post_url = post_to_wp( + post_data=post_data, + category=category, + link=link, + author=author, + image_url=image_url, + original_source=original_source, + image_source=image_source, + uploader=uploader, + page_url=page_url, + interest_score=interest_score, + should_post_tweet=True, + summary=final_summary + ) - ddg_context = fetch_duckduckgo_news_context(title) - scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}" - interest_score = is_interesting(scoring_content) - logging.info(f"Interest score for '{title}': {interest_score}") - if interest_score < 6: - logging.info(f"RSS Interest Too Low: {interest_score}") - attempts += 1 - continue + if not post_id: + logger.warning(f"Failed to post RSS entry to WP: {post_data['title']}") + return None, None - num_paragraphs = determine_paragraph_count(interest_score) - extra_prompt = ( - f"Generate exactly {num_paragraphs} paragraphs.\n" - f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n" - f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n" - f"Do NOT introduce unrelated concepts unless in the content or additional context.\n" - f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n" - f"Do not include emojis in the summary." - ) - content_to_summarize = scoring_content - final_summary = summarize_with_gpt4o( - content_to_summarize, - source_name, - link, - interest_score=interest_score, - extra_prompt=extra_prompt - ) - if not final_summary: - logging.info(f"Summary failed for '{title}'") - attempts += 1 - continue + # Second call: Update with share links + post_url_encoded = quote(post_url) + share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded) + post_data["content"] = f"{final_summary}\n\n{share_links}" + post_id, post_url = post_to_wp( + post_data=post_data, + category=category, + link=link, + author=author, + image_url=None, + original_source=original_source, + image_source=image_source, + uploader=uploader, + page_url=page_url, + interest_score=interest_score, + post_id=post_id, + should_post_tweet=False + ) - final_summary = insert_link_naturally(final_summary, source_name, link) + if post_id: + logger.info(f"Successfully curated and posted RSS entry: {post_data['title']} (URL: {post_url})") + return post_id, post_url + else: + logger.warning(f"Failed to update RSS post with share links: {post_data['title']}") + return None, None - post_data = { - "title": generate_title_from_summary(final_summary), - "content": final_summary, - "status": "publish", - "author": author_username, - "categories": [generate_category_from_summary(final_summary)] - } - category = post_data["categories"][0] - image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term) - if not image_url: - logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback") - image_url, image_source, uploader, page_url = get_image(image_query, specific_term) - if not image_url: - logging.warning(f"All image uploads failed for '{title}' - posting without image") - image_source = None - uploader = None - page_url = None - - hook = get_dynamic_hook(post_data["title"]).strip() - share_prompt = get_viral_share_prompt(post_data["title"], final_summary) - share_text = f"Check out this foodie gem! {post_data['title']}" - share_text_encoded = quote(share_text) - share_links_template = ( - f'

{share_prompt} ' - f' ' - f'

' - ) - - post_data["content"] = f"{final_summary}\n\n{share_links_template.format(post_url='{post_url}', share_text=share_text_encoded)}" - - global is_posting - is_posting = True - try: - post_id, post_url = post_to_wp( - post_data=post_data, - category=category, - link=link, - author=author, - image_url=image_url, - original_source=original_source, - image_source=image_source, - uploader=uploader, - page_url=page_url, - interest_score=interest_score, - should_post_tweet=True, - summary=final_summary - ) - if not post_id: - logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}") - post_url = original_url - else: - logging.info(f"Posted to WordPress for {author_username}: {post_url}") - - post_url_encoded = quote(post_url) - post_data["content"] = f"{final_summary}\n\n{share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)}" - if post_id: - post_to_wp( - post_data=post_data, - category=category, - link=link, - author=author, - image_url=None, # Skip image re-upload - original_source=original_source, - image_source=image_source, - uploader=uploader, - page_url=page_url, - interest_score=interest_score, - post_id=post_id, - should_post_tweet=False - ) - - timestamp = datetime.now(timezone.utc).isoformat() - save_json_file(POSTED_TITLES_FILE, title, timestamp) - posted_titles.add(title) - logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") - - if image_url: - save_json_file(USED_IMAGES_FILE, image_url, timestamp) - used_images.add(image_url) - logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") - - logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from RSS *****") - sleep_time = random.randint(1200, 1800) # 20–30 minutes - return post_data, category, sleep_time - - except Exception as e: - logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True) - post_url = original_url - timestamp = datetime.now(timezone.utc).isoformat() - save_json_file(POSTED_TITLES_FILE, title, timestamp) - posted_titles.add(title) - logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") - if image_url: - save_json_file(USED_IMAGES_FILE, image_url, timestamp) - used_images.add(image_url) - logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") - attempts += 1 - finally: - is_posting = False - - logging.info("No interesting RSS article found after attempts") - sleep_time = random.randint(1200, 1800) # 20–30 minutes - return None, None, sleep_time except Exception as e: - logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True) - sleep_time = random.randint(1200, 1800) # 20–30 minutes - return None, None, sleep_time + logger.error(f"Error curating RSS entry '{entry.get('title', 'unknown')}': {e}") + return None, None def run_rss_automator(): lock_fd = None diff --git a/foodie_utils.py b/foodie_utils.py index 874470b..7bb48ba 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -615,23 +615,28 @@ def insert_link_naturally(summary, source_name, source_url): try: logging.info(f"Input summary to insert_link_naturally: {summary!r}") - # Split summary into paragraphs using \n\n (correct separator) + # Split summary into paragraphs using \n\n paragraphs = summary.split('\n\n') if not paragraphs or all(not p.strip() for p in paragraphs): logging.error("No valid paragraphs to insert link.") - return summary + return append_link_as_fallback(summary, source_name, source_url) # Find paragraphs with at least two sentences - eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2] + eligible_paragraphs = [ + p for p in paragraphs + if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2 + ] if not eligible_paragraphs: logging.warning("No paragraph with multiple sentences found, using fallback.") return append_link_as_fallback(summary, source_name, source_url) - # Alternative phrases for variety + # Alternative phrases for manual insertion (as a fallback) link_phrases = [ "according to {source}", "as reported by {source}", - "{source} notes that" + "{source} notes that", + "per {source}", + "says {source}" ] best_candidate = None @@ -643,22 +648,28 @@ def insert_link_naturally(summary, source_name, source_url): sentences = re.split(r'(?<=[.!?])\s+', para.strip()) eligible_sentences = [ (i, s) for i, s in enumerate(sentences) - if s.strip() and not s.endswith('?') # Exclude sentences ending with '?' - and not s.endswith('!') # Exclude exclamations for smoother integration + if s.strip() + and not s.endswith('?') # Exclude questions + and not s.endswith('!') # Exclude exclamations + and '{source_name}' formatted_link = link_phrase.format(source=link_pattern) - # Insert the link at the end of the selected sentence (no capitalization needed) - new_sentence = f"{sentence.rstrip('.')} {formatted_link}." + # Use GPT to rewrite the sentence with the link + prompt = ( + f"Rewrite the following sentence to naturally include a reference to the source '{source_name}' " + f"with a hyperlink in HTML format: {source_name}. " + "Integrate the link into the sentence seamlessly, maintaining the original tone and style. " + "Do not add extra sentences, change the meaning, or include additional punctuation like a trailing period. " + "Return only the rewritten sentence." + ) + response = client.chat.completions.create( + model=LIGHT_TASK_MODEL, + messages=[ + {"role": "system", "content": prompt}, + {"role": "user", "content": sentence} + ], + max_tokens=100, + temperature=0.7 + ) + new_sentence = response.choices[0].message.content.strip() + if not new_sentence or ' tags + # Format content with

tags, splitting on \n\n to match summary format content = post_data["content"] if content is None: logger.error(f"Post content is None for title '{post_data['title']}' - using fallback") content = "Content unavailable. Check the original source for details." - formatted_content = "\n".join(f"

{para}

" for para in content.split('\n') if para.strip()) + formatted_content = "\n".join(f"

{para}

" for para in content.split('\n\n') if para.strip()) # Upload image before posting image_id = None