fix insert link naturally

This commit is contained in:
2025-05-13 19:09:26 +10:00
parent 5f38374abd
commit 79cc367579
4 changed files with 286 additions and 557 deletions
+79 -176
View File
@@ -254,195 +254,98 @@ def fetch_duckduckgo_news_context(title, hours=24):
logging.error(f"Failed to fetch DuckDuckGo News context for '{title}' after {MAX_RETRIES} attempts")
return title
def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_images):
def curate_from_rss(entry, original_source, source_name, link, page_url):
logger = logging.getLogger(__name__)
try:
logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")
content = entry.summary
if not content:
logger.info(f"No content for RSS entry: {entry.title}")
return None, None
articles = fetch_rss_feeds()
if not articles:
logging.info("No RSS articles available")
sleep_time = random.randint(1200, 1800) # 2030 minutes
return None, None, sleep_time
interest_score = is_interesting(content)
if interest_score < 4:
logger.info(f"RSS entry '{entry.title}' not interesting enough: score {interest_score}")
return None, None
attempts = 0
max_attempts = 10
while attempts < max_attempts and articles:
article = articles.pop(0)
title = article["title"]
link = article["link"]
summary = article.get("summary", "")
source_name = article.get("feed_title", "Unknown Source")
original_source = f'<a href="{link}">{source_name}</a>'
original_url = link # Store for fallback
summary = summarize_with_gpt4o(content, source_name, link, interest_score=interest_score)
if not summary:
logger.warning(f"Failed to summarize RSS entry: {entry.title}")
return None, None
if title in posted_titles:
logging.info(f"Skipping already posted article: {title}")
attempts += 1
continue
# Remove the original title from the summary if present
if entry.title in summary:
summary = summary.replace(entry.title, "").strip()
while "\n\n\n" in summary:
summary = summary.replace("\n\n\n", "\n\n")
# Select author
author = get_next_author_round_robin()
if not author:
logging.info(f"Skipping article '{title}' due to tweet rate limits for all authors")
attempts += 1
continue
author_username = author["username"]
logging.info(f"Selected author via round-robin: {author_username}")
final_summary = insert_link_naturally(summary, source_name, link)
if not final_summary:
logger.warning(f"Failed to insert link for RSS entry: {entry.title}")
return None, None
logging.info(f"Trying RSS Article: {title} from {source_name}")
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, entry.title)
if not post_data:
logger.info(f"Post preparation failed for RSS entry: {entry.title}")
return None, None
try:
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
except Exception as e:
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1
continue
share_text = f"Check out this tasty find: {post_data['title']}"
share_text_encoded = quote(share_text)
share_links_template = (
"Share this post: "
'<a href="https://x.com/intent/tweet?url={post_url}&text={share_text}">X</a> | '
'<a href="https://www.facebook.com/sharer/sharer.php?u={post_url}">Facebook</a>'
)
if skip:
logging.info(f"Skipping filtered RSS article: {title}")
attempts += 1
continue
# First call: Post without share links
post_data["content"] = final_summary
post_id, post_url = post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=image_url,
original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
interest_score=interest_score,
should_post_tweet=True,
summary=final_summary
)
ddg_context = fetch_duckduckgo_news_context(title)
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
interest_score = is_interesting(scoring_content)
logging.info(f"Interest score for '{title}': {interest_score}")
if interest_score < 6:
logging.info(f"RSS Interest Too Low: {interest_score}")
attempts += 1
continue
if not post_id:
logger.warning(f"Failed to post RSS entry to WP: {post_data['title']}")
return None, None
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = scoring_content
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
interest_score=interest_score,
extra_prompt=extra_prompt
)
if not final_summary:
logging.info(f"Summary failed for '{title}'")
attempts += 1
continue
# Second call: Update with share links
post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
post_data["content"] = f"{final_summary}\n\n{share_links}"
post_id, post_url = post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=None,
original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
interest_score=interest_score,
post_id=post_id,
should_post_tweet=False
)
final_summary = insert_link_naturally(final_summary, source_name, link)
if post_id:
logger.info(f"Successfully curated and posted RSS entry: {post_data['title']} (URL: {post_url})")
return post_id, post_url
else:
logger.warning(f"Failed to update RSS post with share links: {post_data['title']}")
return None, None
post_data = {
"title": generate_title_from_summary(final_summary),
"content": final_summary,
"status": "publish",
"author": author_username,
"categories": [generate_category_from_summary(final_summary)]
}
category = post_data["categories"][0]
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
if not image_url:
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
if not image_url:
logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None
uploader = None
page_url = None
hook = get_dynamic_hook(post_data["title"]).strip()
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
share_text = f"Check out this foodie gem! {post_data['title']}"
share_text_encoded = quote(share_text)
share_links_template = (
f'<p>{share_prompt} '
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
)
post_data["content"] = f"{final_summary}\n\n{share_links_template.format(post_url='{post_url}', share_text=share_text_encoded)}"
global is_posting
is_posting = True
try:
post_id, post_url = post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=image_url,
original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
interest_score=interest_score,
should_post_tweet=True,
summary=final_summary
)
if not post_id:
logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}")
post_url = original_url
else:
logging.info(f"Posted to WordPress for {author_username}: {post_url}")
post_url_encoded = quote(post_url)
post_data["content"] = f"{final_summary}\n\n{share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)}"
if post_id:
post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=None, # Skip image re-upload
original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
interest_score=interest_score,
post_id=post_id,
should_post_tweet=False
)
timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, title, timestamp)
posted_titles.add(title)
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
if image_url:
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from RSS *****")
sleep_time = random.randint(1200, 1800) # 2030 minutes
return post_data, category, sleep_time
except Exception as e:
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
post_url = original_url
timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, title, timestamp)
posted_titles.add(title)
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
if image_url:
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
attempts += 1
finally:
is_posting = False
logging.info("No interesting RSS article found after attempts")
sleep_time = random.randint(1200, 1800) # 2030 minutes
return None, None, sleep_time
except Exception as e:
logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True)
sleep_time = random.randint(1200, 1800) # 2030 minutes
return None, None, sleep_time
logger.error(f"Error curating RSS entry '{entry.get('title', 'unknown')}': {e}")
return None, None
def run_rss_automator():
lock_fd = None