fix insert link naturally

This commit is contained in:
2025-05-13 19:09:26 +10:00
parent 5f38374abd
commit 79cc367579
4 changed files with 286 additions and 557 deletions
+49 -161
View File
@@ -285,144 +285,50 @@ def fetch_duckduckgo_news_context(trend_title, hours=24):
logging.error(f"Failed to fetch DuckDuckGo News context for '{trend_title}' after {MAX_RETRIES} attempts") logging.error(f"Failed to fetch DuckDuckGo News context for '{trend_title}' after {MAX_RETRIES} attempts")
return trend_title return trend_title
def curate_from_google_trends(posted_titles_data, posted_titles, used_images_data, used_images): def curate_from_google(item, original_source, source_name, link, page_url):
logger = logging.getLogger(__name__)
try: try:
logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images") content = item.get('snippet', '')
if not content:
logger.info(f"No content for Google item: {item.get('title', 'unknown')}")
return None, None
regions = ['US', 'GB', 'AU'] interest_score = is_interesting(content)
all_trends = [] if interest_score < 4:
logger.info(f"Google item '{item.get('title', 'unknown')}' not interesting enough: score {interest_score}")
return None, None
for geo in regions: summary = summarize_with_gpt4o(content, source_name, link, interest_score=interest_score)
logging.info(f"Scraping Google Trends for geo={geo}") if not summary:
trends = scrape_google_trends(geo=geo) logger.warning(f"Failed to summarize Google item: {item.get('title', 'unknown')}")
if trends: return None, None
logging.info(f"Collected {len(trends)} trends for geo={geo}")
all_trends.extend(trends)
else:
logging.warning(f"No trends collected for geo={geo}")
unique_trends = [] # Remove the original title from the summary if present
seen_titles = set() if item.get('title', '') in summary:
for trend in all_trends: summary = summary.replace(item.get('title', ''), "").strip()
if trend["title"] not in seen_titles: while "\n\n\n" in summary:
unique_trends.append(trend) summary = summary.replace("\n\n\n", "\n\n")
seen_titles.add(trend["title"])
if not unique_trends: final_summary = insert_link_naturally(summary, source_name, link)
logging.info("No Google Trends data available across regions")
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
unique_trends.sort(key=lambda x: x["search_volume"], reverse=True)
logging.info(f"Total unique trends collected: {len(unique_trends)}")
attempts = 0
max_attempts = 10
while attempts < max_attempts and unique_trends:
trend = unique_trends.pop(0)
title = trend["title"]
link = trend.get("link", "")
summary = trend.get("summary", "")
source_name = trend.get("source", "Google Trends")
original_source = f'<a href="{link}">{source_name}</a>'
original_url = link
if title in posted_titles:
logging.info(f"Skipping already posted trend: {title}")
attempts += 1
continue
author = get_next_author_round_robin()
if not author:
logging.info(f"Skipping trend '{title}' due to tweet rate limits for all authors")
attempts += 1
continue
author_username = author["username"]
logging.info(f"Selected author via round-robin: {author_username}")
logging.info(f"Trying Google Trend: {title} from {source_name}")
# Fetch DuckDuckGo context early to enhance smart_image_and_filter
ddg_context = fetch_duckduckgo_news_context(title)
enhanced_summary = summary + "\n\nAdditional Context: " + ddg_context if summary else ddg_context
try:
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, enhanced_summary)
except Exception as e:
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1
continue
if skip:
logging.info(f"Skipping filtered trend: {title}")
attempts += 1
continue
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
interest_score = is_interesting(scoring_content)
logging.info(f"Interest score for '{title}': {interest_score}")
if interest_score < 6:
logging.info(f"Trend Interest Too Low: {interest_score}")
attempts += 1
continue
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = scoring_content
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
interest_score=interest_score,
extra_prompt=extra_prompt
)
if not final_summary: if not final_summary:
logging.info(f"Summary failed for '{title}'") logger.warning(f"Failed to insert link for Google item: {item.get('title', 'unknown')}")
attempts += 1 return None, None
continue
final_summary = insert_link_naturally(final_summary, source_name, link) post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, item.get('title', 'unknown'))
if not post_data:
logger.info(f"Post preparation failed for Google item: {item.get('title', 'unknown')}")
return None, None
post_data = { share_text = f"Check out this tasty find: {post_data['title']}"
"title": generate_title_from_summary(final_summary),
"content": final_summary,
"status": "publish",
"author": author_username,
"categories": [generate_category_from_summary(final_summary)]
}
category = post_data["categories"][0]
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
if not image_url:
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
if not image_url:
logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None
uploader = None
page_url = None
hook = get_dynamic_hook(post_data["title"]).strip()
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
share_text = f"Check out this foodie gem! {post_data['title']}"
share_text_encoded = quote(share_text) share_text_encoded = quote(share_text)
share_links_template = ( share_links_template = (
f'<p>{share_prompt} ' "Share this post: "
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> ' '<a href="https://x.com/intent/tweet?url={post_url}&text={share_text}">X</a> | '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>' '<a href="https://www.facebook.com/sharer/sharer.php?u={post_url}">Facebook</a>'
) )
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
global is_posting # First call: Post without share links
is_posting = True post_data["content"] = final_summary
try:
post_id, post_url = post_to_wp( post_id, post_url = post_to_wp(
post_data=post_data, post_data=post_data,
category=category, category=category,
@@ -437,18 +343,16 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
should_post_tweet=True, should_post_tweet=True,
summary=final_summary summary=final_summary
) )
if not post_id:
logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}")
post_url = original_url
else:
logging.info(f"Posted to WordPress for {author_username}: {post_url}")
if not post_id:
logger.warning(f"Failed to post Google item to WP: {post_data['title']}")
return None, None
# Second call: Update with share links
post_url_encoded = quote(post_url) post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded) share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
post_data["content"] = f"{final_summary}\n\n{share_links}" post_data["content"] = f"{final_summary}\n\n{share_links}"
post_data["post_id"] = post_id post_id, post_url = post_to_wp(
if post_id:
post_to_wp(
post_data=post_data, post_data=post_data,
category=category, category=category,
link=link, link=link,
@@ -462,33 +366,17 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
post_id=post_id, post_id=post_id,
should_post_tweet=False should_post_tweet=False
) )
if post_id:
logger.info(f"Successfully curated and posted Google item: {post_data['title']} (URL: {post_url})")
return post_id, post_url
else:
logger.warning(f"Failed to update Google post with share links: {post_data['title']}")
return None, None
except Exception as e: except Exception as e:
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True) logger.error(f"Error curating Google item '{item.get('title', 'unknown')}': {e}")
post_url = original_url return None, None
finally:
is_posting = False
timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, title, timestamp)
posted_titles.add(title)
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
if image_url:
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from Google Trends *****")
sleep_time = random.randint(1200, 1800)
return post_data, category, sleep_time
logging.info("No interesting Google Trend found after attempts")
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
except Exception as e:
logging.error(f"Unexpected error in curate_from_google_trends: {e}", exc_info=True)
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
def run_google_trends_automator(): def run_google_trends_automator():
lock_fd = None lock_fd = None
+49 -146
View File
@@ -339,129 +339,50 @@ def fetch_reddit_posts():
logging.error(f"Unexpected error in fetch_reddit_posts: {e}", exc_info=True) logging.error(f"Unexpected error in fetch_reddit_posts: {e}", exc_info=True)
return [] return []
def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used_images): def curate_from_reddit(post, original_source, source_name, link, page_url):
logger = logging.getLogger(__name__)
try: try:
logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images") content = post.selftext if post.selftext else post.url
if not content:
logger.info(f"No content for Reddit post: {post.title}")
return None, None
posts = fetch_reddit_posts() interest_score = is_interesting(content)
if not posts: if interest_score < 4:
logging.info("No Reddit posts available") logger.info(f"Reddit post '{post.title}' not interesting enough: score {interest_score}")
sleep_time = random.randint(1200, 1800) return None, None
return None, None, sleep_time
attempts = 0 summary = summarize_with_gpt4o(content, source_name, link, interest_score=interest_score)
max_attempts = 10 if not summary:
while attempts < max_attempts and posts: logger.warning(f"Failed to summarize Reddit post: {post.title}")
post = posts.pop(0) return None, None
title = post["title"]
link = post.get("link", "")
summary = post.get("summary", "")
source_name = "Reddit"
original_source = f'<a href="{link}">{source_name}</a>'
original_url = link
upvotes = post.get("upvotes", 0)
comment_count = post.get("comment_count", 0)
top_comments = post.get("top_comments", [])
if title in posted_titles: # Remove the original title from the summary if present
logging.info(f"Skipping already posted Reddit post: {title}") if post.title in summary:
attempts += 1 summary = summary.replace(post.title, "").strip()
continue while "\n\n\n" in summary:
summary = summary.replace("\n\n\n", "\n\n")
author = get_next_author_round_robin() final_summary = insert_link_naturally(summary, source_name, link)
if not author:
logging.info(f"Skipping post '{title}' due to tweet rate limits for all authors")
attempts += 1
continue
author_username = author["username"]
logging.info(f"Selected author via round-robin: {author_username}")
logging.info(f"Trying Reddit Post: {title} from {source_name}")
# Combine summary and top comments for smart_image_and_filter
enhanced_summary = summary
if top_comments:
enhanced_summary += "\n\nTop Comments:\n" + "\n".join(top_comments)
try:
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, enhanced_summary)
except Exception as e:
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1
continue
if skip:
logging.info(f"Skipping filtered Reddit post: {title}")
attempts += 1
continue
ddg_context = fetch_duckduckgo_news_context(title)
scoring_content = f"Title: {title}\n\nContent: {summary}\n\nTop Comments: {top_comments}\n\nAdditional Context: {ddg_context}"
logging.debug(f"Scoring content for '{title}': {scoring_content}")
interest_score = is_interesting_reddit(title, summary, upvotes, comment_count, top_comments)
logging.info(f"Interest score for '{title}': {interest_score}")
if interest_score < 6:
logging.info(f"Reddit Interest Too Low: {interest_score}")
attempts += 1
continue
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = scoring_content
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
interest_score=interest_score,
extra_prompt=extra_prompt
)
if not final_summary: if not final_summary:
logging.info(f"Summary failed for '{title}'") logger.warning(f"Failed to insert link for Reddit post: {post.title}")
attempts += 1 return None, None
continue
final_summary = insert_link_naturally(final_summary, source_name, link) post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, post.title)
if not post_data:
logger.info(f"Post preparation failed for Reddit post: {post.title}")
return None, None
post_data = { share_text = f"Check out this tasty find: {post_data['title']}"
"title": generate_title_from_summary(final_summary),
"content": final_summary,
"status": "publish",
"author": author_username,
"categories": [generate_category_from_summary(final_summary)]
}
category = post_data["categories"][0]
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
if not image_url:
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
if not image_url:
logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None
uploader = None
page_url = None
hook = get_dynamic_hook(post_data["title"]).strip()
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
share_text = f"Check out this foodie gem! {post_data['title']}"
share_text_encoded = quote(share_text) share_text_encoded = quote(share_text)
share_links_template = ( share_links_template = (
f'<p>{share_prompt} ' "Share this post: "
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> ' '<a href="https://x.com/intent/tweet?url={post_url}&text={share_text}">X</a> | '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>' '<a href="https://www.facebook.com/sharer/sharer.php?u={post_url}">Facebook</a>'
) )
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
global is_posting # First call: Post without share links
is_posting = True post_data["content"] = final_summary
try:
post_id, post_url = post_to_wp( post_id, post_url = post_to_wp(
post_data=post_data, post_data=post_data,
category=category, category=category,
@@ -476,18 +397,16 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
should_post_tweet=True, should_post_tweet=True,
summary=final_summary summary=final_summary
) )
if not post_id:
logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}")
post_url = original_url
else:
logging.info(f"Posted to WordPress for {author_username}: {post_url}")
if not post_id:
logger.warning(f"Failed to post Reddit post to WP: {post_data['title']}")
return None, None
# Second call: Update with share links
post_url_encoded = quote(post_url) post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded) share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
post_data["content"] = f"{final_summary}\n\n{share_links}" post_data["content"] = f"{final_summary}\n\n{share_links}"
post_data["post_id"] = post_id post_id, post_url = post_to_wp(
if post_id:
post_to_wp(
post_data=post_data, post_data=post_data,
category=category, category=category,
link=link, link=link,
@@ -501,33 +420,17 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
post_id=post_id, post_id=post_id,
should_post_tweet=False should_post_tweet=False
) )
if post_id:
logger.info(f"Successfully curated and posted Reddit post: {post_data['title']} (URL: {post_url})")
return post_id, post_url
else:
logger.warning(f"Failed to update Reddit post with share links: {post_data['title']}")
return None, None
except Exception as e: except Exception as e:
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True) logger.error(f"Error curating Reddit post '{post.get('title', 'unknown')}': {e}")
post_url = original_url return None, None
finally:
is_posting = False
timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, title, timestamp)
posted_titles.add(title)
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
if image_url:
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from Reddit *****")
sleep_time = random.randint(1200, 1800)
return post_data, category, sleep_time
logging.info("No interesting Reddit post found after attempts")
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
except Exception as e:
logging.error(f"Unexpected error in curate_from_reddit: {e}", exc_info=True)
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
def run_reddit_automator(): def run_reddit_automator():
lock_fd = None lock_fd = None
+49 -146
View File
@@ -254,121 +254,50 @@ def fetch_duckduckgo_news_context(title, hours=24):
logging.error(f"Failed to fetch DuckDuckGo News context for '{title}' after {MAX_RETRIES} attempts") logging.error(f"Failed to fetch DuckDuckGo News context for '{title}' after {MAX_RETRIES} attempts")
return title return title
def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_images): def curate_from_rss(entry, original_source, source_name, link, page_url):
logger = logging.getLogger(__name__)
try: try:
logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images") content = entry.summary
if not content:
logger.info(f"No content for RSS entry: {entry.title}")
return None, None
articles = fetch_rss_feeds() interest_score = is_interesting(content)
if not articles: if interest_score < 4:
logging.info("No RSS articles available") logger.info(f"RSS entry '{entry.title}' not interesting enough: score {interest_score}")
sleep_time = random.randint(1200, 1800) # 20-30 minutes return None, None
return None, None, sleep_time
attempts = 0 summary = summarize_with_gpt4o(content, source_name, link, interest_score=interest_score)
max_attempts = 10 if not summary:
while attempts < max_attempts and articles: logger.warning(f"Failed to summarize RSS entry: {entry.title}")
article = articles.pop(0) return None, None
title = article["title"]
link = article["link"]
summary = article.get("summary", "")
source_name = article.get("feed_title", "Unknown Source")
original_source = f'<a href="{link}">{source_name}</a>'
original_url = link # Store for fallback
if title in posted_titles: # Remove the original title from the summary if present
logging.info(f"Skipping already posted article: {title}") if entry.title in summary:
attempts += 1 summary = summary.replace(entry.title, "").strip()
continue while "\n\n\n" in summary:
summary = summary.replace("\n\n\n", "\n\n")
# Select author final_summary = insert_link_naturally(summary, source_name, link)
author = get_next_author_round_robin()
if not author:
logging.info(f"Skipping article '{title}' due to tweet rate limits for all authors")
attempts += 1
continue
author_username = author["username"]
logging.info(f"Selected author via round-robin: {author_username}")
logging.info(f"Trying RSS Article: {title} from {source_name}")
try:
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
except Exception as e:
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1
continue
if skip:
logging.info(f"Skipping filtered RSS article: {title}")
attempts += 1
continue
ddg_context = fetch_duckduckgo_news_context(title)
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
interest_score = is_interesting(scoring_content)
logging.info(f"Interest score for '{title}': {interest_score}")
if interest_score < 6:
logging.info(f"RSS Interest Too Low: {interest_score}")
attempts += 1
continue
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = scoring_content
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
interest_score=interest_score,
extra_prompt=extra_prompt
)
if not final_summary: if not final_summary:
logging.info(f"Summary failed for '{title}'") logger.warning(f"Failed to insert link for RSS entry: {entry.title}")
attempts += 1 return None, None
continue
final_summary = insert_link_naturally(final_summary, source_name, link) post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, entry.title)
if not post_data:
logger.info(f"Post preparation failed for RSS entry: {entry.title}")
return None, None
post_data = { share_text = f"Check out this tasty find: {post_data['title']}"
"title": generate_title_from_summary(final_summary),
"content": final_summary,
"status": "publish",
"author": author_username,
"categories": [generate_category_from_summary(final_summary)]
}
category = post_data["categories"][0]
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
if not image_url:
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
if not image_url:
logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None
uploader = None
page_url = None
hook = get_dynamic_hook(post_data["title"]).strip()
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
share_text = f"Check out this foodie gem! {post_data['title']}"
share_text_encoded = quote(share_text) share_text_encoded = quote(share_text)
share_links_template = ( share_links_template = (
f'<p>{share_prompt} ' "Share this post: "
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> ' '<a href="https://x.com/intent/tweet?url={post_url}&text={share_text}">X</a> | '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>' '<a href="https://www.facebook.com/sharer/sharer.php?u={post_url}">Facebook</a>'
) )
post_data["content"] = f"{final_summary}\n\n{share_links_template.format(post_url='{post_url}', share_text=share_text_encoded)}" # First call: Post without share links
post_data["content"] = final_summary
global is_posting
is_posting = True
try:
post_id, post_url = post_to_wp( post_id, post_url = post_to_wp(
post_data=post_data, post_data=post_data,
category=category, category=category,
@@ -383,21 +312,21 @@ def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_im
should_post_tweet=True, should_post_tweet=True,
summary=final_summary summary=final_summary
) )
if not post_id:
logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}")
post_url = original_url
else:
logging.info(f"Posted to WordPress for {author_username}: {post_url}")
if not post_id:
logger.warning(f"Failed to post RSS entry to WP: {post_data['title']}")
return None, None
# Second call: Update with share links
post_url_encoded = quote(post_url) post_url_encoded = quote(post_url)
post_data["content"] = f"{final_summary}\n\n{share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)}" share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
if post_id: post_data["content"] = f"{final_summary}\n\n{share_links}"
post_to_wp( post_id, post_url = post_to_wp(
post_data=post_data, post_data=post_data,
category=category, category=category,
link=link, link=link,
author=author, author=author,
image_url=None, # Skip image re-upload image_url=None,
original_source=original_source, original_source=original_source,
image_source=image_source, image_source=image_source,
uploader=uploader, uploader=uploader,
@@ -407,42 +336,16 @@ def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_im
should_post_tweet=False should_post_tweet=False
) )
timestamp = datetime.now(timezone.utc).isoformat() if post_id:
save_json_file(POSTED_TITLES_FILE, title, timestamp) logger.info(f"Successfully curated and posted RSS entry: {post_data['title']} (URL: {post_url})")
posted_titles.add(title) return post_id, post_url
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") else:
logger.warning(f"Failed to update RSS post with share links: {post_data['title']}")
if image_url: return None, None
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from RSS *****")
sleep_time = random.randint(1200, 1800) # 20-30 minutes
return post_data, category, sleep_time
except Exception as e: except Exception as e:
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True) logger.error(f"Error curating RSS entry '{entry.get('title', 'unknown')}': {e}")
post_url = original_url return None, None
timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, title, timestamp)
posted_titles.add(title)
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
if image_url:
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
attempts += 1
finally:
is_posting = False
logging.info("No interesting RSS article found after attempts")
sleep_time = random.randint(1200, 1800) # 20-30 minutes
return None, None, sleep_time
except Exception as e:
logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True)
sleep_time = random.randint(1200, 1800) # 20-30 minutes
return None, None, sleep_time
def run_rss_automator(): def run_rss_automator():
lock_fd = None lock_fd = None
+48 -13
View File
@@ -615,23 +615,28 @@ def insert_link_naturally(summary, source_name, source_url):
try: try:
logging.info(f"Input summary to insert_link_naturally: {summary!r}") logging.info(f"Input summary to insert_link_naturally: {summary!r}")
# Split summary into paragraphs using \n\n (correct separator) # Split summary into paragraphs using \n\n
paragraphs = summary.split('\n\n') paragraphs = summary.split('\n\n')
if not paragraphs or all(not p.strip() for p in paragraphs): if not paragraphs or all(not p.strip() for p in paragraphs):
logging.error("No valid paragraphs to insert link.") logging.error("No valid paragraphs to insert link.")
return summary return append_link_as_fallback(summary, source_name, source_url)
# Find paragraphs with at least two sentences # Find paragraphs with at least two sentences
eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2] eligible_paragraphs = [
p for p in paragraphs
if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2
]
if not eligible_paragraphs: if not eligible_paragraphs:
logging.warning("No paragraph with multiple sentences found, using fallback.") logging.warning("No paragraph with multiple sentences found, using fallback.")
return append_link_as_fallback(summary, source_name, source_url) return append_link_as_fallback(summary, source_name, source_url)
# Alternative phrases for variety # Alternative phrases for manual insertion (as a fallback)
link_phrases = [ link_phrases = [
"according to {source}", "according to {source}",
"as reported by {source}", "as reported by {source}",
"{source} notes that" "{source} notes that",
"per {source}",
"says {source}"
] ]
best_candidate = None best_candidate = None
@@ -643,22 +648,28 @@ def insert_link_naturally(summary, source_name, source_url):
sentences = re.split(r'(?<=[.!?])\s+', para.strip()) sentences = re.split(r'(?<=[.!?])\s+', para.strip())
eligible_sentences = [ eligible_sentences = [
(i, s) for i, s in enumerate(sentences) (i, s) for i, s in enumerate(sentences)
if s.strip() and not s.endswith('?') # Exclude sentences ending with '?' if s.strip()
and not s.endswith('!') # Exclude exclamations for smoother integration and not s.endswith('?') # Exclude questions
and not s.endswith('!') # Exclude exclamations
and '<a href=' not in s # Avoid sentences with existing links
and len(s.split()) >= 5 # Prefer sentences with at least 5 words
] ]
if not eligible_sentences: if not eligible_sentences:
continue continue
# Score sentences based on suitability (prefer declarative sentences) # Score sentences based on suitability
for idx, sentence in eligible_sentences: for idx, sentence in eligible_sentences:
score = 0 score = 0
# Favor sentences with factual content (simplified heuristic) # Favor sentences with factual content
if any(word in sentence.lower() for word in ["is", "are", "has", "shows", "reveals"]): if any(word in sentence.lower() for word in ["is", "are", "has", "shows", "reveals"]):
score += 2 score += 2
# Prefer longer sentences for better context # Prefer longer sentences for better context
score += len(sentence.split()) // 5 score += len(sentence.split()) // 5
# Prefer middle sentences for natural flow # Prefer middle sentences for natural flow
score += abs(idx - len(sentences) / 2) * -1 # Penalize sentences far from the middle score += abs(idx - len(sentences) / 2) * -1 # Penalize sentences far from the middle
# Boost score for sentences mentioning the source topic
if source_name.lower() in sentence.lower():
score += 3
if score > best_score: if score > best_score:
best_score = score best_score = score
@@ -669,14 +680,38 @@ def insert_link_naturally(summary, source_name, source_url):
logging.warning("No suitable sentence found, using fallback.") logging.warning("No suitable sentence found, using fallback.")
return append_link_as_fallback(summary, source_name, source_url) return append_link_as_fallback(summary, source_name, source_url)
# Select a link phrase based on sentence structure # Select a link phrase for fallback manual insertion
sentence_idx, sentence = best_candidate sentence_idx, sentence = best_candidate
link_phrase = random.choice(link_phrases) link_phrase = random.choice(link_phrases)
link_pattern = f'<a href="{source_url}">{source_name}</a>' link_pattern = f'<a href="{source_url}">{source_name}</a>'
formatted_link = link_phrase.format(source=link_pattern) formatted_link = link_phrase.format(source=link_pattern)
# Insert the link at the end of the selected sentence (no capitalization needed) # Use GPT to rewrite the sentence with the link
prompt = (
f"Rewrite the following sentence to naturally include a reference to the source '{source_name}' "
f"with a hyperlink in HTML format: <a href=\"{source_url}\">{source_name}</a>. "
"Integrate the link into the sentence seamlessly, maintaining the original tone and style. "
"Do not add extra sentences, change the meaning, or include additional punctuation like a trailing period. "
"Return only the rewritten sentence."
)
response = client.chat.completions.create(
model=LIGHT_TASK_MODEL,
messages=[
{"role": "system", "content": prompt},
{"role": "user", "content": sentence}
],
max_tokens=100,
temperature=0.7
)
new_sentence = response.choices[0].message.content.strip()
if not new_sentence or '<a href=' not in new_sentence:
logging.warning("GPT failed to rewrite sentence, using manual insertion")
new_sentence = f"{sentence.rstrip('.')} {formatted_link}." new_sentence = f"{sentence.rstrip('.')} {formatted_link}."
else:
# Ensure the sentence ends with a period if the original did
if sentence.rstrip().endswith('.'):
new_sentence = new_sentence.rstrip('.') + '.'
sentences[sentence_idx] = new_sentence sentences[sentence_idx] = new_sentence
new_para = ' '.join(sentences) new_para = ' '.join(sentences)
paragraphs[paragraphs.index(best_paragraph)] = new_para paragraphs[paragraphs.index(best_paragraph)] = new_para
@@ -838,12 +873,12 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im
tags.append(picks_tag_id) tags.append(picks_tag_id)
logger.info(f"Added 'Picks' tag (ID: {picks_tag_id}) due to high interest score: {interest_score}") logger.info(f"Added 'Picks' tag (ID: {picks_tag_id}) due to high interest score: {interest_score}")
# Format content with <p> tags # Format content with <p> tags, splitting on \n\n to match summary format
content = post_data["content"] content = post_data["content"]
if content is None: if content is None:
logger.error(f"Post content is None for title '{post_data['title']}' - using fallback") logger.error(f"Post content is None for title '{post_data['title']}' - using fallback")
content = "Content unavailable. Check the original source for details." content = "Content unavailable. Check the original source for details."
formatted_content = "\n".join(f"<p>{para}</p>" for para in content.split('\n') if para.strip()) formatted_content = "\n".join(f"<p>{para}</p>" for para in content.split('\n\n') if para.strip())
# Upload image before posting # Upload image before posting
image_id = None image_id = None