fix insert link naturally

This commit is contained in:
2025-05-13 19:09:26 +10:00
parent 5f38374abd
commit 79cc367579
4 changed files with 286 additions and 557 deletions
+79 -176
View File
@@ -339,195 +339,98 @@ def fetch_reddit_posts():
logging.error(f"Unexpected error in fetch_reddit_posts: {e}", exc_info=True)
return []
def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used_images):
def curate_from_reddit(post, original_source, source_name, link, page_url):
logger = logging.getLogger(__name__)
try:
logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")
content = post.selftext if post.selftext else post.url
if not content:
logger.info(f"No content for Reddit post: {post.title}")
return None, None
posts = fetch_reddit_posts()
if not posts:
logging.info("No Reddit posts available")
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
interest_score = is_interesting(content)
if interest_score < 4:
logger.info(f"Reddit post '{post.title}' not interesting enough: score {interest_score}")
return None, None
attempts = 0
max_attempts = 10
while attempts < max_attempts and posts:
post = posts.pop(0)
title = post["title"]
link = post.get("link", "")
summary = post.get("summary", "")
source_name = "Reddit"
original_source = f'<a href="{link}">{source_name}</a>'
original_url = link
upvotes = post.get("upvotes", 0)
comment_count = post.get("comment_count", 0)
top_comments = post.get("top_comments", [])
summary = summarize_with_gpt4o(content, source_name, link, interest_score=interest_score)
if not summary:
logger.warning(f"Failed to summarize Reddit post: {post.title}")
return None, None
if title in posted_titles:
logging.info(f"Skipping already posted Reddit post: {title}")
attempts += 1
continue
# Remove the original title from the summary if present
if post.title in summary:
summary = summary.replace(post.title, "").strip()
while "\n\n\n" in summary:
summary = summary.replace("\n\n\n", "\n\n")
author = get_next_author_round_robin()
if not author:
logging.info(f"Skipping post '{title}' due to tweet rate limits for all authors")
attempts += 1
continue
final_summary = insert_link_naturally(summary, source_name, link)
if not final_summary:
logger.warning(f"Failed to insert link for Reddit post: {post.title}")
return None, None
author_username = author["username"]
logging.info(f"Selected author via round-robin: {author_username}")
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, post.title)
if not post_data:
logger.info(f"Post preparation failed for Reddit post: {post.title}")
return None, None
logging.info(f"Trying Reddit Post: {title} from {source_name}")
share_text = f"Check out this tasty find: {post_data['title']}"
share_text_encoded = quote(share_text)
share_links_template = (
"Share this post: "
'<a href="https://x.com/intent/tweet?url={post_url}&text={share_text}">X</a> | '
'<a href="https://www.facebook.com/sharer/sharer.php?u={post_url}">Facebook</a>'
)
# Combine summary and top comments for smart_image_and_filter
enhanced_summary = summary
if top_comments:
enhanced_summary += "\n\nTop Comments:\n" + "\n".join(top_comments)
# First call: Post without share links
post_data["content"] = final_summary
post_id, post_url = post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=image_url,
original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
interest_score=interest_score,
should_post_tweet=True,
summary=final_summary
)
try:
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, enhanced_summary)
except Exception as e:
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1
continue
if not post_id:
logger.warning(f"Failed to post Reddit post to WP: {post_data['title']}")
return None, None
if skip:
logging.info(f"Skipping filtered Reddit post: {title}")
attempts += 1
continue
# Second call: Update with share links
post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
post_data["content"] = f"{final_summary}\n\n{share_links}"
post_id, post_url = post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=None,
original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
interest_score=interest_score,
post_id=post_id,
should_post_tweet=False
)
ddg_context = fetch_duckduckgo_news_context(title)
scoring_content = f"Title: {title}\n\nContent: {summary}\n\nTop Comments: {top_comments}\n\nAdditional Context: {ddg_context}"
logging.debug(f"Scoring content for '{title}': {scoring_content}")
interest_score = is_interesting_reddit(title, summary, upvotes, comment_count, top_comments)
logging.info(f"Interest score for '{title}': {interest_score}")
if interest_score < 6:
logging.info(f"Reddit Interest Too Low: {interest_score}")
attempts += 1
continue
if post_id:
logger.info(f"Successfully curated and posted Reddit post: {post_data['title']} (URL: {post_url})")
return post_id, post_url
else:
logger.warning(f"Failed to update Reddit post with share links: {post_data['title']}")
return None, None
num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = scoring_content
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
interest_score=interest_score,
extra_prompt=extra_prompt
)
if not final_summary:
logging.info(f"Summary failed for '{title}'")
attempts += 1
continue
final_summary = insert_link_naturally(final_summary, source_name, link)
post_data = {
"title": generate_title_from_summary(final_summary),
"content": final_summary,
"status": "publish",
"author": author_username,
"categories": [generate_category_from_summary(final_summary)]
}
category = post_data["categories"][0]
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
if not image_url:
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
if not image_url:
logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None
uploader = None
page_url = None
hook = get_dynamic_hook(post_data["title"]).strip()
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
share_text = f"Check out this foodie gem! {post_data['title']}"
share_text_encoded = quote(share_text)
share_links_template = (
f'<p>{share_prompt} '
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
)
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
global is_posting
is_posting = True
try:
post_id, post_url = post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=image_url,
original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
interest_score=interest_score,
should_post_tweet=True,
summary=final_summary
)
if not post_id:
logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}")
post_url = original_url
else:
logging.info(f"Posted to WordPress for {author_username}: {post_url}")
post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded)
post_data["content"] = f"{final_summary}\n\n{share_links}"
post_data["post_id"] = post_id
if post_id:
post_to_wp(
post_data=post_data,
category=category,
link=link,
author=author,
image_url=None,
original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
interest_score=interest_score,
post_id=post_id,
should_post_tweet=False
)
except Exception as e:
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
post_url = original_url
finally:
is_posting = False
timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, title, timestamp)
posted_titles.add(title)
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
if image_url:
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from Reddit *****")
sleep_time = random.randint(1200, 1800)
return post_data, category, sleep_time
logging.info("No interesting Reddit post found after attempts")
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
except Exception as e:
logging.error(f"Unexpected error in curate_from_reddit: {e}", exc_info=True)
sleep_time = random.randint(1200, 1800)
return None, None, sleep_time
logger.error(f"Error curating Reddit post '{post.get('title', 'unknown')}': {e}")
return None, None
def run_reddit_automator():
lock_fd = None