fix insert link naturally
This commit is contained in:
+79
-191
@@ -285,210 +285,98 @@ def fetch_duckduckgo_news_context(trend_title, hours=24):
|
||||
logging.error(f"Failed to fetch DuckDuckGo News context for '{trend_title}' after {MAX_RETRIES} attempts")
|
||||
return trend_title
|
||||
|
||||
def curate_from_google_trends(posted_titles_data, posted_titles, used_images_data, used_images):
|
||||
def curate_from_google(item, original_source, source_name, link, page_url):
|
||||
logger = logging.getLogger(__name__)
|
||||
try:
|
||||
logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")
|
||||
content = item.get('snippet', '')
|
||||
if not content:
|
||||
logger.info(f"No content for Google item: {item.get('title', 'unknown')}")
|
||||
return None, None
|
||||
|
||||
regions = ['US', 'GB', 'AU']
|
||||
all_trends = []
|
||||
interest_score = is_interesting(content)
|
||||
if interest_score < 4:
|
||||
logger.info(f"Google item '{item.get('title', 'unknown')}' not interesting enough: score {interest_score}")
|
||||
return None, None
|
||||
|
||||
for geo in regions:
|
||||
logging.info(f"Scraping Google Trends for geo={geo}")
|
||||
trends = scrape_google_trends(geo=geo)
|
||||
if trends:
|
||||
logging.info(f"Collected {len(trends)} trends for geo={geo}")
|
||||
all_trends.extend(trends)
|
||||
else:
|
||||
logging.warning(f"No trends collected for geo={geo}")
|
||||
summary = summarize_with_gpt4o(content, source_name, link, interest_score=interest_score)
|
||||
if not summary:
|
||||
logger.warning(f"Failed to summarize Google item: {item.get('title', 'unknown')}")
|
||||
return None, None
|
||||
|
||||
unique_trends = []
|
||||
seen_titles = set()
|
||||
for trend in all_trends:
|
||||
if trend["title"] not in seen_titles:
|
||||
unique_trends.append(trend)
|
||||
seen_titles.add(trend["title"])
|
||||
# Remove the original title from the summary if present
|
||||
if item.get('title', '') in summary:
|
||||
summary = summary.replace(item.get('title', ''), "").strip()
|
||||
while "\n\n\n" in summary:
|
||||
summary = summary.replace("\n\n\n", "\n\n")
|
||||
|
||||
if not unique_trends:
|
||||
logging.info("No Google Trends data available across regions")
|
||||
sleep_time = random.randint(1200, 1800)
|
||||
return None, None, sleep_time
|
||||
final_summary = insert_link_naturally(summary, source_name, link)
|
||||
if not final_summary:
|
||||
logger.warning(f"Failed to insert link for Google item: {item.get('title', 'unknown')}")
|
||||
return None, None
|
||||
|
||||
unique_trends.sort(key=lambda x: x["search_volume"], reverse=True)
|
||||
logging.info(f"Total unique trends collected: {len(unique_trends)}")
|
||||
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, item.get('title', 'unknown'))
|
||||
if not post_data:
|
||||
logger.info(f"Post preparation failed for Google item: {item.get('title', 'unknown')}")
|
||||
return None, None
|
||||
|
||||
attempts = 0
|
||||
max_attempts = 10
|
||||
while attempts < max_attempts and unique_trends:
|
||||
trend = unique_trends.pop(0)
|
||||
title = trend["title"]
|
||||
link = trend.get("link", "")
|
||||
summary = trend.get("summary", "")
|
||||
source_name = trend.get("source", "Google Trends")
|
||||
original_source = f'<a href="{link}">{source_name}</a>'
|
||||
original_url = link
|
||||
share_text = f"Check out this tasty find: {post_data['title']}"
|
||||
share_text_encoded = quote(share_text)
|
||||
share_links_template = (
|
||||
"Share this post: "
|
||||
'<a href="https://x.com/intent/tweet?url={post_url}&text={share_text}">X</a> | '
|
||||
'<a href="https://www.facebook.com/sharer/sharer.php?u={post_url}">Facebook</a>'
|
||||
)
|
||||
|
||||
if title in posted_titles:
|
||||
logging.info(f"Skipping already posted trend: {title}")
|
||||
attempts += 1
|
||||
continue
|
||||
# First call: Post without share links
|
||||
post_data["content"] = final_summary
|
||||
post_id, post_url = post_to_wp(
|
||||
post_data=post_data,
|
||||
category=category,
|
||||
link=link,
|
||||
author=author,
|
||||
image_url=image_url,
|
||||
original_source=original_source,
|
||||
image_source=image_source,
|
||||
uploader=uploader,
|
||||
page_url=page_url,
|
||||
interest_score=interest_score,
|
||||
should_post_tweet=True,
|
||||
summary=final_summary
|
||||
)
|
||||
|
||||
author = get_next_author_round_robin()
|
||||
if not author:
|
||||
logging.info(f"Skipping trend '{title}' due to tweet rate limits for all authors")
|
||||
attempts += 1
|
||||
continue
|
||||
if not post_id:
|
||||
logger.warning(f"Failed to post Google item to WP: {post_data['title']}")
|
||||
return None, None
|
||||
|
||||
author_username = author["username"]
|
||||
logging.info(f"Selected author via round-robin: {author_username}")
|
||||
# Second call: Update with share links
|
||||
post_url_encoded = quote(post_url)
|
||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||
post_id, post_url = post_to_wp(
|
||||
post_data=post_data,
|
||||
category=category,
|
||||
link=link,
|
||||
author=author,
|
||||
image_url=None,
|
||||
original_source=original_source,
|
||||
image_source=image_source,
|
||||
uploader=uploader,
|
||||
page_url=page_url,
|
||||
interest_score=interest_score,
|
||||
post_id=post_id,
|
||||
should_post_tweet=False
|
||||
)
|
||||
|
||||
logging.info(f"Trying Google Trend: {title} from {source_name}")
|
||||
if post_id:
|
||||
logger.info(f"Successfully curated and posted Google item: {post_data['title']} (URL: {post_url})")
|
||||
return post_id, post_url
|
||||
else:
|
||||
logger.warning(f"Failed to update Google post with share links: {post_data['title']}")
|
||||
return None, None
|
||||
|
||||
# Fetch DuckDuckGo context early to enhance smart_image_and_filter
|
||||
ddg_context = fetch_duckduckgo_news_context(title)
|
||||
enhanced_summary = summary + "\n\nAdditional Context: " + ddg_context if summary else ddg_context
|
||||
|
||||
try:
|
||||
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, enhanced_summary)
|
||||
except Exception as e:
|
||||
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
if skip:
|
||||
logging.info(f"Skipping filtered trend: {title}")
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
|
||||
interest_score = is_interesting(scoring_content)
|
||||
logging.info(f"Interest score for '{title}': {interest_score}")
|
||||
if interest_score < 6:
|
||||
logging.info(f"Trend Interest Too Low: {interest_score}")
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
num_paragraphs = determine_paragraph_count(interest_score)
|
||||
extra_prompt = (
|
||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
||||
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
||||
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
|
||||
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
|
||||
f"Do not include emojis in the summary."
|
||||
)
|
||||
content_to_summarize = scoring_content
|
||||
final_summary = summarize_with_gpt4o(
|
||||
content_to_summarize,
|
||||
source_name,
|
||||
link,
|
||||
interest_score=interest_score,
|
||||
extra_prompt=extra_prompt
|
||||
)
|
||||
if not final_summary:
|
||||
logging.info(f"Summary failed for '{title}'")
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||
|
||||
post_data = {
|
||||
"title": generate_title_from_summary(final_summary),
|
||||
"content": final_summary,
|
||||
"status": "publish",
|
||||
"author": author_username,
|
||||
"categories": [generate_category_from_summary(final_summary)]
|
||||
}
|
||||
category = post_data["categories"][0]
|
||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
|
||||
if not image_url:
|
||||
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
|
||||
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
|
||||
if not image_url:
|
||||
logging.warning(f"All image uploads failed for '{title}' - posting without image")
|
||||
image_source = None
|
||||
uploader = None
|
||||
page_url = None
|
||||
|
||||
hook = get_dynamic_hook(post_data["title"]).strip()
|
||||
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
||||
share_text = f"Check out this foodie gem! {post_data['title']}"
|
||||
share_text_encoded = quote(share_text)
|
||||
share_links_template = (
|
||||
f'<p>{share_prompt} '
|
||||
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
||||
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
||||
)
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
|
||||
|
||||
global is_posting
|
||||
is_posting = True
|
||||
try:
|
||||
post_id, post_url = post_to_wp(
|
||||
post_data=post_data,
|
||||
category=category,
|
||||
link=link,
|
||||
author=author,
|
||||
image_url=image_url,
|
||||
original_source=original_source,
|
||||
image_source=image_source,
|
||||
uploader=uploader,
|
||||
page_url=page_url,
|
||||
interest_score=interest_score,
|
||||
should_post_tweet=True,
|
||||
summary=final_summary
|
||||
)
|
||||
if not post_id:
|
||||
logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}")
|
||||
post_url = original_url
|
||||
else:
|
||||
logging.info(f"Posted to WordPress for {author_username}: {post_url}")
|
||||
|
||||
post_url_encoded = quote(post_url)
|
||||
share_links = share_links_template.format(post_url=post_url_encoded)
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||
post_data["post_id"] = post_id
|
||||
if post_id:
|
||||
post_to_wp(
|
||||
post_data=post_data,
|
||||
category=category,
|
||||
link=link,
|
||||
author=author,
|
||||
image_url=None,
|
||||
original_source=original_source,
|
||||
image_source=image_source,
|
||||
uploader=uploader,
|
||||
page_url=page_url,
|
||||
interest_score=interest_score,
|
||||
post_id=post_id,
|
||||
should_post_tweet=False
|
||||
)
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
|
||||
post_url = original_url
|
||||
finally:
|
||||
is_posting = False
|
||||
|
||||
timestamp = datetime.now(timezone.utc).isoformat()
|
||||
save_json_file(POSTED_TITLES_FILE, title, timestamp)
|
||||
posted_titles.add(title)
|
||||
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
|
||||
|
||||
if image_url:
|
||||
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
||||
used_images.add(image_url)
|
||||
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
||||
|
||||
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from Google Trends *****")
|
||||
sleep_time = random.randint(1200, 1800)
|
||||
return post_data, category, sleep_time
|
||||
|
||||
logging.info("No interesting Google Trend found after attempts")
|
||||
sleep_time = random.randint(1200, 1800)
|
||||
return None, None, sleep_time
|
||||
except Exception as e:
|
||||
logging.error(f"Unexpected error in curate_from_google_trends: {e}", exc_info=True)
|
||||
sleep_time = random.randint(1200, 1800)
|
||||
return None, None, sleep_time
|
||||
logger.error(f"Error curating Google item '{item.get('title', 'unknown')}': {e}")
|
||||
return None, None
|
||||
|
||||
def run_google_trends_automator():
|
||||
lock_fd = None
|
||||
|
||||
+79
-176
@@ -339,195 +339,98 @@ def fetch_reddit_posts():
|
||||
logging.error(f"Unexpected error in fetch_reddit_posts: {e}", exc_info=True)
|
||||
return []
|
||||
|
||||
def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used_images):
|
||||
def curate_from_reddit(post, original_source, source_name, link, page_url):
|
||||
logger = logging.getLogger(__name__)
|
||||
try:
|
||||
logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")
|
||||
content = post.selftext if post.selftext else post.url
|
||||
if not content:
|
||||
logger.info(f"No content for Reddit post: {post.title}")
|
||||
return None, None
|
||||
|
||||
posts = fetch_reddit_posts()
|
||||
if not posts:
|
||||
logging.info("No Reddit posts available")
|
||||
sleep_time = random.randint(1200, 1800)
|
||||
return None, None, sleep_time
|
||||
interest_score = is_interesting(content)
|
||||
if interest_score < 4:
|
||||
logger.info(f"Reddit post '{post.title}' not interesting enough: score {interest_score}")
|
||||
return None, None
|
||||
|
||||
attempts = 0
|
||||
max_attempts = 10
|
||||
while attempts < max_attempts and posts:
|
||||
post = posts.pop(0)
|
||||
title = post["title"]
|
||||
link = post.get("link", "")
|
||||
summary = post.get("summary", "")
|
||||
source_name = "Reddit"
|
||||
original_source = f'<a href="{link}">{source_name}</a>'
|
||||
original_url = link
|
||||
upvotes = post.get("upvotes", 0)
|
||||
comment_count = post.get("comment_count", 0)
|
||||
top_comments = post.get("top_comments", [])
|
||||
summary = summarize_with_gpt4o(content, source_name, link, interest_score=interest_score)
|
||||
if not summary:
|
||||
logger.warning(f"Failed to summarize Reddit post: {post.title}")
|
||||
return None, None
|
||||
|
||||
if title in posted_titles:
|
||||
logging.info(f"Skipping already posted Reddit post: {title}")
|
||||
attempts += 1
|
||||
continue
|
||||
# Remove the original title from the summary if present
|
||||
if post.title in summary:
|
||||
summary = summary.replace(post.title, "").strip()
|
||||
while "\n\n\n" in summary:
|
||||
summary = summary.replace("\n\n\n", "\n\n")
|
||||
|
||||
author = get_next_author_round_robin()
|
||||
if not author:
|
||||
logging.info(f"Skipping post '{title}' due to tweet rate limits for all authors")
|
||||
attempts += 1
|
||||
continue
|
||||
final_summary = insert_link_naturally(summary, source_name, link)
|
||||
if not final_summary:
|
||||
logger.warning(f"Failed to insert link for Reddit post: {post.title}")
|
||||
return None, None
|
||||
|
||||
author_username = author["username"]
|
||||
logging.info(f"Selected author via round-robin: {author_username}")
|
||||
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, post.title)
|
||||
if not post_data:
|
||||
logger.info(f"Post preparation failed for Reddit post: {post.title}")
|
||||
return None, None
|
||||
|
||||
logging.info(f"Trying Reddit Post: {title} from {source_name}")
|
||||
share_text = f"Check out this tasty find: {post_data['title']}"
|
||||
share_text_encoded = quote(share_text)
|
||||
share_links_template = (
|
||||
"Share this post: "
|
||||
'<a href="https://x.com/intent/tweet?url={post_url}&text={share_text}">X</a> | '
|
||||
'<a href="https://www.facebook.com/sharer/sharer.php?u={post_url}">Facebook</a>'
|
||||
)
|
||||
|
||||
# Combine summary and top comments for smart_image_and_filter
|
||||
enhanced_summary = summary
|
||||
if top_comments:
|
||||
enhanced_summary += "\n\nTop Comments:\n" + "\n".join(top_comments)
|
||||
# First call: Post without share links
|
||||
post_data["content"] = final_summary
|
||||
post_id, post_url = post_to_wp(
|
||||
post_data=post_data,
|
||||
category=category,
|
||||
link=link,
|
||||
author=author,
|
||||
image_url=image_url,
|
||||
original_source=original_source,
|
||||
image_source=image_source,
|
||||
uploader=uploader,
|
||||
page_url=page_url,
|
||||
interest_score=interest_score,
|
||||
should_post_tweet=True,
|
||||
summary=final_summary
|
||||
)
|
||||
|
||||
try:
|
||||
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, enhanced_summary)
|
||||
except Exception as e:
|
||||
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
|
||||
attempts += 1
|
||||
continue
|
||||
if not post_id:
|
||||
logger.warning(f"Failed to post Reddit post to WP: {post_data['title']}")
|
||||
return None, None
|
||||
|
||||
if skip:
|
||||
logging.info(f"Skipping filtered Reddit post: {title}")
|
||||
attempts += 1
|
||||
continue
|
||||
# Second call: Update with share links
|
||||
post_url_encoded = quote(post_url)
|
||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||
post_id, post_url = post_to_wp(
|
||||
post_data=post_data,
|
||||
category=category,
|
||||
link=link,
|
||||
author=author,
|
||||
image_url=None,
|
||||
original_source=original_source,
|
||||
image_source=image_source,
|
||||
uploader=uploader,
|
||||
page_url=page_url,
|
||||
interest_score=interest_score,
|
||||
post_id=post_id,
|
||||
should_post_tweet=False
|
||||
)
|
||||
|
||||
ddg_context = fetch_duckduckgo_news_context(title)
|
||||
scoring_content = f"Title: {title}\n\nContent: {summary}\n\nTop Comments: {top_comments}\n\nAdditional Context: {ddg_context}"
|
||||
logging.debug(f"Scoring content for '{title}': {scoring_content}")
|
||||
interest_score = is_interesting_reddit(title, summary, upvotes, comment_count, top_comments)
|
||||
logging.info(f"Interest score for '{title}': {interest_score}")
|
||||
if interest_score < 6:
|
||||
logging.info(f"Reddit Interest Too Low: {interest_score}")
|
||||
attempts += 1
|
||||
continue
|
||||
if post_id:
|
||||
logger.info(f"Successfully curated and posted Reddit post: {post_data['title']} (URL: {post_url})")
|
||||
return post_id, post_url
|
||||
else:
|
||||
logger.warning(f"Failed to update Reddit post with share links: {post_data['title']}")
|
||||
return None, None
|
||||
|
||||
num_paragraphs = determine_paragraph_count(interest_score)
|
||||
extra_prompt = (
|
||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
||||
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
||||
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
|
||||
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
|
||||
f"Do not include emojis in the summary."
|
||||
)
|
||||
content_to_summarize = scoring_content
|
||||
final_summary = summarize_with_gpt4o(
|
||||
content_to_summarize,
|
||||
source_name,
|
||||
link,
|
||||
interest_score=interest_score,
|
||||
extra_prompt=extra_prompt
|
||||
)
|
||||
if not final_summary:
|
||||
logging.info(f"Summary failed for '{title}'")
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||
|
||||
post_data = {
|
||||
"title": generate_title_from_summary(final_summary),
|
||||
"content": final_summary,
|
||||
"status": "publish",
|
||||
"author": author_username,
|
||||
"categories": [generate_category_from_summary(final_summary)]
|
||||
}
|
||||
category = post_data["categories"][0]
|
||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
|
||||
if not image_url:
|
||||
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
|
||||
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
|
||||
if not image_url:
|
||||
logging.warning(f"All image uploads failed for '{title}' - posting without image")
|
||||
image_source = None
|
||||
uploader = None
|
||||
page_url = None
|
||||
|
||||
hook = get_dynamic_hook(post_data["title"]).strip()
|
||||
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
||||
share_text = f"Check out this foodie gem! {post_data['title']}"
|
||||
share_text_encoded = quote(share_text)
|
||||
share_links_template = (
|
||||
f'<p>{share_prompt} '
|
||||
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
||||
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
||||
)
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
|
||||
|
||||
global is_posting
|
||||
is_posting = True
|
||||
try:
|
||||
post_id, post_url = post_to_wp(
|
||||
post_data=post_data,
|
||||
category=category,
|
||||
link=link,
|
||||
author=author,
|
||||
image_url=image_url,
|
||||
original_source=original_source,
|
||||
image_source=image_source,
|
||||
uploader=uploader,
|
||||
page_url=page_url,
|
||||
interest_score=interest_score,
|
||||
should_post_tweet=True,
|
||||
summary=final_summary
|
||||
)
|
||||
if not post_id:
|
||||
logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}")
|
||||
post_url = original_url
|
||||
else:
|
||||
logging.info(f"Posted to WordPress for {author_username}: {post_url}")
|
||||
|
||||
post_url_encoded = quote(post_url)
|
||||
share_links = share_links_template.format(post_url=post_url_encoded)
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||
post_data["post_id"] = post_id
|
||||
if post_id:
|
||||
post_to_wp(
|
||||
post_data=post_data,
|
||||
category=category,
|
||||
link=link,
|
||||
author=author,
|
||||
image_url=None,
|
||||
original_source=original_source,
|
||||
image_source=image_source,
|
||||
uploader=uploader,
|
||||
page_url=page_url,
|
||||
interest_score=interest_score,
|
||||
post_id=post_id,
|
||||
should_post_tweet=False
|
||||
)
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
|
||||
post_url = original_url
|
||||
finally:
|
||||
is_posting = False
|
||||
|
||||
timestamp = datetime.now(timezone.utc).isoformat()
|
||||
save_json_file(POSTED_TITLES_FILE, title, timestamp)
|
||||
posted_titles.add(title)
|
||||
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
|
||||
|
||||
if image_url:
|
||||
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
||||
used_images.add(image_url)
|
||||
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
||||
|
||||
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from Reddit *****")
|
||||
sleep_time = random.randint(1200, 1800)
|
||||
return post_data, category, sleep_time
|
||||
|
||||
logging.info("No interesting Reddit post found after attempts")
|
||||
sleep_time = random.randint(1200, 1800)
|
||||
return None, None, sleep_time
|
||||
except Exception as e:
|
||||
logging.error(f"Unexpected error in curate_from_reddit: {e}", exc_info=True)
|
||||
sleep_time = random.randint(1200, 1800)
|
||||
return None, None, sleep_time
|
||||
logger.error(f"Error curating Reddit post '{post.get('title', 'unknown')}': {e}")
|
||||
return None, None
|
||||
|
||||
def run_reddit_automator():
|
||||
lock_fd = None
|
||||
|
||||
+79
-176
@@ -254,195 +254,98 @@ def fetch_duckduckgo_news_context(title, hours=24):
|
||||
logging.error(f"Failed to fetch DuckDuckGo News context for '{title}' after {MAX_RETRIES} attempts")
|
||||
return title
|
||||
|
||||
def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_images):
|
||||
def curate_from_rss(entry, original_source, source_name, link, page_url):
|
||||
logger = logging.getLogger(__name__)
|
||||
try:
|
||||
logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")
|
||||
content = entry.summary
|
||||
if not content:
|
||||
logger.info(f"No content for RSS entry: {entry.title}")
|
||||
return None, None
|
||||
|
||||
articles = fetch_rss_feeds()
|
||||
if not articles:
|
||||
logging.info("No RSS articles available")
|
||||
sleep_time = random.randint(1200, 1800) # 20–30 minutes
|
||||
return None, None, sleep_time
|
||||
interest_score = is_interesting(content)
|
||||
if interest_score < 4:
|
||||
logger.info(f"RSS entry '{entry.title}' not interesting enough: score {interest_score}")
|
||||
return None, None
|
||||
|
||||
attempts = 0
|
||||
max_attempts = 10
|
||||
while attempts < max_attempts and articles:
|
||||
article = articles.pop(0)
|
||||
title = article["title"]
|
||||
link = article["link"]
|
||||
summary = article.get("summary", "")
|
||||
source_name = article.get("feed_title", "Unknown Source")
|
||||
original_source = f'<a href="{link}">{source_name}</a>'
|
||||
original_url = link # Store for fallback
|
||||
summary = summarize_with_gpt4o(content, source_name, link, interest_score=interest_score)
|
||||
if not summary:
|
||||
logger.warning(f"Failed to summarize RSS entry: {entry.title}")
|
||||
return None, None
|
||||
|
||||
if title in posted_titles:
|
||||
logging.info(f"Skipping already posted article: {title}")
|
||||
attempts += 1
|
||||
continue
|
||||
# Remove the original title from the summary if present
|
||||
if entry.title in summary:
|
||||
summary = summary.replace(entry.title, "").strip()
|
||||
while "\n\n\n" in summary:
|
||||
summary = summary.replace("\n\n\n", "\n\n")
|
||||
|
||||
# Select author
|
||||
author = get_next_author_round_robin()
|
||||
if not author:
|
||||
logging.info(f"Skipping article '{title}' due to tweet rate limits for all authors")
|
||||
attempts += 1
|
||||
continue
|
||||
author_username = author["username"]
|
||||
logging.info(f"Selected author via round-robin: {author_username}")
|
||||
final_summary = insert_link_naturally(summary, source_name, link)
|
||||
if not final_summary:
|
||||
logger.warning(f"Failed to insert link for RSS entry: {entry.title}")
|
||||
return None, None
|
||||
|
||||
logging.info(f"Trying RSS Article: {title} from {source_name}")
|
||||
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, entry.title)
|
||||
if not post_data:
|
||||
logger.info(f"Post preparation failed for RSS entry: {entry.title}")
|
||||
return None, None
|
||||
|
||||
try:
|
||||
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
|
||||
except Exception as e:
|
||||
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
|
||||
attempts += 1
|
||||
continue
|
||||
share_text = f"Check out this tasty find: {post_data['title']}"
|
||||
share_text_encoded = quote(share_text)
|
||||
share_links_template = (
|
||||
"Share this post: "
|
||||
'<a href="https://x.com/intent/tweet?url={post_url}&text={share_text}">X</a> | '
|
||||
'<a href="https://www.facebook.com/sharer/sharer.php?u={post_url}">Facebook</a>'
|
||||
)
|
||||
|
||||
if skip:
|
||||
logging.info(f"Skipping filtered RSS article: {title}")
|
||||
attempts += 1
|
||||
continue
|
||||
# First call: Post without share links
|
||||
post_data["content"] = final_summary
|
||||
post_id, post_url = post_to_wp(
|
||||
post_data=post_data,
|
||||
category=category,
|
||||
link=link,
|
||||
author=author,
|
||||
image_url=image_url,
|
||||
original_source=original_source,
|
||||
image_source=image_source,
|
||||
uploader=uploader,
|
||||
page_url=page_url,
|
||||
interest_score=interest_score,
|
||||
should_post_tweet=True,
|
||||
summary=final_summary
|
||||
)
|
||||
|
||||
ddg_context = fetch_duckduckgo_news_context(title)
|
||||
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
|
||||
interest_score = is_interesting(scoring_content)
|
||||
logging.info(f"Interest score for '{title}': {interest_score}")
|
||||
if interest_score < 6:
|
||||
logging.info(f"RSS Interest Too Low: {interest_score}")
|
||||
attempts += 1
|
||||
continue
|
||||
if not post_id:
|
||||
logger.warning(f"Failed to post RSS entry to WP: {post_data['title']}")
|
||||
return None, None
|
||||
|
||||
num_paragraphs = determine_paragraph_count(interest_score)
|
||||
extra_prompt = (
|
||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
||||
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
||||
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
|
||||
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
|
||||
f"Do not include emojis in the summary."
|
||||
)
|
||||
content_to_summarize = scoring_content
|
||||
final_summary = summarize_with_gpt4o(
|
||||
content_to_summarize,
|
||||
source_name,
|
||||
link,
|
||||
interest_score=interest_score,
|
||||
extra_prompt=extra_prompt
|
||||
)
|
||||
if not final_summary:
|
||||
logging.info(f"Summary failed for '{title}'")
|
||||
attempts += 1
|
||||
continue
|
||||
# Second call: Update with share links
|
||||
post_url_encoded = quote(post_url)
|
||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||
post_id, post_url = post_to_wp(
|
||||
post_data=post_data,
|
||||
category=category,
|
||||
link=link,
|
||||
author=author,
|
||||
image_url=None,
|
||||
original_source=original_source,
|
||||
image_source=image_source,
|
||||
uploader=uploader,
|
||||
page_url=page_url,
|
||||
interest_score=interest_score,
|
||||
post_id=post_id,
|
||||
should_post_tweet=False
|
||||
)
|
||||
|
||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||
if post_id:
|
||||
logger.info(f"Successfully curated and posted RSS entry: {post_data['title']} (URL: {post_url})")
|
||||
return post_id, post_url
|
||||
else:
|
||||
logger.warning(f"Failed to update RSS post with share links: {post_data['title']}")
|
||||
return None, None
|
||||
|
||||
post_data = {
|
||||
"title": generate_title_from_summary(final_summary),
|
||||
"content": final_summary,
|
||||
"status": "publish",
|
||||
"author": author_username,
|
||||
"categories": [generate_category_from_summary(final_summary)]
|
||||
}
|
||||
category = post_data["categories"][0]
|
||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
|
||||
if not image_url:
|
||||
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
|
||||
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
|
||||
if not image_url:
|
||||
logging.warning(f"All image uploads failed for '{title}' - posting without image")
|
||||
image_source = None
|
||||
uploader = None
|
||||
page_url = None
|
||||
|
||||
hook = get_dynamic_hook(post_data["title"]).strip()
|
||||
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
||||
share_text = f"Check out this foodie gem! {post_data['title']}"
|
||||
share_text_encoded = quote(share_text)
|
||||
share_links_template = (
|
||||
f'<p>{share_prompt} '
|
||||
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
||||
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
||||
)
|
||||
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links_template.format(post_url='{post_url}', share_text=share_text_encoded)}"
|
||||
|
||||
global is_posting
|
||||
is_posting = True
|
||||
try:
|
||||
post_id, post_url = post_to_wp(
|
||||
post_data=post_data,
|
||||
category=category,
|
||||
link=link,
|
||||
author=author,
|
||||
image_url=image_url,
|
||||
original_source=original_source,
|
||||
image_source=image_source,
|
||||
uploader=uploader,
|
||||
page_url=page_url,
|
||||
interest_score=interest_score,
|
||||
should_post_tweet=True,
|
||||
summary=final_summary
|
||||
)
|
||||
if not post_id:
|
||||
logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}")
|
||||
post_url = original_url
|
||||
else:
|
||||
logging.info(f"Posted to WordPress for {author_username}: {post_url}")
|
||||
|
||||
post_url_encoded = quote(post_url)
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)}"
|
||||
if post_id:
|
||||
post_to_wp(
|
||||
post_data=post_data,
|
||||
category=category,
|
||||
link=link,
|
||||
author=author,
|
||||
image_url=None, # Skip image re-upload
|
||||
original_source=original_source,
|
||||
image_source=image_source,
|
||||
uploader=uploader,
|
||||
page_url=page_url,
|
||||
interest_score=interest_score,
|
||||
post_id=post_id,
|
||||
should_post_tweet=False
|
||||
)
|
||||
|
||||
timestamp = datetime.now(timezone.utc).isoformat()
|
||||
save_json_file(POSTED_TITLES_FILE, title, timestamp)
|
||||
posted_titles.add(title)
|
||||
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
|
||||
|
||||
if image_url:
|
||||
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
||||
used_images.add(image_url)
|
||||
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
||||
|
||||
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from RSS *****")
|
||||
sleep_time = random.randint(1200, 1800) # 20–30 minutes
|
||||
return post_data, category, sleep_time
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
|
||||
post_url = original_url
|
||||
timestamp = datetime.now(timezone.utc).isoformat()
|
||||
save_json_file(POSTED_TITLES_FILE, title, timestamp)
|
||||
posted_titles.add(title)
|
||||
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
|
||||
if image_url:
|
||||
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
||||
used_images.add(image_url)
|
||||
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
||||
attempts += 1
|
||||
finally:
|
||||
is_posting = False
|
||||
|
||||
logging.info("No interesting RSS article found after attempts")
|
||||
sleep_time = random.randint(1200, 1800) # 20–30 minutes
|
||||
return None, None, sleep_time
|
||||
except Exception as e:
|
||||
logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True)
|
||||
sleep_time = random.randint(1200, 1800) # 20–30 minutes
|
||||
return None, None, sleep_time
|
||||
logger.error(f"Error curating RSS entry '{entry.get('title', 'unknown')}': {e}")
|
||||
return None, None
|
||||
|
||||
def run_rss_automator():
|
||||
lock_fd = None
|
||||
|
||||
+49
-14
@@ -615,23 +615,28 @@ def insert_link_naturally(summary, source_name, source_url):
|
||||
try:
|
||||
logging.info(f"Input summary to insert_link_naturally: {summary!r}")
|
||||
|
||||
# Split summary into paragraphs using \n\n (correct separator)
|
||||
# Split summary into paragraphs using \n\n
|
||||
paragraphs = summary.split('\n\n')
|
||||
if not paragraphs or all(not p.strip() for p in paragraphs):
|
||||
logging.error("No valid paragraphs to insert link.")
|
||||
return summary
|
||||
return append_link_as_fallback(summary, source_name, source_url)
|
||||
|
||||
# Find paragraphs with at least two sentences
|
||||
eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2]
|
||||
eligible_paragraphs = [
|
||||
p for p in paragraphs
|
||||
if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2
|
||||
]
|
||||
if not eligible_paragraphs:
|
||||
logging.warning("No paragraph with multiple sentences found, using fallback.")
|
||||
return append_link_as_fallback(summary, source_name, source_url)
|
||||
|
||||
# Alternative phrases for variety
|
||||
# Alternative phrases for manual insertion (as a fallback)
|
||||
link_phrases = [
|
||||
"according to {source}",
|
||||
"as reported by {source}",
|
||||
"{source} notes that"
|
||||
"{source} notes that",
|
||||
"per {source}",
|
||||
"says {source}"
|
||||
]
|
||||
|
||||
best_candidate = None
|
||||
@@ -643,22 +648,28 @@ def insert_link_naturally(summary, source_name, source_url):
|
||||
sentences = re.split(r'(?<=[.!?])\s+', para.strip())
|
||||
eligible_sentences = [
|
||||
(i, s) for i, s in enumerate(sentences)
|
||||
if s.strip() and not s.endswith('?') # Exclude sentences ending with '?'
|
||||
and not s.endswith('!') # Exclude exclamations for smoother integration
|
||||
if s.strip()
|
||||
and not s.endswith('?') # Exclude questions
|
||||
and not s.endswith('!') # Exclude exclamations
|
||||
and '<a href=' not in s # Avoid sentences with existing links
|
||||
and len(s.split()) >= 5 # Prefer sentences with at least 5 words
|
||||
]
|
||||
if not eligible_sentences:
|
||||
continue
|
||||
|
||||
# Score sentences based on suitability (prefer declarative sentences)
|
||||
# Score sentences based on suitability
|
||||
for idx, sentence in eligible_sentences:
|
||||
score = 0
|
||||
# Favor sentences with factual content (simplified heuristic)
|
||||
# Favor sentences with factual content
|
||||
if any(word in sentence.lower() for word in ["is", "are", "has", "shows", "reveals"]):
|
||||
score += 2
|
||||
# Prefer longer sentences for better context
|
||||
score += len(sentence.split()) // 5
|
||||
# Prefer middle sentences for natural flow
|
||||
score += abs(idx - len(sentences) / 2) * -1 # Penalize sentences far from the middle
|
||||
# Boost score for sentences mentioning the source topic
|
||||
if source_name.lower() in sentence.lower():
|
||||
score += 3
|
||||
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
@@ -669,14 +680,38 @@ def insert_link_naturally(summary, source_name, source_url):
|
||||
logging.warning("No suitable sentence found, using fallback.")
|
||||
return append_link_as_fallback(summary, source_name, source_url)
|
||||
|
||||
# Select a link phrase based on sentence structure
|
||||
# Select a link phrase for fallback manual insertion
|
||||
sentence_idx, sentence = best_candidate
|
||||
link_phrase = random.choice(link_phrases)
|
||||
link_pattern = f'<a href="{source_url}">{source_name}</a>'
|
||||
formatted_link = link_phrase.format(source=link_pattern)
|
||||
|
||||
# Insert the link at the end of the selected sentence (no capitalization needed)
|
||||
new_sentence = f"{sentence.rstrip('.')} {formatted_link}."
|
||||
# Use GPT to rewrite the sentence with the link
|
||||
prompt = (
|
||||
f"Rewrite the following sentence to naturally include a reference to the source '{source_name}' "
|
||||
f"with a hyperlink in HTML format: <a href=\"{source_url}\">{source_name}</a>. "
|
||||
"Integrate the link into the sentence seamlessly, maintaining the original tone and style. "
|
||||
"Do not add extra sentences, change the meaning, or include additional punctuation like a trailing period. "
|
||||
"Return only the rewritten sentence."
|
||||
)
|
||||
response = client.chat.completions.create(
|
||||
model=LIGHT_TASK_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": prompt},
|
||||
{"role": "user", "content": sentence}
|
||||
],
|
||||
max_tokens=100,
|
||||
temperature=0.7
|
||||
)
|
||||
new_sentence = response.choices[0].message.content.strip()
|
||||
if not new_sentence or '<a href=' not in new_sentence:
|
||||
logging.warning("GPT failed to rewrite sentence, using manual insertion")
|
||||
new_sentence = f"{sentence.rstrip('.')} {formatted_link}."
|
||||
else:
|
||||
# Ensure the sentence ends with a period if the original did
|
||||
if sentence.rstrip().endswith('.'):
|
||||
new_sentence = new_sentence.rstrip('.') + '.'
|
||||
|
||||
sentences[sentence_idx] = new_sentence
|
||||
new_para = ' '.join(sentences)
|
||||
paragraphs[paragraphs.index(best_paragraph)] = new_para
|
||||
@@ -838,12 +873,12 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im
|
||||
tags.append(picks_tag_id)
|
||||
logger.info(f"Added 'Picks' tag (ID: {picks_tag_id}) due to high interest score: {interest_score}")
|
||||
|
||||
# Format content with <p> tags
|
||||
# Format content with <p> tags, splitting on \n\n to match summary format
|
||||
content = post_data["content"]
|
||||
if content is None:
|
||||
logger.error(f"Post content is None for title '{post_data['title']}' - using fallback")
|
||||
content = "Content unavailable. Check the original source for details."
|
||||
formatted_content = "\n".join(f"<p>{para}</p>" for para in content.split('\n') if para.strip())
|
||||
formatted_content = "\n".join(f"<p>{para}</p>" for para in content.split('\n\n') if para.strip())
|
||||
|
||||
# Upload image before posting
|
||||
image_id = None
|
||||
|
||||
Reference in New Issue
Block a user