update google & reddit rate limiting realtime

Branch: main
Shane committed 7 months ago
parent bfeec7a560
commit 01bab56eb6
Changed files:
  1. foodie_automator_google.py (121 changes)
  2. foodie_automator_reddit.py (118 changes)
  3. foodie_automator_rss.py (32 changes)
  4. foodie_utils.py (37 changes)

foodie_automator_google.py

@@ -248,17 +248,12 @@ def fetch_duckduckgo_news_context(trend_title, hours=24):
     logging.error(f"Failed to fetch DuckDuckGo News context for '{trend_title}' after {MAX_RETRIES} attempts")
     return trend_title

-def curate_from_google_trends():
+def curate_from_google_trends(posted_titles_data, posted_titles, used_images_data, used_images):
     try:
-        global posted_titles_data, posted_titles, used_images
-        posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
-        posted_titles = set(entry["title"] for entry in posted_titles_data)
-        used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
-        logging.debug(f"Loaded {len(posted_titles)} posted titles and {len(used_images)} used images")
-        trends = fetch_google_trends()
+        logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")
+        trends = scrape_google_trends()
         if not trends:
-            print("No Google Trends data available")
             logging.info("No Google Trends data available")
             return None, None, False

@@ -273,24 +268,20 @@ def curate_from_google_trends():
             original_source = f'<a href="{link}">{source_name}</a>'
             if title in posted_titles:
-                print(f"Skipping already posted trend: {title}")
                 logging.info(f"Skipping already posted trend: {title}")
                 attempts += 1
                 continue
-            print(f"Trying Google Trend: {title} from {source_name}")
             logging.info(f"Trying Google Trend: {title} from {source_name}")
             try:
                 image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
             except Exception as e:
-                print(f"Smart image/filter error for '{title}': {e}")
                 logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
                 attempts += 1
                 continue
             if skip:
-                print(f"Skipping filtered trend: {title}")
                 logging.info(f"Skipping filtered trend: {title}")
                 attempts += 1
                 continue

@@ -298,10 +289,8 @@ def curate_from_google_trends():
             ddg_context = fetch_duckduckgo_news_context(title)
             scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
             interest_score = is_interesting(scoring_content)
-            print(f"Interest Score for '{title[:50]}...': {interest_score}")
             logging.info(f"Interest score for '{title}': {interest_score}")
             if interest_score < 6:
-                print(f"Trend Interest Too Low: {interest_score}")
                 logging.info(f"Trend Interest Too Low: {interest_score}")
                 attempts += 1
                 continue

@@ -324,14 +313,12 @@ def curate_from_google_trends():
                 extra_prompt=extra_prompt
             )
             if not final_summary:
-                print(f"Summary failed for '{title}'")
                 logging.info(f"Summary failed for '{title}'")
                 attempts += 1
                 continue
             final_summary = insert_link_naturally(final_summary, source_name, link)
-            # Use round-robin author selection
             author = get_next_author_round_robin()
             author_username = author["username"]
             logging.info(f"Selected author via round-robin: {author_username}")

@@ -346,11 +333,9 @@ def curate_from_google_trends():
             category = post_data["categories"][0]
             image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
             if not image_url:
-                print(f"Flickr image fetch failed for '{image_query}', trying fallback")
                 logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
                 image_url, image_source, uploader, page_url = get_image(image_query)
             if not image_url:
-                print(f"All image uploads failed for '{title}' - posting without image")
                 logging.warning(f"All image uploads failed for '{title}' - posting without image")
                 image_source = None
                 uploader = None

@@ -358,9 +343,11 @@ def curate_from_google_trends():
             hook = get_dynamic_hook(post_data["title"]).strip()
             share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
+            share_text = f"Check out this foodie gem! {post_data['title']}"
+            share_text_encoded = quote(share_text)
             share_links_template = (
                 f'<p>{share_prompt} '
-                f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
+                f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
                 f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
             )
             post_data["content"] = f"{final_summary}\n\n{share_links_template}"

@@ -382,72 +369,53 @@ def curate_from_google_trends():
                     should_post_tweet=True
                 )
                 if not post_id:
-                    print(f"Failed to post to WordPress for '{title}'")
                     logging.warning(f"Failed to post to WordPress for '{title}'")
                     attempts += 1
                     continue
+                # Update post with actual post_url
+                post_url_encoded = quote(post_url)
+                share_links = share_links_template.format(post_url=post_url_encoded)
+                post_data["content"] = f"{final_summary}\n\n{share_links}"
+                post_data["post_id"] = post_id
+                post_to_wp(
+                    post_data=post_data,
+                    category=category,
+                    link=link,
+                    author=author,
+                    image_url=None,  # Skip image re-upload
+                    original_source=original_source,
+                    image_source=image_source,
+                    uploader=uploader,
+                    page_url=page_url,
+                    interest_score=interest_score,
+                    post_id=post_id,
+                    should_post_tweet=False
+                )
             except Exception as e:
-                print(f"WordPress posting error for '{title}': {e}")
                 logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
                 attempts += 1
                 continue
             finally:
                 is_posting = False
-            if post_id:
-                share_text = f"Check out this foodie gem! {post_data['title']}"
-                share_text_encoded = quote(share_text)
-                post_url_encoded = quote(post_url)
-                share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
-                post_data["content"] = f"{final_summary}\n\n{share_links}"
-                is_posting = True
-                try:
-                    post_to_wp(
-                        post_data=post_data,
-                        category=category,
-                        link=link,
-                        author=author,
-                        image_url=image_url,
-                        original_source=original_source,
-                        image_source=image_source,
-                        uploader=uploader,
-                        page_url=page_url,
-                        interest_score=interest_score,
-                        post_id=post_id,
-                        should_post_tweet=False
-                    )
-                except Exception as e:
-                    print(f"Failed to update WordPress post '{title}' with share links: {e}")
-                    logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
-                finally:
-                    is_posting = False
-                timestamp = datetime.now(timezone.utc).isoformat()
-                save_json_file(POSTED_TITLES_FILE, title, timestamp)
-                posted_titles.add(title)
-                print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
-                logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
-                if image_url:
-                    save_json_file(USED_IMAGES_FILE, image_url, timestamp)
-                    used_images.add(image_url)
-                    print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
-                    logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
-                print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
-                logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
-                return post_data, category, True
-            attempts += 1
-            print(f"WP posting failed for '{post_data['title']}'")
-            logging.info(f"WP posting failed for '{post_data['title']}'")
+            timestamp = datetime.now(timezone.utc).isoformat()
+            save_json_file(POSTED_TITLES_FILE, title, timestamp)
+            posted_titles.add(title)
+            logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
+
+            if image_url:
+                save_json_file(USED_IMAGES_FILE, image_url, timestamp)
+                used_images.add(image_url)
+                logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
+
+            logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
+            return post_data, category, True
-        print("No interesting Google Trend found after attempts")
         logging.info("No interesting Google Trend found after attempts")
         return None, None, False
     except Exception as e:
         logging.error(f"Unexpected error in curate_from_google_trends: {e}", exc_info=True)
-        print(f"Unexpected error in curate_from_google_trends: {e}")
         return None, None, False

@@ -455,12 +423,15 @@ def run_google_trends_automator():
     try:
         lock_fd = acquire_lock()
         logging.info("***** Google Trends Automator Launched *****")
-        geo_list = ['US', 'GB', 'AU']
-        post_data, category, should_continue = curate_from_google_trends(geo_list=geo_list)
+        # Load JSON files once
+        posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
+        posted_titles = set(entry["title"] for entry in posted_titles_data)
+        used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
+        used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
+        post_data, category, should_continue = curate_from_google_trends(posted_titles_data, posted_titles, used_images_data, used_images)
         if not post_data:
             logging.info("No postable Google Trend found")
-        else:
-            logging.info("Completed Google Trends run")
+        logging.info("Completed Google Trends run")
         return post_data, category, should_continue
     except Exception as e:
         logging.error(f"Fatal error in run_google_trends_automator: {e}", exc_info=True)

foodie_automator_reddit.py

@@ -268,17 +268,12 @@ def fetch_reddit_posts():
     logging.error(f"Unexpected error in fetch_reddit_posts: {e}", exc_info=True)
     return []

-def curate_from_reddit():
+def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used_images):
     try:
-        global posted_titles_data, posted_titles, used_images
-        posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
-        posted_titles = set(entry["title"] for entry in posted_titles_data)
-        used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
-        logging.debug(f"Loaded {len(posted_titles)} posted titles and {len(used_images)} used images")
+        logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")
         posts = fetch_reddit_posts()
         if not posts:
-            print("No Reddit posts available")
             logging.info("No Reddit posts available")
             return None, None, False

@@ -293,24 +288,20 @@ def curate_from_reddit():
             original_source = f'<a href="{link}">{source_name}</a>'
             if title in posted_titles:
-                print(f"Skipping already posted Reddit post: {title}")
                 logging.info(f"Skipping already posted Reddit post: {title}")
                 attempts += 1
                 continue
-            print(f"Trying Reddit Post: {title} from {source_name}")
             logging.info(f"Trying Reddit Post: {title} from {source_name}")
             try:
                 image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
             except Exception as e:
-                print(f"Smart image/filter error for '{title}': {e}")
                 logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
                 attempts += 1
                 continue
             if skip:
-                print(f"Skipping filtered Reddit post: {title}")
                 logging.info(f"Skipping filtered Reddit post: {title}")
                 attempts += 1
                 continue

@@ -318,10 +309,8 @@ def curate_from_reddit():
             ddg_context = fetch_duckduckgo_news_context(title)
             scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
             interest_score = is_interesting(scoring_content)
-            print(f"Interest Score for '{title[:50]}...': {interest_score}")
             logging.info(f"Interest score for '{title}': {interest_score}")
             if interest_score < 6:
-                print(f"Reddit Interest Too Low: {interest_score}")
                 logging.info(f"Reddit Interest Too Low: {interest_score}")
                 attempts += 1
                 continue

@@ -344,14 +333,12 @@ def curate_from_reddit():
                 extra_prompt=extra_prompt
             )
             if not final_summary:
-                print(f"Summary failed for '{title}'")
                 logging.info(f"Summary failed for '{title}'")
                 attempts += 1
                 continue
             final_summary = insert_link_naturally(final_summary, source_name, link)
-            # Use round-robin author selection
             author = get_next_author_round_robin()
             author_username = author["username"]
             logging.info(f"Selected author via round-robin: {author_username}")

@@ -366,11 +353,9 @@ def curate_from_reddit():
             category = post_data["categories"][0]
             image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
             if not image_url:
-                print(f"Flickr image fetch failed for '{image_query}', trying fallback")
                 logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
                 image_url, image_source, uploader, page_url = get_image(image_query)
             if not image_url:
-                print(f"All image uploads failed for '{title}' - posting without image")
                 logging.warning(f"All image uploads failed for '{title}' - posting without image")
                 image_source = None
                 uploader = None

@@ -378,9 +363,11 @@ def curate_from_reddit():
             hook = get_dynamic_hook(post_data["title"]).strip()
             share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
+            share_text = f"Check out this foodie gem! {post_data['title']}"
+            share_text_encoded = quote(share_text)
             share_links_template = (
                 f'<p>{share_prompt} '
-                f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
+                f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
                 f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
             )
             post_data["content"] = f"{final_summary}\n\n{share_links_template}"

@@ -402,72 +389,53 @@ def curate_from_reddit():
                     should_post_tweet=True
                 )
                 if not post_id:
-                    print(f"Failed to post to WordPress for '{title}'")
                     logging.warning(f"Failed to post to WordPress for '{title}'")
                     attempts += 1
                     continue
+                # Update post with actual post_url
+                post_url_encoded = quote(post_url)
+                share_links = share_links_template.format(post_url=post_url_encoded)
+                post_data["content"] = f"{final_summary}\n\n{share_links}"
+                post_data["post_id"] = post_id
+                post_to_wp(
+                    post_data=post_data,
+                    category=category,
+                    link=link,
+                    author=author,
+                    image_url=None,  # Skip image re-upload
+                    original_source=original_source,
+                    image_source=image_source,
+                    uploader=uploader,
+                    page_url=page_url,
+                    interest_score=interest_score,
+                    post_id=post_id,
+                    should_post_tweet=False
+                )
             except Exception as e:
-                print(f"WordPress posting error for '{title}': {e}")
                 logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
                 attempts += 1
                 continue
             finally:
                 is_posting = False
-            if post_id:
-                share_text = f"Check out this foodie gem! {post_data['title']}"
-                share_text_encoded = quote(share_text)
-                post_url_encoded = quote(post_url)
-                share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
-                post_data["content"] = f"{final_summary}\n\n{share_links}"
-                is_posting = True
-                try:
-                    post_to_wp(
-                        post_data=post_data,
-                        category=category,
-                        link=link,
-                        author=author,
-                        image_url=image_url,
-                        original_source=original_source,
-                        image_source=image_source,
-                        uploader=uploader,
-                        page_url=page_url,
-                        interest_score=interest_score,
-                        post_id=post_id,
-                        should_post_tweet=False
-                    )
-                except Exception as e:
-                    print(f"Failed to update WordPress post '{title}' with share links: {e}")
-                    logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
-                finally:
-                    is_posting = False
-                timestamp = datetime.now(timezone.utc).isoformat()
-                save_json_file(POSTED_TITLES_FILE, title, timestamp)
-                posted_titles.add(title)
-                print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
-                logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
-                if image_url:
-                    save_json_file(USED_IMAGES_FILE, image_url, timestamp)
-                    used_images.add(image_url)
-                    print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
-                    logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
-                print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
-                logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
-                return post_data, category, True
-            attempts += 1
-            print(f"WP posting failed for '{post_data['title']}'")
-            logging.info(f"WP posting failed for '{post_data['title']}'")
+            timestamp = datetime.now(timezone.utc).isoformat()
+            save_json_file(POSTED_TITLES_FILE, title, timestamp)
+            posted_titles.add(title)
+            logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
+
+            if image_url:
+                save_json_file(USED_IMAGES_FILE, image_url, timestamp)
+                used_images.add(image_url)
+                logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
+
+            logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
+            return post_data, category, True
-        print("No interesting Reddit post found after attempts")
         logging.info("No interesting Reddit post found after attempts")
         return None, None, False
     except Exception as e:
         logging.error(f"Unexpected error in curate_from_reddit: {e}", exc_info=True)
-        print(f"Unexpected error in curate_from_reddit: {e}")
         return None, None, False

@@ -475,11 +443,15 @@ def run_reddit_automator():
     try:
         lock_fd = acquire_lock()
         logging.info("***** Reddit Automator Launched *****")
-        post_data, category, should_continue = curate_from_reddit()
+        # Load JSON files once
+        posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
+        posted_titles = set(entry["title"] for entry in posted_titles_data)
+        used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
+        used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
+        post_data, category, should_continue = curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used_images)
        if not post_data:
             logging.info("No postable Reddit article found")
-        else:
-            logging.info("Completed Reddit run")
+        logging.info("Completed Reddit run")
         return post_data, category, should_continue
     except Exception as e:
         logging.error(f"Fatal error in run_reddit_automator: {e}", exc_info=True)

foodie_automator_rss.py

@@ -256,14 +256,15 @@ def fetch_duckduckgo_news_context(title, hours=24):

 def curate_from_rss():
     try:
         global posted_titles_data, posted_titles, used_images
+        # Load JSON files once
         posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
         posted_titles = set(entry["title"] for entry in posted_titles_data)
-        used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
+        used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
+        used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
         logging.debug(f"Loaded {len(posted_titles)} posted titles and {len(used_images)} used images")
         articles = fetch_rss_feeds()
         if not articles:
-            print("No RSS articles available")
             logging.info("No RSS articles available")
             return None, None, random.randint(600, 1800)

@@ -278,24 +279,20 @@ def curate_from_rss():
             original_source = f'<a href="{link}">{source_name}</a>'
             if title in posted_titles:
-                print(f"Skipping already posted article: {title}")
                 logging.info(f"Skipping already posted article: {title}")
                 attempts += 1
                 continue
-            print(f"Trying RSS Article: {title} from {source_name}")
             logging.info(f"Trying RSS Article: {title} from {source_name}")
             try:
                 image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
             except Exception as e:
-                print(f"Smart image/filter error for '{title}': {e}")
                 logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
                 attempts += 1
                 continue
             if skip:
-                print(f"Skipping filtered RSS article: {title}")
                 logging.info(f"Skipping filtered RSS article: {title}")
                 attempts += 1
                 continue

@@ -303,10 +300,8 @@ def curate_from_rss():
             ddg_context = fetch_duckduckgo_news_context(title)
             scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
             interest_score = is_interesting(scoring_content)
-            print(f"Interest Score for '{title[:50]}...': {interest_score}")
             logging.info(f"Interest score for '{title}': {interest_score}")
             if interest_score < 6:
-                print(f"RSS Interest Too Low: {interest_score}")
                 logging.info(f"RSS Interest Too Low: {interest_score}")
                 attempts += 1
                 continue

@@ -329,7 +324,6 @@ def curate_from_rss():
                 extra_prompt=extra_prompt
             )
             if not final_summary:
-                print(f"Summary failed for '{title}'")
                 logging.info(f"Summary failed for '{title}'")
                 attempts += 1
                 continue

@@ -351,11 +345,9 @@ def curate_from_rss():
             category = post_data["categories"][0]
             image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
             if not image_url:
-                print(f"Flickr image fetch failed for '{image_query}', trying fallback")
                 logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
                 image_url, image_source, uploader, page_url = get_image(image_query)
             if not image_url:
-                print(f"All image uploads failed for '{title}' - posting without image")
                 logging.warning(f"All image uploads failed for '{title}' - posting without image")
                 image_source = None
                 uploader = None

@@ -371,7 +363,7 @@ def curate_from_rss():
                 f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
             )
-            # Prepare post content with share links (to be updated later with post_url)
+            # Prepare post content with share links placeholder
             post_data["content"] = f"{final_summary}\n\n{share_links_template}"

             global is_posting

@@ -391,22 +383,21 @@ def curate_from_rss():
                     should_post_tweet=True
                 )
                 if not post_id:
-                    print(f"Failed to post to WordPress for '{title}'")
                     logging.warning(f"Failed to post to WordPress for '{title}'")
                     attempts += 1
                     continue
-                # Update content with actual post_url
+                # Update post with actual post_url
                 post_url_encoded = quote(post_url)
                 share_links = share_links_template.format(post_url=post_url_encoded)
                 post_data["content"] = f"{final_summary}\n\n{share_links}"
-                post_data["post_id"] = post_id  # For update
+                post_data["post_id"] = post_id
                 post_to_wp(
                     post_data=post_data,
                     category=category,
                     link=link,
                     author=author,
-                    image_url=None,  # No need to re-upload image
+                    image_url=None,  # Skip image re-upload
                     original_source=original_source,
                     image_source=image_source,
                     uploader=uploader,

@@ -416,7 +407,6 @@ def curate_from_rss():
                     should_post_tweet=False
                 )
             except Exception as e:
-                print(f"WordPress posting error for '{title}': {e}")
                 logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
                 attempts += 1
                 continue

@@ -426,29 +416,23 @@ def curate_from_rss():
             timestamp = datetime.now(timezone.utc).isoformat()
             save_json_file(POSTED_TITLES_FILE, title, timestamp)
             posted_titles.add(title)
-            print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
             logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
             if image_url:
                 save_json_file(USED_IMAGES_FILE, image_url, timestamp)
                 used_images.add(image_url)
-                print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
                 logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
-            print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
             logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
             return post_data, category, random.randint(0, 1800)
             attempts += 1
-            print(f"WP posting failed for '{post_data['title']}'")
             logging.info(f"WP posting failed for '{post_data['title']}'")
-        print("No interesting RSS article found after attempts")
         logging.info("No interesting RSS article found after attempts")
         return None, None, random.randint(600, 1800)
     except Exception as e:
         logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True)
-        print(f"Unexpected error in curate_from_rss: {e}")
         return None, None, random.randint(600, 1800)

@@ -461,7 +445,7 @@ def run_rss_automator():
             logging.info("No postable RSS article found")
         logging.info(f"Completed run with sleep time: {sleep_time} seconds")
         time.sleep(sleep_time)
-        return post_data, category, sleep_time
+        return post_data, category, fixes
     except Exception as e:
         logging.error(f"Fatal error in run_rss_automator: {e}", exc_info=True)
         return None, None, random.randint(600, 1800)
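
All three automators persist dedupe records through save_json_file(FILE, title, timestamp) and expire them on load. Those helpers are defined elsewhere in foodie_utils.py and are not part of this diff; the sketch below is an inferred contract, not the repo's actual implementation: JSON-per-line records (matching the writer the old save_used_images() used) with age-based filtering on read, and the TTL unit assumed to be hours:

    import json
    from datetime import datetime, timedelta, timezone

    def save_entry(path, title, timestamp):
        # Append one JSON-per-line record, like the old save_used_images() writer.
        with open(path, "a") as f:
            f.write(json.dumps({"title": title, "timestamp": timestamp}) + "\n")

    def load_entries(path, max_age_hours):
        # Return only records newer than the TTL; a missing file means no history.
        cutoff = datetime.now(timezone.utc) - timedelta(hours=max_age_hours)
        entries = []
        try:
            with open(path) as f:
                for line in f:
                    entry = json.loads(line)
                    if datetime.fromisoformat(entry["timestamp"]) >= cutoff:
                        entries.append(entry)
        except FileNotFoundError:
            pass
        return entries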

foodie_utils.py

@@ -857,24 +857,22 @@ if os.path.exists(used_images_file):

 # Function to save used_images to file
 def save_used_images():
-    """
-    Save used_images to used_images.json as a JSON array, preserving timestamps.
-    """
     try:
-        # Load existing entries to preserve timestamps
-        entries = load_json_file(used_images_file, IMAGE_EXPIRATION_DAYS * 24)
-        existing_entries = {entry["title"]: entry for entry in entries if isinstance(entry, dict) and "title" in entry}
-        # Create new entries for used_images
+        # Create entries for used_images
         timestamp = datetime.now(timezone.utc).isoformat()
-        updated_entries = []
-        for url in used_images:
-            if url in existing_entries:
-                updated_entries.append(existing_entries[url])
-            else:
-                updated_entries.append({"title": url, "timestamp": timestamp})
-        with open(used_images_file, 'w') as f:
-            for entry in updated_entries:
-                f.write(json.dumps(entry) + '\n')
-        logging.info(f"Saved {len(updated_entries)} used image URLs to {used_images_file}")
+        entries = [
+            {"title": url, "timestamp": entry.get("timestamp", timestamp)}
+            for url, entry in [
+                (url, next((e for e in load_json_file(used_images_file, IMAGE_EXPIRATION_DAYS * 24) if e["title"] == url), {}))
+                for url in used_images
+            ]
+        ]
+        # Use save_json_file for atomic write
+        save_json_file(used_images_file, entries)
+        logging.info(f"Saved {len(entries)} used image URLs to {used_images_file}")
     except Exception as e:
         logging.warning(f"Failed to save used images to {used_images_file}: {e}")

@@ -1136,17 +1134,18 @@ def check_author_rate_limit(author, max_requests=10, window_seconds=3600):
     rate_limit_info = load_json_file(rate_limit_file, default={})
     username = author['username']
-    if username not in rate_limit_info:
+    if username not in rate_limit_info or not isinstance(rate_limit_info[username].get('reset'), (int, float)):
         rate_limit_info[username] = {
             'remaining': max_requests,
             'reset': time.time()
         }
+        logger.info(f"Initialized rate limit for {username}: {max_requests} requests available")
     info = rate_limit_info[username]
     current_time = time.time()
-    # Reset if window expired
-    if current_time >= info['reset']:
+    # Reset if window expired or timestamp is invalid (e.g., 1970)
+    if current_time >= info['reset'] or info['reset'] < 1000000000:  # 1000000000 is ~2001
         info['remaining'] = max_requests
         info['reset'] = current_time + window_seconds
         logger.info(f"Reset rate limit for {username}: {max_requests} requests available")
