@@ -25,9 +25,11 @@ from foodie_config import (
 from foodie_utils import (
     load_json_file, save_json_file, get_image, generate_image_query,
     upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
-    summarize_with_gpt4o, generate_category_from_summary, post_to_wp,
-    prepare_post_data, select_best_author, smart_image_and_filter,
-    get_flickr_image
+    is_interesting, generate_title_from_summary, summarize_with_gpt4o,
+    generate_category_from_summary, post_to_wp, prepare_post_data,
+    select_best_author, smart_image_and_filter, get_flickr_image,
+    get_next_author_round_robin, fetch_duckduckgo_news_context,
+    check_author_rate_limit
 )
 from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
 import fcntl
@@ -268,55 +270,58 @@ def fetch_reddit_posts():
 
 def curate_from_reddit():
     try:
-        articles = fetch_reddit_posts()
-        if not articles:
+        global posted_titles_data, posted_titles, used_images
+        posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
+        posted_titles = set(entry["title"] for entry in posted_titles_data)
+        used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
+        logging.debug(f"Loaded {len(posted_titles)} posted titles and {len(used_images)} used images")
+
+        posts = fetch_reddit_posts()
+        if not posts:
+            print("No Reddit posts available")
             logging.info("No Reddit posts available")
             return None, None, False
 
-        articles.sort(key=lambda x: x["upvotes"], reverse=True)
-
-        reddit = praw.Reddit(
-            client_id=REDDIT_CLIENT_ID,
-            client_secret=REDDIT_CLIENT_SECRET,
-            user_agent=REDDIT_USER_AGENT
-        )
 
         attempts = 0
         max_attempts = 10
-        while attempts < max_attempts and articles:
-            article = articles.pop(0)
-            title = article["title"]
-            raw_title = article["raw_title"]
-            link = article["link"]
-            summary = article["summary"]
-            source_name = "Reddit"
-            original_source = '<a href="https://www.reddit.com/">Reddit</a>'
-
-            if raw_title in posted_titles:
-                logging.info(f"Skipping already posted post: {raw_title}")
+        while attempts < max_attempts and posts:
+            post = posts.pop(0)
+            title = post["title"]
+            link = post.get("link", "")
+            summary = post.get("summary", "")
+            source_name = post.get("source", "Reddit")
+            original_source = f'<a href="{link}">{source_name}</a>'
+
+            if title in posted_titles:
+                print(f"Skipping already posted Reddit post: {title}")
+                logging.info(f"Skipping already posted Reddit post: {title}")
                 attempts += 1
                 continue
 
+            print(f"Trying Reddit Post: {title} from {source_name}")
             logging.info(f"Trying Reddit Post: {title} from {source_name}")
 
-            image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
-            if skip or any(keyword in title.lower() or keyword in raw_title.lower() for keyword in RECIPE_KEYWORDS + ["homemade"]):
+            try:
+                image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
+            except Exception as e:
+                print(f"Smart image/filter error for '{title}': {e}")
+                logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
+                attempts += 1
+                continue
+
+            if skip:
+                print(f"Skipping filtered Reddit post: {title}")
                 logging.info(f"Skipping filtered Reddit post: {title}")
                 attempts += 1
                 continue
 
-            top_comments = get_top_comments(link, reddit, limit=3)
             ddg_context = fetch_duckduckgo_news_context(title)
-            content_to_summarize = f"{title}\n\n{summary}\n\nTop Comments:\n{'\n'.join(top_comments) if top_comments else 'None'}\n\nAdditional Context: {ddg_context}"
-            interest_score = is_interesting_reddit(
-                title,
-                summary,
-                article["upvotes"],
-                article["comment_count"],
-                top_comments
-            )
-            logging.info(f"Interest Score: {interest_score} for '{title}'")
+            scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
+            interest_score = is_interesting(scoring_content)
+            print(f"Interest Score for '{title[:50]}...': {interest_score}")
+            logging.info(f"Interest score for '{title}': {interest_score}")
             if interest_score < 6:
+                print(f"Reddit Interest Too Low: {interest_score}")
                 logging.info(f"Reddit Interest Too Low: {interest_score}")
                 attempts += 1
                 continue
@@ -325,13 +330,12 @@ def curate_from_reddit():
             extra_prompt = (
                 f"Generate exactly {num_paragraphs} paragraphs.\n"
                 f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
-                f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
                 f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
-                f"Do NOT introduce unrelated concepts unless in the content, comments, or additional context.\n"
-                f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
+                f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
+                f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
                 f"Do not include emojis in the summary."
             )
-
+            content_to_summarize = scoring_content
             final_summary = summarize_with_gpt4o(
                 content_to_summarize,
                 source_name,
@@ -340,6 +344,7 @@ def curate_from_reddit():
                 extra_prompt=extra_prompt
             )
             if not final_summary:
+                print(f"Summary failed for '{title}'")
                 logging.info(f"Summary failed for '{title}'")
                 attempts += 1
                 continue
@@ -361,15 +366,17 @@ def curate_from_reddit():
             category = post_data["categories"][0]
             image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
             if not image_url:
+                print(f"Flickr image fetch failed for '{image_query}', trying fallback")
+                logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
                 image_url, image_source, uploader, page_url = get_image(image_query)
             if not image_url:
+                print(f"All image uploads failed for '{title}' - posting without image")
                 logging.warning(f"All image uploads failed for '{title}' - posting without image")
                 image_source = None
                 uploader = None
                 page_url = None
 
             hook = get_dynamic_hook(post_data["title"]).strip()
-
             share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
             share_links_template = (
                 f'<p>{share_prompt} '
@@ -394,7 +401,13 @@ def curate_from_reddit():
                     interest_score=interest_score,
                     should_post_tweet=True
                 )
+                if not post_id:
+                    print(f"Failed to post to WordPress for '{title}'")
+                    logging.warning(f"Failed to post to WordPress for '{title}'")
+                    attempts += 1
+                    continue
             except Exception as e:
+                print(f"WordPress posting error for '{title}': {e}")
                 logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
                 attempts += 1
                 continue
@@ -424,29 +437,37 @@ def curate_from_reddit():
                         should_post_tweet=False
                     )
                 except Exception as e:
+                    print(f"Failed to update WordPress post '{title}' with share links: {e}")
                     logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
                 finally:
                     is_posting = False
 
                 timestamp = datetime.now(timezone.utc).isoformat()
-                save_json_file(POSTED_TITLES_FILE, raw_title, timestamp)
-                posted_titles.add(raw_title)
-                logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE}")
+                save_json_file(POSTED_TITLES_FILE, title, timestamp)
+                posted_titles.add(title)
+                print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
+                logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
 
                 if image_url:
                     save_json_file(USED_IMAGES_FILE, image_url, timestamp)
                     used_images.add(image_url)
+                    print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
                     logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
 
+                print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
                 logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
                 return post_data, category, True
 
             attempts += 1
+            print(f"WP posting failed for '{post_data['title']}'")
             logging.info(f"WP posting failed for '{post_data['title']}'")
+
+        print("No interesting Reddit post found after attempts")
         logging.info("No interesting Reddit post found after attempts")
         return None, None, False
     except Exception as e:
         logging.error(f"Unexpected error in curate_from_reddit: {e}", exc_info=True)
+        print(f"Unexpected error in curate_from_reddit: {e}")
         return None, None, False
 
 def run_reddit_automator():