update reddit

my-fix-branch
Shane 7 months ago
parent 0c1284199b
commit 5b4b4c0253
  1. foodie_automator_google.py (4 changed lines)
  2. foodie_automator_reddit.py (36 changed lines)
  3. foodie_automator_rss.py (2 changed lines)
  4. foodie_utils.py (6 changed lines)

foodie_automator_google.py

@@ -28,7 +28,9 @@ from foodie_utils import (
     prepare_post_data, smart_image_and_filter, insert_link_naturally, get_flickr_image_via_ddg
 )
 from foodie_hooks import get_dynamic_hook, select_best_cta
+from dotenv import load_dotenv
+load_dotenv()
 
 # Flag to indicate if we're in the middle of posting
 is_posting = False

@@ -53,7 +55,7 @@ console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(
 logger.addHandler(console_handler)
 logging.info("Logging initialized for foodie_automator_google.py")
-client = OpenAI(api_key="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")
+client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_google_titles.json'
 USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'

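A minimal sketch of the dotenv pattern these hunks switch to, assuming a .env file sits next to the scripts (the file contents and key value below are placeholders, not part of the commit):

# .env  (keep this file out of version control)
# OPENAI_API_KEY=sk-your-key-here

import os
from dotenv import load_dotenv  # pip install python-dotenv
from openai import OpenAI

load_dotenv()  # loads entries from .env into os.environ without overriding existing variables
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))  # os.getenv returns None if the variable is missing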
foodie_automator_reddit.py

@@ -6,6 +6,7 @@ import os
 import json
 import signal
 import sys
+import re
 from datetime import datetime, timedelta, timezone
 from openai import OpenAI
 from urllib.parse import quote
@@ -83,7 +84,14 @@ posted_titles = set(entry["title"] for entry in posted_titles_data if "title" in
 used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
 used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
-client = OpenAI(api_key="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")
+client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+def clean_reddit_title(title):
+    """Remove Reddit flairs like [pro/chef] or [homemade] from the title."""
+    # Match patterns like [pro/chef], [homemade], etc. at the start of the title
+    cleaned_title = re.sub(r'^\[.*?\]\s*', '', title).strip()
+    logging.info(f"Cleaned Reddit title from '{title}' to '{cleaned_title}'")
+    return cleaned_title
 
 def is_interesting_reddit(title, summary, upvotes, comment_count, top_comments):
     try:
@@ -160,8 +168,11 @@ def fetch_reddit_posts():
             if pub_date < cutoff_date:
                 logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
                 continue
+            # Clean the title before storing
+            cleaned_title = clean_reddit_title(submission.title)
             articles.append({
-                "title": submission.title,
+                "title": cleaned_title,  # Use cleaned title
+                "raw_title": submission.title,  # Store raw title for reference if needed
                 "link": f"https://www.reddit.com{submission.permalink}",
                 "summary": submission.selftext,
                 "feed_title": get_clean_source_name(subreddit_name),
@@ -196,15 +207,16 @@ def curate_from_reddit():
     max_attempts = 10
     while attempts < max_attempts and articles:
         article = articles.pop(0)  # Take highest-upvote post
-        title = article["title"]
+        title = article["title"]  # Use cleaned title
+        raw_title = article["raw_title"]  # Use raw title for deduplication
         link = article["link"]
         summary = article["summary"]
         source_name = "Reddit"
         original_source = '<a href="https://www.reddit.com/">Reddit</a>'
-        if title in posted_titles:
-            print(f"Skipping already posted post: {title}")
-            logging.info(f"Skipping already posted post: {title}")
+        if raw_title in posted_titles:  # Check against raw title
+            print(f"Skipping already posted post: {raw_title}")
+            logging.info(f"Skipping already posted post: {raw_title}")
             attempts += 1
             continue
@@ -220,7 +232,7 @@ def curate_from_reddit():
         top_comments = get_top_comments(link, reddit, limit=3)
         interest_score = is_interesting_reddit(
-            title,
+            title,  # Use cleaned title
             summary,
             article["upvotes"],
             article["comment_count"],
@@ -241,7 +253,7 @@ def curate_from_reddit():
             "Do NOT introduce unrelated concepts unless in the content or comments. "
             "If brief, expand on the core idea with relevant context about its appeal or significance."
         )
-        content_to_summarize = f"{title}\n\n{summary}"
+        content_to_summarize = f"{title}\n\n{summary}"  # Use cleaned title
         if top_comments:
             content_to_summarize += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
@@ -259,7 +271,7 @@ def curate_from_reddit():
         final_summary = insert_link_naturally(final_summary, source_name, link)
-        post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
+        post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)  # Use cleaned title
         if not post_data:
             attempts += 1
             continue
@@ -314,9 +326,9 @@ def curate_from_reddit():
         is_posting = False
         timestamp = datetime.now(timezone.utc).isoformat()
-        save_json_file(POSTED_TITLES_FILE, title, timestamp)
-        posted_titles.add(title)
-        logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}")
+        save_json_file(POSTED_TITLES_FILE, raw_title, timestamp)  # Save raw title
+        posted_titles.add(raw_title)  # Add raw title to set
+        logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}")
         if image_url:
             save_json_file(USED_IMAGES_FILE, image_url, timestamp)
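The new clean_reddit_title() drives most of this file's changes, so a standalone sketch of the regex behavior may help; the sample titles are made up:

import re

def clean_reddit_title(title):
    # Non-greedy match of a single leading [flair] plus any trailing whitespace
    return re.sub(r'^\[.*?\]\s*', '', title).strip()

print(clean_reddit_title("[homemade] Shakshuka with feta"))  # Shakshuka with feta
print(clean_reddit_title("[pro/chef] [i ate] Omakase"))      # [i ate] Omakase -- only the first flair is removed
print(clean_reddit_title("Croissants [homemade]"))           # unchanged: the pattern is anchored to the start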

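And a compressed sketch of the cleaned/raw split in curate_from_reddit(), with hypothetical sample data: deduplication keys on the raw title, which matches what earlier runs saved, while the cleaned title feeds scoring and summarizing.

import re

def clean_reddit_title(title):
    return re.sub(r'^\[.*?\]\s*', '', title).strip()

posted_titles = {"[homemade] Ramen from scratch"}  # raw titles saved by earlier runs

submission_title = "[homemade] Ramen from scratch"
article = {"title": clean_reddit_title(submission_title), "raw_title": submission_title}

if article["raw_title"] in posted_titles:  # check the raw title, as the diff does
    print(f"Skipping already posted post: {article['raw_title']}")
else:
    print(f"Curating: {article['title']}")  # cleaned title flows to downstream steps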
foodie_automator_rss.py

@@ -24,7 +24,9 @@ from foodie_hooks import get_dynamic_hook, select_best_cta
 import feedparser
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import List, Dict, Any, Optional
+from dotenv import load_dotenv
+load_dotenv()
 
 # Flag to indicate if we're in the middle of posting
 is_posting = False

foodie_utils.py

@@ -10,6 +10,8 @@ import io
 import tempfile
 import requests
 import time
+from dotenv import load_dotenv
+import os
 from datetime import datetime, timezone, timedelta
 from openai import OpenAI
 from urllib.parse import quote

@@ -21,8 +23,8 @@ from foodie_config import (
     RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, SUMMARY_PERSONA_PROMPTS,
     get_clean_source_name, AUTHORS, LIGHT_TASK_MODEL, SUMMARY_MODEL
 )
-#test
-client = OpenAI(api_key="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")
+load_dotenv()
+client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
 def load_json_file(filename, expiration_days=None):
     data = []
