From 5b4b4c0253f8faca10b40ee3575ad49f27e09b16 Mon Sep 17 00:00:00 2001
From: Shane <shanehill@mail.com>
Date: Sat, 26 Apr 2025 14:47:25 +1000
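Subject: [PATCH] Strip Reddit flairs from titles; read OpenAI key from env

Clean flair tags such as [homemade] from Reddit titles before curation,
while still de-duplicating posts on the raw title. Replace the hardcoded
OpenAI API key with os.getenv("OPENAI_API_KEY") and call load_dotenv() at
import time. The removed key is still visible in this patch and in the
repository history, so it should be revoked.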

---
 foodie_automator_google.py |  4 +++-
 foodie_automator_reddit.py | 36 ++++++++++++++++++++++++------------
 foodie_automator_rss.py    |  2 ++
 foodie_utils.py            |  6 ++++--
 4 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/foodie_automator_google.py b/foodie_automator_google.py
index 56c614a..cf007f8 100644
--- a/foodie_automator_google.py
+++ b/foodie_automator_google.py
@@ -28,7 +28,9 @@ from foodie_utils import (
     prepare_post_data, smart_image_and_filter, insert_link_naturally, get_flickr_image_via_ddg
 )
 from foodie_hooks import get_dynamic_hook, select_best_cta
+from dotenv import load_dotenv
 
+load_dotenv()
 # Flag to indicate if we're in the middle of posting
 is_posting = False
 
@@ -53,7 +55,7 @@ console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(
 logger.addHandler(console_handler)
 logging.info("Logging initialized for foodie_automator_google.py")
 
-client = OpenAI(api_key="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")
+client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
 POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_google_titles.json'
 USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
diff --git a/foodie_automator_reddit.py b/foodie_automator_reddit.py
index 3f28e1b..0b0f244 100644
--- a/foodie_automator_reddit.py
+++ b/foodie_automator_reddit.py
@@ -6,6 +6,7 @@ import os
 import json
 import signal
 import sys
+import re
 from datetime import datetime, timedelta, timezone
 from openai import OpenAI
 from urllib.parse import quote
@@ -83,7 +84,14 @@ posted_titles = set(entry["title"] for entry in posted_titles_data if "title" in
 used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
 used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
 
-client = OpenAI(api_key="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")
+client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+def clean_reddit_title(title):
+    """Return title without its leading Reddit flair tags ([pro/chef], [homemade], ...); keep the stripped title if only flairs remain."""
+    # Tolerate leading whitespace and several consecutive flairs, e.g. " [homemade] [OC] Pizza"
+    cleaned_title = re.sub(r'^\s*(?:\[[^\]]*\]\s*)+', '', title).strip() or title.strip()
+    logging.info(f"Cleaned Reddit title from '{title}' to '{cleaned_title}'")
+    return cleaned_title
 
 def is_interesting_reddit(title, summary, upvotes, comment_count, top_comments):
     try:
@@ -160,8 +168,11 @@ def fetch_reddit_posts():
                 if pub_date < cutoff_date:
                     logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
                     continue
+                # Clean the title before storing
+                cleaned_title = clean_reddit_title(submission.title)
                 articles.append({
-                    "title": submission.title,
+                    "title": cleaned_title,  # Use cleaned title
+                    "raw_title": submission.title,  # Store raw title for reference if needed
                     "link": f"https://www.reddit.com{submission.permalink}",
                     "summary": submission.selftext,
                     "feed_title": get_clean_source_name(subreddit_name),
@@ -196,15 +207,16 @@ def curate_from_reddit():
     max_attempts = 10
     while attempts < max_attempts and articles:
         article = articles.pop(0)  # Take highest-upvote post
-        title = article["title"]
+        title = article["title"]  # Use cleaned title
+        raw_title = article["raw_title"]  # Use raw title for deduplication
         link = article["link"]
         summary = article["summary"]
         source_name = "Reddit"
         original_source = '<a href="https://www.reddit.com/">Reddit</a>'
         
-        if title in posted_titles:
-            print(f"Skipping already posted post: {title}")
-            logging.info(f"Skipping already posted post: {title}")
+        if raw_title in posted_titles:  # Check against raw title
+            print(f"Skipping already posted post: {raw_title}")
+            logging.info(f"Skipping already posted post: {raw_title}")
             attempts += 1
             continue
         
@@ -220,7 +232,7 @@ def curate_from_reddit():
         
         top_comments = get_top_comments(link, reddit, limit=3)
         interest_score = is_interesting_reddit(
-            title,
+            title,  # Use cleaned title
             summary,
             article["upvotes"],
             article["comment_count"],
@@ -241,7 +253,7 @@ def curate_from_reddit():
             "Do NOT introduce unrelated concepts unless in the content or comments. "
             "If brief, expand on the core idea with relevant context about its appeal or significance."
         )
-        content_to_summarize = f"{title}\n\n{summary}"
+        content_to_summarize = f"{title}\n\n{summary}"  # Use cleaned title
         if top_comments:
             content_to_summarize += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
         
@@ -259,7 +271,7 @@ def curate_from_reddit():
         
         final_summary = insert_link_naturally(final_summary, source_name, link)
         
-        post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
+        post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)  # Use cleaned title
         if not post_data:
             attempts += 1
             continue
@@ -314,9 +326,9 @@ def curate_from_reddit():
                 is_posting = False
             
             timestamp = datetime.now(timezone.utc).isoformat()
-            save_json_file(POSTED_TITLES_FILE, title, timestamp)
-            posted_titles.add(title)
-            logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}")
+            save_json_file(POSTED_TITLES_FILE, raw_title, timestamp)  # Save raw title
+            posted_titles.add(raw_title)  # Add raw title to set
+            logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}")
             
             if image_url:
                 save_json_file(USED_IMAGES_FILE, image_url, timestamp)
diff --git a/foodie_automator_rss.py b/foodie_automator_rss.py
index 2de6b0f..91554cf 100644
--- a/foodie_automator_rss.py
+++ b/foodie_automator_rss.py
@@ -24,7 +24,9 @@ from foodie_hooks import get_dynamic_hook, select_best_cta
 import feedparser
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import List, Dict, Any, Optional
+from dotenv import load_dotenv
 
+load_dotenv()
 # Flag to indicate if we're in the middle of posting
 is_posting = False
 
diff --git a/foodie_utils.py b/foodie_utils.py
index 8c8b0a2..6c2d053 100644
--- a/foodie_utils.py
+++ b/foodie_utils.py
@@ -10,6 +10,8 @@ import io
 import tempfile
 import requests
 import time
+from dotenv import load_dotenv
+import os
 from datetime import datetime, timezone, timedelta
 from openai import OpenAI
 from urllib.parse import quote
@@ -21,8 +23,8 @@ from foodie_config import (
     RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, SUMMARY_PERSONA_PROMPTS, 
     get_clean_source_name, AUTHORS, LIGHT_TASK_MODEL, SUMMARY_MODEL
 )
-#test
-client = OpenAI(api_key="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")
+load_dotenv()
+client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
 def load_json_file(filename, expiration_days=None):
     data = []