use cursor to optimize files

2025-05-03 16:23:06 +10:00
parent 427a5cb919
commit 2ca39915e0
5 changed files with 1411 additions and 1634 deletions
@@ -9,6 +9,7 @@ import json
 import signal
 import sys
 from datetime import datetime, timedelta, timezone
+from typing import List, Dict, Optional, Tuple
 from openai import OpenAI
 from urllib.parse import quote
 from selenium import webdriver
@@ -16,11 +17,12 @@ from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.chrome.options import Options
-from selenium.common.exceptions import TimeoutException
+from selenium.common.exceptions import TimeoutException, WebDriverException
 from duckduckgo_search import DDGS
 from foodie_config import (
    AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS,
-    PERSONA_CONFIGS, CATEGORIES, get_clean_source_name, X_API_CREDENTIALS
+    PERSONA_CONFIGS, CATEGORIES, get_clean_source_name, X_API_CREDENTIALS,
+    FILE_PATHS, EXPIRATION_DAYS, IMAGE_EXPIRATION_DAYS
 )
 from foodie_utils import (
    load_json_file, save_json_file, get_image, generate_image_query,
@@ -29,320 +31,254 @@ from foodie_utils import (
    generate_category_from_summary, post_to_wp, prepare_post_data,
    smart_image_and_filter, insert_link_naturally, get_flickr_image
 )
-from foodie_hooks import get_dynamic_hook, get_viral_share_prompt  # Removed select_best_cta import
+from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
 from dotenv import load_dotenv

+# Load environment variables
 load_dotenv()

+# Global state
 is_posting = False
+logger = logging.getLogger(__name__)

-def signal_handler(sig, frame):
-    logging.info("Received termination signal, checking if safe to exit...")
-    if is_posting:
-        logging.info("Currently posting, will exit after completion.")
-    else:
-        logging.info("Safe to exit immediately.")
-        sys.exit(0)
+class GoogleTrendsScraper:
+    def __init__(self):
+        self.driver = None
+        self.setup_logging()
+        self.setup_signal_handlers()
+        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+        self.posted_titles = self.load_posted_titles()
+        self.used_images = self.load_used_images()

-signal.signal(signal.SIGTERM, signal_handler)
-signal.signal(signal.SIGINT, signal_handler)
+    def setup_logging(self) -> None:
+        """Configure logging for the scraper."""
+        logger.setLevel(logging.INFO)
+        file_handler = logging.FileHandler(FILE_PATHS["posted_google_titles"].with_suffix('.log'), mode='a')
+        file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
+        logger.addHandler(file_handler)
+        console_handler = logging.StreamHandler()
+        console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
+        logger.addHandler(console_handler)
+        logger.info("Logging initialized for Google Trends scraper")

-logger = logging.getLogger()
-logger.setLevel(logging.INFO)
-file_handler = logging.FileHandler('/home/shane/foodie_automator/foodie_automator_google.log', mode='a')
-file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
-logger.addHandler(file_handler)
-console_handler = logging.StreamHandler()
-console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
-logger.addHandler(console_handler)
-logging.info("Logging initialized for foodie_automator_google.py")
+    def setup_signal_handlers(self) -> None:
+        """Set up signal handlers for graceful shutdown."""
+        def signal_handler(sig, frame):
+            logger.info("Received termination signal, checking if safe to exit...")
+            if is_posting:
+                logger.info("Currently posting, will exit after completion.")
+            else:
+                logger.info("Safe to exit immediately.")
+                sys.exit(0)

-client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+        signal.signal(signal.SIGTERM, signal_handler)
+        signal.signal(signal.SIGINT, signal_handler)

-POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_google_titles.json'
-USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
-EXPIRATION_HOURS = 24
-IMAGE_EXPIRATION_DAYS = 7
+    def load_posted_titles(self) -> set:
+        """Load and return the set of posted titles."""
+        try:
+            data = load_json_file(FILE_PATHS["posted_google_titles"], EXPIRATION_DAYS)
+            return {entry["title"] for entry in data}
+        except Exception as e:
+            logger.error(f"Error loading posted titles: {e}")
+            return set()

-posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
-posted_titles = set(entry["title"] for entry in posted_titles_data)
-used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
+    def load_used_images(self) -> set:
+        """Load and return the set of used images."""
+        try:
+            data = load_json_file(FILE_PATHS["used_images"], IMAGE_EXPIRATION_DAYS)
+            return {entry["title"] for entry in data if "title" in entry}
+        except Exception as e:
+            logger.error(f"Error loading used images: {e}")
+            return set()

-def parse_search_volume(volume_text):
-    try:
-        volume_part = volume_text.split('\n')[0].lower().strip().replace('+', '')
-        if 'k' in volume_part:
-            volume = float(volume_part.replace('k', '')) * 1000
-        elif 'm' in volume_part:
-            volume = float(volume_part.replace('m', '')) * 1000000
-        else:
-            volume = float(volume_part)
-        return volume
-    except (ValueError, AttributeError) as e:
-        logging.warning(f"Could not parse search volume from '{volume_text}': {e}")
-        return 0
+    def parse_search_volume(self, volume_text: str) -> float:
+        """Parse search volume from text into a numeric value."""
+        try:
+            volume_part = volume_text.split('\n')[0].lower().strip().replace('+', '')
+            if 'k' in volume_part:
+                return float(volume_part.replace('k', '')) * 1000
+            elif 'm' in volume_part:
+                return float(volume_part.replace('m', '')) * 1000000
+            return float(volume_part)
+        except (ValueError, AttributeError) as e:
+            logger.warning(f"Could not parse search volume from '{volume_text}': {e}")
+            return 0.0

-def scrape_google_trends(geo='US'):
-    chrome_options = Options()
-    chrome_options.add_argument("--headless")
-    chrome_options.add_argument("--no-sandbox")
-    chrome_options.add_argument("--disable-dev-shm-usage")
-    chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/125.0.0.0 Safari/537.36")
+    def setup_driver(self) -> None:
+        """Set up the Chrome WebDriver with appropriate options."""
+        chrome_options = Options()
+        chrome_options.add_argument("--headless")
+        chrome_options.add_argument("--no-sandbox")
+        chrome_options.add_argument("--disable-dev-shm-usage")
+        chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/125.0.0.0 Safari/537.36")
+        self.driver = webdriver.Chrome(options=chrome_options)

-    driver = webdriver.Chrome(options=chrome_options)
-    try:
-        for attempt in range(3):
-            try:
-                time.sleep(random.uniform(2, 5))
-                url = f"https://trends.google.com/trending?geo={geo}&hours=24&sort=search-volume&category=5"
-                logging.info(f"Navigating to {url} (attempt {attempt + 1})")
-                driver.get(url)
-
-                logging.info("Waiting for page to load...")
-                WebDriverWait(driver, 60).until(
-                    EC.presence_of_element_located((By.TAG_NAME, "tbody"))
-                )
-                break
-            except TimeoutException:
-                logging.warning(f"Timeout on attempt {attempt + 1} for geo={geo}")
-                if attempt == 2:
-                    logging.error(f"Failed after 3 attempts for geo={geo}")
-                    return []
-                time.sleep(5)
-
-        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
-        time.sleep(2)
+    def scrape_google_trends(self, geo: str = 'US') -> List[Dict]:
+        """Scrape Google Trends for the specified region."""
+        if not self.driver:
+            self.setup_driver()

        trends = []
-        rows = driver.find_elements(By.XPATH, "//tbody/tr")
-        logging.info(f"Found {len(rows)} rows in tbody for geo={geo}")
+        try:
+            for attempt in range(3):
+                try:
+                    time.sleep(random.uniform(2, 5))
+                    url = f"https://trends.google.com/trending?geo={geo}&hours=24&sort=search-volume&category=5"
+                    logger.info(f"Navigating to {url} (attempt {attempt + 1})")
+                    self.driver.get(url)

-        cutoff_date = datetime.now(timezone.utc) - timedelta(hours=24)
-        for row in rows:
-            try:
-                columns = row.find_elements(By.TAG_NAME, "td")
-                if len(columns) >= 3:
-                    title = columns[1].text.strip()
-                    search_volume_text = columns[2].text.strip()
-                    search_volume = parse_search_volume(search_volume_text)
-                    logging.info(f"Parsed trend: {title} with search volume: {search_volume}")
-                    if title and search_volume >= 20000:
-                        link = f"https://trends.google.com/trends/explore?q={quote(title)}&geo={geo}"
-                        trends.append({
-                            "title": title,
-                            "link": link,
-                            "search_volume": search_volume
-                        })
-                        logging.info(f"Added trend: {title} with search volume: {search_volume}")
-                    else:
-                        logging.info(f"Skipping trend: {title} (volume: {search_volume} < 20K or no title)")
-                else:
-                    logging.info(f"Skipping row with insufficient columns: {len(columns)}")
-            except Exception as e:
-                logging.warning(f"Row processing error: {e}")
+                    logger.info("Waiting for page to load...")
+                    WebDriverWait(self.driver, 60).until(
+                        EC.presence_of_element_located((By.TAG_NAME, "tbody"))
+                    )
+                    break
+                except TimeoutException:
+                    logger.warning(f"Timeout on attempt {attempt + 1} for geo={geo}")
+                    if attempt == 2:
+                        logger.error(f"Failed after 3 attempts for geo={geo}")
+                        return []
+                    time.sleep(5)
+
+            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
+            time.sleep(2)
+
+            rows = self.driver.find_elements(By.XPATH, "//tbody/tr")
+            logger.info(f"Found {len(rows)} rows in tbody for geo={geo}")
+
+            for row in rows:
+                try:
+                    columns = row.find_elements(By.TAG_NAME, "td")
+                    if len(columns) >= 3:
+                        title = columns[1].text.strip()
+                        search_volume = self.parse_search_volume(columns[2].text.strip())
+                        if title and search_volume >= 20000:
+                            link = f"https://trends.google.com/trends/explore?q={quote(title)}&geo={geo}"
+                            trends.append({
+                                "title": title,
+                                "link": link,
+                                "search_volume": search_volume
+                            })
+                            logger.info(f"Added trend: {title} with search volume: {search_volume}")
+                except Exception as e:
+                    logger.warning(f"Row processing error: {e}")
+                    continue
+
+            if trends:
+                trends.sort(key=lambda x: x["search_volume"], reverse=True)
+                logger.info(f"Extracted {len(trends)} trends for geo={geo}")
+            else:
+                logger.warning(f"No valid trends found with search volume >= 20K for geo={geo}")
+
+        except WebDriverException as e:
+            logger.error(f"WebDriver error: {e}")
+        finally:
+            if self.driver:
+                self.driver.quit()
+                self.driver = None
+                logger.info(f"Chrome driver closed for geo={geo}")
+
+        return trends
+
+    def fetch_duckduckgo_news_context(self, trend_title: str, hours: int = 24) -> str:
+        """Fetch news context for a trend from DuckDuckGo."""
+        try:
+            with DDGS() as ddgs:
+                results = ddgs.news(f"{trend_title} news", timelimit="d", max_results=5)
+                titles = []
+                for r in results:
+                    try:
+                        date_str = r["date"]
+                        dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc) if '+00:00' in date_str else datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
+                        if dt > (datetime.now(timezone.utc) - timedelta(hours=hours)):
+                            titles.append(r["title"].lower())
+                    except ValueError as e:
+                        logger.warning(f"Date parsing failed for '{date_str}': {e}")
+                        continue
+                context = " ".join(titles) if titles else "No recent news found within 24 hours"
+                logger.info(f"DuckDuckGo News context for '{trend_title}': {context}")
+                return context
+        except Exception as e:
+            logger.warning(f"DuckDuckGo News context fetch failed for '{trend_title}': {e}")
+            return trend_title
+
+    def curate_from_google_trends(self, geo_list: List[str] = ['US']) -> Tuple[Optional[Dict], Optional[str], int]:
+        """Curate content from Google Trends for multiple regions."""
+        all_trends = []
+        for geo in geo_list:
+            trends = self.scrape_google_trends(geo=geo)
+            if trends:
+                all_trends.extend(trends)
+        
+        if not all_trends:
+            logger.info("No Google Trends data available")
+            return None, None, random.randint(600, 1800)
+
+        for trend in all_trends:
+            title = trend["title"]
+            if title in self.posted_titles:
+                logger.info(f"Skipping already posted trend: {title}")
                continue

-        if trends:
-            trends.sort(key=lambda x: x["search_volume"], reverse=True)
-            logging.info(f"Extracted {len(trends)} trends for geo={geo}: {[t['title'] for t in trends]}")
-            print(f"Raw trends fetched for geo={geo}: {[t['title'] for t in trends]}")
-        else:
-            logging.warning(f"No valid trends found with search volume >= 20K for geo={geo}")
-        return trends
-    finally:
-        driver.quit()
-        logging.info(f"Chrome driver closed for geo={geo}")
+            logger.info(f"Processing Google Trend: {title}")
+            image_query, relevance_keywords, skip = smart_image_and_filter(title, trend.get("summary", ""))
+            if skip:
+                logger.info(f"Skipping filtered Google Trend: {title}")
+                continue

-def fetch_duckduckgo_news_context(trend_title, hours=24):
-    try:
-        with DDGS() as ddgs:
-            results = ddgs.news(f"{trend_title} news", timelimit="d", max_results=5)
-            titles = []
-            for r in results:
-                try:
-                    date_str = r["date"]
-                    if '+00:00' in date_str:
-                        dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc)
-                    else:
-                        dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
-                    if dt > (datetime.now(timezone.utc) - timedelta(hours=24)):
-                        titles.append(r["title"].lower())
-                except ValueError as e:
-                    logging.warning(f"Date parsing failed for '{date_str}': {e}")
-                    continue
-            context = " ".join(titles) if titles else "No recent news found within 24 hours"
-            logging.info(f"DuckDuckGo News context for '{trend_title}': {context}")
-            return context
-    except Exception as e:
-        logging.warning(f"DuckDuckGo News context fetch failed for '{trend_title}': {e}")
-        return trend_title
+            scoring_content = f"{title}\n\n{trend.get('summary', '')}"
+            interest_score = is_interesting(scoring_content)
+            if interest_score < 6:
+                logger.info(f"Google Trends Interest Too Low: {interest_score}")
+                continue
+
+            num_paragraphs = determine_paragraph_count(interest_score)
+            extra_prompt = (
+                f"Generate exactly {num_paragraphs} paragraphs.\n"
+                f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
+                f"Do NOT introduce unrelated concepts.\n"
+                f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
+                f"Do not include emojis in the summary."
+            )
+
+            final_summary = summarize_with_gpt4o(
+                scoring_content,
+                "Google Trends",
+                trend["link"],
+                interest_score=interest_score,
+                extra_prompt=extra_prompt
+            )
+
+            if not final_summary:
+                logger.info(f"Summary failed for '{title}'")
+                continue
+
+            final_summary = insert_link_naturally(final_summary, "Google Trends", trend["link"])
+            post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
+
+            if post_data and author:
+                return post_data, author, random.randint(600, 1800)

-def curate_from_google_trends(geo_list=['US']):
-    all_trends = []
-    for geo in geo_list:
-        trends = scrape_google_trends(geo=geo)
-        if trends:
-            all_trends.extend(trends)
-    
-    if not all_trends:
-        print("No Google Trends data available")
-        logging.info("No Google Trends data available")
        return None, None, random.randint(600, 1800)

-    attempts = 0
-    max_attempts = 10
-    while attempts < max_attempts and all_trends:
-        trend = all_trends.pop(0)
-        title = trend["title"]
-        link = trend.get("link", "https://trends.google.com/")
-        summary = trend.get("summary", "")
-        source_name = "Google Trends"
-        original_source = f'<a href="{link}">{source_name}</a>'
-
-        if title in posted_titles:
-            print(f"Skipping already posted trend: {title}")
-            logging.info(f"Skipping already posted trend: {title}")
-            attempts += 1
-            continue
-
-        print(f"Trying Google Trend: {title} from {source_name}")
-        logging.info(f"Trying Google Trend: {title} from {source_name}")
-
-        image_query, relevance_keywords, skip = smart_image_and_filter(title, summary)
-        if skip:
-            print(f"Skipping filtered Google Trend: {title}")
-            logging.info(f"Skipping filtered Google Trend: {title}")
-            attempts += 1
-            continue
-
-        scoring_content = f"{title}\n\n{summary}"
-        interest_score = is_interesting(scoring_content)
-        logging.info(f"Interest score for '{title}': {interest_score}")
-        if interest_score < 6:
-            print(f"Google Trends Interest Too Low: {interest_score}")
-            logging.info(f"Google Trends Interest Too Low: {interest_score}")
-            attempts += 1
-            continue
-
-        num_paragraphs = determine_paragraph_count(interest_score)
-        extra_prompt = (
-            f"Generate exactly {num_paragraphs} paragraphs.\n"
-            f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
-            f"Do NOT introduce unrelated concepts.\n"
-            f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
-            f"Do not include emojis in the summary."
-        )
-        content_to_summarize = scoring_content
-        final_summary = summarize_with_gpt4o(
-            content_to_summarize,
-            source_name,
-            link,
-            interest_score=interest_score,
-            extra_prompt=extra_prompt
-        )
-        if not final_summary:
-            logging.info(f"Summary failed for '{title}'")
-            attempts += 1
-            continue
-
-        final_summary = insert_link_naturally(final_summary, source_name, link)
-
-        post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
-        if not post_data:
-            attempts += 1
-            continue
-
-        image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
-        if not image_url:
-            image_url, image_source, uploader, page_url = get_image(image_query)
-
-        hook = get_dynamic_hook(post_data["title"]).strip()
-
-        # Generate viral share prompt
-        share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
-        share_links_template = (
-            f'<p>{share_prompt} '
-            f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
-            f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
-        )
-        post_data["content"] = f"{final_summary}\n\n{share_links_template}"
-
-        global is_posting
-        is_posting = True
-        try:
-            post_id, post_url = post_to_wp(
-                post_data=post_data,
-                category=category,
-                link=link,
-                author=author,
-                image_url=image_url,
-                original_source=original_source,
-                image_source=image_source,
-                uploader=uploader,
-                pixabay_url=pixabay_url,
-                interest_score=interest_score,
-                should_post_tweet=True
-            )
-        finally:
-            is_posting = False
-
-        if post_id:
-            share_text = f"Check out this foodie gem! {post_data['title']}"
-            share_text_encoded = quote(share_text)
-            post_url_encoded = quote(post_url)
-            share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
-            # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
-            post_data["content"] = f"{final_summary}\n\n{share_links}"  # Removed cta from content
-            is_posting = True
-            try:
-                post_to_wp(
-                    post_data=post_data,
-                    category=category,
-                    link=link,
-                    author=author,
-                    image_url=image_url,
-                    original_source=original_source,
-                    image_source=image_source,
-                    uploader=uploader,
-                    pixabay_url=pixabay_url,
-                    interest_score=interest_score,
-                    post_id=post_id,
-                    should_post_tweet=False
-                )
-            finally:
-                is_posting = False
-
-            timestamp = datetime.now(timezone.utc).isoformat()
-            save_json_file(POSTED_TITLES_FILE, title, timestamp)
-            posted_titles.add(title)
-            logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
-
-            if image_url:
-                save_json_file(USED_IMAGES_FILE, image_url, timestamp)
-                used_images.add(image_url)
-                logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
-
-            print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
-            logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
-            return post_data, category, random.randint(0, 1800)
-
-        attempts += 1
-        logging.info(f"WP posting failed for '{post_data['title']}'")
-
-    print("No interesting Google Trend found after attempts")
-    logging.info("No interesting Google Trend found after attempts")
-    return None, None, random.randint(600, 1800)
-
 def run_google_trends_automator():
-    logging.info("***** Google Trends Automator Launched *****")
-    geo_list = ['US', 'GB', 'AU']
-    post_data, category, sleep_time = curate_from_google_trends(geo_list=geo_list)
-    if sleep_time is None:
-        sleep_time = random.randint(600, 1800)
-    print(f"Sleeping for {sleep_time}s")
-    logging.info(f"Completed run with sleep time: {sleep_time} seconds")
-    time.sleep(sleep_time)
-    return post_data, category, sleep_time
+    """Main function to run the Google Trends automator."""
+    scraper = GoogleTrendsScraper()
+    while True:
+        try:
+            post_data, author, sleep_time = scraper.curate_from_google_trends()
+            if post_data and author:
+                global is_posting
+                is_posting = True
+                try:
+                    post_to_wp(post_data, author)
+                    logger.info(f"Successfully posted: {post_data['title']}")
+                finally:
+                    is_posting = False
+            time.sleep(sleep_time)
+        except Exception as e:
+            logger.error(f"Error in Google Trends automator: {e}")
+            time.sleep(300)  # Wait 5 minutes before retrying

 if __name__ == "__main__":
    run_google_trends_automator()
@@ -9,6 +9,7 @@ import signal
 import sys
 import re
 from datetime import datetime, timedelta, timezone
+from typing import List, Dict, Optional, Tuple, Set
 from openai import OpenAI
 from urllib.parse import quote
 from requests.packages.urllib3.util.retry import Retry
@@ -19,7 +20,7 @@ from foodie_config import (
    AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS,
    PERSONA_CONFIGS, CATEGORIES, get_clean_source_name,
    REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USER_AGENT, LIGHT_TASK_MODEL,
-    X_API_CREDENTIALS
+    X_API_CREDENTIALS, FILE_PATHS, EXPIRATION_DAYS, IMAGE_EXPIRATION_DAYS
 )
 from foodie_utils import (
    load_json_file, save_json_file, get_image, generate_image_query,
@@ -28,29 +29,48 @@ from foodie_utils import (
    prepare_post_data, select_best_author, smart_image_and_filter,
    get_flickr_image
 )
-from foodie_hooks import get_dynamic_hook, get_viral_share_prompt  # Removed select_best_cta import
+from foodie_hooks import get_dynamic_hook, get_viral_share_prompt

+# Load environment variables
 load_dotenv()

+# Global state
 is_posting = False
+logger = logging.getLogger(__name__)

-def signal_handler(sig, frame):
-    logging.info("Received termination signal, checking if safe to exit...")
-    if is_posting:
-        logging.info("Currently posting, will exit after completion.")
-    else:
-        logging.info("Safe to exit immediately.")
-        sys.exit(0)
+class RedditScraper:
+    def __init__(self):
+        self.setup_logging()
+        self.setup_signal_handlers()
+        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+        self.posted_titles = self.load_posted_titles()
+        self.used_images = self.load_used_images()
+        self.reddit = self.setup_reddit_client()
+        self.setup_requests_session()

-signal.signal(signal.SIGTERM, signal_handler)
-signal.signal(signal.SIGINT, signal_handler)
+    def setup_logging(self) -> None:
+        """Configure logging for the scraper."""
+        log_file = FILE_PATHS["posted_reddit_titles"].with_suffix('.log')
+        self.prune_old_logs(log_file)
+        
+        logging.basicConfig(
+            filename=str(log_file),
+            level=logging.INFO,
+            format="%(asctime)s - %(levelname)s - %(message)s"
+        )
+        logging.getLogger("requests").setLevel(logging.WARNING)
+        logging.getLogger("prawcore").setLevel(logging.WARNING)
+        console_handler = logging.StreamHandler()
+        console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
+        logging.getLogger().addHandler(console_handler)
+        logger.info("Logging initialized for Reddit scraper")

-LOG_FILE = "/home/shane/foodie_automator/foodie_automator_reddit.log"
-LOG_PRUNE_DAYS = 30
+    def prune_old_logs(self, log_file: str) -> None:
+        """Prune log entries older than LOG_PRUNE_DAYS."""
+        if not os.path.exists(log_file):
+            return

-def setup_logging():
-    if os.path.exists(LOG_FILE):
-        with open(LOG_FILE, 'r') as f:
+        with open(log_file, 'r') as f:
            lines = f.readlines()
        
        log_entries = []
@@ -68,7 +88,7 @@ def setup_logging():
        if current_entry:
            log_entries.append(''.join(current_entry))
        
-        cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
+        cutoff = datetime.now(timezone.utc) - timedelta(days=30)  # LOG_PRUNE_DAYS
        pruned_entries = []
        for entry in log_entries:
            try:
@@ -76,323 +96,236 @@ def setup_logging():
                if timestamp > cutoff:
                    pruned_entries.append(entry)
            except ValueError:
-                logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...")
+                logger.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...")
                continue
        
-        with open(LOG_FILE, 'w') as f:
+        with open(log_file, 'w') as f:
            f.writelines(pruned_entries)
-    
-    logging.basicConfig(
-        filename=LOG_FILE,
-        level=logging.INFO,
-        format="%(asctime)s - %(levelname)s - %(message)s"
-    )
-    logging.getLogger("requests").setLevel(logging.WARNING)
-    logging.getLogger("prawcore").setLevel(logging.WARNING)
-    console_handler = logging.StreamHandler()
-    console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
-    logging.getLogger().addHandler(console_handler)
-    logging.info("Logging initialized for foodie_automator_reddit.py")

-setup_logging()
+    def setup_signal_handlers(self) -> None:
+        """Set up signal handlers for graceful shutdown.
+
+        Installs one handler for both SIGTERM and SIGINT. The handler exits
+        the process with status 0 unless the module-level ``is_posting`` flag
+        is set, in which case it only logs and returns.
+        """
+        def signal_handler(sig, frame):
+            logger.info("Received termination signal, checking if safe to exit...")
+            if is_posting:
+                # NOTE(review): nothing is recorded here, so no visible code
+                # exits once posting completes — confirm the intended behaviour.
+                logger.info("Currently posting, will exit after completion.")
+            else:
+                logger.info("Safe to exit immediately.")
+                sys.exit(0)

-POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_reddit_titles.json'
-USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
-EXPIRATION_HOURS = 24
-IMAGE_EXPIRATION_DAYS = 7
+        # Register the same handler for both termination signals.
+        signal.signal(signal.SIGTERM, signal_handler)
+        signal.signal(signal.SIGINT, signal_handler)

-posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
-posted_titles = set(entry["title"] for entry in posted_titles_data if "title" in entry)
-used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
-used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
-
-client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-
-def clean_reddit_title(title):
-    cleaned_title = re.sub(r'^\[.*?\]\s*', '', title).strip()
-    logging.info(f"Cleaned Reddit title from '{title}' to '{cleaned_title}'")
-    return cleaned_title
-
-def is_interesting_reddit(title, summary, upvotes, comment_count, top_comments):
-    try:
-        content = f"Title: {title}\n\nContent: {summary}"
-        if top_comments:
-            content += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
-        
-        response = client.chat.completions.create(
-            model=LIGHT_TASK_MODEL,
-            messages=[
-                {"role": "system", "content": (
-                    "Rate this Reddit post from 0-10 based on rarity, buzzworthiness, and engagement potential for food lovers, covering food topics (skip recipes). "
-                    "Score 8-10 for rare, highly shareable ideas (e.g., unique dishes or restaurant trends). "
-                    "Score 5-7 for fresh, engaging updates with broad appeal. Score below 5 for common or unremarkable content. "
-                    "Consider comments for added context (e.g., specific locations or unique details). "
-                    "Return only a number."
-                )},
-                {"role": "user", "content": content}
-            ],
-            max_tokens=5
+    def setup_reddit_client(self) -> praw.Reddit:
+        """Set up and return a Reddit client with proper configuration.
+
+        Returns:
+            A ``praw.Reddit`` instance built from the module-level
+            REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET and REDDIT_USER_AGENT names.
+        """
+        # NOTE(review): the REDDIT_* names are not defined in the visible part
+        # of this file — confirm they are imported.
+        return praw.Reddit(
+            client_id=REDDIT_CLIENT_ID,
+            client_secret=REDDIT_CLIENT_SECRET,
+            user_agent=REDDIT_USER_AGENT
         )
-        base_score = int(response.choices[0].message.content.strip()) if response.choices[0].message.content.strip().isdigit() else 0

-        engagement_boost = 0
-        if upvotes >= 500:
-            engagement_boost += 3
-        elif upvotes >= 100:
-            engagement_boost += 2
-        elif upvotes >= 50:
-            engagement_boost += 1
-        
-        if comment_count >= 100:
-            engagement_boost += 2
-        elif comment_count >= 20:
-            engagement_boost += 1
+    def setup_requests_session(self) -> None:
+        """Create ``self.session`` with automatic retries for HTTPS requests.
+
+        The retry policy allows up to 5 retries with a backoff factor of 0.1
+        for responses with status 500, 502, 503 or 504. Only the ``https://``
+        prefix is mounted with the retrying adapter.
+        """
+        self.session = requests.Session()
+        retry_policy = Retry(total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504])
+        https_adapter = HTTPAdapter(max_retries=retry_policy)
+        self.session.mount('https://', https_adapter)

-        final_score = min(base_score + engagement_boost, 10)
-        logging.info(f"Reddit Interest Score: {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count}, top_comments: {len(top_comments)}) for '{title}'")
-        print(f"Interest Score for '{title[:50]}...': {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count})")
-        return final_score
-    except Exception as e:
-        logging.error(f"Reddit interestingness scoring failed: {e}")
-        print(f"Reddit Interest Error: {e}")
-        return 0
-
-def get_top_comments(post_url, reddit, limit=3):
-    try:
-        submission = reddit.submission(url=post_url)
-        submission.comment_sort = 'top'
-        submission.comments.replace_more(limit=0)
-        top_comments = [comment.body for comment in submission.comments[:limit] if not comment.body.startswith('[deleted]')]
-        logging.info(f"Fetched {len(top_comments)} top comments for {post_url}")
-        return top_comments
-    except Exception as e:
-        logging.error(f"Failed to fetch comments for {post_url}: {e}")
-        return []
-
-def fetch_reddit_posts():
-    reddit = praw.Reddit(
-        client_id=REDDIT_CLIENT_ID,
-        client_secret=REDDIT_CLIENT_SECRET,
-        user_agent=REDDIT_USER_AGENT
-    )
-    feeds = ['FoodPorn', 'restaurant', 'FoodIndustry', 'food']
-    articles = []
-    cutoff_date = datetime.now(timezone.utc) - timedelta(hours=EXPIRATION_HOURS)
-    
-    logging.info(f"Starting fetch with cutoff date: {cutoff_date}")
-    for subreddit_name in feeds:
+    def load_posted_titles(self) -> Set[str]:
+        """Load and return the set of posted titles.
+
+        Reads FILE_PATHS["posted_reddit_titles"] through ``load_json_file``
+        and collects the "title" value of every entry that has one. Any
+        exception is logged and an empty set is returned instead.
+        """
+        # NOTE(review): the removed code passed an hours value (EXPIRATION_HOURS)
+        # as the second argument; EXPIRATION_DAYS is passed now — confirm the
+        # unit load_json_file expects.
         try:
-            subreddit = reddit.subreddit(subreddit_name)
-            for submission in subreddit.top(time_filter='day', limit=100):
-                pub_date = datetime.fromtimestamp(submission.created_utc, tz=timezone.utc)
-                if pub_date < cutoff_date:
-                    logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
-                    continue
-                cleaned_title = clean_reddit_title(submission.title)
-                articles.append({
-                    "title": cleaned_title,
-                    "raw_title": submission.title,
-                    "link": f"https://www.reddit.com{submission.permalink}",
-                    "summary": submission.selftext,
-                    "feed_title": get_clean_source_name(subreddit_name),
-                    "pub_date": pub_date,
-                    "upvotes": submission.score,
-                    "comment_count": submission.num_comments
-                })
-            logging.info(f"Fetched {len(articles)} posts from r/{subreddit_name}")
+            data = load_json_file(FILE_PATHS["posted_reddit_titles"], EXPIRATION_DAYS)
+            return {entry["title"] for entry in data if "title" in entry}
         except Exception as e:
-            logging.error(f"Failed to fetch Reddit feed r/{subreddit_name}: {e}")
-    
-    logging.info(f"Total Reddit posts fetched: {len(articles)}")
-    return articles
+            logger.error(f"Error loading posted titles: {e}")
+            return set()

-def curate_from_reddit():
-    articles = fetch_reddit_posts()
-    if not articles:
-        print("No Reddit posts available")
-        logging.info("No Reddit posts available")
-        return None, None, None
-
-    articles.sort(key=lambda x: x["upvotes"], reverse=True)
-    
-    reddit = praw.Reddit(
-        client_id=REDDIT_CLIENT_ID,
-        client_secret=REDDIT_CLIENT_SECRET,
-        user_agent=REDDIT_USER_AGENT
-    )
-    
-    attempts = 0
-    max_attempts = 10
-    while attempts < max_attempts and articles:
-        article = articles.pop(0)
-        title = article["title"]
-        raw_title = article["raw_title"]
-        link = article["link"]
-        summary = article["summary"]
-        source_name = "Reddit"
-        original_source = '<a href="https://www.reddit.com/">Reddit</a>'
-        
-        if raw_title in posted_titles:
-            print(f"Skipping already posted post: {raw_title}")
-            logging.info(f"Skipping already posted post: {raw_title}")
-            attempts += 1
-            continue
-        
-        print(f"Trying Reddit Post: {title} from {source_name}")
-        logging.info(f"Trying Reddit Post: {title} from {source_name}")
-        
-        image_query, relevance_keywords, skip = smart_image_and_filter(title, summary)
-        if skip or any(keyword in title.lower() or keyword in raw_title.lower() for keyword in RECIPE_KEYWORDS + ["homemade"]):
-            print(f"Skipping filtered Reddit post: {title}")
-            logging.info(f"Skipping filtered Reddit post: {title}")
-            attempts += 1
-            continue
-        
-        top_comments = get_top_comments(link, reddit, limit=3)
-        interest_score = is_interesting_reddit(
-            title,
-            summary,
-            article["upvotes"],
-            article["comment_count"],
-            top_comments
-        )
-        logging.info(f"Interest Score: {interest_score} for '{title}'")
-        if interest_score < 6:
-            print(f"Reddit Interest Too Low: {interest_score}")
-            logging.info(f"Reddit Interest Too Low: {interest_score}")
-            attempts += 1
-            continue
-        
-        num_paragraphs = determine_paragraph_count(interest_score)
-        extra_prompt = (
-            f"Generate exactly {num_paragraphs} paragraphs.\n"
-            f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
-            f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
-            f"Do NOT introduce unrelated concepts unless in the content or comments.\n"
-            f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
-            f"Do not include emojis in the summary."
-        )
-        content_to_summarize = f"{title}\n\n{summary}"
-        if top_comments:
-            content_to_summarize += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
-        
-        final_summary = summarize_with_gpt4o(
-            content_to_summarize,
-            source_name,
-            link,
-            interest_score=interest_score,
-            extra_prompt=extra_prompt
-        )
-        if not final_summary:
-            logging.info(f"Summary failed for '{title}'")
-            attempts += 1
-            continue
-        
-        final_summary = insert_link_naturally(final_summary, source_name, link)
-        
-        post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
-        if not post_data:
-            attempts += 1
-            continue
-        
-        image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
-        if not image_url:
-            image_url, image_source, uploader, page_url = get_image(image_query)
-        
-        hook = get_dynamic_hook(post_data["title"]).strip()
-        # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=None)
-        
-        # Generate viral share prompt
-        share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
-        share_links_template = (
-            f'<p>{share_prompt} '
-            f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
-            f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
-        )
-        post_data["content"] = f"{final_summary}\n\n{share_links_template}"  # Removed cta from content
-        
-        global is_posting
-        is_posting = True
+    def load_used_images(self) -> Set[str]:
+        """Load and return the set of used images.
+
+        Reads FILE_PATHS["used_images"] through ``load_json_file`` with
+        IMAGE_EXPIRATION_DAYS and collects the "title" value of every entry
+        that has one. Any exception is logged and an empty set is returned
+        instead.
+        """
         try:
-            post_id, post_url = post_to_wp(
-                post_data=post_data,
-                category=category,
-                link=link,
-                author=author,
-                image_url=image_url,
-                original_source=original_source,
-                image_source=image_source,
-                uploader=uploader,
-                pixabay_url=pixabay_url,
-                interest_score=interest_score,
-                should_post_tweet=True
-            )
-        finally:
-            is_posting = False
+            data = load_json_file(FILE_PATHS["used_images"], IMAGE_EXPIRATION_DAYS)
+            return {entry["title"] for entry in data if "title" in entry}
+        except Exception as e:
+            logger.error(f"Error loading used images: {e}")
+            return set()

-        if post_id:
-            share_text = f"Check out this foodie gem! {post_data['title']}"
-            share_text_encoded = quote(share_text)
-            post_url_encoded = quote(post_url)
-            share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
-            # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
-            post_data["content"] = f"{final_summary}\n\n{share_links}"  # Removed cta from content
-            is_posting = True
-            try:
-                post_to_wp(
-                    post_data=post_data,
-                    category=category,
-                    link=link,
-                    author=author,
-                    image_url=image_url,
-                    original_source=original_source,
-                    image_source=image_source,
-                    uploader=uploader,
-                    pixabay_url=pixabay_url,
-                    interest_score=interest_score,
-                    post_id=post_id,
-                    should_post_tweet=False
-                )
-            finally:
-                is_posting = False
+    def clean_reddit_title(self, title: str) -> str:
+        """Return ``title`` without a leading ``[...]`` tag and surrounding whitespace.
+
+        Only a bracketed tag at the very start of the title is removed; the
+        original and cleaned titles are written to the log.
+        """
+        without_tag = re.sub(r'^\[.*?\]\s*', '', title)
+        result = without_tag.strip()
+        logger.info(f"Cleaned Reddit title from '{title}' to '{result}'")
+        return result
+
+    def is_interesting_reddit(self, title: str, summary: str, upvotes: int, comment_count: int, top_comments: List[str]) -> int:
+        """Determine the interest score for a Reddit post.
+
+        The model named by LIGHT_TASK_MODEL is asked for a 0-10 rating of the
+        title, summary and (when present) top comments. That base score is
+        raised by an engagement boost and the sum is capped at 10:
+
+        * upvotes >= 500: +3, >= 100: +2, >= 50: +1
+        * comment_count >= 100: +2, >= 20: +1
+
+        Args:
+            title: Cleaned post title.
+            summary: Post body text.
+            upvotes: Submission score.
+            comment_count: Number of comments on the submission.
+            top_comments: Comment bodies to give the model extra context.
+
+        Returns:
+            The final score, or 0 when the request or parsing fails.
+        """
+        try:
+            content = f"Title: {title}\n\nContent: {summary}"
+            if top_comments:
+                # Join outside the f-string: a backslash inside a replacement
+                # field is a SyntaxError before Python 3.12.
+                joined_comments = "\n".join(top_comments)
+                content += f"\n\nTop Comments:\n{joined_comments}"
             
-            timestamp = datetime.now(timezone.utc).isoformat()
-            save_json_file(POSTED_TITLES_FILE, raw_title, timestamp)
-            posted_titles.add(raw_title)
-            logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}")
+            response = self.client.chat.completions.create(
+                model=LIGHT_TASK_MODEL,
+                messages=[
+                    {"role": "system", "content": (
+                        "Rate this Reddit post from 0-10 based on rarity, buzzworthiness, and engagement potential for food lovers, covering food topics (skip recipes). "
+                        "Score 8-10 for rare, highly shareable ideas (e.g., unique dishes or restaurant trends). "
+                        "Score 5-7 for fresh, engaging updates with broad appeal. Score below 5 for common or unremarkable content. "
+                        "Consider comments for added context (e.g., specific locations or unique details). "
+                        "Return only a number."
+                    )},
+                    {"role": "user", "content": content}
+                ],
+                max_tokens=5
+            )
+            # Accept replies such as "8", "8." or "8/10" by reading the leading
+            # integer; an empty or non-numeric reply scores 0.
+            reply = (response.choices[0].message.content or "").strip()
+            leading_number = re.match(r"\d+", reply)
+            base_score = int(leading_number.group()) if leading_number else 0
+
+            engagement_boost = 0
+            if upvotes >= 500:
+                engagement_boost += 3
+            elif upvotes >= 100:
+                engagement_boost += 2
+            elif upvotes >= 50:
+                engagement_boost += 1
             
-            if image_url:
-                save_json_file(USED_IMAGES_FILE, image_url, timestamp)
-                used_images.add(image_url)
-                logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE} with timestamp {timestamp}")
-            
-            print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
-            print(f"Actual post URL: {post_url}")
-            logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
-            logging.info(f"Actual post URL: {post_url}")
-            return post_data, category, random.randint(0, 1800)
+            if comment_count >= 100:
+                engagement_boost += 2
+            elif comment_count >= 20:
+                engagement_boost += 1
+
+            final_score = min(base_score + engagement_boost, 10)
+            logger.info(f"Reddit Interest Score: {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count}, top_comments: {len(top_comments)}) for '{title}'")
+            return final_score
+        except Exception as e:
+            # Best effort: a failed rating is treated as "not interesting".
+            logger.error(f"Reddit interestingness scoring failed: {e}")
+            return 0
+
+    def get_top_comments(self, post_url: str, limit: int = 3) -> List[str]:
+        """Return the bodies of the first ``limit`` top-sorted comments of a post.
+
+        Comments whose body starts with "[deleted]" are left out, so fewer
+        than ``limit`` bodies may be returned. Any exception is logged and an
+        empty list is returned instead.
+        """
+        try:
+            post = self.reddit.submission(url=post_url)
+            post.comment_sort = 'top'
+            post.comments.replace_more(limit=0)
+
+            bodies = []
+            for candidate in post.comments[:limit]:
+                if candidate.body.startswith('[deleted]'):
+                    continue
+                bodies.append(candidate.body)
+
+            logger.info(f"Fetched {len(bodies)} top comments for {post_url}")
+            return bodies
+        except Exception as e:
+            logger.error(f"Failed to fetch comments for {post_url}: {e}")
+            return []
+
+    def fetch_reddit_posts(self) -> List[Dict]:
+        """Fetch posts from configured Reddit subreddits.
+
+        For each subreddit in the fixed feed list, the day's top submissions
+        (up to 100) are read and those created within the last 24 hours are
+        kept. A failure on one subreddit is logged and the remaining
+        subreddits are still processed.
+
+        Returns:
+            A list of dicts with the keys "title", "raw_title", "link",
+            "summary", "feed_title", "pub_date", "upvotes" and "comment_count".
+        """
+        feeds = ['FoodPorn', 'restaurant', 'FoodIndustry', 'food']
+        articles = []
+        cutoff_date = datetime.now(timezone.utc) - timedelta(hours=24)
         
-        attempts += 1
-        logging.info(f"WP posting failed for '{post_data['title']}'")
-    
-    print("No interesting Reddit post found after attempts")
-    logging.info("No interesting Reddit post found after attempts")
-    return None, None, random.randint(600, 1800)
+        logger.info(f"Starting fetch with cutoff date: {cutoff_date}")
+        for subreddit_name in feeds:
+            # Remember the running total so the log line below reports this
+            # subreddit's own count instead of the cumulative one.
+            count_before = len(articles)
+            try:
+                subreddit = self.reddit.subreddit(subreddit_name)
+                for submission in subreddit.top(time_filter='day', limit=100):
+                    pub_date = datetime.fromtimestamp(submission.created_utc, tz=timezone.utc)
+                    if pub_date < cutoff_date:
+                        logger.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
+                        continue
+                    cleaned_title = self.clean_reddit_title(submission.title)
+                    articles.append({
+                        "title": cleaned_title,
+                        "raw_title": submission.title,
+                        "link": f"https://www.reddit.com{submission.permalink}",
+                        "summary": submission.selftext,
+                        "feed_title": get_clean_source_name(subreddit_name),
+                        "pub_date": pub_date,
+                        "upvotes": submission.score,
+                        "comment_count": submission.num_comments
+                    })
+                logger.info(f"Fetched {len(articles) - count_before} posts from r/{subreddit_name}")
+            except Exception as e:
+                logger.error(f"Failed to fetch Reddit feed r/{subreddit_name}: {e}")
+        
+        logger.info(f"Total Reddit posts fetched: {len(articles)}")
+        return articles
+
+    def curate_from_reddit(self) -> Tuple[Optional[Dict], Optional[str], int]:
+        """Curate content from Reddit posts.
+
+        Articles from ``fetch_reddit_posts`` are tried in descending upvote
+        order. An article is skipped when its raw title is already in
+        ``self.posted_titles``, when ``smart_image_and_filter`` flags it, when
+        either title contains a recipe keyword or "homemade", when its
+        interest score is below 6, or when summarization returns nothing.
+
+        Returns:
+            ``(post_data, author, sleep_seconds)`` for the first article that
+            yields both post data and an author, otherwise
+            ``(None, None, sleep_seconds)``. ``sleep_seconds`` is a random
+            integer between 600 and 1800.
+        """
+        articles = self.fetch_reddit_posts()
+        if not articles:
+            logger.info("No Reddit posts available")
+            return None, None, random.randint(600, 1800)
+
+        articles.sort(key=lambda x: x["upvotes"], reverse=True)
+        
+        for article in articles:
+            title = article["title"]
+            raw_title = article["raw_title"]
+            link = article["link"]
+            summary = article["summary"]
+            
+            if raw_title in self.posted_titles:
+                logger.info(f"Skipping already posted post: {raw_title}")
+                continue
+            
+            logger.info(f"Processing Reddit Post: {title}")
+            
+            # Only the skip flag is used here; the image query and keywords
+            # are not consumed by this method.
+            image_query, relevance_keywords, skip = smart_image_and_filter(title, summary)
+            if skip or any(keyword in title.lower() or keyword in raw_title.lower() for keyword in RECIPE_KEYWORDS + ["homemade"]):
+                logger.info(f"Skipping filtered Reddit post: {title}")
+                continue
+
+            top_comments = self.get_top_comments(link)
+            interest_score = self.is_interesting_reddit(title, summary, article["upvotes"], article["comment_count"], top_comments)
+            
+            if interest_score < 6:
+                logger.info(f"Reddit Interest Too Low: {interest_score}")
+                continue
+
+            num_paragraphs = determine_paragraph_count(interest_score)
+            extra_prompt = (
+                f"Generate exactly {num_paragraphs} paragraphs.\n"
+                f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
+                f"Do NOT introduce unrelated concepts.\n"
+                f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
+                f"Do not include emojis in the summary."
+            )
+
+            final_summary = summarize_with_gpt4o(
+                f"{title}\n\n{summary}",
+                "Reddit",
+                link,
+                interest_score=interest_score,
+                extra_prompt=extra_prompt
+            )
+
+            if not final_summary:
+                logger.info(f"Summary failed for '{title}'")
+                continue
+
+            final_summary = insert_link_naturally(final_summary, "Reddit", link)
+            post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
+
+            if post_data and author:
+                # Remember the pick in memory so a caller that loops does not
+                # select the same submission again on its next cycle.
+                # NOTE(review): the title is not written back to the
+                # posted-titles file here, and it is marked before the caller
+                # has actually posted — confirm where persistence should live.
+                self.posted_titles.add(raw_title)
+                return post_data, author, random.randint(600, 1800)
+
+        return None, None, random.randint(600, 1800)

 def run_reddit_automator():
-    print(f"{datetime.now(timezone.utc)} - INFO - ***** Reddit Automator Launched *****")
-    logging.info("***** Reddit Automator Launched *****")
-    
-    post_data, category, sleep_time = curate_from_reddit()
-    if not post_data:
-        print(f"No postable Reddit article found - sleeping for {sleep_time} seconds")
-        logging.info(f"No postable Reddit article found - sleeping for {sleep_time} seconds")
-    else:
-        print(f"Completed Reddit run with sleep time: {sleep_time} seconds")
-        logging.info(f"Completed Reddit run with sleep time: {sleep_time} seconds")
-    print(f"Sleeping for {sleep_time}s")
-    time.sleep(sleep_time)
-    return post_data, category, sleep_time
+    """Main function to run the Reddit automator.
+
+    Creates one RedditScraper and loops forever: each cycle curates a post,
+    publishes it via ``post_to_wp`` when one was found, then sleeps for the
+    returned number of seconds. Any exception in a cycle is logged and the
+    loop retries after 300 seconds. The function never returns.
+    """
+    scraper = RedditScraper()
+    while True:
+        try:
+            post_data, author, sleep_time = scraper.curate_from_reddit()
+            if post_data and author:
+                # is_posting is the module-level flag the signal handler reads
+                # to decide whether exiting immediately is safe.
+                global is_posting
+                is_posting = True
+                try:
+                    # NOTE(review): only two positional arguments are passed;
+                    # the removed code called post_to_wp with keyword arguments
+                    # (category, link, image_url, ...) — confirm this matches
+                    # the current signature.
+                    post_to_wp(post_data, author)
+                    logger.info(f"Successfully posted: {post_data['title']}")
+                finally:
+                    # Always clear the flag, even when posting raised.
+                    is_posting = False
+            time.sleep(sleep_time)
+        except Exception as e:
+            logger.error(f"Error in Reddit automator: {e}")
+            time.sleep(300)  # Wait 5 minutes before retrying

+# Start the automator loop only when this file is executed as a script.
 if __name__ == "__main__":
     run_reddit_automator()
@@ -10,6 +10,7 @@ import sys
 import re
 import email.utils
 from datetime import datetime, timedelta, timezone
+from typing import List, Dict, Optional, Tuple, Set
 from bs4 import BeautifulSoup
 from openai import OpenAI
 from urllib.parse import quote
@@ -18,7 +19,8 @@ from requests.adapters import HTTPAdapter
 from foodie_config import (
    RSS_FEEDS, RSS_FEED_NAMES, AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS,
    HOME_KEYWORDS, PRODUCT_KEYWORDS, PERSONA_CONFIGS, CATEGORIES,
-    get_clean_source_name, X_API_CREDENTIALS
+    get_clean_source_name, X_API_CREDENTIALS, FILE_PATHS, EXPIRATION_DAYS,
+    IMAGE_EXPIRATION_DAYS, LIGHT_TASK_MODEL
 )
 from foodie_utils import (
    load_json_file, save_json_file, get_image, generate_image_query,
@@ -30,42 +32,50 @@ from foodie_utils import (
 from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
 from dotenv import load_dotenv

+# Load environment variables
 load_dotenv()

+# Global state
 is_posting = False
+logger = logging.getLogger(__name__)

-def signal_handler(sig, frame):
-    logging.info("Received termination signal, checking if safe to exit...")
-    if is_posting:
-        logging.info("Currently posting, will exit after completion.")
-    else:
-        logging.info("Safe to exit immediately.")
-        sys.exit(0)
+class RSSScraper:
+    def __init__(self):
+        """Initialise logging, signal handlers, the OpenAI client and scraper state.
+
+        Logging is configured first so the later setup steps can log. The
+        OpenAI client reads its key from the OPENAI_API_KEY environment
+        variable.
+        """
+        self.setup_logging()
+        self.setup_signal_handlers()
+        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+        self.posted_titles = self.load_posted_titles()
+        self.used_images = self.load_used_images()
+        # NOTE(review): setup_http_session is not visible in this part of the
+        # file — presumably it returns a configured requests session; confirm.
+        self.session = self.setup_http_session()

-signal.signal(signal.SIGTERM, signal_handler)
-signal.signal(signal.SIGINT, signal_handler)
+    def setup_logging(self) -> None:
+        """Set up file and console logging for the RSS scraper.
+
+        The log file path is FILE_PATHS["posted_rss_titles"] with its suffix
+        replaced by ``.log``; it is passed to ``prune_old_logs`` before
+        logging is configured. The root logger is configured at INFO level
+        with that file, the "requests" logger is raised to WARNING, and a
+        console handler using the same record format is added to the root
+        logger.
+        """
+        record_format = "%(asctime)s - %(levelname)s - %(message)s"
+        log_path = FILE_PATHS["posted_rss_titles"].with_suffix('.log')
+        self.prune_old_logs(log_path)
+
+        logging.basicConfig(filename=str(log_path), level=logging.INFO, format=record_format)
+        logging.getLogger("requests").setLevel(logging.WARNING)
+
+        stream_handler = logging.StreamHandler()
+        stream_handler.setFormatter(logging.Formatter(record_format))
+        logging.getLogger().addHandler(stream_handler)
+        logger.info("Logging initialized for RSS scraper")

-LOG_FILE = "/home/shane/foodie_automator/foodie_automator_rss.log"
-LOG_PRUNE_DAYS = 30
-FEED_TIMEOUT = 15
-MAX_RETRIES = 3
+    def prune_old_logs(self, log_file: str) -> None:
+        """Prune log entries older than LOG_PRUNE_DAYS."""
+        if not os.path.exists(log_file):
+            return

-POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json'
-USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
-EXPIRATION_HOURS = 24
-IMAGE_EXPIRATION_DAYS = 7
-
-posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
-posted_titles = set(entry["title"] for entry in posted_titles_data)
-used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
-
-def setup_logging():
-    if os.path.exists(LOG_FILE):
-        with open(LOG_FILE, 'r') as f:
+        with open(log_file, 'r') as f:
            lines = f.readlines()
-        cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
+        
+        cutoff = datetime.now(timezone.utc) - timedelta(days=30)  # LOG_PRUNE_DAYS
        pruned_lines = []
        malformed_count = 0
+        
        for line in lines:
            if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit():
                malformed_count += 1
@@ -77,290 +87,211 @@ def setup_logging():
            except ValueError:
                malformed_count += 1
                continue
+        
        if malformed_count > 0:
-            logging.info(f"Skipped {malformed_count} malformed log lines during pruning")
-        with open(LOG_FILE, 'w') as f:
+            logger.warning(f"Skipped {malformed_count} malformed log lines during pruning")
+        
+        with open(log_file, 'w') as f:
            f.writelines(pruned_lines)
-    
-    logging.basicConfig(
-        filename=LOG_FILE,
-        level=logging.INFO,
-        format="%(asctime)s - %(levelname)s - %(message)s",
-        datefmt="%Y-%m-%d %H:%M:%S"
-    )
-    console_handler = logging.StreamHandler()
-    console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
-    logging.getLogger().addHandler(console_handler)
-    logging.getLogger("requests").setLevel(logging.WARNING)
-    logging.info("Logging initialized for foodie_automator_rss.py")

-setup_logging()
+    def setup_signal_handlers(self) -> None:
+        """Set up signal handlers for graceful shutdown.
+
+        Registers one handler for both SIGTERM and SIGINT. The handler exits
+        the process with status 0 unless the module-level ``is_posting`` flag
+        is set, in which case it only logs.
+        """
+        def signal_handler(sig, frame):
+            logger.info("Received termination signal, checking if safe to exit...")
+            if is_posting:
+                # NOTE(review): this branch only logs. Nothing visible here
+                # records that a shutdown was requested, so confirm that some
+                # other code actually exits once posting has finished.
+                logger.info("Currently posting, will exit after completion.")
+            else:
+                logger.info("Safe to exit immediately.")
+                sys.exit(0)

-def create_http_session() -> requests.Session:
-    session = requests.Session()
-    retry_strategy = Retry(
-        total=MAX_RETRIES,
-        backoff_factor=2,
-        status_forcelist=[403, 429, 500, 502, 503, 504],
-        allowed_methods=["GET", "POST"]
-    )
-    adapter = HTTPAdapter(
-        max_retries=retry_strategy,
-        pool_connections=10,
-        pool_maxsize=10
-    )
-    session.mount("http://", adapter)
-    session.mount("https://", adapter)
-    session.headers.update({
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'
-    })
-    return session
+        # The same handler serves both termination signals.
+        signal.signal(signal.SIGTERM, signal_handler)
+        signal.signal(signal.SIGINT, signal_handler)

-def parse_date(date_str):
-    try:
-        parsed_date = email.utils.parsedate_to_datetime(date_str)
-        if parsed_date.tzinfo is None:
-            parsed_date = parsed_date.replace(tzinfo=timezone.utc)
-        return parsed_date
-    except Exception as e:
-        logging.error(f"Failed to parse date '{date_str}': {e}")
-        return datetime.now(timezone.utc)
+    def setup_http_session(self) -> requests.Session:
+        """Build a ``requests`` session that retries failed requests.
+
+        Returns:
+            A session whose ``http://`` and ``https://`` adapters share one
+            retry policy (3 retries, backoff factor 2, GET and POST, on the
+            listed status codes) and which sends a desktop-browser
+            ``User-Agent`` header.
+        """
+        retrying_adapter = HTTPAdapter(
+            max_retries=Retry(
+                total=3,
+                backoff_factor=2,
+                status_forcelist=[403, 429, 500, 502, 503, 504],
+                allowed_methods=["GET", "POST"],
+            ),
+            pool_connections=10,
+            pool_maxsize=10,
+        )
+
+        http = requests.Session()
+        # One adapter instance is mounted for both schemes.
+        for scheme in ("http://", "https://"):
+            http.mount(scheme, retrying_adapter)
+        http.headers.update({
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'
+        })
+        return http

-def fetch_rss_feeds():
-    logging.info("Starting fetch_rss_feeds")
-    articles = []
-    cutoff_date = datetime.now(timezone.utc) - timedelta(hours=EXPIRATION_HOURS)
-    session = create_http_session()
-
-    if not RSS_FEEDS:
-        logging.error("RSS_FEEDS is empty in foodie_config.py")
-        return articles
-
-    for feed_url in RSS_FEEDS:
-        logging.info(f"Processing feed: {feed_url}")
+    def load_posted_titles(self) -> set[str]:
+        """Load and return the set of posted titles.
+
+        Reads the ``posted_rss_titles`` file through ``load_json_file`` with
+        ``EXPIRATION_DAYS`` and collects the ``"title"`` value of every entry
+        that has one.
+
+        Returns:
+            The set of titles, or an empty set if loading fails (the error is
+            logged rather than raised).
+        """
+        # The annotation uses the builtin ``set[str]``: ``typing.Set`` is not
+        # among the names imported by this module, so ``Set[str]`` raised
+        # NameError when the method was defined.
         try:
-            response = session.get(feed_url, timeout=FEED_TIMEOUT)
-            response.raise_for_status()
-            soup = BeautifulSoup(response.content, 'xml')
-            items = soup.find_all('item')
-
-            feed_title = RSS_FEED_NAMES.get(feed_url, (get_clean_source_name(feed_url), feed_url))
-            for item in items:
-                try:
-                    title = item.find('title').text.strip() if item.find('title') else "Untitled"
-                    link = item.find('link').text.strip() if item.find('link') else ""
-                    pub_date = item.find('pubDate')
-                    pub_date = parse_date(pub_date.text) if pub_date else datetime.now(timezone.utc)
-
-                    if pub_date < cutoff_date:
-                        logging.info(f"Skipping old article: {title} (Published: {pub_date})")
-                        continue
-
-                    description = item.find('description')
-                    summary = BeautifulSoup(description.text, 'html.parser').get_text().strip() if description else ""
-                    content = item.find('content:encoded')
-                    content_text = BeautifulSoup(content.text, 'html.parser').get_text().strip() if content else summary
-
-                    articles.append({
-                        "title": title,
-                        "link": link,
-                        "summary": summary,
-                        "content": content_text,
-                        "feed_title": feed_title[0] if isinstance(feed_title, tuple) else feed_title,
-                        "pub_date": pub_date
-                    })
-                    logging.debug(f"Processed article: {title}")
-                except Exception as e:
-                    logging.warning(f"Error processing entry in {feed_url}: {e}")
-                    continue
-            logging.info(f"Filtered to {len(articles)} articles from {feed_url}")
+            data = load_json_file(FILE_PATHS["posted_rss_titles"], EXPIRATION_DAYS)
+            return {entry["title"] for entry in data if "title" in entry}
         except Exception as e:
-            logging.error(f"Failed to fetch RSS feed {feed_url}: {e}")
-            continue
+            logger.error(f"Error loading posted titles: {e}")
+            return set()

-    articles.sort(key=lambda x: x["pub_date"], reverse=True)
-    logging.info(f"Total RSS articles fetched: {len(articles)}")
-    return articles
+    def load_used_images(self) -> set[str]:
+        """Load and return the set of used images.
+
+        Reads the ``used_images`` file through ``load_json_file`` with
+        ``IMAGE_EXPIRATION_DAYS`` and collects the ``"title"`` value of every
+        entry that has one.
+
+        Returns:
+            The set of recorded values, or an empty set if loading fails (the
+            error is logged rather than raised).
+        """
+        # The annotation uses the builtin ``set[str]``: ``typing.Set`` is not
+        # among the names imported by this module, so ``Set[str]`` raised
+        # NameError when the method was defined.
+        try:
+            data = load_json_file(FILE_PATHS["used_images"], IMAGE_EXPIRATION_DAYS)
+            # NOTE(review): presumably the image URL is stored under the
+            # "title" key -- confirm against save_json_file.
+            return {entry["title"] for entry in data if "title" in entry}
+        except Exception as e:
+            logger.error(f"Error loading used images: {e}")
+            return set()

-def curate_from_rss():
-    articles = fetch_rss_feeds()
-    if not articles:
-        print("No RSS articles available")
-        logging.info("No RSS articles available")
-        return None, None, random.randint(600, 1800)
+    def parse_date(self, date_str: str) -> datetime:
+        """Turn an email-style date string into a timezone-aware datetime.
+
+        Args:
+            date_str: Date text as accepted by
+                ``email.utils.parsedate_to_datetime``.
+
+        Returns:
+            The parsed datetime, with UTC attached when the parsed value has
+            no timezone. If parsing fails the error is logged and the current
+            UTC time is returned instead.
+        """
+        try:
+            moment = email.utils.parsedate_to_datetime(date_str)
+        except Exception as e:
+            logger.error(f"Failed to parse date '{date_str}': {e}")
+            return datetime.now(timezone.utc)
+
+        # A naive result is treated as UTC so later comparisons with aware
+        # datetimes do not fail.
+        if moment.tzinfo is not None:
+            return moment
+        return moment.replace(tzinfo=timezone.utc)

-    attempts = 0
-    max_attempts = 10
-    while attempts < max_attempts and articles:
-        article = articles.pop(0)
-        title = article["title"]
-        link = article["link"]
-        summary = article["summary"]
-        content = article["content"]
-        source_name = article["feed_title"]
-        original_source = f'<a href="{link}">{source_name}</a>'
+    def fetch_rss_feeds(self) -> List[Dict]:
+        """Fetch and process RSS feeds."""
+        logger.info("Starting fetch_rss_feeds")
+        articles = []
+        cutoff_date = datetime.now(timezone.utc) - timedelta(hours=24)

-        if title in posted_titles:
-            print(f"Skipping already posted article: {title}")
-            logging.info(f"Skipping already posted article: {title}")
-            attempts += 1
-            continue
+        if not RSS_FEEDS:
+            logger.error("RSS_FEEDS is empty in foodie_config.py")
+            return articles

-        print(f"Trying RSS Article: {title} from {source_name}")
-        logging.info(f"Trying RSS Article: {title} from {source_name}")
+        for feed_url in RSS_FEEDS:
+            logger.info(f"Processing feed: {feed_url}")
+            try:
+                response = self.session.get(feed_url, timeout=15)
+                response.raise_for_status()
+                soup = BeautifulSoup(response.content, 'xml')
+                items = soup.find_all('item')

-        image_query, relevance_keywords, skip = smart_image_and_filter(title, summary)
-        if skip:
-            print(f"Skipping filtered RSS article: {title}")
-            logging.info(f"Skipping filtered RSS article: {title}")
-            attempts += 1
-            continue
+                feed_title = RSS_FEED_NAMES.get(feed_url, (get_clean_source_name(feed_url), feed_url))
+                for item in items:
+                    try:
+                        title = item.find('title').text.strip() if item.find('title') else "Untitled"
+                        link = item.find('link').text.strip() if item.find('link') else ""
+                        pub_date = item.find('pubDate')
+                        pub_date = self.parse_date(pub_date.text) if pub_date else datetime.now(timezone.utc)

-        scoring_content = f"{title}\n\n{summary}\n\nContent: {content}"
-        interest_score = is_interesting(scoring_content)
-        logging.info(f"Interest score for '{title}': {interest_score}")
-        if interest_score < 6:
-            print(f"RSS Interest Too Low: {interest_score}")
-            logging.info(f"RSS Interest Too Low: {interest_score}")
-            attempts += 1
-            continue
+                        if pub_date < cutoff_date:
+                            logger.info(f"Skipping old article: {title} (Published: {pub_date})")
+                            continue

-        num_paragraphs = determine_paragraph_count(interest_score)
-        extra_prompt = (
-            f"Generate exactly {num_paragraphs} paragraphs.\n"
-            f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
-            f"Do NOT introduce unrelated concepts.\n"
-            f"Expand on the core idea with relevant context about its appeal or significance.\n"
-            f"Do not include emojis in the summary."
-        )
-        content_to_summarize = scoring_content
-        final_summary = summarize_with_gpt4o(
-            content_to_summarize,
-            source_name,
-            link,
-            interest_score=interest_score,
-            extra_prompt=extra_prompt
-        )
-        if not final_summary:
-            logging.info(f"Summary failed for '{title}'")
-            attempts += 1
-            continue
+                        description = item.find('description')
+                        summary = BeautifulSoup(description.text, 'html.parser').get_text().strip() if description else ""
+                        content = item.find('content:encoded')
+                        content_text = BeautifulSoup(content.text, 'html.parser').get_text().strip() if content else summary

-        # Remove the original title from the summary while preserving paragraphs
-        title_pattern = re.compile(
-            r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title),
-            re.IGNORECASE
-        )
-        paragraphs = final_summary.split('\n')
-        cleaned_paragraphs = []
-        for para in paragraphs:
-            if para.strip():
-                cleaned_para = title_pattern.sub('', para).strip()
-                cleaned_para = re.sub(r'\s+', ' ', cleaned_para)
-                cleaned_paragraphs.append(cleaned_para)
-        final_summary = '\n'.join(cleaned_paragraphs)
-
-        final_summary = insert_link_naturally(final_summary, source_name, link)
-        post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
-        if not post_data:
-            attempts += 1
-            continue
-
-        # Fetch image
-        image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
-        if not image_url:
-            logging.info(f"Flickr fetch failed for '{image_query}'. Falling back to Pixabay.")
-            image_url, image_source, uploader, page_url = get_image(image_query)
-            if not image_url:
-                logging.info(f"Pixabay fetch failed for '{image_query}'. Skipping article '{title}'.")
-                attempts += 1
+                        articles.append({
+                            "title": title,
+                            "link": link,
+                            "summary": summary,
+                            "content": content_text,
+                            "feed_title": feed_title[0] if isinstance(feed_title, tuple) else feed_title,
+                            "pub_date": pub_date
+                        })
+                    except Exception as e:
+                        logger.warning(f"Error processing entry in {feed_url}: {e}")
+                        continue
+                logger.info(f"Filtered to {len(articles)} articles from {feed_url}")
+            except Exception as e:
+                logger.error(f"Failed to fetch RSS feed {feed_url}: {e}")
                continue

-        hook = get_dynamic_hook(post_data["title"]).strip()
+        articles.sort(key=lambda x: x["pub_date"], reverse=True)
+        logger.info(f"Total RSS articles fetched: {len(articles)}")
+        return articles

-        # Generate viral share prompt
-        share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
-        share_links_template = (
-            f'<p>{share_prompt} '
-            f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
-            f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
-        )
-        post_data["content"] = f"{final_summary}\n\n{share_links_template}"  # Removed cta from content
+    def curate_from_rss(self) -> Tuple[Optional[Dict], Optional[str], int]:
+        """Curate content from RSS feeds."""
+        articles = self.fetch_rss_feeds()
+        if not articles:
+            logger.info("No RSS articles available")
+            return None, None, random.randint(600, 1800)

-        global is_posting
-        is_posting = True
-        try:
-            post_id, post_url = post_to_wp(
-                post_data=post_data,
-                category=category,
-                link=link,
-                author=author,
-                image_url=image_url,
-                original_source=original_source,
-                image_source=image_source,
-                uploader=uploader,
-                pixabay_url=pixabay_url,
-                interest_score=interest_score,
-                should_post_tweet=True
+        for article in articles:
+            title = article["title"]
+            link = article["link"]
+            summary = article["summary"]
+            content = article["content"]
+            source_name = article["feed_title"]
+            
+            if title in self.posted_titles:
+                logger.info(f"Skipping already posted article: {title}")
+                continue
+            
+            logger.info(f"Processing RSS Article: {title} from {source_name}")
+            
+            image_query, relevance_keywords, skip = smart_image_and_filter(title, summary)
+            if skip:
+                logger.info(f"Skipping filtered RSS article: {title}")
+                continue
+
+            scoring_content = f"{title}\n\n{summary}\n\nContent: {content}"
+            interest_score = is_interesting(scoring_content)
+            logger.info(f"Interest score for '{title}': {interest_score}")
+            
+            if interest_score < 6:
+                logger.info(f"RSS Interest Too Low: {interest_score}")
+                continue
+
+            num_paragraphs = determine_paragraph_count(interest_score)
+            extra_prompt = (
+                f"Generate exactly {num_paragraphs} paragraphs.\n"
+                f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n"
+                f"Do NOT introduce unrelated concepts.\n"
+                f"Expand on the core idea with relevant context about its appeal or significance.\n"
+                f"Do not include emojis in the summary."
            )
-        finally:
-            is_posting = False

-        if post_id:
-            share_text = f"Check out this foodie gem! {post_data['title']}"
-            share_text_encoded = quote(share_text)
-            post_url_encoded = quote(post_url)
-            share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
-            # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
-            post_data["content"] = f"{final_summary}\n\n{share_links}"  # Removed cta from content
-            is_posting = True
-            try:
-                post_to_wp(
-                    post_data=post_data,
-                    category=category,
-                    link=link,
-                    author=author,
-                    image_url=image_url,
-                    original_source=original_source,
-                    image_source=image_source,
-                    uploader=uploader,
-                    pixabay_url=pixabay_url,
-                    interest_score=interest_score,
-                    post_id=post_id,
-                    should_post_tweet=False
-                )
-            finally:
-                is_posting = False
+            final_summary = summarize_with_gpt4o(
+                scoring_content,
+                source_name,
+                link,
+                interest_score=interest_score,
+                extra_prompt=extra_prompt
+            )

-            timestamp = datetime.now(timezone.utc).isoformat()
-            save_json_file(POSTED_TITLES_FILE, title, timestamp)
-            posted_titles.add(title)
-            logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
+            if not final_summary:
+                logger.info(f"Summary failed for '{title}'")
+                continue

-            if image_url:
-                save_json_file(USED_IMAGES_FILE, image_url, timestamp)
-                used_images.add(image_url)
-                logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
+            final_summary = insert_link_naturally(final_summary, source_name, link)
+            post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)

-            print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
-            logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
-            return post_data, category, random.randint(0, 1800)
+            if post_data and author:
+                return post_data, author, random.randint(600, 1800)

-        attempts += 1
-        logging.info(f"WP posting failed for '{post_data['title']}'")
-
-    print("No interesting RSS article found after attempts")
-    logging.info("No interesting RSS article found after attempts")
-    return None, None, random.randint(600, 1800)
+        return None, None, random.randint(600, 1800)

 def run_rss_automator():
-    print(f"{datetime.now(timezone.utc)} - INFO - ***** RSS Automator Launched *****")
-    logging.info("***** RSS Automator Launched *****")
-    post_data, category, sleep_time = curate_from_rss()
-    print(f"Sleeping for {sleep_time}s")
-    logging.info(f"Completed run with sleep time: {sleep_time} seconds")
-    time.sleep(sleep_time)
-    return post_data, category, sleep_time
+    """Main function to run the RSS automator.
+
+    Creates one ``RSSScraper`` and then loops forever: curate an article,
+    post it when one was produced, and sleep for the returned number of
+    seconds. Any exception in a cycle is logged and followed by a 300-second
+    pause. The function never returns.
+    """
+    scraper = RSSScraper()
+    while True:
+        try:
+            post_data, author, sleep_time = scraper.curate_from_rss()
+            if post_data and author:
+                # is_posting is the module-level flag read by the signal
+                # handler; it is set only for the duration of the post.
+                global is_posting
+                is_posting = True
+                try:
+                    # NOTE(review): the removed code called post_to_wp with
+                    # category, link, image and source keyword arguments;
+                    # confirm it accepts this two-argument call.
+                    post_to_wp(post_data, author)
+                    # NOTE(review): nothing visible adds the title to
+                    # scraper.posted_titles or saves it after posting, so the
+                    # same article may be picked again next cycle -- confirm.
+                    logger.info(f"Successfully posted: {post_data['title']}")
+                finally:
+                    is_posting = False
+            time.sleep(sleep_time)
+        except Exception as e:
+            logger.error(f"Error in RSS automator: {e}")
+            time.sleep(300)  # Wait 5 minutes before retrying

 if __name__ == "__main__":
    run_rss_automator()
@@ -2,14 +2,71 @@
 # Constants shared across all automator scripts
 from dotenv import load_dotenv
 import os
+from typing import Dict, List, Optional, TypedDict, Union
+from pathlib import Path
+import logging

+# Configure logging
+# Runs at import time: attaches a file handler ('foodie_automator.log',
+# resolved against the current working directory) and a console handler to
+# the root logger.
+# NOTE(review): logging.basicConfig() does nothing once the root logger has
+# handlers, so a later basicConfig(filename=...) in a script that imports
+# this module is ignored unless it passes force=True. A console handler that
+# such a script adds itself would also duplicate the one created here.
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler('foodie_automator.log'),
+        logging.StreamHandler()
+    ]
+)
+logger = logging.getLogger(__name__)
+
+# Load environment variables
 load_dotenv()
+
+# API Keys
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 PIXABAY_API_KEY = os.getenv("PIXABAY_API_KEY")
 FLICKR_API_KEY = os.getenv("FLICKR_API_KEY")
 FLICKR_API_SECRET = os.getenv("FLICKR_API_SECRET")

-AUTHORS = [
+# Validate required API keys
+def validate_api_keys() -> None:
+    """Check that every required API key was loaded.
+
+    Raises:
+        ValueError: If any of OPENAI_API_KEY, PIXABAY_API_KEY, FLICKR_API_KEY
+            or FLICKR_API_SECRET is empty or unset. The missing names are
+            logged at error level before raising.
+    """
+    candidates = (
+        ("OPENAI_API_KEY", OPENAI_API_KEY),
+        ("PIXABAY_API_KEY", PIXABAY_API_KEY),
+        ("FLICKR_API_KEY", FLICKR_API_KEY),
+        ("FLICKR_API_SECRET", FLICKR_API_SECRET),
+    )
+    missing_keys = [name for name, value in candidates if not value]
+
+    # Nothing to report when every key has a value.
+    if not missing_keys:
+        return
+
+    logger.error(f"Missing required API keys: {', '.join(missing_keys)}")
+    raise ValueError(f"Missing required API keys: {', '.join(missing_keys)}")
+
+# Type definitions
+class AuthorConfig(TypedDict):
+    """Shape of one entry in ``AUTHORS``."""
+
+    url: str
+    username: str
+    # Filled from os.getenv(...), which returns None when the variable is
+    # unset, so the value is not guaranteed to be a string.
+    password: Optional[str]
+    # Name of the persona, e.g. "Trend Scout".
+    persona: str
+    bio: str
+    # Date of birth as text, e.g. "1999-03-15".
+    dob: str
+
+class XCredentials(TypedDict):
+    """Shape of one entry in ``X_API_CREDENTIALS``."""
+
+    # Author username, e.g. "owenjohnson".
+    username: str
+    # X handle including the leading "@", e.g. "@insiderfoodieowen".
+    x_username: str
+    # NOTE(review): if the secrets below are read with os.getenv they may be
+    # None at runtime -- confirm against the X_API_CREDENTIALS entries.
+    api_key: str
+    api_secret: str
+    access_token: str
+    access_token_secret: str
+    client_secret: str
+
+class PersonaConfig(TypedDict):
+    """Shape of one value in ``PERSONA_CONFIGS``, keyed by persona name."""
+
+    # Short persona description substituted into the prompts.
+    description: str
+    # Tone description substituted into the prompts.
+    tone: str
+    # Article prompt template; uses {description}, {tone} and {num_paragraphs}.
+    article_prompt: str
+    # Tweet prompt template; uses {description} and {tone}.
+    x_prompt: str
+
+# Author configurations
+AUTHORS: List[AuthorConfig] = [
    {
        "url": "https://insiderfoodie.com",
        "username": "owenjohnson",
@@ -31,7 +88,7 @@ AUTHORS = [
        "username": "aishapatel",
        "password": os.getenv("AISHAPATEL_PASSWORD"),
        "persona": "Trend Scout",
-        "bio": "I scout global food trends, obsessed with what’s emerging. My sharp predictions map the industry’s path—always one step ahead.",
+        "bio": "I scout global food trends, obsessed with what's emerging. My sharp predictions map the industry's path—always one step ahead.",
        "dob": "1999-03-15"
    },
    {
@@ -47,7 +104,7 @@ AUTHORS = [
        "username": "keishareid",
        "password": os.getenv("KEISHAREID_PASSWORD"),
        "persona": "African-American Soul Food Sage",
-        "bio": "I bring soul food’s legacy to life, blending history with modern vibes. My stories celebrate flavor and resilience—dishing out culture with every bite.",
+        "bio": "I bring soul food's legacy to life, blending history with modern vibes. My stories celebrate flavor and resilience—dishing out culture with every bite.",
        "dob": "1994-06-10"
    },
    {
@@ -60,7 +117,8 @@ AUTHORS = [
    }
 ]

-X_API_CREDENTIALS = [
+# X (Twitter) API credentials
+X_API_CREDENTIALS: List[XCredentials] = [
    {
        "username": "owenjohnson",
        "x_username": "@insiderfoodieowen",
@@ -117,12 +175,13 @@ X_API_CREDENTIALS = [
    }
 ]

-PERSONA_CONFIGS = {
+# Persona configurations
+PERSONA_CONFIGS: Dict[str, PersonaConfig] = {
    "Visionary Editor": {
        "description": "a commanding food editor with a borderless view",
        "tone": "a polished and insightful tone, like 'This redefines culinary excellence.'",
        "article_prompt": (
-            "You’re {description}. Summarize this article in {tone}. "
+            "You're {description}. Summarize this article in {tone}. "
            "Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
            "Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
            "Add a bold take and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
@@ -139,7 +198,7 @@ PERSONA_CONFIGS = {
        "description": "a seasoned foodie reviewer with a sharp eye",
        "tone": "a professional yet engaging tone, like 'This dish is a revelation.'",
        "article_prompt": (
-            "You’re {description}. Summarize this article in {tone}. "
+            "You're {description}. Summarize this article in {tone}. "
            "Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
            "Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
            "Add a subtle opinion and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
@@ -154,12 +213,12 @@ PERSONA_CONFIGS = {
    },
    "Trend Scout": {
        "description": "a forward-thinking editor obsessed with trends",
-        "tone": "an insightful and forward-looking tone, like 'This sets the stage for what’s next.'",
+        "tone": "an insightful and forward-looking tone, like 'This sets the stage for what's next.'",
        "article_prompt": (
-            "You’re {description}. Summarize this article in {tone}. "
+            "You're {description}. Summarize this article in {tone}. "
            "Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
            "Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
-            "Predict what’s next and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
+            "Predict what's next and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
        ),
        "x_prompt": (
            "Craft a tweet as {description}. Keep it under 280 characters, using {tone}. "
@@ -173,7 +232,7 @@ PERSONA_CONFIGS = {
        "description": "a cultured food writer who loves storytelling",
        "tone": "a warm and thoughtful tone, like 'This evokes a sense of tradition.'",
        "article_prompt": (
-            "You’re {description}. Summarize this article in {tone}. "
+            "You're {description}. Summarize this article in {tone}. "
            "Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
            "Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
            "Add a thoughtful observation and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
@@ -190,7 +249,7 @@ PERSONA_CONFIGS = {
        "description": "a vibrant storyteller rooted in African-American culinary heritage",
        "tone": "a heartfelt and authentic tone, like 'This captures the essence of heritage.'",
        "article_prompt": (
-            "You’re {description}. Summarize this article in {tone}. "
+            "You're {description}. Summarize this article in {tone}. "
            "Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
            "Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
            "Add a heritage twist and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
@@ -207,7 +266,7 @@ PERSONA_CONFIGS = {
        "description": "an adventurous explorer of global street food",
        "tone": "a bold and adventurous tone, like 'This takes you on a global journey.'",
        "article_prompt": (
-            "You’re {description}. Summarize this article in {tone}. "
+            "You're {description}. Summarize this article in {tone}. "
            "Explore a wide range of food-related topics, skip recipes. Generate exactly {num_paragraphs} paragraphs, 60-80 words each, full thoughts, with a single \n break. "
            "Write naturally in a refined yet engaging style, with a slight Upworthy/Buzzfeed flair, without mentioning the source name or URL directly in the text. "
            "Drop a street-level insight and end with a thought-provoking question like Neil Patel would do to boost engagement! Do not include emojis in the summary."
@@ -223,25 +282,30 @@ PERSONA_CONFIGS = {
 }

 # File paths
-POSTED_RSS_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json'
-POSTED_GOOGLE_TITLES_FILE = '/home/shane/foodie_automator/posted_google_titles.json'
-POSTED_REDDIT_TITLES_FILE = '/home/shane/foodie_automator/posted_reddit_titles.json'
-USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
-AUTHOR_BACKGROUNDS_FILE = '/home/shane/foodie_automator/author_backgrounds.json'
-X_POST_COUNTS_FILE = '/home/shane/foodie_automator/x_post_counts.json'
-RECENT_POSTS_FILE = '/home/shane/foodie_automator/recent_posts.json'
+BASE_DIR = Path("/home/shane/foodie_automator")
+FILE_PATHS = {
+    "posted_rss_titles": BASE_DIR / "posted_rss_titles.json",
+    "posted_google_titles": BASE_DIR / "posted_google_titles.json",
+    "posted_reddit_titles": BASE_DIR / "posted_reddit_titles.json",
+    "used_images": BASE_DIR / "used_images.json",
+    "author_backgrounds": BASE_DIR / "author_backgrounds.json",
+    "x_post_counts": BASE_DIR / "x_post_counts.json",
+    "recent_posts": BASE_DIR / "recent_posts.json"
+}

+# Expiration periods
 EXPIRATION_DAYS = 3
 IMAGE_EXPIRATION_DAYS = 7

-RSS_FEEDS = [
+# RSS feed configurations
+RSS_FEEDS: List[str] = [
    "https://www.eater.com/rss/full.xml",
    "https://www.nrn.com/rss.xml",
    "https://rss.nytimes.com/services/xml/rss/nyt/DiningandWine.xml",
    "https://www.theguardian.com/food/rss"
 ]

-RSS_FEED_NAMES = {
+RSS_FEED_NAMES: Dict[str, tuple[str, str]] = {
    "https://www.eater.com/rss/full.xml": ("Eater", "https://www.eater.com/"),
    "https://www.nrn.com/rss.xml": ("Nation's Restaurant News", "https://www.nrn.com/"),
    "https://rss.nytimes.com/services/xml/rss/nyt/DiningandWine.xml": ("The New York Times", "https://www.nytimes.com/section/food"),
@@ -276,12 +340,33 @@ FAST_FOOD_KEYWORDS = [
 SUMMARY_MODEL = "gpt-4o"  # or "gpt-4.1-mini" for testing
 LIGHT_TASK_MODEL = "gpt-4o-mini"

-def get_clean_source_name(source_name):
-    """
-    Retrieve a clean source name from RSS_FEED_NAMES if source_name matches a feed URL,
-    otherwise return the original source_name as a fallback.
-    """
-    for feed_url, (clean_name, _) in RSS_FEED_NAMES.items():
-        if feed_url == source_name:
-            return clean_name
-    return source_name
+def get_clean_source_name(source_name: str) -> str:
+    """Return a display name for a feed URL or a raw source name.
+
+    A value that is a key of ``RSS_FEED_NAMES`` (a configured feed URL) maps
+    to that feed's configured name. Any other URL is returned unchanged. Any
+    other name has filler words ("The", "Food", "Dining", "Restaurant")
+    trimmed from its start and end and its whitespace collapsed.
+
+    Args:
+        source_name: Feed URL or human-readable source name.
+
+    Returns:
+        The cleaned name, or ``source_name`` itself when it is not a string
+        or when trimming would leave nothing.
+    """
+    if not isinstance(source_name, str):
+        logger.error(f"Error cleaning source name '{source_name}': expected a string")
+        return source_name
+
+    # Callers pass the feed URL and expect the configured display name back.
+    known_feed = RSS_FEED_NAMES.get(source_name)
+    if known_feed is not None:
+        return known_feed[0]
+
+    # Stripping words out of a URL corrupts it (".../food/rss" -> "...//rss").
+    if "://" in source_name:
+        return source_name
+
+    # Trim filler only as whole words at the ends. Blind substring replacement
+    # mangled names such as "Foodie Weekly" or "Breathe Deep".
+    filler_words = {"the", "food", "dining", "restaurant"}
+    words = source_name.split()
+    while words and words[0].lower() in filler_words:
+        words.pop(0)
+    while words and words[-1].lower() in filler_words:
+        words.pop()
+
+    return " ".join(words) if words else source_name
+
+# Validate configurations on import
+# validate_api_keys() raises ValueError when a key is missing, so importing
+# this module fails in any script that lacks the full set of API keys.
+validate_api_keys()
+
+# Ensure all file paths exist
+# Creates the parent directory and an empty file for every FILE_PATHS entry.
+# NOTE(review): touch() leaves a zero-byte file, which is not valid JSON --
+# confirm that load_json_file tolerates an empty file.
+for path in FILE_PATHS.values():
+    path.parent.mkdir(parents=True, exist_ok=True)
+    if not path.exists():
+        path.touch()
+        logger.info(f"Created missing file: {path}")
+
+# Log successful configuration
+logger.info("Configuration loaded successfully")