Merge X (Twitter) posting into main files

2025-04-28 21:23:12 +10:00
parent a1d2ce4215
commit ea7d36a22b
7 changed files with 394 additions and 446 deletions
@@ -1,3 +1,4 @@
+# foodie_automator_reddit.py
 import requests
 import random
 import time
@@ -15,14 +16,16 @@ from requests.adapters import HTTPAdapter
 import praw
 from foodie_config import (
    AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS,
-    SUMMARY_PERSONA_PROMPTS, CATEGORIES, CTAS, get_clean_source_name,
-    REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USER_AGENT, LIGHT_TASK_MODEL
+    PERSONA_CONFIGS, CATEGORIES, CTAS, get_clean_source_name,
+    REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USER_AGENT, LIGHT_TASK_MODEL,
+    X_API_CREDENTIALS
 )
 from foodie_utils import (
    load_json_file, save_json_file, get_image, generate_image_query,
    upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
    summarize_with_gpt4o, generate_category_from_summary, post_to_wp,
-    prepare_post_data, select_best_author, smart_image_and_filter, get_flickr_image_via_ddg
+    prepare_post_data, select_best_author, smart_image_and_filter,
+    get_flickr_image_via_ddg
 )
 from foodie_hooks import get_dynamic_hook, select_best_cta

@@ -48,7 +51,6 @@ def setup_logging():
        with open(LOG_FILE, 'r') as f:
            lines = f.readlines()
        
-        # Group lines into log entries based on timestamp pattern
        log_entries = []
        current_entry = []
        timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}')
@@ -105,8 +107,6 @@ used_images = set(entry["title"] for entry in used_images_data if "title" in ent
 client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

 def clean_reddit_title(title):
-    """Remove Reddit flairs like [pro/chef] or [homemade] from the title."""
-    # Match patterns like [pro/chef], [homemade], etc. at the start of the title
    cleaned_title = re.sub(r'^\[.*?\]\s*', '', title).strip()
    logging.info(f"Cleaned Reddit title from '{title}' to '{cleaned_title}'")
    return cleaned_title
@@ -158,7 +158,7 @@ def is_interesting_reddit(title, summary, upvotes, comment_count, top_comments):
 def get_top_comments(post_url, reddit, limit=3):
    try:
        submission = reddit.submission(url=post_url)
-        submission.comment_sort = 'top'  # Move this line up
+        submission.comment_sort = 'top'
        submission.comments.replace_more(limit=0)
        top_comments = [comment.body for comment in submission.comments[:limit] if not comment.body.startswith('[deleted]')]
        logging.info(f"Fetched {len(top_comments)} top comments for {post_url}")
@@ -186,11 +186,10 @@ def fetch_reddit_posts():
                if pub_date < cutoff_date:
                    logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
                    continue
-                # Clean the title before storing
                cleaned_title = clean_reddit_title(submission.title)
                articles.append({
-                    "title": cleaned_title,  # Use cleaned title
-                    "raw_title": submission.title,  # Store raw title for reference if needed
+                    "title": cleaned_title,
+                    "raw_title": submission.title,
                    "link": f"https://www.reddit.com{submission.permalink}",
                    "summary": submission.selftext,
                    "feed_title": get_clean_source_name(subreddit_name),
@@ -212,7 +211,6 @@ def curate_from_reddit():
        logging.info("No Reddit posts available")
        return None, None, None

-    # Sort by upvotes descending
    articles.sort(key=lambda x: x["upvotes"], reverse=True)
    
    reddit = praw.Reddit(
@@ -224,15 +222,15 @@ def curate_from_reddit():
    attempts = 0
    max_attempts = 10
    while attempts < max_attempts and articles:
-        article = articles.pop(0)  # Take highest-upvote post
-        title = article["title"]  # Use cleaned title
-        raw_title = article["raw_title"]  # Use raw title for deduplication
+        article = articles.pop(0)
+        title = article["title"]
+        raw_title = article["raw_title"]
        link = article["link"]
        summary = article["summary"]
        source_name = "Reddit"
        original_source = '<a href="https://www.reddit.com/">Reddit</a>'
        
-        if raw_title in posted_titles:  # Check against raw title
+        if raw_title in posted_titles:
            print(f"Skipping already posted post: {raw_title}")
            logging.info(f"Skipping already posted post: {raw_title}")
            attempts += 1
@@ -250,7 +248,7 @@ def curate_from_reddit():
        
        top_comments = get_top_comments(link, reddit, limit=3)
        interest_score = is_interesting_reddit(
-            title,  # Use cleaned title
+            title,
            summary,
            article["upvotes"],
            article["comment_count"],
@@ -272,7 +270,7 @@ def curate_from_reddit():
            "If brief, expand on the core idea with relevant context about its appeal or significance. "
            "Do not include emojis in the summary."
        )
-        content_to_summarize = f"{title}\n\n{summary}"  # Use cleaned title
+        content_to_summarize = f"{title}\n\n{summary}"
        if top_comments:
            content_to_summarize += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
        
@@ -290,7 +288,7 @@ def curate_from_reddit():
        
        final_summary = insert_link_naturally(final_summary, source_name, link)
        
-        post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)  # Use cleaned title
+        post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
        if not post_data:
            attempts += 1
            continue
@@ -345,8 +343,8 @@ def curate_from_reddit():
                is_posting = False
            
            timestamp = datetime.now(timezone.utc).isoformat()
-            save_json_file(POSTED_TITLES_FILE, raw_title, timestamp)  # Save raw title
-            posted_titles.add(raw_title)  # Add raw title to set
+            save_json_file(POSTED_TITLES_FILE, raw_title, timestamp)
+            posted_titles.add(raw_title)
            logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}")
            
            if image_url: