import requests
import random
import time
import logging
import os
import json
import signal
import sys
import re
from datetime import datetime, timedelta, timezone
from openai import OpenAI
from urllib.parse import quote
from requests.packages.urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
import praw
from foodie_config import (
    AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS,
    SUMMARY_PERSONA_PROMPTS, CATEGORIES, CTAS, get_clean_source_name,
    REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USER_AGENT, LIGHT_TASK_MODEL
)
from foodie_utils import (
    load_json_file, save_json_file, get_image, generate_image_query,
    upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
    summarize_with_gpt4o, generate_category_from_summary, post_to_wp,
    prepare_post_data, select_best_author, smart_image_and_filter, get_flickr_image_via_ddg
)
from foodie_hooks import get_dynamic_hook, select_best_cta

# Flag to indicate if we're in the middle of posting
is_posting = False

def signal_handler(sig, frame):
    logging.info("Received termination signal, checking if safe to exit...")
    if is_posting:
        logging.info("Currently posting, will exit after completion.")
    else:
        logging.info("Safe to exit immediately.")
        sys.exit(0)

signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)

LOG_FILE = "/home/shane/foodie_automator/foodie_automator_reddit.log"
LOG_PRUNE_DAYS = 30

def setup_logging():
    if os.path.exists(LOG_FILE):
        with open(LOG_FILE, 'r') as f:
            lines = f.readlines()
        cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
        pruned_lines = []
        for line in lines:
            try:
                timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
                if timestamp > cutoff:
                    pruned_lines.append(line)
            except ValueError:
                logging.warning(f"Skipping malformed log line: {line.strip()[:50]}...")
                continue
        with open(LOG_FILE, 'w') as f:
            f.writelines(pruned_lines)
    
    logging.basicConfig(
        filename=LOG_FILE,
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s"
    )
    logging.getLogger("requests").setLevel(logging.WARNING)
    logging.getLogger("prawcore").setLevel(logging.WARNING)
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
    logging.getLogger().addHandler(console_handler)
    logging.info("Logging initialized for foodie_automator_reddit.py")

setup_logging()

POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_reddit_titles.json'
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
EXPIRATION_HOURS = 24
IMAGE_EXPIRATION_DAYS = 7

posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
posted_titles = set(entry["title"] for entry in posted_titles_data if "title" in entry)
used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
used_images = set(entry["title"] for entry in used_images_data if "title" in entry)

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def clean_reddit_title(title):
    """Remove Reddit flairs like [pro/chef] or [homemade] from the title."""
    # Match patterns like [pro/chef], [homemade], etc. at the start of the title
    cleaned_title = re.sub(r'^\[.*?\]\s*', '', title).strip()
    logging.info(f"Cleaned Reddit title from '{title}' to '{cleaned_title}'")
    return cleaned_title

def is_interesting_reddit(title, summary, upvotes, comment_count, top_comments):
    try:
        content = f"Title: {title}\n\nContent: {summary}"
        if top_comments:
            content += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
        
        response = client.chat.completions.create(
            model=LIGHT_TASK_MODEL,
            messages=[
                {"role": "system", "content": (
                    "Rate this Reddit post from 0-10 based on rarity, buzzworthiness, and engagement potential for food lovers, covering food topics (skip recipes). "
                    "Score 8-10 for rare, highly shareable ideas (e.g., unique dishes or restaurant trends). "
                    "Score 5-7 for fresh, engaging updates with broad appeal. Score below 5 for common or unremarkable content. "
                    "Consider comments for added context (e.g., specific locations or unique details). "
                    "Return only a number."
                )},
                {"role": "user", "content": content}
            ],
            max_tokens=5
        )
        base_score = int(response.choices[0].message.content.strip()) if response.choices[0].message.content.strip().isdigit() else 0

        engagement_boost = 0
        if upvotes >= 500:
            engagement_boost += 3
        elif upvotes >= 100:
            engagement_boost += 2
        elif upvotes >= 50:
            engagement_boost += 1
        
        if comment_count >= 100:
            engagement_boost += 2
        elif comment_count >= 20:
            engagement_boost += 1

        final_score = min(base_score + engagement_boost, 10)
        logging.info(f"Reddit Interest Score: {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count}, top_comments: {len(top_comments)}) for '{title}'")
        print(f"Interest Score for '{title[:50]}...': {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count})")
        return final_score
    except Exception as e:
        logging.error(f"Reddit interestingness scoring failed: {e}")
        print(f"Reddit Interest Error: {e}")
        return 0

def get_top_comments(post_url, reddit, limit=3):
    try:
        submission = reddit.submission(url=post_url)
        submission.comments.replace_more(limit=0)
        submission.comment_sort = 'top'
        top_comments = [comment.body for comment in submission.comments[:limit] if not comment.body.startswith('[deleted]')]
        logging.info(f"Fetched {len(top_comments)} top comments for {post_url}")
        return top_comments
    except Exception as e:
        logging.error(f"Failed to fetch comments for {post_url}: {e}")
        return []

def fetch_reddit_posts():
    reddit = praw.Reddit(
        client_id=REDDIT_CLIENT_ID,
        client_secret=REDDIT_CLIENT_SECRET,
        user_agent=REDDIT_USER_AGENT
    )
    feeds = ['FoodPorn', 'restaurant', 'FoodIndustry', 'food']
    articles = []
    cutoff_date = datetime.now(timezone.utc) - timedelta(hours=EXPIRATION_HOURS)
    
    logging.info(f"Starting fetch with cutoff date: {cutoff_date}")
    for subreddit_name in feeds:
        try:
            subreddit = reddit.subreddit(subreddit_name)
            for submission in subreddit.top(time_filter='day', limit=100):
                pub_date = datetime.fromtimestamp(submission.created_utc, tz=timezone.utc)
                if pub_date < cutoff_date:
                    logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
                    continue
                # Clean the title before storing
                cleaned_title = clean_reddit_title(submission.title)
                articles.append({
                    "title": cleaned_title,  # Use cleaned title
                    "raw_title": submission.title,  # Store raw title for reference if needed
                    "link": f"https://www.reddit.com{submission.permalink}",
                    "summary": submission.selftext,
                    "feed_title": get_clean_source_name(subreddit_name),
                    "pub_date": pub_date,
                    "upvotes": submission.score,
                    "comment_count": submission.num_comments
                })
            logging.info(f"Fetched {len(articles)} posts from r/{subreddit_name}")
        except Exception as e:
            logging.error(f"Failed to fetch Reddit feed r/{subreddit_name}: {e}")
    
    logging.info(f"Total Reddit posts fetched: {len(articles)}")
    return articles

def curate_from_reddit():
    articles = fetch_reddit_posts()
    if not articles:
        print("No Reddit posts available")
        logging.info("No Reddit posts available")
        return None, None, None

    # Sort by upvotes descending
    articles.sort(key=lambda x: x["upvotes"], reverse=True)
    
    reddit = praw.Reddit(
        client_id=REDDIT_CLIENT_ID,
        client_secret=REDDIT_CLIENT_SECRET,
        user_agent=REDDIT_USER_AGENT
    )
    
    attempts = 0
    max_attempts = 10
    while attempts < max_attempts and articles:
        article = articles.pop(0)  # Take highest-upvote post
        title = article["title"]  # Use cleaned title
        raw_title = article["raw_title"]  # Use raw title for deduplication
        link = article["link"]
        summary = article["summary"]
        source_name = "Reddit"
        original_source = '<a href="https://www.reddit.com/">Reddit</a>'
        
        if raw_title in posted_titles:  # Check against raw title
            print(f"Skipping already posted post: {raw_title}")
            logging.info(f"Skipping already posted post: {raw_title}")
            attempts += 1
            continue
        
        print(f"Trying Reddit Post: {title} from {source_name}")
        logging.info(f"Trying Reddit Post: {title} from {source_name}")
        
        image_query, relevance_keywords, skip = smart_image_and_filter(title, summary)
        if skip or any(keyword in title.lower() or keyword in summary.lower() for keyword in RECIPE_KEYWORDS + ["homemade"]):
            print(f"Skipping filtered Reddit post: {title}")
            logging.info(f"Skipping filtered Reddit post: {title}")
            attempts += 1
            continue
        
        top_comments = get_top_comments(link, reddit, limit=3)
        interest_score = is_interesting_reddit(
            title,  # Use cleaned title
            summary,
            article["upvotes"],
            article["comment_count"],
            top_comments
        )
        logging.info(f"Interest Score: {interest_score} for '{title}'")
        if interest_score < 6:
            print(f"Reddit Interest Too Low: {interest_score}")
            logging.info(f"Reddit Interest Too Low: {interest_score}")
            attempts += 1
            continue
        
        num_paragraphs = determine_paragraph_count(interest_score)
        extra_prompt = (
            f"Generate exactly {num_paragraphs} paragraphs. "
            f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details. "
            "Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}. "
            "Do NOT introduce unrelated concepts unless in the content or comments. "
            "If brief, expand on the core idea with relevant context about its appeal or significance."
        )
        content_to_summarize = f"{title}\n\n{summary}"  # Use cleaned title
        if top_comments:
            content_to_summarize += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
        
        final_summary = summarize_with_gpt4o(
            content_to_summarize,
            source_name,
            link,
            interest_score=interest_score,
            extra_prompt=extra_prompt
        )
        if not final_summary:
            logging.info(f"Summary failed for '{title}'")
            attempts += 1
            continue
        
        final_summary = insert_link_naturally(final_summary, source_name, link)
        
        post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)  # Use cleaned title
        if not post_data:
            attempts += 1
            continue
        
        image_url, image_source, uploader, page_url = get_flickr_image_via_ddg(image_query, relevance_keywords)
        if not image_url:
            image_url, image_source, uploader, page_url = get_image(image_query)
        
        hook = get_dynamic_hook(post_data["title"]).strip()
        cta = select_best_cta(post_data["title"], final_summary, post_url=None)
        
        post_data["content"] = f"{final_summary}\n\n{cta}"
        
        global is_posting
        is_posting = True
        try:
            post_id, post_url = post_to_wp(
                post_data=post_data,
                category=category,
                link=link,
                author=author,
                image_url=image_url,
                original_source=original_source,
                image_source=image_source,
                uploader=uploader,
                pixabay_url=pixabay_url,
                interest_score=interest_score
            )
        finally:
            is_posting = False

        if post_id:
            cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
            post_data["content"] = f"{final_summary}\n\n{cta}"
            
            is_posting = True
            try:
                post_to_wp(
                    post_data=post_data,
                    category=category,
                    link=link,
                    author=author,
                    image_url=image_url,
                    original_source=original_source,
                    image_source=image_source,
                    uploader=uploader,
                    pixabay_url=pixabay_url,
                    interest_score=interest_score,
                    post_id=post_id
                )
            finally:
                is_posting = False
            
            timestamp = datetime.now(timezone.utc).isoformat()
            save_json_file(POSTED_TITLES_FILE, raw_title, timestamp)  # Save raw title
            posted_titles.add(raw_title)  # Add raw title to set
            logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}")
            
            if image_url:
                save_json_file(USED_IMAGES_FILE, image_url, timestamp)
                used_images.add(image_url)
                logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE} with timestamp {timestamp}")
            
            print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
            print(f"Actual post URL: {post_url}")
            logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
            logging.info(f"Actual post URL: {post_url}")
            return post_data, category, random.randint(0, 1800)
        
        attempts += 1
        logging.info(f"WP posting failed for '{post_data['title']}'")
    
    print("No interesting Reddit post found after attempts")
    logging.info("No interesting Reddit post found after attempts")
    return None, None, random.randint(600, 1800)

def run_reddit_automator():
    print(f"{datetime.now(timezone.utc)} - INFO - ***** Reddit Automator Launched *****")
    logging.info("***** Reddit Automator Launched *****")
    
    post_data, category, sleep_time = curate_from_reddit()
    if not post_data:
        print(f"No postable Reddit article found - sleeping for {sleep_time} seconds")
        logging.info(f"No postable Reddit article found - sleeping for {sleep_time} seconds")
    else:
        print(f"Completed Reddit run with sleep time: {sleep_time} seconds")
        logging.info(f"Completed Reddit run with sleep time: {sleep_time} seconds")
    print(f"Sleeping for {sleep_time}s")
    time.sleep(sleep_time)
    return post_data, category, sleep_time

if __name__ == "__main__":
    run_reddit_automator()