import base64
import json
import logging
import os
import random
import re
from PIL import Image
import pytesseract
import io
import tempfile
import requests
import time
from dotenv import load_dotenv
from datetime import datetime, timezone, timedelta
from openai import OpenAI
from urllib.parse import quote
from duckduckgo_search import DDGS
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry  # the requests.packages.* path is a deprecated alias
import tweepy
from foodie_config import (
    RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS,
    PERSONA_CONFIGS, get_clean_source_name, AUTHORS,
    LIGHT_TASK_MODEL, SUMMARY_MODEL, X_API_CREDENTIALS
)

load_dotenv()

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


def load_json_file(filename, expiration_days=None):
    data = []
    if os.path.exists(filename):
        try:
            with open(filename, 'r') as f:
                lines = f.readlines()
            for i, line in enumerate(lines, 1):
                if line.strip():
                    try:
                        entry = json.loads(line.strip())
                        if not isinstance(entry, dict) or "title" not in entry or "timestamp" not in entry:
                            logging.warning(f"Skipping malformed entry in {filename} at line {i}: {entry}")
                            continue
                        data.append(entry)
                    except json.JSONDecodeError as e:
                        logging.warning(f"Skipping invalid JSON line in {filename} at line {i}: {e}")
            loaded_count = len(data)  # count before expiration filtering, for accurate logging
            if expiration_days:
                cutoff = (datetime.now(timezone.utc) - timedelta(days=expiration_days)).isoformat()
                data = [entry for entry in data if entry["timestamp"] > cutoff]
            logging.info(f"Loaded {loaded_count} entries from {filename}, {len(data)} valid after expiration check")
        except Exception as e:
            logging.error(f"Failed to load {filename}: {e}")
            data = []  # Reset to empty on failure
    return data


def save_json_file(filename, key, value):
    entry = {"title": key, "timestamp": value}
    PRUNE_INTERVAL_DAYS = 180
    try:
        data = load_json_file(filename, expiration_days=PRUNE_INTERVAL_DAYS)
        # Remove duplicates by title
        data = [item for item in data if item["title"] != key]
        data.append(entry)
        with open(filename, 'w') as f:
            for item in data:
                json.dump(item, f)
                f.write('\n')
        logging.info(f"Saved '{key}' to {filename}")
        print(f"DEBUG: Saved '{key}' to {filename}")
        loaded_data = load_json_file(filename, expiration_days=PRUNE_INTERVAL_DAYS)
        logging.info(f"Pruned {filename} to {len(loaded_data)} entries (older than {PRUNE_INTERVAL_DAYS} days removed)")
    except Exception as e:
        logging.error(f"Failed to save or prune {filename}: {e}")
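
# The two helpers above assume a JSONL (one JSON object per line) layout, e.g.:
#   {"title": "Some article", "timestamp": "2025-01-01T00:00:00+00:00"}
# A minimal usage sketch (the file path here is illustrative, not from this module):
#   seen = load_json_file('/home/shane/foodie_automator/seen_titles.json', expiration_days=30)
#   save_json_file('/home/shane/foodie_automator/seen_titles.json', "Some article",
#                  datetime.now(timezone.utc).isoformat())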


def load_post_counts():
    counts = []
    filename = '/home/shane/foodie_automator/x_post_counts.json'
    if os.path.exists(filename):
        try:
            with open(filename, 'r') as f:
                lines = f.readlines()
            for i, line in enumerate(lines, 1):
                if line.strip():
                    try:
                        entry = json.loads(line.strip())
                        # Check for expected fields in x_post_counts.json
                        required = ("username", "month", "monthly_count", "day", "daily_count")
                        if not isinstance(entry, dict) or any(field not in entry for field in required):
                            logging.warning(f"Skipping malformed entry in {filename} at line {i}: {entry}")
                            continue
                        counts.append(entry)
                    except json.JSONDecodeError as e:
                        logging.warning(f"Skipping invalid JSON line in {filename} at line {i}: {e}")
            logging.info(f"Loaded {len(counts)} entries from {filename}")
        except Exception as e:
            logging.error(f"Failed to load {filename}: {e}")
            counts = []  # Reset to empty on failure
    if not counts:
        counts = [{
            "username": author["username"],
            "month": datetime.now(timezone.utc).strftime("%Y-%m"),
            "monthly_count": 0,
            "day": datetime.now(timezone.utc).strftime("%Y-%m-%d"),
            "daily_count": 0
        } for author in AUTHORS]
    # Roll counters over when the month or day changes
    current_month = datetime.now(timezone.utc).strftime("%Y-%m")
    current_day = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    for entry in counts:
        if entry["month"] != current_month:
            entry["month"] = current_month
            entry["monthly_count"] = 0
        if entry["day"] != current_day:
            entry["day"] = current_day
            entry["daily_count"] = 0
    return counts


def save_post_counts(counts):
    with open('/home/shane/foodie_automator/x_post_counts.json', 'w') as f:
        for item in counts:
            json.dump(item, f)
            f.write('\n')
    logging.info("Saved post counts to x_post_counts.json")


def generate_article_tweet(author, post, persona):
    persona_config = PERSONA_CONFIGS[persona]
    base_prompt = persona_config["x_prompt"].format(
        description=persona_config["description"],
        tone=persona_config["tone"]
    )
    prompt = base_prompt.replace(
        "For article tweets, include the article title, a quirky hook, and the URL.",
        f"Generate an article tweet including the title '{post['title']}', a quirky hook, and the URL '{post['url']}'."
    )
    try:
        response = client.chat.completions.create(
            model=LIGHT_TASK_MODEL,
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": f"Generate tweet for {post['title']}."}
            ],
            max_tokens=100,
            temperature=0.9
        )
        tweet = response.choices[0].message.content.strip()
        if len(tweet) > 280:
            tweet = tweet[:277] + "..."
        logging.info(f"Generated article tweet for {author['username']}: {tweet}")
        return tweet
    except Exception as e:
        logging.error(f"Failed to generate article tweet for {author['username']}: {e}")
        return f"This trend is fire! Check out {post['title']} at {post['url']} #Foodie"


def post_tweet(author, tweet):
    credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None)
    if not credentials:
        logging.error(f"No X credentials found for {author['username']}")
        return False
    post_counts = load_post_counts()
    author_count = next((entry for entry in post_counts if entry["username"] == author["username"]), None)
    if author_count is None:
        logging.error(f"No post-count entry found for {author['username']}")
        return False
    if author_count["monthly_count"] >= 500:
        logging.warning(f"Monthly post limit (500) reached for {author['username']}")
        return False
    if author_count["daily_count"] >= 20:
        logging.warning(f"Daily post limit (20) reached for {author['username']}")
        return False
    try:
        # Local name so the module-level OpenAI `client` is not shadowed
        x_client = tweepy.Client(
            consumer_key=credentials["api_key"],
            consumer_secret=credentials["api_secret"],
            access_token=credentials["access_token"],
            access_token_secret=credentials["access_token_secret"]
        )
        x_client.create_tweet(text=tweet)
        author_count["monthly_count"] += 1
        author_count["daily_count"] += 1
        save_post_counts(post_counts)
        logging.info(f"Posted tweet for {author['username']}: {tweet}")
        return True
    except Exception as e:
        logging.error(f"Failed to post tweet for {author['username']}: {e}")
        return False


def select_best_persona(interest_score, content=""):
    logging.info("Using select_best_persona with interest_score and content")
    personas = ["Visionary Editor", "Foodie Critic", "Trend Scout", "Culture Connoisseur"]
    content_lower = content.lower()
    if any(kw in content_lower for kw in ["tech", "ai", "innovation", "sustainability"]):
        return random.choice(["Trend Scout", "Visionary Editor"])
    elif any(kw in content_lower for kw in ["review", "critic", "taste", "flavor"]):
        return "Foodie Critic"
    elif any(kw in content_lower for kw in ["culture", "tradition", "history"]):
        return "Culture Connoisseur"
    if interest_score >= 8:
        return random.choice(personas[:2])
    elif interest_score >= 6:
        return random.choice(personas[2:])
    return random.choice(personas)
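
# HTTPAdapter and Retry are imported at the top of this module but never mounted
# anywhere. A minimal sketch of how they are conventionally wired up (an assumption
# about intent, not behavior this module currently has); defined but never called:
def _demo_retry_session():
    session = requests.Session()
    retries = Retry(total=3, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])
    session.mount("https://", HTTPAdapter(max_retries=retries))
    return session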
base_url = "https://pixabay.com/api/" queries = [search_query.split()[:2], search_query.split()] for query in queries: short_query = " ".join(query) params = { "key": api_key, "q": short_query, "image_type": "photo", "safesearch": True, "per_page": 20 } try: logging.info(f"Fetching Pixabay image for query '{short_query}'") response = requests.get(base_url, params=params, timeout=10) response.raise_for_status() data = response.json() if not data.get("hits"): logging.warning(f"No image hits for query '{short_query}'") continue valid_images = [ hit for hit in data["hits"] if all(tag not in hit.get("tags", "").lower() for tag in ["dog", "cat", "family", "child", "baby"]) ] if not valid_images: logging.warning(f"No valid images for query '{short_query}' after filtering") continue image = random.choice(valid_images) image_url = image["webformatURL"] image_source = "Pixabay" uploader = image.get("user", "Unknown") pixabay_url = image["pageURL"] logging.info(f"Fetched image URL: {image_url} by {uploader} for query '{short_query}'") print(f"DEBUG: Image selected for query '{short_query}': {image_url}") return image_url, image_source, uploader, pixabay_url except requests.exceptions.RequestException as e: logging.error(f"Image fetch failed for query '{short_query}': {e}") continue logging.error(f"All Pixabay image queries failed: {queries}") return None, None, None, None def generate_image_query(content): try: response = client.chat.completions.create( model=LIGHT_TASK_MODEL, messages=[ {"role": "system", "content": ( "From this content (title and summary), generate two sets of 2-3 concise keywords for an image search about restaurant/food industry trends:\n" "1. Search keywords: For finding images (e.g., 'AI restaurant technology'). Focus on key themes like technology, sustainability, dining, or specific food concepts.\n" "2. Relevance keywords: For filtering relevant images (e.g., 'ai tech dining'). Focus on core concepts to ensure match.\n" "Avoid vague terms like 'trends', 'future', or unrelated words like 'dog', 'family'. 
" "Return as JSON: {'search': 'keyword1 keyword2', 'relevance': 'keyword3 keyword4'}" )}, {"role": "user", "content": content} ], max_tokens=100 ) raw_result = response.choices[0].message.content.strip() logging.info(f"Raw GPT image query response: '{raw_result}'") print(f"DEBUG: Raw GPT image query response: '{raw_result}'") cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip() result = json.loads(cleaned_result) if not isinstance(result, dict) or "search" not in result or "relevance" not in result or len(result["search"].split()) < 2: logging.warning(f"Invalid image query format: {result}, using fallback") words = re.findall(r'\w+', content.lower()) filtered_words = [w for w in words if w not in RECIPE_KEYWORDS + PROMO_KEYWORDS + ['trends', 'future', 'dog', 'family']] search = " ".join(filtered_words[:3]) or "restaurant innovation" relevance = filtered_words[3:6] or ["dining", "tech"] result = {"search": search, "relevance": " ".join(relevance)} logging.info(f"Generated image query: {result}") print(f"DEBUG: Image query from content: {result}") return result["search"], result["relevance"].split() except json.JSONDecodeError as e: logging.error(f"JSON parsing failed for image query: {e}, raw response: '{raw_result}'") words = re.findall(r'\w+', content.lower()) filtered_words = [w for w in words if w not in RECIPE_KEYWORDS + PROMO_KEYWORDS + ['trends', 'future', 'dog', 'family']] search = " ".join(filtered_words[:3]) or "restaurant innovation" relevance = filtered_words[3:6] or ["dining", "tech"] logging.info(f"Fallback image query: {{'search': '{search}', 'relevance': '{' '.join(relevance)}'}}") return search, relevance except Exception as e: logging.error(f"Image query generation failed: {e}") print(f"Image Query Error: {e}") return None, None def smart_image_and_filter(title, summary): try: content = f"{title}\n\n{summary}" prompt = ( "Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) " "for an image search about food industry trends or viral content. Prioritize specific terms if present, " "otherwise focus on the main theme. " "Return 'SKIP' if the article is about home appliances, recipes, promotions, or contains 'homemade', else 'KEEP'. " "Return as JSON: {'image_query': 'specific term', 'relevance': ['keyword1', 'keyword2'], 'action': 'KEEP' or 'SKIP'}" ) response = client.chat.completions.create( model=LIGHT_TASK_MODEL, messages=[ {"role": "system", "content": prompt}, {"role": "user", "content": content} ], max_tokens=100 ) raw_result = response.choices[0].message.content.strip() logging.info(f"Raw GPT smart image/filter response: '{raw_result}'") cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip() try: result = json.loads(cleaned_result) except json.JSONDecodeError as e: logging.warning(f"JSON parsing failed: {e}, raw: '{cleaned_result}'. 
Using fallback.") return "food trends", ["cuisine", "dining"], False if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result: logging.warning(f"Invalid GPT response format: {result}, using fallback") return "food trends", ["cuisine", "dining"], False image_query = result["image_query"] relevance_keywords = result["relevance"] skip_flag = result["action"] == "SKIP" or "homemade" in title.lower() or "homemade" in summary.lower() logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Skip: {skip_flag}") if not image_query or len(image_query.split()) < 2: logging.warning(f"Image query '{image_query}' too vague, using fallback") return "food trends", ["cuisine", "dining"], skip_flag return image_query, relevance_keywords, skip_flag except Exception as e: logging.error(f"Smart image/filter failed: {e}, using fallback") return "food trends", ["cuisine", "dining"], False def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_password, image_source="Pixabay", uploader=None, pixabay_url=None): try: safe_title = post_title.encode('ascii', 'ignore').decode('ascii').replace(' ', '_')[:50] headers = { "Authorization": f"Basic {base64.b64encode(f'{wp_username}:{wp_password}'.encode()).decode()}", "Content-Disposition": f"attachment; filename={safe_title}.jpg", "Content-Type": "image/jpeg" } image_headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' } logging.info(f"Fetching image from {image_url} for '{post_title}'") image_response = requests.get(image_url, headers=image_headers, timeout=10) image_response.raise_for_status() response = requests.post( f"{wp_base_url}/media", headers=headers, data=image_response.content ) response.raise_for_status() image_id = response.json()["id"] caption = f'{image_source} by {uploader}' if pixabay_url and uploader else image_source requests.post( f"{wp_base_url}/media/{image_id}", headers={"Authorization": headers["Authorization"], "Content-Type": "application/json"}, json={"caption": caption} ) logging.info(f"Uploaded image '{safe_title}.jpg' to WP (ID: {image_id}) with caption '{caption}'") return image_id except Exception as e: logging.error(f"Image upload to WP failed for '{post_title}': {e}") return None def determine_paragraph_count(interest_score): if interest_score >= 9: return 5 elif interest_score >= 7: return 4 return 3 def is_interesting(summary): try: response = client.chat.completions.create( model=LIGHT_TASK_MODEL, messages=[ {"role": "system", "content": ( "Rate this content from 0-10 based on its rarity, buzzworthiness, and engagement potential for food lovers, covering a wide range of food topics (skip recipes). " "Score 8-10 for rare, highly shareable ideas that grab attention. " "Score 5-7 for fresh, engaging updates with broad appeal. Score below 5 for common or unremarkable content. " "Return only a number." 
)}, {"role": "user", "content": f"Content: {summary}"} ], max_tokens=5 ) raw_score = response.choices[0].message.content.strip() score = int(raw_score) if raw_score.isdigit() else 0 print(f"Interest Score for '{summary[:50]}...': {score} (raw: {raw_score})") logging.info(f"Interest Score: {score} (raw: {raw_score})") return score except Exception as e: logging.error(f"Interestingness scoring failed: {e}") print(f"Interest Error: {e}") return 0 def generate_title_from_summary(summary): banned_words = ["elevate", "elevating", "elevated"] for attempt in range(3): try: response = client.chat.completions.create( model=LIGHT_TASK_MODEL, messages=[ {"role": "system", "content": ( "Generate a concise, engaging title (under 100 characters) based on this summary, covering food topics. " "Craft it with Upworthy/Buzzfeed flair—think ‘you won’t believe this’ or ‘this is nuts’—for food insiders. " "Avoid quotes, emojis, special characters, or the words 'elevate', 'elevating', 'elevated'. " "End with a question to spark shares." )}, {"role": "user", "content": f"Summary: {summary}"} ], max_tokens=30 ) title = response.choices[0].message.content.strip().replace('"', '').replace("'", "") if ':' in title: title = title.split(':', 1)[1].strip() if len(title) > 100 or any(word in title.lower() for word in banned_words): reason = "length" if len(title) > 100 else "banned word" print(f"Rejected title (attempt {attempt + 1}/3): '{title}' due to {reason}") logging.info(f"Rejected title (attempt {attempt + 1}/3): '{title}' due to {reason}") continue logging.info(f"Generated title: {title}") return title except Exception as e: logging.error(f"Title generation failed (attempt {attempt + 1}/3): {e}") print(f"Title Error: {e}") print("Failed to generate valid title after 3 attempts") logging.info("Failed to generate valid title after 3 attempts") return None def summarize_with_gpt4o(content, source_name, link, interest_score=0, extra_prompt=""): try: persona = select_best_persona(interest_score, content) persona_config = PERSONA_CONFIGS.get(persona, { "article_prompt": "Write a concise, engaging summary that captures the essence of the content for food lovers.", "description": "a generic food writer", "tone": "an engaging tone" }) prompt = persona_config["article_prompt"].format( description=persona_config["description"], tone=persona_config["tone"], num_paragraphs=determine_paragraph_count(interest_score) ) logging.info(f"Using {persona} with interest_score and content") full_prompt = ( f"{prompt}\n\n" f"{extra_prompt}\n\n" f"Content to summarize:\n{content}\n\n" f"Source: {source_name}\n" f"Link: {link}" ) response = client.chat.completions.create( model=SUMMARY_MODEL, messages=[ {"role": "system", "content": full_prompt}, {"role": "user", "content": content} ], max_tokens=1000, temperature=0.7 ) summary = response.choices[0].message.content.strip() logging.info(f"Processed summary (Persona: {persona}): {summary}") return summary except Exception as e: logging.error(f"Summary generation failed with model {SUMMARY_MODEL}: {e}") return None def insert_link_naturally(summary, source_name, source_url): try: prompt = ( "Take this summary and insert a single HTML link naturally into one paragraph (randomly chosen). " "Use the format '{source_name}' and weave it into the text seamlessly, " "e.g., 'The latest scoop from {source_name} reveals...' or '{source_name} uncovers this wild shift.' " "Vary the phrasing creatively to avoid repetition (don’t always use 'dives into'). 
" "Place the link at a sentence boundary (after a period, not within numbers like '6.30am' or '1.5'). " "Maintain the original tone and flow, ensuring the link reads as part of the sentence, not standalone. " "Return the modified summary with exactly one link, no extra formatting or newlines beyond the original.\n\n" "Summary:\n{summary}\n\n" "Source Name: {source_name}\nSource URL: {source_url}" ).format(summary=summary, source_name=source_name, source_url=source_url) response = client.chat.completions.create( model=LIGHT_TASK_MODEL, messages=[ {"role": "system", "content": prompt}, {"role": "user", "content": "Insert the link naturally into the summary."} ], max_tokens=1000, temperature=0.7 ) new_summary = response.choices[0].message.content.strip() link_pattern = f'{source_name}' if new_summary and new_summary.count(link_pattern) == 1: logging.info(f"Summary with naturally embedded link: {new_summary}") return new_summary logging.warning(f"GPT failed to insert link correctly: {new_summary}. Using fallback.") except Exception as e: logging.error(f"Link insertion failed: {e}") time_pattern = r'\b\d{1,2}\.\d{2}(?:am|pm)\b' protected_summary = re.sub(time_pattern, lambda m: m.group(0).replace('.', '@'), summary) paragraphs = protected_summary.split('\n') if not paragraphs or all(not p.strip() for p in paragraphs): logging.error("No valid paragraphs to insert link.") return summary target_para = random.choice([p for p in paragraphs if p.strip()]) phrases = [ f"The scoop from {link_pattern} spills the details", f"{link_pattern} uncovers this wild shift", f"This gem via {link_pattern} drops some truth", f"{link_pattern} breaks down the buzz" ] insertion_phrase = random.choice(phrases) sentences = re.split(r'(?<=[.!?])\s+', target_para) insertion_point = -1 for i, sent in enumerate(sentences): if sent.strip() and '@' not in sent: insertion_point = sum(len(s) + 1 for s in sentences[:i+1]) break if insertion_point == -1: insertion_point = len(target_para) new_para = f"{target_para[:insertion_point]} {insertion_phrase}. {target_para[insertion_point:]}".strip() paragraphs[paragraphs.index(target_para)] = new_para new_summary = '\n'.join(paragraphs) new_summary = new_summary.replace('@', '.') logging.info(f"Fallback summary with link: {new_summary}") return new_summary def generate_category_from_summary(summary): try: if not isinstance(summary, str) or not summary.strip(): logging.warning(f"Invalid summary for category generation: {summary}. Defaulting to 'Trends'.") return "Trends" response = client.chat.completions.create( model=LIGHT_TASK_MODEL, messages=[ {"role": "system", "content": ( "Based on this summary, select the most relevant category from: Food, Culture, Trends, Health, Lifestyle, Drink, Eats. " "Return only the category name." 
)}, {"role": "user", "content": summary} ], max_tokens=10 ) category = response.choices[0].message.content.strip() logging.info(f"Generated category: {category}") return category if category in ["Food", "Culture", "Trends", "Health", "Lifestyle", "Drink", "Eats"] else "Trends" except Exception as e: logging.error(f"Category generation failed: {e}") return "Trends" def get_wp_category_id(category_name, wp_base_url, wp_username, wp_password): try: headers = {"Authorization": f"Basic {base64.b64encode(f'{wp_username}:{wp_password}'.encode()).decode()}"} response = requests.get(f"{wp_base_url}/categories", headers=headers, params={"search": category_name}) response.raise_for_status() categories = response.json() for cat in categories: if cat["name"].lower() == category_name.lower(): return cat["id"] return None except Exception as e: logging.error(f"Failed to get WP category ID for '{category_name}': {e}") return None def create_wp_category(category_name, wp_base_url, wp_username, wp_password): try: headers = { "Authorization": f"Basic {base64.b64encode(f'{wp_username}:{wp_password}'.encode()).decode()}", "Content-Type": "application/json" } payload = {"name": category_name} response = requests.post(f"{wp_base_url}/categories", headers=headers, json=payload) response.raise_for_status() return response.json()["id"] except Exception as e: logging.error(f"Failed to create WP category '{category_name}': {e}") return None def get_wp_tag_id(tag_name, wp_base_url, wp_username, wp_password): try: headers = {"Authorization": f"Basic {base64.b64encode(f'{wp_username}:{wp_password}'.encode()).decode()}"} response = requests.get(f"{wp_base_url}/tags", headers=headers, params={"search": tag_name}) response.raise_for_status() tags = response.json() for tag in tags: if tag["name"].lower() == tag_name.lower(): return tag["id"] return None except Exception as e: logging.error(f"Failed to get WP tag ID for '{tag_name}': {e}") return None def post_to_wp(post_data, category, link, author, image_url, original_source, image_source="Pixabay", uploader=None, pixabay_url=None, interest_score=4, post_id=None, post_tweet=True): wp_base_url = "https://insiderfoodie.com/wp-json/wp/v2" logging.info(f"Starting post_to_wp for '{post_data['title']}', image_source: {image_source}") if not isinstance(author, dict) or "username" not in author or "password" not in author: raise ValueError(f"Invalid author data: {author}. Expected a dictionary with 'username' and 'password' keys.") wp_username = author["username"] wp_password = author["password"] if not isinstance(interest_score, int): logging.error(f"Invalid interest_score type: {type(interest_score)}, value: '{interest_score}'. Defaulting to 4.") interest_score = 4 elif interest_score < 0 or interest_score > 10: logging.warning(f"interest_score out of valid range (0-10): {interest_score}. 
Clamping to 4.") interest_score = min(max(interest_score, 0), 10) try: headers = { "Authorization": f"Basic {base64.b64encode(f'{wp_username}:{wp_password}'.encode()).decode()}", "Content-Type": "application/json" } auth_test = requests.get(f"{wp_base_url}/users/me", headers=headers) auth_test.raise_for_status() logging.info(f"Auth test passed for {wp_username}: {auth_test.json()['id']}") category_id = get_wp_category_id(category, wp_base_url, wp_username, wp_password) if not category_id: category_id = create_wp_category(category, wp_base_url, wp_username, wp_password) logging.info(f"Created new category '{category}' with ID {category_id}") else: logging.info(f"Found existing category '{category}' with ID {category_id}") tags = [1] if interest_score >= 9: picks_tag_id = get_wp_tag_id("Picks", wp_base_url, wp_username, wp_password) if picks_tag_id and picks_tag_id not in tags: tags.append(picks_tag_id) logging.info(f"Added 'Picks' tag (ID: {picks_tag_id}) to post due to high interest score: {interest_score}") content = post_data["content"] if content is None: logging.error(f"Post content is None for title '{post_data['title']}' - using fallback") content = "Content unavailable. Check the original source for details." formatted_content = "\n".join(f"
{para}
" for para in content.split('\n') if para.strip()) author_id_map = { "owenjohnson": 10, "javiermorales": 2, "aishapatel": 3, "trangnguyen": 12, "keishareid": 13, "lilamoreau": 7 } author_id = author_id_map.get(author["username"], 5) payload = { "title": post_data["title"], "content": formatted_content, "status": "publish", "categories": [category_id], "tags": tags, "author": author_id, "meta": { "original_link": link, "original_source": original_source, "interest_score": interest_score } } if image_url and not post_id: logging.info(f"Attempting image upload for '{post_data['title']}', URL: {image_url}, source: {image_source}") image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, pixabay_url) if not image_id: logging.info(f"Flickr upload failed for '{post_data['title']}', falling back to Pixabay") pixabay_query = post_data["title"][:50] image_url, image_source, uploader, pixabay_url = get_image(pixabay_query) if image_url: image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, pixabay_url) if image_id: payload["featured_media"] = image_id else: logging.warning(f"All image uploads failed for '{post_data['title']}' - posting without image") endpoint = f"{wp_base_url}/posts/{post_id}" if post_id else f"{wp_base_url}/posts" method = requests.post logging.debug(f"Sending WP request to {endpoint} with payload: {json.dumps(payload, indent=2)}") response = method(endpoint, headers=headers, json=payload) response.raise_for_status() post_info = response.json() logging.debug(f"WP response: {json.dumps(post_info, indent=2)}") if not isinstance(post_info, dict) or "id" not in post_info: raise ValueError(f"Invalid WP response: {post_info}") post_id = post_info["id"] post_url = post_info["link"] # Save to recent_posts.json timestamp = datetime.now(timezone.utc).isoformat() save_post_to_recent(post_data["title"], post_url, author["username"], timestamp) # Post article tweet to X only if post_tweet is True if post_tweet: try: post = {"title": post_data["title"], "url": post_url} tweet = generate_article_tweet(author, post, author["persona"]) if post_tweet(author, tweet): logging.info(f"Successfully posted article tweet for {author['username']} on X") else: logging.warning(f"Failed to post article tweet for {author['username']} on X") except Exception as e: logging.error(f"Error posting article tweet for {author['username']}: {e}") logging.info(f"Posted/Updated by {author['username']}: {post_data['title']} (ID: {post_id})") return post_id, post_url except requests.exceptions.RequestException as e: logging.error(f"WP API request failed: {e} - Response: {e.response.text if e.response else 'No response'}") print(f"WP Error: {e}") return None, None except KeyError as e: logging.error(f"WP payload error - Missing key: {e} - Author data: {author}") print(f"WP Error: {e}") return None, None except Exception as e: logging.error(f"WP posting failed: {e}") print(f"WP Error: {e}") return None, None def get_flickr_image_via_ddg(search_query, relevance_keywords): try: with DDGS() as ddgs: results = ddgs.images( f"{search_query} flickr site:flickr.com -poster -infographic -chart -graph -data -stats -text -typography", license_image="sharecommercially", max_results=30 ) if not results: logging.warning(f"No Flickr images found via DDG for query '{search_query}'") return None, None, None, None headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; 


def get_flickr_image_via_ddg(search_query, relevance_keywords):
    try:
        with DDGS() as ddgs:
            results = ddgs.images(
                f"{search_query} flickr site:flickr.com -poster -infographic -chart -graph -data -stats -text -typography",
                license_image="sharecommercially",
                max_results=30
            )
        if not results:
            logging.warning(f"No Flickr images found via DDG for query '{search_query}'")
            return None, None, None, None
        headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
        candidates = []
        for r in results:
            image_url = r.get("image", "")
            page_url = r.get("url", "")
            if not image_url or "live.staticflickr.com" not in image_url:
                continue
            try:
                response = requests.get(page_url, headers=headers, timeout=10)
                response.raise_for_status()
                soup = BeautifulSoup(response.content, 'html.parser')
                time.sleep(1)
                tags_elem = soup.find_all('a', class_='tag')
                tags = [tag.text.strip().lower() for tag in tags_elem] if tags_elem else []
                title_elem = soup.find('h1', class_='photo-title')
                title = title_elem.text.strip().lower() if title_elem else r.get("title", "").lower()
                exclude_keywords = [
                    "poster", "infographic", "chart", "graph", "data", "stats", "text",
                    "typography", "design", "advertisement", "illustration", "diagram", "layout", "print"
                ]
                matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title]
                if matched_keywords:
                    logging.info(f"Skipping text-heavy image: {image_url} (tags: {tags}, title: {title}, matched: {matched_keywords})")
                    continue
                uploader = soup.find('a', class_='owner-name')
                uploader = uploader.text.strip() if uploader else "Flickr User"
                candidates.append({
                    "image_url": image_url,
                    "page_url": page_url,
                    "uploader": uploader,
                    "tags": tags,
                    "title": title
                })
            except requests.exceptions.RequestException as e:
                logging.info(f"Skipping unavailable image: {image_url} (page: {page_url}, error: {e})")
                continue
        if not candidates:
            logging.warning(f"No valid candidate images after filtering for '{search_query}'")
            return None, None, None, None
        result = random.choice(candidates)
        image_url = result["image_url"]
        temp_file = None
        temp_path = None  # initialized so the finally block is safe on early failure
        try:
            img_response = requests.get(image_url, headers=headers, timeout=10)
            img_response.raise_for_status()
            with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
                temp_file.write(img_response.content)
                temp_path = temp_file.name
            img = Image.open(temp_path)
            text = pytesseract.image_to_string(img)
            char_count = len(text.strip())
            logging.info(f"OCR processed {image_url}: {char_count} characters detected")
            if char_count > 200:
                logging.info(f"Skipping text-heavy image (OCR): {image_url} (char_count: {char_count})")
                return None, None, None, None
            flickr_data = {
                "title": search_query,
                "image_url": image_url,
                "source": "Flickr",
                "uploader": result["uploader"],
                "page_url": result["page_url"],
                "timestamp": datetime.now().isoformat(),
                "ocr_chars": char_count
            }
            flickr_file = "/home/shane/foodie_automator/flickr_images.json"
            with open(flickr_file, 'a') as f:
                json.dump(flickr_data, f)
                f.write('\n')
            logging.info(f"Saved Flickr image to {flickr_file}: {image_url}")
            logging.info(f"Fetched Flickr image URL: {image_url} by {result['uploader']} for query '{search_query}' (tags: {result['tags']})")
            print(f"DEBUG: Flickr image selected: {image_url}")
            return image_url, "Flickr", result["uploader"], result["page_url"]
        except requests.exceptions.HTTPError as e:
            if e.response.status_code == 429:
                logging.warning(f"Rate limit hit for {image_url}. Falling back to Pixabay.")
                return None, None, None, None
            else:
                logging.warning(f"Download failed for {image_url}: {e}")
                return None, None, None, None
        except Exception as e:
            logging.warning(f"OCR processing failed for {image_url}: {e}")
            return None, None, None, None
        finally:
            if temp_path and os.path.exists(temp_path):
                os.unlink(temp_path)
    except Exception as e:
        logging.error(f"Flickr/DDG image fetch failed for '{search_query}': {e}")
        return None, None, None, None


def select_best_author(summary):
    try:
        response = client.chat.completions.create(
            model=LIGHT_TASK_MODEL,
            messages=[
                {"role": "system", "content": (
                    "Based on this restaurant/food industry trend summary, pick the most suitable author from: "
                    "owenjohnson, javiermorales, aishapatel, trangnguyen, keishareid, lilamoreau. "
                    "Consider their expertise: owenjohnson (global dining trends), javiermorales (food critique), "
                    "aishapatel (emerging food trends), trangnguyen (cultural dining), keishareid (soul food heritage), "
                    "lilamoreau (global street food). Return only the username."
                )},
                {"role": "user", "content": summary}
            ],
            max_tokens=20
        )
        author = response.choices[0].message.content.strip()
        valid_authors = ["owenjohnson", "javiermorales", "aishapatel", "trangnguyen", "keishareid", "lilamoreau"]
        logging.info(f"Selected author: {author}")
        return author if author in valid_authors else "owenjohnson"
    except Exception as e:
        logging.error(f"Author selection failed: {e}")
        return "owenjohnson"


def prepare_post_data(final_summary, original_title, context_info=""):
    innovative_title = generate_title_from_summary(final_summary)
    if not innovative_title:
        logging.info(f"Title generation failed for '{original_title}' {context_info}")
        return None, None, None, None, None, None, None
    search_query, relevance_keywords = generate_image_query(f"{innovative_title}\n\n{final_summary}")
    if not search_query:
        logging.info(f"Image query generation failed for '{innovative_title}' {context_info}")
        return None, None, None, None, None, None, None
    logging.info(f"Fetching Flickr image for query: '{search_query}' {context_info}")
    image_url, image_source, uploader, page_url = get_flickr_image_via_ddg(search_query, relevance_keywords)
    if not image_url:
        logging.info(f"Flickr fetch failed for '{search_query}' - falling back to Pixabay {context_info}")
        image_query, _ = generate_image_query(f"{innovative_title}\n\n{final_summary}")
        image_url, image_source, uploader, page_url = get_image(image_query)
        if not image_url:
            logging.info(f"Pixabay fetch failed for title '{innovative_title}' - falling back to summary {context_info}")
            image_query, _ = generate_image_query(f"{final_summary}")
            image_url, image_source, uploader, page_url = get_image(image_query)
            if not image_url:
                logging.info(f"Image fetch failed again for '{original_title}' - proceeding without image {context_info}")
    post_data = {"title": innovative_title, "content": final_summary}
    selected_username = select_best_author(final_summary)
    author = next((a for a in AUTHORS if a["username"] == selected_username), None)
    if not author:
        logging.error(f"Author '{selected_username}' not found in AUTHORS, defaulting to owenjohnson")
        # Fall back to the configured owenjohnson entry rather than a hardcoded credential
        author = next((a for a in AUTHORS if a["username"] == "owenjohnson"), AUTHORS[0])
    category = generate_category_from_summary(final_summary)
    return post_data, author, category, image_url, image_source, uploader, page_url
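
# The image fallback chain implemented above, summarized:
#   Flickr (DDG, commercial license, OCR-gated) -> Pixabay (title+summary query)
#   -> Pixabay (summary-only query) -> no image.
# Illustrative call (argument values are hypothetical):
#   post_data, author, category, image_url, image_source, uploader, page_url = \
#       prepare_post_data(final_summary, original_title, "(RSS pass)")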
"url": post_url, "author_username": author_username, "timestamp": timestamp } recent_posts.append(entry) with open('/home/shane/foodie_automator/recent_posts.json', 'w') as f: for item in recent_posts: json.dump(item, f) f.write('\n') logging.info(f"Saved post '{post_title}' to recent_posts.json") except Exception as e: logging.error(f"Failed to save post to recent_posts.json: {e}") def prune_recent_posts(): try: cutoff = (datetime.now(timezone.utc) - timedelta(hours=24)).isoformat() recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json') recent_posts = [entry for entry in recent_posts if entry["timestamp"] > cutoff] with open('/home/shane/foodie_automator/recent_posts.json', 'w') as f: for item in recent_posts: json.dump(item, f) f.write('\n') logging.info(f"Pruned recent_posts.json to {len(recent_posts)} entries") except Exception as e: logging.error(f"Failed to prune recent_posts.json: {e}")