# foodie_automator_google.py
import requests
import random
import time
import logging
import re
import os
import json
import signal
import sys
from datetime import datetime, timedelta, timezone
from openai import OpenAI
from urllib.parse import quote
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException
from duckduckgo_search import DDGS
from foodie_config import (
    AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS,
    PERSONA_CONFIGS, CATEGORIES, get_clean_source_name, X_API_CREDENTIALS
)
# Note: fetch_duckduckgo_news_context is defined locally below, so it is not
# imported from foodie_utils (the local definition would shadow it anyway).
from foodie_utils import (
    load_json_file, save_json_file, get_image, generate_image_query,
    upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
    is_interesting, generate_title_from_summary, summarize_with_gpt4o,
    generate_category_from_summary, post_to_wp, prepare_post_data,
    select_best_author, smart_image_and_filter, get_flickr_image,
    get_next_author_round_robin, check_author_rate_limit
)
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
from dotenv import load_dotenv
import fcntl

load_dotenv()

is_posting = False
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_google.lock"


def signal_handler(sig, frame):
    logging.info("Received termination signal, checking if safe to exit...")
    if is_posting:
        # Defer the shutdown: the posting flow finishes its current WordPress
        # call and the process exits normally at the end of the run.
        logging.info("Currently posting, will exit after completion.")
        return
    logging.info("Safe to exit immediately.")
    sys.exit(0)


signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)

LOG_FILE = "/home/shane/foodie_automator/logs/foodie_automator_google.log"
LOG_PRUNE_DAYS = 30
MAX_RETRIES = 3
RETRY_BACKOFF = 2


def setup_logging():
    # Prune entries older than LOG_PRUNE_DAYS before attaching handlers.
    if os.path.exists(LOG_FILE):
        with open(LOG_FILE, 'r') as f:
            lines = f.readlines()
        log_entries = []
        current_entry = []
        timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')
        for line in lines:
            if timestamp_pattern.match(line):
                if current_entry:
                    log_entries.append(''.join(current_entry))
                current_entry = [line]
            else:
                current_entry.append(line)
        if current_entry:
            log_entries.append(''.join(current_entry))
        cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
        pruned_entries = []
        for entry in log_entries:
            try:
                timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
                if timestamp > cutoff:
                    pruned_entries.append(entry)
            except ValueError:
                logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...")
                continue
        with open(LOG_FILE, 'w') as f:
            f.writelines(pruned_entries)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    file_handler = logging.FileHandler(LOG_FILE, mode='a')
    file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
    logger.addHandler(file_handler)
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
    logger.addHandler(console_handler)
    logging.info("Logging initialized for foodie_automator_google.py")


client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
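# The two JSON files below act as a simple dedup layer. Based on how they are
# used in this module (an assumption, since load_json_file/save_json_file live
# in foodie_utils), the expected contract looks roughly like:
#
#   entries = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
#   # -> [{"title": "...", "timestamp": "2025-01-01T00:00:00+00:00"}, ...],
#   #    already filtered to entries newer than the expiration window
#   save_json_file(POSTED_TITLES_FILE, "Some Trend", datetime.now(timezone.utc).isoformat())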
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_google_titles.json'
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
EXPIRATION_HOURS = 24
IMAGE_EXPIRATION_DAYS = 7

posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
posted_titles = set(entry["title"] for entry in posted_titles_data)
used_images = set(
    entry["title"]
    for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
    if "title" in entry
)


def acquire_lock():
    os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
    lock_fd = open(LOCK_FILE, 'w')
    try:
        fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        lock_fd.write(str(os.getpid()))
        lock_fd.flush()
        return lock_fd
    except IOError:
        logging.info("Another instance of foodie_automator_google.py is running")
        sys.exit(0)


def parse_search_volume(volume_text):
    # Convert strings like "20K+" or "1M+" into a numeric volume.
    try:
        volume_part = volume_text.split('\n')[0].lower().strip().replace('+', '')
        if 'k' in volume_part:
            volume = float(volume_part.replace('k', '')) * 1000
        elif 'm' in volume_part:
            volume = float(volume_part.replace('m', '')) * 1000000
        else:
            volume = float(volume_part)
        return volume
    except (ValueError, AttributeError) as e:
        logging.warning(f"Could not parse search volume from '{volume_text}': {e}")
        return 0


def scrape_google_trends(geo='US'):
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/125.0.0.0 Safari/537.36"
    )
    driver = None
    try:
        for attempt in range(MAX_RETRIES):
            try:
                driver = webdriver.Chrome(options=chrome_options)
                time.sleep(random.uniform(2, 5))
                url = f"https://trends.google.com/trending?geo={geo}&hours=24&sort=search-volume&category=5"
                logging.info(f"Navigating to {url} (attempt {attempt + 1})")
                driver.get(url)
                logging.info("Waiting for page to load...")
                WebDriverWait(driver, 60).until(
                    EC.presence_of_element_located((By.TAG_NAME, "tbody"))
                )
                break
            except TimeoutException:
                logging.warning(f"Timeout on attempt {attempt + 1} for geo={geo}")
                if attempt == MAX_RETRIES - 1:
                    logging.error(f"Failed after {MAX_RETRIES} attempts for geo={geo}")
                    return []
                time.sleep(RETRY_BACKOFF * (2 ** attempt))
                if driver:
                    driver.quit()
                continue
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)
        trends = []
        rows = driver.find_elements(By.XPATH, "//tbody/tr")
        logging.info(f"Found {len(rows)} rows in tbody for geo={geo}")
        for row in rows:
            try:
                columns = row.find_elements(By.TAG_NAME, "td")
                if len(columns) >= 3:
                    title = columns[1].text.strip()
                    search_volume_text = columns[2].text.strip()
                    search_volume = parse_search_volume(search_volume_text)
                    logging.info(f"Parsed trend: {title} with search volume: {search_volume}")
                    if title and search_volume >= 20000:
                        link = f"https://trends.google.com/trends/explore?q={quote(title)}&geo={geo}"
                        trends.append({
                            "title": title,
                            "link": link,
                            "search_volume": search_volume
                        })
                        logging.info(f"Added trend: {title} with search volume: {search_volume}")
                    else:
                        logging.info(f"Skipping trend: {title} (volume: {search_volume} < 20K or no title)")
                else:
                    logging.info(f"Skipping row with insufficient columns: {len(columns)}")
            except Exception as e:
                logging.warning(f"Row processing error: {e}")
                continue
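        # Highest-volume trends first: curate_from_google_trends() consumes this
        # list from the front, so sorting here means the biggest stories get
        # tried before lower-volume ones.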
        if trends:
            trends.sort(key=lambda x: x["search_volume"], reverse=True)
            logging.info(f"Extracted {len(trends)} trends for geo={geo}: {[t['title'] for t in trends]}")
        else:
            logging.warning(f"No valid trends found with search volume >= 20K for geo={geo}")
        return trends
    except Exception as e:
        logging.error(f"Unexpected error in scrape_google_trends: {e}", exc_info=True)
        return []
    finally:
        if driver:
            driver.quit()
            logging.info(f"Chrome driver closed for geo={geo}")


def fetch_duckduckgo_news_context(trend_title, hours=24):
    for attempt in range(MAX_RETRIES):
        try:
            with DDGS() as ddgs:
                results = ddgs.news(f"{trend_title} news", timelimit="d", max_results=5)
                titles = []
                for r in results:
                    try:
                        date_str = r["date"]
                        if '+00:00' in date_str:
                            dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc)
                        else:
                            dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
                        if dt > (datetime.now(timezone.utc) - timedelta(hours=hours)):
                            titles.append(r["title"].lower())
                    except ValueError as e:
                        logging.warning(f"Date parsing failed for '{date_str}': {e}")
                        continue
                context = " ".join(titles) if titles else "No recent news found within 24 hours"
                logging.info(f"DuckDuckGo News context for '{trend_title}': {context}")
                return context
        except Exception as e:
            logging.warning(f"DuckDuckGo News context fetch failed for '{trend_title}' (attempt {attempt + 1}): {e}")
            if attempt < MAX_RETRIES - 1:
                time.sleep(RETRY_BACKOFF * (2 ** attempt))
                continue
    logging.error(f"Failed to fetch DuckDuckGo News context for '{trend_title}' after {MAX_RETRIES} attempts")
    return trend_title


def curate_from_google_trends(geo_list=None):
    try:
        global posted_titles_data, posted_titles, used_images
        posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
        posted_titles = set(entry["title"] for entry in posted_titles_data)
        used_images = set(
            entry["title"]
            for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
            if "title" in entry
        )
        logging.debug(f"Loaded {len(posted_titles)} posted titles and {len(used_images)} used images")
        # Gather trends across every requested region, defaulting to US only.
        trends = []
        for geo in (geo_list or ['US']):
            trends.extend(scrape_google_trends(geo=geo))
        if not trends:
            print("No Google Trends data available")
            logging.info("No Google Trends data available")
            return None, None, False
        attempts = 0
        max_attempts = 10
        while attempts < max_attempts and trends:
            trend = trends.pop(0)
            title = trend["title"]
            link = trend.get("link", "")
            summary = trend.get("summary", "")
            source_name = trend.get("source", "Google Trends")
            original_source = source_name
            if title in posted_titles:
                print(f"Skipping already posted trend: {title}")
                logging.info(f"Skipping already posted trend: {title}")
                attempts += 1
                continue
            print(f"Trying Google Trend: {title} from {source_name}")
            logging.info(f"Trying Google Trend: {title} from {source_name}")
            try:
                image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
            except Exception as e:
                print(f"Smart image/filter error for '{title}': {e}")
                logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
                attempts += 1
                continue
            if skip:
                print(f"Skipping filtered trend: {title}")
                logging.info(f"Skipping filtered trend: {title}")
                attempts += 1
                continue
            ddg_context = fetch_duckduckgo_news_context(title)
            scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
            interest_score = is_interesting(scoring_content)
            print(f"Interest Score for '{title[:50]}...': {interest_score}")
            logging.info(f"Interest score for '{title}': {interest_score}")
            if interest_score < 6:
                print(f"Trend Interest Too Low: {interest_score}")
                logging.info(f"Trend Interest Too Low: {interest_score}")
                attempts += 1
                continue
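            # Article length scales with how interesting the trend scored:
            # determine_paragraph_count() maps higher scores to more paragraphs,
            # and the prompt below pins the model to exactly that count.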
            num_paragraphs = determine_paragraph_count(interest_score)
            extra_prompt = (
                f"Generate exactly {num_paragraphs} paragraphs.\n"
                f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
                f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
                f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
                f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
                f"Do not include emojis in the summary."
            )
            content_to_summarize = scoring_content
            final_summary = summarize_with_gpt4o(
                content_to_summarize,
                source_name,
                link,
                interest_score=interest_score,
                extra_prompt=extra_prompt
            )
            if not final_summary:
                print(f"Summary failed for '{title}'")
                logging.info(f"Summary failed for '{title}'")
                attempts += 1
                continue
            final_summary = insert_link_naturally(final_summary, source_name, link)
            # Use round-robin author selection
            author = get_next_author_round_robin()
            author_username = author["username"]
            logging.info(f"Selected author via round-robin: {author_username}")
            post_data = {
                "title": generate_title_from_summary(final_summary),
                "content": final_summary,
                "status": "publish",
                "author": author_username,
                "categories": [generate_category_from_summary(final_summary)]
            }
            category = post_data["categories"][0]
            image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
            if not image_url:
                print(f"Flickr image fetch failed for '{image_query}', trying fallback")
                logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
                image_url, image_source, uploader, page_url = get_image(image_query)
            if not image_url:
                print(f"All image uploads failed for '{title}' - posting without image")
                logging.warning(f"All image uploads failed for '{title}' - posting without image")
                image_source = None
                uploader = None
                page_url = None
            hook = get_dynamic_hook(post_data["title"]).strip()
            share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
            # The original share-links markup was lost from this copy of the file;
            # the stand-in below only preserves what the rest of the code relies
            # on: the hook/share-prompt text, plus literal {post_url} and
            # {share_text} slots for the .format() call made after publishing.
            share_links_template = (
                f'<p>{hook}</p>\n'
                f'<p>{share_prompt}</p>\n'
                '<p><a href="https://twitter.com/intent/tweet?url={post_url}&text={share_text}" '
                'target="_blank" rel="noopener">Share on X</a></p>'
            )
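            # Two-phase publish: the first post_to_wp() call goes out with the raw
            # {post_url}/{share_text} slots still in the template (the permalink is
            # not known yet); once WordPress returns post_id and post_url, the
            # template is formatted and the post is updated in place.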
            post_data["content"] = f"{final_summary}\n\n{share_links_template}"
            global is_posting
            is_posting = True
            try:
                post_id, post_url = post_to_wp(
                    post_data=post_data,
                    category=category,
                    link=link,
                    author=author,
                    image_url=image_url,
                    original_source=original_source,
                    image_source=image_source,
                    uploader=uploader,
                    page_url=page_url,
                    interest_score=interest_score,
                    should_post_tweet=True
                )
                if not post_id:
                    print(f"Failed to post to WordPress for '{title}'")
                    logging.warning(f"Failed to post to WordPress for '{title}'")
                    attempts += 1
                    continue
            except Exception as e:
                print(f"WordPress posting error for '{title}': {e}")
                logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
                attempts += 1
                continue
            finally:
                is_posting = False
            if post_id:
                share_text = f"Check out this foodie gem! {post_data['title']}"
                share_text_encoded = quote(share_text)
                post_url_encoded = quote(post_url)
                share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
                post_data["content"] = f"{final_summary}\n\n{share_links}"
                is_posting = True
                try:
                    post_to_wp(
                        post_data=post_data,
                        category=category,
                        link=link,
                        author=author,
                        image_url=image_url,
                        original_source=original_source,
                        image_source=image_source,
                        uploader=uploader,
                        page_url=page_url,
                        interest_score=interest_score,
                        post_id=post_id,
                        should_post_tweet=False
                    )
                except Exception as e:
                    print(f"Failed to update WordPress post '{title}' with share links: {e}")
                    logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
                finally:
                    is_posting = False
                timestamp = datetime.now(timezone.utc).isoformat()
                save_json_file(POSTED_TITLES_FILE, title, timestamp)
                posted_titles.add(title)
                print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
                logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
                if image_url:
                    save_json_file(USED_IMAGES_FILE, image_url, timestamp)
                    used_images.add(image_url)
                    print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
                    logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
                print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
                logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
                return post_data, category, True
            attempts += 1
            print(f"WP posting failed for '{post_data['title']}'")
            logging.info(f"WP posting failed for '{post_data['title']}'")
        print("No interesting Google Trend found after attempts")
        logging.info("No interesting Google Trend found after attempts")
        return None, None, False
    except Exception as e:
        logging.error(f"Unexpected error in curate_from_google_trends: {e}", exc_info=True)
        print(f"Unexpected error in curate_from_google_trends: {e}")
        return None, None, False


def run_google_trends_automator():
    lock_fd = None
    try:
        lock_fd = acquire_lock()
        logging.info("***** Google Trends Automator Launched *****")
        geo_list = ['US', 'GB', 'AU']
        post_data, category, should_continue = curate_from_google_trends(geo_list=geo_list)
        if not post_data:
            logging.info("No postable Google Trend found")
        else:
            logging.info("Completed Google Trends run")
        return post_data, category, should_continue
    except Exception as e:
        logging.error(f"Fatal error in run_google_trends_automator: {e}", exc_info=True)
        return None, None, False
    finally:
        if lock_fd:
            fcntl.flock(lock_fd, fcntl.LOCK_UN)
            lock_fd.close()
            if os.path.exists(LOCK_FILE):
                os.remove(LOCK_FILE)


if __name__ == "__main__":
    setup_logging()
    post_data, category, should_continue = run_google_trends_automator()
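    # should_continue is True only when a trend made it all the way to a
    # published WordPress post; callers can use it to decide whether another
    # run is worthwhile.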
logging.info(f"Run completed, should_continue: {should_continue}")