# foodie_automator_google.py
# Scrapes 24-hour Google Trends data with Selenium, scores and summarizes
# promising food trends with GPT-4o helpers, and publishes the results to WordPress.
import requests
import random
import time
import logging
import re
import os
import json
import signal
import sys
from datetime import datetime, timedelta, timezone
from openai import OpenAI
from urllib.parse import quote
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException
from duckduckgo_search import DDGS
from foodie_config import (
    AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS,
    PERSONA_CONFIGS, CATEGORIES, get_clean_source_name, X_API_CREDENTIALS
)
from foodie_utils import (
    load_json_file, save_json_file, get_image, generate_image_query,
    upload_image_to_wp, select_best_persona, determine_paragraph_count,
    is_interesting, generate_title_from_summary, summarize_with_gpt4o,
    generate_category_from_summary, post_to_wp, prepare_post_data,
    smart_image_and_filter, insert_link_naturally, get_flickr_image
)
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt  # Removed select_best_cta import
from dotenv import load_dotenv

load_dotenv()

is_posting = False

def signal_handler(sig, frame):
    logging.info("Received termination signal, checking if safe to exit...")
    if is_posting:
        logging.info("Currently posting, will exit after completion.")
    else:
        logging.info("Safe to exit immediately.")
        sys.exit(0)

signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)

logger = logging.getLogger()
logger.setLevel(logging.INFO)
file_handler = logging.FileHandler('/home/shane/foodie_automator/foodie_automator_google.log', mode='a')
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logger.addHandler(file_handler)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logger.addHandler(console_handler)
logging.info("Logging initialized for foodie_automator_google.py")

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_google_titles.json'
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
EXPIRATION_HOURS = 24
IMAGE_EXPIRATION_DAYS = 7

posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
posted_titles = set(entry["title"] for entry in posted_titles_data)
used_images = set(
    entry["title"]
    for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
    if "title" in entry
)

def parse_search_volume(volume_text):
    try:
        volume_part = volume_text.split('\n')[0].lower().strip().replace('+', '')
        if 'k' in volume_part:
            volume = float(volume_part.replace('k', '')) * 1000
        elif 'm' in volume_part:
            volume = float(volume_part.replace('m', '')) * 1000000
        else:
            volume = float(volume_part)
        return volume
    except (ValueError, AttributeError) as e:
        logging.warning(f"Could not parse search volume from '{volume_text}': {e}")
        return 0

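# Illustrative behavior of parse_search_volume on typical Google Trends volume
# strings (hypothetical inputs, documented here only as examples):
#   "200K+\nFood news" -> 200000.0   (text after the first newline is ignored)
#   "1M+"              -> 1000000.0
#   "500+"             -> 500.0
#   "2,000"            -> 0          (commas are not handled; falls through to the warning path)
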
f"https://trends.google.com/trending?geo={geo}&hours=24&sort=search-volume&category=5" logging.info(f"Navigating to {url} (attempt {attempt + 1})") driver.get(url) logging.info("Waiting for page to load...") WebDriverWait(driver, 60).until( EC.presence_of_element_located((By.TAG_NAME, "tbody")) ) break except TimeoutException: logging.warning(f"Timeout on attempt {attempt + 1} for geo={geo}") if attempt == 2: logging.error(f"Failed after 3 attempts for geo={geo}") return [] time.sleep(5) driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") time.sleep(2) trends = [] rows = driver.find_elements(By.XPATH, "//tbody/tr") logging.info(f"Found {len(rows)} rows in tbody for geo={geo}") cutoff_date = datetime.now(timezone.utc) - timedelta(hours=24) for row in rows: try: columns = row.find_elements(By.TAG_NAME, "td") if len(columns) >= 3: title = columns[1].text.strip() search_volume_text = columns[2].text.strip() search_volume = parse_search_volume(search_volume_text) logging.info(f"Parsed trend: {title} with search volume: {search_volume}") if title and search_volume >= 20000: link = f"https://trends.google.com/trends/explore?q={quote(title)}&geo={geo}" trends.append({ "title": title, "link": link, "search_volume": search_volume }) logging.info(f"Added trend: {title} with search volume: {search_volume}") else: logging.info(f"Skipping trend: {title} (volume: {search_volume} < 20K or no title)") else: logging.info(f"Skipping row with insufficient columns: {len(columns)}") except Exception as e: logging.warning(f"Row processing error: {e}") continue if trends: trends.sort(key=lambda x: x["search_volume"], reverse=True) logging.info(f"Extracted {len(trends)} trends for geo={geo}: {[t['title'] for t in trends]}") print(f"Raw trends fetched for geo={geo}: {[t['title'] for t in trends]}") else: logging.warning(f"No valid trends found with search volume >= 20K for geo={geo}") return trends finally: driver.quit() logging.info(f"Chrome driver closed for geo={geo}") def fetch_duckduckgo_news_context(trend_title, hours=24): try: with DDGS() as ddgs: results = ddgs.news(f"{trend_title} news", timelimit="d", max_results=5) titles = [] for r in results: try: date_str = r["date"] if '+00:00' in date_str: dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc) else: dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc) if dt > (datetime.now(timezone.utc) - timedelta(hours=24)): titles.append(r["title"].lower()) except ValueError as e: logging.warning(f"Date parsing failed for '{date_str}': {e}") continue context = " ".join(titles) if titles else "No recent news found within 24 hours" logging.info(f"DuckDuckGo News context for '{trend_title}': {context}") return context except Exception as e: logging.warning(f"DuckDuckGo News context fetch failed for '{trend_title}': {e}") return trend_title def curate_from_google_trends(geo_list=['US']): all_trends = [] for geo in geo_list: trends = scrape_google_trends(geo=geo) if trends: all_trends.extend(trends) if not all_trends: print("No Google Trends data available") logging.info("No Google Trends data available") return None, None, random.randint(600, 1800) attempts = 0 max_attempts = 10 while attempts < max_attempts and all_trends: trend = all_trends.pop(0) title = trend["title"] link = trend.get("link", "https://trends.google.com/") summary = trend.get("summary", "") source_name = "Google Trends" original_source = f'{source_name}' if title in posted_titles: print(f"Skipping already posted 
trend: {title}") logging.info(f"Skipping already posted trend: {title}") attempts += 1 continue print(f"Trying Google Trend: {title} from {source_name}") logging.info(f"Trying Google Trend: {title} from {source_name}") image_query, relevance_keywords, skip = smart_image_and_filter(title, summary) if skip: print(f"Skipping filtered Google Trend: {title}") logging.info(f"Skipping filtered Google Trend: {title}") attempts += 1 continue scoring_content = f"{title}\n\n{summary}" interest_score = is_interesting(scoring_content) logging.info(f"Interest score for '{title}': {interest_score}") if interest_score < 6: print(f"Google Trends Interest Too Low: {interest_score}") logging.info(f"Google Trends Interest Too Low: {interest_score}") attempts += 1 continue num_paragraphs = determine_paragraph_count(interest_score) extra_prompt = ( f"Generate exactly {num_paragraphs} paragraphs.\n" f"FOCUS: Summarize ONLY the provided content, explicitly mentioning '{title}' and sticking to its specific topic and details.\n" f"Do NOT introduce unrelated concepts.\n" f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n" f"Do not include emojis in the summary." ) content_to_summarize = scoring_content final_summary = summarize_with_gpt4o( content_to_summarize, source_name, link, interest_score=interest_score, extra_prompt=extra_prompt ) if not final_summary: logging.info(f"Summary failed for '{title}'") attempts += 1 continue final_summary = insert_link_naturally(final_summary, source_name, link) post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title) if not post_data: attempts += 1 continue image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords) if not image_url: image_url, image_source, uploader, page_url = get_image(image_query) # Log the fetched image details logging.info(f"Fetched image for '{post_data['title']}': URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}") hook = get_dynamic_hook(post_data["title"]).strip() # Generate viral share prompt share_prompt = get_viral_share_prompt(post_data["title"], final_summary) share_links_template = ( f'
' ) post_data["content"] = f"{final_summary}\n\n{share_links_template}" global is_posting is_posting = True try: post_id, post_url = post_to_wp( post_data=post_data, category=category, link=link, author=author, image_url=image_url, original_source=original_source, image_source=image_source, uploader=uploader, pixabay_url=pixabay_url, interest_score=interest_score, should_post_tweet=True ) finally: is_posting = False if post_id: share_text = f"Check out this foodie gem! {post_data['title']}" share_text_encoded = quote(share_text) post_url_encoded = quote(post_url) share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded) post_data["content"] = f"{final_summary}\n\n{share_links}" is_posting = True try: post_to_wp( post_data=post_data, category=category, link=link, author=author, image_url=image_url, original_source=original_source, image_source=image_source, uploader=uploader, pixabay_url=pixabay_url, interest_score=interest_score, post_id=post_id, should_post_tweet=False ) finally: is_posting = False timestamp = datetime.now(timezone.utc).isoformat() save_json_file(POSTED_TITLES_FILE, title, timestamp) posted_titles.add(title) logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") if image_url: # Check if image is already used used_images_list = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) used_image_urls = {entry["title"] for entry in used_images_list} if image_url in used_image_urls: logging.warning(f"Image '{image_url}' already used, attempting to fetch a new image") image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords) if not image_url: image_url, image_source, uploader, page_url = get_image(image_query) logging.info(f"New image fetched: URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}") save_json_file(USED_IMAGES_FILE, image_url, timestamp) used_images.add(image_url) logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****") logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****") return post_data, category, random.randint(0, 1800) attempts += 1 logging.info(f"WP posting failed for '{post_data['title']}'") print("No interesting Google Trend found after attempts") logging.info("No interesting Google Trend found after attempts") return None, None, random.randint(600, 1800) def run_google_trends_automator(): logging.info("***** Google Trends Automator Launched *****") geo_list = ['US', 'GB', 'AU'] post_data, category, sleep_time = curate_from_google_trends(geo_list=geo_list) if sleep_time is None: sleep_time = random.randint(600, 1800) print(f"Sleeping for {sleep_time}s") logging.info(f"Completed run with sleep time: {sleep_time} seconds") time.sleep(sleep_time) return post_data, category, sleep_time if __name__ == "__main__": run_google_trends_automator()