diff --git a/foodie_automator_google.py b/foodie_automator_google.py index 804fdfc..4ab3403 100644 --- a/foodie_automator_google.py +++ b/foodie_automator_google.py @@ -29,12 +29,14 @@ from foodie_utils import ( generate_category_from_summary, post_to_wp, prepare_post_data, smart_image_and_filter, insert_link_naturally, get_flickr_image ) -from foodie_hooks import get_dynamic_hook, get_viral_share_prompt # Removed select_best_cta import +from foodie_hooks import get_dynamic_hook, get_viral_share_prompt from dotenv import load_dotenv +import fcntl load_dotenv() is_posting = False +LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_google.lock" def signal_handler(sig, frame): logging.info("Received termination signal, checking if safe to exit...") @@ -47,15 +49,58 @@ def signal_handler(sig, frame): signal.signal(signal.SIGTERM, signal_handler) signal.signal(signal.SIGINT, signal_handler) -logger = logging.getLogger() -logger.setLevel(logging.INFO) -file_handler = logging.FileHandler('/home/shane/foodie_automator/foodie_automator_google.log', mode='a') -file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) -logger.addHandler(file_handler) -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) -logger.addHandler(console_handler) -logging.info("Logging initialized for foodie_automator_google.py") +LOG_FILE = "/home/shane/foodie_automator/logs/foodie_automator_google.log" +LOG_PRUNE_DAYS = 30 +MAX_RETRIES = 3 +RETRY_BACKOFF = 2 + +posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS) +posted_titles = set(entry["title"] for entry in posted_titles_data) +used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry) + +def setup_logging(): + if os.path.exists(LOG_FILE): + with open(LOG_FILE, 'r') as f: + lines = f.readlines() + + log_entries = [] + current_entry = [] + timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}') + + for line in lines: + if timestamp_pattern.match(line): + if current_entry: + log_entries.append(''.join(current_entry)) + current_entry = [line] + else: + current_entry.append(line) + + if current_entry: + log_entries.append(''.join(current_entry)) + + cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS) + pruned_entries = [] + for entry in log_entries: + try: + timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc) + if timestamp > cutoff: + pruned_entries.append(entry) + except ValueError: + logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...") + continue + + with open(LOG_FILE, 'w') as f: + f.writelines(pruned_entries) + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + file_handler = logging.FileHandler(LOG_FILE, mode='a') + file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) + logger.addHandler(file_handler) + console_handler = logging.StreamHandler() + console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) + logger.addHandler(console_handler) + logging.info("Logging initialized for foodie_automator_google.py") client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) @@ -68,6 +113,18 @@ posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS) posted_titles = set(entry["title"] for entry in posted_titles_data) used_images = set(entry["title"] for entry in 
load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry) +def acquire_lock(): + os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True) + lock_fd = open(LOCK_FILE, 'w') + try: + fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + lock_fd.write(str(os.getpid())) + lock_fd.flush() + return lock_fd + except IOError: + logging.info("Another instance of foodie_automator_google.py is running") + sys.exit(0) + def parse_search_volume(volume_text): try: volume_part = volume_text.split('\n')[0].lower().strip().replace('+', '') @@ -89,10 +146,11 @@ def scrape_google_trends(geo='US'): chrome_options.add_argument("--disable-dev-shm-usage") chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/125.0.0.0 Safari/537.36") - driver = webdriver.Chrome(options=chrome_options) + driver = None try: - for attempt in range(3): + for attempt in range(MAX_RETRIES): try: + driver = webdriver.Chrome(options=chrome_options) time.sleep(random.uniform(2, 5)) url = f"https://trends.google.com/trending?geo={geo}&hours=24&sort=search-volume&category=5" logging.info(f"Navigating to {url} (attempt {attempt + 1})") @@ -105,10 +163,13 @@ def scrape_google_trends(geo='US'): break except TimeoutException: logging.warning(f"Timeout on attempt {attempt + 1} for geo={geo}") - if attempt == 2: - logging.error(f"Failed after 3 attempts for geo={geo}") + if attempt == MAX_RETRIES - 1: + logging.error(f"Failed after {MAX_RETRIES} attempts for geo={geo}") return [] - time.sleep(5) + time.sleep(RETRY_BACKOFF * (2 ** attempt)) + if driver: + driver.quit() + continue driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") time.sleep(2) @@ -145,157 +206,137 @@ def scrape_google_trends(geo='US'): if trends: trends.sort(key=lambda x: x["search_volume"], reverse=True) logging.info(f"Extracted {len(trends)} trends for geo={geo}: {[t['title'] for t in trends]}") - print(f"Raw trends fetched for geo={geo}: {[t['title'] for t in trends]}") else: logging.warning(f"No valid trends found with search volume >= 20K for geo={geo}") return trends + except Exception as e: + logging.error(f"Unexpected error in scrape_google_trends: {e}", exc_info=True) + return [] finally: - driver.quit() - logging.info(f"Chrome driver closed for geo={geo}") + if driver: + driver.quit() + logging.info(f"Chrome driver closed for geo={geo}") def fetch_duckduckgo_news_context(trend_title, hours=24): - try: - with DDGS() as ddgs: - results = ddgs.news(f"{trend_title} news", timelimit="d", max_results=5) - titles = [] - for r in results: - try: - date_str = r["date"] - if '+00:00' in date_str: - dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc) - else: - dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc) - if dt > (datetime.now(timezone.utc) - timedelta(hours=24)): - titles.append(r["title"].lower()) - except ValueError as e: - logging.warning(f"Date parsing failed for '{date_str}': {e}") - continue - context = " ".join(titles) if titles else "No recent news found within 24 hours" - logging.info(f"DuckDuckGo News context for '{trend_title}': {context}") - return context - except Exception as e: - logging.warning(f"DuckDuckGo News context fetch failed for '{trend_title}': {e}") - return trend_title - -def curate_from_google_trends(geo_list=['US']): - all_trends = [] - for geo in geo_list: - trends = scrape_google_trends(geo=geo) - if trends: - all_trends.extend(trends) - - if not all_trends: - print("No Google Trends data available") - 
logging.info("No Google Trends data available") - return None, None, random.randint(600, 1800) - - attempts = 0 - max_attempts = 10 - while attempts < max_attempts and all_trends: - trend = all_trends.pop(0) - title = trend["title"] - link = trend.get("link", "https://trends.google.com/") - summary = trend.get("summary", "") - source_name = "Google Trends" - original_source = f'{source_name}' - - if title in posted_titles: - print(f"Skipping already posted trend: {title}") - logging.info(f"Skipping already posted trend: {title}") - attempts += 1 + for attempt in range(MAX_RETRIES): + try: + with DDGS() as ddgs: + results = ddgs.news(f"{trend_title} news", timelimit="d", max_results=5) + titles = [] + for r in results: + try: + date_str = r["date"] + if '+00:00' in date_str: + dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc) + else: + dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc) + if dt > (datetime.now(timezone.utc) - timedelta(hours=24)): + titles.append(r["title"].lower()) + except ValueError as e: + logging.warning(f"Date parsing failed for '{date_str}': {e}") + continue + context = " ".join(titles) if titles else "No recent news found within 24 hours" + logging.info(f"DuckDuckGo News context for '{trend_title}': {context}") + return context + except Exception as e: + logging.warning(f"DuckDuckGo News context fetch failed for '{trend_title}' (attempt {attempt + 1}): {e}") + if attempt < MAX_RETRIES - 1: + time.sleep(RETRY_BACKOFF * (2 ** attempt)) continue + logging.error(f"Failed to fetch DuckDuckGo News context for '{trend_title}' after {MAX_RETRIES} attempts") + return trend_title - print(f"Trying Google Trend: {title} from {source_name}") - logging.info(f"Trying Google Trend: {title} from {source_name}") +def curate_from_google_trends(geo_list=['US']): + try: + all_trends = [] + for geo in geo_list: + trends = scrape_google_trends(geo=geo) + if trends: + all_trends.extend(trends) + + if not all_trends: + logging.info("No Google Trends data available") + return None, None, False + + attempts = 0 + max_attempts = 10 + while attempts < max_attempts and all_trends: + trend = all_trends.pop(0) + title = trend["title"] + link = trend.get("link", "https://trends.google.com/") + summary = trend.get("summary", "") + source_name = "Google Trends" + original_source = f'{source_name}' + + if title in posted_titles: + logging.info(f"Skipping already posted trend: {title}") + attempts += 1 + continue - image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) - if skip: - print(f"Skipping filtered Google Trend: {title}") - logging.info(f"Skipping filtered Google Trend: {title}") - attempts += 1 - continue + logging.info(f"Trying Google Trend: {title} from {source_name}") - ddg_context = fetch_duckduckgo_news_context(title) - scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}" - interest_score = is_interesting(scoring_content) - logging.info(f"Interest score for '{title}': {interest_score}") - if interest_score < 6: - print(f"Google Trends Interest Too Low: {interest_score}") - logging.info(f"Google Trends Interest Too Low: {interest_score}") - attempts += 1 - continue + image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) + if skip: + logging.info(f"Skipping filtered Google Trend: {title}") + attempts += 1 + continue - num_paragraphs = determine_paragraph_count(interest_score) - extra_prompt = ( - f"Generate exactly 
{num_paragraphs} paragraphs.\n" - f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n" - f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n" - f"Do NOT introduce unrelated concepts unless in the content or additional context.\n" - f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n" - f"Do not include emojis in the summary." - ) - content_to_summarize = scoring_content - final_summary = summarize_with_gpt4o( - content_to_summarize, - source_name, - link, - interest_score=interest_score, - extra_prompt=extra_prompt - ) - if not final_summary: - logging.info(f"Summary failed for '{title}'") - attempts += 1 - continue + ddg_context = fetch_duckduckgo_news_context(title) + scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}" + interest_score = is_interesting(scoring_content) + logging.info(f"Interest score for '{title}': {interest_score}") + if interest_score < 6: + logging.info(f"Google Trends Interest Too Low: {interest_score}") + attempts += 1 + continue - final_summary = insert_link_naturally(final_summary, source_name, link) + num_paragraphs = determine_paragraph_count(interest_score) + extra_prompt = ( + f"Generate exactly {num_paragraphs} paragraphs.\n" + f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n" + f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n" + f"Do NOT introduce unrelated concepts unless in the content or additional context.\n" + f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n" + f"Do not include emojis in the summary." + ) + content_to_summarize = scoring_content + final_summary = summarize_with_gpt4o( + content_to_summarize, + source_name, + link, + interest_score=interest_score, + extra_prompt=extra_prompt + ) + if not final_summary: + logging.info(f"Summary failed for '{title}'") + attempts += 1 + continue - post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic) - if not post_data: - attempts += 1 - continue + final_summary = insert_link_naturally(final_summary, source_name, link) - image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic) - if not image_url: - image_url, image_source, uploader, page_url = get_image(image_query) + post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic) + if not post_data: + attempts += 1 + continue - hook = get_dynamic_hook(post_data["title"]).strip() + image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic) + if not image_url: + image_url, image_source, uploader, page_url = get_image(image_query) - share_prompt = get_viral_share_prompt(post_data["title"], final_summary) - share_links_template = ( - f'
{share_prompt} ' - f' ' - f'
' - ) - post_data["content"] = f"{final_summary}\n\n{share_links_template}" + hook = get_dynamic_hook(post_data["title"]).strip() - global is_posting - is_posting = True - try: - post_id, post_url = post_to_wp( - post_data=post_data, - category=category, - link=link, - author=author, - image_url=image_url, - original_source=original_source, - image_source=image_source, - uploader=uploader, - page_url=page_url, - interest_score=interest_score, - should_post_tweet=True + share_prompt = get_viral_share_prompt(post_data["title"], final_summary) + share_links_template = ( + f'
{share_prompt} ' + f' ' + f'
' ) - finally: - is_posting = False - - if post_id: - share_text = f"Check out this foodie gem! {post_data['title']}" - share_text_encoded = quote(share_text) - post_url_encoded = quote(post_url) - share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded) - post_data["content"] = f"{final_summary}\n\n{share_links}" + post_data["content"] = f"{final_summary}\n\n{share_links_template}" + + global is_posting is_posting = True try: - post_to_wp( + post_id, post_url = post_to_wp( post_data=post_data, category=category, link=link, @@ -306,43 +347,86 @@ def curate_from_google_trends(geo_list=['US']): uploader=uploader, page_url=page_url, interest_score=interest_score, - post_id=post_id, - should_post_tweet=False + should_post_tweet=True ) + except Exception as e: + logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True) + attempts += 1 + continue finally: is_posting = False - timestamp = datetime.now(timezone.utc).isoformat() - save_json_file(POSTED_TITLES_FILE, title, timestamp) - posted_titles.add(title) - logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") - - if image_url: - save_json_file(USED_IMAGES_FILE, image_url, timestamp) - used_images.add(image_url) - logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") - - print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****") - logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****") - return post_data, category, random.randint(0, 1800) + if post_id: + share_text = f"Check out this foodie gem! {post_data['title']}" + share_text_encoded = quote(share_text) + post_url_encoded = quote(post_url) + share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded) + post_data["content"] = f"{final_summary}\n\n{share_links}" + is_posting = True + try: + post_to_wp( + post_data=post_data, + category=category, + link=link, + author=author, + image_url=image_url, + original_source=original_source, + image_source=image_source, + uploader=uploader, + page_url=page_url, + interest_score=interest_score, + post_id=post_id, + should_post_tweet=False + ) + except Exception as e: + logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True) + finally: + is_posting = False + + timestamp = datetime.now(timezone.utc).isoformat() + save_json_file(POSTED_TITLES_FILE, title, timestamp) + posted_titles.add(title) + logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") + + if image_url: + save_json_file(USED_IMAGES_FILE, image_url, timestamp) + used_images.add(image_url) + logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") + + logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****") + return post_data, category, True - attempts += 1 - logging.info(f"WP posting failed for '{post_data['title']}'") + attempts += 1 + logging.info(f"WP posting failed for '{post_data['title']}'") - print("No interesting Google Trend found after attempts") - logging.info("No interesting Google Trend found after attempts") - return None, None, random.randint(600, 1800) + logging.info("No interesting Google Trend found after attempts") + return None, None, False + except Exception as e: + logging.error(f"Unexpected error in curate_from_google_trends: {e}", exc_info=True) + return None, None, False def run_google_trends_automator(): - logging.info("***** Google Trends Automator 
Launched *****") - geo_list = ['US', 'GB', 'AU'] - post_data, category, sleep_time = curate_from_google_trends(geo_list=geo_list) - if sleep_time is None: - sleep_time = random.randint(600, 1800) - print(f"Sleeping for {sleep_time}s") - logging.info(f"Completed run with sleep time: {sleep_time} seconds") - time.sleep(sleep_time) - return post_data, category, sleep_time + lock_fd = None + try: + lock_fd = acquire_lock() + logging.info("***** Google Trends Automator Launched *****") + geo_list = ['US', 'GB', 'AU'] + post_data, category, should_continue = curate_from_google_trends(geo_list=geo_list) + if not post_data: + logging.info("No postable Google Trend found") + else: + logging.info("Completed Google Trends run") + return post_data, category, should_continue + except Exception as e: + logging.error(f"Fatal error in run_google_trends_automator: {e}", exc_info=True) + return None, None, False + finally: + if lock_fd: + fcntl.flock(lock_fd, fcntl.LOCK_UN) + lock_fd.close() + os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None if __name__ == "__main__": - run_google_trends_automator() \ No newline at end of file + setup_logging() + post_data, category, should_continue = run_google_trends_automator() + logging.info(f"Run completed, should_continue: {should_continue}") \ No newline at end of file diff --git a/foodie_automator_reddit.py b/foodie_automator_reddit.py index cf27c45..8fbc926 100644 --- a/foodie_automator_reddit.py +++ b/foodie_automator_reddit.py @@ -29,11 +29,13 @@ from foodie_utils import ( prepare_post_data, select_best_author, smart_image_and_filter, get_flickr_image ) -from foodie_hooks import get_dynamic_hook, get_viral_share_prompt # Removed select_best_cta import +from foodie_hooks import get_dynamic_hook, get_viral_share_prompt +import fcntl load_dotenv() is_posting = False +LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_reddit.lock" def signal_handler(sig, frame): logging.info("Received termination signal, checking if safe to exit...") @@ -46,8 +48,22 @@ def signal_handler(sig, frame): signal.signal(signal.SIGTERM, signal_handler) signal.signal(signal.SIGINT, signal_handler) -LOG_FILE = "/home/shane/foodie_automator/foodie_automator_reddit.log" +LOG_FILE = "/home/shane/foodie_automator/logs/foodie_automator_reddit.log" LOG_PRUNE_DAYS = 30 +MAX_RETRIES = 3 +RETRY_BACKOFF = 2 + +POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_reddit_titles.json' +USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json' +EXPIRATION_HOURS = 24 +IMAGE_EXPIRATION_DAYS = 7 + +posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS) +posted_titles = set(entry["title"] for entry in posted_titles_data if "title" in entry) +used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) +used_images = set(entry["title"] for entry in used_images_data if "title" in entry) + +client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) def setup_logging(): if os.path.exists(LOG_FILE): @@ -59,7 +75,7 @@ def setup_logging(): timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}') for line in lines: - if timestamp_pattern.match(line): + if(timestamp_pattern.match(line)): if current_entry: log_entries.append(''.join(current_entry)) current_entry = [line] @@ -95,19 +111,17 @@ def setup_logging(): logging.getLogger().addHandler(console_handler) logging.info("Logging initialized for foodie_automator_reddit.py") -setup_logging() - -POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_reddit_titles.json' 
-USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json' -EXPIRATION_HOURS = 24 -IMAGE_EXPIRATION_DAYS = 7 - -posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS) -posted_titles = set(entry["title"] for entry in posted_titles_data if "title" in entry) -used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) -used_images = set(entry["title"] for entry in used_images_data if "title" in entry) - -client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) +def acquire_lock(): + os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True) + lock_fd = open(LOCK_FILE, 'w') + try: + fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + lock_fd.write(str(os.getpid())) + lock_fd.flush() + return lock_fd + except IOError: + logging.info("Another instance of foodie_automator_reddit.py is running") + sys.exit(0) def clean_reddit_title(title): cleaned_title = re.sub(r'^\[.*?\]\s*', '', title).strip() @@ -115,253 +129,246 @@ def clean_reddit_title(title): return cleaned_title def is_interesting_reddit(title, summary, upvotes, comment_count, top_comments): - try: - content = f"Title: {title}\n\nContent: {summary}" - if top_comments: - content += f"\n\nTop Comments:\n{'\n'.join(top_comments)}" - - response = client.chat.completions.create( - model=LIGHT_TASK_MODEL, - messages=[ - {"role": "system", "content": ( - "Rate this Reddit post from 0-10 based on rarity, buzzworthiness, and engagement potential for food lovers, covering food topics (skip recipes). " - "Score 8-10 for rare, highly shareable ideas (e.g., unique dishes or restaurant trends). " - "Score 5-7 for fresh, engaging updates with broad appeal. Score below 5 for common or unremarkable content. " - "Consider comments for added context (e.g., specific locations or unique details). " - "Return only a number." - )}, - {"role": "user", "content": content} - ], - max_tokens=5 - ) - base_score = int(response.choices[0].message.content.strip()) if response.choices[0].message.content.strip().isdigit() else 0 + for attempt in range(MAX_RETRIES): + try: + content = f"Title: {title}\n\nContent: {summary}" + if top_comments: + content += f"\n\nTop Comments:\n{'\n'.join(top_comments)}" + + response = client.chat.completions.create( + model=LIGHT_TASK_MODEL, + messages=[ + {"role": "system", "content": ( + "Rate this Reddit post from 0-10 based on rarity, buzzworthiness, and engagement potential for food lovers, covering food topics (skip recipes). " + "Score 8-10 for rare, highly shareable ideas (e.g., unique dishes or restaurant trends). " + "Score 5-7 for fresh, engaging updates with broad appeal. Score below 5 for common or unremarkable content. " + "Consider comments for added context (e.g., specific locations or unique details). 
" + "Return only a number" + )}, + {"role": "user", "content": content} + ], + max_tokens=5 + ) + base_score = int(response.choices[0].message.content.strip()) if response.choices[0].message.content.strip().isdigit() else 0 - engagement_boost = 0 - if upvotes >= 500: - engagement_boost += 3 - elif upvotes >= 100: - engagement_boost += 2 - elif upvotes >= 50: - engagement_boost += 1 - - if comment_count >= 100: - engagement_boost += 2 - elif comment_count >= 20: - engagement_boost += 1 + engagement_boost = 0 + if upvotes >= 500: + engagement_boost += 3 + elif upvotes >= 100: + engagement_boost += 2 + elif upvotes >= 50: + engagement_boost += 1 + + if comment_count >= 100: + engagement_boost += 2 + elif comment_count >= 20: + engagement_boost += 1 - final_score = min(base_score + engagement_boost, 10) - logging.info(f"Reddit Interest Score: {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count}, top_comments: {len(top_comments)}) for '{title}'") - print(f"Interest Score for '{title[:50]}...': {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count})") - return final_score - except Exception as e: - logging.error(f"Reddit interestingness scoring failed: {e}") - print(f"Reddit Interest Error: {e}") - return 0 + final_score = min(base_score + engagement_boost, 10) + logging.info(f"Reddit Interest Score: {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count}, top_comments: {len(top_comments)}) for '{title}'") + return final_score + except Exception as e: + logging.warning(f"Reddit interestingness scoring failed (attempt {attempt + 1}): {e}") + if attempt < MAX_RETRIES - 1: + time.sleep(RETRY_BACKOFF * (2 ** attempt)) + continue + logging.error(f"Failed to score Reddit post '{title}' after {MAX_RETRIES} attempts") + return 0 def get_top_comments(post_url, reddit, limit=3): - try: - submission = reddit.submission(url=post_url) - submission.comment_sort = 'top' - submission.comments.replace_more(limit=0) - top_comments = [comment.body for comment in submission.comments[:limit] if not comment.body.startswith('[deleted]')] - logging.info(f"Fetched {len(top_comments)} top comments for {post_url}") - return top_comments - except Exception as e: - logging.error(f"Failed to fetch comments for {post_url}: {e}") - return [] - + for attempt in range(MAX_RETRIES): + try: + submission = reddit.submission(url=post_url) + submission.comment_sort = 'top' + submission.comments.replace_more(limit=0) + top_comments = [comment.body for comment in submission.comments[:limit] if not comment.body.startswith('[deleted]')] + logging.info(f"Fetched {len(top_comments)} top comments for {post_url}") + return top_comments + except Exception as e: + logging.warning(f"Failed to fetch comments for {post_url} (attempt {attempt + 1}): {e}") + if attempt < MAX_RETRIES - 1: + time.sleep(RETRY_BACKOFF * (2 ** attempt)) + continue + logging.error(f"Failed to fetch comments for {post_url} after {MAX_RETRIES} attempts") + return [] + def fetch_duckduckgo_news_context(title, hours=24): + for attempt in range(MAX_RETRIES): + try: + with DDGS() as ddgs: + results = ddgs.news(f"{title} news", timelimit="d", max_results=5) + titles = [] + for r in results: + try: + date_str = r["date"] + if '+00:00' in date_str: + dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc) + else: + dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc) + if dt > (datetime.now(timezone.utc) - timedelta(hours=24)): + 
titles.append(r["title"].lower()) + except ValueError as e: + logging.warning(f"Date parsing failed for '{date_str}': {e}") + continue + context = " ".join(titles) if titles else "No recent news found within 24 hours" + logging.info(f"DuckDuckGo News context for '{title}': {context}") + return context + except Exception as e: + logging.warning(f"DuckDuckGo News context fetch failed for '{title}' (attempt {attempt + 1}): {e}") + if attempt < MAX_RETRIES - 1: + time.sleep(RETRY_BACKOFF * (2 ** attempt)) + continue + logging.error(f"Failed to fetch DuckDuckGo News context for '{title}' after {MAX_RETRIES} attempts") + return title + +def fetch_reddit_posts(): try: - with DDGS() as ddgs: - results = ddgs.news(f"{title} news", timelimit="d", max_results=5) - titles = [] - for r in results: + reddit = praw.Reddit( + client_id=REDDIT_CLIENT_ID, + client_secret=REDDIT_CLIENT_SECRET, + user_agent=REDDIT_USER_AGENT + ) + feeds = ['FoodPorn', 'restaurant', 'FoodIndustry', 'food'] + articles = [] + cutoff_date = datetime.now(timezone.utc) - timedelta(hours=EXPIRATION_HOURS) + + logging.info(f"Starting fetch with cutoff date: {cutoff_date}") + for subreddit_name in feeds: + for attempt in range(MAX_RETRIES): try: - date_str = r["date"] - if '+00:00' in date_str: - dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc) - else: - dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc) - if dt > (datetime.now(timezone.utc) - timedelta(hours=24)): - titles.append(r["title"].lower()) - except ValueError as e: - logging.warning(f"Date parsing failed for '{date_str}': {e}") + subreddit = reddit.subreddit(subreddit_name) + for submission in subreddit.top(time_filter='day', limit=100): + pub_date = datetime.fromtimestamp(submission.created_utc, tz=timezone.utc) + if pub_date < cutoff_date: + logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})") + continue + cleaned_title = clean_reddit_title(submission.title) + articles.append({ + "title": cleaned_title, + "raw_title": submission.title, + "link": f"https://www.reddit.com{submission.permalink}", + "summary": submission.selftext, + "feed_title": get_clean_source_name(subreddit_name), + "pub_date": pub_date, + "upvotes": submission.score, + "comment_count": submission.num_comments + }) + logging.info(f"Fetched {len(articles)} posts from r/{subreddit_name}") + break + except Exception as e: + logging.error(f"Failed to fetch Reddit feed r/{subreddit_name} (attempt {attempt + 1}): {e}") + if attempt < MAX_RETRIES - 1: + time.sleep(RETRY_BACKOFF * (2 ** attempt)) continue - context = " ".join(titles) if titles else "No recent news found within 24 hours" - logging.info(f"DuckDuckGo News context for '{title}': {context}") - return context + logging.info(f"Total Reddit posts fetched: {len(articles)}") + return articles except Exception as e: - logging.warning(f"DuckDuckGo News context fetch failed for '{title}': {e}") - return title - -def fetch_reddit_posts(): - reddit = praw.Reddit( - client_id=REDDIT_CLIENT_ID, - client_secret=REDDIT_CLIENT_SECRET, - user_agent=REDDIT_USER_AGENT - ) - feeds = ['FoodPorn', 'restaurant', 'FoodIndustry', 'food'] - articles = [] - cutoff_date = datetime.now(timezone.utc) - timedelta(hours=EXPIRATION_HOURS) - - logging.info(f"Starting fetch with cutoff date: {cutoff_date}") - for subreddit_name in feeds: - try: - subreddit = reddit.subreddit(subreddit_name) - for submission in subreddit.top(time_filter='day', limit=100): - pub_date = 
datetime.fromtimestamp(submission.created_utc, tz=timezone.utc) - if pub_date < cutoff_date: - logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})") - continue - cleaned_title = clean_reddit_title(submission.title) - articles.append({ - "title": cleaned_title, - "raw_title": submission.title, - "link": f"https://www.reddit.com{submission.permalink}", - "summary": submission.selftext, - "feed_title": get_clean_source_name(subreddit_name), - "pub_date": pub_date, - "upvotes": submission.score, - "comment_count": submission.num_comments - }) - logging.info(f"Fetched {len(articles)} posts from r/{subreddit_name}") - except Exception as e: - logging.error(f"Failed to fetch Reddit feed r/{subreddit_name}: {e}") - - logging.info(f"Total Reddit posts fetched: {len(articles)}") - return articles + logging.error(f"Unexpected error in fetch_reddit_posts: {e}", exc_info=True) + return [] def curate_from_reddit(): - articles = fetch_reddit_posts() - if not articles: - print("No Reddit posts available") - logging.info("No Reddit posts available") - return None, None, random.randint(600, 1800) + try: + articles = fetch_reddit_posts() + if not articles: + logging.info("No Reddit posts available") + return None, None, False - articles.sort(key=lambda x: x["upvotes"], reverse=True) - - reddit = praw.Reddit( - client_id=REDDIT_CLIENT_ID, - client_secret=REDDIT_CLIENT_SECRET, - user_agent=REDDIT_USER_AGENT - ) - - attempts = 0 - max_attempts = 10 - while attempts < max_attempts and articles: - article = articles.pop(0) - title = article["title"] - raw_title = article["raw_title"] - link = article["link"] - summary = article["summary"] - source_name = "Reddit" - original_source = 'Reddit' - - if raw_title in posted_titles: - print(f"Skipping already posted post: {raw_title}") - logging.info(f"Skipping already posted post: {raw_title}") - attempts += 1 - continue - - print(f"Trying Reddit Post: {title} from {source_name}") - logging.info(f"Trying Reddit Post: {title} from {source_name}") - - image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) - if skip or any(keyword in title.lower() or keyword in raw_title.lower() for keyword in RECIPE_KEYWORDS + ["homemade"]): - print(f"Skipping filtered Reddit post: {title}") - logging.info(f"Skipping filtered Reddit post: {title}") - attempts += 1 - continue - - top_comments = get_top_comments(link, reddit, limit=3) - ddg_context = fetch_duckduckgo_news_context(title) - content_to_summarize = f"{title}\n\n{summary}\n\nTop Comments:\n{'\n'.join(top_comments) if top_comments else 'None'}\n\nAdditional Context: {ddg_context}" - interest_score = is_interesting_reddit( - title, - summary, - article["upvotes"], - article["comment_count"], - top_comments - ) - logging.info(f"Interest Score: {interest_score} for '{title}'") - if interest_score < 6: - print(f"Reddit Interest Too Low: {interest_score}") - logging.info(f"Reddit Interest Too Low: {interest_score}") - attempts += 1 - continue + articles.sort(key=lambda x: x["upvotes"], reverse=True) - num_paragraphs = determine_paragraph_count(interest_score) - extra_prompt = ( - f"Generate exactly {num_paragraphs} paragraphs.\n" - f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n" - f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n" - f"Incorporate relevant insights from this additional context if available: 
{ddg_context}.\n" - f"Do NOT introduce unrelated concepts unless in the content, comments, or additional context.\n" - f"If brief, expand on the core idea with relevant context about its appeal or significance.\n" - f"Do not include emojis in the summary." + reddit = praw.Reddit( + client_id=REDDIT_CLIENT_ID, + client_secret=REDDIT_CLIENT_SECRET, + user_agent=REDDIT_USER_AGENT ) - final_summary = summarize_with_gpt4o( - content_to_summarize, - source_name, - link, - interest_score=interest_score, - extra_prompt=extra_prompt - ) - if not final_summary: - logging.info(f"Summary failed for '{title}'") - attempts += 1 - continue - - final_summary = insert_link_naturally(final_summary, source_name, link) - - post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic) - if not post_data: - attempts += 1 - continue - - image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic) - if not image_url: - image_url, image_source, uploader, page_url = get_image(image_query) - - hook = get_dynamic_hook(post_data["title"]).strip() - - share_prompt = get_viral_share_prompt(post_data["title"], final_summary) - share_links_template = ( - f'
{share_prompt} ' - f' ' - f'
' - ) - post_data["content"] = f"{final_summary}\n\n{share_links_template}" - - global is_posting - is_posting = True - try: - post_id, post_url = post_to_wp( - post_data=post_data, - category=category, - link=link, - author=author, - image_url=image_url, - original_source=original_source, - image_source=image_source, - uploader=uploader, - page_url=page_url, + attempts = 0 + max_attempts = 10 + while attempts < max_attempts and articles: + article = articles.pop(0) + title = article["title"] + raw_title = article["raw_title"] + link = article["link"] + summary = article["summary"] + source_name = "Reddit" + original_source = 'Reddit' + + if raw_title in posted_titles: + logging.info(f"Skipping already posted post: {raw_title}") + attempts += 1 + continue + + logging.info(f"Trying Reddit Post: {title} from {source_name}") + + image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) + if skip or any(keyword in title.lower() or keyword in raw_title.lower() for keyword in RECIPE_KEYWORDS + ["homemade"]): + logging.info(f"Skipping filtered Reddit post: {title}") + attempts += 1 + continue + + top_comments = get_top_comments(link, reddit, limit=3) + ddg_context = fetch_duckduckgo_news_context(title) + content_to_summarize = f"{title}\n\n{summary}\n\nTop Comments:\n{'\n'.join(top_comments) if top_comments else 'None'}\n\nAdditional Context: {ddg_context}" + interest_score = is_interesting_reddit( + title, + summary, + article["upvotes"], + article["comment_count"], + top_comments + ) + logging.info(f"Interest Score: {interest_score} for '{title}'") + if interest_score < 6: + logging.info(f"Reddit Interest Too Low: {interest_score}") + attempts += 1 + continue + + num_paragraphs = determine_paragraph_count(interest_score) + extra_prompt = ( + f"Generate exactly {num_paragraphs} paragraphs.\n" + f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n" + f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n" + f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n" + f"Do NOT introduce unrelated concepts unless in the content, comments, or additional context.\n" + f"If brief, expand on the core idea with relevant context about its appeal or significance.\n" + f"Do not include emojis in the summary." + ) + + final_summary = summarize_with_gpt4o( + content_to_summarize, + source_name, + link, interest_score=interest_score, - should_post_tweet=True + extra_prompt=extra_prompt ) - finally: - is_posting = False - - if post_id: - share_text = f"Check out this foodie gem! 
{post_data['title']}" - share_text_encoded = quote(share_text) - post_url_encoded = quote(post_url) - share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded) - post_data["content"] = f"{final_summary}\n\n{share_links}" + if not final_summary: + logging.info(f"Summary failed for '{title}'") + attempts += 1 + continue + + final_summary = insert_link_naturally(final_summary, source_name, link) + + post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic) + if not post_data: + attempts += 1 + continue + + image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic) + if not image_url: + image_url, image_source, uploader, page_url = get_image(image_query) + + hook = get_dynamic_hook(post_data["title"]).strip() + + share_prompt = get_viral_share_prompt(post_data["title"], final_summary) + share_links_template = ( + f'
{share_prompt} ' + f' ' + f'
' + ) + post_data["content"] = f"{final_summary}\n\n{share_links_template}" + + global is_posting is_posting = True try: - post_to_wp( + post_id, post_url = post_to_wp( post_data=post_data, category=category, link=link, @@ -372,49 +379,84 @@ def curate_from_reddit(): uploader=uploader, page_url=page_url, interest_score=interest_score, - post_id=post_id, - should_post_tweet=False + should_post_tweet=True ) + except Exception as e: + logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True) + attempts += 1 + continue finally: is_posting = False - - timestamp = datetime.now(timezone.utc).isoformat() - save_json_file(POSTED_TITLES_FILE, raw_title, timestamp) - posted_titles.add(raw_title) - logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}") - - if image_url: - save_json_file(USED_IMAGES_FILE, image_url, timestamp) - used_images.add(image_url) - logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE} with timestamp {timestamp}") - - print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****") - print(f"Actual post URL: {post_url}") - logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****") - logging.info(f"Actual post URL: {post_url}") - return post_data, category, random.randint(0, 1800) + + if post_id: + share_text = f"Check out this foodie gem! {post_data['title']}" + share_text_encoded = quote(share_text) + post_url_encoded = quote(post_url) + share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded) + post_data["content"] = f"{final_summary}\n\n{share_links}" + is_posting = True + try: + post_to_wp( + post_data=post_data, + category=category, + link=link, + author=author, + image_url=image_url, + original_source=original_source, + image_source=image_source, + uploader=uploader, + page_url=page_url, + interest_score=interest_score, + post_id=post_id, + should_post_tweet=False + ) + except Exception as e: + logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True) + finally: + is_posting = False + + timestamp = datetime.now(timezone.utc).isoformat() + save_json_file(POSTED_TITLES_FILE, raw_title, timestamp) + posted_titles.add(raw_title) + logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE}") + + if image_url: + save_json_file(USED_IMAGES_FILE, image_url, timestamp) + used_images.add(image_url) + logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") + + logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****") + return post_data, category, True + attempts += 1 + logging.info(f"WP posting failed for '{post_data['title']}'") - attempts += 1 - logging.info(f"WP posting failed for '{post_data['title']}'") - - print("No interesting Reddit post found after attempts") - logging.info("No interesting Reddit post found after attempts") - return None, None, random.randint(600, 1800) + logging.info("No interesting Reddit post found after attempts") + return None, None, False + except Exception as e: + logging.error(f"Unexpected error in curate_from_reddit: {e}", exc_info=True) + return None, None, False def run_reddit_automator(): - print(f"{datetime.now(timezone.utc)} - INFO - ***** Reddit Automator Launched *****") - logging.info("***** Reddit Automator Launched *****") - - post_data, category, sleep_time = curate_from_reddit() - if not post_data: - print(f"No postable Reddit article found - sleeping for 
{sleep_time} seconds") - logging.info(f"No postable Reddit article found - sleeping for {sleep_time} seconds") - else: - print(f"Completed Reddit run with sleep time: {sleep_time} seconds") - logging.info(f"Completed Reddit run with sleep time: {sleep_time} seconds") - print(f"Sleeping for {sleep_time}s") - time.sleep(sleep_time) - return post_data, category, sleep_time + lock_fd = None + try: + lock_fd = acquire_lock() + logging.info("***** Reddit Automator Launched *****") + post_data, category, should_continue = curate_from_reddit() + if not post_data: + logging.info("No postable Reddit article found") + else: + logging.info("Completed Reddit run") + return post_data, category, should_continue + except Exception as e: + logging.error(f"Fatal error in run_reddit_automator: {e}", exc_info=True) + return None, None, False + finally: + if lock_fd: + fcntl.flock(lock_fd, fcntl.LOCK_UN) + lock_fd.close() + os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None if __name__ == "__main__": - run_reddit_automator() \ No newline at end of file + setup_logging() + post_data, category, should_continue = run_reddit_automator() + logging.info(f"Run completed, should_continue: {should_continue}") \ No newline at end of file diff --git a/foodie_automator_rss.py b/foodie_automator_rss.py index 824a956..a163bda 100644 --- a/foodie_automator_rss.py +++ b/foodie_automator_rss.py @@ -31,10 +31,12 @@ from foodie_utils import ( ) from foodie_hooks import get_dynamic_hook, get_viral_share_prompt from dotenv import load_dotenv +import fcntl load_dotenv() is_posting = False +LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_rss.lock" def signal_handler(sig, frame): logging.info("Received termination signal, checking if safe to exit...") @@ -47,10 +49,11 @@ def signal_handler(sig, frame): signal.signal(signal.SIGTERM, signal_handler) signal.signal(signal.SIGINT, signal_handler) -LOG_FILE = "/home/shane/foodie_automator/foodie_automator_rss.log" +LOG_FILE = "/home/shane/foodie_automator/logs/foodie_automator_rss.log" LOG_PRUNE_DAYS = 30 FEED_TIMEOUT = 15 MAX_RETRIES = 3 +RETRY_BACKOFF = 2 POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json' USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json' @@ -96,21 +99,27 @@ def setup_logging(): logging.getLogger("requests").setLevel(logging.WARNING) logging.info("Logging initialized for foodie_automator_rss.py") -setup_logging() +def acquire_lock(): + os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True) + lock_fd = open(LOCK_FILE, 'w') + try: + fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + lock_fd.write(str(os.getpid())) + lock_fd.flush() + return lock_fd + except IOError: + logging.info("Another instance of foodie_automator_rss.py is running") + sys.exit(0) def create_http_session() -> requests.Session: session = requests.Session() retry_strategy = Retry( total=MAX_RETRIES, - backoff_factor=2, + backoff_factor=RETRY_BACKOFF, status_forcelist=[403, 429, 500, 502, 503, 504], allowed_methods=["GET", "POST"] ) - adapter = HTTPAdapter( - max_retries=retry_strategy, - pool_connections=10, - pool_maxsize=10 - ) + adapter = HTTPAdapter(max_retries=retry_strategy) session.mount("http://", adapter) session.mount("https://", adapter) session.headers.update({ @@ -140,189 +149,169 @@ def fetch_rss_feeds(): logging.info(f"Processing feeds: {RSS_FEEDS}") for feed_url in RSS_FEEDS: - logging.info(f"Processing feed: {feed_url}") - try: - response = session.get(feed_url, timeout=FEED_TIMEOUT) - response.raise_for_status() - soup 
= BeautifulSoup(response.content, 'xml') - items = soup.find_all('item') - - feed_title = RSS_FEED_NAMES.get(feed_url, (get_clean_source_name(feed_url), feed_url)) - for item in items: - try: - title = item.find('title').text.strip() if item.find('title') else "Untitled" - link = item.find('link').text.strip() if item.find('link') else "" - pub_date = item.find('pubDate') - pub_date = parse_date(pub_date.text) if pub_date else datetime.now(timezone.utc) - - if pub_date < cutoff_date: - logging.info(f"Skipping old article: {title} (Published: {pub_date})") + for attempt in range(MAX_RETRIES): + logging.info(f"Processing feed: {feed_url} (attempt {attempt + 1})") + try: + response = session.get(feed_url, timeout=FEED_TIMEOUT) + response.raise_for_status() + soup = BeautifulSoup(response.content, 'xml') + items = soup.find_all('item') + + feed_title = RSS_FEED_NAMES.get(feed_url, (get_clean_source_name(feed_url), feed_url)) + for item in items: + try: + title = item.find('title').text.strip() if item.find('title') else "Untitled" + link = item.find('link').text.strip() if item.find('link') else "" + pub_date = item.find('pubDate') + pub_date = parse_date(pub_date.text) if pub_date else datetime.now(timezone.utc) + + if pub_date < cutoff_date: + logging.info(f"Skipping old article: {title} (Published: {pub_date})") + continue + + description = item.find('description') + summary = BeautifulSoup(description.text, 'html.parser').get_text().strip() if description else "" + content = item.find('content:encoded') + content_text = BeautifulSoup(content.text, 'html.parser').get_text().strip() if content else summary + + articles.append({ + "title": title, + "link": link, + "summary": summary, + "content": content_text, + "feed_title": feed_title[0] if isinstance(feed_title, tuple) else feed_title, + "pub_date": pub_date + }) + logging.debug(f"Processed article: {title}") + except Exception as e: + logging.warning(f"Error processing entry in {feed_url}: {e}") continue - - description = item.find('description') - summary = BeautifulSoup(description.text, 'html.parser').get_text().strip() if description else "" - content = item.find('content:encoded') - content_text = BeautifulSoup(content.text, 'html.parser').get_text().strip() if content else summary - - articles.append({ - "title": title, - "link": link, - "summary": summary, - "content": content_text, - "feed_title": feed_title[0] if isinstance(feed_title, tuple) else feed_title, - "pub_date": pub_date - }) - logging.debug(f"Processed article: {title}") - except Exception as e: - logging.warning(f"Error processing entry in {feed_url}: {e}") - continue - logging.info(f"Filtered to {len(articles)} articles from {feed_url}") - except Exception as e: - logging.error(f"Failed to fetch RSS feed {feed_url}: {e}") - continue - + logging.info(f"Filtered to {len(articles)} articles from {feed_url}") + break + except Exception as e: + logging.error(f"Failed to fetch RSS feed {feed_url}: {e}") + if attempt < MAX_RETRIES - 1: + time.sleep(RETRY_BACKOFF * (2 ** attempt)) + continue articles.sort(key=lambda x: x["pub_date"], reverse=True) logging.info(f"Total RSS articles fetched: {len(articles)}") return articles def fetch_duckduckgo_news_context(title, hours=24): - try: - with DDGS() as ddgs: - results = ddgs.news(f"{title} news", timelimit="d", max_results=5) - titles = [] - for r in results: - try: - date_str = r["date"] - if '+00:00' in date_str: - dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc) - else: - dt = 
datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc) - if dt > (datetime.now(timezone.utc) - timedelta(hours=24)): - titles.append(r["title"].lower()) - except ValueError as e: - logging.warning(f"Date parsing failed for '{date_str}': {e}") - continue - context = " ".join(titles) if titles else "No recent news found within 24 hours" - logging.info(f"DuckDuckGo News context for '{title}': {context}") - return context - except Exception as e: - logging.warning(f"DuckDuckGo News context fetch failed for '{title}': {e}") - return title - -def curate_from_rss(): - articles = fetch_rss_feeds() # Corrected from fetch_rss_articles to fetch_rss_feeds - if not articles: - print("No RSS articles available") - logging.info("No RSS articles available") - return None, None, random.randint(600, 1800) - - attempts = 0 - max_attempts = 10 - while attempts < max_attempts and articles: - article = articles.pop(0) - title = article["title"] - link = article["link"] - summary = article.get("summary", "") - source_name = article.get("feed_title", "Unknown Source") # Adjusted to match fetch_rss_feeds output - original_source = f'{source_name}' - - if title in posted_titles: - print(f"Skipping already posted article: {title}") - logging.info(f"Skipping already posted article: {title}") - attempts += 1 + for attempt in range(MAX_RETRIES): + try: + with DDGS() as ddgs: + results = ddgs.news(f"{title} news", timelimit="d", max_results=5) + titles = [] + for r in results: + try: + date_str = r["date"] + if '+00:00' in date_str: + dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc) + else: + dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S%Z").replace(tzinfo=timezone.utc) + if dt > (datetime.now(timezone.utc) - timedelta(hours=24)): + titles.append(r["title"].lower()) + except ValueError as e: + logging.warning(f"Date parsing failed for '{date_str}': {e}") + continue + context = " ".join(titles) if titles else "No recent news found within 24 hours" + logging.info(f"DuckDuckGo News context for '{title}': {context}") + return context + except Exception as e: + logging.warning(f"DuckDuckGo News context fetch failed for '{title}' (attempt {attempt + 1}): {e}") + if attempt < MAX_RETRIES - 1: + time.sleep(RETRY_BACKOFF * (2 ** attempt)) continue + logging.error(f"Failed to fetch DuckDuckGo News context for '{title}' after {MAX_RETRIES} attempts") + return title - print(f"Trying RSS Article: {title} from {source_name}") - logging.info(f"Trying RSS Article: {title} from {source_name}") +def curate_from_rss(): + try: + articles = fetch_rss_feeds() + if not articles: + logging.info("No RSS articles available") + return None, None, False # Continue running + + attempts = 0 + max_attempts = 10 + while attempts < max_attempts and articles: + article = articles.pop(0) + title = article["title"] + link = article["link"] + summary = article.get("summary", "") + source_name = article.get("feed_title", "Unknown Source") + original_source = f'{source_name}' + + if title in posted_titles: + logging.info(f"Skipping already posted article: {title}") + attempts += 1 + continue - image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) - if skip: - print(f"Skipping filtered RSS article: {title}") - logging.info(f"Skipping filtered RSS article: {title}") - attempts += 1 - continue + logging.info(f"Trying RSS Article: {title} from {source_name}") - ddg_context = fetch_duckduckgo_news_context(title) - scoring_content = 
f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}" - interest_score = is_interesting(scoring_content) - logging.info(f"Interest score for '{title}': {interest_score}") - if interest_score < 6: - print(f"RSS Interest Too Low: {interest_score}") - logging.info(f"RSS Interest Too Low: {interest_score}") - attempts += 1 - continue + image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) + if skip: + logging.info(f"Skipping filtered RSS article: {title}") + attempts += 1 + continue - num_paragraphs = determine_paragraph_count(interest_score) - extra_prompt = ( - f"Generate exactly {num_paragraphs} paragraphs.\n" - f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n" - f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n" - f"Do NOT introduce unrelated concepts unless in the content or additional context.\n" - f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n" - f"Do not include emojis in the summary." - ) - content_to_summarize = scoring_content - final_summary = summarize_with_gpt4o( - content_to_summarize, - source_name, - link, - interest_score=interest_score, - extra_prompt=extra_prompt - ) - if not final_summary: - logging.info(f"Summary failed for '{title}'") - attempts += 1 - continue + ddg_context = fetch_duckduckgo_news_context(title) + scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}" + interest_score = is_interesting(scoring_content) + logging.info(f"Interest score for '{title}': {interest_score}") + if interest_score < 6: + logging.info(f"RSS Interest Too Low: {interest_score}") + attempts += 1 + continue - final_summary = insert_link_naturally(final_summary, source_name, link) + num_paragraphs = determine_paragraph_count(interest_score) + extra_prompt = ( + f"Generate exactly {num_paragraphs} paragraphs.\n" + f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n" + f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n" + f"Do NOT introduce unrelated concepts unless in the content or additional context.\n" + f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n" + f"Do not include emojis in the summary." 
+ ) + content_to_summarize = scoring_content + final_summary = summarize_with_gpt4o( + content_to_summarize, + source_name, + link, + interest_score=interest_score, + extra_prompt=extra_prompt + ) + if not final_summary: + logging.info(f"Summary failed for '{title}'") + attempts += 1 + continue - post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic) - if not post_data: - attempts += 1 - continue + final_summary = insert_link_naturally(final_summary, source_name, link) - image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic) - if not image_url: - image_url, image_source, uploader, page_url = get_image(image_query) + post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic) + if not post_data: + attempts += 1 + continue - hook = get_dynamic_hook(post_data["title"]).strip() + image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic) + if not image_url: + image_url, image_source, uploader, page_url = get_image(image_query) - share_prompt = get_viral_share_prompt(post_data["title"], final_summary) - share_links_template = ( - f'

{share_prompt} '
-            f' '
-            f'

' - ) - post_data["content"] = f"{final_summary}\n\n{share_links_template}" + hook = get_dynamic_hook(post_data["title"]).strip() - global is_posting - is_posting = True - try: - post_id, post_url = post_to_wp( - post_data=post_data, - category=category, - link=link, - author=author, - image_url=image_url, - original_source=original_source, - image_source=image_source, - uploader=uploader, - page_url=page_url, - interest_score=interest_score, - should_post_tweet=True + share_prompt = get_viral_share_prompt(post_data["title"], final_summary) + share_links_template = ( + f'

{share_prompt} '
+                f' '
+                f'

' ) - finally: - is_posting = False - - if post_id: - share_text = f"Check out this foodie gem! {post_data['title']}" - share_text_encoded = quote(share_text) - post_url_encoded = quote(post_url) - share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded) - post_data["content"] = f"{final_summary}\n\n{share_links}" + post_data["content"] = f"{final_summary}\n\n{share_links_template}" + + global is_posting is_posting = True try: - post_to_wp( + post_id, post_url = post_to_wp( post_data=post_data, category=category, link=link, @@ -333,41 +322,85 @@ def curate_from_rss(): uploader=uploader, page_url=page_url, interest_score=interest_score, - post_id=post_id, - should_post_tweet=False + should_post_tweet=True ) + except Exception as e: + logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True) + attempts += 1 + continue finally: is_posting = False - timestamp = datetime.now(timezone.utc).isoformat() - save_json_file(POSTED_TITLES_FILE, title, timestamp) - posted_titles.add(title) - logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") - - if image_url: - save_json_file(USED_IMAGES_FILE, image_url, timestamp) - used_images.add(image_url) - logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") - - print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****") - logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****") - return post_data, category, random.randint(0, 1800) - - attempts += 1 - logging.info(f"WP posting failed for '{post_data['title']}'") + if post_id: + share_text = f"Check out this foodie gem! {post_data['title']}" + share_text_encoded = quote(share_text) + post_url_encoded = quote(post_url) + share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded) + post_data["content"] = f"{final_summary}\n\n{share_links}" + is_posting = True + try: + post_to_wp( + post_data=post_data, + category=category, + link=link, + author=author, + image_url=image_url, + original_source=original_source, + image_source=image_source, + uploader=uploader, + page_url=page_url, + interest_score=interest_score, + post_id=post_id, + should_post_tweet=False + ) + except Exception as e: + logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True) + finally: + is_posting = False + + timestamp = datetime.now(timezone.utc).isoformat() + save_json_file(POSTED_TITLES_FILE, title, timestamp) + posted_titles.add(title) + logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") + + if image_url: + save_json_file(USED_IMAGES_FILE, image_url, timestamp) + used_images.add(image_url) + logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") + + logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****") + return post_data, category, True # Run again immediately + attempts += 1 + logging.info(f"WP posting failed for '{post_data['title']}'") - print("No interesting RSS article found after attempts") - logging.info("No interesting RSS article found after attempts") - return None, None, random.randint(600, 1800) + logging.info("No interesting RSS article found after attempts") + return None, None, False # Wait before running again + except Exception as e: + logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True) + return None, None, False def run_rss_automator(): - print(f"{datetime.now(timezone.utc)} - INFO - ***** RSS Automator 
Launched *****") - logging.info("***** RSS Automator Launched *****") - post_data, category, sleep_time = curate_from_rss() - print(f"Sleeping for {sleep_time}s") - logging.info(f"Completed run with sleep time: {sleep_time} seconds") - time.sleep(sleep_time) - return post_data, category, sleep_time + lock_fd = None + try: + lock_fd = acquire_lock() + logging.info("***** RSS Automator Launched *****") + post_data, category, should_continue = curate_from_rss() + if not post_data: + logging.info("No postable RSS article found") + else: + logging.info("Completed RSS run") + return post_data, category, should_continue + except Exception as e: + logging.error(f"Fatal error in run_rss_automator: {e}", exc_info=True) + return None, None, False + finally: + if lock_fd: + fcntl.flock(lock_fd, fcntl.LOCK_UN) + lock_fd.close() + os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None if __name__ == "__main__": - run_rss_automator() \ No newline at end of file + setup_logging() + post_data, category, should_continue = run_rss_automator() + # Remove sleep timer, let manage_scripts.sh control execution + logging.info(f"Run completed, should_continue: {should_continue}") \ No newline at end of file diff --git a/foodie_engagement_tweet.py b/foodie_engagement_tweet.py index 59c4b4e..2372e0b 100644 --- a/foodie_engagement_tweet.py +++ b/foodie_engagement_tweet.py @@ -1,83 +1,263 @@ -import random +# foodie_engagement_tweet.py +import json import logging +import random +import signal +import sys +import fcntl +import os from datetime import datetime, timedelta, timezone -from openai import OpenAI # Add this import -from foodie_utils import post_tweet, AUTHORS, SUMMARY_MODEL -from foodie_config import X_API_CREDENTIALS -from dotenv import load_dotenv # Add this import - -# Setup logging -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +from openai import OpenAI +from foodie_utils import post_tweet, AUTHORS, SUMMARY_MODEL, load_post_counts, save_post_counts +from foodie_config import X_API_CREDENTIALS, AUTHOR_BACKGROUNDS_FILE +from dotenv import load_dotenv -# Load environment variables load_dotenv() +LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_engagement_tweet.lock" +LOG_FILE = "/home/shane/foodie_automator/logs/foodie_engagement_tweet.log" +REFERENCE_DATE_FILE = "/home/shane/foodie_automator/engagement_reference_date.json" +LOG_PRUNE_DAYS = 30 +MAX_RETRIES = 3 +RETRY_BACKOFF = 2 + +def setup_logging(): + """Initialize logging with pruning of old logs.""" + try: + os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True) + if os.path.exists(LOG_FILE): + with open(LOG_FILE, 'r') as f: + lines = f.readlines() + cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS) + pruned_lines = [] + malformed_count = 0 + for line in lines: + if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit(): + malformed_count += 1 + continue + try: + timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc) + if timestamp > cutoff: + pruned_lines.append(line) + except ValueError: + malformed_count += 1 + continue + if malformed_count > 0: + logging.info(f"Skipped {malformed_count} malformed log lines during pruning") + with open(LOG_FILE, 'w') as f: + f.writelines(pruned_lines) + + logging.basicConfig( + filename=LOG_FILE, + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' + ) + console_handler = logging.StreamHandler() + 
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) + logging.getLogger().addHandler(console_handler) + logging.getLogger("openai").setLevel(logging.WARNING) + logging.info("Logging initialized for foodie_engagement_tweet.py") + except Exception as e: + print(f"Failed to setup logging: {e}") + sys.exit(1) + +def acquire_lock(): + """Acquire a lock to prevent concurrent runs.""" + os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True) + lock_fd = open(LOCK_FILE, 'w') + try: + fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + lock_fd.write(str(os.getpid())) + lock_fd.flush() + return lock_fd + except IOError: + logging.info("Another instance of foodie_engagement_tweet.py is running") + sys.exit(0) + +def signal_handler(sig, frame): + """Handle termination signals gracefully.""" + logging.info("Received termination signal, exiting...") + sys.exit(0) + +signal.signal(signal.SIGTERM, signal_handler) +signal.signal(signal.SIGINT, signal_handler) + # Initialize OpenAI client -client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) +try: + client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + if not os.getenv("OPENAI_API_KEY"): + logging.error("OPENAI_API_KEY is not set in environment variables") + raise ValueError("OPENAI_API_KEY is required") +except Exception as e: + logging.error(f"Failed to initialize OpenAI client: {e}", exc_info=True) + sys.exit(1) + +# Load author backgrounds +try: + with open(AUTHOR_BACKGROUNDS_FILE, 'r') as f: + AUTHOR_BACKGROUNDS = json.load(f) +except Exception as e: + logging.error(f"Failed to load author_backgrounds.json: {e}", exc_info=True) + sys.exit(1) +def get_reference_date(): + """Load or initialize the reference date for the 2-day interval.""" + os.makedirs(os.path.dirname(REFERENCE_DATE_FILE), exist_ok=True) + if os.path.exists(REFERENCE_DATE_FILE): + try: + with open(REFERENCE_DATE_FILE, 'r') as f: + data = json.load(f) + reference_date = datetime.fromisoformat(data["reference_date"]).replace(tzinfo=timezone.utc) + logging.info(f"Loaded reference date: {reference_date.date()}") + return reference_date + except (json.JSONDecodeError, KeyError, ValueError) as e: + logging.error(f"Failed to load reference date from {REFERENCE_DATE_FILE}: {e}. Initializing new date.") + + # Initialize with current date (start of day) + reference_date = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) + try: + with open(REFERENCE_DATE_FILE, 'w') as f: + json.dump({"reference_date": reference_date.isoformat()}, f) + logging.info(f"Initialized reference date: {reference_date.date()}") + except Exception as e: + logging.error(f"Failed to save reference date to {REFERENCE_DATE_FILE}: {e}. 
Using current date.") + return reference_date def generate_engagement_tweet(author): - # Fetch x_username from X_API_CREDENTIALS + """Generate an engagement tweet using author background themes.""" credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None) if not credentials: logging.error(f"No X credentials found for {author['username']}") return None author_handle = credentials["x_username"] + + background = next((bg for bg in AUTHOR_BACKGROUNDS if bg["username"] == author["username"]), {}) + if not background or "engagement_themes" not in background: + logging.warning(f"No background or engagement themes found for {author['username']}") + theme = "food trends" + else: + theme = random.choice(background["engagement_themes"]) + prompt = ( f"Generate a concise tweet (under 280 characters) for {author_handle}. " - f"Create an engaging food-related question or statement to spark interaction. " + f"Create an engaging question or statement about {theme} to spark interaction. " f"Include a call to action to follow {author_handle} or like the tweet, and mention InsiderFoodie.com with a link to https://insiderfoodie.com. " f"Avoid using the word 'elevate'—use more humanized language like 'level up' or 'bring to life'. " f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways)." ) - try: - response = client.chat.completions.create( - model=SUMMARY_MODEL, - messages=[ - {"role": "system", "content": "You are a social media expert crafting engaging tweets."}, - {"role": "user", "content": prompt} - ], - max_tokens=100, - temperature=0.7 - ) - tweet = response.choices[0].message.content.strip() - if len(tweet) > 280: - tweet = tweet[:277] + "..." - return tweet - except Exception as e: - logging.warning(f"Failed to generate engagement tweet for {author['username']}: {e}") - # Fallback templates - engagement_templates = [ - f"Whats the most mouthwatering dish youve seen this week Share below and follow {author_handle} for more foodie ideas on InsiderFoodie.com Link: https://insiderfoodie.com", - f"Food lovers unite Whats your go to comfort food Tell us and like this tweet for more tasty ideas from {author_handle} on InsiderFoodie.com Link: https://insiderfoodie.com", - f"Ever tried a dish that looked too good to eat Share your favorites and follow {author_handle} for more culinary trends on InsiderFoodie.com Link: https://insiderfoodie.com", - f"What food trend are you loving right now Let us know and like this tweet to keep up with {author_handle} on InsiderFoodie.com Link: https://insiderfoodie.com" - ] - template = random.choice(engagement_templates) - return template + for attempt in range(MAX_RETRIES): + try: + response = client.chat.completions.create( + model=SUMMARY_MODEL, + messages=[ + {"role": "system", "content": "You are a social media expert crafting engaging tweets."}, + {"role": "user", "content": prompt} + ], + max_tokens=100, + temperature=0.7 + ) + tweet = response.choices[0].message.content.strip() + if len(tweet) > 280: + tweet = tweet[:277] + "..." + logging.debug(f"Generated engagement tweet: {tweet}") + return tweet + except Exception as e: + logging.warning(f"Failed to generate engagement tweet for {author['username']} (attempt {attempt + 1}): {e}") + if attempt < MAX_RETRIES - 1: + time.sleep(RETRY_BACKOFF * (2 ** attempt)) + else: + logging.error(f"Failed to generate engagement tweet after {MAX_RETRIES} attempts") + engagement_templates = [ + f"What's the most mouthwatering {theme} you've seen this week? 
Share below and follow {author_handle} for more on InsiderFoodie.com! Link: https://insiderfoodie.com", + f"{theme.capitalize()} lovers unite! What's your go-to pick? Tell us and like this tweet for more from {author_handle} on InsiderFoodie.com! Link: https://insiderfoodie.com", + f"Ever tried a {theme} that blew your mind? Share your favorites and follow {author_handle} for more on InsiderFoodie.com! Link: https://insiderfoodie.com", + f"What {theme} trend are you loving right now? Let us know and like this tweet to keep up with {author_handle} on InsiderFoodie.com! Link: https://insiderfoodie.com" + ] + template = random.choice(engagement_templates) + logging.info(f"Using fallback engagement tweet: {template}") + return template def post_engagement_tweet(): - # Reference date for calculating the 2-day interval - reference_date = datetime(2025, 4, 29, tzinfo=timezone.utc) # Starting from April 29, 2025 - current_date = datetime.now(timezone.utc) - - # Calculate the number of days since the reference date - days_since_reference = (current_date - reference_date).days - - # Post only if the number of days since the reference date is divisible by 2 - if days_since_reference % 2 == 0: - logging.info("Today is an engagement tweet day (every 2 days). Posting...") - for author in AUTHORS: - tweet = generate_engagement_tweet(author) + """Post engagement tweets for authors every 2 days.""" + try: + logging.info("Starting foodie_engagement_tweet.py") + print("Starting foodie_engagement_tweet.py") + + # Get reference date + reference_date = get_reference_date() + current_date = datetime.now(timezone.utc) + days_since_reference = (current_date - reference_date).days + logging.info(f"Days since reference date ({reference_date.date()}): {days_since_reference}") + print(f"Days since reference date ({reference_date.date()}): {days_since_reference}") + + # Post only if the number of days since the reference date is divisible by 2 + if days_since_reference % 2 == 0: + logging.info("Today is an engagement tweet day (every 2 days). Posting...") + print("Today is an engagement tweet day (every 2 days). Posting...") - logging.info(f"Posting engagement tweet for {author['username']}: {tweet}") - if post_tweet(author, tweet): - logging.info(f"Successfully posted engagement tweet for {author['username']}") - else: - logging.warning(f"Failed to post engagement tweet for {author['username']}") - else: - logging.info("Today is not an engagement tweet day (every 2 days). 
Skipping...") + # Load post counts to check limits + post_counts = load_post_counts() + + for author in AUTHORS: + try: + # Check post limits + author_count = next((entry for entry in post_counts if entry["username"] == author["username"]), None) + if not author_count: + logging.error(f"No post count entry for {author['username']}, skipping") + continue + if author_count["monthly_count"] >= 500: + logging.warning(f"Monthly post limit (500) reached for {author['username']}, skipping") + continue + if author_count["daily_count"] >= 20: + logging.warning(f"Daily post limit (20) reached for {author['username']}, skipping") + continue + + tweet = generate_engagement_tweet(author) + if not tweet: + logging.error(f"Failed to generate engagement tweet for {author['username']}, skipping") + continue + + logging.info(f"Posting engagement tweet for {author['username']}: {tweet}") + print(f"Posting engagement tweet for {author['username']}: {tweet}") + if post_tweet(author, tweet): + logging.info(f"Successfully posted engagement tweet for {author['username']}") + # Update post counts + author_count["monthly_count"] += 1 + author_count["daily_count"] += 1 + save_post_counts(post_counts) + else: + logging.warning(f"Failed to post engagement tweet for {author['username']}") + except Exception as e: + logging.error(f"Error posting engagement tweet for {author['username']}: {e}", exc_info=True) + continue + else: + logging.info(f"Today is not an engagement tweet day (every 2 days). Days since reference: {days_since_reference}. Skipping...") + print(f"Today is not an engagement tweet day (every 2 days). Days since reference: {days_since_reference}. Skipping...") + + logging.info("Completed foodie_engagement_tweet.py") + print("Completed foodie_engagement_tweet.py") + except Exception as e: + logging.error(f"Unexpected error in post_engagement_tweet: {e}", exc_info=True) + print(f"Error in post_engagement_tweet: {e}") + +def main(): + """Main function to run the script.""" + lock_fd = None + try: + lock_fd = acquire_lock() + setup_logging() + post_engagement_tweet() + except Exception as e: + logging.error(f"Fatal error in main: {e}", exc_info=True) + print(f"Fatal error: {e}") + sys.exit(1) + finally: + if lock_fd: + fcntl.flock(lock_fd, fcntl.LOCK_UN) + lock_fd.close() + os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None if __name__ == "__main__": - post_engagement_tweet() \ No newline at end of file + main() \ No newline at end of file diff --git a/foodie_weekly_thread.py b/foodie_weekly_thread.py index 0dca632..374a11c 100644 --- a/foodie_weekly_thread.py +++ b/foodie_weekly_thread.py @@ -1,94 +1,134 @@ +# foodie_weekly_thread.py import json import os -from datetime import datetime, timedelta, timezone import logging import random +import signal +import sys +import fcntl +import time +from datetime import datetime, timedelta, timezone +import tweepy from openai import OpenAI from foodie_utils import post_tweet, AUTHORS, SUMMARY_MODEL from foodie_config import X_API_CREDENTIALS from dotenv import load_dotenv -import tweepy load_dotenv() -# Logging configuration -LOG_FILE = "/home/shane/foodie_automator/foodie_weekly_thread.log" +LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_weekly_thread.lock" +LOG_FILE = "/home/shane/foodie_automator/logs/foodie_weekly_thread.log" LOG_PRUNE_DAYS = 30 +MAX_RETRIES = 3 +RETRY_BACKOFF = 2 +RECENT_POSTS_FILE = "/home/shane/foodie_automator/recent_posts.json" def setup_logging(): - if os.path.exists(LOG_FILE): - with open(LOG_FILE, 'r') as f: - lines = 
f.readlines() - cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS) - pruned_lines = [] - for line in lines: - try: - timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc) - if timestamp > cutoff: - pruned_lines.append(line) - except ValueError: - continue - with open(LOG_FILE, 'w') as f: - f.writelines(pruned_lines) - - logging.basicConfig( - filename=LOG_FILE, - level=logging.DEBUG, - format='%(asctime)s - %(levelname)s - %(message)s', - datefmt='%Y-%m-%d %H:%M:%S' - ) - console_handler = logging.StreamHandler() - console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) - logging.getLogger().addHandler(console_handler) - logging.info("Logging initialized for foodie_weekly_thread.py") + """Initialize logging with pruning of old logs.""" + try: + os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True) + if os.path.exists(LOG_FILE): + with open(LOG_FILE, 'r') as f: + lines = f.readlines() + cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS) + pruned_lines = [] + malformed_count = 0 + for line in lines: + if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit(): + malformed_count += 1 + continue + try: + timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc) + if timestamp > cutoff: + pruned_lines.append(line) + except ValueError: + malformed_count += 1 + continue + if malformed_count > 0: + logging.info(f"Skipped {malformed_count} malformed log lines during pruning") + with open(LOG_FILE, 'w') as f: + f.writelines(pruned_lines) + + logging.basicConfig( + filename=LOG_FILE, + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' + ) + console_handler = logging.StreamHandler() + console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) + logging.getLogger().addHandler(console_handler) + logging.getLogger("tweepy").setLevel(logging.WARNING) + logging.info("Logging initialized for foodie_weekly_thread.py") + except Exception as e: + print(f"Failed to setup logging: {e}") + sys.exit(1) + +def acquire_lock(): + """Acquire a lock to prevent concurrent runs.""" + os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True) + lock_fd = open(LOCK_FILE, 'w') + try: + fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + lock_fd.write(str(os.getpid())) + lock_fd.flush() + return lock_fd + except IOError: + logging.info("Another instance of foodie_weekly_thread.py is running") + sys.exit(0) + +def signal_handler(sig, frame): + """Handle termination signals gracefully.""" + logging.info("Received termination signal, exiting...") + sys.exit(0) -setup_logging() +signal.signal(signal.SIGTERM, signal_handler) +signal.signal(signal.SIGINT, signal_handler) # Initialize OpenAI client -client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) -if not os.getenv("OPENAI_API_KEY"): - logging.error("OPENAI_API_KEY is not set in environment variables") - raise ValueError("OPENAI_API_KEY is required") +try: + client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + if not os.getenv("OPENAI_API_KEY"): + logging.error("OPENAI_API_KEY is not set in environment variables") + raise ValueError("OPENAI_API_KEY is required") +except Exception as e: + logging.error(f"Failed to initialize OpenAI client: {e}", exc_info=True) + sys.exit(1) -# Validate X_API_CREDENTIALS and test API access def validate_twitter_credentials(): + """Validate Twitter API credentials for all authors.""" 
logging.info("Validating Twitter API credentials for all authors") valid_credentials = [] for author in AUTHORS: credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None) if not credentials: logging.error(f"No X credentials found for {author['username']} in X_API_CREDENTIALS") - print(f"No X credentials found for {author['username']}") continue - logging.debug(f"Testing credentials for {author['username']} (handle: {credentials['x_username']})") - try: - client = tweepy.Client( - consumer_key=credentials["api_key"], - consumer_secret=credentials["api_secret"], - access_token=credentials["access_token"], - access_token_secret=credentials["access_token_secret"] - ) - # Test API access by fetching the user's profile - user = client.get_me() - logging.info(f"Credentials valid for {author['username']} (handle: {credentials['x_username']}, user_id: {user.data.id})") - print(f"Credentials valid for {author['username']} (handle: {credentials['x_username']})") - valid_credentials.append(credentials) - except tweepy.TweepyException as e: - logging.error(f"Failed to validate credentials for {author['username']} (handle: {credentials['x_username']}): {e}") - if hasattr(e, 'response') and e.response: - logging.error(f"Twitter API response: {e.response.text}") - print(f"Failed to validate credentials for {author['username']}: {e}") + for attempt in range(MAX_RETRIES): + try: + twitter_client = tweepy.Client( + consumer_key=credentials["api_key"], + consumer_secret=credentials["api_secret"], + access_token=credentials["access_token"], + access_token_secret=credentials["access_token_secret"] + ) + user = twitter_client.get_me() + logging.info(f"Credentials valid for {author['username']} (handle: {credentials['x_username']})") + valid_credentials.append(credentials) + break + except tweepy.TweepyException as e: + logging.error(f"Failed to validate credentials for {author['username']} (attempt {attempt + 1}): {e}") + if attempt < MAX_RETRIES - 1: + time.sleep(RETRY_BACKOFF * (2 ** attempt)) + else: + logging.error(f"Credentials invalid for {author['username']} after {MAX_RETRIES} attempts") if not valid_credentials: logging.error("No valid Twitter credentials found for any author") raise ValueError("No valid Twitter credentials found") return valid_credentials -# Run credential validation -validate_twitter_credentials() - -RECENT_POSTS_FILE = "/home/shane/foodie_automator/recent_posts.json" - def load_recent_posts(): + """Load and deduplicate posts from recent_posts.json.""" posts = [] unique_posts = {} logging.debug(f"Attempting to load posts from {RECENT_POSTS_FILE}") @@ -131,13 +171,15 @@ def load_recent_posts(): continue logging.info(f"Loaded {len(posts)} unique posts from {RECENT_POSTS_FILE} (after deduplication)") except Exception as e: - logging.error(f"Failed to load {RECENT_POSTS_FILE}: {e}") + logging.error(f"Failed to load {RECENT_POSTS_FILE}: {e}", exc_info=True) + return posts if not posts: logging.warning(f"No valid posts loaded from {RECENT_POSTS_FILE}") return posts def filter_posts_for_week(posts, start_date, end_date): + """Filter posts within the specified week.""" filtered_posts = [] logging.debug(f"Filtering {len(posts)} posts for range {start_date} to {end_date}") @@ -155,6 +197,7 @@ def filter_posts_for_week(posts, start_date, end_date): return filtered_posts def generate_intro_tweet(author): + """Generate an intro tweet for the weekly thread.""" credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == 
author["username"]), None) if not credentials: logging.error(f"No X credentials found for {author['username']}") @@ -170,118 +213,221 @@ def generate_intro_tweet(author): f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways)." ) - try: - response = client.chat.completions.create( - model=SUMMARY_MODEL, - messages=[ - {"role": "system", "content": "You are a social media expert crafting engaging tweets."}, - {"role": "user", "content": prompt} - ], - max_tokens=100, - temperature=0.7 - ) - tweet = response.choices[0].message.content.strip() - if len(tweet) > 280: - tweet = tweet[:277] + "..." - logging.debug(f"Generated intro tweet: {tweet}") - return tweet - except Exception as e: - logging.error(f"Failed to generate intro tweet for {author['username']}: {e}") - fallback = ( - f"This weeks top 10 foodie finds by {author_handle} Check out the best on InsiderFoodie.com " - f"Follow {author_handle} for more and like this thread to stay in the loop Visit us at https://insiderfoodie.com" - ) - logging.info(f"Using fallback intro tweet: {fallback}") - return fallback + for attempt in range(MAX_RETRIES): + try: + response = client.chat.completions.create( + model=SUMMARY_MODEL, + messages=[ + {"role": "system", "content": "You are a social media expert crafting engaging tweets."}, + {"role": "user", "content": prompt} + ], + max_tokens=100, + temperature=0.7 + ) + tweet = response.choices[0].message.content.strip() + if len(tweet) > 280: + tweet = tweet[:277] + "..." + logging.debug(f"Generated intro tweet: {tweet}") + return tweet + except Exception as e: + logging.warning(f"Failed to generate intro tweet for {author['username']} (attempt {attempt + 1}): {e}") + if attempt < MAX_RETRIES - 1: + time.sleep(RETRY_BACKOFF * (2 ** attempt)) + else: + logging.error(f"Failed to generate intro tweet after {MAX_RETRIES} attempts") + fallback = ( + f"This week's top 10 foodie finds by {author_handle}! Check out the best on InsiderFoodie.com. " + f"Follow {author_handle} for more and like this thread to stay in the loop! 
Visit us at https://insiderfoodie.com" + ) + logging.info(f"Using fallback intro tweet: {fallback}") + return fallback -def post_weekly_thread(): - logging.info("Entering post_weekly_thread") - print("Entering post_weekly_thread") - - today = datetime.now(timezone.utc) - days_to_monday = today.weekday() - start_date = (today - timedelta(days=days_to_monday + 7)).replace(hour=0, minute=0, second=0, microsecond=0) - end_date = start_date + timedelta(days=6, hours=23, minutes=59, seconds=59) - - logging.info(f"Fetching posts from {start_date} to {end_date}") - print(f"Fetching posts from {start_date} to {end_date}") - - all_posts = load_recent_posts() - print(f"Loaded {len(all_posts)} posts from recent_posts.json") - logging.info(f"Loaded {len(all_posts)} posts from recent_posts.json") - - if not all_posts: - logging.warning("No posts loaded, exiting post_weekly_thread") - print("No posts loaded, exiting post_weekly_thread") - return - - weekly_posts = filter_posts_for_week(all_posts, start_date, end_date) - print(f"Filtered to {len(weekly_posts)} posts for the week") - logging.info(f"Filtered to {len(weekly_posts)} posts for the week") - - if not weekly_posts: - logging.warning("No posts found within the week range, exiting post_weekly_thread") - print("No posts found within the week range, exiting post_weekly_thread") - return +def generate_final_cta(author): + """Generate a final CTA tweet for the weekly thread using GPT.""" + credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None) + if not credentials: + logging.error(f"No X credentials found for {author['username']}") + return None + author_handle = credentials["x_username"] + logging.debug(f"Generating final CTA tweet for {author_handle}") - posts_by_author = {} - for post in weekly_posts: - author = post["author_username"] - if author not in posts_by_author: - posts_by_author[author] = [] - posts_by_author[author].append(post) - logging.debug(f"Grouped posts by author: {list(posts_by_author.keys())}") + prompt = ( + f"Generate a concise tweet (under 280 characters) for {author_handle}. " + f"Conclude a thread of their top 10 foodie posts of the week on InsiderFoodie.com. " + f"Make it engaging, value-driven, and urgent, in the style of Neil Patel. " + f"Include a call to action to visit InsiderFoodie.com and follow {author_handle}. " + f"Mention that the top 10 foodie trends are shared every Monday. " + f"Avoid using the word 'elevate'—use humanized language like 'level up' or 'bring to life'. " + f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways)." + ) - for author in AUTHORS: - author_posts = posts_by_author.get(author["username"], []) - logging.info(f"Processing author {author['username']} with {len(author_posts)} posts") - print(f"Processing author {author['username']} with {len(author_posts)} posts") + for attempt in range(MAX_RETRIES): + try: + response = client.chat.completions.create( + model=SUMMARY_MODEL, + messages=[ + {"role": "system", "content": "You are a social media expert crafting engaging tweets."}, + {"role": "user", "content": prompt} + ], + max_tokens=100, + temperature=0.7 + ) + tweet = response.choices[0].message.content.strip() + if len(tweet) > 280: + tweet = tweet[:277] + "..." 
+ logging.debug(f"Generated final CTA tweet: {tweet}") + return tweet + except Exception as e: + logging.warning(f"Failed to generate final CTA tweet for {author['username']} (attempt {attempt + 1}): {e}") + if attempt < MAX_RETRIES - 1: + time.sleep(RETRY_BACKOFF * (2 ** attempt)) + else: + logging.error(f"Failed to generate final CTA tweet after {MAX_RETRIES} attempts") + fallback = ( + f"Want more foodie insights like these? Check out insiderfoodie.com and follow {author_handle} " + f"for the world’s top 10 foodie trends every Monday. Don’t miss out!" + ) + logging.info(f"Using fallback final CTA tweet: {fallback}") + return fallback + +def post_weekly_thread(): + """Post weekly threads for each author.""" + try: + logging.info("Starting foodie_weekly_thread.py") + print("Starting foodie_weekly_thread.py") - if not author_posts: - logging.info(f"No posts found for {author['username']} this week") - print(f"No posts found for {author['username']} this week") - continue + valid_credentials = validate_twitter_credentials() + if not valid_credentials: + logging.error("No valid Twitter credentials found, exiting") + return - author_posts.sort(key=lambda x: x.get("timestamp", ""), reverse=True) - top_posts = author_posts[:10] - logging.info(f"Selected {len(top_posts)} top posts for {author['username']}") - print(f"Selected {len(top_posts)} top posts for {author['username']}") + today = datetime.now(timezone.utc) + days_to_monday = today.weekday() + start_date = (today - timedelta(days=days_to_monday + 7)).replace(hour=0, minute=0, second=0, microsecond=0) + end_date = start_date + timedelta(days=6, hours=23, minutes=59, seconds=59) - intro_tweet = generate_intro_tweet(author) - if not intro_tweet: - logging.error(f"Failed to generate intro tweet for {author['username']}, skipping") - continue - logging.info(f"Posting intro tweet for {author['username']}: {intro_tweet}") - print(f"Posting intro tweet for {author['username']}: {intro_tweet}") + logging.info(f"Fetching posts from {start_date} to {end_date}") + print(f"Fetching posts from {start_date} to {end_date}") - intro_response = post_tweet(author, intro_tweet) - if not intro_response: - logging.error(f"Failed to post intro tweet for {author['username']}, skipping thread") - print(f"Failed to post intro tweet for {author['username']}") - continue + all_posts = load_recent_posts() + logging.info(f"Loaded {len(all_posts)} posts from recent_posts.json") + print(f"Loaded {len(all_posts)} posts from recent_posts.json") - intro_tweet_id = intro_response.get("id") - logging.debug(f"Intro tweet posted with ID {intro_tweet_id}") + if not all_posts: + logging.warning("No posts loaded, exiting post_weekly_thread") + print("No posts loaded, exiting post_weekly_thread") + return - for i, post in enumerate(top_posts, 1): - post_tweet_content = f"{i}. 
{post['title']} Link: {post['url']}" - logging.info(f"Posting thread reply {i} for {author['username']}: {post_tweet_content}") - print(f"Posting thread reply {i} for {author['username']}: {post_tweet_content}") - reply_response = post_tweet(author, post_tweet_content, reply_to_id=intro_tweet_id) - if not reply_response: - logging.error(f"Failed to post thread reply {i} for {author['username']}") - else: - logging.debug(f"Thread reply {i} posted with ID {reply_response.get('id')}") + weekly_posts = filter_posts_for_week(all_posts, start_date, end_date) + logging.info(f"Filtered to {len(weekly_posts)} posts for the week") + print(f"Filtered to {len(weekly_posts)} posts for the week") - logging.info(f"Successfully posted weekly thread for {author['username']}") - print(f"Successfully posted weekly thread for {author['username']}") + if not weekly_posts: + logging.warning("No posts found within the week range, exiting post_weekly_thread") + print("No posts found within the week range, exiting post_weekly_thread") + return + + posts_by_author = {} + for post in weekly_posts: + author = post["author_username"] + if author not in posts_by_author: + posts_by_author[author] = [] + posts_by_author[author].append(post) + logging.debug(f"Grouped posts by author: {list(posts_by_author.keys())}") + + for author in AUTHORS: + try: + author_posts = posts_by_author.get(author["username"], []) + logging.info(f"Processing author {author['username']} with {len(author_posts)} posts") + print(f"Processing author {author['username']} with {len(author_posts)} posts") + + if not author_posts: + logging.info(f"No posts found for {author['username']} this week") + print(f"No posts found for {author['username']} this week") + continue + + author_posts.sort(key=lambda x: x.get("timestamp", ""), reverse=True) + top_posts = author_posts[:10] + logging.info(f"Selected {len(top_posts)} top posts for {author['username']}") + print(f"Selected {len(top_posts)} top posts for {author['username']}") + + intro_tweet = generate_intro_tweet(author) + if not intro_tweet: + logging.error(f"Failed to generate intro tweet for {author['username']}, skipping") + continue + logging.info(f"Posting intro tweet for {author['username']}: {intro_tweet}") + print(f"Posting intro tweet for {author['username']}: {intro_tweet}") + + intro_response = post_tweet(author, intro_tweet) + if not intro_response: + logging.error(f"Failed to post intro tweet for {author['username']}, skipping thread") + print(f"Failed to post intro tweet for {author['username']}") + continue + + intro_tweet_id = intro_response.get("id") + last_tweet_id = intro_tweet_id + logging.debug(f"Intro tweet posted with ID {intro_tweet_id}") + + for i, post in enumerate(top_posts, 1): + try: + post_tweet_content = f"{i}. 
{post['title']} Link: {post['url']}" + logging.info(f"Posting thread reply {i} for {author['username']}: {post_tweet_content}") + print(f"Posting thread reply {i} for {author['username']}: {post_tweet_content}") + reply_response = post_tweet(author, post_tweet_content, reply_to_id=last_tweet_id) + if not reply_response: + logging.error(f"Failed to post thread reply {i} for {author['username']}") + else: + last_tweet_id = reply_response.get("id") + logging.debug(f"Thread reply {i} posted with ID {last_tweet_id}") + except Exception as e: + logging.error(f"Error posting thread reply {i} for {author['username']}: {e}", exc_info=True) + continue + + # Post final CTA tweet + if last_tweet_id and top_posts: # Ensure there's a valid thread to reply to + try: + final_cta = generate_final_cta(author) + if not final_cta: + logging.error(f"Failed to generate final CTA tweet for {author['username']}, skipping") + continue + logging.info(f"Posting final CTA tweet for {author['username']}: {final_cta}") + print(f"Posting final CTA tweet for {author['username']}: {final_cta}") + cta_response = post_tweet(author, final_cta, reply_to_id=last_tweet_id) + if not cta_response: + logging.error(f"Failed to post final CTA tweet for {author['username']}") + else: + logging.debug(f"Final CTA tweet posted with ID {cta_response.get('id')}") + except Exception as e: + logging.error(f"Error posting final CTA tweet for {author['username']}: {e}", exc_info=True) + + logging.info(f"Successfully posted weekly thread for {author['username']}") + print(f"Successfully posted weekly thread for {author['username']}") + except Exception as e: + logging.error(f"Error processing author {author['username']}: {e}", exc_info=True) + continue + + logging.info("Completed foodie_weekly_thread.py") + print("Completed foodie_weekly_thread.py") + except Exception as e: + logging.error(f"Unexpected error in post_weekly_thread: {e}", exc_info=True) + print(f"Error in post_weekly_thread: {e}") -if __name__ == "__main__": - print("Starting foodie_weekly_thread.py") - logging.info("Starting foodie_weekly_thread.py") +def main(): + """Main function to run the script.""" + lock_fd = None try: + lock_fd = acquire_lock() + setup_logging() post_weekly_thread() except Exception as e: - logging.error(f"Unexpected error in post_weekly_thread: {e}", exc_info=True) - print("Completed foodie_weekly_thread.py") - logging.info("Completed foodie_weekly_thread.py") \ No newline at end of file + logging.error(f"Fatal error in main: {e}", exc_info=True) + print(f"Fatal error: {e}") + sys.exit(1) + finally: + if lock_fd: + fcntl.flock(lock_fd, fcntl.LOCK_UN) + lock_fd.close() + os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/manage_scripts.sh b/manage_scripts.sh index f0402c2..d6968dd 100755 --- a/manage_scripts.sh +++ b/manage_scripts.sh @@ -3,7 +3,9 @@ # Directory to monitor BASE_DIR="/home/shane/foodie_automator" CHECKSUM_FILE="$BASE_DIR/.file_checksum" -LOG_FILE="$BASE_DIR/manage_scripts.log" +LOG_FILE="$BASE_DIR/logs/manage_scripts.log" +VENV_PYTHON="$BASE_DIR/venv/bin/python" +LOCK_DIR="$BASE_DIR/locks" # Log function log() { @@ -13,37 +15,105 @@ log() { # Calculate checksum of files (excluding logs, JSON files, and venv) calculate_checksum() { find "$BASE_DIR" -type f \ - -not -path "$BASE_DIR/*.log" \ + -not -path "$BASE_DIR/logs/*" \ -not -path "$BASE_DIR/*.json" \ -not -path "$BASE_DIR/.file_checksum" \ -not -path "$BASE_DIR/venv/*" \ + -not -path 
"$BASE_DIR/locks/*" \ -exec sha256sum {} \; | sort | sha256sum | awk '{print $1}' } -# Check if scripts are running +# Check if a script is running (using lock file) check_running() { - pgrep -f "python3.*foodie_automator" > /dev/null + local script_name="$1" + local lock_file="$LOCK_DIR/${script_name}.lock" + if [ -f "$lock_file" ]; then + local pid=$(cat "$lock_file") + if ps -p "$pid" > /dev/null; then + log "$script_name is already running (PID: $pid)" + return 0 + else + log "Stale lock file found for $script_name, removing" + rm -f "$lock_file" + fi + fi + return 1 +} + +# Create lock file +create_lock() { + local script_name="$1" + local lock_file="$LOCK_DIR/${script_name}.lock" + mkdir -p "$LOCK_DIR" + echo $$ > "$lock_file" + log "Created lock file for $script_name (PID: $$)" +} + +# Remove lock file +remove_lock() { + local script_name="$1" + local lock_file="$LOCK_DIR/${script_name}.lock" + rm -f "$lock_file" + log "Removed lock file for $script_name" } # Stop scripts stop_scripts() { log "Stopping scripts..." - pkill -TERM -f "python3.*foodie_automator" || true + for script in foodie_automator_*.py; do + if [ -f "$script" ] && [ "$script" != "foodie_weekly_thread.py" ] && [ "$script" != "foodie_engagement_tweet.py" ]; then + local script_name="${script%.py}" + pkill -TERM -f "$VENV_PYTHON.*$script_name" || true + fi + done sleep 10 - pkill -9 -f "python3.*foodie_automator" || true + for script in foodie_automator_*.py; do + if [ -f "$script" ] && [ "$script" != "foodie_weekly_thread.py" ] && [ "$script" != "foodie_engagement_tweet.py" ]; then + local script_name="${script%.py}" + pkill -9 -f "$VENV_PYTHON.*$script_name" || true + remove_lock "$script_name" + fi + done log "Scripts stopped." } # Start scripts start_scripts() { log "Starting scripts..." - cd "$BASE_DIR" - source venv/bin/activate - # Find all foodie_automator_*.py scripts and start them + cd "$BASE_DIR" || { log "Failed to change to $BASE_DIR"; exit 1; } + + # Source virtual environment + if [ -f "$BASE_DIR/venv/bin/activate" ]; then + source "$BASE_DIR/venv/bin/activate" + else + log "Error: Virtual environment not found at $BASE_DIR/venv" + exit 1 + fi + + # Load .env variables + if [ -f "$BASE_DIR/.env" ]; then + export $(grep -v '^#' "$BASE_DIR/.env" | xargs) + log ".env variables loaded" + else + log "Error: .env file not found at $BASE_DIR/.env" + exit 1 + fi + + # Find and start all foodie_automator_*.py scripts (excluding weekly/engagement) for script in foodie_automator_*.py; do - if [ -f "$script" ]; then - log "Starting $script..." - nohup python3 "$script" >> "${script%.py}.log" 2>&1 & + if [ -f "$script" ] && [ "$script" != "foodie_weekly_thread.py" ] && [ "$script" != "foodie_engagement_tweet.py" ]; then + local script_name="${script%.py}" + if ! check_running "$script_name"; then + log "Starting $script..." + create_lock "$script_name" + nohup "$VENV_PYTHON" "$script" >> "$BASE_DIR/logs/${script_name}.log" 2>&1 & + if [ $? -eq 0 ]; then + log "$script started successfully" + else + log "Failed to start $script" + remove_lock "$script_name" + fi + fi fi done log "All scripts started." @@ -52,14 +122,34 @@ start_scripts() { # Update dependencies update_dependencies() { log "Updating dependencies..." - cd "$BASE_DIR" + cd "$BASE_DIR" || { log "Failed to change to $BASE_DIR"; exit 1; } + # Create venv if it doesn't exist if [ ! 
-d "venv" ]; then python3 -m venv venv + log "Created new virtual environment" + fi + + # Source virtual environment + if [ -f "$BASE_DIR/venv/bin/activate" ]; then + source "$BASE_DIR/venv/bin/activate" + else + log "Error: Virtual environment not found at $BASE_DIR/venv" + exit 1 + fi + + # Update pip and install requirements + "$VENV_PYTHON" -m pip install --upgrade pip + if [ -f "requirements.txt" ]; then + "$VENV_PYTHON" -m pip install -r requirements.txt || { + log "Failed to install requirements.txt, attempting fallback dependencies" + "$VENV_PYTHON" -m pip install requests openai beautifulsoup4 feedparser praw duckduckgo_search selenium Pillow pytesseract webdriver-manager + log "Fallback: Installed core dependencies" + } + else + log "Error: requirements.txt not found, installing core dependencies" + "$VENV_PYTHON" -m pip install requests openai beautifulsoup4 feedparser praw duckduckgo_search selenium Pillow pytesseract webdriver-manager fi - source venv/bin/activate - pip install --upgrade pip - pip install -r requirements.txt || (pip install requests openai beautifulsoup4 feedparser praw duckduckgo_search selenium Pillow pytesseract webdriver-manager && log "Fallback: Installed core dependencies") log "Dependencies updated." } @@ -77,7 +167,7 @@ if [ "$CURRENT_CHECKSUM" != "$PREVIOUS_CHECKSUM" ]; then log "File changes detected. Previous checksum: $PREVIOUS_CHECKSUM, Current checksum: $CURRENT_CHECKSUM" # Stop scripts if running - if check_running; then + if pgrep -f "$VENV_PYTHON.*foodie_automator" > /dev/null; then stop_scripts fi @@ -92,4 +182,6 @@ if [ "$CURRENT_CHECKSUM" != "$PREVIOUS_CHECKSUM" ]; then log "Checksum updated." else log "No file changes detected." -fi \ No newline at end of file +fi + +exit 0 \ No newline at end of file