add lock files and update weekly tweet to include last tweet to follow

main
Shane 7 months ago
parent 331979ca9e
commit 028dfc3fc8
Changed files:
  1. foodie_automator_google.py (436)
  2. foodie_automator_reddit.py (606)
  3. foodie_automator_rss.py (433)
  4. foodie_engagement_tweet.py (294)
  5. foodie_weekly_thread.py (468)
  6. manage_scripts.sh (128)
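The thread running through these diffs is a per-script lock file guarded with fcntl, so a freshly started run exits immediately instead of posting alongside an instance that is still working. A minimal sketch of the single-instance pattern the diffs below introduce, with an illustrative lock path (each real script uses its own file under /home/shane/foodie_automator/locks/):

import fcntl
import os
import sys

LOCK_FILE = "/home/shane/foodie_automator/locks/example_automator.lock"  # illustrative path, not from the diff

def acquire_lock():
    # Exclusive, non-blocking lock; a second instance hits IOError and exits quietly.
    os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
    lock_fd = open(LOCK_FILE, 'w')
    try:
        fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        lock_fd.write(str(os.getpid()))
        lock_fd.flush()
        return lock_fd
    except IOError:
        sys.exit(0)

lock_fd = acquire_lock()
try:
    pass  # run the automator
finally:
    fcntl.flock(lock_fd, fcntl.LOCK_UN)
    lock_fd.close()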

foodie_automator_google.py

@@ -29,12 +29,14 @@ from foodie_utils import (
     generate_category_from_summary, post_to_wp, prepare_post_data,
     smart_image_and_filter, insert_link_naturally, get_flickr_image
 )
-from foodie_hooks import get_dynamic_hook, get_viral_share_prompt # Removed select_best_cta import
+from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
 from dotenv import load_dotenv
+import fcntl
 load_dotenv()
 is_posting = False
+LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_google.lock"
 def signal_handler(sig, frame):
     logging.info("Received termination signal, checking if safe to exit...")
@@ -47,15 +49,58 @@ def signal_handler(sig, frame):
 signal.signal(signal.SIGTERM, signal_handler)
 signal.signal(signal.SIGINT, signal_handler)
-logger = logging.getLogger()
-logger.setLevel(logging.INFO)
-file_handler = logging.FileHandler('/home/shane/foodie_automator/foodie_automator_google.log', mode='a')
-file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
-logger.addHandler(file_handler)
-console_handler = logging.StreamHandler()
-console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
-logger.addHandler(console_handler)
-logging.info("Logging initialized for foodie_automator_google.py")
+LOG_FILE = "/home/shane/foodie_automator/logs/foodie_automator_google.log"
+LOG_PRUNE_DAYS = 30
+MAX_RETRIES = 3
+RETRY_BACKOFF = 2
+posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
+posted_titles = set(entry["title"] for entry in posted_titles_data)
+used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
+def setup_logging():
+    if os.path.exists(LOG_FILE):
+        with open(LOG_FILE, 'r') as f:
+            lines = f.readlines()
+        log_entries = []
+        current_entry = []
+        timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')
+        for line in lines:
+            if timestamp_pattern.match(line):
+                if current_entry:
+                    log_entries.append(''.join(current_entry))
+                current_entry = [line]
+            else:
+                current_entry.append(line)
+        if current_entry:
+            log_entries.append(''.join(current_entry))
+        cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
+        pruned_entries = []
+        for entry in log_entries:
+            try:
+                timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
+                if timestamp > cutoff:
+                    pruned_entries.append(entry)
+            except ValueError:
+                logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...")
+                continue
+        with open(LOG_FILE, 'w') as f:
+            f.writelines(pruned_entries)
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    file_handler = logging.FileHandler(LOG_FILE, mode='a')
+    file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
+    logger.addHandler(file_handler)
+    console_handler = logging.StreamHandler()
+    console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
+    logger.addHandler(console_handler)
+    logging.info("Logging initialized for foodie_automator_google.py")
 client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
@@ -68,6 +113,18 @@ posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
 posted_titles = set(entry["title"] for entry in posted_titles_data)
 used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
+def acquire_lock():
+    os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
+    lock_fd = open(LOCK_FILE, 'w')
+    try:
+        fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        lock_fd.write(str(os.getpid()))
+        lock_fd.flush()
+        return lock_fd
+    except IOError:
+        logging.info("Another instance of foodie_automator_google.py is running")
+        sys.exit(0)
 def parse_search_volume(volume_text):
     try:
         volume_part = volume_text.split('\n')[0].lower().strip().replace('+', '')
@@ -89,10 +146,11 @@ def scrape_google_trends(geo='US'):
     chrome_options.add_argument("--disable-dev-shm-usage")
     chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/125.0.0.0 Safari/537.36")
-    driver = webdriver.Chrome(options=chrome_options)
+    driver = None
     try:
-        for attempt in range(3):
+        for attempt in range(MAX_RETRIES):
             try:
+                driver = webdriver.Chrome(options=chrome_options)
                 time.sleep(random.uniform(2, 5))
                 url = f"https://trends.google.com/trending?geo={geo}&hours=24&sort=search-volume&category=5"
                 logging.info(f"Navigating to {url} (attempt {attempt + 1})")
@@ -105,10 +163,13 @@ def scrape_google_trends(geo='US'):
                 break
             except TimeoutException:
                 logging.warning(f"Timeout on attempt {attempt + 1} for geo={geo}")
-                if attempt == 2:
-                    logging.error(f"Failed after 3 attempts for geo={geo}")
+                if attempt == MAX_RETRIES - 1:
+                    logging.error(f"Failed after {MAX_RETRIES} attempts for geo={geo}")
                     return []
-                time.sleep(5)
+                time.sleep(RETRY_BACKOFF * (2 ** attempt))
+                if driver:
+                    driver.quit()
+                continue
         driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
         time.sleep(2)
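The flat five-second retry pause becomes exponential backoff built from the new constants, so repeated Selenium timeouts space out instead of hammering Google Trends. With MAX_RETRIES = 3 and RETRY_BACKOFF = 2 the delay is 2 s after the first failure and 4 s after the second; the third failure returns [] before any sleep. A one-line way to see the schedule (same formula as the diff):

# RETRY_BACKOFF * (2 ** attempt) for attempt in 0..MAX_RETRIES-1  ->  [2, 4, 8] seconds
delays = [2 * (2 ** attempt) for attempt in range(3)]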
@@ -145,157 +206,137 @@ def scrape_google_trends(geo='US'):
         if trends:
             trends.sort(key=lambda x: x["search_volume"], reverse=True)
             logging.info(f"Extracted {len(trends)} trends for geo={geo}: {[t['title'] for t in trends]}")
-            print(f"Raw trends fetched for geo={geo}: {[t['title'] for t in trends]}")
         else:
             logging.warning(f"No valid trends found with search volume >= 20K for geo={geo}")
         return trends
+    except Exception as e:
+        logging.error(f"Unexpected error in scrape_google_trends: {e}", exc_info=True)
+        return []
     finally:
-        driver.quit()
-        logging.info(f"Chrome driver closed for geo={geo}")
+        if driver:
+            driver.quit()
+            logging.info(f"Chrome driver closed for geo={geo}")
 def fetch_duckduckgo_news_context(trend_title, hours=24):
-    try:
-        with DDGS() as ddgs:
-            results = ddgs.news(f"{trend_title} news", timelimit="d", max_results=5)
-            titles = []
-            for r in results:
-                try:
-                    date_str = r["date"]
-                    if '+00:00' in date_str:
-                        dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc)
-                    else:
-                        dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
-                    if dt > (datetime.now(timezone.utc) - timedelta(hours=24)):
-                        titles.append(r["title"].lower())
-                except ValueError as e:
-                    logging.warning(f"Date parsing failed for '{date_str}': {e}")
-                    continue
-            context = " ".join(titles) if titles else "No recent news found within 24 hours"
-            logging.info(f"DuckDuckGo News context for '{trend_title}': {context}")
-            return context
-    except Exception as e:
-        logging.warning(f"DuckDuckGo News context fetch failed for '{trend_title}': {e}")
-        return trend_title
+    for attempt in range(MAX_RETRIES):
+        try:
+            with DDGS() as ddgs:
+                results = ddgs.news(f"{trend_title} news", timelimit="d", max_results=5)
+                titles = []
+                for r in results:
+                    try:
+                        date_str = r["date"]
+                        if '+00:00' in date_str:
+                            dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc)
+                        else:
+                            dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
+                        if dt > (datetime.now(timezone.utc) - timedelta(hours=24)):
+                            titles.append(r["title"].lower())
+                    except ValueError as e:
+                        logging.warning(f"Date parsing failed for '{date_str}': {e}")
+                        continue
+                context = " ".join(titles) if titles else "No recent news found within 24 hours"
+                logging.info(f"DuckDuckGo News context for '{trend_title}': {context}")
+                return context
+        except Exception as e:
+            logging.warning(f"DuckDuckGo News context fetch failed for '{trend_title}' (attempt {attempt + 1}): {e}")
+            if attempt < MAX_RETRIES - 1:
+                time.sleep(RETRY_BACKOFF * (2 ** attempt))
+                continue
+    logging.error(f"Failed to fetch DuckDuckGo News context for '{trend_title}' after {MAX_RETRIES} attempts")
+    return trend_title
-def curate_from_google_trends(geo_list=['US']):
-    all_trends = []
-    for geo in geo_list:
-        trends = scrape_google_trends(geo=geo)
-        if trends:
-            all_trends.extend(trends)
-    if not all_trends:
-        print("No Google Trends data available")
-        logging.info("No Google Trends data available")
-        return None, None, random.randint(600, 1800)
-    attempts = 0
-    max_attempts = 10
-    while attempts < max_attempts and all_trends:
-        trend = all_trends.pop(0)
-        title = trend["title"]
-        link = trend.get("link", "https://trends.google.com/")
-        summary = trend.get("summary", "")
-        source_name = "Google Trends"
-        original_source = f'<a href="{link}">{source_name}</a>'
-        if title in posted_titles:
-            print(f"Skipping already posted trend: {title}")
-            logging.info(f"Skipping already posted trend: {title}")
-            attempts += 1
-            continue
-        print(f"Trying Google Trend: {title} from {source_name}")
-        logging.info(f"Trying Google Trend: {title} from {source_name}")
-        image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
-        if skip:
-            print(f"Skipping filtered Google Trend: {title}")
-            logging.info(f"Skipping filtered Google Trend: {title}")
-            attempts += 1
-            continue
-        ddg_context = fetch_duckduckgo_news_context(title)
-        scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
-        interest_score = is_interesting(scoring_content)
-        logging.info(f"Interest score for '{title}': {interest_score}")
-        if interest_score < 6:
-            print(f"Google Trends Interest Too Low: {interest_score}")
-            logging.info(f"Google Trends Interest Too Low: {interest_score}")
-            attempts += 1
-            continue
-        num_paragraphs = determine_paragraph_count(interest_score)
-        extra_prompt = (
-            f"Generate exactly {num_paragraphs} paragraphs.\n"
-            f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
-            f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
-            f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
-            f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
-            f"Do not include emojis in the summary."
-        )
-        content_to_summarize = scoring_content
-        final_summary = summarize_with_gpt4o(
-            content_to_summarize,
-            source_name,
-            link,
-            interest_score=interest_score,
-            extra_prompt=extra_prompt
-        )
-        if not final_summary:
-            logging.info(f"Summary failed for '{title}'")
-            attempts += 1
-            continue
-        final_summary = insert_link_naturally(final_summary, source_name, link)
-        post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic)
-        if not post_data:
-            attempts += 1
-            continue
-        image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
-        if not image_url:
-            image_url, image_source, uploader, page_url = get_image(image_query)
-        hook = get_dynamic_hook(post_data["title"]).strip()
-        share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
-        share_links_template = (
-            f'<p>{share_prompt} '
-            f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
-            f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
-        )
-        post_data["content"] = f"{final_summary}\n\n{share_links_template}"
-        global is_posting
-        is_posting = True
-        try:
-            post_id, post_url = post_to_wp(
-                post_data=post_data,
-                category=category,
-                link=link,
-                author=author,
-                image_url=image_url,
-                original_source=original_source,
-                image_source=image_source,
-                uploader=uploader,
-                page_url=page_url,
-                interest_score=interest_score,
-                should_post_tweet=True
-            )
-        finally:
-            is_posting = False
-        if post_id:
-            share_text = f"Check out this foodie gem! {post_data['title']}"
-            share_text_encoded = quote(share_text)
-            post_url_encoded = quote(post_url)
-            share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
-            post_data["content"] = f"{final_summary}\n\n{share_links}"
-            is_posting = True
-            try:
-                post_to_wp(
-                    post_data=post_data,
-                    category=category,
-                    link=link,
+def curate_from_google_trends(geo_list=['US']):
+    try:
+        all_trends = []
+        for geo in geo_list:
+            trends = scrape_google_trends(geo=geo)
+            if trends:
+                all_trends.extend(trends)
+        if not all_trends:
+            logging.info("No Google Trends data available")
+            return None, None, False
+        attempts = 0
+        max_attempts = 10
+        while attempts < max_attempts and all_trends:
+            trend = all_trends.pop(0)
+            title = trend["title"]
+            link = trend.get("link", "https://trends.google.com/")
+            summary = trend.get("summary", "")
+            source_name = "Google Trends"
+            original_source = f'<a href="{link}">{source_name}</a>'
+            if title in posted_titles:
+                logging.info(f"Skipping already posted trend: {title}")
+                attempts += 1
+                continue
+            logging.info(f"Trying Google Trend: {title} from {source_name}")
+            image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
+            if skip:
+                logging.info(f"Skipping filtered Google Trend: {title}")
+                attempts += 1
+                continue
+            ddg_context = fetch_duckduckgo_news_context(title)
+            scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
+            interest_score = is_interesting(scoring_content)
+            logging.info(f"Interest score for '{title}': {interest_score}")
+            if interest_score < 6:
+                logging.info(f"Google Trends Interest Too Low: {interest_score}")
+                attempts += 1
+                continue
+            num_paragraphs = determine_paragraph_count(interest_score)
+            extra_prompt = (
+                f"Generate exactly {num_paragraphs} paragraphs.\n"
+                f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
+                f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
+                f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
+                f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
+                f"Do not include emojis in the summary."
+            )
+            content_to_summarize = scoring_content
+            final_summary = summarize_with_gpt4o(
+                content_to_summarize,
+                source_name,
+                link,
+                interest_score=interest_score,
+                extra_prompt=extra_prompt
+            )
+            if not final_summary:
+                logging.info(f"Summary failed for '{title}'")
+                attempts += 1
+                continue
+            final_summary = insert_link_naturally(final_summary, source_name, link)
+            post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic)
+            if not post_data:
+                attempts += 1
+                continue
+            image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
+            if not image_url:
+                image_url, image_source, uploader, page_url = get_image(image_query)
+            hook = get_dynamic_hook(post_data["title"]).strip()
+            share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
+            share_links_template = (
+                f'<p>{share_prompt} '
+                f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
+                f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
+            )
+            post_data["content"] = f"{final_summary}\n\n{share_links_template}"
+            global is_posting
+            is_posting = True
+            try:
+                post_id, post_url = post_to_wp(
+                    post_data=post_data,
+                    category=category,
+                    link=link,
@@ -306,43 +347,86 @@ def curate_from_google_trends(geo_list=['US']):
                     uploader=uploader,
                     page_url=page_url,
                     interest_score=interest_score,
-                    post_id=post_id,
-                    should_post_tweet=False
+                    should_post_tweet=True
                 )
+            except Exception as e:
+                logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
+                attempts += 1
+                continue
             finally:
                 is_posting = False
-            timestamp = datetime.now(timezone.utc).isoformat()
-            save_json_file(POSTED_TITLES_FILE, title, timestamp)
-            posted_titles.add(title)
-            logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
-            if image_url:
-                save_json_file(USED_IMAGES_FILE, image_url, timestamp)
-                used_images.add(image_url)
-                logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
-            print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
-            logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
-            return post_data, category, random.randint(0, 1800)
-        attempts += 1
-        logging.info(f"WP posting failed for '{post_data['title']}'")
-    print("No interesting Google Trend found after attempts")
-    logging.info("No interesting Google Trend found after attempts")
-    return None, None, random.randint(600, 1800)
+            if post_id:
+                share_text = f"Check out this foodie gem! {post_data['title']}"
+                share_text_encoded = quote(share_text)
+                post_url_encoded = quote(post_url)
+                share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
+                post_data["content"] = f"{final_summary}\n\n{share_links}"
+                is_posting = True
+                try:
+                    post_to_wp(
+                        post_data=post_data,
+                        category=category,
+                        link=link,
+                        author=author,
+                        image_url=image_url,
+                        original_source=original_source,
+                        image_source=image_source,
+                        uploader=uploader,
+                        page_url=page_url,
+                        interest_score=interest_score,
+                        post_id=post_id,
+                        should_post_tweet=False
+                    )
+                except Exception as e:
+                    logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
+                finally:
+                    is_posting = False
+                timestamp = datetime.now(timezone.utc).isoformat()
+                save_json_file(POSTED_TITLES_FILE, title, timestamp)
+                posted_titles.add(title)
+                logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
+                if image_url:
+                    save_json_file(USED_IMAGES_FILE, image_url, timestamp)
+                    used_images.add(image_url)
+                    logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
+                logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
+                return post_data, category, True
+            attempts += 1
+            logging.info(f"WP posting failed for '{post_data['title']}'")
+        logging.info("No interesting Google Trend found after attempts")
+        return None, None, False
+    except Exception as e:
+        logging.error(f"Unexpected error in curate_from_google_trends: {e}", exc_info=True)
+        return None, None, False
 def run_google_trends_automator():
-    logging.info("***** Google Trends Automator Launched *****")
-    geo_list = ['US', 'GB', 'AU']
-    post_data, category, sleep_time = curate_from_google_trends(geo_list=geo_list)
-    if sleep_time is None:
-        sleep_time = random.randint(600, 1800)
-    print(f"Sleeping for {sleep_time}s")
-    logging.info(f"Completed run with sleep time: {sleep_time} seconds")
-    time.sleep(sleep_time)
-    return post_data, category, sleep_time
+    lock_fd = None
+    try:
+        lock_fd = acquire_lock()
+        logging.info("***** Google Trends Automator Launched *****")
+        geo_list = ['US', 'GB', 'AU']
+        post_data, category, should_continue = curate_from_google_trends(geo_list=geo_list)
+        if not post_data:
+            logging.info("No postable Google Trend found")
+        else:
+            logging.info("Completed Google Trends run")
+        return post_data, category, should_continue
+    except Exception as e:
+        logging.error(f"Fatal error in run_google_trends_automator: {e}", exc_info=True)
+        return None, None, False
+    finally:
+        if lock_fd:
+            fcntl.flock(lock_fd, fcntl.LOCK_UN)
+            lock_fd.close()
+            os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None
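Putting the unlock in the finally block releases the lock whether the run succeeds, finds nothing, or raises; and because an fcntl flock is tied to the open descriptor, it would also be dropped automatically if the process died before reaching this point, so the explicit unlock and os.remove mainly keep the locks directory tidy. A small assumed helper (not part of the diff) showing the same teardown in one place:

def release_lock(lock_fd, lock_file):
    # Hypothetical convenience wrapper around the cleanup shown above.
    if lock_fd:
        fcntl.flock(lock_fd, fcntl.LOCK_UN)
        lock_fd.close()
        if os.path.exists(lock_file):
            os.remove(lock_file)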
 if __name__ == "__main__":
-    run_google_trends_automator()
+    setup_logging()
+    post_data, category, should_continue = run_google_trends_automator()
+    logging.info(f"Run completed, should_continue: {should_continue}")

foodie_automator_reddit.py

@@ -29,11 +29,13 @@ from foodie_utils import (
     prepare_post_data, select_best_author, smart_image_and_filter,
     get_flickr_image
 )
-from foodie_hooks import get_dynamic_hook, get_viral_share_prompt # Removed select_best_cta import
+from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
+import fcntl
 load_dotenv()
 is_posting = False
+LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_reddit.lock"
 def signal_handler(sig, frame):
     logging.info("Received termination signal, checking if safe to exit...")
@@ -46,8 +48,22 @@ def signal_handler(sig, frame):
 signal.signal(signal.SIGTERM, signal_handler)
 signal.signal(signal.SIGINT, signal_handler)
-LOG_FILE = "/home/shane/foodie_automator/foodie_automator_reddit.log"
+LOG_FILE = "/home/shane/foodie_automator/logs/foodie_automator_reddit.log"
 LOG_PRUNE_DAYS = 30
+MAX_RETRIES = 3
+RETRY_BACKOFF = 2
+POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_reddit_titles.json'
+USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
+EXPIRATION_HOURS = 24
+IMAGE_EXPIRATION_DAYS = 7
+posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
+posted_titles = set(entry["title"] for entry in posted_titles_data if "title" in entry)
+used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
+used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
+client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 def setup_logging():
     if os.path.exists(LOG_FILE):
@@ -59,7 +75,7 @@ def setup_logging():
         timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}')
         for line in lines:
-            if timestamp_pattern.match(line):
+            if(timestamp_pattern.match(line)):
                 if current_entry:
                     log_entries.append(''.join(current_entry))
                 current_entry = [line]
@@ -95,19 +111,17 @@ def setup_logging():
     logging.getLogger().addHandler(console_handler)
     logging.info("Logging initialized for foodie_automator_reddit.py")
-setup_logging()
-POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_reddit_titles.json'
-USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
-EXPIRATION_HOURS = 24
-IMAGE_EXPIRATION_DAYS = 7
-posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
-posted_titles = set(entry["title"] for entry in posted_titles_data if "title" in entry)
-used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
-used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
-client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+def acquire_lock():
+    os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
+    lock_fd = open(LOCK_FILE, 'w')
+    try:
+        fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        lock_fd.write(str(os.getpid()))
+        lock_fd.flush()
+        return lock_fd
+    except IOError:
+        logging.info("Another instance of foodie_automator_reddit.py is running")
+        sys.exit(0)
 def clean_reddit_title(title):
     cleaned_title = re.sub(r'^\[.*?\]\s*', '', title).strip()
@ -115,253 +129,246 @@ def clean_reddit_title(title):
return cleaned_title return cleaned_title
def is_interesting_reddit(title, summary, upvotes, comment_count, top_comments): def is_interesting_reddit(title, summary, upvotes, comment_count, top_comments):
try: for attempt in range(MAX_RETRIES):
content = f"Title: {title}\n\nContent: {summary}" try:
if top_comments: content = f"Title: {title}\n\nContent: {summary}"
content += f"\n\nTop Comments:\n{'\n'.join(top_comments)}" if top_comments:
content += f"\n\nTop Comments:\n{'\n'.join(top_comments)}"
response = client.chat.completions.create(
model=LIGHT_TASK_MODEL, response = client.chat.completions.create(
messages=[ model=LIGHT_TASK_MODEL,
{"role": "system", "content": ( messages=[
"Rate this Reddit post from 0-10 based on rarity, buzzworthiness, and engagement potential for food lovers, covering food topics (skip recipes). " {"role": "system", "content": (
"Score 8-10 for rare, highly shareable ideas (e.g., unique dishes or restaurant trends). " "Rate this Reddit post from 0-10 based on rarity, buzzworthiness, and engagement potential for food lovers, covering food topics (skip recipes). "
"Score 5-7 for fresh, engaging updates with broad appeal. Score below 5 for common or unremarkable content. " "Score 8-10 for rare, highly shareable ideas (e.g., unique dishes or restaurant trends). "
"Consider comments for added context (e.g., specific locations or unique details). " "Score 5-7 for fresh, engaging updates with broad appeal. Score below 5 for common or unremarkable content. "
"Return only a number." "Consider comments for added context (e.g., specific locations or unique details). "
)}, "Return only a number"
{"role": "user", "content": content} )},
], {"role": "user", "content": content}
max_tokens=5 ],
) max_tokens=5
base_score = int(response.choices[0].message.content.strip()) if response.choices[0].message.content.strip().isdigit() else 0 )
base_score = int(response.choices[0].message.content.strip()) if response.choices[0].message.content.strip().isdigit() else 0
engagement_boost = 0 engagement_boost = 0
if upvotes >= 500: if upvotes >= 500:
engagement_boost += 3 engagement_boost += 3
elif upvotes >= 100: elif upvotes >= 100:
engagement_boost += 2 engagement_boost += 2
elif upvotes >= 50: elif upvotes >= 50:
engagement_boost += 1 engagement_boost += 1
if comment_count >= 100: if comment_count >= 100:
engagement_boost += 2 engagement_boost += 2
elif comment_count >= 20: elif comment_count >= 20:
engagement_boost += 1 engagement_boost += 1
final_score = min(base_score + engagement_boost, 10) final_score = min(base_score + engagement_boost, 10)
logging.info(f"Reddit Interest Score: {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count}, top_comments: {len(top_comments)}) for '{title}'") logging.info(f"Reddit Interest Score: {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count}, top_comments: {len(top_comments)}) for '{title}'")
print(f"Interest Score for '{title[:50]}...': {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count})") return final_score
return final_score except Exception as e:
except Exception as e: logging.warning(f"Reddit interestingness scoring failed (attempt {attempt + 1}): {e}")
logging.error(f"Reddit interestingness scoring failed: {e}") if attempt < MAX_RETRIES - 1:
print(f"Reddit Interest Error: {e}") time.sleep(RETRY_BACKOFF * (2 ** attempt))
return 0 continue
logging.error(f"Failed to score Reddit post '{title}' after {MAX_RETRIES} attempts")
return 0
def get_top_comments(post_url, reddit, limit=3): def get_top_comments(post_url, reddit, limit=3):
try: for attempt in range(MAX_RETRIES):
submission = reddit.submission(url=post_url) try:
submission.comment_sort = 'top' submission = reddit.submission(url=post_url)
submission.comments.replace_more(limit=0) submission.comment_sort = 'top'
top_comments = [comment.body for comment in submission.comments[:limit] if not comment.body.startswith('[deleted]')] submission.comments.replace_more(limit=0)
logging.info(f"Fetched {len(top_comments)} top comments for {post_url}") top_comments = [comment.body for comment in submission.comments[:limit] if not comment.body.startswith('[deleted]')]
return top_comments logging.info(f"Fetched {len(top_comments)} top comments for {post_url}")
except Exception as e: return top_comments
logging.error(f"Failed to fetch comments for {post_url}: {e}") except Exception as e:
return [] logging.warning(f"Failed to fetch comments for {post_url} (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
continue
logging.error(f"Failed to fetch comments for {post_url} after {MAX_RETRIES} attempts")
return []
def fetch_duckduckgo_news_context(title, hours=24): def fetch_duckduckgo_news_context(title, hours=24):
for attempt in range(MAX_RETRIES):
try:
with DDGS() as ddgs:
results = ddgs.news(f"{title} news", timelimit="d", max_results=5)
titles = []
for r in results:
try:
date_str = r["date"]
if '+00:00' in date_str:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc)
else:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
if dt > (datetime.now(timezone.utc) - timedelta(hours=24)):
titles.append(r["title"].lower())
except ValueError as e:
logging.warning(f"Date parsing failed for '{date_str}': {e}")
continue
context = " ".join(titles) if titles else "No recent news found within 24 hours"
logging.info(f"DuckDuckGo News context for '{title}': {context}")
return context
except Exception as e:
logging.warning(f"DuckDuckGo News context fetch failed for '{title}' (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
continue
logging.error(f"Failed to fetch DuckDuckGo News context for '{title}' after {MAX_RETRIES} attempts")
return title
def fetch_reddit_posts():
try: try:
with DDGS() as ddgs: reddit = praw.Reddit(
results = ddgs.news(f"{title} news", timelimit="d", max_results=5) client_id=REDDIT_CLIENT_ID,
titles = [] client_secret=REDDIT_CLIENT_SECRET,
for r in results: user_agent=REDDIT_USER_AGENT
)
feeds = ['FoodPorn', 'restaurant', 'FoodIndustry', 'food']
articles = []
cutoff_date = datetime.now(timezone.utc) - timedelta(hours=EXPIRATION_HOURS)
logging.info(f"Starting fetch with cutoff date: {cutoff_date}")
for subreddit_name in feeds:
for attempt in range(MAX_RETRIES):
try: try:
date_str = r["date"] subreddit = reddit.subreddit(subreddit_name)
if '+00:00' in date_str: for submission in subreddit.top(time_filter='day', limit=100):
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc) pub_date = datetime.fromtimestamp(submission.created_utc, tz=timezone.utc)
else: if pub_date < cutoff_date:
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc) logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
if dt > (datetime.now(timezone.utc) - timedelta(hours=24)): continue
titles.append(r["title"].lower()) cleaned_title = clean_reddit_title(submission.title)
except ValueError as e: articles.append({
logging.warning(f"Date parsing failed for '{date_str}': {e}") "title": cleaned_title,
"raw_title": submission.title,
"link": f"https://www.reddit.com{submission.permalink}",
"summary": submission.selftext,
"feed_title": get_clean_source_name(subreddit_name),
"pub_date": pub_date,
"upvotes": submission.score,
"comment_count": submission.num_comments
})
logging.info(f"Fetched {len(articles)} posts from r/{subreddit_name}")
break
except Exception as e:
logging.error(f"Failed to fetch Reddit feed r/{subreddit_name} (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
continue continue
context = " ".join(titles) if titles else "No recent news found within 24 hours" logging.info(f"Total Reddit posts fetched: {len(articles)}")
logging.info(f"DuckDuckGo News context for '{title}': {context}") return articles
return context
except Exception as e: except Exception as e:
logging.warning(f"DuckDuckGo News context fetch failed for '{title}': {e}") logging.error(f"Unexpected error in fetch_reddit_posts: {e}", exc_info=True)
return title return []
def fetch_reddit_posts():
reddit = praw.Reddit(
client_id=REDDIT_CLIENT_ID,
client_secret=REDDIT_CLIENT_SECRET,
user_agent=REDDIT_USER_AGENT
)
feeds = ['FoodPorn', 'restaurant', 'FoodIndustry', 'food']
articles = []
cutoff_date = datetime.now(timezone.utc) - timedelta(hours=EXPIRATION_HOURS)
logging.info(f"Starting fetch with cutoff date: {cutoff_date}")
for subreddit_name in feeds:
try:
subreddit = reddit.subreddit(subreddit_name)
for submission in subreddit.top(time_filter='day', limit=100):
pub_date = datetime.fromtimestamp(submission.created_utc, tz=timezone.utc)
if pub_date < cutoff_date:
logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
continue
cleaned_title = clean_reddit_title(submission.title)
articles.append({
"title": cleaned_title,
"raw_title": submission.title,
"link": f"https://www.reddit.com{submission.permalink}",
"summary": submission.selftext,
"feed_title": get_clean_source_name(subreddit_name),
"pub_date": pub_date,
"upvotes": submission.score,
"comment_count": submission.num_comments
})
logging.info(f"Fetched {len(articles)} posts from r/{subreddit_name}")
except Exception as e:
logging.error(f"Failed to fetch Reddit feed r/{subreddit_name}: {e}")
logging.info(f"Total Reddit posts fetched: {len(articles)}")
return articles
def curate_from_reddit(): def curate_from_reddit():
articles = fetch_reddit_posts() try:
if not articles: articles = fetch_reddit_posts()
print("No Reddit posts available") if not articles:
logging.info("No Reddit posts available") logging.info("No Reddit posts available")
return None, None, random.randint(600, 1800) return None, None, False
articles.sort(key=lambda x: x["upvotes"], reverse=True) articles.sort(key=lambda x: x["upvotes"], reverse=True)
reddit = praw.Reddit(
client_id=REDDIT_CLIENT_ID,
client_secret=REDDIT_CLIENT_SECRET,
user_agent=REDDIT_USER_AGENT
)
attempts = 0
max_attempts = 10
while attempts < max_attempts and articles:
article = articles.pop(0)
title = article["title"]
raw_title = article["raw_title"]
link = article["link"]
summary = article["summary"]
source_name = "Reddit"
original_source = '<a href="https://www.reddit.com/">Reddit</a>'
if raw_title in posted_titles:
print(f"Skipping already posted post: {raw_title}")
logging.info(f"Skipping already posted post: {raw_title}")
attempts += 1
continue
print(f"Trying Reddit Post: {title} from {source_name}")
logging.info(f"Trying Reddit Post: {title} from {source_name}")
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
if skip or any(keyword in title.lower() or keyword in raw_title.lower() for keyword in RECIPE_KEYWORDS + ["homemade"]):
print(f"Skipping filtered Reddit post: {title}")
logging.info(f"Skipping filtered Reddit post: {title}")
attempts += 1
continue
top_comments = get_top_comments(link, reddit, limit=3)
ddg_context = fetch_duckduckgo_news_context(title)
content_to_summarize = f"{title}\n\n{summary}\n\nTop Comments:\n{'\n'.join(top_comments) if top_comments else 'None'}\n\nAdditional Context: {ddg_context}"
interest_score = is_interesting_reddit(
title,
summary,
article["upvotes"],
article["comment_count"],
top_comments
)
logging.info(f"Interest Score: {interest_score} for '{title}'")
if interest_score < 6:
print(f"Reddit Interest Too Low: {interest_score}")
logging.info(f"Reddit Interest Too Low: {interest_score}")
attempts += 1
continue
num_paragraphs = determine_paragraph_count(interest_score) reddit = praw.Reddit(
extra_prompt = ( client_id=REDDIT_CLIENT_ID,
f"Generate exactly {num_paragraphs} paragraphs.\n" client_secret=REDDIT_CLIENT_SECRET,
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n" user_agent=REDDIT_USER_AGENT
f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
f"Do NOT introduce unrelated concepts unless in the content, comments, or additional context.\n"
f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
f"Do not include emojis in the summary."
) )
final_summary = summarize_with_gpt4o( attempts = 0
content_to_summarize, max_attempts = 10
source_name, while attempts < max_attempts and articles:
link, article = articles.pop(0)
interest_score=interest_score, title = article["title"]
extra_prompt=extra_prompt raw_title = article["raw_title"]
) link = article["link"]
if not final_summary: summary = article["summary"]
logging.info(f"Summary failed for '{title}'") source_name = "Reddit"
attempts += 1 original_source = '<a href="https://www.reddit.com/">Reddit</a>'
continue
if raw_title in posted_titles:
final_summary = insert_link_naturally(final_summary, source_name, link) logging.info(f"Skipping already posted post: {raw_title}")
attempts += 1
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic) continue
if not post_data:
attempts += 1 logging.info(f"Trying Reddit Post: {title} from {source_name}")
continue
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic) if skip or any(keyword in title.lower() or keyword in raw_title.lower() for keyword in RECIPE_KEYWORDS + ["homemade"]):
if not image_url: logging.info(f"Skipping filtered Reddit post: {title}")
image_url, image_source, uploader, page_url = get_image(image_query) attempts += 1
continue
hook = get_dynamic_hook(post_data["title"]).strip()
top_comments = get_top_comments(link, reddit, limit=3)
share_prompt = get_viral_share_prompt(post_data["title"], final_summary) ddg_context = fetch_duckduckgo_news_context(title)
share_links_template = ( content_to_summarize = f"{title}\n\n{summary}\n\nTop Comments:\n{'\n'.join(top_comments) if top_comments else 'None'}\n\nAdditional Context: {ddg_context}"
f'<p>{share_prompt} ' interest_score = is_interesting_reddit(
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> ' title,
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>' summary,
) article["upvotes"],
post_data["content"] = f"{final_summary}\n\n{share_links_template}" article["comment_count"],
top_comments
global is_posting )
is_posting = True logging.info(f"Interest Score: {interest_score} for '{title}'")
try: if interest_score < 6:
post_id, post_url = post_to_wp( logging.info(f"Reddit Interest Too Low: {interest_score}")
post_data=post_data, attempts += 1
category=category, continue
link=link,
author=author, num_paragraphs = determine_paragraph_count(interest_score)
image_url=image_url, extra_prompt = (
original_source=original_source, f"Generate exactly {num_paragraphs} paragraphs.\n"
image_source=image_source, f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
uploader=uploader, f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
page_url=page_url, f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
f"Do NOT introduce unrelated concepts unless in the content, comments, or additional context.\n"
f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
f"Do not include emojis in the summary."
)
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
interest_score=interest_score, interest_score=interest_score,
should_post_tweet=True extra_prompt=extra_prompt
) )
finally: if not final_summary:
is_posting = False logging.info(f"Summary failed for '{title}'")
attempts += 1
if post_id: continue
share_text = f"Check out this foodie gem! {post_data['title']}"
share_text_encoded = quote(share_text) final_summary = insert_link_naturally(final_summary, source_name, link)
post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded) post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic)
post_data["content"] = f"{final_summary}\n\n{share_links}" if not post_data:
attempts += 1
continue
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
if not image_url:
image_url, image_source, uploader, page_url = get_image(image_query)
hook = get_dynamic_hook(post_data["title"]).strip()
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
share_links_template = (
f'<p>{share_prompt} '
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
)
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
global is_posting
is_posting = True is_posting = True
try: try:
post_to_wp( post_id, post_url = post_to_wp(
post_data=post_data, post_data=post_data,
category=category, category=category,
link=link, link=link,
@ -372,49 +379,84 @@ def curate_from_reddit():
uploader=uploader, uploader=uploader,
page_url=page_url, page_url=page_url,
interest_score=interest_score, interest_score=interest_score,
post_id=post_id, should_post_tweet=True
should_post_tweet=False
) )
except Exception as e:
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
attempts += 1
continue
finally: finally:
is_posting = False is_posting = False
timestamp = datetime.now(timezone.utc).isoformat() if post_id:
save_json_file(POSTED_TITLES_FILE, raw_title, timestamp) share_text = f"Check out this foodie gem! {post_data['title']}"
posted_titles.add(raw_title) share_text_encoded = quote(share_text)
logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}") post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
if image_url: post_data["content"] = f"{final_summary}\n\n{share_links}"
save_json_file(USED_IMAGES_FILE, image_url, timestamp) is_posting = True
used_images.add(image_url) try:
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE} with timestamp {timestamp}") post_to_wp(
post_data=post_data,
print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****") category=category,
print(f"Actual post URL: {post_url}") link=link,
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****") author=author,
logging.info(f"Actual post URL: {post_url}") image_url=image_url,
return post_data, category, random.randint(0, 1800) original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
interest_score=interest_score,
post_id=post_id,
should_post_tweet=False
)
except Exception as e:
logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
finally:
is_posting = False
timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, raw_title, timestamp)
posted_titles.add(raw_title)
logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE}")
if image_url:
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
return post_data, category, True
attempts += 1
logging.info(f"WP posting failed for '{post_data['title']}'")
attempts += 1 logging.info("No interesting Reddit post found after attempts")
logging.info(f"WP posting failed for '{post_data['title']}'") return None, None, False
except Exception as e:
print("No interesting Reddit post found after attempts") logging.error(f"Unexpected error in curate_from_reddit: {e}", exc_info=True)
logging.info("No interesting Reddit post found after attempts") return None, None, False
return None, None, random.randint(600, 1800)
 def run_reddit_automator():
-    print(f"{datetime.now(timezone.utc)} - INFO - ***** Reddit Automator Launched *****")
-    logging.info("***** Reddit Automator Launched *****")
-    post_data, category, sleep_time = curate_from_reddit()
-    if not post_data:
-        print(f"No postable Reddit article found - sleeping for {sleep_time} seconds")
-        logging.info(f"No postable Reddit article found - sleeping for {sleep_time} seconds")
-    else:
-        print(f"Completed Reddit run with sleep time: {sleep_time} seconds")
-        logging.info(f"Completed Reddit run with sleep time: {sleep_time} seconds")
-    print(f"Sleeping for {sleep_time}s")
-    time.sleep(sleep_time)
-    return post_data, category, sleep_time
+    lock_fd = None
+    try:
+        lock_fd = acquire_lock()
+        logging.info("***** Reddit Automator Launched *****")
+        post_data, category, should_continue = curate_from_reddit()
+        if not post_data:
+            logging.info("No postable Reddit article found")
+        else:
+            logging.info("Completed Reddit run")
+        return post_data, category, should_continue
+    except Exception as e:
+        logging.error(f"Fatal error in run_reddit_automator: {e}", exc_info=True)
+        return None, None, False
+    finally:
+        if lock_fd:
+            fcntl.flock(lock_fd, fcntl.LOCK_UN)
+            lock_fd.close()
+            os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None
 if __name__ == "__main__":
-    run_reddit_automator()
+    setup_logging()
+    post_data, category, should_continue = run_reddit_automator()
+    logging.info(f"Run completed, should_continue: {should_continue}")

foodie_automator_rss.py

@@ -31,10 +31,12 @@ from foodie_utils import (
 )
 from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
 from dotenv import load_dotenv
+import fcntl
 load_dotenv()
 is_posting = False
+LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_rss.lock"
 def signal_handler(sig, frame):
     logging.info("Received termination signal, checking if safe to exit...")
@@ -47,10 +49,11 @@ def signal_handler(sig, frame):
 signal.signal(signal.SIGTERM, signal_handler)
 signal.signal(signal.SIGINT, signal_handler)
-LOG_FILE = "/home/shane/foodie_automator/foodie_automator_rss.log"
+LOG_FILE = "/home/shane/foodie_automator/logs/foodie_automator_rss.log"
 LOG_PRUNE_DAYS = 30
 FEED_TIMEOUT = 15
 MAX_RETRIES = 3
+RETRY_BACKOFF = 2
 POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json'
 USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
@@ -96,21 +99,27 @@ def setup_logging():
     logging.getLogger("requests").setLevel(logging.WARNING)
     logging.info("Logging initialized for foodie_automator_rss.py")
-setup_logging()
+def acquire_lock():
+    os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
+    lock_fd = open(LOCK_FILE, 'w')
+    try:
+        fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        lock_fd.write(str(os.getpid()))
+        lock_fd.flush()
+        return lock_fd
+    except IOError:
+        logging.info("Another instance of foodie_automator_rss.py is running")
+        sys.exit(0)
 def create_http_session() -> requests.Session:
     session = requests.Session()
     retry_strategy = Retry(
         total=MAX_RETRIES,
-        backoff_factor=2,
+        backoff_factor=RETRY_BACKOFF,
         status_forcelist=[403, 429, 500, 502, 503, 504],
         allowed_methods=["GET", "POST"]
     )
-    adapter = HTTPAdapter(
-        max_retries=retry_strategy,
-        pool_connections=10,
-        pool_maxsize=10
-    )
+    adapter = HTTPAdapter(max_retries=retry_strategy)
     session.mount("http://", adapter)
     session.mount("https://", adapter)
     session.headers.update({
@ -140,189 +149,169 @@ def fetch_rss_feeds():
logging.info(f"Processing feeds: {RSS_FEEDS}") logging.info(f"Processing feeds: {RSS_FEEDS}")
for feed_url in RSS_FEEDS: for feed_url in RSS_FEEDS:
logging.info(f"Processing feed: {feed_url}") for attempt in range(MAX_RETRIES):
try: logging.info(f"Processing feed: {feed_url} (attempt {attempt + 1})")
response = session.get(feed_url, timeout=FEED_TIMEOUT) try:
response.raise_for_status() response = session.get(feed_url, timeout=FEED_TIMEOUT)
soup = BeautifulSoup(response.content, 'xml') response.raise_for_status()
items = soup.find_all('item') soup = BeautifulSoup(response.content, 'xml')
items = soup.find_all('item')
feed_title = RSS_FEED_NAMES.get(feed_url, (get_clean_source_name(feed_url), feed_url))
for item in items: feed_title = RSS_FEED_NAMES.get(feed_url, (get_clean_source_name(feed_url), feed_url))
try: for item in items:
title = item.find('title').text.strip() if item.find('title') else "Untitled" try:
link = item.find('link').text.strip() if item.find('link') else "" title = item.find('title').text.strip() if item.find('title') else "Untitled"
pub_date = item.find('pubDate') link = item.find('link').text.strip() if item.find('link') else ""
pub_date = parse_date(pub_date.text) if pub_date else datetime.now(timezone.utc) pub_date = item.find('pubDate')
pub_date = parse_date(pub_date.text) if pub_date else datetime.now(timezone.utc)
if pub_date < cutoff_date:
logging.info(f"Skipping old article: {title} (Published: {pub_date})") if pub_date < cutoff_date:
logging.info(f"Skipping old article: {title} (Published: {pub_date})")
continue
description = item.find('description')
summary = BeautifulSoup(description.text, 'html.parser').get_text().strip() if description else ""
content = item.find('content:encoded')
content_text = BeautifulSoup(content.text, 'html.parser').get_text().strip() if content else summary
articles.append({
"title": title,
"link": link,
"summary": summary,
"content": content_text,
"feed_title": feed_title[0] if isinstance(feed_title, tuple) else feed_title,
"pub_date": pub_date
})
logging.debug(f"Processed article: {title}")
except Exception as e:
logging.warning(f"Error processing entry in {feed_url}: {e}")
continue continue
logging.info(f"Filtered to {len(articles)} articles from {feed_url}")
description = item.find('description') break
summary = BeautifulSoup(description.text, 'html.parser').get_text().strip() if description else "" except Exception as e:
content = item.find('content:encoded') logging.error(f"Failed to fetch RSS feed {feed_url}: {e}")
content_text = BeautifulSoup(content.text, 'html.parser').get_text().strip() if content else summary if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
articles.append({ continue
"title": title,
"link": link,
"summary": summary,
"content": content_text,
"feed_title": feed_title[0] if isinstance(feed_title, tuple) else feed_title,
"pub_date": pub_date
})
logging.debug(f"Processed article: {title}")
except Exception as e:
logging.warning(f"Error processing entry in {feed_url}: {e}")
continue
logging.info(f"Filtered to {len(articles)} articles from {feed_url}")
except Exception as e:
logging.error(f"Failed to fetch RSS feed {feed_url}: {e}")
continue
articles.sort(key=lambda x: x["pub_date"], reverse=True) articles.sort(key=lambda x: x["pub_date"], reverse=True)
logging.info(f"Total RSS articles fetched: {len(articles)}") logging.info(f"Total RSS articles fetched: {len(articles)}")
return articles return articles
def fetch_duckduckgo_news_context(title, hours=24): def fetch_duckduckgo_news_context(title, hours=24):
try: for attempt in range(MAX_RETRIES):
with DDGS() as ddgs: try:
results = ddgs.news(f"{title} news", timelimit="d", max_results=5) with DDGS() as ddgs:
titles = [] results = ddgs.news(f"{title} news", timelimit="d", max_results=5)
for r in results: titles = []
try: for r in results:
date_str = r["date"] try:
if '+00:00' in date_str: date_str = r["date"]
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc) if '+00:00' in date_str:
else: dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc)
dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc) else:
if dt > (datetime.now(timezone.utc) - timedelta(hours=24)): dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S%Z").replace(tzinfo=timezone.utc)
titles.append(r["title"].lower()) if dt > (datetime.now(timezone.utc) - timedelta(hours=24)):
except ValueError as e: titles.append(r["title"].lower())
logging.warning(f"Date parsing failed for '{date_str}': {e}") except ValueError as e:
continue logging.warning(f"Date parsing failed for '{date_str}': {e}")
context = " ".join(titles) if titles else "No recent news found within 24 hours" continue
logging.info(f"DuckDuckGo News context for '{title}': {context}") context = " ".join(titles) if titles else "No recent news found within 24 hours"
return context logging.info(f"DuckDuckGo News context for '{title}': {context}")
except Exception as e: return context
logging.warning(f"DuckDuckGo News context fetch failed for '{title}': {e}") except Exception as e:
return title logging.warning(f"DuckDuckGo News context fetch failed for '{title}' (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
def curate_from_rss(): time.sleep(RETRY_BACKOFF * (2 ** attempt))
articles = fetch_rss_feeds() # Corrected from fetch_rss_articles to fetch_rss_feeds
if not articles:
print("No RSS articles available")
logging.info("No RSS articles available")
return None, None, random.randint(600, 1800)
attempts = 0
max_attempts = 10
while attempts < max_attempts and articles:
article = articles.pop(0)
title = article["title"]
link = article["link"]
summary = article.get("summary", "")
source_name = article.get("feed_title", "Unknown Source") # Adjusted to match fetch_rss_feeds output
original_source = f'<a href="{link}">{source_name}</a>'
if title in posted_titles:
print(f"Skipping already posted article: {title}")
logging.info(f"Skipping already posted article: {title}")
attempts += 1
continue continue
logging.error(f"Failed to fetch DuckDuckGo News context for '{title}' after {MAX_RETRIES} attempts")
return title
print(f"Trying RSS Article: {title} from {source_name}") def curate_from_rss():
logging.info(f"Trying RSS Article: {title} from {source_name}") try:
articles = fetch_rss_feeds()
if not articles:
logging.info("No RSS articles available")
return None, None, False # Continue running
attempts = 0
max_attempts = 10
while attempts < max_attempts and articles:
article = articles.pop(0)
title = article["title"]
link = article["link"]
summary = article.get("summary", "")
source_name = article.get("feed_title", "Unknown Source")
original_source = f'<a href="{link}">{source_name}</a>'
if title in posted_titles:
logging.info(f"Skipping already posted article: {title}")
attempts += 1
continue
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) logging.info(f"Trying RSS Article: {title} from {source_name}")
if skip:
print(f"Skipping filtered RSS article: {title}")
logging.info(f"Skipping filtered RSS article: {title}")
attempts += 1
continue
ddg_context = fetch_duckduckgo_news_context(title) image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}" if skip:
interest_score = is_interesting(scoring_content) logging.info(f"Skipping filtered RSS article: {title}")
logging.info(f"Interest score for '{title}': {interest_score}") attempts += 1
if interest_score < 6: continue
print(f"RSS Interest Too Low: {interest_score}")
logging.info(f"RSS Interest Too Low: {interest_score}")
attempts += 1
continue
num_paragraphs = determine_paragraph_count(interest_score) ddg_context = fetch_duckduckgo_news_context(title)
extra_prompt = ( scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
f"Generate exactly {num_paragraphs} paragraphs.\n" interest_score = is_interesting(scoring_content)
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n" logging.info(f"Interest score for '{title}': {interest_score}")
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n" if interest_score < 6:
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n" logging.info(f"RSS Interest Too Low: {interest_score}")
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n" attempts += 1
f"Do not include emojis in the summary." continue
)
content_to_summarize = scoring_content
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
interest_score=interest_score,
extra_prompt=extra_prompt
)
if not final_summary:
logging.info(f"Summary failed for '{title}'")
attempts += 1
continue
final_summary = insert_link_naturally(final_summary, source_name, link) num_paragraphs = determine_paragraph_count(interest_score)
extra_prompt = (
f"Generate exactly {num_paragraphs} paragraphs.\n"
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
f"Do not include emojis in the summary."
)
content_to_summarize = scoring_content
final_summary = summarize_with_gpt4o(
content_to_summarize,
source_name,
link,
interest_score=interest_score,
extra_prompt=extra_prompt
)
if not final_summary:
logging.info(f"Summary failed for '{title}'")
attempts += 1
continue
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic) final_summary = insert_link_naturally(final_summary, source_name, link)
if not post_data:
attempts += 1
continue
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic) post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic)
if not image_url: if not post_data:
image_url, image_source, uploader, page_url = get_image(image_query) attempts += 1
continue
hook = get_dynamic_hook(post_data["title"]).strip() image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
if not image_url:
image_url, image_source, uploader, page_url = get_image(image_query)
share_prompt = get_viral_share_prompt(post_data["title"], final_summary) hook = get_dynamic_hook(post_data["title"]).strip()
share_links_template = (
f'<p>{share_prompt} '
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
)
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
global is_posting share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
is_posting = True share_links_template = (
try: f'<p>{share_prompt} '
post_id, post_url = post_to_wp( f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
post_data=post_data, f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
category=category,
link=link,
author=author,
image_url=image_url,
original_source=original_source,
image_source=image_source,
uploader=uploader,
page_url=page_url,
interest_score=interest_score,
should_post_tweet=True
) )
finally: post_data["content"] = f"{final_summary}\n\n{share_links_template}"
is_posting = False
global is_posting
if post_id:
share_text = f"Check out this foodie gem! {post_data['title']}"
share_text_encoded = quote(share_text)
post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
post_data["content"] = f"{final_summary}\n\n{share_links}"
is_posting = True is_posting = True
try: try:
post_to_wp( post_id, post_url = post_to_wp(
post_data=post_data, post_data=post_data,
category=category, category=category,
link=link, link=link,
@ -333,41 +322,85 @@ def curate_from_rss():
uploader=uploader, uploader=uploader,
page_url=page_url, page_url=page_url,
interest_score=interest_score, interest_score=interest_score,
post_id=post_id, should_post_tweet=True
should_post_tweet=False
) )
except Exception as e:
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
attempts += 1
continue
finally: finally:
is_posting = False is_posting = False
timestamp = datetime.now(timezone.utc).isoformat() if post_id:
save_json_file(POSTED_TITLES_FILE, title, timestamp) share_text = f"Check out this foodie gem! {post_data['title']}"
posted_titles.add(title) share_text_encoded = quote(share_text)
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") post_url_encoded = quote(post_url)
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
if image_url: post_data["content"] = f"{final_summary}\n\n{share_links}"
save_json_file(USED_IMAGES_FILE, image_url, timestamp) is_posting = True
used_images.add(image_url) try:
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") post_to_wp(
post_data=post_data,
print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****") category=category,
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****") link=link,
return post_data, category, random.randint(0, 1800) author=author,
image_url=image_url,
attempts += 1 original_source=original_source,
logging.info(f"WP posting failed for '{post_data['title']}'") image_source=image_source,
uploader=uploader,
page_url=page_url,
interest_score=interest_score,
post_id=post_id,
should_post_tweet=False
)
except Exception as e:
logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
finally:
is_posting = False
timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, title, timestamp)
posted_titles.add(title)
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
if image_url:
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url)
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
return post_data, category, True # Run again immediately
attempts += 1
logging.info(f"WP posting failed for '{post_data['title']}'")
print("No interesting RSS article found after attempts") logging.info("No interesting RSS article found after attempts")
logging.info("No interesting RSS article found after attempts") return None, None, False # Wait before running again
return None, None, random.randint(600, 1800) except Exception as e:
logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True)
return None, None, False
def run_rss_automator(): def run_rss_automator():
print(f"{datetime.now(timezone.utc)} - INFO - ***** RSS Automator Launched *****") lock_fd = None
logging.info("***** RSS Automator Launched *****") try:
post_data, category, sleep_time = curate_from_rss() lock_fd = acquire_lock()
print(f"Sleeping for {sleep_time}s") logging.info("***** RSS Automator Launched *****")
logging.info(f"Completed run with sleep time: {sleep_time} seconds") post_data, category, should_continue = curate_from_rss()
time.sleep(sleep_time) if not post_data:
return post_data, category, sleep_time logging.info("No postable RSS article found")
else:
logging.info("Completed RSS run")
return post_data, category, should_continue
except Exception as e:
logging.error(f"Fatal error in run_rss_automator: {e}", exc_info=True)
return None, None, False
finally:
if lock_fd:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None
if __name__ == "__main__": if __name__ == "__main__":
run_rss_automator() setup_logging()
post_data, category, should_continue = run_rss_automator()
# Remove sleep timer, let manage_scripts.sh control execution
logging.info(f"Run completed, should_continue: {should_continue}")

@ -1,83 +1,263 @@
import random # foodie_engagement_tweet.py
import json import json
import logging import logging
import random
import signal
import sys
import fcntl
import os
import time
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from openai import OpenAI # Add this import from openai import OpenAI
from foodie_utils import post_tweet, AUTHORS, SUMMARY_MODEL from foodie_utils import post_tweet, AUTHORS, SUMMARY_MODEL, load_post_counts, save_post_counts
from foodie_config import X_API_CREDENTIALS from foodie_config import X_API_CREDENTIALS, AUTHOR_BACKGROUNDS_FILE
from dotenv import load_dotenv # Add this import from dotenv import load_dotenv
# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Load environment variables
load_dotenv() load_dotenv()
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_engagement_tweet.lock"
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_engagement_tweet.log"
REFERENCE_DATE_FILE = "/home/shane/foodie_automator/engagement_reference_date.json"
LOG_PRUNE_DAYS = 30
MAX_RETRIES = 3
RETRY_BACKOFF = 2
def setup_logging():
"""Initialize logging with pruning of old logs."""
try:
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
pruned_lines = []
malformed_count = 0
for line in lines:
if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit():
malformed_count += 1
continue
try:
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
if timestamp > cutoff:
pruned_lines.append(line)
except ValueError:
malformed_count += 1
continue
if malformed_count > 0:
logging.info(f"Skipped {malformed_count} malformed log lines during pruning")
with open(LOG_FILE, 'w') as f:
f.writelines(pruned_lines)
logging.basicConfig(
filename=LOG_FILE,
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.getLogger("openai").setLevel(logging.WARNING)
logging.info("Logging initialized for foodie_engagement_tweet.py")
except Exception as e:
print(f"Failed to setup logging: {e}")
sys.exit(1)
def acquire_lock():
"""Acquire a lock to prevent concurrent runs."""
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
lock_fd = open(LOCK_FILE, 'w')
try:
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
lock_fd.write(str(os.getpid()))
lock_fd.flush()
return lock_fd
except IOError:
logging.info("Another instance of foodie_engagement_tweet.py is running")
sys.exit(0)
def signal_handler(sig, frame):
"""Handle termination signals gracefully."""
logging.info("Received termination signal, exiting...")
sys.exit(0)
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
# Initialize OpenAI client # Initialize OpenAI client
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) try:
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
if not os.getenv("OPENAI_API_KEY"):
logging.error("OPENAI_API_KEY is not set in environment variables")
raise ValueError("OPENAI_API_KEY is required")
except Exception as e:
logging.error(f"Failed to initialize OpenAI client: {e}", exc_info=True)
sys.exit(1)
# Load author backgrounds
try:
with open(AUTHOR_BACKGROUNDS_FILE, 'r') as f:
AUTHOR_BACKGROUNDS = json.load(f)
except Exception as e:
logging.error(f"Failed to load author_backgrounds.json: {e}", exc_info=True)
sys.exit(1)
def get_reference_date():
"""Load or initialize the reference date for the 2-day interval."""
os.makedirs(os.path.dirname(REFERENCE_DATE_FILE), exist_ok=True)
if os.path.exists(REFERENCE_DATE_FILE):
try:
with open(REFERENCE_DATE_FILE, 'r') as f:
data = json.load(f)
reference_date = datetime.fromisoformat(data["reference_date"]).replace(tzinfo=timezone.utc)
logging.info(f"Loaded reference date: {reference_date.date()}")
return reference_date
except (json.JSONDecodeError, KeyError, ValueError) as e:
logging.error(f"Failed to load reference date from {REFERENCE_DATE_FILE}: {e}. Initializing new date.")
# Initialize with current date (start of day)
reference_date = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
try:
with open(REFERENCE_DATE_FILE, 'w') as f:
json.dump({"reference_date": reference_date.isoformat()}, f)
logging.info(f"Initialized reference date: {reference_date.date()}")
except Exception as e:
logging.error(f"Failed to save reference date to {REFERENCE_DATE_FILE}: {e}. Using current date.")
return reference_date
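With the reference date persisted to disk, the every-other-day gate used in post_engagement_tweet() below is just a whole-day difference and a parity check. A small sketch of that arithmetic, assuming timezone-aware UTC dates:

from datetime import datetime, timezone

def is_engagement_day(reference_date, now=None):
    """True on the reference date and every second day after it (whole UTC days)."""
    now = now or datetime.now(timezone.utc)
    days_since_reference = (now - reference_date).days
    return days_since_reference % 2 == 0

For example, with a reference of 2025-04-29 (the date the old code hardcoded), tweets go out on Apr 29, May 1, May 3, and so on.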
def generate_engagement_tweet(author): def generate_engagement_tweet(author):
# Fetch x_username from X_API_CREDENTIALS """Generate an engagement tweet using author background themes."""
credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None) credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None)
if not credentials: if not credentials:
logging.error(f"No X credentials found for {author['username']}") logging.error(f"No X credentials found for {author['username']}")
return None return None
author_handle = credentials["x_username"] author_handle = credentials["x_username"]
background = next((bg for bg in AUTHOR_BACKGROUNDS if bg["username"] == author["username"]), {})
if not background or "engagement_themes" not in background:
logging.warning(f"No background or engagement themes found for {author['username']}")
theme = "food trends"
else:
theme = random.choice(background["engagement_themes"])
prompt = ( prompt = (
f"Generate a concise tweet (under 280 characters) for {author_handle}. " f"Generate a concise tweet (under 280 characters) for {author_handle}. "
f"Create an engaging food-related question or statement to spark interaction. " f"Create an engaging question or statement about {theme} to spark interaction. "
f"Include a call to action to follow {author_handle} or like the tweet, and mention InsiderFoodie.com with a link to https://insiderfoodie.com. " f"Include a call to action to follow {author_handle} or like the tweet, and mention InsiderFoodie.com with a link to https://insiderfoodie.com. "
f"Avoid using the word 'elevate'—use more humanized language like 'level up' or 'bring to life'. " f"Avoid using the word 'elevate'—use more humanized language like 'level up' or 'bring to life'. "
f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways)." f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways)."
) )
try: for attempt in range(MAX_RETRIES):
response = client.chat.completions.create( try:
model=SUMMARY_MODEL, response = client.chat.completions.create(
messages=[ model=SUMMARY_MODEL,
{"role": "system", "content": "You are a social media expert crafting engaging tweets."}, messages=[
{"role": "user", "content": prompt} {"role": "system", "content": "You are a social media expert crafting engaging tweets."},
], {"role": "user", "content": prompt}
max_tokens=100, ],
temperature=0.7 max_tokens=100,
) temperature=0.7
tweet = response.choices[0].message.content.strip() )
if len(tweet) > 280: tweet = response.choices[0].message.content.strip()
tweet = tweet[:277] + "..." if len(tweet) > 280:
return tweet tweet = tweet[:277] + "..."
except Exception as e: logging.debug(f"Generated engagement tweet: {tweet}")
logging.warning(f"Failed to generate engagement tweet for {author['username']}: {e}") return tweet
# Fallback templates except Exception as e:
engagement_templates = [ logging.warning(f"Failed to generate engagement tweet for {author['username']} (attempt {attempt + 1}): {e}")
f"Whats the most mouthwatering dish youve seen this week Share below and follow {author_handle} for more foodie ideas on InsiderFoodie.com Link: https://insiderfoodie.com", if attempt < MAX_RETRIES - 1:
f"Food lovers unite Whats your go to comfort food Tell us and like this tweet for more tasty ideas from {author_handle} on InsiderFoodie.com Link: https://insiderfoodie.com", time.sleep(RETRY_BACKOFF * (2 ** attempt))
f"Ever tried a dish that looked too good to eat Share your favorites and follow {author_handle} for more culinary trends on InsiderFoodie.com Link: https://insiderfoodie.com", else:
f"What food trend are you loving right now Let us know and like this tweet to keep up with {author_handle} on InsiderFoodie.com Link: https://insiderfoodie.com" logging.error(f"Failed to generate engagement tweet after {MAX_RETRIES} attempts")
] engagement_templates = [
template = random.choice(engagement_templates) f"What's the most mouthwatering {theme} you've seen this week? Share below and follow {author_handle} for more on InsiderFoodie.com! Link: https://insiderfoodie.com",
return template f"{theme.capitalize()} lovers unite! What's your go-to pick? Tell us and like this tweet for more from {author_handle} on InsiderFoodie.com! Link: https://insiderfoodie.com",
f"Ever tried a {theme} that blew your mind? Share your favorites and follow {author_handle} for more on InsiderFoodie.com! Link: https://insiderfoodie.com",
f"What {theme} trend are you loving right now? Let us know and like this tweet to keep up with {author_handle} on InsiderFoodie.com! Link: https://insiderfoodie.com"
]
template = random.choice(engagement_templates)
logging.info(f"Using fallback engagement tweet: {template}")
return template
def post_engagement_tweet(): def post_engagement_tweet():
# Reference date for calculating the 2-day interval """Post engagement tweets for authors every 2 days."""
reference_date = datetime(2025, 4, 29, tzinfo=timezone.utc) # Starting from April 29, 2025 try:
current_date = datetime.now(timezone.utc) logging.info("Starting foodie_engagement_tweet.py")
print("Starting foodie_engagement_tweet.py")
# Calculate the number of days since the reference date
days_since_reference = (current_date - reference_date).days # Get reference date
reference_date = get_reference_date()
# Post only if the number of days since the reference date is divisible by 2 current_date = datetime.now(timezone.utc)
if days_since_reference % 2 == 0: days_since_reference = (current_date - reference_date).days
logging.info("Today is an engagement tweet day (every 2 days). Posting...") logging.info(f"Days since reference date ({reference_date.date()}): {days_since_reference}")
for author in AUTHORS: print(f"Days since reference date ({reference_date.date()}): {days_since_reference}")
tweet = generate_engagement_tweet(author)
# Post only if the number of days since the reference date is divisible by 2
if days_since_reference % 2 == 0:
logging.info("Today is an engagement tweet day (every 2 days). Posting...")
print("Today is an engagement tweet day (every 2 days). Posting...")
logging.info(f"Posting engagement tweet for {author['username']}: {tweet}") # Load post counts to check limits
if post_tweet(author, tweet): post_counts = load_post_counts()
logging.info(f"Successfully posted engagement tweet for {author['username']}")
else: for author in AUTHORS:
logging.warning(f"Failed to post engagement tweet for {author['username']}") try:
else: # Check post limits
logging.info("Today is not an engagement tweet day (every 2 days). Skipping...") author_count = next((entry for entry in post_counts if entry["username"] == author["username"]), None)
if not author_count:
logging.error(f"No post count entry for {author['username']}, skipping")
continue
if author_count["monthly_count"] >= 500:
logging.warning(f"Monthly post limit (500) reached for {author['username']}, skipping")
continue
if author_count["daily_count"] >= 20:
logging.warning(f"Daily post limit (20) reached for {author['username']}, skipping")
continue
tweet = generate_engagement_tweet(author)
if not tweet:
logging.error(f"Failed to generate engagement tweet for {author['username']}, skipping")
continue
logging.info(f"Posting engagement tweet for {author['username']}: {tweet}")
print(f"Posting engagement tweet for {author['username']}: {tweet}")
if post_tweet(author, tweet):
logging.info(f"Successfully posted engagement tweet for {author['username']}")
# Update post counts
author_count["monthly_count"] += 1
author_count["daily_count"] += 1
save_post_counts(post_counts)
else:
logging.warning(f"Failed to post engagement tweet for {author['username']}")
except Exception as e:
logging.error(f"Error posting engagement tweet for {author['username']}: {e}", exc_info=True)
continue
else:
logging.info(f"Today is not an engagement tweet day (every 2 days). Days since reference: {days_since_reference}. Skipping...")
print(f"Today is not an engagement tweet day (every 2 days). Days since reference: {days_since_reference}. Skipping...")
logging.info("Completed foodie_engagement_tweet.py")
print("Completed foodie_engagement_tweet.py")
except Exception as e:
logging.error(f"Unexpected error in post_engagement_tweet: {e}", exc_info=True)
print(f"Error in post_engagement_tweet: {e}")
def main():
"""Main function to run the script."""
lock_fd = None
try:
lock_fd = acquire_lock()
setup_logging()
post_engagement_tweet()
except Exception as e:
logging.error(f"Fatal error in main: {e}", exc_info=True)
print(f"Fatal error: {e}")
sys.exit(1)
finally:
if lock_fd:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None
if __name__ == "__main__": if __name__ == "__main__":
post_engagement_tweet() main()

@ -1,94 +1,134 @@
# foodie_weekly_thread.py
import json import json
import os import os
from datetime import datetime, timedelta, timezone
import logging import logging
import random import random
import signal
import sys
import fcntl
import time
from datetime import datetime, timedelta, timezone
import tweepy
from openai import OpenAI from openai import OpenAI
from foodie_utils import post_tweet, AUTHORS, SUMMARY_MODEL from foodie_utils import post_tweet, AUTHORS, SUMMARY_MODEL
from foodie_config import X_API_CREDENTIALS from foodie_config import X_API_CREDENTIALS
from dotenv import load_dotenv from dotenv import load_dotenv
import tweepy
load_dotenv() load_dotenv()
# Logging configuration LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_weekly_thread.lock"
LOG_FILE = "/home/shane/foodie_automator/foodie_weekly_thread.log" LOG_FILE = "/home/shane/foodie_automator/logs/foodie_weekly_thread.log"
LOG_PRUNE_DAYS = 30 LOG_PRUNE_DAYS = 30
MAX_RETRIES = 3
RETRY_BACKOFF = 2
RECENT_POSTS_FILE = "/home/shane/foodie_automator/recent_posts.json"
def setup_logging(): def setup_logging():
if os.path.exists(LOG_FILE): """Initialize logging with pruning of old logs."""
with open(LOG_FILE, 'r') as f: try:
lines = f.readlines() os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS) if os.path.exists(LOG_FILE):
pruned_lines = [] with open(LOG_FILE, 'r') as f:
for line in lines: lines = f.readlines()
try: cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc) pruned_lines = []
if timestamp > cutoff: malformed_count = 0
pruned_lines.append(line) for line in lines:
except ValueError: if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit():
continue malformed_count += 1
with open(LOG_FILE, 'w') as f: continue
f.writelines(pruned_lines) try:
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
logging.basicConfig( if timestamp > cutoff:
filename=LOG_FILE, pruned_lines.append(line)
level=logging.DEBUG, except ValueError:
format='%(asctime)s - %(levelname)s - %(message)s', malformed_count += 1
datefmt='%Y-%m-%d %H:%M:%S' continue
) if malformed_count > 0:
console_handler = logging.StreamHandler() logging.info(f"Skipped {malformed_count} malformed log lines during pruning")
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) with open(LOG_FILE, 'w') as f:
logging.getLogger().addHandler(console_handler) f.writelines(pruned_lines)
logging.info("Logging initialized for foodie_weekly_thread.py")
logging.basicConfig(
filename=LOG_FILE,
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.getLogger("tweepy").setLevel(logging.WARNING)
logging.info("Logging initialized for foodie_weekly_thread.py")
except Exception as e:
print(f"Failed to setup logging: {e}")
sys.exit(1)
def acquire_lock():
"""Acquire a lock to prevent concurrent runs."""
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
lock_fd = open(LOCK_FILE, 'w')
try:
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
lock_fd.write(str(os.getpid()))
lock_fd.flush()
return lock_fd
except IOError:
logging.info("Another instance of foodie_weekly_thread.py is running")
sys.exit(0)
def signal_handler(sig, frame):
"""Handle termination signals gracefully."""
logging.info("Received termination signal, exiting...")
sys.exit(0)
setup_logging() signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
# Initialize OpenAI client # Initialize OpenAI client
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) try:
if not os.getenv("OPENAI_API_KEY"): client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
logging.error("OPENAI_API_KEY is not set in environment variables") if not os.getenv("OPENAI_API_KEY"):
raise ValueError("OPENAI_API_KEY is required") logging.error("OPENAI_API_KEY is not set in environment variables")
raise ValueError("OPENAI_API_KEY is required")
except Exception as e:
logging.error(f"Failed to initialize OpenAI client: {e}", exc_info=True)
sys.exit(1)
# Validate X_API_CREDENTIALS and test API access
def validate_twitter_credentials(): def validate_twitter_credentials():
"""Validate Twitter API credentials for all authors."""
logging.info("Validating Twitter API credentials for all authors") logging.info("Validating Twitter API credentials for all authors")
valid_credentials = [] valid_credentials = []
for author in AUTHORS: for author in AUTHORS:
credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None) credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None)
if not credentials: if not credentials:
logging.error(f"No X credentials found for {author['username']} in X_API_CREDENTIALS") logging.error(f"No X credentials found for {author['username']} in X_API_CREDENTIALS")
print(f"No X credentials found for {author['username']}")
continue continue
logging.debug(f"Testing credentials for {author['username']} (handle: {credentials['x_username']})") for attempt in range(MAX_RETRIES):
try: try:
client = tweepy.Client( twitter_client = tweepy.Client(
consumer_key=credentials["api_key"], consumer_key=credentials["api_key"],
consumer_secret=credentials["api_secret"], consumer_secret=credentials["api_secret"],
access_token=credentials["access_token"], access_token=credentials["access_token"],
access_token_secret=credentials["access_token_secret"] access_token_secret=credentials["access_token_secret"]
) )
# Test API access by fetching the user's profile user = twitter_client.get_me()
user = client.get_me() logging.info(f"Credentials valid for {author['username']} (handle: {credentials['x_username']})")
logging.info(f"Credentials valid for {author['username']} (handle: {credentials['x_username']}, user_id: {user.data.id})") valid_credentials.append(credentials)
print(f"Credentials valid for {author['username']} (handle: {credentials['x_username']})") break
valid_credentials.append(credentials) except tweepy.TweepyException as e:
except tweepy.TweepyException as e: logging.error(f"Failed to validate credentials for {author['username']} (attempt {attempt + 1}): {e}")
logging.error(f"Failed to validate credentials for {author['username']} (handle: {credentials['x_username']}): {e}") if attempt < MAX_RETRIES - 1:
if hasattr(e, 'response') and e.response: time.sleep(RETRY_BACKOFF * (2 ** attempt))
logging.error(f"Twitter API response: {e.response.text}") else:
print(f"Failed to validate credentials for {author['username']}: {e}") logging.error(f"Credentials invalid for {author['username']} after {MAX_RETRIES} attempts")
if not valid_credentials: if not valid_credentials:
logging.error("No valid Twitter credentials found for any author") logging.error("No valid Twitter credentials found for any author")
raise ValueError("No valid Twitter credentials found") raise ValueError("No valid Twitter credentials found")
return valid_credentials return valid_credentials
# Run credential validation
validate_twitter_credentials()
RECENT_POSTS_FILE = "/home/shane/foodie_automator/recent_posts.json"
def load_recent_posts(): def load_recent_posts():
"""Load and deduplicate posts from recent_posts.json."""
posts = [] posts = []
unique_posts = {} unique_posts = {}
logging.debug(f"Attempting to load posts from {RECENT_POSTS_FILE}") logging.debug(f"Attempting to load posts from {RECENT_POSTS_FILE}")
@ -131,13 +171,15 @@ def load_recent_posts():
continue continue
logging.info(f"Loaded {len(posts)} unique posts from {RECENT_POSTS_FILE} (after deduplication)") logging.info(f"Loaded {len(posts)} unique posts from {RECENT_POSTS_FILE} (after deduplication)")
except Exception as e: except Exception as e:
logging.error(f"Failed to load {RECENT_POSTS_FILE}: {e}") logging.error(f"Failed to load {RECENT_POSTS_FILE}: {e}", exc_info=True)
return posts
if not posts: if not posts:
logging.warning(f"No valid posts loaded from {RECENT_POSTS_FILE}") logging.warning(f"No valid posts loaded from {RECENT_POSTS_FILE}")
return posts return posts
def filter_posts_for_week(posts, start_date, end_date): def filter_posts_for_week(posts, start_date, end_date):
"""Filter posts within the specified week."""
filtered_posts = [] filtered_posts = []
logging.debug(f"Filtering {len(posts)} posts for range {start_date} to {end_date}") logging.debug(f"Filtering {len(posts)} posts for range {start_date} to {end_date}")
@ -155,6 +197,7 @@ def filter_posts_for_week(posts, start_date, end_date):
return filtered_posts return filtered_posts
def generate_intro_tweet(author): def generate_intro_tweet(author):
"""Generate an intro tweet for the weekly thread."""
credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None) credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None)
if not credentials: if not credentials:
logging.error(f"No X credentials found for {author['username']}") logging.error(f"No X credentials found for {author['username']}")
@ -170,118 +213,221 @@ def generate_intro_tweet(author):
f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways)." f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways)."
) )
try: for attempt in range(MAX_RETRIES):
response = client.chat.completions.create( try:
model=SUMMARY_MODEL, response = client.chat.completions.create(
messages=[ model=SUMMARY_MODEL,
{"role": "system", "content": "You are a social media expert crafting engaging tweets."}, messages=[
{"role": "user", "content": prompt} {"role": "system", "content": "You are a social media expert crafting engaging tweets."},
], {"role": "user", "content": prompt}
max_tokens=100, ],
temperature=0.7 max_tokens=100,
) temperature=0.7
tweet = response.choices[0].message.content.strip() )
if len(tweet) > 280: tweet = response.choices[0].message.content.strip()
tweet = tweet[:277] + "..." if len(tweet) > 280:
logging.debug(f"Generated intro tweet: {tweet}") tweet = tweet[:277] + "..."
return tweet logging.debug(f"Generated intro tweet: {tweet}")
except Exception as e: return tweet
logging.error(f"Failed to generate intro tweet for {author['username']}: {e}") except Exception as e:
fallback = ( logging.warning(f"Failed to generate intro tweet for {author['username']} (attempt {attempt + 1}): {e}")
f"This weeks top 10 foodie finds by {author_handle} Check out the best on InsiderFoodie.com " if attempt < MAX_RETRIES - 1:
f"Follow {author_handle} for more and like this thread to stay in the loop Visit us at https://insiderfoodie.com" time.sleep(RETRY_BACKOFF * (2 ** attempt))
) else:
logging.info(f"Using fallback intro tweet: {fallback}") logging.error(f"Failed to generate intro tweet after {MAX_RETRIES} attempts")
return fallback fallback = (
f"This week's top 10 foodie finds by {author_handle}! Check out the best on InsiderFoodie.com. "
f"Follow {author_handle} for more and like this thread to stay in the loop! Visit us at https://insiderfoodie.com"
)
logging.info(f"Using fallback intro tweet: {fallback}")
return fallback
def post_weekly_thread(): def generate_final_cta(author):
logging.info("Entering post_weekly_thread") """Generate a final CTA tweet for the weekly thread using GPT."""
print("Entering post_weekly_thread") credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None)
if not credentials:
today = datetime.now(timezone.utc) logging.error(f"No X credentials found for {author['username']}")
days_to_monday = today.weekday() return None
start_date = (today - timedelta(days=days_to_monday + 7)).replace(hour=0, minute=0, second=0, microsecond=0) author_handle = credentials["x_username"]
end_date = start_date + timedelta(days=6, hours=23, minutes=59, seconds=59) logging.debug(f"Generating final CTA tweet for {author_handle}")
logging.info(f"Fetching posts from {start_date} to {end_date}")
print(f"Fetching posts from {start_date} to {end_date}")
all_posts = load_recent_posts()
print(f"Loaded {len(all_posts)} posts from recent_posts.json")
logging.info(f"Loaded {len(all_posts)} posts from recent_posts.json")
if not all_posts:
logging.warning("No posts loaded, exiting post_weekly_thread")
print("No posts loaded, exiting post_weekly_thread")
return
weekly_posts = filter_posts_for_week(all_posts, start_date, end_date)
print(f"Filtered to {len(weekly_posts)} posts for the week")
logging.info(f"Filtered to {len(weekly_posts)} posts for the week")
if not weekly_posts:
logging.warning("No posts found within the week range, exiting post_weekly_thread")
print("No posts found within the week range, exiting post_weekly_thread")
return
posts_by_author = {} prompt = (
for post in weekly_posts: f"Generate a concise tweet (under 280 characters) for {author_handle}. "
author = post["author_username"] f"Conclude a thread of their top 10 foodie posts of the week on InsiderFoodie.com. "
if author not in posts_by_author: f"Make it engaging, value-driven, and urgent, in the style of Neil Patel. "
posts_by_author[author] = [] f"Include a call to action to visit InsiderFoodie.com and follow {author_handle}. "
posts_by_author[author].append(post) f"Mention that the top 10 foodie trends are shared every Monday. "
logging.debug(f"Grouped posts by author: {list(posts_by_author.keys())}") f"Avoid using the word 'elevate'—use humanized language like 'level up' or 'bring to life'. "
f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways)."
)
for author in AUTHORS: for attempt in range(MAX_RETRIES):
author_posts = posts_by_author.get(author["username"], []) try:
logging.info(f"Processing author {author['username']} with {len(author_posts)} posts") response = client.chat.completions.create(
print(f"Processing author {author['username']} with {len(author_posts)} posts") model=SUMMARY_MODEL,
messages=[
{"role": "system", "content": "You are a social media expert crafting engaging tweets."},
{"role": "user", "content": prompt}
],
max_tokens=100,
temperature=0.7
)
tweet = response.choices[0].message.content.strip()
if len(tweet) > 280:
tweet = tweet[:277] + "..."
logging.debug(f"Generated final CTA tweet: {tweet}")
return tweet
except Exception as e:
logging.warning(f"Failed to generate final CTA tweet for {author['username']} (attempt {attempt + 1}): {e}")
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_BACKOFF * (2 ** attempt))
else:
logging.error(f"Failed to generate final CTA tweet after {MAX_RETRIES} attempts")
fallback = (
f"Want more foodie insights like these? Check out insiderfoodie.com and follow {author_handle} "
f"for the world’s top 10 foodie trends every Monday. Don’t miss out!"
)
logging.info(f"Using fallback final CTA tweet: {fallback}")
return fallback
def post_weekly_thread():
"""Post weekly threads for each author."""
try:
logging.info("Starting foodie_weekly_thread.py")
print("Starting foodie_weekly_thread.py")
if not author_posts: valid_credentials = validate_twitter_credentials()
logging.info(f"No posts found for {author['username']} this week") if not valid_credentials:
print(f"No posts found for {author['username']} this week") logging.error("No valid Twitter credentials found, exiting")
continue return
author_posts.sort(key=lambda x: x.get("timestamp", ""), reverse=True) today = datetime.now(timezone.utc)
top_posts = author_posts[:10] days_to_monday = today.weekday()
logging.info(f"Selected {len(top_posts)} top posts for {author['username']}") start_date = (today - timedelta(days=days_to_monday + 7)).replace(hour=0, minute=0, second=0, microsecond=0)
print(f"Selected {len(top_posts)} top posts for {author['username']}") end_date = start_date + timedelta(days=6, hours=23, minutes=59, seconds=59)
intro_tweet = generate_intro_tweet(author) logging.info(f"Fetching posts from {start_date} to {end_date}")
if not intro_tweet: print(f"Fetching posts from {start_date} to {end_date}")
logging.error(f"Failed to generate intro tweet for {author['username']}, skipping")
continue
logging.info(f"Posting intro tweet for {author['username']}: {intro_tweet}")
print(f"Posting intro tweet for {author['username']}: {intro_tweet}")
intro_response = post_tweet(author, intro_tweet) all_posts = load_recent_posts()
if not intro_response: logging.info(f"Loaded {len(all_posts)} posts from recent_posts.json")
logging.error(f"Failed to post intro tweet for {author['username']}, skipping thread") print(f"Loaded {len(all_posts)} posts from recent_posts.json")
print(f"Failed to post intro tweet for {author['username']}")
continue
intro_tweet_id = intro_response.get("id") if not all_posts:
logging.debug(f"Intro tweet posted with ID {intro_tweet_id}") logging.warning("No posts loaded, exiting post_weekly_thread")
print("No posts loaded, exiting post_weekly_thread")
return
for i, post in enumerate(top_posts, 1): weekly_posts = filter_posts_for_week(all_posts, start_date, end_date)
post_tweet_content = f"{i}. {post['title']} Link: {post['url']}" logging.info(f"Filtered to {len(weekly_posts)} posts for the week")
logging.info(f"Posting thread reply {i} for {author['username']}: {post_tweet_content}") print(f"Filtered to {len(weekly_posts)} posts for the week")
print(f"Posting thread reply {i} for {author['username']}: {post_tweet_content}")
reply_response = post_tweet(author, post_tweet_content, reply_to_id=intro_tweet_id)
if not reply_response:
logging.error(f"Failed to post thread reply {i} for {author['username']}")
else:
logging.debug(f"Thread reply {i} posted with ID {reply_response.get('id')}")
logging.info(f"Successfully posted weekly thread for {author['username']}") if not weekly_posts:
print(f"Successfully posted weekly thread for {author['username']}") logging.warning("No posts found within the week range, exiting post_weekly_thread")
print("No posts found within the week range, exiting post_weekly_thread")
return
posts_by_author = {}
for post in weekly_posts:
author = post["author_username"]
if author not in posts_by_author:
posts_by_author[author] = []
posts_by_author[author].append(post)
logging.debug(f"Grouped posts by author: {list(posts_by_author.keys())}")
for author in AUTHORS:
try:
author_posts = posts_by_author.get(author["username"], [])
logging.info(f"Processing author {author['username']} with {len(author_posts)} posts")
print(f"Processing author {author['username']} with {len(author_posts)} posts")
if not author_posts:
logging.info(f"No posts found for {author['username']} this week")
print(f"No posts found for {author['username']} this week")
continue
author_posts.sort(key=lambda x: x.get("timestamp", ""), reverse=True)
top_posts = author_posts[:10]
logging.info(f"Selected {len(top_posts)} top posts for {author['username']}")
print(f"Selected {len(top_posts)} top posts for {author['username']}")
intro_tweet = generate_intro_tweet(author)
if not intro_tweet:
logging.error(f"Failed to generate intro tweet for {author['username']}, skipping")
continue
logging.info(f"Posting intro tweet for {author['username']}: {intro_tweet}")
print(f"Posting intro tweet for {author['username']}: {intro_tweet}")
intro_response = post_tweet(author, intro_tweet)
if not intro_response:
logging.error(f"Failed to post intro tweet for {author['username']}, skipping thread")
print(f"Failed to post intro tweet for {author['username']}")
continue
intro_tweet_id = intro_response.get("id")
last_tweet_id = intro_tweet_id
logging.debug(f"Intro tweet posted with ID {intro_tweet_id}")
for i, post in enumerate(top_posts, 1):
try:
post_tweet_content = f"{i}. {post['title']} Link: {post['url']}"
logging.info(f"Posting thread reply {i} for {author['username']}: {post_tweet_content}")
print(f"Posting thread reply {i} for {author['username']}: {post_tweet_content}")
reply_response = post_tweet(author, post_tweet_content, reply_to_id=last_tweet_id)
if not reply_response:
logging.error(f"Failed to post thread reply {i} for {author['username']}")
else:
last_tweet_id = reply_response.get("id")
logging.debug(f"Thread reply {i} posted with ID {last_tweet_id}")
except Exception as e:
logging.error(f"Error posting thread reply {i} for {author['username']}: {e}", exc_info=True)
continue
# Post final CTA tweet
if last_tweet_id and top_posts: # Ensure there's a valid thread to reply to
try:
final_cta = generate_final_cta(author)
if not final_cta:
logging.error(f"Failed to generate final CTA tweet for {author['username']}, skipping")
continue
logging.info(f"Posting final CTA tweet for {author['username']}: {final_cta}")
print(f"Posting final CTA tweet for {author['username']}: {final_cta}")
cta_response = post_tweet(author, final_cta, reply_to_id=last_tweet_id)
if not cta_response:
logging.error(f"Failed to post final CTA tweet for {author['username']}")
else:
logging.debug(f"Final CTA tweet posted with ID {cta_response.get('id')}")
except Exception as e:
logging.error(f"Error posting final CTA tweet for {author['username']}: {e}", exc_info=True)
logging.info(f"Successfully posted weekly thread for {author['username']}")
print(f"Successfully posted weekly thread for {author['username']}")
except Exception as e:
logging.error(f"Error processing author {author['username']}: {e}", exc_info=True)
continue
logging.info("Completed foodie_weekly_thread.py")
print("Completed foodie_weekly_thread.py")
except Exception as e:
logging.error(f"Unexpected error in post_weekly_thread: {e}", exc_info=True)
print(f"Error in post_weekly_thread: {e}")
if __name__ == "__main__": def main():
print("Starting foodie_weekly_thread.py") """Main function to run the script."""
logging.info("Starting foodie_weekly_thread.py") lock_fd = None
try: try:
lock_fd = acquire_lock()
setup_logging()
post_weekly_thread() post_weekly_thread()
except Exception as e: except Exception as e:
logging.error(f"Unexpected error in post_weekly_thread: {e}", exc_info=True) logging.error(f"Fatal error in main: {e}", exc_info=True)
print("Completed foodie_weekly_thread.py") print(f"Fatal error: {e}")
logging.info("Completed foodie_weekly_thread.py") sys.exit(1)
finally:
if lock_fd:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None
if __name__ == "__main__":
main()
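The substantive change in this file is that replies now chain off last_tweet_id instead of always replying to the intro tweet, which lets the new final CTA land at the end of the thread. post_tweet() lives in foodie_utils and is not shown; a hedged sketch of how its reply_to_id presumably maps onto Tweepy's create_tweet:

import tweepy

def post_thread(client: tweepy.Client, tweets):
    """Post a list of tweet texts as a chained thread; return the id of the last tweet."""
    last_tweet_id = None
    for text in tweets:
        response = client.create_tweet(
            text=text,
            in_reply_to_tweet_id=last_tweet_id  # None for the first tweet, then chain
        )
        last_tweet_id = response.data["id"]
    return last_tweet_id

Passing in_reply_to_tweet_id=None posts a standalone tweet, so the same call handles both the intro and every reply.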

@ -3,7 +3,9 @@
# Directory to monitor # Directory to monitor
BASE_DIR="/home/shane/foodie_automator" BASE_DIR="/home/shane/foodie_automator"
CHECKSUM_FILE="$BASE_DIR/.file_checksum" CHECKSUM_FILE="$BASE_DIR/.file_checksum"
LOG_FILE="$BASE_DIR/manage_scripts.log" LOG_FILE="$BASE_DIR/logs/manage_scripts.log"
VENV_PYTHON="$BASE_DIR/venv/bin/python"
LOCK_DIR="$BASE_DIR/locks"
# Log function # Log function
log() { log() {
@ -13,37 +15,105 @@ log() {
# Calculate checksum of files (excluding logs, JSON files, and venv) # Calculate checksum of files (excluding logs, JSON files, and venv)
calculate_checksum() { calculate_checksum() {
find "$BASE_DIR" -type f \ find "$BASE_DIR" -type f \
-not -path "$BASE_DIR/*.log" \ -not -path "$BASE_DIR/logs/*" \
-not -path "$BASE_DIR/*.json" \ -not -path "$BASE_DIR/*.json" \
-not -path "$BASE_DIR/.file_checksum" \ -not -path "$BASE_DIR/.file_checksum" \
-not -path "$BASE_DIR/venv/*" \ -not -path "$BASE_DIR/venv/*" \
-not -path "$BASE_DIR/locks/*" \
-exec sha256sum {} \; | sort | sha256sum | awk '{print $1}' -exec sha256sum {} \; | sort | sha256sum | awk '{print $1}'
} }
# Check if scripts are running # Check if a script is running (using lock file)
check_running() { check_running() {
pgrep -f "python3.*foodie_automator" > /dev/null local script_name="$1"
local lock_file="$LOCK_DIR/${script_name}.lock"
if [ -f "$lock_file" ]; then
local pid=$(cat "$lock_file")
if ps -p "$pid" > /dev/null; then
log "$script_name is already running (PID: $pid)"
return 0
else
log "Stale lock file found for $script_name, removing"
rm -f "$lock_file"
fi
fi
return 1
}
# Create lock file
create_lock() {
local script_name="$1"
local lock_file="$LOCK_DIR/${script_name}.lock"
mkdir -p "$LOCK_DIR"
echo $$ > "$lock_file"
log "Created lock file for $script_name (PID: $$)"
}
# Remove lock file
remove_lock() {
local script_name="$1"
local lock_file="$LOCK_DIR/${script_name}.lock"
rm -f "$lock_file"
log "Removed lock file for $script_name"
} }
# Stop scripts # Stop scripts
stop_scripts() { stop_scripts() {
log "Stopping scripts..." log "Stopping scripts..."
pkill -TERM -f "python3.*foodie_automator" || true for script in foodie_automator_*.py; do
if [ -f "$script" ] && [ "$script" != "foodie_weekly_thread.py" ] && [ "$script" != "foodie_engagement_tweet.py" ]; then
local script_name="${script%.py}"
pkill -TERM -f "$VENV_PYTHON.*$script_name" || true
fi
done
sleep 10 sleep 10
pkill -9 -f "python3.*foodie_automator" || true for script in foodie_automator_*.py; do
if [ -f "$script" ] && [ "$script" != "foodie_weekly_thread.py" ] && [ "$script" != "foodie_engagement_tweet.py" ]; then
local script_name="${script%.py}"
pkill -9 -f "$VENV_PYTHON.*$script_name" || true
remove_lock "$script_name"
fi
done
log "Scripts stopped." log "Scripts stopped."
} }
# Start scripts # Start scripts
start_scripts() { start_scripts() {
log "Starting scripts..." log "Starting scripts..."
cd "$BASE_DIR" cd "$BASE_DIR" || { log "Failed to change to $BASE_DIR"; exit 1; }
source venv/bin/activate
# Find all foodie_automator_*.py scripts and start them # Source virtual environment
if [ -f "$BASE_DIR/venv/bin/activate" ]; then
source "$BASE_DIR/venv/bin/activate"
else
log "Error: Virtual environment not found at $BASE_DIR/venv"
exit 1
fi
# Load .env variables
if [ -f "$BASE_DIR/.env" ]; then
export $(grep -v '^#' "$BASE_DIR/.env" | xargs)
log ".env variables loaded"
else
log "Error: .env file not found at $BASE_DIR/.env"
exit 1
fi
# Find and start all foodie_automator_*.py scripts (excluding weekly/engagement)
for script in foodie_automator_*.py; do for script in foodie_automator_*.py; do
if [ -f "$script" ]; then if [ -f "$script" ] && [ "$script" != "foodie_weekly_thread.py" ] && [ "$script" != "foodie_engagement_tweet.py" ]; then
log "Starting $script..." local script_name="${script%.py}"
nohup python3 "$script" >> "${script%.py}.log" 2>&1 & if ! check_running "$script_name"; then
log "Starting $script..."
create_lock "$script_name"
nohup "$VENV_PYTHON" "$script" >> "$BASE_DIR/logs/${script_name}.log" 2>&1 &
if [ $? -eq 0 ]; then
log "$script started successfully"
else
log "Failed to start $script"
remove_lock "$script_name"
fi
fi
fi fi
done done
log "All scripts started." log "All scripts started."
@ -52,14 +122,34 @@ start_scripts() {
# Update dependencies # Update dependencies
update_dependencies() { update_dependencies() {
log "Updating dependencies..." log "Updating dependencies..."
cd "$BASE_DIR" cd "$BASE_DIR" || { log "Failed to change to $BASE_DIR"; exit 1; }
# Create venv if it doesn't exist # Create venv if it doesn't exist
if [ ! -d "venv" ]; then if [ ! -d "venv" ]; then
python3 -m venv venv python3 -m venv venv
log "Created new virtual environment"
fi
# Source virtual environment
if [ -f "$BASE_DIR/venv/bin/activate" ]; then
source "$BASE_DIR/venv/bin/activate"
else
log "Error: Virtual environment not found at $BASE_DIR/venv"
exit 1
fi
# Update pip and install requirements
"$VENV_PYTHON" -m pip install --upgrade pip
if [ -f "requirements.txt" ]; then
"$VENV_PYTHON" -m pip install -r requirements.txt || {
log "Failed to install requirements.txt, attempting fallback dependencies"
"$VENV_PYTHON" -m pip install requests openai beautifulsoup4 feedparser praw duckduckgo_search selenium Pillow pytesseract webdriver-manager
log "Fallback: Installed core dependencies"
}
else
log "Error: requirements.txt not found, installing core dependencies"
"$VENV_PYTHON" -m pip install requests openai beautifulsoup4 feedparser praw duckduckgo_search selenium Pillow pytesseract webdriver-manager
fi fi
source venv/bin/activate
pip install --upgrade pip
pip install -r requirements.txt || (pip install requests openai beautifulsoup4 feedparser praw duckduckgo_search selenium Pillow pytesseract webdriver-manager && log "Fallback: Installed core dependencies")
log "Dependencies updated." log "Dependencies updated."
} }
@ -77,7 +167,7 @@ if [ "$CURRENT_CHECKSUM" != "$PREVIOUS_CHECKSUM" ]; then
log "File changes detected. Previous checksum: $PREVIOUS_CHECKSUM, Current checksum: $CURRENT_CHECKSUM" log "File changes detected. Previous checksum: $PREVIOUS_CHECKSUM, Current checksum: $CURRENT_CHECKSUM"
# Stop scripts if running # Stop scripts if running
if check_running; then if pgrep -f "$VENV_PYTHON.*foodie_automator" > /dev/null; then
stop_scripts stop_scripts
fi fi
@ -92,4 +182,6 @@ if [ "$CURRENT_CHECKSUM" != "$PREVIOUS_CHECKSUM" ]; then
log "Checksum updated." log "Checksum updated."
else else
log "No file changes detected." log "No file changes detected."
fi fi
exit 0