add lock files and update weekly tweet to include last tweet to follow
This commit is contained in:
+259
-175
@@ -29,12 +29,14 @@ from foodie_utils import (
|
|||||||
generate_category_from_summary, post_to_wp, prepare_post_data,
|
generate_category_from_summary, post_to_wp, prepare_post_data,
|
||||||
smart_image_and_filter, insert_link_naturally, get_flickr_image
|
smart_image_and_filter, insert_link_naturally, get_flickr_image
|
||||||
)
|
)
|
||||||
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt # Removed select_best_cta import
|
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
import fcntl
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
is_posting = False
|
is_posting = False
|
||||||
|
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_google.lock"
|
||||||
|
|
||||||
def signal_handler(sig, frame):
|
def signal_handler(sig, frame):
|
||||||
logging.info("Received termination signal, checking if safe to exit...")
|
logging.info("Received termination signal, checking if safe to exit...")
|
||||||
@@ -47,15 +49,58 @@ def signal_handler(sig, frame):
|
|||||||
signal.signal(signal.SIGTERM, signal_handler)
|
signal.signal(signal.SIGTERM, signal_handler)
|
||||||
signal.signal(signal.SIGINT, signal_handler)
|
signal.signal(signal.SIGINT, signal_handler)
|
||||||
|
|
||||||
logger = logging.getLogger()
|
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_automator_google.log"
|
||||||
logger.setLevel(logging.INFO)
|
LOG_PRUNE_DAYS = 30
|
||||||
file_handler = logging.FileHandler('/home/shane/foodie_automator/foodie_automator_google.log', mode='a')
|
MAX_RETRIES = 3
|
||||||
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
|
RETRY_BACKOFF = 2
|
||||||
logger.addHandler(file_handler)
|
|
||||||
console_handler = logging.StreamHandler()
|
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
|
||||||
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
|
posted_titles = set(entry["title"] for entry in posted_titles_data)
|
||||||
logger.addHandler(console_handler)
|
used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
|
||||||
logging.info("Logging initialized for foodie_automator_google.py")
|
|
||||||
|
def setup_logging():
    """Prune old entries from ``LOG_FILE``, then attach root-logger handlers.

    Lines in the existing log are grouped into entries: an entry starts at a
    line beginning with a ``YYYY-MM-DD HH:MM:SS`` timestamp and includes any
    following lines without one (e.g. traceback lines). Entries older than
    ``LOG_PRUNE_DAYS`` days, and entries whose first 19 characters are not a
    parseable timestamp, are dropped and the file is rewritten.

    Afterwards the root logger is set to INFO and given a file handler
    (append mode) and a console handler sharing one format.
    """
    # FileHandler does not create missing parent directories.
    log_dir = os.path.dirname(LOG_FILE)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    malformed_count = 0
    if os.path.exists(LOG_FILE):
        with open(LOG_FILE, 'r') as f:
            lines = f.readlines()

        log_entries = []
        current_entry = []
        timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')

        for line in lines:
            # A timestamped line closes the entry collected so far.
            if timestamp_pattern.match(line) and current_entry:
                log_entries.append(''.join(current_entry))
                current_entry = []
            current_entry.append(line)

        if current_entry:
            log_entries.append(''.join(current_entry))

        # The formatter below uses logging's default time converter, which
        # writes local time, so compare against a naive local cutoff.
        cutoff = datetime.now() - timedelta(days=LOG_PRUNE_DAYS)
        pruned_entries = []
        for entry in log_entries:
            try:
                timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S')
            except ValueError:
                # Do not log here: no handler is attached yet, and the
                # module-level logging functions would implicitly install a
                # default one, duplicating console output later.
                malformed_count += 1
                continue
            if timestamp > cutoff:
                pruned_entries.append(entry)

        # Only rewrite the file when something was actually removed.
        if len(pruned_entries) != len(log_entries):
            with open(LOG_FILE, 'w') as f:
                f.writelines(pruned_entries)

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    file_handler = logging.FileHandler(LOG_FILE, mode='a')
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)
    logging.info("Logging initialized for foodie_automator_google.py")
    if malformed_count:
        logging.warning(
            f"Dropped {malformed_count} malformed log entries (no timestamp) while pruning {LOG_FILE}"
        )
|
||||||
|
|
||||||
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
|
|
||||||
@@ -68,6 +113,18 @@ posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
|
|||||||
posted_titles = set(entry["title"] for entry in posted_titles_data)
|
posted_titles = set(entry["title"] for entry in posted_titles_data)
|
||||||
used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
|
used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
|
||||||
|
|
||||||
|
def acquire_lock():
    """Take the single-instance lock for this script, or exit.

    Returns:
        The open file object holding an exclusive, non-blocking ``flock`` on
        ``LOCK_FILE``. The caller must keep it open while the lock is needed
        and release/close it afterwards.

    Raises:
        OSError: if the PID cannot be written after the lock was obtained.

    If the lock is already held, logs a message and terminates the process
    with exit status 0 via ``sys.exit``.
    """
    os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
    # Open without truncating: with mode 'w' a contender that loses the race
    # would still wipe the PID recorded by the instance holding the lock.
    lock_fd = open(LOCK_FILE, 'a+')
    try:
        fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except OSError:
        lock_fd.close()
        logging.info("Another instance of foodie_automator_google.py is running")
        sys.exit(0)

    try:
        # We own the lock now, so replacing the contents is safe.
        lock_fd.seek(0)
        lock_fd.truncate()
        lock_fd.write(str(os.getpid()))
        lock_fd.flush()
    except OSError:
        # A write failure is not "another instance running"; surface it.
        lock_fd.close()
        raise
    return lock_fd
|
||||||
|
|
||||||
def parse_search_volume(volume_text):
|
def parse_search_volume(volume_text):
|
||||||
try:
|
try:
|
||||||
volume_part = volume_text.split('\n')[0].lower().strip().replace('+', '')
|
volume_part = volume_text.split('\n')[0].lower().strip().replace('+', '')
|
||||||
@@ -89,10 +146,11 @@ def scrape_google_trends(geo='US'):
|
|||||||
chrome_options.add_argument("--disable-dev-shm-usage")
|
chrome_options.add_argument("--disable-dev-shm-usage")
|
||||||
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/125.0.0.0 Safari/537.36")
|
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/125.0.0.0 Safari/537.36")
|
||||||
|
|
||||||
driver = webdriver.Chrome(options=chrome_options)
|
driver = None
|
||||||
try:
|
try:
|
||||||
for attempt in range(3):
|
for attempt in range(MAX_RETRIES):
|
||||||
try:
|
try:
|
||||||
|
driver = webdriver.Chrome(options=chrome_options)
|
||||||
time.sleep(random.uniform(2, 5))
|
time.sleep(random.uniform(2, 5))
|
||||||
url = f"https://trends.google.com/trending?geo={geo}&hours=24&sort=search-volume&category=5"
|
url = f"https://trends.google.com/trending?geo={geo}&hours=24&sort=search-volume&category=5"
|
||||||
logging.info(f"Navigating to {url} (attempt {attempt + 1})")
|
logging.info(f"Navigating to {url} (attempt {attempt + 1})")
|
||||||
@@ -105,10 +163,13 @@ def scrape_google_trends(geo='US'):
|
|||||||
break
|
break
|
||||||
except TimeoutException:
|
except TimeoutException:
|
||||||
logging.warning(f"Timeout on attempt {attempt + 1} for geo={geo}")
|
logging.warning(f"Timeout on attempt {attempt + 1} for geo={geo}")
|
||||||
if attempt == 2:
|
if attempt == MAX_RETRIES - 1:
|
||||||
logging.error(f"Failed after 3 attempts for geo={geo}")
|
logging.error(f"Failed after {MAX_RETRIES} attempts for geo={geo}")
|
||||||
return []
|
return []
|
||||||
time.sleep(5)
|
time.sleep(RETRY_BACKOFF * (2 ** attempt))
|
||||||
|
if driver:
|
||||||
|
driver.quit()
|
||||||
|
continue
|
||||||
|
|
||||||
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
|
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
@@ -145,157 +206,137 @@ def scrape_google_trends(geo='US'):
|
|||||||
if trends:
|
if trends:
|
||||||
trends.sort(key=lambda x: x["search_volume"], reverse=True)
|
trends.sort(key=lambda x: x["search_volume"], reverse=True)
|
||||||
logging.info(f"Extracted {len(trends)} trends for geo={geo}: {[t['title'] for t in trends]}")
|
logging.info(f"Extracted {len(trends)} trends for geo={geo}: {[t['title'] for t in trends]}")
|
||||||
print(f"Raw trends fetched for geo={geo}: {[t['title'] for t in trends]}")
|
|
||||||
else:
|
else:
|
||||||
logging.warning(f"No valid trends found with search volume >= 20K for geo={geo}")
|
logging.warning(f"No valid trends found with search volume >= 20K for geo={geo}")
|
||||||
return trends
|
return trends
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Unexpected error in scrape_google_trends: {e}", exc_info=True)
|
||||||
|
return []
|
||||||
finally:
|
finally:
|
||||||
driver.quit()
|
if driver:
|
||||||
logging.info(f"Chrome driver closed for geo={geo}")
|
driver.quit()
|
||||||
|
logging.info(f"Chrome driver closed for geo={geo}")
|
||||||
|
|
||||||
def fetch_duckduckgo_news_context(trend_title, hours=24):
    """Collect recent news headlines for a trend as extra context.

    Searches DuckDuckGo News for ``"<trend_title> news"`` and joins the
    lower-cased titles of results dated within the last ``hours`` hours.
    The search is retried up to ``MAX_RETRIES`` times with exponential
    backoff based on ``RETRY_BACKOFF``.

    Args:
        trend_title: Trend text to search for.
        hours: Maximum age of a result, in hours. The search itself is
            issued with ``timelimit="d"``, so values above 24 are not
            expected to surface additional results.

    Returns:
        The space-joined headlines, a "No recent news found" message when
        nothing qualifies, or ``trend_title`` itself if every attempt fails.
    """
    for attempt in range(MAX_RETRIES):
        try:
            with DDGS() as ddgs:
                results = ddgs.news(f"{trend_title} news", timelimit="d", max_results=5)
                cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
                titles = []
                for r in results:
                    try:
                        date_str = r["date"]
                        if '+00:00' in date_str:
                            dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S+00:00").replace(tzinfo=timezone.utc)
                        else:
                            dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
                        if dt > cutoff:
                            titles.append(r["title"].lower())
                    except KeyError as e:
                        # One incomplete result should not force the whole
                        # search to be retried; skip just that result.
                        logging.warning(f"DuckDuckGo News result for '{trend_title}' is missing field {e}")
                        continue
                    except ValueError as e:
                        logging.warning(f"Date parsing failed for '{date_str}': {e}")
                        continue
                context = " ".join(titles) if titles else f"No recent news found within {hours} hours"
                logging.info(f"DuckDuckGo News context for '{trend_title}': {context}")
                return context
        except Exception as e:
            logging.warning(f"DuckDuckGo News context fetch failed for '{trend_title}' (attempt {attempt + 1}): {e}")
            if attempt < MAX_RETRIES - 1:
                time.sleep(RETRY_BACKOFF * (2 ** attempt))
            continue

    logging.error(f"Failed to fetch DuckDuckGo News context for '{trend_title}' after {MAX_RETRIES} attempts")
    return trend_title
|
||||||
|
|
||||||
def curate_from_google_trends(geo_list=['US']):
|
def curate_from_google_trends(geo_list=['US']):
|
||||||
all_trends = []
|
try:
|
||||||
for geo in geo_list:
|
all_trends = []
|
||||||
trends = scrape_google_trends(geo=geo)
|
for geo in geo_list:
|
||||||
if trends:
|
trends = scrape_google_trends(geo=geo)
|
||||||
all_trends.extend(trends)
|
if trends:
|
||||||
|
all_trends.extend(trends)
|
||||||
|
|
||||||
if not all_trends:
|
if not all_trends:
|
||||||
print("No Google Trends data available")
|
logging.info("No Google Trends data available")
|
||||||
logging.info("No Google Trends data available")
|
return None, None, False
|
||||||
return None, None, random.randint(600, 1800)
|
|
||||||
|
|
||||||
attempts = 0
|
attempts = 0
|
||||||
max_attempts = 10
|
max_attempts = 10
|
||||||
while attempts < max_attempts and all_trends:
|
while attempts < max_attempts and all_trends:
|
||||||
trend = all_trends.pop(0)
|
trend = all_trends.pop(0)
|
||||||
title = trend["title"]
|
title = trend["title"]
|
||||||
link = trend.get("link", "https://trends.google.com/")
|
link = trend.get("link", "https://trends.google.com/")
|
||||||
summary = trend.get("summary", "")
|
summary = trend.get("summary", "")
|
||||||
source_name = "Google Trends"
|
source_name = "Google Trends"
|
||||||
original_source = f'<a href="{link}">{source_name}</a>'
|
original_source = f'<a href="{link}">{source_name}</a>'
|
||||||
|
|
||||||
if title in posted_titles:
|
if title in posted_titles:
|
||||||
print(f"Skipping already posted trend: {title}")
|
logging.info(f"Skipping already posted trend: {title}")
|
||||||
logging.info(f"Skipping already posted trend: {title}")
|
attempts += 1
|
||||||
attempts += 1
|
continue
|
||||||
continue
|
|
||||||
|
|
||||||
print(f"Trying Google Trend: {title} from {source_name}")
|
logging.info(f"Trying Google Trend: {title} from {source_name}")
|
||||||
logging.info(f"Trying Google Trend: {title} from {source_name}")
|
|
||||||
|
|
||||||
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
||||||
if skip:
|
if skip:
|
||||||
print(f"Skipping filtered Google Trend: {title}")
|
logging.info(f"Skipping filtered Google Trend: {title}")
|
||||||
logging.info(f"Skipping filtered Google Trend: {title}")
|
attempts += 1
|
||||||
attempts += 1
|
continue
|
||||||
continue
|
|
||||||
|
|
||||||
ddg_context = fetch_duckduckgo_news_context(title)
|
ddg_context = fetch_duckduckgo_news_context(title)
|
||||||
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
|
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
|
||||||
interest_score = is_interesting(scoring_content)
|
interest_score = is_interesting(scoring_content)
|
||||||
logging.info(f"Interest score for '{title}': {interest_score}")
|
logging.info(f"Interest score for '{title}': {interest_score}")
|
||||||
if interest_score < 6:
|
if interest_score < 6:
|
||||||
print(f"Google Trends Interest Too Low: {interest_score}")
|
logging.info(f"Google Trends Interest Too Low: {interest_score}")
|
||||||
logging.info(f"Google Trends Interest Too Low: {interest_score}")
|
attempts += 1
|
||||||
attempts += 1
|
continue
|
||||||
continue
|
|
||||||
|
|
||||||
num_paragraphs = determine_paragraph_count(interest_score)
|
num_paragraphs = determine_paragraph_count(interest_score)
|
||||||
extra_prompt = (
|
extra_prompt = (
|
||||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||||
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
||||||
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
||||||
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
|
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
|
||||||
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
|
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
|
||||||
f"Do not include emojis in the summary."
|
f"Do not include emojis in the summary."
|
||||||
)
|
|
||||||
content_to_summarize = scoring_content
|
|
||||||
final_summary = summarize_with_gpt4o(
|
|
||||||
content_to_summarize,
|
|
||||||
source_name,
|
|
||||||
link,
|
|
||||||
interest_score=interest_score,
|
|
||||||
extra_prompt=extra_prompt
|
|
||||||
)
|
|
||||||
if not final_summary:
|
|
||||||
logging.info(f"Summary failed for '{title}'")
|
|
||||||
attempts += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
|
||||||
|
|
||||||
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic)
|
|
||||||
if not post_data:
|
|
||||||
attempts += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
|
||||||
if not image_url:
|
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
|
||||||
|
|
||||||
hook = get_dynamic_hook(post_data["title"]).strip()
|
|
||||||
|
|
||||||
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
|
||||||
share_links_template = (
|
|
||||||
f'<p>{share_prompt} '
|
|
||||||
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
|
||||||
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
|
||||||
)
|
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
|
|
||||||
|
|
||||||
global is_posting
|
|
||||||
is_posting = True
|
|
||||||
try:
|
|
||||||
post_id, post_url = post_to_wp(
|
|
||||||
post_data=post_data,
|
|
||||||
category=category,
|
|
||||||
link=link,
|
|
||||||
author=author,
|
|
||||||
image_url=image_url,
|
|
||||||
original_source=original_source,
|
|
||||||
image_source=image_source,
|
|
||||||
uploader=uploader,
|
|
||||||
page_url=page_url,
|
|
||||||
interest_score=interest_score,
|
|
||||||
should_post_tweet=True
|
|
||||||
)
|
)
|
||||||
finally:
|
content_to_summarize = scoring_content
|
||||||
is_posting = False
|
final_summary = summarize_with_gpt4o(
|
||||||
|
content_to_summarize,
|
||||||
|
source_name,
|
||||||
|
link,
|
||||||
|
interest_score=interest_score,
|
||||||
|
extra_prompt=extra_prompt
|
||||||
|
)
|
||||||
|
if not final_summary:
|
||||||
|
logging.info(f"Summary failed for '{title}'")
|
||||||
|
attempts += 1
|
||||||
|
continue
|
||||||
|
|
||||||
if post_id:
|
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||||
share_text = f"Check out this foodie gem! {post_data['title']}"
|
|
||||||
share_text_encoded = quote(share_text)
|
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic)
|
||||||
post_url_encoded = quote(post_url)
|
if not post_data:
|
||||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
attempts += 1
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
continue
|
||||||
|
|
||||||
|
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
||||||
|
if not image_url:
|
||||||
|
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||||
|
|
||||||
|
hook = get_dynamic_hook(post_data["title"]).strip()
|
||||||
|
|
||||||
|
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
||||||
|
share_links_template = (
|
||||||
|
f'<p>{share_prompt} '
|
||||||
|
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
||||||
|
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
||||||
|
)
|
||||||
|
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
|
||||||
|
|
||||||
|
global is_posting
|
||||||
is_posting = True
|
is_posting = True
|
||||||
try:
|
try:
|
||||||
post_to_wp(
|
post_id, post_url = post_to_wp(
|
||||||
post_data=post_data,
|
post_data=post_data,
|
||||||
category=category,
|
category=category,
|
||||||
link=link,
|
link=link,
|
||||||
@@ -306,43 +347,86 @@ def curate_from_google_trends(geo_list=['US']):
|
|||||||
uploader=uploader,
|
uploader=uploader,
|
||||||
page_url=page_url,
|
page_url=page_url,
|
||||||
interest_score=interest_score,
|
interest_score=interest_score,
|
||||||
post_id=post_id,
|
should_post_tweet=True
|
||||||
should_post_tweet=False
|
|
||||||
)
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
|
||||||
|
attempts += 1
|
||||||
|
continue
|
||||||
finally:
|
finally:
|
||||||
is_posting = False
|
is_posting = False
|
||||||
|
|
||||||
timestamp = datetime.now(timezone.utc).isoformat()
|
if post_id:
|
||||||
save_json_file(POSTED_TITLES_FILE, title, timestamp)
|
share_text = f"Check out this foodie gem! {post_data['title']}"
|
||||||
posted_titles.add(title)
|
share_text_encoded = quote(share_text)
|
||||||
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
|
post_url_encoded = quote(post_url)
|
||||||
|
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
||||||
|
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||||
|
is_posting = True
|
||||||
|
try:
|
||||||
|
post_to_wp(
|
||||||
|
post_data=post_data,
|
||||||
|
category=category,
|
||||||
|
link=link,
|
||||||
|
author=author,
|
||||||
|
image_url=image_url,
|
||||||
|
original_source=original_source,
|
||||||
|
image_source=image_source,
|
||||||
|
uploader=uploader,
|
||||||
|
page_url=page_url,
|
||||||
|
interest_score=interest_score,
|
||||||
|
post_id=post_id,
|
||||||
|
should_post_tweet=False
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
|
||||||
|
finally:
|
||||||
|
is_posting = False
|
||||||
|
|
||||||
if image_url:
|
timestamp = datetime.now(timezone.utc).isoformat()
|
||||||
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
save_json_file(POSTED_TITLES_FILE, title, timestamp)
|
||||||
used_images.add(image_url)
|
posted_titles.add(title)
|
||||||
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
|
||||||
|
|
||||||
print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
|
if image_url:
|
||||||
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
|
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
||||||
return post_data, category, random.randint(0, 1800)
|
used_images.add(image_url)
|
||||||
|
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
||||||
|
|
||||||
attempts += 1
|
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Google Trends *****")
|
||||||
logging.info(f"WP posting failed for '{post_data['title']}'")
|
return post_data, category, True
|
||||||
|
|
||||||
print("No interesting Google Trend found after attempts")
|
attempts += 1
|
||||||
logging.info("No interesting Google Trend found after attempts")
|
logging.info(f"WP posting failed for '{post_data['title']}'")
|
||||||
return None, None, random.randint(600, 1800)
|
|
||||||
|
logging.info("No interesting Google Trend found after attempts")
|
||||||
|
return None, None, False
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Unexpected error in curate_from_google_trends: {e}", exc_info=True)
|
||||||
|
return None, None, False
|
||||||
|
|
||||||
def run_google_trends_automator():
    """Run one Google Trends curation pass under the single-instance lock.

    Returns:
        A ``(post_data, category, should_continue)`` tuple as produced by
        ``curate_from_google_trends``, or ``(None, None, False)`` if an
        unexpected exception occurs.

    Note:
        ``acquire_lock`` terminates the process via ``sys.exit`` when another
        instance holds the lock; ``SystemExit`` is not caught here, and the
        cleanup below is skipped because no lock handle was obtained.
    """
    lock_fd = None
    try:
        lock_fd = acquire_lock()
        logging.info("***** Google Trends Automator Launched *****")
        geo_list = ['US', 'GB', 'AU']
        post_data, category, should_continue = curate_from_google_trends(geo_list=geo_list)
        if not post_data:
            logging.info("No postable Google Trend found")
        else:
            logging.info("Completed Google Trends run")
        return post_data, category, should_continue
    except Exception as e:
        logging.error(f"Fatal error in run_google_trends_automator: {e}", exc_info=True)
        return None, None, False
    finally:
        if lock_fd:
            # Unlink while the lock is still held. Removing the file after
            # unlocking lets another process lock the old file just before
            # it disappears while a third creates and locks a fresh one.
            # NOTE(review): a process that opened the file before the unlink
            # and locks it after the release can still slip through; keeping
            # the lock file permanently would close that gap entirely.
            try:
                os.remove(LOCK_FILE)
            except FileNotFoundError:
                pass
            except OSError as e:
                logging.warning(f"Could not remove lock file {LOCK_FILE}: {e}")
            try:
                fcntl.flock(lock_fd, fcntl.LOCK_UN)
            finally:
                lock_fd.close()
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Logging must be configured before the automator emits its first record.
    setup_logging()
    run_result = run_google_trends_automator()
    post_data, category, should_continue = run_result
    logging.info(f"Run completed, should_continue: {should_continue}")
|
||||||
+313
-271
@@ -29,11 +29,13 @@ from foodie_utils import (
|
|||||||
prepare_post_data, select_best_author, smart_image_and_filter,
|
prepare_post_data, select_best_author, smart_image_and_filter,
|
||||||
get_flickr_image
|
get_flickr_image
|
||||||
)
|
)
|
||||||
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt # Removed select_best_cta import
|
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
|
||||||
|
import fcntl
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
is_posting = False
|
is_posting = False
|
||||||
|
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_reddit.lock"
|
||||||
|
|
||||||
def signal_handler(sig, frame):
|
def signal_handler(sig, frame):
|
||||||
logging.info("Received termination signal, checking if safe to exit...")
|
logging.info("Received termination signal, checking if safe to exit...")
|
||||||
@@ -46,8 +48,22 @@ def signal_handler(sig, frame):
|
|||||||
signal.signal(signal.SIGTERM, signal_handler)
|
signal.signal(signal.SIGTERM, signal_handler)
|
||||||
signal.signal(signal.SIGINT, signal_handler)
|
signal.signal(signal.SIGINT, signal_handler)
|
||||||
|
|
||||||
LOG_FILE = "/home/shane/foodie_automator/foodie_automator_reddit.log"
|
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_automator_reddit.log"
|
||||||
LOG_PRUNE_DAYS = 30
|
LOG_PRUNE_DAYS = 30
|
||||||
|
MAX_RETRIES = 3
|
||||||
|
RETRY_BACKOFF = 2
|
||||||
|
|
||||||
|
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_reddit_titles.json'
|
||||||
|
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
|
||||||
|
EXPIRATION_HOURS = 24
|
||||||
|
IMAGE_EXPIRATION_DAYS = 7
|
||||||
|
|
||||||
|
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
|
||||||
|
posted_titles = set(entry["title"] for entry in posted_titles_data if "title" in entry)
|
||||||
|
used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
|
||||||
|
used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
|
||||||
|
|
||||||
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
|
|
||||||
def setup_logging():
|
def setup_logging():
|
||||||
if os.path.exists(LOG_FILE):
|
if os.path.exists(LOG_FILE):
|
||||||
@@ -59,7 +75,7 @@ def setup_logging():
|
|||||||
timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}')
|
timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}')
|
||||||
|
|
||||||
for line in lines:
|
for line in lines:
|
||||||
if timestamp_pattern.match(line):
|
if(timestamp_pattern.match(line)):
|
||||||
if current_entry:
|
if current_entry:
|
||||||
log_entries.append(''.join(current_entry))
|
log_entries.append(''.join(current_entry))
|
||||||
current_entry = [line]
|
current_entry = [line]
|
||||||
@@ -95,19 +111,17 @@ def setup_logging():
|
|||||||
logging.getLogger().addHandler(console_handler)
|
logging.getLogger().addHandler(console_handler)
|
||||||
logging.info("Logging initialized for foodie_automator_reddit.py")
|
logging.info("Logging initialized for foodie_automator_reddit.py")
|
||||||
|
|
||||||
setup_logging()
|
def acquire_lock():
    """Take the single-instance lock for this script, or exit.

    Returns:
        The open file object holding an exclusive, non-blocking ``flock`` on
        ``LOCK_FILE``. The caller must keep it open while the lock is needed
        and release/close it afterwards.

    Raises:
        OSError: if the PID cannot be written after the lock was obtained.

    If the lock is already held, logs a message and terminates the process
    with exit status 0 via ``sys.exit``.
    """
    os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
    # Open without truncating: with mode 'w' a contender that loses the race
    # would still wipe the PID recorded by the instance holding the lock.
    lock_fd = open(LOCK_FILE, 'a+')
    try:
        fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except OSError:
        lock_fd.close()
        logging.info("Another instance of foodie_automator_reddit.py is running")
        sys.exit(0)

    try:
        # We own the lock now, so replacing the contents is safe.
        lock_fd.seek(0)
        lock_fd.truncate()
        lock_fd.write(str(os.getpid()))
        lock_fd.flush()
    except OSError:
        # A write failure is not "another instance running"; surface it.
        lock_fd.close()
        raise
    return lock_fd
|
||||||
|
|
||||||
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
|
||||||
|
|
||||||
def clean_reddit_title(title):
|
def clean_reddit_title(title):
|
||||||
cleaned_title = re.sub(r'^\[.*?\]\s*', '', title).strip()
|
cleaned_title = re.sub(r'^\[.*?\]\s*', '', title).strip()
|
||||||
@@ -115,253 +129,246 @@ def clean_reddit_title(title):
|
|||||||
return cleaned_title
|
return cleaned_title
|
||||||
|
|
||||||
def is_interesting_reddit(title, summary, upvotes, comment_count, top_comments):
    """Score a Reddit post from 0 to 10 for how post-worthy it is.

    A base score is requested from the chat model named by
    ``LIGHT_TASK_MODEL`` (a reply that is not purely digits counts as 0),
    then boosted by engagement: +1/+2/+3 for at least 50/100/500 upvotes and
    +1/+2 for at least 20/100 comments. The sum is capped at 10.

    Args:
        title: Post title.
        summary: Post body or summary text.
        upvotes: Upvote count of the post.
        comment_count: Number of comments on the post.
        top_comments: Comment texts added to the prompt; may be empty or
            ``None``.

    Returns:
        The final integer score, or 0 if scoring fails ``MAX_RETRIES`` times.
    """
    top_comments = top_comments or []

    # The prompt does not change between attempts, so build it once.
    content = f"Title: {title}\n\nContent: {summary}"
    if top_comments:
        # Join outside the f-string: a backslash inside an f-string
        # replacement field is a SyntaxError before Python 3.12.
        joined_comments = '\n'.join(str(comment) for comment in top_comments)
        content += f"\n\nTop Comments:\n{joined_comments}"

    for attempt in range(MAX_RETRIES):
        try:
            response = client.chat.completions.create(
                model=LIGHT_TASK_MODEL,
                messages=[
                    {"role": "system", "content": (
                        "Rate this Reddit post from 0-10 based on rarity, buzzworthiness, and engagement potential for food lovers, covering food topics (skip recipes). "
                        "Score 8-10 for rare, highly shareable ideas (e.g., unique dishes or restaurant trends). "
                        "Score 5-7 for fresh, engaging updates with broad appeal. Score below 5 for common or unremarkable content. "
                        "Consider comments for added context (e.g., specific locations or unique details). "
                        "Return only a number"
                    )},
                    {"role": "user", "content": content}
                ],
                max_tokens=5
            )
            raw_score = response.choices[0].message.content.strip()
            base_score = int(raw_score) if raw_score.isdigit() else 0

            engagement_boost = 0
            if upvotes >= 500:
                engagement_boost += 3
            elif upvotes >= 100:
                engagement_boost += 2
            elif upvotes >= 50:
                engagement_boost += 1

            if comment_count >= 100:
                engagement_boost += 2
            elif comment_count >= 20:
                engagement_boost += 1

            final_score = min(base_score + engagement_boost, 10)
            logging.info(f"Reddit Interest Score: {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count}, top_comments: {len(top_comments)}) for '{title}'")
            return final_score
        except Exception as e:
            logging.warning(f"Reddit interestingness scoring failed (attempt {attempt + 1}): {e}")
            if attempt < MAX_RETRIES - 1:
                time.sleep(RETRY_BACKOFF * (2 ** attempt))
            continue

    logging.error(f"Failed to score Reddit post '{title}' after {MAX_RETRIES} attempts")
    return 0
|
||||||
|
|
||||||
def get_top_comments(post_url, reddit, limit=3):
    """Return the bodies of up to ``limit`` top-sorted comments of a Reddit post.

    Args:
        post_url: Full URL of the submission.
        reddit: Client object exposing ``submission(url=...)`` (a PRAW
            ``Reddit`` instance in this file).
        limit: Maximum number of comment bodies to return.

    Returns:
        A list of comment body strings, or ``[]`` once every one of the
        ``MAX_RETRIES`` attempts has failed.
    """
    # Placeholder bodies Reddit shows for comments that no longer have content.
    placeholders = ('[deleted]', '[removed]')
    for attempt in range(MAX_RETRIES):
        try:
            submission = reddit.submission(url=post_url)
            # The sort order has to be set before the comments are accessed.
            submission.comment_sort = 'top'
            submission.comments.replace_more(limit=0)
            # Filter first, slice second: otherwise a placeholder among the
            # first `limit` comments shrinks the result instead of letting
            # the next real comment take its place.
            usable = [
                comment.body
                for comment in submission.comments
                if not comment.body.startswith(placeholders)
            ]
            top_comments = usable[:limit]
            logging.info(f"Fetched {len(top_comments)} top comments for {post_url}")
            return top_comments
        except Exception as e:
            logging.warning(f"Failed to fetch comments for {post_url} (attempt {attempt + 1}): {e}")
            if attempt < MAX_RETRIES - 1:
                # Exponential backoff between attempts; no sleep after the last one.
                time.sleep(RETRY_BACKOFF * (2 ** attempt))
    logging.error(f"Failed to fetch comments for {post_url} after {MAX_RETRIES} attempts")
    return []
|
||||||
|
|
||||||
def fetch_duckduckgo_news_context(title, hours=24):
    """Collect recent DuckDuckGo News headlines related to ``title``.

    Args:
        title: Topic to search for; the query is ``"<title> news"``.
        hours: Only headlines published within this many hours are kept.

    Returns:
        The lower-cased matching headlines joined by spaces, a
        "No recent news found ..." message when nothing matches, or ``title``
        itself once every one of the ``MAX_RETRIES`` attempts has failed.
    """
    # Ask the search API for the narrowest window that still covers `hours`.
    timelimit = "d" if hours <= 24 else "w" if hours <= 168 else "m"
    for attempt in range(MAX_RETRIES):
        try:
            with DDGS() as ddgs:
                results = ddgs.news(f"{title} news", timelimit=timelimit, max_results=5)
                cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
                titles = []
                for r in results:
                    date_str = r.get("date")
                    try:
                        # fromisoformat() only accepts a literal "Z" suffix on
                        # newer Python versions, so spell the offset out.
                        iso = date_str[:-1] + "+00:00" if date_str.endswith("Z") else date_str
                        dt = datetime.fromisoformat(iso)
                    except (AttributeError, TypeError, ValueError) as e:
                        # One unparseable or missing date must not sink the batch.
                        logging.warning(f"Date parsing failed for '{date_str}': {e}")
                        continue
                    if dt.tzinfo is None:
                        # Timestamps without an offset are treated as UTC.
                        dt = dt.replace(tzinfo=timezone.utc)
                    if dt > cutoff:
                        titles.append(r["title"].lower())
                context = " ".join(titles) if titles else f"No recent news found within {hours} hours"
                logging.info(f"DuckDuckGo News context for '{title}': {context}")
                return context
        except Exception as e:
            logging.warning(f"DuckDuckGo News context fetch failed for '{title}' (attempt {attempt + 1}): {e}")
            if attempt < MAX_RETRIES - 1:
                # Exponential backoff between attempts; no sleep after the last one.
                time.sleep(RETRY_BACKOFF * (2 ** attempt))
    logging.error(f"Failed to fetch DuckDuckGo News context for '{title}' after {MAX_RETRIES} attempts")
    return title
|
||||||
|
|
||||||
def fetch_reddit_posts():
    """Fetch today's top posts from the configured food subreddits.

    Each subreddit is tried up to ``MAX_RETRIES`` times with exponential
    backoff. Posts published before the ``EXPIRATION_HOURS`` cutoff are
    skipped.

    Returns:
        A list of dicts with the keys ``title``, ``raw_title``, ``link``,
        ``summary``, ``feed_title``, ``pub_date``, ``upvotes`` and
        ``comment_count``; ``[]`` when an unexpected error occurs.
    """
    try:
        reddit = praw.Reddit(
            client_id=REDDIT_CLIENT_ID,
            client_secret=REDDIT_CLIENT_SECRET,
            user_agent=REDDIT_USER_AGENT
        )
        feeds = ['FoodPorn', 'restaurant', 'FoodIndustry', 'food']
        articles = []
        cutoff_date = datetime.now(timezone.utc) - timedelta(hours=EXPIRATION_HOURS)

        logging.info(f"Starting fetch with cutoff date: {cutoff_date}")
        for subreddit_name in feeds:
            for attempt in range(MAX_RETRIES):
                # Collect into a per-attempt list: if the listing fails half-way
                # the partial results are discarded, so a retry cannot add the
                # same posts a second time.
                batch = []
                try:
                    subreddit = reddit.subreddit(subreddit_name)
                    for submission in subreddit.top(time_filter='day', limit=100):
                        pub_date = datetime.fromtimestamp(submission.created_utc, tz=timezone.utc)
                        if pub_date < cutoff_date:
                            logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
                            continue
                        cleaned_title = clean_reddit_title(submission.title)
                        batch.append({
                            "title": cleaned_title,
                            "raw_title": submission.title,
                            "link": f"https://www.reddit.com{submission.permalink}",
                            "summary": submission.selftext,
                            "feed_title": get_clean_source_name(subreddit_name),
                            "pub_date": pub_date,
                            "upvotes": submission.score,
                            "comment_count": submission.num_comments
                        })
                except Exception as e:
                    logging.error(f"Failed to fetch Reddit feed r/{subreddit_name} (attempt {attempt + 1}): {e}")
                    if attempt < MAX_RETRIES - 1:
                        time.sleep(RETRY_BACKOFF * (2 ** attempt))
                    continue
                articles.extend(batch)
                # Report this subreddit's own count, not the running total.
                logging.info(f"Fetched {len(batch)} posts from r/{subreddit_name}")
                break

        logging.info(f"Total Reddit posts fetched: {len(articles)}")
        return articles
    except Exception as e:
        logging.error(f"Unexpected error in fetch_reddit_posts: {e}", exc_info=True)
        return []
|
|
||||||
|
|
||||||
def curate_from_reddit():
|
def curate_from_reddit():
|
||||||
articles = fetch_reddit_posts()
|
try:
|
||||||
if not articles:
|
articles = fetch_reddit_posts()
|
||||||
print("No Reddit posts available")
|
if not articles:
|
||||||
logging.info("No Reddit posts available")
|
logging.info("No Reddit posts available")
|
||||||
return None, None, random.randint(600, 1800)
|
return None, None, False
|
||||||
|
|
||||||
articles.sort(key=lambda x: x["upvotes"], reverse=True)
|
articles.sort(key=lambda x: x["upvotes"], reverse=True)
|
||||||
|
|
||||||
reddit = praw.Reddit(
|
reddit = praw.Reddit(
|
||||||
client_id=REDDIT_CLIENT_ID,
|
client_id=REDDIT_CLIENT_ID,
|
||||||
client_secret=REDDIT_CLIENT_SECRET,
|
client_secret=REDDIT_CLIENT_SECRET,
|
||||||
user_agent=REDDIT_USER_AGENT
|
user_agent=REDDIT_USER_AGENT
|
||||||
)
|
|
||||||
|
|
||||||
attempts = 0
|
|
||||||
max_attempts = 10
|
|
||||||
while attempts < max_attempts and articles:
|
|
||||||
article = articles.pop(0)
|
|
||||||
title = article["title"]
|
|
||||||
raw_title = article["raw_title"]
|
|
||||||
link = article["link"]
|
|
||||||
summary = article["summary"]
|
|
||||||
source_name = "Reddit"
|
|
||||||
original_source = '<a href="https://www.reddit.com/">Reddit</a>'
|
|
||||||
|
|
||||||
if raw_title in posted_titles:
|
|
||||||
print(f"Skipping already posted post: {raw_title}")
|
|
||||||
logging.info(f"Skipping already posted post: {raw_title}")
|
|
||||||
attempts += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
print(f"Trying Reddit Post: {title} from {source_name}")
|
|
||||||
logging.info(f"Trying Reddit Post: {title} from {source_name}")
|
|
||||||
|
|
||||||
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
|
||||||
if skip or any(keyword in title.lower() or keyword in raw_title.lower() for keyword in RECIPE_KEYWORDS + ["homemade"]):
|
|
||||||
print(f"Skipping filtered Reddit post: {title}")
|
|
||||||
logging.info(f"Skipping filtered Reddit post: {title}")
|
|
||||||
attempts += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
top_comments = get_top_comments(link, reddit, limit=3)
|
|
||||||
ddg_context = fetch_duckduckgo_news_context(title)
|
|
||||||
content_to_summarize = f"{title}\n\n{summary}\n\nTop Comments:\n{'\n'.join(top_comments) if top_comments else 'None'}\n\nAdditional Context: {ddg_context}"
|
|
||||||
interest_score = is_interesting_reddit(
|
|
||||||
title,
|
|
||||||
summary,
|
|
||||||
article["upvotes"],
|
|
||||||
article["comment_count"],
|
|
||||||
top_comments
|
|
||||||
)
|
|
||||||
logging.info(f"Interest Score: {interest_score} for '{title}'")
|
|
||||||
if interest_score < 6:
|
|
||||||
print(f"Reddit Interest Too Low: {interest_score}")
|
|
||||||
logging.info(f"Reddit Interest Too Low: {interest_score}")
|
|
||||||
attempts += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
num_paragraphs = determine_paragraph_count(interest_score)
|
|
||||||
extra_prompt = (
|
|
||||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
|
||||||
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
|
||||||
f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
|
|
||||||
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
|
||||||
f"Do NOT introduce unrelated concepts unless in the content, comments, or additional context.\n"
|
|
||||||
f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
|
|
||||||
f"Do not include emojis in the summary."
|
|
||||||
)
|
)
|
||||||
|
|
||||||
final_summary = summarize_with_gpt4o(
|
attempts = 0
|
||||||
content_to_summarize,
|
max_attempts = 10
|
||||||
source_name,
|
while attempts < max_attempts and articles:
|
||||||
link,
|
article = articles.pop(0)
|
||||||
interest_score=interest_score,
|
title = article["title"]
|
||||||
extra_prompt=extra_prompt
|
raw_title = article["raw_title"]
|
||||||
)
|
link = article["link"]
|
||||||
if not final_summary:
|
summary = article["summary"]
|
||||||
logging.info(f"Summary failed for '{title}'")
|
source_name = "Reddit"
|
||||||
attempts += 1
|
original_source = '<a href="https://www.reddit.com/">Reddit</a>'
|
||||||
continue
|
|
||||||
|
|
||||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
if raw_title in posted_titles:
|
||||||
|
logging.info(f"Skipping already posted post: {raw_title}")
|
||||||
|
attempts += 1
|
||||||
|
continue
|
||||||
|
|
||||||
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic)
|
logging.info(f"Trying Reddit Post: {title} from {source_name}")
|
||||||
if not post_data:
|
|
||||||
attempts += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
||||||
if not image_url:
|
if skip or any(keyword in title.lower() or keyword in raw_title.lower() for keyword in RECIPE_KEYWORDS + ["homemade"]):
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
logging.info(f"Skipping filtered Reddit post: {title}")
|
||||||
|
attempts += 1
|
||||||
|
continue
|
||||||
|
|
||||||
hook = get_dynamic_hook(post_data["title"]).strip()
|
top_comments = get_top_comments(link, reddit, limit=3)
|
||||||
|
ddg_context = fetch_duckduckgo_news_context(title)
|
||||||
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
content_to_summarize = f"{title}\n\n{summary}\n\nTop Comments:\n{'\n'.join(top_comments) if top_comments else 'None'}\n\nAdditional Context: {ddg_context}"
|
||||||
share_links_template = (
|
interest_score = is_interesting_reddit(
|
||||||
f'<p>{share_prompt} '
|
title,
|
||||||
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
summary,
|
||||||
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
article["upvotes"],
|
||||||
)
|
article["comment_count"],
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
|
top_comments
|
||||||
|
|
||||||
global is_posting
|
|
||||||
is_posting = True
|
|
||||||
try:
|
|
||||||
post_id, post_url = post_to_wp(
|
|
||||||
post_data=post_data,
|
|
||||||
category=category,
|
|
||||||
link=link,
|
|
||||||
author=author,
|
|
||||||
image_url=image_url,
|
|
||||||
original_source=original_source,
|
|
||||||
image_source=image_source,
|
|
||||||
uploader=uploader,
|
|
||||||
page_url=page_url,
|
|
||||||
interest_score=interest_score,
|
|
||||||
should_post_tweet=True
|
|
||||||
)
|
)
|
||||||
finally:
|
logging.info(f"Interest Score: {interest_score} for '{title}'")
|
||||||
is_posting = False
|
if interest_score < 6:
|
||||||
|
logging.info(f"Reddit Interest Too Low: {interest_score}")
|
||||||
|
attempts += 1
|
||||||
|
continue
|
||||||
|
|
||||||
if post_id:
|
num_paragraphs = determine_paragraph_count(interest_score)
|
||||||
share_text = f"Check out this foodie gem! {post_data['title']}"
|
extra_prompt = (
|
||||||
share_text_encoded = quote(share_text)
|
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||||
post_url_encoded = quote(post_url)
|
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
||||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
||||||
|
f"Do NOT introduce unrelated concepts unless in the content, comments, or additional context.\n"
|
||||||
|
f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
|
||||||
|
f"Do not include emojis in the summary."
|
||||||
|
)
|
||||||
|
|
||||||
|
final_summary = summarize_with_gpt4o(
|
||||||
|
content_to_summarize,
|
||||||
|
source_name,
|
||||||
|
link,
|
||||||
|
interest_score=interest_score,
|
||||||
|
extra_prompt=extra_prompt
|
||||||
|
)
|
||||||
|
if not final_summary:
|
||||||
|
logging.info(f"Summary failed for '{title}'")
|
||||||
|
attempts += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||||
|
|
||||||
|
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic)
|
||||||
|
if not post_data:
|
||||||
|
attempts += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
||||||
|
if not image_url:
|
||||||
|
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||||
|
|
||||||
|
hook = get_dynamic_hook(post_data["title"]).strip()
|
||||||
|
|
||||||
|
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
||||||
|
share_links_template = (
|
||||||
|
f'<p>{share_prompt} '
|
||||||
|
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
||||||
|
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
||||||
|
)
|
||||||
|
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
|
||||||
|
|
||||||
|
global is_posting
|
||||||
is_posting = True
|
is_posting = True
|
||||||
try:
|
try:
|
||||||
post_to_wp(
|
post_id, post_url = post_to_wp(
|
||||||
post_data=post_data,
|
post_data=post_data,
|
||||||
category=category,
|
category=category,
|
||||||
link=link,
|
link=link,
|
||||||
@@ -372,49 +379,84 @@ def curate_from_reddit():
|
|||||||
uploader=uploader,
|
uploader=uploader,
|
||||||
page_url=page_url,
|
page_url=page_url,
|
||||||
interest_score=interest_score,
|
interest_score=interest_score,
|
||||||
post_id=post_id,
|
should_post_tweet=True
|
||||||
should_post_tweet=False
|
|
||||||
)
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
|
||||||
|
attempts += 1
|
||||||
|
continue
|
||||||
finally:
|
finally:
|
||||||
is_posting = False
|
is_posting = False
|
||||||
|
|
||||||
timestamp = datetime.now(timezone.utc).isoformat()
|
if post_id:
|
||||||
save_json_file(POSTED_TITLES_FILE, raw_title, timestamp)
|
share_text = f"Check out this foodie gem! {post_data['title']}"
|
||||||
posted_titles.add(raw_title)
|
share_text_encoded = quote(share_text)
|
||||||
logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}")
|
post_url_encoded = quote(post_url)
|
||||||
|
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
||||||
|
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||||
|
is_posting = True
|
||||||
|
try:
|
||||||
|
post_to_wp(
|
||||||
|
post_data=post_data,
|
||||||
|
category=category,
|
||||||
|
link=link,
|
||||||
|
author=author,
|
||||||
|
image_url=image_url,
|
||||||
|
original_source=original_source,
|
||||||
|
image_source=image_source,
|
||||||
|
uploader=uploader,
|
||||||
|
page_url=page_url,
|
||||||
|
interest_score=interest_score,
|
||||||
|
post_id=post_id,
|
||||||
|
should_post_tweet=False
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
|
||||||
|
finally:
|
||||||
|
is_posting = False
|
||||||
|
|
||||||
if image_url:
|
timestamp = datetime.now(timezone.utc).isoformat()
|
||||||
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
save_json_file(POSTED_TITLES_FILE, raw_title, timestamp)
|
||||||
used_images.add(image_url)
|
posted_titles.add(raw_title)
|
||||||
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE} with timestamp {timestamp}")
|
logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE}")
|
||||||
|
|
||||||
print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
|
if image_url:
|
||||||
print(f"Actual post URL: {post_url}")
|
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
||||||
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
|
used_images.add(image_url)
|
||||||
logging.info(f"Actual post URL: {post_url}")
|
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
||||||
return post_data, category, random.randint(0, 1800)
|
|
||||||
|
|
||||||
attempts += 1
|
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
|
||||||
logging.info(f"WP posting failed for '{post_data['title']}'")
|
return post_data, category, True
|
||||||
|
attempts += 1
|
||||||
|
logging.info(f"WP posting failed for '{post_data['title']}'")
|
||||||
|
|
||||||
print("No interesting Reddit post found after attempts")
|
logging.info("No interesting Reddit post found after attempts")
|
||||||
logging.info("No interesting Reddit post found after attempts")
|
return None, None, False
|
||||||
return None, None, random.randint(600, 1800)
|
except Exception as e:
|
||||||
|
logging.error(f"Unexpected error in curate_from_reddit: {e}", exc_info=True)
|
||||||
|
return None, None, False
|
||||||
|
|
||||||
def run_reddit_automator():
    """Run one Reddit curation pass while holding the single-instance lock.

    Returns:
        The ``(post_data, category, should_continue)`` tuple produced by
        ``curate_from_reddit()``, or ``(None, None, False)`` if an exception
        is raised.
    """
    lock_fd = None
    try:
        lock_fd = acquire_lock()
        logging.info("***** Reddit Automator Launched *****")
        post_data, category, should_continue = curate_from_reddit()
        if not post_data:
            logging.info("No postable Reddit article found")
        else:
            logging.info("Completed Reddit run")
        return post_data, category, should_continue
    except Exception as e:
        logging.error(f"Fatal error in run_reddit_automator: {e}", exc_info=True)
        return None, None, False
    finally:
        if lock_fd:
            try:
                # Unlink while the lock is still held. Removing the file after
                # unlocking lets another process lock the old file in the gap,
                # and a later one then locks a fresh file at the same path.
                # EAFP avoids the exists()/remove() check-then-act race.
                try:
                    os.remove(LOCK_FILE)
                except FileNotFoundError:
                    pass
            finally:
                try:
                    fcntl.flock(lock_fd, fcntl.LOCK_UN)
                finally:
                    # Always close the handle, even if unlink or unlock failed.
                    lock_fd.close()
|
||||||
|
|
||||||
# Script entry point: configure logging first so the run itself is captured,
# then perform a single curation pass and record its outcome.
if __name__ == "__main__":
    setup_logging()
    post_data, category, should_continue = run_reddit_automator()
    logging.info(f"Run completed, should_continue: {should_continue}")
|
||||||
+230
-197
@@ -31,10 +31,12 @@ from foodie_utils import (
|
|||||||
)
|
)
|
||||||
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
|
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
import fcntl
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
is_posting = False
|
is_posting = False
|
||||||
|
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_rss.lock"
|
||||||
|
|
||||||
def signal_handler(sig, frame):
|
def signal_handler(sig, frame):
|
||||||
logging.info("Received termination signal, checking if safe to exit...")
|
logging.info("Received termination signal, checking if safe to exit...")
|
||||||
@@ -47,10 +49,11 @@ def signal_handler(sig, frame):
|
|||||||
signal.signal(signal.SIGTERM, signal_handler)
|
signal.signal(signal.SIGTERM, signal_handler)
|
||||||
signal.signal(signal.SIGINT, signal_handler)
|
signal.signal(signal.SIGINT, signal_handler)
|
||||||
|
|
||||||
LOG_FILE = "/home/shane/foodie_automator/foodie_automator_rss.log"
|
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_automator_rss.log"
|
||||||
LOG_PRUNE_DAYS = 30
|
LOG_PRUNE_DAYS = 30
|
||||||
FEED_TIMEOUT = 15
|
FEED_TIMEOUT = 15
|
||||||
MAX_RETRIES = 3
|
MAX_RETRIES = 3
|
||||||
|
RETRY_BACKOFF = 2
|
||||||
|
|
||||||
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json'
|
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json'
|
||||||
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
|
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
|
||||||
@@ -96,21 +99,27 @@ def setup_logging():
|
|||||||
logging.getLogger("requests").setLevel(logging.WARNING)
|
logging.getLogger("requests").setLevel(logging.WARNING)
|
||||||
logging.info("Logging initialized for foodie_automator_rss.py")
|
logging.info("Logging initialized for foodie_automator_rss.py")
|
||||||
|
|
||||||
setup_logging()
|
def acquire_lock():
    """Take the single-instance lock, or exit if another instance holds it.

    Returns:
        The open lock-file object. The caller keeps it open for as long as
        the lock is needed and closes it to release the lock.

    Raises:
        SystemExit: With code 0 when the lock is already held elsewhere.
    """
    os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
    # Open without truncating: mode 'w' would wipe the PID written by the
    # instance that currently holds the lock before we even test the lock.
    lock_fd = open(LOCK_FILE, 'a+')
    try:
        fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        lock_fd.close()
        logging.info("Another instance of foodie_automator_rss.py is running")
        sys.exit(0)
    # The lock is ours; only now is it safe to replace the recorded PID.
    # Errors here propagate instead of being misreported as a lock conflict.
    try:
        lock_fd.seek(0)
        lock_fd.truncate()
        lock_fd.write(str(os.getpid()))
        lock_fd.flush()
    except Exception:
        lock_fd.close()
        raise
    return lock_fd
|
||||||
|
|
||||||
def create_http_session() -> requests.Session:
|
def create_http_session() -> requests.Session:
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
retry_strategy = Retry(
|
retry_strategy = Retry(
|
||||||
total=MAX_RETRIES,
|
total=MAX_RETRIES,
|
||||||
backoff_factor=2,
|
backoff_factor=RETRY_BACKOFF,
|
||||||
status_forcelist=[403, 429, 500, 502, 503, 504],
|
status_forcelist=[403, 429, 500, 502, 503, 504],
|
||||||
allowed_methods=["GET", "POST"]
|
allowed_methods=["GET", "POST"]
|
||||||
)
|
)
|
||||||
adapter = HTTPAdapter(
|
adapter = HTTPAdapter(max_retries=retry_strategy)
|
||||||
max_retries=retry_strategy,
|
|
||||||
pool_connections=10,
|
|
||||||
pool_maxsize=10
|
|
||||||
)
|
|
||||||
session.mount("http://", adapter)
|
session.mount("http://", adapter)
|
||||||
session.mount("https://", adapter)
|
session.mount("https://", adapter)
|
||||||
session.headers.update({
|
session.headers.update({
|
||||||
@@ -140,189 +149,169 @@ def fetch_rss_feeds():
|
|||||||
|
|
||||||
logging.info(f"Processing feeds: {RSS_FEEDS}")
|
logging.info(f"Processing feeds: {RSS_FEEDS}")
|
||||||
for feed_url in RSS_FEEDS:
|
for feed_url in RSS_FEEDS:
|
||||||
logging.info(f"Processing feed: {feed_url}")
|
for attempt in range(MAX_RETRIES):
|
||||||
try:
|
logging.info(f"Processing feed: {feed_url} (attempt {attempt + 1})")
|
||||||
response = session.get(feed_url, timeout=FEED_TIMEOUT)
|
try:
|
||||||
response.raise_for_status()
|
response = session.get(feed_url, timeout=FEED_TIMEOUT)
|
||||||
soup = BeautifulSoup(response.content, 'xml')
|
response.raise_for_status()
|
||||||
items = soup.find_all('item')
|
soup = BeautifulSoup(response.content, 'xml')
|
||||||
|
items = soup.find_all('item')
|
||||||
|
|
||||||
feed_title = RSS_FEED_NAMES.get(feed_url, (get_clean_source_name(feed_url), feed_url))
|
feed_title = RSS_FEED_NAMES.get(feed_url, (get_clean_source_name(feed_url), feed_url))
|
||||||
for item in items:
|
for item in items:
|
||||||
try:
|
try:
|
||||||
title = item.find('title').text.strip() if item.find('title') else "Untitled"
|
title = item.find('title').text.strip() if item.find('title') else "Untitled"
|
||||||
link = item.find('link').text.strip() if item.find('link') else ""
|
link = item.find('link').text.strip() if item.find('link') else ""
|
||||||
pub_date = item.find('pubDate')
|
pub_date = item.find('pubDate')
|
||||||
pub_date = parse_date(pub_date.text) if pub_date else datetime.now(timezone.utc)
|
pub_date = parse_date(pub_date.text) if pub_date else datetime.now(timezone.utc)
|
||||||
|
|
||||||
if pub_date < cutoff_date:
|
if pub_date < cutoff_date:
|
||||||
logging.info(f"Skipping old article: {title} (Published: {pub_date})")
|
logging.info(f"Skipping old article: {title} (Published: {pub_date})")
|
||||||
|
continue
|
||||||
|
|
||||||
|
description = item.find('description')
|
||||||
|
summary = BeautifulSoup(description.text, 'html.parser').get_text().strip() if description else ""
|
||||||
|
content = item.find('content:encoded')
|
||||||
|
content_text = BeautifulSoup(content.text, 'html.parser').get_text().strip() if content else summary
|
||||||
|
|
||||||
|
articles.append({
|
||||||
|
"title": title,
|
||||||
|
"link": link,
|
||||||
|
"summary": summary,
|
||||||
|
"content": content_text,
|
||||||
|
"feed_title": feed_title[0] if isinstance(feed_title, tuple) else feed_title,
|
||||||
|
"pub_date": pub_date
|
||||||
|
})
|
||||||
|
logging.debug(f"Processed article: {title}")
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f"Error processing entry in {feed_url}: {e}")
|
||||||
continue
|
continue
|
||||||
|
logging.info(f"Filtered to {len(articles)} articles from {feed_url}")
|
||||||
description = item.find('description')
|
break
|
||||||
summary = BeautifulSoup(description.text, 'html.parser').get_text().strip() if description else ""
|
except Exception as e:
|
||||||
content = item.find('content:encoded')
|
logging.error(f"Failed to fetch RSS feed {feed_url}: {e}")
|
||||||
content_text = BeautifulSoup(content.text, 'html.parser').get_text().strip() if content else summary
|
if attempt < MAX_RETRIES - 1:
|
||||||
|
time.sleep(RETRY_BACKOFF * (2 ** attempt))
|
||||||
articles.append({
|
continue
|
||||||
"title": title,
|
|
||||||
"link": link,
|
|
||||||
"summary": summary,
|
|
||||||
"content": content_text,
|
|
||||||
"feed_title": feed_title[0] if isinstance(feed_title, tuple) else feed_title,
|
|
||||||
"pub_date": pub_date
|
|
||||||
})
|
|
||||||
logging.debug(f"Processed article: {title}")
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"Error processing entry in {feed_url}: {e}")
|
|
||||||
continue
|
|
||||||
logging.info(f"Filtered to {len(articles)} articles from {feed_url}")
|
|
||||||
except Exception as e:
|
|
||||||
logging.error(f"Failed to fetch RSS feed {feed_url}: {e}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
articles.sort(key=lambda x: x["pub_date"], reverse=True)
|
articles.sort(key=lambda x: x["pub_date"], reverse=True)
|
||||||
logging.info(f"Total RSS articles fetched: {len(articles)}")
|
logging.info(f"Total RSS articles fetched: {len(articles)}")
|
||||||
return articles
|
return articles
|
||||||
|
|
||||||
def fetch_duckduckgo_news_context(title, hours=24):
    """Collect recent DuckDuckGo News headlines related to ``title``.

    Args:
        title: Topic to search for; the query is ``"<title> news"``.
        hours: Only headlines published within this many hours are kept.

    Returns:
        The lower-cased matching headlines joined by spaces, a
        "No recent news found ..." message when nothing matches, or ``title``
        itself once every one of the ``MAX_RETRIES`` attempts has failed.
    """
    # Ask the search API for the narrowest window that still covers `hours`.
    timelimit = "d" if hours <= 24 else "w" if hours <= 168 else "m"
    for attempt in range(MAX_RETRIES):
        try:
            with DDGS() as ddgs:
                results = ddgs.news(f"{title} news", timelimit=timelimit, max_results=5)
                cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
                titles = []
                for r in results:
                    date_str = r.get("date")
                    try:
                        # strptime's %Z does not match a literal "Z" suffix, so
                        # parse ISO-8601 directly with the offset spelled out.
                        iso = date_str[:-1] + "+00:00" if date_str.endswith("Z") else date_str
                        dt = datetime.fromisoformat(iso)
                    except (AttributeError, TypeError, ValueError) as e:
                        # One unparseable or missing date must not sink the batch.
                        logging.warning(f"Date parsing failed for '{date_str}': {e}")
                        continue
                    if dt.tzinfo is None:
                        # Timestamps without an offset are treated as UTC.
                        dt = dt.replace(tzinfo=timezone.utc)
                    if dt > cutoff:
                        titles.append(r["title"].lower())
                context = " ".join(titles) if titles else f"No recent news found within {hours} hours"
                logging.info(f"DuckDuckGo News context for '{title}': {context}")
                return context
        except Exception as e:
            logging.warning(f"DuckDuckGo News context fetch failed for '{title}' (attempt {attempt + 1}): {e}")
            if attempt < MAX_RETRIES - 1:
                # Exponential backoff between attempts; no sleep after the last one.
                time.sleep(RETRY_BACKOFF * (2 ** attempt))
    logging.error(f"Failed to fetch DuckDuckGo News context for '{title}' after {MAX_RETRIES} attempts")
    return title
|
||||||
|
|
||||||
def curate_from_rss():
|
def curate_from_rss():
|
||||||
articles = fetch_rss_feeds() # Corrected from fetch_rss_articles to fetch_rss_feeds
|
try:
|
||||||
if not articles:
|
articles = fetch_rss_feeds()
|
||||||
print("No RSS articles available")
|
if not articles:
|
||||||
logging.info("No RSS articles available")
|
logging.info("No RSS articles available")
|
||||||
return None, None, random.randint(600, 1800)
|
return None, None, False # Continue running
|
||||||
|
|
||||||
attempts = 0
|
attempts = 0
|
||||||
max_attempts = 10
|
max_attempts = 10
|
||||||
while attempts < max_attempts and articles:
|
while attempts < max_attempts and articles:
|
||||||
article = articles.pop(0)
|
article = articles.pop(0)
|
||||||
title = article["title"]
|
title = article["title"]
|
||||||
link = article["link"]
|
link = article["link"]
|
||||||
summary = article.get("summary", "")
|
summary = article.get("summary", "")
|
||||||
source_name = article.get("feed_title", "Unknown Source") # Adjusted to match fetch_rss_feeds output
|
source_name = article.get("feed_title", "Unknown Source")
|
||||||
original_source = f'<a href="{link}">{source_name}</a>'
|
original_source = f'<a href="{link}">{source_name}</a>'
|
||||||
|
|
||||||
if title in posted_titles:
|
if title in posted_titles:
|
||||||
print(f"Skipping already posted article: {title}")
|
logging.info(f"Skipping already posted article: {title}")
|
||||||
logging.info(f"Skipping already posted article: {title}")
|
attempts += 1
|
||||||
attempts += 1
|
continue
|
||||||
continue
|
|
||||||
|
|
||||||
print(f"Trying RSS Article: {title} from {source_name}")
|
logging.info(f"Trying RSS Article: {title} from {source_name}")
|
||||||
logging.info(f"Trying RSS Article: {title} from {source_name}")
|
|
||||||
|
|
||||||
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
||||||
if skip:
|
if skip:
|
||||||
print(f"Skipping filtered RSS article: {title}")
|
logging.info(f"Skipping filtered RSS article: {title}")
|
||||||
logging.info(f"Skipping filtered RSS article: {title}")
|
attempts += 1
|
||||||
attempts += 1
|
continue
|
||||||
continue
|
|
||||||
|
|
||||||
ddg_context = fetch_duckduckgo_news_context(title)
|
ddg_context = fetch_duckduckgo_news_context(title)
|
||||||
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
|
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
|
||||||
interest_score = is_interesting(scoring_content)
|
interest_score = is_interesting(scoring_content)
|
||||||
logging.info(f"Interest score for '{title}': {interest_score}")
|
logging.info(f"Interest score for '{title}': {interest_score}")
|
||||||
if interest_score < 6:
|
if interest_score < 6:
|
||||||
print(f"RSS Interest Too Low: {interest_score}")
|
logging.info(f"RSS Interest Too Low: {interest_score}")
|
||||||
logging.info(f"RSS Interest Too Low: {interest_score}")
|
attempts += 1
|
||||||
attempts += 1
|
continue
|
||||||
continue
|
|
||||||
|
|
||||||
num_paragraphs = determine_paragraph_count(interest_score)
|
num_paragraphs = determine_paragraph_count(interest_score)
|
||||||
extra_prompt = (
|
extra_prompt = (
|
||||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||||
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
||||||
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
||||||
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
|
f"Do NOT introduce unrelated concepts unless in the content or additional context.\n"
|
||||||
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
|
f"Expand on the core idea with relevant context about its appeal or significance in food trends.\n"
|
||||||
f"Do not include emojis in the summary."
|
f"Do not include emojis in the summary."
|
||||||
)
|
|
||||||
content_to_summarize = scoring_content
|
|
||||||
final_summary = summarize_with_gpt4o(
|
|
||||||
content_to_summarize,
|
|
||||||
source_name,
|
|
||||||
link,
|
|
||||||
interest_score=interest_score,
|
|
||||||
extra_prompt=extra_prompt
|
|
||||||
)
|
|
||||||
if not final_summary:
|
|
||||||
logging.info(f"Summary failed for '{title}'")
|
|
||||||
attempts += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
|
||||||
|
|
||||||
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic)
|
|
||||||
if not post_data:
|
|
||||||
attempts += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
|
||||||
if not image_url:
|
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
|
||||||
|
|
||||||
hook = get_dynamic_hook(post_data["title"]).strip()
|
|
||||||
|
|
||||||
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
|
||||||
share_links_template = (
|
|
||||||
f'<p>{share_prompt} '
|
|
||||||
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
|
||||||
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
|
||||||
)
|
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
|
|
||||||
|
|
||||||
global is_posting
|
|
||||||
is_posting = True
|
|
||||||
try:
|
|
||||||
post_id, post_url = post_to_wp(
|
|
||||||
post_data=post_data,
|
|
||||||
category=category,
|
|
||||||
link=link,
|
|
||||||
author=author,
|
|
||||||
image_url=image_url,
|
|
||||||
original_source=original_source,
|
|
||||||
image_source=image_source,
|
|
||||||
uploader=uploader,
|
|
||||||
page_url=page_url,
|
|
||||||
interest_score=interest_score,
|
|
||||||
should_post_tweet=True
|
|
||||||
)
|
)
|
||||||
finally:
|
content_to_summarize = scoring_content
|
||||||
is_posting = False
|
final_summary = summarize_with_gpt4o(
|
||||||
|
content_to_summarize,
|
||||||
|
source_name,
|
||||||
|
link,
|
||||||
|
interest_score=interest_score,
|
||||||
|
extra_prompt=extra_prompt
|
||||||
|
)
|
||||||
|
if not final_summary:
|
||||||
|
logging.info(f"Summary failed for '{title}'")
|
||||||
|
attempts += 1
|
||||||
|
continue
|
||||||
|
|
||||||
if post_id:
|
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||||
share_text = f"Check out this foodie gem! {post_data['title']}"
|
|
||||||
share_text_encoded = quote(share_text)
|
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic)
|
||||||
post_url_encoded = quote(post_url)
|
if not post_data:
|
||||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
attempts += 1
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
continue
|
||||||
|
|
||||||
|
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
||||||
|
if not image_url:
|
||||||
|
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||||
|
|
||||||
|
hook = get_dynamic_hook(post_data["title"]).strip()
|
||||||
|
|
||||||
|
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
||||||
|
share_links_template = (
|
||||||
|
f'<p>{share_prompt} '
|
||||||
|
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
||||||
|
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
||||||
|
)
|
||||||
|
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
|
||||||
|
|
||||||
|
global is_posting
|
||||||
is_posting = True
|
is_posting = True
|
||||||
try:
|
try:
|
||||||
post_to_wp(
|
post_id, post_url = post_to_wp(
|
||||||
post_data=post_data,
|
post_data=post_data,
|
||||||
category=category,
|
category=category,
|
||||||
link=link,
|
link=link,
|
||||||
@@ -333,41 +322,85 @@ def curate_from_rss():
|
|||||||
uploader=uploader,
|
uploader=uploader,
|
||||||
page_url=page_url,
|
page_url=page_url,
|
||||||
interest_score=interest_score,
|
interest_score=interest_score,
|
||||||
post_id=post_id,
|
should_post_tweet=True
|
||||||
should_post_tweet=False
|
|
||||||
)
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
|
||||||
|
attempts += 1
|
||||||
|
continue
|
||||||
finally:
|
finally:
|
||||||
is_posting = False
|
is_posting = False
|
||||||
|
|
||||||
timestamp = datetime.now(timezone.utc).isoformat()
|
if post_id:
|
||||||
save_json_file(POSTED_TITLES_FILE, title, timestamp)
|
share_text = f"Check out this foodie gem! {post_data['title']}"
|
||||||
posted_titles.add(title)
|
share_text_encoded = quote(share_text)
|
||||||
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
|
post_url_encoded = quote(post_url)
|
||||||
|
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
||||||
|
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||||
|
is_posting = True
|
||||||
|
try:
|
||||||
|
post_to_wp(
|
||||||
|
post_data=post_data,
|
||||||
|
category=category,
|
||||||
|
link=link,
|
||||||
|
author=author,
|
||||||
|
image_url=image_url,
|
||||||
|
original_source=original_source,
|
||||||
|
image_source=image_source,
|
||||||
|
uploader=uploader,
|
||||||
|
page_url=page_url,
|
||||||
|
interest_score=interest_score,
|
||||||
|
post_id=post_id,
|
||||||
|
should_post_tweet=False
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
|
||||||
|
finally:
|
||||||
|
is_posting = False
|
||||||
|
|
||||||
if image_url:
|
timestamp = datetime.now(timezone.utc).isoformat()
|
||||||
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
save_json_file(POSTED_TITLES_FILE, title, timestamp)
|
||||||
used_images.add(image_url)
|
posted_titles.add(title)
|
||||||
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
|
||||||
|
|
||||||
print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
|
if image_url:
|
||||||
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
|
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
||||||
return post_data, category, random.randint(0, 1800)
|
used_images.add(image_url)
|
||||||
|
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
||||||
|
|
||||||
attempts += 1
|
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
|
||||||
logging.info(f"WP posting failed for '{post_data['title']}'")
|
return post_data, category, True # Run again immediately
|
||||||
|
attempts += 1
|
||||||
|
logging.info(f"WP posting failed for '{post_data['title']}'")
|
||||||
|
|
||||||
print("No interesting RSS article found after attempts")
|
logging.info("No interesting RSS article found after attempts")
|
||||||
logging.info("No interesting RSS article found after attempts")
|
return None, None, False # Wait before running again
|
||||||
return None, None, random.randint(600, 1800)
|
except Exception as e:
|
||||||
|
logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True)
|
||||||
|
return None, None, False
|
||||||
|
|
||||||
def run_rss_automator():
|
def run_rss_automator():
|
||||||
print(f"{datetime.now(timezone.utc)} - INFO - ***** RSS Automator Launched *****")
|
lock_fd = None
|
||||||
logging.info("***** RSS Automator Launched *****")
|
try:
|
||||||
post_data, category, sleep_time = curate_from_rss()
|
lock_fd = acquire_lock()
|
||||||
print(f"Sleeping for {sleep_time}s")
|
logging.info("***** RSS Automator Launched *****")
|
||||||
logging.info(f"Completed run with sleep time: {sleep_time} seconds")
|
post_data, category, should_continue = curate_from_rss()
|
||||||
time.sleep(sleep_time)
|
if not post_data:
|
||||||
return post_data, category, sleep_time
|
logging.info("No postable RSS article found")
|
||||||
|
else:
|
||||||
|
logging.info("Completed RSS run")
|
||||||
|
return post_data, category, should_continue
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Fatal error in run_rss_automator: {e}", exc_info=True)
|
||||||
|
return None, None, False
|
||||||
|
finally:
|
||||||
|
if lock_fd:
|
||||||
|
fcntl.flock(lock_fd, fcntl.LOCK_UN)
|
||||||
|
lock_fd.close()
|
||||||
|
os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
run_rss_automator()
|
setup_logging()
|
||||||
|
post_data, category, should_continue = run_rss_automator()
|
||||||
|
# Remove sleep timer, let manage_scripts.sh control execution
|
||||||
|
logging.info(f"Run completed, should_continue: {should_continue}")
|
||||||
+236
-56
@@ -1,83 +1,263 @@
|
|||||||
import random
|
# foodie_engagement_tweet.py
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import random
|
||||||
|
import signal
|
||||||
|
import sys
|
||||||
|
import fcntl
|
||||||
|
import os
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
from openai import OpenAI # Add this import
|
from openai import OpenAI
|
||||||
from foodie_utils import post_tweet, AUTHORS, SUMMARY_MODEL
|
from foodie_utils import post_tweet, AUTHORS, SUMMARY_MODEL, load_post_counts, save_post_counts
|
||||||
from foodie_config import X_API_CREDENTIALS
|
from foodie_config import X_API_CREDENTIALS, AUTHOR_BACKGROUNDS_FILE
|
||||||
from dotenv import load_dotenv # Add this import
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
# Setup logging
|
|
||||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
||||||
|
|
||||||
# Load environment variables
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
# Initialize OpenAI client
|
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_engagement_tweet.lock"
|
||||||
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_engagement_tweet.log"
|
||||||
|
REFERENCE_DATE_FILE = "/home/shane/foodie_automator/engagement_reference_date.json"
|
||||||
|
LOG_PRUNE_DAYS = 30
|
||||||
|
MAX_RETRIES = 3
|
||||||
|
RETRY_BACKOFF = 2
|
||||||
|
|
||||||
|
def setup_logging():
|
||||||
|
"""Initialize logging with pruning of old logs."""
|
||||||
|
try:
|
||||||
|
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
|
||||||
|
if os.path.exists(LOG_FILE):
|
||||||
|
with open(LOG_FILE, 'r') as f:
|
||||||
|
lines = f.readlines()
|
||||||
|
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
|
||||||
|
pruned_lines = []
|
||||||
|
malformed_count = 0
|
||||||
|
for line in lines:
|
||||||
|
if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit():
|
||||||
|
malformed_count += 1
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
|
||||||
|
if timestamp > cutoff:
|
||||||
|
pruned_lines.append(line)
|
||||||
|
except ValueError:
|
||||||
|
malformed_count += 1
|
||||||
|
continue
|
||||||
|
if malformed_count > 0:
|
||||||
|
logging.info(f"Skipped {malformed_count} malformed log lines during pruning")
|
||||||
|
with open(LOG_FILE, 'w') as f:
|
||||||
|
f.writelines(pruned_lines)
|
||||||
|
|
||||||
|
logging.basicConfig(
|
||||||
|
filename=LOG_FILE,
|
||||||
|
level=logging.INFO,
|
||||||
|
format='%(asctime)s - %(levelname)s - %(message)s',
|
||||||
|
datefmt='%Y-%m-%d %H:%M:%S'
|
||||||
|
)
|
||||||
|
console_handler = logging.StreamHandler()
|
||||||
|
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
|
||||||
|
logging.getLogger().addHandler(console_handler)
|
||||||
|
logging.getLogger("openai").setLevel(logging.WARNING)
|
||||||
|
logging.info("Logging initialized for foodie_engagement_tweet.py")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Failed to setup logging: {e}")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
def acquire_lock():
|
||||||
|
"""Acquire a lock to prevent concurrent runs."""
|
||||||
|
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
|
||||||
|
lock_fd = open(LOCK_FILE, 'w')
|
||||||
|
try:
|
||||||
|
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||||
|
lock_fd.write(str(os.getpid()))
|
||||||
|
lock_fd.flush()
|
||||||
|
return lock_fd
|
||||||
|
except IOError:
|
||||||
|
logging.info("Another instance of foodie_engagement_tweet.py is running")
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
|
def signal_handler(sig, frame):
|
||||||
|
"""Handle termination signals gracefully."""
|
||||||
|
logging.info("Received termination signal, exiting...")
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
|
signal.signal(signal.SIGTERM, signal_handler)
|
||||||
|
signal.signal(signal.SIGINT, signal_handler)
|
||||||
|
|
||||||
|
# Initialize OpenAI client
|
||||||
|
try:
|
||||||
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
|
if not os.getenv("OPENAI_API_KEY"):
|
||||||
|
logging.error("OPENAI_API_KEY is not set in environment variables")
|
||||||
|
raise ValueError("OPENAI_API_KEY is required")
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Failed to initialize OpenAI client: {e}", exc_info=True)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
# Load author backgrounds
|
||||||
|
try:
|
||||||
|
with open(AUTHOR_BACKGROUNDS_FILE, 'r') as f:
|
||||||
|
AUTHOR_BACKGROUNDS = json.load(f)
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Failed to load author_backgrounds.json: {e}", exc_info=True)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
def get_reference_date():
|
||||||
|
"""Load or initialize the reference date for the 2-day interval."""
|
||||||
|
os.makedirs(os.path.dirname(REFERENCE_DATE_FILE), exist_ok=True)
|
||||||
|
if os.path.exists(REFERENCE_DATE_FILE):
|
||||||
|
try:
|
||||||
|
with open(REFERENCE_DATE_FILE, 'r') as f:
|
||||||
|
data = json.load(f)
|
||||||
|
reference_date = datetime.fromisoformat(data["reference_date"]).replace(tzinfo=timezone.utc)
|
||||||
|
logging.info(f"Loaded reference date: {reference_date.date()}")
|
||||||
|
return reference_date
|
||||||
|
except (json.JSONDecodeError, KeyError, ValueError) as e:
|
||||||
|
logging.error(f"Failed to load reference date from {REFERENCE_DATE_FILE}: {e}. Initializing new date.")
|
||||||
|
|
||||||
|
# Initialize with current date (start of day)
|
||||||
|
reference_date = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
|
||||||
|
try:
|
||||||
|
with open(REFERENCE_DATE_FILE, 'w') as f:
|
||||||
|
json.dump({"reference_date": reference_date.isoformat()}, f)
|
||||||
|
logging.info(f"Initialized reference date: {reference_date.date()}")
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Failed to save reference date to {REFERENCE_DATE_FILE}: {e}. Using current date.")
|
||||||
|
return reference_date
|
||||||
|
|
||||||
def generate_engagement_tweet(author):
|
def generate_engagement_tweet(author):
|
||||||
# Fetch x_username from X_API_CREDENTIALS
|
"""Generate an engagement tweet using author background themes."""
|
||||||
credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None)
|
credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None)
|
||||||
if not credentials:
|
if not credentials:
|
||||||
logging.error(f"No X credentials found for {author['username']}")
|
logging.error(f"No X credentials found for {author['username']}")
|
||||||
return None
|
return None
|
||||||
author_handle = credentials["x_username"]
|
author_handle = credentials["x_username"]
|
||||||
|
|
||||||
|
background = next((bg for bg in AUTHOR_BACKGROUNDS if bg["username"] == author["username"]), {})
|
||||||
|
if not background or "engagement_themes" not in background:
|
||||||
|
logging.warning(f"No background or engagement themes found for {author['username']}")
|
||||||
|
theme = "food trends"
|
||||||
|
else:
|
||||||
|
theme = random.choice(background["engagement_themes"])
|
||||||
|
|
||||||
prompt = (
|
prompt = (
|
||||||
f"Generate a concise tweet (under 280 characters) for {author_handle}. "
|
f"Generate a concise tweet (under 280 characters) for {author_handle}. "
|
||||||
f"Create an engaging food-related question or statement to spark interaction. "
|
f"Create an engaging question or statement about {theme} to spark interaction. "
|
||||||
f"Include a call to action to follow {author_handle} or like the tweet, and mention InsiderFoodie.com with a link to https://insiderfoodie.com. "
|
f"Include a call to action to follow {author_handle} or like the tweet, and mention InsiderFoodie.com with a link to https://insiderfoodie.com. "
|
||||||
f"Avoid using the word 'elevate'—use more humanized language like 'level up' or 'bring to life'. "
|
f"Avoid using the word 'elevate'—use more humanized language like 'level up' or 'bring to life'. "
|
||||||
f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways)."
|
f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways)."
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
for attempt in range(MAX_RETRIES):
|
||||||
response = client.chat.completions.create(
|
try:
|
||||||
model=SUMMARY_MODEL,
|
response = client.chat.completions.create(
|
||||||
messages=[
|
model=SUMMARY_MODEL,
|
||||||
{"role": "system", "content": "You are a social media expert crafting engaging tweets."},
|
messages=[
|
||||||
{"role": "user", "content": prompt}
|
{"role": "system", "content": "You are a social media expert crafting engaging tweets."},
|
||||||
],
|
{"role": "user", "content": prompt}
|
||||||
max_tokens=100,
|
],
|
||||||
temperature=0.7
|
max_tokens=100,
|
||||||
)
|
temperature=0.7
|
||||||
tweet = response.choices[0].message.content.strip()
|
)
|
||||||
if len(tweet) > 280:
|
tweet = response.choices[0].message.content.strip()
|
||||||
tweet = tweet[:277] + "..."
|
if len(tweet) > 280:
|
||||||
return tweet
|
tweet = tweet[:277] + "..."
|
||||||
except Exception as e:
|
logging.debug(f"Generated engagement tweet: {tweet}")
|
||||||
logging.warning(f"Failed to generate engagement tweet for {author['username']}: {e}")
|
return tweet
|
||||||
# Fallback templates
|
except Exception as e:
|
||||||
engagement_templates = [
|
logging.warning(f"Failed to generate engagement tweet for {author['username']} (attempt {attempt + 1}): {e}")
|
||||||
f"Whats the most mouthwatering dish youve seen this week Share below and follow {author_handle} for more foodie ideas on InsiderFoodie.com Link: https://insiderfoodie.com",
|
if attempt < MAX_RETRIES - 1:
|
||||||
f"Food lovers unite Whats your go to comfort food Tell us and like this tweet for more tasty ideas from {author_handle} on InsiderFoodie.com Link: https://insiderfoodie.com",
|
time.sleep(RETRY_BACKOFF * (2 ** attempt))
|
||||||
f"Ever tried a dish that looked too good to eat Share your favorites and follow {author_handle} for more culinary trends on InsiderFoodie.com Link: https://insiderfoodie.com",
|
else:
|
||||||
f"What food trend are you loving right now Let us know and like this tweet to keep up with {author_handle} on InsiderFoodie.com Link: https://insiderfoodie.com"
|
logging.error(f"Failed to generate engagement tweet after {MAX_RETRIES} attempts")
|
||||||
]
|
engagement_templates = [
|
||||||
template = random.choice(engagement_templates)
|
f"What's the most mouthwatering {theme} you've seen this week? Share below and follow {author_handle} for more on InsiderFoodie.com! Link: https://insiderfoodie.com",
|
||||||
return template
|
f"{theme.capitalize()} lovers unite! What's your go-to pick? Tell us and like this tweet for more from {author_handle} on InsiderFoodie.com! Link: https://insiderfoodie.com",
|
||||||
|
f"Ever tried a {theme} that blew your mind? Share your favorites and follow {author_handle} for more on InsiderFoodie.com! Link: https://insiderfoodie.com",
|
||||||
|
f"What {theme} trend are you loving right now? Let us know and like this tweet to keep up with {author_handle} on InsiderFoodie.com! Link: https://insiderfoodie.com"
|
||||||
|
]
|
||||||
|
template = random.choice(engagement_templates)
|
||||||
|
logging.info(f"Using fallback engagement tweet: {template}")
|
||||||
|
return template
|
||||||
|
|
||||||
def post_engagement_tweet():
|
def post_engagement_tweet():
|
||||||
# Reference date for calculating the 2-day interval
|
"""Post engagement tweets for authors every 2 days."""
|
||||||
reference_date = datetime(2025, 4, 29, tzinfo=timezone.utc) # Starting from April 29, 2025
|
try:
|
||||||
current_date = datetime.now(timezone.utc)
|
logging.info("Starting foodie_engagement_tweet.py")
|
||||||
|
print("Starting foodie_engagement_tweet.py")
|
||||||
|
|
||||||
# Calculate the number of days since the reference date
|
# Get reference date
|
||||||
days_since_reference = (current_date - reference_date).days
|
reference_date = get_reference_date()
|
||||||
|
current_date = datetime.now(timezone.utc)
|
||||||
|
days_since_reference = (current_date - reference_date).days
|
||||||
|
logging.info(f"Days since reference date ({reference_date.date()}): {days_since_reference}")
|
||||||
|
print(f"Days since reference date ({reference_date.date()}): {days_since_reference}")
|
||||||
|
|
||||||
# Post only if the number of days since the reference date is divisible by 2
|
# Post only if the number of days since the reference date is divisible by 2
|
||||||
if days_since_reference % 2 == 0:
|
if days_since_reference % 2 == 0:
|
||||||
logging.info("Today is an engagement tweet day (every 2 days). Posting...")
|
logging.info("Today is an engagement tweet day (every 2 days). Posting...")
|
||||||
for author in AUTHORS:
|
print("Today is an engagement tweet day (every 2 days). Posting...")
|
||||||
tweet = generate_engagement_tweet(author)
|
|
||||||
|
|
||||||
logging.info(f"Posting engagement tweet for {author['username']}: {tweet}")
|
# Load post counts to check limits
|
||||||
if post_tweet(author, tweet):
|
post_counts = load_post_counts()
|
||||||
logging.info(f"Successfully posted engagement tweet for {author['username']}")
|
|
||||||
else:
|
for author in AUTHORS:
|
||||||
logging.warning(f"Failed to post engagement tweet for {author['username']}")
|
try:
|
||||||
else:
|
# Check post limits
|
||||||
logging.info("Today is not an engagement tweet day (every 2 days). Skipping...")
|
author_count = next((entry for entry in post_counts if entry["username"] == author["username"]), None)
|
||||||
|
if not author_count:
|
||||||
|
logging.error(f"No post count entry for {author['username']}, skipping")
|
||||||
|
continue
|
||||||
|
if author_count["monthly_count"] >= 500:
|
||||||
|
logging.warning(f"Monthly post limit (500) reached for {author['username']}, skipping")
|
||||||
|
continue
|
||||||
|
if author_count["daily_count"] >= 20:
|
||||||
|
logging.warning(f"Daily post limit (20) reached for {author['username']}, skipping")
|
||||||
|
continue
|
||||||
|
|
||||||
|
tweet = generate_engagement_tweet(author)
|
||||||
|
if not tweet:
|
||||||
|
logging.error(f"Failed to generate engagement tweet for {author['username']}, skipping")
|
||||||
|
continue
|
||||||
|
|
||||||
|
logging.info(f"Posting engagement tweet for {author['username']}: {tweet}")
|
||||||
|
print(f"Posting engagement tweet for {author['username']}: {tweet}")
|
||||||
|
if post_tweet(author, tweet):
|
||||||
|
logging.info(f"Successfully posted engagement tweet for {author['username']}")
|
||||||
|
# Update post counts
|
||||||
|
author_count["monthly_count"] += 1
|
||||||
|
author_count["daily_count"] += 1
|
||||||
|
save_post_counts(post_counts)
|
||||||
|
else:
|
||||||
|
logging.warning(f"Failed to post engagement tweet for {author['username']}")
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Error posting engagement tweet for {author['username']}: {e}", exc_info=True)
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
logging.info(f"Today is not an engagement tweet day (every 2 days). Days since reference: {days_since_reference}. Skipping...")
|
||||||
|
print(f"Today is not an engagement tweet day (every 2 days). Days since reference: {days_since_reference}. Skipping...")
|
||||||
|
|
||||||
|
logging.info("Completed foodie_engagement_tweet.py")
|
||||||
|
print("Completed foodie_engagement_tweet.py")
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Unexpected error in post_engagement_tweet: {e}", exc_info=True)
|
||||||
|
print(f"Error in post_engagement_tweet: {e}")
|
||||||
|
|
||||||
|
def main():
|
||||||
|
"""Main function to run the script."""
|
||||||
|
lock_fd = None
|
||||||
|
try:
|
||||||
|
lock_fd = acquire_lock()
|
||||||
|
setup_logging()
|
||||||
|
post_engagement_tweet()
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Fatal error in main: {e}", exc_info=True)
|
||||||
|
print(f"Fatal error: {e}")
|
||||||
|
sys.exit(1)
|
||||||
|
finally:
|
||||||
|
if lock_fd:
|
||||||
|
fcntl.flock(lock_fd, fcntl.LOCK_UN)
|
||||||
|
lock_fd.close()
|
||||||
|
os.remove(LOCK_FILE) if os.path.exists(LOCK_FILE) else None
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
post_engagement_tweet()
|
main()
|
||||||
+315
-169
@@ -1,94 +1,134 @@
|
|||||||
|
# foodie_weekly_thread.py
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
from datetime import datetime, timedelta, timezone
|
|
||||||
import logging
|
import logging
|
||||||
import random
|
import random
|
||||||
|
import signal
|
||||||
|
import sys
|
||||||
|
import fcntl
|
||||||
|
import time
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
import tweepy
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
from foodie_utils import post_tweet, AUTHORS, SUMMARY_MODEL
|
from foodie_utils import post_tweet, AUTHORS, SUMMARY_MODEL
|
||||||
from foodie_config import X_API_CREDENTIALS
|
from foodie_config import X_API_CREDENTIALS
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
import tweepy
|
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
# Logging configuration
|
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_weekly_thread.lock"
|
||||||
LOG_FILE = "/home/shane/foodie_automator/foodie_weekly_thread.log"
|
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_weekly_thread.log"
|
||||||
LOG_PRUNE_DAYS = 30
|
LOG_PRUNE_DAYS = 30
|
||||||
|
MAX_RETRIES = 3
|
||||||
|
RETRY_BACKOFF = 2
|
||||||
|
RECENT_POSTS_FILE = "/home/shane/foodie_automator/recent_posts.json"
|
||||||
|
|
||||||
def setup_logging():
|
def setup_logging():
|
||||||
if os.path.exists(LOG_FILE):
|
"""Initialize logging with pruning of old logs."""
|
||||||
with open(LOG_FILE, 'r') as f:
|
try:
|
||||||
lines = f.readlines()
|
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
|
||||||
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
|
if os.path.exists(LOG_FILE):
|
||||||
pruned_lines = []
|
with open(LOG_FILE, 'r') as f:
|
||||||
for line in lines:
|
lines = f.readlines()
|
||||||
try:
|
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
|
||||||
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
|
pruned_lines = []
|
||||||
if timestamp > cutoff:
|
malformed_count = 0
|
||||||
pruned_lines.append(line)
|
for line in lines:
|
||||||
except ValueError:
|
if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit():
|
||||||
continue
|
malformed_count += 1
|
||||||
with open(LOG_FILE, 'w') as f:
|
continue
|
||||||
f.writelines(pruned_lines)
|
try:
|
||||||
|
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
|
||||||
|
if timestamp > cutoff:
|
||||||
|
pruned_lines.append(line)
|
||||||
|
except ValueError:
|
||||||
|
malformed_count += 1
|
||||||
|
continue
|
||||||
|
if malformed_count > 0:
|
||||||
|
logging.info(f"Skipped {malformed_count} malformed log lines during pruning")
|
||||||
|
with open(LOG_FILE, 'w') as f:
|
||||||
|
f.writelines(pruned_lines)
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
filename=LOG_FILE,
|
filename=LOG_FILE,
|
||||||
level=logging.DEBUG,
|
level=logging.INFO,
|
||||||
format='%(asctime)s - %(levelname)s - %(message)s',
|
format='%(asctime)s - %(levelname)s - %(message)s',
|
||||||
datefmt='%Y-%m-%d %H:%M:%S'
|
datefmt='%Y-%m-%d %H:%M:%S'
|
||||||
)
|
)
|
||||||
console_handler = logging.StreamHandler()
|
console_handler = logging.StreamHandler()
|
||||||
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
|
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
|
||||||
logging.getLogger().addHandler(console_handler)
|
logging.getLogger().addHandler(console_handler)
|
||||||
logging.info("Logging initialized for foodie_weekly_thread.py")
|
logging.getLogger("tweepy").setLevel(logging.WARNING)
|
||||||
|
logging.info("Logging initialized for foodie_weekly_thread.py")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Failed to setup logging: {e}")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
setup_logging()
|
def acquire_lock():
    """Acquire an exclusive, non-blocking lock to prevent concurrent runs.

    Creates the lock directory if needed, takes an ``flock`` on LOCK_FILE
    and records this process's PID in it.

    Returns:
        The open lock file object. The caller must keep it open for as long
        as the lock should be held.

    Exits:
        Calls ``sys.exit(0)`` when the lock is already held elsewhere.
    """
    os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
    # Open in append mode: unlike 'w', this does not truncate the file, so a
    # losing contender cannot wipe the PID written by the current holder.
    lock_fd = open(LOCK_FILE, 'a+')
    try:
        fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except OSError:  # IOError is an alias of OSError on Python 3
        # Do not leak the descriptor when we are about to exit.
        lock_fd.close()
        logging.info("Another instance of foodie_weekly_thread.py is running")
        sys.exit(0)
    # Lock is ours: only now is it safe to replace the file contents.
    lock_fd.seek(0)
    lock_fd.truncate()
    lock_fd.write(str(os.getpid()))
    lock_fd.flush()
    return lock_fd
|
||||||
|
|
||||||
|
def signal_handler(sig, frame):
    """Log that a termination signal arrived and end the process with status 0.

    Both arguments are accepted to satisfy the signal-handler calling
    convention; neither is used.
    """
    logging.info("Received termination signal, exiting...")
    raise SystemExit(0)
|
||||||
|
|
||||||
|
signal.signal(signal.SIGTERM, signal_handler)
|
||||||
|
signal.signal(signal.SIGINT, signal_handler)
|
||||||
|
|
||||||
# Initialize OpenAI client
|
# Initialize OpenAI client
|
||||||
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
try:
|
||||||
if not os.getenv("OPENAI_API_KEY"):
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
logging.error("OPENAI_API_KEY is not set in environment variables")
|
if not os.getenv("OPENAI_API_KEY"):
|
||||||
raise ValueError("OPENAI_API_KEY is required")
|
logging.error("OPENAI_API_KEY is not set in environment variables")
|
||||||
|
raise ValueError("OPENAI_API_KEY is required")
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Failed to initialize OpenAI client: {e}", exc_info=True)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
# Validate X_API_CREDENTIALS and test API access
|
|
||||||
def validate_twitter_credentials():
|
def validate_twitter_credentials():
|
||||||
|
"""Validate Twitter API credentials for all authors."""
|
||||||
logging.info("Validating Twitter API credentials for all authors")
|
logging.info("Validating Twitter API credentials for all authors")
|
||||||
valid_credentials = []
|
valid_credentials = []
|
||||||
for author in AUTHORS:
|
for author in AUTHORS:
|
||||||
credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None)
|
credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None)
|
||||||
if not credentials:
|
if not credentials:
|
||||||
logging.error(f"No X credentials found for {author['username']} in X_API_CREDENTIALS")
|
logging.error(f"No X credentials found for {author['username']} in X_API_CREDENTIALS")
|
||||||
print(f"No X credentials found for {author['username']}")
|
|
||||||
continue
|
continue
|
||||||
logging.debug(f"Testing credentials for {author['username']} (handle: {credentials['x_username']})")
|
for attempt in range(MAX_RETRIES):
|
||||||
try:
|
try:
|
||||||
client = tweepy.Client(
|
twitter_client = tweepy.Client(
|
||||||
consumer_key=credentials["api_key"],
|
consumer_key=credentials["api_key"],
|
||||||
consumer_secret=credentials["api_secret"],
|
consumer_secret=credentials["api_secret"],
|
||||||
access_token=credentials["access_token"],
|
access_token=credentials["access_token"],
|
||||||
access_token_secret=credentials["access_token_secret"]
|
access_token_secret=credentials["access_token_secret"]
|
||||||
)
|
)
|
||||||
# Test API access by fetching the user's profile
|
user = twitter_client.get_me()
|
||||||
user = client.get_me()
|
logging.info(f"Credentials valid for {author['username']} (handle: {credentials['x_username']})")
|
||||||
logging.info(f"Credentials valid for {author['username']} (handle: {credentials['x_username']}, user_id: {user.data.id})")
|
valid_credentials.append(credentials)
|
||||||
print(f"Credentials valid for {author['username']} (handle: {credentials['x_username']})")
|
break
|
||||||
valid_credentials.append(credentials)
|
except tweepy.TweepyException as e:
|
||||||
except tweepy.TweepyException as e:
|
logging.error(f"Failed to validate credentials for {author['username']} (attempt {attempt + 1}): {e}")
|
||||||
logging.error(f"Failed to validate credentials for {author['username']} (handle: {credentials['x_username']}): {e}")
|
if attempt < MAX_RETRIES - 1:
|
||||||
if hasattr(e, 'response') and e.response:
|
time.sleep(RETRY_BACKOFF * (2 ** attempt))
|
||||||
logging.error(f"Twitter API response: {e.response.text}")
|
else:
|
||||||
print(f"Failed to validate credentials for {author['username']}: {e}")
|
logging.error(f"Credentials invalid for {author['username']} after {MAX_RETRIES} attempts")
|
||||||
if not valid_credentials:
|
if not valid_credentials:
|
||||||
logging.error("No valid Twitter credentials found for any author")
|
logging.error("No valid Twitter credentials found for any author")
|
||||||
raise ValueError("No valid Twitter credentials found")
|
raise ValueError("No valid Twitter credentials found")
|
||||||
return valid_credentials
|
return valid_credentials
|
||||||
|
|
||||||
# Run credential validation
|
|
||||||
validate_twitter_credentials()
|
|
||||||
|
|
||||||
RECENT_POSTS_FILE = "/home/shane/foodie_automator/recent_posts.json"
|
|
||||||
|
|
||||||
def load_recent_posts():
|
def load_recent_posts():
|
||||||
|
"""Load and deduplicate posts from recent_posts.json."""
|
||||||
posts = []
|
posts = []
|
||||||
unique_posts = {}
|
unique_posts = {}
|
||||||
logging.debug(f"Attempting to load posts from {RECENT_POSTS_FILE}")
|
logging.debug(f"Attempting to load posts from {RECENT_POSTS_FILE}")
|
||||||
@@ -131,13 +171,15 @@ def load_recent_posts():
|
|||||||
continue
|
continue
|
||||||
logging.info(f"Loaded {len(posts)} unique posts from {RECENT_POSTS_FILE} (after deduplication)")
|
logging.info(f"Loaded {len(posts)} unique posts from {RECENT_POSTS_FILE} (after deduplication)")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Failed to load {RECENT_POSTS_FILE}: {e}")
|
logging.error(f"Failed to load {RECENT_POSTS_FILE}: {e}", exc_info=True)
|
||||||
|
return posts
|
||||||
|
|
||||||
if not posts:
|
if not posts:
|
||||||
logging.warning(f"No valid posts loaded from {RECENT_POSTS_FILE}")
|
logging.warning(f"No valid posts loaded from {RECENT_POSTS_FILE}")
|
||||||
return posts
|
return posts
|
||||||
|
|
||||||
def filter_posts_for_week(posts, start_date, end_date):
|
def filter_posts_for_week(posts, start_date, end_date):
|
||||||
|
"""Filter posts within the specified week."""
|
||||||
filtered_posts = []
|
filtered_posts = []
|
||||||
logging.debug(f"Filtering {len(posts)} posts for range {start_date} to {end_date}")
|
logging.debug(f"Filtering {len(posts)} posts for range {start_date} to {end_date}")
|
||||||
|
|
||||||
@@ -155,6 +197,7 @@ def filter_posts_for_week(posts, start_date, end_date):
|
|||||||
return filtered_posts
|
return filtered_posts
|
||||||
|
|
||||||
def generate_intro_tweet(author):
|
def generate_intro_tweet(author):
|
||||||
|
"""Generate an intro tweet for the weekly thread."""
|
||||||
credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None)
|
credentials = next((cred for cred in X_API_CREDENTIALS if cred["username"] == author["username"]), None)
|
||||||
if not credentials:
|
if not credentials:
|
||||||
logging.error(f"No X credentials found for {author['username']}")
|
logging.error(f"No X credentials found for {author['username']}")
|
||||||
@@ -170,118 +213,221 @@ def generate_intro_tweet(author):
|
|||||||
f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways)."
|
f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways)."
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
for attempt in range(MAX_RETRIES):
|
||||||
response = client.chat.completions.create(
|
try:
|
||||||
model=SUMMARY_MODEL,
|
response = client.chat.completions.create(
|
||||||
messages=[
|
model=SUMMARY_MODEL,
|
||||||
{"role": "system", "content": "You are a social media expert crafting engaging tweets."},
|
messages=[
|
||||||
{"role": "user", "content": prompt}
|
{"role": "system", "content": "You are a social media expert crafting engaging tweets."},
|
||||||
],
|
{"role": "user", "content": prompt}
|
||||||
max_tokens=100,
|
],
|
||||||
temperature=0.7
|
max_tokens=100,
|
||||||
)
|
temperature=0.7
|
||||||
tweet = response.choices[0].message.content.strip()
|
)
|
||||||
if len(tweet) > 280:
|
tweet = response.choices[0].message.content.strip()
|
||||||
tweet = tweet[:277] + "..."
|
if len(tweet) > 280:
|
||||||
logging.debug(f"Generated intro tweet: {tweet}")
|
tweet = tweet[:277] + "..."
|
||||||
return tweet
|
logging.debug(f"Generated intro tweet: {tweet}")
|
||||||
except Exception as e:
|
return tweet
|
||||||
logging.error(f"Failed to generate intro tweet for {author['username']}: {e}")
|
except Exception as e:
|
||||||
fallback = (
|
logging.warning(f"Failed to generate intro tweet for {author['username']} (attempt {attempt + 1}): {e}")
|
||||||
f"This weeks top 10 foodie finds by {author_handle} Check out the best on InsiderFoodie.com "
|
if attempt < MAX_RETRIES - 1:
|
||||||
f"Follow {author_handle} for more and like this thread to stay in the loop Visit us at https://insiderfoodie.com"
|
time.sleep(RETRY_BACKOFF * (2 ** attempt))
|
||||||
)
|
else:
|
||||||
logging.info(f"Using fallback intro tweet: {fallback}")
|
logging.error(f"Failed to generate intro tweet after {MAX_RETRIES} attempts")
|
||||||
return fallback
|
fallback = (
|
||||||
|
f"This week's top 10 foodie finds by {author_handle}! Check out the best on InsiderFoodie.com. "
|
||||||
|
f"Follow {author_handle} for more and like this thread to stay in the loop! Visit us at https://insiderfoodie.com"
|
||||||
|
)
|
||||||
|
logging.info(f"Using fallback intro tweet: {fallback}")
|
||||||
|
return fallback
|
||||||
|
|
||||||
|
def generate_final_cta(author):
    """Generate a final CTA tweet for the weekly thread using GPT.

    Finds the author's entry in X_API_CREDENTIALS, asks the chat model for a
    closing tweet, and retries up to MAX_RETRIES times with exponential
    backoff. If the last attempt also fails, a canned fallback tweet is
    returned instead.

    Returns:
        The tweet text (cut to 280 characters), the fallback text after the
        final failed attempt, or None when the author has no credentials entry.
    """
    credentials = None
    for entry in X_API_CREDENTIALS:
        if entry["username"] == author["username"]:
            credentials = entry
            break
    if not credentials:
        logging.error(f"No X credentials found for {author['username']}")
        return None

    author_handle = credentials["x_username"]
    logging.debug(f"Generating final CTA tweet for {author_handle}")

    prompt = "".join([
        f"Generate a concise tweet (under 280 characters) for {author_handle}. ",
        f"Conclude a thread of their top 10 foodie posts of the week on InsiderFoodie.com. ",
        f"Make it engaging, value-driven, and urgent, in the style of Neil Patel. ",
        f"Include a call to action to visit InsiderFoodie.com and follow {author_handle}. ",
        f"Mention that the top 10 foodie trends are shared every Monday. ",
        f"Avoid using the word 'elevate'—use humanized language like 'level up' or 'bring to life'. ",
        f"Do not include emojis, hashtags, or reward-driven incentives (e.g., giveaways).",
    ])
    chat_messages = [
        {"role": "system", "content": "You are a social media expert crafting engaging tweets."},
        {"role": "user", "content": prompt},
    ]

    final_attempt = MAX_RETRIES - 1
    for attempt in range(MAX_RETRIES):
        try:
            response = client.chat.completions.create(
                model=SUMMARY_MODEL,
                messages=chat_messages,
                max_tokens=100,
                temperature=0.7,
            )
            tweet = response.choices[0].message.content.strip()
            # Hard cap at the 280-character limit, marking the cut with an ellipsis.
            tweet = tweet if len(tweet) <= 280 else tweet[:277] + "..."
            logging.debug(f"Generated final CTA tweet: {tweet}")
            return tweet
        except Exception as e:
            logging.warning(f"Failed to generate final CTA tweet for {author['username']} (attempt {attempt + 1}): {e}")
            if attempt < final_attempt:
                # Back off exponentially before the next try.
                time.sleep(RETRY_BACKOFF * (2 ** attempt))
                continue
            logging.error(f"Failed to generate final CTA tweet after {MAX_RETRIES} attempts")
            fallback = (
                f"Want more foodie insights like these? Check out insiderfoodie.com and follow {author_handle} "
                f"for the world’s top 10 foodie trends every Monday. Don’t miss out!"
            )
            logging.info(f"Using fallback final CTA tweet: {fallback}")
            return fallback
|
||||||
|
|
||||||
def post_weekly_thread():
|
def post_weekly_thread():
|
||||||
logging.info("Entering post_weekly_thread")
|
"""Post weekly threads for each author."""
|
||||||
print("Entering post_weekly_thread")
|
|
||||||
|
|
||||||
today = datetime.now(timezone.utc)
|
|
||||||
days_to_monday = today.weekday()
|
|
||||||
start_date = (today - timedelta(days=days_to_monday + 7)).replace(hour=0, minute=0, second=0, microsecond=0)
|
|
||||||
end_date = start_date + timedelta(days=6, hours=23, minutes=59, seconds=59)
|
|
||||||
|
|
||||||
logging.info(f"Fetching posts from {start_date} to {end_date}")
|
|
||||||
print(f"Fetching posts from {start_date} to {end_date}")
|
|
||||||
|
|
||||||
all_posts = load_recent_posts()
|
|
||||||
print(f"Loaded {len(all_posts)} posts from recent_posts.json")
|
|
||||||
logging.info(f"Loaded {len(all_posts)} posts from recent_posts.json")
|
|
||||||
|
|
||||||
if not all_posts:
|
|
||||||
logging.warning("No posts loaded, exiting post_weekly_thread")
|
|
||||||
print("No posts loaded, exiting post_weekly_thread")
|
|
||||||
return
|
|
||||||
|
|
||||||
weekly_posts = filter_posts_for_week(all_posts, start_date, end_date)
|
|
||||||
print(f"Filtered to {len(weekly_posts)} posts for the week")
|
|
||||||
logging.info(f"Filtered to {len(weekly_posts)} posts for the week")
|
|
||||||
|
|
||||||
if not weekly_posts:
|
|
||||||
logging.warning("No posts found within the week range, exiting post_weekly_thread")
|
|
||||||
print("No posts found within the week range, exiting post_weekly_thread")
|
|
||||||
return
|
|
||||||
|
|
||||||
posts_by_author = {}
|
|
||||||
for post in weekly_posts:
|
|
||||||
author = post["author_username"]
|
|
||||||
if author not in posts_by_author:
|
|
||||||
posts_by_author[author] = []
|
|
||||||
posts_by_author[author].append(post)
|
|
||||||
logging.debug(f"Grouped posts by author: {list(posts_by_author.keys())}")
|
|
||||||
|
|
||||||
for author in AUTHORS:
|
|
||||||
author_posts = posts_by_author.get(author["username"], [])
|
|
||||||
logging.info(f"Processing author {author['username']} with {len(author_posts)} posts")
|
|
||||||
print(f"Processing author {author['username']} with {len(author_posts)} posts")
|
|
||||||
|
|
||||||
if not author_posts:
|
|
||||||
logging.info(f"No posts found for {author['username']} this week")
|
|
||||||
print(f"No posts found for {author['username']} this week")
|
|
||||||
continue
|
|
||||||
|
|
||||||
author_posts.sort(key=lambda x: x.get("timestamp", ""), reverse=True)
|
|
||||||
top_posts = author_posts[:10]
|
|
||||||
logging.info(f"Selected {len(top_posts)} top posts for {author['username']}")
|
|
||||||
print(f"Selected {len(top_posts)} top posts for {author['username']}")
|
|
||||||
|
|
||||||
intro_tweet = generate_intro_tweet(author)
|
|
||||||
if not intro_tweet:
|
|
||||||
logging.error(f"Failed to generate intro tweet for {author['username']}, skipping")
|
|
||||||
continue
|
|
||||||
logging.info(f"Posting intro tweet for {author['username']}: {intro_tweet}")
|
|
||||||
print(f"Posting intro tweet for {author['username']}: {intro_tweet}")
|
|
||||||
|
|
||||||
intro_response = post_tweet(author, intro_tweet)
|
|
||||||
if not intro_response:
|
|
||||||
logging.error(f"Failed to post intro tweet for {author['username']}, skipping thread")
|
|
||||||
print(f"Failed to post intro tweet for {author['username']}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
intro_tweet_id = intro_response.get("id")
|
|
||||||
logging.debug(f"Intro tweet posted with ID {intro_tweet_id}")
|
|
||||||
|
|
||||||
for i, post in enumerate(top_posts, 1):
|
|
||||||
post_tweet_content = f"{i}. {post['title']} Link: {post['url']}"
|
|
||||||
logging.info(f"Posting thread reply {i} for {author['username']}: {post_tweet_content}")
|
|
||||||
print(f"Posting thread reply {i} for {author['username']}: {post_tweet_content}")
|
|
||||||
reply_response = post_tweet(author, post_tweet_content, reply_to_id=intro_tweet_id)
|
|
||||||
if not reply_response:
|
|
||||||
logging.error(f"Failed to post thread reply {i} for {author['username']}")
|
|
||||||
else:
|
|
||||||
logging.debug(f"Thread reply {i} posted with ID {reply_response.get('id')}")
|
|
||||||
|
|
||||||
logging.info(f"Successfully posted weekly thread for {author['username']}")
|
|
||||||
print(f"Successfully posted weekly thread for {author['username']}")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
print("Starting foodie_weekly_thread.py")
|
|
||||||
logging.info("Starting foodie_weekly_thread.py")
|
|
||||||
try:
|
try:
|
||||||
post_weekly_thread()
|
logging.info("Starting foodie_weekly_thread.py")
|
||||||
|
print("Starting foodie_weekly_thread.py")
|
||||||
|
|
||||||
|
valid_credentials = validate_twitter_credentials()
|
||||||
|
if not valid_credentials:
|
||||||
|
logging.error("No valid Twitter credentials found, exiting")
|
||||||
|
return
|
||||||
|
|
||||||
|
today = datetime.now(timezone.utc)
|
||||||
|
days_to_monday = today.weekday()
|
||||||
|
start_date = (today - timedelta(days=days_to_monday + 7)).replace(hour=0, minute=0, second=0, microsecond=0)
|
||||||
|
end_date = start_date + timedelta(days=6, hours=23, minutes=59, seconds=59)
|
||||||
|
|
||||||
|
logging.info(f"Fetching posts from {start_date} to {end_date}")
|
||||||
|
print(f"Fetching posts from {start_date} to {end_date}")
|
||||||
|
|
||||||
|
all_posts = load_recent_posts()
|
||||||
|
logging.info(f"Loaded {len(all_posts)} posts from recent_posts.json")
|
||||||
|
print(f"Loaded {len(all_posts)} posts from recent_posts.json")
|
||||||
|
|
||||||
|
if not all_posts:
|
||||||
|
logging.warning("No posts loaded, exiting post_weekly_thread")
|
||||||
|
print("No posts loaded, exiting post_weekly_thread")
|
||||||
|
return
|
||||||
|
|
||||||
|
weekly_posts = filter_posts_for_week(all_posts, start_date, end_date)
|
||||||
|
logging.info(f"Filtered to {len(weekly_posts)} posts for the week")
|
||||||
|
print(f"Filtered to {len(weekly_posts)} posts for the week")
|
||||||
|
|
||||||
|
if not weekly_posts:
|
||||||
|
logging.warning("No posts found within the week range, exiting post_weekly_thread")
|
||||||
|
print("No posts found within the week range, exiting post_weekly_thread")
|
||||||
|
return
|
||||||
|
|
||||||
|
posts_by_author = {}
|
||||||
|
for post in weekly_posts:
|
||||||
|
author = post["author_username"]
|
||||||
|
if author not in posts_by_author:
|
||||||
|
posts_by_author[author] = []
|
||||||
|
posts_by_author[author].append(post)
|
||||||
|
logging.debug(f"Grouped posts by author: {list(posts_by_author.keys())}")
|
||||||
|
|
||||||
|
for author in AUTHORS:
|
||||||
|
try:
|
||||||
|
author_posts = posts_by_author.get(author["username"], [])
|
||||||
|
logging.info(f"Processing author {author['username']} with {len(author_posts)} posts")
|
||||||
|
print(f"Processing author {author['username']} with {len(author_posts)} posts")
|
||||||
|
|
||||||
|
if not author_posts:
|
||||||
|
logging.info(f"No posts found for {author['username']} this week")
|
||||||
|
print(f"No posts found for {author['username']} this week")
|
||||||
|
continue
|
||||||
|
|
||||||
|
author_posts.sort(key=lambda x: x.get("timestamp", ""), reverse=True)
|
||||||
|
top_posts = author_posts[:10]
|
||||||
|
logging.info(f"Selected {len(top_posts)} top posts for {author['username']}")
|
||||||
|
print(f"Selected {len(top_posts)} top posts for {author['username']}")
|
||||||
|
|
||||||
|
intro_tweet = generate_intro_tweet(author)
|
||||||
|
if not intro_tweet:
|
||||||
|
logging.error(f"Failed to generate intro tweet for {author['username']}, skipping")
|
||||||
|
continue
|
||||||
|
logging.info(f"Posting intro tweet for {author['username']}: {intro_tweet}")
|
||||||
|
print(f"Posting intro tweet for {author['username']}: {intro_tweet}")
|
||||||
|
|
||||||
|
intro_response = post_tweet(author, intro_tweet)
|
||||||
|
if not intro_response:
|
||||||
|
logging.error(f"Failed to post intro tweet for {author['username']}, skipping thread")
|
||||||
|
print(f"Failed to post intro tweet for {author['username']}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
intro_tweet_id = intro_response.get("id")
|
||||||
|
last_tweet_id = intro_tweet_id
|
||||||
|
logging.debug(f"Intro tweet posted with ID {intro_tweet_id}")
|
||||||
|
|
||||||
|
for i, post in enumerate(top_posts, 1):
|
||||||
|
try:
|
||||||
|
post_tweet_content = f"{i}. {post['title']} Link: {post['url']}"
|
||||||
|
logging.info(f"Posting thread reply {i} for {author['username']}: {post_tweet_content}")
|
||||||
|
print(f"Posting thread reply {i} for {author['username']}: {post_tweet_content}")
|
||||||
|
reply_response = post_tweet(author, post_tweet_content, reply_to_id=last_tweet_id)
|
||||||
|
if not reply_response:
|
||||||
|
logging.error(f"Failed to post thread reply {i} for {author['username']}")
|
||||||
|
else:
|
||||||
|
last_tweet_id = reply_response.get("id")
|
||||||
|
logging.debug(f"Thread reply {i} posted with ID {last_tweet_id}")
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Error posting thread reply {i} for {author['username']}: {e}", exc_info=True)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Post final CTA tweet
|
||||||
|
if last_tweet_id and top_posts: # Ensure there's a valid thread to reply to
|
||||||
|
try:
|
||||||
|
final_cta = generate_final_cta(author)
|
||||||
|
if not final_cta:
|
||||||
|
logging.error(f"Failed to generate final CTA tweet for {author['username']}, skipping")
|
||||||
|
continue
|
||||||
|
logging.info(f"Posting final CTA tweet for {author['username']}: {final_cta}")
|
||||||
|
print(f"Posting final CTA tweet for {author['username']}: {final_cta}")
|
||||||
|
cta_response = post_tweet(author, final_cta, reply_to_id=last_tweet_id)
|
||||||
|
if not cta_response:
|
||||||
|
logging.error(f"Failed to post final CTA tweet for {author['username']}")
|
||||||
|
else:
|
||||||
|
logging.debug(f"Final CTA tweet posted with ID {cta_response.get('id')}")
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Error posting final CTA tweet for {author['username']}: {e}", exc_info=True)
|
||||||
|
|
||||||
|
logging.info(f"Successfully posted weekly thread for {author['username']}")
|
||||||
|
print(f"Successfully posted weekly thread for {author['username']}")
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Error processing author {author['username']}: {e}", exc_info=True)
|
||||||
|
continue
|
||||||
|
|
||||||
|
logging.info("Completed foodie_weekly_thread.py")
|
||||||
|
print("Completed foodie_weekly_thread.py")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Unexpected error in post_weekly_thread: {e}", exc_info=True)
|
logging.error(f"Unexpected error in post_weekly_thread: {e}", exc_info=True)
|
||||||
print("Completed foodie_weekly_thread.py")
|
print(f"Error in post_weekly_thread: {e}")
|
||||||
logging.info("Completed foodie_weekly_thread.py")
|
|
||||||
|
def main():
    """Main function to run the script.

    Takes the single-instance lock, sets up logging, posts the weekly
    thread, and always cleans up the lock on the way out. Any unexpected
    exception is logged and printed, and the process exits with status 1.
    """
    lock_fd = None
    try:
        lock_fd = acquire_lock()
        setup_logging()
        post_weekly_thread()
    except Exception as e:
        logging.error(f"Fatal error in main: {e}", exc_info=True)
        print(f"Fatal error: {e}")
        sys.exit(1)
    finally:
        if lock_fd is not None:
            # Unlink while the lock is still held. Removing the file after
            # unlocking leaves a window in which another process can lock the
            # old inode while a third creates and locks a fresh file.
            try:
                os.remove(LOCK_FILE)
            except FileNotFoundError:
                # Already gone; nothing to clean up.
                pass
            try:
                fcntl.flock(lock_fd, fcntl.LOCK_UN)
            finally:
                # Always release the descriptor, even if unlocking failed.
                lock_fd.close()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
+109
-17
@@ -3,7 +3,9 @@
|
|||||||
# Directory to monitor
|
# Directory to monitor
|
||||||
BASE_DIR="/home/shane/foodie_automator"
|
BASE_DIR="/home/shane/foodie_automator"
|
||||||
CHECKSUM_FILE="$BASE_DIR/.file_checksum"
|
CHECKSUM_FILE="$BASE_DIR/.file_checksum"
|
||||||
LOG_FILE="$BASE_DIR/manage_scripts.log"
|
LOG_FILE="$BASE_DIR/logs/manage_scripts.log"
|
||||||
|
VENV_PYTHON="$BASE_DIR/venv/bin/python"
|
||||||
|
LOCK_DIR="$BASE_DIR/locks"
|
||||||
|
|
||||||
# Log function
|
# Log function
|
||||||
log() {
|
log() {
|
||||||
@@ -13,37 +15,105 @@ log() {
|
|||||||
# Calculate checksum of files (excluding logs, JSON files, and venv)
|
# Calculate checksum of files (excluding logs, JSON files, and venv)
|
||||||
calculate_checksum() {
|
calculate_checksum() {
|
||||||
find "$BASE_DIR" -type f \
|
find "$BASE_DIR" -type f \
|
||||||
-not -path "$BASE_DIR/*.log" \
|
-not -path "$BASE_DIR/logs/*" \
|
||||||
-not -path "$BASE_DIR/*.json" \
|
-not -path "$BASE_DIR/*.json" \
|
||||||
-not -path "$BASE_DIR/.file_checksum" \
|
-not -path "$BASE_DIR/.file_checksum" \
|
||||||
-not -path "$BASE_DIR/venv/*" \
|
-not -path "$BASE_DIR/venv/*" \
|
||||||
|
-not -path "$BASE_DIR/locks/*" \
|
||||||
-exec sha256sum {} \; | sort | sha256sum | awk '{print $1}'
|
-exec sha256sum {} \; | sort | sha256sum | awk '{print $1}'
|
||||||
}
|
}
|
||||||
|
|
||||||
# Check if scripts are running
|
# Check if a script is running (using lock file)
|
||||||
check_running() {
|
check_running() {
|
||||||
pgrep -f "python3.*foodie_automator" > /dev/null
|
local script_name="$1"
|
||||||
|
local lock_file="$LOCK_DIR/${script_name}.lock"
|
||||||
|
if [ -f "$lock_file" ]; then
|
||||||
|
local pid=$(cat "$lock_file")
|
||||||
|
if ps -p "$pid" > /dev/null; then
|
||||||
|
log "$script_name is already running (PID: $pid)"
|
||||||
|
return 0
|
||||||
|
else
|
||||||
|
log "Stale lock file found for $script_name, removing"
|
||||||
|
rm -f "$lock_file"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create lock file
|
||||||
|
create_lock() {
    # Write the PID of the current shell ($$) into $LOCK_DIR/<name>.lock,
    # creating the lock directory first if it does not exist yet.
    local name="$1"
    mkdir -p "$LOCK_DIR"
    local target="$LOCK_DIR/${name}.lock"
    printf '%s\n' "$$" > "$target"
    log "Created lock file for $name (PID: $$)"
}
|
||||||
|
|
||||||
|
# Remove lock file
|
||||||
|
remove_lock() {
|
||||||
|
local script_name="$1"
|
||||||
|
local lock_file="$LOCK_DIR/${script_name}.lock"
|
||||||
|
rm -f "$lock_file"
|
||||||
|
log "Removed lock file for $script_name"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Stop scripts
|
# Stop scripts
|
||||||
stop_scripts() {
|
stop_scripts() {
|
||||||
log "Stopping scripts..."
|
log "Stopping scripts..."
|
||||||
pkill -TERM -f "python3.*foodie_automator" || true
|
for script in foodie_automator_*.py; do
|
||||||
|
if [ -f "$script" ] && [ "$script" != "foodie_weekly_thread.py" ] && [ "$script" != "foodie_engagement_tweet.py" ]; then
|
||||||
|
local script_name="${script%.py}"
|
||||||
|
pkill -TERM -f "$VENV_PYTHON.*$script_name" || true
|
||||||
|
fi
|
||||||
|
done
|
||||||
sleep 10
|
sleep 10
|
||||||
pkill -9 -f "python3.*foodie_automator" || true
|
for script in foodie_automator_*.py; do
|
||||||
|
if [ -f "$script" ] && [ "$script" != "foodie_weekly_thread.py" ] && [ "$script" != "foodie_engagement_tweet.py" ]; then
|
||||||
|
local script_name="${script%.py}"
|
||||||
|
pkill -9 -f "$VENV_PYTHON.*$script_name" || true
|
||||||
|
remove_lock "$script_name"
|
||||||
|
fi
|
||||||
|
done
|
||||||
log "Scripts stopped."
|
log "Scripts stopped."
|
||||||
}
|
}
|
||||||
|
|
||||||
# Start scripts
|
# Start scripts
|
||||||
start_scripts() {
|
start_scripts() {
|
||||||
log "Starting scripts..."
|
log "Starting scripts..."
|
||||||
cd "$BASE_DIR"
|
cd "$BASE_DIR" || { log "Failed to change to $BASE_DIR"; exit 1; }
|
||||||
source venv/bin/activate
|
|
||||||
# Find all foodie_automator_*.py scripts and start them
|
# Source virtual environment
|
||||||
|
if [ -f "$BASE_DIR/venv/bin/activate" ]; then
|
||||||
|
source "$BASE_DIR/venv/bin/activate"
|
||||||
|
else
|
||||||
|
log "Error: Virtual environment not found at $BASE_DIR/venv"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Load .env variables
|
||||||
|
if [ -f "$BASE_DIR/.env" ]; then
|
||||||
|
export $(grep -v '^#' "$BASE_DIR/.env" | xargs)
|
||||||
|
log ".env variables loaded"
|
||||||
|
else
|
||||||
|
log "Error: .env file not found at $BASE_DIR/.env"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Find and start all foodie_automator_*.py scripts (excluding weekly/engagement)
|
||||||
for script in foodie_automator_*.py; do
|
for script in foodie_automator_*.py; do
|
||||||
if [ -f "$script" ]; then
|
if [ -f "$script" ] && [ "$script" != "foodie_weekly_thread.py" ] && [ "$script" != "foodie_engagement_tweet.py" ]; then
|
||||||
log "Starting $script..."
|
local script_name="${script%.py}"
|
||||||
nohup python3 "$script" >> "${script%.py}.log" 2>&1 &
|
if ! check_running "$script_name"; then
|
||||||
|
log "Starting $script..."
|
||||||
|
create_lock "$script_name"
|
||||||
|
nohup "$VENV_PYTHON" "$script" >> "$BASE_DIR/logs/${script_name}.log" 2>&1 &
|
||||||
|
if [ $? -eq 0 ]; then
|
||||||
|
log "$script started successfully"
|
||||||
|
else
|
||||||
|
log "Failed to start $script"
|
||||||
|
remove_lock "$script_name"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
log "All scripts started."
|
log "All scripts started."
|
||||||
@@ -52,14 +122,34 @@ start_scripts() {
|
|||||||
# Update dependencies
|
# Update dependencies
|
||||||
update_dependencies() {
|
update_dependencies() {
|
||||||
log "Updating dependencies..."
|
log "Updating dependencies..."
|
||||||
cd "$BASE_DIR"
|
cd "$BASE_DIR" || { log "Failed to change to $BASE_DIR"; exit 1; }
|
||||||
|
|
||||||
# Create venv if it doesn't exist
|
# Create venv if it doesn't exist
|
||||||
if [ ! -d "venv" ]; then
|
if [ ! -d "venv" ]; then
|
||||||
python3 -m venv venv
|
python3 -m venv venv
|
||||||
|
log "Created new virtual environment"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Source virtual environment
|
||||||
|
if [ -f "$BASE_DIR/venv/bin/activate" ]; then
|
||||||
|
source "$BASE_DIR/venv/bin/activate"
|
||||||
|
else
|
||||||
|
log "Error: Virtual environment not found at $BASE_DIR/venv"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Update pip and install requirements
|
||||||
|
"$VENV_PYTHON" -m pip install --upgrade pip
|
||||||
|
if [ -f "requirements.txt" ]; then
|
||||||
|
"$VENV_PYTHON" -m pip install -r requirements.txt || {
|
||||||
|
log "Failed to install requirements.txt, attempting fallback dependencies"
|
||||||
|
"$VENV_PYTHON" -m pip install requests openai beautifulsoup4 feedparser praw duckduckgo_search selenium Pillow pytesseract webdriver-manager
|
||||||
|
log "Fallback: Installed core dependencies"
|
||||||
|
}
|
||||||
|
else
|
||||||
|
log "Error: requirements.txt not found, installing core dependencies"
|
||||||
|
"$VENV_PYTHON" -m pip install requests openai beautifulsoup4 feedparser praw duckduckgo_search selenium Pillow pytesseract webdriver-manager
|
||||||
fi
|
fi
|
||||||
source venv/bin/activate
|
|
||||||
pip install --upgrade pip
|
|
||||||
pip install -r requirements.txt || (pip install requests openai beautifulsoup4 feedparser praw duckduckgo_search selenium Pillow pytesseract webdriver-manager && log "Fallback: Installed core dependencies")
|
|
||||||
log "Dependencies updated."
|
log "Dependencies updated."
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -77,7 +167,7 @@ if [ "$CURRENT_CHECKSUM" != "$PREVIOUS_CHECKSUM" ]; then
|
|||||||
log "File changes detected. Previous checksum: $PREVIOUS_CHECKSUM, Current checksum: $CURRENT_CHECKSUM"
|
log "File changes detected. Previous checksum: $PREVIOUS_CHECKSUM, Current checksum: $CURRENT_CHECKSUM"
|
||||||
|
|
||||||
# Stop scripts if running
|
# Stop scripts if running
|
||||||
if check_running; then
|
if pgrep -f "$VENV_PYTHON.*foodie_automator" > /dev/null; then
|
||||||
stop_scripts
|
stop_scripts
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -93,3 +183,5 @@ if [ "$CURRENT_CHECKSUM" != "$PREVIOUS_CHECKSUM" ]; then
|
|||||||
else
|
else
|
||||||
log "No file changes detected."
|
log "No file changes detected."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
exit 0
|
||||||
Reference in New Issue
Block a user