add lock files and update weekly tweet to include last tweet to follow
This commit is contained in:
+326
-284
@@ -29,11 +29,13 @@ from foodie_utils import (
|
||||
prepare_post_data, select_best_author, smart_image_and_filter,
|
||||
get_flickr_image
|
||||
)
|
||||
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt # Removed select_best_cta import
|
||||
from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
|
||||
import fcntl
|
||||
|
||||
load_dotenv()
|
||||
|
||||
is_posting = False
|
||||
LOCK_FILE = "/home/shane/foodie_automator/locks/foodie_automator_reddit.lock"
|
||||
|
||||
def signal_handler(sig, frame):
|
||||
logging.info("Received termination signal, checking if safe to exit...")
|
||||
@@ -46,8 +48,22 @@ def signal_handler(sig, frame):
|
||||
signal.signal(signal.SIGTERM, signal_handler)
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
|
||||
LOG_FILE = "/home/shane/foodie_automator/foodie_automator_reddit.log"
|
||||
LOG_FILE = "/home/shane/foodie_automator/logs/foodie_automator_reddit.log"
|
||||
LOG_PRUNE_DAYS = 30
|
||||
MAX_RETRIES = 3
|
||||
RETRY_BACKOFF = 2
|
||||
|
||||
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_reddit_titles.json'
|
||||
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
|
||||
EXPIRATION_HOURS = 24
|
||||
IMAGE_EXPIRATION_DAYS = 7
|
||||
|
||||
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
|
||||
posted_titles = set(entry["title"] for entry in posted_titles_data if "title" in entry)
|
||||
used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
|
||||
used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
|
||||
|
||||
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
def setup_logging():
|
||||
if os.path.exists(LOG_FILE):
|
||||
@@ -59,7 +75,7 @@ def setup_logging():
|
||||
timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}')
|
||||
|
||||
for line in lines:
|
||||
if timestamp_pattern.match(line):
|
||||
if(timestamp_pattern.match(line)):
|
||||
if current_entry:
|
||||
log_entries.append(''.join(current_entry))
|
||||
current_entry = [line]
|
||||
@@ -95,19 +111,17 @@ def setup_logging():
|
||||
logging.getLogger().addHandler(console_handler)
|
||||
logging.info("Logging initialized for foodie_automator_reddit.py")
|
||||
|
||||
setup_logging()
|
||||
|
||||
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_reddit_titles.json'
|
||||
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
|
||||
EXPIRATION_HOURS = 24
|
||||
IMAGE_EXPIRATION_DAYS = 7
|
||||
|
||||
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
|
||||
posted_titles = set(entry["title"] for entry in posted_titles_data if "title" in entry)
|
||||
used_images_data = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
|
||||
used_images = set(entry["title"] for entry in used_images_data if "title" in entry)
|
||||
|
||||
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
def acquire_lock():
    """Take the single-instance lock for this script, or exit if it is held.

    Creates the directory of ``LOCK_FILE`` if needed, opens the file, takes a
    non-blocking exclusive ``flock`` on it and writes the current PID into it.

    Returns:
        The open file object that holds the lock. The caller must keep it
        open while the lock is needed and unlock/close it afterwards.

    Exits:
        Calls ``sys.exit(0)`` when the lock is already held elsewhere.
    """
    os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
    # Append mode: opening must never truncate the file. With mode 'w' a
    # second instance would erase the PID recorded by the running instance
    # before it even finds out that the lock is taken.
    lock_fd = open(LOCK_FILE, 'a+')
    try:
        fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except OSError:
        # IOError is an alias of OSError; a held lock surfaces here.
        lock_fd.close()
        logging.info("Another instance of foodie_automator_reddit.py is running")
        sys.exit(0)
    # The lock is ours: replace any stale contents with our own PID.
    lock_fd.seek(0)
    lock_fd.truncate()
    lock_fd.write(str(os.getpid()))
    lock_fd.flush()
    return lock_fd
|
||||
|
||||
def clean_reddit_title(title):
|
||||
cleaned_title = re.sub(r'^\[.*?\]\s*', '', title).strip()
|
||||
@@ -115,253 +129,246 @@ def clean_reddit_title(title):
|
||||
return cleaned_title
|
||||
|
||||
def _engagement_boost(upvotes, comment_count):
    """Return the score bonus earned from upvotes (0-3) plus comments (0-2)."""
    boost = 0
    if upvotes >= 500:
        boost += 3
    elif upvotes >= 100:
        boost += 2
    elif upvotes >= 50:
        boost += 1

    if comment_count >= 100:
        boost += 2
    elif comment_count >= 20:
        boost += 1
    return boost


def is_interesting_reddit(title, summary, upvotes, comment_count, top_comments):
    """Score a Reddit post from 0 to 10 for how worthwhile it is to cover.

    The base score is requested from the chat model ``LIGHT_TASK_MODEL`` and
    then raised by an engagement bonus derived from upvotes and comment
    count; the sum is capped at 10.

    Args:
        title: Post title sent to the model.
        summary: Post body text sent to the model.
        upvotes: Post score, used for the engagement bonus.
        comment_count: Number of comments, used for the engagement bonus.
        top_comments: Comment bodies appended to the model input; may be
            empty or None.

    Returns:
        The final integer score, or 0 when every one of ``MAX_RETRIES``
        attempts raised.
    """
    comments = top_comments or []
    for attempt in range(MAX_RETRIES):
        try:
            content = f"Title: {title}\n\nContent: {summary}"
            if comments:
                # Join outside the f-string: a backslash or a reused quote
                # inside a replacement field is a SyntaxError before 3.12.
                joined_comments = "\n".join(comments)
                content += f"\n\nTop Comments:\n{joined_comments}"

            response = client.chat.completions.create(
                model=LIGHT_TASK_MODEL,
                messages=[
                    {"role": "system", "content": (
                        "Rate this Reddit post from 0-10 based on rarity, buzzworthiness, and engagement potential for food lovers, covering food topics (skip recipes). "
                        "Score 8-10 for rare, highly shareable ideas (e.g., unique dishes or restaurant trends). "
                        "Score 5-7 for fresh, engaging updates with broad appeal. Score below 5 for common or unremarkable content. "
                        "Consider comments for added context (e.g., specific locations or unique details). "
                        "Return only a number"
                    )},
                    {"role": "user", "content": content}
                ],
                max_tokens=5
            )
            # A missing reply is treated like a non-numeric one (score 0)
            # instead of raising and burning the remaining retries.
            reply = (response.choices[0].message.content or "").strip()
            base_score = int(reply) if reply.isdigit() else 0

            engagement_boost = _engagement_boost(upvotes, comment_count)
            final_score = min(base_score + engagement_boost, 10)
            logging.info(f"Reddit Interest Score: {final_score} (base: {base_score}, upvotes: {upvotes}, comments: {comment_count}, top_comments: {len(comments)}) for '{title}'")
            return final_score
        except Exception as e:
            logging.warning(f"Reddit interestingness scoring failed (attempt {attempt + 1}): {e}")
            if attempt < MAX_RETRIES - 1:
                # Exponential backoff between attempts.
                time.sleep(RETRY_BACKOFF * (2 ** attempt))

    logging.error(f"Failed to score Reddit post '{title}' after {MAX_RETRIES} attempts")
    return 0
|
||||
|
||||
def get_top_comments(post_url, reddit, limit=3):
    """Return up to ``limit`` top-sorted comment bodies for a Reddit post.

    Args:
        post_url: URL of the submission.
        reddit: Client object whose ``submission(url=...)`` returns the post.
        limit: Maximum number of comment bodies to return.

    Returns:
        A list of comment body strings with deleted/removed placeholders
        skipped, or an empty list when all ``MAX_RETRIES`` attempts raised.
    """
    placeholder_prefixes = ('[deleted]', '[removed]')
    for attempt in range(MAX_RETRIES):
        try:
            submission = reddit.submission(url=post_url)
            submission.comment_sort = 'top'
            submission.comments.replace_more(limit=0)
            top_comments = []
            # Filter while collecting, so placeholder comments do not use up
            # slots and the result can still reach ``limit`` entries.
            for comment in submission.comments:
                if len(top_comments) >= limit:
                    break
                if comment.body.startswith(placeholder_prefixes):
                    continue
                top_comments.append(comment.body)
            logging.info(f"Fetched {len(top_comments)} top comments for {post_url}")
            return top_comments
        except Exception as e:
            logging.warning(f"Failed to fetch comments for {post_url} (attempt {attempt + 1}): {e}")
            if attempt < MAX_RETRIES - 1:
                # Exponential backoff between attempts.
                time.sleep(RETRY_BACKOFF * (2 ** attempt))

    logging.error(f"Failed to fetch comments for {post_url} after {MAX_RETRIES} attempts")
    return []
|
||||
|
||||
def _parse_news_timestamp(date_str):
    """Parse an ISO-8601 timestamp string into a timezone-aware UTC datetime.

    Raises:
        ValueError: if ``date_str`` is not valid ISO-8601.
    """
    # fromisoformat() only accepts a trailing 'Z' from Python 3.11 onwards.
    parsed = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
    if parsed.tzinfo is None:
        # No offset given: treat as UTC, as the original parser did.
        return parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)


def fetch_duckduckgo_news_context(title, hours=24):
    """Collect recent news headlines about ``title`` as summarisation context.

    Args:
        title: Topic to search for; the query is ``"<title> news"``.
        hours: Only headlines newer than this many hours are kept. The search
            itself is issued with ``timelimit="d"``, so values above 24 are
            unlikely to widen the result set.

    Returns:
        The lower-cased headlines joined by spaces, a "no recent news"
        message when none qualify, or ``title`` itself when all
        ``MAX_RETRIES`` attempts raised.
    """
    for attempt in range(MAX_RETRIES):
        try:
            with DDGS() as ddgs:
                results = ddgs.news(f"{title} news", timelimit="d", max_results=5)
                cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
                titles = []
                for r in results:
                    date_str = None
                    try:
                        date_str = r["date"]
                        if _parse_news_timestamp(date_str) > cutoff:
                            titles.append(r["title"].lower())
                    except (KeyError, TypeError, AttributeError, ValueError) as e:
                        # One malformed result must not fail the whole
                        # attempt; skip it and keep the rest.
                        logging.warning(f"Date parsing failed for '{date_str}': {e}")
                        continue
                context = " ".join(titles) if titles else f"No recent news found within {hours} hours"
                logging.info(f"DuckDuckGo News context for '{title}': {context}")
                return context
        except Exception as e:
            logging.warning(f"DuckDuckGo News context fetch failed for '{title}' (attempt {attempt + 1}): {e}")
            if attempt < MAX_RETRIES - 1:
                # Exponential backoff between attempts.
                time.sleep(RETRY_BACKOFF * (2 ** attempt))

    logging.error(f"Failed to fetch DuckDuckGo News context for '{title}' after {MAX_RETRIES} attempts")
    return title
|
||||
|
||||
def fetch_reddit_posts():
    """Fetch the day's top posts from the configured food subreddits.

    Each subreddit is tried up to ``MAX_RETRIES`` times with exponential
    backoff; posts older than ``EXPIRATION_HOURS`` are skipped.

    Returns:
        A list of dicts with the keys ``title``, ``raw_title``, ``link``,
        ``summary``, ``feed_title``, ``pub_date``, ``upvotes`` and
        ``comment_count``; an empty list on an unexpected error.
    """
    try:
        reddit = praw.Reddit(
            client_id=REDDIT_CLIENT_ID,
            client_secret=REDDIT_CLIENT_SECRET,
            user_agent=REDDIT_USER_AGENT
        )
        feeds = ['FoodPorn', 'restaurant', 'FoodIndustry', 'food']
        articles = []
        cutoff_date = datetime.now(timezone.utc) - timedelta(hours=EXPIRATION_HOURS)

        logging.info(f"Starting fetch with cutoff date: {cutoff_date}")
        for subreddit_name in feeds:
            for attempt in range(MAX_RETRIES):
                # Collect into a per-attempt list so that a failure halfway
                # through the listing does not leave partial results behind
                # that the retry would then append a second time.
                subreddit_posts = []
                try:
                    subreddit = reddit.subreddit(subreddit_name)
                    for submission in subreddit.top(time_filter='day', limit=100):
                        pub_date = datetime.fromtimestamp(submission.created_utc, tz=timezone.utc)
                        if pub_date < cutoff_date:
                            logging.info(f"Skipping old post: {submission.title} (Published: {pub_date})")
                            continue
                        cleaned_title = clean_reddit_title(submission.title)
                        subreddit_posts.append({
                            "title": cleaned_title,
                            "raw_title": submission.title,
                            "link": f"https://www.reddit.com{submission.permalink}",
                            "summary": submission.selftext,
                            "feed_title": get_clean_source_name(subreddit_name),
                            "pub_date": pub_date,
                            "upvotes": submission.score,
                            "comment_count": submission.num_comments
                        })
                    articles.extend(subreddit_posts)
                    # Report this subreddit's own count, not the running total.
                    logging.info(f"Fetched {len(subreddit_posts)} posts from r/{subreddit_name}")
                    break
                except Exception as e:
                    logging.error(f"Failed to fetch Reddit feed r/{subreddit_name} (attempt {attempt + 1}): {e}")
                    if attempt < MAX_RETRIES - 1:
                        time.sleep(RETRY_BACKOFF * (2 ** attempt))

        logging.info(f"Total Reddit posts fetched: {len(articles)}")
        return articles
    except Exception as e:
        logging.error(f"Unexpected error in fetch_reddit_posts: {e}", exc_info=True)
        return []
|
||||
|
||||
def curate_from_reddit():
|
||||
articles = fetch_reddit_posts()
|
||||
if not articles:
|
||||
print("No Reddit posts available")
|
||||
logging.info("No Reddit posts available")
|
||||
return None, None, random.randint(600, 1800)
|
||||
try:
|
||||
articles = fetch_reddit_posts()
|
||||
if not articles:
|
||||
logging.info("No Reddit posts available")
|
||||
return None, None, False
|
||||
|
||||
articles.sort(key=lambda x: x["upvotes"], reverse=True)
|
||||
|
||||
reddit = praw.Reddit(
|
||||
client_id=REDDIT_CLIENT_ID,
|
||||
client_secret=REDDIT_CLIENT_SECRET,
|
||||
user_agent=REDDIT_USER_AGENT
|
||||
)
|
||||
|
||||
attempts = 0
|
||||
max_attempts = 10
|
||||
while attempts < max_attempts and articles:
|
||||
article = articles.pop(0)
|
||||
title = article["title"]
|
||||
raw_title = article["raw_title"]
|
||||
link = article["link"]
|
||||
summary = article["summary"]
|
||||
source_name = "Reddit"
|
||||
original_source = '<a href="https://www.reddit.com/">Reddit</a>'
|
||||
articles.sort(key=lambda x: x["upvotes"], reverse=True)
|
||||
|
||||
if raw_title in posted_titles:
|
||||
print(f"Skipping already posted post: {raw_title}")
|
||||
logging.info(f"Skipping already posted post: {raw_title}")
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
print(f"Trying Reddit Post: {title} from {source_name}")
|
||||
logging.info(f"Trying Reddit Post: {title} from {source_name}")
|
||||
|
||||
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
||||
if skip or any(keyword in title.lower() or keyword in raw_title.lower() for keyword in RECIPE_KEYWORDS + ["homemade"]):
|
||||
print(f"Skipping filtered Reddit post: {title}")
|
||||
logging.info(f"Skipping filtered Reddit post: {title}")
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
top_comments = get_top_comments(link, reddit, limit=3)
|
||||
ddg_context = fetch_duckduckgo_news_context(title)
|
||||
content_to_summarize = f"{title}\n\n{summary}\n\nTop Comments:\n{'\n'.join(top_comments) if top_comments else 'None'}\n\nAdditional Context: {ddg_context}"
|
||||
interest_score = is_interesting_reddit(
|
||||
title,
|
||||
summary,
|
||||
article["upvotes"],
|
||||
article["comment_count"],
|
||||
top_comments
|
||||
)
|
||||
logging.info(f"Interest Score: {interest_score} for '{title}'")
|
||||
if interest_score < 6:
|
||||
print(f"Reddit Interest Too Low: {interest_score}")
|
||||
logging.info(f"Reddit Interest Too Low: {interest_score}")
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
num_paragraphs = determine_paragraph_count(interest_score)
|
||||
extra_prompt = (
|
||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
||||
f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
|
||||
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
||||
f"Do NOT introduce unrelated concepts unless in the content, comments, or additional context.\n"
|
||||
f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
|
||||
f"Do not include emojis in the summary."
|
||||
reddit = praw.Reddit(
|
||||
client_id=REDDIT_CLIENT_ID,
|
||||
client_secret=REDDIT_CLIENT_SECRET,
|
||||
user_agent=REDDIT_USER_AGENT
|
||||
)
|
||||
|
||||
final_summary = summarize_with_gpt4o(
|
||||
content_to_summarize,
|
||||
source_name,
|
||||
link,
|
||||
interest_score=interest_score,
|
||||
extra_prompt=extra_prompt
|
||||
)
|
||||
if not final_summary:
|
||||
logging.info(f"Summary failed for '{title}'")
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||
|
||||
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic)
|
||||
if not post_data:
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
||||
if not image_url:
|
||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||
|
||||
hook = get_dynamic_hook(post_data["title"]).strip()
|
||||
|
||||
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
||||
share_links_template = (
|
||||
f'<p>{share_prompt} '
|
||||
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
||||
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
||||
)
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
|
||||
|
||||
global is_posting
|
||||
is_posting = True
|
||||
try:
|
||||
post_id, post_url = post_to_wp(
|
||||
post_data=post_data,
|
||||
category=category,
|
||||
link=link,
|
||||
author=author,
|
||||
image_url=image_url,
|
||||
original_source=original_source,
|
||||
image_source=image_source,
|
||||
uploader=uploader,
|
||||
page_url=page_url,
|
||||
interest_score=interest_score,
|
||||
should_post_tweet=True
|
||||
attempts = 0
|
||||
max_attempts = 10
|
||||
while attempts < max_attempts and articles:
|
||||
article = articles.pop(0)
|
||||
title = article["title"]
|
||||
raw_title = article["raw_title"]
|
||||
link = article["link"]
|
||||
summary = article["summary"]
|
||||
source_name = "Reddit"
|
||||
original_source = '<a href="https://www.reddit.com/">Reddit</a>'
|
||||
|
||||
if raw_title in posted_titles:
|
||||
logging.info(f"Skipping already posted post: {raw_title}")
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
logging.info(f"Trying Reddit Post: {title} from {source_name}")
|
||||
|
||||
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
||||
if skip or any(keyword in title.lower() or keyword in raw_title.lower() for keyword in RECIPE_KEYWORDS + ["homemade"]):
|
||||
logging.info(f"Skipping filtered Reddit post: {title}")
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
top_comments = get_top_comments(link, reddit, limit=3)
|
||||
ddg_context = fetch_duckduckgo_news_context(title)
|
||||
content_to_summarize = f"{title}\n\n{summary}\n\nTop Comments:\n{'\n'.join(top_comments) if top_comments else 'None'}\n\nAdditional Context: {ddg_context}"
|
||||
interest_score = is_interesting_reddit(
|
||||
title,
|
||||
summary,
|
||||
article["upvotes"],
|
||||
article["comment_count"],
|
||||
top_comments
|
||||
)
|
||||
finally:
|
||||
is_posting = False
|
||||
|
||||
if post_id:
|
||||
share_text = f"Check out this foodie gem! {post_data['title']}"
|
||||
share_text_encoded = quote(share_text)
|
||||
post_url_encoded = quote(post_url)
|
||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||
logging.info(f"Interest Score: {interest_score} for '{title}'")
|
||||
if interest_score < 6:
|
||||
logging.info(f"Reddit Interest Too Low: {interest_score}")
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
num_paragraphs = determine_paragraph_count(interest_score)
|
||||
extra_prompt = (
|
||||
f"Generate exactly {num_paragraphs} paragraphs.\n"
|
||||
f"FOCUS: Summarize ONLY the provided content, focusing on its specific topic and details without mentioning the original title.\n"
|
||||
f"Incorporate relevant insights from these top comments if available: {', '.join(top_comments) if top_comments else 'None'}.\n"
|
||||
f"Incorporate relevant insights from this additional context if available: {ddg_context}.\n"
|
||||
f"Do NOT introduce unrelated concepts unless in the content, comments, or additional context.\n"
|
||||
f"If brief, expand on the core idea with relevant context about its appeal or significance.\n"
|
||||
f"Do not include emojis in the summary."
|
||||
)
|
||||
|
||||
final_summary = summarize_with_gpt4o(
|
||||
content_to_summarize,
|
||||
source_name,
|
||||
link,
|
||||
interest_score=interest_score,
|
||||
extra_prompt=extra_prompt
|
||||
)
|
||||
if not final_summary:
|
||||
logging.info(f"Summary failed for '{title}'")
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||
|
||||
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic)
|
||||
if not post_data:
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
||||
if not image_url:
|
||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||
|
||||
hook = get_dynamic_hook(post_data["title"]).strip()
|
||||
|
||||
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
||||
share_links_template = (
|
||||
f'<p>{share_prompt} '
|
||||
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
||||
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
||||
)
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
|
||||
|
||||
global is_posting
|
||||
is_posting = True
|
||||
try:
|
||||
post_to_wp(
|
||||
post_id, post_url = post_to_wp(
|
||||
post_data=post_data,
|
||||
category=category,
|
||||
link=link,
|
||||
@@ -372,49 +379,84 @@ def curate_from_reddit():
|
||||
uploader=uploader,
|
||||
page_url=page_url,
|
||||
interest_score=interest_score,
|
||||
post_id=post_id,
|
||||
should_post_tweet=False
|
||||
should_post_tweet=True
|
||||
)
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
|
||||
attempts += 1
|
||||
continue
|
||||
finally:
|
||||
is_posting = False
|
||||
|
||||
timestamp = datetime.now(timezone.utc).isoformat()
|
||||
save_json_file(POSTED_TITLES_FILE, raw_title, timestamp)
|
||||
posted_titles.add(raw_title)
|
||||
logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}")
|
||||
|
||||
if image_url:
|
||||
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
||||
used_images.add(image_url)
|
||||
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE} with timestamp {timestamp}")
|
||||
|
||||
print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
|
||||
print(f"Actual post URL: {post_url}")
|
||||
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
|
||||
logging.info(f"Actual post URL: {post_url}")
|
||||
return post_data, category, random.randint(0, 1800)
|
||||
|
||||
if post_id:
|
||||
share_text = f"Check out this foodie gem! {post_data['title']}"
|
||||
share_text_encoded = quote(share_text)
|
||||
post_url_encoded = quote(post_url)
|
||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
||||
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||
is_posting = True
|
||||
try:
|
||||
post_to_wp(
|
||||
post_data=post_data,
|
||||
category=category,
|
||||
link=link,
|
||||
author=author,
|
||||
image_url=image_url,
|
||||
original_source=original_source,
|
||||
image_source=image_source,
|
||||
uploader=uploader,
|
||||
page_url=page_url,
|
||||
interest_score=interest_score,
|
||||
post_id=post_id,
|
||||
should_post_tweet=False
|
||||
)
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
|
||||
finally:
|
||||
is_posting = False
|
||||
|
||||
timestamp = datetime.now(timezone.utc).isoformat()
|
||||
save_json_file(POSTED_TITLES_FILE, raw_title, timestamp)
|
||||
posted_titles.add(raw_title)
|
||||
logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE}")
|
||||
|
||||
if image_url:
|
||||
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
||||
used_images.add(image_url)
|
||||
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
||||
|
||||
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from Reddit *****")
|
||||
return post_data, category, True
|
||||
attempts += 1
|
||||
logging.info(f"WP posting failed for '{post_data['title']}'")
|
||||
|
||||
attempts += 1
|
||||
logging.info(f"WP posting failed for '{post_data['title']}'")
|
||||
|
||||
print("No interesting Reddit post found after attempts")
|
||||
logging.info("No interesting Reddit post found after attempts")
|
||||
return None, None, random.randint(600, 1800)
|
||||
logging.info("No interesting Reddit post found after attempts")
|
||||
return None, None, False
|
||||
except Exception as e:
|
||||
logging.error(f"Unexpected error in curate_from_reddit: {e}", exc_info=True)
|
||||
return None, None, False
|
||||
|
||||
def run_reddit_automator():
    """Run one Reddit curation pass under the single-instance lock.

    Returns:
        The ``(post_data, category, should_continue)`` tuple produced by
        ``curate_from_reddit()``, or ``(None, None, False)`` when an
        exception was raised during the run.
    """
    lock_fd = None
    try:
        lock_fd = acquire_lock()
        logging.info("***** Reddit Automator Launched *****")
        post_data, category, should_continue = curate_from_reddit()
        if not post_data:
            logging.info("No postable Reddit article found")
        else:
            logging.info("Completed Reddit run")
        return post_data, category, should_continue
    except Exception as e:
        logging.error(f"Fatal error in run_reddit_automator: {e}", exc_info=True)
        return None, None, False
    finally:
        if lock_fd:
            try:
                # Unlink while the lock is still held. Removing the path
                # after unlocking could delete a file that another instance
                # has just locked, letting a third instance create and lock
                # a fresh file at the same path.
                os.remove(LOCK_FILE)
            except FileNotFoundError:
                pass
            except OSError as e:
                logging.warning(f"Could not remove lock file {LOCK_FILE}: {e}")
            finally:
                fcntl.flock(lock_fd, fcntl.LOCK_UN)
                lock_fd.close()
|
||||
|
||||
if __name__ == "__main__":
    # Configure logging first so the run itself is captured, then perform
    # exactly one automator pass (a second bare call would post twice).
    setup_logging()
    post_data, category, should_continue = run_reddit_automator()
    logging.info(f"Run completed, should_continue: {should_continue}")
|
||||
Reference in New Issue
Block a user