update
This commit is contained in:
+39
-48
@@ -336,7 +336,7 @@ def curate_from_rss():
|
|||||||
|
|
||||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||||
|
|
||||||
# Use round-robin author selection
|
# Select author
|
||||||
author = get_next_author_round_robin()
|
author = get_next_author_round_robin()
|
||||||
author_username = author["username"]
|
author_username = author["username"]
|
||||||
logging.info(f"Selected author via round-robin: {author_username}")
|
logging.info(f"Selected author via round-robin: {author_username}")
|
||||||
@@ -362,13 +362,16 @@ def curate_from_rss():
|
|||||||
page_url = None
|
page_url = None
|
||||||
|
|
||||||
hook = get_dynamic_hook(post_data["title"]).strip()
|
hook = get_dynamic_hook(post_data["title"]).strip()
|
||||||
|
|
||||||
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
||||||
|
share_text = f"Check out this foodie gem! {post_data['title']}"
|
||||||
|
share_text_encoded = quote(share_text)
|
||||||
share_links_template = (
|
share_links_template = (
|
||||||
f'<p>{share_prompt} '
|
f'<p>{share_prompt} '
|
||||||
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
||||||
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Prepare post content with share links (to be updated later with post_url)
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
|
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
|
||||||
|
|
||||||
global is_posting
|
global is_posting
|
||||||
@@ -392,6 +395,26 @@ def curate_from_rss():
|
|||||||
logging.warning(f"Failed to post to WordPress for '{title}'")
|
logging.warning(f"Failed to post to WordPress for '{title}'")
|
||||||
attempts += 1
|
attempts += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Update content with actual post_url
|
||||||
|
post_url_encoded = quote(post_url)
|
||||||
|
share_links = share_links_template.format(post_url=post_url_encoded)
|
||||||
|
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
||||||
|
post_data["post_id"] = post_id # For update
|
||||||
|
post_to_wp(
|
||||||
|
post_data=post_data,
|
||||||
|
category=category,
|
||||||
|
link=link,
|
||||||
|
author=author,
|
||||||
|
image_url=None, # No need to re-upload image
|
||||||
|
original_source=original_source,
|
||||||
|
image_source=image_source,
|
||||||
|
uploader=uploader,
|
||||||
|
page_url=page_url,
|
||||||
|
interest_score=interest_score,
|
||||||
|
post_id=post_id,
|
||||||
|
should_post_tweet=False
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"WordPress posting error for '{title}': {e}")
|
print(f"WordPress posting error for '{title}': {e}")
|
||||||
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
|
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
|
||||||
@@ -400,49 +423,21 @@ def curate_from_rss():
|
|||||||
finally:
|
finally:
|
||||||
is_posting = False
|
is_posting = False
|
||||||
|
|
||||||
if post_id:
|
timestamp = datetime.now(timezone.utc).isoformat()
|
||||||
share_text = f"Check out this foodie gem! {post_data['title']}"
|
save_json_file(POSTED_TITLES_FILE, title, timestamp)
|
||||||
share_text_encoded = quote(share_text)
|
posted_titles.add(title)
|
||||||
post_url_encoded = quote(post_url)
|
print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
|
||||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
|
||||||
is_posting = True
|
|
||||||
try:
|
|
||||||
post_to_wp(
|
|
||||||
post_data=post_data,
|
|
||||||
category=category,
|
|
||||||
link=link,
|
|
||||||
author=author,
|
|
||||||
image_url=image_url,
|
|
||||||
original_source=original_source,
|
|
||||||
image_source=image_source,
|
|
||||||
uploader=uploader,
|
|
||||||
page_url=page_url,
|
|
||||||
interest_score=interest_score,
|
|
||||||
post_id=post_id,
|
|
||||||
should_post_tweet=False
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Failed to update WordPress post '{title}' with share links: {e}")
|
|
||||||
logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
|
|
||||||
finally:
|
|
||||||
is_posting = False
|
|
||||||
|
|
||||||
timestamp = datetime.now(timezone.utc).isoformat()
|
if image_url:
|
||||||
save_json_file(POSTED_TITLES_FILE, title, timestamp)
|
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
||||||
posted_titles.add(title)
|
used_images.add(image_url)
|
||||||
print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
|
print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
||||||
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
|
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
||||||
|
|
||||||
if image_url:
|
print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
|
||||||
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
|
||||||
used_images.add(image_url)
|
return post_data, category, random.randint(0, 1800)
|
||||||
print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
|
||||||
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
|
||||||
|
|
||||||
print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
|
|
||||||
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
|
|
||||||
return post_data, category, random.randint(0, 1800)
|
|
||||||
|
|
||||||
attempts += 1
|
attempts += 1
|
||||||
print(f"WP posting failed for '{post_data['title']}'")
|
print(f"WP posting failed for '{post_data['title']}'")
|
||||||
@@ -460,18 +455,14 @@ def run_rss_automator():
|
|||||||
lock_fd = None
|
lock_fd = None
|
||||||
try:
|
try:
|
||||||
lock_fd = acquire_lock()
|
lock_fd = acquire_lock()
|
||||||
print(f"{datetime.now(timezone.utc)} - INFO - ***** RSS Automator Launched *****")
|
|
||||||
logging.info("***** RSS Automator Launched *****")
|
logging.info("***** RSS Automator Launched *****")
|
||||||
post_data, category, sleep_time = curate_from_rss()
|
post_data, category, sleep_time = curate_from_rss()
|
||||||
if not post_data:
|
if not post_data:
|
||||||
print("No postable RSS article found")
|
|
||||||
logging.info("No postable RSS article found")
|
logging.info("No postable RSS article found")
|
||||||
print(f"Sleeping for {sleep_time}s")
|
|
||||||
logging.info(f"Completed run with sleep time: {sleep_time} seconds")
|
logging.info(f"Completed run with sleep time: {sleep_time} seconds")
|
||||||
time.sleep(sleep_time)
|
time.sleep(sleep_time)
|
||||||
return post_data, category, sleep_time
|
return post_data, category, sleep_time
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Fatal error in run_rss_automator: {e}")
|
|
||||||
logging.error(f"Fatal error in run_rss_automator: {e}", exc_info=True)
|
logging.error(f"Fatal error in run_rss_automator: {e}", exc_info=True)
|
||||||
return None, None, random.randint(600, 1800)
|
return None, None, random.randint(600, 1800)
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
+113
-126
@@ -8,6 +8,7 @@ from PIL import Image
|
|||||||
import pytesseract
|
import pytesseract
|
||||||
import io
|
import io
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import shutil
|
||||||
import requests
|
import requests
|
||||||
import time
|
import time
|
||||||
import openai
|
import openai
|
||||||
@@ -28,6 +29,8 @@ from foodie_config import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
last_author_index = -1
|
last_author_index = -1
|
||||||
|
# Global to track round-robin index
|
||||||
|
round_robin_index = 0
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
@@ -36,86 +39,73 @@ IMAGE_UPLOAD_TIMEOUT = 30 # Added to fix NameError
|
|||||||
IMAGE_EXPIRATION_DAYS = 7 # 7 days, consistent with foodie_automator_rss.py
|
IMAGE_EXPIRATION_DAYS = 7 # 7 days, consistent with foodie_automator_rss.py
|
||||||
|
|
||||||
def load_json_file(file_path, expiration_hours=None):
|
def load_json_file(file_path, expiration_hours=None):
|
||||||
"""Load JSON file and return its contents as a list."""
|
"""
|
||||||
|
Load JSON file, optionally filtering out expired entries.
|
||||||
|
"""
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
default = []
|
||||||
|
|
||||||
|
if not os.path.exists(file_path):
|
||||||
|
logger.info(f"File {file_path} does not exist. Returning default: {default}")
|
||||||
|
return default
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if not os.path.exists(file_path):
|
|
||||||
logging.info(f"File {file_path} does not exist, initializing with empty list")
|
|
||||||
with open(file_path, 'w') as f:
|
|
||||||
json.dump([], f)
|
|
||||||
return []
|
|
||||||
|
|
||||||
with open(file_path, 'r') as f:
|
with open(file_path, 'r') as f:
|
||||||
data = json.load(f)
|
data = json.load(f)
|
||||||
|
|
||||||
if not isinstance(data, list):
|
if expiration_hours is not None:
|
||||||
logging.warning(f"Data in {file_path} is not a list, resetting to empty list")
|
|
||||||
with open(file_path, 'w') as f:
|
|
||||||
json.dump([], f)
|
|
||||||
return []
|
|
||||||
|
|
||||||
valid_entries = []
|
|
||||||
if expiration_hours:
|
|
||||||
cutoff = datetime.now(timezone.utc) - timedelta(hours=expiration_hours)
|
cutoff = datetime.now(timezone.utc) - timedelta(hours=expiration_hours)
|
||||||
for entry in data:
|
filtered_data = [
|
||||||
try:
|
entry for entry in data
|
||||||
timestamp_str = entry.get("timestamp")
|
if datetime.fromisoformat(entry['timestamp']) > cutoff
|
||||||
if timestamp_str:
|
]
|
||||||
timestamp = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
|
if len(filtered_data) < len(data):
|
||||||
if timestamp < cutoff:
|
logger.info(f"Filtered {len(data) - len(filtered_data)} expired entries from {file_path}")
|
||||||
continue
|
save_json_file(file_path, filtered_data) # Save filtered data
|
||||||
valid_entries.append(entry)
|
data = filtered_data
|
||||||
except (ValueError, TypeError) as e:
|
|
||||||
logging.warning(f"Skipping malformed entry in {file_path}: {e}")
|
logger.info(f"Loaded {len(data)} valid entries from {file_path}")
|
||||||
continue
|
return data
|
||||||
else:
|
|
||||||
valid_entries = data
|
|
||||||
|
|
||||||
logging.info(f"Loaded {len(valid_entries)} valid entries from {file_path}")
|
|
||||||
return valid_entries
|
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
logging.error(f"Invalid JSON in {file_path}: {e}. Resetting to empty list.")
|
logger.error(f"Invalid JSON in {file_path}: {str(e)}. Resetting to default.")
|
||||||
with open(file_path, 'w') as f:
|
save_json_file(file_path, default)
|
||||||
json.dump([], f)
|
return default
|
||||||
return []
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Failed to load JSON file {file_path}: {e}")
|
logger.error(f"Failed to load {file_path}: {str(e)}. Returning default.")
|
||||||
return []
|
return default
|
||||||
|
|
||||||
def save_json_file(filename, data):
|
def save_json_file(file_path, data, timestamp=None):
|
||||||
"""Save data to a JSON file with locking to prevent corruption, without resetting on error."""
|
"""
|
||||||
lock = FileLock(f"{filename}.lock")
|
Save data to JSON file atomically. If timestamp is provided, append as an entry.
|
||||||
|
"""
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
try:
|
try:
|
||||||
with lock:
|
# If timestamp is provided, append as a new entry
|
||||||
# Read existing data
|
if timestamp:
|
||||||
existing_data = []
|
current_data = load_json_file(file_path)
|
||||||
try:
|
new_entry = {'title': data, 'timestamp': timestamp}
|
||||||
if os.path.exists(filename):
|
if new_entry not in current_data: # Avoid duplicates
|
||||||
with open(filename, 'r') as f:
|
current_data.append(new_entry)
|
||||||
existing_data = json.load(f)
|
data = current_data
|
||||||
if not isinstance(existing_data, list):
|
|
||||||
logging.warning(f"Data in {filename} is not a list. Resetting to empty list.")
|
|
||||||
existing_data = []
|
|
||||||
except (json.JSONDecodeError, FileNotFoundError) as e:
|
|
||||||
# If the file is corrupted, log the error and skip writing to preserve existing data
|
|
||||||
if isinstance(e, json.JSONDecodeError):
|
|
||||||
logging.error(f"Invalid JSON in {filename}: {e}. Skipping write to preserve existing data.")
|
|
||||||
return
|
|
||||||
logging.warning(f"File {filename} not found: {e}. Starting with empty list.")
|
|
||||||
|
|
||||||
# Append new data if it's not already present
|
|
||||||
if isinstance(data, list):
|
|
||||||
existing_data.extend([item for item in data if item not in existing_data])
|
|
||||||
else:
|
else:
|
||||||
if data not in existing_data:
|
logger.info(f"Entry {data} already exists in {file_path}")
|
||||||
existing_data.append(data)
|
return True
|
||||||
|
|
||||||
# Write back to file
|
# Validate JSON
|
||||||
with open(filename, 'w') as f:
|
json.dumps(data)
|
||||||
json.dump(existing_data, f, indent=2)
|
|
||||||
logging.info(f"Saved data to {filename}")
|
# Write to temp file
|
||||||
except Exception as e:
|
temp_file = tempfile.NamedTemporaryFile('w', delete=False, encoding='utf-8')
|
||||||
logging.error(f"Failed to save to {filename}: {e}", exc_info=True)
|
with open(temp_file.name, 'w', encoding='utf-8') as f:
|
||||||
raise
|
json.dump(data, f, indent=2)
|
||||||
|
|
||||||
|
# Atomically move to target
|
||||||
|
shutil.move(temp_file.name, file_path)
|
||||||
|
logger.info(f"Saved data to {file_path}")
|
||||||
|
return True
|
||||||
|
except (json.JSONDecodeError, IOError) as e:
|
||||||
|
logger.error(f"Failed to save {file_path}: {str(e)}")
|
||||||
|
return False
|
||||||
|
|
||||||
def generate_article_tweet(author, post, persona):
|
def generate_article_tweet(author, post, persona):
|
||||||
title = post["title"]
|
title = post["title"]
|
||||||
@@ -1136,64 +1126,61 @@ def check_rate_limit(response):
|
|||||||
logging.warning(f"Failed to parse rate limit headers: {e}")
|
logging.warning(f"Failed to parse rate limit headers: {e}")
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
def check_author_rate_limit(author):
|
def check_author_rate_limit(author, max_requests=10, window_seconds=3600):
|
||||||
"""Check if the author can post based on Twitter API rate limits."""
|
"""
|
||||||
from foodie_config import X_API_CREDENTIALS
|
Check if an author is rate-limited.
|
||||||
import tweepy
|
"""
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
credentials = X_API_CREDENTIALS.get(author["username"])
|
rate_limit_file = '/home/shane/foodie_automator/rate_limit_info.json'
|
||||||
if not credentials:
|
rate_limit_info = load_json_file(rate_limit_file, default={})
|
||||||
logging.error(f"No X credentials found for {author['username']}")
|
|
||||||
return False, 0, 0
|
if author['username'] not in rate_limit_info:
|
||||||
|
rate_limit_info[author['username']] = {
|
||||||
try:
|
'remaining': max_requests,
|
||||||
client = tweepy.Client(
|
'reset': time.time()
|
||||||
consumer_key=credentials["api_key"],
|
}
|
||||||
consumer_secret=credentials["api_secret"],
|
|
||||||
access_token=credentials["access_token"],
|
info = rate_limit_info[author['username']]
|
||||||
access_token_secret=credentials["access_token_secret"]
|
current_time = time.time()
|
||||||
)
|
|
||||||
# Make a lightweight API call to check rate limits
|
# Reset if window expired
|
||||||
response = client.get_me()
|
if current_time >= info['reset']:
|
||||||
remaining, reset = check_rate_limit(response)
|
info['remaining'] = max_requests
|
||||||
if remaining is None or reset is None:
|
info['reset'] = current_time + window_seconds
|
||||||
logging.warning(f"Could not determine rate limits for {author['username']}. Assuming rate-limited.")
|
logger.info(f"Reset rate limit for {author['username']}: {max_requests} requests available")
|
||||||
return False, 0, 0
|
save_json_file(rate_limit_file, rate_limit_info)
|
||||||
can_post = remaining > 0
|
|
||||||
if not can_post:
|
if info['remaining'] <= 0:
|
||||||
logging.info(f"Author {author['username']} is rate-limited. Remaining: {remaining}, Reset at: {reset}")
|
reset_time = datetime.fromtimestamp(info['reset'], tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
|
||||||
return can_post, remaining, reset
|
logger.info(f"Author {author['username']} is rate-limited. Remaining: {info['remaining']}, Reset at: {reset_time}")
|
||||||
except tweepy.TweepyException as e:
|
return True
|
||||||
logging.error(f"Failed to check rate limits for {author['username']}: {e}")
|
|
||||||
return False, 0, 0
|
# Decrement remaining requests
|
||||||
except Exception as e:
|
info['remaining'] -= 1
|
||||||
logging.error(f"Unexpected error checking rate limits for {author['username']}: {e}", exc_info=True)
|
save_json_file(rate_limit_file, rate_limit_info)
|
||||||
return False, 0, 0
|
logger.info(f"Updated rate limit for {author['username']}: {info['remaining']} requests remaining")
|
||||||
|
return False
|
||||||
|
|
||||||
def get_next_author_round_robin():
|
def get_next_author_round_robin():
|
||||||
"""Select the next author in round-robin fashion, respecting rate limits."""
|
"""
|
||||||
global last_author_index
|
Select the next author using round-robin, respecting rate limits.
|
||||||
authors = AUTHORS
|
"""
|
||||||
num_authors = len(authors)
|
from foodie_config import AUTHORS
|
||||||
if num_authors == 0:
|
global round_robin_index
|
||||||
logging.error("No authors available in AUTHORS list.")
|
logger = logging.getLogger(__name__)
|
||||||
return None
|
|
||||||
|
for _ in range(len(AUTHORS)):
|
||||||
# Try each author in round-robin order
|
author = AUTHORS[round_robin_index % len(AUTHORS)]
|
||||||
for i in range(num_authors):
|
round_robin_index = (round_robin_index + 1) % len(AUTHORS)
|
||||||
last_author_index = (last_author_index + 1) % num_authors
|
|
||||||
author = authors[last_author_index]
|
if not check_author_rate_limit(author):
|
||||||
can_post, remaining, reset = check_author_rate_limit(author)
|
logger.info(f"Selected author via round-robin: {author['username']}")
|
||||||
if can_post:
|
|
||||||
logging.info(f"Author {author['username']} can post")
|
|
||||||
return author
|
return author
|
||||||
else:
|
|
||||||
reset_time = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(reset)) if reset else "Unknown"
|
logger.warning("No authors available due to rate limits. Selecting a random author as fallback.")
|
||||||
logging.info(f"Skipping author {author['username']} due to rate limit. Remaining: {remaining}, Reset at: {reset_time}")
|
author = random.choice(AUTHORS)
|
||||||
|
logger.info(f"Selected author via random fallback: {author['username']}")
|
||||||
# If no authors are available, return None instead of falling back
|
return author
|
||||||
logging.warning("No authors available due to rate limits. Skipping posting.")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def prepare_post_data(summary, title, main_topic=None):
|
def prepare_post_data(summary, title, main_topic=None):
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user