update

2025-05-08 15:37:26 +10:00
parent 753934db4f
commit 5f03aabde4
2 changed files with 152 additions and 174 deletions
@@ -336,7 +336,7 @@ def curate_from_rss():
            final_summary = insert_link_naturally(final_summary, source_name, link)
-            # Use round-robin author selection
+            # Select author
            author = get_next_author_round_robin()
            author_username = author["username"]
            logging.info(f"Selected author via round-robin: {author_username}")
@@ -362,13 +362,16 @@ def curate_from_rss():
                    page_url = None
            hook = get_dynamic_hook(post_data["title"]).strip()
            share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
            share_text = f"Check out this foodie gem! {post_data['title']}"
            share_text_encoded = quote(share_text)
            share_links_template = (
                f'<p>{share_prompt} '
-                f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
+                f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={share_text_encoded}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
                f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
            )
            # Prepare post content with share links (to be updated later with post_url)
            post_data["content"] = f"{final_summary}\n\n{share_links_template}"
            global is_posting
@@ -392,6 +395,26 @@ def curate_from_rss():
                    logging.warning(f"Failed to post to WordPress for '{title}'")
                    attempts += 1
                    continue
                # Update content with actual post_url
                post_url_encoded = quote(post_url)
                share_links = share_links_template.format(post_url=post_url_encoded)
                post_data["content"] = f"{final_summary}\n\n{share_links}"
                post_data["post_id"] = post_id  # For update
                post_to_wp(
                    post_data=post_data,
                    category=category,
                    link=link,
                    author=author,
                    image_url=None,  # No need to re-upload image
                    original_source=original_source,
                    image_source=image_source,
                    uploader=uploader,
                    page_url=page_url,
                    interest_score=interest_score,
                    post_id=post_id,
                    should_post_tweet=False
                )
            except Exception as e:
                print(f"WordPress posting error for '{title}': {e}")
                logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
@@ -400,49 +423,21 @@ def curate_from_rss():
            finally:
                is_posting = False
-            if post_id:
+            timestamp = datetime.now(timezone.utc).isoformat()
-                share_text = f"Check out this foodie gem! {post_data['title']}"
+            save_json_file(POSTED_TITLES_FILE, title, timestamp)
-                share_text_encoded = quote(share_text)
+            posted_titles.add(title)
-                post_url_encoded = quote(post_url)
+            print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
-                share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
+            logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
                post_data["content"] = f"{final_summary}\n\n{share_links}"
                is_posting = True
                try:
                    post_to_wp(
                        post_data=post_data,
                        category=category,
                        link=link,
                        author=author,
                        image_url=image_url,
                        original_source=original_source,
                        image_source=image_source,
                        uploader=uploader,
                        page_url=page_url,
                        interest_score=interest_score,
                        post_id=post_id,
                        should_post_tweet=False
                    )
                except Exception as e:
                    print(f"Failed to update WordPress post '{title}' with share links: {e}")
                    logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
                finally:
                    is_posting = False
-                timestamp = datetime.now(timezone.utc).isoformat()
+            if image_url:
-                save_json_file(POSTED_TITLES_FILE, title, timestamp)
+                save_json_file(USED_IMAGES_FILE, image_url, timestamp)
-                posted_titles.add(title)
+                used_images.add(image_url)
-                print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
+                print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
-                logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
+                logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
-                if image_url:
+            print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
-                    save_json_file(USED_IMAGES_FILE, image_url, timestamp)
+            logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
-                    used_images.add(image_url)
+            return post_data, category, random.randint(0, 1800)
                    print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
                    logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
                print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
                logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
                return post_data, category, random.randint(0, 1800)
            attempts += 1
            print(f"WP posting failed for '{post_data['title']}'")
@@ -460,18 +455,14 @@ def run_rss_automator():
    lock_fd = None
    try:
        lock_fd = acquire_lock()
        print(f"{datetime.now(timezone.utc)} - INFO - ***** RSS Automator Launched *****")
        logging.info("***** RSS Automator Launched *****")
        post_data, category, sleep_time = curate_from_rss()
        if not post_data:
            print("No postable RSS article found")
            logging.info("No postable RSS article found")
        print(f"Sleeping for {sleep_time}s")
        logging.info(f"Completed run with sleep time: {sleep_time} seconds")
        time.sleep(sleep_time)
        return post_data, category, sleep_time
    except Exception as e:
        print(f"Fatal error in run_rss_automator: {e}")
        logging.error(f"Fatal error in run_rss_automator: {e}", exc_info=True)
        return None, None, random.randint(600, 1800)
    finally:
@@ -8,6 +8,7 @@ from PIL import Image
 import pytesseract
 import io
 import tempfile
 import shutil
 import requests
 import time
 import openai
@@ -28,6 +29,8 @@ from foodie_config import (
 )
 last_author_index = -1
 # Global to track round-robin index
 round_robin_index = 0
 load_dotenv()
 client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
@@ -36,86 +39,73 @@ IMAGE_UPLOAD_TIMEOUT = 30  # Added to fix NameError
 IMAGE_EXPIRATION_DAYS = 7  # 7 days, consistent with foodie_automator_rss.py
 def load_json_file(file_path, expiration_hours=None):
-    """Load JSON file and return its contents as a list."""
+    """
    Load JSON file, optionally filtering out expired entries.
    """
    logger = logging.getLogger(__name__)
    default = []
    if not os.path.exists(file_path):
        logger.info(f"File {file_path} does not exist. Returning default: {default}")
        return default
    try:
        if not os.path.exists(file_path):
            logging.info(f"File {file_path} does not exist, initializing with empty list")
            with open(file_path, 'w') as f:
                json.dump([], f)
            return []
        with open(file_path, 'r') as f:
            data = json.load(f)
-
+        
-        if not isinstance(data, list):
+        if expiration_hours is not None:
            logging.warning(f"Data in {file_path} is not a list, resetting to empty list")
            with open(file_path, 'w') as f:
                json.dump([], f)
            return []
        valid_entries = []
        if expiration_hours:
            cutoff = datetime.now(timezone.utc) - timedelta(hours=expiration_hours)
-            for entry in data:
+            filtered_data = [
-                try:
+                entry for entry in data
-                    timestamp_str = entry.get("timestamp")
+                if datetime.fromisoformat(entry['timestamp']) > cutoff
-                    if timestamp_str:
+            ]
-                        timestamp = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
+            if len(filtered_data) < len(data):
-                        if timestamp < cutoff:
+                logger.info(f"Filtered {len(data) - len(filtered_data)} expired entries from {file_path}")
-                            continue
+                save_json_file(file_path, filtered_data)  # Save filtered data
-                    valid_entries.append(entry)
+            data = filtered_data
-                except (ValueError, TypeError) as e:
+        
-                    logging.warning(f"Skipping malformed entry in {file_path}: {e}")
+        logger.info(f"Loaded {len(data)} valid entries from {file_path}")
-                    continue
+        return data
        else:
            valid_entries = data
        logging.info(f"Loaded {len(valid_entries)} valid entries from {file_path}")
        return valid_entries
    except json.JSONDecodeError as e:
-        logging.error(f"Invalid JSON in {file_path}: {e}. Resetting to empty list.")
+        logger.error(f"Invalid JSON in {file_path}: {str(e)}. Resetting to default.")
-        with open(file_path, 'w') as f:
+        save_json_file(file_path, default)
-            json.dump([], f)
+        return default
        return []
    except Exception as e:
-        logging.error(f"Failed to load JSON file {file_path}: {e}")
+        logger.error(f"Failed to load {file_path}: {str(e)}. Returning default.")
-        return []
+        return default
def save_json_file(file_path, data, timestamp=None):
    """
    Save data to a JSON file atomically.

    Args:
        file_path: Destination JSON file.
        data: The object to write. When timestamp is given, data is instead
            the value stored under the 'title' key of a new entry.
        timestamp: If truthy, the current file content is loaded with
            load_json_file and {'title': data, 'timestamp': timestamp} is
            appended, unless an identical entry is already present.

    Returns:
        True when the file was written or the entry already existed,
        False when serialization or the write failed.
    """
    logger = logging.getLogger(__name__)
    temp_path = None
    try:
        # If timestamp is provided, append as a new entry
        if timestamp:
            current_data = load_json_file(file_path)
            if not isinstance(current_data, list):
                logger.warning(f"Data in {file_path} is not a list. Resetting to empty list.")
                current_data = []
            new_entry = {'title': data, 'timestamp': timestamp}
            # Exact match on both title and timestamp.
            if new_entry in current_data:
                logger.info(f"Entry {data} already exists in {file_path}")
                return True
            current_data.append(new_entry)
            data = current_data

        # Serialize up front so unserializable data fails before any file is touched.
        serialized = json.dumps(data, indent=2)

        # The temp file must live next to the target: os.replace() is only
        # atomic when source and destination are on the same filesystem.
        target_dir = os.path.dirname(os.path.abspath(file_path))
        with tempfile.NamedTemporaryFile(
            'w', delete=False, encoding='utf-8', dir=target_dir, suffix='.tmp'
        ) as temp_file:
            temp_path = temp_file.name
            temp_file.write(serialized)

        # Atomically move to target
        os.replace(temp_path, file_path)
        temp_path = None
        logger.info(f"Saved data to {file_path}")
        return True
    except (TypeError, ValueError, OSError) as e:
        # json.dumps raises TypeError/ValueError; file operations raise OSError.
        logger.error(f"Failed to save {file_path}: {str(e)}")
        return False
    finally:
        if temp_path is not None:
            # Best-effort cleanup of a temp file left behind by a failed write.
            try:
                os.unlink(temp_path)
            except OSError:
                pass
 def generate_article_tweet(author, post, persona):
    title = post["title"]
@@ -1136,64 +1126,61 @@ def check_rate_limit(response):
        logging.warning(f"Failed to parse rate limit headers: {e}")
        return None, None
def check_author_rate_limit(author, max_requests=10, window_seconds=3600):
    """
    Check if an author is rate-limited, consuming one request when not.

    Per-author counters are kept in a JSON file as
    {username: {'remaining': int, 'reset': epoch_seconds}}.

    Args:
        author: Mapping with a 'username' key.
        max_requests: Requests granted per window.
        window_seconds: Length of a window in seconds.

    Returns:
        True if the author has no requests left in the current window,
        False otherwise (one request is deducted and persisted).
    """
    logger = logging.getLogger(__name__)
    rate_limit_file = '/home/shane/foodie_automator/rate_limit_info.json'

    # load_json_file has no `default` parameter and returns a list when the
    # file is missing or unreadable, so coerce anything that is not a dict.
    rate_limit_info = load_json_file(rate_limit_file)
    if not isinstance(rate_limit_info, dict):
        rate_limit_info = {}

    username = author['username']
    if username not in rate_limit_info:
        # 'reset' is set to now so the window check below initialises it.
        rate_limit_info[username] = {
            'remaining': max_requests,
            'reset': time.time()
        }

    info = rate_limit_info[username]
    current_time = time.time()

    # Reset if window expired
    if current_time >= info['reset']:
        info['remaining'] = max_requests
        info['reset'] = current_time + window_seconds
        logger.info(f"Reset rate limit for {author['username']}: {max_requests} requests available")
        save_json_file(rate_limit_file, rate_limit_info)

    if info['remaining'] <= 0:
        reset_time = datetime.fromtimestamp(info['reset'], tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
        logger.info(f"Author {author['username']} is rate-limited. Remaining: {info['remaining']}, Reset at: {reset_time}")
        return True

    # Decrement remaining requests
    info['remaining'] -= 1
    save_json_file(rate_limit_file, rate_limit_info)
    logger.info(f"Updated rate limit for {author['username']}: {info['remaining']} requests remaining")
    return False
 def get_next_author_round_robin():
-    """Select the next author in round-robin fashion, respecting rate limits."""
+    """
-    global last_author_index
+    Select the next author using round-robin, respecting rate limits.
-    authors = AUTHORS
+    """
-    num_authors = len(authors)
+    from foodie_config import AUTHORS
-    if num_authors == 0:
+    global round_robin_index
-        logging.error("No authors available in AUTHORS list.")
+    logger = logging.getLogger(__name__)
-        return None
+    
-
+    for _ in range(len(AUTHORS)):
-    # Try each author in round-robin order
+        author = AUTHORS[round_robin_index % len(AUTHORS)]
-    for i in range(num_authors):
+        round_robin_index = (round_robin_index + 1) % len(AUTHORS)
-        last_author_index = (last_author_index + 1) % num_authors
+        
-        author = authors[last_author_index]
+        if not check_author_rate_limit(author):
-        can_post, remaining, reset = check_author_rate_limit(author)
+            logger.info(f"Selected author via round-robin: {author['username']}")
        if can_post:
            logging.info(f"Author {author['username']} can post")
            return author
-        else:
+    
-            reset_time = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(reset)) if reset else "Unknown"
+    logger.warning("No authors available due to rate limits. Selecting a random author as fallback.")
-            logging.info(f"Skipping author {author['username']} due to rate limit. Remaining: {remaining}, Reset at: {reset_time}")
+    author = random.choice(AUTHORS)
-
+    logger.info(f"Selected author via random fallback: {author['username']}")
-    # If no authors are available, return None instead of falling back
+    return author
    logging.warning("No authors available due to rate limits. Skipping posting.")
    return None
 def prepare_post_data(summary, title, main_topic=None):
    try: