From e7a06e337541b07a33bab0102d540b519d5740b5 Mon Sep 17 00:00:00 2001 From: Shane Date: Wed, 7 May 2025 20:45:28 +1000 Subject: [PATCH] fix all json formatting --- foodie_utils.py | 82 ++++++++++++++++------------------------- foodie_weekly_thread.py | 66 ++++++++++++--------------------- 2 files changed, 55 insertions(+), 93 deletions(-) diff --git a/foodie_utils.py b/foodie_utils.py index 49dd207..1a26df1 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -23,7 +23,7 @@ import flickr_api from foodie_config import ( RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, PERSONA_CONFIGS, get_clean_source_name, AUTHORS, LIGHT_TASK_MODEL, SUMMARY_MODEL, X_API_CREDENTIALS, - FLICKR_API_KEY, FLICKR_API_SECRET, PIXABAY_API_KEY + FLICKR_API_KEY, FLICKR_API_SECRET, PIXABAY_API_KEY, RECENT_POSTS_FILE, USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS ) load_dotenv() @@ -33,6 +33,7 @@ IMAGE_UPLOAD_TIMEOUT = 30 # Added to fix NameError IMAGE_EXPIRATION_DAYS = 7 # 7 days, consistent with foodie_automator_rss.py def load_json_file(file_path, expiration_hours=None): + """Load JSON file and return its contents as a list.""" try: if not os.path.exists(file_path): logging.info(f"File {file_path} does not exist, initializing with empty list") @@ -41,26 +42,13 @@ def load_json_file(file_path, expiration_hours=None): return [] with open(file_path, 'r') as f: - try: - data = json.load(f) - except json.JSONDecodeError as e: - logging.warning(f"Invalid JSON in {file_path}: {e}. Attempting line-by-line parsing.") - data = [] - f.seek(0) - for line_number, line in enumerate(f, 1): - line = line.strip() - if not line: - continue - try: - entry = json.loads(line) - data.append(entry) - except json.JSONDecodeError as e: - logging.warning(f"Skipping invalid JSON line in {file_path} at line {line_number}: {e}") - continue + data = json.load(f) if not isinstance(data, list): logging.warning(f"Data in {file_path} is not a list, resetting to empty list") - data = [] + with open(file_path, 'w') as f: + json.dump([], f) + return [] valid_entries = [] if expiration_hours: @@ -79,29 +67,26 @@ def load_json_file(file_path, expiration_hours=None): else: valid_entries = data - logging.info(f"Loaded {len(valid_entries)} entries from {file_path}, {len(valid_entries)} valid after expiration check") + logging.info(f"Loaded {len(valid_entries)} valid entries from {file_path}") return valid_entries + except json.JSONDecodeError as e: + logging.error(f"Invalid JSON in {file_path}: {e}. 
Resetting to empty list.") + with open(file_path, 'w') as f: + json.dump([], f) + return [] except Exception as e: logging.error(f"Failed to load JSON file {file_path}: {e}") return [] def save_json_file(file_path, title, timestamp): + """Save an entry to a JSON file, maintaining a JSON array.""" try: - entries = load_json_file(file_path, 24 if "posted_" in file_path else 7 * 24) # 24 hours for titles, 7 days for images + entries = load_json_file(file_path, 24 if "posted_" in file_path else IMAGE_EXPIRATION_DAYS * 24) entry = {"title": title, "timestamp": timestamp} entries.append(entry) - - # Prune entries older than expiration period - expiration_hours = 24 if "posted_" in file_path else 7 * 24 - cutoff = datetime.now(timezone.utc) - timedelta(hours=expiration_hours) - pruned_entries = [e for e in entries if datetime.fromisoformat(e["timestamp"]) > cutoff] - with open(file_path, 'w') as f: - for entry in pruned_entries: - f.write(json.dumps(entry) + '\n') - + json.dump(entries, f, indent=2) logging.info(f"Saved '{title}' to {file_path}") - logging.info(f"Pruned {file_path} to {len(pruned_entries)} entries (older than {expiration_hours//24} days removed)") except Exception as e: logging.error(f"Failed to save to {file_path}: {e}") @@ -790,8 +775,6 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im content = "Content unavailable. Check the original source for details." formatted_content = "\n".join(f"
<p>{para}</p>
" for para in content.split('\n') if para.strip()) - # Removed the block that appends image attribution to the content - author_id_map = { "owenjohnson": 10, "javiermorales": 2, @@ -850,8 +833,10 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im post_id = post_info["id"] post_url = post_info["link"] - timestamp = datetime.now(timezone.utc).isoformat() - save_post_to_recent(post_data["title"], post_url, author["username"], timestamp) + # Save to recent_posts.json only on initial post, not updates + if not post_id: + timestamp = datetime.now(timezone.utc).isoformat() + save_post_to_recent(post_data["title"], post_url, author["username"], timestamp) if should_post_tweet: try: @@ -1234,9 +1219,11 @@ def prepare_post_data(summary, title, main_topic=None): logging.error(f"Failed to prepare post data: {e}") return None, None, None, None, None, None, None + def save_post_to_recent(post_title, post_url, author_username, timestamp): + """Save a post to recent_posts.json, maintaining a JSON array.""" try: - recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json', 24) # Added expiration_hours + recent_posts = load_json_file(RECENT_POSTS_FILE, expiration_hours=24) entry = { "title": post_title, "url": post_url, @@ -1244,23 +1231,18 @@ def save_post_to_recent(post_title, post_url, author_username, timestamp): "timestamp": timestamp } recent_posts.append(entry) - with open('/home/shane/foodie_automator/recent_posts.json', 'w') as f: - for item in recent_posts: - json.dump(item, f) - f.write('\n') - logging.info(f"Saved post '{post_title}' to recent_posts.json") + with open(RECENT_POSTS_FILE, 'w') as f: + json.dump(recent_posts, f, indent=2) + logging.info(f"Saved post '{post_title}' to {RECENT_POSTS_FILE}") except Exception as e: - logging.error(f"Failed to save post to recent_posts.json: {e}") + logging.error(f"Failed to save post to {RECENT_POSTS_FILE}: {e}") def prune_recent_posts(): + """Prune recent_posts.json to keep entries within the last 24 hours.""" try: - cutoff = (datetime.now(timezone.utc) - timedelta(hours=24)).isoformat() - recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json') - recent_posts = [entry for entry in recent_posts if entry["timestamp"] > cutoff] - with open('/home/shane/foodie_automator/recent_posts.json', 'w') as f: - for item in recent_posts: - json.dump(item, f) - f.write('\n') - logging.info(f"Pruned recent_posts.json to {len(recent_posts)} entries") + recent_posts = load_json_file(RECENT_POSTS_FILE, expiration_hours=24) + with open(RECENT_POSTS_FILE, 'w') as f: + json.dump(recent_posts, f, indent=2) + logging.info(f"Pruned {RECENT_POSTS_FILE} to {len(recent_posts)} entries") except Exception as e: - logging.error(f"Failed to prune recent_posts.json: {e}") \ No newline at end of file + logging.error(f"Failed to prune {RECENT_POSTS_FILE}: {e}") \ No newline at end of file diff --git a/foodie_weekly_thread.py b/foodie_weekly_thread.py index 5e0b971..2444265 100644 --- a/foodie_weekly_thread.py +++ b/foodie_weekly_thread.py @@ -128,54 +128,34 @@ def validate_twitter_credentials(): def load_recent_posts(): """Load and deduplicate posts from recent_posts.json.""" - posts = [] - unique_posts = {} logging.debug(f"Attempting to load posts from {RECENT_POSTS_FILE}") + posts = load_json_file(RECENT_POSTS_FILE) - if not os.path.exists(RECENT_POSTS_FILE): - logging.error(f"Recent posts file {RECENT_POSTS_FILE} does not exist") - return posts - if not os.access(RECENT_POSTS_FILE, os.R_OK): - 
logging.error(f"Cannot read {RECENT_POSTS_FILE} due to permission issues") - return posts + if not posts: + logging.warning(f"No valid posts loaded from {RECENT_POSTS_FILE}") + return [] - try: - with open(RECENT_POSTS_FILE, 'r') as f: - lines = f.readlines() - logging.debug(f"Read {len(lines)} lines from {RECENT_POSTS_FILE}") - - for i, line in enumerate(lines, 1): - if not line.strip(): - logging.debug(f"Skipping empty line {i} in {RECENT_POSTS_FILE}") - continue - try: - entry = json.loads(line.strip()) - required_fields = ["title", "url", "author_username", "timestamp"] - if not all(key in entry for key in required_fields): - logging.warning(f"Skipping invalid entry at line {i}: missing fields {entry}") - continue - try: - datetime.fromisoformat(entry["timestamp"]) - except ValueError: - logging.warning(f"Skipping entry at line {i}: invalid timestamp {entry['timestamp']}") - continue - key = (entry["title"], entry["url"], entry["author_username"]) - if key in unique_posts: - logging.debug(f"Skipping duplicate entry at line {i}: {entry['title']}") - continue - unique_posts[key] = entry - posts.append(entry) - except json.JSONDecodeError as e: - logging.warning(f"Skipping invalid JSON at line {i}: {e}") + # Deduplicate posts + unique_posts = {} + for post in posts: + try: + required_fields = ["title", "url", "author_username", "timestamp"] + if not all(key in post for key in required_fields): + logging.warning(f"Skipping invalid post: missing fields {post}") continue - logging.info(f"Loaded {len(posts)} unique posts from {RECENT_POSTS_FILE} (after deduplication)") - except Exception as e: - logging.error(f"Failed to load {RECENT_POSTS_FILE}: {e}", exc_info=True) - return posts + datetime.fromisoformat(post["timestamp"]) + key = (post["title"], post["url"], post["author_username"]) + if key not in unique_posts: + unique_posts[key] = post + else: + logging.debug(f"Skipping duplicate post: {post['title']}") + except (KeyError, ValueError) as e: + logging.warning(f"Skipping post due to invalid format: {e}") + continue - if not posts: - logging.warning(f"No valid posts loaded from {RECENT_POSTS_FILE}") - return posts + deduped_posts = list(unique_posts.values()) + logging.info(f"Loaded {len(deduped_posts)} unique posts from {RECENT_POSTS_FILE}") + return deduped_posts def filter_posts_for_week(posts, start_date, end_date): """Filter posts within the given week range."""