From e7a06e337541b07a33bab0102d540b519d5740b5 Mon Sep 17 00:00:00 2001 From: Shane Date: Wed, 7 May 2025 20:45:28 +1000 Subject: [PATCH] fix all json formatting --- foodie_utils.py | 82 ++++++++++++++++------------------------- foodie_weekly_thread.py | 66 ++++++++++++--------------------- 2 files changed, 55 insertions(+), 93 deletions(-) diff --git a/foodie_utils.py b/foodie_utils.py index 49dd207..1a26df1 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -23,7 +23,7 @@ import flickr_api from foodie_config import ( RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, PERSONA_CONFIGS, get_clean_source_name, AUTHORS, LIGHT_TASK_MODEL, SUMMARY_MODEL, X_API_CREDENTIALS, - FLICKR_API_KEY, FLICKR_API_SECRET, PIXABAY_API_KEY + FLICKR_API_KEY, FLICKR_API_SECRET, PIXABAY_API_KEY, RECENT_POSTS_FILE, USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS ) load_dotenv() @@ -33,6 +33,7 @@ IMAGE_UPLOAD_TIMEOUT = 30 # Added to fix NameError IMAGE_EXPIRATION_DAYS = 7 # 7 days, consistent with foodie_automator_rss.py def load_json_file(file_path, expiration_hours=None): + """Load JSON file and return its contents as a list.""" try: if not os.path.exists(file_path): logging.info(f"File {file_path} does not exist, initializing with empty list") @@ -41,26 +42,13 @@ def load_json_file(file_path, expiration_hours=None): return [] with open(file_path, 'r') as f: - try: - data = json.load(f) - except json.JSONDecodeError as e: - logging.warning(f"Invalid JSON in {file_path}: {e}. Attempting line-by-line parsing.") - data = [] - f.seek(0) - for line_number, line in enumerate(f, 1): - line = line.strip() - if not line: - continue - try: - entry = json.loads(line) - data.append(entry) - except json.JSONDecodeError as e: - logging.warning(f"Skipping invalid JSON line in {file_path} at line {line_number}: {e}") - continue + data = json.load(f) if not isinstance(data, list): logging.warning(f"Data in {file_path} is not a list, resetting to empty list") - data = [] + with open(file_path, 'w') as f: + json.dump([], f) + return [] valid_entries = [] if expiration_hours: @@ -79,29 +67,26 @@ def load_json_file(file_path, expiration_hours=None): else: valid_entries = data - logging.info(f"Loaded {len(valid_entries)} entries from {file_path}, {len(valid_entries)} valid after expiration check") + logging.info(f"Loaded {len(valid_entries)} valid entries from {file_path}") return valid_entries + except json.JSONDecodeError as e: + logging.error(f"Invalid JSON in {file_path}: {e}. 
Resetting to empty list.") + with open(file_path, 'w') as f: + json.dump([], f) + return [] except Exception as e: logging.error(f"Failed to load JSON file {file_path}: {e}") return [] def save_json_file(file_path, title, timestamp): + """Save an entry to a JSON file, maintaining a JSON array.""" try: - entries = load_json_file(file_path, 24 if "posted_" in file_path else 7 * 24) # 24 hours for titles, 7 days for images + entries = load_json_file(file_path, 24 if "posted_" in file_path else IMAGE_EXPIRATION_DAYS * 24) entry = {"title": title, "timestamp": timestamp} entries.append(entry) - - # Prune entries older than expiration period - expiration_hours = 24 if "posted_" in file_path else 7 * 24 - cutoff = datetime.now(timezone.utc) - timedelta(hours=expiration_hours) - pruned_entries = [e for e in entries if datetime.fromisoformat(e["timestamp"]) > cutoff] - with open(file_path, 'w') as f: - for entry in pruned_entries: - f.write(json.dumps(entry) + '\n') - + json.dump(entries, f, indent=2) logging.info(f"Saved '{title}' to {file_path}") - logging.info(f"Pruned {file_path} to {len(pruned_entries)} entries (older than {expiration_hours//24} days removed)") except Exception as e: logging.error(f"Failed to save to {file_path}: {e}") @@ -790,8 +775,6 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im content = "Content unavailable. Check the original source for details." formatted_content = "\n".join(f"
<p>{para}</p>
" for para in content.split('\n') if para.strip()) - # Removed the block that appends image attribution to the content - author_id_map = { "owenjohnson": 10, "javiermorales": 2, @@ -850,8 +833,10 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im post_id = post_info["id"] post_url = post_info["link"] - timestamp = datetime.now(timezone.utc).isoformat() - save_post_to_recent(post_data["title"], post_url, author["username"], timestamp) + # Save to recent_posts.json only on initial post, not updates + if not post_id: + timestamp = datetime.now(timezone.utc).isoformat() + save_post_to_recent(post_data["title"], post_url, author["username"], timestamp) if should_post_tweet: try: @@ -1234,9 +1219,11 @@ def prepare_post_data(summary, title, main_topic=None): logging.error(f"Failed to prepare post data: {e}") return None, None, None, None, None, None, None + def save_post_to_recent(post_title, post_url, author_username, timestamp): + """Save a post to recent_posts.json, maintaining a JSON array.""" try: - recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json', 24) # Added expiration_hours + recent_posts = load_json_file(RECENT_POSTS_FILE, expiration_hours=24) entry = { "title": post_title, "url": post_url, @@ -1244,23 +1231,18 @@ def save_post_to_recent(post_title, post_url, author_username, timestamp): "timestamp": timestamp } recent_posts.append(entry) - with open('/home/shane/foodie_automator/recent_posts.json', 'w') as f: - for item in recent_posts: - json.dump(item, f) - f.write('\n') - logging.info(f"Saved post '{post_title}' to recent_posts.json") + with open(RECENT_POSTS_FILE, 'w') as f: + json.dump(recent_posts, f, indent=2) + logging.info(f"Saved post '{post_title}' to {RECENT_POSTS_FILE}") except Exception as e: - logging.error(f"Failed to save post to recent_posts.json: {e}") + logging.error(f"Failed to save post to {RECENT_POSTS_FILE}: {e}") def prune_recent_posts(): + """Prune recent_posts.json to keep entries within the last 24 hours.""" try: - cutoff = (datetime.now(timezone.utc) - timedelta(hours=24)).isoformat() - recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json') - recent_posts = [entry for entry in recent_posts if entry["timestamp"] > cutoff] - with open('/home/shane/foodie_automator/recent_posts.json', 'w') as f: - for item in recent_posts: - json.dump(item, f) - f.write('\n') - logging.info(f"Pruned recent_posts.json to {len(recent_posts)} entries") + recent_posts = load_json_file(RECENT_POSTS_FILE, expiration_hours=24) + with open(RECENT_POSTS_FILE, 'w') as f: + json.dump(recent_posts, f, indent=2) + logging.info(f"Pruned {RECENT_POSTS_FILE} to {len(recent_posts)} entries") except Exception as e: - logging.error(f"Failed to prune recent_posts.json: {e}") \ No newline at end of file + logging.error(f"Failed to prune {RECENT_POSTS_FILE}: {e}") \ No newline at end of file diff --git a/foodie_weekly_thread.py b/foodie_weekly_thread.py index 5e0b971..2444265 100644 --- a/foodie_weekly_thread.py +++ b/foodie_weekly_thread.py @@ -128,54 +128,34 @@ def validate_twitter_credentials(): def load_recent_posts(): """Load and deduplicate posts from recent_posts.json.""" - posts = [] - unique_posts = {} logging.debug(f"Attempting to load posts from {RECENT_POSTS_FILE}") + posts = load_json_file(RECENT_POSTS_FILE) - if not os.path.exists(RECENT_POSTS_FILE): - logging.error(f"Recent posts file {RECENT_POSTS_FILE} does not exist") - return posts - if not os.access(RECENT_POSTS_FILE, os.R_OK): - 
logging.error(f"Cannot read {RECENT_POSTS_FILE} due to permission issues") - return posts + if not posts: + logging.warning(f"No valid posts loaded from {RECENT_POSTS_FILE}") + return [] - try: - with open(RECENT_POSTS_FILE, 'r') as f: - lines = f.readlines() - logging.debug(f"Read {len(lines)} lines from {RECENT_POSTS_FILE}") - - for i, line in enumerate(lines, 1): - if not line.strip(): - logging.debug(f"Skipping empty line {i} in {RECENT_POSTS_FILE}") - continue - try: - entry = json.loads(line.strip()) - required_fields = ["title", "url", "author_username", "timestamp"] - if not all(key in entry for key in required_fields): - logging.warning(f"Skipping invalid entry at line {i}: missing fields {entry}") - continue - try: - datetime.fromisoformat(entry["timestamp"]) - except ValueError: - logging.warning(f"Skipping entry at line {i}: invalid timestamp {entry['timestamp']}") - continue - key = (entry["title"], entry["url"], entry["author_username"]) - if key in unique_posts: - logging.debug(f"Skipping duplicate entry at line {i}: {entry['title']}") - continue - unique_posts[key] = entry - posts.append(entry) - except json.JSONDecodeError as e: - logging.warning(f"Skipping invalid JSON at line {i}: {e}") + # Deduplicate posts + unique_posts = {} + for post in posts: + try: + required_fields = ["title", "url", "author_username", "timestamp"] + if not all(key in post for key in required_fields): + logging.warning(f"Skipping invalid post: missing fields {post}") continue - logging.info(f"Loaded {len(posts)} unique posts from {RECENT_POSTS_FILE} (after deduplication)") - except Exception as e: - logging.error(f"Failed to load {RECENT_POSTS_FILE}: {e}", exc_info=True) - return posts + datetime.fromisoformat(post["timestamp"]) + key = (post["title"], post["url"], post["author_username"]) + if key not in unique_posts: + unique_posts[key] = post + else: + logging.debug(f"Skipping duplicate post: {post['title']}") + except (KeyError, ValueError) as e: + logging.warning(f"Skipping post due to invalid format: {e}") + continue - if not posts: - logging.warning(f"No valid posts loaded from {RECENT_POSTS_FILE}") - return posts + deduped_posts = list(unique_posts.values()) + logging.info(f"Loaded {len(deduped_posts)} unique posts from {RECENT_POSTS_FILE}") + return deduped_posts def filter_posts_for_week(posts, start_date, end_date): """Filter posts within the given week range."""