fix all json formatting
This commit is contained in:
+32
-50
@@ -23,7 +23,7 @@ import flickr_api
|
|||||||
from foodie_config import (
|
from foodie_config import (
|
||||||
RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, PERSONA_CONFIGS,
|
RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, PERSONA_CONFIGS,
|
||||||
get_clean_source_name, AUTHORS, LIGHT_TASK_MODEL, SUMMARY_MODEL, X_API_CREDENTIALS,
|
get_clean_source_name, AUTHORS, LIGHT_TASK_MODEL, SUMMARY_MODEL, X_API_CREDENTIALS,
|
||||||
FLICKR_API_KEY, FLICKR_API_SECRET, PIXABAY_API_KEY
|
FLICKR_API_KEY, FLICKR_API_SECRET, PIXABAY_API_KEY, RECENT_POSTS_FILE, USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS
|
||||||
)
|
)
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
@@ -33,6 +33,7 @@ IMAGE_UPLOAD_TIMEOUT = 30 # Added to fix NameError
|
|||||||
IMAGE_EXPIRATION_DAYS = 7 # 7 days, consistent with foodie_automator_rss.py
|
IMAGE_EXPIRATION_DAYS = 7 # 7 days, consistent with foodie_automator_rss.py
|
||||||
|
|
||||||
def load_json_file(file_path, expiration_hours=None):
|
def load_json_file(file_path, expiration_hours=None):
|
||||||
|
"""Load JSON file and return its contents as a list."""
|
||||||
try:
|
try:
|
||||||
if not os.path.exists(file_path):
|
if not os.path.exists(file_path):
|
||||||
logging.info(f"File {file_path} does not exist, initializing with empty list")
|
logging.info(f"File {file_path} does not exist, initializing with empty list")
|
||||||
@@ -41,26 +42,13 @@ def load_json_file(file_path, expiration_hours=None):
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
with open(file_path, 'r') as f:
|
with open(file_path, 'r') as f:
|
||||||
try:
|
data = json.load(f)
|
||||||
data = json.load(f)
|
|
||||||
except json.JSONDecodeError as e:
|
|
||||||
logging.warning(f"Invalid JSON in {file_path}: {e}. Attempting line-by-line parsing.")
|
|
||||||
data = []
|
|
||||||
f.seek(0)
|
|
||||||
for line_number, line in enumerate(f, 1):
|
|
||||||
line = line.strip()
|
|
||||||
if not line:
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
entry = json.loads(line)
|
|
||||||
data.append(entry)
|
|
||||||
except json.JSONDecodeError as e:
|
|
||||||
logging.warning(f"Skipping invalid JSON line in {file_path} at line {line_number}: {e}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not isinstance(data, list):
|
if not isinstance(data, list):
|
||||||
logging.warning(f"Data in {file_path} is not a list, resetting to empty list")
|
logging.warning(f"Data in {file_path} is not a list, resetting to empty list")
|
||||||
data = []
|
with open(file_path, 'w') as f:
|
||||||
|
json.dump([], f)
|
||||||
|
return []
|
||||||
|
|
||||||
valid_entries = []
|
valid_entries = []
|
||||||
if expiration_hours:
|
if expiration_hours:
|
||||||
@@ -79,29 +67,26 @@ def load_json_file(file_path, expiration_hours=None):
|
|||||||
else:
|
else:
|
||||||
valid_entries = data
|
valid_entries = data
|
||||||
|
|
||||||
logging.info(f"Loaded {len(valid_entries)} entries from {file_path}, {len(valid_entries)} valid after expiration check")
|
logging.info(f"Loaded {len(valid_entries)} valid entries from {file_path}")
|
||||||
return valid_entries
|
return valid_entries
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logging.error(f"Invalid JSON in {file_path}: {e}. Resetting to empty list.")
|
||||||
|
with open(file_path, 'w') as f:
|
||||||
|
json.dump([], f)
|
||||||
|
return []
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Failed to load JSON file {file_path}: {e}")
|
logging.error(f"Failed to load JSON file {file_path}: {e}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def save_json_file(file_path, title, timestamp):
|
def save_json_file(file_path, title, timestamp):
|
||||||
|
"""Save an entry to a JSON file, maintaining a JSON array."""
|
||||||
try:
|
try:
|
||||||
entries = load_json_file(file_path, 24 if "posted_" in file_path else 7 * 24) # 24 hours for titles, 7 days for images
|
entries = load_json_file(file_path, 24 if "posted_" in file_path else IMAGE_EXPIRATION_DAYS * 24)
|
||||||
entry = {"title": title, "timestamp": timestamp}
|
entry = {"title": title, "timestamp": timestamp}
|
||||||
entries.append(entry)
|
entries.append(entry)
|
||||||
|
|
||||||
# Prune entries older than expiration period
|
|
||||||
expiration_hours = 24 if "posted_" in file_path else 7 * 24
|
|
||||||
cutoff = datetime.now(timezone.utc) - timedelta(hours=expiration_hours)
|
|
||||||
pruned_entries = [e for e in entries if datetime.fromisoformat(e["timestamp"]) > cutoff]
|
|
||||||
|
|
||||||
with open(file_path, 'w') as f:
|
with open(file_path, 'w') as f:
|
||||||
for entry in pruned_entries:
|
json.dump(entries, f, indent=2)
|
||||||
f.write(json.dumps(entry) + '\n')
|
|
||||||
|
|
||||||
logging.info(f"Saved '{title}' to {file_path}")
|
logging.info(f"Saved '{title}' to {file_path}")
|
||||||
logging.info(f"Pruned {file_path} to {len(pruned_entries)} entries (older than {expiration_hours//24} days removed)")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Failed to save to {file_path}: {e}")
|
logging.error(f"Failed to save to {file_path}: {e}")
|
||||||
|
|
||||||
@@ -790,8 +775,6 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im
|
|||||||
content = "Content unavailable. Check the original source for details."
|
content = "Content unavailable. Check the original source for details."
|
||||||
formatted_content = "\n".join(f"<p>{para}</p>" for para in content.split('\n') if para.strip())
|
formatted_content = "\n".join(f"<p>{para}</p>" for para in content.split('\n') if para.strip())
|
||||||
|
|
||||||
# Removed the block that appends image attribution to the content
|
|
||||||
|
|
||||||
author_id_map = {
|
author_id_map = {
|
||||||
"owenjohnson": 10,
|
"owenjohnson": 10,
|
||||||
"javiermorales": 2,
|
"javiermorales": 2,
|
||||||
@@ -850,8 +833,10 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im
|
|||||||
post_id = post_info["id"]
|
post_id = post_info["id"]
|
||||||
post_url = post_info["link"]
|
post_url = post_info["link"]
|
||||||
|
|
||||||
timestamp = datetime.now(timezone.utc).isoformat()
|
# Save to recent_posts.json only on initial post, not updates
|
||||||
save_post_to_recent(post_data["title"], post_url, author["username"], timestamp)
|
if not post_id:
|
||||||
|
timestamp = datetime.now(timezone.utc).isoformat()
|
||||||
|
save_post_to_recent(post_data["title"], post_url, author["username"], timestamp)
|
||||||
|
|
||||||
if should_post_tweet:
|
if should_post_tweet:
|
||||||
try:
|
try:
|
||||||
@@ -1234,9 +1219,11 @@ def prepare_post_data(summary, title, main_topic=None):
|
|||||||
logging.error(f"Failed to prepare post data: {e}")
|
logging.error(f"Failed to prepare post data: {e}")
|
||||||
return None, None, None, None, None, None, None
|
return None, None, None, None, None, None, None
|
||||||
|
|
||||||
|
|
||||||
def save_post_to_recent(post_title, post_url, author_username, timestamp):
|
def save_post_to_recent(post_title, post_url, author_username, timestamp):
|
||||||
|
"""Save a post to recent_posts.json, maintaining a JSON array."""
|
||||||
try:
|
try:
|
||||||
recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json', 24) # Added expiration_hours
|
recent_posts = load_json_file(RECENT_POSTS_FILE, expiration_hours=24)
|
||||||
entry = {
|
entry = {
|
||||||
"title": post_title,
|
"title": post_title,
|
||||||
"url": post_url,
|
"url": post_url,
|
||||||
@@ -1244,23 +1231,18 @@ def save_post_to_recent(post_title, post_url, author_username, timestamp):
|
|||||||
"timestamp": timestamp
|
"timestamp": timestamp
|
||||||
}
|
}
|
||||||
recent_posts.append(entry)
|
recent_posts.append(entry)
|
||||||
with open('/home/shane/foodie_automator/recent_posts.json', 'w') as f:
|
with open(RECENT_POSTS_FILE, 'w') as f:
|
||||||
for item in recent_posts:
|
json.dump(recent_posts, f, indent=2)
|
||||||
json.dump(item, f)
|
logging.info(f"Saved post '{post_title}' to {RECENT_POSTS_FILE}")
|
||||||
f.write('\n')
|
|
||||||
logging.info(f"Saved post '{post_title}' to recent_posts.json")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Failed to save post to recent_posts.json: {e}")
|
logging.error(f"Failed to save post to {RECENT_POSTS_FILE}: {e}")
|
||||||
|
|
||||||
def prune_recent_posts():
|
def prune_recent_posts():
|
||||||
|
"""Prune recent_posts.json to keep entries within the last 24 hours."""
|
||||||
try:
|
try:
|
||||||
cutoff = (datetime.now(timezone.utc) - timedelta(hours=24)).isoformat()
|
recent_posts = load_json_file(RECENT_POSTS_FILE, expiration_hours=24)
|
||||||
recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json')
|
with open(RECENT_POSTS_FILE, 'w') as f:
|
||||||
recent_posts = [entry for entry in recent_posts if entry["timestamp"] > cutoff]
|
json.dump(recent_posts, f, indent=2)
|
||||||
with open('/home/shane/foodie_automator/recent_posts.json', 'w') as f:
|
logging.info(f"Pruned {RECENT_POSTS_FILE} to {len(recent_posts)} entries")
|
||||||
for item in recent_posts:
|
|
||||||
json.dump(item, f)
|
|
||||||
f.write('\n')
|
|
||||||
logging.info(f"Pruned recent_posts.json to {len(recent_posts)} entries")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Failed to prune recent_posts.json: {e}")
|
logging.error(f"Failed to prune {RECENT_POSTS_FILE}: {e}")
|
||||||
+24
-44
@@ -128,54 +128,34 @@ def validate_twitter_credentials():
|
|||||||
|
|
||||||
def load_recent_posts():
|
def load_recent_posts():
|
||||||
"""Load and deduplicate posts from recent_posts.json."""
|
"""Load and deduplicate posts from recent_posts.json."""
|
||||||
posts = []
|
|
||||||
unique_posts = {}
|
|
||||||
logging.debug(f"Attempting to load posts from {RECENT_POSTS_FILE}")
|
logging.debug(f"Attempting to load posts from {RECENT_POSTS_FILE}")
|
||||||
|
posts = load_json_file(RECENT_POSTS_FILE)
|
||||||
if not os.path.exists(RECENT_POSTS_FILE):
|
|
||||||
logging.error(f"Recent posts file {RECENT_POSTS_FILE} does not exist")
|
|
||||||
return posts
|
|
||||||
if not os.access(RECENT_POSTS_FILE, os.R_OK):
|
|
||||||
logging.error(f"Cannot read {RECENT_POSTS_FILE} due to permission issues")
|
|
||||||
return posts
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(RECENT_POSTS_FILE, 'r') as f:
|
|
||||||
lines = f.readlines()
|
|
||||||
logging.debug(f"Read {len(lines)} lines from {RECENT_POSTS_FILE}")
|
|
||||||
|
|
||||||
for i, line in enumerate(lines, 1):
|
|
||||||
if not line.strip():
|
|
||||||
logging.debug(f"Skipping empty line {i} in {RECENT_POSTS_FILE}")
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
entry = json.loads(line.strip())
|
|
||||||
required_fields = ["title", "url", "author_username", "timestamp"]
|
|
||||||
if not all(key in entry for key in required_fields):
|
|
||||||
logging.warning(f"Skipping invalid entry at line {i}: missing fields {entry}")
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
datetime.fromisoformat(entry["timestamp"])
|
|
||||||
except ValueError:
|
|
||||||
logging.warning(f"Skipping entry at line {i}: invalid timestamp {entry['timestamp']}")
|
|
||||||
continue
|
|
||||||
key = (entry["title"], entry["url"], entry["author_username"])
|
|
||||||
if key in unique_posts:
|
|
||||||
logging.debug(f"Skipping duplicate entry at line {i}: {entry['title']}")
|
|
||||||
continue
|
|
||||||
unique_posts[key] = entry
|
|
||||||
posts.append(entry)
|
|
||||||
except json.JSONDecodeError as e:
|
|
||||||
logging.warning(f"Skipping invalid JSON at line {i}: {e}")
|
|
||||||
continue
|
|
||||||
logging.info(f"Loaded {len(posts)} unique posts from {RECENT_POSTS_FILE} (after deduplication)")
|
|
||||||
except Exception as e:
|
|
||||||
logging.error(f"Failed to load {RECENT_POSTS_FILE}: {e}", exc_info=True)
|
|
||||||
return posts
|
|
||||||
|
|
||||||
if not posts:
|
if not posts:
|
||||||
logging.warning(f"No valid posts loaded from {RECENT_POSTS_FILE}")
|
logging.warning(f"No valid posts loaded from {RECENT_POSTS_FILE}")
|
||||||
return posts
|
return []
|
||||||
|
|
||||||
|
# Deduplicate posts
|
||||||
|
unique_posts = {}
|
||||||
|
for post in posts:
|
||||||
|
try:
|
||||||
|
required_fields = ["title", "url", "author_username", "timestamp"]
|
||||||
|
if not all(key in post for key in required_fields):
|
||||||
|
logging.warning(f"Skipping invalid post: missing fields {post}")
|
||||||
|
continue
|
||||||
|
datetime.fromisoformat(post["timestamp"])
|
||||||
|
key = (post["title"], post["url"], post["author_username"])
|
||||||
|
if key not in unique_posts:
|
||||||
|
unique_posts[key] = post
|
||||||
|
else:
|
||||||
|
logging.debug(f"Skipping duplicate post: {post['title']}")
|
||||||
|
except (KeyError, ValueError) as e:
|
||||||
|
logging.warning(f"Skipping post due to invalid format: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
deduped_posts = list(unique_posts.values())
|
||||||
|
logging.info(f"Loaded {len(deduped_posts)} unique posts from {RECENT_POSTS_FILE}")
|
||||||
|
return deduped_posts
|
||||||
|
|
||||||
def filter_posts_for_week(posts, start_date, end_date):
|
def filter_posts_for_week(posts, start_date, end_date):
|
||||||
"""Filter posts within the given week range."""
|
"""Filter posts within the given week range."""
|
||||||
|
|||||||
Reference in New Issue
Block a user