fix all json formatting
This commit is contained in:
+24
-44
@@ -128,54 +128,34 @@ def validate_twitter_credentials():
|
||||
|
||||
def load_recent_posts():
    """Load and deduplicate posts from recent_posts.json.

    The raw entries come from ``load_json_file(RECENT_POSTS_FILE)``; reading
    and parsing the file is left entirely to that helper.  An entry is kept
    only if it is a dict containing ``title``, ``url``, ``author_username``
    and ``timestamp``, and its timestamp is accepted by
    ``datetime.fromisoformat``.  Entries that share the same
    ``(title, url, author_username)`` are collapsed to the first one seen.

    Returns:
        list: The unique, valid post dicts in the order they were first
        encountered, or an empty list when nothing was loaded.
    """
    posts = load_json_file(RECENT_POSTS_FILE)

    if not posts:
        logging.warning(f"No valid posts loaded from {RECENT_POSTS_FILE}")
        return []

    # Deduplicate posts
    required_fields = ("title", "url", "author_username", "timestamp")
    unique_posts = {}
    for post in posts:
        try:
            # Non-dict entries (e.g. a bare string or number in the JSON
            # array) cannot carry the required fields; skip them up front.
            if not isinstance(post, dict) or not all(
                key in post for key in required_fields
            ):
                logging.warning(f"Skipping invalid post: missing fields {post}")
                continue
            # Called only for validation: raises if the timestamp is not a
            # valid ISO-8601 string.  The parsed value is not stored.
            datetime.fromisoformat(post["timestamp"])
            key = (post["title"], post["url"], post["author_username"])
            if key not in unique_posts:
                unique_posts[key] = post
            else:
                logging.debug(f"Skipping duplicate post: {post['title']}")
        except (KeyError, TypeError, ValueError) as e:
            # TypeError covers a non-string timestamp and unhashable field
            # values used in the dedup key; one bad entry must not abort
            # the whole load.
            logging.warning(f"Skipping post due to invalid format: {e}")
            continue

    deduped_posts = list(unique_posts.values())
    logging.info(f"Loaded {len(deduped_posts)} unique posts from {RECENT_POSTS_FILE}")
    return deduped_posts
|
||||
|
||||
def filter_posts_for_week(posts, start_date, end_date):
|
||||
"""Filter posts within the given week range."""
|
||||
|
||||
Reference in New Issue
Block a user