This commit is contained in:
2025-05-01 19:24:20 +10:00
parent 022b52a8a7
commit 90be324fe4
2 changed files with 57 additions and 42 deletions
+17 -1
View File
@@ -65,14 +65,20 @@ def setup_logging():
lines = f.readlines()
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
pruned_lines = []
malformed_count = 0
for line in lines:
if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit():
malformed_count += 1
continue
try:
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
if timestamp > cutoff:
pruned_lines.append(line)
except ValueError:
logging.warning(f"Skipping malformed log line: {line.strip()[:50]}...")
malformed_count += 1
continue
if malformed_count > 0:
logging.info(f"Skipped {malformed_count} malformed log lines during pruning")
with open(LOG_FILE, 'w') as f:
f.writelines(pruned_lines)
@@ -240,6 +246,16 @@ def curate_from_rss():
attempts += 1
continue
# Remove the original title from the summary, in both its bold Markdown
# form (**Title**) and its bare form, ignoring case.
title_pattern = re.compile(
    r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title),
    re.IGNORECASE
)
final_summary = title_pattern.sub('', final_summary).strip()
# Collapse runs of horizontal whitespace only. Using r'\s+' here would also
# swallow newlines, flattening the summary to a single line and turning the
# paragraph clean-up below into a no-op.
final_summary = re.sub(r'[^\S\n]+', ' ', final_summary)
# Trim each paragraph and drop the empty ones left behind by the removal.
final_summary = '\n'.join(para.strip() for para in final_summary.split('\n') if para.strip())
final_summary = insert_link_naturally(final_summary, source_name, link)
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
if not post_data: