This commit is contained in:
2025-05-01 19:41:50 +10:00
parent 7a71843c7c
commit b79397f309
2 changed files with 39 additions and 20 deletions
+11 -5
View File
@@ -247,15 +247,21 @@ def curate_from_rss():
attempts += 1
continue
# Remove the original title from the summary
# Remove the original title from the summary while preserving paragraphs
title_pattern = re.compile(
r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title),
re.IGNORECASE
)
final_summary = title_pattern.sub('', final_summary).strip()
# Clean up any extra spaces or newlines left after removal
final_summary = re.sub(r'\s+', ' ', final_summary)
final_summary = '\n'.join(para.strip() for para in final_summary.split('\n') if para.strip())
# Split into paragraphs, process each one, then rejoin
paragraphs = final_summary.split('\n')
cleaned_paragraphs = []
for para in paragraphs:
if para.strip():
# Remove the title and normalize spaces within the paragraph
cleaned_para = title_pattern.sub('', para).strip()
cleaned_para = re.sub(r'\s+', ' ', cleaned_para)
cleaned_paragraphs.append(cleaned_para)
final_summary = '\n'.join(cleaned_paragraphs)
final_summary = insert_link_naturally(final_summary, source_name, link)
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)