fix

2025-05-01 19:41:50 +10:00
parent 7a71843c7c
commit b79397f309
2 changed files with 39 additions and 20 deletions
@@ -247,15 +247,21 @@ def curate_from_rss():
            attempts += 1
            continue

-        # Remove the original title from the summary
+        # Remove the original title from the summary while preserving paragraphs
        title_pattern = re.compile(
            r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title),
            re.IGNORECASE
        )
-        final_summary = title_pattern.sub('', final_summary).strip()
-        # Clean up any extra spaces or newlines left after removal
-        final_summary = re.sub(r'\s+', ' ', final_summary)
-        final_summary = '\n'.join(para.strip() for para in final_summary.split('\n') if para.strip())
+        # Split into paragraphs, process each one, then rejoin
+        paragraphs = final_summary.split('\n')
+        cleaned_paragraphs = []
+        for para in paragraphs:
+            if para.strip():
+                # Remove the title and normalize spaces within the paragraph
+                cleaned_para = title_pattern.sub('', para).strip()
+                cleaned_para = re.sub(r'\s+', ' ', cleaned_para)
+                cleaned_paragraphs.append(cleaned_para)
+        final_summary = '\n'.join(cleaned_paragraphs)

        final_summary = insert_link_naturally(final_summary, source_name, link)
        post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)