Fix: preserve paragraph breaks when removing the title from the summary
This commit is contained in:
+11
-5
@@ -247,15 +247,21 @@ def curate_from_rss():
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
# Remove the original title from the summary
|
||||
# Remove the original title from the summary while preserving paragraphs
|
||||
title_pattern = re.compile(
|
||||
r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title),
|
||||
re.IGNORECASE
|
||||
)
|
||||
final_summary = title_pattern.sub('', final_summary).strip()
|
||||
# Clean up any extra spaces or newlines left after removal
|
||||
final_summary = re.sub(r'\s+', ' ', final_summary)
|
||||
final_summary = '\n'.join(para.strip() for para in final_summary.split('\n') if para.strip())
|
||||
# Split into paragraphs, process each one, then rejoin
|
||||
paragraphs = final_summary.split('\n')
|
||||
cleaned_paragraphs = []
|
||||
for para in paragraphs:
|
||||
if para.strip():
|
||||
# Remove the title and normalize spaces within the paragraph
|
||||
cleaned_para = title_pattern.sub('', para).strip()
|
||||
cleaned_para = re.sub(r'\s+', ' ', cleaned_para)
|
||||
cleaned_paragraphs.append(cleaned_para)
|
||||
final_summary = '\n'.join(cleaned_paragraphs)
|
||||
|
||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
|
||||
|
||||
Reference in New Issue
Block a user