This commit is contained in:
2025-05-13 19:31:02 +10:00
parent 660ca33ee4
commit dbd3615b07
+13 -3
View File
@@ -621,6 +621,15 @@ def insert_link_naturally(summary, source_name, source_url):
logging.error("No valid paragraphs to insert link.")
return append_link_as_fallback(summary, source_name, source_url)
# Remove duplicates while preserving order
seen = set()
unique_paragraphs = []
for para in paragraphs:
if para.strip() and para not in seen:
seen.add(para)
unique_paragraphs.append(para)
paragraphs = unique_paragraphs
# Find paragraphs with at least two sentences
eligible_paragraphs = [
p for p in paragraphs
@@ -714,6 +723,7 @@ def insert_link_naturally(summary, source_name, source_url):
sentences[sentence_idx] = new_sentence
new_para = ' '.join(sentences)
# Update the paragraph in the original list
paragraphs[paragraphs.index(best_paragraph)] = new_para
# Rejoin paragraphs with \n\n
@@ -1766,8 +1776,8 @@ def prepare_post_data(summary, title, main_topic=None):
new_title = "A Tasty Food Discovery Awaits You"
logging.info(f"Generated new title: '{new_title}'")
# Update to unpack four values
search_query, relevance_keywords, generated_main_topic, skip_flag = smart_image_and_filter(new_title, summary)
# Update to unpack five values from smart_image_and_filter
search_query, relevance_keywords, generated_main_topic, skip_flag, specific_term = smart_image_and_filter(new_title, summary)
if skip_flag:
logging.info("Summary filtered out during post preparation")
return None, None, None, None, None, None, None
@@ -1775,7 +1785,7 @@ def prepare_post_data(summary, title, main_topic=None):
# Use the provided main_topic if available, otherwise use the generated one
effective_main_topic = main_topic if main_topic else generated_main_topic
image_url, image_source, uploader, page_url = get_flickr_image(search_query, relevance_keywords, effective_main_topic)
image_url, image_source, uploader, page_url = get_flickr_image(search_query, relevance_keywords, effective_main_topic, specific_term)
if not image_url:
image_url, image_source, uploader, page_url = get_image(search_query)