diff --git a/foodie_utils.py b/foodie_utils.py index 7bb48ba..1d27404 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -621,6 +621,15 @@ def insert_link_naturally(summary, source_name, source_url): logging.error("No valid paragraphs to insert link.") return append_link_as_fallback(summary, source_name, source_url) + # Remove duplicates while preserving order + seen = set() + unique_paragraphs = [] + for para in paragraphs: + if para.strip() and para not in seen: + seen.add(para) + unique_paragraphs.append(para) + paragraphs = unique_paragraphs + # Find paragraphs with at least two sentences eligible_paragraphs = [ p for p in paragraphs @@ -714,6 +723,7 @@ def insert_link_naturally(summary, source_name, source_url): sentences[sentence_idx] = new_sentence new_para = ' '.join(sentences) + # Update the paragraph in the original list paragraphs[paragraphs.index(best_paragraph)] = new_para # Rejoin paragraphs with \n\n @@ -1766,8 +1776,8 @@ def prepare_post_data(summary, title, main_topic=None): new_title = "A Tasty Food Discovery Awaits You" logging.info(f"Generated new title: '{new_title}'") - # Update to unpack four values - search_query, relevance_keywords, generated_main_topic, skip_flag = smart_image_and_filter(new_title, summary) + # Update to unpack five values from smart_image_and_filter + search_query, relevance_keywords, generated_main_topic, skip_flag, specific_term = smart_image_and_filter(new_title, summary) if skip_flag: logging.info("Summary filtered out during post preparation") return None, None, None, None, None, None, None @@ -1775,7 +1785,7 @@ def prepare_post_data(summary, title, main_topic=None): # Use the provided main_topic if available, otherwise use the generated one effective_main_topic = main_topic if main_topic else generated_main_topic - image_url, image_source, uploader, page_url = get_flickr_image(search_query, relevance_keywords, effective_main_topic) + image_url, image_source, uploader, page_url = get_flickr_image(search_query, relevance_keywords, effective_main_topic, specific_term) if not image_url: image_url, image_source, uploader, page_url = get_image(search_query)