From c89a9df6e258606c1826a4bcdfce64735dfb27d8 Mon Sep 17 00:00:00 2001 From: Shane Date: Mon, 12 May 2025 21:16:53 +1000 Subject: [PATCH] update insert link naturally --- foodie_utils.py | 91 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 63 insertions(+), 28 deletions(-) diff --git a/foodie_utils.py b/foodie_utils.py index 5afe36e..eb9396f 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -613,45 +613,80 @@ def insert_link_naturally(summary, source_name, source_url): logging.error("No valid paragraphs to insert link.") return summary + # Find paragraphs with at least two sentences eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2] if not eligible_paragraphs: - logging.warning("No paragraph with multiple sentences found, appending to last paragraph.") - target_para = paragraphs[-1].strip() - link_pattern = f'{source_name}' - new_para = f"{target_para} Source: {link_pattern}." - paragraphs[-1] = new_para - new_summary = '\n'.join(paragraphs) - logging.info(f"Appended link to summary: {new_summary!r}") - return new_summary - - target_para = random.choice(eligible_paragraphs) - sentences = re.split(r'(?<=[.!?])\s+', target_para.strip()) - - eligible_sentences = [(i, s) for i, s in enumerate(sentences) if s.strip()] - if not eligible_sentences: - logging.error("No eligible sentences found for link insertion.") - return summary - - sentence_idx, sentence = random.choice(eligible_sentences) + logging.warning("No paragraph with multiple sentences found, using fallback.") + return append_link_as_fallback(summary, source_name, source_url) + + # Alternative phrases for variety + link_phrases = [ + "according to {source}", + "as reported by {source}", + "{source} notes that" + ] + + best_candidate = None + best_score = -1 + best_paragraph = None + + # Score each paragraph and sentence for suitability + for para in eligible_paragraphs: + sentences = re.split(r'(?<=[.!?])\s+', para.strip()) + eligible_sentences = [ + (i, s) for i, s in enumerate(sentences) + if s.strip() and not s.endswith('?') # Exclude sentences ending with '?' + and not s.endswith('!') # Exclude exclamations for smoother integration + ] + if not eligible_sentences: + continue + + # Score sentences based on suitability (prefer declarative sentences) + for idx, sentence in eligible_sentences: + score = 0 + # Favor sentences with factual content (simplified heuristic) + if any(word in sentence.lower() for word in ["is", "are", "has", "shows", "reveals"]): + score += 2 + # Prefer longer sentences for better context + score += len(sentence.split()) // 5 + # Prefer middle sentences for natural flow + score += abs(idx - len(sentences) / 2) * -1 # Penalize sentences far from the middle + + if score > best_score: + best_score = score + best_candidate = (idx, sentence) + best_paragraph = para + + if best_candidate is None: + logging.warning("No suitable sentence found, using fallback.") + return append_link_as_fallback(summary, source_name, source_url) + + # Select a link phrase based on sentence structure + sentence_idx, sentence = best_candidate + link_phrase = random.choice(link_phrases) link_pattern = f'{source_name}' - - # Insert the link at the end of the sentence - new_sentence = f"{sentence.rstrip('.')} according to {link_pattern}." - + formatted_link = link_phrase.format(source=link_pattern) + + # Insert the link at the end of the selected sentence (no capitalization needed) + new_sentence = f"{sentence.rstrip('.')} {formatted_link}." sentences[sentence_idx] = new_sentence new_para = ' '.join(sentences) - paragraphs[paragraphs.index(target_para)] = new_para - + paragraphs[paragraphs.index(best_paragraph)] = new_para + new_summary = '\n'.join(paragraphs) logging.info(f"Summary with naturally embedded link: {new_summary!r}") return new_summary except Exception as e: logging.error(f"Link insertion failed: {e}") - link_pattern = f'{source_name}' - new_summary = f"{summary}\n\nSource: {link_pattern}." - logging.info(f"Fallback summary with link: {new_summary!r}") - return new_summary + return append_link_as_fallback(summary, source_name, source_url) + +def append_link_as_fallback(summary, source_name, source_url): + """Fallback method to append the link in a polished way.""" + link_pattern = f'{source_name}' + new_summary = f"{summary}\n\n

We learned about this from {link_pattern}.

" + logging.info(f"Fallback summary with link: {new_summary!r}") + return new_summary def generate_category_from_summary(summary): try: