From 3edc8135f3e51b3f0c6d8e2579c37435f874e675 Mon Sep 17 00:00:00 2001
From: Shane <shanehill@mail.com>
Date: Tue, 13 May 2025 20:01:56 +1000
Subject: [PATCH] use cursor to fix insert_link_naturally

---
 foodie_utils.py | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/foodie_utils.py b/foodie_utils.py
index 874470b..504fcbf 100644
--- a/foodie_utils.py
+++ b/foodie_utils.py
@@ -535,7 +535,7 @@ def generate_title_from_summary(summary):
                 messages=[
                     {"role": "system", "content": (
                         "Generate a concise, engaging title (under 100 characters) based on this summary, covering food topics. "
-                        "Craft it with Upworthy/Buzzfeed flair—think ‘you won’t believe this’ or ‘this is nuts’—for food insiders. "
+                        "Craft it with Upworthy/Buzzfeed flair—think 'you won't believe this' or 'this is nuts'—for food insiders. "
                         "Avoid quotes, emojis, special characters, or the words 'elevate', 'elevating', 'elevated'. "
                         "End with a question to spark shares."
                     )},
@@ -622,8 +622,8 @@ def insert_link_naturally(summary, source_name, source_url):
             return summary
 
         # Find paragraphs with at least two sentences
-        eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2]
-        if not eligible_paragraphs:
+        eligible_paragraph_indices = [i for i, p in enumerate(paragraphs) if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2]
+        if not eligible_paragraph_indices:
             logging.warning("No paragraph with multiple sentences found, using fallback.")
             return append_link_as_fallback(summary, source_name, source_url)
 
@@ -636,33 +636,29 @@ def insert_link_naturally(summary, source_name, source_url):
 
         best_candidate = None
         best_score = -1
+        best_paragraph_idx = None
         best_paragraph = None
 
-        # Score each paragraph and sentence for suitability
-        for para in eligible_paragraphs:
+        # Score each eligible paragraph and sentence for suitability
+        for idx in eligible_paragraph_indices:
+            para = paragraphs[idx]
             sentences = re.split(r'(?<=[.!?])\s+', para.strip())
             eligible_sentences = [
                 (i, s) for i, s in enumerate(sentences)
-                if s.strip() and not s.endswith('?')  # Exclude sentences ending with '?'
-                and not s.endswith('!')  # Exclude exclamations for smoother integration
+                if s.strip() and not s.endswith('?') and not s.endswith('!')
             ]
             if not eligible_sentences:
                 continue
-
-            # Score sentences based on suitability (prefer declarative sentences)
-            for idx, sentence in eligible_sentences:
+            for s_idx, sentence in eligible_sentences:
                 score = 0
-                # Favor sentences with factual content (simplified heuristic)
                 if any(word in sentence.lower() for word in ["is", "are", "has", "shows", "reveals"]):
                     score += 2
-                # Prefer longer sentences for better context
                 score += len(sentence.split()) // 5
-                # Prefer middle sentences for natural flow
-                score += abs(idx - len(sentences) / 2) * -1  # Penalize sentences far from the middle
-
+                score += abs(s_idx - len(sentences) / 2) * -1
                 if score > best_score:
                     best_score = score
-                    best_candidate = (idx, sentence)
+                    best_candidate = (s_idx, sentence)
+                    best_paragraph_idx = idx
                     best_paragraph = para
 
         if best_candidate is None:
@@ -676,10 +672,11 @@ def insert_link_naturally(summary, source_name, source_url):
         formatted_link = link_phrase.format(source=link_pattern)
 
         # Insert the link at the end of the selected sentence (no capitalization needed)
+        sentences = re.split(r'(?<=[.!?])\s+', best_paragraph.strip())
         new_sentence = f"{sentence.rstrip('.')} {formatted_link}."
         sentences[sentence_idx] = new_sentence
         new_para = ' '.join(sentences)
-        paragraphs[paragraphs.index(best_paragraph)] = new_para
+        paragraphs[best_paragraph_idx] = new_para
 
         # Rejoin paragraphs with \n\n
         new_summary = '\n\n'.join(paragraphs)