use cursor to fix insert_link_naturally

9 months ago · 3edc8135f3
parent 5f38374abd
commit 3edc8135f3
1 changed file with 14 additions and 17 deletions
--- a/foodie_utils.py
+++ b/foodie_utils.py
@@ -535,7 +535,7 @@ def generate_title_from_summary(summary):
                messages=[
                    {"role": "system", "content": (
                        "Generate a concise, engaging title (under 100 characters) based on this summary, covering food topics. "
-                        "Craft it with Upworthy/Buzzfeed flair—think ‘you won’t believe this’ or ‘this is nuts’—for food insiders. "
+                        "Craft it with Upworthy/Buzzfeed flair—think 'you won't believe this' or 'this is nuts'—for food insiders. "
                        "Avoid quotes, emojis, special characters, or the words 'elevate', 'elevating', 'elevated'. "
                        "End with a question to spark shares."
                    )},
@@ -622,8 +622,8 @@ def insert_link_naturally(summary, source_name, source_url):
            return summary

        # Find paragraphs with at least two sentences
-        eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2]
-        if not eligible_paragraphs:
+        eligible_paragraph_indices = [i for i, p in enumerate(paragraphs) if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2]
+        if not eligible_paragraph_indices:
            logging.warning("No paragraph with multiple sentences found, using fallback.")
            return append_link_as_fallback(summary, source_name, source_url)

@@ -636,33 +636,29 @@ def insert_link_naturally(summary, source_name, source_url):

        best_candidate = None
        best_score = -1
+        best_paragraph_idx = None
        best_paragraph = None

-        # Score each paragraph and sentence for suitability
-        for para in eligible_paragraphs:
+        # Score each eligible paragraph and sentence for suitability
+        for idx in eligible_paragraph_indices:
+            para = paragraphs[idx]
            sentences = re.split(r'(?<=[.!?])\s+', para.strip())
            eligible_sentences = [
                (i, s) for i, s in enumerate(sentences)
-                if s.strip() and not s.endswith('?')  # Exclude sentences ending with '?'
-                and not s.endswith('!')  # Exclude exclamations for smoother integration
+                if s.strip() and not s.endswith('?') and not s.endswith('!')
            ]
            if not eligible_sentences:
                continue
-
-            # Score sentences based on suitability (prefer declarative sentences)
-            for idx, sentence in eligible_sentences:
+            for s_idx, sentence in eligible_sentences:
                score = 0
-                # Favor sentences with factual content (simplified heuristic)
                if any(word in sentence.lower() for word in ["is", "are", "has", "shows", "reveals"]):
                    score += 2
-                # Prefer longer sentences for better context
                score += len(sentence.split()) // 5
-                # Prefer middle sentences for natural flow
-                score += abs(idx - len(sentences) / 2) * -1  # Penalize sentences far from the middle
-
+                score += abs(s_idx - len(sentences) / 2) * -1
                if score > best_score:
                    best_score = score
-                    best_candidate = (idx, sentence)
+                    best_candidate = (s_idx, sentence)
+                    best_paragraph_idx = idx
                    best_paragraph = para

        if best_candidate is None:
@@ -676,10 +672,11 @@ def insert_link_naturally(summary, source_name, source_url):
        formatted_link = link_phrase.format(source=link_pattern)

        # Insert the link at the end of the selected sentence (no capitalization needed)
+        sentences = re.split(r'(?<=[.!?])\s+', best_paragraph.strip())
        new_sentence = f"{sentence.rstrip('.')} {formatted_link}."
        sentences[sentence_idx] = new_sentence
        new_para = ' '.join(sentences)
-        paragraphs[paragraphs.index(best_paragraph)] = new_para
+        paragraphs[best_paragraph_idx] = new_para

        # Rejoin paragraphs with \n\n
        new_summary = '\n\n'.join(paragraphs)