From 3edc8135f3e51b3f0c6d8e2579c37435f874e675 Mon Sep 17 00:00:00 2001 From: Shane Date: Tue, 13 May 2025 20:01:56 +1000 Subject: [PATCH] use cursor to fix insert_link_naturally --- foodie_utils.py | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/foodie_utils.py b/foodie_utils.py index 874470b..504fcbf 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -535,7 +535,7 @@ def generate_title_from_summary(summary): messages=[ {"role": "system", "content": ( "Generate a concise, engaging title (under 100 characters) based on this summary, covering food topics. " - "Craft it with Upworthy/Buzzfeed flair—think ‘you won’t believe this’ or ‘this is nuts’—for food insiders. " + "Craft it with Upworthy/Buzzfeed flair—think 'you won't believe this' or 'this is nuts'—for food insiders. " "Avoid quotes, emojis, special characters, or the words 'elevate', 'elevating', 'elevated'. " "End with a question to spark shares." )}, @@ -622,8 +622,8 @@ def insert_link_naturally(summary, source_name, source_url): return summary # Find paragraphs with at least two sentences - eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2] - if not eligible_paragraphs: + eligible_paragraph_indices = [i for i, p in enumerate(paragraphs) if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2] + if not eligible_paragraph_indices: logging.warning("No paragraph with multiple sentences found, using fallback.") return append_link_as_fallback(summary, source_name, source_url) @@ -636,33 +636,29 @@ def insert_link_naturally(summary, source_name, source_url): best_candidate = None best_score = -1 + best_paragraph_idx = None best_paragraph = None - # Score each paragraph and sentence for suitability - for para in eligible_paragraphs: + # Score each eligible paragraph and sentence for suitability + for idx in eligible_paragraph_indices: + para = paragraphs[idx] sentences = re.split(r'(?<=[.!?])\s+', para.strip()) eligible_sentences = [ (i, s) for i, s in enumerate(sentences) - if s.strip() and not s.endswith('?') # Exclude sentences ending with '?' - and not s.endswith('!') # Exclude exclamations for smoother integration + if s.strip() and not s.endswith('?') and not s.endswith('!') ] if not eligible_sentences: continue - - # Score sentences based on suitability (prefer declarative sentences) - for idx, sentence in eligible_sentences: + for s_idx, sentence in eligible_sentences: score = 0 - # Favor sentences with factual content (simplified heuristic) if any(word in sentence.lower() for word in ["is", "are", "has", "shows", "reveals"]): score += 2 - # Prefer longer sentences for better context score += len(sentence.split()) // 5 - # Prefer middle sentences for natural flow - score += abs(idx - len(sentences) / 2) * -1 # Penalize sentences far from the middle - + score += abs(s_idx - len(sentences) / 2) * -1 if score > best_score: best_score = score - best_candidate = (idx, sentence) + best_candidate = (s_idx, sentence) + best_paragraph_idx = idx best_paragraph = para if best_candidate is None: @@ -676,10 +672,11 @@ def insert_link_naturally(summary, source_name, source_url): formatted_link = link_phrase.format(source=link_pattern) # Insert the link at the end of the selected sentence (no capitalization needed) + sentences = re.split(r'(?<=[.!?])\s+', best_paragraph.strip()) new_sentence = f"{sentence.rstrip('.')} {formatted_link}." sentences[sentence_idx] = new_sentence new_para = ' '.join(sentences) - paragraphs[paragraphs.index(best_paragraph)] = new_para + paragraphs[best_paragraph_idx] = new_para # Rejoin paragraphs with \n\n new_summary = '\n\n'.join(paragraphs)