use cursor to fix insert_link_naturally

main
Shane 7 months ago
parent 5f38374abd
commit 3edc8135f3
  1. 31
      foodie_utils.py

@ -535,7 +535,7 @@ def generate_title_from_summary(summary):
messages=[ messages=[
{"role": "system", "content": ( {"role": "system", "content": (
"Generate a concise, engaging title (under 100 characters) based on this summary, covering food topics. " "Generate a concise, engaging title (under 100 characters) based on this summary, covering food topics. "
"Craft it with Upworthy/Buzzfeed flair—think ‘you won’t believe this’ or ‘this is nuts’—for food insiders. " "Craft it with Upworthy/Buzzfeed flair—think 'you won't believe this' or 'this is nuts'—for food insiders. "
"Avoid quotes, emojis, special characters, or the words 'elevate', 'elevating', 'elevated'. " "Avoid quotes, emojis, special characters, or the words 'elevate', 'elevating', 'elevated'. "
"End with a question to spark shares." "End with a question to spark shares."
)}, )},
@ -622,8 +622,8 @@ def insert_link_naturally(summary, source_name, source_url):
return summary return summary
# Find paragraphs with at least two sentences # Find paragraphs with at least two sentences
eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2] eligible_paragraph_indices = [i for i, p in enumerate(paragraphs) if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2]
if not eligible_paragraphs: if not eligible_paragraph_indices:
logging.warning("No paragraph with multiple sentences found, using fallback.") logging.warning("No paragraph with multiple sentences found, using fallback.")
return append_link_as_fallback(summary, source_name, source_url) return append_link_as_fallback(summary, source_name, source_url)
@ -636,33 +636,29 @@ def insert_link_naturally(summary, source_name, source_url):
best_candidate = None best_candidate = None
best_score = -1 best_score = -1
best_paragraph_idx = None
best_paragraph = None best_paragraph = None
# Score each paragraph and sentence for suitability # Score each eligible paragraph and sentence for suitability
for para in eligible_paragraphs: for idx in eligible_paragraph_indices:
para = paragraphs[idx]
sentences = re.split(r'(?<=[.!?])\s+', para.strip()) sentences = re.split(r'(?<=[.!?])\s+', para.strip())
eligible_sentences = [ eligible_sentences = [
(i, s) for i, s in enumerate(sentences) (i, s) for i, s in enumerate(sentences)
if s.strip() and not s.endswith('?') # Exclude sentences ending with '?' if s.strip() and not s.endswith('?') and not s.endswith('!')
and not s.endswith('!') # Exclude exclamations for smoother integration
] ]
if not eligible_sentences: if not eligible_sentences:
continue continue
for s_idx, sentence in eligible_sentences:
# Score sentences based on suitability (prefer declarative sentences)
for idx, sentence in eligible_sentences:
score = 0 score = 0
# Favor sentences with factual content (simplified heuristic)
if any(word in sentence.lower() for word in ["is", "are", "has", "shows", "reveals"]): if any(word in sentence.lower() for word in ["is", "are", "has", "shows", "reveals"]):
score += 2 score += 2
# Prefer longer sentences for better context
score += len(sentence.split()) // 5 score += len(sentence.split()) // 5
# Prefer middle sentences for natural flow score += abs(s_idx - len(sentences) / 2) * -1
score += abs(idx - len(sentences) / 2) * -1 # Penalize sentences far from the middle
if score > best_score: if score > best_score:
best_score = score best_score = score
best_candidate = (idx, sentence) best_candidate = (s_idx, sentence)
best_paragraph_idx = idx
best_paragraph = para best_paragraph = para
if best_candidate is None: if best_candidate is None:
@ -676,10 +672,11 @@ def insert_link_naturally(summary, source_name, source_url):
formatted_link = link_phrase.format(source=link_pattern) formatted_link = link_phrase.format(source=link_pattern)
# Insert the link at the end of the selected sentence (no capitalization needed) # Insert the link at the end of the selected sentence (no capitalization needed)
sentences = re.split(r'(?<=[.!?])\s+', best_paragraph.strip())
new_sentence = f"{sentence.rstrip('.')} {formatted_link}." new_sentence = f"{sentence.rstrip('.')} {formatted_link}."
sentences[sentence_idx] = new_sentence sentences[sentence_idx] = new_sentence
new_para = ' '.join(sentences) new_para = ' '.join(sentences)
paragraphs[paragraphs.index(best_paragraph)] = new_para paragraphs[best_paragraph_idx] = new_para
# Rejoin paragraphs with \n\n # Rejoin paragraphs with \n\n
new_summary = '\n\n'.join(paragraphs) new_summary = '\n\n'.join(paragraphs)

Loading…
Cancel
Save