Use Cursor to fix insert_link_naturally
This commit is contained in:
+14
-17
@@ -535,7 +535,7 @@ def generate_title_from_summary(summary):
|
|||||||
messages=[
|
messages=[
|
||||||
{"role": "system", "content": (
|
{"role": "system", "content": (
|
||||||
"Generate a concise, engaging title (under 100 characters) based on this summary, covering food topics. "
|
"Generate a concise, engaging title (under 100 characters) based on this summary, covering food topics. "
|
||||||
"Craft it with Upworthy/Buzzfeed flair—think ‘you won’t believe this’ or ‘this is nuts’—for food insiders. "
|
"Craft it with Upworthy/Buzzfeed flair—think 'you won't believe this' or 'this is nuts'—for food insiders. "
|
||||||
"Avoid quotes, emojis, special characters, or the words 'elevate', 'elevating', 'elevated'. "
|
"Avoid quotes, emojis, special characters, or the words 'elevate', 'elevating', 'elevated'. "
|
||||||
"End with a question to spark shares."
|
"End with a question to spark shares."
|
||||||
)},
|
)},
|
||||||
@@ -622,8 +622,8 @@ def insert_link_naturally(summary, source_name, source_url):
|
|||||||
return summary
|
return summary
|
||||||
|
|
||||||
# Find paragraphs with at least two sentences
|
# Find paragraphs with at least two sentences
|
||||||
eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2]
|
eligible_paragraph_indices = [i for i, p in enumerate(paragraphs) if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2]
|
||||||
if not eligible_paragraphs:
|
if not eligible_paragraph_indices:
|
||||||
logging.warning("No paragraph with multiple sentences found, using fallback.")
|
logging.warning("No paragraph with multiple sentences found, using fallback.")
|
||||||
return append_link_as_fallback(summary, source_name, source_url)
|
return append_link_as_fallback(summary, source_name, source_url)
|
||||||
|
|
||||||
@@ -636,33 +636,29 @@ def insert_link_naturally(summary, source_name, source_url):
|
|||||||
|
|
||||||
best_candidate = None
|
best_candidate = None
|
||||||
best_score = -1
|
best_score = -1
|
||||||
|
best_paragraph_idx = None
|
||||||
best_paragraph = None
|
best_paragraph = None
|
||||||
|
|
||||||
# Score each paragraph and sentence for suitability
|
# Score each eligible paragraph and sentence for suitability
|
||||||
for para in eligible_paragraphs:
|
for idx in eligible_paragraph_indices:
|
||||||
|
para = paragraphs[idx]
|
||||||
sentences = re.split(r'(?<=[.!?])\s+', para.strip())
|
sentences = re.split(r'(?<=[.!?])\s+', para.strip())
|
||||||
eligible_sentences = [
|
eligible_sentences = [
|
||||||
(i, s) for i, s in enumerate(sentences)
|
(i, s) for i, s in enumerate(sentences)
|
||||||
if s.strip() and not s.endswith('?') # Exclude sentences ending with '?'
|
if s.strip() and not s.endswith('?') and not s.endswith('!')
|
||||||
and not s.endswith('!') # Exclude exclamations for smoother integration
|
|
||||||
]
|
]
|
||||||
if not eligible_sentences:
|
if not eligible_sentences:
|
||||||
continue
|
continue
|
||||||
|
for s_idx, sentence in eligible_sentences:
|
||||||
# Score sentences based on suitability (prefer declarative sentences)
|
|
||||||
for idx, sentence in eligible_sentences:
|
|
||||||
score = 0
|
score = 0
|
||||||
# Favor sentences with factual content (simplified heuristic)
|
|
||||||
if any(word in sentence.lower() for word in ["is", "are", "has", "shows", "reveals"]):
|
if any(word in sentence.lower() for word in ["is", "are", "has", "shows", "reveals"]):
|
||||||
score += 2
|
score += 2
|
||||||
# Prefer longer sentences for better context
|
|
||||||
score += len(sentence.split()) // 5
|
score += len(sentence.split()) // 5
|
||||||
# Prefer middle sentences for natural flow
|
score += abs(s_idx - len(sentences) / 2) * -1
|
||||||
score += abs(idx - len(sentences) / 2) * -1 # Penalize sentences far from the middle
|
|
||||||
|
|
||||||
if score > best_score:
|
if score > best_score:
|
||||||
best_score = score
|
best_score = score
|
||||||
best_candidate = (idx, sentence)
|
best_candidate = (s_idx, sentence)
|
||||||
|
best_paragraph_idx = idx
|
||||||
best_paragraph = para
|
best_paragraph = para
|
||||||
|
|
||||||
if best_candidate is None:
|
if best_candidate is None:
|
||||||
@@ -676,10 +672,11 @@ def insert_link_naturally(summary, source_name, source_url):
|
|||||||
formatted_link = link_phrase.format(source=link_pattern)
|
formatted_link = link_phrase.format(source=link_pattern)
|
||||||
|
|
||||||
# Insert the link at the end of the selected sentence (no capitalization needed)
|
# Insert the link at the end of the selected sentence (no capitalization needed)
|
||||||
|
sentences = re.split(r'(?<=[.!?])\s+', best_paragraph.strip())
|
||||||
new_sentence = f"{sentence.rstrip('.')} {formatted_link}."
|
new_sentence = f"{sentence.rstrip('.')} {formatted_link}."
|
||||||
sentences[sentence_idx] = new_sentence
|
sentences[sentence_idx] = new_sentence
|
||||||
new_para = ' '.join(sentences)
|
new_para = ' '.join(sentences)
|
||||||
paragraphs[paragraphs.index(best_paragraph)] = new_para
|
paragraphs[best_paragraph_idx] = new_para
|
||||||
|
|
||||||
# Rejoin paragraphs with \n\n
|
# Rejoin paragraphs with \n\n
|
||||||
new_summary = '\n\n'.join(paragraphs)
|
new_summary = '\n\n'.join(paragraphs)
|
||||||
|
|||||||
Reference in New Issue
Block a user