update insert link naturally
This commit is contained in:
+62
-27
@@ -613,45 +613,80 @@ def insert_link_naturally(summary, source_name, source_url):
|
||||
logging.error("No valid paragraphs to insert link.")
|
||||
return summary
|
||||
|
||||
# Find paragraphs with at least two sentences
|
||||
eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2]
|
||||
if not eligible_paragraphs:
|
||||
logging.warning("No paragraph with multiple sentences found, appending to last paragraph.")
|
||||
target_para = paragraphs[-1].strip()
|
||||
link_pattern = f'<a href="{source_url}">{source_name}</a>'
|
||||
new_para = f"{target_para} Source: {link_pattern}."
|
||||
paragraphs[-1] = new_para
|
||||
new_summary = '\n'.join(paragraphs)
|
||||
logging.info(f"Appended link to summary: {new_summary!r}")
|
||||
return new_summary
|
||||
logging.warning("No paragraph with multiple sentences found, using fallback.")
|
||||
return append_link_as_fallback(summary, source_name, source_url)
|
||||
|
||||
target_para = random.choice(eligible_paragraphs)
|
||||
sentences = re.split(r'(?<=[.!?])\s+', target_para.strip())
|
||||
|
||||
eligible_sentences = [(i, s) for i, s in enumerate(sentences) if s.strip()]
|
||||
if not eligible_sentences:
|
||||
logging.error("No eligible sentences found for link insertion.")
|
||||
return summary
|
||||
|
||||
sentence_idx, sentence = random.choice(eligible_sentences)
|
||||
# Alternative phrases for variety
|
||||
link_phrases = [
|
||||
"according to {source}",
|
||||
"as reported by {source}",
|
||||
"{source} notes that"
|
||||
]
|
||||
|
||||
best_candidate = None
|
||||
best_score = -1
|
||||
best_paragraph = None
|
||||
|
||||
# Score each paragraph and sentence for suitability
|
||||
for para in eligible_paragraphs:
|
||||
sentences = re.split(r'(?<=[.!?])\s+', para.strip())
|
||||
eligible_sentences = [
|
||||
(i, s) for i, s in enumerate(sentences)
|
||||
if s.strip() and not s.endswith('?') # Exclude sentences ending with '?'
|
||||
and not s.endswith('!') # Exclude exclamations for smoother integration
|
||||
]
|
||||
if not eligible_sentences:
|
||||
continue
|
||||
|
||||
# Score sentences based on suitability (prefer declarative sentences)
|
||||
for idx, sentence in eligible_sentences:
|
||||
score = 0
|
||||
# Favor sentences with factual content (simplified heuristic)
|
||||
if any(word in sentence.lower() for word in ["is", "are", "has", "shows", "reveals"]):
|
||||
score += 2
|
||||
# Prefer longer sentences for better context
|
||||
score += len(sentence.split()) // 5
|
||||
# Prefer middle sentences for natural flow
|
||||
score += abs(idx - len(sentences) / 2) * -1 # Penalize sentences far from the middle
|
||||
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best_candidate = (idx, sentence)
|
||||
best_paragraph = para
|
||||
|
||||
if best_candidate is None:
|
||||
logging.warning("No suitable sentence found, using fallback.")
|
||||
return append_link_as_fallback(summary, source_name, source_url)
|
||||
|
||||
# Select a link phrase based on sentence structure
|
||||
sentence_idx, sentence = best_candidate
|
||||
link_phrase = random.choice(link_phrases)
|
||||
link_pattern = f'<a href="{source_url}">{source_name}</a>'
|
||||
|
||||
# Insert the link at the end of the sentence
|
||||
new_sentence = f"{sentence.rstrip('.')} according to {link_pattern}."
|
||||
|
||||
formatted_link = link_phrase.format(source=link_pattern)
|
||||
|
||||
# Insert the link at the end of the selected sentence (no capitalization needed)
|
||||
new_sentence = f"{sentence.rstrip('.')} {formatted_link}."
|
||||
sentences[sentence_idx] = new_sentence
|
||||
new_para = ' '.join(sentences)
|
||||
paragraphs[paragraphs.index(target_para)] = new_para
|
||||
|
||||
paragraphs[paragraphs.index(best_paragraph)] = new_para
|
||||
|
||||
new_summary = '\n'.join(paragraphs)
|
||||
logging.info(f"Summary with naturally embedded link: {new_summary!r}")
|
||||
return new_summary
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Link insertion failed: {e}")
|
||||
link_pattern = f'<a href="{source_url}">{source_name}</a>'
|
||||
new_summary = f"{summary}\n\nSource: {link_pattern}."
|
||||
logging.info(f"Fallback summary with link: {new_summary!r}")
|
||||
return new_summary
|
||||
return append_link_as_fallback(summary, source_name, source_url)
|
||||
|
||||
def append_link_as_fallback(summary, source_name, source_url):
    """Append a source-attribution paragraph to the end of a summary.

    Fallback used when the link is not embedded inside an existing sentence.

    Args:
        summary: Summary text to extend. ``None`` or blank text is treated as
            empty, so the result is the attribution paragraph on its own.
        source_name: Text shown as the link label.
        source_url: Target placed in the anchor's ``href`` attribute.

    Returns:
        The summary, a blank line, and a ``<p>`` element linking to the
        source; or the ``<p>`` element alone when there is no summary text.
    """
    # NOTE(review): both values are interpolated into HTML without escaping;
    # confirm that callers pass trusted or pre-sanitized input.
    link_pattern = f'<a href="{source_url}">{source_name}</a>'
    attribution = f"<p>We learned about this from {link_pattern}.</p>"

    if summary is None or not str(summary).strip():
        # Avoid emitting the literal text "None" or leading blank lines when
        # there is nothing to append to.
        new_summary = attribution
    else:
        new_summary = f"{summary}\n\n{attribution}"

    # Lazy %-style arguments defer formatting until the record is emitted.
    logging.info("Fallback summary with link: %r", new_summary)
    return new_summary
|
||||
|
||||
def generate_category_from_summary(summary):
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user