|
|
|
|
@ -613,45 +613,80 @@ def insert_link_naturally(summary, source_name, source_url): |
|
|
|
|
logging.error("No valid paragraphs to insert link.") |
|
|
|
|
return summary |
|
|
|
|
|
|
|
|
|
# Find paragraphs with at least two sentences |
|
|
|
|
eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2] |
|
|
|
|
if not eligible_paragraphs: |
|
|
|
|
logging.warning("No paragraph with multiple sentences found, appending to last paragraph.") |
|
|
|
|
target_para = paragraphs[-1].strip() |
|
|
|
|
link_pattern = f'<a href="{source_url}">{source_name}</a>' |
|
|
|
|
new_para = f"{target_para} Source: {link_pattern}." |
|
|
|
|
paragraphs[-1] = new_para |
|
|
|
|
new_summary = '\n'.join(paragraphs) |
|
|
|
|
logging.info(f"Appended link to summary: {new_summary!r}") |
|
|
|
|
return new_summary |
|
|
|
|
|
|
|
|
|
target_para = random.choice(eligible_paragraphs) |
|
|
|
|
sentences = re.split(r'(?<=[.!?])\s+', target_para.strip()) |
|
|
|
|
|
|
|
|
|
eligible_sentences = [(i, s) for i, s in enumerate(sentences) if s.strip()] |
|
|
|
|
if not eligible_sentences: |
|
|
|
|
logging.error("No eligible sentences found for link insertion.") |
|
|
|
|
return summary |
|
|
|
|
|
|
|
|
|
sentence_idx, sentence = random.choice(eligible_sentences) |
|
|
|
|
logging.warning("No paragraph with multiple sentences found, using fallback.") |
|
|
|
|
return append_link_as_fallback(summary, source_name, source_url) |
|
|
|
|
|
|
|
|
|
# Alternative phrases for variety |
|
|
|
|
link_phrases = [ |
|
|
|
|
"according to {source}", |
|
|
|
|
"as reported by {source}", |
|
|
|
|
"{source} notes that" |
|
|
|
|
] |
|
|
|
|
|
|
|
|
|
best_candidate = None |
|
|
|
|
best_score = -1 |
|
|
|
|
best_paragraph = None |
|
|
|
|
|
|
|
|
|
# Score each paragraph and sentence for suitability |
|
|
|
|
for para in eligible_paragraphs: |
|
|
|
|
sentences = re.split(r'(?<=[.!?])\s+', para.strip()) |
|
|
|
|
eligible_sentences = [ |
|
|
|
|
(i, s) for i, s in enumerate(sentences) |
|
|
|
|
if s.strip() and not s.endswith('?') # Exclude sentences ending with '?' |
|
|
|
|
and not s.endswith('!') # Exclude exclamations for smoother integration |
|
|
|
|
] |
|
|
|
|
if not eligible_sentences: |
|
|
|
|
continue |
|
|
|
|
|
|
|
|
|
# Score sentences based on suitability (prefer declarative sentences) |
|
|
|
|
for idx, sentence in eligible_sentences: |
|
|
|
|
score = 0 |
|
|
|
|
# Favor sentences with factual content (simplified heuristic) |
|
|
|
|
if any(word in sentence.lower() for word in ["is", "are", "has", "shows", "reveals"]): |
|
|
|
|
score += 2 |
|
|
|
|
# Prefer longer sentences for better context |
|
|
|
|
score += len(sentence.split()) // 5 |
|
|
|
|
# Prefer middle sentences for natural flow |
|
|
|
|
score += abs(idx - len(sentences) / 2) * -1 # Penalize sentences far from the middle |
|
|
|
|
|
|
|
|
|
if score > best_score: |
|
|
|
|
best_score = score |
|
|
|
|
best_candidate = (idx, sentence) |
|
|
|
|
best_paragraph = para |
|
|
|
|
|
|
|
|
|
if best_candidate is None: |
|
|
|
|
logging.warning("No suitable sentence found, using fallback.") |
|
|
|
|
return append_link_as_fallback(summary, source_name, source_url) |
|
|
|
|
|
|
|
|
|
# Select a link phrase based on sentence structure |
|
|
|
|
sentence_idx, sentence = best_candidate |
|
|
|
|
link_phrase = random.choice(link_phrases) |
|
|
|
|
link_pattern = f'<a href="{source_url}">{source_name}</a>' |
|
|
|
|
|
|
|
|
|
# Insert the link at the end of the sentence |
|
|
|
|
new_sentence = f"{sentence.rstrip('.')} according to {link_pattern}." |
|
|
|
|
|
|
|
|
|
formatted_link = link_phrase.format(source=link_pattern) |
|
|
|
|
|
|
|
|
|
# Insert the link at the end of the selected sentence (no capitalization needed) |
|
|
|
|
new_sentence = f"{sentence.rstrip('.')} {formatted_link}." |
|
|
|
|
sentences[sentence_idx] = new_sentence |
|
|
|
|
new_para = ' '.join(sentences) |
|
|
|
|
paragraphs[paragraphs.index(target_para)] = new_para |
|
|
|
|
|
|
|
|
|
paragraphs[paragraphs.index(best_paragraph)] = new_para |
|
|
|
|
|
|
|
|
|
new_summary = '\n'.join(paragraphs) |
|
|
|
|
logging.info(f"Summary with naturally embedded link: {new_summary!r}") |
|
|
|
|
return new_summary |
|
|
|
|
|
|
|
|
|
except Exception as e: |
|
|
|
|
logging.error(f"Link insertion failed: {e}") |
|
|
|
|
link_pattern = f'<a href="{source_url}">{source_name}</a>' |
|
|
|
|
new_summary = f"{summary}\n\nSource: {link_pattern}." |
|
|
|
|
logging.info(f"Fallback summary with link: {new_summary!r}") |
|
|
|
|
return new_summary |
|
|
|
|
return append_link_as_fallback(summary, source_name, source_url) |
|
|
|
|
|
|
|
|
|
def append_link_as_fallback(summary, source_name, source_url):
    """Append a source-attribution paragraph to ``summary``.

    Builds an HTML anchor from ``source_name`` and ``source_url``, wraps it in
    a ``<p>`` element reading "We learned about this from <link>." and places
    that paragraph after the summary, separated by a blank line.

    Args:
        summary: The existing summary text. ``None``, empty, or
            whitespace-only values are treated as "no summary".
        source_name: Text displayed inside the anchor.
        source_url: Value placed in the anchor's ``href`` attribute.

    Returns:
        The summary followed by the attribution paragraph, or just the
        attribution paragraph when there is no summary text to keep.
    """
    # NOTE(review): source_name and source_url are interpolated into HTML
    # without escaping -- confirm that callers sanitize them (or apply
    # html.escape) if they can originate from untrusted feeds.
    link_pattern = f'<a href="{source_url}">{source_name}</a>'
    attribution = f"<p>We learned about this from {link_pattern}.</p>"

    if summary is None or not str(summary).strip():
        # Formatting None would emit the literal text "None", and an empty
        # summary would leave the result starting with stray blank lines.
        new_summary = attribution
    else:
        new_summary = f"{summary}\n\n{attribution}"

    # Lazy %-style arguments produce the same message as the former f-string
    # but defer the repr() call until the record is actually emitted.
    logging.info("Fallback summary with link: %r", new_summary)
    return new_summary
|
|
|
|
|
|
|
|
|
def generate_category_from_summary(summary): |
|
|
|
|
try: |
|
|
|
|
|