Fix insert_link_naturally
This commit is contained in:
+49
-14
@@ -615,23 +615,28 @@ def insert_link_naturally(summary, source_name, source_url):
|
||||
try:
|
||||
logging.info(f"Input summary to insert_link_naturally: {summary!r}")
|
||||
|
||||
# Split summary into paragraphs using \n\n (correct separator)
|
||||
# Split summary into paragraphs using \n\n
|
||||
paragraphs = summary.split('\n\n')
|
||||
if not paragraphs or all(not p.strip() for p in paragraphs):
|
||||
logging.error("No valid paragraphs to insert link.")
|
||||
return summary
|
||||
return append_link_as_fallback(summary, source_name, source_url)
|
||||
|
||||
# Find paragraphs with at least two sentences
|
||||
eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2]
|
||||
eligible_paragraphs = [
|
||||
p for p in paragraphs
|
||||
if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2
|
||||
]
|
||||
if not eligible_paragraphs:
|
||||
logging.warning("No paragraph with multiple sentences found, using fallback.")
|
||||
return append_link_as_fallback(summary, source_name, source_url)
|
||||
|
||||
# Alternative phrases for variety
|
||||
# Alternative phrases for manual insertion (as a fallback)
|
||||
link_phrases = [
|
||||
"according to {source}",
|
||||
"as reported by {source}",
|
||||
"{source} notes that"
|
||||
"{source} notes that",
|
||||
"per {source}",
|
||||
"says {source}"
|
||||
]
|
||||
|
||||
best_candidate = None
|
||||
@@ -643,22 +648,28 @@ def insert_link_naturally(summary, source_name, source_url):
|
||||
sentences = re.split(r'(?<=[.!?])\s+', para.strip())
|
||||
eligible_sentences = [
|
||||
(i, s) for i, s in enumerate(sentences)
|
||||
if s.strip() and not s.endswith('?') # Exclude sentences ending with '?'
|
||||
and not s.endswith('!') # Exclude exclamations for smoother integration
|
||||
if s.strip()
|
||||
and not s.endswith('?') # Exclude questions
|
||||
and not s.endswith('!') # Exclude exclamations
|
||||
and '<a href=' not in s # Avoid sentences with existing links
|
||||
and len(s.split()) >= 5 # Prefer sentences with at least 5 words
|
||||
]
|
||||
if not eligible_sentences:
|
||||
continue
|
||||
|
||||
# Score sentences based on suitability (prefer declarative sentences)
|
||||
# Score sentences based on suitability
|
||||
for idx, sentence in eligible_sentences:
|
||||
score = 0
|
||||
# Favor sentences with factual content (simplified heuristic)
|
||||
# Favor sentences with factual content
|
||||
if any(word in sentence.lower() for word in ["is", "are", "has", "shows", "reveals"]):
|
||||
score += 2
|
||||
# Prefer longer sentences for better context
|
||||
score += len(sentence.split()) // 5
|
||||
# Prefer middle sentences for natural flow
|
||||
score += abs(idx - len(sentences) / 2) * -1 # Penalize sentences far from the middle
|
||||
# Boost score for sentences mentioning the source topic
|
||||
if source_name.lower() in sentence.lower():
|
||||
score += 3
|
||||
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
@@ -669,14 +680,38 @@ def insert_link_naturally(summary, source_name, source_url):
|
||||
logging.warning("No suitable sentence found, using fallback.")
|
||||
return append_link_as_fallback(summary, source_name, source_url)
|
||||
|
||||
# Select a link phrase based on sentence structure
|
||||
# Select a link phrase for fallback manual insertion
|
||||
sentence_idx, sentence = best_candidate
|
||||
link_phrase = random.choice(link_phrases)
|
||||
link_pattern = f'<a href="{source_url}">{source_name}</a>'
|
||||
formatted_link = link_phrase.format(source=link_pattern)
|
||||
|
||||
# Insert the link at the end of the selected sentence (no capitalization needed)
|
||||
new_sentence = f"{sentence.rstrip('.')} {formatted_link}."
|
||||
# Use GPT to rewrite the sentence with the link
|
||||
prompt = (
|
||||
f"Rewrite the following sentence to naturally include a reference to the source '{source_name}' "
|
||||
f"with a hyperlink in HTML format: <a href=\"{source_url}\">{source_name}</a>. "
|
||||
"Integrate the link into the sentence seamlessly, maintaining the original tone and style. "
|
||||
"Do not add extra sentences, change the meaning, or include additional punctuation like a trailing period. "
|
||||
"Return only the rewritten sentence."
|
||||
)
|
||||
response = client.chat.completions.create(
|
||||
model=LIGHT_TASK_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": prompt},
|
||||
{"role": "user", "content": sentence}
|
||||
],
|
||||
max_tokens=100,
|
||||
temperature=0.7
|
||||
)
|
||||
new_sentence = response.choices[0].message.content.strip()
|
||||
if not new_sentence or '<a href=' not in new_sentence:
|
||||
logging.warning("GPT failed to rewrite sentence, using manual insertion")
|
||||
new_sentence = f"{sentence.rstrip('.')} {formatted_link}."
|
||||
else:
|
||||
# Ensure the sentence ends with a period if the original did
|
||||
if sentence.rstrip().endswith('.'):
|
||||
new_sentence = new_sentence.rstrip('.') + '.'
|
||||
|
||||
sentences[sentence_idx] = new_sentence
|
||||
new_para = ' '.join(sentences)
|
||||
paragraphs[paragraphs.index(best_paragraph)] = new_para
|
||||
@@ -838,12 +873,12 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im
|
||||
tags.append(picks_tag_id)
|
||||
logger.info(f"Added 'Picks' tag (ID: {picks_tag_id}) due to high interest score: {interest_score}")
|
||||
|
||||
# Format content with <p> tags
|
||||
# Format content with <p> tags, splitting on \n\n to match summary format
|
||||
content = post_data["content"]
|
||||
if content is None:
|
||||
logger.error(f"Post content is None for title '{post_data['title']}' - using fallback")
|
||||
content = "Content unavailable. Check the original source for details."
|
||||
formatted_content = "\n".join(f"<p>{para}</p>" for para in content.split('\n') if para.strip())
|
||||
formatted_content = "\n".join(f"<p>{para}</p>" for para in content.split('\n\n') if para.strip())
|
||||
|
||||
# Upload image before posting
|
||||
image_id = None
|
||||
|
||||
Reference in New Issue
Block a user