Try to fix paragraph handling in insert_link_naturally

my-fix-branch
Shane 7 months ago
parent 20451b5c2e
commit 17a5bef6b7
  1. 26
      foodie_utils.py

@ -497,12 +497,6 @@ def summarize_with_gpt4o(content, source_name, link, interest_score=0, extra_pro
)
summary = response.choices[0].message.content.strip()
# Post-processing: Normalize paragraph separation to ensure a single \n break
# Split by any combination of newlines, filter out empty paragraphs, and join with a single \n
paragraphs = [p.strip() for p in summary.split('\n') if p.strip()]
summary = '\n'.join(paragraphs)
logging.info(f"Processed summary (Persona: {persona}): {summary}")
return summary
@ -512,14 +506,19 @@ def summarize_with_gpt4o(content, source_name, link, interest_score=0, extra_pro
def insert_link_naturally(summary, source_name, source_url):
try:
# Log the input summary to debug its structure
logging.info(f"Input summary to insert_link_naturally: {summary!r}")
prompt = (
"Take this summary and insert a single HTML link naturally into one paragraph (randomly chosen). "
"Use the format '<a href=\"{source_url}\">{source_name}</a>' and weave it into the text seamlessly, "
"e.g., 'The latest scoop from {source_name} reveals...' or '{source_name} uncovers this wild shift.' "
"Vary the phrasing creatively to avoid repetition (don’t always use 'dives into'). "
"Place the link at a sentence boundary (after a period, not within numbers like '6.30am' or '1.5'). "
"Maintain the original tone and flow, ensuring the link reads as part of the sentence, not standalone. "
"Return the modified summary with exactly one link, no extra formatting or newlines beyond the original.\n\n"
"Maintain the original tone, flow, and paragraph structure, preserving all existing newlines exactly as they are. "
"Each paragraph in the input summary is separated by a single \\n; ensure the output maintains this exact separation. "
"Do not add or remove newlines beyond the original summary structure. "
"Return the modified summary with exactly one link.\n\n"
"Summary:\n{summary}\n\n"
"Source Name: {source_name}\nSource URL: {source_url}"
).format(summary=summary, source_name=source_name, source_url=source_url)
@ -536,13 +535,20 @@ def insert_link_naturally(summary, source_name, source_url):
new_summary = response.choices[0].message.content.strip()
link_pattern = f'<a href="{source_url}">{source_name}</a>'
if new_summary and new_summary.count(link_pattern) == 1:
logging.info(f"Summary with naturally embedded link: {new_summary}")
# Trim stray whitespace around each line; blank lines are kept, so existing \n breaks are preserved rather than collapsed
# Split by newlines, but do not filter out paragraphs to preserve the count
paragraphs = new_summary.split('\n')
# Strip each paragraph, but keep all paragraphs even if empty
paragraphs = [p.strip() for p in paragraphs]
new_summary = '\n'.join(paragraphs)
logging.info(f"Summary with naturally embedded link (normalized): {new_summary!r}")
return new_summary
logging.warning(f"GPT failed to insert link correctly: {new_summary}. Using fallback.")
except Exception as e:
logging.error(f"Link insertion failed: {e}")
# Fallback path
time_pattern = r'\b\d{1,2}\.\d{2}(?:am|pm)\b'
protected_summary = re.sub(time_pattern, lambda m: m.group(0).replace('.', '@'), summary)
paragraphs = protected_summary.split('\n')
@ -573,7 +579,7 @@ def insert_link_naturally(summary, source_name, source_url):
new_summary = '\n'.join(paragraphs)
new_summary = new_summary.replace('@', '.')
logging.info(f"Fallback summary with link: {new_summary}")
logging.info(f"Fallback summary with link: {new_summary!r}")
return new_summary
def generate_category_from_summary(summary):

Loading…
Cancel
Save