|
|
|
@ -497,12 +497,6 @@ def summarize_with_gpt4o(content, source_name, link, interest_score=0, extra_pro |
|
|
|
) |
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
summary = response.choices[0].message.content.strip() |
|
|
|
summary = response.choices[0].message.content.strip() |
|
|
|
|
|
|
|
|
|
|
|
# Post-processing: Normalize paragraph separation to ensure a single \n break |
|
|
|
|
|
|
|
# Split by any combination of newlines, filter out empty paragraphs, and join with a single \n |
|
|
|
|
|
|
|
paragraphs = [p.strip() for p in summary.split('\n') if p.strip()] |
|
|
|
|
|
|
|
summary = '\n'.join(paragraphs) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
logging.info(f"Processed summary (Persona: {persona}): {summary}") |
|
|
|
logging.info(f"Processed summary (Persona: {persona}): {summary}") |
|
|
|
return summary |
|
|
|
return summary |
|
|
|
|
|
|
|
|
|
|
|
@ -512,14 +506,19 @@ def summarize_with_gpt4o(content, source_name, link, interest_score=0, extra_pro |
|
|
|
|
|
|
|
|
|
|
|
def insert_link_naturally(summary, source_name, source_url): |
|
|
|
def insert_link_naturally(summary, source_name, source_url): |
|
|
|
try: |
|
|
|
try: |
|
|
|
|
|
|
|
# Log the input summary to debug its structure |
|
|
|
|
|
|
|
logging.info(f"Input summary to insert_link_naturally: {summary!r}") |
|
|
|
|
|
|
|
|
|
|
|
prompt = ( |
|
|
|
prompt = ( |
|
|
|
"Take this summary and insert a single HTML link naturally into one paragraph (randomly chosen). " |
|
|
|
"Take this summary and insert a single HTML link naturally into one paragraph (randomly chosen). " |
|
|
|
"Use the format '<a href=\"{source_url}\">{source_name}</a>' and weave it into the text seamlessly, " |
|
|
|
"Use the format '<a href=\"{source_url}\">{source_name}</a>' and weave it into the text seamlessly, " |
|
|
|
"e.g., 'The latest scoop from {source_name} reveals...' or '{source_name} uncovers this wild shift.' " |
|
|
|
"e.g., 'The latest scoop from {source_name} reveals...' or '{source_name} uncovers this wild shift.' " |
|
|
|
"Vary the phrasing creatively to avoid repetition (don’t always use 'dives into'). " |
|
|
|
"Vary the phrasing creatively to avoid repetition (don’t always use 'dives into'). " |
|
|
|
"Place the link at a sentence boundary (after a period, not within numbers like '6.30am' or '1.5'). " |
|
|
|
"Place the link at a sentence boundary (after a period, not within numbers like '6.30am' or '1.5'). " |
|
|
|
"Maintain the original tone and flow, ensuring the link reads as part of the sentence, not standalone. " |
|
|
|
"Maintain the original tone, flow, and paragraph structure, preserving all existing newlines exactly as they are. " |
|
|
|
"Return the modified summary with exactly one link, no extra formatting or newlines beyond the original.\n\n" |
|
|
|
"Each paragraph in the input summary is separated by a single \\n; ensure the output maintains this exact separation. " |
|
|
|
|
|
|
|
"Do not add or remove newlines beyond the original summary structure. " |
|
|
|
|
|
|
|
"Return the modified summary with exactly one link.\n\n" |
|
|
|
"Summary:\n{summary}\n\n" |
|
|
|
"Summary:\n{summary}\n\n" |
|
|
|
"Source Name: {source_name}\nSource URL: {source_url}" |
|
|
|
"Source Name: {source_name}\nSource URL: {source_url}" |
|
|
|
).format(summary=summary, source_name=source_name, source_url=source_url) |
|
|
|
).format(summary=summary, source_name=source_name, source_url=source_url) |
|
|
|
@ -536,13 +535,20 @@ def insert_link_naturally(summary, source_name, source_url): |
|
|
|
new_summary = response.choices[0].message.content.strip() |
|
|
|
new_summary = response.choices[0].message.content.strip() |
|
|
|
link_pattern = f'<a href="{source_url}">{source_name}</a>' |
|
|
|
link_pattern = f'<a href="{source_url}">{source_name}</a>' |
|
|
|
if new_summary and new_summary.count(link_pattern) == 1: |
|
|
|
if new_summary and new_summary.count(link_pattern) == 1: |
|
|
|
logging.info(f"Summary with naturally embedded link: {new_summary}") |
|
|
|
# Normalize paragraph separation to ensure a single \n break |
|
|
|
|
|
|
|
# Split by newlines, but do not filter out paragraphs to preserve the count |
|
|
|
|
|
|
|
paragraphs = new_summary.split('\n') |
|
|
|
|
|
|
|
# Strip each paragraph, but keep all paragraphs even if empty |
|
|
|
|
|
|
|
paragraphs = [p.strip() for p in paragraphs] |
|
|
|
|
|
|
|
new_summary = '\n'.join(paragraphs) |
|
|
|
|
|
|
|
logging.info(f"Summary with naturally embedded link (normalized): {new_summary!r}") |
|
|
|
return new_summary |
|
|
|
return new_summary |
|
|
|
|
|
|
|
|
|
|
|
logging.warning(f"GPT failed to insert link correctly: {new_summary}. Using fallback.") |
|
|
|
logging.warning(f"GPT failed to insert link correctly: {new_summary}. Using fallback.") |
|
|
|
except Exception as e: |
|
|
|
except Exception as e: |
|
|
|
logging.error(f"Link insertion failed: {e}") |
|
|
|
logging.error(f"Link insertion failed: {e}") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Fallback path |
|
|
|
time_pattern = r'\b\d{1,2}\.\d{2}(?:am|pm)\b' |
|
|
|
time_pattern = r'\b\d{1,2}\.\d{2}(?:am|pm)\b' |
|
|
|
protected_summary = re.sub(time_pattern, lambda m: m.group(0).replace('.', '@'), summary) |
|
|
|
protected_summary = re.sub(time_pattern, lambda m: m.group(0).replace('.', '@'), summary) |
|
|
|
paragraphs = protected_summary.split('\n') |
|
|
|
paragraphs = protected_summary.split('\n') |
|
|
|
@ -573,7 +579,7 @@ def insert_link_naturally(summary, source_name, source_url): |
|
|
|
new_summary = '\n'.join(paragraphs) |
|
|
|
new_summary = '\n'.join(paragraphs) |
|
|
|
|
|
|
|
|
|
|
|
new_summary = new_summary.replace('@', '.') |
|
|
|
new_summary = new_summary.replace('@', '.') |
|
|
|
logging.info(f"Fallback summary with link: {new_summary}") |
|
|
|
logging.info(f"Fallback summary with link: {new_summary!r}") |
|
|
|
return new_summary |
|
|
|
return new_summary |
|
|
|
|
|
|
|
|
|
|
|
def generate_category_from_summary(summary): |
|
|
|
def generate_category_from_summary(summary): |
|
|
|
|