test
This commit is contained in:
+17
-1
@@ -65,14 +65,20 @@ def setup_logging():
|
|||||||
lines = f.readlines()
|
lines = f.readlines()
|
||||||
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
|
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
|
||||||
pruned_lines = []
|
pruned_lines = []
|
||||||
|
malformed_count = 0
|
||||||
for line in lines:
|
for line in lines:
|
||||||
|
if len(line) < 19 or not line[:19].replace('-', '').replace(':', '').replace(' ', '').isdigit():
|
||||||
|
malformed_count += 1
|
||||||
|
continue
|
||||||
try:
|
try:
|
||||||
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
|
timestamp = datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
|
||||||
if timestamp > cutoff:
|
if timestamp > cutoff:
|
||||||
pruned_lines.append(line)
|
pruned_lines.append(line)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
logging.warning(f"Skipping malformed log line: {line.strip()[:50]}...")
|
malformed_count += 1
|
||||||
continue
|
continue
|
||||||
|
if malformed_count > 0:
|
||||||
|
logging.info(f"Skipped {malformed_count} malformed log lines during pruning")
|
||||||
with open(LOG_FILE, 'w') as f:
|
with open(LOG_FILE, 'w') as f:
|
||||||
f.writelines(pruned_lines)
|
f.writelines(pruned_lines)
|
||||||
|
|
||||||
@@ -240,6 +246,16 @@ def curate_from_rss():
|
|||||||
attempts += 1
|
attempts += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Remove the original title from the summary
|
||||||
|
title_pattern = re.compile(
|
||||||
|
r'\*\*' + re.escape(title) + r'\*\*|' + re.escape(title),
|
||||||
|
re.IGNORECASE
|
||||||
|
)
|
||||||
|
final_summary = title_pattern.sub('', final_summary).strip()
|
||||||
|
# Clean up any extra spaces or newlines left after removal
|
||||||
|
final_summary = re.sub(r'\s+', ' ', final_summary)
|
||||||
|
final_summary = '\n'.join(para.strip() for para in final_summary.split('\n') if para.strip())
|
||||||
|
|
||||||
final_summary = insert_link_naturally(final_summary, source_name, link)
|
final_summary = insert_link_naturally(final_summary, source_name, link)
|
||||||
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
|
post_data, author, category, image_url, image_source, uploader, pixabay_url = prepare_post_data(final_summary, title)
|
||||||
if not post_data:
|
if not post_data:
|
||||||
|
|||||||
+40
-41
@@ -137,7 +137,7 @@ def generate_article_tweet(author, post, persona):
|
|||||||
author_handle = f"@{author['username']}"
|
author_handle = f"@{author['username']}"
|
||||||
|
|
||||||
prompt = (
|
prompt = (
|
||||||
f"Craft a sharp tweet (under 280 characters) for {author_handle} with the voice of '{persona}'. "
|
f"Craft a sharp tweet (under 230 characters) for {author_handle} with the voice of '{persona}'. "
|
||||||
f"Distill the essence of the article '{title}' and include the raw URL '{url}' at the end. "
|
f"Distill the essence of the article '{title}' and include the raw URL '{url}' at the end. "
|
||||||
f"Make it bold, spark curiosity, and invite engagement with a human touch. "
|
f"Make it bold, spark curiosity, and invite engagement with a human touch. "
|
||||||
f"Swap 'elevate' for dynamic terms like 'ignite' or 'unleash'. "
|
f"Swap 'elevate' for dynamic terms like 'ignite' or 'unleash'. "
|
||||||
@@ -414,53 +414,46 @@ def get_image(search_query):
|
|||||||
logging.error(f"Pixabay image fetch failed for query '{search_query}': {e}")
|
logging.error(f"Pixabay image fetch failed for query '{search_query}': {e}")
|
||||||
return None, None, None, None
|
return None, None, None, None
|
||||||
|
|
||||||
def generate_image_query(content):
|
def generate_image_query(title, summary):
|
||||||
prompt = (
|
|
||||||
"Given the following content, generate a concise image search query (max 5 words) that would likely yield relevant, visually appealing images on platforms like Flickr or Pixabay. "
|
|
||||||
"Identify and prioritize specific entities like brand names or unique terms over abstract or generic concepts. "
|
|
||||||
"Focus on concrete, visual concepts related to food, dining, or restaurants. "
|
|
||||||
"Also provide relevance keywords (max 5 words) to filter results, using general themes related to the content. "
|
|
||||||
"Return the result as a JSON object with 'search' and 'relevance' keys.\n\n"
|
|
||||||
"Content:\n"
|
|
||||||
f"{content}\n\n"
|
|
||||||
"Example output:\n"
|
|
||||||
"```json\n"
|
|
||||||
"{\n"
|
|
||||||
" \"search\": \"Wingstop dining\",\n"
|
|
||||||
" \"relevance\": \"fast food dining\"\n"
|
|
||||||
"}\n```"
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
prompt = (
|
||||||
|
"Given the following article title and summary, generate a concise image search query (max 5 words) to find a relevant image. "
|
||||||
|
"Also provide a list of relevance keywords (max 5 words) that should be associated with the image. "
|
||||||
|
"Return the result as a JSON object with 'search' and 'relevance' keys.\n\n"
|
||||||
|
f"Title: {title}\n\n"
|
||||||
|
f"Summary: {summary}\n\n"
|
||||||
|
"Example output:\n"
|
||||||
|
"```json\n"
|
||||||
|
"{\"search\": \"Italian cuisine trends\", \"relevance\": \"pasta wine dining culture\"}\n"
|
||||||
|
"```"
|
||||||
|
)
|
||||||
response = client.chat.completions.create(
|
response = client.chat.completions.create(
|
||||||
model=LIGHT_TASK_MODEL,
|
model=LIGHT_TASK_MODEL,
|
||||||
messages=[
|
messages=[
|
||||||
{"role": "system", "content": "You are a helpful assistant that generates concise image search queries."},
|
{"role": "system", "content": prompt},
|
||||||
{"role": "user", "content": prompt}
|
{"role": "user", "content": "Generate an image search query and relevance keywords."}
|
||||||
],
|
],
|
||||||
max_tokens=100,
|
max_tokens=100,
|
||||||
temperature=0.5
|
temperature=0.5
|
||||||
)
|
)
|
||||||
|
|
||||||
raw_response = response.choices[0].message.content
|
raw_response = response.choices[0].message.content
|
||||||
logging.debug(f"Raw GPT image query response: '{raw_response}'")
|
|
||||||
|
|
||||||
# Extract JSON from the response
|
|
||||||
json_match = re.search(r'```json\n([\s\S]*?)\n```', raw_response)
|
json_match = re.search(r'```json\n([\s\S]*?)\n```', raw_response)
|
||||||
if not json_match:
|
if not json_match:
|
||||||
logging.warning(f"Failed to parse image query JSON from GPT response: {raw_response}")
|
logging.warning(f"Failed to parse image query JSON: {raw_response}")
|
||||||
return "food dining", ["dining", "trends"]
|
return title, [], True
|
||||||
|
|
||||||
query_data = json.loads(json_match.group(1))
|
query_data = json.loads(json_match.group(1))
|
||||||
search_query = query_data.get("search", "food dining")
|
search_query = query_data.get("search", title)
|
||||||
relevance_keywords = query_data.get("relevance", ["dining", "trends"])
|
relevance_keywords = query_data.get("relevance", "").split()
|
||||||
|
|
||||||
logging.debug(f"Image query from content: {query_data}")
|
# Log the JSON object in a single line
|
||||||
return search_query, relevance_keywords
|
log_json = json.dumps(query_data).replace('\n', ' ').replace('\r', ' ')
|
||||||
|
logging.debug(f"Image query from content: {log_json}")
|
||||||
|
|
||||||
|
return search_query, relevance_keywords, False
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"Failed to generate image query: {e}. Using fallback.")
|
logging.warning(f"Image query generation failed: {e}. Using title as fallback.")
|
||||||
return "food dining", ["dining", "trends"]
|
return title, [], True
|
||||||
|
|
||||||
def smart_image_and_filter(title, summary):
|
def smart_image_and_filter(title, summary):
|
||||||
try:
|
try:
|
||||||
@@ -655,6 +648,7 @@ def summarize_with_gpt4o(content, source_name, link, interest_score=0, extra_pro
|
|||||||
|
|
||||||
full_prompt = (
|
full_prompt = (
|
||||||
f"{prompt}\n\n"
|
f"{prompt}\n\n"
|
||||||
|
f"Do not include the article title in the summary.\n\n"
|
||||||
f"{extra_prompt}\n\n"
|
f"{extra_prompt}\n\n"
|
||||||
f"Avoid using the word 'elevate'—use more humanized language like 'level up' or 'bring to life'.\n"
|
f"Avoid using the word 'elevate'—use more humanized language like 'level up' or 'bring to life'.\n"
|
||||||
f"Content to summarize:\n{content}\n\n"
|
f"Content to summarize:\n{content}\n\n"
|
||||||
@@ -673,6 +667,14 @@ def summarize_with_gpt4o(content, source_name, link, interest_score=0, extra_pro
|
|||||||
)
|
)
|
||||||
|
|
||||||
summary = response.choices[0].message.content.strip()
|
summary = response.choices[0].message.content.strip()
|
||||||
|
|
||||||
|
# Post-process to remove the original title if it still appears
|
||||||
|
# Extract the title from the content (assuming it's the first line or part of the prompt)
|
||||||
|
# For simplicity, we can pass the title as an additional parameter if needed
|
||||||
|
# Here, we'll assume the title is passed via the calling function (e.g., from foodie_automator_rss.py)
|
||||||
|
# For now, we'll use a placeholder for the title removal logic
|
||||||
|
# In foodie_automator_rss.py, the title is available as entry.title
|
||||||
|
# We'll handle the title removal in the calling script instead
|
||||||
logging.info(f"Processed summary (Persona: {persona}): {summary}")
|
logging.info(f"Processed summary (Persona: {persona}): {summary}")
|
||||||
return summary
|
return summary
|
||||||
|
|
||||||
@@ -682,13 +684,12 @@ def summarize_with_gpt4o(content, source_name, link, interest_score=0, extra_pro
|
|||||||
|
|
||||||
def insert_link_naturally(summary, source_name, source_url):
|
def insert_link_naturally(summary, source_name, source_url):
|
||||||
try:
|
try:
|
||||||
# Log the input summary to debug its structure
|
|
||||||
logging.info(f"Input summary to insert_link_naturally: {summary!r}")
|
logging.info(f"Input summary to insert_link_naturally: {summary!r}")
|
||||||
|
|
||||||
prompt = (
|
prompt = (
|
||||||
"Take this summary and insert a single HTML link naturally into one paragraph (randomly chosen). "
|
"Take this summary and insert a single HTML link naturally into one paragraph (randomly chosen). "
|
||||||
"Use the format '<a href=\"{source_url}\">{source_name}</a>' and weave it into the text seamlessly, "
|
"Use the format '<a href=\"{source_url}\">{source_name}</a>' and weave it into the text seamlessly, "
|
||||||
"e.g., 'The latest scoop from {source_name} reveals...' or '{source_name} uncovers this wild shift.' "
|
"e.g., 'The latest scoop from {source_name} reveals...' or '{source_name} shares this insight.' "
|
||||||
"Vary the phrasing creatively to avoid repetition (don’t always use 'dives into'). "
|
"Vary the phrasing creatively to avoid repetition (don’t always use 'dives into'). "
|
||||||
"Place the link at a sentence boundary (after a period, not within numbers like '6.30am' or '1.5'). "
|
"Place the link at a sentence boundary (after a period, not within numbers like '6.30am' or '1.5'). "
|
||||||
"Maintain the original tone, flow, and paragraph structure, preserving all existing newlines exactly as they are. "
|
"Maintain the original tone, flow, and paragraph structure, preserving all existing newlines exactly as they are. "
|
||||||
@@ -711,10 +712,7 @@ def insert_link_naturally(summary, source_name, source_url):
|
|||||||
new_summary = response.choices[0].message.content.strip()
|
new_summary = response.choices[0].message.content.strip()
|
||||||
link_pattern = f'<a href="{source_url}">{source_name}</a>'
|
link_pattern = f'<a href="{source_url}">{source_name}</a>'
|
||||||
if new_summary and new_summary.count(link_pattern) == 1:
|
if new_summary and new_summary.count(link_pattern) == 1:
|
||||||
# Normalize paragraph separation to ensure a single \n break
|
|
||||||
# Split by newlines, but do not filter out paragraphs to preserve the count
|
|
||||||
paragraphs = new_summary.split('\n')
|
paragraphs = new_summary.split('\n')
|
||||||
# Strip each paragraph, but keep all paragraphs even if empty
|
|
||||||
paragraphs = [p.strip() for p in paragraphs]
|
paragraphs = [p.strip() for p in paragraphs]
|
||||||
new_summary = '\n'.join(paragraphs)
|
new_summary = '\n'.join(paragraphs)
|
||||||
logging.info(f"Summary with naturally embedded link (normalized): {new_summary!r}")
|
logging.info(f"Summary with naturally embedded link (normalized): {new_summary!r}")
|
||||||
@@ -733,11 +731,12 @@ def insert_link_naturally(summary, source_name, source_url):
|
|||||||
return summary
|
return summary
|
||||||
|
|
||||||
target_para = random.choice([p for p in paragraphs if p.strip()])
|
target_para = random.choice([p for p in paragraphs if p.strip()])
|
||||||
|
link_pattern = f'<a href="{source_url}">{source_name}</a>'
|
||||||
phrases = [
|
phrases = [
|
||||||
f"The scoop from {link_pattern} spills the details",
|
f"Learn more from {link_pattern}",
|
||||||
f"{link_pattern} uncovers this wild shift",
|
f"{link_pattern} shares this insight",
|
||||||
f"This gem via {link_pattern} drops some truth",
|
f"Discover more at {link_pattern}",
|
||||||
f"{link_pattern} breaks down the buzz"
|
f"Check out {link_pattern} for details"
|
||||||
]
|
]
|
||||||
insertion_phrase = random.choice(phrases)
|
insertion_phrase = random.choice(phrases)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user