test

2025-05-01 19:24:20 +10:00
parent 022b52a8a7
commit 90be324fe4
2 changed files with 57 additions and 42 deletions
@@ -137,7 +137,7 @@ def generate_article_tweet(author, post, persona):
    author_handle = f"@{author['username']}"
    
    prompt = (
-        f"Craft a sharp tweet (under 280 characters) for {author_handle} with the voice of '{persona}'. "
+        f"Craft a sharp tweet (under 230 characters) for {author_handle} with the voice of '{persona}'. "
        f"Distill the essence of the article '{title}' and include the raw URL '{url}' at the end. "
        f"Make it bold, spark curiosity, and invite engagement with a human touch. "
        f"Swap 'elevate' for dynamic terms like 'ignite' or 'unleash'. "
@@ -414,53 +414,46 @@ def get_image(search_query):
        logging.error(f"Pixabay image fetch failed for query '{search_query}': {e}")
        return None, None, None, None

-def generate_image_query(content):
-    prompt = (
-        "Given the following content, generate a concise image search query (max 5 words) that would likely yield relevant, visually appealing images on platforms like Flickr or Pixabay. "
-        "Identify and prioritize specific entities like brand names or unique terms over abstract or generic concepts. "
-        "Focus on concrete, visual concepts related to food, dining, or restaurants. "
-        "Also provide relevance keywords (max 5 words) to filter results, using general themes related to the content. "
-        "Return the result as a JSON object with 'search' and 'relevance' keys.\n\n"
-        "Content:\n"
-        f"{content}\n\n"
-        "Example output:\n"
-        "```json\n"
-        "{\n"
-        "  \"search\": \"Wingstop dining\",\n"
-        "  \"relevance\": \"fast food dining\"\n"
-        "}\n```"
-    )
-    
+def generate_image_query(title, summary):
    try:
+        prompt = (
+            "Given the following article title and summary, generate a concise image search query (max 5 words) to find a relevant image. "
+            "Also provide a list of relevance keywords (max 5 words) that should be associated with the image. "
+            "Return the result as a JSON object with 'search' and 'relevance' keys.\n\n"
+            f"Title: {title}\n\n"
+            f"Summary: {summary}\n\n"
+            "Example output:\n"
+            "```json\n"
+            "{\"search\": \"Italian cuisine trends\", \"relevance\": \"pasta wine dining culture\"}\n"
+            "```"
+        )
        response = client.chat.completions.create(
            model=LIGHT_TASK_MODEL,
            messages=[
-                {"role": "system", "content": "You are a helpful assistant that generates concise image search queries."},
-                {"role": "user", "content": prompt}
+                {"role": "system", "content": prompt},
+                {"role": "user", "content": "Generate an image search query and relevance keywords."}
            ],
            max_tokens=100,
            temperature=0.5
        )
-        
        raw_response = response.choices[0].message.content
-        logging.debug(f"Raw GPT image query response: '{raw_response}'")
-        
-        # Extract JSON from the response
        json_match = re.search(r'```json\n([\s\S]*?)\n```', raw_response)
        if not json_match:
-            logging.warning(f"Failed to parse image query JSON from GPT response: {raw_response}")
-            return "food dining", ["dining", "trends"]
+            logging.warning(f"Failed to parse image query JSON: {raw_response}")
+            return title, [], True
        
        query_data = json.loads(json_match.group(1))
-        search_query = query_data.get("search", "food dining")
-        relevance_keywords = query_data.get("relevance", ["dining", "trends"])
+        search_query = query_data.get("search", title)
+        relevance_keywords = query_data.get("relevance", "").split()
        
-        logging.debug(f"Image query from content: {query_data}")
-        return search_query, relevance_keywords
+        # Log the JSON object in a single line
+        log_json = json.dumps(query_data).replace('\n', ' ').replace('\r', ' ')
+        logging.debug(f"Image query from content: {log_json}")
        
+        return search_query, relevance_keywords, False
    except Exception as e:
-        logging.warning(f"Failed to generate image query: {e}. Using fallback.")
-        return "food dining", ["dining", "trends"]
+        logging.warning(f"Image query generation failed: {e}. Using title as fallback.")
+        return title, [], True

 def smart_image_and_filter(title, summary):
    try:
@@ -655,6 +648,7 @@ def summarize_with_gpt4o(content, source_name, link, interest_score=0, extra_pro
        
        full_prompt = (
            f"{prompt}\n\n"
+            f"Do not include the article title in the summary.\n\n"
            f"{extra_prompt}\n\n"
            f"Avoid using the word 'elevate'—use more humanized language like 'level up' or 'bring to life'.\n"
            f"Content to summarize:\n{content}\n\n"
@@ -673,6 +667,14 @@ def summarize_with_gpt4o(content, source_name, link, interest_score=0, extra_pro
        )
        
        summary = response.choices[0].message.content.strip()
+        
+        # The prompt above asks the model to leave the article title out, but the
+        # title may still appear in the returned summary.
+        # It is not stripped here: this function has no reliable way to know the
+        # title (it would have to be guessed from the first line of the content,
+        # or passed in as an additional parameter).
+        # The caller does have it (in foodie_automator_rss.py it is entry.title),
+        # so title removal is handled in the calling script instead.
        logging.info(f"Processed summary (Persona: {persona}): {summary}")
        return summary
    
@@ -682,13 +684,12 @@ def summarize_with_gpt4o(content, source_name, link, interest_score=0, extra_pro

 def insert_link_naturally(summary, source_name, source_url):
    try:
-        # Log the input summary to debug its structure
        logging.info(f"Input summary to insert_link_naturally: {summary!r}")

        prompt = (
            "Take this summary and insert a single HTML link naturally into one paragraph (randomly chosen). "
            "Use the format '<a href=\"{source_url}\">{source_name}</a>' and weave it into the text seamlessly, "
-            "e.g., 'The latest scoop from {source_name} reveals...' or '{source_name} uncovers this wild shift.' "
+            "e.g., 'The latest scoop from {source_name} reveals...' or '{source_name} shares this insight.' "
            "Vary the phrasing creatively to avoid repetition (don’t always use 'dives into'). "
            "Place the link at a sentence boundary (after a period, not within numbers like '6.30am' or '1.5'). "
            "Maintain the original tone, flow, and paragraph structure, preserving all existing newlines exactly as they are. "
@@ -711,10 +712,7 @@ def insert_link_naturally(summary, source_name, source_url):
        new_summary = response.choices[0].message.content.strip()
        link_pattern = f'<a href="{source_url}">{source_name}</a>'
        if new_summary and new_summary.count(link_pattern) == 1:
-            # Normalize paragraph separation to ensure a single \n break
-            # Split by newlines, but do not filter out paragraphs to preserve the count
            paragraphs = new_summary.split('\n')
-            # Strip each paragraph, but keep all paragraphs even if empty
            paragraphs = [p.strip() for p in paragraphs]
            new_summary = '\n'.join(paragraphs)
            logging.info(f"Summary with naturally embedded link (normalized): {new_summary!r}")
@@ -733,11 +731,12 @@ def insert_link_naturally(summary, source_name, source_url):
        return summary
    
    target_para = random.choice([p for p in paragraphs if p.strip()])
+    link_pattern = f'<a href="{source_url}">{source_name}</a>'
    phrases = [
-        f"The scoop from {link_pattern} spills the details",
-        f"{link_pattern} uncovers this wild shift",
-        f"This gem via {link_pattern} drops some truth",
-        f"{link_pattern} breaks down the buzz"
+        f"Learn more from {link_pattern}",
+        f"{link_pattern} shares this insight",
+        f"Discover more at {link_pattern}",
+        f"Check out {link_pattern} for details"
    ]
    insertion_phrase = random.choice(phrases)