From ccddefbc8bda2ac05f594a6912d93212005e1282 Mon Sep 17 00:00:00 2001
From: Shane <shanehill@mail.com>
Date: Sun, 4 May 2025 12:44:50 +1000
Subject: [PATCH] Rename pixabay_url to page_url, add in-post image attribution, replace GPT link insertion

---
 foodie_utils.py | 176 ++++++++++++++++++++++--------------------------
 1 file changed, 80 insertions(+), 96 deletions(-)
diff --git a/foodie_utils.py b/foodie_utils.py
index 4ba722a..57d6529 100644
--- a/foodie_utils.py
+++ b/foodie_utils.py
@@ -407,7 +407,7 @@ def extract_main_topic(text):
     # Fallback to a generic term if no specific food item is found
     return "food trends"
 
-def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_password, image_source="Pixabay", uploader=None, pixabay_url=None):
+def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_password, image_source="Pixabay", uploader=None, page_url=None):
     try:
         safe_title = post_title.encode('ascii', 'ignore').decode('ascii').replace(' ', '_')[:50]
         headers = {
@@ -450,7 +450,13 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw
         response.raise_for_status()
         
         image_id = response.json()["id"]
-        caption = f'<a href="{pixabay_url}">{image_source}</a> by {uploader}' if pixabay_url and uploader else image_source
+        # Link the caption to the image page when page_url is set; credit the uploader too when known
+        if page_url and uploader:
+            caption = f'<a href="{page_url}">{image_source}</a> by {uploader}'
+        elif page_url:
+            caption = f'<a href="{page_url}">{image_source}</a>'
+        else:
+            caption = image_source
         requests.post(
             f"{wp_base_url}/media/{image_id}",
             headers={"Authorization": headers["Authorization"], "Content-Type": "application/json"},
@@ -584,80 +590,62 @@ def insert_link_naturally(summary, source_name, source_url):
     try:
         logging.info(f"Input summary to insert_link_naturally: {summary!r}")
 
-        prompt = (
-            "Take this summary and insert a single HTML link into one paragraph (randomly chosen). "
-            "Use the format '<a href=\"{source_url}\">{source_name}</a>' and weave it into the text naturally, "
-            "e.g., 'According to {source_name}, ' or '{source_name} shares that '. "
-            "Place the link at the end of a sentence (after a period). "
-            "Preserve the original paragraph structure, maintaining all newlines exactly as they are (each paragraph separated by a single \\n). "
-            "Return the modified summary with exactly one link.\n\n"
-            "Summary:\n{summary}\n\n"
-            "Source Name: {source_name}\nSource URL: {source_url}"
-        ).format(summary=summary, source_name=source_name, source_url=source_url)
-        
-        # Add retry mechanism
-        for attempt in range(3):
-            try:
-                response = client.chat.completions.create(
-                    model=LIGHT_TASK_MODEL,
-                    messages=[
-                        {"role": "system", "content": prompt},
-                        {"role": "user", "content": "Insert the link naturally into the summary."}
-                    ],
-                    max_tokens=1000,
-                    temperature=0.7
-                )
-                new_summary = response.choices[0].message.content.strip()
-                link_pattern = f'<a href="{source_url}">{source_name}</a>'
-                if new_summary and new_summary.count(link_pattern) == 1:
-                    paragraphs = new_summary.split('\n')
-                    paragraphs = [p.strip() for p in paragraphs]
-                    new_summary = '\n'.join(paragraphs)
-                    logging.info(f"Summary with naturally embedded link (normalized): {new_summary!r}")
-                    return new_summary
-                else:
-                    logging.warning(f"GPT attempt {attempt + 1}/3 failed to insert link correctly: {new_summary}")
-            except Exception as e:
-                logging.error(f"Link insertion attempt {attempt + 1}/3 failed: {e}")
+        # Split the summary into paragraphs
+        paragraphs = summary.split('\n')
+        if not paragraphs or all(not p.strip() for p in paragraphs):
+            logging.error("No valid paragraphs to insert link.")
+            return summary
+
+        # Choose a paragraph with at least two sentences
+        eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2]
+        if not eligible_paragraphs:
+            logging.warning("No paragraph with multiple sentences found, appending to last paragraph.")
+            target_para = paragraphs[-1].strip()
+            link_pattern = f'<a href="{source_url}">{source_name}</a>'
+            new_para = f"{target_para} Source: {link_pattern}."
+            paragraphs[-1] = new_para
+            new_summary = '\n'.join(paragraphs)
+            logging.info(f"Appended link to summary: {new_summary!r}")
+            return new_summary
+
+        # Select a random eligible paragraph
+        target_para = random.choice(eligible_paragraphs)
+        sentences = re.split(r'(?<=[.!?])\s+', target_para.strip())
+        
+        # Pick a sentence to carry the link (any non-empty sentence except the paragraph's last)
+        eligible_sentences = [(i, s) for i, s in enumerate(sentences) if i < len(sentences)-1 and s.strip()]  # Exclude the last sentence
+        if not eligible_sentences:
+            eligible_sentences = [(i, s) for i, s in enumerate(sentences) if s.strip()]  # Fall back to any sentence
         
-        logging.warning(f"GPT failed to insert link after 3 attempts. Using fallback.")
+        sentence_idx, sentence = random.choice(eligible_sentences)
+        link_pattern = f'<a href="{source_url}">{source_name}</a>'
+        
+        # Split the sentence into words and insert the link naturally
+        words = sentence.split()
+        if len(words) < 3:  # Ensure the sentence has enough words to split
+            # If the sentence is too short, append the attribution
+            new_sentence = f"{sentence} according to {link_pattern}."
+        else:
+            # Insert the link mid-sentence by splitting at a random point
+            split_point = random.randint(1, len(words)-2)  # Avoid splitting at the very start or end
+            new_sentence = f"{' '.join(words[:split_point])}, according to {link_pattern}, {' '.join(words[split_point:])}"
+        
+        # Reconstruct the paragraph with the modified sentence
+        sentences[sentence_idx] = new_sentence
+        new_para = ' '.join(sentences)
+        paragraphs[paragraphs.index(target_para)] = new_para
+        
+        new_summary = '\n'.join(paragraphs)
+        logging.info(f"Summary with naturally embedded link: {new_summary!r}")
+        return new_summary
+
     except Exception as e:
         logging.error(f"Link insertion failed: {e}")
-
-    # Fallback path
-    time_pattern = r'\b\d{1,2}\.\d{2}(?:am|pm)\b'
-    protected_summary = re.sub(time_pattern, lambda m: m.group(0).replace('.', '@'), summary)
-    paragraphs = protected_summary.split('\n')
-    if not paragraphs or all(not p.strip() for p in paragraphs):
-        logging.error("No valid paragraphs to insert link.")
-        return summary
-    
-    target_para = random.choice([p for p in paragraphs if p.strip()])
-    link_pattern = f'<a href="{source_url}">{source_name}</a>'
-    phrases = [
-        f"According to {link_pattern}",
-        f"{link_pattern} notes this insight",
-        f"Details shared by {link_pattern}",
-        f"Source: {link_pattern}"
-    ]
-    insertion_phrase = random.choice(phrases)
-    
-    sentences = re.split(r'(?<=[.!?])\s+', target_para)
-    insertion_point = -1
-    for i, sent in enumerate(sentences):
-        if sent.strip() and '@' not in sent:
-            insertion_point = sum(len(s) + 1 for s in sentences[:i+1])
-            break
-    if insertion_point == -1:
-        insertion_point = len(target_para)
-    
-    new_para = f"{target_para[:insertion_point]} {insertion_phrase}. {target_para[insertion_point:]}".strip()
-    paragraphs[paragraphs.index(target_para)] = new_para
-    new_summary = '\n'.join(paragraphs)
-    
-    new_summary = new_summary.replace('@', '.')
-    logging.info(f"Fallback summary with link: {new_summary!r}")
-    return new_summary
+        # Fallback: append the link to the end of the summary
+        link_pattern = f'<a href="{source_url}">{source_name}</a>'
+        new_summary = f"{summary}\n\nSource: {link_pattern}."
+        logging.info(f"Fallback summary with link: {new_summary!r}")
+        return new_summary
 
 def generate_category_from_summary(summary):
     try:
@@ -725,7 +713,7 @@ def get_wp_tag_id(tag_name, wp_base_url, wp_username, wp_password):
         logging.error(f"Failed to get WP tag ID for '{tag_name}': {e}")
         return None
 
-def post_to_wp(post_data, category, link, author, image_url, original_source, image_source="Pixabay", uploader=None, pixabay_url=None, interest_score=4, post_id=None, should_post_tweet=True):
+def post_to_wp(post_data, category, link, author, image_url, original_source, image_source="Pixabay", uploader=None, page_url=None, interest_score=4, post_id=None, should_post_tweet=True):
     wp_base_url = "https://insiderfoodie.com/wp-json/wp/v2"
     logging.info(f"Starting post_to_wp for '{post_data['title']}', image_source: {image_source}")
     
@@ -772,6 +760,15 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im
             content = "Content unavailable. Check the original source for details."
         formatted_content = "\n".join(f"<p>{para}</p>" for para in content.split('\n') if para.strip())
         
+        # Append image attribution to the content to ensure visibility
+        if image_url and image_source:
+            attribution = f"Image Source: {image_source}"
+            if page_url and uploader:
+                attribution = f'Image Source: <a href="{page_url}">{image_source}</a> by {uploader}'
+            elif page_url:
+                attribution = f'Image Source: <a href="{page_url}">{image_source}</a>'
+            formatted_content += f"\n<p>{attribution}</p>"
+        
         author_id_map = {
             "owenjohnson": 10,
             "javiermorales": 2,
@@ -786,13 +783,13 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im
         image_id = None
         if image_url:
             logging.info(f"Attempting image upload for '{post_data['title']}', URL: {image_url}, source: {image_source}")
-            image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, pixabay_url)
+            image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, page_url)
             if not image_id:
                 logging.info(f"Flickr upload failed for '{post_data['title']}', falling back to Pixabay")
                 pixabay_query = post_data["title"][:50]
-                image_url, image_source, uploader, pixabay_url = get_image(pixabay_query)
+                image_url, image_source, uploader, page_url = get_image(pixabay_query)
                 if image_url:
-                    image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, pixabay_url)
+                    image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, page_url)
             if not image_id:
                 logging.warning(f"All image uploads failed for '{post_data['title']}' - posting without image")
         
@@ -1164,22 +1161,11 @@ def prepare_post_data(summary, title, main_topic=None):
     try:
         logging.info(f"Preparing post data for summary: {summary[:100]}...")
         
-        prompt = (
-            "Generate a concise, engaging title (5-15 words) for this food-related article summary. "
-            "The title should be catchy, avoid emojis, and not reproduce the original title verbatim. "
-            "Return the title as plain text."
-        )
-        
-        response = client.chat.completions.create(
-            model=LIGHT_TASK_MODEL,
-            messages=[
-                {"role": "system", "content": prompt},
-                {"role": "user", "content": summary}
-            ],
-            max_tokens=50,
-            temperature=0.7
-        )
-        new_title = response.choices[0].message.content.strip()
+        # Use the original generate_title_from_summary function to generate the title
+        new_title = generate_title_from_summary(summary)
+        if not new_title:
+            logging.warning("Title generation failed, using fallback title")
+            new_title = "A Tasty Food Discovery Awaits You"
         logging.info(f"Generated new title: '{new_title}'")
         
         # Update to unpack four values
@@ -1199,8 +1185,6 @@ def prepare_post_data(summary, title, main_topic=None):
             logging.warning("No image found for post, skipping")
             return None, None, None, None, None, None, None
         
-        pixabay_url = page_url if image_source == "Pixabay" else None
-        
         # Select a full author dictionary from AUTHORS (already imported from foodie_config)
         author = random.choice(AUTHORS)
         
@@ -1216,7 +1200,7 @@ def prepare_post_data(summary, title, main_topic=None):
         }
         
         logging.info(f"Post data prepared: Title: '{new_title}', Category: {category}, Author: {author['username']}")
-        return post_data, author, category, image_url, image_source, uploader, pixabay_url
+        return post_data, author, category, image_url, image_source, uploader, page_url
     
     except Exception as e:
         logging.error(f"Failed to prepare post data: {e}")