From ccddefbc8bda2ac05f594a6912d93212005e1282 Mon Sep 17 00:00:00 2001 From: Shane Date: Sun, 4 May 2025 12:44:50 +1000 Subject: [PATCH] try --- foodie_utils.py | 176 ++++++++++++++++++++++-------------------------- 1 file changed, 80 insertions(+), 96 deletions(-) diff --git a/foodie_utils.py b/foodie_utils.py index 4ba722a..57d6529 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -407,7 +407,7 @@ def extract_main_topic(text): # Fallback to a generic term if no specific food item is found return "food trends" -def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_password, image_source="Pixabay", uploader=None, pixabay_url=None): +def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_password, image_source="Pixabay", uploader=None, page_url=None): try: safe_title = post_title.encode('ascii', 'ignore').decode('ascii').replace(' ', '_')[:50] headers = { @@ -450,7 +450,13 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw response.raise_for_status() image_id = response.json()["id"] - caption = f'{image_source} by {uploader}' if pixabay_url and uploader else image_source + # Always include a clickable link and uploader if available + if page_url and uploader: + caption = f'{image_source} by {uploader}' + elif page_url: + caption = f'{image_source}' + else: + caption = image_source requests.post( f"{wp_base_url}/media/{image_id}", headers={"Authorization": headers["Authorization"], "Content-Type": "application/json"}, @@ -584,80 +590,62 @@ def insert_link_naturally(summary, source_name, source_url): try: logging.info(f"Input summary to insert_link_naturally: {summary!r}") - prompt = ( - "Take this summary and insert a single HTML link into one paragraph (randomly chosen). " - "Use the format '{source_name}' and weave it into the text naturally, " - "e.g., 'According to {source_name}, ' or '{source_name} shares that '. " - "Place the link at the end of a sentence (after a period). " - "Preserve the original paragraph structure, maintaining all newlines exactly as they are (each paragraph separated by a single \\n). " - "Return the modified summary with exactly one link.\n\n" - "Summary:\n{summary}\n\n" - "Source Name: {source_name}\nSource URL: {source_url}" - ).format(summary=summary, source_name=source_name, source_url=source_url) - - # Add retry mechanism - for attempt in range(3): - try: - response = client.chat.completions.create( - model=LIGHT_TASK_MODEL, - messages=[ - {"role": "system", "content": prompt}, - {"role": "user", "content": "Insert the link naturally into the summary."} - ], - max_tokens=1000, - temperature=0.7 - ) - new_summary = response.choices[0].message.content.strip() - link_pattern = f'{source_name}' - if new_summary and new_summary.count(link_pattern) == 1: - paragraphs = new_summary.split('\n') - paragraphs = [p.strip() for p in paragraphs] - new_summary = '\n'.join(paragraphs) - logging.info(f"Summary with naturally embedded link (normalized): {new_summary!r}") - return new_summary - else: - logging.warning(f"GPT attempt {attempt + 1}/3 failed to insert link correctly: {new_summary}") - except Exception as e: - logging.error(f"Link insertion attempt {attempt + 1}/3 failed: {e}") + # Split the summary into paragraphs + paragraphs = summary.split('\n') + if not paragraphs or all(not p.strip() for p in paragraphs): + logging.error("No valid paragraphs to insert link.") + return summary + + # Choose a paragraph with at least two sentences + eligible_paragraphs = [p for p in paragraphs if p.strip() and len(re.split(r'(?<=[.!?])\s+', p.strip())) >= 2] + if not eligible_paragraphs: + logging.warning("No paragraph with multiple sentences found, appending to last paragraph.") + target_para = paragraphs[-1].strip() + link_pattern = f'{source_name}' + new_para = f"{target_para} Source: {link_pattern}." + paragraphs[-1] = new_para + new_summary = '\n'.join(paragraphs) + logging.info(f"Appended link to summary: {new_summary!r}") + return new_summary + + # Select a random eligible paragraph + target_para = random.choice(eligible_paragraphs) + sentences = re.split(r'(?<=[.!?])\s+', target_para.strip()) + + # Find a sentence to insert the link into (prefer mid-paragraph sentences) + eligible_sentences = [(i, s) for i, s in enumerate(sentences) if i < len(sentences)-1 and s.strip()] # Exclude the last sentence + if not eligible_sentences: + eligible_sentences = [(i, s) for i, s in enumerate(sentences) if s.strip()] # Fall back to any sentence - logging.warning(f"GPT failed to insert link after 3 attempts. Using fallback.") + sentence_idx, sentence = random.choice(eligible_sentences) + link_pattern = f'{source_name}' + + # Split the sentence into words and insert the link naturally + words = sentence.split() + if len(words) < 3: # Ensure the sentence has enough words to split + # If the sentence is too short, append the attribution + new_sentence = f"{sentence} according to {link_pattern}." + else: + # Insert the link mid-sentence by splitting at a random point + split_point = random.randint(1, len(words)-2) # Avoid splitting at the very start or end + new_sentence = f"{' '.join(words[:split_point])}, according to {link_pattern}, {' '.join(words[split_point:])}" + + # Reconstruct the paragraph with the modified sentence + sentences[sentence_idx] = new_sentence + new_para = ' '.join(sentences) + paragraphs[paragraphs.index(target_para)] = new_para + + new_summary = '\n'.join(paragraphs) + logging.info(f"Summary with naturally embedded link: {new_summary!r}") + return new_summary + except Exception as e: logging.error(f"Link insertion failed: {e}") - - # Fallback path - time_pattern = r'\b\d{1,2}\.\d{2}(?:am|pm)\b' - protected_summary = re.sub(time_pattern, lambda m: m.group(0).replace('.', '@'), summary) - paragraphs = protected_summary.split('\n') - if not paragraphs or all(not p.strip() for p in paragraphs): - logging.error("No valid paragraphs to insert link.") - return summary - - target_para = random.choice([p for p in paragraphs if p.strip()]) - link_pattern = f'{source_name}' - phrases = [ - f"According to {link_pattern}", - f"{link_pattern} notes this insight", - f"Details shared by {link_pattern}", - f"Source: {link_pattern}" - ] - insertion_phrase = random.choice(phrases) - - sentences = re.split(r'(?<=[.!?])\s+', target_para) - insertion_point = -1 - for i, sent in enumerate(sentences): - if sent.strip() and '@' not in sent: - insertion_point = sum(len(s) + 1 for s in sentences[:i+1]) - break - if insertion_point == -1: - insertion_point = len(target_para) - - new_para = f"{target_para[:insertion_point]} {insertion_phrase}. {target_para[insertion_point:]}".strip() - paragraphs[paragraphs.index(target_para)] = new_para - new_summary = '\n'.join(paragraphs) - - new_summary = new_summary.replace('@', '.') - logging.info(f"Fallback summary with link: {new_summary!r}") - return new_summary + # Fallback: append the link to the end of the summary + link_pattern = f'{source_name}' + new_summary = f"{summary}\n\nSource: {link_pattern}." + logging.info(f"Fallback summary with link: {new_summary!r}") + return new_summary def generate_category_from_summary(summary): try: @@ -725,7 +713,7 @@ def get_wp_tag_id(tag_name, wp_base_url, wp_username, wp_password): logging.error(f"Failed to get WP tag ID for '{tag_name}': {e}") return None -def post_to_wp(post_data, category, link, author, image_url, original_source, image_source="Pixabay", uploader=None, pixabay_url=None, interest_score=4, post_id=None, should_post_tweet=True): +def post_to_wp(post_data, category, link, author, image_url, original_source, image_source="Pixabay", uploader=None, page_url=None, interest_score=4, post_id=None, should_post_tweet=True): wp_base_url = "https://insiderfoodie.com/wp-json/wp/v2" logging.info(f"Starting post_to_wp for '{post_data['title']}', image_source: {image_source}") @@ -772,6 +760,15 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im content = "Content unavailable. Check the original source for details." formatted_content = "\n".join(f"

{para}

" for para in content.split('\n') if para.strip()) + # Append image attribution to the content to ensure visibility + if image_url and image_source: + attribution = f"Image Source: {image_source}" + if page_url and uploader: + attribution = f'Image Source: {image_source} by {uploader}' + elif page_url: + attribution = f'Image Source: {image_source}' + formatted_content += f"\n

{attribution}

" + author_id_map = { "owenjohnson": 10, "javiermorales": 2, @@ -786,13 +783,13 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im image_id = None if image_url: logging.info(f"Attempting image upload for '{post_data['title']}', URL: {image_url}, source: {image_source}") - image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, pixabay_url) + image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, page_url) if not image_id: logging.info(f"Flickr upload failed for '{post_data['title']}', falling back to Pixabay") pixabay_query = post_data["title"][:50] - image_url, image_source, uploader, pixabay_url = get_image(pixabay_query) + image_url, image_source, uploader, page_url = get_image(pixabay_query) if image_url: - image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, pixabay_url) + image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, page_url) if not image_id: logging.warning(f"All image uploads failed for '{post_data['title']}' - posting without image") @@ -1164,22 +1161,11 @@ def prepare_post_data(summary, title, main_topic=None): try: logging.info(f"Preparing post data for summary: {summary[:100]}...") - prompt = ( - "Generate a concise, engaging title (5-15 words) for this food-related article summary. " - "The title should be catchy, avoid emojis, and not reproduce the original title verbatim. " - "Return the title as plain text." - ) - - response = client.chat.completions.create( - model=LIGHT_TASK_MODEL, - messages=[ - {"role": "system", "content": prompt}, - {"role": "user", "content": summary} - ], - max_tokens=50, - temperature=0.7 - ) - new_title = response.choices[0].message.content.strip() + # Use the original generate_title_from_summary function to generate the title + new_title = generate_title_from_summary(summary) + if not new_title: + logging.warning("Title generation failed, using fallback title") + new_title = "A Tasty Food Discovery Awaits You" logging.info(f"Generated new title: '{new_title}'") # Update to unpack four values @@ -1199,8 +1185,6 @@ def prepare_post_data(summary, title, main_topic=None): logging.warning("No image found for post, skipping") return None, None, None, None, None, None, None - pixabay_url = page_url if image_source == "Pixabay" else None - # Select a full author dictionary from AUTHORS (already imported from foodie_config) author = random.choice(AUTHORS) @@ -1216,7 +1200,7 @@ def prepare_post_data(summary, title, main_topic=None): } logging.info(f"Post data prepared: Title: '{new_title}', Category: {category}, Author: {author['username']}") - return post_data, author, category, image_url, image_source, uploader, pixabay_url + return post_data, author, category, image_url, image_source, uploader, page_url except Exception as e: logging.error(f"Failed to prepare post data: {e}")