From 9870d276a3cab1f7abf114d2dbc8f4345a816407 Mon Sep 17 00:00:00 2001 From: Shane Date: Mon, 12 May 2025 23:20:13 +1000 Subject: [PATCH] new specific_term functionality --- foodie_automator_google.py | 6 +-- foodie_automator_reddit.py | 6 +-- foodie_automator_rss.py | 6 +-- foodie_utils.py | 91 +++++++++++++++++++++++++++++++------- 4 files changed, 83 insertions(+), 26 deletions(-) diff --git a/foodie_automator_google.py b/foodie_automator_google.py index 1df87b9..fcbc162 100644 --- a/foodie_automator_google.py +++ b/foodie_automator_google.py @@ -313,7 +313,7 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat logging.info(f"Trying Google Trend: {title} from {source_name}") try: - image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) + image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary) except Exception as e: logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}") attempts += 1 @@ -365,10 +365,10 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat "categories": [generate_category_from_summary(final_summary)] } category = post_data["categories"][0] - image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic) + image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term) if not image_url: logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback") - image_url, image_source, uploader, page_url = get_image(image_query) + image_url, image_source, uploader, page_url = get_image(image_query, specific_term) if not image_url: logging.warning(f"All image uploads failed for '{title}' - posting without image") image_source = None diff --git a/foodie_automator_reddit.py b/foodie_automator_reddit.py index 2d15cf8..95f507a 100644 --- a/foodie_automator_reddit.py +++ 
b/foodie_automator_reddit.py @@ -380,7 +380,7 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used logging.info(f"Trying Reddit Post: {title} from {source_name}") try: - image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) + image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary) except Exception as e: logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}") attempts += 1 @@ -434,10 +434,10 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used "categories": [generate_category_from_summary(final_summary)] } category = post_data["categories"][0] - image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic) + image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term) if not image_url: logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback") - image_url, image_source, uploader, page_url = get_image(image_query) + image_url, image_source, uploader, page_url = get_image(image_query, specific_term) if not image_url: logging.warning(f"All image uploads failed for '{title}' - posting without image") image_source = None diff --git a/foodie_automator_rss.py b/foodie_automator_rss.py index f55557f..e84c59d 100644 --- a/foodie_automator_rss.py +++ b/foodie_automator_rss.py @@ -292,7 +292,7 @@ def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_im logging.info(f"Trying RSS Article: {title} from {source_name}") try: - image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) + image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary) except Exception as e: logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}") attempts += 1 @@ -344,10 +344,10 @@ 
def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_im "categories": [generate_category_from_summary(final_summary)] } category = post_data["categories"][0] - image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic) + image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term) if not image_url: logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback") - image_url, image_source, uploader, page_url = get_image(image_query) + image_url, image_source, uploader, page_url = get_image(image_query, specific_term) if not image_url: logging.warning(f"All image uploads failed for '{title}' - posting without image") image_source = None diff --git a/foodie_utils.py b/foodie_utils.py index 90c190b..49a1e6f 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -359,13 +359,17 @@ def smart_image_and_filter(title, summary): content = f"{title}\n\n{summary}" prompt = ( - "Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) " - "for an image search about food industry trends or viral content. Prioritize specific multi-word terms if present, " - "otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). " - "Return 'SKIP' if the article is about home appliances, recipes, promotions, contains '[homemade]' or 'homemade', " - "or includes recipe-related terms like 'cook', 'bake', or 'ingredient'. " + "Analyze this article title and summary. Perform the following tasks:\n" + "1. Extract the most specific and defining term (e.g., a proper noun like 'Ozempic', a unique concept like 'GLP-1', or a niche topic like 'Sushi') that makes the article distinct.\n" + "2. 
Generate a concise image search query (3-7 words) that MUST include the most specific term from step 1, combined with relevant contextual keywords (e.g., 'dining', 'trends').\n" + "3. Identify the main topic of the article (e.g., a specific food item or cuisine).\n" + "4. List relevance keywords (up to 5) for the image search, including the specific term and related concepts.\n" + "5. Determine if the article should be skipped based on these rules:\n" + " - SKIP if about home appliances, recipes, promotions, or contains '[homemade]' or 'homemade'.\n" + " - SKIP if it includes recipe-related terms like 'cook', 'bake', or 'ingredient'.\n" + " - KEEP otherwise.\n" "Return as JSON with double quotes for all property names and string values (e.g., " - "{\"image_query\": \"fast food trends\", \"relevance\": [\"fast food\", \"dining\", \"culture\"], \"main_topic\": \"fast food\", \"action\": \"KEEP\"})." + "{\"image_query\": \"Ozempic dining trends\", \"specific_term\": \"Ozempic\", \"relevance\": [\"Ozempic\", \"dining\", \"trends\"], \"main_topic\": \"dining trends\", \"action\": \"KEEP\"})." 
) response = client.chat.completions.create( @@ -374,7 +378,7 @@ def smart_image_and_filter(title, summary): {"role": "system", "content": prompt}, {"role": "user", "content": content} ], - max_tokens=100 + max_tokens=150 ) raw_result = response.choices[0].message.content.strip() logging.debug(f"Raw GPT response: '{raw_result}'") @@ -384,11 +388,11 @@ def smart_image_and_filter(title, summary): try: result = json.loads(fixed_result) - if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result: + if not isinstance(result, dict) or "image_query" not in result or "specific_term" not in result or "relevance" not in result or "action" not in result: logging.warning(f"Invalid GPT response format: {result}, checking action before fallback") if isinstance(result, dict) and result.get("action") == "SKIP": logging.info(f"Respecting AI SKIP action for '{title}'") - return extract_main_topic(title.lower() + " " + summary.lower()), ["food"], "food", True + return "food trends", ["food"], "food", True main_topic = extract_main_topic(title.lower() + " " + summary.lower()) skip_flag = ( "[homemade]" in title.lower() or @@ -417,17 +421,18 @@ def smart_image_and_filter(title, summary): return main_topic, [main_topic, "food"], main_topic, skip_flag image_query = result["image_query"] + specific_term = result["specific_term"] relevance_keywords = result["relevance"] main_topic = result.get("main_topic", extract_main_topic(title.lower() + " " + summary.lower())) skip_flag = ( - result["action"] == "SKIP" or + result["action"] == "SKIP" or "[homemade]" in title.lower() or "homemade" in title.lower() or "homemade" in summary.lower() or any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS) ) - logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}, " + logging.info(f"Smart image query: {image_query}, Specific Term: {specific_term}, 
Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}, " f"Reasons: action={result['action']}, " f"homemade_in_title={'[homemade]' in title.lower() or 'homemade' in title.lower()}, " f"homemade_in_summary={'homemade' in summary.lower()}, " @@ -441,7 +446,7 @@ def smart_image_and_filter(title, summary): logging.warning(f"Image query '{image_query}' too vague, using fallback") return main_topic, [main_topic, "food"], main_topic, skip_flag - return image_query, relevance_keywords, main_topic, skip_flag + return image_query, relevance_keywords, main_topic, skip_flag, specific_term except Exception as e: logging.error(f"Smart image/filter failed: {e}, using fallback") @@ -456,7 +461,7 @@ def smart_image_and_filter(title, summary): f"homemade_in_title={'[homemade]' in title.lower() or 'homemade' in title.lower()}, " f"homemade_in_summary={'homemade' in summary.lower()}, " f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}") - return main_topic, [main_topic, "food"], main_topic, skip_flag + return main_topic, [main_topic, "food"], main_topic, skip_flag, "food" def extract_main_topic(text): # Common food-related keywords (expand as needed) @@ -1170,7 +1175,7 @@ def classify_keywords(keywords): logging.warning(f"Keyword classification failed: {e}. 
Defaulting to all specific.") return {kw: "specific" for kw in keywords} -def get_flickr_image(search_query, relevance_keywords, main_topic): +def get_flickr_image(search_query, relevance_keywords, main_topic, specific_term=None): global used_images logger = logging.getLogger(__name__) @@ -1260,9 +1265,9 @@ def get_flickr_image(search_query, relevance_keywords, main_topic): except Exception as e: logger.warning(f"DDG search failed for '{ddg_query}': {e}") - # Step 2: Fallback to Pixabay + # Step 2: Fallback to Pixabay with specific term logger.info(f"No valid DDG images, falling back to Pixabay for '{search_query}'") - image_url, source_name, uploader, page_url = get_image(search_query) + image_url, source_name, uploader, page_url = get_image(search_query, specific_term) if image_url: used_images.add(image_url) save_used_images() @@ -1272,7 +1277,7 @@ def get_flickr_image(search_query, relevance_keywords, main_topic): logger.warning(f"No valid images found for query '{search_query}'") return None, None, None, None -def get_image(search_query): +def get_image(search_query, specific_term=None): headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'} def process_image(image_url, source_name, page_url): @@ -1309,6 +1314,58 @@ def get_image(search_query): logger.warning(f"Failed to process Pixabay image {image_url}: {e}") return None + def fetch_pixabay_image(query): + try: + pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(query)}&image_type=photo&per_page=20" + response = requests.get(pixabay_url, headers=headers, timeout=10) + response.raise_for_status() + data = response.json() + + for hit in data.get('hits', []): + img_url = hit.get('largeImageURL') + if not img_url or img_url in used_images: + continue + + uploader = hit.get('user', 'Unknown') + page_url = hit.get('pageURL', img_url) + + # Process the image for watermarks and resolution + result = process_image(img_url, "Pixabay", page_url) + 
if result: + image_url, source_name, page_url, width, height = result + used_images.add(img_url) + save_used_images() + logger.info(f"Selected Pixabay image: {img_url} by {uploader} for query '{query}' ({width}x{height})") + return image_url, source_name, uploader, page_url + + logger.info(f"No valid Pixabay image found for query '{query}'. Trying fallback query.") + return None, None, None, None + + except Exception as e: + logger.warning(f"Pixabay image fetch failed for query '{query}': {e}") + return None, None, None, None + + # Try with the original query + image_url, source_name, uploader, page_url = fetch_pixabay_image(search_query) + if image_url: + return image_url, source_name, uploader, page_url + + # Fallback to a dynamic query using the specific term if provided + if specific_term: + fallback_query = f"{specific_term} dining trends" + image_url, source_name, uploader, page_url = fetch_pixabay_image(fallback_query) + if image_url: + return image_url, source_name, uploader, page_url + + # Final fallback to a generic query + fallback_query = "food dining trends" + image_url, source_name, uploader, page_url = fetch_pixabay_image(fallback_query) + if image_url: + return image_url, source_name, uploader, page_url + + logger.error(f"All image fetch attempts failed for query '{search_query}'. Returning None.") + return None, None, None, None + def fetch_pixabay_image(query): try: pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(query)}&image_type=photo&per_page=20"