new specific_term functionality

main
Shane 7 months ago
parent 3b1b030025
commit 9870d276a3
  1. 6
      foodie_automator_google.py
  2. 6
      foodie_automator_reddit.py
  3. 6
      foodie_automator_rss.py
  4. 91
      foodie_utils.py

@ -313,7 +313,7 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
logging.info(f"Trying Google Trend: {title} from {source_name}") logging.info(f"Trying Google Trend: {title} from {source_name}")
try: try:
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
except Exception as e: except Exception as e:
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}") logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1 attempts += 1
@ -365,10 +365,10 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
"categories": [generate_category_from_summary(final_summary)] "categories": [generate_category_from_summary(final_summary)]
} }
category = post_data["categories"][0] category = post_data["categories"][0]
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic) image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
if not image_url: if not image_url:
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback") logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query) image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
if not image_url: if not image_url:
logging.warning(f"All image uploads failed for '{title}' - posting without image") logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None image_source = None

@ -380,7 +380,7 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
logging.info(f"Trying Reddit Post: {title} from {source_name}") logging.info(f"Trying Reddit Post: {title} from {source_name}")
try: try:
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
except Exception as e: except Exception as e:
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}") logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1 attempts += 1
@ -434,10 +434,10 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
"categories": [generate_category_from_summary(final_summary)] "categories": [generate_category_from_summary(final_summary)]
} }
category = post_data["categories"][0] category = post_data["categories"][0]
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic) image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
if not image_url: if not image_url:
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback") logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query) image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
if not image_url: if not image_url:
logging.warning(f"All image uploads failed for '{title}' - posting without image") logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None image_source = None

@ -292,7 +292,7 @@ def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_im
logging.info(f"Trying RSS Article: {title} from {source_name}") logging.info(f"Trying RSS Article: {title} from {source_name}")
try: try:
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
except Exception as e: except Exception as e:
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}") logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1 attempts += 1
@ -344,10 +344,10 @@ def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_im
"categories": [generate_category_from_summary(final_summary)] "categories": [generate_category_from_summary(final_summary)]
} }
category = post_data["categories"][0] category = post_data["categories"][0]
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic) image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
if not image_url: if not image_url:
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback") logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query) image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
if not image_url: if not image_url:
logging.warning(f"All image uploads failed for '{title}' - posting without image") logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None image_source = None

@ -359,13 +359,17 @@ def smart_image_and_filter(title, summary):
content = f"{title}\n\n{summary}" content = f"{title}\n\n{summary}"
prompt = ( prompt = (
"Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) " "Analyze this article title and summary. Perform the following tasks:\n"
"for an image search about food industry trends or viral content. Prioritize specific multi-word terms if present, " "1. Extract the most specific and defining term (e.g., a proper noun like 'Ozempic', a unique concept like 'GLP-1', or a niche topic like 'Sushi') that makes the article distinct.\n"
"otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). " "2. Generate a concise image search query (3-7 words) that MUST include the most specific term from step 1, combined with relevant contextual keywords (e.g., 'dining', 'trends').\n"
"Return 'SKIP' if the article is about home appliances, recipes, promotions, contains '[homemade]' or 'homemade', " "3. Identify the main topic of the article (e.g., a specific food item or cuisine).\n"
"or includes recipe-related terms like 'cook', 'bake', or 'ingredient'. " "4. List relevance keywords (up to 5) for the image search, including the specific term and related concepts.\n"
"5. Determine if the article should be skipped based on these rules:\n"
" - SKIP if about home appliances, recipes, promotions, or contains '[homemade]' or 'homemade'.\n"
" - SKIP if it includes recipe-related terms like 'cook', 'bake', or 'ingredient'.\n"
" - KEEP otherwise.\n"
"Return as JSON with double quotes for all property names and string values (e.g., " "Return as JSON with double quotes for all property names and string values (e.g., "
"{\"image_query\": \"fast food trends\", \"relevance\": [\"fast food\", \"dining\", \"culture\"], \"main_topic\": \"fast food\", \"action\": \"KEEP\"})." "{\"image_query\": \"Ozempic dining trends\", \"specific_term\": \"Ozempic\", \"relevance\": [\"Ozempic\", \"dining\", \"trends\"], \"main_topic\": \"dining trends\", \"action\": \"KEEP\"})."
) )
response = client.chat.completions.create( response = client.chat.completions.create(
@ -374,7 +378,7 @@ def smart_image_and_filter(title, summary):
{"role": "system", "content": prompt}, {"role": "system", "content": prompt},
{"role": "user", "content": content} {"role": "user", "content": content}
], ],
max_tokens=100 max_tokens=150
) )
raw_result = response.choices[0].message.content.strip() raw_result = response.choices[0].message.content.strip()
logging.debug(f"Raw GPT response: '{raw_result}'") logging.debug(f"Raw GPT response: '{raw_result}'")
@ -384,11 +388,11 @@ def smart_image_and_filter(title, summary):
try: try:
result = json.loads(fixed_result) result = json.loads(fixed_result)
if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result: if not isinstance(result, dict) or "image_query" not in result or "specific_term" not in result or "relevance" not in result or "action" not in result:
logging.warning(f"Invalid GPT response format: {result}, checking action before fallback") logging.warning(f"Invalid GPT response format: {result}, checking action before fallback")
if isinstance(result, dict) and result.get("action") == "SKIP": if isinstance(result, dict) and result.get("action") == "SKIP":
logging.info(f"Respecting AI SKIP action for '{title}'") logging.info(f"Respecting AI SKIP action for '{title}'")
return extract_main_topic(title.lower() + " " + summary.lower()), ["food"], "food", True return "food trends", ["food"], "food", True
main_topic = extract_main_topic(title.lower() + " " + summary.lower()) main_topic = extract_main_topic(title.lower() + " " + summary.lower())
skip_flag = ( skip_flag = (
"[homemade]" in title.lower() or "[homemade]" in title.lower() or
@ -417,17 +421,18 @@ def smart_image_and_filter(title, summary):
return main_topic, [main_topic, "food"], main_topic, skip_flag return main_topic, [main_topic, "food"], main_topic, skip_flag
image_query = result["image_query"] image_query = result["image_query"]
specific_term = result["specific_term"]
relevance_keywords = result["relevance"] relevance_keywords = result["relevance"]
main_topic = result.get("main_topic", extract_main_topic(title.lower() + " " + summary.lower())) main_topic = result.get("main_topic", extract_main_topic(title.lower() + " " + summary.lower()))
skip_flag = ( skip_flag = (
result["action"] == "SKIP" or result["action"] == "SKIP" or
"[homemade]" in title.lower() or "[homemade]" in title.lower() or
"homemade" in title.lower() or "homemade" in title.lower() or
"homemade" in summary.lower() or "homemade" in summary.lower() or
any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS) any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)
) )
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}, " logging.info(f"Smart image query: {image_query}, Specific Term: {specific_term}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}, "
f"Reasons: action={result['action']}, " f"Reasons: action={result['action']}, "
f"homemade_in_title={'[homemade]' in title.lower() or 'homemade' in title.lower()}, " f"homemade_in_title={'[homemade]' in title.lower() or 'homemade' in title.lower()}, "
f"homemade_in_summary={'homemade' in summary.lower()}, " f"homemade_in_summary={'homemade' in summary.lower()}, "
@ -441,7 +446,7 @@ def smart_image_and_filter(title, summary):
logging.warning(f"Image query '{image_query}' too vague, using fallback") logging.warning(f"Image query '{image_query}' too vague, using fallback")
return main_topic, [main_topic, "food"], main_topic, skip_flag return main_topic, [main_topic, "food"], main_topic, skip_flag
return image_query, relevance_keywords, main_topic, skip_flag return image_query, relevance_keywords, main_topic, skip_flag, specific_term
except Exception as e: except Exception as e:
logging.error(f"Smart image/filter failed: {e}, using fallback") logging.error(f"Smart image/filter failed: {e}, using fallback")
@ -456,7 +461,7 @@ def smart_image_and_filter(title, summary):
f"homemade_in_title={'[homemade]' in title.lower() or 'homemade' in title.lower()}, " f"homemade_in_title={'[homemade]' in title.lower() or 'homemade' in title.lower()}, "
f"homemade_in_summary={'homemade' in summary.lower()}, " f"homemade_in_summary={'homemade' in summary.lower()}, "
f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}") f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}")
return main_topic, [main_topic, "food"], main_topic, skip_flag return main_topic, [main_topic, "food"], main_topic, skip_flag, "food"
def extract_main_topic(text): def extract_main_topic(text):
# Common food-related keywords (expand as needed) # Common food-related keywords (expand as needed)
@ -1170,7 +1175,7 @@ def classify_keywords(keywords):
logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.") logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
return {kw: "specific" for kw in keywords} return {kw: "specific" for kw in keywords}
def get_flickr_image(search_query, relevance_keywords, main_topic): def get_flickr_image(search_query, relevance_keywords, main_topic, specific_term=None):
global used_images global used_images
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -1260,9 +1265,9 @@ def get_flickr_image(search_query, relevance_keywords, main_topic):
except Exception as e: except Exception as e:
logger.warning(f"DDG search failed for '{ddg_query}': {e}") logger.warning(f"DDG search failed for '{ddg_query}': {e}")
# Step 2: Fallback to Pixabay # Step 2: Fallback to Pixabay with specific term
logger.info(f"No valid DDG images, falling back to Pixabay for '{search_query}'") logger.info(f"No valid DDG images, falling back to Pixabay for '{search_query}'")
image_url, source_name, uploader, page_url = get_image(search_query) image_url, source_name, uploader, page_url = get_image(search_query, specific_term)
if image_url: if image_url:
used_images.add(image_url) used_images.add(image_url)
save_used_images() save_used_images()
@ -1272,7 +1277,7 @@ def get_flickr_image(search_query, relevance_keywords, main_topic):
logger.warning(f"No valid images found for query '{search_query}'") logger.warning(f"No valid images found for query '{search_query}'")
return None, None, None, None return None, None, None, None
def get_image(search_query): def get_image(search_query, specific_term=None):
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'} headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
def process_image(image_url, source_name, page_url): def process_image(image_url, source_name, page_url):
@ -1309,6 +1314,58 @@ def get_image(search_query):
logger.warning(f"Failed to process Pixabay image {image_url}: {e}") logger.warning(f"Failed to process Pixabay image {image_url}: {e}")
return None return None
def fetch_pixabay_image(query):
    """Search Pixabay for a photo matching *query* and return the first usable hit.

    Hits without a ``largeImageURL`` and hits whose URL is already in
    ``used_images`` are passed over. Each remaining candidate is handed to
    ``process_image``; the first one it accepts is recorded in
    ``used_images``, persisted via ``save_used_images()``, and returned.

    Returns:
        A 4-tuple ``(image_url, source_name, uploader, page_url)`` on
        success, or ``(None, None, None, None)`` when no hit qualifies or
        any exception is raised along the way (the exception is logged as a
        warning, never propagated).
    """
    no_image = (None, None, None, None)
    try:
        api_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(query)}&image_type=photo&per_page=20"
        reply = requests.get(api_url, headers=headers, timeout=10)
        reply.raise_for_status()
        hits = reply.json().get('hits', [])

        for hit in hits:
            candidate = hit.get('largeImageURL')
            # Skip entries with no full-size URL and images already used.
            if not candidate or candidate in used_images:
                continue

            uploader = hit.get('user', 'Unknown')
            hit_page = hit.get('pageURL', candidate)

            # Process the image for watermarks and resolution
            processed = process_image(candidate, "Pixabay", hit_page)
            if not processed:
                continue

            image_url, source_name, page_url, width, height = processed
            # Record the raw hit URL so the same image is not picked again.
            used_images.add(candidate)
            save_used_images()
            logger.info(f"Selected Pixabay image: {candidate} by {uploader} for query '{query}' ({width}x{height})")
            return image_url, source_name, uploader, page_url

        logger.info(f"No valid Pixabay image found for query '{query}'. Trying fallback query.")
        return no_image
    except Exception as exc:
        # Best-effort lookup: report the failure and let the caller fall back.
        logger.warning(f"Pixabay image fetch failed for query '{query}': {exc}")
        return no_image
# Try with the original query
image_url, source_name, uploader, page_url = fetch_pixabay_image(search_query)
if image_url:
return image_url, source_name, uploader, page_url
# Fallback to a dynamic query using the specific term if provided
if specific_term:
fallback_query = f"{specific_term} dining trends"
image_url, source_name, uploader, page_url = fetch_pixabay_image(fallback_query)
if image_url:
return image_url, source_name, uploader, page_url
# Final fallback to a generic query
fallback_query = "food dining trends"
image_url, source_name, uploader, page_url = fetch_pixabay_image(fallback_query)
if image_url:
return image_url, source_name, uploader, page_url
logger.error(f"All image fetch attempts failed for query '{search_query}'. Returning None.")
return None, None, None, None
def fetch_pixabay_image(query): def fetch_pixabay_image(query):
try: try:
pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(query)}&image_type=photo&per_page=20" pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(query)}&image_type=photo&per_page=20"

Loading…
Cancel
Save