new specific_term functionality

main
Shane 7 months ago
parent 3b1b030025
commit 9870d276a3
  1. 6
      foodie_automator_google.py
  2. 6
      foodie_automator_reddit.py
  3. 6
      foodie_automator_rss.py
  4. 91
      foodie_utils.py

@ -313,7 +313,7 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
logging.info(f"Trying Google Trend: {title} from {source_name}")
try:
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
except Exception as e:
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1
@ -365,10 +365,10 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
"categories": [generate_category_from_summary(final_summary)]
}
category = post_data["categories"][0]
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
if not image_url:
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query)
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
if not image_url:
logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None

@ -380,7 +380,7 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
logging.info(f"Trying Reddit Post: {title} from {source_name}")
try:
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
except Exception as e:
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1
@ -434,10 +434,10 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
"categories": [generate_category_from_summary(final_summary)]
}
category = post_data["categories"][0]
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
if not image_url:
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query)
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
if not image_url:
logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None

@ -292,7 +292,7 @@ def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_im
logging.info(f"Trying RSS Article: {title} from {source_name}")
try:
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
except Exception as e:
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1
@ -344,10 +344,10 @@ def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_im
"categories": [generate_category_from_summary(final_summary)]
}
category = post_data["categories"][0]
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
if not image_url:
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query)
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
if not image_url:
logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None

@ -359,13 +359,17 @@ def smart_image_and_filter(title, summary):
content = f"{title}\n\n{summary}"
prompt = (
"Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) "
"for an image search about food industry trends or viral content. Prioritize specific multi-word terms if present, "
"otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). "
"Return 'SKIP' if the article is about home appliances, recipes, promotions, contains '[homemade]' or 'homemade', "
"or includes recipe-related terms like 'cook', 'bake', or 'ingredient'. "
"Analyze this article title and summary. Perform the following tasks:\n"
"1. Extract the most specific and defining term (e.g., a proper noun like 'Ozempic', a unique concept like 'GLP-1', or a niche topic like 'Sushi') that makes the article distinct.\n"
"2. Generate a concise image search query (3-7 words) that MUST include the most specific term from step 1, combined with relevant contextual keywords (e.g., 'dining', 'trends').\n"
"3. Identify the main topic of the article (e.g., a specific food item or cuisine).\n"
"4. List relevance keywords (up to 5) for the image search, including the specific term and related concepts.\n"
"5. Determine if the article should be skipped based on these rules:\n"
" - SKIP if about home appliances, recipes, promotions, or contains '[homemade]' or 'homemade'.\n"
" - SKIP if it includes recipe-related terms like 'cook', 'bake', or 'ingredient'.\n"
" - KEEP otherwise.\n"
"Return as JSON with double quotes for all property names and string values (e.g., "
"{\"image_query\": \"fast food trends\", \"relevance\": [\"fast food\", \"dining\", \"culture\"], \"main_topic\": \"fast food\", \"action\": \"KEEP\"})."
"{\"image_query\": \"Ozempic dining trends\", \"specific_term\": \"Ozempic\", \"relevance\": [\"Ozempic\", \"dining\", \"trends\"], \"main_topic\": \"dining trends\", \"action\": \"KEEP\"})."
)
response = client.chat.completions.create(
@ -374,7 +378,7 @@ def smart_image_and_filter(title, summary):
{"role": "system", "content": prompt},
{"role": "user", "content": content}
],
max_tokens=100
max_tokens=150
)
raw_result = response.choices[0].message.content.strip()
logging.debug(f"Raw GPT response: '{raw_result}'")
@ -384,11 +388,11 @@ def smart_image_and_filter(title, summary):
try:
result = json.loads(fixed_result)
if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result:
if not isinstance(result, dict) or "image_query" not in result or "specific_term" not in result or "relevance" not in result or "action" not in result:
logging.warning(f"Invalid GPT response format: {result}, checking action before fallback")
if isinstance(result, dict) and result.get("action") == "SKIP":
logging.info(f"Respecting AI SKIP action for '{title}'")
return extract_main_topic(title.lower() + " " + summary.lower()), ["food"], "food", True
return "food trends", ["food"], "food", True
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
skip_flag = (
"[homemade]" in title.lower() or
@ -417,17 +421,18 @@ def smart_image_and_filter(title, summary):
return main_topic, [main_topic, "food"], main_topic, skip_flag
image_query = result["image_query"]
specific_term = result["specific_term"]
relevance_keywords = result["relevance"]
main_topic = result.get("main_topic", extract_main_topic(title.lower() + " " + summary.lower()))
skip_flag = (
result["action"] == "SKIP" or
result["action"] == "SKIP" or
"[homemade]" in title.lower() or
"homemade" in title.lower() or
"homemade" in summary.lower() or
any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)
)
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}, "
logging.info(f"Smart image query: {image_query}, Specific Term: {specific_term}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}, "
f"Reasons: action={result['action']}, "
f"homemade_in_title={'[homemade]' in title.lower() or 'homemade' in title.lower()}, "
f"homemade_in_summary={'homemade' in summary.lower()}, "
@ -441,7 +446,7 @@ def smart_image_and_filter(title, summary):
logging.warning(f"Image query '{image_query}' too vague, using fallback")
return main_topic, [main_topic, "food"], main_topic, skip_flag
return image_query, relevance_keywords, main_topic, skip_flag
return image_query, relevance_keywords, main_topic, skip_flag, specific_term
except Exception as e:
logging.error(f"Smart image/filter failed: {e}, using fallback")
@ -456,7 +461,7 @@ def smart_image_and_filter(title, summary):
f"homemade_in_title={'[homemade]' in title.lower() or 'homemade' in title.lower()}, "
f"homemade_in_summary={'homemade' in summary.lower()}, "
f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}")
return main_topic, [main_topic, "food"], main_topic, skip_flag
return main_topic, [main_topic, "food"], main_topic, skip_flag, "food"
def extract_main_topic(text):
# Common food-related keywords (expand as needed)
@ -1170,7 +1175,7 @@ def classify_keywords(keywords):
logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
return {kw: "specific" for kw in keywords}
def get_flickr_image(search_query, relevance_keywords, main_topic):
def get_flickr_image(search_query, relevance_keywords, main_topic, specific_term=None):
global used_images
logger = logging.getLogger(__name__)
@ -1260,9 +1265,9 @@ def get_flickr_image(search_query, relevance_keywords, main_topic):
except Exception as e:
logger.warning(f"DDG search failed for '{ddg_query}': {e}")
# Step 2: Fallback to Pixabay
# Step 2: Fallback to Pixabay with specific term
logger.info(f"No valid DDG images, falling back to Pixabay for '{search_query}'")
image_url, source_name, uploader, page_url = get_image(search_query)
image_url, source_name, uploader, page_url = get_image(search_query, specific_term)
if image_url:
used_images.add(image_url)
save_used_images()
@ -1272,7 +1277,7 @@ def get_flickr_image(search_query, relevance_keywords, main_topic):
logger.warning(f"No valid images found for query '{search_query}'")
return None, None, None, None
def get_image(search_query):
def get_image(search_query, specific_term=None):
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
def process_image(image_url, source_name, page_url):
@ -1309,6 +1314,58 @@ def get_image(search_query):
logger.warning(f"Failed to process Pixabay image {image_url}: {e}")
return None
def fetch_pixabay_image(query):
    """Return the first usable Pixabay photo for ``query``.

    Queries the Pixabay search API (photos only, up to 20 hits) and walks
    the hits in order. A hit is skipped when it has no ``largeImageURL``,
    when that URL is already in ``used_images``, or when ``process_image``
    returns a falsy result for it.

    Relies on names from the surrounding scope: ``PIXABAY_API_KEY``,
    ``headers``, ``used_images``, ``process_image``, ``save_used_images``
    and ``logger``.

    Args:
        query: Search text; it is URL-quoted before being sent.

    Returns:
        A 4-tuple ``(image_url, source_name, uploader, page_url)`` for the
        first accepted hit, or ``(None, None, None, None)`` when no hit is
        accepted or any exception is raised along the way (failures are
        logged as warnings, never propagated).
    """
    nothing_found = (None, None, None, None)
    try:
        api_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(query)}&image_type=photo&per_page=20"
        reply = requests.get(api_url, headers=headers, timeout=10)
        reply.raise_for_status()
        payload = reply.json()

        for candidate in payload.get('hits', []):
            candidate_url = candidate.get('largeImageURL')
            if not candidate_url or candidate_url in used_images:
                continue

            author = candidate.get('user', 'Unknown')
            landing_page = candidate.get('pageURL', candidate_url)

            # Process the image for watermarks and resolution
            processed = process_image(candidate_url, "Pixabay", landing_page)
            if not processed:
                continue

            # process_image hands back its own URL / source / page values,
            # which are the ones returned to the caller.
            final_url, origin, landing_page, width, height = processed

            # The raw Pixabay URL (not the processed one) is what gets
            # recorded as used.
            used_images.add(candidate_url)
            save_used_images()
            logger.info(f"Selected Pixabay image: {candidate_url} by {author} for query '{query}' ({width}x{height})")
            return final_url, origin, author, landing_page

        logger.info(f"No valid Pixabay image found for query '{query}'. Trying fallback query.")
        return nothing_found
    except Exception as e:
        logger.warning(f"Pixabay image fetch failed for query '{query}': {e}")
        return nothing_found
# Try with the original query
image_url, source_name, uploader, page_url = fetch_pixabay_image(search_query)
if image_url:
return image_url, source_name, uploader, page_url
# Fallback to a dynamic query using the specific term if provided
if specific_term:
fallback_query = f"{specific_term} dining trends"
image_url, source_name, uploader, page_url = fetch_pixabay_image(fallback_query)
if image_url:
return image_url, source_name, uploader, page_url
# Final fallback to a generic query
fallback_query = "food dining trends"
image_url, source_name, uploader, page_url = fetch_pixabay_image(fallback_query)
if image_url:
return image_url, source_name, uploader, page_url
logger.error(f"All image fetch attempts failed for query '{search_query}'. Returning None.")
return None, None, None, None
def fetch_pixabay_image(query):
try:
pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(query)}&image_type=photo&per_page=20"

Loading…
Cancel
Save