new specific_term functionality
This commit is contained in:
@@ -313,7 +313,7 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
|
||||
logging.info(f"Trying Google Trend: {title} from {source_name}")
|
||||
|
||||
try:
|
||||
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
||||
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
|
||||
except Exception as e:
|
||||
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
|
||||
attempts += 1
|
||||
@@ -365,10 +365,10 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
|
||||
"categories": [generate_category_from_summary(final_summary)]
|
||||
}
|
||||
category = post_data["categories"][0]
|
||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
|
||||
if not image_url:
|
||||
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
|
||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
|
||||
if not image_url:
|
||||
logging.warning(f"All image uploads failed for '{title}' - posting without image")
|
||||
image_source = None
|
||||
|
||||
@@ -380,7 +380,7 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
|
||||
logging.info(f"Trying Reddit Post: {title} from {source_name}")
|
||||
|
||||
try:
|
||||
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
||||
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
|
||||
except Exception as e:
|
||||
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
|
||||
attempts += 1
|
||||
@@ -434,10 +434,10 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
|
||||
"categories": [generate_category_from_summary(final_summary)]
|
||||
}
|
||||
category = post_data["categories"][0]
|
||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
|
||||
if not image_url:
|
||||
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
|
||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
|
||||
if not image_url:
|
||||
logging.warning(f"All image uploads failed for '{title}' - posting without image")
|
||||
image_source = None
|
||||
|
||||
@@ -292,7 +292,7 @@ def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_im
|
||||
logging.info(f"Trying RSS Article: {title} from {source_name}")
|
||||
|
||||
try:
|
||||
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
||||
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
|
||||
except Exception as e:
|
||||
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
|
||||
attempts += 1
|
||||
@@ -344,10 +344,10 @@ def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_im
|
||||
"categories": [generate_category_from_summary(final_summary)]
|
||||
}
|
||||
category = post_data["categories"][0]
|
||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
|
||||
if not image_url:
|
||||
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
|
||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
|
||||
if not image_url:
|
||||
logging.warning(f"All image uploads failed for '{title}' - posting without image")
|
||||
image_source = None
|
||||
|
||||
+74
-17
@@ -359,13 +359,17 @@ def smart_image_and_filter(title, summary):
|
||||
content = f"{title}\n\n{summary}"
|
||||
|
||||
prompt = (
|
||||
"Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) "
|
||||
"for an image search about food industry trends or viral content. Prioritize specific multi-word terms if present, "
|
||||
"otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). "
|
||||
"Return 'SKIP' if the article is about home appliances, recipes, promotions, contains '[homemade]' or 'homemade', "
|
||||
"or includes recipe-related terms like 'cook', 'bake', or 'ingredient'. "
|
||||
"Analyze this article title and summary. Perform the following tasks:\n"
|
||||
"1. Extract the most specific and defining term (e.g., a proper noun like 'Ozempic', a unique concept like 'GLP-1', or a niche topic like 'Sushi') that makes the article distinct.\n"
|
||||
"2. Generate a concise image search query (3-7 words) that MUST include the most specific term from step 1, combined with relevant contextual keywords (e.g., 'dining', 'trends').\n"
|
||||
"3. Identify the main topic of the article (e.g., a specific food item or cuisine).\n"
|
||||
"4. List relevance keywords (up to 5) for the image search, including the specific term and related concepts.\n"
|
||||
"5. Determine if the article should be skipped based on these rules:\n"
|
||||
" - SKIP if about home appliances, recipes, promotions, or contains '[homemade]' or 'homemade'.\n"
|
||||
" - SKIP if it includes recipe-related terms like 'cook', 'bake', or 'ingredient'.\n"
|
||||
" - KEEP otherwise.\n"
|
||||
"Return as JSON with double quotes for all property names and string values (e.g., "
|
||||
"{\"image_query\": \"fast food trends\", \"relevance\": [\"fast food\", \"dining\", \"culture\"], \"main_topic\": \"fast food\", \"action\": \"KEEP\"})."
|
||||
"{\"image_query\": \"Ozempic dining trends\", \"specific_term\": \"Ozempic\", \"relevance\": [\"Ozempic\", \"dining\", \"trends\"], \"main_topic\": \"dining trends\", \"action\": \"KEEP\"})."
|
||||
)
|
||||
|
||||
response = client.chat.completions.create(
|
||||
@@ -374,7 +378,7 @@ def smart_image_and_filter(title, summary):
|
||||
{"role": "system", "content": prompt},
|
||||
{"role": "user", "content": content}
|
||||
],
|
||||
max_tokens=100
|
||||
max_tokens=150
|
||||
)
|
||||
raw_result = response.choices[0].message.content.strip()
|
||||
logging.debug(f"Raw GPT response: '{raw_result}'")
|
||||
@@ -384,11 +388,11 @@ def smart_image_and_filter(title, summary):
|
||||
|
||||
try:
|
||||
result = json.loads(fixed_result)
|
||||
if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result:
|
||||
if not isinstance(result, dict) or "image_query" not in result or "specific_term" not in result or "relevance" not in result or "action" not in result:
|
||||
logging.warning(f"Invalid GPT response format: {result}, checking action before fallback")
|
||||
if isinstance(result, dict) and result.get("action") == "SKIP":
|
||||
logging.info(f"Respecting AI SKIP action for '{title}'")
|
||||
return extract_main_topic(title.lower() + " " + summary.lower()), ["food"], "food", True
|
||||
return "food trends", ["food"], "food", True
|
||||
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
||||
skip_flag = (
|
||||
"[homemade]" in title.lower() or
|
||||
@@ -417,17 +421,18 @@ def smart_image_and_filter(title, summary):
|
||||
return main_topic, [main_topic, "food"], main_topic, skip_flag
|
||||
|
||||
image_query = result["image_query"]
|
||||
specific_term = result["specific_term"]
|
||||
relevance_keywords = result["relevance"]
|
||||
main_topic = result.get("main_topic", extract_main_topic(title.lower() + " " + summary.lower()))
|
||||
skip_flag = (
|
||||
result["action"] == "SKIP" or
|
||||
result["action"] == "SKIP" or
|
||||
"[homemade]" in title.lower() or
|
||||
"homemade" in title.lower() or
|
||||
"homemade" in summary.lower() or
|
||||
any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)
|
||||
)
|
||||
|
||||
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}, "
|
||||
logging.info(f"Smart image query: {image_query}, Specific Term: {specific_term}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}, "
|
||||
f"Reasons: action={result['action']}, "
|
||||
f"homemade_in_title={'[homemade]' in title.lower() or 'homemade' in title.lower()}, "
|
||||
f"homemade_in_summary={'homemade' in summary.lower()}, "
|
||||
@@ -441,7 +446,7 @@ def smart_image_and_filter(title, summary):
|
||||
logging.warning(f"Image query '{image_query}' too vague, using fallback")
|
||||
return main_topic, [main_topic, "food"], main_topic, skip_flag
|
||||
|
||||
return image_query, relevance_keywords, main_topic, skip_flag
|
||||
return image_query, relevance_keywords, main_topic, skip_flag, specific_term
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Smart image/filter failed: {e}, using fallback")
|
||||
@@ -456,7 +461,7 @@ def smart_image_and_filter(title, summary):
|
||||
f"homemade_in_title={'[homemade]' in title.lower() or 'homemade' in title.lower()}, "
|
||||
f"homemade_in_summary={'homemade' in summary.lower()}, "
|
||||
f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}")
|
||||
return main_topic, [main_topic, "food"], main_topic, skip_flag
|
||||
return main_topic, [main_topic, "food"], main_topic, skip_flag, "food"
|
||||
|
||||
def extract_main_topic(text):
|
||||
# Common food-related keywords (expand as needed)
|
||||
@@ -1170,7 +1175,7 @@ def classify_keywords(keywords):
|
||||
logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
|
||||
return {kw: "specific" for kw in keywords}
|
||||
|
||||
def get_flickr_image(search_query, relevance_keywords, main_topic):
|
||||
def get_flickr_image(search_query, relevance_keywords, main_topic, specific_term=None):
|
||||
global used_images
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -1260,9 +1265,9 @@ def get_flickr_image(search_query, relevance_keywords, main_topic):
|
||||
except Exception as e:
|
||||
logger.warning(f"DDG search failed for '{ddg_query}': {e}")
|
||||
|
||||
# Step 2: Fallback to Pixabay
|
||||
# Step 2: Fallback to Pixabay with specific term
|
||||
logger.info(f"No valid DDG images, falling back to Pixabay for '{search_query}'")
|
||||
image_url, source_name, uploader, page_url = get_image(search_query)
|
||||
image_url, source_name, uploader, page_url = get_image(search_query, specific_term)
|
||||
if image_url:
|
||||
used_images.add(image_url)
|
||||
save_used_images()
|
||||
@@ -1272,7 +1277,7 @@ def get_flickr_image(search_query, relevance_keywords, main_topic):
|
||||
logger.warning(f"No valid images found for query '{search_query}'")
|
||||
return None, None, None, None
|
||||
|
||||
def get_image(search_query):
|
||||
def get_image(search_query, specific_term=None):
|
||||
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
||||
|
||||
def process_image(image_url, source_name, page_url):
|
||||
@@ -1309,6 +1314,58 @@ def get_image(search_query):
|
||||
logger.warning(f"Failed to process Pixabay image {image_url}: {e}")
|
||||
return None
|
||||
|
||||
def fetch_pixabay_image(query):
    """Query the Pixabay photo API and return the first usable, unused hit.

    Walks the ``hits`` of the API response in order, skipping entries that
    have no ``largeImageURL`` or whose URL is already in ``used_images``.
    Each remaining candidate is handed to ``process_image``; the first one
    for which it returns a truthy 5-item result is recorded in
    ``used_images`` (persisted with ``save_used_images``) and returned.

    Returns:
        ``(image_url, source_name, uploader, page_url)`` on success, or
        ``(None, None, None, None)`` when no hit qualifies or when any
        exception is raised while fetching or processing.
    """
    nothing_found = (None, None, None, None)

    try:
        # The URL is built inside the try so that a bad ``query`` is handled
        # by the same catch-all as network and JSON errors.
        api_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(query)}&image_type=photo&per_page=20"
        reply = requests.get(api_url, headers=headers, timeout=10)
        reply.raise_for_status()
        candidates = reply.json().get('hits', [])

        for candidate in candidates:
            raw_url = candidate.get('largeImageURL')
            if not raw_url:
                continue
            if raw_url in used_images:
                continue

            uploader = candidate.get('user', 'Unknown')
            listing_url = candidate.get('pageURL', raw_url)

            # process_image is the gatekeeper (watermark/resolution checks per
            # the original comment); a falsy result means "reject this hit".
            processed = process_image(raw_url, "Pixabay", listing_url)
            if not processed:
                continue

            image_url, source_name, page_url, width, height = processed
            # Record the raw Pixabay URL so the same photo is not picked again.
            used_images.add(raw_url)
            save_used_images()
            logger.info(f"Selected Pixabay image: {raw_url} by {uploader} for query '{query}' ({width}x{height})")
            return image_url, source_name, uploader, page_url

        logger.info(f"No valid Pixabay image found for query '{query}'. Trying fallback query.")
        return nothing_found

    except Exception as e:
        logger.warning(f"Pixabay image fetch failed for query '{query}': {e}")
        return nothing_found
|
||||
|
||||
# Try with the original query
|
||||
image_url, source_name, uploader, page_url = fetch_pixabay_image(search_query)
|
||||
if image_url:
|
||||
return image_url, source_name, uploader, page_url
|
||||
|
||||
# Fallback to a dynamic query using the specific term if provided
|
||||
if specific_term:
|
||||
fallback_query = f"{specific_term} dining trends"
|
||||
image_url, source_name, uploader, page_url = fetch_pixabay_image(fallback_query)
|
||||
if image_url:
|
||||
return image_url, source_name, uploader, page_url
|
||||
|
||||
# Final fallback to a generic query
|
||||
fallback_query = "food dining trends"
|
||||
image_url, source_name, uploader, page_url = fetch_pixabay_image(fallback_query)
|
||||
if image_url:
|
||||
return image_url, source_name, uploader, page_url
|
||||
|
||||
logger.error(f"All image fetch attempts failed for query '{search_query}'. Returning None.")
|
||||
return None, None, None, None
|
||||
|
||||
def fetch_pixabay_image(query):
|
||||
try:
|
||||
pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(query)}&image_type=photo&per_page=20"
|
||||
|
||||
Reference in New Issue
Block a user