new specific_term functionality
This commit is contained in:
@@ -313,7 +313,7 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
|
|||||||
logging.info(f"Trying Google Trend: {title} from {source_name}")
|
logging.info(f"Trying Google Trend: {title} from {source_name}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
|
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
|
||||||
attempts += 1
|
attempts += 1
|
||||||
@@ -365,10 +365,10 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
|
|||||||
"categories": [generate_category_from_summary(final_summary)]
|
"categories": [generate_category_from_summary(final_summary)]
|
||||||
}
|
}
|
||||||
category = post_data["categories"][0]
|
category = post_data["categories"][0]
|
||||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
|
||||||
if not image_url:
|
if not image_url:
|
||||||
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
|
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
|
||||||
if not image_url:
|
if not image_url:
|
||||||
logging.warning(f"All image uploads failed for '{title}' - posting without image")
|
logging.warning(f"All image uploads failed for '{title}' - posting without image")
|
||||||
image_source = None
|
image_source = None
|
||||||
|
|||||||
@@ -380,7 +380,7 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
|
|||||||
logging.info(f"Trying Reddit Post: {title} from {source_name}")
|
logging.info(f"Trying Reddit Post: {title} from {source_name}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
|
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
|
||||||
attempts += 1
|
attempts += 1
|
||||||
@@ -434,10 +434,10 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
|
|||||||
"categories": [generate_category_from_summary(final_summary)]
|
"categories": [generate_category_from_summary(final_summary)]
|
||||||
}
|
}
|
||||||
category = post_data["categories"][0]
|
category = post_data["categories"][0]
|
||||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
|
||||||
if not image_url:
|
if not image_url:
|
||||||
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
|
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
|
||||||
if not image_url:
|
if not image_url:
|
||||||
logging.warning(f"All image uploads failed for '{title}' - posting without image")
|
logging.warning(f"All image uploads failed for '{title}' - posting without image")
|
||||||
image_source = None
|
image_source = None
|
||||||
|
|||||||
@@ -292,7 +292,7 @@ def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_im
|
|||||||
logging.info(f"Trying RSS Article: {title} from {source_name}")
|
logging.info(f"Trying RSS Article: {title} from {source_name}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
|
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
|
||||||
attempts += 1
|
attempts += 1
|
||||||
@@ -344,10 +344,10 @@ def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_im
|
|||||||
"categories": [generate_category_from_summary(final_summary)]
|
"categories": [generate_category_from_summary(final_summary)]
|
||||||
}
|
}
|
||||||
category = post_data["categories"][0]
|
category = post_data["categories"][0]
|
||||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic, specific_term)
|
||||||
if not image_url:
|
if not image_url:
|
||||||
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
|
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
image_url, image_source, uploader, page_url = get_image(image_query, specific_term)
|
||||||
if not image_url:
|
if not image_url:
|
||||||
logging.warning(f"All image uploads failed for '{title}' - posting without image")
|
logging.warning(f"All image uploads failed for '{title}' - posting without image")
|
||||||
image_source = None
|
image_source = None
|
||||||
|
|||||||
+74
-17
@@ -359,13 +359,17 @@ def smart_image_and_filter(title, summary):
|
|||||||
content = f"{title}\n\n{summary}"
|
content = f"{title}\n\n{summary}"
|
||||||
|
|
||||||
prompt = (
|
prompt = (
|
||||||
"Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) "
|
"Analyze this article title and summary. Perform the following tasks:\n"
|
||||||
"for an image search about food industry trends or viral content. Prioritize specific multi-word terms if present, "
|
"1. Extract the most specific and defining term (e.g., a proper noun like 'Ozempic', a unique concept like 'GLP-1', or a niche topic like 'Sushi') that makes the article distinct.\n"
|
||||||
"otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). "
|
"2. Generate a concise image search query (3-7 words) that MUST include the most specific term from step 1, combined with relevant contextual keywords (e.g., 'dining', 'trends').\n"
|
||||||
"Return 'SKIP' if the article is about home appliances, recipes, promotions, contains '[homemade]' or 'homemade', "
|
"3. Identify the main topic of the article (e.g., a specific food item or cuisine).\n"
|
||||||
"or includes recipe-related terms like 'cook', 'bake', or 'ingredient'. "
|
"4. List relevance keywords (up to 5) for the image search, including the specific term and related concepts.\n"
|
||||||
|
"5. Determine if the article should be skipped based on these rules:\n"
|
||||||
|
" - SKIP if about home appliances, recipes, promotions, or contains '[homemade]' or 'homemade'.\n"
|
||||||
|
" - SKIP if it includes recipe-related terms like 'cook', 'bake', or 'ingredient'.\n"
|
||||||
|
" - KEEP otherwise.\n"
|
||||||
"Return as JSON with double quotes for all property names and string values (e.g., "
|
"Return as JSON with double quotes for all property names and string values (e.g., "
|
||||||
"{\"image_query\": \"fast food trends\", \"relevance\": [\"fast food\", \"dining\", \"culture\"], \"main_topic\": \"fast food\", \"action\": \"KEEP\"})."
|
"{\"image_query\": \"Ozempic dining trends\", \"specific_term\": \"Ozempic\", \"relevance\": [\"Ozempic\", \"dining\", \"trends\"], \"main_topic\": \"dining trends\", \"action\": \"KEEP\"})."
|
||||||
)
|
)
|
||||||
|
|
||||||
response = client.chat.completions.create(
|
response = client.chat.completions.create(
|
||||||
@@ -374,7 +378,7 @@ def smart_image_and_filter(title, summary):
|
|||||||
{"role": "system", "content": prompt},
|
{"role": "system", "content": prompt},
|
||||||
{"role": "user", "content": content}
|
{"role": "user", "content": content}
|
||||||
],
|
],
|
||||||
max_tokens=100
|
max_tokens=150
|
||||||
)
|
)
|
||||||
raw_result = response.choices[0].message.content.strip()
|
raw_result = response.choices[0].message.content.strip()
|
||||||
logging.debug(f"Raw GPT response: '{raw_result}'")
|
logging.debug(f"Raw GPT response: '{raw_result}'")
|
||||||
@@ -384,11 +388,11 @@ def smart_image_and_filter(title, summary):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
result = json.loads(fixed_result)
|
result = json.loads(fixed_result)
|
||||||
if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result:
|
if not isinstance(result, dict) or "image_query" not in result or "specific_term" not in result or "relevance" not in result or "action" not in result:
|
||||||
logging.warning(f"Invalid GPT response format: {result}, checking action before fallback")
|
logging.warning(f"Invalid GPT response format: {result}, checking action before fallback")
|
||||||
if isinstance(result, dict) and result.get("action") == "SKIP":
|
if isinstance(result, dict) and result.get("action") == "SKIP":
|
||||||
logging.info(f"Respecting AI SKIP action for '{title}'")
|
logging.info(f"Respecting AI SKIP action for '{title}'")
|
||||||
return extract_main_topic(title.lower() + " " + summary.lower()), ["food"], "food", True
|
return "food trends", ["food"], "food", True
|
||||||
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
||||||
skip_flag = (
|
skip_flag = (
|
||||||
"[homemade]" in title.lower() or
|
"[homemade]" in title.lower() or
|
||||||
@@ -417,17 +421,18 @@ def smart_image_and_filter(title, summary):
|
|||||||
return main_topic, [main_topic, "food"], main_topic, skip_flag
|
return main_topic, [main_topic, "food"], main_topic, skip_flag
|
||||||
|
|
||||||
image_query = result["image_query"]
|
image_query = result["image_query"]
|
||||||
|
specific_term = result["specific_term"]
|
||||||
relevance_keywords = result["relevance"]
|
relevance_keywords = result["relevance"]
|
||||||
main_topic = result.get("main_topic", extract_main_topic(title.lower() + " " + summary.lower()))
|
main_topic = result.get("main_topic", extract_main_topic(title.lower() + " " + summary.lower()))
|
||||||
skip_flag = (
|
skip_flag = (
|
||||||
result["action"] == "SKIP" or
|
result["action"] == "SKIP" or
|
||||||
"[homemade]" in title.lower() or
|
"[homemade]" in title.lower() or
|
||||||
"homemade" in title.lower() or
|
"homemade" in title.lower() or
|
||||||
"homemade" in summary.lower() or
|
"homemade" in summary.lower() or
|
||||||
any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)
|
any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)
|
||||||
)
|
)
|
||||||
|
|
||||||
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}, "
|
logging.info(f"Smart image query: {image_query}, Specific Term: {specific_term}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}, "
|
||||||
f"Reasons: action={result['action']}, "
|
f"Reasons: action={result['action']}, "
|
||||||
f"homemade_in_title={'[homemade]' in title.lower() or 'homemade' in title.lower()}, "
|
f"homemade_in_title={'[homemade]' in title.lower() or 'homemade' in title.lower()}, "
|
||||||
f"homemade_in_summary={'homemade' in summary.lower()}, "
|
f"homemade_in_summary={'homemade' in summary.lower()}, "
|
||||||
@@ -441,7 +446,7 @@ def smart_image_and_filter(title, summary):
|
|||||||
logging.warning(f"Image query '{image_query}' too vague, using fallback")
|
logging.warning(f"Image query '{image_query}' too vague, using fallback")
|
||||||
return main_topic, [main_topic, "food"], main_topic, skip_flag
|
return main_topic, [main_topic, "food"], main_topic, skip_flag
|
||||||
|
|
||||||
return image_query, relevance_keywords, main_topic, skip_flag
|
return image_query, relevance_keywords, main_topic, skip_flag, specific_term
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Smart image/filter failed: {e}, using fallback")
|
logging.error(f"Smart image/filter failed: {e}, using fallback")
|
||||||
@@ -456,7 +461,7 @@ def smart_image_and_filter(title, summary):
|
|||||||
f"homemade_in_title={'[homemade]' in title.lower() or 'homemade' in title.lower()}, "
|
f"homemade_in_title={'[homemade]' in title.lower() or 'homemade' in title.lower()}, "
|
||||||
f"homemade_in_summary={'homemade' in summary.lower()}, "
|
f"homemade_in_summary={'homemade' in summary.lower()}, "
|
||||||
f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}")
|
f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}")
|
||||||
return main_topic, [main_topic, "food"], main_topic, skip_flag
|
return main_topic, [main_topic, "food"], main_topic, skip_flag, "food"
|
||||||
|
|
||||||
def extract_main_topic(text):
|
def extract_main_topic(text):
|
||||||
# Common food-related keywords (expand as needed)
|
# Common food-related keywords (expand as needed)
|
||||||
@@ -1170,7 +1175,7 @@ def classify_keywords(keywords):
|
|||||||
logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
|
logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
|
||||||
return {kw: "specific" for kw in keywords}
|
return {kw: "specific" for kw in keywords}
|
||||||
|
|
||||||
def get_flickr_image(search_query, relevance_keywords, main_topic):
|
def get_flickr_image(search_query, relevance_keywords, main_topic, specific_term=None):
|
||||||
global used_images
|
global used_images
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -1260,9 +1265,9 @@ def get_flickr_image(search_query, relevance_keywords, main_topic):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"DDG search failed for '{ddg_query}': {e}")
|
logger.warning(f"DDG search failed for '{ddg_query}': {e}")
|
||||||
|
|
||||||
# Step 2: Fallback to Pixabay
|
# Step 2: Fallback to Pixabay with specific term
|
||||||
logger.info(f"No valid DDG images, falling back to Pixabay for '{search_query}'")
|
logger.info(f"No valid DDG images, falling back to Pixabay for '{search_query}'")
|
||||||
image_url, source_name, uploader, page_url = get_image(search_query)
|
image_url, source_name, uploader, page_url = get_image(search_query, specific_term)
|
||||||
if image_url:
|
if image_url:
|
||||||
used_images.add(image_url)
|
used_images.add(image_url)
|
||||||
save_used_images()
|
save_used_images()
|
||||||
@@ -1272,7 +1277,7 @@ def get_flickr_image(search_query, relevance_keywords, main_topic):
|
|||||||
logger.warning(f"No valid images found for query '{search_query}'")
|
logger.warning(f"No valid images found for query '{search_query}'")
|
||||||
return None, None, None, None
|
return None, None, None, None
|
||||||
|
|
||||||
def get_image(search_query):
|
def get_image(search_query, specific_term=None):
|
||||||
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
||||||
|
|
||||||
def process_image(image_url, source_name, page_url):
|
def process_image(image_url, source_name, page_url):
|
||||||
@@ -1309,6 +1314,58 @@ def get_image(search_query):
|
|||||||
logger.warning(f"Failed to process Pixabay image {image_url}: {e}")
|
logger.warning(f"Failed to process Pixabay image {image_url}: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def fetch_pixabay_image(query):
|
||||||
|
try:
|
||||||
|
pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(query)}&image_type=photo&per_page=20"
|
||||||
|
response = requests.get(pixabay_url, headers=headers, timeout=10)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
|
for hit in data.get('hits', []):
|
||||||
|
img_url = hit.get('largeImageURL')
|
||||||
|
if not img_url or img_url in used_images:
|
||||||
|
continue
|
||||||
|
|
||||||
|
uploader = hit.get('user', 'Unknown')
|
||||||
|
page_url = hit.get('pageURL', img_url)
|
||||||
|
|
||||||
|
# Process the image for watermarks and resolution
|
||||||
|
result = process_image(img_url, "Pixabay", page_url)
|
||||||
|
if result:
|
||||||
|
image_url, source_name, page_url, width, height = result
|
||||||
|
used_images.add(img_url)
|
||||||
|
save_used_images()
|
||||||
|
logger.info(f"Selected Pixabay image: {img_url} by {uploader} for query '{query}' ({width}x{height})")
|
||||||
|
return image_url, source_name, uploader, page_url
|
||||||
|
|
||||||
|
logger.info(f"No valid Pixabay image found for query '{query}'. Trying fallback query.")
|
||||||
|
return None, None, None, None
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Pixabay image fetch failed for query '{query}': {e}")
|
||||||
|
return None, None, None, None
|
||||||
|
|
||||||
|
# Try with the original query
|
||||||
|
image_url, source_name, uploader, page_url = fetch_pixabay_image(search_query)
|
||||||
|
if image_url:
|
||||||
|
return image_url, source_name, uploader, page_url
|
||||||
|
|
||||||
|
# Fallback to a dynamic query using the specific term if provided
|
||||||
|
if specific_term:
|
||||||
|
fallback_query = f"{specific_term} dining trends"
|
||||||
|
image_url, source_name, uploader, page_url = fetch_pixabay_image(fallback_query)
|
||||||
|
if image_url:
|
||||||
|
return image_url, source_name, uploader, page_url
|
||||||
|
|
||||||
|
# Final fallback to a generic query
|
||||||
|
fallback_query = "food dining trends"
|
||||||
|
image_url, source_name, uploader, page_url = fetch_pixabay_image(fallback_query)
|
||||||
|
if image_url:
|
||||||
|
return image_url, source_name, uploader, page_url
|
||||||
|
|
||||||
|
logger.error(f"All image fetch attempts failed for query '{search_query}'. Returning None.")
|
||||||
|
return None, None, None, None
|
||||||
|
|
||||||
def fetch_pixabay_image(query):
|
def fetch_pixabay_image(query):
|
||||||
try:
|
try:
|
||||||
pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(query)}&image_type=photo&per_page=20"
|
pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(query)}&image_type=photo&per_page=20"
|
||||||
|
|||||||
Reference in New Issue
Block a user