@ -359,13 +359,17 @@ def smart_image_and_filter(title, summary):
content = f " { title } \n \n { summary } "
content = f " { title } \n \n { summary } "
prompt = (
prompt = (
" Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) "
" Analyze this article title and summary. Perform the following tasks: \n "
" for an image search about food industry trends or viral content. Prioritize specific multi-word terms if present, "
" 1. Extract the most specific and defining term (e.g., a proper noun like ' Ozempic ' , a unique concept like ' GLP-1 ' , or a niche topic like ' Sushi ' ) that makes the article distinct. \n "
" otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). "
" 2. Generate a concise image search query (3-7 words) that MUST include the most specific term from step 1, combined with relevant contextual keywords (e.g., ' dining ' , ' trends ' ). \n "
" Return ' SKIP ' if the article is about home appliances, recipes, promotions, contains ' [homemade] ' or ' homemade ' , "
" 3. Identify the main topic of the article (e.g., a specific food item or cuisine). \n "
" or includes recipe-related terms like ' cook ' , ' bake ' , or ' ingredient ' . "
" 4. List relevance keywords (up to 5) for the image search, including the specific term and related concepts. \n "
" 5. Determine if the article should be skipped based on these rules: \n "
" - SKIP if about home appliances, recipes, promotions, or contains ' [homemade] ' or ' homemade ' . \n "
" - SKIP if it includes recipe-related terms like ' cook ' , ' bake ' , or ' ingredient ' . \n "
" - KEEP otherwise. \n "
" Return as JSON with double quotes for all property names and string values (e.g., "
" Return as JSON with double quotes for all property names and string values (e.g., "
" { \" image_query \" : \" fast food trends \" , \" relevance \" : [ \" fast food \" , \" dining \" , \" culture \" ], \" main_topic \" : \" fast food \" , \" action \" : \" KEEP \" }). "
" { \" image_query \" : \" Ozempic dining trends\" , \" specific_term \" : \" Ozempic \" , \" relevance \" : [ \" Ozempic \" , \" dining \" , \" trends \" ], \" main_topic \" : \" dining trends \" , \" action \" : \" KEEP \" }). "
)
)
response = client . chat . completions . create (
response = client . chat . completions . create (
@ -374,7 +378,7 @@ def smart_image_and_filter(title, summary):
{ " role " : " system " , " content " : prompt } ,
{ " role " : " system " , " content " : prompt } ,
{ " role " : " user " , " content " : content }
{ " role " : " user " , " content " : content }
] ,
] ,
max_tokens = 10 0
max_tokens = 15 0
)
)
raw_result = response . choices [ 0 ] . message . content . strip ( )
raw_result = response . choices [ 0 ] . message . content . strip ( )
logging . debug ( f " Raw GPT response: ' { raw_result } ' " )
logging . debug ( f " Raw GPT response: ' { raw_result } ' " )
@ -384,11 +388,11 @@ def smart_image_and_filter(title, summary):
try :
try :
result = json . loads ( fixed_result )
result = json . loads ( fixed_result )
if not isinstance ( result , dict ) or " image_query " not in result or " relevance " not in result or " action " not in result :
if not isinstance ( result , dict ) or " image_query " not in result or " specific_term " not in result or " relevance" not in result or " action " not in result :
logging . warning ( f " Invalid GPT response format: { result } , checking action before fallback " )
logging . warning ( f " Invalid GPT response format: { result } , checking action before fallback " )
if isinstance ( result , dict ) and result . get ( " action " ) == " SKIP " :
if isinstance ( result , dict ) and result . get ( " action " ) == " SKIP " :
logging . info ( f " Respecting AI SKIP action for ' { title } ' " )
logging . info ( f " Respecting AI SKIP action for ' { title } ' " )
return extract_main_topic ( title . lower ( ) + " " + summary . lower ( ) ) , [ " food " ] , " food " , True
return " food trends " , [ " food " ] , " food " , True
main_topic = extract_main_topic ( title . lower ( ) + " " + summary . lower ( ) )
main_topic = extract_main_topic ( title . lower ( ) + " " + summary . lower ( ) )
skip_flag = (
skip_flag = (
" [homemade] " in title . lower ( ) or
" [homemade] " in title . lower ( ) or
@ -417,17 +421,18 @@ def smart_image_and_filter(title, summary):
return main_topic , [ main_topic , " food " ] , main_topic , skip_flag
return main_topic , [ main_topic , " food " ] , main_topic , skip_flag
image_query = result [ " image_query " ]
image_query = result [ " image_query " ]
specific_term = result [ " specific_term " ]
relevance_keywords = result [ " relevance " ]
relevance_keywords = result [ " relevance " ]
main_topic = result . get ( " main_topic " , extract_main_topic ( title . lower ( ) + " " + summary . lower ( ) ) )
main_topic = result . get ( " main_topic " , extract_main_topic ( title . lower ( ) + " " + summary . lower ( ) ) )
skip_flag = (
skip_flag = (
result [ " act ion " ] == " SKIP " or
result [ " ais on " ] == " SKIP " or
" [homemade] " in title . lower ( ) or
" [homemade] " in title . lower ( ) or
" homemade " in title . lower ( ) or
" homemade " in title . lower ( ) or
" homemade " in summary . lower ( ) or
" homemade " in summary . lower ( ) or
any ( kw in title . lower ( ) or kw in summary . lower ( ) for kw in RECIPE_KEYWORDS )
any ( kw in title . lower ( ) or kw in summary . lower ( ) for kw in RECIPE_KEYWORDS )
)
)
logging . info ( f " Smart image query: { image_query } , Relevance: { relevance_keywords } , Main Topic: { main_topic } , Skip: { skip_flag } , "
logging . info ( f " Smart image query: { image_query } , Specific Term: { specific_term } , Relevance: { relevance_keywords } , Main Topic: { main_topic } , Skip: { skip_flag } , "
f " Reasons: action= { result [ ' action ' ] } , "
f " Reasons: action= { result [ ' action ' ] } , "
f " homemade_in_title= { ' [homemade] ' in title . lower ( ) or ' homemade ' in title . lower ( ) } , "
f " homemade_in_title= { ' [homemade] ' in title . lower ( ) or ' homemade ' in title . lower ( ) } , "
f " homemade_in_summary= { ' homemade ' in summary . lower ( ) } , "
f " homemade_in_summary= { ' homemade ' in summary . lower ( ) } , "
@ -441,7 +446,7 @@ def smart_image_and_filter(title, summary):
logging . warning ( f " Image query ' { image_query } ' too vague, using fallback " )
logging . warning ( f " Image query ' { image_query } ' too vague, using fallback " )
return main_topic , [ main_topic , " food " ] , main_topic , skip_flag
return main_topic , [ main_topic , " food " ] , main_topic , skip_flag
return image_query , relevance_keywords , main_topic , skip_flag
return image_query , relevance_keywords , main_topic , skip_flag , specific_term
except Exception as e :
except Exception as e :
logging . error ( f " Smart image/filter failed: { e } , using fallback " )
logging . error ( f " Smart image/filter failed: { e } , using fallback " )
@ -456,7 +461,7 @@ def smart_image_and_filter(title, summary):
f " homemade_in_title= { ' [homemade] ' in title . lower ( ) or ' homemade ' in title . lower ( ) } , "
f " homemade_in_title= { ' [homemade] ' in title . lower ( ) or ' homemade ' in title . lower ( ) } , "
f " homemade_in_summary= { ' homemade ' in summary . lower ( ) } , "
f " homemade_in_summary= { ' homemade ' in summary . lower ( ) } , "
f " recipe_keywords= { any ( kw in title . lower ( ) or kw in summary . lower ( ) for kw in RECIPE_KEYWORDS ) } " )
f " recipe_keywords= { any ( kw in title . lower ( ) or kw in summary . lower ( ) for kw in RECIPE_KEYWORDS ) } " )
return main_topic , [ main_topic , " food " ] , main_topic , skip_flag
return main_topic , [ main_topic , " food " ] , main_topic , skip_flag , " food "
def extract_main_topic ( text ) :
def extract_main_topic ( text ) :
# Common food-related keywords (expand as needed)
# Common food-related keywords (expand as needed)
@ -1170,7 +1175,7 @@ def classify_keywords(keywords):
logging . warning ( f " Keyword classification failed: { e } . Defaulting to all specific. " )
logging . warning ( f " Keyword classification failed: { e } . Defaulting to all specific. " )
return { kw : " specific " for kw in keywords }
return { kw : " specific " for kw in keywords }
def get_flickr_image ( search_query , relevance_keywords , main_topic ) :
def get_flickr_image ( search_query , relevance_keywords , main_topic , specific_term = None ) :
global used_images
global used_images
logger = logging . getLogger ( __name__ )
logger = logging . getLogger ( __name__ )
@ -1260,9 +1265,9 @@ def get_flickr_image(search_query, relevance_keywords, main_topic):
except Exception as e :
except Exception as e :
logger . warning ( f " DDG search failed for ' { ddg_query } ' : { e } " )
logger . warning ( f " DDG search failed for ' { ddg_query } ' : { e } " )
# Step 2: Fallback to Pixabay
# Step 2: Fallback to Pixabay with specific term
logger . info ( f " No valid DDG images, falling back to Pixabay for ' { search_query } ' " )
logger . info ( f " No valid DDG images, falling back to Pixabay for ' { search_query } ' " )
image_url , source_name , uploader , page_url = get_image ( search_query )
image_url , source_name , uploader , page_url = get_image ( search_query , specific_term )
if image_url :
if image_url :
used_images . add ( image_url )
used_images . add ( image_url )
save_used_images ( )
save_used_images ( )
@ -1272,7 +1277,7 @@ def get_flickr_image(search_query, relevance_keywords, main_topic):
logger . warning ( f " No valid images found for query ' { search_query } ' " )
logger . warning ( f " No valid images found for query ' { search_query } ' " )
return None , None , None , None
return None , None , None , None
def get_image ( search_query ) :
def get_image ( search_query , specific_term = None ) :
headers = { ' User-Agent ' : ' InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com) ' }
headers = { ' User-Agent ' : ' InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com) ' }
def process_image ( image_url , source_name , page_url ) :
def process_image ( image_url , source_name , page_url ) :
@ -1309,6 +1314,58 @@ def get_image(search_query):
logger . warning ( f " Failed to process Pixabay image { image_url } : { e } " )
logger . warning ( f " Failed to process Pixabay image { image_url } : { e } " )
return None
return None
def fetch_pixabay_image(query):
    """Search Pixabay for ``query`` and return the first usable, unused photo.

    Hits without a ``largeImageURL`` and hits whose URL is already in
    ``used_images`` are skipped. Each remaining hit is passed to
    ``process_image``; the first one it accepts is added to ``used_images``,
    persisted via ``save_used_images()``, and returned.

    Args:
        query: Free-text search term; it is URL-quoted before being sent.

    Returns:
        ``(image_url, source_name, uploader, page_url)`` for the selected
        image, or ``(None, None, None, None)`` when no hit qualifies or the
        request/processing raises.
    """
    try:
        pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(query)}&image_type=photo&per_page=20"
        response = requests.get(pixabay_url, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()

        for hit in data.get('hits', []):
            img_url = hit.get('largeImageURL')
            if not img_url or img_url in used_images:
                continue

            uploader = hit.get('user', 'Unknown')
            page_url = hit.get('pageURL', img_url)

            # Process the image for watermarks and resolution
            result = process_image(img_url, "Pixabay", page_url)
            if result:
                image_url, source_name, page_url, width, height = result
                # Record the raw hit URL so the same hit is skipped next time.
                used_images.add(img_url)
                save_used_images()
                logger.info(f"Selected Pixabay image: {img_url} by {uploader} for query '{query}' ({width}x{height})")
                return image_url, source_name, uploader, page_url

        logger.info(f"No valid Pixabay image found for query '{query}'. Trying fallback query.")
        return None, None, None, None

    except Exception as e:
        # Best-effort lookup: any failure is reported as "no image".
        # requests puts the full request URL in its error text, and that URL
        # carries the API key, so redact the key before it reaches the log.
        error_text = str(e)
        if PIXABAY_API_KEY:
            error_text = error_text.replace(str(PIXABAY_API_KEY), "***")
        logger.warning(f"Pixabay image fetch failed for query '{query}': {error_text}")
        return None, None, None, None
# Try with the original query
image_url , source_name , uploader , page_url = fetch_pixabay_image ( search_query )
if image_url :
return image_url , source_name , uploader , page_url
# Fallback to a dynamic query using the specific term if provided
if specific_term :
fallback_query = f " { specific_term } dining trends "
image_url , source_name , uploader , page_url = fetch_pixabay_image ( fallback_query )
if image_url :
return image_url , source_name , uploader , page_url
# Final fallback to a generic query
fallback_query = " food dining trends "
image_url , source_name , uploader , page_url = fetch_pixabay_image ( fallback_query )
if image_url :
return image_url , source_name , uploader , page_url
logger . error ( f " All image fetch attempts failed for query ' { search_query } ' . Returning None. " )
return None , None , None , None
def fetch_pixabay_image ( query ) :
def fetch_pixabay_image ( query ) :
try :
try :
pixabay_url = f " https://pixabay.com/api/?key= { PIXABAY_API_KEY } &q= { quote ( query ) } &image_type=photo&per_page=20 "
pixabay_url = f " https://pixabay.com/api/?key= { PIXABAY_API_KEY } &q= { quote ( query ) } &image_type=photo&per_page=20 "