@ -341,9 +341,10 @@ def smart_image_and_filter(title, summary):
prompt = (
prompt = (
" Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) "
" Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) "
" for an image search about food industry trends or viral content. Prioritize specific terms if present, "
" for an image search about food industry trends or viral content. Prioritize specific terms if present, "
" otherwise focus on the main theme. "
" otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). "
" Return ' SKIP ' if the article is about home appliances, recipes, promotions, or contains ' homemade ' , else ' KEEP ' . "
" Return ' SKIP ' if the article is about home appliances, recipes, promotions, or contains ' homemade ' , else ' KEEP ' . "
" Return as JSON with double quotes for all property names and string values (e.g., { \" image_query \" : \" specific term \" , \" relevance \" : [ \" keyword1 \" , \" keyword2 \" ], \" action \" : \" KEEP \" or \" SKIP \" }). "
" Return as JSON with double quotes for all property names and string values (e.g., "
" { \" image_query \" : \" specific term \" , \" relevance \" : [ \" keyword1 \" , \" keyword2 \" ], \" main_topic \" : \" main food item \" , \" action \" : \" KEEP \" or \" SKIP \" }). "
)
)
response = client . chat . completions . create (
response = client . chat . completions . create (
@ -357,39 +358,52 @@ def smart_image_and_filter(title, summary):
raw_result = response . choices [ 0 ] . message . content . strip ( )
raw_result = response . choices [ 0 ] . message . content . strip ( )
logging . info ( f " Raw GPT smart image/filter response: ' { raw_result } ' " )
logging . info ( f " Raw GPT smart image/filter response: ' { raw_result } ' " )
# Remove ```json markers and fix single quotes in JSON structure
cleaned_result = re . sub ( r ' ```json \ s*| \ s*``` ' , ' ' , raw_result ) . strip ( )
cleaned_result = re . sub ( r ' ```json \ s*| \ s*``` ' , ' ' , raw_result ) . strip ( )
# Replace single quotes with double quotes, but preserve single quotes within string values
fixed_result = re . sub ( r " (?<! \\ ) ' (?= \ s*[ \ w \ s]* \ ])|(?<= \ [| \ { | \ s) ' | ' (?= \ s*[ \ ] \ },:])|(?<= \ w) ' (?= \ s*:) " , ' " ' , cleaned_result )
fixed_result = re . sub ( r " (?<! \\ ) ' (?= \ s*[ \ w \ s]* \ ])|(?<= \ [| \ { | \ s) ' | ' (?= \ s*[ \ ] \ },:])|(?<= \ w) ' (?= \ s*:) " , ' " ' , cleaned_result )
try :
try :
result = json . loads ( fixed_result )
result = json . loads ( fixed_result )
except json . JSONDecodeError as e :
except json . JSONDecodeError as e :
logging . warning ( f " JSON parsing failed: { e } , raw: ' { fixed_result } ' . Using fallback. " )
logging . warning ( f " JSON parsing failed: { e } , raw: ' { fixed_result } ' . Using fallback. " )
return " food trends " , [ " cuisine " , " dining " ] , False
# Fallback: Extract main topic using simple keyword matching
main_topic = extract_main_topic ( title . lower ( ) + " " + summary . lower ( ) )
return main_topic , [ main_topic , " food " ] , False
if not isinstance ( result , dict ) or " image_query " not in result or " relevance " not in result or " action " not in result :
if not isinstance ( result , dict ) or " image_query " not in result or " relevance " not in result or " action " not in result :
logging . warning ( f " Invalid GPT response format: { result } , using fallback " )
logging . warning ( f " Invalid GPT response format: { result } , using fallback " )
return " food trends " , [ " cuisine " , " dining " ] , False
main_topic = extract_main_topic ( title . lower ( ) + " " + summary . lower ( ) )
return main_topic , [ main_topic , " food " ] , False
image_query = result [ " image_query " ]
image_query = result [ " image_query " ]
relevance_keywords = result [ " relevance " ]
relevance_keywords = result [ " relevance " ]
main_topic = result . get ( " main_topic " , extract_main_topic ( title . lower ( ) + " " + summary . lower ( ) ) )
skip_flag = result [ " action " ] == " SKIP " or " homemade " in title . lower ( ) or " homemade " in summary . lower ( )
skip_flag = result [ " action " ] == " SKIP " or " homemade " in title . lower ( ) or " homemade " in summary . lower ( )
logging . info ( f " Smart image query: { image_query } , Relevance: { relevance_keywords } , Skip: { skip_flag } " )
logging . info ( f " Smart image query: { image_query } , Relevance: { relevance_keywords } , Main Topic: { main_topic } , Skip: { skip_flag } " )
specific_single_words = [ " kimchi " , " sushi " , " pizza " , " taco " , " burger " ]
if not image_query :
if not image_query :
logging . warning ( f " Image query is empty, using fallback " )
logging . warning ( f " Image query is empty, using fallback " )
return " food trends " , [ " cuisine " , " dining " ] , skip_flag
return main_topic , [ main_topic , " food " ] , skip_flag
# Allow single-word queries if they are specific (e.g., food items)
specific_single_words = [ " kimchi " , " sushi " , " pizza " , " taco " , " burger " ] # Add more as needed
if len ( image_query . split ( ) ) < 2 and image_query . lower ( ) not in specific_single_words :
if len ( image_query . split ( ) ) < 2 and image_query . lower ( ) not in specific_single_words :
logging . warning ( f " Image query ' { image_query } ' too vague, using fallback " )
logging . warning ( f " Image query ' { image_query } ' too vague, using fallback " )
return " food trends " , [ " cuisine " , " dining " ] , skip_flag
return main_topic , [ main_topic , " food " ] , skip_flag
return image_query , relevance_keywords , main_topic , skip_flag
except Exception as e :
except Exception as e :
logging . error ( f " Smart image/filter failed: { e } , using fallback " )
logging . error ( f " Smart image/filter failed: { e } , using fallback " )
return " food trends " , [ " cuisine " , " dining " ] , False
main_topic = extract_main_topic ( title . lower ( ) + " " + summary . lower ( ) )
return main_topic , [ main_topic , " food " ] , False
def extract_main_topic(text, keywords=None, default="food trends"):
    """Return the first known food keyword found in *text*.

    Keywords are tried in list order (not in order of appearance in the
    text), so earlier entries take priority when several are present.
    Matching is a case-insensitive substring test, which means plural
    forms such as "tacos" still match "taco".

    Args:
        text: Free-form text to scan, e.g. an article title plus summary.
            ``None`` or an empty string yields *default*.
        keywords: Optional iterable of keywords to look for. When omitted,
            a built-in list of common food items is used.
        default: Value returned when no keyword matches.

    Returns:
        The matching keyword exactly as listed, or *default*.
    """
    if keywords is None:
        # Common food-related keywords (expand as needed).
        keywords = (
            "kimchi", "sushi", "pizza", "taco", "burger",
            "ramen", "curry", "pasta", "salad", "soup",
        )

    # Guard against missing input instead of raising TypeError on `in`.
    if not text:
        return default

    # Normalise once so callers do not have to lowercase beforehand.
    haystack = str(text).lower()
    for keyword in keywords:
        if keyword.lower() in haystack:
            return keyword

    # Fallback to a generic term if no specific food item is found.
    return default
def upload_image_to_wp ( image_url , post_title , wp_base_url , wp_username , wp_password , image_source = " Pixabay " , uploader = None , pixabay_url = None ) :
def upload_image_to_wp ( image_url , post_title , wp_base_url , wp_username , wp_password , image_source = " Pixabay " , uploader = None , pixabay_url = None ) :
try :
try :
@ -934,15 +948,25 @@ def process_photo(photo, search_query):
logging . warning ( f " Medium size not available for photo { photo . id } : { e } " )
logging . warning ( f " Medium size not available for photo { photo . id } : { e } " )
return None
return None
if not img_url or img_url in used_images :
if not img_url :
logging . info ( f " Image URL invalid or already used for photo { photo . id } : { img_url } " )
logging . info ( f " Image URL invalid for photo { photo . id } " )
return None
# Check if the image is highly relevant to the query
query_keywords = set ( search_query . lower ( ) . split ( ) )
photo_keywords = set ( tags + title . split ( ) )
is_relevant = bool ( query_keywords & photo_keywords ) # Check if any query keyword is in tags or title
# Allow reuse of highly relevant images
if img_url in used_images and not is_relevant :
logging . info ( f " Image already used and not highly relevant for photo { photo . id } : { img_url } " )
return None
return None
uploader = photo . owner . username
uploader = photo . owner . username
page_url = f " https://www.flickr.com/photos/ { photo . owner . nsid } / { photo . id } "
page_url = f " https://www.flickr.com/photos/ { photo . owner . nsid } / { photo . id } "
used_images . add ( img_url )
used_images . add ( img_url )
save_used_images ( ) # This will now save in the correct format
save_used_images ( )
flickr_data = {
flickr_data = {
" title " : search_query ,
" title " : search_query ,
@ -1041,14 +1065,13 @@ def classify_keywords(keywords):
logging . warning ( f " Keyword classification failed: { e } . Defaulting to all specific. " )
logging . warning ( f " Keyword classification failed: { e } . Defaulting to all specific. " )
return { kw : " specific " for kw in keywords }
return { kw : " specific " for kw in keywords }
def get_flickr_image ( search_query , relevance_keywords ) :
def get_flickr_image ( search_query , relevance_keywords , main_topic ) :
global last_flickr_request_time , flickr_request_count
global last_flickr_request_time , flickr_request_count
reset_flickr_request_count ( )
reset_flickr_request_count ( )
flickr_request_count + = 1
flickr_request_count + = 1
logging . info ( f " Flickr request count: { flickr_request_count } /3600 " )
logging . info ( f " Flickr request count: { flickr_request_count } /3600 " )
# Enforce a minimum delay of 10 seconds between Flickr requests
current_time = time . time ( )
current_time = time . time ( )
time_since_last_request = current_time - last_flickr_request_time
time_since_last_request = current_time - last_flickr_request_time
if time_since_last_request < 10 :
if time_since_last_request < 10 :
@ -1081,7 +1104,6 @@ def get_flickr_image(search_query, relevance_keywords):
classifications = classify_keywords ( keywords )
classifications = classify_keywords ( keywords )
logging . info ( f " Keyword classifications: { classifications } " )
logging . info ( f " Keyword classifications: { classifications } " )
# Prioritize specific keywords
specific_keywords = [ kw for kw , classification in classifications . items ( ) if classification == " specific " ]
specific_keywords = [ kw for kw , classification in classifications . items ( ) if classification == " specific " ]
if specific_keywords :
if specific_keywords :
for keyword in specific_keywords :
for keyword in specific_keywords :
@ -1092,9 +1114,17 @@ def get_flickr_image(search_query, relevance_keywords):
if result :
if result :
return result
return result
# Step 4: Final fallback using relevance keywords
# Step 4: Fallback using main topic
logging . info ( f " No results found. Falling back to main topic: ' { main_topic } ' " )
photos = search_flickr ( main_topic )
for photo in photos :
result = process_photo ( photo , main_topic )
if result :
return result
# Step 5: Final fallback using relevance keywords
fallback_query = " " . join ( relevance_keywords ) if isinstance ( relevance_keywords , list ) else relevance_keywords
fallback_query = " " . join ( relevance_keywords ) if isinstance ( relevance_keywords , list ) else relevance_keywords
logging . info ( f " No results found. Falling back to generic query: ' { fallback_query } ' " )
logging . info ( f " No results with main topic. Falling back to relevance keywords : ' { fallback_query } ' " )
photos = search_flickr ( fallback_query )
photos = search_flickr ( fallback_query )
for photo in photos :
for photo in photos :
result = process_photo ( photo , search_query )
result = process_photo ( photo , search_query )