@@ -293,110 +293,21 @@ def get_image(search_query):
logging . error ( f " All image fetch attempts failed for query ' { search_query } ' . Returning None. " )
return None , None , None , None
def process_photo ( photo ) :
tags = [ tag . text . lower ( ) for tag in photo . getTags ( ) ]
title = photo . title . lower ( ) if photo . title else " "
matched_keywords = [ kw for kw in exclude_keywords if kw in tags or kw in title ]
if matched_keywords :
logging . info ( f " Skipping image with unwanted keywords: { photo . id } (tags: { tags } , title: { title } , matched: { matched_keywords } ) " )
return None
# Try 'Large' size first, fall back to 'Medium' if unavailable
img_url = None
try :
img_url = photo . getPhotoFile ( size_label = ' Large ' )
except flickr_api . flickrerrors . FlickrError as e :
logging . info ( f " Large size not available for photo { photo . id } : { e } , trying Medium " )
try :
img_url = photo . getPhotoFile ( size_label = ' Medium ' )
except flickr_api . flickrerrors . FlickrError as e :
logging . warning ( f " Medium size not available for photo { photo . id } : { e } " )
return None
if not img_url or img_url in used_images :
return None
def get_flickr_image ( search_query , relevance_keywords ) :
global last_flickr_request_time , flickr_request_count
uploader = photo . owner . username
page_url = f " https://www.flickr.com/photos/ { photo . owner . nsid } / { photo . id } "
reset_flickr_request_count ( )
flickr_request_count + = 1
logging . info ( f " Flickr request count: { flickr_request_count } /3600 " )
used_images . add ( img_url )
save_used_images ( )
# Enforce a minimum delay of 10 seconds between Flickr requests
current_time = time . time ( )
time_since_last_request = current_time - last_flickr_request_time
if time_since_last_request < 10 :
time . sleep ( 10 - time_since_last_request )
flickr_data = {
" title " : search_query ,
" image_url " : img_url ,
" source " : " Flickr " ,
" uploader " : uploader ,
" page_url " : page_url ,
" timestamp " : datetime . now ( timezone . utc ) . isoformat ( )
}
flickr_file = " /home/shane/foodie_automator/flickr_images.json "
with open ( flickr_file , ' a ' ) as f :
json . dump ( flickr_data , f )
f . write ( ' \n ' )
logging . info ( f " Saved Flickr image metadata to { flickr_file } : { img_url } " )
last_flickr_request_time = time . time ( )
logging . info ( f " Selected Flickr image: { img_url } by { uploader } for query ' { search_query } ' (tags: { tags } ) " )
return img_url , " Flickr " , uploader , page_url
def search_ddg_for_flickr(query):
    """Search DuckDuckGo for Flickr photo pages matching *query*.

    Scrapes the DDG results page for links of the form
    ``flickr.com/photos/<user>/<photo_id>`` and collects the photo IDs.

    Args:
        query: Free-text search string; ``site:flickr.com`` is appended.

    Returns:
        A list of at most 2 unique Flickr photo ID strings; an empty list
        when the request or parse fails.
    """
    ddg_query = f"{query} site:flickr.com"
    ddg_url = f"https://duckduckgo.com/?q={quote(ddg_query)}"
    try:
        # `headers` is a module-level dict defined elsewhere in this file.
        response = requests.get(ddg_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        photo_ids = set()
        for link in soup.find_all('a', href=True):
            href = link['href']
            match = re.search(r'flickr\.com/photos/[^/]+/(\d+)', href)
            if match:
                photo_ids.add(match.group(1))
        photo_ids = list(photo_ids)[:2]  # Limit to 2 IDs
        logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}")
        return photo_ids
    except Exception as e:
        logging.warning(f"DDG search failed for query '{ddg_query}': {e}")
        # BUG FIX: previously returned `set()` here while the success path
        # returns a list — return an empty list for a consistent type.
        return []
def classify_keywords(keywords):
    """Ask the LLM to label each keyword as 'specific' or 'generic'.

    Args:
        keywords: Iterable of keyword strings extracted from an image
            search query.

    Returns:
        A dict mapping each keyword to "specific" or "generic". On any
        failure (API error, unparseable reply) every keyword defaults to
        "specific" so callers can proceed.
    """
    prompt = (
        "Given the following keywords from an image search query, classify each as 'specific' (e.g., brand names, unique entities like 'Taco Bell' or 'Paris') or 'generic' (e.g., common or abstract terms like 'dining' or 'trends'). "
        "Return a JSON object mapping each keyword to its classification.\n\n"
        "Keywords: " + ", ".join(keywords) + "\n\n"
        "Example output format (do not use these exact keywords in your response):\n"
        "```json\n"
        "{\n"
        "\"keyword1\": \"specific\",\n"
        "\"keyword2\": \"generic\"\n"
        "}\n```"
    )
    try:
        # `client` and LIGHT_TASK_MODEL are module-level — TODO confirm
        # they are an OpenAI-compatible client and model name.
        response = client.chat.completions.create(
            model=LIGHT_TASK_MODEL,
            messages=[
                {"role": "system", "content": "You are a helper that classifies keywords."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=100,
            temperature=0.5
        )
        raw_response = response.choices[0].message.content
        json_match = re.search(r'```json\n([\s\S]*?)\n```', raw_response)
        if json_match:
            classifications = json.loads(json_match.group(1))
        else:
            # ROBUSTNESS FIX: models frequently return bare JSON without a
            # ```json fence; try parsing the whole reply before giving up.
            try:
                classifications = json.loads(raw_response)
            except (json.JSONDecodeError, TypeError):
                logging.warning(f"Failed to parse keyword classification JSON: {raw_response}")
                return {kw: "specific" for kw in keywords}
        if not isinstance(classifications, dict):
            # A list/scalar reply would break callers that .items() the result.
            logging.warning(f"Keyword classification was not a JSON object: {raw_response}")
            return {kw: "specific" for kw in keywords}
        return classifications
    except Exception as e:
        logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
        return {kw: "specific" for kw in keywords}
# Step 1: Search DDG to find Flickr photo IDs
logging . info ( f " Searching DDG with query: ' { search_query } site:flickr.com ' " )
photo_ids = search_ddg_for_flickr ( search_query )
@@ -414,6 +325,7 @@ def process_photo(photo):
classifications = classify_keywords ( keywords )
logging . info ( f " Keyword classifications: { classifications } " )
# Prioritize specific keywords
specific_keywords = [ kw for kw , classification in classifications . items ( ) if classification == " specific " ]
if specific_keywords :
for keyword in specific_keywords :
@@ -424,42 +336,17 @@ def process_photo(photo):
if result :
return result
# Step 3: Final fallback to a generic food-related query
logging . info ( f " No results found. Falling back to generic query: ' food dining ' " )
photos = search_flickr ( " food dining " )
# Step 3: Final fallback using relevance keywords
fallback_query = " " . join ( relevance_keywords ) if isinstance ( relevance_keywords , list ) else relevance_keywords
logging . info ( f " No results found. Falling back to generic query: ' { fallback_query } ' " )
photos = search_flickr ( fallback_query )
for photo in photos :
result = process_photo ( photo )
if result :
return result
logging . warning ( f " No valid Flickr image found in fallback for query ' { search_query } ' . Trying Pixabay. " )
# Fallback to Pixabay
try :
pixabay_url = f " https://pixabay.com/api/?key= { PIXABAY_API_KEY } &q= { quote ( search_query ) } &image_type=photo&per_page=10 "
response = requests . get ( pixabay_url , timeout = 10 )
response . raise_for_status ( )
data = response . json ( )
for hit in data . get ( ' hits ' , [ ] ) :
img_url = hit . get ( ' webformatURL ' )
if not img_url or img_url in used_images :
continue
uploader = hit . get ( ' user ' , ' Unknown ' )
page_url = hit . get ( ' pageURL ' , img_url )
used_images . add ( img_url )
save_used_images ( )
logging . debug ( f " Image selected for query ' { search_query } ' : { img_url } " )
return img_url , " Pixabay " , uploader , page_url
logging . warning ( f " No valid Pixabay image found for query ' { search_query } ' . " )
return None , None , None , None
except Exception as e :
logging . error ( f " Pixabay image fetch failed for query ' { search_query } ' : { e } " )
return None , None , None , None
logging . warning ( f " No valid Flickr image found for query ' { search_query } ' after all attempts. " )
return None , None , None , None
def generate_image_query ( title , summary ) :
try :
@@ -1119,45 +1006,6 @@ def get_flickr_image(search_query, relevance_keywords):
logging . warning ( f " Failed to fetch Flickr photo ID { photo_id } : { e } " )
return None
# Helper function to process a photo (fetch URL and metadata only)
def process_photo(photo):
    """Validate a Flickr photo, record its metadata, and return its details.

    Nested helper: reads ``search_query``, ``exclude_keywords`` and
    ``used_images`` from the enclosing scope and calls ``save_used_images``.

    Args:
        photo: A flickr_api Photo object.

    Returns:
        Tuple ``(img_url, "Flickr", uploader, page_url)`` when the photo is
        usable, or ``None`` when it is excluded, has no usable size, or was
        already used.
    """
    tags = [tag.text.lower() for tag in photo.getTags()]
    title = photo.title.lower() if photo.title else ""
    matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title]
    if matched_keywords:
        logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})")
        return None
    # BUG FIX: getPhotoFile raises FlickrError when a size label is
    # unavailable (it does not return a falsy value), so the previous
    # unguarded calls crashed instead of falling back to 'Medium'.
    try:
        img_url = photo.getPhotoFile(size_label='Large')
    except flickr_api.flickrerrors.FlickrError as e:
        logging.info(f"Large size not available for photo {photo.id}: {e}, trying Medium")
        try:
            img_url = photo.getPhotoFile(size_label='Medium')
        except flickr_api.flickrerrors.FlickrError as e:
            logging.warning(f"Medium size not available for photo {photo.id}: {e}")
            return None
    if not img_url or img_url in used_images:
        return None
    uploader = photo.owner.username
    page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
    # Mark the image as used before returning so retries don't repeat it.
    used_images.add(img_url)
    save_used_images()
    flickr_data = {
        "title": search_query,
        "image_url": img_url,
        "source": "Flickr",
        "uploader": uploader,
        "page_url": page_url,
        "timestamp": datetime.now(timezone.utc).isoformat()
    }
    # Append-mode JSON-lines log of every image selected.
    flickr_file = "/home/shane/foodie_automator/flickr_images.json"
    with open(flickr_file, 'a') as f:
        json.dump(flickr_data, f)
        f.write('\n')
    logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}")
    logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
    return img_url, "Flickr", uploader, page_url
# Helper function to search DDG and extract Flickr photo IDs
def search_ddg_for_flickr ( query ) :
ddg_query = f " { query } site:flickr.com "