@ -964,6 +964,54 @@ def reset_flickr_request_count():
flickr_request_count = 0
flickr_request_start_time = time . time ( )
def process_photo ( photo , search_query ) :
tags = [ tag . text . lower ( ) for tag in photo . getTags ( ) ]
title = photo . title . lower ( ) if photo . title else " "
matched_keywords = [ kw for kw in exclude_keywords if kw in tags or kw in title ]
if matched_keywords :
logging . info ( f " Skipping image with unwanted keywords: { photo . id } (tags: { tags } , title: { title } , matched: { matched_keywords } ) " )
return None
# Try 'Large' size first, fall back to 'Medium' if unavailable
img_url = None
try :
img_url = photo . getPhotoFile ( size_label = ' Large ' )
except flickr_api . flickrerrors . FlickrError as e :
logging . info ( f " Large size not available for photo { photo . id } : { e } , trying Medium " )
try :
img_url = photo . getPhotoFile ( size_label = ' Medium ' )
except flickr_api . flickrerrors . FlickrError as e :
logging . warning ( f " Medium size not available for photo { photo . id } : { e } " )
return None
if not img_url or img_url in used_images :
logging . info ( f " Image URL invalid or already used for photo { photo . id } : { img_url } " )
return None
uploader = photo . owner . username
page_url = f " https://www.flickr.com/photos/ { photo . owner . nsid } / { photo . id } "
used_images . add ( img_url )
save_used_images ( )
flickr_data = {
" title " : search_query ,
" image_url " : img_url ,
" source " : " Flickr " ,
" uploader " : uploader ,
" page_url " : page_url ,
" timestamp " : datetime . now ( timezone . utc ) . isoformat ( )
}
flickr_file = " /home/shane/foodie_automator/flickr_images.json "
with open ( flickr_file , ' a ' ) as f :
json . dump ( flickr_data , f )
f . write ( ' \n ' )
logging . info ( f " Saved Flickr image metadata to { flickr_file } : { img_url } " )
logging . info ( f " Selected Flickr image: { img_url } by { uploader } for query ' { search_query } ' (tags: { tags } ) " )
return img_url , " Flickr " , uploader , page_url
def get_flickr_image ( search_query , relevance_keywords ) :
global last_flickr_request_time , flickr_request_count
@ -979,92 +1027,6 @@ def get_flickr_image(search_query, relevance_keywords):
last_flickr_request_time = time . time ( )
headers = { ' User-Agent ' : ' InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com) ' }
# Helper function to search Flickr with a given query
def search_flickr ( query , per_page = 5 ) : # Reduced per_page to limit results
try :
photos = flickr_api . Photo . search (
text = query ,
per_page = per_page ,
sort = ' relevance ' ,
safe_search = 1 ,
media = ' photos ' ,
license = ' 4,5,9,10 '
)
return photos
except Exception as e :
logging . warning ( f " Flickr API error for query ' { query } ' : { e } " )
return [ ]
# Helper function to fetch a Flickr photo by ID
def fetch_photo_by_id ( photo_id ) :
try :
photo = flickr_api . Photo ( id = photo_id )
return photo
except Exception as e :
logging . warning ( f " Failed to fetch Flickr photo ID { photo_id } : { e } " )
return None
# Helper function to search DDG and extract Flickr photo IDs
def search_ddg_for_flickr ( query ) :
ddg_query = f " { query } site:flickr.com "
ddg_url = f " https://duckduckgo.com/?q= { quote ( ddg_query ) } "
try :
response = requests . get ( ddg_url , headers = headers , timeout = 10 )
response . raise_for_status ( )
soup = BeautifulSoup ( response . text , ' html.parser ' )
photo_ids = set ( )
for link in soup . find_all ( ' a ' , href = True ) :
href = link [ ' href ' ]
match = re . search ( r ' flickr \ .com/photos/[^/]+/( \ d+) ' , href )
if match :
photo_id = match . group ( 1 )
photo_ids . add ( photo_id )
photo_ids = list ( photo_ids ) [ : 2 ] # Limit to 2 IDs
logging . info ( f " Found { len ( photo_ids ) } Flickr photo IDs via DDG: { photo_ids } " )
return photo_ids
except Exception as e :
logging . warning ( f " DDG search failed for query ' { ddg_query } ' : { e } " )
return set ( )
# Helper function to classify keywords as specific or generic
def classify_keywords ( keywords ) :
prompt = (
" Given the following keywords from an image search query, classify each as ' specific ' (e.g., brand names, unique entities like ' Taco Bell ' or ' Paris ' ) or ' generic ' (e.g., common or abstract terms like ' dining ' or ' trends ' ). "
" Return a JSON object mapping each keyword to its classification. \n \n "
" Keywords: " + " , " . join ( keywords ) + " \n \n "
" Example output format (do not use these exact keywords in your response): \n "
" ```json \n "
" { \n "
" \" keyword1 \" : \" specific \" , \n "
" \" keyword2 \" : \" generic \" \n "
" } \n ``` "
)
try :
response = client . chat . completions . create (
model = LIGHT_TASK_MODEL ,
messages = [
{ " role " : " system " , " content " : " You are a helper that classifies keywords. " } ,
{ " role " : " user " , " content " : prompt }
] ,
max_tokens = 100 ,
temperature = 0.5
)
raw_response = response . choices [ 0 ] . message . content
json_match = re . search ( r ' ```json \ n([ \ s \ S]*?) \ n``` ' , raw_response )
if not json_match :
logging . warning ( f " Failed to parse keyword classification JSON: { raw_response } " )
return { kw : " specific " for kw in keywords }
classifications = json . loads ( json_match . group ( 1 ) )
return classifications
except Exception as e :
logging . warning ( f " Keyword classification failed: { e } . Defaulting to all specific. " )
return { kw : " specific " for kw in keywords }
# Step 1: Search DDG to find Flickr photo IDs
logging . info ( f " Searching DDG with query: ' { search_query } site:flickr.com ' " )
photo_ids = search_ddg_for_flickr ( search_query )
@ -1072,7 +1034,7 @@ def get_flickr_image(search_query, relevance_keywords):
for photo_id in photo_ids :
photo = fetch_photo_by_id ( photo_id )
if photo :
result = process_photo ( photo )
result = process_photo ( photo , search_query )
if result :
return result
@ -1089,7 +1051,7 @@ def get_flickr_image(search_query, relevance_keywords):
logging . info ( f " Searching Flickr with specific keyword: ' { keyword } ' " )
photos = search_flickr ( keyword )
for photo in photos :
result = process_photo ( photo )
result = process_photo ( photo , search_query )
if result :
return result
@ -1098,7 +1060,7 @@ def get_flickr_image(search_query, relevance_keywords):
logging . info ( f " No results found. Falling back to generic query: ' { fallback_query } ' " )
photos = search_flickr ( fallback_query )
for photo in photos :
result = process_photo ( photo )
result = process_photo ( photo , search_query )
if result :
return result