@ -225,7 +225,6 @@ def get_image(search_query):
flickr_request_count + = 1
flickr_request_count + = 1
logging . info ( f " Flickr request count: { flickr_request_count } /3600 " )
logging . info ( f " Flickr request count: { flickr_request_count } /3600 " )
# Enforce a minimum delay of 1 second between Flickr requests
current_time = time . time ( )
current_time = time . time ( )
time_since_last_request = current_time - last_flickr_request_time
time_since_last_request = current_time - last_flickr_request_time
if time_since_last_request < 1 :
if time_since_last_request < 1 :
@ -235,7 +234,6 @@ def get_image(search_query):
headers = { ' User-Agent ' : ' InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com) ' }
headers = { ' User-Agent ' : ' InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com) ' }
# Helper function to search Flickr with a given query
def search_flickr ( query , per_page = 20 ) :
def search_flickr ( query , per_page = 20 ) :
try :
try :
photos = flickr_api . Photo . search (
photos = flickr_api . Photo . search (
@ -251,7 +249,14 @@ def get_image(search_query):
logging . warning ( f " Flickr API error for query ' { query } ' : { e } " )
logging . warning ( f " Flickr API error for query ' { query } ' : { e } " )
return [ ]
return [ ]
# Helper function to process a photo
def fetch_photo_by_id ( photo_id ) :
try :
photo = flickr_api . Photo ( id = photo_id )
return photo
except Exception as e :
logging . warning ( f " Failed to fetch Flickr photo ID { photo_id } : { e } " )
return None
def process_photo ( photo ) :
def process_photo ( photo ) :
tags = [ tag . text . lower ( ) for tag in photo . getTags ( ) ]
tags = [ tag . text . lower ( ) for tag in photo . getTags ( ) ]
title = photo . title . lower ( ) if photo . title else " "
title = photo . title . lower ( ) if photo . title else " "
@ -320,7 +325,28 @@ def get_image(search_query):
if temp_file and os . path . exists ( temp_path ) :
if temp_file and os . path . exists ( temp_path ) :
os . unlink ( temp_path )
os . unlink ( temp_path )
# Helper function to classify keywords as specific or generic
def search_ddg_for_flickr ( query ) :
ddg_query = f " { query } site:flickr.com "
ddg_url = f " https://duckduckgo.com/?q= { quote ( ddg_query ) } "
try :
response = requests . get ( ddg_url , headers = headers , timeout = 10 )
response . raise_for_status ( )
soup = BeautifulSoup ( response . text , ' html.parser ' )
photo_ids = set ( )
for link in soup . find_all ( ' a ' , href = True ) :
href = link [ ' href ' ]
match = re . search ( r ' flickr \ .com/photos/[^/]+/( \ d+) ' , href )
if match :
photo_id = match . group ( 1 )
photo_ids . add ( photo_id )
logging . info ( f " Found { len ( photo_ids ) } Flickr photo IDs via DDG: { photo_ids } " )
return photo_ids
except Exception as e :
logging . warning ( f " DDG search failed for query ' { ddg_query } ' : { e } " )
return set ( )
def classify_keywords ( keywords ) :
def classify_keywords ( keywords ) :
prompt = (
prompt = (
" Given the following keywords from an image search query, classify each as ' specific ' (e.g., brand names, unique entities) or ' generic ' (e.g., common or abstract terms). "
" Given the following keywords from an image search query, classify each as ' specific ' (e.g., brand names, unique entities) or ' generic ' (e.g., common or abstract terms). "
@ -330,15 +356,14 @@ def get_image(search_query):
" ```json \n "
" ```json \n "
" { \n "
" { \n "
" \" Wingstop \" : \" specific \" , \n "
" \" Wingstop \" : \" specific \" , \n "
" \" Smart \" : \" generic \" , \n "
" \" dining \" : \" generic \" \n "
" \" Kitchen \" : \" generic \" \n "
" } \n ``` "
" } \n ``` "
)
)
try :
try :
response = client . chat . completions . create (
response = client . chat . completions . create (
model = LIGHT_TASK_MODEL ,
model = LIGHT_TASK_MODEL ,
messages = [
messages = [
{ " role " : " system " , " content " : " You are a helpful assistant that classifies keywords. " } ,
{ " role " : " system " , " content " : " You are a helper that classifies keywords. " } ,
{ " role " : " user " , " content " : prompt }
{ " role " : " user " , " content " : prompt }
] ,
] ,
max_tokens = 100 ,
max_tokens = 100 ,
@ -356,21 +381,23 @@ def get_image(search_query):
logging . warning ( f " Keyword classification failed: { e } . Defaulting to all specific. " )
logging . warning ( f " Keyword classification failed: { e } . Defaulting to all specific. " )
return { kw : " specific " for kw in keywords }
return { kw : " specific " for kw in keywords }
# Step 1: Try the original search query on Flickr
# Step 1: Search DDG to find Flickr photo IDs
logging . info ( f " Searching Flickr with original query: ' { search_query } ' " )
logging . info ( f " Searching DDG with query: ' { search_query } site:flickr.com ' " )
photos = search_flickr ( search_query )
photo_ids = search_ddg_for_flickr ( search_query )
for photo in photos :
if photo_ids :
result = process_photo ( photo )
for photo_id in photo_ids :
if result :
photo = fetch_photo_by_id ( photo_id )
return result
if photo :
result = process_photo ( photo )
# Step 2: Break down the query into keywords and classify them
if result :
return result
# Step 2: Break down the query into keywords and classify them for direct Flickr API search
keywords = search_query . lower ( ) . split ( )
keywords = search_query . lower ( ) . split ( )
if len ( keywords ) > 1 :
if len ( keywords ) > 1 :
classifications = classify_keywords ( keywords )
classifications = classify_keywords ( keywords )
logging . info ( f " Keyword classifications: { classifications } " )
logging . info ( f " Keyword classifications: { classifications } " )
# Prioritize specific keywords
specific_keywords = [ kw for kw , classification in classifications . items ( ) if classification == " specific " ]
specific_keywords = [ kw for kw , classification in classifications . items ( ) if classification == " specific " ]
if specific_keywords :
if specific_keywords :
for keyword in specific_keywords :
for keyword in specific_keywords :
@ -382,10 +409,8 @@ def get_image(search_query):
return result
return result
# Step 3: Final fallback to a generic food-related query
# Step 3: Final fallback to a generic food-related query
# Use a simple generic query derived from context (e.g., "food dining")
logging . info ( f " No results found. Falling back to generic query: ' food dining ' " )
fallback_query = " food dining " # This could be further contextualized if needed
photos = search_flickr ( " food dining " )
logging . info ( f " No results found. Falling back to generic query: ' { fallback_query } ' " )
photos = search_flickr ( fallback_query )
for photo in photos :
for photo in photos :
result = process_photo ( photo )
result = process_photo ( photo )
if result :
if result :
@ -1000,6 +1025,219 @@ def reset_flickr_request_count():
flickr_request_count = 0
flickr_request_count = 0
flickr_request_start_time = time . time ( )
flickr_request_start_time = time . time ( )
def get_flickr_image ( search_query , relevance_keywords ) :
global last_flickr_request_time , flickr_request_count
reset_flickr_request_count ( )
flickr_request_count + = 1
logging . info ( f " Flickr request count: { flickr_request_count } /3600 " )
# Enforce a minimum delay of 1 second between Flickr requests
current_time = time . time ( )
time_since_last_request = current_time - last_flickr_request_time
if time_since_last_request < 1 :
time . sleep ( 1 - time_since_last_request )
last_flickr_request_time = time . time ( )
headers = { ' User-Agent ' : ' InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com) ' }
# Helper function to search Flickr with a given query
def search_flickr ( query , per_page = 20 ) :
try :
photos = flickr_api . Photo . search (
text = query ,
per_page = per_page ,
sort = ' relevance ' ,
safe_search = 1 ,
media = ' photos ' ,
license = ' 4,5,9,10 '
)
return photos
except Exception as e :
logging . warning ( f " Flickr API error for query ' { query } ' : { e } " )
return [ ]
# Helper function to fetch a Flickr photo by ID
def fetch_photo_by_id ( photo_id ) :
try :
photo = flickr_api . Photo ( id = photo_id )
return photo
except Exception as e :
logging . warning ( f " Failed to fetch Flickr photo ID { photo_id } : { e } " )
return None
# Helper function to process a photo
def process_photo ( photo ) :
tags = [ tag . text . lower ( ) for tag in photo . getTags ( ) ]
title = photo . title . lower ( ) if photo . title else " "
matched_keywords = [ kw for kw in exclude_keywords if kw in tags or kw in title ]
if matched_keywords :
logging . info ( f " Skipping image with unwanted keywords: { photo . id } (tags: { tags } , title: { title } , matched: { matched_keywords } ) " )
return None
img_url = photo . getPhotoFile ( size_label = ' Large ' )
if not img_url :
img_url = photo . getPhotoFile ( size_label = ' Medium ' )
if not img_url or img_url in used_images :
return None
temp_file = None
try :
img_response = requests . get ( img_url , headers = headers , timeout = 10 )
img_response . raise_for_status ( )
with tempfile . NamedTemporaryFile ( delete = False , suffix = ' .jpg ' ) as temp_file :
temp_file . write ( img_response . content )
temp_path = temp_file . name
img = Image . open ( temp_path )
text = pytesseract . image_to_string ( img )
char_count = len ( text . strip ( ) )
logging . info ( f " OCR processed { img_url } : { char_count } characters detected " )
if char_count > 200 :
logging . info ( f " Skipping text-heavy image (OCR): { img_url } (char_count: { char_count } ) " )
return None
uploader = photo . owner . username
page_url = f " https://www.flickr.com/photos/ { photo . owner . nsid } / { photo . id } "
used_images . add ( img_url )
save_used_images ( )
flickr_data = {
" title " : search_query ,
" image_url " : img_url ,
" source " : " Flickr " ,
" uploader " : uploader ,
" page_url " : page_url ,
" timestamp " : datetime . now ( timezone . utc ) . isoformat ( ) ,
" ocr_chars " : char_count
}
flickr_file = " /home/shane/foodie_automator/flickr_images.json "
with open ( flickr_file , ' a ' ) as f :
json . dump ( flickr_data , f )
f . write ( ' \n ' )
logging . info ( f " Saved Flickr image to { flickr_file } : { img_url } " )
logging . info ( f " Fetched Flickr image: { img_url } by { uploader } for query ' { search_query } ' (tags: { tags } ) " )
return img_url , " Flickr " , uploader , page_url
except requests . exceptions . HTTPError as e :
if e . response . status_code == 429 :
logging . warning ( f " Rate limit hit for { img_url } . Falling back to Pixabay. " )
return None
else :
logging . warning ( f " Download failed for { img_url } : { e } " )
return None
except Exception as e :
logging . warning ( f " OCR processing failed for { img_url } : { e } " )
return None
finally :
if temp_file and os . path . exists ( temp_path ) :
os . unlink ( temp_path )
# Helper function to search DDG and extract Flickr photo IDs
def search_ddg_for_flickr ( query ) :
ddg_query = f " { query } site:flickr.com "
ddg_url = f " https://duckduckgo.com/?q= { quote ( ddg_query ) } "
try :
response = requests . get ( ddg_url , headers = headers , timeout = 10 )
response . raise_for_status ( )
soup = BeautifulSoup ( response . text , ' html.parser ' )
photo_ids = set ( )
# Look for Flickr URLs in the search results
for link in soup . find_all ( ' a ' , href = True ) :
href = link [ ' href ' ]
# Match Flickr photo URLs like https://www.flickr.com/photos/username/1234567890
match = re . search ( r ' flickr \ .com/photos/[^/]+/( \ d+) ' , href )
if match :
photo_id = match . group ( 1 )
photo_ids . add ( photo_id )
logging . info ( f " Found { len ( photo_ids ) } Flickr photo IDs via DDG: { photo_ids } " )
return photo_ids
except Exception as e :
logging . warning ( f " DDG search failed for query ' { ddg_query } ' : { e } " )
return set ( )
# Helper function to classify keywords as specific or generic
def classify_keywords ( keywords ) :
prompt = (
" Given the following keywords from an image search query, classify each as ' specific ' (e.g., brand names, unique entities) or ' generic ' (e.g., common or abstract terms). "
" Return a JSON object mapping each keyword to its classification. \n \n "
" Keywords: " + " , " . join ( keywords ) + " \n \n "
" Example output: \n "
" ```json \n "
" { \n "
" \" Wingstop \" : \" specific \" , \n "
" \" dining \" : \" generic \" \n "
" } \n ``` "
)
try :
response = client . chat . completions . create (
model = LIGHT_TASK_MODEL ,
messages = [
{ " role " : " system " , " content " : " You are a helper that classifies keywords. " } ,
{ " role " : " user " , " content " : prompt }
] ,
max_tokens = 100 ,
temperature = 0.5
)
raw_response = response . choices [ 0 ] . message . content
json_match = re . search ( r ' ```json \ n([ \ s \ S]*?) \ n``` ' , raw_response )
if not json_match :
logging . warning ( f " Failed to parse keyword classification JSON: { raw_response } " )
return { kw : " specific " for kw in keywords }
classifications = json . loads ( json_match . group ( 1 ) )
return classifications
except Exception as e :
logging . warning ( f " Keyword classification failed: { e } . Defaulting to all specific. " )
return { kw : " specific " for kw in keywords }
# Step 1: Search DDG to find Flickr photo IDs
logging . info ( f " Searching DDG with query: ' { search_query } site:flickr.com ' " )
photo_ids = search_ddg_for_flickr ( search_query )
if photo_ids :
for photo_id in photo_ids :
photo = fetch_photo_by_id ( photo_id )
if photo :
result = process_photo ( photo )
if result :
return result
# Step 2: Break down the query into keywords and classify them for direct Flickr API search
keywords = search_query . lower ( ) . split ( )
if len ( keywords ) > 1 :
classifications = classify_keywords ( keywords )
logging . info ( f " Keyword classifications: { classifications } " )
# Prioritize specific keywords
specific_keywords = [ kw for kw , classification in classifications . items ( ) if classification == " specific " ]
if specific_keywords :
for keyword in specific_keywords :
logging . info ( f " Searching Flickr with specific keyword: ' { keyword } ' " )
photos = search_flickr ( keyword )
for photo in photos :
result = process_photo ( photo )
if result :
return result
# Step 3: Final fallback using relevance keywords
fallback_query = " " . join ( relevance_keywords ) if isinstance ( relevance_keywords , list ) else relevance_keywords
logging . info ( f " No results found. Falling back to generic query: ' { fallback_query } ' " )
photos = search_flickr ( fallback_query )
for photo in photos :
result = process_photo ( photo )
if result :
return result
logging . warning ( f " No valid Flickr image found for query ' { search_query } ' after all attempts. " )
return None , None , None , None
def select_best_author ( summary ) :
def select_best_author ( summary ) :
try :
try :
response = client . chat . completions . create (
response = client . chat . completions . create (