@ -278,6 +278,10 @@ def get_image(search_query):
uploader = photo . owner . username
page_url = f " https://www.flickr.com/photos/ { photo . owner . nsid } / { photo . id } "
# Add the image URL to used_images
used_images . add ( img_url )
save_used_images ( )
# Save Flickr image metadata
flickr_data = {
" title " : search_query ,
@ -329,6 +333,11 @@ def get_image(search_query):
continue
uploader = hit . get ( ' user ' , ' Unknown ' )
page_url = hit . get ( ' pageURL ' , img_url )
# Add the image URL to used_images
used_images . add ( img_url )
save_used_images ( )
logging . debug ( f " Image selected for query ' { search_query } ' : { img_url } " )
return img_url , " Pixabay " , uploader , page_url
@ -340,50 +349,48 @@ def get_image(search_query):
return None , None , None , None
def generate_image_query ( content ) :
prompt = (
" Given the following content, generate a concise image search query (max 5 words) that would likely yield relevant, visually appealing images on platforms like Flickr or Pixabay. Focus on concrete, visual concepts related to food, dining, or restaurants, avoiding overly abstract terms. Also provide relevance keywords (max 5 words) to filter results. Return the result as a JSON object with ' search ' and ' relevance ' keys. \n \n "
" Content: \n "
f " { content } \n \n "
" Example output: \n "
" ```json \n "
" { \n "
" \" search \" : \" modern dining trends \" , \n "
" \" relevance \" : \" dining habits restaurant trends \" \n "
" } \n ``` "
)
try :
response = client . chat . completions . create (
model = LIGHT_TASK_MODEL ,
messages = [
{ " role " : " system " , " content " : (
" From this content (title and summary), generate two sets of 2-3 concise keywords for an image search about restaurant/food industry trends: \n "
" 1. Search keywords: For finding images (e.g., ' AI restaurant technology ' ). Focus on key themes like technology, sustainability, dining, or specific food concepts. \n "
" 2. Relevance keywords: For filtering relevant images (e.g., ' ai tech dining ' ). Focus on core concepts to ensure match. \n "
" Avoid vague terms like ' trends ' , ' future ' , or unrelated words like ' dog ' , ' family ' . "
" Return as JSON: { ' search ' : ' keyword1 keyword2 ' , ' relevance ' : ' keyword3 keyword4 ' } "
) } ,
{ " role " : " user " , " content " : content }
{ " role " : " system " , " content " : " You are a helpful assistant that generates concise image search queries. " } ,
{ " role " : " user " , " content " : prompt }
] ,
max_tokens = 100
max_tokens = 100 ,
temperature = 0.5
)
raw_result = response . choices [ 0 ] . message . content . strip ( )
logging . info ( f " Raw GPT image query response: ' { raw_result } ' " )
print ( f " DEBUG: Raw GPT image query response: ' { raw_result } ' " )
cleaned_result = re . sub ( r ' ```json \ s*| \ s*``` ' , ' ' , raw_result ) . strip ( )
result = json . loads ( cleaned_result )
if not isinstance ( result , dict ) or " search " not in result or " relevance " not in result or len ( result [ " search " ] . split ( ) ) < 2 :
logging . warning ( f " Invalid image query format: { result } , using fallback " )
words = re . findall ( r ' \ w+ ' , content . lower ( ) )
filtered_words = [ w for w in words if w not in RECIPE_KEYWORDS + PROMO_KEYWORDS + [ ' trends ' , ' future ' , ' dog ' , ' family ' ] ]
search = " " . join ( filtered_words [ : 3 ] ) or " restaurant innovation "
relevance = filtered_words [ 3 : 6 ] or [ " dining " , " tech " ]
result = { " search " : search , " relevance " : " " . join ( relevance ) }
raw_response = response . choices [ 0 ] . message . content
logging . debug ( f " Raw GPT image query response: ' { raw_response } ' " )
# Extract JSON from the response
json_match = re . search ( r ' ```json \ n([ \ s \ S]*?) \ n``` ' , raw_response )
if not json_match :
logging . warning ( f " Failed to parse image query JSON from GPT response: { raw_response } " )
return " restaurant dining " , " dining trends "
query_data = json . loads ( json_match . group ( 1 ) )
search_query = query_data . get ( " search " , " restaurant dining " )
relevance_keywords = query_data . get ( " relevance " , " dining trends " )
logging . debug ( f " Image query from content: { query_data } " )
return search_query , relevance_keywords
logging . info ( f " Generated image query: { result } " )
print ( f " DEBUG: Image query from content: { result } " )
return result [ " search " ] , result [ " relevance " ] . split ( )
except json . JSONDecodeError as e :
logging . error ( f " JSON parsing failed for image query: { e } , raw response: ' { raw_result } ' " )
words = re . findall ( r ' \ w+ ' , content . lower ( ) )
filtered_words = [ w for w in words if w not in RECIPE_KEYWORDS + PROMO_KEYWORDS + [ ' trends ' , ' future ' , ' dog ' , ' family ' ] ]
search = " " . join ( filtered_words [ : 3 ] ) or " restaurant innovation "
relevance = filtered_words [ 3 : 6 ] or [ " dining " , " tech " ]
logging . info ( f " Fallback image query: {{ ' search ' : ' { search } ' , ' relevance ' : ' { ' ' . join ( relevance ) } ' }} " )
return search , relevance
except Exception as e :
logging . error ( f " Image query generation failed: { e } " )
print ( f " Image Query Error: { e } " )
return None , None
logging . warning ( f " Failed to generate image query: { e } . Using fallback. " )
return " restaurant dining " , " dining trends "
def smart_image_and_filter ( title , summary ) :
try :
@ -877,6 +884,29 @@ exclude_keywords = [
" design " , " advertisement " , " illustration " , " diagram " , " layout " , " print "
]
# Initialize used_images as a set to track used image URLs
used_images_file = " /home/shane/foodie_automator/used_images.json "
used_images = set ( )
# Load used images from file if it exists
if os . path . exists ( used_images_file ) :
try :
with open ( used_images_file , ' r ' ) as f :
data = json . load ( f )
used_images . update ( data )
logging . info ( f " Loaded { len ( used_images ) } used image URLs from { used_images_file } " )
except Exception as e :
logging . warning ( f " Failed to load used images from { used_images_file } : { e } " )
# Function to save used_images to file
def save_used_images ( ) :
try :
with open ( used_images_file , ' w ' ) as f :
json . dump ( list ( used_images ) , f )
logging . info ( f " Saved { len ( used_images ) } used image URLs to { used_images_file } " )
except Exception as e :
logging . warning ( f " Failed to save used images to { used_images_file } : { e } " )
def reset_flickr_request_count ( ) :
global flickr_request_count , flickr_request_start_time
if time . time ( ) - flickr_request_start_time > = 3600 : # Reset every hour
@ -951,6 +981,10 @@ def get_flickr_image(search_query, relevance_keywords):
uploader = photo . owner . username
page_url = f " https://www.flickr.com/photos/ { photo . owner . nsid } / { photo . id } "
# Add the image URL to used_images
used_images . add ( img_url )
save_used_images ( )
# Save Flickr image metadata
flickr_data = {
" title " : search_query ,