NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Hunk boundaries and blank context lines were
placed to match the original hunk-header counts; leading whitespace is
reconstructed and should be checked against foodie_utils.py before applying
(or apply with `git apply --ignore-whitespace`). The added lines in
generate_image_query were amended to accept unfenced JSON replies, and
docstrings were added to generate_image_query and save_used_images, so the
new-side counts/offsets of hunks 3-5 were recomputed and the post-image hash
on the index line no longer corresponds to the result.

diff --git a/foodie_utils.py b/foodie_utils.py
index d2faf3c..6a8629d 100644
--- a/foodie_utils.py
+++ b/foodie_utils.py
@@ -278,6 +278,10 @@ def get_image(search_query):
             uploader = photo.owner.username
             page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
 
+            # Add the image URL to used_images
+            used_images.add(img_url)
+            save_used_images()
+
             # Save Flickr image metadata
             flickr_data = {
                 "title": search_query,
@@ -329,6 +333,11 @@ def get_image(search_query):
                 continue
             uploader = hit.get('user', 'Unknown')
             page_url = hit.get('pageURL', img_url)
+
+            # Add the image URL to used_images
+            used_images.add(img_url)
+            save_used_images()
+
             logging.debug(f"Image selected for query '{search_query}': {img_url}")
             return img_url, "Pixabay", uploader, page_url
 
@@ -340,50 +349,52 @@ def get_image(search_query):
     return None, None, None, None
 
 def generate_image_query(content):
+    """Ask the chat model for an image search query matching *content*.
+
+    Returns a ``(search, relevance)`` tuple read from the 'search' and
+    'relevance' keys of the model's JSON reply. If the request fails or the
+    reply cannot be parsed, returns ("restaurant dining", "dining trends").
+    """
+    prompt = (
+        "Given the following content, generate a concise image search query (max 5 words) that would likely yield relevant, visually appealing images on platforms like Flickr or Pixabay. Focus on concrete, visual concepts related to food, dining, or restaurants, avoiding overly abstract terms. Also provide relevance keywords (max 5 words) to filter results. Return the result as a JSON object with 'search' and 'relevance' keys.\n\n"
+        "Content:\n"
+        f"{content}\n\n"
+        "Example output:\n"
+        "```json\n"
+        "{\n"
+        " \"search\": \"modern dining trends\",\n"
+        " \"relevance\": \"dining habits restaurant trends\"\n"
+        "}\n```"
+    )
+
     try:
         response = client.chat.completions.create(
             model=LIGHT_TASK_MODEL,
             messages=[
-                {"role": "system", "content": (
-                    "From this content (title and summary), generate two sets of 2-3 concise keywords for an image search about restaurant/food industry trends:\n"
-                    "1. Search keywords: For finding images (e.g., 'AI restaurant technology'). Focus on key themes like technology, sustainability, dining, or specific food concepts.\n"
-                    "2. Relevance keywords: For filtering relevant images (e.g., 'ai tech dining'). Focus on core concepts to ensure match.\n"
-                    "Avoid vague terms like 'trends', 'future', or unrelated words like 'dog', 'family'. "
-                    "Return as JSON: {'search': 'keyword1 keyword2', 'relevance': 'keyword3 keyword4'}"
-                )},
-                {"role": "user", "content": content}
+                {"role": "system", "content": "You are a helpful assistant that generates concise image search queries."},
+                {"role": "user", "content": prompt}
             ],
-            max_tokens=100
+            max_tokens=100,
+            temperature=0.5
         )
 
-        raw_result = response.choices[0].message.content.strip()
-        logging.info(f"Raw GPT image query response: '{raw_result}'")
-        print(f"DEBUG: Raw GPT image query response: '{raw_result}'")
-        cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip()
-        result = json.loads(cleaned_result)
-        if not isinstance(result, dict) or "search" not in result or "relevance" not in result or len(result["search"].split()) < 2:
-            logging.warning(f"Invalid image query format: {result}, using fallback")
-            words = re.findall(r'\w+', content.lower())
-            filtered_words = [w for w in words if w not in RECIPE_KEYWORDS + PROMO_KEYWORDS + ['trends', 'future', 'dog', 'family']]
-            search = " ".join(filtered_words[:3]) or "restaurant innovation"
-            relevance = filtered_words[3:6] or ["dining", "tech"]
-            result = {"search": search, "relevance": " ".join(relevance)}
+        raw_response = response.choices[0].message.content
+        logging.debug(f"Raw GPT image query response: '{raw_response}'")
+
+        # Prefer a fenced JSON block, but accept a bare JSON reply as well;
+        # unparseable replies raise here and take the fallback below.
+        json_match = re.search(r'```(?:json)?\s*([\s\S]*?)\s*```', raw_response)
+        json_text = json_match.group(1) if json_match else raw_response.strip()
+        query_data = json.loads(json_text)
+        search_query = query_data.get("search", "restaurant dining")
+        relevance_keywords = query_data.get("relevance", "dining trends")
+
+        logging.debug(f"Image query from content: {query_data}")
+        return search_query, relevance_keywords
 
-        logging.info(f"Generated image query: {result}")
-        print(f"DEBUG: Image query from content: {result}")
-        return result["search"], result["relevance"].split()
-    except json.JSONDecodeError as e:
-        logging.error(f"JSON parsing failed for image query: {e}, raw response: '{raw_result}'")
-        words = re.findall(r'\w+', content.lower())
-        filtered_words = [w for w in words if w not in RECIPE_KEYWORDS + PROMO_KEYWORDS + ['trends', 'future', 'dog', 'family']]
-        search = " ".join(filtered_words[:3]) or "restaurant innovation"
-        relevance = filtered_words[3:6] or ["dining", "tech"]
-        logging.info(f"Fallback image query: {{'search': '{search}', 'relevance': '{' '.join(relevance)}'}}")
-        return search, relevance
     except Exception as e:
-        logging.error(f"Image query generation failed: {e}")
-        print(f"Image Query Error: {e}")
-        return None, None
+        logging.warning(f"Failed to generate image query: {e}. Using fallback.")
+        return "restaurant dining", "dining trends"
 
 def smart_image_and_filter(title, summary):
     try:
@@ -877,6 +888,33 @@ exclude_keywords = [
     "design", "advertisement", "illustration", "diagram", "layout", "print"
 ]
 
+# Initialize used_images as a set to track used image URLs
+used_images_file = "/home/shane/foodie_automator/used_images.json"
+used_images = set()
+
+# Load used images from file if it exists
+if os.path.exists(used_images_file):
+    try:
+        with open(used_images_file, 'r') as f:
+            data = json.load(f)
+            used_images.update(data)
+        logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}")
+    except Exception as e:
+        logging.warning(f"Failed to load used images from {used_images_file}: {e}")
+
+# Function to save used_images to file
+def save_used_images():
+    """Write used_images to used_images_file as a JSON list.
+
+    Errors are logged as warnings rather than raised.
+    """
+    try:
+        with open(used_images_file, 'w') as f:
+            json.dump(list(used_images), f)
+        logging.info(f"Saved {len(used_images)} used image URLs to {used_images_file}")
+    except Exception as e:
+        logging.warning(f"Failed to save used images to {used_images_file}: {e}")
+
 def reset_flickr_request_count():
     global flickr_request_count, flickr_request_start_time
     if time.time() - flickr_request_start_time >= 3600:  # Reset every hour
@@ -951,6 +989,10 @@ def get_flickr_image(search_query, relevance_keywords):
             uploader = photo.owner.username
             page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
 
+            # Add the image URL to used_images
+            used_images.add(img_url)
+            save_used_images()
+
             # Save Flickr image metadata
             flickr_data = {
                 "title": search_query,