From 1fd1ad361b415c94d9918373d1d222c43b780e90 Mon Sep 17 00:00:00 2001 From: Shane Date: Sat, 3 May 2025 15:05:16 +1000 Subject: [PATCH] Removed Redundant process_photo --- foodie_utils.py | 188 +++++------------------------------------------- 1 file changed, 18 insertions(+), 170 deletions(-) diff --git a/foodie_utils.py b/foodie_utils.py index ff0eabc..e942794 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -293,110 +293,21 @@ def get_image(search_query): logging.error(f"All image fetch attempts failed for query '{search_query}'. Returning None.") return None, None, None, None -def process_photo(photo): - tags = [tag.text.lower() for tag in photo.getTags()] - title = photo.title.lower() if photo.title else "" - - matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title] - if matched_keywords: - logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})") - return None - - # Try 'Large' size first, fall back to 'Medium' if unavailable - img_url = None - try: - img_url = photo.getPhotoFile(size_label='Large') - except flickr_api.flickrerrors.FlickrError as e: - logging.info(f"Large size not available for photo {photo.id}: {e}, trying Medium") - try: - img_url = photo.getPhotoFile(size_label='Medium') - except flickr_api.flickrerrors.FlickrError as e: - logging.warning(f"Medium size not available for photo {photo.id}: {e}") - return None - - if not img_url or img_url in used_images: - return None +def get_flickr_image(search_query, relevance_keywords): + global last_flickr_request_time, flickr_request_count - uploader = photo.owner.username - page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}" + reset_flickr_request_count() + flickr_request_count += 1 + logging.info(f"Flickr request count: {flickr_request_count}/3600") - used_images.add(img_url) - save_used_images() + # Enforce a minimum delay of 10 seconds between Flickr requests + current_time = time.time() + time_since_last_request = current_time - last_flickr_request_time + if time_since_last_request < 10: + time.sleep(10 - time_since_last_request) - flickr_data = { - "title": search_query, - "image_url": img_url, - "source": "Flickr", - "uploader": uploader, - "page_url": page_url, - "timestamp": datetime.now(timezone.utc).isoformat() - } - flickr_file = "/home/shane/foodie_automator/flickr_images.json" - with open(flickr_file, 'a') as f: - json.dump(flickr_data, f) - f.write('\n') - logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}") + last_flickr_request_time = time.time() - logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})") - return img_url, "Flickr", uploader, page_url - - def search_ddg_for_flickr(query): - ddg_query = f"{query} site:flickr.com" - ddg_url = f"https://duckduckgo.com/?q={quote(ddg_query)}" - try: - response = requests.get(ddg_url, headers=headers, timeout=10) - response.raise_for_status() - soup = BeautifulSoup(response.text, 'html.parser') - - photo_ids = set() - for link in soup.find_all('a', href=True): - href = link['href'] - match = re.search(r'flickr\.com/photos/[^/]+/(\d+)', href) - if match: - photo_id = match.group(1) - photo_ids.add(photo_id) - - photo_ids = list(photo_ids)[:2] # Limit to 2 IDs - logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}") - return photo_ids - except Exception as e: - logging.warning(f"DDG search failed for query '{ddg_query}': {e}") - return set() - - def classify_keywords(keywords): - prompt = ( - "Given the following keywords from an image search query, classify each as 'specific' (e.g., brand names, unique entities like 'Taco Bell' or 'Paris') or 'generic' (e.g., common or abstract terms like 'dining' or 'trends'). " - "Return a JSON object mapping each keyword to its classification.\n\n" - "Keywords: " + ", ".join(keywords) + "\n\n" - "Example output format (do not use these exact keywords in your response):\n" - "```json\n" - "{\n" - " \"keyword1\": \"specific\",\n" - " \"keyword2\": \"generic\"\n" - "}\n```" - ) - try: - response = client.chat.completions.create( - model=LIGHT_TASK_MODEL, - messages=[ - {"role": "system", "content": "You are a helper that classifies keywords."}, - {"role": "user", "content": prompt} - ], - max_tokens=100, - temperature=0.5 - ) - raw_response = response.choices[0].message.content - json_match = re.search(r'```json\n([\s\S]*?)\n```', raw_response) - if not json_match: - logging.warning(f"Failed to parse keyword classification JSON: {raw_response}") - return {kw: "specific" for kw in keywords} - - classifications = json.loads(json_match.group(1)) - return classifications - except Exception as e: - logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.") - return {kw: "specific" for kw in keywords} - # Step 1: Search DDG to find Flickr photo IDs logging.info(f"Searching DDG with query: '{search_query} site:flickr.com'") photo_ids = search_ddg_for_flickr(search_query) @@ -414,6 +325,7 @@ def process_photo(photo): classifications = classify_keywords(keywords) logging.info(f"Keyword classifications: {classifications}") + # Prioritize specific keywords specific_keywords = [kw for kw, classification in classifications.items() if classification == "specific"] if specific_keywords: for keyword in specific_keywords: @@ -424,42 +336,17 @@ def process_photo(photo): if result: return result - # Step 3: Final fallback to a generic food-related query - logging.info(f"No results found. Falling back to generic query: 'food dining'") - photos = search_flickr("food dining") + # Step 3: Final fallback using relevance keywords + fallback_query = " ".join(relevance_keywords) if isinstance(relevance_keywords, list) else relevance_keywords + logging.info(f"No results found. Falling back to generic query: '{fallback_query}'") + photos = search_flickr(fallback_query) for photo in photos: result = process_photo(photo) if result: return result - logging.warning(f"No valid Flickr image found in fallback for query '{search_query}'. Trying Pixabay.") - - # Fallback to Pixabay - try: - pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(search_query)}&image_type=photo&per_page=10" - response = requests.get(pixabay_url, timeout=10) - response.raise_for_status() - data = response.json() - - for hit in data.get('hits', []): - img_url = hit.get('webformatURL') - if not img_url or img_url in used_images: - continue - uploader = hit.get('user', 'Unknown') - page_url = hit.get('pageURL', img_url) - - used_images.add(img_url) - save_used_images() - - logging.debug(f"Image selected for query '{search_query}': {img_url}") - return img_url, "Pixabay", uploader, page_url - - logging.warning(f"No valid Pixabay image found for query '{search_query}'.") - return None, None, None, None - - except Exception as e: - logging.error(f"Pixabay image fetch failed for query '{search_query}': {e}") - return None, None, None, None + logging.warning(f"No valid Flickr image found for query '{search_query}' after all attempts.") + return None, None, None, None def generate_image_query(title, summary): try: @@ -1119,45 +1006,6 @@ def get_flickr_image(search_query, relevance_keywords): logging.warning(f"Failed to fetch Flickr photo ID {photo_id}: {e}") return None - # Helper function to process a photo (fetch URL and metadata only) - def process_photo(photo): - tags = [tag.text.lower() for tag in photo.getTags()] - title = photo.title.lower() if photo.title else "" - - matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title] - if matched_keywords: - logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})") - return None - - img_url = photo.getPhotoFile(size_label='Large') - if not img_url: - img_url = photo.getPhotoFile(size_label='Medium') - if not img_url or img_url in used_images: - return None - - uploader = photo.owner.username - page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}" - - used_images.add(img_url) - save_used_images() - - flickr_data = { - "title": search_query, - "image_url": img_url, - "source": "Flickr", - "uploader": uploader, - "page_url": page_url, - "timestamp": datetime.now(timezone.utc).isoformat() - } - flickr_file = "/home/shane/foodie_automator/flickr_images.json" - with open(flickr_file, 'a') as f: - json.dump(flickr_data, f) - f.write('\n') - logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}") - - logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})") - return img_url, "Flickr", uploader, page_url - # Helper function to search DDG and extract Flickr photo IDs def search_ddg_for_flickr(query): ddg_query = f"{query} site:flickr.com"