diff --git a/foodie_utils.py b/foodie_utils.py
index 61e30a8..252e602 100644
--- a/foodie_utils.py
+++ b/foodie_utils.py
@@ -293,61 +293,6 @@ def get_image(search_query):
     logging.error(f"All image fetch attempts failed for query '{search_query}'. Returning None.")
     return None, None, None, None
 
-def get_flickr_image(search_query, relevance_keywords):
-    global last_flickr_request_time, flickr_request_count
-
-    reset_flickr_request_count()
-    flickr_request_count += 1
-    logging.info(f"Flickr request count: {flickr_request_count}/3600")
-
-    # Enforce a minimum delay of 10 seconds between Flickr requests
-    current_time = time.time()
-    time_since_last_request = current_time - last_flickr_request_time
-    if time_since_last_request < 10:
-        time.sleep(10 - time_since_last_request)
-
-    last_flickr_request_time = time.time()
-
-    # Step 1: Search DDG to find Flickr photo IDs
-    logging.info(f"Searching DDG with query: '{search_query} site:flickr.com'")
-    photo_ids = search_ddg_for_flickr(search_query)
-    if photo_ids:
-        for photo_id in photo_ids:
-            photo = fetch_photo_by_id(photo_id)
-            if photo:
-                result = process_photo(photo)
-                if result:
-                    return result
-
-    # Step 2: Break down the query into keywords and classify them for direct Flickr API search
-    keywords = search_query.lower().split()
-    if len(keywords) > 1:
-        classifications = classify_keywords(keywords)
-        logging.info(f"Keyword classifications: {classifications}")
-
-        # Prioritize specific keywords
-        specific_keywords = [kw for kw, classification in classifications.items() if classification == "specific"]
-        if specific_keywords:
-            for keyword in specific_keywords:
-                logging.info(f"Searching Flickr with specific keyword: '{keyword}'")
-                photos = search_flickr(keyword)
-                for photo in photos:
-                    result = process_photo(photo)
-                    if result:
-                        return result
-
-    # Step 3: Final fallback using relevance keywords
-    fallback_query = " ".join(relevance_keywords) if isinstance(relevance_keywords, list) else relevance_keywords
-    logging.info(f"No results found. Falling back to generic query: '{fallback_query}'")
-    photos = search_flickr(fallback_query)
-    for photo in photos:
-        result = process_photo(photo)
-        if result:
-            return result
-
-    logging.warning(f"No valid Flickr image found for query '{search_query}' after all attempts.")
-    return None, None, None, None
-
 def generate_image_query(title, summary):
     try:
         prompt = (
@@ -1012,6 +957,86 @@ def process_photo(photo, search_query):
     logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
     return img_url, "Flickr", uploader, page_url
 
+def search_flickr(query, per_page=5):
+    try:
+        photos = flickr_api.Photo.search(
+            text=query,
+            per_page=per_page,
+            sort='relevance',
+            safe_search=1,
+            media='photos',
+            license='4,5,9,10'
+        )
+        return photos
+    except Exception as e:
+        logging.warning(f"Flickr API error for query '{query}': {e}")
+        return []
+
+def fetch_photo_by_id(photo_id):
+    try:
+        photo = flickr_api.Photo(id=photo_id)
+        return photo
+    except Exception as e:
+        logging.warning(f"Failed to fetch Flickr photo ID {photo_id}: {e}")
+        return None
+
+def search_ddg_for_flickr(query):
+    ddg_query = f"{query} site:flickr.com"
+    ddg_url = f"https://duckduckgo.com/?q={quote(ddg_query)}"
+    try:
+        response = requests.get(ddg_url, headers={'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}, timeout=10)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, 'html.parser')
+
+        photo_ids = set()
+        for link in soup.find_all('a', href=True):
+            href = link['href']
+            match = re.search(r'flickr\.com/photos/[^/]+/(\d+)', href)
+            if match:
+                photo_id = match.group(1)
+                photo_ids.add(photo_id)
+
+        photo_ids = list(photo_ids)[:2]  # Limit to 2 IDs
+        logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}")
+        return photo_ids
+    except Exception as e:
+        logging.warning(f"DDG search failed for query '{ddg_query}': {e}")
+        return set()
+
+def classify_keywords(keywords):
+    prompt = (
+        "Given the following keywords from an image search query, classify each as 'specific' (e.g., brand names, unique entities like 'Taco Bell' or 'Paris') or 'generic' (e.g., common or abstract terms like 'dining' or 'trends'). "
+        "Return a JSON object mapping each keyword to its classification.\n\n"
+        "Keywords: " + ", ".join(keywords) + "\n\n"
+        "Example output format (do not use these exact keywords in your response):\n"
+        "```json\n"
+        "{\n"
+        "  \"keyword1\": \"specific\",\n"
+        "  \"keyword2\": \"generic\"\n"
+        "}\n```"
+    )
+    try:
+        response = client.chat.completions.create(
+            model=LIGHT_TASK_MODEL,
+            messages=[
+                {"role": "system", "content": "You are a helper that classifies keywords."},
+                {"role": "user", "content": prompt}
+            ],
+            max_tokens=100,
+            temperature=0.5
+        )
+        raw_response = response.choices[0].message.content
+        json_match = re.search(r'```json\n([\s\S]*?)\n```', raw_response)
+        if not json_match:
+            logging.warning(f"Failed to parse keyword classification JSON: {raw_response}")
+            return {kw: "specific" for kw in keywords}
+
+        classifications = json.loads(json_match.group(1))
+        return classifications
+    except Exception as e:
+        logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
+        return {kw: "specific" for kw in keywords}
+
 def get_flickr_image(search_query, relevance_keywords):
     global last_flickr_request_time, flickr_request_count