Branch: my-fix-branch
Shane · 7 months ago
commit 6d945dae67 (parent 1fd1ad361b)
1 changed file, foodie_utils.py (140 lines changed)

@@ -964,6 +964,54 @@ def reset_flickr_request_count():
    flickr_request_count = 0
    flickr_request_start_time = time.time()
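
# process_photo: filter a Flickr photo against the excluded keywords, pick a usable
# image size, record the image URL as used, and save its metadata (returns None to skip).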
def process_photo(photo, search_query):
    tags = [tag.text.lower() for tag in photo.getTags()]
    title = photo.title.lower() if photo.title else ""
    matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title]
    if matched_keywords:
        logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})")
        return None
    # Try 'Large' size first, fall back to 'Medium' if unavailable
    img_url = None
    try:
        img_url = photo.getPhotoFile(size_label='Large')
    except flickr_api.flickrerrors.FlickrError as e:
        logging.info(f"Large size not available for photo {photo.id}: {e}, trying Medium")
        try:
            img_url = photo.getPhotoFile(size_label='Medium')
        except flickr_api.flickrerrors.FlickrError as e:
            logging.warning(f"Medium size not available for photo {photo.id}: {e}")
            return None
    if not img_url or img_url in used_images:
        logging.info(f"Image URL invalid or already used for photo {photo.id}: {img_url}")
        return None
    uploader = photo.owner.username
    page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
    used_images.add(img_url)
    save_used_images()
    flickr_data = {
        "title": search_query,
        "image_url": img_url,
        "source": "Flickr",
        "uploader": uploader,
        "page_url": page_url,
        "timestamp": datetime.now(timezone.utc).isoformat()
    }
    flickr_file = "/home/shane/foodie_automator/flickr_images.json"
    with open(flickr_file, 'a') as f:
        json.dump(flickr_data, f)
        f.write('\n')
    logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}")
    logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
    return img_url, "Flickr", uploader, page_url
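
# get_flickr_image: find a usable Flickr image for search_query, first via Flickr photo
# IDs discovered through a DuckDuckGo site search, then via direct Flickr keyword searches.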
def get_flickr_image(search_query, relevance_keywords):
    global last_flickr_request_time, flickr_request_count
@@ -979,92 +1027,6 @@ def get_flickr_image(search_query, relevance_keywords):
    last_flickr_request_time = time.time()
    headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
    # Helper function to search Flickr with a given query
    def search_flickr(query, per_page=5):  # Reduced per_page to limit results
        try:
            photos = flickr_api.Photo.search(
                text=query,
                per_page=per_page,
                sort='relevance',
                safe_search=1,
                media='photos',
                license='4,5,9,10'  # license IDs for permissive/CC photos (4=CC BY, 5=CC BY-SA, 9=CC0, 10=Public Domain Mark)
            )
            return photos
        except Exception as e:
            logging.warning(f"Flickr API error for query '{query}': {e}")
            return []
    # Helper function to fetch a Flickr photo by ID
    def fetch_photo_by_id(photo_id):
        try:
            photo = flickr_api.Photo(id=photo_id)
            return photo
        except Exception as e:
            logging.warning(f"Failed to fetch Flickr photo ID {photo_id}: {e}")
            return None
    # Helper function to search DDG and extract Flickr photo IDs
    def search_ddg_for_flickr(query):
        ddg_query = f"{query} site:flickr.com"
        ddg_url = f"https://duckduckgo.com/?q={quote(ddg_query)}"
        try:
            response = requests.get(ddg_url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            photo_ids = set()
            for link in soup.find_all('a', href=True):
                href = link['href']
                match = re.search(r'flickr\.com/photos/[^/]+/(\d+)', href)
                if match:
                    photo_id = match.group(1)
                    photo_ids.add(photo_id)
            photo_ids = list(photo_ids)[:2]  # Limit to 2 IDs
            logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}")
            return photo_ids
        except Exception as e:
            logging.warning(f"DDG search failed for query '{ddg_query}': {e}")
            return set()
    # Helper function to classify keywords as specific or generic
    def classify_keywords(keywords):
        prompt = (
            "Given the following keywords from an image search query, classify each as 'specific' (e.g., brand names, unique entities like 'Taco Bell' or 'Paris') or 'generic' (e.g., common or abstract terms like 'dining' or 'trends'). "
            "Return a JSON object mapping each keyword to its classification.\n\n"
            "Keywords: " + ", ".join(keywords) + "\n\n"
            "Example output format (do not use these exact keywords in your response):\n"
            "```json\n"
            "{\n"
            " \"keyword1\": \"specific\",\n"
            " \"keyword2\": \"generic\"\n"
            "}\n```"
        )
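        # Ask the chat model to classify the keywords; the reply is parsed from a ```json fenced block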
        try:
            response = client.chat.completions.create(
                model=LIGHT_TASK_MODEL,
                messages=[
                    {"role": "system", "content": "You are a helper that classifies keywords."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=100,
                temperature=0.5
            )
            raw_response = response.choices[0].message.content
            json_match = re.search(r'```json\n([\s\S]*?)\n```', raw_response)
            if not json_match:
                logging.warning(f"Failed to parse keyword classification JSON: {raw_response}")
                return {kw: "specific" for kw in keywords}
            classifications = json.loads(json_match.group(1))
            return classifications
        except Exception as e:
            logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
            return {kw: "specific" for kw in keywords}
    # Step 1: Search DDG to find Flickr photo IDs
    logging.info(f"Searching DDG with query: '{search_query} site:flickr.com'")
    photo_ids = search_ddg_for_flickr(search_query)
@@ -1072,7 +1034,7 @@ def get_flickr_image(search_query, relevance_keywords):
    for photo_id in photo_ids:
        photo = fetch_photo_by_id(photo_id)
        if photo:
-           result = process_photo(photo)
+           result = process_photo(photo, search_query)
            if result:
                return result
@@ -1089,7 +1051,7 @@ def get_flickr_image(search_query, relevance_keywords):
logging.info(f"Searching Flickr with specific keyword: '{keyword}'")
photos = search_flickr(keyword)
for photo in photos:
result = process_photo(photo)
result = process_photo(photo, search_query)
if result:
return result
@@ -1098,7 +1060,7 @@ def get_flickr_image(search_query, relevance_keywords):
logging.info(f"No results found. Falling back to generic query: '{fallback_query}'")
photos = search_flickr(fallback_query)
for photo in photos:
result = process_photo(photo)
result = process_photo(photo, search_query)
if result:
return result
