|
|
|
|
@@ -275,40 +275,51 @@ def get_image(search_query): |
|
|
|
|
return None |
|
|
|
|
|
|
|
|
|
def process_photo(photo): |
|
|
|
|
tags = [tag.text.lower() for tag in photo.getTags()] |
|
|
|
|
title = photo.title.lower() if photo.title else "" |
|
|
|
|
|
|
|
|
|
matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title] |
|
|
|
|
if matched_keywords: |
|
|
|
|
logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})") |
|
|
|
|
return None |
|
|
|
|
|
|
|
|
|
img_url = photo.getPhotoFile(size_label='Medium') |
|
|
|
|
if not img_url or img_url in used_images: |
|
|
|
|
tags = [tag.text.lower() for tag in photo.getTags()] |
|
|
|
|
title = photo.title.lower() if photo.title else "" |
|
|
|
|
|
|
|
|
|
matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title] |
|
|
|
|
if matched_keywords: |
|
|
|
|
logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})") |
|
|
|
|
return None |
|
|
|
|
|
|
|
|
|
# Try 'Large' size first, fall back to 'Medium' if unavailable |
|
|
|
|
img_url = None |
|
|
|
|
try: |
|
|
|
|
img_url = photo.getPhotoFile(size_label='Large') |
|
|
|
|
except flickr_api.flickrerrors.FlickrError as e: |
|
|
|
|
logging.info(f"Large size not available for photo {photo.id}: {e}, trying Medium") |
|
|
|
|
try: |
|
|
|
|
img_url = photo.getPhotoFile(size_label='Medium') |
|
|
|
|
except flickr_api.flickrerrors.FlickrError as e: |
|
|
|
|
logging.warning(f"Medium size not available for photo {photo.id}: {e}") |
|
|
|
|
return None |
|
|
|
|
|
|
|
|
|
uploader = photo.owner.username |
|
|
|
|
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}" |
|
|
|
|
|
|
|
|
|
used_images.add(img_url) |
|
|
|
|
save_used_images() |
|
|
|
|
|
|
|
|
|
flickr_data = { |
|
|
|
|
"title": search_query, |
|
|
|
|
"image_url": img_url, |
|
|
|
|
"source": "Flickr", |
|
|
|
|
"uploader": uploader, |
|
|
|
|
"page_url": page_url, |
|
|
|
|
"timestamp": datetime.now(timezone.utc).isoformat() |
|
|
|
|
} |
|
|
|
|
flickr_file = "/home/shane/foodie_automator/flickr_images.json" |
|
|
|
|
with open(flickr_file, 'a') as f: |
|
|
|
|
json.dump(flickr_data, f) |
|
|
|
|
f.write('\n') |
|
|
|
|
logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}") |
|
|
|
|
|
|
|
|
|
logging.info(f"Fallback Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})") |
|
|
|
|
return img_url, "Flickr", uploader, page_url |
|
|
|
|
|
|
|
|
|
if not img_url or img_url in used_images: |
|
|
|
|
return None |
|
|
|
|
|
|
|
|
|
uploader = photo.owner.username |
|
|
|
|
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}" |
|
|
|
|
|
|
|
|
|
used_images.add(img_url) |
|
|
|
|
save_used_images() |
|
|
|
|
|
|
|
|
|
flickr_data = { |
|
|
|
|
"title": search_query, |
|
|
|
|
"image_url": img_url, |
|
|
|
|
"source": "Flickr", |
|
|
|
|
"uploader": uploader, |
|
|
|
|
"page_url": page_url, |
|
|
|
|
"timestamp": datetime.now(timezone.utc).isoformat() |
|
|
|
|
} |
|
|
|
|
flickr_file = "/home/shane/foodie_automator/flickr_images.json" |
|
|
|
|
with open(flickr_file, 'a') as f: |
|
|
|
|
json.dump(flickr_data, f) |
|
|
|
|
f.write('\n') |
|
|
|
|
logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}") |
|
|
|
|
|
|
|
|
|
logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})") |
|
|
|
|
return img_url, "Flickr", uploader, page_url |
|
|
|
|
|
|
|
|
|
def search_ddg_for_flickr(query): |
|
|
|
|
ddg_query = f"{query} site:flickr.com" |
|
|
|
|
@@ -1010,24 +1021,19 @@ if os.path.exists(used_images_file): |
|
|
|
|
else: |
|
|
|
|
data = json.loads(content) |
|
|
|
|
if not isinstance(data, list): |
|
|
|
|
logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Resetting.") |
|
|
|
|
data = [] |
|
|
|
|
else: |
|
|
|
|
# Handle malformed format (list of lists or invalid entries) |
|
|
|
|
flat_data = [] |
|
|
|
|
for item in data: |
|
|
|
|
if isinstance(item, str) and item.startswith('https://'): |
|
|
|
|
flat_data.append(item) |
|
|
|
|
elif isinstance(item, list): |
|
|
|
|
logging.warning(f"Fixing malformed entry in {used_images_file}: {item}") |
|
|
|
|
flat_data.extend([sub_item for sub_item in item if isinstance(sub_item, str) and sub_item.startswith('https://')]) |
|
|
|
|
else: |
|
|
|
|
logging.warning(f"Skipping invalid entry in {used_images_file}: {item}") |
|
|
|
|
data = flat_data |
|
|
|
|
logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Converting to list.") |
|
|
|
|
if isinstance(data, dict): |
|
|
|
|
# If it's a dict, try to extract URLs from values |
|
|
|
|
data = [v for v in data.values() if isinstance(v, str) and v.startswith('https://')] |
|
|
|
|
else: |
|
|
|
|
logging.warning(f"Cannot convert {type(data)} to list. Resetting to empty list.") |
|
|
|
|
data = [] |
|
|
|
|
# Filter out non-string or non-URL entries |
|
|
|
|
data = [item for item in data if isinstance(item, str) and item.startswith('https://')] |
|
|
|
|
used_images.update(data) |
|
|
|
|
logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}") |
|
|
|
|
except Exception as e: |
|
|
|
|
logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting file.") |
|
|
|
|
logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting to empty set.") |
|
|
|
|
used_images = set() |
|
|
|
|
with open(used_images_file, 'w') as f: |
|
|
|
|
json.dump([], f) |
|
|
|
|
@@ -1035,17 +1041,14 @@ if os.path.exists(used_images_file): |
|
|
|
|
# Function to save used_images to file |
|
|
|
|
def save_used_images(): |
|
|
|
|
try: |
|
|
|
|
# Ensure used_images contains only valid URLs |
|
|
|
|
valid_urls = [url for url in used_images if isinstance(url, str) and url.startswith('https://')] |
|
|
|
|
if len(valid_urls) != len(used_images): |
|
|
|
|
logging.warning(f"Found {len(used_images) - len(valid_urls)} invalid URLs in used_images set") |
|
|
|
|
|
|
|
|
|
with open(used_images_file, 'w') as f: |
|
|
|
|
f.write('[\n') |
|
|
|
|
urls = list(used_images) |
|
|
|
|
for i, url in enumerate(urls): |
|
|
|
|
f.write(f'"{url}"') |
|
|
|
|
if i < len(urls) - 1: |
|
|
|
|
f.write(',\n') |
|
|
|
|
else: |
|
|
|
|
f.write('\n') |
|
|
|
|
f.write(']') |
|
|
|
|
logging.info(f"Saved {len(used_images)} used image URLs to {used_images_file}") |
|
|
|
|
json.dump(valid_urls, f, indent=2) |
|
|
|
|
logging.info(f"Saved {len(valid_urls)} used image URLs to {used_images_file}") |
|
|
|
|
except Exception as e: |
|
|
|
|
logging.warning(f"Failed to save used images to {used_images_file}: {e}") |
|
|
|
|
|
|
|
|
|
|