diff --git a/foodie_utils.py b/foodie_utils.py index 73d272d..ec468fe 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -275,40 +275,51 @@ def get_image(search_query): return None def process_photo(photo): - tags = [tag.text.lower() for tag in photo.getTags()] - title = photo.title.lower() if photo.title else "" - - matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title] - if matched_keywords: - logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})") - return None - - img_url = photo.getPhotoFile(size_label='Medium') - if not img_url or img_url in used_images: + tags = [tag.text.lower() for tag in photo.getTags()] + title = photo.title.lower() if photo.title else "" + + matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title] + if matched_keywords: + logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})") + return None + + # Try 'Large' size first, fall back to 'Medium' if unavailable + img_url = None + try: + img_url = photo.getPhotoFile(size_label='Large') + except flickr_api.flickrerrors.FlickrError as e: + logging.info(f"Large size not available for photo {photo.id}: {e}, trying Medium") + try: + img_url = photo.getPhotoFile(size_label='Medium') + except flickr_api.flickrerrors.FlickrError as e: + logging.warning(f"Medium size not available for photo {photo.id}: {e}") return None - - uploader = photo.owner.username - page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}" - - used_images.add(img_url) - save_used_images() - - flickr_data = { - "title": search_query, - "image_url": img_url, - "source": "Flickr", - "uploader": uploader, - "page_url": page_url, - "timestamp": datetime.now(timezone.utc).isoformat() - } - flickr_file = "/home/shane/foodie_automator/flickr_images.json" - with open(flickr_file, 'a') as f: - json.dump(flickr_data, f) - f.write('\n') - logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}") - - logging.info(f"Fallback Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})") - return img_url, "Flickr", uploader, page_url + + if not img_url or img_url in used_images: + return None + + uploader = photo.owner.username + page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}" + + used_images.add(img_url) + save_used_images() + + flickr_data = { + "title": search_query, + "image_url": img_url, + "source": "Flickr", + "uploader": uploader, + "page_url": page_url, + "timestamp": datetime.now(timezone.utc).isoformat() + } + flickr_file = "/home/shane/foodie_automator/flickr_images.json" + with open(flickr_file, 'a') as f: + json.dump(flickr_data, f) + f.write('\n') + logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}") + + logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})") + return img_url, "Flickr", uploader, page_url def search_ddg_for_flickr(query): ddg_query = f"{query} site:flickr.com" @@ -1010,24 +1021,19 @@ if os.path.exists(used_images_file): else: data = json.loads(content) if not isinstance(data, list): - logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Resetting.") - data = [] - else: - # Handle malformed format (list of lists or invalid entries) - flat_data = [] - for item in data: - if isinstance(item, str) and item.startswith('https://'): - flat_data.append(item) - elif isinstance(item, list): - logging.warning(f"Fixing malformed entry in {used_images_file}: {item}") - flat_data.extend([sub_item for sub_item in item if isinstance(sub_item, str) and sub_item.startswith('https://')]) - else: - logging.warning(f"Skipping invalid entry in {used_images_file}: {item}") - data = flat_data + logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Converting to list.") + if isinstance(data, dict): + # If it's a dict, try to extract URLs from values + data = [v for v in data.values() if isinstance(v, str) and v.startswith('https://')] + else: + logging.warning(f"Cannot convert {type(data)} to list. Resetting to empty list.") + data = [] + # Filter out non-string or non-URL entries + data = [item for item in data if isinstance(item, str) and item.startswith('https://')] used_images.update(data) logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}") except Exception as e: - logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting file.") + logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting to empty set.") used_images = set() with open(used_images_file, 'w') as f: json.dump([], f) @@ -1035,17 +1041,14 @@ if os.path.exists(used_images_file): # Function to save used_images to file def save_used_images(): try: + # Ensure used_images contains only valid URLs + valid_urls = [url for url in used_images if isinstance(url, str) and url.startswith('https://')] + if len(valid_urls) != len(used_images): + logging.warning(f"Found {len(used_images) - len(valid_urls)} invalid URLs in used_images set") + with open(used_images_file, 'w') as f: - f.write('[\n') - urls = list(used_images) - for i, url in enumerate(urls): - f.write(f'"{url}"') - if i < len(urls) - 1: - f.write(',\n') - else: - f.write('\n') - f.write(']') - logging.info(f"Saved {len(used_images)} used image URLs to {used_images_file}") + json.dump(valid_urls, f, indent=2) + logging.info(f"Saved {len(valid_urls)} used image URLs to {used_images_file}") except Exception as e: logging.warning(f"Failed to save used images to {used_images_file}: {e}")