|
|
|
|
@@ -275,40 +275,51 @@ def get_image(search_query): |
|
|
|
|
return None |
|
|
|
|
|
|
|
|
|
def process_photo(photo): |
|
|
|
|
tags = [tag.text.lower() for tag in photo.getTags()] |
|
|
|
|
title = photo.title.lower() if photo.title else "" |
|
|
|
|
|
|
|
|
|
matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title] |
|
|
|
|
if matched_keywords: |
|
|
|
|
logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})") |
|
|
|
|
return None |
|
|
|
|
|
|
|
|
|
img_url = photo.getPhotoFile(size_label='Medium') |
|
|
|
|
if not img_url or img_url in used_images: |
|
|
|
|
tags = [tag.text.lower() for tag in photo.getTags()] |
|
|
|
|
title = photo.title.lower() if photo.title else "" |
|
|
|
|
|
|
|
|
|
matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title] |
|
|
|
|
if matched_keywords: |
|
|
|
|
logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})") |
|
|
|
|
return None |
|
|
|
|
|
|
|
|
|
# Try 'Large' size first, fall back to 'Medium' if unavailable |
|
|
|
|
img_url = None |
|
|
|
|
try: |
|
|
|
|
img_url = photo.getPhotoFile(size_label='Large') |
|
|
|
|
except flickr_api.flickrerrors.FlickrError as e: |
|
|
|
|
logging.info(f"Large size not available for photo {photo.id}: {e}, trying Medium") |
|
|
|
|
try: |
|
|
|
|
img_url = photo.getPhotoFile(size_label='Medium') |
|
|
|
|
except flickr_api.flickrerrors.FlickrError as e: |
|
|
|
|
logging.warning(f"Medium size not available for photo {photo.id}: {e}") |
|
|
|
|
return None |
|
|
|
|
|
|
|
|
|
uploader = photo.owner.username |
|
|
|
|
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}" |
|
|
|
|
|
|
|
|
|
used_images.add(img_url) |
|
|
|
|
save_used_images() |
|
|
|
|
|
|
|
|
|
flickr_data = { |
|
|
|
|
"title": search_query, |
|
|
|
|
"image_url": img_url, |
|
|
|
|
"source": "Flickr", |
|
|
|
|
"uploader": uploader, |
|
|
|
|
"page_url": page_url, |
|
|
|
|
"timestamp": datetime.now(timezone.utc).isoformat() |
|
|
|
|
} |
|
|
|
|
flickr_file = "/home/shane/foodie_automator/flickr_images.json" |
|
|
|
|
with open(flickr_file, 'a') as f: |
|
|
|
|
json.dump(flickr_data, f) |
|
|
|
|
f.write('\n') |
|
|
|
|
logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}") |
|
|
|
|
|
|
|
|
|
logging.info(f"Fallback Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})") |
|
|
|
|
return img_url, "Flickr", uploader, page_url |
|
|
|
|
|
|
|
|
|
if not img_url or img_url in used_images: |
|
|
|
|
return None |
|
|
|
|
|
|
|
|
|
uploader = photo.owner.username |
|
|
|
|
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}" |
|
|
|
|
|
|
|
|
|
used_images.add(img_url) |
|
|
|
|
save_used_images() |
|
|
|
|
|
|
|
|
|
flickr_data = { |
|
|
|
|
"title": search_query, |
|
|
|
|
"image_url": img_url, |
|
|
|
|
"source": "Flickr", |
|
|
|
|
"uploader": uploader, |
|
|
|
|
"page_url": page_url, |
|
|
|
|
"timestamp": datetime.now(timezone.utc).isoformat() |
|
|
|
|
} |
|
|
|
|
flickr_file = "/home/shane/foodie_automator/flickr_images.json" |
|
|
|
|
with open(flickr_file, 'a') as f: |
|
|
|
|
json.dump(flickr_data, f) |
|
|
|
|
f.write('\n') |
|
|
|
|
logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}") |
|
|
|
|
|
|
|
|
|
logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})") |
|
|
|
|
return img_url, "Flickr", uploader, page_url |
|
|
|
|
|
|
|
|
|
def search_ddg_for_flickr(query): |
|
|
|
|
ddg_query = f"{query} site:flickr.com" |
|
|
|
|
@@ -1010,24 +1021,19 @@ if os.path.exists(used_images_file): |
|
|
|
|
else: |
|
|
|
|
data = json.loads(content) |
|
|
|
|
if not isinstance(data, list): |
|
|
|
|
logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Resetting.") |
|
|
|
|
data = [] |
|
|
|
|
else: |
|
|
|
|
# Handle malformed format (list of lists or invalid entries) |
|
|
|
|
flat_data = [] |
|
|
|
|
for item in data: |
|
|
|
|
if isinstance(item, str) and item.startswith('https://'): |
|
|
|
|
flat_data.append(item) |
|
|
|
|
elif isinstance(item, list): |
|
|
|
|
logging.warning(f"Fixing malformed entry in {used_images_file}: {item}") |
|
|
|
|
flat_data.extend([sub_item for sub_item in item if isinstance(sub_item, str) and sub_item.startswith('https://')]) |
|
|
|
|
else: |
|
|
|
|
logging.warning(f"Skipping invalid entry in {used_images_file}: {item}") |
|
|
|
|
data = flat_data |
|
|
|
|
logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Converting to list.") |
|
|
|
|
if isinstance(data, dict): |
|
|
|
|
# If it's a dict, try to extract URLs from values |
|
|
|
|
data = [v for v in data.values() if isinstance(v, str) and v.startswith('https://')] |
|
|
|
|
else: |
|
|
|
|
logging.warning(f"Cannot convert {type(data)} to list. Resetting to empty list.") |
|
|
|
|
data = [] |
|
|
|
|
# Filter out non-string or non-URL entries |
|
|
|
|
data = [item for item in data if isinstance(item, str) and item.startswith('https://')] |
|
|
|
|
used_images.update(data) |
|
|
|
|
logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}") |
|
|
|
|
except Exception as e: |
|
|
|
|
logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting file.") |
|
|
|
|
logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting to empty set.") |
|
|
|
|
used_images = set() |
|
|
|
|
with open(used_images_file, 'w') as f: |
|
|
|
|
json.dump([], f) |
|
|
|
|
@@ -1035,17 +1041,14 @@ if os.path.exists(used_images_file): |
|
|
|
|
# Function to save used_images to file |
|
|
|
|
def save_used_images(): |
|
|
|
|
try: |
|
|
|
|
# Ensure used_images contains only valid URLs |
|
|
|
|
valid_urls = [url for url in used_images if isinstance(url, str) and url.startswith('https://')] |
|
|
|
|
if len(valid_urls) != len(used_images): |
|
|
|
|
logging.warning(f"Found {len(used_images) - len(valid_urls)} invalid URLs in used_images set") |
|
|
|
|
|
|
|
|
|
with open(used_images_file, 'w') as f: |
|
|
|
|
f.write('[\n') |
|
|
|
|
urls = list(used_images) |
|
|
|
|
for i, url in enumerate(urls): |
|
|
|
|
f.write(f'"{url}"') |
|
|
|
|
if i < len(urls) - 1: |
|
|
|
|
f.write(',\n') |
|
|
|
|
else: |
|
|
|
|
f.write('\n') |
|
|
|
|
f.write(']') |
|
|
|
|
logging.info(f"Saved {len(used_images)} used image URLs to {used_images_file}") |
|
|
|
|
json.dump(valid_urls, f, indent=2) |
|
|
|
|
logging.info(f"Saved {len(valid_urls)} used image URLs to {used_images_file}") |
|
|
|
|
except Exception as e: |
|
|
|
|
logging.warning(f"Failed to save used images to {used_images_file}: {e}") |
|
|
|
|
|
|
|
|
|
|