use flickr API
This commit is contained in:
@@ -27,7 +27,7 @@ from foodie_utils import (
|
|||||||
upload_image_to_wp, select_best_persona, determine_paragraph_count,
|
upload_image_to_wp, select_best_persona, determine_paragraph_count,
|
||||||
is_interesting, generate_title_from_summary, summarize_with_gpt4o,
|
is_interesting, generate_title_from_summary, summarize_with_gpt4o,
|
||||||
generate_category_from_summary, post_to_wp, prepare_post_data,
|
generate_category_from_summary, post_to_wp, prepare_post_data,
|
||||||
smart_image_and_filter, insert_link_naturally, get_flickr_image_via_ddg
|
smart_image_and_filter, insert_link_naturally, get_flickr_image # Updated function name
|
||||||
)
|
)
|
||||||
from foodie_hooks import get_dynamic_hook, select_best_cta
|
from foodie_hooks import get_dynamic_hook, select_best_cta
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
@@ -259,7 +259,7 @@ def curate_from_google_trends(geo_list=['US']):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Fetch image
|
# Fetch image
|
||||||
image_url, image_source, uploader, page_url = get_flickr_image_via_ddg(image_query, relevance_keywords)
|
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
|
||||||
if not image_url:
|
if not image_url:
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||||
|
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ from urllib.parse import quote
|
|||||||
from requests.packages.urllib3.util.retry import Retry
|
from requests.packages.urllib3.util.retry import Retry
|
||||||
from requests.adapters import HTTPAdapter
|
from requests.adapters import HTTPAdapter
|
||||||
import praw
|
import praw
|
||||||
|
from dotenv import load_dotenv
|
||||||
from foodie_config import (
|
from foodie_config import (
|
||||||
AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS,
|
AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS,
|
||||||
PERSONA_CONFIGS, CATEGORIES, CTAS, get_clean_source_name,
|
PERSONA_CONFIGS, CATEGORIES, CTAS, get_clean_source_name,
|
||||||
@@ -25,10 +26,12 @@ from foodie_utils import (
|
|||||||
upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
|
upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
|
||||||
summarize_with_gpt4o, generate_category_from_summary, post_to_wp,
|
summarize_with_gpt4o, generate_category_from_summary, post_to_wp,
|
||||||
prepare_post_data, select_best_author, smart_image_and_filter,
|
prepare_post_data, select_best_author, smart_image_and_filter,
|
||||||
get_flickr_image_via_ddg
|
get_flickr_image # Updated function name
|
||||||
)
|
)
|
||||||
from foodie_hooks import get_dynamic_hook, select_best_cta
|
from foodie_hooks import get_dynamic_hook, select_best_cta
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
# Flag to indicate if we're in the middle of posting
|
# Flag to indicate if we're in the middle of posting
|
||||||
is_posting = False
|
is_posting = False
|
||||||
|
|
||||||
@@ -294,7 +297,8 @@ def curate_from_reddit():
|
|||||||
attempts += 1
|
attempts += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
image_url, image_source, uploader, page_url = get_flickr_image_via_ddg(image_query, relevance_keywords)
|
# Fetch image
|
||||||
|
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
|
||||||
if not image_url:
|
if not image_url:
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||||
|
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ from foodie_utils import (
|
|||||||
upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
|
upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
|
||||||
is_interesting, generate_title_from_summary, summarize_with_gpt4o,
|
is_interesting, generate_title_from_summary, summarize_with_gpt4o,
|
||||||
generate_category_from_summary, post_to_wp, prepare_post_data,
|
generate_category_from_summary, post_to_wp, prepare_post_data,
|
||||||
select_best_author, smart_image_and_filter, get_flickr_image_via_ddg
|
select_best_author, smart_image_and_filter, get_flickr_image # Updated function name
|
||||||
)
|
)
|
||||||
from foodie_hooks import get_dynamic_hook, select_best_cta
|
from foodie_hooks import get_dynamic_hook, select_best_cta
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
@@ -247,7 +247,7 @@ def curate_from_rss():
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Fetch image
|
# Fetch image
|
||||||
image_url, image_source, uploader, page_url = get_flickr_image_via_ddg(image_query, relevance_keywords)
|
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
|
||||||
if not image_url:
|
if not image_url:
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,8 @@ import os
|
|||||||
load_dotenv()
|
load_dotenv()
|
||||||
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
||||||
PIXABAY_API_KEY = os.getenv("PIXABAY_API_KEY")
|
PIXABAY_API_KEY = os.getenv("PIXABAY_API_KEY")
|
||||||
|
FLICKR_API_KEY = os.getenv("FLICKR_API_KEY")
|
||||||
|
FLICKR_API_SECRET = os.getenv("FLICKR_API_SECRET")
|
||||||
|
|
||||||
AUTHORS = [
|
AUTHORS = [
|
||||||
{
|
{
|
||||||
|
|||||||
+204
-109
@@ -15,14 +15,15 @@ from dotenv import load_dotenv
|
|||||||
from datetime import datetime, timezone, timedelta
|
from datetime import datetime, timezone, timedelta
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
from duckduckgo_search import DDGS
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from requests.adapters import HTTPAdapter
|
from requests.adapters import HTTPAdapter
|
||||||
from requests.packages.urllib3.util.retry import Retry
|
from requests.packages.urllib3.util.retry import Retry
|
||||||
import tweepy
|
import tweepy
|
||||||
|
import flickr_api
|
||||||
from foodie_config import (
|
from foodie_config import (
|
||||||
RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, PERSONA_CONFIGS,
|
RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, PERSONA_CONFIGS,
|
||||||
get_clean_source_name, AUTHORS, LIGHT_TASK_MODEL, SUMMARY_MODEL, X_API_CREDENTIALS
|
get_clean_source_name, AUTHORS, LIGHT_TASK_MODEL, SUMMARY_MODEL, X_API_CREDENTIALS,
|
||||||
|
FLICKR_API_KEY, FLICKR_API_SECRET, PIXABAY_API_KEY
|
||||||
)
|
)
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
@@ -212,52 +213,130 @@ def select_best_persona(interest_score, content=""):
|
|||||||
return random.choice(personas)
|
return random.choice(personas)
|
||||||
|
|
||||||
def get_image(search_query):
|
def get_image(search_query):
|
||||||
api_key = "14836528-999c19a033d77d463113b1fb8"
|
global last_flickr_request_time, flickr_request_count
|
||||||
base_url = "https://pixabay.com/api/"
|
|
||||||
queries = [search_query.split()[:2], search_query.split()]
|
reset_flickr_request_count()
|
||||||
|
flickr_request_count += 1
|
||||||
|
logging.info(f"Flickr request count: {flickr_request_count}/3600")
|
||||||
|
|
||||||
|
# Enforce a minimum delay of 1 second between Flickr requests
|
||||||
|
current_time = time.time()
|
||||||
|
time_since_last_request = current_time - last_flickr_request_time
|
||||||
|
if time_since_last_request < 1:
|
||||||
|
time.sleep(1 - time_since_last_request)
|
||||||
|
|
||||||
|
last_flickr_request_time = time.time()
|
||||||
|
|
||||||
for query in queries:
|
|
||||||
short_query = " ".join(query)
|
|
||||||
params = {
|
|
||||||
"key": api_key,
|
|
||||||
"q": short_query,
|
|
||||||
"image_type": "photo",
|
|
||||||
"safesearch": True,
|
|
||||||
"per_page": 20
|
|
||||||
}
|
|
||||||
try:
|
try:
|
||||||
logging.info(f"Fetching Pixabay image for query '{short_query}'")
|
# Try Flickr API first
|
||||||
response = requests.get(base_url, params=params, timeout=10)
|
photos = flickr_api.Photo.search(
|
||||||
|
text=search_query,
|
||||||
|
per_page=10,
|
||||||
|
sort='relevance',
|
||||||
|
safe_search=1,
|
||||||
|
media='photos',
|
||||||
|
license='4,5,9,10' # Commercial use licenses
|
||||||
|
)
|
||||||
|
|
||||||
|
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
||||||
|
|
||||||
|
for photo in photos:
|
||||||
|
# Fetch photo metadata (tags and title)
|
||||||
|
tags = [tag.text.lower() for tag in photo.getTags()]
|
||||||
|
title = photo.title.lower() if photo.title else ""
|
||||||
|
|
||||||
|
# Filter out images with unwanted keywords in tags or title
|
||||||
|
matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title]
|
||||||
|
if matched_keywords:
|
||||||
|
logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})")
|
||||||
|
continue
|
||||||
|
|
||||||
|
img_url = photo.getPhotoFile(size_label='Medium')
|
||||||
|
if not img_url:
|
||||||
|
continue
|
||||||
|
if img_url in used_images:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Download the image and run OCR to check for excessive text
|
||||||
|
temp_file = None
|
||||||
|
try:
|
||||||
|
img_response = requests.get(img_url, headers=headers, timeout=10)
|
||||||
|
img_response.raise_for_status()
|
||||||
|
with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
|
||||||
|
temp_file.write(img_response.content)
|
||||||
|
temp_path = temp_file.name
|
||||||
|
|
||||||
|
img = Image.open(temp_path)
|
||||||
|
text = pytesseract.image_to_string(img)
|
||||||
|
char_count = len(text.strip())
|
||||||
|
logging.info(f"OCR processed {img_url}: {char_count} characters detected")
|
||||||
|
|
||||||
|
if char_count > 200:
|
||||||
|
logging.info(f"Skipping text-heavy image (OCR): {img_url} (char_count: {char_count})")
|
||||||
|
continue
|
||||||
|
|
||||||
|
uploader = photo.owner.username
|
||||||
|
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
|
||||||
|
|
||||||
|
# Save Flickr image metadata
|
||||||
|
flickr_data = {
|
||||||
|
"title": search_query,
|
||||||
|
"image_url": img_url,
|
||||||
|
"source": "Flickr",
|
||||||
|
"uploader": uploader,
|
||||||
|
"page_url": page_url,
|
||||||
|
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||||
|
"ocr_chars": char_count
|
||||||
|
}
|
||||||
|
flickr_file = "/home/shane/foodie_automator/flickr_images.json"
|
||||||
|
with open(flickr_file, 'a') as f:
|
||||||
|
json.dump(flickr_data, f)
|
||||||
|
f.write('\n')
|
||||||
|
logging.info(f"Saved Flickr image to {flickr_file}: {img_url}")
|
||||||
|
|
||||||
|
logging.info(f"Fallback Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
|
||||||
|
return img_url, "Flickr", uploader, page_url
|
||||||
|
|
||||||
|
except requests.exceptions.HTTPError as e:
|
||||||
|
if e.response.status_code == 429:
|
||||||
|
logging.warning(f"Rate limit hit for {img_url}. Falling back to Pixabay.")
|
||||||
|
return None, None, None, None
|
||||||
|
else:
|
||||||
|
logging.warning(f"Download failed for {img_url}: {e}")
|
||||||
|
continue
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f"OCR processing failed for {img_url}: {e}")
|
||||||
|
continue
|
||||||
|
finally:
|
||||||
|
if temp_file and os.path.exists(temp_path):
|
||||||
|
os.unlink(temp_path)
|
||||||
|
|
||||||
|
logging.warning(f"No valid Flickr image found in fallback for query '{search_query}'. Trying Pixabay.")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f"Fallback Flickr API error for query '{search_query}': {e}. Falling back to Pixabay.")
|
||||||
|
|
||||||
|
# Fallback to Pixabay
|
||||||
|
try:
|
||||||
|
pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(search_query)}&image_type=photo&per_page=10"
|
||||||
|
response = requests.get(pixabay_url, timeout=10)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
data = response.json()
|
||||||
|
|
||||||
if not data.get("hits"):
|
for hit in data.get('hits', []):
|
||||||
logging.warning(f"No image hits for query '{short_query}'")
|
img_url = hit.get('webformatURL')
|
||||||
|
if not img_url or img_url in used_images:
|
||||||
continue
|
continue
|
||||||
|
uploader = hit.get('user', 'Unknown')
|
||||||
|
page_url = hit.get('pageURL', img_url)
|
||||||
|
logging.debug(f"Image selected for query '{search_query}': {img_url}")
|
||||||
|
return img_url, "Pixabay", uploader, page_url
|
||||||
|
|
||||||
valid_images = [
|
logging.warning(f"No valid Pixabay image found for query '{search_query}'.")
|
||||||
hit for hit in data["hits"]
|
return None, None, None, None
|
||||||
if all(tag not in hit.get("tags", "").lower() for tag in ["dog", "cat", "family", "child", "baby"])
|
|
||||||
]
|
|
||||||
|
|
||||||
if not valid_images:
|
except Exception as e:
|
||||||
logging.warning(f"No valid images for query '{short_query}' after filtering")
|
logging.error(f"Pixabay image fetch failed for query '{search_query}': {e}")
|
||||||
continue
|
|
||||||
|
|
||||||
image = random.choice(valid_images)
|
|
||||||
image_url = image["webformatURL"]
|
|
||||||
image_source = "Pixabay"
|
|
||||||
uploader = image.get("user", "Unknown")
|
|
||||||
pixabay_url = image["pageURL"]
|
|
||||||
|
|
||||||
logging.info(f"Fetched image URL: {image_url} by {uploader} for query '{short_query}'")
|
|
||||||
print(f"DEBUG: Image selected for query '{short_query}': {image_url}")
|
|
||||||
return image_url, image_source, uploader, pixabay_url
|
|
||||||
except requests.exceptions.RequestException as e:
|
|
||||||
logging.error(f"Image fetch failed for query '{short_query}': {e}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
logging.error(f"All Pixabay image queries failed: {queries}")
|
|
||||||
return None, None, None, None
|
return None, None, None, None
|
||||||
|
|
||||||
def generate_image_query(content):
|
def generate_image_query(content):
|
||||||
@@ -781,71 +860,80 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im
|
|||||||
print(f"WP Error: {e}")
|
print(f"WP Error: {e}")
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
def get_flickr_image_via_ddg(search_query, relevance_keywords):
|
# Configure Flickr API with credentials
|
||||||
try:
|
flickr_api.set_keys(api_key=FLICKR_API_KEY, api_secret=FLICKR_API_SECRET)
|
||||||
with DDGS() as ddgs:
|
logging.info(f"Flickr API configured with key: {FLICKR_API_KEY[:4]}... and secret: {FLICKR_API_SECRET[:4]}...")
|
||||||
results = ddgs.images(
|
|
||||||
f"{search_query} flickr site:flickr.com -poster -infographic -chart -graph -data -stats -text -typography",
|
|
||||||
license_image="sharecommercially",
|
|
||||||
max_results=30
|
|
||||||
)
|
|
||||||
if not results:
|
|
||||||
logging.warning(f"No Flickr images found via DDG for query '{search_query}'")
|
|
||||||
return None, None, None, None
|
|
||||||
|
|
||||||
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
# Global variable to track the last Flickr request time
|
||||||
candidates = []
|
last_flickr_request_time = 0
|
||||||
|
|
||||||
for r in results:
|
# Flickr request counter
|
||||||
image_url = r.get("image", "")
|
flickr_request_count = 0
|
||||||
page_url = r.get("url", "")
|
flickr_request_start_time = time.time()
|
||||||
if not image_url or "live.staticflickr.com" not in image_url:
|
|
||||||
continue
|
|
||||||
|
|
||||||
try:
|
# Define exclude keywords for filtering unwanted image types
|
||||||
response = requests.get(page_url, headers=headers, timeout=10)
|
exclude_keywords = [
|
||||||
response.raise_for_status()
|
|
||||||
soup = BeautifulSoup(response.content, 'html.parser')
|
|
||||||
time.sleep(1)
|
|
||||||
|
|
||||||
tags_elem = soup.find_all('a', class_='tag')
|
|
||||||
tags = [tag.text.strip().lower() for tag in tags_elem] if tags_elem else []
|
|
||||||
title_elem = soup.find('h1', class_='photo-title')
|
|
||||||
title = title_elem.text.strip().lower() if title_elem else r.get("title", "").lower()
|
|
||||||
|
|
||||||
exclude_keywords = [
|
|
||||||
"poster", "infographic", "chart", "graph", "data", "stats", "text", "typography",
|
"poster", "infographic", "chart", "graph", "data", "stats", "text", "typography",
|
||||||
"design", "advertisement", "illustration", "diagram", "layout", "print"
|
"design", "advertisement", "illustration", "diagram", "layout", "print"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def reset_flickr_request_count():
|
||||||
|
global flickr_request_count, flickr_request_start_time
|
||||||
|
if time.time() - flickr_request_start_time >= 3600: # Reset every hour
|
||||||
|
flickr_request_count = 0
|
||||||
|
flickr_request_start_time = time.time()
|
||||||
|
|
||||||
|
def get_flickr_image(search_query, relevance_keywords):
|
||||||
|
global last_flickr_request_time, flickr_request_count
|
||||||
|
|
||||||
|
reset_flickr_request_count()
|
||||||
|
flickr_request_count += 1
|
||||||
|
logging.info(f"Flickr request count: {flickr_request_count}/3600")
|
||||||
|
|
||||||
|
# Enforce a minimum delay of 1 second between Flickr requests
|
||||||
|
current_time = time.time()
|
||||||
|
time_since_last_request = current_time - last_flickr_request_time
|
||||||
|
if time_since_last_request < 1:
|
||||||
|
time.sleep(1 - time_since_last_request)
|
||||||
|
|
||||||
|
last_flickr_request_time = time.time()
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Search for photos on Flickr using the API
|
||||||
|
photos = flickr_api.Photo.search(
|
||||||
|
text=search_query,
|
||||||
|
per_page=10,
|
||||||
|
sort='relevance',
|
||||||
|
safe_search=1,
|
||||||
|
media='photos',
|
||||||
|
license='4,5,9,10' # Commercial use licenses (CC BY, CC BY-SA, etc.)
|
||||||
|
)
|
||||||
|
|
||||||
|
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
||||||
|
|
||||||
|
for photo in photos:
|
||||||
|
# Fetch photo metadata (tags and title)
|
||||||
|
tags = [tag.text.lower() for tag in photo.getTags()]
|
||||||
|
title = photo.title.lower() if photo.title else ""
|
||||||
|
|
||||||
|
# Filter out images with unwanted keywords in tags or title
|
||||||
matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title]
|
matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title]
|
||||||
if matched_keywords:
|
if matched_keywords:
|
||||||
logging.info(f"Skipping text-heavy image: {image_url} (tags: {tags}, title: {title}, matched: {matched_keywords})")
|
logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
uploader = soup.find('a', class_='owner-name')
|
img_url = photo.getPhotoFile(size_label='Large')
|
||||||
uploader = uploader.text.strip() if uploader else "Flickr User"
|
if not img_url:
|
||||||
candidates.append({
|
img_url = photo.getPhotoFile(size_label='Medium')
|
||||||
"image_url": image_url,
|
if not img_url:
|
||||||
"page_url": page_url,
|
continue
|
||||||
"uploader": uploader,
|
if img_url in used_images:
|
||||||
"tags": tags,
|
|
||||||
"title": title
|
|
||||||
})
|
|
||||||
|
|
||||||
except requests.exceptions.RequestException as e:
|
|
||||||
logging.info(f"Skipping unavailable image: {image_url} (page: {page_url}, error: {e})")
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if not candidates:
|
# Download the image and run OCR to check for excessive text
|
||||||
logging.warning(f"No valid candidate images after filtering for '{search_query}'")
|
|
||||||
return None, None, None, None
|
|
||||||
|
|
||||||
result = random.choice(candidates)
|
|
||||||
image_url = result["image_url"]
|
|
||||||
|
|
||||||
temp_file = None
|
temp_file = None
|
||||||
try:
|
try:
|
||||||
img_response = requests.get(image_url, headers=headers, timeout=10)
|
img_response = requests.get(img_url, headers=headers, timeout=10)
|
||||||
img_response.raise_for_status()
|
img_response.raise_for_status()
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
|
with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
|
||||||
temp_file.write(img_response.content)
|
temp_file.write(img_response.content)
|
||||||
@@ -854,46 +942,53 @@ def get_flickr_image_via_ddg(search_query, relevance_keywords):
|
|||||||
img = Image.open(temp_path)
|
img = Image.open(temp_path)
|
||||||
text = pytesseract.image_to_string(img)
|
text = pytesseract.image_to_string(img)
|
||||||
char_count = len(text.strip())
|
char_count = len(text.strip())
|
||||||
logging.info(f"OCR processed {image_url}: {char_count} characters detected")
|
logging.info(f"OCR processed {img_url}: {char_count} characters detected")
|
||||||
|
|
||||||
if char_count > 200:
|
if char_count > 200:
|
||||||
logging.info(f"Skipping text-heavy image (OCR): {image_url} (char_count: {char_count})")
|
logging.info(f"Skipping text-heavy image (OCR): {img_url} (char_count: {char_count})")
|
||||||
return None, None, None, None
|
continue
|
||||||
|
|
||||||
|
uploader = photo.owner.username
|
||||||
|
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
|
||||||
|
|
||||||
|
# Save Flickr image metadata
|
||||||
flickr_data = {
|
flickr_data = {
|
||||||
"title": search_query,
|
"title": search_query,
|
||||||
"image_url": image_url,
|
"image_url": img_url,
|
||||||
"source": "Flickr",
|
"source": "Flickr",
|
||||||
"uploader": result["uploader"],
|
"uploader": uploader,
|
||||||
"page_url": result["page_url"],
|
"page_url": page_url,
|
||||||
"timestamp": datetime.now().isoformat(),
|
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||||
"ocr_chars": char_count
|
"ocr_chars": char_count
|
||||||
}
|
}
|
||||||
flickr_file = "/home/shane/foodie_automator/flickr_images.json"
|
flickr_file = "/home/shane/foodie_automator/flickr_images.json"
|
||||||
with open(flickr_file, 'a') as f:
|
with open(flickr_file, 'a') as f:
|
||||||
json.dump(flickr_data, f)
|
json.dump(flickr_data, f)
|
||||||
f.write('\n')
|
f.write('\n')
|
||||||
logging.info(f"Saved Flickr image to {flickr_file}: {image_url}")
|
logging.info(f"Saved Flickr image to {flickr_file}: {img_url}")
|
||||||
logging.info(f"Fetched Flickr image URL: {image_url} by {result['uploader']} for query '{search_query}' (tags: {result['tags']})")
|
|
||||||
print(f"DEBUG: Flickr image selected: {image_url}")
|
logging.info(f"Fetched Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
|
||||||
return image_url, "Flickr", result["uploader"], result["page_url"]
|
return img_url, "Flickr", uploader, page_url
|
||||||
|
|
||||||
except requests.exceptions.HTTPError as e:
|
except requests.exceptions.HTTPError as e:
|
||||||
if e.response.status_code == 429:
|
if e.response.status_code == 429:
|
||||||
logging.warning(f"Rate limit hit for {image_url}. Falling back to Pixabay.")
|
logging.warning(f"Rate limit hit for {img_url}. Falling back to Pixabay.")
|
||||||
return None, None, None, None
|
return None, None, None, None
|
||||||
else:
|
else:
|
||||||
logging.warning(f"Download failed for {image_url}: {e}")
|
logging.warning(f"Download failed for {img_url}: {e}")
|
||||||
return None, None, None, None
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"OCR processing failed for {image_url}: {e}")
|
logging.warning(f"OCR processing failed for {img_url}: {e}")
|
||||||
return None, None, None, None
|
continue
|
||||||
finally:
|
finally:
|
||||||
if temp_file and os.path.exists(temp_path):
|
if temp_file and os.path.exists(temp_path):
|
||||||
os.unlink(temp_path)
|
os.unlink(temp_path)
|
||||||
|
|
||||||
|
logging.warning(f"No valid Flickr image found for query '{search_query}'.")
|
||||||
|
return None, None, None, None
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Flickr/DDG image fetch failed for '{search_query}': {e}")
|
logging.warning(f"Flickr API error for query '{search_query}': {e}. Falling back to Pixabay.")
|
||||||
return None, None, None, None
|
return None, None, None, None
|
||||||
|
|
||||||
def select_best_author(summary):
|
def select_best_author(summary):
|
||||||
|
|||||||
@@ -10,3 +10,4 @@ feedparser==6.0.11
|
|||||||
webdriver-manager==4.0.2
|
webdriver-manager==4.0.2
|
||||||
tweepy==4.14.0
|
tweepy==4.14.0
|
||||||
python-dotenv==1.0.1
|
python-dotenv==1.0.1
|
||||||
|
flickr-api==0.7.1
|
||||||
Reference in New Issue
Block a user