Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 427a5cb919 | |||
| 6d945dae67 | |||
| 1fd1ad361b | |||
| a5182bdfb9 | |||
| be6514e4e3 | |||
| c936555741 | |||
| cdc54f3f14 |
@@ -17,7 +17,7 @@ from requests.packages.urllib3.util.retry import Retry
|
|||||||
from requests.adapters import HTTPAdapter
|
from requests.adapters import HTTPAdapter
|
||||||
from foodie_config import (
|
from foodie_config import (
|
||||||
RSS_FEEDS, RSS_FEED_NAMES, AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS,
|
RSS_FEEDS, RSS_FEED_NAMES, AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS,
|
||||||
HOME_KEYWORDS, PRODUCT_KEYWORDS, PERSONA_CONFIGS, CATEGORIES, CTAS,
|
HOME_KEYWORDS, PRODUCT_KEYWORDS, PERSONA_CONFIGS, CATEGORIES,
|
||||||
get_clean_source_name, X_API_CREDENTIALS
|
get_clean_source_name, X_API_CREDENTIALS
|
||||||
)
|
)
|
||||||
from foodie_utils import (
|
from foodie_utils import (
|
||||||
@@ -269,10 +269,14 @@ def curate_from_rss():
|
|||||||
# Fetch image
|
# Fetch image
|
||||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
|
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
|
||||||
if not image_url:
|
if not image_url:
|
||||||
|
logging.info(f"Flickr fetch failed for '{image_query}'. Falling back to Pixabay.")
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||||
|
if not image_url:
|
||||||
|
logging.info(f"Pixabay fetch failed for '{image_query}'. Skipping article '{title}'.")
|
||||||
|
attempts += 1
|
||||||
|
continue
|
||||||
|
|
||||||
hook = get_dynamic_hook(post_data["title"]).strip()
|
hook = get_dynamic_hook(post_data["title"]).strip()
|
||||||
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=None)
|
|
||||||
|
|
||||||
# Generate viral share prompt
|
# Generate viral share prompt
|
||||||
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
||||||
|
|||||||
+183
-326
@@ -236,178 +236,12 @@ def select_best_persona(interest_score, content=""):
|
|||||||
return random.choice(personas)
|
return random.choice(personas)
|
||||||
|
|
||||||
def get_image(search_query):
|
def get_image(search_query):
|
||||||
global last_flickr_request_time, flickr_request_count
|
|
||||||
|
|
||||||
reset_flickr_request_count()
|
|
||||||
flickr_request_count += 1
|
|
||||||
logging.info(f"Flickr request count: {flickr_request_count}/3600")
|
|
||||||
|
|
||||||
current_time = time.time()
|
|
||||||
time_since_last_request = current_time - last_flickr_request_time
|
|
||||||
if time_since_last_request < 10:
|
|
||||||
time.sleep(10 - time_since_last_request)
|
|
||||||
|
|
||||||
last_flickr_request_time = time.time()
|
|
||||||
|
|
||||||
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
||||||
|
|
||||||
def search_flickr(query, per_page=5):
|
# Try Pixabay with the original query
|
||||||
try:
|
|
||||||
photos = flickr_api.Photo.search(
|
|
||||||
text=query,
|
|
||||||
per_page=per_page,
|
|
||||||
sort='relevance',
|
|
||||||
safe_search=1,
|
|
||||||
media='photos',
|
|
||||||
license='4,5,9,10'
|
|
||||||
)
|
|
||||||
return photos
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"Flickr API error for query '{query}': {e}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
def fetch_photo_by_id(photo_id):
|
|
||||||
try:
|
|
||||||
photo = flickr_api.Photo(id=photo_id)
|
|
||||||
return photo
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"Failed to fetch Flickr photo ID {photo_id}: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def process_photo(photo):
|
|
||||||
tags = [tag.text.lower() for tag in photo.getTags()]
|
|
||||||
title = photo.title.lower() if photo.title else ""
|
|
||||||
|
|
||||||
matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title]
|
|
||||||
if matched_keywords:
|
|
||||||
logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})")
|
|
||||||
return None
|
|
||||||
|
|
||||||
img_url = photo.getPhotoFile(size_label='Medium')
|
|
||||||
if not img_url or img_url in used_images:
|
|
||||||
return None
|
|
||||||
|
|
||||||
uploader = photo.owner.username
|
|
||||||
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
|
|
||||||
|
|
||||||
used_images.add(img_url)
|
|
||||||
save_used_images()
|
|
||||||
|
|
||||||
flickr_data = {
|
|
||||||
"title": search_query,
|
|
||||||
"image_url": img_url,
|
|
||||||
"source": "Flickr",
|
|
||||||
"uploader": uploader,
|
|
||||||
"page_url": page_url,
|
|
||||||
"timestamp": datetime.now(timezone.utc).isoformat()
|
|
||||||
}
|
|
||||||
flickr_file = "/home/shane/foodie_automator/flickr_images.json"
|
|
||||||
with open(flickr_file, 'a') as f:
|
|
||||||
json.dump(flickr_data, f)
|
|
||||||
f.write('\n')
|
|
||||||
logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}")
|
|
||||||
|
|
||||||
logging.info(f"Fallback Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
|
|
||||||
return img_url, "Flickr", uploader, page_url
|
|
||||||
|
|
||||||
def search_ddg_for_flickr(query):
|
|
||||||
ddg_query = f"{query} site:flickr.com"
|
|
||||||
ddg_url = f"https://duckduckgo.com/?q={quote(ddg_query)}"
|
|
||||||
try:
|
|
||||||
response = requests.get(ddg_url, headers=headers, timeout=10)
|
|
||||||
response.raise_for_status()
|
|
||||||
soup = BeautifulSoup(response.text, 'html.parser')
|
|
||||||
|
|
||||||
photo_ids = set()
|
|
||||||
for link in soup.find_all('a', href=True):
|
|
||||||
href = link['href']
|
|
||||||
match = re.search(r'flickr\.com/photos/[^/]+/(\d+)', href)
|
|
||||||
if match:
|
|
||||||
photo_id = match.group(1)
|
|
||||||
photo_ids.add(photo_id)
|
|
||||||
|
|
||||||
photo_ids = list(photo_ids)[:2] # Limit to 2 IDs
|
|
||||||
logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}")
|
|
||||||
return photo_ids
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"DDG search failed for query '{ddg_query}': {e}")
|
|
||||||
return set()
|
|
||||||
|
|
||||||
def classify_keywords(keywords):
|
|
||||||
prompt = (
|
|
||||||
"Given the following keywords from an image search query, classify each as 'specific' (e.g., brand names, unique entities like 'Taco Bell' or 'Paris') or 'generic' (e.g., common or abstract terms like 'dining' or 'trends'). "
|
|
||||||
"Return a JSON object mapping each keyword to its classification.\n\n"
|
|
||||||
"Keywords: " + ", ".join(keywords) + "\n\n"
|
|
||||||
"Example output format (do not use these exact keywords in your response):\n"
|
|
||||||
"```json\n"
|
|
||||||
"{\n"
|
|
||||||
" \"keyword1\": \"specific\",\n"
|
|
||||||
" \"keyword2\": \"generic\"\n"
|
|
||||||
"}\n```"
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
response = client.chat.completions.create(
|
|
||||||
model=LIGHT_TASK_MODEL,
|
|
||||||
messages=[
|
|
||||||
{"role": "system", "content": "You are a helper that classifies keywords."},
|
|
||||||
{"role": "user", "content": prompt}
|
|
||||||
],
|
|
||||||
max_tokens=100,
|
|
||||||
temperature=0.5
|
|
||||||
)
|
|
||||||
raw_response = response.choices[0].message.content
|
|
||||||
json_match = re.search(r'```json\n([\s\S]*?)\n```', raw_response)
|
|
||||||
if not json_match:
|
|
||||||
logging.warning(f"Failed to parse keyword classification JSON: {raw_response}")
|
|
||||||
return {kw: "specific" for kw in keywords}
|
|
||||||
|
|
||||||
classifications = json.loads(json_match.group(1))
|
|
||||||
return classifications
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
|
|
||||||
return {kw: "specific" for kw in keywords}
|
|
||||||
|
|
||||||
# Step 1: Search DDG to find Flickr photo IDs
|
|
||||||
logging.info(f"Searching DDG with query: '{search_query} site:flickr.com'")
|
|
||||||
photo_ids = search_ddg_for_flickr(search_query)
|
|
||||||
if photo_ids:
|
|
||||||
for photo_id in photo_ids:
|
|
||||||
photo = fetch_photo_by_id(photo_id)
|
|
||||||
if photo:
|
|
||||||
result = process_photo(photo)
|
|
||||||
if result:
|
|
||||||
return result
|
|
||||||
|
|
||||||
# Step 2: Break down the query into keywords and classify them for direct Flickr API search
|
|
||||||
keywords = search_query.lower().split()
|
|
||||||
if len(keywords) > 1:
|
|
||||||
classifications = classify_keywords(keywords)
|
|
||||||
logging.info(f"Keyword classifications: {classifications}")
|
|
||||||
|
|
||||||
specific_keywords = [kw for kw, classification in classifications.items() if classification == "specific"]
|
|
||||||
if specific_keywords:
|
|
||||||
for keyword in specific_keywords:
|
|
||||||
logging.info(f"Searching Flickr with specific keyword: '{keyword}'")
|
|
||||||
photos = search_flickr(keyword)
|
|
||||||
for photo in photos:
|
|
||||||
result = process_photo(photo)
|
|
||||||
if result:
|
|
||||||
return result
|
|
||||||
|
|
||||||
# Step 3: Final fallback to a generic food-related query
|
|
||||||
logging.info(f"No results found. Falling back to generic query: 'food dining'")
|
|
||||||
photos = search_flickr("food dining")
|
|
||||||
for photo in photos:
|
|
||||||
result = process_photo(photo)
|
|
||||||
if result:
|
|
||||||
return result
|
|
||||||
|
|
||||||
logging.warning(f"No valid Flickr image found in fallback for query '{search_query}'. Trying Pixabay.")
|
|
||||||
|
|
||||||
# Fallback to Pixabay
|
|
||||||
try:
|
try:
|
||||||
pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(search_query)}&image_type=photo&per_page=10"
|
pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(search_query)}&image_type=photo&per_page=10"
|
||||||
response = requests.get(pixabay_url, timeout=10)
|
response = requests.get(pixabay_url, headers=headers, timeout=10)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
data = response.json()
|
||||||
|
|
||||||
@@ -421,15 +255,43 @@ def get_image(search_query):
|
|||||||
used_images.add(img_url)
|
used_images.add(img_url)
|
||||||
save_used_images()
|
save_used_images()
|
||||||
|
|
||||||
logging.debug(f"Image selected for query '{search_query}': {img_url}")
|
logging.info(f"Selected Pixabay image: {img_url} by {uploader} for query '{search_query}'")
|
||||||
return img_url, "Pixabay", uploader, page_url
|
return img_url, "Pixabay", uploader, page_url
|
||||||
|
|
||||||
logging.warning(f"No valid Pixabay image found for query '{search_query}'.")
|
logging.info(f"No valid Pixabay image found for query '{search_query}'. Trying fallback query.")
|
||||||
return None, None, None, None
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Pixabay image fetch failed for query '{search_query}': {e}")
|
logging.warning(f"Pixabay image fetch failed for query '{search_query}': {e}")
|
||||||
return None, None, None, None
|
|
||||||
|
# Fallback to a generic query
|
||||||
|
fallback_query = "food dining"
|
||||||
|
try:
|
||||||
|
pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(fallback_query)}&image_type=photo&per_page=10"
|
||||||
|
response = requests.get(pixabay_url, headers=headers, timeout=10)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
|
for hit in data.get('hits', []):
|
||||||
|
img_url = hit.get('webformatURL')
|
||||||
|
if not img_url or img_url in used_images:
|
||||||
|
continue
|
||||||
|
uploader = hit.get('user', 'Unknown')
|
||||||
|
page_url = hit.get('pageURL', img_url)
|
||||||
|
|
||||||
|
used_images.add(img_url)
|
||||||
|
save_used_images()
|
||||||
|
|
||||||
|
logging.info(f"Selected Pixabay fallback image: {img_url} by {uploader} for query '{fallback_query}'")
|
||||||
|
return img_url, "Pixabay", uploader, page_url
|
||||||
|
|
||||||
|
logging.warning(f"No valid Pixabay image found for fallback query '{fallback_query}'.")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f"Pixabay fallback image fetch failed for query '{fallback_query}': {e}")
|
||||||
|
|
||||||
|
# Ultimate fallback: return None but log clearly
|
||||||
|
logging.error(f"All image fetch attempts failed for query '{search_query}'. Returning None.")
|
||||||
|
return None, None, None, None
|
||||||
|
|
||||||
def generate_image_query(title, summary):
|
def generate_image_query(title, summary):
|
||||||
try:
|
try:
|
||||||
@@ -1010,24 +872,19 @@ if os.path.exists(used_images_file):
|
|||||||
else:
|
else:
|
||||||
data = json.loads(content)
|
data = json.loads(content)
|
||||||
if not isinstance(data, list):
|
if not isinstance(data, list):
|
||||||
logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Resetting.")
|
logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Converting to list.")
|
||||||
data = []
|
if isinstance(data, dict):
|
||||||
else:
|
# If it's a dict, try to extract URLs from values
|
||||||
# Handle malformed format (list of lists or invalid entries)
|
data = [v for v in data.values() if isinstance(v, str) and v.startswith('https://')]
|
||||||
flat_data = []
|
else:
|
||||||
for item in data:
|
logging.warning(f"Cannot convert {type(data)} to list. Resetting to empty list.")
|
||||||
if isinstance(item, str) and item.startswith('https://'):
|
data = []
|
||||||
flat_data.append(item)
|
# Filter out non-string or non-URL entries
|
||||||
elif isinstance(item, list):
|
data = [item for item in data if isinstance(item, str) and item.startswith('https://')]
|
||||||
logging.warning(f"Fixing malformed entry in {used_images_file}: {item}")
|
|
||||||
flat_data.extend([sub_item for sub_item in item if isinstance(sub_item, str) and sub_item.startswith('https://')])
|
|
||||||
else:
|
|
||||||
logging.warning(f"Skipping invalid entry in {used_images_file}: {item}")
|
|
||||||
data = flat_data
|
|
||||||
used_images.update(data)
|
used_images.update(data)
|
||||||
logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}")
|
logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting file.")
|
logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting to empty set.")
|
||||||
used_images = set()
|
used_images = set()
|
||||||
with open(used_images_file, 'w') as f:
|
with open(used_images_file, 'w') as f:
|
||||||
json.dump([], f)
|
json.dump([], f)
|
||||||
@@ -1035,17 +892,14 @@ if os.path.exists(used_images_file):
|
|||||||
# Function to save used_images to file
|
# Function to save used_images to file
|
||||||
def save_used_images():
    """Persist the module-level ``used_images`` set to ``used_images_file``.

    Only string entries that start with ``https://`` are written; anything
    else is counted and reported in a warning. The list is written as
    indented JSON, sorted so that repeated saves produce a stable file.

    The data is first written to a sibling ``.tmp`` file and then swapped
    into place with ``os.replace`` so that an interrupted write cannot leave
    a truncated JSON file behind.

    Returns:
        None. Failures are logged as warnings and never raised.
    """
    import os  # local import keeps this block self-contained

    try:
        # Ensure used_images contains only valid URLs
        valid_urls = sorted(
            url for url in used_images
            if isinstance(url, str) and url.startswith('https://')
        )
        if len(valid_urls) != len(used_images):
            logging.warning(f"Found {len(used_images) - len(valid_urls)} invalid URLs in used_images set")

        # Write to a temporary file first, then replace the real file in one step.
        tmp_file = f"{used_images_file}.tmp"
        with open(tmp_file, 'w') as f:
            json.dump(valid_urls, f, indent=2)
        os.replace(tmp_file, used_images_file)

        logging.info(f"Saved {len(valid_urls)} used image URLs to {used_images_file}")
    except Exception as e:
        # Best-effort persistence: callers continue even if the save fails.
        logging.warning(f"Failed to save used images to {used_images_file}: {e}")
|
||||||
|
|
||||||
@@ -1055,6 +909,134 @@ def reset_flickr_request_count():
|
|||||||
flickr_request_count = 0
|
flickr_request_count = 0
|
||||||
flickr_request_start_time = time.time()
|
flickr_request_start_time = time.time()
|
||||||
|
|
||||||
|
def process_photo(photo, search_query, flickr_file="/home/shane/foodie_automator/flickr_images.json"):
    """Validate a Flickr photo and return its image details, or ``None``.

    The photo is rejected when any entry of the module-level
    ``exclude_keywords`` equals one of its tags or occurs in its title, when
    neither a 'Large' nor a 'Medium' file URL can be obtained, or when the
    URL is already present in the module-level ``used_images`` set.

    On success the URL is added to ``used_images`` (and saved), one JSON
    line of metadata is appended to ``flickr_file``, and the image details
    are returned.

    Args:
        photo: Flickr photo object providing ``getTags()``, ``title``,
            ``id``, ``owner`` and ``getPhotoFile(size_label=...)``.
        search_query: The query that led to this photo; stored in the
            metadata record and used in log messages.
        flickr_file: Path of the JSON-lines metadata log. Defaults to the
            path the original code used.

    Returns:
        tuple | None: ``(img_url, "Flickr", uploader, page_url)`` or
        ``None`` when the photo is rejected.
    """
    tags = [tag.text.lower() for tag in photo.getTags()]
    title = photo.title.lower() if photo.title else ""

    # Tags are matched exactly (list membership); the title by substring.
    matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title]
    if matched_keywords:
        logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})")
        return None

    # Try 'Large' size first, fall back to 'Medium' if unavailable
    img_url = None
    try:
        img_url = photo.getPhotoFile(size_label='Large')
    except flickr_api.flickrerrors.FlickrError as large_err:
        logging.info(f"Large size not available for photo {photo.id}: {large_err}, trying Medium")
        try:
            img_url = photo.getPhotoFile(size_label='Medium')
        except flickr_api.flickrerrors.FlickrError as medium_err:
            logging.warning(f"Medium size not available for photo {photo.id}: {medium_err}")
            return None

    if not img_url or img_url in used_images:
        logging.info(f"Image URL invalid or already used for photo {photo.id}: {img_url}")
        return None

    uploader = photo.owner.username
    page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"

    used_images.add(img_url)
    save_used_images()

    flickr_data = {
        "title": search_query,
        "image_url": img_url,
        "source": "Flickr",
        "uploader": uploader,
        "page_url": page_url,
        "timestamp": datetime.now(timezone.utc).isoformat()
    }
    # The metadata log is auxiliary: the image is already marked as used, so
    # a failed write must not discard an otherwise valid result.
    try:
        with open(flickr_file, 'a') as f:
            json.dump(flickr_data, f)
            f.write('\n')
        logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}")
    except OSError as write_err:
        logging.warning(f"Failed to save Flickr image metadata to {flickr_file}: {write_err}")

    logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
    return img_url, "Flickr", uploader, page_url
|
||||||
|
|
||||||
|
def search_flickr(query, per_page=5):
    """Search Flickr for photos whose text matches *query*.

    The search is sorted by relevance, restricted to photos, uses
    ``safe_search=1`` and the license filter ``'4,5,9,10'``.

    Args:
        query: Free-text search string.
        per_page: Maximum number of results requested from the API.

    Returns:
        The result collection from ``flickr_api.Photo.search``, or an empty
        list when the query is blank or the API call fails.
    """
    # A blank query would not constrain the search by text at all.
    if not query or not str(query).strip():
        return []

    try:
        photos = flickr_api.Photo.search(
            text=query,
            per_page=per_page,
            sort='relevance',
            safe_search=1,
            media='photos',
            license='4,5,9,10'
        )
        return photos
    except Exception as e:
        # Best-effort: any API or network failure yields "no results".
        logging.warning(f"Flickr API error for query '{query}': {e}")
        return []
|
||||||
|
|
||||||
|
def fetch_photo_by_id(photo_id):
    """Build a Flickr photo object for *photo_id*.

    Args:
        photo_id: Flickr photo ID (the numeric part of a photo page URL).

    Returns:
        A ``flickr_api.Photo`` object, or ``None`` when the ID is empty or
        constructing the object raises.
    """
    # Without an ID there is nothing to look up.
    if not photo_id:
        return None

    try:
        # NOTE(review): constructing Photo(id=...) may be lazy and defer the
        # actual API request until an attribute is read - confirm against
        # the flickr_api documentation.
        photo = flickr_api.Photo(id=photo_id)
        return photo
    except Exception as e:
        logging.warning(f"Failed to fetch Flickr photo ID {photo_id}: {e}")
        return None
|
||||||
|
|
||||||
|
def search_ddg_for_flickr(query, max_ids=2):
    """Search DuckDuckGo for Flickr photo pages and extract their photo IDs.

    The query is suffixed with ``site:flickr.com``; every link in the
    returned HTML whose href looks like ``flickr.com/photos/<user>/<digits>``
    contributes its numeric ID.

    Args:
        query: Free-text search string.
        max_ids: Maximum number of IDs to return (default 2, the limit the
            original code hard-coded).

    Returns:
        list[str]: Unique photo IDs in the order their links appear in the
        page, at most ``max_ids`` long. An empty list is returned when
        nothing is found or the request fails, so the return type is always
        a list.
    """
    ddg_query = f"{query} site:flickr.com"
    ddg_url = f"https://duckduckgo.com/?q={quote(ddg_query)}"
    try:
        response = requests.get(
            ddg_url,
            headers={'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'},
            timeout=10,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Dict keys de-duplicate while keeping first-seen order, so the IDs
        # kept by the slice below are the earliest links rather than an
        # arbitrary pair.
        seen = {}
        for link in soup.find_all('a', href=True):
            match = re.search(r'flickr\.com/photos/[^/]+/(\d+)', link['href'])
            if match:
                seen.setdefault(match.group(1), None)

        photo_ids = list(seen)[:max_ids]  # Limit the number of IDs
        logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}")
        return photo_ids
    except Exception as e:
        # Best-effort: a failed search simply yields no candidates.
        logging.warning(f"DDG search failed for query '{ddg_query}': {e}")
        return []
|
||||||
|
|
||||||
|
def classify_keywords(keywords):
    """Ask the chat model to label each keyword as 'specific' or 'generic'.

    The model is prompted to answer with a JSON object mapping every keyword
    to its classification. The reply is parsed from a ```json fenced block
    when present, otherwise the whole reply is tried as JSON.

    Args:
        keywords: List of keyword strings taken from an image search query.

    Returns:
        dict: Mapping of keyword to classification as returned by the
        model. When the list is empty an empty dict is returned without
        calling the model. When the call fails, or the reply is not a JSON
        object, every keyword is mapped to ``"specific"``.
    """
    # Nothing to classify: skip the API round trip.
    if not keywords:
        return {}

    all_specific = {kw: "specific" for kw in keywords}

    prompt = (
        "Given the following keywords from an image search query, classify each as 'specific' (e.g., brand names, unique entities like 'Taco Bell' or 'Paris') or 'generic' (e.g., common or abstract terms like 'dining' or 'trends'). "
        "Return a JSON object mapping each keyword to its classification.\n\n"
        "Keywords: " + ", ".join(keywords) + "\n\n"
        "Example output format (do not use these exact keywords in your response):\n"
        "```json\n"
        "{\n"
        "  \"keyword1\": \"specific\",\n"
        "  \"keyword2\": \"generic\"\n"
        "}\n```"
    )
    try:
        response = client.chat.completions.create(
            model=LIGHT_TASK_MODEL,
            messages=[
                {"role": "system", "content": "You are a helper that classifies keywords."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=100,
            temperature=0.5
        )
        raw_response = response.choices[0].message.content or ""

        # Prefer the fenced block; fall back to the raw reply so that valid
        # JSON sent without a fence is not thrown away.
        json_match = re.search(r'```json\n([\s\S]*?)\n```', raw_response)
        payload = json_match.group(1) if json_match else raw_response.strip()

        try:
            classifications = json.loads(payload)
        except ValueError:
            classifications = None

        # Callers iterate with .items(), so anything but an object is unusable.
        if not isinstance(classifications, dict):
            logging.warning(f"Failed to parse keyword classification JSON: {raw_response}")
            return all_specific

        return classifications
    except Exception as e:
        logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
        return all_specific
|
||||||
|
|
||||||
def get_flickr_image(search_query, relevance_keywords):
|
def get_flickr_image(search_query, relevance_keywords):
|
||||||
global last_flickr_request_time, flickr_request_count
|
global last_flickr_request_time, flickr_request_count
|
||||||
|
|
||||||
@@ -1070,131 +1052,6 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
|
|
||||||
last_flickr_request_time = time.time()
|
last_flickr_request_time = time.time()
|
||||||
|
|
||||||
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
|
||||||
|
|
||||||
# Helper function to search Flickr with a given query
|
|
||||||
def search_flickr(query, per_page=5): # Reduced per_page to limit results
|
|
||||||
try:
|
|
||||||
photos = flickr_api.Photo.search(
|
|
||||||
text=query,
|
|
||||||
per_page=per_page,
|
|
||||||
sort='relevance',
|
|
||||||
safe_search=1,
|
|
||||||
media='photos',
|
|
||||||
license='4,5,9,10'
|
|
||||||
)
|
|
||||||
return photos
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"Flickr API error for query '{query}': {e}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
# Helper function to fetch a Flickr photo by ID
|
|
||||||
def fetch_photo_by_id(photo_id):
|
|
||||||
try:
|
|
||||||
photo = flickr_api.Photo(id=photo_id)
|
|
||||||
return photo
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"Failed to fetch Flickr photo ID {photo_id}: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Helper function to process a photo (fetch URL and metadata only)
|
|
||||||
def process_photo(photo):
|
|
||||||
tags = [tag.text.lower() for tag in photo.getTags()]
|
|
||||||
title = photo.title.lower() if photo.title else ""
|
|
||||||
|
|
||||||
matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title]
|
|
||||||
if matched_keywords:
|
|
||||||
logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})")
|
|
||||||
return None
|
|
||||||
|
|
||||||
img_url = photo.getPhotoFile(size_label='Large')
|
|
||||||
if not img_url:
|
|
||||||
img_url = photo.getPhotoFile(size_label='Medium')
|
|
||||||
if not img_url or img_url in used_images:
|
|
||||||
return None
|
|
||||||
|
|
||||||
uploader = photo.owner.username
|
|
||||||
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
|
|
||||||
|
|
||||||
used_images.add(img_url)
|
|
||||||
save_used_images()
|
|
||||||
|
|
||||||
flickr_data = {
|
|
||||||
"title": search_query,
|
|
||||||
"image_url": img_url,
|
|
||||||
"source": "Flickr",
|
|
||||||
"uploader": uploader,
|
|
||||||
"page_url": page_url,
|
|
||||||
"timestamp": datetime.now(timezone.utc).isoformat()
|
|
||||||
}
|
|
||||||
flickr_file = "/home/shane/foodie_automator/flickr_images.json"
|
|
||||||
with open(flickr_file, 'a') as f:
|
|
||||||
json.dump(flickr_data, f)
|
|
||||||
f.write('\n')
|
|
||||||
logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}")
|
|
||||||
|
|
||||||
logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
|
|
||||||
return img_url, "Flickr", uploader, page_url
|
|
||||||
|
|
||||||
# Helper function to search DDG and extract Flickr photo IDs
|
|
||||||
def search_ddg_for_flickr(query):
|
|
||||||
ddg_query = f"{query} site:flickr.com"
|
|
||||||
ddg_url = f"https://duckduckgo.com/?q={quote(ddg_query)}"
|
|
||||||
try:
|
|
||||||
response = requests.get(ddg_url, headers=headers, timeout=10)
|
|
||||||
response.raise_for_status()
|
|
||||||
soup = BeautifulSoup(response.text, 'html.parser')
|
|
||||||
|
|
||||||
photo_ids = set()
|
|
||||||
for link in soup.find_all('a', href=True):
|
|
||||||
href = link['href']
|
|
||||||
match = re.search(r'flickr\.com/photos/[^/]+/(\d+)', href)
|
|
||||||
if match:
|
|
||||||
photo_id = match.group(1)
|
|
||||||
photo_ids.add(photo_id)
|
|
||||||
|
|
||||||
photo_ids = list(photo_ids)[:2] # Limit to 2 IDs
|
|
||||||
logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}")
|
|
||||||
return photo_ids
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"DDG search failed for query '{ddg_query}': {e}")
|
|
||||||
return set()
|
|
||||||
|
|
||||||
# Helper function to classify keywords as specific or generic
|
|
||||||
def classify_keywords(keywords):
|
|
||||||
prompt = (
|
|
||||||
"Given the following keywords from an image search query, classify each as 'specific' (e.g., brand names, unique entities like 'Taco Bell' or 'Paris') or 'generic' (e.g., common or abstract terms like 'dining' or 'trends'). "
|
|
||||||
"Return a JSON object mapping each keyword to its classification.\n\n"
|
|
||||||
"Keywords: " + ", ".join(keywords) + "\n\n"
|
|
||||||
"Example output format (do not use these exact keywords in your response):\n"
|
|
||||||
"```json\n"
|
|
||||||
"{\n"
|
|
||||||
" \"keyword1\": \"specific\",\n"
|
|
||||||
" \"keyword2\": \"generic\"\n"
|
|
||||||
"}\n```"
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
response = client.chat.completions.create(
|
|
||||||
model=LIGHT_TASK_MODEL,
|
|
||||||
messages=[
|
|
||||||
{"role": "system", "content": "You are a helper that classifies keywords."},
|
|
||||||
{"role": "user", "content": prompt}
|
|
||||||
],
|
|
||||||
max_tokens=100,
|
|
||||||
temperature=0.5
|
|
||||||
)
|
|
||||||
raw_response = response.choices[0].message.content
|
|
||||||
json_match = re.search(r'```json\n([\s\S]*?)\n```', raw_response)
|
|
||||||
if not json_match:
|
|
||||||
logging.warning(f"Failed to parse keyword classification JSON: {raw_response}")
|
|
||||||
return {kw: "specific" for kw in keywords}
|
|
||||||
|
|
||||||
classifications = json.loads(json_match.group(1))
|
|
||||||
return classifications
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
|
|
||||||
return {kw: "specific" for kw in keywords}
|
|
||||||
|
|
||||||
# Step 1: Search DDG to find Flickr photo IDs
|
# Step 1: Search DDG to find Flickr photo IDs
|
||||||
logging.info(f"Searching DDG with query: '{search_query} site:flickr.com'")
|
logging.info(f"Searching DDG with query: '{search_query} site:flickr.com'")
|
||||||
photo_ids = search_ddg_for_flickr(search_query)
|
photo_ids = search_ddg_for_flickr(search_query)
|
||||||
@@ -1202,7 +1059,7 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
for photo_id in photo_ids:
|
for photo_id in photo_ids:
|
||||||
photo = fetch_photo_by_id(photo_id)
|
photo = fetch_photo_by_id(photo_id)
|
||||||
if photo:
|
if photo:
|
||||||
result = process_photo(photo)
|
result = process_photo(photo, search_query)
|
||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@@ -1219,7 +1076,7 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
logging.info(f"Searching Flickr with specific keyword: '{keyword}'")
|
logging.info(f"Searching Flickr with specific keyword: '{keyword}'")
|
||||||
photos = search_flickr(keyword)
|
photos = search_flickr(keyword)
|
||||||
for photo in photos:
|
for photo in photos:
|
||||||
result = process_photo(photo)
|
result = process_photo(photo, search_query)
|
||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@@ -1228,7 +1085,7 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
logging.info(f"No results found. Falling back to generic query: '{fallback_query}'")
|
logging.info(f"No results found. Falling back to generic query: '{fallback_query}'")
|
||||||
photos = search_flickr(fallback_query)
|
photos = search_flickr(fallback_query)
|
||||||
for photo in photos:
|
for photo in photos:
|
||||||
result = process_photo(photo)
|
result = process_photo(photo, search_query)
|
||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user