Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 427a5cb919 | |||
| 6d945dae67 | |||
| 1fd1ad361b | |||
| a5182bdfb9 | |||
| be6514e4e3 | |||
| c936555741 | |||
| cdc54f3f14 | |||
| aabc989e1c | |||
| b025afe9f3 |
@@ -256,9 +256,6 @@ def curate_from_google_trends(geo_list=['US']):
|
|||||||
if not image_url:
|
if not image_url:
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||||
|
|
||||||
# Log the fetched image details
|
|
||||||
logging.info(f"Fetched image for '{post_data['title']}': URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}")
|
|
||||||
|
|
||||||
hook = get_dynamic_hook(post_data["title"]).strip()
|
hook = get_dynamic_hook(post_data["title"]).strip()
|
||||||
|
|
||||||
# Generate viral share prompt
|
# Generate viral share prompt
|
||||||
@@ -294,7 +291,8 @@ def curate_from_google_trends(geo_list=['US']):
|
|||||||
share_text_encoded = quote(share_text)
|
share_text_encoded = quote(share_text)
|
||||||
post_url_encoded = quote(post_url)
|
post_url_encoded = quote(post_url)
|
||||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
|
||||||
|
post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
|
||||||
is_posting = True
|
is_posting = True
|
||||||
try:
|
try:
|
||||||
post_to_wp(
|
post_to_wp(
|
||||||
@@ -320,16 +318,6 @@ def curate_from_google_trends(geo_list=['US']):
|
|||||||
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
|
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
|
||||||
|
|
||||||
if image_url:
|
if image_url:
|
||||||
# Check if image is already used
|
|
||||||
used_images_list = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
|
|
||||||
used_image_urls = {entry["title"] for entry in used_images_list}
|
|
||||||
if image_url in used_image_urls:
|
|
||||||
logging.warning(f"Image '{image_url}' already used, attempting to fetch a new image")
|
|
||||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
|
|
||||||
if not image_url:
|
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
|
||||||
logging.info(f"New image fetched: URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}")
|
|
||||||
|
|
||||||
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
||||||
used_images.add(image_url)
|
used_images.add(image_url)
|
||||||
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
||||||
|
|||||||
@@ -211,7 +211,7 @@ def curate_from_reddit():
|
|||||||
if not articles:
|
if not articles:
|
||||||
print("No Reddit posts available")
|
print("No Reddit posts available")
|
||||||
logging.info("No Reddit posts available")
|
logging.info("No Reddit posts available")
|
||||||
return None, None, random.randint(600, 1800)
|
return None, None, None
|
||||||
|
|
||||||
articles.sort(key=lambda x: x["upvotes"], reverse=True)
|
articles.sort(key=lambda x: x["upvotes"], reverse=True)
|
||||||
|
|
||||||
@@ -299,10 +299,8 @@ def curate_from_reddit():
|
|||||||
if not image_url:
|
if not image_url:
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||||
|
|
||||||
# Log the fetched image details
|
|
||||||
logging.info(f"Fetched image for '{post_data['title']}': URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}")
|
|
||||||
|
|
||||||
hook = get_dynamic_hook(post_data["title"]).strip()
|
hook = get_dynamic_hook(post_data["title"]).strip()
|
||||||
|
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=None)
|
||||||
|
|
||||||
# Generate viral share prompt
|
# Generate viral share prompt
|
||||||
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
|
||||||
@@ -311,7 +309,7 @@ def curate_from_reddit():
|
|||||||
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
||||||
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
||||||
)
|
)
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
|
post_data["content"] = f"{final_summary}\n\n{share_links_template}" # Removed cta from content
|
||||||
|
|
||||||
global is_posting
|
global is_posting
|
||||||
is_posting = True
|
is_posting = True
|
||||||
@@ -337,7 +335,8 @@ def curate_from_reddit():
|
|||||||
share_text_encoded = quote(share_text)
|
share_text_encoded = quote(share_text)
|
||||||
post_url_encoded = quote(post_url)
|
post_url_encoded = quote(post_url)
|
||||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
|
||||||
|
post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
|
||||||
is_posting = True
|
is_posting = True
|
||||||
try:
|
try:
|
||||||
post_to_wp(
|
post_to_wp(
|
||||||
@@ -363,16 +362,6 @@ def curate_from_reddit():
|
|||||||
logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}")
|
logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}")
|
||||||
|
|
||||||
if image_url:
|
if image_url:
|
||||||
# Check if image is already used
|
|
||||||
used_images_list = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
|
|
||||||
used_image_urls = {entry["title"] for entry in used_images_list}
|
|
||||||
if image_url in used_image_urls:
|
|
||||||
logging.warning(f"Image '{image_url}' already used, attempting to fetch a new image")
|
|
||||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
|
|
||||||
if not image_url:
|
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
|
||||||
logging.info(f"New image fetched: URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}")
|
|
||||||
|
|
||||||
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
||||||
used_images.add(image_url)
|
used_images.add(image_url)
|
||||||
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE} with timestamp {timestamp}")
|
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE} with timestamp {timestamp}")
|
||||||
|
|||||||
+8
-19
@@ -32,10 +32,6 @@ from dotenv import load_dotenv
|
|||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
# Log script version to ensure it's the latest
|
|
||||||
SCRIPT_VERSION = "1.2.0"
|
|
||||||
logging.info(f"Starting foodie_automator_rss.py version {SCRIPT_VERSION}")
|
|
||||||
|
|
||||||
is_posting = False
|
is_posting = False
|
||||||
|
|
||||||
def signal_handler(sig, frame):
|
def signal_handler(sig, frame):
|
||||||
@@ -273,10 +269,12 @@ def curate_from_rss():
|
|||||||
# Fetch image
|
# Fetch image
|
||||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
|
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
|
||||||
if not image_url:
|
if not image_url:
|
||||||
|
logging.info(f"Flickr fetch failed for '{image_query}'. Falling back to Pixabay.")
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||||
|
if not image_url:
|
||||||
# Log the fetched image details
|
logging.info(f"Pixabay fetch failed for '{image_query}'. Skipping article '{title}'.")
|
||||||
logging.info(f"Fetched image for '{post_data['title']}': URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}")
|
attempts += 1
|
||||||
|
continue
|
||||||
|
|
||||||
hook = get_dynamic_hook(post_data["title"]).strip()
|
hook = get_dynamic_hook(post_data["title"]).strip()
|
||||||
|
|
||||||
@@ -287,7 +285,7 @@ def curate_from_rss():
|
|||||||
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
|
||||||
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
|
||||||
)
|
)
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links_template}"
|
post_data["content"] = f"{final_summary}\n\n{share_links_template}" # Removed cta from content
|
||||||
|
|
||||||
global is_posting
|
global is_posting
|
||||||
is_posting = True
|
is_posting = True
|
||||||
@@ -313,7 +311,8 @@ def curate_from_rss():
|
|||||||
share_text_encoded = quote(share_text)
|
share_text_encoded = quote(share_text)
|
||||||
post_url_encoded = quote(post_url)
|
post_url_encoded = quote(post_url)
|
||||||
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
|
||||||
post_data["content"] = f"{final_summary}\n\n{share_links}"
|
# Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
|
||||||
|
post_data["content"] = f"{final_summary}\n\n{share_links}" # Removed cta from content
|
||||||
is_posting = True
|
is_posting = True
|
||||||
try:
|
try:
|
||||||
post_to_wp(
|
post_to_wp(
|
||||||
@@ -339,16 +338,6 @@ def curate_from_rss():
|
|||||||
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
|
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
|
||||||
|
|
||||||
if image_url:
|
if image_url:
|
||||||
# Check if image is already used
|
|
||||||
used_images_list = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
|
|
||||||
used_image_urls = {entry["title"] for entry in used_images_list}
|
|
||||||
if image_url in used_image_urls:
|
|
||||||
logging.warning(f"Image '{image_url}' already used, attempting to fetch a new image")
|
|
||||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
|
|
||||||
if not image_url:
|
|
||||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
|
||||||
logging.info(f"New image fetched: URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}")
|
|
||||||
|
|
||||||
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
|
||||||
used_images.add(image_url)
|
used_images.add(image_url)
|
||||||
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
||||||
|
|||||||
+104
-247
@@ -236,178 +236,12 @@ def select_best_persona(interest_score, content=""):
|
|||||||
return random.choice(personas)
|
return random.choice(personas)
|
||||||
|
|
||||||
def get_image(search_query):
|
def get_image(search_query):
|
||||||
global last_flickr_request_time, flickr_request_count
|
|
||||||
|
|
||||||
reset_flickr_request_count()
|
|
||||||
flickr_request_count += 1
|
|
||||||
logging.info(f"Flickr request count: {flickr_request_count}/3600")
|
|
||||||
|
|
||||||
current_time = time.time()
|
|
||||||
time_since_last_request = current_time - last_flickr_request_time
|
|
||||||
if time_since_last_request < 10:
|
|
||||||
time.sleep(10 - time_since_last_request)
|
|
||||||
|
|
||||||
last_flickr_request_time = time.time()
|
|
||||||
|
|
||||||
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
||||||
|
|
||||||
def search_flickr(query, per_page=5):
|
# Try Pixabay with the original query
|
||||||
try:
|
|
||||||
photos = flickr_api.Photo.search(
|
|
||||||
text=query,
|
|
||||||
per_page=per_page,
|
|
||||||
sort='relevance',
|
|
||||||
safe_search=1,
|
|
||||||
media='photos',
|
|
||||||
license='4,5,9,10'
|
|
||||||
)
|
|
||||||
return photos
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"Flickr API error for query '{query}': {e}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
def fetch_photo_by_id(photo_id):
|
|
||||||
try:
|
|
||||||
photo = flickr_api.Photo(id=photo_id)
|
|
||||||
return photo
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"Failed to fetch Flickr photo ID {photo_id}: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def process_photo(photo):
|
|
||||||
tags = [tag.text.lower() for tag in photo.getTags()]
|
|
||||||
title = photo.title.lower() if photo.title else ""
|
|
||||||
|
|
||||||
matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title]
|
|
||||||
if matched_keywords:
|
|
||||||
logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})")
|
|
||||||
return None
|
|
||||||
|
|
||||||
img_url = photo.getPhotoFile(size_label='Medium')
|
|
||||||
if not img_url or img_url in used_images:
|
|
||||||
return None
|
|
||||||
|
|
||||||
uploader = photo.owner.username
|
|
||||||
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
|
|
||||||
|
|
||||||
used_images.add(img_url)
|
|
||||||
save_used_images()
|
|
||||||
|
|
||||||
flickr_data = {
|
|
||||||
"title": search_query,
|
|
||||||
"image_url": img_url,
|
|
||||||
"source": "Flickr",
|
|
||||||
"uploader": uploader,
|
|
||||||
"page_url": page_url,
|
|
||||||
"timestamp": datetime.now(timezone.utc).isoformat()
|
|
||||||
}
|
|
||||||
flickr_file = "/home/shane/foodie_automator/flickr_images.json"
|
|
||||||
with open(flickr_file, 'a') as f:
|
|
||||||
json.dump(flickr_data, f)
|
|
||||||
f.write('\n')
|
|
||||||
logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}")
|
|
||||||
|
|
||||||
logging.info(f"Fallback Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
|
|
||||||
return img_url, "Flickr", uploader, page_url
|
|
||||||
|
|
||||||
def search_ddg_for_flickr(query):
|
|
||||||
ddg_query = f"{query} site:flickr.com"
|
|
||||||
ddg_url = f"https://duckduckgo.com/?q={quote(ddg_query)}"
|
|
||||||
try:
|
|
||||||
response = requests.get(ddg_url, headers=headers, timeout=10)
|
|
||||||
response.raise_for_status()
|
|
||||||
soup = BeautifulSoup(response.text, 'html.parser')
|
|
||||||
|
|
||||||
photo_ids = set()
|
|
||||||
for link in soup.find_all('a', href=True):
|
|
||||||
href = link['href']
|
|
||||||
match = re.search(r'flickr\.com/photos/[^/]+/(\d+)', href)
|
|
||||||
if match:
|
|
||||||
photo_id = match.group(1)
|
|
||||||
photo_ids.add(photo_id)
|
|
||||||
|
|
||||||
photo_ids = list(photo_ids)[:2] # Limit to 2 IDs
|
|
||||||
logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}")
|
|
||||||
return photo_ids
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"DDG search failed for query '{ddg_query}': {e}")
|
|
||||||
return set()
|
|
||||||
|
|
||||||
def classify_keywords(keywords):
|
|
||||||
prompt = (
|
|
||||||
"Given the following keywords from an image search query, classify each as 'specific' (e.g., brand names, unique entities like 'Taco Bell' or 'Paris') or 'generic' (e.g., common or abstract terms like 'dining' or 'trends'). "
|
|
||||||
"Return a JSON object mapping each keyword to its classification.\n\n"
|
|
||||||
"Keywords: " + ", ".join(keywords) + "\n\n"
|
|
||||||
"Example output format (do not use these exact keywords in your response):\n"
|
|
||||||
"```json\n"
|
|
||||||
"{\n"
|
|
||||||
" \"keyword1\": \"specific\",\n"
|
|
||||||
" \"keyword2\": \"generic\"\n"
|
|
||||||
"}\n```"
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
response = client.chat.completions.create(
|
|
||||||
model=LIGHT_TASK_MODEL,
|
|
||||||
messages=[
|
|
||||||
{"role": "system", "content": "You are a helper that classifies keywords."},
|
|
||||||
{"role": "user", "content": prompt}
|
|
||||||
],
|
|
||||||
max_tokens=100,
|
|
||||||
temperature=0.5
|
|
||||||
)
|
|
||||||
raw_response = response.choices[0].message.content
|
|
||||||
json_match = re.search(r'```json\n([\s\S]*?)\n```', raw_response)
|
|
||||||
if not json_match:
|
|
||||||
logging.warning(f"Failed to parse keyword classification JSON: {raw_response}")
|
|
||||||
return {kw: "specific" for kw in keywords}
|
|
||||||
|
|
||||||
classifications = json.loads(json_match.group(1))
|
|
||||||
return classifications
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
|
|
||||||
return {kw: "specific" for kw in keywords}
|
|
||||||
|
|
||||||
# Step 1: Search DDG to find Flickr photo IDs
|
|
||||||
logging.info(f"Searching DDG with query: '{search_query} site:flickr.com'")
|
|
||||||
photo_ids = search_ddg_for_flickr(search_query)
|
|
||||||
if photo_ids:
|
|
||||||
for photo_id in photo_ids:
|
|
||||||
photo = fetch_photo_by_id(photo_id)
|
|
||||||
if photo:
|
|
||||||
result = process_photo(photo)
|
|
||||||
if result:
|
|
||||||
return result
|
|
||||||
|
|
||||||
# Step 2: Break down the query into keywords and classify them for direct Flickr API search
|
|
||||||
keywords = search_query.lower().split()
|
|
||||||
if len(keywords) > 1:
|
|
||||||
classifications = classify_keywords(keywords)
|
|
||||||
logging.info(f"Keyword classifications: {classifications}")
|
|
||||||
|
|
||||||
specific_keywords = [kw for kw, classification in classifications.items() if classification == "specific"]
|
|
||||||
if specific_keywords:
|
|
||||||
for keyword in specific_keywords:
|
|
||||||
logging.info(f"Searching Flickr with specific keyword: '{keyword}'")
|
|
||||||
photos = search_flickr(keyword)
|
|
||||||
for photo in photos:
|
|
||||||
result = process_photo(photo)
|
|
||||||
if result:
|
|
||||||
return result
|
|
||||||
|
|
||||||
# Step 3: Final fallback to a generic food-related query
|
|
||||||
logging.info(f"No results found. Falling back to generic query: 'food dining'")
|
|
||||||
photos = search_flickr("food dining")
|
|
||||||
for photo in photos:
|
|
||||||
result = process_photo(photo)
|
|
||||||
if result:
|
|
||||||
return result
|
|
||||||
|
|
||||||
logging.warning(f"No valid Flickr image found in fallback for query '{search_query}'. Trying Pixabay.")
|
|
||||||
|
|
||||||
# Fallback to Pixabay
|
|
||||||
try:
|
try:
|
||||||
pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(search_query)}&image_type=photo&per_page=10"
|
pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(search_query)}&image_type=photo&per_page=10"
|
||||||
response = requests.get(pixabay_url, timeout=10)
|
response = requests.get(pixabay_url, headers=headers, timeout=10)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
data = response.json()
|
||||||
|
|
||||||
@@ -421,14 +255,42 @@ def get_image(search_query):
|
|||||||
used_images.add(img_url)
|
used_images.add(img_url)
|
||||||
save_used_images()
|
save_used_images()
|
||||||
|
|
||||||
logging.debug(f"Image selected for query '{search_query}': {img_url}")
|
logging.info(f"Selected Pixabay image: {img_url} by {uploader} for query '{search_query}'")
|
||||||
return img_url, "Pixabay", uploader, page_url
|
return img_url, "Pixabay", uploader, page_url
|
||||||
|
|
||||||
logging.warning(f"No valid Pixabay image found for query '{search_query}'.")
|
logging.info(f"No valid Pixabay image found for query '{search_query}'. Trying fallback query.")
|
||||||
return None, None, None, None
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Pixabay image fetch failed for query '{search_query}': {e}")
|
logging.warning(f"Pixabay image fetch failed for query '{search_query}': {e}")
|
||||||
|
|
||||||
|
# Fallback to a generic query
|
||||||
|
fallback_query = "food dining"
|
||||||
|
try:
|
||||||
|
pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(fallback_query)}&image_type=photo&per_page=10"
|
||||||
|
response = requests.get(pixabay_url, headers=headers, timeout=10)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
|
for hit in data.get('hits', []):
|
||||||
|
img_url = hit.get('webformatURL')
|
||||||
|
if not img_url or img_url in used_images:
|
||||||
|
continue
|
||||||
|
uploader = hit.get('user', 'Unknown')
|
||||||
|
page_url = hit.get('pageURL', img_url)
|
||||||
|
|
||||||
|
used_images.add(img_url)
|
||||||
|
save_used_images()
|
||||||
|
|
||||||
|
logging.info(f"Selected Pixabay fallback image: {img_url} by {uploader} for query '{fallback_query}'")
|
||||||
|
return img_url, "Pixabay", uploader, page_url
|
||||||
|
|
||||||
|
logging.warning(f"No valid Pixabay image found for fallback query '{fallback_query}'.")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f"Pixabay fallback image fetch failed for query '{fallback_query}': {e}")
|
||||||
|
|
||||||
|
# Ultimate fallback: return None but log clearly
|
||||||
|
logging.error(f"All image fetch attempts failed for query '{search_query}'. Returning None.")
|
||||||
return None, None, None, None
|
return None, None, None, None
|
||||||
|
|
||||||
def generate_image_query(title, summary):
|
def generate_image_query(title, summary):
|
||||||
@@ -1010,24 +872,19 @@ if os.path.exists(used_images_file):
|
|||||||
else:
|
else:
|
||||||
data = json.loads(content)
|
data = json.loads(content)
|
||||||
if not isinstance(data, list):
|
if not isinstance(data, list):
|
||||||
logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Resetting.")
|
logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Converting to list.")
|
||||||
|
if isinstance(data, dict):
|
||||||
|
# If it's a dict, try to extract URLs from values
|
||||||
|
data = [v for v in data.values() if isinstance(v, str) and v.startswith('https://')]
|
||||||
|
else:
|
||||||
|
logging.warning(f"Cannot convert {type(data)} to list. Resetting to empty list.")
|
||||||
data = []
|
data = []
|
||||||
else:
|
# Filter out non-string or non-URL entries
|
||||||
# Handle malformed format (list of lists or invalid entries)
|
data = [item for item in data if isinstance(item, str) and item.startswith('https://')]
|
||||||
flat_data = []
|
|
||||||
for item in data:
|
|
||||||
if isinstance(item, str) and item.startswith('https://'):
|
|
||||||
flat_data.append(item)
|
|
||||||
elif isinstance(item, list):
|
|
||||||
logging.warning(f"Fixing malformed entry in {used_images_file}: {item}")
|
|
||||||
flat_data.extend([sub_item for sub_item in item if isinstance(sub_item, str) and sub_item.startswith('https://')])
|
|
||||||
else:
|
|
||||||
logging.warning(f"Skipping invalid entry in {used_images_file}: {item}")
|
|
||||||
data = flat_data
|
|
||||||
used_images.update(data)
|
used_images.update(data)
|
||||||
logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}")
|
logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting file.")
|
logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting to empty set.")
|
||||||
used_images = set()
|
used_images = set()
|
||||||
with open(used_images_file, 'w') as f:
|
with open(used_images_file, 'w') as f:
|
||||||
json.dump([], f)
|
json.dump([], f)
|
||||||
@@ -1035,17 +892,14 @@ if os.path.exists(used_images_file):
|
|||||||
# Function to save used_images to file
|
# Function to save used_images to file
|
||||||
def save_used_images():
|
def save_used_images():
|
||||||
try:
|
try:
|
||||||
|
# Ensure used_images contains only valid URLs
|
||||||
|
valid_urls = [url for url in used_images if isinstance(url, str) and url.startswith('https://')]
|
||||||
|
if len(valid_urls) != len(used_images):
|
||||||
|
logging.warning(f"Found {len(used_images) - len(valid_urls)} invalid URLs in used_images set")
|
||||||
|
|
||||||
with open(used_images_file, 'w') as f:
|
with open(used_images_file, 'w') as f:
|
||||||
f.write('[\n')
|
json.dump(valid_urls, f, indent=2)
|
||||||
urls = list(used_images)
|
logging.info(f"Saved {len(valid_urls)} used image URLs to {used_images_file}")
|
||||||
for i, url in enumerate(urls):
|
|
||||||
f.write(f'"{url}"')
|
|
||||||
if i < len(urls) - 1:
|
|
||||||
f.write(',\n')
|
|
||||||
else:
|
|
||||||
f.write('\n')
|
|
||||||
f.write(']')
|
|
||||||
logging.info(f"Saved {len(used_images)} used image URLs to {used_images_file}")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"Failed to save used images to {used_images_file}: {e}")
|
logging.warning(f"Failed to save used images to {used_images_file}: {e}")
|
||||||
|
|
||||||
@@ -1055,50 +909,7 @@ def reset_flickr_request_count():
|
|||||||
flickr_request_count = 0
|
flickr_request_count = 0
|
||||||
flickr_request_start_time = time.time()
|
flickr_request_start_time = time.time()
|
||||||
|
|
||||||
def get_flickr_image(search_query, relevance_keywords):
|
def process_photo(photo, search_query):
|
||||||
global last_flickr_request_time, flickr_request_count
|
|
||||||
|
|
||||||
reset_flickr_request_count()
|
|
||||||
flickr_request_count += 1
|
|
||||||
logging.info(f"Flickr request count: {flickr_request_count}/3600")
|
|
||||||
|
|
||||||
# Enforce a minimum delay of 10 seconds between Flickr requests
|
|
||||||
current_time = time.time()
|
|
||||||
time_since_last_request = current_time - last_flickr_request_time
|
|
||||||
if time_since_last_request < 10:
|
|
||||||
time.sleep(10 - time_since_last_request)
|
|
||||||
|
|
||||||
last_flickr_request_time = time.time()
|
|
||||||
|
|
||||||
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
|
||||||
|
|
||||||
# Helper function to search Flickr with a given query
|
|
||||||
def search_flickr(query, per_page=5): # Reduced per_page to limit results
|
|
||||||
try:
|
|
||||||
photos = flickr_api.Photo.search(
|
|
||||||
text=query,
|
|
||||||
per_page=per_page,
|
|
||||||
sort='relevance',
|
|
||||||
safe_search=1,
|
|
||||||
media='photos',
|
|
||||||
license='4,5,9,10'
|
|
||||||
)
|
|
||||||
return photos
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"Flickr API error for query '{query}': {e}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
# Helper function to fetch a Flickr photo by ID
|
|
||||||
def fetch_photo_by_id(photo_id):
|
|
||||||
try:
|
|
||||||
photo = flickr_api.Photo(id=photo_id)
|
|
||||||
return photo
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"Failed to fetch Flickr photo ID {photo_id}: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Helper function to process a photo (fetch URL and metadata only)
|
|
||||||
def process_photo(photo):
|
|
||||||
tags = [tag.text.lower() for tag in photo.getTags()]
|
tags = [tag.text.lower() for tag in photo.getTags()]
|
||||||
title = photo.title.lower() if photo.title else ""
|
title = photo.title.lower() if photo.title else ""
|
||||||
|
|
||||||
@@ -1107,10 +918,20 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})")
|
logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# Try 'Large' size first, fall back to 'Medium' if unavailable
|
||||||
|
img_url = None
|
||||||
|
try:
|
||||||
img_url = photo.getPhotoFile(size_label='Large')
|
img_url = photo.getPhotoFile(size_label='Large')
|
||||||
if not img_url:
|
except flickr_api.flickrerrors.FlickrError as e:
|
||||||
|
logging.info(f"Large size not available for photo {photo.id}: {e}, trying Medium")
|
||||||
|
try:
|
||||||
img_url = photo.getPhotoFile(size_label='Medium')
|
img_url = photo.getPhotoFile(size_label='Medium')
|
||||||
|
except flickr_api.flickrerrors.FlickrError as e:
|
||||||
|
logging.warning(f"Medium size not available for photo {photo.id}: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
if not img_url or img_url in used_images:
|
if not img_url or img_url in used_images:
|
||||||
|
logging.info(f"Image URL invalid or already used for photo {photo.id}: {img_url}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
uploader = photo.owner.username
|
uploader = photo.owner.username
|
||||||
@@ -1136,12 +957,34 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
|
logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
|
||||||
return img_url, "Flickr", uploader, page_url
|
return img_url, "Flickr", uploader, page_url
|
||||||
|
|
||||||
# Helper function to search DDG and extract Flickr photo IDs
|
def search_flickr(query, per_page=5):
|
||||||
|
try:
|
||||||
|
photos = flickr_api.Photo.search(
|
||||||
|
text=query,
|
||||||
|
per_page=per_page,
|
||||||
|
sort='relevance',
|
||||||
|
safe_search=1,
|
||||||
|
media='photos',
|
||||||
|
license='4,5,9,10'
|
||||||
|
)
|
||||||
|
return photos
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f"Flickr API error for query '{query}': {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
def fetch_photo_by_id(photo_id):
|
||||||
|
try:
|
||||||
|
photo = flickr_api.Photo(id=photo_id)
|
||||||
|
return photo
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f"Failed to fetch Flickr photo ID {photo_id}: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
def search_ddg_for_flickr(query):
|
def search_ddg_for_flickr(query):
|
||||||
ddg_query = f"{query} site:flickr.com"
|
ddg_query = f"{query} site:flickr.com"
|
||||||
ddg_url = f"https://duckduckgo.com/?q={quote(ddg_query)}"
|
ddg_url = f"https://duckduckgo.com/?q={quote(ddg_query)}"
|
||||||
try:
|
try:
|
||||||
response = requests.get(ddg_url, headers=headers, timeout=10)
|
response = requests.get(ddg_url, headers={'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}, timeout=10)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
soup = BeautifulSoup(response.text, 'html.parser')
|
soup = BeautifulSoup(response.text, 'html.parser')
|
||||||
|
|
||||||
@@ -1160,7 +1003,6 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
logging.warning(f"DDG search failed for query '{ddg_query}': {e}")
|
logging.warning(f"DDG search failed for query '{ddg_query}': {e}")
|
||||||
return set()
|
return set()
|
||||||
|
|
||||||
# Helper function to classify keywords as specific or generic
|
|
||||||
def classify_keywords(keywords):
|
def classify_keywords(keywords):
|
||||||
prompt = (
|
prompt = (
|
||||||
"Given the following keywords from an image search query, classify each as 'specific' (e.g., brand names, unique entities like 'Taco Bell' or 'Paris') or 'generic' (e.g., common or abstract terms like 'dining' or 'trends'). "
|
"Given the following keywords from an image search query, classify each as 'specific' (e.g., brand names, unique entities like 'Taco Bell' or 'Paris') or 'generic' (e.g., common or abstract terms like 'dining' or 'trends'). "
|
||||||
@@ -1195,6 +1037,21 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
|
logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
|
||||||
return {kw: "specific" for kw in keywords}
|
return {kw: "specific" for kw in keywords}
|
||||||
|
|
||||||
|
def get_flickr_image(search_query, relevance_keywords):
|
||||||
|
global last_flickr_request_time, flickr_request_count
|
||||||
|
|
||||||
|
reset_flickr_request_count()
|
||||||
|
flickr_request_count += 1
|
||||||
|
logging.info(f"Flickr request count: {flickr_request_count}/3600")
|
||||||
|
|
||||||
|
# Enforce a minimum delay of 10 seconds between Flickr requests
|
||||||
|
current_time = time.time()
|
||||||
|
time_since_last_request = current_time - last_flickr_request_time
|
||||||
|
if time_since_last_request < 10:
|
||||||
|
time.sleep(10 - time_since_last_request)
|
||||||
|
|
||||||
|
last_flickr_request_time = time.time()
|
||||||
|
|
||||||
# Step 1: Search DDG to find Flickr photo IDs
|
# Step 1: Search DDG to find Flickr photo IDs
|
||||||
logging.info(f"Searching DDG with query: '{search_query} site:flickr.com'")
|
logging.info(f"Searching DDG with query: '{search_query} site:flickr.com'")
|
||||||
photo_ids = search_ddg_for_flickr(search_query)
|
photo_ids = search_ddg_for_flickr(search_query)
|
||||||
@@ -1202,7 +1059,7 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
for photo_id in photo_ids:
|
for photo_id in photo_ids:
|
||||||
photo = fetch_photo_by_id(photo_id)
|
photo = fetch_photo_by_id(photo_id)
|
||||||
if photo:
|
if photo:
|
||||||
result = process_photo(photo)
|
result = process_photo(photo, search_query)
|
||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@@ -1219,7 +1076,7 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
logging.info(f"Searching Flickr with specific keyword: '{keyword}'")
|
logging.info(f"Searching Flickr with specific keyword: '{keyword}'")
|
||||||
photos = search_flickr(keyword)
|
photos = search_flickr(keyword)
|
||||||
for photo in photos:
|
for photo in photos:
|
||||||
result = process_photo(photo)
|
result = process_photo(photo, search_query)
|
||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@@ -1228,7 +1085,7 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
logging.info(f"No results found. Falling back to generic query: '{fallback_query}'")
|
logging.info(f"No results found. Falling back to generic query: '{fallback_query}'")
|
||||||
photos = search_flickr(fallback_query)
|
photos = search_flickr(fallback_query)
|
||||||
for photo in photos:
|
for photo in photos:
|
||||||
result = process_photo(photo)
|
result = process_photo(photo, search_query)
|
||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user