Try to avoid Flickr rate limiting: upload images directly to WordPress
This commit is contained in:
+76
-142
@@ -227,14 +227,14 @@ def get_image(search_query):
|
|||||||
|
|
||||||
current_time = time.time()
|
current_time = time.time()
|
||||||
time_since_last_request = current_time - last_flickr_request_time
|
time_since_last_request = current_time - last_flickr_request_time
|
||||||
if time_since_last_request < 5:
|
if time_since_last_request < 10:
|
||||||
time.sleep(5 - time_since_last_request)
|
time.sleep(10 - time_since_last_request)
|
||||||
|
|
||||||
last_flickr_request_time = time.time()
|
last_flickr_request_time = time.time()
|
||||||
|
|
||||||
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
||||||
|
|
||||||
def search_flickr(query, per_page=20):
|
def search_flickr(query, per_page=5):
|
||||||
try:
|
try:
|
||||||
photos = flickr_api.Photo.search(
|
photos = flickr_api.Photo.search(
|
||||||
text=query,
|
text=query,
|
||||||
@@ -270,71 +270,28 @@ def get_image(search_query):
|
|||||||
if not img_url or img_url in used_images:
|
if not img_url or img_url in used_images:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
temp_file = None
|
uploader = photo.owner.username
|
||||||
try:
|
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
|
||||||
for attempt in range(3):
|
|
||||||
img_response = requests.get(img_url, headers=headers, timeout=10)
|
used_images.add(img_url)
|
||||||
if img_response.status_code == 429:
|
save_used_images()
|
||||||
wait_time = 5 * (2 ** attempt)
|
|
||||||
logging.warning(f"Rate limit hit for {img_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).")
|
flickr_data = {
|
||||||
time.sleep(wait_time)
|
"title": search_query,
|
||||||
continue
|
"image_url": img_url,
|
||||||
img_response.raise_for_status()
|
"source": "Flickr",
|
||||||
break
|
"uploader": uploader,
|
||||||
else:
|
"page_url": page_url,
|
||||||
logging.warning(f"Rate limit hit for {img_url} after retries. Falling back to Pixabay.")
|
"timestamp": datetime.now(timezone.utc).isoformat()
|
||||||
return None
|
}
|
||||||
|
flickr_file = "/home/shane/foodie_automator/flickr_images.json"
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
|
with open(flickr_file, 'a') as f:
|
||||||
temp_file.write(img_response.content)
|
json.dump(flickr_data, f)
|
||||||
temp_path = temp_file.name
|
f.write('\n')
|
||||||
|
logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}")
|
||||||
img = Image.open(temp_path)
|
|
||||||
text = pytesseract.image_to_string(img)
|
logging.info(f"Fallback Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
|
||||||
char_count = len(text.strip())
|
return img_url, "Flickr", uploader, page_url
|
||||||
logging.info(f"OCR processed {img_url}: {char_count} characters detected")
|
|
||||||
|
|
||||||
if char_count > 200:
|
|
||||||
logging.info(f"Skipping text-heavy image (OCR): {img_url} (char_count: {char_count})")
|
|
||||||
return None
|
|
||||||
|
|
||||||
uploader = photo.owner.username
|
|
||||||
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
|
|
||||||
|
|
||||||
used_images.add(img_url)
|
|
||||||
save_used_images()
|
|
||||||
|
|
||||||
flickr_data = {
|
|
||||||
"title": search_query,
|
|
||||||
"image_url": img_url,
|
|
||||||
"source": "Flickr",
|
|
||||||
"uploader": uploader,
|
|
||||||
"page_url": page_url,
|
|
||||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
|
||||||
"ocr_chars": char_count
|
|
||||||
}
|
|
||||||
flickr_file = "/home/shane/foodie_automator/flickr_images.json"
|
|
||||||
with open(flickr_file, 'a') as f:
|
|
||||||
json.dump(flickr_data, f)
|
|
||||||
f.write('\n')
|
|
||||||
logging.info(f"Saved Flickr image to {flickr_file}: {img_url}")
|
|
||||||
|
|
||||||
logging.info(f"Fallback Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
|
|
||||||
return img_url, "Flickr", uploader, page_url
|
|
||||||
|
|
||||||
except requests.exceptions.HTTPError as e:
|
|
||||||
if e.response.status_code == 429:
|
|
||||||
logging.warning(f"Rate limit hit for {img_url} after retries. Falling back to Pixabay.")
|
|
||||||
return None
|
|
||||||
else:
|
|
||||||
logging.warning(f"Download failed for {img_url}: {e}")
|
|
||||||
return None
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"OCR processing failed for {img_url}: {e}")
|
|
||||||
return None
|
|
||||||
finally:
|
|
||||||
if temp_file and os.path.exists(temp_path):
|
|
||||||
os.unlink(temp_path)
|
|
||||||
|
|
||||||
def search_ddg_for_flickr(query):
|
def search_ddg_for_flickr(query):
|
||||||
ddg_query = f"{query} site:flickr.com"
|
ddg_query = f"{query} site:flickr.com"
|
||||||
@@ -352,7 +309,7 @@ def get_image(search_query):
|
|||||||
photo_id = match.group(1)
|
photo_id = match.group(1)
|
||||||
photo_ids.add(photo_id)
|
photo_ids.add(photo_id)
|
||||||
|
|
||||||
photo_ids = list(photo_ids)[:5] # Limit to 5 IDs
|
photo_ids = list(photo_ids)[:2] # Limit to 2 IDs
|
||||||
logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}")
|
logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}")
|
||||||
return photo_ids
|
return photo_ids
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -571,9 +528,29 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw
|
|||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
||||||
}
|
}
|
||||||
logging.info(f"Fetching image from {image_url} for '{post_title}'")
|
logging.info(f"Fetching image from {image_url} for '{post_title}'")
|
||||||
image_response = requests.get(image_url, headers=image_headers, timeout=10)
|
|
||||||
image_response.raise_for_status()
|
|
||||||
|
|
||||||
|
# Add rate limit handling for image download
|
||||||
|
for attempt in range(3):
|
||||||
|
try:
|
||||||
|
image_response = requests.get(image_url, headers=image_headers, timeout=10)
|
||||||
|
if image_response.status_code == 429:
|
||||||
|
wait_time = 10 * (2 ** attempt) # 10s, 20s, 40s
|
||||||
|
logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).")
|
||||||
|
time.sleep(wait_time)
|
||||||
|
continue
|
||||||
|
image_response.raise_for_status()
|
||||||
|
break
|
||||||
|
except requests.exceptions.HTTPError as e:
|
||||||
|
if e.response.status_code == 429:
|
||||||
|
wait_time = 10 * (2 ** attempt)
|
||||||
|
logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).")
|
||||||
|
time.sleep(wait_time)
|
||||||
|
continue
|
||||||
|
raise
|
||||||
|
else:
|
||||||
|
logging.warning(f"Rate limit hit for {image_url} after retries. Failing image upload.")
|
||||||
|
return None
|
||||||
|
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
f"{wp_base_url}/media",
|
f"{wp_base_url}/media",
|
||||||
headers=headers,
|
headers=headers,
|
||||||
@@ -1044,18 +1021,18 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
flickr_request_count += 1
|
flickr_request_count += 1
|
||||||
logging.info(f"Flickr request count: {flickr_request_count}/3600")
|
logging.info(f"Flickr request count: {flickr_request_count}/3600")
|
||||||
|
|
||||||
# Enforce a minimum delay of 5 seconds between Flickr requests
|
# Enforce a minimum delay of 10 seconds between Flickr requests
|
||||||
current_time = time.time()
|
current_time = time.time()
|
||||||
time_since_last_request = current_time - last_flickr_request_time
|
time_since_last_request = current_time - last_flickr_request_time
|
||||||
if time_since_last_request < 5:
|
if time_since_last_request < 10:
|
||||||
time.sleep(5 - time_since_last_request)
|
time.sleep(10 - time_since_last_request)
|
||||||
|
|
||||||
last_flickr_request_time = time.time()
|
last_flickr_request_time = time.time()
|
||||||
|
|
||||||
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
||||||
|
|
||||||
# Helper function to search Flickr with a given query
|
# Helper function to search Flickr with a given query
|
||||||
def search_flickr(query, per_page=20):
|
def search_flickr(query, per_page=5): # Reduced per_page to limit results
|
||||||
try:
|
try:
|
||||||
photos = flickr_api.Photo.search(
|
photos = flickr_api.Photo.search(
|
||||||
text=query,
|
text=query,
|
||||||
@@ -1079,7 +1056,7 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
logging.warning(f"Failed to fetch Flickr photo ID {photo_id}: {e}")
|
logging.warning(f"Failed to fetch Flickr photo ID {photo_id}: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Helper function to process a photo
|
# Helper function to process a photo (fetch URL and metadata only)
|
||||||
def process_photo(photo):
|
def process_photo(photo):
|
||||||
tags = [tag.text.lower() for tag in photo.getTags()]
|
tags = [tag.text.lower() for tag in photo.getTags()]
|
||||||
title = photo.title.lower() if photo.title else ""
|
title = photo.title.lower() if photo.title else ""
|
||||||
@@ -1095,71 +1072,28 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
if not img_url or img_url in used_images:
|
if not img_url or img_url in used_images:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
temp_file = None
|
uploader = photo.owner.username
|
||||||
try:
|
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
|
||||||
for attempt in range(3):
|
|
||||||
img_response = requests.get(img_url, headers=headers, timeout=10)
|
used_images.add(img_url)
|
||||||
if img_response.status_code == 429:
|
save_used_images()
|
||||||
wait_time = 5 * (2 ** attempt)
|
|
||||||
logging.warning(f"Rate limit hit for {img_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).")
|
flickr_data = {
|
||||||
time.sleep(wait_time)
|
"title": search_query,
|
||||||
continue
|
"image_url": img_url,
|
||||||
img_response.raise_for_status()
|
"source": "Flickr",
|
||||||
break
|
"uploader": uploader,
|
||||||
else:
|
"page_url": page_url,
|
||||||
logging.warning(f"Rate limit hit for {img_url} after retries. Falling back to Pixabay.")
|
"timestamp": datetime.now(timezone.utc).isoformat()
|
||||||
return None
|
}
|
||||||
|
flickr_file = "/home/shane/foodie_automator/flickr_images.json"
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
|
with open(flickr_file, 'a') as f:
|
||||||
temp_file.write(img_response.content)
|
json.dump(flickr_data, f)
|
||||||
temp_path = temp_file.name
|
f.write('\n')
|
||||||
|
logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}")
|
||||||
img = Image.open(temp_path)
|
|
||||||
text = pytesseract.image_to_string(img)
|
logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
|
||||||
char_count = len(text.strip())
|
return img_url, "Flickr", uploader, page_url
|
||||||
logging.info(f"OCR processed {img_url}: {char_count} characters detected")
|
|
||||||
|
|
||||||
if char_count > 200:
|
|
||||||
logging.info(f"Skipping text-heavy image (OCR): {img_url} (char_count: {char_count})")
|
|
||||||
return None
|
|
||||||
|
|
||||||
uploader = photo.owner.username
|
|
||||||
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
|
|
||||||
|
|
||||||
used_images.add(img_url)
|
|
||||||
save_used_images()
|
|
||||||
|
|
||||||
flickr_data = {
|
|
||||||
"title": search_query,
|
|
||||||
"image_url": img_url,
|
|
||||||
"source": "Flickr",
|
|
||||||
"uploader": uploader,
|
|
||||||
"page_url": page_url,
|
|
||||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
|
||||||
"ocr_chars": char_count
|
|
||||||
}
|
|
||||||
flickr_file = "/home/shane/foodie_automator/flickr_images.json"
|
|
||||||
with open(flickr_file, 'a') as f:
|
|
||||||
json.dump(flickr_data, f)
|
|
||||||
f.write('\n')
|
|
||||||
logging.info(f"Saved Flickr image to {flickr_file}: {img_url}")
|
|
||||||
|
|
||||||
logging.info(f"Fetched Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
|
|
||||||
return img_url, "Flickr", uploader, page_url
|
|
||||||
|
|
||||||
except requests.exceptions.HTTPError as e:
|
|
||||||
if e.response.status_code == 429:
|
|
||||||
logging.warning(f"Rate limit hit for {img_url} after retries. Falling back to Pixabay.")
|
|
||||||
return None
|
|
||||||
else:
|
|
||||||
logging.warning(f"Download failed for {img_url}: {e}")
|
|
||||||
return None
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"OCR processing failed for {img_url}: {e}")
|
|
||||||
return None
|
|
||||||
finally:
|
|
||||||
if temp_file and os.path.exists(temp_path):
|
|
||||||
os.unlink(temp_path)
|
|
||||||
|
|
||||||
# Helper function to search DDG and extract Flickr photo IDs
|
# Helper function to search DDG and extract Flickr photo IDs
|
||||||
def search_ddg_for_flickr(query):
|
def search_ddg_for_flickr(query):
|
||||||
@@ -1178,7 +1112,7 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
photo_id = match.group(1)
|
photo_id = match.group(1)
|
||||||
photo_ids.add(photo_id)
|
photo_ids.add(photo_id)
|
||||||
|
|
||||||
photo_ids = list(photo_ids)[:5] # Limit to 5 IDs
|
photo_ids = list(photo_ids)[:2] # Limit to 2 IDs
|
||||||
logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}")
|
logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}")
|
||||||
return photo_ids
|
return photo_ids
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user