Try to avoid Flickr rate limit; upload to WP directly
This commit is contained in:
+76
-142
@@ -227,14 +227,14 @@ def get_image(search_query):
|
||||
|
||||
current_time = time.time()
|
||||
time_since_last_request = current_time - last_flickr_request_time
|
||||
if time_since_last_request < 5:
|
||||
time.sleep(5 - time_since_last_request)
|
||||
if time_since_last_request < 10:
|
||||
time.sleep(10 - time_since_last_request)
|
||||
|
||||
last_flickr_request_time = time.time()
|
||||
|
||||
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
||||
|
||||
def search_flickr(query, per_page=20):
|
||||
def search_flickr(query, per_page=5):
|
||||
try:
|
||||
photos = flickr_api.Photo.search(
|
||||
text=query,
|
||||
@@ -270,71 +270,28 @@ def get_image(search_query):
|
||||
if not img_url or img_url in used_images:
|
||||
return None
|
||||
|
||||
temp_file = None
|
||||
try:
|
||||
for attempt in range(3):
|
||||
img_response = requests.get(img_url, headers=headers, timeout=10)
|
||||
if img_response.status_code == 429:
|
||||
wait_time = 5 * (2 ** attempt)
|
||||
logging.warning(f"Rate limit hit for {img_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).")
|
||||
time.sleep(wait_time)
|
||||
continue
|
||||
img_response.raise_for_status()
|
||||
break
|
||||
else:
|
||||
logging.warning(f"Rate limit hit for {img_url} after retries. Falling back to Pixabay.")
|
||||
return None
|
||||
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
|
||||
temp_file.write(img_response.content)
|
||||
temp_path = temp_file.name
|
||||
|
||||
img = Image.open(temp_path)
|
||||
text = pytesseract.image_to_string(img)
|
||||
char_count = len(text.strip())
|
||||
logging.info(f"OCR processed {img_url}: {char_count} characters detected")
|
||||
|
||||
if char_count > 200:
|
||||
logging.info(f"Skipping text-heavy image (OCR): {img_url} (char_count: {char_count})")
|
||||
return None
|
||||
|
||||
uploader = photo.owner.username
|
||||
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
|
||||
|
||||
used_images.add(img_url)
|
||||
save_used_images()
|
||||
|
||||
flickr_data = {
|
||||
"title": search_query,
|
||||
"image_url": img_url,
|
||||
"source": "Flickr",
|
||||
"uploader": uploader,
|
||||
"page_url": page_url,
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
"ocr_chars": char_count
|
||||
}
|
||||
flickr_file = "/home/shane/foodie_automator/flickr_images.json"
|
||||
with open(flickr_file, 'a') as f:
|
||||
json.dump(flickr_data, f)
|
||||
f.write('\n')
|
||||
logging.info(f"Saved Flickr image to {flickr_file}: {img_url}")
|
||||
|
||||
logging.info(f"Fallback Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
|
||||
return img_url, "Flickr", uploader, page_url
|
||||
|
||||
except requests.exceptions.HTTPError as e:
|
||||
if e.response.status_code == 429:
|
||||
logging.warning(f"Rate limit hit for {img_url} after retries. Falling back to Pixabay.")
|
||||
return None
|
||||
else:
|
||||
logging.warning(f"Download failed for {img_url}: {e}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logging.warning(f"OCR processing failed for {img_url}: {e}")
|
||||
return None
|
||||
finally:
|
||||
if temp_file and os.path.exists(temp_path):
|
||||
os.unlink(temp_path)
|
||||
uploader = photo.owner.username
|
||||
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
|
||||
|
||||
used_images.add(img_url)
|
||||
save_used_images()
|
||||
|
||||
flickr_data = {
|
||||
"title": search_query,
|
||||
"image_url": img_url,
|
||||
"source": "Flickr",
|
||||
"uploader": uploader,
|
||||
"page_url": page_url,
|
||||
"timestamp": datetime.now(timezone.utc).isoformat()
|
||||
}
|
||||
flickr_file = "/home/shane/foodie_automator/flickr_images.json"
|
||||
with open(flickr_file, 'a') as f:
|
||||
json.dump(flickr_data, f)
|
||||
f.write('\n')
|
||||
logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}")
|
||||
|
||||
logging.info(f"Fallback Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
|
||||
return img_url, "Flickr", uploader, page_url
|
||||
|
||||
def search_ddg_for_flickr(query):
|
||||
ddg_query = f"{query} site:flickr.com"
|
||||
@@ -352,7 +309,7 @@ def get_image(search_query):
|
||||
photo_id = match.group(1)
|
||||
photo_ids.add(photo_id)
|
||||
|
||||
photo_ids = list(photo_ids)[:5] # Limit to 5 IDs
|
||||
photo_ids = list(photo_ids)[:2] # Limit to 2 IDs
|
||||
logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}")
|
||||
return photo_ids
|
||||
except Exception as e:
|
||||
@@ -571,9 +528,29 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
||||
}
|
||||
logging.info(f"Fetching image from {image_url} for '{post_title}'")
|
||||
image_response = requests.get(image_url, headers=image_headers, timeout=10)
|
||||
image_response.raise_for_status()
|
||||
|
||||
# Add rate limit handling for image download
|
||||
for attempt in range(3):
|
||||
try:
|
||||
image_response = requests.get(image_url, headers=image_headers, timeout=10)
|
||||
if image_response.status_code == 429:
|
||||
wait_time = 10 * (2 ** attempt) # 10s, 20s, 40s
|
||||
logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).")
|
||||
time.sleep(wait_time)
|
||||
continue
|
||||
image_response.raise_for_status()
|
||||
break
|
||||
except requests.exceptions.HTTPError as e:
|
||||
if e.response.status_code == 429:
|
||||
wait_time = 10 * (2 ** attempt)
|
||||
logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).")
|
||||
time.sleep(wait_time)
|
||||
continue
|
||||
raise
|
||||
else:
|
||||
logging.warning(f"Rate limit hit for {image_url} after retries. Failing image upload.")
|
||||
return None
|
||||
|
||||
response = requests.post(
|
||||
f"{wp_base_url}/media",
|
||||
headers=headers,
|
||||
@@ -1044,18 +1021,18 @@ def get_flickr_image(search_query, relevance_keywords):
|
||||
flickr_request_count += 1
|
||||
logging.info(f"Flickr request count: {flickr_request_count}/3600")
|
||||
|
||||
# Enforce a minimum delay of 5 seconds between Flickr requests
|
||||
# Enforce a minimum delay of 10 seconds between Flickr requests
|
||||
current_time = time.time()
|
||||
time_since_last_request = current_time - last_flickr_request_time
|
||||
if time_since_last_request < 5:
|
||||
time.sleep(5 - time_since_last_request)
|
||||
if time_since_last_request < 10:
|
||||
time.sleep(10 - time_since_last_request)
|
||||
|
||||
last_flickr_request_time = time.time()
|
||||
|
||||
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
||||
|
||||
# Helper function to search Flickr with a given query
|
||||
def search_flickr(query, per_page=20):
|
||||
def search_flickr(query, per_page=5): # Reduced per_page to limit results
|
||||
try:
|
||||
photos = flickr_api.Photo.search(
|
||||
text=query,
|
||||
@@ -1079,7 +1056,7 @@ def get_flickr_image(search_query, relevance_keywords):
|
||||
logging.warning(f"Failed to fetch Flickr photo ID {photo_id}: {e}")
|
||||
return None
|
||||
|
||||
# Helper function to process a photo
|
||||
# Helper function to process a photo (fetch URL and metadata only)
|
||||
def process_photo(photo):
|
||||
tags = [tag.text.lower() for tag in photo.getTags()]
|
||||
title = photo.title.lower() if photo.title else ""
|
||||
@@ -1095,71 +1072,28 @@ def get_flickr_image(search_query, relevance_keywords):
|
||||
if not img_url or img_url in used_images:
|
||||
return None
|
||||
|
||||
temp_file = None
|
||||
try:
|
||||
for attempt in range(3):
|
||||
img_response = requests.get(img_url, headers=headers, timeout=10)
|
||||
if img_response.status_code == 429:
|
||||
wait_time = 5 * (2 ** attempt)
|
||||
logging.warning(f"Rate limit hit for {img_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).")
|
||||
time.sleep(wait_time)
|
||||
continue
|
||||
img_response.raise_for_status()
|
||||
break
|
||||
else:
|
||||
logging.warning(f"Rate limit hit for {img_url} after retries. Falling back to Pixabay.")
|
||||
return None
|
||||
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
|
||||
temp_file.write(img_response.content)
|
||||
temp_path = temp_file.name
|
||||
|
||||
img = Image.open(temp_path)
|
||||
text = pytesseract.image_to_string(img)
|
||||
char_count = len(text.strip())
|
||||
logging.info(f"OCR processed {img_url}: {char_count} characters detected")
|
||||
|
||||
if char_count > 200:
|
||||
logging.info(f"Skipping text-heavy image (OCR): {img_url} (char_count: {char_count})")
|
||||
return None
|
||||
|
||||
uploader = photo.owner.username
|
||||
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
|
||||
|
||||
used_images.add(img_url)
|
||||
save_used_images()
|
||||
|
||||
flickr_data = {
|
||||
"title": search_query,
|
||||
"image_url": img_url,
|
||||
"source": "Flickr",
|
||||
"uploader": uploader,
|
||||
"page_url": page_url,
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
"ocr_chars": char_count
|
||||
}
|
||||
flickr_file = "/home/shane/foodie_automator/flickr_images.json"
|
||||
with open(flickr_file, 'a') as f:
|
||||
json.dump(flickr_data, f)
|
||||
f.write('\n')
|
||||
logging.info(f"Saved Flickr image to {flickr_file}: {img_url}")
|
||||
|
||||
logging.info(f"Fetched Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
|
||||
return img_url, "Flickr", uploader, page_url
|
||||
|
||||
except requests.exceptions.HTTPError as e:
|
||||
if e.response.status_code == 429:
|
||||
logging.warning(f"Rate limit hit for {img_url} after retries. Falling back to Pixabay.")
|
||||
return None
|
||||
else:
|
||||
logging.warning(f"Download failed for {img_url}: {e}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logging.warning(f"OCR processing failed for {img_url}: {e}")
|
||||
return None
|
||||
finally:
|
||||
if temp_file and os.path.exists(temp_path):
|
||||
os.unlink(temp_path)
|
||||
uploader = photo.owner.username
|
||||
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
|
||||
|
||||
used_images.add(img_url)
|
||||
save_used_images()
|
||||
|
||||
flickr_data = {
|
||||
"title": search_query,
|
||||
"image_url": img_url,
|
||||
"source": "Flickr",
|
||||
"uploader": uploader,
|
||||
"page_url": page_url,
|
||||
"timestamp": datetime.now(timezone.utc).isoformat()
|
||||
}
|
||||
flickr_file = "/home/shane/foodie_automator/flickr_images.json"
|
||||
with open(flickr_file, 'a') as f:
|
||||
json.dump(flickr_data, f)
|
||||
f.write('\n')
|
||||
logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}")
|
||||
|
||||
logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
|
||||
return img_url, "Flickr", uploader, page_url
|
||||
|
||||
# Helper function to search DDG and extract Flickr photo IDs
|
||||
def search_ddg_for_flickr(query):
|
||||
@@ -1178,7 +1112,7 @@ def get_flickr_image(search_query, relevance_keywords):
|
||||
photo_id = match.group(1)
|
||||
photo_ids.add(photo_id)
|
||||
|
||||
photo_ids = list(photo_ids)[:5] # Limit to 5 IDs
|
||||
photo_ids = list(photo_ids)[:2] # Limit to 2 IDs
|
||||
logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}")
|
||||
return photo_ids
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user