|
|
|
@ -1032,11 +1032,11 @@ def get_flickr_image(search_query, relevance_keywords): |
|
|
|
flickr_request_count += 1 |
|
|
|
flickr_request_count += 1 |
|
|
|
logging.info(f"Flickr request count: {flickr_request_count}/3600") |
|
|
|
logging.info(f"Flickr request count: {flickr_request_count}/3600") |
|
|
|
|
|
|
|
|
|
|
|
# Enforce a minimum delay of 1 second between Flickr requests |
|
|
|
# Enforce a minimum delay of 5 seconds between Flickr requests |
|
|
|
current_time = time.time() |
|
|
|
current_time = time.time() |
|
|
|
time_since_last_request = current_time - last_flickr_request_time |
|
|
|
time_since_last_request = current_time - last_flickr_request_time |
|
|
|
if time_since_last_request < 1: |
|
|
|
if time_since_last_request < 5: |
|
|
|
time.sleep(1 - time_since_last_request) |
|
|
|
time.sleep(5 - time_since_last_request) |
|
|
|
|
|
|
|
|
|
|
|
last_flickr_request_time = time.time() |
|
|
|
last_flickr_request_time = time.time() |
|
|
|
|
|
|
|
|
|
|
|
@ -1085,8 +1085,19 @@ def get_flickr_image(search_query, relevance_keywords): |
|
|
|
|
|
|
|
|
|
|
|
temp_file = None |
|
|
|
temp_file = None |
|
|
|
try: |
|
|
|
try: |
|
|
|
img_response = requests.get(img_url, headers=headers, timeout=10) |
|
|
|
for attempt in range(3): |
|
|
|
img_response.raise_for_status() |
|
|
|
img_response = requests.get(img_url, headers=headers, timeout=10) |
|
|
|
|
|
|
|
if img_response.status_code == 429: |
|
|
|
|
|
|
|
wait_time = 5 * (2 ** attempt) |
|
|
|
|
|
|
|
logging.warning(f"Rate limit hit for {img_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).") |
|
|
|
|
|
|
|
time.sleep(wait_time) |
|
|
|
|
|
|
|
continue |
|
|
|
|
|
|
|
img_response.raise_for_status() |
|
|
|
|
|
|
|
break |
|
|
|
|
|
|
|
else: |
|
|
|
|
|
|
|
logging.warning(f"Rate limit hit for {img_url} after retries. Falling back to Pixabay.") |
|
|
|
|
|
|
|
return None |
|
|
|
|
|
|
|
|
|
|
|
with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file: |
|
|
|
with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file: |
|
|
|
temp_file.write(img_response.content) |
|
|
|
temp_file.write(img_response.content) |
|
|
|
temp_path = temp_file.name |
|
|
|
temp_path = temp_file.name |
|
|
|
@ -1126,7 +1137,7 @@ def get_flickr_image(search_query, relevance_keywords): |
|
|
|
|
|
|
|
|
|
|
|
except requests.exceptions.HTTPError as e: |
|
|
|
except requests.exceptions.HTTPError as e: |
|
|
|
if e.response.status_code == 429: |
|
|
|
if e.response.status_code == 429: |
|
|
|
logging.warning(f"Rate limit hit for {img_url}. Falling back to Pixabay.") |
|
|
|
logging.warning(f"Rate limit hit for {img_url} after retries. Falling back to Pixabay.") |
|
|
|
return None |
|
|
|
return None |
|
|
|
else: |
|
|
|
else: |
|
|
|
logging.warning(f"Download failed for {img_url}: {e}") |
|
|
|
logging.warning(f"Download failed for {img_url}: {e}") |
|
|
|
@ -1148,15 +1159,14 @@ def get_flickr_image(search_query, relevance_keywords): |
|
|
|
soup = BeautifulSoup(response.text, 'html.parser') |
|
|
|
soup = BeautifulSoup(response.text, 'html.parser') |
|
|
|
|
|
|
|
|
|
|
|
photo_ids = set() |
|
|
|
photo_ids = set() |
|
|
|
# Look for Flickr URLs in the search results |
|
|
|
|
|
|
|
for link in soup.find_all('a', href=True): |
|
|
|
for link in soup.find_all('a', href=True): |
|
|
|
href = link['href'] |
|
|
|
href = link['href'] |
|
|
|
# Match Flickr photo URLs like https://www.flickr.com/photos/username/1234567890 |
|
|
|
|
|
|
|
match = re.search(r'flickr\.com/photos/[^/]+/(\d+)', href) |
|
|
|
match = re.search(r'flickr\.com/photos/[^/]+/(\d+)', href) |
|
|
|
if match: |
|
|
|
if match: |
|
|
|
photo_id = match.group(1) |
|
|
|
photo_id = match.group(1) |
|
|
|
photo_ids.add(photo_id) |
|
|
|
photo_ids.add(photo_id) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
photo_ids = list(photo_ids)[:5] # Limit to 5 IDs |
|
|
|
logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}") |
|
|
|
logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}") |
|
|
|
return photo_ids |
|
|
|
return photo_ids |
|
|
|
except Exception as e: |
|
|
|
except Exception as e: |
|
|
|
@ -1173,7 +1183,7 @@ def get_flickr_image(search_query, relevance_keywords): |
|
|
|
"```json\n" |
|
|
|
"```json\n" |
|
|
|
"{\n" |
|
|
|
"{\n" |
|
|
|
" \"Wingstop\": \"specific\",\n" |
|
|
|
" \"Wingstop\": \"specific\",\n" |
|
|
|
" \"dining\": \"generic\"\n" |
|
|
|
" \"dining\": \"generic\"\n" |
|
|
|
"}\n```" |
|
|
|
"}\n```" |
|
|
|
) |
|
|
|
) |
|
|
|
try: |
|
|
|
try: |
|
|
|
|