diff --git a/foodie_utils.py b/foodie_utils.py
index ce3f40e..c5b47b5 100644
--- a/foodie_utils.py
+++ b/foodie_utils.py
@@ -1032,11 +1032,11 @@ def get_flickr_image(search_query, relevance_keywords):
     flickr_request_count += 1
     logging.info(f"Flickr request count: {flickr_request_count}/3600")
 
-    # Enforce a minimum delay of 1 second between Flickr requests
+    # Enforce a minimum delay of 5 seconds between Flickr requests
     current_time = time.time()
     time_since_last_request = current_time - last_flickr_request_time
-    if time_since_last_request < 1:
-        time.sleep(1 - time_since_last_request)
+    if time_since_last_request < 5:
+        time.sleep(5 - time_since_last_request)
     last_flickr_request_time = time.time()
 
@@ -1085,8 +1085,19 @@ def get_flickr_image(search_query, relevance_keywords):
         temp_file = None
         try:
-            img_response = requests.get(img_url, headers=headers, timeout=10)
-            img_response.raise_for_status()
+            for attempt in range(3):
+                img_response = requests.get(img_url, headers=headers, timeout=10)
+                if img_response.status_code == 429:
+                    wait_time = 5 * (2 ** attempt)
+                    logging.warning(f"Rate limit hit for {img_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).")
+                    time.sleep(wait_time)
+                    continue
+                img_response.raise_for_status()
+                break
+            else:
+                logging.warning(f"Rate limit hit for {img_url} after retries. Falling back to Pixabay.")
+                return None
+
             with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
                 temp_file.write(img_response.content)
                 temp_path = temp_file.name
@@ -1126,7 +1137,7 @@ def get_flickr_image(search_query, relevance_keywords):
         except requests.exceptions.HTTPError as e:
             if e.response.status_code == 429:
-                logging.warning(f"Rate limit hit for {img_url}. Falling back to Pixabay.")
+                logging.warning(f"Rate limit hit for {img_url} after retries. Falling back to Pixabay.")
                 return None
             else:
                 logging.warning(f"Download failed for {img_url}: {e}")
@@ -1148,15 +1159,14 @@ def get_flickr_image(search_query, relevance_keywords):
         soup = BeautifulSoup(response.text, 'html.parser')
         photo_ids = set()
-        # Look for Flickr URLs in the search results
         for link in soup.find_all('a', href=True):
             href = link['href']
-            # Match Flickr photo URLs like https://www.flickr.com/photos/username/1234567890
             match = re.search(r'flickr\.com/photos/[^/]+/(\d+)', href)
             if match:
                 photo_id = match.group(1)
                 photo_ids.add(photo_id)
+        photo_ids = list(photo_ids)[:5]  # Limit to 5 IDs
         logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}")
         return photo_ids
     except Exception as e:
@@ -1173,7 +1183,7 @@ def get_flickr_image(search_query, relevance_keywords):
         "```json\n"
         "{\n"
         "  \"Wingstop\": \"specific\",\n"
-        "  \"dining\": \"generic\"\n"
+        "  \"dining\": \"generic\"\n"
         "}\n```"
     )
     try: