diff --git a/foodie_utils.py b/foodie_utils.py index b9ff2c9..a81b46c 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -227,14 +227,14 @@ def get_image(search_query): current_time = time.time() time_since_last_request = current_time - last_flickr_request_time - if time_since_last_request < 5: - time.sleep(5 - time_since_last_request) + if time_since_last_request < 10: + time.sleep(10 - time_since_last_request) last_flickr_request_time = time.time() headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'} - def search_flickr(query, per_page=20): + def search_flickr(query, per_page=5): try: photos = flickr_api.Photo.search( text=query, @@ -270,71 +270,28 @@ def get_image(search_query): if not img_url or img_url in used_images: return None - temp_file = None - try: - for attempt in range(3): - img_response = requests.get(img_url, headers=headers, timeout=10) - if img_response.status_code == 429: - wait_time = 5 * (2 ** attempt) - logging.warning(f"Rate limit hit for {img_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).") - time.sleep(wait_time) - continue - img_response.raise_for_status() - break - else: - logging.warning(f"Rate limit hit for {img_url} after retries. Falling back to Pixabay.") - return None - - with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file: - temp_file.write(img_response.content) - temp_path = temp_file.name - - img = Image.open(temp_path) - text = pytesseract.image_to_string(img) - char_count = len(text.strip()) - logging.info(f"OCR processed {img_url}: {char_count} characters detected") - - if char_count > 200: - logging.info(f"Skipping text-heavy image (OCR): {img_url} (char_count: {char_count})") - return None - - uploader = photo.owner.username - page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}" - - used_images.add(img_url) - save_used_images() - - flickr_data = { - "title": search_query, - "image_url": img_url, - "source": "Flickr", - "uploader": uploader, - "page_url": page_url, - "timestamp": datetime.now(timezone.utc).isoformat(), - "ocr_chars": char_count - } - flickr_file = "/home/shane/foodie_automator/flickr_images.json" - with open(flickr_file, 'a') as f: - json.dump(flickr_data, f) - f.write('\n') - logging.info(f"Saved Flickr image to {flickr_file}: {img_url}") - - logging.info(f"Fallback Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})") - return img_url, "Flickr", uploader, page_url - - except requests.exceptions.HTTPError as e: - if e.response.status_code == 429: - logging.warning(f"Rate limit hit for {img_url} after retries. Falling back to Pixabay.") - return None - else: - logging.warning(f"Download failed for {img_url}: {e}") - return None - except Exception as e: - logging.warning(f"OCR processing failed for {img_url}: {e}") - return None - finally: - if temp_file and os.path.exists(temp_path): - os.unlink(temp_path) + uploader = photo.owner.username + page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}" + + used_images.add(img_url) + save_used_images() + + flickr_data = { + "title": search_query, + "image_url": img_url, + "source": "Flickr", + "uploader": uploader, + "page_url": page_url, + "timestamp": datetime.now(timezone.utc).isoformat() + } + flickr_file = "/home/shane/foodie_automator/flickr_images.json" + with open(flickr_file, 'a') as f: + json.dump(flickr_data, f) + f.write('\n') + logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}") + + logging.info(f"Fallback Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})") + return img_url, "Flickr", uploader, page_url def search_ddg_for_flickr(query): ddg_query = f"{query} site:flickr.com" @@ -352,7 +309,7 @@ def get_image(search_query): photo_id = match.group(1) photo_ids.add(photo_id) - photo_ids = list(photo_ids)[:5] # Limit to 5 IDs + photo_ids = list(photo_ids)[:2] # Limit to 2 IDs logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}") return photo_ids except Exception as e: @@ -571,9 +528,29 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' } logging.info(f"Fetching image from {image_url} for '{post_title}'") - image_response = requests.get(image_url, headers=image_headers, timeout=10) - image_response.raise_for_status() + # Add rate limit handling for image download + for attempt in range(3): + try: + image_response = requests.get(image_url, headers=image_headers, timeout=10) + if image_response.status_code == 429: + wait_time = 10 * (2 ** attempt) # 10s, 20s, 40s + logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).") + time.sleep(wait_time) + continue + image_response.raise_for_status() + break + except requests.exceptions.HTTPError as e: + if e.response.status_code == 429: + wait_time = 10 * (2 ** attempt) + logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).") + time.sleep(wait_time) + continue + raise + else: + logging.warning(f"Rate limit hit for {image_url} after retries. Failing image upload.") + return None + response = requests.post( f"{wp_base_url}/media", headers=headers, @@ -1044,18 +1021,18 @@ def get_flickr_image(search_query, relevance_keywords): flickr_request_count += 1 logging.info(f"Flickr request count: {flickr_request_count}/3600") - # Enforce a minimum delay of 5 seconds between Flickr requests + # Enforce a minimum delay of 10 seconds between Flickr requests current_time = time.time() time_since_last_request = current_time - last_flickr_request_time - if time_since_last_request < 5: - time.sleep(5 - time_since_last_request) + if time_since_last_request < 10: + time.sleep(10 - time_since_last_request) last_flickr_request_time = time.time() headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'} # Helper function to search Flickr with a given query - def search_flickr(query, per_page=20): + def search_flickr(query, per_page=5): # Reduced per_page to limit results try: photos = flickr_api.Photo.search( text=query, @@ -1079,7 +1056,7 @@ def get_flickr_image(search_query, relevance_keywords): logging.warning(f"Failed to fetch Flickr photo ID {photo_id}: {e}") return None - # Helper function to process a photo + # Helper function to process a photo (fetch URL and metadata only) def process_photo(photo): tags = [tag.text.lower() for tag in photo.getTags()] title = photo.title.lower() if photo.title else "" @@ -1095,71 +1072,28 @@ def get_flickr_image(search_query, relevance_keywords): if not img_url or img_url in used_images: return None - temp_file = None - try: - for attempt in range(3): - img_response = requests.get(img_url, headers=headers, timeout=10) - if img_response.status_code == 429: - wait_time = 5 * (2 ** attempt) - logging.warning(f"Rate limit hit for {img_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).") - time.sleep(wait_time) - continue - img_response.raise_for_status() - break - else: - logging.warning(f"Rate limit hit for {img_url} after retries. Falling back to Pixabay.") - return None - - with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file: - temp_file.write(img_response.content) - temp_path = temp_file.name - - img = Image.open(temp_path) - text = pytesseract.image_to_string(img) - char_count = len(text.strip()) - logging.info(f"OCR processed {img_url}: {char_count} characters detected") - - if char_count > 200: - logging.info(f"Skipping text-heavy image (OCR): {img_url} (char_count: {char_count})") - return None - - uploader = photo.owner.username - page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}" - - used_images.add(img_url) - save_used_images() - - flickr_data = { - "title": search_query, - "image_url": img_url, - "source": "Flickr", - "uploader": uploader, - "page_url": page_url, - "timestamp": datetime.now(timezone.utc).isoformat(), - "ocr_chars": char_count - } - flickr_file = "/home/shane/foodie_automator/flickr_images.json" - with open(flickr_file, 'a') as f: - json.dump(flickr_data, f) - f.write('\n') - logging.info(f"Saved Flickr image to {flickr_file}: {img_url}") - - logging.info(f"Fetched Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})") - return img_url, "Flickr", uploader, page_url - - except requests.exceptions.HTTPError as e: - if e.response.status_code == 429: - logging.warning(f"Rate limit hit for {img_url} after retries. Falling back to Pixabay.") - return None - else: - logging.warning(f"Download failed for {img_url}: {e}") - return None - except Exception as e: - logging.warning(f"OCR processing failed for {img_url}: {e}") - return None - finally: - if temp_file and os.path.exists(temp_path): - os.unlink(temp_path) + uploader = photo.owner.username + page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}" + + used_images.add(img_url) + save_used_images() + + flickr_data = { + "title": search_query, + "image_url": img_url, + "source": "Flickr", + "uploader": uploader, + "page_url": page_url, + "timestamp": datetime.now(timezone.utc).isoformat() + } + flickr_file = "/home/shane/foodie_automator/flickr_images.json" + with open(flickr_file, 'a') as f: + json.dump(flickr_data, f) + f.write('\n') + logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}") + + logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})") + return img_url, "Flickr", uploader, page_url # Helper function to search DDG and extract Flickr photo IDs def search_ddg_for_flickr(query): @@ -1178,7 +1112,7 @@ def get_flickr_image(search_query, relevance_keywords): photo_id = match.group(1) photo_ids.add(photo_id) - photo_ids = list(photo_ids)[:5] # Limit to 5 IDs + photo_ids = list(photo_ids)[:2] # Limit to 2 IDs logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}") return photo_ids except Exception as e: