diff --git a/foodie_utils.py b/foodie_utils.py index 6a62af8..605af1a 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -435,25 +435,7 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw } logging.info(f"Fetching image from {image_url} for '{post_title}'") - for attempt in range(3): - try: - image_response = requests.get(image_url, headers=image_headers, timeout=IMAGE_UPLOAD_TIMEOUT) - if image_response.status_code == 429: - wait_time = 10 * (2 ** attempt) - logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).") - time.sleep(wait_time) - continue - image_response.raise_for_status() - break - except requests.exceptions.RequestException as e: - logging.warning(f"Image fetch failed for {image_url} (attempt {attempt+1}/3): {e}") - if attempt == 2: - logging.error(f"Failed to fetch image {image_url} after 3 attempts") - return None - time.sleep(10 * (2 ** attempt)) - else: - logging.error(f"Failed to fetch image {image_url} after retries") - return None + # ... (image fetching logic) ... response = requests.post( f"{wp_base_url}/media", @@ -1102,7 +1084,7 @@ def get_flickr_image(search_query, relevance_keywords, main_topic): used_images.add(image_url) save_used_images() - uploader = "Unknown" # Most public domain sources don't provide uploader + uploader = "Unknown" logger.info(f"Selected image: {image_url} from {source_name}") return image_url, source_name, uploader, page_url except Exception as e: @@ -1118,9 +1100,14 @@ def get_flickr_image(search_query, relevance_keywords, main_topic): for result in results: image_url = result.get("image") page_url = result.get("url") - # Extract domain as source_name (e.g., unsplash.com -> Unsplash) + # Extract domain and remove top-level domain (e.g., .cn, .com) source_match = re.search(r'https?://(?:www\.)?([^/]+)', page_url) - source_name = source_match.group(1).capitalize() if source_match else "Public Domain" + if source_match: + domain = source_match.group(1) # e.g., shine.cn + # Split on last dot and take the first part, then capitalize + source_name = domain.rsplit('.', 1)[0].capitalize() # e.g., Shine + else: + source_name = "Public Domain" if image_url and image_url.endswith(('.jpg', '.jpeg', '.png')): result = process_image(image_url, source_name, page_url) if result: @@ -1135,6 +1122,7 @@ def get_flickr_image(search_query, relevance_keywords, main_topic): used_images.add(image_url) save_used_images() logger.info(f"Selected Pixabay image: {image_url}") + # For Pixabay, source_name is already set to "Pixabay", which is fine return image_url, source_name, uploader, page_url logger.warning(f"No valid images found for query '{search_query}'")