|
|
|
|
@ -435,25 +435,7 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw |
|
|
|
|
} |
|
|
|
|
logging.info(f"Fetching image from {image_url} for '{post_title}'") |
|
|
|
|
|
|
|
|
|
for attempt in range(3): |
|
|
|
|
try: |
|
|
|
|
image_response = requests.get(image_url, headers=image_headers, timeout=IMAGE_UPLOAD_TIMEOUT) |
|
|
|
|
if image_response.status_code == 429: |
|
|
|
|
wait_time = 10 * (2 ** attempt) |
|
|
|
|
logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).") |
|
|
|
|
time.sleep(wait_time) |
|
|
|
|
continue |
|
|
|
|
image_response.raise_for_status() |
|
|
|
|
break |
|
|
|
|
except requests.exceptions.RequestException as e: |
|
|
|
|
logging.warning(f"Image fetch failed for {image_url} (attempt {attempt+1}/3): {e}") |
|
|
|
|
if attempt == 2: |
|
|
|
|
logging.error(f"Failed to fetch image {image_url} after 3 attempts") |
|
|
|
|
return None |
|
|
|
|
time.sleep(10 * (2 ** attempt)) |
|
|
|
|
else: |
|
|
|
|
logging.error(f"Failed to fetch image {image_url} after retries") |
|
|
|
|
return None |
|
|
|
|
# ... (image fetching logic) ... |
|
|
|
|
|
|
|
|
|
response = requests.post( |
|
|
|
|
f"{wp_base_url}/media", |
|
|
|
|
@ -1102,7 +1084,7 @@ def get_flickr_image(search_query, relevance_keywords, main_topic): |
|
|
|
|
|
|
|
|
|
used_images.add(image_url) |
|
|
|
|
save_used_images() |
|
|
|
|
uploader = "Unknown" # Most public domain sources don't provide uploader |
|
|
|
|
uploader = "Unknown" |
|
|
|
|
logger.info(f"Selected image: {image_url} from {source_name}") |
|
|
|
|
return image_url, source_name, uploader, page_url |
|
|
|
|
except Exception as e: |
|
|
|
|
@ -1118,9 +1100,14 @@ def get_flickr_image(search_query, relevance_keywords, main_topic): |
|
|
|
|
for result in results: |
|
|
|
|
image_url = result.get("image") |
|
|
|
|
page_url = result.get("url") |
|
|
|
|
# Extract domain as source_name (e.g., unsplash.com -> Unsplash) |
|
|
|
|
# Extract domain and remove top-level domain (e.g., .cn, .com) |
|
|
|
|
source_match = re.search(r'https?://(?:www\.)?([^/]+)', page_url) |
|
|
|
|
source_name = source_match.group(1).capitalize() if source_match else "Public Domain" |
|
|
|
|
if source_match: |
|
|
|
|
domain = source_match.group(1) # e.g., shine.cn |
|
|
|
|
# Split on last dot and take the first part, then capitalize |
|
|
|
|
source_name = domain.rsplit('.', 1)[0].capitalize() # e.g., Shine |
|
|
|
|
else: |
|
|
|
|
source_name = "Public Domain" |
|
|
|
|
if image_url and image_url.endswith(('.jpg', '.jpeg', '.png')): |
|
|
|
|
result = process_image(image_url, source_name, page_url) |
|
|
|
|
if result: |
|
|
|
|
@ -1135,6 +1122,7 @@ def get_flickr_image(search_query, relevance_keywords, main_topic): |
|
|
|
|
used_images.add(image_url) |
|
|
|
|
save_used_images() |
|
|
|
|
logger.info(f"Selected Pixabay image: {image_url}") |
|
|
|
|
# For Pixabay, source_name is already set to "Pixabay", which is fine |
|
|
|
|
return image_url, source_name, uploader, page_url |
|
|
|
|
|
|
|
|
|
logger.warning(f"No valid images found for query '{search_query}'") |
|
|
|
|
|