From 6e0f8b47592e7b0771fb599cf18fe7b0cd009baf Mon Sep 17 00:00:00 2001
From: Shane
Date: Tue, 13 May 2025 08:51:52 +1000
Subject: [PATCH] Improve image selection and logging setup

---
 foodie_automator_google.py | 141 +++++++++++++++++++++++--------------
 foodie_automator_reddit.py |  23 +++---
 foodie_automator_rss.py    |   2 -
 foodie_utils.py            | 124 +++++++++++---------------------
 4 files changed, 143 insertions(+), 147 deletions(-)

diff --git a/foodie_automator_google.py b/foodie_automator_google.py
index fcbc162..9ddb4c1 100644
--- a/foodie_automator_google.py
+++ b/foodie_automator_google.py
@@ -70,48 +70,84 @@ MAX_RETRIES = 3
 RETRY_BACKOFF = 2
 
 def setup_logging():
-    if os.path.exists(LOG_FILE):
-        with open(LOG_FILE, 'r') as f:
-            lines = f.readlines()
-
-        log_entries = []
-        current_entry = []
-        timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')
-
-        for line in lines:
-            if timestamp_pattern.match(line):
-                if current_entry:
-                    log_entries.append(''.join(current_entry))
-                current_entry = [line]
-            else:
-                current_entry.append(line)
+    try:
+        # Ensure log directory exists
+        os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
+        logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}")
 
-        if current_entry:
-            log_entries.append(''.join(current_entry))
+        # Check write permissions
+        if not os.access(os.path.dirname(LOG_FILE), os.W_OK):
+            raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}")
 
-        cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
-        pruned_entries = []
-        for entry in log_entries:
-            try:
-                timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
-                if timestamp > cutoff:
-                    pruned_entries.append(entry)
-            except ValueError:
-                logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...")
-                continue
+        # Test write to log file
+        try:
+            with open(LOG_FILE, 'a') as f:
+                f.write("")
+            logging.debug(f"Confirmed write access to {LOG_FILE}")
+        except Exception as e:
+            raise PermissionError(f"Cannot write to {LOG_FILE}: {e}")
+
+        # Prune old logs
+        if os.path.exists(LOG_FILE):
+            with open(LOG_FILE, 'r') as f:
+                lines = f.readlines()
+
+            log_entries = []
+            current_entry = []
+            timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')
+
+            for line in lines:
+                if timestamp_pattern.match(line):
+                    if current_entry:
+                        log_entries.append(''.join(current_entry))
+                    current_entry = [line]
+                else:
+                    current_entry.append(line)
+
+            if current_entry:
+                log_entries.append(''.join(current_entry))
+
+            cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
+            pruned_entries = []
+            for entry in log_entries:
+                try:
+                    timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
+                    if timestamp > cutoff:
+                        pruned_entries.append(entry)
+                except ValueError:
+                    logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...")
+                    continue
+
+            with open(LOG_FILE, 'w') as f:
+                f.writelines(pruned_entries)
+            logging.debug(f"Log file pruned: {LOG_FILE}")
 
-        with open(LOG_FILE, 'w') as f:
-            f.writelines(pruned_entries)
+        # Configure logging
+        logging.basicConfig(
+            filename=LOG_FILE,
+            level=logging.INFO,
+            format="%(asctime)s - %(levelname)s - %(message)s",
+            datefmt="%Y-%m-%d %H:%M:%S",
+            force=True  # Ensure this config takes precedence
+        )
+        console_handler = logging.StreamHandler()
+        console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
+        logging.getLogger().addHandler(console_handler)
+        logging.info("Logging initialized for foodie_automator_google.py")
-    logger = logging.getLogger()
-    logger.setLevel(logging.INFO)
-    file_handler = logging.FileHandler(LOG_FILE, mode='a')
-    file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
-    logger.addHandler(file_handler)
-    console_handler = logging.StreamHandler()
-    console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
-    logger.addHandler(console_handler)
-    logging.info("Logging initialized for foodie_automator_google.py")
+    except Exception as e:
+        # Fallback to console logging if file logging fails
+        logging.basicConfig(
+            level=logging.INFO,
+            format="%(asctime)s - %(levelname)s - %(message)s",
+            datefmt="%Y-%m-%d %H:%M:%S",
+            force=True
+        )
+        logging.error(f"Failed to setup file logging for {LOG_FILE}: {e}. Using console logging.")
+        console_handler = logging.StreamHandler()
+        console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
+        logging.getLogger().addHandler(console_handler)
+        logging.info("Console logging initialized as fallback for foodie_automator_google.py")
 
 client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
@@ -253,11 +289,9 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
     try:
         logging.debug(f"Using {len(posted_titles)} posted titles and {len(used_images)} used images")
 
-        # Define regions to scrape
         regions = ['US', 'GB', 'AU']
         all_trends = []
 
-        # Scrape trends for each region
         for geo in regions:
             logging.info(f"Scraping Google Trends for geo={geo}")
             trends = scrape_google_trends(geo=geo)
@@ -267,7 +301,6 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
             else:
                 logging.warning(f"No trends collected for geo={geo}")
 
-        # Remove duplicates by title and sort by search volume
         unique_trends = []
         seen_titles = set()
         for trend in all_trends:
@@ -277,10 +310,9 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
 
         if not unique_trends:
            logging.info("No Google Trends data available across regions")
-            sleep_time = random.randint(1200, 1800)  # 20–30 minutes
+            sleep_time = random.randint(1200, 1800)
             return None, None, sleep_time
 
-        # Sort trends by search volume in descending order
         unique_trends.sort(key=lambda x: x["search_volume"], reverse=True)
         logging.info(f"Total unique trends collected: {len(unique_trends)}")
 
@@ -293,14 +325,13 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
             summary = trend.get("summary", "")
             source_name = trend.get("source", "Google Trends")
             original_source = f'{source_name}'
-            original_url = link  # Store for fallback
+            original_url = link
 
             if title in posted_titles:
                 logging.info(f"Skipping already posted trend: {title}")
                 attempts += 1
                 continue
 
-            # Check author availability before GPT calls
             author = get_next_author_round_robin()
             if not author:
                 logging.info(f"Skipping trend '{title}' due to tweet rate limits for all authors")
@@ -312,8 +343,12 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
 
             logging.info(f"Trying Google Trend: {title} from {source_name}")
 
+            # Fetch DuckDuckGo context early to enhance smart_image_and_filter
+            ddg_context = fetch_duckduckgo_news_context(title)
+            enhanced_summary = summary + "\n\nAdditional Context: " + ddg_context if summary else ddg_context
+
             try:
-                image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
+                image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, enhanced_summary)
             except Exception as e:
                 logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
                 attempts += 1
                 continue
@@ -324,7 +359,6 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
                 attempts += 1
                 continue
 
-            ddg_context = fetch_duckduckgo_news_context(title)
             scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
             interest_score = is_interesting(scoring_content)
             logging.info(f"Interest score for '{title}': {interest_score}")
@@ -405,11 +439,10 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
             )
             if not post_id:
                 logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}")
-                post_url = original_url  # Fallback to original trend URL
+                post_url = original_url
             else:
                 logging.info(f"Posted to WordPress for {author_username}: {post_url}")
 
-                # Update post with actual post_url
                 post_url_encoded = quote(post_url)
                 share_links = share_links_template.format(post_url=post_url_encoded)
                 post_data["content"] = f"{final_summary}\n\n{share_links}"
@@ -420,7 +453,7 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
                     category=category,
                     link=link,
                     author=author,
-                    image_url=None,  # Skip image re-upload
+                    image_url=None,
                     original_source=original_source,
                     image_source=image_source,
                     uploader=uploader,
@@ -431,7 +464,7 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
                 )
         except Exception as e:
             logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
-            post_url = original_url  # Fallback to original trend URL
+            post_url = original_url
         finally:
             is_posting = False
 
@@ -446,15 +479,15 @@ def curate_from_google_trends(posted_titles_data, posted_titles, used_images_dat
             logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
 
         logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from Google Trends *****")
-        sleep_time = random.randint(1200, 1800)  # 20–30 minutes
+        sleep_time = random.randint(1200, 1800)
         return post_data, category, sleep_time
 
        logging.info("No interesting Google Trend found after attempts")
-        sleep_time = random.randint(1200, 1800)  # 20–30 minutes
+        sleep_time = random.randint(1200, 1800)
         return None, None, sleep_time
     except Exception as e:
         logging.error(f"Unexpected error in curate_from_google_trends: {e}", exc_info=True)
-        sleep_time = random.randint(1200, 1800)  # 20–30 minutes
+        sleep_time = random.randint(1200, 1800)
         return None, None, sleep_time
 
 def run_google_trends_automator():
diff --git a/foodie_automator_reddit.py b/foodie_automator_reddit.py
index 95f507a..9d0aed4 100644
--- a/foodie_automator_reddit.py
+++ b/foodie_automator_reddit.py
@@ -346,7 +346,7 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
         posts = fetch_reddit_posts()
         if not posts:
             logging.info("No Reddit posts available")
-            sleep_time = random.randint(1200, 1800)  # 20–30 minutes
+            sleep_time = random.randint(1200, 1800)
             return None, None, sleep_time
 
         attempts = 0
@@ -379,8 +379,13 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
 
             logging.info(f"Trying Reddit Post: {title} from {source_name}")
 
+            # Combine summary and top comments for smart_image_and_filter
+            enhanced_summary = summary
+            if top_comments:
+                enhanced_summary += "\n\nTop Comments:\n" + "\n".join(top_comments)
+
             try:
-                image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, summary)
+                image_query, relevance_keywords, main_topic, skip, specific_term = smart_image_and_filter(title, enhanced_summary)
             except Exception as e:
                 logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
                 attempts += 1
                 continue
@@ -392,7 +397,6 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
                 continue
 
             ddg_context = fetch_duckduckgo_news_context(title)
-            # Log full scoring content for debugging
             scoring_content = f"Title: {title}\n\nContent: {summary}\n\nTop Comments: {top_comments}\n\nAdditional Context: {ddg_context}"
             logging.debug(f"Scoring content for '{title}': {scoring_content}")
             interest_score = is_interesting_reddit(title, summary, upvotes, comment_count, top_comments)
@@ -474,11 +478,10 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
             )
             if not post_id:
                 logging.warning(f"Failed to post to WordPress for '{title}', using original URL: {original_url}")
-                post_url = original_url  # Fallback to original Reddit post URL
+                post_url = original_url
             else:
                 logging.info(f"Posted to WordPress for {author_username}: {post_url}")
 
-                # Update post with actual post_url
                 post_url_encoded = quote(post_url)
                 share_links = share_links_template.format(post_url=post_url_encoded)
                 post_data["content"] = f"{final_summary}\n\n{share_links}"
@@ -489,7 +492,7 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
                     category=category,
                     link=link,
                     author=author,
-                    image_url=None,  # Skip image re-upload
+                    image_url=None,
                     original_source=original_source,
                     image_source=image_source,
                     uploader=uploader,
@@ -500,7 +503,7 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
                 )
         except Exception as e:
             logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
-            post_url = original_url  # Fallback to original Reddit post URL
+            post_url = original_url
         finally:
             is_posting = False
 
@@ -515,15 +518,15 @@ def curate_from_reddit(posted_titles_data, posted_titles, used_images_data, used
             logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
 
         logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id or 'N/A'}) from Reddit *****")
-        sleep_time = random.randint(1200, 1800)  # 20–30 minutes
+        sleep_time = random.randint(1200, 1800)
         return post_data, category, sleep_time
 
        logging.info("No interesting Reddit post found after attempts")
-        sleep_time = random.randint(1200, 1800)  # 20–30 minutes
+        sleep_time = random.randint(1200, 1800)
         return None, None, sleep_time
     except Exception as e:
         logging.error(f"Unexpected error in curate_from_reddit: {e}", exc_info=True)
-        sleep_time = random.randint(1200, 1800)  # 20–30 minutes
+        sleep_time = random.randint(1200, 1800)
         return None, None, sleep_time
 
 def run_reddit_automator():
diff --git a/foodie_automator_rss.py b/foodie_automator_rss.py
index e84c59d..0893281 100644
--- a/foodie_automator_rss.py
+++ b/foodie_automator_rss.py
@@ -364,7 +364,6 @@ def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_im
             f'

'
         )
 
-        # Embed placeholder share links; update after getting post_url
         post_data["content"] = f"{final_summary}\n\n{share_links_template.format(post_url='{post_url}', share_text=share_text_encoded)}"
 
         global is_posting
@@ -390,7 +389,6 @@ def curate_from_rss(posted_titles_data, posted_titles, used_images_data, used_im
         else:
             logging.info(f"Posted to WordPress for {author_username}: {post_url}")
 
-            # Update content with actual post_url
             post_url_encoded = quote(post_url)
             post_data["content"] = f"{final_summary}\n\n{share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)}"
             if post_id:
diff --git a/foodie_utils.py b/foodie_utils.py
index 49a1e6f..7cfbca8 100644
--- a/foodie_utils.py
+++ b/foodie_utils.py
@@ -44,12 +44,9 @@ IMAGE_UPLOAD_TIMEOUT = 30  # Added to fix NameError
 IMAGE_EXPIRATION_DAYS = 7  # 7 days, consistent with foodie_automator_rss.py
 
 def load_json_file(file_path, expiration_hours=None, default=None):
-    """
-    Load JSON file, optionally filtering expired entries and returning default if invalid.
-    """
     logger = logging.getLogger(__name__)
     if default is None:
-        default = []  # Default to list for posted_rss_titles.json and used_images.json
+        default = []
 
     if not os.path.exists(file_path):
         logger.info(f"File {file_path} does not exist. Returning default: {default}")
@@ -59,15 +56,34 @@ def load_json_file(file_path, expiration_hours=None, default=None):
         with open(file_path, 'r') as f:
             data = json.load(f)
 
+        if not isinstance(data, list):
+            logger.warning(f"Data in {file_path} is not a list, resetting to default")
+            return default
+
         if expiration_hours is not None:
-            cutoff = datetime.now(timezone.utc) - timedelta(hours=expiration_hours)
-            filtered_data = [
-                entry for entry in data
-                if datetime.fromisoformat(entry['timestamp']) > cutoff
-            ]
+            # Use days for used_images.json, hours for others
+            if "used_images" in file_path:
+                expiration_delta = timedelta(days=expiration_hours)
+            else:
+                expiration_delta = timedelta(hours=expiration_hours)
+
+            cutoff = datetime.now(timezone.utc) - expiration_delta
+            filtered_data = []
+            for entry in data:
+                if not isinstance(entry, dict) or "title" not in entry or "timestamp" not in entry:
+                    logger.warning(f"Skipping malformed entry in {file_path}: {entry}")
+                    continue
+                try:
+                    timestamp = datetime.fromisoformat(entry["timestamp"])
+                    if timestamp > cutoff:
+                        filtered_data.append(entry)
+                except ValueError as e:
+                    logger.warning(f"Invalid timestamp in {file_path} entry {entry}: {e}")
+                    continue
+
             if len(filtered_data) < len(data):
                 logger.info(f"Filtered {len(data) - len(filtered_data)} expired entries from {file_path}")
-                save_json_file(file_path, filtered_data)  # Save filtered data
+                save_json_file(file_path, filtered_data)
             data = filtered_data
 
         logger.info(f"Loaded {len(data)} valid entries from {file_path}")
@@ -254,64 +270,6 @@ def select_best_persona(interest_score, content=""):
         return random.choice(personas[2:])
     return random.choice(personas)
 
-def get_image(search_query):
-    headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
-
-    # Try Pixabay with the original query
-    try:
-        pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(search_query)}&image_type=photo&per_page=10"
-        response = requests.get(pixabay_url, headers=headers, timeout=10)
-        response.raise_for_status()
-        data = response.json()
-
-        for hit in data.get('hits', []):
-            img_url = hit.get('webformatURL')
-            if not img_url or img_url in used_images:
-                continue
-            uploader = hit.get('user', 'Unknown')
-            page_url = hit.get('pageURL', img_url)
-
-            used_images.add(img_url)
-            save_used_images()
-
-            logging.info(f"Selected Pixabay image: {img_url} by {uploader} for query '{search_query}'")
-            return img_url, "Pixabay", uploader, page_url
-
-        logging.info(f"No valid Pixabay image found for query '{search_query}'. Trying fallback query.")
-
-    except Exception as e:
-        logging.warning(f"Pixabay image fetch failed for query '{search_query}': {e}")
-
-    # Fallback to a generic query
-    fallback_query = "food dining"
-    try:
-        pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(fallback_query)}&image_type=photo&per_page=10"
-        response = requests.get(pixabay_url, headers=headers, timeout=10)
-        response.raise_for_status()
-        data = response.json()
-
-        for hit in data.get('hits', []):
-            img_url = hit.get('webformatURL')
-            if not img_url or img_url in used_images:
-                continue
-            uploader = hit.get('user', 'Unknown')
-            page_url = hit.get('pageURL', img_url)
-
-            used_images.add(img_url)
-            save_used_images()
-
-            logging.info(f"Selected Pixabay fallback image: {img_url} by {uploader} for query '{fallback_query}'")
-            return img_url, "Pixabay", uploader, page_url
-
-        logging.warning(f"No valid Pixabay image found for fallback query '{fallback_query}'.")
-
-    except Exception as e:
-        logging.warning(f"Pixabay fallback image fetch failed for query '{fallback_query}': {e}")
-
-    # Ultimate fallback: return None but log clearly
-    logging.error(f"All image fetch attempts failed for query '{search_query}'. Returning None.")
-    return None, None, None, None
-
 def generate_image_query(title, summary):
     try:
         prompt = (
@@ -425,7 +383,7 @@ def smart_image_and_filter(title, summary):
         relevance_keywords = result["relevance"]
         main_topic = result.get("main_topic", extract_main_topic(title.lower() + " " + summary.lower()))
         skip_flag = (
-            result["aison"] == "SKIP" or
+            result["action"] == "SKIP" or  # Fixed typo: "aison" → "action"
             "[homemade]" in title.lower() or
             "homemade" in title.lower() or
             "homemade" in summary.lower() or
@@ -1180,9 +1138,7 @@ def get_flickr_image(search_query, relevance_keywords, main_topic, specific_term
     logger = logging.getLogger(__name__)
 
     def process_image(image_url, source_name, page_url):
-        """Download image, check for text with OCR, validate resolution, exclude screenshots, watermarks, and YouTube images."""
         try:
-            # Check for YouTube images via URL or page URL
             youtube_domains = ['youtube.com', 'ytimg.com']
             if any(domain in image_url.lower() or domain in page_url.lower() for domain in youtube_domains):
                 logger.info(f"Skipping YouTube image: {image_url}")
@@ -1193,20 +1149,17 @@ def get_flickr_image(search_query, relevance_keywords, main_topic, specific_term
             response.raise_for_status()
             img = Image.open(io.BytesIO(response.content))
 
-            # Check image resolution
             width, height = img.size
             min_dimension = 1280
             if width < min_dimension and height < min_dimension:
                 logger.info(f"Skipping low-resolution image: {image_url} ({width}x{height})")
                 return None
 
-            # Attempt to detect screenshots via aspect ratio or naming
             aspect_ratio = width / height
             if (0.9 <= aspect_ratio <= 1.1) or "screenshot" in image_url.lower():
                 logger.info(f"Skipping potential screenshot: {image_url} (aspect ratio: {aspect_ratio})")
                 return None
 
-            # Check for watermarks in URL or page URL
             watermark_domains = [
                 'shutterstock.com', 'gettyimages.com', 'istockphoto.com',
                 'adobestock.com', '123rf.com', 'dreamstime.com', 'alamy.com', 'stock.adobe.com'
             ]
@@ -1215,7 +1168,6 @@ def get_flickr_image(search_query, relevance_keywords, main_topic, specific_term
                 logger.info(f"Skipping image from stock photo site (potential watermark): {image_url}")
                 return None
 
-            # OCR to detect text and watermarks
             text = pytesseract.image_to_string(img).strip().lower()
             watermark_phrases = [
                 'shutterstock', 'getty images', 'istock', 'adobe stock', 'watermark',
@@ -1243,12 +1195,13 @@ def get_flickr_image(search_query, relevance_keywords, main_topic, specific_term
             logger.warning(f"Failed to process image {image_url}: {e}")
             return None
 
-    # Step 1: Search DDG for public domain images
     ddg_query = f"{search_query} license:public domain"
     logger.info(f"Searching DDG with query: '{ddg_query}'")
     try:
         with DDGS() as ddgs:
             results = ddgs.images(ddg_query, safesearch="on", max_results=20)
+            prioritized_results = []
+            other_results = []
             for result in results:
                 image_url = result.get("image")
                 page_url = result.get("url")
@@ -1258,14 +1211,23 @@ def get_flickr_image(search_query, relevance_keywords, main_topic, specific_term
                     source_name = domain.rsplit('.', 1)[0].capitalize()
                 else:
                     source_name = "Public Domain"
-                if image_url and image_url.endswith(('.jpg', '.jpeg', '.png')):
-                    result = process_image(image_url, source_name, page_url)
-                    if result:
-                        return result
+
+                if not image_url or not image_url.endswith(('.jpg', '.jpeg', '.png')):
+                    continue
+
+                image_metadata = f"{result.get('title', '').lower()} {page_url.lower()}"
+                if specific_term and specific_term.lower() in image_metadata:
+                    prioritized_results.append((image_url, source_name, page_url))
+                else:
+                    other_results.append((image_url, source_name, page_url))
+
+            for image_url, source_name, page_url in prioritized_results + other_results:
+                result = process_image(image_url, source_name, page_url)
+                if result:
+                    return result
     except Exception as e:
         logger.warning(f"DDG search failed for '{ddg_query}': {e}")
 
-    # Step 2: Fallback to Pixabay with specific term
     logger.info(f"No valid DDG images, falling back to Pixabay for '{search_query}'")
     image_url, source_name, uploader, page_url = get_image(search_query, specific_term)
     if image_url: