update whole file

fix
Removed Redundant process_photo
2025-05-03 15:53:22 +10:00 · 2025-05-03 15:21:08 +10:00 · 2025-05-03 15:05:16 +10:00 · 2025-05-03 14:49:06 +10:00 · 2025-05-03 14:35:07 +10:00 · 2025-05-03 14:22:46 +10:00
4 changed files with 199 additions and 376 deletions
@@ -256,9 +256,6 @@ def curate_from_google_trends(geo_list=['US']):
        if not image_url:
            image_url, image_source, uploader, page_url = get_image(image_query)

-        # Log the fetched image details
-        logging.info(f"Fetched image for '{post_data['title']}': URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}")
-
        hook = get_dynamic_hook(post_data["title"]).strip()

        # Generate viral share prompt
@@ -294,7 +291,8 @@ def curate_from_google_trends(geo_list=['US']):
            share_text_encoded = quote(share_text)
            post_url_encoded = quote(post_url)
            share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
-            post_data["content"] = f"{final_summary}\n\n{share_links}"
+            # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
+            post_data["content"] = f"{final_summary}\n\n{share_links}"  # Removed cta from content
            is_posting = True
            try:
                post_to_wp(
@@ -320,16 +318,6 @@ def curate_from_google_trends(geo_list=['US']):
            logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")

            if image_url:
-                # Check if image is already used
-                used_images_list = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
-                used_image_urls = {entry["title"] for entry in used_images_list}
-                if image_url in used_image_urls:
-                    logging.warning(f"Image '{image_url}' already used, attempting to fetch a new image")
-                    image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
-                    if not image_url:
-                        image_url, image_source, uploader, page_url = get_image(image_query)
-                    logging.info(f"New image fetched: URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}")
-
                save_json_file(USED_IMAGES_FILE, image_url, timestamp)
                used_images.add(image_url)
                logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
@@ -211,7 +211,7 @@ def curate_from_reddit():
    if not articles:
        print("No Reddit posts available")
        logging.info("No Reddit posts available")
-        return None, None, random.randint(600, 1800)
+        return None, None, None

    articles.sort(key=lambda x: x["upvotes"], reverse=True)
    
@@ -299,10 +299,8 @@ def curate_from_reddit():
        if not image_url:
            image_url, image_source, uploader, page_url = get_image(image_query)
        
-        # Log the fetched image details
-        logging.info(f"Fetched image for '{post_data['title']}': URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}")
-        
        hook = get_dynamic_hook(post_data["title"]).strip()
+        # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=None)
        
        # Generate viral share prompt
        share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
@@ -311,7 +309,7 @@ def curate_from_reddit():
            f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
            f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
        )
-        post_data["content"] = f"{final_summary}\n\n{share_links_template}"
+        post_data["content"] = f"{final_summary}\n\n{share_links_template}"  # Removed cta from content
        
        global is_posting
        is_posting = True
@@ -337,7 +335,8 @@ def curate_from_reddit():
            share_text_encoded = quote(share_text)
            post_url_encoded = quote(post_url)
            share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
-            post_data["content"] = f"{final_summary}\n\n{share_links}"
+            # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
+            post_data["content"] = f"{final_summary}\n\n{share_links}"  # Removed cta from content
            is_posting = True
            try:
                post_to_wp(
@@ -363,16 +362,6 @@ def curate_from_reddit():
            logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}")
            
            if image_url:
-                # Check if image is already used
-                used_images_list = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
-                used_image_urls = {entry["title"] for entry in used_images_list}
-                if image_url in used_image_urls:
-                    logging.warning(f"Image '{image_url}' already used, attempting to fetch a new image")
-                    image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
-                    if not image_url:
-                        image_url, image_source, uploader, page_url = get_image(image_query)
-                    logging.info(f"New image fetched: URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}")
-
                save_json_file(USED_IMAGES_FILE, image_url, timestamp)
                used_images.add(image_url)
                logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE} with timestamp {timestamp}")
@@ -32,10 +32,6 @@ from dotenv import load_dotenv

 load_dotenv()

-# Log script version to ensure it's the latest
-SCRIPT_VERSION = "1.2.0"
-logging.info(f"Starting foodie_automator_rss.py version {SCRIPT_VERSION}")
-
 is_posting = False

 def signal_handler(sig, frame):
@@ -273,10 +269,12 @@ def curate_from_rss():
        # Fetch image
        image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
        if not image_url:
+            logging.info(f"Flickr fetch failed for '{image_query}'. Falling back to Pixabay.")
            image_url, image_source, uploader, page_url = get_image(image_query)
-
-        # Log the fetched image details
-        logging.info(f"Fetched image for '{post_data['title']}': URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}")
+            if not image_url:
+                logging.info(f"Pixabay fetch failed for '{image_query}'. Skipping article '{title}'.")
+                attempts += 1
+                continue

        hook = get_dynamic_hook(post_data["title"]).strip()

@@ -287,7 +285,7 @@ def curate_from_rss():
            f'<a href="https://x.com/intent/tweet?url={{post_url}}&text={{share_text}}" target="_blank"><i class="tsi tsi-twitter"></i></a> '
            f'<a href="https://www.facebook.com/sharer/sharer.php?u={{post_url}}" target="_blank"><i class="tsi tsi-facebook"></i></a></p>'
        )
-        post_data["content"] = f"{final_summary}\n\n{share_links_template}"
+        post_data["content"] = f"{final_summary}\n\n{share_links_template}"  # Removed cta from content

        global is_posting
        is_posting = True
@@ -313,7 +311,8 @@ def curate_from_rss():
            share_text_encoded = quote(share_text)
            post_url_encoded = quote(post_url)
            share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
-            post_data["content"] = f"{final_summary}\n\n{share_links}"
+            # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
+            post_data["content"] = f"{final_summary}\n\n{share_links}"  # Removed cta from content
            is_posting = True
            try:
                post_to_wp(
@@ -339,16 +338,6 @@ def curate_from_rss():
            logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")

            if image_url:
-                # Check if image is already used
-                used_images_list = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS)
-                used_image_urls = {entry["title"] for entry in used_images_list}
-                if image_url in used_image_urls:
-                    logging.warning(f"Image '{image_url}' already used, attempting to fetch a new image")
-                    image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
-                    if not image_url:
-                        image_url, image_source, uploader, page_url = get_image(image_query)
-                    logging.info(f"New image fetched: URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}")
-
                save_json_file(USED_IMAGES_FILE, image_url, timestamp)
                used_images.add(image_url)
                logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
@@ -236,178 +236,12 @@ def select_best_persona(interest_score, content=""):
    return random.choice(personas)

 def get_image(search_query):
-    global last_flickr_request_time, flickr_request_count
-    
-    reset_flickr_request_count()
-    flickr_request_count += 1
-    logging.info(f"Flickr request count: {flickr_request_count}/3600")
-    
-    current_time = time.time()
-    time_since_last_request = current_time - last_flickr_request_time
-    if time_since_last_request < 10:
-        time.sleep(10 - time_since_last_request)
-    
-    last_flickr_request_time = time.time()
-    
    headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
    
-    def search_flickr(query, per_page=5):
-        try:
-            photos = flickr_api.Photo.search(
-                text=query,
-                per_page=per_page,
-                sort='relevance',
-                safe_search=1,
-                media='photos',
-                license='4,5,9,10'
-            )
-            return photos
-        except Exception as e:
-            logging.warning(f"Flickr API error for query '{query}': {e}")
-            return []
-
-    def fetch_photo_by_id(photo_id):
-        try:
-            photo = flickr_api.Photo(id=photo_id)
-            return photo
-        except Exception as e:
-            logging.warning(f"Failed to fetch Flickr photo ID {photo_id}: {e}")
-            return None
-
-    def process_photo(photo):
-        tags = [tag.text.lower() for tag in photo.getTags()]
-        title = photo.title.lower() if photo.title else ""
-        
-        matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title]
-        if matched_keywords:
-            logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})")
-            return None
-        
-        img_url = photo.getPhotoFile(size_label='Medium')
-        if not img_url or img_url in used_images:
-            return None
-        
-        uploader = photo.owner.username
-        page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
-        
-        used_images.add(img_url)
-        save_used_images()
-        
-        flickr_data = {
-            "title": search_query,
-            "image_url": img_url,
-            "source": "Flickr",
-            "uploader": uploader,
-            "page_url": page_url,
-            "timestamp": datetime.now(timezone.utc).isoformat()
-        }
-        flickr_file = "/home/shane/foodie_automator/flickr_images.json"
-        with open(flickr_file, 'a') as f:
-            json.dump(flickr_data, f)
-            f.write('\n')
-        logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}")
-        
-        logging.info(f"Fallback Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
-        return img_url, "Flickr", uploader, page_url
-
-    def search_ddg_for_flickr(query):
-        ddg_query = f"{query} site:flickr.com"
-        ddg_url = f"https://duckduckgo.com/?q={quote(ddg_query)}"
-        try:
-            response = requests.get(ddg_url, headers=headers, timeout=10)
-            response.raise_for_status()
-            soup = BeautifulSoup(response.text, 'html.parser')
-            
-            photo_ids = set()
-            for link in soup.find_all('a', href=True):
-                href = link['href']
-                match = re.search(r'flickr\.com/photos/[^/]+/(\d+)', href)
-                if match:
-                    photo_id = match.group(1)
-                    photo_ids.add(photo_id)
-            
-            photo_ids = list(photo_ids)[:2]  # Limit to 2 IDs
-            logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}")
-            return photo_ids
-        except Exception as e:
-            logging.warning(f"DDG search failed for query '{ddg_query}': {e}")
-            return set()
-
-    def classify_keywords(keywords):
-        prompt = (
-            "Given the following keywords from an image search query, classify each as 'specific' (e.g., brand names, unique entities like 'Taco Bell' or 'Paris') or 'generic' (e.g., common or abstract terms like 'dining' or 'trends'). "
-            "Return a JSON object mapping each keyword to its classification.\n\n"
-            "Keywords: " + ", ".join(keywords) + "\n\n"
-            "Example output format (do not use these exact keywords in your response):\n"
-            "```json\n"
-            "{\n"
-            "  \"keyword1\": \"specific\",\n"
-            "  \"keyword2\": \"generic\"\n"
-            "}\n```"
-        )
-        try:
-            response = client.chat.completions.create(
-                model=LIGHT_TASK_MODEL,
-                messages=[
-                    {"role": "system", "content": "You are a helper that classifies keywords."},
-                    {"role": "user", "content": prompt}
-                ],
-                max_tokens=100,
-                temperature=0.5
-            )
-            raw_response = response.choices[0].message.content
-            json_match = re.search(r'```json\n([\s\S]*?)\n```', raw_response)
-            if not json_match:
-                logging.warning(f"Failed to parse keyword classification JSON: {raw_response}")
-                return {kw: "specific" for kw in keywords}
-            
-            classifications = json.loads(json_match.group(1))
-            return classifications
-        except Exception as e:
-            logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
-            return {kw: "specific" for kw in keywords}
-
-    # Step 1: Search DDG to find Flickr photo IDs
-    logging.info(f"Searching DDG with query: '{search_query} site:flickr.com'")
-    photo_ids = search_ddg_for_flickr(search_query)
-    if photo_ids:
-        for photo_id in photo_ids:
-            photo = fetch_photo_by_id(photo_id)
-            if photo:
-                result = process_photo(photo)
-                if result:
-                    return result
-
-    # Step 2: Break down the query into keywords and classify them for direct Flickr API search
-    keywords = search_query.lower().split()
-    if len(keywords) > 1:
-        classifications = classify_keywords(keywords)
-        logging.info(f"Keyword classifications: {classifications}")
-        
-        specific_keywords = [kw for kw, classification in classifications.items() if classification == "specific"]
-        if specific_keywords:
-            for keyword in specific_keywords:
-                logging.info(f"Searching Flickr with specific keyword: '{keyword}'")
-                photos = search_flickr(keyword)
-                for photo in photos:
-                    result = process_photo(photo)
-                    if result:
-                        return result
-
-    # Step 3: Final fallback to a generic food-related query
-    logging.info(f"No results found. Falling back to generic query: 'food dining'")
-    photos = search_flickr("food dining")
-    for photo in photos:
-        result = process_photo(photo)
-        if result:
-            return result
-
-    logging.warning(f"No valid Flickr image found in fallback for query '{search_query}'. Trying Pixabay.")
-
-    # Fallback to Pixabay
+    # Try Pixabay with the original query
    try:
        pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(search_query)}&image_type=photo&per_page=10"
-        response = requests.get(pixabay_url, timeout=10)
+        response = requests.get(pixabay_url, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
        
@@ -421,15 +255,43 @@ def get_image(search_query):
            used_images.add(img_url)
            save_used_images()
            
-            logging.debug(f"Image selected for query '{search_query}': {img_url}")
+            logging.info(f"Selected Pixabay image: {img_url} by {uploader} for query '{search_query}'")
            return img_url, "Pixabay", uploader, page_url
        
-        logging.warning(f"No valid Pixabay image found for query '{search_query}'.")
-        return None, None, None, None
-        
+        logging.info(f"No valid Pixabay image found for query '{search_query}'. Trying fallback query.")
+    
    except Exception as e:
-        logging.error(f"Pixabay image fetch failed for query '{search_query}': {e}")
-        return None, None, None, None
+        logging.warning(f"Pixabay image fetch failed for query '{search_query}': {e}")
+    
+    # Fallback to a generic query
+    fallback_query = "food dining"
+    try:
+        pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(fallback_query)}&image_type=photo&per_page=10"
+        response = requests.get(pixabay_url, headers=headers, timeout=10)
+        response.raise_for_status()
+        data = response.json()
+        
+        for hit in data.get('hits', []):
+            img_url = hit.get('webformatURL')
+            if not img_url or img_url in used_images:
+                continue
+            uploader = hit.get('user', 'Unknown')
+            page_url = hit.get('pageURL', img_url)
+            
+            used_images.add(img_url)
+            save_used_images()
+            
+            logging.info(f"Selected Pixabay fallback image: {img_url} by {uploader} for query '{fallback_query}'")
+            return img_url, "Pixabay", uploader, page_url
+        
+        logging.warning(f"No valid Pixabay image found for fallback query '{fallback_query}'.")
+    
+    except Exception as e:
+        logging.warning(f"Pixabay fallback image fetch failed for query '{fallback_query}': {e}")
+    
+    # Ultimate fallback: return None but log clearly
+    logging.error(f"All image fetch attempts failed for query '{search_query}'. Returning None.")
+    return None, None, None, None

 def generate_image_query(title, summary):
    try:
@@ -1010,24 +872,19 @@ if os.path.exists(used_images_file):
            else:
                data = json.loads(content)
                if not isinstance(data, list):
-                    logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Resetting.")
-                    data = []
-                else:
-                    # Handle malformed format (list of lists or invalid entries)
-                    flat_data = []
-                    for item in data:
-                        if isinstance(item, str) and item.startswith('https://'):
-                            flat_data.append(item)
-                        elif isinstance(item, list):
-                            logging.warning(f"Fixing malformed entry in {used_images_file}: {item}")
-                            flat_data.extend([sub_item for sub_item in item if isinstance(sub_item, str) and sub_item.startswith('https://')])
-                        else:
-                            logging.warning(f"Skipping invalid entry in {used_images_file}: {item}")
-                    data = flat_data
+                    logging.warning(f"Invalid format in {used_images_file}: expected a list, got {type(data)}. Converting to list.")
+                    if isinstance(data, dict):
+                        # If it's a dict, try to extract URLs from values
+                        data = [v for v in data.values() if isinstance(v, str) and v.startswith('https://')]
+                    else:
+                        logging.warning(f"Cannot convert {type(data)} to list. Resetting to empty list.")
+                        data = []
+                # Filter out non-string or non-URL entries
+                data = [item for item in data if isinstance(item, str) and item.startswith('https://')]
            used_images.update(data)
        logging.info(f"Loaded {len(used_images)} used image URLs from {used_images_file}")
    except Exception as e:
-        logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting file.")
+        logging.warning(f"Failed to load used images from {used_images_file}: {e}. Resetting to empty set.")
        used_images = set()
        with open(used_images_file, 'w') as f:
            json.dump([], f)
@@ -1035,17 +892,14 @@ if os.path.exists(used_images_file):
 # Function to save used_images to file
 def save_used_images():
    try:
+        # Ensure used_images contains only valid URLs
+        valid_urls = [url for url in used_images if isinstance(url, str) and url.startswith('https://')]
+        if len(valid_urls) != len(used_images):
+            logging.warning(f"Found {len(used_images) - len(valid_urls)} invalid URLs in used_images set")
+        
        with open(used_images_file, 'w') as f:
-            f.write('[\n')
-            urls = list(used_images)
-            for i, url in enumerate(urls):
-                f.write(f'"{url}"')
-                if i < len(urls) - 1:
-                    f.write(',\n')
-                else:
-                    f.write('\n')
-            f.write(']')
-        logging.info(f"Saved {len(used_images)} used image URLs to {used_images_file}")
+            json.dump(valid_urls, f, indent=2)
+        logging.info(f"Saved {len(valid_urls)} used image URLs to {used_images_file}")
    except Exception as e:
        logging.warning(f"Failed to save used images to {used_images_file}: {e}")

@@ -1055,6 +909,134 @@ def reset_flickr_request_count():
        flickr_request_count = 0
        flickr_request_start_time = time.time()

+def process_photo(photo, search_query):
+    tags = [tag.text.lower() for tag in photo.getTags()]
+    title = photo.title.lower() if photo.title else ""
+    
+    matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title]
+    if matched_keywords:
+        logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})")
+        return None
+    
+    # Try 'Large' size first, fall back to 'Medium' if unavailable
+    img_url = None
+    try:
+        img_url = photo.getPhotoFile(size_label='Large')
+    except flickr_api.flickrerrors.FlickrError as e:
+        logging.info(f"Large size not available for photo {photo.id}: {e}, trying Medium")
+        try:
+            img_url = photo.getPhotoFile(size_label='Medium')
+        except flickr_api.flickrerrors.FlickrError as e:
+            logging.warning(f"Medium size not available for photo {photo.id}: {e}")
+            return None
+    
+    if not img_url or img_url in used_images:
+        logging.info(f"Image URL invalid or already used for photo {photo.id}: {img_url}")
+        return None
+    
+    uploader = photo.owner.username
+    page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
+    
+    used_images.add(img_url)
+    save_used_images()
+    
+    flickr_data = {
+        "title": search_query,
+        "image_url": img_url,
+        "source": "Flickr",
+        "uploader": uploader,
+        "page_url": page_url,
+        "timestamp": datetime.now(timezone.utc).isoformat()
+    }
+    flickr_file = "/home/shane/foodie_automator/flickr_images.json"
+    with open(flickr_file, 'a') as f:
+        json.dump(flickr_data, f)
+        f.write('\n')
+    logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}")
+    
+    logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
+    return img_url, "Flickr", uploader, page_url
+
+def search_flickr(query, per_page=5):
+    try:
+        photos = flickr_api.Photo.search(
+            text=query,
+            per_page=per_page,
+            sort='relevance',
+            safe_search=1,
+            media='photos',
+            license='4,5,9,10'
+        )
+        return photos
+    except Exception as e:
+        logging.warning(f"Flickr API error for query '{query}': {e}")
+        return []
+
+def fetch_photo_by_id(photo_id):
+    try:
+        photo = flickr_api.Photo(id=photo_id)
+        return photo
+    except Exception as e:
+        logging.warning(f"Failed to fetch Flickr photo ID {photo_id}: {e}")
+        return None
+
+def search_ddg_for_flickr(query):
+    ddg_query = f"{query} site:flickr.com"
+    ddg_url = f"https://duckduckgo.com/?q={quote(ddg_query)}"
+    try:
+        response = requests.get(ddg_url, headers={'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}, timeout=10)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, 'html.parser')
+        
+        photo_ids = set()
+        for link in soup.find_all('a', href=True):
+            href = link['href']
+            match = re.search(r'flickr\.com/photos/[^/]+/(\d+)', href)
+            if match:
+                photo_id = match.group(1)
+                photo_ids.add(photo_id)
+        
+        photo_ids = list(photo_ids)[:2]  # Limit to 2 IDs
+        logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}")
+        return photo_ids
+    except Exception as e:
+        logging.warning(f"DDG search failed for query '{ddg_query}': {e}")
+        return set()
+
+def classify_keywords(keywords):
+    prompt = (
+        "Given the following keywords from an image search query, classify each as 'specific' (e.g., brand names, unique entities like 'Taco Bell' or 'Paris') or 'generic' (e.g., common or abstract terms like 'dining' or 'trends'). "
+        "Return a JSON object mapping each keyword to its classification.\n\n"
+        "Keywords: " + ", ".join(keywords) + "\n\n"
+        "Example output format (do not use these exact keywords in your response):\n"
+        "```json\n"
+        "{\n"
+        "  \"keyword1\": \"specific\",\n"
+        "  \"keyword2\": \"generic\"\n"
+        "}\n```"
+    )
+    try:
+        response = client.chat.completions.create(
+            model=LIGHT_TASK_MODEL,
+            messages=[
+                {"role": "system", "content": "You are a helper that classifies keywords."},
+                {"role": "user", "content": prompt}
+            ],
+            max_tokens=100,
+            temperature=0.5
+        )
+        raw_response = response.choices[0].message.content
+        json_match = re.search(r'```json\n([\s\S]*?)\n```', raw_response)
+        if not json_match:
+            logging.warning(f"Failed to parse keyword classification JSON: {raw_response}")
+            return {kw: "specific" for kw in keywords}
+        
+        classifications = json.loads(json_match.group(1))
+        return classifications
+    except Exception as e:
+        logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
+        return {kw: "specific" for kw in keywords}
+
 def get_flickr_image(search_query, relevance_keywords):
    global last_flickr_request_time, flickr_request_count
    
@@ -1070,131 +1052,6 @@ def get_flickr_image(search_query, relevance_keywords):
    
    last_flickr_request_time = time.time()
    
-    headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
-    
-    # Helper function to search Flickr with a given query
-    def search_flickr(query, per_page=5):  # Reduced per_page to limit results
-        try:
-            photos = flickr_api.Photo.search(
-                text=query,
-                per_page=per_page,
-                sort='relevance',
-                safe_search=1,
-                media='photos',
-                license='4,5,9,10'
-            )
-            return photos
-        except Exception as e:
-            logging.warning(f"Flickr API error for query '{query}': {e}")
-            return []
-
-    # Helper function to fetch a Flickr photo by ID
-    def fetch_photo_by_id(photo_id):
-        try:
-            photo = flickr_api.Photo(id=photo_id)
-            return photo
-        except Exception as e:
-            logging.warning(f"Failed to fetch Flickr photo ID {photo_id}: {e}")
-            return None
-
-    # Helper function to process a photo (fetch URL and metadata only)
-    def process_photo(photo):
-        tags = [tag.text.lower() for tag in photo.getTags()]
-        title = photo.title.lower() if photo.title else ""
-        
-        matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title]
-        if matched_keywords:
-            logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})")
-            return None
-        
-        img_url = photo.getPhotoFile(size_label='Large')
-        if not img_url:
-            img_url = photo.getPhotoFile(size_label='Medium')
-        if not img_url or img_url in used_images:
-            return None
-        
-        uploader = photo.owner.username
-        page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
-        
-        used_images.add(img_url)
-        save_used_images()
-        
-        flickr_data = {
-            "title": search_query,
-            "image_url": img_url,
-            "source": "Flickr",
-            "uploader": uploader,
-            "page_url": page_url,
-            "timestamp": datetime.now(timezone.utc).isoformat()
-        }
-        flickr_file = "/home/shane/foodie_automator/flickr_images.json"
-        with open(flickr_file, 'a') as f:
-            json.dump(flickr_data, f)
-            f.write('\n')
-        logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}")
-        
-        logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
-        return img_url, "Flickr", uploader, page_url
-
-    # Helper function to search DDG and extract Flickr photo IDs
-    def search_ddg_for_flickr(query):
-        ddg_query = f"{query} site:flickr.com"
-        ddg_url = f"https://duckduckgo.com/?q={quote(ddg_query)}"
-        try:
-            response = requests.get(ddg_url, headers=headers, timeout=10)
-            response.raise_for_status()
-            soup = BeautifulSoup(response.text, 'html.parser')
-            
-            photo_ids = set()
-            for link in soup.find_all('a', href=True):
-                href = link['href']
-                match = re.search(r'flickr\.com/photos/[^/]+/(\d+)', href)
-                if match:
-                    photo_id = match.group(1)
-                    photo_ids.add(photo_id)
-            
-            photo_ids = list(photo_ids)[:2]  # Limit to 2 IDs
-            logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}")
-            return photo_ids
-        except Exception as e:
-            logging.warning(f"DDG search failed for query '{ddg_query}': {e}")
-            return set()
-
-    # Helper function to classify keywords as specific or generic
-    def classify_keywords(keywords):
-        prompt = (
-            "Given the following keywords from an image search query, classify each as 'specific' (e.g., brand names, unique entities like 'Taco Bell' or 'Paris') or 'generic' (e.g., common or abstract terms like 'dining' or 'trends'). "
-            "Return a JSON object mapping each keyword to its classification.\n\n"
-            "Keywords: " + ", ".join(keywords) + "\n\n"
-            "Example output format (do not use these exact keywords in your response):\n"
-            "```json\n"
-            "{\n"
-            "  \"keyword1\": \"specific\",\n"
-            "  \"keyword2\": \"generic\"\n"
-            "}\n```"
-        )
-        try:
-            response = client.chat.completions.create(
-                model=LIGHT_TASK_MODEL,
-                messages=[
-                    {"role": "system", "content": "You are a helper that classifies keywords."},
-                    {"role": "user", "content": prompt}
-                ],
-                max_tokens=100,
-                temperature=0.5
-            )
-            raw_response = response.choices[0].message.content
-            json_match = re.search(r'```json\n([\s\S]*?)\n```', raw_response)
-            if not json_match:
-                logging.warning(f"Failed to parse keyword classification JSON: {raw_response}")
-                return {kw: "specific" for kw in keywords}
-            
-            classifications = json.loads(json_match.group(1))
-            return classifications
-        except Exception as e:
-            logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
-            return {kw: "specific" for kw in keywords}
-
    # Step 1: Search DDG to find Flickr photo IDs
    logging.info(f"Searching DDG with query: '{search_query} site:flickr.com'")
    photo_ids = search_ddg_for_flickr(search_query)
@@ -1202,7 +1059,7 @@ def get_flickr_image(search_query, relevance_keywords):
        for photo_id in photo_ids:
            photo = fetch_photo_by_id(photo_id)
            if photo:
-                result = process_photo(photo)
+                result = process_photo(photo, search_query)
                if result:
                    return result

@@ -1219,7 +1076,7 @@ def get_flickr_image(search_query, relevance_keywords):
                logging.info(f"Searching Flickr with specific keyword: '{keyword}'")
                photos = search_flickr(keyword)
                for photo in photos:
-                    result = process_photo(photo)
+                    result = process_photo(photo, search_query)
                    if result:
                        return result

@@ -1228,7 +1085,7 @@ def get_flickr_image(search_query, relevance_keywords):
    logging.info(f"No results found. Falling back to generic query: '{fallback_query}'")
    photos = search_flickr(fallback_query)
    for photo in photos:
-        result = process_photo(photo)
+        result = process_photo(photo, search_query)
        if result:
            return result
Author	SHA1	Message	Date
Shane	427a5cb919	update whole file	2025-05-03 15:53:22 +10:00
Shane	6d945dae67	fix	2025-05-03 15:21:08 +10:00
Shane	1fd1ad361b	Removed Redundant process_photo	2025-05-03 15:05:16 +10:00
Shane	a5182bdfb9	fix	2025-05-03 14:49:06 +10:00
Shane	be6514e4e3	fix	2025-05-03 14:35:07 +10:00
Shane	c936555741	fix flickr image large issue	2025-05-03 14:22:46 +10:00
Shane	cdc54f3f14	remove cta import	2025-05-03 14:06:58 +10:00
Shane	aabc989e1c	remove cta	2025-05-03 13:59:01 +10:00
Shane	b025afe9f3	Revert "remove double cta" This reverts commit `e2c47a1a05`.	2025-05-03 13:58:44 +10:00