Removed Redundant process_photo

my-fix-branch
Shane 7 months ago
parent a5182bdfb9
commit 1fd1ad361b
1 changed file, 188 lines changed:
    foodie_utils.py

@@ -293,109 +293,20 @@ def get_image(search_query):
     logging.error(f"All image fetch attempts failed for query '{search_query}'. Returning None.")
     return None, None, None, None

-def process_photo(photo):
-    tags = [tag.text.lower() for tag in photo.getTags()]
-    title = photo.title.lower() if photo.title else ""
-    matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title]
-    if matched_keywords:
-        logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})")
-        return None
-    # Try 'Large' size first, fall back to 'Medium' if unavailable
-    img_url = None
-    try:
-        img_url = photo.getPhotoFile(size_label='Large')
-    except flickr_api.flickrerrors.FlickrError as e:
-        logging.info(f"Large size not available for photo {photo.id}: {e}, trying Medium")
-        try:
-            img_url = photo.getPhotoFile(size_label='Medium')
-        except flickr_api.flickrerrors.FlickrError as e:
-            logging.warning(f"Medium size not available for photo {photo.id}: {e}")
-            return None
-    if not img_url or img_url in used_images:
-        return None
-    uploader = photo.owner.username
-    page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
-    used_images.add(img_url)
-    save_used_images()
-    flickr_data = {
-        "title": search_query,
-        "image_url": img_url,
-        "source": "Flickr",
-        "uploader": uploader,
-        "page_url": page_url,
-        "timestamp": datetime.now(timezone.utc).isoformat()
-    }
-    flickr_file = "/home/shane/foodie_automator/flickr_images.json"
-    with open(flickr_file, 'a') as f:
-        json.dump(flickr_data, f)
-        f.write('\n')
-    logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}")
-    logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
-    return img_url, "Flickr", uploader, page_url
-
-def search_ddg_for_flickr(query):
-    ddg_query = f"{query} site:flickr.com"
-    ddg_url = f"https://duckduckgo.com/?q={quote(ddg_query)}"
-    try:
-        response = requests.get(ddg_url, headers=headers, timeout=10)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.text, 'html.parser')
-        photo_ids = set()
-        for link in soup.find_all('a', href=True):
-            href = link['href']
-            match = re.search(r'flickr\.com/photos/[^/]+/(\d+)', href)
-            if match:
-                photo_id = match.group(1)
-                photo_ids.add(photo_id)
-        photo_ids = list(photo_ids)[:2]  # Limit to 2 IDs
-        logging.info(f"Found {len(photo_ids)} Flickr photo IDs via DDG: {photo_ids}")
-        return photo_ids
-    except Exception as e:
-        logging.warning(f"DDG search failed for query '{ddg_query}': {e}")
-        return set()
-
-def classify_keywords(keywords):
-    prompt = (
-        "Given the following keywords from an image search query, classify each as 'specific' (e.g., brand names, unique entities like 'Taco Bell' or 'Paris') or 'generic' (e.g., common or abstract terms like 'dining' or 'trends'). "
-        "Return a JSON object mapping each keyword to its classification.\n\n"
-        "Keywords: " + ", ".join(keywords) + "\n\n"
-        "Example output format (do not use these exact keywords in your response):\n"
-        "```json\n"
-        "{\n"
-        "  \"keyword1\": \"specific\",\n"
-        "  \"keyword2\": \"generic\"\n"
-        "}\n```"
-    )
-    try:
-        response = client.chat.completions.create(
-            model=LIGHT_TASK_MODEL,
-            messages=[
-                {"role": "system", "content": "You are a helper that classifies keywords."},
-                {"role": "user", "content": prompt}
-            ],
-            max_tokens=100,
-            temperature=0.5
-        )
-        raw_response = response.choices[0].message.content
-        json_match = re.search(r'```json\n([\s\S]*?)\n```', raw_response)
-        if not json_match:
-            logging.warning(f"Failed to parse keyword classification JSON: {raw_response}")
-            return {kw: "specific" for kw in keywords}
-        classifications = json.loads(json_match.group(1))
-        return classifications
-    except Exception as e:
-        logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
-        return {kw: "specific" for kw in keywords}
+def get_flickr_image(search_query, relevance_keywords):
+    global last_flickr_request_time, flickr_request_count
+    reset_flickr_request_count()
+    flickr_request_count += 1
+    logging.info(f"Flickr request count: {flickr_request_count}/3600")
+
+    # Enforce a minimum delay of 10 seconds between Flickr requests
+    current_time = time.time()
+    time_since_last_request = current_time - last_flickr_request_time
+    if time_since_last_request < 10:
+        time.sleep(10 - time_since_last_request)
+    last_flickr_request_time = time.time()

     # Step 1: Search DDG to find Flickr photo IDs
     logging.info(f"Searching DDG with query: '{search_query} site:flickr.com'")
@@ -414,6 +325,7 @@ def process_photo(photo):
     classifications = classify_keywords(keywords)
     logging.info(f"Keyword classifications: {classifications}")

+    # Prioritize specific keywords
     specific_keywords = [kw for kw, classification in classifications.items() if classification == "specific"]
     if specific_keywords:
         for keyword in specific_keywords:
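
The added comment documents ordering that was already implicit: specific keywords are searched before generic ones. A toy illustration of that partition (the keywords here are hypothetical):

```python
classifications = {"taco bell": "specific", "dining": "generic", "paris": "specific"}

specific_keywords = [kw for kw, c in classifications.items() if c == "specific"]
generic_keywords = [kw for kw, c in classifications.items() if c == "generic"]

# Specific terms are tried first; generic terms only if nothing matched
print(specific_keywords + generic_keywords)  # ['taco bell', 'paris', 'dining']
```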
@@ -424,42 +336,17 @@ def process_photo(photo):
             if result:
                 return result

-    # Step 3: Final fallback to a generic food-related query
-    logging.info(f"No results found. Falling back to generic query: 'food dining'")
-    photos = search_flickr("food dining")
+    # Step 3: Final fallback using relevance keywords
+    fallback_query = " ".join(relevance_keywords) if isinstance(relevance_keywords, list) else relevance_keywords
+    logging.info(f"No results found. Falling back to generic query: '{fallback_query}'")
+    photos = search_flickr(fallback_query)
     for photo in photos:
         result = process_photo(photo)
         if result:
             return result
-    logging.warning(f"No valid Flickr image found in fallback for query '{search_query}'. Trying Pixabay.")
-
-    # Fallback to Pixabay
-    try:
-        pixabay_url = f"https://pixabay.com/api/?key={PIXABAY_API_KEY}&q={quote(search_query)}&image_type=photo&per_page=10"
-        response = requests.get(pixabay_url, timeout=10)
-        response.raise_for_status()
-        data = response.json()
-        for hit in data.get('hits', []):
-            img_url = hit.get('webformatURL')
-            if not img_url or img_url in used_images:
-                continue
-            uploader = hit.get('user', 'Unknown')
-            page_url = hit.get('pageURL', img_url)
-            used_images.add(img_url)
-            save_used_images()
-            logging.debug(f"Image selected for query '{search_query}': {img_url}")
-            return img_url, "Pixabay", uploader, page_url
-        logging.warning(f"No valid Pixabay image found for query '{search_query}'.")
-        return None, None, None, None
-    except Exception as e:
-        logging.error(f"Pixabay image fetch failed for query '{search_query}': {e}")
-        return None, None, None, None
+    logging.warning(f"No valid Flickr image found for query '{search_query}' after all attempts.")
+    return None, None, None, None

 def generate_image_query(title, summary):
     try:
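
Step 3 no longer hardcodes 'food dining': the fallback query is built from the caller's relevance_keywords, which may arrive as a list or a plain string. A minimal sketch of that normalization (the helper name is hypothetical):

```python
def build_fallback_query(relevance_keywords):
    # Join a keyword list into one query string; pass strings through unchanged
    if isinstance(relevance_keywords, list):
        return " ".join(relevance_keywords)
    return relevance_keywords

print(build_fallback_query(["street", "food", "market"]))  # street food market
print(build_fallback_query("food dining"))                 # food dining
```

Note that the Pixabay fallback is removed entirely in this hunk, so callers now receive the four-None return directly when Flickr yields nothing.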
@@ -1119,45 +1006,6 @@ def get_flickr_image(search_query, relevance_keywords):
         logging.warning(f"Failed to fetch Flickr photo ID {photo_id}: {e}")
         return None

-# Helper function to process a photo (fetch URL and metadata only)
-def process_photo(photo):
-    tags = [tag.text.lower() for tag in photo.getTags()]
-    title = photo.title.lower() if photo.title else ""
-    matched_keywords = [kw for kw in exclude_keywords if kw in tags or kw in title]
-    if matched_keywords:
-        logging.info(f"Skipping image with unwanted keywords: {photo.id} (tags: {tags}, title: {title}, matched: {matched_keywords})")
-        return None
-    img_url = photo.getPhotoFile(size_label='Large')
-    if not img_url:
-        img_url = photo.getPhotoFile(size_label='Medium')
-    if not img_url or img_url in used_images:
-        return None
-    uploader = photo.owner.username
-    page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
-    used_images.add(img_url)
-    save_used_images()
-    flickr_data = {
-        "title": search_query,
-        "image_url": img_url,
-        "source": "Flickr",
-        "uploader": uploader,
-        "page_url": page_url,
-        "timestamp": datetime.now(timezone.utc).isoformat()
-    }
-    flickr_file = "/home/shane/foodie_automator/flickr_images.json"
-    with open(flickr_file, 'a') as f:
-        json.dump(flickr_data, f)
-        f.write('\n')
-    logging.info(f"Saved Flickr image metadata to {flickr_file}: {img_url}")
-    logging.info(f"Selected Flickr image: {img_url} by {uploader} for query '{search_query}' (tags: {tags})")
-    return img_url, "Flickr", uploader, page_url
-
 # Helper function to search DDG and extract Flickr photo IDs
 def search_ddg_for_flickr(query):
     ddg_query = f"{query} site:flickr.com"
