Skip YouTube-hosted images (youtube.com, ytimg.com) in process_image
This commit is contained in:
+8
-2
@@ -1086,8 +1086,14 @@ def get_flickr_image(search_query, relevance_keywords, main_topic):
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
def process_image(image_url, source_name, page_url):
|
def process_image(image_url, source_name, page_url):
|
||||||
"""Download image, check for text with OCR, validate resolution, exclude screenshots and watermarks."""
|
"""Download image, check for text with OCR, validate resolution, exclude screenshots, watermarks, and YouTube images."""
|
||||||
try:
|
try:
|
||||||
|
# Check for YouTube images via URL or page URL
|
||||||
|
youtube_domains = ['youtube.com', 'ytimg.com']
|
||||||
|
if any(domain in image_url.lower() or domain in page_url.lower() for domain in youtube_domains):
|
||||||
|
logger.info(f"Skipping YouTube image: {image_url}")
|
||||||
|
return None
|
||||||
|
|
||||||
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
headers = {'User-Agent': 'InsiderFoodieBot/1.0 (https://insiderfoodie.com; contact@insiderfoodie.com)'}
|
||||||
response = requests.get(image_url, headers=headers, timeout=10)
|
response = requests.get(image_url, headers=headers, timeout=10)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
@@ -1126,7 +1132,7 @@ def get_flickr_image(search_query, relevance_keywords, main_topic):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
word_count = len(text.split())
|
word_count = len(text.split())
|
||||||
if word_count > 5: # Lowered threshold for stricter filtering
|
if word_count > 5:
|
||||||
logger.info(f"Skipping image with too much text: {image_url} ({word_count} words)")
|
logger.info(f"Skipping image with too much text: {image_url} ({word_count} words)")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user