try
This commit is contained in:
@@ -236,13 +236,18 @@ def curate_from_rss():
|
|||||||
print(f"Trying RSS Article: {title} from {source_name}")
|
print(f"Trying RSS Article: {title} from {source_name}")
|
||||||
logging.info(f"Trying RSS Article: {title} from {source_name}")
|
logging.info(f"Trying RSS Article: {title} from {source_name}")
|
||||||
|
|
||||||
image_query, relevance_keywords, skip = smart_image_and_filter(title, summary)
|
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
||||||
if skip:
|
if skip:
|
||||||
print(f"Skipping filtered RSS article: {title}")
|
print(f"Skipping filtered RSS article: {title}")
|
||||||
logging.info(f"Skipping filtered RSS article: {title}")
|
logging.info(f"Skipping filtered RSS article: {title}")
|
||||||
attempts += 1
|
attempts += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Pass main_topic to get_flickr_image for use in fallbacks
|
||||||
|
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
||||||
|
if not image_url:
|
||||||
|
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||||
|
|
||||||
# Fetch additional context via DDG
|
# Fetch additional context via DDG
|
||||||
ddg_context = fetch_duckduckgo_news_context(title)
|
ddg_context = fetch_duckduckgo_news_context(title)
|
||||||
scoring_content = f"{title}\n\n{summary}\n\nContent: {content}\n\nAdditional Context: {ddg_context}"
|
scoring_content = f"{title}\n\n{summary}\n\nContent: {content}\n\nAdditional Context: {ddg_context}"
|
||||||
|
|||||||
+50
-20
@@ -341,9 +341,10 @@ def smart_image_and_filter(title, summary):
|
|||||||
prompt = (
|
prompt = (
|
||||||
"Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) "
|
"Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) "
|
||||||
"for an image search about food industry trends or viral content. Prioritize specific terms if present, "
|
"for an image search about food industry trends or viral content. Prioritize specific terms if present, "
|
||||||
"otherwise focus on the main theme. "
|
"otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). "
|
||||||
"Return 'SKIP' if the article is about home appliances, recipes, promotions, or contains 'homemade', else 'KEEP'. "
|
"Return 'SKIP' if the article is about home appliances, recipes, promotions, or contains 'homemade', else 'KEEP'. "
|
||||||
"Return as JSON with double quotes for all property names and string values (e.g., {\"image_query\": \"specific term\", \"relevance\": [\"keyword1\", \"keyword2\"], \"action\": \"KEEP\" or \"SKIP\"})."
|
"Return as JSON with double quotes for all property names and string values (e.g., "
|
||||||
|
"{\"image_query\": \"specific term\", \"relevance\": [\"keyword1\", \"keyword2\"], \"main_topic\": \"main food item\", \"action\": \"KEEP\" or \"SKIP\"})."
|
||||||
)
|
)
|
||||||
|
|
||||||
response = client.chat.completions.create(
|
response = client.chat.completions.create(
|
||||||
@@ -357,39 +358,52 @@ def smart_image_and_filter(title, summary):
|
|||||||
raw_result = response.choices[0].message.content.strip()
|
raw_result = response.choices[0].message.content.strip()
|
||||||
logging.info(f"Raw GPT smart image/filter response: '{raw_result}'")
|
logging.info(f"Raw GPT smart image/filter response: '{raw_result}'")
|
||||||
|
|
||||||
# Remove ```json markers and fix single quotes in JSON structure
|
|
||||||
cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip()
|
cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip()
|
||||||
# Replace single quotes with double quotes, but preserve single quotes within string values
|
|
||||||
fixed_result = re.sub(r"(?<!\\)'(?=\s*[\w\s]*\])|(?<=\[|\{|\s)'|'(?=\s*[\]\},:])|(?<=\w)'(?=\s*:)", '"', cleaned_result)
|
fixed_result = re.sub(r"(?<!\\)'(?=\s*[\w\s]*\])|(?<=\[|\{|\s)'|'(?=\s*[\]\},:])|(?<=\w)'(?=\s*:)", '"', cleaned_result)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = json.loads(fixed_result)
|
result = json.loads(fixed_result)
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
logging.warning(f"JSON parsing failed: {e}, raw: '{fixed_result}'. Using fallback.")
|
logging.warning(f"JSON parsing failed: {e}, raw: '{fixed_result}'. Using fallback.")
|
||||||
return "food trends", ["cuisine", "dining"], False
|
# Fallback: Extract main topic using simple keyword matching
|
||||||
|
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
||||||
|
return main_topic, [main_topic, "food"], False
|
||||||
|
|
||||||
if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result:
|
if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result:
|
||||||
logging.warning(f"Invalid GPT response format: {result}, using fallback")
|
logging.warning(f"Invalid GPT response format: {result}, using fallback")
|
||||||
return "food trends", ["cuisine", "dining"], False
|
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
||||||
|
return main_topic, [main_topic, "food"], False
|
||||||
|
|
||||||
image_query = result["image_query"]
|
image_query = result["image_query"]
|
||||||
relevance_keywords = result["relevance"]
|
relevance_keywords = result["relevance"]
|
||||||
|
main_topic = result.get("main_topic", extract_main_topic(title.lower() + " " + summary.lower()))
|
||||||
skip_flag = result["action"] == "SKIP" or "homemade" in title.lower() or "homemade" in summary.lower()
|
skip_flag = result["action"] == "SKIP" or "homemade" in title.lower() or "homemade" in summary.lower()
|
||||||
|
|
||||||
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Skip: {skip_flag}")
|
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}")
|
||||||
|
|
||||||
|
specific_single_words = ["kimchi", "sushi", "pizza", "taco", "burger"]
|
||||||
if not image_query:
|
if not image_query:
|
||||||
logging.warning(f"Image query is empty, using fallback")
|
logging.warning(f"Image query is empty, using fallback")
|
||||||
return "food trends", ["cuisine", "dining"], skip_flag
|
return main_topic, [main_topic, "food"], skip_flag
|
||||||
# Allow single-word queries if they are specific (e.g., food items)
|
|
||||||
specific_single_words = ["kimchi", "sushi", "pizza", "taco", "burger"] # Add more as needed
|
|
||||||
if len(image_query.split()) < 2 and image_query.lower() not in specific_single_words:
|
if len(image_query.split()) < 2 and image_query.lower() not in specific_single_words:
|
||||||
logging.warning(f"Image query '{image_query}' too vague, using fallback")
|
logging.warning(f"Image query '{image_query}' too vague, using fallback")
|
||||||
return "food trends", ["cuisine", "dining"], skip_flag
|
return main_topic, [main_topic, "food"], skip_flag
|
||||||
|
|
||||||
|
return image_query, relevance_keywords, main_topic, skip_flag
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Smart image/filter failed: {e}, using fallback")
|
logging.error(f"Smart image/filter failed: {e}, using fallback")
|
||||||
return "food trends", ["cuisine", "dining"], False
|
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
||||||
|
return main_topic, [main_topic, "food"], False
|
||||||
|
|
||||||
|
def extract_main_topic(text):
    """Return the first known food keyword contained in *text*.

    Keywords are tested in the fixed priority order of ``food_keywords``
    (not in order of appearance within the text) using case-insensitive
    substring matching.

    Args:
        text: Free-form text, e.g. an article title joined with its summary.
            ``None`` or an empty string is treated as "no keyword present".

    Returns:
        The matching keyword, or ``"food trends"`` when none is found.
    """
    # Common food-related keywords (expand as needed)
    food_keywords = (
        "kimchi", "sushi", "pizza", "taco", "burger",
        "ramen", "curry", "pasta", "salad", "soup",
    )

    # Normalize here rather than relying on every caller to lowercase first;
    # `or ""` keeps a missing title/summary from raising.
    haystack = (text or "").lower()

    for keyword in food_keywords:
        if keyword in haystack:
            return keyword

    # Fallback to a generic term if no specific food item is found
    return "food trends"
|
||||||
|
|
||||||
def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_password, image_source="Pixabay", uploader=None, pixabay_url=None):
|
def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_password, image_source="Pixabay", uploader=None, pixabay_url=None):
|
||||||
try:
|
try:
|
||||||
@@ -934,15 +948,25 @@ def process_photo(photo, search_query):
|
|||||||
logging.warning(f"Medium size not available for photo {photo.id}: {e}")
|
logging.warning(f"Medium size not available for photo {photo.id}: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if not img_url or img_url in used_images:
|
if not img_url:
|
||||||
logging.info(f"Image URL invalid or already used for photo {photo.id}: {img_url}")
|
logging.info(f"Image URL invalid for photo {photo.id}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Check if the image is highly relevant to the query
|
||||||
|
query_keywords = set(search_query.lower().split())
|
||||||
|
photo_keywords = set(tags + title.split())
|
||||||
|
is_relevant = bool(query_keywords & photo_keywords) # Check if any query keyword is in tags or title
|
||||||
|
|
||||||
|
# Allow reuse of highly relevant images
|
||||||
|
if img_url in used_images and not is_relevant:
|
||||||
|
logging.info(f"Image already used and not highly relevant for photo {photo.id}: {img_url}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
uploader = photo.owner.username
|
uploader = photo.owner.username
|
||||||
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
|
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
|
||||||
|
|
||||||
used_images.add(img_url)
|
used_images.add(img_url)
|
||||||
save_used_images() # This will now save in the correct format
|
save_used_images()
|
||||||
|
|
||||||
flickr_data = {
|
flickr_data = {
|
||||||
"title": search_query,
|
"title": search_query,
|
||||||
@@ -1041,14 +1065,13 @@ def classify_keywords(keywords):
|
|||||||
logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
|
logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
|
||||||
return {kw: "specific" for kw in keywords}
|
return {kw: "specific" for kw in keywords}
|
||||||
|
|
||||||
def get_flickr_image(search_query, relevance_keywords):
|
def get_flickr_image(search_query, relevance_keywords, main_topic):
|
||||||
global last_flickr_request_time, flickr_request_count
|
global last_flickr_request_time, flickr_request_count
|
||||||
|
|
||||||
reset_flickr_request_count()
|
reset_flickr_request_count()
|
||||||
flickr_request_count += 1
|
flickr_request_count += 1
|
||||||
logging.info(f"Flickr request count: {flickr_request_count}/3600")
|
logging.info(f"Flickr request count: {flickr_request_count}/3600")
|
||||||
|
|
||||||
# Enforce a minimum delay of 10 seconds between Flickr requests
|
|
||||||
current_time = time.time()
|
current_time = time.time()
|
||||||
time_since_last_request = current_time - last_flickr_request_time
|
time_since_last_request = current_time - last_flickr_request_time
|
||||||
if time_since_last_request < 10:
|
if time_since_last_request < 10:
|
||||||
@@ -1081,7 +1104,6 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
classifications = classify_keywords(keywords)
|
classifications = classify_keywords(keywords)
|
||||||
logging.info(f"Keyword classifications: {classifications}")
|
logging.info(f"Keyword classifications: {classifications}")
|
||||||
|
|
||||||
# Prioritize specific keywords
|
|
||||||
specific_keywords = [kw for kw, classification in classifications.items() if classification == "specific"]
|
specific_keywords = [kw for kw, classification in classifications.items() if classification == "specific"]
|
||||||
if specific_keywords:
|
if specific_keywords:
|
||||||
for keyword in specific_keywords:
|
for keyword in specific_keywords:
|
||||||
@@ -1092,9 +1114,17 @@ def get_flickr_image(search_query, relevance_keywords):
|
|||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# Step 4: Final fallback using relevance keywords
|
# Step 4: Fallback using main topic
|
||||||
|
logging.info(f"No results found. Falling back to main topic: '{main_topic}'")
|
||||||
|
photos = search_flickr(main_topic)
|
||||||
|
for photo in photos:
|
||||||
|
result = process_photo(photo, main_topic)
|
||||||
|
if result:
|
||||||
|
return result
|
||||||
|
|
||||||
|
# Step 5: Final fallback using relevance keywords
|
||||||
fallback_query = " ".join(relevance_keywords) if isinstance(relevance_keywords, list) else relevance_keywords
|
fallback_query = " ".join(relevance_keywords) if isinstance(relevance_keywords, list) else relevance_keywords
|
||||||
logging.info(f"No results found. Falling back to generic query: '{fallback_query}'")
|
logging.info(f"No results with main topic. Falling back to relevance keywords: '{fallback_query}'")
|
||||||
photos = search_flickr(fallback_query)
|
photos = search_flickr(fallback_query)
|
||||||
for photo in photos:
|
for photo in photos:
|
||||||
result = process_photo(photo, search_query)
|
result = process_photo(photo, search_query)
|
||||||
|
|||||||
Reference in New Issue
Block a user