try
This commit is contained in:
@@ -236,13 +236,18 @@ def curate_from_rss():
|
||||
print(f"Trying RSS Article: {title} from {source_name}")
|
||||
logging.info(f"Trying RSS Article: {title} from {source_name}")
|
||||
|
||||
image_query, relevance_keywords, skip = smart_image_and_filter(title, summary)
|
||||
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
||||
if skip:
|
||||
print(f"Skipping filtered RSS article: {title}")
|
||||
logging.info(f"Skipping filtered RSS article: {title}")
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
# Pass main_topic to get_flickr_image for use in fallbacks
|
||||
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
|
||||
if not image_url:
|
||||
image_url, image_source, uploader, page_url = get_image(image_query)
|
||||
|
||||
# Fetch additional context via DDG
|
||||
ddg_context = fetch_duckduckgo_news_context(title)
|
||||
scoring_content = f"{title}\n\n{summary}\n\nContent: {content}\n\nAdditional Context: {ddg_context}"
|
||||
|
||||
+50
-20
@@ -341,9 +341,10 @@ def smart_image_and_filter(title, summary):
|
||||
prompt = (
|
||||
"Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) "
|
||||
"for an image search about food industry trends or viral content. Prioritize specific terms if present, "
|
||||
"otherwise focus on the main theme. "
|
||||
"otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). "
|
||||
"Return 'SKIP' if the article is about home appliances, recipes, promotions, or contains 'homemade', else 'KEEP'. "
|
||||
"Return as JSON with double quotes for all property names and string values (e.g., {\"image_query\": \"specific term\", \"relevance\": [\"keyword1\", \"keyword2\"], \"action\": \"KEEP\" or \"SKIP\"})."
|
||||
"Return as JSON with double quotes for all property names and string values (e.g., "
|
||||
"{\"image_query\": \"specific term\", \"relevance\": [\"keyword1\", \"keyword2\"], \"main_topic\": \"main food item\", \"action\": \"KEEP\" or \"SKIP\"})."
|
||||
)
|
||||
|
||||
response = client.chat.completions.create(
|
||||
@@ -357,39 +358,52 @@ def smart_image_and_filter(title, summary):
|
||||
raw_result = response.choices[0].message.content.strip()
|
||||
logging.info(f"Raw GPT smart image/filter response: '{raw_result}'")
|
||||
|
||||
# Remove ```json markers and fix single quotes in JSON structure
|
||||
cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip()
|
||||
# Replace single quotes with double quotes, but preserve single quotes within string values
|
||||
fixed_result = re.sub(r"(?<!\\)'(?=\s*[\w\s]*\])|(?<=\[|\{|\s)'|'(?=\s*[\]\},:])|(?<=\w)'(?=\s*:)", '"', cleaned_result)
|
||||
|
||||
try:
|
||||
result = json.loads(fixed_result)
|
||||
except json.JSONDecodeError as e:
|
||||
logging.warning(f"JSON parsing failed: {e}, raw: '{fixed_result}'. Using fallback.")
|
||||
return "food trends", ["cuisine", "dining"], False
|
||||
# Fallback: Extract main topic using simple keyword matching
|
||||
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
||||
return main_topic, [main_topic, "food"], False
|
||||
|
||||
if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result:
|
||||
logging.warning(f"Invalid GPT response format: {result}, using fallback")
|
||||
return "food trends", ["cuisine", "dining"], False
|
||||
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
||||
return main_topic, [main_topic, "food"], False
|
||||
|
||||
image_query = result["image_query"]
|
||||
relevance_keywords = result["relevance"]
|
||||
main_topic = result.get("main_topic", extract_main_topic(title.lower() + " " + summary.lower()))
|
||||
skip_flag = result["action"] == "SKIP" or "homemade" in title.lower() or "homemade" in summary.lower()
|
||||
|
||||
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Skip: {skip_flag}")
|
||||
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}")
|
||||
|
||||
specific_single_words = ["kimchi", "sushi", "pizza", "taco", "burger"]
|
||||
if not image_query:
|
||||
logging.warning(f"Image query is empty, using fallback")
|
||||
return "food trends", ["cuisine", "dining"], skip_flag
|
||||
# Allow single-word queries if they are specific (e.g., food items)
|
||||
specific_single_words = ["kimchi", "sushi", "pizza", "taco", "burger"] # Add more as needed
|
||||
return main_topic, [main_topic, "food"], skip_flag
|
||||
if len(image_query.split()) < 2 and image_query.lower() not in specific_single_words:
|
||||
logging.warning(f"Image query '{image_query}' too vague, using fallback")
|
||||
return "food trends", ["cuisine", "dining"], skip_flag
|
||||
return main_topic, [main_topic, "food"], skip_flag
|
||||
|
||||
return image_query, relevance_keywords, main_topic, skip_flag
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Smart image/filter failed: {e}, using fallback")
|
||||
return "food trends", ["cuisine", "dining"], False
|
||||
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
||||
return main_topic, [main_topic, "food"], False
|
||||
|
||||
def extract_main_topic(text):
    """Return the first known food keyword mentioned in *text*.

    Keywords are checked in the priority order of ``food_keywords`` (not in
    order of appearance in the text), and the first one found is returned.

    A keyword matches when it, optionally followed by a plural ``s``, ends a
    word. This keeps compound words working ("cheeseburgers" -> "burger")
    while rejecting accidental substrings that plain ``in`` matching would
    accept ("sacramento" contains "ramen", "tacoma" contains "taco",
    "souped-up" contains "soup"). A keyword that happens to end an unrelated
    word (e.g. "scurry" -> "curry") is still matched.

    Args:
        text: Text to scan, typically the article title plus summary.
            Matching is case-insensitive. ``None`` or an empty string is
            treated as "no keyword found".

    Returns:
        The matched keyword in lowercase, or the generic ``"food trends"``
        when no keyword is present.
    """
    # Local import so the function does not depend on the module's import block.
    import re

    # Common food-related keywords (expand as needed)
    food_keywords = ["kimchi", "sushi", "pizza", "taco", "burger", "ramen", "curry", "pasta", "salad", "soup"]

    if not text:
        return "food trends"

    for keyword in food_keywords:
        # Trailing \b (after an optional plural "s") prevents mid-word hits
        # such as "ramen" inside "sacramento".
        if re.search(rf"{re.escape(keyword)}s?\b", text, re.IGNORECASE):
            return keyword

    # Fallback to a generic term if no specific food item is found
    return "food trends"
|
||||
|
||||
def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_password, image_source="Pixabay", uploader=None, pixabay_url=None):
|
||||
try:
|
||||
@@ -934,15 +948,25 @@ def process_photo(photo, search_query):
|
||||
logging.warning(f"Medium size not available for photo {photo.id}: {e}")
|
||||
return None
|
||||
|
||||
if not img_url or img_url in used_images:
|
||||
logging.info(f"Image URL invalid or already used for photo {photo.id}: {img_url}")
|
||||
if not img_url:
|
||||
logging.info(f"Image URL invalid for photo {photo.id}")
|
||||
return None
|
||||
|
||||
# Check if the image is highly relevant to the query
|
||||
query_keywords = set(search_query.lower().split())
|
||||
photo_keywords = set(tags + title.split())
|
||||
is_relevant = bool(query_keywords & photo_keywords) # Check if any query keyword is in tags or title
|
||||
|
||||
# Allow reuse of highly relevant images
|
||||
if img_url in used_images and not is_relevant:
|
||||
logging.info(f"Image already used and not highly relevant for photo {photo.id}: {img_url}")
|
||||
return None
|
||||
|
||||
uploader = photo.owner.username
|
||||
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
|
||||
|
||||
used_images.add(img_url)
|
||||
save_used_images() # This will now save in the correct format
|
||||
save_used_images()
|
||||
|
||||
flickr_data = {
|
||||
"title": search_query,
|
||||
@@ -1041,14 +1065,13 @@ def classify_keywords(keywords):
|
||||
logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
|
||||
return {kw: "specific" for kw in keywords}
|
||||
|
||||
def get_flickr_image(search_query, relevance_keywords):
|
||||
def get_flickr_image(search_query, relevance_keywords, main_topic):
|
||||
global last_flickr_request_time, flickr_request_count
|
||||
|
||||
reset_flickr_request_count()
|
||||
flickr_request_count += 1
|
||||
logging.info(f"Flickr request count: {flickr_request_count}/3600")
|
||||
|
||||
# Enforce a minimum delay of 10 seconds between Flickr requests
|
||||
current_time = time.time()
|
||||
time_since_last_request = current_time - last_flickr_request_time
|
||||
if time_since_last_request < 10:
|
||||
@@ -1081,7 +1104,6 @@ def get_flickr_image(search_query, relevance_keywords):
|
||||
classifications = classify_keywords(keywords)
|
||||
logging.info(f"Keyword classifications: {classifications}")
|
||||
|
||||
# Prioritize specific keywords
|
||||
specific_keywords = [kw for kw, classification in classifications.items() if classification == "specific"]
|
||||
if specific_keywords:
|
||||
for keyword in specific_keywords:
|
||||
@@ -1092,9 +1114,17 @@ def get_flickr_image(search_query, relevance_keywords):
|
||||
if result:
|
||||
return result
|
||||
|
||||
# Step 4: Final fallback using relevance keywords
|
||||
# Step 4: Fallback using main topic
|
||||
logging.info(f"No results found. Falling back to main topic: '{main_topic}'")
|
||||
photos = search_flickr(main_topic)
|
||||
for photo in photos:
|
||||
result = process_photo(photo, main_topic)
|
||||
if result:
|
||||
return result
|
||||
|
||||
# Step 5: Final fallback using relevance keywords
|
||||
fallback_query = " ".join(relevance_keywords) if isinstance(relevance_keywords, list) else relevance_keywords
|
||||
logging.info(f"No results found. Falling back to generic query: '{fallback_query}'")
|
||||
logging.info(f"No results with main topic. Falling back to relevance keywords: '{fallback_query}'")
|
||||
photos = search_flickr(fallback_query)
|
||||
for photo in photos:
|
||||
result = process_photo(photo, search_query)
|
||||
|
||||
Reference in New Issue
Block a user