Shane 7 months ago
parent aa0f3364d5
commit 64d17d5599
  1. 7
      foodie_automator_rss.py
  2. 70
      foodie_utils.py

@ -236,13 +236,18 @@ def curate_from_rss():
print(f"Trying RSS Article: {title} from {source_name}") print(f"Trying RSS Article: {title} from {source_name}")
logging.info(f"Trying RSS Article: {title} from {source_name}") logging.info(f"Trying RSS Article: {title} from {source_name}")
image_query, relevance_keywords, skip = smart_image_and_filter(title, summary) image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
if skip: if skip:
print(f"Skipping filtered RSS article: {title}") print(f"Skipping filtered RSS article: {title}")
logging.info(f"Skipping filtered RSS article: {title}") logging.info(f"Skipping filtered RSS article: {title}")
attempts += 1 attempts += 1
continue continue
# Pass main_topic to get_flickr_image for use in fallbacks
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
if not image_url:
image_url, image_source, uploader, page_url = get_image(image_query)
# Fetch additional context via DDG # Fetch additional context via DDG
ddg_context = fetch_duckduckgo_news_context(title) ddg_context = fetch_duckduckgo_news_context(title)
scoring_content = f"{title}\n\n{summary}\n\nContent: {content}\n\nAdditional Context: {ddg_context}" scoring_content = f"{title}\n\n{summary}\n\nContent: {content}\n\nAdditional Context: {ddg_context}"

@ -341,9 +341,10 @@ def smart_image_and_filter(title, summary):
prompt = ( prompt = (
"Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) " "Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) "
"for an image search about food industry trends or viral content. Prioritize specific terms if present, " "for an image search about food industry trends or viral content. Prioritize specific terms if present, "
"otherwise focus on the main theme. " "otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). "
"Return 'SKIP' if the article is about home appliances, recipes, promotions, or contains 'homemade', else 'KEEP'. " "Return 'SKIP' if the article is about home appliances, recipes, promotions, or contains 'homemade', else 'KEEP'. "
"Return as JSON with double quotes for all property names and string values (e.g., {\"image_query\": \"specific term\", \"relevance\": [\"keyword1\", \"keyword2\"], \"action\": \"KEEP\" or \"SKIP\"})." "Return as JSON with double quotes for all property names and string values (e.g., "
"{\"image_query\": \"specific term\", \"relevance\": [\"keyword1\", \"keyword2\"], \"main_topic\": \"main food item\", \"action\": \"KEEP\" or \"SKIP\"})."
) )
response = client.chat.completions.create( response = client.chat.completions.create(
@ -357,39 +358,52 @@ def smart_image_and_filter(title, summary):
raw_result = response.choices[0].message.content.strip() raw_result = response.choices[0].message.content.strip()
logging.info(f"Raw GPT smart image/filter response: '{raw_result}'") logging.info(f"Raw GPT smart image/filter response: '{raw_result}'")
# Remove ```json markers and fix single quotes in JSON structure
cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip() cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip()
# Replace single quotes with double quotes, but preserve single quotes within string values
fixed_result = re.sub(r"(?<!\\)'(?=\s*[\w\s]*\])|(?<=\[|\{|\s)'|'(?=\s*[\]\},:])|(?<=\w)'(?=\s*:)", '"', cleaned_result) fixed_result = re.sub(r"(?<!\\)'(?=\s*[\w\s]*\])|(?<=\[|\{|\s)'|'(?=\s*[\]\},:])|(?<=\w)'(?=\s*:)", '"', cleaned_result)
try: try:
result = json.loads(fixed_result) result = json.loads(fixed_result)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
logging.warning(f"JSON parsing failed: {e}, raw: '{fixed_result}'. Using fallback.") logging.warning(f"JSON parsing failed: {e}, raw: '{fixed_result}'. Using fallback.")
return "food trends", ["cuisine", "dining"], False # Fallback: Extract main topic using simple keyword matching
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
return main_topic, [main_topic, "food"], False
if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result: if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result:
logging.warning(f"Invalid GPT response format: {result}, using fallback") logging.warning(f"Invalid GPT response format: {result}, using fallback")
return "food trends", ["cuisine", "dining"], False main_topic = extract_main_topic(title.lower() + " " + summary.lower())
return main_topic, [main_topic, "food"], False
image_query = result["image_query"] image_query = result["image_query"]
relevance_keywords = result["relevance"] relevance_keywords = result["relevance"]
main_topic = result.get("main_topic", extract_main_topic(title.lower() + " " + summary.lower()))
skip_flag = result["action"] == "SKIP" or "homemade" in title.lower() or "homemade" in summary.lower() skip_flag = result["action"] == "SKIP" or "homemade" in title.lower() or "homemade" in summary.lower()
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Skip: {skip_flag}") logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}")
specific_single_words = ["kimchi", "sushi", "pizza", "taco", "burger"]
if not image_query: if not image_query:
logging.warning(f"Image query is empty, using fallback") logging.warning(f"Image query is empty, using fallback")
return "food trends", ["cuisine", "dining"], skip_flag return main_topic, [main_topic, "food"], skip_flag
# Allow single-word queries if they are specific (e.g., food items)
specific_single_words = ["kimchi", "sushi", "pizza", "taco", "burger"] # Add more as needed
if len(image_query.split()) < 2 and image_query.lower() not in specific_single_words: if len(image_query.split()) < 2 and image_query.lower() not in specific_single_words:
logging.warning(f"Image query '{image_query}' too vague, using fallback") logging.warning(f"Image query '{image_query}' too vague, using fallback")
return "food trends", ["cuisine", "dining"], skip_flag return main_topic, [main_topic, "food"], skip_flag
return image_query, relevance_keywords, main_topic, skip_flag
except Exception as e: except Exception as e:
logging.error(f"Smart image/filter failed: {e}, using fallback") logging.error(f"Smart image/filter failed: {e}, using fallback")
return "food trends", ["cuisine", "dining"], False main_topic = extract_main_topic(title.lower() + " " + summary.lower())
return main_topic, [main_topic, "food"], False
def extract_main_topic(text):
    """Return the first known food keyword that occurs in *text*.

    The keyword list is scanned in priority order (the order it is written
    in), so when several keywords occur the one listed first wins,
    regardless of where each appears in the text.

    Args:
        text: Free-form text to scan, e.g. an article title plus summary.
            May be empty or None.

    Returns:
        The matching keyword (lowercase), or the generic "food trends"
        when no keyword is present or *text* is empty/None.
    """
    # Common food-related keywords (expand as needed); order is the priority.
    food_keywords = (
        "kimchi", "sushi", "pizza", "taco", "burger",
        "ramen", "curry", "pasta", "salad", "soup",
    )

    # Guard against None / "" so the fallback path itself can never raise.
    if not text:
        return "food trends"

    # Normalise case here rather than relying on every caller to pre-lowercase.
    haystack = text.lower()

    # Plain substring matching is kept on purpose: it lets plurals and
    # compounds such as "tacos" or "cheeseburger" resolve to their base keyword.
    # Fallback to a generic term if no specific food item is found.
    return next((kw for kw in food_keywords if kw in haystack), "food trends")
def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_password, image_source="Pixabay", uploader=None, pixabay_url=None): def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_password, image_source="Pixabay", uploader=None, pixabay_url=None):
try: try:
@ -934,15 +948,25 @@ def process_photo(photo, search_query):
logging.warning(f"Medium size not available for photo {photo.id}: {e}") logging.warning(f"Medium size not available for photo {photo.id}: {e}")
return None return None
if not img_url or img_url in used_images: if not img_url:
logging.info(f"Image URL invalid or already used for photo {photo.id}: {img_url}") logging.info(f"Image URL invalid for photo {photo.id}")
return None
# Check if the image is highly relevant to the query
query_keywords = set(search_query.lower().split())
photo_keywords = set(tags + title.split())
is_relevant = bool(query_keywords & photo_keywords) # Check if any query keyword is in tags or title
# Allow reuse of highly relevant images
if img_url in used_images and not is_relevant:
logging.info(f"Image already used and not highly relevant for photo {photo.id}: {img_url}")
return None return None
uploader = photo.owner.username uploader = photo.owner.username
page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}" page_url = f"https://www.flickr.com/photos/{photo.owner.nsid}/{photo.id}"
used_images.add(img_url) used_images.add(img_url)
save_used_images() # This will now save in the correct format save_used_images()
flickr_data = { flickr_data = {
"title": search_query, "title": search_query,
@ -1041,14 +1065,13 @@ def classify_keywords(keywords):
logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.") logging.warning(f"Keyword classification failed: {e}. Defaulting to all specific.")
return {kw: "specific" for kw in keywords} return {kw: "specific" for kw in keywords}
def get_flickr_image(search_query, relevance_keywords): def get_flickr_image(search_query, relevance_keywords, main_topic):
global last_flickr_request_time, flickr_request_count global last_flickr_request_time, flickr_request_count
reset_flickr_request_count() reset_flickr_request_count()
flickr_request_count += 1 flickr_request_count += 1
logging.info(f"Flickr request count: {flickr_request_count}/3600") logging.info(f"Flickr request count: {flickr_request_count}/3600")
# Enforce a minimum delay of 10 seconds between Flickr requests
current_time = time.time() current_time = time.time()
time_since_last_request = current_time - last_flickr_request_time time_since_last_request = current_time - last_flickr_request_time
if time_since_last_request < 10: if time_since_last_request < 10:
@ -1081,7 +1104,6 @@ def get_flickr_image(search_query, relevance_keywords):
classifications = classify_keywords(keywords) classifications = classify_keywords(keywords)
logging.info(f"Keyword classifications: {classifications}") logging.info(f"Keyword classifications: {classifications}")
# Prioritize specific keywords
specific_keywords = [kw for kw, classification in classifications.items() if classification == "specific"] specific_keywords = [kw for kw, classification in classifications.items() if classification == "specific"]
if specific_keywords: if specific_keywords:
for keyword in specific_keywords: for keyword in specific_keywords:
@ -1092,9 +1114,17 @@ def get_flickr_image(search_query, relevance_keywords):
if result: if result:
return result return result
# Step 4: Final fallback using relevance keywords # Step 4: Fallback using main topic
logging.info(f"No results found. Falling back to main topic: '{main_topic}'")
photos = search_flickr(main_topic)
for photo in photos:
result = process_photo(photo, main_topic)
if result:
return result
# Step 5: Final fallback using relevance keywords
fallback_query = " ".join(relevance_keywords) if isinstance(relevance_keywords, list) else relevance_keywords fallback_query = " ".join(relevance_keywords) if isinstance(relevance_keywords, list) else relevance_keywords
logging.info(f"No results found. Falling back to generic query: '{fallback_query}'") logging.info(f"No results with main topic. Falling back to relevance keywords: '{fallback_query}'")
photos = search_flickr(fallback_query) photos = search_flickr(fallback_query)
for photo in photos: for photo in photos:
result = process_photo(photo, search_query) result = process_photo(photo, search_query)

Loading…
Cancel
Save