|
|
|
|
@ -370,11 +370,11 @@ def smart_image_and_filter(title, summary): |
|
|
|
|
|
|
|
|
|
prompt = ( |
|
|
|
|
"Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) " |
|
|
|
|
"for an image search about food industry trends or viral content. Prioritize specific terms if present, " |
|
|
|
|
"for an image search about food industry trends or viral content. Prioritize specific multi-word terms if present, " |
|
|
|
|
"otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). " |
|
|
|
|
"Return 'SKIP' if the article is about home appliances, recipes, promotions, or contains 'homemade', else 'KEEP'. " |
|
|
|
|
"Return as JSON with double quotes for all property names and string values (e.g., " |
|
|
|
|
"{\"image_query\": \"specific term\", \"relevance\": [\"keyword1\", \"keyword2\"], \"main_topic\": \"main food item\", \"action\": \"KEEP\" or \"SKIP\"})." |
|
|
|
|
"{\"image_query\": \"fast food trends\", \"relevance\": [\"fast food\", \"dining\", \"culture\"], \"main_topic\": \"fast food\", \"action\": \"KEEP\"})." |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
response = client.chat.completions.create( |
|
|
|
|
@ -386,7 +386,7 @@ def smart_image_and_filter(title, summary): |
|
|
|
|
max_tokens=100 |
|
|
|
|
) |
|
|
|
|
raw_result = response.choices[0].message.content.strip() |
|
|
|
|
logging.info(f"Raw GPT smart image/filter response: '{raw_result}'") |
|
|
|
|
logging.debug(f"Raw GPT smart image/filter response: '{raw_result}'") |
|
|
|
|
|
|
|
|
|
cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip() |
|
|
|
|
fixed_result = re.sub(r"(?<!\\)'(?=\s*[\w\s]*\])|(?<=\[|\{|\s)'|'(?=\s*[\]\},:])|(?<=\w)'(?=\s*:)", '"', cleaned_result) |
|
|
|
|
@ -395,14 +395,13 @@ def smart_image_and_filter(title, summary): |
|
|
|
|
result = json.loads(fixed_result) |
|
|
|
|
except json.JSONDecodeError as e: |
|
|
|
|
logging.warning(f"JSON parsing failed: {e}, raw: '{fixed_result}'. Using fallback.") |
|
|
|
|
# Fallback: Extract main topic using simple keyword matching |
|
|
|
|
main_topic = extract_main_topic(title.lower() + " " + summary.lower()) |
|
|
|
|
return main_topic, [main_topic, "food"], False |
|
|
|
|
return main_topic, [main_topic, "food"], main_topic, False |
|
|
|
|
|
|
|
|
|
if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result: |
|
|
|
|
logging.warning(f"Invalid GPT response format: {result}, using fallback") |
|
|
|
|
main_topic = extract_main_topic(title.lower() + " " + summary.lower()) |
|
|
|
|
return main_topic, [main_topic, "food"], False |
|
|
|
|
return main_topic, [main_topic, "food"], main_topic, False |
|
|
|
|
|
|
|
|
|
image_query = result["image_query"] |
|
|
|
|
relevance_keywords = result["relevance"] |
|
|
|
|
@ -411,20 +410,20 @@ def smart_image_and_filter(title, summary): |
|
|
|
|
|
|
|
|
|
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}") |
|
|
|
|
|
|
|
|
|
specific_single_words = ["kimchi", "sushi", "pizza", "taco", "burger"] |
|
|
|
|
specific_single_words = ["kimchi", "sushi", "pizza", "taco", "burger", "chipotle", "starbucks", "mcdonalds"] |
|
|
|
|
if not image_query: |
|
|
|
|
logging.warning(f"Image query is empty, using fallback") |
|
|
|
|
return main_topic, [main_topic, "food"], skip_flag |
|
|
|
|
return main_topic, [main_topic, "food"], main_topic, skip_flag |
|
|
|
|
if len(image_query.split()) < 2 and image_query.lower() not in specific_single_words: |
|
|
|
|
logging.warning(f"Image query '{image_query}' too vague, using fallback") |
|
|
|
|
return main_topic, [main_topic, "food"], skip_flag |
|
|
|
|
return main_topic, [main_topic, "food"], main_topic, skip_flag |
|
|
|
|
|
|
|
|
|
return image_query, relevance_keywords, main_topic, skip_flag |
|
|
|
|
|
|
|
|
|
except Exception as e: |
|
|
|
|
logging.error(f"Smart image/filter failed: {e}, using fallback") |
|
|
|
|
main_topic = extract_main_topic(title.lower() + " " + summary.lower()) |
|
|
|
|
return main_topic, [main_topic, "food"], False |
|
|
|
|
return main_topic, [main_topic, "food"], main_topic, False |
|
|
|
|
|
|
|
|
|
def extract_main_topic(text): |
|
|
|
|
# Common food-related keywords (expand as needed) |
|
|
|
|
|