update title filter for reddit homemade
This commit is contained in:
+74
-38
@@ -70,50 +70,86 @@ used_images = set(entry["title"] for entry in used_images_data if "title" in ent
|
|||||||
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
|
|
||||||
def setup_logging():
|
def setup_logging():
|
||||||
if os.path.exists(LOG_FILE):
|
try:
|
||||||
with open(LOG_FILE, 'r') as f:
|
# Ensure log directory exists
|
||||||
lines = f.readlines()
|
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
|
||||||
|
logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}")
|
||||||
|
|
||||||
log_entries = []
|
# Check write permissions
|
||||||
current_entry = []
|
if not os.access(os.path.dirname(LOG_FILE), os.W_OK):
|
||||||
timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}')
|
raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}")
|
||||||
|
|
||||||
for line in lines:
|
# Test write to log file
|
||||||
if(timestamp_pattern.match(line)):
|
try:
|
||||||
if current_entry:
|
with open(LOG_FILE, 'a') as f:
|
||||||
log_entries.append(''.join(current_entry))
|
f.write("")
|
||||||
current_entry = [line]
|
logging.debug(f"Confirmed write access to {LOG_FILE}")
|
||||||
else:
|
except Exception as e:
|
||||||
current_entry.append(line)
|
raise PermissionError(f"Cannot write to {LOG_FILE}: {e}")
|
||||||
|
|
||||||
if current_entry:
|
# Prune old logs
|
||||||
log_entries.append(''.join(current_entry))
|
if os.path.exists(LOG_FILE):
|
||||||
|
with open(LOG_FILE, 'r') as f:
|
||||||
|
lines = f.readlines()
|
||||||
|
|
||||||
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
|
log_entries = []
|
||||||
pruned_entries = []
|
current_entry = []
|
||||||
for entry in log_entries:
|
timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}')
|
||||||
try:
|
|
||||||
timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
|
|
||||||
if timestamp > cutoff:
|
|
||||||
pruned_entries.append(entry)
|
|
||||||
except ValueError:
|
|
||||||
logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...")
|
|
||||||
continue
|
|
||||||
|
|
||||||
with open(LOG_FILE, 'w') as f:
|
for line in lines:
|
||||||
f.writelines(pruned_entries)
|
if timestamp_pattern.match(line):
|
||||||
|
if current_entry:
|
||||||
|
log_entries.append(''.join(current_entry))
|
||||||
|
current_entry = [line]
|
||||||
|
else:
|
||||||
|
current_entry.append(line)
|
||||||
|
|
||||||
logging.basicConfig(
|
if current_entry:
|
||||||
filename=LOG_FILE,
|
log_entries.append(''.join(current_entry))
|
||||||
level=logging.INFO,
|
|
||||||
format="%(asctime)s - %(levelname)s - %(message)s"
|
cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS)
|
||||||
)
|
pruned_entries = []
|
||||||
logging.getLogger("requests").setLevel(logging.WARNING)
|
for entry in log_entries:
|
||||||
logging.getLogger("prawcore").setLevel(logging.WARNING)
|
try:
|
||||||
console_handler = logging.StreamHandler()
|
timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
|
||||||
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
|
if timestamp > cutoff:
|
||||||
logging.getLogger().addHandler(console_handler)
|
pruned_entries.append(entry)
|
||||||
logging.info("Logging initialized for foodie_automator_reddit.py")
|
except ValueError:
|
||||||
|
logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...")
|
||||||
|
continue
|
||||||
|
|
||||||
|
with open(LOG_FILE, 'w') as f:
|
||||||
|
f.writelines(pruned_entries)
|
||||||
|
logging.debug(f"Log file pruned: {LOG_FILE}")
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logging.basicConfig(
|
||||||
|
filename=LOG_FILE,
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||||
|
datefmt="%Y-%m-%d %H:%M:%S",
|
||||||
|
force=True # Ensure this config takes precedence
|
||||||
|
)
|
||||||
|
logging.getLogger("requests").setLevel(logging.WARNING)
|
||||||
|
logging.getLogger("prawcore").setLevel(logging.WARNING)
|
||||||
|
console_handler = logging.StreamHandler()
|
||||||
|
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
|
||||||
|
logging.getLogger().addHandler(console_handler)
|
||||||
|
logging.info("Logging initialized for foodie_automator_reddit.py")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
# Fallback to console logging if file logging fails
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||||
|
datefmt="%Y-%m-%d %H:%M:%S",
|
||||||
|
force=True
|
||||||
|
)
|
||||||
|
logging.error(f"Failed to setup file logging for {LOG_FILE}: {e}. Using console logging.")
|
||||||
|
console_handler = logging.StreamHandler()
|
||||||
|
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
|
||||||
|
logging.getLogger().addHandler(console_handler)
|
||||||
|
logging.info("Console logging initialized as fallback for foodie_automator_reddit.py")
|
||||||
|
|
||||||
def acquire_lock():
|
def acquire_lock():
|
||||||
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
|
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
|
||||||
|
|||||||
+42
-6
@@ -361,7 +361,7 @@ def smart_image_and_filter(title, summary):
|
|||||||
"Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) "
|
"Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) "
|
||||||
"for an image search about food industry trends or viral content. Prioritize specific multi-word terms if present, "
|
"for an image search about food industry trends or viral content. Prioritize specific multi-word terms if present, "
|
||||||
"otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). "
|
"otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). "
|
||||||
"Return 'SKIP' if the article is about home appliances, recipes, promotions, or contains 'homemade', else 'KEEP'. "
|
"Return 'SKIP' if the article is about home appliances, recipes, promotions, contains 'homemade', or includes recipe-related terms like 'cook', 'bake', or 'ingredient'. "
|
||||||
"Return as JSON with double quotes for all property names and string values (e.g., "
|
"Return as JSON with double quotes for all property names and string values (e.g., "
|
||||||
"{\"image_query\": \"fast food trends\", \"relevance\": [\"fast food\", \"dining\", \"culture\"], \"main_topic\": \"fast food\", \"action\": \"KEEP\"})."
|
"{\"image_query\": \"fast food trends\", \"relevance\": [\"fast food\", \"dining\", \"culture\"], \"main_topic\": \"fast food\", \"action\": \"KEEP\"})."
|
||||||
)
|
)
|
||||||
@@ -385,19 +385,46 @@ def smart_image_and_filter(title, summary):
|
|||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
logging.warning(f"JSON parsing failed: {e}, raw: '{fixed_result}'. Using fallback.")
|
logging.warning(f"JSON parsing failed: {e}, raw: '{fixed_result}'. Using fallback.")
|
||||||
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
||||||
return main_topic, [main_topic, "food"], main_topic, False
|
skip_flag = (
|
||||||
|
"homemade" in title.lower() or
|
||||||
|
"homemade" in summary.lower() or
|
||||||
|
any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)
|
||||||
|
)
|
||||||
|
logging.info(f"Fallback for '{title}': Skip={skip_flag}, Reasons: "
|
||||||
|
f"homemade_in_title={'homemade' in title.lower()}, "
|
||||||
|
f"homemade_in_summary={'homemade' in summary.lower()}, "
|
||||||
|
f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}")
|
||||||
|
return main_topic, [main_topic, "food"], main_topic, skip_flag
|
||||||
|
|
||||||
if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result:
|
if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result:
|
||||||
logging.warning(f"Invalid GPT response format: {result}, using fallback")
|
logging.warning(f"Invalid GPT response format: {result}, using fallback")
|
||||||
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
||||||
return main_topic, [main_topic, "food"], main_topic, False
|
skip_flag = (
|
||||||
|
"homemade" in title.lower() or
|
||||||
|
"homemade" in summary.lower() or
|
||||||
|
any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)
|
||||||
|
)
|
||||||
|
logging.info(f"Fallback for '{title}': Skip={skip_flag}, Reasons: "
|
||||||
|
f"homemade_in_title={'homemade' in title.lower()}, "
|
||||||
|
f"homemade_in_summary={'homemade' in summary.lower()}, "
|
||||||
|
f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}")
|
||||||
|
return main_topic, [main_topic, "food"], main_topic, skip_flag
|
||||||
|
|
||||||
image_query = result["image_query"]
|
image_query = result["image_query"]
|
||||||
relevance_keywords = result["relevance"]
|
relevance_keywords = result["relevance"]
|
||||||
main_topic = result.get("main_topic", extract_main_topic(title.lower() + " " + summary.lower()))
|
main_topic = result.get("main_topic", extract_main_topic(title.lower() + " " + summary.lower()))
|
||||||
skip_flag = result["action"] == "SKIP" or "homemade" in title.lower() or "homemade" in summary.lower()
|
skip_flag = (
|
||||||
|
result["action"] == "SKIP" or
|
||||||
|
"homemade" in title.lower() or
|
||||||
|
"homemade" in summary.lower() or
|
||||||
|
any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)
|
||||||
|
)
|
||||||
|
|
||||||
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}")
|
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}, "
|
||||||
|
f"Reasons: action={result['action']}, "
|
||||||
|
f"homemade_in_title={'homemade' in title.lower()}, "
|
||||||
|
f"homemade_in_summary={'homemade' in summary.lower()}, "
|
||||||
|
f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}")
|
||||||
|
|
||||||
specific_single_words = ["kimchi", "sushi", "pizza", "taco", "burger", "chipotle", "starbucks", "mcdonalds"]
|
specific_single_words = ["kimchi", "sushi", "pizza", "taco", "burger", "chipotle", "starbucks", "mcdonalds"]
|
||||||
if not image_query:
|
if not image_query:
|
||||||
@@ -412,7 +439,16 @@ def smart_image_and_filter(title, summary):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Smart image/filter failed: {e}, using fallback")
|
logging.error(f"Smart image/filter failed: {e}, using fallback")
|
||||||
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
||||||
return main_topic, [main_topic, "food"], main_topic, False
|
skip_flag = (
|
||||||
|
"homemade" in title.lower() or
|
||||||
|
"homemade" in summary.lower() or
|
||||||
|
any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)
|
||||||
|
)
|
||||||
|
logging.info(f"Fallback for '{title}': Skip={skip_flag}, Reasons: "
|
||||||
|
f"homemade_in_title={'homemade' in title.lower()}, "
|
||||||
|
f"homemade_in_summary={'homemade' in summary.lower()}, "
|
||||||
|
f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}")
|
||||||
|
return main_topic, [main_topic, "food"], main_topic, skip_flag
|
||||||
|
|
||||||
def extract_main_topic(text):
|
def extract_main_topic(text):
|
||||||
# Common food-related keywords (expand as needed)
|
# Common food-related keywords (expand as needed)
|
||||||
|
|||||||
Reference in New Issue
Block a user