From 255469389509490bfc60bfad265b370f31afcb51 Mon Sep 17 00:00:00 2001 From: Shane Date: Mon, 12 May 2025 21:42:47 +1000 Subject: [PATCH] update title filter for reddit homemade --- foodie_automator_reddit.py | 116 ++++++++++++++++++++++++------------- foodie_utils.py | 48 +++++++++++++-- 2 files changed, 118 insertions(+), 46 deletions(-) diff --git a/foodie_automator_reddit.py b/foodie_automator_reddit.py index 4731735..58a70bc 100644 --- a/foodie_automator_reddit.py +++ b/foodie_automator_reddit.py @@ -70,50 +70,86 @@ used_images = set(entry["title"] for entry in used_images_data if "title" in ent client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) def setup_logging(): - if os.path.exists(LOG_FILE): - with open(LOG_FILE, 'r') as f: - lines = f.readlines() - - log_entries = [] - current_entry = [] - timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}') - - for line in lines: - if(timestamp_pattern.match(line)): - if current_entry: - log_entries.append(''.join(current_entry)) - current_entry = [line] - else: - current_entry.append(line) + try: + # Ensure log directory exists + os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True) + logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}") - if current_entry: - log_entries.append(''.join(current_entry)) + # Check write permissions + if not os.access(os.path.dirname(LOG_FILE), os.W_OK): + raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}") - cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS) - pruned_entries = [] - for entry in log_entries: - try: - timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc) - if timestamp > cutoff: - pruned_entries.append(entry) - except ValueError: - logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...") - continue + # Test write to log file + try: + with open(LOG_FILE, 'a') as f: + f.write("") + logging.debug(f"Confirmed write access to {LOG_FILE}") + except Exception as e: + raise PermissionError(f"Cannot write to {LOG_FILE}: {e}") + + # Prune old logs + if os.path.exists(LOG_FILE): + with open(LOG_FILE, 'r') as f: + lines = f.readlines() + + log_entries = [] + current_entry = [] + timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}') + + for line in lines: + if timestamp_pattern.match(line): + if current_entry: + log_entries.append(''.join(current_entry)) + current_entry = [line] + else: + current_entry.append(line) + + if current_entry: + log_entries.append(''.join(current_entry)) + + cutoff = datetime.now(timezone.utc) - timedelta(days=LOG_PRUNE_DAYS) + pruned_entries = [] + for entry in log_entries: + try: + timestamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc) + if timestamp > cutoff: + pruned_entries.append(entry) + except ValueError: + logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...") + continue + + with open(LOG_FILE, 'w') as f: + f.writelines(pruned_entries) + logging.debug(f"Log file pruned: {LOG_FILE}") - with open(LOG_FILE, 'w') as f: - f.writelines(pruned_entries) + # Configure logging + logging.basicConfig( + filename=LOG_FILE, + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + force=True # Ensure this config takes precedence + ) + logging.getLogger("requests").setLevel(logging.WARNING) + logging.getLogger("prawcore").setLevel(logging.WARNING) + console_handler = logging.StreamHandler() + console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) + logging.getLogger().addHandler(console_handler) + logging.info("Logging initialized for foodie_automator_reddit.py") - logging.basicConfig( - filename=LOG_FILE, - level=logging.INFO, - format="%(asctime)s - %(levelname)s - %(message)s" - ) - logging.getLogger("requests").setLevel(logging.WARNING) - logging.getLogger("prawcore").setLevel(logging.WARNING) - console_handler = logging.StreamHandler() - console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) - logging.getLogger().addHandler(console_handler) - logging.info("Logging initialized for foodie_automator_reddit.py") + except Exception as e: + # Fallback to console logging if file logging fails + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + force=True + ) + logging.error(f"Failed to setup file logging for {LOG_FILE}: {e}. Using console logging.") + console_handler = logging.StreamHandler() + console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) + logging.getLogger().addHandler(console_handler) + logging.info("Console logging initialized as fallback for foodie_automator_reddit.py") def acquire_lock(): os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True) diff --git a/foodie_utils.py b/foodie_utils.py index eb9396f..692c6bc 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -361,7 +361,7 @@ def smart_image_and_filter(title, summary): "Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) " "for an image search about food industry trends or viral content. Prioritize specific multi-word terms if present, " "otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). " - "Return 'SKIP' if the article is about home appliances, recipes, promotions, or contains 'homemade', else 'KEEP'. " + "Return 'SKIP' if the article is about home appliances, recipes, promotions, contains 'homemade', or includes recipe-related terms like 'cook', 'bake', or 'ingredient'. " "Return as JSON with double quotes for all property names and string values (e.g., " "{\"image_query\": \"fast food trends\", \"relevance\": [\"fast food\", \"dining\", \"culture\"], \"main_topic\": \"fast food\", \"action\": \"KEEP\"})." ) @@ -385,19 +385,46 @@ def smart_image_and_filter(title, summary): except json.JSONDecodeError as e: logging.warning(f"JSON parsing failed: {e}, raw: '{fixed_result}'. Using fallback.") main_topic = extract_main_topic(title.lower() + " " + summary.lower()) - return main_topic, [main_topic, "food"], main_topic, False + skip_flag = ( + "homemade" in title.lower() or + "homemade" in summary.lower() or + any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS) + ) + logging.info(f"Fallback for '{title}': Skip={skip_flag}, Reasons: " + f"homemade_in_title={'homemade' in title.lower()}, " + f"homemade_in_summary={'homemade' in summary.lower()}, " + f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}") + return main_topic, [main_topic, "food"], main_topic, skip_flag if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result: logging.warning(f"Invalid GPT response format: {result}, using fallback") main_topic = extract_main_topic(title.lower() + " " + summary.lower()) - return main_topic, [main_topic, "food"], main_topic, False + skip_flag = ( + "homemade" in title.lower() or + "homemade" in summary.lower() or + any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS) + ) + logging.info(f"Fallback for '{title}': Skip={skip_flag}, Reasons: " + f"homemade_in_title={'homemade' in title.lower()}, " + f"homemade_in_summary={'homemade' in summary.lower()}, " + f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}") + return main_topic, [main_topic, "food"], main_topic, skip_flag image_query = result["image_query"] relevance_keywords = result["relevance"] main_topic = result.get("main_topic", extract_main_topic(title.lower() + " " + summary.lower())) - skip_flag = result["action"] == "SKIP" or "homemade" in title.lower() or "homemade" in summary.lower() + skip_flag = ( + result["action"] == "SKIP" or + "homemade" in title.lower() or + "homemade" in summary.lower() or + any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS) + ) - logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}") + logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}, " + f"Reasons: action={result['action']}, " + f"homemade_in_title={'homemade' in title.lower()}, " + f"homemade_in_summary={'homemade' in summary.lower()}, " + f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}") specific_single_words = ["kimchi", "sushi", "pizza", "taco", "burger", "chipotle", "starbucks", "mcdonalds"] if not image_query: @@ -412,7 +439,16 @@ def smart_image_and_filter(title, summary): except Exception as e: logging.error(f"Smart image/filter failed: {e}, using fallback") main_topic = extract_main_topic(title.lower() + " " + summary.lower()) - return main_topic, [main_topic, "food"], main_topic, False + skip_flag = ( + "homemade" in title.lower() or + "homemade" in summary.lower() or + any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS) + ) + logging.info(f"Fallback for '{title}': Skip={skip_flag}, Reasons: " + f"homemade_in_title={'homemade' in title.lower()}, " + f"homemade_in_summary={'homemade' in summary.lower()}, " + f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}") + return main_topic, [main_topic, "food"], main_topic, skip_flag def extract_main_topic(text): # Common food-related keywords (expand as needed)