update title filter for reddit homemade

main
Shane 7 months ago
parent c89a9df6e2
commit 2554693895
  1. 122
      foodie_automator_reddit.py
  2. 48
      foodie_utils.py

@ -70,50 +70,86 @@ used_images = set(entry["title"] for entry in used_images_data if "title" in ent
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
def setup_logging():
    """Prune old entries from LOG_FILE, then configure root logging.

    On success the root logger writes INFO-and-above records to LOG_FILE
    and mirrors them to the console. If anything goes wrong while
    preparing the log file (directory creation, permissions, pruning),
    logging falls back to console-only output and the failure is logged.
    """
    try:
        # A bare filename has an empty dirname; treat that as the current
        # directory so os.makedirs() does not fail on ''.
        log_dir = os.path.dirname(LOG_FILE) or "."
        os.makedirs(log_dir, exist_ok=True)
        if not os.access(log_dir, os.W_OK):
            raise PermissionError(f"No write permission for {log_dir}")

        # Opening in append mode proves the file itself is writable (and
        # creates it if missing) without touching existing content.
        try:
            with open(LOG_FILE, 'a'):
                pass
        except OSError as e:
            raise PermissionError(f"Cannot write to {LOG_FILE}: {e}") from e

        # Prune before the file handler is attached so the rewrite cannot
        # race with our own log output.
        malformed_entries = _prune_log_file(LOG_FILE, LOG_PRUNE_DAYS)

        logging.basicConfig(
            filename=LOG_FILE,
            level=logging.INFO,
            format="%(asctime)s - %(levelname)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
            force=True  # Ensure this config takes precedence
        )
        logging.getLogger("requests").setLevel(logging.WARNING)
        logging.getLogger("prawcore").setLevel(logging.WARNING)
        # basicConfig(filename=...) installs only a file handler, so the
        # console needs its own.
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
        logging.getLogger().addHandler(console_handler)
        logging.info("Logging initialized for foodie_automator_reddit.py")

        # Reported only now so the warnings reach the configured handlers.
        for entry in malformed_entries:
            logging.warning(f"Skipping malformed log entry (no timestamp): {entry[:50]}...")
    except Exception as e:
        # Fallback to console logging if file logging fails. basicConfig()
        # without a filename already installs a stderr handler, so adding
        # another StreamHandler here would print every message twice.
        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s - %(levelname)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
            force=True
        )
        logging.error(f"Failed to setup file logging for {LOG_FILE}: {e}. Using console logging.")
        logging.info("Console logging initialized as fallback for foodie_automator_reddit.py")


def _prune_log_file(path, max_age_days):
    """Remove entries older than max_age_days from the log file at path.

    An entry is a line starting with a timestamp plus any following
    lines that do not start with one (e.g. tracebacks). Entries whose
    first 19 characters are not a valid timestamp are dropped as well.

    Returns the list of dropped entries that had no parseable timestamp.
    """
    # Milliseconds are optional: records written with
    # datefmt="%Y-%m-%d %H:%M:%S" carry none, while records written with
    # the default asctime format end in ",mmm". Requiring them would make
    # every new-format line look like a continuation line.
    timestamp_pattern = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(?:,\d{3})?')

    with open(path, 'r') as f:
        lines = f.readlines()

    entries = []
    current = []
    for line in lines:
        if timestamp_pattern.match(line) and current:
            entries.append(''.join(current))
            current = []
        current.append(line)
    if current:
        entries.append(''.join(current))

    # NOTE(review): timestamps are interpreted as UTC, but logging formats
    # asctime in local time by default, so the effective cutoff is shifted
    # by the local UTC offset - confirm this is acceptable.
    cutoff = datetime.now(timezone.utc) - timedelta(days=max_age_days)
    kept = []
    malformed = []
    for entry in entries:
        try:
            stamp = datetime.strptime(entry[:19], '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
        except ValueError:
            malformed.append(entry)
            continue
        if stamp > cutoff:
            kept.append(entry)

    # Skip the truncate-and-rewrite when nothing was removed.
    if len(kept) != len(entries):
        with open(path, 'w') as f:
            f.writelines(kept)
    return malformed
def acquire_lock():
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)

@ -361,7 +361,7 @@ def smart_image_and_filter(title, summary):
"Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) "
"for an image search about food industry trends or viral content. Prioritize specific multi-word terms if present, "
"otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). "
"Return 'SKIP' if the article is about home appliances, recipes, promotions, or contains 'homemade', else 'KEEP'. "
"Return 'SKIP' if the article is about home appliances, recipes, promotions, contains 'homemade', or includes recipe-related terms like 'cook', 'bake', or 'ingredient'. "
"Return as JSON with double quotes for all property names and string values (e.g., "
"{\"image_query\": \"fast food trends\", \"relevance\": [\"fast food\", \"dining\", \"culture\"], \"main_topic\": \"fast food\", \"action\": \"KEEP\"})."
)
@ -385,19 +385,46 @@ def smart_image_and_filter(title, summary):
except json.JSONDecodeError as e:
logging.warning(f"JSON parsing failed: {e}, raw: '{fixed_result}'. Using fallback.")
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
return main_topic, [main_topic, "food"], main_topic, False
skip_flag = (
"homemade" in title.lower() or
"homemade" in summary.lower() or
any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)
)
logging.info(f"Fallback for '{title}': Skip={skip_flag}, Reasons: "
f"homemade_in_title={'homemade' in title.lower()}, "
f"homemade_in_summary={'homemade' in summary.lower()}, "
f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}")
return main_topic, [main_topic, "food"], main_topic, skip_flag
if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result:
logging.warning(f"Invalid GPT response format: {result}, using fallback")
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
return main_topic, [main_topic, "food"], main_topic, False
skip_flag = (
"homemade" in title.lower() or
"homemade" in summary.lower() or
any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)
)
logging.info(f"Fallback for '{title}': Skip={skip_flag}, Reasons: "
f"homemade_in_title={'homemade' in title.lower()}, "
f"homemade_in_summary={'homemade' in summary.lower()}, "
f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}")
return main_topic, [main_topic, "food"], main_topic, skip_flag
image_query = result["image_query"]
relevance_keywords = result["relevance"]
main_topic = result.get("main_topic", extract_main_topic(title.lower() + " " + summary.lower()))
skip_flag = result["action"] == "SKIP" or "homemade" in title.lower() or "homemade" in summary.lower()
skip_flag = (
result["action"] == "SKIP" or
"homemade" in title.lower() or
"homemade" in summary.lower() or
any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)
)
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}")
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}, "
f"Reasons: action={result['action']}, "
f"homemade_in_title={'homemade' in title.lower()}, "
f"homemade_in_summary={'homemade' in summary.lower()}, "
f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}")
specific_single_words = ["kimchi", "sushi", "pizza", "taco", "burger", "chipotle", "starbucks", "mcdonalds"]
if not image_query:
@ -412,7 +439,16 @@ def smart_image_and_filter(title, summary):
except Exception as e:
logging.error(f"Smart image/filter failed: {e}, using fallback")
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
return main_topic, [main_topic, "food"], main_topic, False
skip_flag = (
"homemade" in title.lower() or
"homemade" in summary.lower() or
any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)
)
logging.info(f"Fallback for '{title}': Skip={skip_flag}, Reasons: "
f"homemade_in_title={'homemade' in title.lower()}, "
f"homemade_in_summary={'homemade' in summary.lower()}, "
f"recipe_keywords={any(kw in title.lower() or kw in summary.lower() for kw in RECIPE_KEYWORDS)}")
return main_topic, [main_topic, "food"], main_topic, skip_flag
def extract_main_topic(text):
# Common food-related keywords (expand as needed)

Loading…
Cancel
Save