From dbe76795c29ffe0f815cb0244f53854ab5345958 Mon Sep 17 00:00:00 2001 From: Shane Date: Wed, 7 May 2025 07:57:03 +1000 Subject: [PATCH] fix --- foodie_automator_rss.py | 45 +++++++++++++++++++++++++++++------------ foodie_utils.py | 19 +++++++++-------- 2 files changed, 41 insertions(+), 23 deletions(-) diff --git a/foodie_automator_rss.py b/foodie_automator_rss.py index 4d8dfb1..4140682 100644 --- a/foodie_automator_rss.py +++ b/foodie_automator_rss.py @@ -42,23 +42,21 @@ LOG_PRUNE_DAYS = 30 FEED_TIMEOUT = 15 MAX_RETRIES = 3 RETRY_BACKOFF = 2 -IMAGE_UPLOAD_TIMEOUT = 30 # Added to match foodie_utils.py +IMAGE_UPLOAD_TIMEOUT = 30 POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json' USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json' EXPIRATION_HOURS = 24 IMAGE_EXPIRATION_DAYS = 7 -posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS) -posted_titles = set(entry["title"] for entry in posted_titles_data) -used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry) - def setup_logging(): """Initialize logging with pruning of old logs.""" try: + logging.debug("Attempting to set up logging") os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True) if not os.access(os.path.dirname(LOG_FILE), os.W_OK): raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}") + logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}") if os.path.exists(LOG_FILE): with open(LOG_FILE, 'r') as f: lines = f.readlines() @@ -80,10 +78,11 @@ def setup_logging(): logging.info(f"Skipped {malformed_count} malformed log lines during pruning") with open(LOG_FILE, 'w') as f: f.writelines(pruned_lines) + logging.debug(f"Log file pruned: {LOG_FILE}") logging.basicConfig( filename=LOG_FILE, - level=logging.INFO, + level=logging.DEBUG, # Changed to DEBUG for troubleshooting format="%(asctime)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S" ) @@ -98,12 +97,14 @@ def setup_logging(): sys.exit(1) def acquire_lock(): - os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True) - lock_fd = open(LOCK_FILE, 'w') try: + logging.debug("Attempting to acquire lock") + os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True) + lock_fd = open(LOCK_FILE, 'w') fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) lock_fd.write(str(os.getpid())) lock_fd.flush() + logging.debug(f"Lock acquired: {LOCK_FILE}") return lock_fd except IOError: logging.info("Another instance of foodie_automator_rss.py is running") @@ -120,6 +121,11 @@ def signal_handler(sig, frame): signal.signal(signal.SIGTERM, signal_handler) signal.signal(signal.SIGINT, signal_handler) +# Initialize posted_titles after logging setup +posted_titles_data = [] +posted_titles = set() +used_images = set() + def create_http_session() -> requests.Session: session = requests.Session() retry_strategy = Retry( @@ -237,10 +243,16 @@ def fetch_duckduckgo_news_context(title, hours=24): def curate_from_rss(): try: + global posted_titles_data, posted_titles, used_images + posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS) + posted_titles = set(entry["title"] for entry in posted_titles_data) + used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry) + logging.debug(f"Loaded {len(posted_titles)} posted titles and {len(used_images)} used images") + articles = fetch_rss_feeds() if not articles: logging.info("No RSS articles available") - return None, None, False # Continue running + return None, None, False attempts = 0 max_attempts = 10 @@ -259,7 +271,13 @@ def curate_from_rss(): logging.info(f"Trying RSS Article: {title} from {source_name}") - image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) + try: + image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) + except Exception as e: + logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}") + attempts += 1 + continue + if skip: logging.info(f"Skipping filtered RSS article: {title}") attempts += 1 @@ -382,21 +400,22 @@ def curate_from_rss(): logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****") - return post_data, category, True # Run again immediately + return post_data, category, True attempts += 1 logging.info(f"WP posting failed for '{post_data['title']}'") logging.info("No interesting RSS article found after attempts") - return None, None, False # Wait before running again + return None, None, False except Exception as e: logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True) + print(f"Error in curate_from_rss: {e}") return None, None, False def run_rss_automator(): lock_fd = None try: - lock_fd = acquire_lock() setup_logging() + lock_fd = acquire_lock() logging.info("***** RSS Automator Launched *****") post_data, category, should_continue = curate_from_rss() if not post_data: diff --git a/foodie_utils.py b/foodie_utils.py index 29824c9..203ce36 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -370,11 +370,11 @@ def smart_image_and_filter(title, summary): prompt = ( "Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) " - "for an image search about food industry trends or viral content. Prioritize specific terms if present, " + "for an image search about food industry trends or viral content. Prioritize specific multi-word terms if present, " "otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). " "Return 'SKIP' if the article is about home appliances, recipes, promotions, or contains 'homemade', else 'KEEP'. " "Return as JSON with double quotes for all property names and string values (e.g., " - "{\"image_query\": \"specific term\", \"relevance\": [\"keyword1\", \"keyword2\"], \"main_topic\": \"main food item\", \"action\": \"KEEP\" or \"SKIP\"})." + "{\"image_query\": \"fast food trends\", \"relevance\": [\"fast food\", \"dining\", \"culture\"], \"main_topic\": \"fast food\", \"action\": \"KEEP\"})." ) response = client.chat.completions.create( @@ -386,7 +386,7 @@ def smart_image_and_filter(title, summary): max_tokens=100 ) raw_result = response.choices[0].message.content.strip() - logging.info(f"Raw GPT smart image/filter response: '{raw_result}'") + logging.debug(f"Raw GPT smart image/filter response: '{raw_result}'") cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip() fixed_result = re.sub(r"(?