diff --git a/foodie_automator_rss.py b/foodie_automator_rss.py index 4140682..6e94460 100644 --- a/foodie_automator_rss.py +++ b/foodie_automator_rss.py @@ -42,7 +42,6 @@ LOG_PRUNE_DAYS = 30 FEED_TIMEOUT = 15 MAX_RETRIES = 3 RETRY_BACKOFF = 2 -IMAGE_UPLOAD_TIMEOUT = 30 POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json' USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json' @@ -54,9 +53,18 @@ def setup_logging(): try: logging.debug("Attempting to set up logging") os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True) + logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}") if not os.access(os.path.dirname(LOG_FILE), os.W_OK): raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}") - logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}") + + # Test write to log file + try: + with open(LOG_FILE, 'a') as f: + f.write("") + logging.debug(f"Confirmed write access to {LOG_FILE}") + except Exception as e: + raise PermissionError(f"Cannot write to {LOG_FILE}: {e}") + if os.path.exists(LOG_FILE): with open(LOG_FILE, 'r') as f: lines = f.readlines() @@ -82,9 +90,10 @@ def setup_logging(): logging.basicConfig( filename=LOG_FILE, - level=logging.DEBUG, # Changed to DEBUG for troubleshooting + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", - datefmt="%Y-%m-%d %H:%M:%S" + datefmt="%Y-%m-%d %H:%M:%S", + force=True ) console_handler = logging.StreamHandler() console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) @@ -96,6 +105,13 @@ def setup_logging(): print(f"Failed to setup logging: {e}") sys.exit(1) +# Call setup_logging immediately +setup_logging() + +posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS) +posted_titles = set(entry["title"] for entry in posted_titles_data) +used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry) + def acquire_lock(): try: logging.debug("Attempting to acquire lock") @@ -121,11 +137,6 @@ def signal_handler(sig, frame): signal.signal(signal.SIGTERM, signal_handler) signal.signal(signal.SIGINT, signal_handler) -# Initialize posted_titles after logging setup -posted_titles_data = [] -posted_titles = set() -used_images = set() - def create_http_session() -> requests.Session: session = requests.Session() retry_strategy = Retry( @@ -251,8 +262,9 @@ def curate_from_rss(): articles = fetch_rss_feeds() if not articles: + print("No RSS articles available") logging.info("No RSS articles available") - return None, None, False + return None, None, random.randint(600, 1800) attempts = 0 max_attempts = 10 @@ -265,20 +277,24 @@ def curate_from_rss(): original_source = f'{source_name}' if title in posted_titles: + print(f"Skipping already posted article: {title}") logging.info(f"Skipping already posted article: {title}") attempts += 1 continue + print(f"Trying RSS Article: {title} from {source_name}") logging.info(f"Trying RSS Article: {title} from {source_name}") try: image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) except Exception as e: + print(f"Smart image/filter error for '{title}': {e}") logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}") attempts += 1 continue if skip: + print(f"Skipping filtered RSS article: {title}") logging.info(f"Skipping filtered RSS article: {title}") attempts += 1 continue @@ -286,8 +302,10 @@ def curate_from_rss(): ddg_context = fetch_duckduckgo_news_context(title) scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}" interest_score = is_interesting(scoring_content) + print(f"Interest Score for '{title[:50]}...': {interest_score}") logging.info(f"Interest score for '{title}': {interest_score}") if interest_score < 6: + print(f"RSS Interest Too Low: {interest_score}") logging.info(f"RSS Interest Too Low: {interest_score}") attempts += 1 continue @@ -310,6 +328,7 @@ def curate_from_rss(): extra_prompt=extra_prompt ) if not final_summary: + print(f"Summary failed for '{title}'") logging.info(f"Summary failed for '{title}'") attempts += 1 continue @@ -318,12 +337,22 @@ def curate_from_rss(): post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic) if not post_data: + print(f"Post data preparation failed for '{title}'") + logging.info(f"Post data preparation failed for '{title}'") attempts += 1 continue image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic) if not image_url: + print(f"Flickr image fetch failed for '{image_query}', trying fallback") + logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback") image_url, image_source, uploader, page_url = get_image(image_query) + if not image_url: + print(f"All image uploads failed for '{title}' - posting without image") + logging.warning(f"All image uploads failed for '{title}' - posting without image") + image_source = None + uploader = None + page_url = None hook = get_dynamic_hook(post_data["title"]).strip() @@ -352,10 +381,12 @@ def curate_from_rss(): should_post_tweet=True ) if not post_id: - logging.warning(f"Failed to post to WordPress for '{title}', skipping") + print(f"Failed to post to WordPress for '{title}'") + logging.warning(f"Failed to post to WordPress for '{title}'") attempts += 1 continue except Exception as e: + print(f"WordPress posting error for '{title}': {e}") logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True) attempts += 1 continue @@ -385,6 +416,7 @@ def curate_from_rss(): should_post_tweet=False ) except Exception as e: + print(f"Failed to update WordPress post '{title}' with share links: {e}") logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True) finally: is_posting = False @@ -392,41 +424,49 @@ def curate_from_rss(): timestamp = datetime.now(timezone.utc).isoformat() save_json_file(POSTED_TITLES_FILE, title, timestamp) posted_titles.add(title) + print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") if image_url: save_json_file(USED_IMAGES_FILE, image_url, timestamp) used_images.add(image_url) + print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") + print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****") logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****") - return post_data, category, True + return post_data, category, random.randint(0, 1800) + attempts += 1 + print(f"WP posting failed for '{post_data['title']}'") logging.info(f"WP posting failed for '{post_data['title']}'") + print("No interesting RSS article found after attempts") logging.info("No interesting RSS article found after attempts") - return None, None, False + return None, None, random.randint(600, 1800) except Exception as e: + print(f"Unexpected error in curate_from_rss: {e}") logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True) - print(f"Error in curate_from_rss: {e}") - return None, None, False + return None, None, random.randint(600, 1800) def run_rss_automator(): lock_fd = None try: - setup_logging() lock_fd = acquire_lock() + print(f"{datetime.now(timezone.utc)} - INFO - ***** RSS Automator Launched *****") logging.info("***** RSS Automator Launched *****") - post_data, category, should_continue = curate_from_rss() + post_data, category, sleep_time = curate_from_rss() if not post_data: + print("No postable RSS article found") logging.info("No postable RSS article found") - else: - logging.info("Completed RSS run") - return post_data, category, should_continue + print(f"Sleeping for {sleep_time}s") + logging.info(f"Completed run with sleep time: {sleep_time} seconds") + time.sleep(sleep_time) + return post_data, category, sleep_time except Exception as e: + print(f"Fatal error in run_rss_automator: {e}") logging.error(f"Fatal error in run_rss_automator: {e}", exc_info=True) - print(f"Fatal error: {e}") - return None, None, False + return None, None, random.randint(600, 1800) finally: if lock_fd: fcntl.flock(lock_fd, fcntl.LOCK_UN) diff --git a/foodie_utils.py b/foodie_utils.py index 203ce36..40d4f89 100644 --- a/foodie_utils.py +++ b/foodie_utils.py @@ -447,22 +447,22 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw } logging.info(f"Fetching image from {image_url} for '{post_title}'") - for attempt in range(MAX_RETRIES): + for attempt in range(3): try: image_response = requests.get(image_url, headers=image_headers, timeout=IMAGE_UPLOAD_TIMEOUT) if image_response.status_code == 429: - wait_time = RETRY_BACKOFF * (2 ** attempt) - logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/{MAX_RETRIES}).") + wait_time = 10 * (2 ** attempt) + logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).") time.sleep(wait_time) continue image_response.raise_for_status() break except requests.exceptions.RequestException as e: - logging.warning(f"Image fetch failed for {image_url} (attempt {attempt+1}/{MAX_RETRIES}): {e}") - if attempt == MAX_RETRIES - 1: - logging.error(f"Failed to fetch image {image_url} after {MAX_RETRIES} attempts") + logging.warning(f"Image fetch failed for {image_url} (attempt {attempt+1}/3): {e}") + if attempt == 2: + logging.error(f"Failed to fetch image {image_url} after 3 attempts") return None - time.sleep(RETRY_BACKOFF * (2 ** attempt)) + time.sleep(10 * (2 ** attempt)) else: logging.error(f"Failed to fetch image {image_url} after retries") return None @@ -490,7 +490,7 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw logging.info(f"Uploaded image '{safe_title}.jpg' to WP (ID: {image_id}) with caption '{caption}'") return image_id except Exception as e: - logging.error(f"Image upload to WP failed for '{post_title}': {e}", exc_info=True) + logging.error(f"Image upload to WP failed for '{post_title}': {e}") print(f"Image upload to WP failed for '{post_title}': {e}") return None