Shane 7 months ago
parent dbe76795c2
commit ad21bac601
  1. 84
      foodie_automator_rss.py
  2. 16
      foodie_utils.py

@ -42,7 +42,6 @@ LOG_PRUNE_DAYS = 30
FEED_TIMEOUT = 15 FEED_TIMEOUT = 15
MAX_RETRIES = 3 MAX_RETRIES = 3
RETRY_BACKOFF = 2 RETRY_BACKOFF = 2
IMAGE_UPLOAD_TIMEOUT = 30
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json' POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json'
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json' USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
@ -54,9 +53,18 @@ def setup_logging():
try: try:
logging.debug("Attempting to set up logging") logging.debug("Attempting to set up logging")
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True) os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}")
if not os.access(os.path.dirname(LOG_FILE), os.W_OK): if not os.access(os.path.dirname(LOG_FILE), os.W_OK):
raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}") raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}")
logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}")
# Test write to log file
try:
with open(LOG_FILE, 'a') as f:
f.write("")
logging.debug(f"Confirmed write access to {LOG_FILE}")
except Exception as e:
raise PermissionError(f"Cannot write to {LOG_FILE}: {e}")
if os.path.exists(LOG_FILE): if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f: with open(LOG_FILE, 'r') as f:
lines = f.readlines() lines = f.readlines()
@ -82,9 +90,10 @@ def setup_logging():
logging.basicConfig( logging.basicConfig(
filename=LOG_FILE, filename=LOG_FILE,
level=logging.DEBUG, # Changed to DEBUG for troubleshooting level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s", format="%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S" datefmt="%Y-%m-%d %H:%M:%S",
force=True
) )
console_handler = logging.StreamHandler() console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
@ -96,6 +105,13 @@ def setup_logging():
print(f"Failed to setup logging: {e}") print(f"Failed to setup logging: {e}")
sys.exit(1) sys.exit(1)
# Call setup_logging immediately
setup_logging()
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
posted_titles = set(entry["title"] for entry in posted_titles_data)
used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
def acquire_lock(): def acquire_lock():
try: try:
logging.debug("Attempting to acquire lock") logging.debug("Attempting to acquire lock")
@ -121,11 +137,6 @@ def signal_handler(sig, frame):
signal.signal(signal.SIGTERM, signal_handler) signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGINT, signal_handler)
# Initialize posted_titles after logging setup
posted_titles_data = []
posted_titles = set()
used_images = set()
def create_http_session() -> requests.Session: def create_http_session() -> requests.Session:
session = requests.Session() session = requests.Session()
retry_strategy = Retry( retry_strategy = Retry(
@ -251,8 +262,9 @@ def curate_from_rss():
articles = fetch_rss_feeds() articles = fetch_rss_feeds()
if not articles: if not articles:
print("No RSS articles available")
logging.info("No RSS articles available") logging.info("No RSS articles available")
return None, None, False return None, None, random.randint(600, 1800)
attempts = 0 attempts = 0
max_attempts = 10 max_attempts = 10
@ -265,20 +277,24 @@ def curate_from_rss():
original_source = f'<a href="{link}">{source_name}</a>' original_source = f'<a href="{link}">{source_name}</a>'
if title in posted_titles: if title in posted_titles:
print(f"Skipping already posted article: {title}")
logging.info(f"Skipping already posted article: {title}") logging.info(f"Skipping already posted article: {title}")
attempts += 1 attempts += 1
continue continue
print(f"Trying RSS Article: {title} from {source_name}")
logging.info(f"Trying RSS Article: {title} from {source_name}") logging.info(f"Trying RSS Article: {title} from {source_name}")
try: try:
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary) image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
except Exception as e: except Exception as e:
print(f"Smart image/filter error for '{title}': {e}")
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}") logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1 attempts += 1
continue continue
if skip: if skip:
print(f"Skipping filtered RSS article: {title}")
logging.info(f"Skipping filtered RSS article: {title}") logging.info(f"Skipping filtered RSS article: {title}")
attempts += 1 attempts += 1
continue continue
@ -286,8 +302,10 @@ def curate_from_rss():
ddg_context = fetch_duckduckgo_news_context(title) ddg_context = fetch_duckduckgo_news_context(title)
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}" scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
interest_score = is_interesting(scoring_content) interest_score = is_interesting(scoring_content)
print(f"Interest Score for '{title[:50]}...': {interest_score}")
logging.info(f"Interest score for '{title}': {interest_score}") logging.info(f"Interest score for '{title}': {interest_score}")
if interest_score < 6: if interest_score < 6:
print(f"RSS Interest Too Low: {interest_score}")
logging.info(f"RSS Interest Too Low: {interest_score}") logging.info(f"RSS Interest Too Low: {interest_score}")
attempts += 1 attempts += 1
continue continue
@ -310,6 +328,7 @@ def curate_from_rss():
extra_prompt=extra_prompt extra_prompt=extra_prompt
) )
if not final_summary: if not final_summary:
print(f"Summary failed for '{title}'")
logging.info(f"Summary failed for '{title}'") logging.info(f"Summary failed for '{title}'")
attempts += 1 attempts += 1
continue continue
@ -318,12 +337,22 @@ def curate_from_rss():
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic) post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic)
if not post_data: if not post_data:
print(f"Post data preparation failed for '{title}'")
logging.info(f"Post data preparation failed for '{title}'")
attempts += 1 attempts += 1
continue continue
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic) image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
if not image_url: if not image_url:
print(f"Flickr image fetch failed for '{image_query}', trying fallback")
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query) image_url, image_source, uploader, page_url = get_image(image_query)
if not image_url:
print(f"All image uploads failed for '{title}' - posting without image")
logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None
uploader = None
page_url = None
hook = get_dynamic_hook(post_data["title"]).strip() hook = get_dynamic_hook(post_data["title"]).strip()
@ -352,10 +381,12 @@ def curate_from_rss():
should_post_tweet=True should_post_tweet=True
) )
if not post_id: if not post_id:
logging.warning(f"Failed to post to WordPress for '{title}', skipping") print(f"Failed to post to WordPress for '{title}'")
logging.warning(f"Failed to post to WordPress for '{title}'")
attempts += 1 attempts += 1
continue continue
except Exception as e: except Exception as e:
print(f"WordPress posting error for '{title}': {e}")
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True) logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
attempts += 1 attempts += 1
continue continue
@ -385,6 +416,7 @@ def curate_from_rss():
should_post_tweet=False should_post_tweet=False
) )
except Exception as e: except Exception as e:
print(f"Failed to update WordPress post '{title}' with share links: {e}")
logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True) logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
finally: finally:
is_posting = False is_posting = False
@ -392,41 +424,49 @@ def curate_from_rss():
timestamp = datetime.now(timezone.utc).isoformat() timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, title, timestamp) save_json_file(POSTED_TITLES_FILE, title, timestamp)
posted_titles.add(title) posted_titles.add(title)
print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}") logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
if image_url: if image_url:
save_json_file(USED_IMAGES_FILE, image_url, timestamp) save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url) used_images.add(image_url)
print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}") logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****") logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
return post_data, category, True return post_data, category, random.randint(0, 1800)
attempts += 1 attempts += 1
print(f"WP posting failed for '{post_data['title']}'")
logging.info(f"WP posting failed for '{post_data['title']}'") logging.info(f"WP posting failed for '{post_data['title']}'")
print("No interesting RSS article found after attempts")
logging.info("No interesting RSS article found after attempts") logging.info("No interesting RSS article found after attempts")
return None, None, False return None, None, random.randint(600, 1800)
except Exception as e: except Exception as e:
print(f"Unexpected error in curate_from_rss: {e}")
logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True) logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True)
print(f"Error in curate_from_rss: {e}") return None, None, random.randint(600, 1800)
return None, None, False
def run_rss_automator(): def run_rss_automator():
lock_fd = None lock_fd = None
try: try:
setup_logging()
lock_fd = acquire_lock() lock_fd = acquire_lock()
print(f"{datetime.now(timezone.utc)} - INFO - ***** RSS Automator Launched *****")
logging.info("***** RSS Automator Launched *****") logging.info("***** RSS Automator Launched *****")
post_data, category, should_continue = curate_from_rss() post_data, category, sleep_time = curate_from_rss()
if not post_data: if not post_data:
print("No postable RSS article found")
logging.info("No postable RSS article found") logging.info("No postable RSS article found")
else: print(f"Sleeping for {sleep_time}s")
logging.info("Completed RSS run") logging.info(f"Completed run with sleep time: {sleep_time} seconds")
return post_data, category, should_continue time.sleep(sleep_time)
return post_data, category, sleep_time
except Exception as e: except Exception as e:
print(f"Fatal error in run_rss_automator: {e}")
logging.error(f"Fatal error in run_rss_automator: {e}", exc_info=True) logging.error(f"Fatal error in run_rss_automator: {e}", exc_info=True)
print(f"Fatal error: {e}") return None, None, random.randint(600, 1800)
return None, None, False
finally: finally:
if lock_fd: if lock_fd:
fcntl.flock(lock_fd, fcntl.LOCK_UN) fcntl.flock(lock_fd, fcntl.LOCK_UN)

@ -447,22 +447,22 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw
} }
logging.info(f"Fetching image from {image_url} for '{post_title}'") logging.info(f"Fetching image from {image_url} for '{post_title}'")
for attempt in range(MAX_RETRIES): for attempt in range(3):
try: try:
image_response = requests.get(image_url, headers=image_headers, timeout=IMAGE_UPLOAD_TIMEOUT) image_response = requests.get(image_url, headers=image_headers, timeout=IMAGE_UPLOAD_TIMEOUT)
if image_response.status_code == 429: if image_response.status_code == 429:
wait_time = RETRY_BACKOFF * (2 ** attempt) wait_time = 10 * (2 ** attempt)
logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/{MAX_RETRIES}).") logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).")
time.sleep(wait_time) time.sleep(wait_time)
continue continue
image_response.raise_for_status() image_response.raise_for_status()
break break
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
logging.warning(f"Image fetch failed for {image_url} (attempt {attempt+1}/{MAX_RETRIES}): {e}") logging.warning(f"Image fetch failed for {image_url} (attempt {attempt+1}/3): {e}")
if attempt == MAX_RETRIES - 1: if attempt == 2:
logging.error(f"Failed to fetch image {image_url} after {MAX_RETRIES} attempts") logging.error(f"Failed to fetch image {image_url} after 3 attempts")
return None return None
time.sleep(RETRY_BACKOFF * (2 ** attempt)) time.sleep(10 * (2 ** attempt))
else: else:
logging.error(f"Failed to fetch image {image_url} after retries") logging.error(f"Failed to fetch image {image_url} after retries")
return None return None
@ -490,7 +490,7 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw
logging.info(f"Uploaded image '{safe_title}.jpg' to WP (ID: {image_id}) with caption '{caption}'") logging.info(f"Uploaded image '{safe_title}.jpg' to WP (ID: {image_id}) with caption '{caption}'")
return image_id return image_id
except Exception as e: except Exception as e:
logging.error(f"Image upload to WP failed for '{post_title}': {e}", exc_info=True) logging.error(f"Image upload to WP failed for '{post_title}': {e}")
print(f"Image upload to WP failed for '{post_title}': {e}") print(f"Image upload to WP failed for '{post_title}': {e}")
return None return None

Loading…
Cancel
Save