Shane 7 months ago
parent dbe76795c2
commit ad21bac601

Changed files:
  1. foodie_automator_rss.py (84 changes)
  2. foodie_utils.py (16 changes)

@@ -42,7 +42,6 @@ LOG_PRUNE_DAYS = 30
FEED_TIMEOUT = 15
MAX_RETRIES = 3
RETRY_BACKOFF = 2
IMAGE_UPLOAD_TIMEOUT = 30
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json'
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
@@ -54,9 +53,18 @@ def setup_logging():
try:
logging.debug("Attempting to set up logging")
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}")
if not os.access(os.path.dirname(LOG_FILE), os.W_OK):
raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}")
logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}")
# Test write to log file
try:
with open(LOG_FILE, 'a') as f:
f.write("")
logging.debug(f"Confirmed write access to {LOG_FILE}")
except Exception as e:
raise PermissionError(f"Cannot write to {LOG_FILE}: {e}")
if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
@@ -82,9 +90,10 @@ def setup_logging():
logging.basicConfig(
filename=LOG_FILE,
level=logging.DEBUG, # Changed to DEBUG for troubleshooting
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
datefmt="%Y-%m-%d %H:%M:%S",
force=True
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
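For context, the logging pattern this hunk lands on, as a self-contained sketch (the log path is a placeholder; force=True, available since Python 3.8, lets a repeated setup_logging() call reconfigure the root logger instead of being silently ignored):

import logging
import os

LOG_FILE = "/tmp/foodie_automator/foodie_automator_rss.log"  # placeholder path

def setup_logging(log_file=LOG_FILE):
    os.makedirs(os.path.dirname(log_file), exist_ok=True)
    # force=True removes handlers already attached to the root logger, so a
    # second call reconfigures logging instead of doing nothing.
    logging.basicConfig(
        filename=log_file,
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        force=True,
    )
    # Mirror records to the console for interactive runs.
    console = logging.StreamHandler()
    console.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
    logging.getLogger().addHandler(console)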
@@ -96,6 +105,13 @@ def setup_logging():
print(f"Failed to setup logging: {e}")
sys.exit(1)
# Call setup_logging immediately
setup_logging()
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
posted_titles = set(entry["title"] for entry in posted_titles_data)
used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
def acquire_lock():
try:
logging.debug("Attempting to acquire lock")
@@ -121,11 +137,6 @@ def signal_handler(sig, frame):
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
# Initialize posted_titles after logging setup
posted_titles_data = []
posted_titles = set()
used_images = set()
def create_http_session() -> requests.Session:
session = requests.Session()
retry_strategy = Retry(
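The Retry(...) arguments are cut off at the hunk boundary. A typical shape for create_http_session() looks like the sketch below; the specific counts, status codes, and methods are assumptions, not values taken from this repo:

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def create_http_session() -> requests.Session:
    session = requests.Session()
    retry_strategy = Retry(
        total=3,                                  # assumed retry budget
        backoff_factor=2,                         # exponential backoff between attempts
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["GET", "HEAD"],
    )
    adapter = HTTPAdapter(max_retries=retry_strategy)
    session.mount("https://", adapter)
    session.mount("http://", adapter)
    return session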
@@ -251,8 +262,9 @@ def curate_from_rss():
articles = fetch_rss_feeds()
if not articles:
print("No RSS articles available")
logging.info("No RSS articles available")
return None, None, False
return None, None, random.randint(600, 1800)
attempts = 0
max_attempts = 10
@@ -265,20 +277,24 @@
original_source = f'<a href="{link}">{source_name}</a>'
if title in posted_titles:
print(f"Skipping already posted article: {title}")
logging.info(f"Skipping already posted article: {title}")
attempts += 1
continue
print(f"Trying RSS Article: {title} from {source_name}")
logging.info(f"Trying RSS Article: {title} from {source_name}")
try:
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
except Exception as e:
print(f"Smart image/filter error for '{title}': {e}")
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
attempts += 1
continue
if skip:
print(f"Skipping filtered RSS article: {title}")
logging.info(f"Skipping filtered RSS article: {title}")
attempts += 1
continue
@@ -286,8 +302,10 @@
ddg_context = fetch_duckduckgo_news_context(title)
scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
interest_score = is_interesting(scoring_content)
print(f"Interest Score for '{title[:50]}...': {interest_score}")
logging.info(f"Interest score for '{title}': {interest_score}")
if interest_score < 6:
print(f"RSS Interest Too Low: {interest_score}")
logging.info(f"RSS Interest Too Low: {interest_score}")
attempts += 1
continue
@@ -310,6 +328,7 @@
extra_prompt=extra_prompt
)
if not final_summary:
print(f"Summary failed for '{title}'")
logging.info(f"Summary failed for '{title}'")
attempts += 1
continue
@@ -318,12 +337,22 @@
post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic)
if not post_data:
print(f"Post data preparation failed for '{title}'")
logging.info(f"Post data preparation failed for '{title}'")
attempts += 1
continue
image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
if not image_url:
print(f"Flickr image fetch failed for '{image_query}', trying fallback")
logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
image_url, image_source, uploader, page_url = get_image(image_query)
if not image_url:
print(f"All image uploads failed for '{title}' - posting without image")
logging.warning(f"All image uploads failed for '{title}' - posting without image")
image_source = None
uploader = None
page_url = None
hook = get_dynamic_hook(post_data["title"]).strip()
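The image handling now degrades gracefully: Flickr first, then the generic get_image() fallback, then no image at all. The same chain, generalized into a sketch that takes the fetchers as callables returning (image_url, image_source, uploader, page_url); this is illustrative only, not project code:

import logging

def fetch_image_with_fallback(fetchers, query):
    """Try each fetcher in turn; fall through to all Nones so the post can go out image-free."""
    for fetch in fetchers:
        name = getattr(fetch, "__name__", "fetcher")
        try:
            image_url, image_source, uploader, page_url = fetch(query)
        except Exception as e:
            logging.warning(f"{name} failed for '{query}': {e}")
            continue
        if image_url:
            return image_url, image_source, uploader, page_url
        logging.warning(f"{name} returned no image for '{query}', trying next source")
    logging.warning(f"All image sources failed for '{query}' - posting without image")
    return None, None, None, None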
@@ -352,10 +381,12 @@
should_post_tweet=True
)
if not post_id:
logging.warning(f"Failed to post to WordPress for '{title}', skipping")
print(f"Failed to post to WordPress for '{title}'")
logging.warning(f"Failed to post to WordPress for '{title}'")
attempts += 1
continue
except Exception as e:
print(f"WordPress posting error for '{title}': {e}")
logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
attempts += 1
continue
@@ -385,6 +416,7 @@
should_post_tweet=False
)
except Exception as e:
print(f"Failed to update WordPress post '{title}' with share links: {e}")
logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
finally:
is_posting = False
@@ -392,41 +424,49 @@
timestamp = datetime.now(timezone.utc).isoformat()
save_json_file(POSTED_TITLES_FILE, title, timestamp)
posted_titles.add(title)
print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
if image_url:
save_json_file(USED_IMAGES_FILE, image_url, timestamp)
used_images.add(image_url)
print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
return post_data, category, True
return post_data, category, random.randint(0, 1800)
attempts += 1
print(f"WP posting failed for '{post_data['title']}'")
logging.info(f"WP posting failed for '{post_data['title']}'")
print("No interesting RSS article found after attempts")
logging.info("No interesting RSS article found after attempts")
return None, None, False
return None, None, random.randint(600, 1800)
except Exception as e:
print(f"Unexpected error in curate_from_rss: {e}")
logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True)
print(f"Error in curate_from_rss: {e}")
return None, None, False
return None, None, random.randint(600, 1800)
def run_rss_automator():
lock_fd = None
try:
setup_logging()
lock_fd = acquire_lock()
print(f"{datetime.now(timezone.utc)} - INFO - ***** RSS Automator Launched *****")
logging.info("***** RSS Automator Launched *****")
post_data, category, should_continue = curate_from_rss()
post_data, category, sleep_time = curate_from_rss()
if not post_data:
print("No postable RSS article found")
logging.info("No postable RSS article found")
else:
logging.info("Completed RSS run")
return post_data, category, should_continue
print(f"Sleeping for {sleep_time}s")
logging.info(f"Completed run with sleep time: {sleep_time} seconds")
time.sleep(sleep_time)
return post_data, category, sleep_time
except Exception as e:
print(f"Fatal error in run_rss_automator: {e}")
logging.error(f"Fatal error in run_rss_automator: {e}", exc_info=True)
print(f"Fatal error: {e}")
return None, None, False
return None, None, random.randint(600, 1800)
finally:
if lock_fd:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
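acquire_lock() is only partially visible in this diff; the flock pair used here (exclusive, non-blocking lock on startup, LOCK_UN in the finally block above) usually looks like the following sketch, with a placeholder lock-file path:

import fcntl
import logging
import sys

LOCK_FILE = "/tmp/foodie_automator_rss.lock"  # placeholder path

def acquire_lock():
    """Take an exclusive, non-blocking flock so only one automator instance runs at a time."""
    lock_fd = open(LOCK_FILE, "w")
    try:
        fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except BlockingIOError:
        logging.error("Another instance already holds the lock; exiting")
        sys.exit(1)
    # Caller is responsible for fcntl.flock(lock_fd, fcntl.LOCK_UN) on shutdown.
    return lock_fd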

@@ -447,22 +447,22 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw
}
logging.info(f"Fetching image from {image_url} for '{post_title}'")
for attempt in range(MAX_RETRIES):
for attempt in range(3):
try:
image_response = requests.get(image_url, headers=image_headers, timeout=IMAGE_UPLOAD_TIMEOUT)
if image_response.status_code == 429:
wait_time = RETRY_BACKOFF * (2 ** attempt)
logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/{MAX_RETRIES}).")
wait_time = 10 * (2 ** attempt)
logging.warning(f"Rate limit hit for {image_url}. Retrying after {wait_time}s (attempt {attempt+1}/3).")
time.sleep(wait_time)
continue
image_response.raise_for_status()
break
except requests.exceptions.RequestException as e:
logging.warning(f"Image fetch failed for {image_url} (attempt {attempt+1}/{MAX_RETRIES}): {e}")
if attempt == MAX_RETRIES - 1:
logging.error(f"Failed to fetch image {image_url} after {MAX_RETRIES} attempts")
logging.warning(f"Image fetch failed for {image_url} (attempt {attempt+1}/3): {e}")
if attempt == 2:
logging.error(f"Failed to fetch image {image_url} after 3 attempts")
return None
time.sleep(RETRY_BACKOFF * (2 ** attempt))
time.sleep(10 * (2 ** attempt))
else:
logging.error(f"Failed to fetch image {image_url} after retries")
return None
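This hunk swaps the MAX_RETRIES / RETRY_BACKOFF names for the literals 3 and 10 inside foodie_utils.py. The retry-with-backoff shape itself, using those same literals, as an illustrative helper (not part of the project):

import logging
import time

import requests

def fetch_with_backoff(url, headers=None, timeout=30, retries=3, base_delay=10):
    """GET a URL, backing off exponentially on HTTP 429 and transient errors."""
    for attempt in range(retries):
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
            if response.status_code == 429:
                wait = base_delay * (2 ** attempt)
                logging.warning(f"Rate limited by {url}; retrying in {wait}s (attempt {attempt + 1}/{retries})")
                time.sleep(wait)
                continue
            response.raise_for_status()
            return response
        except requests.exceptions.RequestException as e:
            logging.warning(f"Fetch failed for {url} (attempt {attempt + 1}/{retries}): {e}")
            if attempt == retries - 1:
                logging.error(f"Giving up on {url} after {retries} attempts")
                return None
            time.sleep(base_delay * (2 ** attempt))
    return None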
@@ -490,7 +490,7 @@ def upload_image_to_wp(image_url, post_title, wp_base_url, wp_username, wp_passw
logging.info(f"Uploaded image '{safe_title}.jpg' to WP (ID: {image_id}) with caption '{caption}'")
return image_id
except Exception as e:
logging.error(f"Image upload to WP failed for '{post_title}': {e}", exc_info=True)
logging.error(f"Image upload to WP failed for '{post_title}': {e}")
print(f"Image upload to WP failed for '{post_title}': {e}")
return None
