@@ -42,7 +42,6 @@ LOG_PRUNE_DAYS = 30
 FEED_TIMEOUT = 15
 MAX_RETRIES = 3
 RETRY_BACKOFF = 2
-IMAGE_UPLOAD_TIMEOUT = 30
 
 POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json'
 USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
@@ -54,9 +53,18 @@ def setup_logging():
     try:
         logging.debug("Attempting to set up logging")
         os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
-        logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}")
         if not os.access(os.path.dirname(LOG_FILE), os.W_OK):
             raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}")
+        logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}")
+
+        # Test write to log file
+        try:
+            with open(LOG_FILE, 'a') as f:
+                f.write("")
+            logging.debug(f"Confirmed write access to {LOG_FILE}")
+        except Exception as e:
+            raise PermissionError(f"Cannot write to {LOG_FILE}: {e}")
+
         if os.path.exists(LOG_FILE):
             with open(LOG_FILE, 'r') as f:
                 lines = f.readlines()
@@ -82,9 +90,10 @@ def setup_logging():
         logging.basicConfig(
             filename=LOG_FILE,
-            level=logging.DEBUG, # Changed to DEBUG for troubleshooting
+            level=logging.INFO,
             format="%(asctime)s - %(levelname)s - %(message)s",
-            datefmt="%Y-%m-%d %H:%M:%S"
+            datefmt="%Y-%m-%d %H:%M:%S",
+            force=True
         )
 
         console_handler = logging.StreamHandler()
         console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
@@ -96,6 +105,13 @@ def setup_logging():
         print(f"Failed to setup logging: {e}")
         sys.exit(1)
 
+# Call setup_logging immediately
+setup_logging()
+
+posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
+posted_titles = set(entry["title"] for entry in posted_titles_data)
+used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
+
 def acquire_lock():
     try:
         logging.debug("Attempting to acquire lock")
@@ -121,11 +137,6 @@ def signal_handler(sig, frame):
 signal.signal(signal.SIGTERM, signal_handler)
 signal.signal(signal.SIGINT, signal_handler)
 
-# Initialize posted_titles after logging setup
-posted_titles_data = []
-posted_titles = set()
-used_images = set()
-
 def create_http_session() -> requests.Session:
     session = requests.Session()
     retry_strategy = Retry(
@@ -251,8 +262,9 @@ def curate_from_rss():
 
         articles = fetch_rss_feeds()
         if not articles:
+            print("No RSS articles available")
             logging.info("No RSS articles available")
-            return None, None, False
+            return None, None, random.randint(600, 1800)
 
         attempts = 0
         max_attempts = 10
@@ -265,20 +277,24 @@ def curate_from_rss():
             original_source = f'<a href="{link}">{source_name}</a>'
 
             if title in posted_titles:
+                print(f"Skipping already posted article: {title}")
                 logging.info(f"Skipping already posted article: {title}")
                 attempts += 1
                 continue
 
+            print(f"Trying RSS Article: {title} from {source_name}")
             logging.info(f"Trying RSS Article: {title} from {source_name}")
 
             try:
                 image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
             except Exception as e:
+                print(f"Smart image/filter error for '{title}': {e}")
                 logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
                 attempts += 1
                 continue
 
             if skip:
+                print(f"Skipping filtered RSS article: {title}")
                 logging.info(f"Skipping filtered RSS article: {title}")
                 attempts += 1
                 continue
@@ -286,8 +302,10 @@ def curate_from_rss():
             ddg_context = fetch_duckduckgo_news_context(title)
             scoring_content = f"{title}\n\n{summary}\n\nAdditional Context: {ddg_context}"
             interest_score = is_interesting(scoring_content)
+            print(f"Interest Score for '{title[:50]}...': {interest_score}")
             logging.info(f"Interest score for '{title}': {interest_score}")
             if interest_score < 6:
+                print(f"RSS Interest Too Low: {interest_score}")
                 logging.info(f"RSS Interest Too Low: {interest_score}")
                 attempts += 1
                 continue
@@ -310,6 +328,7 @@ def curate_from_rss():
                 extra_prompt=extra_prompt
             )
             if not final_summary:
+                print(f"Summary failed for '{title}'")
                 logging.info(f"Summary failed for '{title}'")
                 attempts += 1
                 continue
@@ -318,12 +337,22 @@ def curate_from_rss():
 
             post_data, author, category, image_url, image_source, uploader, page_url = prepare_post_data(final_summary, title, main_topic)
             if not post_data:
+                print(f"Post data preparation failed for '{title}'")
+                logging.info(f"Post data preparation failed for '{title}'")
                 attempts += 1
                 continue
 
             image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords, main_topic)
             if not image_url:
+                print(f"Flickr image fetch failed for '{image_query}', trying fallback")
+                logging.warning(f"Flickr image fetch failed for '{image_query}', trying fallback")
                 image_url, image_source, uploader, page_url = get_image(image_query)
 
+            if not image_url:
+                print(f"All image uploads failed for '{title}' - posting without image")
+                logging.warning(f"All image uploads failed for '{title}' - posting without image")
+                image_source = None
+                uploader = None
+                page_url = None
+
             hook = get_dynamic_hook(post_data["title"]).strip()
@@ -352,10 +381,12 @@ def curate_from_rss():
                     should_post_tweet=True
                 )
                 if not post_id:
-                    logging.warning(f"Failed to post to WordPress for '{title}', skipping")
+                    print(f"Failed to post to WordPress for '{title}'")
+                    logging.warning(f"Failed to post to WordPress for '{title}'")
                     attempts += 1
                     continue
             except Exception as e:
+                print(f"WordPress posting error for '{title}': {e}")
                 logging.error(f"Failed to post to WordPress for '{title}': {e}", exc_info=True)
                 attempts += 1
                 continue
@@ -385,6 +416,7 @@ def curate_from_rss():
                     should_post_tweet=False
                 )
             except Exception as e:
+                print(f"Failed to update WordPress post '{title}' with share links: {e}")
                 logging.error(f"Failed to update WordPress post '{title}' with share links: {e}", exc_info=True)
             finally:
                 is_posting = False
@@ -392,41 +424,49 @@ def curate_from_rss():
             timestamp = datetime.now(timezone.utc).isoformat()
             save_json_file(POSTED_TITLES_FILE, title, timestamp)
             posted_titles.add(title)
+            print(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
             logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
 
             if image_url:
                 save_json_file(USED_IMAGES_FILE, image_url, timestamp)
                 used_images.add(image_url)
+                print(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
                 logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
 
+            print(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
             logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
-            return post_data, category, True
+            return post_data, category, random.randint(0, 1800)
 
             attempts += 1
+            print(f"WP posting failed for '{post_data['title']}'")
             logging.info(f"WP posting failed for '{post_data['title']}'")
 
+        print("No interesting RSS article found after attempts")
         logging.info("No interesting RSS article found after attempts")
-        return None, None, False
+        return None, None, random.randint(600, 1800)
     except Exception as e:
+        print(f"Unexpected error in curate_from_rss: {e}")
         logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True)
-        print(f"Error in curate_from_rss: {e}")
-        return None, None, False
+        return None, None, random.randint(600, 1800)
 
 def run_rss_automator():
     lock_fd = None
     try:
-        setup_logging()
         lock_fd = acquire_lock()
+        print(f"{datetime.now(timezone.utc)} - INFO - ***** RSS Automator Launched *****")
         logging.info("***** RSS Automator Launched *****")
-        post_data, category, should_continue = curate_from_rss()
+        post_data, category, sleep_time = curate_from_rss()
         if not post_data:
+            print("No postable RSS article found")
             logging.info("No postable RSS article found")
-        logging.info("Completed RSS run")
-        return post_data, category, should_continue
+        else:
+            print(f"Sleeping for {sleep_time}s")
+            logging.info(f"Completed run with sleep time: {sleep_time} seconds")
+            time.sleep(sleep_time)
+        return post_data, category, sleep_time
     except Exception as e:
+        print(f"Fatal error in run_rss_automator: {e}")
         logging.error(f"Fatal error in run_rss_automator: {e}", exc_info=True)
-        print(f"Fatal error: {e}")
-        return None, None, False
+        return None, None, random.randint(600, 1800)
     finally:
         if lock_fd:
             fcntl.flock(lock_fd, fcntl.LOCK_UN)
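
Two notes on the patch, plus a sketch of how the new return contract would be consumed. First, the added lines call `random.randint` and `time.sleep`, so `random` and `time` must already be imported higher in the file (the excerpt does not show the import block). Second, `logging.basicConfig(..., force=True)` requires Python 3.8+: it removes and closes any handlers already attached to the root logger before reconfiguring, which keeps the new module-level `setup_logging()` call from silently becoming a no-op if logging was configured earlier.

Both `curate_from_rss()` and `run_rss_automator()` now return a jittered sleep interval in seconds instead of a boolean. After a successful post, `run_rss_automator()` sleeps internally before returning; on the no-article and error paths it returns a 600-1800 second backoff without sleeping, so the caller is expected to wait. A minimal driver loop under those assumptions (the loop and module name are hypothetical, not part of the patch):

```python
import logging
import time

from foodie_automator import run_rss_automator  # hypothetical module name

def main() -> None:
    while True:
        post_data, category, sleep_time = run_rss_automator()
        if post_data is None:
            # Failure paths return a 600-1800 s backoff but do not sleep
            # internally, so the supervisor waits before retrying.
            logging.info("No post this run; retrying in %s seconds", sleep_time)
            time.sleep(sleep_time)
        # On success, run_rss_automator() has already slept, so the loop
        # proceeds straight into the next run.

if __name__ == "__main__":
    main()
```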