@@ -23,7 +23,7 @@ import flickr_api
 from foodie_config import (
     RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, PERSONA_CONFIGS,
     get_clean_source_name, AUTHORS, LIGHT_TASK_MODEL, SUMMARY_MODEL, X_API_CREDENTIALS,
-    FLICKR_API_KEY, FLICKR_API_SECRET, PIXABAY_API_KEY
+    FLICKR_API_KEY, FLICKR_API_SECRET, PIXABAY_API_KEY, RECENT_POSTS_FILE, USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS
 )

 load_dotenv()
@@ -33,6 +33,7 @@ IMAGE_UPLOAD_TIMEOUT = 30 # Added to fix NameError
+IMAGE_EXPIRATION_DAYS = 7  # 7 days, consistent with foodie_automator_rss.py

 def load_json_file(file_path, expiration_hours=None):
     """Load JSON file and return its contents as a list."""
     try:
         if not os.path.exists(file_path):
             logging.info(f"File {file_path} does not exist, initializing with empty list")
@@ -41,26 +42,13 @@ def load_json_file(file_path, expiration_hours=None):
             return []

         with open(file_path, 'r') as f:
-            try:
-                data = json.load(f)
-            except json.JSONDecodeError as e:
-                logging.warning(f"Invalid JSON in {file_path}: {e}. Attempting line-by-line parsing.")
-                data = []
-                f.seek(0)
-                for line_number, line in enumerate(f, 1):
-                    line = line.strip()
-                    if not line:
-                        continue
-                    try:
-                        entry = json.loads(line)
-                        data.append(entry)
-                    except json.JSONDecodeError as e:
-                        logging.warning(f"Skipping invalid JSON line in {file_path} at line {line_number}: {e}")
-                        continue
+            data = json.load(f)

         if not isinstance(data, list):
             logging.warning(f"Data in {file_path} is not a list, resetting to empty list")
-            data = []
+            with open(file_path, 'w') as f:
+                json.dump([], f)
+            return []

         valid_entries = []
         if expiration_hours:
@@ -79,29 +67,26 @@ def load_json_file(file_path, expiration_hours=None):
         else:
             valid_entries = data

-        logging.info(f"Loaded {len(valid_entries)} entries from {file_path}, {len(valid_entries)} valid after expiration check")
+        logging.info(f"Loaded {len(valid_entries)} valid entries from {file_path}")
         return valid_entries
     except json.JSONDecodeError as e:
         logging.error(f"Invalid JSON in {file_path}: {e}. Resetting to empty list.")
         with open(file_path, 'w') as f:
             json.dump([], f)
         return []
     except Exception as e:
         logging.error(f"Failed to load JSON file {file_path}: {e}")
         return []

 def save_json_file(file_path, title, timestamp):
     """Save an entry to a JSON file, maintaining a JSON array."""
     try:
-        entries = load_json_file(file_path, 24 if "posted_" in file_path else 7 * 24)  # 24 hours for titles, 7 days for images
+        entries = load_json_file(file_path, 24 if "posted_" in file_path else IMAGE_EXPIRATION_DAYS * 24)
         entry = {"title": title, "timestamp": timestamp}
         entries.append(entry)

-        # Prune entries older than expiration period
-        expiration_hours = 24 if "posted_" in file_path else 7 * 24
-        cutoff = datetime.now(timezone.utc) - timedelta(hours=expiration_hours)
-        pruned_entries = [e for e in entries if datetime.fromisoformat(e["timestamp"]) > cutoff]
-
         with open(file_path, 'w') as f:
-            for entry in pruned_entries:
-                f.write(json.dumps(entry) + '\n')
+            json.dump(entries, f, indent=2)
         logging.info(f"Saved '{title}' to {file_path}")
-        logging.info(f"Pruned {file_path} to {len(pruned_entries)} entries (older than {expiration_hours//24} days removed)")
     except Exception as e:
         logging.error(f"Failed to save to {file_path}: {e}")
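Note: with the hunks above, every cache file this module writes (the "posted_" title caches, the used-image list, recent_posts.json) becomes a standard JSON array of {"title": ..., "timestamp": ...} objects instead of line-delimited JSON, with expiration handled centrally by load_json_file. A minimal round-trip sketch, assuming the two functions above are in scope; the file name here is illustrative, not from the patch:

    from datetime import datetime, timezone

    cache_file = "posted_titles.json"  # hypothetical path; "posted_" in the name selects the 24-hour expiry
    save_json_file(cache_file, "Sample Recipe Roundup", datetime.now(timezone.utc).isoformat())
    entries = load_json_file(cache_file, expiration_hours=24)  # entries older than 24h are dropped on load
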
@@ -790,8 +775,6 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im
         content = "Content unavailable. Check the original source for details."
     formatted_content = "\n".join(f"<p>{para}</p>" for para in content.split('\n') if para.strip())

+    # Removed the block that appends image attribution to the content

     author_id_map = {
         "owenjohnson": 10,
         "javiermorales": 2,
@@ -850,6 +833,8 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im
     post_id = post_info["id"]
     post_url = post_info["link"]

-    timestamp = datetime.now(timezone.utc).isoformat()
-    save_post_to_recent(post_data["title"], post_url, author["username"], timestamp)
+    # Save to recent_posts.json only on initial post, not updates
+    if not post_id:
+        timestamp = datetime.now(timezone.utc).isoformat()
+        save_post_to_recent(post_data["title"], post_url, author["username"], timestamp)
@@ -1234,9 +1219,11 @@ def prepare_post_data(summary, title, main_topic=None):
         logging.error(f"Failed to prepare post data: {e}")
         return None, None, None, None, None, None, None

 def save_post_to_recent(post_title, post_url, author_username, timestamp):
     """Save a post to recent_posts.json, maintaining a JSON array."""
     try:
-        recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json', 24)  # Added expiration_hours
+        recent_posts = load_json_file(RECENT_POSTS_FILE, expiration_hours=24)
         entry = {
             "title": post_title,
             "url": post_url,
@@ -1244,23 +1231,18 @@ def save_post_to_recent(post_title, post_url, author_username, timestamp):
             "timestamp": timestamp
         }
         recent_posts.append(entry)
-        with open('/home/shane/foodie_automator/recent_posts.json', 'w') as f:
-            for item in recent_posts:
-                json.dump(item, f)
-                f.write('\n')
-        logging.info(f"Saved post '{post_title}' to recent_posts.json")
+        with open(RECENT_POSTS_FILE, 'w') as f:
+            json.dump(recent_posts, f, indent=2)
+        logging.info(f"Saved post '{post_title}' to {RECENT_POSTS_FILE}")
     except Exception as e:
-        logging.error(f"Failed to save post to recent_posts.json: {e}")
+        logging.error(f"Failed to save post to {RECENT_POSTS_FILE}: {e}")

 def prune_recent_posts():
     """Prune recent_posts.json to keep entries within the last 24 hours."""
     try:
-        cutoff = (datetime.now(timezone.utc) - timedelta(hours=24)).isoformat()
-        recent_posts = load_json_file('/home/shane/foodie_automator/recent_posts.json')
-        recent_posts = [entry for entry in recent_posts if entry["timestamp"] > cutoff]
-        with open('/home/shane/foodie_automator/recent_posts.json', 'w') as f:
-            for item in recent_posts:
-                json.dump(item, f)
-                f.write('\n')
-        logging.info(f"Pruned recent_posts.json to {len(recent_posts)} entries")
+        recent_posts = load_json_file(RECENT_POSTS_FILE, expiration_hours=24)
+        with open(RECENT_POSTS_FILE, 'w') as f:
+            json.dump(recent_posts, f, indent=2)
+        logging.info(f"Pruned {RECENT_POSTS_FILE} to {len(recent_posts)} entries")
     except Exception as e:
-        logging.error(f"Failed to prune recent_posts.json: {e}")
+        logging.error(f"Failed to prune {RECENT_POSTS_FILE}: {e}")
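Note: this patch assumes foodie_config defines the three newly imported constants. A minimal sketch of those definitions; RECENT_POSTS_FILE matches the hardcoded path being replaced above, the other values are illustrative:

    # foodie_config.py (sketch; only the names this patch references)
    RECENT_POSTS_FILE = "/home/shane/foodie_automator/recent_posts.json"  # replaces the hardcoded path
    USED_IMAGES_FILE = "/home/shane/foodie_automator/used_images.json"  # illustrative path
    IMAGE_EXPIRATION_DAYS = 7  # keep in sync with the module-level default added in the second hunk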