diff --git a/foodie_automator_google.py b/foodie_automator_google.py
index 32214bf..5bf7ff6 100644
--- a/foodie_automator_google.py
+++ b/foodie_automator_google.py
@@ -256,8 +256,10 @@ def curate_from_google_trends(geo_list=['US']):
         if not image_url:
             image_url, image_source, uploader, page_url = get_image(image_query)
 
+        # Log the fetched image details
+        logging.info(f"Fetched image for '{post_data['title']}': URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}")
+
         hook = get_dynamic_hook(post_data["title"]).strip()
-        # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=None)
 
         # Generate viral share prompt
         share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
@@ -266,7 +268,7 @@ def curate_from_google_trends(geo_list=['US']):
             f' '
             f''
         )
-        post_data["content"] = f"{final_summary}\n\n{share_links_template}"  # Removed cta from content
+        post_data["content"] = f"{final_summary}\n\n{share_links_template}"
 
         global is_posting
         is_posting = True
@@ -292,8 +294,7 @@ def curate_from_google_trends(geo_list=['US']):
             share_text_encoded = quote(share_text)
             post_url_encoded = quote(post_url)
             share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
-            # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
-            post_data["content"] = f"{final_summary}\n\n{share_links}"  # Removed cta from content
+            post_data["content"] = f"{final_summary}\n\n{share_links}"
             is_posting = True
             try:
                 post_to_wp(
@@ -319,6 +320,16 @@ def curate_from_google_trends(geo_list=['US']):
         logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
 
         if image_url:
+            # Check if the image is already used; the reworked load_json_file expects hours, so convert days
+            used_images_list = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS * 24)
+            used_image_urls = {entry["title"] for entry in used_images_list}  # image URLs are stored under "title"
+            if image_url in used_image_urls:
+                logging.warning(f"Image '{image_url}' already used, attempting to fetch a new image")
+                image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
+                if not image_url:
+                    image_url, image_source, uploader, page_url = get_image(image_query)
+                logging.info(f"New image fetched: URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}")
+
             save_json_file(USED_IMAGES_FILE, image_url, timestamp)
             used_images.add(image_url)
             logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
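
Note on the share-link flow above: the template is built before the post URL exists (its placeholders survive the first f-string pass), then filled with percent-encoded values once WordPress returns the permalink. A minimal sketch of that two-pass templating, with hypothetical markup and values since the real anchor HTML is elided in this patch:

    from urllib.parse import quote

    # Stand-in for the elided share markup; {post_url} and {share_text}
    # stay as .format() placeholders until the post is published.
    share_links_template = 'Share: {post_url} ({share_text})'

    post_url = "https://example.com/?p=123"       # hypothetical values
    share_text = "This dish is trending!"
    share_links = share_links_template.format(
        post_url=quote(post_url),
        share_text=quote(share_text),
    )
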
diff --git a/foodie_automator_reddit.py b/foodie_automator_reddit.py
index f194789..0b3a55a 100644
--- a/foodie_automator_reddit.py
+++ b/foodie_automator_reddit.py
@@ -211,7 +211,7 @@ def curate_from_reddit():
     if not articles:
         print("No Reddit posts available")
         logging.info("No Reddit posts available")
-        return None, None, None
+        return None, None, random.randint(600, 1800)
 
     articles.sort(key=lambda x: x["upvotes"], reverse=True)
 
@@ -299,8 +299,10 @@ def curate_from_reddit():
         if not image_url:
             image_url, image_source, uploader, page_url = get_image(image_query)
 
+        # Log the fetched image details
+        logging.info(f"Fetched image for '{post_data['title']}': URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}")
+
         hook = get_dynamic_hook(post_data["title"]).strip()
-        # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=None)
 
         # Generate viral share prompt
         share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
@@ -309,7 +311,7 @@ def curate_from_reddit():
             f' '
             f''
         )
-        post_data["content"] = f"{final_summary}\n\n{share_links_template}"  # Removed cta from content
+        post_data["content"] = f"{final_summary}\n\n{share_links_template}"
 
         global is_posting
         is_posting = True
@@ -335,8 +337,7 @@ def curate_from_reddit():
             share_text_encoded = quote(share_text)
             post_url_encoded = quote(post_url)
             share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
-            # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
-            post_data["content"] = f"{final_summary}\n\n{share_links}"  # Removed cta from content
+            post_data["content"] = f"{final_summary}\n\n{share_links}"
             is_posting = True
             try:
                 post_to_wp(
@@ -362,6 +363,16 @@ def curate_from_reddit():
         logging.info(f"Successfully saved '{raw_title}' to {POSTED_TITLES_FILE} with timestamp {timestamp}")
 
         if image_url:
+            # Check if the image is already used; the reworked load_json_file expects hours, so convert days
+            used_images_list = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS * 24)
+            used_image_urls = {entry["title"] for entry in used_images_list}  # image URLs are stored under "title"
+            if image_url in used_image_urls:
+                logging.warning(f"Image '{image_url}' already used, attempting to fetch a new image")
+                image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
+                if not image_url:
+                    image_url, image_source, uploader, page_url = get_image(image_query)
+                logging.info(f"New image fetched: URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}")
+
             save_json_file(USED_IMAGES_FILE, image_url, timestamp)
             used_images.add(image_url)
             logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE} with timestamp {timestamp}")
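
The empty-feed path now returns a randomized back-off instead of a bare None, so a scheduler that sleeps on the third return value keeps working when Reddit has nothing to offer. A sketch of such a runner loop; the actual caller is outside this patch, so treat the names and the scheduling policy as assumptions:

    import random
    import time

    while True:
        title, post_url, delay = curate_from_reddit()
        # delay is 600-1800 s when no posts were available; fall back to the
        # same window after a successful run (assumed policy).
        time.sleep(delay if delay else random.randint(600, 1800))
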
diff --git a/foodie_automator_rss.py b/foodie_automator_rss.py
index d7220d5..e90ecbb 100644
--- a/foodie_automator_rss.py
+++ b/foodie_automator_rss.py
@@ -27,11 +27,15 @@ from foodie_utils import (
     generate_category_from_summary, post_to_wp, prepare_post_data,
     select_best_author, smart_image_and_filter, get_flickr_image
 )
-from foodie_hooks import get_dynamic_hook, get_viral_share_prompt  # Removed select_best_cta import
+from foodie_hooks import get_dynamic_hook, get_viral_share_prompt
 from dotenv import load_dotenv
 
 load_dotenv()
 
+# Log the script version so the running build can be confirmed from the logs
+SCRIPT_VERSION = "1.2.0"
+logging.info(f"Starting foodie_automator_rss.py version {SCRIPT_VERSION}")
+
 is_posting = False
 
 def signal_handler(sig, frame):
@@ -271,8 +275,10 @@ def curate_from_rss():
         if not image_url:
             image_url, image_source, uploader, page_url = get_image(image_query)
 
+        # Log the fetched image details
+        logging.info(f"Fetched image for '{post_data['title']}': URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}")
+
         hook = get_dynamic_hook(post_data["title"]).strip()
-        # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=None)
 
         # Generate viral share prompt
         share_prompt = get_viral_share_prompt(post_data["title"], final_summary)
@@ -281,7 +287,7 @@ def curate_from_rss():
             f' '
             f''
         )
-        post_data["content"] = f"{final_summary}\n\n{share_links_template}"  # Removed cta from content
+        post_data["content"] = f"{final_summary}\n\n{share_links_template}"
 
         global is_posting
         is_posting = True
@@ -307,8 +313,7 @@ def curate_from_rss():
             share_text_encoded = quote(share_text)
             post_url_encoded = quote(post_url)
             share_links = share_links_template.format(post_url=post_url_encoded, share_text=share_text_encoded)
-            # Removed: cta = select_best_cta(post_data["title"], final_summary, post_url=post_url)
-            post_data["content"] = f"{final_summary}\n\n{share_links}"  # Removed cta from content
+            post_data["content"] = f"{final_summary}\n\n{share_links}"
             is_posting = True
             try:
                 post_to_wp(
@@ -334,6 +339,16 @@ def curate_from_rss():
         logging.info(f"Successfully saved '{title}' to {POSTED_TITLES_FILE}")
 
         if image_url:
+            # Check if the image is already used; the reworked load_json_file expects hours, so convert days
+            used_images_list = load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS * 24)
+            used_image_urls = {entry["title"] for entry in used_images_list}  # image URLs are stored under "title"
+            if image_url in used_image_urls:
+                logging.warning(f"Image '{image_url}' already used, attempting to fetch a new image")
+                image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
+                if not image_url:
+                    image_url, image_source, uploader, page_url = get_image(image_query)
+                logging.info(f"New image fetched: URL={image_url}, Source={image_source}, Uploader={uploader}, Page URL={page_url}")
+
             save_json_file(USED_IMAGES_FILE, image_url, timestamp)
             used_images.add(image_url)
             logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
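
The used-image check above is pasted verbatim into all three automators; a shared helper in foodie_utils could hold it in one place. A sketch under that assumption, reusing names from this patch (used_images.json stores the image URL under the "title" key):

    def fetch_unused_image(image_query, relevance_keywords):
        # First candidate comes from Flickr; if it was already used within the
        # expiration window, retry Flickr once, then fall back to get_image().
        used = {entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS * 24)}
        image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
        if image_url in used:
            logging.warning(f"Image '{image_url}' already used, fetching a replacement")
            image_url, image_source, uploader, page_url = get_flickr_image(image_query, relevance_keywords)
        if not image_url:
            image_url, image_source, uploader, page_url = get_image(image_query)
        return image_url, image_source, uploader, page_url
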
diff --git a/foodie_config.py b/foodie_config.py
index 6be0e1d..ca51f36 100644
--- a/foodie_config.py
+++ b/foodie_config.py
@@ -260,14 +260,6 @@ CATEGORIES = [
     "Lifestyle", "Buzz", "Culture", "Health", "Drink", "Food", "Eats"
 ]
 
-CTAS = [
-    "Love This Take? Share It On !",
-    "Dig This Scoop? Post It On !",
-    "Wild For This? Spread It On !",
-    "Crave This Read? Tweet It On !",
-    "Buzzing Over This? Share On !"
-]
-
 REDDIT_CLIENT_ID = os.getenv("REDDIT_CLIENT_ID")
 REDDIT_CLIENT_SECRET = os.getenv("REDDIT_CLIENT_SECRET")
 REDDIT_USER_AGENT = os.getenv("REDDIT_USER_AGENT")
diff --git a/foodie_utils.py b/foodie_utils.py
index 9833897..73d272d 100644
--- a/foodie_utils.py
+++ b/foodie_utils.py
@@ -29,63 +29,62 @@ from foodie_config import (
 
 load_dotenv()
 client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
-def load_json_file(filename, expiration_days=None):
-    data = []
-    if os.path.exists(filename):
-        try:
-            with open(filename, 'r') as f:
-                lines = f.readlines()
-            for i, line in enumerate(lines, 1):
-                if line.strip():
-                    try:
-                        entry = json.loads(line.strip())
-                        if not isinstance(entry, dict) or "title" not in entry or "timestamp" not in entry:
-                            logging.warning(f"Skipping malformed entry in {filename} at line {i}: {entry}")
-                            continue
-                        data.append(entry)
-                    except json.JSONDecodeError as e:
-                        logging.warning(f"Skipping invalid JSON line in {filename} at line {i}: {e}")
-            if expiration_days:
-                cutoff = (datetime.now(timezone.utc) - timedelta(days=expiration_days)).isoformat()
-                data = [entry for entry in data if entry["timestamp"] > cutoff]
-            logging.info(f"Loaded {len(data)} entries from {filename}, {len(data)} valid after expiration check")
-        except Exception as e:
-            logging.error(f"Failed to load {filename}: {e}")
-            data = []  # Reset to empty on failure
-    return data
+def load_json_file(file_path, expiration_hours):
+    entries = []
+    cutoff = datetime.now(timezone.utc) - timedelta(hours=expiration_hours)
+
+    if not os.path.exists(file_path):
+        logging.info(f"File {file_path} does not exist, returning empty list")
+        return entries
+
+    try:
+        with open(file_path, 'r') as f:
+            lines = f.readlines()
+
+        for i, line in enumerate(lines, 1):
+            try:
+                entry = json.loads(line.strip())
+                if not isinstance(entry, dict) or "title" not in entry or "timestamp" not in entry:
+                    logging.warning(f"Skipping malformed entry in {file_path} at line {i}: {line.strip()}")
+                    continue
+
+                timestamp = datetime.fromisoformat(entry["timestamp"])
+                if timestamp > cutoff:
+                    entries.append(entry)
+                else:
+                    logging.debug(f"Entry expired in {file_path}: {entry['title']}")
+            except json.JSONDecodeError as e:
+                logging.warning(f"Skipping invalid JSON line in {file_path} at line {i}: {e}")
+                continue
+            except Exception:
+                logging.warning(f"Skipping malformed entry in {file_path} at line {i}: {line.strip()}")
+                continue
+
+        logging.info(f"Loaded {len(lines)} lines from {file_path}, {len(entries)} valid after expiration check")
+        return entries
+    except Exception as e:
+        logging.error(f"Failed to load {file_path}: {e}")
+        return entries
 
-def save_json_file(filename, key, value):
-    entry = {"title": key, "timestamp": value}
-    PRUNE_INTERVAL_DAYS = 180
+def save_json_file(file_path, title, timestamp):
     try:
-        data = load_json_file(filename, expiration_days=PRUNE_INTERVAL_DAYS)
-        # Remove duplicates by title
-        data = [item for item in data if item["title"] != key]
-        data.append(entry)
-        # Special handling for used_images.json to save as a flat list with one URL per line
-        if filename.endswith('used_images.json'):
-            used_images.add(key)
-            with open(filename, 'w') as f:
-                f.write('[\n')
-                urls = list(used_images)
-                for i, url in enumerate(urls):
-                    f.write(f'"{url}"')
-                    if i < len(urls) - 1:
-                        f.write(',\n')
-                    else:
-                        f.write('\n')
-                f.write(']')
-        else:
-            with open(filename, 'w') as f:
-                for item in data:
-                    json.dump(item, f)
-                    f.write('\n')
-        logging.info(f"Saved '{key}' to {filename}")
-        print(f"DEBUG: Saved '{key}' to {filename}")
-        loaded_data = load_json_file(filename, expiration_days=PRUNE_INTERVAL_DAYS)
-        logging.info(f"Pruned {filename} to {len(loaded_data)} entries (older than {PRUNE_INTERVAL_DAYS} days removed)")
+        entries = load_json_file(file_path, 24 if "posted_" in file_path else 7 * 24)  # 24 hours for titles, 7 days for images
+        entry = {"title": title, "timestamp": timestamp}
+        entries.append(entry)
+
+        # Prune entries older than the expiration period
+        expiration_hours = 24 if "posted_" in file_path else 7 * 24
+        cutoff = datetime.now(timezone.utc) - timedelta(hours=expiration_hours)
+        pruned_entries = [e for e in entries if datetime.fromisoformat(e["timestamp"]) > cutoff]
+
+        with open(file_path, 'w') as f:
+            for entry in pruned_entries:
+                f.write(json.dumps(entry) + '\n')
+
+        logging.info(f"Saved '{title}' to {file_path}")
+        logging.info(f"Pruned {file_path} to {len(pruned_entries)} entries (entries older than {expiration_hours} hours removed)")
     except Exception as e:
-        logging.error(f"Failed to save or prune {filename}: {e}")
+        logging.error(f"Failed to save to {file_path}: {e}")
 
 def load_post_counts():
     counts = []
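
Both helpers now share a single on-disk format: one JSON object per line with "title" and "timestamp" keys, with expiry computed in hours against timezone-aware timestamps. A quick round-trip sketch (file name illustrative):

    from datetime import datetime, timezone

    now = datetime.now(timezone.utc).isoformat()
    save_json_file("posted_titles.json", "Some Trending Dish", now)

    # "posted_" files keep 24 hours of history; everything else keeps 7 * 24.
    entries = load_json_file("posted_titles.json", 24)
    assert any(e["title"] == "Some Trending Dish" for e in entries)
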
@@ -886,7 +885,8 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im
     if content is None:
         logging.error(f"Post content is None for title '{post_data['title']}' - using fallback")
         content = "Content unavailable. Check the original source for details."
-    formatted_content = "\n".join(f"{para}<br>" for para in content.split('\n') if para.strip())
+    formatted_content = "\n".join(f"{para}<br>" for para in content.split('\n') if para.strip())
+
     author_id_map = {
         "owenjohnson": 10,
         "javiermorales": 2,
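
A quick worked example of the paragraph formatting above, assuming the <br> joiner as reconstructed:

    content = "First paragraph.\nSecond paragraph.\n\nThird."
    formatted = "\n".join(f"{para}<br>" for para in content.split('\n') if para.strip())
    # -> 'First paragraph.<br>\nSecond paragraph.<br>\nThird.<br>'
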
" for para in content.split('\n') if para.strip()) + formatted_content = "\n".join(f"{para}
" for para in content.split('\n') if para.strip()) + author_id_map = { "owenjohnson": 10, "javiermorales": 2, @@ -897,6 +897,20 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im } author_id = author_id_map.get(author["username"], 5) + # Handle image upload + image_id = None + if image_url: + logging.info(f"Attempting image upload for '{post_data['title']}', URL: {image_url}, source: {image_source}") + image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, pixabay_url) + if not image_id: + logging.info(f"Flickr upload failed for '{post_data['title']}', falling back to Pixabay") + pixabay_query = post_data["title"][:50] + image_url, image_source, uploader, pixabay_url = get_image(pixabay_query) + if image_url: + image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, pixabay_url) + if not image_id: + logging.warning(f"All image uploads failed for '{post_data['title']}' - posting without image") + payload = { "title": post_data["title"], "content": formatted_content, @@ -911,19 +925,9 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im } } - if image_url and not post_id: - logging.info(f"Attempting image upload for '{post_data['title']}', URL: {image_url}, source: {image_source}") - image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, pixabay_url) - if not image_id: - logging.info(f"Flickr upload failed for '{post_data['title']}', falling back to Pixabay") - pixabay_query = post_data["title"][:50] - image_url, image_source, uploader, pixabay_url = get_image(pixabay_query) - if image_url: - image_id = upload_image_to_wp(image_url, post_data["title"], wp_base_url, wp_username, wp_password, image_source, uploader, pixabay_url) - if image_id: - payload["featured_media"] = image_id - else: - logging.warning(f"All image uploads failed for '{post_data['title']}' - posting without image") + if image_id: + payload["featured_media"] = image_id + logging.info(f"Set featured image for post '{post_data['title']}': Media ID={image_id}") endpoint = f"{wp_base_url}/posts/{post_id}" if post_id else f"{wp_base_url}/posts" method = requests.post @@ -951,7 +955,7 @@ def post_to_wp(post_data, category, link, author, image_url, original_source, im try: post = {"title": post_data["title"], "url": post_url} tweet = generate_article_tweet(author, post, author["persona"]) - if post_tweet(author, tweet): # Use the actual post_tweet function + if post_tweet(author, tweet): logging.info(f"Successfully posted article tweet for {author['username']} on X") else: logging.warning(f"Failed to post article tweet for {author['username']} on X")