diff --git a/foodie_engagement_tweet.py b/foodie_engagement_tweet.py index 8baf2d9..af98fc9 100644 --- a/foodie_engagement_tweet.py +++ b/foodie_engagement_tweet.py @@ -8,6 +8,7 @@ import fcntl import os import time import re +import unicodedata from datetime import datetime, timedelta, timezone import tweepy from openai import OpenAI @@ -28,6 +29,7 @@ MAX_RETRIES = 3 RETRY_BACKOFF = 2 URL = "https://insiderfoodie.com" URL_SHORTENED_LENGTH = 23 # Twitter's shortened URL length +CURRENT_YEAR = "2025" # Explicitly set the current year for the prompt def setup_logging(): """Initialize logging with pruning of old logs.""" @@ -113,6 +115,7 @@ try: logging.error(f"Invalid format in {AUTHOR_BACKGROUNDS_FILE}: Expected a list, got {type(background_list)}") raise ValueError("Author backgrounds must be a list") AUTHOR_BACKGROUNDS = {} + AUTHOR_BACKGROUNDS_LIST = background_list # Keep the original list for fallback lookup for bg in background_list: if "username" not in bg: logging.error(f"Invalid entry in {AUTHOR_BACKGROUNDS_FILE}: Missing 'username' key in {bg}") @@ -121,13 +124,16 @@ try: if not isinstance(username, str): logging.error(f"Invalid username type in {AUTHOR_BACKGROUNDS_FILE}: {username} (type: {type(username)})") raise ValueError("Username must be a string") - cleaned_username = username.strip().lower() + # Normalize the username to handle encoding differences + cleaned_username = unicodedata.normalize('NFC', username.strip().lower()) AUTHOR_BACKGROUNDS[cleaned_username] = bg + logging.debug(f"Added to AUTHOR_BACKGROUNDS: key='{cleaned_username}', value={bg}") loaded_usernames = list(AUTHOR_BACKGROUNDS.keys()) logging.debug(f"Loaded author backgrounds: {loaded_usernames}") except Exception as e: logging.error(f"Failed to load author_backgrounds.json: {e}", exc_info=True) AUTHOR_BACKGROUNDS = {} + AUTHOR_BACKGROUNDS_LIST = [] sys.exit(1) def validate_twitter_credentials(author): @@ -158,8 +164,8 @@ def validate_twitter_credentials(author): return False def remove_emojis(text): - """Remove emojis from the given text.""" - # Unicode ranges for emojis + """Remove emojis from the given text, including variation selectors.""" + # Unicode ranges for emojis, including variation selectors and combining characters emoji_pattern = re.compile( "[" "\U0001F600-\U0001F64F" # Emoticons @@ -173,6 +179,9 @@ def remove_emojis(text): "\U0001FA70-\U0001FAFF" # Symbols and Pictographs Extended-A "\U00002700-\U000027BF" # Dingbats "\U00002600-\U000026FF" # Miscellaneous Symbols + "\U0000FE00-\U0000FE0F" # Variation Selectors + "\U0000200D" # Zero Width Joiner + "\U0000200C" # Zero Width Non-Joiner "]+", flags=re.UNICODE ) @@ -212,8 +221,9 @@ def generate_engagement_tweet(author): persona = author["persona"] persona_config = PERSONA_CONFIGS.get(persona, PERSONA_CONFIGS["Visionary Editor"]) - # Lookup background using a dictionary - username_cleaned = username.strip().lower() + # Normalize and lookup background + username_cleaned = unicodedata.normalize('NFC', username.strip().lower()) + logging.debug(f"Looking up background for username: raw='{username}', cleaned='{username_cleaned}'") background = AUTHOR_BACKGROUNDS.get(username_cleaned, {}) # Debug comparison @@ -222,7 +232,7 @@ def generate_engagement_tweet(author): logging.debug(f"Direct key check: '{username_cleaned}' found in AUTHOR_BACKGROUNDS keys") else: logging.debug(f"Direct key check: '{username_cleaned}' NOT found in AUTHOR_BACKGROUNDS keys") - # Log byte-level comparison for the first available username as a sample + # Byte-level comparison for the first available username if available_usernames: sample_key = available_usernames[0] logging.debug( @@ -231,6 +241,25 @@ def generate_engagement_tweet(author): f"sample background key bytes = {list(sample_key.encode('utf-8'))}" ) + # Fallback lookup if dictionary fails + if not background: + logging.debug(f"Dictionary lookup failed for '{username_cleaned}', attempting fallback lookup") + for bg in AUTHOR_BACKGROUNDS_LIST: + bg_username = bg.get("username", "") + if not isinstance(bg_username, str): + logging.warning(f"Skipping background entry with non-string username: {bg_username} (type: {type(bg_username)})") + continue + bg_username_cleaned = unicodedata.normalize('NFC', bg_username.strip().lower()) + logging.debug( + f"Fallback comparison: " + f"author username (cleaned) = '{username_cleaned}', " + f"background username (cleaned) = '{bg_username_cleaned}'" + ) + if bg_username_cleaned == username_cleaned: + background = bg + logging.debug(f"Fallback lookup succeeded for '{username_cleaned}'") + break + if not background or "engagement_themes" not in background: logging.warning( f"No background or engagement themes found for {username}. " @@ -249,6 +278,7 @@ def generate_engagement_tweet(author): prompt = ( f"{base_prompt}\n\n" f"Generate an engagement tweet for {author_handle} asking a question about {theme} to engage the public. " + f"The current year is {CURRENT_YEAR}, and all references to the year should use {CURRENT_YEAR}. " f"Keep it under 230 characters to ensure room for the URL. " f"Use {persona_config['tone']}. " f"Include a call to action to follow {author_handle} or like the tweet, followed by the URL {URL} (do not mention InsiderFoodie.com separately in the text). "