fix
This commit is contained in:
+32
-13
@@ -42,23 +42,21 @@ LOG_PRUNE_DAYS = 30
|
|||||||
FEED_TIMEOUT = 15
|
FEED_TIMEOUT = 15
|
||||||
MAX_RETRIES = 3
|
MAX_RETRIES = 3
|
||||||
RETRY_BACKOFF = 2
|
RETRY_BACKOFF = 2
|
||||||
IMAGE_UPLOAD_TIMEOUT = 30 # Added to match foodie_utils.py
|
IMAGE_UPLOAD_TIMEOUT = 30
|
||||||
|
|
||||||
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json'
|
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json'
|
||||||
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
|
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
|
||||||
EXPIRATION_HOURS = 24
|
EXPIRATION_HOURS = 24
|
||||||
IMAGE_EXPIRATION_DAYS = 7
|
IMAGE_EXPIRATION_DAYS = 7
|
||||||
|
|
||||||
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
|
|
||||||
posted_titles = set(entry["title"] for entry in posted_titles_data)
|
|
||||||
used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
|
|
||||||
|
|
||||||
def setup_logging():
|
def setup_logging():
|
||||||
"""Initialize logging with pruning of old logs."""
|
"""Initialize logging with pruning of old logs."""
|
||||||
try:
|
try:
|
||||||
|
logging.debug("Attempting to set up logging")
|
||||||
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
|
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
|
||||||
if not os.access(os.path.dirname(LOG_FILE), os.W_OK):
|
if not os.access(os.path.dirname(LOG_FILE), os.W_OK):
|
||||||
raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}")
|
raise PermissionError(f"No write permission for {os.path.dirname(LOG_FILE)}")
|
||||||
|
logging.debug(f"Log directory created/verified: {os.path.dirname(LOG_FILE)}")
|
||||||
if os.path.exists(LOG_FILE):
|
if os.path.exists(LOG_FILE):
|
||||||
with open(LOG_FILE, 'r') as f:
|
with open(LOG_FILE, 'r') as f:
|
||||||
lines = f.readlines()
|
lines = f.readlines()
|
||||||
@@ -80,10 +78,11 @@ def setup_logging():
|
|||||||
logging.info(f"Skipped {malformed_count} malformed log lines during pruning")
|
logging.info(f"Skipped {malformed_count} malformed log lines during pruning")
|
||||||
with open(LOG_FILE, 'w') as f:
|
with open(LOG_FILE, 'w') as f:
|
||||||
f.writelines(pruned_lines)
|
f.writelines(pruned_lines)
|
||||||
|
logging.debug(f"Log file pruned: {LOG_FILE}")
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
filename=LOG_FILE,
|
filename=LOG_FILE,
|
||||||
level=logging.INFO,
|
level=logging.DEBUG, # Changed to DEBUG for troubleshooting
|
||||||
format="%(asctime)s - %(levelname)s - %(message)s",
|
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||||
datefmt="%Y-%m-%d %H:%M:%S"
|
datefmt="%Y-%m-%d %H:%M:%S"
|
||||||
)
|
)
|
||||||
@@ -98,12 +97,14 @@ def setup_logging():
|
|||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
def acquire_lock():
|
def acquire_lock():
|
||||||
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
|
|
||||||
lock_fd = open(LOCK_FILE, 'w')
|
|
||||||
try:
|
try:
|
||||||
|
logging.debug("Attempting to acquire lock")
|
||||||
|
os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
|
||||||
|
lock_fd = open(LOCK_FILE, 'w')
|
||||||
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||||
lock_fd.write(str(os.getpid()))
|
lock_fd.write(str(os.getpid()))
|
||||||
lock_fd.flush()
|
lock_fd.flush()
|
||||||
|
logging.debug(f"Lock acquired: {LOCK_FILE}")
|
||||||
return lock_fd
|
return lock_fd
|
||||||
except IOError:
|
except IOError:
|
||||||
logging.info("Another instance of foodie_automator_rss.py is running")
|
logging.info("Another instance of foodie_automator_rss.py is running")
|
||||||
@@ -120,6 +121,11 @@ def signal_handler(sig, frame):
|
|||||||
signal.signal(signal.SIGTERM, signal_handler)
|
signal.signal(signal.SIGTERM, signal_handler)
|
||||||
signal.signal(signal.SIGINT, signal_handler)
|
signal.signal(signal.SIGINT, signal_handler)
|
||||||
|
|
||||||
|
# Initialize posted_titles after logging setup
|
||||||
|
posted_titles_data = []
|
||||||
|
posted_titles = set()
|
||||||
|
used_images = set()
|
||||||
|
|
||||||
def create_http_session() -> requests.Session:
|
def create_http_session() -> requests.Session:
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
retry_strategy = Retry(
|
retry_strategy = Retry(
|
||||||
@@ -237,10 +243,16 @@ def fetch_duckduckgo_news_context(title, hours=24):
|
|||||||
|
|
||||||
def curate_from_rss():
|
def curate_from_rss():
|
||||||
try:
|
try:
|
||||||
|
global posted_titles_data, posted_titles, used_images
|
||||||
|
posted_titles_data = load_json_file(POSTED_TITLES_FILE, EXPIRATION_HOURS)
|
||||||
|
posted_titles = set(entry["title"] for entry in posted_titles_data)
|
||||||
|
used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
|
||||||
|
logging.debug(f"Loaded {len(posted_titles)} posted titles and {len(used_images)} used images")
|
||||||
|
|
||||||
articles = fetch_rss_feeds()
|
articles = fetch_rss_feeds()
|
||||||
if not articles:
|
if not articles:
|
||||||
logging.info("No RSS articles available")
|
logging.info("No RSS articles available")
|
||||||
return None, None, False # Continue running
|
return None, None, False
|
||||||
|
|
||||||
attempts = 0
|
attempts = 0
|
||||||
max_attempts = 10
|
max_attempts = 10
|
||||||
@@ -259,7 +271,13 @@ def curate_from_rss():
|
|||||||
|
|
||||||
logging.info(f"Trying RSS Article: {title} from {source_name}")
|
logging.info(f"Trying RSS Article: {title} from {source_name}")
|
||||||
|
|
||||||
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
try:
|
||||||
|
image_query, relevance_keywords, main_topic, skip = smart_image_and_filter(title, summary)
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f"Failed to process smart_image_and_filter for '{title}': {e}")
|
||||||
|
attempts += 1
|
||||||
|
continue
|
||||||
|
|
||||||
if skip:
|
if skip:
|
||||||
logging.info(f"Skipping filtered RSS article: {title}")
|
logging.info(f"Skipping filtered RSS article: {title}")
|
||||||
attempts += 1
|
attempts += 1
|
||||||
@@ -382,21 +400,22 @@ def curate_from_rss():
|
|||||||
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
logging.info(f"Saved image '{image_url}' to {USED_IMAGES_FILE}")
|
||||||
|
|
||||||
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
|
logging.info(f"***** SUCCESS: Posted '{post_data['title']}' (ID: {post_id}) from RSS *****")
|
||||||
return post_data, category, True # Run again immediately
|
return post_data, category, True
|
||||||
attempts += 1
|
attempts += 1
|
||||||
logging.info(f"WP posting failed for '{post_data['title']}'")
|
logging.info(f"WP posting failed for '{post_data['title']}'")
|
||||||
|
|
||||||
logging.info("No interesting RSS article found after attempts")
|
logging.info("No interesting RSS article found after attempts")
|
||||||
return None, None, False # Wait before running again
|
return None, None, False
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True)
|
logging.error(f"Unexpected error in curate_from_rss: {e}", exc_info=True)
|
||||||
|
print(f"Error in curate_from_rss: {e}")
|
||||||
return None, None, False
|
return None, None, False
|
||||||
|
|
||||||
def run_rss_automator():
|
def run_rss_automator():
|
||||||
lock_fd = None
|
lock_fd = None
|
||||||
try:
|
try:
|
||||||
lock_fd = acquire_lock()
|
|
||||||
setup_logging()
|
setup_logging()
|
||||||
|
lock_fd = acquire_lock()
|
||||||
logging.info("***** RSS Automator Launched *****")
|
logging.info("***** RSS Automator Launched *****")
|
||||||
post_data, category, should_continue = curate_from_rss()
|
post_data, category, should_continue = curate_from_rss()
|
||||||
if not post_data:
|
if not post_data:
|
||||||
|
|||||||
+9
-10
@@ -370,11 +370,11 @@ def smart_image_and_filter(title, summary):
|
|||||||
|
|
||||||
prompt = (
|
prompt = (
|
||||||
"Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) "
|
"Analyze this article title and summary. Extract key entities (brands, locations, cuisines, or topics) "
|
||||||
"for an image search about food industry trends or viral content. Prioritize specific terms if present, "
|
"for an image search about food industry trends or viral content. Prioritize specific multi-word terms if present, "
|
||||||
"otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). "
|
"otherwise focus on the main theme. Also identify the main topic of the article (e.g., a specific food item or cuisine). "
|
||||||
"Return 'SKIP' if the article is about home appliances, recipes, promotions, or contains 'homemade', else 'KEEP'. "
|
"Return 'SKIP' if the article is about home appliances, recipes, promotions, or contains 'homemade', else 'KEEP'. "
|
||||||
"Return as JSON with double quotes for all property names and string values (e.g., "
|
"Return as JSON with double quotes for all property names and string values (e.g., "
|
||||||
"{\"image_query\": \"specific term\", \"relevance\": [\"keyword1\", \"keyword2\"], \"main_topic\": \"main food item\", \"action\": \"KEEP\" or \"SKIP\"})."
|
"{\"image_query\": \"fast food trends\", \"relevance\": [\"fast food\", \"dining\", \"culture\"], \"main_topic\": \"fast food\", \"action\": \"KEEP\"})."
|
||||||
)
|
)
|
||||||
|
|
||||||
response = client.chat.completions.create(
|
response = client.chat.completions.create(
|
||||||
@@ -386,7 +386,7 @@ def smart_image_and_filter(title, summary):
|
|||||||
max_tokens=100
|
max_tokens=100
|
||||||
)
|
)
|
||||||
raw_result = response.choices[0].message.content.strip()
|
raw_result = response.choices[0].message.content.strip()
|
||||||
logging.info(f"Raw GPT smart image/filter response: '{raw_result}'")
|
logging.debug(f"Raw GPT smart image/filter response: '{raw_result}'")
|
||||||
|
|
||||||
cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip()
|
cleaned_result = re.sub(r'```json\s*|\s*```', '', raw_result).strip()
|
||||||
fixed_result = re.sub(r"(?<!\\)'(?=\s*[\w\s]*\])|(?<=\[|\{|\s)'|'(?=\s*[\]\},:])|(?<=\w)'(?=\s*:)", '"', cleaned_result)
|
fixed_result = re.sub(r"(?<!\\)'(?=\s*[\w\s]*\])|(?<=\[|\{|\s)'|'(?=\s*[\]\},:])|(?<=\w)'(?=\s*:)", '"', cleaned_result)
|
||||||
@@ -395,14 +395,13 @@ def smart_image_and_filter(title, summary):
|
|||||||
result = json.loads(fixed_result)
|
result = json.loads(fixed_result)
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
logging.warning(f"JSON parsing failed: {e}, raw: '{fixed_result}'. Using fallback.")
|
logging.warning(f"JSON parsing failed: {e}, raw: '{fixed_result}'. Using fallback.")
|
||||||
# Fallback: Extract main topic using simple keyword matching
|
|
||||||
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
||||||
return main_topic, [main_topic, "food"], False
|
return main_topic, [main_topic, "food"], main_topic, False
|
||||||
|
|
||||||
if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result:
|
if not isinstance(result, dict) or "image_query" not in result or "relevance" not in result or "action" not in result:
|
||||||
logging.warning(f"Invalid GPT response format: {result}, using fallback")
|
logging.warning(f"Invalid GPT response format: {result}, using fallback")
|
||||||
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
||||||
return main_topic, [main_topic, "food"], False
|
return main_topic, [main_topic, "food"], main_topic, False
|
||||||
|
|
||||||
image_query = result["image_query"]
|
image_query = result["image_query"]
|
||||||
relevance_keywords = result["relevance"]
|
relevance_keywords = result["relevance"]
|
||||||
@@ -411,20 +410,20 @@ def smart_image_and_filter(title, summary):
|
|||||||
|
|
||||||
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}")
|
logging.info(f"Smart image query: {image_query}, Relevance: {relevance_keywords}, Main Topic: {main_topic}, Skip: {skip_flag}")
|
||||||
|
|
||||||
specific_single_words = ["kimchi", "sushi", "pizza", "taco", "burger"]
|
specific_single_words = ["kimchi", "sushi", "pizza", "taco", "burger", "chipotle", "starbucks", "mcdonalds"]
|
||||||
if not image_query:
|
if not image_query:
|
||||||
logging.warning(f"Image query is empty, using fallback")
|
logging.warning(f"Image query is empty, using fallback")
|
||||||
return main_topic, [main_topic, "food"], skip_flag
|
return main_topic, [main_topic, "food"], main_topic, skip_flag
|
||||||
if len(image_query.split()) < 2 and image_query.lower() not in specific_single_words:
|
if len(image_query.split()) < 2 and image_query.lower() not in specific_single_words:
|
||||||
logging.warning(f"Image query '{image_query}' too vague, using fallback")
|
logging.warning(f"Image query '{image_query}' too vague, using fallback")
|
||||||
return main_topic, [main_topic, "food"], skip_flag
|
return main_topic, [main_topic, "food"], main_topic, skip_flag
|
||||||
|
|
||||||
return image_query, relevance_keywords, main_topic, skip_flag
|
return image_query, relevance_keywords, main_topic, skip_flag
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Smart image/filter failed: {e}, using fallback")
|
logging.error(f"Smart image/filter failed: {e}, using fallback")
|
||||||
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
main_topic = extract_main_topic(title.lower() + " " + summary.lower())
|
||||||
return main_topic, [main_topic, "food"], False
|
return main_topic, [main_topic, "food"], main_topic, False
|
||||||
|
|
||||||
def extract_main_topic(text):
|
def extract_main_topic(text):
|
||||||
# Common food-related keywords (expand as needed)
|
# Common food-related keywords (expand as needed)
|
||||||
|
|||||||
Reference in New Issue
Block a user