merge posting x into main files

This commit is contained in:
2025-04-28 21:23:12 +10:00
parent a1d2ce4215
commit ea7d36a22b
7 changed files with 394 additions and 446 deletions
+25 -25
View File
@@ -1,3 +1,4 @@
# foodie_automator_rss.py
import requests
import random
import time
@@ -13,12 +14,17 @@ from openai import OpenAI
from urllib.parse import quote
from requests.packages.urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
from foodie_config import RSS_FEEDS, RSS_FEED_NAMES, AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, SUMMARY_PERSONA_PROMPTS, CATEGORIES, get_clean_source_name
from foodie_config import (
RSS_FEEDS, RSS_FEED_NAMES, AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS,
HOME_KEYWORDS, PRODUCT_KEYWORDS, PERSONA_CONFIGS, CATEGORIES,
get_clean_source_name, X_API_CREDENTIALS
)
from foodie_utils import (
load_json_file, save_json_file, get_image, generate_image_query,
upload_image_to_wp, determine_paragraph_count, insert_link_naturally, is_interesting,
generate_title_from_summary, summarize_with_gpt4o, generate_category_from_summary, post_to_wp,
prepare_post_data, select_best_author, smart_image_and_filter
upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
is_interesting, generate_title_from_summary, summarize_with_gpt4o,
generate_category_from_summary, post_to_wp, prepare_post_data,
select_best_author, smart_image_and_filter
)
from foodie_hooks import get_dynamic_hook, select_best_cta
import feedparser
@@ -27,6 +33,7 @@ from typing import List, Dict, Any, Optional
from dotenv import load_dotenv
load_dotenv()
# Flag to indicate if we're in the middle of posting
is_posting = False
@@ -43,10 +50,10 @@ signal.signal(signal.SIGINT, signal_handler)
LOG_FILE = "/home/shane/foodie_automator/foodie_automator_rss.log"
LOG_PRUNE_DAYS = 30
MAX_WORKERS = 5 # Number of concurrent workers for parallel processing
RATE_LIMIT_DELAY = 1 # Delay between API calls in seconds
FEED_TIMEOUT = 30 # Timeout for feed requests in seconds
MAX_RETRIES = 3 # Maximum number of retries for failed requests
MAX_WORKERS = 5
RATE_LIMIT_DELAY = 1
FEED_TIMEOUT = 30
MAX_RETRIES = 3
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json'
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
@@ -58,7 +65,6 @@ posted_titles = set(entry["title"] for entry in posted_titles_data)
used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
def setup_logging():
"""Configure logging with rotation and cleanup."""
if os.path.exists(LOG_FILE):
with open(LOG_FILE, 'r') as f:
lines = f.readlines()
@@ -81,9 +87,14 @@ def setup_logging():
format="%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console_handler)
logging.info("Logging initialized for foodie_automator_rss.py")
setup_logging()
def create_http_session() -> requests.Session:
"""Create and configure an HTTP session with retry logic."""
session = requests.Session()
retry_strategy = Retry(
total=MAX_RETRIES,
@@ -101,7 +112,6 @@ def create_http_session() -> requests.Session:
return session
def fetch_feed(feed_url: str, session: requests.Session) -> Optional[feedparser.FeedParserDict]:
"""Fetch and parse an RSS feed with error handling and retries."""
try:
response = session.get(feed_url, timeout=FEED_TIMEOUT)
response.raise_for_status()
@@ -117,20 +127,14 @@ def fetch_feed(feed_url: str, session: requests.Session) -> Optional[feedparser.
return None
def is_interesting_rss(title: str, summary: str, pub_date: datetime) -> bool:
"""Enhanced content filtering with improved scoring."""
try:
# Basic validation
if not title or not summary:
return False
# Check if content is too old
if datetime.now(timezone.utc) - pub_date > timedelta(days=7):
return False
# Calculate interest score
score = 0
# Title analysis
title_lower = title.lower()
if any(keyword in title_lower for keyword in RECIPE_KEYWORDS):
score += 3
@@ -139,7 +143,6 @@ def is_interesting_rss(title: str, summary: str, pub_date: datetime) -> bool:
if any(keyword in title_lower for keyword in HOME_KEYWORDS):
score += 1
# Content analysis
summary_lower = summary.lower()
if len(summary.split()) < 100:
score -= 2
@@ -152,7 +155,6 @@ def is_interesting_rss(title: str, summary: str, pub_date: datetime) -> bool:
return False
def fetch_rss_feeds() -> List[Dict[str, Any]]:
"""Fetch RSS feeds with parallel processing and improved error handling."""
session = create_http_session()
articles = []
@@ -177,7 +179,6 @@ def fetch_rss_feeds() -> List[Dict[str, Any]]:
return []
def process_feed(feed_url: str, session: requests.Session) -> List[Dict[str, Any]]:
"""Process a single RSS feed and extract articles."""
try:
feed = fetch_feed(feed_url, session)
if not feed:
@@ -192,7 +193,8 @@ def process_feed(feed_url: str, session: requests.Session) -> List[Dict[str, Any
"title": entry.title,
"link": entry.link,
"summary": entry.summary if hasattr(entry, 'summary') else entry.description,
"feed_title": get_clean_source_name(feed.feed.title),
"content": getattr(entry, 'content', [{'value': ''}])[0].value,
"feed_title": get_clean_source_name(feed_url),
"pub_date": pub_date
}
@@ -229,13 +231,12 @@ def curate_from_rss():
attempts = 0
max_attempts = 10
while attempts < max_attempts and articles:
article = articles.pop(0) # Take newest article
article = articles.pop(0)
title = article["title"]
link = article["link"]
summary = article["summary"]
content = article["content"]
feed_url = article["feed_title"]
source_name = feed_url[0] if isinstance(feed_url, tuple) and len(feed_url) > 0 else feed_url
source_name = article["feed_title"]
original_source = f'<a href="{link}">{source_name}</a>'
if title in posted_titles:
@@ -254,7 +255,6 @@ def curate_from_rss():
attempts += 1
continue
# Score using title, summary, and content
scoring_content = f"{title}\n\n{summary}\n\nContent: {content}"
interest_score = is_interesting(scoring_content)
logging.info(f"Interest score for '{title}': {interest_score}")