Merge X posting into the main files
This commit is contained in:
+25
-25
@@ -1,3 +1,4 @@
|
||||
# foodie_automator_rss.py
|
||||
import requests
|
||||
import random
|
||||
import time
|
||||
@@ -13,12 +14,17 @@ from openai import OpenAI
|
||||
from urllib.parse import quote
|
||||
from requests.packages.urllib3.util.retry import Retry
|
||||
from requests.adapters import HTTPAdapter
|
||||
from foodie_config import RSS_FEEDS, RSS_FEED_NAMES, AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS, HOME_KEYWORDS, PRODUCT_KEYWORDS, SUMMARY_PERSONA_PROMPTS, CATEGORIES, get_clean_source_name
|
||||
from foodie_config import (
|
||||
RSS_FEEDS, RSS_FEED_NAMES, AUTHORS, RECIPE_KEYWORDS, PROMO_KEYWORDS,
|
||||
HOME_KEYWORDS, PRODUCT_KEYWORDS, PERSONA_CONFIGS, CATEGORIES,
|
||||
get_clean_source_name, X_API_CREDENTIALS
|
||||
)
|
||||
from foodie_utils import (
|
||||
load_json_file, save_json_file, get_image, generate_image_query,
|
||||
upload_image_to_wp, determine_paragraph_count, insert_link_naturally, is_interesting,
|
||||
generate_title_from_summary, summarize_with_gpt4o, generate_category_from_summary, post_to_wp,
|
||||
prepare_post_data, select_best_author, smart_image_and_filter
|
||||
upload_image_to_wp, determine_paragraph_count, insert_link_naturally,
|
||||
is_interesting, generate_title_from_summary, summarize_with_gpt4o,
|
||||
generate_category_from_summary, post_to_wp, prepare_post_data,
|
||||
select_best_author, smart_image_and_filter
|
||||
)
|
||||
from foodie_hooks import get_dynamic_hook, select_best_cta
|
||||
import feedparser
|
||||
@@ -27,6 +33,7 @@ from typing import List, Dict, Any, Optional
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# Flag to indicate if we're in the middle of posting
|
||||
is_posting = False
|
||||
|
||||
@@ -43,10 +50,10 @@ signal.signal(signal.SIGINT, signal_handler)
|
||||
|
||||
LOG_FILE = "/home/shane/foodie_automator/foodie_automator_rss.log"
|
||||
LOG_PRUNE_DAYS = 30
|
||||
MAX_WORKERS = 5 # Number of concurrent workers for parallel processing
|
||||
RATE_LIMIT_DELAY = 1 # Delay between API calls in seconds
|
||||
FEED_TIMEOUT = 30 # Timeout for feed requests in seconds
|
||||
MAX_RETRIES = 3 # Maximum number of retries for failed requests
|
||||
MAX_WORKERS = 5
|
||||
RATE_LIMIT_DELAY = 1
|
||||
FEED_TIMEOUT = 30
|
||||
MAX_RETRIES = 3
|
||||
|
||||
POSTED_TITLES_FILE = '/home/shane/foodie_automator/posted_rss_titles.json'
|
||||
USED_IMAGES_FILE = '/home/shane/foodie_automator/used_images.json'
|
||||
@@ -58,7 +65,6 @@ posted_titles = set(entry["title"] for entry in posted_titles_data)
|
||||
used_images = set(entry["title"] for entry in load_json_file(USED_IMAGES_FILE, IMAGE_EXPIRATION_DAYS) if "title" in entry)
|
||||
|
||||
def setup_logging():
|
||||
"""Configure logging with rotation and cleanup."""
|
||||
if os.path.exists(LOG_FILE):
|
||||
with open(LOG_FILE, 'r') as f:
|
||||
lines = f.readlines()
|
||||
@@ -81,9 +87,14 @@ def setup_logging():
|
||||
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||
datefmt="%Y-%m-%d %H:%M:%S"
|
||||
)
|
||||
console_handler = logging.StreamHandler()
|
||||
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
|
||||
logging.getLogger().addHandler(console_handler)
|
||||
logging.info("Logging initialized for foodie_automator_rss.py")
|
||||
|
||||
setup_logging()
|
||||
|
||||
def create_http_session() -> requests.Session:
|
||||
"""Create and configure an HTTP session with retry logic."""
|
||||
session = requests.Session()
|
||||
retry_strategy = Retry(
|
||||
total=MAX_RETRIES,
|
||||
@@ -101,7 +112,6 @@ def create_http_session() -> requests.Session:
|
||||
return session
|
||||
|
||||
def fetch_feed(feed_url: str, session: requests.Session) -> Optional[feedparser.FeedParserDict]:
|
||||
"""Fetch and parse an RSS feed with error handling and retries."""
|
||||
try:
|
||||
response = session.get(feed_url, timeout=FEED_TIMEOUT)
|
||||
response.raise_for_status()
|
||||
@@ -117,20 +127,14 @@ def fetch_feed(feed_url: str, session: requests.Session) -> Optional[feedparser.
|
||||
return None
|
||||
|
||||
def is_interesting_rss(title: str, summary: str, pub_date: datetime) -> bool:
|
||||
"""Enhanced content filtering with improved scoring."""
|
||||
try:
|
||||
# Basic validation
|
||||
if not title or not summary:
|
||||
return False
|
||||
|
||||
# Check if content is too old
|
||||
if datetime.now(timezone.utc) - pub_date > timedelta(days=7):
|
||||
return False
|
||||
|
||||
# Calculate interest score
|
||||
score = 0
|
||||
|
||||
# Title analysis
|
||||
title_lower = title.lower()
|
||||
if any(keyword in title_lower for keyword in RECIPE_KEYWORDS):
|
||||
score += 3
|
||||
@@ -139,7 +143,6 @@ def is_interesting_rss(title: str, summary: str, pub_date: datetime) -> bool:
|
||||
if any(keyword in title_lower for keyword in HOME_KEYWORDS):
|
||||
score += 1
|
||||
|
||||
# Content analysis
|
||||
summary_lower = summary.lower()
|
||||
if len(summary.split()) < 100:
|
||||
score -= 2
|
||||
@@ -152,7 +155,6 @@ def is_interesting_rss(title: str, summary: str, pub_date: datetime) -> bool:
|
||||
return False
|
||||
|
||||
def fetch_rss_feeds() -> List[Dict[str, Any]]:
|
||||
"""Fetch RSS feeds with parallel processing and improved error handling."""
|
||||
session = create_http_session()
|
||||
articles = []
|
||||
|
||||
@@ -177,7 +179,6 @@ def fetch_rss_feeds() -> List[Dict[str, Any]]:
|
||||
return []
|
||||
|
||||
def process_feed(feed_url: str, session: requests.Session) -> List[Dict[str, Any]]:
|
||||
"""Process a single RSS feed and extract articles."""
|
||||
try:
|
||||
feed = fetch_feed(feed_url, session)
|
||||
if not feed:
|
||||
@@ -192,7 +193,8 @@ def process_feed(feed_url: str, session: requests.Session) -> List[Dict[str, Any
|
||||
"title": entry.title,
|
||||
"link": entry.link,
|
||||
"summary": entry.summary if hasattr(entry, 'summary') else entry.description,
|
||||
"feed_title": get_clean_source_name(feed.feed.title),
|
||||
"content": getattr(entry, 'content', [{'value': ''}])[0].value,
|
||||
"feed_title": get_clean_source_name(feed_url),
|
||||
"pub_date": pub_date
|
||||
}
|
||||
|
||||
@@ -229,13 +231,12 @@ def curate_from_rss():
|
||||
attempts = 0
|
||||
max_attempts = 10
|
||||
while attempts < max_attempts and articles:
|
||||
article = articles.pop(0) # Take newest article
|
||||
article = articles.pop(0)
|
||||
title = article["title"]
|
||||
link = article["link"]
|
||||
summary = article["summary"]
|
||||
content = article["content"]
|
||||
feed_url = article["feed_title"]
|
||||
source_name = feed_url[0] if isinstance(feed_url, tuple) and len(feed_url) > 0 else feed_url
|
||||
source_name = article["feed_title"]
|
||||
original_source = f'<a href="{link}">{source_name}</a>'
|
||||
|
||||
if title in posted_titles:
|
||||
@@ -254,7 +255,6 @@ def curate_from_rss():
|
||||
attempts += 1
|
||||
continue
|
||||
|
||||
# Score using title, summary, and content
|
||||
scoring_content = f"{title}\n\n{summary}\n\nContent: {content}"
|
||||
interest_score = is_interesting(scoring_content)
|
||||
logging.info(f"Interest score for '{title}': {interest_score}")
|
||||
|
||||
Reference in New Issue
Block a user