Branch: my-fix-branch
Shane committed 7 months ago
parent 3fc1b40368 | commit 21e0c8f41a

foodie_automator_rss.py

@@ -116,8 +116,9 @@ def create_http_session() -> requests.Session:
     return session

 def fetch_feed(feed_url: str, session: requests.Session) -> Optional[feedparser.FeedParserDict]:
+    logging.debug(f"Fetching feed: {feed_url}")
     try:
-        response = session.get(feed_url, timeout=FEED_TIMEOUT)
+        response = session.get(feed_url, timeout=15)  # Reduced timeout to 15 seconds
         response.raise_for_status()
         feed = feedparser.parse(response.content)
@@ -125,6 +126,7 @@ def fetch_feed(feed_url: str, session: requests.Session) -> Optional[feedparser.FeedParserDict]:
             logging.warning(f"Feed parsing error for {feed_url}: {feed.bozo_exception}")
             return None
+        logging.debug(f"Successfully fetched feed: {feed_url}")
         return feed
     except Exception as e:
         logging.error(f"Error fetching feed {feed_url}: {str(e)}")
@@ -159,38 +161,46 @@ def is_interesting_rss(title: str, summary: str, pub_date: datetime) -> bool:
     return False

 def fetch_rss_feeds() -> List[Dict[str, Any]]:
+    logging.info("Starting fetch_rss_feeds")
     session = create_http_session()
     articles = []
     try:
+        logging.info(f"Processing {len(RSS_FEEDS)} feeds: {RSS_FEEDS}")
         with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
             futures = []
             for feed_url in RSS_FEEDS:
+                logging.debug(f"Scheduling feed: {feed_url}")
                 future = executor.submit(process_feed, feed_url, session)
                 futures.append(future)
             for future in as_completed(futures):
                 try:
                     feed_articles = future.result()
+                    logging.info(f"Completed feed processing, got {len(feed_articles)} articles")
                     articles.extend(feed_articles)
                 except Exception as e:
-                    logging.error(f"Error processing feed: {str(e)}")
+                    logging.error(f"Error processing feed in future: {str(e)}")
                     continue
+        logging.info(f"Finished fetch_rss_feeds, total articles: {len(articles)}")
         return articles
     except Exception as e:
         logging.error(f"Error in fetch_rss_feeds: {str(e)}")
         return []

 def process_feed(feed_url: str, session: requests.Session) -> List[Dict[str, Any]]:
+    logging.info(f"Processing feed: {feed_url}")
     try:
         feed = fetch_feed(feed_url, session)
         if not feed:
+            logging.warning(f"No feed data for {feed_url}")
             return []
         articles = []
+        logging.debug(f"Feed entries count: {len(feed.entries)}")
         for entry in feed.entries:
             try:
+                logging.debug(f"Processing entry: {entry.get('title', 'No title')}")
                 pub_date = datetime.fromtimestamp(time.mktime(entry.published_parsed), tz=timezone.utc)

                 # Safely extract content
@@ -216,13 +226,15 @@ def process_feed(feed_url: str, session: requests.Session) -> List[Dict[str, Any]]:
                 }

                 if is_interesting_rss(article["title"], article["summary"], pub_date):
+                    logging.info(f"Interesting article found: {article['title']}")
                     articles.append(article)
                 time.sleep(RATE_LIMIT_DELAY)
             except Exception as e:
-                logging.warning(f"Error processing entry: {str(e)}")
+                logging.warning(f"Error processing entry in {feed_url}: {str(e)}")
                 continue
+        logging.info(f"Finished processing {feed_url}, found {len(articles)} articles")
         return articles
     except Exception as e:
         logging.error(f"Error processing feed {feed_url}: {str(e)}")
