update
This commit is contained in:
+16
-4
@@ -116,8 +116,9 @@ def create_http_session() -> requests.Session:
|
||||
return session
|
||||
|
||||
def fetch_feed(feed_url: str, session: requests.Session) -> Optional[feedparser.FeedParserDict]:
|
||||
logging.debug(f"Fetching feed: {feed_url}")
|
||||
try:
|
||||
response = session.get(feed_url, timeout=FEED_TIMEOUT)
|
||||
response = session.get(feed_url, timeout=15) # Reduced timeout to 15 seconds
|
||||
response.raise_for_status()
|
||||
feed = feedparser.parse(response.content)
|
||||
|
||||
@@ -125,6 +126,7 @@ def fetch_feed(feed_url: str, session: requests.Session) -> Optional[feedparser.
|
||||
logging.warning(f"Feed parsing error for {feed_url}: {feed.bozo_exception}")
|
||||
return None
|
||||
|
||||
logging.debug(f"Successfully fetched feed: {feed_url}")
|
||||
return feed
|
||||
except Exception as e:
|
||||
logging.error(f"Error fetching feed {feed_url}: {str(e)}")
|
||||
@@ -159,38 +161,46 @@ def is_interesting_rss(title: str, summary: str, pub_date: datetime) -> bool:
|
||||
return False
|
||||
|
||||
def fetch_rss_feeds() -> List[Dict[str, Any]]:
    """Fetch every feed in ``RSS_FEEDS`` concurrently and collect the articles.

    Each feed URL is submitted to ``process_feed`` on a ``ThreadPoolExecutor``
    sized by ``MAX_WORKERS``; all workers receive the same session returned by
    ``create_http_session``. A feed whose future raises is logged (with its
    URL) and skipped, so one bad feed does not discard the others.

    Returns:
        The concatenated article dicts from every feed that completed, or an
        empty list if an unexpected error escapes the executor block.
    """
    logging.info("Starting fetch_rss_feeds")
    # NOTE(review): a single Session is shared by all worker threads --
    # confirm this is acceptable for the adapters the session is built with.
    session = create_http_session()
    articles = []

    try:
        logging.info(f"Processing {len(RSS_FEEDS)} feeds: {RSS_FEEDS}")
        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            # Remember which URL each future belongs to so that a failure
            # can be attributed to a specific feed in the log.
            future_to_url = {}
            for feed_url in RSS_FEEDS:
                logging.debug(f"Scheduling feed: {feed_url}")
                future = executor.submit(process_feed, feed_url, session)
                future_to_url[future] = feed_url

            for future in as_completed(future_to_url):
                feed_url = future_to_url[future]
                try:
                    feed_articles = future.result()
                    logging.info(f"Completed feed processing, got {len(feed_articles)} articles")
                    articles.extend(feed_articles)
                except Exception as e:
                    # Only the replacement log line is kept; the superseded
                    # "Error processing feed" message would log twice.
                    logging.error(f"Error processing feed in future for {feed_url}: {str(e)}")

        logging.info(f"Finished fetch_rss_feeds, total articles: {len(articles)}")
        return articles
    except Exception as e:
        logging.error(f"Error in fetch_rss_feeds: {str(e)}")
        return []
|
||||
|
||||
def process_feed(feed_url: str, session: requests.Session) -> List[Dict[str, Any]]:
|
||||
logging.info(f"Processing feed: {feed_url}")
|
||||
try:
|
||||
feed = fetch_feed(feed_url, session)
|
||||
if not feed:
|
||||
logging.warning(f"No feed data for {feed_url}")
|
||||
return []
|
||||
|
||||
articles = []
|
||||
logging.debug(f"Feed entries count: {len(feed.entries)}")
|
||||
for entry in feed.entries:
|
||||
try:
|
||||
logging.debug(f"Processing entry: {entry.get('title', 'No title')}")
|
||||
pub_date = datetime.fromtimestamp(time.mktime(entry.published_parsed), tz=timezone.utc)
|
||||
|
||||
# Safely extract content
|
||||
@@ -216,13 +226,15 @@ def process_feed(feed_url: str, session: requests.Session) -> List[Dict[str, Any
|
||||
}
|
||||
|
||||
if is_interesting_rss(article["title"], article["summary"], pub_date):
|
||||
logging.info(f"Interesting article found: {article['title']}")
|
||||
articles.append(article)
|
||||
|
||||
time.sleep(RATE_LIMIT_DELAY)
|
||||
except Exception as e:
|
||||
logging.warning(f"Error processing entry: {str(e)}")
|
||||
logging.warning(f"Error processing entry in {feed_url}: {str(e)}")
|
||||
continue
|
||||
|
||||
logging.info(f"Finished processing {feed_url}, found {len(articles)} articles")
|
||||
return articles
|
||||
except Exception as e:
|
||||
logging.error(f"Error processing feed {feed_url}: {str(e)}")
|
||||
|
||||
Reference in New Issue
Block a user