Replace the create_http_session
This commit is contained in:
+21
-4
@@ -98,8 +98,8 @@ def create_http_session() -> requests.Session:
|
||||
session = requests.Session()
|
||||
retry_strategy = Retry(
|
||||
total=MAX_RETRIES,
|
||||
backoff_factor=1,
|
||||
status_forcelist=[429, 500, 502, 503, 504],
|
||||
backoff_factor=2, # Increased backoff factor for better retry handling
|
||||
status_forcelist=[429, 500, 502, 503, 504, 403], # Added 403 to retry list
|
||||
allowed_methods=["GET", "POST"]
|
||||
)
|
||||
adapter = HTTPAdapter(
|
||||
@@ -109,6 +109,10 @@ def create_http_session() -> requests.Session:
|
||||
)
|
||||
session.mount("http://", adapter)
|
||||
session.mount("https://", adapter)
|
||||
# Add a realistic User-Agent header
|
||||
session.headers.update({
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'
|
||||
})
|
||||
return session
|
||||
|
||||
def fetch_feed(feed_url: str, session: requests.Session) -> Optional[feedparser.FeedParserDict]:
|
||||
@@ -189,11 +193,24 @@ def process_feed(feed_url: str, session: requests.Session) -> List[Dict[str, Any
|
||||
try:
|
||||
pub_date = datetime.fromtimestamp(time.mktime(entry.published_parsed), tz=timezone.utc)
|
||||
|
||||
# Safely extract content
|
||||
content = ""
|
||||
if hasattr(entry, 'content') and isinstance(entry.content, list) and len(entry.content) > 0:
|
||||
content_item = entry.content[0]
|
||||
if isinstance(content_item, dict) and 'value' in content_item:
|
||||
content = content_item['value']
|
||||
elif hasattr(content_item, 'value'):
|
||||
content = content_item.value
|
||||
elif hasattr(entry, 'description'):
|
||||
content = entry.description
|
||||
elif hasattr(entry, 'summary'):
|
||||
content = entry.summary
|
||||
|
||||
article = {
|
||||
"title": entry.title,
|
||||
"link": entry.link,
|
||||
"summary": entry.summary if hasattr(entry, 'summary') else entry.description,
|
||||
"content": getattr(entry, 'content', [{'value': ''}])[0].value,
|
||||
"summary": entry.summary if hasattr(entry, 'summary') else entry.description if hasattr(entry, 'description') else "",
|
||||
"content": content,
|
||||
"feed_title": get_clean_source_name(feed_url),
|
||||
"pub_date": pub_date
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user