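Update create_http_session (backoff factor 2, retry on 403, browser User-Agent header) and extract entry content/summary safely in process_feed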
This commit is contained in:
+21
-4
@@ -98,8 +98,8 @@ def create_http_session() -> requests.Session:
|
|||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
retry_strategy = Retry(
|
retry_strategy = Retry(
|
||||||
total=MAX_RETRIES,
|
total=MAX_RETRIES,
|
||||||
backoff_factor=1,
|
backoff_factor=2, # Increased backoff factor for better retry handling
|
||||||
status_forcelist=[429, 500, 502, 503, 504],
|
status_forcelist=[429, 500, 502, 503, 504, 403], # Added 403 to retry list
|
||||||
allowed_methods=["GET", "POST"]
|
allowed_methods=["GET", "POST"]
|
||||||
)
|
)
|
||||||
adapter = HTTPAdapter(
|
adapter = HTTPAdapter(
|
||||||
@@ -109,6 +109,10 @@ def create_http_session() -> requests.Session:
|
|||||||
)
|
)
|
||||||
session.mount("http://", adapter)
|
session.mount("http://", adapter)
|
||||||
session.mount("https://", adapter)
|
session.mount("https://", adapter)
|
||||||
|
# Add a realistic User-Agent header
|
||||||
|
session.headers.update({
|
||||||
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'
|
||||||
|
})
|
||||||
return session
|
return session
|
||||||
|
|
||||||
def fetch_feed(feed_url: str, session: requests.Session) -> Optional[feedparser.FeedParserDict]:
|
def fetch_feed(feed_url: str, session: requests.Session) -> Optional[feedparser.FeedParserDict]:
|
||||||
@@ -189,11 +193,24 @@ def process_feed(feed_url: str, session: requests.Session) -> List[Dict[str, Any
|
|||||||
try:
|
try:
|
||||||
pub_date = datetime.fromtimestamp(time.mktime(entry.published_parsed), tz=timezone.utc)
|
pub_date = datetime.fromtimestamp(time.mktime(entry.published_parsed), tz=timezone.utc)
|
||||||
|
|
||||||
|
# Safely extract content
|
||||||
|
content = ""
|
||||||
|
if hasattr(entry, 'content') and isinstance(entry.content, list) and len(entry.content) > 0:
|
||||||
|
content_item = entry.content[0]
|
||||||
|
if isinstance(content_item, dict) and 'value' in content_item:
|
||||||
|
content = content_item['value']
|
||||||
|
elif hasattr(content_item, 'value'):
|
||||||
|
content = content_item.value
|
||||||
|
elif hasattr(entry, 'description'):
|
||||||
|
content = entry.description
|
||||||
|
elif hasattr(entry, 'summary'):
|
||||||
|
content = entry.summary
|
||||||
|
|
||||||
article = {
|
article = {
|
||||||
"title": entry.title,
|
"title": entry.title,
|
||||||
"link": entry.link,
|
"link": entry.link,
|
||||||
"summary": entry.summary if hasattr(entry, 'summary') else entry.description,
|
"summary": entry.summary if hasattr(entry, 'summary') else entry.description if hasattr(entry, 'description') else "",
|
||||||
"content": getattr(entry, 'content', [{'value': ''}])[0].value,
|
"content": content,
|
||||||
"feed_title": get_clean_source_name(feed_url),
|
"feed_title": get_clean_source_name(feed_url),
|
||||||
"pub_date": pub_date
|
"pub_date": pub_date
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user