|
|
@@ -3,7 +3,7 @@ from typing import Optional, ClassVar, NewType
|
|
|
from datetime import date, datetime, timedelta
|
|
|
import cattrs
|
|
|
|
|
|
-from de_quoi_parle_le_monde.http import HttpSession
|
|
|
+from de_quoi_parle_le_monde.http import HttpClient
|
|
|
|
|
|
Timestamp = NewType("Timestamp", datetime)
|
|
|
datetime_format = "%Y%m%d%H%M%S"
|
|
|
@@ -96,7 +96,7 @@ class InternetArchiveSnapshot:
|
|
|
@frozen
|
|
|
class InternetArchiveClient:
|
|
|
# https://github.com/internetarchive/wayback/tree/master/wayback-cdx-server
|
|
|
- session: HttpSession
|
|
|
+ client: HttpClient
|
|
|
search_url: ClassVar[str] = "http://web.archive.org/cdx/search/cdx"
|
|
|
|
|
|
async def search_snapshots(
|
|
|
@@ -106,12 +106,12 @@ class InternetArchiveClient:
|
|
|
record = CdxRecord.parse_line(line)
|
|
|
return InternetArchiveSnapshotId.from_record(record)
|
|
|
|
|
|
- resp = await self.session.get(self.search_url, req.into_params())
|
|
|
+ resp = await self._get(self.search_url, req.into_params())
|
|
|
|
|
|
return [to_snapshot_id(line) for line in resp.splitlines()]
|
|
|
|
|
|
async def fetch(self, id_: InternetArchiveSnapshotId) -> str:
|
|
|
- resp = await self.session.get(id_.url)
|
|
|
+ resp = await self._get(id_.url)
|
|
|
return InternetArchiveSnapshot(id_, resp)
|
|
|
|
|
|
async def get_snapshot_id_closest_to(self, url, dt):
|
|
|
@@ -132,3 +132,7 @@ class InternetArchiveClient:
|
|
|
return min(all_snaps, key=lambda s: abs(s.timestamp - dt))
|
|
|
else:
|
|
|
raise SnapshotNotYetAvailable(dt)
|
|
|
+
|
|
|
+ async def _get(self, url, params=None):
|
|
|
+ async with self.client.session() as session:
|
|
|
+ return await session.get(url, params)
|