|
|
@@ -70,7 +70,7 @@ class CdxRequest:
|
|
|
|
|
|
|
|
|
@frozen
|
|
|
-class InternetArchiveRemoteSnapshot:
|
|
|
+class InternetArchiveSnapshotId:
|
|
|
timestamp: Timestamp
|
|
|
original: str
|
|
|
|
|
|
@@ -80,9 +80,7 @@ class InternetArchiveRemoteSnapshot:
|
|
|
|
|
|
@staticmethod
|
|
|
def from_record(rec: CdxRecord):
|
|
|
- return InternetArchiveRemoteSnapshot(
|
|
|
- timestamp=rec.timestamp, original=rec.original
|
|
|
- )
|
|
|
+ return InternetArchiveSnapshotId(timestamp=rec.timestamp, original=rec.original)
|
|
|
|
|
|
|
|
|
@frozen
|
|
|
@@ -91,22 +89,22 @@ class InternetArchiveClient:
|
|
|
session: HttpSession
|
|
|
search_url: ClassVar[str] = "http://web.archive.org/cdx/search/cdx"
|
|
|
|
|
|
- async def search_remote_snapshots(
|
|
|
+ async def search_snapshots(
|
|
|
self, req: CdxRequest
|
|
|
- ) -> list[InternetArchiveRemoteSnapshot]:
|
|
|
- def to_remote_snapshot(line):
|
|
|
+ ) -> list[InternetArchiveSnapshotId]:
|
|
|
+ def to_snapshot_id(line):
|
|
|
record = CdxRecord.parse_line(line)
|
|
|
- return InternetArchiveRemoteSnapshot.from_record(record)
|
|
|
+ return InternetArchiveSnapshotId.from_record(record)
|
|
|
|
|
|
resp = await self.session.get(self.search_url, req.into_params())
|
|
|
|
|
|
- return [to_remote_snapshot(line) for line in resp.splitlines()]
|
|
|
+ return [to_snapshot_id(line) for line in resp.splitlines()]
|
|
|
|
|
|
- async def fetch(self, snap: InternetArchiveRemoteSnapshot) -> str:
|
|
|
+ async def fetch(self, snap: InternetArchiveSnapshotId) -> str:
|
|
|
resp = await self.session.get(snap.url)
|
|
|
return resp
|
|
|
|
|
|
- async def get_remote_snapshot_closest_to(self, url, dt):
|
|
|
+ async def get_snapshot_id_closest_to(self, url, dt):
|
|
|
req = CdxRequest(
|
|
|
url=url,
|
|
|
from_=dt - timedelta(hours=6.0),
|
|
|
@@ -119,6 +117,6 @@ class InternetArchiveClient:
|
|
|
filter="statuscode:200",
|
|
|
)
|
|
|
|
|
|
- all_snaps = await self.search_remote_snapshots(req)
|
|
|
+ all_snaps = await self.search_snapshots(req)
|
|
|
closest = min(all_snaps, key=lambda s: abs(s.timestamp - dt))
|
|
|
return closest
|