jherve 1 год назад
Родитель
Commit
fd8ca459c6

+ 8 - 4
src/de_quoi_parle_le_monde/internet_archive.py

@@ -3,7 +3,7 @@ from typing import Optional, ClassVar, NewType
 from datetime import date, datetime, timedelta
 import cattrs
 
-from de_quoi_parle_le_monde.http import HttpSession
+from de_quoi_parle_le_monde.http import HttpClient
 
 Timestamp = NewType("Timestamp", datetime)
 datetime_format = "%Y%m%d%H%M%S"
@@ -96,7 +96,7 @@ class InternetArchiveSnapshot:
 @frozen
 class InternetArchiveClient:
     # https://github.com/internetarchive/wayback/tree/master/wayback-cdx-server
-    session: HttpSession
+    client: HttpClient
     search_url: ClassVar[str] = "http://web.archive.org/cdx/search/cdx"
 
     async def search_snapshots(
@@ -106,12 +106,12 @@ class InternetArchiveClient:
             record = CdxRecord.parse_line(line)
             return InternetArchiveSnapshotId.from_record(record)
 
-        resp = await self.session.get(self.search_url, req.into_params())
+        resp = await self._get(self.search_url, req.into_params())
 
         return [to_snapshot_id(line) for line in resp.splitlines()]
 
     async def fetch(self, id_: InternetArchiveSnapshotId) -> str:
-        resp = await self.session.get(id_.url)
+        resp = await self._get(id_.url)
         return InternetArchiveSnapshot(id_, resp)
 
     async def get_snapshot_id_closest_to(self, url, dt):
@@ -132,3 +132,7 @@ class InternetArchiveClient:
             return min(all_snaps, key=lambda s: abs(s.timestamp - dt))
         else:
             raise SnapshotNotYetAvailable(dt)
+
+    async def _get(self, url, params=None):
+        async with self.client.session() as session:
+            return await session.get(url, params)

+ 3 - 4
src/de_quoi_parle_le_monde/snapshots.py

@@ -121,10 +121,9 @@ async def main():
     logger.info("Starting snapshot service..")
     jobs = SnapshotJob.create(10, [8, 12, 18, 22])
 
-    async with http_client.session() as session:
-        ia = InternetArchiveClient(session)
-        worker = SnapshotWorker(storage, ia)
-        await asyncio.gather(*[worker.run(job) for job in jobs])
+    ia = InternetArchiveClient(http_client)
+    worker = SnapshotWorker(storage, ia)
+    await asyncio.gather(*[worker.run(job) for job in jobs])
     logger.info("Snapshot service exiting")