jherve преди 1 година
родител
ревизия
f4f9e4e0e4
променени са 2 файла, в които са добавени 23 реда и са изтрити 22 реда
  1. +3 -1
      src/de_quoi_parle_le_monde/le_monde.py
  2. +20 -21
      src/de_quoi_parle_le_monde/main.py

+ 3 - 1
src/de_quoi_parle_le_monde/le_monde.py

@@ -50,7 +50,9 @@ class LeMondeMainPage:
         )
 
     @staticmethod
-    async def from_content(snapshot: InternetArchiveSnapshot, text: str) -> "LeMondeMainPage":
+    async def from_content(
+        snapshot: InternetArchiveSnapshot, text: str
+    ) -> "LeMondeMainPage":
         loop = asyncio.get_event_loop()
         soup = await loop.run_in_executor(None, BeautifulSoup, text, "lxml")
         return LeMondeMainPage(snapshot, soup)

+ 20 - 21
src/de_quoi_parle_le_monde/main.py

@@ -7,33 +7,32 @@ from de_quoi_parle_le_monde.internet_archive import InternetArchiveClient
 from de_quoi_parle_le_monde.le_monde import LeMondeArchive, LeMondeMainPage
 
 
-async def get_latest_snaps(dts):
-    http_client = HttpClient()
+@frozen
+class ArchiveDownloader:
+    client: HttpClient
 
-    async with http_client.session() as session:
-        ia = InternetArchiveClient(session)
+    @staticmethod
+    def last_n_days(n):
+        return [
+            datetime.combine(date.today() - timedelta(days=i), time(hour=18))
+            for i in range(0, n)
+        ]
 
-        async def req_and_parse_first_snap(dt):
-            closest = await ia.get_snapshot_closest_to(LeMondeArchive.url, dt)
-            closest_body = await ia.fetch(closest)
-            return await LeMondeMainPage.from_content(closest, closest_body)
+    async def get_latest_snaps(self, dts):
+        async with self.client.session() as session:
+            ia = InternetArchiveClient(session)
 
-        return await asyncio.gather(*[req_and_parse_first_snap(d) for d in dts])
+            async def handle_snap(dt):
+                closest = await ia.get_snapshot_closest_to(LeMondeArchive.url, dt)
+                closest_body = await ia.fetch(closest)
+                return await LeMondeMainPage.from_content(closest, closest_body)
 
+            return await asyncio.gather(*[handle_snap(d) for d in dts])
 
-@frozen
-class ArchiveDownloader:
-    client: InternetArchiveClient
-
-    @staticmethod
-    def from_http_client(http_client):
-        return ArchiveDownloader(InternetArchiveClient(http_client))
 
+http_client = HttpClient()
+dler = ArchiveDownloader(http_client)
+snaps = asyncio.run(dler.get_latest_snaps(ArchiveDownloader.last_n_days(5)))
 
-dts = [
-    datetime.combine(date.today() - timedelta(days=n), time(hour=18))
-    for n in range(0, 5)
-]
-snaps = asyncio.run(get_latest_snaps(dts))
 for s in snaps:
     print(s.snapshot.timestamp, s.get_top_articles()[0], s.main_article())