jherve 1 년 전
부모
커밋
7638a23650
3개의 파일 변경, 11줄 추가, 8줄 삭제
  1. 2 6
      src/de_quoi_parle_le_monde/internet_archive.py
  2. 7 0
      src/de_quoi_parle_le_monde/le_monde.py
  3. 2 2
      src/de_quoi_parle_le_monde/main.py

+ 2 - 6
src/de_quoi_parle_le_monde/internet_archive.py

@@ -100,13 +100,9 @@ class InternetArchiveClient:
 
 
         return [to_snapshot(line) for line in resp.splitlines()]
 
-    async def fetch_and_parse_snapshot(
-        self, snap: InternetArchiveSnapshot
-    ) -> BeautifulSoup:
+    async def fetch(self, snap: InternetArchiveSnapshot) -> str:
         resp = await self.session.get(snap.url)
-        loop = asyncio.get_event_loop()
-        soup = await loop.run_in_executor(None, BeautifulSoup, resp, "lxml")
-        return soup
+        return resp
 
     async def get_snapshot_closest_to(self, url, dt):
         req = CdxRequest(

+ 7 - 0
src/de_quoi_parle_le_monde/le_monde.py

@@ -1,6 +1,7 @@
 from attrs import frozen
 from typing import ClassVar
 import cattrs
+import asyncio
 from bs4 import BeautifulSoup
 
 from de_quoi_parle_le_monde.internet_archive import InternetArchiveSnapshot
@@ -48,6 +49,12 @@ class LeMondeMainPage:
             self.soup.find("div", class_="article--main")
         )
 
+    @staticmethod
+    async def from_content(snapshot: InternetArchiveSnapshot, text: str) -> "LeMondeMainPage":
+        loop = asyncio.get_event_loop()
+        soup = await loop.run_in_executor(None, BeautifulSoup, text, "lxml")
+        return LeMondeMainPage(snapshot, soup)
+
 
 @frozen
 class LeMondeArchive:

+ 2 - 2
src/de_quoi_parle_le_monde/main.py

@@ -15,8 +15,8 @@ async def get_latest_snaps(dts):
 
 
         async def req_and_parse_first_snap(dt):
             closest = await ia.get_snapshot_closest_to(LeMondeArchive.url, dt)
-            closest_content = await ia.fetch_and_parse_snapshot(closest)
-            return LeMondeMainPage(closest, closest_content)
+            closest_body = await ia.fetch(closest)
+            return await LeMondeMainPage.from_content(closest, closest_body)
 
         return await asyncio.gather(*[req_and_parse_first_snap(d) for d in dts])