Prechádzať zdrojové kódy

Add top_articles / main_article attributes

jherve pred 1 rokom
rodič
commit
bd172a3234

+ 4 - 2
src/de_quoi_parle_le_monde/article.py

@@ -21,8 +21,10 @@ class MainArticle(ABC):
 class MainPage(ABC):
     snapshot: InternetArchiveSnapshot
     soup: BeautifulSoup
+    top_articles: list[TopArticle]
+    main_article: MainArticle
 
-    @staticmethod
+    @classmethod
     @abstractmethod
-    async def from_snapshot(snapshot: InternetArchiveSnapshot):
+    async def from_snapshot(cls, snapshot: InternetArchiveSnapshot):
         ...

+ 9 - 12
src/de_quoi_parle_le_monde/le_monde.py

@@ -1,6 +1,5 @@
 from attrs import frozen
 from typing import ClassVar
-import cattrs
 import asyncio
 from bs4 import BeautifulSoup
 
@@ -15,30 +14,28 @@ class LeMondeMainArticle(MainArticle):
     ...
 
 
-@frozen
 class LeMondeMainPage(MainPage):
-    snapshot: InternetArchiveSnapshot
-    soup: BeautifulSoup
-
-    def get_top_articles(self):
-        all_articles = self.soup.find_all("div", class_="top-article")
+    @staticmethod
+    def get_top_articles(soup):
+        all_articles = soup.find_all("div", class_="top-article")
         return [
             LeMondeTopArticle(title=a.text.strip(), url=a.find("a")["href"])
             for a in all_articles
         ]
 
-    def main_article(self):
-        main = self.soup.find("div", class_="article--main")
+    @staticmethod
+    def get_main_article(soup):
+        main = soup.find("div", class_="article--main")
         return LeMondeMainArticle(
             title=main.find("p", class_="article__title-label").text.strip(),
             url=main.find("a")["href"],
         )
 
-    @staticmethod
-    async def from_snapshot(snapshot: InternetArchiveSnapshot) -> "LeMondeMainPage":
+    @classmethod
+    async def from_snapshot(cls, snapshot: InternetArchiveSnapshot) -> "LeMondeMainPage":
         loop = asyncio.get_event_loop()
         soup = await loop.run_in_executor(None, BeautifulSoup, snapshot.text, "lxml")
-        return LeMondeMainPage(snapshot, soup)
+        return LeMondeMainPage(snapshot, soup, cls.get_top_articles(soup), cls.get_main_article(soup))
 
 
 @frozen

+ 1 - 1
src/de_quoi_parle_le_monde/main.py

@@ -35,4 +35,4 @@ dler = ArchiveDownloader(http_client)
 snaps = asyncio.run(dler.get_latest_snaps(ArchiveDownloader.last_n_days(1)))
 
 for s in snaps:
-    print(s.snapshot.id.timestamp, s.get_top_articles()[0], s.main_article())
+    print(s.snapshot.id.timestamp, s.top_articles[0], s.main_article)