jherve 1 рік тому
батько
коміт
14d869ee75

+ 4 - 0
src/de_quoi_parle_le_monde/medias/__init__.py

@@ -2,6 +2,7 @@ from de_quoi_parle_le_monde.article import ArchiveCollection
 
 from .france_tv_info import FranceTvInfoMainPage
 from .le_monde import LeMondeMainPage
+from .cnews import CNewsMainPage
 
 
 media_collection = {
@@ -11,4 +12,7 @@ media_collection = {
     "le_monde": ArchiveCollection(
         url="https://lemonde.fr", MainPageClass=LeMondeMainPage
     ),
+    "cnews": ArchiveCollection(
+        url="https://cnews.fr", MainPageClass=CNewsMainPage
+    ),
 }

+ 51 - 0
src/de_quoi_parle_le_monde/medias/cnews.py

@@ -0,0 +1,51 @@
+import asyncio
+from bs4 import BeautifulSoup
+
+from de_quoi_parle_le_monde.internet_archive import InternetArchiveSnapshot
+from de_quoi_parle_le_monde.article import (
+    FeaturedArticleSnapshot,
+    TopArticle,
+    MainArticle,
+    MainPage,
+)
+
+
+class CNewsFeaturedArticleSnapshot(FeaturedArticleSnapshot):
+    ...
+
+
+class CNewsMainPage(MainPage):
+    @staticmethod
+    def get_top_articles(soup):
+        all_articles = soup.css.select(".top-news-content a")
+
+        return [
+            TopArticle(
+                article=CNewsFeaturedArticleSnapshot.create(
+                    title=a.find("h3", class_="dm-letop-title").text.strip(), url=a["href"]
+                ),
+                rank=idx + 1,
+            )
+            for idx, a in enumerate(all_articles)
+        ]
+
+    @staticmethod
+    def get_main_article(soup):
+        main = soup.find("div", class_="dm-block-news_1_single_full")
+        return MainArticle(
+            article=CNewsFeaturedArticleSnapshot.create(
+                title=main.find("h2", class_="dm-news-title").text.strip(),
+                url=main.find("a")["href"],
+            )
+        )
+
+    @classmethod
+    async def from_snapshot(
+        cls, snapshot: InternetArchiveSnapshot
+    ) -> "CNewsMainPage":
+        loop = asyncio.get_event_loop()
+        soup = await loop.run_in_executor(None, BeautifulSoup, snapshot.text, "lxml")
+
+        return cls(
+            snapshot, soup, cls.get_top_articles(soup), cls.get_main_article(soup)
+        )