Parcourir la source

Add "featured article" class and subclasses

jherve il y a 1 an
Parent
commit
33616401e2

+ 7 - 3
src/de_quoi_parle_le_monde/article.py

@@ -6,16 +6,20 @@ from de_quoi_parle_le_monde.internet_archive import InternetArchiveSnapshot
 
 
 @frozen
-class TopArticle(ABC):
+class FeaturedArticle(ABC):
     title: str
     url: str
+
+
+@frozen
+class TopArticle(ABC):
+    article: FeaturedArticle
     rank: int
 
 
 @frozen
 class MainArticle(ABC):
-    title: str
-    url: str
+    article: FeaturedArticle
 
 
 @frozen

+ 14 - 11
src/de_quoi_parle_le_monde/france_tv_info.py

@@ -3,17 +3,14 @@ from bs4 import BeautifulSoup
 
 from de_quoi_parle_le_monde.internet_archive import InternetArchiveSnapshot
 from de_quoi_parle_le_monde.article import (
+    FeaturedArticle,
     TopArticle,
     MainArticle,
     MainPage,
 )
 
 
-class FranceTvInfoTopArticle(TopArticle):
-    ...
-
-
-class FranceTvInfoMainArticle(MainArticle):
+class FranceTvInfoFeaturedArticle(FeaturedArticle):
     ...
 
 
@@ -22,9 +19,13 @@ class FranceTvInfoMainPage(MainPage):
     def get_top_articles(soup):
         all_articles = soup.find_all("article", class_="card-article-most-read")
         return [
-            FranceTvInfoTopArticle(
-                title=a.find("p", class_="card-article-most-read__title").text.strip(),
-                url=a.find("a")["href"],
+            TopArticle(
+                article=FranceTvInfoFeaturedArticle(
+                    title=a.find(
+                        "p", class_="card-article-most-read__title"
+                    ).text.strip(),
+                    url=a.find("a")["href"],
+                ),
                 rank=idx + 1,
             )
             for idx, a in enumerate(all_articles)
@@ -39,9 +40,11 @@ class FranceTvInfoMainPage(MainPage):
             class_="card-article-actu-forte__title"
         )
 
-        return FranceTvInfoMainArticle(
-            title=title.text.strip(),
-            url=main.find("a")["href"],
+        return MainArticle(
+            article=FranceTvInfoFeaturedArticle(
+                title=title.text.strip(),
+                url=main.find("a")["href"],
+            )
         )
 
     @classmethod

+ 12 - 10
src/de_quoi_parle_le_monde/le_monde.py

@@ -3,17 +3,14 @@ from bs4 import BeautifulSoup
 
 from de_quoi_parle_le_monde.internet_archive import InternetArchiveSnapshot
 from de_quoi_parle_le_monde.article import (
+    FeaturedArticle,
     TopArticle,
     MainArticle,
     MainPage,
 )
 
 
-class LeMondeTopArticle(TopArticle):
-    ...
-
-
-class LeMondeMainArticle(MainArticle):
+class LeMondeFeaturedArticle(FeaturedArticle):
     ...
 
 
@@ -22,8 +19,11 @@ class LeMondeMainPage(MainPage):
     def get_top_articles(soup):
         all_articles = soup.find_all("div", class_="top-article")
         return [
-            LeMondeTopArticle(
-                title=a.text.strip(), url=a.find("a")["href"], rank=idx + 1
+            TopArticle(
+                article=LeMondeFeaturedArticle(
+                    title=a.text.strip(), url=a.find("a")["href"]
+                ),
+                rank=idx + 1,
             )
             for idx, a in enumerate(all_articles)
         ]
@@ -31,9 +31,11 @@ class LeMondeMainPage(MainPage):
     @staticmethod
     def get_main_article(soup):
         main = soup.find("div", class_="article--main")
-        return LeMondeMainArticle(
-            title=main.find("p", class_="article__title-label").text.strip(),
-            url=main.find("a")["href"],
+        return MainArticle(
+            article=LeMondeFeaturedArticle(
+                title=main.find("p", class_="article__title-label").text.strip(),
+                url=main.find("a")["href"],
+            )
         )
 
     @classmethod

+ 4 - 0
src/de_quoi_parle_le_monde/main.py

@@ -40,8 +40,12 @@ class ArchiveDownloader:
 
         site_id = await storage.add_site(collection.url)
         snapshot_id = await storage.add_snapshot(site_id, main_page.snapshot.id, dt)
+
+        main = await storage.add_featured_article(main_page.main_article.article)
         await storage.add_main_article(snapshot_id, main_page.main_article)
+
         for t in main_page.top_articles:
+            article = await storage.add_featured_article(t.article)
             await storage.add_top_article(snapshot_id, t)
 
 

+ 6 - 6
src/de_quoi_parle_le_monde/storage.py

@@ -1,7 +1,7 @@
 import aiosqlite
 from datetime import datetime
 
-from de_quoi_parle_le_monde.article import MainArticle, TopArticle
+from de_quoi_parle_le_monde.article import MainArticle, TopArticle, FeaturedArticle
 from de_quoi_parle_le_monde.internet_archive import InternetArchiveSnapshotId
 
 
@@ -182,11 +182,11 @@ class Storage:
             await conn.commit()
             return id_
 
-    async def add_featured_article(self, url, title):
+    async def add_featured_article(self, article: FeaturedArticle):
         async with aiosqlite.connect(self.conn_str) as conn:
             (id_,) = await conn.execute_insert(
                 self._insert_stmt("featured_articles", ["title", "url"]),
-                [title, url],
+                [article.title, article.url],
             )
 
             if id_ == 0:
@@ -196,7 +196,7 @@ class Storage:
                     FROM featured_articles
                     WHERE title = ? AND url = ?
                     """,
-                    [title, url],
+                    [article.title, article.url],
                 )
 
             await conn.commit()
@@ -206,7 +206,7 @@ class Storage:
         async with aiosqlite.connect(self.conn_str) as conn:
             await conn.execute_insert(
                 self._insert_stmt("main_articles", ["snapshot_id", "title", "url"]),
-                [snapshot_id, article.title, article.url],
+                [snapshot_id, article.article.title, article.article.url],
             )
             await conn.commit()
 
@@ -216,7 +216,7 @@ class Storage:
                 self._insert_stmt(
                     "top_articles", ["snapshot_id", "title", "url", "rank"]
                 ),
-                [snapshot_id, article.title, article.url, article.rank],
+                [snapshot_id, article.article.title, article.article.url, article.rank],
             )
             await conn.commit()