ソースを参照

Add sqlite storage

jherve 1 年前
コミット
6d888a1fa7

+ 1 - 0
src/de_quoi_parle_le_monde/article.py

@@ -9,6 +9,7 @@ from de_quoi_parle_le_monde.internet_archive import InternetArchiveSnapshot
 class TopArticle(ABC):
     title: str
     url: str
+    rank: int
 
 
 @frozen

+ 4 - 3
src/de_quoi_parle_le_monde/le_monde.py

@@ -1,4 +1,3 @@
-from attrs import frozen
 import asyncio
 from bs4 import BeautifulSoup
 
@@ -24,8 +23,10 @@ class LeMondeMainPage(MainPage):
     def get_top_articles(soup):
         all_articles = soup.find_all("div", class_="top-article")
         return [
-            LeMondeTopArticle(title=a.text.strip(), url=a.find("a")["href"])
-            for a in all_articles
+            LeMondeTopArticle(
+                title=a.text.strip(), url=a.find("a")["href"], rank=idx + 1
+            )
+            for idx, a in enumerate(all_articles)
         ]
 
     @staticmethod

+ 17 - 5
src/de_quoi_parle_le_monde/main.py

@@ -5,6 +5,7 @@ from attrs import frozen
 from de_quoi_parle_le_monde.http import HttpClient
 from de_quoi_parle_le_monde.internet_archive import InternetArchiveClient
 from de_quoi_parle_le_monde.le_monde import le_monde_collection
+from de_quoi_parle_le_monde.storage import Storage
 
 
 @frozen
@@ -30,11 +31,22 @@ class ArchiveDownloader:
             return await asyncio.gather(*[handle_snap(collection, d) for d in dts])
 
 
+async def main(dler):
+    """Fetch snapshots via `dler` and store their articles in `Storage`."""
+    storage = await Storage.create()
+    snapshots = await dler.get_latest_snaps(
+        le_monde_collection, ArchiveDownloader.last_n_days(20)
+    )
+    for snap in snapshots:
+        # Every row written for one snapshot shares these two values.
+        snap_id = snap.snapshot.id
+        when, site = snap_id.timestamp, snap_id.original
+        await storage.add_main_article(when, site, snap.main_article)
+        for top in snap.top_articles:
+            await storage.add_top_article(when, site, top)
+
+
 http_client = HttpClient()
 dler = ArchiveDownloader(http_client)
-snaps = asyncio.run(
-    dler.get_latest_snaps(le_monde_collection, ArchiveDownloader.last_n_days(1))
-)
 
-for s in snaps:
-    print(s.snapshot.id.timestamp, s.top_articles[0], s.main_article)
+asyncio.run(main(dler))

+ 77 - 0
src/de_quoi_parle_le_monde/storage.py

@@ -0,0 +1,77 @@
+import aiosqlite
+
+from de_quoi_parle_le_monde.article import MainArticle, TopArticle
+
+
+class Storage:
+    """Persist scraped articles in a SQLite database through aiosqlite.
+
+    Each method opens its own connection to ``conn_str``. Prefer
+    ``Storage.create()`` over the bare constructor: it also creates the
+    tables the insert methods write to.
+    """
+
+    def __init__(self, conn_str: str = "test.db"):
+        # Database path given to aiosqlite.connect() by every method.
+        self.conn_str = conn_str
+
+    @classmethod
+    async def create(cls, conn_str: str = "test.db"):
+        """Build a storage on ``conn_str`` and make sure its tables exist."""
+        storage = cls(conn_str)
+        await storage._create_db()
+        return storage
+
+    async def _create_db(self):
+        """Create ``main_articles`` and ``top_articles`` unless they exist."""
+        async with aiosqlite.connect(self.conn_str) as conn:
+            await conn.execute(
+                """
+                CREATE TABLE IF NOT EXISTS main_articles (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    timestamp TEXT,
+                    site TEXT,
+                    title TEXT,
+                    url TEXT
+                );
+                """
+            )
+            await conn.execute(
+                """
+                CREATE TABLE IF NOT EXISTS top_articles (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    timestamp TEXT,
+                    site TEXT,
+                    title TEXT,
+                    url TEXT,
+                    rank INTEGER
+                );
+                """
+            )
+            # Commit explicitly, like the insert methods, so the schema does
+            # not depend on the driver auto-committing DDL statements.
+            await conn.commit()
+
+    async def add_main_article(self, timestamp: str, site: str, article: MainArticle):
+        """Insert ``article`` (title, url) into ``main_articles`` and commit."""
+        async with aiosqlite.connect(self.conn_str) as conn:
+            await conn.execute_insert(
+                """
+                INSERT INTO main_articles (timestamp, site, title, url)
+                VALUES (?, ?, ?, ?);
+                """,
+                [timestamp, site, article.title, article.url],
+            )
+            await conn.commit()
+
+    async def add_top_article(self, timestamp: str, site: str, article: TopArticle):
+        """Insert ``article`` (title, url, rank) into ``top_articles`` and commit."""
+        async with aiosqlite.connect(self.conn_str) as conn:
+            await conn.execute_insert(
+                """
+                INSERT INTO top_articles (timestamp, site, title, url, rank)
+                VALUES (?, ?, ?, ?, ?);
+                """,
+                [timestamp, site, article.title, article.url, article.rank],
+            )
+            await conn.commit()