Przeglądaj źródła

Add "name" field for collections/sites

jherve 1 rok temu
rodzic
commit
a391820475

+ 1 - 0
src/de_quoi_parle_le_monde/article.py

@@ -78,6 +78,7 @@ class MainPage(ABC):
 
 @frozen
 class ArchiveCollection:
+    name: str
     url: str
     MainPageClass: type[MainPage]
 

+ 14 - 7
src/de_quoi_parle_le_monde/medias/__init__.py

@@ -6,11 +6,18 @@ from .cnews import CNewsMainPage
 
 
 media_collection = {
-    "france_tv_info": ArchiveCollection(
-        url="https://francetvinfo.fr", MainPageClass=FranceTvInfoMainPage
-    ),
-    "le_monde": ArchiveCollection(
-        url="https://lemonde.fr", MainPageClass=LeMondeMainPage
-    ),
-    "cnews": ArchiveCollection(url="https://cnews.fr", MainPageClass=CNewsMainPage),
+    c.name: c
+    for c in [
+        ArchiveCollection(
+            name="france_tv_info",
+            url="https://francetvinfo.fr",
+            MainPageClass=FranceTvInfoMainPage,
+        ),
+        ArchiveCollection(
+            name="le_monde", url="https://lemonde.fr", MainPageClass=LeMondeMainPage
+        ),
+        ArchiveCollection(
+            name="cnews", url="https://cnews.fr", MainPageClass=CNewsMainPage
+        ),
+    ]
 }

+ 6 - 6
src/de_quoi_parle_le_monde/snapshot_worker.py

@@ -30,11 +30,11 @@ class SnapshotWorker:
         try:
             return await self.ia_client.get_snapshot_id_closest_to(collection.url, dt)
         except SnapshotNotYetAvailable as e:
-            logger.warning(f"Snapshot for {collection.url} @ {dt} not yet available")
+            logger.warning(f"Snapshot for {collection.name} @ {dt} not yet available")
             raise e
         except Exception as e:
             logger.error(
-                f"Error while trying to find snapshot for {collection.url} @ {dt}"
+                f"Error while trying to find snapshot for {collection.name} @ {dt}"
             )
             traceback.print_exception(e)
             raise e
@@ -57,7 +57,7 @@ class SnapshotWorker:
 
     async def store(self, page, collection, dt):
         try:
-            site_id = await self.storage.add_site(collection.url)
+            site_id = await self.storage.add_site(collection.name, collection.url)
             snapshot_id = await self.storage.add_snapshot(site_id, page.snapshot.id, dt)
 
             article_id = await self.storage.add_featured_article(
@@ -77,19 +77,19 @@ class SnapshotWorker:
 
         except Exception as e:
             logger.error(
-                f"Error while attempting to store {page} from {collection} @ {dt}"
+                f"Error while attempting to store {page} from {collection.name} @ {dt}"
             )
             traceback.print_exception(e)
             raise e
 
     async def handle_snap(self, collection, dt):
         try:
-            logger.info(f"Start handling snap for collection {collection.url} @ {dt}")
+            logger.info(f"Start handling snap for collection {collection.name} @ {dt}")
             id_closest = await self.find(collection, dt)
             closest = await self.ia_client.fetch(id_closest)
             main_page = await self.parse(collection, closest)
             await self.store(main_page, collection, dt)
-            logger.info(f"Snap for collection {collection.url} @ {dt} is stored")
+            logger.info(f"Snap for collection {collection.name} @ {dt} is stored")
         except Exception as e:
             return
 

+ 8 - 7
src/de_quoi_parle_le_monde/storage.py

@@ -56,14 +56,15 @@ class Storage:
                 """
                 CREATE TABLE IF NOT EXISTS sites (
                     id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    name TEXT,
                     original_url TEXT
                 );
                 """
             )
             await conn.execute(
                 """
-                CREATE UNIQUE INDEX IF NOT EXISTS sites_unique_original_url
-                ON sites (original_url);
+                CREATE UNIQUE INDEX IF NOT EXISTS sites_unique_name
+                ON sites (name);
                 """
             )
 
@@ -193,16 +194,16 @@ class Storage:
                 """
             )
 
-    async def add_site(self, original_url: str) -> int:
+    async def add_site(self, name: str, original_url: str) -> int:
         return await self._insert_or_get(
-            self._insert_stmt("sites", ["original_url"]),
-            [original_url],
+            self._insert_stmt("sites", ["name", "original_url"]),
+            [name, original_url],
             """
                     SELECT id
                     FROM sites
-                    WHERE original_url = ?
+                    WHERE name = ?
                     """,
-            [original_url],
+            [name],
         )
 
     async def add_snapshot(