Ver código fonte

Add site table

jherve 1 ano atrás
pai
commit
43335a3904

+ 1 - 0
src/de_quoi_parle_le_monde/main.py

@@ -31,6 +31,7 @@ class ArchiveDownloader:
                 except AttributeError as e:
                     print(f"error while processing {id_closest}")
                     raise e
+                site_id = await storage.add_site(collection.url)
                 snapshot_id = await storage.add_snapshot(main_page.snapshot.id, dt)
                 await storage.add_main_article(snapshot_id, main_page.main_article)
                 for t in main_page.top_articles:

+ 39 - 0
src/de_quoi_parle_le_monde/storage.py

@@ -17,6 +17,21 @@ class Storage:
 
     async def _create_db(self):
         async with aiosqlite.connect(self.conn_str) as conn:
+            await conn.execute(
+                """
+                CREATE TABLE IF NOT EXISTS sites (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    original_url TEXT
+                );
+                """
+            )
+            await conn.execute(
+                """
+                CREATE UNIQUE INDEX IF NOT EXISTS sites_unique_original_url
+                ON sites (original_url);
+                """
+            )
+
             await conn.execute(
                 """
                 CREATE TABLE IF NOT EXISTS snapshots (
@@ -68,6 +83,30 @@ class Storage:
                 """
             )
 
+    async def add_site(self, original_url: str) -> int:  # Insert original_url into `sites` unless already present; return the id of its row.
+        async with aiosqlite.connect(self.conn_str) as conn:  # a new connection is opened for every call
+            (id_,) = await conn.execute_insert(  # unpacks a one-element row; presumably last_insert_rowid() per aiosqlite's helper - TODO confirm
+                """
+                INSERT INTO sites (original_url)
+                VALUES (?)
+                ON CONFLICT DO NOTHING;
+                """,
+                [original_url],  # bound as a parameter, not interpolated into the SQL text
+            )
+
+            if id_ == 0:  # presumably means the insert was skipped by ON CONFLICT on this fresh connection - verify
+                [(id_,)] = await conn.execute_fetchall(  # destructuring requires exactly one row with one column, else ValueError
+                    """
+                    SELECT id
+                    FROM sites
+                    WHERE original_url = ?
+                    """,
+                    [original_url],
+                )
+
+            await conn.commit()  # runs on both the insert path and the lookup path
+            return id_  # id from the insert, or from the fallback SELECT when the insert reported 0
+
     async def add_snapshot(
         self, snapshot: InternetArchiveSnapshotId, virtual: datetime
     ) -> int: