Sfoglia il codice sorgente

Use an instance of ArchiveDownloader instead of static methods

jherve 1 anno fa
parent
commit
f30cdabc3e
1 ha cambiato i file con 22 aggiunte e 23 eliminazioni
  1. 22 23
      src/de_quoi_parle_le_monde/main.py

+ 22 - 23
src/de_quoi_parle_le_monde/main.py

@@ -14,6 +14,9 @@ from de_quoi_parle_le_monde.storage import Storage
 
 @frozen
 class ArchiveDownloader:
+    storage: Storage
+    ia_client: InternetArchiveClient
+
     @staticmethod
     def last_n_days_at_hours(n: int, hours: list[int]) -> list[datetime]:
         return [
@@ -22,38 +25,34 @@ class ArchiveDownloader:
             for h in hours
         ]
 
-    @staticmethod
-    async def find(ia, collection, dt):
-        return await ia.get_snapshot_id_closest_to(collection.url, dt)
+    async def find(self, collection, dt):
+        return await self.ia_client.get_snapshot_id_closest_to(collection.url, dt)
 
-    @staticmethod
-    async def parse(collection, snapshot):
+    async def parse(self, collection, snapshot):
         return await collection.MainPageClass.from_snapshot(snapshot)
 
-    @staticmethod
-    async def store(page, collection, storage, dt):
-        site_id = await storage.add_site(collection.url)
-        snapshot_id = await storage.add_snapshot(site_id, page.snapshot.id, dt)
+    async def store(self, page, collection, dt):
+        site_id = await self.storage.add_site(collection.url)
+        snapshot_id = await self.storage.add_snapshot(site_id, page.snapshot.id, dt)
 
-        article_id = await storage.add_featured_article(
+        article_id = await self.storage.add_featured_article(
             page.main_article.article.original
         )
-        main_article_snap_id = await storage.add_featured_article_snapshot(
+        main_article_snap_id = await self.storage.add_featured_article_snapshot(
             article_id, page.main_article.article
         )
-        await storage.add_main_article(snapshot_id, main_article_snap_id)
+        await self.storage.add_main_article(snapshot_id, main_article_snap_id)
 
         for t in page.top_articles:
-            article_id = await storage.add_featured_article(t.article.original)
-            top_article_snap_id = await storage.add_featured_article_snapshot(
+            article_id = await self.storage.add_featured_article(t.article.original)
+            top_article_snap_id = await self.storage.add_featured_article_snapshot(
                 article_id, t.article
             )
-            await storage.add_top_article(snapshot_id, top_article_snap_id, t)
+            await self.storage.add_top_article(snapshot_id, top_article_snap_id, t)
 
-    @classmethod
-    async def handle_snap(cls, ia, collection, storage, dt):
+    async def handle_snap(self, collection, dt):
         try:
-            id_closest = await cls.find(ia, collection, dt)
+            id_closest = await self.find(collection, dt)
         except SnapshotNotYetAvailable as e:
             print(f"Snapshot for {collection.url} @ {dt} not yet available")
             return
@@ -63,21 +62,21 @@ class ArchiveDownloader:
             return
 
         try:
-            closest = await ia.fetch(id_closest)
+            closest = await self.ia_client.fetch(id_closest)
         except Exception as e:
             print(f"Error while fetching {id_closest} from {collection} @ {dt}")
             traceback.print_exception(e)
             return
 
         try:
-            main_page = await cls.parse(collection, closest)
+            main_page = await self.parse(collection, closest)
         except Exception as e:
             print(f"Error while parsing {closest} from {collection} @ {dt}")
             traceback.print_exception(e)
             return
 
         try:
-            await cls.store(main_page, collection, storage, dt)
+            await self.store(main_page, collection, dt)
         except Exception as e:
             print(f"Error while attempting to store {main_page} from {collection} @ {dt}")
             traceback.print_exception(e)
@@ -91,11 +90,11 @@ async def main():
 
     async with http_client.session() as session:
         ia = InternetArchiveClient(session)
-        dler = ArchiveDownloader()
+        dler = ArchiveDownloader(storage, ia)
 
         return await asyncio.gather(
             *[
-                ArchiveDownloader.handle_snap(ia, c, storage, d)
+                dler.handle_snap(c, d)
                 for d in dts
                 for c in media_collection.values()
             ]