Przeglądaj źródła

Dump some information in a tmpdir in case of a parsing error

jherve 1 rok temu
rodzic
commit
4fff56ca37
1 zmieniony plik: 19 dodań i 2 usunięcia
  1. 19 2
      src/de_quoi_parle_le_monde/snapshots.py

+ 19 - 2
src/de_quoi_parle_le_monde/snapshots.py

@@ -1,5 +1,8 @@
 import asyncio
 import asyncio
 import traceback
 import traceback
+import tempfile
+import urllib.parse
+from pathlib import Path
 from datetime import date, datetime, time, timedelta
 from datetime import date, datetime, time, timedelta
 from attrs import frozen
 from attrs import frozen
 from loguru import logger
 from loguru import logger
@@ -64,8 +67,22 @@ class SnapshotWorker:
         try:
         try:
             return await collection.MainPageClass.from_snapshot(snapshot)
             return await collection.MainPageClass.from_snapshot(snapshot)
         except Exception as e:
         except Exception as e:
-            logger.error(f"Error while parsing {snapshot}")
-            traceback.print_exception(e)
+            tmpdir_prefix = urllib.parse.quote_plus(
+                f"le_monde_{snapshot.id.original}_{snapshot.id.timestamp}"
+            )
+            tmpdir = Path(tempfile.mkdtemp(prefix=tmpdir_prefix))
+
+            with open(tmpdir / "snapshot.html", "w") as f:
+                f.write(snapshot.text)
+            with open(tmpdir / "exception.txt", "w") as f:
+                f.writelines(traceback.format_exception(e))
+            with open(tmpdir / "url.txt", "w") as f:
+                f.write(snapshot.id.url)
+
+            logger.error(
+                f"Error while parsing snapshot from {snapshot.id.url}, details were written in directory {tmpdir}"
+            )
+
             raise e
             raise e
 
 
     async def store(self, page, collection, dt):
     async def store(self, page, collection, dt):