Преглед изворни кода

Dump the snapshot HTML, the exception traceback and the snapshot URL into a temporary directory when parsing a snapshot fails

jherve пре 1 година
родитељ
комит
4fff56ca37
1 измењен фајл са 19 додато и 2 уклоњено
  1. 19 2
      src/de_quoi_parle_le_monde/snapshots.py

+ 19 - 2
src/de_quoi_parle_le_monde/snapshots.py

@@ -1,5 +1,8 @@
 import asyncio
 import traceback
+import tempfile
+import urllib.parse
+from pathlib import Path
 from datetime import date, datetime, time, timedelta
 from attrs import frozen
 from loguru import logger
@@ -64,8 +67,22 @@ class SnapshotWorker:
         try:
             return await collection.MainPageClass.from_snapshot(snapshot)
         except Exception as e:
-            logger.error(f"Error while parsing {snapshot}")
-            traceback.print_exception(e)
+            tmpdir_prefix = urllib.parse.quote_plus(
+                f"le_monde_{snapshot.id.original}_{snapshot.id.timestamp}"
+            )
+            tmpdir = Path(tempfile.mkdtemp(prefix=tmpdir_prefix))
+
+            with open(tmpdir / "snapshot.html", "w") as f:
+                f.write(snapshot.text)
+            with open(tmpdir / "exception.txt", "w") as f:
+                f.writelines(traceback.format_exception(e))
+            with open(tmpdir / "url.txt", "w") as f:
+                f.write(snapshot.id.url)
+
+            logger.error(
+                f"Error while parsing snapshot from {snapshot.id.url}, details were written in directory {tmpdir}"
+            )
+
             raise e
 
     async def store(self, page, collection, dt):