Explorar el Código

Fix parsing of bfmtv/lemonde/leparisien pages

jherve hace 1 año
padre
commit
dca525b846

+ 16 - 5
src/media_observer/medias/bfmtv.py

@@ -20,8 +20,19 @@ class BfmTvFrontPage(FrontPage):
 
     @classmethod
     def get_main_article(cls, soup):
-        main = soup.select_unique("article.une_item")
-        return MainArticle.create(
-            title=main.select_unique("h2.title_une_item").stripped_text,
-            url=main.select_first("a")["href"],
-        )
+        highlighted = cls._get_kwargs(soup, ".megamax article.une_item")
+        if highlighted is not None:
+            return highlighted
+        else:
+            return cls._get_kwargs(soup, ".block_une article.une_item")
+
+    @classmethod
+    def _get_kwargs(cls, soup, main_selector: str):
+        try:
+            main = soup.select_unique(main_selector)
+            return MainArticle.create(
+                title=main.select_unique("h2").stripped_text,
+                url=main.select_first("a")["href"],
+            )
+        except ValueError:
+            return None

+ 22 - 1
src/media_observer/medias/le_monde.py

@@ -19,9 +19,30 @@ class LeMondeFrontPage(FrontPage):
         ]
 
     @classmethod
-    def get_main_article(cls, soup):
+    def get_main_article(cls, soup) -> MainArticle:
+        if highlighted := cls._get_highlighted_article(soup):
+            return highlighted
+        else:
+            return cls._get_non_highlighted_article(soup)
+
+    @classmethod
+    def _get_highlighted_article(cls, soup):
+        try:
+            main = soup.select_unique("div.hp-article-municipale")
+
+            return MainArticle.create(
+                title=main.select_unique("h2").stripped_text,
+                url=main.select_first("a")["href"],
+                is_highlighted=True,
+            )
+        except ValueError:
+            return None
+
+    @classmethod
+    def _get_non_highlighted_article(cls, soup):
         main = soup.select_unique("div.article--main")
         return MainArticle.create(
             title=main.select_unique("p.article__title-label").stripped_text,
             url=main.select_first("a")["href"],
+            is_highlighted=False,
         )

+ 1 - 1
src/media_observer/medias/le_parisien.py

@@ -27,6 +27,6 @@ class LeParisienFrontPage(FrontPage):
         url = main.select_first("a")
 
         return MainArticle.create(
-            title=url.stripped_text,
+            title=main.select_unique("h3").stripped_text,
             url=url["href"],
         )