소스 검색

Use select instead of "find"

jherve 1 년 전
부모
커밋
2bb31d8be3

+ 5 - 0
src/de_quoi_parle_le_monde/article.py

@@ -80,3 +80,8 @@ class MainPage(ABC):
 class ArchiveCollection:
     url: str
     MainPageClass: type[MainPage]
+
+
+def to_text(soup: BeautifulSoup, selector: str) -> str:
+    [text_element] = soup.select(selector)
+    return text_element.text.strip()

+ 9 - 6
src/de_quoi_parle_le_monde/medias/cnews.py

@@ -7,6 +7,7 @@ from de_quoi_parle_le_monde.article import (
     TopArticle,
     MainArticle,
     MainPage,
+    to_text,
 )
 
 
@@ -16,13 +17,13 @@ class CNewsFeaturedArticleSnapshot(FeaturedArticleSnapshot):
 
 class CNewsMainPage(MainPage):
     @staticmethod
-    def get_top_articles(soup):
-        all_articles = soup.css.select(".top-news-content a")
+    def get_top_articles(soup: BeautifulSoup):
+        all_articles = soup.select(".top-news-content a")
 
         return [
             TopArticle(
                 article=CNewsFeaturedArticleSnapshot.create(
-                    title=a.find("h3", class_="dm-letop-title").text.strip(), url=a["href"]
+                    title=to_text(a, "h3.dm-letop-title"), url=a["href"]
                 ),
                 rank=idx + 1,
             )
@@ -31,10 +32,12 @@ class CNewsMainPage(MainPage):
 
     @staticmethod
     def get_main_article(soup):
-        main = soup.find("div", class_="dm-block-news_1_single_full")
+        main = soup.select("div.dm-block-news_1_single_full")[0]
+        [url] = main.select("a")
+
         return MainArticle(
             article=CNewsFeaturedArticleSnapshot.create(
-                title=main.find("h2", class_="dm-news-title").text.strip(),
-                url=main.find("a")["href"],
+                title=to_text(main, "h2.dm-news-title"),
+                url=url["href"],
             )
         )

+ 19 - 9
src/de_quoi_parle_le_monde/medias/france_tv_info.py

@@ -7,6 +7,7 @@ from de_quoi_parle_le_monde.article import (
     TopArticle,
     MainArticle,
     MainPage,
+    to_text,
 )
 
 
@@ -17,14 +18,17 @@ class FranceTvInfoFeaturedArticleSnapshot(FeaturedArticleSnapshot):
 class FranceTvInfoMainPage(MainPage):
     @staticmethod
     def get_top_articles(soup):
-        all_articles = soup.find_all("article", class_="card-article-most-read")
+        def to_href(article, selector):
+            [url] = article.select(selector)
+            return url["href"]
+
+        all_articles = soup.select("article.card-article-most-read")
+
         return [
             TopArticle(
                 article=FranceTvInfoFeaturedArticleSnapshot.create(
-                    title=a.find(
-                        "p", class_="card-article-most-read__title"
-                    ).text.strip(),
-                    url=a.find("a")["href"],
+                    title=to_text(a, "p.card-article-most-read__title"),
+                    url=to_href(a, "a"),
                 ),
                 rank=idx + 1,
             )
@@ -33,11 +37,17 @@ class FranceTvInfoMainPage(MainPage):
 
     @staticmethod
     def get_main_article(soup):
-        main = soup.find("article", class_="card-article-majeure") or soup.find(
-            "article", class_="card-article-actu-forte"
+        def select_first_of(soup, *selectors):
+            for s in selectors:
+                if found := soup.select(s):
+                    return found
+            return None
+
+        [main] = select_first_of(
+            soup, "article.card-article-majeure", "article.card-article-actu-forte"
         )
-        title = main.find(class_="card-article-majeure__title") or main.find(
-            class_="card-article-actu-forte__title"
+        [title] = select_first_of(
+            main, ".card-article-majeure__title", ".card-article-actu-forte__title"
         )
 
         return MainArticle(

+ 9 - 4
src/de_quoi_parle_le_monde/medias/le_monde.py

@@ -7,6 +7,7 @@ from de_quoi_parle_le_monde.article import (
     TopArticle,
     MainArticle,
     MainPage,
+    to_text,
 )
 
 
@@ -17,7 +18,7 @@ class LeMondeFeaturedArticleSnapshot(FeaturedArticleSnapshot):
 class LeMondeMainPage(MainPage):
     @staticmethod
     def get_top_articles(soup):
-        all_articles = soup.find_all("div", class_="top-article")
+        all_articles = soup.select("div.top-article")
         return [
             TopArticle(
                 article=LeMondeFeaturedArticleSnapshot.create(
@@ -30,10 +31,14 @@ class LeMondeMainPage(MainPage):
 
     @staticmethod
     def get_main_article(soup):
-        main = soup.find("div", class_="article--main")
+        def to_href(soup):
+            link = soup.select("a")[0]
+            return link["href"]
+
+        [main] = soup.select("div.article--main")
         return MainArticle(
             article=LeMondeFeaturedArticleSnapshot.create(
-                title=main.find("p", class_="article__title-label").text.strip(),
-                url=main.find("a")["href"],
+                title=to_text(main, "p.article__title-label"),
+                url=to_href(main),
             )
         )