le_monde.py 807 B

1234567891011121314151617181920212223242526272829303132
  1. from media_observer.article import (
  2. TopArticle,
  3. MainArticle,
  4. FrontPage,
  5. to_text,
  6. )
  7. class LeMondeFrontPage(FrontPage):
  8. @staticmethod
  9. def get_top_articles(soup):
  10. all_articles = soup.select("div.top-article")
  11. return [
  12. TopArticle.create(
  13. title=a.text.strip(),
  14. url=a.find("a")["href"],
  15. rank=idx + 1,
  16. )
  17. for idx, a in enumerate(all_articles)
  18. ]
  19. @staticmethod
  20. def get_main_article(soup):
  21. def to_href(soup):
  22. link = soup.select("a")[0]
  23. return link["href"]
  24. [main] = soup.select("div.article--main")
  25. return MainArticle.create(
  26. title=to_text(main, "p.article__title-label"),
  27. url=to_href(main),
  28. )