conftest.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. import pytest
  2. from datetime import datetime
  3. from click.testing import CliRunner
  4. from bs4 import BeautifulSoup
  5. from twhatter.exploration import NodeTimeline
  6. from twhatter.parser.tweet import tweet_factory
  7. from typing import NamedTuple, List
  8. from twhatter.parser.media import MediaBase
  9. @pytest.fixture
  10. def cli_runner():
  11. """Runner for Click"""
  12. return CliRunner()
  13. @pytest.fixture(scope="session")
  14. def user():
  15. return "the_english_way"
  16. @pytest.fixture(scope="session")
  17. def user_prolific():
  18. return "realDonaldTrump"
  19. # Fixtures for extraction of specific tweets of several kinds, whose author
  20. # and id are known in advance
  21. class MediaInfo(NamedTuple):
  22. """Class to hold information about a media that is already known"""
  23. image_links: List[str] = []
  24. def __eq__(self, other):
  25. """Override of __eq__ to check against `MediaBase` instance"""
  26. return (isinstance(other, MediaBase)
  27. and other.image_links == self.image_links)
  28. class TweetInfo(NamedTuple):
  29. """Class to hold information about a tweet that is already known"""
  30. id: int
  31. username: str
  32. fullname: str
  33. user_id: int
  34. permalink: str
  35. timestamp: datetime = None
  36. text: str = None
  37. comments_nb: int = None
  38. retweets_nb: int = None
  39. likes_nb: int = None
  40. hashtag_list: List[str] = None
  41. mention_list: List[int] = None
  42. retweeter: str = None
  43. retweet_id: int = None
  44. reacted_id: int = None
  45. reacted_user_id: int = None
  46. link_to: str = None
  47. media: MediaInfo = None
  48. @pytest.fixture(scope="session")
  49. def tweet_collection():
  50. return {
  51. 'plain': TweetInfo(
  52. id=1077838164813848576,
  53. username="the_english_way",
  54. fullname="theenglishway",
  55. user_id=943804775942033408,
  56. timestamp=datetime.utcfromtimestamp(1545811618),
  57. permalink="/the_english_way/status/1077838164813848576",
  58. text="""Ca y est j'ai un pipeline Concourse avec un job qui builde une image @Docker qui affiche un "Hello World" dans un autre job \o/
  59. ........... je suis pas sûr de savoir ce que ça veut dire, mais en tout cas c'était mon objectif de la matinée """
  60. ),
  61. 'reaction_tweet': TweetInfo(
  62. id=1078281840945963008,
  63. username="the_english_way",
  64. fullname="theenglishway",
  65. user_id=943804775942033408,
  66. timestamp=datetime.utcfromtimestamp(1545917399),
  67. permalink="/the_english_way/status/1078281840945963008",
  68. reacted_id=1078277316193726464,
  69. reacted_user_id=19976004
  70. ),
  71. 'with_link': TweetInfo(
  72. id=1077505613079429120,
  73. username="the_english_way",
  74. fullname="theenglishway",
  75. user_id=943804775942033408,
  76. timestamp=datetime.utcfromtimestamp(1545732331),
  77. permalink="/the_english_way/status/1077505613079429120",
  78. link_to="https://t.co/el5VJucLRz"
  79. ),
  80. 'retweet': TweetInfo(
  81. id=1055037291108974592,
  82. username="Senficon",
  83. fullname="Julia Reda",
  84. user_id=14861745,
  85. retweeter="the_english_way",
  86. retweet_id=1055098556300828672,
  87. timestamp=datetime.utcfromtimestamp(1540375466),
  88. permalink="/Senficon/status/1055037291108974592",
  89. ),
  90. 'hashtags': TweetInfo(
  91. id=1039969574555471873,
  92. username="BurgerQuizOff",
  93. fullname="Burger Quiz",
  94. user_id=949604705772228608,
  95. retweeter="the_english_way",
  96. permalink="/BurgerQuizOff/status/1039969574555471873",
  97. hashtag_list=["Nuggets", "BurgerQuiz", "PrivacyMonCul"]
  98. ),
  99. 'mentions': TweetInfo(
  100. id=1077838164813848576,
  101. username="the_english_way",
  102. fullname="theenglishway",
  103. user_id=943804775942033408,
  104. timestamp=datetime.utcfromtimestamp(1545811618),
  105. permalink="/the_english_way/status/1077838164813848576",
  106. mention_list=[1138959692]
  107. ),
  108. 'stats': TweetInfo(
  109. id=1039969574555471873,
  110. username="BurgerQuizOff",
  111. fullname="Burger Quiz",
  112. user_id=949604705772228608,
  113. permalink="/BurgerQuizOff/status/1039969574555471873",
  114. retweeter="the_english_way",
  115. comments_nb=12,
  116. retweets_nb=176,
  117. likes_nb=556
  118. ),
  119. 'media':TweetInfo(
  120. id=1086327536726900736,
  121. username="the_english_way",
  122. fullname="theenglishway",
  123. user_id=943804775942033408,
  124. permalink="/the_english_way/status/1086327536726900736",
  125. media=MediaInfo(
  126. image_links=["https://pbs.twimg.com/media/DxNof6AXQAAu2oU.jpg"]
  127. )
  128. ),
  129. }
  130. @pytest.fixture(scope="session")
  131. def raw_html_user_initial_page_factory():
  132. def _raw_html_user_initial_page(user):
  133. n = NodeTimeline(user)
  134. response = n._get_base_page(user)
  135. return BeautifulSoup(response.text, "lxml")
  136. return _raw_html_user_initial_page
  137. @pytest.fixture(scope="session")
  138. def raw_html_user_initial_page(raw_html_user_initial_page_factory, user):
  139. return raw_html_user_initial_page_factory(user)
  140. @pytest.fixture(scope="session")
  141. def raw_tweet_factory(raw_html_user_initial_page_factory):
  142. def _raw_tweet_factory(tweet_info):
  143. user_page = tweet_info.retweeter or tweet_info.username
  144. soup = raw_html_user_initial_page_factory(user_page)
  145. return soup.find(id="stream-item-tweet-{}".format(tweet_info.id))
  146. return _raw_tweet_factory
  147. @pytest.fixture(scope="session")
  148. def tweet_test_data_factory(raw_tweet_factory, tweet_collection):
  149. def _tweet_test_data_factory(tweet_type):
  150. tweet_info = tweet_collection[tweet_type]
  151. raw_tweet = raw_tweet_factory(tweet_info)
  152. return tweet_factory(raw_tweet), tweet_info
  153. return _tweet_test_data_factory
  154. class UserInfo(NamedTuple):
  155. """Class to hold information about an user that is already known"""
  156. id: int
  157. fullname: str
  158. username: str
  159. join_date: datetime
  160. tweets_nb: int = None
  161. following_nb: int = None
  162. followers_nb: int = None
  163. likes_nb: int = None
  164. @pytest.fixture(scope="session")
  165. def user_collection():
  166. return {
  167. 'Marlene_beadles': UserInfo(
  168. id=295177446,
  169. username="Marlene_beadles",
  170. fullname="Marlene Hansen",
  171. join_date=datetime(2011, 5, 8, 0, 0),
  172. tweets_nb=25,
  173. following_nb=342,
  174. followers_nb=81,
  175. likes_nb=4
  176. ),
  177. 'the_english_way': UserInfo(
  178. id=943804775942033408,
  179. fullname="theenglishway",
  180. username="the_english_way",
  181. join_date=datetime(2017, 12, 21, 0, 0),
  182. ),
  183. }