conftest.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. import pytest
  2. from datetime import datetime
  3. from click.testing import CliRunner
  4. from bs4 import BeautifulSoup
  5. from twhatter.exploration import NodeTimeline
  6. from twhatter.parser import tweet_factory
  7. from typing import NamedTuple, List
  8. from twhatter.parser.media import MediaBase
  9. @pytest.fixture
  10. def cli_runner():
  11. """Runner for Click"""
  12. return CliRunner()
  13. @pytest.fixture(scope="session")
  14. def user():
  15. return "the_english_way"
  16. @pytest.fixture(scope="session")
  17. def user_prolific():
  18. return "realDonaldTrump"
  19. @pytest.fixture(scope="session")
  20. def tweet_limit():
  21. return 10
  22. # Fixtures for extraction of specific tweets of several kinds, whose author
  23. # and id are known in advance
  24. class MediaInfo(NamedTuple):
  25. """Class to hold information about a media that is already known"""
  26. image_links: List[str] = []
  27. def __eq__(self, other):
  28. """Override of __eq__ to check against `MediaBase` instance"""
  29. return (isinstance(other, MediaBase)
  30. and other.image_links == self.image_links)
  31. class TweetInfo(NamedTuple):
  32. """Class to hold information about a tweet that is already known"""
  33. id: int
  34. username: str
  35. fullname: str
  36. user_id: int
  37. permalink: str
  38. timestamp: datetime = None
  39. text: str = None
  40. comments_nb: int = None
  41. retweets_nb: int = None
  42. likes_nb: int = None
  43. hashtag_list: List[str] = None
  44. mention_list: List[int] = None
  45. retweeter: str = None
  46. retweet_id: int = None
  47. reacted_id: int = None
  48. reacted_user_id: int = None
  49. link_to: str = None
  50. media: MediaInfo = None
  51. @pytest.fixture(scope="session")
  52. def tweet_collection():
  53. return {
  54. 'plain': TweetInfo(
  55. id=1077838164813848576,
  56. username="the_english_way",
  57. fullname="theenglishway",
  58. user_id=943804775942033408,
  59. timestamp=datetime.utcfromtimestamp(1545811618),
  60. permalink="/the_english_way/status/1077838164813848576",
  61. text="""Ca y est j'ai un pipeline Concourse avec un job qui builde une image @Docker qui affiche un "Hello World" dans un autre job \o/
  62. ........... je suis pas sûr de savoir ce que ça veut dire, mais en tout cas c'était mon objectif de la matinée """
  63. ),
  64. 'reaction_tweet': TweetInfo(
  65. id=1078281840945963008,
  66. username="the_english_way",
  67. fullname="theenglishway",
  68. user_id=943804775942033408,
  69. timestamp=datetime.utcfromtimestamp(1545917399),
  70. permalink="/the_english_way/status/1078281840945963008",
  71. reacted_id=1078277316193726464,
  72. reacted_user_id=19976004
  73. ),
  74. 'with_link': TweetInfo(
  75. id=1077505613079429120,
  76. username="the_english_way",
  77. fullname="theenglishway",
  78. user_id=943804775942033408,
  79. timestamp=datetime.utcfromtimestamp(1545732331),
  80. permalink="/the_english_way/status/1077505613079429120",
  81. link_to="https://t.co/el5VJucLRz"
  82. ),
  83. 'retweet': TweetInfo(
  84. id=1055037291108974592,
  85. username="Senficon",
  86. fullname="Julia Reda",
  87. user_id=14861745,
  88. retweeter="the_english_way",
  89. retweet_id=1055098556300828672,
  90. timestamp=datetime.utcfromtimestamp(1540375466),
  91. permalink="/Senficon/status/1055037291108974592",
  92. ),
  93. 'hashtags': TweetInfo(
  94. id=1039969574555471873,
  95. username="BurgerQuizOff",
  96. fullname="Burger Quiz",
  97. user_id=949604705772228608,
  98. retweeter="the_english_way",
  99. permalink="/BurgerQuizOff/status/1039969574555471873",
  100. hashtag_list=["Nuggets", "BurgerQuiz", "PrivacyMonCul"]
  101. ),
  102. 'mentions': TweetInfo(
  103. id=1077838164813848576,
  104. username="the_english_way",
  105. fullname="theenglishway",
  106. user_id=943804775942033408,
  107. timestamp=datetime.utcfromtimestamp(1545811618),
  108. permalink="/the_english_way/status/1077838164813848576",
  109. mention_list=[1138959692]
  110. ),
  111. 'stats': TweetInfo(
  112. id=1039969574555471873,
  113. username="BurgerQuizOff",
  114. fullname="Burger Quiz",
  115. user_id=949604705772228608,
  116. permalink="/BurgerQuizOff/status/1039969574555471873",
  117. retweeter="the_english_way",
  118. comments_nb=12,
  119. retweets_nb=176,
  120. likes_nb=555
  121. ),
  122. 'media':TweetInfo(
  123. id=1086327536726900736,
  124. username="the_english_way",
  125. fullname="theenglishway",
  126. user_id=943804775942033408,
  127. permalink="/the_english_way/status/1086327536726900736",
  128. media=MediaInfo(
  129. image_links=["https://pbs.twimg.com/media/DxNof6AXQAAu2oU.jpg"]
  130. )
  131. ),
  132. }
  133. @pytest.fixture(scope="session")
  134. def raw_html_user_initial_page_factory():
  135. def _raw_html_user_initial_page(user):
  136. n = NodeTimeline(user)
  137. response = n.get_user_timeline(user)
  138. return BeautifulSoup(response.text, "lxml")
  139. return _raw_html_user_initial_page
  140. @pytest.fixture(scope="session")
  141. def raw_html_user_initial_page(raw_html_user_initial_page_factory, user):
  142. return raw_html_user_initial_page_factory(user)
  143. @pytest.fixture(scope="session")
  144. def raw_tweet_factory(raw_html_user_initial_page_factory):
  145. def _raw_tweet_factory(tweet_info):
  146. user_page = tweet_info.retweeter or tweet_info.username
  147. soup = raw_html_user_initial_page_factory(user_page)
  148. return soup.find(id="stream-item-tweet-{}".format(tweet_info.id))
  149. return _raw_tweet_factory
  150. @pytest.fixture(scope="session")
  151. def tweet_test_data_factory(raw_tweet_factory, tweet_collection):
  152. def _tweet_test_data_factory(tweet_type):
  153. tweet_info = tweet_collection[tweet_type]
  154. raw_tweet = raw_tweet_factory(tweet_info)
  155. return tweet_factory(raw_tweet), tweet_info
  156. return _tweet_test_data_factory
  157. class UserInfo(NamedTuple):
  158. """Class to hold information about an user that is already known"""
  159. id: int
  160. fullname: str
  161. username: str
  162. join_date: datetime
  163. tweets_nb: int = None
  164. following_nb: int = None
  165. followers_nb: int = None
  166. likes_nb: int = None
  167. @pytest.fixture(scope="session")
  168. def user_collection():
  169. return {
  170. 'Marlene_beadles': UserInfo(
  171. id=295177446,
  172. username="Marlene_beadles",
  173. fullname="Marlene Hansen",
  174. join_date=datetime(2011, 5, 8, 0, 0),
  175. tweets_nb=25,
  176. following_nb=342,
  177. followers_nb=81,
  178. likes_nb=4
  179. ),
  180. 'the_english_way': UserInfo(
  181. id=943804775942033408,
  182. fullname="theenglishway",
  183. username="the_english_way",
  184. join_date=datetime(2017, 12, 21, 0, 0),
  185. ),
  186. }