Browse Source

Add tests on Tweet

theenglishway (time) 7 years ago
parent
commit
d184a68601
3 changed files with 107 additions and 9 deletions
  1. 62 5
      tests/conftest.py
  2. 17 0
      tests/test_parser.py
  3. 28 4
      twhatter/parser/tweet.py

+ 62 - 5
tests/conftest.py

@@ -1,8 +1,10 @@
 import pytest
 from click.testing import CliRunner
-from twhatter.api import ApiUser
 from bs4 import BeautifulSoup
 
+from twhatter.api import ApiUser
+from twhatter.parser import TweetList
+from typing import NamedTuple
 
 @pytest.fixture
 def cli_runner():
@@ -19,9 +21,64 @@ def user():
 def tweet_limit():
     return 10
 
+# Fixtures for extraction of specific tweets of several kinds, whose author
+# and id are known in advance
+
+
+class TweetInfo(NamedTuple):
+    """Class to hold information about a tweet that is already known"""
+    id: int
+    # Screen name (handle) of the original author
+    screen_name: str
+    user_id: int
+    # Screen name of the user who retweeted it, if any
+    retweeter: str = None
+
+@pytest.fixture(scope="session")
+def tweet_collection():
+    return {
+        'plain': TweetInfo(
+            id=1077838164813848576,
+            screen_name="the_english_way",
+            user_id=943804775942033408
+        ),
+        'reaction_tweet': TweetInfo(
+            id=1078281840945963008,
+            screen_name="the_english_way",
+            user_id=943804775942033408
+        ),
+        'with_link': TweetInfo(
+            id=1078281840945963008,
+            screen_name="the_english_way",
+            user_id=943804775942033408
+        ),
+        'retweet': TweetInfo(
+            id=1055037291108974592,
+            screen_name="Senficon",
+            user_id=14861745,
+            retweeter="the_english_way"
+        )
+    }
+
+
+@pytest.fixture(scope="session")
+def raw_html_user_initial_page_factory():
+    def _raw_html_user_initial_page(user):
+        a = ApiUser(user)
+        response = a.get_initial()
+        return BeautifulSoup(response.text, "lxml")
+    return _raw_html_user_initial_page
+
+
+@pytest.fixture(scope="session")
+def raw_html_user_initial_page(raw_html_user_initial_page_factory, user):
+    return raw_html_user_initial_page_factory(user)
+
 
 @pytest.fixture(scope="session")
-def raw_html_user_initial_page(user):
-    a = ApiUser(user)
-    response = a.get_initial()
-    return BeautifulSoup(response.text, "lxml")
+def raw_tweet_factory(raw_html_user_initial_page_factory):
+    def _raw_tweet_factory(tweet_info):
+        user_page = tweet_info.retweeter or tweet_info.screen_name
+        soup = raw_html_user_initial_page_factory(user_page)
+        return soup.find(id="stream-item-tweet-{}".format(tweet_info.id))
+    return _raw_tweet_factory

+ 17 - 0
tests/test_parser.py

@@ -1,3 +1,4 @@
+import pytest
 from twhatter.parser import TweetList, Tweet
 
 
@@ -10,3 +11,19 @@ class TestTweetList:
         t_list = TweetList(raw_html_user_initial_page)
         for t in t_list:
             assert isinstance(t, Tweet)
+
+
+class TestTweet:
+    @pytest.mark.parametrize("tweet_type", [
+        "plain",
+        "reaction_tweet",
+        "with_link",
+    ])
+    def test_plain_tweet(self, raw_tweet_factory, tweet_collection, tweet_type):
+        tweet_info = tweet_collection[tweet_type]
+        raw = raw_tweet_factory(tweet_info)
+        t = Tweet.extract(raw)
+        assert t
+
+        for field, value in tweet_info._asdict().items():
+            assert getattr(t, field) == value

+ 28 - 4
twhatter/parser/tweet.py

@@ -6,13 +6,19 @@ from dataclasses import dataclass, fields, InitVar, field
 
 @dataclass
 class Tweet:
+    #: Tweet ID
     id: int
+    #: Handle of the tweet's original author
+    screen_name: str
+    #: ID of the tweet's original author
+    user_id: int
     timestamp: datetime
-    user: str
     replies: int
     retweets: int
     likes: int
     text: str = field(repr=False)
+    #: Handle of the tweet's retweeter
+    retweeter: str = None
     soup: InitVar[BeautifulSoup] = None
 
     def __post_init__(self, soup):
@@ -26,13 +32,31 @@ class Tweet:
             [data_kw]
         )
 
+    @staticmethod
+    def _extract_from_div_tweet(soup, data_kw):
+        kw = "data-{}".format(data_kw)
+        return(
+            soup.find('div', class_='tweet', attrs={kw: True})[kw]
+        )
+
     @staticmethod
     def extract_id(soup):
         return int(soup['data-item-id'])
 
-    @staticmethod
-    def extract_user(soup):
-        return soup.find('span', 'username').text
+    @classmethod
+    def extract_screen_name(cls, soup):
+        return cls._extract_from_div_tweet(soup, 'screen-name')
+
+    @classmethod
+    def extract_user_id(cls, soup):
+        return int(cls._extract_from_div_tweet(soup, 'user-id'))
+
+    @classmethod
+    def extract_retweeter(cls, soup):
+        try:
+            return cls._extract_from_div_tweet(soup, 'retweeter')
+        except TypeError:
+            return None
 
     @staticmethod
     def extract_timestamp(soup):