theenglishway (time) пре 7 година
родитељ
комит
987787cffa
3 измењених фајлова са 24 додато и 0 уклоњено
  1. 9 0
      tests/conftest.py
  2. 1 0
      tests/test_parser.py
  3. 14 0
      twhatter/parser/tweet.py

+ 9 - 0
tests/conftest.py

@@ -39,6 +39,7 @@ class TweetInfo(NamedTuple):
     retweets_nb: int = None
     likes_nb: int = None
     hashtag_list: List[str] = None
+    mention_list: List[int] = None
     retweeter: str = None
     retweet_id: int = None
     reacted_id: int = None
@@ -91,6 +92,14 @@ def tweet_collection():
             permalink="/BurgerQuizOff/status/1039969574555471873",
             hashtag_list=["Nuggets", "BurgerQuiz", "PrivacyMonCul"]
         ),
+        'mentions': TweetInfo(
+            id=1077838164813848576,
+            screen_name="the_english_way",
+            user_id=943804775942033408,
+            timestamp=datetime.utcfromtimestamp(1545811618),
+            permalink="/the_english_way/status/1077838164813848576",
+            mention_list=[1138959692]
+        ),
         'stats': TweetInfo(
             id=1039969574555471873,
             screen_name="BurgerQuizOff",

+ 1 - 0
tests/test_parser.py

@@ -20,6 +20,7 @@ class TestTweet:
         "with_link",
         "retweet",
         "hashtags",
+        "mentions",
         "stats",
     ])
     def test_plain_tweet(self, raw_tweet_factory, tweet_collection, tweet_type):

+ 14 - 0
twhatter/parser/tweet.py

@@ -27,6 +27,8 @@ class Tweet:
     text: str = field(repr=False)
     #: List of hashtags in the tweet
     hashtag_list: List[str]
+    #: List of mentions in the tweet
+    mention_list: List[int]
 
     #: Handle of the tweet's retweeter
     retweeter: str = None
@@ -158,6 +160,18 @@ class Tweet:
             for link in soup.find_all('a', class_="twitter-hashtag")
         ]
 
+    @staticmethod
+    def extract_mention_list(soup):
+        data_kw="data-mentioned-user-id"
+        return [
+            int(value[data_kw])
+            for value in soup.find_all(
+                'a',
+                class_="twitter-atreply",
+                attrs={data_kw: True}
+            )
+        ]
+
     @staticmethod
     def extract_text(soup):
         return soup.find('p', 'tweet-text').text