فهرست منبع

Add hashtag list

theenglishway (time) 7 سال پیش
والد
کامیت
73d8e3cc53
3 فایل تغییر یافته به همراه 22 افزوده شده و 2 حذف شده
  1. 11 2
      tests/conftest.py
  2. 1 0
      tests/test_parser.py
  3. 10 0
      twhatter/parser/tweet.py

+ 11 - 2
tests/conftest.py

@@ -6,7 +6,7 @@ from bs4 import BeautifulSoup
 
 from twhatter.api import ApiUser
 from twhatter.parser import TweetList
-from typing import NamedTuple
+from typing import NamedTuple, List
 
 @pytest.fixture
 def cli_runner():
@@ -38,6 +38,7 @@ class TweetInfo(NamedTuple):
     comments_nb: int = None
     retweets_nb: int = None
     likes_nb: int = None
+    hashtag_list: List[str] = None
     retweeter: str = None
     retweet_id: int = None
     reacted_id: int = None
@@ -80,7 +81,15 @@ def tweet_collection():
             retweeter="the_english_way",
             retweet_id=1055098556300828672,
             timestamp=datetime.utcfromtimestamp(1540375466),
-            permalink="/Senficon/status/1055037291108974592"
+            permalink="/Senficon/status/1055037291108974592",
+        ),
+        'hashtags': TweetInfo(
+            id=1039969574555471873,
+            screen_name="BurgerQuizOff",
+            user_id=949604705772228608,
+            retweeter="the_english_way",
+            permalink="/BurgerQuizOff/status/1039969574555471873",
+            hashtag_list=["Nuggets", "BurgerQuiz", "PrivacyMonCul"]
         ),
         'stats': TweetInfo(
             id=1039969574555471873,

+ 1 - 0
tests/test_parser.py

@@ -19,6 +19,7 @@ class TestTweet:
         "reaction_tweet",
         "with_link",
         "retweet",
+        "hashtags",
         "stats",
     ])
     def test_plain_tweet(self, raw_tweet_factory, tweet_collection, tweet_type):

+ 10 - 0
twhatter/parser/tweet.py

@@ -2,6 +2,7 @@ from datetime import datetime
 
 from bs4 import BeautifulSoup
 from dataclasses import dataclass, fields, InitVar, field
+from typing import List
 
 
 @dataclass
@@ -24,6 +25,8 @@ class Tweet:
     permalink: str
     #: Text of the tweet
     text: str = field(repr=False)
+    #: List of hashtags in the tweet
+    hashtag_list: List[str]
 
     #: Handle of the tweet's retweeter
     retweeter: str = None
@@ -148,6 +151,13 @@ class Tweet:
             'data-tweet-stat-count'
         ))
 
+    @staticmethod
+    def extract_hashtag_list(soup):  # build the list of hashtag texts found in `soup`
+        return [
+            link.b.text  # text of the anchor's <b> child; NOTE(review): raises AttributeError if an anchor has no <b> - confirm markup
+            for link in soup.find_all('a', class_="twitter-hashtag")  # every <a> element carrying the "twitter-hashtag" class
+        ]
+
     @staticmethod
     def extract_text(soup):
         return soup.find('p', 'tweet-text').text