Explorar el Código

Add info about tweet that's retweeted or reacted to

theenglishway (time) hace 7 años
padre
commit
6e043366c3
Se han modificado 3 ficheros con 58 adiciones y 6 borrados
  1. 15 3
      tests/conftest.py
  2. 1 0
      tests/test_parser.py
  3. 42 3
      twhatter/parser/tweet.py

+ 15 - 3
tests/conftest.py

@@ -32,12 +32,16 @@ class TweetInfo(NamedTuple):
     id: int
     screen_name: str
     user_id: int
+    permalink: str
     timestamp: datetime = None
     text: str = None
     comments_nb: int = None
     retweets_nb: int = None
     likes_nb: int = None
     retweeter: str = None
+    retweet_id: int = None
+    reacted_id: int = None
+    reacted_user_id: int = None
 
 @pytest.fixture(scope="session")
 def tweet_collection():
@@ -47,6 +51,7 @@ def tweet_collection():
             screen_name="the_english_way",
             user_id=943804775942033408,
             timestamp=datetime.utcfromtimestamp(1545811618),
+            permalink="/the_english_way/status/1077838164813848576",
             text="""Ca y est j'ai un pipeline Concourse avec un job qui builde une image @Docker qui affiche un "Hello World" dans un autre job \o/
 ........... je suis pas sûr de savoir ce que ça veut dire, mais en tout cas c'était mon objectif de la matinée """
         ),
@@ -54,25 +59,32 @@ def tweet_collection():
             id=1078281840945963008,
             screen_name="the_english_way",
             user_id=943804775942033408,
-            timestamp=datetime.utcfromtimestamp(1545917399)
+            timestamp=datetime.utcfromtimestamp(1545917399),
+            permalink="/the_english_way/status/1078281840945963008",
+            reacted_id=1078277316193726464,
+            reacted_user_id=19976004
         ),
         'with_link': TweetInfo(
             id=1077505613079429120,
             screen_name="the_english_way",
             user_id=943804775942033408,
-            timestamp=datetime.utcfromtimestamp(1545732331)
+            timestamp=datetime.utcfromtimestamp(1545732331),
+            permalink="/the_english_way/status/1077505613079429120"
         ),
         'retweet': TweetInfo(
             id=1055037291108974592,
             screen_name="Senficon",
             user_id=14861745,
             retweeter="the_english_way",
-            timestamp=datetime.utcfromtimestamp(1540375466)
+            retweet_id=1055098556300828672,
+            timestamp=datetime.utcfromtimestamp(1540375466),
+            permalink="/Senficon/status/1055037291108974592"
         ),
         'stats': TweetInfo(
             id=1039969574555471873,
             screen_name="BurgerQuizOff",
             user_id=949604705772228608,
+            permalink="/BurgerQuizOff/status/1039969574555471873",
             retweeter="the_english_way",
             comments_nb=12,
             retweets_nb=176,

+ 1 - 0
tests/test_parser.py

@@ -18,6 +18,7 @@ class TestTweet:
         "plain",
         "reaction_tweet",
         "with_link",
+        "retweet",
         "stats",
     ])
     def test_plain_tweet(self, raw_tweet_factory, tweet_collection, tweet_type):

+ 42 - 3
twhatter/parser/tweet.py

@@ -20,11 +20,21 @@ class Tweet:
     likes_nb: int
     #: Timestamp of the original tweet
     timestamp: datetime
-
+    #: Permalink to the original tweet
+    permalink: str
+    #: Text of the tweet
     text: str = field(repr=False)
 
     #: Handle of the tweet's retweeter
     retweeter: str = None
+    #: Id of the retweet
+    retweet_id: int = None
+
+    #: Id of the tweet that the tweet is in reaction to
+    reacted_id: int = None
+    #: Id of the user that the tweet is in reaction to
+    reacted_user_id: int = None
+
     #: The soup extracted from the raw HTML
     soup: InitVar[BeautifulSoup] = None
 
@@ -39,11 +49,15 @@ class Tweet:
             [data_kw]
         )
 
+    @classmethod
+    def _extract_from_div_tweet(cls, soup, data_kw):
+        return cls._extract_from_div(soup, 'tweet', data_kw)
+
     @staticmethod
-    def _extract_from_div_tweet(soup, data_kw):
+    def _extract_from_div(soup, div_class, data_kw):
         kw = "data-{}".format(data_kw)
         return(
-            soup.find('div', class_='tweet', attrs={kw: True})[kw]
+            soup.find('div', class_=div_class, attrs={kw: True})[kw]
         )
 
     @staticmethod
@@ -65,12 +79,37 @@ class Tweet:
         except TypeError:
             return None
 
+    @classmethod
+    def extract_retweet_id(cls, soup):
+        try:
+            return int(cls._extract_from_div_tweet(soup, 'retweet-id'))
+        except TypeError:
+            return None
+
+    @classmethod
+    def extract_reacted_id(cls, soup):
+        try:
+            return int(cls._extract_from_div(soup, 'QuoteTweet-innerContainer', 'item-id'))
+        except TypeError:
+            return None
+
+    @classmethod
+    def extract_reacted_user_id(cls, soup):
+        try:
+            return int(cls._extract_from_div(soup, 'QuoteTweet-innerContainer', 'user-id'))
+        except TypeError:
+            return None
+
     @staticmethod
     def extract_timestamp(soup):
         return datetime.utcfromtimestamp(
             int(soup.find('span', attrs={'data-time': True})['data-time'])
         )
 
+    @classmethod
+    def extract_permalink(cls, soup):
+        return cls._extract_from_div_tweet(soup, 'permalink-path')
+
     @staticmethod
     def extract_fullname(soup):
         return soup.find('strong', 'fullname').text