Forráskód Böngészése

Add info about tweet that's retweeted or reacted to

theenglishway (time) 7 éve
szülő
commit
6e043366c3
3 módosított fájl, 58 hozzáadás és 6 törlés
  1. 15 3
      tests/conftest.py
  2. 1 0
      tests/test_parser.py
  3. 42 3
      twhatter/parser/tweet.py

+ 15 - 3
tests/conftest.py

@@ -32,12 +32,16 @@ class TweetInfo(NamedTuple):
     id: int
     id: int
     screen_name: str
     screen_name: str
     user_id: int
     user_id: int
+    permalink: str
     timestamp: datetime = None
     timestamp: datetime = None
     text: str = None
     text: str = None
     comments_nb: int = None
     comments_nb: int = None
     retweets_nb: int = None
     retweets_nb: int = None
     likes_nb: int = None
     likes_nb: int = None
     retweeter: str = None
     retweeter: str = None
+    retweet_id: int = None
+    reacted_id: int = None
+    reacted_user_id: int = None
 
 
 @pytest.fixture(scope="session")
 @pytest.fixture(scope="session")
 def tweet_collection():
 def tweet_collection():
@@ -47,6 +51,7 @@ def tweet_collection():
             screen_name="the_english_way",
             screen_name="the_english_way",
             user_id=943804775942033408,
             user_id=943804775942033408,
             timestamp=datetime.utcfromtimestamp(1545811618),
             timestamp=datetime.utcfromtimestamp(1545811618),
+            permalink="/the_english_way/status/1077838164813848576",
             text="""Ca y est j'ai un pipeline Concourse avec un job qui builde une image @Docker qui affiche un "Hello World" dans un autre job \o/
             text="""Ca y est j'ai un pipeline Concourse avec un job qui builde une image @Docker qui affiche un "Hello World" dans un autre job \o/
 ........... je suis pas sûr de savoir ce que ça veut dire, mais en tout cas c'était mon objectif de la matinée """
 ........... je suis pas sûr de savoir ce que ça veut dire, mais en tout cas c'était mon objectif de la matinée """
         ),
         ),
@@ -54,25 +59,32 @@ def tweet_collection():
             id=1078281840945963008,
             id=1078281840945963008,
             screen_name="the_english_way",
             screen_name="the_english_way",
             user_id=943804775942033408,
             user_id=943804775942033408,
-            timestamp=datetime.utcfromtimestamp(1545917399)
+            timestamp=datetime.utcfromtimestamp(1545917399),
+            permalink="/the_english_way/status/1078281840945963008",
+            reacted_id=1078277316193726464,
+            reacted_user_id=19976004
         ),
         ),
         'with_link': TweetInfo(
         'with_link': TweetInfo(
             id=1077505613079429120,
             id=1077505613079429120,
             screen_name="the_english_way",
             screen_name="the_english_way",
             user_id=943804775942033408,
             user_id=943804775942033408,
-            timestamp=datetime.utcfromtimestamp(1545732331)
+            timestamp=datetime.utcfromtimestamp(1545732331),
+            permalink="/the_english_way/status/1077505613079429120"
         ),
         ),
         'retweet': TweetInfo(
         'retweet': TweetInfo(
             id=1055037291108974592,
             id=1055037291108974592,
             screen_name="Senficon",
             screen_name="Senficon",
             user_id=14861745,
             user_id=14861745,
             retweeter="the_english_way",
             retweeter="the_english_way",
-            timestamp=datetime.utcfromtimestamp(1540375466)
+            retweet_id=1055098556300828672,
+            timestamp=datetime.utcfromtimestamp(1540375466),
+            permalink="/Senficon/status/1055037291108974592"
         ),
         ),
         'stats': TweetInfo(
         'stats': TweetInfo(
             id=1039969574555471873,
             id=1039969574555471873,
             screen_name="BurgerQuizOff",
             screen_name="BurgerQuizOff",
             user_id=949604705772228608,
             user_id=949604705772228608,
+            permalink="/BurgerQuizOff/status/1039969574555471873",
             retweeter="the_english_way",
             retweeter="the_english_way",
             comments_nb=12,
             comments_nb=12,
             retweets_nb=176,
             retweets_nb=176,

+ 1 - 0
tests/test_parser.py

@@ -18,6 +18,7 @@ class TestTweet:
         "plain",
         "plain",
         "reaction_tweet",
         "reaction_tweet",
         "with_link",
         "with_link",
+        "retweet",
         "stats",
         "stats",
     ])
     ])
     def test_plain_tweet(self, raw_tweet_factory, tweet_collection, tweet_type):
     def test_plain_tweet(self, raw_tweet_factory, tweet_collection, tweet_type):

+ 42 - 3
twhatter/parser/tweet.py

@@ -20,11 +20,21 @@ class Tweet:
     likes_nb: int
     likes_nb: int
     #: Timestamp of the original tweet
     #: Timestamp of the original tweet
     timestamp: datetime
     timestamp: datetime
-
+    #: Permalink to the original tweet
+    permalink: str
+    #: Text of the tweet
     text: str = field(repr=False)
     text: str = field(repr=False)
 
 
     #: Handle of the tweet's retweeter
     #: Handle of the tweet's retweeter
     retweeter: str = None
     retweeter: str = None
+    #: Id of the retweet
+    retweet_id: int = None
+
+    #: Id of the tweet that the tweet is in reaction to
+    reacted_id: int = None
+    #: Id of the user that the tweet is in reaction to
+    reacted_user_id: int = None
+
     #: The soup extracted from the raw HTML
     #: The soup extracted from the raw HTML
     soup: InitVar[BeautifulSoup] = None
     soup: InitVar[BeautifulSoup] = None
 
 
@@ -39,11 +49,15 @@ class Tweet:
             [data_kw]
             [data_kw]
         )
         )
 
 
+    @classmethod
+    def _extract_from_div_tweet(cls, soup, data_kw):
+        return cls._extract_from_div(soup, 'tweet', data_kw)
+
     @staticmethod
     @staticmethod
-    def _extract_from_div_tweet(soup, data_kw):
+    def _extract_from_div(soup, div_class, data_kw):
         kw = "data-{}".format(data_kw)
         kw = "data-{}".format(data_kw)
         return(
         return(
-            soup.find('div', class_='tweet', attrs={kw: True})[kw]
+            soup.find('div', class_=div_class, attrs={kw: True})[kw]
         )
         )
 
 
     @staticmethod
     @staticmethod
@@ -65,12 +79,37 @@ class Tweet:
         except TypeError:
         except TypeError:
             return None
             return None
 
 
+    @classmethod
+    def extract_retweet_id(cls, soup):
+        try:
+            return int(cls._extract_from_div_tweet(soup, 'retweet-id'))
+        except TypeError:
+            return None
+
+    @classmethod
+    def extract_reacted_id(cls, soup):
+        try:
+            return int(cls._extract_from_div(soup, 'QuoteTweet-innerContainer', 'item-id'))
+        except TypeError:
+            return None
+
+    @classmethod
+    def extract_reacted_user_id(cls, soup):
+        try:
+            return int(cls._extract_from_div(soup, 'QuoteTweet-innerContainer', 'user-id'))
+        except TypeError:
+            return None
+
     @staticmethod
     @staticmethod
     def extract_timestamp(soup):
     def extract_timestamp(soup):
         return datetime.utcfromtimestamp(
         return datetime.utcfromtimestamp(
             int(soup.find('span', attrs={'data-time': True})['data-time'])
             int(soup.find('span', attrs={'data-time': True})['data-time'])
         )
         )
 
 
+    @classmethod
+    def extract_permalink(cls, soup):
+        return cls._extract_from_div_tweet(soup, 'permalink-path')
+
     @staticmethod
     @staticmethod
     def extract_fullname(soup):
     def extract_fullname(soup):
         return soup.find('strong', 'fullname').text
         return soup.find('strong', 'fullname').text