Procházet zdrojové kódy

Extract info from tweets

theenglishway (time) před 7 roky
rodič
revize
088627a3ec
Změněn 1 soubor: 48 přidání a 24 odebrání
  1. 48 24
      twhatter/parser/tweet.py

+ 48 - 24
twhatter/parser/tweet.py

@@ -1,52 +1,76 @@
 from datetime import datetime
 
 from bs4 import BeautifulSoup
-from dataclasses import dataclass, fields, field
+from dataclasses import dataclass, fields, InitVar, field
 
 
 @dataclass
 class Tweet:
-    #timestamp: datetime
-    user: str
-    #fullname: fullname
     id: int
-    #url: url
     timestamp: datetime
-    #text: str
-    #replies: int
+    user: str
+    replies: int
     retweets: int
-    #quoted_tweet: int
     likes: int
-    #html: str
-    #soup: Any
-    soup: BeautifulSoup = field(repr=False)
+    text: str = field(repr=False)
+    soup: InitVar[BeautifulSoup] = None
+
+    def __post_init__(self, soup):
+        self.soup = soup
+
+    @staticmethod
+    def _extract_data(soup, distinct_span, data_kw):
+        return (
+            soup.find('span', distinct_span)
+                .find('span', attrs={data_kw: True})
+            [data_kw]
+        )
 
     @staticmethod
     def extract_id(soup):
         return int(soup['data-item-id'])
 
+    @staticmethod
+    def extract_user(soup):
+        return soup.find('span', 'username').text
+
     @staticmethod
     def extract_timestamp(soup):
         return datetime.utcfromtimestamp(
             int(soup.find('span', '_timestamp')['data-time'])
         )
 
-    @staticmethod
-    def extract_user(soup):
-        return soup.find('span', 'username').text or ""
-
     @staticmethod
     def extract_fullname(soup):
-        return soup.find('strong', 'fullname').text or ""
+        return soup.find('strong', 'fullname').text
+
+    @classmethod
+    def extract_retweets(cls, soup):
+        return cls._extract_data(
+            soup,
+            'ProfileTweet-action--retweet',
+            'data-tweet-stat-count'
+        )
+
+    @classmethod
+    def extract_replies(cls, soup):
+        return cls._extract_data(
+            soup,
+            'ProfileTweet-action--reply',
+            'data-tweet-stat-count'
+        )
+
+    @classmethod
+    def extract_likes(cls, soup):
+        return cls._extract_data(
+            soup,
+            'ProfileTweet-action--favorite',
+            'data-tweet-stat-count'
+        )
 
     @staticmethod
-    def extract_retweets(soup):
-        return int(soup.find(
-                    'span', 'ProfileTweet-action--retweet u-hiddenVisually')
-                       .find(
-                    'span', 'ProfileTweet-actionCount'
-                    )['data-tweet-stat-count']
-                   )
+    def extract_text(soup):
+        return soup.find('p', 'tweet-text').text
 
     @staticmethod
     def extract_quoted_tweet(soup):
@@ -71,7 +95,7 @@ class Tweet:
             return fn(soup)
 
         kwargs = {f.name: _extract_value(f) for f in fields(cls)}
-        return cls(**kwargs)
+        return cls(soup=soup, **kwargs)
 
 
 class TweetList: