theenglishway (time) преди 7 години
родител
ревизия
702937e7d3
променени са 4 файла, в които са добавени 49 реда и са изтрити 38 реда
  1. 13 1
      tests/conftest.py
  2. 4 9
      tests/test_parser.py
  3. 2 0
      twhatter/parser/__init__.py
  4. 30 28
      twhatter/parser/tweet.py

+ 13 - 1
tests/conftest.py

@@ -5,7 +5,7 @@ from click.testing import CliRunner
 from bs4 import BeautifulSoup
 
 from twhatter.api import ApiUser
-from twhatter.parser import TweetList
+from twhatter.parser import tweet_factory
 from typing import NamedTuple, List
 
 @pytest.fixture
@@ -46,6 +46,7 @@ class TweetInfo(NamedTuple):
     reacted_user_id: int = None
     link_to: str = None
 
+
 @pytest.fixture(scope="session")
 def tweet_collection():
     return {
@@ -133,4 +134,15 @@ def raw_tweet_factory(raw_html_user_initial_page_factory):
         user_page = tweet_info.retweeter or tweet_info.screen_name
         soup = raw_html_user_initial_page_factory(user_page)
         return soup.find(id="stream-item-tweet-{}".format(tweet_info.id))
+
     return _raw_tweet_factory
+
+
+@pytest.fixture(scope="session")
+def tweet_test_data_factory(raw_tweet_factory, tweet_collection):
+    def _tweet_test_data_factory(tweet_type):  # tweet_type is a key of tweet_collection
+        tweet_info = tweet_collection[tweet_type]  # expected values for that kind of tweet
+        raw_tweet = raw_tweet_factory(tweet_info)  # raw HTML element looked up from tweet_info
+        return tweet_factory(raw_tweet), tweet_info  # (parsed tweet, expected values) pair
+
+    return _tweet_test_data_factory

+ 4 - 9
tests/test_parser.py

@@ -25,11 +25,8 @@ class TestTweet:
     ]
 
     @pytest.mark.parametrize("tweet_type", all_types)
-    def test_tweet(self, raw_tweet_factory, tweet_collection, tweet_type):
-        tweet_info = tweet_collection[tweet_type]
-        raw = raw_tweet_factory(tweet_info)
-        t = TweetBase.extract(raw)
-        assert t
+    def test_tweet(self, tweet_test_data_factory, tweet_type):
+        t, tweet_info = tweet_test_data_factory(tweet_type)
 
         for field, value in tweet_info._asdict().items():
             # It would be rather complicated to keep some test fixtures values
@@ -45,8 +42,6 @@ class TestTweet:
         ('with_link', TweetLink),
         ('retweet', TweetRetweet)
     ])
-    def test_tweet_type(self, raw_tweet_factory, tweet_collection, tweet_type, expected_class):
-        tweet_info = tweet_collection[tweet_type]
-        raw = raw_tweet_factory(tweet_info)
-        t = TweetBase.extract(raw)
+    def test_tweet_type(self, tweet_test_data_factory, tweet_type, expected_class):
+        t, tweet_info = tweet_test_data_factory(tweet_type)
         assert isinstance(t, expected_class)

+ 2 - 0
twhatter/parser/__init__.py

@@ -1,9 +1,11 @@
 from .tweet import (TweetList, TweetBase,
+                    tweet_factory,
                     TweetTextOnly, TweetLink, TweetReaction, TweetRetweet)
 
 __all__= [
     "TweetList",
     "TweetBase",
+    "tweet_factory",
     "TweetTextOnly",
     "TweetLink",
     "TweetReaction",

+ 30 - 28
twhatter/parser/tweet.py

@@ -46,11 +46,11 @@ class TweetBase:
     #: The soup extracted from the raw HTML
     soup: InitVar[BeautifulSoup] = None
 
-    def __post_init__(self, soup):
+    def __post_init__(self, soup: BeautifulSoup):
         self.soup = soup
 
     @staticmethod
-    def condition(kwargs):
+    def condition(kwargs: dict) -> bool:
         raise NotImplementedError()
 
     @staticmethod
@@ -184,30 +184,6 @@ class TweetBase:
     def extract_soup(soup):
         return soup
 
-    @classmethod
-    def extract(cls, soup):
-        def _extract_value(field):
-            fn = getattr(cls, "extract_{}".format(field.name), None)
-            if not fn:
-                raise NotImplementedError(
-                    "Extract function for field '{}' is not "
-                    "implemented".format(field.name)
-                )
-
-            return fn(soup)
-
-        kwargs = {f.name: _extract_value(f) for f in fields(cls)}
-
-        for kls in cls.__subclasses__():
-            try:
-                print(kls)
-                if kls.condition(kwargs):
-                    return kls(soup=soup, **kwargs)
-            except NotImplementedError:
-                continue
-        else:
-            return TweetTextOnly(soup=soup, **kwargs)
-
 
 class TweetTextOnly(TweetBase):
     """An original tweet with only plain text"""
@@ -217,7 +193,6 @@ class TweetLink(TweetBase):
     """An original tweet with a link"""
     @staticmethod
     def condition(kwargs):
-        print(kwargs)
         return kwargs['link_to']
 
 
@@ -235,6 +210,33 @@ class TweetReaction(TweetBase):
         return kwargs['reacted_id']
 
 
+def tweet_factory(soup: BeautifulSoup) -> TweetBase:
+    """
+    :param soup: the soup extracted from the raw html for that tweet
+    :return: first TweetBase subclass whose condition is truthy, else a TweetTextOnly
+    """
+    def _extract_value(data_field):
+        fn = getattr(TweetBase, "extract_{}".format(data_field.name), None)  # extractor is found by field name
+        if not fn:
+            raise NotImplementedError(
+                "Extract function for field '{}' is not "
+                "implemented".format(data_field.name)
+            )
+
+        return fn(soup)
+
+    kwargs = {f.name: _extract_value(f) for f in fields(TweetBase)}  # one extracted value per field
+
+    for kls in TweetBase.__subclasses__():  # direct subclasses only; the first match wins
+        try:
+            if kls.condition(kwargs):
+                return kls(soup=soup, **kwargs)
+        except NotImplementedError:  # raised by TweetBase.condition when a subclass does not override it
+            continue
+    else:  # the loop has no break, so this runs whenever no subclass matched
+        return TweetTextOnly(soup=soup, **kwargs)
+
+
 class TweetList:
     def __init__(self, soup):
         self.raw_tweets = soup.find_all('li', 'stream-item')
@@ -244,7 +246,7 @@ class TweetList:
             # Don't know what this u-dir stuff is about but if it's in there,
             # it's not a tweet !
             if not tweet.find_all('p', class_="u-dir"):
-                yield TweetBase.extract(tweet)
+                yield tweet_factory(tweet)
 
     def __len__(self):
         return len(self.raw_tweets)