Преглед изворни кода

Add CLI command for user profile

Also fix profile's parsing
theenglishway (time) пре 7 година
родитељ
комит
3d9819e173
7 измењених фајлова са 65 додато и 18 уклоњено
  1. 7 0
      README.rst
  2. 10 5
      tests/conftest.py
  3. 8 0
      tests/test_cli.py
  4. 8 2
      tests/test_parser.py
  5. 9 1
      twhatter/cli.py
  6. 22 9
      twhatter/client.py
  7. 1 1
      twhatter/parser/user.py

+ 7 - 0
README.rst

@@ -31,6 +31,13 @@ Display some user's tweets
     <TweetRetweet (id=1083049664021233664, date=2019-01-09 17:15:36, likes=64770, likes=21099, likes=7818)>
     <TweetRetweet (id=1083148367184781312, date=2019-01-09 23:47:49, likes=75514, likes=21966, likes=6145)>
 
+Display their profile information
+
+.. code-block:: shell
+
+    $ twhatter profile realDonaldTrump
+    User(id=25073877, screen_name='Donald J. Trump', join_date=datetime.datetime(2009, 3, 18, 0, 0), tweets_nb=40183, following_nb=45, followers_nb=57144827, likes_nb=7)
+
 Put them into a local database (by default in /tmp/db.sqlite)
 
 .. code-block:: shell

+ 10 - 5
tests/conftest.py

@@ -123,7 +123,7 @@ def tweet_collection():
 def raw_html_user_initial_page_factory():
     def _raw_html_user_initial_page(user):
         a = ClientTimeline(user)
-        response = a.get_initial()
+        response = a.get_user_timeline(user)
         return BeautifulSoup(response.text, "lxml")
     return _raw_html_user_initial_page
 
@@ -158,10 +158,10 @@ class UserInfo(NamedTuple):
     id: int
     screen_name: str
     join_date: datetime
-    tweets_nb: int
-    following_nb: int
-    followers_nb: int
-    likes_nb: int
+    tweets_nb: int = None
+    following_nb: int = None
+    followers_nb: int = None
+    likes_nb: int = None
 
 
 @pytest.fixture(scope="session")
@@ -176,4 +176,9 @@ def user_collection():
             followers_nb=81,
             likes_nb=4
         ),
+        'the_english_way': UserInfo(
+            id=943804775942033408,
+            screen_name="theenglishway",
+            join_date=datetime(2017, 12, 21, 0, 0),
+        ),
     }

+ 8 - 0
tests/test_cli.py

@@ -43,6 +43,14 @@ class TestMain:
         lines = result.output.split('\n')[:-1]
         assert len(lines) == tweet_limit
 
+    @pytest.mark.send_request
+    def test_profile(self, cli_runner, user_prolific, tweet_limit):
+        result = cli_runner.invoke(
+            cli.main,
+            ['profile', user_prolific]
+        )
+        assert result.exit_code == 0
+
 
 class TestDb:
     @pytest.mark.send_request

+ 8 - 2
tests/test_parser.py

@@ -49,7 +49,8 @@ class TestTweet:
 
 class TestUser:
     all_handles = [
-        "Marlene_beadles"
+        "Marlene_beadles",
+        "the_english_way"
     ]
 
     @pytest.mark.parametrize("user_handle", all_handles)
@@ -59,4 +60,9 @@ class TestUser:
         user = user_factory(raw_user)
 
         for field, value in user_info._asdict().items():
-            assert getattr(user, field) == value
+            # It would be rather complicated to keep some test fixture values
+            # accurate (e.g. number of likes, retweets, ...), so for most
+            # of them the expected values are deliberately left unset and
+            # are therefore not tested
+            if value is not None:
+                assert getattr(user, field) == value

+ 9 - 1
twhatter/cli.py

@@ -5,7 +5,7 @@
 import click
 import IPython
 
-from twhatter.client import ClientTimeline
+from twhatter.client import ClientTimeline, ClientProfile
 from twhatter.output import Database, Tweet
 
 
@@ -29,6 +29,14 @@ def timeline(limit, user):
         click.echo(t)
 
 
+@main.command()
+@click.argument('user')
+def profile(user):
+    """Get basic info about some user"""
+    p = ClientProfile(user)
+    click.echo(p.user)
+
+
 @main.group()
 @click.option('-d', '--db_url', type=str, default="sqlite:////tmp/db.sqlite3", show_default=True)
 @click.pass_context

+ 22 - 9
twhatter/client.py

@@ -2,7 +2,7 @@ import requests
 from random import choice
 from bs4 import BeautifulSoup
 
-from twhatter.parser import TweetList
+from twhatter.parser import TweetList, user_factory
 import json
 
 
@@ -15,6 +15,17 @@ class Client():
         'Mozilla/5.0 (Windows NT 5.2; RW; rv:7.0a1) Gecko/20091211 SeaMonkey/9.23a1pre'
     ]
 
+    @classmethod
+    def get_user_timeline(cls, user_handle):
+        url = "https://twitter.com/{}".format(user_handle)
+        return requests.get(
+            url,
+            headers={
+                'User-Agent': choice(cls.HEADERS_LIST),
+                'Accept-Language': 'en'
+            }
+        )
+
 
 class ClientTimeline(Client):
     """Access and explore some user's timeline"""
@@ -24,13 +35,6 @@ class ClientTimeline(Client):
         self.nb_tweets = 0
         self.limit = limit
 
-    def get_initial(self):
-        url = "https://twitter.com/{}".format(self.user)
-        return requests.get(
-            url,
-            headers={'User-Agent': choice(self.HEADERS_LIST), 'Accept-Language': 'en'}
-        )
-
     def get_more_tweets(self):
         return requests.get(
             "https://twitter.com/i/profiles/show/{}/timeline/tweets".format(self.user),
@@ -44,7 +48,7 @@ class ClientTimeline(Client):
         )
 
     def __iter__(self):
-        tweets = self.get_initial()
+        tweets = self.get_user_timeline(self.user)
         soup = BeautifulSoup(tweets.text, "lxml")
         t_list = TweetList(soup)
 
@@ -67,3 +71,12 @@ class ClientTimeline(Client):
                 self.earliest_tweet = t.id
                 self.nb_tweets += 1
 
+
+class ClientProfile(Client):
+    """Get profile information about a user"""
+    def __init__(self, user_handle):
+        self.user_handle = user_handle
+        user_page = self.get_user_timeline(user_handle)
+        soup = BeautifulSoup(user_page.text, "lxml")
+
+        self.user = user_factory(soup)

+ 1 - 1
twhatter/parser/user.py

@@ -48,7 +48,7 @@ class User(ExtractableMixin):
         # The date is in a weird format (e.g. "7:27 AM - 8 May 2011") and we
         # don't really care for the exact hour so we only keep the date
         day_str = datetime_str.split(' - ')[1]
-        return datetime.strptime(day_str, '%d %B %Y')
+        return datetime.strptime(day_str, '%d %b %Y')
 
     @classmethod
     def extract_tweets_nb(cls, soup):