
Add logging and a CLI option to set the logging level

theenglishway committed 7 years ago
parent
commit
a9f7644c97
6 changed files with 85 additions and 11 deletions
  1. twhatter/cli.py (+6 / -1)
  2. twhatter/client.py (+14 / -4)
  3. twhatter/log.py (+44 / -0)
  4. twhatter/parser/media.py (+7 / -4)
  5. twhatter/parser/tweet.py (+7 / -1)
  6. twhatter/parser/user.py (+7 / -1)

+ 6 - 1
twhatter/cli.py

@@ -7,11 +7,16 @@ import IPython
 
 from twhatter.client import ClientTimeline, ClientProfile
 from twhatter.output.sqlalchemy import Database, Tweet, User
+from twhatter.log import log_setup
 
 
 @click.group()
+@click.option('-v', '--verbosity',
+              type=click.Choice(['none', 'info', 'debug', 'verbose']),
+              default='info', show_default=True)
 @click.pass_context
-def main(ctx):
+def main(ctx, verbosity):
+    log_setup(verbosity)
     ctx.ensure_object(dict)
 
 

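A quick sketch, not part of the commit, of how the new group-level option reaches log_setup(). The throwaway noop subcommand is hypothetical and only exists so the group callback fires, since a click group runs its callback only when a subcommand is invoked:

    from click.testing import CliRunner
    from twhatter.cli import main

    @main.command('noop')
    def noop():
        pass

    # The group callback runs first: log_setup('verbose'), then ctx setup.
    CliRunner().invoke(main, ['-v', 'verbose', 'noop'])
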
+ 14 - 4
twhatter/client.py

@@ -4,6 +4,10 @@ from user_agent import generate_user_agent
 
 from twhatter.parser import TweetList, user_factory
 import json
+import logging
+
+
+logger = logging.getLogger(__name__)
 
 
 class Client():
@@ -11,6 +15,7 @@ class Client():
 
     @classmethod
     def get_user_timeline(cls, user_handle):
+        logger.info("Loading initial timeline for {}".format(user_handle))
         url = "https://twitter.com/{}".format(user_handle)
         return requests.get(
             url,
@@ -29,7 +34,14 @@ class ClientTimeline(Client):
         self.nb_tweets = 0
         self.limit = limit
 
+    def _update_state(self, earliest_tweet):
+        self.earliest_tweet = earliest_tweet.id
+        self.nb_tweets += 1
+
     def get_more_tweets(self):
+        logger.info(
+            "Loading more tweets from {} ({})".format(self.user, self.nb_tweets)
+        )
         return requests.get(
             "https://twitter.com/i/profiles/show/{}/timeline/tweets".format(self.user),
             params= dict(
@@ -48,8 +60,7 @@ class ClientTimeline(Client):
 
         for t in t_list:
             yield t
-            self.earliest_tweet = t.id
-            self.nb_tweets += 1
+            self._update_state(t)
 
         while True and self.nb_tweets < self.limit:
             more_tweets = self.get_more_tweets()
@@ -62,8 +73,7 @@ class ClientTimeline(Client):
 
             for t in t_list:
                 yield t
-                self.earliest_tweet = t.id
-                self.nb_tweets += 1
+                self._update_state(t)
 
 
 class ClientProfile(Client):
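For reference, with the 'standard' formatter defined in twhatter/log.py below, the two INFO calls added here render on the console roughly as in this sketch. The handle jack and the running count 20 are illustrative, and the logger is driven directly rather than through the real request path:

    import logging
    from twhatter.log import log_setup

    log_setup('info')
    client_log = logging.getLogger('twhatter.client')
    client_log.info("Loading initial timeline for jack")
    # -> INFO:twhatter.client: Loading initial timeline for jack
    client_log.info("Loading more tweets from jack (20)")
    # -> INFO:twhatter.client: Loading more tweets from jack (20)
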

+ 44 - 0
twhatter/log.py

@@ -0,0 +1,44 @@
+import logging.config
+
+
+LOGGING = {
+    'version': 1,
+    'disable_existing_loggers': False,
+    'formatters': {
+        'standard': {
+            'format': '%(levelname)s:%(name)s: %(message)s ',
+            'datefmt': "%Y-%m-%d %H:%M:%S",
+        }
+    },
+    'handlers': {
+        'console': {
+            'level': 'DEBUG',
+            'formatter': 'standard',
+            'class': 'logging.StreamHandler',
+        }
+    },
+    'loggers': {
+        '': {
+            'handlers': ['console'],
+            'level': 'DEBUG',
+        },
+    }
+}
+
+def log_setup(verbosity):
+    logging.getLogger('urllib3').setLevel(logging.WARNING)
+
+    if verbosity == 'verbose':
+        logging.getLogger('twhatter.client').setLevel(logging.DEBUG)
+        logging.getLogger('twhatter.parser').setLevel(logging.DEBUG)
+    elif verbosity == 'debug':
+        logging.getLogger('twhatter.client').setLevel(logging.DEBUG)
+        logging.getLogger('twhatter.parser').setLevel(logging.INFO)
+    elif verbosity == 'info':
+        logging.getLogger('twhatter.client').setLevel(logging.INFO)
+        logging.getLogger('twhatter.parser').setLevel(logging.INFO)
+    elif verbosity == 'none':
+        logging.getLogger('twhatter.client').setLevel(logging.WARNING)
+        logging.getLogger('twhatter.parser').setLevel(logging.WARNING)
+
+    logging.config.dictConfig(LOGGING)
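A short sketch of what each verbosity choice means in practice once log_setup() has run; the asserts simply restate the mapping above:

    import logging
    from twhatter.log import log_setup

    log_setup('debug')
    # 'debug' keeps the client at DEBUG but quiets per-item parser output,
    # and urllib3 is capped at WARNING regardless of verbosity.
    assert logging.getLogger('twhatter.client').getEffectiveLevel() == logging.DEBUG
    assert logging.getLogger('twhatter.parser').getEffectiveLevel() == logging.INFO
    assert logging.getLogger('urllib3').getEffectiveLevel() == logging.WARNING

    log_setup('none')
    # 'none' still lets warnings and errors through.
    assert logging.getLogger('twhatter.client').getEffectiveLevel() == logging.WARNING
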

+ 7 - 4
twhatter/parser/media.py

@@ -1,4 +1,4 @@
-from datetime import datetime
+import logging
 
 from bs4 import BeautifulSoup
 from dataclasses import dataclass, fields, InitVar, field
@@ -7,6 +7,9 @@ from typing import List, Optional
 from .mixins import ExtractableMixin
 
 
+logger = logging.getLogger(__name__)
+
+
 @dataclass
 class MediaBase(ExtractableMixin):
     #: Links to images contained in the media
@@ -29,8 +32,6 @@ class MediaBase(ExtractableMixin):
     #: The soup extracted from the raw HTML
     soup: InitVar[BeautifulSoup] = None
 
-    #https: // pbs.twimg.com / media / DxNof6AXQAAu2oU.jpg
-
 
 class MediaImage(MediaBase):
     @staticmethod
@@ -51,7 +52,9 @@ def media_factory(soup: BeautifulSoup) -> Optional[MediaBase]:
     for kls in MediaBase.__subclasses__():
         try:
             if kls.condition(kwargs):
-                return kls(soup=soup, **kwargs)
+                m = kls(soup=soup, **kwargs)
+                logger.debug("Parsed media {}".format(m))
+                return m
         except NotImplementedError:
             continue
 

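media_factory dispatches by probing every direct subclass of MediaBase until one accepts the parsed attributes, and now logs the result at DEBUG. A stripped-down, standalone sketch of that pattern; Handler, ImageHandler and the image_links key are stand-ins, only the structure mirrors the code above:

    import logging

    logger = logging.getLogger(__name__)

    class Handler:
        @staticmethod
        def condition(kwargs):
            # Each subclass decides whether it recognises the parsed attributes.
            raise NotImplementedError

    class ImageHandler(Handler):
        @staticmethod
        def condition(kwargs):
            return bool(kwargs.get('image_links'))

    def handler_factory(kwargs):
        # Mirrors media_factory: try each subclass, skip ones that opt out,
        # log the constructed object at DEBUG, fall through to None otherwise.
        for kls in Handler.__subclasses__():
            try:
                if kls.condition(kwargs):
                    obj = kls()
                    logger.debug("Parsed {}".format(obj))
                    return obj
            except NotImplementedError:
                continue
        return None
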
+ 7 - 1
twhatter/parser/tweet.py

@@ -1,3 +1,4 @@
+import logging
 from datetime import datetime
 
 from bs4 import BeautifulSoup
@@ -8,6 +9,9 @@ from .mixins import ExtractableMixin
 from .media import MediaBase, media_factory
 
 
+logger = logging.getLogger(__name__)
+
+
 @dataclass
 class TweetBase(ExtractableMixin):
     #: Tweet ID
@@ -234,7 +238,9 @@ class TweetList:
             # Don't know what this u-dir stuff is about but if it's in there,
             # it's not a tweet !
             if not tweet.find_all('p', class_="u-dir"):
-                yield tweet_factory(tweet)
+                t = tweet_factory(tweet)
+                logger.debug("Parsed tweet {}".format(t))
+                yield t
 
     def __len__(self):
         return len(self.raw_tweets)

+ 7 - 1
twhatter/parser/user.py

@@ -1,3 +1,4 @@
+import logging
 from datetime import datetime
 
 from bs4 import BeautifulSoup
@@ -6,6 +7,9 @@ from dataclasses import dataclass, fields, InitVar
 from .mixins import ExtractableMixin
 
 
+logger = logging.getLogger(__name__)
+
+
 @dataclass
 class User(ExtractableMixin):
     id: int
@@ -71,4 +75,6 @@ def user_factory(soup: BeautifulSoup) -> User:
     kwargs = {
         f.name: User._extract_value(soup, f) for f in fields(User)
     }
-    return User(**kwargs)
+    u = User(**kwargs)
+    logger.debug("Parsed user {}".format(u))
+    return u
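
The loggers added to media.py, tweet.py and user.py are all named after __name__ (twhatter.parser.media, twhatter.parser.tweet, twhatter.parser.user), so their effective level is inherited from the twhatter.parser logger that log_setup() adjusts. A small sketch of the consequence; the record text is illustrative:

    import logging
    from twhatter.log import log_setup

    parser_user_log = logging.getLogger('twhatter.parser.user')

    log_setup('verbose')                                   # twhatter.parser -> DEBUG
    parser_user_log.debug("Parsed user <User id=1 ...>")   # printed

    log_setup('debug')                                     # twhatter.parser -> INFO
    parser_user_log.debug("Parsed user <User id=1 ...>")   # suppressed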