client.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. import requests
  2. from bs4 import BeautifulSoup
  3. from user_agent import generate_user_agent
  4. from twhatter.parser import TweetList, user_factory
  5. import json
  6. import logging
  7. logger = logging.getLogger(__name__)
  8. class Client():
  9. user_agent = generate_user_agent(os='linux')
  10. @classmethod
  11. def get_user_timeline(cls, user_handle):
  12. logger.info("Loading initial timeline for {}".format(user_handle))
  13. url = "https://twitter.com/{}".format(user_handle)
  14. return requests.get(
  15. url,
  16. headers={
  17. 'User-Agent': cls.user_agent,
  18. 'Accept-Language': 'en'
  19. }
  20. )
  21. class ClientTimeline(Client):
  22. """Access and explore some user's timeline"""
  23. def __init__(self, user, limit=100):
  24. self.user = user
  25. self.earliest_tweet = None
  26. self.nb_tweets = 0
  27. self.limit = limit
  28. def _update_state(self, earliest_tweet):
  29. self.earliest_tweet = earliest_tweet.id
  30. self.nb_tweets += 1
  31. def get_more_tweets(self):
  32. logger.info(
  33. "Loading more tweets from {} ({})".format(self.user, self.nb_tweets)
  34. )
  35. return requests.get(
  36. "https://twitter.com/i/profiles/show/{}/timeline/tweets".format(self.user),
  37. params= dict(
  38. include_available_features=1,
  39. include_entities=1,
  40. max_position=self.earliest_tweet,
  41. reset_error_state=False
  42. ),
  43. headers={'User-Agent': self.user_agent}
  44. )
  45. def __iter__(self):
  46. tweets = self.get_user_timeline(self.user)
  47. soup = BeautifulSoup(tweets.text, "lxml")
  48. t_list = TweetList(soup)
  49. for t in t_list:
  50. yield t
  51. self._update_state(t)
  52. while True and self.nb_tweets < self.limit:
  53. more_tweets = self.get_more_tweets()
  54. html = json.loads(more_tweets.content)
  55. soup = BeautifulSoup(html['items_html'], "lxml")
  56. t_list = TweetList(soup)
  57. if len(t_list) == 0:
  58. break
  59. for t in t_list:
  60. yield t
  61. self._update_state(t)
  62. class ClientProfile(Client):
  63. """Get profile information about an user"""
  64. def __init__(self, user_handle):
  65. self.user_handle = user_handle
  66. user_page = self.get_user_timeline(user_handle)
  67. soup = BeautifulSoup(user_page.text, "lxml")
  68. self.user = user_factory(soup)