123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
- # -*- coding: utf-8 -*-
- # Natural Language Toolkit: Twitter API
- #
- # Copyright (C) 2001-2019 NLTK Project
- # Author: Ewan Klein <ewan@inf.ed.ac.uk>
- # Lorenzo Rubio <lrnzcig@gmail.com>
- # URL: <http://nltk.org/>
- # For license information, see LICENSE.TXT
- """
- This module provides an interface for TweetHandlers, and support for timezone
- handling.
- """
- import time as _time
- from abc import ABCMeta, abstractmethod
- from datetime import tzinfo, timedelta, datetime
- from six import add_metaclass
- from nltk.compat import UTC
class LocalTimezoneOffsetWithUTC(tzinfo):
    """
    A narrow ``tzinfo`` that exposes the local timezone's offset from UTC.

    It is not a general-purpose local-timezone implementation. Specifically:

    * dates are expected to have been built with
      `datetime(..., tzinfo=Local)`, where `Local` is an instance of
      `LocalTimezoneOffsetWithUTC`;
    * for those dates it supplies the offset from UTC, which is what
      comparisons between timezone-aware dates rely on.

    Reference: https://docs.python.org/3/library/datetime.html
    """

    # Offset of local standard time, taken from the platform settings.
    STDOFFSET = timedelta(seconds=-_time.timezone)
    # Offset of local DST time; identical to STDOFFSET when the platform
    # reports no DST zone.
    DSTOFFSET = timedelta(seconds=-_time.altzone) if _time.daylight else STDOFFSET

    def utcoffset(self, dt):
        """
        Return the offset from UTC.

        The value is always `DSTOFFSET`; the `dt` argument is not inspected.
        """
        return self.DSTOFFSET


# Shared instance used as the `tzinfo` of locally-specified dates.
LOCAL = LocalTimezoneOffsetWithUTC()
@add_metaclass(ABCMeta)
class BasicTweetHandler(object):
    """
    Minimal implementation of `TweetHandler`.

    Holds a Tweet counter together with a limit, and tells the client
    whether it should keep fetching.
    """

    def __init__(self, limit=20):
        # Number of Tweets seen so far, and the ceiling it is compared with
        # in `do_continue`.
        self.counter = 0
        self.limit = limit

        # A flag to indicate to the client whether to stop fetching data
        # given some condition (e.g., reaching a date limit).
        self.do_stop = False

        # Stores the id of the last fetched Tweet to handle pagination.
        self.max_id = None

    def do_continue(self):
        """
        Tell the client whether to keep fetching Tweets.

        The result is falsy once the counter has reached the limit or the
        `do_stop` flag has been raised.
        """
        below_limit = self.counter < self.limit
        return below_limit and not self.do_stop
class TweetHandlerI(BasicTweetHandler):
    """
    Interface class whose subclasses should implement a handle method that
    Twitter clients can delegate to.
    """

    def __init__(self, limit=20, upper_date_limit=None, lower_date_limit=None):
        """
        :param int limit: The number of data items to process in the current
            round of processing.

        :param tuple upper_date_limit: Stop collecting data once a Tweet is
            dated later than this. This should be entered as a tuple which
            can serve as the argument to `datetime.datetime`.
            E.g. `upper_date_limit=(2015, 4, 1, 12, 40)` for 12:40 pm on
            April 1 2015. The tuple is interpreted in the local timezone.

        :param tuple lower_date_limit: Stop collecting data once a Tweet is
            dated earlier than this. See `upper_date_limit` for formatting.
        """
        BasicTweetHandler.__init__(self, limit)

        self.upper_date_limit = None
        self.lower_date_limit = None
        if upper_date_limit:
            self.upper_date_limit = datetime(*upper_date_limit, tzinfo=LOCAL)
        if lower_date_limit:
            self.lower_date_limit = datetime(*lower_date_limit, tzinfo=LOCAL)

        self.startingup = True

    @abstractmethod
    def handle(self, data):
        """
        Deal appropriately with data returned by the Twitter API
        """

    @abstractmethod
    def on_finish(self):
        """
        Actions when the tweet limit has been reached
        """

    def check_date_limit(self, data, verbose=False):
        """
        Validate date limits.

        Parses `data['created_at']` as a UTC timestamp and sets
        `self.do_stop` to `True` when the Tweet is later than
        `upper_date_limit` or earlier than `lower_date_limit`. Does nothing
        when neither limit is configured.

        :param dict data: A Tweet; only its `created_at` entry is read.
        :param bool verbose: If `True`, print which limit was crossed.
        :raises ValueError: if `created_at` does not match the expected
            date format.
        """
        if not (self.upper_date_limit or self.lower_date_limit):
            return

        date_fmt = '%a %b %d %H:%M:%S +0000 %Y'
        tweet_date = datetime.strptime(data['created_at'], date_fmt).replace(
            tzinfo=UTC
        )

        # Report the limit that was actually crossed. Choosing by which
        # limit happens to be configured would name the upper limit even
        # when it is the lower one that the Tweet violates.
        if self.upper_date_limit and tweet_date > self.upper_date_limit:
            message = "earlier"
            date_limit = self.upper_date_limit
        elif self.lower_date_limit and tweet_date < self.lower_date_limit:
            message = "later"
            date_limit = self.lower_date_limit
        else:
            # Tweet lies within the configured limits; keep going.
            return

        if verbose:
            print(
                "Date limit {0} is {1} than date of current tweet {2}".format(
                    date_limit, message, tweet_date
                )
            )
        self.do_stop = True
|