Review notes (free text ahead of the first "diff --git" header, not part of any hunk):
- Layout: this copy of the patch had been flattened onto two lines. Line breaks,
  indentation and blank context lines below were reconstructed to agree with the
  hunk header counts -- TODO confirm against nexus/utils.py before applying.
- user_lookup_sns does not enforce MAX_TWEETS: that constant is annotated as the
  'user_timeline' cap used by the tweepy path, and the script block at the end of
  the file still requests 5000 items through the scraper path. Only the lower
  bound is checked there.
- Both validators reject bool explicitly, because isinstance(True, int) is True.
- The index line is kept from the original patch; its post-image hash predates
  these adjustments.

diff --git a/nexus/utils.py b/nexus/utils.py
index 852482b..88427be 100644
--- a/nexus/utils.py
+++ b/nexus/utils.py
@@ -14,6 +14,8 @@ load_dotenv()
 
 
 class Utils:
+    MAX_TWEETS = 3200  # Twitter API 'user_timeline' max
+
     def __init__(self):
         api_key = os.getenv("API_KEY")
         api_secret = os.getenv("API_SECRET")
@@ -29,14 +31,18 @@ def user_lookup_tweepy(self, user: str, quantity: int):
         """Obtain Tweets from a specific user.(only up to 3200 tweets)
 
         :param user: specified user you want to retrieve tweets from
-        :param quantity: amount of tweets you want to retrieve
+        :param quantity: amount of tweets you want to retrieve (1-3200 inclusive)
         """
+        if isinstance(quantity, bool) or not isinstance(quantity, int):
+            raise ValueError("quantity must be an integer")
+        if quantity < 1 or quantity > self.MAX_TWEETS:
+            raise ValueError(f"quantity must be between 1 and {self.MAX_TWEETS} (inclusive)")
         query: List[Dict] = []
         tweets = tweepy.Cursor(
             self.api.user_timeline,
             screen_name=user,
             count=200,
             tweet_mode="extended"
-        ).items(quantity)  # tweepy.Cursor allows for pagination due to the single request tweet limitations
+        ).items(quantity)
         for tweet in tweets:
             query.append(tweet._json)
@@ -49,11 +55,15 @@ def user_lookup_sns(self, user: str, quantity: int):
         :param quantity: last x tweets needed, chronologically
         :param user: twitter handle of user
         """
+        if isinstance(quantity, bool) or not isinstance(quantity, int):
+            raise ValueError("quantity must be an integer")
+        if quantity < 1:
+            raise ValueError("quantity must be at least 1")
         query: List[Dict] = []
         logger.info(f"Pulling {user}'s tweets")
 
         for idx, tweet in tqdm(enumerate(sntwitter.TwitterSearchScraper(f'from:{user}').get_items())):
-            if idx > quantity:
+            if idx >= quantity:
                 break
             query.append({"full_text": tweet.content, "tweet_link": f"https://twitter.com/{tweet.user.username}/status/{tweet.id}"
                           , "created_at": tweet.date, "tweet_id": tweet.id, "user": tweet.user.username})
@@ -133,6 +143,4 @@ def create_topics(self, documents: List[Dict], id_to_cluster_label: Dict, id_fie
     bot = Utils()
     lookup = bot.user_lookup_sns("JoeBiden", 5000)
     print(len(lookup))
-    print(lookup[-1])
-
-
+    print(lookup[-1])
\ No newline at end of file