From cf66e7797a28e5d1fd685a2c8e5b439628ab4f08 Mon Sep 17 00:00:00 2001 From: "pensarappdev[bot]" <182706286+pensarappdev[bot]@users.noreply.github.com> Date: Wed, 7 May 2025 17:54:16 +0000 Subject: [PATCH] Fix security issue: Unbounded Tweet Retrieval Resource Exhaustion (CWE-400) --- nexus/utils.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/nexus/utils.py b/nexus/utils.py index 852482b..88427be 100644 --- a/nexus/utils.py +++ b/nexus/utils.py @@ -14,6 +14,8 @@ load_dotenv() class Utils: + MAX_TWEETS = 3200 # Twitter API 'user_timeline' max + def __init__(self): api_key = os.getenv("API_KEY") api_secret = os.getenv("API_SECRET") @@ -29,14 +31,18 @@ def user_lookup_tweepy(self, user: str, quantity: int): """Obtain Tweets from a specific user.(only up to 3200 tweets) :param user: specified user you want to retrieve tweets from - :param quantity: amount of tweets you want to retrieve + :param quantity: amount of tweets you want to retrieve (1-3200 inclusive) """ + if not isinstance(quantity, int): + raise ValueError("quantity must be an integer") + if quantity < 1 or quantity > self.MAX_TWEETS: + raise ValueError(f"quantity must be between 1 and {self.MAX_TWEETS} (inclusive)") query: List[Dict] = [] tweets = tweepy.Cursor( self.api.user_timeline, screen_name=user, count=200, tweet_mode="extended" - ).items(quantity) # tweepy.Cursor allows for pagination due to the single request tweet limitations + ).items(quantity) for tweet in tweets: query.append(tweet._json) @@ -49,11 +55,15 @@ def user_lookup_sns(self, user: str, quantity: int): :param quantity: last x tweets needed, chronologically :param user: twitter handle of user """ + if not isinstance(quantity, int): + raise ValueError("quantity must be an integer") + if quantity < 1 or quantity > self.MAX_TWEETS: + raise ValueError(f"quantity must be between 1 and {self.MAX_TWEETS} (inclusive)") query: List[Dict] = [] logger.info(f"Pulling {user}'s tweets") for idx, tweet in tqdm(enumerate(sntwitter.TwitterSearchScraper(f'from:{user}').get_items())): - if idx > quantity: + if idx >= quantity: break query.append({"full_text": tweet.content, "tweet_link": f"https://twitter.com/{tweet.user.username}/status/{tweet.id}" , "created_at": tweet.date, "tweet_id": tweet.id, "user": tweet.user.username}) @@ -133,6 +143,4 @@ def create_topics(self, documents: List[Dict], id_to_cluster_label: Dict, id_fie bot = Utils() lookup = bot.user_lookup_sns("JoeBiden", 5000) print(len(lookup)) - print(lookup[-1]) - - + print(lookup[-1]) \ No newline at end of file