From 5d04c5aa5f141e17854e99dead64f56d937599c9 Mon Sep 17 00:00:00 2001 From: "pensarappstaging[bot]" <187318418+pensarappstaging[bot]@users.noreply.github.com> Date: Fri, 9 May 2025 16:29:29 +0000 Subject: [PATCH] Fix 2 security issues: 1. Unsanitized User Input in Log Statement Enabling Log Injection (CWE-117) 2. Unbounded Tweet Scraping Resource Exhaustion (CWE-400) --- nexus/utils.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/nexus/utils.py b/nexus/utils.py index 852482b..00e0bf2 100644 --- a/nexus/utils.py +++ b/nexus/utils.py @@ -49,11 +49,21 @@ def user_lookup_sns(self, user: str, quantity: int): :param quantity: last x tweets needed, chronologically :param user: twitter handle of user """ + MAX_TWEETS = 5000 # hard limit to prevent resource abuse + if not isinstance(quantity, int): + raise ValueError("quantity must be an integer") + if quantity <= 0: + raise ValueError("quantity must be > 0") + if quantity > MAX_TWEETS: + raise ValueError(f"Requested quantity {quantity} exceeds allowed maximum ({MAX_TWEETS})") + query: List[Dict] = [] - logger.info(f"Pulling {user}'s tweets") + # Sanitize user input before logging to prevent log injection + sanitized_user = user.replace('\n', ' ').replace('\r', ' ') + logger.info(f"Pulling {sanitized_user}'s tweets (max {quantity})") for idx, tweet in tqdm(enumerate(sntwitter.TwitterSearchScraper(f'from:{user}').get_items())): - if idx > quantity: + if idx >= quantity: break query.append({"full_text": tweet.content, "tweet_link": f"https://twitter.com/{tweet.user.username}/status/{tweet.id}" , "created_at": tweet.date, "tweet_id": tweet.id, "user": tweet.user.username}) @@ -133,6 +143,4 @@ def create_topics(self, documents: List[Dict], id_to_cluster_label: Dict, id_fie bot = Utils() lookup = bot.user_lookup_sns("JoeBiden", 5000) print(len(lookup)) - print(lookup[-1]) - - + print(lookup[-1]) \ No newline at end of file