From 76b80eccea1a55564b01d48ec872e5977e8fe0a6 Mon Sep 17 00:00:00 2001
From: "pensarappstaging[bot]" <187318418+pensarappstaging[bot]@users.noreply.github.com>
Date: Thu, 8 May 2025 20:10:53 +0000
Subject: [PATCH] Fix security issue: Unsanitized User Input in Log Message
 Interpolation (CWE-117)

Route the user-supplied handle through a new Utils._sanitize_for_log()
helper before it is interpolated into the "Pulling ...'s tweets" log
message, so that embedded line breaks or control characters cannot
forge or split log records.

The helper neutralizes CR/LF, the remaining C0 controls (except tab),
DEL, the C1 range (which contains NEL, U+0085) and the Unicode
line/paragraph separators U+2028/U+2029. Non-string values are
converted with str() instead of being logged as an empty string.

---
Reviewer notes (this section is ignored by git am):
- The unrelated end-of-file hunk that stripped the final newline of
  nexus/utils.py has been dropped from this patch.
- NOTE(review): context-line whitespace was reconstructed assuming
  4-space class / 8-space method-body indentation; if the patch does
  not apply cleanly, retry with `git apply --ignore-whitespace` or
  regenerate it from the branch.

 nexus/utils.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/nexus/utils.py b/nexus/utils.py
--- a/nexus/utils.py
+++ b/nexus/utils.py
@@ -25,6 +25,28 @@ def __init__(self):
 
         self.api = tweepy.API(auth)
 
+    def _sanitize_for_log(self, value: str) -> str:
+        """Neutralize a value so it cannot forge or split log records.
+
+        CR and LF are replaced by visible backslash escapes. Every other
+        control character except tab (C0, DEL, C1) and the Unicode line
+        separators U+2028/U+2029 are replaced by "?". Non-string values
+        are converted with str() first so the entry is not left blank.
+        """
+        if not isinstance(value, str):
+            value = str(value)
+        # Make CR/LF visible instead of letting them start a new record.
+        value = value.replace("\n", "\\n").replace("\r", "\\r")
+        # NEL (U+0085) lies inside the C1 range, so it is covered as well.
+        return "".join(
+            "?"
+            if (ch < " " and ch != "\t")
+            or "\x7f" <= ch <= "\x9f"
+            or ch in "\u2028\u2029"
+            else ch
+            for ch in value
+        )
+
     def user_lookup_tweepy(self, user: str, quantity: int):
         """Obtain Tweets from a specific user.(only up to 3200 tweets)
 
@@ -51,7 +73,7 @@ def user_lookup_sns(self, user: str, quantity: int):
         """
         query: List[Dict] = []
 
-        logger.info(f"Pulling {user}'s tweets")
+        logger.info(f"Pulling {self._sanitize_for_log(user)}'s tweets")
         for idx, tweet in tqdm(enumerate(sntwitter.TwitterSearchScraper(f'from:{user}').get_items())):
             if idx > quantity:
                 break