From efbcda30c90d41a402d7ba2aa18ad0b6fe969bc1 Mon Sep 17 00:00:00 2001 From: "pensarappdev[bot]" <182706286+pensarappdev[bot]@users.noreply.github.com> Date: Wed, 7 May 2025 14:20:51 +0000 Subject: [PATCH] Fix security issue: Unsanitized User Input in Log Messages Enabling Log Injection (CWE-117) --- nexus/profile.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/nexus/profile.py b/nexus/profile.py index bceb62d..dcf3471 100644 --- a/nexus/profile.py +++ b/nexus/profile.py @@ -19,6 +19,21 @@ def __init__(self): self.utils = Utils() self.atlas = AtlasClient() + @staticmethod + def _safe_log_str(s, max_len=128): + """Sanitize strings for safe logging: escape newlines, limit length, remove dangerous chars.""" + if not isinstance(s, str): + s = str(s) + + # Escape common log-breaking characters + s = s.replace('\n', '\\n').replace('\r', '\\r').replace('\t', '\\t') + # Truncate to maximum length + if len(s) > max_len: + s = s[:max_len] + '...' + # Remove other potentially problematic control characters + s = ''.join(ch if 32 <= ord(ch) < 127 else '\\x{:02x}'.format(ord(ch)) for ch in s) + return s + def create_social_profile_tweepy(self, map_name: str, map_description: str, users: List[str], outdir: str): """Create social profile with tweepy as tweet source @@ -69,13 +84,14 @@ def create_social_profile_sns(self, for user in tqdm(users): try: - logger.info(f"Loading {user}'s tweets from disk") + logger.info(f"Loading {self._safe_log_str(user)}'s tweets from disk") data_path = os.path.join(outdir, f"{user}_tweets.jsonl") with jsonlines.open(data_path, mode="r") as tweets: for tweet in tweets: all_tweets.append(tweet) except BaseException: - logger.info(f"Not on disk! scraping {users}'s tweets now") + users_str = ','.join(self._safe_log_str(u) for u in users) + logger.info(f"Not on disk! scraping {self._safe_log_str(user)}'s tweets now (users list: [{users_str}])") tweets = self.utils.user_lookup_sns(user, 10000) with jsonlines.open(f'{outdir}/{user}_tweets.jsonl', mode='a') as writer: for idx, tweet in enumerate(tweets): @@ -144,4 +160,4 @@ def create_social_profile_sns(self, map_description="A social profile of the latest POTUS Joe Biden, with Nomic's text embedder created by Yuvanesh Anand", users=["JoeBiden", "POTUS"], topics=True, - embedding_path="embeddings/JoeBiden.npy") + embedding_path="embeddings/JoeBiden.npy") \ No newline at end of file