Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions nexus/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,20 @@ def __init__(self):

self.api = tweepy.API(auth)

def _sanitize_for_log(self, value: str) -> str:
    """Return *value* rewritten so it cannot break or forge log lines.

    Rules applied (in a single pass over the string):
    - Line feed and carriage return become the visible two-character
      placeholders ``\\n`` and ``\\r``.
    - Horizontal tab is kept unchanged.
    - Every other control character is replaced with ``?``: the C0 range
      (U+0000-U+001F), DEL (U+007F) and the C1 range (U+0080-U+009F,
      which includes NEL, U+0085).
    - The Unicode LINE SEPARATOR (U+2028) and PARAGRAPH SEPARATOR
      (U+2029) are replaced with ``?`` as well, because many viewers and
      ``str.splitlines()`` treat them as line breaks.

    Args:
        value: The text to be embedded in a log message.

    Returns:
        The neutralized string, or the literal ``"<not a str>"`` when
        *value* is not a ``str``.
    """
    if not isinstance(value, str):
        return "<not a str>"

    # Code points that must never reach the log verbatim.
    unsafe_code_points = (
        *range(0x00, 0x20),  # C0 control characters
        *range(0x7F, 0xA0),  # DEL plus C1 control characters (incl. NEL)
        0x2028,              # LINE SEPARATOR
        0x2029,              # PARAGRAPH SEPARATOR
    )
    replacements = {code: "?" for code in unsafe_code_points}

    # Tab is harmless inside a single log line, so let it through.
    del replacements[0x09]
    # Keep line breaks recognisable instead of collapsing them into "?".
    replacements[0x0A] = "\\n"
    replacements[0x0D] = "\\r"

    return value.translate(replacements)

def user_lookup_tweepy(self, user: str, quantity: int):
"""Obtain Tweets from a specific user.(only up to 3200 tweets)

Expand All @@ -51,7 +65,7 @@ def user_lookup_sns(self, user: str, quantity: int):
"""
query: List[Dict] = []

logger.info(f"Pulling {user}'s tweets")
logger.info(f"Pulling {self._sanitize_for_log(user)}'s tweets")
for idx, tweet in tqdm(enumerate(sntwitter.TwitterSearchScraper(f'from:{user}').get_items())):
if idx > quantity:
break
Expand Down Expand Up @@ -133,6 +147,4 @@ def create_topics(self, documents: List[Dict], id_to_cluster_label: Dict, id_fie
bot = Utils()
lookup = bot.user_lookup_sns("JoeBiden", 5000)
print(len(lookup))
print(lookup[-1])


print(lookup[-1])