Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions nexus/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,21 @@ def __init__(self):
self.utils = Utils()
self.atlas = AtlasClient()

@staticmethod
def _safe_log_str(s, max_len=128):
    r"""Return a single-line, printable-ASCII rendering of ``s`` for log messages.

    Non-string input is converted with ``str()`` first. Newline, carriage
    return and tab become the two-character escapes ``\n``, ``\r`` and ``\t``.
    Any other character outside printable ASCII (code points 32-126) becomes
    ``\xNN``, ``\uNNNN`` or ``\UNNNNNNNN`` depending on its code point, so the
    width of every escape is fixed and unambiguous.

    The escaped text is limited to ``max_len`` characters. When the input does
    not fit, the output is cut at an escape-sequence boundary and ``'...'`` is
    appended (the marker itself is not counted against ``max_len``).

    Args:
        s: Value to sanitize; anything accepted by ``str()``.
        max_len: Maximum number of escaped characters kept before the
            ``'...'`` truncation marker.

    Returns:
        The sanitized string, containing only printable ASCII characters.
    """
    if not isinstance(s, str):
        s = str(s)

    # Short, conventional escapes for the characters that most often break
    # line-oriented log formats.
    named_escapes = {'\n': '\\n', '\r': '\\r', '\t': '\\t'}

    pieces = []
    used = 0
    for ch in s:
        code = ord(ch)
        if ch in named_escapes:
            piece = named_escapes[ch]
        elif 32 <= code < 127:
            piece = ch
        elif code <= 0xFF:
            piece = '\\x{:02x}'.format(code)
        elif code <= 0xFFFF:
            piece = '\\u{:04x}'.format(code)
        else:
            piece = '\\U{:08x}'.format(code)

        # Enforce the budget on the *escaped* text, and only ever cut between
        # whole pieces so an escape sequence is never split in half. Stopping
        # here also avoids walking the rest of a very long input.
        if used + len(piece) > max_len:
            pieces.append('...')
            break
        pieces.append(piece)
        used += len(piece)

    return ''.join(pieces)

def create_social_profile_tweepy(self, map_name: str, map_description: str, users: List[str], outdir: str):
"""Create social profile with tweepy as tweet source

Expand Down Expand Up @@ -69,13 +84,14 @@ def create_social_profile_sns(self,

for user in tqdm(users):
try:
logger.info(f"Loading {user}'s tweets from disk")
logger.info(f"Loading {self._safe_log_str(user)}'s tweets from disk")
data_path = os.path.join(outdir, f"{user}_tweets.jsonl")
with jsonlines.open(data_path, mode="r") as tweets:
for tweet in tweets:
all_tweets.append(tweet)
except BaseException:
logger.info(f"Not on disk! scraping {users}'s tweets now")
users_str = ','.join(self._safe_log_str(u) for u in users)
logger.info(f"Not on disk! scraping {self._safe_log_str(user)}'s tweets now (users list: [{users_str}])")
tweets = self.utils.user_lookup_sns(user, 10000)
with jsonlines.open(f'{outdir}/{user}_tweets.jsonl', mode='a') as writer:
for idx, tweet in enumerate(tweets):
Expand Down Expand Up @@ -144,4 +160,4 @@ def create_social_profile_sns(self,
map_description="A social profile of the latest POTUS Joe Biden, with Nomic's text embedder created by Yuvanesh Anand",
users=["JoeBiden", "POTUS"],
topics=True,
embedding_path="embeddings/JoeBiden.npy")
embedding_path="embeddings/JoeBiden.npy")