forked from mmistakes/minimal-mistakes
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathweb.py
More file actions
283 lines (225 loc) · 9.5 KB
/
web.py
File metadata and controls
283 lines (225 loc) · 9.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
from collections import defaultdict
from webweb import Web
from pathlib import Path
import yaml
DATA_PATH = Path(__file__).parent.joinpath('_data')
PAPERS_PATH = DATA_PATH.joinpath('papers.yml')
PEOPLE_PATH = DATA_PATH.joinpath('people.yml')
CODE_PATH = DATA_PATH.joinpath('code.yml')
EXTRA_WEBWEB_PATH = DATA_PATH.joinpath('extra_webweb.yml')
WEBWEB_JSON_PATH = DATA_PATH.joinpath('index_web.json')
KIND_TO_COLOR_MAP = {
'collaborator': '#999999',
# 'collaborator': '#78C81F',
'lab member': '#E01E7B', # Pink for current lab members
'alumni': '#9d1557', # Yellow for alumni
'paper_scieco': '#8ebef1', # Pink for scieco papers
'paper_idepi': '#1C7BE0', # Blue for idepi papers
'paper_complex': '#14579f', # Grey for complex papers
'code_scieco': '#8ebef1', # Pink for scieco code
'code_idepi': '#1C7BE0', # Blue for idepi code
'code_complex': '#14579f', # Grey for complex code
}
def load_yaml(path):
return yaml.load(path.read_text(), Loader=yaml.FullLoader)
def clean_name(name):
if name.endswith('*') or name.endswith('.'):
name = name[:-1]
return name
# def people_to_aliases(all_people):
# aliases = dict()
# for person in all_people['people'] + all_people['alumni']:
# person_name = person['name']
# aliases[person_name] = person_name
# for alias in person.get('aliases', []):
# aliases[alias] = person_name
# return aliases
def people_to_aliases(all_people):
aliases = dict()
# Process current people
for person in all_people['people']:
person_name = person['name']
aliases[person_name] = person_name
for alias in person.get('aliases', []):
aliases[alias] = person_name
# Process alumni - now organized by category
alumni_data = all_people.get('alumni', {})
for category, people_list in alumni_data.items():
if isinstance(people_list, list):
for person in people_list:
person_name = person['name']
aliases[person_name] = person_name
for alias in person.get('aliases', []):
aliases[alias] = person_name
return aliases
def clean_all_names(data):
aliases = people_to_aliases(data['people'])
for category in data['papers']['categories']:
for i, paper in enumerate(category['pubs']):
for j, name in enumerate(paper['authors']):
name = clean_name(name)
name = aliases.get(name, name)
paper['authors'][j] = name
for project in data['code']['repos']:
for i, name in enumerate(project['authors']):
name = clean_name(name)
name = aliases.get(name, name)
project['authors'][i] = name
for item in data['extra']['projects']:
for i, person in enumerate(item.get('people', [])):
item['people'][i] = aliases.get(person, person)
# Process current people
for person in data['people']['people']:
name = clean_name(person['name'])
person['name'] = aliases.get(name, name)
# Process alumni - now organized by category
alumni_data = data['people'].get('alumni', {})
for category, people_list in alumni_data.items():
if isinstance(people_list, list):
for person in people_list:
name = clean_name(person['name'])
person['name'] = aliases.get(name, name)
def make_network(data):
nodes = defaultdict(dict)
edges = []
collaborator_connections = defaultdict(set) # To track unique connections per collaborator
dan = data['people']['people'][0]['name']
print(f"Main person: {dan}")
# Process papers - build nodes and count connections
for category in data['papers']['categories']:
for paper in category['pubs']:
title = paper['title']
paper_type = paper.get('type', 'scieco') # Default to scieco if no type
nodes[title] = {
'name': title,
'kind': f'paper_{paper_type}' # Use paper_scieco, paper_idepi, or paper_complex
}
if 'links' in paper:
nodes[title]['url'] = paper['links'][0]['url']
for name in paper['authors']:
if name == dan:
continue
# Add this paper to the collaborator's connections
collaborator_connections[name].add(title)
edges.append([title, name])
# Process code repos
for project in data['code']['repos']:
title = project['title']
code_type = project.get('type', 'scieco') # Default to scieco if no type
nodes[title] = {
'name': title,
'kind': f'code_{code_type}' # Use code_scieco, code_idepi, or code_complex
}
for name in project['authors']:
if name == dan:
continue
# Add this code project to the collaborator's connections
collaborator_connections[name].add(title)
edges.append([title, name])
# Process extra projects
for project in data['extra']['projects']:
project_name = project['name']
project_type = project.get('type', 'scieco') # Default to scieco if no type
nodes[project_name] = {
'name': project_name,
'kind': f'code_{project_type}' # Use code_scieco, code_idepi, or code_complex
}
if project.get('url'):
nodes[project_name]['url'] = project['url']
for name in project['people']:
if name == dan:
continue
# Add this project to the collaborator's connections
collaborator_connections[name].add(project_name)
edges.append([project_name, name])
# Set up collaborator nodes
for name, connections in collaborator_connections.items():
nodes[name]['name'] = name
nodes[name]['kind'] = 'collaborator'
# Add URL if available
for person in data['people'].get('collaborators', []):
if person['name'] == name and person.get('url'):
nodes[name]['url'] = person['url']
break
# Set up lab member nodes
for person in data['people']['people']:
name = person['name']
if name == dan:
continue
nodes[name]['name'] = name
nodes[name]['kind'] = 'lab member'
url = person.get('url')
if url and url != '/':
nodes[name]['url'] = url
# Set up alumni nodes
alumni_data = data['people'].get('alumni', {})
for category, people_list in alumni_data.items():
if isinstance(people_list, list):
for person in people_list:
name = person['name']
if name in nodes: # Only if they appear in collaborations
nodes[name]['kind'] = 'alumni'
url = person.get('url')
if url and url != '/':
nodes[name]['url'] = url
# Print connection counts for debugging
print("\nCollaborator connection counts:")
for name, connections in sorted(collaborator_connections.items()):
print(f"{name}: {len(connections)}")
# Set size based on connection count
for node in nodes:
kind = nodes[node]['kind']
if kind.startswith('paper_'): # Any paper type
size = 1.0 # Papers are larger
elif kind.startswith('code_'): # Any code type
size = 0.7 # Code/data contributions are smaller
elif kind == 'lab member':
size = 1.0 # Same size as high-frequency collaborators
elif kind == 'alumni':
size = 1.0 # Same size as lab members
elif kind == 'collaborator':
connection_count = len(collaborator_connections.get(node, set()))
if connection_count < 2: # One-off collaborators
size = 0.7 # Just a little smaller
elif connection_count < 5: # 2-4 papers
size = 0.85 # A little smaller
else: # 5+ papers
size = 1.0 # Just a little smaller than original
print(f"Setting size for {node} with {connection_count} connections: {size}")
nodes[node]['size'] = size
nodes[node]['color'] = KIND_TO_COLOR_MAP[kind]
web = Web(adjacency=edges, nodes=dict(nodes))
web.display.sizeBy = 'size'
web.display.colorBy = 'color'
web.display.hideMenu = True
web.display.showLegend = False
web.display.gravity = 0.7
web.display.width = 400
web.display.height = 400
web.display.scaleLinkOpacity = True
web.display.scaleLinkWidth = True
web.display.scales = {
'nodeSize': {
'min': 0.7, # Adjusted minimum size
'max': 1.0, # Maximum size for frequent collaborators and lab members
}
}
print(f"Writing network data to {WEBWEB_JSON_PATH}")
WEBWEB_JSON_PATH.write_text(web.json)
print("File written successfully!")
# web.show()
if __name__ == '__main__':
print("Loading data files...")
data = {
'papers': load_yaml(PAPERS_PATH),
'people': load_yaml(PEOPLE_PATH),
'code': load_yaml(CODE_PATH),
'extra': load_yaml(EXTRA_WEBWEB_PATH),
}
print("Data loaded successfully")
print("Cleaning names...")
clean_all_names(data)
print("Names cleaned successfully")
print("Building network...")
make_network(data)
print("Network built and saved successfully")