-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdata_split.py
More file actions
58 lines (52 loc) · 1.76 KB
/
data_split.py
File metadata and controls
58 lines (52 loc) · 1.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import os
import numpy as np
import cv2
import imageio
import random
import json
# Dataset root directory and the name of the JSON file the split is written to.
root_path = '/data3/ceiling/datasets/Gross-Combined/ROI/'
json_path = 'ROI_404_standard.json'
print(root_path)

# Each level is the name of a sub-directory directly under root_path.
level = ['AIS', 'MIA', '1', '2', '3']

# Per-level summary: directory entries are grouped by the first four characters
# of their name (reported as the patient in the message below), and both the
# number of groups and the total number of entries are printed.
for grade in level:
    grade_dir = root_path + grade
    entries_per_patient = {}
    entry_count = 0
    for entry in os.listdir(grade_dir):
        patient_id = entry[:4]
        entry_count += 1
        entries_per_patient[patient_id] = entries_per_patient.get(patient_id, 0) + 1
    print("Level:{}, Patient Num:{}, Image Num:{}".format(grade, len(entries_per_patient), entry_count))
# Result container: split_dict[subset][level] -> list of patient IDs.
split_dict = {'train': {},
              'val': {},
              'test': {}}

# Fraction of each level's patient IDs assigned to every subset. The test
# subset receives whatever remains after train and val, so `test` is kept
# for documentation only and is not read below.
train = 0.6
val = 0.2
test = 0.2

random.seed(404)  # fixed seed so the shuffles below are repeatable
for grade in level:
    grade_dir = root_path + grade
    entries_per_patient = {}
    entry_count = 0
    for entry in os.listdir(grade_dir):
        # NOTE: the patient ID is taken to be the first four characters of
        # the file name; entries sharing that prefix are counted together.
        patient_id = entry[:4]
        entry_count += 1
        entries_per_patient[patient_id] = entries_per_patient.get(patient_id, 0) + 1
    print("Level:{}, Patient Num:{}, Image Num:{}".format(grade, len(entries_per_patient), entry_count))

    # os.listdir() returns entries in arbitrary order, so the IDs are sorted
    # first; otherwise the seeded shuffle could produce a different split on
    # another filesystem or machine.
    name_list = sorted(entries_per_patient)
    random.shuffle(name_list)

    # The split is made over IDs rather than individual files, so all entries
    # sharing an ID land in the same subset.
    patient_count = len(name_list)
    train_end = int(train * patient_count)
    val_end = int((train + val) * patient_count)
    split_dict['train'][grade] = name_list[:train_end]
    split_dict['val'][grade] = name_list[train_end:val_end]
    split_dict['test'][grade] = name_list[val_end:]
    print("Training Patient Num:{}, Validation Patient Num:{}, Testing Patient Num:{}".format(
        len(split_dict['train'][grade]),
        len(split_dict['val'][grade]),
        len(split_dict['test'][grade])))

# Persist the split as JSON at json_path (relative to the working directory).
json_data = json.dumps(split_dict)
with open(json_path, 'w') as f:
    f.write(json_data)