-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathGenerateNNData.py
More file actions
46 lines (35 loc) · 1.23 KB
/
GenerateNNData.py
File metadata and controls
46 lines (35 loc) · 1.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
'''
This module outputs the primary cluster results 'as-is', to be used by the neural net. We can
do the rest of popnet as well, I guess
'''
import pandas as pd
import numpy as np
def genNNData(clusters, chr_names, chr_breaks, s1_params, sample_list, out_path):
    '''
    Write the per-section cluster assignment of every sample to a TSV file.

    Parameters:
        clusters: sequence with one entry per section (matrix row). Each
            entry is a sequence of groups, and each group is an iterable of
            sample names. A sample's value for that section is the position
            of its group within the entry.
        chr_names: chromosome names, used as the prefix of the position labels.
        chr_breaks: chr_breaks[i] is used as the exclusive end row of
            chromosome i; the start row is the previous break (0 for the
            first chromosome).
        s1_params: object providing getSectionLength(); the returned length
            is multiplied by the within-chromosome section counter to form
            the numeric part of each label.
        sample_list: sample names; they become the column labels of the
            matrix (the rows of the written, transposed table).
        out_path: destination passed to DataFrame.to_csv.

    Returns:
        None. The table is written transposed (samples as rows, positions
        as columns), tab-separated, with a header line.

    Raises:
        ValueError: if a group contains a sample that is not in sample_list.
            pandas also raises if the number of generated position labels
            differs from len(clusters).

    Note: the matrix starts as zeros, so a sample missing from every group
    of a section is indistinguishable from a member of group 0.
    '''
    # Build the name -> column table once instead of calling
    # sample_list.index() for every sample of every section. setdefault
    # keeps the first occurrence, which is what list.index() returns.
    column_of = {}
    for col, sample in enumerate(sample_list):
        column_of.setdefault(sample, col)

    res = np.zeros((len(clusters), len(sample_list)))
    for row, cluster in enumerate(clusters):
        for group_id, group in enumerate(cluster):
            for sample in group:
                try:
                    col = column_of[sample]
                except KeyError:
                    # Same exception type list.index() raised originally.
                    raise ValueError(
                        "{0!r} is not in list".format(sample)) from None
                res[row, col] = group_id

    # Create the index: one "<chromosome>:<offset>" label per section, with
    # the offset restarting at 0 for every chromosome.
    section_length = s1_params.getSectionLength()
    start = 0
    idx = []
    for i, chr_name in enumerate(chr_names):
        end = chr_breaks[i]
        for k in range(end - start):
            idx.append("{0}:{1}".format(chr_name, int(section_length * k)))
        start = end

    df = pd.DataFrame(res, columns=sample_list, index=idx)
    df.T.to_csv(out_path, sep='\t', header=True)
if __name__ == "__main__":
    # TESTING ONLY: smoke-test genNNData on a tiny hand-made input.
    import sys

    class _StubParams:
        '''Stand-in exposing the only method genNNData calls on s1_params.'''

        def getSectionLength(self):
            # Arbitrary section length, used only to build position labels.
            return 1000

    clusters = [
        [['A', 'B', 'C'], ['D', 'E']],
        [['A', 'B'], ['C', 'D'], ['E']],
        [['B', 'C', 'E'], ['A', 'D']],
        [['C', 'D', 'E'], ['A', 'B']],
    ]
    sample_list = ['A', 'B', 'C', 'D', 'E']
    # genNNData builds one position label per cluster row, so the last
    # break must equal len(clusters): two chromosomes of two sections each.
    chr_names = ['chr1', 'chr2']
    chr_breaks = [2, 4]
    # The output path may be given as the first command-line argument;
    # otherwise the original hard-coded location is used.
    if len(sys.argv) > 1:
        out_path = sys.argv[1]
    else:
        out_path = '/d/data/plasmo/training_data/testdata.tsv'
    # The previous call passed only (clusters, sample_list, out_path) and
    # failed with TypeError because genNNData requires six arguments.
    genNNData(clusters, chr_names, chr_breaks, _StubParams(), sample_list, out_path)