-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathwrangle.py
More file actions
44 lines (29 loc) · 1.55 KB
/
wrangle.py
File metadata and controls
44 lines (29 loc) · 1.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
""" Main wrangle function for gene lists, tissues, organisms, diets, etc.. """
import json, os, re, sys
sys.path.append('./src')
import setup_wrangler as wrangler
import pandas as pd
# --- Run configuration (passed to main() when executed as a script) ---

# Directory that holds the input reference files and receives the JSON output.
refDirectory = 'refFiles'

# Organism whose gene list is wrangled; main() also builds the diet file
# when this is exactly 'Mus musculus'.
organism = 'Mus musculus'

# Base name (no extension) of the gene list: the tag followed by the organism
# name with spaces turned into underscores and lower-cased.
geneFileTag = 'gene_result_'
geneOrgFile = geneFileTag + organism.replace(' ', '_').lower()

# Base name (no extension) of the mouse diet JSON file.
dietFileTag = 'mouseDiets'

# Forwarded unchanged to wrangler.geneWrangler.
valsToAlphaNumLower = True

# When True, existing JSON files are regenerated instead of being reused.
overwrite = False
def main(geneOrgFile, refDirectory, organism, dietFileTag, overwrite, valsToAlphaNumLower):
    """Create the JSON reference files for ``organism`` inside ``refDirectory``.

    The gene JSON is (re)built from the tab-separated ``<geneOrgFile>.txt``
    when ``<geneOrgFile>.json`` is not listed in ``refDirectory`` or when
    ``overwrite`` is the literal ``True``. For ``'Mus musculus'`` the diet
    JSON ``<dietFileTag>.json`` is produced under the same rule.

    Args:
        geneOrgFile: Base name (no extension) of the gene list file.
        refDirectory: Directory that is listed for existing files and that
            receives the JSON output.
        organism: Organism name; used in the error message, forwarded to
            ``wrangler.geneWrangler`` and compared against ``'Mus musculus'``.
        dietFileTag: Base name (no extension) of the mouse diet JSON file.
        overwrite: Only the object ``True`` forces regeneration (identity
            test), other truthy values do not.
        valsToAlphaNumLower: Forwarded unchanged to ``wrangler.geneWrangler``.

    Raises:
        FileNotFoundError: If ``<geneOrgFile>.txt`` is not listed in
            ``refDirectory``.
    """
    geneTxtName = geneOrgFile + '.txt'
    geneJsonName = geneOrgFile + '.json'
    dietJsonName = dietFileTag + '.json'
    forceRebuild = overwrite is True

    # The raw gene list is mandatory, even if the JSON already exists.
    if geneTxtName not in os.listdir(refDirectory):
        raise FileNotFoundError('Gene list for {0} not found'.format(organism))

    if geneJsonName not in os.listdir(refDirectory) or forceRebuild:
        geneTable = pd.read_csv('{0}/{1}.txt'.format(refDirectory, geneOrgFile), sep='\t')
        # Keep only the columns handed to the wrangler and drop repeated rows.
        geneTable = geneTable[['Org_name', 'GeneID', 'Symbol', 'Aliases']].drop_duplicates()
        # NOTE(review): geneWrangler's result is dumped as JSON below, so it is
        # presumably a JSON-serialisable mapping -- confirm in setup_wrangler.
        geneDict = wrangler.geneWrangler(
            df=geneTable,
            organism=organism,
            valsToAlphaNumLower=valsToAlphaNumLower,
        )
        with open('{0}/{1}.json'.format(refDirectory, geneOrgFile), 'w') as jsonOut:
            json.dump(geneDict, jsonOut, indent=4)

    # The diet file is only produced for mouse.
    if organism != 'Mus musculus':
        return

    # Directory is listed again here, after the gene JSON may have been written.
    if dietJsonName in os.listdir(refDirectory) and not forceRebuild:
        return

    dietDict = wrangler.mouseDietWrangler()
    with open('{0}/{1}.json'.format(refDirectory, dietFileTag), 'w') as jsonOut:
        json.dump(dietDict, jsonOut, indent=4)
if __name__ == "__main__":
    # Script entry point: run the wrangler with the module-level settings.
    main(
        geneOrgFile=geneOrgFile,
        refDirectory=refDirectory,
        organism=organism,
        dietFileTag=dietFileTag,
        overwrite=overwrite,
        valsToAlphaNumLower=valsToAlphaNumLower,
    )