-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy patherrorRateFromBAM.py
More file actions
executable file
·83 lines (61 loc) · 2.41 KB
/
errorRateFromBAM.py
File metadata and controls
executable file
·83 lines (61 loc) · 2.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env python
"""Extract error profile (read_length, mismatch, insertion, deletion) from a bam
"""
import os
import sys
import argparse
import pysam
def main(arguments):
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("-i", "--input",
required=True,
dest="inFile",
help="Input BAM file (use - for stdin)")
parser.add_argument("-c", "--min_coverage",
required=False,
dest="min_coverage",
type=float,
default=0.0,
help="Minimal coverage threshold to filter out spurious alignments (default: 0.0)")
parser.add_argument('-o', '--outfile',
dest="outFile",
help="Output file",
default=sys.stdout,
type=argparse.FileType('w'))
args = parser.parse_args(arguments)
if args.inFile == '-':
#inH = sys.stdin
samfile = pysam.Samfile("-", "rb")
else:
samfile = pysam.Samfile(args.inFile, "rb")
#inH = open(args.inFile, 'rU')
#samfile = pysam.Samfile(args.inFile, "rb")
indelmisList=[]
for read in samfile:
#list of cigar tuples
cigar = read.cigar
alignlen = 0
querylen = read.rlen
#readlenth mismatch insertion deletion
indelmisTuple = [querylen, 0.0, 0.0, 0.0]
#skip when cigar is none
if cigar == None or read.is_secondary or read.flag & 0x800 == 2048:
continue
for cigartuple in cigar:
if cigartuple[0]==4 or cigartuple[0]==5:
continue
alignlen+=cigartuple[1]
if cigartuple[0]==1:
indelmisTuple[2]+=cigartuple[1]
elif cigartuple[0]==2:
indelmisTuple[3]+=cigartuple[1]
NM = dict(read.tags)["NM"]
indelmisTuple[1]+= NM - indelmisTuple[2] - indelmisTuple[3]
## indelmisTuple[1]+=(read.tags[0][1]) - indelmisTuple[2] - indelmisTuple[3]
indelmisTuple[2]/=alignlen
indelmisTuple[3]/=alignlen
indelmisTuple[1]/=alignlen
if alignlen*1.0/querylen > args.min_coverage:
args.outFile.write("\t".join([read.qname] +map(str, indelmisTuple)) + "\n")
if __name__ == '__main__':
sys.exit(main(sys.argv[1:]))