-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprotein.py
More file actions
82 lines (63 loc) · 3.15 KB
/
protein.py
File metadata and controls
82 lines (63 loc) · 3.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def count_AA_frequency(aa_sequence):
    """Count how many times each residue occurs in ``aa_sequence``.

    Args:
        aa_sequence: Iterable of hashable residue symbols, typically a
            string of one-letter amino acid codes.

    Returns:
        A plain ``dict`` mapping each distinct residue to its number of
        occurrences. Keys appear in order of first occurrence. An empty
        input yields an empty dict.
    """
    frequencies = {}
    # Single pass: a one-shot iterator (e.g. a generator) would be exhausted
    # by a separate zero-initialisation pass, leaving every count at 0.
    for residue in aa_sequence:
        frequencies[residue] = frequencies.get(residue, 0) + 1
    return frequencies
# mRNA -> protein translation.
#
# Lookup table mapping each of the 64 mRNA codons to a one-letter amino acid
# code, or to the marker string "Stop" for UAA, UAG and UGA.
# Entry order is significant: dicts preserve insertion order, and code that
# iterates this table emits codons in exactly this order.
codon_table = {
    # Second base U
    "UUU": "F", "UUC": "F", "UUA": "L", "UUG": "L",
    "CUU": "L", "CUC": "L", "CUA": "L", "CUG": "L",
    "AUU": "I", "AUC": "I", "AUA": "I", "AUG": "M",
    "GUU": "V", "GUC": "V", "GUA": "V", "GUG": "V",
    # Second base C
    "UCU": "S", "UCC": "S", "UCA": "S", "UCG": "S",
    "CCU": "P", "CCC": "P", "CCA": "P", "CCG": "P",
    "ACU": "T", "ACC": "T", "ACA": "T", "ACG": "T",
    "GCU": "A", "GCC": "A", "GCA": "A", "GCG": "A",
    # Second base A
    "UAU": "Y", "UAC": "Y", "UAA": "Stop", "UAG": "Stop",
    "CAU": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
    "AAU": "N", "AAC": "N", "AAA": "K", "AAG": "K",
    "GAU": "D", "GAC": "D", "GAA": "E", "GAG": "E",
    # Second base G
    "UGU": "C", "UGC": "C", "UGA": "Stop", "UGG": "W",
    "CGU": "R", "CGC": "R", "CGA": "R", "CGG": "R",
    "AGU": "S", "AGC": "S", "AGA": "R", "AGG": "R",
    "GGU": "G", "GGC": "G", "GGA": "G", "GGG": "G",
}
def translate(sequence, codon=3):
    """Translate an mRNA sequence into a string of amino acid codes.

    The sequence is read from index 0 in consecutive, non-overlapping
    windows of ``codon`` characters, and each window is looked up in the
    module-level ``codon_table``.

    Args:
        sequence: mRNA sequence as a string.
        codon: Width of each window. Defaults to 3 because codons are read
            as triplets; the keys of ``codon_table`` are all 3 characters.

    Returns:
        The concatenation of the table values for every complete window,
        in order. Stop codons contribute the table's literal "Stop" marker.
        Reading ends at the first window that is not a key of
        ``codon_table``; trailing characters that do not fill a complete
        window are ignored. An empty sequence yields "".
    """
    residues = []
    # The "+ 1" makes the last complete window reachable: the previous bound
    # (len - 3) excluded it, so e.g. "AUG" translated to "". Using ``codon``
    # rather than a literal 3 keeps the bound consistent with the step.
    for start in range(0, len(sequence) - codon + 1, codon):
        amino_acid = codon_table.get(sequence[start:start + codon])
        if amino_acid is None:
            # Unknown window: stop reading, keep what was translated so far.
            break
        residues.append(amino_acid)
    return "".join(residues)
def check_start_codon(sequence, start_codon="AUG"):
    """Report the first position at which ``start_codon`` occurs.

    Every offset of ``sequence`` is considered, not only offsets that are
    multiples of three.

    Args:
        sequence: mRNA sequence as a string.
        start_codon: Codon to search for. Defaults to "AUG".

    Returns:
        The message "Start Codon present at position: N", where N is the
        0-based index of the first occurrence, or ``False`` when the codon
        does not occur (including when ``sequence`` or ``start_codon`` is
        empty).
    """
    # An empty pattern trivially "matches" at index 0; treat it as absent.
    if not start_codon:
        return False
    position = sequence.find(start_codon)
    if position == -1:
        return False
    return "Start Codon present at position: {0}".format(position)
def check_stop_codon(sequence, stop_codon="UAG"):
    """Report the first position at which ``stop_codon`` occurs.

    Every offset of ``sequence`` is considered, not only offsets that are
    multiples of three. Only the single codon given is searched for; pass
    ``stop_codon`` explicitly to look for a different one.

    Args:
        sequence: mRNA sequence as a string.
        stop_codon: Codon to search for. Defaults to "UAG".

    Returns:
        The message "Stop codon present at position : N", where N is the
        0-based index of the first occurrence, or ``False`` when the codon
        does not occur (including when ``sequence`` or ``stop_codon`` is
        empty).
    """
    # An empty pattern trivially "matches" at index 0; treat it as absent.
    if not stop_codon:
        return False
    position = sequence.find(stop_codon)
    if position == -1:
        # Explicit return: a bare ``False`` expression here would make the
        # function fall through and return None instead.
        return False
    return "Stop codon present at position : {}".format(position)
# Reverse translation of an amino acid sequence
"""
This code returns all the possible codons that code for each amino acid in a sequence.
Recovering the exact codon that encoded a specific amino acid is not possible from the protein alone because of the redundancy of the genetic code."""
def convert_aa_to_mRNA(amino_sequence):
    """List every codon that can encode each residue of ``amino_sequence``.

    Args:
        amino_sequence: Sequence of one-letter amino acid codes.

    Returns:
        A flat list of codon strings. Residues are handled in input order;
        for each residue, all keys of the module-level ``codon_table`` whose
        value equals that residue are appended in the table's iteration
        order. A residue with no matching table value contributes nothing.
    """
    candidate_codons = []
    for residue in amino_sequence:
        candidate_codons.extend(
            triplet
            for triplet, amino_acid in codon_table.items()
            if residue == amino_acid
        )
    return candidate_codons
# Demo, run only when the file is executed as a script so that importing the
# module for its functions does not print anything.
if __name__ == "__main__":
    # Print every candidate codon for the peptide "MVHTP", concatenated
    # without separators.
    res = convert_aa_to_mRNA("MVHTP")
    print("".join(res))