diff --git a/module/gff_file.py b/module/gff_file.py index bdd32af..98b4345 100644 --- a/module/gff_file.py +++ b/module/gff_file.py @@ -183,9 +183,9 @@ def scan_mrna_sequence(self, base_url=None, fasta_file=None): else: return False - mrna.coding_sequence_has_start_codon() - mrna.coding_sequence_has_stop_codon() - mrna.coding_sequence_no_internal_stop_codon() + mrna.has_start_codon() + mrna.has_stop_codon() + mrna.no_internal_stop_codon() if mrna.errors != {}: gene_id = self.get_gene_id(mrna_id) diff --git a/module/transcript.py b/module/transcript.py index d57f04d..73c8cbe 100644 --- a/module/transcript.py +++ b/module/transcript.py @@ -16,6 +16,9 @@ class CodingSequence: sequence_type = 'cds' + + start_codons = ('ATG',)  # trailing comma: a real tuple, so `in` compares whole codons instead of substrings + stop_codons = ('TAA', 'TAG', 'TGA') def __init__(self, feature_name, organism_name, sequence_name): self.feature_name = feature_name @@ -37,33 +40,34 @@ def get_sequence(self, base_url=None, fasta_file=None): else: return False elif fasta_file: - file_handle = open(fasta_file, 'r') - for seq in file_handle: - if seq[0] != '>': - self.sequence += seq.rstrip() + with open(fasta_file, 'r') as file_handle: + for seq in file_handle: + if seq[0] != '>': + self.sequence += seq.rstrip() else: return False - def coding_sequence_has_start_codon(self): - if self.sequence[0:3] == 'ATG': + def has_start_codon(self): + first_codon = self.sequence[0:3] + if first_codon in self.start_codons: return True else: self.errors['start_codon'] = 'no start codon' return False - def coding_sequence_has_stop_codon(self): + def has_stop_codon(self): last_codon = self.sequence[-3:] - if last_codon == 'TAA' or last_codon == 'TAG' or last_codon == 'TGA': + if last_codon in self.stop_codons: return True else: self.errors['stop_codon'] = 'no stop codon' return False - def coding_sequence_no_internal_stop_codon(self): + def no_internal_stop_codon(self): internal_stop_codon_count = 0 for i in range(0, len(self.sequence) - 3, 3): codon = self.sequence[i:i + 3] - if codon == 'TAA' 
or codon == 'TAG' or codon == 'TGA': + if codon in self.stop_codons: internal_stop_codon_count += 1 self.errors['no_internal_stop_codon'] = str(internal_stop_codon_count) + ' internal stop codon' if internal_stop_codon_count: diff --git a/test/test_transcript.py b/test/test_transcript.py index 4026c90..94a1a6b 100644 --- a/test/test_transcript.py +++ b/test/test_transcript.py @@ -12,46 +12,46 @@ def test_has_sequence(self): def test_has_start_codon(self): mrna = transcript.CodingSequence('b0b85443-0ff0-4fec-b919-f7bbeb626072', 'sandbox_arabiensis', 'SDBKB704125') mrna.sequence = "ATGCACTGA" - mrna.coding_sequence_has_start_codon() - self.assertEqual(True, mrna.coding_sequence_has_start_codon()) + mrna.has_start_codon() + self.assertEqual(True, mrna.has_start_codon()) mrna.sequence = "ATCCACTGA" - mrna.coding_sequence_has_start_codon() + mrna.has_start_codon() self.assertEqual('no start codon', mrna.errors['start_codon']) def test_has_stop_codon(self): mrna = transcript.CodingSequence('b0b85443-0ff0-4fec-b919-f7bbeb626072', 'sandbox_arabiensis', 'SDBKB704125') - mrna.sequence = "ATGCACTGA" # TGA amber stop codon + mrna.sequence = "ATGCACTGA" # TGA opal stop codon - self.assertEqual(True, mrna.coding_sequence_has_stop_codon()) + self.assertEqual(True, mrna.has_stop_codon()) mrna.sequence = "ATGCAGTAA" # TAA ochre stop codon - self.assertEqual(mrna.coding_sequence_has_stop_codon(), True) + self.assertEqual(mrna.has_stop_codon(), True) - mrna.sequence = "ATGCAGTAG" # TAG opal stop codon + mrna.sequence = "ATGCAGTAG" # TAG amber stop codon - self.assertEqual(mrna.coding_sequence_has_stop_codon(), True) + self.assertEqual(mrna.has_stop_codon(), True) mrna.sequence = "ATGCAGCAG" # NO stop codon - self.assertEqual(mrna.coding_sequence_has_stop_codon(), False) + self.assertEqual(mrna.has_stop_codon(), False) self.assertEqual('no stop codon', mrna.errors['stop_codon']) def test_no_internal_stop_codon(self): mrna = transcript.CodingSequence('b0b85443-0ff0-4fec-b919-f7bbeb626072', 'sandbox_arabiensis', 'SDBKB704125') mrna.sequence = "ATGCACCTCGAGTAA" - 
mrna.coding_sequence_no_internal_stop_codon() - self.assertEqual(mrna.coding_sequence_no_internal_stop_codon(), True) + mrna.no_internal_stop_codon() + self.assertEqual(mrna.no_internal_stop_codon(), True) mrna.sequence = "ATGCACTAACTCGAGTAA" # TAA - mrna.coding_sequence_no_internal_stop_codon() + mrna.no_internal_stop_codon() self.assertEqual(mrna.errors['no_internal_stop_codon'], '1 internal stop codon') mrna.sequence = "ATGCACCTCGAGTAGTAA" # TAG - mrna.coding_sequence_no_internal_stop_codon() + mrna.no_internal_stop_codon() self.assertEqual(mrna.errors['no_internal_stop_codon'], '1 internal stop codon') mrna.sequence = "ATGTGACACCTCTAATAA" # TGA,TAA - mrna.coding_sequence_no_internal_stop_codon() + mrna.no_internal_stop_codon() self.assertEqual(mrna.errors['no_internal_stop_codon'], '2 internal stop codon')