-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathConsensus Identity.py
More file actions
71 lines (50 loc) · 2.3 KB
/
Consensus Identity.py
File metadata and controls
71 lines (50 loc) · 2.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# To extract consensus identity between all species
from Bio import AlignIO
# Load the alignment
alignment = AlignIO.read("/data2/lackey_lab/shalam/PHYL/three/test4/AGO3_short.aln", "clustal")
num_sequences = len(alignment)
num_positions = alignment.get_alignment_length()
consensus = []
overall_identity = 0
# For each position in the alignment
for i in range(num_positions):
# Get all amino acids at this position
column = alignment[:, i]
# Count occurrences of each amino acid at this position
amino_acid_counts = {aa: column.count(aa) for aa in set(column)}
# Sort amino acids by their occurrences
sorted_aas = sorted(amino_acid_counts.items(), key=lambda x: x[1], reverse=True)
# Add the most common amino acid to the consensus sequence
consensus_aa, count = sorted_aas[0]
consensus.append(consensus_aa)
# Calculate the identity for this position
identity = count / num_sequences
overall_identity += identity
# Average the identity scores to get the overall consensus identity
overall_identity /= num_positions
print("Consensus sequence:", ''.join(consensus))
print("Overall consensus identity:", overall_identity)
# To extract consensus identity between Homo sapien and all other species
from Bio import AlignIO
# Load the alignment
alignment = AlignIO.read("/data2/lackey_lab/shalam/PHYL/three/test4/AGO3_short.aln", "clustal")
# Find the index of the Homo species (assuming it's the first sequence)
homo_index = 0
# Initialize a dictionary to store the consensus identity for each species
consensus_identities = {}
# For each species in the alignment
for i in range(len(alignment)):
if i == homo_index:
continue # Skip comparing Homo to itself
species_name = alignment[i].id
homo_sequence = alignment[homo_index]
species_sequence = alignment[i]
num_positions = alignment.get_alignment_length()
matching_positions = sum(1 for h, s in zip(homo_sequence, species_sequence) if h == s)
# Calculate the identity for this species
identity = matching_positions / num_positions
# Store the identity in the dictionary
consensus_identities[species_name] = identity
# Print the consensus identities
for species, identity in consensus_identities.items():
print(f"Consensus identity with {species}: {identity:.2%}")