Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
214 changes: 196 additions & 18 deletions BINF2025_TP3.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyNSXnqaXAUgZK9rmJ1TWbGo"
"provenance": []
},
"kernelspec": {
"name": "python3",
Expand Down Expand Up @@ -70,11 +69,38 @@
"id": "qqiiq5bcxYvM"
}
},
{
"cell_type": "code",
"source": [
"# Plain arithmetic sum of the listed terms; the recorded cell output is 9.0.\n",
"# NOTE(review): presumably the per-column scores of the alignment asked about in the question above - confirm.\n",
"+ 1+1+1.5+1-1-1-1-1+1+1+1.5+2+1+1+1"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "R--yDLlLtG4n",
"outputId": "e30f9f6c-a0c9-43e5-b8f0-1931c38946db"
},
"execution_count": 1,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"9.0"
]
},
"metadata": {},
"execution_count": 1
}
]
},
{
"cell_type": "markdown",
"source": [
"```markdown\n",
"Votre réponse ici\n",
"On trouve 9\n",
"\n",
"```"
],
"metadata": {
Expand All @@ -94,7 +120,10 @@
"cell_type": "markdown",
"source": [
"```markdown\n",
"Votre réponse ici\n",
"Le meilleur alignement est\n",
"A-TG\n",
"ACTG\n",
"avec une distance de 1\n",
"```"
],
"metadata": {
Expand Down Expand Up @@ -125,7 +154,9 @@
"cell_type": "markdown",
"source": [
"```markdown\n",
"Votre réponse ici\n",
"Score de 0.5 avec\n",
"x = -T-AT\n",
"y = ATGAC\n",
"```"
],
"metadata": {
Expand Down Expand Up @@ -177,12 +208,24 @@
{
"cell_type": "code",
"source": [
"#Votre code ici"
"def levenshtein(x, y):\n",
"    \"\"\"Return the Levenshtein (edit) distance between sequences x and y.\n",
"\n",
"    The distance is the minimum number of single-element insertions,\n",
"    deletions and substitutions needed to turn x into y.\n",
"\n",
"    The table is filled bottom-up, one row at a time, so the cost is\n",
"    O(len(x) * len(y)) time and O(min(len(x), len(y))) memory. A plain\n",
"    recursion on x[1:] / y[1:] recomputes the same suffix pairs over and\n",
"    over and grows exponentially with the input length.\n",
"\n",
"    Args:\n",
"        x: first sequence (anything supporting len() and iteration).\n",
"        y: second sequence.\n",
"\n",
"    Returns:\n",
"        int: the edit distance; len(y) when x is empty, len(x) when y is empty.\n",
"    \"\"\"\n",
"    # The distance is symmetric, so keep the shorter sequence along the row\n",
"    # to store as few cells as possible.\n",
"    if len(x) < len(y):\n",
"        x, y = y, x\n",
"\n",
"    # previous[j] = distance between the empty prefix of x and y[:j].\n",
"    previous = list(range(len(y) + 1))\n",
"    for i, item_x in enumerate(x, start=1):\n",
"        # current[0] = distance between x[:i] and the empty prefix of y.\n",
"        current = [i]\n",
"        for j, item_y in enumerate(y, start=1):\n",
"            if item_x == item_y:\n",
"                # Equal heads cost nothing: reuse the diagonal cell.\n",
"                cost = previous[j - 1]\n",
"            else:\n",
"                # Deletion, insertion or substitution, whichever is cheapest.\n",
"                cost = 1 + min(previous[j], current[j - 1], previous[j - 1])\n",
"            current.append(cost)\n",
"        previous = current\n",
"\n",
"    return previous[-1]"
],
"metadata": {
"id": "FJR69IEQ4aHv"
},
"execution_count": null,
"execution_count": 14,
"outputs": []
},
{
Expand All @@ -201,6 +244,33 @@
"id": "arFVwA6E5NWn"
}
},
{
"cell_type": "code",
"source": [
"# Print one Levenshtein distance per line for each test pair.\n",
"for seq_a, seq_b in (('CCAG', 'CA'), ('CCGT', 'CGTCA'), ('AY678264*', 'OQ870305*')):\n",
"    print(levenshtein(seq_a, seq_b))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TL30OBd45cDV",
"outputId": "248ae1c6-9a48-4328-aba0-273d27e66aad"
},
"execution_count": 15,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2\n",
"3\n",
"7\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
Expand All @@ -226,12 +296,45 @@
{
"cell_type": "code",
"source": [
"#Votre code ici"
"import numpy as np\n",
"\n",
"def gap_penality(go, ge, n):\n",
"    \"\"\"Return the (negative) score contribution of a gap of length n.\n",
"\n",
"    Args:\n",
"        go: gap-opening cost, charged once per gap.\n",
"        ge: gap-extension cost, charged for each of the n gapped positions.\n",
"        n: gap length.\n",
"\n",
"    Returns:\n",
"        -(go + ge * n)\n",
"    \"\"\"\n",
"    return -(go + ge * n)\n",
"\n",
"\n",
"def sw_fwd(x: str, y: str, cmap: dict, sigma: np.ndarray, goge: list) -> tuple:\n",
"    \"\"\"Forward pass of a Smith-Waterman style local alignment of x and y.\n",
"\n",
"    Args:\n",
"        x: first sequence; x[i-1] is scored on row i.\n",
"        y: second sequence; y[j-1] is scored on column j.\n",
"        cmap: maps each symbol of x and y to its row/column index in sigma.\n",
"        sigma: substitution score matrix, indexed sigma[cmap[a], cmap[b]].\n",
"        goge: pair (go, ge) of gap-opening and gap-extension costs.\n",
"\n",
"    Returns:\n",
"        (S, B), both of shape (len(x) + 1, len(y) + 1):\n",
"        S holds the best local score ending at each cell (never below 0);\n",
"        B holds the move that produced it: 0 = stop, 1 = diagonal,\n",
"        2 = up (gap in y), 3 = left (gap in x). Ties are resolved in that order.\n",
"\n",
"    NOTE(review): B records only the direction of the move, not the gap\n",
"    length k that achieved the maximum. A traceback that moves one cell at a\n",
"    time is only guaranteed to follow the scored path when go == 0.\n",
"    \"\"\"\n",
"    go, ge = goge\n",
"    n_rows, n_cols = len(x) + 1, len(y) + 1\n",
"\n",
"    # Row 0 and column 0 stay at zero: a local alignment may start anywhere.\n",
"    S = np.zeros((n_rows, n_cols))\n",
"    B = np.zeros((n_rows, n_cols), dtype=int)\n",
"\n",
"    for i in range(1, n_rows):\n",
"        for j in range(1, n_cols):\n",
"            # Align x[i-1] with y[j-1].\n",
"            match = S[i - 1, j - 1] + sigma[cmap[x[i - 1]], cmap[y[j - 1]]]\n",
"            # Best gap of any length k ending here, coming from above / from the left.\n",
"            delete = max(S[i - k, j] + gap_penality(go, ge, k) for k in range(1, i + 1))\n",
"            insert = max(S[i, j - k] + gap_penality(go, ge, k) for k in range(1, j + 1))\n",
"\n",
"            # Local alignment: a negative score restarts the alignment at 0.\n",
"            best = max(0, match, delete, insert)\n",
"            S[i, j] = best\n",
"\n",
"            # Remember which move produced the best score.\n",
"            if best == 0:\n",
"                B[i, j] = 0\n",
"            elif best == match:\n",
"                B[i, j] = 1\n",
"            elif best == delete:\n",
"                B[i, j] = 2\n",
"            else:\n",
"                B[i, j] = 3\n",
"\n",
"    return S, B"
],
"metadata": {
"id": "njn3JB0b-WHj"
"id": "g08UKhBu8FRT"
},
"execution_count": null,
"execution_count": 50,
"outputs": []
},
{
Expand All @@ -250,12 +353,37 @@
{
"cell_type": "code",
"source": [
"#Votre code ici"
"import numpy as np\n",
"\n",
"def sw_bwd(x, y, S, B):\n",
"    \"\"\"Trace back the best local alignment from the score and move matrices.\n",
"\n",
"    The walk starts at the cell of S holding the maximum score and follows\n",
"    the codes stored in B one cell at a time: 1 = diagonal (x[i-1] aligned\n",
"    with y[j-1]), 2 = up (x[i-1] against a gap), 3 = left (gap against\n",
"    y[j-1]). It stops on code 0 or when row 0 / column 0 is reached.\n",
"\n",
"    Args:\n",
"        x: first sequence (rows of S and B).\n",
"        y: second sequence (columns of S and B).\n",
"        S: score matrix of shape (len(x) + 1, len(y) + 1).\n",
"        B: move matrix of the same shape.\n",
"\n",
"    Returns:\n",
"        (align_x, align_y, score): the two aligned strings, with '-' marking\n",
"        gaps, and the maximum score found in S.\n",
"\n",
"    Raises:\n",
"        ValueError: if B contains a code other than 0, 1, 2 or 3 on the path\n",
"            (such a code would otherwise make the walk loop forever).\n",
"    \"\"\"\n",
"    i, j = np.unravel_index(S.argmax(), S.shape)\n",
"    score = S[i, j]\n",
"\n",
"    # Columns are collected end-to-start and reversed once at the end, which\n",
"    # avoids rebuilding the strings on every step.\n",
"    rev_x = []\n",
"    rev_y = []\n",
"    while B[i, j] != 0 and i > 0 and j > 0:\n",
"        move = B[i, j]\n",
"        if move == 1:\n",
"            rev_x.append(x[i - 1])\n",
"            rev_y.append(y[j - 1])\n",
"            i -= 1\n",
"            j -= 1\n",
"        elif move == 2:\n",
"            rev_x.append(x[i - 1])\n",
"            rev_y.append(\"-\")\n",
"            i -= 1\n",
"        elif move == 3:\n",
"            rev_x.append(\"-\")\n",
"            rev_y.append(y[j - 1])\n",
"            j -= 1\n",
"        else:\n",
"            raise ValueError(f\"unknown backtrack code {move} at cell ({i}, {j})\")\n",
"\n",
"    align_x = \"\".join(reversed(rev_x))\n",
"    align_y = \"\".join(reversed(rev_y))\n",
"    return align_x, align_y, score"
],
"metadata": {
"id": "ij9JDpBm_UZ7"
},
"execution_count": null,
"execution_count": 51,
"outputs": []
},
{
Expand All @@ -281,9 +409,48 @@
"metadata": {
"id": "JUtYRFTBAwwZ"
},
"execution_count": null,
"execution_count": 52,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Forward pass on the example pair, then trace back the best local alignment.\n",
"# cmap, m, go and ge are not defined in this cell; they must already exist in the kernel\n",
"# (presumably from the setup cell just above - confirm).\n",
"S, B = sw_fwd('TCGC', 'CTTAG', cmap, m, (go, ge))\n",
"print(S)\n",
"# Last expression of the cell: displayed as (align_x, align_y, score).\n",
"sw_bwd('TCGC', 'CTTAG', S, B)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "mhjC0JP-9Pyz",
"outputId": "50cd3311-f13c-48d4-b931-c403636c4c44"
},
"execution_count": 53,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[[0. 0. 0. 0. 0. 0. ]\n",
" [0. 0. 1. 1. 0.5 0. ]\n",
" [0. 1. 0.5 0.5 0.5 0. ]\n",
" [0. 0.5 0.5 0. 0. 1.5]\n",
" [0. 1. 0.5 0. 0. 1. ]]\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"('TCG', 'TAG', 1.5)"
]
},
"metadata": {},
"execution_count": 53
}
]
},
{
"cell_type": "markdown",
"source": [
Expand All @@ -293,6 +460,17 @@
"id": "eMGh4K5aIFxE"
}
},
{
"cell_type": "code",
"source": [
"from IPython.display import HTML"
],
"metadata": {
"id": "7vtyQxHW9GSG"
},
"execution_count": 21,
"outputs": []
},
{
"cell_type": "code",
"source": [
Expand All @@ -301,12 +479,12 @@
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 60
"height": 61
},
"id": "joHNwJ9AIf6F",
"outputId": "a9206810-a083-4d86-8b14-38183f1dd80c"
"outputId": "4c50d6ce-c8f5-4866-b4bb-aaf7167cdd14"
},
"execution_count": null,
"execution_count": 22,
"outputs": [
{
"output_type": "execute_result",
Expand All @@ -319,7 +497,7 @@
]
},
"metadata": {},
"execution_count": 18
"execution_count": 22
}
]
},
Expand Down Expand Up @@ -387,7 +565,7 @@
"cell_type": "markdown",
"source": [
"```markdown\n",
"Votre réponse ici\n",
"Le score moyen attendu pour une superposition sans trou de deux séquences aléatoires de taille N est N/4.\n",
"```"
],
"metadata": {
Expand Down