def levenshtein(x, y):
    """Return the Levenshtein (edit) distance between two sequences.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn ``x`` into ``y``.

    The previous implementation explored the three edit operations with a
    naive recursion on string slices, which takes exponential time and hits
    Python's recursion limit on sequences of realistic length.  This version
    fills the classic dynamic-programming table row by row, which needs
    O(len(x) * len(y)) time and O(min(len(x), len(y))) memory while returning
    exactly the same values.

    Args:
        x: First sequence (typically a ``str``); may be empty.
        y: Second sequence (typically a ``str``); may be empty.

    Returns:
        int: The edit distance between ``x`` and ``y``.
    """
    # The distance is symmetric, so keep the shorter sequence along the row
    # to minimise the size of the two rows kept in memory.
    if len(x) < len(y):
        x, y = y, x

    # previous[j] = distance between the empty prefix of x and y[:j].
    previous = list(range(len(y) + 1))
    for i, cx in enumerate(x, start=1):
        # current[0] = distance between x[:i] and the empty prefix of y.
        current = [i]
        for j, cy in enumerate(y, start=1):
            deletion = previous[j] + 1
            insertion = current[j - 1] + 1
            # A substitution is free when the two elements already match.
            substitution = previous[j - 1] + (cx != cy)
            current.append(min(deletion, insertion, substitution))
        previous = current
    return previous[-1]
"print(levenshtein('CCGT','CGTCA'))\n", + "print(levenshtein('AY678264*','OQ870305*'))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TL30OBd45cDV", + "outputId": "248ae1c6-9a48-4328-aba0-273d27e66aad" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2\n", + "3\n", + "7\n" + ] + } + ] + }, { "cell_type": "markdown", "source": [ @@ -226,12 +296,45 @@ { "cell_type": "code", "source": [ - "#Votre code ici" + "import numpy as np\n", + "\n", + "def gap_penality(go, ge, n):\n", + " return -(go + ge * n)\n", + "import numpy as np\n", + "\n", + "def sw_fwd(x: str, y: str, cmap: dict, sigma: np.ndarray, goge: list) -> (np.ndarray, np.ndarray):\n", + " # Initialisation des matrices S et B\n", + " go, ge = goge\n", + " S = np.zeros((len(x) + 1, len(y) + 1))\n", + " B = np.zeros((len(x) + 1, len(y) + 1), dtype=int)\n", + "\n", + " # Remplissage des matrices\n", + " for i in range(1, len(x) + 1):\n", + " for j in range(1, len(y) + 1):\n", + " # Calcul des scores pour les différentes opérations\n", + " match = S[i-1, j-1] + sigma[cmap[x[i-1]], cmap[y[j-1]]]\n", + " delete = max([S[i-k, j] - (go + ge * k) for k in range(1, i+1)])\n", + " insert = max([S[i, j-k] - (go + ge * k) for k in range(1, j+1)])\n", + "\n", + " # Calcul du score maximal\n", + " S[i, j] = max(0, match, delete, insert)\n", + "\n", + " # Mise à jour de la matrice B\n", + " if S[i, j] == 0:\n", + " B[i, j] = 0\n", + " elif S[i, j] == match:\n", + " B[i, j] = 1\n", + " elif S[i, j] == delete:\n", + " B[i, j] = 2\n", + " else:\n", + " B[i, j] = 3\n", + "\n", + " return S, B" ], "metadata": { - "id": "njn3JB0b-WHj" + "id": "g08UKhBu8FRT" }, - "execution_count": null, + "execution_count": 50, "outputs": [] }, { @@ -250,12 +353,37 @@ { "cell_type": "code", "source": [ - "#Votre code ici" + "import numpy as np\n", + "\n", + "def sw_bwd(x, y, S, B):\n", + " n = len(x)\n", + " m = len(y)\n", + " align_x = \"\"\n", + 
" align_y = \"\"\n", + " i, j = np.unravel_index(S.argmax(), S.shape)\n", + " score = S[i, j]\n", + "\n", + " while B[i, j] != 0 and i > 0 and j > 0: # Added conditions to prevent index error\n", + " if B[i, j] == 1:\n", + " align_x = x[i - 1] + align_x\n", + " align_y = y[j - 1] + align_y\n", + " i -= 1\n", + " j -= 1\n", + " elif B[i, j] == 2:\n", + " align_x = x[i - 1] + align_x\n", + " align_y = \"-\" + align_y\n", + " i -= 1\n", + " elif B[i, j] == 3:\n", + " align_x = \"-\" + align_x\n", + " align_y = y[j - 1] + align_y\n", + " j -= 1\n", + "\n", + " return align_x, align_y, score" ], "metadata": { "id": "ij9JDpBm_UZ7" }, - "execution_count": null, + "execution_count": 51, "outputs": [] }, { @@ -281,9 +409,48 @@ "metadata": { "id": "JUtYRFTBAwwZ" }, - "execution_count": null, + "execution_count": 52, "outputs": [] }, + { + "cell_type": "code", + "source": [ + "S, B = sw_fwd('TCGC', 'CTTAG', cmap, m, (go, ge))\n", + "print(S)\n", + "sw_bwd('TCGC', 'CTTAG', S, B)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mhjC0JP-9Pyz", + "outputId": "50cd3311-f13c-48d4-b931-c403636c4c44" + }, + "execution_count": 53, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[0. 0. 0. 0. 0. 0. ]\n", + " [0. 0. 1. 1. 0.5 0. ]\n", + " [0. 1. 0.5 0.5 0.5 0. ]\n", + " [0. 0.5 0.5 0. 0. 1.5]\n", + " [0. 1. 0.5 0. 0. 1. 
]]\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "('TCG', 'TAG', 1.5)" + ] + }, + "metadata": {}, + "execution_count": 53 + } + ] + }, { "cell_type": "markdown", "source": [ @@ -293,6 +460,17 @@ "id": "eMGh4K5aIFxE" } }, + { + "cell_type": "code", + "source": [ + "from IPython.display import HTML" + ], + "metadata": { + "id": "7vtyQxHW9GSG" + }, + "execution_count": 21, + "outputs": [] + }, { "cell_type": "code", "source": [ @@ -301,12 +479,12 @@ "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 60 + "height": 61 }, "id": "joHNwJ9AIf6F", - "outputId": "a9206810-a083-4d86-8b14-38183f1dd80c" + "outputId": "4c50d6ce-c8f5-4866-b4bb-aaf7167cdd14" }, - "execution_count": null, + "execution_count": 22, "outputs": [ { "output_type": "execute_result", @@ -319,7 +497,7 @@ ] }, "metadata": {}, - "execution_count": 18 + "execution_count": 22 } ] }, @@ -387,7 +565,7 @@ "cell_type": "markdown", "source": [ "```markdown\n", - "Votre réponse ici\n", + "le score moyen attendu pour une superposition sans trou de deux séquences aléatoires de taille N est N/4\n", "```" ], "metadata": {