pparutto · Romain9215 · Mar 24, 2025
diff --git a/BINF2025_TP3.ipynb b/BINF2025_TP3.ipynb
@@ -3,8 +3,7 @@
   "nbformat_minor": 0,
   "metadata": {
     "colab": {
-      "provenance": [],
-      "authorship_tag": "ABX9TyNSXnqaXAUgZK9rmJ1TWbGo"
+      "provenance": []
     },
     "kernelspec": {
       "name": "python3",
@@ -74,7 +73,12 @@
       "cell_type": "markdown",
       "source": [
         "```markdown\n",
-        "Votre réponse ici\n",
+        "1 + 1 + 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 = 4\n",
+        "y(2) = 0.5 * 2 + 0.5 = 1.5\n",
+        "y(3) = 0.5 * 3 + 0.5 = 2\n",
+        "\n",
+        "4 - (1.5 + 2) = 0.5\n",
+        "\n",
         "```"
       ],
       "metadata": {
@@ -93,8 +97,24 @@
     {
       "cell_type": "markdown",
       "source": [
+        "\n",
+        "|       |   0     | A       | C       | T       | G     |  \n",
+        "| :---: |  :---:  |  :---:  | :---:   | :---:   | :---: |\n",
+        "| **0** |    0    |   1     |    2    |    3    |    4  |\n",
+        "| **A** |    1    |   0     |    1    |    2    |    3  |\n",
+        "| **T** |    2    |   1     |    1    |    1    |    2  |\n",
+        "| **G** |    3    |   2     |    2    |    2    |    1  |\n",
+        "\n",
+        "|       |   0     | A       | C       | T       | G     |\n",
+        "| :---: |  :---:  |  :---:  | :---:   | :---:   | :---: |\n",
+        "| **0** |    0    |   _     |    _    |    _    |    _  |\n",
+        "| **A** |    l    |   \\     |    _    |    _    |    _  |\n",
+        "| **T** |    l    |   l     |    \\    |    \\    |    _  |\n",
+        "| **G** |    l    |   l     |    \\    |    \\    |    \\  |\n",
+        "\n",
         "```markdown\n",
-        "Votre réponse ici\n",
+        "x = A_TG\n",
+        "y = ACTG\n",
         "```"
       ],
       "metadata": {
@@ -124,8 +144,23 @@
     {
       "cell_type": "markdown",
       "source": [
+        "|       |0    | A       | T       | G       |A      | C     |\n",
+        "| :---: |:---:| :---:   | :---:   | :---:   | :---: | :---: |\n",
+        "| **0** |0    | -0.5    | -1      | -1.5    |  -2   | -2.5  |\n",
+        "| **T** |-0.5 | -0.5    |  0.5    |   0     | -0.5  | -1    |\n",
+        "| **A** |-1   |  0.5    |   0     |   0     |  1    |  0.5  |\n",
+        "| **T** |-1.5 |   0     |   1.5   |   1     |  0.5  |  0.5  |\n",
+        "\n",
+        "|       |0    | A       | T       | G       |A      | C     |\n",
+        "| :---: |:---:| :---:   | :---:   | :---:   | :---: | :---: |\n",
+        "| **0** |0    | _       | _       | _       |  _    | _     |\n",
+        "| **T** |l    | \\       |  \\      |   _     | _     | _     |\n",
+        "| **A** |l    |  \\      |   _     |   \\     |  \\    |  _    |\n",
+        "| **T** |l    |   l     |  \\      |   _     |  _    |  \\    |\n",
+        "\n",
         "```markdown\n",
-        "Votre réponse ici\n",
+        "x = _T_AT\n",
+        "y = ATGAC\n",
         "```"
       ],
       "metadata": {
@@ -177,13 +212,39 @@
     {
       "cell_type": "code",
       "source": [
-        "#Votre code ici"
+        "def levenshtein(x: str, y: str) -> int:\n",
+        "    \"\"\"Return the Levenshtein (edit) distance between ``x`` and ``y``.\n",
+        "\n",
+        "    The distance is the minimum number of single-character insertions,\n",
+        "    deletions and substitutions needed to turn ``x`` into ``y``.\n",
+        "\n",
+        "    The table is filled bottom-up, one row at a time, in\n",
+        "    O(len(x) * len(y)) time. A plain recursion on prefixes re-solves the\n",
+        "    same sub-problems over and over and becomes unusable beyond a few\n",
+        "    dozen characters.\n",
+        "\n",
+        "    Args:\n",
+        "        x: First sequence.\n",
+        "        y: Second sequence.\n",
+        "\n",
+        "    Returns:\n",
+        "        The edit distance (``len(y)`` when ``x`` is empty, ``len(x)`` when\n",
+        "        ``y`` is empty).\n",
+        "    \"\"\"\n",
+        "    # The distance is symmetric, so keep the shorter string on the inner\n",
+        "    # axis: only two rows of length min(len(x), len(y)) + 1 are stored.\n",
+        "    if len(y) > len(x):\n",
+        "        x, y = y, x\n",
+        "\n",
+        "    # previous[j] = distance between the prefix of x processed so far and y[:j].\n",
+        "    previous = list(range(len(y) + 1))\n",
+        "    for i, char_x in enumerate(x, start=1):\n",
+        "        current = [i]  # turning x[:i] into the empty string costs i deletions\n",
+        "        for j, char_y in enumerate(y, start=1):\n",
+        "            cost = 0 if char_x == char_y else 1\n",
+        "            current.append(min(\n",
+        "                previous[j] + 1,         # deletion\n",
+        "                current[j - 1] + 1,      # insertion\n",
+        "                previous[j - 1] + cost,  # match / substitution\n",
+        "            ))\n",
+        "        previous = current\n",
+        "\n",
+        "    return previous[-1]\n",
+        "\n",
+        "print(levenshtein(\"CCGT\", \"CGTCA\"))\n"
       ],
       "metadata": {
-        "id": "FJR69IEQ4aHv"
+        "id": "FJR69IEQ4aHv",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "a7b912d1-5035-4f97-ed0f-a06975e619df"
       },
       "execution_count": null,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "3\n"
+          ]
+        }
+      ]
     },
     {
       "cell_type": "markdown",
@@ -226,13 +287,78 @@
     {
       "cell_type": "code",
       "source": [
-        "#Votre code ici"
+        "import numpy as np\n",
+        "\n",
+        "def sw_fwd(x: str, y: str, cmap: dict, sigma: np.ndarray, gap_params: tuple) -> tuple:\n",
+        "    \"\"\"Fill the Smith-Waterman score and backtrack matrices for ``x`` and ``y``.\n",
+        "\n",
+        "    Args:\n",
+        "        x: Sequence indexed along the rows of the matrices.\n",
+        "        y: Sequence indexed along the columns of the matrices.\n",
+        "        cmap: Maps each symbol of ``x`` and ``y`` to its index in ``sigma``.\n",
+        "        sigma: Substitution matrix, read as ``sigma[cmap[a], cmap[b]]``.\n",
+        "        gap_params: ``(go, ge)`` gap-opening and gap-extension penalties.\n",
+        "            Only their magnitudes are used, so ``(-2, -1)`` and ``(2, 1)``\n",
+        "            are equivalent: a gap of length k always costs\n",
+        "            ``|go| + |ge| * (k - 1)``.\n",
+        "\n",
+        "    Returns:\n",
+        "        ``(S, B)``, two ``(len(x) + 1, len(y) + 1)`` arrays. ``S`` holds the\n",
+        "        local alignment scores (never below 0). ``B`` holds the move that\n",
+        "        produced each score: 0 = start of alignment, 1 = diagonal, 2 = up,\n",
+        "        3 = left. ``B`` records the direction only, not the gap length.\n",
+        "\n",
+        "    Raises:\n",
+        "        KeyError: If a symbol of ``x`` or ``y`` is missing from ``cmap``.\n",
+        "    \"\"\"\n",
+        "    DIAG, UP, LEFT = 1, 2, 3\n",
+        "    m, n = len(x), len(y)\n",
+        "    # Penalties are subtracted below. Taking magnitudes keeps negative\n",
+        "    # inputs such as (-2, -1) from turning every gap into a bonus.\n",
+        "    go, ge = (abs(penalty) for penalty in gap_params)\n",
+        "\n",
+        "    S = np.zeros((m + 1, n + 1))\n",
+        "    B = np.zeros((m + 1, n + 1), dtype=int)\n",
+        "\n",
+        "    for i in range(1, m + 1):\n",
+        "        for j in range(1, n + 1):\n",
+        "            match_score = S[i-1, j-1] + sigma[cmap[x[i-1]], cmap[y[j-1]]]\n",
+        "\n",
+        "            # Best score obtained by ending a vertical gap of any length k.\n",
+        "            max_del = max(S[i-k, j] - (go + ge*(k-1)) for k in range(1, i+1))\n",
+        "            # Best score obtained by ending a horizontal gap of any length k.\n",
+        "            max_ins = max(S[i, j-k] - (go + ge*(k-1)) for k in range(1, j+1))\n",
+        "\n",
+        "            # max() keeps the first maximum, so ties are resolved in the\n",
+        "            # order: restart (0), diagonal, up, left.\n",
+        "            options = [(0, 0), (match_score, DIAG), (max_del, UP), (max_ins, LEFT)]\n",
+        "            S[i, j], B[i, j] = max(options, key=lambda option: option[0])\n",
+        "\n",
+        "    return S, B\n",
+        "\n",
+        "\n",
+        "# Nucleotide -> row/column index of the substitution matrix.\n",
+        "dna_cmap = dict(zip('ACGT', range(4)))\n",
+        "\n",
+        "# 4x4 substitution matrix: 2 on the diagonal (identical symbols), -1 elsewhere.\n",
+        "sigma_matrix = np.full((4, 4), -1)\n",
+        "np.fill_diagonal(sigma_matrix, 2)\n",
+        "\n",
+        "# Gap-opening and gap-extension parameters, passed to sw_fwd as (go, ge).\n",
+        "go = -2\n",
+        "ge = -1\n",
+        "x_seq, y_seq = \"AGT\", \"GTT\"\n",
+        "\n",
+        "S, B = sw_fwd(x_seq, y_seq, dna_cmap, sigma_matrix, (go, ge))\n",
+        "print(S)\n",
+        "print(B)\n"
       ],
       "metadata": {
-        "id": "njn3JB0b-WHj"
+        "id": "njn3JB0b-WHj",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "e8c2609b-8e44-43ff-de17-8bd19dc20a5f"
       },
-      "execution_count": null,
-      "outputs": []
+      "execution_count": 6,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "[[ 0.  0.  0.  0.]\n",
+            " [ 0.  2.  4.  6.]\n",
+            " [ 0.  4.  6.  8.]\n",
+            " [ 0.  6.  8. 10.]]\n",
+            "[[0 0 0 0]\n",
+            " [0 2 3 3]\n",
+            " [0 2 2 2]\n",
+            " [0 2 2 2]]\n"
+          ]
+        }
+      ]
     },
     {
       "cell_type": "markdown",
@@ -250,12 +376,47 @@
     {
       "cell_type": "code",
       "source": [
-        "#Votre code ici"
+        "import numpy as np\n",
+        "\n",
+        "def sw_bwd(x: str, y: str, S: np.ndarray, B: np.ndarray) -> tuple:\n",
+        "    \"\"\"Trace back the best local alignment from the matrices ``S`` and ``B``.\n",
+        "\n",
+        "    Args:\n",
+        "        x: Sequence indexed along the rows of ``S`` and ``B``.\n",
+        "        y: Sequence indexed along the columns of ``S`` and ``B``.\n",
+        "        S: Score matrix of shape ``(len(x) + 1, len(y) + 1)``.\n",
+        "        B: Backtrack matrix of the same shape, holding 1 (diagonal),\n",
+        "            2 (up) or 3 (left) in every cell with a positive score.\n",
+        "\n",
+        "    Returns:\n",
+        "        ``(align_x, align_y, score)``: the two aligned strings, with ``'-'``\n",
+        "        marking gaps, and the maximum value found in ``S``.\n",
+        "\n",
+        "    Raises:\n",
+        "        ValueError: If a cell with a positive score holds a backtrack code\n",
+        "            other than 1, 2 or 3 (the walk could otherwise never advance).\n",
+        "    \"\"\"\n",
+        "    DIAG, UP, LEFT = 1, 2, 3\n",
+        "    align_x = []\n",
+        "    align_y = []\n",
+        "\n",
+        "    # The alignment ends at the highest-scoring cell of S (first one on ties).\n",
+        "    i, j = np.unravel_index(np.argmax(S), S.shape)\n",
+        "    score = S[i, j]\n",
+        "\n",
+        "    # Walk back until the score drops to 0 or a matrix border is reached.\n",
+        "    while i > 0 and j > 0 and S[i, j] > 0:\n",
+        "        move = B[i, j]\n",
+        "        if move == DIAG:\n",
+        "            align_x.append(x[i-1])\n",
+        "            align_y.append(y[j-1])\n",
+        "            i -= 1\n",
+        "            j -= 1\n",
+        "        elif move == UP:\n",
+        "            align_x.append(x[i-1])\n",
+        "            align_y.append('-')\n",
+        "            i -= 1\n",
+        "        elif move == LEFT:\n",
+        "            align_x.append('-')\n",
+        "            align_y.append(y[j-1])\n",
+        "            j -= 1\n",
+        "        else:\n",
+        "            # Without this branch neither index moves and the loop never ends.\n",
+        "            raise ValueError(f\"invalid backtrack code {move} at cell ({i}, {j})\")\n",
+        "\n",
+        "    # Symbols were collected from the end of the alignment to its start.\n",
+        "    align_x_str = ''.join(reversed(align_x))\n",
+        "    align_y_str = ''.join(reversed(align_y))\n",
+        "\n",
+        "    return align_x_str, align_y_str, score\n"
       ],
       "metadata": {
         "id": "ij9JDpBm_UZ7"
       },
-      "execution_count": null,
+      "execution_count": 7,
       "outputs": []
     },
     {
@@ -281,7 +442,7 @@
       "metadata": {
         "id": "JUtYRFTBAwwZ"
       },
-      "execution_count": null,
+      "execution_count": 2,
       "outputs": []
     },
     {
@@ -296,17 +457,18 @@
     {
       "cell_type": "code",
       "source": [
+        "from IPython.display import HTML\n",
         "HTML(\"<table align='left' style='font-family:Courier New'><tr><th>x:</th><th>TCG</th></tr><tr><th>y:</th><th>TAG</th></tr></table>\")"
       ],
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 60
+          "height": 62
         },
         "id": "joHNwJ9AIf6F",
-        "outputId": "a9206810-a083-4d86-8b14-38183f1dd80c"
+        "outputId": "d5d9b15e-5e70-4d64-958a-3587ddcd8510"
       },
-      "execution_count": null,
+      "execution_count": 5,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -319,7 +481,7 @@
             ]
           },
           "metadata": {},
-          "execution_count": 18
+          "execution_count": 5
         }
       ]
     },
@@ -341,12 +503,12 @@
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 80
+          "height": 82
         },
         "id": "HUELvWKMFtIO",
-        "outputId": "976bab6f-f1fc-4c5a-c69c-8de02fc838d0"
+        "outputId": "eab87ab3-9db2-46b3-8b8f-7c17f3466ec7"
       },
-      "execution_count": null,
+      "execution_count": 4,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -359,7 +521,7 @@
             ]
           },
           "metadata": {},
-          "execution_count": 15
+          "execution_count": 4
         }
       ]
     },
@@ -387,7 +549,7 @@
       "cell_type": "markdown",
       "source": [
         "```markdown\n",
-        "Votre réponse ici\n",
+        "1/4\n",
         "```"
       ],
       "metadata": {
@@ -428,13 +590,11 @@
     },
     {
       "cell_type": "code",
-      "source": [
-        "#Votre code ici"
-      ],
+      "source": [],
       "metadata": {
         "id": "UX0afNaqOVZ2"
       },
-      "execution_count": null,
+      "execution_count": 8,
       "outputs": []
     },
     {