diff --git a/BINF2025_TP3.ipynb b/BINF2025_TP3.ipynb index 61e87c2..dc9571e 100644 --- a/BINF2025_TP3.ipynb +++ b/BINF2025_TP3.ipynb @@ -3,8 +3,7 @@ "nbformat_minor": 0, "metadata": { "colab": { - "provenance": [], - "authorship_tag": "ABX9TyNSXnqaXAUgZK9rmJ1TWbGo" + "provenance": [] }, "kernelspec": { "name": "python3", @@ -74,7 +73,12 @@ "cell_type": "markdown", "source": [ "```markdown\n", - "Votre réponse ici\n", + "1 + 1 + 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 = 4\n", + "y(2) = 0.5 * 2 + 0.5 = 1.5\n", + "y(3) = 0.5 * 3 + 0.5 = 2\n", + "\n", + "4 - (1,5 + 2) = 0,5\n", + "\n", "```" ], "metadata": { @@ -93,8 +97,24 @@ { "cell_type": "markdown", "source": [ + "\n", + "| | 0 | A | C | T | G | \n", + "| :---: | :---: | :---: | :---: | :---: | :---: |\n", + "| **0** | 0 | 1 | 2 | 3 | 4 |\n", + "| **A** | 1 | 0 | 1 | 2 | 3 |\n", + "| **T** | 2 | 1 | 1 | 1 | 2 |\n", + "| **G** | 3 | 2 | 2 | 2 | 1 |\n", + "\n", + "| | 0 | A | C | T | G |\n", + "| :---: | :---: | :---: | :---: | :---: | :---: |\n", + "| **0** | 0 | _ | _ | _ | _ |\n", + "| **A** | l | \\ | _ | _ | _ |\n", + "| **T** | l | l | \\ | \\ | _ |\n", + "| **G** | l | l | \\ | \\ | \\ |\n", + "\n", "```markdown\n", - "Votre réponse ici\n", + "x = A_TG\n", + "y = ACTG\n", "```" ], "metadata": { @@ -124,8 +144,23 @@ { "cell_type": "markdown", "source": [ + "| |0 | A | T | G |A | C |\n", + "| :---: |:---:| :---: | :---: | :---: | :---: | :---: |\n", + "| **0** |0 | -0.5 | -1 | -1.5 | -2 | -2.5 |\n", + "| **T** |-0.5 | -0.5 | 0.5 | 0 | -0.5 | -1 |\n", + "| **A** |-1 | 0.5 | 0 | 0 | 1 | 0.5 |\n", + "| **T** |-1.5 | 0 | -1.5 | 1 | 0.5 | 0.5 |\n", + "\n", + "| |0 | A | T | G |A | C |\n", + "| :---: |:---:| :---: | :---: | :---: | :---: | :---: |\n", + "| **0** |0 | _ | _ | _ | _ | _ |\n", + "| **T** |l | \\ | \\ | _ | _ | _ |\n", + "| **A** |l | \\ | _ | \\ | \\ | _ |\n", + "| **T** |l | l | \\ | _ | _ | \\ |\n", + "\n", "```markdown\n", - "Votre réponse ici\n", + "x = _T_AT\n", + "y = ATGAC\n", "```" ], "metadata": { @@ 
# --- Cell: Levenshtein distance ----------------------------------------------

def levenshtein(x: str, y: str) -> int:
    """Return the Levenshtein (edit) distance between ``x`` and ``y``.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn ``x`` into ``y``.

    Implemented as a row-by-row dynamic programme, so the running time is
    O(len(x) * len(y)) and the memory O(len(y)); the naive three-way
    recursion is exponential and unusable beyond a dozen characters.

    Args:
        x: First sequence.
        y: Second sequence.

    Returns:
        The edit distance as a non-negative integer.
    """
    # previous[j] holds the distance between the prefix of x processed so far
    # and y[:j]. With no character of x consumed, that is j insertions.
    previous = list(range(len(y) + 1))

    for i, char_x in enumerate(x, start=1):
        # Column 0: turning x[:i] into the empty string costs i deletions.
        current = [i]
        for j, char_y in enumerate(y, start=1):
            substitution_cost = 0 if char_x == char_y else 1
            current.append(min(
                previous[j] + 1,                      # delete char_x
                current[j - 1] + 1,                   # insert char_y
                previous[j - 1] + substitution_cost,  # match / substitute
            ))
        previous = current

    return previous[-1]


print(levenshtein("CCGT", "CGTCA"))


# --- Cell: Smith-Waterman, forward pass --------------------------------------

import numpy as np

# Traceback codes written into B by sw_fwd and read back by sw_bwd.
_STOP, _DIAG, _UP, _LEFT = 0, 1, 2, 3


def sw_fwd(x: str, y: str, cmap: dict, sigma: np.ndarray, gap_params: tuple) -> tuple:
    """Fill the Smith-Waterman score and traceback matrices (local alignment).

    A gap of length ``k`` costs ``|go| + |ge| * (k - 1)``. Penalties may be
    given either as positive costs or as negative scores: only their magnitude
    is used, so ``(2, 1)`` and ``(-2, -1)`` are equivalent. (Subtracting a
    negative penalty as-is would reward gaps instead of penalising them.)

    Args:
        x: First sequence (indexes the rows of the matrices).
        y: Second sequence (indexes the columns of the matrices).
        cmap: Maps each character of ``x`` and ``y`` to its index in ``sigma``.
        sigma: Substitution matrix, indexed as ``sigma[cmap[a], cmap[b]]``.
        gap_params: ``(go, ge)``, the gap-opening and gap-extension penalties.

    Returns:
        ``(S, B)``, both of shape ``(len(x) + 1, len(y) + 1)``:
        ``S`` is the float score matrix (never negative), ``B`` the integer
        traceback matrix with 0 = stop, 1 = diagonal, 2 = up (gap in ``y``),
        3 = left (gap in ``x``). Ties are resolved in that order.

    Raises:
        KeyError: If a character is missing from ``cmap``.
    """
    m, n = len(x), len(y)
    go, ge = (abs(penalty) for penalty in gap_params)

    # gap_cost[k - 1] is the cost of a gap of length k.
    gap_cost = go + ge * np.arange(max(m, n))

    S = np.zeros((m + 1, n + 1))
    B = np.zeros((m + 1, n + 1), dtype=int)

    for i in range(1, m + 1):
        for j in range(1, n + 1):
            match_score = S[i - 1, j - 1] + sigma[cmap[x[i - 1]], cmap[y[j - 1]]]

            # Best score reachable by closing a gap of any length k that ends
            # here: S[i-k, j] - cost(k) for k = 1..i (respectively S[i, j-k]).
            # The reversed slices line the cells up with gap_cost[0..k-1].
            max_del = np.max(S[i - 1::-1, j] - gap_cost[:i])
            max_ins = np.max(S[i, j - 1::-1] - gap_cost[:j])

            # Local alignment: start from "stop with score 0" and only move
            # to a strictly better option, so ties keep the earliest one.
            best_score, best_move = 0.0, _STOP
            for candidate, move in ((match_score, _DIAG), (max_del, _UP), (max_ins, _LEFT)):
                if candidate > best_score:
                    best_score, best_move = candidate, move

            S[i, j] = best_score
            B[i, j] = best_move

    return S, B


dna_cmap = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
sigma_matrix = np.array([[2, -1, -1, -1],
                         [-1, 2, -1, -1],
                         [-1, -1, 2, -1],
                         [-1, -1, -1, 2]])

go, ge = -2, -1
x_seq = "AGT"
y_seq = "GTT"

S, B = sw_fwd(x_seq, y_seq, dna_cmap, sigma_matrix, (go, ge))
print(S)
print(B)


# --- Cell: Smith-Waterman, traceback -----------------------------------------

def sw_bwd(x: str, y: str, S: np.ndarray, B: np.ndarray) -> tuple:
    """Trace back the best local alignment from the matrices built by sw_fwd.

    The traceback starts at the first cell holding the maximum of ``S`` and
    follows the moves stored in ``B`` until it reaches a cell whose score is
    not positive.

    NOTE: ``B`` stores only the direction of each move, not the length of the
    gap that sw_fwd selected, so gaps are unwound one cell at a time. This is
    exact when the opening and extension penalties are equal; with different
    penalties the reconstructed alignment may not be the one that achieves
    the reported score.

    Args:
        x: First sequence (rows of ``S`` and ``B``).
        y: Second sequence (columns of ``S`` and ``B``).
        S: Score matrix returned by sw_fwd.
        B: Traceback matrix returned by sw_fwd
           (1 = diagonal, 2 = up, 3 = left).

    Returns:
        ``(aligned_x, aligned_y, score)``: the two aligned substrings, with
        ``'-'`` marking gaps, and the alignment score as a float.

    Raises:
        ValueError: If a cell with a positive score carries no valid move,
            i.e. ``S`` and ``B`` are inconsistent. Without this check the
            traceback would loop forever on such input.
    """
    align_x = []
    align_y = []

    # The alignment ends where the score is maximal.
    i, j = np.unravel_index(np.argmax(S), S.shape)
    score = float(S[i, j])

    while S[i, j] > 0:
        move = B[i, j]
        if move == _DIAG:
            align_x.append(x[i - 1])
            align_y.append(y[j - 1])
            i -= 1
            j -= 1
        elif move == _UP:
            align_x.append(x[i - 1])
            align_y.append('-')
            i -= 1
        elif move == _LEFT:
            align_x.append('-')
            align_y.append(y[j - 1])
            j -= 1
        else:
            raise ValueError(
                f"inconsistent matrices: S[{i}, {j}] > 0 but B[{i}, {j}] = {move} "
                "is not a valid traceback move"
            )

    # Characters were collected from the end of the alignment backwards.
    return ''.join(reversed(align_x)), ''.join(reversed(align_y)), score
| x: | TCG |
|---|---|
| y: | TAG |