Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
216 changes: 188 additions & 28 deletions BINF2025_TP3.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyNSXnqaXAUgZK9rmJ1TWbGo"
"provenance": []
},
"kernelspec": {
"name": "python3",
Expand Down Expand Up @@ -74,7 +73,12 @@
"cell_type": "markdown",
"source": [
"```markdown\n",
"Votre réponse ici\n",
"1 + 1 + 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 = 4\n",
"y(2) = 0.5 * 2 + 0.5 = 1.5\n",
"y(3) = 0.5 * 3 + 0.5 = 2\n",
"\n",
"4 - (1,5 + 2) = 0,5\n",
"\n",
"```"
],
"metadata": {
Expand All @@ -93,8 +97,24 @@
{
"cell_type": "markdown",
"source": [
"\n",
"| | 0 | A | C | T | G | \n",
"| :---: | :---: | :---: | :---: | :---: | :---: |\n",
"| **0** | 0 | 1 | 2 | 3 | 4 |\n",
"| **A** | 1 | 0 | 1 | 2 | 3 |\n",
"| **T** | 2 | 1 | 1 | 1 | 2 |\n",
"| **G** | 3 | 2 | 2 | 2 | 1 |\n",
"\n",
"| | 0 | A | C | T | G |\n",
"| :---: | :---: | :---: | :---: | :---: | :---: |\n",
"| **0** | 0 | _ | _ | _ | _ |\n",
"| **A** | l | \\ | _ | _ | _ |\n",
"| **T** | l | l | \\ | \\ | _ |\n",
"| **G** | l | l | \\ | \\ | \\ |\n",
"\n",
"```markdown\n",
"Votre réponse ici\n",
"x = A_TG\n",
"y = ACTG\n",
"```"
],
"metadata": {
Expand Down Expand Up @@ -124,8 +144,23 @@
{
"cell_type": "markdown",
"source": [
"| |0 | A | T | G |A | C |\n",
"| :---: |:---:| :---: | :---: | :---: | :---: | :---: |\n",
"| **0** |0 | -0.5 | -1 | -1.5 | -2 | -2.5 |\n",
"| **T** |-0.5 | -0.5 | 0.5 | 0 | -0.5 | -1 |\n",
"| **A** |-1 | 0.5 | 0 | 0 | 1 | 0.5 |\n",
"| **T** |-1.5 | 0 | -1.5 | 1 | 0.5 | 0.5 |\n",
"\n",
"| |0 | A | T | G |A | C |\n",
"| :---: |:---:| :---: | :---: | :---: | :---: | :---: |\n",
"| **0** |0 | _ | _ | _ | _ | _ |\n",
"| **T** |l | \\ | \\ | _ | _ | _ |\n",
"| **A** |l | \\ | _ | \\ | \\ | _ |\n",
"| **T** |l | l | \\ | _ | _ | \\ |\n",
"\n",
"```markdown\n",
"Votre réponse ici\n",
"x = _T_AT\n",
"y = ATGAC\n",
"```"
],
"metadata": {
Expand Down Expand Up @@ -177,13 +212,39 @@
{
"cell_type": "code",
"source": [
"#Votre code ici"
"def levenshtein(x: str, y: str) -> int:\n",
" if not x:\n",
" return len(y)\n",
" if not y:\n",
" return len(x)\n",
"\n",
" cost = 0 if x[-1] == y[-1] else 1\n",
"\n",
" return min(\n",
" levenshtein(x[:-1], y) + 1,\n",
" levenshtein(x, y[:-1]) + 1,\n",
" levenshtein(x[:-1], y[:-1]) + cost\n",
" )\n",
"\n",
"print(levenshtein(\"CCGT\", \"CGTCA\"))\n"
],
"metadata": {
"id": "FJR69IEQ4aHv"
"id": "FJR69IEQ4aHv",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "a7b912d1-5035-4f97-ed0f-a06975e619df"
},
"execution_count": null,
"outputs": []
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"3\n"
]
}
]
},
{
"cell_type": "markdown",
Expand Down Expand Up @@ -226,13 +287,78 @@
{
"cell_type": "code",
"source": [
"#Votre code ici"
"import numpy as np\n",
"\n",
"def sw_fwd(x: str, y: str, cmap: dict, sigma: np.array, gap_params: tuple) -> tuple:\n",
" DIAG, UP, LEFT = 1, 2, 3\n",
" m, n = len(x), len(y)\n",
" go, ge = gap_params\n",
"\n",
" S = np.zeros((m + 1, n + 1))\n",
" B = np.zeros((m + 1, n + 1), dtype=int)\n",
"\n",
" for i in range(1, m + 1):\n",
" for j in range(1, n + 1):\n",
" match_score = S[i-1, j-1] + sigma[cmap[x[i-1]], cmap[y[j-1]]]\n",
"\n",
" max_del, best_k_del = -np.inf, 0\n",
" for k in range(1, i+1):\n",
" current = S[i-k, j] - (go + ge*(k-1))\n",
" if current > max_del:\n",
" max_del = current\n",
" best_k_del = k\n",
"\n",
" max_ins, best_k_ins = -np.inf, 0\n",
" for k in range(1, j+1):\n",
" current = S[i, j-k] - (go + ge*(k-1))\n",
" if current > max_ins:\n",
" max_ins = current\n",
" best_k_ins = k\n",
"\n",
" options = [(0, 0), (match_score, DIAG), (max_del, UP), (max_ins, LEFT)]\n",
" S[i,j], B[i,j] = max(options, key=lambda x: x[0])\n",
"\n",
" return S, B\n",
"\n",
"\n",
"dna_cmap = {'A': 0, 'C': 1, 'G': 2, 'T': 3}\n",
"sigma_matrix = np.array([[2, -1, -1, -1],\n",
" [-1, 2, -1, -1],\n",
" [-1, -1, 2, -1],\n",
" [-1, -1, -1, 2]])\n",
"\n",
"go, ge = -2, -1\n",
"x_seq = \"AGT\"\n",
"y_seq = \"GTT\"\n",
"\n",
"S, B = sw_fwd(x_seq, y_seq, dna_cmap, sigma_matrix, (go, ge))\n",
"print(S)\n",
"print(B)\n"
],
"metadata": {
"id": "njn3JB0b-WHj"
"id": "njn3JB0b-WHj",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e8c2609b-8e44-43ff-de17-8bd19dc20a5f"
},
"execution_count": null,
"outputs": []
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[[ 0. 0. 0. 0.]\n",
" [ 0. 2. 4. 6.]\n",
" [ 0. 4. 6. 8.]\n",
" [ 0. 6. 8. 10.]]\n",
"[[0 0 0 0]\n",
" [0 2 3 3]\n",
" [0 2 2 2]\n",
" [0 2 2 2]]\n"
]
}
]
},
{
"cell_type": "markdown",
Expand All @@ -250,12 +376,47 @@
{
"cell_type": "code",
"source": [
"#Votre code ici"
"import numpy as np\n",
"\n",
"def sw_bwd(x: str, y: str, S: np.ndarray, B: np.ndarray) -> (str, str, float):\n",
" # Initialiser les alignements et le score\n",
" align_x = []\n",
" align_y = []\n",
"\n",
" # Trouver la position du score maximum dans la matrice S (c'est ici que l'alignement commence)\n",
" i, j = np.unravel_index(np.argmax(S), S.shape)\n",
" score = S[i, j]\n",
"\n",
" # Effectuer le traceback\n",
" while S[i, j] > 0:\n",
" if B[i, j] == 1: # Diagonale\n",
" align_x.append(x[i-1])\n",
" align_y.append(y[j-1])\n",
" i -= 1\n",
" j -= 1\n",
" elif B[i, j] == 2: # Haut\n",
" align_x.append(x[i-1])\n",
" align_y.append('-')\n",
" i -= 1\n",
" elif B[i, j] == 3: # Gauche\n",
" align_x.append('-')\n",
" align_y.append(y[j-1])\n",
" j -= 1\n",
"\n",
" # Inverser les alignements car on a commencé à partir de la fin\n",
" align_x.reverse()\n",
" align_y.reverse()\n",
"\n",
" # Convertir les listes en chaînes\n",
" align_x_str = ''.join(align_x)\n",
" align_y_str = ''.join(align_y)\n",
"\n",
" return align_x_str, align_y_str, score\n"
],
"metadata": {
"id": "ij9JDpBm_UZ7"
},
"execution_count": null,
"execution_count": 7,
"outputs": []
},
{
Expand All @@ -281,7 +442,7 @@
"metadata": {
"id": "JUtYRFTBAwwZ"
},
"execution_count": null,
"execution_count": 2,
"outputs": []
},
{
Expand All @@ -296,17 +457,18 @@
{
"cell_type": "code",
"source": [
"from IPython.display import HTML\n",
"HTML(\"<table align='left' style='font-family:Courier New'><tr><th>x:</th><th>TCG</th></tr><tr><th>y:</th><th>TAG</th></tr></table>\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 60
"height": 62
},
"id": "joHNwJ9AIf6F",
"outputId": "a9206810-a083-4d86-8b14-38183f1dd80c"
"outputId": "d5d9b15e-5e70-4d64-958a-3587ddcd8510"
},
"execution_count": null,
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
Expand All @@ -319,7 +481,7 @@
]
},
"metadata": {},
"execution_count": 18
"execution_count": 5
}
]
},
Expand All @@ -341,12 +503,12 @@
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 80
"height": 82
},
"id": "HUELvWKMFtIO",
"outputId": "976bab6f-f1fc-4c5a-c69c-8de02fc838d0"
"outputId": "eab87ab3-9db2-46b3-8b8f-7c17f3466ec7"
},
"execution_count": null,
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
Expand All @@ -359,7 +521,7 @@
]
},
"metadata": {},
"execution_count": 15
"execution_count": 4
}
]
},
Expand Down Expand Up @@ -387,7 +549,7 @@
"cell_type": "markdown",
"source": [
"```markdown\n",
"Votre réponse ici\n",
"1/4\n",
"```"
],
"metadata": {
Expand Down Expand Up @@ -428,13 +590,11 @@
},
{
"cell_type": "code",
"source": [
"#Votre code ici"
],
"source": [],
"metadata": {
"id": "UX0afNaqOVZ2"
},
"execution_count": null,
"execution_count": 8,
"outputs": []
},
{
Expand Down