From 1b4e0e1aad588220703100cfe295077831a286d0 Mon Sep 17 00:00:00 2001 From: Abanoub Barayo <62501003+abanoubdev@users.noreply.github.com> Date: Mon, 27 Apr 2026 16:45:34 +0200 Subject: [PATCH 1/2] Solved Lab --- .DS_Store | Bin 0 -> 6148 bytes __pycache__/queries.cpython-314.pyc | Bin 0 -> 468 bytes queries.py | 8 + sample.ipynb | 265 ++++++++++++++++++++++++++++ 4 files changed, 273 insertions(+) create mode 100644 .DS_Store create mode 100644 __pycache__/queries.cpython-314.pyc create mode 100644 queries.py create mode 100644 sample.ipynb diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..690f396bfe7b98568b846a48e80b0e8aa2e844d2 GIT binary patch literal 6148 zcmeHLu};G<5PdEkTFTIoF+yUE)IUh8bYNhiBSUEuM3GVwKpoif4SWhe!N>v=9|P}f zt27NkVyO_ilkQ%8zO(CRD;F1l8LiR*&Ua z9>;$C&hdMA{`T@b{3$y9MRc*){1tM#v5#An`#55>4tqDwXYieX$j{eh2*JeCe! zI*gr}d-4hT9(|vl z_Sy%iU>~5#T0!YR<|{wuKg`8(wTz6F)OYx4;+HLUhX0aH#Uv}_q6X5)#V4ql3wDf+ zmdl&Bv7;;(qbFo2{PtdkJ$rEaPG=0~do6%_cQgRWxTeto{M>U~2L_|zxOcYw_NG*` zCfvZOIv@es!!Asmw)-EvI?(zsJEeuilF3Bbr6HY=N5!Nfe&`F*W$IOnm~^S8(B?Es zbQmbO6G8`c9+N)x6^W?dQj4e+CweA$D-fKsK!<{pSPEiihfH0@N!oZ}+S_ifyt&|d z=4HzA$S7NNRm;w`&K0V)m9ws(N`2Mem>wE7aYjjdRavyIU|d?do{Q-+Vz;tn%4~)` P`8tm={>c?_J!5|X4DM_J literal 0 HcmV?d00001 diff --git a/queries.py b/queries.py new file mode 100644 index 0000000..a97f330 --- /dev/null +++ b/queries.py @@ -0,0 +1,8 @@ +def get_rentals_month_query(): + query = """ + SELECT * \ + FROM rental + WHERE MONTH(rental_date) = %s + AND YEAR(rental_date) = %s; \ + """ + return query \ No newline at end of file diff --git a/sample.ipynb b/sample.ipynb new file mode 100644 index 0000000..ad3be5c --- /dev/null +++ b/sample.ipynb @@ -0,0 +1,265 @@ +{ + "cells": [ + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-27T14:44:24.499477Z", + "start_time": "2026-04-27T14:44:24.447316Z" + } + }, + "cell_type": "code", + "source": [ + "from sqlalchemy import create_engine\n", + "import pandas as pd\n", + "from queries import get_rentals_month_query" + ], + "id": "fbc121e30a2defb3", + "outputs": [], + "execution_count": 7 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-27T14:44:25.631292Z", + "start_time": "2026-04-27T14:44:25.613082Z" + } + }, + "cell_type": "code", + "source": [ + "def rentals_month(connection ,month, year):\n", + " query = get_rentals_month_query()\n", + " df = pd.read_sql(query, connection, params=(month, year))\n", + " return df" + ], + "id": "2724bcdfc25a56fb", + "outputs": [], + "execution_count": 8 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-27T14:44:27.144218Z", + "start_time": "2026-04-27T14:44:27.125543Z" + } + }, + "cell_type": "code", + "source": [ + "def rental_count_month(df):\n", + " rental_counts = df.groupby('customer_id')['rental_id'].count().reset_index()\n", + " rental_counts.columns = ['customer_id', 'rental_count']\n", + " rental_counts = rental_counts.sort_values(by='rental_count', ascending=False)\n", + " return rental_counts" + ], + "id": "d6e3619a969c3fe9", + "outputs": [], + "execution_count": 9 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-27T14:44:28.445180Z", + "start_time": "2026-04-27T14:44:28.428513Z" + } + }, + "cell_type": "code", + "source": [ + "def compare_rentals(df1,df2):\n", + " merged_df = pd.merge(df1, df2, on='customer_id', how='inner', suffixes=('_m1', '_m2'))\n", + " merged_df['difference'] = merged_df['rental_count_m2'] - merged_df['rental_count_m1']\n", + " return merged_df" + ], + "id": "6c26b62da3c02da3", + "outputs": [], + "execution_count": 10 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-27T14:44:39.905349Z", + "start_time": "2026-04-27T14:44:39.740486Z" + } + }, + "cell_type": "code", + "source": [ + "if __name__ == \"__main__\":\n", + " try:\n", + " USER = 'root'\n", + " PASSWORD = '35459583'\n", + " HOST = 'localhost'\n", + " PORT = '3306'\n", + " DATABASE = 'sakila'\n", + " sql_string = f\"mysql+pymysql://{USER}:{PASSWORD}@{HOST}:{PORT}/{DATABASE}\"\n", + " connection = create_engine(sql_string)\n", + " df_rentals_Aug = rentals_month(connection, 8, 2005)\n", + " df_rentals_Jul = rentals_month(connection, 6, 2005)\n", + " if df_rentals_Jul is not None and df_rentals_Aug is not None:\n", + " customer_rental_count_Aug = rental_count_month(df_rentals_Aug)\n", + " customer_rental_count_Jul = rental_count_month(df_rentals_Jul)\n", + " merged_df= compare_rentals(customer_rental_count_Jul,customer_rental_count_Aug)\n", + " display(merged_df)\n", + " else:\n", + " print(\"No data found\")\n", + " except Exception as e:\n", + " print(e)\n" + ], + "id": "1c32ed2ef6bbb843", + "outputs": [ + { + "data": { + "text/plain": [ + " customer_id rental_count_m1 rental_count_m2 difference\n", + "0 31 11 4 -7\n", + "1 454 10 11 1\n", + "2 329 9 10 1\n", + "3 295 9 9 0\n", + "4 561 9 8 -1\n", + ".. ... ... ... ...\n", + "585 496 1 9 8\n", + "586 370 1 10 9\n", + "587 315 1 5 4\n", + "588 198 1 16 15\n", + "589 250 1 7 6\n", + "\n", + "[590 rows x 4 columns]" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrental_count_m1rental_count_m2difference
031114-7
145410111
23299101
3295990
456198-1
...............
585496198
5863701109
587315154
58819811615
589250176
\n", + "

590 rows × 4 columns

\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + } + ], + "execution_count": 12 + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 8f980d073890eb94ee3353f2dd5804e25b5f2cfc Mon Sep 17 00:00:00 2001 From: Abanoub Barayo <62501003+abanoubdev@users.noreply.github.com> Date: Mon, 27 Apr 2026 16:50:47 +0200 Subject: [PATCH 2/2] Solved Lab --- sample.ipynb | 198 ++++++--------------------------------------------- 1 file changed, 21 insertions(+), 177 deletions(-) diff --git a/sample.ipynb b/sample.ipynb index ad3be5c..f9ce81c 100644 --- a/sample.ipynb +++ b/sample.ipynb @@ -1,12 +1,7 @@ { "cells": [ { - "metadata": { - "ExecuteTime": { - "end_time": "2026-04-27T14:44:24.499477Z", - "start_time": "2026-04-27T14:44:24.447316Z" - } - }, + "metadata": {}, "cell_type": "code", "source": [ "from sqlalchemy import create_engine\n", @@ -15,15 +10,10 @@ ], "id": "fbc121e30a2defb3", "outputs": [], - "execution_count": 7 + "execution_count": null }, { - "metadata": { - "ExecuteTime": { - "end_time": "2026-04-27T14:44:25.631292Z", - "start_time": "2026-04-27T14:44:25.613082Z" - } - }, + "metadata": {}, "cell_type": "code", "source": [ "def rentals_month(connection ,month, year):\n", @@ -33,52 +23,40 @@ ], "id": "2724bcdfc25a56fb", "outputs": [], - "execution_count": 8 + "execution_count": null }, { - "metadata": { - "ExecuteTime": { - "end_time": "2026-04-27T14:44:27.144218Z", - "start_time": "2026-04-27T14:44:27.125543Z" - } - }, + "metadata": {}, "cell_type": "code", "source": [ - "def rental_count_month(df):\n", + "def rental_count_month(df,month,year):\n", + " col_name = f\"rentals_{month:02d}_{year}\"\n", " rental_counts = df.groupby('customer_id')['rental_id'].count().reset_index()\n", - " rental_counts.columns = ['customer_id', 'rental_count']\n", - " rental_counts = rental_counts.sort_values(by='rental_count', ascending=False)\n", + " rental_counts.columns = ['customer_id', col_name]\n", " return rental_counts" ], "id": "d6e3619a969c3fe9", "outputs": [], - "execution_count": 9 + "execution_count": null }, { - "metadata": { - "ExecuteTime": { - "end_time": "2026-04-27T14:44:28.445180Z", - "start_time": "2026-04-27T14:44:28.428513Z" - } - }, + "metadata": {}, "cell_type": "code", "source": [ "def compare_rentals(df1,df2):\n", - " merged_df = pd.merge(df1, df2, on='customer_id', how='inner', suffixes=('_m1', '_m2'))\n", - " merged_df['difference'] = merged_df['rental_count_m2'] - merged_df['rental_count_m1']\n", + " merged_df = pd.merge(df1, df2, on='customer_id', how='outer')\n", + " count_columns = [col for col in merged_df.columns if col != 'customer_id']\n", + " if len(count_columns) == 2:\n", + " col_m1, col_m2 = count_columns\n", + " merged_df['difference'] = merged_df[col_m2] - merged_df[col_m1]\n", " return merged_df" ], "id": "6c26b62da3c02da3", "outputs": [], - "execution_count": 10 + "execution_count": null }, { - "metadata": { - "ExecuteTime": { - "end_time": "2026-04-27T14:44:39.905349Z", - "start_time": "2026-04-27T14:44:39.740486Z" - } - }, + "metadata": {}, "cell_type": "code", "source": [ "if __name__ == \"__main__\":\n", @@ -93,8 +71,8 @@ " df_rentals_Aug = rentals_month(connection, 8, 2005)\n", " df_rentals_Jul = rentals_month(connection, 6, 2005)\n", " if df_rentals_Jul is not None and df_rentals_Aug is not None:\n", - " customer_rental_count_Aug = rental_count_month(df_rentals_Aug)\n", - " customer_rental_count_Jul = rental_count_month(df_rentals_Jul)\n", + " customer_rental_count_Aug = rental_count_month(df_rentals_Aug,8,2005)\n", + " customer_rental_count_Jul = rental_count_month(df_rentals_Jul,6,2005)\n", " merged_df= compare_rentals(customer_rental_count_Jul,customer_rental_count_Aug)\n", " display(merged_df)\n", " else:\n", @@ -103,142 +81,8 @@ " print(e)\n" ], "id": "1c32ed2ef6bbb843", - "outputs": [ - { - "data": { - "text/plain": [ - " customer_id rental_count_m1 rental_count_m2 difference\n", - "0 31 11 4 -7\n", - "1 454 10 11 1\n", - "2 329 9 10 1\n", - "3 295 9 9 0\n", - "4 561 9 8 -1\n", - ".. ... ... ... ...\n", - "585 496 1 9 8\n", - "586 370 1 10 9\n", - "587 315 1 5 4\n", - "588 198 1 16 15\n", - "589 250 1 7 6\n", - "\n", - "[590 rows x 4 columns]" - ], - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_idrental_count_m1rental_count_m2difference
031114-7
145410111
23299101
3295990
456198-1
...............
585496198
5863701109
587315154
58819811615
589250176
\n", - "

590 rows × 4 columns

\n", - "
" - ] - }, - "metadata": {}, - "output_type": "display_data", - "jetTransient": { - "display_id": null - } - } - ], - "execution_count": 12 + "outputs": [], + "execution_count": null } ], "metadata": {