From 9cd557cded3fd50f7f9885e1867e05ffc778b73d Mon Sep 17 00:00:00 2001 From: carmenlnr Date: Fri, 1 May 2026 17:39:57 +0200 Subject: [PATCH 1/2] Create connecting_python_to_sql_lab.ipynb --- connecting_python_to_sql_lab.ipynb | 496 +++++++++++++++++++++++++++++ 1 file changed, 496 insertions(+) create mode 100644 connecting_python_to_sql_lab.ipynb diff --git a/connecting_python_to_sql_lab.ipynb b/connecting_python_to_sql_lab.ipynb new file mode 100644 index 0000000..16a9bc4 --- /dev/null +++ b/connecting_python_to_sql_lab.ipynb @@ -0,0 +1,496 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 8, + "id": "64af359a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Conexión creada\n" + ] + } + ], + "source": [ + "from sqlalchemy import create_engine\n", + "\n", + "engine = create_engine(\"mysql+pymysql://root:carmen2890@localhost/sakila\")\n", + "\n", + "print(\"Conexión creada\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "8af89eb5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_idrental_dateinventory_idcustomer_idreturn_datestaff_idlast_update
012005-05-24 22:53:303671302005-05-26 22:04:3012006-02-15 21:30:53
122005-05-24 22:54:3315254592005-05-28 19:40:3312006-02-15 21:30:53
232005-05-24 23:03:3917114082005-06-01 22:12:3912006-02-15 21:30:53
342005-05-24 23:04:4124523332005-06-03 01:43:4122006-02-15 21:30:53
452005-05-24 23:05:2120792222005-06-02 04:33:2112006-02-15 21:30:53
\n", + "
" + ], + "text/plain": [ + " rental_id rental_date inventory_id customer_id \\\n", + "0 1 2005-05-24 22:53:30 367 130 \n", + "1 2 2005-05-24 22:54:33 1525 459 \n", + "2 3 2005-05-24 23:03:39 1711 408 \n", + "3 4 2005-05-24 23:04:41 2452 333 \n", + "4 5 2005-05-24 23:05:21 2079 222 \n", + "\n", + " return_date staff_id last_update \n", + "0 2005-05-26 22:04:30 1 2006-02-15 21:30:53 \n", + "1 2005-05-28 19:40:33 1 2006-02-15 21:30:53 \n", + "2 2005-06-01 22:12:39 1 2006-02-15 21:30:53 \n", + "3 2005-06-03 01:43:41 2 2006-02-15 21:30:53 \n", + "4 2005-06-02 04:33:21 1 2006-02-15 21:30:53 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "df = pd.read_sql(\"SELECT * FROM rental LIMIT 5;\", engine)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "cd2272e4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_idrental_dateinventory_idcustomer_idreturn_datestaff_idlast_update
012005-05-24 22:53:303671302005-05-26 22:04:3012006-02-15 21:30:53
122005-05-24 22:54:3315254592005-05-28 19:40:3312006-02-15 21:30:53
232005-05-24 23:03:3917114082005-06-01 22:12:3912006-02-15 21:30:53
342005-05-24 23:04:4124523332005-06-03 01:43:4122006-02-15 21:30:53
452005-05-24 23:05:2120792222005-06-02 04:33:2112006-02-15 21:30:53
\n", + "
" + ], + "text/plain": [ + " rental_id rental_date inventory_id customer_id \\\n", + "0 1 2005-05-24 22:53:30 367 130 \n", + "1 2 2005-05-24 22:54:33 1525 459 \n", + "2 3 2005-05-24 23:03:39 1711 408 \n", + "3 4 2005-05-24 23:04:41 2452 333 \n", + "4 5 2005-05-24 23:05:21 2079 222 \n", + "\n", + " return_date staff_id last_update \n", + "0 2005-05-26 22:04:30 1 2006-02-15 21:30:53 \n", + "1 2005-05-28 19:40:33 1 2006-02-15 21:30:53 \n", + "2 2005-06-01 22:12:39 1 2006-02-15 21:30:53 \n", + "3 2005-06-03 01:43:41 2 2006-02-15 21:30:53 \n", + "4 2005-06-02 04:33:21 1 2006-02-15 21:30:53 " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 2. Función que obtiene alquileres por mes\n", + "import pandas as pd\n", + "\n", + "def rentals_month(engine, month, year):\n", + " query = f\"\"\"\n", + " SELECT *\n", + " FROM rental\n", + " WHERE MONTH(rental_date) = {month}\n", + " AND YEAR(rental_date) = {year};\n", + " \"\"\"\n", + " \n", + " df = pd.read_sql(query, engine)\n", + " return df\n", + "\n", + "may_data = rentals_month(engine, 5, 2005)\n", + "may_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "bcaf5a08", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrentals_05_2005
012
121
232
353
463
\n", + "
" + ], + "text/plain": [ + " customer_id rentals_05_2005\n", + "0 1 2\n", + "1 2 1\n", + "2 3 2\n", + "3 5 3\n", + "4 6 3" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 3. Función rental_count_month que reciba el df de rentals_month y devuelva el # de alquileres por customer_id en ese mes y año\n", + "\n", + "def rental_count_month(df, month, year):\n", + " column_name = f\"rentals_{month:02d}_{year}\"\n", + " \n", + " result = (\n", + " df.groupby(\"customer_id\")\n", + " .size()\n", + " .reset_index(name=column_name)\n", + " )\n", + " \n", + " return result\n", + "\n", + "may_count = rental_count_month(may_data, 5, 2005)\n", + "may_count.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "5fe0fae2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrentals_05_2005rentals_06_2005difference
012.07.05.0
121.01.00.0
232.04.02.0
340.06.06.0
453.05.02.0
\n", + "
" + ], + "text/plain": [ + " customer_id rentals_05_2005 rentals_06_2005 difference\n", + "0 1 2.0 7.0 5.0\n", + "1 2 1.0 1.0 0.0\n", + "2 3 2.0 4.0 2.0\n", + "3 4 0.0 6.0 6.0\n", + "4 5 3.0 5.0 2.0" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 4. Función que compare 2 DataFrames (de dos meses distintos) y calcule la diferencia de alquileres por cliente\n", + "def compare_rentals(df1, df2):\n", + " merged = pd.merge(df1, df2, on=\"customer_id\", how=\"outer\").fillna(0)\n", + " \n", + " col1 = df1.columns[1]\n", + " col2 = df2.columns[1]\n", + " \n", + " merged[\"difference\"] = merged[col2] - merged[col1]\n", + " \n", + " return merged\n", + "\n", + "jun_data = rentals_month(engine, 6, 2005)\n", + "jun_count = rental_count_month(jun_data, 6, 2005)\n", + "\n", + "comparison = compare_rentals(may_count, jun_count)\n", + "comparison.head()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 312175ed48b79ece169cbb6d3b4e074f805164aa Mon Sep 17 00:00:00 2001 From: carmenlnr Date: Fri, 1 May 2026 18:06:31 +0200 Subject: [PATCH 2/2] Update connecting_python_to_sql_lab.ipynb --- connecting_python_to_sql_lab.ipynb | 59 +++++++++++++++++------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/connecting_python_to_sql_lab.ipynb b/connecting_python_to_sql_lab.ipynb index 16a9bc4..a51ca53 100644 --- a/connecting_python_to_sql_lab.ipynb +++ b/connecting_python_to_sql_lab.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "id": "64af359a", "metadata": {}, "outputs": [ @@ -10,21 +10,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "Conexión creada\n" + "Conexión realizada\n" ] } ], "source": [ "from sqlalchemy import create_engine\n", + "from getpass import getpass\n", "\n", - "engine = create_engine(\"mysql+pymysql://root:carmen2890@localhost/sakila\")\n", + "try:\n", + " password = getpass(\"Introduce tu contraseña MySQL: \")\n", + " engine = create_engine(f\"mysql+pymysql://root:{password}@localhost/sakila\")\n", "\n", - "print(\"Conexión creada\")" + " import pandas as pd\n", + " pd.read_sql(\"SELECT 1\", engine)\n", + "\n", + " print(\"Conexión realizada\")\n", + "\n", + "except Exception as e:\n", + " print(\"Error al conectar\")\n", + " print(\"pruebe otra vez\")" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "id": "8af89eb5", "metadata": {}, "outputs": [ @@ -129,7 +139,7 @@ "4 2005-06-02 04:33:21 1 2006-02-15 21:30:53 " ] }, - "execution_count": 9, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -143,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 6, "id": "cd2272e4", "metadata": {}, "outputs": [ @@ -248,7 +258,7 @@ "4 2005-06-02 04:33:21 1 2006-02-15 21:30:53 " ] }, - "execution_count": 12, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -258,15 +268,18 @@ "import pandas as pd\n", "\n", "def rentals_month(engine, month, year):\n", + " \n", + " if month < 1 or month > 12:\n", + " raise ValueError(\"Month must be between 1 and 12\")\n", + " \n", " query = f\"\"\"\n", " SELECT *\n", " FROM rental\n", " WHERE MONTH(rental_date) = {month}\n", - " AND YEAR(rental_date) = {year};\n", + " AND YEAR(rental_date) = {year}\n", " \"\"\"\n", " \n", - " df = pd.read_sql(query, engine)\n", - " return df\n", + " return pd.read_sql(query, engine)\n", "\n", "may_data = rentals_month(engine, 5, 2005)\n", "may_data.head()" @@ -274,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "id": "bcaf5a08", "metadata": {}, "outputs": [ @@ -342,7 +355,7 @@ "4 6 3" ] }, - "execution_count": 14, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -351,15 +364,10 @@ "# 3. Función rental_count_month que reciba el df de rentals_month y devuelva el # de alquileres por customer_id en ese mes y año\n", "\n", "def rental_count_month(df, month, year):\n", - " column_name = f\"rentals_{month:02d}_{year}\"\n", " \n", - " result = (\n", - " df.groupby(\"customer_id\")\n", - " .size()\n", - " .reset_index(name=column_name)\n", - " )\n", + " column_name = f\"rentals_{month:02d}_{year}\"\n", " \n", - " return result\n", + " return df.groupby(\"customer_id\").size().reset_index(name=column_name)\n", "\n", "may_count = rental_count_month(may_data, 5, 2005)\n", "may_count.head()" @@ -367,7 +375,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 10, "id": "5fe0fae2", "metadata": {}, "outputs": [ @@ -447,7 +455,7 @@ "4 5 3.0 5.0 2.0" ] }, - "execution_count": 15, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -455,12 +463,13 @@ "source": [ "# 4. Función que compare 2 DataFrames (de dos meses distintos) y calcule la diferencia de alquileres por cliente\n", "def compare_rentals(df1, df2):\n", + " \n", " merged = pd.merge(df1, df2, on=\"customer_id\", how=\"outer\").fillna(0)\n", " \n", - " col1 = df1.columns[1]\n", - " col2 = df2.columns[1]\n", + " may_col = df1.columns[1]\n", + " jun_col = df2.columns[1]\n", " \n", - " merged[\"difference\"] = merged[col2] - merged[col1]\n", + " merged[\"difference\"] = merged[jun_col] - merged[may_col]\n", " \n", " return merged\n", "\n",