diff --git a/lab_sql_python_connection.ipynb b/lab_sql_python_connection.ipynb new file mode 100644 index 0000000..66dab9c --- /dev/null +++ b/lab_sql_python_connection.ipynb @@ -0,0 +1,314 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "4212c555", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sqlalchemy import create_engine, text\n", + "import getpass\n", + "from urllib.parse import quote_plus\n", + "\n", + "password = quote_plus(getpass.getpass())\n", + "\n", + "engine = create_engine(f\"mysql+pymysql://root:{password}@localhost/sakila\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "6fe918ee", + "metadata": {}, + "outputs": [], + "source": [ + "def rentals_month(engine, month, year):\n", + "    \"\"\"Return a DataFrame of all rental rows whose rental_date is in the given month and year.\"\"\"\n", + "    query = text(\"\"\"\n", + "        SELECT *\n", + "        FROM rental\n", + "        WHERE MONTH(rental_date) = :month\n", + "        AND YEAR(rental_date) = :year\n", + "    \"\"\")\n", + "    # month/year are sent as bound parameters rather than formatted into the SQL text\n", + "    df = pd.read_sql(query, engine, params={\"month\": month, \"year\": year})\n", + "    return df" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "0ff1a2ae", + "metadata": {}, + "outputs": [], + "source": [ + "def rental_count_month(df, month, year):\n", + "    \"\"\"Count the rows of df per customer_id; the count column is named rentals_MM_YYYY.\"\"\"\n", + "    result = (\n", + "        df.groupby('customer_id')\n", + "        .size()\n", + "        .reset_index(name=f\"rentals_{month:02d}_{year}\")\n", + "    )\n", + "\n", + "    return result" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "1cfda834", + "metadata": {}, + "outputs": [], + "source": [ + "def compare_rentals(df1, df2):\n", + "    \"\"\"Outer-merge df1 and df2 on customer_id, fill missing values with 0, and add difference = second column of df2 minus second column of df1.\"\"\"\n", + "    merged = pd.merge(df1, df2, on='customer_id', how='outer').fillna(0)\n", + "\n", + "    col1 = df1.columns[1]\n", + "    col2 = df2.columns[1]\n", + "\n", + "    merged['difference'] = merged[col2] - merged[col1]\n", + "\n", + "    return merged" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c9147801", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrentals_05_2005rentals_06_2005difference
012.07.05.0
121.01.00.0
232.04.02.0
340.06.06.0
453.05.02.0
\n", + "
" + ], + "text/plain": [ + " customer_id rentals_05_2005 rentals_06_2005 difference\n", + "0 1 2.0 7.0 5.0\n", + "1 2 1.0 1.0 0.0\n", + "2 3 2.0 4.0 2.0\n", + "3 4 0.0 6.0 6.0\n", + "4 5 3.0 5.0 2.0" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Mayo\n", + "may_df = rentals_month(engine, 5, 2005)\n", + "may_counts = rental_count_month(may_df, 5, 2005)\n", + "\n", + "# Junio\n", + "jun_df = rentals_month(engine, 6, 2005)\n", + "jun_counts = rental_count_month(jun_df, 6, 2005)\n", + "\n", + "# Comparación\n", + "comparison = compare_rentals(may_counts, jun_counts)\n", + "\n", + "comparison.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "c1112890", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrentals_05_2005rentals_06_2005difference
30310.011.011.0
4524541.010.09.0
3273290.09.09.0
2112131.09.08.0
1771780.08.08.0
\n", + "
" + ], + "text/plain": [ + " customer_id rentals_05_2005 rentals_06_2005 difference\n", + "30 31 0.0 11.0 11.0\n", + "452 454 1.0 10.0 9.0\n", + "327 329 0.0 9.0 9.0\n", + "211 213 1.0 9.0 8.0\n", + "177 178 0.0 8.0 8.0" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "comparison.sort_values('difference', ascending=False).head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "17fb4b73", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Conclusión:\n", + "Los clientes con diferencia positiva alquilaron más en junio que en mayo.\n", + "Los negativos redujeron su actividad.\n" + ] + } + ], + "source": [ + "print(\"Conclusión:\")\n", + "print(\"Los clientes con diferencia positiva alquilaron más en junio que en mayo.\")\n", + "print(\"Los negativos redujeron su actividad.\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}