diff --git a/lab-sql-python-connection.ipynb b/lab-sql-python-connection.ipynb new file mode 100644 index 0000000..44d438c --- /dev/null +++ b/lab-sql-python-connection.ipynb @@ -0,0 +1,324 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 14, + "id": "93e4a2fa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pip in c:\\Users\\juanb\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages (26.0.1)\n", + "Collecting pip\n", + " Downloading pip-26.1-py3-none-any.whl.metadata (4.6 kB)\n", + "Downloading pip-26.1-py3-none-any.whl (1.8 MB)\n", + " ---------------------------------------- 0.0/1.8 MB ? eta -:--:--\n", + " ----------------------- ---------------- 1.0/1.8 MB 12.0 MB/s eta 0:00:01\n", + " ---------------------------------------- 1.8/1.8 MB 10.9 MB/s 0:00:00\n", + "Installing collected packages: pip\n", + " Attempting uninstall: pip\n", + " Found existing installation: pip 26.0.1\n", + " Uninstalling pip-26.0.1:\n", + " Successfully uninstalled pip-26.0.1\n", + "Successfully installed pip-26.1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " WARNING: The scripts pip.exe, pip3.14.exe and pip3.exe are installed in 'c:\\Users\\juanb\\AppData\\Local\\Python\\pythoncore-3.14-64\\Scripts' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting cryptography\n", + " Downloading cryptography-47.0.0-cp311-abi3-win_amd64.whl.metadata (4.5 kB)\n", + "Requirement already satisfied: sqlalchemy in c:\\Users\\juanb\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages (2.0.49)\n", + "Requirement already satisfied: pandas in c:\\Users\\juanb\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages (3.0.2)\n", + "Requirement already satisfied: 
PyMySQL[rsa] in c:\\Users\\juanb\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages (1.1.2)\n", + "Collecting cffi>=2.0.0 (from cryptography)\n", + " Downloading cffi-2.0.0-cp314-cp314-win_amd64.whl.metadata (2.6 kB)\n", + "Requirement already satisfied: greenlet>=1 in c:\\Users\\juanb\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages (from sqlalchemy) (3.5.0)\n", + "Requirement already satisfied: typing-extensions>=4.6.0 in c:\\Users\\juanb\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages (from sqlalchemy) (4.15.0)\n", + "Requirement already satisfied: numpy>=2.3.3 in c:\\Users\\juanb\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages (from pandas) (2.4.4)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in C:\\Users\\juanb\\AppData\\Roaming\\Python\\Python314\\site-packages (from pandas) (2.9.0.post0)\n", + "Requirement already satisfied: tzdata in c:\\Users\\juanb\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages (from pandas) (2026.1)\n", + "Collecting pycparser (from cffi>=2.0.0->cryptography)\n", + " Downloading pycparser-3.0-py3-none-any.whl.metadata (8.2 kB)\n", + "Requirement already satisfied: six>=1.5 in C:\\Users\\juanb\\AppData\\Roaming\\Python\\Python314\\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n", + "Downloading cryptography-47.0.0-cp311-abi3-win_amd64.whl (3.8 MB)\n", + " ---------------------------------------- 0.0/3.8 MB ? 
eta -:--:--\n", + " ------------------- -------------------- 1.8/3.8 MB 11.8 MB/s eta 0:00:01\n", + " ---------------------------------------- 3.8/3.8 MB 11.5 MB/s 0:00:00\n", + "Downloading cffi-2.0.0-cp314-cp314-win_amd64.whl (185 kB)\n", + "Downloading pycparser-3.0-py3-none-any.whl (48 kB)\n", + "Installing collected packages: pycparser, cffi, cryptography\n", + "\n", + " ------------- -------------------------- 1/3 [cffi]\n", + " -------------------------- ------------- 2/3 [cryptography]\n", + " -------------------------- ------------- 2/3 [cryptography]\n", + " -------------------------- ------------- 2/3 [cryptography]\n", + " -------------------------- ------------- 2/3 [cryptography]\n", + " -------------------------- ------------- 2/3 [cryptography]\n", + " ---------------------------------------- 3/3 [cryptography]\n", + "\n", + "Successfully installed cffi-2.0.0 cryptography-47.0.0 pycparser-3.0\n" + ] + } + ], + "source": [ + "import sys\n", + "!{sys.executable} -m pip install --upgrade pip\n", + "!{sys.executable} -m pip install cryptography PyMySQL[rsa] sqlalchemy pandas" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "c9d89375", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Conectado con éxito a: sakila\n" + ] + } + ], + "source": [ + "# !pip install mysql-connector-python sqlalchemy pandas\n", + "\n", + "from sqlalchemy import create_engine, text\n", + "\n", + "USER = \"root\"\n", + "PASSWORD = \"231128101507-{}>=+][\"\n", + "HOST = \"localhost\"\n", + "PORT = 3306\n", + "DATABASE = \"sakila\"\n", + "\n", + "# Usamos mysql-connector-python en lugar de pymysql\n", + "connection_url = f\"mysql+mysqlconnector://{USER}:{PASSWORD}@{HOST}:{PORT}/{DATABASE}\"\n", + "\n", + "engine = create_engine(\n", + " connection_url,\n", + " pool_pre_ping=True\n", + ")\n", + "\n", + "with engine.connect() as connection:\n", + " result = connection.execute(text(\"SELECT DATABASE();\"))\n", + " 
print(\"Conectado con éxito a:\", result.fetchone()[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1d9386d3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connected to database: sakila\n", + "\n", + "--- Rentals in May 2005 ---\n", + "Empty DataFrame\n", + "Columns: [rental_id, rental_date, inventory_id, customer_id, return_date, staff_id]\n", + "Index: []\n", + "\n", + "--- Rentals in June 2005 ---\n", + "Empty DataFrame\n", + "Columns: [rental_id, rental_date, inventory_id, customer_id, return_date, staff_id]\n", + "Index: []\n", + "\n", + "--- Rental counts in May 2005 ---\n", + "Empty DataFrame\n", + "Columns: [customer_id, rentals_05_2005]\n", + "Index: []\n", + "\n", + "--- Rental counts in June 2005 ---\n", + "Empty DataFrame\n", + "Columns: [customer_id, rentals_06_2005]\n", + "Index: []\n", + "\n", + "--- Customers active in both months ---\n", + "Empty DataFrame\n", + "Columns: [customer_id, rentals_05_2005, rentals_06_2005, difference]\n", + "Index: []\n", + "\n", + "--- Final report with activity change ---\n", + "Empty DataFrame\n", + "Columns: [customer_id, rentals_05_2005, rentals_06_2005, difference, activity_change]\n", + "Index: []\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "from sqlalchemy import create_engine, text\n", + "\n", + "# --------------------------------------------------\n", + "# 1. 
# --------------------------------------------------
# 1. Establish connection to Sakila
# --------------------------------------------------
import os
from datetime import date
from getpass import getpass

from sqlalchemy.engine import URL

USER = "root"
# Credentials are read from the environment (or prompted for interactively)
# so they never end up in the notebook file or in version control.
PASSWORD = os.environ.get("SAKILA_DB_PASSWORD") or getpass("MySQL password: ")
HOST = "localhost"
PORT = 3306
DATABASE = "sakila"

# URL.create() escapes each component, so passwords containing characters
# such as '@', '/' or ':' cannot corrupt the connection URL the way raw
# f-string interpolation would.
connection_url = URL.create(
    drivername="mysql+pymysql",
    username=USER,
    password=PASSWORD,
    host=HOST,
    port=PORT,
    database=DATABASE,
)

# pool_pre_ping tests each pooled connection before use, so a connection
# dropped by the server is replaced instead of raising on first query.
engine = create_engine(connection_url, pool_pre_ping=True)

with engine.connect() as connection:
    result = connection.execute(text("SELECT DATABASE();"))
    current_db = result.fetchone()[0]
    print("Connected to database:", current_db)


# --------------------------------------------------
# 2. Function: rentals_month
# --------------------------------------------------
def rentals_month(engine, month, year):
    """Return all rows of the ``rental`` table dated within one calendar month.

    Args:
        engine: SQLAlchemy engine (or connection) passed to pandas as ``con``.
        month: Month number, 1-12.
        year: Four-digit year.

    Returns:
        A DataFrame with columns ``rental_id``, ``rental_date``,
        ``inventory_id``, ``customer_id``, ``return_date`` and ``staff_id``,
        ordered by customer and rental date. It is empty when no rental
        falls in the requested month.

    Raises:
        ValueError: If ``month``/``year`` do not form a valid calendar month.
    """
    # Building real dates validates the inputs up front; a month of 0 or 13
    # fails here instead of sending a malformed date string to the server.
    first_day = date(year, month, 1)
    if month == 12:
        next_month_start = date(year + 1, 1, 1)
    else:
        next_month_start = date(year, month + 1, 1)

    # Half-open range [first_day, next_month_start): includes every timestamp
    # on the last day of the month without needing to know its length.
    query = text("""
        SELECT
            rental_id,
            rental_date,
            inventory_id,
            customer_id,
            return_date,
            staff_id
        FROM rental
        WHERE rental_date >= :start_date
          AND rental_date < :end_date
        ORDER BY customer_id, rental_date
    """)

    return pd.read_sql_query(
        sql=query,
        con=engine,
        params={
            "start_date": first_day.isoformat(),
            "end_date": next_month_start.isoformat(),
        },
    )
# --------------------------------------------------
# 3. Function: rental_count_month
# --------------------------------------------------
def rental_count_month(df, month, year):
    """Count rentals per customer for one month.

    Args:
        df: DataFrame of rentals containing a ``customer_id`` column.
        month: Month number, used only to build the count column's name.
        year: Year, used only to build the count column's name.

    Returns:
        A DataFrame with ``customer_id`` and ``rentals_<MM>_<YYYY>`` columns,
        one row per customer, ordered by ``customer_id``.
    """
    column_name = f"rentals_{month:02d}_{year}"

    # groupby() sorts its keys by default, so the result is already ordered
    # by customer_id and reset_index() yields a clean 0..n-1 index.
    return df.groupby("customer_id").size().reset_index(name=column_name)


# --------------------------------------------------
# 4. Function: compare_rentals
# --------------------------------------------------
def _count_column(df):
    """Return the name of the first column of ``df`` other than customer_id."""
    for name in df.columns:
        if name != "customer_id":
            return name
    raise ValueError("expected a rental-count column besides 'customer_id'")


def compare_rentals(df1, df2):
    """Compare per-customer rental counts of two months.

    Only customers present in both frames are kept (inner join).

    Args:
        df1: Counts for the first month (``customer_id`` + one count column).
        df2: Counts for the second month, same layout.

    Returns:
        The merged DataFrame with an added ``difference`` column holding
        second-month count minus first-month count.

    Raises:
        ValueError: If either frame has no column besides ``customer_id``.
    """
    # Look the count columns up by name rather than by position so a frame
    # whose customer_id is not the first column is still handled correctly.
    first_col = _count_column(df1)
    second_col = _count_column(df2)

    comparison = pd.merge(df1, df2, on="customer_id", how="inner")

    # When both frames use the same count column name (e.g. comparing a month
    # with itself) merge() disambiguates them with its default _x/_y suffixes.
    if first_col == second_col:
        first_col, second_col = f"{first_col}_x", f"{second_col}_y"

    comparison["difference"] = comparison[second_col] - comparison[first_col]

    return comparison


# --------------------------------------------------
# 5. Run analysis for May and June 2005
# --------------------------------------------------
may_rentals = rentals_month(engine, 5, 2005)
june_rentals = rentals_month(engine, 6, 2005)

may_counts = rental_count_month(may_rentals, 5, 2005)
june_counts = rental_count_month(june_rentals, 6, 2005)

comparison_df = compare_rentals(may_counts, june_counts)


# --------------------------------------------------
# 6. Optional improvement
# --------------------------------------------------
def _activity_label(delta):
    """Map a count difference to 'increased', 'decreased' or 'same'."""
    if delta > 0:
        return "increased"
    if delta < 0:
        return "decreased"
    return "same"


def add_activity_label(df):
    """Return a copy of ``df`` with an ``activity_change`` label column.

    Args:
        df: DataFrame containing a numeric ``difference`` column.

    Returns:
        A new DataFrame; the input is left unmodified.
    """
    labelled = df.copy()
    labelled["activity_change"] = labelled["difference"].apply(_activity_label)
    return labelled


final_report = add_activity_label(comparison_df)
# --------------------------------------------------
# 7. Output
# --------------------------------------------------
# Each entry: (heading to print, frame to preview, number of rows to show).
_report_sections = (
    ("\n--- Rentals in May 2005 ---", may_rentals, 5),
    ("\n--- Rentals in June 2005 ---", june_rentals, 5),
    ("\n--- Rental counts in May 2005 ---", may_counts, 5),
    ("\n--- Rental counts in June 2005 ---", june_counts, 5),
    ("\n--- Customers active in both months ---", comparison_df, 5),
    ("\n--- Final report with activity change ---", final_report, 10),
)

for _heading, _frame, _n_rows in _report_sections:
    print(_heading)
    print(_frame.head(_n_rows))