Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
324 changes: 324 additions & 0 deletions lab-sql-python-connection.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,324 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"id": "93e4a2fa",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pip in c:\\Users\\juanb\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages (26.0.1)\n",
"Collecting pip\n",
" Downloading pip-26.1-py3-none-any.whl.metadata (4.6 kB)\n",
"Downloading pip-26.1-py3-none-any.whl (1.8 MB)\n",
" ---------------------------------------- 0.0/1.8 MB ? eta -:--:--\n",
" ----------------------- ---------------- 1.0/1.8 MB 12.0 MB/s eta 0:00:01\n",
" ---------------------------------------- 1.8/1.8 MB 10.9 MB/s 0:00:00\n",
"Installing collected packages: pip\n",
" Attempting uninstall: pip\n",
" Found existing installation: pip 26.0.1\n",
" Uninstalling pip-26.0.1:\n",
" Successfully uninstalled pip-26.0.1\n",
"Successfully installed pip-26.1\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" WARNING: The scripts pip.exe, pip3.14.exe and pip3.exe are installed in 'c:\\Users\\juanb\\AppData\\Local\\Python\\pythoncore-3.14-64\\Scripts' which is not on PATH.\n",
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting cryptography\n",
" Downloading cryptography-47.0.0-cp311-abi3-win_amd64.whl.metadata (4.5 kB)\n",
"Requirement already satisfied: sqlalchemy in c:\\Users\\juanb\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages (2.0.49)\n",
"Requirement already satisfied: pandas in c:\\Users\\juanb\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages (3.0.2)\n",
"Requirement already satisfied: PyMySQL[rsa] in c:\\Users\\juanb\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages (1.1.2)\n",
"Collecting cffi>=2.0.0 (from cryptography)\n",
" Downloading cffi-2.0.0-cp314-cp314-win_amd64.whl.metadata (2.6 kB)\n",
"Requirement already satisfied: greenlet>=1 in c:\\Users\\juanb\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages (from sqlalchemy) (3.5.0)\n",
"Requirement already satisfied: typing-extensions>=4.6.0 in c:\\Users\\juanb\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages (from sqlalchemy) (4.15.0)\n",
"Requirement already satisfied: numpy>=2.3.3 in c:\\Users\\juanb\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages (from pandas) (2.4.4)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in C:\\Users\\juanb\\AppData\\Roaming\\Python\\Python314\\site-packages (from pandas) (2.9.0.post0)\n",
"Requirement already satisfied: tzdata in c:\\Users\\juanb\\AppData\\Local\\Python\\pythoncore-3.14-64\\Lib\\site-packages (from pandas) (2026.1)\n",
"Collecting pycparser (from cffi>=2.0.0->cryptography)\n",
" Downloading pycparser-3.0-py3-none-any.whl.metadata (8.2 kB)\n",
"Requirement already satisfied: six>=1.5 in C:\\Users\\juanb\\AppData\\Roaming\\Python\\Python314\\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n",
"Downloading cryptography-47.0.0-cp311-abi3-win_amd64.whl (3.8 MB)\n",
" ---------------------------------------- 0.0/3.8 MB ? eta -:--:--\n",
" ------------------- -------------------- 1.8/3.8 MB 11.8 MB/s eta 0:00:01\n",
" ---------------------------------------- 3.8/3.8 MB 11.5 MB/s 0:00:00\n",
"Downloading cffi-2.0.0-cp314-cp314-win_amd64.whl (185 kB)\n",
"Downloading pycparser-3.0-py3-none-any.whl (48 kB)\n",
"Installing collected packages: pycparser, cffi, cryptography\n",
"\n",
" ------------- -------------------------- 1/3 [cffi]\n",
" -------------------------- ------------- 2/3 [cryptography]\n",
" -------------------------- ------------- 2/3 [cryptography]\n",
" -------------------------- ------------- 2/3 [cryptography]\n",
" -------------------------- ------------- 2/3 [cryptography]\n",
" -------------------------- ------------- 2/3 [cryptography]\n",
" ---------------------------------------- 3/3 [cryptography]\n",
"\n",
"Successfully installed cffi-2.0.0 cryptography-47.0.0 pycparser-3.0\n"
]
}
],
"source": [
"import sys\n",
"\n",
"# Install with the interpreter that backs this kernel (sys.executable) rather\n",
"# than whichever pip happens to be first on PATH.\n",
"!{sys.executable} -m pip install --upgrade pip\n",
"# mysql-connector-python is needed by the mysql+mysqlconnector:// connection\n",
"# used later in this notebook; PyMySQL[rsa] and cryptography back the\n",
"# mysql+pymysql:// connection. The extra is quoted so that shells which treat\n",
"# [] as a glob pattern pass it through unchanged.\n",
"!{sys.executable} -m pip install cryptography \"PyMySQL[rsa]\" mysql-connector-python sqlalchemy pandas"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "c9d89375",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Conectado con éxito a: sakila\n"
]
}
],
"source": [
"# Requires the mysql-connector-python and sqlalchemy packages.\n",
"import os\n",
"from getpass import getpass\n",
"\n",
"from sqlalchemy import create_engine, text\n",
"from sqlalchemy.engine import URL\n",
"\n",
"# Credentials must not be stored in the notebook: read them from the\n",
"# environment and fall back to an interactive prompt for the password.\n",
"# NOTE(review): the password that was previously committed here should be rotated.\n",
"USER = os.environ.get(\"MYSQL_USER\", \"root\")\n",
"PASSWORD = os.environ.get(\"MYSQL_PASSWORD\") or getpass(\"MySQL password: \")\n",
"HOST = os.environ.get(\"MYSQL_HOST\", \"localhost\")\n",
"PORT = int(os.environ.get(\"MYSQL_PORT\", \"3306\"))\n",
"DATABASE = os.environ.get(\"MYSQL_DATABASE\", \"sakila\")\n",
"\n",
"# Use the mysql-connector-python driver instead of PyMySQL.\n",
"# URL.create() escapes special characters in the password; interpolating the\n",
"# raw password into a URL string breaks on characters such as '@', ':' or '/'.\n",
"connection_url = URL.create(\n",
"    drivername=\"mysql+mysqlconnector\",\n",
"    username=USER,\n",
"    password=PASSWORD,\n",
"    host=HOST,\n",
"    port=PORT,\n",
"    database=DATABASE,\n",
")\n",
"\n",
"engine = create_engine(connection_url, pool_pre_ping=True)\n",
"\n",
"# Smoke test: ask the server which schema this connection is bound to.\n",
"with engine.connect() as connection:\n",
"    result = connection.execute(text(\"SELECT DATABASE();\"))\n",
"    print(\"Conectado con éxito a:\", result.fetchone()[0])"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "1d9386d3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Connected to database: sakila\n",
"\n",
"--- Rentals in May 2005 ---\n",
"Empty DataFrame\n",
"Columns: [rental_id, rental_date, inventory_id, customer_id, return_date, staff_id]\n",
"Index: []\n",
"\n",
"--- Rentals in June 2005 ---\n",
"Empty DataFrame\n",
"Columns: [rental_id, rental_date, inventory_id, customer_id, return_date, staff_id]\n",
"Index: []\n",
"\n",
"--- Rental counts in May 2005 ---\n",
"Empty DataFrame\n",
"Columns: [customer_id, rentals_05_2005]\n",
"Index: []\n",
"\n",
"--- Rental counts in June 2005 ---\n",
"Empty DataFrame\n",
"Columns: [customer_id, rentals_06_2005]\n",
"Index: []\n",
"\n",
"--- Customers active in both months ---\n",
"Empty DataFrame\n",
"Columns: [customer_id, rentals_05_2005, rentals_06_2005, difference]\n",
"Index: []\n",
"\n",
"--- Final report with activity change ---\n",
"Empty DataFrame\n",
"Columns: [customer_id, rentals_05_2005, rentals_06_2005, difference, activity_change]\n",
"Index: []\n"
]
}
],
"source": [
"import os\n",
"from getpass import getpass\n",
"\n",
"import pandas as pd\n",
"from sqlalchemy import create_engine, text\n",
"from sqlalchemy.engine import URL\n",
"\n",
"# --------------------------------------------------\n",
"# 1. Establish connection to Sakila\n",
"# --------------------------------------------------\n",
"# Credentials must not be stored in the notebook: read them from the\n",
"# environment and fall back to an interactive prompt for the password.\n",
"# NOTE(review): the password that was previously committed here should be rotated.\n",
"USER = os.environ.get(\"MYSQL_USER\", \"root\")\n",
"PASSWORD = os.environ.get(\"MYSQL_PASSWORD\") or getpass(\"MySQL password: \")\n",
"HOST = os.environ.get(\"MYSQL_HOST\", \"localhost\")\n",
"PORT = int(os.environ.get(\"MYSQL_PORT\", \"3306\"))\n",
"DATABASE = os.environ.get(\"MYSQL_DATABASE\", \"sakila\")\n",
"\n",
"# URL.create() escapes special characters in the password; interpolating the\n",
"# raw password into a URL string breaks on characters such as '@', ':' or '/'.\n",
"connection_url = URL.create(\n",
"    drivername=\"mysql+pymysql\",\n",
"    username=USER,\n",
"    password=PASSWORD,\n",
"    host=HOST,\n",
"    port=PORT,\n",
"    database=DATABASE,\n",
")\n",
"\n",
"engine = create_engine(connection_url, pool_pre_ping=True)\n",
"\n",
"# Smoke test: ask the server which schema this connection is bound to.\n",
"with engine.connect() as connection:\n",
"    result = connection.execute(text(\"SELECT DATABASE();\"))\n",
"    current_db = result.fetchone()[0]\n",
"    print(\"Connected to database:\", current_db)\n",
"\n",
"\n",
"# --------------------------------------------------\n",
"# 2. Function: rentals_month\n",
"# --------------------------------------------------\n",
"def rentals_month(engine, month, year):\n",
"    \"\"\"Fetch the rows of the ``rental`` table dated within one calendar month.\n",
"\n",
"    Args:\n",
"        engine: Database connectable handed to ``pd.read_sql_query`` as ``con``.\n",
"        month: Calendar month as an integer from 1 to 12.\n",
"        year: Calendar year as an integer.\n",
"\n",
"    Returns:\n",
"        DataFrame with columns rental_id, rental_date, inventory_id,\n",
"        customer_id, return_date and staff_id, ordered by customer_id and\n",
"        then rental_date.\n",
"\n",
"    Raises:\n",
"        ValueError: If ``month`` is outside 1..12.\n",
"    \"\"\"\n",
"    # Reject bad months up front; otherwise the bounds below would be\n",
"    # formatted into a non-existent date such as '2005-14-01'.\n",
"    if not 1 <= month <= 12:\n",
"        raise ValueError(f\"month must be between 1 and 12, got {month!r}\")\n",
"\n",
"    start_date = f\"{year}-{month:02d}-01\"\n",
"\n",
"    # Exclusive upper bound: the first day of the following month, rolling\n",
"    # over into January of the next year after December.\n",
"    if month == 12:\n",
"        end_date = f\"{year + 1}-01-01\"\n",
"    else:\n",
"        end_date = f\"{year}-{month + 1:02d}-01\"\n",
"\n",
"    # Half-open range [start_date, end_date) with bound parameters, so the\n",
"    # dates are never spliced into the SQL text.\n",
"    query = text(\"\"\"\n",
"        SELECT\n",
"            rental_id,\n",
"            rental_date,\n",
"            inventory_id,\n",
"            customer_id,\n",
"            return_date,\n",
"            staff_id\n",
"        FROM rental\n",
"        WHERE rental_date >= :start_date\n",
"          AND rental_date < :end_date\n",
"        ORDER BY customer_id, rental_date\n",
"    \"\"\")\n",
"\n",
"    df = pd.read_sql_query(\n",
"        sql=query,\n",
"        con=engine,\n",
"        params={\"start_date\": start_date, \"end_date\": end_date},\n",
"    )\n",
"\n",
"    return df\n",
"\n",
"\n",
"# --------------------------------------------------\n",
"# 3. Function: rental_count_month\n",
"# --------------------------------------------------\n",
"def rental_count_month(df, month, year):\n",
"    \"\"\"Count rentals per customer for one month's rental rows.\n",
"\n",
"    Args:\n",
"        df: DataFrame with a ``customer_id`` column, one row per rental.\n",
"        month: Month number, used only to name the count column.\n",
"        year: Year, used only to name the count column.\n",
"\n",
"    Returns:\n",
"        DataFrame with ``customer_id`` and ``rentals_<MM>_<YYYY>`` columns,\n",
"        sorted by ``customer_id`` with a fresh 0..n-1 index.\n",
"    \"\"\"\n",
"    count_label = f\"rentals_{month:02d}_{year}\"\n",
"\n",
"    # Number of rows per customer, then back to a flat two-column frame.\n",
"    per_customer = df.groupby(\"customer_id\").size()\n",
"    counts = per_customer.reset_index(name=count_label)\n",
"\n",
"    ordered = counts.sort_values(by=\"customer_id\")\n",
"    return ordered.reset_index(drop=True)\n",
"\n",
"\n",
"# --------------------------------------------------\n",
"# 4. Function: compare_rentals\n",
"# --------------------------------------------------\n",
"def compare_rentals(df1, df2):\n",
"    \"\"\"Join two per-customer count frames and compute the change between them.\n",
"\n",
"    Args:\n",
"        df1: Frame with ``customer_id`` plus a count column in position 1.\n",
"        df2: Frame with ``customer_id`` plus a count column in position 1.\n",
"\n",
"    Returns:\n",
"        Inner join of the two frames on ``customer_id`` (only customers present\n",
"        in both), with an added ``difference`` column equal to the count from\n",
"        ``df2`` minus the count from ``df1``.\n",
"    \"\"\"\n",
"    col1 = df1.columns[1]\n",
"    col2 = df2.columns[1]\n",
"\n",
"    comparison = pd.merge(df1, df2, on=\"customer_id\", how=\"inner\")\n",
"\n",
"    # When a count column name also exists in the other frame (for example the\n",
"    # same month compared with itself), merge renames the clashing columns\n",
"    # with its default '_x' (left) and '_y' (right) suffixes, so look the\n",
"    # columns up under the names they actually have after the join.\n",
"    if col1 not in comparison.columns:\n",
"        col1 = f\"{col1}_x\"\n",
"    if col2 not in comparison.columns:\n",
"        col2 = f\"{col2}_y\"\n",
"\n",
"    comparison[\"difference\"] = comparison[col2] - comparison[col1]\n",
"\n",
"    return comparison\n",
"\n",
"\n",
"# --------------------------------------------------\n",
"# 5. Run analysis for May and June 2005\n",
"# --------------------------------------------------\n",
"# Raw rental rows for each of the two months being compared.\n",
"may_rentals = rentals_month(engine, month=5, year=2005)\n",
"june_rentals = rentals_month(engine, month=6, year=2005)\n",
"\n",
"# One row per customer with that customer's rental count for the month.\n",
"may_counts = rental_count_month(may_rentals, month=5, year=2005)\n",
"june_counts = rental_count_month(june_rentals, month=6, year=2005)\n",
"\n",
"# Customers present in both months, with June minus May as the difference.\n",
"comparison_df = compare_rentals(df1=may_counts, df2=june_counts)\n",
"\n",
"\n",
"# --------------------------------------------------\n",
"# 6. Optional improvement\n",
"# --------------------------------------------------\n",
"def add_activity_label(df):\n",
"    \"\"\"Return a copy of ``df`` with an ``activity_change`` label column.\n",
"\n",
"    Each row is labelled from its ``difference`` value: \"increased\" when\n",
"    positive, \"decreased\" when negative, and \"same\" otherwise. The input\n",
"    frame is not modified.\n",
"    \"\"\"\n",
"    def _classify(delta):\n",
"        if delta > 0:\n",
"            return \"increased\"\n",
"        if delta < 0:\n",
"            return \"decreased\"\n",
"        return \"same\"\n",
"\n",
"    labelled = df.copy()\n",
"    labelled[\"activity_change\"] = labelled[\"difference\"].apply(_classify)\n",
"    return labelled\n",
"\n",
"\n",
"final_report = add_activity_label(comparison_df)\n",
"\n",
"\n",
"# --------------------------------------------------\n",
"# 7. Output\n",
"# --------------------------------------------------\n",
"# (heading, frame, rows to preview) for each stage of the analysis, in the\n",
"# order the stages were computed.\n",
"report_sections = [\n",
"    (\"\\n--- Rentals in May 2005 ---\", may_rentals, 5),\n",
"    (\"\\n--- Rentals in June 2005 ---\", june_rentals, 5),\n",
"    (\"\\n--- Rental counts in May 2005 ---\", may_counts, 5),\n",
"    (\"\\n--- Rental counts in June 2005 ---\", june_counts, 5),\n",
"    (\"\\n--- Customers active in both months ---\", comparison_df, 5),\n",
"    (\"\\n--- Final report with activity change ---\", final_report, 10),\n",
"]\n",
"\n",
"for heading, frame, preview_rows in report_sections:\n",
"    print(heading)\n",
"    print(frame.head(preview_rows))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.14.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}