Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
343 changes: 343 additions & 0 deletions Connecting Python to SQL.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,343 @@
{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2026-04-28T17:57:50.618715Z",
"start_time": "2026-04-28T17:57:48.621050Z"
}
},
"source": [
"import pandas as pd\n",
"from getpass import getpass\n",
"from urllib.parse import quote_plus\n",
"from sqlalchemy import create_engine\n",
"from sqlalchemy import text\n",
"import sys\n",
"\n",
"# cryptography is presumably required by the PyMySQL driver for MySQL's SHA-256 based\n",
"# password authentication - TODO confirm against the server's auth plugin.\n",
"# %pip installs into the running kernel's environment and, unlike an unquoted\n",
"# !{sys.executable} shell line, keeps working when the interpreter path contains spaces.\n",
"%pip install cryptography\n",
"%pip install --upgrade pip"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: cryptography in ./.venv/lib/python3.14/site-packages (47.0.0)\r\n",
"Requirement already satisfied: cffi>=2.0.0 in ./.venv/lib/python3.14/site-packages (from cryptography) (2.0.0)\r\n",
"Requirement already satisfied: pycparser in ./.venv/lib/python3.14/site-packages (from cffi>=2.0.0->cryptography) (3.0)\r\n",
"Requirement already satisfied: pip in ./.venv/lib/python3.14/site-packages (26.1)\r\n"
]
}
],
"execution_count": 6
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2026-04-28T17:13:13.280137Z",
"start_time": "2026-04-28T17:13:06.668853Z"
}
},
"cell_type": "code",
"source": [
"# Ask for the MySQL root password interactively so it is never stored in the notebook,\n",
"# and URL-encode it so characters such as '@' or '/' cannot break the connection URL.\n",
"password = quote_plus(getpass(\"Enter MySQL password: \"))\n",
"\n",
"# SQLAlchemy engine for the sakila database on the local MySQL server (PyMySQL driver).\n",
"engine = create_engine(\n",
"    f\"mysql+pymysql://root:{password}@localhost:3306/sakila\"\n",
")"
],
"id": "733d9835745ff29b",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" n\n",
"0 599\n"
]
}
],
"execution_count": 3
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2026-04-28T17:57:59.075780Z",
"start_time": "2026-04-28T17:57:59.050536Z"
}
},
"cell_type": "code",
"source": [
"def rentals_month(engine, month, year):\n",
"    \"\"\"Load the rows of the ``rental`` table for one calendar month.\n",
"\n",
"    Args:\n",
"        engine: Connection object handed to ``pd.read_sql`` (here the SQLAlchemy engine).\n",
"        month: Month number compared against ``MONTH(rental_date)``.\n",
"        year: Year compared against ``YEAR(rental_date)``.\n",
"\n",
"    Returns:\n",
"        DataFrame holding every column of the matching ``rental`` rows.\n",
"    \"\"\"\n",
"    # Bound parameters keep the caller's values out of the SQL text itself.\n",
"    statement = text(\n",
"        \"SELECT * FROM rental WHERE :month = MONTH(rental_date) AND :year = YEAR(rental_date)\"\n",
"    )\n",
"    bind_values = {\"month\": month, \"year\": year}\n",
"    return pd.read_sql(statement, engine, params=bind_values)"
],
"id": "3f7898fde952f22f",
"outputs": [],
"execution_count": 7
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2026-04-28T19:55:49.643255Z",
"start_time": "2026-04-28T19:55:49.513251Z"
}
},
"cell_type": "code",
"source": [
"# Raw rental rows for May and June 2005, the two months compared in the cells below.\n",
"may_2005 = rentals_month(engine, 5, 2005)\n",
"june_2005 = rentals_month(engine, 6, 2005)\n"
],
"id": "256e140c79d1bf7c",
"outputs": [],
"execution_count": 28
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2026-04-28T18:57:58.993220Z",
"start_time": "2026-04-28T18:57:58.979477Z"
}
},
"cell_type": "code",
"source": [
"def rental_count_month(df, month, year):\n",
"    \"\"\"Count how many rows of ``df`` belong to each ``customer_id``.\n",
"\n",
"    ``month`` and ``year`` are only used to label the count column; the function\n",
"    does not filter ``df`` by date, so pass in rows that are already restricted\n",
"    to the month of interest.\n",
"\n",
"    Args:\n",
"        df: DataFrame with a ``customer_id`` column; every row counts as one rental.\n",
"        month: Integer month, rendered zero-padded to two digits in the column name.\n",
"        year: Year used in the column name.\n",
"\n",
"    Returns:\n",
"        New DataFrame with ``customer_id`` and ``rentals_<MM>_<YYYY>`` columns.\n",
"    \"\"\"\n",
"    per_customer = df.groupby(\"customer_id\").size()\n",
"    count_label = f\"rentals_{month:02d}_{year}\"\n",
"    return per_customer.reset_index(name=count_label)"
],
"id": "e3985fae2117c236",
"outputs": [],
"execution_count": 17
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2026-04-28T19:56:03.204149Z",
"start_time": "2026-04-28T19:56:03.180948Z"
}
},
"cell_type": "code",
"source": [
"# Per-customer rental counts for each month; the month/year arguments only name the count column.\n",
"df_may_rental = rental_count_month(may_2005, 5, 2005)\n",
"df_june_rental = rental_count_month(june_2005, 6, 2005)\n"
],
"id": "bf3e6d2c1f8785b9",
"outputs": [],
"execution_count": 29
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2026-04-28T20:11:30.590048Z",
"start_time": "2026-04-28T20:11:30.526548Z"
}
},
"cell_type": "code",
"source": [
"def compare_rentals(df_earlier_month, df_later_month):\n",
"    \"\"\"Combine two per-customer rental-count frames and add their difference.\n",
"\n",
"    Args:\n",
"        df_earlier_month: Frame whose first column is ``customer_id`` and whose second\n",
"            column holds the earlier month's rental counts.\n",
"        df_later_month: Frame with the same layout for the later month.\n",
"\n",
"    Returns:\n",
"        DataFrame with ``customer_id``, both count columns and ``difference``\n",
"        (later count minus earlier count), one row per customer seen in either month.\n",
"        A customer missing from one month is counted as 0 rentals for that month.\n",
"    \"\"\"\n",
"    merged = pd.merge(df_earlier_month, df_later_month, on=\"customer_id\", how=\"outer\")\n",
"    # After the merge, column 0 is customer_id and columns 1 and 2 are the two counts.\n",
"    count_cols = list(merged.columns[1:3])\n",
"    # The outer join leaves NaN for a customer absent from one month; that customer made\n",
"    # zero rentals then, so fill with 0 (and restore integers) to keep the difference defined.\n",
"    merged[count_cols] = merged[count_cols].fillna(0).astype(int)\n",
"    merged[\"difference\"] = merged[count_cols[1]] - merged[count_cols[0]]\n",
"    return merged"
],
"id": "da7164088f6d846a",
"outputs": [],
"execution_count": 42
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2026-04-28T20:11:31.896391Z",
"start_time": "2026-04-28T20:11:31.872021Z"
}
},
"cell_type": "code",
"source": "# Per-customer May vs. June 2005 rental counts plus the June-minus-May difference.\ndf_may_june = compare_rentals(df_may_rental, df_june_rental)",
"id": "e8be0fa7a4445929",
"outputs": [],
"execution_count": 43
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2026-04-28T20:11:33.335371Z",
"start_time": "2026-04-28T20:11:33.303598Z"
}
},
"cell_type": "code",
"source": "# Display the comparison table; pandas abbreviates the middle rows of a long frame.\ndf_may_june",
"id": "cb8303b7d03694",
"outputs": [
{
"data": {
"text/plain": [
" customer_id rentals_05_2005 rentals_06_2005 difference\n",
"0 1 2.0 7.0 5.0\n",
"1 2 1.0 1.0 0.0\n",
"2 3 2.0 4.0 2.0\n",
"3 4 NaN 6.0 NaN\n",
"4 5 3.0 5.0 2.0\n",
".. ... ... ... ...\n",
"593 595 1.0 2.0 1.0\n",
"594 596 6.0 2.0 -4.0\n",
"595 597 2.0 3.0 1.0\n",
"596 598 NaN 1.0 NaN\n",
"597 599 1.0 4.0 3.0\n",
"\n",
"[598 rows x 4 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>rentals_05_2005</th>\n",
" <th>rentals_06_2005</th>\n",
" <th>difference</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2.0</td>\n",
" <td>7.0</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>6.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>593</th>\n",
" <td>595</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>594</th>\n",
" <td>596</td>\n",
" <td>6.0</td>\n",
" <td>2.0</td>\n",
" <td>-4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>595</th>\n",
" <td>597</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>596</th>\n",
" <td>598</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>597</th>\n",
" <td>599</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>598 rows × 4 columns</p>\n",
"</div>"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 44
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": "",
"id": "a7847eef68fac60"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.14"
}
},
"nbformat": 4,
"nbformat_minor": 5
}