"""Notebook setup: imports, package installation, and the Sakila engine."""
import subprocess
import sys
from getpass import getpass
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import text


def _pip(*args):
    """Run ``<current interpreter> -m pip <args>`` and echo pip's output.

    Replaces the IPython-only ``!{sys.executable} -m pip ...`` shell escapes
    with plain Python so the setup also runs outside a notebook.
    """
    completed = subprocess.run(
        [sys.executable, "-m", "pip", *args],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # one combined stream, like a shell escape
        text=True,
        check=False,  # best effort: a failed install must not abort the setup
    )
    print(completed.stdout, end="")


# NOTE(review): `cryptography` is presumably required by the PyMySQL driver
# for MySQL's SHA-2 password authentication -- confirm before removing.
_pip("install", "cryptography")
_pip("install", "--upgrade", "pip")

# Create access to the MySQL account and to the sakila database.
# The password is percent-encoded before it is interpolated into the URL so
# that characters such as "@" or "/" cannot be misread as URL delimiters.
password = quote_plus(getpass("Enter MySQL password: "))
engine = create_engine(f"mysql+pymysql://root:{password}@localhost:3306/sakila")
def rentals_month(engine, month, year):
    """Retrieve the rentals made in a given month and year as a DataFrame.

    Args:
        engine: SQLAlchemy engine (or connection) handed to ``pd.read_sql``;
            the database must contain the Sakila ``rental`` table.
        month: Value compared with ``MONTH(rental_date)``.
        year: Value compared with ``YEAR(rental_date)``.

    Returns:
        A DataFrame holding every column of ``rental`` for the matching rows.
    """
    # :month and :year are bound parameters, so the values never become part
    # of the SQL text itself.
    query = text(
        "SELECT * FROM rental "
        "WHERE :month = MONTH(rental_date) AND :year = YEAR(rental_date)"
    )
    return pd.read_sql(query, engine, params={"month": month, "year": year})


def rental_count_month(df, month, year):
    """Count the rentals per customer and label the count with the period.

    The frame is NOT filtered here: ``month`` and ``year`` are only used to
    build the name of the count column, so ``df`` should already contain
    just the rentals of that period.

    Args:
        df: DataFrame with a ``customer_id`` column, one row per rental.
        month: Integer month; rendered as two digits in the column name.
        year: Year appended to the column name.

    Returns:
        A new DataFrame with one row per ``customer_id`` and a count column
        named ``rentals_<MM>_<year>``. The input frame is left unchanged.
    """
    count_column = f"rentals_{month:02d}_{year}"
    # Naming the column in reset_index avoids depending on the implicit
    # integer label (0) that an unnamed size() Series would otherwise get.
    return df.groupby("customer_id").size().reset_index(name=count_column)


# Driver cells of the notebook. Guarded so that importing the definitions
# above does not query the database; `engine` comes from the setup cell.
if __name__ == "__main__":
    may_2005 = rentals_month(engine, 5, 2005)
    june_2005 = rentals_month(engine, 6, 2005)

    df_may_rental = rental_count_month(may_2005, 5, 2005)
    df_june_rental = rental_count_month(june_2005, 6, 2005)
def compare_rentals(df_earlier_month, df_later_month):
    """Combine two per-customer rental counts and add their difference.

    Args:
        df_earlier_month: DataFrame with ``customer_id`` and one count
            column for the earlier period.
        df_later_month: DataFrame with ``customer_id`` and one count column
            for the later period.

    Returns:
        A new DataFrame with one row per customer present in either input,
        both count columns, and ``difference`` = later count - earlier
        count. A customer missing from one period counts as 0 rentals
        there, so the counts stay integers and ``difference`` is never NaN.
    """
    merged = pd.merge(
        df_earlier_month, df_later_month, on="customer_id", how="outer"
    )
    # Everything except the join key is a count column; the earlier frame's
    # column comes first because it is the left side of the merge.
    count_columns = [col for col in merged.columns if col != "customer_id"]
    earlier_col, later_col = count_columns[0], count_columns[1]

    # The outer join yields NaN (and a float dtype) where a customer rented
    # in only one of the two periods; "no rows" means zero rentals.
    for col in (earlier_col, later_col):
        merged[col] = merged[col].fillna(0).astype(int)

    merged["difference"] = merged[later_col] - merged[earlier_col]
    return merged


# Driver cells of the notebook. Guarded so that importing compare_rentals
# does not depend on the per-month frames built by the earlier cells.
if __name__ == "__main__":
    df_may_june = compare_rentals(df_may_rental, df_june_rental)
    # A bare expression only renders as the last statement of a notebook
    # cell; print shows the result in a notebook and in a script alike.
    print(df_may_june)
| \n", + " | customer_id | \n", + "rentals_05_2005 | \n", + "rentals_06_2005 | \n", + "difference | \n", + "
|---|---|---|---|---|
| 0 | \n", + "1 | \n", + "2.0 | \n", + "7.0 | \n", + "5.0 | \n", + "
| 1 | \n", + "2 | \n", + "1.0 | \n", + "1.0 | \n", + "0.0 | \n", + "
| 2 | \n", + "3 | \n", + "2.0 | \n", + "4.0 | \n", + "2.0 | \n", + "
| 3 | \n", + "4 | \n", + "NaN | \n", + "6.0 | \n", + "NaN | \n", + "
| 4 | \n", + "5 | \n", + "3.0 | \n", + "5.0 | \n", + "2.0 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 593 | \n", + "595 | \n", + "1.0 | \n", + "2.0 | \n", + "1.0 | \n", + "
| 594 | \n", + "596 | \n", + "6.0 | \n", + "2.0 | \n", + "-4.0 | \n", + "
| 595 | \n", + "597 | \n", + "2.0 | \n", + "3.0 | \n", + "1.0 | \n", + "
| 596 | \n", + "598 | \n", + "NaN | \n", + "1.0 | \n", + "NaN | \n", + "
| 597 | \n", + "599 | \n", + "1.0 | \n", + "4.0 | \n", + "3.0 | \n", + "
598 rows × 4 columns
\n", + "