From 239cc9e7997c4a877ea44a2e325ea1311c39c709 Mon Sep 17 00:00:00 2001 From: Adriana Alves Date: Tue, 28 Apr 2026 21:19:07 +0100 Subject: [PATCH] Solved lab --- Connecting Python to SQL.ipynb | 343 +++++++++++++++++++++++++++++++++ 1 file changed, 343 insertions(+) create mode 100644 Connecting Python to SQL.ipynb diff --git a/Connecting Python to SQL.ipynb b/Connecting Python to SQL.ipynb new file mode 100644 index 0000000..c072d67 --- /dev/null +++ b/Connecting Python to SQL.ipynb @@ -0,0 +1,343 @@ +{ + "cells": [ + { + "cell_type": "code", + "id": "initial_id", + "metadata": { + "collapsed": true, + "ExecuteTime": { + "end_time": "2026-04-28T17:57:50.618715Z", + "start_time": "2026-04-28T17:57:48.621050Z" + } + }, + "source": [ + "import pandas as pd\n", + "from getpass import getpass\n", + "from urllib.parse import quote_plus\n", + "from sqlalchemy import create_engine\n", + "from sqlalchemy import text\n", + "import sys\n", + "\n", + "!{sys.executable} -m pip install cryptography\n", + "!{sys.executable} -m pip install --upgrade pip" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: cryptography in ./.venv/lib/python3.14/site-packages (47.0.0)\r\n", + "Requirement already satisfied: cffi>=2.0.0 in ./.venv/lib/python3.14/site-packages (from cryptography) (2.0.0)\r\n", + "Requirement already satisfied: pycparser in ./.venv/lib/python3.14/site-packages (from cffi>=2.0.0->cryptography) (3.0)\r\n", + "Requirement already satisfied: pip in ./.venv/lib/python3.14/site-packages (26.1)\r\n" + ] + } + ], + "execution_count": 6 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-28T17:13:13.280137Z", + "start_time": "2026-04-28T17:13:06.668853Z" + } + }, + "cell_type": "code", + "source": [ + "#Create access to MySQL account and to sakila database.\n", + "password = quote_plus(getpass(\"Enter MySQL password: \"))\n", + "engine = create_engine(f\"mysql+pymysql://root:{password}@localhost:3306/sakila\")" + ], + "id": "733d9835745ff29b", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " n\n", + "0 599\n" + ] + } + ], + "execution_count": 3 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-28T17:57:59.075780Z", + "start_time": "2026-04-28T17:57:59.050536Z" + } + }, + "cell_type": "code", + "source": [ + "#Function that retrieves rental data for a given month and year from the Sakila database as a Pandas DataFrame.\n", + "def rentals_month(engine, month, year):\n", + " query = text(\"\"\"SELECT * FROM rental WHERE :month = MONTH(rental_date) AND :year = YEAR(rental_date)\"\"\")\n", + " df = pd.read_sql(query, engine, params={\"month\": month, \"year\": year})\n", + " return df" + ], + "id": "3f7898fde952f22f", + "outputs": [], + "execution_count": 7 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-28T19:55:49.643255Z", + "start_time": "2026-04-28T19:55:49.513251Z" + } + }, + "cell_type": "code", + "source": [ + "may_2005 = rentals_month(engine, 5, 2005)\n", + "june_2005 = rentals_month(engine, 6, 2005)\n" + ], + "id": "256e140c79d1bf7c", + "outputs": [], + "execution_count": 28 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-28T18:57:58.993220Z", + "start_time": "2026-04-28T18:57:58.979477Z" + } + }, + "cell_type": "code", + "source": [ + "#Function that returns a new DataFrame containing the number of rentals made by each customer_id during the selected month and year.\n", + "\n", + "def rental_count_month(df, month, year):\n", + " df = df.groupby(\"customer_id\").size().reset_index()\n", + " df = df.rename(columns = {0: f\"rentals_{month:02d}_{year}\"})\n", + " return df" + ], + "id": "e3985fae2117c236", + "outputs": [], + "execution_count": 17 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-28T19:56:03.204149Z", + "start_time": "2026-04-28T19:56:03.180948Z" + } + }, + "cell_type": "code", + "source": [ + "df_may_rental = rental_count_month(may_2005, 5, 2005)\n", + "df_june_rental = rental_count_month(june_2005, 6, 2005)\n" + ], + "id": "bf3e6d2c1f8785b9", + "outputs": [], + "execution_count": 29 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-28T20:11:30.590048Z", + "start_time": "2026-04-28T20:11:30.526548Z" + } + }, + "cell_type": "code", + "source": [ + "#Function that returns a combined DataFrame with the difference between the number of rentals in the two months.\n", + "\n", + "def compare_rentals(df_earlier_month, df_later_month):\n", + " df_3 = pd.merge(df_earlier_month, df_later_month, on=\"customer_id\", how=\"outer\")\n", + " df_3[\"difference\"] = df_3[df_3.columns[2]] - df_3[df_3.columns[1]]\n", + " return df_3" + ], + "id": "da7164088f6d846a", + "outputs": [], + "execution_count": 42 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-28T20:11:31.896391Z", + "start_time": "2026-04-28T20:11:31.872021Z" + } + }, + "cell_type": "code", + "source": "df_may_june = compare_rentals(df_may_rental, df_june_rental)", + "id": "e8be0fa7a4445929", + "outputs": [], + "execution_count": 43 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-28T20:11:33.335371Z", + "start_time": "2026-04-28T20:11:33.303598Z" + } + }, + "cell_type": "code", + "source": "df_may_june", + "id": "cb8303b7d03694", + "outputs": [ + { + "data": { + "text/plain": [ + " customer_id rentals_05_2005 rentals_06_2005 difference\n", + "0 1 2.0 7.0 5.0\n", + "1 2 1.0 1.0 0.0\n", + "2 3 2.0 4.0 2.0\n", + "3 4 NaN 6.0 NaN\n", + "4 5 3.0 5.0 2.0\n", + ".. ... ... ... ...\n", + "593 595 1.0 2.0 1.0\n", + "594 596 6.0 2.0 -4.0\n", + "595 597 2.0 3.0 1.0\n", + "596 598 NaN 1.0 NaN\n", + "597 599 1.0 4.0 3.0\n", + "\n", + "[598 rows x 4 columns]" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrentals_05_2005rentals_06_2005difference
012.07.05.0
121.01.00.0
232.04.02.0
34NaN6.0NaN
453.05.02.0
...............
5935951.02.01.0
5945966.02.0-4.0
5955972.03.01.0
596598NaN1.0NaN
5975991.04.03.0
\n", + "

598 rows × 4 columns

\n", + "
" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 44 + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "", + "id": "a7847eef68fac60" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}