diff --git a/Connecting Python to SQL.ipynb b/Connecting Python to SQL.ipynb new file mode 100644 index 0000000..3eabf2f --- /dev/null +++ b/Connecting Python to SQL.ipynb @@ -0,0 +1,497 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "593c4a8f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting sqlalchemy\n", + " Downloading sqlalchemy-2.0.49-cp314-cp314-win_amd64.whl.metadata (9.8 kB)\n", + "Collecting greenlet>=1 (from sqlalchemy)\n", + " Downloading greenlet-3.4.0-cp314-cp314-win_amd64.whl.metadata (3.8 kB)\n", + "Requirement already satisfied: typing-extensions>=4.6.0 in c:\\users\\gabri\\appdata\\local\\python\\pythoncore-3.14-64\\lib\\site-packages (from sqlalchemy) (4.15.0)\n", + "Downloading sqlalchemy-2.0.49-cp314-cp314-win_amd64.whl (2.1 MB)\n", + " ---------------------------------------- 0.0/2.1 MB ? eta -:--:--\n", + " ------------------- -------------------- 1.0/2.1 MB 10.4 MB/s eta 0:00:01\n", + " ---------------------------------------- 2.1/2.1 MB 9.9 MB/s 0:00:00\n", + "Downloading greenlet-3.4.0-cp314-cp314-win_amd64.whl (239 kB)\n", + "Installing collected packages: greenlet, sqlalchemy\n", + "\n", + " -------------------- ------------------- 1/2 [sqlalchemy]\n", + " -------------------- ------------------- 1/2 [sqlalchemy]\n", + " -------------------- ------------------- 1/2 [sqlalchemy]\n", + " -------------------- ------------------- 1/2 [sqlalchemy]\n", + " -------------------- ------------------- 1/2 [sqlalchemy]\n", + " -------------------- ------------------- 1/2 [sqlalchemy]\n", + " -------------------- ------------------- 1/2 [sqlalchemy]\n", + " -------------------- ------------------- 1/2 [sqlalchemy]\n", + " -------------------- ------------------- 1/2 [sqlalchemy]\n", + " -------------------- ------------------- 1/2 [sqlalchemy]\n", + " -------------------- ------------------- 1/2 [sqlalchemy]\n", + " -------------------- ------------------- 
1/2 [sqlalchemy]\n", + " ---------------------------------------- 2/2 [sqlalchemy]\n", + "\n", + "Successfully installed greenlet-3.4.0 sqlalchemy-2.0.49\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "[notice] A new release of pip is available: 25.3 -> 26.0.1\n", + "[notice] To update, run: python.exe -m pip install --upgrade pip\n" + ] + } + ], + "source": [ + "!pip install sqlalchemy" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "09d959b8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting pymysql\n", + " Downloading pymysql-1.1.2-py3-none-any.whl.metadata (4.3 kB)\n", + "Downloading pymysql-1.1.2-py3-none-any.whl (45 kB)\n", + "Installing collected packages: pymysql\n", + "Successfully installed pymysql-1.1.2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "[notice] A new release of pip is available: 25.3 -> 26.0.1\n", + "[notice] To update, run: python.exe -m pip install --upgrade pip\n" + ] + } + ], + "source": [ + "!pip install pymysql" + ] + }, + { + "cell_type": "markdown", + "id": "d9a79feb", + "metadata": {}, + "source": [ + "01. Importing Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "74e29e8e", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import pymysql\n", + "from sqlalchemy import create_engine\n", + "import getpass # To get the password without showing the input\n", + "password = getpass.getpass()" + ] + }, + { + "cell_type": "markdown", + "id": "7b90cf76", + "metadata": {}, + "source": [ + "1) Establish a connection between Python and the Sakila database." 
# 1) Establish a connection between Python and the Sakila database.
from urllib.parse import quote_plus

bd = "sakila"

# Percent-encode the password before placing it in the URL: characters such as
# "@", ":", "/" or "%" would otherwise be read as URL delimiters and yield a
# malformed (or silently wrong) connection string.
connection_string = (
    'mysql+pymysql://root:' + quote_plus(password) + '@localhost/' + bd
)
engine = create_engine(connection_string)
engine

# Opening a connection forces a real round trip to the server, so a wrong
# password or an unreachable host fails here instead of in a later query.
with engine.connect() as connection:
    print("Connection successful!")
def rentals_month(engine, month, year):
    """Fetch the rows of the ``rental`` table for one calendar month.

    Parameters
    ----------
    engine
        Database connectable handed unchanged to ``pandas.read_sql``.
    month : int
        Calendar month, 1-12 (anything ``int()`` accepts, e.g. ``"05"``).
    year : int
        Calendar year, 1-9999.

    Returns
    -------
    pandas.DataFrame
        Every column of ``rental`` for the rows whose ``rental_date`` falls in
        the requested month and year, ordered by ``rental_date``.

    Raises
    ------
    ValueError
        If ``month`` or ``year`` is not an integer value in the accepted range.
    """
    # Coerce first: this rejects anything that is not a plain number before it
    # gets anywhere near the database.
    month, year = int(month), int(year)
    if not 1 <= month <= 12:
        raise ValueError(f"month must be between 1 and 12, got {month}")
    if not 1 <= year <= 9999:
        raise ValueError(f"year must be between 1 and 9999, got {year}")

    # The values are bound by the driver rather than pasted into the SQL text.
    # %(name)s is the placeholder style of PyMySQL, the driver named in this
    # notebook's connection URL; pandas forwards `params` to the driver as-is.
    query = """
        SELECT *
        FROM rental
        WHERE MONTH(rental_date) = %(month)s
          AND YEAR(rental_date) = %(year)s
        ORDER BY rental_date;
    """
    return pd.read_sql(query, engine, params={"month": month, "year": year})
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearmonthrentals_count
0200551156
1200562311
2200576709
3200585686
420062182
def rental_count_month(month, year):
    """Count the rentals each customer made in one calendar month.

    The query runs against the notebook-level ``engine``.

    Parameters
    ----------
    month : int
        Calendar month, 1-12 (anything ``int()`` accepts, e.g. ``"05"``).
    year : int
        Calendar year, 1-9999.

    Returns
    -------
    pandas.DataFrame
        One row per ``customer_id`` with a count column named
        ``rentals_<MM>_<YYYY>``, e.g. ``rentals_05_2005``.

    Raises
    ------
    ValueError
        If ``month`` or ``year`` is not an integer value in the accepted range.
    """
    # Coerce and range-check up front. The column alias below has to be
    # formatted into the SQL text (identifiers cannot be bound parameters), so
    # it must be built from validated integers only.
    month, year = int(month), int(year)
    if not 1 <= month <= 12:
        raise ValueError(f"month must be between 1 and 12, got {month}")
    if not 1 <= year <= 9999:
        raise ValueError(f"year must be between 1 and 9999, got {year}")

    col_name = f"rentals_{month:02d}_{year}"

    # The filter values are bound by the driver. %(name)s is the placeholder
    # style of PyMySQL, the driver named in this notebook's connection URL;
    # pandas forwards `params` to the driver as-is.
    query = f"""
        SELECT customer_id,
               COUNT(*) AS {col_name}
        FROM rental
        WHERE MONTH(rental_date) = %(month)s AND YEAR(rental_date) = %(year)s
        GROUP BY customer_id;
    """
    return pd.read_sql(query, engine, params={"month": month, "year": year})
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrentals_05_2005
012
121
232
353
463
.........
5155944
5165951
5175966
5185972
5195991
\n", + "

520 rows × 2 columns

def compare_rentals(df1, df2, how="inner"):
    """Combine two per-customer rental counts and add their difference.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Each must hold a ``customer_id`` column plus exactly one rentals-count
        column.
    how : str, default "inner"
        Join type passed to ``pandas.merge``. The default keeps only customers
        present in both frames; ``"outer"`` keeps customers seen in either
        frame and counts the missing period as 0 rentals.

    Returns
    -------
    pandas.DataFrame
        ``customer_id``, the count column of ``df1``, the count column of
        ``df2`` and ``difference`` (``df1`` count minus ``df2`` count). If both
        inputs use the same count-column name, pandas' ``_x`` / ``_y`` suffixes
        tell them apart.

    Raises
    ------
    ValueError
        If either input does not have exactly one column besides
        ``customer_id``.
    """
    key = "customer_id"
    left_counts = df1.drop(columns=key)
    right_counts = df2.drop(columns=key)
    if left_counts.shape[1] != 1 or right_counts.shape[1] != 1:
        raise ValueError(
            "each input must contain 'customer_id' plus exactly one rentals column"
        )

    combined = pd.merge(df1, df2, on=key, how=how)

    # merge lists the left frame's columns before the right frame's, so the two
    # non-key columns come out in (df1, df2) order whatever their names are.
    first_col, second_col = [name for name in combined.columns if name != key]

    # Non-inner joins leave NaN where a customer is absent from one frame:
    # treat that as zero rentals and restore the count column's original dtype.
    combined[first_col] = (
        combined[first_col].fillna(0).astype(left_counts.dtypes.iloc[0])
    )
    combined[second_col] = (
        combined[second_col].fillna(0).astype(right_counts.dtypes.iloc[0])
    )

    combined["difference"] = combined[first_col] - combined[second_col]
    return combined[[key, first_col, second_col, "difference"]]
"nbformat_minor": 5 +}