Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 155 additions & 0 deletions python_sql_connection.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Engine(mysql+pymysql://root:***@localhost/Sakila)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import getpass\n",
"from urllib.parse import quote_plus\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import pymysql\n",
"from sqlalchemy import create_engine\n",
"\n",
"# Ask for the MySQL password interactively so it is never saved in the notebook.\n",
"password = getpass.getpass()\n",
"bd = \"Sakila\"\n",
"\n",
"# Percent-encode the password before embedding it in the URL: characters such as\n",
"# '@', ':', '/' or '%' would otherwise be read as URL syntax and break the connection.\n",
"connection_string = 'mysql+pymysql://root:' + quote_plus(password) + '@localhost/' + bd\n",
"engine = create_engine(connection_string)\n",
"engine"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "acc452c6",
"metadata": {},
"outputs": [],
"source": [
"def rentals_month(engine, month, year):\n",
"    \"\"\"Load the ``rental`` rows whose ``rental_date`` falls in one calendar month.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    engine\n",
"        Object whose ``connect()`` result works as a context manager and is\n",
"        accepted by ``pd.read_sql`` (in this notebook, the SQLAlchemy engine).\n",
"    month\n",
"        Value compared against ``MONTH(rental_date)``.\n",
"    year\n",
"        Value compared against ``YEAR(rental_date)``.\n",
"\n",
"    Returns\n",
"    -------\n",
"    The result of ``pd.read_sql`` for the filtered query, with every column\n",
"    of ``rental``.\n",
"    \"\"\"\n",
"    bind_values = {\"month\": month, \"year\": year}\n",
"    sql = \"\"\"\n",
"    SELECT *\n",
"    FROM rental\n",
"    WHERE MONTH(rental_date) = %(month)s\n",
"    AND YEAR(rental_date) = %(year)s\n",
"    \"\"\"\n",
"    # Leaving the ``with`` block releases the connection, including on return.\n",
"    with engine.connect() as connection:\n",
"        return pd.read_sql(sql, connection, params=bind_values)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c87b9113",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" rental_id rental_date inventory_id customer_id \\\n",
"0 1 2005-05-24 22:53:30 367 130 \n",
"1 2 2005-05-24 22:54:33 1525 459 \n",
"2 3 2005-05-24 23:03:39 1711 408 \n",
"3 4 2005-05-24 23:04:41 2452 333 \n",
"4 5 2005-05-24 23:05:21 2079 222 \n",
"\n",
" return_date staff_id last_update \n",
"0 2005-05-26 22:04:30 1 2006-02-15 21:30:53 \n",
"1 2005-05-28 19:40:33 1 2006-02-15 21:30:53 \n",
"2 2005-06-01 22:12:39 1 2006-02-15 21:30:53 \n",
"3 2005-06-03 01:43:41 2 2006-02-15 21:30:53 \n",
"4 2005-06-02 04:33:21 1 2006-02-15 21:30:53 \n"
]
}
],
"source": [
"# Pull the May 2005 rentals and preview the first five rows.\n",
"may_rentals = rentals_month(engine, month=5, year=2005)\n",
"print(may_rentals.head(5))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "19ca0f14",
"metadata": {},
"outputs": [],
"source": [
"def rental_count_month(df, month, year):\n",
"    \"\"\"Count rentals per customer and label the count column with the month and year.\n",
"\n",
"    ``month`` and ``year`` are only used to build the column name\n",
"    ``rentals_<MM>_<YYYY>`` (month zero-padded to two digits, so it must accept\n",
"    the ``02d`` format); ``df`` itself is not filtered by date here.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df\n",
"        Frame with ``customer_id`` and ``rental_id`` columns.\n",
"    month, year\n",
"        Values embedded in the name of the count column.\n",
"\n",
"    Returns\n",
"    -------\n",
"    Frame with one row per ``customer_id`` and the count of its ``rental_id``\n",
"    values in the labelled column.\n",
"    \"\"\"\n",
"    count_label = f\"rentals_{month:02d}_{year}\"\n",
"    per_customer = df.groupby(\"customer_id\")[\"rental_id\"].count()\n",
"    # Name the Series first so reset_index() emits the final column label directly.\n",
"    return per_customer.rename(count_label).reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "b186891c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" customer_id rentals_05_2005\n",
"0 1 2\n",
"1 2 1\n",
"2 3 2\n",
"3 5 3\n",
"4 6 3\n"
]
}
],
"source": [
"# Aggregate the May 2005 rentals per customer and preview the first five rows.\n",
"may_count = rental_count_month(may_rentals, month=5, year=2005)\n",
"print(may_count.head(5))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1baf2437",
"metadata": {},
"outputs": [],
"source": [
"def compare_rentals(df1, df2):\n",
"    \"\"\"Join two per-customer frames and add the difference of their value columns.\n",
"\n",
"    The frames are inner-joined on ``customer_id``, so customers missing from\n",
"    either frame are dropped. The merged frame must end up with exactly two\n",
"    columns besides ``customer_id``; otherwise the unpacking raises ``ValueError``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df1, df2\n",
"        Frames that each contain ``customer_id`` plus one value column.\n",
"\n",
"    Returns\n",
"    -------\n",
"    The merged frame with an extra ``difference`` column equal to the first\n",
"    value column minus the second.\n",
"    \"\"\"\n",
"    joined = df1.merge(df2, how=\"inner\", on=\"customer_id\")\n",
"    value_columns = [name for name in joined.columns if name != \"customer_id\"]\n",
"    first_col, second_col = value_columns\n",
"    joined[\"difference\"] = joined[first_col].sub(joined[second_col])\n",
"    return joined"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}