Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions sql_python.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "1136bca2",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"\n",
"from sqlalchemy import create_engine\n",
"from sqlalchemy.engine import URL\n",
"\n",
"# Connection settings are read from the environment so that no credential is\n",
"# stored in the notebook. Defaults match a local Sakila install; the password\n",
"# has no default and is prompted for when MYSQL_PASSWORD is unset or empty.\n",
"# URL.create() escapes special characters in the password, which a hand-built\n",
"# connection string does not.\n",
"engine = create_engine(\n",
"    URL.create(\n",
"        drivername=\"mysql+pymysql\",\n",
"        username=os.environ.get(\"MYSQL_USER\", \"root\"),\n",
"        password=os.environ.get(\"MYSQL_PASSWORD\") or getpass(\"MySQL password: \"),\n",
"        host=os.environ.get(\"MYSQL_HOST\", \"localhost\"),\n",
"        port=int(os.environ.get(\"MYSQL_PORT\", \"3306\")),\n",
"        database=os.environ.get(\"MYSQL_DATABASE\", \"sakila\"),\n",
"    )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "af9a5baa",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"def rentals_month(engine, month, year):\n",
"    \"\"\"Fetch the rentals recorded in one calendar month.\n",
"\n",
"    Args:\n",
"        engine: Database engine/connection handed to ``pd.read_sql``.\n",
"        month: Month number matched against ``MONTH(rental_date)``.\n",
"        year: Year matched against ``YEAR(rental_date)``.\n",
"\n",
"    Returns:\n",
"        DataFrame with the columns ``rental_id``, ``customer_id`` and\n",
"        ``rental_date`` for every matching row of the ``rental`` table.\n",
"\n",
"    Raises:\n",
"        TypeError, ValueError: If ``month`` or ``year`` cannot be converted\n",
"            to ``int``.\n",
"    \"\"\"\n",
"    # `text` is not imported by any other cell, so bring it in locally.\n",
"    from sqlalchemy import text\n",
"\n",
"    # Bound parameters instead of f-string interpolation: the values travel\n",
"    # separately from the SQL text and can never be parsed as SQL.\n",
"    query = text(\n",
"        \"\"\"\n",
"        SELECT\n",
"            rental_id,\n",
"            customer_id,\n",
"            rental_date\n",
"        FROM rental\n",
"        WHERE MONTH(rental_date) = :month\n",
"          AND YEAR(rental_date) = :year;\n",
"        \"\"\"\n",
"    )\n",
"\n",
"    # int() rejects anything that is not a plain number before it reaches the DB.\n",
"    params = {\"month\": int(month), \"year\": int(year)}\n",
"\n",
"    return pd.read_sql(query, engine, params=params)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "545d62d9",
"metadata": {},
"outputs": [],
"source": [
"def rental_count_month(df, month, year):\n",
"    \"\"\"Count how many rentals each customer has in ``df``.\n",
"\n",
"    Args:\n",
"        df: DataFrame with a ``customer_id`` column, one row per rental.\n",
"        month: Integer month, zero-padded to two digits in the column name.\n",
"        year: Year appended to the column name.\n",
"\n",
"    Returns:\n",
"        DataFrame with ``customer_id`` and a count column named\n",
"        ``rentals_<MM>_<year>``.\n",
"    \"\"\"\n",
"    # Build the label first so a bad `month` fails before any grouping work.\n",
"    column_name = f\"rentals_{month:02d}_{year}\"\n",
"\n",
"    per_customer = df.groupby(\"customer_id\").size()\n",
"\n",
"    # Name the counts, then move customer_id from the index back to a column.\n",
"    return per_customer.to_frame(column_name).reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "027b1906",
"metadata": {},
"outputs": [],
"source": [
"def compare_rentals(df1, df2):\n",
"    \"\"\"Merge two per-customer count tables and add their difference.\n",
"\n",
"    Args:\n",
"        df1: DataFrame with ``customer_id`` and, as its second column, the\n",
"            counts for the first period.\n",
"        df2: Same layout for the second period.\n",
"\n",
"    Returns:\n",
"        Outer merge of both frames on ``customer_id`` with missing values\n",
"        replaced by 0, plus a ``difference`` column (second period minus\n",
"        first). If both count columns share a name, they appear with the\n",
"        pandas merge suffixes ``_x`` (df1) and ``_y`` (df2).\n",
"    \"\"\"\n",
"    # Get rental column names (and their dtypes) dynamically, by position.\n",
"    col1 = df1.columns[1]\n",
"    col2 = df2.columns[1]\n",
"    dtype1 = df1.dtypes.iloc[1]\n",
"    dtype2 = df2.dtypes.iloc[1]\n",
"\n",
"    merged = pd.merge(df1, df2, on=\"customer_id\", how=\"outer\").fillna(0)\n",
"\n",
"    # Identical column names are suffixed by merge(); follow the rename so the\n",
"    # lookups below do not raise KeyError.\n",
"    if col1 == col2:\n",
"        col1, col2 = f\"{col1}_x\", f\"{col2}_y\"\n",
"\n",
"    # The outer merge introduces NaN for customers missing on one side, which\n",
"    # upcasts integer counts to float. Restore the integer dtype after filling;\n",
"    # columns that were not integer to begin with are left untouched.\n",
"    for col, dtype in ((col1, dtype1), (col2, dtype2)):\n",
"        if pd.api.types.is_integer_dtype(dtype):\n",
"            merged[col] = merged[col].astype(dtype)\n",
"\n",
"    merged[\"difference\"] = merged[col2] - merged[col1]\n",
"\n",
"    return merged"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "20bb4152",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" customer_id rentals_05_2005 rentals_06_2005 difference\n",
"0 1 2.0 7.0 5.0\n",
"1 2 1.0 1.0 0.0\n",
"2 3 2.0 4.0 2.0\n",
"3 4 0.0 6.0 6.0\n",
"4 5 3.0 5.0 2.0\n"
]
}
],
"source": [
"# May 2005: raw rental rows, then rentals per customer\n",
"may_data = rentals_month(engine, 5, 2005)\n",
"may_counts = rental_count_month(may_data, 5, 2005)\n",
"\n",
"# June 2005: same two steps\n",
"june_data = rentals_month(engine, 6, 2005)\n",
"june_counts = rental_count_month(june_data, 6, 2005)\n",
"\n",
"# Month-over-month change per customer (June minus May)\n",
"comparison = compare_rentals(may_counts, june_counts)\n",
"\n",
"print(comparison.head())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}