Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
208 changes: 208 additions & 0 deletions lab.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 9,
"id": "f1155b30",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import pymysql\n",
"from sqlalchemy import create_engine\n",
"import getpass \n",
"password = getpass.getpass()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "a2b34b9d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" rental_id rental_date inventory_id customer_id \\\n",
"0 1 2005-05-24 22:53:30 367 130 \n",
"1 2 2005-05-24 22:54:33 1525 459 \n",
"2 3 2005-05-24 23:03:39 1711 408 \n",
"3 4 2005-05-24 23:04:41 2452 333 \n",
"4 5 2005-05-24 23:05:21 2079 222 \n",
"\n",
" return_date staff_id \n",
"0 2005-05-26 22:04:30 1 \n",
"1 2005-05-28 19:40:33 1 \n",
"2 2005-06-01 22:12:39 1 \n",
"3 2005-06-03 01:43:41 2 \n",
"4 2005-06-02 04:33:21 1 \n"
]
}
],
"source": [
"bd = \"sakila\"\n",
"connection_string = 'mysql+pymysql://root:' + password + '@localhost/'+bd\n",
"engine = create_engine(connection_string)\n",
"engine\n",
"def rentals_month(engine, month: int, year: int) -> pd.DataFrame:\n",
" sql = text(\"\"\"\n",
" SELECT rental_id, rental_date, inventory_id, customer_id, return_date, staff_id\n",
" FROM rental\n",
" WHERE YEAR(rental_date) = :year AND MONTH(rental_date) = :month\n",
" ORDER BY rental_id\n",
" \"\"\")\n",
" with engine.connect() as conn:\n",
" df = pd.read_sql_query(sql, con=conn, params={\"year\": year, \"month\": month},\n",
" parse_dates=[\"rental_date\", \"return_date\"])\n",
" return df\n",
"df = rentals_month(engine, 5, 2005)\n",
"print(df.head())"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "fe7ebd77",
"metadata": {},
"outputs": [],
"source": [
"\n",
"def rental_count_month(rentals_df: pd.DataFrame, month: int, year: int) -> pd.DataFrame:\n",
"\n",
" month = int(month)\n",
" year = int(year)\n",
" col_name = f\"rentals_{month:02d}_{year}\"\n",
"\n",
" df = rentals_df.copy()\n",
"\n",
" if \"rental_date\" in df.columns:\n",
" df[\"rental_date\"] = pd.to_datetime(df[\"rental_date\"])\n",
" df = df[(df[\"rental_date\"].dt.year == year) & (df[\"rental_date\"].dt.month == month)]\n",
"\n",
" count_col = \"rental_id\" if \"rental_id\" in df.columns else df.columns[0]\n",
"\n",
" result = (\n",
" df.groupby(\"customer_id\", dropna=False)\n",
" .agg(**{col_name: (count_col, \"count\")})\n",
" .reset_index()\n",
" .sort_values(by=[col_name, \"customer_id\"], ascending=[False, True])\n",
" .reset_index(drop=True)\n",
" )\n",
"\n",
" return result\n"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "f781a9ab",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" customer_id rentals_05_2005\n",
"0 197 8\n",
"1 109 7\n",
"2 506 7\n",
"3 19 6\n",
"4 53 6\n"
]
}
],
"source": [
"df = rentals_month(engine, 5, 2005)\n",
"counts = rental_count_month(df, 5, 2005)\n",
"print(counts.head())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "32efeb96",
"metadata": {},
"outputs": [],
"source": [
"def compare_rentals(df1: pd.DataFrame, df2: pd.DataFrame):\n",
"\n",
" def find_rent_col(df: pd.DataFrame) -> str:\n",
" cols = [c for c in df.columns if c != \"customer_id\"]\n",
" for c in cols:\n",
" if pd.api.types.is_numeric_dtype(df[c]):\n",
" return c\n",
" raise ValueError(\"No numeric rentals column found\")\n",
"\n",
" col1 = find_rent_col(df1)\n",
" col2 = find_rent_col(df2)\n",
"\n",
" merged = pd.merge(\n",
" df1[[\"customer_id\", col1]],\n",
" df2[[\"customer_id\", col2]],\n",
" on=\"customer_id\",\n",
" how=\"outer\"\n",
" )\n",
"\n",
" merged[[col1, col2]] = merged[[col1, col2]].fillna(0).astype(int)\n",
" merged[\"difference\"] = merged[col2] - merged[col1]\n",
" merged = merged.sort_values(by=[\"difference\", \"customer_id\"], ascending=[False, True]).reset_index(drop=True)\n",
"\n",
" return merged\n"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "f5f70005",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" customer_id rentals_05_2005 rentals_06_2005 difference\n",
"0 31 0 11 11\n",
"1 329 0 9 9\n",
"2 454 1 10 9\n",
"3 178 0 8 8\n",
"4 213 1 9 8\n"
]
}
],
"source": [
"m1, y1 = 5, 2005\n",
"m2, y2 = 6, 2005\n",
"rentals1 = rentals_month(engine, m1, y1)\n",
"rentals2 = rentals_month(engine, m2, y2)\n",
"counts1 = rental_count_month(rentals1, m1, y1)\n",
"counts2 = rental_count_month(rentals2, m2, y2)\n",
"comparison = compare_rentals(counts1, counts2)\n",
"\n",
"print(comparison.head())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}