# %% cell f1155b30 - imports and credentials
import getpass
from urllib.parse import quote

import numpy as np
import pandas as pd
import pymysql
from sqlalchemy import create_engine, text

# Prompt interactively so the password is never stored in the notebook.
password = getpass.getpass()

# %% cell a2b34b9d - engine setup and monthly rental loader
bd = "sakila"
# Percent-encode the password before embedding it in the URL: characters such
# as "@", ":", "/" or "%" would otherwise be read as URL delimiters/escapes and
# yield a wrong or unparsable connection string.
connection_string = 'mysql+pymysql://root:' + quote(password, safe="") + '@localhost/' + bd
engine = create_engine(connection_string)


def rentals_month(engine, month: int, year: int) -> pd.DataFrame:
    """Load every row of the ``rental`` table whose rental_date is in one month.

    Parameters
    ----------
    engine
        SQLAlchemy engine; a connection is opened for the query and closed
        again when the ``with`` block exits.
    month : int
        Month number compared against ``MONTH(rental_date)``.
    year : int
        Year compared against ``YEAR(rental_date)``.

    Returns
    -------
    pd.DataFrame
        Columns ``rental_id``, ``rental_date``, ``inventory_id``,
        ``customer_id``, ``return_date`` and ``staff_id``, ordered by
        ``rental_id``; the two date columns are parsed as datetimes.
    """
    # `text` must be imported from sqlalchemy (see the import cell); it turns
    # the ":year" / ":month" placeholders into bound parameters, so the values
    # are never interpolated into the SQL string.
    sql = text("""
        SELECT rental_id, rental_date, inventory_id, customer_id, return_date, staff_id
        FROM rental
        WHERE YEAR(rental_date) = :year AND MONTH(rental_date) = :month
        ORDER BY rental_id
    """)
    with engine.connect() as conn:
        df = pd.read_sql_query(
            sql,
            con=conn,
            params={"year": year, "month": month},
            parse_dates=["rental_date", "return_date"],
        )
    return df


df = rentals_month(engine, 5, 2005)
print(df.head())
# %% cell fe7ebd77 - per-customer rental counts for one month
def rental_count_month(rentals_df: pd.DataFrame, month: int, year: int) -> pd.DataFrame:
    """Count rentals per customer for the given month and year.

    Parameters
    ----------
    rentals_df : pd.DataFrame
        Rental rows. Must contain ``customer_id``. If ``rental_date`` is
        present the rows are filtered to ``month``/``year``; otherwise every
        row is used. The input frame is not modified.
    month, year : int
        Period to count; both are coerced with ``int()``.

    Returns
    -------
    pd.DataFrame
        Two columns, ``customer_id`` and ``rentals_{MM}_{YYYY}``, sorted by
        count (descending) and then ``customer_id`` (ascending), with a fresh
        0..n-1 index.
    """
    month = int(month)
    year = int(year)
    col_name = f"rentals_{month:02d}_{year}"

    df = rentals_df
    if "rental_date" in df.columns:
        # Convert into a separate Series rather than overwriting the column,
        # so the caller's frame stays untouched and no full copy is needed.
        rental_dates = pd.to_datetime(df["rental_date"])
        df = df[(rental_dates.dt.year == year) & (rental_dates.dt.month == month)]

    # dropna=False keeps rows whose customer_id is missing as their own group.
    grouped = df.groupby("customer_id", dropna=False)
    if "rental_id" in df.columns:
        # Number of non-null rental ids per customer.
        counts = grouped["rental_id"].count()
    else:
        # Without rental_id, count rows. Counting "whatever column comes first"
        # would skip nulls in an unrelated column and cannot work at all when
        # that first column is the grouping key itself.
        counts = grouped.size()

    result = (
        counts.reset_index(name=col_name)
        .sort_values(by=[col_name, "customer_id"], ascending=[False, True])
        .reset_index(drop=True)
    )

    return result


# %% cell f781a9ab - example: May 2005
df = rentals_month(engine, 5, 2005)
counts = rental_count_month(df, 5, 2005)
print(counts.head())


# %% cell 32efeb96 - compare two per-customer count tables
def compare_rentals(df1: pd.DataFrame, df2: pd.DataFrame):
    """Merge two per-customer count tables and compute the change between them.

    Parameters
    ----------
    df1, df2 : pd.DataFrame
        Each must have a ``customer_id`` column plus at least one numeric
        column; the first numeric column other than ``customer_id`` is taken
        as that frame's rental count.

    Returns
    -------
    pd.DataFrame
        One row per customer found in either input, with both count columns
        (missing counts filled with 0 and cast to int) and ``difference`` =
        count from ``df2`` minus count from ``df1``. Sorted by ``difference``
        (descending) and then ``customer_id`` (ascending). If both inputs use
        the same count-column name, the output columns are ``<name>_1`` and
        ``<name>_2``.

    Raises
    ------
    ValueError
        If either frame has no numeric column besides ``customer_id``.
    """

    def find_rent_col(df: pd.DataFrame) -> str:
        # First numeric column that is not the join key.
        for c in df.columns:
            if c != "customer_id" and pd.api.types.is_numeric_dtype(df[c]):
                return c
        raise ValueError("No numeric rentals column found")

    col1 = find_rent_col(df1)
    col2 = find_rent_col(df2)

    left = df1[["customer_id", col1]]
    right = df2[["customer_id", col2]]
    if col1 == col2:
        # Identical labels would be suffixed "_x"/"_y" by merge, after which
        # selecting by the original names raises KeyError. Rename up front so
        # the two sides stay addressable.
        renamed1, renamed2 = f"{col1}_1", f"{col2}_2"
        left = left.rename(columns={col1: renamed1})
        right = right.rename(columns={col2: renamed2})
        col1, col2 = renamed1, renamed2

    # Outer join: customers present in only one period are kept.
    merged = pd.merge(left, right, on="customer_id", how="outer")

    merged[[col1, col2]] = merged[[col1, col2]].fillna(0).astype(int)
    merged["difference"] = merged[col2] - merged[col1]
    merged = merged.sort_values(
        by=["difference", "customer_id"], ascending=[False, True]
    ).reset_index(drop=True)

    return merged


# %% cell f5f70005 - example: May 2005 vs June 2005
m1, y1 = 5, 2005
m2, y2 = 6, 2005
rentals1 = rentals_month(engine, m1, y1)
rentals2 = rentals_month(engine, m2, y2)
counts1 = rental_count_month(rentals1, m1, y1)
counts2 = rental_count_month(rentals2, m2, y2)
comparison = compare_rentals(counts1, counts2)

print(comparison.head())