diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..690f396
Binary files /dev/null and b/.DS_Store differ
diff --git a/__pycache__/queries.cpython-314.pyc b/__pycache__/queries.cpython-314.pyc
new file mode 100644
index 0000000..d2099d5
Binary files /dev/null and b/__pycache__/queries.cpython-314.pyc differ
diff --git a/queries.py b/queries.py
new file mode 100644
index 0000000..a97f330
--- /dev/null
+++ b/queries.py
@@ -0,0 +1,15 @@
+"""SQL statements used by the rental-comparison notebook."""
+
+
+def get_rentals_month_query():
+    """Return the SQL selecting every rental made in one month of one year.
+
+    The statement has two ``%s`` placeholders, bound in this order:
+    month number, then year.
+    """
+    return """
+        SELECT *
+        FROM rental
+        WHERE MONTH(rental_date) = %s
+          AND YEAR(rental_date) = %s;
+    """
diff --git a/sample.ipynb b/sample.ipynb
new file mode 100644
index 0000000..f9ce81c
--- /dev/null
+++ b/sample.ipynb
@@ -0,0 +1,163 @@
+{
+ "cells": [
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "import os\n",
+    "from getpass import getpass\n",
+    "from urllib.parse import quote_plus\n",
+    "\n",
+    "import pandas as pd\n",
+    "from sqlalchemy import create_engine\n",
+    "\n",
+    "from queries import get_rentals_month_query"
+   ],
+   "id": "fbc121e30a2defb3",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "def rentals_month(connection, month, year):\n",
+    "    \"\"\"Load every rental row for one month of one year.\n",
+    "\n",
+    "    Args:\n",
+    "        connection: Passed to ``pandas.read_sql`` as ``con``; this\n",
+    "            notebook supplies a SQLAlchemy engine.\n",
+    "        month: Month number, bound to the query's first placeholder.\n",
+    "        year: Year, bound to the query's second placeholder.\n",
+    "\n",
+    "    Returns:\n",
+    "        The query result as a DataFrame.\n",
+    "    \"\"\"\n",
+    "    query = get_rentals_month_query()\n",
+    "    return pd.read_sql(query, connection, params=(month, year))"
+   ],
+   "id": "2724bcdfc25a56fb",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "def rental_count_month(df, month, year):\n",
+    "    \"\"\"Count rentals per customer for one month's rental rows.\n",
+    "\n",
+    "    Args:\n",
+    "        df: Rental rows with ``customer_id`` and ``rental_id`` columns.\n",
+    "        month: Month number used in the count column's name.\n",
+    "        year: Year used in the count column's name.\n",
+    "\n",
+    "    Returns:\n",
+    "        A DataFrame with ``customer_id`` and a count column named\n",
+    "        after the period, e.g. ``rentals_08_2005``.\n",
+    "    \"\"\"\n",
+    "    col_name = f\"rentals_{month:02d}_{year}\"\n",
+    "    rental_counts = df.groupby('customer_id')['rental_id'].count()\n",
+    "    return rental_counts.reset_index(name=col_name)"
+   ],
+   "id": "d6e3619a969c3fe9",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "def compare_rentals(df1, df2):\n",
+    "    \"\"\"Merge two per-customer rental counts and add their difference.\n",
+    "\n",
+    "    The inputs are outer-joined on ``customer_id``, so a customer\n",
+    "    present in only one of them is still kept in the result.\n",
+    "\n",
+    "    Args:\n",
+    "        df1: ``customer_id`` plus one count column (first period).\n",
+    "        df2: ``customer_id`` plus one count column (second period).\n",
+    "\n",
+    "    Returns:\n",
+    "        The merged DataFrame. When it holds exactly two count columns,\n",
+    "        missing counts become 0 and a ``difference`` column (second\n",
+    "        minus first) is added.\n",
+    "    \"\"\"\n",
+    "    merged_df = pd.merge(df1, df2, on='customer_id', how='outer')\n",
+    "    count_columns = [col for col in merged_df.columns if col != 'customer_id']\n",
+    "    if len(count_columns) == 2:\n",
+    "        # The outer join leaves NaN where a customer rented in only one\n",
+    "        # period; that is zero rentals, so the difference stays numeric.\n",
+    "        merged_df[count_columns] = merged_df[count_columns].fillna(0)\n",
+    "        col_m1, col_m2 = count_columns\n",
+    "        merged_df['difference'] = merged_df[col_m2] - merged_df[col_m1]\n",
+    "    return merged_df"
+   ],
+   "id": "6c26b62da3c02da3",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "if __name__ == \"__main__\":\n",
+    "    # (month, year) of the two periods compared. Defined once so the\n",
+    "    # rows queried and the column label built for them cannot disagree.\n",
+    "    # NOTE(review): the earlier period was previously labelled \"Jul\"\n",
+    "    # although month 6 is queried; the month number is kept as-is,\n",
+    "    # confirm that June is the intended month.\n",
+    "    EARLIER = (6, 2005)\n",
+    "    LATER = (8, 2005)\n",
+    "\n",
+    "    USER = 'root'\n",
+    "    # A committed notebook must not carry the database password: read\n",
+    "    # it from the environment, or prompt for it.\n",
+    "    PASSWORD = os.environ.get('SAKILA_DB_PASSWORD') or getpass('MySQL password: ')\n",
+    "    HOST = 'localhost'\n",
+    "    PORT = '3306'\n",
+    "    DATABASE = 'sakila'\n",
+    "    # quote_plus stops characters such as '@' or '/' in the password\n",
+    "    # from being parsed as part of the URL.\n",
+    "    sql_string = f\"mysql+pymysql://{USER}:{quote_plus(PASSWORD)}@{HOST}:{PORT}/{DATABASE}\"\n",
+    "    try:\n",
+    "        connection = create_engine(sql_string)\n",
+    "        df_rentals_earlier = rentals_month(connection, *EARLIER)\n",
+    "        df_rentals_later = rentals_month(connection, *LATER)\n",
+    "        # read_sql returns an empty DataFrame, never None, when no rows match.\n",
+    "        if df_rentals_earlier.empty or df_rentals_later.empty:\n",
+    "            print(\"No data found\")\n",
+    "        else:\n",
+    "            count_earlier = rental_count_month(df_rentals_earlier, *EARLIER)\n",
+    "            count_later = rental_count_month(df_rentals_later, *LATER)\n",
+    "            merged_df = compare_rentals(count_earlier, count_later)\n",
+    "            display(merged_df)\n",
+    "    except Exception as e:\n",
+    "        print(e)\n"
+   ],
+   "id": "1c32ed2ef6bbb843",
+   "outputs": [],
+   "execution_count": null
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}