"""RetailPulse Pro Streamlit dashboard (dashboard/app.py).

Discovers the processed CSV exports, merges the sales sources into one frame,
normalises the column names it needs (Date, Sales, Qty, Category, State,
Status, Size), applies the sidebar filters and renders KPIs, charts, a CSV
download and raw-data tabs.
"""
import os
from datetime import datetime

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st

# ====================== PAGE CONFIG ======================
st.set_page_config(
    page_title="RetailPulse Pro - Ultimate Retail Analytics",
    # The previous value was the plain text "Shopping Cart", which is neither
    # an emoji nor an image; use the emoji itself.
    page_icon="🛒",
    layout="wide",
    initial_sidebar_state="expanded",
)

# ====================== PATHS & AUTO DISCOVERY ======================
# Original developer checkout; still honoured when it exists so behaviour on
# that machine is unchanged.
_LEGACY_BASE_DIR = r"C:\Users\mmeli\repositories\CloudCTRL\RetailPulse"


def _resolve_base_dir():
    """Return the project root that contains ``data/processed``.

    Resolution order: the ``RETAILPULSE_BASE_DIR`` environment variable, the
    legacy hard-coded checkout if it exists, then the parent of the directory
    holding this script.
    """
    override = os.environ.get("RETAILPULSE_BASE_DIR")
    if override:
        return override
    if os.path.isdir(_LEGACY_BASE_DIR):
        return _LEGACY_BASE_DIR
    return os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


BASE_DIR = _resolve_base_dir()
PROCESSED_DIR = os.path.join(BASE_DIR, "data", "processed")


def find_file(patterns):
    """Return the path of the first file in PROCESSED_DIR matching *patterns*.

    *patterns* is a ``|``-separated list of case-insensitive substrings, tried
    in order. Directory entries are sorted so the result does not depend on
    the order ``os.listdir`` happens to return. Returns ``None`` when the
    directory is missing or nothing matches.
    """
    if not os.path.isdir(PROCESSED_DIR):
        return None
    names = sorted(os.listdir(PROCESSED_DIR))
    for pattern in (part.strip() for part in patterns.split("|")):
        needle = pattern.lower()
        if not needle:
            # An empty alternative would match every file.
            continue
        for name in names:
            if needle in name.lower():
                return os.path.join(PROCESSED_DIR, name)
    return None


files = {
    "amazon": find_file("Amazon Sale Report"),
    "international": find_file("International sale Report"),
    "may2022": find_file("May-2022"),
    "pl_march": find_file("P L March 2021|PL March"),
    "expense": find_file("Expense IIGF"),
    "warehouse": find_file("Cloud Warehouse Compersion|warehouse"),
    # NOTE(review): "Sale Report" is also a substring of the Amazon and
    # International file names, and this entry is never loaded below.
    "sale_report": find_file("Sale Report"),
}


@st.cache_data(ttl=3600)
def load(path):
    """Read the CSV at *path*, trying several encodings.

    Returns an empty DataFrame when *path* is falsy, missing, or unreadable
    with every encoding (a warning is shown in the last case).
    """
    if not path or not os.path.exists(path):
        return pd.DataFrame()
    # latin1 accepts any byte sequence, so the entries after it only matter
    # when the failure was not a decoding error.
    for enc in ("utf-8", "latin1", "cp1252", "iso-8859-1"):
        try:
            return pd.read_csv(path, encoding=enc, low_memory=False)
        except (ValueError, OSError):
            # ValueError covers UnicodeDecodeError and pandas' parser and
            # empty-data errors; anything else is a real bug and propagates.
            continue
    st.warning(f"Could not read {os.path.basename(path)}")
    return pd.DataFrame()


def _first_present(candidates, columns):
    """Return the first name in *candidates* that is in *columns*, else None."""
    return next((name for name in candidates if name in columns), None)


# Load everything
amazon = load(files["amazon"])
intl = load(files["international"])
may = load(files["may2022"])
pl = load(files["pl_march"])
expense = load(files["expense"])
warehouse = load(files["warehouse"])

# ====================== MERGE SALES DATA ======================
# Tag each non-empty source and concatenate once instead of growing a frame
# inside the loop.
sales_frames = [
    data.assign(Source=source)
    for data, source in [
        (amazon, "Amazon India"),
        (intl, "International"),
        (may, "May-2022"),
    ]
    if not data.empty
]

if not sales_frames:
    st.error("No sales data found in data/processed folder!")
    st.stop()

df = pd.concat(sales_frames, ignore_index=True)

# ====================== SMART COLUMN DETECTION ======================
# Date column — try many possible names
date_col = _first_present(
    ["Date", "Order Date", "order_date", "date", "OrderDate", "ship-date", "Date "],
    df.columns,
)
if date_col:
    df["Date"] = pd.to_datetime(df[date_col], errors="coerce")
else:
    st.warning("No date column found → using dummy dates")
    df["Date"] = pd.date_range("2022-01-01", periods=len(df), freq="D")

# Amount / Sales column
amount_col = _first_present(
    ["Amount", "Sales", "sales", "Total", "Price", "Amt", "amount"], df.columns
)
if amount_col:
    df["Sales"] = pd.to_numeric(df[amount_col], errors="coerce").fillna(0)
else:
    df["Sales"] = 0

# Quantity
qty_col = _first_present(
    ["Qty", "Quantity", "qty", "QTY", "quantity", "Units"], df.columns
)
if qty_col:
    df["Qty"] = pd.to_numeric(df[qty_col], errors="coerce").fillna(0)
else:
    df["Qty"] = 1

# Category
cat_col = _first_present(
    ["Category", "category", "Product Category", "item", "Item"], df.columns
)
if cat_col:
    df["Category"] = df[cat_col]

# State
state_col = _first_present(
    ["ship-state", "Ship-State", "State", "ship_state", "state"], df.columns
)
if state_col:
    # Title-case strings and blank out everything else. Unlike ``.str.title()``
    # this does not raise when the column is not string-typed.
    df["State"] = df[state_col].map(
        lambda value: value.title() if isinstance(value, str) else np.nan
    )

# Status
status_col = _first_present(
    ["Status", "status", "order_status", "Order Status"], df.columns
)
if status_col:
    df["Status"] = df[status_col]

# Size
if "Size" not in df.columns:
    size_col = _first_present(["size", "SIZE"], df.columns)
    if size_col:
        df["Size"] = df[size_col]

# Revenue = Sales × Qty
# NOTE(review): if the detected amount column already holds a line total
# rather than a unit price, this double counts — confirm against the data.
df["Revenue"] = df["Sales"] * df["Qty"]

# Final date features
df["Month"] = df["Date"].dt.strftime("%Y-%m")
df["MonthName"] = df["Date"].dt.strftime("%b %Y")

# ====================== SIDEBAR FILTERS ======================
st.sidebar.header("Filters")

# Date range
if df["Date"].notna().any():
    first_day = df["Date"].min().date()
    last_day = df["Date"].max().date()
    # With a single distinct day there is nothing to select, so the slider
    # (and the filter) is skipped.
    if first_day < last_day:
        date_range = st.sidebar.slider(
            "Date Range",
            min_value=first_day,
            max_value=last_day,
            value=(first_day, last_day),
        )
        # Compare whole days so rows on the last selected day that carry a
        # time of day are kept. Rows without a valid date are excluded.
        order_day = df["Date"].dt.normalize()
        df = df[
            (order_day >= pd.Timestamp(date_range[0]))
            & (order_day <= pd.Timestamp(date_range[1]))
        ]

# Other filters
if "Category" in df.columns:
    cat_filter = st.sidebar.multiselect(
        "Category", options=sorted(df["Category"].dropna().unique())
    )
    if cat_filter:
        df = df[df["Category"].isin(cat_filter)]

if "State" in df.columns:
    state_filter = st.sidebar.multiselect(
        "State", options=sorted(df["State"].dropna().unique())
    )
    if state_filter:
        df = df[df["State"].isin(state_filter)]

source_options = list(df["Source"].unique())
source_filter = st.sidebar.multiselect(
    "Source", options=source_options, default=source_options
)
df = df[df["Source"].isin(source_filter)]

# ====================== KPIs ======================
total_revenue = df["Revenue"].sum()
row_count = len(df)
if "Status" in df.columns:
    # astype(str) keeps this working when the column is not string-typed;
    # missing values become "nan", which never matches.
    cancelled = int(
        df["Status"].astype(str).str.contains("Cancel|Return", case=False).sum()
    )
else:
    cancelled = 0

c1, c2, c3, c4, c5 = st.columns(5)
with c1:
    st.metric("Total Revenue", f"₹{total_revenue:,.0f}")
with c2:
    st.metric("Total Orders", f"{row_count:,}")
with c3:
    st.metric("Units Sold", f"{df['Qty'].sum():,.0f}")
with c4:
    st.metric("AOV", f"₹{total_revenue / row_count:,.0f}" if row_count else 0)
with c5:
    st.metric("Cancelled", f"{cancelled:,}")

# ====================== CHARTS (safe versions) ======================
st.markdown("---")
r1 = st.columns(2)
r2 = st.columns(2)

# Monthly trend
with r1[0]:
    st.subheader("Monthly Revenue")
    # Sort on the YYYY-MM key: sorting the "%b %Y" label would order months
    # alphabetically (Apr, Jun, May, ...).
    monthly = (
        df.groupby(["Month", "MonthName"], as_index=False)["Revenue"]
        .sum()
        .sort_values("Month")
    )
    fig = px.line(
        monthly, x="MonthName", y="Revenue", markers=True, title="Revenue Trend"
    )
    st.plotly_chart(fig, use_container_width=True)

# Category sales
with r1[1]:
    st.subheader("Sales by Category")
    if "Category" in df.columns:
        cat = (
            df.groupby("Category")["Revenue"]
            .sum()
            .sort_values(ascending=False)
            .head(10)
            .reset_index()
        )
        fig = px.bar(
            cat, x="Category", y="Revenue", color="Category", text_auto=True
        )
        st.plotly_chart(fig, use_container_width=True)

# State map
with r2[0]:
    st.subheader("Sales by State")
    if "State" in df.columns:
        state_sales = df.groupby("State")["Revenue"].sum().reset_index()
        # NOTE(review): locationmode="country names" matches country names; if
        # this column holds state names the map will likely render empty and
        # would need a GeoJSON of state boundaries instead — confirm.
        fig = px.choropleth(
            state_sales,
            locations="State",
            locationmode="country names",
            color="Revenue",
            scope="asia",
            color_continuous_scale="Reds",
        )
        fig.update_geos(fitbounds="locations", visible=False)
        st.plotly_chart(fig, use_container_width=True)

# Top products
with r2[1]:
    st.subheader("Top 10 Products")
    prod_col = _first_present(["Style", "SKU"], df.columns)
    if prod_col:
        top = df.groupby(prod_col)["Revenue"].sum().nlargest(10).reset_index()
        fig = px.bar(top, x="Revenue", y=prod_col, orientation="h")
        st.plotly_chart(fig, use_container_width=True)

# Download
st.download_button(
    "Download Filtered Data",
    df.to_csv(index=False),
    "retailpulse_filtered.csv",
    "text/csv",
)

# Raw tabs
st.markdown("---")
tabs = st.tabs(["Amazon", "International", "May-2022", "P&L", "Expense", "Warehouse"])
with tabs[0]:
    st.dataframe(amazon.head(100), use_container_width=True)
with tabs[1]:
    st.dataframe(intl.head(100), use_container_width=True)
with tabs[2]:
    st.dataframe(may.head(100), use_container_width=True)
with tabs[3]:
    st.dataframe(pl, use_container_width=True)
with tabs[4]:
    st.dataframe(expense, use_container_width=True)
with tabs[5]:
    st.dataframe(warehouse, use_container_width=True)

st.success("RetailPulse Pro Dashboard is LIVE!")
"""Build derived CSVs for RetailPulse (dashboard/create_missing_files.py).

Reads "Amazon Sale Report.csv" and "International sale Report.csv" from the
processed-data directory and writes two files next to them:

* ``sales_data.csv``  – total ``Amount`` per category from the Amazon export
* ``retailpulse.csv`` – both exports concatenated, tagged with a ``source`` column
"""
import os

import pandas as pd

# Original developer checkout; still honoured when it exists so behaviour on
# that machine is unchanged.
_LEGACY_PROCESSED_DIR = (
    r"C:\Users\mmeli\repositories\CloudCTRL\RetailPulse\data\processed"
)


def _resolve_processed_dir():
    """Return the directory holding the processed CSV exports.

    Resolution order: the ``RETAILPULSE_PROCESSED_DIR`` environment variable,
    the legacy hard-coded path if it exists, then ``../data/processed``
    relative to this script.
    """
    override = os.environ.get("RETAILPULSE_PROCESSED_DIR")
    if override:
        return override
    if os.path.isdir(_LEGACY_PROCESSED_DIR):
        return _LEGACY_PROCESSED_DIR
    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    return os.path.join(repo_root, "data", "processed")


processed_dir = _resolve_processed_dir()


def build_sales_by_category(amazon):
    """Return a ``category``/``sales`` frame summing ``Amount`` per category.

    The category column may be named ``Category`` or ``category``. When
    neither exists, a fixed three-row placeholder frame is returned.
    ``Amount`` is coerced to numeric first; values that cannot be parsed are
    ignored by the sum.

    Raises:
        KeyError: if a category column exists but ``Amount`` does not.
    """
    cat_col = next(
        (name for name in ("Category", "category") if name in amazon.columns), None
    )
    if cat_col is None:
        # fallback dummy
        return pd.DataFrame(
            {
                "category": ["Set", "Kurta", "Blouse"],
                "sales": [300000, 200000, 150000],
            }
        )
    amounts = pd.to_numeric(amazon["Amount"], errors="coerce")
    sales_by_cat = amounts.groupby(amazon[cat_col]).sum().reset_index()
    sales_by_cat.columns = ["category", "sales"]
    return sales_by_cat


def main():
    """Read the two source exports and write the derived CSV files."""
    # 1. Load the big Amazon file (this has category info)
    amazon = pd.read_csv(os.path.join(processed_dir, "Amazon Sale Report.csv"))
    intl = pd.read_csv(
        os.path.join(processed_dir, "International sale Report.csv"),
        encoding="latin1",
    )

    # 2. Create sales_data.csv (Sales by Category)
    if not {"Category", "category"} & set(amazon.columns):
        # Make it visible that the output will not reflect real data.
        print("No category column in Amazon data; writing placeholder sales_data.csv")
    sales_by_cat = build_sales_by_category(amazon)
    sales_by_cat.to_csv(os.path.join(processed_dir, "sales_data.csv"), index=False)
    print("Created sales_data.csv")

    # 3. Create the main merged file (retailpulse.csv)
    merged = pd.concat(
        [amazon.assign(source="Amazon"), intl.assign(source="International")],
        ignore_index=True,
    )
    merged.to_csv(os.path.join(processed_dir, "retailpulse.csv"), index=False)
    print("Created retailpulse.csv – total rows:", len(merged))


if __name__ == "__main__":
    main()