MmelIGaba · MmelIGaba · Nov 27, 2025 · Nov 27, 2025
diff --git a/dashboard/app.py b/dashboard/app.py
@@ -1,45 +1,225 @@
-try:
-	import streamlit as st
-except Exception:
-	class _DummyST:
-		def title(self, *args, **kwargs):
-			print("TITLE:", *args)
-		def plotly_chart(self, *args, **kwargs):
-			print("PLOTLY CHART:", args[0] if args else None)
-		def warning(self, *args, **kwargs):
-			print("WARNING:", *args)
-		def bar_chart(self, *args, **kwargs):
-			print("BAR CHART:", args[0] if args else None)
-		def dataframe(self, *args, **kwargs):
-			if args and hasattr(args[0], "head"):
-				print("DATAFRAME PREVIEW:")
-				print(args[0].head())
-			else:
-				print("DATAFRAME:", args[0] if args else None)
-	st = _DummyST()
-
+import os
 import pandas as pd
+import streamlit as st
+import plotly.express as px
+import plotly.graph_objects as go
+import numpy as np
+from datetime import datetime
+
+# ====================== PAGE CONFIG ======================
+# Browser-tab title/icon and overall layout for the dashboard page.
+st.set_page_config(
+    page_title="RetailPulse Pro - Ultimate Retail Analytics",
+    # page_icon takes an emoji, an emoji shortcode, or an image; the plain
+    # text "Shopping Cart" is none of those, so use the cart shortcode.
+    page_icon=":shopping_cart:",
+    layout="wide",
+    initial_sidebar_state="expanded",
+)
+
+# ====================== PATHS & AUTO DISCOVERY ======================
+# Resolve the project root from this file's location (dashboard/app.py sits
+# one level below it) instead of a machine-specific absolute path, so the
+# dashboard runs from any checkout. Set RETAILPULSE_BASE_DIR to override.
+BASE_DIR = os.environ.get(
+    "RETAILPULSE_BASE_DIR",
+    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
+)
+PROCESSED_DIR = os.path.join(BASE_DIR, "data", "processed")
+
+def find_file(patterns):
+    """Locate a processed data file whose name contains one of the patterns.
+
+    Args:
+        patterns: One or more name fragments separated by "|". Fragments are
+            tried left to right and matched case-insensitively as substrings
+            of the file name.
+
+    Returns:
+        Full path of the alphabetically first regular file in PROCESSED_DIR
+        that matches the earliest fragment matching anything, or None when
+        the directory is missing or nothing matches.
+    """
+    if not os.path.isdir(PROCESSED_DIR):
+        return None
+    # os.listdir returns entries in arbitrary order; list once and sort so the
+    # same file is picked on every run when several names match a fragment.
+    names = sorted(
+        name for name in os.listdir(PROCESSED_DIR)
+        if os.path.isfile(os.path.join(PROCESSED_DIR, name))
+    )
+    for fragment in (part.strip().lower() for part in patterns.split("|")):
+        for name in names:
+            if fragment in name.lower():
+                return os.path.join(PROCESSED_DIR, name)
+    return None
 
-try:
-	import plotly.express as px
-except Exception:
-	px = None
+# Dataset key -> filename pattern(s) handed to find_file(); the resulting
+# value is the resolved path, or None when nothing matches.
+# NOTE(review): "Sale Report" is also a substring of the Amazon and
+# International report names, so "sale_report" may resolve to one of those.
+_FILE_PATTERNS = (
+    ("amazon", "Amazon Sale Report"),
+    ("international", "International sale Report"),
+    ("may2022", "May-2022"),
+    ("pl_march", "P L March 2021|PL March"),
+    ("expense", "Expense IIGF"),
+    ("warehouse", "Cloud Warehouse Compersion|warehouse"),
+    ("sale_report", "Sale Report"),
+)
+files = {key: find_file(pattern) for key, pattern in _FILE_PATTERNS}
 
-st.title("RetailPulse Dashboard")
+@st.cache_data(ttl=3600)
+def load(path):
+    """Read a CSV file into a DataFrame, trying several text encodings.
+
+    Args:
+        path: Path of the CSV file; may be None or empty.
+
+    Returns:
+        The parsed DataFrame, or an empty DataFrame when the path is missing
+        or the file cannot be read with any of the encodings (a Streamlit
+        warning naming the file is shown in the latter case).
+    """
+    if not path or not os.path.exists(path):
+        return pd.DataFrame()
+    # "latin1" maps every byte to a character, so decoding itself cannot fail
+    # once it is reached; later entries only matter after a non-decoding error.
+    for enc in ["utf-8", "latin1", "cp1252", "iso-8859-1"]:
+        try:
+            return pd.read_csv(path, encoding=enc, low_memory=False)
+        except (UnicodeError, pd.errors.ParserError, pd.errors.EmptyDataError, OSError):
+            # Only expected read failures move on to the next encoding; a bare
+            # except would also swallow KeyboardInterrupt and real bugs.
+            continue
+    st.warning(f"Could not read {os.path.basename(path)}")
+    return pd.DataFrame()
 
-sales_df = pd.read_csv("data/processed/sales_data.csv")
+# Load everything: one DataFrame per discovered file, in a fixed key order
+amazon, intl, may, pl, expense, warehouse = (
+    load(files[key])
+    for key in ("amazon", "international", "may2022", "pl_march", "expense", "warehouse")
+)
 
-# Plot sales by category (use Plotly if available, otherwise fallback to Streamlit)
-if px is not None:
-	fig = px.bar(sales_df, x="category", y="sales", title="Sales by Category")
-	st.plotly_chart(fig)
+# ====================== MERGE SALES DATA ======================
+# Tag each non-empty sales frame with its origin and stack them in one pass.
+# A single concat avoids re-copying a growing frame on every iteration and
+# never feeds an empty placeholder DataFrame into pd.concat.
+sales_frames = [
+    data.assign(Source=source)
+    for data, source in [(amazon, "Amazon India"), (intl, "International"), (may, "May-2022")]
+    if not data.empty
+]
+df = pd.concat(sales_frames, ignore_index=True) if sales_frames else pd.DataFrame()
+
+# Without any sales rows there is nothing to chart, so halt the script here.
+if df.empty:
+    st.error("No sales data found in data/processed folder!")
+    st.stop()
+
+# ====================== SMART COLUMN DETECTION ======================
+# Date column — try many possible names
+date_candidates = ["Date", "Order Date", "order_date", "date", "OrderDate", "ship-date", "Date "]
+# First candidate that exists in the merged frame wins; None when none exist.
+date_col = next((col for col in date_candidates if col in df.columns), None)
+
+if date_col:
+    # Values that cannot be parsed become NaT rather than raising.
+    df["Date"] = pd.to_datetime(df[date_col], errors="coerce")
 else:
-	st.warning("plotly.express is not installed; falling back to Streamlit charts. Install with: pip install plotly")
-	grouped = sales_df.groupby("category", as_index=False)["sales"].sum()
-	st.bar_chart(grouped.set_index("category")["sales"])
+    # Placeholder dates: one consecutive day per row, starting 2022-01-01.
+    st.warning("No date column found → using dummy dates")
+    df["Date"] = pd.date_range("2022-01-01", periods=len(df), freq="D")
+
+def _first_present(candidates):
+    """Return the first name in candidates that is a column of df, else None."""
+    for name in candidates:
+        if name in df.columns:
+            return name
+    return None
+
+
+# Amount / Sales column: numeric, unparseable or missing values become 0;
+# a constant 0 is used when no candidate column exists
+amount_candidates = ["Amount", "Sales", "sales", "Total", "Price", "Amt", "amount"]
+amount_col = _first_present(amount_candidates)
+df["Sales"] = pd.to_numeric(df[amount_col], errors="coerce").fillna(0) if amount_col else 0
+
+# Quantity: same conversion; a constant 1 is used when no candidate exists
+qty_candidates = ["Qty", "Quantity", "qty", "QTY", "quantity", "Units"]
+qty_col = _first_present(qty_candidates)
+df["Qty"] = pd.to_numeric(df[qty_col], errors="coerce").fillna(0) if qty_col else 1
+
+# Category (copied under a canonical name when a candidate exists)
+cat_candidates = ["Category", "category", "Product Category", "item", "Item"]
+cat_col = _first_present(cat_candidates)
+if cat_col is not None:
+    df["Category"] = df[cat_col]
+
+# State (title-cased so differently cased spellings group together)
+state_candidates = ["ship-state", "Ship-State", "State", "ship_state", "state"]
+state_col = _first_present(state_candidates)
+if state_col is not None:
+    df["State"] = df[state_col].str.title()
+
+# Status
+status_candidates = ["Status", "status", "order_status", "Order Status"]
+status_col = _first_present(status_candidates)
+if status_col is not None:
+    df["Status"] = df[status_col]
+
+# Size (only looked up when a "Size" column is not already present)
+if "Size" not in df.columns:
+    size_candidates = ["Size", "size", "SIZE"]
+    size_col = _first_present(size_candidates)
+    if size_col is not None:
+        df["Size"] = df[size_col]
+
+# Revenue = Sales × Qty
+# NOTE(review): if the amount column already holds an order-line total, this
+# counts quantity twice — confirm against the source data.
+df["Revenue"] = df["Sales"].mul(df["Qty"])
+
+# Final date features: sortable "YYYY-MM" key and a display label
+dates = df["Date"].dt
+df["Month"] = dates.strftime("%Y-%m")
+df["MonthName"] = dates.strftime("%b %Y")
+
+# ====================== SIDEBAR FILTERS ======================
+st.sidebar.header("Filters")
+
+# Date range (skipped when no row has a usable date)
+if df["Date"].notna().any():
+    first_day = df["Date"].min().date()
+    last_day = df["Date"].max().date()
+    date_range = st.sidebar.slider(
+        "Date Range",
+        min_value=first_day,
+        max_value=last_day,
+        value=(first_day, last_day),
+    )
+    # The slider yields plain dates, which convert to midnight timestamps.
+    # Compare against the start of the day *after* the selected end date so
+    # rows carrying a time of day on that final date are kept. Rows whose
+    # date is NaT fail both comparisons and are dropped.
+    range_start = pd.Timestamp(date_range[0])
+    range_end = pd.Timestamp(date_range[1]) + pd.Timedelta(days=1)
+    df = df[(df["Date"] >= range_start) & (df["Date"] < range_end)]
+
+
+# Other filters: an empty selection means "no restriction"
+if "Category" in df.columns:
+    cat_filter = st.sidebar.multiselect("Category", options=sorted(df["Category"].dropna().unique()))
+    if cat_filter:
+        df = df[df["Category"].isin(cat_filter)]
+
+if "State" in df.columns:
+    state_filter = st.sidebar.multiselect("State", options=sorted(df["State"].dropna().unique()))
+    if state_filter:
+        df = df[df["State"].isin(state_filter)]
+
+# Source starts with every source selected; deselecting all leaves no rows
+source_filter = st.sidebar.multiselect("Source", options=df["Source"].unique(), default=df["Source"].unique())
+df = df[df["Source"].isin(source_filter)]
+
+# ====================== KPIs ======================
+# Headline numbers for the currently filtered rows, one per column
+c1, c2, c3, c4, c5 = st.columns(5)
+total_revenue = df["Revenue"].sum()
+order_count = len(df)
+with c1:
+    st.metric("Total Revenue", f"₹{total_revenue:,.0f}")
+with c2:
+    st.metric("Total Orders", f"{order_count:,}")
+with c3:
+    st.metric("Units Sold", f"{df['Qty'].sum():,.0f}")
+with c4:
+    # Average order value; plain 0 when no rows remain, avoiding division by zero
+    st.metric("AOV", f"₹{total_revenue/order_count:,.0f}" if order_count else 0)
+with c5:
+    # Rows whose status mentions "cancel" or "return" (case-insensitive)
+    if "Status" in df.columns:
+        is_cancelled = df["Status"].str.contains("Cancel|Return", case=False, na=False)
+        cancelled = int(is_cancelled.sum())
+    else:
+        cancelled = 0
+    st.metric("Cancelled", f"{cancelled:,}")
+
+# ====================== CHARTS (safe versions) ======================
+st.markdown("---")
+# Two rows of two chart columns each
+r1 = st.columns(2)
+r2 = st.columns(2)
+
+# Monthly trend
+with r1[0]:
+    st.subheader("Monthly Revenue")
+    # Group on the sortable "YYYY-MM" key together with the display label:
+    # sorting the "%b %Y" label alone orders months alphabetically
+    # (Apr, Aug, Dec, ...) instead of chronologically.
+    monthly = (
+        df.groupby(["Month", "MonthName"])["Revenue"].sum()
+        .reset_index()
+        .sort_values("Month")
+    )
+    fig = px.line(monthly, x="MonthName", y="Revenue", markers=True, title="Revenue Trend")
+    st.plotly_chart(fig, use_container_width=True)
+
+# Category sales
+with r1[1]:
+    st.subheader("Sales by Category")
+    if "Category" in df.columns:
+        # Ten highest-revenue categories, largest first
+        revenue_by_category = df.groupby("Category")["Revenue"].sum()
+        top_categories = revenue_by_category.sort_values(ascending=False).head(10)
+        fig = px.bar(top_categories, color=top_categories.index, text_auto=True)
+        st.plotly_chart(fig, use_container_width=True)
+
+# State map
+with r2[0]:
+    st.subheader("Sales by State")
+    if "State" in df.columns:
+        # Revenue summed per state
+        state_sales = df.groupby("State", as_index=False)["Revenue"].sum()
+        # NOTE(review): locationmode="country names" matches locations against
+        # country names; state names will likely not resolve — confirm whether
+        # a GeoJSON-based map was intended.
+        fig = px.choropleth(
+            state_sales,
+            locations="State",
+            locationmode="country names",
+            color="Revenue",
+            scope="asia",
+            color_continuous_scale="Reds",
+        )
+        fig.update_geos(fitbounds="locations", visible=False)
+        st.plotly_chart(fig, use_container_width=True)
+
+# Top products
+with r2[1]:
+    st.subheader("Top 10 Products")
+    # Prefer "Style" as the product key, then "SKU"; no chart when neither exists
+    if "Style" in df.columns:
+        prod_col = "Style"
+    elif "SKU" in df.columns:
+        prod_col = "SKU"
+    else:
+        prod_col = None
+    if prod_col:
+        revenue_by_product = df.groupby(prod_col)["Revenue"].sum()
+        top = revenue_by_product.nlargest(10)
+        fig = px.bar(top, orientation="h")
+        st.plotly_chart(fig, use_container_width=True)
+
+# Download: the filtered rows serialized as CSV without the index
+st.download_button(
+    "Download Filtered Data",
+    df.to_csv(index=False),
+    "retailpulse_filtered.csv",
+    "text/csv",
+)
 
-# Load processed data
-df = pd.read_csv("etl/processed/retailpulse.csv")
+# Raw tabs
+st.markdown("---")
+tabs = st.tabs(["Amazon", "International", "May-2022", "P&L", "Expense", "Warehouse"])
+# The three sales sources show their first 100 rows; the rest are shown whole
+tab_frames = [amazon.head(100), intl.head(100), may.head(100), pl, expense, warehouse]
+for tab, frame in zip(tabs, tab_frames):
+    with tab:
+        st.dataframe(frame, use_container_width=True)
 
-# Display preview
-st.dataframe(df)
+st.success("RetailPulse Pro Dashboard is LIVE!")
diff --git a/dashboard/create_missing_files.py b/dashboard/create_missing_files.py
@@ -0,0 +1,27 @@
+import pandas as pd
+import os
+
+# Resolve <project root>/data/processed relative to this script
+# (dashboard/create_missing_files.py) rather than a machine-specific absolute
+# path; RETAILPULSE_BASE_DIR overrides the project root when set.
+base_dir = os.environ.get(
+    "RETAILPULSE_BASE_DIR",
+    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
+)
+processed_dir = os.path.join(base_dir, "data", "processed")
+
+# 1. Load the big Amazon file (this has category info)
+amazon = pd.read_csv(os.path.join(processed_dir, "Amazon Sale Report.csv"))
+intl   = pd.read_csv(os.path.join(processed_dir, "International sale Report.csv"), encoding="latin1")
+
+# 2. Create sales_data.csv (Sales by Category)
+# Accept either capitalisation of the category column. Fall back to dummy
+# figures when it, or the "Amount" column that is summed, is missing, so a
+# missing "Amount" no longer raises KeyError.
+category_col = next((col for col in ("Category", "category") if col in amazon.columns), None)
+if category_col is not None and "Amount" in amazon.columns:
+    sales_by_cat = amazon.groupby(category_col)["Amount"].sum().reset_index()
+    sales_by_cat.columns = ["category", "sales"]
+else:
+    # fallback dummy
+    sales_by_cat = pd.DataFrame({"category": ["Set", "Kurta", "Blouse"], "sales": [300000, 200000, 150000]})
+
+sales_by_cat.to_csv(os.path.join(processed_dir, "sales_data.csv"), index=False)
+print("Created sales_data.csv")
+
+# 3. Create the main merged file (retailpulse.csv)
+# Each source is tagged in a "source" column before the frames are stacked.
+merged = pd.concat([amazon.assign(source="Amazon"), intl.assign(source="International")], ignore_index=True)
+merged.to_csv(os.path.join(processed_dir, "retailpulse.csv"), index=False)
+print("Created retailpulse.csv – total rows:", len(merged))