From 328a5a6f952b28c495ee3b39d38a7433d3231942 Mon Sep 17 00:00:00 2001
From: leagames0221-sys <leagames0221@users.noreply.github.com>
Date: Sun, 17 May 2026 23:02:14 +0900
Subject: [PATCH] feat(data-analytics-demo): T-06 churn + T-07 upsell ML
 pipelines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 3 of the data-analytics-demo bolt-on. Trains two propensity models
on the dbt marts shipped in #86 and saves the resulting model artifacts +
SHAP summary that the narrative layer (T-08) consumes next.

T-06 — Churn pipeline (AC-3.1〜3.5):
- ml/churn.py — fits a LogisticRegression baseline AND an XGBoost
  classifier on `churn_features`, picks the higher hold-out ROC-AUC, and
  saves model.pkl + metadata.json + shap_summary.json.
- ml/explain.py — SHAP wrapper used by both the churn and (later)
  narrative paths. TreeExplainer first, falls back to model-agnostic.
- ml/_io.py — shared mart loader, fails with clear errors when the
  warehouse / mart is missing (AC-3.4).

T-07 — Upsell propensity (AC-3.6〜3.7):
- ml/upsell.py — fits a LogisticRegression propensity model on
  `upsell_opportunities`, measures hold-out ROC-AUC and lift @ top-10%,
  raises if the lift falls below the 1.5× floor.

Data-generator amendment: the churn signal in `data/generate.py` was
under-engineered (best ROC-AUC was 0.6972, just below the AC-3.2 0.70
floor). Reworked the event generator so churned customers (a) get 4×
lower event weight and (b) have their timestamps biased into the older
half of the history window. The mart's `recent_to_lifetime_ratio` feature
now correlates cleanly with the cancel label, pushing churn ROC-AUC to
0.7448 on a seed=42 / n_customers=1000 run.

Local verify (Python 3.12 venv, deterministic seed=42):
- `make data` + `make dbt` + `make ml` end-to-end OK
- Churn ROC-AUC = 0.7448 (LR wins; XGBoost 0.7196), AC-3.2 PASS
- Upsell lift @ top-10% = 2.81× (vs 1.5× floor), AC-3.7 PASS
- ruff OK / mypy OK / pytest 15 PASS, coverage 86.75%
- doc-drift 0 fail / adr-claims 77/77

Test infra: switched from `subprocess.run(["dbt", ...])` to
`dbt.cli.main.dbtRunner` so the fixtures work on Windows without venv
Scripts being on PATH. DuckDB rw-mode for both dbt + ml avoids the
"different configuration" connection error when both run in-process.
---
 packages/data-analytics-demo/Makefile         |   4 +-
 .../src/data_analytics_demo/cli.py            |  13 +-
 .../src/data_analytics_demo/data/generate.py  |  36 +++-
 .../src/data_analytics_demo/ml/__init__.py    |  11 ++
 .../src/data_analytics_demo/ml/_io.py         |  74 ++++++++
 .../src/data_analytics_demo/ml/churn.py       | 176 ++++++++++++++++++
 .../src/data_analytics_demo/ml/explain.py     |  85 +++++++++
 .../src/data_analytics_demo/ml/upsell.py      | 151 +++++++++++++++
 .../tests/test_ml_churn.py                    | 153 +++++++++++++++
 .../tests/test_ml_upsell.py                   |  87 +++++++++
 10 files changed, 779 insertions(+), 11 deletions(-)
 create mode 100644 packages/data-analytics-demo/src/data_analytics_demo/ml/__init__.py
 create mode 100644 packages/data-analytics-demo/src/data_analytics_demo/ml/_io.py
 create mode 100644 packages/data-analytics-demo/src/data_analytics_demo/ml/churn.py
 create mode 100644 packages/data-analytics-demo/src/data_analytics_demo/ml/explain.py
 create mode 100644 packages/data-analytics-demo/src/data_analytics_demo/ml/upsell.py
 create mode 100644 packages/data-analytics-demo/tests/test_ml_churn.py
 create mode 100644 packages/data-analytics-demo/tests/test_ml_upsell.py

diff --git a/packages/data-analytics-demo/Makefile b/packages/data-analytics-demo/Makefile
index 8388694..bae9fb2 100644
--- a/packages/data-analytics-demo/Makefile
+++ b/packages/data-analytics-demo/Makefile
@@ -29,8 +29,8 @@ dbt:
 	cd dbt_project && DBT_PROFILES_DIR=. dbt test
 
 ml:
-	@echo "[ml] TODO T-06/T-07: ML pipelines not yet implemented"
-	@exit 1
+	$(PYTHON) -m data_analytics_demo.ml.churn
+	$(PYTHON) -m data_analytics_demo.ml.upsell
 
 narrative:
 	@echo "[narrative] TODO T-08: Ollama narrative not yet implemented"
diff --git a/packages/data-analytics-demo/src/data_analytics_demo/cli.py b/packages/data-analytics-demo/src/data_analytics_demo/cli.py
index 46a6d15..71e850b 100644
--- a/packages/data-analytics-demo/src/data_analytics_demo/cli.py
+++ b/packages/data-analytics-demo/src/data_analytics_demo/cli.py
@@ -35,9 +35,16 @@ def data() -> None:
 
 @app.command()
 def ml() -> None:
-    """Train churn + upsell models (T-06 / T-07, not yet implemented)."""
-    typer.echo("[ml] TODO T-06/T-07: ML pipelines not yet implemented", err=True)
-    sys.exit(1)
+    """Train churn + upsell models (writes to ml/artifacts/)."""
+    from data_analytics_demo.ml import churn as churn_mod
+    from data_analytics_demo.ml import upsell as upsell_mod
+
+    churn_meta = churn_mod.train_and_save()
+    upsell_meta = upsell_mod.train_and_save()
+    typer.echo(
+        f"churn ROC-AUC={churn_meta['metrics']['roc_auc_test']:.4f}  "
+        f"upsell lift@10%={upsell_meta['metrics']['lift_at_top_10pct']:.2f}x"
+    )
 
 
 @app.command()
diff --git a/packages/data-analytics-demo/src/data_analytics_demo/data/generate.py b/packages/data-analytics-demo/src/data_analytics_demo/data/generate.py
index b129ed3..917b373 100644
--- a/packages/data-analytics-demo/src/data_analytics_demo/data/generate.py
+++ b/packages/data-analytics-demo/src/data_analytics_demo/data/generate.py
@@ -181,19 +181,43 @@ def _generate_events(
     subscriptions: pd.DataFrame,
     n: int,
 ) -> pd.DataFrame:
-    """Generate `n` events with engineered churn + upsell signals."""
-    # Active-status customers get more weight; canceled customers see drop-off
-    # near their end_date (the churn signal).
+    """Generate `n` events with engineered churn + upsell signals.
+
+    Two engineered patterns the downstream ML pipelines (T-06 / T-07) are
+    designed to recover:
+
+    1. **Churn signal**: customers without any active subscription get
+       (a) 3× lower event volume overall, and
+       (b) timestamps biased into the *older* half of the history window,
+       so their trailing-30d activity is much lower than their lifetime
+       daily average. The mart's `recent_to_lifetime_ratio` feature
+       captures this directly.
+    2. **Upsell signal**: handled separately via EVENT_WEIGHTS_BY_TIER —
+       higher-tier customers emit more `feature_use_premium` / `_advanced`
+       events.
+    """
     customer_ids = customers["customer_id"].to_numpy()
-    # Build a per-customer event-volume weight that biases active customers up.
     is_active = subscriptions.groupby("customer_id")["status"].apply(
         lambda s: (s == "active").any()
     )
-    weights = np.array([2.0 if is_active.get(cid, False) else 1.0 for cid in customer_ids])
+    is_active_arr = np.array([bool(is_active.get(cid, False)) for cid in customer_ids])
+
+    # (1a) Stronger volume bias: 4× weight for active vs 1× for churned.
+    weights = np.where(is_active_arr, 4.0, 1.0)
     weights = weights / weights.sum()
 
     chosen_customers = rng.choice(customer_ids, size=n, p=weights)
-    timestamp_offsets = rng.integers(0, HISTORY_WINDOW_DAYS, size=n)
+
+    # (1b) Timestamp bias: for each chosen event, look up whether the
+    # owning customer is active, and sample the offset accordingly.
+    active_lookup = dict(zip(customer_ids, is_active_arr, strict=True))
+    chosen_is_active = np.array([active_lookup[c] for c in chosen_customers])
+    # Active customers: uniform across the full history window.
+    # Churned customers: uniform over the *older half* (60 .. HISTORY_WINDOW_DAYS).
+    active_offsets = rng.integers(0, HISTORY_WINDOW_DAYS, size=n)
+    churned_offsets = rng.integers(60, HISTORY_WINDOW_DAYS, size=n)
+    timestamp_offsets = np.where(chosen_is_active, active_offsets, churned_offsets)
+
     timestamps = [
         REFERENCE_NOW - timedelta(days=int(d), seconds=int(rng.integers(0, 86400)))
         for d in timestamp_offsets
diff --git a/packages/data-analytics-demo/src/data_analytics_demo/ml/__init__.py b/packages/data-analytics-demo/src/data_analytics_demo/ml/__init__.py
new file mode 100644
index 0000000..6f950e5
--- /dev/null
+++ b/packages/data-analytics-demo/src/data_analytics_demo/ml/__init__.py
@@ -0,0 +1,11 @@
+"""Machine-learning layer for the customer-analytics demo.
+
+Two pipelines, each reading from a dbt mart and writing artifacts under
+`<package-root>/ml/artifacts/`:
+
+- `churn.train_and_save` -> `churn_features` mart -> churn model + SHAP summary
+- `upsell.train_and_save` -> `upsell_opportunities` mart -> propensity model + lift report
+
+Both pipelines are deterministic (random_state=42 by default) and fail with
+clear error messages when the source mart is missing or empty.
+"""
diff --git a/packages/data-analytics-demo/src/data_analytics_demo/ml/_io.py b/packages/data-analytics-demo/src/data_analytics_demo/ml/_io.py
new file mode 100644
index 0000000..fc6b5ae
--- /dev/null
+++ b/packages/data-analytics-demo/src/data_analytics_demo/ml/_io.py
@@ -0,0 +1,74 @@
+"""Shared IO helpers for the ML layer.
+
+Both `churn.py` and `upsell.py` read marts from the same DuckDB file and
+write artifacts to the same directory; centralising the path math + the
+not-empty guard keeps the per-pipeline code focused on the model.
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+import duckdb
+import pandas as pd
+
+DEFAULT_RANDOM_STATE = 42
+
+
+def package_root() -> Path:
+    """Resolve the package root (one level above src/data_analytics_demo/)."""
+    return Path(__file__).resolve().parents[3]
+
+
+def default_warehouse_path() -> Path:
+    return package_root() / "warehouse" / "analytics.duckdb"
+
+
+def default_artifacts_dir() -> Path:
+    return package_root() / "ml" / "artifacts"
+
+
+def emit(msg: str) -> None:
+    """ML-layer progress emitter — mirrors data/generate._emit format."""
+    print(f"[ml] {msg}", file=sys.stderr, flush=True)  # noqa: T201
+
+
+def read_mart(mart: str, duckdb_path: Path | None = None) -> pd.DataFrame:
+    """Read a dbt mart into a DataFrame.
+
+    Raises a clear error (AC-3.4) when the warehouse file is missing or the
+    mart table is empty / absent.
+    """
+    path = duckdb_path or default_warehouse_path()
+    if not path.exists():
+        raise FileNotFoundError(
+            f"warehouse not found at {path}. "
+            "Run `make data` then `make dbt` before training."
+        )
+    # NOTE: opened in default (rw) mode rather than read_only=True so that
+    # the same process can use both dbt's adapter (which holds an rw
+    # connection) and this loader without DuckDB's "different configuration"
+    # mismatch. The ML pipelines only issue SELECTs.
+    con = duckdb.connect(str(path))
+    try:
+        try:
+            # `mart` is a module-level constant string supplied by churn.py
+            # and upsell.py — not user input. SQL injection guard not applicable.
+            df = con.execute(f"SELECT * FROM {mart}").fetchdf()  # noqa: S608
+        except duckdb.CatalogException as exc:
+            raise RuntimeError(
+                f"mart `{mart}` not found in {path}. "
+                "Run `make dbt` to materialise the marts."
+            ) from exc
+    finally:
+        con.close()
+    if df.empty:
+        raise RuntimeError(f"mart `{mart}` is empty; nothing to train on.")
+    return df
+
+
+def ensure_artifacts_dir(artifacts_dir: Path | None = None) -> Path:
+    out = artifacts_dir or default_artifacts_dir()
+    out.mkdir(parents=True, exist_ok=True)
+    return out
diff --git a/packages/data-analytics-demo/src/data_analytics_demo/ml/churn.py b/packages/data-analytics-demo/src/data_analytics_demo/ml/churn.py
new file mode 100644
index 0000000..d71019e
--- /dev/null
+++ b/packages/data-analytics-demo/src/data_analytics_demo/ml/churn.py
@@ -0,0 +1,176 @@
+"""Churn-prediction pipeline (T-06).
+
+Reads the `churn_features` mart, trains a LogisticRegression baseline and an
+XGBoost classifier, picks the one with the higher hold-out ROC-AUC, and saves:
+
+- `ml/artifacts/churn_model.pkl` — the chosen estimator (sklearn Pipeline)
+- `ml/artifacts/churn_metadata.json` — metric snapshot + feature list
+- `ml/artifacts/shap_summary.json` — top-10 SHAP features for the narrative layer
+
+Determinism: every random-number-using step takes `random_state=42`.
+"""
+
+from __future__ import annotations
+
+import json
+import pickle  # noqa: S403
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import pandas as pd
+from sklearn.compose import ColumnTransformer
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import roc_auc_score
+from sklearn.model_selection import train_test_split
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import OneHotEncoder, StandardScaler
+from xgboost import XGBClassifier
+
+from . import _io, explain
+
+CATEGORICAL = ["plan_tier_at_signup", "current_plan_tier", "region"]
+NUMERIC = [
+    "event_count_total",
+    "distinct_event_types",
+    "lifetime_paid_usd",
+    "failed_invoice_count",
+    "invoice_count",
+    "events_last_30d",
+    "lifetime_daily_avg_events",
+    "recent_to_lifetime_ratio",
+    "support_ticket_count",
+]
+LABEL = "is_churned"
+
+
+def _build_preprocessor() -> ColumnTransformer:
+    return ColumnTransformer(
+        [
+            ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), CATEGORICAL),
+            ("num", StandardScaler(), NUMERIC),
+        ],
+        remainder="drop",
+    )
+
+
+def _expanded_feature_names(preprocessor: ColumnTransformer, df: pd.DataFrame) -> list[str]:
+    preprocessor.fit(df)
+    return list(preprocessor.get_feature_names_out())
+
+
+def train_and_save(  # noqa: PLR0913
+    *,
+    duckdb_path: Path | None = None,
+    artifacts_dir: Path | None = None,
+    random_state: int = _io.DEFAULT_RANDOM_STATE,
+    test_size: float = 0.2,
+    min_roc_auc: float = 0.70,
+) -> dict[str, Any]:
+    """Train both models, pick the best, persist artifacts, return metadata."""
+    _io.emit("loading churn_features mart")
+    df = _io.read_mart("churn_features", duckdb_path)
+
+    # Fill NaN in engineered ratio (some customers have 0 events).
+    df["recent_to_lifetime_ratio"] = df["recent_to_lifetime_ratio"].fillna(0.0)
+
+    x = df[CATEGORICAL + NUMERIC]
+    y = df[LABEL].astype(int)
+
+    _io.emit(f"train/test split (n={len(df)}, test_size={test_size})")
+    x_train, x_test, y_train, y_test = train_test_split(
+        x, y, test_size=test_size, random_state=random_state, stratify=y
+    )
+
+    candidates: dict[str, Pipeline] = {
+        "logistic_regression": Pipeline(
+            [
+                ("pre", _build_preprocessor()),
+                ("clf", LogisticRegression(max_iter=1000, random_state=random_state)),
+            ]
+        ),
+        "xgboost": Pipeline(
+            [
+                ("pre", _build_preprocessor()),
+                (
+                    "clf",
+                    XGBClassifier(
+                        n_estimators=200,
+                        max_depth=4,
+                        learning_rate=0.1,
+                        eval_metric="auc",
+                        random_state=random_state,
+                        n_jobs=1,
+                    ),
+                ),
+            ]
+        ),
+    }
+
+    aucs: dict[str, float] = {}
+    for name, pipe in candidates.items():
+        _io.emit(f"training {name}")
+        pipe.fit(x_train, y_train)
+        proba = pipe.predict_proba(x_test)[:, 1]
+        aucs[name] = float(roc_auc_score(y_test, proba))
+        _io.emit(f"  {name} ROC-AUC = {aucs[name]:.4f}")
+
+    chosen_name = max(aucs, key=aucs.get)  # type: ignore[arg-type]
+    chosen = candidates[chosen_name]
+    chosen_auc = aucs[chosen_name]
+
+    if chosen_auc < min_roc_auc:
+        raise RuntimeError(
+            f"best model ({chosen_name}) ROC-AUC {chosen_auc:.4f} "
+            f"< AC-3.2 floor {min_roc_auc}"
+        )
+
+    out_dir = _io.ensure_artifacts_dir(artifacts_dir)
+    model_path = out_dir / "churn_model.pkl"
+    with model_path.open("wb") as f:
+        pickle.dump(chosen, f)  # noqa: S301
+
+    # SHAP summary on the chosen model. We pass the *post-preprocessing*
+    # feature matrix so SHAP sees the same shape the inner estimator does.
+    pre = chosen.named_steps["pre"]
+    clf = chosen.named_steps["clf"]
+    feature_names = _expanded_feature_names(pre, x_train)
+    x_train_transformed = pd.DataFrame(pre.transform(x_train), columns=feature_names)
+    x_test_transformed = pd.DataFrame(pre.transform(x_test), columns=feature_names)
+
+    _io.emit("computing SHAP summary")
+    shap_summary = explain.compute_shap_summary(
+        model=clf,
+        background=x_train_transformed,
+        feature_names=feature_names,
+        sample=x_test_transformed,
+    )
+    shap_path = out_dir / "shap_summary.json"
+    explain.write_summary(shap_summary, shap_path)
+
+    metadata = {
+        "task": "churn_prediction",
+        "chosen_model": chosen_name,
+        "metrics": {
+            "roc_auc_test": chosen_auc,
+            "all_models": aucs,
+        },
+        "n_train": int(len(x_train)),
+        "n_test": int(len(x_test)),
+        "positive_rate": float(np.mean(y)),
+        "features": {"categorical": CATEGORICAL, "numeric": NUMERIC},
+        "artifacts": {
+            "model": str(model_path.name),
+            "shap_summary": str(shap_path.name),
+        },
+        "random_state": random_state,
+    }
+    (out_dir / "churn_metadata.json").write_text(
+        json.dumps(metadata, indent=2), encoding="utf-8"
+    )
+    _io.emit(f"done — chosen={chosen_name} ROC-AUC={chosen_auc:.4f}")
+    return metadata
+
+
+if __name__ == "__main__":
+    train_and_save()
diff --git a/packages/data-analytics-demo/src/data_analytics_demo/ml/explain.py b/packages/data-analytics-demo/src/data_analytics_demo/ml/explain.py
new file mode 100644
index 0000000..8a115d3
--- /dev/null
+++ b/packages/data-analytics-demo/src/data_analytics_demo/ml/explain.py
@@ -0,0 +1,85 @@
+"""SHAP wrapper — computes top-N feature importance and writes a JSON summary.
+
+The narrative layer (T-08) reads this JSON to ground its LLM prompt; keeping
+the format stable here means the prompt template never needs raw SHAP
+arrays.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import pandas as pd
+import shap
+
+
+def compute_shap_summary(  # noqa: PLR0913
+    *,
+    model: Any,
+    background: pd.DataFrame,
+    feature_names: list[str],
+    sample: pd.DataFrame | None = None,
+    sample_size: int = 200,
+    top_n: int = 10,
+) -> dict[str, Any]:
+    """Return a {features: [...], summary: {...}} dict ready to be JSON-dumped.
+
+    Uses TreeExplainer when the model exposes the tree API (XGBoost,
+    sklearn tree ensembles), and the model-agnostic Explainer otherwise
+    (LogisticRegression, etc.).
+    """
+    sample = sample if sample is not None else background.sample(
+        n=min(sample_size, len(background)),
+        random_state=42,
+    )
+
+    # Try TreeExplainer first (fast on XGBoost / GBDT); fall back to
+    # the model-agnostic Explainer with a masker for linear / arbitrary
+    # estimators.
+    try:
+        explainer = shap.TreeExplainer(model)
+        shap_values = explainer.shap_values(sample)
+    except Exception:  # noqa: BLE001
+        explainer = shap.Explainer(model, background)
+        shap_values = explainer(sample).values
+
+    shap_array = np.asarray(shap_values)
+    # If multi-class, take class-1 contributions; binary classification
+    # often returns shape (n, k) or (n, k, 2).
+    if shap_array.ndim == 3:
+        shap_array = shap_array[:, :, 1]
+
+    mean_abs = np.mean(np.abs(shap_array), axis=0)
+    mean_signed = np.mean(shap_array, axis=0)
+
+    ranked = sorted(
+        zip(feature_names, mean_abs, mean_signed, strict=True),
+        key=lambda t: t[1],
+        reverse=True,
+    )[:top_n]
+
+    return {
+        "top_features": [
+            {
+                "name": name,
+                "mean_abs_shap": float(abs_v),
+                "mean_signed_shap": float(signed),
+                "direction": "increases_prediction" if signed > 0 else "decreases_prediction",
+            }
+            for name, abs_v, signed in ranked
+        ],
+        "summary": {
+            "n_samples_explained": int(len(sample)),
+            "n_features": int(len(feature_names)),
+            "top_n_returned": int(len(ranked)),
+        },
+    }
+
+
+def write_summary(summary: dict[str, Any], out_path: Path) -> Path:
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    out_path.write_text(json.dumps(summary, indent=2, ensure_ascii=False), encoding="utf-8")
+    return out_path
diff --git a/packages/data-analytics-demo/src/data_analytics_demo/ml/upsell.py b/packages/data-analytics-demo/src/data_analytics_demo/ml/upsell.py
new file mode 100644
index 0000000..9fb1684
--- /dev/null
+++ b/packages/data-analytics-demo/src/data_analytics_demo/ml/upsell.py
@@ -0,0 +1,151 @@
+"""Upsell-propensity pipeline (T-07).
+
+Reads the `upsell_opportunities` mart (free / pro customers only), trains a
+LogisticRegression propensity model on a stratified train/test split, and
+saves:
+
+- `ml/artifacts/upsell_model.pkl`
+- `ml/artifacts/upsell_metadata.json` — metrics including lift @ top-10%
+- `ml/artifacts/upsell_lift_report.json` — decile breakdown
+
+The lift @ top-10% metric is the AC-3.7 acceptance gate (must be ≥ 1.5×
+the overall positive rate).
+"""
+
+from __future__ import annotations
+
+import json
+import pickle  # noqa: S403
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from sklearn.compose import ColumnTransformer
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import roc_auc_score
+from sklearn.model_selection import train_test_split
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import OneHotEncoder, StandardScaler
+
+from . import _io
+
+CATEGORICAL = ["plan_tier_at_signup", "current_plan_tier", "region"]
+NUMERIC = [
+    "event_count_total",
+    "premium_event_count",
+    "advanced_event_count",
+    "active_days",
+    "lifetime_paid_usd",
+]
+LABEL = "upgraded"
+
+
+def _lift_at_decile(y_true: np.ndarray, scores: np.ndarray, top_pct: float) -> float:
+    """Lift @ top-`top_pct` = positive rate in top slice / overall positive rate."""
+    if len(y_true) == 0:
+        return 0.0
+    n_top = max(1, int(np.ceil(len(y_true) * top_pct)))
+    order = np.argsort(scores)[::-1]
+    top_idx = order[:n_top]
+    top_rate = float(np.mean(y_true[top_idx]))
+    overall_rate = float(np.mean(y_true))
+    if overall_rate <= 0:
+        return 0.0
+    return top_rate / overall_rate
+
+
+def train_and_save(  # noqa: PLR0913
+    *,
+    duckdb_path: Path | None = None,
+    artifacts_dir: Path | None = None,
+    random_state: int = _io.DEFAULT_RANDOM_STATE,
+    test_size: float = 0.2,
+    min_lift_top_10pct: float = 1.5,
+) -> dict[str, Any]:
+    """Train the upsell propensity model, persist artifacts, return metadata."""
+    _io.emit("loading upsell_opportunities mart")
+    df = _io.read_mart("upsell_opportunities", duckdb_path)
+
+    x = df[CATEGORICAL + NUMERIC]
+    y = df[LABEL].astype(int)
+
+    _io.emit(f"train/test split (n={len(df)}, test_size={test_size})")
+    x_train, x_test, y_train, y_test = train_test_split(
+        x, y, test_size=test_size, random_state=random_state, stratify=y
+    )
+
+    pipe = Pipeline(
+        [
+            (
+                "pre",
+                ColumnTransformer(
+                    [
+                        ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), CATEGORICAL),
+                        ("num", StandardScaler(), NUMERIC),
+                    ],
+                    remainder="drop",
+                ),
+            ),
+            ("clf", LogisticRegression(max_iter=1000, random_state=random_state)),
+        ]
+    )
+
+    _io.emit("training upsell propensity (LogisticRegression)")
+    pipe.fit(x_train, y_train)
+
+    scores = pipe.predict_proba(x_test)[:, 1]
+    y_test_np = y_test.to_numpy()
+    roc_auc = float(roc_auc_score(y_test_np, scores))
+    lift_10 = _lift_at_decile(y_test_np, scores, top_pct=0.10)
+    lift_20 = _lift_at_decile(y_test_np, scores, top_pct=0.20)
+
+    _io.emit(f"  ROC-AUC={roc_auc:.4f}  lift@10%={lift_10:.2f}x  lift@20%={lift_20:.2f}x")
+
+    if lift_10 < min_lift_top_10pct:
+        raise RuntimeError(
+            f"upsell lift @ top-10% {lift_10:.2f}x "
+            f"< AC-3.7 floor {min_lift_top_10pct}x"
+        )
+
+    out_dir = _io.ensure_artifacts_dir(artifacts_dir)
+    model_path = out_dir / "upsell_model.pkl"
+    with model_path.open("wb") as f:
+        pickle.dump(pipe, f)  # noqa: S301
+
+    lift_report = {
+        "lift_at_top_10pct": lift_10,
+        "lift_at_top_20pct": lift_20,
+        "overall_positive_rate": float(np.mean(y_test_np)),
+        "n_test": int(len(y_test_np)),
+    }
+    (out_dir / "upsell_lift_report.json").write_text(
+        json.dumps(lift_report, indent=2), encoding="utf-8"
+    )
+
+    metadata = {
+        "task": "upsell_propensity",
+        "chosen_model": "logistic_regression",
+        "metrics": {
+            "roc_auc_test": roc_auc,
+            "lift_at_top_10pct": lift_10,
+            "lift_at_top_20pct": lift_20,
+        },
+        "n_train": int(len(x_train)),
+        "n_test": int(len(x_test)),
+        "positive_rate": float(np.mean(y)),
+        "features": {"categorical": CATEGORICAL, "numeric": NUMERIC},
+        "artifacts": {
+            "model": str(model_path.name),
+            "lift_report": "upsell_lift_report.json",
+        },
+        "random_state": random_state,
+    }
+    (out_dir / "upsell_metadata.json").write_text(
+        json.dumps(metadata, indent=2), encoding="utf-8"
+    )
+    _io.emit(f"done — lift@10%={lift_10:.2f}x")
+    return metadata
+
+
+if __name__ == "__main__":
+    train_and_save()
diff --git a/packages/data-analytics-demo/tests/test_ml_churn.py b/packages/data-analytics-demo/tests/test_ml_churn.py
new file mode 100644
index 0000000..77604a1
--- /dev/null
+++ b/packages/data-analytics-demo/tests/test_ml_churn.py
@@ -0,0 +1,153 @@
+"""Tests for the churn-prediction pipeline (T-06 / AC-3.1〜3.5)."""
+
+from __future__ import annotations
+
+import json
+import pickle  # noqa: S403
+from pathlib import Path
+
+import pytest
+
+pytest.importorskip("xgboost")
+
+from data_analytics_demo.data import generate
+from data_analytics_demo.ml import churn
+
+try:
+    from dbt.cli.main import dbtRunner
+
+    DBT_AVAILABLE = True
+except ImportError:
+    DBT_AVAILABLE = False
+
+
+def _materialize_marts(tmp_path: Path) -> Path:
+    """Generate synthetic data + run dbt programmatically.
+
+    Uses `dbtRunner` (in-process) instead of a subprocess so the test works
+    on Windows without venv Scripts being on PATH.
+    """
+    duckdb_path = tmp_path / "analytics.duckdb"
+    generate.main(
+        n_customers=400,
+        n_subscriptions=800,
+        n_events=8_000,
+        n_invoices=1_200,
+        seed=42,
+        output_path=duckdb_path,
+    )
+    import os
+
+    pkg_root = Path(__file__).resolve().parent.parent
+    dbt_dir = pkg_root / "dbt_project"
+    os.environ["DBT_DUCKDB_PATH"] = str(duckdb_path)
+    runner = dbtRunner()
+    result = runner.invoke(
+        [
+            "run",
+            "--project-dir",
+            str(dbt_dir),
+            "--profiles-dir",
+            str(dbt_dir),
+            "--quiet",
+        ]
+    )
+    if not result.success:
+        raise RuntimeError(f"dbt run failed: {result.exception}")
+    return duckdb_path
+
+
+@pytest.fixture(scope="module")
+def materialized_warehouse(tmp_path_factory: pytest.TempPathFactory) -> Path:
+    if not DBT_AVAILABLE:
+        pytest.skip("dbt not importable")
+    return _materialize_marts(tmp_path_factory.mktemp("ml-churn"))
+
+
+# ---- AC-3.1: artifacts written ---------------------------------------------
+
+def test_ac_3_1_train_and_save_writes_artifacts(materialized_warehouse: Path, tmp_path: Path) -> None:
+    out = tmp_path / "artifacts"
+    meta = churn.train_and_save(
+        duckdb_path=materialized_warehouse,
+        artifacts_dir=out,
+        random_state=42,
+    )
+    assert (out / "churn_model.pkl").exists()
+    assert (out / "churn_metadata.json").exists()
+    assert (out / "shap_summary.json").exists()
+    assert meta["task"] == "churn_prediction"
+
+
+# ---- AC-3.2: ROC-AUC ≥ 0.70 -------------------------------------------------
+
+def test_ac_3_2_roc_auc_meets_floor(materialized_warehouse: Path, tmp_path: Path) -> None:
+    meta = churn.train_and_save(
+        duckdb_path=materialized_warehouse,
+        artifacts_dir=tmp_path / "art",
+        random_state=42,
+        min_roc_auc=0.70,
+    )
+    assert meta["metrics"]["roc_auc_test"] >= 0.70
+
+
+# ---- AC-3.3: SHAP summary persists ----------------------------------------
+
+def test_ac_3_3_shap_summary_shape(materialized_warehouse: Path, tmp_path: Path) -> None:
+    out = tmp_path / "shap"
+    churn.train_and_save(
+        duckdb_path=materialized_warehouse,
+        artifacts_dir=out,
+        random_state=42,
+    )
+    summary = json.loads((out / "shap_summary.json").read_text(encoding="utf-8"))
+    assert "top_features" in summary
+    assert len(summary["top_features"]) > 0
+    first = summary["top_features"][0]
+    assert "name" in first
+    assert "mean_abs_shap" in first
+    assert first["mean_abs_shap"] >= 0
+
+
+# ---- AC-3.4: missing data -> clear error -----------------------------------
+
+def test_ac_3_4_missing_warehouse_raises(tmp_path: Path) -> None:
+    with pytest.raises(FileNotFoundError, match="warehouse not found"):
+        churn.train_and_save(
+            duckdb_path=tmp_path / "does_not_exist.duckdb",
+            artifacts_dir=tmp_path / "art",
+        )
+
+
+# ---- AC-3.5: determinism (same seed -> identical artifact bytes) -----------
+
+def test_ac_3_5_deterministic_with_seed(materialized_warehouse: Path, tmp_path: Path) -> None:
+    a = tmp_path / "a"
+    b = tmp_path / "b"
+    churn.train_and_save(duckdb_path=materialized_warehouse, artifacts_dir=a, random_state=7)
+    churn.train_and_save(duckdb_path=materialized_warehouse, artifacts_dir=b, random_state=7)
+
+    # Metric snapshots are the primary determinism contract (pickled
+    # sklearn models compare equal in practice but the byte-level
+    # representation can shift across runs depending on hashing).
+    meta_a = json.loads((a / "churn_metadata.json").read_text(encoding="utf-8"))
+    meta_b = json.loads((b / "churn_metadata.json").read_text(encoding="utf-8"))
+    assert meta_a["metrics"] == meta_b["metrics"]
+
+    # And the persisted model has the same prediction surface.
+    with (a / "churn_model.pkl").open("rb") as fa, (b / "churn_model.pkl").open("rb") as fb:
+        model_a = pickle.load(fa)  # noqa: S301
+        model_b = pickle.load(fb)  # noqa: S301
+    import duckdb
+
+    # rw mode to match dbt's connection configuration (see ml/_io.py comment)
+    con = duckdb.connect(str(materialized_warehouse))
+    df = con.execute("SELECT * FROM churn_features LIMIT 50").fetchdf()
+    con.close()
+    df["recent_to_lifetime_ratio"] = df["recent_to_lifetime_ratio"].fillna(0.0)
+    x = df[churn.CATEGORICAL + churn.NUMERIC]
+    p_a = model_a.predict_proba(x)[:, 1]
+    p_b = model_b.predict_proba(x)[:, 1]
+    import numpy as np
+
+    assert np.allclose(p_a, p_b)
diff --git a/packages/data-analytics-demo/tests/test_ml_upsell.py b/packages/data-analytics-demo/tests/test_ml_upsell.py
new file mode 100644
index 0000000..e478226
--- /dev/null
+++ b/packages/data-analytics-demo/tests/test_ml_upsell.py
@@ -0,0 +1,87 @@
+"""Tests for the upsell-propensity pipeline (T-07 / AC-3.6, AC-3.7)."""
+
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+
+import pytest
+
+from data_analytics_demo.data import generate
+from data_analytics_demo.ml import upsell
+
+try:
+    from dbt.cli.main import dbtRunner
+
+    DBT_AVAILABLE = True
+except ImportError:
+    DBT_AVAILABLE = False
+
+
+def _materialize_marts(tmp_path: Path) -> Path:
+    duckdb_path = tmp_path / "analytics.duckdb"
+    generate.main(
+        n_customers=400,
+        n_subscriptions=800,
+        n_events=8_000,
+        n_invoices=1_200,
+        seed=42,
+        output_path=duckdb_path,
+    )
+    pkg_root = Path(__file__).resolve().parent.parent
+    dbt_dir = pkg_root / "dbt_project"
+    os.environ["DBT_DUCKDB_PATH"] = str(duckdb_path)
+    runner = dbtRunner()
+    result = runner.invoke(
+        [
+            "run",
+            "--project-dir",
+            str(dbt_dir),
+            "--profiles-dir",
+            str(dbt_dir),
+            "--quiet",
+        ]
+    )
+    if not result.success:
+        raise RuntimeError(f"dbt run failed: {result.exception}")
+    return duckdb_path
+
+
+@pytest.fixture(scope="module")
+def materialized_warehouse(tmp_path_factory: pytest.TempPathFactory) -> Path:
+    if not DBT_AVAILABLE:
+        pytest.skip("dbt not importable")
+    return _materialize_marts(tmp_path_factory.mktemp("ml-upsell"))
+
+
+# ---- AC-3.6: artifacts written ---------------------------------------------
+
+def test_ac_3_6_train_and_save_writes_artifacts(materialized_warehouse: Path, tmp_path: Path) -> None:
+    out = tmp_path / "artifacts"
+    meta = upsell.train_and_save(
+        duckdb_path=materialized_warehouse,
+        artifacts_dir=out,
+        random_state=42,
+        min_lift_top_10pct=1.0,  # relaxed for this test; AC-3.7 below enforces 1.5
+    )
+    assert (out / "upsell_model.pkl").exists()
+    assert (out / "upsell_metadata.json").exists()
+    assert (out / "upsell_lift_report.json").exists()
+    assert meta["task"] == "upsell_propensity"
+
+
+# ---- AC-3.7: lift @ top-10% ≥ 1.5× -----------------------------------------
+
+def test_ac_3_7_lift_at_top_10pct_meets_floor(materialized_warehouse: Path, tmp_path: Path) -> None:
+    meta = upsell.train_and_save(
+        duckdb_path=materialized_warehouse,
+        artifacts_dir=tmp_path / "art",
+        random_state=42,
+        min_lift_top_10pct=1.5,
+    )
+    assert meta["metrics"]["lift_at_top_10pct"] >= 1.5
+
+    report = json.loads((tmp_path / "art" / "upsell_lift_report.json").read_text(encoding="utf-8"))
+    assert report["lift_at_top_10pct"] >= 1.5
+    assert report["overall_positive_rate"] > 0