diff --git a/agentic_eda/jupyterlab_extension_backend/.gitignore b/agentic_eda/jupyterlab_extension_backend/.gitignore new file mode 100644 index 000000000..b41013075 --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/.gitignore @@ -0,0 +1,20 @@ +# OS files +.DS_Store + +# Python cache/build artifacts +__pycache__/ +*.py[cod] +*.pyo +*.pyd + +# Secrets and local environment files +.env +*.env +config/.env +*.secret +*secret* +*.key +*.pem +langchain-reference +AGENTS.md +traces/ diff --git a/agentic_eda/jupyterlab_extension_backend/README.md b/agentic_eda/jupyterlab_extension_backend/README.md new file mode 100644 index 000000000..d3a9b1185 --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/README.md @@ -0,0 +1,19 @@ +# JupyterLab Extension Backend + +Run the backend entrypoint from this directory: + +```bash +cd /Users/indro/src/tutorials1/agentic_eda/jupyterlab_extension_backend +python -m src.main \ + --mode integrity \ + --path /Users/indro/src/tutorials1/agentic_eda/jupyterlab_extension_backend/datasets/T1_slice.csv +``` + +If you run from a different directory, set `PYTHONPATH`: + +```bash +PYTHONPATH=/Users/indro/src/tutorials1/agentic_eda/jupyterlab_extension_backend \ +python -m src.main \ + --mode integrity \ + --path /Users/indro/src/tutorials1/agentic_eda/jupyterlab_extension_backend/datasets/T1_slice.csv +``` diff --git a/agentic_eda/jupyterlab_extension_backend/datasets/T1_slice.csv b/agentic_eda/jupyterlab_extension_backend/datasets/T1_slice.csv new file mode 100644 index 000000000..fd8bb93b2 --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/datasets/T1_slice.csv @@ -0,0 +1,101 @@ +Date/Time,LV ActivePower (kW),Wind Speed (m/s),Theoretical_Power_Curve (KWh),Wind Direction (°) +01 01 2018 00:00,380.047790527343,5.31133604049682,416.328907824861,259.994903564453 +01 01 2018 00:10,453.76919555664,5.67216682434082,519.917511061494,268.64111328125 +01 01 2018 00:20,306.376586914062,5.21603679656982,390.900015810951,272.564788818359 +01 01 2018 00:30,419.645904541015,5.65967416763305,516.127568975674,271.258087158203 +01 01 2018 00:40,380.650695800781,5.57794094085693,491.702971953588,265.674285888671 +01 01 2018 00:50,402.391998291015,5.60405206680297,499.436385024805,264.57861328125 +01 01 2018 01:00,447.605712890625,5.79300785064697,557.372363290225,266.163604736328 +01 01 2018 01:10,387.2421875,5.30604982376098,414.898178826186,257.949493408203 +01 01 2018 01:20,463.651214599609,5.58462905883789,493.677652137077,253.480697631835 +01 01 2018 01:30,439.725708007812,5.52322816848754,475.706782818068,258.72378540039 +01 01 2018 01:40,498.181701660156,5.72411584854125,535.841397042263,251.850997924804 +01 01 2018 01:50,526.816223144531,5.93419885635375,603.014076510633,265.504699707031 +01 01 2018 02:00,710.587280273437,6.54741382598876,824.662513585882,274.23291015625 +01 01 2018 02:10,655.194274902343,6.19974613189697,693.472641075637,266.733184814453 +01 01 2018 02:20,754.762512207031,6.50538301467895,808.098138482693,266.76040649414 +01 01 2018 02:30,790.173278808593,6.63411617279052,859.459020788565,270.493194580078 +01 01 2018 02:40,742.985290527343,6.37891292572021,759.434536596592,266.593292236328 +01 01 2018 02:50,748.229614257812,6.4466528892517,785.28100987646,265.571807861328 +01 01 2018 03:00,736.647827148437,6.41508293151855,773.172863451736,261.15869140625 +01 01 2018 03:10,787.246215820312,6.43753099441528,781.7712157188,257.56021118164 +01 01 2018 03:20,722.864074707031,6.22002410888671,700.764699868076,255.926498413085 +01 01 2018 
03:30,935.033386230468,6.89802598953247,970.736626881787,250.012893676757 +01 01 2018 03:40,1220.60900878906,7.60971117019653,1315.04892785216,255.985702514648 +01 01 2018 03:50,1053.77197265625,7.28835582733154,1151.26574355584,255.444595336914 +01 01 2018 04:00,1493.80798339843,7.94310188293457,1497.58372354361,256.407409667968 +01 01 2018 04:10,1724.48803710937,8.37616157531738,1752.19966204818,252.41259765625 +01 01 2018 04:20,1636.93505859375,8.23695755004882,1668.47070685152,247.979400634765 +01 01 2018 04:30,1385.48803710937,7.87959098815917,1461.81579081391,238.609603881835 +01 01 2018 04:40,1098.93200683593,7.10137605667114,1062.28503444311,245.095596313476 +01 01 2018 04:50,1021.4580078125,6.95530700683593,995.995854606612,245.410202026367 +01 01 2018 05:00,1164.89294433593,7.09829807281494,1060.85971215544,235.227905273437 +01 01 2018 05:10,1073.33203125,6.95363092422485,995.250960801046,242.872695922851 +01 01 2018 05:20,1165.30798339843,7.24957799911499,1132.4168612641,244.835693359375 +01 01 2018 05:30,1177.98999023437,7.29469108581542,1154.36530469206,242.48159790039 +01 01 2018 05:40,1170.53601074218,7.37636995315551,1194.8430985043,247.97720336914 +01 01 2018 05:50,1145.53601074218,7.44855403900146,1231.43070603717,249.682998657226 +01 01 2018 06:00,1114.02697753906,7.2392520904541,1127.43320551345,248.401000976562 +01 01 2018 06:10,1153.18505859375,7.32921123504638,1171.35504358957,244.621704101562 +01 01 2018 06:20,1125.3310546875,7.13970518112182,1080.13908466205,244.631805419921 +01 01 2018 06:30,1228.73205566406,7.47422885894775,1244.63353439737,245.785995483398 +01 01 2018 06:40,1021.79302978515,7.03317403793334,1030.99268581181,248.652206420898 +01 01 2018 06:50,957.378173828125,6.88645505905151,965.683334443832,244.611694335937 +01 01 2018 07:00,909.887817382812,6.88782119750976,966.279104864065,235.84829711914 +01 01 2018 07:10,1000.95397949218,7.21643209457397,1116.4718990154,232.842697143554 +01 01 2018 07:20,1024.47802734375,7.0685977935791,1047.17023059277,229.933197021484 +01 01 2018 07:30,1009.53399658203,6.93829584121704,988.451940715539,230.13670349121 +01 01 2018 07:40,899.492980957031,6.53668785095214,820.416658585943,234.933807373046 +01 01 2018 07:50,725.110107421875,6.18062496185302,686.636942163399,232.837905883789 +01 01 2018 08:00,585.259399414062,5.81682586669921,564.927659543473,240.328796386718 +01 01 2018 08:10,443.913909912109,5.45015096664428,454.773587146918,238.12629699707 +01 01 2018 08:20,565.253784179687,5.81814908981323,565.349093224668,235.80029296875 +01 01 2018 08:30,644.037780761718,6.13027286529541,668.823569309414,224.958694458007 +01 01 2018 08:40,712.058898925781,6.34707784652709,747.460673422601,216.803894042968 +01 01 2018 08:50,737.394775390625,6.34743690490722,747.595109122642,205.785293579101 +01 01 2018 09:00,725.868103027343,6.19436883926391,691.546334303948,199.848495483398 +01 01 2018 09:10,408.997406005859,4.97719812393188,330.417630427964,207.997802734375 +01 01 2018 09:20,628.436828613281,5.95911121368408,611.283836510667,210.954895019531 +01 01 2018 09:30,716.1005859375,6.21137619018554,697.649474372052,215.69400024414 +01 01 2018 09:40,711.49560546875,6.11145305633544,662.235163012206,220.84260559082 +01 01 2018 09:50,838.151916503906,6.45632219314575,789.011422412419,237.065307617187 +01 01 2018 10:00,881.062072753906,6.66665792465209,872.739625855708,235.667495727539 +01 01 2018 10:10,663.703125,6.16287899017333,680.327891653483,229.329696655273 +01 01 2018 
10:20,578.261596679687,6.01316785812377,628.442560754699,234.900604248046 +01 01 2018 10:30,465.620086669921,5.56120300292968,486.779567601972,230.422805786132 +01 01 2018 10:40,311.050903320312,4.96073198318481,326.411025380213,229.537506103515 +01 01 2018 10:50,230.05549621582,4.60387516021728,244.31624421611,231.79849243164 +01 01 2018 11:00,233.990600585937,4.55453395843505,233.632780531927,234.105606079101 +01 01 2018 11:10,175.592193603515,4.26362895965576,173.573663122312,228.776702880859 +01 01 2018 11:20,118.133102416992,3.89413905143737,108.571221110423,227.938995361328 +01 01 2018 11:30,142.202499389648,4.03876113891601,130.229989593698,224.46499633789 +01 01 2018 11:40,212.566192626953,4.50565099716186,223.196784083793,224.950500488281 +01 01 2018 11:50,222.610000610351,4.54339790344238,231.242507343633,229.12759399414 +01 01 2018 12:00,194.181198120117,4.32376098632812,185.598479588255,227.039993286132 +01 01 2018 12:10,82.6407470703125,3.63443708419799,68.5028197987886,230.31460571289 +01 01 2018 12:20,75.8952178955078,3.70551204681396,78.3961653540173,233.953292846679 +01 01 2018 12:30,41.9472389221191,3.25396800041198,29.2869556318446,233.06590270996 +01 01 2018 12:40,118.534599304199,3.77513694763183,88.8713653309387,227.753494262695 +01 01 2018 12:50,250.755905151367,4.69350099563598,264.119257409418,229.896606445312 +01 01 2018 13:00,346.86441040039,5.00293922424316,336.721998240131,235.279495239257 +01 01 2018 13:10,416.417907714843,5.36474990844726,430.92108895689,235.585296630859 +01 01 2018 13:20,331.941497802734,5.01618194580078,339.984940156412,229.942901611328 +01 01 2018 13:30,583.479919433593,5.97040796279907,615.05563084927,235.69529724121 +01 01 2018 13:40,776.552673339843,6.6555209159851,868.180844867276,241.457397460937 +01 01 2018 13:50,752.726379394531,6.60090398788452,846.029409522117,242.782104492187 +01 01 2018 14:00,589.073120117187,5.98137807846069,618.731442665699,234.984405517578 +01 01 2018 14:10,1109.12805175781,7.42459392547607,1219.19978672882,235.14729309082 +01 01 2018 14:20,1482.4599609375,8.18645191192626,1638.50890923271,238.479095458984 +01 01 2018 14:30,1523.43005371093,8.27493000030517,1691.1470390233,237.033203125 +01 01 2018 14:40,1572.17004394531,8.44920253753662,1796.76309010091,238.332397460937 +01 01 2018 14:50,1698.93994140625,8.5759744644165,1875.04719734159,235.641403198242 +01 01 2018 15:00,1616.84594726562,8.28225994110107,1695.53877696245,236.461395263671 +01 01 2018 15:10,1796.82397460937,8.73455238342285,1974.47580025242,234.354797363281 +01 01 2018 15:20,1885.86096191406,8.76410388946533,1993.17071186444,231.001602172851 +01 01 2018 15:30,2327.51196289062,9.66943168640136,2568.82712862015,227.60009765625 +01 01 2018 15:40,2499.162109375,10.1410903930664,2876.75361614448,227.73159790039 +01 01 2018 15:50,2820.51293945312,10.7724199295043,3186.02988321436,225.276397705078 +01 01 2018 16:00,2812.27905273437,10.6475200653076,3133.25922420184,224.680603027343 +01 01 2018 16:10,2530.44702148437,9.98266124725341,2781.27404078649,225.519500732421 +01 01 2018 16:20,2399.12109375,9.87438583374023,2711.49245838958,227.273803710937 +01 01 2018 16:30,2335.587890625,9.78547954559326,2651.34100928894,229.255493164062 diff --git a/agentic_eda/jupyterlab_extension_backend/src/config/__init__.py b/agentic_eda/jupyterlab_extension_backend/src/config/__init__.py new file mode 100644 index 000000000..2a18c45cd --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/src/config/__init__.py @@ -0,0 +1,3 @@ +""" +Backend configuration 
package. +""" diff --git a/agentic_eda/jupyterlab_extension_backend/src/config/config.py b/agentic_eda/jupyterlab_extension_backend/src/config/config.py new file mode 100644 index 000000000..f64d5fa9a --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/src/config/config.py @@ -0,0 +1,128 @@ +""" +Import as: + +import src.config.config as cconf +""" + +import dataclasses +import functools +import os + +import dotenv +import langchain_anthropic +import langchain_google_genai +import langchain_openai +import pydantic + +dataclass = dataclasses.dataclass +lru_cache = functools.lru_cache +ChatOpenAI = langchain_openai.ChatOpenAI +ChatAnthropic = langchain_anthropic.ChatAnthropic +ChatGoogleGenerativeAI = langchain_google_genai.ChatGoogleGenerativeAI +SecretStr = pydantic.SecretStr + +dotenv.load_dotenv() + + +@dataclass(frozen=True) +class Settings: + """ + Store model provider settings. + """ + + provider: str + model: str + temperature: float + timeout: float + max_retries: int + + +def _need(name: str) -> str: + """ + Read a required environment variable. + + :param name: environment variable name + :return: environment variable value + """ + value = os.getenv(name) + if value is None or value == "": + raise RuntimeError(f"Missing required environment variable: {name}") + return value + + +@lru_cache(maxsize=1) +def get_settings() -> Settings: + """ + Build settings from environment variables. + + :return: configured settings + """ + settings = Settings( + provider=os.getenv("LLM_PROVIDER", "openai"), + model=os.getenv("LLM_MODEL", "gpt-5-nano"), + temperature=float(os.getenv("LLM_TEMP", 0.2)), + timeout=float(os.getenv("LLM_TIMEOUT", 60)), + max_retries=int(os.getenv("LLM_MAX_RETRIES", 2)), + ) + return settings + + +@lru_cache(maxsize=1) +def get_chat_model(*, model: str | None = None) -> object: + """ + Build the configured chat model client. 
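+
+    Illustrative usage (assumes OPENAI_API_KEY is exported and the default
+    OpenAI provider is used; other providers need their own keys):
+
+        import src.config.config as cconf
+
+        chat = cconf.get_chat_model()
+        reply = chat.invoke("Reply with the single word OK.")
+        print(reply.content)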
+ + :param model: optional model override + :return: langchain chat model client + """ + settings = get_settings() + model_name = settings.model if model is None else model + provider = settings.provider + if provider == "openai": + _need("OPENAI_API_KEY") + chat_model = ChatOpenAI( + model=model_name, + temperature=settings.temperature, + timeout=settings.timeout, + max_retries=settings.max_retries, + ) + elif provider == "openai_compatible": + base_url = _need("OPENAI_COMPAT_BASE_URL") + api_key = _need("OPENAI_COMPAT_API_KEY") + chat_model = ChatOpenAI( + model=model_name, + base_url=base_url, + api_key=SecretStr(api_key), + temperature=settings.temperature, + timeout=settings.timeout, + max_retries=settings.max_retries, + ) + elif provider == "azure_openai_v1": + azure_base = _need("AZURE_OPENAI_BASE_URL") + azure_key = SecretStr(_need("AZURE_OPENAI_API_KEY")) + chat_model = ChatOpenAI( + model=model_name, + base_url=azure_base, + api_key=azure_key, + temperature=settings.temperature, + timeout=settings.timeout, + max_retries=settings.max_retries, + ) + elif provider == "anthropic": + _need("ANTHROPIC_API_KEY") + chat_model = ChatAnthropic( + model_name=model_name, + temperature=settings.temperature, + timeout=settings.timeout, + max_retries=settings.max_retries, + stop=None, + ) + elif provider in ("google", "gemini", "google_genai"): + _need("GOOGLE_API_KEY") + chat_model = ChatGoogleGenerativeAI( + model=model_name, + temperature=settings.temperature, + ) + else: + raise ValueError(f"Unsupported provider='{provider}'") + return chat_model diff --git a/agentic_eda/jupyterlab_extension_backend/src/ingest/__init__.py b/agentic_eda/jupyterlab_extension_backend/src/ingest/__init__.py new file mode 100644 index 000000000..176a9790e --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/src/ingest/__init__.py @@ -0,0 +1,3 @@ +""" +Ingestion stages for the Jupyter backend. +""" diff --git a/agentic_eda/jupyterlab_extension_backend/src/ingest/compute_temporal_stats.py b/agentic_eda/jupyterlab_extension_backend/src/ingest/compute_temporal_stats.py new file mode 100644 index 000000000..1b323d8c8 --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/src/ingest/compute_temporal_stats.py @@ -0,0 +1,223 @@ +""" +Import as: + +import src.ingest.compute_temporal_stats as sctstats +""" + +from __future__ import annotations + +import argparse +import logging +from typing import TypedDict + +import langgraph.graph as lgraph + +import src.ingest.infer_structure as sinferstruct +import src.tools.input_tools as tinptool + +_LOG = logging.getLogger(__name__) + + +class TemporalStatsState(TypedDict): + """ + Store deterministic temporal statistics. + """ + + n_nat_time: int + min_time: str | None + max_time: str | None + typical_delta_mode: str | None + typical_delta_median: str | None + expected_frequency: str | None + dominant_frequency_fraction: float + is_irregular_sampling: bool + resampling_decision: str + coverage_summary: dict + coverage_per_entity: list[dict] + + +class CompositeState(TypedDict): + """ + Store graph state for temporal statistics. 
+ """ + + path: str + done: list[str] + has_header: bool + has_missing_values: bool + error: str + info: str + cols: list[str] + temporal_cols: list[str] + numeric_val_cols: list[str] + categorical_val_cols: list[str] + bad_rows: list[dict] + metadata: dict + time_col: str + candidates: list[dict] + winner_formatter: dict + entity_col: str | None + numeric_cols: list[str] + nonnegative_cols: list[str] + jump_mult: float + report: dict + summary: str + flag: str + type: str + primary_key: str + secondary_keys: list[str] + numeric_continuous_cols: list[str] + numeric_count_cols: list[str] + binary_flag_cols: list[str] + categorical_feature_cols: list[str] + known_exogenous_cols: list[str] + target_cols: list[str] + covariate_cols: list[str] + n_nat_time: int + min_time: str | None + max_time: str | None + typical_delta_mode: str | None + typical_delta_median: str | None + expected_frequency: str | None + dominant_frequency_fraction: float + is_irregular_sampling: bool + resampling_decision: str + coverage_summary: dict + coverage_per_entity: list[dict] + + +def call_infer_structure(state: CompositeState) -> dict: + """ + Run the sequential pipeline up to feature-structure inference. + + :param state: graph state + :return: composite payload from infer_structure + """ + payload = sinferstruct.run_infer_structure(state["path"]) + return payload + + +def compute_temporal_stats(state: CompositeState) -> dict: + """ + Compute deterministic temporal range, coverage, and frequency statistics. + + :param state: graph state + :return: temporal statistics payload + """ + temporal_report = tinptool.compute_temporal_stats.invoke( + { + "path": state["path"], + "time_col": state["primary_key"], + "secondary_keys": state["secondary_keys"], + "winner_formatter": state["winner_formatter"], + } + ) + trace_payload = { + "primary_key": state["primary_key"], + "secondary_keys": state["secondary_keys"], + "temporal_report": temporal_report, + } + tinptool.write_stage_trace(state["path"], "compute_temporal_stats", trace_payload) + payload = { + "n_nat_time": temporal_report["n_nat_time"], + "min_time": temporal_report["min_time"], + "max_time": temporal_report["max_time"], + "typical_delta_mode": temporal_report["typical_delta_mode"], + "typical_delta_median": temporal_report["typical_delta_median"], + "expected_frequency": temporal_report["expected_frequency"], + "dominant_frequency_fraction": temporal_report["dominant_frequency_fraction"], + "is_irregular_sampling": temporal_report["is_irregular_sampling"], + "resampling_decision": temporal_report["resampling_decision"], + "coverage_summary": temporal_report["coverage_summary"], + "coverage_per_entity": temporal_report["coverage_per_entity"], + } + return payload + + +temporal_stats = lgraph.StateGraph(CompositeState) +temporal_stats.add_node("infer_structure_pipeline", call_infer_structure) +temporal_stats.add_node("compute_temporal_stats", compute_temporal_stats) +temporal_stats.add_edge(lgraph.START, "infer_structure_pipeline") +temporal_stats.add_edge("infer_structure_pipeline", "compute_temporal_stats") +temporal_stats.add_edge("compute_temporal_stats", lgraph.END) +graph = temporal_stats.compile() + + +def run_compute_temporal_stats(path: str) -> dict: + """ + Execute temporal statistics end to end. 
+ + :param path: dataset path + :return: full composite graph payload + """ + init_state: CompositeState = { + "path": path, + "done": [], + "has_header": True, + "has_missing_values": False, + "error": "", + "info": "", + "cols": [], + "temporal_cols": [], + "numeric_val_cols": [], + "categorical_val_cols": [], + "bad_rows": [], + "metadata": {}, + "time_col": "", + "candidates": [], + "winner_formatter": {}, + "entity_col": None, + "numeric_cols": [], + "nonnegative_cols": [], + "jump_mult": 20.0, + "report": {}, + "summary": "", + "flag": "", + "type": "", + "primary_key": "", + "secondary_keys": [], + "numeric_continuous_cols": [], + "numeric_count_cols": [], + "binary_flag_cols": [], + "categorical_feature_cols": [], + "known_exogenous_cols": [], + "target_cols": [], + "covariate_cols": [], + "n_nat_time": 0, + "min_time": None, + "max_time": None, + "typical_delta_mode": None, + "typical_delta_median": None, + "expected_frequency": None, + "dominant_frequency_fraction": 0.0, + "is_irregular_sampling": False, + "resampling_decision": "", + "coverage_summary": {}, + "coverage_per_entity": [], + } + out = graph.invoke(init_state) + payload: CompositeState = out + _LOG.info("Temporal stats output: %s", payload) + return payload + + +def _parse_args() -> argparse.Namespace: + """ + Parse command-line arguments. + + :return: parsed arguments + """ + parser = argparse.ArgumentParser() + parser.add_argument( + "--path", + required=True, + help="Path to dataset file.", + ) + args = parser.parse_args() + return args + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + args = _parse_args() + run_compute_temporal_stats(args.path) diff --git a/agentic_eda/jupyterlab_extension_backend/src/ingest/format_datetime.py b/agentic_eda/jupyterlab_extension_backend/src/ingest/format_datetime.py new file mode 100644 index 000000000..6af3065d7 --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/src/ingest/format_datetime.py @@ -0,0 +1,251 @@ +""" +Import as: + +import src.ingest.format_datetime as sfordat +""" + +import logging +import pathlib +from typing import TypedDict + +import langchain.agents as lagents +import langchain.tools as ltools +import langchain_core.messages as lmessages +import langgraph.graph as lgraph +import numpy as np +import pandas as pd +import pydantic + +import src.config.config as cconf +import src.ingest.handle_inputs as shainp +import src.tools.input_tools as tinptool + +_LOG = logging.getLogger(__name__) + + +def _score_parse(dt: pd.Series) -> float: + """ + Score datetime parse quality. 
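+
+    The score weights parse coverage most heavily:
+    0.65 * parsed_fraction + 0.15 * range_score + 0.20 * monotonic_score.
+    For example, a candidate that parses 98% of values, stays inside the
+    1990-2035 bounds, and has 5% of consecutive deltas out of order scores
+    0.98 * 0.65 + 1.0 * 0.15 + 0.95 * 0.20 = 0.977.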
+ + :param dt: candidate datetime series + :return: score where larger means better + """ + datetime_series = pd.to_datetime(dt, errors="coerce", utc=True) + if datetime_series.isna().all(): + score = -1.0 + return score + parsed_fraction = float(datetime_series.notna().mean()) + min_timestamp = datetime_series.min() + max_timestamp = datetime_series.max() + range_score = 1.0 + min_bound = pd.Timestamp("1990-01-01", tz="UTC") + max_bound = pd.Timestamp("2035-01-01", tz="UTC") + if min_timestamp < min_bound or max_timestamp > max_bound: + range_score = 0.7 + datetime_no_na = datetime_series.dropna() + monotonic_score = 0.0 + if len(datetime_no_na) >= 3: + deltas = datetime_no_na.diff() + inversions = float((deltas < pd.Timedelta(0)).mean()) + monotonic_score = 1.0 - inversions + score = ( + parsed_fraction * 0.65 + range_score * 0.15 + monotonic_score * 0.20 + ) + return float(score) + + +class _Candidate(pydantic.BaseModel): + """ + Store one datetime parse candidate. + """ + + model_config = pydantic.ConfigDict(extra="forbid") + format: str | None + dayfirst: bool | None + yearfirst: bool | None + utc: bool + + +class _ParseWithCandidatesArgs(pydantic.BaseModel): + """ + Store tool arguments for candidate parsing. + """ + + model_config = pydantic.ConfigDict(extra="forbid") + path: str + col_name: str + candidates: list[_Candidate] + + +@ltools.tool(args_schema=_ParseWithCandidatesArgs) +def _parse_with_candidates( + path: str, + col_name: str, + candidates: list[_Candidate], +) -> dict: + """ + Parse one column with multiple datetime candidates and pick the best. + + :param path: dataset path + :param col_name: target column name + :param candidates: parse candidates + :return: best candidate summary + """ + dataset_path = pathlib.Path(path) + dataset = tinptool.load_dataset(dataset_path) + col = dataset[col_name] + best_score = -1.0 + best_candidate = None + best_parsed_fraction = 0.0 + series = col.astype(str).str.strip().replace( + { + "": np.nan, + "nan": np.nan, + "NaT": np.nan, + } + ) + for candidate in candidates: + candidate_dict = candidate.model_dump() + format_val = candidate_dict["format"] + dayfirst_val = candidate_dict["dayfirst"] + yearfirst_val = candidate_dict["yearfirst"] + utc_val = candidate_dict["utc"] + kwargs = { + key: val + for key, val in { + "format": format_val, + "dayfirst": dayfirst_val, + "yearfirst": yearfirst_val, + "utc": utc_val, + }.items() + if val is not None + } + try: + datetime_series = pd.to_datetime( + series, + errors="coerce", + **kwargs, + ) + except Exception: + continue + score = _score_parse(datetime_series) + if score > best_score: + best_score = score + best_candidate = candidate_dict + best_parsed_fraction = float(datetime_series.notna().mean()) + payload = { + "best_candidate": best_candidate, + "best_score": float(best_score), + "parsed_fraction": float(best_parsed_fraction), + } + return payload + + +class DateFormatterState(TypedDict): + """ + Store graph state for datetime formatting. + """ + + path: str + time_col: str + candidates: list[dict] + winner_formatter: dict + + +class DateFormatterOutput(pydantic.BaseModel): + """ + Store structured formatter output. + """ + + model_config = pydantic.ConfigDict(extra="forbid") + candidates: list[_Candidate] + winner_formatter: _Candidate + + +def run_formatting_agent(state: DateFormatterState) -> dict: + """ + Run LLM tool-calling to find the best datetime parser. 
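+
+    Illustrative winner shape for timestamps like "01 01 2018 00:00" in the
+    bundled T1_slice.csv (treating the first field as the day is an
+    assumption the candidates have to test):
+
+        {"format": "%d %m %Y %H:%M", "dayfirst": True,
+         "yearfirst": None, "utc": False}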
+ + :param state: formatter graph state + :return: candidate list and winner formatter + """ + system_prompt = ( + "Use tools to convert the provided time column into a correct datetime " + "format.\n" + "1. Use extract_head to inspect the temporal column and propose parse " + "candidates.\n" + "2. Call _parse_with_candidates with those candidates.\n" + "3. Return all candidates and the winning formatter." + ) + llm = cconf.get_chat_model(model="gpt-4.1") + agent = lagents.create_agent( + model=llm, + tools=[_parse_with_candidates, tinptool.extract_head], + system_prompt=system_prompt, + response_format=DateFormatterOutput, + ) + out = agent.invoke( + { + "messages": [ + lmessages.HumanMessage( + content=( + f"The dataset path is {state['path']} and the time " + f"column name is {state['time_col']}" + ) + ) + ] + } + ) + structured_response = out["structured_response"].model_dump() + payload = { + "candidates": structured_response["candidates"], + "winner_formatter": structured_response["winner_formatter"], + } + return payload + + +def call_input_handler(state: DateFormatterState) -> dict: + """ + Run input handler and pick the first temporal column. + + :param state: formatter graph state + :return: selected temporal column + """ + out = shainp.run_input_handler(state["path"]) + temporal_cols = out.get("temporal_cols") or [] + if not temporal_cols: + raise ValueError("No temporal columns found by input handler.") + payload = {"time_col": temporal_cols[0]} + return payload + + +date_formatter = lgraph.StateGraph(DateFormatterState) +date_formatter.add_node("input_handler", call_input_handler) +date_formatter.add_node("run_formatting_agent", run_formatting_agent) +date_formatter.add_edge(lgraph.START, "input_handler") +date_formatter.add_edge("input_handler", "run_formatting_agent") +date_formatter.add_edge("run_formatting_agent", lgraph.END) +graph = date_formatter.compile() + + +def run_date_formatter(path: str) -> dict: + """ + Execute datetime formatter graph and parse the selected time column. 
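+
+    Illustrative usage (path is relative to the backend root; the parsed
+    dtype depends on whether the winning candidate requests UTC):
+
+        import src.ingest.format_datetime as sfordat
+
+        out = sfordat.run_date_formatter("datasets/T1_slice.csv")
+        print(out["time_col"], out["parsed_dtype"])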
+ + :param path: dataset path + :return: output including selected formatter and parsed dtype + """ + graph_in = {"path": path} + out: DateFormatterState = graph.invoke(graph_in) # type: ignore[assignment] + dataset_path = pathlib.Path(path) + dataset = tinptool.load_dataset(dataset_path) + raw_args = out["winner_formatter"] + format_args = {key: val for key, val in raw_args.items() if val is not None} + parsed_time = pd.to_datetime(dataset[out["time_col"]], **format_args) + payload = { + "time_col": out["time_col"], + "winner_formatter": out["winner_formatter"], + "parsed_dtype": str(parsed_time.dtype), + } + _LOG.info("Date formatter output: %s", payload) + return payload diff --git a/agentic_eda/jupyterlab_extension_backend/src/ingest/handle_inputs.py b/agentic_eda/jupyterlab_extension_backend/src/ingest/handle_inputs.py new file mode 100644 index 000000000..84a3474c1 --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/src/ingest/handle_inputs.py @@ -0,0 +1,646 @@ +""" +Import as: + +import src.ingest.handle_inputs as shainp +""" + +from __future__ import annotations + +import argparse +import logging +import pathlib +from typing import Any +from typing import Literal +from typing import TypedDict + +import langchain.agents as lagents +import langchain_core.messages as lmessages +import langgraph.graph as lgraph +import pandas as pd +import pydantic + +import src.config.config as cconf +import src.tools.input_tools as tinptool + +_LOG = logging.getLogger(__name__) + + +class InputState(TypedDict): + """ + Store graph state for input checks. + """ + + path: str | pathlib.Path + done: list[str] + has_header: bool + has_missing_values: bool + error: str + info: str + cols: list[str] + temporal_cols: list[str] + numeric_val_cols: list[str] + categorical_val_cols: list[str] + bad_rows: list[dict] + + +class LLMOutput(pydantic.BaseModel): + """ + Store structured output from the header classifier. + """ + + temporal_cols: list[str] + numeric_val_cols: list[str] + categorical_val_cols: list[str] + + +class SeriesStructureFallbackOutput(pydantic.BaseModel): + """ + Store structured fallback output for ambiguous series-structure cases. + """ + + model_config = pydantic.ConfigDict(extra="forbid") + secondary_keys: list[str] + + +class BadRowDescriptor(pydantic.BaseModel): + """ + Store one fuzzy descriptor for a bad row. + """ + + model_config = pydantic.ConfigDict(extra="forbid") + row_index: int + fuzzy_descriptor: str + + +class BadRowDescriptorOutput(pydantic.BaseModel): + """ + Store structured fuzzy descriptors for detected bad rows. + """ + + model_config = pydantic.ConfigDict(extra="forbid") + descriptors: list[BadRowDescriptor] + + +class SeriesStructureAssessment(TypedDict): + """ + Store deterministic and fallback evidence for series-structure inference. + """ + + duplicate_timestamps: int + duplicate_timestamp_fraction: float + timestamps_mostly_unique: bool + candidate_entity_cols: list[str] + entity_candidate_report: dict + secondary_keys: list[str] + confidence: Literal["high", "medium", "low"] + method: Literal["deterministic", "deterministic_no_panel", "fuzzy"] + + +def _json_safe_value(value: Any) -> Any: + """ + Convert dataframe cell values into JSON-safe Python values. 
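+
+    For example, numpy.int64(3) becomes 3, numpy.float64(1.5) becomes 1.5,
+    and NaN/NaT become None; plain Python values without an .item() method
+    pass through unchanged.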
+ + :param value: raw cell value + :return: JSON-safe value + """ + if pd.isna(value): + return None + if hasattr(value, "item"): + try: + return value.item() + except Exception: + return str(value) + return value + + +def _row_to_record(row: pd.Series) -> dict[str, Any]: + """ + Convert one dataframe row into a JSON-safe mapping. + + :param row: dataframe row + :return: serialized row mapping + """ + return { + str(col): _json_safe_value(value) + for col, value in row.to_dict().items() + } + + +def detect_bad_rows(state: InputState) -> dict: + """ + Detect rows that do not behave like observations because their temporal + fields are missing or unparseable. + + Theory: + In time-series ingestion, observation rows should participate in the time + axis. Rows whose temporal fields cannot be parsed are often metadata, + annotation, footer, or malformed rows. Capturing them explicitly preserves + evidence for downstream handling without silently dropping information at + ingestion time. + + :param state: input graph state + :return: detected bad-row payload + """ + temporal_cols = state.get("temporal_cols") or [] + if not temporal_cols: + return {"bad_rows": []} + + dataset_path = pathlib.Path(str(state["path"])) + dataset = tinptool.load_dataset(dataset_path) + valid_temporal_cols = [col for col in temporal_cols if col in dataset.columns] + if not valid_temporal_cols: + return {"bad_rows": []} + + parse_matrix: dict[str, pd.Series] = {} + normalized_matrix: dict[str, pd.Series] = {} + for col in valid_temporal_cols: + raw_series = dataset[col] + normalized = raw_series.astype(str).str.strip().replace( + {"": pd.NA, "nan": pd.NA, "NaT": pd.NA} + ) + normalized_matrix[col] = normalized + parse_matrix[col] = pd.to_datetime(normalized, errors="coerce") + + bad_rows: list[dict[str, Any]] = [] + for row_idx in range(int(dataset.shape[0])): + reasons: list[str] = [] + temporal_values: dict[str, Any] = {} + has_temporal_signal = False + has_parseable_temporal = False + for col in valid_temporal_cols: + raw_value = normalized_matrix[col].iloc[row_idx] + parsed_value = parse_matrix[col].iloc[row_idx] + temporal_values[col] = _json_safe_value(raw_value) + if not pd.isna(raw_value): + has_temporal_signal = True + if not pd.isna(parsed_value): + has_parseable_temporal = True + continue + if pd.isna(raw_value): + reasons.append(f"missing_temporal_value:{col}") + else: + raw_text = str(raw_value).strip() + reasons.append(f"unparseable_temporal_value:{col}") + if raw_text.endswith(":"): + reasons.append(f"annotation_like_temporal_value:{col}") + if has_parseable_temporal: + continue + if not has_temporal_signal and not reasons: + continue + row = dataset.iloc[row_idx] + bad_rows.append( + { + "row_index": int(row_idx), + "csv_row_number": int(row_idx) + 2, + "temporal_values": temporal_values, + "reasons": sorted(dict.fromkeys(reasons)), + "raw_row": _row_to_record(row), + "fuzzy_descriptor": "", + } + ) + return {"bad_rows": bad_rows} + + +def describe_bad_rows(state: InputState) -> dict: + """ + Attach short fuzzy descriptors to already-detected bad rows. + + Theory: + Deterministic rules can reliably tell us that a row does not behave like a + data observation, but they are less expressive about the row's likely role. + A constrained model can add a short human-readable descriptor such as + metadata row, blank footer row, or malformed timestamp row without being + allowed to invent new row IDs or alter the deterministic evidence. 
+ + :param state: input graph state + :return: bad rows with fuzzy descriptors + """ + bad_rows = [dict(row) for row in (state.get("bad_rows") or [])] + if not bad_rows: + return {"bad_rows": []} + + llm = cconf.get_chat_model(model="gpt-4.1") + agent = lagents.create_agent( + model=llm, + tools=[], + system_prompt=( + "You are labeling already-detected bad rows in a dataset. " + "For each row_index, return a short fuzzy descriptor such as " + "'metadata/control row', 'blank/incomplete row', " + "'annotation row', or 'malformed timestamp row'. " + "Do not change row_index values and do not add rows." + ), + response_format=BadRowDescriptorOutput, + ) + out = agent.invoke( + { + "messages": [ + lmessages.HumanMessage( + content=f"Detected bad rows: {bad_rows}" + ) + ] + } + ) + descriptors = out["structured_response"].model_dump().get("descriptors") or [] + descriptor_map = { + int(item["row_index"]): str(item["fuzzy_descriptor"]).strip() + for item in descriptors + } + for row in bad_rows: + row["fuzzy_descriptor"] = descriptor_map.get( + int(row["row_index"]), + "bad/non-data row", + ) + return {"bad_rows": bad_rows} + + +def _parse_time_series( + path: str | pathlib.Path, + time_col: str, + winner_formatter: dict | None = None, +) -> pd.Series: + """ + Parse a proposed time column to measure whether it behaves like a real time + axis. + + Theory: + Handle-input classification identifies candidate temporal columns, but it + does not establish whether the observed values actually parse into a stable + datetime axis. Parseability is the empirical question: can the values be + converted into usable timestamps with only a small failure rate? That check + is important because schema inference should rely on observed value + behavior, not just column labels or LLM guesses. + + :param path: dataset path + :param time_col: selected time column + :param winner_formatter: optional datetime parsing kwargs + :return: parsed timestamp series + """ + dataset = tinptool.load_dataset(pathlib.Path(str(path))) + format_args = winner_formatter or {} + format_args = {key: val for key, val in format_args.items() if val is not None} + try: + return pd.to_datetime(dataset[time_col], errors="coerce", **format_args) + except Exception: + return pd.to_datetime(dataset[time_col], errors="coerce") + + +def _select_entity_candidate_cols( + *, + cols: list[str], + time_col: str, + numeric_val_cols: list[str], + categorical_val_cols: list[str], + column_profiles: dict, +) -> list[str]: + """ + Select plausible entity-key candidates using value-level heuristics. + + Theory: + Entity keys should behave like identifiers that partition repeated + timestamps into coherent per-entity series. Measurement columns usually do + not do that, even if they repeat. The candidate filter therefore keeps + likely identifier-like categoricals and only a narrow class of integer-like + numeric columns, while excluding continuous measurements, binary flags, and + near-row-unique columns. 
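+
+    For example (column names are hypothetical), a "station_id" categorical
+    with a dozen distinct values is kept, an integer-like "sensor_id" column
+    whose values repeat heavily is kept, while a continuous wind-speed
+    measurement, a 0/1 flag, and any column that is unique on nearly every
+    row (unique_ratio >= 0.95) are all rejected.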
+ + :param cols: all dataset columns + :param time_col: selected time column + :param numeric_val_cols: numeric value columns + :param categorical_val_cols: categorical value columns + :param column_profiles: per-column deterministic profiles + :return: filtered candidate entity columns + """ + candidates: list[str] = [] + numeric_set = set(numeric_val_cols) + categorical_set = set(categorical_val_cols) + for col in cols: + if col == time_col: + continue + profile = column_profiles.get(col) or {} + n_unique = int(profile.get("n_unique", 0)) + unique_ratio = float(profile.get("unique_ratio", 1.0)) + if n_unique <= 1 or unique_ratio >= 0.95: + continue + if col in categorical_set: + candidates.append(col) + continue + if col in numeric_set: + if bool(profile.get("is_binary_like")): + continue + if not bool(profile.get("is_integer_like")): + continue + if not bool(profile.get("is_nonnegative_like")): + continue + if n_unique > 200: + continue + if unique_ratio > 0.50: + continue + candidates.append(col) + return candidates + + +def _fuzzy_secondary_key_agent( + *, + path: str, + time_col: str, + candidate_entity_cols: list[str], + entity_candidate_report: dict, + column_profiles: dict, +) -> list[str]: + """ + Resolve ambiguous panel-vs-multivariate cases with a constrained LLM tie + breaker. + + Theory: + Deterministic heuristics are strongest when the data exhibits clean + identifier behavior. Ambiguous cases remain, especially when columns are + poorly named or identifier-like columns are partially numeric. In those + cases, a model can act as a constrained judge over a narrow candidate set, + using deterministic evidence rather than inventing columns freely. This + keeps fuzzy reasoning explainable and bounded. + + :param path: dataset path + :param time_col: selected time column + :param candidate_entity_cols: filtered entity-key candidates + :param entity_candidate_report: deterministic scoring report + :param column_profiles: per-column profiles + :return: chosen secondary keys, possibly empty + """ + if not candidate_entity_cols: + return [] + llm = cconf.get_chat_model(model="gpt-4.1") + agent = lagents.create_agent( + model=llm, + tools=[tinptool.extract_head, tinptool.extract_metadata], + system_prompt=( + "You are resolving an ambiguous series-structure classification. " + "Choose secondary keys only from the provided candidate_entity_cols. " + "Return [] if the dataset still looks like a single or wide " + "multivariate time series rather than panel data. Prefer the " + "deterministic evidence report over column names." 
+ ), + response_format=SeriesStructureFallbackOutput, + ) + profile_subset = { + col: column_profiles.get(col, {}) + for col in candidate_entity_cols + } + out = agent.invoke( + { + "messages": [ + lmessages.HumanMessage( + content=( + f"Dataset path: {path}\n" + f"time_col: {time_col}\n" + f"candidate_entity_cols: {candidate_entity_cols}\n" + f"entity_candidate_report: {entity_candidate_report}\n" + f"column_profiles: {profile_subset}" + ) + ) + ] + } + ) + structured = out["structured_response"].model_dump() + secondary_keys: list[str] = [] + seen: set[str] = set() + allowed = set(candidate_entity_cols) + for col in structured.get("secondary_keys") or []: + col_name = str(col) + if col_name not in allowed or col_name in seen: + continue + seen.add(col_name) + secondary_keys.append(col_name) + return secondary_keys + + +def assess_series_structure( + *, + path: str | pathlib.Path, + cols: list[str], + time_col: str, + numeric_val_cols: list[str], + categorical_val_cols: list[str], + winner_formatter: dict | None = None, +) -> SeriesStructureAssessment: + """ + Assess whether the dataset behaves like a single series, panel, or wide + multivariate time series. + + Theory: + The decisive signal for panel structure is not the column name but the time + axis itself. If timestamps are already mostly unique, there is no need to + search for entity keys: the data is behaving like one wide time-indexed + table. Only when timestamps repeat meaningfully should we look for + identifier columns that make `(entity, time)` close to unique. This staging + avoids promoting ordinary measurement columns into fake entity IDs. + + :param path: dataset path + :param cols: all dataset columns + :param time_col: selected time column + :param numeric_val_cols: numeric value columns + :param categorical_val_cols: categorical value columns + :param winner_formatter: optional datetime parsing kwargs + :return: series-structure assessment + """ + string_path = str(path) + timestamp = _parse_time_series(string_path, time_col, winner_formatter) + valid_ts = timestamp.dropna() + duplicate_timestamps = int(valid_ts.duplicated().sum()) + duplicate_fraction = ( + 0.0 if valid_ts.empty else float(duplicate_timestamps / max(1, int(valid_ts.shape[0]))) + ) + timestamps_mostly_unique = duplicate_timestamps == 0 or duplicate_fraction < 0.01 + profiles_out = tinptool.extract_column_profiles.invoke({"path": string_path}) + column_profiles = profiles_out.get("column_profiles") or {} + candidate_entity_cols = _select_entity_candidate_cols( + cols=cols, + time_col=time_col, + numeric_val_cols=numeric_val_cols, + categorical_val_cols=categorical_val_cols, + column_profiles=column_profiles, + ) + if timestamps_mostly_unique: + return { + "duplicate_timestamps": duplicate_timestamps, + "duplicate_timestamp_fraction": duplicate_fraction, + "timestamps_mostly_unique": True, + "candidate_entity_cols": [], + "entity_candidate_report": { + "time_col": time_col, + "candidate_cols": [], + "candidates": [], + "recommended_secondary_keys": [], + }, + "secondary_keys": [], + "confidence": "high", + "method": "deterministic_no_panel", + } + entity_candidate_report = tinptool.score_entity_candidates.invoke( + { + "path": string_path, + "time_col": time_col, + "candidate_cols": candidate_entity_cols, + "max_combo_size": 2, + } + ) + recommended_secondary_keys = ( + entity_candidate_report.get("recommended_secondary_keys") or [] + ) + candidates = entity_candidate_report.get("candidates") or [] + top_score = 0.0 if not candidates else 
float(candidates[0].get("score", 0.0)) + if recommended_secondary_keys: + confidence: Literal["high", "medium", "low"] = ( + "high" if top_score >= 0.75 else "medium" + ) + return { + "duplicate_timestamps": duplicate_timestamps, + "duplicate_timestamp_fraction": duplicate_fraction, + "timestamps_mostly_unique": False, + "candidate_entity_cols": candidate_entity_cols, + "entity_candidate_report": entity_candidate_report, + "secondary_keys": recommended_secondary_keys, + "confidence": confidence, + "method": "deterministic", + } + fuzzy_secondary_keys = _fuzzy_secondary_key_agent( + path=string_path, + time_col=time_col, + candidate_entity_cols=candidate_entity_cols, + entity_candidate_report=entity_candidate_report, + column_profiles=column_profiles, + ) + return { + "duplicate_timestamps": duplicate_timestamps, + "duplicate_timestamp_fraction": duplicate_fraction, + "timestamps_mostly_unique": False, + "candidate_entity_cols": candidate_entity_cols, + "entity_candidate_report": entity_candidate_report, + "secondary_keys": fuzzy_secondary_keys, + "confidence": "low" if fuzzy_secondary_keys else "medium", + "method": "fuzzy", + } + + +def header_classification_agent(state: InputState) -> dict: + """ + Classify temporal, numeric, and categorical columns. + + :param state: input graph state + :return: column classification payload + """ + llm = cconf.get_chat_model(model="gpt-4.1") + agent = lagents.create_agent( + model=llm, + tools=[tinptool.extract_head, tinptool.extract_metadata], + system_prompt=( + "You are a header classifier agent. Use tools to identify temporal " + "columns and classify the remaining value columns as numeric or " + "categorical. Output JSON with keys temporal_cols, " + "numeric_val_cols, and categorical_val_cols." + ), + response_format=LLMOutput, + ) + out = agent.invoke( + { + "messages": [ + lmessages.HumanMessage( + content=f"The dataset is in {state['path']}" + ) + ] + } + ) + result = out["structured_response"].model_dump() + return result + + +def error_node(state: InputState) -> dict: + """ + Log an error node transition. + + :param state: input graph state + :return: empty update + """ + _LOG.error("Input handler failed: %s", state["error"]) + return {} + + +def has_header(state: InputState) -> bool: + """ + Check if header validation passed. + + :param state: input graph state + :return: true when headers are valid + """ + has_header_flag = state["has_header"] + return has_header_flag + + +def run_input_handler(path: str | pathlib.Path) -> dict: + """ + Run dataset header and column classification checks. 
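+
+    Illustrative usage (path is relative to the backend root):
+
+        import src.ingest.handle_inputs as shainp
+
+        out = shainp.run_input_handler("datasets/T1_slice.csv")
+        print(out["temporal_cols"], len(out["bad_rows"]))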
+ + :param path: path to dataset + :return: final graph output + """ + graph_builder = lgraph.StateGraph(InputState) + graph_builder.add_node("header_analysis", tinptool.analyze_header) + graph_builder.add_node( + "header_classification_agent", + header_classification_agent, + ) + graph_builder.add_node("detect_bad_rows", detect_bad_rows) + graph_builder.add_node("describe_bad_rows", describe_bad_rows) + graph_builder.add_node("error", error_node) + graph_builder.add_edge(lgraph.START, "header_analysis") + graph_builder.add_conditional_edges( + "header_analysis", + has_header, + { + True: "header_classification_agent", + False: "error", + }, + ) + graph_builder.add_edge("error", lgraph.END) + graph_builder.add_edge("header_classification_agent", "detect_bad_rows") + graph_builder.add_edge("detect_bad_rows", "describe_bad_rows") + graph_builder.add_edge("describe_bad_rows", lgraph.END) + graph = graph_builder.compile() + init_state: InputState = { + "path": str(path), + "done": [], + "has_header": True, + "has_missing_values": False, + "error": "", + "info": "", + "cols": [], + "temporal_cols": [], + "numeric_val_cols": [], + "categorical_val_cols": [], + "bad_rows": [], + } + out = graph.invoke(init_state) + _LOG.info("Input handler output: %s", out) + return out + + +def _parse_args() -> argparse.Namespace: + """ + Parse command-line arguments. + + :return: parsed arguments + """ + parser = argparse.ArgumentParser() + parser.add_argument( + "--path", + required=True, + help="Path to dataset file.", + ) + args = parser.parse_args() + return args + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + args = _parse_args() + run_input_handler(args.path) diff --git a/agentic_eda/jupyterlab_extension_backend/src/ingest/infer_structure.py b/agentic_eda/jupyterlab_extension_backend/src/ingest/infer_structure.py new file mode 100644 index 000000000..a57f094f6 --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/src/ingest/infer_structure.py @@ -0,0 +1,194 @@ +""" +Import as: + +import src.ingest.infer_structure as sinferstruct +""" + +from __future__ import annotations + +import argparse +import logging +from typing import TypedDict + +import langgraph.graph as lgraph + +import src.ingest.infer_type as sinfert +import src.tools.input_tools as tinptool + +_LOG = logging.getLogger(__name__) + + +class FeatureStructureState(TypedDict): + """ + Store inferred semantic feature groupings. + """ + + numeric_continuous_cols: list[str] + numeric_count_cols: list[str] + binary_flag_cols: list[str] + categorical_feature_cols: list[str] + known_exogenous_cols: list[str] + target_cols: list[str] + covariate_cols: list[str] + + +class CompositeState(TypedDict): + """ + Store graph state for feature-structure inference. 
+ """ + + path: str + done: list[str] + has_header: bool + has_missing_values: bool + error: str + info: str + cols: list[str] + temporal_cols: list[str] + numeric_val_cols: list[str] + categorical_val_cols: list[str] + bad_rows: list[dict] + metadata: dict + time_col: str + candidates: list[dict] + winner_formatter: dict + entity_col: str | None + numeric_cols: list[str] + nonnegative_cols: list[str] + jump_mult: float + report: dict + summary: str + flag: str + type: str + primary_key: str + secondary_keys: list[str] + numeric_continuous_cols: list[str] + numeric_count_cols: list[str] + binary_flag_cols: list[str] + categorical_feature_cols: list[str] + known_exogenous_cols: list[str] + target_cols: list[str] + covariate_cols: list[str] + + +def call_infer_type(state: CompositeState) -> dict: + """ + Run the sequential pipeline up to series-type inference. + + :param state: graph state + :return: composite payload from infer_type + """ + payload = sinfert.run_infer_type(state["path"]) + return payload + + +def infer_structure(state: CompositeState) -> dict: + """ + Infer semantic feature roles for EDA deterministically from observed column + behavior. + + :param state: graph state + :return: inferred feature groupings + """ + feature_bucket_report = tinptool.infer_feature_buckets.invoke( + { + "path": state["path"], + "time_col": state["primary_key"], + "secondary_keys": state["secondary_keys"], + } + ) + trace_payload = { + "primary_key": state["primary_key"], + "secondary_keys": state["secondary_keys"], + "series_type": state["type"], + "feature_bucket_report": feature_bucket_report, + } + tinptool.write_stage_trace(state["path"], "infer_structure", trace_payload) + payload = { + "numeric_continuous_cols": feature_bucket_report["numeric_continuous_cols"], + "numeric_count_cols": feature_bucket_report["numeric_count_cols"], + "binary_flag_cols": feature_bucket_report["binary_flag_cols"], + "categorical_feature_cols": feature_bucket_report["categorical_feature_cols"], + "known_exogenous_cols": feature_bucket_report["known_exogenous_cols"], + "target_cols": feature_bucket_report["target_cols"], + "covariate_cols": feature_bucket_report["covariate_cols"], + } + return payload + + +feature_structure = lgraph.StateGraph(CompositeState) +feature_structure.add_node("infer_type_pipeline", call_infer_type) +feature_structure.add_node("infer_structure", infer_structure) +feature_structure.add_edge(lgraph.START, "infer_type_pipeline") +feature_structure.add_edge("infer_type_pipeline", "infer_structure") +feature_structure.add_edge("infer_structure", lgraph.END) +graph = feature_structure.compile() + + +def run_infer_structure(path: str) -> dict: + """ + Execute feature-structure inference end to end. 
+ + :param path: dataset path + :return: full composite graph payload + """ + init_state: CompositeState = { + "path": path, + "done": [], + "has_header": True, + "has_missing_values": False, + "error": "", + "info": "", + "cols": [], + "temporal_cols": [], + "numeric_val_cols": [], + "categorical_val_cols": [], + "bad_rows": [], + "metadata": {}, + "time_col": "", + "candidates": [], + "winner_formatter": {}, + "entity_col": None, + "numeric_cols": [], + "nonnegative_cols": [], + "jump_mult": 20.0, + "report": {}, + "summary": "", + "flag": "", + "type": "", + "primary_key": "", + "secondary_keys": [], + "numeric_continuous_cols": [], + "numeric_count_cols": [], + "binary_flag_cols": [], + "categorical_feature_cols": [], + "known_exogenous_cols": [], + "target_cols": [], + "covariate_cols": [], + } + out = graph.invoke(init_state) + payload: CompositeState = out + _LOG.info("Feature structure output: %s", payload) + return payload + + +def _parse_args() -> argparse.Namespace: + """ + Parse command-line arguments. + + :return: parsed arguments + """ + parser = argparse.ArgumentParser() + parser.add_argument( + "--path", + required=True, + help="Path to dataset file.", + ) + args = parser.parse_args() + return args + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + args = _parse_args() + run_infer_structure(args.path) diff --git a/agentic_eda/jupyterlab_extension_backend/src/ingest/infer_type.py b/agentic_eda/jupyterlab_extension_backend/src/ingest/infer_type.py new file mode 100644 index 000000000..e3fe05786 --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/src/ingest/infer_type.py @@ -0,0 +1,222 @@ +""" +Import as: + +import src.ingest.infer_type as sinfert +""" + +from __future__ import annotations + +import argparse +import logging +import pathlib +from typing import Literal +from typing import TypedDict + +import langgraph.graph as lgraph + +import src.ingest.format_datetime as sfordat +import src.ingest.handle_inputs as shainp +import src.tools.input_tools as tinptool + +_LOG = logging.getLogger(__name__) + + +class SeriesTypeState(TypedDict): + """ + Store the inferred series structure. + """ + + type: Literal["single", "multiple", "multivariate"] + primary_key: str + secondary_keys: list[str] + + +class CompositeState(TypedDict): + """ + Store graph state for series-structure inference. + """ + + path: str + done: list[str] + has_header: bool + has_missing_values: bool + error: str + info: str + cols: list[str] + temporal_cols: list[str] + numeric_val_cols: list[str] + categorical_val_cols: list[str] + bad_rows: list[dict] + metadata: dict + time_col: str + candidates: list[dict] + winner_formatter: dict + entity_col: str | None + numeric_cols: list[str] + nonnegative_cols: list[str] + jump_mult: float + report: dict + summary: str + flag: str + type: Literal["single", "multiple", "multivariate"] + primary_key: str + secondary_keys: list[str] + + +def call_input_handler(state: CompositeState) -> dict: + """ + Run input handler and collect column metadata. 
+ + :param state: graph state + :return: column classification payload + """ + dataset_path = pathlib.Path(state["path"]) + dataset = tinptool.load_dataset(dataset_path) + out = shainp.run_input_handler(state["path"]) + metadata = tinptool.extract_metadata.invoke({"path": state["path"]}) + payload = { + "done": out.get("done") or [], + "has_header": bool(out.get("has_header", True)), + "has_missing_values": bool(out.get("has_missing_values", False)), + "error": str(out.get("error") or ""), + "info": str(out.get("info") or ""), + "cols": [str(col) for col in dataset.columns.tolist()], + "temporal_cols": out.get("temporal_cols") or [], + "numeric_val_cols": out.get("numeric_val_cols") or [], + "categorical_val_cols": out.get("categorical_val_cols") or [], + "bad_rows": out.get("bad_rows") or [], + "numeric_cols": out.get("numeric_val_cols") or [], + "metadata": metadata, + } + return payload + + +def call_date_formatter(state: CompositeState) -> dict: + """ + Run the datetime formatter graph. + + :param state: graph state + :return: selected time column + """ + out: sfordat.DateFormatterState = sfordat.graph.invoke( # type: ignore + {"path": state["path"]} + ) + payload = { + "time_col": out["time_col"], + "candidates": out.get("candidates") or [], + "winner_formatter": out.get("winner_formatter") or {}, + } + return payload + + +def infer_type(state: CompositeState) -> dict: + """ + Infer whether the dataset is single-series, panel, or multivariate using + deterministic value-level evidence. + + :param state: graph state + :return: inferred series structure + """ + structure_assessment = shainp.assess_series_structure( + path=state["path"], + cols=state["cols"], + time_col=state["time_col"], + numeric_val_cols=state["numeric_val_cols"], + categorical_val_cols=state["categorical_val_cols"], + winner_formatter=state["winner_formatter"], + ) + primary_key = state["time_col"] + secondary_keys = structure_assessment.get("secondary_keys") or [] + if secondary_keys: + inferred_type: Literal["single", "multiple", "multivariate"] = "multiple" + elif len(state["numeric_val_cols"]) > 1: + inferred_type = "multivariate" + else: + inferred_type = "single" + trace_payload = { + "time_col": primary_key, + "structure_assessment": structure_assessment, + "inferred_type": inferred_type, + "secondary_keys": secondary_keys, + } + tinptool.write_stage_trace(state["path"], "infer_type", trace_payload) + payload = { + "type": inferred_type, + "primary_key": primary_key, + "secondary_keys": secondary_keys, + "entity_col": secondary_keys[0] if secondary_keys else None, + } + return payload + + +series_type = lgraph.StateGraph(CompositeState) +series_type.add_node("input_handler", call_input_handler) +series_type.add_node("date_formatter", call_date_formatter) +series_type.add_node("infer_type", infer_type) +series_type.add_edge(lgraph.START, "input_handler") +series_type.add_edge("input_handler", "date_formatter") +series_type.add_edge("date_formatter", "infer_type") +series_type.add_edge("infer_type", lgraph.END) +graph = series_type.compile() + + +def run_infer_type(path: str) -> dict: + """ + Execute series-structure inference end to end. 
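+
+    Illustrative usage (for the bundled T1_slice.csv the expected outcome is
+    a wide multivariate series with no secondary keys, but the result always
+    depends on the observed data):
+
+        import src.ingest.infer_type as sinfert
+
+        out = sinfert.run_infer_type("datasets/T1_slice.csv")
+        print(out["type"], out["primary_key"], out["secondary_keys"])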
+ + :param path: dataset path + :return: full composite graph payload + """ + init_state: CompositeState = { + "path": path, + "done": [], + "has_header": True, + "has_missing_values": False, + "error": "", + "info": "", + "cols": [], + "temporal_cols": [], + "numeric_val_cols": [], + "categorical_val_cols": [], + "bad_rows": [], + "metadata": {}, + "time_col": "", + "candidates": [], + "winner_formatter": {}, + "entity_col": None, + "numeric_cols": [], + "nonnegative_cols": [], + "jump_mult": 20.0, + "report": {}, + "summary": "", + "flag": "", + "type": "single", + "primary_key": "", + "secondary_keys": [], + } + out = graph.invoke(init_state) + payload: CompositeState = out + _LOG.info("Series type output: %s", payload) + return payload + + +def _parse_args() -> argparse.Namespace: + """ + Parse command-line arguments. + + :return: parsed arguments + """ + parser = argparse.ArgumentParser() + parser.add_argument( + "--path", + required=True, + help="Path to dataset file.", + ) + args = parser.parse_args() + return args + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + args = _parse_args() + run_infer_type(args.path) diff --git a/agentic_eda/jupyterlab_extension_backend/src/ingest/integrity.py b/agentic_eda/jupyterlab_extension_backend/src/ingest/integrity.py new file mode 100644 index 000000000..71ee4670c --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/src/ingest/integrity.py @@ -0,0 +1,402 @@ +""" +Import as: + +import src.ingest.integrity as sinteg +""" + +import logging +import pathlib +from typing import Literal +from typing import TypedDict + +import langchain.agents as lagents +import langchain_core.messages as lmessages +import langgraph.graph as lgraph +import pandas as pd +import pydantic + +import src.config.config as cconf +import src.ingest.format_datetime as sfordat +import src.ingest.handle_inputs as shainp +import src.ingest.infer_type as sinfert +import src.tools.input_tools as tinptool + +_LOG = logging.getLogger(__name__) + + +class IntegrityState(TypedDict): + """ + Store graph state for integrity checks. + """ + + path: str + time_col: str | None + winner_formatter: dict + cols: list[str] + temporal_cols: list[str] + bad_rows: list[dict] + entity_col: str | None + numeric_cols: list[str] + categorical_val_cols: list[str] + metadata: dict + secondary_keys: list[str] + nonnegative_cols: list[str] + jump_mult: float + report: dict + summary: str + flag: str + + +class IntegrityJudgeOutput(pydantic.BaseModel): + """ + Store structured LLM judgment. + """ + + summary: str + flag: Literal["yes", "no"] + + +def call_date_formatter(state: IntegrityState) -> dict: + """ + Run the datetime formatter graph. + + :param state: integrity graph state + :return: selected time column and formatter + """ + out: sfordat.DateFormatterState = sfordat.graph.invoke( # type: ignore + {"path": state["path"]} + ) + payload = { + "time_col": out["time_col"], + "winner_formatter": out["winner_formatter"], + } + return payload + + +def _maybe_infer_columns(state: IntegrityState) -> dict: + """ + Collect schema context needed by downstream integrity checks. 
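+
+    Columns are re-derived only when the incoming state lacks them, so the
+    integrity graph works both standalone and inside a larger pipeline.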
+ + :param state: integrity graph state + :return: schema-related state updates + """ + if ( + state.get("cols") + and state.get("temporal_cols") + and state.get("numeric_cols") + and state.get("metadata") + ): + payload = {} + else: + dataset_path = pathlib.Path(state["path"]) + dataset = tinptool.load_dataset(dataset_path) + out = shainp.run_input_handler(state["path"]) + metadata = tinptool.extract_metadata.invoke({"path": state["path"]}) + payload = { + "cols": [str(col) for col in dataset.columns.tolist()], + "temporal_cols": out.get("temporal_cols") or [], + "bad_rows": out.get("bad_rows") or [], + "numeric_cols": out.get("numeric_val_cols") or [], + "categorical_val_cols": out.get("categorical_val_cols") or [], + "metadata": metadata, + } + return payload + + +def call_infer_type(state: IntegrityState) -> dict: + """ + Infer the series structure and derive the temporary entity key. + + :param state: integrity graph state + :return: inferred secondary keys and first entity key + """ + infer_state: sinfert.CompositeState = { + "path": state["path"], + "cols": state.get("cols") or [], + "temporal_cols": state.get("temporal_cols") or [], + "numeric_val_cols": state.get("numeric_cols") or [], + "categorical_val_cols": state.get("categorical_val_cols") or [], + "bad_rows": state.get("bad_rows") or [], + "metadata": state.get("metadata") or {}, + "time_col": state["time_col"] or "", + "done": [], + "has_header": True, + "has_missing_values": False, + "error": "", + "info": "", + "candidates": [], + "winner_formatter": state.get("winner_formatter") or {}, + "entity_col": None, + "numeric_cols": state.get("numeric_cols") or [], + "nonnegative_cols": [], + "jump_mult": 20.0, + "report": {}, + "summary": "", + "flag": "", + "type": "single", + "primary_key": "", + "secondary_keys": [], + } + out = sinfert.infer_type(infer_state) + secondary_keys = out.get("secondary_keys") or [] + entity_col = secondary_keys[0] if secondary_keys else None + payload = { + "secondary_keys": secondary_keys, + "entity_col": entity_col, + } + return payload + + +def run_integrity_checks(state: IntegrityState) -> dict: + """ + Run deterministic integrity checks on a dataset. 
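+
+    Checks run in order: empty dataset, missing or unknown time column,
+    duplicate timestamps, duplicate (entity, timestamp) pairs, negative
+    values in declared nonnegative columns, and implausible jumps whose
+    absolute first difference exceeds `jump_mult` times the median (or
+    mean, as a fallback) absolute step.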
+ + :param state: integrity graph state + :return: report payload + """ + dataset_path = pathlib.Path(state["path"]) + dataset = tinptool.load_dataset(dataset_path) + issues: list[dict] = [] + summary: dict = { + "n_rows": int(dataset.shape[0]), + "n_cols": int(dataset.shape[1]), + } + if dataset.shape[0] == 0: + issues.append({"type": "empty_dataset", "msg": "Dataset has 0 rows."}) + report = {"summary": summary, "issues": issues} + payload = {"report": report} + return payload + time_col = state.get("time_col") + if time_col is None or time_col not in dataset.columns: + issues.append( + { + "type": "missing_time_col", + "msg": f"time_col missing: {time_col!r}", + } + ) + report = {"summary": summary, "issues": issues} + payload = {"report": report} + return payload + format_args = state.get("winner_formatter") or {} + format_args = { + key: val + for key, val in format_args.items() + if val is not None + } + try: + timestamp = pd.to_datetime( + dataset[time_col], + errors="coerce", + **format_args, + ) + except Exception: + timestamp = pd.to_datetime(dataset[time_col], errors="coerce") + summary["n_nat_time"] = int(timestamp.isna().sum()) + summary["min_time"] = ( + None if timestamp.dropna().empty else str(timestamp.dropna().min()) + ) + summary["max_time"] = ( + None if timestamp.dropna().empty else str(timestamp.dropna().max()) + ) + duplicate_timestamps = int(timestamp.dropna().duplicated().sum()) + summary["duplicate_timestamps"] = duplicate_timestamps + if duplicate_timestamps > 0: + issues.append( + {"type": "duplicate_timestamps", "count": duplicate_timestamps} + ) + entity_col = state.get("entity_col") + # TODO: Use all inferred secondary_keys as a composite entity key for + # integrity checks; for now we temporarily use only the first key. 
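+    # With an entity key, duplicates are judged on (entity, timestamp)
+    # pairs; plain repeated timestamps were already flagged above.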
+ if entity_col is not None and entity_col in dataset.columns: + summary["n_entities"] = int(dataset[entity_col].nunique(dropna=True)) + tmp = dataset[[entity_col]].copy() + tmp["_ts"] = timestamp + duplicate_pairs = int( + tmp.dropna(subset=[entity_col, "_ts"]) + .duplicated(subset=[entity_col, "_ts"]) + .sum() + ) + summary["duplicate_entity_timestamp_pairs"] = duplicate_pairs + if duplicate_pairs > 0: + issues.append( + { + "type": "duplicate_entity_timestamp_pairs", + "count": duplicate_pairs, + } + ) + else: + summary["duplicate_entity_timestamp_pairs"] = None + numeric_cols = [col for col in state.get("numeric_cols") or []] + numeric_cols = [col for col in numeric_cols if col in dataset.columns] + nonnegative_cols = [col for col in state.get("nonnegative_cols") or []] + negative_report: dict = {} + for col in nonnegative_cols: + if col not in dataset.columns: + continue + series = pd.to_numeric(dataset[col], errors="coerce") + n_negative = int((series < 0).sum(skipna=True)) + if n_negative > 0: + negative_report[col] = n_negative + summary["negatives_in_nonnegative_cols"] = negative_report + if negative_report: + issues.append({"type": "negative_values", "details": negative_report}) + jump_mult = float(state.get("jump_mult") or 20.0) + jumps: dict = {} + if numeric_cols: + selected_cols = [time_col] + if entity_col is not None and entity_col in dataset.columns: + selected_cols.append(entity_col) + selected_cols.extend(numeric_cols) + tmp = dataset[selected_cols].copy() + tmp["_ts"] = timestamp + if entity_col is None or entity_col not in tmp.columns: + sort_cols = ["_ts"] + else: + sort_cols = [entity_col, "_ts"] + tmp = tmp.sort_values(sort_cols) + for col in numeric_cols: + tmp[col] = pd.to_numeric(tmp[col], errors="coerce") + if entity_col is None or entity_col not in tmp.columns: + diff = tmp[col].diff() + else: + diff = tmp.groupby(entity_col)[col].diff() + diff_abs = diff.abs() + scale = diff_abs.median() + if pd.isna(scale) or float(scale) <= 0.0: + scale = diff_abs.mean() + if pd.isna(scale) or float(scale) <= 0.0: + continue + threshold = float(scale) * jump_mult + flagged = diff_abs > threshold + n_flagged = int(flagged.sum(skipna=True)) + if n_flagged <= 0: + continue + examples: list[dict] = [] + flagged_idx = tmp.index[flagged.fillna(False)][:5] + for idx in flagged_idx: + diff_val = diff.loc[idx] + curr_val = tmp.loc[idx, col] + if pd.isna(diff_val) or pd.isna(curr_val): + prev_val = None + else: + prev_val = float(curr_val - diff_val) + example = { + "col": col, + "entity": ( + None + if entity_col is None or entity_col not in tmp.columns + else tmp.loc[idx, entity_col] + ), + "time": ( + None + if pd.isna(tmp.loc[idx, "_ts"]) + else str(tmp.loc[idx, "_ts"]) + ), + "prev": prev_val, + "curr": None if pd.isna(curr_val) else float(curr_val), + "diff": None if pd.isna(diff_val) else float(diff_val), + "threshold": float(threshold), + } + examples.append(example) + jumps[col] = { + "count": n_flagged, + "threshold": threshold, + "examples": examples, + } + issues.append( + { + "type": "impossible_jumps", + "col": col, + "count": n_flagged, + } + ) + summary["jump_mult"] = jump_mult + summary["jumps"] = jumps + report = {"summary": summary, "issues": issues} + payload = {"report": report} + return payload + + +def integrity_llm_summary(state: IntegrityState) -> dict: + """ + Summarize integrity report and provide go/no-go flag. 
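+
+    The judge sees only the deterministic report; a `yes` flag means the
+    dataset may proceed to downstream stages.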
+ + :param state: integrity graph state + :return: summary and decision flag + """ + llm = cconf.get_chat_model(model="gpt-4.1") + agent = lagents.create_agent( + model=llm, + tools=[], + system_prompt=( + "You are an integrity judge. Decide if the dataset can proceed. " + "Return JSON with keys summary and flag. Set flag to yes only when " + "there are no meaningful integrity issues." + ), + response_format=IntegrityJudgeOutput, + ) + out = agent.invoke( + { + "messages": [ + lmessages.HumanMessage( + content=f"Here is the integrity report: {state['report']}" + ) + ] + } + ) + structured_response = out["structured_response"].model_dump() + payload = { + "summary": structured_response["summary"], + "flag": structured_response["flag"], + } + return payload + + +integrity = lgraph.StateGraph(IntegrityState) +integrity.add_node("date_formatter", call_date_formatter) +integrity.add_node("maybe_infer_columns", _maybe_infer_columns) +integrity.add_node("infer_type", call_infer_type) +integrity.add_node("run_integrity_checks", run_integrity_checks) +integrity.add_node("integrity_llm_summary", integrity_llm_summary) +integrity.add_edge(lgraph.START, "date_formatter") +integrity.add_edge("date_formatter", "maybe_infer_columns") +integrity.add_edge("maybe_infer_columns", "infer_type") +integrity.add_edge("infer_type", "run_integrity_checks") +integrity.add_edge("run_integrity_checks", "integrity_llm_summary") +integrity.add_edge("integrity_llm_summary", lgraph.END) +graph = integrity.compile() + + +def run_integrity(path: str) -> dict: + """ + Execute integrity graph end to end. + + :param path: dataset path + :return: integrity report with summary and flag + """ + init_state: IntegrityState = { + "path": path, + "time_col": None, + "winner_formatter": {}, + "cols": [], + "temporal_cols": [], + "bad_rows": [], + "entity_col": None, + "numeric_cols": [], + "categorical_val_cols": [], + "metadata": {}, + "secondary_keys": [], + "nonnegative_cols": [], + "jump_mult": 20.0, + "report": {}, + "summary": "", + "flag": "", + } + out = graph.invoke(init_state) + payload = { + "report": out["report"], + "summary": out["summary"], + "flag": out["flag"], + } + _LOG.info("Integrity output: %s", payload) + return payload diff --git a/agentic_eda/jupyterlab_extension_backend/src/main.py b/agentic_eda/jupyterlab_extension_backend/src/main.py new file mode 100644 index 000000000..f5fd3e70f --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/src/main.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python +""" +Import as: + +import src.main as smain +""" + +import argparse +import json +import logging + +import src.ingest.compute_temporal_stats as sctstats +import src.ingest.format_datetime as sfordat +import src.ingest.handle_inputs as shainp +import src.ingest.infer_structure as sinferstruct +import src.ingest.infer_type as sinfert +import src.ingest.integrity as sinteg +import src.quality_handling.audit_missingness as sauditmiss +import src.quality_handling.handle_missingness as shandlemiss +import src.quality_handling.standardize as sstandard +import src.univariate_analysis.test_transforms as stransforms +import src.univariate_analysis.univariate_metrics_plotting as sunivar + +_LOG = logging.getLogger(__name__) + + +def _parse_args() -> argparse.Namespace: + """ + Parse CLI arguments. 
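+
+    Example invocation (hypothetical dataset path):
+
+        python -m src.main --mode standardize --path data/my_series.csv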
+ + :return: parsed arguments + """ + parser = argparse.ArgumentParser() + parser.add_argument( + "--mode", + required=True, + choices=[ + "input", + "format", + "infer_type", + "infer_structure", + "compute_temporal_stats", + "integrity", + "audit_missingness", + "handle_missingness", + "standardize", + "univariate_metrics_plotting", + "test_transforms", + ], + help="Pipeline stage to execute.", + ) + parser.add_argument( + "--path", + required=True, + help="Path to dataset file.", + ) + args = parser.parse_args() + return args + + +def _run_cli(args: argparse.Namespace) -> dict: + """ + Execute selected backend stage. + + :param args: parsed CLI args + :return: stage output payload + """ + mode = args.mode + if mode == "input": + payload = shainp.run_input_handler(args.path) + elif mode == "format": + payload = sfordat.run_date_formatter(args.path) + elif mode == "integrity": + payload = sinteg.run_integrity(args.path) + elif mode == "infer_type": + payload = sinfert.run_infer_type(args.path) + elif mode == "infer_structure": + payload = sinferstruct.run_infer_structure(args.path) + elif mode == "compute_temporal_stats": + payload = sctstats.run_compute_temporal_stats(args.path) + elif mode == "audit_missingness": + payload = sauditmiss.run_audit_missingness(args.path) + elif mode == "handle_missingness": + payload = shandlemiss.run_handle_missingness(args.path) + elif mode == "standardize": + payload = sstandard.run_standardize(args.path) + elif mode == "univariate_metrics_plotting": + payload = sunivar.run_univariate_metrics_plotting(args.path) + elif mode == "test_transforms": + payload = stransforms.run_test_transforms(args.path) + else: + raise ValueError(f"Unsupported mode='{mode}'") + return payload + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + cli_args = _parse_args() + output = _run_cli(cli_args) + _LOG.info("Pipeline output: %s", json.dumps(output, default=str, indent=2)) diff --git a/agentic_eda/jupyterlab_extension_backend/src/quality_handling/__init__.py b/agentic_eda/jupyterlab_extension_backend/src/quality_handling/__init__.py new file mode 100644 index 000000000..b6cf94fe8 --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/src/quality_handling/__init__.py @@ -0,0 +1,3 @@ +""" +Quality-handling stages and helpers for the Jupyter backend. +""" diff --git a/agentic_eda/jupyterlab_extension_backend/src/quality_handling/audit_missingness.py b/agentic_eda/jupyterlab_extension_backend/src/quality_handling/audit_missingness.py new file mode 100644 index 000000000..a037ca02a --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/src/quality_handling/audit_missingness.py @@ -0,0 +1,209 @@ +""" +Import as: + +import src.quality_handling.audit_missingness as sauditmiss +""" + +from __future__ import annotations + +import argparse +import logging +from typing import TypedDict + +import langgraph.graph as lgraph + +import src.ingest.compute_temporal_stats as sctstats +import src.tools.input_tools as tinptool + +_LOG = logging.getLogger(__name__) + + +class MissingnessAuditState(TypedDict): + """ + Store deterministic missingness audit output. + """ + + missingness_report: dict + + +class CompositeState(TypedDict): + """ + Store graph state for missingness auditing. 
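+
+    Extends the upstream composite state with temporal-statistics and
+    missingness-report fields.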
+ """ + + path: str + done: list[str] + has_header: bool + has_missing_values: bool + error: str + info: str + cols: list[str] + temporal_cols: list[str] + numeric_val_cols: list[str] + categorical_val_cols: list[str] + bad_rows: list[dict] + metadata: dict + time_col: str + candidates: list[dict] + winner_formatter: dict + entity_col: str | None + numeric_cols: list[str] + nonnegative_cols: list[str] + jump_mult: float + report: dict + summary: str + flag: str + type: str + primary_key: str + secondary_keys: list[str] + numeric_continuous_cols: list[str] + numeric_count_cols: list[str] + binary_flag_cols: list[str] + categorical_feature_cols: list[str] + known_exogenous_cols: list[str] + target_cols: list[str] + covariate_cols: list[str] + n_nat_time: int + min_time: str | None + max_time: str | None + typical_delta_mode: str | None + typical_delta_median: str | None + expected_frequency: str | None + dominant_frequency_fraction: float + is_irregular_sampling: bool + resampling_decision: str + coverage_summary: dict + coverage_per_entity: list[dict] + missingness_report: dict + + +def call_compute_temporal_stats(state: CompositeState) -> dict: + """ + Run the sequential pipeline up to temporal statistics. + + :param state: graph state + :return: composite payload from compute_temporal_stats + """ + payload = sctstats.run_compute_temporal_stats(state["path"]) + return payload + + +def audit_missingness(state: CompositeState) -> dict: + """ + Audit value missingness and timestamp missingness deterministically. + + :param state: graph state + :return: missingness report payload + """ + missingness_report = tinptool.audit_missingness.invoke( + { + "path": state["path"], + "time_col": state["primary_key"], + "secondary_keys": state["secondary_keys"], + "winner_formatter": state["winner_formatter"], + } + ) + trace_payload = { + "primary_key": state["primary_key"], + "secondary_keys": state["secondary_keys"], + "missingness_report": missingness_report, + } + tinptool.write_stage_trace(state["path"], "audit_missingness", trace_payload) + payload = { + "missingness_report": missingness_report, + "has_missing_values": bool( + missingness_report["value_missingness_summary"]["total_missing_cells"] > 0 + or missingness_report["timestamp_missingness_summary"]["total_missing_timestamps"] > 0 + ), + } + return payload + + +missingness_audit = lgraph.StateGraph(CompositeState) +missingness_audit.add_node("compute_temporal_stats_pipeline", call_compute_temporal_stats) +missingness_audit.add_node("audit_missingness", audit_missingness) +missingness_audit.add_edge(lgraph.START, "compute_temporal_stats_pipeline") +missingness_audit.add_edge("compute_temporal_stats_pipeline", "audit_missingness") +missingness_audit.add_edge("audit_missingness", lgraph.END) +graph = missingness_audit.compile() + + +def run_audit_missingness(path: str) -> dict: + """ + Execute missingness auditing end to end. 
+ + :param path: dataset path + :return: full composite graph payload + """ + init_state: CompositeState = { + "path": path, + "done": [], + "has_header": True, + "has_missing_values": False, + "error": "", + "info": "", + "cols": [], + "temporal_cols": [], + "numeric_val_cols": [], + "categorical_val_cols": [], + "bad_rows": [], + "metadata": {}, + "time_col": "", + "candidates": [], + "winner_formatter": {}, + "entity_col": None, + "numeric_cols": [], + "nonnegative_cols": [], + "jump_mult": 20.0, + "report": {}, + "summary": "", + "flag": "", + "type": "", + "primary_key": "", + "secondary_keys": [], + "numeric_continuous_cols": [], + "numeric_count_cols": [], + "binary_flag_cols": [], + "categorical_feature_cols": [], + "known_exogenous_cols": [], + "target_cols": [], + "covariate_cols": [], + "n_nat_time": 0, + "min_time": None, + "max_time": None, + "typical_delta_mode": None, + "typical_delta_median": None, + "expected_frequency": None, + "dominant_frequency_fraction": 0.0, + "is_irregular_sampling": False, + "resampling_decision": "", + "coverage_summary": {}, + "coverage_per_entity": [], + "missingness_report": {}, + } + out = graph.invoke(init_state) + payload: CompositeState = out + _LOG.info("Missingness audit output: %s", payload) + return payload + + +def _parse_args() -> argparse.Namespace: + """ + Parse command-line arguments. + + :return: parsed arguments + """ + parser = argparse.ArgumentParser() + parser.add_argument( + "--path", + required=True, + help="Path to dataset file.", + ) + args = parser.parse_args() + return args + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + args = _parse_args() + run_audit_missingness(args.path) diff --git a/agentic_eda/jupyterlab_extension_backend/src/quality_handling/handle_missingness.py b/agentic_eda/jupyterlab_extension_backend/src/quality_handling/handle_missingness.py new file mode 100644 index 000000000..325f1cdd4 --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/src/quality_handling/handle_missingness.py @@ -0,0 +1,386 @@ +""" +Import as: + +import src.quality_handling.handle_missingness as shandlemiss +""" + +from __future__ import annotations + +import argparse +import logging +from typing import Literal +from typing import TypedDict + +import langchain.agents as lagents +import langchain_core.messages as lmessages +import langgraph.graph as lgraph +import pydantic + +import src.config.config as cconf +import src.quality_handling.audit_missingness as sauditmiss +import src.tools.input_tools as tinptool + +_LOG = logging.getLogger(__name__) + + +def _build_missingness_plan_summary(actions: list[dict], *, defaulted_cols: int) -> str: + """ + Build a summary from the normalized missingness actions. + + :param actions: normalized action list + :param defaulted_cols: number of columns defaulted during normalization + :return: summary text aligned with the final plan + """ + if not actions: + return "No non-time columns required missingness handling." + counts: dict[str, int] = {} + for action in actions: + strategy = str(action["strategy"]) + counts[strategy] = counts.get(strategy, 0) + 1 + ordered_counts = ", ".join( + f"{strategy}={counts[strategy]}" + for strategy in sorted(counts) + ) + summary = ( + f"Normalized missingness plan for {len(actions)} columns: {ordered_counts}. " + "Actions reflect the final bounded plan after validation against eligible strategies." + ) + if defaulted_cols > 0: + summary += f" {defaulted_cols} columns were defaulted conservatively during normalization." 
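+    # Worked example: three actions with strategies {forward_fill: 2,
+    # leave_as_nan: 1} yield "forward_fill=2, leave_as_nan=1" (strategy
+    # names sorted alphabetically).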
+ return summary + + +class MissingnessDecision(pydantic.BaseModel): + """ + Store one bounded missingness decision. + """ + + col: str + strategy: Literal[ + "leave_as_nan", + "forward_fill", + "interpolate", + "zero_fill", + "drop_rows", + ] + create_missingness_flag: bool = True + reason: str + + +class MissingnessPlanOutput(pydantic.BaseModel): + """ + Store LLM-produced missingness plan. + """ + + summary: str + actions: list[MissingnessDecision] + + +class CompositeState(TypedDict): + """ + Store graph state for missingness handling. + """ + + path: str + done: list[str] + has_header: bool + has_missing_values: bool + error: str + info: str + cols: list[str] + temporal_cols: list[str] + numeric_val_cols: list[str] + categorical_val_cols: list[str] + bad_rows: list[dict] + metadata: dict + time_col: str + candidates: list[dict] + winner_formatter: dict + entity_col: str | None + numeric_cols: list[str] + nonnegative_cols: list[str] + jump_mult: float + report: dict + summary: str + flag: str + type: str + primary_key: str + secondary_keys: list[str] + numeric_continuous_cols: list[str] + numeric_count_cols: list[str] + binary_flag_cols: list[str] + categorical_feature_cols: list[str] + known_exogenous_cols: list[str] + target_cols: list[str] + covariate_cols: list[str] + n_nat_time: int + min_time: str | None + max_time: str | None + typical_delta_mode: str | None + typical_delta_median: str | None + expected_frequency: str | None + dominant_frequency_fraction: float + is_irregular_sampling: bool + resampling_decision: str + coverage_summary: dict + coverage_per_entity: list[dict] + missingness_report: dict + missingness_plan: dict + missingness_handling_report: dict + quality_dataset_path: str + + +def call_audit_missingness(state: CompositeState) -> dict: + """ + Run the sequential pipeline up to missingness auditing. + + :param state: graph state + :return: composite payload from audit_missingness + """ + payload = sauditmiss.run_audit_missingness(state["path"]) + return payload + + +def _normalize_missingness_plan(state: CompositeState, raw_plan: dict) -> dict: + """ + Ensure every missing column has one supported action. 
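+
+    LLM actions for unknown columns are dropped, ineligible strategies are
+    downgraded to `leave_as_nan`, and any column the plan misses receives
+    a conservative default with a missingness flag.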
+ + :param state: graph state + :param raw_plan: LLM-produced plan + :return: normalized deterministic plan + """ + audit_report = state["missingness_report"] + missing_cols = [ + item + for item in audit_report["value_missingness_by_column"] + if item["n_missing"] > 0 and item["col"] != state["primary_key"] + ] + eligible_by_col = { + item["col"]: set(item["eligible_strategies"]) + for item in missing_cols + } + plan_by_col = {} + defaulted_cols = 0 + for item in raw_plan.get("actions") or []: + col = str(item.get("col") or "") + if col not in eligible_by_col: + continue + strategy = str(item.get("strategy") or "leave_as_nan") + if strategy not in eligible_by_col[col]: + strategy = "leave_as_nan" + plan_by_col[col] = { + "col": col, + "strategy": strategy, + "create_missingness_flag": bool(item.get("create_missingness_flag", True)), + "reason": str(item.get("reason") or ""), + } + normalized_actions = [] + for item in missing_cols: + col = item["col"] + action = plan_by_col.get( + col, + { + "col": col, + "strategy": "leave_as_nan", + "create_missingness_flag": True, + "reason": "Defaulted conservatively because no valid explicit plan was provided.", + }, + ) + normalized_actions.append(action) + if col not in plan_by_col: + defaulted_cols += 1 + return { + "summary": _build_missingness_plan_summary( + normalized_actions, + defaulted_cols=defaulted_cols, + ), + "actions": normalized_actions, + } + + +def choose_missingness_plan(state: CompositeState) -> dict: + """ + Choose bounded missingness actions using deterministic evidence. + + :param state: graph state + :return: normalized missingness plan + """ + missing_cols = [ + item + for item in state["missingness_report"]["value_missingness_by_column"] + if item["n_missing"] > 0 and item["col"] != state["primary_key"] + ] + if not missing_cols: + payload = { + "missingness_plan": { + "summary": "No non-time columns contain missing values requiring handling.", + "actions": [], + } + } + return payload + llm = cconf.get_chat_model(model="gpt-4.1") + agent = lagents.create_agent( + model=llm, + tools=[], + system_prompt=( + "You are a missingness planner for a time-series EDA backend. " + "Choose exactly one bounded strategy per column with missing values. " + "Allowed strategies are leave_as_nan, forward_fill, interpolate, " + "zero_fill, and drop_rows. Prefer conservative choices when the " + "evidence is weak. Use zero_fill only for true count-like variables " + "where structural zeros are plausible. Use interpolate only for " + "numeric columns. Use forward_fill for stateful or slowly varying " + "features when continuity is plausible. Missing timestamps are a " + "separate issue from missing cell values; do not pretend that a cell " + "imputation solves timestamp holes." 
+ ), + response_format=MissingnessPlanOutput, + ) + evidence = { + "series_type": state["type"], + "expected_frequency": state["expected_frequency"], + "is_irregular_sampling": state["is_irregular_sampling"], + "timestamp_missingness_summary": state["missingness_report"]["timestamp_missingness_summary"], + "columns_with_missing_values": missing_cols, + "numeric_continuous_cols": state["numeric_continuous_cols"], + "numeric_count_cols": state["numeric_count_cols"], + "binary_flag_cols": state["binary_flag_cols"], + "categorical_feature_cols": state["categorical_feature_cols"], + } + out = agent.invoke( + { + "messages": [ + lmessages.HumanMessage( + content=f"Plan missingness handling from this evidence: {evidence}" + ) + ] + } + ) + raw_plan = out["structured_response"].model_dump() + normalized_plan = _normalize_missingness_plan(state, raw_plan) + payload = {"missingness_plan": normalized_plan} + return payload + + +def apply_missingness_plan(state: CompositeState) -> dict: + """ + Apply the chosen missingness plan deterministically. + + :param state: graph state + :return: handling report and output dataset path + """ + handling_report = tinptool.apply_missingness_actions.invoke( + { + "source_path": state["path"], + "input_path": state["path"], + "time_col": state["primary_key"], + "secondary_keys": state["secondary_keys"], + "winner_formatter": state["winner_formatter"], + "actions": state["missingness_plan"]["actions"], + } + ) + trace_payload = { + "missingness_plan": state["missingness_plan"], + "missingness_handling_report": handling_report, + } + tinptool.write_stage_trace(state["path"], "handle_missingness", trace_payload) + payload = { + "missingness_handling_report": handling_report, + "quality_dataset_path": handling_report["output_path"], + } + return payload + + +missingness_handling = lgraph.StateGraph(CompositeState) +missingness_handling.add_node("audit_missingness_pipeline", call_audit_missingness) +missingness_handling.add_node("choose_missingness_plan", choose_missingness_plan) +missingness_handling.add_node("apply_missingness_plan", apply_missingness_plan) +missingness_handling.add_edge(lgraph.START, "audit_missingness_pipeline") +missingness_handling.add_edge("audit_missingness_pipeline", "choose_missingness_plan") +missingness_handling.add_edge("choose_missingness_plan", "apply_missingness_plan") +missingness_handling.add_edge("apply_missingness_plan", lgraph.END) +graph = missingness_handling.compile() + + +def run_handle_missingness(path: str) -> dict: + """ + Execute missingness handling end to end. 
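+
+    The path of the handled dataset is returned under
+    `quality_dataset_path` in the final payload.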
+ + :param path: dataset path + :return: full composite graph payload + """ + init_state: CompositeState = { + "path": path, + "done": [], + "has_header": True, + "has_missing_values": False, + "error": "", + "info": "", + "cols": [], + "temporal_cols": [], + "numeric_val_cols": [], + "categorical_val_cols": [], + "bad_rows": [], + "metadata": {}, + "time_col": "", + "candidates": [], + "winner_formatter": {}, + "entity_col": None, + "numeric_cols": [], + "nonnegative_cols": [], + "jump_mult": 20.0, + "report": {}, + "summary": "", + "flag": "", + "type": "", + "primary_key": "", + "secondary_keys": [], + "numeric_continuous_cols": [], + "numeric_count_cols": [], + "binary_flag_cols": [], + "categorical_feature_cols": [], + "known_exogenous_cols": [], + "target_cols": [], + "covariate_cols": [], + "n_nat_time": 0, + "min_time": None, + "max_time": None, + "typical_delta_mode": None, + "typical_delta_median": None, + "expected_frequency": None, + "dominant_frequency_fraction": 0.0, + "is_irregular_sampling": False, + "resampling_decision": "", + "coverage_summary": {}, + "coverage_per_entity": [], + "missingness_report": {}, + "missingness_plan": {}, + "missingness_handling_report": {}, + "quality_dataset_path": "", + } + out = graph.invoke(init_state) + payload: CompositeState = out + _LOG.info("Missingness handling output: %s", payload) + return payload + + +def _parse_args() -> argparse.Namespace: + """ + Parse command-line arguments. + + :return: parsed arguments + """ + parser = argparse.ArgumentParser() + parser.add_argument( + "--path", + required=True, + help="Path to dataset file.", + ) + args = parser.parse_args() + return args + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + args = _parse_args() + run_handle_missingness(args.path) diff --git a/agentic_eda/jupyterlab_extension_backend/src/quality_handling/standardize.py b/agentic_eda/jupyterlab_extension_backend/src/quality_handling/standardize.py new file mode 100644 index 000000000..0dab99163 --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/src/quality_handling/standardize.py @@ -0,0 +1,488 @@ +""" +Import as: + +import src.quality_handling.standardize as sstandard +""" + +from __future__ import annotations + +import argparse +import logging +from typing import Literal +from typing import TypedDict + +import langchain.agents as lagents +import langchain_core.messages as lmessages +import langgraph.graph as lgraph +import pydantic + +import src.config.config as cconf +import src.quality_handling.handle_missingness as shandlemiss +import src.tools.input_tools as tinptool + +_LOG = logging.getLogger(__name__) + + +def _build_standardization_plan_summary(actions: list[dict], *, defaulted_cols: int) -> str: + """ + Build a summary from the normalized standardization actions. + + :param actions: normalized action list + :param defaulted_cols: number of columns defaulted during normalization + :return: summary text aligned with the final plan + """ + if not actions: + return "No numeric candidate columns were selected for optional standardization." + counts: dict[str, int] = {} + for action in actions: + transform = str(action["action"]) + counts[transform] = counts.get(transform, 0) + 1 + ordered_counts = ", ".join( + f"{transform}={counts[transform]}" + for transform in sorted(counts) + ) + summary = ( + f"Normalized standardization plan for {len(actions)} columns: {ordered_counts}. " + "This summary reflects the final validated transform choices, not the raw LLM prose." 
+ ) + if defaulted_cols > 0: + summary += f" {defaulted_cols} columns defaulted conservatively to `none`." + return summary + + +class StandardizationDecision(pydantic.BaseModel): + """ + Store one bounded standardization decision. + """ + + col: str + action: Literal["none", "robust_scale", "log1p", "log1p_then_robust_scale"] + reason: str + + +class StandardizationPlanOutput(pydantic.BaseModel): + """ + Store LLM-produced standardization plan. + """ + + summary: str + actions: list[StandardizationDecision] + + +class StandardizationGateOutput(pydantic.BaseModel): + """ + Store the dataset-level standardization gate decision. + """ + + should_standardize: bool + reason: str + + +class CompositeState(TypedDict): + """ + Store graph state for optional standardization. + """ + + path: str + done: list[str] + has_header: bool + has_missing_values: bool + error: str + info: str + cols: list[str] + temporal_cols: list[str] + numeric_val_cols: list[str] + categorical_val_cols: list[str] + bad_rows: list[dict] + metadata: dict + time_col: str + candidates: list[dict] + winner_formatter: dict + entity_col: str | None + numeric_cols: list[str] + nonnegative_cols: list[str] + jump_mult: float + report: dict + summary: str + flag: str + type: str + primary_key: str + secondary_keys: list[str] + numeric_continuous_cols: list[str] + numeric_count_cols: list[str] + binary_flag_cols: list[str] + categorical_feature_cols: list[str] + known_exogenous_cols: list[str] + target_cols: list[str] + covariate_cols: list[str] + n_nat_time: int + min_time: str | None + max_time: str | None + typical_delta_mode: str | None + typical_delta_median: str | None + expected_frequency: str | None + dominant_frequency_fraction: float + is_irregular_sampling: bool + resampling_decision: str + coverage_summary: dict + coverage_per_entity: list[dict] + missingness_report: dict + missingness_plan: dict + missingness_handling_report: dict + quality_dataset_path: str + standardization_profile: dict + standardization_gate: dict + standardization_plan: dict + standardization_report: dict + standardized_dataset_path: str + + +def call_handle_missingness(state: CompositeState) -> dict: + """ + Run the sequential pipeline up to missingness handling. + + :param state: graph state + :return: composite payload from handle_missingness + """ + payload = shandlemiss.run_handle_missingness(state["path"]) + return payload + + +def profile_standardization(state: CompositeState) -> dict: + """ + Profile numeric feature scale and tail behavior deterministically. + + :param state: graph state + :return: scale profile report + """ + input_path = state["quality_dataset_path"] or state["path"] + profile = tinptool.profile_standardization_candidates.invoke( + { + "path": input_path, + "numeric_continuous_cols": state["numeric_continuous_cols"], + "numeric_count_cols": state["numeric_count_cols"], + "binary_flag_cols": state["binary_flag_cols"], + } + ) + payload = {"standardization_profile": profile} + return payload + + +def choose_standardization_gate(state: CompositeState) -> dict: + """ + Decide whether optional standardization should run at all. 
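+
+    The gate answers `no` deterministically when no numeric candidates
+    exist; otherwise an LLM judge, prompted to favor leaving raw units
+    intact, decides from the scale profile.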
+ + :param state: graph state + :return: dataset-level gate decision + """ + per_column = state["standardization_profile"].get("per_column") or [] + if not per_column: + return { + "standardization_gate": { + "should_standardize": False, + "reason": "No numeric candidate columns were available for optional standardization.", + } + } + llm = cconf.get_chat_model(model="gpt-4.1") + agent = lagents.create_agent( + model=llm, + tools=[], + system_prompt=( + "You are the gatekeeper for point 9 in a time-series EDA backend. " + "Decide whether optional standardization should run at all for this dataset. " + "Favor should_standardize=false unless there is strong evidence that rescaling " + "or log-scaling is genuinely useful. Favor false for raw exploratory analysis, " + "for SCADA or sensor-style datasets where physical units matter, and for cases " + "where leaving values untouched preserves interpretability. Favor true only when " + "scale disparities or heavy tails are severe enough that not transforming would " + "materially hinder comparison or downstream modeling." + ), + response_format=StandardizationGateOutput, + ) + evidence = { + "series_type": state["type"], + "numeric_continuous_cols": state["numeric_continuous_cols"], + "numeric_count_cols": state["numeric_count_cols"], + "binary_flag_cols": state["binary_flag_cols"], + "scale_summary": state["standardization_profile"].get("scale_summary"), + "sample_profiles": per_column[:20], + } + out = agent.invoke( + { + "messages": [ + lmessages.HumanMessage( + content=f"Decide whether optional standardization should run from this evidence: {evidence}" + ) + ] + } + ) + gate = out["structured_response"].model_dump() + return {"standardization_gate": gate} + + +def _normalize_standardization_plan(state: CompositeState, raw_plan: dict) -> dict: + """ + Ensure every candidate column gets a supported transform decision. + + :param state: graph state + :param raw_plan: LLM-produced plan + :return: normalized plan + """ + per_column = state["standardization_profile"].get("per_column") or [] + eligible_by_col = { + item["col"]: set(item["eligible_actions"]) + for item in per_column + } + plan_by_col = {} + defaulted_cols = 0 + for item in raw_plan.get("actions") or []: + col = str(item.get("col") or "") + if col not in eligible_by_col: + continue + action = str(item.get("action") or "none") + if action not in eligible_by_col[col]: + action = "none" + plan_by_col[col] = { + "col": col, + "action": action, + "reason": str(item.get("reason") or ""), + } + normalized_actions = [] + for item in per_column: + col = item["col"] + if col not in plan_by_col: + defaulted_cols += 1 + normalized_actions.append( + plan_by_col.get( + col, + { + "col": col, + "action": "none", + "reason": "Defaulted conservatively because no valid transform was selected.", + }, + ) + ) + return { + "summary": _build_standardization_plan_summary( + normalized_actions, + defaulted_cols=defaulted_cols, + ), + "actions": normalized_actions, + } + + +def choose_standardization_plan(state: CompositeState) -> dict: + """ + Choose whether optional standardization is justified. + + :param state: graph state + :return: normalized standardization plan + """ + gate = state.get("standardization_gate") or {} + if not bool(gate.get("should_standardize")): + payload = { + "standardization_plan": { + "summary": ( + "Dataset-level standardization gate returned `no`. 
" + f"Reason: {str(gate.get('reason') or 'No reason provided.')}" + ), + "actions": [], + } + } + return payload + per_column = state["standardization_profile"].get("per_column") or [] + if not per_column: + payload = { + "standardization_plan": { + "summary": "No numeric candidate columns were available for optional standardization.", + "actions": [], + } + } + return payload + llm = cconf.get_chat_model(model="gpt-4.1") + agent = lagents.create_agent( + model=llm, + tools=[], + system_prompt=( + "You are an optional standardization planner for a time-series EDA backend. " + "This stage is optional. Use action none unless there is a concrete reason " + "to transform a feature. Allowed actions are none, robust_scale, log1p, " + "and log1p_then_robust_scale. Favor none when evidence is weak. Favor " + "robust_scale for large cross-feature scale disparities. Favor log1p for " + "strongly right-skewed nonnegative features. Never invent new actions." + ), + response_format=StandardizationPlanOutput, + ) + evidence = { + "series_type": state["type"], + "scale_summary": state["standardization_profile"].get("scale_summary"), + "per_column": per_column, + } + out = agent.invoke( + { + "messages": [ + lmessages.HumanMessage( + content=f"Choose optional standardization actions from this evidence: {evidence}" + ) + ] + } + ) + raw_plan = out["structured_response"].model_dump() + normalized_plan = _normalize_standardization_plan(state, raw_plan) + payload = {"standardization_plan": normalized_plan} + return payload + + +def apply_standardization_plan(state: CompositeState) -> dict: + """ + Apply the chosen standardization plan deterministically. + + :param state: graph state + :return: transformation report and output path + """ + input_path = state["quality_dataset_path"] or state["path"] + if not state["standardization_plan"]["actions"]: + report = { + "input_path": input_path, + "output_path": input_path, + "skipped": True, + "reason": state["standardization_plan"]["summary"], + "actions_applied": [], + } + trace_payload = { + "input_path": input_path, + "standardization_profile": state["standardization_profile"], + "standardization_gate": state.get("standardization_gate") or {}, + "standardization_plan": state["standardization_plan"], + "standardization_report": report, + } + tinptool.write_stage_trace(state["path"], "standardize", trace_payload) + payload = { + "standardization_report": report, + "standardized_dataset_path": input_path, + } + return payload + report = tinptool.apply_standardization_actions.invoke( + { + "source_path": state["path"], + "input_path": input_path, + "actions": state["standardization_plan"]["actions"], + } + ) + trace_payload = { + "input_path": input_path, + "standardization_profile": state["standardization_profile"], + "standardization_gate": state.get("standardization_gate") or {}, + "standardization_plan": state["standardization_plan"], + "standardization_report": report, + } + tinptool.write_stage_trace(state["path"], "standardize", trace_payload) + payload = { + "standardization_report": report, + "standardized_dataset_path": report["output_path"], + } + return payload + + +standardization = lgraph.StateGraph(CompositeState) +standardization.add_node("handle_missingness_pipeline", call_handle_missingness) +standardization.add_node("profile_standardization", profile_standardization) +standardization.add_node("choose_standardization_gate", choose_standardization_gate) +standardization.add_node("choose_standardization_plan", choose_standardization_plan) 
+standardization.add_node("apply_standardization_plan", apply_standardization_plan) +standardization.add_edge(lgraph.START, "handle_missingness_pipeline") +standardization.add_edge("handle_missingness_pipeline", "profile_standardization") +standardization.add_edge("profile_standardization", "choose_standardization_gate") +standardization.add_edge("choose_standardization_gate", "choose_standardization_plan") +standardization.add_edge("choose_standardization_plan", "apply_standardization_plan") +standardization.add_edge("apply_standardization_plan", lgraph.END) +graph = standardization.compile() + + +def run_standardize(path: str) -> dict: + """ + Execute optional standardization end to end. + + :param path: dataset path + :return: full composite graph payload + """ + init_state: CompositeState = { + "path": path, + "done": [], + "has_header": True, + "has_missing_values": False, + "error": "", + "info": "", + "cols": [], + "temporal_cols": [], + "numeric_val_cols": [], + "categorical_val_cols": [], + "bad_rows": [], + "metadata": {}, + "time_col": "", + "candidates": [], + "winner_formatter": {}, + "entity_col": None, + "numeric_cols": [], + "nonnegative_cols": [], + "jump_mult": 20.0, + "report": {}, + "summary": "", + "flag": "", + "type": "", + "primary_key": "", + "secondary_keys": [], + "numeric_continuous_cols": [], + "numeric_count_cols": [], + "binary_flag_cols": [], + "categorical_feature_cols": [], + "known_exogenous_cols": [], + "target_cols": [], + "covariate_cols": [], + "n_nat_time": 0, + "min_time": None, + "max_time": None, + "typical_delta_mode": None, + "typical_delta_median": None, + "expected_frequency": None, + "dominant_frequency_fraction": 0.0, + "is_irregular_sampling": False, + "resampling_decision": "", + "coverage_summary": {}, + "coverage_per_entity": [], + "missingness_report": {}, + "missingness_plan": {}, + "missingness_handling_report": {}, + "quality_dataset_path": "", + "standardization_profile": {}, + "standardization_gate": {}, + "standardization_plan": {}, + "standardization_report": {}, + "standardized_dataset_path": "", + } + out = graph.invoke(init_state) + payload: CompositeState = out + _LOG.info("Standardization output: %s", payload) + return payload + + +def _parse_args() -> argparse.Namespace: + """ + Parse command-line arguments. + + :return: parsed arguments + """ + parser = argparse.ArgumentParser() + parser.add_argument( + "--path", + required=True, + help="Path to dataset file.", + ) + args = parser.parse_args() + return args + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + args = _parse_args() + run_standardize(args.path) diff --git a/agentic_eda/jupyterlab_extension_backend/src/tools/__init__.py b/agentic_eda/jupyterlab_extension_backend/src/tools/__init__.py new file mode 100644 index 000000000..46d455292 --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/src/tools/__init__.py @@ -0,0 +1,3 @@ +""" +Backend tool package. 
+""" diff --git a/agentic_eda/jupyterlab_extension_backend/src/tools/input_tools.py b/agentic_eda/jupyterlab_extension_backend/src/tools/input_tools.py new file mode 100644 index 000000000..3490b4811 --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/src/tools/input_tools.py @@ -0,0 +1,1970 @@ +""" +Import as: + +import src.tools.input_tools as tinptool +""" + +import json +import itertools +import pathlib +import re +from typing import Any + +import langchain.tools as ltools +import numpy as np +import pandas as pd +import pydantic + +_VALID_HEADER_START_RE = re.compile(r"^[A-Za-z_]") + + +def _trace_root() -> pathlib.Path: + """ + Return the backend-level trace directory. + + :return: absolute trace root + """ + trace_root = pathlib.Path(__file__).resolve().parents[2] / "traces" + trace_root.mkdir(parents=True, exist_ok=True) + return trace_root + + +def load_dataset(path: pathlib.Path) -> pd.DataFrame: + """ + Load a supported dataset from disk. + + :param path: path to dataset file + :return: dataset as dataframe + """ + ext = path.suffix.lower() + if ext == ".csv": + dataset = pd.read_csv(path) + else: + raise ValueError(f"Unsupported file extension='{ext}'") + return dataset + + +def _sample_values(series: pd.Series, *, limit: int = 5) -> list[str]: + """ + Return a small deterministic sample of distinct non-null values. + + Theory: + A short value sample gives downstream logic human-interpretable evidence + about whether a column behaves like a flag, identifier, category, or + free-form measurement, without depending on the column name alone. + + :param series: input series + :param limit: max number of sample values + :return: stringified sample values + """ + values: list[str] = [] + seen: set[str] = set() + for value in series.dropna().tolist(): + key = str(value) + if key in seen: + continue + seen.add(key) + values.append(key) + if len(values) >= limit: + break + return values + + +def _normalized_non_null_fraction(series: pd.Series) -> float: + """ + Compute the non-null fraction for a series. + + Theory: + Missingness changes how much confidence we should place in any inferred + semantic role. Columns with very little observed data provide weak evidence + for type inference, so completeness is a foundational statistic. + + :param series: input series + :return: non-null fraction + """ + if len(series) == 0: + return 0.0 + return float(series.notna().mean()) + + +def _coerce_numeric(series: pd.Series) -> pd.Series: + """ + Convert a series to numeric values where possible. + + Theory: + Many semantic distinctions begin with whether values actually behave like + numbers in the data, not whether the declared dtype says so. Numeric + coercion exposes columns that are numerically meaningful even when loaded + as strings. + + :param series: input series + :return: numeric series with NaN for non-numeric values + """ + return pd.to_numeric(series, errors="coerce") + + +def _is_integer_like(series: pd.Series) -> bool: + """ + Check whether numeric values are effectively integers. + + Theory: + Count variables and encoded flags often live on the integers, whereas + continuous measurements usually do not. Integer support is therefore a + useful deterministic signal for separating counts from continuous values. 
+ + :param series: numeric-like series + :return: true when all observed values are close to integers + """ + numeric = _coerce_numeric(series).dropna() + if numeric.empty: + return False + rounded = numeric.round() + return bool((numeric - rounded).abs().le(1e-9).all()) + + +def _is_binary_like(series: pd.Series) -> bool: + """ + Check whether a column behaves like a binary flag. + + Theory: + Binary indicators are characterized by two logical states regardless of + whether they are stored as booleans, strings, or numeric codes. Recognizing + this two-state support helps prevent flags from being misclassified as + general categoricals or counts. + + :param series: input series + :return: true when the column has exactly two logical states + """ + non_null = series.dropna() + if non_null.empty: + return False + unique_raw = {str(value).strip().lower() for value in non_null.unique()} + binary_vocab = { + "0", + "1", + "true", + "false", + "t", + "f", + "yes", + "no", + "y", + "n", + } + if unique_raw and unique_raw.issubset(binary_vocab) and len(unique_raw) <= 2: + return True + return len(unique_raw) == 2 + + +def _build_column_profiles(dataset: pd.DataFrame) -> dict[str, dict[str, Any]]: + """ + Build deterministic per-column profiles used by downstream schema tools. + + Theory: + Robust schema inference should summarize how each column behaves in the + observed data: completeness, cardinality, numeric support, integer support, + binary support, and value examples. Those empirical signals are what later + stages use to infer keys and semantic feature types in a reproducible way. + + :param dataset: input dataframe + :return: map of column name to summary statistics + """ + profiles: dict[str, dict[str, Any]] = {} + n_rows = int(dataset.shape[0]) + for col in dataset.columns: + series = dataset[col] + non_null = series.dropna() + n_non_null = int(non_null.shape[0]) + n_unique = int(non_null.nunique(dropna=True)) + unique_ratio = 0.0 if n_non_null == 0 else float(n_unique / n_non_null) + numeric = _coerce_numeric(series) + numeric_non_null = numeric.dropna() + numeric_fraction = ( + 0.0 if n_non_null == 0 else float(numeric_non_null.shape[0] / n_non_null) + ) + integer_like = _is_integer_like(series) + nonnegative_like = ( + False + if numeric_non_null.empty + else bool((numeric_non_null >= 0).all()) + ) + profile = { + "dtype": str(series.dtype), + "n_rows": n_rows, + "n_non_null": n_non_null, + "non_null_fraction": _normalized_non_null_fraction(series), + "n_unique": n_unique, + "unique_ratio": unique_ratio, + "is_numeric_like": bool(numeric_fraction >= 0.95 and not numeric_non_null.empty), + "numeric_fraction": numeric_fraction, + "is_integer_like": integer_like, + "is_binary_like": _is_binary_like(series), + "is_nonnegative_like": nonnegative_like, + "sample_values": _sample_values(series), + } + if not numeric_non_null.empty: + profile["min_numeric"] = float(numeric_non_null.min()) + profile["max_numeric"] = float(numeric_non_null.max()) + else: + profile["min_numeric"] = None + profile["max_numeric"] = None + profiles[str(col)] = profile + return profiles + + +def write_stage_trace(path: str, stage: str, payload: dict[str, Any]) -> str: + """ + Persist diagnostic findings for one pipeline stage to a backend-local trace + file. 
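+
+    Traces land in the backend `traces/` directory as
+    `<dataset stem>.<stage>.json`, e.g. `sales.infer_type.json` for a
+    hypothetical `sales.csv`.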
+ + :param path: dataset path + :param stage: pipeline stage name + :param payload: JSON-serializable diagnostic payload + :return: absolute trace file path + """ + dataset_path = pathlib.Path(path) + filename = f"{dataset_path.stem}.{stage}.json" + trace_path = _trace_root() / filename + trace_payload = { + "dataset_path": str(dataset_path), + "stage": stage, + "payload": payload, + } + trace_path.write_text( + json.dumps(trace_payload, default=str, indent=2), + encoding="utf-8", + ) + return str(trace_path) + + +def write_stage_dataset(path: str, stage: str, dataset: pd.DataFrame) -> str: + """ + Persist a stage-produced dataset artifact alongside trace files. + + :param path: source dataset path + :param stage: pipeline stage name + :param dataset: dataframe to serialize + :return: absolute output dataset path + """ + dataset_path = pathlib.Path(path) + filename = f"{dataset_path.stem}.{stage}.csv" + output_path = _trace_root() / filename + dataset.to_csv(output_path, index=False) + return str(output_path) + + +def write_stage_plot(path: str, stage: str, plot_name: str, fig: Any) -> str: + """ + Persist a stage-produced plot under the backend trace directory. + + :param path: source dataset path + :param stage: pipeline stage name + :param plot_name: plot-specific filename stem + :param fig: matplotlib figure + :return: absolute output plot path + """ + dataset_path = pathlib.Path(path) + plot_dir = _trace_root() / f"{dataset_path.stem}.{stage}" + plot_dir.mkdir(parents=True, exist_ok=True) + safe_name = re.sub(r"[^A-Za-z0-9_.-]+", "_", plot_name).strip("_") + if not safe_name: + safe_name = "plot" + output_path = plot_dir / f"{safe_name}.png" + fig.savefig(output_path, dpi=140, bbox_inches="tight") + return str(output_path) + + +def _parse_time_series( + dataset: pd.DataFrame, + time_col: str, + winner_formatter: dict[str, Any] | None = None, +) -> pd.Series: + """ + Parse the selected time column with the best-known formatter settings. + + Theory: + Temporal statistics are only meaningful once the time axis has been mapped + into a consistent datetime representation. Reusing the formatter selected + earlier in the pipeline avoids accidental drift between schema inference and + downstream coverage/frequency calculations. + + :param dataset: input dataframe + :param time_col: selected time column + :param winner_formatter: optional datetime parsing kwargs + :return: parsed timestamp series + """ + format_args = winner_formatter or {} + format_args = {key: val for key, val in format_args.items() if val is not None} + try: + return pd.to_datetime(dataset[time_col], errors="coerce", **format_args) + except Exception: + return pd.to_datetime(dataset[time_col], errors="coerce") + + +def _format_timedelta(delta: pd.Timedelta | None) -> str | None: + """ + Convert a timedelta into a stable string representation. + + Theory: + Frequency and gap summaries are easier to compare across stages when they + are rendered into a canonical textual duration rather than leaking pandas- + specific objects into the public payload. + + :param delta: input timedelta + :return: normalized string or None + """ + if delta is None or pd.isna(delta): + return None + return str(delta) + + +def _series_identifier(keys: list[str], values: tuple[Any, ...]) -> dict[str, Any] | None: + """ + Package one composite entity identifier as a JSON-friendly mapping. + + Theory: + Coverage and frequency statistics are naturally computed per series. 
When a + panel uses composite entity keys, the identifier must preserve every key + component so the reported findings still point back to the original series. + + :param keys: entity key column names + :param values: grouped key values + :return: key-value mapping or None for single-series data + """ + if not keys: + return None + return {key: value for key, value in zip(keys, values, strict=True)} + + +def _ordered_dataset( + dataset: pd.DataFrame, + time_col: str, + secondary_keys: list[str] | None = None, + winner_formatter: dict[str, Any] | None = None, +) -> pd.DataFrame: + """ + Return a stable, time-aware ordering for sequential quality operations. + + :param dataset: input dataframe + :param time_col: selected time column + :param secondary_keys: optional entity key columns + :param winner_formatter: optional datetime parsing kwargs + :return: ordered dataframe with helper columns + """ + ordered = dataset.copy() + ordered["_row_order"] = range(int(ordered.shape[0])) + if time_col in ordered.columns: + ordered["_ts"] = _parse_time_series(ordered, time_col, winner_formatter) + else: + ordered["_ts"] = pd.NaT + valid_secondary_keys = [ + key + for key in (secondary_keys or []) + if key in ordered.columns and key != time_col + ] + sort_cols = list(valid_secondary_keys) + if ordered["_ts"].notna().any(): + sort_cols.append("_ts") + sort_cols.append("_row_order") + ordered = ordered.sort_values(sort_cols, na_position="last").reset_index(drop=True) + return ordered + + +def _iter_series_frames( + dataset: pd.DataFrame, + secondary_keys: list[str] | None = None, +) -> list[tuple[dict[str, Any] | None, pd.DataFrame]]: + """ + Yield one frame per inferred series. + + :param dataset: ordered dataframe + :param secondary_keys: optional entity keys + :return: list of entity/frame pairs + """ + valid_secondary_keys = [ + key for key in (secondary_keys or []) if key in dataset.columns + ] + if not valid_secondary_keys: + return [(None, dataset)] + items: list[tuple[dict[str, Any] | None, pd.DataFrame]] = [] + grouped = dataset.groupby(valid_secondary_keys, dropna=False, sort=False) + for raw_key, frame in grouped: + key_tuple = raw_key if isinstance(raw_key, tuple) else (raw_key,) + items.append((_series_identifier(valid_secondary_keys, key_tuple), frame)) + return items + + +def _mask_run_lengths(mask: pd.Series) -> list[int]: + """ + Return lengths of consecutive true runs in a boolean mask. + + :param mask: boolean mask + :return: run lengths + """ + run_lengths: list[int] = [] + current = 0 + for is_true in mask.fillna(False).astype(bool).tolist(): + if is_true: + current += 1 + elif current > 0: + run_lengths.append(current) + current = 0 + if current > 0: + run_lengths.append(current) + return run_lengths + + +def _safe_float(value: Any) -> float | None: + """ + Convert a numeric-like value into a JSON-friendly float. + + :param value: input value + :return: float or None + """ + if value is None or pd.isna(value): + return None + return float(value) + + +def _candidate_univariate_numeric_cols( + dataset: pd.DataFrame, + *, + time_col: str, + secondary_keys: list[str] | None = None, + numeric_continuous_cols: list[str] | None = None, + numeric_count_cols: list[str] | None = None, + binary_flag_cols: list[str] | None = None, +) -> list[str]: + """ + Return deterministic numeric columns suitable for univariate analysis. 
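+
+    Example:
+        With hypothetical buckets ``numeric_continuous_cols=["power"]``,
+        ``numeric_count_cols=["fault_count"]``, and
+        ``binary_flag_cols=["is_online"]``, the result is
+        ``["power", "fault_count", "is_online"]`` in that fixed order; when no
+        buckets are supplied, any non-excluded column with at least one
+        parseable numeric value is used as a fallback.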
+ + :param dataset: input dataframe + :param time_col: selected time column + :param secondary_keys: optional entity key columns + :param numeric_continuous_cols: inferred continuous numeric columns + :param numeric_count_cols: inferred count columns + :param binary_flag_cols: inferred binary columns + :return: ordered numeric analysis columns + """ + excluded = {time_col, *(secondary_keys or [])} + candidates = list( + dict.fromkeys( + [ + *[col for col in (numeric_continuous_cols or []) if col in dataset.columns], + *[col for col in (numeric_count_cols or []) if col in dataset.columns], + *[col for col in (binary_flag_cols or []) if col in dataset.columns], + ] + ) + ) + if not candidates: + candidates = [ + str(col) + for col in dataset.columns + if str(col) not in excluded and pd.to_numeric(dataset[col], errors="coerce").notna().any() + ] + return [col for col in candidates if col not in excluded] + + +def _tail_ratio(series: pd.Series) -> float | None: + """ + Compute a simple deterministic tail ratio. + + :param series: numeric series + :return: tail ratio or None + """ + valid = pd.to_numeric(series, errors="coerce").dropna() + if valid.empty: + return None + p50 = valid.quantile(0.50) + p99 = valid.quantile(0.99) + if pd.isna(p50) or pd.isna(p99): + return None + if float(abs(p50)) <= 1e-12: + return None if float(abs(p99)) <= 1e-12 else float(abs(p99)) + return float(abs(p99) / abs(p50)) + + +def _univariate_summary(series: pd.Series) -> dict[str, Any]: + """ + Compute deterministic univariate summary statistics. + + :param series: numeric-like series + :return: summary stats + """ + numeric = pd.to_numeric(series, errors="coerce") + valid = numeric.dropna() + n_total = int(series.shape[0]) + n_non_null = int(valid.shape[0]) + n_missing = max(0, n_total - n_non_null) + if valid.empty: + return { + "n_total": n_total, + "n_non_null": 0, + "n_missing": n_missing, + "missing_pct": None if n_total == 0 else float(100.0 * n_missing / n_total), + "n_unique": 0, + "mean": None, + "std": None, + "min": None, + "p01": None, + "p05": None, + "p25": None, + "p50": None, + "p75": None, + "p95": None, + "p99": None, + "max": None, + "iqr": None, + "zero_fraction": None, + "skew": None, + "kurtosis": None, + "tail_ratio_p99_p50": None, + } + q = valid.quantile([0.01, 0.05, 0.25, 0.50, 0.75, 0.95, 0.99]) + return { + "n_total": n_total, + "n_non_null": n_non_null, + "n_missing": n_missing, + "missing_pct": None if n_total == 0 else float(100.0 * n_missing / n_total), + "n_unique": int(valid.nunique(dropna=True)), + "mean": _safe_float(valid.mean()), + "std": _safe_float(valid.std()), + "min": _safe_float(valid.min()), + "p01": _safe_float(q.loc[0.01]), + "p05": _safe_float(q.loc[0.05]), + "p25": _safe_float(q.loc[0.25]), + "p50": _safe_float(q.loc[0.50]), + "p75": _safe_float(q.loc[0.75]), + "p95": _safe_float(q.loc[0.95]), + "p99": _safe_float(q.loc[0.99]), + "max": _safe_float(valid.max()), + "iqr": _safe_float(q.loc[0.75] - q.loc[0.25]), + "zero_fraction": float((valid == 0).mean()), + "skew": _safe_float(valid.skew()), + "kurtosis": _safe_float(valid.kurt()), + "tail_ratio_p99_p50": _tail_ratio(valid), + } + + +def _gaussian_kde_curve(series: pd.Series, *, n_points: int = 256) -> tuple[np.ndarray, np.ndarray] | None: + """ + Compute a simple Gaussian KDE curve without scipy. 
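+
+    Theory:
+        The bandwidth follows Silverman's rule of thumb,
+        0.9 * min(std, IQR / 1.34) * n**(-1/5), and the curve is the average of
+        Gaussian kernels centered at the observations. The sample-size and
+        uniqueness guards skip small or near-discrete columns where a smooth
+        density would be misleading.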
+ + :param series: numeric series + :param n_points: number of evaluation points + :return: x/y arrays or None when KDE is not appropriate + """ + valid = pd.to_numeric(series, errors="coerce").dropna().to_numpy(dtype=float) + if valid.size < 30: + return None + unique = np.unique(valid) + if unique.size < 10: + return None + std = float(np.std(valid, ddof=1)) + iqr = float(np.subtract(*np.percentile(valid, [75, 25]))) + scale = min(std, iqr / 1.34) if iqr > 0.0 else std + if not np.isfinite(scale) or scale <= 0.0: + return None + bandwidth = 0.9 * scale * (valid.size ** (-1.0 / 5.0)) + if not np.isfinite(bandwidth) or bandwidth <= 0.0: + return None + x_grid = np.linspace(float(valid.min()), float(valid.max()), n_points) + diffs = (x_grid[:, None] - valid[None, :]) / bandwidth + density = np.exp(-0.5 * diffs**2).sum(axis=1) + density /= float(valid.size * bandwidth * np.sqrt(2.0 * np.pi)) + return x_grid, density + + +def _transform_candidates(series: pd.Series) -> dict[str, pd.Series]: + """ + Build deterministic transform candidates for one numeric series. + + :param series: numeric series + :return: map of candidate name to transformed series + """ + numeric = pd.to_numeric(series, errors="coerce") + candidates: dict[str, pd.Series] = {"none": numeric} + valid = numeric.dropna() + if valid.empty: + return candidates + candidates["cuberoot"] = numeric.apply( + lambda value: np.cbrt(value) if pd.notna(value) else value + ) + if float(valid.min()) >= 0.0: + candidates["sqrt"] = numeric.apply( + lambda value: np.sqrt(value) if pd.notna(value) else value + ) + candidates["log1p"] = pd.Series(np.log1p(numeric), index=numeric.index) + return candidates + + +def _transform_score(series: pd.Series) -> dict[str, Any]: + """ + Score one transformed series using deterministic shape criteria. + + :param series: transformed numeric series + :return: score details + """ + summary = _univariate_summary(series) + valid = pd.to_numeric(series, errors="coerce").dropna() + if valid.empty: + return { + "summary": summary, + "score": None, + } + abs_skew = abs(float(summary["skew"])) if summary["skew"] is not None else 99.0 + abs_kurtosis = abs(float(summary["kurtosis"])) if summary["kurtosis"] is not None else 99.0 + tail_ratio = float(summary["tail_ratio_p99_p50"]) if summary["tail_ratio_p99_p50"] is not None else 99.0 + score = float(abs_skew + 0.25 * abs_kurtosis + 0.10 * tail_ratio) + return { + "summary": summary, + "score": score, + } + + +class _TemporalStatsArgs(pydantic.BaseModel): + """ + Store arguments for deterministic temporal statistics. + """ + + model_config = pydantic.ConfigDict(extra="forbid") + path: str + time_col: str + secondary_keys: list[str] | None = None + winner_formatter: dict[str, Any] | None = None + + +@ltools.tool(args_schema=_TemporalStatsArgs) +def compute_temporal_stats( + path: str, + time_col: str, + secondary_keys: list[str] | None = None, + winner_formatter: dict[str, Any] | None = None, +) -> dict: + """ + Compute deterministic temporal range, coverage, and sampling-frequency + statistics. + + Theory: + Time-series coverage is defined relative to an expected sampling interval. + Once the timestamps are parsed, the empirical deltas between consecutive + observations reveal the dominant cadence of the data. That cadence becomes + the expected frequency against which we can measure irregular sampling, + missing timestamps, longest gaps, and per-entity coverage. 
For panel data, + these statistics must be computed per entity (or per composite entity key), + because a dataset can be well covered overall while still containing weak or + sparse individual series. + + :param path: dataset path + :param time_col: selected time column + :param secondary_keys: optional entity key columns + :param winner_formatter: optional datetime parsing kwargs + :return: temporal statistics payload + """ + dataset_path = pathlib.Path(path) + dataset = load_dataset(dataset_path) + if time_col not in dataset.columns: + raise KeyError(f"time_col '{time_col}' not found in dataset") + secondary_keys = [ + key for key in (secondary_keys or []) if key in dataset.columns and key != time_col + ] + timestamp = _parse_time_series(dataset, time_col, winner_formatter) + valid_rows = dataset.copy() + valid_rows["_ts"] = timestamp + valid_rows = valid_rows.dropna(subset=["_ts"]) + if secondary_keys: + grouped_iter = valid_rows.groupby(secondary_keys, dropna=True) + group_items = list(grouped_iter) + else: + group_items = [(tuple(), valid_rows)] + + all_deltas: list[pd.Timedelta] = [] + per_entity: list[dict[str, Any]] = [] + global_min = None if valid_rows.empty else valid_rows["_ts"].min() + global_max = None if valid_rows.empty else valid_rows["_ts"].max() + + for raw_key, frame in group_items: + key_tuple = raw_key if isinstance(raw_key, tuple) else (raw_key,) + unique_ts = ( + frame["_ts"].dropna().drop_duplicates().sort_values().reset_index(drop=True) + ) + n_observed = int(unique_ts.shape[0]) + if n_observed >= 2: + deltas = unique_ts.diff().dropna() + positive_deltas = deltas[deltas > pd.Timedelta(0)] + else: + positive_deltas = pd.Series(dtype="timedelta64[ns]") + all_deltas.extend(list(positive_deltas.tolist())) + per_entity.append( + { + "entity": _series_identifier(secondary_keys, key_tuple), + "n_observed_timestamps": n_observed, + "min_time": None if unique_ts.empty else str(unique_ts.min()), + "max_time": None if unique_ts.empty else str(unique_ts.max()), + "_positive_deltas": positive_deltas, + } + ) + + if all_deltas: + delta_series = pd.Series(all_deltas, dtype="timedelta64[ns]") + mode_candidates = delta_series.mode() + mode_delta = None if mode_candidates.empty else mode_candidates.iloc[0] + median_delta = delta_series.median() + dominant_fraction = ( + 0.0 + if mode_delta is None + else float((delta_series == mode_delta).mean()) + ) + expected_delta = mode_delta if dominant_fraction >= 0.5 else median_delta + is_irregular_sampling = bool( + expected_delta is not None + and float((delta_series == expected_delta).mean()) < 0.8 + ) + else: + delta_series = pd.Series(dtype="timedelta64[ns]") + mode_delta = None + median_delta = None + dominant_fraction = 0.0 + expected_delta = None + is_irregular_sampling = False + + coverage_values: list[float] = [] + total_gaps = 0 + for item in per_entity: + positive_deltas = item.pop("_positive_deltas") + n_observed = item["n_observed_timestamps"] + if n_observed == 0 or expected_delta is None or pd.isna(expected_delta): + coverage_pct = None + n_expected = n_observed + gap_mask = pd.Series(dtype=bool) + longest_gap = None + else: + span = pd.Timestamp(item["max_time"]) - pd.Timestamp(item["min_time"]) + if expected_delta <= pd.Timedelta(0): + n_expected = n_observed + else: + n_expected = int(span / expected_delta) + 1 + n_expected = max(n_expected, n_observed, 1) + coverage_pct = float(100.0 * n_observed / n_expected) + gap_mask = positive_deltas > expected_delta + longest_gap = ( + None if positive_deltas.empty else 
positive_deltas.max()
+            )
+        n_gaps = int(gap_mask.sum()) if not gap_mask.empty else 0
+        total_gaps += n_gaps
+        if coverage_pct is not None:
+            coverage_values.append(coverage_pct)
+        item["n_expected_timestamps"] = int(n_expected)
+        item["coverage_pct"] = coverage_pct
+        item["n_gaps"] = n_gaps
+        item["longest_gap"] = _format_timedelta(longest_gap)
+
+    if expected_delta is None:
+        resampling_decision = "insufficient_data"
+    elif is_irregular_sampling:
+        resampling_decision = "keep_irregular_gap_aware"
+    elif coverage_values and min(coverage_values) < 99.0:
+        resampling_decision = "resample_to_regular_grid"
+    else:
+        resampling_decision = "already_regular"
+
+    coverage_summary = {
+        "n_series": len(per_entity),
+        "mean_coverage_pct": (
+            None if not coverage_values else float(pd.Series(coverage_values).mean())
+        ),
+        "min_coverage_pct": (
+            None if not coverage_values else float(pd.Series(coverage_values).min())
+        ),
+        "max_coverage_pct": (
+            None if not coverage_values else float(pd.Series(coverage_values).max())
+        ),
+        "total_gaps": int(total_gaps),
+    }
+
+    return {
+        "time_col": time_col,
+        "secondary_keys": secondary_keys,
+        "n_nat_time": int(timestamp.isna().sum()),
+        "min_time": None if global_min is None else str(global_min),
+        "max_time": None if global_max is None else str(global_max),
+        "typical_delta_mode": _format_timedelta(mode_delta),
+        "typical_delta_median": _format_timedelta(median_delta),
+        "expected_frequency": _format_timedelta(expected_delta),
+        "dominant_frequency_fraction": dominant_fraction,
+        "is_irregular_sampling": is_irregular_sampling,
+        "resampling_decision": resampling_decision,
+        "coverage_summary": coverage_summary,
+        "coverage_per_entity": per_entity,
+    }
+
+
+class _MissingnessAuditArgs(pydantic.BaseModel):
+    """
+    Store arguments for deterministic missingness auditing.
+    """
+
+    model_config = pydantic.ConfigDict(extra="forbid")
+    path: str
+    time_col: str
+    secondary_keys: list[str] | None = None
+    winner_formatter: dict[str, Any] | None = None
+
+
+@ltools.tool(args_schema=_MissingnessAuditArgs)
+def audit_missingness(
+    path: str,
+    time_col: str,
+    secondary_keys: list[str] | None = None,
+    winner_formatter: dict[str, Any] | None = None,
+) -> dict:
+    """
+    Audit missingness as two distinct problems: missing values and missing
+    timestamps.
+
+    Theory:
+        Missing cells inside observed rows and missing timestamps in the implied
+        time grid are different failure modes. Value missingness tells us which
+        variables are incomplete at the timestamps we did observe. Timestamp
+        missingness tells us whether observations are absent from the expected
+        sampling cadence. The former guides imputation choices per feature; the
+        latter guides reindexing, coverage assessment, and gap-aware modeling.
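+
+    Example:
+        In an hourly series with rows at 00:00, 01:00, and 03:00 where the
+        01:00 value of one sensor is NaN, the NaN is value missingness (an
+        incomplete cell in an observed row), while the absent 02:00 row is
+        timestamp missingness (a hole in the expected hourly grid).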
+ + :param path: dataset path + :param time_col: selected time column + :param secondary_keys: optional entity key columns + :param winner_formatter: optional datetime parsing kwargs + :return: missingness audit payload + """ + dataset_path = pathlib.Path(path) + dataset = load_dataset(dataset_path) + ordered = _ordered_dataset( + dataset, + time_col=time_col, + secondary_keys=secondary_keys, + winner_formatter=winner_formatter, + ) + temporal_report = compute_temporal_stats.invoke( + { + "path": path, + "time_col": time_col, + "secondary_keys": secondary_keys or [], + "winner_formatter": winner_formatter or {}, + } + ) + profiles = _build_column_profiles(dataset) + value_missingness_by_column: list[dict[str, Any]] = [] + total_missing_cells = int(dataset.isna().sum().sum()) + all_series_frames = _iter_series_frames(ordered, secondary_keys) + for col in [str(value) for value in dataset.columns]: + missing_mask = dataset[col].isna() + n_missing = int(missing_mask.sum()) + missing_pct = 0.0 if dataset.empty else float(100.0 * n_missing / len(dataset)) + run_lengths: list[int] = [] + for _, frame in all_series_frames: + frame_run_lengths = _mask_run_lengths(frame[col].isna()) + run_lengths.extend(frame_run_lengths) + eligible_strategies = ["leave_as_nan", "drop_rows"] + profile = profiles[col] + if n_missing > 0 and col != time_col: + eligible_strategies.append("forward_fill") + if profile["is_numeric_like"]: + eligible_strategies.append("interpolate") + if ( + profile["is_numeric_like"] + and profile["is_integer_like"] + and profile["is_nonnegative_like"] + ): + eligible_strategies.append("zero_fill") + value_missingness_by_column.append( + { + "col": col, + "dtype": profile["dtype"], + "n_missing": n_missing, + "missing_pct": missing_pct, + "n_missing_runs": int(len(run_lengths)), + "longest_missing_run": int(max(run_lengths, default=0)), + "eligible_strategies": eligible_strategies, + "sample_values": profile["sample_values"], + } + ) + value_missingness_by_column.sort( + key=lambda item: (item["n_missing"], item["longest_missing_run"]), + reverse=True, + ) + worst_value_col = next( + (item for item in value_missingness_by_column if item["n_missing"] > 0), + None, + ) + timestamp_missingness_by_entity: list[dict[str, Any]] = [] + total_expected_timestamps = 0 + total_observed_timestamps = 0 + total_missing_timestamps = 0 + n_series_with_timestamp_gaps = 0 + for item in temporal_report["coverage_per_entity"]: + n_observed = int(item.get("n_observed_timestamps") or 0) + n_expected = int(item.get("n_expected_timestamps") or n_observed) + n_missing_timestamps = max(0, n_expected - n_observed) + total_expected_timestamps += n_expected + total_observed_timestamps += n_observed + total_missing_timestamps += n_missing_timestamps + if n_missing_timestamps > 0: + n_series_with_timestamp_gaps += 1 + timestamp_missingness_by_entity.append( + { + "entity": item.get("entity"), + "n_observed_timestamps": n_observed, + "n_expected_timestamps": n_expected, + "n_missing_timestamps": n_missing_timestamps, + "coverage_pct": item.get("coverage_pct"), + "n_gaps": int(item.get("n_gaps") or 0), + "longest_gap": item.get("longest_gap"), + } + ) + timestamp_missingness_by_entity.sort( + key=lambda item: ( + item["n_missing_timestamps"], + item["n_gaps"], + item["coverage_pct"] if item["coverage_pct"] is not None else -1.0, + ), + reverse=True, + ) + return { + "time_col": time_col, + "secondary_keys": [ + key for key in (secondary_keys or []) if key in dataset.columns and key != time_col + ], + "n_rows": 
int(dataset.shape[0]),
+        "n_cols": int(dataset.shape[1]),
+        "value_missingness_summary": {
+            "total_missing_cells": total_missing_cells,
+            "total_missing_pct": (
+                0.0
+                if dataset.empty
+                else float(100.0 * total_missing_cells / max(1, int(dataset.size)))
+            ),
+            "columns_with_missing_values": int(sum(item["n_missing"] > 0 for item in value_missingness_by_column)),
+            "worst_column": None if worst_value_col is None else worst_value_col["col"],
+            "worst_column_missing_pct": (
+                None if worst_value_col is None else worst_value_col["missing_pct"]
+            ),
+        },
+        "value_missingness_by_column": value_missingness_by_column,
+        "timestamp_missingness_summary": {
+            "expected_frequency": temporal_report["expected_frequency"],
+            "is_irregular_sampling": temporal_report["is_irregular_sampling"],
+            "resampling_decision": temporal_report["resampling_decision"],
+            "n_nat_time": temporal_report["n_nat_time"],
+            "total_expected_timestamps": total_expected_timestamps,
+            "total_observed_timestamps": total_observed_timestamps,
+            "total_missing_timestamps": total_missing_timestamps,
+            "n_series_with_timestamp_gaps": n_series_with_timestamp_gaps,
+        },
+        "timestamp_missingness_by_entity": timestamp_missingness_by_entity,
+        "column_profiles": profiles,
+    }
+
+
+class MissingnessActionSpec(pydantic.BaseModel):
+    """
+    Store one bounded missingness action.
+    """
+
+    model_config = pydantic.ConfigDict(extra="forbid")
+    col: str
+    strategy: str
+    create_missingness_flag: bool = True
+    reason: str = ""
+
+
+class _ApplyMissingnessActionsArgs(pydantic.BaseModel):
+    """
+    Store arguments for deterministic missingness handling.
+    """
+
+    model_config = pydantic.ConfigDict(extra="forbid")
+    source_path: str
+    input_path: str
+    time_col: str
+    secondary_keys: list[str] | None = None
+    winner_formatter: dict[str, Any] | None = None
+    actions: list[MissingnessActionSpec]
+
+
+@ltools.tool(args_schema=_ApplyMissingnessActionsArgs)
+def apply_missingness_actions(
+    source_path: str,
+    input_path: str,
+    time_col: str,
+    secondary_keys: list[str] | None = None,
+    winner_formatter: dict[str, Any] | None = None,
+    actions: list[MissingnessActionSpec] | None = None,
+) -> dict:
+    """
+    Apply one bounded missingness strategy per selected column.
+
+    Theory:
+        The policy choice for each column may be ambiguous, but the mechanics of
+        applying a chosen action should be deterministic and reproducible. By
+        sorting within entity/time order, optionally adding missingness flags, and
+        then applying simple bounded transforms, the stage can record exactly what
+        changed without allowing the LLM to mutate data directly.
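+
+    Example:
+        A minimal plan (hypothetical column name) that forward-fills
+        ``temperature`` within each entity and records which cells were
+        imputed:
+
+            actions=[
+                MissingnessActionSpec(
+                    col="temperature",
+                    strategy="forward_fill",
+                    create_missingness_flag=True,
+                    reason="short gaps in a slowly varying signal",
+                )
+            ]
+
+        The resulting flag column is named ``temperature__was_missing``.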
+ + :param source_path: original dataset path used for artifact naming + :param input_path: dataset path to transform + :param time_col: selected time column + :param secondary_keys: optional entity key columns + :param winner_formatter: optional datetime parsing kwargs + :param actions: bounded per-column action plan + :return: transformation report with output dataset path + """ + dataset = load_dataset(pathlib.Path(input_path)) + working = _ordered_dataset( + dataset, + time_col=time_col, + secondary_keys=secondary_keys, + winner_formatter=winner_formatter, + ) + valid_secondary_keys = [ + key + for key in (secondary_keys or []) + if key in working.columns and key != time_col + ] + action_items = [item.model_dump() if isinstance(item, pydantic.BaseModel) else item for item in (actions or [])] + drop_mask = pd.Series(False, index=working.index) + applied_actions: list[dict[str, Any]] = [] + for action in action_items: + col = str(action["col"]) + strategy = str(action["strategy"]) + create_missingness_flag = bool(action.get("create_missingness_flag", True)) + reason = str(action.get("reason") or "") + if col not in working.columns: + applied_actions.append( + { + "col": col, + "strategy": strategy, + "status": "skipped_missing_column", + "reason": reason, + } + ) + continue + before_mask = working[col].isna() + n_missing_before = int(before_mask.sum()) + if create_missingness_flag and n_missing_before > 0: + flag_col = f"{col}__was_missing" + if flag_col not in working.columns: + working[flag_col] = before_mask.astype(int) + status = "applied" + if strategy == "leave_as_nan": + pass + elif strategy == "drop_rows": + drop_mask = drop_mask | before_mask + elif strategy == "forward_fill": + if valid_secondary_keys: + working[col] = working.groupby(valid_secondary_keys, dropna=False)[col].ffill() + else: + working[col] = working[col].ffill() + elif strategy == "interpolate": + numeric = pd.to_numeric(working[col], errors="coerce") + if valid_secondary_keys: + working[col] = working.groupby(valid_secondary_keys, dropna=False)[numeric.name].transform( + lambda series: pd.to_numeric(series, errors="coerce").interpolate( + limit_area="inside" + ) + ) + else: + working[col] = numeric.interpolate(limit_area="inside") + elif strategy == "zero_fill": + numeric = pd.to_numeric(working[col], errors="coerce") + working[col] = numeric.fillna(0.0) + else: + status = "skipped_unsupported_strategy" + n_missing_after = int(working[col].isna().sum()) if col in working.columns else None + applied_actions.append( + { + "col": col, + "strategy": strategy, + "status": status, + "reason": reason, + "create_missingness_flag": create_missingness_flag, + "n_missing_before": n_missing_before, + "n_missing_after": n_missing_after, + "n_values_filled": None if n_missing_after is None else max(0, n_missing_before - n_missing_after), + "n_rows_marked_for_drop": int(before_mask.sum()) if strategy == "drop_rows" else 0, + } + ) + n_rows_before = int(working.shape[0]) + if bool(drop_mask.any()): + working = working.loc[~drop_mask].copy() + n_rows_after = int(working.shape[0]) + n_rows_dropped = max(0, n_rows_before - n_rows_after) + remaining_missing_by_column = { + str(col): int(working[col].isna().sum()) + for col in working.columns + if not str(col).startswith("_") + } + output_dataset = working.drop(columns=["_ts", "_row_order"], errors="ignore") + output_path = write_stage_dataset(source_path, "handle_missingness", output_dataset) + return { + "input_path": input_path, + "output_path": output_path, + "n_rows_before": 
n_rows_before, + "n_rows_after": n_rows_after, + "n_rows_dropped": n_rows_dropped, + "actions_applied": applied_actions, + "remaining_missing_by_column": remaining_missing_by_column, + "sorted_by": valid_secondary_keys + (["_ts"] if "_ts" in working.columns else []), + } + + +class _ScaleProfileArgs(pydantic.BaseModel): + """ + Store arguments for deterministic scale profiling. + """ + + model_config = pydantic.ConfigDict(extra="forbid") + path: str + numeric_continuous_cols: list[str] | None = None + numeric_count_cols: list[str] | None = None + binary_flag_cols: list[str] | None = None + + +@ltools.tool(args_schema=_ScaleProfileArgs) +def profile_standardization_candidates( + path: str, + numeric_continuous_cols: list[str] | None = None, + numeric_count_cols: list[str] | None = None, + binary_flag_cols: list[str] | None = None, +) -> dict: + """ + Profile scale and tail behavior for numeric features. + + Theory: + Standardization is only justified when the observed numeric scales or tail + behaviors would otherwise distort comparisons or downstream models. Robust + scaling depends on median/IQR support, while `log1p` depends on nonnegative + support and heavy right tails. These properties can be measured + deterministically before the LLM decides whether the optional transform is + worth applying. + + :param path: dataset path + :param numeric_continuous_cols: inferred continuous numeric columns + :param numeric_count_cols: inferred count-like numeric columns + :param binary_flag_cols: inferred binary columns to exclude + :return: per-column scale profile + """ + dataset = load_dataset(pathlib.Path(path)) + continuous = [col for col in (numeric_continuous_cols or []) if col in dataset.columns] + counts = [col for col in (numeric_count_cols or []) if col in dataset.columns] + excluded = {col for col in (binary_flag_cols or []) if col in dataset.columns} + candidate_cols = [col for col in continuous + counts if col not in excluded] + candidate_cols = list(dict.fromkeys(candidate_cols)) + per_column: list[dict[str, Any]] = [] + iqr_values: list[float] = [] + for col in candidate_cols: + numeric = pd.to_numeric(dataset[col], errors="coerce").dropna() + if numeric.empty: + continue + median = numeric.median() + q1 = numeric.quantile(0.25) + q3 = numeric.quantile(0.75) + iqr = q3 - q1 + p01 = numeric.quantile(0.01) + p50 = numeric.quantile(0.50) + p99 = numeric.quantile(0.99) + positive_fraction = float((numeric >= 0).mean()) + zero_fraction = float((numeric == 0).mean()) + abs_median = abs(float(median)) if not pd.isna(median) else 0.0 + tail_ratio = None + if p50 > 0: + tail_ratio = float(p99 / p50) + if float(iqr) > 0.0: + iqr_values.append(float(iqr)) + feature_bucket = "numeric_continuous" if col in continuous else "numeric_count" + eligible_actions = ["none"] + if float(iqr) > 0.0: + eligible_actions.append("robust_scale") + if float(numeric.min()) >= 0.0: + eligible_actions.append("log1p") + if "robust_scale" in eligible_actions and "log1p" in eligible_actions: + eligible_actions.append("log1p_then_robust_scale") + per_column.append( + { + "col": col, + "feature_bucket": feature_bucket, + "n_non_null": int(numeric.shape[0]), + "min": _safe_float(numeric.min()), + "max": _safe_float(numeric.max()), + "mean": _safe_float(numeric.mean()), + "std": _safe_float(numeric.std()), + "median": _safe_float(median), + "iqr": _safe_float(iqr), + "p01": _safe_float(p01), + "p50": _safe_float(p50), + "p99": _safe_float(p99), + "zero_fraction": zero_fraction, + "positive_fraction": positive_fraction, + 
"skew": _safe_float(numeric.skew()), + "tail_ratio_p99_p50": None if tail_ratio is None else tail_ratio, + "scale_span": _safe_float(numeric.max() - numeric.min()), + "relative_iqr_to_median": None if abs_median <= 0.0 else float(iqr / abs_median), + "eligible_actions": eligible_actions, + } + ) + positive_iqrs = [value for value in iqr_values if value > 0.0] + return { + "path": path, + "candidate_cols": [item["col"] for item in per_column], + "n_candidate_cols": len(per_column), + "scale_summary": { + "max_iqr": None if not positive_iqrs else float(max(positive_iqrs)), + "min_positive_iqr": None if not positive_iqrs else float(min(positive_iqrs)), + "iqr_ratio_max_to_min": ( + None + if len(positive_iqrs) < 2 or min(positive_iqrs) <= 0.0 + else float(max(positive_iqrs) / min(positive_iqrs)) + ), + "n_nontrivial_log_candidates": int( + sum( + ( + item["min"] is not None + and item["min"] >= 0.0 + and item["tail_ratio_p99_p50"] is not None + and item["tail_ratio_p99_p50"] >= 5.0 + ) + for item in per_column + ) + ), + }, + "per_column": per_column, + } + + +class StandardizationActionSpec(pydantic.BaseModel): + """ + Store one bounded standardization action. + """ + + model_config = pydantic.ConfigDict(extra="forbid") + col: str + action: str + reason: str = "" + + +class _ApplyStandardizationArgs(pydantic.BaseModel): + """ + Store arguments for deterministic standardization. + """ + + model_config = pydantic.ConfigDict(extra="forbid") + source_path: str + input_path: str + actions: list[StandardizationActionSpec] + + +@ltools.tool(args_schema=_ApplyStandardizationArgs) +def apply_standardization_actions( + source_path: str, + input_path: str, + actions: list[StandardizationActionSpec] | None = None, +) -> dict: + """ + Apply bounded numeric transforms deterministically. + + Theory: + Whether a transform is desirable is an interpretive decision, but the + transform itself should be a pure function of the observed column values and + recorded parameters. Persisting medians, IQRs, and log usage makes the + optional stage reproducible and auditable. 
+ + :param source_path: original dataset path used for artifact naming + :param input_path: dataset path to transform + :param actions: bounded per-column transformation plan + :return: transformation report with output dataset path + """ + dataset = load_dataset(pathlib.Path(input_path)).copy() + action_items = [item.model_dump() if isinstance(item, pydantic.BaseModel) else item for item in (actions or [])] + applied_actions: list[dict[str, Any]] = [] + for action in action_items: + col = str(action["col"]) + transform = str(action["action"]) + reason = str(action.get("reason") or "") + if col not in dataset.columns: + applied_actions.append( + { + "col": col, + "action": transform, + "status": "skipped_missing_column", + "reason": reason, + } + ) + continue + numeric = pd.to_numeric(dataset[col], errors="coerce") + valid = numeric.dropna() + if valid.empty: + applied_actions.append( + { + "col": col, + "action": transform, + "status": "skipped_no_numeric_values", + "reason": reason, + } + ) + continue + params: dict[str, Any] = {} + transformed = numeric.copy() + status = "applied" + if transform == "none": + pass + elif transform == "robust_scale": + median = valid.median() + q1 = valid.quantile(0.25) + q3 = valid.quantile(0.75) + iqr = q3 - q1 + if float(iqr) <= 0.0: + status = "skipped_zero_iqr" + else: + transformed = (numeric - median) / iqr + params = {"median": float(median), "iqr": float(iqr)} + elif transform == "log1p": + if float(valid.min()) < 0.0: + status = "skipped_negative_values" + else: + transformed = pd.Series(np.log1p(numeric), index=numeric.index) + params = {"log1p": True} + elif transform == "log1p_then_robust_scale": + if float(valid.min()) < 0.0: + status = "skipped_negative_values" + else: + logged = pd.Series(np.log1p(numeric), index=numeric.index) + logged_valid = logged.dropna() + median = logged_valid.median() + q1 = logged_valid.quantile(0.25) + q3 = logged_valid.quantile(0.75) + iqr = q3 - q1 + if float(iqr) <= 0.0: + status = "skipped_zero_iqr_after_log1p" + else: + transformed = (logged - median) / iqr + params = { + "log1p": True, + "median_after_log1p": float(median), + "iqr_after_log1p": float(iqr), + } + else: + status = "skipped_unsupported_action" + if status == "applied": + dataset[col] = transformed + applied_actions.append( + { + "col": col, + "action": transform, + "status": status, + "reason": reason, + "params": params, + } + ) + output_path = write_stage_dataset(source_path, "standardize", dataset) + return { + "input_path": input_path, + "output_path": output_path, + "actions_applied": applied_actions, + } + + +class _UnivariateAnalysisArgs(pydantic.BaseModel): + """ + Store arguments for deterministic univariate analysis. + """ + + model_config = pydantic.ConfigDict(extra="forbid") + source_path: str + input_path: str + time_col: str + secondary_keys: list[str] | None = None + numeric_continuous_cols: list[str] | None = None + numeric_count_cols: list[str] | None = None + binary_flag_cols: list[str] | None = None + + +@ltools.tool(args_schema=_UnivariateAnalysisArgs) +def compute_univariate_metrics_and_plots( + source_path: str, + input_path: str, + time_col: str, + secondary_keys: list[str] | None = None, + numeric_continuous_cols: list[str] | None = None, + numeric_count_cols: list[str] | None = None, + binary_flag_cols: list[str] | None = None, +) -> dict: + """ + Compute deterministic univariate metrics and produce per-column plots. + + Theory: + Univariate EDA starts by measuring one feature at a time. 
Summary metrics + expose support, spread, skew, missingness, and tail behavior, while + histogram/ECDF/KDE plots show what "normal values" look like. For panel + data, per-entity summaries are also useful because a few odd entities can + hide inside an otherwise normal aggregate distribution. + + :param source_path: original dataset path used for artifact naming + :param input_path: dataset path to analyze + :param time_col: selected time column + :param secondary_keys: optional entity key columns + :param numeric_continuous_cols: inferred continuous numeric columns + :param numeric_count_cols: inferred count columns + :param binary_flag_cols: inferred binary columns + :return: summary report and plot manifest + """ + import matplotlib + matplotlib.use("Agg") + import matplotlib.pyplot as plt + + dataset = load_dataset(pathlib.Path(input_path)) + candidate_cols = _candidate_univariate_numeric_cols( + dataset, + time_col=time_col, + secondary_keys=secondary_keys, + numeric_continuous_cols=numeric_continuous_cols, + numeric_count_cols=numeric_count_cols, + binary_flag_cols=binary_flag_cols, + ) + overall_feature_summaries: list[dict[str, Any]] = [] + per_entity_feature_summaries: list[dict[str, Any]] = [] + plot_manifest: list[dict[str, Any]] = [] + valid_secondary_keys = [ + key for key in (secondary_keys or []) if key in dataset.columns and key != time_col + ] + for col in candidate_cols: + summary = _univariate_summary(dataset[col]) + summary["col"] = col + summary["feature_bucket"] = ( + "numeric_continuous" + if col in (numeric_continuous_cols or []) + else "numeric_count" + if col in (numeric_count_cols or []) + else "binary_flag" + if col in (binary_flag_cols or []) + else "numeric" + ) + overall_feature_summaries.append(summary) + + numeric = pd.to_numeric(dataset[col], errors="coerce").dropna() + fig, axes = plt.subplots(1, 2, figsize=(10, 3.8)) + if numeric.empty: + axes[0].text(0.5, 0.5, "No numeric observations", ha="center", va="center") + axes[0].set_axis_off() + axes[1].text(0.5, 0.5, "No numeric observations", ha="center", va="center") + axes[1].set_axis_off() + kde_plotted = False + else: + n_bins = int(min(50, max(10, np.sqrt(numeric.shape[0])))) + axes[0].hist(numeric, bins=n_bins, color="#4472C4", alpha=0.75, density=True) + kde_curve = _gaussian_kde_curve(numeric) + kde_plotted = kde_curve is not None + if kde_curve is not None: + x_grid, density = kde_curve + axes[0].plot(x_grid, density, color="#D62728", linewidth=1.5) + sorted_vals = np.sort(numeric.to_numpy(dtype=float)) + y_ecdf = np.arange(1, sorted_vals.size + 1) / float(sorted_vals.size) + axes[1].step(sorted_vals, y_ecdf, where="post", color="#2CA02C", linewidth=1.5) + axes[1].set_ylim(0.0, 1.0) + axes[0].set_title(f"{col} histogram") + axes[1].set_title(f"{col} ECDF") + fig.suptitle( + f"{col} | skew={summary['skew']} | tail_ratio={summary['tail_ratio_p99_p50']}", + fontsize=10, + ) + plot_path = write_stage_plot(source_path, "univariate_metrics_plotting", f"{col}.distribution", fig) + plt.close(fig) + plot_manifest.append( + { + "col": col, + "plot_path": plot_path, + "kde_plotted": kde_plotted, + } + ) + + if valid_secondary_keys: + grouped = dataset.groupby(valid_secondary_keys, dropna=False, sort=False) + for raw_key, frame in grouped: + key_tuple = raw_key if isinstance(raw_key, tuple) else (raw_key,) + entity = _series_identifier(valid_secondary_keys, key_tuple) + entity_summary = _univariate_summary(frame[col]) + entity_summary["col"] = col + entity_summary["entity"] = entity + 
per_entity_feature_summaries.append(entity_summary) + + overall_feature_summaries.sort( + key=lambda item: ( + item["missing_pct"] if item["missing_pct"] is not None else -1.0, + abs(item["skew"]) if item["skew"] is not None else -1.0, + item["tail_ratio_p99_p50"] if item["tail_ratio_p99_p50"] is not None else -1.0, + ), + reverse=True, + ) + return { + "input_path": input_path, + "analysis_numeric_cols": candidate_cols, + "overall_feature_summaries": overall_feature_summaries, + "per_entity_feature_summaries": per_entity_feature_summaries, + "plot_manifest": plot_manifest, + } + + +class _TransformTestArgs(pydantic.BaseModel): + """ + Store arguments for deterministic transform testing. + """ + + model_config = pydantic.ConfigDict(extra="forbid") + source_path: str + input_path: str + time_col: str + secondary_keys: list[str] | None = None + numeric_continuous_cols: list[str] | None = None + numeric_count_cols: list[str] | None = None + binary_flag_cols: list[str] | None = None + + +@ltools.tool(args_schema=_TransformTestArgs) +def test_univariate_transforms( + source_path: str, + input_path: str, + time_col: str, + secondary_keys: list[str] | None = None, + numeric_continuous_cols: list[str] | None = None, + numeric_count_cols: list[str] | None = None, + binary_flag_cols: list[str] | None = None, +) -> dict: + """ + Deterministically compare candidate transforms for skewed or heavy-tailed + numeric features. + + Theory: + Transform testing should only run when there is enough empirical evidence + that raw values may violate practical modeling assumptions or obscure + univariate structure. The decision can be made deterministically from + summary shape metrics such as skewness and tail ratios. Candidate transforms + are then compared by how much they reduce those distortions. + + :param source_path: original dataset path used for trace naming + :param input_path: dataset path to analyze + :param time_col: selected time column + :param secondary_keys: optional entity key columns + :param numeric_continuous_cols: inferred continuous numeric columns + :param numeric_count_cols: inferred count columns + :param binary_flag_cols: inferred binary columns + :return: transform test report + """ + dataset = load_dataset(pathlib.Path(input_path)) + candidate_cols = _candidate_univariate_numeric_cols( + dataset, + time_col=time_col, + secondary_keys=secondary_keys, + numeric_continuous_cols=numeric_continuous_cols, + numeric_count_cols=numeric_count_cols, + binary_flag_cols=binary_flag_cols, + ) + tested_columns: list[dict[str, Any]] = [] + skipped_columns: list[dict[str, Any]] = [] + for col in candidate_cols: + numeric = pd.to_numeric(dataset[col], errors="coerce") + base_summary = _univariate_summary(numeric) + n_non_null = int(base_summary["n_non_null"]) + abs_skew = abs(float(base_summary["skew"])) if base_summary["skew"] is not None else 0.0 + tail_ratio = float(base_summary["tail_ratio_p99_p50"]) if base_summary["tail_ratio_p99_p50"] is not None else 0.0 + should_test = bool( + n_non_null >= 30 + and ( + abs_skew >= 1.0 + or tail_ratio >= 4.0 + ) + ) + if not should_test: + skipped_columns.append( + { + "col": col, + "reason": ( + "Insufficient deterministic evidence for transform testing. 
" + f"n_non_null={n_non_null}, abs_skew={abs_skew:.3f}, tail_ratio={tail_ratio:.3f}" + ), + "base_summary": base_summary, + } + ) + continue + candidate_scores: list[dict[str, Any]] = [] + for name, transformed in _transform_candidates(numeric).items(): + score_payload = _transform_score(transformed) + candidate_scores.append( + { + "transform": name, + "score": score_payload["score"], + "summary": score_payload["summary"], + } + ) + valid_scores = [item for item in candidate_scores if item["score"] is not None] + valid_scores.sort(key=lambda item: float(item["score"])) + best = valid_scores[0] + baseline = next(item for item in valid_scores if item["transform"] == "none") + improvement = float(baseline["score"] - best["score"]) + if best["transform"] == "none" or improvement < 0.25: + recommendation = "none" + reason = ( + "Candidate transforms did not materially improve deterministic shape metrics " + f"(best_improvement={improvement:.3f})." + ) + else: + recommendation = best["transform"] + reason = ( + f"{best['transform']} best reduced deterministic shape distortion " + f"(baseline_score={baseline['score']:.3f}, best_score={best['score']:.3f})." + ) + tested_columns.append( + { + "col": col, + "base_summary": base_summary, + "candidate_scores": valid_scores, + "recommended_transform": recommendation, + "improvement_over_none": improvement, + "reason": reason, + } + ) + payload = { + "input_path": input_path, + "n_candidate_cols": len(candidate_cols), + "n_tested_cols": len(tested_columns), + "n_skipped_cols": len(skipped_columns), + "tested_columns": tested_columns, + "skipped_columns": skipped_columns, + } + write_stage_trace(source_path, "test_transforms", payload) + return payload + + +def analyze_header(state: dict) -> dict: + """ + Validate dataset headers. + + :param state: graph state containing dataset path + :return: updated state fields with header status + """ + path = pathlib.Path(str(state["path"])) + dataset = load_dataset(path) + cols = list(dataset.columns) + has_header = True + error = "" + if ( + all(isinstance(col, int) for col in cols) + and cols == list(range(len(cols))) + ): + has_header = False + error = "No column names." + else: + for col in cols: + if col is None: + has_header = False + error = "One or more column names missing." + break + col_name = str(col).strip() + if col_name == "": + has_header = False + error = "One or more column names missing." + break + if ( + col_name[0].isdigit() + or not _VALID_HEADER_START_RE.match(col_name) + ): + has_header = False + error = ( + "One or more column names start with invalid characters." + ) + break + if has_header: + result = {"has_header": has_header, "dataset": dataset} + else: + result = {"has_header": has_header, "error": error} + return result + + +@ltools.tool +def extract_metadata(path: str) -> dict: + """ + Return minimal dataset metadata. + + :param path: dataset path + :return: metadata with shape and per-column cardinality + """ + dataset_path = pathlib.Path(path) + dataset = load_dataset(dataset_path) + n_rows, n_cols = dataset.shape + n_unique = dataset.nunique(dropna=True) + n_unique_map = {str(col): int(n_unique[col]) for col in n_unique.index} + metadata = { + "n_rows": int(n_rows), + "n_cols": int(n_cols), + "n_unique": n_unique_map, + } + return metadata + + +@ltools.tool +def extract_column_profiles(path: str) -> dict: + """ + Profile each column using value-level statistics rather than relying on + names alone. 
+ + Theory: + Semantic feature inference becomes more robust when it is grounded in + empirical column behavior. Binary flags tend to have two states, counts + tend to be nonnegative integers, continuous measurements usually have many + distinct real-valued observations, and identifiers often repeat but are not + numeric measurements. These profile statistics give later stages stable + evidence even when column names are unhelpful. + + :param path: dataset path + :return: per-column profile map + """ + dataset_path = pathlib.Path(path) + dataset = load_dataset(dataset_path) + profiles = _build_column_profiles(dataset) + return {"column_profiles": profiles} + + +class _EntityCandidateArgs(pydantic.BaseModel): + """ + Store arguments for deterministic entity-key scoring. + """ + + model_config = pydantic.ConfigDict(extra="forbid") + path: str + time_col: str + candidate_cols: list[str] | None = None + max_combo_size: int = 2 + + +@ltools.tool(args_schema=_EntityCandidateArgs) +def score_entity_candidates( + path: str, + time_col: str, + candidate_cols: list[str] | None = None, + max_combo_size: int = 2, +) -> dict: + """ + Score candidate entity keys by how well they partition repeated time-series + observations into stable per-entity trajectories. + + Theory: + A useful entity key in panel data should do three things. First, entities + should reappear across multiple rows, otherwise the key behaves like a + row-level identifier rather than a series identifier. Second, the pair + `(entity_key, time_col)` should be close to unique, because that pair is + the natural coordinate system of a panel time series. Third, a good entity + key should explain repeated timestamps by reducing collisions once the + entity dimension is included. These criteria are deterministic and more + reliable than name-based guessing. 
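+
+    Example:
+        In a hypothetical two-turbine panel keyed by ``turbine_id``, the
+        candidate ``["turbine_id"]`` makes ``(turbine_id, time_col)`` nearly
+        unique while each turbine repeats across many rows, so pair uniqueness,
+        repeatability, and entity reuse are all high and the combined score
+        clears the 0.60 acceptance threshold.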
+ + :param path: dataset path + :param time_col: selected time column + :param candidate_cols: optional candidate entity columns + :param max_combo_size: max size of composite key combinations to evaluate + :return: scored candidate report with recommended secondary keys + """ + dataset_path = pathlib.Path(path) + dataset = load_dataset(dataset_path) + if time_col not in dataset.columns: + raise KeyError(f"time_col '{time_col}' not found in dataset") + timestamp = pd.to_datetime(dataset[time_col], errors="coerce") + profiles = _build_column_profiles(dataset) + available_cols = [str(col) for col in dataset.columns if str(col) != time_col] + if candidate_cols is None: + selected = [] + for col in available_cols: + profile = profiles[col] + if profile["n_unique"] <= 1: + continue + if profile["unique_ratio"] >= 1.0: + continue + selected.append(col) + candidate_cols = selected + else: + candidate_cols = [ + col for col in candidate_cols if col in dataset.columns and col != time_col + ] + candidate_cols = sorted(dict.fromkeys(candidate_cols)) + max_combo_size = max(1, min(int(max_combo_size), 2)) + duplicate_timestamps = int(timestamp.dropna().duplicated().sum()) + candidates: list[dict[str, Any]] = [] + for combo_size in range(1, max_combo_size + 1): + for combo in itertools.combinations(candidate_cols, combo_size): + subset = dataset[list(combo)].copy() + subset["_ts"] = timestamp + valid = subset.dropna(subset=[*combo, "_ts"]) + if valid.empty: + continue + group_sizes = valid.groupby(list(combo), dropna=True).size() + if group_sizes.empty: + continue + n_entities = int(group_sizes.shape[0]) + mean_obs_per_entity = float(group_sizes.mean()) + entity_reuse_fraction = float((group_sizes > 1).mean()) + duplicate_pairs = int( + valid.duplicated(subset=[*combo, "_ts"]).sum() + ) + pair_uniqueness = float( + 1.0 - (duplicate_pairs / max(1, int(valid.shape[0]))) + ) + if duplicate_timestamps > 0: + collision_reduction = float( + 1.0 - (duplicate_pairs / max(1, duplicate_timestamps)) + ) + else: + collision_reduction = 1.0 if mean_obs_per_entity > 1.0 else 0.0 + repeatability_score = float(min(max((mean_obs_per_entity - 1.0) / 4.0, 0.0), 1.0)) + score = float( + 0.35 * pair_uniqueness + + 0.35 * repeatability_score + + 0.20 * entity_reuse_fraction + + 0.10 * max(0.0, min(collision_reduction, 1.0)) + ) + candidates.append( + { + "secondary_keys": list(combo), + "n_entities": n_entities, + "mean_obs_per_entity": mean_obs_per_entity, + "entity_reuse_fraction": entity_reuse_fraction, + "duplicate_entity_timestamp_pairs": duplicate_pairs, + "pair_uniqueness": pair_uniqueness, + "collision_reduction": collision_reduction, + "score": score, + } + ) + candidates.sort( + key=lambda item: ( + item["score"], + item["entity_reuse_fraction"], + item["mean_obs_per_entity"], + -len(item["secondary_keys"]), + ), + reverse=True, + ) + top_candidate = candidates[0] if candidates else None + if ( + top_candidate is not None + and top_candidate["score"] >= 0.60 + and top_candidate["n_entities"] >= 2 + and top_candidate["mean_obs_per_entity"] >= 2.0 + ): + recommended_secondary_keys = top_candidate["secondary_keys"] + else: + recommended_secondary_keys = [] + return { + "time_col": time_col, + "duplicate_timestamps": duplicate_timestamps, + "candidate_cols": candidate_cols, + "candidates": candidates[:10], + "recommended_secondary_keys": recommended_secondary_keys, + } + + +class _FeatureBucketsArgs(pydantic.BaseModel): + """ + Store arguments for deterministic semantic feature typing. 
+ """ + + model_config = pydantic.ConfigDict(extra="forbid") + path: str + time_col: str + secondary_keys: list[str] | None = None + + +@ltools.tool(args_schema=_FeatureBucketsArgs) +def infer_feature_buckets( + path: str, + time_col: str, + secondary_keys: list[str] | None = None, +) -> dict: + """ + Deterministically type features from their observed value behavior. + + Theory: + The semantic distinction between counts, binary flags, continuous measures, + and categoricals can often be established directly from the support of the + observed values. Binary flags exhibit two states, counts live on the + nonnegative integers, continuous measures take broader real-valued ranges, + and categorical features are residual non-key columns that do not behave + like numeric measurements. Weakly inferred classes such as targets or + exogenous drivers are intentionally left empty because their meaning depends + more on task context than on value support alone. + + :param path: dataset path + :param time_col: selected time column + :param secondary_keys: optional entity key columns to exclude + :return: semantic feature buckets + """ + dataset_path = pathlib.Path(path) + dataset = load_dataset(dataset_path) + profiles = _build_column_profiles(dataset) + excluded = {time_col, *(secondary_keys or [])} + numeric_continuous_cols: list[str] = [] + numeric_count_cols: list[str] = [] + binary_flag_cols: list[str] = [] + categorical_feature_cols: list[str] = [] + for col in [str(value) for value in dataset.columns]: + if col in excluded: + continue + profile = profiles[col] + if profile["is_binary_like"]: + binary_flag_cols.append(col) + elif ( + profile["is_numeric_like"] + and profile["is_integer_like"] + and profile["is_nonnegative_like"] + and profile["n_unique"] > 2 + ): + numeric_count_cols.append(col) + elif profile["is_numeric_like"]: + numeric_continuous_cols.append(col) + else: + categorical_feature_cols.append(col) + covariate_cols = ( + numeric_continuous_cols + + numeric_count_cols + + binary_flag_cols + + categorical_feature_cols + ) + return { + "numeric_continuous_cols": numeric_continuous_cols, + "numeric_count_cols": numeric_count_cols, + "binary_flag_cols": binary_flag_cols, + "categorical_feature_cols": categorical_feature_cols, + "known_exogenous_cols": [], + "target_cols": [], + "covariate_cols": covariate_cols, + "column_profiles": profiles, + } + + +@ltools.tool +def extract_head(path: str, *, n: int = 5) -> dict: + """ + Return the first rows from a dataset. + + :param path: dataset path + :param n: number of rows to return + :return: head rows serialized as JSON-compatible payload + """ + dataset_path = pathlib.Path(path) + dataset = load_dataset(dataset_path) + n_rows = int(n) + if n_rows <= 0: + n_rows = 5 + n_rows = min(n_rows, 50) + head = dataset.head(n_rows) + rows = json.loads(head.to_json(orient="records", date_format="iso")) + payload = { + "n": n_rows, + "columns": [str(col) for col in head.columns.tolist()], + "rows": rows, + } + return payload diff --git a/agentic_eda/jupyterlab_extension_backend/src/univariate_analysis/__init__.py b/agentic_eda/jupyterlab_extension_backend/src/univariate_analysis/__init__.py new file mode 100644 index 000000000..66ee48f67 --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/src/univariate_analysis/__init__.py @@ -0,0 +1,3 @@ +""" +Univariate analysis stages for the Jupyter backend. 
+""" diff --git a/agentic_eda/jupyterlab_extension_backend/src/univariate_analysis/test_transforms.py b/agentic_eda/jupyterlab_extension_backend/src/univariate_analysis/test_transforms.py new file mode 100644 index 000000000..115afd951 --- /dev/null +++ b/agentic_eda/jupyterlab_extension_backend/src/univariate_analysis/test_transforms.py @@ -0,0 +1,212 @@ +""" +Import as: + +import src.univariate_analysis.test_transforms as stransforms +""" + +from __future__ import annotations + +import argparse +import logging +from typing import TypedDict + +import langgraph.graph as lgraph + +import src.univariate_analysis.univariate_metrics_plotting as sunivar +import src.tools.input_tools as tinptool + +_LOG = logging.getLogger(__name__) + + +class CompositeState(TypedDict): + """ + Store graph state for transform testing. + """ + + path: str + done: list[str] + has_header: bool + has_missing_values: bool + error: str + info: str + cols: list[str] + temporal_cols: list[str] + numeric_val_cols: list[str] + categorical_val_cols: list[str] + bad_rows: list[dict] + metadata: dict + time_col: str + candidates: list[dict] + winner_formatter: dict + entity_col: str | None + numeric_cols: list[str] + nonnegative_cols: list[str] + jump_mult: float + report: dict + summary: str + flag: str + type: str + primary_key: str + secondary_keys: list[str] + numeric_continuous_cols: list[str] + numeric_count_cols: list[str] + binary_flag_cols: list[str] + categorical_feature_cols: list[str] + known_exogenous_cols: list[str] + target_cols: list[str] + covariate_cols: list[str] + n_nat_time: int + min_time: str | None + max_time: str | None + typical_delta_mode: str | None + typical_delta_median: str | None + expected_frequency: str | None + dominant_frequency_fraction: float + is_irregular_sampling: bool + resampling_decision: str + coverage_summary: dict + coverage_per_entity: list[dict] + missingness_report: dict + missingness_plan: dict + missingness_handling_report: dict + quality_dataset_path: str + standardization_profile: dict + standardization_gate: dict + standardization_plan: dict + standardization_report: dict + standardized_dataset_path: str + univariate_report: dict + transform_test_report: dict + + +def call_univariate_metrics_plotting(state: CompositeState) -> dict: + """ + Run the sequential pipeline up to univariate metrics/plots. + + :param state: graph state + :return: composite payload from univariate metrics/plots + """ + payload = sunivar.run_univariate_metrics_plotting(state["path"]) + return payload + + +def test_transforms(state: CompositeState) -> dict: + """ + Compare candidate transforms deterministically for columns where it matters. 
+
+
+def test_transforms(state: CompositeState) -> dict:
+    """
+    Deterministically compare candidate transforms for the columns where a
+    transform could matter.
+
+    :param state: graph state
+    :return: transform test report
+    """
+    analysis_path = state.get("quality_dataset_path") or state["path"]
+    report = tinptool.test_univariate_transforms.invoke(
+        {
+            "source_path": state["path"],
+            "input_path": analysis_path,
+            "time_col": state["primary_key"],
+            "secondary_keys": state["secondary_keys"],
+            "numeric_continuous_cols": state["numeric_continuous_cols"],
+            "numeric_count_cols": state["numeric_count_cols"],
+            "binary_flag_cols": state["binary_flag_cols"],
+        }
+    )
+    payload = {"transform_test_report": report}
+    return payload
+
+
+transform_testing = lgraph.StateGraph(CompositeState)
+transform_testing.add_node("univariate_metrics_plotting_pipeline", call_univariate_metrics_plotting)
+transform_testing.add_node("test_transforms", test_transforms)
+transform_testing.add_edge(lgraph.START, "univariate_metrics_plotting_pipeline")
+transform_testing.add_edge("univariate_metrics_plotting_pipeline", "test_transforms")
+transform_testing.add_edge("test_transforms", lgraph.END)
+graph = transform_testing.compile()
+
+
+def run_test_transforms(path: str) -> dict:
+    """
+    Execute transform testing end to end.
+
+    :param path: dataset path
+    :return: full composite graph payload
+    """
+    init_state: CompositeState = {
+        "path": path,
+        "done": [],
+        "has_header": True,
+        "has_missing_values": False,
+        "error": "",
+        "info": "",
+        "cols": [],
+        "temporal_cols": [],
+        "numeric_val_cols": [],
+        "categorical_val_cols": [],
+        "bad_rows": [],
+        "metadata": {},
+        "time_col": "",
+        "candidates": [],
+        "winner_formatter": {},
+        "entity_col": None,
+        "numeric_cols": [],
+        "nonnegative_cols": [],
+        "jump_mult": 20.0,
+        "report": {},
+        "summary": "",
+        "flag": "",
+        "type": "",
+        "primary_key": "",
+        "secondary_keys": [],
+        "numeric_continuous_cols": [],
+        "numeric_count_cols": [],
+        "binary_flag_cols": [],
+        "categorical_feature_cols": [],
+        "known_exogenous_cols": [],
+        "target_cols": [],
+        "covariate_cols": [],
+        "n_nat_time": 0,
+        "min_time": None,
+        "max_time": None,
+        "typical_delta_mode": None,
+        "typical_delta_median": None,
+        "expected_frequency": None,
+        "dominant_frequency_fraction": 0.0,
+        "is_irregular_sampling": False,
+        "resampling_decision": "",
+        "coverage_summary": {},
+        "coverage_per_entity": [],
+        "missingness_report": {},
+        "missingness_plan": {},
+        "missingness_handling_report": {},
+        "quality_dataset_path": "",
+        "standardization_profile": {},
+        "standardization_gate": {},
+        "standardization_plan": {},
+        "standardization_report": {},
+        "standardized_dataset_path": "",
+        "univariate_report": {},
+        "transform_test_report": {},
+    }
+    out = graph.invoke(init_state)
+    payload: CompositeState = out
+    _LOG.info("Transform testing output: %s", payload)
+    return payload
+
+
+def _parse_args() -> argparse.Namespace:
+    """
+    Parse command-line arguments.
+
+    :return: parsed arguments
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--path",
+        required=True,
+        help="Path to dataset file.",
+    )
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    args = _parse_args()
+    run_test_transforms(args.path)
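Both stage modules in this diff follow the same linear-composition pattern: the upstream runner is wrapped as the first node, one new node is appended, and the two are wired START -> upstream -> new node -> END. A minimal self-contained sketch of that pattern, assuming `langgraph` is installed; the state and node names are illustrative:

```python
from typing import TypedDict

import langgraph.graph as lgraph


class State(TypedDict):
    path: str
    report: dict


def upstream(state: State) -> dict:
    # Stand-in for the prior stage's run_* entrypoint.
    return {"report": {"rows": 100}}


def new_stage(state: State) -> dict:
    # Reads the upstream result and adds its own key via a partial update.
    return {"report": {**state["report"], "checked": True}}


g = lgraph.StateGraph(State)
g.add_node("upstream", upstream)
g.add_node("new_stage", new_stage)
g.add_edge(lgraph.START, "upstream")
g.add_edge("upstream", "new_stage")
g.add_edge("new_stage", lgraph.END)
print(g.compile().invoke({"path": "data.csv", "report": {}}))
```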
diff --git a/agentic_eda/jupyterlab_extension_backend/src/univariate_analysis/univariate_metrics_plotting.py b/agentic_eda/jupyterlab_extension_backend/src/univariate_analysis/univariate_metrics_plotting.py
new file mode 100644
index 000000000..1bcd9b6bb
--- /dev/null
+++ b/agentic_eda/jupyterlab_extension_backend/src/univariate_analysis/univariate_metrics_plotting.py
@@ -0,0 +1,214 @@
+"""
+Import as:
+
+import src.univariate_analysis.univariate_metrics_plotting as sunivar
+"""
+
+from __future__ import annotations
+
+import argparse
+import logging
+from typing import TypedDict
+
+import langgraph.graph as lgraph
+
+import src.quality_handling.standardize as sstandard
+import src.tools.input_tools as tinptool
+
+_LOG = logging.getLogger(__name__)
+
+
+class CompositeState(TypedDict):
+    """
+    Store graph state for univariate metrics and plotting.
+    """
+
+    path: str
+    done: list[str]
+    has_header: bool
+    has_missing_values: bool
+    error: str
+    info: str
+    cols: list[str]
+    temporal_cols: list[str]
+    numeric_val_cols: list[str]
+    categorical_val_cols: list[str]
+    bad_rows: list[dict]
+    metadata: dict
+    time_col: str
+    candidates: list[dict]
+    winner_formatter: dict
+    entity_col: str | None
+    numeric_cols: list[str]
+    nonnegative_cols: list[str]
+    jump_mult: float
+    report: dict
+    summary: str
+    flag: str
+    type: str
+    primary_key: str
+    secondary_keys: list[str]
+    numeric_continuous_cols: list[str]
+    numeric_count_cols: list[str]
+    binary_flag_cols: list[str]
+    categorical_feature_cols: list[str]
+    known_exogenous_cols: list[str]
+    target_cols: list[str]
+    covariate_cols: list[str]
+    n_nat_time: int
+    min_time: str | None
+    max_time: str | None
+    typical_delta_mode: str | None
+    typical_delta_median: str | None
+    expected_frequency: str | None
+    dominant_frequency_fraction: float
+    is_irregular_sampling: bool
+    resampling_decision: str
+    coverage_summary: dict
+    coverage_per_entity: list[dict]
+    missingness_report: dict
+    missingness_plan: dict
+    missingness_handling_report: dict
+    quality_dataset_path: str
+    standardization_profile: dict
+    standardization_gate: dict
+    standardization_plan: dict
+    standardization_report: dict
+    standardized_dataset_path: str
+    univariate_report: dict
+
+
+def call_standardize(state: CompositeState) -> dict:
+    """
+    Run the sequential pipeline up to optional standardization.
+
+    :param state: graph state
+    :return: composite payload from standardize
+    """
+    payload = sstandard.run_standardize(state["path"])
+    return payload
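+
+
+# Note: the node below analyzes the cleaned dataset when the quality pipeline
+# produced one (`state.get("quality_dataset_path") or state["path"]`), while
+# the raw path is still threaded through as `source_path` and used to key the
+# stage trace written by `write_stage_trace`.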
+
+
+def compute_univariate_metrics_and_plots(state: CompositeState) -> dict:
+    """
+    Compute univariate summaries and write per-feature distribution plots.
+
+    :param state: graph state
+    :return: univariate report
+    """
+    analysis_path = state.get("quality_dataset_path") or state["path"]
+    report = tinptool.compute_univariate_metrics_and_plots.invoke(
+        {
+            "source_path": state["path"],
+            "input_path": analysis_path,
+            "time_col": state["primary_key"],
+            "secondary_keys": state["secondary_keys"],
+            "numeric_continuous_cols": state["numeric_continuous_cols"],
+            "numeric_count_cols": state["numeric_count_cols"],
+            "binary_flag_cols": state["binary_flag_cols"],
+        }
+    )
+    trace_payload = {
+        "analysis_path": analysis_path,
+        "univariate_report": report,
+    }
+    tinptool.write_stage_trace(state["path"], "univariate_metrics_plotting", trace_payload)
+    return {"univariate_report": report}
+
+
+univariate_analysis = lgraph.StateGraph(CompositeState)
+univariate_analysis.add_node("standardize_pipeline", call_standardize)
+univariate_analysis.add_node("compute_univariate_metrics_and_plots", compute_univariate_metrics_and_plots)
+univariate_analysis.add_edge(lgraph.START, "standardize_pipeline")
+univariate_analysis.add_edge("standardize_pipeline", "compute_univariate_metrics_and_plots")
+univariate_analysis.add_edge("compute_univariate_metrics_and_plots", lgraph.END)
+graph = univariate_analysis.compile()
+
+
+def run_univariate_metrics_plotting(path: str) -> dict:
+    """
+    Execute univariate summaries and plotting end to end.
+
+    :param path: dataset path
+    :return: full composite graph payload
+    """
+    init_state: CompositeState = {
+        "path": path,
+        "done": [],
+        "has_header": True,
+        "has_missing_values": False,
+        "error": "",
+        "info": "",
+        "cols": [],
+        "temporal_cols": [],
+        "numeric_val_cols": [],
+        "categorical_val_cols": [],
+        "bad_rows": [],
+        "metadata": {},
+        "time_col": "",
+        "candidates": [],
+        "winner_formatter": {},
+        "entity_col": None,
+        "numeric_cols": [],
+        "nonnegative_cols": [],
+        "jump_mult": 20.0,
+        "report": {},
+        "summary": "",
+        "flag": "",
+        "type": "",
+        "primary_key": "",
+        "secondary_keys": [],
+        "numeric_continuous_cols": [],
+        "numeric_count_cols": [],
+        "binary_flag_cols": [],
+        "categorical_feature_cols": [],
+        "known_exogenous_cols": [],
+        "target_cols": [],
+        "covariate_cols": [],
+        "n_nat_time": 0,
+        "min_time": None,
+        "max_time": None,
+        "typical_delta_mode": None,
+        "typical_delta_median": None,
+        "expected_frequency": None,
+        "dominant_frequency_fraction": 0.0,
+        "is_irregular_sampling": False,
+        "resampling_decision": "",
+        "coverage_summary": {},
+        "coverage_per_entity": [],
+        "missingness_report": {},
+        "missingness_plan": {},
+        "missingness_handling_report": {},
+        "quality_dataset_path": "",
+        "standardization_profile": {},
+        "standardization_gate": {},
+        "standardization_plan": {},
+        "standardization_report": {},
+        "standardized_dataset_path": "",
+        "univariate_report": {},
+    }
+    out = graph.invoke(init_state)
+    payload: CompositeState = out
+    _LOG.info("Univariate analysis output: %s", payload)
+    return payload
+
+
+def _parse_args() -> argparse.Namespace:
+    """
+    Parse command-line arguments.
+
+    :return: parsed arguments
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--path",
+        required=True,
+        help="Path to dataset file.",
+    )
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    args = _parse_args()
+    run_univariate_metrics_plotting(args.path)
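A quick way to exercise either stage from Python rather than the CLI (a sketch; the dataset path is a placeholder and the backend root must be importable, e.g. via `PYTHONPATH`):

```python
import src.univariate_analysis.test_transforms as stransforms
import src.univariate_analysis.univariate_metrics_plotting as sunivar

# Each run_* entrypoint returns the full composite state once its graph ends.
payload = sunivar.run_univariate_metrics_plotting("path/to/dataset.csv")
print(sorted(payload["univariate_report"].keys()))

payload = stransforms.run_test_transforms("path/to/dataset.csv")
print(payload["transform_test_report"])
```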