MDunitz · madsCodeBuddy · Apr 28, 2026 · Apr 28, 2026 · Apr 28, 2026 · Apr 28, 2026
diff --git a/docs/build_desert_farm.py b/docs/build_desert_farm.py
@@ -8,28 +8,33 @@
 
 import pandas as pd
 import numpy as np
-from bokeh.plotting import figure
-from bokeh.models import ColumnDataSource, Span, Label, HoverTool, Legend, LegendItem
+from bokeh.models import (
+    BoxZoomTool,
+    ColumnDataSource,
+    HoverTool,
+    Legend,
+    LegendItem,
+    PanTool,
+    ResetTool,
+    WheelZoomTool,
+)
 from bokeh.resources import CDN
 from bokeh.embed import components
 
-from timeSpace.constants import TIME_MARKERS, SPACE_MARKERS
-from timeSpace.calculations import create_ellipse_data, classify_process_geometry
-from timeSpace.etl import process_magnitude_column
-from timeSpace.plotting_helpers import set_fill_alpha
+from timeSpace.etl import transform_process_response_sheet, POSSIBLE_COL_LIST
+from timeSpace.plotting import create_space_time_figure, add_magnitude_labels
 
 # ── Configuration ──────────────────────────────────────────────────
 X_RANGE = (1e-3, 1e13)
 Y_RANGE = (1e-28, 1e22)
 
 EXPLORER_N_POINTS = 100
 
-# Energy type colors
 ENERGY_COLORS = {
-    "Chemical": "#0F793D",  # green — bonds, reactions, metabolism
-    "Radiative": "#FFCC33",  # gold — photons, solar
-    "Thermal": "#CC3333",  # red — heat, evaporation, climate
-    "Mechanical": "#336699",  # steel blue — kinetic, mixing, pumping
+    "Chemical": "#0F793D",
+    "Radiative": "#FFCC33",
+    "Thermal": "#CC3333",
+    "Mechanical": "#336699",
 }
 
 ENERGY_ORDER = ["Chemical", "Radiative", "Thermal", "Mechanical"]
@@ -40,115 +45,52 @@
 COLAB_URL = "https://colab.research.google.com/github/MDunitz/timeSpace/blob/main/docs/desert_farm_colab.ipynb"
 
 
-# ── Data loading (same pattern as explorer) ────────────────────────
+# ── Data loading ───────────────────────────────────────────────────
 
 
 def load_processes(csv_path):
-    """Read desert farm process CSV and generate render coordinates.
+    """Read desert farm process CSV and run the ETL pipeline.
 
-    Classifies each process geometry (ellipse/vline/hline/point) and only
-    generates ellipse polygon data for true ellipses.  Degenerate axes
-    render as lines or point markers instead of fabricated ellipses.
-
-    Uses package functions:
-    - etl.process_magnitude_column for unit application (seconds, m³)
-    - calculations.classify_process_geometry for degeneracy detection
-    - calculations.create_ellipse_data for ellipse polygon vertices
-    - plotting_helpers.set_fill_alpha for area-based transparency
+    Pre-ETL: derive Color from Energy_type and rename Name → FullName so
+    create_name's ShortName fallback doesn't overwrite the original name.
+    The hover tooltip uses FullName; the legend groups by Energy_type.
     """
     df = pd.read_csv(csv_path)
-
-    # Apply units — same function as etl.py pipeline
-    for col in ["Time_min", "Time_max", "Space_min", "Space_max"]:
-        df[col] = df.apply(process_magnitude_column, column=col, axis=1)
-
-    # Classify geometry before generating coords
-    df["geometry"] = df.apply(classify_process_geometry, axis=1)
-
-    # Only generate ellipse data for actual ellipses
-    ellipse_mask = df["geometry"] == "ellipse"
-    df.loc[ellipse_mask, ["x_coords", "y_coords"]] = (
-        df.loc[ellipse_mask, ["Time_min", "Time_max", "Space_min", "Space_max"]]
-        .apply(
-            create_ellipse_data,
-            axis=1,
-            result_type="expand",
-            n_points=EXPLORER_N_POINTS,
-            space_on_x=False,
-        )
-        .rename(columns={0: "x_coords", 1: "y_coords"})
+    df = df.rename(columns={"Name": "FullName"})
+    df["Color"] = df.Energy_type.map(ENERGY_COLORS)
+
+    return transform_process_response_sheet(
+        df,
+        possible_col_list=POSSIBLE_COL_LIST + ["FullName", "Scale", "Energy_type"],
+        space_on_x=False,
+        n_points=EXPLORER_N_POINTS,
     )
 
-    df["color"] = df.Energy_type.map(ENERGY_COLORS)
-    df["label_x"] = np.sqrt(df.Time_min.apply(lambda q: q.value) * df.Time_max.apply(lambda q: q.value))
-    df["label_y"] = np.sqrt(df.Space_min.apply(lambda q: q.value) * df.Space_max.apply(lambda q: q.value))
-
-    # Fill alpha — same function as main Stommel figure pipeline
-    df["fill_alpha"] = df.apply(set_fill_alpha, axis=1)
-
-    return df
-
 
 # ── Build ──────────────────────────────────────────────────────────
 
 
 def build_desert_farm_figure(csv_path, output_path):
     df = load_processes(csv_path)
 
-    p = figure(
+    p = create_space_time_figure(
         width=900,
         height=650,
-        x_axis_type="log",
-        y_axis_type="log",
-        x_axis_label="Time (s)",
-        y_axis_label="Space (m³)",
-        x_range=X_RANGE,
-        y_range=Y_RANGE,
         title="Desert Farm — Processes Across Scale",
-        toolbar_location="above",
-        x_axis_location="above",
-        tools="pan,wheel_zoom,box_zoom,reset",
+        space_on_x=False,
     )
-    p.axis.axis_label_text_font_size = FONT_SIZE
-    p.axis.major_label_text_font_size = "10pt"
+    p.x_range.start, p.x_range.end = X_RANGE
+    p.y_range.start, p.y_range.end = Y_RANGE
     p.title.text_font_size = "16pt"
     p.title.text_font_style = "bold"
+    p.axis.axis_label_text_font_size = FONT_SIZE
+    p.axis.major_label_text_font_size = "10pt"
     p.background_fill_color = "#fafafa"
+    p.toolbar_location = "above"
+    p.toolbar.tools = [PanTool(), WheelZoomTool(), BoxZoomTool(), ResetTool()]
 
-    # Reference grid
-    for t, label_text in TIME_MARKERS.items():
-        if X_RANGE[0] <= t <= X_RANGE[1]:
-            p.add_layout(Span(location=t, dimension="height", line_color="#cccccc", line_dash="dashed", line_width=1))
-            p.add_layout(
-                Label(
-                    x=t,
-                    y=Y_RANGE[1],
-                    text=label_text,
-                    text_font_size=LABEL_FONT_SIZE,
-                    text_color="#aaaaaa",
-                    text_align="center",
-                    text_baseline="top",
-                )
-            )
-
-    for s, label_text in SPACE_MARKERS.items():
-        if Y_RANGE[0] <= s <= Y_RANGE[1]:
-            p.add_layout(Span(location=s, dimension="width", line_color="#dddddd", line_dash="dashed", line_width=1))
-            p.add_layout(
-                Label(
-                    y=s,
-                    x=X_RANGE[0] * 1.5,
-                    text=label_text,
-                    text_font_size=LABEL_FONT_SIZE,
-                    text_color="#aaaaaa",
-                    text_align="left",
-                )
-            )
+    add_magnitude_labels(p, font_size=LABEL_FONT_SIZE, space_on_x=False)
 
-    # Plot processes by energy type, building legend items.
-    # Split by geometry: ellipses use batched patches, lines/points
-    # use individual glyphs.  All renderers for the same energy type
-    # share a LegendItem so the legend toggle hides them together.
     legend_items = []
 
     def _hover_display(val_min, val_max, unit):
@@ -171,8 +113,8 @@ def _hover_display(val_min, val_max, unit):
                 data=dict(
                     xs=[row.x_coords.tolist() for _, row in ell.iterrows()],
                     ys=[row.y_coords.tolist() for _, row in ell.iterrows()],
-                    alpha=ell.fill_alpha.tolist(),
-                    name=ell.Name.tolist(),
+                    alpha=ell.FillAlpha.tolist(),
+                    name=ell.FullName.tolist(),
                     short_name=ell.ShortName.tolist(),
                     scale=ell.Scale.tolist(),
                     energy_type=ell.Energy_type.tolist(),
@@ -259,7 +201,6 @@ def _hover_display(val_min, val_max, unit):
 
             renderers.append(r)
 
-            # Label for non-ellipse
             lx = row.Time_min.value if geom == "point" else row.label_x
             ly = row.Space_max.value if geom == "vline" else row.label_y
             tr = p.text(
@@ -278,7 +219,7 @@ def _hover_display(val_min, val_max, unit):
         if renderers:
             legend_items.append(LegendItem(label=etype, renderers=renderers))
 
-    # Legend
+    # Compact legend — one row per energy type, click to hide
     legend = Legend(
         items=legend_items,
         location="top_left",
@@ -290,7 +231,7 @@ def _hover_display(val_min, val_max, unit):
     )
     p.add_layout(legend, "right")
 
-    # Render
+    # ── Render HTML ────────────────────────────────────────────────
     script, div = components(p)
 
     html = f"""<!DOCTYPE html>

diff --git a/docs/desert_farm_stommel.html b/docs/desert_farm_stommel.html
diff --git a/etl.py b/etl.py
@@ -1,3 +1,5 @@
+import numpy as np
+
 from timeSpace.constants import base_space, base_time, POSSIBLE_COL_LIST
 from timeSpace.calculations import create_ellipse_data, classify_process_geometry
 from timeSpace.plotting_helpers import (
@@ -36,7 +38,7 @@ def process_magnitude_column(row, column):
         return float(new_val) * base_space
 
 
-def transform_process_response_sheet(responses_df, possible_col_list=POSSIBLE_COL_LIST, space_on_x=True):
+def transform_process_response_sheet(responses_df, possible_col_list=POSSIBLE_COL_LIST, space_on_x=True, n_points=1000):
     """Clean and transform Google Form process responses for plotting.
 
     Applies unit conversion, filters invalid rows (min > max), generates
@@ -52,11 +54,20 @@ def transform_process_response_sheet(responses_df, possible_col_list=POSSIBLE_CO
         Axis order to bake into ellipse `x_coords`/`y_coords`. Must match the
         `space_on_x` passed to plotting functions (`add_processes`,
         `create_space_time_figure`). Default True (Stommel: x=space, y=time).
+    n_points : int
+        Number of x samples per half-ellipse (total vertices = 2 * n_points).
+        Default 1000 (smooth curves, ~16 KB per ellipse in serialized HTML).
+        Pass a smaller value (e.g. 100) for figures with many ellipses where
+        rendered HTML size matters more than perfect curve smoothness.
 
     Returns
     -------
     DataFrame
-        With added columns: Name, FillAlpha, TextAlpha, geometry, x_coords, y_coords.
+        With added columns: Name, FillAlpha, TextAlpha, geometry, x_coords,
+        y_coords, label_x, label_y. Regardless of `space_on_x`, label_x is the
+        geometric mean of (Time_min, Time_max) and label_y that of (Space_min,
+        Space_max). Existing label_x/label_y columns (e.g. CSV overrides kept
+        via `possible_col_list`) are preserved unchanged.
     """
     # Validate required columns
     required = {"Time_min", "Time_max", "Space_min", "Space_max"}
@@ -82,12 +93,20 @@ def transform_process_response_sheet(responses_df, possible_col_list=POSSIBLE_CO
     plottable_responses_df["TextAlpha"] = plottable_responses_df.apply(lambda row: min(1, 4 * row["FillAlpha"]), axis=1)
     plottable_responses_df["Time Max"] = plottable_responses_df.apply(lambda row: row["Time_max"].value, axis=1)
     plottable_responses_df["Space Min"] = plottable_responses_df.apply(lambda row: row["Space_min"].value, axis=1)
+    if "label_x" not in plottable_responses_df.columns:
+        plottable_responses_df["label_x"] = plottable_responses_df.apply(
+            lambda row: np.sqrt(row["Time_min"].value * row["Time_max"].value), axis=1
+        )
+    if "label_y" not in plottable_responses_df.columns:
+        plottable_responses_df["label_y"] = plottable_responses_df.apply(
+            lambda row: np.sqrt(row["Space_min"].value * row["Space_max"].value), axis=1
+        )
     plottable_responses_df["geometry"] = plottable_responses_df.apply(classify_process_geometry, axis=1)
     ellipse_mask = plottable_responses_df["geometry"] == "ellipse"
     if ellipse_mask.any():
         ellipse_coords = (
             plottable_responses_df.loc[ellipse_mask, ["Time_min", "Time_max", "Space_min", "Space_max"]]
-            .apply(create_ellipse_data, axis=1, result_type="expand", space_on_x=space_on_x)
+            .apply(create_ellipse_data, axis=1, result_type="expand", space_on_x=space_on_x, n_points=n_points)
             .rename(columns={0: "x_coords", 1: "y_coords"})
         )
         plottable_responses_df.loc[ellipse_mask, ["x_coords", "y_coords"]] = ellipse_coords

diff --git a/tests/test_etl.py b/tests/test_etl.py
@@ -1,7 +1,8 @@
 import pandas as pd
+import pytest
 import astropy.units as u
-from timeSpace.etl import process_magnitude_column, transform_predefined_processes
-from timeSpace.constants import base_time, base_space
+from timeSpace.etl import process_magnitude_column, transform_predefined_processes, transform_process_response_sheet
+from timeSpace.constants import base_time, base_space, POSSIBLE_COL_LIST
 
 
 class TestProcessMagnitudeColumn:
@@ -84,3 +85,96 @@ def test_ellipse_data_generated(self):
         row = result.iloc[0]
         assert len(row.x_coords) == 2000  # default n_points=1000, 2 arcs
         assert len(row.y_coords) == 2000
+
+
+class TestTransformProcessResponseSheet:
+    """Tests for transform_process_response_sheet: outputs, row filtering, overrides (#22, #24)."""
+
+    def _basic_df(self):
+        return pd.DataFrame(
+            {
+                "ShortName": ["A", "B"],
+                "Time_min": ["1e-3", "1e2"],
+                "Time_max": ["1e0", "1e6"],
+                "Space_min": ["1e-12", "1e-9"],
+                "Space_max": ["1e-6", "1e-3"],
+            }
+        )
+
+    def test_produces_expected_columns(self):
+        result = transform_process_response_sheet(self._basic_df())
+        for col in ["x_coords", "y_coords", "FillAlpha", "TextAlpha", "Name", "geometry"]:
+            assert col in result.columns, f"Missing column: {col}"
+
+    def test_units_applied(self):
+        result = transform_process_response_sheet(self._basic_df())
+        row = result.iloc[0]
+        assert row.Time_min.unit == u.second
+        assert row.Space_max.unit == u.m**3
+
+    def test_filters_inverted_ranges(self):
+        df = pd.DataFrame(
+            {
+                "ShortName": ["valid", "bad-time", "bad-space"],
+                "Time_min": ["1", "100", "1"],
+                "Time_max": ["10", "10", "10"],  # row 1: 100 > 10 (bad)
+                "Space_min": ["1e-9", "1e-9", "1e-3"],
+                "Space_max": ["1e-6", "1e-6", "1e-9"],  # row 2: 1e-3 > 1e-9 (bad)
+            }
+        )
+        result = transform_process_response_sheet(df)
+        assert len(result) == 1
+        assert result.iloc[0].ShortName == "valid"
+
+    def test_space_on_x_default_stommel_orientation(self):
+        # Default: x_coords come from space, y_coords from time
+        # x bounds are exact (logspace endpoints); y bounds are within 0.1% (ellipse equation)
+        result = transform_process_response_sheet(self._basic_df())
+        row = result.iloc[0]
+        assert row.x_coords.min() == pytest.approx(row.Space_min.value, rel=1e-9)
+        assert row.x_coords.max() == pytest.approx(row.Space_max.value, rel=1e-9)
+        assert row.y_coords.min() == pytest.approx(row.Time_min.value, rel=1e-3)
+        assert row.y_coords.max() == pytest.approx(row.Time_max.value, rel=1e-3)
+
+    def test_space_on_x_false_boyd_orientation(self):
+        # space_on_x=False: x_coords come from time, y_coords from space
+        result = transform_process_response_sheet(self._basic_df(), space_on_x=False)
+        row = result.iloc[0]
+        assert row.x_coords.min() == pytest.approx(row.Time_min.value, rel=1e-9)
+        assert row.x_coords.max() == pytest.approx(row.Time_max.value, rel=1e-9)
+        assert row.y_coords.min() == pytest.approx(row.Space_min.value, rel=1e-3)
+        assert row.y_coords.max() == pytest.approx(row.Space_max.value, rel=1e-3)
+
+    def test_n_points_controls_vertex_count(self):
+        result_default = transform_process_response_sheet(self._basic_df())
+        assert len(result_default.iloc[0].x_coords) == 2000  # 2 * 1000
+
+        result_small = transform_process_response_sheet(self._basic_df(), n_points=50)
+        assert len(result_small.iloc[0].x_coords) == 100  # 2 * 50
+
+    def test_label_x_is_geometric_mean_of_time_range(self):
+        result = transform_process_response_sheet(self._basic_df())
+        row = result.iloc[0]
+        expected = (row.Time_min.value * row.Time_max.value) ** 0.5
+        assert row.label_x == pytest.approx(expected, rel=1e-9)
+
+    def test_label_y_is_geometric_mean_of_space_range(self):
+        result = transform_process_response_sheet(self._basic_df())
+        row = result.iloc[0]
+        expected = (row.Space_min.value * row.Space_max.value) ** 0.5
+        assert row.label_y == pytest.approx(expected, rel=1e-9)
+
+    def test_label_x_csv_override_preserved(self):
+        # An input label_x (passed through via possible_col_list) must not be overwritten by the ETL
+        df = self._basic_df()
+        df["label_x"] = [42.0, 99.0]
+        result = transform_process_response_sheet(df, possible_col_list=POSSIBLE_COL_LIST + ["label_x"])
+        assert result.label_x.iloc[0] == 42.0
+        assert result.label_x.iloc[1] == 99.0
+
+    def test_label_y_csv_override_preserved(self):
+        df = self._basic_df()
+        df["label_y"] = [1.5, 2.5]
+        result = transform_process_response_sheet(df, possible_col_list=POSSIBLE_COL_LIST + ["label_y"])
+        assert result.label_y.iloc[0] == 1.5
+        assert result.label_y.iloc[1] == 2.5