Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 42 additions & 101 deletions docs/build_desert_farm.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,28 +8,33 @@

import pandas as pd
import numpy as np
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, Span, Label, HoverTool, Legend, LegendItem
from bokeh.models import (
BoxZoomTool,
ColumnDataSource,
HoverTool,
Legend,
LegendItem,
PanTool,
ResetTool,
WheelZoomTool,
)
from bokeh.resources import CDN
from bokeh.embed import components

from timeSpace.constants import TIME_MARKERS, SPACE_MARKERS
from timeSpace.calculations import create_ellipse_data, classify_process_geometry
from timeSpace.etl import process_magnitude_column
from timeSpace.plotting_helpers import set_fill_alpha
from timeSpace.etl import transform_process_response_sheet, POSSIBLE_COL_LIST
from timeSpace.plotting import create_space_time_figure, add_magnitude_labels

# ── Configuration ──────────────────────────────────────────────────
X_RANGE = (1e-3, 1e13)
Y_RANGE = (1e-28, 1e22)

EXPLORER_N_POINTS = 100

# Energy type colors
ENERGY_COLORS = {
"Chemical": "#0F793D", # green — bonds, reactions, metabolism
"Radiative": "#FFCC33", # gold — photons, solar
"Thermal": "#CC3333", # red — heat, evaporation, climate
"Mechanical": "#336699", # steel blue — kinetic, mixing, pumping
"Chemical": "#0F793D",
"Radiative": "#FFCC33",
"Thermal": "#CC3333",
"Mechanical": "#336699",
}

ENERGY_ORDER = ["Chemical", "Radiative", "Thermal", "Mechanical"]
Expand All @@ -40,115 +45,52 @@
COLAB_URL = "https://colab.research.google.com/github/MDunitz/timeSpace/blob/main/docs/desert_farm_colab.ipynb"


# ── Data loading (same pattern as explorer) ────────────────────────
# ── Data loading ───────────────────────────────────────────────────


def load_processes(csv_path):
    """Read desert farm process CSV and run the ETL pipeline.

    Pre-ETL: derive Color from Energy_type and rename Name → FullName so
    create_name's ShortName fallback doesn't overwrite the original name.
    The hover tooltip uses FullName; the legend groups by Energy_type.

    Parameters
    ----------
    csv_path : str or path-like
        Location of the process CSV; passed straight to ``pd.read_csv``.

    Returns
    -------
    DataFrame
        Whatever ``transform_process_response_sheet`` returns for the
        prepared frame, with coordinates laid out for time on x and space
        on y (``space_on_x=False``) and ``EXPLORER_N_POINTS`` samples per
        ellipse half.
    """
    df = pd.read_csv(csv_path)

    # Keep the CSV's original name under a column the ETL does not rewrite.
    df = df.rename(columns={"Name": "FullName"})

    # One colour per energy type. Series.map leaves NaN for any Energy_type
    # that is missing from ENERGY_COLORS.
    df["Color"] = df.Energy_type.map(ENERGY_COLORS)

    # FullName, Scale and Energy_type are passed in addition to the ETL's
    # default column list.
    return transform_process_response_sheet(
        df,
        possible_col_list=POSSIBLE_COL_LIST + ["FullName", "Scale", "Energy_type"],
        space_on_x=False,
        n_points=EXPLORER_N_POINTS,
    )


# ── Build ──────────────────────────────────────────────────────────


def build_desert_farm_figure(csv_path, output_path):
df = load_processes(csv_path)

p = figure(
p = create_space_time_figure(
width=900,
height=650,
x_axis_type="log",
y_axis_type="log",
x_axis_label="Time (s)",
y_axis_label="Space (m³)",
x_range=X_RANGE,
y_range=Y_RANGE,
title="Desert Farm — Processes Across Scale",
toolbar_location="above",
x_axis_location="above",
tools="pan,wheel_zoom,box_zoom,reset",
space_on_x=False,
)
p.axis.axis_label_text_font_size = FONT_SIZE
p.axis.major_label_text_font_size = "10pt"
p.x_range.start, p.x_range.end = X_RANGE
p.y_range.start, p.y_range.end = Y_RANGE
p.title.text_font_size = "16pt"
p.title.text_font_style = "bold"
p.axis.axis_label_text_font_size = FONT_SIZE
p.axis.major_label_text_font_size = "10pt"
p.background_fill_color = "#fafafa"
p.toolbar_location = "above"
p.toolbar.tools = [PanTool(), WheelZoomTool(), BoxZoomTool(), ResetTool()]

# Reference grid
for t, label_text in TIME_MARKERS.items():
if X_RANGE[0] <= t <= X_RANGE[1]:
p.add_layout(Span(location=t, dimension="height", line_color="#cccccc", line_dash="dashed", line_width=1))
p.add_layout(
Label(
x=t,
y=Y_RANGE[1],
text=label_text,
text_font_size=LABEL_FONT_SIZE,
text_color="#aaaaaa",
text_align="center",
text_baseline="top",
)
)

for s, label_text in SPACE_MARKERS.items():
if Y_RANGE[0] <= s <= Y_RANGE[1]:
p.add_layout(Span(location=s, dimension="width", line_color="#dddddd", line_dash="dashed", line_width=1))
p.add_layout(
Label(
y=s,
x=X_RANGE[0] * 1.5,
text=label_text,
text_font_size=LABEL_FONT_SIZE,
text_color="#aaaaaa",
text_align="left",
)
)
add_magnitude_labels(p, font_size=LABEL_FONT_SIZE, space_on_x=False)

# Plot processes by energy type, building legend items.
# Split by geometry: ellipses use batched patches, lines/points
# use individual glyphs. All renderers for the same energy type
# share a LegendItem so the legend toggle hides them together.
legend_items = []

def _hover_display(val_min, val_max, unit):
Expand All @@ -171,8 +113,8 @@ def _hover_display(val_min, val_max, unit):
data=dict(
xs=[row.x_coords.tolist() for _, row in ell.iterrows()],
ys=[row.y_coords.tolist() for _, row in ell.iterrows()],
alpha=ell.fill_alpha.tolist(),
name=ell.Name.tolist(),
alpha=ell.FillAlpha.tolist(),
name=ell.FullName.tolist(),
short_name=ell.ShortName.tolist(),
scale=ell.Scale.tolist(),
energy_type=ell.Energy_type.tolist(),
Expand Down Expand Up @@ -259,7 +201,6 @@ def _hover_display(val_min, val_max, unit):

renderers.append(r)

# Label for non-ellipse
lx = row.Time_min.value if geom == "point" else row.label_x
ly = row.Space_max.value if geom == "vline" else row.label_y
tr = p.text(
Expand All @@ -278,7 +219,7 @@ def _hover_display(val_min, val_max, unit):
if renderers:
legend_items.append(LegendItem(label=etype, renderers=renderers))

# Legend
# Compact legend — one row per energy type, click to hide
legend = Legend(
items=legend_items,
location="top_left",
Expand All @@ -290,7 +231,7 @@ def _hover_display(val_min, val_max, unit):
)
p.add_layout(legend, "right")

# Render
# ── Render HTML ────────────────────────────────────────────────
script, div = components(p)

html = f"""<!DOCTYPE html>
Expand Down
6 changes: 3 additions & 3 deletions docs/desert_farm_stommel.html

Large diffs are not rendered by default.

25 changes: 22 additions & 3 deletions etl.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import numpy as np

from timeSpace.constants import base_space, base_time, POSSIBLE_COL_LIST
from timeSpace.calculations import create_ellipse_data, classify_process_geometry
from timeSpace.plotting_helpers import (
Expand Down Expand Up @@ -36,7 +38,7 @@ def process_magnitude_column(row, column):
return float(new_val) * base_space


def transform_process_response_sheet(responses_df, possible_col_list=POSSIBLE_COL_LIST, space_on_x=True):
def transform_process_response_sheet(responses_df, possible_col_list=POSSIBLE_COL_LIST, space_on_x=True, n_points=1000):
"""Clean and transform Google Form process responses for plotting.

Applies unit conversion, filters invalid rows (min > max), generates
Expand All @@ -52,11 +54,20 @@ def transform_process_response_sheet(responses_df, possible_col_list=POSSIBLE_CO
Axis order to bake into ellipse `x_coords`/`y_coords`. Must match the
`space_on_x` passed to plotting functions (`add_processes`,
`create_space_time_figure`). Default True (Stommel: x=space, y=time).
n_points : int
Number of x samples per half-ellipse (total vertices = 2 * n_points).
Default 1000 (smooth curves, ~16 KB per ellipse in serialized HTML).
Pass a smaller value (e.g. 100) for figures with many ellipses where
rendered HTML size matters more than perfect curve smoothness.

Returns
-------
DataFrame
With added columns: Name, FillAlpha, TextAlpha, geometry, x_coords, y_coords.
With added columns: Name, FillAlpha, TextAlpha, geometry, x_coords,
y_coords, label_x, label_y. label_x is the geometric mean of
(Time_min, Time_max); label_y is the geometric mean of (Space_min,
Space_max). If label_x or label_y are already present in the input
(e.g. CSV-provided overrides), they are preserved unchanged.
"""
# Validate required columns
required = {"Time_min", "Time_max", "Space_min", "Space_max"}
Expand All @@ -82,12 +93,20 @@ def transform_process_response_sheet(responses_df, possible_col_list=POSSIBLE_CO
plottable_responses_df["TextAlpha"] = plottable_responses_df.apply(lambda row: min(1, 4 * row["FillAlpha"]), axis=1)
plottable_responses_df["Time Max"] = plottable_responses_df.apply(lambda row: row["Time_max"].value, axis=1)
plottable_responses_df["Space Min"] = plottable_responses_df.apply(lambda row: row["Space_min"].value, axis=1)
if "label_x" not in plottable_responses_df.columns:
plottable_responses_df["label_x"] = plottable_responses_df.apply(
lambda row: np.sqrt(row["Time_min"].value * row["Time_max"].value), axis=1
)
if "label_y" not in plottable_responses_df.columns:
plottable_responses_df["label_y"] = plottable_responses_df.apply(
lambda row: np.sqrt(row["Space_min"].value * row["Space_max"].value), axis=1
)
plottable_responses_df["geometry"] = plottable_responses_df.apply(classify_process_geometry, axis=1)
ellipse_mask = plottable_responses_df["geometry"] == "ellipse"
if ellipse_mask.any():
ellipse_coords = (
plottable_responses_df.loc[ellipse_mask, ["Time_min", "Time_max", "Space_min", "Space_max"]]
.apply(create_ellipse_data, axis=1, result_type="expand", space_on_x=space_on_x)
.apply(create_ellipse_data, axis=1, result_type="expand", space_on_x=space_on_x, n_points=n_points)
.rename(columns={0: "x_coords", 1: "y_coords"})
)
plottable_responses_df.loc[ellipse_mask, ["x_coords", "y_coords"]] = ellipse_coords
Expand Down
98 changes: 96 additions & 2 deletions tests/test_etl.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import pandas as pd
import pytest
import astropy.units as u
from timeSpace.etl import process_magnitude_column, transform_predefined_processes
from timeSpace.constants import base_time, base_space
from timeSpace.etl import process_magnitude_column, transform_predefined_processes, transform_process_response_sheet
from timeSpace.constants import base_time, base_space, POSSIBLE_COL_LIST


class TestProcessMagnitudeColumn:
Expand Down Expand Up @@ -84,3 +85,96 @@ def test_ellipse_data_generated(self):
row = result.iloc[0]
assert len(row.x_coords) == 2000 # default n_points=1000, 2 arcs
assert len(row.y_coords) == 2000


class TestTransformProcessResponseSheet:
    """Happy-path tests for transform_process_response_sheet (#22, #24)."""

    def _basic_df(self):
        """Return a two-row input frame of string magnitudes with min < max on both axes."""
        return pd.DataFrame(
            {
                "ShortName": ["A", "B"],
                "Time_min": ["1e-3", "1e2"],
                "Time_max": ["1e0", "1e6"],
                "Space_min": ["1e-12", "1e-9"],
                "Space_max": ["1e-6", "1e-3"],
            }
        )

    def test_produces_expected_columns(self):
        """Every column the ETL adds is present, including the label anchors."""
        result = transform_process_response_sheet(self._basic_df())
        expected_columns = [
            "x_coords",
            "y_coords",
            "FillAlpha",
            "TextAlpha",
            "Name",
            "geometry",
            "label_x",
            "label_y",
        ]
        for col in expected_columns:
            assert col in result.columns, f"Missing column: {col}"

    def test_units_applied(self):
        """Time columns carry seconds and space columns carry cubic metres."""
        result = transform_process_response_sheet(self._basic_df())
        row = result.iloc[0]
        assert row.Time_min.unit == u.second
        assert row.Space_max.unit == u.m**3

    def test_filters_inverted_ranges(self):
        """Rows whose min exceeds max on either axis are dropped."""
        df = pd.DataFrame(
            {
                "ShortName": ["valid", "bad-time", "bad-space"],
                "Time_min": ["1", "100", "1"],
                "Time_max": ["10", "10", "10"],  # row 1: 100 > 10 (bad)
                "Space_min": ["1e-9", "1e-9", "1e-3"],
                "Space_max": ["1e-6", "1e-6", "1e-9"],  # row 2: 1e-3 > 1e-9 (bad)
            }
        )
        result = transform_process_response_sheet(df)
        assert len(result) == 1
        assert result.iloc[0].ShortName == "valid"

    def test_space_on_x_default_stommel_orientation(self):
        """By default x_coords span the space range and y_coords the time range."""
        # Default: x_coords come from space, y_coords from time
        # x bounds are exact (logspace endpoints); y bounds are within 0.1% (ellipse equation)
        result = transform_process_response_sheet(self._basic_df())
        row = result.iloc[0]
        assert row.x_coords.min() == pytest.approx(row.Space_min.value, rel=1e-9)
        assert row.x_coords.max() == pytest.approx(row.Space_max.value, rel=1e-9)
        assert row.y_coords.min() == pytest.approx(row.Time_min.value, rel=1e-3)
        assert row.y_coords.max() == pytest.approx(row.Time_max.value, rel=1e-3)

    def test_space_on_x_false_boyd_orientation(self):
        """With space_on_x=False the axes swap: x spans time, y spans space."""
        # space_on_x=False: x_coords come from time, y_coords from space
        result = transform_process_response_sheet(self._basic_df(), space_on_x=False)
        row = result.iloc[0]
        assert row.x_coords.min() == pytest.approx(row.Time_min.value, rel=1e-9)
        assert row.x_coords.max() == pytest.approx(row.Time_max.value, rel=1e-9)
        assert row.y_coords.min() == pytest.approx(row.Space_min.value, rel=1e-3)
        assert row.y_coords.max() == pytest.approx(row.Space_max.value, rel=1e-3)

    def test_n_points_controls_vertex_count(self):
        """Each ellipse has 2 * n_points vertices, for the default and for an override."""
        result_default = transform_process_response_sheet(self._basic_df())
        assert len(result_default.iloc[0].x_coords) == 2000  # 2 * 1000

        result_small = transform_process_response_sheet(self._basic_df(), n_points=50)
        assert len(result_small.iloc[0].x_coords) == 100  # 2 * 50

    def test_label_x_is_geometric_mean_of_time_range(self):
        """label_x equals sqrt(Time_min * Time_max)."""
        result = transform_process_response_sheet(self._basic_df())
        row = result.iloc[0]
        expected = (row.Time_min.value * row.Time_max.value) ** 0.5
        assert row.label_x == pytest.approx(expected, rel=1e-9)

    def test_label_y_is_geometric_mean_of_space_range(self):
        """label_y equals sqrt(Space_min * Space_max)."""
        result = transform_process_response_sheet(self._basic_df())
        row = result.iloc[0]
        expected = (row.Space_min.value * row.Space_max.value) ** 0.5
        assert row.label_y == pytest.approx(expected, rel=1e-9)

    def test_label_x_csv_override_preserved(self):
        """A label_x column supplied in the input is passed through untouched."""
        # If input already has label_x, ETL should not overwrite
        df = self._basic_df()
        df["label_x"] = [42.0, 99.0]
        result = transform_process_response_sheet(df, possible_col_list=POSSIBLE_COL_LIST + ["label_x"])
        assert result.label_x.iloc[0] == 42.0
        assert result.label_x.iloc[1] == 99.0

    def test_label_y_csv_override_preserved(self):
        """A label_y column supplied in the input is passed through untouched."""
        df = self._basic_df()
        df["label_y"] = [1.5, 2.5]
        result = transform_process_response_sheet(df, possible_col_list=POSSIBLE_COL_LIST + ["label_y"])
        assert result.label_y.iloc[0] == 1.5
        assert result.label_y.iloc[1] == 2.5
Loading