Skip to content
This repository was archived by the owner on Apr 30, 2026. It is now read-only.

Commit b96fe9e

Browse files
fixes
1 parent 6935e82 commit b96fe9e

1 file changed

Lines changed: 68 additions & 5 deletions

File tree

overmind/tracing.py

Lines changed: 68 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,37 @@ def _span_processor_on_start(span: trace.Span, parent_context: trace.Context | N
154154
span.set_attribute(SpanAttributes.TRACELOOP_WORKFLOW_NAME, str(value))
155155

156156

157+
# ---------------------------------------------------------------------------
158+
# Remote parent context propagation (subprocess / distributed tracing)
159+
# ---------------------------------------------------------------------------
160+
161+
162+
def _attach_remote_parent_if_present() -> None:
163+
"""Attach a remote parent span from the ``TRACEPARENT`` environment variable.
164+
165+
The overclaw optimizer injects ``TRACEPARENT`` (W3C Trace Context format)
166+
into every agent subprocess before spawning it. Calling this function
167+
immediately after the TracerProvider is registered makes every OTel span
168+
started in the subprocess a child of the optimizer's current span, so all
169+
per-case evaluation runs appear under a single unified parent trace.
170+
171+
Safe to call when ``TRACEPARENT`` is absent — no-op in that case.
172+
"""
173+
raw = os.environ.get("TRACEPARENT") or os.environ.get("OTEL_TRACEPARENT")
174+
if not raw:
175+
return
176+
try:
177+
from opentelemetry import context as _ctx
178+
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
179+
180+
propagator = TraceContextTextMapPropagator()
181+
remote_ctx = propagator.extract(carrier={"traceparent": raw.strip()})
182+
_ctx.attach(remote_ctx)
183+
logger.debug("Attached remote parent context from TRACEPARENT: %s", raw)
184+
except Exception as exc:
185+
logger.debug("Could not attach remote parent context: %s", exc)
186+
187+
157188
# ---------------------------------------------------------------------------
158189
# SDK initialization
159190
# ---------------------------------------------------------------------------
@@ -223,7 +254,20 @@ def init(
223254

224255
otlp_exporter = OTLPSpanExporter(endpoint=endpoint, headers=headers)
225256

226-
span_processor = BatchSpanProcessor(otlp_exporter)
257+
# Tighten the batch flush cadence so closed child spans show up in the
258+
# backend within ~2s instead of the OTel default 5s. Long-running
259+
# workflow spans rely on this to stream progress while still open.
260+
schedule_delay_millis = int(
261+
os.environ.get("OVERMIND_SPAN_FLUSH_INTERVAL_MS", "2000")
262+
)
263+
max_export_batch_size = int(
264+
os.environ.get("OVERMIND_SPAN_MAX_EXPORT_BATCH_SIZE", "256")
265+
)
266+
span_processor = BatchSpanProcessor(
267+
otlp_exporter,
268+
schedule_delay_millis=schedule_delay_millis,
269+
max_export_batch_size=max_export_batch_size,
270+
)
227271
provider.add_span_processor(span_processor)
228272
span_processor.on_start = _span_processor_on_start
229273

@@ -234,6 +278,13 @@ def init(
234278
_tracer = trace.get_tracer("overmind", _SDK_VERSION)
235279
enable_tracing(providers)
236280

281+
# Distributed tracing: if the process was spawned by the overclaw
282+
# optimizer (or any other orchestrator) with a W3C TRACEPARENT env var,
283+
# attach it as the ambient OTel context so every span created in this
284+
# process becomes a child of the parent optimizer span — forming a single
285+
# unified trace across subprocess boundaries.
286+
_attach_remote_parent_if_present()
287+
237288
_initialized = True
238289
logger.info("Overmind SDK initialized: service=%s, environment=%s", service_name, environment)
239290

@@ -453,12 +504,18 @@ async def async_wrapper(*args, **kwargs):
453504
continue
454505
inputs[key] = _prepare_for_otel(value)
455506

456-
otel_span.set_attribute("inputs", serialize(inputs))
507+
serialized_inputs = serialize(inputs)
508+
otel_span.set_attribute("inputs", serialized_inputs)
509+
# Also tag under the overclaw namespace so the backend
510+
# can extract I/O for dataset collection from any span.
511+
otel_span.set_attribute("overclaw.input_data", serialized_inputs)
457512

458513
result = await func(*args, **kwargs)
459514

460515
output = _prepare_for_otel(result)
461-
otel_span.set_attribute("outputs", serialize(output))
516+
serialized_output = serialize(output)
517+
otel_span.set_attribute("outputs", serialized_output)
518+
otel_span.set_attribute("overclaw.output_data", serialized_output)
462519

463520
otel_span.set_status(Status(StatusCode.OK))
464521

@@ -498,12 +555,18 @@ def sync_wrapper(*args, **kwargs):
498555
continue
499556
inputs[key] = _prepare_for_otel(value)
500557

501-
otel_span.set_attribute("inputs", serialize(inputs))
558+
serialized_inputs = serialize(inputs)
559+
otel_span.set_attribute("inputs", serialized_inputs)
560+
# Also tag under the overclaw namespace so the backend
561+
# can extract I/O for dataset collection from any span.
562+
otel_span.set_attribute("overclaw.input_data", serialized_inputs)
502563

503564
result = func(*args, **kwargs)
504565

505566
output = _prepare_for_otel(result)
506-
otel_span.set_attribute("outputs", serialize(output))
567+
serialized_output = serialize(output)
568+
otel_span.set_attribute("outputs", serialized_output)
569+
otel_span.set_attribute("overclaw.output_data", serialized_output)
507570

508571
otel_span.set_status(Status(StatusCode.OK))
509572

0 commit comments

Comments
 (0)