Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 60 additions & 2 deletions pluto/op.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import traceback
from collections import defaultdict
from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union
from urllib.parse import urlparse

import pluto

Expand All @@ -33,6 +34,7 @@
from .sync.store import HEALTH_METRIC_KEYS
from .sys import System
from .util import (
ANSI,
deep_merge,
get_char,
get_val,
Expand Down Expand Up @@ -348,6 +350,7 @@ def __init__(self, config, settings, tags=None, resume=False) -> None:
response_data = r.json()
self.settings.url_view = response_data['url']
self.settings._op_id = response_data['runId']
self.settings._display_id = response_data.get('displayId')
self._resumed = response_data.get('resumed', False)
self._fork_run_id = response_data.get('forkedFromRunId')
self._fork_step = response_data.get('forkStep')
Expand Down Expand Up @@ -375,13 +378,15 @@ def __init__(self, config, settings, tags=None, resume=False) -> None:
f'reattach, or use a unique run_id.'
)
logger.info(f'{tag}: resumed run {str(self.settings._op_id)}')
self._print_run_banner('resumed')
logger.warning(
f'{tag}: Run was resumed via run_id. The `name` parameter '
f'is ignored for resumed runs - the original run name is '
f'preserved. For multi-node, use the same name across all ranks.'
)
else:
logger.info(f'{tag}: started run {str(self.settings._op_id)}')
self._print_run_banner('started')

os.makedirs(f'{self.settings.get_dir()}/files', exist_ok=True)

Expand Down Expand Up @@ -461,6 +466,59 @@ def _init_sync_manager(self) -> None:
)
logger.debug(f'{tag}: initialized sync process manager')

def _print_run_banner(self, verb: str) -> None:
    """Print a stable, greppable run banner to stdout.

    Emits one line in a fixed format so external tooling can reverse-look
    up a run from a training process's stdout, e.g.::

        pluto: run LV3-12 started (external_id=dhyecrvx)

    The display ID (e.g. ``LV3-12``) is read from
    ``self.settings._display_id``; the ``external_id`` is the last path
    segment of ``self.settings.url_view``. This is intentionally a plain
    ``print`` to stdout, independent of the logging system, so it can't be
    suppressed by log levels or console-capture settings.

    The line is colored green only when stdout reports itself as a TTY.
    When stdout is piped or redirected (the case where tooling scrapes the
    banner), it is emitted as plain text so the ANSI codes never land in
    captured logs and greppability is preserved.

    TTY detection is best-effort: user code frequently replaces
    ``sys.stdout`` with a minimal "tee" object that implements only
    ``write``/``flush``. Such a stream has no ``isatty``; it is treated as
    a non-TTY instead of letting an ``AttributeError`` abort run creation
    over a cosmetic color decision.

    Args:
        verb: Lifecycle word placed after the display ID, e.g.
            ``'started'`` or ``'resumed'``.
    """
    display_id = self.settings._display_id
    if not display_id:
        return  # no display ID available; nothing stable to print

    external_id = None
    if self.settings.url_view:
        # Parse the path so a host-only URL (no run slug) doesn't yield the
        # hostname as a bogus external_id.
        path = urlparse(self.settings.url_view).path.strip('/')
        if path:
            external_id = path.split('/')[-1]

    suffix = f' (external_id={external_id})' if external_id else ''
    msg = f'pluto: run {display_id} {verb}{suffix}'

    # Probe for isatty instead of assuming it exists: sys.stdout may be
    # None or a write-only wrapper, and a closed stream raises ValueError.
    # Any of those simply means "do not colorize".
    isatty = getattr(sys.stdout, 'isatty', None)
    try:
        use_color = bool(isatty()) if callable(isatty) else False
    except ValueError:
        use_color = False

    # Wrap the whole line (not just the ID) so the codes sit at the very
    # start/end and the matchable token stays contiguous even in a TTY.
    if use_color:
        msg = f'\033[32m{msg}\033[0m'  # green
    print(msg, flush=True)

def _view_run_message(self) -> str:
    """Compose the 'View run [<id>] at <url>' text used in log lines.

    The URL is rendered through ``print_url``. When a display ID is set on
    the settings object it is inserted between ``ANSI.green`` and
    ``ANSI.cyan``; otherwise the shorter ID-less form is returned.

    Returns:
        The message text, without the logger tag prefix.
    """
    link = print_url(self.settings.url_view)
    run_label = self.settings._display_id
    if not run_label:
        return f'View run at {link}'
    # Switch to cyan after the green ID instead of a full reset, so the
    # trailing "at <url>" keeps the color of the rest of the line
    # (NOTE(review): cyan is presumably the INFO message color - confirm
    # against the logging formatter).
    return f'View run {ANSI.green}{run_label}{ANSI.cyan} at {link}'

def start(self) -> None:
# Start sync process if enabled
if self._sync_manager is not None:
Expand All @@ -481,7 +539,7 @@ def start(self) -> None:
self._iface._update_meta(sys_metric_names)

# Print URL where users can view the run
logger.info(f'{tag}: View run at {print_url(self.settings.url_view)}')
logger.info(f'{tag}: {self._view_run_message()}')

# Register excepthook to detect unhandled exceptions and mark runs as FAILED
_register_excepthook()
Expand Down Expand Up @@ -758,7 +816,7 @@ def _teardown(self, code: Union[int, None], update_status: bool) -> None:

if update_status:
# Print URL where users can view the completed run
logger.info(f'{tag}: View run at {print_url(self.settings.url_view)}')
logger.info(f'{tag}: {self._view_run_message()}')
else:
logger.debug(f'{tag}: closed (run status unchanged)')
except (Exception, KeyboardInterrupt) as e:
Expand Down
1 change: 1 addition & 0 deletions pluto/sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class Settings:
_op_name: Optional[str] = None
_op_id: Optional[int] = None
_op_status: int = -1
_display_id: Optional[str] = None # Server display ID (e.g. "LV3-12")
_external_id: Optional[str] = None # User-provided run ID for multi-node
_external_id_from_env: bool = False # Whether _external_id was set from env var
_resume_run_id: Optional[int] = None # Numeric run ID for resuming
Expand Down
173 changes: 173 additions & 0 deletions tests/test_run_banner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
"""Unit tests for the stdout run banner (Op._print_run_banner).

The banner is a stable, greppable line printed to stdout when a run starts or
resumes, so external tooling can reverse-look up a run from a training
process's stdout. It must:
- go to stdout (not stderr / the logging system),
- use the server display ID (e.g. "LV3-12"), not the numeric run ID,
- include the sqid slug parsed from the run URL as external_id,
- be a fixed format matching ``pluto: run <id> <verb>``.
"""

import io
from contextlib import redirect_stderr, redirect_stdout

from pluto.op import Op
from pluto.sets import Settings


def _make_op(display_id, url):
    """Return an ``Op`` whose ``__init__`` never ran (no server contact).

    Only ``settings._display_id`` and ``settings.url_view`` are populated,
    which is all the banner / view-message helpers read.
    """
    settings = Settings()
    settings._display_id = display_id
    settings.url_view = url

    bare_op = Op.__new__(Op)  # allocate without calling __init__
    bare_op.settings = settings
    return bare_op


def test_banner_started_to_stdout():
    """A 'started' banner goes to stdout in the fixed format, not stderr."""
    run_url = 'https://pluto.trainy.ai/o/linum-n/projects/linum-v3/dhyecrvx'
    op = _make_op('LV3-12', run_url)

    captured_out = io.StringIO()
    captured_err = io.StringIO()
    with redirect_stdout(captured_out):
        with redirect_stderr(captured_err):
            op._print_run_banner('started')

    expected = 'pluto: run LV3-12 started (external_id=dhyecrvx)\n'
    assert captured_out.getvalue() == expected
    # Must not leak onto stderr.
    assert captured_err.getvalue() == ''


def test_banner_resumed_verb():
    """The verb passed in is echoed verbatim after the display ID."""
    run_url = 'https://pluto.trainy.ai/o/linum-n/projects/linum-v3/dhyecrvx'
    op = _make_op('LV3-12', run_url)

    sink = io.StringIO()
    with redirect_stdout(sink):
        op._print_run_banner('resumed')

    expected = 'pluto: run LV3-12 resumed (external_id=dhyecrvx)\n'
    assert sink.getvalue() == expected


def test_banner_matches_consumer_regex():
    """The banner must satisfy the documented reverse-lookup regex."""
    import re

    consumer_pattern = re.compile(r'pluto:\s*run\s+(LV3-\d+)')
    run_url = 'https://pluto.trainy.ai/o/linum-n/projects/linum-v3/dhyecrvx'
    op = _make_op('LV3-12', run_url)

    sink = io.StringIO()
    with redirect_stdout(sink):
        op._print_run_banner('started')

    found = consumer_pattern.search(sink.getvalue())
    assert found is not None
    assert found.group(1) == 'LV3-12'


def test_banner_trailing_slash_url():
    """A trailing slash on the run URL does not blank out external_id."""
    sink = io.StringIO()
    op = _make_op('LV3-12', 'https://x/o/n/projects/p/dhyecrvx/')

    with redirect_stdout(sink):
        op._print_run_banner('started')

    printed = sink.getvalue()
    assert printed == 'pluto: run LV3-12 started (external_id=dhyecrvx)\n'


def test_banner_no_display_id_is_silent():
    """No display ID -> nothing stable to print, so nothing is emitted."""
    sink = io.StringIO()
    op = _make_op(None, 'https://x/o/n/projects/p/dhyecrvx')

    with redirect_stdout(sink):
        op._print_run_banner('started')

    printed = sink.getvalue()
    assert printed == ''


def test_banner_no_url_omits_external_id():
    """Without a run URL the banner drops the external_id suffix."""
    sink = io.StringIO()
    op = _make_op('LV3-12', None)

    with redirect_stdout(sink):
        op._print_run_banner('started')

    printed = sink.getvalue()
    assert printed == 'pluto: run LV3-12 started\n'
Comment thread
asaiacai marked this conversation as resolved.


def test_banner_host_only_url_omits_external_id():
    """A host-only URL has no path segment, so external_id is omitted
    rather than falling back to the hostname."""
    sink = io.StringIO()
    op = _make_op('LV3-12', 'https://pluto.trainy.ai')

    with redirect_stdout(sink):
        op._print_run_banner('started')

    printed = sink.getvalue()
    assert printed == 'pluto: run LV3-12 started\n'


class _FakeTTY(io.StringIO):
    """In-memory text stream that reports itself as a terminal.

    Used to drive the colored branch of the banner without a real TTY.
    """

    def isatty(self) -> bool:
        # Unconditionally claim to be a terminal.
        return True


def test_banner_non_tty_is_plain_no_ansi():
    """Piped/redirected stdout (isatty() is False) -> no ANSI codes, so the
    captured output stays byte-clean for downstream greppers."""
    run_url = 'https://pluto.trainy.ai/o/linum-n/projects/linum-v3/dhyecrvx'
    op = _make_op('LV3-12', run_url)

    sink = io.StringIO()  # StringIO.isatty() -> False
    with redirect_stdout(sink):
        op._print_run_banner('started')

    printed = sink.getvalue()
    assert '\033' not in printed
    assert printed == 'pluto: run LV3-12 started (external_id=dhyecrvx)\n'


def test_banner_tty_is_green_and_still_greppable():
    """On a TTY the line is green-wrapped, but the codes sit at the start/end
    so the matchable token stays contiguous and the consumer regex works."""
    import re

    run_url = 'https://pluto.trainy.ai/o/linum-n/projects/linum-v3/dhyecrvx'
    op = _make_op('LV3-12', run_url)

    terminal = _FakeTTY()
    with redirect_stdout(terminal):
        op._print_run_banner('started')

    printed = terminal.getvalue()
    assert printed.startswith('\033[32m')
    assert printed.endswith('\033[0m\n')

    # The reverse-lookup regex still extracts the display ID from the
    # colored line.
    found = re.search(r'pluto:\s*run\s+([A-Z0-9]+-\d+)', printed)
    assert found is not None
    assert found.group(1) == 'LV3-12'


def test_view_run_message_includes_green_display_id(monkeypatch):
    """The 'View run' log line names the run, with the display ID colored."""
    from pluto import util

    # Swap the color codes for visible markers so placement can be asserted.
    for attr, marker in (('green', '<G>'), ('cyan', '<C>')):
        monkeypatch.setattr(util.ANSI, attr, marker)

    run_url = 'https://pluto.trainy.ai/o/trainy/projects/testing-ci/OgiAJ'
    op = _make_op('TCI-144405', run_url)

    message = op._view_run_message()

    # ID is green, then back to cyan so the trailing "at <url>" stays cyan.
    assert 'View run <G>TCI-144405<C> at ' in message
    assert run_url in message


def test_view_run_message_without_display_id_falls_back():
    """With no display ID the message uses the plain 'View run at' form."""
    run_url = 'https://pluto.trainy.ai/o/trainy/projects/testing-ci/OgiAJ'
    op = _make_op(None, run_url)

    message = op._view_run_message()

    assert message.startswith('View run at ')
    assert 'TCI-' not in message