From 004d4c73da36e133e19402dfa9cfad87c9e9351f Mon Sep 17 00:00:00 2001 From: BoKeum Date: Thu, 5 Mar 2026 10:22:30 +0900 Subject: [PATCH 1/2] feat(BA-4668): Add CLI commands for prometheus query preset admin CRUD and execution - Add SDK function class (client/func/prometheus_query_preset.py) with create, list_presets, get, modify, delete, and execute methods - Add admin CLI commands (list, info, add, modify, delete) under `backend.ai admin prometheus-query-preset` - Add top-level execute CLI command under `backend.ai prometheus-query-preset execute` with repeatable --label flag - Register SDK function in BaseSession and CLI groups in __init__ modules Co-Authored-By: Claude Opus 4.6 --- src/ai/backend/client/cli/__init__.py | 8 + src/ai/backend/client/cli/admin/__init__.py | 1 + .../cli/admin/prometheus_query_preset.py | 157 ++++++++++++++++++ .../cli/prometheus_query_preset/__init__.py | 11 ++ .../cli/prometheus_query_preset/commands.py | 76 +++++++++ .../client/func/prometheus_query_preset.py | 135 +++++++++++++++ src/ai/backend/client/session.py | 3 + 7 files changed, 391 insertions(+) create mode 100644 src/ai/backend/client/cli/admin/prometheus_query_preset.py create mode 100644 src/ai/backend/client/cli/prometheus_query_preset/__init__.py create mode 100644 src/ai/backend/client/cli/prometheus_query_preset/commands.py create mode 100644 src/ai/backend/client/func/prometheus_query_preset.py diff --git a/src/ai/backend/client/cli/__init__.py b/src/ai/backend/client/cli/__init__.py index bbb57798cd1..5b441bb0c7f 100644 --- a/src/ai/backend/client/cli/__init__.py +++ b/src/ai/backend/client/cli/__init__.py @@ -55,6 +55,14 @@ def fair_share() -> None: """Fair share scheduler operations (superadmin only)""" +@cli_main.group( + cls=LazyGroup, + import_name="ai.backend.client.cli.prometheus_query_preset:prometheus_query_preset", +) +def prometheus_query_preset() -> None: + """Prometheus query preset operations.""" + + @cli_main.group(cls=LazyGroup, import_name="ai.backend.client.cli.resource_usage:resource_usage") def resource_usage() -> None: """Resource usage history operations (superadmin only)""" diff --git a/src/ai/backend/client/cli/admin/__init__.py b/src/ai/backend/client/cli/admin/__init__.py index 8e5f8666cd3..63adc598baa 100644 --- a/src/ai/backend/client/cli/admin/__init__.py +++ b/src/ai/backend/client/cli/admin/__init__.py @@ -19,6 +19,7 @@ def admin() -> None: keypair, license, # noqa: A004 manager, + prometheus_query_preset, resource, resource_policy, scaling_group, diff --git a/src/ai/backend/client/cli/admin/prometheus_query_preset.py b/src/ai/backend/client/cli/admin/prometheus_query_preset.py new file mode 100644 index 00000000000..75b7b21c427 --- /dev/null +++ b/src/ai/backend/client/cli/admin/prometheus_query_preset.py @@ -0,0 +1,157 @@ +import sys +from typing import Any +from uuid import UUID + +import click + +from ai.backend.cli.params import JSONParamType +from ai.backend.cli.types import ExitCode +from ai.backend.client.cli.extensions import pass_ctx_obj +from ai.backend.client.cli.pretty import print_done +from ai.backend.client.cli.types import CLIContext +from ai.backend.client.session import Session + +from . import admin + + +@admin.group() +def prometheus_query_preset() -> None: + """Prometheus query preset administration commands.""" + + +@prometheus_query_preset.command() +@pass_ctx_obj +def list(ctx: CLIContext) -> None: + """List all prometheus query presets.""" + with Session() as session: + try: + items = session.PrometheusQueryPreset.list_presets() + if not items: + print("No presets found.") + return + for preset in items: + print(f"ID: {preset['id']}") + print(f" Name: {preset['name']}") + print(f" Metric: {preset['metric_name']}") + print(f" Time Window: {preset.get('time_window', '-')}") + print(f" Created: {preset['created_at']}") + print() + except Exception as e: + ctx.output.print_error(e) + sys.exit(ExitCode.FAILURE) + + +@prometheus_query_preset.command() +@pass_ctx_obj +@click.argument("preset_id", type=str) +def info(ctx: CLIContext, preset_id: str) -> None: + """Show details of a prometheus query preset.""" + with Session() as session: + try: + preset = session.PrometheusQueryPreset.get(UUID(preset_id)) + print(f"ID: {preset['id']}") + print(f"Name: {preset['name']}") + print(f"Metric Name: {preset['metric_name']}") + print(f"Query Template: {preset['query_template']}") + print(f"Time Window: {preset.get('time_window', '-')}") + options = preset.get("options", {}) + print(f"Filter Labels: {options.get('filter_labels', [])}") + print(f"Group Labels: {options.get('group_labels', [])}") + print(f"Created: {preset['created_at']}") + print(f"Updated: {preset['updated_at']}") + except Exception as e: + ctx.output.print_error(e) + sys.exit(ExitCode.FAILURE) + + +@prometheus_query_preset.command() +@pass_ctx_obj +@click.option("--name", type=str, required=True, help="Preset name.") +@click.option("--metric-name", type=str, required=True, help="Prometheus metric name.") +@click.option("--query-template", type=str, required=True, help="PromQL template.") +@click.option("--time-window", type=str, default=None, help="Default time window (e.g. 5m).") +@click.option( + "--options", + type=JSONParamType(), + default=None, + help='Preset options JSON (e.g. \'{"filter_labels":["k"],"group_labels":["k"]}\').', +) +def add( + ctx: CLIContext, + name: str, + metric_name: str, + query_template: str, + time_window: str | None, + options: dict[str, Any] | None, +) -> None: + """Create a new prometheus query preset.""" + with Session() as session: + try: + result = session.PrometheusQueryPreset.create( + name, + metric_name, + query_template, + time_window=time_window, + options=options, + ) + print(f"Created preset: {result['id']}") + print_done("Done.") + except Exception as e: + ctx.output.print_error(e) + sys.exit(ExitCode.FAILURE) + + +@prometheus_query_preset.command() +@pass_ctx_obj +@click.argument("preset_id", type=str) +@click.option("--name", type=str, default=None, help="New preset name.") +@click.option("--metric-name", type=str, default=None, help="New Prometheus metric name.") +@click.option("--query-template", type=str, default=None, help="New PromQL template.") +@click.option("--time-window", type=str, default=None, help="New default time window.") +@click.option( + "--options", + type=JSONParamType(), + default=None, + help="New preset options JSON.", +) +def modify( + ctx: CLIContext, + preset_id: str, + name: str | None, + metric_name: str | None, + query_template: str | None, + time_window: str | None, + options: dict[str, Any] | None, +) -> None: + """Modify an existing prometheus query preset.""" + with Session() as session: + try: + result = session.PrometheusQueryPreset.modify( + UUID(preset_id), + name=name, + metric_name=metric_name, + query_template=query_template, + time_window=time_window, + options=options, + ) + print(f"Modified preset: {result['id']}") + print_done("Done.") + except Exception as e: + ctx.output.print_error(e) + sys.exit(ExitCode.FAILURE) + + +@prometheus_query_preset.command() +@pass_ctx_obj +@click.argument("preset_id", type=str) +@click.confirmation_option(prompt="Are you sure you want to delete this preset?") +def delete(ctx: CLIContext, preset_id: str) -> None: + """Delete a prometheus query preset.""" + with Session() as session: + try: + _result = session.PrometheusQueryPreset.delete(UUID(preset_id)) + print(f"Deleted preset: {preset_id}") + print_done("Done.") + except Exception as e: + ctx.output.print_error(e) + sys.exit(ExitCode.FAILURE) diff --git a/src/ai/backend/client/cli/prometheus_query_preset/__init__.py b/src/ai/backend/client/cli/prometheus_query_preset/__init__.py new file mode 100644 index 00000000000..0d5df7d3c2c --- /dev/null +++ b/src/ai/backend/client/cli/prometheus_query_preset/__init__.py @@ -0,0 +1,11 @@ +"""Prometheus Query Preset CLI package.""" + +import click + + +@click.group() +def prometheus_query_preset() -> None: + """Prometheus query preset operations.""" + + +from . import commands # noqa diff --git a/src/ai/backend/client/cli/prometheus_query_preset/commands.py b/src/ai/backend/client/cli/prometheus_query_preset/commands.py new file mode 100644 index 00000000000..888f38820c5 --- /dev/null +++ b/src/ai/backend/client/cli/prometheus_query_preset/commands.py @@ -0,0 +1,76 @@ +"""Execute command for prometheus query presets.""" + +from __future__ import annotations + +import json +import sys +from uuid import UUID + +import click + +from ai.backend.cli.types import ExitCode +from ai.backend.client.cli.extensions import pass_ctx_obj +from ai.backend.client.cli.types import CLIContext +from ai.backend.client.session import Session + +from . import prometheus_query_preset + + +@prometheus_query_preset.command() +@pass_ctx_obj +@click.argument("preset_id", type=str) +@click.option("--start", type=str, required=True, help="Start time (ISO8601).") +@click.option("--end", type=str, required=True, help="End time (ISO8601).") +@click.option("--step", type=str, required=True, help="Step duration (e.g. 60s).") +@click.option( + "--label", + "labels", + multiple=True, + type=str, + help="Label filter in key=value format (repeatable).", +) +@click.option( + "--group-labels", + type=str, + default=None, + help="Comma-separated group labels.", +) +@click.option("--window", type=str, default=None, help="Time window override.") +def execute( + ctx: CLIContext, + preset_id: str, + start: str, + end: str, + step: str, + labels: tuple[str, ...], + group_labels: str | None, + window: str | None, +) -> None: + """Execute a prometheus query preset.""" + with Session() as session: + try: + label_entries: list[dict[str, str]] = [] + for label in labels: + if "=" not in label: + print(f"Invalid label format: {label} (expected key=value)", file=sys.stderr) + sys.exit(ExitCode.INVALID_ARGUMENT) + key, value = label.split("=", 1) + label_entries.append({"key": key, "value": value}) + + group_labels_list: list[str] | None = None + if group_labels is not None: + group_labels_list = [gl.strip() for gl in group_labels.split(",") if gl.strip()] + + response = session.PrometheusQueryPreset.execute( + UUID(preset_id), + start=start, + end=end, + step=step, + labels=label_entries if label_entries else None, + group_labels=group_labels_list, + window=window, + ) + print(json.dumps(response.model_dump(mode="json"), indent=2, default=str)) + except Exception as e: + ctx.output.print_error(e) + sys.exit(ExitCode.FAILURE) diff --git a/src/ai/backend/client/func/prometheus_query_preset.py b/src/ai/backend/client/func/prometheus_query_preset.py new file mode 100644 index 00000000000..dbee01aff94 --- /dev/null +++ b/src/ai/backend/client/func/prometheus_query_preset.py @@ -0,0 +1,135 @@ +"""Client SDK functions for prometheus query preset system.""" + +from __future__ import annotations + +from typing import Any +from uuid import UUID + +from ai.backend.client.request import Request +from ai.backend.common.dto.clients.prometheus.request import QueryTimeRange +from ai.backend.common.dto.clients.prometheus.response import PrometheusQueryRangeResponse + +from .base import BaseFunction, api_function + +__all__ = ("PrometheusQueryPreset",) + + +class PrometheusQueryPreset(BaseFunction): + """ + Provides functions to interact with prometheus query presets. + Admin CRUD requires superadmin privileges. + Execute is available to all authenticated users. + """ + + @api_function + @classmethod + async def create( + cls, + name: str, + metric_name: str, + query_template: str, + *, + time_window: str | None = None, + options: dict[str, Any] | None = None, + ) -> dict[str, Any]: + """Create a new prometheus query preset.""" + body: dict[str, Any] = { + "name": name, + "metric_name": metric_name, + "query_template": query_template, + } + if time_window is not None: + body["time_window"] = time_window + if options is not None: + body["options"] = options + rqst = Request("POST", "/resource/prometheus-query-presets") + rqst.set_json(body) + async with rqst.fetch() as resp: + data: dict[str, Any] = await resp.json() + return data + + @api_function + @classmethod + async def list_presets(cls) -> list[dict[str, Any]]: + """List all prometheus query presets.""" + rqst = Request("GET", "/resource/prometheus-query-presets") + async with rqst.fetch() as resp: + data: list[dict[str, Any]] = await resp.json() + return data + + @api_function + @classmethod + async def get(cls, preset_id: UUID) -> dict[str, Any]: + """Get a prometheus query preset by ID.""" + rqst = Request("GET", f"/resource/prometheus-query-presets/{preset_id}") + async with rqst.fetch() as resp: + data: dict[str, Any] = await resp.json() + return data + + @api_function + @classmethod + async def modify( + cls, + preset_id: UUID, + *, + name: str | None = None, + metric_name: str | None = None, + query_template: str | None = None, + time_window: str | None = None, + options: dict[str, Any] | None = None, + ) -> dict[str, Any]: + """Modify an existing prometheus query preset.""" + body: dict[str, Any] = {} + if name is not None: + body["name"] = name + if metric_name is not None: + body["metric_name"] = metric_name + if query_template is not None: + body["query_template"] = query_template + if time_window is not None: + body["time_window"] = time_window + if options is not None: + body["options"] = options + rqst = Request("PATCH", f"/resource/prometheus-query-presets/{preset_id}") + rqst.set_json(body) + async with rqst.fetch() as resp: + data: dict[str, Any] = await resp.json() + return data + + @api_function + @classmethod + async def delete(cls, preset_id: UUID) -> dict[str, Any]: + """Delete a prometheus query preset.""" + rqst = Request("DELETE", f"/resource/prometheus-query-presets/{preset_id}") + async with rqst.fetch() as resp: + data: dict[str, Any] = await resp.json() + return data + + @api_function + @classmethod + async def execute( + cls, + preset_id: UUID, + *, + start: str, + end: str, + step: str, + labels: list[dict[str, str]] | None = None, + group_labels: list[str] | None = None, + window: str | None = None, + ) -> PrometheusQueryRangeResponse: + """Execute a prometheus query preset.""" + body: dict[str, Any] = { + "time_range": QueryTimeRange(start=start, end=end, step=step).model_dump(mode="json"), + } + if labels is not None: + body["labels"] = labels + if group_labels is not None: + body["group_labels"] = group_labels + if window is not None: + body["window"] = window + rqst = Request("POST", f"/resource/prometheus-query-presets/{preset_id}/execute") + rqst.set_json(body) + async with rqst.fetch() as resp: + data = await resp.json() + return PrometheusQueryRangeResponse.model_validate(data) diff --git a/src/ai/backend/client/session.py b/src/ai/backend/client/session.py index 4961bd5216e..379ba9a2e51 100644 --- a/src/ai/backend/client/session.py +++ b/src/ai/backend/client/session.py @@ -255,6 +255,7 @@ class BaseSession(metaclass=abc.ABCMeta): "Network", "Notification", "Permission", + "PrometheusQueryPreset", "QuotaScope", "Resource", "ResourceUsage", @@ -315,6 +316,7 @@ def __init__( from .func.model import Model from .func.network import Network from .func.notification import Notification + from .func.prometheus_query_preset import PrometheusQueryPreset from .func.quota_scope import QuotaScope from .func.resource import Resource from .func.resource_usage import ResourceUsage @@ -356,6 +358,7 @@ def __init__( self.Dotfile = Dotfile self.ServerLog = ServerLog self.Permission = Permission + self.PrometheusQueryPreset = PrometheusQueryPreset self.Service = Service self.ServiceAutoScalingRule = ServiceAutoScalingRule self.Model = Model From 6dc086066d71f6aa7b787685c810e9a8b2ef1935 Mon Sep 17 00:00:00 2001 From: BoKeum Date: Thu, 5 Mar 2026 10:22:57 +0900 Subject: [PATCH 2/2] changelog: add news fragment for PR #9641 --- changes/9641.feature.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/9641.feature.md diff --git a/changes/9641.feature.md b/changes/9641.feature.md new file mode 100644 index 00000000000..9cb443397f2 --- /dev/null +++ b/changes/9641.feature.md @@ -0,0 +1 @@ +Add CLI commands for prometheus query preset admin CRUD and execution