diff --git a/libs/openant-core/openant/cli.py b/libs/openant-core/openant/cli.py
index 6657d0d..d97d3a4 100644
--- a/libs/openant-core/openant/cli.py
+++ b/libs/openant-core/openant/cli.py
@@ -622,10 +622,9 @@ def cmd_report_data(args):
     and step reports — everything display-ready.
     """
     import html as html_mod
-    import anthropic
     from core.schemas import success, error
     from core.step_report import step_context
-    from utilities.llm_client import get_global_tracker
+    from utilities.llm_client import AnthropicClient, get_global_tracker
 
     results_path = args.results
     dataset_path = args.dataset
@@ -845,13 +844,9 @@ def cmd_report_data(args):
 {findings_text}
 """
     print("[Report] Generating remediation guidance (LLM)...", file=sys.stderr)
-    client = anthropic.Anthropic()
-    response = client.messages.create(
-        model="claude-sonnet-4-20250514",
-        max_tokens=4096,
-        messages=[{"role": "user", "content": prompt}],
-    )
-    remediation_html = response.content[0].text
+    # AnthropicClient handles usage tracking via the global TokenTracker.
+    remediation_client = AnthropicClient(model="claude-sonnet-4-20250514")
+    remediation_html = remediation_client.analyze_sync(prompt, max_tokens=4096)
 
     # Post-process: linkify finding references like #4, #12-#14
     import re
@@ -860,15 +855,11 @@ def _linkify_finding(m):
         return f'#{num}'
     remediation_html = re.sub(r'#(\d+)', _linkify_finding, remediation_html)
 
-    # Track usage
-    usage = response.usage
-    tracker = get_global_tracker()
-    tracker.record_call(
-        model="claude-sonnet-4-20250514",
-        input_tokens=usage.input_tokens,
-        output_tokens=usage.output_tokens,
+    last = remediation_client.get_last_call() or {}
+    print(
+        f" Remediation cost: ${last.get('cost_usd', 0.0):.4f}",
+        file=sys.stderr,
     )
-    print(f" Remediation cost: ${(usage.input_tokens / 1e6) * 3.0 + (usage.output_tokens / 1e6) * 15.0:.4f}", file=sys.stderr)
 
     # --- Step reports ---
     step_reports_data = []
diff --git a/libs/openant-core/pyproject.toml b/libs/openant-core/pyproject.toml
index 8f3d935..7eac2c4 100644
--- a/libs/openant-core/pyproject.toml
+++ b/libs/openant-core/pyproject.toml
@@ -5,7 +5,6 @@ description = "Two-stage SAST tool using Claude for vulnerability analysis"
 readme = "README.md"
 requires-python = ">=3.11"
 dependencies = [
-    "anthropic>=0.40.0",
     "claude-agent-sdk>=0.1.48",
     "python-dotenv>=1.0.0",
     "pydantic>=2.0.0",
diff --git a/libs/openant-core/utilities/context_enhancer.py b/libs/openant-core/utilities/context_enhancer.py
index e52a81f..59135e0 100644
--- a/libs/openant-core/utilities/context_enhancer.py
+++ b/libs/openant-core/utilities/context_enhancer.py
@@ -23,8 +23,6 @@
 from pathlib import Path
 from typing import Callable, Optional
 
-import anthropic  # Still used by shared_client below; removed in Step 5b once ContextAgent drops its client param.
-
 from .llm_client import AnthropicClient, TokenTracker, get_global_tracker, reset_global_tracker
 from .agentic_enhancer import RepositoryIndex, enhance_unit_with_agent, load_index_from_file
 from .rate_limiter import get_rate_limiter, is_rate_limit_error, is_retryable_error
diff --git a/libs/openant-core/utilities/rate_limiter.py b/libs/openant-core/utilities/rate_limiter.py
index 3416f1b..c7facd1 100644
--- a/libs/openant-core/utilities/rate_limiter.py
+++ b/libs/openant-core/utilities/rate_limiter.py
@@ -15,10 +15,15 @@
     rate_limiter = get_rate_limiter()
     rate_limiter.wait_if_needed()
 
-    # When catching RateLimitError
-    except anthropic.RateLimitError as e:
-        retry_after = float(e.response.headers.get("retry-after", 0))
-        rate_limiter.report_rate_limit(retry_after)
+    # Rate-limit detection happens centrally in llm_client._run_query, which
+    # raises utilities.sdk_errors.RateLimitError and calls
+    # rate_limiter.report_rate_limit(0) on every rate-limit event. Callers
+    # that need to attach state before re-raising:
+    from utilities.sdk_errors import RateLimitError
+    try:
+        ...
+    except RateLimitError:
+        # report_rate_limit already fired in _run_query
         raise
 """
 