Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 2 additions & 11 deletions src/test_mcp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,5 @@
"""
test-mcp: Comprehensive testing framework for MCP (Model Context Protocol) servers

A sophisticated testing framework that combines AI agents with MCP server connectivity
for automated testing and CI/CD integration.
"""
"""test-mcp: Comprehensive testing framework for MCP servers"""

__version__ = "0.1.0-beta.5"
__author__ = "MCP Testing Suite"
__email__ = "antoni@golf.dev"

__all__ = [
"__version__",
]
__all__ = ["__version__"]
4 changes: 2 additions & 2 deletions src/test_mcp/agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,14 +194,14 @@ async def _make_api_call_with_retry(self, api_params: dict) -> Any:
total_tokens = (
response.usage.input_tokens + response.usage.output_tokens
)
self.rate_limiter.record_token_usage(correlation_id, total_tokens)
await self.rate_limiter.record_token_usage(correlation_id, total_tokens)

return response

except Exception as e:
# Clean up pending request on error
if self.rate_limiter and correlation_id:
self.rate_limiter.cleanup_pending_request(correlation_id)
await self.rate_limiter.cleanup_pending_request(correlation_id)

# Check if this is the last attempt
if attempt == max_retries:
Expand Down
64 changes: 28 additions & 36 deletions src/test_mcp/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def handle_bad_parameter(self, error: click.BadParameter) -> None:
else:
self.console.print_error(f"Invalid {param_name}: {error_msg}")

_handle_command_completion(self.start_time, exit_code=1)
handle_command_completion(self.start_time, exit_code=1)
sys.exit(1)

def handle_usage_error(self, error: click.UsageError) -> None:
Expand Down Expand Up @@ -107,7 +107,7 @@ def handle_usage_error(self, error: click.UsageError) -> None:
else:
self.console.print_error(str(error))

_handle_command_completion(self.start_time, exit_code=1)
handle_command_completion(self.start_time, exit_code=1)
sys.exit(1)

def handle_system_exit(self, error: SystemExit) -> None:
Expand All @@ -120,7 +120,7 @@ def handle_system_exit(self, error: SystemExit) -> None:
exit_code = int(exit_code)
except ValueError:
exit_code = 1
_handle_command_completion(self.start_time, exit_code=exit_code)
handle_command_completion(self.start_time, exit_code=exit_code)
raise


Expand Down Expand Up @@ -171,6 +171,27 @@ def show_help(ctx, param, value):
ctx.exit()


def handle_command_completion(start_time: float, exit_code: int) -> None:
    """Record a finished CLI command and run the post-command hooks.

    The command line (joined ``sys.argv``, or ``"mcp-t"`` when argv is
    empty), the exit code and the elapsed time in milliseconds are handed
    to the command tracker. Post-command hooks are then triggered when a
    Click context with a truthy ``obj`` is active, unless the invocation
    carried a help or version flag.

    Args:
        start_time: ``time.time()`` timestamp taken when the command began.
        exit_code: Exit status the command finished with.

    Any exception raised along the way is reported through the console's
    ``print_error`` and is not re-raised.
    """
    try:
        # Analytics: how long the command ran and how it was invoked
        elapsed_ms = (time.time() - start_time) * 1000
        invoked_as = " ".join(sys.argv) if sys.argv else "mcp-t"
        get_command_tracker().record_command(invoked_as, exit_code, elapsed_ms)

        # Help and version invocations never get suggestions
        skip_flags = ("--help", "-h", "--version")
        if any(flag in sys.argv for flag in skip_flags):
            return

        # Suggestions apply to every other command, not only failures
        ctx = click.get_current_context(silent=True)
        if ctx and hasattr(ctx, "obj") and ctx.obj:
            trigger_post_command_hooks(ctx)
    except Exception as e:
        get_console().print_error(f"Unexpected error: {e!s}")


@click.group(
invoke_without_command=True,
name="mcp-t",
Expand Down Expand Up @@ -216,54 +237,25 @@ def mcpt_main() -> None:

try:
mcpt_cli(standalone_mode=False)
_handle_command_completion(start_time, exit_code=0)
handle_command_completion(start_time, exit_code=0)
except click.BadParameter as e:
error_handler.handle_bad_parameter(e)
except click.UsageError as e:
error_handler.handle_usage_error(e)
except SystemExit as e:
error_handler.handle_system_exit(e)
except click.Abort:
# Handle user interruption (Ctrl+C) - Click converts KeyboardInterrupt to Abort
except (click.Abort, KeyboardInterrupt):
console = get_console()
console.print("\n[dim]Operation cancelled by user[/dim]")
_handle_command_completion(start_time, exit_code=130)
sys.exit(130)
except KeyboardInterrupt:
# Handle user interruption (fallback, though Click usually catches this first)
console = get_console()
console.print("\n[dim]Operation cancelled by user[/dim]")
_handle_command_completion(start_time, exit_code=130)
handle_command_completion(start_time, exit_code=130)
sys.exit(130)
except Exception as e:
# Only for truly unexpected errors
_handle_command_completion(start_time, exit_code=1)
handle_command_completion(start_time, exit_code=1)
console = get_console()
console.print(f"[red]Unexpected error: {e}[/red]")
raise


def _handle_command_completion(start_time: float, exit_code: int) -> None:
    """Track command completion and show suggestions.

    Records the invoked command line, its exit code and its duration in
    milliseconds with the command tracker, then triggers the post-command
    hooks when a Click context with a truthy ``obj`` is active.

    Args:
        start_time: ``time.time()`` timestamp taken when the command began.
        exit_code: Exit status the command finished with.

    Failures in tracking or suggestions are reported on the console and
    never re-raised, so they do not change the outcome of the command.
    """
    try:
        # Track command for analytics
        duration_ms = (time.time() - start_time) * 1000
        command_name = " ".join(sys.argv) if sys.argv else "mcp-t"

        command_tracker = get_command_tracker()
        command_tracker.record_command(command_name, exit_code, duration_ms)

        # Show suggestions for all commands (not just failures)
        # Skip for help commands and version commands
        if not any(flag in sys.argv for flag in ["--help", "-h", "--version"]):
            ctx = click.get_current_context(silent=True)
            if ctx and hasattr(ctx, "obj") and ctx.obj:
                trigger_post_command_hooks(ctx)
    except Exception as e:
        # Still best-effort (no re-raise), but surface the failure instead of
        # discarding it so broken tracking/suggestions can be diagnosed.
        console = get_console()
        console.print_error(f"Unexpected error: {e!s}")


# Register all commands from modules
mcpt_cli.add_command(create_run_command())
mcpt_cli.add_command(create_generate_command())
Expand Down
10 changes: 1 addition & 9 deletions src/test_mcp/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,19 @@
"""

import os
import warnings

# API Keys for Local Testing
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Validate required API keys
if not ANTHROPIC_API_KEY:
import warnings

warnings.warn(
"ANTHROPIC_API_KEY environment variable is not set. Agent functionality will be limited.",
stacklevel=2,
)

if not OPENAI_API_KEY:
import warnings

warnings.warn(
"OPENAI_API_KEY environment variable is not set. Judge and user simulator functionality will be limited.",
stacklevel=2,
)

# Task Configuration
MAX_RESULT_SIZE_MB = int(os.getenv("MAX_TASK_RESULT_SIZE_MB", "50"))
3 changes: 0 additions & 3 deletions src/test_mcp/testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,3 @@
A comprehensive testing framework for MCP (Model Context Protocol) servers with AI agents.
Supports both single-response testing and multi-turn conversation testing.
"""

__version__ = "0.2.0"
__all__ = ["__version__"]
1 change: 0 additions & 1 deletion src/test_mcp/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
# Golf Test Utils Package
40 changes: 29 additions & 11 deletions src/test_mcp/utils/performance_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,29 @@ class TestExecutionMetrics:
test_id: str
start_time: float
end_time: float | None = None
duration: float | None = None # Duration in seconds (optional for incomplete tests)
turns_completed: int = 0
api_calls_made: int = 0
# tokens_consumed removed - unreliable estimation
success: bool = False
error_message: str | None = None

@property
def duration(self) -> float | None:
"""Calculate duration from start and end times"""
if self.end_time is None:
return None
return self.end_time - self.start_time

def __post_init__(self):
"""Validate metrics after initialization"""
if self.end_time is not None and self.end_time < self.start_time:
raise ValueError(
f"end_time ({self.end_time}) cannot be before start_time ({self.start_time})"
)
if self.api_calls_made < 0:
raise ValueError("api_calls_made cannot be negative")
if self.turns_completed < 0:
raise ValueError("turns_completed cannot be negative")


@dataclass
class SuiteExecutionMetrics:
Expand All @@ -25,27 +41,29 @@ class SuiteExecutionMetrics:
start_time: float
test_metrics: list[TestExecutionMetrics] = field(default_factory=list)
parallelism_used: int = 1
total_duration: float | None = (
None # Total duration in seconds (optional until completion)
)
total_duration: float | None = None

def get_summary_stats(self) -> dict[str, str | int | float]:
def get_summary_stats(self) -> dict[str, str | int | float | None]:
"""Generate summary statistics for the test suite"""
completed_tests = [t for t in self.test_metrics if t.duration is not None]

if not completed_tests:
return {"status": "no_completed_tests"}

durations = [t.duration for t in completed_tests if t.duration is not None]
# Duration is guaranteed to be not None for completed_tests
durations = [t.duration for t in completed_tests]

return {
"total_tests": len(self.test_metrics),
"completed_tests": len(completed_tests),
"success_rate": len([t for t in completed_tests if t.success])
/ len(completed_tests),
"average_duration": statistics.mean(durations), # Duration in seconds
"median_duration": statistics.median(durations), # Duration in seconds
"average_duration": statistics.mean(durations),
"median_duration": statistics.median(durations),
"total_api_calls": sum(t.api_calls_made for t in completed_tests),
# Token consumption removed for simplicity
"parallelism_efficiency": len(completed_tests) / (self.total_duration or 1),
"parallelism_efficiency": (
len(completed_tests) / self.total_duration
if self.total_duration and self.total_duration > 0
else None
),
}
Loading
Loading