golf-mcp · dsonyy · Nov 2, 2025 · Nov 2, 2025 · Nov 2, 2025
diff --git a/src/test_mcp/__init__.py b/src/test_mcp/__init__.py
@@ -1,14 +1,5 @@
-"""
-test-mcp: Comprehensive testing framework for MCP (Model Context Protocol) servers
-
-A sophisticated testing framework that combines AI agents with MCP server connectivity
-for automated testing and CI/CD integration.
-"""
+"""test-mcp: Comprehensive testing framework for MCP servers"""
 
 __version__ = "0.1.0-beta.5"
-__author__ = "MCP Testing Suite"
-__email__ = "antoni@golf.dev"
 
-__all__ = [
-    "__version__",
-]
+__all__ = ["__version__"]
diff --git a/src/test_mcp/agent/agent.py b/src/test_mcp/agent/agent.py
@@ -194,14 +194,14 @@ async def _make_api_call_with_retry(self, api_params: dict) -> Any:
                     total_tokens = (
                         response.usage.input_tokens + response.usage.output_tokens
                     )
-                    self.rate_limiter.record_token_usage(correlation_id, total_tokens)
+                    await self.rate_limiter.record_token_usage(correlation_id, total_tokens)
 
                 return response
 
             except Exception as e:
                 # Clean up pending request on error
                 if self.rate_limiter and correlation_id:
-                    self.rate_limiter.cleanup_pending_request(correlation_id)
+                    await self.rate_limiter.cleanup_pending_request(correlation_id)
 
                 # Check if this is the last attempt
                 if attempt == max_retries:

diff --git a/src/test_mcp/cli/main.py b/src/test_mcp/cli/main.py
@@ -59,7 +59,7 @@ def handle_bad_parameter(self, error: click.BadParameter) -> None:
         else:
             self.console.print_error(f"Invalid {param_name}: {error_msg}")
 
-        _handle_command_completion(self.start_time, exit_code=1)
+        handle_command_completion(self.start_time, exit_code=1)
         sys.exit(1)
 
     def handle_usage_error(self, error: click.UsageError) -> None:
@@ -107,7 +107,7 @@ def handle_usage_error(self, error: click.UsageError) -> None:
         else:
             self.console.print_error(str(error))
 
-        _handle_command_completion(self.start_time, exit_code=1)
+        handle_command_completion(self.start_time, exit_code=1)
         sys.exit(1)
 
     def handle_system_exit(self, error: SystemExit) -> None:
@@ -120,7 +120,7 @@ def handle_system_exit(self, error: SystemExit) -> None:
                 exit_code = int(exit_code)
             except ValueError:
                 exit_code = 1
-        _handle_command_completion(self.start_time, exit_code=exit_code)
+        handle_command_completion(self.start_time, exit_code=exit_code)
         raise
 
 
@@ -171,6 +171,27 @@ def show_help(ctx, param, value):
                 ctx.exit()
 
 
+def handle_command_completion(start_time: float, exit_code: int) -> None:
+    """Record the command's exit code and duration, then run post-command hooks"""
+    try:
+        # Elapsed ms (assumes start_time is from time.time()); name is the joined argv
+        duration_ms = (time.time() - start_time) * 1000
+        command_name = " ".join(sys.argv) if sys.argv else "mcp-t"
+
+        command_tracker = get_command_tracker()
+        command_tracker.record_command(command_name, exit_code, duration_ms)
+
+        # Run post-command hooks whatever the exit code (not just on failures),
+        # unless a help or version flag appears anywhere in argv
+        if not any(flag in sys.argv for flag in ["--help", "-h", "--version"]):
+            ctx = click.get_current_context(silent=True)
+            if ctx and hasattr(ctx, "obj") and ctx.obj:
+                trigger_post_command_hooks(ctx)
+    except Exception as e:  # tracking/hook errors are printed, not re-raised
+        console = get_console()
+        console.print_error(f"Unexpected error: {e!s}")
+
+
 @click.group(
     invoke_without_command=True,
     name="mcp-t",
@@ -216,54 +237,25 @@ def mcpt_main() -> None:
 
     try:
         mcpt_cli(standalone_mode=False)
-        _handle_command_completion(start_time, exit_code=0)
+        handle_command_completion(start_time, exit_code=0)
     except click.BadParameter as e:
         error_handler.handle_bad_parameter(e)
     except click.UsageError as e:
         error_handler.handle_usage_error(e)
     except SystemExit as e:
         error_handler.handle_system_exit(e)
-    except click.Abort:
-        # Handle user interruption (Ctrl+C) - Click converts KeyboardInterrupt to Abort
+    except (click.Abort, KeyboardInterrupt):
         console = get_console()
         console.print("\n[dim]Operation cancelled by user[/dim]")
-        _handle_command_completion(start_time, exit_code=130)
-        sys.exit(130)
-    except KeyboardInterrupt:
-        # Handle user interruption (fallback, though Click usually catches this first)
-        console = get_console()
-        console.print("\n[dim]Operation cancelled by user[/dim]")
-        _handle_command_completion(start_time, exit_code=130)
+        handle_command_completion(start_time, exit_code=130)
         sys.exit(130)
     except Exception as e:
-        # Only for truly unexpected errors
-        _handle_command_completion(start_time, exit_code=1)
+        handle_command_completion(start_time, exit_code=1)
         console = get_console()
         console.print(f"[red]Unexpected error: {e}[/red]")
         raise
 
 
-def _handle_command_completion(start_time: float, exit_code: int) -> None:
-    """Track command completion and show suggestions"""
-    try:
-        # Track command for analytics
-        duration_ms = (time.time() - start_time) * 1000
-        command_name = " ".join(sys.argv) if sys.argv else "mcp-t"
-
-        command_tracker = get_command_tracker()
-        command_tracker.record_command(command_name, exit_code, duration_ms)
-
-        # Show suggestions for all commands (not just failures)
-        # Skip for help commands and version commands
-        if not any(flag in sys.argv for flag in ["--help", "-h", "--version"]):
-            ctx = click.get_current_context(silent=True)
-            if ctx and hasattr(ctx, "obj") and ctx.obj:
-                trigger_post_command_hooks(ctx)
-    except Exception:
-        # Silent failure - don't break CLI for tracking/suggestion issues
-        pass
-
-
 # Register all commands from modules
 mcpt_cli.add_command(create_run_command())
 mcpt_cli.add_command(create_generate_command())

diff --git a/src/test_mcp/config.py b/src/test_mcp/config.py
@@ -5,27 +5,19 @@
 """
 
 import os
+import warnings
 
-# API Keys for Local Testing
 ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 
-# Validate required API keys
 if not ANTHROPIC_API_KEY:
-    import warnings
-
     warnings.warn(
         "ANTHROPIC_API_KEY environment variable is not set. Agent functionality will be limited.",
         stacklevel=2,
     )
 
 if not OPENAI_API_KEY:
-    import warnings
-
     warnings.warn(
         "OPENAI_API_KEY environment variable is not set. Judge and user simulator functionality will be limited.",
         stacklevel=2,
     )
-
-# Task Configuration
-MAX_RESULT_SIZE_MB = int(os.getenv("MAX_TASK_RESULT_SIZE_MB", "50"))
diff --git a/src/test_mcp/testing/__init__.py b/src/test_mcp/testing/__init__.py
@@ -4,6 +4,3 @@
 A comprehensive testing framework for MCP (Model Context Protocol) servers with AI agents.
 Supports both single-response testing and multi-turn conversation testing.
 """
-
-__version__ = "0.2.0"
-__all__ = ["__version__"]
diff --git a/src/test_mcp/utils/__init__.py b/src/test_mcp/utils/__init__.py
@@ -1 +0,0 @@
-# Golf Test Utils Package

diff --git a/src/test_mcp/utils/performance_monitor.py b/src/test_mcp/utils/performance_monitor.py
@@ -9,13 +9,29 @@ class TestExecutionMetrics:
     test_id: str
     start_time: float
     end_time: float | None = None
-    duration: float | None = None  # Duration in seconds (optional for incomplete tests)
     turns_completed: int = 0
     api_calls_made: int = 0
-    # tokens_consumed removed - unreliable estimation
     success: bool = False
     error_message: str | None = None
 
+    @property
+    def duration(self) -> float | None:
+        """Elapsed end_time - start_time, or None while end_time is unset"""
+        if self.end_time is None:
+            return None
+        return self.end_time - self.start_time
+
+    def __post_init__(self):
+        """Raise ValueError if end_time precedes start_time or a counter is negative"""
+        if self.end_time is not None and self.end_time < self.start_time:
+            raise ValueError(
+                f"end_time ({self.end_time}) cannot be before start_time ({self.start_time})"
+            )
+        if self.api_calls_made < 0:
+            raise ValueError("api_calls_made cannot be negative")
+        if self.turns_completed < 0:
+            raise ValueError("turns_completed cannot be negative")
+
 
 @dataclass
 class SuiteExecutionMetrics:
@@ -25,27 +41,29 @@ class SuiteExecutionMetrics:
     start_time: float
     test_metrics: list[TestExecutionMetrics] = field(default_factory=list)
     parallelism_used: int = 1
-    total_duration: float | None = (
-        None  # Total duration in seconds (optional until completion)
-    )
+    total_duration: float | None = None
 
-    def get_summary_stats(self) -> dict[str, str | int | float]:
+    def get_summary_stats(self) -> dict[str, str | int | float | None]:
         """Generate summary statistics for the test suite"""
         completed_tests = [t for t in self.test_metrics if t.duration is not None]
 
         if not completed_tests:
             return {"status": "no_completed_tests"}
 
-        durations = [t.duration for t in completed_tests if t.duration is not None]
+        # completed_tests was filtered on `duration is not None`, so no None remains
+        durations = [t.duration for t in completed_tests]
 
         return {
             "total_tests": len(self.test_metrics),
             "completed_tests": len(completed_tests),
             "success_rate": len([t for t in completed_tests if t.success])
             / len(completed_tests),
-            "average_duration": statistics.mean(durations),  # Duration in seconds
-            "median_duration": statistics.median(durations),  # Duration in seconds
+            "average_duration": statistics.mean(durations),
+            "median_duration": statistics.median(durations),
             "total_api_calls": sum(t.api_calls_made for t in completed_tests),
-            # Token consumption removed for simplicity
-            "parallelism_efficiency": len(completed_tests) / (self.total_duration or 1),
+            "parallelism_efficiency": (
+                len(completed_tests) / self.total_duration
+                if self.total_duration and self.total_duration > 0
+                else None  # total_duration unset or not positive
+            ),
         }