From 97b13c7b9e04beef4a1043fe4ae31fc6cc36ff2c Mon Sep 17 00:00:00 2001
From: Ravi Shankar
Date: Thu, 5 Mar 2026 10:54:18 -0800
Subject: [PATCH] test(prof): Profiler handlers for debugging support

Signed-off-by: Ravi Shankar
---
 cmd/node-observer/main.go     | 10 +++++
 pkg/config/config.go          |  1 +
 pkg/node_observer/config.go   |  5 +++
 pkg/node_observer/profiler.go | 70 +++++++++++++++++++++++++++++++++++
 pkg/server/http_server.go     | 11 ++++++
 5 files changed, 97 insertions(+)
 create mode 100644 pkg/node_observer/profiler.go

diff --git a/cmd/node-observer/main.go b/cmd/node-observer/main.go
index 3a22cd1c..71358ab6 100644
--- a/cmd/node-observer/main.go
+++ b/cmd/node-observer/main.go
@@ -83,5 +83,15 @@ func mainInternal(c string) error {
 	// Controller
 	g.Add(controller.Start, controller.Stop)
 
+	// Register profiling endpoints if enabled
+	if cfg.EnableProfiling {
+		profilingPort := cfg.ProfilingPort
+		if profilingPort == 0 {
+			profilingPort = node_observer.DefaultProfilingPort
+		}
+		profilingServer := node_observer.NewProfilingServer(profilingPort)
+		g.Add(profilingServer.Start, profilingServer.Stop)
+	}
+
 	return g.Run()
 }
diff --git a/pkg/config/config.go b/pkg/config/config.go
index f6086b7a..4db48287 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -38,6 +38,7 @@ type Config struct {
 	CredsPath *string `yaml:"credentialsPath,omitempty"`
 	FwdSvcURL *string `yaml:"forwardServiceUrl,omitempty"`
 	Env map[string]string `yaml:"env"`
+	EnableProfiling bool `yaml:"enableProfiling,omitempty"`
 
 	// derived
 	Credentials map[string]any
diff --git a/pkg/node_observer/config.go b/pkg/node_observer/config.go
index a0999367..a5b2612e 100644
--- a/pkg/node_observer/config.go
+++ b/pkg/node_observer/config.go
@@ -26,12 +26,17 @@ import (
 	"github.com/NVIDIA/topograph/pkg/topology"
 )
 
+// DefaultProfilingPort is the profiling port used when ProfilingPort is left at zero.
+const DefaultProfilingPort = 49021
+
 type Config struct {
 	GenerateTopologyURL string `yaml:"generateTopologyUrl"`
 	Trigger Trigger `yaml:"trigger"`
 	Provider topology.Provider `yaml:"provider"`
 	Engine topology.Engine `yaml:"engine"`
 	Params map[string]any `yaml:"params"`
+	EnableProfiling bool `yaml:"enableProfiling,omitempty"`
+	ProfilingPort int `yaml:"profilingPort,omitempty"`
 }
 
 type Trigger struct {
diff --git a/pkg/node_observer/profiler.go b/pkg/node_observer/profiler.go
new file mode 100644
index 00000000..c04fd362
--- /dev/null
+++ b/pkg/node_observer/profiler.go
@@ -0,0 +1,70 @@
+package node_observer
+
+import (
+	"errors"
+	"fmt"
+	"net"
+	"net/http"
+	"net/http/pprof"
+	"strconv"
+	"time"
+
+	"k8s.io/klog/v2"
+)
+
+// readHeaderTimeout bounds how long a client may take to send request headers.
+// No write timeout is set because CPU profiles and traces stream for as long
+// as the caller requests.
+const readHeaderTimeout = 10 * time.Second
+
+// Profiler serves the net/http/pprof endpoints on a loopback-only HTTP server.
+type Profiler struct {
+	server *http.Server
+}
+
+// NewProfilingServer returns a Profiler configured for localhost:port.
+// The port is not bound until Start is called.
+func NewProfilingServer(port int) *Profiler {
+	// A dedicated mux keeps the profiling port limited to the pprof handlers
+	// instead of everything registered on http.DefaultServeMux.
+	mux := http.NewServeMux()
+	mux.HandleFunc("/debug/pprof/", pprof.Index)
+	mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
+	mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
+	mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
+	mux.HandleFunc("/debug/pprof/trace", pprof.Trace)
+
+	return &Profiler{
+		server: &http.Server{
+			Addr:              net.JoinHostPort("localhost", strconv.Itoa(port)),
+			Handler:           mux,
+			ReadHeaderTimeout: readHeaderTimeout,
+		},
+	}
+}
+
+// Start binds the listener and serves pprof requests until Stop is called.
+// It returns nil after a Stop-initiated shutdown and the underlying error
+// otherwise.
+func (p *Profiler) Start() error {
+	listener, err := net.Listen("tcp", p.server.Addr)
+	if err != nil {
+		return fmt.Errorf("listening for pprof on %s: %w", p.server.Addr, err)
+	}
+	// Serve blocks until shutdown, so the startup message has to be logged first.
+	klog.Infof("Pprofiler server started on %s", listener.Addr().String())
+
+	if err := p.server.Serve(listener); !errors.Is(err, http.ErrServerClosed) {
+		return fmt.Errorf("serving pprof: %w", err)
+	}
+	return nil
+}
+
+// Stop closes the server, which makes a running or later Start return.
+// The err argument is the reason the caller is stopping and is only logged.
+func (p *Profiler) Stop(err error) {
+	klog.Infof("Stopping Pprofiler server: %v", err)
+	if closeErr := p.server.Close(); closeErr != nil {
+		klog.Errorf("Failed to close pprof server: %v", closeErr)
+	}
+}
diff --git a/pkg/server/http_server.go b/pkg/server/http_server.go
index 795d1840..e426a35a 100644
--- a/pkg/server/http_server.go
+++ b/pkg/server/http_server.go
@@ -22,6 +22,7 @@ import (
 	"io"
 	"net"
 	"net/http"
+	"net/http/pprof"
 	"time"
 
 	"github.com/prometheus/client_golang/prometheus/promhttp"
@@ -99,6 +100,16 @@ func initHttpServer(ctx context.Context, cfg *config.Config) *HttpServer {
 	mux.HandleFunc("/healthz", healthz)
 	mux.Handle("/metrics", promhttp.Handler())
 
+	// Register pprof handlers for performance profiling only when enabled
+	if cfg.EnableProfiling {
+		klog.Infof("Enabling pprof profiling endpoints")
+		mux.HandleFunc("/debug/pprof/", pprof.Index)
+		mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
+		mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
+		mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
+		mux.HandleFunc("/debug/pprof/trace", pprof.Trace)
+	}
+
 	return &HttpServer{
 		ctx: ctx,
 		cfg: cfg,