Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions benchmarker/CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ docker build -t weaviate-benchmarker .
## Tests

```bash
# Unit tests (no Weaviate required)
go test ./cmd/...
go test -v -run TestAnalyzer ./cmd/... # run a specific test
# Unit tests (no Weaviate required) — CGO_ENABLED=1 is required for the HDF5 C bindings
CGO_ENABLED=1 go test ./cmd/...
CGO_ENABLED=1 go test -v -run TestAnalyzer ./cmd/... # run a specific test

# Integration tests (require Weaviate at localhost:8080 / localhost:50051)
# Integration tests (require Weaviate at localhost:8080 / localhost:50051; auto-skip if unreachable)
docker run -p 8080:8080 -p 50051:50051 semitechnologies/weaviate:latest
go test -tags integration ./cmd/...
go test -tags integration -v -run TestIntegration_RecallForExactNeighbors ./cmd/...
CGO_ENABLED=1 go test -tags integration ./cmd/...
CGO_ENABLED=1 go test -tags integration -v -run TestIntegration_RecallForExactNeighbors ./cmd/...
```

Unit tests live in `cmd/benchmark_run_test.go` and cover UUID conversions, NDCG calculation, and results analysis. Integration tests live in `cmd/integration_test.go` and exercise the full insert→query cycle against a real Weaviate instance; they skip automatically if Weaviate is not reachable.
Expand Down Expand Up @@ -100,7 +100,7 @@ All flags are defined in `cmd/config.go`. Key ones for `ann-benchmark`:
`scripts/python/` contains analysis tools:
- `memory_analysis.py` — visualize memory metrics from JSON output
- `collate-results.py` — aggregate results across multiple runs
- `ann.py` — ANN benchmark runner wrapper
- `performance-graphs.py` — generate performance comparison graphs
- `convert-parquet.py` — convert datasets to Parquet format

Install Python dependencies: `pip install -r requirements.txt`
6 changes: 6 additions & 0 deletions benchmarker/cmd/ann_benchmark.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ type ResultsJSONBenchmark struct {
HeapInuseBytes float64 `json:"heap_inuse_bytes"`
HeapSysBytes float64 `json:"heap_sys_bytes"`
Timestamp string `json:"timestamp"`
MMRBalance *float64 `json:"mmr_balance,omitempty"`
MMRLimit *int `json:"mmr_limit,omitempty"`
}

// uuidFromInt converts an integer to a UUID-formatted string.
Expand Down Expand Up @@ -301,4 +303,8 @@ func initAnnBenchmark() {
"replicas", 4, "Number of replicas for HFresh index (default 4)")
annBenchmarkCommand.PersistentFlags().Float64Var(&globalConfig.RngFactor,
"rngFactor", 10.0, "RNG factor for HFresh index (default 10.0)")
annBenchmarkCommand.PersistentFlags().Float64Var(&globalConfig.MMRBalance,
"mmrBalance", -1, "MMR balance between relevance (0) and diversity (1). Disabled if not set or negative.")
annBenchmarkCommand.PersistentFlags().IntVar(&globalConfig.MMRLimit,
"mmrLimit", 0, "Number of results to return after MMR re-ranking (default: same as --limit)")
}
11 changes: 10 additions & 1 deletion benchmarker/cmd/ann_query.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,16 @@ func runQueries(cfg *Config, importTime time.Duration, testData [][]float32, nei
HeapAllocBytes: memstats.HeapAllocBytes,
HeapInuseBytes: memstats.HeapInuseBytes,
HeapSysBytes: memstats.HeapSysBytes,
Timestamp: time.Now().Format(time.RFC3339),
Timestamp: time.Now().Format(time.RFC3339),
}
if cfg.MMRBalance >= 0 {
balance := cfg.MMRBalance
benchResult.MMRBalance = &balance
mmrLimit := cfg.MMRLimit
if mmrLimit == 0 {
mmrLimit = cfg.Limit
}
benchResult.MMRLimit = &mmrLimit
}
switch cfg.IndexType {
case "flat":
Expand Down
2 changes: 2 additions & 0 deletions benchmarker/cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ type Config struct {
MaxPostingSizeKB int
Replicas int
RngFactor float64
MMRBalance float64
MMRLimit int
}

func (c *Config) Validate() error {
Expand Down
18 changes: 18 additions & 0 deletions benchmarker/cmd/random_vectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,25 @@ func nearVectorQueryGrpc(cfg *Config, vec []float32, tenant string, filter int)
On: []string{"category"},
Operator: weaviategrpc.Filters_OPERATOR_EQUAL,
}
}

if cfg.MMRBalance >= 0 {
mmrLimit := uint32(cfg.MMRLimit)
if mmrLimit == 0 {
mmrLimit = uint32(cfg.Limit)
}
balance := float32(cfg.MMRBalance)
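// When using MMR, the initial candidate set should be larger than the final limit.
// The SearchRequest.Limit is used for the initial retrieval; MMR then re-ranks and
// returns mmrLimit results.
// NOTE(review): when cfg.MMRLimit is 0, mmrLimit falls back to cfg.Limit above, so
// unless SearchRequest.Limit is set higher than cfg.Limit elsewhere, MMR has no
// extra candidates to choose from — confirm against the request construction.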
searchRequest.NearVector.Selection = &weaviategrpc.Selection{
Selection: &weaviategrpc.Selection_Mmr{
Mmr: &weaviategrpc.Selection_MMR{
Balance: &balance,
Limit: &mmrLimit,
},
},
}
}

data, err := proto.Marshal(searchRequest)
Expand Down
69 changes: 40 additions & 29 deletions benchmarker/go.mod
Original file line number Diff line number Diff line change
@@ -1,60 +1,71 @@
module github.com/semi-technologies/weaviate-benchmarking/benchmarker

go 1.24

toolchain go1.24.0
go 1.26

require (
github.com/google/uuid v1.6.0
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.1
github.com/hashicorp/go-retryablehttp v0.7.7
github.com/pkg/errors v0.9.1
github.com/prometheus/common v0.65.0
github.com/sirupsen/logrus v1.9.3
github.com/sirupsen/logrus v1.9.4
github.com/spf13/cobra v1.8.1
github.com/stretchr/testify v1.10.0
github.com/stretchr/testify v1.11.1
github.com/weaviate/hdf5 v0.0.0-20230911114900-3cd888ffadcd
github.com/weaviate/weaviate v1.34.0-rc.0.0.20251022050050-d6dd7edf23f3
github.com/weaviate/weaviate v1.37.0-rc.0
github.com/weaviate/weaviate-go-client/v4 v4.16.2-0.20250127073049-5b267cd41195
golang.org/x/exp v0.0.0-20250210185358-939b2ce775ac
google.golang.org/grpc v1.74.2
google.golang.org/protobuf v1.36.6
golang.org/x/exp v0.0.0-20251113190631-e25ba8c21ef6
google.golang.org/grpc v1.79.3
google.golang.org/protobuf v1.36.11
)

require (
github.com/andybalholm/brotli v1.1.0 // indirect
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
github.com/andybalholm/brotli v1.1.1 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/go-openapi/analysis v0.23.0 // indirect
github.com/go-openapi/errors v0.22.0 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect
github.com/go-openapi/jsonreference v0.21.0 // indirect
github.com/go-openapi/loads v0.22.0 // indirect
github.com/go-openapi/spec v0.21.0 // indirect
github.com/go-openapi/strfmt v0.23.0 // indirect
github.com/go-openapi/analysis v0.24.1 // indirect
github.com/go-openapi/errors v0.22.4 // indirect
github.com/go-openapi/jsonpointer v0.22.4 // indirect
github.com/go-openapi/jsonreference v0.21.4 // indirect
github.com/go-openapi/loads v0.23.2 // indirect
github.com/go-openapi/spec v0.22.3 // indirect
github.com/go-openapi/strfmt v0.25.0 // indirect
github.com/go-openapi/swag v0.23.0 // indirect
github.com/go-openapi/validate v0.24.0 // indirect
github.com/gofrs/flock v0.12.1 // indirect
github.com/gomlx/go-huggingface v0.2.2 // indirect
github.com/go-openapi/swag/conv v0.25.4 // indirect
github.com/go-openapi/swag/fileutils v0.25.1 // indirect
github.com/go-openapi/swag/jsonname v0.25.4 // indirect
github.com/go-openapi/swag/jsonutils v0.25.4 // indirect
github.com/go-openapi/swag/loading v0.25.4 // indirect
github.com/go-openapi/swag/mangling v0.25.1 // indirect
github.com/go-openapi/swag/stringutils v0.25.4 // indirect
github.com/go-openapi/swag/typeutils v0.25.4 // indirect
github.com/go-openapi/swag/yamlutils v0.25.4 // indirect
github.com/go-openapi/validate v0.25.1 // indirect
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
github.com/gofrs/flock v0.13.0 // indirect
github.com/gomlx/go-huggingface v0.3.1 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/klauspost/compress v1.18.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/oklog/ulid v1.3.1 // indirect
github.com/parquet-go/parquet-go v0.25.1 // indirect
github.com/parquet-go/bitpack v1.0.0 // indirect
github.com/parquet-go/jsonlite v1.0.0 // indirect
github.com/parquet-go/parquet-go v0.29.0 // indirect
github.com/pierrec/lz4/v4 v4.1.21 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/spf13/pflag v1.0.6 // indirect
go.mongodb.org/mongo-driver v1.14.0 // indirect
golang.org/x/net v0.42.0 // indirect
golang.org/x/oauth2 v0.30.0 // indirect
golang.org/x/sys v0.34.0 // indirect
golang.org/x/text v0.27.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250728155136-f173205681a0 // indirect
github.com/stretchr/objx v0.5.2 // indirect
github.com/twpayne/go-geom v1.6.1 // indirect
go.mongodb.org/mongo-driver v1.17.6 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/net v0.50.0 // indirect
golang.org/x/oauth2 v0.35.0 // indirect
golang.org/x/sys v0.41.0 // indirect
golang.org/x/text v0.35.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20260203192932-546029d2fa20 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
Loading
Loading