Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions benchmarker/cmd/ann_benchmark.go
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,12 @@ var annBenchmarkCommand = &cobra.Command{

importTime := 0 * time.Second

if cfg.CreateOnly {
createSchema(&cfg, client)
log.Printf("Created empty collection %s, exiting", cfg.ClassName)
return
}

if !cfg.QueryOnly {

if !cfg.ExistingSchema {
Expand Down Expand Up @@ -994,6 +1000,16 @@ var annBenchmarkCommand = &cobra.Command{
}

neighbors := dataset.Neighbors()
if cfg.Diversity && cfg.DiversityBalance != 1 {
if hdf5ds, ok := dataset.(*Hdf5Dataset); ok {
sparseNeighbors := hdf5ds.SparseNeighbors()
if sparseNeighbors == nil {
log.Warn("--diversity flag is set but sparse_neighbors dataset not found in the HDF5 file, falling back to neighbors")
} else {
neighbors = sparseNeighbors
}
}
}
testData := dataset.TestVectors()
testFilters := dataset.TestFilters()

Expand Down Expand Up @@ -1113,6 +1129,8 @@ func initAnnBenchmark() {
"parallel", "p", numCPU, "Set the number of parallel threads which send queries")
annBenchmarkCommand.PersistentFlags().BoolVar(&globalConfig.ExistingSchema,
"existingSchema", false, "Leave the schema as-is (default false)")
annBenchmarkCommand.PersistentFlags().BoolVar(&globalConfig.CreateOnly,
"createOnly", false, "Only create the collection schema without importing any data (default false)")
annBenchmarkCommand.PersistentFlags().IntVar(&globalConfig.NumTenants,
"numTenants", 0, "Number of tenants to use (default 0)")
annBenchmarkCommand.PersistentFlags().IntVar(&globalConfig.StartTenantNum,
Expand Down Expand Up @@ -1173,6 +1191,12 @@ func initAnnBenchmark() {
"replicas", 4, "Number of replicas for HFresh index (default 4)")
annBenchmarkCommand.PersistentFlags().Float64Var(&globalConfig.RngFactor,
"rngFactor", 10.0, "RNG factor for HFresh index (default 10.0)")
annBenchmarkCommand.PersistentFlags().BoolVar(&globalConfig.Diversity,
"diversity", false, "Enable MMR diversity selection (default false)")
annBenchmarkCommand.PersistentFlags().Float32Var(&globalConfig.DiversityBalance,
"diversityBalance", 0.5, "MMR balance parameter: 0=pure diversity, 1=pure relevance (default 0.5)")
annBenchmarkCommand.PersistentFlags().Float32Var(&globalConfig.DiversityLimit,
"diversityLimit", 10, "MMR candidate limit passed to the selector (default 10)")
}

func benchmarkANN(cfg Config, queries Queries, neighbors Neighbors, filters []int) Results {
Expand Down
16 changes: 11 additions & 5 deletions benchmarker/cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ type Config struct {
MaxPostingSizeKB int
Replicas int
RngFactor float64
CreateOnly bool
Diversity bool
DiversityBalance float32
DiversityLimit float32
}

func (c *Config) Validate() error {
Expand Down Expand Up @@ -172,12 +176,14 @@ func (c *Config) parseLabels() {
}

func (c Config) validateANN() error {
if c.BenchmarkFile == "" && c.DatasetRepo == "" {
return errors.Errorf("a vector benchmark file or a dataset repository and dataset must be provided")
}
if !c.CreateOnly {
if c.BenchmarkFile == "" && c.DatasetRepo == "" {
return errors.Errorf("a vector benchmark file or a dataset repository and dataset must be provided")
}

if c.BenchmarkFile == "" && !(c.DatasetRepo != "" && c.Dataset != "") {
return errors.Errorf("if a vector benchmark file is not provided both a dataset repo and a dataset must be provided")
if c.BenchmarkFile == "" && !(c.DatasetRepo != "" && c.Dataset != "") {
return errors.Errorf("if a vector benchmark file is not provided both a dataset repo and a dataset must be provided")
}
}

if c.API != "grpc" {
Expand Down
9 changes: 9 additions & 0 deletions benchmarker/cmd/hdf5_dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,15 @@ func (ds *Hdf5Dataset) Neighbors() [][]int {
return loadHdf5Neighbors(ds.file, "neighbors")
}

// SparseNeighbors returns the neighbor lists stored under the
// "sparse_neighbors" dataset of the underlying HDF5 file.
//
// The dataset is first opened purely as an existence probe: if the open
// fails, nil is returned so the caller can fall back to another source.
// Otherwise the probe handle is closed again and the actual read is
// delegated to loadHdf5Neighbors.
func (ds *Hdf5Dataset) SparseNeighbors() [][]int {
	const sparseName = "sparse_neighbors"

	probe, openErr := ds.file.OpenDataset(sparseName)
	if openErr != nil {
		// Dataset could not be opened; signal "not available" with nil.
		return nil
	}
	// Only the existence check was needed; release the handle before the
	// loader opens the dataset itself.
	probe.Close()

	return loadHdf5Neighbors(ds.file, sparseName)
}

func (ds *Hdf5Dataset) TestVectors() [][]float32 {
if ds.multiVectorDimension > 0 {
return loadHdf5Colbert(ds.file, "test", ds.multiVectorDimension)
Expand Down
12 changes: 12 additions & 0 deletions benchmarker/cmd/random_vectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,19 @@ func nearVectorQueryGrpc(cfg *Config, vec []float32, tenant string, filter int)
On: []string{"category"},
Operator: weaviategrpc.Filters_OPERATOR_EQUAL,
}
}

if cfg.Diversity {
diversityLimit := cfg.DiversityLimit
diversityBalance := cfg.DiversityBalance
searchRequest.NearVector.Selection = &weaviategrpc.Selection{
Selection: &weaviategrpc.Selection_Mmr{
Mmr: &weaviategrpc.Selection_MMR{
Limit: &diversityLimit,
Balance: &diversityBalance,
},
},
}
}

data, err := proto.Marshal(searchRequest)
Expand Down
66 changes: 38 additions & 28 deletions benchmarker/go.mod
Original file line number Diff line number Diff line change
@@ -1,60 +1,70 @@
module github.com/semi-technologies/weaviate-benchmarking/benchmarker

go 1.24

toolchain go1.24.0
go 1.26

require (
github.com/google/uuid v1.6.0
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.1
github.com/hashicorp/go-retryablehttp v0.7.7
github.com/pkg/errors v0.9.1
github.com/prometheus/common v0.65.0
github.com/sirupsen/logrus v1.9.3
github.com/sirupsen/logrus v1.9.4
github.com/spf13/cobra v1.8.1
github.com/stretchr/testify v1.10.0
github.com/stretchr/testify v1.11.1
github.com/weaviate/hdf5 v0.0.0-20230911114900-3cd888ffadcd
github.com/weaviate/weaviate v1.34.0-rc.0.0.20251022050050-d6dd7edf23f3
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we wait until 1.37.0 is out?

github.com/weaviate/weaviate-go-client/v4 v4.16.2-0.20250127073049-5b267cd41195
golang.org/x/exp v0.0.0-20250210185358-939b2ce775ac
google.golang.org/grpc v1.74.2
google.golang.org/protobuf v1.36.6
golang.org/x/exp v0.0.0-20251113190631-e25ba8c21ef6
google.golang.org/grpc v1.79.3
google.golang.org/protobuf v1.36.11
)

require (
github.com/andybalholm/brotli v1.1.0 // indirect
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
github.com/andybalholm/brotli v1.1.1 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/go-openapi/analysis v0.23.0 // indirect
github.com/go-openapi/errors v0.22.0 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect
github.com/go-openapi/jsonreference v0.21.0 // indirect
github.com/go-openapi/loads v0.22.0 // indirect
github.com/go-openapi/spec v0.21.0 // indirect
github.com/go-openapi/strfmt v0.23.0 // indirect
github.com/go-openapi/analysis v0.24.1 // indirect
github.com/go-openapi/errors v0.22.4 // indirect
github.com/go-openapi/jsonpointer v0.22.4 // indirect
github.com/go-openapi/jsonreference v0.21.4 // indirect
github.com/go-openapi/loads v0.23.2 // indirect
github.com/go-openapi/spec v0.22.3 // indirect
github.com/go-openapi/strfmt v0.25.0 // indirect
github.com/go-openapi/swag v0.23.0 // indirect
github.com/go-openapi/validate v0.24.0 // indirect
github.com/gofrs/flock v0.12.1 // indirect
github.com/gomlx/go-huggingface v0.2.2 // indirect
github.com/go-openapi/swag/conv v0.25.4 // indirect
github.com/go-openapi/swag/fileutils v0.25.1 // indirect
github.com/go-openapi/swag/jsonname v0.25.4 // indirect
github.com/go-openapi/swag/jsonutils v0.25.4 // indirect
github.com/go-openapi/swag/loading v0.25.4 // indirect
github.com/go-openapi/swag/mangling v0.25.1 // indirect
github.com/go-openapi/swag/stringutils v0.25.4 // indirect
github.com/go-openapi/swag/typeutils v0.25.4 // indirect
github.com/go-openapi/swag/yamlutils v0.25.4 // indirect
github.com/go-openapi/validate v0.25.1 // indirect
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
github.com/gofrs/flock v0.13.0 // indirect
github.com/gomlx/go-huggingface v0.3.1 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/klauspost/compress v1.18.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/oklog/ulid v1.3.1 // indirect
github.com/parquet-go/parquet-go v0.25.1 // indirect
github.com/parquet-go/bitpack v1.0.0 // indirect
github.com/parquet-go/jsonlite v1.0.0 // indirect
github.com/parquet-go/parquet-go v0.27.0 // indirect
github.com/pierrec/lz4/v4 v4.1.21 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/spf13/pflag v1.0.6 // indirect
go.mongodb.org/mongo-driver v1.14.0 // indirect
golang.org/x/net v0.42.0 // indirect
golang.org/x/oauth2 v0.30.0 // indirect
golang.org/x/sys v0.34.0 // indirect
golang.org/x/text v0.27.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250728155136-f173205681a0 // indirect
github.com/twpayne/go-geom v1.6.1 // indirect
go.mongodb.org/mongo-driver v1.17.6 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/net v0.49.0 // indirect
golang.org/x/oauth2 v0.35.0 // indirect
golang.org/x/sys v0.40.0 // indirect
golang.org/x/text v0.33.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20260203192932-546029d2fa20 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
Loading
Loading