From 1013b099c51c9193765565e8bbe6d0260ee9c880 Mon Sep 17 00:00:00 2001 From: dom Date: Wed, 6 May 2026 19:46:40 +0200 Subject: [PATCH 1/2] feat: add support for convert sd cpp api --- .github/scripts/download_ci_models.py | 4 ++++ .pre-commit-config.yaml | 1 + README.md | 5 +++-- examples/system/convert/convert.go | 26 ++++++++++++++++++++++++ pkg/gosd/system.go | 29 +++++++++++++++++++++++++++ pkg/gosd/system_test.go | 23 +++++++++++++++++++++ stable_diffusion_api_not_covered | 8 -------- 7 files changed, 86 insertions(+), 10 deletions(-) create mode 100644 examples/system/convert/convert.go diff --git a/.github/scripts/download_ci_models.py b/.github/scripts/download_ci_models.py index 33df8a0..9d8b0d3 100644 --- a/.github/scripts/download_ci_models.py +++ b/.github/scripts/download_ci_models.py @@ -22,6 +22,10 @@ "city96/umt5-xxl-encoder-gguf", "umt5-xxl-encoder-Q3_K_S.gguf", ), + "MODEL_TO_CONVERT": ( + "black-forest-labs/FLUX.2-small-decoder", + "diffusion_pytorch_model.safetensors", + ), } # for non-huggingface sources diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8dcbe37..335c9a6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -54,3 +54,4 @@ repos: - id: end-of-file-fixer - id: requirements-txt-fixer - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] diff --git a/README.md b/README.md index 38b6d66..3bdb463 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,11 @@ High-performance diffusion model inference in pure Go. - Image and video generation - Image editing -- Resolution upscaling via Neural upscaling (ESRGAN) +- High-resolution upscaling (Neural ESRGAN models + Latent-space methods) - Callback support for progressive previews during inference +- Model conversion (to SafeTensors / GGUF, optional VAE merging, tensor type rules) +- Hardware-accelerated inference (CUDA, Metal, Vulkan, ROCm and CPU) - Minimal performance overhead compared to C/C++ -- GPU + CPU support ## Quick start diff --git a/examples/system/convert/convert.go b/examples/system/convert/convert.go new file mode 100644 index 0000000..d0b2048 --- /dev/null +++ b/examples/system/convert/convert.go @@ -0,0 +1,26 @@ +// example how to convert model tensor with gosd and stable-diffusion.cpp + +package main + +import ( + "os" + + sd "github.com/l8bloom/gosd/pkg/gosd" +) + +func main() { + // load dynamic libs of stable_diffusion.cpp and its deps + if err := sd.Load(); err != nil { + panic(err.Error()) + } + + modelPath := os.Getenv("MODEL_TO_CONVERT") + vaePath := "" + outputPath := "converted_model.gguf" + outputType := sd.TypeQ2_K + tensorTypeRules := "" + convertName := false + + sd.Convert(modelPath, vaePath, outputPath, sd.SDType(outputType), tensorTypeRules, convertName) + +} diff --git a/pkg/gosd/system.go b/pkg/gosd/system.go index 6da313c..33c9d76 100644 --- a/pkg/gosd/system.go +++ b/pkg/gosd/system.go @@ -66,6 +66,9 @@ var ( // SD_API enum sd_hires_upscaler_t str_to_sd_hires_upscaler(const char* str); strToHiresUpscaler ffi.Fun + + // SD_API bool convert(const char* input_path, const char* vae_path, const char* output_path, enum sd_type_t output_type, const char* tensor_type_rules, bool convert_name); + convert ffi.Fun ) func loadSystemRoutines(lib ffi.Lib) error { @@ -150,6 +153,10 @@ func loadSystemRoutines(lib ffi.Lib) error { return loadError("str_to_sd_hires_upscaler", err) } + if convert, err = lib.Prep("convert", &ffi.TypeUint8, &ffi.TypePointer, &ffi.TypePointer, &ffi.TypePointer, &ffi.TypeSint32, &ffi.TypePointer, &ffi.TypeUint8); err != nil { + return loadError("convert", err) + } + return nil } @@ -312,3 +319,25 @@ func StrToHiresUpscaler(typeName string) HiresUpscalerType { strToHiresUpscaler.Call(unsafe.Pointer(&hiresMode), unsafe.Pointer(&name)) return hiresMode } + +// CPU-bound API +func Convert(modelPath string, vaePath string, outputPath string, outputType SDType, tensorTypeRules string, convertName bool) bool { + mp := stringToChar(modelPath) + vp := stringToChar(vaePath) + op := stringToChar(outputPath) + ttr := stringToChar(tensorTypeRules) + cn := boolToByte(convertName) + + res := uint8(0) + + convert.Call( + unsafe.Pointer(&res), + unsafe.Pointer(&mp), + unsafe.Pointer(&vp), + unsafe.Pointer(&op), + unsafe.Pointer(&outputType), + unsafe.Pointer(&ttr), + unsafe.Pointer(&cn), + ) + return byteToBool(res) +} diff --git a/pkg/gosd/system_test.go b/pkg/gosd/system_test.go index b7b4102..5a9179f 100644 --- a/pkg/gosd/system_test.go +++ b/pkg/gosd/system_test.go @@ -1,6 +1,8 @@ package gosd import ( + "errors" + "os" "testing" ) @@ -285,3 +287,24 @@ func TestStrToHiresUpscaler(t *testing.T) { t.Errorf("expected `%d` for `bicubic antialiased`, got %d", HiresUpscalerLatentBicubic, hiresMode) } } + +func TestConvert(t *testing.T) { + modelPath := os.Getenv("MODEL_TO_CONVERT") + vaePath := "" + outputPath := "converted_model.gguf" + outputType := TypeQ2_K + tensorTypeRules := "" + convertName := false + + res := Convert(modelPath, vaePath, outputPath, SDType(outputType), tensorTypeRules, convertName) + + if !res { + t.Error("Conversion failed.") + } + + _, err := os.Stat(outputPath) + if errors.Is(err, os.ErrNotExist) { + t.Error("converted model not saved.") + } + os.Remove(outputPath) +} diff --git a/stable_diffusion_api_not_covered b/stable_diffusion_api_not_covered index 33ef833..c022db3 100644 --- a/stable_diffusion_api_not_covered +++ b/stable_diffusion_api_not_covered @@ -1,14 +1,6 @@ -SD_API bool convert(const char* input_path, - const char* vae_path, - const char* output_path, - enum sd_type_t output_type, - const char* tensor_type_rules, - bool convert_name); - SD_API bool preprocess_canny(sd_image_t image, float high_threshold, float low_threshold, float weak, float strong, bool inverse); - From a6fb9cee92d11bede8df5b350dece0d5fecee2ad Mon Sep 17 00:00:00 2001 From: dom Date: Wed, 6 May 2026 20:11:26 +0200 Subject: [PATCH 2/2] feat: add support for max vram and bump sd version --- README.md | 2 +- examples/image_gen/image_gen.go | 3 +++ pkg/gosd/context.go | 4 ++++ stable_diffusion.release | 2 +- 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3bdb463..c2018bb 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ High-performance diffusion model inference in pure Go. [![Linux](https://github.com/l8bloom/gosd/actions/workflows/linux.yaml/badge.svg)](https://github.com/l8bloom/gosd/actions/workflows/linux.yaml) [![Windows](https://github.com/l8bloom/gosd/actions/workflows/windows.yaml/badge.svg)](https://github.com/l8bloom/gosd/actions/workflows/windows.yaml) [![macOS](https://github.com/l8bloom/gosd/actions/workflows/macos.yaml/badge.svg)](https://github.com/l8bloom/gosd/actions/workflows/macos.yaml) -[![stable-diffusion.cpp](https://img.shields.io/badge/sd.cpp-6614334-yellow)](https://github.com/leejet/stable-diffusion.cpp/releases/tag/master-593-3d6064b) +[![stable-diffusion.cpp](https://img.shields.io/badge/sd.cpp-6614334-yellow)](https://github.com/leejet/stable-diffusion.cpp/releases/tag/master-596-90e87bc) [![Coverage](https://img.shields.io/badge/code%20coverage-80%25-purple)](https://github.com/l8bloom/gosd/actions) diff --git a/examples/image_gen/image_gen.go b/examples/image_gen/image_gen.go index 2b74292..3f69650 100644 --- a/examples/image_gen/image_gen.go +++ b/examples/image_gen/image_gen.go @@ -33,6 +33,9 @@ func main() { ctxParams.DiffusionFlashAttn = true // potential hardware optimizations // ctxParams.KeepClipOnCPU = true // in case of lower vram + // optionally set maximum VRAM budget in GiB(enables inference with larger models) + // ctxParams.MaxVRAM = 2.0 + fmt.Printf("\nContext values:\n%s", sd.CtxParamsToStr(ctxParams)) ctx := sd.NewContext(ctxParams) diff --git a/pkg/gosd/context.go b/pkg/gosd/context.go index 8bc2271..c71f65b 100644 --- a/pkg/gosd/context.go +++ b/pkg/gosd/context.go @@ -128,6 +128,7 @@ type contextParams struct { ChromaUseT5Mask uint8 // bool chroma_use_t5_mask; ChromaT5MaskPad int32 // int chroma_t5_mask_pad; QwenImageZeroCond uint8 // bool qwen_image_zero_cond_t; + MaxVRAM float32 // float max_vram; } func (ctx *contextParams) toGo() *ContextParams { @@ -177,6 +178,7 @@ func (ctx *contextParams) toGo() *ContextParams { ChromaUseT5Mask: byteToBool(ctx.ChromaUseT5Mask), ChromaT5MaskPad: ctx.ChromaT5MaskPad, QwenImageZeroCond: byteToBool(ctx.QwenImageZeroCond), + MaxVRAM: ctx.MaxVRAM, } } @@ -222,6 +224,7 @@ type ContextParams struct { ChromaUseT5Mask bool ChromaT5MaskPad int32 QwenImageZeroCond bool + MaxVRAM float32 } func (ctx *ContextParams) toC() *contextParams { @@ -271,6 +274,7 @@ func (ctx *ContextParams) toC() *contextParams { ChromaUseT5Mask: boolToByte(ctx.ChromaUseT5Mask), ChromaT5MaskPad: ctx.ChromaT5MaskPad, QwenImageZeroCond: boolToByte(ctx.QwenImageZeroCond), + MaxVRAM: ctx.MaxVRAM, } } diff --git a/stable_diffusion.release b/stable_diffusion.release index 6b8aa8c..f2c984a 100644 --- a/stable_diffusion.release +++ b/stable_diffusion.release @@ -1 +1 @@ -master-593-3d6064b +master-596-90e87bc