From 1013b099c51c9193765565e8bbe6d0260ee9c880 Mon Sep 17 00:00:00 2001
From: dom <domagoj@l8bloom.hr>
Date: Wed, 6 May 2026 19:46:40 +0200
Subject: [PATCH 1/2] feat: add support for the stable-diffusion.cpp convert API

---
 .github/scripts/download_ci_models.py |  4 ++++
 .pre-commit-config.yaml               |  1 +
 README.md                             |  5 +++--
 examples/system/convert/convert.go    | 26 ++++++++++++++++++++++++
 pkg/gosd/system.go                    | 29 +++++++++++++++++++++++++++
 pkg/gosd/system_test.go               | 23 +++++++++++++++++++++
 stable_diffusion_api_not_covered      |  8 --------
 7 files changed, 86 insertions(+), 10 deletions(-)
 create mode 100644 examples/system/convert/convert.go

diff --git a/.github/scripts/download_ci_models.py b/.github/scripts/download_ci_models.py
index 33df8a0..9d8b0d3 100644
--- a/.github/scripts/download_ci_models.py
+++ b/.github/scripts/download_ci_models.py
@@ -22,6 +22,10 @@
         "city96/umt5-xxl-encoder-gguf",
         "umt5-xxl-encoder-Q3_K_S.gguf",
     ),
+    "MODEL_TO_CONVERT": (
+        "black-forest-labs/FLUX.2-small-decoder",
+        "diffusion_pytorch_model.safetensors",
+    ),
 }
 
 # for non-huggingface sources
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8dcbe37..335c9a6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -54,3 +54,4 @@ repos:
       - id: end-of-file-fixer
       - id: requirements-txt-fixer
       - id: trailing-whitespace
+        args: [--markdown-linebreak-ext=md]
diff --git a/README.md b/README.md
index 38b6d66..3bdb463 100644
--- a/README.md
+++ b/README.md
@@ -16,10 +16,11 @@ High-performance diffusion model inference in pure Go.
 
 - Image and video generation
 - Image editing
-- Resolution upscaling via Neural upscaling (ESRGAN)
+- High-resolution upscaling (neural ESRGAN models and latent-space methods)
 - Callback support for progressive previews during inference
+- Model conversion (to SafeTensors / GGUF, optional VAE merging, tensor type rules)
+- Hardware-accelerated inference (CUDA, Metal, Vulkan, ROCm and CPU)
 - Minimal performance overhead compared to C/C++
-- GPU + CPU support
 
 ## Quick start
 
diff --git a/examples/system/convert/convert.go b/examples/system/convert/convert.go
new file mode 100644
index 0000000..d0b2048
--- /dev/null
+++ b/examples/system/convert/convert.go
@@ -0,0 +1,26 @@
+// Example showing how to convert a model's tensors with gosd and stable-diffusion.cpp.
+
+package main
+
+import (
+	"os"
+
+	sd "github.com/l8bloom/gosd/pkg/gosd"
+)
+
+func main() {
+	// load the dynamic libraries of stable-diffusion.cpp and its dependencies
+	if err := sd.Load(); err != nil {
+		panic(err.Error())
+	}
+
+	modelPath := os.Getenv("MODEL_TO_CONVERT")
+	vaePath := ""
+	outputPath := "converted_model.gguf"
+	outputType := sd.TypeQ2_K
+	tensorTypeRules := ""
+	convertName := false
+
+	sd.Convert(modelPath, vaePath, outputPath, sd.SDType(outputType), tensorTypeRules, convertName)
+
+}
diff --git a/pkg/gosd/system.go b/pkg/gosd/system.go
index 6da313c..33c9d76 100644
--- a/pkg/gosd/system.go
+++ b/pkg/gosd/system.go
@@ -66,6 +66,9 @@ var (
 
 	// SD_API enum sd_hires_upscaler_t str_to_sd_hires_upscaler(const char* str);
 	strToHiresUpscaler ffi.Fun
+
+	// SD_API bool convert(const char* input_path, const char* vae_path, const char* output_path, enum sd_type_t output_type, const char* tensor_type_rules, bool convert_name);
+	convert ffi.Fun
 )
 
 func loadSystemRoutines(lib ffi.Lib) error {
@@ -150,6 +153,10 @@ func loadSystemRoutines(lib ffi.Lib) error {
 		return loadError("str_to_sd_hires_upscaler", err)
 	}
 
+	if convert, err = lib.Prep("convert", &ffi.TypeUint8, &ffi.TypePointer, &ffi.TypePointer, &ffi.TypePointer, &ffi.TypeSint32, &ffi.TypePointer, &ffi.TypeUint8); err != nil {
+		return loadError("convert", err)
+	}
+
 	return nil
 }
 
@@ -312,3 +319,25 @@ func StrToHiresUpscaler(typeName string) HiresUpscalerType {
 	strToHiresUpscaler.Call(unsafe.Pointer(&hiresMode), unsafe.Pointer(&name))
 	return hiresMode
 }
+
+// Convert calls the stable-diffusion.cpp convert API (CPU-bound) and returns the bool result of that call.
+func Convert(modelPath string, vaePath string, outputPath string, outputType SDType, tensorTypeRules string, convertName bool) bool {
+	mp := stringToChar(modelPath)
+	vp := stringToChar(vaePath)
+	op := stringToChar(outputPath)
+	ttr := stringToChar(tensorTypeRules)
+	cn := boolToByte(convertName)
+
+	res := uint8(0)
+
+	convert.Call(
+		unsafe.Pointer(&res),
+		unsafe.Pointer(&mp),
+		unsafe.Pointer(&vp),
+		unsafe.Pointer(&op),
+		unsafe.Pointer(&outputType),
+		unsafe.Pointer(&ttr),
+		unsafe.Pointer(&cn),
+	)
+	return byteToBool(res)
+}
diff --git a/pkg/gosd/system_test.go b/pkg/gosd/system_test.go
index b7b4102..5a9179f 100644
--- a/pkg/gosd/system_test.go
+++ b/pkg/gosd/system_test.go
@@ -1,6 +1,8 @@
 package gosd
 
 import (
+	"errors"
+	"os"
 	"testing"
 )
 
@@ -285,3 +287,24 @@ func TestStrToHiresUpscaler(t *testing.T) {
 		t.Errorf("expected `%d` for `bicubic antialiased`, got  %d", HiresUpscalerLatentBicubic, hiresMode)
 	}
 }
+
+func TestConvert(t *testing.T) {
+	modelPath := os.Getenv("MODEL_TO_CONVERT")
+	vaePath := ""
+	outputPath := "converted_model.gguf"
+	outputType := TypeQ2_K
+	tensorTypeRules := ""
+	convertName := false
+
+	res := Convert(modelPath, vaePath, outputPath, SDType(outputType), tensorTypeRules, convertName)
+
+	if !res {
+		t.Error("Conversion failed.")
+	}
+
+	_, err := os.Stat(outputPath)
+	if errors.Is(err, os.ErrNotExist) {
+		t.Error("converted model not saved.")
+	}
+	os.Remove(outputPath)
+}
diff --git a/stable_diffusion_api_not_covered b/stable_diffusion_api_not_covered
index 33ef833..c022db3 100644
--- a/stable_diffusion_api_not_covered
+++ b/stable_diffusion_api_not_covered
@@ -1,14 +1,6 @@
-SD_API bool convert(const char* input_path,
-                    const char* vae_path,
-                    const char* output_path,
-                    enum sd_type_t output_type,
-                    const char* tensor_type_rules,
-                    bool convert_name);
-
 SD_API bool preprocess_canny(sd_image_t image,
                              float high_threshold,
                              float low_threshold,
                              float weak,
                              float strong,
                              bool inverse);
-

From a6fb9cee92d11bede8df5b350dece0d5fecee2ad Mon Sep 17 00:00:00 2001
From: dom <domagoj@l8bloom.hr>
Date: Wed, 6 May 2026 20:11:26 +0200
Subject: [PATCH 2/2] feat: add support for max vram and bump sd version

---
 README.md                       | 2 +-
 examples/image_gen/image_gen.go | 3 +++
 pkg/gosd/context.go             | 4 ++++
 stable_diffusion.release        | 2 +-
 4 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 3bdb463..c2018bb 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ High-performance diffusion model inference in pure Go.
 [![Linux](https://github.com/l8bloom/gosd/actions/workflows/linux.yaml/badge.svg)](https://github.com/l8bloom/gosd/actions/workflows/linux.yaml)
 [![Windows](https://github.com/l8bloom/gosd/actions/workflows/windows.yaml/badge.svg)](https://github.com/l8bloom/gosd/actions/workflows/windows.yaml)
 [![macOS](https://github.com/l8bloom/gosd/actions/workflows/macos.yaml/badge.svg)](https://github.com/l8bloom/gosd/actions/workflows/macos.yaml)
-[![stable-diffusion.cpp](https://img.shields.io/badge/sd.cpp-6614334-yellow)](https://github.com/leejet/stable-diffusion.cpp/releases/tag/master-593-3d6064b)
+[![stable-diffusion.cpp](https://img.shields.io/badge/sd.cpp-90e87bc-yellow)](https://github.com/leejet/stable-diffusion.cpp/releases/tag/master-596-90e87bc)
 [![Coverage](https://img.shields.io/badge/code%20coverage-80%25-purple)](https://github.com/l8bloom/gosd/actions)
 
 
diff --git a/examples/image_gen/image_gen.go b/examples/image_gen/image_gen.go
index 2b74292..3f69650 100644
--- a/examples/image_gen/image_gen.go
+++ b/examples/image_gen/image_gen.go
@@ -33,6 +33,9 @@ func main() {
 	ctxParams.DiffusionFlashAttn = true // potential hardware optimizations
 	// ctxParams.KeepClipOnCPU = true // in case of lower vram
 
+	// optionally set a maximum VRAM budget in GiB (enables inference with larger models)
+	// ctxParams.MaxVRAM = 2.0
+
 	fmt.Printf("\nContext values:\n%s", sd.CtxParamsToStr(ctxParams))
 
 	ctx := sd.NewContext(ctxParams)
diff --git a/pkg/gosd/context.go b/pkg/gosd/context.go
index 8bc2271..c71f65b 100644
--- a/pkg/gosd/context.go
+++ b/pkg/gosd/context.go
@@ -128,6 +128,7 @@ type contextParams struct {
 	ChromaUseT5Mask             uint8             // bool chroma_use_t5_mask;
 	ChromaT5MaskPad             int32             // int chroma_t5_mask_pad;
 	QwenImageZeroCond           uint8             // bool qwen_image_zero_cond_t;
+	MaxVRAM                     float32           // float max_vram;
 }
 
 func (ctx *contextParams) toGo() *ContextParams {
@@ -177,6 +178,7 @@ func (ctx *contextParams) toGo() *ContextParams {
 		ChromaUseT5Mask:             byteToBool(ctx.ChromaUseT5Mask),
 		ChromaT5MaskPad:             ctx.ChromaT5MaskPad,
 		QwenImageZeroCond:           byteToBool(ctx.QwenImageZeroCond),
+		MaxVRAM:                     ctx.MaxVRAM,
 	}
 }
 
@@ -222,6 +224,7 @@ type ContextParams struct {
 	ChromaUseT5Mask             bool
 	ChromaT5MaskPad             int32
 	QwenImageZeroCond           bool
+	MaxVRAM                     float32
 }
 
 func (ctx *ContextParams) toC() *contextParams {
@@ -271,6 +274,7 @@ func (ctx *ContextParams) toC() *contextParams {
 		ChromaUseT5Mask:             boolToByte(ctx.ChromaUseT5Mask),
 		ChromaT5MaskPad:             ctx.ChromaT5MaskPad,
 		QwenImageZeroCond:           boolToByte(ctx.QwenImageZeroCond),
+		MaxVRAM:                     ctx.MaxVRAM,
 	}
 }
 
diff --git a/stable_diffusion.release b/stable_diffusion.release
index 6b8aa8c..f2c984a 100644
--- a/stable_diffusion.release
+++ b/stable_diffusion.release
@@ -1 +1 @@
-master-593-3d6064b
+master-596-90e87bc