diff --git a/README.md b/README.md index 72c0b5d..348d5c5 100644 --- a/README.md +++ b/README.md @@ -142,6 +142,7 @@ Canny Edge Detection: Extract structural outlines from any image to guide Contro #### Canny Preprocessed ![cannyImage2](https://github.com/l8bloom/gosd/blob/main/examples/system/canny/canny_output.png) #### ControlNet Output +With `A futuristic office with neon lights` prompt. ![cannyImage3](https://github.com/l8bloom/gosd/blob/main/examples/system/canny/image_from_canny_output.png) (See `examples/system/canny/preprocess_canny.go` for a full implementation.) diff --git a/examples/system/canny/image_from_canny_output.png b/examples/system/canny/image_from_canny_output.png index 6ece1a5..3aad1c6 100644 Binary files a/examples/system/canny/image_from_canny_output.png and b/examples/system/canny/image_from_canny_output.png differ diff --git a/examples/system/canny/preprocess_canny.go b/examples/system/canny/preprocess_canny.go index ebcf18c..fed962e 100644 --- a/examples/system/canny/preprocess_canny.go +++ b/examples/system/canny/preprocess_canny.go @@ -1,6 +1,7 @@ // example on how to use gosd for Canny processing // 1st step: generate image // 2nd step: apply canny algorithm +// 3rd step: pass the canny-processed image to a ControlNet package main diff --git a/pkg/gosd/cache.go b/pkg/gosd/cache.go index 451a3bd..50c17f9 100644 --- a/pkg/gosd/cache.go +++ b/pkg/gosd/cache.go @@ -157,6 +157,8 @@ func newCacheParams() *cacheParams { return cp } +// CacheParamsInit creates a set of default cache values for inference. +// The cache values can be used for both video and image generation. 
func CacheParamsInit() CacheParams { cp := newCacheParams() diff --git a/pkg/gosd/callbacks.go b/pkg/gosd/callbacks.go index c7ce0b7..ec28c1e 100644 --- a/pkg/gosd/callbacks.go +++ b/pkg/gosd/callbacks.go @@ -52,7 +52,7 @@ func loadCallbacks(lib ffi.Lib) error { return nil } -// Type used for SetPreviewCallback representing generated data +// Type used for SetPreviewCallback representing generated data. type PreviewFrames interface { Image | Video } @@ -81,6 +81,7 @@ type LogCallback func(level LogLevel, text string, data unsafe.Pointer) var logCallback unsafe.Pointer var sizeOfClosure = unsafe.Sizeof(ffi.Closure{}) +// SetLogCallback sets custom logging for inference. func SetLogCallback(callback LogCallback, data unsafe.Pointer) { if callback == nil { panic("Can't set nil as a callback") @@ -134,6 +135,7 @@ type ProgressCallback func(step int32, steps int32, time float32, data unsafe.Po var progressCallback unsafe.Pointer +// SetProgressCallback sets custom callback used after each inference iteration step. func SetProgressCallback(callback ProgressCallback, data unsafe.Pointer) { if callback == nil { panic("Can't set nil as a callback") @@ -189,6 +191,7 @@ type PreviewCallback[T PreviewFrames] func(step int32, frames T, isNoisy bool, d var previewCallback unsafe.Pointer // keep in global due to GC +// SetPreviewCallback sets custom callback to preview images as they are generated during the inference loop. // previewMode: mode in which to do the preview // interval: iteration step slider // denoised: should preview denoised images? diff --git a/pkg/gosd/context.go b/pkg/gosd/context.go index c71f65b..e2bc130 100644 --- a/pkg/gosd/context.go +++ b/pkg/gosd/context.go @@ -381,7 +381,7 @@ func (e *Embedding) toC() *embedding { } } -// Creates default context params +// ContextParamsInit creates default context params. 
func ContextParamsInit() ContextParams { cp := newContextParams() @@ -389,6 +389,7 @@ func ContextParamsInit() ContextParams { return *cp.toGo() } +// NewContext creates a new inference context. func NewContext(ctxParams ContextParams) Context { var context Context @@ -398,10 +399,12 @@ func NewContext(ctxParams ContextParams) Context { return context } +// FreeCtx deallocates memory used during the inference. func FreeCtx(ctx Context) { freeCtx.Call(nil, unsafe.Pointer(&ctx)) } +// CtxParamsToStr stringifies context structure. func CtxParamsToStr(ctxParams ContextParams) string { str := utilsGetNulString() @@ -411,6 +414,7 @@ func CtxParamsToStr(ctxParams ContextParams) string { return charToString(str) } +// CtxSupportsImageGeneration checks if the context can be used for image generation. func CtxSupportsImageGeneration(ctx Context) bool { var res uint8 @@ -419,6 +423,7 @@ func CtxSupportsImageGeneration(ctx Context) bool { return byteToBool(res) } +// CtxSupportsVideoGeneration checks if the context can be used for video generation. func CtxSupportsVideoGeneration(ctx Context) bool { var res uint8 diff --git a/pkg/gosd/gosd.go b/pkg/gosd/gosd.go index 1f931de..09218a2 100644 --- a/pkg/gosd/gosd.go +++ b/pkg/gosd/gosd.go @@ -108,7 +108,7 @@ func getLibraryFilename(path, lib string) string { } } -// Load loads the stable-diffusion.cpp shared library at runtime and all dependent libs +// Load loads the stable-diffusion.cpp shared library at runtime and all dependent libs. func Load() error { lib, err := loadLibrary("stable-diffusion") if err != nil { diff --git a/pkg/gosd/image.go b/pkg/gosd/image.go index e0e8610..e32e933 100644 --- a/pkg/gosd/image.go +++ b/pkg/gosd/image.go @@ -565,6 +565,7 @@ func (i *ImageParams) toC() *imageParams { } } +// GenerateImage starts the inference loop for image generation. 
func GenerateImage(ctx Context, ip ImageParams) Image { var image *image @@ -583,6 +584,7 @@ func newImageParams() *imageParams { return ip } +// ImageGenParamsInit creates a set of default values for image generation. func ImageGenParamsInit() ImageParams { ip := newImageParams() @@ -590,6 +592,7 @@ func ImageGenParamsInit() ImageParams { return *ip.toGo() } +// ImageGenParamsToStr stringifies structure encapsulating image generation parameters. func ImageGenParamsToStr(ip ImageParams) string { str := utilsGetNulString() @@ -599,11 +602,12 @@ func ImageGenParamsToStr(ip ImageParams) string { return charToString(str) } -// this is not a core feature of the library, +// SavePNG saves generated images as .png to the local disk. +// NOTE: This is not a core feature of the library, // just an example of what can be done with -// the generated image from the stable diffusion +// the generated image from the stable diffusion. func (img Image) SavePNG(filename string) error { - pix := img.Pixelize() + pix := img.pixelize() f, err := os.Create(filename) if err != nil { @@ -619,7 +623,7 @@ func (img Image) SavePNG(filename string) error { return png.Encode(f, &pix) } -func (img Image) Pixelize() imgPckg.RGBA { +func (img Image) pixelize() imgPckg.RGBA { if len(img.Data) == 0 { panic("Image with 0 length.") } @@ -651,6 +655,7 @@ func (img Image) Pixelize() imgPckg.RGBA { return *rgba } +// HiresParamsInit initializes default values for high-resolution upscaling. func HiresParamsInit() HiresParams { hp := newHiresParams() diff --git a/pkg/gosd/sampler.go b/pkg/gosd/sampler.go index e7149fb..1a1102d 100644 --- a/pkg/gosd/sampler.go +++ b/pkg/gosd/sampler.go @@ -128,6 +128,7 @@ func newSampleParams() *sampleParamsType { return &sampleParamsType{} } +// SampleParamsInit initializes default values for the inference sampler. 
func SampleParamsInit() SampleParamsType { sp := newSampleParams() @@ -136,6 +137,7 @@ func SampleParamsInit() SampleParamsType { return *sp.toGo() } +// SampleParamsToStr stringifies structure encapsulating sampler parameters. func SampleParamsToStr(params SampleParamsType) string { sp := params.toC() str := utilsGetNulString() @@ -145,6 +147,7 @@ func SampleParamsToStr(params SampleParamsType) string { return charToString(str) } +// GetDefaultSampleMethod returns default sampler method from a context. func GetDefaultSampleMethod(ctx Context) SampleMethodType { var sampleType SampleMethodType @@ -153,6 +156,7 @@ func GetDefaultSampleMethod(ctx Context) SampleMethodType { return sampleType } +// GetDefaultScheduler returns default scheduler type from a context. func GetDefaultScheduler(ctx Context, sampler SampleMethodType) SchedulerType { var schedulerType SchedulerType diff --git a/pkg/gosd/system.go b/pkg/gosd/system.go index 7eb45c5..0ae5606 100644 --- a/pkg/gosd/system.go +++ b/pkg/gosd/system.go @@ -167,6 +167,9 @@ func loadSystemRoutines(lib ffi.Lib) error { return nil } +// GetSystemInfo returns a formatted string containing the CPU instruction sets +// supported by the current hardware (e.g., AVX, AVX2, FMA). This is used +// to verify which hardware acceleration features are active for GGML operations. func GetSystemInfo() string { var systemInfo *byte @@ -178,6 +181,7 @@ func GetSystemInfo() string { return charToString(systemInfo) } +// Commit returns stable-diffusion.cpp commit hash. func Commit() string { var commitInfo *byte @@ -189,6 +193,7 @@ func Commit() string { return charToString(commitInfo) } +// Version returns stable-diffusion.cpp release version. func Version() string { var versionInfo *byte @@ -200,6 +205,7 @@ func Version() string { return charToString(versionInfo) } +// GetNumPhysicalCores returns number of physical cores in the system. 
func GetNumPhysicalCores() int { var count int @@ -207,6 +213,7 @@ func GetNumPhysicalCores() int { return count } +// TypeName stringifies SDType. func TypeName(sdType SDType) string { res := utilsGetNulString() @@ -214,6 +221,7 @@ func TypeName(sdType SDType) string { return charToString(res) } +// StrToSDType converts SDType name to its enumeration. func StrToSDType(typeName string) SDType { var sdType SDType name := utilsStrToNulString(typeName) @@ -222,6 +230,7 @@ func StrToSDType(typeName string) SDType { return sdType } +// RNGTypeName stringifies RNGType. func RNGTypeName(rngType RNGType) string { res := utilsGetNulString() @@ -229,6 +238,7 @@ func RNGTypeName(rngType RNGType) string { return charToString(res) } +// StrToRNGType converts RNGType name to its enumeration. func StrToRNGType(typeName string) RNGType { var rngType RNGType name := utilsStrToNulString(typeName) @@ -237,6 +247,7 @@ func StrToRNGType(typeName string) RNGType { return rngType } +// SampleMethodName stringifies SampleMethodType. func SampleMethodName(sampleMethod SampleMethodType) string { res := utilsGetNulString() @@ -244,6 +255,7 @@ func SampleMethodName(sampleMethod SampleMethodType) string { return charToString(res) } +// StrToSampleMethod converts SampleMethodType name to its enumeration. func StrToSampleMethod(typeName string) SampleMethodType { var sampleMethodType SampleMethodType name := utilsStrToNulString(typeName) @@ -252,6 +264,7 @@ func StrToSampleMethod(typeName string) SampleMethodType { return sampleMethodType } +// SchedulerName stringifies SchedulerType. func SchedulerName(schedulerType SchedulerType) string { res := utilsGetNulString() @@ -259,6 +272,7 @@ func SchedulerName(schedulerType SchedulerType) string { return charToString(res) } +// StrToScheduler converts SchedulerType name to its enumeration. 
func StrToScheduler(typeName string) SchedulerType { var schedulerType SchedulerType name := utilsStrToNulString(typeName) @@ -267,6 +281,7 @@ func StrToScheduler(typeName string) SchedulerType { return schedulerType } +// PredictionName stringifies PredictionType. func PredictionName(predictionType PredictionType) string { res := utilsGetNulString() @@ -274,6 +289,7 @@ func PredictionName(predictionType PredictionType) string { return charToString(res) } +// StrToPrediction converts PredictionType name to its enumeration. func StrToPrediction(typeName string) PredictionType { var predictionType PredictionType name := utilsStrToNulString(typeName) @@ -282,6 +298,7 @@ func StrToPrediction(typeName string) PredictionType { return predictionType } +// PreviewName stringifies PreviewMode. func PreviewName(previewType PreviewMode) string { res := utilsGetNulString() @@ -289,6 +306,7 @@ func PreviewName(previewType PreviewMode) string { return charToString(res) } +// StrToPreview converts PreviewMode name to its enumeration. func StrToPreview(typeName string) PreviewMode { var previewMode PreviewMode name := utilsStrToNulString(typeName) @@ -297,6 +315,7 @@ func StrToPreview(typeName string) PreviewMode { return previewMode } +// LoraApplyModeName stringifies LoraApplyModeType. func LoraApplyModeName(loraMode LoraApplyModeType) string { res := utilsGetNulString() @@ -304,6 +323,7 @@ func LoraApplyModeName(loraMode LoraApplyModeType) string { return charToString(res) } +// StrToLoraApplyMode converts LoraApplyModeType name to its enumeration. func StrToLoraApplyMode(typeName string) LoraApplyModeType { var loraMode LoraApplyModeType name := utilsStrToNulString(typeName) @@ -312,6 +332,7 @@ func StrToLoraApplyMode(typeName string) LoraApplyModeType { return loraMode } +// HiresUpscalerName stringifies HiresUpscalerType. 
func HiresUpscalerName(hiresMode HiresUpscalerType) string { res := utilsGetNulString() @@ -319,6 +340,7 @@ func HiresUpscalerName(hiresMode HiresUpscalerType) string { return charToString(res) } +// StrToHiresUpscaler converts HiresUpscalerType name to its enumeration. func StrToHiresUpscaler(typeName string) HiresUpscalerType { var hiresMode HiresUpscalerType name := utilsStrToNulString(typeName) @@ -327,7 +349,9 @@ func StrToHiresUpscaler(typeName string) HiresUpscalerType { return hiresMode } -// CPU-bound API +// Convert converts a model to safetensors/gguf format. +// If a VAE model is provided, it will be merged with the diffusion model. +// CPU-bound API. func Convert(modelPath string, vaePath string, outputPath string, outputType SDType, tensorTypeRules string, convertName bool) bool { mp := stringToChar(modelPath) vp := stringToChar(vaePath) @@ -349,8 +373,9 @@ func Convert(modelPath string, vaePath string, outputPath string, outputType SDT return byteToBool(res) } +// PreprocessCanny applies Canny algorithm for edge detection in an image. +// CPU-bound API. func PreprocessCanny(image Image, highThreshold float32, lowThreshold float32, weak float32, strong float32, inverse bool) bool { - img := *image.toC() inv := boolToByte(inverse) diff --git a/pkg/gosd/upscaler.go b/pkg/gosd/upscaler.go index 39b475a..f32f695 100644 --- a/pkg/gosd/upscaler.go +++ b/pkg/gosd/upscaler.go @@ -78,6 +78,7 @@ func loadUpscalerRoutines(lib ffi.Lib) error { return nil } +// NewUpscalerCtx creates context for the upscaler. func NewUpscalerCtx(esrganPath string, offloadParamsToCPU bool, direct bool, nThreads int, tileSize int) UpscalerContext { var ctx UpscalerContext @@ -99,10 +100,12 @@ func NewUpscalerCtx(esrganPath string, offloadParamsToCPU bool, direct bool, nTh return ctx } +// FreeUpscalerCtx deallocates memory reserved by the upscaler context. 
func FreeUpscalerCtx(ctx UpscalerContext) { freeUpscalerCtx.Call(nil, unsafe.Pointer(&ctx)) } +// GetUpscaleFactor returns upscaler's factor. func GetUpscaleFactor(ctx UpscalerContext) int { var res int32 @@ -111,6 +114,7 @@ func GetUpscaleFactor(ctx UpscalerContext) int { return int(res) } +// Upscale upscales provided image with ESRGAN. func Upscale(ctx UpscalerContext, img Image, upscaleFactor uint) Image { var resImage image inImage := *img.toC() diff --git a/pkg/gosd/video.go b/pkg/gosd/video.go index 33c02e5..fa78987 100644 --- a/pkg/gosd/video.go +++ b/pkg/gosd/video.go @@ -163,9 +163,10 @@ type Video struct { Data []Image } -// this is not a core feature of the library, +// Save saves generated video to the local disk with the help of ffmpeg. +// NOTE: This is not a core feature of the library, // just an example of what can be done with -// the generated video after stable diffusion finishes +// the generated video after stable diffusion finishes. func (gv Video) Save(filename string, fps int) error { // requires ffmpeg installed cmd := exec.Command("ffmpeg", @@ -191,7 +192,7 @@ func (gv Video) Save(filename string, fps int) error { } for _, img := range gv.Data { - if _, err := stdin.Write(img.Pixelize().Pix); err != nil { + if _, err := stdin.Write(img.pixelize().Pix); err != nil { return err } } @@ -210,6 +211,7 @@ func newVideoParams() *videoParams { } } +// VideoGenParamsInit creates a set of default values for video generation. func VideoGenParamsInit() VideoParams { params := newVideoParams() @@ -218,6 +220,7 @@ func VideoGenParamsInit() VideoParams { return *params.toGo() } +// GenerateVideo starts the inference loop for video generation. func GenerateVideo(ctx Context, vidParams VideoParams) Video { image := &image{} _vidParams := vidParams.toC()