Bump stable-diffusion.cpp to master-642-3a8788c and bind the new parameters.

Summary of the patch:
  * README.md, stable_diffusion.release: point at release master-642-3a8788c.
  * examples/gen_video_with_audio: read the LLM path from VIDEO_EX_LLM_PATH,
    print the log level by name, change tiling/sampling/resolution values and
    enable the hires pass.
  * pkg/gosd/callbacks.go: add LogLevel.Stringify.
  * pkg/gosd/image.go: add CustomSigmas/CustomSigmasCount to the hires params
    and ExtraTilingArgs to the VAE tiling params.
  * pkg/gosd/video.go: add HiresParams to the video params.

Review changes relative to the submitted patch:
  * LogLevel.Stringify is documented and returns "Unknown" for values outside
    the declared constants (it used to return an empty string).
  * hiresParams.toGo only calls unsafe.Slice when the pointer is non-nil;
    a nil pointer with a positive count would panic.
  * Hunk headers of callbacks.go and image.go were recomputed for the lines
    added above. Their "index" lines still carry the submitted blob ids.

Note: this text was restored from a whitespace-collapsed copy. Indentation,
column alignment and the position of blank context lines are reconstructed,
so apply with `git apply --ignore-whitespace` if a hunk does not match.

diff --git a/README.md b/README.md
index 794d18b..23ab789 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ High-performance diffusion model inference in pure Go.
 [![Linux](https://github.com/l8bloom/gosd/actions/workflows/linux.yaml/badge.svg)](https://github.com/l8bloom/gosd/actions/workflows/linux.yaml)
 [![Windows](https://github.com/l8bloom/gosd/actions/workflows/windows.yaml/badge.svg)](https://github.com/l8bloom/gosd/actions/workflows/windows.yaml)
 [![macOS](https://github.com/l8bloom/gosd/actions/workflows/macos.yaml/badge.svg)](https://github.com/l8bloom/gosd/actions/workflows/macos.yaml)
-[![stable-diffusion.cpp](https://img.shields.io/badge/sd.cpp-5b0267e-yellow)](https://github.com/leejet/stable-diffusion.cpp/releases/tag/master-633-5b0267e)
+[![stable-diffusion.cpp](https://img.shields.io/badge/sd.cpp-3a8788c-yellow)](https://github.com/leejet/stable-diffusion.cpp/releases/tag/master-642-3a8788c)
 [![Coverage](https://img.shields.io/badge/code%20coverage-80%25-purple)](https://github.com/l8bloom/gosd/actions)
 
 
diff --git a/examples/gen_video_with_audio/output.mp4 b/examples/gen_video_with_audio/output.mp4
index 5838b8c..238e3f0 100644
Binary files a/examples/gen_video_with_audio/output.mp4 and b/examples/gen_video_with_audio/output.mp4 differ
diff --git a/examples/gen_video_with_audio/video_gen.go b/examples/gen_video_with_audio/video_gen.go
index fd171b8..bccdd5b 100644
--- a/examples/gen_video_with_audio/video_gen.go
+++ b/examples/gen_video_with_audio/video_gen.go
@@ -13,7 +13,7 @@ import (
 
 var myLogCallback sd.LogCallback = func(level sd.LogLevel, text string, data unsafe.Pointer) {
 	fmt.Println("My log callback:")
-	fmt.Println("level: ", level)
+	fmt.Println("level: ", level.Stringify())
 	fmt.Println("text: ", text)
 }
 
@@ -35,7 +35,7 @@ func main() {
 	ctxParams.EmbeddingsConnectorsPath = os.Getenv("VIDEO_EX_EMBEDDINGS_PATH")
 
 	// https://huggingface.co/unsloth/gemma-3-12b-it-qat-GGUF/blob/main/gemma-3-12b-it-qat-UD-Q4_K_XL.gguf
-	ctxParams.LLMPath = os.Getenv("VIDEO_EX_T5XXL_PATH")
+	ctxParams.LLMPath = os.Getenv("VIDEO_EX_LLM_PATH")
 
 	// https://huggingface.co/unsloth/LTX-2.3-GGUF/blob/main/vae/ltx-2.3-22b-dev_video_vae.safetensors
 	ctxParams.VAEPath = os.Getenv("VIDEO_EX_VAE_PATH")
@@ -57,25 +57,35 @@ func main() {
 
 	// split spatial volume in case of lower vram
 	vidParams.VAETilingParams.Enabled = true
-	vidParams.VAETilingParams.RelSizeX = 4
-	vidParams.VAETilingParams.RelSizeY = 4
+	vidParams.VAETilingParams.RelSizeX = 8
+	vidParams.VAETilingParams.RelSizeY = 8
 
-	vidParams.SampleParams.SampleSteps = 50
+	vidParams.SampleParams.SampleSteps = 30
 	vidParams.SampleParams.SampleMethod = sd.EulerSampleMethod
-	vidParams.SampleParams.Guidance.TextCfg = 6
+	vidParams.SampleParams.Guidance.TextCfg = 5.5
 
 	vidParams.FPS = 24
 
 	// number of video frames to generate
 	vidParams.VideoFrames = 120
 
+	vidParams.HiresParams.Enabled = true
+	// https://huggingface.co/Lightricks/LTX-2.3/blob/main/ltx-2.3-spatial-upscaler-x2-1.1.safetensors
+	vidParams.HiresParams.ModelPath = os.Getenv("LTX_UPSCALER_PATH")
+	vidParams.HiresParams.Steps = 20
+	// lower keeps it similar to 1st pass image, higher brings more variance
+	vidParams.HiresParams.DenoisingStrength = 0.8
+	vidParams.HiresParams.Scale = 2
+	vidParams.HiresParams.Upscaler = sd.HiresUpscalerModel
+	// vidParams.HiresParams.CustomSigmas = []float32{0.85, 0.725, 0.421875, 0.0}
+
 	// prompts
-	vidParams.Prompt = "A cinematic, slow-motion shot of a narrow street in a rainy cyberpunk city at night. A person holding a transparent umbrella walks slowly past the camera. Neon signs reflect flawlessly on the wet pavement. Continuous light rain falls, creating ripples in puddles as steam rises from street vents and cars move in the far distance. Atmospheric fog, smooth camera pan, ultra-detailed realistic reflections. Concurrently, the synchronized audio track delivers the crisp, close-up acoustics of continuous soft rain drops falling, layered over a muffled, distant thunderstorm rumbling gently in the far background."
+	vidParams.Prompt = "A cinematic, slow-motion shot of a narrow street in a rainy cyberpunk city at night. A person holding a transparent umbrella walks slowly past the camera. Neon signs reflect flawlessly on the wet pavement. Continuous light rain falls, creating ripples in puddles as steam rises from street vents and cars move in the far distance. Atmospheric fog, smooth camera pan, ultra-detailed realistic reflections. The sound of continuous soft raindrops falling can be heard, layered with the muffled rumble of a distant thunderstorm in the background."
 	vidParams.NegativePrompt = "low quality, blurry, distorted, deformed, watermark, text, oversaturated, jpeg artifacts"
 
 	// video resolution
-	vidParams.Width = 300
-	vidParams.Height = 500
+	vidParams.Width = 1344 / 4
+	vidParams.Height = 768 / 4
 
 	sd.SetLogCallback(myLogCallback, nil)
 	genVideo := sd.GenerateVideo(ctx, vidParams)
diff --git a/pkg/gosd/callbacks.go b/pkg/gosd/callbacks.go
index ec28c1e..c5b5e5f 100644
--- a/pkg/gosd/callbacks.go
+++ b/pkg/gosd/callbacks.go
@@ -66,6 +66,24 @@ const (
 	Error
 )
 
+// Stringify returns the human-readable name of the log level
+// ("Debug", "Info", "Warning" or "Error"). Values outside the declared
+// constants yield "Unknown" instead of an empty string.
+func (ll LogLevel) Stringify() string {
+	switch ll {
+	case Debug:
+		return "Debug"
+	case Info:
+		return "Info"
+	case Warn:
+		return "Warning"
+	case Error:
+		return "Error"
+	default:
+		return "Unknown"
+	}
+}
+
 type PreviewMode int32
 
 const (
diff --git a/pkg/gosd/image.go b/pkg/gosd/image.go
index 6e33b75..5de8727 100644
--- a/pkg/gosd/image.go
+++ b/pkg/gosd/image.go
@@ -216,9 +216,19 @@ type hiresParams struct {
 	Steps             int32    // int steps;
 	DenoisingStrength float32  // float denoising_strength;
 	UpscaleTileSize   int32    // int upscale_tile_size;
+	CustomSigmas      *float32 // float* custom_sigmas;
+	CustomSigmasCount int32    // int custom_sigmas_count;
 }
 
 func (hp *hiresParams) toGo() *HiresParams {
+	var _sigmas []float32
+
+	// unsafe.Slice panics when handed a nil pointer with a non-zero
+	// length, so the pointer is checked together with the count.
+	if hp.CustomSigmas != nil && hp.CustomSigmasCount > 0 {
+		_sigmas = unsafe.Slice(hp.CustomSigmas, hp.CustomSigmasCount)
+	}
+
 	return &HiresParams{
 		Enabled:           byteToBool(hp.Enabled),
 		Upscaler:          hp.Upscaler,
@@ -229,6 +239,7 @@ func (hp *hiresParams) toGo() *HiresParams {
 		Steps:             hp.Steps,
 		DenoisingStrength: hp.DenoisingStrength,
 		UpscaleTileSize:   hp.UpscaleTileSize,
+		CustomSigmas:      _sigmas,
 	}
 }
 
@@ -250,9 +261,18 @@ type HiresParams struct {
 	Steps             int32
 	DenoisingStrength float32
 	UpscaleTileSize   int32
+	CustomSigmas      []float32
 }
 
 func (hp *HiresParams) toC() *hiresParams {
+	var _sigmas *float32
+	var _sigmaCnt int32
+
+	if len(hp.CustomSigmas) > 0 {
+		_sigmas = &hp.CustomSigmas[0]
+		_sigmaCnt = int32(len(hp.CustomSigmas))
+	}
+
 	return &hiresParams{
 		Enabled:           boolToByte(hp.Enabled),
 		Upscaler:          hp.Upscaler,
@@ -263,6 +283,8 @@ func (hp *HiresParams) toC() *hiresParams {
 		Steps:             hp.Steps,
 		DenoisingStrength: hp.DenoisingStrength,
 		UpscaleTileSize:   hp.UpscaleTileSize,
+		CustomSigmas:      _sigmas,
+		CustomSigmasCount: _sigmaCnt,
 	}
 }
 
@@ -395,46 +417,50 @@ func (pmp *PMParamsType) toC() *pMParamsType {
 }
 
 type vAETilingParams struct {
-	Enabled        uint8   // bool enabled;
-	TemporalTiling uint8   // bool temporal_tiling;
-	TileSizeX      int32   // int tile_size_x;
-	TileSizeY      int32   // int tile_size_y;
-	TargetOverlap  float32 // float target_overlap;
-	RelSizeX       float32 // float rel_size_x;
-	RelSizeY       float32 // float rel_size_y;
+	Enabled         uint8   // bool enabled;
+	TemporalTiling  uint8   // bool temporal_tiling;
+	TileSizeX       int32   // int tile_size_x;
+	TileSizeY       int32   // int tile_size_y;
+	TargetOverlap   float32 // float target_overlap;
+	RelSizeX        float32 // float rel_size_x;
+	RelSizeY        float32 // float rel_size_y;
+	ExtraTilingArgs *byte   // const char* extra_tiling_args;
 }
 
 func (vae *vAETilingParams) toGo() *VAETilingParams {
 	return &VAETilingParams{
-		Enabled:        byteToBool(vae.Enabled),
-		TemporalTiling: byteToBool(vae.TemporalTiling),
-		TileSizeX:      vae.TileSizeX,
-		TileSizeY:      vae.TileSizeY,
-		TargetOverlap:  vae.TargetOverlap,
-		RelSizeX:       vae.RelSizeX,
-		RelSizeY:       vae.RelSizeY,
+		Enabled:         byteToBool(vae.Enabled),
+		TemporalTiling:  byteToBool(vae.TemporalTiling),
+		TileSizeX:       vae.TileSizeX,
+		TileSizeY:       vae.TileSizeY,
+		TargetOverlap:   vae.TargetOverlap,
+		RelSizeX:        vae.RelSizeX,
+		RelSizeY:        vae.RelSizeY,
+		ExtraTilingArgs: charToString(vae.ExtraTilingArgs),
 	}
 }
 
 type VAETilingParams struct {
-	Enabled        bool
-	TemporalTiling bool
-	TileSizeX      int32
-	TileSizeY      int32
-	TargetOverlap  float32
-	RelSizeX       float32
-	RelSizeY       float32
+	Enabled         bool
+	TemporalTiling  bool
+	TileSizeX       int32
+	TileSizeY       int32
+	TargetOverlap   float32
+	RelSizeX        float32
+	RelSizeY        float32
+	ExtraTilingArgs string
 }
 
 func (vae *VAETilingParams) toC() *vAETilingParams {
 	return &vAETilingParams{
-		Enabled:        boolToByte(vae.Enabled),
-		TemporalTiling: boolToByte(vae.TemporalTiling),
-		TileSizeX:      vae.TileSizeX,
-		TileSizeY:      vae.TileSizeY,
-		TargetOverlap:  vae.TargetOverlap,
-		RelSizeX:       vae.RelSizeX,
-		RelSizeY:       vae.RelSizeY,
+		Enabled:         boolToByte(vae.Enabled),
+		TemporalTiling:  boolToByte(vae.TemporalTiling),
+		TileSizeX:       vae.TileSizeX,
+		TileSizeY:       vae.TileSizeY,
+		TargetOverlap:   vae.TargetOverlap,
+		RelSizeX:        vae.RelSizeX,
+		RelSizeY:        vae.RelSizeY,
+		ExtraTilingArgs: stringToChar(vae.ExtraTilingArgs),
 	}
 }
 
diff --git a/pkg/gosd/image_test.go b/pkg/gosd/image_test.go
index e704450..3fe3981 100644
--- a/pkg/gosd/image_test.go
+++ b/pkg/gosd/image_test.go
@@ -58,6 +58,7 @@ func myImagePreviewCallback(step int32, image Image, isNoisy bool, data unsafe.P
 
 func myLogCallback(level LogLevel, text string, data unsafe.Pointer) {
 	*(*int)(data)++
+	level.Stringify()
 }
 
 func TestGenerateImage(t *testing.T) {
diff --git a/pkg/gosd/video.go b/pkg/gosd/video.go
index 7e3a1f2..62a7c40 100644
--- a/pkg/gosd/video.go
+++ b/pkg/gosd/video.go
@@ -67,6 +67,7 @@ type videoParams struct {
 	VACEStrength    float32         // float vace_strength;
 	VAETilingParams vAETilingParams // sd_tiling_params_t vae_tiling_params;
 	Cache           cacheParams     // sd_cache_params_t cache;
+	HiresParams     hiresParams     // sd_hires_params_t hires;
 }
 
 func (vp *videoParams) toGo() *VideoParams {
@@ -105,6 +106,7 @@ func (vp *videoParams) toGo() *VideoParams {
 		VACEStrength:    vp.VACEStrength,
 		VAETilingParams: *vp.VAETilingParams.toGo(),
 		Cache:           *vp.Cache.toGo(),
+		HiresParams:     *vp.HiresParams.toGo(),
 	}
 }
 
@@ -130,6 +132,7 @@ type VideoParams struct {
 	VACEStrength    float32
 	VAETilingParams VAETilingParams
 	Cache           CacheParams
+	HiresParams     HiresParams
 }
 
 func (vp *VideoParams) toC() *videoParams {
@@ -165,6 +168,7 @@ func (vp *VideoParams) toC() *videoParams {
 		VACEStrength:    vp.VACEStrength,
 		VAETilingParams: *vp.VAETilingParams.toC(),
 		Cache:           *vp.Cache.toC(),
+		HiresParams:     *vp.HiresParams.toC(),
 	}
 }
 
diff --git a/stable_diffusion.release b/stable_diffusion.release
index 6155930..017eaec 100644
--- a/stable_diffusion.release
+++ b/stable_diffusion.release
@@ -1 +1 @@
-master-633-5b0267e
+master-642-3a8788c