Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ High-performance diffusion model inference in pure Go.
[![Linux](https://github.com/l8bloom/gosd/actions/workflows/linux.yaml/badge.svg)](https://github.com/l8bloom/gosd/actions/workflows/linux.yaml)
[![Windows](https://github.com/l8bloom/gosd/actions/workflows/windows.yaml/badge.svg)](https://github.com/l8bloom/gosd/actions/workflows/windows.yaml)
[![macOS](https://github.com/l8bloom/gosd/actions/workflows/macos.yaml/badge.svg)](https://github.com/l8bloom/gosd/actions/workflows/macos.yaml)
[![stable-diffusion.cpp](https://img.shields.io/badge/sd.cpp-5b0267e-yellow)](https://github.com/leejet/stable-diffusion.cpp/releases/tag/master-633-5b0267e)
[![stable-diffusion.cpp](https://img.shields.io/badge/sd.cpp-3a8788c-yellow)](https://github.com/leejet/stable-diffusion.cpp/releases/tag/master-642-3a8788c)
[![Coverage](https://img.shields.io/badge/code%20coverage-80%25-purple)](https://github.com/l8bloom/gosd/actions)


Expand Down
Binary file modified examples/gen_video_with_audio/output.mp4
Binary file not shown.
28 changes: 19 additions & 9 deletions examples/gen_video_with_audio/video_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (

// myLogCallback is the example's log callback: it prints a header line, then
// the level and the text of the log message to standard output. The data
// pointer is not used.
// NOTE(review): the two "level" prints below are the removed and the added
// side of the same diff line (raw level value vs. level.Stringify()).
var myLogCallback sd.LogCallback = func(level sd.LogLevel, text string, data unsafe.Pointer) {
fmt.Println("My log callback:")
fmt.Println("level: ", level)
fmt.Println("level: ", level.Stringify())
fmt.Println("text: ", text)
}

Expand All @@ -35,7 +35,7 @@ func main() {
ctxParams.EmbeddingsConnectorsPath = os.Getenv("VIDEO_EX_EMBEDDINGS_PATH")

// https://huggingface.co/unsloth/gemma-3-12b-it-qat-GGUF/blob/main/gemma-3-12b-it-qat-UD-Q4_K_XL.gguf
ctxParams.LLMPath = os.Getenv("VIDEO_EX_T5XXL_PATH")
ctxParams.LLMPath = os.Getenv("VIDEO_EX_LLM_PATH")

// https://huggingface.co/unsloth/LTX-2.3-GGUF/blob/main/vae/ltx-2.3-22b-dev_video_vae.safetensors
ctxParams.VAEPath = os.Getenv("VIDEO_EX_VAE_PATH")
Expand All @@ -57,25 +57,35 @@ func main() {

// split the spatial volume into tiles when VRAM is limited
vidParams.VAETilingParams.Enabled = true
vidParams.VAETilingParams.RelSizeX = 4
vidParams.VAETilingParams.RelSizeY = 4
vidParams.VAETilingParams.RelSizeX = 8
vidParams.VAETilingParams.RelSizeY = 8

vidParams.SampleParams.SampleSteps = 50
vidParams.SampleParams.SampleSteps = 30
vidParams.SampleParams.SampleMethod = sd.EulerSampleMethod
vidParams.SampleParams.Guidance.TextCfg = 6
vidParams.SampleParams.Guidance.TextCfg = 5.5

vidParams.FPS = 24

// number of video frames to generate
vidParams.VideoFrames = 120

vidParams.HiresParams.Enabled = true
// https://huggingface.co/Lightricks/LTX-2.3/blob/main/ltx-2.3-spatial-upscaler-x2-1.1.safetensors
vidParams.HiresParams.ModelPath = os.Getenv("LTX_UPSCALER_PATH")
vidParams.HiresParams.Steps = 20
// lower values keep the result closer to the first-pass output; higher values introduce more variance
vidParams.HiresParams.DenoisingStrength = 0.8
vidParams.HiresParams.Scale = 2
vidParams.HiresParams.Upscaler = sd.HiresUpscalerModel
// vidParams.HiresParams.CustomSigmas = []float32{0.85, 0.725, 0.421875, 0.0}

// prompts
vidParams.Prompt = "A cinematic, slow-motion shot of a narrow street in a rainy cyberpunk city at night. A person holding a transparent umbrella walks slowly past the camera. Neon signs reflect flawlessly on the wet pavement. Continuous light rain falls, creating ripples in puddles as steam rises from street vents and cars move in the far distance. Atmospheric fog, smooth camera pan, ultra-detailed realistic reflections. Concurrently, the synchronized audio track delivers the crisp, close-up acoustics of continuous soft rain drops falling, layered over a muffled, distant thunderstorm rumbling gently in the far background."
vidParams.Prompt = "A cinematic, slow-motion shot of a narrow street in a rainy cyberpunk city at night. A person holding a transparent umbrella walks slowly past the camera. Neon signs reflect flawlessly on the wet pavement. Continuous light rain falls, creating ripples in puddles as steam rises from street vents and cars move in the far distance. Atmospheric fog, smooth camera pan, ultra-detailed realistic reflections. The sound of continuous soft raindrops falling can be heard, layered with the muffled rumble of a distant thunderstorm in the background."
vidParams.NegativePrompt = "low quality, blurry, distorted, deformed, watermark, text, oversaturated, jpeg artifacts"

// video resolution
vidParams.Width = 300
vidParams.Height = 500
vidParams.Width = 1344 / 4
vidParams.Height = 768 / 4

sd.SetLogCallback(myLogCallback, nil)
genVideo := sd.GenerateVideo(ctx, vidParams)
Expand Down
16 changes: 16 additions & 0 deletions pkg/gosd/callbacks.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,22 @@ const (
Error
)

// Stringify returns the human-readable name of the log level: "Debug",
// "Info", "Warning" or "Error". Any other value yields the empty string.
func (ll LogLevel) Stringify() string {
	switch ll {
	case Debug:
		return "Debug"
	case Info:
		return "Info"
	case Warn:
		return "Warning"
	case Error:
		return "Error"
	}
	// Unrecognised level: no name is defined for it.
	return ""
}

type PreviewMode int32

const (
Expand Down
80 changes: 52 additions & 28 deletions pkg/gosd/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,17 @@ type hiresParams struct {
Steps int32 // int steps;
DenoisingStrength float32 // float denoising_strength;
UpscaleTileSize int32 // int upscale_tile_size;
CustomSigmas *float32 // float* custom_sigmas;
CustomSigmasCount int32 // int custom_sigmas_count;
}

func (hp *hiresParams) toGo() *HiresParams {
var _sigmas []float32

if hp.CustomSigmasCount > 0 {
_sigmas = unsafe.Slice(hp.CustomSigmas, hp.CustomSigmasCount)
}

return &HiresParams{
Enabled: byteToBool(hp.Enabled),
Upscaler: hp.Upscaler,
Expand All @@ -229,6 +237,7 @@ func (hp *hiresParams) toGo() *HiresParams {
Steps: hp.Steps,
DenoisingStrength: hp.DenoisingStrength,
UpscaleTileSize: hp.UpscaleTileSize,
CustomSigmas: _sigmas,
}
}

Expand All @@ -250,9 +259,18 @@ type HiresParams struct {
Steps int32
DenoisingStrength float32
UpscaleTileSize int32
CustomSigmas []float32
}

func (hp *HiresParams) toC() *hiresParams {
var _sigmas *float32
var _sigmaCnt int32

if len(hp.CustomSigmas) > 0 {
_sigmas = &hp.CustomSigmas[0]
_sigmaCnt = int32(len(hp.CustomSigmas))
}

return &hiresParams{
Enabled: boolToByte(hp.Enabled),
Upscaler: hp.Upscaler,
Expand All @@ -263,6 +281,8 @@ func (hp *HiresParams) toC() *hiresParams {
Steps: hp.Steps,
DenoisingStrength: hp.DenoisingStrength,
UpscaleTileSize: hp.UpscaleTileSize,
CustomSigmas: _sigmas,
CustomSigmasCount: _sigmaCnt,
}
}

Expand Down Expand Up @@ -395,46 +415,50 @@ func (pmp *PMParamsType) toC() *pMParamsType {
}

// vAETilingParams is the low-level form of the VAE tiling settings. The
// trailing comment on each field gives the C declaration it corresponds to;
// booleans are carried as uint8 and the extra-args string as a *byte.
// NOTE(review): this span shows both the removed and the added side of a diff
// hunk, so every field up to RelSizeY is listed twice; ExtraTilingArgs is the
// only new field.
type vAETilingParams struct {
Enabled uint8 // bool enabled;
TemporalTiling uint8 // bool temporal_tiling;
TileSizeX int32 // int tile_size_x;
TileSizeY int32 // int tile_size_y;
TargetOverlap float32 // float target_overlap;
RelSizeX float32 // float rel_size_x;
RelSizeY float32 // float rel_size_y;
Enabled uint8 // bool enabled;
TemporalTiling uint8 // bool temporal_tiling;
TileSizeX int32 // int tile_size_x;
TileSizeY int32 // int tile_size_y;
TargetOverlap float32 // float target_overlap;
RelSizeX float32 // float rel_size_x;
RelSizeY float32 // float rel_size_y;
ExtraTilingArgs *byte // const char* extra_tiling_args;
}

// toGo builds a new VAETilingParams from the low-level struct and returns a
// pointer to it. The uint8 flags go through byteToBool, ExtraTilingArgs goes
// through charToString, and the numeric fields are copied unchanged.
// NOTE(review): the field list appears twice because this span shows both the
// removed and the added side of a diff hunk.
func (vae *vAETilingParams) toGo() *VAETilingParams {
return &VAETilingParams{
Enabled: byteToBool(vae.Enabled),
TemporalTiling: byteToBool(vae.TemporalTiling),
TileSizeX: vae.TileSizeX,
TileSizeY: vae.TileSizeY,
TargetOverlap: vae.TargetOverlap,
RelSizeX: vae.RelSizeX,
RelSizeY: vae.RelSizeY,
Enabled: byteToBool(vae.Enabled),
TemporalTiling: byteToBool(vae.TemporalTiling),
TileSizeX: vae.TileSizeX,
TileSizeY: vae.TileSizeY,
TargetOverlap: vae.TargetOverlap,
RelSizeX: vae.RelSizeX,
RelSizeY: vae.RelSizeY,
ExtraTilingArgs: charToString(vae.ExtraTilingArgs),
}
}

// VAETilingParams holds the VAE tiling settings using Go-native types (bool
// flags and a string for ExtraTilingArgs). Its toC method converts it to the
// low-level vAETilingParams form.
// NOTE(review): this span shows both the removed and the added side of a diff
// hunk, so every field up to RelSizeY is listed twice; ExtraTilingArgs is the
// only new field.
type VAETilingParams struct {
Enabled bool
TemporalTiling bool
TileSizeX int32
TileSizeY int32
TargetOverlap float32
RelSizeX float32
RelSizeY float32
Enabled bool
TemporalTiling bool
TileSizeX int32
TileSizeY int32
TargetOverlap float32
RelSizeX float32
RelSizeY float32
ExtraTilingArgs string
}

// toC builds a new low-level vAETilingParams from the Go-side settings and
// returns a pointer to it. The bool flags go through boolToByte,
// ExtraTilingArgs goes through stringToChar, and the numeric fields are
// copied unchanged.
// NOTE(review): the field list appears twice because this span shows both the
// removed and the added side of a diff hunk.
func (vae *VAETilingParams) toC() *vAETilingParams {
return &vAETilingParams{
Enabled: boolToByte(vae.Enabled),
TemporalTiling: boolToByte(vae.TemporalTiling),
TileSizeX: vae.TileSizeX,
TileSizeY: vae.TileSizeY,
TargetOverlap: vae.TargetOverlap,
RelSizeX: vae.RelSizeX,
RelSizeY: vae.RelSizeY,
Enabled: boolToByte(vae.Enabled),
TemporalTiling: boolToByte(vae.TemporalTiling),
TileSizeX: vae.TileSizeX,
TileSizeY: vae.TileSizeY,
TargetOverlap: vae.TargetOverlap,
RelSizeX: vae.RelSizeX,
RelSizeY: vae.RelSizeY,
ExtraTilingArgs: stringToChar(vae.ExtraTilingArgs),
}
}

Expand Down
1 change: 1 addition & 0 deletions pkg/gosd/image_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ func myImagePreviewCallback(step int32, image Image, isNoisy bool, data unsafe.P

// myLogCallback is a test log callback. It treats data as a pointer to an int
// and increments that int once per call, then calls level.Stringify and
// discards the result. The text argument is ignored.
func myLogCallback(level LogLevel, text string, data unsafe.Pointer) {
	counter := (*int)(data)
	*counter++
	_ = level.Stringify()
}

func TestGenerateImage(t *testing.T) {
Expand Down
4 changes: 4 additions & 0 deletions pkg/gosd/video.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ type videoParams struct {
VACEStrength float32 // float vace_strength;
VAETilingParams vAETilingParams // sd_tiling_params_t vae_tiling_params;
Cache cacheParams // sd_cache_params_t cache;
HiresParams hiresParams // sd_hires_params_t hires;
}

func (vp *videoParams) toGo() *VideoParams {
Expand Down Expand Up @@ -105,6 +106,7 @@ func (vp *videoParams) toGo() *VideoParams {
VACEStrength: vp.VACEStrength,
VAETilingParams: *vp.VAETilingParams.toGo(),
Cache: *vp.Cache.toGo(),
HiresParams: *vp.HiresParams.toGo(),
}
}

Expand All @@ -130,6 +132,7 @@ type VideoParams struct {
VACEStrength float32
VAETilingParams VAETilingParams
Cache CacheParams
HiresParams HiresParams
}

func (vp *VideoParams) toC() *videoParams {
Expand Down Expand Up @@ -165,6 +168,7 @@ func (vp *VideoParams) toC() *videoParams {
VACEStrength: vp.VACEStrength,
VAETilingParams: *vp.VAETilingParams.toC(),
Cache: *vp.Cache.toC(),
HiresParams: *vp.HiresParams.toC(),
}
}

Expand Down
2 changes: 1 addition & 1 deletion stable_diffusion.release
Original file line number Diff line number Diff line change
@@ -1 +1 @@
master-633-5b0267e
master-642-3a8788c
Loading