chore(model gallery): add arcee-ai_afm-4.5b (#5938) #12

Workflow file for this run

.github/workflows/backend.yml at 04bad9a

	---
	# Workflow that builds the backend container images.
	name: 'build backend container images'

	# Triggered by pushes to master and by pushes of any tag.
	on:
	  push:
	    branches:
	      - master
	    tags:
	      - '*'

	# Runs for the same ref and repository share one group; starting a new run
	# cancels the run of that group that is still in progress.
	concurrency:
	  group: ci-backends-${{ github.head_ref || github.ref }}-${{ github.repository }}
	  cancel-in-progress: true

	jobs:
	  # backend-jobs: calls the reusable backend_build.yml workflow once per entry
	  # of strategy.matrix.include, forwarding every matrix field as an input and
	  # the registry credentials as secrets.
	  backend-jobs:
	    uses: ./.github/workflows/backend_build.yml
	    with:
	      tag-latest: ${{ matrix.tag-latest }}
	      tag-suffix: ${{ matrix.tag-suffix }}
	      build-type: ${{ matrix.build-type }}
	      cuda-major-version: ${{ matrix.cuda-major-version }}
	      cuda-minor-version: ${{ matrix.cuda-minor-version }}
	      platforms: ${{ matrix.platforms }}
	      runs-on: ${{ matrix.runs-on }}
	      base-image: ${{ matrix.base-image }}
	      backend: ${{ matrix.backend }}
	      dockerfile: ${{ matrix.dockerfile }}
	      skip-drivers: ${{ matrix.skip-drivers }}
	      context: ${{ matrix.context }}
	    secrets:
	      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
	      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
	      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
	      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
	    strategy:
	      # Keep building the remaining images when one matrix entry fails.
	      fail-fast: false
	      # max-parallel: ${{ github.event_name != 'pull_request' && 6 || 4 }}
	      matrix:
	        # Every entry sets the same twelve keys; all values are strings.
	        include:
	          # CUDA 11 builds
	          - build-type: 'cublas'
	            cuda-major-version: "11"
	            cuda-minor-version: "7"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-11-rerankers'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "rerankers"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'cublas'
	            cuda-major-version: "11"
	            cuda-minor-version: "7"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "llama-cpp"
	            dockerfile: "./backend/Dockerfile.llama-cpp"
	            context: "./"
	          - build-type: 'cublas'
	            cuda-major-version: "11"
	            cuda-minor-version: "7"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-11-transformers'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "transformers"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'cublas'
	            cuda-major-version: "11"
	            cuda-minor-version: "7"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-11-diffusers'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "diffusers"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          # CUDA 11 additional backends
	          - build-type: 'cublas'
	            cuda-major-version: "11"
	            cuda-minor-version: "7"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-11-kokoro'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "kokoro"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'cublas'
	            cuda-major-version: "11"
	            cuda-minor-version: "7"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "faster-whisper"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'cublas'
	            cuda-major-version: "11"
	            cuda-minor-version: "7"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-11-coqui'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "coqui"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'cublas'
	            cuda-major-version: "11"
	            cuda-minor-version: "7"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-11-bark'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "bark"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'cublas'
	            cuda-major-version: "11"
	            cuda-minor-version: "7"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-11-chatterbox'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "chatterbox"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          # CUDA 12 builds
	          - build-type: 'cublas'
	            cuda-major-version: "12"
	            cuda-minor-version: "0"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "rerankers"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'cublas'
	            cuda-major-version: "12"
	            cuda-minor-version: "0"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "llama-cpp"
	            dockerfile: "./backend/Dockerfile.llama-cpp"
	            context: "./"
	          - build-type: 'cublas'
	            cuda-major-version: "12"
	            cuda-minor-version: "0"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-12-vllm'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "vllm"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'cublas'
	            cuda-major-version: "12"
	            cuda-minor-version: "0"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-12-transformers'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "transformers"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'cublas'
	            cuda-major-version: "12"
	            cuda-minor-version: "0"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "diffusers"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          # CUDA 12 additional backends
	          - build-type: 'cublas'
	            cuda-major-version: "12"
	            cuda-minor-version: "0"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "kokoro"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'cublas'
	            cuda-major-version: "12"
	            cuda-minor-version: "0"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "faster-whisper"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'cublas'
	            cuda-major-version: "12"
	            cuda-minor-version: "0"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-12-coqui'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "coqui"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'cublas'
	            cuda-major-version: "12"
	            cuda-minor-version: "0"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-12-bark'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "bark"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'cublas'
	            cuda-major-version: "12"
	            cuda-minor-version: "0"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "chatterbox"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          # hipblas builds
	          - build-type: 'hipblas'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-rocm-hipblas-rerankers'
	            runs-on: 'ubuntu-latest'
	            base-image: "rocm/dev-ubuntu-22.04:6.1"
	            skip-drivers: 'false'
	            backend: "rerankers"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'hipblas'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-rocm-hipblas-llama-cpp'
	            runs-on: 'ubuntu-latest'
	            base-image: "rocm/dev-ubuntu-22.04:6.1"
	            skip-drivers: 'false'
	            backend: "llama-cpp"
	            dockerfile: "./backend/Dockerfile.llama-cpp"
	            context: "./"
	          - build-type: 'hipblas'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-rocm-hipblas-vllm'
	            runs-on: 'ubuntu-latest'
	            base-image: "rocm/dev-ubuntu-22.04:6.1"
	            skip-drivers: 'false'
	            backend: "vllm"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'hipblas'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-rocm-hipblas-transformers'
	            runs-on: 'ubuntu-latest'
	            base-image: "rocm/dev-ubuntu-22.04:6.1"
	            skip-drivers: 'false'
	            backend: "transformers"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'hipblas'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-rocm-hipblas-diffusers'
	            runs-on: 'ubuntu-latest'
	            base-image: "rocm/dev-ubuntu-22.04:6.1"
	            skip-drivers: 'false'
	            backend: "diffusers"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          # ROCm additional backends
	          - build-type: 'hipblas'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-rocm-hipblas-kokoro'
	            runs-on: 'ubuntu-latest'
	            base-image: "rocm/dev-ubuntu-22.04:6.1"
	            skip-drivers: 'false'
	            backend: "kokoro"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'hipblas'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
	            runs-on: 'ubuntu-latest'
	            base-image: "rocm/dev-ubuntu-22.04:6.1"
	            skip-drivers: 'false'
	            backend: "faster-whisper"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'hipblas'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-rocm-hipblas-coqui'
	            runs-on: 'ubuntu-latest'
	            base-image: "rocm/dev-ubuntu-22.04:6.1"
	            skip-drivers: 'false'
	            backend: "coqui"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'hipblas'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-rocm-hipblas-bark'
	            runs-on: 'ubuntu-latest'
	            base-image: "rocm/dev-ubuntu-22.04:6.1"
	            skip-drivers: 'false'
	            backend: "bark"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          # sycl builds
	          - build-type: 'intel'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-intel-rerankers'
	            runs-on: 'ubuntu-latest'
	            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
	            skip-drivers: 'false'
	            backend: "rerankers"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'sycl_f32'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-intel-sycl-f32-llama-cpp'
	            runs-on: 'ubuntu-latest'
	            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
	            skip-drivers: 'false'
	            backend: "llama-cpp"
	            dockerfile: "./backend/Dockerfile.llama-cpp"
	            context: "./"
	          - build-type: 'sycl_f16'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-intel-sycl-f16-llama-cpp'
	            runs-on: 'ubuntu-latest'
	            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
	            skip-drivers: 'false'
	            backend: "llama-cpp"
	            dockerfile: "./backend/Dockerfile.llama-cpp"
	            context: "./"
	          - build-type: 'intel'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-intel-vllm'
	            runs-on: 'ubuntu-latest'
	            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
	            skip-drivers: 'false'
	            backend: "vllm"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'intel'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-intel-transformers'
	            runs-on: 'ubuntu-latest'
	            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
	            skip-drivers: 'false'
	            backend: "transformers"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'intel'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-intel-diffusers'
	            runs-on: 'ubuntu-latest'
	            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
	            skip-drivers: 'false'
	            backend: "diffusers"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          # SYCL additional backends
	          - build-type: 'intel'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-intel-kokoro'
	            runs-on: 'ubuntu-latest'
	            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
	            skip-drivers: 'false'
	            backend: "kokoro"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'intel'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-intel-faster-whisper'
	            runs-on: 'ubuntu-latest'
	            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
	            skip-drivers: 'false'
	            backend: "faster-whisper"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'intel'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-intel-coqui'
	            runs-on: 'ubuntu-latest'
	            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
	            skip-drivers: 'false'
	            backend: "coqui"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'intel'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-intel-bark'
	            runs-on: 'ubuntu-latest'
	            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
	            skip-drivers: 'false'
	            backend: "bark"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          # piper
	          - build-type: ''
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64,linux/arm64'
	            tag-latest: 'auto'
	            tag-suffix: '-piper'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "piper"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          # bark-cpp
	          - build-type: ''
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-bark-cpp'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "bark-cpp"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          - build-type: ''
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64,linux/arm64'
	            tag-latest: 'auto'
	            tag-suffix: '-cpu-llama-cpp'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "llama-cpp"
	            dockerfile: "./backend/Dockerfile.llama-cpp"
	            context: "./"
	          - build-type: 'cublas'
	            cuda-major-version: "12"
	            cuda-minor-version: "0"
	            platforms: 'linux/arm64'
	            skip-drivers: 'true'
	            tag-latest: 'auto'
	            tag-suffix: '-nvidia-l4t-arm64-llama-cpp'
	            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
	            runs-on: 'ubuntu-24.04-arm'
	            backend: "llama-cpp"
	            dockerfile: "./backend/Dockerfile.llama-cpp"
	            context: "./"
	          - build-type: 'vulkan'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-vulkan-llama-cpp'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "llama-cpp"
	            dockerfile: "./backend/Dockerfile.llama-cpp"
	            context: "./"
	          # Stablediffusion-ggml
	          - build-type: ''
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-cpu-stablediffusion-ggml'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "stablediffusion-ggml"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          - build-type: 'cublas'
	            cuda-major-version: "12"
	            cuda-minor-version: "0"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "stablediffusion-ggml"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          - build-type: 'cublas'
	            cuda-major-version: "11"
	            cuda-minor-version: "7"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-11-stablediffusion-ggml'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "stablediffusion-ggml"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          - build-type: 'sycl_f32'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-intel-sycl-f32-stablediffusion-ggml'
	            runs-on: 'ubuntu-latest'
	            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
	            skip-drivers: 'false'
	            backend: "stablediffusion-ggml"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          - build-type: 'sycl_f16'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-intel-sycl-f16-stablediffusion-ggml'
	            runs-on: 'ubuntu-latest'
	            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
	            skip-drivers: 'false'
	            backend: "stablediffusion-ggml"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          - build-type: 'vulkan'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-vulkan-stablediffusion-ggml'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "stablediffusion-ggml"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          - build-type: 'cublas'
	            cuda-major-version: "12"
	            cuda-minor-version: "0"
	            platforms: 'linux/arm64'
	            skip-drivers: 'true'
	            tag-latest: 'auto'
	            tag-suffix: '-nvidia-l4t-arm64-stablediffusion-ggml'
	            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
	            runs-on: 'ubuntu-24.04-arm'
	            backend: "stablediffusion-ggml"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          # whisper
	          - build-type: ''
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64,linux/arm64'
	            tag-latest: 'auto'
	            tag-suffix: '-cpu-whisper'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "whisper"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          - build-type: 'cublas'
	            cuda-major-version: "12"
	            cuda-minor-version: "0"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-12-whisper'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "whisper"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          - build-type: 'cublas'
	            cuda-major-version: "11"
	            cuda-minor-version: "7"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-11-whisper'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "whisper"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          - build-type: 'sycl_f32'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-intel-sycl-f32-whisper'
	            runs-on: 'ubuntu-latest'
	            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
	            skip-drivers: 'false'
	            backend: "whisper"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          - build-type: 'sycl_f16'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-intel-sycl-f16-whisper'
	            runs-on: 'ubuntu-latest'
	            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
	            skip-drivers: 'false'
	            backend: "whisper"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          - build-type: 'vulkan'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-vulkan-whisper'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "whisper"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          - build-type: 'cublas'
	            cuda-major-version: "12"
	            cuda-minor-version: "0"
	            platforms: 'linux/arm64'
	            skip-drivers: 'true'
	            tag-latest: 'auto'
	            tag-suffix: '-nvidia-l4t-arm64-whisper'
	            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
	            runs-on: 'ubuntu-24.04-arm'
	            backend: "whisper"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          - build-type: 'hipblas'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-hipblas-whisper'
	            base-image: "rocm/dev-ubuntu-22.04:6.1"
	            runs-on: 'ubuntu-latest'
	            skip-drivers: 'false'
	            backend: "whisper"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          # silero-vad
	          - build-type: ''
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64,linux/arm64'
	            tag-latest: 'auto'
	            tag-suffix: '-cpu-silero-vad'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "silero-vad"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          # local-store
	          - build-type: ''
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64,linux/arm64'
	            tag-latest: 'auto'
	            tag-suffix: '-cpu-local-store'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "local-store"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          # huggingface
	          - build-type: ''
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64,linux/arm64'
	            tag-latest: 'auto'
	            tag-suffix: '-huggingface'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "huggingface"
	            dockerfile: "./backend/Dockerfile.golang"
	            context: "./"
	          # rfdetr
	          - build-type: ''
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64,linux/arm64'
	            tag-latest: 'auto'
	            tag-suffix: '-cpu-rfdetr'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "rfdetr"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'cublas'
	            cuda-major-version: "12"
	            cuda-minor-version: "0"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-12-rfdetr'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "rfdetr"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'cublas'
	            cuda-major-version: "11"
	            cuda-minor-version: "7"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-11-rfdetr'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "rfdetr"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'intel'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-intel-rfdetr'
	            runs-on: 'ubuntu-latest'
	            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
	            skip-drivers: 'false'
	            backend: "rfdetr"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'cublas'
	            cuda-major-version: "12"
	            cuda-minor-version: "0"
	            platforms: 'linux/arm64'
	            skip-drivers: 'true'
	            tag-latest: 'auto'
	            tag-suffix: '-nvidia-l4t-arm64-rfdetr'
	            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
	            runs-on: 'ubuntu-24.04-arm'
	            backend: "rfdetr"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          # exllama2
	          - build-type: ''
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-cpu-exllama2'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "exllama2"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'cublas'
	            cuda-major-version: "12"
	            cuda-minor-version: "0"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-12-exllama2'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "exllama2"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'cublas'
	            cuda-major-version: "11"
	            cuda-minor-version: "7"
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-nvidia-cuda-11-exllama2'
	            runs-on: 'ubuntu-latest'
	            base-image: "ubuntu:22.04"
	            skip-drivers: 'false'
	            backend: "exllama2"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          - build-type: 'intel'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-intel-exllama2'
	            runs-on: 'ubuntu-latest'
	            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
	            skip-drivers: 'false'
	            backend: "exllama2"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          # NOTE(review): this is the only hipblas entry with skip-drivers
	          # 'true'; every other hipblas entry uses 'false' — confirm intent.
	          - build-type: 'hipblas'
	            cuda-major-version: ""
	            cuda-minor-version: ""
	            platforms: 'linux/amd64'
	            skip-drivers: 'true'
	            tag-latest: 'auto'
	            tag-suffix: '-gpu-hipblas-exllama2'
	            base-image: "rocm/dev-ubuntu-22.04:6.1"
	            runs-on: 'ubuntu-latest'
	            backend: "exllama2"
	            dockerfile: "./backend/Dockerfile.python"
	            context: "./backend"
	          # runs out of space on the runner
	          # - build-type: 'hipblas'
	          #   cuda-major-version: ""
	          #   cuda-minor-version: ""
	          #   platforms: 'linux/amd64'
	          #   tag-latest: 'auto'
	          #   tag-suffix: '-gpu-hipblas-rfdetr'
	          #   base-image: "rocm/dev-ubuntu-22.04:6.1"
	          #   runs-on: 'ubuntu-latest'
	          #   skip-drivers: 'false'
	          #   backend: "rfdetr"
	          #   dockerfile: "./backend/Dockerfile.python"
	          #   context: "./backend"
	  # llama-cpp-darwin: builds the llama-cpp backend on a macOS-14 runner and
	  # uploads the resulting tarball as the `llama-cpp-tar` artifact.
	  llama-cpp-darwin:
	    runs-on: macOS-14
	    strategy:
	      matrix:
	        go-version: ['1.21.x']
	    steps:
	      - name: Clone
	        uses: actions/checkout@v4
	        with:
	          submodules: true
	      - name: Setup Go ${{ matrix.go-version }}
	        uses: actions/setup-go@v5
	        with:
	          go-version: ${{ matrix.go-version }}
	          cache: false
	      # You can test your matrix by printing the current Go version
	      - name: Display Go version
	        run: go version
	      - name: Dependencies
	        run: |
	          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
	      - name: Build llama-cpp-darwin
	        run: |
	          make protogen-go
	          make build
	          bash scripts/build-llama-cpp-darwin.sh
	          ls -la build/darwin.tar
	          mv build/darwin.tar build/llama-cpp.tar
	      - name: Upload llama-cpp.tar
	        uses: actions/upload-artifact@v4
	        with:
	          name: llama-cpp-tar
	          path: build/llama-cpp.tar
	  # llama-cpp-darwin-publish: downloads the `llama-cpp-tar` artifact and pushes
	  # it with crane to DockerHub and quay.io under every tag computed by
	  # docker/metadata-action.
	  llama-cpp-darwin-publish:
	    needs: llama-cpp-darwin
	    runs-on: ubuntu-latest
	    steps:
	      - name: Download llama-cpp.tar
	        uses: actions/download-artifact@v4
	        with:
	          name: llama-cpp-tar
	          path: .
	      - name: Install crane
	        # -f makes curl fail on an HTTP error instead of piping an error page
	        # into tar. The release is not pinned (`latest`).
	        run: |
	          curl -fsSL https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz
	          sudo mv crane /usr/local/bin/
	      - name: Log in to DockerHub
	        # Credentials reach the script through the environment rather than
	        # being expanded into the script text, so shell metacharacters in
	        # them are never re-interpreted.
	        env:
	          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
	          DOCKERHUB_PASSWORD: ${{ secrets.DOCKERHUB_PASSWORD }}
	        run: |
	          echo "$DOCKERHUB_PASSWORD" | crane auth login docker.io -u "$DOCKERHUB_USERNAME" --password-stdin
	      - name: Log in to quay.io
	        env:
	          LOCALAI_REGISTRY_USERNAME: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
	          LOCALAI_REGISTRY_PASSWORD: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
	        run: |
	          echo "$LOCALAI_REGISTRY_PASSWORD" | crane auth login quay.io -u "$LOCALAI_REGISTRY_USERNAME" --password-stdin
	      - name: Docker meta (DockerHub)
	        id: meta
	        uses: docker/metadata-action@v5
	        with:
	          images: |
	            localai/localai-backends
	          tags: |
	            type=ref,event=branch
	            type=semver,pattern={{raw}}
	            type=sha
	          flavor: |
	            latest=auto
	            suffix=-metal-darwin-arm64-llama-cpp,onlatest=true
	      - name: Docker meta (Quay)
	        id: quaymeta
	        uses: docker/metadata-action@v5
	        with:
	          images: |
	            quay.io/go-skynet/local-ai-backends
	          tags: |
	            type=ref,event=branch
	            type=semver,pattern={{raw}}
	            type=sha
	          flavor: |
	            latest=auto
	            suffix=-metal-darwin-arm64-llama-cpp,onlatest=true
	      - name: Push Docker image (DockerHub)
	        env:
	          TAGS: ${{ steps.meta.outputs.tags }}
	        # The unquoted $(...) splits the tag list on whitespace; commas are
	        # turned into newlines first so either separator works.
	        run: |
	          for tag in $(echo "$TAGS" | tr ',' '\n'); do
	            crane push llama-cpp.tar "$tag"
	          done
	      - name: Push Docker image (Quay)
	        env:
	          TAGS: ${{ steps.quaymeta.outputs.tags }}
	        run: |
	          for tag in $(echo "$TAGS" | tr ',' '\n'); do
	            crane push llama-cpp.tar "$tag"
	          done
	  # llama-cpp-darwin-x86: same build as llama-cpp-darwin, on a macos-13 runner
	  # with PLATFORMARCH=darwin/amd64; uploads the `llama-cpp-tar-x86` artifact.
	  llama-cpp-darwin-x86:
	    runs-on: macos-13
	    strategy:
	      matrix:
	        go-version: ['1.21.x']
	    steps:
	      - name: Clone
	        uses: actions/checkout@v4
	        with:
	          submodules: true
	      - name: Setup Go ${{ matrix.go-version }}
	        uses: actions/setup-go@v5
	        with:
	          go-version: ${{ matrix.go-version }}
	          cache: false
	      # You can test your matrix by printing the current Go version
	      - name: Display Go version
	        run: go version
	      - name: Dependencies
	        run: |
	          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
	      - name: Build llama-cpp-darwin
	        run: |
	          make protogen-go
	          make build
	          export PLATFORMARCH=darwin/amd64
	          bash scripts/build-llama-cpp-darwin.sh
	          ls -la build/darwin.tar
	          mv build/darwin.tar build/llama-cpp.tar
	      - name: Upload llama-cpp.tar
	        uses: actions/upload-artifact@v4
	        with:
	          name: llama-cpp-tar-x86
	          path: build/llama-cpp.tar
	  # llama-cpp-darwin-x86-publish: downloads the `llama-cpp-tar-x86` artifact and
	  # pushes it with crane to DockerHub and quay.io under every tag computed by
	  # docker/metadata-action.
	  llama-cpp-darwin-x86-publish:
	    needs: llama-cpp-darwin-x86
	    runs-on: ubuntu-latest
	    steps:
	      - name: Download llama-cpp.tar
	        uses: actions/download-artifact@v4
	        with:
	          name: llama-cpp-tar-x86
	          path: .
	      - name: Install crane
	        # -f makes curl fail on an HTTP error instead of piping an error page
	        # into tar. The release is not pinned (`latest`).
	        run: |
	          curl -fsSL https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz
	          sudo mv crane /usr/local/bin/
	      - name: Log in to DockerHub
	        # Credentials reach the script through the environment rather than
	        # being expanded into the script text, so shell metacharacters in
	        # them are never re-interpreted.
	        env:
	          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
	          DOCKERHUB_PASSWORD: ${{ secrets.DOCKERHUB_PASSWORD }}
	        run: |
	          echo "$DOCKERHUB_PASSWORD" | crane auth login docker.io -u "$DOCKERHUB_USERNAME" --password-stdin
	      - name: Log in to quay.io
	        env:
	          LOCALAI_REGISTRY_USERNAME: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
	          LOCALAI_REGISTRY_PASSWORD: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
	        run: |
	          echo "$LOCALAI_REGISTRY_PASSWORD" | crane auth login quay.io -u "$LOCALAI_REGISTRY_USERNAME" --password-stdin
	      - name: Docker meta (DockerHub)
	        id: meta
	        uses: docker/metadata-action@v5
	        with:
	          images: |
	            localai/localai-backends
	          tags: |
	            type=ref,event=branch
	            type=semver,pattern={{raw}}
	            type=sha
	          flavor: |
	            latest=auto
	            suffix=-darwin-x86-llama-cpp,onlatest=true
	      - name: Docker meta (Quay)
	        id: quaymeta
	        uses: docker/metadata-action@v5
	        with:
	          images: |
	            quay.io/go-skynet/local-ai-backends
	          tags: |
	            type=ref,event=branch
	            type=semver,pattern={{raw}}
	            type=sha
	          flavor: |
	            latest=auto
	            suffix=-darwin-x86-llama-cpp,onlatest=true
	      - name: Push Docker image (DockerHub)
	        env:
	          TAGS: ${{ steps.meta.outputs.tags }}
	        # The unquoted $(...) splits the tag list on whitespace; commas are
	        # turned into newlines first so either separator works.
	        run: |
	          for tag in $(echo "$TAGS" | tr ',' '\n'); do
	            crane push llama-cpp.tar "$tag"
	          done
	      - name: Push Docker image (Quay)
	        env:
	          TAGS: ${{ steps.quaymeta.outputs.tags }}
	        run: |
	          for tag in $(echo "$TAGS" | tr ',' '\n'); do
	            crane push llama-cpp.tar "$tag"
	          done

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

chore(model gallery): add arcee-ai_afm-4.5b (#5938) #12

Workflow file

chore(model gallery): add arcee-ai_afm-4.5b (#5938) #12

Uh oh!

Workflow file for this run