Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Useful narrower checks:

```sh
./ds4_test --server
./ds4_test --launch
./ds4_test --logprob-vectors
./ds4_test --long-context
./ds4_test --tool-call-quality
Expand All @@ -40,6 +41,9 @@ What they cover:
- `--server`: request parsing, chat rendering, streaming, tool-call parsing,
thinking controls, KV disk-cache bookkeeping, and other server-side logic.
This is the best quick check for API and prompt-rendering changes.
- `--launch`: `ds4-launch` tool-name parsing, server-option parsing, DS4 lock
pid extraction, and `lsof` listener-port extraction. This is the quick check
for launcher changes and does not start a model server.
- `--logprob-vectors`: compares local token bytes and top-logprob slices against
official DeepSeek V4 Flash continuation vectors. This catches tokenizer,
template, attention, and logits regressions.
Expand Down
40 changes: 27 additions & 13 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ endif
.PHONY: all help clean test cpu cuda cuda-spark cuda-generic cuda-regression

ifeq ($(UNAME_S),Darwin)
all: ds4 ds4-server ds4-bench
all: ds4 ds4-server ds4-bench ds4-launch

help:
@echo "DS4 build targets:"
@echo " make Build Metal ./ds4, ./ds4-server, and ./ds4-bench"
@echo " make cpu Build CPU-only ./ds4, ./ds4-server, and ./ds4-bench"
@echo " make Build Metal ./ds4, ./ds4-server, ./ds4-bench, and ./ds4-launch"
@echo " make cpu Build CPU-only ./ds4, ./ds4-server, ./ds4-bench, and ./ds4-launch"
@echo " make test Build and run tests"
@echo " make clean Remove build outputs"

Expand All @@ -53,10 +53,14 @@ ds4-server: ds4_server.o rax.o $(CORE_OBJS)
ds4-bench: ds4_bench.o $(CORE_OBJS)
$(CC) $(CFLAGS) -o $@ ds4_bench.o $(CORE_OBJS) $(METAL_LDLIBS)

cpu: ds4_cli_cpu.o ds4_server_cpu.o ds4_bench_cpu.o linenoise.o rax.o $(CPU_CORE_OBJS)
# Launcher binary (Darwin branch): links the single object ds4_launch.o against
# $(LDLIBS) only. No $(CORE_OBJS) or $(METAL_LDLIBS) appear on this link line.
ds4-launch: ds4_launch.o
$(CC) $(CFLAGS) -o $@ ds4_launch.o $(LDLIBS)

cpu: ds4_cli_cpu.o ds4_server_cpu.o ds4_bench_cpu.o ds4_launch.o linenoise.o rax.o $(CPU_CORE_OBJS)
$(CC) $(CFLAGS) -o ds4 ds4_cli_cpu.o linenoise.o $(CPU_CORE_OBJS) $(LDLIBS)
$(CC) $(CFLAGS) -o ds4-server ds4_server_cpu.o rax.o $(CPU_CORE_OBJS) $(LDLIBS)
$(CC) $(CFLAGS) -o ds4-bench ds4_bench_cpu.o $(CPU_CORE_OBJS) $(LDLIBS)
$(CC) $(CFLAGS) -o ds4-launch ds4_launch.o $(LDLIBS)

cuda-regression:
@echo "cuda-regression requires a CUDA build"
Expand All @@ -68,23 +72,23 @@ help:
@echo " make cuda-spark Build CUDA for DGX Spark / GB10"
@echo " make cuda-generic Build CUDA for a generic local CUDA GPU"
@echo " make cuda CUDA_ARCH=sm_N Build CUDA with an explicit nvcc -arch value"
@echo " make cpu Build CPU-only ./ds4, ./ds4-server, and ./ds4-bench"
@echo " make cpu Build CPU-only ./ds4, ./ds4-server, ./ds4-bench, and ./ds4-launch"
@echo " make test Build and run tests"
@echo " make clean Remove build outputs"

cuda-spark:
$(MAKE) ds4 ds4-server ds4-bench CUDA_ARCH=
$(MAKE) ds4 ds4-server ds4-bench ds4-launch CUDA_ARCH=

cuda-generic:
$(MAKE) ds4 ds4-server ds4-bench CUDA_ARCH=native
$(MAKE) ds4 ds4-server ds4-bench ds4-launch CUDA_ARCH=native

cuda:
@if [ -z "$(strip $(CUDA_ARCH))" ]; then \
echo "error: specify CUDA_ARCH, for example: make cuda CUDA_ARCH=sm_120"; \
echo " or use make cuda-spark / make cuda-generic"; \
exit 2; \
fi
$(MAKE) ds4 ds4-server ds4-bench CUDA_ARCH="$(CUDA_ARCH)"
$(MAKE) ds4 ds4-server ds4-bench ds4-launch CUDA_ARCH="$(CUDA_ARCH)"

ds4: ds4_cli.o linenoise.o $(CORE_OBJS)
$(NVCC) $(NVCCFLAGS) -o $@ $^ $(CUDA_LDLIBS)
Expand All @@ -95,10 +99,14 @@ ds4-server: ds4_server.o rax.o $(CORE_OBJS)
ds4-bench: ds4_bench.o $(CORE_OBJS)
$(NVCC) $(NVCCFLAGS) -o $@ $^ $(CUDA_LDLIBS)

cpu: ds4_cli_cpu.o ds4_server_cpu.o ds4_bench_cpu.o linenoise.o rax.o $(CPU_CORE_OBJS)
# Launcher binary (non-Darwin branch): linked with $(CC) and $(LDLIBS) rather
# than $(NVCC)/$(CUDA_LDLIBS); the only object on the link line is ds4_launch.o.
ds4-launch: ds4_launch.o
$(CC) $(CFLAGS) -o $@ ds4_launch.o $(LDLIBS)

cpu: ds4_cli_cpu.o ds4_server_cpu.o ds4_bench_cpu.o ds4_launch.o linenoise.o rax.o $(CPU_CORE_OBJS)
$(CC) $(CFLAGS) -o ds4 ds4_cli_cpu.o linenoise.o $(CPU_CORE_OBJS) $(LDLIBS)
$(CC) $(CFLAGS) -o ds4-server ds4_server_cpu.o rax.o $(CPU_CORE_OBJS) $(LDLIBS)
$(CC) $(CFLAGS) -o ds4-bench ds4_bench_cpu.o $(CPU_CORE_OBJS) $(LDLIBS)
$(CC) $(CFLAGS) -o ds4-launch ds4_launch.o $(LDLIBS)

cuda-regression: tests/cuda_long_context_smoke
./tests/cuda_long_context_smoke
Expand All @@ -116,9 +124,15 @@ ds4_server.o: ds4_server.c ds4.h rax.h
ds4_bench.o: ds4_bench.c ds4.h
$(CC) $(CFLAGS) -c -o $@ ds4_bench.c

# Compile the launcher object. ds4_launch.c is the only listed prerequisite, so
# header changes do not trigger a rebuild of this object.
ds4_launch.o: ds4_launch.c
$(CC) $(CFLAGS) -c -o $@ ds4_launch.c

ds4_test.o: tests/ds4_test.c ds4_server.c ds4.h rax.h
$(CC) $(CFLAGS) -Wno-unused-function -c -o $@ tests/ds4_test.c

# Test build of the launcher: the same ds4_launch.c compiled into a separate
# object with -DDS4_LAUNCH_TEST defined. This object is a prerequisite of
# ds4_test. -Wno-unused-function presumably silences helpers left unused in the
# test configuration -- NOTE(review): confirm against ds4_launch.c.
ds4_launch_test.o: ds4_launch.c
$(CC) $(CFLAGS) -DDS4_LAUNCH_TEST -Wno-unused-function -c -o $@ ds4_launch.c

tests/cuda_long_context_smoke.o: tests/cuda_long_context_smoke.c ds4_gpu.h
$(CC) $(CFLAGS) -I. -c -o $@ tests/cuda_long_context_smoke.c

Expand Down Expand Up @@ -149,15 +163,15 @@ ds4_cuda.o: ds4_cuda.cu ds4_gpu.h ds4_iq2_tables_cuda.inc
tests/cuda_long_context_smoke: tests/cuda_long_context_smoke.o ds4_cuda.o
$(NVCC) $(NVCCFLAGS) -o $@ $^ $(CUDA_LDLIBS)

ds4_test: ds4_test.o rax.o $(CORE_OBJS)
ds4_test: ds4_test.o ds4_launch_test.o rax.o $(CORE_OBJS)
ifeq ($(UNAME_S),Darwin)
$(CC) $(CFLAGS) -o $@ ds4_test.o rax.o $(CORE_OBJS) $(METAL_LDLIBS)
$(CC) $(CFLAGS) -o $@ ds4_test.o ds4_launch_test.o rax.o $(CORE_OBJS) $(METAL_LDLIBS)
else
$(NVCC) $(NVCCFLAGS) -o $@ ds4_test.o rax.o $(CORE_OBJS) $(CUDA_LDLIBS)
$(NVCC) $(NVCCFLAGS) -o $@ ds4_test.o ds4_launch_test.o rax.o $(CORE_OBJS) $(CUDA_LDLIBS)
endif

test: ds4_test
./ds4_test

clean:
rm -f ds4 ds4-server ds4-bench ds4_cpu ds4_native ds4_server_test ds4_test *.o tests/cuda_long_context_smoke tests/cuda_long_context_smoke.o
rm -f ds4 ds4-server ds4-bench ds4-launch ds4_cpu ds4_native ds4_server_test ds4_test *.o tests/cuda_long_context_smoke tests/cuda_long_context_smoke.o
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,22 @@ The `384000` output limit below avoids token caps since the model is able
to generate very long replies otherwise (up to 384k tokens). The server
still stops when the configured context window is full.

`ds4-launch` can configure and start common local agent clients for you. The
first argument selects the tool, and the remaining arguments are passed to
`ds4-server` when a new server is needed:

```sh
./ds4-launch pi --ctx 100000 --kv-disk-dir /tmp/ds4-kv --kv-disk-space-mb 8192
./ds4-launch copilot --port 9000
./ds4-launch opencode
```

If a DS4 server is already running, the launcher reuses it instead of trying to
load a second model process: it reads the lock-owner pid, discovers the listening
port with `lsof`, prints the pid/port it found, configures the selected client,
and then gives the terminal to that client. The launched clients see the model
as `DeepSeek V4 Flash (ds4.c local)`.

For **opencode**, add a provider and agent entry to
`~/.config/opencode/opencode.json`:

Expand Down
Loading