- Create an SSH key pair (the public key goes on your droplet)
# Generate an Ed25519 key pair; -C tags the public key with a comment (your email)
ssh-keygen -C "your@email.com" -t ed25519
- SSH into your droplet
ssh root@
ssh -i ~/.ssh/id_ed25519 root@
- Enter the vLLM Container
# Open an interactive bash shell inside the running container named "rocm"
# (-i keeps stdin open, -t allocates a TTY).
docker exec -it rocm /bin/bash
- Launching a Basic Model
# Start the vLLM OpenAI-compatible API server with a small Qwen model.
# Fixes: each option line needs a trailing '\' continuation (otherwise only the
# first line runs), and the log path needs a '>' redirect (otherwise it is
# passed to the server as a positional argument).
python -m vllm.entrypoints.openai.api_server \
  --model Qwen/Qwen2.5-1.5B-Instruct \
  --host 0.0.0.0 \
  --port 8000 \
  --dtype float16 \
  --allowed-origins '["*"]' \
  > /tmp/vllm.log 2>&1 &
- Launching a Llama model (gated on Hugging Face — requires an access token)
# Start the vLLM server with a gated Llama model.
# Fixes: missing '\' continuations, missing '>' before the log path, and
# --hf-token was given no value — pass the token explicitly.
# NOTE(review): exporting HF_TOKEN in the environment may also work depending
# on the vLLM version — confirm against the installed vLLM's CLI docs.
python -m vllm.entrypoints.openai.api_server \
  --model meta-llama/Llama-3.2-3B-Instruct \
  --host 0.0.0.0 \
  --port 8000 \
  --dtype float16 \
  --hf-token "$HF_TOKEN" \
  --allowed-origins '["*"]' \
  > /tmp/vllm.log 2>&1 &
- Launching a larger model (Qwen 7B — needs more GPU memory)
# Start the vLLM server with the larger Qwen 7B model.
# Fixes: missing '\' continuations and missing '>' redirect for the log file,
# same as the other launch commands.
python -m vllm.entrypoints.openai.api_server \
  --model Qwen/Qwen2.5-7B-Instruct \
  --host 0.0.0.0 \
  --port 8000 \
  --dtype float16 \
  --allowed-origins '["*"]' \
  > /tmp/vllm.log 2>&1 &
- Check logs after starting the LLM Server
# Follow the server log live; Ctrl-C stops tailing without stopping the server.
tail -f /tmp/vllm.log