14 changes: 13 additions & 1 deletion Makefile
@@ -26,14 +26,24 @@ DOCKER_BUILD_ARGS := \
BUILD_DMR ?= 1

# Main targets
.PHONY: build run clean test integration-tests test-docker-ce-installation docker-build docker-build-multiplatform docker-run docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang docker-run-impl help validate lint docker-build-diffusers docker-run-diffusers vllm-metal-build vllm-metal-install vllm-metal-dev vllm-metal-clean
.PHONY: build build-dmrlet run clean test integration-tests test-docker-ce-installation docker-build docker-build-multiplatform docker-run docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang docker-run-impl help validate lint docker-build-diffusers docker-run-diffusers vllm-metal-build vllm-metal-install vllm-metal-dev vllm-metal-clean
# Default target
.DEFAULT_GOAL := build

# Build the Go application
build:
	CGO_ENABLED=1 go build -ldflags="-s -w" -o $(APP_NAME) .

# Build dmrlet binary
build-dmrlet:
	@echo "Building dmrlet..."
	@VERSION=$$(git describe --tags --always --dirty 2>/dev/null || echo "dev"); \
	GIT_COMMIT=$$(git rev-parse HEAD 2>/dev/null || echo "unknown"); \
	BUILD_DATE=$$(date -u +"%Y-%m-%dT%H:%M:%SZ" 2>/dev/null || echo "unknown"); \
	cd cmd/dmrlet && CGO_ENABLED=0 go build -ldflags="-s -w -X 'main.Version=$${VERSION}' -X 'main.GitCommit=$${GIT_COMMIT}' -X 'main.BuildDate=$${BUILD_DATE}'" -o dmrlet .
	mv cmd/dmrlet/dmrlet .
	@echo "Built: dmrlet"

# Run the application locally
run: build
	@LLAMACPP_BIN="llamacpp/install/bin"; \
@@ -46,6 +56,7 @@ run: build
# Clean build artifacts
clean:
	rm -f $(APP_NAME)
	rm -f dmrlet
	rm -f model-runner.sock
	rm -rf $(MODELS_PATH)

@@ -219,6 +230,7 @@ vllm-metal-clean:
help:
@echo "Available targets:"
@echo " build - Build the Go application"
@echo " build-dmrlet - Build dmrlet binary (lightweight node agent)"
@echo " run - Run the application locally"
@echo " clean - Clean build artifacts"
@echo " test - Run tests"
109 changes: 109 additions & 0 deletions README.md
@@ -415,6 +415,115 @@ in the form of [a Helm chart and static YAML](charts/docker-model-runner/README.
If you are interested in a specific Kubernetes use-case, please start a
discussion on the issue tracker.

## dmrlet: Container Orchestrator for AI Inference

dmrlet is a purpose-built container orchestrator for AI inference workloads. Unlike a general-purpose orchestrator such as Kubernetes, it focuses exclusively on running stateless inference containers with zero configuration overhead: multi-GPU mapping "just works" without YAML, device plugins, or node selectors.

### Key Features

| Feature | Kubernetes | dmrlet |
|---------|------------|--------|
| Multi-GPU setup | Device plugins + node selectors + resource limits YAML | `dmrlet serve llama3.2 --gpus all` |
| Config overhead | 50+ lines of YAML minimum | Zero YAML, CLI-only |
| Time to first inference | Minutes (pod scheduling, image pull) | Seconds (model already local) |
| Model management | External (mount PVCs, manage yourself) | Integrated with Docker Model Runner store |

### Building dmrlet

```bash
# Build the dmrlet binary
go build -o dmrlet ./cmd/dmrlet

# Verify it works
./dmrlet --help
```
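
The `build-dmrlet` Makefile target shown above additionally stamps version metadata into the binary via `-ldflags "-X ..."`. For those flags to take effect, `cmd/dmrlet/main.go` presumably declares matching package-level string variables, roughly like this sketch (not the actual source):

```go
package main

// Overridden at link time by the Makefile via
// -X 'main.Version=...' -X 'main.GitCommit=...' -X 'main.BuildDate=...'.
// These defaults apply to a plain "go build".
var (
	Version   = "dev"
	GitCommit = "unknown"
	BuildDate = "unknown"
)
```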

### Usage

**Start the daemon:**
```bash
# Start in foreground
dmrlet daemon

# With custom socket path
dmrlet daemon --socket /tmp/dmrlet.sock
```

**Serve a model:**
```bash
# Auto-detect backend and GPUs
dmrlet serve llama3.2

# Specify backend
dmrlet serve llama3.2 --backend vllm

# Specify GPU allocation
dmrlet serve llama3.2 --gpus 0,1
dmrlet serve llama3.2 --gpus all

# Multiple replicas
dmrlet serve llama3.2 --replicas 2

# Backend-specific options
dmrlet serve llama3.2 --ctx-size 4096 # llama.cpp context size
dmrlet serve llama3.2 --gpu-memory 0.8 # vLLM GPU memory utilization
```

**List running models:**
```bash
dmrlet ps
# MODEL BACKEND REPLICAS GPUS ENDPOINTS STATUS
# llama3.2 llama.cpp 1 [0,1,2,3] localhost:30000 healthy
```
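
llama.cpp, vLLM, and SGLang all expose OpenAI-compatible HTTP APIs, so an endpoint reported by `dmrlet ps` can be queried directly. A minimal Go sketch, assuming the endpoint and model name from the listing above:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Endpoint taken from `dmrlet ps`; the path follows the
	// OpenAI-compatible chat completions API.
	body := []byte(`{"model": "llama3.2", "messages": [{"role": "user", "content": "Hello"}]}`)
	resp, err := http.Post("http://localhost:30000/v1/chat/completions",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out))
}
```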

**View logs:**
```bash
dmrlet logs llama3.2 # Last 100 lines
dmrlet logs llama3.2 -f # Follow logs
```

**Scale replicas:**
```bash
dmrlet scale llama3.2 4 # Scale to 4 replicas
```

**Stop a model:**
```bash
dmrlet stop llama3.2
dmrlet stop --all # Stop all models
```

**Check status:**
```bash
dmrlet status
# DAEMON: running
# SOCKET: /var/run/dmrlet.sock
#
# GPUs:
# GPU 0: NVIDIA A100 80GB 81920MB (in use: llama3.2)
# GPU 1: NVIDIA A100 80GB 81920MB (available)
#
# MODELS: 1 running
```
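
The GPU lines come from the daemon's allocation tracking (the GPU Manager described under Architecture below). Conceptually, the bookkeeping behind this output might look like the following sketch; the `gpu` type and its fields are illustrative, not the actual source:

```go
package main

import "fmt"

// gpu models per-device bookkeeping: each GPU is either available
// or pinned to the model currently using it. Hypothetical sketch.
type gpu struct {
	id     int
	name   string
	memMB  int
	usedBy string // empty means available
}

func main() {
	gpus := []gpu{
		{0, "NVIDIA A100 80GB", 81920, "llama3.2"},
		{1, "NVIDIA A100 80GB", 81920, ""},
	}
	for _, g := range gpus {
		state := "available"
		if g.usedBy != "" {
			state = "in use: " + g.usedBy
		}
		fmt.Printf("GPU %d: %s %dMB (%s)\n", g.id, g.name, g.memMB, state)
	}
}
```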

### Supported Backends

- **llama.cpp** - Default backend for GGUF models
- **vLLM** - High-throughput serving for safetensors models
- **SGLang** - Fast serving with RadixAttention

### Architecture

```
dmrlet daemon
├── GPU Manager - Auto-detect and allocate GPUs
├── Container Manager - Docker-based container lifecycle
├── Service Registry - Endpoint discovery with load balancing
├── Health Monitor - Auto-restart unhealthy containers
├── Auto-scaler - Scale based on QPS/latency/GPU utilization
└── Log Aggregator - Centralized log collection
```
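
As one example of how these pieces fit together, here is a minimal, self-contained sketch of the Service Registry's "endpoint discovery with load balancing": endpoints are registered per model and handed out round-robin. Type and method names are illustrative, not the actual dmrlet source:

```go
package main

import (
	"fmt"
	"sync"
)

// ServiceRegistry maps a model to its replica endpoints and resolves
// lookups round-robin, a simple load-balancing strategy.
type ServiceRegistry struct {
	mu        sync.Mutex
	endpoints map[string][]string
	next      map[string]int
}

func NewServiceRegistry() *ServiceRegistry {
	return &ServiceRegistry{
		endpoints: make(map[string][]string),
		next:      make(map[string]int),
	}
}

// Add registers a new replica endpoint for a model.
func (r *ServiceRegistry) Add(model, endpoint string) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.endpoints[model] = append(r.endpoints[model], endpoint)
}

// Resolve returns the next endpoint for a model, round-robin.
func (r *ServiceRegistry) Resolve(model string) (string, bool) {
	r.mu.Lock()
	defer r.mu.Unlock()
	eps := r.endpoints[model]
	if len(eps) == 0 {
		return "", false
	}
	ep := eps[r.next[model]%len(eps)]
	r.next[model]++
	return ep, true
}

func main() {
	reg := NewServiceRegistry()
	reg.Add("llama3.2", "localhost:30000")
	reg.Add("llama3.2", "localhost:30001")
	for i := 0; i < 4; i++ {
		ep, _ := reg.Resolve("llama3.2") // alternates between replicas
		fmt.Println(ep)
	}
}
```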

## Community

For general questions and discussion, please use [Docker Model Runner's Slack channel](https://dockercommunity.slack.com/archives/C09H9P5E57B).
88 changes: 88 additions & 0 deletions cmd/dmrlet/commands/list.go
@@ -0,0 +1,88 @@
package commands

import (
	"fmt"
	"os"

	"github.com/olekukonko/tablewriter"
	"github.com/olekukonko/tablewriter/renderer"
	"github.com/olekukonko/tablewriter/tw"
	"github.com/spf13/cobra"
)

func newListCmd() *cobra.Command {
	cmd := &cobra.Command{
		Use:     "list",
		Aliases: []string{"ls"},
		Short:   "List running models",
		Long: `List all running inference models managed by dmrlet.

Examples:
  dmrlet list
  dmrlet ls`,
		Args: cobra.NoArgs,
		RunE: func(cmd *cobra.Command, args []string) error {
			return runList(cmd)
		},
	}

	return cmd
}

func runList(cmd *cobra.Command) error {
	ctx := cmd.Context()

	if err := initManager(ctx); err != nil {
		return fmt.Errorf("initializing manager: %w", err)
	}

	running, err := manager.List(ctx)
	if err != nil {
		return fmt.Errorf("listing models: %w", err)
	}

	if len(running) == 0 {
		cmd.Println("No running models")
		return nil
	}

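	// Render a borderless, left-aligned table with no header rule so the
	// output stays minimal and easy to parse, similar to `docker ps`.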
	table := tablewriter.NewTable(os.Stdout,
		tablewriter.WithRenderer(renderer.NewBlueprint(tw.Rendition{
			Borders: tw.BorderNone,
			Settings: tw.Settings{
				Separators: tw.Separators{
					BetweenColumns: tw.Off,
				},
				Lines: tw.Lines{
					ShowHeaderLine: tw.Off,
				},
			},
		})),
		tablewriter.WithConfig(tablewriter.Config{
			Header: tw.CellConfig{
				Formatting: tw.CellFormatting{
					AutoFormat: tw.Off,
				},
				Alignment: tw.CellAlignment{Global: tw.AlignLeft},
				Padding:   tw.CellPadding{Global: tw.Padding{Left: "", Right: " "}},
			},
			Row: tw.CellConfig{
				Alignment: tw.CellAlignment{Global: tw.AlignLeft},
				Padding:   tw.CellPadding{Global: tw.Padding{Left: "", Right: " "}},
			},
		}),
	)
	table.Header([]string{"MODEL", "BACKEND", "PORT", "ENDPOINT"})

	for _, m := range running {
		table.Append([]string{
			m.ModelRef,
			string(m.Backend),
			fmt.Sprintf("%d", m.Port),
			m.Endpoint,
		})
	}

	table.Render()
	return nil
}
44 changes: 44 additions & 0 deletions cmd/dmrlet/commands/pull.go
@@ -0,0 +1,44 @@
package commands

import (
	"fmt"
	"os"

	"github.com/spf13/cobra"
)

func newPullCmd() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "pull MODEL",
		Short: "Pull a model without serving",
		Long: `Pull a model from Docker Hub or HuggingFace without starting an inference container.
This is useful for pre-downloading models.

Examples:
  dmrlet pull ai/smollm2
  dmrlet pull huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf`,
		Args: cobra.ExactArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			return runPull(cmd, args[0])
		},
	}

	return cmd
}

func runPull(cmd *cobra.Command, modelRef string) error {
	ctx := cmd.Context()

	if err := initStore(); err != nil {
		return fmt.Errorf("initializing store: %w", err)
	}

	cmd.Printf("Pulling model: %s\n", modelRef)

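	// Stream pull progress to stdout; EnsureModel should be a no-op
	// when the model is already present in the local store.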
	if err := store.EnsureModel(ctx, modelRef, os.Stdout); err != nil {
		return fmt.Errorf("pulling model: %w", err)
	}

	cmd.Printf("\nModel pulled successfully: %s\n", modelRef)
	return nil
}