Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ jobs:
platforms: linux/amd64, linux/arm64
build-args: |
"LLAMA_SERVER_VERSION=${{ env.LLAMA_SERVER_VERSION }}"
"VERSION=${{ env.RELEASE_TAG }}"
push: true
sbom: true
provenance: mode=max
Expand All @@ -317,6 +318,7 @@ jobs:
"LLAMA_SERVER_VERSION=${{ env.LLAMA_SERVER_VERSION }}"
"LLAMA_SERVER_VARIANT=cuda"
"BASE_IMAGE=nvidia/cuda:12.9.0-runtime-ubuntu24.04"
"VERSION=${{ env.RELEASE_TAG }}"
push: true
sbom: true
provenance: mode=max
Expand All @@ -335,6 +337,7 @@ jobs:
"VLLM_VERSION=${{ env.VLLM_VERSION }}"
"VLLM_CUDA_VERSION=cu130"
"VLLM_PYTHON_TAG=cp38-abi3"
"VERSION=${{ env.RELEASE_TAG }}"
push: true
sbom: true
provenance: mode=max
Expand All @@ -351,6 +354,7 @@ jobs:
"LLAMA_SERVER_VARIANT=cuda"
"BASE_IMAGE=nvidia/cuda:12.9.0-runtime-ubuntu24.04"
"SGLANG_VERSION=${{ env.SGLANG_VERSION }}"
"VERSION=${{ env.RELEASE_TAG }}"
push: true
sbom: true
provenance: mode=max
Expand All @@ -364,6 +368,7 @@ jobs:
platforms: linux/amd64, linux/arm64
build-args: |
"LLAMA_SERVER_VERSION=${{ env.LLAMA_SERVER_VERSION }}"
"VERSION=${{ env.RELEASE_TAG }}"
push: true
sbom: true
provenance: mode=max
Expand All @@ -379,6 +384,7 @@ jobs:
"LLAMA_SERVER_VERSION=${{ env.LLAMA_SERVER_VERSION }}"
"LLAMA_SERVER_VARIANT=rocm"
"BASE_IMAGE=rocm/dev-ubuntu-22.04"
"VERSION=${{ env.RELEASE_TAG }}"
push: true
sbom: true
provenance: mode=max
Expand All @@ -395,6 +401,7 @@ jobs:
"LLAMA_SERVER_VERSION=${{ env.LLAMA_SERVER_VERSION }}"
"LLAMA_SERVER_VARIANT=musa"
"BASE_IMAGE=mthreads/musa:rc4.3.0-runtime-ubuntu22.04-amd64"
"VERSION=${{ env.RELEASE_TAG }}"
push: true
sbom: true
provenance: mode=max
Expand All @@ -411,6 +418,7 @@ jobs:
"LLAMA_SERVER_VERSION=${{ env.LLAMA_SERVER_VERSION }}"
"LLAMA_SERVER_VARIANT=cann"
"BASE_IMAGE=ascendai/cann:8.2.rc2-910b-ubuntu22.04-py3.11"
"VERSION=${{ env.RELEASE_TAG }}"
push: true
sbom: true
provenance: mode=max
Expand Down
9 changes: 7 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@ ARG LLAMA_BINARY_PATH=/com.docker.llama-server.native.linux.${LLAMA_SERVER_VARIA
# use 22.04 for gpu variants to match ROCm/CUDA base images
ARG BASE_IMAGE=ubuntu:26.04

ARG VERSION=dev

FROM docker.io/library/golang:${GO_VERSION}-bookworm AS builder

ARG VERSION

# Install git for go mod download if needed
RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*

Expand All @@ -30,13 +34,14 @@ COPY --link . .
# Build the Go binary (static build)
RUN --mount=type=cache,target=/go/pkg/mod \
--mount=type=cache,target=/root/.cache/go-build \
CGO_ENABLED=1 GOOS=linux go build -ldflags="-s -w" -o model-runner .
CGO_ENABLED=1 GOOS=linux go build -ldflags="-s -w -X main.Version=${VERSION}" -o model-runner .

# Build the Go binary for SGLang (without vLLM)
FROM builder AS builder-sglang
ARG VERSION
RUN --mount=type=cache,target=/go/pkg/mod \
--mount=type=cache,target=/root/.cache/go-build \
CGO_ENABLED=1 GOOS=linux go build -tags=novllm -ldflags="-s -w" -o model-runner .
CGO_ENABLED=1 GOOS=linux go build -tags=novllm -ldflags="-s -w -X main.Version=${VERSION}" -o model-runner .

# --- Get llama.cpp binary ---
FROM docker/docker-model-backend-llamacpp:${LLAMA_SERVER_VERSION}-${LLAMA_SERVER_VARIANT} AS llama-server
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ BUILD_DMR ?= 1

# Build the Go application
build:
CGO_ENABLED=1 go build -ldflags="-s -w" -o $(APP_NAME) .
CGO_ENABLED=1 go build -ldflags="-s -w -X main.Version=$(shell git describe --tags --always --dirty --match 'v*')" -o $(APP_NAME) .

# Run the application locally
run: build
Expand Down
22 changes: 20 additions & 2 deletions cmd/cli/commands/version.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package commands

import (
"runtime"

"github.com/docker/model-runner/cmd/cli/commands/completion"
"github.com/docker/model-runner/cmd/cli/desktop"
"github.com/spf13/cobra"
Expand All @@ -11,8 +13,24 @@ func newVersionCmd() *cobra.Command {
Use: "version",
Short: "Show the Docker Model Runner version",
Run: func(cmd *cobra.Command, args []string) {
cmd.Printf("Docker Model Runner version %s\n", desktop.Version)
cmd.Printf("Docker Engine Kind: %s\n", modelRunner.EngineKind())
cmd.Println("Client:")
cmd.Printf(" Version: %s\n", desktop.Version)
cmd.Printf(" OS/Arch: %s/%s\n", runtime.GOOS, runtime.GOARCH)

cmd.Println()
cmd.Println("Server:")
if desktopClient == nil {
cmd.Println(" Version: (not reachable)")
cmd.Println(" Engine: (not reachable)")
return
}
sv, err := desktopClient.ServerVersion()
if err != nil {
cmd.Println(" Version: (not reachable)")
} else {
cmd.Printf(" Version: %s\n", sv.Version)
}
cmd.Printf(" Engine: %s\n", modelRunner.EngineKind())
Comment on lines +22 to +33
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The current logic for displaying server information in the version command has a couple of issues. First, it incorrectly reports the engine as (not reachable) when running in standalone mode (when desktopClient is nil). The engine kind is a client-side property and should be available. Second, the logic is a bit repetitive and can be simplified for better readability and correctness.

I suggest refactoring this section to always display the engine kind and to determine the server version more concisely.

            serverVersion := "(not reachable)"
            if desktopClient != nil {
                if sv, err := desktopClient.ServerVersion(); err == nil {
                    serverVersion = sv.Version
                }
            }
            cmd.Printf(" Version:    %s\n", serverVersion)
            cmd.Printf(" Engine:     %s\n", modelRunner.EngineKind())

},
ValidArgsFunction: completion.NoComplete,
}
Expand Down
28 changes: 28 additions & 0 deletions cmd/cli/desktop/desktop.go
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,34 @@ func (c *Client) Remove(modelArgs []string, force bool) (string, error) {
return modelRemoved, nil
}

// ServerVersionResponse is the JSON payload that Client.ServerVersion decodes
// from the response to a GET /version request. Its single field is read from
// the "version" key of the JSON object.
type ServerVersionResponse struct {
Version string `json:"version"`
}

// ServerVersion issues a GET request for /version through c.doRequest and
// decodes the JSON response body into a ServerVersionResponse.
//
// An error is returned when the request itself fails, when the response
// status is anything other than 200 OK, when the body cannot be read, or when
// the body cannot be unmarshalled. In every error case the returned value is
// the zero ServerVersionResponse.
func (c *Client) ServerVersion() (ServerVersionResponse, error) {
	// Zero value handed back on every failure path.
	var empty ServerVersionResponse

	resp, err := c.doRequest(http.MethodGet, "/version", nil)
	if err != nil {
		return empty, err
	}
	defer resp.Body.Close()

	if status := resp.StatusCode; status != http.StatusOK {
		return empty, fmt.Errorf("failed to get server version: %s", resp.Status)
	}

	payload, readErr := io.ReadAll(resp.Body)
	if readErr != nil {
		return empty, fmt.Errorf("failed to read response body: %w", readErr)
	}

	// Decode into a separate variable so a partially filled struct is never
	// returned alongside an error.
	var decoded ServerVersionResponse
	if decodeErr := json.Unmarshal(payload, &decoded); decodeErr != nil {
		return empty, fmt.Errorf("failed to unmarshal response body: %w", decodeErr)
	}

	return decoded, nil
}

// BackendStatus to be imported from docker/model-runner when https://github.com/docker/model-runner/pull/42 is merged.
type BackendStatus struct {
BackendName string `json:"backend_name"`
Expand Down
7 changes: 7 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"context"
"crypto/tls"
"encoding/json"
"net"
"net/http"
"os"
Expand Down Expand Up @@ -264,6 +265,12 @@ func main() {
anthropicHandler := anthropic.NewHandler(log, schedulerHTTP, nil, modelManager)
router.Handle(anthropic.APIPrefix+"/", anthropicHandler)

// Register /version endpoint
router.HandleFunc("/version", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_ = json.NewEncoder(w).Encode(map[string]string{"version": Version})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The error returned by json.NewEncoder(w).Encode() is being ignored. While errors are unlikely when writing to an http.ResponseWriter, they can occur (e.g., if the client closes the connection). It's a good practice to handle this error, for example by logging it, to aid in debugging potential network issues.

if err := json.NewEncoder(w).Encode(map[string]string{"version": Version}); err != nil {
			log.Warnf("failed to write version response: %v", err)
		}

})

// Register root handler LAST - it will only catch exact "/" requests that don't match other patterns
router.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
// Only respond to exact root path
Expand Down
14 changes: 3 additions & 11 deletions pkg/inference/models/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,6 @@ func TestPullModel(t *testing.T) {
}

func TestHandleGetModel(t *testing.T) {
tempDir := t.TempDir()

// Create a test registry
server := httptest.NewServer(testregistry.New())
defer server.Close()
Expand Down Expand Up @@ -207,7 +205,9 @@ func TestHandleGetModel(t *testing.T) {

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
log := logrus.NewEntry(logrus.StandardLogger())
tempDir := t.TempDir()
logger := logrus.New()
log := logrus.NewEntry(logger)
manager := NewManager(log.WithFields(logrus.Fields{"component": "model-manager"}), ClientConfig{
StoreRootPath: tempDir,
Logger: log.WithFields(logrus.Fields{"component": "model-manager"}),
Expand Down Expand Up @@ -263,14 +263,6 @@ func TestHandleGetModel(t *testing.T) {
t.Errorf("Failed to decode response body: %v", err)
}
}

// Clean tempDir after each test
if err := os.RemoveAll(tempDir); err != nil {
t.Fatalf("Failed to clean temp directory: %v", err)
}
if err := os.MkdirAll(tempDir, 0755); err != nil {
t.Fatalf("Failed to recreate temp directory: %v", err)
}
})
}
}
Expand Down
3 changes: 3 additions & 0 deletions version.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
package main

// Version is the version string of this binary. It defaults to "dev"; the
// Dockerfile and Makefile builds override it at link time with
// -ldflags "-X main.Version=...".
var Version = "dev"
Loading