From 3af8549d070defce01b85c5af6d59ed6af698bb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?= Date: Thu, 12 Feb 2026 13:33:16 +0100 Subject: [PATCH 1/2] feat(version): add server version endpoint and client version display --- .github/workflows/release.yml | 8 ++++++++ Dockerfile | 9 +++++++-- Makefile | 2 +- cmd/cli/commands/version.go | 22 ++++++++++++++++++++-- cmd/cli/desktop/desktop.go | 28 ++++++++++++++++++++++++++++ main.go | 7 +++++++ version.go | 3 +++ 7 files changed, 74 insertions(+), 5 deletions(-) create mode 100644 version.go diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e01555dc8..c9aa7bde2 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -137,6 +137,7 @@ jobs: platforms: linux/amd64, linux/arm64 build-args: | "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}" + "VERSION=${{ inputs.releaseTag }}" push: true sbom: true provenance: mode=max @@ -152,6 +153,7 @@ jobs: "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}" "LLAMA_SERVER_VARIANT=cuda" "BASE_IMAGE=nvidia/cuda:12.9.0-runtime-ubuntu24.04" + "VERSION=${{ inputs.releaseTag }}" push: true sbom: true provenance: mode=max @@ -170,6 +172,7 @@ jobs: "VLLM_VERSION=${{ inputs.vllmVersion }}" "VLLM_CUDA_VERSION=cu130" "VLLM_PYTHON_TAG=cp38-abi3" + "VERSION=${{ inputs.releaseTag }}" push: true sbom: true provenance: mode=max @@ -186,6 +189,7 @@ jobs: "LLAMA_SERVER_VARIANT=cuda" "BASE_IMAGE=nvidia/cuda:12.9.0-runtime-ubuntu24.04" "SGLANG_VERSION=${{ inputs.sglangVersion }}" + "VERSION=${{ inputs.releaseTag }}" push: true sbom: true provenance: mode=max @@ -199,6 +203,7 @@ jobs: platforms: linux/amd64, linux/arm64 build-args: | "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}" + "VERSION=${{ inputs.releaseTag }}" push: true sbom: true provenance: mode=max @@ -214,6 +219,7 @@ jobs: "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}" "LLAMA_SERVER_VARIANT=rocm" "BASE_IMAGE=rocm/dev-ubuntu-22.04" + 
"VERSION=${{ inputs.releaseTag }}" push: true sbom: true provenance: mode=max @@ -230,6 +236,7 @@ jobs: "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}" "LLAMA_SERVER_VARIANT=musa" "BASE_IMAGE=mthreads/musa:rc4.3.0-runtime-ubuntu22.04-amd64" + "VERSION=${{ inputs.releaseTag }}" push: true sbom: true provenance: mode=max @@ -246,6 +253,7 @@ jobs: "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}" "LLAMA_SERVER_VARIANT=cann" "BASE_IMAGE=ascendai/cann:8.2.rc2-910b-ubuntu22.04-py3.11" + "VERSION=${{ inputs.releaseTag }}" push: true sbom: true provenance: mode=max diff --git a/Dockerfile b/Dockerfile index b20d796eb..142cafde7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,8 +9,12 @@ ARG LLAMA_BINARY_PATH=/com.docker.llama-server.native.linux.${LLAMA_SERVER_VARIA # use 22.04 for gpu variants to match ROCm/CUDA base images ARG BASE_IMAGE=ubuntu:26.04 +ARG VERSION=dev + FROM docker.io/library/golang:${GO_VERSION}-bookworm AS builder +ARG VERSION + # Install git for go mod download if needed RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/* @@ -30,13 +34,14 @@ COPY --link . . # Build the Go binary (static build) RUN --mount=type=cache,target=/go/pkg/mod \ --mount=type=cache,target=/root/.cache/go-build \ - CGO_ENABLED=1 GOOS=linux go build -ldflags="-s -w" -o model-runner . + CGO_ENABLED=1 GOOS=linux go build -ldflags="-s -w -X main.Version=${VERSION}" -o model-runner . # Build the Go binary for SGLang (without vLLM) FROM builder AS builder-sglang +ARG VERSION RUN --mount=type=cache,target=/go/pkg/mod \ --mount=type=cache,target=/root/.cache/go-build \ - CGO_ENABLED=1 GOOS=linux go build -tags=novllm -ldflags="-s -w" -o model-runner . + CGO_ENABLED=1 GOOS=linux go build -tags=novllm -ldflags="-s -w -X main.Version=${VERSION}" -o model-runner . 
# --- Get llama.cpp binary --- FROM docker/docker-model-backend-llamacpp:${LLAMA_SERVER_VERSION}-${LLAMA_SERVER_VARIANT} AS llama-server diff --git a/Makefile b/Makefile index 42bcfdc55..5fa0ef0e0 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ BUILD_DMR ?= 1 # Build the Go application build: - CGO_ENABLED=1 go build -ldflags="-s -w" -o $(APP_NAME) . + CGO_ENABLED=1 go build -ldflags="-s -w -X main.Version=$(shell git describe --tags --always --dirty --match 'v*')" -o $(APP_NAME) . # Run the application locally run: build diff --git a/cmd/cli/commands/version.go b/cmd/cli/commands/version.go index f1467684b..a02dde830 100644 --- a/cmd/cli/commands/version.go +++ b/cmd/cli/commands/version.go @@ -1,6 +1,8 @@ package commands import ( + "runtime" + "github.com/docker/model-runner/cmd/cli/commands/completion" "github.com/docker/model-runner/cmd/cli/desktop" "github.com/spf13/cobra" @@ -11,8 +13,24 @@ func newVersionCmd() *cobra.Command { Use: "version", Short: "Show the Docker Model Runner version", Run: func(cmd *cobra.Command, args []string) { - cmd.Printf("Docker Model Runner version %s\n", desktop.Version) - cmd.Printf("Docker Engine Kind: %s\n", modelRunner.EngineKind()) + cmd.Println("Client:") + cmd.Printf(" Version: %s\n", desktop.Version) + cmd.Printf(" OS/Arch: %s/%s\n", runtime.GOOS, runtime.GOARCH) + + cmd.Println() + cmd.Println("Server:") + if desktopClient == nil { + cmd.Println(" Version: (not reachable)") + cmd.Println(" Engine: (not reachable)") + return + } + sv, err := desktopClient.ServerVersion() + if err != nil { + cmd.Println(" Version: (not reachable)") + } else { + cmd.Printf(" Version: %s\n", sv.Version) + } + cmd.Printf(" Engine: %s\n", modelRunner.EngineKind()) }, ValidArgsFunction: completion.NoComplete, } diff --git a/cmd/cli/desktop/desktop.go b/cmd/cli/desktop/desktop.go index cae6ae832..6fc72e7c6 100644 --- a/cmd/cli/desktop/desktop.go +++ b/cmd/cli/desktop/desktop.go @@ -694,6 +694,34 @@ func (c *Client) Remove(modelArgs 
[]string, force bool) (string, error) { return modelRemoved, nil } +type ServerVersionResponse struct { + Version string `json:"version"` +} + +func (c *Client) ServerVersion() (ServerVersionResponse, error) { + resp, err := c.doRequest(http.MethodGet, "/version", nil) + if err != nil { + return ServerVersionResponse{}, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return ServerVersionResponse{}, fmt.Errorf("failed to get server version: %s", resp.Status) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return ServerVersionResponse{}, fmt.Errorf("failed to read response body: %w", err) + } + + var version ServerVersionResponse + if err := json.Unmarshal(body, &version); err != nil { + return ServerVersionResponse{}, fmt.Errorf("failed to unmarshal response body: %w", err) + } + + return version, nil +} + // BackendStatus to be imported from docker/model-runner when https://github.com/docker/model-runner/pull/42 is merged. type BackendStatus struct { BackendName string `json:"backend_name"` diff --git a/main.go b/main.go index 40a374890..99efac3f9 100644 --- a/main.go +++ b/main.go @@ -3,6 +3,7 @@ package main import ( "context" "crypto/tls" + "encoding/json" "net" "net/http" "os" @@ -264,6 +265,12 @@ func main() { anthropicHandler := anthropic.NewHandler(log, schedulerHTTP, nil, modelManager) router.Handle(anthropic.APIPrefix+"/", anthropicHandler) + // Register /version endpoint + router.HandleFunc("/version", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]string{"version": Version}) + }) + // Register root handler LAST - it will only catch exact "/" requests that don't match other patterns router.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { // Only respond to exact root path diff --git a/version.go b/version.go new file mode 100644 index 000000000..23bec878f --- /dev/null +++ b/version.go @@ -0,0 +1,3 @@ 
+package main + +var Version = "dev" From a222f443b8b63adfd2c78bdd6a9fe2a1f98b1aa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?= Date: Fri, 13 Feb 2026 13:20:06 +0100 Subject: [PATCH 2/2] fix(test): use per-subtest temp dir and logger in TestHandleGetModel to fix race --- pkg/inference/models/handler_test.go | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/pkg/inference/models/handler_test.go b/pkg/inference/models/handler_test.go index d8c0d580e..be255a868 100644 --- a/pkg/inference/models/handler_test.go +++ b/pkg/inference/models/handler_test.go @@ -135,8 +135,6 @@ func TestPullModel(t *testing.T) { } func TestHandleGetModel(t *testing.T) { - tempDir := t.TempDir() - // Create a test registry server := httptest.NewServer(testregistry.New()) defer server.Close() @@ -207,7 +205,9 @@ func TestHandleGetModel(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - log := logrus.NewEntry(logrus.StandardLogger()) + tempDir := t.TempDir() + logger := logrus.New() + log := logrus.NewEntry(logger) manager := NewManager(log.WithFields(logrus.Fields{"component": "model-manager"}), ClientConfig{ StoreRootPath: tempDir, Logger: log.WithFields(logrus.Fields{"component": "model-manager"}), @@ -263,14 +263,6 @@ func TestHandleGetModel(t *testing.T) { t.Errorf("Failed to decode response body: %v", err) } } - - // Clean tempDir after each test - if err := os.RemoveAll(tempDir); err != nil { - t.Fatalf("Failed to clean temp directory: %v", err) - } - if err := os.MkdirAll(tempDir, 0755); err != nil { - t.Fatalf("Failed to recreate temp directory: %v", err) - } }) } }