From 45ed802e54df27f00765ebfab358ea7a370e4077 Mon Sep 17 00:00:00 2001
From: Saadi Myftija <saadi@myftija.com>
Date: Mon, 12 Jan 2026 13:13:48 +0100
Subject: [PATCH 1/2] feat(supervisor): add node affinity rules for large
 machine worker pool scheduling

**Background**
Runs with `large-1x` or `large-2x` machine presets are disproportionately
affected by scheduling delays during peak times. This is caused in part
by the worker pool being shared across all runs, meaning large
runs compete with smaller runs for available capacity. Because large runs require
significantly more CPU and memory, they are harder for the scheduler to bin-pack onto existing
nodes, often requiring a node with a significant amount of free resources or
waiting for a new node to spin up entirely. This effect is amplified during peak times
when nodes are already densely packed with smaller workloads, leaving too little free capacity on any single node for large runs.

**Changes**

This PR adds Kubernetes node affinity settings to separate large and standard machine workloads across node pools.

  - Controlled via the `KUBERNETES_LARGE_MACHINE_POOL_LABEL` env var (no affinity rules are added when it is not set)
  - Large machine presets (`large-*`) get a soft preference to schedule on the large pool, with fallback to standard nodes
  - Non-large machines are excluded from the large pool via a required node affinity rule (`NotIn` on the pool label)
  - This ensures the large machine pool is reserved for large workloads while still allowing large workloads to spill over to standard nodes if needed
---
 apps/supervisor/src/env.ts                    |  1 +
 .../src/workloadManager/kubernetes.ts         | 52 +++++++++++++++++++
 2 files changed, 53 insertions(+)
diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts
index 1605a21637..447991de64 100644
--- a/apps/supervisor/src/env.ts
+++ b/apps/supervisor/src/env.ts
@@ -91,6 +91,7 @@ const Env = z.object({
   KUBERNETES_MEMORY_REQUEST_RATIO: z.coerce.number().min(0).max(1).default(1), // Ratio of memory limit, so 1 = 100% of memory limit
   KUBERNETES_MEMORY_OVERHEAD_GB: z.coerce.number().min(0).optional(), // Optional memory overhead to add to the limit in GB
   KUBERNETES_SCHEDULER_NAME: z.string().optional(), // Custom scheduler name for pods
+  KUBERNETES_LARGE_MACHINE_POOL_LABEL: z.string().optional(), // If set, large-* presets prefer nodes in this machine pool and all other presets are excluded from it
 
   // Placement tags settings
   PLACEMENT_TAGS_ENABLED: BoolEnv.default(false),
diff --git a/apps/supervisor/src/workloadManager/kubernetes.ts b/apps/supervisor/src/workloadManager/kubernetes.ts
index 96fbd7a274..dd363d2a48 100644
--- a/apps/supervisor/src/workloadManager/kubernetes.ts
+++ b/apps/supervisor/src/workloadManager/kubernetes.ts
@@ -95,6 +95,7 @@ export class KubernetesWorkloadManager implements WorkloadManager {
           },
           spec: {
             ...this.addPlacementTags(this.#defaultPodSpec, opts.placementTags),
+            affinity: this.#getNodeAffinity(opts.machine),
             terminationGracePeriodSeconds: 60 * 60,
             containers: [
               {
@@ -356,4 +357,55 @@ export class KubernetesWorkloadManager implements WorkloadManager {
       },
     };
   }
+
+  #isLargeMachine(preset: MachinePreset): boolean {
+    return preset.name.startsWith("large-");
+  }
+
+  #getNodeAffinity(preset: MachinePreset): k8s.V1Affinity | undefined {
+    if (!env.KUBERNETES_LARGE_MACHINE_POOL_LABEL) {
+      return undefined;
+    }
+
+    if (this.#isLargeMachine(preset)) {
+      // soft preference for the large-machine pool, falls back to standard if unavailable
+      return {
+        nodeAffinity: {
+          preferredDuringSchedulingIgnoredDuringExecution: [
+            {
+              weight: 100,
+              preference: {
+                matchExpressions: [
+                  {
+                    key: "machinepool",
+                    operator: "In",
+                    values: [env.KUBERNETES_LARGE_MACHINE_POOL_LABEL],
+                  },
+                ],
+              },
+            },
+          ],
+        },
+      };
+    }
+
+    // not schedulable in the large-machine pool
+    return {
+      nodeAffinity: {
+        requiredDuringSchedulingIgnoredDuringExecution: {
+          nodeSelectorTerms: [
+            {
+              matchExpressions: [
+                {
+                  key: "machinepool",
+                  operator: "NotIn",
+                  values: [env.KUBERNETES_LARGE_MACHINE_POOL_LABEL],
+                },
+              ],
+            },
+          ],
+        },
+      },
+    };
+  }
 }

From a847ef6be1b2977a59dfb2ad5dccc3886694afd3 Mon Sep 17 00:00:00 2001
From: Saadi Myftija <saadi@myftija.com>
Date: Mon, 12 Jan 2026 14:29:43 +0100
Subject: [PATCH 2/2] Update machinepool label key

---
 apps/supervisor/src/workloadManager/kubernetes.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/apps/supervisor/src/workloadManager/kubernetes.ts b/apps/supervisor/src/workloadManager/kubernetes.ts
index dd363d2a48..90d6b3985b 100644
--- a/apps/supervisor/src/workloadManager/kubernetes.ts
+++ b/apps/supervisor/src/workloadManager/kubernetes.ts
@@ -377,7 +377,7 @@ export class KubernetesWorkloadManager implements WorkloadManager {
               preference: {
                 matchExpressions: [
                   {
-                    key: "machinepool",
+                    key: "node.cluster.x-k8s.io/machinepool",
                     operator: "In",
                     values: [env.KUBERNETES_LARGE_MACHINE_POOL_LABEL],
                   },
@@ -397,7 +397,7 @@ export class KubernetesWorkloadManager implements WorkloadManager {
             {
               matchExpressions: [
                 {
-                  key: "machinepool",
+                  key: "node.cluster.x-k8s.io/machinepool",
                   operator: "NotIn",
                   values: [env.KUBERNETES_LARGE_MACHINE_POOL_LABEL],
                 },