diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts index 1605a21637..447991de64 100644 --- a/apps/supervisor/src/env.ts +++ b/apps/supervisor/src/env.ts @@ -91,6 +91,7 @@ const Env = z.object({ KUBERNETES_MEMORY_REQUEST_RATIO: z.coerce.number().min(0).max(1).default(1), // Ratio of memory limit, so 1 = 100% of memory limit KUBERNETES_MEMORY_OVERHEAD_GB: z.coerce.number().min(0).optional(), // Optional memory overhead to add to the limit in GB KUBERNETES_SCHEDULER_NAME: z.string().optional(), // Custom scheduler name for pods + KUBERNETES_LARGE_MACHINE_POOL_LABEL: z.string().optional(), // if set, large-* presets affinity for machinepool= // Placement tags settings PLACEMENT_TAGS_ENABLED: BoolEnv.default(false), diff --git a/apps/supervisor/src/workloadManager/kubernetes.ts b/apps/supervisor/src/workloadManager/kubernetes.ts index 96fbd7a274..90d6b3985b 100644 --- a/apps/supervisor/src/workloadManager/kubernetes.ts +++ b/apps/supervisor/src/workloadManager/kubernetes.ts @@ -95,6 +95,7 @@ export class KubernetesWorkloadManager implements WorkloadManager { }, spec: { ...this.addPlacementTags(this.#defaultPodSpec, opts.placementTags), + affinity: this.#getNodeAffinity(opts.machine), terminationGracePeriodSeconds: 60 * 60, containers: [ { @@ -356,4 +357,55 @@ export class KubernetesWorkloadManager implements WorkloadManager { }, }; } + + #isLargeMachine(preset: MachinePreset): boolean { + return preset.name.startsWith("large-"); + } + + #getNodeAffinity(preset: MachinePreset): k8s.V1Affinity | undefined { + if (!env.KUBERNETES_LARGE_MACHINE_POOL_LABEL) { + return undefined; + } + + if (this.#isLargeMachine(preset)) { + // soft preference for the large-machine pool, falls back to standard if unavailable + return { + nodeAffinity: { + preferredDuringSchedulingIgnoredDuringExecution: [ + { + weight: 100, + preference: { + matchExpressions: [ + { + key: "node.cluster.x-k8s.io/machinepool", + operator: "In", + values: [env.KUBERNETES_LARGE_MACHINE_POOL_LABEL], + }, + ], + }, + }, + ], + }, + }; + } + + // not schedulable in the large-machine pool + return { + nodeAffinity: { + requiredDuringSchedulingIgnoredDuringExecution: { + nodeSelectorTerms: [ + { + matchExpressions: [ + { + key: "node.cluster.x-k8s.io/machinepool", + operator: "NotIn", + values: [env.KUBERNETES_LARGE_MACHINE_POOL_LABEL], + }, + ], + }, + ], + }, + }, + }; + } }