From 49030ad9ea39608cd170f2e19b32a781d6b55975 Mon Sep 17 00:00:00 2001
From: Alice Boucher <yboucher@nvidia.com>
Date: Mon, 23 Feb 2026 18:39:16 +0100
Subject: [PATCH 01/18] squash changes

---
 cpp/src/branch_and_bound/branch_and_bound.cpp |  35 +++---
 cpp/src/branch_and_bound/branch_and_bound.hpp |  13 ++-
 cpp/src/mip_heuristics/CMakeLists.txt         |   4 +-
 .../mip_heuristics/diversity/population.cu    |   8 +-
 cpp/src/mip_heuristics/early_heuristic.cuh    |  94 +++++++++++++++
 .../feasibility_jump/early_cpufj.cu           |  79 +++++++++++++
 .../feasibility_jump/early_cpufj.cuh          |  39 +++++++
 .../feasibility_jump/early_gpufj.cu           |  93 +++++++++++++++
 .../feasibility_jump/early_gpufj.cuh          |  45 ++++++++
 .../feasibility_jump/feasibility_jump.cu      |  18 ++-
 .../feasibility_jump/feasibility_jump.cuh     |   9 ++
 .../mip_heuristics/feasibility_jump/fj_cpu.cu |  62 +++++++---
 .../feasibility_jump/fj_cpu.cuh               |   9 ++
 cpp/src/mip_heuristics/solve.cu               | 107 +++++++++++++++++-
 cpp/src/mip_heuristics/solver.cu              |  27 ++++-
 cpp/src/mip_heuristics/solver_context.cuh     |  10 +-
 16 files changed, 605 insertions(+), 47 deletions(-)
 create mode 100644 cpp/src/mip_heuristics/early_heuristic.cuh
 create mode 100644 cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu
 create mode 100644 cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cuh
 create mode 100644 cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu
 create mode 100644 cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cuh

diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp
index 6ce9a4f4d..a4e43a1ec 100644
--- a/cpp/src/branch_and_bound/branch_and_bound.cpp
+++ b/cpp/src/branch_and_bound/branch_and_bound.cpp
@@ -898,7 +898,7 @@ struct nondeterministic_policy_t : tree_update_policy_t<i_t, f_t> {
   {
   }
 
-  f_t upper_bound() const override { return bnb.upper_bound_.load(); }
+  f_t upper_bound() const override { return bnb.get_cutoff(); }
 
   void update_pseudo_costs(mip_node_t<i_t, f_t>* node, f_t leaf_obj) override
   {
@@ -1316,10 +1316,11 @@ dual::status_t branch_and_bound_t<i_t, f_t>::solve_node_lp(
 
   simplex_solver_settings_t lp_settings = settings_;
   lp_settings.set_log(false);
+  f_t cutoff = get_cutoff();
   if (original_lp_.objective_is_integral) {
-    lp_settings.cut_off = std::ceil(upper_bound_ - settings_.integer_tol) + settings_.dual_tol;
+    lp_settings.cut_off = std::ceil(cutoff - settings_.integer_tol) + settings_.dual_tol;
   } else {
-    lp_settings.cut_off = upper_bound_ + settings_.dual_tol;
+    lp_settings.cut_off = cutoff + settings_.dual_tol;
   }
   lp_settings.inside_mip    = 2;
   lp_settings.time_limit    = settings_.time_limit - toc(exploration_stats_.start_time);
@@ -1426,7 +1427,7 @@ void branch_and_bound_t<i_t, f_t>::plunge_with(branch_and_bound_worker_t<i_t, f_
     // - The lower bound of the parent is lower or equal to its children
     worker->lower_bound = lower_bound;
 
-    if (lower_bound > upper_bound) {
+    if (lower_bound > get_cutoff()) {
       search_tree_.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound);
       search_tree_.update(node_ptr, node_status_t::FATHOMED);
       worker->recompute_basis  = true;
@@ -1536,7 +1537,7 @@ void branch_and_bound_t<i_t, f_t>::dive_with(branch_and_bound_worker_t<i_t, f_t>
     f_t rel_gap         = user_relative_gap(original_lp_, upper_bound, lower_bound);
     worker->lower_bound = lower_bound;
 
-    if (node_ptr->lower_bound > upper_bound) {
+    if (node_ptr->lower_bound > get_cutoff()) {
       worker->recompute_basis  = true;
       worker->recompute_bounds = true;
       continue;
@@ -1675,7 +1676,7 @@ void branch_and_bound_t<i_t, f_t>::run_scheduler()
         std::optional<mip_node_t<i_t, f_t>*> start_node = node_queue_.pop_best_first();
 
         if (!start_node.has_value()) { continue; }
-        if (upper_bound_ < start_node.value()->lower_bound) {
+        if (get_cutoff() < start_node.value()->lower_bound) {
           // This node was put on the heap earlier but its lower bound is now greater than the
           // current upper bound
           search_tree_.graphviz_node(
@@ -1699,7 +1700,7 @@ void branch_and_bound_t<i_t, f_t>::run_scheduler()
         std::optional<mip_node_t<i_t, f_t>*> start_node = node_queue_.pop_diving();
 
         if (!start_node.has_value()) { continue; }
-        if (upper_bound_ < start_node.value()->lower_bound ||
+        if (get_cutoff() < start_node.value()->lower_bound ||
             start_node.value()->depth < diving_settings.min_node_depth) {
           continue;
         }
@@ -1767,7 +1768,7 @@ void branch_and_bound_t<i_t, f_t>::single_threaded_solve()
     std::optional<mip_node_t<i_t, f_t>*> start_node = node_queue_.pop_best_first();
 
     if (!start_node.has_value()) { continue; }
-    if (upper_bound_ < start_node.value()->lower_bound) {
+    if (get_cutoff() < start_node.value()->lower_bound) {
       // This node was put on the heap earlier but its lower bound is now greater than the
       // current upper bound
       search_tree_.graphviz_node(
@@ -2189,12 +2190,12 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
         return mip_status_t::NUMERICAL;
       }
 
-      if (settings_.reduced_cost_strengthening >= 1 && upper_bound_.load() < last_upper_bound) {
+      if (settings_.reduced_cost_strengthening >= 1 && get_cutoff() < last_upper_bound) {
         mutex_upper_.lock();
-        last_upper_bound = upper_bound_.load();
+        last_upper_bound = get_cutoff();
         std::vector<f_t> lower_bounds;
         std::vector<f_t> upper_bounds;
-        find_reduced_cost_fixings(upper_bound_.load(), lower_bounds, upper_bounds);
+        find_reduced_cost_fixings(get_cutoff(), lower_bounds, upper_bounds);
         mutex_upper_.unlock();
         mutex_original_lp_.lock();
         original_lp_.lower = lower_bounds;
@@ -2372,10 +2373,10 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
     return solver_status_;
   }
 
-  if (settings_.reduced_cost_strengthening >= 2 && upper_bound_.load() < last_upper_bound) {
+  if (settings_.reduced_cost_strengthening >= 2 && get_cutoff() < last_upper_bound) {
     std::vector<f_t> lower_bounds;
     std::vector<f_t> upper_bounds;
-    i_t num_fixed = find_reduced_cost_fixings(upper_bound_.load(), lower_bounds, upper_bounds);
+    i_t num_fixed = find_reduced_cost_fixings(get_cutoff(), lower_bounds, upper_bounds);
     if (num_fixed > 0) {
       std::vector<bool> bounds_changed(original_lp_.num_cols, true);
       std::vector<char> row_sense;
@@ -2479,7 +2480,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
         std::optional<mip_node_t<i_t, f_t>*> start_node = node_queue_.pop_best_first();
 
         if (!start_node.has_value()) { continue; }
-        if (upper_bound_ < start_node.value()->lower_bound) {
+        if (get_cutoff() < start_node.value()->lower_bound) {
           // This node was put on the heap earlier but its lower bound is now greater than the
           // current upper bound
           search_tree_.graphviz_node(
@@ -3321,7 +3322,7 @@ void branch_and_bound_t<i_t, f_t>::deterministic_sort_replay_events(
 template <typename i_t, typename f_t>
 void branch_and_bound_t<i_t, f_t>::deterministic_prune_worker_nodes_vs_incumbent()
 {
-  f_t upper_bound = upper_bound_.load();
+  f_t upper_bound = get_cutoff();
 
   for (auto& worker : *deterministic_workers_) {
     // Check nodes in plunge stack - filter in place
@@ -3457,14 +3458,14 @@ void branch_and_bound_t<i_t, f_t>::deterministic_populate_diving_heap()
   const int num_diving                  = deterministic_diving_workers_->size();
   constexpr int target_nodes_per_worker = 10;
   const int target_total                = num_diving * target_nodes_per_worker;
-  f_t upper_bound                       = upper_bound_.load();
+  f_t cutoff                            = get_cutoff();
 
   // Collect candidate nodes from BFS worker backlog heaps
   std::vector<std::pair<mip_node_t<i_t, f_t>*, f_t>> candidates;
 
   for (auto& worker : *deterministic_workers_) {
     for (auto* node : worker.backlog.data()) {
-      if (node->lower_bound < upper_bound) {
+      if (node->lower_bound < cutoff) {
         f_t score = node->objective_estimate;
         if (score >= inf) { score = node->lower_bound; }
         candidates.push_back({node, score});
diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp
index a13d5cedc..fac413024 100644
--- a/cpp/src/branch_and_bound/branch_and_bound.hpp
+++ b/cpp/src/branch_and_bound/branch_and_bound.hpp
@@ -108,6 +108,14 @@ class branch_and_bound_t {
 
   bool stop_for_time_limit(mip_solution_t<i_t, f_t>& solution);
 
+  // Set a cutoff bound from an external source (e.g., early FJ during presolve).
+  // Used for node pruning and reduced cost strengthening but NOT for gap computation.
+  // Unlike upper_bound_, this does not imply a verified incumbent solution exists.
+  void set_initial_cutoff(f_t bound) { initial_cutoff_ = bound; }
+
+  // Effective cutoff for node pruning: min of verified incumbent and external cutoff.
+  f_t get_cutoff() const { return std::min(upper_bound_.load(), initial_cutoff_); }
+
   // Repair a low-quality solution from the heuristics.
   bool repair_solution(const std::vector<f_t>& leaf_edge_norms,
                        const std::vector<f_t>& potential_solution,
@@ -169,9 +177,12 @@ class branch_and_bound_t {
   // Mutex for upper bound
   omp_mutex_t mutex_upper_;
 
-  // Global variable for upper bound
+  // Verified incumbent bound (only set when B&B has an actual integer-feasible solution).
   omp_atomic_t<f_t> upper_bound_;
 
+  // External cutoff from early heuristics (for pruning only, no verified solution).
+  f_t initial_cutoff_{std::numeric_limits<f_t>::infinity()};
+
   // Global variable for incumbent. The incumbent should be updated with the upper bound
   mip_solution_t<i_t, f_t> incumbent_;
 
diff --git a/cpp/src/mip_heuristics/CMakeLists.txt b/cpp/src/mip_heuristics/CMakeLists.txt
index 538e3c49a..7c1035fd1 100644
--- a/cpp/src/mip_heuristics/CMakeLists.txt
+++ b/cpp/src/mip_heuristics/CMakeLists.txt
@@ -40,7 +40,9 @@ set(MIP_NON_LP_FILES
   ${CMAKE_CURRENT_SOURCE_DIR}/presolve/trivial_presolve.cu
   ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump.cu
   ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump_kernels.cu
-  ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu)
+  ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu
+  ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/early_cpufj.cu
+  ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/early_gpufj.cu)
 
 # Choose which files to include based on build mode
 if(BUILD_LP_ONLY)
diff --git a/cpp/src/mip_heuristics/diversity/population.cu b/cpp/src/mip_heuristics/diversity/population.cu
index c2138c91c..ba23d3fb0 100644
--- a/cpp/src/mip_heuristics/diversity/population.cu
+++ b/cpp/src/mip_heuristics/diversity/population.cu
@@ -267,9 +267,9 @@ void population_t<i_t, f_t>::invoke_get_solution_callback(
   f_t user_bound     = context.stats.get_solution_bound();
   solution_t<i_t, f_t> temp_sol(sol);
   problem_ptr->post_process_assignment(temp_sol.assignment);
-  if (context.settings.mip_scaling) {
+  if (context.settings.mip_scaling && context.scaling != nullptr) {
     rmm::device_uvector<f_t> dummy(0, temp_sol.handle_ptr->get_stream());
-    context.scaling.unscale_solutions(temp_sol.assignment, dummy);
+    context.scaling->unscale_solutions(temp_sol.assignment, dummy);
   }
   if (problem_ptr->has_papilo_presolve_data()) {
     problem_ptr->papilo_uncrush_assignment(temp_sol.assignment);
@@ -346,7 +346,9 @@ void population_t<i_t, f_t>::run_solution_callbacks(solution_t<i_t, f_t>& sol)
                  incumbent_assignment.size(),
                  sol.handle_ptr->get_stream());
 
-      if (context.settings.mip_scaling) { context.scaling.scale_solutions(incumbent_assignment); }
+      if (context.settings.mip_scaling && context.scaling != nullptr) {
+        context.scaling->scale_solutions(incumbent_assignment);
+      }
       bool is_valid = problem_ptr->pre_process_assignment(incumbent_assignment);
       if (!is_valid) { return; }
       cuopt_assert(outside_sol.assignment.size() == incumbent_assignment.size(),
diff --git a/cpp/src/mip_heuristics/early_heuristic.cuh b/cpp/src/mip_heuristics/early_heuristic.cuh
new file mode 100644
index 000000000..5ea7f346a
--- /dev/null
+++ b/cpp/src/mip_heuristics/early_heuristic.cuh
@@ -0,0 +1,94 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#pragma once
+
+#include <mip_heuristics/problem/problem.cuh>
+#include <mip_heuristics/solution/solution.cuh>
+
+#include <cuopt/linear_programming/mip/solver_settings.hpp>
+
+#include <utilities/logger.hpp>
+
+#include <thrust/fill.h>
+
+#include <chrono>
+#include <functional>
+#include <limits>
+#include <vector>
+
+namespace cuopt::linear_programming::detail {
+
+template <typename f_t>
+using early_incumbent_callback_t =
+  std::function<void(f_t objective, const std::vector<f_t>& assignment)>;
+
+// CRTP base for early heuristics that run on the original (or papilo-presolved) problem
+// during presolve to find incumbents as early as possible.
+// Derived classes implement start() and stop().
+template <typename i_t, typename f_t, typename Derived>
+class early_heuristic_t {
+ public:
+  early_heuristic_t(const optimization_problem_t<i_t, f_t>& op_problem,
+                    const typename mip_solver_settings_t<i_t, f_t>::tolerances_t& tolerances,
+                    early_incumbent_callback_t<f_t> incumbent_callback)
+    : incumbent_callback_(std::move(incumbent_callback))
+  {
+    problem_ptr_ = std::make_unique<problem_t<i_t, f_t>>(op_problem, tolerances, false);
+    problem_ptr_->preprocess_problem();
+
+    solution_ptr_ = std::make_unique<solution_t<i_t, f_t>>(*problem_ptr_);
+    thrust::fill(problem_ptr_->handle_ptr->get_thrust_policy(),
+                 solution_ptr_->assignment.begin(),
+                 solution_ptr_->assignment.end(),
+                 f_t{0});
+    solution_ptr_->clamp_within_bounds();
+  }
+
+  bool solution_found() const { return solution_found_; }
+  f_t get_best_objective() const { return best_objective_; }
+  void set_best_objective(f_t obj) { best_objective_ = obj; }
+  const std::vector<f_t>& get_best_assignment() const { return best_assignment_; }
+
+ protected:
+  ~early_heuristic_t() = default;
+
+  void try_update_best(f_t user_obj, const std::vector<f_t>& assignment)
+  {
+    if (user_obj >= best_objective_) { return; }
+    best_objective_ = user_obj;
+
+    auto* handle_ptr = problem_ptr_->handle_ptr;
+    RAFT_CUDA_TRY(cudaSetDevice(handle_ptr->get_device()));
+    rmm::device_uvector<f_t> d_assignment(assignment.size(), handle_ptr->get_stream());
+    raft::copy(d_assignment.data(), assignment.data(), assignment.size(), handle_ptr->get_stream());
+    problem_ptr_->post_process_assignment(d_assignment);
+    auto user_assignment = cuopt::host_copy(d_assignment, handle_ptr->get_stream());
+
+    best_assignment_ = user_assignment;
+    solution_found_  = true;
+    double elapsed =
+      std::chrono::duration<double>(std::chrono::steady_clock::now() - start_time_).count();
+    CUOPT_LOG_INFO("Early heuristics (%s) lowered the primal bound. Objective %g. Time %.2f",
+                   Derived::name(),
+                   user_obj,
+                   elapsed);
+    if (incumbent_callback_) { incumbent_callback_(user_obj, user_assignment); }
+  }
+
+  std::unique_ptr<problem_t<i_t, f_t>> problem_ptr_;
+  std::unique_ptr<solution_t<i_t, f_t>> solution_ptr_;
+
+  bool solution_found_{false};
+  f_t best_objective_{std::numeric_limits<f_t>::infinity()};
+  std::vector<f_t> best_assignment_;
+
+  early_incumbent_callback_t<f_t> incumbent_callback_;
+  std::chrono::steady_clock::time_point start_time_;
+};
+
+}  // namespace cuopt::linear_programming::detail
diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu
new file mode 100644
index 000000000..23f7a07da
--- /dev/null
+++ b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu
@@ -0,0 +1,79 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#include "early_cpufj.cuh"
+
+#include <mip_heuristics/feasibility_jump/fj_cpu.cuh>
+#include <mip_heuristics/mip_constants.hpp>
+#include <utilities/logger.hpp>
+
+namespace cuopt::linear_programming::detail {
+
+template <typename i_t, typename f_t>
+early_cpufj_t<i_t, f_t>::early_cpufj_t(
+  const optimization_problem_t<i_t, f_t>& op_problem,
+  const typename mip_solver_settings_t<i_t, f_t>::tolerances_t& tolerances,
+  early_incumbent_callback_t<f_t> incumbent_callback)
+  : early_heuristic_t<i_t, f_t, early_cpufj_t<i_t, f_t>>(
+      op_problem, tolerances, std::move(incumbent_callback))
+{
+}
+
+template <typename i_t, typename f_t>
+early_cpufj_t<i_t, f_t>::~early_cpufj_t()
+{
+  stop();
+}
+
+template <typename i_t, typename f_t>
+void early_cpufj_t<i_t, f_t>::start()
+{
+  if (cpu_fj_thread_) { return; }
+
+  this->start_time_ = std::chrono::steady_clock::now();
+
+  cpu_fj_thread_         = std::make_unique<cpu_fj_thread_t<i_t, f_t>>();
+  cpu_fj_thread_->fj_cpu = init_fj_cpu_standalone(
+    *this->problem_ptr_, *this->solution_ptr_, preemption_flag_);
+  cpu_fj_thread_->time_limit = std::numeric_limits<f_t>::infinity();
+
+  cpu_fj_thread_->fj_cpu->log_prefix = "[Early CPUFJ] ";
+
+  cpu_fj_thread_->fj_cpu->improvement_callback =
+    [this](f_t solver_obj, const std::vector<f_t>& assignment, double) {
+      f_t user_obj = this->problem_ptr_->get_user_obj_from_solver_obj(solver_obj);
+      this->try_update_best(user_obj, assignment);
+    };
+
+  cpu_fj_thread_->start_cpu_solver();
+}
+
+template <typename i_t, typename f_t>
+void early_cpufj_t<i_t, f_t>::stop()
+{
+  if (!cpu_fj_thread_) { return; }
+
+  preemption_flag_.store(true);
+  cpu_fj_thread_->stop_cpu_solver();
+  cpu_fj_thread_->wait_for_cpu_solver();
+
+  CUOPT_LOG_DEBUG("[Early CPUFJ] Stopped after %d iterations, solution_found=%d",
+                 cpu_fj_thread_->fj_cpu ? cpu_fj_thread_->fj_cpu->iterations : 0,
+                 this->solution_found_);
+
+  cpu_fj_thread_.reset();
+}
+
+#if MIP_INSTANTIATE_FLOAT
+template class early_cpufj_t<int, float>;
+#endif
+
+#if MIP_INSTANTIATE_DOUBLE
+template class early_cpufj_t<int, double>;
+#endif
+
+}  // namespace cuopt::linear_programming::detail
diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cuh b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cuh
new file mode 100644
index 000000000..911e84655
--- /dev/null
+++ b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cuh
@@ -0,0 +1,39 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#pragma once
+
+#include <mip_heuristics/early_heuristic.cuh>
+
+#include <atomic>
+#include <memory>
+
+namespace cuopt::linear_programming::detail {
+
+template <typename i_t, typename f_t>
+struct cpu_fj_thread_t;
+
+template <typename i_t, typename f_t>
+class early_cpufj_t : public early_heuristic_t<i_t, f_t, early_cpufj_t<i_t, f_t>> {
+ public:
+  early_cpufj_t(const optimization_problem_t<i_t, f_t>& op_problem,
+                const typename mip_solver_settings_t<i_t, f_t>::tolerances_t& tolerances,
+                early_incumbent_callback_t<f_t> incumbent_callback);
+
+  ~early_cpufj_t();
+
+  static constexpr const char* name() { return "CPUFJ"; }
+
+  void start();
+  void stop();
+
+ private:
+  std::unique_ptr<cpu_fj_thread_t<i_t, f_t>> cpu_fj_thread_;
+  std::atomic<bool> preemption_flag_{false};
+};
+
+}  // namespace cuopt::linear_programming::detail
diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu
new file mode 100644
index 000000000..adc010c16
--- /dev/null
+++ b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu
@@ -0,0 +1,93 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#include "early_gpufj.cuh"
+
+#include <mip_heuristics/feasibility_jump/feasibility_jump.cuh>
+#include <mip_heuristics/solver_context.cuh>
+#include <mip_heuristics/mip_constants.hpp>
+#include <utilities/logger.hpp>
+
+#include <raft/core/error.hpp>
+
+#include <limits>
+
+namespace cuopt::linear_programming::detail {
+
+template <typename i_t, typename f_t>
+early_gpufj_t<i_t, f_t>::early_gpufj_t(const optimization_problem_t<i_t, f_t>& op_problem,
+                                       const mip_solver_settings_t<i_t, f_t>& settings,
+                                       early_incumbent_callback_t<f_t> incumbent_callback)
+  : early_heuristic_t<i_t, f_t, early_gpufj_t<i_t, f_t>>(
+      op_problem, settings.get_tolerances(), std::move(incumbent_callback))
+{
+  context_ptr_ = std::make_unique<mip_solver_context_t<i_t, f_t>>(
+    this->problem_ptr_->handle_ptr, this->problem_ptr_.get(), settings, nullptr);
+}
+
+template <typename i_t, typename f_t>
+early_gpufj_t<i_t, f_t>::~early_gpufj_t()
+{
+  stop();
+}
+
+template <typename i_t, typename f_t>
+void early_gpufj_t<i_t, f_t>::start()
+{
+  if (worker_thread_) { return; }
+
+  this->start_time_ = std::chrono::steady_clock::now();
+
+  fj_settings_t fj_settings;
+  fj_settings.mode                   = fj_mode_t::EXIT_NON_IMPROVING;
+  fj_settings.n_of_minimums_for_exit = std::numeric_limits<int>::max();
+  fj_settings.time_limit             = std::numeric_limits<f_t>::infinity();
+  fj_settings.iteration_limit        = std::numeric_limits<int>::max();
+  fj_settings.update_weights         = true;
+  fj_settings.feasibility_run        = false;
+
+  fj_ptr_ = std::make_unique<fj_t<i_t, f_t>>(*context_ptr_, fj_settings);
+
+  fj_ptr_->improvement_callback = [this](f_t user_obj, const std::vector<f_t>& h_assignment) {
+    this->try_update_best(user_obj, h_assignment);
+  };
+
+  worker_thread_ = std::make_unique<std::thread>(&early_gpufj_t::run_worker, this);
+}
+
+template <typename i_t, typename f_t>
+void early_gpufj_t<i_t, f_t>::run_worker()
+{
+  RAFT_CUDA_TRY(cudaSetDevice(this->problem_ptr_->handle_ptr->get_device()));
+  fj_ptr_->solve(*this->solution_ptr_);
+}
+
+template <typename i_t, typename f_t>
+void early_gpufj_t<i_t, f_t>::stop()
+{
+  if (!worker_thread_) { return; }
+
+  context_ptr_->preempt_heuristic_solver_.store(true);
+
+  if (worker_thread_->joinable()) { worker_thread_->join(); }
+
+  CUOPT_LOG_DEBUG("[Early GPU FJ] Stopped, solution_found=%d",
+                 this->solution_found_);
+
+  fj_ptr_.reset();
+  worker_thread_.reset();
+}
+
+#if MIP_INSTANTIATE_FLOAT
+template class early_gpufj_t<int, float>;
+#endif
+
+#if MIP_INSTANTIATE_DOUBLE
+template class early_gpufj_t<int, double>;
+#endif
+
+}  // namespace cuopt::linear_programming::detail
diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cuh b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cuh
new file mode 100644
index 000000000..4a7769143
--- /dev/null
+++ b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cuh
@@ -0,0 +1,45 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#pragma once
+
+#include <mip_heuristics/early_heuristic.cuh>
+
+#include <memory>
+#include <thread>
+
+namespace cuopt::linear_programming::detail {
+
+template <typename i_t, typename f_t>
+class fj_t;
+
+template <typename i_t, typename f_t>
+struct mip_solver_context_t;
+
+template <typename i_t, typename f_t>
+class early_gpufj_t : public early_heuristic_t<i_t, f_t, early_gpufj_t<i_t, f_t>> {
+ public:
+  early_gpufj_t(const optimization_problem_t<i_t, f_t>& op_problem,
+                const mip_solver_settings_t<i_t, f_t>& settings,
+                early_incumbent_callback_t<f_t> incumbent_callback);
+
+  ~early_gpufj_t();
+
+  static constexpr const char* name() { return "GPUFJ"; }
+
+  void start();
+  void stop();
+
+ private:
+  void run_worker();
+
+  std::unique_ptr<mip_solver_context_t<i_t, f_t>> context_ptr_;
+  std::unique_ptr<fj_t<i_t, f_t>> fj_ptr_;
+  std::unique_ptr<std::thread> worker_thread_;
+};
+
+}  // namespace cuopt::linear_programming::detail
diff --git a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu
index e9cf0760d..b248351a3 100644
--- a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu
+++ b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu
@@ -879,7 +879,7 @@ i_t fj_t<i_t, f_t>::host_loop(solution_t<i_t, f_t>& solution, i_t climber_idx)
     // every now and then, ensure external solutions are added to the population
     // this is done here because FJ is called within FP and also after recombiners
     // so FJ is one of the most inner and most frequent functions to be called
-    if (steps % 10000 == 0) {
+    if (steps % 10000 == 0 && context.diversity_manager_ptr != nullptr) {
       context.diversity_manager_ptr->get_population_pointer()
         ->add_external_solutions_to_population();
     }
@@ -933,6 +933,22 @@ i_t fj_t<i_t, f_t>::host_loop(solution_t<i_t, f_t>& solution, i_t climber_idx)
       bool is_feasible = solution.compute_feasibility();
       solution.handle_ptr->sync_stream();
 
+      // Invoke improvement callback if we have a better feasible solution
+      if (is_feasible && improvement_callback) {
+        f_t user_obj = solution.get_user_objective();
+        if (user_obj < last_reported_objective_) {
+          last_reported_objective_ = user_obj;
+          // Copy assignment to host for callback
+          std::vector<f_t> h_assignment(solution.assignment.size());
+          raft::copy(h_assignment.data(),
+                     solution.assignment.data(),
+                     solution.assignment.size(),
+                     climber_stream);
+          climber_stream.synchronize();
+          improvement_callback(user_obj, h_assignment);
+        }
+      }
+
       if (limit_reached) { break; }
 
       if (is_feasible) {
diff --git a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cuh b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cuh
index e9040a759..50b451a86 100644
--- a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cuh
+++ b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cuh
@@ -19,11 +19,17 @@
 
 #include <utilities/event_handler.cuh>
 
+#include <functional>
+
 #define FJ_DEBUG_LOAD_BALANCING 0
 #define FJ_SINGLE_STEP          0
 
 namespace cuopt::linear_programming::detail {
 
+template <typename f_t>
+using fj_improvement_callback_t =
+  std::function<void(f_t objective, const std::vector<f_t>& assignment)>;
+
 static constexpr int TPB_resetmoves                 = raft::WarpSize * 4;
 static constexpr int TPB_heavyvars                  = raft::WarpSize * 16;
 static constexpr int TPB_heavycstrs                 = raft::WarpSize * 4;
@@ -628,6 +634,9 @@ class fj_t {
   std::vector<std::unique_ptr<climber_data_t>> climbers;
   rmm::device_uvector<typename climber_data_t::view_t> climber_views;
   fj_settings_t settings;
+
+  fj_improvement_callback_t<f_t> improvement_callback;
+  f_t last_reported_objective_{std::numeric_limits<f_t>::infinity()};
 };
 
 }  // namespace cuopt::linear_programming::detail
diff --git a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu
index 4d567c9ec..87f47d254 100644
--- a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu
+++ b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu
@@ -790,7 +790,7 @@ static void apply_move(fj_cpu_climber_t<i_t, f_t>& fj_cpu,
       if (fj_cpu.improvement_callback) {
         double current_work_units = fj_cpu.work_units_elapsed.load(std::memory_order_acquire);
         fj_cpu.improvement_callback(
-          fj_cpu.h_best_objective, fj_cpu.h_assignment, current_work_units);
+          fj_cpu.h_incumbent_objective, fj_cpu.h_assignment, current_work_units);
       }
       fj_cpu.feasible_found = true;
     }
@@ -1402,10 +1402,8 @@ std::unique_ptr<fj_cpu_climber_t<i_t, f_t>> fj_t<i_t, f_t>::create_cpu_climber(
 }
 
 template <typename i_t, typename f_t>
-bool fj_t<i_t, f_t>::cpu_solve(fj_cpu_climber_t<i_t, f_t>& fj_cpu, f_t in_time_limit)
+static bool cpufj_solve_loop(fj_cpu_climber_t<i_t, f_t>& fj_cpu, f_t in_time_limit)
 {
-  raft::common::nvtx::range scope("fj_cpu");
-
   i_t local_mins       = 0;
   auto loop_start      = std::chrono::high_resolution_clock::now();
   auto time_limit      = std::chrono::milliseconds((int)(in_time_limit * 1000));
@@ -1489,10 +1487,9 @@ bool fj_t<i_t, f_t>::cpu_solve(fj_cpu_climber_t<i_t, f_t>& fj_cpu, f_t in_time_l
         for (size_t i = 0; i < fj_cpu.cached_mtm_moves.size(); i++)
           fj_cpu.cached_mtm_moves[i].first = 0;
       }
-      thrust::tie(move, score) =
-        find_mtm_move_viol(fj_cpu, 1, true);  // pick a single random violated constraint
-      i_t var_idx = move.var_idx >= 0 ? move.var_idx : 0;
-      f_t delta   = move.var_idx >= 0 ? move.value : 0;
+      thrust::tie(move, score) = find_mtm_move_viol(fj_cpu, 1, true);   // pick a single random violated constraint
+      i_t var_idx              = move.var_idx >= 0 ? move.var_idx : 0;
+      f_t delta                = move.var_idx >= 0 ? move.value : 0;
       apply_move(fj_cpu, var_idx, delta, true);
       ++local_mins;
       ++fj_cpu.n_local_minima_window;
@@ -1507,8 +1504,7 @@ bool fj_t<i_t, f_t>::cpu_solve(fj_cpu_climber_t<i_t, f_t>& fj_cpu, f_t in_time_l
     if (fj_cpu.iterations % fj_cpu.log_interval == 0) {
       CUOPT_LOG_TRACE(
         "%sCPUFJ iteration: %d/%d, local mins: %d, best_objective: %g, viol: %zu, obj weight %g, "
-        "maxw "
-        "%g",
+        "maxw %g",
         fj_cpu.log_prefix.c_str(),
         fj_cpu.iterations,
         fj_cpu.settings.iteration_limit != std::numeric_limits<i_t>::max()
@@ -1537,15 +1533,10 @@ bool fj_t<i_t, f_t>::cpu_solve(fj_cpu_climber_t<i_t, f_t>& fj_cpu, f_t in_time_l
     if (fj_cpu.iterations % 100 == 0 && fj_cpu.iterations > 0) {
       // Collect memory statistics
       auto [loads, stores] = fj_cpu.memory_aggregator.collect();
-
-      double biased_work = (loads + stores) * fj_cpu.work_unit_bias / 1e10;
+      double biased_work   = (loads + stores) * fj_cpu.work_unit_bias / 1e10;
       fj_cpu.work_units_elapsed += biased_work;
 
       if (fj_cpu.producer_sync != nullptr) { fj_cpu.producer_sync->notify_progress(); }
-
-      CUOPT_LOG_TRACE("CPUFJ work units: %f incumbent %g",
-                      fj_cpu.work_units_elapsed.load(std::memory_order_relaxed),
-                      fj_cpu.pb_ptr->get_user_obj_from_solver_obj(fj_cpu.h_best_objective));
     }
 
     cuopt_func_call(sanity_checks(fj_cpu));
@@ -1569,6 +1560,13 @@ bool fj_t<i_t, f_t>::cpu_solve(fj_cpu_climber_t<i_t, f_t>& fj_cpu, f_t in_time_l
   return fj_cpu.feasible_found;
 }
 
+template <typename i_t, typename f_t>
+bool fj_t<i_t, f_t>::cpu_solve(fj_cpu_climber_t<i_t, f_t>& fj_cpu, f_t in_time_limit)
+{
+  raft::common::nvtx::range scope("fj_cpu");
+  return cpufj_solve_loop(fj_cpu, in_time_limit);
+}
+
 template <typename i_t, typename f_t>
 cpu_fj_thread_t<i_t, f_t>::~cpu_fj_thread_t()
 {
@@ -1578,8 +1576,7 @@ cpu_fj_thread_t<i_t, f_t>::~cpu_fj_thread_t()
 template <typename i_t, typename f_t>
 void cpu_fj_thread_t<i_t, f_t>::run_worker()
 {
-  bool solution_found   = fj_ptr->cpu_solve(*fj_cpu, time_limit);
-  cpu_fj_solution_found = solution_found;
+  cpu_fj_solution_found = cpufj_solve_loop(*fj_cpu, time_limit);
 }
 
 template <typename i_t, typename f_t>
@@ -1601,14 +1598,43 @@ void cpu_fj_thread_t<i_t, f_t>::stop_cpu_solver()
   fj_cpu->halted = true;
 }
 
+template <typename i_t, typename f_t>
+std::unique_ptr<fj_cpu_climber_t<i_t, f_t>> init_fj_cpu_standalone(
+  problem_t<i_t, f_t>& problem,
+  solution_t<i_t, f_t>& solution,
+  std::atomic<bool>& preemption_flag,
+  fj_settings_t settings)
+{
+  raft::common::nvtx::range scope("init_fj_cpu_standalone");
+
+  auto fj_cpu = std::make_unique<fj_cpu_climber_t<i_t, f_t>>(preemption_flag);
+
+  std::vector<f_t> default_weights(problem.n_constraints, 1.0);
+  init_fj_cpu(*fj_cpu, solution, default_weights, default_weights, 0.0);
+  fj_cpu->settings      = settings;
+  fj_cpu->settings.seed = cuopt::seed_generator::get_seed();
+
+  return fj_cpu;
+}
+
 #if MIP_INSTANTIATE_FLOAT
 template class fj_t<int, float>;
 template class cpu_fj_thread_t<int, float>;
+template std::unique_ptr<fj_cpu_climber_t<int, float>> init_fj_cpu_standalone(
+  problem_t<int, float>& problem,
+  solution_t<int, float>& solution,
+  std::atomic<bool>& preemption_flag,
+  fj_settings_t settings);
 #endif
 
 #if MIP_INSTANTIATE_DOUBLE
 template class fj_t<int, double>;
 template class cpu_fj_thread_t<int, double>;
+template std::unique_ptr<fj_cpu_climber_t<int, double>> init_fj_cpu_standalone(
+  problem_t<int, double>& problem,
+  solution_t<int, double>& solution,
+  std::atomic<bool>& preemption_flag,
+  fj_settings_t settings);
 #endif
 
 }  // namespace cuopt::linear_programming::detail
diff --git a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh
index 7dcc8d39b..3263609a2 100644
--- a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh
+++ b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh
@@ -210,4 +210,13 @@ struct cpu_fj_thread_t : public cpu_worker_thread_base_t<cpu_fj_thread_t<i_t, f_
   fj_t<i_t, f_t>* fj_ptr{nullptr};
 };
 
+// Standalone CPUFJ init for running without full fj_t infrastructure (avoids GPU allocations).
+// Used for early CPUFJ during presolve.
+template <typename i_t, typename f_t>
+std::unique_ptr<fj_cpu_climber_t<i_t, f_t>> init_fj_cpu_standalone(
+  problem_t<i_t, f_t>& problem,
+  solution_t<i_t, f_t>& solution,
+  std::atomic<bool>& preemption_flag,
+  fj_settings_t settings = fj_settings_t{});
+
 }  // namespace cuopt::linear_programming::detail
diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu
index 41e272992..54d7189e7 100644
--- a/cpp/src/mip_heuristics/solve.cu
+++ b/cpp/src/mip_heuristics/solve.cu
@@ -7,6 +7,8 @@
 
 #include <cuopt/error.hpp>
 
+#include <mip_heuristics/feasibility_jump/early_cpufj.cuh>
+#include <mip_heuristics/feasibility_jump/early_gpufj.cuh>
 #include <mip_heuristics/mip_constants.hpp>
 #include <mip_heuristics/presolve/third_party_presolve.hpp>
 #include <mip_heuristics/presolve/trivial_presolve.cuh>
@@ -51,10 +53,29 @@ static void init_handler(const raft::handle_t* handle_ptr)
     handle_ptr->get_cusparse_handle(), CUSPARSE_POINTER_MODE_DEVICE, handle_ptr->get_stream()));
 }
 
+template <typename f_t>
+static void invoke_solution_callbacks(
+  const std::vector<internals::base_solution_callback_t*>& mip_callbacks,
+  f_t objective,
+  std::vector<f_t>& assignment)
+{
+  std::vector<f_t> obj_vec   = {objective};
+  std::vector<f_t> bound_vec = {std::numeric_limits<f_t>::infinity()};
+  for (auto callback : mip_callbacks) {
+    if (callback != nullptr &&
+        callback->get_type() == internals::base_solution_callback_type::GET_SOLUTION) {
+      auto get_sol_callback = static_cast<internals::get_solution_callback_t*>(callback);
+      get_sol_callback->get_solution(
+        assignment.data(), obj_vec.data(), bound_vec.data(), get_sol_callback->get_user_data());
+    }
+  }
+}
+
 template <typename i_t, typename f_t>
 mip_solution_t<i_t, f_t> run_mip(detail::problem_t<i_t, f_t>& problem,
                                  mip_solver_settings_t<i_t, f_t> const& settings,
-                                 timer_t& timer)
+                                 timer_t& timer,
+                                 f_t initial_cutoff = std::numeric_limits<f_t>::infinity())
 {
   raft::common::nvtx::range fun_scope("run_mip");
   auto constexpr const running_mip = true;
@@ -156,6 +177,31 @@ mip_solution_t<i_t, f_t> run_mip(detail::problem_t<i_t, f_t>& problem,
   auto scaled_sol                 = solver.run_solver();
   bool is_feasible_before_scaling = scaled_sol.get_feasible();
   scaled_sol.problem_ptr          = &problem;
+  solver.context.initial_cutoff = initial_cutoff;
+
+  // Run early CPUFJ on papilo-presolved problem during cuOpt presolve (probing cache).
+  // Stopped by run_solver after presolve completes; its best objective feeds into initial_cutoff.
+  std::unique_ptr<detail::early_cpufj_t<i_t, f_t>> early_cpufj;
+  bool run_early_cpufj = problem.has_papilo_presolve_data() &&
+                         settings.determinism_mode != CUOPT_MODE_DETERMINISTIC &&
+                         problem.original_problem_ptr->get_n_integers() > 0;
+  if (run_early_cpufj) {
+    auto* presolver_ptr = problem.presolve_data.papilo_presolve_ptr;
+    auto mip_callbacks  = settings.get_mip_callbacks();
+    auto incumbent_callback =
+      [presolver_ptr, mip_callbacks](f_t objective, const std::vector<f_t>& assignment) {
+        std::vector<f_t> user_assignment;
+        presolver_ptr->uncrush_primal_solution(assignment, user_assignment);
+        invoke_solution_callbacks(mip_callbacks, objective, user_assignment);
+      };
+    early_cpufj = std::make_unique<detail::early_cpufj_t<i_t, f_t>>(
+      *problem.original_problem_ptr, settings.get_tolerances(), incumbent_callback);
+    early_cpufj->set_best_objective(initial_cutoff);
+    early_cpufj->start();
+    solver.context.early_cpufj_ptr = early_cpufj.get();
+    CUOPT_LOG_DEBUG("Started early CPUFJ on papilo-presolved problem during cuOpt presolve");
+  }
+
   if (settings.mip_scaling) { scaling.unscale_solutions(scaled_sol); }
   // at this point we need to compute the feasibility on the original problem not the presolved one
   bool is_feasible_after_unscaling = scaled_sol.compute_feasibility();
@@ -251,6 +297,43 @@ mip_solution_t<i_t, f_t> solve_mip(optimization_problem_t<i_t, f_t>& op_problem,
 
     if (!run_presolve) { CUOPT_LOG_INFO("Presolve is disabled, skipping"); }
 
+    // Start early FJ (CPU and GPU) during presolve to find incumbents ASAP
+    // Only run if presolve is enabled (gives FJ time to find solutions)
+    // and we're not in deterministic mode
+    std::unique_ptr<detail::early_cpufj_t<i_t, f_t>> early_cpufj;
+    std::unique_ptr<detail::early_gpufj_t<i_t, f_t>> early_gpufj;
+
+    // Track best incumbent found during presolve (shared across CPU and GPU FJ)
+    std::atomic<f_t> early_best_objective{std::numeric_limits<f_t>::infinity()};
+    std::mutex early_callback_mutex;
+
+    bool run_early_fj = run_presolve && settings.determinism_mode != CUOPT_MODE_DETERMINISTIC &&
+                        op_problem.get_n_integers() > 0;
+    if (run_early_fj) {
+      auto early_fj_callback = [&early_best_objective,
+                                 &early_callback_mutex,
+                                 mip_callbacks = settings.get_mip_callbacks()](
+                                  f_t objective, const std::vector<f_t>& assignment) {
+        std::lock_guard<std::mutex> lock(early_callback_mutex);
+        if (objective >= early_best_objective.load()) { return; }
+        early_best_objective.store(objective);
+        auto user_assignment = assignment;
+        invoke_solution_callbacks(mip_callbacks, objective, user_assignment);
+      };
+
+      // Start early CPUFJ on original problem (will restart on presolved problem after Papilo)
+      early_cpufj = std::make_unique<detail::early_cpufj_t<i_t, f_t>>(
+        op_problem, settings.get_tolerances(), early_fj_callback);
+      early_cpufj->start();
+      CUOPT_LOG_INFO("Started early CPUFJ on original problem");
+
+      // Start early GPU FJ (uses GPU while CPU is busy with Papilo)
+      early_gpufj =
+        std::make_unique<detail::early_gpufj_t<i_t, f_t>>(op_problem, settings, early_fj_callback);
+      early_gpufj->start();
+      CUOPT_LOG_INFO("Started early GPUFJ during presolve");
+    }
+
     auto constexpr const dual_postsolve = false;
     if (run_presolve) {
       detail::sort_csr(op_problem);
@@ -288,12 +371,32 @@ mip_solution_t<i_t, f_t> solve_mip(optimization_problem_t<i_t, f_t>& op_problem,
       }
       CUOPT_LOG_INFO("Papilo presolve time: %.2f", presolve_time);
     }
+
+    // Stop early GPU FJ now that Papilo presolve is complete
+    if (early_gpufj) {
+      early_gpufj->stop();
+      if (early_gpufj->solution_found()) {
+        CUOPT_LOG_INFO("Early GPU FJ found incumbent with objective %.6e during presolve",
+                       early_gpufj->get_best_objective());
+      }
+      early_gpufj.reset();  // Free GPU memory
+    }
+
+    if (early_cpufj && run_presolve && presolve_result.has_value()) {
+      early_cpufj->stop();
+      if (early_cpufj->solution_found()) {
+        CUOPT_LOG_INFO("Early CPUFJ (original) found incumbent with objective %.6e",
+                       early_cpufj->get_best_objective());
+      }
+      early_cpufj.reset();
+    }
+
     if (settings.user_problem_file != "") {
       CUOPT_LOG_INFO("Writing user problem to file: %s", settings.user_problem_file.c_str());
       op_problem.write_to_mps(settings.user_problem_file);
     }
 
-    auto sol = run_mip(problem, settings, timer);
+    auto sol = run_mip(problem, settings, timer, early_best_objective.load());
 
     if (run_presolve) {
       auto status_to_skip = sol.get_termination_status() == mip_termination_status_t::TimeLimit ||
diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu
index 3e2251171..92e52a0f8 100644
--- a/cpp/src/mip_heuristics/solver.cu
+++ b/cpp/src/mip_heuristics/solver.cu
@@ -17,6 +17,7 @@
 #include <branch_and_bound/branch_and_bound.hpp>
 #include <dual_simplex/simplex_solver_settings.hpp>
 #include <dual_simplex/solve.hpp>
+#include <mip_heuristics/feasibility_jump/early_cpufj.cuh>
 
 #include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/core/cusparse_macros.hpp>
@@ -48,7 +49,7 @@ mip_solver_t<i_t, f_t>::mip_solver_t(const problem_t<i_t, f_t>& op_problem,
     context(op_problem.handle_ptr,
             const_cast<problem_t<i_t, f_t>*>(&op_problem),
             solver_settings,
-            scaling),
+            &scaling),
     timer_(timer)
 {
   init_handler(op_problem.handle_ptr);
@@ -113,6 +114,17 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
                               ? std::numeric_limits<f_t>::infinity()
                               : timer_.remaining_time();
   bool presolve_success   = run_presolve ? dm.run_presolve(time_limit) : true;
+
+  // Stop early CPUFJ after cuopt presolve (probing cache) but before main solve
+  if (context.early_cpufj_ptr) {
+    context.early_cpufj_ptr->stop();
+    if (context.early_cpufj_ptr->solution_found()) {
+      f_t obj              = context.early_cpufj_ptr->get_best_objective();
+      context.initial_cutoff = std::min(context.initial_cutoff, obj);
+      CUOPT_LOG_INFO("Early CPUFJ found incumbent with objective %g during presolve", obj);
+    }
+  }
+
   if (!presolve_success) {
     CUOPT_LOG_INFO("Problem proven infeasible in presolve");
     solution_t<i_t, f_t> sol(*context.problem_ptr);
@@ -251,7 +263,18 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
     branch_and_bound = std::make_unique<dual_simplex::branch_and_bound_t<i_t, f_t>>(
       branch_and_bound_problem, branch_and_bound_settings, timer_.get_tic_start());
     context.branch_and_bound_ptr = branch_and_bound.get();
-    auto* stats_ptr              = &context.stats;
+
+    // Set cutoff from early FJ if available
+    if (context.initial_cutoff < std::numeric_limits<f_t>::infinity()) {
+      f_t internal_cutoff =
+        context.problem_ptr->get_solver_obj_from_user_obj(context.initial_cutoff);
+      branch_and_bound->set_initial_cutoff(internal_cutoff);
+      CUOPT_LOG_INFO("B&B using initial cutoff %.6e from early heuristics (internal: %.6e)",
+                     context.initial_cutoff,
+                     internal_cutoff);
+    }
+
+    auto* stats_ptr = &context.stats;
     branch_and_bound->set_user_bound_callback(
       [stats_ptr](f_t user_bound) { stats_ptr->set_solution_bound(user_bound); });
 
diff --git a/cpp/src/mip_heuristics/solver_context.cuh b/cpp/src/mip_heuristics/solver_context.cuh
index baac1dd9d..d1be34ea6 100644
--- a/cpp/src/mip_heuristics/solver_context.cuh
+++ b/cpp/src/mip_heuristics/solver_context.cuh
@@ -28,6 +28,9 @@ namespace cuopt::linear_programming::detail {
 template <typename i_t, typename f_t>
 class diversity_manager_t;
 
+template <typename i_t, typename f_t>
+class early_cpufj_t;
+
 // Aggregate structure containing the global context of the solving process for convenience:
 // The current problem, user settings, raft handle and statistics objects
 template <typename i_t, typename f_t>
@@ -35,7 +38,7 @@ struct mip_solver_context_t {
   explicit mip_solver_context_t(raft::handle_t const* handle_ptr_,
                                 problem_t<i_t, f_t>* problem_ptr_,
                                 mip_solver_settings_t<i_t, f_t> settings_,
-                                pdlp_initial_scaling_strategy_t<i_t, f_t>& scaling)
+                                pdlp_initial_scaling_strategy_t<i_t, f_t>* scaling)
     : handle_ptr(handle_ptr_), problem_ptr(problem_ptr_), settings(settings_), scaling(scaling)
   {
     cuopt_assert(problem_ptr != nullptr, "problem_ptr is nullptr");
@@ -53,7 +56,7 @@ struct mip_solver_context_t {
   diversity_manager_t<i_t, f_t>* diversity_manager_ptr{nullptr};
   std::atomic<bool> preempt_heuristic_solver_ = false;
   const mip_solver_settings_t<i_t, f_t> settings;
-  pdlp_initial_scaling_strategy_t<i_t, f_t>& scaling;
+  pdlp_initial_scaling_strategy_t<i_t, f_t>* scaling;  // nullptr when not available (early FJ)
   solver_stats_t<i_t, f_t> stats;
   // Work limit context for tracking work units in deterministic mode (shared across all timers in
   // GPU heuristic loop)
@@ -61,6 +64,9 @@ struct mip_solver_context_t {
 
   // synchronization every 5 seconds for deterministic mode
   work_unit_scheduler_t work_unit_scheduler_{5.0};
+
+  early_cpufj_t<i_t, f_t>* early_cpufj_ptr{nullptr};
+  f_t initial_cutoff{std::numeric_limits<f_t>::infinity()};
 };
 
 }  // namespace cuopt::linear_programming::detail

From 5881d35aa4526638a0adbb721d36329623da1446 Mon Sep 17 00:00:00 2001
From: Alice Boucher <yboucher@nvidia.com>
Date: Mon, 23 Feb 2026 18:47:41 +0100
Subject: [PATCH 02/18] fix style

---
 .../feasibility_jump/early_cpufj.cu           | 10 ++++----
 .../feasibility_jump/early_gpufj.cu           |  5 ++--
 .../mip_heuristics/feasibility_jump/fj_cpu.cu |  7 +++---
 cpp/src/mip_heuristics/solve.cu               | 24 +++++++++----------
 cpp/src/mip_heuristics/solver.cu              |  2 +-
 5 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu
index 23f7a07da..fadec6e45 100644
--- a/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu
+++ b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu
@@ -36,9 +36,9 @@ void early_cpufj_t<i_t, f_t>::start()
 
   this->start_time_ = std::chrono::steady_clock::now();
 
-  cpu_fj_thread_         = std::make_unique<cpu_fj_thread_t<i_t, f_t>>();
-  cpu_fj_thread_->fj_cpu = init_fj_cpu_standalone(
-    *this->problem_ptr_, *this->solution_ptr_, preemption_flag_);
+  cpu_fj_thread_ = std::make_unique<cpu_fj_thread_t<i_t, f_t>>();
+  cpu_fj_thread_->fj_cpu =
+    init_fj_cpu_standalone(*this->problem_ptr_, *this->solution_ptr_, preemption_flag_);
   cpu_fj_thread_->time_limit = std::numeric_limits<f_t>::infinity();
 
   cpu_fj_thread_->fj_cpu->log_prefix = "[Early CPUFJ] ";
@@ -62,8 +62,8 @@ void early_cpufj_t<i_t, f_t>::stop()
   cpu_fj_thread_->wait_for_cpu_solver();
 
   CUOPT_LOG_DEBUG("[Early CPUFJ] Stopped after %d iterations, solution_found=%d",
-                 cpu_fj_thread_->fj_cpu ? cpu_fj_thread_->fj_cpu->iterations : 0,
-                 this->solution_found_);
+                  cpu_fj_thread_->fj_cpu ? cpu_fj_thread_->fj_cpu->iterations : 0,
+                  this->solution_found_);
 
   cpu_fj_thread_.reset();
 }
diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu
index adc010c16..e2df3a5af 100644
--- a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu
+++ b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu
@@ -8,8 +8,8 @@
 #include "early_gpufj.cuh"
 
 #include <mip_heuristics/feasibility_jump/feasibility_jump.cuh>
-#include <mip_heuristics/solver_context.cuh>
 #include <mip_heuristics/mip_constants.hpp>
+#include <mip_heuristics/solver_context.cuh>
 #include <utilities/logger.hpp>
 
 #include <raft/core/error.hpp>
@@ -75,8 +75,7 @@ void early_gpufj_t<i_t, f_t>::stop()
 
   if (worker_thread_->joinable()) { worker_thread_->join(); }
 
-  CUOPT_LOG_DEBUG("[Early GPU FJ] Stopped, solution_found=%d",
-                 this->solution_found_);
+  CUOPT_LOG_DEBUG("[Early GPU FJ] Stopped, solution_found=%d", this->solution_found_);
 
   fj_ptr_.reset();
   worker_thread_.reset();
diff --git a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu
index 87f47d254..af41545a9 100644
--- a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu
+++ b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu
@@ -1487,9 +1487,10 @@ static bool cpufj_solve_loop(fj_cpu_climber_t<i_t, f_t>& fj_cpu, f_t in_time_lim
         for (size_t i = 0; i < fj_cpu.cached_mtm_moves.size(); i++)
           fj_cpu.cached_mtm_moves[i].first = 0;
       }
-      thrust::tie(move, score) = find_mtm_move_viol(fj_cpu, 1, true);   // pick a single random violated constraint
-      i_t var_idx              = move.var_idx >= 0 ? move.var_idx : 0;
-      f_t delta                = move.var_idx >= 0 ? move.value : 0;
+      thrust::tie(move, score) =
+        find_mtm_move_viol(fj_cpu, 1, true);  // pick a single random violated constraint
+      i_t var_idx = move.var_idx >= 0 ? move.var_idx : 0;
+      f_t delta   = move.var_idx >= 0 ? move.value : 0;
       apply_move(fj_cpu, var_idx, delta, true);
       ++local_mins;
       ++fj_cpu.n_local_minima_window;
diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu
index 54d7189e7..88daed0c0 100644
--- a/cpp/src/mip_heuristics/solve.cu
+++ b/cpp/src/mip_heuristics/solve.cu
@@ -177,7 +177,7 @@ mip_solution_t<i_t, f_t> run_mip(detail::problem_t<i_t, f_t>& problem,
   auto scaled_sol                 = solver.run_solver();
   bool is_feasible_before_scaling = scaled_sol.get_feasible();
   scaled_sol.problem_ptr          = &problem;
-  solver.context.initial_cutoff = initial_cutoff;
+  solver.context.initial_cutoff   = initial_cutoff;
 
   // Run early CPUFJ on papilo-presolved problem during cuOpt presolve (probing cache).
   // Stopped by run_solver after presolve completes; its best objective feeds into initial_cutoff.
@@ -186,14 +186,14 @@ mip_solution_t<i_t, f_t> run_mip(detail::problem_t<i_t, f_t>& problem,
                          settings.determinism_mode != CUOPT_MODE_DETERMINISTIC &&
                          problem.original_problem_ptr->get_n_integers() > 0;
   if (run_early_cpufj) {
-    auto* presolver_ptr = problem.presolve_data.papilo_presolve_ptr;
-    auto mip_callbacks  = settings.get_mip_callbacks();
-    auto incumbent_callback =
-      [presolver_ptr, mip_callbacks](f_t objective, const std::vector<f_t>& assignment) {
-        std::vector<f_t> user_assignment;
-        presolver_ptr->uncrush_primal_solution(assignment, user_assignment);
-        invoke_solution_callbacks(mip_callbacks, objective, user_assignment);
-      };
+    auto* presolver_ptr     = problem.presolve_data.papilo_presolve_ptr;
+    auto mip_callbacks      = settings.get_mip_callbacks();
+    auto incumbent_callback = [presolver_ptr, mip_callbacks](f_t objective,
+                                                             const std::vector<f_t>& assignment) {
+      std::vector<f_t> user_assignment;
+      presolver_ptr->uncrush_primal_solution(assignment, user_assignment);
+      invoke_solution_callbacks(mip_callbacks, objective, user_assignment);
+    };
     early_cpufj = std::make_unique<detail::early_cpufj_t<i_t, f_t>>(
       *problem.original_problem_ptr, settings.get_tolerances(), incumbent_callback);
     early_cpufj->set_best_objective(initial_cutoff);
@@ -311,9 +311,9 @@ mip_solution_t<i_t, f_t> solve_mip(optimization_problem_t<i_t, f_t>& op_problem,
                         op_problem.get_n_integers() > 0;
     if (run_early_fj) {
       auto early_fj_callback = [&early_best_objective,
-                                 &early_callback_mutex,
-                                 mip_callbacks = settings.get_mip_callbacks()](
-                                  f_t objective, const std::vector<f_t>& assignment) {
+                                &early_callback_mutex,
+                                mip_callbacks = settings.get_mip_callbacks()](
+                                 f_t objective, const std::vector<f_t>& assignment) {
         std::lock_guard<std::mutex> lock(early_callback_mutex);
         if (objective >= early_best_objective.load()) { return; }
         early_best_objective.store(objective);
diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu
index 92e52a0f8..3f685886b 100644
--- a/cpp/src/mip_heuristics/solver.cu
+++ b/cpp/src/mip_heuristics/solver.cu
@@ -119,7 +119,7 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
   if (context.early_cpufj_ptr) {
     context.early_cpufj_ptr->stop();
     if (context.early_cpufj_ptr->solution_found()) {
-      f_t obj              = context.early_cpufj_ptr->get_best_objective();
+      f_t obj                = context.early_cpufj_ptr->get_best_objective();
       context.initial_cutoff = std::min(context.initial_cutoff, obj);
       CUOPT_LOG_INFO("Early CPUFJ found incumbent with objective %g during presolve", obj);
     }

From 8a3684cece66eb5d9ab2f9a8fa2fe3b7d0697f63 Mon Sep 17 00:00:00 2001
From: Alice Boucher <yboucher@nvidia.com>
Date: Mon, 23 Feb 2026 19:31:27 +0100
Subject: [PATCH 03/18] ai review comments

---
 cpp/src/mip_heuristics/early_heuristic.cuh             | 1 +
 cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu | 1 +
 cpp/src/mip_heuristics/solve.cu                        | 9 +++++----
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/cpp/src/mip_heuristics/early_heuristic.cuh b/cpp/src/mip_heuristics/early_heuristic.cuh
index 5ea7f346a..cfc2a9c1b 100644
--- a/cpp/src/mip_heuristics/early_heuristic.cuh
+++ b/cpp/src/mip_heuristics/early_heuristic.cuh
@@ -57,6 +57,7 @@ class early_heuristic_t {
  protected:
   ~early_heuristic_t() = default;
 
+  // NOT thread-safe
   void try_update_best(f_t user_obj, const std::vector<f_t>& assignment)
   {
     if (user_obj >= best_objective_) { return; }
diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu
index fadec6e45..1cd3b0005 100644
--- a/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu
+++ b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu
@@ -34,6 +34,7 @@ void early_cpufj_t<i_t, f_t>::start()
 {
   if (cpu_fj_thread_) { return; }
 
+  this->preemption_flag_.store(false);
   this->start_time_ = std::chrono::steady_clock::now();
 
   cpu_fj_thread_ = std::make_unique<cpu_fj_thread_t<i_t, f_t>>();
diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu
index 88daed0c0..812b93dca 100644
--- a/cpp/src/mip_heuristics/solve.cu
+++ b/cpp/src/mip_heuristics/solve.cu
@@ -174,10 +174,7 @@ mip_solution_t<i_t, f_t> run_mip(detail::problem_t<i_t, f_t>& problem,
   detail::trivial_presolve(scaled_problem);
 
   detail::mip_solver_t<i_t, f_t> solver(scaled_problem, settings, scaling, timer);
-  auto scaled_sol                 = solver.run_solver();
-  bool is_feasible_before_scaling = scaled_sol.get_feasible();
-  scaled_sol.problem_ptr          = &problem;
-  solver.context.initial_cutoff   = initial_cutoff;
+  solver.context.initial_cutoff = initial_cutoff;
 
   // Run early CPUFJ on papilo-presolved problem during cuOpt presolve (probing cache).
   // Stopped by run_solver after presolve completes; its best objective feeds into initial_cutoff.
@@ -202,6 +199,10 @@ mip_solution_t<i_t, f_t> run_mip(detail::problem_t<i_t, f_t>& problem,
     CUOPT_LOG_DEBUG("Started early CPUFJ on papilo-presolved problem during cuOpt presolve");
   }
 
+  auto scaled_sol                 = solver.run_solver();
+  bool is_feasible_before_scaling = scaled_sol.get_feasible();
+  scaled_sol.problem_ptr          = &problem;
+
   if (settings.mip_scaling) { scaling.unscale_solutions(scaled_sol); }
   // at this point we need to compute the feasibility on the original problem not the presolved one
   bool is_feasible_after_unscaling = scaled_sol.compute_feasibility();

From 9c89092c54c3fbfd2d9fb24b12d5baff87a123da Mon Sep 17 00:00:00 2001
From: Alice Boucher <yboucher@nvidia.com>
Date: Mon, 23 Feb 2026 20:34:25 +0100
Subject: [PATCH 04/18] review comments, fix crash

---
 cpp/src/mip_heuristics/early_heuristic.cuh    | 13 ++++----
 .../feasibility_jump/early_cpufj.cu           |  3 +-
 .../feasibility_jump/early_gpufj.cu           |  3 +-
 .../feasibility_jump/feasibility_jump.cu      |  9 +++---
 cpp/src/mip_heuristics/solve.cu               | 30 ++++++++++++-------
 cpp/src/mip_heuristics/solver.cu              | 10 ++-----
 6 files changed, 37 insertions(+), 31 deletions(-)

diff --git a/cpp/src/mip_heuristics/early_heuristic.cuh b/cpp/src/mip_heuristics/early_heuristic.cuh
index cfc2a9c1b..f3644c4d2 100644
--- a/cpp/src/mip_heuristics/early_heuristic.cuh
+++ b/cpp/src/mip_heuristics/early_heuristic.cuh
@@ -25,7 +25,7 @@ namespace cuopt::linear_programming::detail {
 
 template <typename f_t>
 using early_incumbent_callback_t =
-  std::function<void(f_t objective, const std::vector<f_t>& assignment)>;
+  std::function<void(f_t solver_obj, f_t user_obj, const std::vector<f_t>& assignment)>;
 
 // CRTP base for early heuristics that run on the original (or papilo-presolved) problem
 // during presolve to find incumbents as early as possible.
@@ -57,11 +57,11 @@ class early_heuristic_t {
  protected:
   ~early_heuristic_t() = default;
 
-  // NOT thread-safe
-  void try_update_best(f_t user_obj, const std::vector<f_t>& assignment)
+  // NOT thread-safe. solver_obj is in solver-space (always minimization).
+  void try_update_best(f_t solver_obj, const std::vector<f_t>& assignment)
   {
-    if (user_obj >= best_objective_) { return; }
-    best_objective_ = user_obj;
+    if (solver_obj >= best_objective_) { return; }
+    best_objective_ = solver_obj;
 
     auto* handle_ptr = problem_ptr_->handle_ptr;
     RAFT_CUDA_TRY(cudaSetDevice(handle_ptr->get_device()));
@@ -72,13 +72,14 @@ class early_heuristic_t {
 
     best_assignment_ = user_assignment;
     solution_found_  = true;
+    f_t user_obj     = problem_ptr_->get_user_obj_from_solver_obj(solver_obj);
     double elapsed =
       std::chrono::duration<double>(std::chrono::steady_clock::now() - start_time_).count();
     CUOPT_LOG_INFO("Early heuristics (%s) lowered the primal bound. Objective %g. Time %.2f",
                    Derived::name(),
                    user_obj,
                    elapsed);
-    if (incumbent_callback_) { incumbent_callback_(user_obj, user_assignment); }
+    if (incumbent_callback_) { incumbent_callback_(solver_obj, user_obj, user_assignment); }
   }
 
   std::unique_ptr<problem_t<i_t, f_t>> problem_ptr_;
diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu
index 1cd3b0005..8109653e6 100644
--- a/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu
+++ b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu
@@ -46,8 +46,7 @@ void early_cpufj_t<i_t, f_t>::start()
 
   cpu_fj_thread_->fj_cpu->improvement_callback =
     [this](f_t solver_obj, const std::vector<f_t>& assignment, double) {
-      f_t user_obj = this->problem_ptr_->get_user_obj_from_solver_obj(solver_obj);
-      this->try_update_best(user_obj, assignment);
+      this->try_update_best(solver_obj, assignment);
     };
 
   cpu_fj_thread_->start_cpu_solver();
diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu
index e2df3a5af..92d51a169 100644
--- a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu
+++ b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu
@@ -53,7 +53,8 @@ void early_gpufj_t<i_t, f_t>::start()
   fj_ptr_ = std::make_unique<fj_t<i_t, f_t>>(*context_ptr_, fj_settings);
 
   fj_ptr_->improvement_callback = [this](f_t user_obj, const std::vector<f_t>& h_assignment) {
-    this->try_update_best(user_obj, h_assignment);
+    f_t solver_obj = this->problem_ptr_->get_solver_obj_from_user_obj(user_obj);
+    this->try_update_best(solver_obj, h_assignment);
   };
 
   worker_thread_ = std::make_unique<std::thread>(&early_gpufj_t::run_worker, this);
diff --git a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu
index b248351a3..748dd41df 100644
--- a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu
+++ b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu
@@ -936,8 +936,8 @@ i_t fj_t<i_t, f_t>::host_loop(solution_t<i_t, f_t>& solution, i_t climber_idx)
       // Invoke improvement callback if we have a better feasible solution
       if (is_feasible && improvement_callback) {
         f_t user_obj = solution.get_user_objective();
-        if (user_obj < last_reported_objective_) {
-          last_reported_objective_ = user_obj;
+        if (solution.h_obj < last_reported_objective_) {
+          last_reported_objective_ = solution.h_obj;
           // Copy assignment to host for callback
           std::vector<f_t> h_assignment(solution.assignment.size());
           raft::copy(h_assignment.data(),
@@ -1075,8 +1075,9 @@ i_t fj_t<i_t, f_t>::solve(solution_t<i_t, f_t>& solution)
 {
   raft::common::nvtx::range scope("fj_solve");
   timer_t timer(settings.time_limit);
-  handle_ptr = const_cast<raft::handle_t*>(solution.handle_ptr);
-  pb_ptr     = solution.problem_ptr;
+  handle_ptr               = const_cast<raft::handle_t*>(solution.handle_ptr);
+  pb_ptr                   = solution.problem_ptr;
+  last_reported_objective_ = std::numeric_limits<f_t>::infinity();
   if (settings.mode != fj_mode_t::ROUNDING) {
     cuopt_func_call(solution.test_variable_bounds(true));
     cuopt_assert(solution.test_number_all_integer(), "All integers must be rounded");
diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu
index 812b93dca..000684be9 100644
--- a/cpp/src/mip_heuristics/solve.cu
+++ b/cpp/src/mip_heuristics/solve.cu
@@ -57,10 +57,11 @@ template <typename f_t>
 static void invoke_solution_callbacks(
   const std::vector<internals::base_solution_callback_t*>& mip_callbacks,
   f_t objective,
-  std::vector<f_t>& assignment)
+  std::vector<f_t>& assignment,
+  f_t bound)
 {
   std::vector<f_t> obj_vec   = {objective};
-  std::vector<f_t> bound_vec = {std::numeric_limits<f_t>::infinity()};
+  std::vector<f_t> bound_vec = {bound};
   for (auto callback : mip_callbacks) {
     if (callback != nullptr &&
         callback->get_type() == internals::base_solution_callback_type::GET_SOLUTION) {
@@ -185,11 +186,14 @@ mip_solution_t<i_t, f_t> run_mip(detail::problem_t<i_t, f_t>& problem,
   if (run_early_cpufj) {
     auto* presolver_ptr     = problem.presolve_data.papilo_presolve_ptr;
     auto mip_callbacks      = settings.get_mip_callbacks();
-    auto incumbent_callback = [presolver_ptr, mip_callbacks](f_t objective,
-                                                             const std::vector<f_t>& assignment) {
+    f_t no_bound            = problem.presolve_data.objective_scaling_factor >= 0
+                                ? -std::numeric_limits<f_t>::infinity()
+                                : std::numeric_limits<f_t>::infinity();
+    auto incumbent_callback = [presolver_ptr, mip_callbacks, no_bound](
+                                f_t solver_obj, f_t user_obj, const std::vector<f_t>& assignment) {
       std::vector<f_t> user_assignment;
       presolver_ptr->uncrush_primal_solution(assignment, user_assignment);
-      invoke_solution_callbacks(mip_callbacks, objective, user_assignment);
+      invoke_solution_callbacks(mip_callbacks, user_obj, user_assignment, no_bound);
     };
     early_cpufj = std::make_unique<detail::early_cpufj_t<i_t, f_t>>(
       *problem.original_problem_ptr, settings.get_tolerances(), incumbent_callback);
@@ -309,17 +313,21 @@ mip_solution_t<i_t, f_t> solve_mip(optimization_problem_t<i_t, f_t>& op_problem,
     std::mutex early_callback_mutex;
 
     bool run_early_fj = run_presolve && settings.determinism_mode != CUOPT_MODE_DETERMINISTIC &&
-                        op_problem.get_n_integers() > 0;
+                        op_problem.get_n_integers() > 0 && op_problem.get_n_constraints() > 0;
     if (run_early_fj) {
+      f_t no_bound           = problem.presolve_data.objective_scaling_factor >= 0
+                                 ? -std::numeric_limits<f_t>::infinity()
+                                 : std::numeric_limits<f_t>::infinity();
       auto early_fj_callback = [&early_best_objective,
                                 &early_callback_mutex,
-                                mip_callbacks = settings.get_mip_callbacks()](
-                                 f_t objective, const std::vector<f_t>& assignment) {
+                                mip_callbacks = settings.get_mip_callbacks(),
+                                no_bound](
+                                 f_t solver_obj, f_t user_obj, const std::vector<f_t>& assignment) {
         std::lock_guard<std::mutex> lock(early_callback_mutex);
-        if (objective >= early_best_objective.load()) { return; }
-        early_best_objective.store(objective);
+        if (solver_obj >= early_best_objective.load()) { return; }
+        early_best_objective.store(solver_obj);
         auto user_assignment = assignment;
-        invoke_solution_callbacks(mip_callbacks, objective, user_assignment);
+        invoke_solution_callbacks(mip_callbacks, user_obj, user_assignment, no_bound);
       };
 
       // Start early CPUFJ on original problem (will restart on presolved problem after Papilo)
diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu
index 3f685886b..7c7c4f6f2 100644
--- a/cpp/src/mip_heuristics/solver.cu
+++ b/cpp/src/mip_heuristics/solver.cu
@@ -264,14 +264,10 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
       branch_and_bound_problem, branch_and_bound_settings, timer_.get_tic_start());
     context.branch_and_bound_ptr = branch_and_bound.get();
 
-    // Set cutoff from early FJ if available
+    // Set cutoff from early FJ if available (initial_cutoff is in solver-space)
     if (context.initial_cutoff < std::numeric_limits<f_t>::infinity()) {
-      f_t internal_cutoff =
-        context.problem_ptr->get_solver_obj_from_user_obj(context.initial_cutoff);
-      branch_and_bound->set_initial_cutoff(internal_cutoff);
-      CUOPT_LOG_INFO("B&B using initial cutoff %.6e from early heuristics (internal: %.6e)",
-                     context.initial_cutoff,
-                     internal_cutoff);
+      branch_and_bound->set_initial_cutoff(context.initial_cutoff);
+      CUOPT_LOG_INFO("B&B using initial cutoff %.6e from early heuristics", context.initial_cutoff);
     }
 
     auto* stats_ptr = &context.stats;

From a4a195f94f8632e6035121c67e9461f7c794fcc6 Mon Sep 17 00:00:00 2001
From: Alice Boucher <yboucher@nvidia.com>
Date: Mon, 23 Feb 2026 21:35:30 +0100
Subject: [PATCH 05/18] add GIL locks to handle worker threads invoking python
 callbacks

---
 .../utilities/callbacks_implems.hpp           | 24 +++++++++++--------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/cpp/include/cuopt/linear_programming/utilities/callbacks_implems.hpp b/cpp/include/cuopt/linear_programming/utilities/callbacks_implems.hpp
index e13fda2ba..ef0fe35b9 100644
--- a/cpp/include/cuopt/linear_programming/utilities/callbacks_implems.hpp
+++ b/cpp/include/cuopt/linear_programming/utilities/callbacks_implems.hpp
@@ -32,11 +32,12 @@ class default_get_solution_callback_t : public get_solution_callback_t {
                     void* solution_bound,
                     void* user_data) override
   {
-    PyObject* numpy_matrix = get_numpy_array(data, n_variables);
-    PyObject* numpy_array  = get_numpy_array(objective_value, 1);
-    PyObject* numpy_bound  = get_numpy_array(solution_bound, 1);
-    PyObject* py_user_data = user_data == nullptr ? Py_None : static_cast<PyObject*>(user_data);
-    PyObject* res          = PyObject_CallMethod(this->pyCallbackClass,
+    PyGILState_STATE gstate = PyGILState_Ensure();
+    PyObject* numpy_matrix  = get_numpy_array(data, n_variables);
+    PyObject* numpy_array   = get_numpy_array(objective_value, 1);
+    PyObject* numpy_bound   = get_numpy_array(solution_bound, 1);
+    PyObject* py_user_data  = user_data == nullptr ? Py_None : static_cast<PyObject*>(user_data);
+    PyObject* res           = PyObject_CallMethod(this->pyCallbackClass,
                                         "get_solution",
                                         "(OOOO)",
                                         numpy_matrix,
@@ -47,6 +48,7 @@ class default_get_solution_callback_t : public get_solution_callback_t {
     Py_DECREF(numpy_array);
     Py_DECREF(numpy_bound);
     if (res != nullptr) { Py_DECREF(res); }
+    PyGILState_Release(gstate);
   }
 
   PyObject* pyCallbackClass;
@@ -69,11 +71,12 @@ class default_set_solution_callback_t : public set_solution_callback_t {
                     void* solution_bound,
                     void* user_data) override
   {
-    PyObject* numpy_matrix = get_numpy_array(data, n_variables);
-    PyObject* numpy_array  = get_numpy_array(objective_value, 1);
-    PyObject* numpy_bound  = get_numpy_array(solution_bound, 1);
-    PyObject* py_user_data = user_data == nullptr ? Py_None : static_cast<PyObject*>(user_data);
-    PyObject* res          = PyObject_CallMethod(this->pyCallbackClass,
+    PyGILState_STATE gstate = PyGILState_Ensure();
+    PyObject* numpy_matrix  = get_numpy_array(data, n_variables);
+    PyObject* numpy_array   = get_numpy_array(objective_value, 1);
+    PyObject* numpy_bound   = get_numpy_array(solution_bound, 1);
+    PyObject* py_user_data  = user_data == nullptr ? Py_None : static_cast<PyObject*>(user_data);
+    PyObject* res           = PyObject_CallMethod(this->pyCallbackClass,
                                         "set_solution",
                                         "(OOOO)",
                                         numpy_matrix,
@@ -84,6 +87,7 @@ class default_set_solution_callback_t : public set_solution_callback_t {
     Py_DECREF(numpy_array);
     Py_DECREF(numpy_bound);
     if (res != nullptr) { Py_DECREF(res); }
+    PyGILState_Release(gstate);
   }
 
   PyObject* pyCallbackClass;

From f349b5b573adc1d1c638c938bd639a1437714bb2 Mon Sep 17 00:00:00 2001
From: Alice Boucher <yboucher@nvidia.com>
Date: Mon, 23 Feb 2026 22:46:07 +0100
Subject: [PATCH 06/18] fix missing nogil

---
 .../cuopt/linear_programming/solver/solver.pxd  |  4 ++--
 .../solver/solver_wrapper.pyx                   | 17 ++++++++++-------
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/python/cuopt/cuopt/linear_programming/solver/solver.pxd b/python/cuopt/cuopt/linear_programming/solver/solver.pxd
index 83b1e0054..adc566aa1 100644
--- a/python/cuopt/cuopt/linear_programming/solver/solver.pxd
+++ b/python/cuopt/cuopt/linear_programming/solver/solver.pxd
@@ -181,9 +181,9 @@ cdef extern from "cuopt/linear_programming/utilities/cython_solve.hpp" namespace
     cdef unique_ptr[solver_ret_t] call_solve(
         data_model_view_t[int, double]* data_model,
         solver_settings_t[int, double]* solver_settings,
-    ) except +
+    ) except + nogil
 
     cdef pair[vector[unique_ptr[solver_ret_t]], double] call_batch_solve( # noqa
         vector[data_model_view_t[int, double] *] data_models,
         solver_settings_t[int, double]* solver_settings,
-    ) except +
+    ) except + nogil
diff --git a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx
index 8c1c8fdc3..e7d700ea9 100644
--- a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx
+++ b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx
@@ -507,10 +507,13 @@ def Solve(py_data_model_obj, settings, mip=False):
     )
     data_model_obj.set_data_model_view()
 
-    return create_solution(move(call_solve(
-        data_model_obj.c_data_model_view.get(),
-        unique_solver_settings.get(),
-    )), data_model_obj)
+    cdef unique_ptr[solver_ret_t] sol_ret_ptr
+    with nogil:
+        sol_ret_ptr = move(call_solve(
+            data_model_obj.c_data_model_view.get(),
+            unique_solver_settings.get(),
+        ))
+    return create_solution(move(sol_ret_ptr), data_model_obj)
 
 
 cdef set_and_insert_vector(
@@ -535,9 +538,9 @@ def BatchSolve(py_data_model_list, settings):
 
     cdef pair[
         vector[unique_ptr[solver_ret_t]],
-        double] batch_solve_result = (
-        move(call_batch_solve(data_model_views, unique_solver_settings.get())) # noqa
-    )
+        double] batch_solve_result
+    with nogil:
+        batch_solve_result = move(call_batch_solve(data_model_views, unique_solver_settings.get()))  # noqa
 
     cdef vector[unique_ptr[solver_ret_t]] c_solutions = (
         move(batch_solve_result.first)

From 506d04632600da2e9fffd19ecbf1753335b368bb Mon Sep 17 00:00:00 2001
From: Alice Boucher <yboucher@nvidia.com>
Date: Tue, 24 Feb 2026 08:52:06 +0100
Subject: [PATCH 07/18] fix python callbacks

---
 cpp/src/mip_heuristics/solve.cu | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu
index 000684be9..b4188cade 100644
--- a/cpp/src/mip_heuristics/solve.cu
+++ b/cpp/src/mip_heuristics/solve.cu
@@ -85,15 +85,6 @@ mip_solution_t<i_t, f_t> run_mip(detail::problem_t<i_t, f_t>& problem,
   auto hyper_params                                     = settings.hyper_params;
   hyper_params.update_primal_weight_on_initial_solution = false;
   hyper_params.update_step_size_on_initial_solution     = true;
-  if (settings.get_mip_callbacks().size() > 0) {
-    auto callback_num_variables = problem.original_problem_ptr->get_n_variables();
-    if (problem.has_papilo_presolve_data()) {
-      callback_num_variables = problem.get_papilo_original_num_variables();
-    }
-    for (auto callback : settings.get_mip_callbacks()) {
-      callback->template setup<f_t>(callback_num_variables);
-    }
-  }
   // if the input problem is empty: early exit
   if (problem.empty) {
     detail::solution_t<i_t, f_t> solution(problem);
@@ -277,6 +268,10 @@ mip_solution_t<i_t, f_t> solve_mip(optimization_problem_t<i_t, f_t>& op_problem,
                                       op_problem.get_handle_ptr()->get_stream());
     }
 
+    for (auto callback : settings.get_mip_callbacks()) {
+      callback->template setup<f_t>(op_problem.get_n_variables());
+    }
+
     auto timer = timer_t(time_limit);
 
     double presolve_time = 0.0;

From 33230b6bf521ec5aa0206f57015f1583ad9499b1 Mon Sep 17 00:00:00 2001
From: Alice Boucher <yboucher@nvidia.com>
Date: Tue, 24 Feb 2026 11:12:01 +0100
Subject: [PATCH 08/18] fix attempt

---
 cpp/src/mip_heuristics/early_heuristic.cuh      | 16 ++++++++++------
 cpp/src/mip_heuristics/problem/presolve_data.cu | 17 +++++++----------
 .../mip_heuristics/problem/presolve_data.cuh    | 10 +++++++++-
 cpp/src/mip_heuristics/problem/problem.cu       |  6 ++++--
 cpp/src/mip_heuristics/problem/problem.cuh      |  9 ++++++++-
 5 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/cpp/src/mip_heuristics/early_heuristic.cuh b/cpp/src/mip_heuristics/early_heuristic.cuh
index f3644c4d2..40b604d97 100644
--- a/cpp/src/mip_heuristics/early_heuristic.cuh
+++ b/cpp/src/mip_heuristics/early_heuristic.cuh
@@ -14,6 +14,8 @@
 
 #include <utilities/logger.hpp>
 
+#include <rmm/cuda_stream.hpp>
+
 #include <thrust/fill.h>
 
 #include <chrono>
@@ -58,17 +60,18 @@ class early_heuristic_t {
   ~early_heuristic_t() = default;
 
   // NOT thread-safe. solver_obj is in solver-space (always minimization).
+  // Uses a private CUDA stream to avoid racing with the FJ solver's stream.
   void try_update_best(f_t solver_obj, const std::vector<f_t>& assignment)
   {
     if (solver_obj >= best_objective_) { return; }
     best_objective_ = solver_obj;
 
-    auto* handle_ptr = problem_ptr_->handle_ptr;
-    RAFT_CUDA_TRY(cudaSetDevice(handle_ptr->get_device()));
-    rmm::device_uvector<f_t> d_assignment(assignment.size(), handle_ptr->get_stream());
-    raft::copy(d_assignment.data(), assignment.data(), assignment.size(), handle_ptr->get_stream());
-    problem_ptr_->post_process_assignment(d_assignment);
-    auto user_assignment = cuopt::host_copy(d_assignment, handle_ptr->get_stream());
+    RAFT_CUDA_TRY(cudaSetDevice(problem_ptr_->handle_ptr->get_device()));
+    auto stream = private_stream_.view();
+    rmm::device_uvector<f_t> d_assignment(assignment.size(), stream);
+    raft::copy(d_assignment.data(), assignment.data(), assignment.size(), stream);
+    problem_ptr_->post_process_assignment(d_assignment, true, stream);
+    auto user_assignment = cuopt::host_copy(d_assignment, stream);
 
     best_assignment_ = user_assignment;
     solution_found_  = true;
@@ -91,6 +94,7 @@ class early_heuristic_t {
 
   early_incumbent_callback_t<f_t> incumbent_callback_;
   std::chrono::steady_clock::time_point start_time_;
+  rmm::cuda_stream private_stream_;
 };
 
 }  // namespace cuopt::linear_programming::detail
diff --git a/cpp/src/mip_heuristics/problem/presolve_data.cu b/cpp/src/mip_heuristics/problem/presolve_data.cu
index b11f7b108..1bc05a779 100644
--- a/cpp/src/mip_heuristics/problem/presolve_data.cu
+++ b/cpp/src/mip_heuristics/problem/presolve_data.cu
@@ -107,7 +107,8 @@ template <typename i_t, typename f_t>
 void presolve_data_t<i_t, f_t>::post_process_assignment(
   problem_t<i_t, f_t>& problem,
   rmm::device_uvector<f_t>& current_assignment,
-  bool resize_to_original_problem)
+  bool resize_to_original_problem,
+  rmm::cuda_stream_view stream)
 {
   raft::common::nvtx::range fun_scope("post_process_assignment");
   cuopt_assert(current_assignment.size() == variable_mapping.size(), "size mismatch");
@@ -115,15 +116,15 @@ void presolve_data_t<i_t, f_t>::post_process_assignment(
   auto fixed_assgn = make_span(fixed_var_assignment);
   auto var_map     = make_span(variable_mapping);
   if (current_assignment.size() > 0) {
-    thrust::for_each(problem.handle_ptr->get_thrust_policy(),
+    thrust::for_each(rmm::exec_policy(stream),
                      thrust::make_counting_iterator<i_t>(0),
                      thrust::make_counting_iterator<i_t>(current_assignment.size()),
                      [fixed_assgn, var_map, assgn] __device__(auto idx) {
                        fixed_assgn[var_map[idx]] = assgn[idx];
                      });
   }
-  expand_device_copy(current_assignment, fixed_var_assignment, problem.handle_ptr->get_stream());
-  auto h_assignment = cuopt::host_copy(current_assignment, problem.handle_ptr->get_stream());
+  expand_device_copy(current_assignment, fixed_var_assignment, stream);
+  auto h_assignment = cuopt::host_copy(current_assignment, stream);
   cuopt_assert(additional_var_id_per_var.size() == h_assignment.size(), "Size mismatch");
   cuopt_assert(additional_var_used.size() == h_assignment.size(), "Size mismatch");
   for (i_t i = 0; i < (i_t)h_assignment.size(); ++i) {
@@ -148,14 +149,10 @@ void presolve_data_t<i_t, f_t>::post_process_assignment(
                     h_assignment[sub.substituted_var]);
   }
 
-  raft::copy(current_assignment.data(),
-             h_assignment.data(),
-             h_assignment.size(),
-             problem.handle_ptr->get_stream());
   // this separate resizing is needed because of the callback
+  raft::copy(current_assignment.data(), h_assignment.data(), h_assignment.size(), stream);
   if (resize_to_original_problem) {
-    current_assignment.resize(problem.original_problem_ptr->get_n_variables(),
-                              problem.handle_ptr->get_stream());
+    current_assignment.resize(problem.original_problem_ptr->get_n_variables(), stream);
   }
 }
 
diff --git a/cpp/src/mip_heuristics/problem/presolve_data.cuh b/cpp/src/mip_heuristics/problem/presolve_data.cuh
index 51b6bac95..cac3e7165 100644
--- a/cpp/src/mip_heuristics/problem/presolve_data.cuh
+++ b/cpp/src/mip_heuristics/problem/presolve_data.cuh
@@ -89,7 +89,15 @@ class presolve_data_t {
   bool pre_process_assignment(problem_t<i_t, f_t>& problem, rmm::device_uvector<f_t>& assignment);
   void post_process_assignment(problem_t<i_t, f_t>& problem,
                                rmm::device_uvector<f_t>& current_assignment,
-                               bool resize_to_original_problem = true);
+                               bool resize_to_original_problem,
+                               rmm::cuda_stream_view stream);
+  void post_process_assignment(problem_t<i_t, f_t>& problem,
+                               rmm::device_uvector<f_t>& current_assignment,
+                               bool resize_to_original_problem = true)
+  {
+    post_process_assignment(
+      problem, current_assignment, resize_to_original_problem, problem.handle_ptr->get_stream());
+  }
   void post_process_solution(problem_t<i_t, f_t>& problem, solution_t<i_t, f_t>& solution);
 
   void set_papilo_presolve_data(const third_party_presolve_t<i_t, f_t>* presolver_ptr,
diff --git a/cpp/src/mip_heuristics/problem/problem.cu b/cpp/src/mip_heuristics/problem/problem.cu
index d77e2e5f6..dde334476 100644
--- a/cpp/src/mip_heuristics/problem/problem.cu
+++ b/cpp/src/mip_heuristics/problem/problem.cu
@@ -2045,9 +2045,11 @@ bool problem_t<i_t, f_t>::pre_process_assignment(rmm::device_uvector<f_t>& assig
 
 template <typename i_t, typename f_t>
 void problem_t<i_t, f_t>::post_process_assignment(rmm::device_uvector<f_t>& current_assignment,
-                                                  bool resize_to_original_problem)
+                                                  bool resize_to_original_problem,
+                                                  rmm::cuda_stream_view stream)
 {
-  presolve_data.post_process_assignment(*this, current_assignment, resize_to_original_problem);
+  presolve_data.post_process_assignment(
+    *this, current_assignment, resize_to_original_problem, stream);
 }
 
 template <typename i_t, typename f_t>
diff --git a/cpp/src/mip_heuristics/problem/problem.cuh b/cpp/src/mip_heuristics/problem/problem.cuh
index 6cd180a80..9876773ec 100644
--- a/cpp/src/mip_heuristics/problem/problem.cuh
+++ b/cpp/src/mip_heuristics/problem/problem.cuh
@@ -94,7 +94,14 @@ class problem_t {
   void preprocess_problem();
   bool pre_process_assignment(rmm::device_uvector<f_t>& assignment);
   void post_process_assignment(rmm::device_uvector<f_t>& current_assignment,
-                               bool resize_to_original_problem = true);
+                               bool resize_to_original_problem,
+                               rmm::cuda_stream_view stream);
+  void post_process_assignment(rmm::device_uvector<f_t>& current_assignment,
+                               bool resize_to_original_problem = true)
+  {
+    post_process_assignment(
+      current_assignment, resize_to_original_problem, handle_ptr->get_stream());
+  }
   void post_process_solution(solution_t<i_t, f_t>& solution);
   void set_papilo_presolve_data(const third_party_presolve_t<i_t, f_t>* presolver_ptr,
                                 std::vector<i_t> reduced_to_original,

From 71e8c9ac118d155ca56f0e063c5026534dcd4aeb Mon Sep 17 00:00:00 2001
From: Alice Boucher <yboucher@nvidia.com>
Date: Tue, 24 Feb 2026 12:22:56 +0000
Subject: [PATCH 09/18] operate on a problem copy for early GPUFJ

---
 cpp/src/mip_heuristics/early_heuristic.cuh     | 18 ++++++++++--------
 .../feasibility_jump/early_gpufj.cu            |  2 +-
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/cpp/src/mip_heuristics/early_heuristic.cuh b/cpp/src/mip_heuristics/early_heuristic.cuh
index 40b604d97..47d50daa4 100644
--- a/cpp/src/mip_heuristics/early_heuristic.cuh
+++ b/cpp/src/mip_heuristics/early_heuristic.cuh
@@ -14,8 +14,6 @@
 
 #include <utilities/logger.hpp>
 
-#include <rmm/cuda_stream.hpp>
-
 #include <thrust/fill.h>
 
 #include <chrono>
@@ -40,11 +38,15 @@ class early_heuristic_t {
                     early_incumbent_callback_t<f_t> incumbent_callback)
     : incumbent_callback_(std::move(incumbent_callback))
   {
-    problem_ptr_ = std::make_unique<problem_t<i_t, f_t>>(op_problem, tolerances, false);
-    problem_ptr_->preprocess_problem();
+    // Build and preprocess on the original handle, then copy onto our own handle
+    // so the derived solver can run on a dedicated stream (prevents graph capture conflicts).
+    problem_t<i_t, f_t> temp_problem(op_problem, tolerances, false);
+    temp_problem.preprocess_problem();
+    temp_problem.handle_ptr->sync_stream();
+    problem_ptr_ = std::make_unique<problem_t<i_t, f_t>>(temp_problem, &handle_);
 
     solution_ptr_ = std::make_unique<solution_t<i_t, f_t>>(*problem_ptr_);
-    thrust::fill(problem_ptr_->handle_ptr->get_thrust_policy(),
+    thrust::fill(handle_.get_thrust_policy(),
                  solution_ptr_->assignment.begin(),
                  solution_ptr_->assignment.end(),
                  f_t{0});
@@ -66,8 +68,8 @@ class early_heuristic_t {
     if (solver_obj >= best_objective_) { return; }
     best_objective_ = solver_obj;
 
-    RAFT_CUDA_TRY(cudaSetDevice(problem_ptr_->handle_ptr->get_device()));
-    auto stream = private_stream_.view();
+    RAFT_CUDA_TRY(cudaSetDevice(handle_.get_device()));
+    auto stream = handle_.get_stream();
     rmm::device_uvector<f_t> d_assignment(assignment.size(), stream);
     raft::copy(d_assignment.data(), assignment.data(), assignment.size(), stream);
     problem_ptr_->post_process_assignment(d_assignment, true, stream);
@@ -94,7 +96,7 @@ class early_heuristic_t {
 
   early_incumbent_callback_t<f_t> incumbent_callback_;
   std::chrono::steady_clock::time_point start_time_;
-  rmm::cuda_stream private_stream_;
+  raft::handle_t handle_;
 };
 
 }  // namespace cuopt::linear_programming::detail
diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu
index 92d51a169..f9b552c9f 100644
--- a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu
+++ b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu
@@ -26,7 +26,7 @@ early_gpufj_t<i_t, f_t>::early_gpufj_t(const optimization_problem_t<i_t, f_t>& o
       op_problem, settings.get_tolerances(), std::move(incumbent_callback))
 {
   context_ptr_ = std::make_unique<mip_solver_context_t<i_t, f_t>>(
-    this->problem_ptr_->handle_ptr, this->problem_ptr_.get(), settings, nullptr);
+    &this->handle_, this->problem_ptr_.get(), settings, nullptr);
 }
 
 template <typename i_t, typename f_t>

From 9d7055ef479f49ed27c492d43606393405b5b860 Mon Sep 17 00:00:00 2001
From: Alice Boucher <yboucher@nvidia.com>
Date: Tue, 24 Feb 2026 13:52:31 +0000
Subject: [PATCH 10/18] fix reported bound in user callback

---
 cpp/src/mip_heuristics/solve.cu | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu
index b4188cade..417521b11 100644
--- a/cpp/src/mip_heuristics/solve.cu
+++ b/cpp/src/mip_heuristics/solve.cu
@@ -175,11 +175,9 @@ mip_solution_t<i_t, f_t> run_mip(detail::problem_t<i_t, f_t>& problem,
                          settings.determinism_mode != CUOPT_MODE_DETERMINISTIC &&
                          problem.original_problem_ptr->get_n_integers() > 0;
   if (run_early_cpufj) {
-    auto* presolver_ptr     = problem.presolve_data.papilo_presolve_ptr;
-    auto mip_callbacks      = settings.get_mip_callbacks();
-    f_t no_bound            = problem.presolve_data.objective_scaling_factor >= 0
-                                ? -std::numeric_limits<f_t>::infinity()
-                                : std::numeric_limits<f_t>::infinity();
+    auto* presolver_ptr = problem.presolve_data.papilo_presolve_ptr;
+    auto mip_callbacks  = settings.get_mip_callbacks();
+    f_t no_bound = problem.presolve_data.objective_scaling_factor >= 0 ? (f_t)-1e20 : (f_t)1e20;
     auto incumbent_callback = [presolver_ptr, mip_callbacks, no_bound](
                                 f_t solver_obj, f_t user_obj, const std::vector<f_t>& assignment) {
       std::vector<f_t> user_assignment;
@@ -310,9 +308,7 @@ mip_solution_t<i_t, f_t> solve_mip(optimization_problem_t<i_t, f_t>& op_problem,
     bool run_early_fj = run_presolve && settings.determinism_mode != CUOPT_MODE_DETERMINISTIC &&
                         op_problem.get_n_integers() > 0 && op_problem.get_n_constraints() > 0;
     if (run_early_fj) {
-      f_t no_bound           = problem.presolve_data.objective_scaling_factor >= 0
-                                 ? -std::numeric_limits<f_t>::infinity()
-                                 : std::numeric_limits<f_t>::infinity();
+      f_t no_bound = problem.presolve_data.objective_scaling_factor >= 0 ? (f_t)-1e20 : (f_t)1e20;
       auto early_fj_callback = [&early_best_objective,
                                 &early_callback_mutex,
                                 mip_callbacks = settings.get_mip_callbacks(),

From 7b9451b8898f53af58e399314faec3c9733a63de Mon Sep 17 00:00:00 2001
From: Alice Boucher <yboucher@nvidia.com>
Date: Tue, 24 Feb 2026 14:33:37 +0000
Subject: [PATCH 11/18] fix build

---
 cpp/src/pdlp/swap_and_resize_helper.cuh | 1 +
 cpp/src/pdlp/utils.cuh                  | 1 +
 cpp/src/utilities/copy_helpers.hpp      | 1 +
 cpp/src/utilities/cuda_helpers.cuh      | 1 +
 4 files changed, 4 insertions(+)

diff --git a/cpp/src/pdlp/swap_and_resize_helper.cuh b/cpp/src/pdlp/swap_and_resize_helper.cuh
index 0d4e2a6d0..6ed05df24 100644
--- a/cpp/src/pdlp/swap_and_resize_helper.cuh
+++ b/cpp/src/pdlp/swap_and_resize_helper.cuh
@@ -16,6 +16,7 @@
 #include <thrust/iterator/permutation_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
+#include <thrust/tuple.h>
 #include <thrust/universal_vector.h>
 
 #include <cuda/std/tuple>
diff --git a/cpp/src/pdlp/utils.cuh b/cpp/src/pdlp/utils.cuh
index 33625f768..138c9c2ab 100644
--- a/cpp/src/pdlp/utils.cuh
+++ b/cpp/src/pdlp/utils.cuh
@@ -25,6 +25,7 @@
 #include <thrust/execution_policy.h>
 #include <thrust/functional.h>
 #include <thrust/transform_reduce.h>
+#include <thrust/tuple.h>
 
 namespace cuopt::linear_programming::detail {
 
diff --git a/cpp/src/utilities/copy_helpers.hpp b/cpp/src/utilities/copy_helpers.hpp
index 943ce463a..36a465905 100644
--- a/cpp/src/utilities/copy_helpers.hpp
+++ b/cpp/src/utilities/copy_helpers.hpp
@@ -15,6 +15,7 @@
 #include <rmm/exec_policy.hpp>
 
 #include <thrust/transform.h>
+#include <thrust/tuple.h>
 #include <thrust/universal_vector.h>
 
 #include <cuda/std/functional>
diff --git a/cpp/src/utilities/cuda_helpers.cuh b/cpp/src/utilities/cuda_helpers.cuh
index ae50e9967..946099648 100644
--- a/cpp/src/utilities/cuda_helpers.cuh
+++ b/cpp/src/utilities/cuda_helpers.cuh
@@ -10,6 +10,7 @@
 #include <utilities/macros.cuh>
 
 #include <thrust/host_vector.h>
+#include <thrust/tuple.h>
 #include <mutex>
 #include <raft/core/device_span.hpp>
 #include <raft/util/cuda_utils.cuh>

From d0c5a425ca1cc24f9b4cb984bc0090b6a4c38755 Mon Sep 17 00:00:00 2001
From: Alice Boucher <yboucher@nvidia.com>
Date: Tue, 24 Feb 2026 16:09:51 +0000
Subject: [PATCH 12/18] fix thrust build + more timer checks

---
 .../diversity/diversity_manager.cu            | 26 ++++++++++---------
 cpp/src/mip_heuristics/solve.cu               |  5 ++++
 cpp/src/mip_heuristics/solver.cu              | 16 ++++++++++++
 cpp/src/pdlp/swap_and_resize_helper.cuh       |  1 +
 cpp/src/pdlp/utils.cuh                        |  1 +
 cpp/src/utilities/copy_helpers.hpp            |  1 +
 cpp/src/utilities/cuda_helpers.cuh            |  1 +
 7 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu
index 25b021ac3..4fa0d3f4a 100644
--- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu
+++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu
@@ -201,18 +201,20 @@ bool diversity_manager_t<i_t, f_t>::run_presolve(f_t time_limit)
       compute_probing_cache(ls.constraint_prop.bounds_update, *problem_ptr, probing_timer);
     if (problem_is_infeasible) { return false; }
   }
-  const bool remap_cache_ids = true;
-  trivial_presolve(*problem_ptr, remap_cache_ids);
-  if (!problem_ptr->empty && !check_bounds_sanity(*problem_ptr)) { return false; }
-  // May overconstrain if Papilo presolve has been run before
-  if (context.settings.presolver == presolver_t::None) {
-    if (!problem_ptr->empty) {
-      // do the resizing no-matter what, bounds presolve might not change the bounds but initial
-      // trivial presolve might have
-      ls.constraint_prop.bounds_update.resize(*problem_ptr);
-      ls.constraint_prop.conditional_bounds_update.update_constraint_bounds(
-        *problem_ptr, ls.constraint_prop.bounds_update);
-      if (!check_bounds_sanity(*problem_ptr)) { return false; }
+  if (!presolve_timer.check_time_limit()) {
+    const bool remap_cache_ids = true;
+    trivial_presolve(*problem_ptr, remap_cache_ids);
+    if (!problem_ptr->empty && !check_bounds_sanity(*problem_ptr)) { return false; }
+    // May overconstrain if Papilo presolve has been run before
+    if (context.settings.presolver == presolver_t::None) {
+      if (!problem_ptr->empty) {
+        // do the resizing no-matter what, bounds presolve might not change the bounds but initial
+        // trivial presolve might have
+        ls.constraint_prop.bounds_update.resize(*problem_ptr);
+        ls.constraint_prop.conditional_bounds_update.update_constraint_bounds(
+          *problem_ptr, ls.constraint_prop.bounds_update);
+        if (!check_bounds_sanity(*problem_ptr)) { return false; }
+      }
     }
   }
   stats.presolve_time = presolve_timer.elapsed_time();
diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu
index 41e272992..3c6ee21a4 100644
--- a/cpp/src/mip_heuristics/solve.cu
+++ b/cpp/src/mip_heuristics/solve.cu
@@ -153,6 +153,11 @@ mip_solution_t<i_t, f_t> run_mip(detail::problem_t<i_t, f_t>& problem,
   detail::trivial_presolve(scaled_problem);
 
   detail::mip_solver_t<i_t, f_t> solver(scaled_problem, settings, scaling, timer);
+  if (timer.check_time_limit()) {
+    CUOPT_LOG_INFO("Time limit reached before main solve");
+    detail::solution_t<i_t, f_t> sol(problem);
+    return sol.get_solution(false, solver.get_solver_stats(), false);
+  }
   auto scaled_sol                 = solver.run_solver();
   bool is_feasible_before_scaling = scaled_sol.get_feasible();
   scaled_sol.problem_ptr          = &problem;
diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu
index 3e2251171..235d4500d 100644
--- a/cpp/src/mip_heuristics/solver.cu
+++ b/cpp/src/mip_heuristics/solver.cu
@@ -134,6 +134,14 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
     return sol;
   }
 
+  if (timer_.check_time_limit()) {
+    CUOPT_LOG_INFO("Time limit reached after presolve");
+    solution_t<i_t, f_t> sol(*context.problem_ptr);
+    context.stats.total_solve_time = timer_.elapsed_time();
+    context.problem_ptr->post_process_solution(sol);
+    return sol;
+  }
+
   // if the problem was reduced to a LP: run concurrent LP
   if (run_presolve && context.problem_ptr->n_integer_vars == 0) {
     CUOPT_LOG_INFO("Problem reduced to a LP, running concurrent LP");
@@ -285,6 +293,14 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
                 std::placeholders::_5,
                 std::placeholders::_6);
 
+    if (timer_.check_time_limit()) {
+      CUOPT_LOG_INFO("Time limit reached during B&B setup");
+      solution_t<i_t, f_t> sol(*context.problem_ptr);
+      context.stats.total_solve_time = timer_.elapsed_time();
+      context.problem_ptr->post_process_solution(sol);
+      return sol;
+    }
+
     // Fork a thread for branch and bound
     // std::async and std::future allow us to get the return value of bb::solve()
     // without having to manually manage the thread
diff --git a/cpp/src/pdlp/swap_and_resize_helper.cuh b/cpp/src/pdlp/swap_and_resize_helper.cuh
index 0d4e2a6d0..6ed05df24 100644
--- a/cpp/src/pdlp/swap_and_resize_helper.cuh
+++ b/cpp/src/pdlp/swap_and_resize_helper.cuh
@@ -16,6 +16,7 @@
 #include <thrust/iterator/permutation_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
+#include <thrust/tuple.h>
 #include <thrust/universal_vector.h>
 
 #include <cuda/std/tuple>
diff --git a/cpp/src/pdlp/utils.cuh b/cpp/src/pdlp/utils.cuh
index 33625f768..138c9c2ab 100644
--- a/cpp/src/pdlp/utils.cuh
+++ b/cpp/src/pdlp/utils.cuh
@@ -25,6 +25,7 @@
 #include <thrust/execution_policy.h>
 #include <thrust/functional.h>
 #include <thrust/transform_reduce.h>
+#include <thrust/tuple.h>
 
 namespace cuopt::linear_programming::detail {
 
diff --git a/cpp/src/utilities/copy_helpers.hpp b/cpp/src/utilities/copy_helpers.hpp
index 943ce463a..36a465905 100644
--- a/cpp/src/utilities/copy_helpers.hpp
+++ b/cpp/src/utilities/copy_helpers.hpp
@@ -15,6 +15,7 @@
 #include <rmm/exec_policy.hpp>
 
 #include <thrust/transform.h>
+#include <thrust/tuple.h>
 #include <thrust/universal_vector.h>
 
 #include <cuda/std/functional>
diff --git a/cpp/src/utilities/cuda_helpers.cuh b/cpp/src/utilities/cuda_helpers.cuh
index ae50e9967..946099648 100644
--- a/cpp/src/utilities/cuda_helpers.cuh
+++ b/cpp/src/utilities/cuda_helpers.cuh
@@ -10,6 +10,7 @@
 #include <utilities/macros.cuh>
 
 #include <thrust/host_vector.h>
+#include <thrust/tuple.h>
 #include <mutex>
 #include <raft/core/device_span.hpp>
 #include <raft/util/cuda_utils.cuh>

From 67240f57035bde789b1e5512c116c00fd46c5baf Mon Sep 17 00:00:00 2001
From: Alice Boucher <yboucher@nvidia.com>
Date: Tue, 24 Feb 2026 16:51:09 +0000
Subject: [PATCH 13/18] review comment

---
 cpp/src/mip_heuristics/solve.cu | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu
index 3c6ee21a4..44f0e7468 100644
--- a/cpp/src/mip_heuristics/solve.cu
+++ b/cpp/src/mip_heuristics/solve.cu
@@ -156,7 +156,9 @@ mip_solution_t<i_t, f_t> run_mip(detail::problem_t<i_t, f_t>& problem,
   if (timer.check_time_limit()) {
     CUOPT_LOG_INFO("Time limit reached before main solve");
     detail::solution_t<i_t, f_t> sol(problem);
-    return sol.get_solution(false, solver.get_solver_stats(), false);
+    auto stats             = solver.get_solver_stats();
+    stats.total_solve_time = timer.elapsed_time();
+    return sol.get_solution(false, stats, false);
   }
   auto scaled_sol                 = solver.run_solver();
   bool is_feasible_before_scaling = scaled_sol.get_feasible();

From a15424fc250b6e649b8b0b0015641d9771c9b426 Mon Sep 17 00:00:00 2001
From: Alice Boucher <yboucher@nvidia.com>
Date: Tue, 24 Feb 2026 17:02:33 +0000
Subject: [PATCH 14/18] fix thrust solve

---
 cpp/src/routing/utilities/check_input.cu | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cpp/src/routing/utilities/check_input.cu b/cpp/src/routing/utilities/check_input.cu
index b2f035ee5..e902f2d46 100644
--- a/cpp/src/routing/utilities/check_input.cu
+++ b/cpp/src/routing/utilities/check_input.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -17,6 +17,7 @@
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/logical.h>
 #include <thrust/pair.h>
+#include <thrust/tuple.h>
 #include <cuda/std/functional>
 
 #include <unordered_set>

From 7b3d40c85104917e20761a91e8a6ee49dc796fb1 Mon Sep 17 00:00:00 2001
From: Alice Boucher <yboucher@nvidia.com>
Date: Wed, 25 Feb 2026 14:45:05 +0000
Subject: [PATCH 15/18] fix handle being destroyed before other gpu structures
 in early heuristics

---
 cpp/src/mip_heuristics/early_heuristic.cuh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/cpp/src/mip_heuristics/early_heuristic.cuh b/cpp/src/mip_heuristics/early_heuristic.cuh
index 47d50daa4..26d9e8431 100644
--- a/cpp/src/mip_heuristics/early_heuristic.cuh
+++ b/cpp/src/mip_heuristics/early_heuristic.cuh
@@ -87,6 +87,10 @@ class early_heuristic_t {
     if (incumbent_callback_) { incumbent_callback_(solver_obj, user_obj, user_assignment); }
   }
 
+  // handle_ must be declared before problem_ptr_/solution_ptr_ so it outlives them
+  // (C++ destroys members in reverse declaration order)
+  raft::handle_t handle_;
+
   std::unique_ptr<problem_t<i_t, f_t>> problem_ptr_;
   std::unique_ptr<solution_t<i_t, f_t>> solution_ptr_;
 
@@ -96,7 +100,6 @@ class early_heuristic_t {
 
   early_incumbent_callback_t<f_t> incumbent_callback_;
   std::chrono::steady_clock::time_point start_time_;
-  raft::handle_t handle_;
 };
 
 }  // namespace cuopt::linear_programming::detail

From 68e07bb649ad15cd1dc427446b5e6e395a5ad85e Mon Sep 17 00:00:00 2001
From: Alice Boucher <yboucher@nvidia.com>
Date: Wed, 25 Feb 2026 15:26:32 +0000
Subject: [PATCH 16/18] fix build

---
 cpp/src/mip_heuristics/solve.cu | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu
index ccd04ff05..4592037bb 100644
--- a/cpp/src/mip_heuristics/solve.cu
+++ b/cpp/src/mip_heuristics/solve.cu
@@ -57,10 +57,11 @@ template <typename f_t>
 static void invoke_solution_callbacks(
   const std::vector<internals::base_solution_callback_t*>& mip_callbacks,
   f_t objective,
-  std::vector<f_t>& assignment)
+  std::vector<f_t>& assignment,
+  f_t bound)
 {
   std::vector<f_t> obj_vec   = {objective};
-  std::vector<f_t> bound_vec = {std::numeric_limits<f_t>::infinity()};
+  std::vector<f_t> bound_vec = {bound};
   for (auto callback : mip_callbacks) {
     if (callback != nullptr &&
         callback->get_type() == internals::base_solution_callback_type::GET_SOLUTION) {
@@ -313,16 +314,18 @@ mip_solution_t<i_t, f_t> solve_mip(optimization_problem_t<i_t, f_t>& op_problem,
 
     bool run_early_fj = run_presolve && settings.determinism_mode != CUOPT_MODE_DETERMINISTIC &&
                         op_problem.get_n_integers() > 0;
+    f_t no_bound = problem.presolve_data.objective_scaling_factor >= 0 ? (f_t)-1e20 : (f_t)1e20;
     if (run_early_fj) {
       auto early_fj_callback = [&early_best_objective,
                                 &early_callback_mutex,
-                                mip_callbacks = settings.get_mip_callbacks()](
-                                 f_t objective, const std::vector<f_t>& assignment) {
+                                mip_callbacks = settings.get_mip_callbacks(),
+                                no_bound](
+                                 f_t solver_obj, f_t user_obj, const std::vector<f_t>& assignment) {
         std::lock_guard<std::mutex> lock(early_callback_mutex);
-        if (objective >= early_best_objective.load()) { return; }
-        early_best_objective.store(objective);
+        if (solver_obj >= early_best_objective.load()) { return; }
+        early_best_objective.store(solver_obj);
         auto user_assignment = assignment;
-        invoke_solution_callbacks(mip_callbacks, objective, user_assignment);
+        invoke_solution_callbacks(mip_callbacks, user_obj, user_assignment, no_bound);
       };
 
       // Start early CPUFJ on original problem (will restart on presolved problem after Papilo)

From 352cd1564e00d7d8a612e5a8af653fa9b8e9f3e0 Mon Sep 17 00:00:00 2001
From: Alice Boucher <yboucher@nvidia.com>
Date: Wed, 25 Feb 2026 17:00:06 +0000
Subject: [PATCH 17/18] fix cudaSetDevice bug

---
 cpp/src/mip_heuristics/early_heuristic.cuh             | 6 +++++-
 cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu | 2 +-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/cpp/src/mip_heuristics/early_heuristic.cuh b/cpp/src/mip_heuristics/early_heuristic.cuh
index 26d9e8431..333fde195 100644
--- a/cpp/src/mip_heuristics/early_heuristic.cuh
+++ b/cpp/src/mip_heuristics/early_heuristic.cuh
@@ -38,6 +38,8 @@ class early_heuristic_t {
                     early_incumbent_callback_t<f_t> incumbent_callback)
     : incumbent_callback_(std::move(incumbent_callback))
   {
+    RAFT_CUDA_TRY(cudaGetDevice(&device_id_));
+
     // Build and preprocess on the original handle, then copy onto our own handle
     // so the derived solver can run on a dedicated stream (prevents graph capture conflicts).
     problem_t<i_t, f_t> temp_problem(op_problem, tolerances, false);
@@ -68,7 +70,7 @@ class early_heuristic_t {
     if (solver_obj >= best_objective_) { return; }
     best_objective_ = solver_obj;
 
-    RAFT_CUDA_TRY(cudaSetDevice(handle_.get_device()));
+    RAFT_CUDA_TRY(cudaSetDevice(device_id_));
     auto stream = handle_.get_stream();
     rmm::device_uvector<f_t> d_assignment(assignment.size(), stream);
     raft::copy(d_assignment.data(), assignment.data(), assignment.size(), stream);
@@ -87,6 +89,8 @@ class early_heuristic_t {
     if (incumbent_callback_) { incumbent_callback_(solver_obj, user_obj, user_assignment); }
   }
 
+  int device_id_{0};
+
   // handle_ must be declared before problem_ptr_/solution_ptr_ so it outlives them
   // (C++ destroys members in reverse declaration order)
   raft::handle_t handle_;
diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu
index f9b552c9f..55726421d 100644
--- a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu
+++ b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu
@@ -63,7 +63,7 @@ void early_gpufj_t<i_t, f_t>::start()
 template <typename i_t, typename f_t>
 void early_gpufj_t<i_t, f_t>::run_worker()
 {
-  RAFT_CUDA_TRY(cudaSetDevice(this->problem_ptr_->handle_ptr->get_device()));
+  RAFT_CUDA_TRY(cudaSetDevice(this->device_id_));
   fj_ptr_->solve(*this->solution_ptr_);
 }
 

From e4b8166784a41ee63575d0dd2e948ef9cf45b6b5 Mon Sep 17 00:00:00 2001
From: Alice Boucher <yboucher@nvidia.com>
Date: Wed, 25 Feb 2026 17:14:46 +0000
Subject: [PATCH 18/18] fix empty tests

---
 cpp/src/mip_heuristics/solve.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu
index 4592037bb..f7c3063c9 100644
--- a/cpp/src/mip_heuristics/solve.cu
+++ b/cpp/src/mip_heuristics/solve.cu
@@ -313,7 +313,7 @@ mip_solution_t<i_t, f_t> solve_mip(optimization_problem_t<i_t, f_t>& op_problem,
     std::mutex early_callback_mutex;
 
     bool run_early_fj = run_presolve && settings.determinism_mode != CUOPT_MODE_DETERMINISTIC &&
-                        op_problem.get_n_integers() > 0;
+                        op_problem.get_n_integers() > 0 && op_problem.get_n_constraints() > 0;
     f_t no_bound = problem.presolve_data.objective_scaling_factor >= 0 ? (f_t)-1e20 : (f_t)1e20;
     if (run_early_fj) {
       auto early_fj_callback = [&early_best_objective,