From 49030ad9ea39608cd170f2e19b32a781d6b55975 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Mon, 23 Feb 2026 18:39:16 +0100 Subject: [PATCH 01/18] squash changes --- cpp/src/branch_and_bound/branch_and_bound.cpp | 35 +++--- cpp/src/branch_and_bound/branch_and_bound.hpp | 13 ++- cpp/src/mip_heuristics/CMakeLists.txt | 4 +- .../mip_heuristics/diversity/population.cu | 8 +- cpp/src/mip_heuristics/early_heuristic.cuh | 94 +++++++++++++++ .../feasibility_jump/early_cpufj.cu | 79 +++++++++++++ .../feasibility_jump/early_cpufj.cuh | 39 +++++++ .../feasibility_jump/early_gpufj.cu | 93 +++++++++++++++ .../feasibility_jump/early_gpufj.cuh | 45 ++++++++ .../feasibility_jump/feasibility_jump.cu | 18 ++- .../feasibility_jump/feasibility_jump.cuh | 9 ++ .../mip_heuristics/feasibility_jump/fj_cpu.cu | 62 +++++++--- .../feasibility_jump/fj_cpu.cuh | 9 ++ cpp/src/mip_heuristics/solve.cu | 107 +++++++++++++++++- cpp/src/mip_heuristics/solver.cu | 27 ++++- cpp/src/mip_heuristics/solver_context.cuh | 10 +- 16 files changed, 605 insertions(+), 47 deletions(-) create mode 100644 cpp/src/mip_heuristics/early_heuristic.cuh create mode 100644 cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu create mode 100644 cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cuh create mode 100644 cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu create mode 100644 cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cuh diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 6ce9a4f4d..a4e43a1ec 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -898,7 +898,7 @@ struct nondeterministic_policy_t : tree_update_policy_t { { } - f_t upper_bound() const override { return bnb.upper_bound_.load(); } + f_t upper_bound() const override { return bnb.get_cutoff(); } void update_pseudo_costs(mip_node_t* node, f_t leaf_obj) override { @@ -1316,10 +1316,11 @@ dual::status_t branch_and_bound_t::solve_node_lp( simplex_solver_settings_t lp_settings = settings_; lp_settings.set_log(false); + f_t cutoff = get_cutoff(); if (original_lp_.objective_is_integral) { - lp_settings.cut_off = std::ceil(upper_bound_ - settings_.integer_tol) + settings_.dual_tol; + lp_settings.cut_off = std::ceil(cutoff - settings_.integer_tol) + settings_.dual_tol; } else { - lp_settings.cut_off = upper_bound_ + settings_.dual_tol; + lp_settings.cut_off = cutoff + settings_.dual_tol; } lp_settings.inside_mip = 2; lp_settings.time_limit = settings_.time_limit - toc(exploration_stats_.start_time); @@ -1426,7 +1427,7 @@ void branch_and_bound_t::plunge_with(branch_and_bound_worker_tlower_bound = lower_bound; - if (lower_bound > upper_bound) { + if (lower_bound > get_cutoff()) { search_tree_.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); search_tree_.update(node_ptr, node_status_t::FATHOMED); worker->recompute_basis = true; @@ -1536,7 +1537,7 @@ void branch_and_bound_t::dive_with(branch_and_bound_worker_t f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); worker->lower_bound = lower_bound; - if (node_ptr->lower_bound > upper_bound) { + if (node_ptr->lower_bound > get_cutoff()) { worker->recompute_basis = true; worker->recompute_bounds = true; continue; @@ -1675,7 +1676,7 @@ void branch_and_bound_t::run_scheduler() std::optional*> start_node = node_queue_.pop_best_first(); if (!start_node.has_value()) { continue; } - if (upper_bound_ < start_node.value()->lower_bound) { + if (get_cutoff() < start_node.value()->lower_bound) { // This node was put on the heap earlier but its lower bound is now greater than the // current upper bound search_tree_.graphviz_node( @@ -1699,7 +1700,7 @@ void branch_and_bound_t::run_scheduler() std::optional*> start_node = node_queue_.pop_diving(); if (!start_node.has_value()) { continue; } - if (upper_bound_ < start_node.value()->lower_bound || + if (get_cutoff() < start_node.value()->lower_bound || start_node.value()->depth < diving_settings.min_node_depth) { continue; } @@ -1767,7 +1768,7 @@ void branch_and_bound_t::single_threaded_solve() std::optional*> start_node = node_queue_.pop_best_first(); if (!start_node.has_value()) { continue; } - if (upper_bound_ < start_node.value()->lower_bound) { + if (get_cutoff() < start_node.value()->lower_bound) { // This node was put on the heap earlier but its lower bound is now greater than the // current upper bound search_tree_.graphviz_node( @@ -2189,12 +2190,12 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut return mip_status_t::NUMERICAL; } - if (settings_.reduced_cost_strengthening >= 1 && upper_bound_.load() < last_upper_bound) { + if (settings_.reduced_cost_strengthening >= 1 && get_cutoff() < last_upper_bound) { mutex_upper_.lock(); - last_upper_bound = upper_bound_.load(); + last_upper_bound = get_cutoff(); std::vector lower_bounds; std::vector upper_bounds; - find_reduced_cost_fixings(upper_bound_.load(), lower_bounds, upper_bounds); + find_reduced_cost_fixings(get_cutoff(), lower_bounds, upper_bounds); mutex_upper_.unlock(); mutex_original_lp_.lock(); original_lp_.lower = lower_bounds; @@ -2372,10 +2373,10 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut return solver_status_; } - if (settings_.reduced_cost_strengthening >= 2 && upper_bound_.load() < last_upper_bound) { + if (settings_.reduced_cost_strengthening >= 2 && get_cutoff() < last_upper_bound) { std::vector lower_bounds; std::vector upper_bounds; - i_t num_fixed = find_reduced_cost_fixings(upper_bound_.load(), lower_bounds, upper_bounds); + i_t num_fixed = find_reduced_cost_fixings(get_cutoff(), lower_bounds, upper_bounds); if (num_fixed > 0) { std::vector bounds_changed(original_lp_.num_cols, true); std::vector row_sense; @@ -2479,7 +2480,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut std::optional*> start_node = node_queue_.pop_best_first(); if (!start_node.has_value()) { continue; } - if (upper_bound_ < start_node.value()->lower_bound) { + if (get_cutoff() < start_node.value()->lower_bound) { // This node was put on the heap earlier but its lower bound is now greater than the // current upper bound search_tree_.graphviz_node( @@ -3321,7 +3322,7 @@ void branch_and_bound_t::deterministic_sort_replay_events( template void branch_and_bound_t::deterministic_prune_worker_nodes_vs_incumbent() { - f_t upper_bound = upper_bound_.load(); + f_t upper_bound = get_cutoff(); for (auto& worker : *deterministic_workers_) { // Check nodes in plunge stack - filter in place @@ -3457,14 +3458,14 @@ void branch_and_bound_t::deterministic_populate_diving_heap() const int num_diving = deterministic_diving_workers_->size(); constexpr int target_nodes_per_worker = 10; const int target_total = num_diving * target_nodes_per_worker; - f_t upper_bound = upper_bound_.load(); + f_t cutoff = get_cutoff(); // Collect candidate nodes from BFS worker backlog heaps std::vector*, f_t>> candidates; for (auto& worker : *deterministic_workers_) { for (auto* node : worker.backlog.data()) { - if (node->lower_bound < upper_bound) { + if (node->lower_bound < cutoff) { f_t score = node->objective_estimate; if (score >= inf) { score = node->lower_bound; } candidates.push_back({node, score}); diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index a13d5cedc..fac413024 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -108,6 +108,14 @@ class branch_and_bound_t { bool stop_for_time_limit(mip_solution_t& solution); + // Set a cutoff bound from an external source (e.g., early FJ during presolve). + // Used for node pruning and reduced cost strengthening but NOT for gap computation. + // Unlike upper_bound_, this does not imply a verified incumbent solution exists. + void set_initial_cutoff(f_t bound) { initial_cutoff_ = bound; } + + // Effective cutoff for node pruning: min of verified incumbent and external cutoff. + f_t get_cutoff() const { return std::min(upper_bound_.load(), initial_cutoff_); } + // Repair a low-quality solution from the heuristics. bool repair_solution(const std::vector& leaf_edge_norms, const std::vector& potential_solution, @@ -169,9 +177,12 @@ class branch_and_bound_t { // Mutex for upper bound omp_mutex_t mutex_upper_; - // Global variable for upper bound + // Verified incumbent bound (only set when B&B has an actual integer-feasible solution). omp_atomic_t upper_bound_; + // External cutoff from early heuristics (for pruning only, no verified solution). + f_t initial_cutoff_{std::numeric_limits::infinity()}; + // Global variable for incumbent. The incumbent should be updated with the upper bound mip_solution_t incumbent_; diff --git a/cpp/src/mip_heuristics/CMakeLists.txt b/cpp/src/mip_heuristics/CMakeLists.txt index 538e3c49a..7c1035fd1 100644 --- a/cpp/src/mip_heuristics/CMakeLists.txt +++ b/cpp/src/mip_heuristics/CMakeLists.txt @@ -40,7 +40,9 @@ set(MIP_NON_LP_FILES ${CMAKE_CURRENT_SOURCE_DIR}/presolve/trivial_presolve.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump_kernels.cu - ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu) + ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu + ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/early_cpufj.cu + ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/early_gpufj.cu) # Choose which files to include based on build mode if(BUILD_LP_ONLY) diff --git a/cpp/src/mip_heuristics/diversity/population.cu b/cpp/src/mip_heuristics/diversity/population.cu index c2138c91c..ba23d3fb0 100644 --- a/cpp/src/mip_heuristics/diversity/population.cu +++ b/cpp/src/mip_heuristics/diversity/population.cu @@ -267,9 +267,9 @@ void population_t::invoke_get_solution_callback( f_t user_bound = context.stats.get_solution_bound(); solution_t temp_sol(sol); problem_ptr->post_process_assignment(temp_sol.assignment); - if (context.settings.mip_scaling) { + if (context.settings.mip_scaling && context.scaling != nullptr) { rmm::device_uvector dummy(0, temp_sol.handle_ptr->get_stream()); - context.scaling.unscale_solutions(temp_sol.assignment, dummy); + context.scaling->unscale_solutions(temp_sol.assignment, dummy); } if (problem_ptr->has_papilo_presolve_data()) { problem_ptr->papilo_uncrush_assignment(temp_sol.assignment); @@ -346,7 +346,9 @@ void population_t::run_solution_callbacks(solution_t& sol) incumbent_assignment.size(), sol.handle_ptr->get_stream()); - if (context.settings.mip_scaling) { context.scaling.scale_solutions(incumbent_assignment); } + if (context.settings.mip_scaling && context.scaling != nullptr) { + context.scaling->scale_solutions(incumbent_assignment); + } bool is_valid = problem_ptr->pre_process_assignment(incumbent_assignment); if (!is_valid) { return; } cuopt_assert(outside_sol.assignment.size() == incumbent_assignment.size(), diff --git a/cpp/src/mip_heuristics/early_heuristic.cuh b/cpp/src/mip_heuristics/early_heuristic.cuh new file mode 100644 index 000000000..5ea7f346a --- /dev/null +++ b/cpp/src/mip_heuristics/early_heuristic.cuh @@ -0,0 +1,94 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include + +#include + +#include + +#include + +#include +#include +#include +#include + +namespace cuopt::linear_programming::detail { + +template +using early_incumbent_callback_t = + std::function& assignment)>; + +// CRTP base for early heuristics that run on the original (or papilo-presolved) problem +// during presolve to find incumbents as early as possible. +// Derived classes implement start() and stop(). +template +class early_heuristic_t { + public: + early_heuristic_t(const optimization_problem_t& op_problem, + const typename mip_solver_settings_t::tolerances_t& tolerances, + early_incumbent_callback_t incumbent_callback) + : incumbent_callback_(std::move(incumbent_callback)) + { + problem_ptr_ = std::make_unique>(op_problem, tolerances, false); + problem_ptr_->preprocess_problem(); + + solution_ptr_ = std::make_unique>(*problem_ptr_); + thrust::fill(problem_ptr_->handle_ptr->get_thrust_policy(), + solution_ptr_->assignment.begin(), + solution_ptr_->assignment.end(), + f_t{0}); + solution_ptr_->clamp_within_bounds(); + } + + bool solution_found() const { return solution_found_; } + f_t get_best_objective() const { return best_objective_; } + void set_best_objective(f_t obj) { best_objective_ = obj; } + const std::vector& get_best_assignment() const { return best_assignment_; } + + protected: + ~early_heuristic_t() = default; + + void try_update_best(f_t user_obj, const std::vector& assignment) + { + if (user_obj >= best_objective_) { return; } + best_objective_ = user_obj; + + auto* handle_ptr = problem_ptr_->handle_ptr; + RAFT_CUDA_TRY(cudaSetDevice(handle_ptr->get_device())); + rmm::device_uvector d_assignment(assignment.size(), handle_ptr->get_stream()); + raft::copy(d_assignment.data(), assignment.data(), assignment.size(), handle_ptr->get_stream()); + problem_ptr_->post_process_assignment(d_assignment); + auto user_assignment = cuopt::host_copy(d_assignment, handle_ptr->get_stream()); + + best_assignment_ = user_assignment; + solution_found_ = true; + double elapsed = + std::chrono::duration(std::chrono::steady_clock::now() - start_time_).count(); + CUOPT_LOG_INFO("Early heuristics (%s) lowered the primal bound. Objective %g. Time %.2f", + Derived::name(), + user_obj, + elapsed); + if (incumbent_callback_) { incumbent_callback_(user_obj, user_assignment); } + } + + std::unique_ptr> problem_ptr_; + std::unique_ptr> solution_ptr_; + + bool solution_found_{false}; + f_t best_objective_{std::numeric_limits::infinity()}; + std::vector best_assignment_; + + early_incumbent_callback_t incumbent_callback_; + std::chrono::steady_clock::time_point start_time_; +}; + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu new file mode 100644 index 000000000..23f7a07da --- /dev/null +++ b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu @@ -0,0 +1,79 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include "early_cpufj.cuh" + +#include +#include +#include + +namespace cuopt::linear_programming::detail { + +template +early_cpufj_t::early_cpufj_t( + const optimization_problem_t& op_problem, + const typename mip_solver_settings_t::tolerances_t& tolerances, + early_incumbent_callback_t incumbent_callback) + : early_heuristic_t>( + op_problem, tolerances, std::move(incumbent_callback)) +{ +} + +template +early_cpufj_t::~early_cpufj_t() +{ + stop(); +} + +template +void early_cpufj_t::start() +{ + if (cpu_fj_thread_) { return; } + + this->start_time_ = std::chrono::steady_clock::now(); + + cpu_fj_thread_ = std::make_unique>(); + cpu_fj_thread_->fj_cpu = init_fj_cpu_standalone( + *this->problem_ptr_, *this->solution_ptr_, preemption_flag_); + cpu_fj_thread_->time_limit = std::numeric_limits::infinity(); + + cpu_fj_thread_->fj_cpu->log_prefix = "[Early CPUFJ] "; + + cpu_fj_thread_->fj_cpu->improvement_callback = + [this](f_t solver_obj, const std::vector& assignment, double) { + f_t user_obj = this->problem_ptr_->get_user_obj_from_solver_obj(solver_obj); + this->try_update_best(user_obj, assignment); + }; + + cpu_fj_thread_->start_cpu_solver(); +} + +template +void early_cpufj_t::stop() +{ + if (!cpu_fj_thread_) { return; } + + preemption_flag_.store(true); + cpu_fj_thread_->stop_cpu_solver(); + cpu_fj_thread_->wait_for_cpu_solver(); + + CUOPT_LOG_DEBUG("[Early CPUFJ] Stopped after %d iterations, solution_found=%d", + cpu_fj_thread_->fj_cpu ? cpu_fj_thread_->fj_cpu->iterations : 0, + this->solution_found_); + + cpu_fj_thread_.reset(); +} + +#if MIP_INSTANTIATE_FLOAT +template class early_cpufj_t; +#endif + +#if MIP_INSTANTIATE_DOUBLE +template class early_cpufj_t; +#endif + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cuh b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cuh new file mode 100644 index 000000000..911e84655 --- /dev/null +++ b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cuh @@ -0,0 +1,39 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include + +#include +#include + +namespace cuopt::linear_programming::detail { + +template +struct cpu_fj_thread_t; + +template +class early_cpufj_t : public early_heuristic_t> { + public: + early_cpufj_t(const optimization_problem_t& op_problem, + const typename mip_solver_settings_t::tolerances_t& tolerances, + early_incumbent_callback_t incumbent_callback); + + ~early_cpufj_t(); + + static constexpr const char* name() { return "CPUFJ"; } + + void start(); + void stop(); + + private: + std::unique_ptr> cpu_fj_thread_; + std::atomic preemption_flag_{false}; +}; + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu new file mode 100644 index 000000000..adc010c16 --- /dev/null +++ b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu @@ -0,0 +1,93 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include "early_gpufj.cuh" + +#include +#include +#include +#include + +#include + +#include + +namespace cuopt::linear_programming::detail { + +template +early_gpufj_t::early_gpufj_t(const optimization_problem_t& op_problem, + const mip_solver_settings_t& settings, + early_incumbent_callback_t incumbent_callback) + : early_heuristic_t>( + op_problem, settings.get_tolerances(), std::move(incumbent_callback)) +{ + context_ptr_ = std::make_unique>( + this->problem_ptr_->handle_ptr, this->problem_ptr_.get(), settings, nullptr); +} + +template +early_gpufj_t::~early_gpufj_t() +{ + stop(); +} + +template +void early_gpufj_t::start() +{ + if (worker_thread_) { return; } + + this->start_time_ = std::chrono::steady_clock::now(); + + fj_settings_t fj_settings; + fj_settings.mode = fj_mode_t::EXIT_NON_IMPROVING; + fj_settings.n_of_minimums_for_exit = std::numeric_limits::max(); + fj_settings.time_limit = std::numeric_limits::infinity(); + fj_settings.iteration_limit = std::numeric_limits::max(); + fj_settings.update_weights = true; + fj_settings.feasibility_run = false; + + fj_ptr_ = std::make_unique>(*context_ptr_, fj_settings); + + fj_ptr_->improvement_callback = [this](f_t user_obj, const std::vector& h_assignment) { + this->try_update_best(user_obj, h_assignment); + }; + + worker_thread_ = std::make_unique(&early_gpufj_t::run_worker, this); +} + +template +void early_gpufj_t::run_worker() +{ + RAFT_CUDA_TRY(cudaSetDevice(this->problem_ptr_->handle_ptr->get_device())); + fj_ptr_->solve(*this->solution_ptr_); +} + +template +void early_gpufj_t::stop() +{ + if (!worker_thread_) { return; } + + context_ptr_->preempt_heuristic_solver_.store(true); + + if (worker_thread_->joinable()) { worker_thread_->join(); } + + CUOPT_LOG_DEBUG("[Early GPU FJ] Stopped, solution_found=%d", + this->solution_found_); + + fj_ptr_.reset(); + worker_thread_.reset(); +} + +#if MIP_INSTANTIATE_FLOAT +template class early_gpufj_t; +#endif + +#if MIP_INSTANTIATE_DOUBLE +template class early_gpufj_t; +#endif + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cuh b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cuh new file mode 100644 index 000000000..4a7769143 --- /dev/null +++ b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cuh @@ -0,0 +1,45 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include + +#include +#include + +namespace cuopt::linear_programming::detail { + +template +class fj_t; + +template +struct mip_solver_context_t; + +template +class early_gpufj_t : public early_heuristic_t> { + public: + early_gpufj_t(const optimization_problem_t& op_problem, + const mip_solver_settings_t& settings, + early_incumbent_callback_t incumbent_callback); + + ~early_gpufj_t(); + + static constexpr const char* name() { return "GPUFJ"; } + + void start(); + void stop(); + + private: + void run_worker(); + + std::unique_ptr> context_ptr_; + std::unique_ptr> fj_ptr_; + std::unique_ptr worker_thread_; +}; + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu index e9cf0760d..b248351a3 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu @@ -879,7 +879,7 @@ i_t fj_t::host_loop(solution_t& solution, i_t climber_idx) // every now and then, ensure external solutions are added to the population // this is done here because FJ is called within FP and also after recombiners // so FJ is one of the most inner and most frequent functions to be called - if (steps % 10000 == 0) { + if (steps % 10000 == 0 && context.diversity_manager_ptr != nullptr) { context.diversity_manager_ptr->get_population_pointer() ->add_external_solutions_to_population(); } @@ -933,6 +933,22 @@ i_t fj_t::host_loop(solution_t& solution, i_t climber_idx) bool is_feasible = solution.compute_feasibility(); solution.handle_ptr->sync_stream(); + // Invoke improvement callback if we have a better feasible solution + if (is_feasible && improvement_callback) { + f_t user_obj = solution.get_user_objective(); + if (user_obj < last_reported_objective_) { + last_reported_objective_ = user_obj; + // Copy assignment to host for callback + std::vector h_assignment(solution.assignment.size()); + raft::copy(h_assignment.data(), + solution.assignment.data(), + solution.assignment.size(), + climber_stream); + climber_stream.synchronize(); + improvement_callback(user_obj, h_assignment); + } + } + if (limit_reached) { break; } if (is_feasible) { diff --git a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cuh b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cuh index e9040a759..50b451a86 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cuh +++ b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cuh @@ -19,11 +19,17 @@ #include +#include + #define FJ_DEBUG_LOAD_BALANCING 0 #define FJ_SINGLE_STEP 0 namespace cuopt::linear_programming::detail { +template +using fj_improvement_callback_t = + std::function& assignment)>; + static constexpr int TPB_resetmoves = raft::WarpSize * 4; static constexpr int TPB_heavyvars = raft::WarpSize * 16; static constexpr int TPB_heavycstrs = raft::WarpSize * 4; @@ -628,6 +634,9 @@ class fj_t { std::vector> climbers; rmm::device_uvector climber_views; fj_settings_t settings; + + fj_improvement_callback_t improvement_callback; + f_t last_reported_objective_{std::numeric_limits::infinity()}; }; } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu index 4d567c9ec..87f47d254 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu @@ -790,7 +790,7 @@ static void apply_move(fj_cpu_climber_t& fj_cpu, if (fj_cpu.improvement_callback) { double current_work_units = fj_cpu.work_units_elapsed.load(std::memory_order_acquire); fj_cpu.improvement_callback( - fj_cpu.h_best_objective, fj_cpu.h_assignment, current_work_units); + fj_cpu.h_incumbent_objective, fj_cpu.h_assignment, current_work_units); } fj_cpu.feasible_found = true; } @@ -1402,10 +1402,8 @@ std::unique_ptr> fj_t::create_cpu_climber( } template -bool fj_t::cpu_solve(fj_cpu_climber_t& fj_cpu, f_t in_time_limit) +static bool cpufj_solve_loop(fj_cpu_climber_t& fj_cpu, f_t in_time_limit) { - raft::common::nvtx::range scope("fj_cpu"); - i_t local_mins = 0; auto loop_start = std::chrono::high_resolution_clock::now(); auto time_limit = std::chrono::milliseconds((int)(in_time_limit * 1000)); @@ -1489,10 +1487,9 @@ bool fj_t::cpu_solve(fj_cpu_climber_t& fj_cpu, f_t in_time_l for (size_t i = 0; i < fj_cpu.cached_mtm_moves.size(); i++) fj_cpu.cached_mtm_moves[i].first = 0; } - thrust::tie(move, score) = - find_mtm_move_viol(fj_cpu, 1, true); // pick a single random violated constraint - i_t var_idx = move.var_idx >= 0 ? move.var_idx : 0; - f_t delta = move.var_idx >= 0 ? move.value : 0; + thrust::tie(move, score) = find_mtm_move_viol(fj_cpu, 1, true); // pick a single random violated constraint + i_t var_idx = move.var_idx >= 0 ? move.var_idx : 0; + f_t delta = move.var_idx >= 0 ? move.value : 0; apply_move(fj_cpu, var_idx, delta, true); ++local_mins; ++fj_cpu.n_local_minima_window; @@ -1507,8 +1504,7 @@ bool fj_t::cpu_solve(fj_cpu_climber_t& fj_cpu, f_t in_time_l if (fj_cpu.iterations % fj_cpu.log_interval == 0) { CUOPT_LOG_TRACE( "%sCPUFJ iteration: %d/%d, local mins: %d, best_objective: %g, viol: %zu, obj weight %g, " - "maxw " - "%g", + "maxw %g", fj_cpu.log_prefix.c_str(), fj_cpu.iterations, fj_cpu.settings.iteration_limit != std::numeric_limits::max() @@ -1537,15 +1533,10 @@ bool fj_t::cpu_solve(fj_cpu_climber_t& fj_cpu, f_t in_time_l if (fj_cpu.iterations % 100 == 0 && fj_cpu.iterations > 0) { // Collect memory statistics auto [loads, stores] = fj_cpu.memory_aggregator.collect(); - - double biased_work = (loads + stores) * fj_cpu.work_unit_bias / 1e10; + double biased_work = (loads + stores) * fj_cpu.work_unit_bias / 1e10; fj_cpu.work_units_elapsed += biased_work; if (fj_cpu.producer_sync != nullptr) { fj_cpu.producer_sync->notify_progress(); } - - CUOPT_LOG_TRACE("CPUFJ work units: %f incumbent %g", - fj_cpu.work_units_elapsed.load(std::memory_order_relaxed), - fj_cpu.pb_ptr->get_user_obj_from_solver_obj(fj_cpu.h_best_objective)); } cuopt_func_call(sanity_checks(fj_cpu)); @@ -1569,6 +1560,13 @@ bool fj_t::cpu_solve(fj_cpu_climber_t& fj_cpu, f_t in_time_l return fj_cpu.feasible_found; } +template +bool fj_t::cpu_solve(fj_cpu_climber_t& fj_cpu, f_t in_time_limit) +{ + raft::common::nvtx::range scope("fj_cpu"); + return cpufj_solve_loop(fj_cpu, in_time_limit); +} + template cpu_fj_thread_t::~cpu_fj_thread_t() { @@ -1578,8 +1576,7 @@ cpu_fj_thread_t::~cpu_fj_thread_t() template void cpu_fj_thread_t::run_worker() { - bool solution_found = fj_ptr->cpu_solve(*fj_cpu, time_limit); - cpu_fj_solution_found = solution_found; + cpu_fj_solution_found = cpufj_solve_loop(*fj_cpu, time_limit); } template @@ -1601,14 +1598,43 @@ void cpu_fj_thread_t::stop_cpu_solver() fj_cpu->halted = true; } +template +std::unique_ptr> init_fj_cpu_standalone( + problem_t& problem, + solution_t& solution, + std::atomic& preemption_flag, + fj_settings_t settings) +{ + raft::common::nvtx::range scope("init_fj_cpu_standalone"); + + auto fj_cpu = std::make_unique>(preemption_flag); + + std::vector default_weights(problem.n_constraints, 1.0); + init_fj_cpu(*fj_cpu, solution, default_weights, default_weights, 0.0); + fj_cpu->settings = settings; + fj_cpu->settings.seed = cuopt::seed_generator::get_seed(); + + return fj_cpu; +} + #if MIP_INSTANTIATE_FLOAT template class fj_t; template class cpu_fj_thread_t; +template std::unique_ptr> init_fj_cpu_standalone( + problem_t& problem, + solution_t& solution, + std::atomic& preemption_flag, + fj_settings_t settings); #endif #if MIP_INSTANTIATE_DOUBLE template class fj_t; template class cpu_fj_thread_t; +template std::unique_ptr> init_fj_cpu_standalone( + problem_t& problem, + solution_t& solution, + std::atomic& preemption_flag, + fj_settings_t settings); #endif } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh index 7dcc8d39b..3263609a2 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh +++ b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh @@ -210,4 +210,13 @@ struct cpu_fj_thread_t : public cpu_worker_thread_base_t* fj_ptr{nullptr}; }; +// Standalone CPUFJ init for running without full fj_t infrastructure (avoids GPU allocations). +// Used for early CPUFJ during presolve. +template +std::unique_ptr> init_fj_cpu_standalone( + problem_t& problem, + solution_t& solution, + std::atomic& preemption_flag, + fj_settings_t settings = fj_settings_t{}); + } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index 41e272992..54d7189e7 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -7,6 +7,8 @@ #include +#include +#include #include #include #include @@ -51,10 +53,29 @@ static void init_handler(const raft::handle_t* handle_ptr) handle_ptr->get_cusparse_handle(), CUSPARSE_POINTER_MODE_DEVICE, handle_ptr->get_stream())); } +template +static void invoke_solution_callbacks( + const std::vector& mip_callbacks, + f_t objective, + std::vector& assignment) +{ + std::vector obj_vec = {objective}; + std::vector bound_vec = {std::numeric_limits::infinity()}; + for (auto callback : mip_callbacks) { + if (callback != nullptr && + callback->get_type() == internals::base_solution_callback_type::GET_SOLUTION) { + auto get_sol_callback = static_cast(callback); + get_sol_callback->get_solution( + assignment.data(), obj_vec.data(), bound_vec.data(), get_sol_callback->get_user_data()); + } + } +} + template mip_solution_t run_mip(detail::problem_t& problem, mip_solver_settings_t const& settings, - timer_t& timer) + timer_t& timer, + f_t initial_cutoff = std::numeric_limits::infinity()) { raft::common::nvtx::range fun_scope("run_mip"); auto constexpr const running_mip = true; @@ -156,6 +177,31 @@ mip_solution_t run_mip(detail::problem_t& problem, auto scaled_sol = solver.run_solver(); bool is_feasible_before_scaling = scaled_sol.get_feasible(); scaled_sol.problem_ptr = &problem; + solver.context.initial_cutoff = initial_cutoff; + + // Run early CPUFJ on papilo-presolved problem during cuOpt presolve (probing cache). + // Stopped by run_solver after presolve completes; its best objective feeds into initial_cutoff. + std::unique_ptr> early_cpufj; + bool run_early_cpufj = problem.has_papilo_presolve_data() && + settings.determinism_mode != CUOPT_MODE_DETERMINISTIC && + problem.original_problem_ptr->get_n_integers() > 0; + if (run_early_cpufj) { + auto* presolver_ptr = problem.presolve_data.papilo_presolve_ptr; + auto mip_callbacks = settings.get_mip_callbacks(); + auto incumbent_callback = + [presolver_ptr, mip_callbacks](f_t objective, const std::vector& assignment) { + std::vector user_assignment; + presolver_ptr->uncrush_primal_solution(assignment, user_assignment); + invoke_solution_callbacks(mip_callbacks, objective, user_assignment); + }; + early_cpufj = std::make_unique>( + *problem.original_problem_ptr, settings.get_tolerances(), incumbent_callback); + early_cpufj->set_best_objective(initial_cutoff); + early_cpufj->start(); + solver.context.early_cpufj_ptr = early_cpufj.get(); + CUOPT_LOG_DEBUG("Started early CPUFJ on papilo-presolved problem during cuOpt presolve"); + } + if (settings.mip_scaling) { scaling.unscale_solutions(scaled_sol); } // at this point we need to compute the feasibility on the original problem not the presolved one bool is_feasible_after_unscaling = scaled_sol.compute_feasibility(); @@ -251,6 +297,43 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, if (!run_presolve) { CUOPT_LOG_INFO("Presolve is disabled, skipping"); } + // Start early FJ (CPU and GPU) during presolve to find incumbents ASAP + // Only run if presolve is enabled (gives FJ time to find solutions) + // and we're not in deterministic mode + std::unique_ptr> early_cpufj; + std::unique_ptr> early_gpufj; + + // Track best incumbent found during presolve (shared across CPU and GPU FJ) + std::atomic early_best_objective{std::numeric_limits::infinity()}; + std::mutex early_callback_mutex; + + bool run_early_fj = run_presolve && settings.determinism_mode != CUOPT_MODE_DETERMINISTIC && + op_problem.get_n_integers() > 0; + if (run_early_fj) { + auto early_fj_callback = [&early_best_objective, + &early_callback_mutex, + mip_callbacks = settings.get_mip_callbacks()]( + f_t objective, const std::vector& assignment) { + std::lock_guard lock(early_callback_mutex); + if (objective >= early_best_objective.load()) { return; } + early_best_objective.store(objective); + auto user_assignment = assignment; + invoke_solution_callbacks(mip_callbacks, objective, user_assignment); + }; + + // Start early CPUFJ on original problem (will restart on presolved problem after Papilo) + early_cpufj = std::make_unique>( + op_problem, settings.get_tolerances(), early_fj_callback); + early_cpufj->start(); + CUOPT_LOG_INFO("Started early CPUFJ on original problem"); + + // Start early GPU FJ (uses GPU while CPU is busy with Papilo) + early_gpufj = + std::make_unique>(op_problem, settings, early_fj_callback); + early_gpufj->start(); + CUOPT_LOG_INFO("Started early GPUFJ during presolve"); + } + auto constexpr const dual_postsolve = false; if (run_presolve) { detail::sort_csr(op_problem); @@ -288,12 +371,32 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, } CUOPT_LOG_INFO("Papilo presolve time: %.2f", presolve_time); } + + // Stop early GPU FJ now that Papilo presolve is complete + if (early_gpufj) { + early_gpufj->stop(); + if (early_gpufj->solution_found()) { + CUOPT_LOG_INFO("Early GPU FJ found incumbent with objective %.6e during presolve", + early_gpufj->get_best_objective()); + } + early_gpufj.reset(); // Free GPU memory + } + + if (early_cpufj && run_presolve && presolve_result.has_value()) { + early_cpufj->stop(); + if (early_cpufj->solution_found()) { + CUOPT_LOG_INFO("Early CPUFJ (original) found incumbent with objective %.6e", + early_cpufj->get_best_objective()); + } + early_cpufj.reset(); + } + if (settings.user_problem_file != "") { CUOPT_LOG_INFO("Writing user problem to file: %s", settings.user_problem_file.c_str()); op_problem.write_to_mps(settings.user_problem_file); } - auto sol = run_mip(problem, settings, timer); + auto sol = run_mip(problem, settings, timer, early_best_objective.load()); if (run_presolve) { auto status_to_skip = sol.get_termination_status() == mip_termination_status_t::TimeLimit || diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index 3e2251171..92e52a0f8 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -48,7 +49,7 @@ mip_solver_t::mip_solver_t(const problem_t& op_problem, context(op_problem.handle_ptr, const_cast*>(&op_problem), solver_settings, - scaling), + &scaling), timer_(timer) { init_handler(op_problem.handle_ptr); @@ -113,6 +114,17 @@ solution_t mip_solver_t::run_solver() ? std::numeric_limits::infinity() : timer_.remaining_time(); bool presolve_success = run_presolve ? dm.run_presolve(time_limit) : true; + + // Stop early CPUFJ after cuopt presolve (probing cache) but before main solve + if (context.early_cpufj_ptr) { + context.early_cpufj_ptr->stop(); + if (context.early_cpufj_ptr->solution_found()) { + f_t obj = context.early_cpufj_ptr->get_best_objective(); + context.initial_cutoff = std::min(context.initial_cutoff, obj); + CUOPT_LOG_INFO("Early CPUFJ found incumbent with objective %g during presolve", obj); + } + } + if (!presolve_success) { CUOPT_LOG_INFO("Problem proven infeasible in presolve"); solution_t sol(*context.problem_ptr); @@ -251,7 +263,18 @@ solution_t mip_solver_t::run_solver() branch_and_bound = std::make_unique>( branch_and_bound_problem, branch_and_bound_settings, timer_.get_tic_start()); context.branch_and_bound_ptr = branch_and_bound.get(); - auto* stats_ptr = &context.stats; + + // Set cutoff from early FJ if available + if (context.initial_cutoff < std::numeric_limits::infinity()) { + f_t internal_cutoff = + context.problem_ptr->get_solver_obj_from_user_obj(context.initial_cutoff); + branch_and_bound->set_initial_cutoff(internal_cutoff); + CUOPT_LOG_INFO("B&B using initial cutoff %.6e from early heuristics (internal: %.6e)", + context.initial_cutoff, + internal_cutoff); + } + + auto* stats_ptr = &context.stats; branch_and_bound->set_user_bound_callback( [stats_ptr](f_t user_bound) { stats_ptr->set_solution_bound(user_bound); }); diff --git a/cpp/src/mip_heuristics/solver_context.cuh b/cpp/src/mip_heuristics/solver_context.cuh index baac1dd9d..d1be34ea6 100644 --- a/cpp/src/mip_heuristics/solver_context.cuh +++ b/cpp/src/mip_heuristics/solver_context.cuh @@ -28,6 +28,9 @@ namespace cuopt::linear_programming::detail { template class diversity_manager_t; +template +class early_cpufj_t; + // Aggregate structure containing the global context of the solving process for convenience: // The current problem, user settings, raft handle and statistics objects template @@ -35,7 +38,7 @@ struct mip_solver_context_t { explicit mip_solver_context_t(raft::handle_t const* handle_ptr_, problem_t* problem_ptr_, mip_solver_settings_t settings_, - pdlp_initial_scaling_strategy_t& scaling) + pdlp_initial_scaling_strategy_t* scaling) : handle_ptr(handle_ptr_), problem_ptr(problem_ptr_), settings(settings_), scaling(scaling) { cuopt_assert(problem_ptr != nullptr, "problem_ptr is nullptr"); @@ -53,7 +56,7 @@ struct mip_solver_context_t { diversity_manager_t* diversity_manager_ptr{nullptr}; std::atomic preempt_heuristic_solver_ = false; const mip_solver_settings_t settings; - pdlp_initial_scaling_strategy_t& scaling; + pdlp_initial_scaling_strategy_t* scaling; // nullptr when not available (early FJ) solver_stats_t stats; // Work limit context for tracking work units in deterministic mode (shared across all timers in // GPU heuristic loop) @@ -61,6 +64,9 @@ struct mip_solver_context_t { // synchronization every 5 seconds for deterministic mode work_unit_scheduler_t work_unit_scheduler_{5.0}; + + early_cpufj_t* early_cpufj_ptr{nullptr}; + f_t initial_cutoff{std::numeric_limits::infinity()}; }; } // namespace cuopt::linear_programming::detail From 5881d35aa4526638a0adbb721d36329623da1446 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Mon, 23 Feb 2026 18:47:41 +0100 Subject: [PATCH 02/18] fix style --- .../feasibility_jump/early_cpufj.cu | 10 ++++---- .../feasibility_jump/early_gpufj.cu | 5 ++-- .../mip_heuristics/feasibility_jump/fj_cpu.cu | 7 +++--- cpp/src/mip_heuristics/solve.cu | 24 +++++++++---------- cpp/src/mip_heuristics/solver.cu | 2 +- 5 files changed, 24 insertions(+), 24 deletions(-) diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu index 23f7a07da..fadec6e45 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu @@ -36,9 +36,9 @@ void early_cpufj_t::start() this->start_time_ = std::chrono::steady_clock::now(); - cpu_fj_thread_ = std::make_unique>(); - cpu_fj_thread_->fj_cpu = init_fj_cpu_standalone( - *this->problem_ptr_, *this->solution_ptr_, preemption_flag_); + cpu_fj_thread_ = std::make_unique>(); + cpu_fj_thread_->fj_cpu = + init_fj_cpu_standalone(*this->problem_ptr_, *this->solution_ptr_, preemption_flag_); cpu_fj_thread_->time_limit = std::numeric_limits::infinity(); cpu_fj_thread_->fj_cpu->log_prefix = "[Early CPUFJ] "; @@ -62,8 +62,8 @@ void early_cpufj_t::stop() cpu_fj_thread_->wait_for_cpu_solver(); CUOPT_LOG_DEBUG("[Early CPUFJ] Stopped after %d iterations, solution_found=%d", - cpu_fj_thread_->fj_cpu ? cpu_fj_thread_->fj_cpu->iterations : 0, - this->solution_found_); + cpu_fj_thread_->fj_cpu ? cpu_fj_thread_->fj_cpu->iterations : 0, + this->solution_found_); cpu_fj_thread_.reset(); } diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu index adc010c16..e2df3a5af 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu @@ -8,8 +8,8 @@ #include "early_gpufj.cuh" #include -#include #include +#include #include #include @@ -75,8 +75,7 @@ void early_gpufj_t::stop() if (worker_thread_->joinable()) { worker_thread_->join(); } - CUOPT_LOG_DEBUG("[Early GPU FJ] Stopped, solution_found=%d", - this->solution_found_); + CUOPT_LOG_DEBUG("[Early GPU FJ] Stopped, solution_found=%d", this->solution_found_); fj_ptr_.reset(); worker_thread_.reset(); diff --git a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu index 87f47d254..af41545a9 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu @@ -1487,9 +1487,10 @@ static bool cpufj_solve_loop(fj_cpu_climber_t& fj_cpu, f_t in_time_lim for (size_t i = 0; i < fj_cpu.cached_mtm_moves.size(); i++) fj_cpu.cached_mtm_moves[i].first = 0; } - thrust::tie(move, score) = find_mtm_move_viol(fj_cpu, 1, true); // pick a single random violated constraint - i_t var_idx = move.var_idx >= 0 ? move.var_idx : 0; - f_t delta = move.var_idx >= 0 ? move.value : 0; + thrust::tie(move, score) = + find_mtm_move_viol(fj_cpu, 1, true); // pick a single random violated constraint + i_t var_idx = move.var_idx >= 0 ? move.var_idx : 0; + f_t delta = move.var_idx >= 0 ? move.value : 0; apply_move(fj_cpu, var_idx, delta, true); ++local_mins; ++fj_cpu.n_local_minima_window; diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index 54d7189e7..88daed0c0 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -177,7 +177,7 @@ mip_solution_t run_mip(detail::problem_t& problem, auto scaled_sol = solver.run_solver(); bool is_feasible_before_scaling = scaled_sol.get_feasible(); scaled_sol.problem_ptr = &problem; - solver.context.initial_cutoff = initial_cutoff; + solver.context.initial_cutoff = initial_cutoff; // Run early CPUFJ on papilo-presolved problem during cuOpt presolve (probing cache). // Stopped by run_solver after presolve completes; its best objective feeds into initial_cutoff. @@ -186,14 +186,14 @@ mip_solution_t run_mip(detail::problem_t& problem, settings.determinism_mode != CUOPT_MODE_DETERMINISTIC && problem.original_problem_ptr->get_n_integers() > 0; if (run_early_cpufj) { - auto* presolver_ptr = problem.presolve_data.papilo_presolve_ptr; - auto mip_callbacks = settings.get_mip_callbacks(); - auto incumbent_callback = - [presolver_ptr, mip_callbacks](f_t objective, const std::vector& assignment) { - std::vector user_assignment; - presolver_ptr->uncrush_primal_solution(assignment, user_assignment); - invoke_solution_callbacks(mip_callbacks, objective, user_assignment); - }; + auto* presolver_ptr = problem.presolve_data.papilo_presolve_ptr; + auto mip_callbacks = settings.get_mip_callbacks(); + auto incumbent_callback = [presolver_ptr, mip_callbacks](f_t objective, + const std::vector& assignment) { + std::vector user_assignment; + presolver_ptr->uncrush_primal_solution(assignment, user_assignment); + invoke_solution_callbacks(mip_callbacks, objective, user_assignment); + }; early_cpufj = std::make_unique>( *problem.original_problem_ptr, settings.get_tolerances(), incumbent_callback); early_cpufj->set_best_objective(initial_cutoff); @@ -311,9 +311,9 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, op_problem.get_n_integers() > 0; if (run_early_fj) { auto early_fj_callback = [&early_best_objective, - &early_callback_mutex, - mip_callbacks = settings.get_mip_callbacks()]( - f_t objective, const std::vector& assignment) { + &early_callback_mutex, + mip_callbacks = settings.get_mip_callbacks()]( + f_t objective, const std::vector& assignment) { std::lock_guard lock(early_callback_mutex); if (objective >= early_best_objective.load()) { return; } early_best_objective.store(objective); diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index 92e52a0f8..3f685886b 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -119,7 +119,7 @@ solution_t mip_solver_t::run_solver() if (context.early_cpufj_ptr) { context.early_cpufj_ptr->stop(); if (context.early_cpufj_ptr->solution_found()) { - f_t obj = context.early_cpufj_ptr->get_best_objective(); + f_t obj = context.early_cpufj_ptr->get_best_objective(); context.initial_cutoff = std::min(context.initial_cutoff, obj); CUOPT_LOG_INFO("Early CPUFJ found incumbent with objective %g during presolve", obj); } From 8a3684cece66eb5d9ab2f9a8fa2fe3b7d0697f63 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Mon, 23 Feb 2026 19:31:27 +0100 Subject: [PATCH 03/18] ai review comments --- cpp/src/mip_heuristics/early_heuristic.cuh | 1 + cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu | 1 + cpp/src/mip_heuristics/solve.cu | 9 +++++---- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/cpp/src/mip_heuristics/early_heuristic.cuh b/cpp/src/mip_heuristics/early_heuristic.cuh index 5ea7f346a..cfc2a9c1b 100644 --- a/cpp/src/mip_heuristics/early_heuristic.cuh +++ b/cpp/src/mip_heuristics/early_heuristic.cuh @@ -57,6 +57,7 @@ class early_heuristic_t { protected: ~early_heuristic_t() = default; + // NOT thread-safe void try_update_best(f_t user_obj, const std::vector& assignment) { if (user_obj >= best_objective_) { return; } diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu index fadec6e45..1cd3b0005 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu @@ -34,6 +34,7 @@ void early_cpufj_t::start() { if (cpu_fj_thread_) { return; } + this->preemption_flag_.store(false); this->start_time_ = std::chrono::steady_clock::now(); cpu_fj_thread_ = std::make_unique>(); diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index 88daed0c0..812b93dca 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -174,10 +174,7 @@ mip_solution_t run_mip(detail::problem_t& problem, detail::trivial_presolve(scaled_problem); detail::mip_solver_t solver(scaled_problem, settings, scaling, timer); - auto scaled_sol = solver.run_solver(); - bool is_feasible_before_scaling = scaled_sol.get_feasible(); - scaled_sol.problem_ptr = &problem; - solver.context.initial_cutoff = initial_cutoff; + solver.context.initial_cutoff = initial_cutoff; // Run early CPUFJ on papilo-presolved problem during cuOpt presolve (probing cache). // Stopped by run_solver after presolve completes; its best objective feeds into initial_cutoff. @@ -202,6 +199,10 @@ mip_solution_t run_mip(detail::problem_t& problem, CUOPT_LOG_DEBUG("Started early CPUFJ on papilo-presolved problem during cuOpt presolve"); } + auto scaled_sol = solver.run_solver(); + bool is_feasible_before_scaling = scaled_sol.get_feasible(); + scaled_sol.problem_ptr = &problem; + if (settings.mip_scaling) { scaling.unscale_solutions(scaled_sol); } // at this point we need to compute the feasibility on the original problem not the presolved one bool is_feasible_after_unscaling = scaled_sol.compute_feasibility(); From 9c89092c54c3fbfd2d9fb24b12d5baff87a123da Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Mon, 23 Feb 2026 20:34:25 +0100 Subject: [PATCH 04/18] review comments, fix crash --- cpp/src/mip_heuristics/early_heuristic.cuh | 13 ++++---- .../feasibility_jump/early_cpufj.cu | 3 +- .../feasibility_jump/early_gpufj.cu | 3 +- .../feasibility_jump/feasibility_jump.cu | 9 +++--- cpp/src/mip_heuristics/solve.cu | 30 ++++++++++++------- cpp/src/mip_heuristics/solver.cu | 10 ++----- 6 files changed, 37 insertions(+), 31 deletions(-) diff --git a/cpp/src/mip_heuristics/early_heuristic.cuh b/cpp/src/mip_heuristics/early_heuristic.cuh index cfc2a9c1b..f3644c4d2 100644 --- a/cpp/src/mip_heuristics/early_heuristic.cuh +++ b/cpp/src/mip_heuristics/early_heuristic.cuh @@ -25,7 +25,7 @@ namespace cuopt::linear_programming::detail { template using early_incumbent_callback_t = - std::function& assignment)>; + std::function& assignment)>; // CRTP base for early heuristics that run on the original (or papilo-presolved) problem // during presolve to find incumbents as early as possible. @@ -57,11 +57,11 @@ class early_heuristic_t { protected: ~early_heuristic_t() = default; - // NOT thread-safe - void try_update_best(f_t user_obj, const std::vector& assignment) + // NOT thread-safe. solver_obj is in solver-space (always minimization). + void try_update_best(f_t solver_obj, const std::vector& assignment) { - if (user_obj >= best_objective_) { return; } - best_objective_ = user_obj; + if (solver_obj >= best_objective_) { return; } + best_objective_ = solver_obj; auto* handle_ptr = problem_ptr_->handle_ptr; RAFT_CUDA_TRY(cudaSetDevice(handle_ptr->get_device())); @@ -72,13 +72,14 @@ class early_heuristic_t { best_assignment_ = user_assignment; solution_found_ = true; + f_t user_obj = problem_ptr_->get_user_obj_from_solver_obj(solver_obj); double elapsed = std::chrono::duration(std::chrono::steady_clock::now() - start_time_).count(); CUOPT_LOG_INFO("Early heuristics (%s) lowered the primal bound. Objective %g. Time %.2f", Derived::name(), user_obj, elapsed); - if (incumbent_callback_) { incumbent_callback_(user_obj, user_assignment); } + if (incumbent_callback_) { incumbent_callback_(solver_obj, user_obj, user_assignment); } } std::unique_ptr> problem_ptr_; diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu index 1cd3b0005..8109653e6 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu @@ -46,8 +46,7 @@ void early_cpufj_t::start() cpu_fj_thread_->fj_cpu->improvement_callback = [this](f_t solver_obj, const std::vector& assignment, double) { - f_t user_obj = this->problem_ptr_->get_user_obj_from_solver_obj(solver_obj); - this->try_update_best(user_obj, assignment); + this->try_update_best(solver_obj, assignment); }; cpu_fj_thread_->start_cpu_solver(); diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu index e2df3a5af..92d51a169 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu @@ -53,7 +53,8 @@ void early_gpufj_t::start() fj_ptr_ = std::make_unique>(*context_ptr_, fj_settings); fj_ptr_->improvement_callback = [this](f_t user_obj, const std::vector& h_assignment) { - this->try_update_best(user_obj, h_assignment); + f_t solver_obj = this->problem_ptr_->get_solver_obj_from_user_obj(user_obj); + this->try_update_best(solver_obj, h_assignment); }; worker_thread_ = std::make_unique(&early_gpufj_t::run_worker, this); diff --git a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu index b248351a3..748dd41df 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu @@ -936,8 +936,8 @@ i_t fj_t::host_loop(solution_t& solution, i_t climber_idx) // Invoke improvement callback if we have a better feasible solution if (is_feasible && improvement_callback) { f_t user_obj = solution.get_user_objective(); - if (user_obj < last_reported_objective_) { - last_reported_objective_ = user_obj; + if (solution.h_obj < last_reported_objective_) { + last_reported_objective_ = solution.h_obj; // Copy assignment to host for callback std::vector h_assignment(solution.assignment.size()); raft::copy(h_assignment.data(), @@ -1075,8 +1075,9 @@ i_t fj_t::solve(solution_t& solution) { raft::common::nvtx::range scope("fj_solve"); timer_t timer(settings.time_limit); - handle_ptr = const_cast(solution.handle_ptr); - pb_ptr = solution.problem_ptr; + handle_ptr = const_cast(solution.handle_ptr); + pb_ptr = solution.problem_ptr; + last_reported_objective_ = std::numeric_limits::infinity(); if (settings.mode != fj_mode_t::ROUNDING) { cuopt_func_call(solution.test_variable_bounds(true)); cuopt_assert(solution.test_number_all_integer(), "All integers must be rounded"); diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index 812b93dca..000684be9 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -57,10 +57,11 @@ template static void invoke_solution_callbacks( const std::vector& mip_callbacks, f_t objective, - std::vector& assignment) + std::vector& assignment, + f_t bound) { std::vector obj_vec = {objective}; - std::vector bound_vec = {std::numeric_limits::infinity()}; + std::vector bound_vec = {bound}; for (auto callback : mip_callbacks) { if (callback != nullptr && callback->get_type() == internals::base_solution_callback_type::GET_SOLUTION) { @@ -185,11 +186,14 @@ mip_solution_t run_mip(detail::problem_t& problem, if (run_early_cpufj) { auto* presolver_ptr = problem.presolve_data.papilo_presolve_ptr; auto mip_callbacks = settings.get_mip_callbacks(); - auto incumbent_callback = [presolver_ptr, mip_callbacks](f_t objective, - const std::vector& assignment) { + f_t no_bound = problem.presolve_data.objective_scaling_factor >= 0 + ? -std::numeric_limits::infinity() + : std::numeric_limits::infinity(); + auto incumbent_callback = [presolver_ptr, mip_callbacks, no_bound]( + f_t solver_obj, f_t user_obj, const std::vector& assignment) { std::vector user_assignment; presolver_ptr->uncrush_primal_solution(assignment, user_assignment); - invoke_solution_callbacks(mip_callbacks, objective, user_assignment); + invoke_solution_callbacks(mip_callbacks, user_obj, user_assignment, no_bound); }; early_cpufj = std::make_unique>( *problem.original_problem_ptr, settings.get_tolerances(), incumbent_callback); @@ -309,17 +313,21 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, std::mutex early_callback_mutex; bool run_early_fj = run_presolve && settings.determinism_mode != CUOPT_MODE_DETERMINISTIC && - op_problem.get_n_integers() > 0; + op_problem.get_n_integers() > 0 && op_problem.get_n_constraints() > 0; if (run_early_fj) { + f_t no_bound = problem.presolve_data.objective_scaling_factor >= 0 + ? -std::numeric_limits::infinity() + : std::numeric_limits::infinity(); auto early_fj_callback = [&early_best_objective, &early_callback_mutex, - mip_callbacks = settings.get_mip_callbacks()]( - f_t objective, const std::vector& assignment) { + mip_callbacks = settings.get_mip_callbacks(), + no_bound]( + f_t solver_obj, f_t user_obj, const std::vector& assignment) { std::lock_guard lock(early_callback_mutex); - if (objective >= early_best_objective.load()) { return; } - early_best_objective.store(objective); + if (solver_obj >= early_best_objective.load()) { return; } + early_best_objective.store(solver_obj); auto user_assignment = assignment; - invoke_solution_callbacks(mip_callbacks, objective, user_assignment); + invoke_solution_callbacks(mip_callbacks, user_obj, user_assignment, no_bound); }; // Start early CPUFJ on original problem (will restart on presolved problem after Papilo) diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index 3f685886b..7c7c4f6f2 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -264,14 +264,10 @@ solution_t mip_solver_t::run_solver() branch_and_bound_problem, branch_and_bound_settings, timer_.get_tic_start()); context.branch_and_bound_ptr = branch_and_bound.get(); - // Set cutoff from early FJ if available + // Set cutoff from early FJ if available (initial_cutoff is in solver-space) if (context.initial_cutoff < std::numeric_limits::infinity()) { - f_t internal_cutoff = - context.problem_ptr->get_solver_obj_from_user_obj(context.initial_cutoff); - branch_and_bound->set_initial_cutoff(internal_cutoff); - CUOPT_LOG_INFO("B&B using initial cutoff %.6e from early heuristics (internal: %.6e)", - context.initial_cutoff, - internal_cutoff); + branch_and_bound->set_initial_cutoff(context.initial_cutoff); + CUOPT_LOG_INFO("B&B using initial cutoff %.6e from early heuristics", context.initial_cutoff); } auto* stats_ptr = &context.stats; From a4a195f94f8632e6035121c67e9461f7c794fcc6 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Mon, 23 Feb 2026 21:35:30 +0100 Subject: [PATCH 05/18] add GIL locks to handle worker threads invoking python callbacks --- .../utilities/callbacks_implems.hpp | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/cpp/include/cuopt/linear_programming/utilities/callbacks_implems.hpp b/cpp/include/cuopt/linear_programming/utilities/callbacks_implems.hpp index e13fda2ba..ef0fe35b9 100644 --- a/cpp/include/cuopt/linear_programming/utilities/callbacks_implems.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/callbacks_implems.hpp @@ -32,11 +32,12 @@ class default_get_solution_callback_t : public get_solution_callback_t { void* solution_bound, void* user_data) override { - PyObject* numpy_matrix = get_numpy_array(data, n_variables); - PyObject* numpy_array = get_numpy_array(objective_value, 1); - PyObject* numpy_bound = get_numpy_array(solution_bound, 1); - PyObject* py_user_data = user_data == nullptr ? Py_None : static_cast(user_data); - PyObject* res = PyObject_CallMethod(this->pyCallbackClass, + PyGILState_STATE gstate = PyGILState_Ensure(); + PyObject* numpy_matrix = get_numpy_array(data, n_variables); + PyObject* numpy_array = get_numpy_array(objective_value, 1); + PyObject* numpy_bound = get_numpy_array(solution_bound, 1); + PyObject* py_user_data = user_data == nullptr ? Py_None : static_cast(user_data); + PyObject* res = PyObject_CallMethod(this->pyCallbackClass, "get_solution", "(OOOO)", numpy_matrix, @@ -47,6 +48,7 @@ class default_get_solution_callback_t : public get_solution_callback_t { Py_DECREF(numpy_array); Py_DECREF(numpy_bound); if (res != nullptr) { Py_DECREF(res); } + PyGILState_Release(gstate); } PyObject* pyCallbackClass; @@ -69,11 +71,12 @@ class default_set_solution_callback_t : public set_solution_callback_t { void* solution_bound, void* user_data) override { - PyObject* numpy_matrix = get_numpy_array(data, n_variables); - PyObject* numpy_array = get_numpy_array(objective_value, 1); - PyObject* numpy_bound = get_numpy_array(solution_bound, 1); - PyObject* py_user_data = user_data == nullptr ? Py_None : static_cast(user_data); - PyObject* res = PyObject_CallMethod(this->pyCallbackClass, + PyGILState_STATE gstate = PyGILState_Ensure(); + PyObject* numpy_matrix = get_numpy_array(data, n_variables); + PyObject* numpy_array = get_numpy_array(objective_value, 1); + PyObject* numpy_bound = get_numpy_array(solution_bound, 1); + PyObject* py_user_data = user_data == nullptr ? Py_None : static_cast(user_data); + PyObject* res = PyObject_CallMethod(this->pyCallbackClass, "set_solution", "(OOOO)", numpy_matrix, @@ -84,6 +87,7 @@ class default_set_solution_callback_t : public set_solution_callback_t { Py_DECREF(numpy_array); Py_DECREF(numpy_bound); if (res != nullptr) { Py_DECREF(res); } + PyGILState_Release(gstate); } PyObject* pyCallbackClass; From f349b5b573adc1d1c638c938bd639a1437714bb2 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Mon, 23 Feb 2026 22:46:07 +0100 Subject: [PATCH 06/18] fix missing nogil --- .../cuopt/linear_programming/solver/solver.pxd | 4 ++-- .../solver/solver_wrapper.pyx | 17 ++++++++++------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/python/cuopt/cuopt/linear_programming/solver/solver.pxd b/python/cuopt/cuopt/linear_programming/solver/solver.pxd index 83b1e0054..adc566aa1 100644 --- a/python/cuopt/cuopt/linear_programming/solver/solver.pxd +++ b/python/cuopt/cuopt/linear_programming/solver/solver.pxd @@ -181,9 +181,9 @@ cdef extern from "cuopt/linear_programming/utilities/cython_solve.hpp" namespace cdef unique_ptr[solver_ret_t] call_solve( data_model_view_t[int, double]* data_model, solver_settings_t[int, double]* solver_settings, - ) except + + ) except + nogil cdef pair[vector[unique_ptr[solver_ret_t]], double] call_batch_solve( # noqa vector[data_model_view_t[int, double] *] data_models, solver_settings_t[int, double]* solver_settings, - ) except + + ) except + nogil diff --git a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx index 8c1c8fdc3..e7d700ea9 100644 --- a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx +++ b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx @@ -507,10 +507,13 @@ def Solve(py_data_model_obj, settings, mip=False): ) data_model_obj.set_data_model_view() - return create_solution(move(call_solve( - data_model_obj.c_data_model_view.get(), - unique_solver_settings.get(), - )), data_model_obj) + cdef unique_ptr[solver_ret_t] sol_ret_ptr + with nogil: + sol_ret_ptr = move(call_solve( + data_model_obj.c_data_model_view.get(), + unique_solver_settings.get(), + )) + return create_solution(move(sol_ret_ptr), data_model_obj) cdef set_and_insert_vector( @@ -535,9 +538,9 @@ def BatchSolve(py_data_model_list, settings): cdef pair[ vector[unique_ptr[solver_ret_t]], - double] batch_solve_result = ( - move(call_batch_solve(data_model_views, unique_solver_settings.get())) # noqa - ) + double] batch_solve_result + with nogil: + batch_solve_result = move(call_batch_solve(data_model_views, unique_solver_settings.get())) # noqa cdef vector[unique_ptr[solver_ret_t]] c_solutions = ( move(batch_solve_result.first) From 506d04632600da2e9fffd19ecbf1753335b368bb Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Tue, 24 Feb 2026 08:52:06 +0100 Subject: [PATCH 07/18] fix python callbacks --- cpp/src/mip_heuristics/solve.cu | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index 000684be9..b4188cade 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -85,15 +85,6 @@ mip_solution_t run_mip(detail::problem_t& problem, auto hyper_params = settings.hyper_params; hyper_params.update_primal_weight_on_initial_solution = false; hyper_params.update_step_size_on_initial_solution = true; - if (settings.get_mip_callbacks().size() > 0) { - auto callback_num_variables = problem.original_problem_ptr->get_n_variables(); - if (problem.has_papilo_presolve_data()) { - callback_num_variables = problem.get_papilo_original_num_variables(); - } - for (auto callback : settings.get_mip_callbacks()) { - callback->template setup(callback_num_variables); - } - } // if the input problem is empty: early exit if (problem.empty) { detail::solution_t solution(problem); @@ -277,6 +268,10 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, op_problem.get_handle_ptr()->get_stream()); } + for (auto callback : settings.get_mip_callbacks()) { + callback->template setup(op_problem.get_n_variables()); + } + auto timer = timer_t(time_limit); double presolve_time = 0.0; From 33230b6bf521ec5aa0206f57015f1583ad9499b1 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Tue, 24 Feb 2026 11:12:01 +0100 Subject: [PATCH 08/18] fix attempt --- cpp/src/mip_heuristics/early_heuristic.cuh | 16 ++++++++++------ cpp/src/mip_heuristics/problem/presolve_data.cu | 17 +++++++---------- .../mip_heuristics/problem/presolve_data.cuh | 10 +++++++++- cpp/src/mip_heuristics/problem/problem.cu | 6 ++++-- cpp/src/mip_heuristics/problem/problem.cuh | 9 ++++++++- 5 files changed, 38 insertions(+), 20 deletions(-) diff --git a/cpp/src/mip_heuristics/early_heuristic.cuh b/cpp/src/mip_heuristics/early_heuristic.cuh index f3644c4d2..40b604d97 100644 --- a/cpp/src/mip_heuristics/early_heuristic.cuh +++ b/cpp/src/mip_heuristics/early_heuristic.cuh @@ -14,6 +14,8 @@ #include +#include + #include #include @@ -58,17 +60,18 @@ class early_heuristic_t { ~early_heuristic_t() = default; // NOT thread-safe. solver_obj is in solver-space (always minimization). + // Uses a private CUDA stream to avoid racing with the FJ solver's stream. void try_update_best(f_t solver_obj, const std::vector& assignment) { if (solver_obj >= best_objective_) { return; } best_objective_ = solver_obj; - auto* handle_ptr = problem_ptr_->handle_ptr; - RAFT_CUDA_TRY(cudaSetDevice(handle_ptr->get_device())); - rmm::device_uvector d_assignment(assignment.size(), handle_ptr->get_stream()); - raft::copy(d_assignment.data(), assignment.data(), assignment.size(), handle_ptr->get_stream()); - problem_ptr_->post_process_assignment(d_assignment); - auto user_assignment = cuopt::host_copy(d_assignment, handle_ptr->get_stream()); + RAFT_CUDA_TRY(cudaSetDevice(problem_ptr_->handle_ptr->get_device())); + auto stream = private_stream_.view(); + rmm::device_uvector d_assignment(assignment.size(), stream); + raft::copy(d_assignment.data(), assignment.data(), assignment.size(), stream); + problem_ptr_->post_process_assignment(d_assignment, true, stream); + auto user_assignment = cuopt::host_copy(d_assignment, stream); best_assignment_ = user_assignment; solution_found_ = true; @@ -91,6 +94,7 @@ class early_heuristic_t { early_incumbent_callback_t incumbent_callback_; std::chrono::steady_clock::time_point start_time_; + rmm::cuda_stream private_stream_; }; } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/problem/presolve_data.cu b/cpp/src/mip_heuristics/problem/presolve_data.cu index b11f7b108..1bc05a779 100644 --- a/cpp/src/mip_heuristics/problem/presolve_data.cu +++ b/cpp/src/mip_heuristics/problem/presolve_data.cu @@ -107,7 +107,8 @@ template void presolve_data_t::post_process_assignment( problem_t& problem, rmm::device_uvector& current_assignment, - bool resize_to_original_problem) + bool resize_to_original_problem, + rmm::cuda_stream_view stream) { raft::common::nvtx::range fun_scope("post_process_assignment"); cuopt_assert(current_assignment.size() == variable_mapping.size(), "size mismatch"); @@ -115,15 +116,15 @@ void presolve_data_t::post_process_assignment( auto fixed_assgn = make_span(fixed_var_assignment); auto var_map = make_span(variable_mapping); if (current_assignment.size() > 0) { - thrust::for_each(problem.handle_ptr->get_thrust_policy(), + thrust::for_each(rmm::exec_policy(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(current_assignment.size()), [fixed_assgn, var_map, assgn] __device__(auto idx) { fixed_assgn[var_map[idx]] = assgn[idx]; }); } - expand_device_copy(current_assignment, fixed_var_assignment, problem.handle_ptr->get_stream()); - auto h_assignment = cuopt::host_copy(current_assignment, problem.handle_ptr->get_stream()); + expand_device_copy(current_assignment, fixed_var_assignment, stream); + auto h_assignment = cuopt::host_copy(current_assignment, stream); cuopt_assert(additional_var_id_per_var.size() == h_assignment.size(), "Size mismatch"); cuopt_assert(additional_var_used.size() == h_assignment.size(), "Size mismatch"); for (i_t i = 0; i < (i_t)h_assignment.size(); ++i) { @@ -148,14 +149,10 @@ void presolve_data_t::post_process_assignment( h_assignment[sub.substituted_var]); } - raft::copy(current_assignment.data(), - h_assignment.data(), - h_assignment.size(), - problem.handle_ptr->get_stream()); // this separate resizing is needed because of the callback + raft::copy(current_assignment.data(), h_assignment.data(), h_assignment.size(), stream); if (resize_to_original_problem) { - current_assignment.resize(problem.original_problem_ptr->get_n_variables(), - problem.handle_ptr->get_stream()); + current_assignment.resize(problem.original_problem_ptr->get_n_variables(), stream); } } diff --git a/cpp/src/mip_heuristics/problem/presolve_data.cuh b/cpp/src/mip_heuristics/problem/presolve_data.cuh index 51b6bac95..cac3e7165 100644 --- a/cpp/src/mip_heuristics/problem/presolve_data.cuh +++ b/cpp/src/mip_heuristics/problem/presolve_data.cuh @@ -89,7 +89,15 @@ class presolve_data_t { bool pre_process_assignment(problem_t& problem, rmm::device_uvector& assignment); void post_process_assignment(problem_t& problem, rmm::device_uvector& current_assignment, - bool resize_to_original_problem = true); + bool resize_to_original_problem, + rmm::cuda_stream_view stream); + void post_process_assignment(problem_t& problem, + rmm::device_uvector& current_assignment, + bool resize_to_original_problem = true) + { + post_process_assignment( + problem, current_assignment, resize_to_original_problem, problem.handle_ptr->get_stream()); + } void post_process_solution(problem_t& problem, solution_t& solution); void set_papilo_presolve_data(const third_party_presolve_t* presolver_ptr, diff --git a/cpp/src/mip_heuristics/problem/problem.cu b/cpp/src/mip_heuristics/problem/problem.cu index d77e2e5f6..dde334476 100644 --- a/cpp/src/mip_heuristics/problem/problem.cu +++ b/cpp/src/mip_heuristics/problem/problem.cu @@ -2045,9 +2045,11 @@ bool problem_t::pre_process_assignment(rmm::device_uvector& assig template void problem_t::post_process_assignment(rmm::device_uvector& current_assignment, - bool resize_to_original_problem) + bool resize_to_original_problem, + rmm::cuda_stream_view stream) { - presolve_data.post_process_assignment(*this, current_assignment, resize_to_original_problem); + presolve_data.post_process_assignment( + *this, current_assignment, resize_to_original_problem, stream); } template diff --git a/cpp/src/mip_heuristics/problem/problem.cuh b/cpp/src/mip_heuristics/problem/problem.cuh index 6cd180a80..9876773ec 100644 --- a/cpp/src/mip_heuristics/problem/problem.cuh +++ b/cpp/src/mip_heuristics/problem/problem.cuh @@ -94,7 +94,14 @@ class problem_t { void preprocess_problem(); bool pre_process_assignment(rmm::device_uvector& assignment); void post_process_assignment(rmm::device_uvector& current_assignment, - bool resize_to_original_problem = true); + bool resize_to_original_problem, + rmm::cuda_stream_view stream); + void post_process_assignment(rmm::device_uvector& current_assignment, + bool resize_to_original_problem = true) + { + post_process_assignment( + current_assignment, resize_to_original_problem, handle_ptr->get_stream()); + } void post_process_solution(solution_t& solution); void set_papilo_presolve_data(const third_party_presolve_t* presolver_ptr, std::vector reduced_to_original, From 71e8c9ac118d155ca56f0e063c5026534dcd4aeb Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Tue, 24 Feb 2026 12:22:56 +0000 Subject: [PATCH 09/18] operate on a problem copy for early GPUFJ --- cpp/src/mip_heuristics/early_heuristic.cuh | 18 ++++++++++-------- .../feasibility_jump/early_gpufj.cu | 2 +- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cpp/src/mip_heuristics/early_heuristic.cuh b/cpp/src/mip_heuristics/early_heuristic.cuh index 40b604d97..47d50daa4 100644 --- a/cpp/src/mip_heuristics/early_heuristic.cuh +++ b/cpp/src/mip_heuristics/early_heuristic.cuh @@ -14,8 +14,6 @@ #include -#include - #include #include @@ -40,11 +38,15 @@ class early_heuristic_t { early_incumbent_callback_t incumbent_callback) : incumbent_callback_(std::move(incumbent_callback)) { - problem_ptr_ = std::make_unique>(op_problem, tolerances, false); - problem_ptr_->preprocess_problem(); + // Build and preprocess on the original handle, then copy onto our own handle + // so the derived solver can run on a dedicated stream (prevents graph capture conflicts). + problem_t temp_problem(op_problem, tolerances, false); + temp_problem.preprocess_problem(); + temp_problem.handle_ptr->sync_stream(); + problem_ptr_ = std::make_unique>(temp_problem, &handle_); solution_ptr_ = std::make_unique>(*problem_ptr_); - thrust::fill(problem_ptr_->handle_ptr->get_thrust_policy(), + thrust::fill(handle_.get_thrust_policy(), solution_ptr_->assignment.begin(), solution_ptr_->assignment.end(), f_t{0}); @@ -66,8 +68,8 @@ class early_heuristic_t { if (solver_obj >= best_objective_) { return; } best_objective_ = solver_obj; - RAFT_CUDA_TRY(cudaSetDevice(problem_ptr_->handle_ptr->get_device())); - auto stream = private_stream_.view(); + RAFT_CUDA_TRY(cudaSetDevice(handle_.get_device())); + auto stream = handle_.get_stream(); rmm::device_uvector d_assignment(assignment.size(), stream); raft::copy(d_assignment.data(), assignment.data(), assignment.size(), stream); problem_ptr_->post_process_assignment(d_assignment, true, stream); @@ -94,7 +96,7 @@ class early_heuristic_t { early_incumbent_callback_t incumbent_callback_; std::chrono::steady_clock::time_point start_time_; - rmm::cuda_stream private_stream_; + raft::handle_t handle_; }; } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu index 92d51a169..f9b552c9f 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu @@ -26,7 +26,7 @@ early_gpufj_t::early_gpufj_t(const optimization_problem_t& o op_problem, settings.get_tolerances(), std::move(incumbent_callback)) { context_ptr_ = std::make_unique>( - this->problem_ptr_->handle_ptr, this->problem_ptr_.get(), settings, nullptr); + &this->handle_, this->problem_ptr_.get(), settings, nullptr); } template From 9d7055ef479f49ed27c492d43606393405b5b860 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Tue, 24 Feb 2026 13:52:31 +0000 Subject: [PATCH 10/18] fix reported bound in user callback --- cpp/src/mip_heuristics/solve.cu | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index b4188cade..417521b11 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -175,11 +175,9 @@ mip_solution_t run_mip(detail::problem_t& problem, settings.determinism_mode != CUOPT_MODE_DETERMINISTIC && problem.original_problem_ptr->get_n_integers() > 0; if (run_early_cpufj) { - auto* presolver_ptr = problem.presolve_data.papilo_presolve_ptr; - auto mip_callbacks = settings.get_mip_callbacks(); - f_t no_bound = problem.presolve_data.objective_scaling_factor >= 0 - ? -std::numeric_limits::infinity() - : std::numeric_limits::infinity(); + auto* presolver_ptr = problem.presolve_data.papilo_presolve_ptr; + auto mip_callbacks = settings.get_mip_callbacks(); + f_t no_bound = problem.presolve_data.objective_scaling_factor >= 0 ? (f_t)-1e20 : (f_t)1e20; auto incumbent_callback = [presolver_ptr, mip_callbacks, no_bound]( f_t solver_obj, f_t user_obj, const std::vector& assignment) { std::vector user_assignment; @@ -310,9 +308,7 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, bool run_early_fj = run_presolve && settings.determinism_mode != CUOPT_MODE_DETERMINISTIC && op_problem.get_n_integers() > 0 && op_problem.get_n_constraints() > 0; if (run_early_fj) { - f_t no_bound = problem.presolve_data.objective_scaling_factor >= 0 - ? -std::numeric_limits::infinity() - : std::numeric_limits::infinity(); + f_t no_bound = problem.presolve_data.objective_scaling_factor >= 0 ? (f_t)-1e20 : (f_t)1e20; auto early_fj_callback = [&early_best_objective, &early_callback_mutex, mip_callbacks = settings.get_mip_callbacks(), From 7b9451b8898f53af58e399314faec3c9733a63de Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Tue, 24 Feb 2026 14:33:37 +0000 Subject: [PATCH 11/18] fix build --- cpp/src/pdlp/swap_and_resize_helper.cuh | 1 + cpp/src/pdlp/utils.cuh | 1 + cpp/src/utilities/copy_helpers.hpp | 1 + cpp/src/utilities/cuda_helpers.cuh | 1 + 4 files changed, 4 insertions(+) diff --git a/cpp/src/pdlp/swap_and_resize_helper.cuh b/cpp/src/pdlp/swap_and_resize_helper.cuh index 0d4e2a6d0..6ed05df24 100644 --- a/cpp/src/pdlp/swap_and_resize_helper.cuh +++ b/cpp/src/pdlp/swap_and_resize_helper.cuh @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/pdlp/utils.cuh b/cpp/src/pdlp/utils.cuh index 33625f768..138c9c2ab 100644 --- a/cpp/src/pdlp/utils.cuh +++ b/cpp/src/pdlp/utils.cuh @@ -25,6 +25,7 @@ #include #include #include +#include namespace cuopt::linear_programming::detail { diff --git a/cpp/src/utilities/copy_helpers.hpp b/cpp/src/utilities/copy_helpers.hpp index 943ce463a..36a465905 100644 --- a/cpp/src/utilities/copy_helpers.hpp +++ b/cpp/src/utilities/copy_helpers.hpp @@ -15,6 +15,7 @@ #include #include +#include #include #include diff --git a/cpp/src/utilities/cuda_helpers.cuh b/cpp/src/utilities/cuda_helpers.cuh index ae50e9967..946099648 100644 --- a/cpp/src/utilities/cuda_helpers.cuh +++ b/cpp/src/utilities/cuda_helpers.cuh @@ -10,6 +10,7 @@ #include #include +#include #include #include #include From d0c5a425ca1cc24f9b4cb984bc0090b6a4c38755 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Tue, 24 Feb 2026 16:09:51 +0000 Subject: [PATCH 12/18] fix thrust build + more timer checks --- .../diversity/diversity_manager.cu | 26 ++++++++++--------- cpp/src/mip_heuristics/solve.cu | 5 ++++ cpp/src/mip_heuristics/solver.cu | 16 ++++++++++++ cpp/src/pdlp/swap_and_resize_helper.cuh | 1 + cpp/src/pdlp/utils.cuh | 1 + cpp/src/utilities/copy_helpers.hpp | 1 + cpp/src/utilities/cuda_helpers.cuh | 1 + 7 files changed, 39 insertions(+), 12 deletions(-) diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 25b021ac3..4fa0d3f4a 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -201,18 +201,20 @@ bool diversity_manager_t::run_presolve(f_t time_limit) compute_probing_cache(ls.constraint_prop.bounds_update, *problem_ptr, probing_timer); if (problem_is_infeasible) { return false; } } - const bool remap_cache_ids = true; - trivial_presolve(*problem_ptr, remap_cache_ids); - if (!problem_ptr->empty && !check_bounds_sanity(*problem_ptr)) { return false; } - // May overconstrain if Papilo presolve has been run before - if (context.settings.presolver == presolver_t::None) { - if (!problem_ptr->empty) { - // do the resizing no-matter what, bounds presolve might not change the bounds but initial - // trivial presolve might have - ls.constraint_prop.bounds_update.resize(*problem_ptr); - ls.constraint_prop.conditional_bounds_update.update_constraint_bounds( - *problem_ptr, ls.constraint_prop.bounds_update); - if (!check_bounds_sanity(*problem_ptr)) { return false; } + if (!presolve_timer.check_time_limit()) { + const bool remap_cache_ids = true; + trivial_presolve(*problem_ptr, remap_cache_ids); + if (!problem_ptr->empty && !check_bounds_sanity(*problem_ptr)) { return false; } + // May overconstrain if Papilo presolve has been run before + if (context.settings.presolver == presolver_t::None) { + if (!problem_ptr->empty) { + // do the resizing no-matter what, bounds presolve might not change the bounds but initial + // trivial presolve might have + ls.constraint_prop.bounds_update.resize(*problem_ptr); + ls.constraint_prop.conditional_bounds_update.update_constraint_bounds( + *problem_ptr, ls.constraint_prop.bounds_update); + if (!check_bounds_sanity(*problem_ptr)) { return false; } + } } } stats.presolve_time = presolve_timer.elapsed_time(); diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index 41e272992..3c6ee21a4 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -153,6 +153,11 @@ mip_solution_t run_mip(detail::problem_t& problem, detail::trivial_presolve(scaled_problem); detail::mip_solver_t solver(scaled_problem, settings, scaling, timer); + if (timer.check_time_limit()) { + CUOPT_LOG_INFO("Time limit reached before main solve"); + detail::solution_t sol(problem); + return sol.get_solution(false, solver.get_solver_stats(), false); + } auto scaled_sol = solver.run_solver(); bool is_feasible_before_scaling = scaled_sol.get_feasible(); scaled_sol.problem_ptr = &problem; diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index 3e2251171..235d4500d 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -134,6 +134,14 @@ solution_t mip_solver_t::run_solver() return sol; } + if (timer_.check_time_limit()) { + CUOPT_LOG_INFO("Time limit reached after presolve"); + solution_t sol(*context.problem_ptr); + context.stats.total_solve_time = timer_.elapsed_time(); + context.problem_ptr->post_process_solution(sol); + return sol; + } + // if the problem was reduced to a LP: run concurrent LP if (run_presolve && context.problem_ptr->n_integer_vars == 0) { CUOPT_LOG_INFO("Problem reduced to a LP, running concurrent LP"); @@ -285,6 +293,14 @@ solution_t mip_solver_t::run_solver() std::placeholders::_5, std::placeholders::_6); + if (timer_.check_time_limit()) { + CUOPT_LOG_INFO("Time limit reached during B&B setup"); + solution_t sol(*context.problem_ptr); + context.stats.total_solve_time = timer_.elapsed_time(); + context.problem_ptr->post_process_solution(sol); + return sol; + } + // Fork a thread for branch and bound // std::async and std::future allow us to get the return value of bb::solve() // without having to manually manage the thread diff --git a/cpp/src/pdlp/swap_and_resize_helper.cuh b/cpp/src/pdlp/swap_and_resize_helper.cuh index 0d4e2a6d0..6ed05df24 100644 --- a/cpp/src/pdlp/swap_and_resize_helper.cuh +++ b/cpp/src/pdlp/swap_and_resize_helper.cuh @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/pdlp/utils.cuh b/cpp/src/pdlp/utils.cuh index 33625f768..138c9c2ab 100644 --- a/cpp/src/pdlp/utils.cuh +++ b/cpp/src/pdlp/utils.cuh @@ -25,6 +25,7 @@ #include #include #include +#include namespace cuopt::linear_programming::detail { diff --git a/cpp/src/utilities/copy_helpers.hpp b/cpp/src/utilities/copy_helpers.hpp index 943ce463a..36a465905 100644 --- a/cpp/src/utilities/copy_helpers.hpp +++ b/cpp/src/utilities/copy_helpers.hpp @@ -15,6 +15,7 @@ #include #include +#include #include #include diff --git a/cpp/src/utilities/cuda_helpers.cuh b/cpp/src/utilities/cuda_helpers.cuh index ae50e9967..946099648 100644 --- a/cpp/src/utilities/cuda_helpers.cuh +++ b/cpp/src/utilities/cuda_helpers.cuh @@ -10,6 +10,7 @@ #include #include +#include #include #include #include From 67240f57035bde789b1e5512c116c00fd46c5baf Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Tue, 24 Feb 2026 16:51:09 +0000 Subject: [PATCH 13/18] review comment --- cpp/src/mip_heuristics/solve.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index 3c6ee21a4..44f0e7468 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -156,7 +156,9 @@ mip_solution_t run_mip(detail::problem_t& problem, if (timer.check_time_limit()) { CUOPT_LOG_INFO("Time limit reached before main solve"); detail::solution_t sol(problem); - return sol.get_solution(false, solver.get_solver_stats(), false); + auto stats = solver.get_solver_stats(); + stats.total_solve_time = timer.elapsed_time(); + return sol.get_solution(false, stats, false); } auto scaled_sol = solver.run_solver(); bool is_feasible_before_scaling = scaled_sol.get_feasible(); From a15424fc250b6e649b8b0b0015641d9771c9b426 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Tue, 24 Feb 2026 17:02:33 +0000 Subject: [PATCH 14/18] fix thrust solve --- cpp/src/routing/utilities/check_input.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/routing/utilities/check_input.cu b/cpp/src/routing/utilities/check_input.cu index b2f035ee5..e902f2d46 100644 --- a/cpp/src/routing/utilities/check_input.cu +++ b/cpp/src/routing/utilities/check_input.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -17,6 +17,7 @@ #include #include #include +#include #include #include From 7b3d40c85104917e20761a91e8a6ee49dc796fb1 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Wed, 25 Feb 2026 14:45:05 +0000 Subject: [PATCH 15/18] fix handle being destroyed before other gpu structures in early heuristics --- cpp/src/mip_heuristics/early_heuristic.cuh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/src/mip_heuristics/early_heuristic.cuh b/cpp/src/mip_heuristics/early_heuristic.cuh index 47d50daa4..26d9e8431 100644 --- a/cpp/src/mip_heuristics/early_heuristic.cuh +++ b/cpp/src/mip_heuristics/early_heuristic.cuh @@ -87,6 +87,10 @@ class early_heuristic_t { if (incumbent_callback_) { incumbent_callback_(solver_obj, user_obj, user_assignment); } } + // handle_ must be declared before problem_ptr_/solution_ptr_ so it outlives them + // (C++ destroys members in reverse declaration order) + raft::handle_t handle_; + std::unique_ptr> problem_ptr_; std::unique_ptr> solution_ptr_; @@ -96,7 +100,6 @@ class early_heuristic_t { early_incumbent_callback_t incumbent_callback_; std::chrono::steady_clock::time_point start_time_; - raft::handle_t handle_; }; } // namespace cuopt::linear_programming::detail From 68e07bb649ad15cd1dc427446b5e6e395a5ad85e Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Wed, 25 Feb 2026 15:26:32 +0000 Subject: [PATCH 16/18] fix build --- cpp/src/mip_heuristics/solve.cu | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index ccd04ff05..4592037bb 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -57,10 +57,11 @@ template static void invoke_solution_callbacks( const std::vector& mip_callbacks, f_t objective, - std::vector& assignment) + std::vector& assignment, + f_t bound) { std::vector obj_vec = {objective}; - std::vector bound_vec = {std::numeric_limits::infinity()}; + std::vector bound_vec = {bound}; for (auto callback : mip_callbacks) { if (callback != nullptr && callback->get_type() == internals::base_solution_callback_type::GET_SOLUTION) { @@ -313,16 +314,18 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, bool run_early_fj = run_presolve && settings.determinism_mode != CUOPT_MODE_DETERMINISTIC && op_problem.get_n_integers() > 0; + f_t no_bound = problem.presolve_data.objective_scaling_factor >= 0 ? (f_t)-1e20 : (f_t)1e20; if (run_early_fj) { auto early_fj_callback = [&early_best_objective, &early_callback_mutex, - mip_callbacks = settings.get_mip_callbacks()]( - f_t objective, const std::vector& assignment) { + mip_callbacks = settings.get_mip_callbacks(), + no_bound]( + f_t solver_obj, f_t user_obj, const std::vector& assignment) { std::lock_guard lock(early_callback_mutex); - if (objective >= early_best_objective.load()) { return; } - early_best_objective.store(objective); + if (solver_obj >= early_best_objective.load()) { return; } + early_best_objective.store(solver_obj); auto user_assignment = assignment; - invoke_solution_callbacks(mip_callbacks, objective, user_assignment); + invoke_solution_callbacks(mip_callbacks, user_obj, user_assignment, no_bound); }; // Start early CPUFJ on original problem (will restart on presolved problem after Papilo) From 352cd1564e00d7d8a612e5a8af653fa9b8e9f3e0 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Wed, 25 Feb 2026 17:00:06 +0000 Subject: [PATCH 17/18] fix cudaSetDevice bug --- cpp/src/mip_heuristics/early_heuristic.cuh | 6 +++++- cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cpp/src/mip_heuristics/early_heuristic.cuh b/cpp/src/mip_heuristics/early_heuristic.cuh index 26d9e8431..333fde195 100644 --- a/cpp/src/mip_heuristics/early_heuristic.cuh +++ b/cpp/src/mip_heuristics/early_heuristic.cuh @@ -38,6 +38,8 @@ class early_heuristic_t { early_incumbent_callback_t incumbent_callback) : incumbent_callback_(std::move(incumbent_callback)) { + RAFT_CUDA_TRY(cudaGetDevice(&device_id_)); + // Build and preprocess on the original handle, then copy onto our own handle // so the derived solver can run on a dedicated stream (prevents graph capture conflicts). problem_t temp_problem(op_problem, tolerances, false); @@ -68,7 +70,7 @@ class early_heuristic_t { if (solver_obj >= best_objective_) { return; } best_objective_ = solver_obj; - RAFT_CUDA_TRY(cudaSetDevice(handle_.get_device())); + RAFT_CUDA_TRY(cudaSetDevice(device_id_)); auto stream = handle_.get_stream(); rmm::device_uvector d_assignment(assignment.size(), stream); raft::copy(d_assignment.data(), assignment.data(), assignment.size(), stream); @@ -87,6 +89,8 @@ class early_heuristic_t { if (incumbent_callback_) { incumbent_callback_(solver_obj, user_obj, user_assignment); } } + int device_id_{0}; + // handle_ must be declared before problem_ptr_/solution_ptr_ so it outlives them // (C++ destroys members in reverse declaration order) raft::handle_t handle_; diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu index f9b552c9f..55726421d 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu @@ -63,7 +63,7 @@ void early_gpufj_t::start() template void early_gpufj_t::run_worker() { - RAFT_CUDA_TRY(cudaSetDevice(this->problem_ptr_->handle_ptr->get_device())); + RAFT_CUDA_TRY(cudaSetDevice(this->device_id_)); fj_ptr_->solve(*this->solution_ptr_); } From e4b8166784a41ee63575d0dd2e948ef9cf45b6b5 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Wed, 25 Feb 2026 17:14:46 +0000 Subject: [PATCH 18/18] fix empty tests --- cpp/src/mip_heuristics/solve.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index 4592037bb..f7c3063c9 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -313,7 +313,7 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, std::mutex early_callback_mutex; bool run_early_fj = run_presolve && settings.determinism_mode != CUOPT_MODE_DETERMINISTIC && - op_problem.get_n_integers() > 0; + op_problem.get_n_integers() > 0 && op_problem.get_n_constraints() > 0; f_t no_bound = problem.presolve_data.objective_scaling_factor >= 0 ? (f_t)-1e20 : (f_t)1e20; if (run_early_fj) { auto early_fj_callback = [&early_best_objective,