diff --git a/cpp/include/cuopt/linear_programming/utilities/callbacks_implems.hpp b/cpp/include/cuopt/linear_programming/utilities/callbacks_implems.hpp index e13fda2ba..ef0fe35b9 100644 --- a/cpp/include/cuopt/linear_programming/utilities/callbacks_implems.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/callbacks_implems.hpp @@ -32,11 +32,12 @@ class default_get_solution_callback_t : public get_solution_callback_t { void* solution_bound, void* user_data) override { - PyObject* numpy_matrix = get_numpy_array(data, n_variables); - PyObject* numpy_array = get_numpy_array(objective_value, 1); - PyObject* numpy_bound = get_numpy_array(solution_bound, 1); - PyObject* py_user_data = user_data == nullptr ? Py_None : static_cast(user_data); - PyObject* res = PyObject_CallMethod(this->pyCallbackClass, + PyGILState_STATE gstate = PyGILState_Ensure(); + PyObject* numpy_matrix = get_numpy_array(data, n_variables); + PyObject* numpy_array = get_numpy_array(objective_value, 1); + PyObject* numpy_bound = get_numpy_array(solution_bound, 1); + PyObject* py_user_data = user_data == nullptr ? Py_None : static_cast(user_data); + PyObject* res = PyObject_CallMethod(this->pyCallbackClass, "get_solution", "(OOOO)", numpy_matrix, @@ -47,6 +48,7 @@ class default_get_solution_callback_t : public get_solution_callback_t { Py_DECREF(numpy_array); Py_DECREF(numpy_bound); if (res != nullptr) { Py_DECREF(res); } + PyGILState_Release(gstate); } PyObject* pyCallbackClass; @@ -69,11 +71,12 @@ class default_set_solution_callback_t : public set_solution_callback_t { void* solution_bound, void* user_data) override { - PyObject* numpy_matrix = get_numpy_array(data, n_variables); - PyObject* numpy_array = get_numpy_array(objective_value, 1); - PyObject* numpy_bound = get_numpy_array(solution_bound, 1); - PyObject* py_user_data = user_data == nullptr ? Py_None : static_cast(user_data); - PyObject* res = PyObject_CallMethod(this->pyCallbackClass, + PyGILState_STATE gstate = PyGILState_Ensure(); + PyObject* numpy_matrix = get_numpy_array(data, n_variables); + PyObject* numpy_array = get_numpy_array(objective_value, 1); + PyObject* numpy_bound = get_numpy_array(solution_bound, 1); + PyObject* py_user_data = user_data == nullptr ? Py_None : static_cast(user_data); + PyObject* res = PyObject_CallMethod(this->pyCallbackClass, "set_solution", "(OOOO)", numpy_matrix, @@ -84,6 +87,7 @@ class default_set_solution_callback_t : public set_solution_callback_t { Py_DECREF(numpy_array); Py_DECREF(numpy_bound); if (res != nullptr) { Py_DECREF(res); } + PyGILState_Release(gstate); } PyObject* pyCallbackClass; diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 6ce9a4f4d..a4e43a1ec 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -898,7 +898,7 @@ struct nondeterministic_policy_t : tree_update_policy_t { { } - f_t upper_bound() const override { return bnb.upper_bound_.load(); } + f_t upper_bound() const override { return bnb.get_cutoff(); } void update_pseudo_costs(mip_node_t* node, f_t leaf_obj) override { @@ -1316,10 +1316,11 @@ dual::status_t branch_and_bound_t::solve_node_lp( simplex_solver_settings_t lp_settings = settings_; lp_settings.set_log(false); + f_t cutoff = get_cutoff(); if (original_lp_.objective_is_integral) { - lp_settings.cut_off = std::ceil(upper_bound_ - settings_.integer_tol) + settings_.dual_tol; + lp_settings.cut_off = std::ceil(cutoff - settings_.integer_tol) + settings_.dual_tol; } else { - lp_settings.cut_off = upper_bound_ + settings_.dual_tol; + lp_settings.cut_off = cutoff + settings_.dual_tol; } lp_settings.inside_mip = 2; lp_settings.time_limit = settings_.time_limit - toc(exploration_stats_.start_time); @@ -1426,7 +1427,7 @@ void branch_and_bound_t::plunge_with(branch_and_bound_worker_tlower_bound = lower_bound; - if (lower_bound > upper_bound) { + if (lower_bound > get_cutoff()) { search_tree_.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); search_tree_.update(node_ptr, node_status_t::FATHOMED); worker->recompute_basis = true; @@ -1536,7 +1537,7 @@ void branch_and_bound_t::dive_with(branch_and_bound_worker_t f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); worker->lower_bound = lower_bound; - if (node_ptr->lower_bound > upper_bound) { + if (node_ptr->lower_bound > get_cutoff()) { worker->recompute_basis = true; worker->recompute_bounds = true; continue; @@ -1675,7 +1676,7 @@ void branch_and_bound_t::run_scheduler() std::optional*> start_node = node_queue_.pop_best_first(); if (!start_node.has_value()) { continue; } - if (upper_bound_ < start_node.value()->lower_bound) { + if (get_cutoff() < start_node.value()->lower_bound) { // This node was put on the heap earlier but its lower bound is now greater than the // current upper bound search_tree_.graphviz_node( @@ -1699,7 +1700,7 @@ void branch_and_bound_t::run_scheduler() std::optional*> start_node = node_queue_.pop_diving(); if (!start_node.has_value()) { continue; } - if (upper_bound_ < start_node.value()->lower_bound || + if (get_cutoff() < start_node.value()->lower_bound || start_node.value()->depth < diving_settings.min_node_depth) { continue; } @@ -1767,7 +1768,7 @@ void branch_and_bound_t::single_threaded_solve() std::optional*> start_node = node_queue_.pop_best_first(); if (!start_node.has_value()) { continue; } - if (upper_bound_ < start_node.value()->lower_bound) { + if (get_cutoff() < start_node.value()->lower_bound) { // This node was put on the heap earlier but its lower bound is now greater than the // current upper bound search_tree_.graphviz_node( @@ -2189,12 +2190,12 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut return mip_status_t::NUMERICAL; } - if (settings_.reduced_cost_strengthening >= 1 && upper_bound_.load() < last_upper_bound) { + if (settings_.reduced_cost_strengthening >= 1 && get_cutoff() < last_upper_bound) { mutex_upper_.lock(); - last_upper_bound = upper_bound_.load(); + last_upper_bound = get_cutoff(); std::vector lower_bounds; std::vector upper_bounds; - find_reduced_cost_fixings(upper_bound_.load(), lower_bounds, upper_bounds); + find_reduced_cost_fixings(get_cutoff(), lower_bounds, upper_bounds); mutex_upper_.unlock(); mutex_original_lp_.lock(); original_lp_.lower = lower_bounds; @@ -2372,10 +2373,10 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut return solver_status_; } - if (settings_.reduced_cost_strengthening >= 2 && upper_bound_.load() < last_upper_bound) { + if (settings_.reduced_cost_strengthening >= 2 && get_cutoff() < last_upper_bound) { std::vector lower_bounds; std::vector upper_bounds; - i_t num_fixed = find_reduced_cost_fixings(upper_bound_.load(), lower_bounds, upper_bounds); + i_t num_fixed = find_reduced_cost_fixings(get_cutoff(), lower_bounds, upper_bounds); if (num_fixed > 0) { std::vector bounds_changed(original_lp_.num_cols, true); std::vector row_sense; @@ -2479,7 +2480,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut std::optional*> start_node = node_queue_.pop_best_first(); if (!start_node.has_value()) { continue; } - if (upper_bound_ < start_node.value()->lower_bound) { + if (get_cutoff() < start_node.value()->lower_bound) { // This node was put on the heap earlier but its lower bound is now greater than the // current upper bound search_tree_.graphviz_node( @@ -3321,7 +3322,7 @@ void branch_and_bound_t::deterministic_sort_replay_events( template void branch_and_bound_t::deterministic_prune_worker_nodes_vs_incumbent() { - f_t upper_bound = upper_bound_.load(); + f_t upper_bound = get_cutoff(); for (auto& worker : *deterministic_workers_) { // Check nodes in plunge stack - filter in place @@ -3457,14 +3458,14 @@ void branch_and_bound_t::deterministic_populate_diving_heap() const int num_diving = deterministic_diving_workers_->size(); constexpr int target_nodes_per_worker = 10; const int target_total = num_diving * target_nodes_per_worker; - f_t upper_bound = upper_bound_.load(); + f_t cutoff = get_cutoff(); // Collect candidate nodes from BFS worker backlog heaps std::vector*, f_t>> candidates; for (auto& worker : *deterministic_workers_) { for (auto* node : worker.backlog.data()) { - if (node->lower_bound < upper_bound) { + if (node->lower_bound < cutoff) { f_t score = node->objective_estimate; if (score >= inf) { score = node->lower_bound; } candidates.push_back({node, score}); diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index a13d5cedc..fac413024 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -108,6 +108,14 @@ class branch_and_bound_t { bool stop_for_time_limit(mip_solution_t& solution); + // Set a cutoff bound from an external source (e.g., early FJ during presolve). + // Used for node pruning and reduced cost strengthening but NOT for gap computation. + // Unlike upper_bound_, this does not imply a verified incumbent solution exists. + void set_initial_cutoff(f_t bound) { initial_cutoff_ = bound; } + + // Effective cutoff for node pruning: min of verified incumbent and external cutoff. + f_t get_cutoff() const { return std::min(upper_bound_.load(), initial_cutoff_); } + // Repair a low-quality solution from the heuristics. bool repair_solution(const std::vector& leaf_edge_norms, const std::vector& potential_solution, @@ -169,9 +177,12 @@ class branch_and_bound_t { // Mutex for upper bound omp_mutex_t mutex_upper_; - // Global variable for upper bound + // Verified incumbent bound (only set when B&B has an actual integer-feasible solution). omp_atomic_t upper_bound_; + // External cutoff from early heuristics (for pruning only, no verified solution). + f_t initial_cutoff_{std::numeric_limits::infinity()}; + // Global variable for incumbent. The incumbent should be updated with the upper bound mip_solution_t incumbent_; diff --git a/cpp/src/mip_heuristics/CMakeLists.txt b/cpp/src/mip_heuristics/CMakeLists.txt index 538e3c49a..7c1035fd1 100644 --- a/cpp/src/mip_heuristics/CMakeLists.txt +++ b/cpp/src/mip_heuristics/CMakeLists.txt @@ -40,7 +40,9 @@ set(MIP_NON_LP_FILES ${CMAKE_CURRENT_SOURCE_DIR}/presolve/trivial_presolve.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump_kernels.cu - ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu) + ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu + ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/early_cpufj.cu + ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/early_gpufj.cu) # Choose which files to include based on build mode if(BUILD_LP_ONLY) diff --git a/cpp/src/mip_heuristics/diversity/population.cu b/cpp/src/mip_heuristics/diversity/population.cu index c2138c91c..ba23d3fb0 100644 --- a/cpp/src/mip_heuristics/diversity/population.cu +++ b/cpp/src/mip_heuristics/diversity/population.cu @@ -267,9 +267,9 @@ void population_t::invoke_get_solution_callback( f_t user_bound = context.stats.get_solution_bound(); solution_t temp_sol(sol); problem_ptr->post_process_assignment(temp_sol.assignment); - if (context.settings.mip_scaling) { + if (context.settings.mip_scaling && context.scaling != nullptr) { rmm::device_uvector dummy(0, temp_sol.handle_ptr->get_stream()); - context.scaling.unscale_solutions(temp_sol.assignment, dummy); + context.scaling->unscale_solutions(temp_sol.assignment, dummy); } if (problem_ptr->has_papilo_presolve_data()) { problem_ptr->papilo_uncrush_assignment(temp_sol.assignment); @@ -346,7 +346,9 @@ void population_t::run_solution_callbacks(solution_t& sol) incumbent_assignment.size(), sol.handle_ptr->get_stream()); - if (context.settings.mip_scaling) { context.scaling.scale_solutions(incumbent_assignment); } + if (context.settings.mip_scaling && context.scaling != nullptr) { + context.scaling->scale_solutions(incumbent_assignment); + } bool is_valid = problem_ptr->pre_process_assignment(incumbent_assignment); if (!is_valid) { return; } cuopt_assert(outside_sol.assignment.size() == incumbent_assignment.size(), diff --git a/cpp/src/mip_heuristics/early_heuristic.cuh b/cpp/src/mip_heuristics/early_heuristic.cuh new file mode 100644 index 000000000..333fde195 --- /dev/null +++ b/cpp/src/mip_heuristics/early_heuristic.cuh @@ -0,0 +1,109 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include + +#include + +#include + +#include + +#include +#include +#include +#include + +namespace cuopt::linear_programming::detail { + +template +using early_incumbent_callback_t = + std::function& assignment)>; + +// CRTP base for early heuristics that run on the original (or papilo-presolved) problem +// during presolve to find incumbents as early as possible. +// Derived classes implement start() and stop(). +template +class early_heuristic_t { + public: + early_heuristic_t(const optimization_problem_t& op_problem, + const typename mip_solver_settings_t::tolerances_t& tolerances, + early_incumbent_callback_t incumbent_callback) + : incumbent_callback_(std::move(incumbent_callback)) + { + RAFT_CUDA_TRY(cudaGetDevice(&device_id_)); + + // Build and preprocess on the original handle, then copy onto our own handle + // so the derived solver can run on a dedicated stream (prevents graph capture conflicts). + problem_t temp_problem(op_problem, tolerances, false); + temp_problem.preprocess_problem(); + temp_problem.handle_ptr->sync_stream(); + problem_ptr_ = std::make_unique>(temp_problem, &handle_); + + solution_ptr_ = std::make_unique>(*problem_ptr_); + thrust::fill(handle_.get_thrust_policy(), + solution_ptr_->assignment.begin(), + solution_ptr_->assignment.end(), + f_t{0}); + solution_ptr_->clamp_within_bounds(); + } + + bool solution_found() const { return solution_found_; } + f_t get_best_objective() const { return best_objective_; } + void set_best_objective(f_t obj) { best_objective_ = obj; } + const std::vector& get_best_assignment() const { return best_assignment_; } + + protected: + ~early_heuristic_t() = default; + + // NOT thread-safe. solver_obj is in solver-space (always minimization). + // Uses a private CUDA stream to avoid racing with the FJ solver's stream. + void try_update_best(f_t solver_obj, const std::vector& assignment) + { + if (solver_obj >= best_objective_) { return; } + best_objective_ = solver_obj; + + RAFT_CUDA_TRY(cudaSetDevice(device_id_)); + auto stream = handle_.get_stream(); + rmm::device_uvector d_assignment(assignment.size(), stream); + raft::copy(d_assignment.data(), assignment.data(), assignment.size(), stream); + problem_ptr_->post_process_assignment(d_assignment, true, stream); + auto user_assignment = cuopt::host_copy(d_assignment, stream); + + best_assignment_ = user_assignment; + solution_found_ = true; + f_t user_obj = problem_ptr_->get_user_obj_from_solver_obj(solver_obj); + double elapsed = + std::chrono::duration(std::chrono::steady_clock::now() - start_time_).count(); + CUOPT_LOG_INFO("Early heuristics (%s) lowered the primal bound. Objective %g. Time %.2f", + Derived::name(), + user_obj, + elapsed); + if (incumbent_callback_) { incumbent_callback_(solver_obj, user_obj, user_assignment); } + } + + int device_id_{0}; + + // handle_ must be declared before problem_ptr_/solution_ptr_ so it outlives them + // (C++ destroys members in reverse declaration order) + raft::handle_t handle_; + + std::unique_ptr> problem_ptr_; + std::unique_ptr> solution_ptr_; + + bool solution_found_{false}; + f_t best_objective_{std::numeric_limits::infinity()}; + std::vector best_assignment_; + + early_incumbent_callback_t incumbent_callback_; + std::chrono::steady_clock::time_point start_time_; +}; + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu new file mode 100644 index 000000000..8109653e6 --- /dev/null +++ b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cu @@ -0,0 +1,79 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include "early_cpufj.cuh" + +#include +#include +#include + +namespace cuopt::linear_programming::detail { + +template +early_cpufj_t::early_cpufj_t( + const optimization_problem_t& op_problem, + const typename mip_solver_settings_t::tolerances_t& tolerances, + early_incumbent_callback_t incumbent_callback) + : early_heuristic_t>( + op_problem, tolerances, std::move(incumbent_callback)) +{ +} + +template +early_cpufj_t::~early_cpufj_t() +{ + stop(); +} + +template +void early_cpufj_t::start() +{ + if (cpu_fj_thread_) { return; } + + this->preemption_flag_.store(false); + this->start_time_ = std::chrono::steady_clock::now(); + + cpu_fj_thread_ = std::make_unique>(); + cpu_fj_thread_->fj_cpu = + init_fj_cpu_standalone(*this->problem_ptr_, *this->solution_ptr_, preemption_flag_); + cpu_fj_thread_->time_limit = std::numeric_limits::infinity(); + + cpu_fj_thread_->fj_cpu->log_prefix = "[Early CPUFJ] "; + + cpu_fj_thread_->fj_cpu->improvement_callback = + [this](f_t solver_obj, const std::vector& assignment, double) { + this->try_update_best(solver_obj, assignment); + }; + + cpu_fj_thread_->start_cpu_solver(); +} + +template +void early_cpufj_t::stop() +{ + if (!cpu_fj_thread_) { return; } + + preemption_flag_.store(true); + cpu_fj_thread_->stop_cpu_solver(); + cpu_fj_thread_->wait_for_cpu_solver(); + + CUOPT_LOG_DEBUG("[Early CPUFJ] Stopped after %d iterations, solution_found=%d", + cpu_fj_thread_->fj_cpu ? cpu_fj_thread_->fj_cpu->iterations : 0, + this->solution_found_); + + cpu_fj_thread_.reset(); +} + +#if MIP_INSTANTIATE_FLOAT +template class early_cpufj_t; +#endif + +#if MIP_INSTANTIATE_DOUBLE +template class early_cpufj_t; +#endif + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cuh b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cuh new file mode 100644 index 000000000..911e84655 --- /dev/null +++ b/cpp/src/mip_heuristics/feasibility_jump/early_cpufj.cuh @@ -0,0 +1,39 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include + +#include +#include + +namespace cuopt::linear_programming::detail { + +template +struct cpu_fj_thread_t; + +template +class early_cpufj_t : public early_heuristic_t> { + public: + early_cpufj_t(const optimization_problem_t& op_problem, + const typename mip_solver_settings_t::tolerances_t& tolerances, + early_incumbent_callback_t incumbent_callback); + + ~early_cpufj_t(); + + static constexpr const char* name() { return "CPUFJ"; } + + void start(); + void stop(); + + private: + std::unique_ptr> cpu_fj_thread_; + std::atomic preemption_flag_{false}; +}; + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu new file mode 100644 index 000000000..55726421d --- /dev/null +++ b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu @@ -0,0 +1,93 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include "early_gpufj.cuh" + +#include +#include +#include +#include + +#include + +#include + +namespace cuopt::linear_programming::detail { + +template +early_gpufj_t::early_gpufj_t(const optimization_problem_t& op_problem, + const mip_solver_settings_t& settings, + early_incumbent_callback_t incumbent_callback) + : early_heuristic_t>( + op_problem, settings.get_tolerances(), std::move(incumbent_callback)) +{ + context_ptr_ = std::make_unique>( + &this->handle_, this->problem_ptr_.get(), settings, nullptr); +} + +template +early_gpufj_t::~early_gpufj_t() +{ + stop(); +} + +template +void early_gpufj_t::start() +{ + if (worker_thread_) { return; } + + this->start_time_ = std::chrono::steady_clock::now(); + + fj_settings_t fj_settings; + fj_settings.mode = fj_mode_t::EXIT_NON_IMPROVING; + fj_settings.n_of_minimums_for_exit = std::numeric_limits::max(); + fj_settings.time_limit = std::numeric_limits::infinity(); + fj_settings.iteration_limit = std::numeric_limits::max(); + fj_settings.update_weights = true; + fj_settings.feasibility_run = false; + + fj_ptr_ = std::make_unique>(*context_ptr_, fj_settings); + + fj_ptr_->improvement_callback = [this](f_t user_obj, const std::vector& h_assignment) { + f_t solver_obj = this->problem_ptr_->get_solver_obj_from_user_obj(user_obj); + this->try_update_best(solver_obj, h_assignment); + }; + + worker_thread_ = std::make_unique(&early_gpufj_t::run_worker, this); +} + +template +void early_gpufj_t::run_worker() +{ + RAFT_CUDA_TRY(cudaSetDevice(this->device_id_)); + fj_ptr_->solve(*this->solution_ptr_); +} + +template +void early_gpufj_t::stop() +{ + if (!worker_thread_) { return; } + + context_ptr_->preempt_heuristic_solver_.store(true); + + if (worker_thread_->joinable()) { worker_thread_->join(); } + + CUOPT_LOG_DEBUG("[Early GPU FJ] Stopped, solution_found=%d", this->solution_found_); + + fj_ptr_.reset(); + worker_thread_.reset(); +} + +#if MIP_INSTANTIATE_FLOAT +template class early_gpufj_t; +#endif + +#if MIP_INSTANTIATE_DOUBLE +template class early_gpufj_t; +#endif + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cuh b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cuh new file mode 100644 index 000000000..4a7769143 --- /dev/null +++ b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cuh @@ -0,0 +1,45 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include + +#include +#include + +namespace cuopt::linear_programming::detail { + +template +class fj_t; + +template +struct mip_solver_context_t; + +template +class early_gpufj_t : public early_heuristic_t> { + public: + early_gpufj_t(const optimization_problem_t& op_problem, + const mip_solver_settings_t& settings, + early_incumbent_callback_t incumbent_callback); + + ~early_gpufj_t(); + + static constexpr const char* name() { return "GPUFJ"; } + + void start(); + void stop(); + + private: + void run_worker(); + + std::unique_ptr> context_ptr_; + std::unique_ptr> fj_ptr_; + std::unique_ptr worker_thread_; +}; + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu index e9cf0760d..748dd41df 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu @@ -879,7 +879,7 @@ i_t fj_t::host_loop(solution_t& solution, i_t climber_idx) // every now and then, ensure external solutions are added to the population // this is done here because FJ is called within FP and also after recombiners // so FJ is one of the most inner and most frequent functions to be called - if (steps % 10000 == 0) { + if (steps % 10000 == 0 && context.diversity_manager_ptr != nullptr) { context.diversity_manager_ptr->get_population_pointer() ->add_external_solutions_to_population(); } @@ -933,6 +933,22 @@ i_t fj_t::host_loop(solution_t& solution, i_t climber_idx) bool is_feasible = solution.compute_feasibility(); solution.handle_ptr->sync_stream(); + // Invoke improvement callback if we have a better feasible solution + if (is_feasible && improvement_callback) { + f_t user_obj = solution.get_user_objective(); + if (solution.h_obj < last_reported_objective_) { + last_reported_objective_ = solution.h_obj; + // Copy assignment to host for callback + std::vector h_assignment(solution.assignment.size()); + raft::copy(h_assignment.data(), + solution.assignment.data(), + solution.assignment.size(), + climber_stream); + climber_stream.synchronize(); + improvement_callback(user_obj, h_assignment); + } + } + if (limit_reached) { break; } if (is_feasible) { @@ -1059,8 +1075,9 @@ i_t fj_t::solve(solution_t& solution) { raft::common::nvtx::range scope("fj_solve"); timer_t timer(settings.time_limit); - handle_ptr = const_cast(solution.handle_ptr); - pb_ptr = solution.problem_ptr; + handle_ptr = const_cast(solution.handle_ptr); + pb_ptr = solution.problem_ptr; + last_reported_objective_ = std::numeric_limits::infinity(); if (settings.mode != fj_mode_t::ROUNDING) { cuopt_func_call(solution.test_variable_bounds(true)); cuopt_assert(solution.test_number_all_integer(), "All integers must be rounded"); diff --git a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cuh b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cuh index e9040a759..50b451a86 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cuh +++ b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cuh @@ -19,11 +19,17 @@ #include +#include + #define FJ_DEBUG_LOAD_BALANCING 0 #define FJ_SINGLE_STEP 0 namespace cuopt::linear_programming::detail { +template +using fj_improvement_callback_t = + std::function& assignment)>; + static constexpr int TPB_resetmoves = raft::WarpSize * 4; static constexpr int TPB_heavyvars = raft::WarpSize * 16; static constexpr int TPB_heavycstrs = raft::WarpSize * 4; @@ -628,6 +634,9 @@ class fj_t { std::vector> climbers; rmm::device_uvector climber_views; fj_settings_t settings; + + fj_improvement_callback_t improvement_callback; + f_t last_reported_objective_{std::numeric_limits::infinity()}; }; } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu index 4d567c9ec..af41545a9 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu @@ -790,7 +790,7 @@ static void apply_move(fj_cpu_climber_t& fj_cpu, if (fj_cpu.improvement_callback) { double current_work_units = fj_cpu.work_units_elapsed.load(std::memory_order_acquire); fj_cpu.improvement_callback( - fj_cpu.h_best_objective, fj_cpu.h_assignment, current_work_units); + fj_cpu.h_incumbent_objective, fj_cpu.h_assignment, current_work_units); } fj_cpu.feasible_found = true; } @@ -1402,10 +1402,8 @@ std::unique_ptr> fj_t::create_cpu_climber( } template -bool fj_t::cpu_solve(fj_cpu_climber_t& fj_cpu, f_t in_time_limit) +static bool cpufj_solve_loop(fj_cpu_climber_t& fj_cpu, f_t in_time_limit) { - raft::common::nvtx::range scope("fj_cpu"); - i_t local_mins = 0; auto loop_start = std::chrono::high_resolution_clock::now(); auto time_limit = std::chrono::milliseconds((int)(in_time_limit * 1000)); @@ -1507,8 +1505,7 @@ bool fj_t::cpu_solve(fj_cpu_climber_t& fj_cpu, f_t in_time_l if (fj_cpu.iterations % fj_cpu.log_interval == 0) { CUOPT_LOG_TRACE( "%sCPUFJ iteration: %d/%d, local mins: %d, best_objective: %g, viol: %zu, obj weight %g, " - "maxw " - "%g", + "maxw %g", fj_cpu.log_prefix.c_str(), fj_cpu.iterations, fj_cpu.settings.iteration_limit != std::numeric_limits::max() @@ -1537,15 +1534,10 @@ bool fj_t::cpu_solve(fj_cpu_climber_t& fj_cpu, f_t in_time_l if (fj_cpu.iterations % 100 == 0 && fj_cpu.iterations > 0) { // Collect memory statistics auto [loads, stores] = fj_cpu.memory_aggregator.collect(); - - double biased_work = (loads + stores) * fj_cpu.work_unit_bias / 1e10; + double biased_work = (loads + stores) * fj_cpu.work_unit_bias / 1e10; fj_cpu.work_units_elapsed += biased_work; if (fj_cpu.producer_sync != nullptr) { fj_cpu.producer_sync->notify_progress(); } - - CUOPT_LOG_TRACE("CPUFJ work units: %f incumbent %g", - fj_cpu.work_units_elapsed.load(std::memory_order_relaxed), - fj_cpu.pb_ptr->get_user_obj_from_solver_obj(fj_cpu.h_best_objective)); } cuopt_func_call(sanity_checks(fj_cpu)); @@ -1569,6 +1561,13 @@ bool fj_t::cpu_solve(fj_cpu_climber_t& fj_cpu, f_t in_time_l return fj_cpu.feasible_found; } +template +bool fj_t::cpu_solve(fj_cpu_climber_t& fj_cpu, f_t in_time_limit) +{ + raft::common::nvtx::range scope("fj_cpu"); + return cpufj_solve_loop(fj_cpu, in_time_limit); +} + template cpu_fj_thread_t::~cpu_fj_thread_t() { @@ -1578,8 +1577,7 @@ cpu_fj_thread_t::~cpu_fj_thread_t() template void cpu_fj_thread_t::run_worker() { - bool solution_found = fj_ptr->cpu_solve(*fj_cpu, time_limit); - cpu_fj_solution_found = solution_found; + cpu_fj_solution_found = cpufj_solve_loop(*fj_cpu, time_limit); } template @@ -1601,14 +1599,43 @@ void cpu_fj_thread_t::stop_cpu_solver() fj_cpu->halted = true; } +template +std::unique_ptr> init_fj_cpu_standalone( + problem_t& problem, + solution_t& solution, + std::atomic& preemption_flag, + fj_settings_t settings) +{ + raft::common::nvtx::range scope("init_fj_cpu_standalone"); + + auto fj_cpu = std::make_unique>(preemption_flag); + + std::vector default_weights(problem.n_constraints, 1.0); + init_fj_cpu(*fj_cpu, solution, default_weights, default_weights, 0.0); + fj_cpu->settings = settings; + fj_cpu->settings.seed = cuopt::seed_generator::get_seed(); + + return fj_cpu; +} + #if MIP_INSTANTIATE_FLOAT template class fj_t; template class cpu_fj_thread_t; +template std::unique_ptr> init_fj_cpu_standalone( + problem_t& problem, + solution_t& solution, + std::atomic& preemption_flag, + fj_settings_t settings); #endif #if MIP_INSTANTIATE_DOUBLE template class fj_t; template class cpu_fj_thread_t; +template std::unique_ptr> init_fj_cpu_standalone( + problem_t& problem, + solution_t& solution, + std::atomic& preemption_flag, + fj_settings_t settings); #endif } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh index 7dcc8d39b..3263609a2 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh +++ b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh @@ -210,4 +210,13 @@ struct cpu_fj_thread_t : public cpu_worker_thread_base_t* fj_ptr{nullptr}; }; +// Standalone CPUFJ init for running without full fj_t infrastructure (avoids GPU allocations). +// Used for early CPUFJ during presolve. +template +std::unique_ptr> init_fj_cpu_standalone( + problem_t& problem, + solution_t& solution, + std::atomic& preemption_flag, + fj_settings_t settings = fj_settings_t{}); + } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/problem/presolve_data.cu b/cpp/src/mip_heuristics/problem/presolve_data.cu index b11f7b108..1bc05a779 100644 --- a/cpp/src/mip_heuristics/problem/presolve_data.cu +++ b/cpp/src/mip_heuristics/problem/presolve_data.cu @@ -107,7 +107,8 @@ template void presolve_data_t::post_process_assignment( problem_t& problem, rmm::device_uvector& current_assignment, - bool resize_to_original_problem) + bool resize_to_original_problem, + rmm::cuda_stream_view stream) { raft::common::nvtx::range fun_scope("post_process_assignment"); cuopt_assert(current_assignment.size() == variable_mapping.size(), "size mismatch"); @@ -115,15 +116,15 @@ void presolve_data_t::post_process_assignment( auto fixed_assgn = make_span(fixed_var_assignment); auto var_map = make_span(variable_mapping); if (current_assignment.size() > 0) { - thrust::for_each(problem.handle_ptr->get_thrust_policy(), + thrust::for_each(rmm::exec_policy(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(current_assignment.size()), [fixed_assgn, var_map, assgn] __device__(auto idx) { fixed_assgn[var_map[idx]] = assgn[idx]; }); } - expand_device_copy(current_assignment, fixed_var_assignment, problem.handle_ptr->get_stream()); - auto h_assignment = cuopt::host_copy(current_assignment, problem.handle_ptr->get_stream()); + expand_device_copy(current_assignment, fixed_var_assignment, stream); + auto h_assignment = cuopt::host_copy(current_assignment, stream); cuopt_assert(additional_var_id_per_var.size() == h_assignment.size(), "Size mismatch"); cuopt_assert(additional_var_used.size() == h_assignment.size(), "Size mismatch"); for (i_t i = 0; i < (i_t)h_assignment.size(); ++i) { @@ -148,14 +149,10 @@ void presolve_data_t::post_process_assignment( h_assignment[sub.substituted_var]); } - raft::copy(current_assignment.data(), - h_assignment.data(), - h_assignment.size(), - problem.handle_ptr->get_stream()); // this separate resizing is needed because of the callback + raft::copy(current_assignment.data(), h_assignment.data(), h_assignment.size(), stream); if (resize_to_original_problem) { - current_assignment.resize(problem.original_problem_ptr->get_n_variables(), - problem.handle_ptr->get_stream()); + current_assignment.resize(problem.original_problem_ptr->get_n_variables(), stream); } } diff --git a/cpp/src/mip_heuristics/problem/presolve_data.cuh b/cpp/src/mip_heuristics/problem/presolve_data.cuh index 51b6bac95..cac3e7165 100644 --- a/cpp/src/mip_heuristics/problem/presolve_data.cuh +++ b/cpp/src/mip_heuristics/problem/presolve_data.cuh @@ -89,7 +89,15 @@ class presolve_data_t { bool pre_process_assignment(problem_t& problem, rmm::device_uvector& assignment); void post_process_assignment(problem_t& problem, rmm::device_uvector& current_assignment, - bool resize_to_original_problem = true); + bool resize_to_original_problem, + rmm::cuda_stream_view stream); + void post_process_assignment(problem_t& problem, + rmm::device_uvector& current_assignment, + bool resize_to_original_problem = true) + { + post_process_assignment( + problem, current_assignment, resize_to_original_problem, problem.handle_ptr->get_stream()); + } void post_process_solution(problem_t& problem, solution_t& solution); void set_papilo_presolve_data(const third_party_presolve_t* presolver_ptr, diff --git a/cpp/src/mip_heuristics/problem/problem.cu b/cpp/src/mip_heuristics/problem/problem.cu index d77e2e5f6..dde334476 100644 --- a/cpp/src/mip_heuristics/problem/problem.cu +++ b/cpp/src/mip_heuristics/problem/problem.cu @@ -2045,9 +2045,11 @@ bool problem_t::pre_process_assignment(rmm::device_uvector& assig template void problem_t::post_process_assignment(rmm::device_uvector& current_assignment, - bool resize_to_original_problem) + bool resize_to_original_problem, + rmm::cuda_stream_view stream) { - presolve_data.post_process_assignment(*this, current_assignment, resize_to_original_problem); + presolve_data.post_process_assignment( + *this, current_assignment, resize_to_original_problem, stream); } template diff --git a/cpp/src/mip_heuristics/problem/problem.cuh b/cpp/src/mip_heuristics/problem/problem.cuh index 6cd180a80..9876773ec 100644 --- a/cpp/src/mip_heuristics/problem/problem.cuh +++ b/cpp/src/mip_heuristics/problem/problem.cuh @@ -94,7 +94,14 @@ class problem_t { void preprocess_problem(); bool pre_process_assignment(rmm::device_uvector& assignment); void post_process_assignment(rmm::device_uvector& current_assignment, - bool resize_to_original_problem = true); + bool resize_to_original_problem, + rmm::cuda_stream_view stream); + void post_process_assignment(rmm::device_uvector& current_assignment, + bool resize_to_original_problem = true) + { + post_process_assignment( + current_assignment, resize_to_original_problem, handle_ptr->get_stream()); + } void post_process_solution(solution_t& solution); void set_papilo_presolve_data(const third_party_presolve_t* presolver_ptr, std::vector reduced_to_original, diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index 44f0e7468..f7c3063c9 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -7,6 +7,8 @@ #include +#include +#include #include #include #include @@ -51,10 +53,30 @@ static void init_handler(const raft::handle_t* handle_ptr) handle_ptr->get_cusparse_handle(), CUSPARSE_POINTER_MODE_DEVICE, handle_ptr->get_stream())); } +template +static void invoke_solution_callbacks( + const std::vector& mip_callbacks, + f_t objective, + std::vector& assignment, + f_t bound) +{ + std::vector obj_vec = {objective}; + std::vector bound_vec = {bound}; + for (auto callback : mip_callbacks) { + if (callback != nullptr && + callback->get_type() == internals::base_solution_callback_type::GET_SOLUTION) { + auto get_sol_callback = static_cast(callback); + get_sol_callback->get_solution( + assignment.data(), obj_vec.data(), bound_vec.data(), get_sol_callback->get_user_data()); + } + } +} + template mip_solution_t run_mip(detail::problem_t& problem, mip_solver_settings_t const& settings, - timer_t& timer) + timer_t& timer, + f_t initial_cutoff = std::numeric_limits::infinity()) { raft::common::nvtx::range fun_scope("run_mip"); auto constexpr const running_mip = true; @@ -63,15 +85,6 @@ mip_solution_t run_mip(detail::problem_t& problem, auto hyper_params = settings.hyper_params; hyper_params.update_primal_weight_on_initial_solution = false; hyper_params.update_step_size_on_initial_solution = true; - if (settings.get_mip_callbacks().size() > 0) { - auto callback_num_variables = problem.original_problem_ptr->get_n_variables(); - if (problem.has_papilo_presolve_data()) { - callback_num_variables = problem.get_papilo_original_num_variables(); - } - for (auto callback : settings.get_mip_callbacks()) { - callback->template setup(callback_num_variables); - } - } // if the input problem is empty: early exit if (problem.empty) { detail::solution_t solution(problem); @@ -153,6 +166,7 @@ mip_solution_t run_mip(detail::problem_t& problem, detail::trivial_presolve(scaled_problem); detail::mip_solver_t solver(scaled_problem, settings, scaling, timer); + solver.context.initial_cutoff = initial_cutoff; if (timer.check_time_limit()) { CUOPT_LOG_INFO("Time limit reached before main solve"); detail::solution_t sol(problem); @@ -160,9 +174,35 @@ mip_solution_t run_mip(detail::problem_t& problem, stats.total_solve_time = timer.elapsed_time(); return sol.get_solution(false, stats, false); } + + // Run early CPUFJ on papilo-presolved problem during cuOpt presolve (probing cache). + // Stopped by run_solver after presolve completes; its best objective feeds into initial_cutoff. + std::unique_ptr> early_cpufj; + bool run_early_cpufj = problem.has_papilo_presolve_data() && + settings.determinism_mode != CUOPT_MODE_DETERMINISTIC && + problem.original_problem_ptr->get_n_integers() > 0; + if (run_early_cpufj) { + auto* presolver_ptr = problem.presolve_data.papilo_presolve_ptr; + auto mip_callbacks = settings.get_mip_callbacks(); + f_t no_bound = problem.presolve_data.objective_scaling_factor >= 0 ? (f_t)-1e20 : (f_t)1e20; + auto incumbent_callback = [presolver_ptr, mip_callbacks, no_bound]( + f_t solver_obj, f_t user_obj, const std::vector& assignment) { + std::vector user_assignment; + presolver_ptr->uncrush_primal_solution(assignment, user_assignment); + invoke_solution_callbacks(mip_callbacks, user_obj, user_assignment, no_bound); + }; + early_cpufj = std::make_unique>( + *problem.original_problem_ptr, settings.get_tolerances(), incumbent_callback); + early_cpufj->set_best_objective(initial_cutoff); + early_cpufj->start(); + solver.context.early_cpufj_ptr = early_cpufj.get(); + CUOPT_LOG_DEBUG("Started early CPUFJ on papilo-presolved problem during cuOpt presolve"); + } + auto scaled_sol = solver.run_solver(); bool is_feasible_before_scaling = scaled_sol.get_feasible(); scaled_sol.problem_ptr = &problem; + if (settings.mip_scaling) { scaling.unscale_solutions(scaled_sol); } // at this point we need to compute the feasibility on the original problem not the presolved one bool is_feasible_after_unscaling = scaled_sol.compute_feasibility(); @@ -233,6 +273,10 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, op_problem.get_handle_ptr()->get_stream()); } + for (auto callback : settings.get_mip_callbacks()) { + callback->template setup(op_problem.get_n_variables()); + } + auto timer = timer_t(time_limit); double presolve_time = 0.0; @@ -258,6 +302,45 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, if (!run_presolve) { CUOPT_LOG_INFO("Presolve is disabled, skipping"); } + // Start early FJ (CPU and GPU) during presolve to find incumbents ASAP + // Only run if presolve is enabled (gives FJ time to find solutions) + // and we're not in deterministic mode + std::unique_ptr> early_cpufj; + std::unique_ptr> early_gpufj; + + // Track best incumbent found during presolve (shared across CPU and GPU FJ) + std::atomic early_best_objective{std::numeric_limits::infinity()}; + std::mutex early_callback_mutex; + + bool run_early_fj = run_presolve && settings.determinism_mode != CUOPT_MODE_DETERMINISTIC && + op_problem.get_n_integers() > 0 && op_problem.get_n_constraints() > 0; + f_t no_bound = problem.presolve_data.objective_scaling_factor >= 0 ? (f_t)-1e20 : (f_t)1e20; + if (run_early_fj) { + auto early_fj_callback = [&early_best_objective, + &early_callback_mutex, + mip_callbacks = settings.get_mip_callbacks(), + no_bound]( + f_t solver_obj, f_t user_obj, const std::vector& assignment) { + std::lock_guard lock(early_callback_mutex); + if (solver_obj >= early_best_objective.load()) { return; } + early_best_objective.store(solver_obj); + auto user_assignment = assignment; + invoke_solution_callbacks(mip_callbacks, user_obj, user_assignment, no_bound); + }; + + // Start early CPUFJ on original problem (will restart on presolved problem after Papilo) + early_cpufj = std::make_unique>( + op_problem, settings.get_tolerances(), early_fj_callback); + early_cpufj->start(); + CUOPT_LOG_INFO("Started early CPUFJ on original problem"); + + // Start early GPU FJ (uses GPU while CPU is busy with Papilo) + early_gpufj = + std::make_unique>(op_problem, settings, early_fj_callback); + early_gpufj->start(); + CUOPT_LOG_INFO("Started early GPUFJ during presolve"); + } + auto constexpr const dual_postsolve = false; if (run_presolve) { detail::sort_csr(op_problem); @@ -295,12 +378,32 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, } CUOPT_LOG_INFO("Papilo presolve time: %.2f", presolve_time); } + + // Stop early GPU FJ now that Papilo presolve is complete + if (early_gpufj) { + early_gpufj->stop(); + if (early_gpufj->solution_found()) { + CUOPT_LOG_INFO("Early GPU FJ found incumbent with objective %.6e during presolve", + early_gpufj->get_best_objective()); + } + early_gpufj.reset(); // Free GPU memory + } + + if (early_cpufj && run_presolve && presolve_result.has_value()) { + early_cpufj->stop(); + if (early_cpufj->solution_found()) { + CUOPT_LOG_INFO("Early CPUFJ (original) found incumbent with objective %.6e", + early_cpufj->get_best_objective()); + } + early_cpufj.reset(); + } + if (settings.user_problem_file != "") { CUOPT_LOG_INFO("Writing user problem to file: %s", settings.user_problem_file.c_str()); op_problem.write_to_mps(settings.user_problem_file); } - auto sol = run_mip(problem, settings, timer); + auto sol = run_mip(problem, settings, timer, early_best_objective.load()); if (run_presolve) { auto status_to_skip = sol.get_termination_status() == mip_termination_status_t::TimeLimit || diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index 235d4500d..789e71d2c 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -48,7 +49,7 @@ mip_solver_t::mip_solver_t(const problem_t& op_problem, context(op_problem.handle_ptr, const_cast*>(&op_problem), solver_settings, - scaling), + &scaling), timer_(timer) { init_handler(op_problem.handle_ptr); @@ -113,6 +114,17 @@ solution_t mip_solver_t::run_solver() ? std::numeric_limits::infinity() : timer_.remaining_time(); bool presolve_success = run_presolve ? dm.run_presolve(time_limit) : true; + + // Stop early CPUFJ after cuopt presolve (probing cache) but before main solve + if (context.early_cpufj_ptr) { + context.early_cpufj_ptr->stop(); + if (context.early_cpufj_ptr->solution_found()) { + f_t obj = context.early_cpufj_ptr->get_best_objective(); + context.initial_cutoff = std::min(context.initial_cutoff, obj); + CUOPT_LOG_INFO("Early CPUFJ found incumbent with objective %g during presolve", obj); + } + } + if (!presolve_success) { CUOPT_LOG_INFO("Problem proven infeasible in presolve"); solution_t sol(*context.problem_ptr); @@ -259,7 +271,14 @@ solution_t mip_solver_t::run_solver() branch_and_bound = std::make_unique>( branch_and_bound_problem, branch_and_bound_settings, timer_.get_tic_start()); context.branch_and_bound_ptr = branch_and_bound.get(); - auto* stats_ptr = &context.stats; + + // Set cutoff from early FJ if available (initial_cutoff is in solver-space) + if (context.initial_cutoff < std::numeric_limits::infinity()) { + branch_and_bound->set_initial_cutoff(context.initial_cutoff); + CUOPT_LOG_INFO("B&B using initial cutoff %.6e from early heuristics", context.initial_cutoff); + } + + auto* stats_ptr = &context.stats; branch_and_bound->set_user_bound_callback( [stats_ptr](f_t user_bound) { stats_ptr->set_solution_bound(user_bound); }); diff --git a/cpp/src/mip_heuristics/solver_context.cuh b/cpp/src/mip_heuristics/solver_context.cuh index baac1dd9d..d1be34ea6 100644 --- a/cpp/src/mip_heuristics/solver_context.cuh +++ b/cpp/src/mip_heuristics/solver_context.cuh @@ -28,6 +28,9 @@ namespace cuopt::linear_programming::detail { template class diversity_manager_t; +template +class early_cpufj_t; + // Aggregate structure containing the global context of the solving process for convenience: // The current problem, user settings, raft handle and statistics objects template @@ -35,7 +38,7 @@ struct mip_solver_context_t { explicit mip_solver_context_t(raft::handle_t const* handle_ptr_, problem_t* problem_ptr_, mip_solver_settings_t settings_, - pdlp_initial_scaling_strategy_t& scaling) + pdlp_initial_scaling_strategy_t* scaling) : handle_ptr(handle_ptr_), problem_ptr(problem_ptr_), settings(settings_), scaling(scaling) { cuopt_assert(problem_ptr != nullptr, "problem_ptr is nullptr"); @@ -53,7 +56,7 @@ struct mip_solver_context_t { diversity_manager_t* diversity_manager_ptr{nullptr}; std::atomic preempt_heuristic_solver_ = false; const mip_solver_settings_t settings; - pdlp_initial_scaling_strategy_t& scaling; + pdlp_initial_scaling_strategy_t* scaling; // nullptr when not available (early FJ) solver_stats_t stats; // Work limit context for tracking work units in deterministic mode (shared across all timers in // GPU heuristic loop) @@ -61,6 +64,9 @@ struct mip_solver_context_t { // synchronization every 5 seconds for deterministic mode work_unit_scheduler_t work_unit_scheduler_{5.0}; + + early_cpufj_t* early_cpufj_ptr{nullptr}; + f_t initial_cutoff{std::numeric_limits::infinity()}; }; } // namespace cuopt::linear_programming::detail diff --git a/python/cuopt/cuopt/linear_programming/solver/solver.pxd b/python/cuopt/cuopt/linear_programming/solver/solver.pxd index 83b1e0054..adc566aa1 100644 --- a/python/cuopt/cuopt/linear_programming/solver/solver.pxd +++ b/python/cuopt/cuopt/linear_programming/solver/solver.pxd @@ -181,9 +181,9 @@ cdef extern from "cuopt/linear_programming/utilities/cython_solve.hpp" namespace cdef unique_ptr[solver_ret_t] call_solve( data_model_view_t[int, double]* data_model, solver_settings_t[int, double]* solver_settings, - ) except + + ) except + nogil cdef pair[vector[unique_ptr[solver_ret_t]], double] call_batch_solve( # noqa vector[data_model_view_t[int, double] *] data_models, solver_settings_t[int, double]* solver_settings, - ) except + + ) except + nogil diff --git a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx index 8c1c8fdc3..e7d700ea9 100644 --- a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx +++ b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx @@ -507,10 +507,13 @@ def Solve(py_data_model_obj, settings, mip=False): ) data_model_obj.set_data_model_view() - return create_solution(move(call_solve( - data_model_obj.c_data_model_view.get(), - unique_solver_settings.get(), - )), data_model_obj) + cdef unique_ptr[solver_ret_t] sol_ret_ptr + with nogil: + sol_ret_ptr = move(call_solve( + data_model_obj.c_data_model_view.get(), + unique_solver_settings.get(), + )) + return create_solution(move(sol_ret_ptr), data_model_obj) cdef set_and_insert_vector( @@ -535,9 +538,9 @@ def BatchSolve(py_data_model_list, settings): cdef pair[ vector[unique_ptr[solver_ret_t]], - double] batch_solve_result = ( - move(call_batch_solve(data_model_views, unique_solver_settings.get())) # noqa - ) + double] batch_solve_result + with nogil: + batch_solve_result = move(call_batch_solve(data_model_views, unique_solver_settings.get())) # noqa cdef vector[unique_ptr[solver_ret_t]] c_solutions = ( move(batch_solve_result.first)