Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
49030ad
squash changes
aliceb-nv Feb 23, 2026
5881d35
fix style
aliceb-nv Feb 23, 2026
8a3684c
ai review comments
aliceb-nv Feb 23, 2026
e019653
Merge branch 'main' into pre-presolve-heuristics
aliceb-nv Feb 23, 2026
9c89092
review comments, fix crash
aliceb-nv Feb 23, 2026
d5f7dde
Merge branch 'pre-presolve-heuristics' of https://github.com/aliceb-n…
aliceb-nv Feb 23, 2026
62a53ef
Merge branch 'main' into pre-presolve-heuristics
aliceb-nv Feb 23, 2026
a4a195f
add GIL locks to handle worker threads invoking python callbacks
aliceb-nv Feb 23, 2026
ca34f77
Merge branch 'pre-presolve-heuristics' of https://github.com/aliceb-n…
aliceb-nv Feb 23, 2026
f349b5b
fix missing nogil
aliceb-nv Feb 23, 2026
506d046
fix python callbacks
aliceb-nv Feb 24, 2026
33230b6
fix attempt
aliceb-nv Feb 24, 2026
71e8c9a
operate on a problem copy for early GPUFJ
aliceb-nv Feb 24, 2026
9d7055e
fix reported bound in user callback
aliceb-nv Feb 24, 2026
7b9451b
fix build
aliceb-nv Feb 24, 2026
d0c5a42
fix thrust build + more timer checks
aliceb-nv Feb 24, 2026
d559b34
Merge branch 'main' into fix-thrust-build
aliceb-nv Feb 24, 2026
c5d0bd5
Merge branch 'fix-thrust-build' into pre-presolve-heuristics
aliceb-nv Feb 24, 2026
67240f5
review comment
aliceb-nv Feb 24, 2026
a15424f
fix thrust solve
aliceb-nv Feb 24, 2026
dee5d4d
Merge branch 'fix-thrust-build' into pre-presolve-heuristics
aliceb-nv Feb 24, 2026
7b3d40c
fix handle being destroyed before other gpu structures in early heuri…
aliceb-nv Feb 25, 2026
a5122ad
Merge branch 'main' into pre-presolve-heuristics
aliceb-nv Feb 25, 2026
68e07bb
fix build
aliceb-nv Feb 25, 2026
352cd15
fix cudaSetDevice bug
aliceb-nv Feb 25, 2026
e4b8166
fix empty tests
aliceb-nv Feb 25, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,12 @@ class default_get_solution_callback_t : public get_solution_callback_t {
void* solution_bound,
void* user_data) override
{
PyObject* numpy_matrix = get_numpy_array(data, n_variables);
PyObject* numpy_array = get_numpy_array(objective_value, 1);
PyObject* numpy_bound = get_numpy_array(solution_bound, 1);
PyObject* py_user_data = user_data == nullptr ? Py_None : static_cast<PyObject*>(user_data);
PyObject* res = PyObject_CallMethod(this->pyCallbackClass,
PyGILState_STATE gstate = PyGILState_Ensure();
PyObject* numpy_matrix = get_numpy_array(data, n_variables);
PyObject* numpy_array = get_numpy_array(objective_value, 1);
PyObject* numpy_bound = get_numpy_array(solution_bound, 1);
PyObject* py_user_data = user_data == nullptr ? Py_None : static_cast<PyObject*>(user_data);
PyObject* res = PyObject_CallMethod(this->pyCallbackClass,
"get_solution",
"(OOOO)",
numpy_matrix,
Expand All @@ -47,6 +48,7 @@ class default_get_solution_callback_t : public get_solution_callback_t {
Py_DECREF(numpy_array);
Py_DECREF(numpy_bound);
if (res != nullptr) { Py_DECREF(res); }
PyGILState_Release(gstate);
}

PyObject* pyCallbackClass;
Expand All @@ -69,11 +71,12 @@ class default_set_solution_callback_t : public set_solution_callback_t {
void* solution_bound,
void* user_data) override
{
PyObject* numpy_matrix = get_numpy_array(data, n_variables);
PyObject* numpy_array = get_numpy_array(objective_value, 1);
PyObject* numpy_bound = get_numpy_array(solution_bound, 1);
PyObject* py_user_data = user_data == nullptr ? Py_None : static_cast<PyObject*>(user_data);
PyObject* res = PyObject_CallMethod(this->pyCallbackClass,
PyGILState_STATE gstate = PyGILState_Ensure();
PyObject* numpy_matrix = get_numpy_array(data, n_variables);
PyObject* numpy_array = get_numpy_array(objective_value, 1);
PyObject* numpy_bound = get_numpy_array(solution_bound, 1);
PyObject* py_user_data = user_data == nullptr ? Py_None : static_cast<PyObject*>(user_data);
PyObject* res = PyObject_CallMethod(this->pyCallbackClass,
"set_solution",
"(OOOO)",
numpy_matrix,
Expand All @@ -84,6 +87,7 @@ class default_set_solution_callback_t : public set_solution_callback_t {
Py_DECREF(numpy_array);
Py_DECREF(numpy_bound);
if (res != nullptr) { Py_DECREF(res); }
PyGILState_Release(gstate);
}

PyObject* pyCallbackClass;
Expand Down
35 changes: 18 additions & 17 deletions cpp/src/branch_and_bound/branch_and_bound.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -898,7 +898,7 @@ struct nondeterministic_policy_t : tree_update_policy_t<i_t, f_t> {
{
}

f_t upper_bound() const override { return bnb.upper_bound_.load(); }
f_t upper_bound() const override { return bnb.get_cutoff(); }

void update_pseudo_costs(mip_node_t<i_t, f_t>* node, f_t leaf_obj) override
{
Expand Down Expand Up @@ -1316,10 +1316,11 @@ dual::status_t branch_and_bound_t<i_t, f_t>::solve_node_lp(

simplex_solver_settings_t lp_settings = settings_;
lp_settings.set_log(false);
f_t cutoff = get_cutoff();
if (original_lp_.objective_is_integral) {
lp_settings.cut_off = std::ceil(upper_bound_ - settings_.integer_tol) + settings_.dual_tol;
lp_settings.cut_off = std::ceil(cutoff - settings_.integer_tol) + settings_.dual_tol;
} else {
lp_settings.cut_off = upper_bound_ + settings_.dual_tol;
lp_settings.cut_off = cutoff + settings_.dual_tol;
}
lp_settings.inside_mip = 2;
lp_settings.time_limit = settings_.time_limit - toc(exploration_stats_.start_time);
Expand Down Expand Up @@ -1426,7 +1427,7 @@ void branch_and_bound_t<i_t, f_t>::plunge_with(branch_and_bound_worker_t<i_t, f_
// - The lower bound of the parent is lower or equal to its children
worker->lower_bound = lower_bound;

if (lower_bound > upper_bound) {
if (lower_bound > get_cutoff()) {
search_tree_.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound);
search_tree_.update(node_ptr, node_status_t::FATHOMED);
worker->recompute_basis = true;
Expand Down Expand Up @@ -1536,7 +1537,7 @@ void branch_and_bound_t<i_t, f_t>::dive_with(branch_and_bound_worker_t<i_t, f_t>
f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound);
worker->lower_bound = lower_bound;

if (node_ptr->lower_bound > upper_bound) {
if (node_ptr->lower_bound > get_cutoff()) {
worker->recompute_basis = true;
worker->recompute_bounds = true;
continue;
Expand Down Expand Up @@ -1675,7 +1676,7 @@ void branch_and_bound_t<i_t, f_t>::run_scheduler()
std::optional<mip_node_t<i_t, f_t>*> start_node = node_queue_.pop_best_first();

if (!start_node.has_value()) { continue; }
if (upper_bound_ < start_node.value()->lower_bound) {
if (get_cutoff() < start_node.value()->lower_bound) {
// This node was put on the heap earlier but its lower bound is now greater than the
// current upper bound
search_tree_.graphviz_node(
Expand All @@ -1699,7 +1700,7 @@ void branch_and_bound_t<i_t, f_t>::run_scheduler()
std::optional<mip_node_t<i_t, f_t>*> start_node = node_queue_.pop_diving();

if (!start_node.has_value()) { continue; }
if (upper_bound_ < start_node.value()->lower_bound ||
if (get_cutoff() < start_node.value()->lower_bound ||
start_node.value()->depth < diving_settings.min_node_depth) {
continue;
}
Expand Down Expand Up @@ -1767,7 +1768,7 @@ void branch_and_bound_t<i_t, f_t>::single_threaded_solve()
std::optional<mip_node_t<i_t, f_t>*> start_node = node_queue_.pop_best_first();

if (!start_node.has_value()) { continue; }
if (upper_bound_ < start_node.value()->lower_bound) {
if (get_cutoff() < start_node.value()->lower_bound) {
// This node was put on the heap earlier but its lower bound is now greater than the
// current upper bound
search_tree_.graphviz_node(
Expand Down Expand Up @@ -2189,12 +2190,12 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
return mip_status_t::NUMERICAL;
}

if (settings_.reduced_cost_strengthening >= 1 && upper_bound_.load() < last_upper_bound) {
if (settings_.reduced_cost_strengthening >= 1 && get_cutoff() < last_upper_bound) {
mutex_upper_.lock();
last_upper_bound = upper_bound_.load();
last_upper_bound = get_cutoff();
std::vector<f_t> lower_bounds;
std::vector<f_t> upper_bounds;
find_reduced_cost_fixings(upper_bound_.load(), lower_bounds, upper_bounds);
find_reduced_cost_fixings(get_cutoff(), lower_bounds, upper_bounds);
mutex_upper_.unlock();
mutex_original_lp_.lock();
original_lp_.lower = lower_bounds;
Expand Down Expand Up @@ -2372,10 +2373,10 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
return solver_status_;
}

if (settings_.reduced_cost_strengthening >= 2 && upper_bound_.load() < last_upper_bound) {
if (settings_.reduced_cost_strengthening >= 2 && get_cutoff() < last_upper_bound) {
std::vector<f_t> lower_bounds;
std::vector<f_t> upper_bounds;
i_t num_fixed = find_reduced_cost_fixings(upper_bound_.load(), lower_bounds, upper_bounds);
i_t num_fixed = find_reduced_cost_fixings(get_cutoff(), lower_bounds, upper_bounds);
if (num_fixed > 0) {
std::vector<bool> bounds_changed(original_lp_.num_cols, true);
std::vector<char> row_sense;
Expand Down Expand Up @@ -2479,7 +2480,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
std::optional<mip_node_t<i_t, f_t>*> start_node = node_queue_.pop_best_first();

if (!start_node.has_value()) { continue; }
if (upper_bound_ < start_node.value()->lower_bound) {
if (get_cutoff() < start_node.value()->lower_bound) {
// This node was put on the heap earlier but its lower bound is now greater than the
// current upper bound
search_tree_.graphviz_node(
Expand Down Expand Up @@ -3321,7 +3322,7 @@ void branch_and_bound_t<i_t, f_t>::deterministic_sort_replay_events(
template <typename i_t, typename f_t>
void branch_and_bound_t<i_t, f_t>::deterministic_prune_worker_nodes_vs_incumbent()
{
f_t upper_bound = upper_bound_.load();
f_t upper_bound = get_cutoff();

for (auto& worker : *deterministic_workers_) {
// Check nodes in plunge stack - filter in place
Expand Down Expand Up @@ -3457,14 +3458,14 @@ void branch_and_bound_t<i_t, f_t>::deterministic_populate_diving_heap()
const int num_diving = deterministic_diving_workers_->size();
constexpr int target_nodes_per_worker = 10;
const int target_total = num_diving * target_nodes_per_worker;
f_t upper_bound = upper_bound_.load();
f_t cutoff = get_cutoff();

// Collect candidate nodes from BFS worker backlog heaps
std::vector<std::pair<mip_node_t<i_t, f_t>*, f_t>> candidates;

for (auto& worker : *deterministic_workers_) {
for (auto* node : worker.backlog.data()) {
if (node->lower_bound < upper_bound) {
if (node->lower_bound < cutoff) {
f_t score = node->objective_estimate;
if (score >= inf) { score = node->lower_bound; }
candidates.push_back({node, score});
Expand Down
13 changes: 12 additions & 1 deletion cpp/src/branch_and_bound/branch_and_bound.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,14 @@ class branch_and_bound_t {

bool stop_for_time_limit(mip_solution_t<i_t, f_t>& solution);

// Set a cutoff bound from an external source (e.g., early FJ during presolve).
// Used for node pruning and reduced cost strengthening but NOT for gap computation.
// Unlike upper_bound_, this does not imply a verified incumbent solution exists.
void set_initial_cutoff(f_t bound) { initial_cutoff_ = bound; }

// Effective cutoff for node pruning: min of verified incumbent and external cutoff.
f_t get_cutoff() const { return std::min(upper_bound_.load(), initial_cutoff_); }

// Repair a low-quality solution from the heuristics.
bool repair_solution(const std::vector<f_t>& leaf_edge_norms,
const std::vector<f_t>& potential_solution,
Expand Down Expand Up @@ -169,9 +177,12 @@ class branch_and_bound_t {
// Mutex for upper bound
omp_mutex_t mutex_upper_;

// Global variable for upper bound
// Verified incumbent bound (only set when B&B has an actual integer-feasible solution).
omp_atomic_t<f_t> upper_bound_;

// External cutoff from early heuristics (for pruning only, no verified solution).
f_t initial_cutoff_{std::numeric_limits<f_t>::infinity()};

// Global variable for incumbent. The incumbent should be updated with the upper bound
mip_solution_t<i_t, f_t> incumbent_;

Expand Down
4 changes: 3 additions & 1 deletion cpp/src/mip_heuristics/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ set(MIP_NON_LP_FILES
${CMAKE_CURRENT_SOURCE_DIR}/presolve/trivial_presolve.cu
${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump.cu
${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump_kernels.cu
${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu)
${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu
${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/early_cpufj.cu
${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/early_gpufj.cu)

# Choose which files to include based on build mode
if(BUILD_LP_ONLY)
Expand Down
8 changes: 5 additions & 3 deletions cpp/src/mip_heuristics/diversity/population.cu
Original file line number Diff line number Diff line change
Expand Up @@ -267,9 +267,9 @@ void population_t<i_t, f_t>::invoke_get_solution_callback(
f_t user_bound = context.stats.get_solution_bound();
solution_t<i_t, f_t> temp_sol(sol);
problem_ptr->post_process_assignment(temp_sol.assignment);
if (context.settings.mip_scaling) {
if (context.settings.mip_scaling && context.scaling != nullptr) {
rmm::device_uvector<f_t> dummy(0, temp_sol.handle_ptr->get_stream());
context.scaling.unscale_solutions(temp_sol.assignment, dummy);
context.scaling->unscale_solutions(temp_sol.assignment, dummy);
}
if (problem_ptr->has_papilo_presolve_data()) {
problem_ptr->papilo_uncrush_assignment(temp_sol.assignment);
Expand Down Expand Up @@ -346,7 +346,9 @@ void population_t<i_t, f_t>::run_solution_callbacks(solution_t<i_t, f_t>& sol)
incumbent_assignment.size(),
sol.handle_ptr->get_stream());

if (context.settings.mip_scaling) { context.scaling.scale_solutions(incumbent_assignment); }
if (context.settings.mip_scaling && context.scaling != nullptr) {
context.scaling->scale_solutions(incumbent_assignment);
}
bool is_valid = problem_ptr->pre_process_assignment(incumbent_assignment);
if (!is_valid) { return; }
cuopt_assert(outside_sol.assignment.size() == incumbent_assignment.size(),
Expand Down
109 changes: 109 additions & 0 deletions cpp/src/mip_heuristics/early_heuristic.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/* clang-format off */
/*
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
/* clang-format on */

#pragma once

#include <mip_heuristics/problem/problem.cuh>
#include <mip_heuristics/solution/solution.cuh>

#include <cuopt/linear_programming/mip/solver_settings.hpp>

#include <utilities/logger.hpp>

#include <thrust/fill.h>

#include <chrono>
#include <functional>
#include <limits>
#include <vector>

namespace cuopt::linear_programming::detail {

template <typename f_t>
using early_incumbent_callback_t =
std::function<void(f_t solver_obj, f_t user_obj, const std::vector<f_t>& assignment)>;

// CRTP base for early heuristics that run on the original (or papilo-presolved) problem
// during presolve to find incumbents as early as possible.
// Derived classes implement start() and stop().
template <typename i_t, typename f_t, typename Derived>
class early_heuristic_t {
public:
early_heuristic_t(const optimization_problem_t<i_t, f_t>& op_problem,
const typename mip_solver_settings_t<i_t, f_t>::tolerances_t& tolerances,
early_incumbent_callback_t<f_t> incumbent_callback)
: incumbent_callback_(std::move(incumbent_callback))
{
RAFT_CUDA_TRY(cudaGetDevice(&device_id_));

// Build and preprocess on the original handle, then copy onto our own handle
// so the derived solver can run on a dedicated stream (prevents graph capture conflicts).
problem_t<i_t, f_t> temp_problem(op_problem, tolerances, false);
temp_problem.preprocess_problem();
temp_problem.handle_ptr->sync_stream();
problem_ptr_ = std::make_unique<problem_t<i_t, f_t>>(temp_problem, &handle_);

solution_ptr_ = std::make_unique<solution_t<i_t, f_t>>(*problem_ptr_);
thrust::fill(handle_.get_thrust_policy(),
solution_ptr_->assignment.begin(),
solution_ptr_->assignment.end(),
f_t{0});
solution_ptr_->clamp_within_bounds();
}

bool solution_found() const { return solution_found_; }
f_t get_best_objective() const { return best_objective_; }
void set_best_objective(f_t obj) { best_objective_ = obj; }
const std::vector<f_t>& get_best_assignment() const { return best_assignment_; }

protected:
~early_heuristic_t() = default;

// NOT thread-safe. solver_obj is in solver-space (always minimization).
// Uses a private CUDA stream to avoid racing with the FJ solver's stream.
void try_update_best(f_t solver_obj, const std::vector<f_t>& assignment)
{
if (solver_obj >= best_objective_) { return; }
best_objective_ = solver_obj;

RAFT_CUDA_TRY(cudaSetDevice(device_id_));
auto stream = handle_.get_stream();
rmm::device_uvector<f_t> d_assignment(assignment.size(), stream);
raft::copy(d_assignment.data(), assignment.data(), assignment.size(), stream);
problem_ptr_->post_process_assignment(d_assignment, true, stream);
auto user_assignment = cuopt::host_copy(d_assignment, stream);

best_assignment_ = user_assignment;
solution_found_ = true;
f_t user_obj = problem_ptr_->get_user_obj_from_solver_obj(solver_obj);
double elapsed =
std::chrono::duration<double>(std::chrono::steady_clock::now() - start_time_).count();
CUOPT_LOG_INFO("Early heuristics (%s) lowered the primal bound. Objective %g. Time %.2f",
Derived::name(),
user_obj,
elapsed);
if (incumbent_callback_) { incumbent_callback_(solver_obj, user_obj, user_assignment); }
}

int device_id_{0};

// handle_ must be declared before problem_ptr_/solution_ptr_ so it outlives them
// (C++ destroys members in reverse declaration order)
raft::handle_t handle_;

std::unique_ptr<problem_t<i_t, f_t>> problem_ptr_;
std::unique_ptr<solution_t<i_t, f_t>> solution_ptr_;

bool solution_found_{false};
f_t best_objective_{std::numeric_limits<f_t>::infinity()};
std::vector<f_t> best_assignment_;

early_incumbent_callback_t<f_t> incumbent_callback_;
std::chrono::steady_clock::time_point start_time_;
};

} // namespace cuopt::linear_programming::detail
Loading