From 7e0a4dc83585f1afeb1ca578b7512558f2a64f92 Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Mon, 23 Feb 2026 12:07:41 +0100 Subject: [PATCH] Move ChannelMonitorUpdateStatus mode check from ChannelManager to ChainMonitor When `ChannelMonitor::update_monitor` fails (e.g. a counterparty `commitment_signed` arrives after funding was spent), `ChainMonitor` persists the full monitor successfully but overrides the return value to `InProgress`. If the user's `Persist` impl only returns `Completed`, this causes `ChannelManager::handle_monitor_update_res` to panic on the mode mismatch. The root cause is that the mode check lived in `ChannelManager`, which sees the *post-override* status. By moving the check to `ChainMonitor` and running it against the *raw* `Persist` result (before the `update_res.is_err()` override), we validate only the user's persister behavior. ChainMonitor's internal override to `InProgress` for failed monitor updates no longer triggers a false mode violation. Also fix the `chanmon_consistency` fuzzer's `reload_node`, which hardcoded the persister to `Completed` before calling `watch_channel`, then switched to the node's actual mode afterward. With the mode check now in `ChainMonitor`, this caused a false violation. Set the persister to the correct mode before calling `watch_channel` so `ChainMonitor` sees a consistent persist mode from the start. Additionally, mirror pending monitor updates into `TestChainMonitor.latest_monitors` after `watch_channel` returns `InProgress`. The real `ChainMonitor` tracks the initial `update_id` as pending, but `TestChainMonitor` was unaware of it since `watch_channel` is called on the real `ChainMonitor` directly (bypassing `TestChainMonitor`). Without this, `complete_all_monitor_updates` could never drain and complete the update, leaving reloaded node channels permanently stuck. Co-Authored-By: Claude Opus 4.6 --- fuzz/src/chanmon_consistency.rs | 16 ++++++++++++-- lightning/src/chain/chainmonitor.rs | 33 +++++++++++++++++++++++++++++ lightning/src/ln/channelmanager.rs | 25 +--------------------- 3 files changed, 48 insertions(+), 26 deletions(-) diff --git a/fuzz/src/chanmon_consistency.rs b/fuzz/src/chanmon_consistency.rs index 21623fdba1e..539f6d7e7d1 100644 --- a/fuzz/src/chanmon_consistency.rs +++ b/fuzz/src/chanmon_consistency.rs @@ -1038,13 +1038,25 @@ pub fn do_test( let manager = <(BlockHash, ChanMan)>::read(&mut &ser[..], read_args).expect("Failed to read manager"); let res = (manager.1, chain_monitor.clone()); + let expected_status = *mon_style[node_id as usize].borrow(); + *chain_monitor.persister.update_ret.lock().unwrap() = expected_status.clone(); for (channel_id, mon) in monitors.drain() { + let monitor_id = mon.get_latest_update_id(); assert_eq!( chain_monitor.chain_monitor.watch_channel(channel_id, mon), - Ok(ChannelMonitorUpdateStatus::Completed) + Ok(expected_status.clone()) ); + // When persistence returns InProgress, the real ChainMonitor tracks + // the initial update_id as pending. We must mirror this in the + // TestChainMonitor's latest_monitors so that + // complete_all_monitor_updates can drain and complete it later. + if expected_status == chain::ChannelMonitorUpdateStatus::InProgress { + let mut map = chain_monitor.latest_monitors.lock().unwrap(); + if let Some(state) = map.get_mut(&channel_id) { + state.pending_monitors.push((monitor_id, state.persisted_monitor.clone())); + } + } } - *chain_monitor.persister.update_ret.lock().unwrap() = *mon_style[node_id as usize].borrow(); res }; diff --git a/lightning/src/chain/chainmonitor.rs b/lightning/src/chain/chainmonitor.rs index 7db1b697c2b..aec31cb3a47 100644 --- a/lightning/src/chain/chainmonitor.rs +++ b/lightning/src/chain/chainmonitor.rs @@ -369,6 +369,12 @@ pub struct ChainMonitor< /// Messages to send to the peer. This is currently used to distribute PeerStorage to channel partners. pending_send_only_events: Mutex>, + /// We only support using one of [`ChannelMonitorUpdateStatus::InProgress`] and + /// [`ChannelMonitorUpdateStatus::Completed`] without restarting. We enforce this in non-test + /// builds by storing which one is in use (0 = unset, 1 = InProgress, 2 = Completed). + #[cfg(not(any(test, feature = "_externalize_tests")))] + monitor_update_type: AtomicUsize, + #[cfg(peer_storage)] our_peerstorage_encryption_key: PeerStorageKey, } @@ -412,6 +418,8 @@ where event_notifier: Arc::clone(&event_notifier), persister: AsyncPersister { persister, event_notifier }, pending_send_only_events: Mutex::new(Vec::new()), + #[cfg(not(any(test, feature = "_externalize_tests")))] + monitor_update_type: AtomicUsize::new(0), #[cfg(peer_storage)] our_peerstorage_encryption_key: _our_peerstorage_encryption_key, } @@ -617,11 +625,32 @@ where highest_chain_height: AtomicUsize::new(0), event_notifier: Arc::new(Notifier::new()), pending_send_only_events: Mutex::new(Vec::new()), + #[cfg(not(any(test, feature = "_externalize_tests")))] + monitor_update_type: AtomicUsize::new(0), #[cfg(peer_storage)] our_peerstorage_encryption_key: _our_peerstorage_encryption_key, } } + #[cfg(not(any(test, feature = "_externalize_tests")))] + fn check_monitor_update_type( + monitor_update_type: &AtomicUsize, persist_res: &ChannelMonitorUpdateStatus, + ) { + match persist_res { + ChannelMonitorUpdateStatus::InProgress => { + if monitor_update_type.swap(1, Ordering::Relaxed) == 2 { + panic!("Cannot use both ChannelMonitorUpdateStatus modes InProgress and Completed without restart"); + } + }, + ChannelMonitorUpdateStatus::Completed => { + if monitor_update_type.swap(2, Ordering::Relaxed) == 1 { + panic!("Cannot use both ChannelMonitorUpdateStatus modes InProgress and Completed without restart"); + } + }, + ChannelMonitorUpdateStatus::UnrecoverableError => {}, + } + } + /// Gets the balances in the contained [`ChannelMonitor`]s which are claimable on-chain or /// claims which are awaiting confirmation. /// @@ -1285,6 +1314,8 @@ where let update_id = monitor.get_latest_update_id(); let mut pending_monitor_updates = Vec::new(); let persist_res = self.persister.persist_new_channel(monitor.persistence_key(), &monitor); + #[cfg(not(any(test, feature = "_externalize_tests")))] + Self::check_monitor_update_type(&self.monitor_update_type, &persist_res); match persist_res { ChannelMonitorUpdateStatus::InProgress => { log_info!(logger, "Persistence of new ChannelMonitor in progress",); @@ -1367,6 +1398,8 @@ where monitor, ) }; + #[cfg(not(any(test, feature = "_externalize_tests")))] + Self::check_monitor_update_type(&self.monitor_update_type, &persist_res); match persist_res { ChannelMonitorUpdateStatus::InProgress => { pending_monitor_updates.push(update_id); diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 08cbb6f6bf7..4b24dde0d26 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -2779,13 +2779,6 @@ pub struct ChannelManager< #[cfg(any(test, feature = "_test_utils"))] pub(super) per_peer_state: FairRwLock>>>, - /// We only support using one of [`ChannelMonitorUpdateStatus::InProgress`] and - /// [`ChannelMonitorUpdateStatus::Completed`] without restarting. Because the API does not - /// otherwise directly enforce this, we enforce it in non-test builds here by storing which one - /// is in use. - #[cfg(not(any(test, feature = "_externalize_tests")))] - monitor_update_type: AtomicUsize, - /// The set of events which we need to give to the user to handle. In some cases an event may /// require some further action after the user handles it (currently only blocking a monitor /// update from being handed to the user to ensure the included changes to the channel state @@ -3527,9 +3520,6 @@ impl< per_peer_state: FairRwLock::new(new_hash_map()), - #[cfg(not(any(test, feature = "_externalize_tests")))] - monitor_update_type: AtomicUsize::new(0), - pending_events: Mutex::new(VecDeque::new()), pending_events_processor: AtomicBool::new(false), pending_htlc_forwards_processor: AtomicBool::new(false), @@ -10006,23 +9996,13 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ panic!("{}", err_str); }, ChannelMonitorUpdateStatus::InProgress => { - #[cfg(not(any(test, feature = "_externalize_tests")))] - if self.monitor_update_type.swap(1, Ordering::Relaxed) == 2 { - panic!("Cannot use both ChannelMonitorUpdateStatus modes InProgress and Completed without restart"); - } log_debug!( logger, "ChannelMonitor update in flight, holding messages until the update completes.", ); false }, - ChannelMonitorUpdateStatus::Completed => { - #[cfg(not(any(test, feature = "_externalize_tests")))] - if self.monitor_update_type.swap(2, Ordering::Relaxed) == 1 { - panic!("Cannot use both ChannelMonitorUpdateStatus modes InProgress and Completed without restart"); - } - true - }, + ChannelMonitorUpdateStatus::Completed => true, } } @@ -19550,9 +19530,6 @@ impl< per_peer_state: FairRwLock::new(per_peer_state), - #[cfg(not(any(test, feature = "_externalize_tests")))] - monitor_update_type: AtomicUsize::new(0), - pending_events: Mutex::new(pending_events_read), pending_events_processor: AtomicBool::new(false), pending_htlc_forwards_processor: AtomicBool::new(false),