From 7e0a4dc83585f1afeb1ca578b7512558f2a64f92 Mon Sep 17 00:00:00 2001
From: Joost Jager <joost.jager@gmail.com>
Date: Mon, 23 Feb 2026 12:07:41 +0100
Subject: [PATCH] Move ChannelMonitorUpdateStatus mode check from
 ChannelManager to ChainMonitor

When `ChannelMonitor::update_monitor` fails (e.g. a counterparty
`commitment_signed` arrives after funding was spent), `ChainMonitor`
persists the full monitor successfully but overrides the return value
to `InProgress`. If the user's `Persist` impl only returns `Completed`,
this causes `ChannelManager::handle_monitor_update_res` to panic on
the mode mismatch.

The root cause is that the mode check lived in `ChannelManager`, which
sees the *post-override* status. By moving the check to `ChainMonitor`
and running it against the *raw* `Persist` result (before the
`update_res.is_err()` override), we validate only the user's persister
behavior. ChainMonitor's internal override to `InProgress` for failed
monitor updates no longer triggers a false mode violation.

Also fix the `chanmon_consistency` fuzzer's `reload_node`, which
hardcoded the persister to `Completed` before calling `watch_channel`,
then switched to the node's actual mode afterward. With the mode check
now in `ChainMonitor`, this caused a false violation. Set the persister
to the correct mode before calling `watch_channel` so `ChainMonitor`
sees a consistent persist mode from the start.

Additionally, mirror pending monitor updates into
`TestChainMonitor.latest_monitors` after `watch_channel` returns
`InProgress`. The real `ChainMonitor` tracks the initial `update_id`
as pending, but `TestChainMonitor` was unaware of it since
`watch_channel` is called on the real `ChainMonitor` directly
(bypassing `TestChainMonitor`). Without this,
`complete_all_monitor_updates` could never drain and complete the
update, leaving reloaded node channels permanently stuck.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 fuzz/src/chanmon_consistency.rs     | 16 ++++++++++++--
 lightning/src/chain/chainmonitor.rs | 33 +++++++++++++++++++++++++++++
 lightning/src/ln/channelmanager.rs  | 25 +---------------------
 3 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/fuzz/src/chanmon_consistency.rs b/fuzz/src/chanmon_consistency.rs
index 21623fdba1e..539f6d7e7d1 100644
--- a/fuzz/src/chanmon_consistency.rs
+++ b/fuzz/src/chanmon_consistency.rs
@@ -1038,13 +1038,25 @@ pub fn do_test<Out: Output + MaybeSend + MaybeSync>(
 		let manager =
 			<(BlockHash, ChanMan)>::read(&mut &ser[..], read_args).expect("Failed to read manager");
 		let res = (manager.1, chain_monitor.clone());
+		let expected_status = *mon_style[node_id as usize].borrow();
+		*chain_monitor.persister.update_ret.lock().unwrap() = expected_status.clone();
 		for (channel_id, mon) in monitors.drain() {
+			let monitor_id = mon.get_latest_update_id();
 			assert_eq!(
 				chain_monitor.chain_monitor.watch_channel(channel_id, mon),
-				Ok(ChannelMonitorUpdateStatus::Completed)
+				Ok(expected_status.clone())
 			);
+			// When persistence returns InProgress, the real ChainMonitor tracks
+			// the initial update_id as pending. We must mirror this in the
+			// TestChainMonitor's latest_monitors so that
+			// complete_all_monitor_updates can drain and complete it later.
+			if expected_status == chain::ChannelMonitorUpdateStatus::InProgress {
+				let mut map = chain_monitor.latest_monitors.lock().unwrap();
+				if let Some(state) = map.get_mut(&channel_id) {
+					state.pending_monitors.push((monitor_id, state.persisted_monitor.clone()));
+				}
+			}
 		}
-		*chain_monitor.persister.update_ret.lock().unwrap() = *mon_style[node_id as usize].borrow();
 		res
 	};
 
diff --git a/lightning/src/chain/chainmonitor.rs b/lightning/src/chain/chainmonitor.rs
index 7db1b697c2b..aec31cb3a47 100644
--- a/lightning/src/chain/chainmonitor.rs
+++ b/lightning/src/chain/chainmonitor.rs
@@ -369,6 +369,12 @@ pub struct ChainMonitor<
 	/// Messages to send to the peer. This is currently used to distribute PeerStorage to channel partners.
 	pending_send_only_events: Mutex<Vec<MessageSendEvent>>,
 
+	/// We only support using one of [`ChannelMonitorUpdateStatus::InProgress`] and
+	/// [`ChannelMonitorUpdateStatus::Completed`] without restarting. We enforce this in non-test
+	/// builds by storing which one is in use (0 = unset, 1 = InProgress, 2 = Completed).
+	#[cfg(not(any(test, feature = "_externalize_tests")))]
+	monitor_update_type: AtomicUsize,
+
 	#[cfg(peer_storage)]
 	our_peerstorage_encryption_key: PeerStorageKey,
 }
@@ -412,6 +418,8 @@ where
 			event_notifier: Arc::clone(&event_notifier),
 			persister: AsyncPersister { persister, event_notifier },
 			pending_send_only_events: Mutex::new(Vec::new()),
+			#[cfg(not(any(test, feature = "_externalize_tests")))]
+			monitor_update_type: AtomicUsize::new(0),
 			#[cfg(peer_storage)]
 			our_peerstorage_encryption_key: _our_peerstorage_encryption_key,
 		}
@@ -617,11 +625,32 @@ where
 			highest_chain_height: AtomicUsize::new(0),
 			event_notifier: Arc::new(Notifier::new()),
 			pending_send_only_events: Mutex::new(Vec::new()),
+			#[cfg(not(any(test, feature = "_externalize_tests")))]
+			monitor_update_type: AtomicUsize::new(0),
 			#[cfg(peer_storage)]
 			our_peerstorage_encryption_key: _our_peerstorage_encryption_key,
 		}
 	}
 
+	#[cfg(not(any(test, feature = "_externalize_tests")))]
+	fn check_monitor_update_type(
+		monitor_update_type: &AtomicUsize, persist_res: &ChannelMonitorUpdateStatus,
+	) {
+		match persist_res {
+			ChannelMonitorUpdateStatus::InProgress => {
+				if monitor_update_type.swap(1, Ordering::Relaxed) == 2 {
+					panic!("Cannot use both ChannelMonitorUpdateStatus modes InProgress and Completed without restart");
+				}
+			},
+			ChannelMonitorUpdateStatus::Completed => {
+				if monitor_update_type.swap(2, Ordering::Relaxed) == 1 {
+					panic!("Cannot use both ChannelMonitorUpdateStatus modes InProgress and Completed without restart");
+				}
+			},
+			ChannelMonitorUpdateStatus::UnrecoverableError => {},
+		}
+	}
+
 	/// Gets the balances in the contained [`ChannelMonitor`]s which are claimable on-chain or
 	/// claims which are awaiting confirmation.
 	///
@@ -1285,6 +1314,8 @@ where
 		let update_id = monitor.get_latest_update_id();
 		let mut pending_monitor_updates = Vec::new();
 		let persist_res = self.persister.persist_new_channel(monitor.persistence_key(), &monitor);
+		#[cfg(not(any(test, feature = "_externalize_tests")))]
+		Self::check_monitor_update_type(&self.monitor_update_type, &persist_res);
 		match persist_res {
 			ChannelMonitorUpdateStatus::InProgress => {
 				log_info!(logger, "Persistence of new ChannelMonitor in progress",);
@@ -1367,6 +1398,8 @@ where
 						monitor,
 					)
 				};
+				#[cfg(not(any(test, feature = "_externalize_tests")))]
+				Self::check_monitor_update_type(&self.monitor_update_type, &persist_res);
 				match persist_res {
 					ChannelMonitorUpdateStatus::InProgress => {
 						pending_monitor_updates.push(update_id);
diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index 08cbb6f6bf7..4b24dde0d26 100644
--- a/lightning/src/ln/channelmanager.rs
+++ b/lightning/src/ln/channelmanager.rs
@@ -2779,13 +2779,6 @@ pub struct ChannelManager<
 	#[cfg(any(test, feature = "_test_utils"))]
 	pub(super) per_peer_state: FairRwLock<HashMap<PublicKey, Mutex<PeerState<SP>>>>,
 
-	/// We only support using one of [`ChannelMonitorUpdateStatus::InProgress`] and
-	/// [`ChannelMonitorUpdateStatus::Completed`] without restarting. Because the API does not
-	/// otherwise directly enforce this, we enforce it in non-test builds here by storing which one
-	/// is in use.
-	#[cfg(not(any(test, feature = "_externalize_tests")))]
-	monitor_update_type: AtomicUsize,
-
 	/// The set of events which we need to give to the user to handle. In some cases an event may
 	/// require some further action after the user handles it (currently only blocking a monitor
 	/// update from being handed to the user to ensure the included changes to the channel state
@@ -3527,9 +3520,6 @@ impl<
 
 			per_peer_state: FairRwLock::new(new_hash_map()),
 
-			#[cfg(not(any(test, feature = "_externalize_tests")))]
-			monitor_update_type: AtomicUsize::new(0),
-
 			pending_events: Mutex::new(VecDeque::new()),
 			pending_events_processor: AtomicBool::new(false),
 			pending_htlc_forwards_processor: AtomicBool::new(false),
@@ -10006,23 +9996,13 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
 				panic!("{}", err_str);
 			},
 			ChannelMonitorUpdateStatus::InProgress => {
-				#[cfg(not(any(test, feature = "_externalize_tests")))]
-				if self.monitor_update_type.swap(1, Ordering::Relaxed) == 2 {
-					panic!("Cannot use both ChannelMonitorUpdateStatus modes InProgress and Completed without restart");
-				}
 				log_debug!(
 					logger,
 					"ChannelMonitor update in flight, holding messages until the update completes.",
 				);
 				false
 			},
-			ChannelMonitorUpdateStatus::Completed => {
-				#[cfg(not(any(test, feature = "_externalize_tests")))]
-				if self.monitor_update_type.swap(2, Ordering::Relaxed) == 1 {
-					panic!("Cannot use both ChannelMonitorUpdateStatus modes InProgress and Completed without restart");
-				}
-				true
-			},
+			ChannelMonitorUpdateStatus::Completed => true,
 		}
 	}
 
@@ -19550,9 +19530,6 @@ impl<
 
 			per_peer_state: FairRwLock::new(per_peer_state),
 
-			#[cfg(not(any(test, feature = "_externalize_tests")))]
-			monitor_update_type: AtomicUsize::new(0),
-
 			pending_events: Mutex::new(pending_events_read),
 			pending_events_processor: AtomicBool::new(false),
 			pending_htlc_forwards_processor: AtomicBool::new(false),