diff --git a/.gitignore b/.gitignore index 4fffb2f..4401d3f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target /Cargo.lock +testbins/basic/target \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index d2867fe..1c27e11 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,16 +15,23 @@ anyhow = "1.0.56" futures = "0.3" libc = "0.2.76" log = "0.4.17" +memfile = "0.3.1" nix = "0.25" -sd-notify = "0.3" +sd-notify = { version = "0.4.1", features = ["fdstore"] } serde = { version = "1", features = ["derive", "rc"] } serde_json = "1.0" thiserror = "1.0" tokio = { version = "1.24.1", features = ["full", "test-util"] } -tokio-stream = { version = "0.1", features = ["net", "io-util" ] } +tokio-stream = { version = "0.1", features = ["net", "io-util"] } tokio-util = { version = "0.7.4", features = ["compat", "time", "codec"] } [dev-dependencies] clap = { version = "4.1.8", features = ["derive"] } +command-group = "5.0" env_logger = "0.10.0" rand = { version = "0.8", features = ["small_rng"] } +sendfd = "0.4.3" +test-binary = "3.0" + +[patch.crates-io] +sd-notify = { git = "https://github.com/cbranch/sd-notify.git", rev = "b80e6eb18cb64a18015f59dcf182284ffe96fdf8" } diff --git a/README.md b/README.md index 09a307c..10d98cb 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,30 @@ The main struct of interest is `RestartConfig` which has methods for detecting o restart. For shutting down a restarted process, the `ShutdownCoordinator` provides the means for both signalling a shutdown event to spawned tasks, and awaiting their completion. +## Using shellflip with systemd + +If you are running your process as a systemd service, you may use shellflip to fork a new instance +of the process. This allows you to upgrade the binary or apply new configuration, but you cannot +change the environment or any systemd-set configuration without stopping the service completely. + +If this is insufficient for your needs, shellflip has an alternative runtime model that works with +systemd's process management model. When issuing a `systemctl restart` or using the restart +coordination socket, shellflip can pass data intended for the new process to systemd, then receive +that data in the newly started instance. This requires systemd's file descriptor store to be +enabled; an example `.service` file can be found in the `examples/` directory, or you may test it as +a one-off command using `systemd-run`: + +`systemd-run -p FileDescriptorStoreMax=4096 target/debug/examples/restarter --systemd` + +The limitations of systemd's process management remain; the old process must terminate before the +new process can start, so all tasks must end promptly and any child processes must terminate or +accept being ungracefully killed when the parent terminates. + +If you need to prevent restarting the service if the service cannot successfully serialize its +state, use ExecReload and the restart coordination socket like the non-systemd-aware shellflip mode. +Make sure that systemd restarts your service on successful exit and/or force your service to +terminate with a non-zero error code on restart. + ## License BSD licensed. See the [LICENSE](LICENSE) file for details. diff --git a/examples/restarter.rs b/examples/restarter.rs index 9f7cc05..6c02a64 100644 --- a/examples/restarter.rs +++ b/examples/restarter.rs @@ -40,7 +40,7 @@ struct AppData { #[async_trait] impl LifecycleHandler for AppData { - async fn send_to_new_process(&mut self, mut write_pipe: PipeWriter) -> std::io::Result<()> { + async fn send_to_new_process(&mut self, mut write_pipe: PipeWriter<'_>) -> std::io::Result<()> { if self.restart_generation > 4 { log::info!("Four restarts is more than anybody needs, surely?"); return Err(std::io::Error::new( @@ -57,21 +57,11 @@ impl LifecycleHandler for AppData { async fn main() -> Result<(), Error> { env_logger::init(); let args = Args::parse(); - let mut app_data = AppData { - restart_generation: 0, - }; - - if let Some(mut handover_pipe) = receive_from_old_process() { - app_data.restart_generation = handover_pipe.read_u32().await? + 1; - } - - let restart_generation = app_data.restart_generation; // Configure the essential requirements for implementing graceful restart. let restart_conf = RestartConfig { enabled: true, coordination_socket_path: args.socket.into(), - lifecycle_handler: Box::new(app_data), ..Default::default() }; @@ -94,8 +84,18 @@ async fn main() -> Result<(), Error> { None => {} } + let mut app_data = AppData { + restart_generation: 0, + }; + + if let Some(mut handover_pipe) = receive_from_old_process() { + app_data.restart_generation = handover_pipe.read_u32().await? + 1; + } + + let restart_generation = app_data.restart_generation; + // Start the restart thread and get a task that will complete when a restart completes. - let restart_task = restart_conf.try_into_restart_task()?; + let restart_task = restart_conf.try_into_restart_task(app_data)?; // (need to pin this because of the loop below!) pin!(restart_task); // Create a shutdown coordinator so that we can wait for all client connections to complete. diff --git a/src/fdstore.rs b/src/fdstore.rs new file mode 100644 index 0000000..7146fa5 --- /dev/null +++ b/src/fdstore.rs @@ -0,0 +1,22 @@ +use memfile::MemFile; +use std::io; +use std::pin::Pin; +use tokio::fs::File; + +const SHELLFLIP_FD_PREFIX: &str = "sf_"; +const SYSTEMD_MEMFD_NAME: &str = "mem_handover"; + +pub(crate) fn create_handover_memfd() -> io::Result>> { + let memfd = MemFile::create_default("shellflip_restart")?; + + sd_notify::notify_with_fds( + false, + &[ + sd_notify::NotifyState::FdStore, + sd_notify::NotifyState::FdName(&format!("{SHELLFLIP_FD_PREFIX}{SYSTEMD_MEMFD_NAME}")), + ], + &[memfd.as_fd()], + )?; + + Ok(Box::pin(File::from_std(memfd.into_file()))) +} diff --git a/src/lib.rs b/src/lib.rs index 7146b8d..bba1f5e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -41,6 +41,7 @@ //! `LifecycleHandler::new_process_failed` is called and you can undo any changes you made in //! preparation for handover. If the new process succeeds, however, the restart task will resolve //! and you may terminate the process as usual. +pub mod fdstore; pub mod lifecycle; mod pipes; pub mod restart_coordination_socket; @@ -67,6 +68,7 @@ use std::os::fd::{AsFd, AsRawFd, BorrowedFd, OwnedFd, RawFd}; use std::os::unix::net::UnixListener as StdUnixListener; use std::os::unix::process::CommandExt; use std::path::{Path, PathBuf}; +use std::pin::pin; use std::process; use std::thread; use thiserror::Error; @@ -93,19 +95,33 @@ pub struct RestartConfig { pub coordination_socket_path: PathBuf, /// Sets environment variables on the newly-started process pub environment: Vec<(OsString, OsString)>, - /// Receive fine-grained events on the lifecycle of the new process and support data transfer. - pub lifecycle_handler: Box, /// Exits early when child process fail to start pub exit_on_error: bool, } impl RestartConfig { /// Prepare the current process to handle restarts, if enabled. - pub fn try_into_restart_task( + pub fn try_into_restart_task( self, + lifecycle_handler: L, ) -> io::Result<(impl Future> + Send)> { fixup_systemd_env(); - spawn_restart_task(self) + spawn_restart_task(self, Box::new(lifecycle_handler)) + } + + /// Prepare the current process to handle restarts through systemd and its file descriptor store. + /// + /// When SIGUSR1 is received, instead of forking like `try_into_restart_task`, the process shall store state in the + /// file descriptor store before terminating. + /// + /// Proper use of this option requires a different restart strategy for your service; instead of letting tasks + /// run to completion, the shutdown signal indicates that you need to suspend task state. + pub fn try_into_systemd_restart_task( + self, + lifecycle_handler: L, + ) -> io::Result<(impl Future> + Send)> { + fixup_systemd_env(); + spawn_systemd_restart_task(self, Box::new(lifecycle_handler)) } /// Request an already-running service to restart. @@ -137,7 +153,6 @@ impl Default for RestartConfig { enabled: false, coordination_socket_path: Default::default(), environment: vec![], - lifecycle_handler: Box::new(lifecycle::NullLifecycleHandler), exit_on_error: true, } } @@ -197,6 +212,11 @@ impl RestartResponder { } } } + + /// True if responding to a client, false if restart was triggered by signal. + fn client_initiated(&self) -> bool { + self.rpc.is_some() + } } /// Spawns a thread that can be used to restart the process. @@ -205,6 +225,7 @@ impl RestartResponder { /// The child spawner thread needs to be created before seccomp locks down fork/exec. pub fn spawn_restart_task( settings: RestartConfig, + lifecycle_handler: Box, ) -> io::Result> + Send> { let socket = match settings.enabled { true => Some(settings.coordination_socket_path.as_ref()), @@ -213,8 +234,7 @@ pub fn spawn_restart_task( let mut signal_stream = signal(SignalKind::user_defined1())?; let (restart_fd, mut socket_stream) = new_restart_coordination_socket_stream(socket)?; - let mut child_spawner = - ChildSpawner::new(restart_fd, settings.environment, settings.lifecycle_handler); + let mut child_spawner = ChildSpawner::new(restart_fd, settings.environment, lifecycle_handler); Ok(async move { startup_complete()?; @@ -255,6 +275,66 @@ pub fn spawn_restart_task( }) } +/// Spawns a thread that handles state serialization during systemd restart. +/// Returns a future that resolves when a restart succeeds. +pub fn spawn_systemd_restart_task( + settings: RestartConfig, + mut lifecycle_handler: Box, +) -> io::Result> + Send> { + let socket = match settings.enabled { + true => Some(settings.coordination_socket_path.as_ref()), + false => None, + }; + + let mut signal_stream = signal(SignalKind::user_defined1())?; + // No child process, so drop the spare inheritable socket. + let (_, mut socket_stream) = new_restart_coordination_socket_stream(socket)?; + + // Ensure that we can store at least the serialized state into systemd. Without this, no graceful restart can happen! + let mut memfd = match fdstore::create_handover_memfd() { + Ok(memfd) => memfd, + Err(e) => { + log::error!( + "Failed to write memfd to systemd file descriptor store: {}", + e + ); + return Err(e); + } + }; + + Ok(async move { + startup_complete()?; + loop { + let responder = next_restart_request(&mut signal_stream, &mut socket_stream).await?; + log::debug!("Received restart signal, serializing state"); + + if !responder.client_initiated() { + if let Err(e) = sd_notify::notify(false, &[sd_notify::NotifyState::Stopping]) { + log::error!("Failed to notify systemd: {}", e); + } + } + + let res = lifecycle_handler.send_to_new_process(memfd.as_mut()).await; + + let retry_on_err = responder.client_initiated(); + + responder + .respond(res.as_ref().map(|_| 0).map_err(|e| e.to_string())) + .await; + + if res.is_err() && retry_on_err { + // If a call to `send_to_new_process` fails, this will clean up + // the memfd to avoid invalid data read on restart. + memfd.set_len(0).await.inspect_err(|e| { + log::error!("Failed to truncate handover memfd: {}", e); + })?; + } else { + return res.map_err(|e| e.into()); + } + } + }) +} + /// Handles forking a new client in a more privileged thread. struct ChildSpawner { signal_sender: Sender<()>, @@ -461,9 +541,8 @@ async fn send_parent_state( notif_w: CompletionSender, handover_w: StdFile, ) -> io::Result<()> { - lifecycle_handler - .send_to_new_process(Box::pin(File::from(handover_w))) - .await?; + let handover_w = pin!(File::from(handover_w)); + lifecycle_handler.send_to_new_process(handover_w).await?; // only the child needs the write end drop(notif_w); diff --git a/src/lifecycle.rs b/src/lifecycle.rs index 79f011e..0f3294d 100644 --- a/src/lifecycle.rs +++ b/src/lifecycle.rs @@ -8,14 +8,14 @@ use tokio::fs::File; use tokio::io::{AsyncRead, AsyncWrite}; pub type PipeReader = Pin>; -pub type PipeWriter = Pin>; +pub type PipeWriter<'a> = Pin<&'a mut (dyn AsyncWrite + Send)>; #[async_trait] pub trait LifecycleHandler: Send { /// Called after the child process has been spawned, allowing the current process to send state /// to the child process. The child process can receive this data by calling /// `receive_from_old_process`. - async fn send_to_new_process(&mut self, _write_pipe: PipeWriter) -> io::Result<()> { + async fn send_to_new_process(&mut self, _write_pipe: PipeWriter<'_>) -> io::Result<()> { Ok(()) } diff --git a/testbins/basic/Cargo.lock b/testbins/basic/Cargo.lock new file mode 100644 index 0000000..94ffb7c --- /dev/null +++ b/testbins/basic/Cargo.lock @@ -0,0 +1,762 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "addr2line" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5fb1d8e4442bd405fdfd1dacb42792696b0cf9cb15882e5d097b742a676d375" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + +[[package]] +name = "anstream" +version = "0.6.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" + +[[package]] +name = "anstyle-parse" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" + +[[package]] +name = "async-trait" +version = "0.1.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "backtrace" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + +[[package]] +name = "basic" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "clap", + "futures", + "shellflip", + "tokio", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "bytes" +version = "1.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "4.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0956a43b323ac1afaffc053ed5c4b7c1f1800bacd1683c353aabbb752515dd3" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d72166dd41634086d5803a47eb71ae740e61d84709c36f3c34110173db3961b" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" + +[[package]] +name = "colorchoice" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" + +[[package]] +name = "futures" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" + +[[package]] +name = "futures-executor" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" + +[[package]] +name = "futures-macro" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" + +[[package]] +name = "futures-task" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" + +[[package]] +name = "futures-util" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "gimli" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32085ea23f3234fc7846555e85283ba4de91e21016dc0455a16286d87a292d64" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "libc" +version = "0.2.159" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "memfile" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f64636fdb65a5f0740f920c4281f3dbb76a71e25e25914b6d27000739897d40e" +dependencies = [ + "libc", +] + +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" +dependencies = [ + "hermit-abi", + "libc", + "wasi", + "windows-sys", +] + +[[package]] +name = "nix" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4" +dependencies = [ + "autocfg", + "bitflags 1.3.2", + "cfg-if", + "libc", + "memoffset", + "pin-utils", +] + +[[package]] +name = "object" +version = "0.36.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a" +dependencies = [ + "memchr", +] + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "sd-notify" +version = "0.4.1" +source = "git+https://github.com/cbranch/sd-notify.git?rev=b80e6eb18cb64a18015f59dcf182284ffe96fdf8#b80e6eb18cb64a18015f59dcf182284ffe96fdf8" +dependencies = [ + "sendfd", +] + +[[package]] +name = "sendfd" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "604b71b8fc267e13bb3023a2c901126c8f349393666a6d98ac1ae5729b701798" +dependencies = [ + "libc", +] + +[[package]] +name = "serde" +version = "1.0.210" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.210" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.128" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "shellflip" +version = "2.1.0" +dependencies = [ + "anyhow", + "async-trait", + "futures", + "libc", + "log", + "memfile", + "nix", + "sd-notify", + "serde", + "serde_json", + "thiserror", + "tokio", + "tokio-stream", + "tokio-util", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "socket2" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio" +version = "1.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys", +] + +[[package]] +name = "tokio-macros" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-stream" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +dependencies = [ + "bytes", + "futures-core", + "futures-io", + "futures-sink", + "pin-project-lite", + "slab", + "tokio", +] + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/testbins/basic/Cargo.toml b/testbins/basic/Cargo.toml new file mode 100644 index 0000000..ffc681a --- /dev/null +++ b/testbins/basic/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "basic" +version = "0.1.0" +edition = "2021" +description = "Basic test binary" + +[workspace] + +[dependencies] +shellflip = { path = "../.." } +async-trait = "0.1.61" +anyhow = "1.0.56" +futures = "0.3" +tokio = { version = "1.24.1", features = ["full", "test-util"] } +clap = { version = "4.1.8", features = ["derive"] } + +[patch.crates-io] +sd-notify = { git = "https://github.com/cbranch/sd-notify.git", rev = "b80e6eb18cb64a18015f59dcf182284ffe96fdf8" } diff --git a/testbins/basic/src/main.rs b/testbins/basic/src/main.rs new file mode 100644 index 0000000..8ac8d42 --- /dev/null +++ b/testbins/basic/src/main.rs @@ -0,0 +1,118 @@ +//! Integration test binary for verifying process lifecycle. +use anyhow::{bail, Error}; +use async_trait::async_trait; +use clap::{Parser, Subcommand}; +use futures::future::{Either, TryFutureExt}; +use shellflip::lifecycle::*; +use shellflip::RestartConfig; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; + +const SERIALIZED_MAGIC_NUMBER: u32 = 0xCAFEF00D; + +/// Simple program to test graceful shutdown and restart +#[derive(Parser)] +#[command(author, version, about, long_about = None)] +struct Args { + #[command(subcommand)] + command: Option, + /// Restart coordination socket path + #[arg(short, long)] + socket: Option, + /// Use systemd restart lifecycle + #[arg(long)] + systemd: bool, +} + +#[derive(Subcommand)] +enum Commands { + /// Trigger restart + Restart, +} + +struct AppData { + restart_generation: u32, +} + +#[async_trait] +impl LifecycleHandler for AppData { + async fn send_to_new_process(&mut self, mut write_pipe: PipeWriter<'_>) -> std::io::Result<()> { + // Magic number + write_pipe.write_u32(SERIALIZED_MAGIC_NUMBER).await?; + + if self.restart_generation > 4 { + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + "The operation completed successfully", + )); + } + write_pipe.write_u32(self.restart_generation).await?; + Ok(()) + } +} + +/// Restart an already-running process +async fn do_restart(restart_conf: RestartConfig) -> Result<(), Error> { + match restart_conf.request_restart().await { + Ok(_) => { + println!("Restart request succeeded"); + Ok(()) + } + Err(e) => { + println!("Restart request failed: {}", e); + Err(e) + } + } +} + +/// Standard operating mode +async fn do_main(restart_conf: RestartConfig, systemd: bool) -> Result<(), Error> { + let mut app_data = AppData { + restart_generation: 0, + }; + + if let Some(mut handover_pipe) = receive_from_old_process() { + if handover_pipe.read_u32().await? != SERIALIZED_MAGIC_NUMBER { + bail!("Expected serialized data to begin with the magic number"); + } + + app_data.restart_generation = handover_pipe.read_u32().await? + 1; + } + + let generation = app_data.restart_generation; + // Start the restart thread and get a task that will complete when a restart completes. + let restart_task = if systemd { + Either::Left(restart_conf.try_into_systemd_restart_task(app_data)?) + } else { + Either::Right(restart_conf.try_into_restart_task(app_data)?.map_ok(|_| ())) + }; + // Bind a TCP listener socket to give us something to do + println!("Started with generation {}", generation); + + match restart_task.await { + Ok(_) => { + println!("Restart successful"); + Ok(()) + } + Err(e) => { + println!("Restart task failed: {}", e); + Err(e) + } + } +} + +#[tokio::main] +async fn main() -> Result<(), Error> { + let args = Args::parse(); + + // Configure the essential requirements for implementing graceful restart. + let restart_conf = RestartConfig { + enabled: true, + coordination_socket_path: args.socket.unwrap_or_default().into(), + ..Default::default() + }; + + match args.command { + Some(Commands::Restart) => do_restart(restart_conf).await, + None => do_main(restart_conf, args.systemd).await, + } +} diff --git a/tests/basic.rs b/tests/basic.rs new file mode 100644 index 0000000..fa6b9f5 --- /dev/null +++ b/tests/basic.rs @@ -0,0 +1,164 @@ +use command_group::{CommandGroup, Signal, UnixChildExt}; +use sendfd::RecvWithFd; +use std::fs::File; +use std::io::{BufRead, BufReader, Read, Seek}; +use std::os::fd::FromRawFd; +use std::os::unix::net::UnixDatagram; +use std::process::{Command, Stdio}; +use test_binary::build_test_binary_once; + +build_test_binary_once!(basic, "testbins"); + +#[track_caller] +fn assert_line(stdout: &mut BufReader, expected: &str) { + let mut line = String::new(); + stdout.read_line(&mut line).unwrap(); + assert_eq!(line, expected); +} + +#[test] +fn test_restart_on_signal() { + let mut test_process = Command::new(path_to_basic()) + .stdout(Stdio::piped()) + .group_spawn() + .expect("error running test binary"); + + let mut stdout = BufReader::new(test_process.inner().stdout.take().unwrap()); + assert_line(&mut stdout, "Started with generation 0\n"); + + // Try reloading the process every time. + for i in 1..6 { + test_process.signal(Signal::SIGUSR1).unwrap(); + // This is output from the child process + assert_line(&mut stdout, &format!("Started with generation {}\n", i)); + // This is output from the parent process + assert_line(&mut stdout, "Restart successful\n"); + + // The parent terminates immediately after the child starts successfully + assert!(test_process + .wait() + .expect("error waiting for test binary") + .success()); + } + + // The 6th restart fails; we should see this on stdout + test_process.signal(Signal::SIGUSR1).unwrap(); + // This is output from the child process + assert_line( + &mut stdout, + "Restart task failed: Restart failed: The operation completed successfully\n", + ); + + test_process.kill().unwrap(); +} + +#[test] +fn test_restart_coordination_socket() { + let sock_args = ["--socket", "/tmp/restarter.sock"]; + + let mut test_process = Command::new(path_to_basic()) + .args(sock_args) + .stdout(Stdio::piped()) + .group_spawn() + .expect("error running test binary"); + + let mut stdout = BufReader::new(test_process.inner().stdout.take().unwrap()); + assert_line(&mut stdout, "Started with generation 0\n"); + + for i in 1..6 { + let test_restart_output = Command::new(path_to_basic()) + .args(sock_args) + .arg("restart") + .output() + .expect("error running test binary"); + + assert!(test_restart_output.status.success()); + assert_eq!( + std::str::from_utf8(&test_restart_output.stdout).unwrap(), + "Restart request succeeded\n" + ); + + // This is output from the child process + assert_line(&mut stdout, &format!("Started with generation {}\n", i)); + // This is output from the parent process + assert_line(&mut stdout, "Restart successful\n"); + + // The parent terminates immediately after the child starts successfully + assert!(test_process + .wait() + .expect("error waiting for test binary") + .success()); + } + + // The 6th restart fails; this should be propagated to the restart invoker + let test_restart_output = Command::new(path_to_basic()) + .args(sock_args) + .arg("restart") + .output() + .expect("error running test binary"); + + assert!(!test_restart_output.status.success()); + assert_eq!( + std::str::from_utf8(&test_restart_output.stdout).unwrap(), + "Restart request failed: Child failed to start: The operation completed successfully\n" + ); + + test_process.kill().unwrap(); +} + +#[test] +fn test_systemd() { + let notify_socket_path = "/tmp/restart_notify.sock"; + let _ = std::fs::remove_file(notify_socket_path); + let notify_socket = UnixDatagram::bind(notify_socket_path).unwrap(); + notify_socket + .set_read_timeout(Some(std::time::Duration::from_secs(1))) + .unwrap(); + + let mut test_process = Command::new(path_to_basic()) + .stdout(Stdio::piped()) + .env("NOTIFY_SOCKET", notify_socket_path) + .arg("--systemd") + .group_spawn() + .expect("error running test binary"); + + let mut stdout = BufReader::new(test_process.inner().stdout.take().unwrap()); + assert_line(&mut stdout, "Started with generation 0\n"); + + let recv_notify_messages = || { + let mut buf = [0u8; 1024]; + let mut fds = [0i32; 32]; + let (n, fd_count) = notify_socket.recv_with_fd(&mut buf, &mut fds).unwrap(); + let messages = std::str::from_utf8(&buf[..n]).unwrap(); + ( + messages.split('\n').map(String::from).collect::>(), + fds[..fd_count].to_vec(), + ) + }; + + let (message_fdstore, stored_fds) = recv_notify_messages(); + assert_eq!(message_fdstore[0], "FDSTORE=1"); + assert_eq!(message_fdstore[1], "FDNAME=sf_mem_handover"); + assert_eq!(message_fdstore[2], ""); + assert_eq!(stored_fds.len(), 1); + assert_eq!(recv_notify_messages().0[0], "READY=1"); + + let mut memfd = unsafe { File::from_raw_fd(stored_fds[0]) }; + assert_eq!(memfd.metadata().unwrap().len(), 0); + + // Trigger restart in the process (i.e. a shutdown) and see what it wrote to the memfd + test_process.signal(Signal::SIGUSR1).unwrap(); + assert!(test_process + .wait() + .expect("error waiting for test binary") + .success()); + + let mut serialized = vec![0u8; 0]; + memfd.rewind().unwrap(); + memfd.read_to_end(&mut serialized).unwrap(); + // We always write a magic number to the beginning of the data + assert_eq!( + u32::from_be_bytes(serialized[0..4].try_into().unwrap()), + 0xCAFEF00D + ); +}