forked from newrelic/k8s-agents-operator
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: NR-121867 Supervisor traits (newrelic#24)
* feat: NR-121867 Supervisor traits, infra supervisor
* refactor: supervise just one process, wait and kill inside run
* Add Graceful shutdown to supervisor (newrelic#26)
* chore: cleanup
* test: kill process with the terminator
* test: scope process termination to unix
* feat: return another type for run
* test: move to integration
* test: deactivate miri
* chore: clippy suggestions, typos
* feat: default impl for SupervisorContext
* chore: cleanup run function and supervisorrunner
* refactor: use from attribute to provide impls
* test: remove supervisor mod test (now integration)
* refactor: unify types, go full typestate
* feat: define stop and Supervisor state
* refactor: make ProcessRunner.process private
* docs: remove unneeded, change docs for stop method

---------

Co-authored-by: Alvaro Cabanas <acabanas@newrelic.com>
Co-authored-by: Roger Coll <rogercoll@protonmail.com>
- Loading branch information
1 parent
4e4743d
commit 245ee7e
Showing
14 changed files
with
324 additions
and
54 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
use std::sync::{Arc, Condvar, Mutex, MutexGuard, PoisonError}; | ||
|
||
/// Cancellation token handed to supervisors and the threads they spawn.
///
/// It wraps a boolean flag guarded by a [`Mutex`], paired with a [`Condvar`], behind an
/// [`Arc`]. Cloning the context therefore yields another handle to the *same* flag, and the
/// default value has the flag unset (`false`).
#[derive(Debug, Clone, Default)]
pub struct SupervisorContext(Arc<(Mutex<bool>, Condvar)>);

impl SupervisorContext {
    /// Creates a context whose cancellation flag is not set.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the cancellation signal.
    ///
    /// All threads that are waiting for this signal (i.e. were passed this
    /// [`SupervisorContext`]) are notified so they unblock and finish execution,
    /// cancelling the processes.
    ///
    /// # Errors
    ///
    /// Returns the [`PoisonError`] produced by a failed `lock()` on the inner mutex. In
    /// that case the flag is left untouched and no thread is notified.
    pub fn cancel_all(&self) -> Result<(), PoisonError<MutexGuard<'_, bool>>> {
        let (lck, cvar) = &*self.0;
        // The temporary guard is released at the end of this statement, before notifying.
        *lck.lock()? = true;
        cvar.notify_all();
        Ok(())
    }

    /// Gives crate-internal code access to the flag's mutex and its condition variable.
    pub(crate) fn get_lock_cvar(&self) -> &(Mutex<bool>, Condvar) {
        &self.0
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
use std::{fmt::Debug, process::ExitStatus}; | ||
use thiserror::Error; | ||
|
||
#[derive(Error, Debug)] | ||
pub enum ProcessError { | ||
#[error("process exited with error: `{0}`")] | ||
ProcessExited(ExitStatus), | ||
|
||
#[error("io error")] | ||
IOError(#[from] std::io::Error), | ||
|
||
#[cfg(target_family = "unix")] | ||
#[error("system error")] | ||
NixError(#[from] nix::Error), | ||
} | ||
|
||
impl From<ExitStatus> for ProcessError { | ||
fn from(value: ExitStatus) -> Self { | ||
ProcessError::ProcessExited(value) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
pub mod context; | ||
mod error; | ||
pub mod runner; | ||
|
||
/// The Runner trait defines the entry-point interface for a supervisor. Exposes a run method that will start the supervised process' execution. | ||
pub trait Runner { | ||
type E: std::error::Error + Send + Sync; | ||
type H: Handle; | ||
|
||
/// The run method will execute a supervisor (non-blocking). Returns a [`Handle`] to manage the running process. | ||
fn run(self) -> Self::H; | ||
} | ||
|
||
/// Interface of the handle to a supervised process.
///
/// A `Handle` is consumed by [`Handle::stop`], which cancels the supervised process'
/// execution.
pub trait Handle {
    /// Error type associated with this handle.
    type E: std::error::Error + Send + Sync;
    /// Inner handle type given back by [`Handle::stop`].
    type S: Send + Sync;

    /// Cancels the supervised process and returns its inner handle.
    fn stop(self) -> Self::S;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
use std::{ | ||
ops::Deref, | ||
sync::mpsc::Sender, | ||
sync::{Arc, Condvar, Mutex}, | ||
thread::{self, JoinHandle}, | ||
}; | ||
|
||
use crate::command::{ | ||
stream::OutputEvent, wait_exit_timeout_default, CommandExecutor, CommandHandle, | ||
CommandTerminator, OutputStreamer, ProcessRunner, ProcessTerminator, | ||
}; | ||
|
||
use super::{context::SupervisorContext, error::ProcessError, Handle, Runner}; | ||
|
||
use log::error; | ||
|
||
pub struct Stopped { | ||
bin: String, | ||
args: Vec<String>, | ||
ctx: SupervisorContext, | ||
snd: Sender<OutputEvent>, | ||
} | ||
pub struct Running { | ||
handle: JoinHandle<()>, | ||
ctx: SupervisorContext, | ||
} | ||
|
||
#[derive(Debug)] | ||
pub struct SupervisorRunner<State = Stopped> { | ||
state: State, | ||
} | ||
|
||
impl<T> Deref for SupervisorRunner<T> { | ||
type Target = T; | ||
fn deref(&self) -> &Self::Target { | ||
&self.state | ||
} | ||
} | ||
|
||
impl Runner for SupervisorRunner<Stopped> { | ||
type E = ProcessError; | ||
type H = SupervisorRunner<Running>; | ||
|
||
fn run(self) -> Self::H { | ||
let ctx = self.ctx.clone(); | ||
SupervisorRunner { | ||
state: Running { | ||
handle: run_process_thread(self), | ||
ctx, | ||
}, | ||
} | ||
} | ||
} | ||
|
||
impl From<&SupervisorRunner<Stopped>> for ProcessRunner { | ||
fn from(value: &SupervisorRunner<Stopped>) -> Self { | ||
ProcessRunner::new(&value.bin, &value.args) | ||
} | ||
} | ||
|
||
fn run_process_thread(runner: SupervisorRunner<Stopped>) -> JoinHandle<()> { | ||
thread::spawn({ | ||
move || loop { | ||
let proc_runner = ProcessRunner::from(&runner); | ||
|
||
// Actually run the process | ||
let started = match proc_runner.start() { | ||
Ok(s) => s, | ||
Err(e) => { | ||
error!("Failed to start a supervised process: {}", e); | ||
continue; | ||
} | ||
}; | ||
|
||
// Stream the output | ||
let streaming = match started.stream(runner.snd.clone()) { | ||
Ok(s) => s, | ||
Err(e) => { | ||
error!("Failed to stream the output of a supervised process: {}", e); | ||
continue; | ||
} | ||
}; | ||
|
||
_ = wait_for_termination(streaming.get_pid(), runner.ctx.clone()); | ||
_ = streaming.wait().unwrap(); | ||
|
||
let (lck, _) = SupervisorContext::get_lock_cvar(&runner.ctx); | ||
let val = lck.lock().unwrap(); | ||
if *val { | ||
break; | ||
} | ||
} | ||
}) | ||
} | ||
|
||
/// Blocks on the [`SupervisorContext`], [`ctx`]. When the termination signal is activated, this will send a shutdown signal to the process being supervised (the one whose PID was passed as [`pid`]). | ||
fn wait_for_termination(pid: u32, ctx: SupervisorContext) -> JoinHandle<()> { | ||
thread::spawn(move || { | ||
let (lck, cvar) = SupervisorContext::get_lock_cvar(&ctx); | ||
_ = cvar.wait_while(lck.lock().unwrap(), |finish| !*finish); | ||
|
||
thread::spawn(move || { | ||
let shutdown_ctx = Arc::new((Mutex::new(false), Condvar::new())); | ||
_ = ProcessTerminator::new(pid).shutdown(|| wait_exit_timeout_default(shutdown_ctx)); | ||
}); | ||
}) | ||
} | ||
|
||
impl Handle for SupervisorRunner<Running> { | ||
type E = ProcessError; | ||
type S = JoinHandle<()>; | ||
|
||
fn stop(self) -> Self::S { | ||
// Stop all the supervisors | ||
// TODO: handle PoisonErrors (log?) | ||
self.ctx.cancel_all().unwrap(); | ||
self.state.handle | ||
} | ||
} | ||
|
||
impl SupervisorRunner<Stopped> { | ||
pub fn new( | ||
bin: String, | ||
args: Vec<String>, | ||
ctx: SupervisorContext, | ||
snd: Sender<OutputEvent>, | ||
) -> Self { | ||
SupervisorRunner { | ||
state: Stopped { | ||
bin, | ||
args, | ||
ctx, | ||
snd, | ||
}, | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.