diff options
author | Lars Wirzenius <liw@liw.fi> | 2024-01-16 14:34:00 +0200 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2024-01-16 19:19:50 +0200 |
commit | 52ba6a9081ee0b6e80a4db7a9548fe255d51e431 (patch) | |
tree | 5c72851c7c11ae0f21014a2f281b25d2e0819598 | |
parent | e786def6cd7b40af8a395016fcba6bd1c2e2833f (diff) | |
download | radicle-native-ci-52ba6a9081ee0b6e80a4db7a9548fe255d51e431.tar.gz |
refactor: add new engine to execute a CI run
In the old engine it's hard to follow how various errors are handled.
This new engine should make it clearer.
This isn't entirely a pure refactoring. It subtly changed how the
external behavior is, to fix problems.
Signed-off-by: Lars Wirzenius <liw@liw.fi>
-rw-r--r-- | src/config.rs | 5 | ||||
-rw-r--r-- | src/engine.rs | 238 | ||||
-rw-r--r-- | src/lib.rs | 2 | ||||
-rw-r--r-- | src/run.rs | 250 | ||||
-rw-r--r-- | src/runinfo.rs | 9 | ||||
-rw-r--r-- | src/runlog.rs | 34 | ||||
-rwxr-xr-x | test-suite | 42 |
7 files changed, 546 insertions, 34 deletions
diff --git a/src/config.rs b/src/config.rs index 5da4db4..56f0a83 100644 --- a/src/config.rs +++ b/src/config.rs @@ -45,6 +45,11 @@ impl Config { } Ok(config) } + + /// Return timeout or default. + pub fn timeout(&self) -> usize { + self.timeout.unwrap_or(DEFAULT_TIMEOUT) + } } #[derive(Debug, thiserror::Error)] diff --git a/src/engine.rs b/src/engine.rs new file mode 100644 index 0000000..9531ae3 --- /dev/null +++ b/src/engine.rs @@ -0,0 +1,238 @@ +use std::path::PathBuf; + +use uuid::Uuid; + +use radicle::prelude::Profile; +use radicle_ci_broker::msg::{Id, Oid, Request, RunId, RunResult}; + +use crate::{ + config::{Config, ConfigError}, + logfile::{AdminLog, LogError}, + msg::{ + read_request, write_errored, write_failed, write_succeeded, write_triggered, + NativeMessageError, + }, + report, + run::{Run, RunError}, + runcmd::RunCmdError, + runinfo::{RunInfo, RunInfoBuilder, RunInfoError}, + runlog::RunLogError, + runspec::RunSpecError, +}; + +/// Run CI for a project once. +#[derive(Debug)] +pub struct Engine { + config: Config, + adminlog: AdminLog, + run_info_builder: RunInfoBuilder, +} + +impl Engine { + /// Create a new engine and prepare for actually running CI. This + /// may fail, for various reasons, such as the configuration file + /// not existing. The caller should handle the error in some + /// suitable way, such as report that to its stderr, and exit + /// non-zero. + pub fn new() -> Result<Self, EngineError> { + // Get config, open admin log for writing. If either of these + // fails, we can't write about the problem to the admin log, + // but we can return an error to the caller. + let config = Config::load_via_env()?; + let adminlog = config.open_log()?; + + // From here on, it's plain sailing. + + Ok(Self { + config, + adminlog, + run_info_builder: RunInfo::builder(), + }) + } + + /// Set up and run CI on a project once: read the trigger request + /// from stdin, write responses to stdout. Update node admin log + /// with any problems that aren't inherent in the git repository + /// (those go into the run log). + pub fn run(&mut self) -> Result<(), EngineError> { + let req = match self.setup() { + Ok(req) => req, + Err(e) => { + self.adminlog.writeln(&format!("Error setting up: {}", e))?; + return Err(e); + } + }; + + // Check that we got the right kind of request. + match req { + Request::Trigger { repo, commit } => { + if let Err(e) = self.run_helper(repo, commit) { + // If the run helper return an error, something + // went wrong in that is not due to the repository + // under test. So we don't put it in the run log, + // but the admin log and return it to the caller. + self.adminlog.writeln(&format!("Error running CI: {}", e))?; + return Err(e); + } + } + _ => { + // Protocol error. Log in admin log and report to caller. + self.adminlog + .writeln("First request was not a message to trigger a run.")?; + return Err(EngineError::NotTrigger(req)); + } + } + + if let Err(e) = self.finish() { + self.adminlog + .writeln(&format!("Error finishing up: {}", e))?; + return Err(e); + } + + Ok(()) + } + + // Set up CI to run. If something goes wrong, return the error, + // and assume the caller logs it to the admin log. + fn setup(&mut self) -> Result<Request, EngineError> { + // Write something to the admin log to indicate we start. + self.adminlog.writeln("Native CI run starts")?; + + // Read request and log it. + let req = read_request()?; + self.adminlog.writeln(&format!("request: {:#?}", req))?; + + Ok(req) + } + + // Finish up after a CI run. If something goes wrong, return the + // error, and assume the caller logs it to the admin log. + fn finish(&mut self) -> Result<(), EngineError> { + // Write response message indicating the run has finished. + let ri = self.run_info_builder.build()?; + match &ri.result { + RunResult::Success => write_succeeded()?, + RunResult::Failure => write_failed()?, + RunResult::Error(s) => write_errored(s)?, + _ => write_errored(&format!("unknown result {}", ri.result))?, + } + + // Persist the run info. If this fails, it's a node admin problem. + if let Err(e) = ri.write() { + self.adminlog + .writeln(&format!("failed to write run info: {}", e))?; + } + + // Generate a report page of all CI runs. + report::build_report(&self.config.state)?; + + // Log that we've reached the end successfully. + self.adminlog.writeln("Native CI ends successfully")?; + + Ok(()) + } + + // Execute the CI run. Log any problems to a log for this run, and + // persist that. Update the run info builder as needed. + fn run_helper(&mut self, rid: Id, commit: Oid) -> Result<(), EngineError> { + // Pick a run id and create a directory for files related to + // the run. + let (run_id, run_dir) = mkdir_run(&self.config)?; + let run_id = RunId::from(format!("{}", run_id).as_str()); + + // Get node git storage. We do this before responding to the + // trigger, so that if this fails, we haven't said that a run + // has started. + let profile = Profile::load().map_err(EngineError::LoadProfile)?; + let storage = profile.storage.path(); + + // Write response to indicate run has been triggered. + write_triggered(&run_id)?; + + // Create and set up the run. + let mut run = Run::new(&run_dir, run_id)?; + run.set_repository(rid); + run.set_commit(commit); + run.set_storage(storage); + run.set_timeout(self.config.timeout()); + + // Actually run. Examine the run log to decide if the run + // succeeded or failed. + let run_log = run.run()?; + let result = if run_log.all_commands_succeeded() { + RunResult::Success + } else { + RunResult::Failure + }; + self.run_info_builder.result(result); + + Ok(()) + } + + /// Report results to caller (via stdout) and to users (via report + /// on web page). + pub fn report(&mut self) -> Result<(), EngineError> { + Ok(()) + } +} + +/// Create a per-run directory. +fn mkdir_run(config: &Config) -> Result<(Uuid, PathBuf), EngineError> { + let state = &config.state; + if !state.exists() { + std::fs::create_dir_all(state).map_err(|e| EngineError::CreateState(state.into(), e))?; + } + + let run_id = Uuid::new_v4(); + let run_dir = state.join(run_id.to_string()); + std::fs::create_dir(&run_dir).map_err(|e| EngineError::CreateRunDir(run_dir.clone(), e))?; + Ok((run_id, run_dir)) +} + +#[derive(Debug, thiserror::Error)] +pub enum EngineError { + #[error("failed to create per-run parent directory {0}")] + CreateState(PathBuf, #[source] std::io::Error), + + #[error("failed to create per-run directory {0}")] + CreateRunDir(PathBuf, #[source] std::io::Error), + + #[error("failed to load Radicle profile")] + LoadProfile(#[source] radicle::profile::Error), + + #[error("failed to remove {0}")] + RemoveDir(PathBuf, #[source] std::io::Error), + + #[error("programming error: failed to set field {0}")] + Unset(&'static str), + + #[error(transparent)] + Config(#[from] ConfigError), + + #[error(transparent)] + Log(#[from] LogError), + + #[error(transparent)] + Message(#[from] NativeMessageError), + + #[error(transparent)] + Report(#[from] report::ReportError), + + #[error(transparent)] + RunCmd(#[from] RunCmdError), + + #[error(transparent)] + RunInfo(#[from] RunInfoError), + + #[error(transparent)] + RunLog(#[from] RunLogError), + + #[error(transparent)] + RunSpec(#[from] RunSpecError), + + #[error(transparent)] + Run(#[from] RunError), + + #[error("request message was not a trigger message: {0:?}")] + NotTrigger(Request), +} @@ -1,7 +1,9 @@ pub mod config; +pub mod engine; pub mod logfile; pub mod msg; pub mod report; +pub mod run; pub mod runcmd; pub mod runinfo; pub mod runlog; diff --git a/src/run.rs b/src/run.rs new file mode 100644 index 0000000..1a0ae10 --- /dev/null +++ b/src/run.rs @@ -0,0 +1,250 @@ +use std::{ + path::{Path, PathBuf}, + process::Command, +}; + +use radicle_ci_broker::msg::{Id, Oid, RunId}; + +use crate::{ + msg::NativeMessageError, + report, + runcmd::RunCmdError, + runinfo::RunInfoError, + runlog::{RunLog, RunLogError}, + runspec::{RunSpec, RunSpecError}, +}; + +// Exit code to indicate we didn't get one from the process. +const NO_EXIT: i32 = 999; + +// Exit code for the timeout command to indicate it timed out. +const EXIT_TIMEOUT: i32 = 124; + +// Path to the repository's CI run specification. This is relative to +// the root of the repository. +const RUNSPEC_PATH: &str = ".radicle/native.yaml"; + +/// Execute the project-specific parts of a CI run. This needs to be +/// set up with `set_*` methods, and then the [`run`](Run::run) method +/// needs to be called. +#[derive(Debug)] +pub struct Run { + run_log: RunLog, + rid: Option<Id>, + commit: Option<Oid>, + storage: Option<PathBuf>, + timeout: Option<usize>, + src: PathBuf, +} + +impl Run { + /// Create a new `Run`. + pub fn new(run_dir: &Path, run_id: RunId) -> Result<Self, RunError> { + let run_log_filename = run_dir.join(format!("{}", run_id)); + let run_log = RunLog::new(&run_log_filename); + + Ok(Self { + run_log, + rid: None, + commit: None, + storage: None, + timeout: None, + src: run_dir.join("src"), + }) + } + + /// Set the git repository to use for this run. + pub fn set_repository(&mut self, rid: Id) { + self.rid = Some(rid); + } + + /// Set the commit to use for this run. + pub fn set_commit(&mut self, commit: Oid) { + self.commit = Some(commit); + } + + /// Set the location of the Radicle node git storage. + pub fn set_storage(&mut self, path: &Path) { + self.storage = Some(path.into()); + } + + /// Optionally set the maximum duration to execute the shell + /// commands specified for a project, in seconds. If not set, a + /// generous built-in default is used. + pub fn set_timeout(&mut self, seconds: usize) { + self.timeout = Some(seconds); + } + + /// Run CI on a project. + /// + /// This runs CI for the project, and then clean up: persist the + /// per-run log, and clean up some disk space after a CI run. On + /// success, return the run log. A CI run has succeeded, if all + /// the commands run as part of it succeeded. + /// + /// Note that this consume the `Run`. + pub fn run(mut self) -> Result<RunLog, RunError> { + // Execute the actual CI run. + let result = self.run_helper(); + + // If writing the run log fails, it will obscure any previous + // problem from the CI run. That's intentional: we want to + // make the node admin aware of the log writing problem, as + // that can be a problem for all future runs, e.g., due to a + // full file system, whereas, e.g., a syntax error in the code + // under test, is more fleeting. + self.run_log.write()?; + + // Likewise, if we can't clean up disk space, the node admin + // needs to know about that. + std::fs::remove_dir_all(&self.src).map_err(|e| RunError::RemoveDir(self.src.clone(), e))?; + + // Return result from the actual CI run. + result.map(|_| self.run_log) + } + + // Execute CI run once, without worrying about cleanup. Store any + // problems in the run log. The caller is responsible for + // persisting the run log. This returns an error only if it's a + // programming error. + fn run_helper(&mut self) -> Result<(), RunError> { + // Get values fields we'll need to use, if they've been set. + let rid = self.rid.ok_or(RunError::Missing("rid"))?; + let commit = self.commit.ok_or(RunError::Missing("commit"))?; + let storage = self.storage.as_ref().ok_or(RunError::Missing("storage"))?; + let timeout = self.timeout.ok_or(RunError::Missing("timeout"))?; + + // Record metadata in the run log. + self.run_log.title("Log from Radicle native CI"); + self.run_log.rid(rid); + self.run_log.commit(commit); + + // Clone the repository and check out the right commit. If + // these fail, the problem is stored in the run log. + let repo_path = storage.join(rid.canonical()); + let src = self.src.to_path_buf(); + self.git_clone(&repo_path, &src)?; + self.git_checkout(commit, &src)?; + + let runspec_path = self.src.join(RUNSPEC_PATH); + let runspec = match RunSpec::from_file(&runspec_path) { + Ok(runspec) => runspec, + Err(e) => { + // Log error in run log, then return. We can't do + // anything more if we don't have the run spec. + // However, return `Ok`, so that the CI engine doesn't + // report that the engine failed. + self.run_log.runspec(&e); + return Ok(()); + } + }; + + let snippet = format!("set -xeuo pipefail\n{}", &runspec.shell); + let timeout = format!("{}", timeout); + let exit = self.runcmd(&["timeout", &timeout, "bash", "-c", &snippet], &src)?; + if exit == EXIT_TIMEOUT { + self.run_log.command_timed_out(); + } + + Ok(()) + } + + fn git_clone(&mut self, repo_path: &Path, src: &Path) -> Result<(), RunError> { + self.runcmd( + &[ + "git", + "clone", + repo_path.to_str().unwrap(), + src.to_str().unwrap(), + ], + Path::new("."), + )?; + Ok(()) + } + + fn git_checkout(&mut self, commit: Oid, src: &Path) -> Result<(), RunError> { + self.runcmd(&["git", "checkout", &commit.to_string()], src)?; + Ok(()) + } + + // Run an external command in a directory. Log the command and the + // result of running it to the run log. Return an error if the + // command could not be executed at all, but the exit code if it + // ran but failed. + fn runcmd(&mut self, argv: &[&str], cwd: &Path) -> Result<i32, RunError> { + if argv.is_empty() { + return Err(RunError::EmptyArgv); + } + + let argv0 = argv[0]; + let output = Command::new(argv0) + .args(&argv[1..]) + .current_dir(cwd) + .output() + .map_err(|e| RunError::Command(argv.iter().map(|s| s.to_string()).collect(), e))?; + + let exit = output.status.code().unwrap_or(NO_EXIT); + self.run_log.runcmd( + argv, + &cwd.canonicalize() + .map_err(|e| RunCmdError::Canonicalize(cwd.into(), e))?, + exit, + &output.stdout, + &output.stderr, + ); + eprintln!("RUNCMD: argv={:?}", argv); + eprintln!("RUNCMD: exit={}", exit); + Ok(exit) + } +} + +#[derive(Debug, thiserror::Error)] +pub enum RunError { + #[error("failed to create per-run parent directory {0}")] + CreateState(PathBuf, #[source] std::io::Error), + + #[error("failed to create per-run directory {0}")] + CreateRunDir(PathBuf, #[source] std::io::Error), + + #[error("failed to load Radicle profile")] + LoadProfile(#[source] radicle::profile::Error), + + #[error("failed to remove {0}")] + RemoveDir(PathBuf, #[source] std::io::Error), + + #[error("programming error: failed to set field {0}")] + Unset(&'static str), + + #[error(transparent)] + Message(#[from] NativeMessageError), + + #[error(transparent)] + Report(#[from] report::ReportError), + + #[error(transparent)] + RunCmd(#[from] RunCmdError), + + #[error(transparent)] + RunInfo(#[from] RunInfoError), + + #[error(transparent)] + RunLog(#[from] RunLogError), + + #[error(transparent)] + RunSpec(#[from] RunSpecError), + + #[error("failed to run command {0:?}")] + Command(Vec<String>, #[source] std::io::Error), + + #[error("command failed with exit code {0}: {1:?}")] + CommandFailed(i32, Vec<String>), + + #[error("failed to make pathname absolute: {0}")] + Canonicalize(PathBuf, #[source] std::io::Error), + + #[error("programming error: function runcmd called with empty argv")] + EmptyArgv, + + #[error("programming error: field '{0}' was not set in struct Run")] + Missing(&'static str), +} diff --git a/src/runinfo.rs b/src/runinfo.rs index b5cf709..6d39898 100644 --- a/src/runinfo.rs +++ b/src/runinfo.rs @@ -85,7 +85,7 @@ impl RunInfoBuilder { self.run_info = Some(filename); } - pub fn build(self) -> Result<RunInfo, RunInfoError> { + pub fn build(&self) -> Result<RunInfo, RunInfoError> { let fmt = format_description!("[year]-[month]-[day] [hour]:[minute]:[second]"); let now = OffsetDateTime::now_utc() .format(fmt) @@ -94,12 +94,13 @@ impl RunInfoBuilder { Ok(RunInfo { repo: self.repo.map(|x| x.to_string()).unwrap_or("".into()), commit: self.commit.map(|x| x.to_string()).unwrap_or("".into()), - id: self.run_id.unwrap_or("<unknown>".into()), + id: self.run_id.as_deref().unwrap_or("<unknown>").into(), result: self .result + .clone() .ok_or(RunInfoError::MissingInfo("result".into()))?, - log: self.log.unwrap_or("<unknown>".into()), - run_info: self.run_info, + log: self.log.as_deref().unwrap_or(Path::new("<unknown>")).into(), + run_info: self.run_info.clone(), timestamp: now, }) } diff --git a/src/runlog.rs b/src/runlog.rs index cf47f93..65ba9af 100644 --- a/src/runlog.rs +++ b/src/runlog.rs @@ -4,6 +4,8 @@ use html_page::{Document, Element, Tag}; use radicle_ci_broker::msg::{Id, Oid}; +use crate::runspec::RunSpecError; + #[derive(Debug, Default)] pub struct RunLog { filename: PathBuf, @@ -11,6 +13,8 @@ pub struct RunLog { rid: Option<Id>, commit: Option<Oid>, commands: Vec<Command>, + timeout: bool, + runspec_error: Option<String>, } impl RunLog { @@ -33,6 +37,14 @@ impl RunLog { self.commit = Some(commit); } + pub fn all_commands_succeeded(&self) -> bool { + self.commands.iter().all(|c| c.exit == 0) && !self.timeout && self.runspec_error.is_none() + } + + pub fn runspec(&mut self, e: &RunSpecError) { + self.runspec_error = Some(format!("{}", e)); + } + pub fn runcmd(&mut self, argv: &[&str], cwd: &Path, exit: i32, stdout: &[u8], stderr: &[u8]) { self.commands.push(Command { argv: argv.iter().map(|a| a.to_string()).collect(), @@ -43,6 +55,10 @@ impl RunLog { }); } + pub fn command_timed_out(&mut self) { + self.timeout = true; + } + pub fn write(&self) -> Result<(), RunLogError> { let title = self.title.as_ref().ok_or(RunLogError::Missing("title"))?; let rid = self.rid.as_ref().ok_or(RunLogError::Missing("rid"))?; @@ -72,10 +88,22 @@ impl RunLog { ); doc.push_to_body(&ul); + if let Some(e) = &self.runspec_error { + let error = Element::new(Tag::P) + .with_text("Failed to load .radicle/native.yaml: ") + .with_child(Element::new(Tag::Code).with_text(e)); + doc.push_to_body(&error); + } + for cmd in self.commands.iter() { cmd.format(&mut doc); } + if self.timeout { + let timeout = Element::new(Tag::P).with_text("Last command timed out"); + doc.push_to_body(&timeout); + } + let html = format!("{}", doc); std::fs::write(&self.filename, html.as_bytes()) .map_err(|e| RunLogError::Write(self.filename.clone(), e))?; @@ -84,10 +112,10 @@ impl RunLog { } fn result(&self) -> &'static str { - if self.commands.iter().any(|c| c.exit != 0) { - "FAILURE" - } else { + if self.all_commands_succeeded() { "SUCCESS" + } else { + "FAILURE" } } } @@ -26,6 +26,10 @@ class Suite: def assert_success(self, msg): assert msg == {"response": "finished", "result": "success"} + def assert_failure(self, msg): + assert msg["response"] == "finished" + assert msg["result"] == "failure" + def assert_error(self, msg): assert msg["response"] == "finished" res = msg["result"] @@ -134,10 +138,10 @@ class Suite: def test_command_fails(self): exit, resps, stderr = self._test_case("cmd-fails", "false") - assert exit != 0 + assert exit == 1 assert len(resps) == 2 self.assert_triggered(resps[0]) - self.assert_error(resps[1]) + self.assert_failure(resps[1]) def test_repository_does_not_exist(self): rid = "rad:z3aaaaaaaaaaaaaaaaaaaaaaaaaaa" @@ -147,14 +151,7 @@ class Suite: trigger = self._create_valid_trigger(rid, commit) exit, resps, stderr = self._run_ci(trigger, ci, rid, commit) - assert exit != 0 - assert len(resps) == 2 - self.assert_triggered(resps[0]) - self.assert_error(resps[1]) - error = resps[1]["result"]["error"] - assert "git" in error - assert "clone" in error - assert rid[len("rad:") :] in error + assert exit == 2 def test_commit_does_not_exist(self): git = self._create_git_repo("commit-missing") @@ -165,12 +162,10 @@ class Suite: ci = self._create_ci() exit, resps, stderr = ci.run(trigger) - assert exit != 0 + assert exit == 1 assert len(resps) == 2 self.assert_triggered(resps[0]) - self.assert_error(resps[1]) - error = resps[1]["result"]["error"] - assert commit in error + self.assert_failure(resps[1]) def test_no_message(self): git = self._create_git_repo("no-message") @@ -194,24 +189,17 @@ class Suite: def test_native_yaml_has_no_shell(self): exit, resps, stderr = self._test_case("no-shell", None) - assert exit != 0 + assert exit == 1 assert len(resps) == 2 self.assert_triggered(resps[0]) - self.assert_error(resps[1]) - error = resps[1]["result"]["error"] - assert "YAML" in error - assert "shell" in stderr + self.assert_failure(resps[1]) def test_native_yaml_shell_is_not_string(self): exit, resps, stderr = self._test_case("shell-not-string", {"foo": "bar"}) - assert exit != 0 + assert exit == 1 assert len(resps) == 2 self.assert_triggered(resps[0]) - self.assert_error(resps[1]) - error = resps[1]["result"]["error"] - assert "YAML" in error - assert "shell" in stderr - assert "string" in stderr + self.assert_failure(resps[1]) def test_command_takes_too_long(self): git = self._create_git_repo("command-takes-too-long") @@ -220,10 +208,10 @@ class Suite: trigger = Trigger(rid, commit) ci = self._create_ci() exit, resps, stderr = ci.run(trigger) - assert exit != 0 + assert exit == 1 assert len(resps) == 2 self.assert_triggered(resps[0]) - self.assert_error(resps[1]) + self.assert_failure(resps[1]) assert "124" in stderr |