diff --git a/.github/workflows/cargo-build.yml b/.github/workflows/cargo-build.yml new file mode 100644 index 0000000..f64b3dc --- /dev/null +++ b/.github/workflows/cargo-build.yml @@ -0,0 +1,30 @@ +name: Cargo build & test + +on: + push: + paths-ignore: + - '.github/**' + - '*.md' + - '.*' + - '*.lock' + - '.LICENSE' + - 'examples' + - 'test' + branches: [ "main", "develop" ] +# pull_request: +# branches: [ "main" ] + +env: + CARGO_TERM_COLOR: always + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Build + run: cargo build --verbose + - name: Run tests + run: cargo test --verbose diff --git a/Cargo.lock b/Cargo.lock index e66cc68..7fd6fdd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -165,7 +165,7 @@ dependencies = [ [[package]] name = "docker-autoheal" -version = "0.1.0" +version = "0.2.0" dependencies = [ "bollard", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 5c5020a..0d717b4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,13 +1,13 @@ [package] name = "docker-autoheal" -version = "0.1.0" +version = "0.2.1" authors = ["Travis M Knight "] license = "MIT" description = "Monitor and restart unhealthy docker containers" readme = "README.md" homepage = "https://github.com/tmknight/docker-autoheal" edition = "2021" -rust-version = "1.75.0" +rust-version = "1.74.1" [dependencies] bollard = "*" diff --git a/README.md b/README.md index 105ea08..842680e 100644 --- a/README.md +++ b/README.md @@ -9,16 +9,18 @@ The `docker-autoheal` binary may be executed via a native OS or via a Docker con | Variable | Default | Description | |:---------------------------------:|:--------:|:---------------------------------------------------------------------------------------------------------------------------------:| | **AUTOHEAL_CONTAINER_LABEL** | autoheal |This is the label (set to `true`) that `docker-autoheal` will monitor and remediate - or set to `all` to simply monitor all containers on the host| -| **AUTOHEAL_DEFAULT_STOP_TIMEOUT** | 10 | Docker waits `n` seconds for a container to stop before killing it during restarts (overridable via label; see below) | +| **AUTOHEAL_DEFAULT_STOP_TIMEOUT** | 10 | Docker waits `n` seconds for a container to stop before killing it during restarts | | **AUTOHEAL_INTERVAL** | 5 | Check container health every`n` seconds** | | **AUTOHEAL_START_PERIOD** | 0 | Wait `n` seconds before first health check | + ## How to use diff --git a/src/main.rs b/src/main.rs index ac3c050..810484d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,13 +5,15 @@ use std::collections::HashMap; use std::io::{stdout, Write}; use std::time::Duration; +// Logging async fn log_message(msg: &str) { let date = Local::now().format("%Y-%m-%d %H:%M:%S").to_string(); let mut lock = stdout().lock(); writeln!(lock, "{} {}", date, msg).unwrap(); } -async fn get_env(key: &str, default: &str) -> String { +// Return environment variable +fn get_env(key: &str, default: &str) -> String { match std::env::var(key) { Ok(val) => return val, Err(e) => return default.to_string(), @@ -21,14 +23,13 @@ async fn get_env(key: &str, default: &str) -> String { #[tokio::main] async fn main() -> Result<(), Box> { // Autoheal variables - let autoheal_connection_type = get_env("AUTOHEAL_CONNECTION_TYPE", "local").await; - let autoheal_container_label = get_env("AUTOHEAL_CONTAINER_LABEL", "autoheal").await; + let autoheal_connection_type = get_env("AUTOHEAL_CONNECTION_TYPE", "local"); + let autoheal_container_label = get_env("AUTOHEAL_CONTAINER_LABEL", "autoheal"); let autoheal_default_stop_timeout = get_env("AUTOHEAL_DEFAULT_STOP_TIMEOUT", "10") - .await .parse() .unwrap(); - let autoheal_interval = get_env("AUTOHEAL_INTERVAL", "5").await.parse().unwrap(); - let autoheal_start_period = get_env("AUTOHEAL_START_PERIOD", "0").await.parse().unwrap(); + let autoheal_interval = get_env("AUTOHEAL_INTERVAL", "5").parse().unwrap(); + let autoheal_start_period = get_env("AUTOHEAL_START_PERIOD", "0").parse().unwrap(); // todo // Webhook variables @@ -36,7 +37,7 @@ async fn main() -> Result<(), Box> { // let webhook_json_key = "text"; // let apprise_url = ""; - // Determine connection type & Connect to docker + // Determine connection type & connect to docker per type let mut docker_tmp: Option = None; match autoheal_connection_type.as_str() { "socket" => { @@ -73,16 +74,14 @@ async fn main() -> Result<(), Box> { // Establish loop interval let mut interval = tokio::time::interval(Duration::from_secs(autoheal_interval)); - loop { - // Loop interval - interval.tick().await; // Build container assessment criteria let mut filters = HashMap::new(); filters.insert("health", vec!["unhealthy"]); if autoheal_container_label != "ALL" { filters.insert("label", vec![&autoheal_container_label]); } + // Gather all containers that are unhealthy let container_options = Some(ListContainersOptions { all: true, @@ -90,40 +89,59 @@ async fn main() -> Result<(), Box> { ..Default::default() }); let containers = docker.list_containers(container_options).await?; - for container in containers { - // Get name of container - let name0 = &container.names.unwrap()[0]; - let name = name0.trim_matches('/').trim(); - // Get id of container - let id: String = container.id.unwrap().chars().take(12).collect(); - // Determine if state is readable - if let Some(state) = container.state { - // Determine if matches restart criteria - if !matches!(state.as_str(), "paused" | "restarting") { - // Build restart options - let restart_options = Some(RestartContainerOptions { - t: autoheal_default_stop_timeout, - ..Default::default() - }); - // Report what is transpiring - let msg0 = format!("Container '{}' ({}) unhealthy", name, id); - // todo - // let msg1 = format!( - // "Restarting '{}' with {}s timeout", - // name, autoheal_default_stop_timeout - // ); - let msg1 = format!("Restarting '{}' now", name); + // Execute concurrently + let docker_clone = docker.clone(); + let join = tokio::task::spawn(async move { + // Get name of container + let name0 = &container.names.unwrap()[0]; + let name = name0.trim_matches('/').trim(); + // Get id of container + let id: String = container.id.unwrap().chars().take(12).collect(); + + // Determine if state is readable + if let Some(state) = container.state { + // Determine if matches restart criteria + if !matches!(state.as_str(), "paused" | "restarting") { + // Build restart options + let restart_options = Some(RestartContainerOptions { + t: autoheal_default_stop_timeout, + ..Default::default() + }); + + // Report what is transpiring + let msg0 = format!("Container '{}' ({}) unhealthy", name, id); + // todo + // let msg1 = format!( + // "Restarting '{}' with {}s timeout", + // name, autoheal_default_stop_timeout + // ); + let msg1 = format!("Restarting '{}' now", name); + log_message(&msg0).await; + log_message(&msg1).await; + + // Restart unhealthy container + let rslt = docker_clone.restart_container(&id, restart_options).await; + match rslt { + Ok(()) => { + let msg0 = format!("Restart of '{}' was successful", name); + log_message(&msg0).await; + } + Err(e) => { + let msg0 = format!("Restart of '{}' failed: {}", name, e); + log_message(&msg0).await; + } + } + } + } else { + let msg0 = format!("Could not determine state of {}", name); log_message(&msg0).await; - log_message(&msg1).await; - // Restart unhealthy container - docker.restart_container(&id, restart_options).await?; } - } else { - let msg0 = format!("Could not determine state of {}", name); - log_message(&msg0).await; - } + }); + join.await?; } + // Loop interval + interval.tick().await; } }