Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v0.2.1 #2

Merged
merged 6 commits into from
Jan 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .github/workflows/cargo-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
name: Cargo build & test

on:
push:
paths-ignore:
- '.github/**'
- '*.md'
- '.*'
- '*.lock'
- '.LICENSE'
- 'examples'
- 'test'
branches: [ "main", "develop" ]
# pull_request:
# branches: [ "main" ]

env:
CARGO_TERM_COLOR: always

jobs:
build:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Build
run: cargo build --verbose
- name: Run tests
run: cargo test --verbose
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
[package]
name = "docker-autoheal"
version = "0.1.0"
version = "0.2.1"
authors = ["Travis M Knight <[email protected]>"]
license = "MIT"
description = "Monitor and restart unhealthy docker containers"
readme = "README.md"
homepage = "https://github.com/tmknight/docker-autoheal"
edition = "2021"
rust-version = "1.75.0"
rust-version = "1.74.1"

[dependencies]
bollard = "*"
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,18 @@ The `docker-autoheal` binary may be executed via a native OS or via a Docker con
| Variable | Default | Description |
|:---------------------------------:|:--------:|:---------------------------------------------------------------------------------------------------------------------------------:|
| **AUTOHEAL_CONTAINER_LABEL** | autoheal | This is the label (set to `true`) that `docker-autoheal` will monitor and remediate - or set to `ALL` to simply monitor all containers on the host |
| **AUTOHEAL_DEFAULT_STOP_TIMEOUT** | 10 | Docker waits `n` seconds for a container to stop before killing it during restarts (overridable via label; see below) |
| **AUTOHEAL_DEFAULT_STOP_TIMEOUT** | 10 | Docker waits `n` seconds for a container to stop before killing it during restarts <!-- (overridable via label; see below) --> |
| **AUTOHEAL_INTERVAL** | 5 | Check container health every `n` seconds |
| **AUTOHEAL_START_PERIOD** | 0 | Wait `n` seconds before first health check |
<!-- |WEBHOOK_URL | |Post messages to the webhook following actions on unhealthy container | -->

<!--
### Optional Container Labels

| Label | Value | Description |
|:---------------------------------:|:--------:|:---------------------------------------------------------------------------------------------------------------------------------:|
| **autoheal.stop.timeout** | 20 | Per container override of the stop timeout (in seconds) during restart |
-->

## How to use

Expand Down
98 changes: 58 additions & 40 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@ use std::collections::HashMap;
use std::io::{stdout, Write};
use std::time::Duration;

/// Logging: writes `msg` to stdout as one line, prefixed with the current
/// local time formatted as `YYYY-MM-DD HH:MM:SS`.
///
/// The stdout lock is taken for the single write. Panics if writing to
/// stdout fails.
async fn log_message(msg: &str) {
    // The formatter is rendered directly by `writeln!`, so no intermediate
    // `String` is needed for the timestamp.
    let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S");
    let mut out = stdout().lock();
    writeln!(out, "{timestamp} {msg}").unwrap();
}

async fn get_env(key: &str, default: &str) -> String {
// Return environment variable
fn get_env(key: &str, default: &str) -> String {
match std::env::var(key) {
Ok(val) => return val,
Err(e) => return default.to_string(),
Expand All @@ -21,22 +23,21 @@ async fn get_env(key: &str, default: &str) -> String {
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Autoheal variables
let autoheal_connection_type = get_env("AUTOHEAL_CONNECTION_TYPE", "local").await;
let autoheal_container_label = get_env("AUTOHEAL_CONTAINER_LABEL", "autoheal").await;
let autoheal_connection_type = get_env("AUTOHEAL_CONNECTION_TYPE", "local");
let autoheal_container_label = get_env("AUTOHEAL_CONTAINER_LABEL", "autoheal");
let autoheal_default_stop_timeout = get_env("AUTOHEAL_DEFAULT_STOP_TIMEOUT", "10")
.await
.parse()
.unwrap();
let autoheal_interval = get_env("AUTOHEAL_INTERVAL", "5").await.parse().unwrap();
let autoheal_start_period = get_env("AUTOHEAL_START_PERIOD", "0").await.parse().unwrap();
let autoheal_interval = get_env("AUTOHEAL_INTERVAL", "5").parse().unwrap();
let autoheal_start_period = get_env("AUTOHEAL_START_PERIOD", "0").parse().unwrap();

// todo
// Webhook variables
// let webhook_url = "";
// let webhook_json_key = "text";
// let apprise_url = "";

// Determine connection type & Connect to docker
// Determine connection type & connect to docker per type
let mut docker_tmp: Option<Docker> = None;
match autoheal_connection_type.as_str() {
"socket" => {
Expand Down Expand Up @@ -73,57 +74,74 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {

// Establish loop interval
let mut interval = tokio::time::interval(Duration::from_secs(autoheal_interval));

loop {
// Loop interval
interval.tick().await;
// Build container assessment criteria
let mut filters = HashMap::new();
filters.insert("health", vec!["unhealthy"]);
if autoheal_container_label != "ALL" {
filters.insert("label", vec![&autoheal_container_label]);
}

// Gather all containers that are unhealthy
let container_options = Some(ListContainersOptions {
all: true,
filters,
..Default::default()
});
let containers = docker.list_containers(container_options).await?;

for container in containers {
// Get name of container
let name0 = &container.names.unwrap()[0];
let name = name0.trim_matches('/').trim();
// Get id of container
let id: String = container.id.unwrap().chars().take(12).collect();
// Determine if state is readable
if let Some(state) = container.state {
// Determine if matches restart criteria
if !matches!(state.as_str(), "paused" | "restarting") {
// Build restart options
let restart_options = Some(RestartContainerOptions {
t: autoheal_default_stop_timeout,
..Default::default()
});
// Report what is transpiring
let msg0 = format!("Container '{}' ({}) unhealthy", name, id);
// todo
// let msg1 = format!(
// "Restarting '{}' with {}s timeout",
// name, autoheal_default_stop_timeout
// );
let msg1 = format!("Restarting '{}' now", name);
// Execute concurrently
let docker_clone = docker.clone();
let join = tokio::task::spawn(async move {
// Get name of container
let name0 = &container.names.unwrap()[0];
let name = name0.trim_matches('/').trim();

// Get id of container
let id: String = container.id.unwrap().chars().take(12).collect();

// Determine if state is readable
if let Some(state) = container.state {
// Determine if matches restart criteria
if !matches!(state.as_str(), "paused" | "restarting") {
// Build restart options
let restart_options = Some(RestartContainerOptions {
t: autoheal_default_stop_timeout,
..Default::default()
});

// Report what is transpiring
let msg0 = format!("Container '{}' ({}) unhealthy", name, id);
// todo
// let msg1 = format!(
// "Restarting '{}' with {}s timeout",
// name, autoheal_default_stop_timeout
// );
let msg1 = format!("Restarting '{}' now", name);
log_message(&msg0).await;
log_message(&msg1).await;

// Restart unhealthy container
let rslt = docker_clone.restart_container(&id, restart_options).await;
match rslt {
Ok(()) => {
let msg0 = format!("Restart of '{}' was successful", name);
log_message(&msg0).await;
}
Err(e) => {
let msg0 = format!("Restart of '{}' failed: {}", name, e);
log_message(&msg0).await;
}
}
}
} else {
let msg0 = format!("Could not determine state of {}", name);
log_message(&msg0).await;
log_message(&msg1).await;
// Restart unhealthy container
docker.restart_container(&id, restart_options).await?;
}
} else {
let msg0 = format!("Could not determine state of {}", name);
log_message(&msg0).await;
}
});
join.await?;
}
// Loop interval
interval.tick().await;
}
}