From 2f4fe0c0e2c0225c0349a620802462bc61cd782a Mon Sep 17 00:00:00 2001 From: Daijiro Fukuda Date: Fri, 30 Aug 2024 15:53:46 +0900 Subject: [PATCH] Restart without downtime Add a new feature: Update/Reload without downtime. 1. The current supervisor receives a signal. 2. The current supervisor sends signals to its workers, and the workers stop all plugins that cannot run in parallel. 3. The current supervisor starts a new supervisor. * => Old processes and new processes run in parallel. 4. After the new supervisor and its workers start to work, the current supervisor and its workers stop. ref: nginx's feature for upgrading on the fly * http://nginx.org/en/docs/control.html#upgrade Problem to solve: Updating Fluentd or reloading a config causes downtime. Plugins that receive data as a server, such as `in_udp`, `in_tcp`, and `in_syslog`, cannot receive data during this time. This means that the data sent by a client is lost during this time unless the client has a re-sending feature. This makes updating Fluentd or reloading a config difficult in some cases. Specific feature: Run only limited Input plugins in parallel, such as `in_tcp`, `in_udp`, and `in_syslog`. Stop all plugins except those Input plugins, and prepare a dedicated file buffer for Output. After the new workers start, they load the file buffer and route those events to the ROOT label. Note: need https://github.com/treasure-data/serverengine/pull/146 Signed-off-by: Daijiro Fukuda --- lib/fluent/engine.rb | 4 + lib/fluent/plugin/in_tcp.rb | 4 + lib/fluent/plugin/in_udp.rb | 4 + lib/fluent/plugin/input.rb | 9 ++ lib/fluent/plugin_helper/event_emitter.rb | 8 ++ lib/fluent/root_agent.rb | 119 +++++++++++++++++++--- lib/fluent/supervisor.rb | 47 +++++++-- 7 files changed, 174 insertions(+), 21 deletions(-) diff --git a/lib/fluent/engine.rb b/lib/fluent/engine.rb index afac3167ca..5e45084a39 100644 --- a/lib/fluent/engine.rb +++ b/lib/fluent/engine.rb @@ -136,6 +136,10 @@ def flush! @root_agent.flush! end + def shift_to_limited_mode! + @root_agent.shift_to_limited_mode! + end + def now # TODO thread update Fluent::EventTime.now diff --git a/lib/fluent/plugin/in_tcp.rb b/lib/fluent/plugin/in_tcp.rb index bd2ea83e5b..ccc28209ec 100644 --- a/lib/fluent/plugin/in_tcp.rb +++ b/lib/fluent/plugin/in_tcp.rb @@ -101,6 +101,10 @@ def multi_workers_ready? true end + def limited_mode_ready? + true + end + def start super diff --git a/lib/fluent/plugin/in_udp.rb b/lib/fluent/plugin/in_udp.rb index c2d436115f..dd65526ffa 100644 --- a/lib/fluent/plugin/in_udp.rb +++ b/lib/fluent/plugin/in_udp.rb @@ -65,6 +65,10 @@ def multi_workers_ready? true end + def limited_mode_ready? + true + end + def start super diff --git a/lib/fluent/plugin/input.rb b/lib/fluent/plugin/input.rb index 7a6909f7a9..347e742f7c 100644 --- a/lib/fluent/plugin/input.rb +++ b/lib/fluent/plugin/input.rb @@ -70,6 +70,15 @@ def metric_callback(es) def multi_workers_ready? false end + + def limited_mode_ready? + false + end + + def shift_to_limited_mode! + raise "BUG: use shift_to_limited_mode although this plugin is not ready for the limited mode" unless limited_mode_ready? + event_emitter_force_limited_router + end end end end diff --git a/lib/fluent/plugin_helper/event_emitter.rb b/lib/fluent/plugin_helper/event_emitter.rb index ba089e485a..5c573d7299 100644 --- a/lib/fluent/plugin_helper/event_emitter.rb +++ b/lib/fluent/plugin_helper/event_emitter.rb @@ -26,6 +26,9 @@ module EventEmitter def router @_event_emitter_used_actually = true + + return Engine.root_agent.limited_router if @_event_emitter_force_limited_router + if @_event_emitter_lazy_init @router = @primary_instance.router end @@ -48,6 +51,10 @@ def event_emitter_used_actually? @_event_emitter_used_actually end + def event_emitter_force_limited_router + @_event_emitter_force_limited_router = true + end + def event_emitter_router(label_name) if label_name if label_name == "@ROOT" @@ -72,6 +79,7 @@ def initialize super @_event_emitter_used_actually = false @_event_emitter_lazy_init = false + @_event_emitter_force_limited_router = false @router = nil end diff --git a/lib/fluent/root_agent.rb b/lib/fluent/root_agent.rb index 1165dbadcf..513b6700bb 100644 --- a/lib/fluent/root_agent.rb +++ b/lib/fluent/root_agent.rb @@ -29,13 +29,13 @@ module Fluent # # RootAgent # | - # +------------+-------------+-------------+ - # | | | | - #