From 1c892a37cbc1abc26ebe46f121f59a29da3702fd Mon Sep 17 00:00:00 2001 From: Szymon Mentel Date: Thu, 31 Oct 2024 13:25:45 +0100 Subject: [PATCH 1/2] Make the `FLAME.Pool` handle backend.init/1 errors Even though the `FLAME.Backend` allows for the `backend.init/1` to return an error: `@callback init(opts :: Keyword.t()) :: {:ok, state :: term()} | {:error, term()}` The `FLAME.Pool` would not handle it and and get into an infinite restart loop. This behaviour can be tested wit `FlameTest.Application.call` in the https://github.com/mentels/flame_test that uses a custom backend which `init/1` returns `{:error, :just_fail}`. If the pool is configured with `min: 0`, after the call we will keep getting: ```elixir iex(1)> FlameTest.Application.call :ok 13:35:17.334 [error] Task #PID<0.394.0> started from FlameTest.Pool terminating ** (MatchError) no match of right hand side value: {:error, :just_fail} (flame 0.5.1) lib/flame/pool.ex:696: FLAME.Pool.start_child_runner/2 (elixir 1.17.2) lib/task/supervised.ex:101: Task.Supervised.invoke_mfa/2 (elixir 1.17.2) lib/task/supervised.ex:36: Task.Supervised.reply/4 Function: #Function<1.81824075/0 in FLAME.Pool.async_boot_runner/1> Args: [] 13:35:18.353 [error] Task #PID<0.396.0> started from FlameTest.Pool terminating ** (MatchError) no match of right hand side value: {:error, :just_fail} (flame 0.5.1) lib/flame/pool.ex:696: FLAME.Pool.start_child_runner/2 (elixir 1.17.2) lib/task/supervised.ex:101: Task.Supervised.invoke_mfa/2 (elixir 1.17.2) lib/task/supervised.ex:36: Task.Supervised.reply/4 Function: #Function<1.81824075/0 in FLAME.Pool.async_boot_runner/1> Args: [] 13:35:19.354 [error] Task #PID<0.398.0> started from FlameTest.Pool terminating ** (MatchError) no match of right hand side value: {:error, :just_fail} (flame 0.5.1) lib/flame/pool.ex:696: FLAME.Pool.start_child_runner/2 (elixir 1.17.2) lib/task/supervised.ex:101: Task.Supervised.invoke_mfa/2 (elixir 1.17.2) lib/task/supervised.ex:36: Task.Supervised.reply/4 Function: #Function<1.81824075/0 in FLAME.Pool.async_boot_runner/1> Args: [] ... ``` While if the runners are started at the application start up (e.g: `min: 1`), the error application will not start at all, reporting an error: ```elixir 13:37:00.209 [notice] Application flame exited: :stopped ** (Mix) Could not start application flame_test: FlameTest.Application.start(:normal, []) returned an error: shutdown: failed to start child: {FLAME.Pool, FlameTest.Pool} ** (EXIT) shutdown: failed to start child: {FLAME.Pool, FlameTest.Pool} ** (EXIT) an exception was raised: ** (MatchError) no match of right hand side value: {:error, :just_fail} (flame 0.5.1) lib/flame/pool.ex:696: FLAME.Pool.start_child_runner/2 (elixir 1.17.2) lib/task/supervised.ex:101: Task.Supervised.invoke_mfa/2 (elixir 1.17.2) lib/task/supervised.ex:36: Task.Supervised.reply/4 ```` This commit attempts to fix the error by: * preventing the `MatchError` resulting from the `{:ok, pid} = DynamicSupervisor.start_child(state.runner_sup, spec)` in the `start_child_runner/2` * adding more explicit error handling in the `start_child_runner/2` * catch an exit signal possibly resulting from the `DynamicSupervisor.start_child/2` or `Runner.remote_boot/2` * the previous catch clause with `{:exit, reason}` was not catching the exit signal from the `DynamicSupervisor.start_child/2` ```elixir iex(1)> try do throw({:exit, :ha}) catch {:exit, :ha} -> :haha end :haha iex(2)> try do exit(:ha) catch {:exit, :ha} -> :haha end ** (exit) :ha iex:2: (file) iex(2)> ```` * adding a new `handle_info/2` clause to handle the `{:error, reason}` Task result * adding new clauses to the fun in `boot_runners/2` * the previous `{:exit, reason}` clause would never match since the Pool `GenServer` is not trapping exists and thus the tasks spawn with `Task.async_stream/3` would never return `{:exit, reason}` as mentioned in the https://hexdocs.pm/elixir/Task.html#async_stream/5 I think that the `Runner.remote_boot/2` might have the same problem but I will stop here and see if this work lands anywhere - maybe I'm missing the point :) --- lib/flame/pool.ex | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/lib/flame/pool.ex b/lib/flame/pool.ex index 545776c..eded73c 100644 --- a/lib/flame/pool.ex +++ b/lib/flame/pool.ex @@ -482,6 +482,10 @@ defmodule FLAME.Pool do {:noreply, handle_runner_async_up(state, pid, ref)} end + def handle_info({ref, {:error, reason}}, %Pool{} = state) when is_reference(ref) do + {:stop, reason, state} + end + def handle_info(:async_boot_continue, %Pool{} = state) do {:noreply, async_boot_runner(%Pool{state | async_boot_timer: nil})} end @@ -652,7 +656,10 @@ defmodule FLAME.Pool do {_runner, new_acc} = put_runner(acc, pid) new_acc - {:exit, reason}, _acc -> + {:ok, {:error, reason}}, _acc -> + raise "failed to boot runner: #{inspect(reason)}" + + {:ok, {:exit, reason}}, _acc -> raise "failed to boot runner: #{inspect(reason)}" end) else @@ -693,16 +700,19 @@ defmodule FLAME.Pool do restart: :temporary } - {:ok, pid} = DynamicSupervisor.start_child(state.runner_sup, spec) + with {:start, {:ok, pid}} <- {:start, DynamicSupervisor.start_child(state.runner_sup, spec)}, + {:remote_boot, :ok} <- {:remote_boot, Runner.remote_boot(pid, state.base_sync_stream)} do + {:ok, pid} + else + {:start, {:error, reason}} -> + {:error, {:start_error, reason}} - try do - case Runner.remote_boot(pid, state.base_sync_stream) do - :ok -> {:ok, pid} - {:error, reason} -> {:error, reason} - end - catch - {:exit, reason} -> {:error, {:exit, reason}} + {:remote_boot, {:error, reason}} -> + {:error, {:remote_boot_error, reason}} end + catch + :exit, reason -> + {:error, {:exit, reason}} end defp put_runner(%Pool{} = state, pid) when is_pid(pid) do From 5e14488e62aa1d0d3d5c0753906e0dcc34035570 Mon Sep 17 00:00:00 2001 From: Szymon Mentel Date: Tue, 28 Jan 2025 16:36:57 +0100 Subject: [PATCH 2/2] Adhere to the review comments --- lib/flame/pool.ex | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/lib/flame/pool.ex b/lib/flame/pool.ex index eded73c..05e3999 100644 --- a/lib/flame/pool.ex +++ b/lib/flame/pool.ex @@ -700,15 +700,18 @@ defmodule FLAME.Pool do restart: :temporary } - with {:start, {:ok, pid}} <- {:start, DynamicSupervisor.start_child(state.runner_sup, spec)}, - {:remote_boot, :ok} <- {:remote_boot, Runner.remote_boot(pid, state.base_sync_stream)} do - {:ok, pid} - else - {:start, {:error, reason}} -> - {:error, {:start_error, reason}} + case DynamicSupervisor.start_child(state.runner_sup, spec) do + {:ok, pid} -> + case Runner.remote_boot(pid, state.base_sync_stream) do + :ok -> + {:ok, pid} + + {:error, reason} -> + {:error, {:remote_boot_error, reason}} + end - {:remote_boot, {:error, reason}} -> - {:error, {:remote_boot_error, reason}} + {:error, reason} -> + {:error, {:start_error, reason}} end catch :exit, reason ->