From 93f317ee714a553b2e5c0ce2d0729cd9cbc2b42d Mon Sep 17 00:00:00 2001 From: Kazuki Fujii <68278821+okoge-kaz@users.noreply.github.com> Date: Tue, 16 Jul 2024 09:18:36 +0900 Subject: [PATCH] Fix[critical] AdamW epsilon default value --- src/llama_recipes/arguments.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama_recipes/arguments.py b/src/llama_recipes/arguments.py index 6c0b8d0..2618828 100644 --- a/src/llama_recipes/arguments.py +++ b/src/llama_recipes/arguments.py @@ -306,7 +306,7 @@ def _add_regularization_args(parser: argparse.ArgumentParser) -> argparse.Argume help='Second coefficient for computing running averages of gradient and its square' ) group.add_argument( - '--adam-eps', type=float, default=1e-06, + '--adam-eps', type=float, default=1e-08, help='Term added to the denominator to improve numerical stability' )