From ba085e53dc6025739ede8d3aad5166134a0e5a79 Mon Sep 17 00:00:00 2001 From: willfengg Date: Wed, 10 Jul 2024 16:36:53 -0700 Subject: [PATCH] add precompute scale in README Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: --- README.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ff19b93..1ebab81 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,18 @@ model = FSDP(model, use_orig_params=True) # optional: enable torch.compile for improved performance m = torch.compile(m) -# train/finetune (not shown) +# toy training loop +for _ in range(N_ITER): + optimizer.zero_grad() + y = m(x) + y.sum().backward() + optimizer.step() + + # specific to fsdp2 + float8 with dynamic scaling + # this method is optional but is highly recommended for performance + # it calculates scales for all parameters in a single all-reduce + precompute_float8_scale_for_fsdp(model) + ``` ## float8 linear with delayed scaling