[megatron] fix: critic and reward model load tokenizer from config
Currently, the worker fails if the critic or reward model path doesn't
contain a tokenizer. This PR fixes that by loading the tokenizer from the
config in that case.

- For the critic model, we fall back to loading the tokenizer from
  `critic.model.tokenizer_path`.
- For the reward model, we first fall back to loading from
  `reward_model.model.rm_tokenizer`, and then from
  `reward_model.model.input_tokenizer` if that is not set (a minimal sketch
  of this fallback pattern follows below).
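For context, a minimal sketch of the fallback pattern this change implements. The helper name `load_tokenizer_with_fallback` is hypothetical; only `AutoTokenizer` from `transformers` is assumed here, whereas the actual code in the diff below uses verl's `hf_tokenizer` helper:

```python
from transformers import AutoTokenizer


def load_tokenizer_with_fallback(model_path, fallback_path=None):
    """Hypothetical helper: try the model path first, then an explicit tokenizer path."""
    try:
        # Works when the checkpoint directory ships tokenizer files.
        return AutoTokenizer.from_pretrained(model_path)
    except OSError:
        # The checkpoint has weights only; use the tokenizer path from the config.
        if fallback_path is None:
            raise ValueError('No tokenizer found in the model path and no fallback path was configured!')
        return AutoTokenizer.from_pretrained(fallback_path)
```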

Signed-off-by: Hollow Man <[email protected]>
HollowMan6 committed Feb 21, 2025
1 parent 55a4d3c commit 9b45db2
Showing 1 changed file with 20 additions and 3 deletions.
verl/workers/megatron_workers.py
@@ -483,7 +483,12 @@ def _build_critic_model_optimizer(self,
 
         # Step 1: initialize the tokenizer
         local_path = copy_local_path_from_hdfs(model_path)
-        self.tokenizer = hf_tokenizer(local_path)
+        try:
+            self.tokenizer = hf_tokenizer(local_path)
+        except OSError:
+            # If the model path doesn't contain a tokenizer, we use the tokenizer path specified in the config
+            tokenizer_local_path = copy_local_path_from_hdfs(self.config.model.tokenizer_path)
+            self.tokenizer = hf_tokenizer(tokenizer_local_path)
 
         # Step 2: get the actor_model_config
         critic_model_config = AutoConfig.from_pretrained(local_path)
@@ -639,15 +644,26 @@ def __init__(self, config):
         self.config.micro_batch_size //= mpu.get_data_parallel_world_size()
         self.config.micro_batch_size_per_gpu = self.config.micro_batch_size
 
-    def _build_rm_model(self, model_path, megatron_config: ModelParallelConfig, override_model_config):
+    def _build_rm_model(self,
+                        model_path,
+                        megatron_config: ModelParallelConfig,
+                        override_model_config,
+                        rm_tokenizer=None):
         from megatron.core.models.gpt.gpt_model import ModelType
         from verl.utils.model import print_model_size, update_model_config
         from verl.utils.megatron_utils import get_model
         from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 
         # Step 1: initialize the tokenizer
         local_path = copy_local_path_from_hdfs(model_path)
-        self.tokenizer = hf_tokenizer(local_path)
+        try:
+            self.tokenizer = hf_tokenizer(local_path)
+        except OSError:
+            # If the model path doesn't contain a tokenizer, we use rm_tokenizer
+            if rm_tokenizer:
+                self.tokenizer = rm_tokenizer
+            else:
+                raise ValueError('No tokenizer found in the model path and rm_tokenizer is not provided!')
 
         # Step 2: get the actor_model_config
         rm_model_config = AutoConfig.from_pretrained(local_path)
@@ -735,6 +751,7 @@ def init_model(self):
             model_path=self.config.model.path,
             megatron_config=megatron_config,
             override_model_config=override_model_config,
+            rm_tokenizer=rm_tokenizer,
         )
         # FIXME(sgm): reward model param offload is implemented in MegatronRewardModel
         # should be implemented in workers
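The hunk above only shows `rm_tokenizer` being threaded into `_build_rm_model`; the code that resolves it inside `init_model` is not part of this excerpt. A hedged sketch of what that resolution could look like, assuming a dict-like config (e.g. OmegaConf) with the keys named in the commit message, and assuming `verl.utils` import paths for the helpers already used elsewhere in the diff:

```python
from verl.utils import hf_tokenizer
from verl.utils.fs import copy_local_path_from_hdfs


def resolve_rm_tokenizer(model_cfg):
    """Hypothetical helper: resolve the reward-model tokenizer in the order given by the commit message."""
    # Prefer rm_tokenizer, then fall back to input_tokenizer; both may be unset.
    tokenizer_path = model_cfg.get('rm_tokenizer') or model_cfg.get('input_tokenizer')
    if tokenizer_path is None:
        return None
    local_path = copy_local_path_from_hdfs(tokenizer_path)
    return hf_tokenizer(local_path)
```

With such a helper, `_build_rm_model` either uses the tokenizer shipped with the model path or this resolved fallback, and raises only when neither is available.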
