[megatron] fix: critic and reward model load tokenizer from config #301

Open · wants to merge 1 commit into main

23 changes: 20 additions & 3 deletions verl/workers/megatron_workers.py
@@ -486,7 +486,12 @@ def _build_critic_model_optimizer(self,
 
         # Step 1: initialize the tokenizer
         local_path = copy_local_path_from_hdfs(model_path)
-        self.tokenizer = hf_tokenizer(local_path)
+        try:
+            self.tokenizer = hf_tokenizer(local_path)
+        except OSError:
+            # If the model path doesn't contain a tokenizer, we use the tokenizer path specified in the config
+            tokenizer_local_path = copy_local_path_from_hdfs(self.config.model.tokenizer_path)
+            self.tokenizer = hf_tokenizer(tokenizer_local_path)
 
         # Step 2: get the actor_model_config
         critic_model_config = AutoConfig.from_pretrained(local_path)
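
For reference, the fallback above boils down to a small standalone pattern. Below is a minimal sketch, not the PR's code: it assumes a plain transformers AutoTokenizer (verl's hf_tokenizer is treated here as behaving like AutoTokenizer.from_pretrained), and load_tokenizer_with_fallback is a hypothetical helper name.

from typing import Optional

from transformers import AutoTokenizer


def load_tokenizer_with_fallback(model_path: str, tokenizer_path: Optional[str] = None):
    """Try the checkpoint directory first; fall back to an explicit tokenizer path."""
    try:
        # AutoTokenizer raises OSError when the directory holds only model
        # weights and no tokenizer files (tokenizer.json / vocab / merges),
        # which is common for Megatron checkpoints.
        return AutoTokenizer.from_pretrained(model_path)
    except OSError:
        if tokenizer_path is None:
            raise
        return AutoTokenizer.from_pretrained(tokenizer_path)

This mirrors the critic change: the config's model.tokenizer_path plays the role of tokenizer_path here.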
@@ -643,15 +648,26 @@ def __init__(self, config):
         self.config.micro_batch_size //= mpu.get_data_parallel_world_size()
         self.config.micro_batch_size_per_gpu = self.config.micro_batch_size
 
-    def _build_rm_model(self, model_path, megatron_config: ModelParallelConfig, override_model_config):
+    def _build_rm_model(self,
+                        model_path,
+                        megatron_config: ModelParallelConfig,
+                        override_model_config,
+                        rm_tokenizer=None):
         from megatron.core.models.gpt.gpt_model import ModelType
         from verl.utils.model import print_model_size, update_model_config
         from verl.utils.megatron_utils import get_model
         from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 
         # Step 1: initialize the tokenizer
         local_path = copy_local_path_from_hdfs(model_path)
-        self.tokenizer = hf_tokenizer(local_path)
+        try:
+            self.tokenizer = hf_tokenizer(local_path)
+        except OSError:
+            # If the model path doesn't contain a tokenizer, we use rm_tokenizer
+            if rm_tokenizer:
+                self.tokenizer = rm_tokenizer
+            else:
+                raise ValueError('No tokenizer found in the model path and rm_tokenizer is not provided!')
 
         # Step 2: get the actor_model_config
         rm_model_config = AutoConfig.from_pretrained(local_path)
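
Note the asymmetry with the critic path: _build_rm_model does not read a path from its own config but takes the tokenizer as an argument, raising if neither source is usable. A hedged usage sketch follows; worker, the paths, and the pre-built rm_tokenizer are placeholders, not from this diff.

# Checkpoint directory that ships its own tokenizer files: rm_tokenizer is unused.
worker._build_rm_model(model_path='ckpts/rm-with-tokenizer',
                       megatron_config=megatron_config,
                       override_model_config=override_model_config)

# Weights-only Megatron checkpoint: the injected tokenizer takes over.
worker._build_rm_model(model_path='ckpts/rm-weights-only',
                       megatron_config=megatron_config,
                       override_model_config=override_model_config,
                       rm_tokenizer=rm_tokenizer)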
@@ -740,6 +756,7 @@ def init_model(self):
             model_path=self.config.model.path,
             megatron_config=megatron_config,
             override_model_config=override_model_config,
+            rm_tokenizer=rm_tokenizer,
         )
         # FIXME(sgm): reward model param offload is implemented in MegatronRewardModel
         # should be implemented in workers
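
For the call above to work, init_model must build rm_tokenizer beforehand. Below is a sketch of plausible wiring, assuming the reward-model config exposes a separate tokenizer path; the field name input_tokenizer is an assumption for illustration, not taken from this diff.

# Hypothetical wiring inside init_model; the `input_tokenizer` field name
# is an assumption for illustration only.
rm_tokenizer = None
rm_tokenizer_path = getattr(self.config.model, 'input_tokenizer', None)
if rm_tokenizer_path is not None:
    # Reuse the same HDFS-to-local copy and tokenizer helpers as above.
    rm_tokenizer = hf_tokenizer(copy_local_path_from_hdfs(rm_tokenizer_path))
# rm_tokenizer (possibly None) is then passed to _build_rm_model as shown above.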