[megatron] fix: critic and reward model load tokenizer from config #301

Open · wants to merge 1 commit into main

23 changes: 20 additions & 3 deletions verl/workers/megatron_workers.py
@@ -486,7 +486,12 @@ def _build_critic_model_optimizer(self,
 
         # Step 1: initialize the tokenizer
         local_path = copy_local_path_from_hdfs(model_path)
-        self.tokenizer = hf_tokenizer(local_path)
+        try:
+            self.tokenizer = hf_tokenizer(local_path)
+        except OSError:
+            # If the model path doesn't contain a tokenizer, we use the tokenizer path specified in the config
+            tokenizer_local_path = copy_local_path_from_hdfs(self.config.model.tokenizer_path)
+            self.tokenizer = hf_tokenizer(tokenizer_local_path)
 
         # Step 2: get the actor_model_config
         critic_model_config = AutoConfig.from_pretrained(local_path)
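
For reference, the fallback above boils down to a small standalone pattern. Below is a minimal sketch, not the PR's code: it assumes a plain transformers AutoTokenizer (verl's hf_tokenizer is treated here as behaving like AutoTokenizer.from_pretrained), and load_tokenizer_with_fallback is a hypothetical helper name.

from typing import Optional

from transformers import AutoTokenizer


def load_tokenizer_with_fallback(model_path: str, tokenizer_path: Optional[str] = None):
    """Try the checkpoint directory first; fall back to an explicit tokenizer path."""
    try:
        # AutoTokenizer raises OSError when the directory holds only model
        # weights and no tokenizer files (tokenizer.json / vocab / merges),
        # which is common for Megatron checkpoints.
        return AutoTokenizer.from_pretrained(model_path)
    except OSError:
        if tokenizer_path is None:
            raise
        return AutoTokenizer.from_pretrained(tokenizer_path)

This mirrors the critic change: the config's model.tokenizer_path plays the role of tokenizer_path here.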
@@ -643,15 +648,26 @@ def __init__(self, config):
         self.config.micro_batch_size //= mpu.get_data_parallel_world_size()
         self.config.micro_batch_size_per_gpu = self.config.micro_batch_size
 
-    def _build_rm_model(self, model_path, megatron_config: ModelParallelConfig, override_model_config):
+    def _build_rm_model(self,
+                        model_path,
+                        megatron_config: ModelParallelConfig,
+                        override_model_config,
+                        rm_tokenizer=None):
         from megatron.core.models.gpt.gpt_model import ModelType
         from verl.utils.model import print_model_size, update_model_config
         from verl.utils.megatron_utils import get_model
         from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 
         # Step 1: initialize the tokenizer
         local_path = copy_local_path_from_hdfs(model_path)
-        self.tokenizer = hf_tokenizer(local_path)
+        try:
+            self.tokenizer = hf_tokenizer(local_path)
+        except OSError:
+            # If the model path doesn't contain a tokenizer, we use rm_tokenizer
+            if rm_tokenizer:
+                self.tokenizer = rm_tokenizer
+            else:
+                raise ValueError('No tokenizer found in the model path and rm_tokenizer is not provided!')
 
         # Step 2: get the actor_model_config
         rm_model_config = AutoConfig.from_pretrained(local_path)
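
Note the asymmetry with the critic path: _build_rm_model does not read a path from its own config but takes the tokenizer as an argument, raising if neither source is usable. A hedged usage sketch follows; worker, the paths, and the pre-built rm_tokenizer are placeholders, not from this diff.

# Checkpoint directory that ships its own tokenizer files: rm_tokenizer is unused.
worker._build_rm_model(model_path='ckpts/rm-with-tokenizer',
                       megatron_config=megatron_config,
                       override_model_config=override_model_config)

# Weights-only Megatron checkpoint: the injected tokenizer takes over.
worker._build_rm_model(model_path='ckpts/rm-weights-only',
                       megatron_config=megatron_config,
                       override_model_config=override_model_config,
                       rm_tokenizer=rm_tokenizer)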
@@ -740,6 +756,7 @@ def init_model(self):
             model_path=self.config.model.path,
             megatron_config=megatron_config,
             override_model_config=override_model_config,
+            rm_tokenizer=rm_tokenizer,
         )
         # FIXME(sgm): reward model param offload is implemented in MegatronRewardModel
         # should be implemented in workers
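
For the call above to work, init_model must build rm_tokenizer beforehand. Below is a sketch of plausible wiring, assuming the reward-model config exposes a separate tokenizer path; the field name input_tokenizer is an assumption for illustration, not taken from this diff.

# Hypothetical wiring inside init_model; the `input_tokenizer` field name
# is an assumption for illustration only.
rm_tokenizer = None
rm_tokenizer_path = getattr(self.config.model, 'input_tokenizer', None)
if rm_tokenizer_path is not None:
    # Reuse the same HDFS-to-local copy and tokenizer helpers as above.
    rm_tokenizer = hf_tokenizer(copy_local_path_from_hdfs(rm_tokenizer_path))
# rm_tokenizer (possibly None) is then passed to _build_rm_model as shown above.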