update load weights
Cui-yshoho committed Dec 19, 2024
1 parent 5f5da70 commit 922ab29
Showing 4 changed files with 19 additions and 20 deletions.
4 changes: 2 additions & 2 deletions mindone/diffusers/models/model_loading_utils.py
@@ -111,9 +111,9 @@ def _load_state_dict_into_model(model_to_load, state_dict: OrderedDict) -> List[
     local_state = {k: v for k, v in model_to_load.parameters_and_names()}
     for k, v in state_dict.items():
         if k in local_state:
-            v.set_dtype(local_state[k].dtype)
+            state_dict[k] = ms.Parameter(v.to(local_state[k].dtype), name=k)
         else:
-            pass  # unexpect key keeps origin dtype
+            state_dict[k] = ms.Parameter(v, name=k)  # unexpect key keeps origin dtype
     ms.load_param_into_net(model_to_load, state_dict, strict_load=True)
     return error_msgs

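In the new code path every entry of the incoming state dict is wrapped as an `ms.Parameter`, and entries that match a model parameter are first cast to that parameter's dtype; the old in-place `set_dtype` call is gone. A minimal, self-contained sketch of the same pattern (the toy model and fp16 checkpoint are illustrative, not taken from the commit):

```python
import numpy as np
import mindspore as ms
from mindspore import nn

# Toy fp32 model; the checkpoint below is fp16 on purpose.
net = nn.Dense(4, 2)  # parameter names: "weight" (2, 4) and "bias" (2,)
checkpoint = {
    "weight": ms.Tensor(np.ones((2, 4), np.float16)),
    "bias": ms.Tensor(np.zeros((2,), np.float16)),
}

local_state = {k: v for k, v in net.parameters_and_names()}
for k, v in checkpoint.items():
    if k in local_state:
        # known key: cast to the dtype the model parameter expects
        checkpoint[k] = ms.Parameter(v.to(local_state[k].dtype), name=k)
    else:
        # unexpected key: keep its original dtype
        checkpoint[k] = ms.Parameter(v, name=k)
ms.load_param_into_net(net, checkpoint, strict_load=True)
```

Because the cast happens per key, a checkpoint saved in one dtype loads cleanly into a model built in another, without touching the tensors returned by `load_file`.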
21 changes: 10 additions & 11 deletions mindone/diffusers/models/modeling_utils.py
@@ -27,6 +27,7 @@
 
 import mindspore as ms
 from mindspore import nn, ops
+from mindspore.nn.utils import no_init_parameters
 
 from mindone.safetensors.mindspore import save_file as safe_save_file

@@ -61,9 +62,7 @@ def _get_pt2ms_mappings(m):
     mappings = {}  # pt_param_name: (ms_param_name, pt_param_to_ms_param_func)
     for name, cell in m.cells_and_names():
         if isinstance(cell, (nn.Conv1d, nn.Conv1dTranspose)):
-            mappings[f"{name}.weight"] = f"{name}.weight", lambda x: ms.Parameter(
-                ops.expand_dims(x, axis=-2), name=x.name
-            )
+            mappings[f"{name}.weight"] = f"{name}.weight", lambda x: ops.expand_dims(x, axis=-2)
         elif isinstance(cell, nn.Embedding):
             mappings[f"{name}.weight"] = f"{name}.embedding_table", lambda x: x
         elif isinstance(cell, (nn.BatchNorm2d, nn.LayerNorm, nn.GroupNorm)):
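The Conv1d/Conv1dTranspose mapping now returns the reshaped tensor directly instead of an `ms.Parameter`; Parameter wrapping and dtype handling happen later in `_load_state_dict_into_model`. The extra axis is still needed because MindSpore's `nn.Conv1d` keeps a 4-D weight internally, so a 3-D PyTorch-style weight gains a singleton dimension. A small illustration with arbitrary shapes (not from the repo):

```python
import numpy as np
import mindspore as ms
from mindspore import ops

pt_weight = ms.Tensor(np.random.randn(8, 4, 3).astype(np.float32))  # (C_out, C_in, K), PyTorch Conv1d layout
ms_weight = ops.expand_dims(pt_weight, axis=-2)                      # (C_out, C_in, 1, K), MindSpore Conv1d layout
print(ms_weight.shape)  # (8, 4, 1, 3)
```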
@@ -608,8 +607,15 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                 user_agent=user_agent,
                 commit_hash=commit_hash,
             )
+        with no_init_parameters():
+            model = cls.from_config(config, **unused_kwargs)
 
-        model = cls.from_config(config, **unused_kwargs)
+        if mindspore_dtype is not None and not isinstance(mindspore_dtype, ms.Type):
+            raise ValueError(
+                f"{mindspore_dtype} needs to be of type `ms.Type`, e.g. `ms.float16`, but is {type(mindspore_dtype)}."
+            )
+        elif mindspore_dtype is not None:
+            model = model.to(mindspore_dtype)
 
         if is_sharded:
             load_checkpoint_and_dispatch(
@@ -637,13 +643,6 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                 "error_msgs": error_msgs,
             }
 
-        if mindspore_dtype is not None and not isinstance(mindspore_dtype, ms.Type):
-            raise ValueError(
-                f"{mindspore_dtype} needs to be of type `ms.Type`, e.g. `ms.float16`, but is {type(mindspore_dtype)}."
-            )
-        elif mindspore_dtype is not None:
-            model = model.to(mindspore_dtype)
-
         model.register_to_config(_name_or_path=pretrained_model_name_or_path)
 
         # Set model in evaluation mode to deactivate DropOut modules by default
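Net effect for diffusers models: `cls.from_config` now runs under `no_init_parameters()`, so the freshly built network skips its random weight initialization, and the `mindspore_dtype` check plus `model.to(mindspore_dtype)` are applied to that empty skeleton before the checkpoint is loaded (previously the cast came after loading). A hedged usage sketch; the model class and repo id are examples, not taken from this commit:

```python
import mindspore as ms
from mindone.diffusers import AutoencoderKL

# dtype is validated and applied to the uninitialized model first,
# then the checkpoint tensors are cast key-by-key while loading.
vae = AutoencoderKL.from_pretrained(
    "stabilityai/sd-vae-ft-mse",  # example repo id, illustrative only
    mindspore_dtype=ms.float16,
)
```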
2 changes: 1 addition & 1 deletion mindone/safetensors/mindspore.py
@@ -125,7 +125,7 @@ def load_file(filename: Union[str, os.PathLike]) -> Dict[str, ms.Tensor]:
 
 def _np2ms(np_dict: Dict[str, np.ndarray]) -> Dict[str, ms.Tensor]:
     for k, v in np_dict.items():
-        np_dict[k] = ms.Parameter(v, name=k)
+        np_dict[k] = ms.tensor(v)
     return np_dict


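With `_np2ms` returning plain tensors, `load_file` now yields a `Dict[str, ms.Tensor]` rather than named `ms.Parameter` objects; naming and Parameter wrapping are left to the model-loading code above. A short usage sketch (the file path is illustrative):

```python
from mindone.safetensors.mindspore import load_file

state_dict = load_file("model.safetensors")  # values are ms.Tensor, not ms.Parameter
for name, tensor in state_dict.items():
    print(name, tensor.dtype, tensor.shape)
```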
12 changes: 6 additions & 6 deletions mindone/transformers/modeling_utils.py
@@ -52,6 +52,7 @@
 
 import mindspore as ms
 from mindspore import Tensor, nn, ops
+from mindspore.nn.utils import no_init_parameters
 
 from .integrations import PeftAdapterMixin
 from .modeling_attn_mask_utils import dtype_to_min
@@ -71,9 +72,7 @@ def _get_pt2ms_mappings(m):
     mappings = {}  # pt_param_name: (ms_param_name, pt_param_to_ms_param_func)
     for name, cell in m.cells_and_names():
         if isinstance(cell, (nn.Conv1d, nn.Conv1dTranspose)):
-            mappings[f"{name}.weight"] = f"{name}.weight", lambda x: ms.Parameter(
-                ops.expand_dims(x, axis=-2), name=x.name
-            )
+            mappings[f"{name}.weight"] = f"{name}.weight", lambda x: ops.expand_dims(x, axis=-2)
         elif isinstance(cell, nn.Embedding):
             mappings[f"{name}.weight"] = f"{name}.embedding_table", lambda x: x
         elif isinstance(cell, (nn.BatchNorm2d, nn.LayerNorm, nn.GroupNorm)):
@@ -294,9 +293,9 @@ def _load_state_dict_into_model(model_to_load, state_dict, start_prefix, is_shar
     local_state = {start_prefix + k: v for k, v in model_to_load.parameters_and_names()}
     for k, v in state_dict.items():
         if k in local_state:
-            v.set_dtype(local_state[k].dtype)
+            state_dict[k] = ms.Parameter(v.to(local_state[k].dtype), name=k)
         else:
-            pass  # unexpect key keeps origin dtype
+            state_dict[k] = ms.Parameter(v, name=k)  # unexpect key keeps origin dtype
     cm = silence_mindspore_logger() if is_sharded else nullcontext()
     with cm:
         ms.load_param_into_net(model_to_load, state_dict, strict_load=True)
@@ -1730,7 +1729,8 @@ def from_pretrained(
 
         config.name_or_path = pretrained_model_name_or_path
         config = copy.deepcopy(config)  # We do not want to modify the config inplace in from_pretrained.
-        model = cls(config, *model_args, **model_kwargs)
+        with no_init_parameters():
+            model = cls(config, *model_args, **model_kwargs)
         # We cannot set default mindspore dtype. So we need to cast model weights after creating.
         if mindspore_dtype is not None:
             model = model.to(mindspore_dtype)
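The transformers path mirrors the diffusers change: `cls(config, ...)` is constructed under `no_init_parameters()`, so parameters are not randomly initialized only to be overwritten by checkpoint weights, and the cast to `mindspore_dtype` still follows immediately after construction. A minimal sketch of the deferred-init pattern on a toy cell; the cell and the manual dtype loop are stand-ins for the real model and `model.to(mindspore_dtype)`:

```python
import mindspore as ms
from mindspore import nn
from mindspore.nn.utils import no_init_parameters

class TinyNet(nn.Cell):  # stand-in for cls(config, *model_args, **model_kwargs)
    def __init__(self):
        super().__init__()
        self.proj = nn.Dense(16, 16)

    def construct(self, x):
        return self.proj(x)

with no_init_parameters():        # parameter data is created lazily, no random init cost
    model = TinyNet()
for p in model.get_parameters():  # stand-in for mindone's model.to(mindspore_dtype)
    p.set_dtype(ms.float16)
# checkpoint weights are then loaded into the uninitialized fp16 parameters
```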
