CUDA out of memory #4
This error appeared while training HuBERT, even though the dataset I used is much smaller than BAVED.

Try reducing the batch size to 16 or 8 (see the sketch after this thread).

Same problem here.

If I pay for Google Colab Pro, will it solve the problem?

I bought Google Colab Pro and still get the same error.
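A minimal sketch of the batch-size suggestion above, assuming the notebook builds a standard transformers TrainingArguments (the values and output_dir here are illustrative placeholders, not this project's actual config). Lowering per_device_train_batch_size while raising gradient_accumulation_steps keeps the effective batch size at 64 but cuts peak activation memory; gradient_checkpointing trades extra compute for further memory savings.

# Illustrative memory-saving settings for the Trainer-based fine-tuning notebook.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./hubert-baved",      # hypothetical output path
    num_train_epochs=15,
    per_device_train_batch_size=8,    # down from the 32 shown in the log below
    gradient_accumulation_steps=8,    # 8 * 8 = 64, same effective batch size as before
    gradient_checkpointing=True,      # recompute activations in the backward pass to save memory
    fp16=True,                        # the traceback's use_cuda_amp path suggests AMP is already on; explicit here
    do_train=True,
)

Whether 8 or 16 fits depends on clip lengths in the dataset; if clips are long, truncating or chunking the audio before feature extraction helps as well.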
if training_args.do_train:
    print(f"last_checkpoint: {last_checkpoint}")
    train_result = trainer.train(resume_from_checkpoint=last_checkpoint)
    trainer.save_model()
    feature_extractor.save_pretrained(training_args.output_dir)
    metrics = train_result.metrics
    metrics["train_samples"] = len(train_dataset)
The following columns in the training set don't have a corresponding argument in HubertForSpeechClassification.forward and have been ignored: name, emotion. If name, emotion are not expected by HubertForSpeechClassification.forward, you can safely ignore this message.
/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set no_deprecation_warning=True to disable this warning
  warnings.warn(
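The FutureWarning above is unrelated to the OOM. One way to address it, assuming this transformers version's TrainingArguments accepts the optim field, is to select the PyTorch AdamW implementation in the TrainingArguments sketched earlier:

# Sketch: add to the TrainingArguments above (assumes `optim` is supported by this transformers version).
training_args = TrainingArguments(
    output_dir="./hubert-baved",   # hypothetical path, as before
    optim="adamw_torch",           # use torch.optim.AdamW instead of the deprecated transformers.AdamW
)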
***** Running training *****
Num examples = 105
Num Epochs = 15
Instantaneous batch size per device = 32
Total train batch size (w. parallel, distributed & accumulation) = 64
Gradient Accumulation steps = 2
Total optimization steps = 30
Number of trainable parameters = 90764163
last_checkpoint: None
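For reference, the figures in the log are consistent with each other. A rough check using only the numbers printed above (the exact accounting inside Trainer may differ slightly):

import math

num_examples = 105
per_device_batch_size = 32
grad_accum_steps = 2
num_epochs = 15

effective_batch_size = per_device_batch_size * grad_accum_steps      # 64, matches "Total train batch size"
batches_per_epoch = math.ceil(num_examples / per_device_batch_size)  # 4 dataloader batches per epoch
updates_per_epoch = batches_per_epoch // grad_accum_steps            # 2 optimizer updates per epoch
total_optimization_steps = updates_per_epoch * num_epochs            # 30, matches "Total optimization steps"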
OutOfMemoryError Traceback (most recent call last)
in
1 if training_args.do_train:
2 print(f"last_checkpoint: {last_checkpoint}")
----> 3 train_result = trainer.train(resume_from_checkpoint=last_checkpoint)
4 trainer.save_model()
5 feature_extractor.save_pretrained(training_args.output_dir)
14 frames
/usr/local/lib/python3.8/dist-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1552 self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
1553 )
-> 1554 return inner_training_loop(
1555 args=args,
1556 resume_from_checkpoint=resume_from_checkpoint,
/usr/local/lib/python3.8/dist-packages/transformers/trainer.py in _inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1800 tr_loss_step = self.training_step(model, inputs)
1801 else:
-> 1802 tr_loss_step = self.training_step(model, inputs)
1803
1804 if (
in training_step(self, model, inputs)
43 if self.use_cuda_amp:
44 with autocast():
---> 45 loss = self.compute_loss(model, inputs)
46 else:
47 loss = self.compute_loss(model, inputs)
/usr/local/lib/python3.8/dist-packages/transformers/trainer.py in compute_loss(self, model, inputs, return_outputs)
2580 else:
2581 labels = None
-> 2582 outputs = model(**inputs)
2583 # Save past state if it exists
2584 # TODO: this needs to be fixed and made cleaner later.
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
in forward(self, input_values, attention_mask, output_attentions, output_hidden_states, return_dict, labels)
74 ):
75 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
---> 76 outputs = self.hubert(
77 input_values,
78 attention_mask=attention_mask,
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.8/dist-packages/transformers/models/hubert/modeling_hubert.py in forward(self, input_values, attention_mask, mask_time_indices, output_attentions, output_hidden_states, return_dict)
1063 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
1064
-> 1065 extract_features = self.feature_extractor(input_values)
1066 extract_features = extract_features.transpose(1, 2)
1067
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.8/dist-packages/transformers/models/hubert/modeling_hubert.py in forward(self, input_values)
357 )
358 else:
--> 359 hidden_states = conv_layer(hidden_states)
360
361 return hidden_states
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.8/dist-packages/transformers/models/hubert/modeling_hubert.py in forward(self, hidden_states)
257 def forward(self, hidden_states):
258 hidden_states = self.conv(hidden_states)
--> 259 hidden_states = self.layer_norm(hidden_states)
260 hidden_states = self.activation(hidden_states)
261 return hidden_states
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/normalization.py in forward(self, input)
271
272 def forward(self, input: Tensor) -> Tensor:
--> 273 return F.group_norm(
274 input, self.num_groups, self.weight, self.bias, self.eps)
275
/usr/local/lib/python3.8/dist-packages/torch/nn/functional.py in group_norm(input, num_groups, weight, bias, eps)
2526 return handle_torch_function(group_norm, (input, weight, bias,), input, num_groups, weight=weight, bias=bias, eps=eps)
2527 _verify_batch_size([input.size(0) * input.size(1) // num_groups, num_groups] + list(input.size()[2:]))
-> 2528 return torch.group_norm(input, num_groups, weight, bias, eps, torch.backends.cudnn.enabled)
2529
2530
OutOfMemoryError: CUDA out of memory. Tried to allocate 13.72 GiB (GPU 0; 14.76 GiB total capacity; 7.70 GiB already allocated; 5.75 GiB free; 7.78 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
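The failed allocation (13.72 GiB inside the convolutional feature extractor) exceeds what is free on a 16 GB Colab GPU, which is why upgrading to Colab Pro alone did not help. The allocator tweak named in the message can be tried as a sketch below; the 128 MB cap is an illustrative value, and the variable must be set before CUDA is initialized (e.g. in the first notebook cell).

import os

# Cap the caching allocator's split size to reduce fragmentation, as suggested by the error message.
# 128 is an illustrative value, not a recommendation from this repository.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

import torch  # imported after setting the env var so the allocator picks it up

Note that this only mitigates fragmentation; it cannot make a single 13.72 GiB tensor fit. Reducing per_device_train_batch_size, or truncating long audio clips to a shorter maximum length before feature extraction, remains the more direct fix.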