CUDA out of memory #4
This error appeared while training HuBERT, even though the dataset I used is much smaller than BAVED.

Try reducing the batch size to 16 or 8 (see the sketch after this thread).

Same problem here.

If I pay for Google Colab Pro, will it solve the problem?

I bought Google Colab Pro and still get the same error.
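A minimal sketch of the batch-size suggestion above, assuming the notebook builds a standard transformers TrainingArguments (the values and output_dir here are illustrative placeholders, not this project's actual config). Lowering per_device_train_batch_size while raising gradient_accumulation_steps keeps the effective batch size at 64 but cuts peak activation memory; gradient_checkpointing trades extra compute for further memory savings.

# Illustrative memory-saving settings for the Trainer-based fine-tuning notebook.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./hubert-baved",      # hypothetical output path
    num_train_epochs=15,
    per_device_train_batch_size=8,    # down from the 32 shown in the log below
    gradient_accumulation_steps=8,    # 8 * 8 = 64, same effective batch size as before
    gradient_checkpointing=True,      # recompute activations in the backward pass to save memory
    fp16=True,                        # the traceback's use_cuda_amp path suggests AMP is already on; explicit here
    do_train=True,
)

Whether 8 or 16 fits depends on clip lengths in the dataset; if clips are long, truncating or chunking the audio before feature extraction helps as well.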
if training_args.do_train:
    print(f"last_checkpoint: {last_checkpoint}")
    train_result = trainer.train(resume_from_checkpoint=last_checkpoint)
    trainer.save_model()
    feature_extractor.save_pretrained(training_args.output_dir)
    metrics = train_result.metrics
    metrics["train_samples"] = len(train_dataset)
The following columns in the training set don't have a corresponding argument in HubertForSpeechClassification.forward and have been ignored: name, emotion. If name, emotion are not expected by HubertForSpeechClassification.forward, you can safely ignore this message.
/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set no_deprecation_warning=True to disable this warning
  warnings.warn(
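The FutureWarning above is unrelated to the OOM. One way to address it, assuming this transformers version's TrainingArguments accepts the optim field, is to select the PyTorch AdamW implementation in the TrainingArguments sketched earlier:

# Sketch: add to the TrainingArguments above (assumes `optim` is supported by this transformers version).
training_args = TrainingArguments(
    output_dir="./hubert-baved",   # hypothetical path, as before
    optim="adamw_torch",           # use torch.optim.AdamW instead of the deprecated transformers.AdamW
)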
***** Running training *****
Num examples = 105
Num Epochs = 15
Instantaneous batch size per device = 32
Total train batch size (w. parallel, distributed & accumulation) = 64
Gradient Accumulation steps = 2
Total optimization steps = 30
Number of trainable parameters = 90764163
last_checkpoint: None
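For reference, the figures in the log are consistent with each other. A rough check using only the numbers printed above (the exact accounting inside Trainer may differ slightly):

import math

num_examples = 105
per_device_batch_size = 32
grad_accum_steps = 2
num_epochs = 15

effective_batch_size = per_device_batch_size * grad_accum_steps      # 64, matches "Total train batch size"
batches_per_epoch = math.ceil(num_examples / per_device_batch_size)  # 4 dataloader batches per epoch
updates_per_epoch = batches_per_epoch // grad_accum_steps            # 2 optimizer updates per epoch
total_optimization_steps = updates_per_epoch * num_epochs            # 30, matches "Total optimization steps"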
OutOfMemoryError Traceback (most recent call last)
in
1 if training_args.do_train:
2 print(f"last_checkpoint: {last_checkpoint}")
----> 3 train_result = trainer.train(resume_from_checkpoint=last_checkpoint)
4 trainer.save_model()
5 feature_extractor.save_pretrained(training_args.output_dir)
14 frames
/usr/local/lib/python3.8/dist-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1552 self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
1553 )
-> 1554 return inner_training_loop(
1555 args=args,
1556 resume_from_checkpoint=resume_from_checkpoint,
/usr/local/lib/python3.8/dist-packages/transformers/trainer.py in _inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1800 tr_loss_step = self.training_step(model, inputs)
1801 else:
-> 1802 tr_loss_step = self.training_step(model, inputs)
1803
1804 if (
in training_step(self, model, inputs)
43 if self.use_cuda_amp:
44 with autocast():
---> 45 loss = self.compute_loss(model, inputs)
46 else:
47 loss = self.compute_loss(model, inputs)
/usr/local/lib/python3.8/dist-packages/transformers/trainer.py in compute_loss(self, model, inputs, return_outputs)
2580 else:
2581 labels = None
-> 2582 outputs = model(**inputs)
2583 # Save past state if it exists
2584 # TODO: this needs to be fixed and made cleaner later.
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
in forward(self, input_values, attention_mask, output_attentions, output_hidden_states, return_dict, labels)
74 ):
75 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
---> 76 outputs = self.hubert(
77 input_values,
78 attention_mask=attention_mask,
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.8/dist-packages/transformers/models/hubert/modeling_hubert.py in forward(self, input_values, attention_mask, mask_time_indices, output_attentions, output_hidden_states, return_dict)
1063 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
1064
-> 1065 extract_features = self.feature_extractor(input_values)
1066 extract_features = extract_features.transpose(1, 2)
1067
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.8/dist-packages/transformers/models/hubert/modeling_hubert.py in forward(self, input_values)
357 )
358 else:
--> 359 hidden_states = conv_layer(hidden_states)
360
361 return hidden_states
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.8/dist-packages/transformers/models/hubert/modeling_hubert.py in forward(self, hidden_states)
257 def forward(self, hidden_states):
258 hidden_states = self.conv(hidden_states)
--> 259 hidden_states = self.layer_norm(hidden_states)
260 hidden_states = self.activation(hidden_states)
261 return hidden_states
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/normalization.py in forward(self, input)
271
272 def forward(self, input: Tensor) -> Tensor:
--> 273 return F.group_norm(
274 input, self.num_groups, self.weight, self.bias, self.eps)
275
/usr/local/lib/python3.8/dist-packages/torch/nn/functional.py in group_norm(input, num_groups, weight, bias, eps)
2526 return handle_torch_function(group_norm, (input, weight, bias,), input, num_groups, weight=weight, bias=bias, eps=eps)
2527 _verify_batch_size([input.size(0) * input.size(1) // num_groups, num_groups] + list(input.size()[2:]))
-> 2528 return torch.group_norm(input, num_groups, weight, bias, eps, torch.backends.cudnn.enabled)
2529
2530
OutOfMemoryError: CUDA out of memory. Tried to allocate 13.72 GiB (GPU 0; 14.76 GiB total capacity; 7.70 GiB already allocated; 5.75 GiB free; 7.78 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
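The failed allocation (13.72 GiB inside the convolutional feature extractor) exceeds what is free on a 16 GB Colab GPU, which is why upgrading to Colab Pro alone did not help. The allocator tweak named in the message can be tried as a sketch below; the 128 MB cap is an illustrative value, and the variable must be set before CUDA is initialized (e.g. in the first notebook cell).

import os

# Cap the caching allocator's split size to reduce fragmentation, as suggested by the error message.
# 128 is an illustrative value, not a recommendation from this repository.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

import torch  # imported after setting the env var so the allocator picks it up

Note that this only mitigates fragmentation; it cannot make a single 13.72 GiB tensor fit. Reducing per_device_train_batch_size, or truncating long audio clips to a shorter maximum length before feature extraction, remains the more direct fix.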