Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Updated BridgeTower Image processor #32384

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# limitations under the License.
"""Image processor class for BridgeTower."""

import warnings
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

import numpy as np
Expand Down Expand Up @@ -156,12 +157,6 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method.
Can be overridden by the `image_std` parameter in the `preprocess` method.
do_center_crop (`bool`, *optional*, defaults to `True`):
Whether to center crop the image. Can be overridden by the `do_center_crop` parameter in the `preprocess`
method.
crop_size (`Dict[str, int]`, *optional*):
Desired output size when applying center-cropping. Only has an effect if `do_center_crop` is set to `True`.
Can be overridden by the `crop_size` parameter in the `preprocess` method. If unset defaults to `size`,
do_pad (`bool`, *optional*, defaults to `True`):
Whether to pad the image to the `(max_height, max_width)` of the images in the batch. Can be overridden by
the `do_pad` parameter in the `preprocess` method.
Expand All @@ -180,8 +175,6 @@ def __init__(
do_normalize: bool = True,
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
do_center_crop: bool = True,
crop_size: Dict[str, int] = None,
do_pad: bool = True,
**kwargs,
) -> None:
Expand All @@ -202,8 +195,6 @@ def __init__(
self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
self.do_pad = do_pad
self.do_center_crop = do_center_crop
self.crop_size = crop_size

# Copied from transformers.models.vilt.image_processing_vilt.ViltImageProcessor.resize
def resize(
Expand Down Expand Up @@ -277,6 +268,7 @@ def center_crop(
The channel dimension format of the input image. If not provided, it will be inferred from the input
image.
"""
warnings.warn("The center_crop method is deprecated and will be removed in v4.50.0")
output_size = size["shortest_edge"]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case, what we want to do is update the docstring to reflect the true behaviour of the method, but still take the same size["shortest_edge"] argument. Otherwise this would be a breaking change and previous configs for the model would have a different behaviour. In fact - I think existing configs would break as they don't have "height" or "width" keys for size

return center_crop(
image,
Expand Down Expand Up @@ -421,12 +413,6 @@ def preprocess(
do_pad (`bool`, *optional*, defaults to `self.do_pad`):
Whether to pad the image to the (max_height, max_width) in the batch. If `True`, a pixel mask is also
created and returned.
do_center_crop (`bool`, *optional*, defaults to `self.do_center_crop`):
Whether to center crop the image. If the input size is smaller than `crop_size` along any edge, the
image is padded with 0's and then center cropped.
crop_size (`Dict[str, int]`, *optional*, defaults to `self.crop_size`):
Size of the image after center crop. If one edge the image is smaller than `crop_size`, it will be
padded with zeros and then cropped
return_tensors (`str` or `TensorType`, *optional*):
The type of tensors to return. Can be one of:
- Unset: Return a list of `np.ndarray`.
Expand Down Expand Up @@ -455,12 +441,6 @@ def preprocess(
image_mean = image_mean if image_mean is not None else self.image_mean
image_std = image_std if image_std is not None else self.image_std
do_pad = do_pad if do_pad is not None else self.do_pad
do_center_crop if do_center_crop is not None else self.do_center_crop
# For backwards compatibility. Initial version of this processor was cropping to the "size" argument, which
# it should default to if crop_size is undefined.
crop_size = (
crop_size if crop_size is not None else (self.crop_size if self.crop_size is not None else self.size)
)

size = size if size is not None else self.size
size = get_size_dict(size, default_to_square=False)
Expand All @@ -473,7 +453,6 @@ def preprocess(
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
"torch.Tensor, tf.Tensor or jax.ndarray."
)
# Here, crop_size is used only if it is set, else size will be used.
validate_preprocess_arguments(
do_rescale=do_rescale,
rescale_factor=rescale_factor,
Expand All @@ -482,8 +461,6 @@ def preprocess(
image_std=image_std,
do_pad=do_pad,
size_divisibility=size_divisor,
do_center_crop=do_center_crop,
crop_size=crop_size,
do_resize=do_resize,
size=size,
resample=resample,
Expand All @@ -497,6 +474,8 @@ def preprocess(
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)

if do_center_crop or crop_size:
warnings.warn("The center_crop method is deprecated and will be removed in v4.50.0")
if do_resize:
images = [
self.resize(
Expand All @@ -509,11 +488,6 @@ def preprocess(
for image in images
]

if do_center_crop:
images = [
self.center_crop(image=image, size=crop_size, input_data_format=input_data_format) for image in images
]

if do_rescale:
images = [
self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
Expand Down
2 changes: 0 additions & 2 deletions tests/models/bridgetower/test_image_processing_bridgetower.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ def __init__(
do_rescale: bool = True,
rescale_factor: Union[int, float] = 1 / 255,
do_normalize: bool = True,
do_center_crop: bool = True,
image_mean: Optional[Union[float, List[float]]] = [0.48145466, 0.4578275, 0.40821073],
image_std: Optional[Union[float, List[float]]] = [0.26862954, 0.26130258, 0.27577711],
do_pad: bool = True,
Expand All @@ -58,7 +57,6 @@ def __init__(
self.do_rescale = do_rescale
self.rescale_factor = rescale_factor
self.do_normalize = do_normalize
self.do_center_crop = do_center_crop
self.image_mean = image_mean
self.image_std = image_std
self.do_pad = do_pad
Expand Down