import contextlib
import functools
import inspect
import math
import pickle
import re
import sys

from copy import deepcopy
from pathlib import Path
from unittest import mock

import numpy as np
import PIL.Image
import pytest

import torch
import torchvision.transforms.v2 as transforms

from common_utils import (
    assert_equal,
    cpu_and_cuda,
    freeze_rng_state,
    ignore_jit_no_profile_information_warning,
    make_bounding_boxes,
    make_detection_masks,
    make_image,
    make_image_pil,
    make_image_tensor,
    make_segmentation_mask,
    make_video,
)
from torch import nn
from torch.testing import assert_close
from torch.utils._pytree import tree_flatten, tree_map
from torch.utils.data import DataLoader, default_collate
from torchvision import tv_tensors
from torchvision.ops.boxes import box_iou
from torchvision.transforms._functional_tensor import _max_value as get_max_value
from torchvision.transforms.functional import pil_modes_mapping, to_pil_image
from torchvision.transforms.v2 import functional as F
from torchvision.transforms.v2._utils import check_type, is_pure_tensor
from torchvision.transforms.v2.functional._geometry import _get_perspective_coeffs
from torchvision.transforms.v2.functional._utils import _get_kernel, _register_kernel_internal

# turn all warnings into errors for this module
pytestmark = [pytest.mark.filterwarnings("error")]

if sys.version_info[:2] >= (3, 12):
    # torchscript relies on AST functionality that is deprecated in Python 3.12
    pytestmark.append(pytest.mark.filterwarnings("ignore::DeprecationWarning"))


@pytest.fixture(autouse=True)
def fix_rng_seed():
    torch.manual_seed(0)
    yield


def _to_tolerances(maybe_tolerance_dict):
    if not isinstance(maybe_tolerance_dict, dict):
        return dict(rtol=None, atol=None)

    tolerances = dict(rtol=0, atol=0)
    tolerances.update(maybe_tolerance_dict)
    return tolerances
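
# Illustrative note (not part of the original helpers): the `check_*` arguments of `check_kernel` below accept
# either a bool or a tolerance dict, and `_to_tolerances` normalizes both, e.g.
#   _to_tolerances(True)             -> dict(rtol=None, atol=None)  # use the assert_close defaults
#   _to_tolerances(dict(atol=1e-5))  -> dict(rtol=0, atol=1e-5)     # explicit atol, everything else exact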


def _check_kernel_cuda_vs_cpu(kernel, input, *args, rtol, atol, **kwargs):
    """Checks if the kernel produces close results for inputs on GPU and CPU."""
    if input.device.type != "cuda":
        return

    input_cuda = input.as_subclass(torch.Tensor)
    input_cpu = input_cuda.to("cpu")

    with freeze_rng_state():
        actual = kernel(input_cuda, *args, **kwargs)
    with freeze_rng_state():
        expected = kernel(input_cpu, *args, **kwargs)

    assert_close(actual, expected, check_device=False, rtol=rtol, atol=atol)


def _script(obj):
    try:
        return torch.jit.script(obj)
    except Exception as error:
        name = getattr(obj, "__name__", obj.__class__.__name__)
        raise AssertionError(f"Trying to `torch.jit.script` `{name}` raised the error above.") from error


def _check_kernel_scripted_vs_eager(kernel, input, *args, rtol, atol, **kwargs):
    """Checks if the kernel is scriptable and if the scripted output is close to the eager one."""
    if input.device.type != "cpu":
        return

    kernel_scripted = _script(kernel)

    input = input.as_subclass(torch.Tensor)
    with ignore_jit_no_profile_information_warning():
        with freeze_rng_state():
            actual = kernel_scripted(input, *args, **kwargs)
    with freeze_rng_state():
        expected = kernel(input, *args, **kwargs)

    assert_close(actual, expected, rtol=rtol, atol=atol)


def _check_kernel_batched_vs_unbatched(kernel, input, *args, rtol, atol, **kwargs):
    """Checks if the kernel produces close results for batched and unbatched inputs."""
    unbatched_input = input.as_subclass(torch.Tensor)

    for batch_dims in [(2,), (2, 1)]:
        repeats = [*batch_dims, *[1] * input.ndim]

        actual = kernel(unbatched_input.repeat(repeats), *args, **kwargs)

        expected = kernel(unbatched_input, *args, **kwargs)
        # Most kernels just return a tensor, but some also return additional metadata
        if isinstance(expected, torch.Tensor):
            expected = expected.repeat(repeats)
        else:
            tensor, *metadata = expected
            expected = (tensor.repeat(repeats), *metadata)

        assert_close(actual, expected, rtol=rtol, atol=atol)

    for degenerate_batch_dims in [(0,), (5, 0), (0, 5)]:
        degenerate_batched_input = torch.empty(
            degenerate_batch_dims + input.shape, dtype=input.dtype, device=input.device
        )

        output = kernel(degenerate_batched_input, *args, **kwargs)
        if not isinstance(output, torch.Tensor):
            output, *_ = output

        assert output.shape[: -input.ndim] == degenerate_batch_dims


def check_kernel(
    kernel,
    input,
    *args,
    check_cuda_vs_cpu=True,
    check_scripted_vs_eager=True,
    check_batched_vs_unbatched=True,
    **kwargs,
):
    initial_input_version = input._version

    output = kernel(input.as_subclass(torch.Tensor), *args, **kwargs)
    # Most kernels just return a tensor, but some also return additional metadata
    if not isinstance(output, torch.Tensor):
        output, *_ = output

    # check that no inplace operation happened
    assert input._version == initial_input_version

    if kernel not in {F.to_dtype_image, F.to_dtype_video}:
        assert output.dtype == input.dtype
    assert output.device == input.device

    if check_cuda_vs_cpu:
        _check_kernel_cuda_vs_cpu(kernel, input, *args, **kwargs, **_to_tolerances(check_cuda_vs_cpu))

    if check_scripted_vs_eager:
        _check_kernel_scripted_vs_eager(kernel, input, *args, **kwargs, **_to_tolerances(check_scripted_vs_eager))

    if check_batched_vs_unbatched:
        _check_kernel_batched_vs_unbatched(
            kernel, input, *args, **kwargs, **_to_tolerances(check_batched_vs_unbatched)
        )
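
# Usage sketch for `check_kernel` (illustrative only; the concrete tests below follow this pattern):
#   check_kernel(
#       F.horizontal_flip_image,
#       make_image(dtype=torch.uint8, device="cpu"),
#       check_cuda_vs_cpu=dict(rtol=0, atol=1),  # bool or tolerance dict, normalized by _to_tolerances above
#   )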


def _check_functional_scripted_smoke(functional, input, *args, **kwargs):
    """Checks if the functional can be scripted and the scripted version can be called without error."""
    if not isinstance(input, tv_tensors.Image):
        return

    functional_scripted = _script(functional)
    with ignore_jit_no_profile_information_warning():
        functional_scripted(input.as_subclass(torch.Tensor), *args, **kwargs)


def check_functional(functional, input, *args, check_scripted_smoke=True, **kwargs):
    unknown_input = object()
    with pytest.raises(TypeError, match=re.escape(str(type(unknown_input)))):
        functional(unknown_input, *args, **kwargs)

    with mock.patch("torch._C._log_api_usage_once", wraps=torch._C._log_api_usage_once) as spy:
        output = functional(input, *args, **kwargs)

        spy.assert_any_call(f"{functional.__module__}.{functional.__name__}")

    assert isinstance(output, type(input))

    if isinstance(input, tv_tensors.BoundingBoxes) and functional is not F.convert_bounding_box_format:
        assert output.format == input.format

    if check_scripted_smoke:
        _check_functional_scripted_smoke(functional, input, *args, **kwargs)


def check_functional_kernel_signature_match(functional, *, kernel, input_type):
    """Checks if the signature of the functional matches the kernel signature."""
    functional_params = list(inspect.signature(functional).parameters.values())[1:]
    kernel_params = list(inspect.signature(kernel).parameters.values())[1:]

    if issubclass(input_type, tv_tensors.TVTensor):
        # We filter out metadata that is implicitly passed to the functional through the input tv_tensor, but has to
        # be explicitly passed to the kernel.
        explicit_metadata = {
            tv_tensors.BoundingBoxes: {"format", "canvas_size"},
        }
        kernel_params = [param for param in kernel_params if param.name not in explicit_metadata.get(input_type, set())]

    functional_params = iter(functional_params)
    for functional_param, kernel_param in zip(functional_params, kernel_params):
        try:
            # The functional parameters are a superset of the kernel parameters. Thus, we filter out functional
            # parameters that have no kernel equivalent while keeping the order intact.
            while functional_param.name != kernel_param.name:
                functional_param = next(functional_params)
        except StopIteration:
            raise AssertionError(
                f"Parameter `{kernel_param.name}` of kernel `{kernel.__name__}` "
                f"has no corresponding parameter on the functional `{functional.__name__}`."
            ) from None

        if issubclass(input_type, PIL.Image.Image):
            # PIL kernels often have more correct annotations, since they are not limited by JIT. Thus, we don't
            # check them in the first place.
            functional_param._annotation = kernel_param._annotation = inspect.Parameter.empty

        assert functional_param == kernel_param


def _check_transform_v1_compatibility(transform, input, *, rtol, atol):
    """If the transform defines the ``_v1_transform_cls`` attribute, checks if the transform has a public, static
    ``get_params`` method that is the v1 equivalent, the output is close to v1, is scriptable, and the scripted
    version can be called without error."""
    if not (type(input) is torch.Tensor or isinstance(input, PIL.Image.Image)):
        return

    v1_transform_cls = transform._v1_transform_cls
    if v1_transform_cls is None:
        return

    if hasattr(v1_transform_cls, "get_params"):
        assert type(transform).get_params is v1_transform_cls.get_params

    v1_transform = v1_transform_cls(**transform._extract_params_for_v1_transform())

    with freeze_rng_state():
        output_v2 = transform(input)

    with freeze_rng_state():
        output_v1 = v1_transform(input)

    assert_close(F.to_image(output_v2), F.to_image(output_v1), rtol=rtol, atol=atol)

    if isinstance(input, PIL.Image.Image):
        return

    _script(v1_transform)(input)


def _make_transform_sample(transform, *, image_or_video, adapter):
    device = image_or_video.device if isinstance(image_or_video, torch.Tensor) else "cpu"
    size = F.get_size(image_or_video)
    input = dict(
        image_or_video=image_or_video,
        image_tv_tensor=make_image(size, device=device),
        video_tv_tensor=make_video(size, device=device),
        image_pil=make_image_pil(size),
        bounding_boxes_xyxy=make_bounding_boxes(size, format=tv_tensors.BoundingBoxFormat.XYXY, device=device),
        bounding_boxes_xywh=make_bounding_boxes(size, format=tv_tensors.BoundingBoxFormat.XYWH, device=device),
        bounding_boxes_cxcywh=make_bounding_boxes(size, format=tv_tensors.BoundingBoxFormat.CXCYWH, device=device),
        bounding_boxes_degenerate_xyxy=tv_tensors.BoundingBoxes(
            # no height or width, no height, no width, and flipped corners
            [[0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 1, 0], [2, 0, 1, 1], [0, 2, 1, 1], [2, 2, 1, 1]],
            format=tv_tensors.BoundingBoxFormat.XYXY,
            canvas_size=size,
            device=device,
        ),
        bounding_boxes_degenerate_xywh=tv_tensors.BoundingBoxes(
            # no height or width, no width, no height, and negative height / width
            [[0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 1, 0], [0, 0, 1, -1], [0, 0, -1, 1], [0, 0, -1, -1]],
            format=tv_tensors.BoundingBoxFormat.XYWH,
            canvas_size=size,
            device=device,
        ),
        bounding_boxes_degenerate_cxcywh=tv_tensors.BoundingBoxes(
            # no height or width, no width, no height, and negative height / width
            [[0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 1, 0], [0, 0, 1, -1], [0, 0, -1, 1], [0, 0, -1, -1]],
            format=tv_tensors.BoundingBoxFormat.CXCYWH,
            canvas_size=size,
            device=device,
        ),
        detection_mask=make_detection_masks(size, device=device),
        segmentation_mask=make_segmentation_mask(size, device=device),
        int=0,
        float=0.0,
        bool=True,
        none=None,
        str="str",
        path=Path.cwd(),
        object=object(),
        tensor=torch.empty(5),
        array=np.empty(5),
    )
    if adapter is not None:
        input = adapter(transform, input, device)
    return input


def _check_transform_sample_input_smoke(transform, input, *, adapter):
    # This is a bunch of input / output convention checks, using a big sample with different parts as input.
    if not check_type(input, (is_pure_tensor, PIL.Image.Image, tv_tensors.Image, tv_tensors.Video)):
        return

    sample = _make_transform_sample(
        # adapter might change transform inplace
        transform=transform if adapter is None else deepcopy(transform),
        image_or_video=input,
        adapter=adapter,
    )

    for container_type in [dict, list, tuple]:
        if container_type is dict:
            input = sample
        else:
            input = container_type(sample.values())

        input_flat, input_spec = tree_flatten(input)

        with freeze_rng_state():
            output = transform(input)
        output_flat, output_spec = tree_flatten(output)

        assert output_spec == input_spec

        for output_item, input_item, should_be_transformed in zip(
            output_flat, input_flat, transforms.Transform()._needs_transform_list(input_flat)
        ):
            if should_be_transformed:
                assert type(output_item) is type(input_item)
            else:
                assert output_item is input_item

    # Enforce that the transform does not turn a degenerate bounding box back into a valid one.
    for degenerate_bounding_boxes in (
        bounding_box
        for name, bounding_box in sample.items()
        if "degenerate" in name and isinstance(bounding_box, tv_tensors.BoundingBoxes)
    ):
        sample = dict(
            boxes=degenerate_bounding_boxes,
            labels=torch.randint(10, (degenerate_bounding_boxes.shape[0],), device=degenerate_bounding_boxes.device),
        )
        assert transforms.SanitizeBoundingBoxes()(sample)["boxes"].shape == (0, 4)


def check_transform(transform, input, check_v1_compatibility=True, check_sample_input=True):
    pickle.loads(pickle.dumps(transform))

    output = transform(input)
    assert isinstance(output, type(input))

    if isinstance(input, tv_tensors.BoundingBoxes) and not isinstance(transform, transforms.ConvertBoundingBoxFormat):
        assert output.format == input.format

    if check_sample_input:
        _check_transform_sample_input_smoke(
            transform, input, adapter=check_sample_input if callable(check_sample_input) else None
        )

    if check_v1_compatibility:
        _check_transform_v1_compatibility(transform, input, **_to_tolerances(check_v1_compatibility))

    return output
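
# Usage sketch for `check_transform` (illustrative only): it pickle-roundtrips the transform, checks type and
# bounding-box format preservation, runs the sample-input smoke test, and compares against the v1 transform when
# one exists, returning the transformed output for further assertions:
#   output = check_transform(transforms.RandomHorizontalFlip(p=1), make_image())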


def transform_cls_to_functional(transform_cls, **transform_specific_kwargs):
    def wrapper(input, *args, **kwargs):
        transform = transform_cls(*args, **transform_specific_kwargs, **kwargs)
        return transform(input)

    wrapper.__name__ = transform_cls.__name__

    return wrapper
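
# For example, `transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)` returns a function that behaves
# like `F.horizontal_flip`, which lets the correctness tests below parametrize over the functional and the
# transform with the same call signature:
#   hflip = transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)
#   hflip(make_image())  # equivalent to transforms.RandomHorizontalFlip(p=1)(make_image())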


def param_value_parametrization(**kwargs):
    """Helper function to turn

    @pytest.mark.parametrize("param", ["a", "b", "c"])
    @pytest.mark.parametrize("value", [1, 2, 3])

    into

    @param_value_parametrization(a=[1, 2, 3], b=[-1.0, 1.0])
    """
    return pytest.mark.parametrize(
        ("param", "value"),
        [(param, value) for param, values in kwargs.items() for value in values],
    )
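
# For example, `@param_value_parametrization(a=[1, 2], b=[-1.0])` expands to
#   @pytest.mark.parametrize(("param", "value"), [("a", 1), ("a", 2), ("b", -1.0)])
# so each test receives one (param, value) pair per parameter at a time, instead of the full cross product.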


def adapt_fill(value, *, dtype):
    """Adapt fill values in the range [0.0, 1.0] to the value range of the dtype"""
    if value is None:
        return value

    max_value = get_max_value(dtype)
    value_type = float if dtype.is_floating_point else int

    if isinstance(value, (int, float)):
        return value_type(value * max_value)
    elif isinstance(value, (list, tuple)):
        return type(value)(value_type(v * max_value) for v in value)
    else:
        raise ValueError(f"fill should be an int or float, or a list or tuple of the former, but got '{value}'.")
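
# A few worked examples (illustrative): with `get_max_value(torch.uint8) == 255` and
# `get_max_value(torch.float32) == 1.0`,
#   adapt_fill(0.5, dtype=torch.uint8)        -> 127  (int(0.5 * 255))
#   adapt_fill(0.5, dtype=torch.float32)      -> 0.5
#   adapt_fill([1, 0, 1], dtype=torch.uint8)  -> [255, 0, 255]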

EXHAUSTIVE_TYPE_FILLS = [
    None,
    1,
    0.5,
    [1],
    [0.2],
    (0,),
    (0.7,),
    [1, 0, 1],
    [0.1, 0.2, 0.3],
    (0, 1, 0),
    (0.9, 0.234, 0.314),
]
CORRECTNESS_FILLS = [
    v for v in EXHAUSTIVE_TYPE_FILLS if v is None or isinstance(v, float) or (isinstance(v, list) and len(v) > 1)
]

INTERPOLATION_MODES = [
    transforms.InterpolationMode.NEAREST,
    transforms.InterpolationMode.NEAREST_EXACT,
    transforms.InterpolationMode.BILINEAR,
    transforms.InterpolationMode.BICUBIC,
]


def reference_affine_bounding_boxes_helper(bounding_boxes, *, affine_matrix, new_canvas_size=None, clamp=True):
    format = bounding_boxes.format
    canvas_size = new_canvas_size or bounding_boxes.canvas_size

    def affine_bounding_boxes(bounding_boxes):
        dtype = bounding_boxes.dtype
        device = bounding_boxes.device

        # Go to float before converting to prevent precision loss in case of CXCYWH -> XYXY and W or H is 1
        input_xyxy = F.convert_bounding_box_format(
            bounding_boxes.to(dtype=torch.float64, device="cpu", copy=True),
            old_format=format,
            new_format=tv_tensors.BoundingBoxFormat.XYXY,
            inplace=True,
        )
        x1, y1, x2, y2 = input_xyxy.squeeze(0).tolist()

        points = np.array([[x1, y1, 1.0], [x2, y1, 1.0], [x1, y2, 1.0], [x2, y2, 1.0]])
        transformed_points = np.matmul(points, affine_matrix.astype(points.dtype).T)

        output_xyxy = torch.Tensor(
            [
                float(np.min(transformed_points[:, 0])),
                float(np.min(transformed_points[:, 1])),
                float(np.max(transformed_points[:, 0])),
                float(np.max(transformed_points[:, 1])),
            ]
        )

        output = F.convert_bounding_box_format(
            output_xyxy, old_format=tv_tensors.BoundingBoxFormat.XYXY, new_format=format
        )

        if clamp:
            # It is important to clamp before casting, especially for CXCYWH format, dtype=int64
            output = F.clamp_bounding_boxes(
                output,
                format=format,
                canvas_size=canvas_size,
            )
        else:
            # We leave the bounding box as float64 so the caller gets the full precision in case it is needed
            dtype = output.dtype

        return output.to(dtype=dtype, device=device)

    return tv_tensors.BoundingBoxes(
        torch.cat([affine_bounding_boxes(b) for b in bounding_boxes.reshape(-1, 4).unbind()], dim=0).reshape(
            bounding_boxes.shape
        ),
        format=format,
        canvas_size=canvas_size,
    )
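
# The `affine_matrix` passed to the helper above is the top two rows of a 3x3 homogeneous transform, applied to the
# four corners of each box. E.g. a horizontal flip on a canvas of width W maps (x, y) -> (W - x, y), i.e.
#   np.array([[-1, 0, W], [0, 1, 0]])
# which is exactly the matrix built in `_reference_horizontal_flip_bounding_boxes` further below.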


class TestResize:
    INPUT_SIZE = (17, 11)
    OUTPUT_SIZES = [17, [17], (17,), None, [12, 13], (12, 13)]

    def _make_max_size_kwarg(self, *, use_max_size, size):
        if size is None:
            max_size = min(list(self.INPUT_SIZE))
        elif use_max_size:
            if not (isinstance(size, int) or len(size) == 1):
                # This would result in a `ValueError` from the resize kernel
                return None

            max_size = (size if isinstance(size, int) else size[0]) + 1
        else:
            max_size = None

        return dict(max_size=max_size)

    def _compute_output_size(self, *, input_size, size, max_size):
        if size is None:
            size = max_size

        elif not (isinstance(size, int) or len(size) == 1):
            return tuple(size)

        elif not isinstance(size, int):
            size = size[0]

        old_height, old_width = input_size
        ratio = old_width / old_height
        if ratio > 1:
            new_height = size
            new_width = int(ratio * new_height)
        else:
            new_width = size
            new_height = int(new_width / ratio)

        if max_size is not None and max(new_height, new_width) > max_size:
            # Need to recompute the aspect ratio, since it might have changed due to rounding
            ratio = new_width / new_height
            if ratio > 1:
                new_width = max_size
                new_height = int(new_width / ratio)
            else:
                new_height = max_size
                new_width = int(new_height * ratio)

        return new_height, new_width
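
    # Worked example (illustrative): with INPUT_SIZE == (17, 11), i.e. height 17 and width 11, `size=17` resizes the
    # smaller edge (the width) to 17, so new_height = int(17 * 17 / 11) = 26 and the output is (26, 17). With
    # `max_size=20` the larger edge (height 26) is then capped at 20 and the width rescales to int(20 * 17 / 26) = 13.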

    @pytest.mark.parametrize("size", OUTPUT_SIZES)
    @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES)
    @pytest.mark.parametrize("use_max_size", [True, False])
    @pytest.mark.parametrize("antialias", [True, False])
    @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, size, interpolation, use_max_size, antialias, dtype, device):
        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
            return

        # Bicubic uint8 images are interpolated in float on CUDA and can deviate more from the CPU result
        atol = 30 if (interpolation is transforms.InterpolationMode.BICUBIC and dtype is torch.uint8) else 1
        check_cuda_vs_cpu_tolerances = dict(rtol=0, atol=atol / 255 if dtype.is_floating_point else atol)

        check_kernel(
            F.resize_image,
            make_image(self.INPUT_SIZE, dtype=dtype, device=device),
            size=size,
            interpolation=interpolation,
            **max_size_kwarg,
            antialias=antialias,
            check_cuda_vs_cpu=check_cuda_vs_cpu_tolerances,
            check_scripted_vs_eager=not isinstance(size, int),
        )

    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("size", OUTPUT_SIZES)
    @pytest.mark.parametrize("use_max_size", [True, False])
    @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_bounding_boxes(self, format, size, use_max_size, dtype, device):
        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
            return

        bounding_boxes = make_bounding_boxes(
            format=format,
            canvas_size=self.INPUT_SIZE,
            dtype=dtype,
            device=device,
        )
        check_kernel(
            F.resize_bounding_boxes,
            bounding_boxes,
            canvas_size=bounding_boxes.canvas_size,
            size=size,
            **max_size_kwarg,
            check_scripted_vs_eager=not isinstance(size, int),
        )

    @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks])
    def test_kernel_mask(self, make_mask):
        check_kernel(F.resize_mask, make_mask(self.INPUT_SIZE), size=self.OUTPUT_SIZES[-1])

    def test_kernel_video(self):
        check_kernel(F.resize_video, make_video(self.INPUT_SIZE), size=self.OUTPUT_SIZES[-1], antialias=True)

    @pytest.mark.parametrize("size", OUTPUT_SIZES)
    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_functional(self, size, make_input):
        max_size_kwarg = self._make_max_size_kwarg(use_max_size=size is None, size=size)

        check_functional(
            F.resize,
            make_input(self.INPUT_SIZE),
            size=size,
            **max_size_kwarg,
            antialias=True,
            check_scripted_smoke=not isinstance(size, int),
        )

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.resize_image, torch.Tensor),
            (F._geometry._resize_image_pil, PIL.Image.Image),
            (F.resize_image, tv_tensors.Image),
            (F.resize_bounding_boxes, tv_tensors.BoundingBoxes),
            (F.resize_mask, tv_tensors.Mask),
            (F.resize_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.resize, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize("size", OUTPUT_SIZES)
    @pytest.mark.parametrize("device", cpu_and_cuda())
    @pytest.mark.parametrize(
        "make_input",
        [
            make_image_tensor,
            make_image_pil,
            make_image,
            make_bounding_boxes,
            make_segmentation_mask,
            make_detection_masks,
            make_video,
        ],
    )
    def test_transform(self, size, device, make_input):
        max_size_kwarg = self._make_max_size_kwarg(use_max_size=size is None, size=size)

        check_transform(
            transforms.Resize(size=size, **max_size_kwarg, antialias=True),
            make_input(self.INPUT_SIZE, device=device),
            # atol=1 since Resize v2 uses the native uint8 interpolate path for bilinear and nearest modes
            check_v1_compatibility=dict(rtol=0, atol=1) if size is not None else False,
        )

    def _check_output_size(self, input, output, *, size, max_size):
        assert tuple(F.get_size(output)) == self._compute_output_size(
            input_size=F.get_size(input), size=size, max_size=max_size
        )

    @pytest.mark.parametrize("size", OUTPUT_SIZES)
    # PIL has no equivalent of `InterpolationMode.NEAREST` (its nearest mode matches `NEAREST_EXACT`),
    # so `NEAREST` is excluded from this comparison against PIL.
    @pytest.mark.parametrize("interpolation", set(INTERPOLATION_MODES) - {transforms.InterpolationMode.NEAREST})
    @pytest.mark.parametrize("use_max_size", [True, False])
    @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)])
    def test_image_correctness(self, size, interpolation, use_max_size, fn):
        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
            return

        image = make_image(self.INPUT_SIZE, dtype=torch.uint8)

        actual = fn(image, size=size, interpolation=interpolation, **max_size_kwarg, antialias=True)
        expected = F.to_image(F.resize(F.to_pil_image(image), size=size, interpolation=interpolation, **max_size_kwarg))

        self._check_output_size(image, actual, size=size, **max_size_kwarg)
        torch.testing.assert_close(actual, expected, atol=1, rtol=0)

    def _reference_resize_bounding_boxes(self, bounding_boxes, *, size, max_size=None):
        old_height, old_width = bounding_boxes.canvas_size
        new_height, new_width = self._compute_output_size(
            input_size=bounding_boxes.canvas_size, size=size, max_size=max_size
        )

        if (old_height, old_width) == (new_height, new_width):
            return bounding_boxes

        affine_matrix = np.array(
            [
                [new_width / old_width, 0, 0],
                [0, new_height / old_height, 0],
            ],
        )

        return reference_affine_bounding_boxes_helper(
            bounding_boxes,
            affine_matrix=affine_matrix,
            new_canvas_size=(new_height, new_width),
        )

    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("size", OUTPUT_SIZES)
    @pytest.mark.parametrize("use_max_size", [True, False])
    @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)])
    def test_bounding_boxes_correctness(self, format, size, use_max_size, fn):
        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
            return

        bounding_boxes = make_bounding_boxes(format=format, canvas_size=self.INPUT_SIZE)

        actual = fn(bounding_boxes, size=size, **max_size_kwarg)
        expected = self._reference_resize_bounding_boxes(bounding_boxes, size=size, **max_size_kwarg)

        self._check_output_size(bounding_boxes, actual, size=size, **max_size_kwarg)
        torch.testing.assert_close(actual, expected)

    @pytest.mark.parametrize("interpolation", set(transforms.InterpolationMode) - set(INTERPOLATION_MODES))
    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_video],
    )
    def test_pil_interpolation_compat_smoke(self, interpolation, make_input):
        input = make_input(self.INPUT_SIZE)

        with (
            contextlib.nullcontext()
            if isinstance(input, PIL.Image.Image)
            # This error is triggered in PyTorch core
            else pytest.raises(NotImplementedError, match=f"got {interpolation.value.lower()}")
        ):
            F.resize(
                input,
                size=self.OUTPUT_SIZES[0],
                interpolation=interpolation,
            )

    def test_functional_pil_antialias_warning(self):
        with pytest.warns(UserWarning, match="Anti-alias option is always applied for PIL Image input"):
            F.resize(make_image_pil(self.INPUT_SIZE), size=self.OUTPUT_SIZES[0], antialias=False)

    @pytest.mark.parametrize("size", OUTPUT_SIZES)
    @pytest.mark.parametrize(
        "make_input",
        [
            make_image_tensor,
            make_image_pil,
            make_image,
            make_bounding_boxes,
            make_segmentation_mask,
            make_detection_masks,
            make_video,
        ],
    )
    def test_max_size_error(self, size, make_input):
        if size is None:
            # value can be anything other than an integer
            max_size = None
            match = "max_size must be an integer when size is None"
        elif isinstance(size, int) or len(size) == 1:
            max_size = (size if isinstance(size, int) else size[0]) - 1
            match = "must be strictly greater than the requested size"
        else:
            # any integer value triggers the error for a two-element size
            max_size = -1
            match = "size should be an int or a sequence of length 1"

        with pytest.raises(ValueError, match=match):
            F.resize(make_input(self.INPUT_SIZE), size=size, max_size=max_size, antialias=True)

        if isinstance(size, list) and len(size) != 1:
            with pytest.raises(ValueError, match="max_size should only be passed if size is None or specifies"):
                F.resize(make_input(self.INPUT_SIZE), size=size, max_size=500)

    @pytest.mark.parametrize(
        "input_size, max_size, expected_size",
        [
            ((10, 10), 10, (10, 10)),
            ((10, 20), 40, (20, 40)),
            ((20, 10), 40, (40, 20)),
            ((10, 20), 10, (5, 10)),
            ((20, 10), 10, (10, 5)),
        ],
    )
    @pytest.mark.parametrize(
        "make_input",
        [
            make_image_tensor,
            make_image_pil,
            make_image,
            make_bounding_boxes,
            make_segmentation_mask,
            make_detection_masks,
            make_video,
        ],
    )
    def test_resize_size_none(self, input_size, max_size, expected_size, make_input):
        img = make_input(input_size)
        out = F.resize(img, size=None, max_size=max_size)
        assert F.get_size(out)[-2:] == list(expected_size)

    @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES)
    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_video],
    )
    def test_interpolation_int(self, interpolation, make_input):
        input = make_input(self.INPUT_SIZE)

        # `InterpolationMode.NEAREST_EXACT` has no proper integer equivalent for the tensor backend, where the
        # integer value maps to PyTorch's interpolate and thus to `InterpolationMode.NEAREST`.
        if isinstance(input, torch.Tensor) and interpolation is transforms.InterpolationMode.NEAREST_EXACT:
            return

        expected = F.resize(input, size=self.OUTPUT_SIZES[0], interpolation=interpolation, antialias=True)
        actual = F.resize(
            input, size=self.OUTPUT_SIZES[0], interpolation=pil_modes_mapping[interpolation], antialias=True
        )

        assert_equal(actual, expected)

    def test_transform_unknown_size_error(self):
        with pytest.raises(ValueError, match="size can be an integer, a sequence of one or two integers, or None"):
            transforms.Resize(size=object())

    @pytest.mark.parametrize(
        "size", [min(INPUT_SIZE), [min(INPUT_SIZE)], (min(INPUT_SIZE),), list(INPUT_SIZE), tuple(INPUT_SIZE)]
    )
    @pytest.mark.parametrize(
        "make_input",
        [
            make_image_tensor,
            make_image_pil,
            make_image,
            make_bounding_boxes,
            make_segmentation_mask,
            make_detection_masks,
            make_video,
        ],
    )
    def test_noop(self, size, make_input):
        input = make_input(self.INPUT_SIZE)

        output = F.resize(input, size=F.get_size(input), antialias=True)

        # This identity check is not a requirement. It is here to avoid breaking the behavior by accident.
        if isinstance(input, tv_tensors.TVTensor):
            # We can't check Python object identity, since tv_tensors unwrap before a kernel and wrap again
            # afterwards. Thus, we check that the underlying storage is the same.
            assert output.data_ptr() == input.data_ptr()
        else:
            assert output is input

    @pytest.mark.parametrize(
        "make_input",
        [
            make_image_tensor,
            make_image_pil,
            make_image,
            make_bounding_boxes,
            make_segmentation_mask,
            make_detection_masks,
            make_video,
        ],
    )
    def test_no_regression_5405(self, make_input):
        # Checks that `max_size` is not ignored if `size == small_edge_size`, see pytorch/vision#5405
        input = make_input(self.INPUT_SIZE)

        size = min(F.get_size(input))
        max_size = size + 1
        output = F.resize(input, size=size, max_size=max_size, antialias=True)

        assert max(F.get_size(output)) == max_size

    def _make_image(self, *args, batch_dims=(), memory_format=torch.contiguous_format, **kwargs):
        # torch.channels_last memory format is only available for 4D tensors, i.e. (B, C, H, W). Images coming from
        # PIL or our own I/O have no batch dimension, so we emulate channels-last with a batch of size 1 and squeeze
        # the batch dimension afterwards.
        emulate_channels_last = memory_format is torch.channels_last and len(batch_dims) != 1

        image = make_image(
            *args,
            batch_dims=(math.prod(batch_dims),) if emulate_channels_last else batch_dims,
            memory_format=memory_format,
            **kwargs,
        )

        if emulate_channels_last:
            image = tv_tensors.wrap(image.view(*batch_dims, *image.shape[-3:]), like=image)

        return image

    def _check_stride(self, image, *, memory_format):
        C, H, W = F.get_dimensions(image)
        if memory_format is torch.contiguous_format:
            expected_stride = (H * W, W, 1)
        elif memory_format is torch.channels_last:
            expected_stride = (1, W * C, C)
        else:
            raise ValueError(f"Unknown memory_format: {memory_format}")

        assert image.stride() == expected_stride

    @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES)
    @pytest.mark.parametrize("antialias", [True, False])
    @pytest.mark.parametrize("memory_format", [torch.contiguous_format, torch.channels_last])
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image_memory_format_consistency(self, interpolation, antialias, memory_format, dtype, device):
        size = self.OUTPUT_SIZES[0]

        input = self._make_image(self.INPUT_SIZE, dtype=dtype, device=device, memory_format=memory_format)

        # Smoke test to make sure we aren't starting with wrong assumptions
        self._check_stride(input, memory_format=memory_format)

        output = F.resize_image(input, size=size, interpolation=interpolation, antialias=antialias)

        self._check_stride(output, memory_format=memory_format)

    def test_float16_no_rounding(self):
        # Non-regression test: resizing a float16 image must not round the values
        input = make_image_tensor(self.INPUT_SIZE, dtype=torch.float16)
        output = F.resize_image(input, size=self.OUTPUT_SIZES[0], antialias=True)

        assert output.dtype is torch.float16
        assert (output.round() - output).abs().sum() > 0


class TestHorizontalFlip:
    @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, dtype, device):
        check_kernel(F.horizontal_flip_image, make_image(dtype=dtype, device=device))

    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_bounding_boxes(self, format, dtype, device):
        bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
        check_kernel(
            F.horizontal_flip_bounding_boxes,
            bounding_boxes,
            format=format,
            canvas_size=bounding_boxes.canvas_size,
        )

    @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks])
    def test_kernel_mask(self, make_mask):
        check_kernel(F.horizontal_flip_mask, make_mask())

    def test_kernel_video(self):
        check_kernel(F.horizontal_flip_video, make_video())

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_functional(self, make_input):
        check_functional(F.horizontal_flip, make_input())

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.horizontal_flip_image, torch.Tensor),
            (F._geometry._horizontal_flip_image_pil, PIL.Image.Image),
            (F.horizontal_flip_image, tv_tensors.Image),
            (F.horizontal_flip_bounding_boxes, tv_tensors.BoundingBoxes),
            (F.horizontal_flip_mask, tv_tensors.Mask),
            (F.horizontal_flip_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.horizontal_flip, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_transform(self, make_input, device):
        check_transform(transforms.RandomHorizontalFlip(p=1), make_input(device=device))

    @pytest.mark.parametrize(
        "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)]
    )
    def test_image_correctness(self, fn):
        image = make_image(dtype=torch.uint8, device="cpu")

        actual = fn(image)
        expected = F.to_image(F.horizontal_flip(F.to_pil_image(image)))

        torch.testing.assert_close(actual, expected)

    def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes):
        affine_matrix = np.array(
            [
                [-1, 0, bounding_boxes.canvas_size[1]],
                [0, 1, 0],
            ],
        )

        return reference_affine_bounding_boxes_helper(bounding_boxes, affine_matrix=affine_matrix)

    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize(
        "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)]
    )
    def test_bounding_boxes_correctness(self, format, fn):
        bounding_boxes = make_bounding_boxes(format=format)

        actual = fn(bounding_boxes)
        expected = self._reference_horizontal_flip_bounding_boxes(bounding_boxes)

        torch.testing.assert_close(actual, expected)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_transform_noop(self, make_input, device):
        input = make_input(device=device)

        transform = transforms.RandomHorizontalFlip(p=0)

        output = transform(input)

        assert_equal(output, input)


class TestAffine:
    _EXHAUSTIVE_TYPE_AFFINE_KWARGS = dict(
        # float, int
        angle=[-10.9, 18],
        # two-list of float, two-list of int, two-tuple of float, two-tuple of int
        translate=[[6.3, -0.6], [1, -3], (16.6, -6.6), (-2, 4)],
        # float, int
        scale=[0.5, 2],
        # float, int, one-list / one-tuple, and two-list / two-tuple of float and int
        shear=[35.6, 38, [-37.7], [-23], (5.3,), (-52,), [5.4, 21.8], [-47, 51], (-11.2, 36.7), (8, -53)],
        # None, two-list of float, two-list of int, two-tuple of float, two-tuple of int
        center=[None, [1.2, 4.9], [-3, 1], (2.5, -4.7), (3, 2)],
    )
    # The special case for shear makes sure we pick a value that is supported while JIT scripting
    _MINIMAL_AFFINE_KWARGS = {
        k: vs[0] if k != "shear" else next(v for v in vs if isinstance(v, list))
        for k, vs in _EXHAUSTIVE_TYPE_AFFINE_KWARGS.items()
    }
    _CORRECTNESS_AFFINE_KWARGS = {
        k: [v for v in vs if v is None or isinstance(v, float) or (isinstance(v, list) and len(v) > 1)]
        for k, vs in _EXHAUSTIVE_TYPE_AFFINE_KWARGS.items()
    }

    _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES = dict(
        degrees=[30, (-15, 20)],
        translate=[None, (0.5, 0.5)],
        scale=[None, (0.75, 1.25)],
        shear=[None, (12, 30, -17, 5), 10, (-5, 12)],
    )
    _CORRECTNESS_TRANSFORM_AFFINE_RANGES = {
        k: next(v for v in vs if v is not None) for k, vs in _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES.items()
    }
def _check_kernel(self, kernel, input, *args, **kwargs):
1139
kwargs_ = self._MINIMAL_AFFINE_KWARGS.copy()
1140
kwargs_.update(kwargs)
1141
check_kernel(kernel, input, *args, **kwargs_)

    @param_value_parametrization(
        angle=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["angle"],
        translate=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["translate"],
        shear=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["shear"],
        center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"],
        interpolation=[transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR],
        fill=EXHAUSTIVE_TYPE_FILLS,
    )
    @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, param, value, dtype, device):
        if param == "fill":
            value = adapt_fill(value, dtype=dtype)
        self._check_kernel(
            F.affine_image,
            make_image(dtype=dtype, device=device),
            **{param: value},
            check_scripted_vs_eager=not (param in {"shear", "fill"} and isinstance(value, (int, float))),
            check_cuda_vs_cpu=dict(atol=1, rtol=0)
            if dtype is torch.uint8 and param == "interpolation" and value is transforms.InterpolationMode.BILINEAR
            else True,
        )

    @param_value_parametrization(
        angle=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["angle"],
        translate=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["translate"],
        shear=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["shear"],
        center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"],
    )
    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_bounding_boxes(self, param, value, format, dtype, device):
        bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
        self._check_kernel(
            F.affine_bounding_boxes,
            bounding_boxes,
            format=format,
            canvas_size=bounding_boxes.canvas_size,
            **{param: value},
            check_scripted_vs_eager=not (param == "shear" and isinstance(value, (int, float))),
        )

    @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks])
    def test_kernel_mask(self, make_mask):
        self._check_kernel(F.affine_mask, make_mask())

    def test_kernel_video(self):
        self._check_kernel(F.affine_video, make_video())

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_functional(self, make_input):
        check_functional(F.affine, make_input(), **self._MINIMAL_AFFINE_KWARGS)

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.affine_image, torch.Tensor),
            (F._geometry._affine_image_pil, PIL.Image.Image),
            (F.affine_image, tv_tensors.Image),
            (F.affine_bounding_boxes, tv_tensors.BoundingBoxes),
            (F.affine_mask, tv_tensors.Mask),
            (F.affine_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.affine, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_transform(self, make_input, device):
        input = make_input(device=device)

        check_transform(transforms.RandomAffine(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES), input)

    @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
    @pytest.mark.parametrize("translate", _CORRECTNESS_AFFINE_KWARGS["translate"])
    @pytest.mark.parametrize("scale", _CORRECTNESS_AFFINE_KWARGS["scale"])
    @pytest.mark.parametrize("shear", _CORRECTNESS_AFFINE_KWARGS["shear"])
    @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
    @pytest.mark.parametrize(
        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
    )
    @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
    def test_functional_image_correctness(self, angle, translate, scale, shear, center, interpolation, fill):
        image = make_image(dtype=torch.uint8, device="cpu")

        fill = adapt_fill(fill, dtype=torch.uint8)

        actual = F.affine(
            image,
            angle=angle,
            translate=translate,
            scale=scale,
            shear=shear,
            center=center,
            interpolation=interpolation,
            fill=fill,
        )
        expected = F.to_image(
            F.affine(
                F.to_pil_image(image),
                angle=angle,
                translate=translate,
                scale=scale,
                shear=shear,
                center=center,
                interpolation=interpolation,
                fill=fill,
            )
        )

        mae = (actual.float() - expected.float()).abs().mean()
        assert mae < (2 if interpolation is transforms.InterpolationMode.NEAREST else 8)

    @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
    @pytest.mark.parametrize(
        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
    )
    @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
    @pytest.mark.parametrize("seed", list(range(5)))
    def test_transform_image_correctness(self, center, interpolation, fill, seed):
        image = make_image(dtype=torch.uint8, device="cpu")

        fill = adapt_fill(fill, dtype=torch.uint8)

        transform = transforms.RandomAffine(
            **self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, center=center, interpolation=interpolation, fill=fill
        )

        torch.manual_seed(seed)
        actual = transform(image)

        torch.manual_seed(seed)
        expected = F.to_image(transform(F.to_pil_image(image)))

        mae = (actual.float() - expected.float()).abs().mean()
        assert mae < (2 if interpolation is transforms.InterpolationMode.NEAREST else 8)

    def _compute_affine_matrix(self, *, angle, translate, scale, shear, center):
        rot = math.radians(angle)
        cx, cy = center
        tx, ty = translate
        sx, sy = [math.radians(s) for s in ([shear, 0.0] if isinstance(shear, (int, float)) else shear)]

        c_matrix = np.array([[1, 0, cx], [0, 1, cy], [0, 0, 1]])
        t_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]])
        c_matrix_inv = np.linalg.inv(c_matrix)
        rs_matrix = np.array(
            [
                [scale * math.cos(rot), -scale * math.sin(rot), 0],
                [scale * math.sin(rot), scale * math.cos(rot), 0],
                [0, 0, 1],
            ]
        )
        shear_x_matrix = np.array([[1, -math.tan(sx), 0], [0, 1, 0], [0, 0, 1]])
        shear_y_matrix = np.array([[1, 0, 0], [-math.tan(sy), 1, 0], [0, 0, 1]])
        rss_matrix = np.matmul(rs_matrix, np.matmul(shear_y_matrix, shear_x_matrix))
        true_matrix = np.matmul(t_matrix, np.matmul(c_matrix, np.matmul(rss_matrix, c_matrix_inv)))
        return true_matrix[:2, :]
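
    # The reference matrix above composes, from right to left: translate to the center (C^-1), shear, then
    # rotate-and-scale, translate back (C), and finally apply the user translation (T):
    #   M = T @ C @ RSS @ C^-1, with RSS = (rotate + scale) @ shear_y @ shear_x
    # Sanity check (illustrative): for angle=0, translate=(0, 0), scale=1, shear=(0, 0), center=(0, 0) every factor
    # is the identity, so `_compute_affine_matrix` returns [[1, 0, 0], [0, 1, 0]].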

    def _reference_affine_bounding_boxes(self, bounding_boxes, *, angle, translate, scale, shear, center):
        if center is None:
            center = [s * 0.5 for s in bounding_boxes.canvas_size[::-1]]

        return reference_affine_bounding_boxes_helper(
            bounding_boxes,
            affine_matrix=self._compute_affine_matrix(
                angle=angle, translate=translate, scale=scale, shear=shear, center=center
            ),
        )

    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
    @pytest.mark.parametrize("translate", _CORRECTNESS_AFFINE_KWARGS["translate"])
    @pytest.mark.parametrize("scale", _CORRECTNESS_AFFINE_KWARGS["scale"])
    @pytest.mark.parametrize("shear", _CORRECTNESS_AFFINE_KWARGS["shear"])
    @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
    def test_functional_bounding_boxes_correctness(self, format, angle, translate, scale, shear, center):
        bounding_boxes = make_bounding_boxes(format=format)

        actual = F.affine(
            bounding_boxes,
            angle=angle,
            translate=translate,
            scale=scale,
            shear=shear,
            center=center,
        )
        expected = self._reference_affine_bounding_boxes(
            bounding_boxes,
            angle=angle,
            translate=translate,
            scale=scale,
            shear=shear,
            center=center,
        )

        torch.testing.assert_close(actual, expected)

    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
    @pytest.mark.parametrize("seed", list(range(5)))
    def test_transform_bounding_boxes_correctness(self, format, center, seed):
        bounding_boxes = make_bounding_boxes(format=format)

        transform = transforms.RandomAffine(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, center=center)

        torch.manual_seed(seed)
        params = transform._get_params([bounding_boxes])

        torch.manual_seed(seed)
        actual = transform(bounding_boxes)

        expected = self._reference_affine_bounding_boxes(bounding_boxes, **params, center=center)

        torch.testing.assert_close(actual, expected)

    @pytest.mark.parametrize("degrees", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["degrees"])
    @pytest.mark.parametrize("translate", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["translate"])
    @pytest.mark.parametrize("scale", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["scale"])
    @pytest.mark.parametrize("shear", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["shear"])
    @pytest.mark.parametrize("seed", list(range(10)))
    def test_transform_get_params_bounds(self, degrees, translate, scale, shear, seed):
        image = make_image()
        height, width = F.get_size(image)

        transform = transforms.RandomAffine(degrees=degrees, translate=translate, scale=scale, shear=shear)

        torch.manual_seed(seed)
        params = transform._get_params([image])

        if isinstance(degrees, (int, float)):
            assert -degrees <= params["angle"] <= degrees
        else:
            assert degrees[0] <= params["angle"] <= degrees[1]

        if translate is not None:
            width_max = int(round(translate[0] * width))
            height_max = int(round(translate[1] * height))
            assert -width_max <= params["translate"][0] <= width_max
            assert -height_max <= params["translate"][1] <= height_max
        else:
            assert params["translate"] == (0, 0)

        if scale is not None:
            assert scale[0] <= params["scale"] <= scale[1]
        else:
            assert params["scale"] == 1.0

        if shear is not None:
            if isinstance(shear, (int, float)):
                assert -shear <= params["shear"][0] <= shear
                assert params["shear"][1] == 0.0
            elif len(shear) == 2:
                assert shear[0] <= params["shear"][0] <= shear[1]
                assert params["shear"][1] == 0.0
            elif len(shear) == 4:
                assert shear[0] <= params["shear"][0] <= shear[1]
                assert shear[2] <= params["shear"][1] <= shear[3]
        else:
            assert params["shear"] == (0, 0)

    @pytest.mark.parametrize("param", ["degrees", "translate", "scale", "shear", "center"])
    @pytest.mark.parametrize("value", [0, [0], [0, 0, 0]])
    def test_transform_sequence_len_errors(self, param, value):
        if param in {"degrees", "shear"} and not isinstance(value, list):
            # degrees and shear also accept a single number
            return

        kwargs = {param: value}
        if param != "degrees":
            kwargs["degrees"] = 0

        with pytest.raises(
            ValueError if isinstance(value, list) else TypeError, match=f"{param} should be a sequence of length 2"
        ):
            transforms.RandomAffine(**kwargs)

    def test_transform_negative_degrees_error(self):
        with pytest.raises(ValueError, match="If degrees is a single number, it must be positive"):
            transforms.RandomAffine(degrees=-1)

    @pytest.mark.parametrize("translate", [[-1, 0], [2, 0], [-1, 2]])
    def test_transform_translate_range_error(self, translate):
        with pytest.raises(ValueError, match="translation values should be between 0 and 1"):
            transforms.RandomAffine(degrees=0, translate=translate)

    @pytest.mark.parametrize("scale", [[-1, 0], [0, -1], [-1, -1]])
    def test_transform_scale_range_error(self, scale):
        with pytest.raises(ValueError, match="scale values should be positive"):
            transforms.RandomAffine(degrees=0, scale=scale)

    def test_transform_negative_shear_error(self):
        with pytest.raises(ValueError, match="If shear is a single number, it must be positive"):
            transforms.RandomAffine(degrees=0, shear=-1)

    def test_transform_unknown_fill_error(self):
        with pytest.raises(TypeError, match="Got inappropriate fill arg"):
            transforms.RandomAffine(degrees=0, fill="fill")


class TestVerticalFlip:
    @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, dtype, device):
        check_kernel(F.vertical_flip_image, make_image(dtype=dtype, device=device))

    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_bounding_boxes(self, format, dtype, device):
        bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
        check_kernel(
            F.vertical_flip_bounding_boxes,
            bounding_boxes,
            format=format,
            canvas_size=bounding_boxes.canvas_size,
        )

    @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks])
    def test_kernel_mask(self, make_mask):
        check_kernel(F.vertical_flip_mask, make_mask())

    def test_kernel_video(self):
        check_kernel(F.vertical_flip_video, make_video())

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_functional(self, make_input):
        check_functional(F.vertical_flip, make_input())

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.vertical_flip_image, torch.Tensor),
            (F._geometry._vertical_flip_image_pil, PIL.Image.Image),
            (F.vertical_flip_image, tv_tensors.Image),
            (F.vertical_flip_bounding_boxes, tv_tensors.BoundingBoxes),
            (F.vertical_flip_mask, tv_tensors.Mask),
            (F.vertical_flip_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.vertical_flip, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_transform(self, make_input, device):
        check_transform(transforms.RandomVerticalFlip(p=1), make_input(device=device))

    @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)])
    def test_image_correctness(self, fn):
        image = make_image(dtype=torch.uint8, device="cpu")

        actual = fn(image)
        expected = F.to_image(F.vertical_flip(F.to_pil_image(image)))

        torch.testing.assert_close(actual, expected)

    def _reference_vertical_flip_bounding_boxes(self, bounding_boxes):
        affine_matrix = np.array(
            [
                [1, 0, 0],
                [0, -1, bounding_boxes.canvas_size[0]],
            ],
        )

        return reference_affine_bounding_boxes_helper(bounding_boxes, affine_matrix=affine_matrix)

    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)])
    def test_bounding_boxes_correctness(self, format, fn):
        bounding_boxes = make_bounding_boxes(format=format)

        actual = fn(bounding_boxes)
        expected = self._reference_vertical_flip_bounding_boxes(bounding_boxes)

        torch.testing.assert_close(actual, expected)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_transform_noop(self, make_input, device):
        input = make_input(device=device)

        transform = transforms.RandomVerticalFlip(p=0)

        output = transform(input)

        assert_equal(output, input)


class TestRotate:
    _EXHAUSTIVE_TYPE_AFFINE_KWARGS = dict(
        # float, int
        angle=[-87, 15, 90],
        # None, two-list of float, two-list of int, two-tuple of float, two-tuple of int
        center=[None, [1.2, 4.9], [-3, 1], (2.5, -4.7), (3, 2)],
    )
    _MINIMAL_AFFINE_KWARGS = {k: vs[0] for k, vs in _EXHAUSTIVE_TYPE_AFFINE_KWARGS.items()}
    _CORRECTNESS_AFFINE_KWARGS = {
        k: [v for v in vs if v is None or isinstance(v, float) or isinstance(v, list)]
        for k, vs in _EXHAUSTIVE_TYPE_AFFINE_KWARGS.items()
    }

    _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES = dict(
        degrees=[30, (-15, 20)],
    )
    _CORRECTNESS_TRANSFORM_AFFINE_RANGES = {k: vs[0] for k, vs in _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES.items()}

    @param_value_parametrization(
        angle=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["angle"],
        interpolation=[transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR],
        expand=[False, True],
        center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"],
        fill=EXHAUSTIVE_TYPE_FILLS,
    )
    @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, param, value, dtype, device):
        kwargs = {param: value}
        if param != "angle":
            kwargs["angle"] = self._MINIMAL_AFFINE_KWARGS["angle"]

        check_kernel(
            F.rotate_image,
            make_image(dtype=dtype, device=device),
            **kwargs,
            check_scripted_vs_eager=not (param == "fill" and isinstance(value, (int, float))),
        )

    @param_value_parametrization(
        angle=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["angle"],
        expand=[False, True],
        center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"],
    )
    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_bounding_boxes(self, param, value, format, dtype, device):
        kwargs = {param: value}
        if param != "angle":
            kwargs["angle"] = self._MINIMAL_AFFINE_KWARGS["angle"]

        bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)

        check_kernel(
            F.rotate_bounding_boxes,
            bounding_boxes,
            format=format,
            canvas_size=bounding_boxes.canvas_size,
            **kwargs,
        )

    @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks])
    def test_kernel_mask(self, make_mask):
        check_kernel(F.rotate_mask, make_mask(), **self._MINIMAL_AFFINE_KWARGS)

    def test_kernel_video(self):
        check_kernel(F.rotate_video, make_video(), **self._MINIMAL_AFFINE_KWARGS)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_functional(self, make_input):
        check_functional(F.rotate, make_input(), **self._MINIMAL_AFFINE_KWARGS)

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.rotate_image, torch.Tensor),
            (F._geometry._rotate_image_pil, PIL.Image.Image),
            (F.rotate_image, tv_tensors.Image),
            (F.rotate_bounding_boxes, tv_tensors.BoundingBoxes),
            (F.rotate_mask, tv_tensors.Mask),
            (F.rotate_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.rotate, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_transform(self, make_input, device):
        check_transform(
            transforms.RandomRotation(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES), make_input(device=device)
        )

    @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
    @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
    @pytest.mark.parametrize(
        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
    )
    @pytest.mark.parametrize("expand", [False, True])
    @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
    def test_functional_image_correctness(self, angle, center, interpolation, expand, fill):
        image = make_image(dtype=torch.uint8, device="cpu")

        fill = adapt_fill(fill, dtype=torch.uint8)

        actual = F.rotate(image, angle=angle, center=center, interpolation=interpolation, expand=expand, fill=fill)
        expected = F.to_image(
            F.rotate(
                F.to_pil_image(image), angle=angle, center=center, interpolation=interpolation, expand=expand, fill=fill
            )
        )

        mae = (actual.float() - expected.float()).abs().mean()
        assert mae < (1 if interpolation is transforms.InterpolationMode.NEAREST else 6)

    @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
    @pytest.mark.parametrize(
        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
    )
    @pytest.mark.parametrize("expand", [False, True])
    @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
    @pytest.mark.parametrize("seed", list(range(5)))
    def test_transform_image_correctness(self, center, interpolation, expand, fill, seed):
        image = make_image(dtype=torch.uint8, device="cpu")

        fill = adapt_fill(fill, dtype=torch.uint8)

        transform = transforms.RandomRotation(
            **self._CORRECTNESS_TRANSFORM_AFFINE_RANGES,
            center=center,
            interpolation=interpolation,
            expand=expand,
            fill=fill,
        )

        torch.manual_seed(seed)
        actual = transform(image)

        torch.manual_seed(seed)
        expected = F.to_image(transform(F.to_pil_image(image)))

        mae = (actual.float() - expected.float()).abs().mean()
        assert mae < (1 if interpolation is transforms.InterpolationMode.NEAREST else 6)

    def _compute_output_canvas_size(self, *, expand, canvas_size, affine_matrix):
        if not expand:
            return canvas_size, (0.0, 0.0)

        input_height, input_width = canvas_size

        input_image_frame = np.array(
            [
                [0.0, 0.0, 1.0],
                [0.0, input_height, 1.0],
                [input_width, input_height, 1.0],
                [input_width, 0.0, 1.0],
            ],
        )
        output_image_frame = np.matmul(input_image_frame, affine_matrix.astype(input_image_frame.dtype).T)

        recenter_x = float(np.min(output_image_frame[:, 0]))
        recenter_y = float(np.min(output_image_frame[:, 1]))

        output_width = int(np.max(output_image_frame[:, 0]) - recenter_x)
        output_height = int(np.max(output_image_frame[:, 1]) - recenter_y)

        return (output_height, output_width), (recenter_x, recenter_y)
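
    # Worked example (illustrative): rotating a (H, W) = (10, 20) canvas by 90 degrees with expand=True maps the
    # image frame onto a (20, 10) canvas, and `recenter_xy` shifts the frame so its minimum corner sits at (0, 0).
    # Without expand, the canvas size is returned unchanged and no recentering happens.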

    def _recenter_bounding_boxes_after_expand(self, bounding_boxes, *, recenter_xy):
        x, y = recenter_xy
        if bounding_boxes.format is tv_tensors.BoundingBoxFormat.XYXY:
            translate = [x, y, x, y]
        else:
            translate = [x, y, 0.0, 0.0]
        return tv_tensors.wrap(
            (bounding_boxes.to(torch.float64) - torch.tensor(translate)).to(bounding_boxes.dtype), like=bounding_boxes
        )

    def _reference_rotate_bounding_boxes(self, bounding_boxes, *, angle, expand, center):
        if center is None:
            center = [s * 0.5 for s in bounding_boxes.canvas_size[::-1]]
        cx, cy = center

        a = np.cos(angle * np.pi / 180.0)
        b = np.sin(angle * np.pi / 180.0)
        affine_matrix = np.array(
            [
                [a, b, cx - cx * a - b * cy],
                [-b, a, cy + cx * b - a * cy],
            ],
        )

        new_canvas_size, recenter_xy = self._compute_output_canvas_size(
            expand=expand, canvas_size=bounding_boxes.canvas_size, affine_matrix=affine_matrix
        )

        output = reference_affine_bounding_boxes_helper(
            bounding_boxes,
            affine_matrix=affine_matrix,
            new_canvas_size=new_canvas_size,
            clamp=False,
        )

        return F.clamp_bounding_boxes(self._recenter_bounding_boxes_after_expand(output, recenter_xy=recenter_xy)).to(
            bounding_boxes
        )
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
1764
@pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
1765
@pytest.mark.parametrize("expand", [False, True])
1766
@pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
1767
def test_functional_bounding_boxes_correctness(self, format, angle, expand, center):
1768
bounding_boxes = make_bounding_boxes(format=format)
1770
actual = F.rotate(bounding_boxes, angle=angle, expand=expand, center=center)
1771
expected = self._reference_rotate_bounding_boxes(bounding_boxes, angle=angle, expand=expand, center=center)
1773
torch.testing.assert_close(actual, expected)
1774
torch.testing.assert_close(F.get_size(actual), F.get_size(expected), atol=2 if expand else 0, rtol=0)

    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("expand", [False, True])
    @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
    @pytest.mark.parametrize("seed", list(range(5)))
    def test_transform_bounding_boxes_correctness(self, format, expand, center, seed):
        bounding_boxes = make_bounding_boxes(format=format)

        transform = transforms.RandomRotation(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, expand=expand, center=center)

        torch.manual_seed(seed)
        params = transform._get_params([bounding_boxes])

        torch.manual_seed(seed)
        actual = transform(bounding_boxes)

        expected = self._reference_rotate_bounding_boxes(bounding_boxes, **params, expand=expand, center=center)

        torch.testing.assert_close(actual, expected)
        torch.testing.assert_close(F.get_size(actual), F.get_size(expected), atol=2 if expand else 0, rtol=0)

    @pytest.mark.parametrize("degrees", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["degrees"])
    @pytest.mark.parametrize("seed", list(range(10)))
    def test_transform_get_params_bounds(self, degrees, seed):
        transform = transforms.RandomRotation(degrees=degrees)

        torch.manual_seed(seed)
        params = transform._get_params([])

        if isinstance(degrees, (int, float)):
            assert -degrees <= params["angle"] <= degrees
        else:
            assert degrees[0] <= params["angle"] <= degrees[1]

    @pytest.mark.parametrize("param", ["degrees", "center"])
    @pytest.mark.parametrize("value", [0, [0], [0, 0, 0]])
    def test_transform_sequence_len_errors(self, param, value):
        if param == "degrees" and not isinstance(value, list):
            # degrees also accepts a single number
            return

        kwargs = {param: value}
        if param != "degrees":
            kwargs["degrees"] = 0

        with pytest.raises(
            ValueError if isinstance(value, list) else TypeError, match=f"{param} should be a sequence of length 2"
        ):
            transforms.RandomRotation(**kwargs)

    def test_transform_negative_degrees_error(self):
        with pytest.raises(ValueError, match="If degrees is a single number, it must be positive"):
            transforms.RandomRotation(degrees=-1)

    def test_transform_unknown_fill_error(self):
        with pytest.raises(TypeError, match="Got inappropriate fill arg"):
            transforms.RandomRotation(degrees=0, fill="fill")

    @pytest.mark.parametrize("size", [(11, 17), (16, 16)])
    @pytest.mark.parametrize("angle", [0, 90, 180, 270])
    @pytest.mark.parametrize("expand", [False, True])
    def test_functional_image_fast_path_correctness(self, size, angle, expand):
        image = make_image(size, dtype=torch.uint8, device="cpu")

        actual = F.rotate(image, angle=angle, expand=expand)
        expected = F.to_image(F.rotate(F.to_pil_image(image), angle=angle, expand=expand))

        torch.testing.assert_close(actual, expected)


class TestContainerTransforms:
    class BuiltinTransform(transforms.Transform):
        def _transform(self, inpt, params):
            return inpt

    class PackedInputTransform(nn.Module):
        def forward(self, sample):
            assert len(sample) == 2
            return sample

    class UnpackedInputTransform(nn.Module):
        def forward(self, image, label):
            return image, label
1858
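    # "Packed" transforms take the whole sample as a single tuple argument, while "unpacked" ones take
    # image and label separately; the container transforms must cope with both calling conventions, which
    # the test below exercises for every mix of wrapped transforms.
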
    @pytest.mark.parametrize(
        "transform_cls", [transforms.Compose, functools.partial(transforms.RandomApply, p=1), transforms.RandomOrder]
    )
    @pytest.mark.parametrize(
        "wrapped_transform_clss",
        [
            [BuiltinTransform],
            [PackedInputTransform],
            [UnpackedInputTransform],
            [BuiltinTransform, BuiltinTransform],
            [PackedInputTransform, PackedInputTransform],
            [UnpackedInputTransform, UnpackedInputTransform],
            [BuiltinTransform, PackedInputTransform, BuiltinTransform],
            [BuiltinTransform, UnpackedInputTransform, BuiltinTransform],
            [PackedInputTransform, BuiltinTransform, PackedInputTransform],
            [UnpackedInputTransform, BuiltinTransform, UnpackedInputTransform],
        ],
    )
    @pytest.mark.parametrize("unpack", [True, False])
    def test_packed_unpacked(self, transform_cls, wrapped_transform_clss, unpack):
        needs_packed_inputs = any(issubclass(cls, self.PackedInputTransform) for cls in wrapped_transform_clss)
        needs_unpacked_inputs = any(issubclass(cls, self.UnpackedInputTransform) for cls in wrapped_transform_clss)
        assert not (needs_packed_inputs and needs_unpacked_inputs)

        transform = transform_cls([cls() for cls in wrapped_transform_clss])

        image = make_image()
        label = 3
        packed_input = (image, label)

        def call_transform():
            if unpack:
                return transform(*packed_input)
            else:
                return transform(packed_input)

        if needs_unpacked_inputs and not unpack:
            with pytest.raises(TypeError, match="missing 1 required positional argument"):
                call_transform()
        elif needs_packed_inputs and unpack:
            with pytest.raises(TypeError, match="takes 2 positional arguments but 3 were given"):
                call_transform()
        else:
            output = call_transform()

            assert isinstance(output, tuple) and len(output) == 2
            assert output[0] is image
            assert output[1] is label

    def test_compose(self):
        transform = transforms.Compose(
            [
                transforms.RandomHorizontalFlip(p=1),
                transforms.RandomVerticalFlip(p=1),
            ]
        )

        input = make_image()

        actual = check_transform(transform, input)
        expected = F.vertical_flip(F.horizontal_flip(input))

        assert_equal(actual, expected)

    @pytest.mark.parametrize("p", [0.0, 1.0])
    @pytest.mark.parametrize("sequence_type", [list, nn.ModuleList])
    def test_random_apply(self, p, sequence_type):
        transform = transforms.RandomApply(
            sequence_type(
                [
                    transforms.RandomHorizontalFlip(p=1),
                    transforms.RandomVerticalFlip(p=1),
                ]
            ),
            p=p,
        )

        # Use a pure tensor input; the v1 compatibility check is only meaningful when the wrapped
        # transforms are passed as an nn.ModuleList, since that is all the v1 implementation supports.
        input = make_image_tensor()
        output = check_transform(transform, input, check_v1_compatibility=issubclass(sequence_type, nn.ModuleList))

        if p == 1:
            assert_equal(output, F.vertical_flip(F.horizontal_flip(input)))
        else:
            assert output is input

    @pytest.mark.parametrize("p", [(0, 1), (1, 0)])
    def test_random_choice(self, p):
        transform = transforms.RandomChoice(
            [
                transforms.RandomHorizontalFlip(p=1),
                transforms.RandomVerticalFlip(p=1),
            ],
            p=p,
        )

        input = make_image()
        output = check_transform(transform, input)

        p_horizontal, _ = p
        if p_horizontal:
            assert_equal(output, F.horizontal_flip(input))
        else:
            assert_equal(output, F.vertical_flip(input))

    def test_random_order(self):
        transform = transforms.RandomOrder(
            [
                transforms.RandomHorizontalFlip(p=1),
                transforms.RandomVerticalFlip(p=1),
            ]
        )

        input = make_image()

        actual = check_transform(transform, input)
        # We cannot observe the order in which the two transforms were applied, but horizontal and
        # vertical flip commute, so a fixed order yields the correct expected value either way.
        expected = F.vertical_flip(F.horizontal_flip(input))

        assert_equal(actual, expected)

    def test_errors(self):
        for cls in [transforms.Compose, transforms.RandomChoice, transforms.RandomOrder]:
            with pytest.raises(TypeError, match="Argument transforms should be a sequence of callables"):
                cls(lambda x: x)

        with pytest.raises(ValueError, match="at least one transform"):
            transforms.Compose([])

        for p in [-1, 2]:
            with pytest.raises(ValueError, match=re.escape("value in the interval [0.0, 1.0]")):
                transforms.RandomApply([lambda x: x], p=p)

        for transforms_, p in [([lambda x: x], []), ([], [1.0])]:
            with pytest.raises(ValueError, match="Length of p doesn't match the number of transforms"):
                transforms.RandomChoice(transforms_, p=p)


class TestToDtype:
    @pytest.mark.parametrize(
        ("kernel", "make_input"),
        [
            (F.to_dtype_image, make_image_tensor),
            (F.to_dtype_image, make_image),
            (F.to_dtype_video, make_video),
        ],
    )
    @pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8])
    @pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    @pytest.mark.parametrize("scale", (True, False))
    def test_kernel(self, kernel, make_input, input_dtype, output_dtype, device, scale):
        check_kernel(
            kernel,
            make_input(dtype=input_dtype, device=device),
            dtype=output_dtype,
            scale=scale,
        )

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video])
    @pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8])
    @pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    @pytest.mark.parametrize("scale", (True, False))
    def test_functional(self, make_input, input_dtype, output_dtype, device, scale):
        check_functional(
            F.to_dtype,
            make_input(dtype=input_dtype, device=device),
            dtype=output_dtype,
            scale=scale,
        )

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    @pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8])
    @pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    @pytest.mark.parametrize("scale", (True, False))
    @pytest.mark.parametrize("as_dict", (True, False))
    def test_transform(self, make_input, input_dtype, output_dtype, device, scale, as_dict):
        input = make_input(dtype=input_dtype, device=device)
        if as_dict:
            output_dtype = {type(input): output_dtype}
        check_transform(transforms.ToDtype(dtype=output_dtype, scale=scale), input, check_sample_input=not as_dict)

    def reference_convert_dtype_image_tensor(self, image, dtype=torch.float, scale=False):
        input_dtype = image.dtype
        output_dtype = dtype

        if not scale:
            return image.to(dtype)

        if output_dtype == input_dtype:
            return image

        def fn(value):
            if input_dtype.is_floating_point:
                if output_dtype.is_floating_point:
                    return value
                else:
                    return round(decimal.Decimal(value) * torch.iinfo(output_dtype).max)
            else:
                input_max_value = torch.iinfo(input_dtype).max

                if output_dtype.is_floating_point:
                    return float(decimal.Decimal(value) / input_max_value)
                else:
                    output_max_value = torch.iinfo(output_dtype).max

                    if input_max_value > output_max_value:
                        factor = (input_max_value + 1) // (output_max_value + 1)
                        return value / factor
                    else:
                        factor = (output_max_value + 1) // (input_max_value + 1)
                        return value * factor

        return torch.tensor(tree_map(fn, image.tolist())).to(dtype=output_dtype, device=image.device)

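    # Worked examples of the scaling rules above (values for illustration only):
    #   uint8  -> float32: 255 / 255 = 1.0 (divide by the input dtype's max value)
    #   uint8  -> uint16:  factor = (65535 + 1) // (255 + 1) = 256, so 255 * 256 = 65280
    #   uint16 -> uint8:   factor = 256, so 65280 / 256 = 255
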
    @pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8, torch.uint16])
    @pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8, torch.uint16])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    @pytest.mark.parametrize("scale", (True, False))
    def test_image_correctness(self, input_dtype, output_dtype, device, scale):
        if input_dtype.is_floating_point and output_dtype == torch.int64:
            pytest.xfail("float to int64 conversion is not supported")
        if input_dtype == torch.uint8 and output_dtype == torch.uint16 and device == "cuda":
            pytest.xfail("uint8 to uint16 conversion is not supported on cuda")

        input = make_image(dtype=input_dtype, device=device)

        out = F.to_dtype(input, dtype=output_dtype, scale=scale)
        expected = self.reference_convert_dtype_image_tensor(input, dtype=output_dtype, scale=scale)

        if input_dtype.is_floating_point and not output_dtype.is_floating_point and scale:
            torch.testing.assert_close(out, expected, atol=1, rtol=0)
        else:
            torch.testing.assert_close(out, expected)

    def was_scaled(self, inpt):
        # this assumes the input was not already entirely within the [0, 1] range
        return inpt.max() <= 1

    def make_inpt_with_bbox_and_mask(self, make_input):
        H, W = 12, 12
        inpt_dtype = torch.uint8
        bbox_dtype = torch.float32
        mask_dtype = torch.bool
        sample = {
            "inpt": make_input(size=(H, W), dtype=inpt_dtype),
            "bbox": make_bounding_boxes(canvas_size=(H, W), dtype=bbox_dtype),
            "mask": make_detection_masks(size=(H, W), dtype=mask_dtype),
        }

        return sample, inpt_dtype, bbox_dtype, mask_dtype

    @pytest.mark.parametrize("make_input", (make_image_tensor, make_image, make_video))
    @pytest.mark.parametrize("scale", (True, False))
    def test_dtype_not_a_dict(self, make_input, scale):
        # assert only the input image / video is converted when dtype isn't a dict

        sample, inpt_dtype, bbox_dtype, mask_dtype = self.make_inpt_with_bbox_and_mask(make_input)
        out = transforms.ToDtype(dtype=torch.float32, scale=scale)(sample)

        assert out["inpt"].dtype != inpt_dtype
        assert out["inpt"].dtype == torch.float32
        if scale:
            assert self.was_scaled(out["inpt"])
        else:
            assert not self.was_scaled(out["inpt"])
        assert out["bbox"].dtype == bbox_dtype
        assert out["mask"].dtype == mask_dtype

    @pytest.mark.parametrize("make_input", (make_image_tensor, make_image, make_video))
    def test_others_catch_all_and_none(self, make_input):
        # make sure "others" works as a catch-all and that None means no conversion

        sample, inpt_dtype, bbox_dtype, mask_dtype = self.make_inpt_with_bbox_and_mask(make_input)
        out = transforms.ToDtype(dtype={tv_tensors.Mask: torch.int64, "others": None})(sample)
        assert out["inpt"].dtype == inpt_dtype
        assert out["bbox"].dtype == bbox_dtype
        assert out["mask"].dtype != mask_dtype
        assert out["mask"].dtype == torch.int64

    @pytest.mark.parametrize("make_input", (make_image_tensor, make_image, make_video))
    def test_typical_use_case(self, make_input):
        # Typical use case: convert the image or video to float32 and scale it, convert masks to int64,
        # and leave everything else untouched.

        sample, inpt_dtype, bbox_dtype, mask_dtype = self.make_inpt_with_bbox_and_mask(make_input)
        out = transforms.ToDtype(
            dtype={type(sample["inpt"]): torch.float32, tv_tensors.Mask: torch.int64, "others": None}, scale=True
        )(sample)
        assert out["inpt"].dtype != inpt_dtype
        assert out["inpt"].dtype == torch.float32
        assert self.was_scaled(out["inpt"])
        assert out["bbox"].dtype == bbox_dtype
        assert out["mask"].dtype != mask_dtype
        assert out["mask"].dtype == torch.int64

    @pytest.mark.parametrize("make_input", (make_image_tensor, make_image, make_video))
    def test_errors_warnings(self, make_input):
        sample, inpt_dtype, bbox_dtype, mask_dtype = self.make_inpt_with_bbox_and_mask(make_input)

        with pytest.raises(ValueError, match="No dtype was specified for"):
            out = transforms.ToDtype(dtype={tv_tensors.Mask: torch.float32})(sample)
        with pytest.warns(UserWarning, match=re.escape("plain `torch.Tensor` will *not* be transformed")):
            transforms.ToDtype(dtype={torch.Tensor: torch.float32, tv_tensors.Image: torch.float32})
        with pytest.warns(UserWarning, match="no scaling will be done"):
            out = transforms.ToDtype(dtype={"others": None}, scale=True)(sample)
        assert out["inpt"].dtype == inpt_dtype
        assert out["bbox"].dtype == bbox_dtype
        assert out["mask"].dtype == mask_dtype

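    # The uint16 checks below exercise the integer scaling rules explicitly: uint16 -> uint8 divides by
    # 256 (= (65535 + 1) // (255 + 1)), and the float32 round trip may be off by one, hence atol=1.
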
    def test_uint16(self):
        img_uint16 = torch.randint(0, 65535, (256, 512), dtype=torch.uint16)

        img_uint8 = F.to_dtype(img_uint16, torch.uint8, scale=True)
        img_float32 = F.to_dtype(img_uint16, torch.float32, scale=True)
        img_int32 = F.to_dtype(img_uint16, torch.int32, scale=True)

        assert_equal(img_uint8, (img_uint16 / 256).to(torch.uint8))
        assert_close(img_float32, (img_uint16 / 65535))

        assert_close(F.to_dtype(img_float32, torch.uint16, scale=True), img_uint16, rtol=0, atol=1)
        # The uint8 -> uint16 round trip can only restore the high byte, so we compare against the
        # down-and-up-scaled original instead of img_uint16 itself.
        assert_equal(F.to_dtype(img_uint8, torch.uint16, scale=True), ((img_uint16 / 256).to(torch.uint16) * 256))
        assert_equal(F.to_dtype(img_int32, torch.uint16, scale=True), img_uint16)

        assert_equal(F.to_dtype(img_float32, torch.uint8, scale=True), img_uint8)
        assert_close(F.to_dtype(img_uint8, torch.float32, scale=True), img_float32, rtol=0, atol=1e-2)


class TestAdjustBrightness:
    _CORRECTNESS_BRIGHTNESS_FACTORS = [0.5, 0.0, 1.0, 5.0]
    _DEFAULT_BRIGHTNESS_FACTOR = _CORRECTNESS_BRIGHTNESS_FACTORS[0]

    @pytest.mark.parametrize(
        ("kernel", "make_input"),
        [
            (F.adjust_brightness_image, make_image),
            (F.adjust_brightness_video, make_video),
        ],
    )
    @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel(self, kernel, make_input, dtype, device):
        check_kernel(kernel, make_input(dtype=dtype, device=device), brightness_factor=self._DEFAULT_BRIGHTNESS_FACTOR)

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video])
    def test_functional(self, make_input):
        check_functional(F.adjust_brightness, make_input(), brightness_factor=self._DEFAULT_BRIGHTNESS_FACTOR)

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.adjust_brightness_image, torch.Tensor),
            (F._color._adjust_brightness_image_pil, PIL.Image.Image),
            (F.adjust_brightness_image, tv_tensors.Image),
            (F.adjust_brightness_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.adjust_brightness, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize("brightness_factor", _CORRECTNESS_BRIGHTNESS_FACTORS)
    def test_image_correctness(self, brightness_factor):
        image = make_image(dtype=torch.uint8, device="cpu")

        actual = F.adjust_brightness(image, brightness_factor=brightness_factor)
        expected = F.to_image(F.adjust_brightness(F.to_pil_image(image), brightness_factor=brightness_factor))

        torch.testing.assert_close(actual, expected)


class TestCutMixMixUp:
    class DummyDataset:
        def __init__(self, size, num_classes, one_hot_labels):
            self.size = size
            self.num_classes = num_classes
            self.one_hot_labels = one_hot_labels
            assert size < num_classes

        def __getitem__(self, idx):
            img = torch.rand(3, 100, 100)
            label = idx  # this ensures all labels in a batch are unique and makes testing easier
            if self.one_hot_labels:
                label = torch.nn.functional.one_hot(torch.tensor(label), num_classes=self.num_classes)
            return img, label

        def __len__(self):
            return self.size

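    # CutMix and MixUp operate on whole batches: the returned target is a (batch_size, num_classes)
    # probability vector that mixes exactly two labels per sample, which check_output() below verifies.
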
    @pytest.mark.parametrize("T", [transforms.CutMix, transforms.MixUp])
    @pytest.mark.parametrize("one_hot_labels", (True, False))
    def test_supported_input_structure(self, T, one_hot_labels):
        batch_size = 32
        num_classes = 100

        dataset = self.DummyDataset(size=batch_size, num_classes=num_classes, one_hot_labels=one_hot_labels)

        cutmix_mixup = T(num_classes=num_classes)

        dl = DataLoader(dataset, batch_size=batch_size)

        # Input sanity checks
        img, target = next(iter(dl))
        input_img_size = img.shape[-3:]
        assert isinstance(img, torch.Tensor) and isinstance(target, torch.Tensor)
        assert target.shape == ((batch_size, num_classes) if one_hot_labels else (batch_size,))

        def check_output(img, target):
            assert img.shape == (batch_size, *input_img_size)
            assert target.shape == (batch_size, num_classes)
            torch.testing.assert_close(target.sum(axis=-1), torch.ones(batch_size))
            num_non_zero_labels = (target != 0).sum(axis=-1)
            assert (num_non_zero_labels == 2).all()

        # After the DataLoader, as an unpacked (img, target) pair
        img, target = next(iter(dl))
        assert target.shape == ((batch_size, num_classes) if one_hot_labels else (batch_size,))
        img, target = cutmix_mixup(img, target)
        check_output(img, target)

        # After the DataLoader, as a packed sample
        packed_from_dl = next(iter(dl))
        assert isinstance(packed_from_dl, list)
        img, target = cutmix_mixup(packed_from_dl)
        check_output(img, target)

        # As a collation function, both packed and unpacked
        def collate_fn_1(batch):
            return cutmix_mixup(default_collate(batch))

        def collate_fn_2(batch):
            return cutmix_mixup(*default_collate(batch))

        for collate_fn in (collate_fn_1, collate_fn_2):
            dl = DataLoader(dataset, batch_size=batch_size, collate_fn=collate_fn)
            img, target = next(iter(dl))
            check_output(img, target)

    @pytest.mark.parametrize("T", [transforms.CutMix, transforms.MixUp])
    def test_cpu_vs_gpu(self, T):
        num_classes = 10
        batch_size = 3
        H, W = 12, 12

        imgs = torch.rand(batch_size, 3, H, W)
        labels = torch.randint(0, num_classes, (batch_size,))
        cutmix_mixup = T(alpha=0.5, num_classes=num_classes)

        _check_kernel_cuda_vs_cpu(cutmix_mixup, imgs, labels, rtol=None, atol=None)

    @pytest.mark.parametrize("T", [transforms.CutMix, transforms.MixUp])
    def test_error(self, T):
        num_classes = 10
        batch_size = 9

        imgs = torch.rand(batch_size, 3, 12, 12)
        cutmix_mixup = T(alpha=0.5, num_classes=num_classes)

        for input_with_bad_type in (
            F.to_pil_image(imgs[0]),
            tv_tensors.Mask(torch.rand(12, 12)),
            tv_tensors.BoundingBoxes(torch.rand(2, 4), format="XYXY", canvas_size=12),
        ):
            with pytest.raises(ValueError, match="does not support PIL images, "):
                cutmix_mixup(input_with_bad_type)

        with pytest.raises(ValueError, match="Could not infer where the labels are"):
            cutmix_mixup({"img": imgs, "Nothing_else": 3})

        with pytest.raises(ValueError, match="labels should be index based"):
            # The label heuristic finds the images themselves here; the important part is that this
            # fails loudly instead of silently producing garbage.
            cutmix_mixup(imgs)

        with pytest.raises(ValueError, match="When using the default labels_getter"):
            cutmix_mixup(imgs, "not_a_tensor")

        with pytest.raises(ValueError, match="Expected a batched input with 4 dims"):
            cutmix_mixup(imgs[None, None], torch.randint(0, num_classes, size=(batch_size,)))

        with pytest.raises(ValueError, match="does not match the batch size of the labels"):
            cutmix_mixup(imgs, torch.randint(0, num_classes, size=(batch_size + 1,)))

        with pytest.raises(ValueError, match="When passing 2D labels"):
            wrong_num_classes = num_classes + 1
            T(alpha=0.5, num_classes=num_classes)(imgs, torch.randint(0, 2, size=(batch_size, wrong_num_classes)))

        with pytest.raises(ValueError, match="but got a tensor of shape"):
            cutmix_mixup(imgs, torch.randint(0, 2, size=(2, 3, 4)))

        with pytest.raises(ValueError, match="num_classes must be passed"):
            T(alpha=0.5)(imgs, torch.randint(0, num_classes, size=(batch_size,)))


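# The default labels_getter heuristic accepts any dict key that case-insensitively contains "label", while
# an exact "labels" key takes precedence over such fuzzy matches. For illustration (hypothetical keys):
#   _find_labels_default_heuristic({"ImageLabels": t})              # -> t
#   _find_labels_default_heuristic({"ImageLabels": t, "labels": u}) # -> u
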
@pytest.mark.parametrize("key", ("labels", "LABELS", "LaBeL", "SOME_WEIRD_KEY_THAT_HAS_LABeL_IN_IT"))
@pytest.mark.parametrize("sample_type", (tuple, list, dict))
def test_labels_getter_default_heuristic(key, sample_type):
    labels = torch.arange(10)
    sample = {key: labels, "another_key": "whatever"}
    if sample_type is not dict:
        sample = sample_type((None, sample, "whatever_again"))

    assert transforms._utils._find_labels_default_heuristic(sample) is labels

    if key.lower() != "labels":
        # an exact match for "labels" takes precedence over keys that merely contain "label" somewhere
        d = {key: "something_else", "labels": labels}
        assert transforms._utils._find_labels_default_heuristic(d) is labels


class TestShapeGetters:
    @pytest.mark.parametrize(
        ("kernel", "make_input"),
        [
            (F.get_dimensions_image, make_image_tensor),
            (F._meta._get_dimensions_image_pil, make_image_pil),
            (F.get_dimensions_image, make_image),
            (F.get_dimensions_video, make_video),
        ],
    )
    def test_get_dimensions(self, kernel, make_input):
        size = (10, 10)
        color_space, num_channels = "RGB", 3

        input = make_input(size, color_space=color_space)

        assert kernel(input) == F.get_dimensions(input) == [num_channels, *size]

    @pytest.mark.parametrize(
        ("kernel", "make_input"),
        [
            (F.get_num_channels_image, make_image_tensor),
            (F._meta._get_num_channels_image_pil, make_image_pil),
            (F.get_num_channels_image, make_image),
            (F.get_num_channels_video, make_video),
        ],
    )
    def test_get_num_channels(self, kernel, make_input):
        color_space, num_channels = "RGB", 3

        input = make_input(color_space=color_space)

        assert kernel(input) == F.get_num_channels(input) == num_channels

    @pytest.mark.parametrize(
        ("kernel", "make_input"),
        [
            (F.get_size_image, make_image_tensor),
            (F._meta._get_size_image_pil, make_image_pil),
            (F.get_size_image, make_image),
            (F.get_size_bounding_boxes, make_bounding_boxes),
            (F.get_size_mask, make_detection_masks),
            (F.get_size_mask, make_segmentation_mask),
            (F.get_size_video, make_video),
        ],
    )
    def test_get_size(self, kernel, make_input):
        size = (10, 10)

        input = make_input(size)

        assert kernel(input) == F.get_size(input) == list(size)

    @pytest.mark.parametrize(
        ("kernel", "make_input"),
        [
            (F.get_num_frames_video, make_video_tensor),
            (F.get_num_frames_video, make_video),
        ],
    )
    def test_get_num_frames(self, kernel, make_input):
        num_frames = 4

        input = make_input(num_frames=num_frames)

        assert kernel(input) == F.get_num_frames(input) == num_frames

    @pytest.mark.parametrize(
        ("functional", "make_input"),
        [
            (F.get_dimensions, make_bounding_boxes),
            (F.get_dimensions, make_detection_masks),
            (F.get_dimensions, make_segmentation_mask),
            (F.get_num_channels, make_bounding_boxes),
            (F.get_num_channels, make_detection_masks),
            (F.get_num_channels, make_segmentation_mask),
            (F.get_num_frames, make_image_pil),
            (F.get_num_frames, make_image),
            (F.get_num_frames, make_bounding_boxes),
            (F.get_num_frames, make_detection_masks),
            (F.get_num_frames, make_segmentation_mask),
        ],
    )
    def test_unsupported_types(self, functional, make_input):
        input = make_input()

        with pytest.raises(TypeError, match=re.escape(str(type(input)))):
            functional(input)


class TestRegisterKernel:
    @pytest.mark.parametrize("functional", (F.resize, "resize"))
    def test_register_kernel(self, functional):
        class CustomTVTensor(tv_tensors.TVTensor):
            pass

        kernel_was_called = False

        @F.register_kernel(functional, CustomTVTensor)
        def new_resize(dp, *args, **kwargs):
            nonlocal kernel_was_called
            kernel_was_called = True
            return dp

        t = transforms.Resize(size=(224, 224), antialias=True)

        my_dp = CustomTVTensor(torch.rand(3, 10, 10))

        t(my_dp)
        assert kernel_was_called

        # sanity check that registering the custom kernel didn't affect the builtin types
        assert t(torch.rand(3, 10, 10)).shape == (3, 224, 224)
        assert t(tv_tensors.Image(torch.rand(3, 10, 10))).shape == (3, 224, 224)

    def test_errors(self):
        with pytest.raises(ValueError, match="Could not find functional with name"):
            F.register_kernel("bad_name", tv_tensors.Image)

        with pytest.raises(ValueError, match="Kernels can only be registered on functionals"):
            F.register_kernel(tv_tensors.Image, F.resize)

        with pytest.raises(ValueError, match="Kernels can only be registered for subclasses"):
            F.register_kernel(F.resize, object)

        with pytest.raises(ValueError, match="cannot be registered for the builtin tv_tensor classes"):
            F.register_kernel(F.resize, tv_tensors.Image)(F.resize_image)

        class CustomTVTensor(tv_tensors.TVTensor):
            pass

        def resize_custom_tv_tensor():
            pass

        F.register_kernel(F.resize, CustomTVTensor)(resize_custom_tv_tensor)

        with pytest.raises(ValueError, match="already has a kernel registered for type"):
            F.register_kernel(F.resize, CustomTVTensor)(resize_custom_tv_tensor)


class TestGetKernel:
    # F.resize and its kernels only act as proxies here; any other functional / kernel combination
    # would work just as well.
    KERNELS = {
        torch.Tensor: F.resize_image,
        PIL.Image.Image: F._geometry._resize_image_pil,
        tv_tensors.Image: F.resize_image,
        tv_tensors.BoundingBoxes: F.resize_bounding_boxes,
        tv_tensors.Mask: F.resize_mask,
        tv_tensors.Video: F.resize_video,
    }

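    # _get_kernel resolves the kernel by walking the input type's MRO: an exact registration wins, and a
    # subclass of a builtin tv_tensor falls back to the kernel of its closest registered parent class.
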
    @pytest.mark.parametrize("input_type", [str, int, object])
    def test_unsupported_types(self, input_type):
        with pytest.raises(TypeError, match="supports inputs of type"):
            _get_kernel(F.resize, input_type)

    def test_exact_match(self):
        # We cannot use F.resize together with self.KERNELS directly, since the registered kernels are
        # wrapped by an intermediate layer. Instead, we create a fresh functional, register the kernels
        # without the wrapper, and check for exact matches afterwards.
        def resize_with_pure_kernels():
            pass

        for input_type, kernel in self.KERNELS.items():
            _register_kernel_internal(resize_with_pure_kernels, input_type, tv_tensor_wrapper=False)(kernel)

            assert _get_kernel(resize_with_pure_kernels, input_type) is kernel

    def test_builtin_tv_tensor_subclass(self):
        # Analogous to test_exact_match above, we register the unwrapped kernels on a fresh functional and
        # check that subclasses of the builtin tv_tensors dispatch to the kernel of their superclass.
        def resize_with_pure_kernels():
            pass

        class MyImage(tv_tensors.Image):
            pass

        class MyBoundingBoxes(tv_tensors.BoundingBoxes):
            pass

        class MyMask(tv_tensors.Mask):
            pass

        class MyVideo(tv_tensors.Video):
            pass

        for custom_tv_tensor_subclass in [
            MyImage,
            MyBoundingBoxes,
            MyMask,
            MyVideo,
        ]:
            builtin_tv_tensor_class = custom_tv_tensor_subclass.__mro__[1]
            builtin_tv_tensor_kernel = self.KERNELS[builtin_tv_tensor_class]
            _register_kernel_internal(resize_with_pure_kernels, builtin_tv_tensor_class, tv_tensor_wrapper=False)(
                builtin_tv_tensor_kernel
            )

            assert _get_kernel(resize_with_pure_kernels, custom_tv_tensor_subclass) is builtin_tv_tensor_kernel

    def test_tv_tensor_subclass(self):
        class MyTVTensor(tv_tensors.TVTensor):
            pass

        with pytest.raises(TypeError, match="supports inputs of type"):
            _get_kernel(F.resize, MyTVTensor)

        def resize_my_tv_tensor():
            pass

        _register_kernel_internal(F.resize, MyTVTensor, tv_tensor_wrapper=False)(resize_my_tv_tensor)

        assert _get_kernel(F.resize, MyTVTensor) is resize_my_tv_tensor

    def test_pil_image_subclass(self):
        opened_image = PIL.Image.open(Path(__file__).parent / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg")
        loaded_image = opened_image.convert("RGB")

        # sanity-check the type assumptions this test relies on: opening yields a subclass of
        # PIL.Image.Image, while converting yields the base class itself
        assert isinstance(opened_image, PIL.Image.Image)
        assert type(opened_image) is not PIL.Image.Image

        assert type(loaded_image) is PIL.Image.Image

        size = [17, 11]
        for image in [opened_image, loaded_image]:
            kernel = _get_kernel(F.resize, type(image))

            output = kernel(image, size=size)

            assert F.get_size(output) == size


class TestPermuteChannels:
    _DEFAULT_PERMUTATION = [2, 0, 1]

    @pytest.mark.parametrize(
        ("kernel", "make_input"),
        [
            (F.permute_channels_image, make_image_tensor),
            # check_kernel does not support PIL kernels, so the PIL path is only covered by test_functional
            (F.permute_channels_image, make_image),
            (F.permute_channels_video, make_video),
        ],
    )
    @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel(self, kernel, make_input, dtype, device):
        check_kernel(kernel, make_input(dtype=dtype, device=device), permutation=self._DEFAULT_PERMUTATION)

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video])
    def test_functional(self, make_input):
        check_functional(F.permute_channels, make_input(), permutation=self._DEFAULT_PERMUTATION)

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.permute_channels_image, torch.Tensor),
            (F._color._permute_channels_image_pil, PIL.Image.Image),
            (F.permute_channels_image, tv_tensors.Image),
            (F.permute_channels_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.permute_channels, kernel=kernel, input_type=input_type)

    def reference_image_correctness(self, image, permutation):
        channel_images = image.split(1, dim=-3)
        permuted_channel_images = [channel_images[channel_idx] for channel_idx in permutation]
        return tv_tensors.Image(torch.concat(permuted_channel_images, dim=-3))

    @pytest.mark.parametrize("permutation", [[2, 0, 1], [1, 2, 0], [2, 0, 1], [0, 1, 2]])
    @pytest.mark.parametrize("batch_dims", [(), (2,), (2, 1)])
    def test_image_correctness(self, permutation, batch_dims):
        image = make_image(batch_dims=batch_dims)

        actual = F.permute_channels(image, permutation=permutation)
        expected = self.reference_image_correctness(image, permutation=permutation)

        torch.testing.assert_close(actual, expected)


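# The displacement field of the elastic transform has shape (1, H, W, 2): one (dx, dy) offset per output
# pixel. _make_displacement below builds a random field of that shape, and the shape-validation test feeds
# a deliberately wrong one.
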
class TestElastic:
    def _make_displacement(self, inpt):
        return torch.rand(
            1,
            *F.get_size(inpt),
            2,
            dtype=torch.float32,
            device=inpt.device if isinstance(inpt, torch.Tensor) else "cpu",
        )

    @param_value_parametrization(
        interpolation=[transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR],
        fill=EXHAUSTIVE_TYPE_FILLS,
    )
    @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8, torch.float16])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, param, value, dtype, device):
        image = make_image_tensor(dtype=dtype, device=device)

        check_kernel(
            F.elastic_image,
            image,
            displacement=self._make_displacement(image),
            **{param: value},
            check_scripted_vs_eager=not (param == "fill" and isinstance(value, (int, float))),
            check_cuda_vs_cpu=dtype is not torch.float16,
        )

    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_bounding_boxes(self, format, dtype, device):
        bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)

        check_kernel(
            F.elastic_bounding_boxes,
            bounding_boxes,
            format=bounding_boxes.format,
            canvas_size=bounding_boxes.canvas_size,
            displacement=self._make_displacement(bounding_boxes),
        )

    @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks])
    def test_kernel_mask(self, make_mask):
        mask = make_mask()
        check_kernel(F.elastic_mask, mask, displacement=self._make_displacement(mask))

    def test_kernel_video(self):
        video = make_video()
        check_kernel(F.elastic_video, video, displacement=self._make_displacement(video))

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_functional(self, make_input):
        input = make_input()
        check_functional(F.elastic, input, displacement=self._make_displacement(input))

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.elastic_image, torch.Tensor),
            (F._geometry._elastic_image_pil, PIL.Image.Image),
            (F.elastic_image, tv_tensors.Image),
            (F.elastic_bounding_boxes, tv_tensors.BoundingBoxes),
            (F.elastic_mask, tv_tensors.Mask),
            (F.elastic_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.elastic, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_displacement_error(self, make_input):
        input = make_input()

        with pytest.raises(TypeError, match="displacement should be a Tensor"):
            F.elastic(input, displacement=None)

        with pytest.raises(ValueError, match="displacement shape should be"):
            F.elastic(input, displacement=torch.rand(F.get_size(input)))

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    # ElasticTransform needs larger images to avoid the internal padding exceeding the actual image
    @pytest.mark.parametrize("size", [(163, 163), (72, 333), (313, 95)])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_transform(self, make_input, size, device):
        # the v1 compatibility check is skipped on macOS, since the results there differ slightly from v1
        check_v1_compatibility = False if sys.platform == "darwin" else dict(rtol=0, atol=1)

        check_transform(
            transforms.ElasticTransform(),
            make_input(size, device=device),
            check_v1_compatibility=check_v1_compatibility,
        )


class TestToPureTensor:
    def test_correctness(self):
        input = {
            "img": make_image(),
            "img_tensor": make_image_tensor(),
            "img_pil": make_image_pil(),
            "mask": make_detection_masks(),
            "video": make_video(),
            "bbox": make_bounding_boxes(),
            "str": "str",
        }

        out = transforms.ToPureTensor()(input)

        for input_value, out_value in zip(input.values(), out.values()):
            if isinstance(input_value, tv_tensors.TVTensor):
                assert isinstance(out_value, torch.Tensor) and not isinstance(out_value, tv_tensors.TVTensor)
            else:
                assert isinstance(out_value, type(input_value))


class TestCrop:
    INPUT_SIZE = (21, 11)

    CORRECTNESS_CROP_KWARGS = [
        # center
        dict(top=5, left=5, height=10, width=5),
        # larger than the input, i.e. crop with implicit padding
        dict(top=-5, left=-5, height=30, width=20),
        # sides: left, right, top, bottom
        dict(top=-5, left=-5, height=30, width=10),
        dict(top=-5, left=5, height=30, width=10),
        dict(top=-5, left=-5, height=20, width=20),
        dict(top=5, left=-5, height=20, width=20),
        # corners: top-left, top-right, bottom-left, bottom-right
        dict(top=-5, left=-5, height=20, width=10),
        dict(top=-5, left=5, height=20, width=10),
        dict(top=5, left=-5, height=20, width=10),
        dict(top=5, left=5, height=20, width=10),
    ]
    MINIMAL_CROP_KWARGS = CORRECTNESS_CROP_KWARGS[0]

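    # For illustration: with INPUT_SIZE = (21, 11), top=-5 / left=-5 starts the crop outside the image, so
    # the kernels implicitly pad before cropping; the output canvas is always (height, width) from the kwargs.
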
    @pytest.mark.parametrize("kwargs", CORRECTNESS_CROP_KWARGS)
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, kwargs, dtype, device):
        check_kernel(F.crop_image, make_image(self.INPUT_SIZE, dtype=dtype, device=device), **kwargs)

    @pytest.mark.parametrize("kwargs", CORRECTNESS_CROP_KWARGS)
    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_bounding_box(self, kwargs, format, dtype, device):
        bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, dtype=dtype, device=device)
        check_kernel(F.crop_bounding_boxes, bounding_boxes, format=format, **kwargs)

    @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks])
    def test_kernel_mask(self, make_mask):
        check_kernel(F.crop_mask, make_mask(self.INPUT_SIZE), **self.MINIMAL_CROP_KWARGS)

    def test_kernel_video(self):
        check_kernel(F.crop_video, make_video(self.INPUT_SIZE), **self.MINIMAL_CROP_KWARGS)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_functional(self, make_input):
        check_functional(F.crop, make_input(self.INPUT_SIZE), **self.MINIMAL_CROP_KWARGS)

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.crop_image, torch.Tensor),
            (F._geometry._crop_image_pil, PIL.Image.Image),
            (F.crop_image, tv_tensors.Image),
            (F.crop_bounding_boxes, tv_tensors.BoundingBoxes),
            (F.crop_mask, tv_tensors.Mask),
            (F.crop_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.crop, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize("kwargs", CORRECTNESS_CROP_KWARGS)
    def test_functional_image_correctness(self, kwargs):
        image = make_image(self.INPUT_SIZE, dtype=torch.uint8, device="cpu")

        actual = F.crop(image, **kwargs)
        expected = F.to_image(F.crop(F.to_pil_image(image), **kwargs))

        assert_equal(actual, expected)

    @param_value_parametrization(
        size=[(10, 5), (25, 15), (25, 5), (10, 15)],
        fill=EXHAUSTIVE_TYPE_FILLS,
    )
    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_transform(self, param, value, make_input):
        input = make_input(self.INPUT_SIZE)

        check_sample_input = True
        if param == "fill":
            if isinstance(value, (tuple, list)):
                if isinstance(input, tv_tensors.Mask):
                    pytest.skip("F.pad_mask doesn't support non-scalar fill.")
                else:
                    check_sample_input = False

            kwargs = dict(
                # size is always required, and fill only has an effect if padding is needed
                size=[s + 4 for s in self.INPUT_SIZE],
                fill=adapt_fill(value, dtype=input.dtype if isinstance(input, torch.Tensor) else torch.uint8),
            )
        else:
            kwargs = {param: value}

        check_transform(
            transforms.RandomCrop(**kwargs, pad_if_needed=True),
            input,
            check_v1_compatibility=param != "fill" or isinstance(value, (int, float)),
            check_sample_input=check_sample_input,
        )

    @pytest.mark.parametrize("padding", [1, (1, 1), (1, 1, 1, 1)])
    def test_transform_padding(self, padding):
        inpt = make_image(self.INPUT_SIZE)

        output_size = [s + 2 for s in F.get_size(inpt)]
        transform = transforms.RandomCrop(output_size, padding=padding)

        output = transform(inpt)

        assert F.get_size(output) == output_size

    @pytest.mark.parametrize("padding", [None, 1, (1, 1), (1, 1, 1, 1)])
    def test_transform_insufficient_padding(self, padding):
        inpt = make_image(self.INPUT_SIZE)

        output_size = [s + 3 for s in F.get_size(inpt)]
        transform = transforms.RandomCrop(output_size, padding=padding)

        with pytest.raises(ValueError, match="larger than (padded )?input image size"):
            transform(inpt)

    def test_transform_pad_if_needed(self):
        inpt = make_image(self.INPUT_SIZE)

        output_size = [s * 2 for s in F.get_size(inpt)]
        transform = transforms.RandomCrop(output_size, pad_if_needed=True)

        output = transform(inpt)

        assert F.get_size(output) == output_size

    @param_value_parametrization(
        size=[(10, 5), (25, 15), (25, 5), (10, 15)],
        fill=CORRECTNESS_FILLS,
        padding_mode=["constant", "edge", "reflect", "symmetric"],
    )
    @pytest.mark.parametrize("seed", list(range(5)))
    def test_transform_image_correctness(self, param, value, seed):
        kwargs = {param: value}
        if param != "size":
            # size is always required, and fill / padding_mode only have an effect if padding is needed
            kwargs["size"] = [s + 4 for s in self.INPUT_SIZE]
        if param == "fill":
            kwargs["fill"] = adapt_fill(kwargs["fill"], dtype=torch.uint8)

        transform = transforms.RandomCrop(pad_if_needed=True, **kwargs)

        image = make_image(self.INPUT_SIZE)

        with freeze_rng_state():
            torch.manual_seed(seed)
            actual = transform(image)

            torch.manual_seed(seed)
            expected = F.to_image(transform(F.to_pil_image(image)))

        assert_equal(actual, expected)

    def _reference_crop_bounding_boxes(self, bounding_boxes, *, top, left, height, width):
        affine_matrix = np.array(
            [
                [1, 0, -left],
                [0, 1, -top],
            ],
        )
        return reference_affine_bounding_boxes_helper(
            bounding_boxes, affine_matrix=affine_matrix, new_canvas_size=(height, width)
        )

    @pytest.mark.parametrize("kwargs", CORRECTNESS_CROP_KWARGS)
    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_functional_bounding_box_correctness(self, kwargs, format, dtype, device):
        bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, dtype=dtype, device=device)

        actual = F.crop(bounding_boxes, **kwargs)
        expected = self._reference_crop_bounding_boxes(bounding_boxes, **kwargs)

        assert_equal(actual, expected, atol=1, rtol=0)
        assert_equal(F.get_size(actual), F.get_size(expected))

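    # RandomCrop._get_params also reports whether padding would be needed. Since the input below is twice
    # the output size, only the crop branch can be active, which the pop()/del assertions check before the
    # remaining params are fed to the reference.
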
    @pytest.mark.parametrize("output_size", [(17, 11), (11, 17), (11, 11)])
    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    @pytest.mark.parametrize("seed", list(range(5)))
    def test_transform_bounding_boxes_correctness(self, output_size, format, dtype, device, seed):
        input_size = [s * 2 for s in output_size]
        bounding_boxes = make_bounding_boxes(input_size, format=format, dtype=dtype, device=device)

        transform = transforms.RandomCrop(output_size)

        with freeze_rng_state():
            torch.manual_seed(seed)
            params = transform._get_params([bounding_boxes])
            assert not params.pop("needs_pad")
            del params["padding"]
            assert params.pop("needs_crop")

            torch.manual_seed(seed)
            actual = transform(bounding_boxes)

        expected = self._reference_crop_bounding_boxes(bounding_boxes, **params)

        assert_equal(actual, expected)
        assert_equal(F.get_size(actual), F.get_size(expected))

    def test_errors(self):
        with pytest.raises(ValueError, match="Please provide only two dimensions"):
            transforms.RandomCrop([10, 12, 14])

        with pytest.raises(TypeError, match="Got inappropriate padding arg"):
            transforms.RandomCrop([10, 12], padding="abc")

        with pytest.raises(ValueError, match="Padding must be an int or a 1, 2, or 4"):
            transforms.RandomCrop([10, 12], padding=[-0.7, 0, 0.7])

        with pytest.raises(TypeError, match="Got inappropriate fill arg"):
            transforms.RandomCrop([10, 12], padding=1, fill="abc")

        with pytest.raises(ValueError, match="Padding mode should be either"):
            transforms.RandomCrop([10, 12], padding=1, padding_mode="abc")


class TestErase:
    INPUT_SIZE = (17, 11)
    FUNCTIONAL_KWARGS = dict(
        zip("ijhwv", [2, 2, 10, 8, torch.tensor(0.0, dtype=torch.float32, device="cpu").reshape(-1, 1, 1)])
    )

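    # In the erase kwargs, i/j are the top/left corner of the erased region, h/w its height and width, and
    # v the value written into it (broadcast over the region).
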
    @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, dtype, device):
        check_kernel(F.erase_image, make_image(self.INPUT_SIZE, dtype=dtype, device=device), **self.FUNCTIONAL_KWARGS)

    @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image_inplace(self, dtype, device):
        input = make_image(self.INPUT_SIZE, dtype=dtype, device=device)
        input_version = input._version

        output_out_of_place = F.erase_image(input, **self.FUNCTIONAL_KWARGS)
        assert output_out_of_place.data_ptr() != input.data_ptr()
        assert output_out_of_place is not input

        output_inplace = F.erase_image(input, **self.FUNCTIONAL_KWARGS, inplace=True)
        assert output_inplace.data_ptr() == input.data_ptr()
        assert output_inplace._version > input_version
        assert output_inplace is input

        assert_equal(output_inplace, output_out_of_place)

    def test_kernel_video(self):
        check_kernel(F.erase_video, make_video(self.INPUT_SIZE), **self.FUNCTIONAL_KWARGS)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_video],
    )
    def test_functional(self, make_input):
        check_functional(F.erase, make_input(), **self.FUNCTIONAL_KWARGS)

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.erase_image, torch.Tensor),
            (F._augment._erase_image_pil, PIL.Image.Image),
            (F.erase_image, tv_tensors.Image),
            (F.erase_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.erase, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_video],
    )
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_transform(self, make_input, device):
        input = make_input(device=device)

        with pytest.warns(UserWarning, match="currently passing through inputs of type"):
            check_transform(
                transforms.RandomErasing(p=1),
                input,
                check_v1_compatibility=not isinstance(input, PIL.Image.Image),
            )

    def _reference_erase_image(self, image, *, i, j, h, w, v):
        mask = torch.zeros_like(image, dtype=torch.bool)
        mask[..., i : i + h, j : j + w] = True

        value = torch.broadcast_to(v, (*image.shape[:-2], h, w)).to(image)

        erased_image = torch.empty_like(image)
        erased_image[mask] = value.flatten()
        erased_image[~mask] = image[~mask]

        return erased_image

    @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_functional_image_correctness(self, dtype, device):
        image = make_image(dtype=dtype, device=device)

        actual = F.erase(image, **self.FUNCTIONAL_KWARGS)
        expected = self._reference_erase_image(image, **self.FUNCTIONAL_KWARGS)

        assert_equal(actual, expected)

    @param_value_parametrization(
        scale=[(0.1, 0.2), [0.0, 1.0]],
        ratio=[(0.3, 0.7), [0.1, 5.0]],
        value=[0, 0.5, (0, 1, 0), [-0.2, 0.0, 1.3], "random"],
    )
    @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    @pytest.mark.parametrize("seed", list(range(5)))
    def test_transform_image_correctness(self, param, value, dtype, device, seed):
        transform = transforms.RandomErasing(**{param: value}, p=1)

        image = make_image(dtype=dtype, device=device)

        with freeze_rng_state():
            torch.manual_seed(seed)
            # this emulates the random apply check that happens before _get_params is called
            torch.rand(1)
            params = transform._get_params([image])

            torch.manual_seed(seed)
            actual = transform(image)

        expected = self._reference_erase_image(image, **params)

        assert_equal(actual, expected)

    def test_transform_errors(self):
        with pytest.raises(TypeError, match="Argument value should be either a number or str or a sequence"):
            transforms.RandomErasing(value={})

        with pytest.raises(ValueError, match="If value is str, it should be 'random'"):
            transforms.RandomErasing(value="abc")

        with pytest.raises(TypeError, match="Scale should be a sequence"):
            transforms.RandomErasing(scale=123)

        with pytest.raises(TypeError, match="Ratio should be a sequence"):
            transforms.RandomErasing(ratio=123)

        with pytest.raises(ValueError, match="Scale should be between 0 and 1"):
            transforms.RandomErasing(scale=[-1, 2])

        transform = transforms.RandomErasing(value=[1, 2, 3, 4])

        with pytest.raises(ValueError, match="If value is a sequence, it should have either a single value"):
            transform._get_params([make_image()])


class TestGaussianBlur:
    @pytest.mark.parametrize("kernel_size", [1, 3, (3, 1), [3, 5]])
    @pytest.mark.parametrize("sigma", [None, 1.0, 1, (0.5,), [0.3], (0.3, 0.7), [0.9, 0.2]])
    def test_kernel_image(self, kernel_size, sigma):
        check_kernel(
            F.gaussian_blur_image,
            make_image(),
            kernel_size=kernel_size,
            sigma=sigma,
            check_scripted_vs_eager=not (isinstance(kernel_size, int) or isinstance(sigma, (float, int))),
        )

    def test_kernel_image_errors(self):
        image = make_image_tensor()

        with pytest.raises(ValueError, match="kernel_size is a sequence its length should be 2"):
            F.gaussian_blur_image(image, kernel_size=[1, 2, 3])

        for kernel_size in [2, -1]:
            with pytest.raises(ValueError, match="kernel_size should have odd and positive integers"):
                F.gaussian_blur_image(image, kernel_size=kernel_size)

        with pytest.raises(ValueError, match="sigma is a sequence, its length should be 2"):
            F.gaussian_blur_image(image, kernel_size=1, sigma=[1, 2, 3])

        with pytest.raises(TypeError, match="sigma should be either float or sequence of floats"):
            F.gaussian_blur_image(image, kernel_size=1, sigma=object())

        with pytest.raises(ValueError, match="sigma should have positive values"):
            F.gaussian_blur_image(image, kernel_size=1, sigma=-1)

    def test_kernel_video(self):
        check_kernel(F.gaussian_blur_video, make_video(), kernel_size=(3, 3))

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_video],
    )
    def test_functional(self, make_input):
        check_functional(F.gaussian_blur, make_input(), kernel_size=(3, 3))

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.gaussian_blur_image, torch.Tensor),
            (F._misc._gaussian_blur_image_pil, PIL.Image.Image),
            (F.gaussian_blur_image, tv_tensors.Image),
            (F.gaussian_blur_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.gaussian_blur, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    @pytest.mark.parametrize("device", cpu_and_cuda())
    @pytest.mark.parametrize("sigma", [5, 2.0, (0.5, 2), [1.3, 2.7]])
    def test_transform(self, make_input, device, sigma):
        check_transform(transforms.GaussianBlur(kernel_size=3, sigma=sigma), make_input(device=device))

    def test_assertions(self):
        with pytest.raises(ValueError, match="Kernel size should be a tuple/list of two integers"):
            transforms.GaussianBlur([10, 12, 14])

        with pytest.raises(ValueError, match="Kernel size value should be an odd and positive number"):
            transforms.GaussianBlur(4)

        with pytest.raises(ValueError, match="If sigma is a sequence its length should be 1 or 2. Got 3"):
            transforms.GaussianBlur(3, sigma=[1, 2, 3])

        with pytest.raises(ValueError, match="sigma values should be positive and of the form"):
            transforms.GaussianBlur(3, sigma=-1.0)

        with pytest.raises(ValueError, match="sigma values should be positive and of the form"):
            transforms.GaussianBlur(3, sigma=[2.0, 1.0])

        with pytest.raises(TypeError, match="sigma should be a number or a sequence of numbers"):
            transforms.GaussianBlur(3, sigma={})

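    # GaussianBlur._get_params samples sigma uniformly from [sigma[0], sigma[1]]; a scalar or a length-1
    # sequence degenerates to a fixed value, which is what the three branches below assert.
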
    @pytest.mark.parametrize("sigma", [10.0, [10.0, 12.0], (10, 12.0), [10]])
    def test__get_params(self, sigma):
        transform = transforms.GaussianBlur(3, sigma=sigma)
        params = transform._get_params([])

        if isinstance(sigma, float):
            assert params["sigma"][0] == params["sigma"][1] == sigma
        elif isinstance(sigma, list) and len(sigma) == 1:
            assert params["sigma"][0] == params["sigma"][1] == sigma[0]
        else:
            assert sigma[0] <= params["sigma"][0] <= sigma[1]
            assert sigma[0] <= params["sigma"][1] <= sigma[1]

    # The reference values were precomputed with OpenCV and are stored in the asset file loaded below.
    REFERENCE_GAUSSIAN_BLUR_IMAGE_RESULTS = torch.load(
        Path(__file__).parent / "assets" / "gaussian_blur_opencv_results.pt",
        weights_only=False,
    )

    @pytest.mark.parametrize(
        ("dimensions", "kernel_size", "sigma"),
        [
            ((3, 10, 12), (3, 3), 0.8),
            ((3, 10, 12), (3, 3), 0.5),
            ((3, 10, 12), (3, 5), 0.8),
            ((3, 10, 12), (3, 5), 0.5),
            ((1, 26, 28), (23, 23), 1.7),
        ],
    )
    @pytest.mark.parametrize("dtype", [torch.float32, torch.float64, torch.float16])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_functional_image_correctness(self, dimensions, kernel_size, sigma, dtype, device):
        if dtype is torch.float16 and device == "cpu":
            pytest.skip("The float16 implementation on CPU differs from the opencv reference")

        num_channels, height, width = dimensions

        reference_results_key = f"{height}_{width}_{num_channels}__{kernel_size[0]}_{kernel_size[1]}_{sigma}"
        expected = (
            torch.tensor(self.REFERENCE_GAUSSIAN_BLUR_IMAGE_RESULTS[reference_results_key])
            .reshape(height, width, num_channels)
            .permute(2, 0, 1)
            .to(dtype=dtype, device=device)
        )

        image = tv_tensors.Image(
            torch.arange(num_channels * height * width, dtype=torch.uint8)
            .reshape(height, width, num_channels)
            .permute(2, 0, 1)
            .to(dtype=dtype, device=device)
        )

        actual = F.gaussian_blur_image(image, kernel_size=kernel_size, sigma=sigma)

        torch.testing.assert_close(actual, expected, rtol=0, atol=1)


class TestGaussianNoise:
    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image, make_video],
    )
    def test_kernel(self, make_input):
        check_kernel(
            F.gaussian_noise,
            make_input(dtype=torch.float32),
            # the batched vs. unbatched check cannot pass, because the noise is not sampled per image
            check_batched_vs_unbatched=False,
        )

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image, make_video],
    )
    def test_functional(self, make_input):
        check_functional(F.gaussian_noise, make_input(dtype=torch.float32))

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.gaussian_noise, torch.Tensor),
            (F.gaussian_noise_image, tv_tensors.Image),
            (F.gaussian_noise_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.gaussian_noise, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image, make_video],
    )
    def test_transform(self, make_input):
        def adapter(_, input, __):
            # GaussianNoise supports only float tensors, so convert the other sample entries on the fly
            for key, value in input.items():
                if isinstance(value, torch.Tensor) and not value.is_floating_point():
                    input[key] = value.to(torch.float32)
                if isinstance(value, PIL.Image.Image):
                    input[key] = F.pil_to_tensor(value).to(torch.float32)
            return input

        check_transform(transforms.GaussianNoise(), make_input(dtype=torch.float32), check_sample_input=adapter)

    def test_bad_input(self):
        with pytest.raises(ValueError, match="Gaussian Noise is not implemented for PIL images."):
            F.gaussian_noise(make_image_pil())
        with pytest.raises(ValueError, match="Input tensor is expected to be in float dtype"):
            F.gaussian_noise(make_image(dtype=torch.uint8))
        with pytest.raises(ValueError, match="sigma shouldn't be negative"):
            F.gaussian_noise(make_image(dtype=torch.float32), sigma=-1)

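    # With mean=±100 on an image in [0, 1], practically all noisy values land far outside the valid range,
    # so clip=True must saturate the output to all ones / all zeros, while clip=False leaves it unbounded.
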
    def test_clip(self):
        img = make_image(dtype=torch.float32)

        out = F.gaussian_noise(img, mean=100, clip=False)
        assert out.min() > 50

        out = F.gaussian_noise(img, mean=100, clip=True)
        assert (out == 1).all()

        out = F.gaussian_noise(img, mean=-100, clip=False)
        assert out.min() < -50

        out = F.gaussian_noise(img, mean=-100, clip=True)
        assert (out == 0).all()


class TestAutoAugmentTransforms:
    # The auto-augment transforms branch heavily on random sampling inside forward(), which makes full
    # correctness tests impractical. We therefore check the shear / translate kernels against a PIL
    # reference and otherwise rely on smoke tests.
    def _reference_shear_translate(self, image, *, transform_id, magnitude, interpolation, fill):
        if isinstance(image, PIL.Image.Image):
            input = image
        else:
            input = F.to_pil_image(image)

        matrix = {
            "ShearX": (1, magnitude, 0, 0, 1, 0),
            "ShearY": (1, 0, 0, magnitude, 1, 0),
            "TranslateX": (1, 0, -int(magnitude), 0, 1, 0),
            "TranslateY": (1, 0, 0, 0, 1, -int(magnitude)),
        }[transform_id]

        output = input.transform(
            input.size, PIL.Image.AFFINE, matrix, resample=pil_modes_mapping[interpolation], fill=fill
        )

        if isinstance(image, PIL.Image.Image):
            return output
        else:
            return F.to_image(output)

    @pytest.mark.parametrize("transform_id", ["ShearX", "ShearY", "TranslateX", "TranslateY"])
    @pytest.mark.parametrize("magnitude", [0.3, -0.2, 0.0])
    @pytest.mark.parametrize(
        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
    )
    @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
    @pytest.mark.parametrize("input_type", ["Tensor", "PIL"])
    def test_correctness_shear_translate(self, transform_id, magnitude, interpolation, fill, input_type):
        # ShearX/Y and TranslateX/Y are the only transforms simple enough to express directly as a PIL
        # affine transform, so they are the only ones we can check for correctness here.

        image = make_image(dtype=torch.uint8, device="cpu")
        if input_type == "PIL":
            image = F.to_pil_image(image)

        if "Translate" in transform_id:
            # for TranslateX/Y, the magnitude is a value in pixels
            magnitude *= min(F.get_size(image))

        actual = transforms.AutoAugment()._apply_image_or_video_transform(
            image,
            transform_id=transform_id,
            magnitude=magnitude,
            interpolation=interpolation,
            fill={type(image): fill},
        )
        expected = self._reference_shear_translate(
            image, transform_id=transform_id, magnitude=magnitude, interpolation=interpolation, fill=fill
        )

        if input_type == "PIL":
            actual, expected = F.to_image(actual), F.to_image(expected)

        if "Shear" in transform_id and input_type == "Tensor":
            mae = (actual.float() - expected.float()).abs().mean()
            assert mae < (12 if interpolation is transforms.InterpolationMode.NEAREST else 5)
        else:
            assert_close(actual, expected, rtol=0, atol=1)

    def _sample_input_adapter(self, transform, input, device):
        adapted_input = {}
        image_or_video_found = False
        for key, value in input.items():
            if isinstance(value, (tv_tensors.BoundingBoxes, tv_tensors.Mask)):
                # the auto-augment transforms don't support bounding boxes or masks
                continue
            elif check_type(value, (tv_tensors.Image, tv_tensors.Video, is_pure_tensor, PIL.Image.Image)):
                if image_or_video_found:
                    # the auto-augment transforms only support a single image or video
                    continue
                image_or_video_found = True
            adapted_input[key] = value
        return adapted_input

    @pytest.mark.parametrize(
        "transform",
        [transforms.AutoAugment(), transforms.RandAugment(), transforms.TrivialAugmentWide(), transforms.AugMix()],
    )
    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video])
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_transform_smoke(self, transform, make_input, dtype, device):
        if make_input is make_image_pil and not (dtype is torch.uint8 and device == "cpu"):
            pytest.skip(
                "PIL image tests with parametrization other than dtype=torch.uint8 and device='cpu' "
                "will degenerate to that anyway."
            )

        input = make_input(dtype=dtype, device=device)

        with freeze_rng_state():
            # by default every test starts from the same seed, which would give poor coverage of the
            # sampling inside forward(); instead, derive a reproducible seed from the parametrization
            torch.manual_seed(hash((make_input, dtype, device)))

            # v2 changed the random sampling of the auto-augment transforms, so the outputs cannot be
            # compared against v1; we only check that the transform is scriptable below
            check_transform(
                transform, input, check_v1_compatibility=False, check_sample_input=self._sample_input_adapter
            )

            if type(input) is torch.Tensor and dtype is torch.uint8:
                _script(transform)(input)

    def test_auto_augment_policy_error(self):
        with pytest.raises(ValueError, match="provided policy"):
            transforms.AutoAugment(policy=None)

    @pytest.mark.parametrize("severity", [0, 11])
    def test_aug_mix_severity_error(self, severity):
        with pytest.raises(ValueError, match="severity must be between"):
            transforms.AugMix(severity=severity)


class TestConvertBoundingBoxFormat:
    old_new_formats = list(itertools.permutations(iter(tv_tensors.BoundingBoxFormat), 2))

    @pytest.mark.parametrize(("old_format", "new_format"), old_new_formats)
    def test_kernel(self, old_format, new_format):
        check_kernel(
            F.convert_bounding_box_format,
            make_bounding_boxes(format=old_format),
            new_format=new_format,
            old_format=old_format,
        )

    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("inplace", [False, True])
    def test_kernel_noop(self, format, inplace):
        input = make_bounding_boxes(format=format).as_subclass(torch.Tensor)
        input_version = input._version

        output = F.convert_bounding_box_format(input, old_format=format, new_format=format, inplace=inplace)

        assert output is input
        assert output.data_ptr() == input.data_ptr()
        assert output._version == input_version

    @pytest.mark.parametrize(("old_format", "new_format"), old_new_formats)
    def test_kernel_inplace(self, old_format, new_format):
        input = make_bounding_boxes(format=old_format).as_subclass(torch.Tensor)
        input_version = input._version

        output_out_of_place = F.convert_bounding_box_format(input, old_format=old_format, new_format=new_format)
        assert output_out_of_place.data_ptr() != input.data_ptr()
        assert output_out_of_place is not input

        output_inplace = F.convert_bounding_box_format(
            input, old_format=old_format, new_format=new_format, inplace=True
        )
        assert output_inplace.data_ptr() == input.data_ptr()
        assert output_inplace._version > input_version
        assert output_inplace is input

        assert_equal(output_inplace, output_out_of_place)

    @pytest.mark.parametrize(("old_format", "new_format"), old_new_formats)
    def test_functional(self, old_format, new_format):
        check_functional(F.convert_bounding_box_format, make_bounding_boxes(format=old_format), new_format=new_format)

@pytest.mark.parametrize(("old_format", "new_format"), old_new_formats)
3556
@pytest.mark.parametrize("format_type", ["enum", "str"])
3557
def test_transform(self, old_format, new_format, format_type):
3559
transforms.ConvertBoundingBoxFormat(new_format.name if format_type == "str" else new_format),
3560
make_bounding_boxes(format=old_format),
3563
@pytest.mark.parametrize(("old_format", "new_format"), old_new_formats)
3564
def test_strings(self, old_format, new_format):
3566
input = tv_tensors.BoundingBoxes(torch.tensor([[10, 10, 20, 20]]), format=old_format, canvas_size=(50, 50))
3567
expected = self._reference_convert_bounding_box_format(input, new_format)
3569
old_format = old_format.name
3570
new_format = new_format.name
3572
out_functional = F.convert_bounding_box_format(input, new_format=new_format)
3573
out_functional_tensor = F.convert_bounding_box_format(
3574
input.as_subclass(torch.Tensor), old_format=old_format, new_format=new_format
3576
out_transform = transforms.ConvertBoundingBoxFormat(new_format)(input)
3577
for out in (out_functional, out_functional_tensor, out_transform):
3578
assert_equal(out, expected)
3580
def _reference_convert_bounding_box_format(self, bounding_boxes, new_format):
3581
return tv_tensors.wrap(
3582
torchvision.ops.box_convert(
3583
bounding_boxes.as_subclass(torch.Tensor),
3584
in_fmt=bounding_boxes.format.name.lower(),
3585
out_fmt=new_format.name.lower(),
3586
).to(bounding_boxes.dtype),
3587
like=bounding_boxes,
3591
@pytest.mark.parametrize(("old_format", "new_format"), old_new_formats)
3592
@pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
3593
@pytest.mark.parametrize("device", cpu_and_cuda())
3594
@pytest.mark.parametrize("fn_type", ["functional", "transform"])
3595
def test_correctness(self, old_format, new_format, dtype, device, fn_type):
3596
bounding_boxes = make_bounding_boxes(format=old_format, dtype=dtype, device=device)
3598
if fn_type == "functional":
3599
fn = functools.partial(F.convert_bounding_box_format, new_format=new_format)
3601
fn = transforms.ConvertBoundingBoxFormat(format=new_format)
3603
actual = fn(bounding_boxes)
3604
expected = self._reference_convert_bounding_box_format(bounding_boxes, new_format)
3606
assert_equal(actual, expected)
3608
def test_errors(self):
3609
input_tv_tensor = make_bounding_boxes()
3610
input_pure_tensor = input_tv_tensor.as_subclass(torch.Tensor)
3612
for input in [input_tv_tensor, input_pure_tensor]:
3613
with pytest.raises(TypeError, match="missing 1 required argument: 'new_format'"):
3614
F.convert_bounding_box_format(input)
3616
with pytest.raises(ValueError, match="`old_format` has to be passed"):
3617
F.convert_bounding_box_format(input_pure_tensor, new_format=input_tv_tensor.format)
3619
with pytest.raises(ValueError, match="`old_format` must not be passed"):
3620
F.convert_bounding_box_format(
3621
input_tv_tensor, old_format=input_tv_tensor.format, new_format=input_tv_tensor.format
3625
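# Illustrative sketch (not a test): converting a single box with the functional API exercised in
# TestConvertBoundingBoxFormat above. The box values and canvas size are arbitrary; the underscore
# prefix keeps pytest from collecting this function.
def _example_convert_bounding_box_format():
    box = tv_tensors.BoundingBoxes(
        torch.tensor([[10.0, 10.0, 20.0, 20.0]]), format="XYXY", canvas_size=(50, 50)
    )
    converted = F.convert_bounding_box_format(box, new_format="CXCYWH")
    # (10, 10, 20, 20) in XYXY is a 10x10 box centered at (15, 15).
    assert converted.tolist() == [[15.0, 15.0, 10.0, 10.0]]

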
class TestResizedCrop:
    INPUT_SIZE = (17, 11)
    CROP_KWARGS = dict(top=2, left=2, height=5, width=7)
    OUTPUT_SIZE = (19, 32)

    @pytest.mark.parametrize(
        ("kernel", "make_input"),
        [
            (F.resized_crop_image, make_image),
            (F.resized_crop_bounding_boxes, make_bounding_boxes),
            (F.resized_crop_mask, make_segmentation_mask),
            (F.resized_crop_mask, make_detection_masks),
            (F.resized_crop_video, make_video),
        ],
    )
    def test_kernel(self, kernel, make_input):
        input = make_input(self.INPUT_SIZE)
        if isinstance(input, tv_tensors.BoundingBoxes):
            extra_kwargs = dict(format=input.format)
        elif isinstance(input, tv_tensors.Mask):
            extra_kwargs = dict()
        else:
            extra_kwargs = dict(antialias=True)

        check_kernel(kernel, input, **self.CROP_KWARGS, size=self.OUTPUT_SIZE, **extra_kwargs)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_functional(self, make_input):
        check_functional(
            F.resized_crop, make_input(self.INPUT_SIZE), **self.CROP_KWARGS, size=self.OUTPUT_SIZE, antialias=True
        )

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.resized_crop_image, torch.Tensor),
            (F._geometry._resized_crop_image_pil, PIL.Image.Image),
            (F.resized_crop_image, tv_tensors.Image),
            (F.resized_crop_bounding_boxes, tv_tensors.BoundingBoxes),
            (F.resized_crop_mask, tv_tensors.Mask),
            (F.resized_crop_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.resized_crop, kernel=kernel, input_type=input_type)

    @param_value_parametrization(
        scale=[(0.1, 0.2), [0.0, 1.0]],
        ratio=[(0.3, 0.7), [0.1, 5.0]],
    )
    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_transform(self, param, value, make_input):
        check_transform(
            transforms.RandomResizedCrop(size=self.OUTPUT_SIZE, **{param: value}, antialias=True),
            make_input(self.INPUT_SIZE),
            check_v1_compatibility=dict(rtol=0, atol=1),
        )

    @pytest.mark.parametrize("interpolation", set(INTERPOLATION_MODES) - {transforms.InterpolationMode.NEAREST})
    def test_functional_image_correctness(self, interpolation):
        image = make_image(self.INPUT_SIZE, dtype=torch.uint8)

        actual = F.resized_crop(
            image, **self.CROP_KWARGS, size=self.OUTPUT_SIZE, interpolation=interpolation, antialias=True
        )
        expected = F.to_image(
            F.resized_crop(
                F.to_pil_image(image), **self.CROP_KWARGS, size=self.OUTPUT_SIZE, interpolation=interpolation
            )
        )

        torch.testing.assert_close(actual, expected, atol=1, rtol=0)

    def _reference_resized_crop_bounding_boxes(self, bounding_boxes, *, top, left, height, width, size):
        new_height, new_width = size

        crop_affine_matrix = np.array(
            [
                [1, 0, -left],
                [0, 1, -top],
                [0, 0, 1],
            ],
        )
        resize_affine_matrix = np.array(
            [
                [new_width / width, 0, 0],
                [0, new_height / height, 0],
                [0, 0, 1],
            ],
        )
        affine_matrix = (resize_affine_matrix @ crop_affine_matrix)[:2, :]

        return reference_affine_bounding_boxes_helper(
            bounding_boxes,
            affine_matrix=affine_matrix,
            new_canvas_size=size,
        )

    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    def test_functional_bounding_boxes_correctness(self, format):
        bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format)

        actual = F.resized_crop(bounding_boxes, **self.CROP_KWARGS, size=self.OUTPUT_SIZE)
        expected = self._reference_resized_crop_bounding_boxes(
            bounding_boxes, **self.CROP_KWARGS, size=self.OUTPUT_SIZE
        )

        assert_equal(actual, expected)
        assert_equal(F.get_size(actual), F.get_size(expected))

    def test_transform_errors_warnings(self):
        with pytest.raises(ValueError, match="provide only two dimensions"):
            transforms.RandomResizedCrop(size=(1, 2, 3))

        with pytest.raises(TypeError, match="Scale should be a sequence"):
            transforms.RandomResizedCrop(size=self.INPUT_SIZE, scale=123)

        with pytest.raises(TypeError, match="Ratio should be a sequence"):
            transforms.RandomResizedCrop(size=self.INPUT_SIZE, ratio=123)

        for param in ["scale", "ratio"]:
            with pytest.warns(match="Scale and ratio should be of kind"):
                transforms.RandomResizedCrop(size=self.INPUT_SIZE, **{param: [1, 0]})


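# Illustrative sketch (not a test): resized_crop is a crop followed by a resize in a single call.
# The crop window mirrors CROP_KWARGS and the target mirrors OUTPUT_SIZE from TestResizedCrop above.
def _example_resized_crop():
    image = torch.randint(0, 256, (3, 17, 11), dtype=torch.uint8)
    out = F.resized_crop(image, top=2, left=2, height=5, width=7, size=(19, 32), antialias=True)
    assert out.shape == (3, 19, 32)

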
class TestPad:
    EXHAUSTIVE_TYPE_PADDINGS = [1, (1,), (1, 2), (1, 2, 3, 4), [1], [1, 2], [1, 2, 3, 4]]
    CORRECTNESS_PADDINGS = [
        padding
        for padding in EXHAUSTIVE_TYPE_PADDINGS
        if isinstance(padding, int) or isinstance(padding, list) and len(padding) > 1
    ]
    PADDING_MODES = ["constant", "symmetric", "edge", "reflect"]

    @param_value_parametrization(
        padding=EXHAUSTIVE_TYPE_PADDINGS,
        fill=EXHAUSTIVE_TYPE_FILLS,
        padding_mode=PADDING_MODES,
    )
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, param, value, dtype, device):
        if param == "fill":
            value = adapt_fill(value, dtype=dtype)
        kwargs = {param: value}
        if param != "padding":
            kwargs["padding"] = [1]

        image = make_image(dtype=dtype, device=device)

        check_kernel(
            F.pad_image,
            image,
            **kwargs,
            check_scripted_vs_eager=not (
                (param == "padding" and isinstance(value, int))
                or (
                    param == "fill"
                    and (
                        isinstance(value, tuple) or (isinstance(value, list) and any(isinstance(v, int) for v in value))
                    )
                )
            ),
        )

    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    def test_kernel_bounding_boxes(self, format):
        bounding_boxes = make_bounding_boxes(format=format)
        check_kernel(
            F.pad_bounding_boxes,
            bounding_boxes,
            format=bounding_boxes.format,
            canvas_size=bounding_boxes.canvas_size,
            padding=[1],
        )

    @pytest.mark.parametrize("padding_mode", ["symmetric", "edge", "reflect"])
    def test_kernel_bounding_boxes_errors(self, padding_mode):
        bounding_boxes = make_bounding_boxes()
        with pytest.raises(ValueError, match=f"'{padding_mode}' is not supported"):
            F.pad_bounding_boxes(
                bounding_boxes,
                format=bounding_boxes.format,
                canvas_size=bounding_boxes.canvas_size,
                padding=[1],
                padding_mode=padding_mode,
            )

    @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks])
    def test_kernel_mask(self, make_mask):
        check_kernel(F.pad_mask, make_mask(), padding=[1])

    @pytest.mark.parametrize("fill", [[1], (0,), [1, 0, 1], (0, 1, 0)])
    def test_kernel_mask_errors(self, fill):
        with pytest.raises(ValueError, match="Non-scalar fill value is not supported"):
            F.pad_mask(make_segmentation_mask(), padding=[1], fill=fill)

    def test_kernel_video(self):
        check_kernel(F.pad_video, make_video(), padding=[1])

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_functional(self, make_input):
        check_functional(F.pad, make_input(), padding=[1])

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.pad_image, torch.Tensor),
            (F._geometry._pad_image_pil, PIL.Image.Image),
            (F.pad_image, tv_tensors.Image),
            (F.pad_bounding_boxes, tv_tensors.BoundingBoxes),
            (F.pad_mask, tv_tensors.Mask),
            (F.pad_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.pad, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_transform(self, make_input):
        check_transform(transforms.Pad(padding=[1]), make_input())

    def test_transform_errors(self):
        with pytest.raises(TypeError, match="Got inappropriate padding arg"):
            transforms.Pad("abc")

        with pytest.raises(ValueError, match="Padding must be an int or a 1, 2, or 4"):
            transforms.Pad([-0.7, 0, 0.7])

        with pytest.raises(TypeError, match="Got inappropriate fill arg"):
            transforms.Pad(12, fill="abc")

        with pytest.raises(ValueError, match="Padding mode should be either"):
            transforms.Pad(12, padding_mode="abc")

    @pytest.mark.parametrize("padding", CORRECTNESS_PADDINGS)
    @pytest.mark.parametrize(
        ("padding_mode", "fill"),
        [
            *[("constant", fill) for fill in CORRECTNESS_FILLS],
            *[(padding_mode, None) for padding_mode in ["symmetric", "edge", "reflect"]],
        ],
    )
    @pytest.mark.parametrize("fn", [F.pad, transform_cls_to_functional(transforms.Pad)])
    def test_image_correctness(self, padding, padding_mode, fill, fn):
        image = make_image(dtype=torch.uint8, device="cpu")

        fill = adapt_fill(fill, dtype=torch.uint8)

        actual = fn(image, padding=padding, padding_mode=padding_mode, fill=fill)
        expected = F.to_image(F.pad(F.to_pil_image(image), padding=padding, padding_mode=padding_mode, fill=fill))

        assert_equal(actual, expected)

    def _reference_pad_bounding_boxes(self, bounding_boxes, *, padding):
        if isinstance(padding, int):
            padding = [padding]
        left, top, right, bottom = padding * (4 // len(padding))

        affine_matrix = np.array(
            [
                [1, 0, left],
                [0, 1, top],
            ],
        )

        height = bounding_boxes.canvas_size[0] + top + bottom
        width = bounding_boxes.canvas_size[1] + left + right

        return reference_affine_bounding_boxes_helper(
            bounding_boxes, affine_matrix=affine_matrix, new_canvas_size=(height, width)
        )

    @pytest.mark.parametrize("padding", CORRECTNESS_PADDINGS)
    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    @pytest.mark.parametrize("fn", [F.pad, transform_cls_to_functional(transforms.Pad)])
    def test_bounding_boxes_correctness(self, padding, format, dtype, device, fn):
        bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)

        actual = fn(bounding_boxes, padding=padding)
        expected = self._reference_pad_bounding_boxes(bounding_boxes, padding=padding)

        assert_equal(actual, expected)


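# Illustrative sketch (not a test): how the padding argument of F.pad is interpreted, matching
# _reference_pad_bounding_boxes above. An int pads all sides, a pair pads left/right and
# top/bottom, and a 4-element sequence is left, top, right, bottom.
def _example_pad_semantics():
    image = torch.zeros(3, 10, 10, dtype=torch.uint8)
    assert F.pad(image, padding=1).shape == (3, 12, 12)
    assert F.pad(image, padding=[1, 2]).shape == (3, 14, 12)
    assert F.pad(image, padding=[1, 2, 3, 4]).shape == (3, 16, 14)

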
class TestCenterCrop:
    INPUT_SIZE = (17, 11)
    OUTPUT_SIZES = [(3, 5), (5, 3), (4, 4), (21, 9), (13, 15), (19, 14), 3, (4,), [5], INPUT_SIZE]

    @pytest.mark.parametrize("output_size", OUTPUT_SIZES)
    @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, output_size, dtype, device):
        check_kernel(
            F.center_crop_image,
            make_image(self.INPUT_SIZE, dtype=dtype, device=device),
            output_size=output_size,
            check_scripted_vs_eager=not isinstance(output_size, int),
        )

    @pytest.mark.parametrize("output_size", OUTPUT_SIZES)
    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    def test_kernel_bounding_boxes(self, output_size, format):
        bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format)
        check_kernel(
            F.center_crop_bounding_boxes,
            bounding_boxes,
            format=bounding_boxes.format,
            canvas_size=bounding_boxes.canvas_size,
            output_size=output_size,
            check_scripted_vs_eager=not isinstance(output_size, int),
        )

    @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks])
    def test_kernel_mask(self, make_mask):
        check_kernel(F.center_crop_mask, make_mask(), output_size=self.OUTPUT_SIZES[0])

    def test_kernel_video(self):
        check_kernel(F.center_crop_video, make_video(self.INPUT_SIZE), output_size=self.OUTPUT_SIZES[0])

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_functional(self, make_input):
        check_functional(F.center_crop, make_input(self.INPUT_SIZE), output_size=self.OUTPUT_SIZES[0])

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.center_crop_image, torch.Tensor),
            (F._geometry._center_crop_image_pil, PIL.Image.Image),
            (F.center_crop_image, tv_tensors.Image),
            (F.center_crop_bounding_boxes, tv_tensors.BoundingBoxes),
            (F.center_crop_mask, tv_tensors.Mask),
            (F.center_crop_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.center_crop, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_transform(self, make_input):
        check_transform(transforms.CenterCrop(self.OUTPUT_SIZES[0]), make_input(self.INPUT_SIZE))

    @pytest.mark.parametrize("output_size", OUTPUT_SIZES)
    @pytest.mark.parametrize("fn", [F.center_crop, transform_cls_to_functional(transforms.CenterCrop)])
    def test_image_correctness(self, output_size, fn):
        image = make_image(self.INPUT_SIZE, dtype=torch.uint8, device="cpu")

        actual = fn(image, output_size)
        expected = F.to_image(F.center_crop(F.to_pil_image(image), output_size=output_size))

        assert_equal(actual, expected)

    def _reference_center_crop_bounding_boxes(self, bounding_boxes, output_size):
        image_height, image_width = bounding_boxes.canvas_size
        if isinstance(output_size, int):
            output_size = (output_size, output_size)
        elif len(output_size) == 1:
            output_size *= 2
        crop_height, crop_width = output_size

        top = int(round((image_height - crop_height) / 2))
        left = int(round((image_width - crop_width) / 2))

        affine_matrix = np.array(
            [
                [1, 0, -left],
                [0, 1, -top],
            ],
        )

        return reference_affine_bounding_boxes_helper(
            bounding_boxes, affine_matrix=affine_matrix, new_canvas_size=output_size
        )

    @pytest.mark.parametrize("output_size", OUTPUT_SIZES)
    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    @pytest.mark.parametrize("fn", [F.center_crop, transform_cls_to_functional(transforms.CenterCrop)])
    def test_bounding_boxes_correctness(self, output_size, format, dtype, device, fn):
        bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, dtype=dtype, device=device)

        actual = fn(bounding_boxes, output_size)
        expected = self._reference_center_crop_bounding_boxes(bounding_boxes, output_size)

        assert_equal(actual, expected)


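# Illustrative sketch (not a test): center_crop pads before cropping when the requested output is
# larger than the input, which is why OUTPUT_SIZES above contains entries such as (21, 9) that
# exceed INPUT_SIZE in one dimension.
def _example_center_crop():
    image = torch.randint(0, 256, (3, 17, 11), dtype=torch.uint8)
    assert F.center_crop(image, output_size=(3, 5)).shape == (3, 3, 5)
    assert F.center_crop(image, output_size=(21, 9)).shape == (3, 21, 9)

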
class TestPerspective:
    COEFFICIENTS = [
        [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018],
        [0.7366, -0.11724, 1.45775, -0.15012, 0.73406, 2.6019, -0.0072, -0.0063],
    ]
    START_END_POINTS = [
        ([[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]),
        ([[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]),
        ([[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]),
    ]
    MINIMAL_KWARGS = dict(startpoints=None, endpoints=None, coefficients=COEFFICIENTS[0])

    @param_value_parametrization(
        coefficients=COEFFICIENTS,
        start_end_points=START_END_POINTS,
        fill=EXHAUSTIVE_TYPE_FILLS,
    )
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, param, value, dtype, device):
        if param == "start_end_points":
            kwargs = dict(zip(["startpoints", "endpoints"], value))
        else:
            kwargs = {"startpoints": None, "endpoints": None, param: value}
        if param == "fill":
            kwargs["coefficients"] = self.COEFFICIENTS[0]

        check_kernel(
            F.perspective_image,
            make_image(dtype=dtype, device=device),
            **kwargs,
            check_scripted_vs_eager=not (param == "fill" and isinstance(value, (int, float))),
        )

    def test_kernel_image_error(self):
        image = make_image_tensor()

        with pytest.raises(ValueError, match="startpoints/endpoints or the coefficients must have non `None` values"):
            F.perspective_image(image, startpoints=None, endpoints=None)

        with pytest.raises(
            ValueError, match="startpoints/endpoints and the coefficients shouldn't be defined concurrently"
        ):
            startpoints, endpoints = self.START_END_POINTS[0]
            coefficients = self.COEFFICIENTS[0]
            F.perspective_image(image, startpoints=startpoints, endpoints=endpoints, coefficients=coefficients)

        with pytest.raises(ValueError, match="coefficients should have 8 float values"):
            F.perspective_image(image, startpoints=None, endpoints=None, coefficients=list(range(7)))

    @param_value_parametrization(
        coefficients=COEFFICIENTS,
        start_end_points=START_END_POINTS,
    )
    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    def test_kernel_bounding_boxes(self, param, value, format):
        if param == "start_end_points":
            kwargs = dict(zip(["startpoints", "endpoints"], value))
        else:
            kwargs = {"startpoints": None, "endpoints": None, param: value}

        bounding_boxes = make_bounding_boxes(format=format)

        check_kernel(
            F.perspective_bounding_boxes,
            bounding_boxes,
            format=bounding_boxes.format,
            canvas_size=bounding_boxes.canvas_size,
            **kwargs,
        )

    def test_kernel_bounding_boxes_error(self):
        bounding_boxes = make_bounding_boxes()
        format, canvas_size = bounding_boxes.format, bounding_boxes.canvas_size
        bounding_boxes = bounding_boxes.as_subclass(torch.Tensor)

        with pytest.raises(RuntimeError, match="Denominator is zero"):
            F.perspective_bounding_boxes(
                bounding_boxes,
                format=format,
                canvas_size=canvas_size,
                startpoints=None,
                endpoints=None,
                coefficients=[0.0] * 8,
            )

    @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_masks])
    def test_kernel_mask(self, make_mask):
        check_kernel(F.perspective_mask, make_mask(), **self.MINIMAL_KWARGS)

    def test_kernel_video(self):
        check_kernel(F.perspective_video, make_video(), **self.MINIMAL_KWARGS)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_functional(self, make_input):
        check_functional(F.perspective, make_input(), **self.MINIMAL_KWARGS)

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.perspective_image, torch.Tensor),
            (F._geometry._perspective_image_pil, PIL.Image.Image),
            (F.perspective_image, tv_tensors.Image),
            (F.perspective_bounding_boxes, tv_tensors.BoundingBoxes),
            (F.perspective_mask, tv_tensors.Mask),
            (F.perspective_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.perspective, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize("distortion_scale", [0.5, 0.0, 1.0])
    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_transform(self, distortion_scale, make_input):
        check_transform(transforms.RandomPerspective(distortion_scale=distortion_scale, p=1), make_input())

    @pytest.mark.parametrize("distortion_scale", [-1, 2])
    def test_transform_error(self, distortion_scale):
        with pytest.raises(ValueError, match="distortion_scale value should be between 0 and 1"):
            transforms.RandomPerspective(distortion_scale=distortion_scale)

    @pytest.mark.parametrize("coefficients", COEFFICIENTS)
    @pytest.mark.parametrize(
        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
    )
    @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
    def test_image_functional_correctness(self, coefficients, interpolation, fill):
        image = make_image(dtype=torch.uint8, device="cpu")

        actual = F.perspective(
            image, startpoints=None, endpoints=None, coefficients=coefficients, interpolation=interpolation, fill=fill
        )
        expected = F.to_image(
            F.perspective(
                F.to_pil_image(image),
                startpoints=None,
                endpoints=None,
                coefficients=coefficients,
                interpolation=interpolation,
                fill=fill,
            )
        )

        if interpolation is transforms.InterpolationMode.BILINEAR:
            abs_diff = (actual.float() - expected.float()).abs()
            assert (abs_diff > 1).float().mean() < 7e-2
            mae = abs_diff.mean()
            assert mae < 3
        else:
            assert_equal(actual, expected)

    def _reference_perspective_bounding_boxes(self, bounding_boxes, *, startpoints, endpoints):
        format = bounding_boxes.format
        canvas_size = bounding_boxes.canvas_size
        dtype = bounding_boxes.dtype
        device = bounding_boxes.device

        coefficients = _get_perspective_coeffs(endpoints, startpoints)

        def perspective_bounding_boxes(bounding_boxes):
            m1 = np.array(
                [
                    [coefficients[0], coefficients[1], coefficients[2]],
                    [coefficients[3], coefficients[4], coefficients[5]],
                ]
            )
            m2 = np.array(
                [
                    [coefficients[6], coefficients[7], 1.0],
                    [coefficients[6], coefficients[7], 1.0],
                ]
            )

            # Go to float first, since the computation is not accurate enough in integer dtypes
            input_xyxy = F.convert_bounding_box_format(
                bounding_boxes.to(dtype=torch.float64, device="cpu", copy=True),
                old_format=format,
                new_format=tv_tensors.BoundingBoxFormat.XYXY,
            )
            x1, y1, x2, y2 = input_xyxy.squeeze(0).tolist()

            points = np.array(
                [
                    [x1, y1, 1.0],
                    [x2, y1, 1.0],
                    [x1, y2, 1.0],
                    [x2, y2, 1.0],
                ]
            )

            numerator = points @ m1.T
            denominator = points @ m2.T
            transformed_points = numerator / denominator

            output_xyxy = torch.Tensor(
                [
                    float(np.min(transformed_points[:, 0])),
                    float(np.min(transformed_points[:, 1])),
                    float(np.max(transformed_points[:, 0])),
                    float(np.max(transformed_points[:, 1])),
                ]
            )

            output = F.convert_bounding_box_format(
                output_xyxy, old_format=tv_tensors.BoundingBoxFormat.XYXY, new_format=format
            )

            return F.clamp_bounding_boxes(
                output,
                format=format,
                canvas_size=canvas_size,
            ).to(dtype=dtype, device=device)

        return tv_tensors.BoundingBoxes(
            torch.cat([perspective_bounding_boxes(b) for b in bounding_boxes.reshape(-1, 4).unbind()], dim=0).reshape(
                bounding_boxes.shape
            ),
            format=format,
            canvas_size=canvas_size,
        )

    @pytest.mark.parametrize(("startpoints", "endpoints"), START_END_POINTS)
    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_correctness_perspective_bounding_boxes(self, startpoints, endpoints, format, dtype, device):
        bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)

        actual = F.perspective(bounding_boxes, startpoints=startpoints, endpoints=endpoints)
        expected = self._reference_perspective_bounding_boxes(
            bounding_boxes, startpoints=startpoints, endpoints=endpoints
        )

        assert_close(actual, expected, rtol=0, atol=1)


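# Illustrative sketch (not a test): a perspective warp is specified either by four start/end point
# pairs or by eight precomputed coefficients, never both at once (see test_kernel_image_error
# above). The points reuse START_END_POINTS[0]; the input size is chosen so the points lie inside
# the image.
def _example_perspective():
    image = torch.randint(0, 256, (3, 26, 34), dtype=torch.uint8)
    startpoints = [[0, 0], [33, 0], [33, 25], [0, 25]]
    endpoints = [[3, 2], [32, 3], [30, 24], [2, 25]]
    out = F.perspective(image, startpoints=startpoints, endpoints=endpoints)
    assert out.shape == image.shape

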
class TestEqualize:
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, dtype, device):
        check_kernel(F.equalize_image, make_image(dtype=dtype, device=device))

    def test_kernel_video(self):
        check_kernel(F.equalize_video, make_video())

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video])
    def test_functional(self, make_input):
        check_functional(F.equalize, make_input())

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.equalize_image, torch.Tensor),
            (F._color._equalize_image_pil, PIL.Image.Image),
            (F.equalize_image, tv_tensors.Image),
            (F.equalize_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.equalize, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_video],
    )
    def test_transform(self, make_input):
        check_transform(transforms.RandomEqualize(p=1), make_input())

    @pytest.mark.parametrize(("low", "high"), [(0, 64), (64, 192), (192, 256), (0, 1), (127, 128), (255, 256)])
    @pytest.mark.parametrize("fn", [F.equalize, transform_cls_to_functional(transforms.RandomEqualize, p=1)])
    def test_image_correctness(self, low, high, fn):
        # The default make_image samples uniformly over the whole value range. Since F.equalize is supposed to
        # transform an arbitrary value distribution into a uniform one, narrower input distributions are more
        # informative here.
        image = tv_tensors.Image(
            torch.testing.make_tensor((3, 117, 253), dtype=torch.uint8, device="cpu", low=low, high=high)
        )

        actual = fn(image)
        expected = F.to_image(F.equalize(F.to_pil_image(image)))

        assert_equal(actual, expected)


class TestUniformTemporalSubsample:
    def test_kernel_video(self):
        check_kernel(F.uniform_temporal_subsample_video, make_video(), num_samples=2)

    @pytest.mark.parametrize("make_input", [make_video_tensor, make_video])
    def test_functional(self, make_input):
        check_functional(F.uniform_temporal_subsample, make_input(), num_samples=2)

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.uniform_temporal_subsample_video, torch.Tensor),
            (F.uniform_temporal_subsample_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.uniform_temporal_subsample, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize("make_input", [make_video_tensor, make_video])
    def test_transform(self, make_input):
        check_transform(transforms.UniformTemporalSubsample(num_samples=2), make_input())

    def _reference_uniform_temporal_subsample_video(self, video, *, num_samples):
        t = video.shape[-4]
        assert num_samples > 0 and t > 0

        # Sample by nearest neighbor interpolation if num_samples > t.
        indices = torch.linspace(0, t - 1, num_samples, device=video.device)
        indices = torch.clamp(indices, 0, t - 1).long()
        return tv_tensors.Video(torch.index_select(video, -4, indices))

    CORRECTNESS_NUM_FRAMES = 5

    @pytest.mark.parametrize("num_samples", list(range(1, CORRECTNESS_NUM_FRAMES + 1)))
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    @pytest.mark.parametrize(
        "fn", [F.uniform_temporal_subsample, transform_cls_to_functional(transforms.UniformTemporalSubsample)]
    )
    def test_video_correctness(self, num_samples, dtype, device, fn):
        video = make_video(num_frames=self.CORRECTNESS_NUM_FRAMES, dtype=dtype, device=device)

        actual = fn(video, num_samples=num_samples)
        expected = self._reference_uniform_temporal_subsample_video(video, num_samples=num_samples)

        assert_equal(actual, expected)


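# Illustrative sketch (not a test): frame indices are sampled evenly over the temporal dimension,
# as in the reference implementation above. For a 5-frame video and num_samples=2,
# torch.linspace(0, 4, 2) picks the first and the last frame.
def _example_uniform_temporal_subsample():
    video = torch.rand(5, 3, 8, 8)  # (T, C, H, W)
    out = F.uniform_temporal_subsample(video, num_samples=2)
    assert out.shape[0] == 2
    assert_equal(out, video[[0, 4]])

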
class TestNormalize:
    MEANS_STDS = [
        ((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ([0.0, 0.0, 0.0], [1.0, 1.0, 1.0]),
    ]
    MEAN, STD = MEANS_STDS[0]

    @pytest.mark.parametrize(("mean", "std"), [*MEANS_STDS, (0.5, 2.0)])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, mean, std, device):
        check_kernel(F.normalize_image, make_image(dtype=torch.float32, device=device), mean=mean, std=std)

    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image_inplace(self, device):
        input = make_image_tensor(dtype=torch.float32, device=device)
        input_version = input._version

        output_out_of_place = F.normalize_image(input, mean=self.MEAN, std=self.STD)
        assert output_out_of_place.data_ptr() != input.data_ptr()
        assert output_out_of_place is not input

        output_inplace = F.normalize_image(input, mean=self.MEAN, std=self.STD, inplace=True)
        assert output_inplace.data_ptr() == input.data_ptr()
        assert output_inplace._version > input_version
        assert output_inplace is input

        assert_equal(output_inplace, output_out_of_place)

    def test_kernel_video(self):
        check_kernel(F.normalize_video, make_video(dtype=torch.float32), mean=self.MEAN, std=self.STD)

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video])
    def test_functional(self, make_input):
        check_functional(F.normalize, make_input(dtype=torch.float32), mean=self.MEAN, std=self.STD)

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.normalize_image, torch.Tensor),
            (F.normalize_image, tv_tensors.Image),
            (F.normalize_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.normalize, kernel=kernel, input_type=input_type)

    def test_functional_error(self):
        with pytest.raises(TypeError, match="should be a float tensor"):
            F.normalize_image(make_image(dtype=torch.uint8), mean=self.MEAN, std=self.STD)

        with pytest.raises(ValueError, match="tensor image of size"):
            F.normalize_image(torch.rand(16, 16, dtype=torch.float32), mean=self.MEAN, std=self.STD)

        for std in [0, [0, 0, 0], [0, 1, 1]]:
            with pytest.raises(ValueError, match="std evaluated to zero, leading to division by zero"):
                F.normalize_image(make_image(dtype=torch.float32), mean=self.MEAN, std=std)

    def _sample_input_adapter(self, transform, input, device):
        adapted_input = {}
        for key, value in input.items():
            if isinstance(value, PIL.Image.Image):
                # normalize doesn't support PIL images
                continue
            elif check_type(value, (is_pure_tensor, tv_tensors.Image, tv_tensors.Video)):
                # normalize doesn't support integer images
                value = F.to_dtype(value, torch.float32, scale=True)
            adapted_input[key] = value
        return adapted_input

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video])
    def test_transform(self, make_input):
        check_transform(
            transforms.Normalize(mean=self.MEAN, std=self.STD),
            make_input(dtype=torch.float32),
            check_sample_input=self._sample_input_adapter,
        )

    def _reference_normalize_image(self, image, *, mean, std):
        image = image.numpy()
        mean, std = [np.array(stat, dtype=image.dtype).reshape((-1, 1, 1)) for stat in [mean, std]]
        return tv_tensors.Image((image - mean) / std)

    @pytest.mark.parametrize(("mean", "std"), MEANS_STDS)
    @pytest.mark.parametrize("dtype", [torch.float16, torch.float32, torch.float64])
    @pytest.mark.parametrize("fn", [F.normalize, transform_cls_to_functional(transforms.Normalize)])
    def test_correctness_image(self, mean, std, dtype, fn):
        image = make_image(dtype=dtype)

        actual = fn(image, mean=mean, std=std)
        expected = self._reference_normalize_image(image, mean=mean, std=std)

        assert_equal(actual, expected)


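# Illustrative sketch (not a test): normalize computes (image - mean) / std per channel and only
# accepts float inputs, which is what test_functional_error above checks for uint8 images. The
# constant image makes the expected output exactly zero.
def _example_normalize():
    image = torch.full((3, 4, 4), 0.5)
    out = F.normalize(image, mean=[0.5, 0.5, 0.5], std=[0.25, 0.25, 0.25])
    assert_equal(out, torch.zeros(3, 4, 4))

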
class TestClampBoundingBoxes:
    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel(self, format, dtype, device):
        bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
        check_kernel(
            F.clamp_bounding_boxes,
            bounding_boxes,
            format=bounding_boxes.format,
            canvas_size=bounding_boxes.canvas_size,
        )

    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    def test_functional(self, format):
        check_functional(F.clamp_bounding_boxes, make_bounding_boxes(format=format))

    def test_errors(self):
        input_tv_tensor = make_bounding_boxes()
        input_pure_tensor = input_tv_tensor.as_subclass(torch.Tensor)
        format, canvas_size = input_tv_tensor.format, input_tv_tensor.canvas_size

        for format_, canvas_size_ in [(None, None), (format, None), (None, canvas_size)]:
            with pytest.raises(
                ValueError, match="For pure tensor inputs, `format` and `canvas_size` have to be passed."
            ):
                F.clamp_bounding_boxes(input_pure_tensor, format=format_, canvas_size=canvas_size_)

        for format_, canvas_size_ in [(format, canvas_size), (format, None), (None, canvas_size)]:
            with pytest.raises(
                ValueError, match="For bounding box tv_tensor inputs, `format` and `canvas_size` must not be passed."
            ):
                F.clamp_bounding_boxes(input_tv_tensor, format=format_, canvas_size=canvas_size_)

    def test_transform(self):
        check_transform(transforms.ClampBoundingBoxes(), make_bounding_boxes())


class TestInvert:
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.int16, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, dtype, device):
        check_kernel(F.invert_image, make_image(dtype=dtype, device=device))

    def test_kernel_video(self):
        check_kernel(F.invert_video, make_video())

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video])
    def test_functional(self, make_input):
        check_functional(F.invert, make_input())

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.invert_image, torch.Tensor),
            (F._color._invert_image_pil, PIL.Image.Image),
            (F.invert_image, tv_tensors.Image),
            (F.invert_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.invert, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video])
    def test_transform(self, make_input):
        check_transform(transforms.RandomInvert(p=1), make_input())

    @pytest.mark.parametrize("fn", [F.invert, transform_cls_to_functional(transforms.RandomInvert, p=1)])
    def test_correctness_image(self, fn):
        image = make_image(dtype=torch.uint8, device="cpu")

        actual = fn(image)
        expected = F.to_image(F.invert(F.to_pil_image(image)))

        assert_equal(actual, expected)


class TestPosterize:
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, dtype, device):
        check_kernel(F.posterize_image, make_image(dtype=dtype, device=device), bits=1)

    def test_kernel_video(self):
        check_kernel(F.posterize_video, make_video(), bits=1)

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video])
    def test_functional(self, make_input):
        check_functional(F.posterize, make_input(), bits=1)

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.posterize_image, torch.Tensor),
            (F._color._posterize_image_pil, PIL.Image.Image),
            (F.posterize_image, tv_tensors.Image),
            (F.posterize_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.posterize, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video])
    def test_transform(self, make_input):
        check_transform(transforms.RandomPosterize(bits=1, p=1), make_input())

    @pytest.mark.parametrize("bits", [1, 4, 8])
    @pytest.mark.parametrize("fn", [F.posterize, transform_cls_to_functional(transforms.RandomPosterize, p=1)])
    def test_correctness_image(self, bits, fn):
        image = make_image(dtype=torch.uint8, device="cpu")

        actual = fn(image, bits=bits)
        expected = F.to_image(F.posterize(F.to_pil_image(image), bits=bits))

        assert_equal(actual, expected)


class TestSolarize:
    def _make_threshold(self, input, *, factor=0.5):
        dtype = input.dtype if isinstance(input, torch.Tensor) else torch.uint8
        return (float if dtype.is_floating_point else int)(get_max_value(dtype) * factor)

    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, dtype, device):
        image = make_image(dtype=dtype, device=device)
        check_kernel(F.solarize_image, image, threshold=self._make_threshold(image))

    def test_kernel_video(self):
        video = make_video()
        check_kernel(F.solarize_video, video, threshold=self._make_threshold(video))

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video])
    def test_functional(self, make_input):
        input = make_input()
        check_functional(F.solarize, input, threshold=self._make_threshold(input))

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.solarize_image, torch.Tensor),
            (F._color._solarize_image_pil, PIL.Image.Image),
            (F.solarize_image, tv_tensors.Image),
            (F.solarize_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.solarize, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize(("dtype", "threshold"), [(torch.uint8, 256), (torch.float, 1.5)])
    def test_functional_error(self, dtype, threshold):
        with pytest.raises(TypeError, match="Threshold should be less or equal the maximum value of the dtype"):
            F.solarize(make_image(dtype=dtype), threshold=threshold)

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video])
    def test_transform(self, make_input):
        input = make_input()
        check_transform(transforms.RandomSolarize(threshold=self._make_threshold(input), p=1), input)

    @pytest.mark.parametrize("threshold_factor", [0.0, 0.1, 0.5, 0.9, 1.0])
    @pytest.mark.parametrize("fn", [F.solarize, transform_cls_to_functional(transforms.RandomSolarize, p=1)])
    def test_correctness_image(self, threshold_factor, fn):
        image = make_image(dtype=torch.uint8, device="cpu")
        threshold = self._make_threshold(image, factor=threshold_factor)

        actual = fn(image, threshold=threshold)
        expected = F.to_image(F.solarize(F.to_pil_image(image), threshold=threshold))

        assert_equal(actual, expected)


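# Illustrative sketch (not a test): solarize inverts every value at or above the threshold and
# leaves the rest untouched, which is why _make_threshold above scales the threshold with the
# maximum value of the dtype.
def _example_solarize():
    image = torch.tensor([[[0, 100, 200]]], dtype=torch.uint8)
    out = F.solarize(image, threshold=128)
    assert out.tolist() == [[[0, 100, 55]]]  # only 200 >= 128 is inverted to 255 - 200

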
class TestAutocontrast:
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.int16, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, dtype, device):
        check_kernel(F.autocontrast_image, make_image(dtype=dtype, device=device))

    def test_kernel_video(self):
        check_kernel(F.autocontrast_video, make_video())

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video])
    def test_functional(self, make_input):
        check_functional(F.autocontrast, make_input())

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.autocontrast_image, torch.Tensor),
            (F._color._autocontrast_image_pil, PIL.Image.Image),
            (F.autocontrast_image, tv_tensors.Image),
            (F.autocontrast_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.autocontrast, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video])
    def test_transform(self, make_input):
        check_transform(transforms.RandomAutocontrast(p=1), make_input(), check_v1_compatibility=dict(rtol=0, atol=1))

    @pytest.mark.parametrize("fn", [F.autocontrast, transform_cls_to_functional(transforms.RandomAutocontrast, p=1)])
    def test_correctness_image(self, fn):
        image = make_image(dtype=torch.uint8, device="cpu")

        actual = fn(image)
        expected = F.to_image(F.autocontrast(F.to_pil_image(image)))

        assert_close(actual, expected, rtol=0, atol=1)


class TestAdjustSharpness:
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, dtype, device):
        check_kernel(F.adjust_sharpness_image, make_image(dtype=dtype, device=device), sharpness_factor=0.5)

    def test_kernel_video(self):
        check_kernel(F.adjust_sharpness_video, make_video(), sharpness_factor=0.5)

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video])
    def test_functional(self, make_input):
        check_functional(F.adjust_sharpness, make_input(), sharpness_factor=0.5)

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.adjust_sharpness_image, torch.Tensor),
            (F._color._adjust_sharpness_image_pil, PIL.Image.Image),
            (F.adjust_sharpness_image, tv_tensors.Image),
            (F.adjust_sharpness_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.adjust_sharpness, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video])
    def test_transform(self, make_input):
        check_transform(transforms.RandomAdjustSharpness(sharpness_factor=0.5, p=1), make_input())

    def test_functional_error(self):
        with pytest.raises(TypeError, match="can have 1 or 3 channels"):
            F.adjust_sharpness(make_image(color_space="RGBA"), sharpness_factor=0.5)

        with pytest.raises(ValueError, match="is not non-negative"):
            F.adjust_sharpness(make_image(), sharpness_factor=-1)

    @pytest.mark.parametrize("sharpness_factor", [0.1, 0.5, 1.0])
    @pytest.mark.parametrize(
        "fn", [F.adjust_sharpness, transform_cls_to_functional(transforms.RandomAdjustSharpness, p=1)]
    )
    def test_correctness_image(self, sharpness_factor, fn):
        image = make_image(dtype=torch.uint8, device="cpu")

        actual = fn(image, sharpness_factor=sharpness_factor)
        expected = F.to_image(F.adjust_sharpness(F.to_pil_image(image), sharpness_factor=sharpness_factor))

        assert_equal(actual, expected)


class TestAdjustContrast:
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, dtype, device):
        check_kernel(F.adjust_contrast_image, make_image(dtype=dtype, device=device), contrast_factor=0.5)

    def test_kernel_video(self):
        check_kernel(F.adjust_contrast_video, make_video(), contrast_factor=0.5)

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video])
    def test_functional(self, make_input):
        check_functional(F.adjust_contrast, make_input(), contrast_factor=0.5)

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.adjust_contrast_image, torch.Tensor),
            (F._color._adjust_contrast_image_pil, PIL.Image.Image),
            (F.adjust_contrast_image, tv_tensors.Image),
            (F.adjust_contrast_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.adjust_contrast, kernel=kernel, input_type=input_type)

    def test_functional_error(self):
        with pytest.raises(TypeError, match="permitted channel values are 1 or 3"):
            F.adjust_contrast(make_image(color_space="RGBA"), contrast_factor=0.5)

        with pytest.raises(ValueError, match="is not non-negative"):
            F.adjust_contrast(make_image(), contrast_factor=-1)

    @pytest.mark.parametrize("contrast_factor", [0.1, 0.5, 1.0])
    def test_correctness_image(self, contrast_factor):
        image = make_image(dtype=torch.uint8, device="cpu")

        actual = F.adjust_contrast(image, contrast_factor=contrast_factor)
        expected = F.to_image(F.adjust_contrast(F.to_pil_image(image), contrast_factor=contrast_factor))

        assert_close(actual, expected, rtol=0, atol=1)


class TestAdjustGamma:
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, dtype, device):
        check_kernel(F.adjust_gamma_image, make_image(dtype=dtype, device=device), gamma=0.5)

    def test_kernel_video(self):
        check_kernel(F.adjust_gamma_video, make_video(), gamma=0.5)

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video])
    def test_functional(self, make_input):
        check_functional(F.adjust_gamma, make_input(), gamma=0.5)

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.adjust_gamma_image, torch.Tensor),
            (F._color._adjust_gamma_image_pil, PIL.Image.Image),
            (F.adjust_gamma_image, tv_tensors.Image),
            (F.adjust_gamma_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.adjust_gamma, kernel=kernel, input_type=input_type)

    def test_functional_error(self):
        with pytest.raises(ValueError, match="Gamma should be a non-negative real number"):
            F.adjust_gamma(make_image(), gamma=-1)

    @pytest.mark.parametrize("gamma", [0.1, 0.5, 1.0])
    @pytest.mark.parametrize("gain", [0.1, 1.0, 2.0])
    def test_correctness_image(self, gamma, gain):
        image = make_image(dtype=torch.uint8, device="cpu")

        actual = F.adjust_gamma(image, gamma=gamma, gain=gain)
        expected = F.to_image(F.adjust_gamma(F.to_pil_image(image), gamma=gamma, gain=gain))

        assert_equal(actual, expected)


class TestAdjustHue:
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, dtype, device):
        check_kernel(F.adjust_hue_image, make_image(dtype=dtype, device=device), hue_factor=0.25)

    def test_kernel_video(self):
        check_kernel(F.adjust_hue_video, make_video(), hue_factor=0.25)

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video])
    def test_functional(self, make_input):
        check_functional(F.adjust_hue, make_input(), hue_factor=0.25)

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.adjust_hue_image, torch.Tensor),
            (F._color._adjust_hue_image_pil, PIL.Image.Image),
            (F.adjust_hue_image, tv_tensors.Image),
            (F.adjust_hue_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.adjust_hue, kernel=kernel, input_type=input_type)

    def test_functional_error(self):
        with pytest.raises(TypeError, match="permitted channel values are 1 or 3"):
            F.adjust_hue(make_image(color_space="RGBA"), hue_factor=0.25)

        for hue_factor in [-1, 1]:
            with pytest.raises(ValueError, match=re.escape("is not in [-0.5, 0.5]")):
                F.adjust_hue(make_image(), hue_factor=hue_factor)

    @pytest.mark.parametrize("hue_factor", [-0.5, -0.3, 0.0, 0.2, 0.5])
    def test_correctness_image(self, hue_factor):
        image = make_image(dtype=torch.uint8, device="cpu")

        actual = F.adjust_hue(image, hue_factor=hue_factor)
        expected = F.to_image(F.adjust_hue(F.to_pil_image(image), hue_factor=hue_factor))

        mae = (actual.float() - expected.float()).abs().mean()
        assert mae < 2


class TestAdjustSaturation:
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, dtype, device):
        check_kernel(F.adjust_saturation_image, make_image(dtype=dtype, device=device), saturation_factor=0.5)

    def test_kernel_video(self):
        check_kernel(F.adjust_saturation_video, make_video(), saturation_factor=0.5)

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video])
    def test_functional(self, make_input):
        check_functional(F.adjust_saturation, make_input(), saturation_factor=0.5)

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.adjust_saturation_image, torch.Tensor),
            (F._color._adjust_saturation_image_pil, PIL.Image.Image),
            (F.adjust_saturation_image, tv_tensors.Image),
            (F.adjust_saturation_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.adjust_saturation, kernel=kernel, input_type=input_type)

    def test_functional_error(self):
        with pytest.raises(TypeError, match="permitted channel values are 1 or 3"):
            F.adjust_saturation(make_image(color_space="RGBA"), saturation_factor=0.5)

        with pytest.raises(ValueError, match="is not non-negative"):
            F.adjust_saturation(make_image(), saturation_factor=-1)

    @pytest.mark.parametrize("saturation_factor", [0.1, 0.5, 1.0])
    def test_correctness_image(self, saturation_factor):
        image = make_image(dtype=torch.uint8, device="cpu")

        actual = F.adjust_saturation(image, saturation_factor=saturation_factor)
        expected = F.to_image(F.adjust_saturation(F.to_pil_image(image), saturation_factor=saturation_factor))

        assert_close(actual, expected, rtol=0, atol=1)


class TestFiveTenCrop:
    INPUT_SIZE = (17, 11)
    OUTPUT_SIZE = (3, 5)

    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    @pytest.mark.parametrize("kernel", [F.five_crop_image, F.ten_crop_image])
    def test_kernel_image(self, dtype, device, kernel):
        check_kernel(
            kernel,
            make_image(self.INPUT_SIZE, dtype=dtype, device=device),
            size=self.OUTPUT_SIZE,
            check_batched_vs_unbatched=False,
        )

    @pytest.mark.parametrize("kernel", [F.five_crop_video, F.ten_crop_video])
    def test_kernel_video(self, kernel):
        check_kernel(kernel, make_video(self.INPUT_SIZE), size=self.OUTPUT_SIZE, check_batched_vs_unbatched=False)

    def _functional_wrapper(self, fn):
        # five_crop / ten_crop return a sequence rather than a single tensor, so they need to be wrapped to be
        # compatible with check_functional.
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            outputs = fn(*args, **kwargs)
            return outputs[0]

        return wrapper

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_video],
    )
    @pytest.mark.parametrize("functional", [F.five_crop, F.ten_crop])
    def test_functional(self, make_input, functional):
        check_functional(
            self._functional_wrapper(functional),
            make_input(self.INPUT_SIZE),
            size=self.OUTPUT_SIZE,
            check_scripted_smoke=False,
        )

    @pytest.mark.parametrize(
        ("functional", "kernel", "input_type"),
        [
            (F.five_crop, F.five_crop_image, torch.Tensor),
            (F.five_crop, F._geometry._five_crop_image_pil, PIL.Image.Image),
            (F.five_crop, F.five_crop_image, tv_tensors.Image),
            (F.five_crop, F.five_crop_video, tv_tensors.Video),
            (F.ten_crop, F.ten_crop_image, torch.Tensor),
            (F.ten_crop, F._geometry._ten_crop_image_pil, PIL.Image.Image),
            (F.ten_crop, F.ten_crop_image, tv_tensors.Image),
            (F.ten_crop, F.ten_crop_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, functional, kernel, input_type):
        check_functional_kernel_signature_match(functional, kernel=kernel, input_type=input_type)

    class _TransformWrapper(nn.Module):
        # FiveCrop and TenCrop return a sequence rather than a single tensor, so they need to be wrapped to be
        # compatible with check_transform.
        _v1_transform_cls = None

        def _extract_params_for_v1_transform(self):
            return dict(five_ten_crop_transform=self.five_ten_crop_transform)

        def __init__(self, five_ten_crop_transform):
            super().__init__()
            type(self)._v1_transform_cls = type(self)
            self.five_ten_crop_transform = five_ten_crop_transform

        def forward(self, input: torch.Tensor) -> torch.Tensor:
            outputs = self.five_ten_crop_transform(input)
            return outputs[0]

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_video],
    )
    @pytest.mark.parametrize("transform_cls", [transforms.FiveCrop, transforms.TenCrop])
    def test_transform(self, make_input, transform_cls):
        check_transform(
            self._TransformWrapper(transform_cls(size=self.OUTPUT_SIZE)),
            make_input(self.INPUT_SIZE),
            check_sample_input=False,
        )

    @pytest.mark.parametrize("make_input", [make_bounding_boxes, make_detection_masks])
    @pytest.mark.parametrize("transform_cls", [transforms.FiveCrop, transforms.TenCrop])
    def test_transform_error(self, make_input, transform_cls):
        transform = transform_cls(size=self.OUTPUT_SIZE)

        with pytest.raises(TypeError, match="not supported"):
            transform(make_input(self.INPUT_SIZE))

    @pytest.mark.parametrize("fn", [F.five_crop, transform_cls_to_functional(transforms.FiveCrop)])
    def test_correctness_image_five_crop(self, fn):
        image = make_image(self.INPUT_SIZE, dtype=torch.uint8, device="cpu")

        actual = fn(image, size=self.OUTPUT_SIZE)
        expected = F.five_crop(F.to_pil_image(image), size=self.OUTPUT_SIZE)

        assert isinstance(actual, tuple)
        assert_equal(actual, [F.to_image(e) for e in expected])

    @pytest.mark.parametrize("fn_or_class", [F.ten_crop, transforms.TenCrop])
    @pytest.mark.parametrize("vertical_flip", [False, True])
    def test_correctness_image_ten_crop(self, fn_or_class, vertical_flip):
        if fn_or_class is transforms.TenCrop:
            fn = transform_cls_to_functional(fn_or_class, size=self.OUTPUT_SIZE, vertical_flip=vertical_flip)
            kwargs = dict()
        else:
            fn = fn_or_class
            kwargs = dict(size=self.OUTPUT_SIZE, vertical_flip=vertical_flip)

        image = make_image(self.INPUT_SIZE, dtype=torch.uint8, device="cpu")

        actual = fn(image, **kwargs)
        expected = F.ten_crop(F.to_pil_image(image), size=self.OUTPUT_SIZE, vertical_flip=vertical_flip)

        assert isinstance(actual, tuple)
        assert_equal(actual, [F.to_image(e) for e in expected])


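# Illustrative sketch (not a test): five_crop returns the four corner crops plus the center crop
# as a tuple, and ten_crop appends the flipped counterparts, which is why the wrappers above have
# to reduce the outputs to a single element.
def _example_five_ten_crop():
    image = torch.randint(0, 256, (3, 17, 11), dtype=torch.uint8)
    crops = F.five_crop(image, size=(3, 5))
    assert len(crops) == 5 and all(crop.shape == (3, 3, 5) for crop in crops)
    assert len(F.ten_crop(image, size=(3, 5))) == 10

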
class TestColorJitter:
5027
@pytest.mark.parametrize(
5029
[make_image_tensor, make_image_pil, make_image, make_video],
5031
@pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
5032
@pytest.mark.parametrize("device", cpu_and_cuda())
5033
def test_transform(self, make_input, dtype, device):
5034
if make_input is make_image_pil and not (dtype is torch.uint8 and device == "cpu"):
5036
"PIL image tests with parametrization other than dtype=torch.uint8 and device='cpu' "
5037
"will degenerate to that anyway."
5041
transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.25),
5042
make_input(dtype=dtype, device=device),
5045
def test_transform_noop(self):
5046
input = make_image()
5047
input_version = input._version
5049
transform = transforms.ColorJitter()
5050
output = transform(input)
5052
assert output is input
5053
assert output.data_ptr() == input.data_ptr()
5054
assert output._version == input_version
5056
def test_transform_error(self):
5057
with pytest.raises(ValueError, match="must be non negative"):
5058
transforms.ColorJitter(brightness=-1)
5060
for brightness in [object(), [1, 2, 3]]:
5061
with pytest.raises(TypeError, match="single number or a sequence with length 2"):
5062
transforms.ColorJitter(brightness=brightness)
5064
with pytest.raises(ValueError, match="values should be between"):
5065
transforms.ColorJitter(brightness=(-1, 0.5))
5067
with pytest.raises(ValueError, match="values should be between"):
5068
transforms.ColorJitter(hue=1)
5070
@pytest.mark.parametrize("brightness", [None, 0.1, (0.2, 0.3)])
5071
@pytest.mark.parametrize("contrast", [None, 0.4, (0.5, 0.6)])
5072
@pytest.mark.parametrize("saturation", [None, 0.7, (0.8, 0.9)])
5073
@pytest.mark.parametrize("hue", [None, 0.3, (-0.1, 0.2)])
5074
def test_transform_correctness(self, brightness, contrast, saturation, hue):
5075
image = make_image(dtype=torch.uint8, device="cpu")
5077
transform = transforms.ColorJitter(brightness=brightness, contrast=contrast, saturation=saturation, hue=hue)
5079
with freeze_rng_state():
5080
torch.manual_seed(0)
5081
actual = transform(image)
5083
torch.manual_seed(0)
5084
expected = F.to_image(transform(F.to_pil_image(image)))
5086
mae = (actual.float() - expected.float()).abs().mean()
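
# The MAE comparison above is the pattern this file uses whenever the tensor
# and PIL backends are allowed to differ by small rounding errors. A minimal
# sketch of that helper logic (the threshold is chosen per test; 2 here is an
# assumption matching the test above):
def _demo_mean_absolute_error(actual, expected, threshold=2):
    mae = (actual.float() - expected.float()).abs().mean()
    return mae < threshold
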

class TestRgbToGrayscale:
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, dtype, device):
        check_kernel(F.rgb_to_grayscale_image, make_image(dtype=dtype, device=device))

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image])
    def test_functional(self, make_input):
        check_functional(F.rgb_to_grayscale, make_input())

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.rgb_to_grayscale_image, torch.Tensor),
            (F._color._rgb_to_grayscale_image_pil, PIL.Image.Image),
            (F.rgb_to_grayscale_image, tv_tensors.Image),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.rgb_to_grayscale, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize("transform", [transforms.Grayscale(), transforms.RandomGrayscale(p=1)])
    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image])
    def test_transform(self, transform, make_input):
        check_transform(transform, make_input())

    @pytest.mark.parametrize("num_output_channels", [1, 3])
    @pytest.mark.parametrize("color_space", ["RGB", "GRAY"])
    @pytest.mark.parametrize("fn", [F.rgb_to_grayscale, transform_cls_to_functional(transforms.Grayscale)])
    def test_image_correctness(self, num_output_channels, color_space, fn):
        image = make_image(dtype=torch.uint8, device="cpu", color_space=color_space)

        actual = fn(image, num_output_channels=num_output_channels)
        expected = F.to_image(F.rgb_to_grayscale(F.to_pil_image(image), num_output_channels=num_output_channels))

        assert_equal(actual, expected, rtol=0, atol=1)

    def test_expanded_channels_are_not_views_into_the_same_underlying_tensor(self):
        image = make_image(dtype=torch.uint8, device="cpu", color_space="GRAY")

        output_image = F.rgb_to_grayscale(image, num_output_channels=3)
        assert_equal(output_image[0][0][0], output_image[1][0][0])
        output_image[0][0][0] = output_image[0][0][0] + 1
        assert output_image[0][0][0] != output_image[1][0][0]

    @pytest.mark.parametrize("num_input_channels", [1, 3])
    def test_random_transform_correctness(self, num_input_channels):
        image = make_image(
            color_space={
                1: "GRAY",
                3: "RGB",
            }[num_input_channels],
            dtype=torch.uint8,
            device="cpu",
        )

        transform = transforms.RandomGrayscale(p=1)

        actual = transform(image)
        expected = F.to_image(F.rgb_to_grayscale(F.to_pil_image(image), num_output_channels=num_input_channels))

        assert_equal(actual, expected, rtol=0, atol=1)
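
# Hedged reference for the correctness tests above: rgb_to_grayscale follows
# the ITU-R BT.601 luma weights, roughly as sketched below. The real kernel
# handles dtypes and rounding more carefully; this is illustrative only.
def _demo_bt601_luma(image):
    r, g, b = image.float().unbind(dim=-3)
    luma = 0.299 * r + 0.587 * g + 0.114 * b
    return luma.to(image.dtype).unsqueeze(-3)
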

class TestGrayscaleToRgb:
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, dtype, device):
        check_kernel(F.grayscale_to_rgb_image, make_image(dtype=dtype, device=device))

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image])
    def test_functional(self, make_input):
        check_functional(F.grayscale_to_rgb, make_input())

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.grayscale_to_rgb_image, torch.Tensor),
            (F._color._grayscale_to_rgb_image_pil, PIL.Image.Image),
            (F.grayscale_to_rgb_image, tv_tensors.Image),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.grayscale_to_rgb, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image])
    def test_transform(self, make_input):
        check_transform(transforms.RGB(), make_input(color_space="GRAY"))

    @pytest.mark.parametrize("fn", [F.grayscale_to_rgb, transform_cls_to_functional(transforms.RGB)])
    def test_image_correctness(self, fn):
        image = make_image(dtype=torch.uint8, device="cpu", color_space="GRAY")

        actual = fn(image)
        expected = F.to_image(F.grayscale_to_rgb(F.to_pil_image(image)))

        assert_equal(actual, expected, rtol=0, atol=1)

    def test_expanded_channels_are_not_views_into_the_same_underlying_tensor(self):
        image = make_image(dtype=torch.uint8, device="cpu", color_space="GRAY")

        output_image = F.grayscale_to_rgb(image)
        assert_equal(output_image[0][0][0], output_image[1][0][0])
        output_image[0][0][0] = output_image[0][0][0] + 1
        assert output_image[0][0][0] != output_image[1][0][0]

    def test_rgb_image_is_unchanged(self):
        image = make_image(dtype=torch.uint8, device="cpu", color_space="RGB")
        assert_equal(image.shape[-3], 3)
        assert_equal(F.grayscale_to_rgb(image), image)
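
# The view test above pins down a subtle contract: grayscale_to_rgb must
# materialize three independent channels. A hedged sketch of a compliant
# expansion for a (1, H, W) input:
def _demo_expand_gray_to_rgb(gray_image):
    # .repeat copies memory; .expand(3, -1, -1) would alias all three channels
    # and fail the "not views" test above.
    return gray_image.repeat(3, 1, 1)
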

class TestRandomZoomOut:
    # Tests are light because this largely relies on the already tested `pad` kernels.

    @pytest.mark.parametrize(
        "make_input",
        [
            make_image_tensor,
            make_image_pil,
            make_image,
            make_bounding_boxes,
            make_segmentation_mask,
            make_detection_masks,
            make_video,
        ],
    )
    def test_transform(self, make_input):
        check_transform(transforms.RandomZoomOut(p=1), make_input())

    def test_transform_error(self):
        for side_range in [None, 1, [1, 2, 3]]:
            with pytest.raises(
                ValueError if isinstance(side_range, list) else TypeError, match="should be a sequence of length 2"
            ):
                transforms.RandomZoomOut(side_range=side_range)

        for side_range in [[0.5, 1.5], [2.0, 1.0]]:
            with pytest.raises(ValueError, match="Invalid side range"):
                transforms.RandomZoomOut(side_range=side_range)

    @pytest.mark.parametrize("side_range", [(1.0, 4.0), [2.0, 5.0]])
    @pytest.mark.parametrize(
        "make_input",
        [
            make_image_tensor,
            make_image_pil,
            make_image,
            make_bounding_boxes,
            make_segmentation_mask,
            make_detection_masks,
            make_video,
        ],
    )
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_transform_params_correctness(self, side_range, make_input, device):
        if make_input is make_image_pil and device != "cpu":
            pytest.skip("PIL image tests with parametrization device!='cpu' will degenerate to that anyway.")

        transform = transforms.RandomZoomOut(side_range=side_range)

        input = make_input()
        height, width = F.get_size(input)

        params = transform._get_params([input])
        assert "padding" in params

        padding = params["padding"]
        assert len(padding) == 4

        assert 0 <= padding[0] <= (side_range[1] - 1) * width
        assert 0 <= padding[1] <= (side_range[1] - 1) * height
        assert 0 <= padding[2] <= (side_range[1] - 1) * width
        assert 0 <= padding[3] <= (side_range[1] - 1) * height
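
# Sketch of the parameter sampling that the bounds above encode: RandomZoomOut
# picks a zoomed-out canvas of r times the input size, with r drawn from
# side_range, and places the image randomly inside it. This mirrors the
# checked invariants rather than the exact implementation.
def _demo_zoom_out_padding(height, width, side_range=(1.0, 4.0)):
    r = side_range[0] + torch.rand(1) * (side_range[1] - side_range[0])
    canvas_h, canvas_w = int(height * r), int(width * r)
    left = int(torch.rand(1) * (canvas_w - width))
    top = int(torch.rand(1) * (canvas_h - height))
    right = canvas_w - (left + width)
    bottom = canvas_h - (top + height)
    # Each entry is bounded by (side_range[1] - 1) * dim, as asserted above.
    return [left, top, right, bottom]
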

class TestRandomPhotometricDistort:
    # Tests are light because this largely relies on the already tested
    # adjust_{brightness,contrast,saturation,hue} and permute_channels kernels.

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_video],
    )
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_transform(self, make_input, dtype, device):
        if make_input is make_image_pil and not (dtype is torch.uint8 and device == "cpu"):
            pytest.skip(
                "PIL image tests with parametrization other than dtype=torch.uint8 and device='cpu' "
                "will degenerate to that anyway."
            )

        check_transform(
            transforms.RandomPhotometricDistort(
                brightness=(0.3, 0.4), contrast=(0.5, 0.6), saturation=(0.7, 0.8), hue=(-0.1, 0.2), p=1
            ),
            make_input(dtype=dtype, device=device),
        )

class TestScaleJitter:
    # Tests are light because this largely relies on the already tested `resize` kernels.

    INPUT_SIZE = (17, 11)
    TARGET_SIZE = (12, 13)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_transform(self, make_input, device):
        if make_input is make_image_pil and device != "cpu":
            pytest.skip("PIL image tests with parametrization device!='cpu' will degenerate to that anyway.")

        check_transform(transforms.ScaleJitter(self.TARGET_SIZE), make_input(self.INPUT_SIZE, device=device))

    def test__get_params(self):
        input_size = self.INPUT_SIZE
        target_size = self.TARGET_SIZE
        scale_range = (0.5, 1.5)

        transform = transforms.ScaleJitter(target_size=target_size, scale_range=scale_range)
        params = transform._get_params([make_image(input_size)])

        assert "size" in params
        size = params["size"]

        assert isinstance(size, tuple) and len(size) == 2
        height, width = size

        r_min = min(target_size[1] / input_size[0], target_size[0] / input_size[1]) * scale_range[0]
        r_max = min(target_size[1] / input_size[0], target_size[0] / input_size[1]) * scale_range[1]

        assert int(input_size[0] * r_min) <= height <= int(input_size[0] * r_max)
        assert int(input_size[1] * r_min) <= width <= int(input_size[1] * r_max)
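
# Worked example of the r_min/r_max bound used in test__get_params above:
# ScaleJitter scales both sides by a single ratio r derived from target_size
# and the input size, with scale drawn uniformly from scale_range. Assumed
# sampling, kept aligned with the asserted bounds; default values mirror the
# class constants above.
def _demo_scale_jitter_size(input_size=(17, 11), target_size=(12, 13), scale_range=(0.5, 1.5)):
    scale = scale_range[0] + torch.rand(1) * (scale_range[1] - scale_range[0])
    r = min(target_size[1] / input_size[0], target_size[0] / input_size[1]) * scale
    return int(input_size[0] * r), int(input_size[1] * r)
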

class TestLinearTransform:
    def _make_matrix_and_vector(self, input, *, device=None):
        device = device or input.device
        numel = math.prod(F.get_dimensions(input))
        transformation_matrix = torch.randn((numel, numel), device=device)
        mean_vector = torch.randn((numel,), device=device)
        return transformation_matrix, mean_vector

    def _sample_input_adapter(self, transform, input, device):
        return {key: value for key, value in input.items() if not isinstance(value, PIL.Image.Image)}

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video])
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_transform(self, make_input, dtype, device):
        input = make_input(dtype=dtype, device=device)
        check_transform(
            transforms.LinearTransformation(*self._make_matrix_and_vector(input)),
            input,
            check_sample_input=self._sample_input_adapter,
            # The v1 compatibility check is flaky on macOS, so it is skipped there.
            check_v1_compatibility=(sys.platform != "darwin"),
        )

    def test_transform_error(self):
        with pytest.raises(ValueError, match="transformation_matrix should be square"):
            transforms.LinearTransformation(transformation_matrix=torch.rand(2, 3), mean_vector=torch.rand(2))

        with pytest.raises(ValueError, match="mean_vector should have the same length"):
            transforms.LinearTransformation(transformation_matrix=torch.rand(2, 2), mean_vector=torch.rand(1))

        for matrix_dtype, vector_dtype in [(torch.float32, torch.float64), (torch.float64, torch.float32)]:
            with pytest.raises(ValueError, match="Input tensors should have the same dtype"):
                transforms.LinearTransformation(
                    transformation_matrix=torch.rand(2, 2, dtype=matrix_dtype),
                    mean_vector=torch.rand(2, dtype=vector_dtype),
                )

        image = make_image()
        transform = transforms.LinearTransformation(transformation_matrix=torch.rand(2, 2), mean_vector=torch.rand(2))
        with pytest.raises(ValueError, match="Input tensor and transformation matrix have incompatible shape"):
            transform(image)

        transform = transforms.LinearTransformation(*self._make_matrix_and_vector(image))
        with pytest.raises(TypeError, match="does not support PIL images"):
            transform(F.to_pil_image(image))

    @needs_cuda
    def test_transform_error_cuda(self):
        for matrix_device, vector_device in [("cuda", "cpu"), ("cpu", "cuda")]:
            with pytest.raises(ValueError, match="Input tensors should be on the same device"):
                transforms.LinearTransformation(
                    transformation_matrix=torch.rand(2, 2, device=matrix_device),
                    mean_vector=torch.rand(2, device=vector_device),
                )

        for input_device, param_device in [("cuda", "cpu"), ("cpu", "cuda")]:
            input = make_image(device=input_device)
            transform = transforms.LinearTransformation(*self._make_matrix_and_vector(input, device=param_device))
            with pytest.raises(
                ValueError, match="Input tensor should be on the same device as transformation matrix and mean vector"
            ):
                transform(input)
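
# Hedged sketch of what LinearTransformation computes, for context on the
# shape/dtype/device errors tested above: the input is flattened, the mean
# vector subtracted, and the result multiplied by the (numel x numel) matrix.
# Illustrative only; the real transform also validates shapes and devices.
def _demo_linear_transformation(image, transformation_matrix, mean_vector):
    flat = image.reshape(1, -1) - mean_vector
    return (flat @ transformation_matrix).reshape(image.shape)
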

def make_image_numpy(*args, **kwargs):
    image = make_image_tensor(*args, **kwargs)
    return image.permute((1, 2, 0)).numpy()


class TestToImage:
    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_image_numpy])
    @pytest.mark.parametrize("fn", [F.to_image, transform_cls_to_functional(transforms.ToImage)])
    def test_functional_and_transform(self, make_input, fn):
        input = make_input()
        output = fn(input)

        assert isinstance(output, tv_tensors.Image)

        input_size = list(input.shape[:2]) if isinstance(input, np.ndarray) else F.get_size(input)
        assert F.get_size(output) == input_size

        if isinstance(input, torch.Tensor):
            assert output.data_ptr() == input.data_ptr()

    def test_2d_np_array(self):
        # Non-regression test: a 2D array must be converted to a (1, H, W) image.
        input = np.random.rand(10, 10)
        assert F.to_image(input).shape == (1, 10, 10)

    def test_functional_error(self):
        with pytest.raises(TypeError, match="Input can either be a pure Tensor, a numpy array, or a PIL image"):
            F.to_image(object())
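
# Rough illustration of the layout conversion exercised above: to_image takes
# HWC numpy arrays (2D arrays get a trailing channel dim) and produces CHW
# tv_tensors.Image. A hedged equivalent for numpy inputs only; the real
# functional also handles dtype details and other input types.
def _demo_numpy_to_image(arr):
    tensor = torch.from_numpy(np.atleast_3d(arr))  # HWC; a 2D array becomes (H, W, 1)
    return tv_tensors.Image(tensor.permute(2, 0, 1))
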

class TestToPILImage:
    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_numpy])
    @pytest.mark.parametrize("color_space", ["RGB", "GRAY"])
    @pytest.mark.parametrize("fn", [F.to_pil_image, transform_cls_to_functional(transforms.ToPILImage)])
    def test_functional_and_transform(self, make_input, color_space, fn):
        input = make_input(color_space=color_space)
        output = fn(input)

        assert isinstance(output, PIL.Image.Image)

        input_size = list(input.shape[:2]) if isinstance(input, np.ndarray) else F.get_size(input)
        assert F.get_size(output) == input_size

    def test_functional_error(self):
        with pytest.raises(TypeError, match="pic should be Tensor or ndarray"):
            F.to_pil_image(object())

        for ndim in [1, 4]:
            with pytest.raises(ValueError, match="pic should be 2/3 dimensional"):
                F.to_pil_image(torch.empty(*[1] * ndim))

        with pytest.raises(ValueError, match="pic should not have > 4 channels"):
            num_channels = 5
            F.to_pil_image(torch.empty(num_channels, 1, 1))

class TestToTensor:
    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_image_numpy])
    def test_smoke(self, make_input):
        with pytest.warns(UserWarning, match="deprecated and will be removed"):
            transform = transforms.ToTensor()

        input = make_input()
        output = transform(input)

        input_size = list(input.shape[:2]) if isinstance(input, np.ndarray) else F.get_size(input)
        assert F.get_size(output) == input_size

class TestPILToTensor:
    @pytest.mark.parametrize("color_space", ["RGB", "GRAY"])
    @pytest.mark.parametrize("fn", [F.pil_to_tensor, transform_cls_to_functional(transforms.PILToTensor)])
    def test_functional_and_transform(self, color_space, fn):
        input = make_image_pil(color_space=color_space)
        output = fn(input)

        assert isinstance(output, torch.Tensor) and not isinstance(output, tv_tensors.TVTensor)
        assert F.get_size(output) == F.get_size(input)

    def test_functional_error(self):
        with pytest.raises(TypeError, match="pic should be PIL Image"):
            F.pil_to_tensor(object())

class TestLambda:
    @pytest.mark.parametrize("input", [object(), torch.empty(()), np.empty(()), "string", 1, 0.0])
    @pytest.mark.parametrize("types", [(), (torch.Tensor, np.ndarray)])
    def test_transform(self, input, types):
        was_applied = False

        def was_applied_fn(input):
            nonlocal was_applied
            was_applied = True
            return input

        transform = transforms.Lambda(was_applied_fn, *types)
        output = transform(input)

        assert output is input
        assert was_applied is (not types or isinstance(input, types))
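
# Hedged usage sketch for the type gating tested above: Lambda only applies
# its function to inputs whose type is listed (all inputs when no types are
# given); everything else passes through unchanged.
def _demo_lambda_type_gating():
    double = transforms.Lambda(lambda t: t * 2, torch.Tensor)
    assert torch.equal(double(torch.ones(3)), torch.full((3,), 2.0))
    assert double("skipped") == "skipped"  # not a Tensor, so left untouched
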

@pytest.mark.parametrize(
    ("alias", "target"),
    [
        pytest.param(alias, target, id=alias.__name__)
        for alias, target in [
            (F.hflip, F.horizontal_flip),
            (F.vflip, F.vertical_flip),
            (F.get_image_num_channels, F.get_num_channels),
            (F.to_pil_image, F.to_pil_image),
            (F.elastic_transform, F.elastic),
            (F.to_grayscale, F.rgb_to_grayscale),
        ]
    ],
)
def test_alias(alias, target):
    assert alias is target

@pytest.mark.parametrize(
    "make_inputs",
    itertools.permutations(
        [
            make_image_tensor,
            make_image_tensor,
            make_image_pil,
            make_image,
            make_video,
        ],
        3,
    ),
)
def test_pure_tensor_heuristic(make_inputs):
    flat_inputs = [make_input() for make_input in make_inputs]

    def split_on_pure_tensor(to_split):
        # Splits the items of a sequence that is structurally aligned with `flat_inputs` into three parts:
        # 1. the first pure tensor, or None if there is none,
        # 2. the remaining pure tensors,
        # 3. everything else.
        pure_tensors = []
        others = []
        # Splitting always happens on the original `flat_inputs` so that type changes made by the
        # transform cannot affect the split.
        for item, inpt in zip(to_split, flat_inputs):
            (pure_tensors if is_pure_tensor(inpt) else others).append(item)
        return pure_tensors[0] if pure_tensors else None, pure_tensors[1:], others

    class CopyCloneTransform(transforms.Transform):
        def _transform(self, inpt, params):
            return inpt.clone() if isinstance(inpt, torch.Tensor) else inpt.copy()

        @staticmethod
        def was_applied(output, inpt):
            identity = output is inpt
            if identity:
                return False

            # If the transform was applied, the output must be an equal copy of the input.
            assert_equal(output, inpt)
            return True

    first_pure_tensor_input, other_pure_tensor_inputs, other_inputs = split_on_pure_tensor(flat_inputs)

    transform = CopyCloneTransform()
    transformed_sample = transform(flat_inputs)

    first_pure_tensor_output, other_pure_tensor_outputs, other_outputs = split_on_pure_tensor(transformed_sample)

    if first_pure_tensor_input is not None:
        if other_inputs:
            assert not transform.was_applied(first_pure_tensor_output, first_pure_tensor_input)
        else:
            assert transform.was_applied(first_pure_tensor_output, first_pure_tensor_input)

    for output, inpt in zip(other_pure_tensor_outputs, other_pure_tensor_inputs):
        assert not transform.was_applied(output, inpt)

    for input, output in zip(other_inputs, other_outputs):
        assert transform.was_applied(output, input)
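
# The heuristic under test, summarized as a hedged sketch: when a sample
# contains no tv_tensors or PIL images, only the *first* pure tensor is
# treated as an image and transformed; trailing pure tensors pass through.
def _demo_first_pure_tensor_wins():
    first, second = make_image_tensor(), make_image_tensor()
    out_first, out_second = transforms.RandomHorizontalFlip(p=1)([first, second])
    assert out_first is not first  # transformed
    assert out_second is second  # passed through untouched
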

class TestRandomIoUCrop:
    @pytest.mark.parametrize("device", cpu_and_cuda())
    @pytest.mark.parametrize("options", [[0.5, 0.9], [2.0]])
    def test__get_params(self, device, options):
        orig_h, orig_w = size = (24, 32)
        image = make_image(size)
        bboxes = tv_tensors.BoundingBoxes(
            torch.tensor([[1, 1, 10, 10], [20, 20, 23, 23], [1, 20, 10, 23], [20, 1, 23, 10]]),
            format="XYXY",
            canvas_size=size,
            device=device,
        )
        sample = [image, bboxes]

        transform = transforms.RandomIoUCrop(sampler_options=options)

        n_samples = 5
        for _ in range(n_samples):
            params = transform._get_params(sample)

            if options == [2.0]:
                assert len(params) == 0
                return

            assert len(params["is_within_crop_area"]) > 0
            assert params["is_within_crop_area"].dtype == torch.bool

            assert int(transform.min_scale * orig_h) <= params["height"] <= int(transform.max_scale * orig_h)
            assert int(transform.min_scale * orig_w) <= params["width"] <= int(transform.max_scale * orig_w)

            left, top = params["left"], params["top"]
            new_h, new_w = params["height"], params["width"]
            ious = box_iou(
                bboxes,
                torch.tensor([[left, top, left + new_w, top + new_h]], dtype=bboxes.dtype, device=bboxes.device),
            )
            assert ious.max() >= options[0] or ious.max() >= options[1], f"{ious} vs {options}"

    def test__transform_empty_params(self, mocker):
        transform = transforms.RandomIoUCrop(sampler_options=[2.0])
        image = tv_tensors.Image(torch.rand(1, 3, 4, 4))
        bboxes = tv_tensors.BoundingBoxes(torch.tensor([[1, 1, 2, 2]]), format="XYXY", canvas_size=(4, 4))
        label = torch.tensor([1])
        sample = [image, bboxes, label]

        # With empty params, the transform should be a no-op.
        transform._get_params = mocker.MagicMock(return_value={})
        output = transform(sample)
        torch.testing.assert_close(output, sample)

    def test_forward_assertion(self):
        transform = transforms.RandomIoUCrop()
        with pytest.raises(
            TypeError,
            match="requires input sample to contain tensor or PIL images and bounding boxes",
        ):
            transform(torch.tensor(0))

    def test__transform(self, mocker):
        transform = transforms.RandomIoUCrop()

        size = (32, 24)
        image = make_image(size)
        bboxes = make_bounding_boxes(format="XYXY", canvas_size=size, num_boxes=6)
        masks = make_detection_masks(size, num_masks=6)

        sample = [image, bboxes, masks]

        is_within_crop_area = torch.tensor([0, 1, 0, 1, 0, 1], dtype=torch.bool)

        params = dict(top=1, left=2, height=12, width=12, is_within_crop_area=is_within_crop_area)
        transform._get_params = mocker.MagicMock(return_value=params)
        output = transform(sample)

        # Boxes outside the crop area are zeroed out (marked degenerate), not dropped.
        output_bboxes = output[1]
        assert isinstance(output_bboxes, tv_tensors.BoundingBoxes)
        assert (output_bboxes[~is_within_crop_area] == 0).all()

        output_masks = output[2]
        assert isinstance(output_masks, tv_tensors.Mask)

class TestRandomShortestSize:
    @pytest.mark.parametrize("min_size,max_size", [([5, 9], 20), ([5, 9], None)])
    def test__get_params(self, min_size, max_size):
        canvas_size = (3, 10)

        transform = transforms.RandomShortestSize(min_size=min_size, max_size=max_size, antialias=True)

        sample = make_image(canvas_size)
        params = transform._get_params([sample])

        assert "size" in params
        size = params["size"]

        assert isinstance(size, tuple) and len(size) == 2

        longer = max(size)
        shorter = min(size)
        if max_size is not None:
            assert longer <= max_size
            assert shorter <= max_size
        else:
            assert shorter in min_size
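
# Hedged sketch of the sizing rule checked above: RandomShortestSize scales
# the image so the shorter side matches a value drawn from min_size,
# optionally capping the longer side at max_size. Assumed logic for a single
# draw, mirroring the asserted invariants rather than the exact kernel.
def _demo_shortest_size(canvas_size=(3, 10), min_size=(5, 9), max_size=20):
    h, w = canvas_size
    target = min_size[torch.randint(len(min_size), ()).item()]
    r = target / min(h, w)
    if max_size is not None:
        r = min(r, max_size / max(h, w))
    return int(h * r), int(w * r)
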

class TestRandomResize:
    def test__get_params(self):
        min_size = 3
        max_size = 6

        transform = transforms.RandomResize(min_size=min_size, max_size=max_size, antialias=True)

        for _ in range(10):
            params = transform._get_params([])

            assert isinstance(params["size"], list) and len(params["size"]) == 1
            size = params["size"][0]

            assert min_size <= size < max_size

@pytest.mark.parametrize("image_type", (PIL.Image, torch.Tensor, tv_tensors.Image))
@pytest.mark.parametrize("label_type", (torch.Tensor, int))
@pytest.mark.parametrize("dataset_return_type", (dict, tuple))
@pytest.mark.parametrize("to_tensor", (transforms.ToTensor, transforms.ToImage))
def test_classification_preset(image_type, label_type, dataset_return_type, to_tensor):

    image = tv_tensors.Image(torch.randint(0, 256, size=(1, 3, 250, 250), dtype=torch.uint8))
    if image_type is PIL.Image:
        image = to_pil_image(image[0])
    elif image_type is torch.Tensor:
        image = image.as_subclass(torch.Tensor)
        assert is_pure_tensor(image)

    label = 1 if label_type is int else torch.tensor([1])

    if dataset_return_type is dict:
        sample = {
            "image": image,
            "label": label,
        }
    else:
        sample = image, label

    if to_tensor is transforms.ToTensor:
        with pytest.warns(UserWarning, match="deprecated and will be removed"):
            to_tensor = to_tensor()
    else:
        to_tensor = to_tensor()

    t = transforms.Compose(
        [
            transforms.RandomResizedCrop((224, 224), antialias=True),
            transforms.RandomHorizontalFlip(p=1),
            transforms.RandAugment(),
            transforms.TrivialAugmentWide(),
            transforms.AugMix(),
            transforms.AutoAugment(),
            to_tensor,
            transforms.ConvertImageDtype(torch.float),
            transforms.Normalize(mean=[0, 0, 0], std=[1, 1, 1]),
            transforms.RandomErasing(p=1),
        ]
    )

    out = t(sample)

    assert type(out) == type(sample)

    if dataset_return_type is tuple:
        out_image, out_label = out
    else:
        assert out.keys() == sample.keys()
        out_image, out_label = out.values()

    assert out_image.shape[-2:] == (224, 224)
    assert out_label == label

@pytest.mark.parametrize("image_type", (PIL.Image, torch.Tensor, tv_tensors.Image))
@pytest.mark.parametrize("data_augmentation", ("hflip", "lsj", "multiscale", "ssd", "ssdlite"))
@pytest.mark.parametrize("to_tensor", (transforms.ToTensor, transforms.ToImage))
@pytest.mark.parametrize("sanitize", (True, False))
def test_detection_preset(image_type, data_augmentation, to_tensor, sanitize):
    torch.manual_seed(0)

    if to_tensor is transforms.ToTensor:
        with pytest.warns(UserWarning, match="deprecated and will be removed"):
            to_tensor = to_tensor()
    else:
        to_tensor = to_tensor()

    if data_augmentation == "hflip":
        t = [
            transforms.RandomHorizontalFlip(p=1),
            to_tensor,
            transforms.ConvertImageDtype(torch.float),
        ]
    elif data_augmentation == "lsj":
        t = [
            transforms.ScaleJitter(target_size=(1024, 1024), antialias=True),
            transforms.RandomCrop((1024, 1024), pad_if_needed=True),
            transforms.RandomHorizontalFlip(p=1),
            to_tensor,
            transforms.ConvertImageDtype(torch.float),
        ]
    elif data_augmentation == "multiscale":
        t = [
            transforms.RandomShortestSize(
                min_size=(480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800), max_size=1333, antialias=True
            ),
            transforms.RandomHorizontalFlip(p=1),
            to_tensor,
            transforms.ConvertImageDtype(torch.float),
        ]
    elif data_augmentation == "ssd":
        t = [
            transforms.RandomPhotometricDistort(p=1),
            transforms.RandomZoomOut(fill={"others": (123.0, 117.0, 104.0), tv_tensors.Mask: 0}, p=1),
            transforms.RandomIoUCrop(),
            transforms.RandomHorizontalFlip(p=1),
            to_tensor,
            transforms.ConvertImageDtype(torch.float),
        ]
    elif data_augmentation == "ssdlite":
        t = [
            transforms.RandomIoUCrop(),
            transforms.RandomHorizontalFlip(p=1),
            to_tensor,
            transforms.ConvertImageDtype(torch.float),
        ]
    if sanitize:
        t += [transforms.SanitizeBoundingBoxes()]
    t = transforms.Compose(t)

    num_boxes = 5
    H = W = 250

    image = tv_tensors.Image(torch.randint(0, 256, size=(1, 3, H, W), dtype=torch.uint8))
    if image_type is PIL.Image:
        image = to_pil_image(image[0])
    elif image_type is torch.Tensor:
        image = image.as_subclass(torch.Tensor)
        assert is_pure_tensor(image)

    label = torch.randint(0, 10, size=(num_boxes,))

    boxes = torch.randint(0, min(H, W) // 2, size=(num_boxes, 4))
    boxes[:, 2:] += boxes[:, :2]
    boxes = boxes.clamp(min=0, max=min(H, W))
    boxes = tv_tensors.BoundingBoxes(boxes, format="XYXY", canvas_size=(H, W))

    masks = tv_tensors.Mask(torch.randint(0, 2, size=(num_boxes, H, W), dtype=torch.uint8))

    sample = {
        "image": image,
        "label": label,
        "boxes": boxes,
        "masks": masks,
    }

    out = t(sample)

    if isinstance(to_tensor, transforms.ToTensor) and image_type is not tv_tensors.Image:
        assert is_pure_tensor(out["image"])
    else:
        assert isinstance(out["image"], tv_tensors.Image)
    assert isinstance(out["label"], type(sample["label"]))

    num_boxes_expected = {
        # ssd and ssdlite contain RandomIoUCrop, which may mark some boxes as
        # degenerate; SanitizeBoundingBoxes then removes them.
        (True, "ssdlite"): 4,
    }.get((sanitize, data_augmentation), num_boxes)

    assert out["boxes"].shape[0] == out["masks"].shape[0] == out["label"].shape[0] == num_boxes_expected

class TestSanitizeBoundingBoxes:
    def _get_boxes_and_valid_mask(self, H=256, W=128, min_size=10, min_area=10):
        boxes_and_validity = [
            ([0, 1, 10, 1], False),
            ([0, 1, 0, 20], False),
            ([0, 0, min_size - 1, 10], False),
            ([0, 0, 10, min_size - 1], False),
            ([0, 0, 10, H + 1], False),
            ([0, 0, W + 1, 10], False),
            ([-1, 1, 10, 20], False),
            ([0, 0, -1, 20], False),
            ([0, 0, -10, -1], False),
            ([0, 0, min_size, 10], min_size * 10 >= min_area),
            ([0, 0, 10, min_size], min_size * 10 >= min_area),
            ([0, 0, W, H], W * H >= min_area),
            ([1, 1, 30, 20], 29 * 19 >= min_area),
            ([0, 0, 10, 10], 9 * 9 >= min_area),
            ([1, 1, 30, 20], 29 * 19 >= min_area),
        ]

        random.shuffle(boxes_and_validity)  # shuffle to also test robustness to box order
        boxes, expected_valid_mask = zip(*boxes_and_validity)
        boxes = tv_tensors.BoundingBoxes(
            boxes,
            format=tv_tensors.BoundingBoxFormat.XYXY,
            canvas_size=(H, W),
        )

        return boxes, expected_valid_mask
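
    # Hedged restatement of the validity rule encoded in the table above: a
    # box is kept when both sides are at least min_size, its area is at least
    # min_area, and it lies fully inside the canvas. Illustrative only; the
    # real kernel operates on whole tensors at once.
    @staticmethod
    def _demo_box_is_valid(box, canvas_size, min_size=10, min_area=10):
        x1, y1, x2, y2 = box
        w, h = x2 - x1, y2 - y1
        inside = x1 >= 0 and y1 >= 0 and x2 <= canvas_size[1] and y2 <= canvas_size[0]
        return w >= min_size and h >= min_size and w * h >= min_area and inside
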

    @pytest.mark.parametrize("min_size, min_area", ((1, 1), (10, 1), (10, 101)))
    @pytest.mark.parametrize(
        "labels_getter",
        (
            "default",
            lambda inputs: inputs["labels"],
            lambda inputs: (inputs["labels"], inputs["other_labels"]),
            lambda inputs: [inputs["labels"], inputs["other_labels"]],
            None,
            lambda inputs: None,
        ),
    )
    @pytest.mark.parametrize("sample_type", (tuple, dict))
    def test_transform(self, min_size, min_area, labels_getter, sample_type):

        if sample_type is tuple and not isinstance(labels_getter, str):
            # The lambda labels_getters used in this test look up keys, which does not
            # work when the sample is a tuple.
            return

        H, W = 256, 128
        boxes, expected_valid_mask = self._get_boxes_and_valid_mask(H=H, W=W, min_size=min_size, min_area=min_area)
        valid_indices = [i for (i, is_valid) in enumerate(expected_valid_mask) if is_valid]

        labels = torch.arange(boxes.shape[0])
        masks = tv_tensors.Mask(torch.randint(0, 2, size=(boxes.shape[0], H, W)))

        other_labels = torch.arange(boxes.shape[0])
        whatever = torch.rand(10)
        input_img = torch.randint(0, 256, size=(1, 3, H, W), dtype=torch.uint8)

        sample = {
            "image": input_img,
            "labels": labels,
            "boxes": boxes,
            "other_labels": other_labels,
            "whatever": whatever,
            "masks": masks,
        }

        if sample_type is tuple:
            img = sample.pop("image")
            sample = (img, sample)

        out = transforms.SanitizeBoundingBoxes(min_size=min_size, min_area=min_area, labels_getter=labels_getter)(
            sample
        )

        if sample_type is tuple:
            out_image = out[0]
            out_labels = out[1]["labels"]
            out_other_labels = out[1]["other_labels"]
            out_boxes = out[1]["boxes"]
            out_masks = out[1]["masks"]
            out_whatever = out[1]["whatever"]
        else:
            out_image = out["image"]
            out_labels = out["labels"]
            out_other_labels = out["other_labels"]
            out_boxes = out["boxes"]
            out_masks = out["masks"]
            out_whatever = out["whatever"]

        assert out_image is input_img
        assert out_whatever is whatever

        assert isinstance(out_boxes, tv_tensors.BoundingBoxes)
        assert isinstance(out_masks, tv_tensors.Mask)

        if labels_getter is None or (callable(labels_getter) and labels_getter(sample) is None):
            assert out_labels is labels
            assert out_other_labels is other_labels
        else:
            assert isinstance(out_labels, torch.Tensor)
            assert out_boxes.shape[0] == out_labels.shape[0] == out_masks.shape[0]
            # Labels are aligned with the boxes, so the surviving ones are exactly the valid indices.
            assert out_labels.tolist() == valid_indices

            if callable(labels_getter) and isinstance(labels_getter(sample), (tuple, list)):
                assert_equal(out_other_labels, out_labels)
            else:
                assert_equal(out_other_labels, other_labels)

    @pytest.mark.parametrize("input_type", (torch.Tensor, tv_tensors.BoundingBoxes))
    def test_functional(self, input_type):
        # The labels_getter logic lives in the transform and is tested above;
        # the functional only sanitizes the boxes themselves.
        H, W, min_size = 256, 128, 10

        boxes, expected_valid_mask = self._get_boxes_and_valid_mask(H=H, W=W, min_size=min_size)

        if input_type is tv_tensors.BoundingBoxes:
            format = canvas_size = None
        else:
            format, canvas_size = "XYXY", boxes.canvas_size
            boxes = boxes.as_subclass(torch.Tensor)

        boxes, valid = F.sanitize_bounding_boxes(boxes, format=format, canvas_size=canvas_size, min_size=min_size)

        assert_equal(valid, torch.tensor(expected_valid_mask))
        assert type(valid) == torch.Tensor
        assert boxes.shape[0] == sum(valid)
        assert isinstance(boxes, input_type)

    def test_kernel(self):
        H, W, min_size = 256, 128, 10
        boxes, _ = self._get_boxes_and_valid_mask(H=H, W=W, min_size=min_size)

        format, canvas_size = boxes.format, boxes.canvas_size
        boxes = boxes.as_subclass(torch.Tensor)

        check_kernel(
            F.sanitize_bounding_boxes,
            input=boxes,
            format=format,
            canvas_size=canvas_size,
            check_batched_vs_unbatched=False,
        )

    def test_no_label(self):
        img = make_image()
        boxes = make_bounding_boxes()

        with pytest.raises(ValueError, match="or a two-tuple whose second item is a dict"):
            transforms.SanitizeBoundingBoxes()(img, boxes)

        out_img, out_boxes = transforms.SanitizeBoundingBoxes(labels_getter=None)(img, boxes)
        assert isinstance(out_img, tv_tensors.Image)
        assert isinstance(out_boxes, tv_tensors.BoundingBoxes)

    def test_errors_transform(self):
        good_bbox = tv_tensors.BoundingBoxes(
            [[0, 0, 10, 10]],
            format=tv_tensors.BoundingBoxFormat.XYXY,
            canvas_size=(20, 20),
        )

        with pytest.raises(ValueError, match="min_size must be >= 1"):
            transforms.SanitizeBoundingBoxes(min_size=0)
        with pytest.raises(ValueError, match="min_area must be >= 1"):
            transforms.SanitizeBoundingBoxes(min_area=0)
        with pytest.raises(ValueError, match="labels_getter should either be 'default'"):
            transforms.SanitizeBoundingBoxes(labels_getter=12)

        with pytest.raises(ValueError, match="Could not infer where the labels are"):
            bad_labels_key = {"bbox": good_bbox, "BAD_KEY": torch.arange(good_bbox.shape[0])}
            transforms.SanitizeBoundingBoxes()(bad_labels_key)

        with pytest.raises(ValueError, match="must be a tensor"):
            not_a_tensor = {"bbox": good_bbox, "labels": torch.arange(good_bbox.shape[0]).tolist()}
            transforms.SanitizeBoundingBoxes()(not_a_tensor)

        with pytest.raises(ValueError, match="Number of boxes"):
            different_sizes = {"bbox": good_bbox, "labels": torch.arange(good_bbox.shape[0] + 3)}
            transforms.SanitizeBoundingBoxes()(different_sizes)

    def test_errors_functional(self):
        good_bbox = tv_tensors.BoundingBoxes(
            [[0, 0, 10, 10]],
            format=tv_tensors.BoundingBoxFormat.XYXY,
            canvas_size=(20, 20),
        )

        with pytest.raises(ValueError, match="canvas_size cannot be None if bounding_boxes is a pure tensor"):
            F.sanitize_bounding_boxes(good_bbox.as_subclass(torch.Tensor), format="XYXY", canvas_size=None)

        with pytest.raises(ValueError, match="canvas_size cannot be None if bounding_boxes is a pure tensor"):
            F.sanitize_bounding_boxes(good_bbox.as_subclass(torch.Tensor), format=None, canvas_size=(10, 10))

        with pytest.raises(ValueError, match="canvas_size must be None when bounding_boxes is a tv_tensors"):
            F.sanitize_bounding_boxes(good_bbox, format="XYXY", canvas_size=None)

        with pytest.raises(ValueError, match="canvas_size must be None when bounding_boxes is a tv_tensors"):
            F.sanitize_bounding_boxes(good_bbox, format=None, canvas_size=(10, 10))

        with pytest.raises(ValueError, match="bounding_boxes must be a tv_tensors.BoundingBoxes instance or a"):
            F.sanitize_bounding_boxes(good_bbox.tolist())

class TestJPEG:
    @pytest.mark.parametrize("quality", [5, 75])
    @pytest.mark.parametrize("color_space", ["RGB", "GRAY"])
    def test_kernel_image(self, quality, color_space):
        check_kernel(F.jpeg_image, make_image(color_space=color_space), quality=quality)

    def test_kernel_video(self):
        check_kernel(F.jpeg_video, make_video(), quality=5)

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video])
    def test_functional(self, make_input):
        check_functional(F.jpeg, make_input(), quality=5)

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.jpeg_image, torch.Tensor),
            (F._augment._jpeg_image_pil, PIL.Image.Image),
            (F.jpeg_image, tv_tensors.Image),
            (F.jpeg_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.jpeg, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video])
    @pytest.mark.parametrize("quality", [5, (10, 20)])
    @pytest.mark.parametrize("color_space", ["RGB", "GRAY"])
    def test_transform(self, make_input, quality, color_space):
        check_transform(transforms.JPEG(quality=quality), make_input(color_space=color_space))

    @pytest.mark.parametrize("quality", [5])
    def test_functional_image_correctness(self, quality):
        image = make_image()

        actual = F.jpeg(image, quality=quality)
        expected = F.to_image(F.jpeg(F.to_pil_image(image), quality=quality))

        torch.testing.assert_close(actual, expected, rtol=0, atol=1)

    @pytest.mark.parametrize("quality", [5, (10, 20)])
    @pytest.mark.parametrize("color_space", ["RGB", "GRAY"])
    @pytest.mark.parametrize("seed", list(range(5)))
    def test_transform_image_correctness(self, quality, color_space, seed):
        image = make_image(color_space=color_space)

        transform = transforms.JPEG(quality=quality)

        with freeze_rng_state():
            torch.manual_seed(seed)
            actual = transform(image)

            torch.manual_seed(seed)
            expected = F.to_image(transform(F.to_pil_image(image)))

        torch.testing.assert_close(actual, expected, rtol=0, atol=1)

    @pytest.mark.parametrize("quality", [5, (10, 20)])
    @pytest.mark.parametrize("seed", list(range(10)))
    def test_transform_get_params_bounds(self, quality, seed):
        transform = transforms.JPEG(quality=quality)

        with freeze_rng_state():
            torch.manual_seed(seed)
            params = transform._get_params([])

        if isinstance(quality, int):
            assert params["quality"] == quality
        else:
            assert quality[0] <= params["quality"] <= quality[1]

    @pytest.mark.parametrize("quality", [[0], [0, 0, 0]])
    def test_transform_sequence_len_error(self, quality):
        with pytest.raises(ValueError, match="quality should be a sequence of length 2"):
            transforms.JPEG(quality=quality)

    @pytest.mark.parametrize("quality", [-1, 0, 150])
    def test_transform_invalid_quality_error(self, quality):
        with pytest.raises(ValueError, match="quality must be an integer from 1 to 100"):
            transforms.JPEG(quality=quality)
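
# Hedged sketch of the quality sampling that test_transform_get_params_bounds
# pins down: an int quality is used as-is, while a (low, high) pair is sampled
# inclusively. Assumed equivalent logic, not the transform's actual code:
def _demo_sample_jpeg_quality(quality):
    if isinstance(quality, int):
        return quality
    low, high = quality
    return int(torch.randint(low, high + 1, ()))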