# Owner(s): ["module: nn"]
from itertools import product
import torch.autograd.forward_ad as fwAD
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
from torch.testing import make_tensor
from torch.testing._internal.common_cuda import (
from torch.testing._internal.common_device_type import (
instantiate_device_type_tests,
onlyNativeDeviceTypes,
skipCUDAIfCudnnVersionLessThan,
skipCUDAIfNotMiopenSuggestNHWC,
skipCUDAIfRocmVersionLessThan,
from torch.testing._internal.common_dtype import (
floating_and_complex_types_and,
from torch.testing._internal.common_nn import _test_module_empty_input, NNTestCase
from torch.testing._internal.common_utils import (
instantiate_parametrized_tests,
parametrize as parametrize_test,
skipIfNotMiopenSuggestNHWC,
skipIfRocmVersionLessThan,
AMPERE_OR_ROCM = TEST_WITH_ROCM or tf32_is_not_fp32()
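# NB: True on ROCm builds or on NVIDIA GPUs where TF32 is available (roughly, Ampere
# or newer); used below to decide whether bfloat16 convolutions are exercised.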
72
class TestConvolutionNN(NNTestCase):
73
_do_cuda_memory_leak_check = True
74
_do_cuda_non_default_stream = True
76
def test_conv_backcompat(self):
77
from torch.serialization import SourceChangeWarning
79
# This file was generated by running the following on PyTorch 1.0.1 with Python 2:
82
# from torch import nn
83
# m = nn.Conv2d(1, 1, 1)
84
# torch.save(m, 'legacy_conv2d.pt')
86
# NB: This Pickle also contains some Unicode data!
87
path = download_file("https://download.pytorch.org/test_data/legacy_conv2d.pt")
88
with warnings.catch_warnings():
89
warnings.simplefilter("ignore", SourceChangeWarning)
90
# weights_only=False as this is legacy code that saves the model
91
m = torch.load(path, encoding="utf-8", weights_only=False)
92
input = torch.randn((1, 1, 1, 1), dtype=torch.float)
93
self.assertEqual(m(input).size(), (1, 1, 1, 1))
95
def test_invalid_conv1d(self):
105
in_channels=3, out_channels=33, kernel_size=10, stride=1, bias=True
107
input = torch.randn(1, 3, 4).to(dtype)
108
with self.assertRaisesRegex(
110
r"Calculated padded input size per channel: \(4\). "
111
+ r"Kernel size: \(10\). Kernel size can\'t be greater than actual input size",
115
# Negative stride check
117
in_channels=3, out_channels=6, kernel_size=3, stride=-1, bias=True
119
input = torch.randn(1, 3, 4).to(dtype)
120
with self.assertRaisesRegex(
121
RuntimeError, "non-positive stride is not supported"
125
def test_mismatch_shape_conv2d(self):
126
for dtype in (torch.float, torch.cfloat):
127
x = torch.randn(1, 10, 1, 28, 28, dtype=dtype)
128
w = torch.randn(6, 1, 5, 5, dtype=dtype)
130
with self.assertRaisesRegex(
132
r"Expected 3D \(unbatched\) or 4D \(batched\) input to conv2d, but got "
133
+ r"input of size: \[1, 10, 1, 28, 28\]",
137
def test_conv2d_discontiguous_weight(self):
138
for dtype in (torch.float, torch.cfloat):
139
# Test for https://github.com/pytorch/pytorch/issues/55781
140
x = torch.ones(64, 16, 16, 16, dtype=dtype)
142
torch.arange(0, 1.0, 1 / 2.0**10)
143
.reshape(32, 16, 1, 2)
144
.to(dtype)[:, :, :, ::2]
146
self.assertFalse(weight.is_contiguous())
147
y = torch.nn.functional.conv2d(x, weight, None)
148
if torch.backends.mkldnn.is_available():
149
# Disable MKLDNN explicitly, so that either NNPACK or THCNN will be used
150
with torch.backends.mkldnn.flags(enabled=False):
151
y_ = torch.nn.functional.conv2d(x, weight, None)
152
self.assertEqual(y, y_)
153
self.assertEqual(y.sum(), 4186112.0)
155
def test_invalid_conv2d(self):
164
module = torch.nn.Conv2d(1, 1, kernel_size=3, dilation=2, stride=2).to(
167
input = torch.empty(1, 1, 4, 4).to(dtype)
168
self.assertRaises(RuntimeError, lambda: module(input))
171
in_channels=3, out_channels=33, kernel_size=10, stride=1, bias=True
173
input = torch.randn(1, 3, 1, 1)
174
with self.assertRaisesRegex(
176
r"Calculated padded input size per channel: \(1 x 1\). "
177
+ r"Kernel size: \(10 x 10\). Kernel size can\'t be greater than actual input size",
181
# Negative stride check
183
in_channels=3, out_channels=6, kernel_size=4, stride=-1, bias=True
185
input = torch.randn(1, 3, 4, 4).to(dtype)
186
with self.assertRaisesRegex(
187
RuntimeError, "non-positive stride is not supported"
193
in_channels=3, out_channels=6, kernel_size=4, stride=0, bias=True
195
input = torch.randn(1, 3, 4, 4).to(dtype)
196
with self.assertRaisesRegex(
197
RuntimeError, "non-positive stride is not supported"
201
def test_invalid_conv3d(self):
210
module = torch.nn.Conv3d(1, 1, kernel_size=3, dilation=2, stride=2).to(
213
input = torch.empty(1, 1, 4, 4, 4).to(dtype)
214
self.assertRaises(RuntimeError, lambda: module(input))
216
# Negative stride check
217
module = torch.nn.Conv3d(1, 1, kernel_size=3, stride=-2)
218
input = torch.empty(1, 1, 4, 4, 4)
219
with self.assertRaisesRegex(
220
RuntimeError, "non-positive stride is not supported"
224
def test_conv_invalid_groups(self):
225
with self.assertRaisesRegex(ValueError, "groups must be a positive integer"):
226
torch.nn.Conv1d(1, 1, kernel_size=3, dilation=2, stride=2, groups=0)
227
with self.assertRaisesRegex(ValueError, "groups must be a positive integer"):
228
torch.nn.Conv2d(1, 1, kernel_size=3, dilation=2, stride=2, groups=-1)
229
with self.assertRaisesRegex(ValueError, "groups must be a positive integer"):
230
torch.nn.Conv3d(1, 1, kernel_size=3, dilation=2, stride=2, groups=-2)
232
def test_Conv1d_module_same_padding(self):
233
# Compare module against functional: without strides/dilation, asymmetric padding
234
x = torch.rand(1, 1, 20)
236
in_channels=1, out_channels=1, kernel_size=10, padding="same"
238
expect = F.conv1d(x, module.weight, module.bias, padding="same")
239
self.assertEqual(expect, module(x))
241
# Test dilation, symmetric padding
243
in_channels=1, out_channels=1, kernel_size=10, padding="same", dilation=2
245
expect = F.conv1d(x, module.weight, module.bias, padding="same", dilation=2)
246
self.assertEqual(expect, module(x))
248
# Test non-zero padding_mode, requiring explicit padding
254
padding_mode="replicate",
256
x_padded = F.pad(x, [4, 5], mode="replicate")
257
expect = F.conv1d(x_padded, module.weight, module.bias, padding="valid")
258
self.assertEqual(expect, module(x))
259
self.assertEqual(x.size(), expect.size())
261
# Test that construction with an invalid padding string raises
262
with self.assertRaisesRegex(ValueError, "Invalid padding string"):
264
in_channels=3, out_channels=33, kernel_size=10, padding="foo"
267
# Test that construction with padding='same' and a non-unit stride raises
268
with self.assertRaisesRegex(ValueError, "padding='same'"):
270
in_channels=3, out_channels=33, kernel_size=10, padding="same", stride=2
273
def test_Conv2d_module_same_padding(self):
274
# Compare module against functional:
275
# without strides/dilation, both symmetric and asymmetric padding
276
x = torch.rand(1, 1, 9, 20)
278
in_channels=1, out_channels=1, kernel_size=(5, 10), padding="same"
280
expect = F.conv2d(x, module.weight, module.bias, padding="same")
281
self.assertEqual(expect, module(x))
283
# with dilation, symmetric padding
292
x, module.weight, module.bias, padding="same", dilation=(1, 2)
294
self.assertEqual(expect, module(x))
296
# Test non-zero padding_mode, requiring explicit padding
302
padding_mode="reflect",
304
x_padded = F.pad(x, [1, 2, 1, 1], mode="reflect")
305
expect = F.conv2d(x_padded, module.weight, module.bias, padding="valid")
306
self.assertEqual(expect, module(x))
307
self.assertEqual(x.size(), expect.size())
309
# Test that construction with an invalid padding string raises
310
with self.assertRaisesRegex(ValueError, "Invalid padding string"):
312
in_channels=3, out_channels=33, kernel_size=10, padding="foo"
315
# Test that construction with padding='same' and a non-unit stride raises
316
with self.assertRaisesRegex(ValueError, "padding='same'"):
318
in_channels=3, out_channels=33, kernel_size=10, padding="same", stride=2
320
with self.assertRaisesRegex(ValueError, "padding='same'"):
328
with self.assertRaisesRegex(ValueError, "padding='same'"):
337
def test_Conv3d_module_same_padding(self):
338
# Compare module against functional:
339
x = torch.rand(1, 1, 4, 4, 4)
340
# without dilation, both symmetric and asymmetric padding
342
in_channels=1, out_channels=1, kernel_size=(2, 3, 4), padding="same"
344
expect = F.conv3d(x, module.weight, module.bias, padding="same")
345
self.assertEqual(expect, module(x))
347
# with dilation, both symmetric and asymmetric padding
351
kernel_size=(2, 3, 4),
356
x, module.weight, module.bias, padding="same", dilation=(3, 2, 1)
358
self.assertEqual(expect, module(x))
360
# Test non-zero padding_mode, requiring explicit padding
364
kernel_size=(2, 3, 4),
366
padding_mode="circular",
368
x_padded = F.pad(x, [1, 2, 1, 1, 0, 1], mode="circular")
369
expect = F.conv3d(x_padded, module.weight, module.bias, padding="valid")
370
self.assertEqual(expect, module(x))
371
self.assertEqual(x.size(), expect.size())
373
# Test that construction with an invalid padding string raises
374
with self.assertRaisesRegex(ValueError, "Invalid padding string"):
376
in_channels=3, out_channels=33, kernel_size=10, padding="foo"
379
# Test that construction with padding='same' and a non-unit stride raises
380
with self.assertRaisesRegex(ValueError, "padding='same'"):
382
in_channels=3, out_channels=33, kernel_size=10, padding="same", stride=2
384
with self.assertRaisesRegex(ValueError, "padding='same'"):
392
with self.assertRaisesRegex(ValueError, "padding='same'"):
400
with self.assertRaisesRegex(ValueError, "padding='same'"):
409
@unittest.skipIf(not TEST_CUDA, "CUDA not available")
410
def test_thnn_conv_strided_padded_dilated(self):
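# With cuDNN disabled, compare the slow CUDA conv/conv_transpose kernels against
# the CPU reference and run gradcheck on both devices.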
411
for convfn, dims, transposed in (
412
(torch.nn.functional.conv2d, 2, False),
413
(torch.nn.functional.conv_transpose2d, 2, True),
414
(torch.nn.functional.conv3d, 3, False),
415
(torch.nn.functional.conv_transpose3d, 3, True),
417
for stride, padding, dilation in (
423
kwargs = {"stride": stride, "padding": padding, "dilation": dilation}
424
inp_shape = (1, 2) + dims * (4,)
425
weight_shape = (2, 2) + dims * (1,)
426
inputs = torch.randn(
427
inp_shape, dtype=torch.double, device="cuda", requires_grad=True
429
weight = torch.randn(
430
weight_shape, dtype=torch.double, device="cuda", requires_grad=True
433
2, dtype=torch.double, device="cuda", requires_grad=True
435
with torch.backends.cudnn.flags(enabled=False):
436
res = convfn(inputs, weight, bias, **kwargs)
437
res_cpu = convfn(inputs.cpu(), weight.cpu(), bias.cpu(), **kwargs)
438
self.assertEqual(res, res_cpu)
439
with torch.backends.cudnn.flags(enabled=False):
440
torch.autograd.gradcheck(
441
lambda x, w, b: convfn(x, w, b, **kwargs),
442
(inputs, weight, bias),
444
torch.autograd.gradcheck(
445
lambda x, w, b: convfn(x, w, b, **kwargs),
446
(inputs.cpu(), weight.cpu(), bias.cpu()),
449
def test_Conv2d_inconsistent_types(self):
450
inputs = torch.randn(4, 1, 7, 7, dtype=torch.float)
451
weights = torch.randn(1, 1, 3, 3, dtype=torch.double)
452
# inconsistent types should raise an exception
453
self.assertRaises(RuntimeError, lambda: nn.functional.conv2d(inputs, weights))
454
# but it should work with the same type
455
nn.functional.conv2d(inputs.float(), weights.float())
457
@unittest.skipIf(not TEST_CUDA, "CUDA not available")
458
def test_Conv2d_inconsistent_types_on_GPU_without_cudnn(self):
459
inputs = torch.randn(4, 1, 7, 7, dtype=torch.float, device="cuda")
460
weights = torch.randn(1, 1, 3, 3, dtype=torch.double, device="cuda")
461
bias = torch.randn(1, dtype=torch.double, device="cuda")
463
with torch.backends.cudnn.flags(enabled=False):
464
# inconsistent types should raise an exception
466
RuntimeError, lambda: nn.functional.conv2d(inputs, weights)
470
lambda: nn.functional.conv2d(inputs, weights.float(), bias),
473
# but it should work with the same type
474
nn.functional.conv2d(inputs.float(), weights.float(), bias.float())
476
def test_Conv2d_1x1(self):
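# Gradcheck a 1x1 convolution with MKL-DNN both disabled and enabled to make sure
# both code paths produce correct gradients.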
479
mod = torch.nn.Conv2d(2, 2, 1, bias=False).to(dtype=torch.double)
481
1, in_channels, 5, 5, requires_grad=True, dtype=torch.double
483
for enabled in (False, True):
484
with torch.backends.mkldnn.flags(enabled=enabled):
485
gradcheck(F.conv2d, (input, mod.weight))
487
def test_Conv2d_OneDNN(self):
488
def run_once(group_val=24, dilation=1):
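# Builds a depthwise-style convolution (in_channels == out_channels == group_val),
# runs forward/backward with an all-ones output gradient, and returns the weight
# gradient so callers can compare oneDNN on vs. off.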
489
ifm = torch.ones([1, group_val, 6, 6], dtype=torch.float32)
490
weights = torch.ones([group_val, 1, 3, 3], dtype=torch.float32)
491
op = torch.nn.Conv2d(
492
in_channels=group_val,
493
out_channels=group_val,
497
dilation=[dilation, dilation],
500
padding_mode="zeros",
503
op.weight.data = weights
505
grad_in = torch.ones(res.shape, dtype=torch.float32)
506
res.backward(grad_in)
507
return op.weight.grad
509
for group_val in (24, 48, 23, 25):
for dilation in (1, 2):
with torch.backends.mkldnn.flags(enabled=False):
without_onednn = run_once(group_val, dilation)
with torch.backends.mkldnn.flags(enabled=True):
with_onednn = run_once(group_val, dilation)
517
self.assertEqual(without_onednn, with_onednn)
519
@unittest.skipIf(not TEST_CUDA, "CUDA not available")
520
@unittest.skipIf(not TEST_CUDNN, "CUDNN not available")
521
def test_cudnn_non_contiguous(self):
522
x = torch.randn(192, 16, 50).cuda()
523
x = x.permute(0, 2, 1).contiguous().permute(0, 2, 1)
525
in_channels=16, out_channels=32, kernel_size=2, bias=True
529
@unittest.skipIf(not TEST_CUDA, "CUDA not available")
530
@unittest.skipIf(not TEST_CUDNN, "CUDNN not available")
531
def test_cudnn_not_mutate_stride(self):
532
weight = torch.randn(64, 64, 1, 1)
533
x = torch.randn(2, 64, 10, 10).to(memory_format=torch.channels_last)
534
weight_stride = weight.stride()
537
return torch.convolution(
544
output_padding=(0, 0),
549
# should have run in nhwc without mutating input strides
550
out_nhwc = conv(x, weight)
551
self.assertEqual(weight.stride(), weight_stride)
552
self.assertTrue(out_nhwc.is_contiguous(memory_format=torch.channels_last))
554
x = x.contiguous(memory_format=torch.contiguous_format)
555
out_c = conv(x, weight)
556
self.assertTrue(out_c.is_contiguous(memory_format=torch.contiguous_format))
557
self.assertEqual(out_c, out_nhwc)
558
self.assertEqual(weight.stride(), weight_stride)
560
@unittest.skipIf(not TEST_CUDA, "CUDA not available")
561
@unittest.skipIf(not TEST_CUDNN, "CUDNN not available")
562
def test_Conv2d_inconsistent_types_on_GPU_with_cudnn(self):
563
inputs = torch.randn(4, 1, 7, 7, dtype=torch.float, device="cuda")
564
weights = torch.randn(1, 1, 3, 3, dtype=torch.double, device="cuda")
565
bias = torch.randn(1, dtype=torch.double, device="cuda")
567
with torch.backends.cudnn.flags(enabled=True):
568
# inconsistent types should raise an exception
570
RuntimeError, lambda: nn.functional.conv2d(inputs, weights)
574
lambda: nn.functional.conv2d(inputs, weights.float(), bias),
577
# but it should work with the same type
578
nn.functional.conv2d(inputs.float(), weights.float(), bias.float())
580
def test_Conv2d_missing_argument(self):
581
c = nn.Conv2d(3, 3, 3)
582
self.assertRaises(TypeError, lambda: c(None))
584
def test_Conv2d_backward_twice(self):
585
input = torch.randn(2, 3, 5, 5)
586
c = nn.Conv2d(3, 3, 3)
589
self.assertRaisesRegex(
590
RuntimeError, "Specify retain_graph=True", lambda: o1.sum().backward()
593
def test_conv_modules_raise_error_on_incorrect_input_size(self):
594
for dtype in [torch.half, torch.bfloat16, torch.double, torch.float]:
596
nn.Conv1d(3, 8, 3).to(dtype),
597
nn.ConvTranspose1d(3, 8, 3).to(dtype),
598
nn.Conv2d(3, 8, 3).to(dtype),
599
nn.ConvTranspose2d(3, 8, 3).to(dtype),
600
nn.Conv3d(3, 8, 3).to(dtype),
601
nn.ConvTranspose3d(3, 8, 3).to(dtype),
604
invalid_input_dims = [(1, 4), (1, 4), (2, 5), (2, 5), (3, 6), (3, 6)]
606
for invalid_dims, module in zip(invalid_input_dims, modules):
607
for dims in invalid_dims:
608
input = torch.empty(torch.Size((3,) * dims))
609
self.assertRaises(RuntimeError, lambda: module(input))
611
def test_conv_shapecheck(self):
612
def test(should_raise, module, input_size, dtype):
613
input = torch.empty(3, *input_size).to(dtype)
615
self.assertRaises(RuntimeError, lambda: module(input))
617
# just run it to ensure no exception is raised.
629
test(True, nn.Conv1d(1, 1, 3).to(dtype), (1, 2), dtype)
630
test(True, nn.Conv1d(1, 1, 3, stride=2).to(dtype), (1, 2), dtype)
631
test(False, nn.Conv1d(1, 1, 2).to(dtype), (1, 2), dtype)
632
test(False, nn.Conv1d(1, 1, 2, stride=2).to(dtype), (1, 2), dtype)
634
False, nn.Conv1d(1, 1, 3, stride=2, padding=1).to(dtype), (1, 2), dtype
638
test(True, nn.Conv2d(1, 1, (3, 3)).to(dtype), (1, 2, 2), dtype)
639
test(False, nn.Conv2d(1, 1, (3, 3)).to(dtype), (1, 3, 3), dtype)
640
test(False, nn.Conv2d(1, 1, (3, 3), padding=1).to(dtype), (1, 2, 2), dtype)
643
test(True, nn.Conv3d(1, 1, (3, 3, 3)).to(dtype), (1, 2, 2, 2), dtype)
644
test(False, nn.Conv3d(1, 1, (3, 3, 3)).to(dtype), (1, 3, 3, 3), dtype)
647
nn.Conv3d(1, 1, (3, 3, 3), padding=1).to(dtype),
652
def test_ConvTranspose2d_output_size(self):
653
m = nn.ConvTranspose2d(3, 4, 3, 3, 0, 2)
654
i = torch.randn(2, 3, 6, 6)
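# With kernel_size=3, stride=3, padding=0 on a 6x6 input, the base transposed output
# is (6 - 1) * 3 + 3 = 18, and output_padding may add up to stride - 1 = 2 more, so
# only output sizes in [18, 20] should be accepted below.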
655
for h in range(15, 22):
656
for w in range(15, 22):
657
if 18 <= h <= 20 and 18 <= w <= 20:
658
output = m(i, output_size=(h, w))
659
self.assertEqual(output.size()[2:], (h, w))
661
self.assertRaises(ValueError, lambda: m(i, (h, w)))
663
def test_ConvTranspose2d_output_size_downsample_upsample(self):
664
b, c, hid_c = 2, 3, 2
665
for h in range(13, 24):
666
for w in range(13, 17):
667
for k in range(2, 5):
668
for d in range(1, 5):
669
for s in range(1, 4):
680
t_conv = nn.ConvTranspose2d(
689
i = torch.randn(b, c, h, w)
691
out = t_conv(conv(i), output_size=i.shape)
693
self.assertEqual(out.size()[2:], i.size()[2:])
695
def test_ConvTranspose3d_correct_output_size(self):
696
# Check that ConvTranspose3d can take a 5d output_size.
697
m = nn.ConvTranspose3d(2, 2, 2)
698
i = torch.rand(1, 2, 1, 1, 1)
699
out = m(i, output_size=(1, 2, 2, 2, 2))
701
@unittest.skipIf(not TEST_CUDA, "CUDA not available")
702
def test_ConvTranspose2d_half_cublas_gemm(self):
703
with torch.backends.cudnn.flags(enabled=False):
704
inputs = torch.randn(1, 1, 16, 16, device="cuda", dtype=torch.half)
706
nn.ConvTranspose2d(1, 1, 3, stride=2, padding=1, output_padding=1)
710
output = deconv(inputs)
711
output.mean().backward()
713
# For https://github.com/pytorch/pytorch/pull/1273
714
# Almost identical to the above `test_Conv2d_naive_groups`
715
@torch.backends.cudnn.flags(enabled=True, benchmark=False)
716
@unittest.skipIf(TEST_WITH_ROCM, "Skipped on ROCm, since it is failing on ROCm 5.7")
717
def test_Conv2d_groups_nobias(self):
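# Splits a groups=2 convolution into two independent single-group convolutions
# and checks that outputs, input gradients, and weight gradients all match.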
718
dev_dtypes = [("cpu", torch.float)]
720
dev_dtypes += [("cuda", torch.float), ("cuda", torch.half)]
722
dev_dtypes += [("cuda", torch.bfloat16)]
723
for device, dtype in dev_dtypes:
724
m = nn.Conv2d(4, 4, kernel_size=3, groups=2, bias=False).to(device, dtype)
725
i = torch.randn(2, 4, 6, 6, device=device, dtype=dtype, requires_grad=True)
727
grad_output = torch.randn(2, 4, 4, 4, device=device, dtype=dtype)
728
output.backward(grad_output)
730
m1 = nn.Conv2d(2, 2, kernel_size=3, bias=False).to(device, dtype)
731
m1.weight.data.copy_(m.weight.data[:2])
732
i1 = i.data[:, :2].contiguous().requires_grad_(True)
734
output1.backward(grad_output[:, :2].contiguous())
736
m2 = nn.Conv2d(2, 2, kernel_size=3, bias=False).to(device, dtype)
737
m2.weight.data.copy_(m.weight.data[2:])
738
i2 = i.data[:, 2:].contiguous().requires_grad_(True)
740
output2.backward(grad_output[:, 2:].contiguous())
742
self.assertEqual(output, torch.cat([output1, output2], 1))
745
torch.cat([i1.grad.data, i2.grad.data], 1),
746
atol=dtype2prec_DONTUSE[dtype],
751
torch.cat([m1.weight.grad.data, m2.weight.grad.data], 0),
752
atol=1e-1 if dtype == torch.half else dtype2prec_DONTUSE[dtype],
756
# Almost identical to the above `test_Conv2d_naive_groups`
757
# Covers the special case where groups > 1, input-channels / groups < 16, and output-channels is a multiple of 16
758
# See also https://github.com/pytorch/pytorch/pull/18463#issuecomment-476563686
759
# and https://github.com/pytorch/pytorch/pull/18463#issuecomment-477001024
760
@torch.backends.cudnn.flags(enabled=True, benchmark=False)
761
@unittest.skipIf(TEST_WITH_ROCM, "Skipped on ROCm, since it is failing on ROCm 5.7")
762
def test_Conv2d_groups_nobias_v2(self):
763
torch.manual_seed(123)
764
dev_dtypes = [("cpu", torch.float)]
766
dev_dtypes += [("cuda", torch.float), ("cuda", torch.half)]
768
dev_dtypes += [("cuda", torch.bfloat16)]
769
for device, dtype in dev_dtypes:
770
m = nn.Conv2d(4, 16, kernel_size=3, groups=2, bias=False).to(device, dtype)
771
i = torch.randn(2, 4, 6, 6, device=device, dtype=dtype, requires_grad=True)
773
grad_output = torch.randn(2, 16, 4, 4, device=device, dtype=dtype)
774
output.backward(grad_output)
776
m1 = nn.Conv2d(2, 8, kernel_size=3, bias=False).to(device, dtype)
777
m1.weight.data.copy_(m.weight.data[:8])
778
i1 = i.data[:, :2].contiguous().requires_grad_(True)
780
output1.backward(grad_output[:, :8].contiguous())
782
m2 = nn.Conv2d(2, 8, kernel_size=3, bias=False).to(device, dtype)
783
m2.weight.data.copy_(m.weight.data[8:])
784
i2 = i.data[:, 2:].contiguous().requires_grad_(True)
786
output2.backward(grad_output[:, 8:].contiguous())
788
self.assertEqual(output, torch.cat([output1, output2], 1))
791
torch.cat([i1.grad.data, i2.grad.data], 1),
792
atol=dtype2prec_DONTUSE[dtype],
797
torch.cat([m1.weight.grad.data, m2.weight.grad.data], 0),
798
atol=1e-1 if dtype == torch.half else dtype2prec_DONTUSE[dtype],
802
# CPU-only test for group conv3d fast implementation using bmm
803
# See: https://github.com/pytorch/pytorch/pull/36355
804
def test_Conv3d_groups_nobias(self):
805
torch.manual_seed(123)
806
m = nn.Conv3d(4, 16, kernel_size=3, groups=2, bias=False).to("cpu", torch.float)
808
2, 4, 6, 6, 6, device="cpu", dtype=torch.float, requires_grad=True
811
grad_output = torch.randn(2, 16, 4, 4, 4, device="cpu", dtype=torch.float)
812
output.backward(grad_output)
814
m1 = nn.Conv3d(2, 8, kernel_size=3, bias=False).to("cpu", torch.float)
815
m1.weight.data.copy_(m.weight.data[:8])
816
i1 = i.data[:, :2].contiguous().requires_grad_(True)
818
output1.backward(grad_output[:, :8].contiguous())
820
m2 = nn.Conv3d(2, 8, kernel_size=3, bias=False).to("cpu", torch.float)
821
m2.weight.data.copy_(m.weight.data[8:])
822
i2 = i.data[:, 2:].contiguous().requires_grad_(True)
824
output2.backward(grad_output[:, 8:].contiguous())
826
self.assertEqual(output, torch.cat([output1, output2], 1))
829
torch.cat([i1.grad.data, i2.grad.data], 1),
830
atol=dtype2prec_DONTUSE[torch.float],
835
torch.cat([m1.weight.grad.data, m2.weight.grad.data], 0),
836
atol=dtype2prec_DONTUSE[torch.float],
837
rtol=dtype2prec_DONTUSE[torch.float],
840
def test_Conv3d_groups_wbias(self):
841
torch.manual_seed(123)
842
m = nn.Conv3d(4, 16, kernel_size=3, groups=2, bias=True).to("cpu", torch.float)
844
2, 4, 6, 6, 6, device="cpu", dtype=torch.float, requires_grad=True
847
grad_output = torch.randn(2, 16, 4, 4, 4, device="cpu", dtype=torch.float)
848
output.backward(grad_output)
850
m1 = nn.Conv3d(2, 8, kernel_size=3, bias=True).to("cpu", torch.float)
851
m1.weight.data.copy_(m.weight.data[:8])
852
m1.bias.data.copy_(m.bias.data[:8])
853
i1 = i.data[:, :2].contiguous().requires_grad_(True)
855
output1.backward(grad_output[:, :8].contiguous())
857
m2 = nn.Conv3d(2, 8, kernel_size=3, bias=True).to("cpu", torch.float)
858
m2.weight.data.copy_(m.weight.data[8:])
859
m2.bias.data.copy_(m.bias.data[8:])
860
i2 = i.data[:, 2:].contiguous().requires_grad_(True)
862
output2.backward(grad_output[:, 8:].contiguous())
864
self.assertEqual(output, torch.cat([output1, output2], 1))
867
torch.cat([i1.grad.data, i2.grad.data], 1),
868
atol=dtype2prec_DONTUSE[torch.float],
869
rtol=dtype2prec_DONTUSE[torch.float],
873
torch.cat([m1.weight.grad.data, m2.weight.grad.data], 0),
874
atol=dtype2prec_DONTUSE[torch.float],
875
rtol=dtype2prec_DONTUSE[torch.float],
879
torch.cat([m1.bias.grad.data, m2.bias.grad.data], 0),
880
atol=dtype2prec_DONTUSE[torch.float],
881
rtol=dtype2prec_DONTUSE[torch.float],
884
def test_conv_tbc(self):
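# conv_tbc expects (time, batch, channels) inputs; verify its gradients here.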
885
with set_default_dtype(torch.double):
886
inp = torch.randn(9, 4, 5, requires_grad=True)
887
weight = torch.randn(3, 5, 6, requires_grad=True)
888
bias = torch.randn(6, requires_grad=True)
891
lambda i, w, b, pad: F.conv_tbc(i, w, b, pad), (inp, weight, bias, 3)
894
@unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
895
@unittest.skipIf(not TEST_CUDNN, "needs cudnn")
896
@skipIfRocmVersionLessThan((4, 3))
897
@skipIfNotMiopenSuggestNHWC
898
def test_grouped_conv_cudnn_nhwc_support(self):
899
# In order to catch holes in grouped convolution NHWC support on earlier cuDNN versions
900
input = torch.randn((16, 16, 8, 8), dtype=torch.float16, device="cuda").to(
901
memory_format=torch.channels_last
903
weight = torch.randn((8, 4, 3, 3), dtype=torch.float16, device="cuda").to(
904
memory_format=torch.channels_last
906
out = torch.convolution(
907
input, weight, None, (1, 1), (1, 1), (1, 1), False, (0, 0), 4
909
input = torch.randn((16, 8, 8, 8), dtype=torch.float16, device="cuda").to(
910
memory_format=torch.channels_last
912
out_transpose = torch.convolution(
913
input, weight, None, (1, 1), (1, 1), (1, 1), True, (0, 0), 4
916
@unittest.expectedFailure
917
@unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
918
@unittest.skipIf(not TEST_CUDNN, "needs cudnn")
919
def test_conv_cudnn_memory_layout_dominance(self):
920
# The desired behavior here is for the memory layout of conv.weight to
# dominate the layout of the output, which is not the current behavior;
# we'll fix this in follow-up PRs and then remove the `expectedFailure` tag.
924
input = torch.randint(
925
1, 10, (2, 8, 4, 4), dtype=torch.float32, device="cuda", requires_grad=True
927
conv = nn.Conv2d(8, 4, 3).cuda().float()
930
self.assertTrue(out.is_contiguous())
932
input = input.contiguous(memory_format=torch.channels_last)
934
self.assertTrue(out.is_contiguous())
936
conv.weight.data = conv.weight.contiguous(memory_format=torch.channels_last)
938
self.assertTrue(out.is_contiguous(memory_format=torch.channels_last))
940
input = input.contiguous()
942
self.assertTrue(out.is_contiguous(memory_format=torch.channels_last))
944
@unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
945
def test_cudnn_noncontiguous_weight(self):
946
# Noncontiguous weights must be contiguous() before being
948
input = torch.tensor([1, 1, 1], dtype=torch.double, device="cuda").view(1, 1, 3)
949
weights1 = torch.tensor([1], dtype=torch.double, device="cuda").expand(1, 1, 2)
951
torch.tensor([1], dtype=torch.double, device="cuda")
956
F.conv1d(input, weights1, bias=None, stride=2, dilation=2),
957
F.conv1d(input, weights2, bias=None, stride=2, dilation=2),
960
def run_grad_conv_test(self, func_forward, func_backward, dim=1, gradient="input"):
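# Shared helper: compare the gradient computed by autograd against the one produced
# by the corresponding torch.nn.grad helper passed in as func_backward.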
961
for kern, inp_size in [(3, 6), (3, 7), (4, 9)]:
962
for batch, stride, padding, chan_in, chan_out, dilation in product(
963
[1, 2], [1, 2], [0, 1, 2], [2], [3], [1]
965
for has_bias in [True, False]:
966
input_shape = [batch, chan_in]
967
weight_shape = [chan_out, chan_in]
969
input_shape.append(inp_size)
970
weight_shape.append(kern)
972
input = torch.randn(input_shape, requires_grad=True)
973
weight = torch.randn(weight_shape, requires_grad=True)
975
bias = torch.randn([chan_out], requires_grad=True)
976
output = func_forward(
985
gradient_o = torch.randn(output.shape)
986
gradient_w = torch.autograd.grad(
987
output, input if (gradient == "input") else weight, gradient_o
993
input_shape if (gradient == "input") else input,
994
weight_shape if (gradient == "weight") else weight,
1002
def test_grad_conv1d_input(self):
1003
self.run_grad_conv_test(F.conv1d, F.grad.conv1d_input, 1, "input")
1005
def test_grad_conv1d_weight(self):
1006
self.run_grad_conv_test(F.conv1d, F.grad.conv1d_weight, 1, "weight")
1008
def test_grad_conv2d_input(self):
1009
self.run_grad_conv_test(F.conv2d, F.grad.conv2d_input, 2, "input")
1011
def test_grad_conv2d_weight(self):
1012
self.run_grad_conv_test(F.conv2d, F.grad.conv2d_weight, 2, "weight")
1014
def test_grad_conv3d_input(self):
1015
self.run_grad_conv_test(F.conv3d, F.grad.conv3d_input, 3, "input")
1017
def test_grad_conv3d_weight(self):
1018
self.run_grad_conv_test(F.conv3d, F.grad.conv3d_weight, 3, "weight")
1020
@unittest.skipIf(not torch._nnpack_available(), "NNPACK unavailable")
1021
def test_nnpack_conv(self):
1022
for kern, inp_size in [(3, 6), (3, 7), (4, 9)]:
1023
for batch, stride, padding, chan_in, chan_out in product(
1024
[1, 2, 3, 4], [1, 2], [0, 1, 2], [2], [3]
1026
for has_bias in [True, False]:
1027
input_shape = [batch, chan_in]
1028
weight_shape = [chan_out, chan_in]
1030
input_shape.append(inp_size)
1031
weight_shape.append(kern)
1033
input = torch.randn(
1034
input_shape, requires_grad=True, dtype=torch.float
1036
weight = torch.randn(
1037
weight_shape, requires_grad=True, dtype=torch.float
1041
[chan_out], requires_grad=True, dtype=torch.float
1043
output = torch._nnpack_spatial_convolution(
1044
input, weight, stride=stride, padding=padding, bias=bias
1046
output_expected = torch.nn.functional.conv2d(
1047
input, weight, stride=stride, padding=padding, bias=bias
1049
self.assertEqual(output, output_expected, atol=3e-4, rtol=0)
1051
gradient_o = torch.randn(output.shape, dtype=torch.float)
1053
grads = torch.autograd.grad(output, [input, weight], gradient_o)
1054
grads_expected = torch.autograd.grad(
1055
output_expected, [input, weight], gradient_o
1057
for gr, gr_expected in zip(grads, grads_expected):
1058
self.assertEqual(gr, gr_expected, atol=3e-4, rtol=0)
1060
def test_conv_padding_mode(self):
1061
with self.assertRaisesRegex(ValueError, "padding_mode must be one of"):
1062
nn.Conv2d(3, 3, 3, padding_mode="xyz")
1064
with self.assertRaisesRegex(ValueError, "padding_mode must be one of"):
1065
nn.Conv2d(3, 3, 3, padding_mode=3)
1067
with self.assertRaisesRegex(ValueError, 'Only "zeros" '):
1068
nn.ConvTranspose2d(3, 3, 3, padding_mode="reflect")
1070
def test_functional_grad_conv(self):
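# Check torch.nn.grad.convNd_input / convNd_weight against autograd for 1d, 2d, and 3d.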
1072
input = torch.randn(1, 1, 5, requires_grad=True)
1073
weight = torch.randn(1, 1, 3, requires_grad=True)
1074
output = F.conv1d(input, weight, dilation=2)
1075
grad_output = torch.randn(output.shape)
1077
grad_input_autograd, grad_weight_autograd = torch.autograd.grad(
1078
output, (input, weight), grad_output
1081
grad_input_functional = torch.nn.grad.conv1d_input(
1082
input.shape, weight, grad_output, dilation=2
1084
self.assertEqual(grad_input_functional, grad_input_autograd)
1086
grad_weight_functional = torch.nn.grad.conv1d_weight(
1087
input, weight.shape, grad_output, dilation=2
1089
self.assertEqual(grad_weight_functional, grad_weight_autograd)
1092
input = torch.randn(1, 1, 5, 5, requires_grad=True)
1093
weight = torch.randn(1, 1, 3, 3, requires_grad=True)
1094
output = F.conv2d(input, weight, dilation=2)
1095
grad_output = torch.randn(output.shape)
1097
(grad_input_autograd, grad_weight_autograd) = torch.autograd.grad(
1098
output, (input, weight), grad_output
1101
grad_input_functional = torch.nn.grad.conv2d_input(
1102
input.shape, weight, grad_output, dilation=2
1104
self.assertEqual(grad_input_functional, grad_input_autograd)
1106
grad_weight_functional = torch.nn.grad.conv2d_weight(
1107
input, weight.shape, grad_output, dilation=2
1109
self.assertEqual(grad_weight_functional, grad_weight_autograd)
1112
input = torch.randn(1, 1, 5, 5, 5, requires_grad=True)
1113
weight = torch.randn(1, 1, 3, 3, 3, requires_grad=True)
1114
output = F.conv3d(input, weight, dilation=2)
1115
grad_output = torch.randn(output.shape)
1117
(grad_input_autograd, grad_weight_autograd) = torch.autograd.grad(
1118
output, (input, weight), grad_output
1121
grad_input_functional = torch.nn.grad.conv3d_input(
1122
input.shape, weight, grad_output, dilation=2
1124
self.assertEqual(grad_input_functional, grad_input_autograd)
1126
grad_weight_functional = torch.nn.grad.conv3d_weight(
1127
input, weight.shape, grad_output, dilation=2
1129
self.assertEqual(grad_weight_functional, grad_weight_autograd)
1131
def test_functional_grad_conv2d(self):
1137
def _test_conv2d(stride, kernel_size, groups, dilation):
1138
padding = kernel_size // 2
1141
torch.empty(BATCH_SIZE, IN_CH, SPATIAL, SPATIAL)
1142
.uniform_(-8.0, 8.0)
1143
.requires_grad_(True)
1147
torch.empty(OUT_CH, IN_CH // groups, kernel_size, kernel_size)
1148
.uniform_(-4.0, 4.0)
1149
.requires_grad_(True)
1161
grad_output = torch.randn(output.shape)
1163
(grad_input_autograd, grad_weight_autograd) = torch.autograd.grad(
1164
output, (input, weight), grad_output
1167
grad_input_functional = torch.nn.grad.conv2d_input(
1176
self.assertEqual(grad_input_functional, grad_input_autograd)
1178
grad_weight_functional = torch.nn.grad.conv2d_weight(
1187
self.assertEqual(grad_weight_functional, grad_weight_autograd)
1190
kernel_sizes = [1, 3, 5]
1194
for s, k, g, d in product(strides, kernel_sizes, groups, dilates):
1195
_test_conv2d(s, k, g, d)
1197
def test_permute_conv2d_issue_120211(self):
1198
def reproducer(radius: int):
1199
image = torch.rand(1, 1024, 1024, 3)
1200
image = image.permute(0, 3, 1, 2)
1201
kernel_x = torch.zeros([3, 1, 1, radius * 2 + 1], device=image.device)
1202
image = torch.nn.functional.conv2d(image, kernel_x, groups=image.shape[-3])
1204
for i in range(0, 128):
1205
# This should not fail
1206
reproducer(radius=i)
1208
def test_conv3d_issue_120406(self):
1209
# This should not fail
1210
F.conv3d(torch.ones(2, 3, 8, 9, 26), torch.ones(3, 1, 1, 1, 17), groups=3)
1212
def test_conv1d_issue_120547(self):
1213
weight = torch.ones([16, 1, 32])
1214
bias = torch.ones([16])
1215
stride, padding, dilation, groups = (1, 16, 1, 16)
1216
input = torch.rand((1, 1, 16))
1217
input = input.transpose(1, 2)
1218
# This should not fail
1219
F.conv1d(input, weight, bias, stride, padding, dilation, groups)
1222
class TestConvolutionNNDeviceType(NNTestCase):
1223
def run_conv_double_back_test(
1240
device = torch.device("cuda")
1242
device = torch.device("cpu")
1253
weight = torch.randn(
1260
requires_grad=not no_weight,
1263
bias = torch.randn(chan_out, device=device, dtype=dtype, requires_grad=True)
1269
lx, lweight, lbias = inputs
1271
lx, lweight = inputs
1273
# We disable cudnn during forward to avoid finite difference imprecision issues
1274
with cudnn.flags(enabled=False):
1275
out = F.conv2d(lx, lweight, lbias, stride, padding, dilation, groups)
1279
inputs = x, weight, bias
1283
dummy_out = func(*inputs)
1284
grad_y = torch.randn_like(
1285
dummy_out, device=device, dtype=dtype, requires_grad=True
1288
# Issue #15353: test mkldnn double backward, don't run gradgradcheck due
1289
# to imprecision issues
1290
if dtype == torch.float:
1291
(g,) = torch.autograd.grad(dummy_out.sum(), x, create_graph=True)
1292
return g.requires_grad
1294
return gradgradcheck(func, inputs, (grad_y,))
1299
*floating_and_complex_types_and(
1300
torch.half, *[torch.bfloat16] if AMPERE_OR_ROCM else []
1303
def test_Conv2d_deterministic_cudnn(self, device, dtype):
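# With deterministic cuDNN algorithms, two identical convolutions must produce
# bitwise-equal outputs and gradients (atol=0, rtol=0).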
1304
inputs = torch.randn(2, 3, 5, 5, device=device, dtype=dtype, requires_grad=True)
1305
with cudnn.flags(enabled=True, benchmark=True, deterministic=True):
1306
conv1 = torch.nn.Conv2d(3, 3, 3).to(device, dtype)
1307
conv2 = torch.nn.Conv2d(3, 3, 3).to(device, dtype)
1308
conv2.bias.data.copy_(conv1.bias.data)
1309
conv2.weight.data.copy_(conv1.weight.data)
1310
out1 = conv1(inputs)
1311
out2 = conv2(inputs)
1312
self.assertEqual(out1, out2, atol=0.0, rtol=0)
1313
y = torch.randn(out1.size(), device=device, dtype=dtype)
1317
conv1.bias.grad.data, conv2.bias.grad.data, atol=0.0, rtol=0
1320
conv1.weight.grad.data, conv2.weight.grad.data, atol=0.0, rtol=0
1325
*floating_types_and(torch.half, *[torch.bfloat16] if AMPERE_OR_ROCM else [])
1327
def test_Conv2d_large_workspace(self, device, dtype):
1328
# These sizes require huge cuDNN workspaces. Make sure we choose a
1329
# reasonable algorithm that does not run out of memory
1336
def run_test(benchmark):
1337
with torch.backends.cudnn.flags(enabled=True, benchmark=benchmark):
1338
conv = torch.nn.Conv2d(256, 256, kernel_size=3, padding=1).to(
1342
x = torch.randn(size, device=device, dtype=dtype)
1343
out = conv(x.detach().clone().requires_grad_())
1344
out.backward(torch.ones_like(out))
1346
run_test(benchmark=False)
1347
run_test(benchmark=True)
1350
@dtypes(torch.half, torch.float)
1351
def test_ConvTranspose2d_large_output_padding(self, device, dtype):
1352
net1 = torch.nn.ConvTranspose2d(
1353
128, 64, kernel_size=3, stride=2, padding=1, output_padding=1
1354
).to(device=device, dtype=dtype)
1355
net2 = torch.nn.ConvTranspose2d(
1356
64, 32, kernel_size=3, stride=2, padding=1, output_padding=1
1357
).to(device=device, dtype=dtype)
1358
net3 = torch.nn.ConvTranspose2d(
1359
32, 3, kernel_size=3, stride=2, padding=1, output_padding=1
1360
).to(device=device, dtype=dtype)
1361
x = torch.rand(1, 128, 6, 6, device=device, dtype=dtype, requires_grad=True)
1365
x.backward(torch.randn_like(x))
1366
torch.cuda.synchronize()
1369
@dtypes(torch.float, torch.double, torch.half)
1370
# Very similar to test_Conv2d_naive_groups but with special care to handle
1371
# the number of groups == number of input channels
1372
@torch.backends.cudnn.flags(enabled=True, benchmark=False)
1373
@tf32_on_and_off(0.01)
1374
def test_Conv2d_depthwise_naive_groups(self, device, dtype):
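# Split a depthwise convolution (groups == in_channels) into per-input-channel
# convolutions and check that outputs, input, bias, and weight gradients agree.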
1375
for depth_multiplier in [1, 2]:
1376
m = nn.Conv2d(2, 2 * depth_multiplier, kernel_size=3, groups=2).to(
1380
torch.randn(2, 2, 6, 6, device="cuda", dtype=dtype)
1386
torch.randn(2, 2 * depth_multiplier, 4, 4, device=device, dtype=dtype)
1389
output.backward(grad_output)
1391
offset = 1 * depth_multiplier
1393
m1 = nn.Conv2d(1, 1 * depth_multiplier, kernel_size=3).to(device, dtype)
1394
m1.weight.data = m.weight.data[:offset].clone()
1395
m1.bias.data = m.bias.data[:offset].clone()
1396
i1 = i.detach()[:, :1].clone().requires_grad_()
1398
output1.backward(grad_output[:, :offset].contiguous())
1400
m2 = nn.Conv2d(1, 1 * depth_multiplier, kernel_size=3).to(device, dtype)
1401
m2.weight.data.copy_(m.weight.data[offset:])
1402
m2.bias.data.copy_(m.bias.data[offset:])
1403
i2 = i.detach()[:, 1:].clone().requires_grad_()
1405
output2.backward(grad_output[:, offset:].contiguous())
1409
torch.cat([output1, output2], 1),
1410
atol=dtype2prec_DONTUSE[dtype],
1415
torch.cat([i1.grad.data, i2.grad.data], 1),
1416
atol=dtype2prec_DONTUSE[dtype],
1421
torch.cat([m1.bias.grad.data, m2.bias.grad.data], 0),
1422
atol=dtype2prec_DONTUSE[dtype],
1427
torch.cat([m1.weight.grad.data, m2.weight.grad.data], 0),
1428
atol=dtype2prec_DONTUSE[dtype],
1433
@dtypes(torch.float, torch.double, torch.half)
1434
@torch.backends.cudnn.flags(enabled=True, benchmark=False)
1435
@tf32_on_and_off(0.01)
1436
def test_Conv3d_depthwise_naive_groups(self, device, dtype):
1437
for depth_multiplier in [1, 2]:
1438
m = nn.Conv3d(2, 2 * depth_multiplier, kernel_size=3, groups=2).to(
1442
torch.randn(2, 2, 6, 6, 6, device="cuda", dtype=dtype)
1449
2, 2 * depth_multiplier, 4, 4, 4, device=device, dtype=dtype
1453
output.backward(grad_output)
1455
offset = 1 * depth_multiplier
1457
m1 = nn.Conv3d(1, 1 * depth_multiplier, kernel_size=3).to(device, dtype)
1458
m1.weight.data = m.weight.data[:offset].clone()
1459
m1.bias.data = m.bias.data[:offset].clone()
1460
i1 = i.detach()[:, :1].clone().requires_grad_()
1462
output1.backward(grad_output[:, :offset].contiguous())
1464
m2 = nn.Conv3d(1, 1 * depth_multiplier, kernel_size=3).to(device, dtype)
1465
m2.weight.data.copy_(m.weight.data[offset:])
1466
m2.bias.data.copy_(m.bias.data[offset:])
1467
i2 = i.detach()[:, 1:].clone().requires_grad_()
1469
output2.backward(grad_output[:, offset:].contiguous())
1470
is_cuda_sm86 = device.startswith(
1472
) and torch.cuda.get_device_capability(0) == (8, 6)
1475
if dtype == torch.float32 and is_cuda_sm86
1476
else (dtype2prec_DONTUSE[dtype], 0)
1480
output, torch.cat([output1, output2], 1), atol=atol, rtol=rtol
1484
torch.cat([i1.grad.data, i2.grad.data], 1),
1485
atol=dtype2prec_DONTUSE[dtype],
1490
torch.cat([m1.bias.grad.data, m2.bias.grad.data], 0),
1491
atol=dtype2prec_DONTUSE[dtype],
1496
torch.cat([m1.weight.grad.data, m2.weight.grad.data], 0),
1503
*floating_types_and(torch.half, *[torch.bfloat16] if AMPERE_OR_ROCM else [])
1505
def test_noncontig_conv_grad(self, device, dtype):
1506
# FIXME: remove after adding non-contiguous grad tests for all modules
1507
module = nn.Conv2d(3, 5, kernel_size=3, padding=1).to(device, dtype)
1508
input = torch.randn(
1509
2, 3, 10, 10, dtype=dtype, device=device, requires_grad=True
1511
output = module(input)
1513
grad = torch.randn(2, 2, 5, 10, 10, dtype=dtype, device=device)[:, 1]
1514
assert not grad.is_contiguous()
1515
output.backward(grad, retain_graph=True)
1516
self.assertIsNotNone(input.grad)
1517
result = input.grad.data.clone()
1518
input.grad.data.zero_()
1520
output.backward(grad.contiguous())
1522
result, input.grad.data, atol=dtype2prec_DONTUSE[dtype], rtol=0
1526
@dtypes(torch.double)
1527
def test_conv_double_backward(self, device, dtype):
1528
with torch.backends.cudnn.flags(enabled=True, deterministic=True):
1529
# Double backward only runs with DoubleTensor due to precision reasons
1531
for kern, inp_size, dilations in [(3, 5, [1, 2]), (4, 9, [1])]:
1532
for stride, padding, chan_in, chan_out, dilation in product(
1533
[1], [2], [2], [3], dilations
1535
no_weight = stride == 2
1536
result = self.run_conv_double_back_test(
1551
"Conv double backward test failed with parameters:"
1570
def test_conv_double_backward_no_bias(self):
1573
chan_in, chan_out = 2, 4
1580
result = self.run_conv_double_back_test(
1594
"Conv double backward test failed with parameters:"
1613
def test_conv_double_backward_groups(self):
1617
chan_in, chan_out = 2, 4
1623
result = self.run_conv_double_back_test(
1637
"Conv double backward test failed with parameters:"
1658
def test_conv_double_backward_stride(self):
1661
# Cannot provide ggW when stride is > 1
1662
for kern, inp_size, dilations in [(3, 5, [1, 2]), (3, 7, [1])]:
1663
for stride, padding, chan_in, chan_out, dilation in product(
1664
[2], [0, 1], [1], [2], dilations
1667
self.run_conv_double_back_test(
1679
@dtypes(torch.float, torch.cfloat)
1680
@torch.backends.cudnn.flags(enabled=True, benchmark=False)
1681
def test_conv1d_same_padding(self, device, dtype):
1682
# Test padding='same' outputs the correct shape
1693
for in_size, k_size, dilation, stride in itertools.product(*test_args):
1694
x = torch.rand(1, 1, in_size, device=device, dtype=dtype)
1695
y = torch.rand(1, 1, k_size, device=device, dtype=dtype)
1696
z = F.conv1d(x, y, padding="same", dilation=dilation, stride=stride)
1697
self.assertEqual(z.size(2), int(math.ceil(in_size / stride)))
1699
# Compare F.conv1d padding='same' output against manual padding
1700
# Without strides/dilation
1701
x = torch.rand(1, 1, 12, device=device, dtype=dtype)
1702
y = torch.rand(1, 1, 3, device=device, dtype=dtype)
1703
expect = F.conv1d(x, y, padding=1)
1704
actual = F.conv1d(x, y, padding="same")
1705
self.assertEqual(expect, actual)
1708
x = torch.rand(1, 1, 12, device=device, dtype=dtype)
1709
y = torch.rand(1, 1, 4, device=device, dtype=dtype)
1710
expect = F.conv1d(x, y, padding=3, dilation=2)
1711
actual = F.conv1d(x, y, padding="same", dilation=2)
1712
self.assertEqual(expect, actual)
1714
# Dilation with asymmetric padding
1715
expect = F.conv1d(x, y, padding=5, dilation=3)[..., 1:]
1716
actual = F.conv1d(x, y, padding="same", dilation=3)
1717
self.assertEqual(expect, actual)
1719
@dtypes(torch.float, torch.cfloat)
1720
def test_conv2d_same_padding(self, device, dtype):
1721
if dtype is torch.cfloat:
1722
rtol, atol = 2e-6, 2e-6
1724
rtol, atol = None, None
1725
# Compare F.conv2d padding='same' output against manual padding
1726
# Without strides/dilation
1727
x = torch.rand(1, 1, 10, 11, device=device, dtype=dtype)
1728
y = torch.rand(1, 1, 4, 5, device=device, dtype=dtype)
1729
expect = F.conv2d(x, y, padding=(2, 2))[..., 1:, :]
1730
actual = F.conv2d(x, y, padding="same")
1731
self.assertEqual(expect, actual, rtol=rtol, atol=atol)
1734
y = torch.rand(1, 1, 3, 4, device=device, dtype=dtype)
1735
expect = F.conv2d(x, y, padding=(2, 3), dilation=2)
1736
actual = F.conv2d(x, y, padding="same", dilation=2)
1737
self.assertEqual(expect, actual, rtol=rtol, atol=atol)
1739
# Dilation with asymmetric padding
1740
y = torch.rand(1, 1, 4, 4, device=device, dtype=dtype)
1741
expect = F.conv2d(x, y, padding=5, dilation=3)[..., 1:, 1:]
1742
actual = F.conv2d(x, y, padding="same", dilation=3)
1743
self.assertEqual(expect, actual, rtol=rtol, atol=atol)
1745
@dtypes(torch.float, torch.cfloat)
1746
def test_conv3d_same_padding(self, device, dtype):
1747
if dtype is torch.cfloat:
1748
rtol, atol = 2e-6, 2e-6
1750
rtol, atol = None, None
1751
# Compare F.conv3d padding='same' output against manual padding
1752
# Without strides/dilation
1753
x = torch.rand(1, 1, 10, 11, 12, device=device, dtype=dtype)
1754
y = torch.rand(1, 1, 1, 2, 5, device=device, dtype=dtype)
1755
expect = F.conv3d(x, y, padding=(0, 1, 2))[..., :, 1:, :]
1756
actual = F.conv3d(x, y, padding="same")
1757
self.assertEqual(expect, actual, rtol=rtol, atol=atol)
1760
expect = F.conv3d(x, y, padding=(0, 1, 4), dilation=2)
1761
actual = F.conv3d(x, y, padding="same", dilation=2)
1762
self.assertEqual(expect, actual, rtol=rtol, atol=atol)
1764
# Dilation with asymmetric padding
1765
y = torch.rand(1, 1, 4, 4, 4, device=device, dtype=dtype)
1766
expect = F.conv3d(x, y, padding=5, dilation=3)[..., 1:, 1:, 1:]
1767
actual = F.conv3d(x, y, padding="same", dilation=3)
1768
self.assertEqual(expect, actual, rtol=rtol, atol=atol)
1770
@dtypes(torch.float, torch.cfloat)
1771
def test_conv1d_valid_padding(self, device, dtype):
1772
# Test F.conv1d padding='valid' is the same as no padding
1773
x = torch.rand(1, 1, 10, device=device, dtype=dtype)
1774
y = torch.rand(1, 1, 4, device=device, dtype=dtype)
1775
expect = F.conv1d(x, y)
1776
actual = F.conv1d(x, y, padding="valid")
1777
self.assertEqual(expect, actual)
1779
@dtypes(torch.float, torch.cfloat)
1780
def test_conv2d_valid_padding(self, device, dtype):
1781
# Test F.conv2d padding='valid' is the same as no padding
1782
x = torch.rand(1, 1, 1, 10, device=device, dtype=dtype)
1783
y = torch.rand(1, 1, 1, 4, device=device, dtype=dtype)
1784
expect = F.conv2d(x, y)
1785
actual = F.conv2d(x, y, padding="valid")
1786
self.assertEqual(expect, actual)
1788
@dtypes(torch.float, torch.cfloat)
1789
def test_conv3d_valid_padding(self, device, dtype):
1790
# Test F.conv3d padding='valid' is the same as no padding
1791
x = torch.rand(1, 1, 1, 1, 10, dtype=dtype, device=device)
1792
y = torch.rand(1, 1, 1, 1, 4, dtype=dtype, device=device)
1793
expect = F.conv3d(x, y)
1794
actual = F.conv3d(x, y, padding="valid")
1795
self.assertEqual(expect, actual)
1797
@dtypes(torch.float, torch.cfloat)
1798
def test_conv1d_same_padding_backward(self, device, dtype):
1799
# Test F.conv1d gradients work with padding='same'
1800
x = torch.rand(1, 1, 12, dtype=dtype, device=device, requires_grad=True)
1801
y = torch.rand(1, 1, 4, dtype=dtype, device=device, requires_grad=True)
1804
z = F.conv1d(x, y, padding=3, dilation=2)
1805
z.sum().abs().backward()
1806
gx_expect, gy_expect = x.grad, y.grad
1807
x.grad, y.grad = None, None
1809
z = F.conv1d(x, y, padding="same", dilation=2)
1810
z.sum().abs().backward()
1811
self.assertEqual(gx_expect, x.grad)
1812
self.assertEqual(gy_expect, y.grad)
1813
x.grad, y.grad = None, None
1815
# Asymmetric padding
1816
z = F.conv1d(x, y, padding=2)[..., 1:]
1817
z.sum().abs().backward()
1818
gx_expect, gy_expect = x.grad, y.grad
1819
x.grad, y.grad = None, None
1821
z = F.conv1d(x, y, padding="same")
1822
z.sum().abs().backward()
1823
self.assertEqual(gx_expect, x.grad)
1824
self.assertEqual(gy_expect, y.grad)
1826
@dtypes(torch.float, torch.cfloat)
1827
@tf32_on_and_off(0.001)
1828
def test_conv2d_same_padding_backward(self, device, dtype):
1829
# Test F.conv2d gradients work with padding='same'
1830
x = torch.rand(1, 1, 10, 11, device=device, dtype=dtype, requires_grad=True)
1831
y = torch.rand(1, 1, 4, 5, device=device, dtype=dtype, requires_grad=True)
1834
z = F.conv2d(x, y, padding=(3, 4), dilation=2)
1835
z.sum().abs().backward()
1836
gx_expect, gy_expect = x.grad, y.grad
1837
x.grad, y.grad = None, None
1839
z = F.conv2d(x, y, padding="same", dilation=2)
1840
z.sum().abs().backward()
1841
self.assertEqual(gx_expect, x.grad)
1842
self.assertEqual(gy_expect, y.grad)
1843
x.grad, y.grad = None, None
1845
# Asymmetric padding
1846
y = torch.rand(1, 1, 4, 4, device=device, dtype=dtype, requires_grad=True)
1847
z = F.conv2d(x, y, padding=2)[..., 1:, 1:]
1848
z.sum().abs().backward()
1849
gx_expect, gy_expect = x.grad, y.grad
1850
x.grad, y.grad = None, None
1852
z = F.conv2d(x, y, padding="same")
1853
z.sum().abs().backward()
1854
self.assertEqual(gx_expect, x.grad)
1855
self.assertEqual(gy_expect, y.grad)
1857
@dtypes(torch.double, torch.cdouble)
1858
def test_conv3d_same_padding_backward(self, device, dtype):
1859
check_forward_ad = torch.device(device).type != "xla"
1861
# Test F.conv3d gradients work with padding='same'
1862
x = torch.rand(1, 1, 1, 11, 12, dtype=dtype, device=device, requires_grad=True)
1863
y = torch.rand(1, 1, 1, 2, 5, dtype=dtype, device=device, requires_grad=True)
1866
z = F.conv3d(x, y, padding=(0, 1, 4), dilation=2)
1867
z.sum().abs().backward()
1868
gx_expect, gy_expect = x.grad, y.grad
1869
x.grad, y.grad = None, None
1871
z = F.conv3d(x, y, padding="same", dilation=2)
1872
z.sum().abs().backward()
1873
self.assertEqual(gx_expect, x.grad)
1874
self.assertEqual(gy_expect, y.grad)
1875
x.grad, y.grad = None, None
1878
lambda x, y: F.conv3d(x, y, padding="same", dilation=2),
1880
check_forward_ad=check_forward_ad,
1883
if torch.device(device).type != "cuda":
1884
# https://github.com/pytorch/pytorch/issues/70702
1886
lambda x, y: F.conv3d(x, y, padding="same", dilation=2),
1888
check_fwd_over_rev=True,
1891
# Asymmetric padding
1892
y = torch.rand(1, 1, 1, 4, 4, dtype=dtype, device=device, requires_grad=True)
1893
z = F.conv3d(x, y, padding=2)[..., 1:, 1:]
1894
z.sum().abs().backward()
1895
gx_expect, gy_expect = x.grad, y.grad
1896
x.grad, y.grad = None, None
1898
z = F.conv3d(x, y, padding="same")
1899
z.sum().abs().backward()
1900
self.assertEqual(gx_expect, x.grad)
1901
self.assertEqual(gy_expect, y.grad)
1904
lambda x, y: F.conv3d(x, y, padding="same"),
1906
check_forward_ad=check_forward_ad,
1909
if torch.device(device).type != "cuda":
1910
# https://github.com/pytorch/pytorch/issues/70702
1912
lambda x, y: F.conv3d(x, y, padding="same"),
1914
check_fwd_over_rev=True,
1917
@dtypes(torch.float, torch.cfloat)
1918
def test_conv1d_valid_padding_backward(self, device, dtype):
1919
# Test F.conv1d gradients work with padding='valid'
1920
x = torch.rand(1, 1, 10, dtype=dtype, device=device, requires_grad=True)
1921
y = torch.rand(1, 1, 4, dtype=dtype, device=device, requires_grad=True)
1922
F.conv1d(x, y, padding=0).sum().abs().backward()
1923
gx_expect, gy_expect = x.grad, y.grad
1924
x.grad, y.grad = None, None
1926
F.conv1d(x, y, padding="valid").sum().abs().backward()
1927
gx_actual, gy_actual = x.grad, y.grad
1928
self.assertEqual(gx_expect, gx_actual)
1929
self.assertEqual(gy_expect, gy_actual)
1931
@unittest.skipIf(not TEST_SCIPY, "Scipy required for the test.")
1932
@dtypes(torch.float, torch.cfloat)
1933
@parametrize_test("mode", ("valid", "same"))
1934
def test_conv1d_vs_scipy(self, device, dtype, mode):
1935
t = make_tensor((1, 10), device=device, dtype=dtype)
1936
feat_dim = t.shape[1]
1937
weight_even = make_tensor((1, 1, 4), device=device, dtype=dtype)
1938
weight_odd = make_tensor((1, 1, 5), device=device, dtype=dtype)
1940
def _test(t, weight, mode):
1941
# SciPy expects two 1-D inputs.
1942
t_a = t.view(-1).cpu().numpy()
1943
w_a = weight.view(-1).cpu().numpy()
1944
expected = scipy.signal.convolve(t_a, w_a, mode=mode)
1946
kwargs = {"padding": mode}
1948
# `same` padding in PyTorch conv1d differs from SciPy's, so pad manually here
1950
p = weight.shape[2] // 2
1951
t = torch.nn.functional.pad(t, (p, p))
1952
# We have already taken care of padding
1953
kwargs.pop("padding")
1955
# second input is flipped in SciPy's convolve
1956
weight_flipped = torch.flip(weight, (2,))
1957
actual = torch.nn.functional.conv1d(t, weight_flipped, **kwargs).squeeze(0)
1959
actual = actual[:feat_dim]
1961
self.assertEqual(actual, expected, atol=2e-5, rtol=2e-5)
1963
# The global dtype for this test suite is torch.double.
# That changes type promotion, so conv1d would output `complex128`
# for `complex64` input; pin the default dtype to float here.
1966
with set_default_dtype(torch.float):
1967
_test(t, weight_even, mode)
1968
_test(t, weight_odd, mode)
1970
@unittest.skipIf(not TEST_SCIPY, "Scipy required for the test.")
1971
@dtypes(torch.float, torch.cfloat)
1972
@parametrize_test("mode", ("valid", "same"))
1973
def test_conv2d_vs_scipy(self, device, dtype, mode):
1974
t = make_tensor((1, 5, 10), device=device, dtype=dtype)
1975
weight_even = make_tensor((1, 1, 2, 4), device=device, dtype=dtype)
1976
weight_odd = make_tensor((1, 1, 3, 5), device=device, dtype=dtype)
1978
def _test(t, weight, mode):
1979
# SciPy expects two 2-D inputs.
1980
t_a = t.squeeze(0).cpu().numpy()
1981
w_a = weight.squeeze(0).squeeze(0).cpu().numpy()
1982
expected = scipy.signal.convolve2d(t_a, w_a, mode=mode)
1984
kwargs = {"padding": mode}
1986
# `same` padding in PyTorch conv2d differs from SciPy's, so pad manually here
1988
left_right_pad = weight.shape[3] // 2
1989
top_bottom_pad = weight.shape[2] // 2
1990
p = (left_right_pad, left_right_pad, top_bottom_pad, top_bottom_pad)
1991
t = torch.nn.functional.pad(t, p)
1992
# We have already taken care of padding
1993
kwargs.pop("padding")
1995
# second input is flipped in SciPy's convolve2d
1996
weight_flipped = torch.flip(weight, (2, 3))
1997
actual = torch.nn.functional.conv2d(t, weight_flipped, **kwargs).squeeze(0)
1999
actual = actual[:5, :10]
2001
self.assertEqual(actual, expected, rtol=2e-5, atol=5e-6)
2003
# The global dtype for this test suite is torch.double.
# That changes type promotion, so conv2d would output `complex128`
# for `complex64` input; pin the default dtype to float here.
2006
with set_default_dtype(torch.float):
2007
_test(t, weight_even, mode)
2008
_test(t, weight_odd, mode)
2010
@unittest.skipIf(not TEST_SCIPY, "Scipy required for the test.")
2011
@dtypes(torch.float, torch.cfloat)
2012
@parametrize_test("mode", ("valid", "same"))
2013
def test_conv3d_vs_scipy(self, device, dtype, mode):
2014
t = make_tensor((1, 5, 5, 10), device=device, dtype=dtype)
2015
weight_even = make_tensor((1, 1, 2, 2, 4), device=device, dtype=dtype)
2016
weight_odd = make_tensor((1, 1, 2, 3, 5), device=device, dtype=dtype)
2018
def _test(t, weight, mode):
2019
# SciPy expects two 3-D inputs.
2020
t_a = t.squeeze(0).cpu().numpy()
2021
w_a = weight.squeeze(0).squeeze(0).cpu().numpy()
2022
expected = scipy.signal.convolve(t_a, w_a, mode=mode)
2024
kwargs = {"padding": mode}
2026
# `same` padding in PyTorch conv3d differs from SciPy's, so pad manually here
2028
left_right_pad = weight.shape[4] // 2
2029
top_bottom_pad = weight.shape[3] // 2
2030
front_back_pad = weight.shape[2] // 2
2039
t = torch.nn.functional.pad(t, p)
2040
# We have already taken care of padding
2041
kwargs.pop("padding")
2043
# second input is flipped in SciPy's convolve
2044
weight_flipped = torch.flip(weight, (2, 3, 4))
2045
actual = torch.nn.functional.conv3d(t, weight_flipped, **kwargs).squeeze(0)
2047
actual = actual[:5, :5, :10]
2049
if tf32_is_not_fp32() and (
2050
dtype == torch.float or dtype == torch.complex64
2052
self.assertEqual(actual, expected, atol=0.05, rtol=0.05)
2054
self.assertEqual(actual, expected, rtol=2e-5, atol=5e-6)
2056
# The global dtype for this test suite is torch.double.
# That changes type promotion, so conv3d would output `complex128`
# for `complex64` input; pin the default dtype to float here.
2059
with set_default_dtype(torch.float):
2060
_test(t, weight_even, mode)
2061
_test(t, weight_odd, mode)
2063
@dtypes(torch.float, torch.complex64)
2064
def test_conv2d_valid_padding_backward(self, device, dtype):
2065
# Test F.conv2d gradients work with padding='valid'
2066
x = torch.rand(1, 1, 1, 10, device=device, dtype=dtype, requires_grad=True)
2067
y = torch.rand(1, 1, 1, 4, device=device, dtype=dtype, requires_grad=True)
2068
F.conv2d(x, y, padding=0).sum().abs().backward()
2069
gx_expect, gy_expect = x.grad, y.grad
2070
x.grad, y.grad = None, None
2072
F.conv2d(x, y, padding="valid").sum().abs().backward()
2073
gx_actual, gy_actual = x.grad, y.grad
2074
self.assertEqual(gx_expect, gx_actual)
2075
self.assertEqual(gy_expect, gy_actual)
2077
@dtypes(torch.double, torch.cdouble)
def test_conv3d_valid_padding_backward(self, device, dtype):
    check_forward_ad = torch.device(device).type != "xla"

    # Test F.conv3d gradients work with padding='valid'
    x = torch.rand(1, 1, 1, 1, 10, dtype=dtype, device=device, requires_grad=True)
    y = torch.rand(1, 1, 1, 1, 4, dtype=dtype, device=device, requires_grad=True)
    F.conv3d(x, y, padding=0).sum().abs().backward()
    gx_expect, gy_expect = x.grad, y.grad
    x.grad, y.grad = None, None

    F.conv3d(x, y, padding="valid").sum().abs().backward()
    gx_actual, gy_actual = x.grad, y.grad
    self.assertEqual(gx_expect, gx_actual)
    self.assertEqual(gy_expect, gy_actual)

    gradcheck(
        lambda x, y: F.conv3d(x, y, padding="valid"),
        (x, y),
        check_forward_ad=check_forward_ad,
    )
    gradgradcheck(
        lambda x, y: F.conv3d(x, y, padding="valid"),
        (x, y),
        check_fwd_over_rev=check_forward_ad,
    )
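
# --- Illustrative sketch (not part of the original tests, helper name is
# hypothetical): what `check_forward_ad` exercises. Forward-mode AD pushes a
# tangent through the op via dual tensors; `torch.autograd.forward_ad` is
# already imported at the top of this file as `fwAD`.
def _sketch_conv_forward_ad():
    import torch
    import torch.autograd.forward_ad as fwAD
    import torch.nn.functional as F

    x = torch.rand(1, 1, 1, 1, 10, dtype=torch.double)
    w = torch.rand(1, 1, 1, 1, 4, dtype=torch.double)
    x_t = torch.rand_like(x)  # tangent attached to the input

    with fwAD.dual_level():
        dual_x = fwAD.make_dual(x, x_t)
        out = F.conv3d(dual_x, w, padding="valid")
        tangent = fwAD.unpack_dual(out).tangent
        # conv is linear in its input, so the tangent is just conv3d(x_t, w).
        torch.testing.assert_close(tangent, F.conv3d(x_t, w, padding="valid"))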
@parametrize_test("N", range(2, 4), name_fn=lambda N: f"ConvTranspose{N}d")
def test_conv_transpose_with_output_size_and_no_batch_dim(self, device, N):
    # For inputs with no batch dim, verify output is the correct shape when output_size is set.
    # See https://github.com/pytorch/pytorch/issues/75889
    inp = torch.randn((1, 15, 13) if N == 2 else (1, 15, 13, 13), device=device)
    output_size = (1, 240, 200) if N == 2 else (1, 240, 200, 200)
    ConvTransposeNd = getattr(nn, f"ConvTranspose{N}d")
    m = ConvTransposeNd(
        1, 1, kernel_size=16, stride=16, padding=7, bias=False, device=device
    )
    output = m(inp, output_size=output_size)
    self.assertEqual(output.shape, output_size)
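
# --- Illustrative sketch (not part of the original tests, helper name is
# hypothetical): why `output_size` is needed. A transposed conv maps one input
# size to a whole range of valid output sizes,
#     out = (in - 1) * stride - 2 * padding + kernel_size + output_padding,
# with 0 <= output_padding < stride, and `output_size` selects one of them.
def _sketch_conv_transpose_output_size():
    import torch
    import torch.nn as nn

    m = nn.ConvTranspose1d(1, 1, kernel_size=16, stride=16, padding=7, bias=False)
    inp = torch.randn(1, 1, 13)
    # (13 - 1) * 16 - 2 * 7 + 16 = 194 is the default; anything in [194, 210) is valid.
    assert m(inp).shape[-1] == 194
    assert m(inp, output_size=(200,)).shape[-1] == 200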
"input_shape,transposed,dilated,groups,layout,backend_expected",
2129
torch._C._ConvBackend.Slow2d,
2131
decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN],
2141
torch._C._ConvBackend.SlowTranspose2d,
2143
decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN],
2144
name="slow1d_transposed",
2153
torch._C._ConvBackend.SlowDilated2d,
2155
decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN],
2156
name="slow1d_dilated",
2165
torch._C._ConvBackend.SlowTranspose2d,
2167
decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN],
2168
name="slow1d_dilated_transposed",
2177
torch._C._ConvBackend.Slow2d,
2179
decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN],
2189
torch._C._ConvBackend.SlowTranspose2d,
2191
decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN],
2192
name="slow2d_transposed",
2201
torch._C._ConvBackend.SlowDilated2d,
2203
decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN],
2204
name="slow2d_dilated",
2213
torch._C._ConvBackend.SlowTranspose2d,
2215
decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN],
2216
name="slow2d_dilated_transposed",
2225
torch._C._ConvBackend.Slow3d,
2227
decorators=[onlyCPU, disableMkldnn],
2230
# CUDA doesn't have a slow 3D implementation, so it goes to the dilated 3D implementation instead
2238
torch._C._ConvBackend.SlowDilated3d,
2240
decorators=[onlyCUDA, disablecuDNN],
2243
# FIXME: RuntimeError: CUDA out of memory.
2244
# subtest(((2, 6, 7, 8, 9), True, False, 3, torch.strided, torch._C._ConvBackend.SlowTranspose3d),
2245
# decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN], name='slow3d_transposed'),
2253
torch._C._ConvBackend.SlowDilated3d,
2255
decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN],
2256
name="slow3d_dilated",
2258
# FIXME: RuntimeError: CUDA out of memory.
2259
# subtest(((2, 6, 7, 8, 9), True, True, 3, torch.strided, torch._C._ConvBackend.SlowTranspose3d),
2260
# decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN], name='slow3d_dilated_transposed'),
2268
torch._C._ConvBackend.Empty,
2270
decorators=[onlyNativeDeviceTypes, disableMkldnn],
2271
name="empty_batch1d",
2280
torch._C._ConvBackend.Empty,
2282
decorators=[onlyNativeDeviceTypes, disableMkldnn],
2283
name="empty_channel1d",
2292
torch._C._ConvBackend.Empty,
2294
decorators=[onlyNativeDeviceTypes, disableMkldnn],
2295
name="empty_batch_channel1d",
2304
torch._C._ConvBackend.Empty,
2306
decorators=[onlyNativeDeviceTypes, disableMkldnn],
2307
name="empty_batch2d",
2316
torch._C._ConvBackend.Empty,
2318
decorators=[onlyNativeDeviceTypes, disableMkldnn],
2319
name="empty_channel2d",
2328
torch._C._ConvBackend.Empty,
2330
decorators=[onlyNativeDeviceTypes, disableMkldnn],
2331
name="empty_batch_channel2d",
2340
torch._C._ConvBackend.Empty,
2342
decorators=[onlyNativeDeviceTypes, disableMkldnn],
2343
name="empty_batch3d",
2352
torch._C._ConvBackend.Empty,
2354
decorators=[onlyNativeDeviceTypes, disableMkldnn],
2355
name="empty_channel3d",
2364
torch._C._ConvBackend.Empty,
2366
decorators=[onlyNativeDeviceTypes, disableMkldnn],
2367
name="empty_batch_channel3d",
2370
# Note that disablecuDNN disables miopen as well.
2378
torch._C._ConvBackend.CudaDepthwise2d,
2380
decorators=[onlyCUDA, disablecuDNN],
2381
name="cuda_depthwise1d",
2390
torch._C._ConvBackend.CudaDepthwise2d,
2392
decorators=[onlyCUDA, disablecuDNN],
2393
name="cuda_depthwise2d",
2402
torch._C._ConvBackend.CudaDepthwise3d,
2404
decorators=[onlyCUDA, disablecuDNN],
2405
name="cuda_depthwise3d",
2415
torch._C._ConvBackend.Cudnn,
2417
decorators=[onlyCUDA, skipCUDAIfNoCudnn, skipCUDAIfMiopen],
2427
torch._C._ConvBackend.Cudnn,
2429
decorators=[onlyCUDA, skipCUDAIfNoCudnn, skipCUDAIfMiopen],
2439
torch._C._ConvBackend.Cudnn,
2441
decorators=[onlyCUDA, skipCUDAIfNoCudnn, skipCUDAIfMiopen],
2451
torch._C._ConvBackend.CudnnTranspose,
2453
decorators=[onlyCUDA, skipCUDAIfNoCudnn, skipCUDAIfMiopen],
2454
name="cudnn1d_transposed",
2463
torch._C._ConvBackend.CudnnTranspose,
2465
decorators=[onlyCUDA, skipCUDAIfNoCudnn, skipCUDAIfMiopen],
2466
name="cudnn2d_transposed",
2468
# FIXME: RuntimeError: CUDA out of memory.
2469
# subtest(((2, 6, 7, 8, 9), True, False, 3, torch.strided, torch._C._ConvBackend.CudnnTranspose),
2470
# decorators=[onlyCUDA, skipCUDAIfNoCudnn, skipCUDAIfMiopen], name='cudnn3d_transposed'),
2479
torch._C._ConvBackend.Miopen,
2481
decorators=[onlyCUDA, skipCUDAIfNoMiopen],
2491
torch._C._ConvBackend.Miopen,
2493
decorators=[onlyCUDA, skipCUDAIfNoMiopen],
2503
torch._C._ConvBackend.Miopen,
2505
decorators=[onlyCUDA, skipCUDAIfNoMiopen],
2515
torch._C._ConvBackend.MiopenTranspose,
2517
decorators=[onlyCUDA, skipCUDAIfNoMiopen],
2518
name="miopen1d_transposed",
2527
torch._C._ConvBackend.MiopenTranspose,
2529
decorators=[onlyCUDA, skipCUDAIfNoMiopen],
2530
name="miopen2d_transposed",
2539
torch._C._ConvBackend.MiopenTranspose,
2541
decorators=[onlyCUDA, skipCUDAIfNoMiopen],
2542
name="miopen3d_transposed",
2551
torch._C._ConvBackend.MiopenDepthwise,
2553
decorators=[onlyCUDA, skipCUDAIfNoMiopen],
2554
name="miopen_depthwise1d",
2563
torch._C._ConvBackend.MiopenDepthwise,
2565
decorators=[onlyCUDA, skipCUDAIfNoMiopen],
2566
name="miopen_depthwise2d",
2575
torch._C._ConvBackend.MiopenDepthwise,
2577
decorators=[onlyCUDA, skipCUDAIfNoMiopen],
2578
name="miopen_depthwise3d",
2588
torch._C._ConvBackend.Mkldnn,
2590
decorators=[onlyCPU, skipCPUIfNoMkldnn],
2600
torch._C._ConvBackend.Mkldnn,
2602
decorators=[onlyCPU, skipCPUIfNoMkldnn],
2612
torch._C._ConvBackend.Mkldnn,
2614
decorators=[onlyCPU, skipCPUIfNoMkldnn],
2617
# Transposed convolution is broken for mkldnn. See https://github.com/pytorch/pytorch/issues/68775.
2625
torch._C._ConvBackend.Mkldnn,
2627
decorators=[onlyCPU, skipCPUIfNoMkldnn, unittest.expectedFailure],
2628
name="mkldnn1d_transposed",
2637
torch._C._ConvBackend.Mkldnn,
2639
decorators=[onlyCPU, skipCPUIfNoMkldnn, unittest.expectedFailure],
2640
name="mkldnn2d_transposed",
2649
torch._C._ConvBackend.Mkldnn,
2651
decorators=[onlyCPU, skipCPUIfNoMkldnn, unittest.expectedFailure],
2652
name="mkldnn3d_transposed",
2661
torch._C._ConvBackend.Mkldnn,
2663
decorators=[onlyCPU, skipCPUIfNoMkldnn],
2664
name="mkldnn1d_cpu_input",
2673
torch._C._ConvBackend.Mkldnn,
2675
decorators=[onlyCPU, skipCPUIfNoMkldnn],
2676
name="mkldnn2d_cpu_input",
2685
torch._C._ConvBackend.Mkldnn,
2687
decorators=[onlyCPU, skipCPUIfNoMkldnn],
2688
name="mkldnn3d_cpu_input",
2697
torch._C._ConvBackend.MkldnnEmpty,
2699
decorators=[onlyCPU, skipCPUIfNoMkldnn],
2700
name="mkldnn_empty_batch1d",
2709
torch._C._ConvBackend.MkldnnEmpty,
2711
decorators=[onlyCPU, skipCPUIfNoMkldnn],
2712
name="mkldnn_empty_channel1d",
2721
torch._C._ConvBackend.MkldnnEmpty,
2723
decorators=[onlyCPU, skipCPUIfNoMkldnn],
2724
name="mkldnn_empty_batch_channel1d",
2733
torch._C._ConvBackend.MkldnnEmpty,
2735
decorators=[onlyCPU, skipCPUIfNoMkldnn],
2736
name="mkldnn_empty_batch2d",
2745
torch._C._ConvBackend.MkldnnEmpty,
2747
decorators=[onlyCPU, skipCPUIfNoMkldnn],
2748
name="mkldnn_empty_channel2d",
2757
torch._C._ConvBackend.MkldnnEmpty,
2759
decorators=[onlyCPU, skipCPUIfNoMkldnn],
2760
name="mkldnn_empty_batch_channel2d",
2769
torch._C._ConvBackend.MkldnnEmpty,
2771
decorators=[onlyCPU, skipCPUIfNoMkldnn],
2772
name="mkldnn_empty_batch3d",
2781
torch._C._ConvBackend.MkldnnEmpty,
2783
decorators=[onlyCPU, skipCPUIfNoMkldnn],
2784
name="mkldnn_empty_channel3d",
2793
torch._C._ConvBackend.MkldnnEmpty,
2795
decorators=[onlyCPU, skipCPUIfNoMkldnn],
2796
name="mkldnn_empty_batch_channel3d",
2798
# Note: Tests for mobile backends are not currently supported. This comprises
# NnpackSpatial, Winograd3x3Depthwise, and Xnnpack2d backends. Testing these
# requires the ability to gate tests by whether PyTorch is built with USE_MOBILE=1.

# Test with both bias and no bias.
@parametrize_test("has_bias", [False, True])
# Test with both stride=1 and stride>1 cases.
@parametrize_test("strided", [False, True])
# Test with both contiguous and non-contiguous inputs.
@parametrize_test("contiguous", [False, True])
def test_conv_backend(
dtype = torch.float32
2824
C_in, C_out, dim, kernel_size = input_shape[1], 12, len(input_shape) - 2, 3
2825
x = torch.randn(*input_shape, device=device, dtype=dtype, requires_grad=True)
2826
weight = torch.randn(
2827
C_in if transposed else C_out,
2828
C_out // groups if transposed else C_in // groups,
2829
*[kernel_size for _ in range(dim)],
2835
torch.randn(C_out, device=device, dtype=dtype, requires_grad=True)
2840
def _make_noncontiguous(inp):
2843
old_requires_grad = inp.requires_grad
2844
inp = torch.repeat_interleave(inp, 2, dim=-1)
2845
inp = inp[..., ::2].detach().requires_grad_(old_requires_grad)
2849
x = _make_noncontiguous(x)
2850
weight = _make_noncontiguous(weight)
2851
bias = _make_noncontiguous(bias)
2853
if layout is torch._mkldnn:
2855
# Note that weight and bias are not supported as mkldnn tensors during training.
2857
stride = (2,) * dim if strided else (1,) * dim
2858
padding = (0,) * dim
2859
dilation = (2,) * dim if dilated else (1,) * dim
2860
output_padding = (0,) * dim
2873
# Ensure correct backend is selected.
2874
backend_actual = torch._C._select_conv_backend(*inputs)
2875
self.assertEqual(backend_actual, backend_expected)
2877
# Ensure backward call succeeds.
2878
convolution = torch.ops.aten.convolution
2879
output = convolution(*inputs)
2880
grad_output = torch.randn(output.shape, device=device, dtype=dtype)
2882
grad_output = _make_noncontiguous(grad_output)
2883
if layout is torch._mkldnn:
2884
grad_output = grad_output.to_mkldnn()
2885
output.backward(grad_output)
2887
# mkldnn doesn't support gradcheck :(
2888
if layout is torch._mkldnn:
2891
if backend_actual != torch._C._ConvBackend.Empty: # FIXME: forward AD fails
2892
# Forward AD and forward-over-reverse AD smoke test in float32
2893
# TODO: remove this if we introduce per-op gradient tests for float32
2894
with fwAD.dual_level():
2897
fwAD.make_dual(i, torch.rand_like(i))
2898
if isinstance(i, torch.Tensor)
2904
output = convolution(*dual_inputs)
2905
# Forward over reverse AD
2906
grad_output_d = fwAD.make_dual(
2907
torch.rand_like(output), torch.rand_like(output)
2910
torch.autograd.grad(output, [x, weight, bias], grad_output_d)
2912
torch.autograd.grad(output, [x, weight], grad_output_d)
2914
# Convert to float64 for gradcheck.
2915
x = x.to(torch.float64).detach().requires_grad_(True)
2916
weight = weight.to(torch.float64).detach().requires_grad_(True)
2917
if bias is not None:
2918
bias = bias.to(torch.float64).detach().requires_grad_(True)
2931
# Set some backend-specific validation settings.
gradcheck_nondet_tol = 0.0
if torch.backends.cudnn.is_available():
    # cuDNN introduces non-determinism
    gradcheck_nondet_tol = GRADCHECK_NONDET_TOL

self.assertTrue(gradcheck(convolution, inputs, nondet_tol=gradcheck_nondet_tol))

# double backward doesn't support bias gradients
if bias is not None:
    bias.requires_grad_(False)
gradgradcheck(convolution, inputs, nondet_tol=gradcheck_nondet_tol)
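
# --- Illustrative sketch (not part of the original tests, helper name is
# hypothetical): gradcheck/gradgradcheck compare analytical gradients against
# finite differences, which is why the test above first casts everything to
# float64 and passes a nonzero nondet_tol when a non-deterministic backend
# (e.g. cuDNN) may be selected.
def _sketch_conv_gradcheck():
    import torch
    from torch.autograd import gradcheck, gradgradcheck

    def fn(x, w):
        return torch.nn.functional.conv2d(x, w, padding=1)

    x = torch.randn(2, 3, 6, 6, dtype=torch.double, requires_grad=True)
    w = torch.randn(4, 3, 3, 3, dtype=torch.double, requires_grad=True)
    assert gradcheck(fn, (x, w))
    assert gradgradcheck(fn, (x, w))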
def test_conv_contiguous_for_oneDNN(self):
2948
# See https://github.com/pytorch/pytorch/issues/80837.
2949
for dtype in [torch.float, torch.bfloat16, torch.half]:
2959
padding_mode="zeros",
2962
x = torch.rand([1, 2, 321, 201, 1]).to(dtype=dtype)
2963
x = torch.transpose(x, 1, 4)
2976
if torch.backends.mkldnn.is_available():
2978
# Disable MKLDNN explicitly
2979
with torch.backends.mkldnn.flags(enabled=False):
2981
self.assertEqual(y, y_)
2984
def test_conv_ic1_channels_last_for_oneDNN(self):
2985
# See https://github.com/pytorch/pytorch/issues/82060, N > 1 will call in OneDNN path.
2986
for dtype in [torch.float, torch.bfloat16, torch.half]:
2987
conv = torch.nn.Conv2d(
2988
1, 64, kernel_size=(3, 3), padding=(1, 1), bias=False
2990
conv = conv.to(memory_format=torch.channels_last).to(dtype=dtype)
2991
x = torch.rand(2, 1, 100, 100).to(dtype=dtype)
2992
if torch.backends.mkldnn.is_available():
2994
# Disable MKLDNN explicitly
2995
with torch.backends.mkldnn.flags(enabled=False):
2997
self.assertEqual(y, y_)
2999
@dtypes(torch.float, torch.cfloat)
def test_conv_empty_channel(self, device, dtype):
    in_channels = 0
    mod = torch.nn.Conv1d(in_channels, 8, 2, stride=2, dtype=dtype).to(device)
    inp = torch.randn(2, 0, 15, device=device, dtype=dtype)
    _test_module_empty_input(self, mod, inp, check_size=False)

    with self.assertRaisesRegex(RuntimeError, "Given groups=1, weight"):
        inp = torch.randn(2, 1, 0, device=device, dtype=dtype)
        mod(inp)

    mod = torch.nn.Conv2d(in_channels, 33, 3, stride=2, dtype=dtype).to(device)
    inp = torch.randn(2, 0, 50, 100, device=device, dtype=dtype)
    _test_module_empty_input(self, mod, inp, check_size=False)

    with self.assertRaisesRegex(RuntimeError, "Given groups=1, weight"):
        inp = torch.randn(2, 1, 40, 0, device=device, dtype=dtype)
        mod(inp)

    mod = torch.nn.Conv3d(in_channels, 33, 3, stride=2, dtype=dtype).to(device)
    inp = torch.randn(2, 0, 50, 20, 40, device=device, dtype=dtype)
    _test_module_empty_input(self, mod, inp, check_size=False)

    with self.assertRaisesRegex(RuntimeError, "Given groups=1, weight"):
        inp = torch.randn(2, 1, 50, 0, 40, device=device, dtype=dtype)
        mod(inp)
def test_group_conv_empty(self, device):
    mod = torch.nn.Conv2d(4, 4, stride=2, kernel_size=3, padding=1, groups=4).to(
        device
    )
    inp = torch.randn(0, 4, 4, 4, device=device)
    _test_module_empty_input(self, mod, inp, check_size=False)
    if self.device_type == "cuda" and self.has_cudnn():
        with torch.backends.cudnn.flags(enabled=False):
            _test_module_empty_input(self, mod, inp, check_size=False)

def test_group_convTranspose_empty(self, device):
    mod = torch.nn.ConvTranspose2d(
        4, 4, stride=2, kernel_size=3, padding=1, groups=4
    ).to(device)
    inp = torch.randn(0, 4, 4, 4, device=device)
    _test_module_empty_input(self, mod, inp, check_size=False)
    if self.device_type == "cuda" and self.has_cudnn():
        with torch.backends.cudnn.flags(enabled=False):
            _test_module_empty_input(self, mod, inp, check_size=False)

def test_convTranspose_empty(self, device):
    mod = torch.nn.ConvTranspose2d(4, 4, stride=2, kernel_size=3, padding=1).to(
        device
    )
    inp = torch.randn(0, 4, 4, 4, device=device)
    _test_module_empty_input(self, mod, inp, check_size=False)
    if self.device_type == "cuda" and self.has_cudnn():
        with torch.backends.cudnn.flags(enabled=False):
            _test_module_empty_input(self, mod, inp, check_size=False)
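
# --- Illustrative sketch (not part of the original tests, helper name is
# hypothetical): convolutions accept zero-sized batch or channel dimensions and
# produce an output whose non-empty dims follow the usual shape formula.
def _sketch_empty_batch_conv():
    import torch

    mod = torch.nn.Conv2d(4, 4, stride=2, kernel_size=3, padding=1)
    out = mod(torch.randn(0, 4, 4, 4))
    # floor((4 + 2 * 1 - 3) / 2) + 1 == 2, so the result is (0, 4, 2, 2).
    assert out.shape == (0, 4, 2, 2)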
@largeTensorTest("12GB")
def test_conv_large_nosplit(self, device):
    # Here we just test that the convolution correctly routes to the fallback
    # implementation, i.e. that it does not crash. The correctness of the
    # fallback implementation is covered by other tests.
    dtype = torch.half if self.device_type == "cuda" else torch.float
    conv1 = nn.Conv2d(2, 2, 8, 8).to(device).to(dtype)
    input_large = torch.randn(1, 2, 1024, 1024 * 1024, dtype=dtype, device=device)
    conv1(input_large)
    conv2 = torch.nn.Conv2d(1, 1024, 1, 1).to(device).to(dtype)
    input_large = torch.randn(1, 1, 2048, 1024, dtype=dtype, device=device)
    conv2(input_large)
def test_conv_noncontig_weights(self, device):
    for dim in (1, 2, 3):
        for grouped in (False, True):
            nc = 3
            groups = 3 if grouped else 1
            w = torch.randn([3] * dim, device=device)
            w = w.expand([nc, int(nc / groups)] + list(w.shape))
            w = w.detach().requires_grad_()
            x = torch.randn(
                [1, nc] + ([5] * dim), device=device, requires_grad=True
            )
            y = getattr(F, f"conv{dim}d")(x, w, groups=groups)
            y.sum().backward()
            y = getattr(F, f"conv_transpose{dim}d")(x, w, groups=groups)
            y.sum().backward()
def test_conv_noncontig_weights_and_bias(self, device):
3087
# need floats to exercise https://github.com/pytorch/pytorch/issues/16018
3088
for bias in [True, False]:
3089
conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=bias).to(
3093
input_nc = torch.randn(
3094
(1, 3, 224, 224, 2), device=device, dtype=torch.float
3096
input_c = input_nc.contiguous()
3098
weight_nc = torch.randn((64, 3, 7, 7, 2), device=device, dtype=torch.float)[
3101
conv1.weight = nn.Parameter(weight_nc)
3102
weight_c = conv1.weight.contiguous()
3105
bias_nc = torch.randn((64, 2), device=device, dtype=torch.float)[:, 1]
3106
conv1.bias = nn.Parameter(bias_nc)
3107
bias_c = conv1.bias.contiguous()
3109
out1 = conv1(input_nc)
3110
conv1.weight = nn.Parameter(weight_c)
3112
conv1.bias = nn.Parameter(bias_c)
3113
out2 = conv1(input_c)
3114
self.assertEqual(out1, out2)
3117
@largeTensorTest("12GB")
@skipIfRocmVersionLessThan((6, 0))
def test_conv_transposed_large(self, device):
    dtype = torch.half if self.device_type == "cuda" else torch.float
    conv = nn.ConvTranspose2d(1, 1, 1, 1, bias=False).to(device).to(dtype)
    input_large = torch.randn(4096, 1, 512, 1024, dtype=dtype, device=device)
    ret = conv(input_large)
    maxdiff0 = (
        (ret.narrow(0, 0, 1024) - conv(input_large.narrow(0, 0, 1024))).abs_().max().item()
    )
    maxdiff1 = (
        (ret.narrow(0, 1024, 1024) - conv(input_large.narrow(0, 1024, 1024))).abs_().max().item()
    )
    maxdiff2 = (
        (ret.narrow(0, 2048, 1024) - conv(input_large.narrow(0, 2048, 1024))).abs_().max().item()
    )
    maxdiff3 = (
        (ret.narrow(0, 3072, 1024) - conv(input_large.narrow(0, 3072, 1024))).abs_().max().item()
    )
    if self.device_type == "cuda":
        # cuDNN may use algorithms such as FFT that don't guarantee a diff of 0
        self.assertEqual(maxdiff0, 0, atol=2e-3, rtol=1e-5)
        self.assertEqual(maxdiff1, 0, atol=2e-3, rtol=1e-5)
        self.assertEqual(maxdiff2, 0, atol=2e-3, rtol=1e-5)
        self.assertEqual(maxdiff3, 0, atol=2e-3, rtol=1e-5)
    else:
        self.assertEqual(maxdiff0, 0)
        self.assertEqual(maxdiff1, 0)
        self.assertEqual(maxdiff2, 0)
        self.assertEqual(maxdiff3, 0)
@largeTensorTest("12GB")
def test_conv_large(self, device):
    dtype = torch.half if self.device_type == "cuda" else torch.float
    conv = nn.Conv2d(2, 2, 8, 8, bias=False).to(device).to(dtype)
    input_large = torch.randn(4097, 2, 512, 512, dtype=dtype, device=device)
    ret = conv(input_large)
    self.assertEqual(ret[:2048], conv(input_large[:2048]))
    self.assertEqual(ret[2048:4096], conv(input_large[2048:4096]))
    self.assertEqual(ret[4096:], conv(input_large[4096:]))

    # When computing the backward, we use `max(dim=1)` to create
    # some sparsity. Without this sparsity, the rounding error would be
    # too large (as large as 1e-5) to satisfy the criterion (1e-6) of `assertEqual`.
    ret.view(4097, -1).max(dim=1).values.sum().backward()
    grad1 = conv.weight.grad.detach().clone()
    conv.zero_grad()
    conv(input_large[:2048]).view(2048, -1).max(dim=1).values.sum().backward()
    conv(input_large[2048:4096]).view(2048, -1).max(dim=1).values.sum().backward()
    conv(input_large[4096:]).view(1, -1).max(dim=1).values.sum().backward()
    grad2 = conv.weight.grad.detach().clone()
    # gradients are at the order of hundreds, we need to scale them to
    # the order of one so that we can compare
    scale = 1 / grad2.abs().mean()
    grad1 = grad1 * scale
    grad2 = grad2 * scale
    self.assertEqual(grad1, grad2, atol=5e-2, rtol=5e-3)
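
# --- Illustrative sketch (not part of the original tests, helper name is
# hypothetical): the chunked comparisons in test_conv_large exist because the
# full input holds more elements than a 32-bit-indexed kernel can address, so
# the single big launch is checked against per-chunk results.
def _sketch_why_the_batch_is_split():
    numel = 4097 * 2 * 512 * 512  # elements in `input_large` above
    assert numel > 2**31 - 1      # exceeds the int32 indexing range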
@largeTensorTest("20GB", "cpu")
3197
@largeTensorTest("60GB", "cuda")
3198
def test_conv_large_batch_1(self, device):
3206
input_tensor = torch.ones(1, in_channels, dim, dim).cuda().half()
3208
nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
3212
output = model(input_tensor)
3213
model_cpu = model.cpu().float()
3214
output_cpu = model(input_tensor.float().cpu())
3215
self.assertEqual(output.cpu().float(), output_cpu, atol=1e-3, rtol=1e-3)
3219
@largeTensorTest("24GB", "cpu")
@largeTensorTest("20GB", "cuda")
def test_conv3d_large_batch_1(self, device):
    x = torch.rand(1, 32, 512, 512, 256)
    m = torch.nn.Conv3d(32, 1, kernel_size=1, padding=0, stride=1, bias=False)
    yref = m(x)
    y = m.to(device=device)(x.to(device=device))
    self.assertEqual(yref, y.cpu())
def test_contig_wrong_stride_cudnn(self, device):
    # x has to have batch_size 1 to test contiguous checks
    x = torch.randn(1, 16, 5, 5, device=device)
    stride = list(x.stride())
    stride[0] = 20
    # change the stride in dimension 0; the tensor is still contiguous because size[0] is 1
    x.set_(x.storage(), 0, x.size(), stride)
    self.assertTrue(x.is_contiguous())
    F.conv_transpose2d(x, torch.randn(16, 1, 1, 1, device=device))
    F.conv2d(x, torch.randn(1, 16, 1, 1, device=device))
@tf32_on_and_off(0.005)
3243
def test_Conv2d_size_1_kernel(self, device):
3244
x_cpu = torch.randn(2, 3, 5, 5)
3245
conv_cpu = torch.nn.Conv2d(3, 3, kernel_size=1)
3246
y_cpu = conv_cpu(x_cpu)
3247
y = torch.rand_like(y_cpu)
3250
with cudnn.flags(enabled=False):
3251
conv_cuda = torch.nn.Conv2d(3, 3, kernel_size=1).to(device)
3252
conv_cuda.bias.data.copy_(conv_cpu.bias.data)
3253
conv_cuda.weight.data.copy_(conv_cpu.weight.data)
3254
y_cuda = conv_cuda(x_cpu.to(device))
3255
y_cuda.backward(y.to(device))
3257
self.assertEqual(y_cpu, y_cuda, atol=1e-5, rtol=0, exact_device=False)
3259
conv_cpu.bias.grad.data,
3260
conv_cuda.bias.grad.data,
3266
conv_cpu.weight.grad.data,
3267
conv_cuda.weight.grad.data,
3274
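
# --- Illustrative sketch (not part of the original tests, helper name is
# hypothetical): a 1x1 convolution is a per-pixel linear map over channels,
# i.e. a matmul, which is why the size-1-kernel tests are a cheap
# cross-backend consistency check.
def _sketch_conv1x1_as_matmul():
    import torch

    x = torch.randn(2, 3, 5, 5)
    w = torch.randn(4, 3, 1, 1)
    out_conv = torch.nn.functional.conv2d(x, w)
    out_mm = torch.einsum("oc,nchw->nohw", w[:, :, 0, 0], x)
    torch.testing.assert_close(out_conv, out_mm)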
@tf32_on_and_off(0.005)
3275
def test_ConvTranspose2d_size_1_kernel(self, device):
3276
x_cpu = torch.randn(2, 3, 5, 5)
3277
conv_cpu = torch.nn.ConvTranspose2d(3, 3, kernel_size=1)
3278
y_cpu = conv_cpu(x_cpu)
3279
y = torch.rand_like(y_cpu)
3282
with cudnn.flags(enabled=False):
3283
conv_cuda = torch.nn.ConvTranspose2d(3, 3, kernel_size=1).to(device)
3284
conv_cuda.bias.data.copy_(conv_cpu.bias.data)
3285
conv_cuda.weight.data.copy_(conv_cpu.weight.data)
3286
y_cuda = conv_cuda(x_cpu.to(device))
3287
y_cuda.backward(y.to(device))
3289
self.assertEqual(y_cpu, y_cuda, atol=1e-5, rtol=0, exact_device=False)
3291
conv_cpu.bias.grad.data,
3292
conv_cuda.bias.grad.data,
3298
conv_cpu.weight.grad.data,
3299
conv_cuda.weight.grad.data,
3306
def test_ConvTranspose3d_size_1_kernel(self, device):
3307
with set_default_dtype(torch.double):
3308
x_cpu = torch.randn(2, 3, 3, 5, 5)
3309
conv_cpu = torch.nn.ConvTranspose3d(3, 3, kernel_size=1)
3310
y_cpu = conv_cpu(x_cpu)
3311
y = torch.rand_like(y_cpu)
3314
with cudnn.flags(enabled=False):
3315
conv_cuda = torch.nn.ConvTranspose3d(3, 3, kernel_size=1).to(device)
3316
conv_cuda.bias.data.copy_(conv_cpu.bias.data)
3317
conv_cuda.weight.data.copy_(conv_cpu.weight.data)
3318
y_cuda = conv_cuda(x_cpu.to(device))
3319
y_cuda.backward(y.to(device))
3321
self.assertEqual(y_cpu, y_cuda, atol=1e-5, rtol=0, exact_device=False)
3323
conv_cpu.bias.grad.data,
3324
conv_cuda.bias.grad.data,
3330
conv_cpu.weight.grad.data,
3331
conv_cuda.weight.grad.data,
3338
*floating_types_and(torch.half, *[torch.bfloat16] if AMPERE_OR_ROCM else [])
3340
@dtypes(torch.float)
3341
@torch.backends.cudnn.flags(enabled=True, benchmark=False)
3342
@unittest.skipIf(TEST_WITH_ROCM, "Skipped on ROCm, since it is failing on ROCm 5.7")
3343
def test_Conv2d_naive_groups(self, device, dtype):
3344
# Check that grouped convolutions matches two half convolutions
3345
m = nn.Conv2d(4, 4, kernel_size=3, groups=2).to(device, dtype)
3346
i = torch.randn(2, 4, 6, 6, device=device, dtype=dtype, requires_grad=True)
3348
grad_output = torch.randn(2, 4, 4, 4, device=device, dtype=dtype)
3349
output.backward(grad_output)
3351
m1 = nn.Conv2d(2, 2, kernel_size=3).to(device, dtype)
3352
m1.weight.data.copy_(m.weight.data[:2])
3353
m1.bias.data.copy_(m.bias.data[:2])
3354
i1 = i.data[:, :2].contiguous().requires_grad_(True)
3356
output1.backward(grad_output[:, :2].contiguous())
3358
m2 = nn.Conv2d(2, 2, kernel_size=3).to(device, dtype)
3359
m2.weight.data.copy_(m.weight.data[2:])
3360
m2.bias.data.copy_(m.bias.data[2:])
3361
i2 = i.data[:, 2:].contiguous().requires_grad_(True)
3363
output2.backward(grad_output[:, 2:].contiguous())
3365
self.assertEqual(output, torch.cat([output1, output2], 1))
3368
torch.cat([i1.grad.data, i2.grad.data], 1),
3369
atol=dtype2prec_DONTUSE[dtype],
3374
torch.cat([m1.bias.grad.data, m2.bias.grad.data], 0),
3375
atol=dtype2prec_DONTUSE[dtype],
3380
torch.cat([m1.weight.grad.data, m2.weight.grad.data], 0),
3381
atol=dtype2prec_DONTUSE[dtype],
3385
@dtypes(torch.double, torch.cdouble)
3386
def test_Conv2d_backward_depthwise(self, device, dtype):
3387
x = torch.randn(2, 2, 4, 20, device=device, dtype=dtype, requires_grad=True)
3388
weight = torch.randn(2, 1, 3, 5, device=device, dtype=dtype, requires_grad=True)
3390
def conv2d_depthwise(x, weight):
3391
return torch.nn.functional.conv2d(
3392
x, weight, bias=None, stride=(1, 10), groups=2
3395
for cudnn_enabled in [False, True]:
3396
with torch.backends.cudnn.flags(enabled=cudnn_enabled):
3397
torch.autograd.gradcheck(conv2d_depthwise, (x, weight))
3400
@dtypes(torch.float, torch.double)
3401
def test_conv_thnn_nhwc(self, device, dtype):
3415
input = torch.randint(-3, 3, (n, c, h, w), dtype=dtype, device=device).to(
3416
memory_format=input_format
3418
input.requires_grad_()
3420
c, out_channels, kernel_size, dilation=dilation, groups=groups
3421
).to(device="cpu", dtype=dtype, memory_format=weight_format)
3422
for p in conv.parameters():
3423
p.data = torch.randint_like(p, -3, 3)
3425
ref_input = input.detach().clone().contiguous().requires_grad_()
3427
c, out_channels, kernel_size, dilation=dilation, groups=groups
3429
# load_state_dict will restore the stride & memory_layout on ref_conv.weight.
3430
ref_conv.load_state_dict(conv.state_dict())
3431
ref_conv = ref_conv.to(
3432
device="cpu", dtype=dtype, memory_format=torch.contiguous_format
3436
ref_out = ref_conv(ref_input)
3438
grad = torch.randint_like(out, -3, 3)
3439
ref_grad = grad.detach().clone().contiguous()
3442
ref_out.backward(ref_grad)
3444
self.assertTrue(out.is_contiguous(memory_format=torch.channels_last))
3445
self.assertTrue(ref_out.is_contiguous())
3446
self.assertEqual(out, ref_out, exact_dtype=False)
3447
self.assertEqual(conv.weight.grad, ref_conv.weight.grad, exact_dtype=False)
3448
self.assertEqual(conv.bias.grad, ref_conv.bias.grad, exact_dtype=False)
3449
self.assertEqual(input.grad, ref_input.grad, exact_dtype=False)
3451
with torch.backends.mkldnn.flags(enabled=False):
3453
[torch.channels_last, torch.channels_last],
3454
[torch.channels_last, torch.contiguous_format],
3455
[torch.contiguous_format, torch.channels_last],
3457
for input_format, weight_format in formats:
3458
# non-dilated conv: thnn_conv2d normal path (with im2col)
3469
input_format=input_format,
3470
weight_format=weight_format,
3482
input_format=input_format,
3483
weight_format=weight_format,
3485
# test when input chanels is 1 and not converted to channels last
3496
input_format=torch.contiguous_format,
3497
weight_format=torch.channels_last,
3499
# non-dilated conv: thnn_conv2d fast path (skip im2col)
3510
input_format=input_format,
3511
weight_format=weight_format,
3513
# ic == oc == 1 here, so need to stick input to CL to activate channels last
3524
input_format=torch.channels_last,
3525
weight_format=weight_format,
3527
# dilated conv: slow_conv_dilated2d
3538
input_format=input_format,
3539
weight_format=weight_format,
3551
input_format=input_format,
3552
weight_format=weight_format,
3554
# transposed-conv: slow_conv_transpose2d
3565
input_format=input_format,
3566
weight_format=weight_format,
3578
input_format=input_format,
3579
weight_format=weight_format,
3591
input_format=input_format,
3592
weight_format=weight_format,
3604
input_format=input_format,
3605
weight_format=weight_format,
3609
@skipCUDAIfRocmVersionLessThan((4, 3))
3610
@skipCUDAIfNotMiopenSuggestNHWC
3611
@skipCUDAIfCudnnVersionLessThan(7603)
3612
@dtypes(torch.half, torch.float, torch.cfloat)
3613
def test_conv_cudnn_nhwc(self, device, dtype):
3614
def helper(n, c, h, w, out_channels, kernel_size, groups):
3615
input = torch.randint(-3, 3, (n, c, h, w), dtype=dtype, device=device).to(
3616
memory_format=torch.channels_last
3618
input.requires_grad_()
3619
conv = nn.Conv2d(c, out_channels, kernel_size, groups=groups).to(
3620
device="cuda", dtype=dtype, memory_format=torch.channels_last
3622
for p in conv.parameters():
3623
p.data = torch.randint_like(p, -3, 3)
3625
# use FP64 channels-first conv as reference
3626
ref_input = input.detach().clone().contiguous().double().requires_grad_()
3627
ref_conv = nn.Conv2d(c, out_channels, kernel_size, groups=groups)
3628
# load_state_dict will restore the stride & memory_layout on ref_conv.weight.
3629
ref_conv.load_state_dict(conv.state_dict())
3630
ref_conv = ref_conv.to(
3631
device="cuda", dtype=torch.double, memory_format=torch.contiguous_format
3635
ref_out = ref_conv(ref_input)
3637
grad = torch.randint_like(out, -3, 3)
3638
ref_grad = grad.detach().clone().double().contiguous()
3641
ref_out.backward(ref_grad)
3643
self.assertTrue(out.is_contiguous(memory_format=torch.channels_last))
3644
self.assertTrue(input.grad.is_contiguous(memory_format=torch.channels_last))
3646
conv.weight.grad.is_contiguous(memory_format=torch.channels_last)
3649
self.assertTrue(ref_out.is_contiguous())
3650
self.assertTrue(ref_input.grad.is_contiguous())
3651
self.assertTrue(ref_conv.weight.grad.is_contiguous())
3653
self.assertEqual(out, ref_out, exact_dtype=False)
3654
self.assertEqual(conv.weight.grad, ref_conv.weight.grad, exact_dtype=False)
3655
self.assertEqual(conv.bias.grad, ref_conv.bias.grad, exact_dtype=False)
3656
self.assertEqual(input.grad, ref_input.grad, exact_dtype=False)
3658
helper(2, 8, 4, 4, out_channels=4, kernel_size=3, groups=1)
3659
helper(2, 8, 4, 4, out_channels=8, kernel_size=3, groups=8)
3660
helper(1, 16, 56, 56, out_channels=16, kernel_size=3, groups=1)
3661
helper(1, 16, 56, 56, out_channels=16, kernel_size=3, groups=16)
3665
@skipCUDAIfCudnnVersionLessThan(8005)
3666
@dtypes(torch.half, torch.float)
3667
def test_conv_cudnn_ndhwc(self, device, dtype):
3668
def helper(n, c, d, h, w, out_channels, kernel_size, groups):
3669
input = torch.randint(
3670
-2, 2, (n, c, d, h, w), dtype=dtype, device=device
3671
).to(memory_format=torch.channels_last_3d)
3672
input.requires_grad_()
3673
conv = nn.Conv3d(c, out_channels, kernel_size, groups=groups).to(
3674
device="cuda", dtype=dtype, memory_format=torch.channels_last_3d
3676
for p in conv.parameters():
3677
p.data = torch.randint_like(p, -2, 2)
3679
# use FP64 channels-first conv as reference
3680
ref_input = input.detach().clone().contiguous().double().requires_grad_()
3681
ref_conv = nn.Conv3d(c, out_channels, kernel_size, groups=groups)
3682
# load_state_dict will restore the stride & memory_layout on ref_conv.weight.
3683
ref_conv.load_state_dict(conv.state_dict())
3684
ref_conv = ref_conv.to(
3685
device="cuda", dtype=torch.double, memory_format=torch.contiguous_format
3689
ref_out = ref_conv(ref_input)
3691
grad = torch.randint_like(out, -2, 2)
3692
ref_grad = grad.detach().clone().double().contiguous()
3695
ref_out.backward(ref_grad)
3697
self.assertTrue(out.is_contiguous(memory_format=torch.channels_last_3d))
3699
input.grad.is_contiguous(memory_format=torch.channels_last_3d)
3702
conv.weight.grad.is_contiguous(memory_format=torch.channels_last_3d)
3705
self.assertTrue(ref_out.is_contiguous())
3706
self.assertTrue(ref_input.grad.is_contiguous())
3707
self.assertTrue(ref_conv.weight.grad.is_contiguous())
3709
self.assertEqual(out, ref_out, exact_dtype=False)
3710
self.assertEqual(conv.weight.grad, ref_conv.weight.grad, exact_dtype=False)
3711
self.assertEqual(conv.bias.grad, ref_conv.bias.grad, exact_dtype=False)
3712
self.assertEqual(input.grad, ref_input.grad, exact_dtype=False)
3714
helper(2, 8, 4, 4, 4, out_channels=4, kernel_size=3, groups=1)
3715
helper(2, 8, 4, 4, 4, out_channels=8, kernel_size=3, groups=8)
3716
helper(1, 16, 18, 18, 18, out_channels=16, kernel_size=3, groups=1)
3717
helper(1, 16, 18, 18, 18, out_channels=16, kernel_size=3, groups=16)
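
# --- Illustrative sketch (not part of the original tests, helper name is
# hypothetical): the channels-last checks above rely on `memory_format`
# round-tripping. Converting changes only the strides; shape and values stay put.
def _sketch_channels_last_roundtrip():
    import torch

    x = torch.randn(2, 8, 4, 4)
    xcl = x.to(memory_format=torch.channels_last)
    assert xcl.shape == x.shape
    assert xcl.is_contiguous(memory_format=torch.channels_last)
    assert not xcl.is_contiguous()  # no longer contiguous in the default NCHW sense
    torch.testing.assert_close(xcl, x)  # values are unchanged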
3734
layer(inp.size(1), grad.size(1), ref_conv.weight.size(2)).float().to(device)
3736
# load_state_dict will restore the stride & memory_layout on ref_conv.weight.
3737
conv.load_state_dict(ref_conv.state_dict())
3739
conv.weight.detach().clone().contiguous(memory_format=weight_format)
3741
conv.weight.data = weight_data.resize_(
3742
weight_data.size(), memory_format=weight_format
3744
input = inp.clone().contiguous(memory_format=input_format)
3745
input.resize_(input.size(), memory_format=input_format)
3746
input = input.requires_grad_()
3747
grad = grad.contiguous(memory_format=grad_format)
3748
grad.resize_(grad.size(), memory_format=grad_format)
3751
self.assertTrue(out.is_contiguous(memory_format=output_format))
3752
self.assertEqual(out, ref_out)
3753
self.assertEqual(conv.weight.grad, ref_conv.weight.grad)
3754
self.assertEqual(conv.bias.grad, ref_conv.bias.grad)
3755
self.assertEqual(input.grad, ref_input.grad)
3757
def _test_conv_cudnn_nhwc_nchw(self, layer, n, c, h, w, k, filter_size, device):
3758
data = torch.randint(1, 10, (n, c, h, w), dtype=torch.float32, device=device)
3759
ref_input = data.clone().contiguous().requires_grad_(True)
3760
ref_conv = layer(c, k, filter_size).float().to(device)
3761
ref_out = ref_conv(ref_input)
3762
grad = torch.randint(1, 10, ref_out.size(), dtype=torch.float32, device="cuda")
3763
ref_out.backward(grad)
3765
for w_f in [torch.contiguous_format, torch.channels_last]:
3766
for g_f in [torch.contiguous_format, torch.channels_last]:
3767
for input_format in [torch.contiguous_format, torch.channels_last]:
3768
output_format = torch.contiguous_format
3769
# Older versions of CudNN have Channels Last support disabled
3770
if torch.backends.cudnn.version() >= 7603:
3771
if input_format == torch.channels_last:
3772
output_format = torch.channels_last
3773
# This is because we have N111 weight that cannot handle
3774
# the ambiguous memory_format
3775
if w_f == torch.channels_last:
3776
if layer == nn.Conv2d and filter_size * c != 1:
3777
output_format = torch.channels_last
3778
if layer == nn.ConvTranspose2d and filter_size * k != 1:
3779
output_format = torch.channels_last
3795
@skipCUDAIfRocmVersionLessThan((4, 3))
3796
@skipCUDAIfNotMiopenSuggestNHWC
3797
@skipCUDAIfCudnnVersionLessThan(7603)
3798
@tf32_on_and_off(0.05)
3799
def test_conv_cudnn_mismatch_memory_format(self, device):
3809
for n, c, h, w, k, filter_size in configs:
3810
self._test_conv_cudnn_nhwc_nchw(
3811
nn.Conv2d, n, c, h, w, k, filter_size, device
3813
self._test_conv_cudnn_nhwc_nchw(
3814
nn.ConvTranspose2d, n, c, h, w, k, filter_size, device
3817
# torch.half is erroring out on Windows with CUDA 10.1 + cuDNN 7.6.4
3818
# returning CUDNN_STATUS_BAD_PARAM
3819
# Disabling that specific test for now [see issue # 33918]
3822
@dtypes(torch.float, torch.double)
3823
def test_conv_cudnn_nhwc_support(self, device, dtype):
3824
input = torch.randn(
3825
(1, 16, 1, 1), dtype=dtype, device="cuda", requires_grad=True
3827
weight = torch.randn(
3828
(8, 16, 3, 3), dtype=dtype, device="cuda", requires_grad=True
3830
weight = weight.to(memory_format=torch.channels_last)
3831
o = torch.conv2d(input, weight, None, (2, 1), (1, 1), (1, 1), 1)
3832
self.assertTrue(o.is_contiguous(memory_format=torch.channels_last))
3835
# Test that faster algorithms used for inference produce the same results
3836
# Validates depthwise3x3 bug reported in https://github.com/pytorch/pytorch/issues/60176
3838
@dtypes(torch.float)
3839
def test_conv2d_no_grad(self, device, dtype):
3840
for batch in [1, 2, 3]:
3841
for groups in [1, 2, 4]:
3842
input = torch.rand(batch, groups, 8, 8, dtype=dtype, device=device)
3851
with torch.no_grad():
3852
output_ng = m(input)
3854
self.assertEqual(output, output_ng, rtol=1e-2, atol=1e-5)
3858
@dtypes(torch.float, torch.float16)
3859
@precisionOverride({torch.half: 0.002, torch.float: 1e-4})
3860
def test_cudnn_convolution_relu(self, device, dtype):
3861
for batch, groups, image_size, kernel_size, memory_format in product(
3866
(torch.channels_last, torch.contiguous_format),
3868
if image_size[0] < kernel_size[0]:
3870
inp = torch.rand(batch, groups, *image_size, dtype=dtype, device=device)
3871
w = torch.randn(8, groups, *kernel_size, dtype=dtype, device=device)
3872
conv2d_out = torch.conv2d(inp, w, None, (1, 1), (0, 0), (1, 1), 1)
3873
inp = inp.to(memory_format=memory_format)
3874
w = w.to(memory_format=memory_format)
3875
if torch.version.hip:
3876
cudnn_out = torch.miopen_convolution_relu(
3877
inp, w, None, (1, 1), (0, 0), (1, 1), 1
3880
cudnn_out = torch.cudnn_convolution_relu(
3881
inp, w, None, (1, 1), (0, 0), (1, 1), 1
3883
self.assertTrue(cudnn_out.is_contiguous(memory_format=memory_format))
3884
if tf32_is_not_fp32() and dtype == torch.float:
3885
self.assertEqual(conv2d_out.relu(), cudnn_out, atol=4e-3, rtol=0.006)
3887
self.assertEqual(conv2d_out.relu(), cudnn_out)
3891
@dtypes(torch.float, torch.float16)
3892
@precisionOverride({torch.half: 0.002, torch.float: 1e-4})
3893
def test_cudnn_convolution_add_relu(self, device, dtype):
3894
for batch, groups, image_size, kernel_size, memory_format in product(
3899
(torch.channels_last, torch.contiguous_format),
3901
if image_size[0] < kernel_size[0]:
3903
inp = torch.rand(batch, groups, *image_size, dtype=dtype, device=device)
3904
w = torch.randn(8, groups, *kernel_size, dtype=dtype, device=device)
3905
conv2d_out = torch.conv2d(inp, w, None, (1, 1), (0, 0), (1, 1), 1)
3907
z = torch.randn_like(conv2d_out)
3909
inp = inp.to(memory_format=memory_format)
3910
w = w.to(memory_format=memory_format)
3911
z = z.to(memory_format=memory_format)
3912
if torch.version.hip:
3913
cudnn_out = torch.miopen_convolution_add_relu(
3914
inp, w, z, alpha, None, (1, 1), (0, 0), (1, 1), 1
3917
cudnn_out = torch.cudnn_convolution_add_relu(
3918
inp, w, z, alpha, None, (1, 1), (0, 0), (1, 1), 1
3921
self.assertTrue(cudnn_out.is_contiguous(memory_format=memory_format))
3922
if tf32_is_not_fp32() and dtype == torch.float:
3924
F.relu(conv2d_out + alpha * z), cudnn_out, atol=2e-3, rtol=0.006
3927
self.assertEqual(F.relu(conv2d_out + alpha * z), cudnn_out)
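
# --- Illustrative sketch (not part of the original tests, helper name is
# hypothetical): the unfused references the two tests above compare against,
# i.e. relu(conv(x, w)) and relu(conv(x, w) + alpha * z) built from plain ops.
def _sketch_unfused_conv_add_relu_reference():
    import torch
    import torch.nn.functional as F

    inp = torch.rand(1, 4, 8, 8)
    w = torch.randn(8, 4, 3, 3)
    z = torch.randn(1, 8, 6, 6)
    alpha = 2.0
    conv_out = F.conv2d(inp, w)  # stride 1, no padding, as in the tests above
    return F.relu(conv_out), F.relu(conv_out + alpha * z)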
3931
@skipCUDAIfCudnnVersionLessThan(7603)
3932
def test_convert_conv2d_weight_memory_format(self, device):
3933
input = torch.randint(1, 10, (2, 8, 4, 4), dtype=torch.float32, device=device)
3934
model = nn.Sequential(nn.Conv2d(8, 4, 3), nn.BatchNorm2d(4)).to(device).float()
3935
for memory_format in [torch.channels_last, torch.contiguous_format]:
3936
model = nn.utils.convert_conv2d_weight_memory_format(model, memory_format)
3938
self.assertTrue(out.is_contiguous(memory_format=memory_format))
3941
nn.Sequential(nn.ConvTranspose2d(8, 4, 3), nn.BatchNorm2d(4))
3945
for memory_format in [torch.channels_last, torch.contiguous_format]:
3946
model = nn.utils.convert_conv2d_weight_memory_format(model, memory_format)
3948
self.assertTrue(out.is_contiguous(memory_format=memory_format))
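
# --- Illustrative sketch (not part of the original tests, helper name is
# hypothetical): convert_conv2d_weight_memory_format rewrites the weight layout
# of the Conv2d modules inside a model in place; the output memory format of a
# subsequent forward then follows the weight, which is what the test checks.
def _sketch_convert_weight_memory_format():
    import torch
    import torch.nn as nn

    model = nn.Sequential(nn.Conv2d(8, 4, 3), nn.BatchNorm2d(4))
    model = nn.utils.convert_conv2d_weight_memory_format(model, torch.channels_last)
    assert model[0].weight.is_contiguous(memory_format=torch.channels_last)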
3952
@skipCUDAIfCudnnVersionLessThan(7603)
3953
def test_convert_conv3d_weight_memory_format(self, device):
3954
input = torch.randint(
3955
1, 10, (2, 8, 4, 4, 4), dtype=torch.float32, device=device
3958
nn.Sequential(nn.ConvTranspose3d(8, 4, 3), nn.BatchNorm3d(4))
3962
for memory_format in [torch.channels_last_3d, torch.contiguous_format]:
3963
model = nn.utils.convert_conv3d_weight_memory_format(model, memory_format)
3965
self.assertTrue(out.is_contiguous(memory_format=memory_format))
3967
def test_conv_double_backward_strided_with_3D_input_and_weight(self, device):
3968
# Test that _convolution_double_backward() outputs the correct grad shapes
3969
# for 3D input / weight when stride > 1. This is an ad-hoc regression test for a
3970
# specific case that was uncovered during the convolution consolidation effort.
3971
# The test can be safely deleted if _convolution_double_backward() is removed.
3973
input = torch.randn(2, 3, 6, device=device)
3974
weight = torch.randn(3, 3, 3, device=device)
3975
bias = torch.randn(3, device=device)
3980
output_padding = (0,)
3982
output = torch.ops.aten.convolution(
3994
ggI = torch.randn(input.shape, device=device)
3995
ggW = torch.randn(weight.shape, device=device)
3996
ggB = torch.randn(bias.shape, device=device)
3997
gO = torch.randn(output.shape, device=device)
3998
output_mask = [True, True, True]
4003
) = torch.ops.aten._convolution_double_backward(
4019
# Make sure the correct shapes are computed.
4020
self.assertEqual(grad_grad_output.shape, gO.shape)
4021
self.assertEqual(grad_input.shape, input.shape)
4022
self.assertEqual(grad_weight.shape, weight.shape)
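
# --- Illustrative sketch (not part of the original tests, helper name is
# hypothetical): "double backward" differentiates the gradient itself, so with
# create_graph=True the second-order grads must keep the same shapes that the
# assertions above check.
def _sketch_conv_double_backward():
    import torch

    x = torch.randn(2, 3, 6, requires_grad=True)
    w = torch.randn(3, 3, 3, requires_grad=True)
    out = torch.nn.functional.conv1d(x, w, stride=2)
    (gx,) = torch.autograd.grad(out.sum(), x, create_graph=True)
    (ggw,) = torch.autograd.grad(gx.sum(), w)
    assert gx.shape == x.shape and ggw.shape == w.shape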
4025
@largeTensorTest("40GB")
@largeTensorTest("24GB", "cpu")
def test_conv3d_64bit_indexing(self, device):
    x = torch.rand(1, 32, 512, 512, 256)
    m = torch.nn.Conv3d(32, 1, kernel_size=1, padding=0, stride=1, bias=False)
    yref = m(x)
    y = m.to(device=device)(x.to(device=device))
    self.assertEqual(yref, y)

instantiate_device_type_tests(TestConvolutionNNDeviceType, globals())
instantiate_parametrized_tests(TestConvolutionNN)

if __name__ == "__main__":