Skip to content

Commit 34bcd56

Browse files
committed
Review fixes - 2

Modified resize operator size calculation.

Signed-off-by: Marek Dabek <mdabek@nvidia.com>
1 parent e4ade7a commit 34bcd56

4 files changed

Lines changed: 157 additions & 38 deletions

File tree

dali/python/nvidia/dali/experimental/torchvision/v2/functional/resize.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,22 @@ def resize(
4040
effective_size, mode = Resize.infer_effective_size(size, max_size)
4141
interpolation = Resize.interpolation_modes[interpolation]
4242

43+
if isinstance(img, ndd.Tensor):
44+
img_shape = img.shape
45+
elif isinstance(img, ndd.Batch):
46+
img_shape = img.shape[0] # Batches have uniform layout
47+
else:
48+
raise TypeError(f"Input must be ndd.Tensor or ndd.Batch, got {type(img)}")
49+
50+
if img.layout in ["HWC", "NHWC"]:
51+
original_h = img_shape[-3]
52+
original_w = img_shape[-2]
53+
elif img.layout in ["CHW", "NCHW"]:
54+
original_h = img_shape[-2]
55+
original_w = img_shape[-1]
56+
4357
target_h, target_w = Resize.calculate_target_size(
44-
img.shape, effective_size, max_size, size is None
58+
(original_h, original_w), effective_size, max_size, size is None
4559
)
4660

4761
# Shorter edge limited by max size

dali/python/nvidia/dali/experimental/torchvision/v2/operator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ def _kernel(self, data_input):
206206

207207
def __call__(self, data_input):
208208

209-
Operator.verify_data(data_input)
209+
type(self).verify_data(data_input)
210210

211211
if self.device == "gpu":
212212
data_input = data_input.gpu()

dali/python/nvidia/dali/experimental/torchvision/v2/resize.py

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,36 @@
2424
import numpy as np
2525

2626

27+
def get_inputHW(data_input):
28+
"""
29+
Gets the height and width of the input data.
30+
31+
Parameters
32+
----------
33+
data_input : Tensor
34+
Input data to get the height and width of.
35+
36+
Returns
37+
-------
38+
input_height : int
39+
Height of the input data.
40+
input_width : int
41+
Width of the input data.
42+
"""
43+
layout = data_input.property("layout")[0]
44+
45+
# CWH
46+
if layout == np.frombuffer(bytes("C", "utf-8"), dtype=np.uint8)[0]:
47+
input_height = data_input.shape()[-1]
48+
input_width = data_input.shape()[-2]
49+
# HWC
50+
else:
51+
input_height = data_input.shape()[-3]
52+
input_width = data_input.shape()[-2]
53+
54+
return input_height, input_width, data_input
55+
56+
2757
class VerificationSize(ArgumentVerificationRule):
2858
@classmethod
2959
def verify(cls, *, size, max_size, interpolation, **_):
@@ -84,7 +114,9 @@ class Resize(Operator):
84114
InterpolationMode.HAMMING: DALIInterpType.INTERP_GAUSSIAN, # TODO:
85115
InterpolationMode.LANCZOS: DALIInterpType.INTERP_LANCZOS3,
86116
}
117+
87118
arg_rules = [VerificationSize]
119+
preprocess_data = get_inputHW
88120

89121
@classmethod
90122
def infer_effective_size(
@@ -120,6 +152,7 @@ def calculate_target_size(
120152
):
121153
orig_h = orig_size[0]
122154
orig_w = orig_size[1]
155+
123156
target_h = effective_size[0]
124157
target_w = effective_size[1]
125158

@@ -160,15 +193,24 @@ def _kernel(self, data_input):
160193
with ``torchvision.transforms.Resize`` documentation and applies DALI operator on the
161194
``data_input``.
162195
"""
196+
input_height, input_width, data_input = data_input
163197

164198
target_h, target_w = Resize.calculate_target_size(
165-
data_input.shape(), self.effective_size, self.max_size, self.size is None
199+
orig_size = (input_height, input_width),
200+
effective_size = self.effective_size,
201+
max_size = self.max_size,
202+
no_size = self.size is None
166203
)
167204

168205
# Shorter edge limited by max size
169206
if self.mode == "resize_shorter":
170207
return fn.resize(
171-
data_input, device=self.device, resize_shorter=target_h, max_size=self.max_size
208+
data_input,
209+
device=self.device,
210+
resize_shorter=target_h,
211+
max_size=self.max_size,
212+
antialias=self.antialias,
213+
interp_type=self.interpolation,
172214
)
173215

174216
return fn.resize(
@@ -179,4 +221,6 @@ def _kernel(self, data_input):
179221
fn.cast(target_w, dtype=dali.types.FLOAT),
180222
),
181223
mode=self.mode,
224+
antialias=self.antialias,
225+
interp_type=self.interpolation,
182226
)

dali/test/python/torchvision/test_tv_resize.py

Lines changed: 95 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414

1515
import os
16-
from typing import Sequence
16+
from typing import Sequence, Literal, Union
1717

1818
import numpy as np
1919
from nose2.tools import params, cartesian_params
@@ -50,6 +50,7 @@ def build_resize_transform(
5050
max_size: int = None,
5151
interpolation: transforms.InterpolationMode = transforms.InterpolationMode.BILINEAR,
5252
antialias: bool = False,
53+
device: Literal["cpu", "gpu"] = "cpu",
5354
):
5455
t = transforms.Compose(
5556
[
@@ -61,13 +62,59 @@ def build_resize_transform(
6162
td = Compose(
6263
[
6364
Resize(
64-
size=resize, max_size=max_size, interpolation=interpolation, antialias=antialias
65+
size=resize,
66+
max_size=max_size,
67+
interpolation=interpolation,
68+
antialias=antialias,
69+
device=device,
6570
),
6671
]
6772
)
6873
return t, td
6974

7075

76+
def _internal_loop(
77+
input_data: Union[Image.Image, torch.Tensor],
78+
t: transforms.Resize,
79+
td: Resize,
80+
resize: int | Sequence[int],
81+
max_size: int = None,
82+
interpolation: transforms.InterpolationMode = transforms.InterpolationMode.BILINEAR,
83+
antialias: bool = False,
84+
):
85+
out_fn = fn_tv.resize(
86+
input_data,
87+
size=resize,
88+
max_size=max_size,
89+
interpolation=interpolation,
90+
antialias=antialias,
91+
)
92+
out_dali_fn = fn_dali.resize(
93+
input_data,
94+
size=resize,
95+
max_size=max_size,
96+
interpolation=interpolation,
97+
antialias=antialias,
98+
)
99+
out_tv = t(input_data)
100+
out_dali_tv = td(input_data)
101+
102+
if isinstance(input_data, Image.Image):
103+
out_tv = transforms.functional.pil_to_tensor(out_tv).unsqueeze(0).permute(0, 2, 3, 1)
104+
out_dali_tv = (
105+
transforms.functional.pil_to_tensor(out_dali_tv).unsqueeze(0).permute(0, 2, 3, 1)
106+
)
107+
out_fn = transforms.functional.pil_to_tensor(out_fn)
108+
out_dali_fn = transforms.functional.pil_to_tensor(out_dali_fn)
109+
110+
assert torch.allclose(
111+
torch.tensor(out_tv.shape[1:3]), torch.tensor(out_dali_tv.shape[1:3]), rtol=0, atol=1
112+
), f"Should be:{out_tv.shape} is:{out_dali_tv.shape}"
113+
assert torch.allclose(
114+
torch.tensor(out_fn.shape[1:3]), torch.tensor(out_dali_fn.shape[1:3]), rtol=0, atol=1
115+
), f"Should be:{out_fn.shape} is:{out_dali_fn.shape}"
116+
117+
71118
def loop_images_test_no_build(
72119
t: transforms.Resize,
73120
td: Resize,
@@ -78,52 +125,66 @@ def loop_images_test_no_build(
78125
):
79126
for fn in test_files:
80127
img = Image.open(fn)
81-
out_fn = transforms.functional.pil_to_tensor(
82-
fn_tv.resize(
83-
img,
84-
size=resize,
85-
max_size=max_size,
86-
interpolation=interpolation,
87-
antialias=antialias,
88-
)
89-
)
90-
out_dali_fn = transforms.functional.pil_to_tensor(
91-
fn_dali.resize(
92-
img,
93-
size=resize,
94-
max_size=max_size,
95-
interpolation=interpolation,
96-
antialias=antialias,
97-
)
98-
)
128+
_internal_loop(img, t, td, resize, max_size, interpolation, antialias)
129+
# assert torch.equal(out_tv, out_dali_tv)
99130

100-
out_tv = transforms.functional.pil_to_tensor(t(img)).unsqueeze(0).permute(0, 2, 3, 1)
101-
out_dali_tv = transforms.functional.pil_to_tensor(td(img)).unsqueeze(0).permute(0, 2, 3, 1)
102131

103-
assert torch.allclose(
104-
torch.tensor(out_tv.shape[1:3]), torch.tensor(out_dali_tv.shape[1:3]), rtol=0, atol=1
105-
), f"Should be:{out_tv.shape} is:{out_dali_tv.shape}"
106-
assert torch.allclose(
107-
torch.tensor(out_fn.shape[1:3]), torch.tensor(out_dali_fn.shape[1:3]), rtol=0, atol=1
108-
), f"Should be:{out_fn.shape} is:{out_dali_fn.shape}"
132+
def build_tensors(max_size: int = 512, channels: int = 3):
133+
h = torch.randint(10, max_size, (1,)).item()
134+
w = torch.randint(10, max_size, (1,)).item()
135+
tensors = [
136+
torch.ones((channels, max_size, max_size)),
137+
torch.ones((1, channels, max_size, max_size)),
138+
torch.ones((10, channels, max_size, max_size)),
139+
torch.ones((channels, max_size // 2, max_size)),
140+
torch.ones((1, channels, max_size // 2, max_size)),
141+
torch.ones((10, channels, max_size // 2, max_size)),
142+
torch.ones((channels, max_size, max_size // 2)),
143+
torch.ones((1, channels, max_size, max_size // 2)),
144+
torch.ones((10, channels, max_size, max_size // 2)),
145+
torch.ones((channels, h, w)),
146+
torch.ones((1, channels, h, w)),
147+
torch.ones((10, channels, h, w)),
148+
]
149+
150+
return tensors
151+
152+
153+
def loop_tensors_test(
154+
resize: int | Sequence[int],
155+
max_size: int = None,
156+
interpolation: transforms.InterpolationMode = transforms.InterpolationMode.BILINEAR,
157+
antialias: bool = False,
158+
device: Literal["cpu", "gpu"] = "cpu",
159+
):
160+
t, td = build_resize_transform(resize, max_size, interpolation, antialias, device)
161+
tensors = build_tensors()
109162

110-
# assert torch.equal(out_tv, out_dali_tv)
163+
for tn in tensors:
164+
_internal_loop(tn, t, td, resize, max_size, interpolation, antialias)
111165

112166

113167
def loop_images_test(
114168
resize: int | Sequence[int],
115169
max_size: int = None,
116170
interpolation: transforms.InterpolationMode = transforms.InterpolationMode.BILINEAR,
117171
antialias: bool = False,
172+
device: Literal["cpu", "gpu"] = "cpu",
118173
):
119-
t, td = build_resize_transform(resize, max_size, interpolation, antialias)
174+
t, td = build_resize_transform(resize, max_size, interpolation, antialias, device)
120175
loop_images_test_no_build(t, td, resize, max_size, interpolation, antialias)
121176

122177

123-
@params(512, 2048, ([512, 512]), ([2048, 2048]))
124-
def test_resize_sizes(resize):
178+
@cartesian_params((512, 2048, ([512, 512]), ([2048, 2048])), ("cpu", "gpu"))
179+
def test_resize_sizes_images(resize, device):
180+
# Resize with single int (preserve aspect ratio)
181+
loop_images_test(resize=resize, device=device)
182+
183+
184+
@cartesian_params((512, 2048, ([512, 512]), ([2048, 2048])), ("cpu", "gpu"))
185+
def test_resize_sizes_tensors(resize, device):
125186
# Resize with single int (preserve aspect ratio)
126-
loop_images_test(resize=resize)
187+
loop_tensors_test(resize=resize, device=device)
127188

128189

129190
@params((480, 512), (100, 124), (None, 512), (1024, 512), ([256, 256], 512), (None, None))
@@ -180,7 +241,7 @@ def test_resize_max_sizes(resize, max_size):
180241
([256, 256], transforms.InterpolationMode.BILINEAR),
181242
(640, transforms.InterpolationMode.BICUBIC),
182243
)
183-
def test_resize_interploation(resize, interpolation):
244+
def test_resize_interpolation(resize, interpolation):
184245
loop_images_test(resize=resize, interpolation=interpolation)
185246

186247

0 commit comments

Comments (0)