NVIDIA · mdabek-nvidia · Apr 8, 2026 · May 8, 2026 · May 13, 2026 · May 13, 2026
diff --git a/dali/python/nvidia/dali/experimental/torchvision/__init__.py b/dali/python/nvidia/dali/experimental/torchvision/__init__.py
@@ -20,6 +20,7 @@
 from .v2.normalize import Normalize
 from .v2.pad import Pad
 from .v2.rand_apply import RandomApply
+from .v2.randomcrop import RandomCrop
 from .v2.resize import Resize
 from .v2.totensor import ToPureTensor, PILToTensor, ToPILImage
 
@@ -33,6 +34,7 @@
     "Pad",
     "PILToTensor",
     "RandomApply",
+    "RandomCrop",
     "RandomGrayscale",
     "RandomHorizontalFlip",
     "RandomVerticalFlip",

diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/functional/__init__.py b/dali/python/nvidia/dali/experimental/torchvision/v2/functional/__init__.py
@@ -14,16 +14,21 @@
 
 from .centercrop import center_crop
 from .color import to_grayscale, rgb_to_grayscale
+from .crop import crop
 from .flips import horizontal_flip, vertical_flip
 from .gaussian_blur import gaussian_blur
+from .image_metadata import get_dimensions, get_image_size
 from .normalize import normalize
 from .pad import pad
 from .resize import resize
 from .totensor import pil_to_tensor, to_tensor, to_pil_image
 
 __all__ = [
     "center_crop",
+    "crop",
     "gaussian_blur",
+    "get_dimensions",
+    "get_image_size",
     "horizontal_flip",
     "normalize",
     "pad",

diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/functional/crop.py b/dali/python/nvidia/dali/experimental/torchvision/v2/functional/crop.py
@@ -0,0 +1,69 @@
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import nvidia.dali.experimental.dynamic as ndd
+from nvidia.dali._typing import TensorLike
+from nvidia.dali.experimental.dynamic._device import DeviceLike
+
+from ..operator import adjust_input
+from ..randomcrop import RandomCrop
+
+
+def _get_crop_axes(inpt: TensorLike | ndd.Batch) -> list[int]:
+    """Return the ``[height, width]`` axis indices (counted from the end) for ``inpt.layout``."""
+    layout_str = inpt.layout
+    if layout_str[-3:] == "HWC":
+        return [-3, -2]
+    # "CHW" and a bare "HW" both keep the two spatial axes last, so they share one answer.
+    if layout_str[-3:] == "CHW" or layout_str[-2:] == "HW":
+        return [-2, -1]
+    raise ValueError(f"Unsupported layout: {inpt.layout!r}. Expected one of HWC, CHW, HW.")
+
+
+def _verify_crop_coordinate(value, name: str) -> None:  # used by crop() for top/left
+    if not isinstance(value, int):  # ``bool`` subclasses ``int``, so True/False pass this check
+        raise TypeError(f"{name} must be int, got {type(value)}")
+
+
+@adjust_input
+def crop(
+    inpt: TensorLike | ndd.Batch,
+    top: int,
+    left: int,
+    height: int,
+    width: int,
+    device: DeviceLike = "cpu",
+) -> ndd.Tensor | ndd.Batch:
+    """
+    Crop ``inpt`` to a ``height`` x ``width`` window anchored at ``(top, left)``.
+
+    Parts of the window lying outside the input are filled with zeros.
+    Please refer to the ``RandomCrop`` operator for more details.
+    """
+    for coordinate, label in ((top, "top"), (left, "left")):
+        _verify_crop_coordinate(coordinate, label)
+    RandomCrop.verify_args(
+        size=(height, width), padding=None, pad_if_needed=False, padding_mode="constant", fill=0
+    )
+
+    anchor, extent = (top, left), (height, width)
+    return ndd.slice(
+        inpt,
+        anchor,
+        extent,
+        axes=_get_crop_axes(inpt),  # height/width axes depend on the input layout
+        out_of_bounds_policy="pad",
+        fill_values=0,
+        device=device,
+    )
diff --git a/dali/python/nvidia/dali/experimental/torchvision/v2/functional/image_metadata.py b/dali/python/nvidia/dali/experimental/torchvision/v2/functional/image_metadata.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List
+
+from PIL import Image
+import torch
+
+
+def get_image_size(inpt: Image.Image | torch.Tensor) -> List[int]:
+    """
+    Return the spatial size of an image as ``[width, height]``.
+
+    Mirrors ``torchvision.transforms.v2.functional.get_image_size`` and is kept for
+    compatibility; the torchvision successor ``get_size`` returns ``[height, width]``.
+
+    Parameters
+    ----------
+    inpt : PIL Image or torch.Tensor
+        Input image.  Tensors are expected in ``[…, H, W]`` layout (leading
+        channel / batch dimensions are ignored).
+
+    Returns
+    -------
+    List[int]
+        ``[width, height]``
+    """
+    if isinstance(inpt, Image.Image):
+        width, height = inpt.size  # PIL reports (W, H)
+        return [width, height]
+    # NOTE(review): a pending suggestion ends both TypeError messages below with a period.
+    if not isinstance(inpt, torch.Tensor):
+        raise TypeError(f"Unsupported input type: {type(inpt)}")
+    if inpt.ndim < 2:
+        raise TypeError(
+            f"get_image_size requires a tensor with at least 2 dimensions, got {inpt.ndim}"
+        )
+    height, width = inpt.shape[-2:]
+    return [width, height]
+
+
+def get_dimensions(inpt: Image.Image | torch.Tensor) -> List[int]:
+    """
+    Return the ``[channels, height, width]`` of an image.
+
+    Mirrors ``torchvision.transforms.v2.functional.get_dimensions``.
+
+    Parameters
+    ----------
+    inpt : PIL Image or torch.Tensor
+        Input image.  Tensors are expected in ``[H, W]`` or ``[…, C, H, W]`` layout
+        (leading batch dimensions are ignored).
+
+    Returns
+    -------
+    List[int]
+        ``[channels, height, width]``
+    """
+    if isinstance(inpt, Image.Image):
+        width, height = inpt.size  # PIL reports (W, H)
+        return [len(inpt.getbands()), height, width]
+    if not isinstance(inpt, torch.Tensor):
+        raise TypeError(f"Unsupported input type: {type(inpt)}")
+    # NOTE(review): a pending suggestion ends the "at least 2 dimensions" message with a period.
+    if inpt.ndim < 2:
+        raise TypeError(
+            f"get_dimensions requires a tensor with at least 2 dimensions, got {inpt.ndim}"
+        )
+    # A bare [H, W] tensor has no channel axis; it is reported as single-channel.
+    channels = 1 if inpt.ndim == 2 else inpt.shape[-3]
+    return [channels, inpt.shape[-2], inpt.shape[-1]]