Source code for kornia.contrib.distance_transform
import math
import torch
import torch.nn as nn
from kornia.filters import filter2d
from kornia.utils import create_meshgrid
def distance_transform(image: torch.Tensor, kernel_size: int = 3, h: float = 0.35) -> torch.Tensor:
r"""Approximates the Manhattan distance transform of images using cascaded convolution operations.
The value at each pixel in the output represents the distance to the nearest non-zero pixel in the input image.
It uses the method described in :cite:`pham2021dtlayer`.
The transformation is applied independently across the channel dimension of the images.
Args:
image: Image with shape :math:`(B,C,H,W)`.
kernel_size: size of the convolution kernel.
h: value that influences the approximation of the min function.
Returns:
tensor with shape :math:`(B,C,H,W)`.
Example:
>>> tensor = torch.zeros(1, 1, 5, 5)
>>> tensor[:,:, 1, 2] = 1
>>> dt = kornia.contrib.distance_transform(tensor)
"""
if not isinstance(image, torch.Tensor):
raise TypeError(f"image type is not a torch.Tensor. Got {type(image)}")
if not len(image.shape) == 4:
raise ValueError(f"Invalid image shape, we expect BxCxHxW. Got: {image.shape}")
if kernel_size % 2 == 0:
raise ValueError("Kernel size must be an odd number.")
# n_iters is set so that the distance transform can propagate from any corner of the image to the
# diagonally opposite corner.
n_iters: int = math.ceil(max(image.shape[2], image.shape[3]) / math.floor(kernel_size / 2))
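# e.g. for a 5 x 5 image with kernel_size = 3 this gives ceil(5 / 1) = 5 iterations.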
grid = create_meshgrid(
kernel_size, kernel_size, normalized_coordinates=False, device=image.device, dtype=image.dtype
)
grid -= math.floor(kernel_size / 2)
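# Each kernel entry is exp(-d / h), where d is the Euclidean distance from the window center.
# Combined with the -h * log applied after filtering below, the convolution acts as a
# log-sum-exp soft-minimum over the distances to nearby boundary pixels.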
kernel = torch.hypot(grid[0, :, :, 0], grid[0, :, :, 1])
kernel = torch.exp(kernel / -h).unsqueeze(0)
out = torch.zeros_like(image)
# It is possible to avoid cloning the image if boundary = image, but this would require modifying the image tensor.
boundary = image.clone()
signal_ones = torch.ones_like(boundary)
for i in range(n_iters):
cdt = filter2d(boundary, kernel, border_type='replicate')
cdt = -h * torch.log(cdt)
# Where no boundary pixel falls inside the kernel, the filter response is 0, so the log above
# yields -inf and the -h factor turns it into +inf; reset those pixels to 0.
cdt = torch.nan_to_num(cdt, posinf=0.0)
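# mask selects the pixels whose distance is determined in this iteration (positive response).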
mask = torch.where(cdt > 0, 1.0, 0.0)
if mask.sum() == 0:
break
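# Each iteration grows the front by the kernel radius, so offset is the distance already covered
# when a pixel is first reached in iteration i.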
offset: int = i * (kernel_size // 2)
out += (offset + cdt) * mask
boundary = torch.where(mask == 1, signal_ones, boundary)
return out
class DistanceTransform(nn.Module):
r"""Module that approximates the Manhattan (city block) distance transform of images using convolutions.
Args:
kernel_size: size of the convolution kernel.
h: value that influences the approximation of the min function.
"""
def __init__(self, kernel_size: int = 3, h: float = 0.35):
super().__init__()
self.kernel_size = kernel_size
self.h = h
def forward(self, image: torch.Tensor) -> torch.Tensor:
# If images have multiple channels, view the channels in the batch dimension to match kernel shape.
if image.shape[1] > 1:
image_in = image.view(-1, 1, image.shape[-2], image.shape[-1])
else:
image_in = image
return distance_transform(image_in, self.kernel_size, self.h).view_as(image)
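# A minimal usage sketch (illustrative, not part of the library source), assuming a batch of
# binary masks; the module and the functional form should give equivalent results per channel:
#
#     >>> import torch
#     >>> import kornia
#     >>> masks = torch.zeros(2, 3, 64, 64)
#     >>> masks[:, :, 10, 20] = 1.0
#     >>> dt = kornia.contrib.DistanceTransform(kernel_size=3, h=0.35)
#     >>> out = dt(masks)                                     # (2, 3, 64, 64) distances
#     >>> out_fn = kornia.contrib.distance_transform(masks)   # functional equivalent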