import torch
from torchvision.transforms import Normalize, ToPILImage, Resize
from shared.utils import plot_pil_images
Tensor
x = torch.randn([1,256,256])
x = x.squeeze()
x.shape
torch.Size([256, 256])
x = torch.randn([3,256,256])
x.shape
torch.Size([3, 256, 256])
torch.unsqueeze(x, 0).shape
torch.Size([1, 3, 256, 256])
x.flatten(1,2).shape
torch.Size([3, 65536])
x.flatten(1,2).mean(axis=1).shape
torch.Size([3])
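The same per-channel means can be computed without the intermediate flatten, by reducing over both spatial dimensions at once; a quick equivalence check:
x.mean(dim=(1,2)).shape  # same result as x.flatten(1,2).mean(axis=1)
torch.Size([3])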
x = torch.randn([1,256,256])
x.transpose(0,1).shape
torch.Size([256, 1, 256])
torch.manual_seed(12)
x = torch.randn([1,3])
x.max(), x.argmax()
(tensor(-0.0546), tensor(2))
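Note that argmax on a multi-dimensional tensor returns a single index into the flattened tensor; pass dim to get indices along an axis (a small sketch with an arbitrary tensor):
x = torch.randn([2,3])
x.argmax()       # one index into x.flatten()
x.argmax(dim=1)  # one index per row, shape torch.Size([2])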
One can index a matrix using notation similar to numpy:
scores = X[torch.arange(X.shape[0]), y]
or using the gather function:
scores = X.gather(1, y.view(-1, 1)).squeeze()
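As a sanity check, with a made-up score matrix X (rows are samples, columns are classes) and labels y, both forms pick out scores[i] = X[i, y[i]]:
X = torch.arange(12.).reshape(3, 4)
y = torch.tensor([0, 2, 1])
s1 = X[torch.arange(X.shape[0]), y]
s2 = X.gather(1, y.view(-1, 1)).squeeze()
assert (s1 == s2).all()  # both are tensor([0., 6., 9.])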
cat vs stack
assert (torch.Tensor([1,2,3]) == torch.tensor([1,2,3])).all()  # torch.Tensor defaults to float32, torch.tensor infers dtype (int64 here)
a = torch.Tensor([1])
b = torch.Tensor([2])
torch.cat([a,b], dim=0)
tensor([1., 2.])
torch.stack([a,b], dim=0)
tensor([[1.],
[2.]])
torch.stack([a,b], dim=1)
tensor([[1., 2.]])
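The general rule: cat joins tensors along an existing dimension, while stack creates a new dimension and places the tensors along it. A quick shape check with arbitrary 2D tensors:
p = torch.randn(2, 3)
q = torch.randn(2, 3)
torch.cat([p, q], dim=0).shape    # torch.Size([4, 3])
torch.stack([p, q], dim=0).shape  # torch.Size([2, 2, 3])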
Element size
a.nelement()
1
a.element_size()  # in bytes
4
Total memory allocation:
a.nelement() * a.element_size()
4
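The same arithmetic scales up; for example, a float32 image tensor of shape [3, 256, 256] like the ones used earlier:
x = torch.randn([3, 256, 256])
x.nelement() * x.element_size()  # 196608 elements * 4 bytes = 786432 bytes (768 KiB)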
Resize tensor
x = torch.randn([3,256,256])
x1 = Resize([100, 100])(x)
x1.shape
/Users/nenad.bozinovic/mambaforge/envs/blog/lib/python3.10/site-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True).
warnings.warn(
torch.Size([3, 100, 100])
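To silence the warning, pass antialias explicitly, as the warning itself recommends:
x1 = Resize([100, 100], antialias=True)(x)
x1.shape  # torch.Size([3, 100, 100]), no warning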
Inverse Normalizer
new_x = (x - mean) / std
x = new_x * std + mean
x = (new_x + mean/std) * std
x = (new_x - (-mean/std)) / (1/std)
so that with
std_inv = 1 / (std + epsilon)
mean_inv = -mean/std = -mean * std_inv
the inverse transform is itself a Normalize with mean=mean_inv and std=std_inv.
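A quick numeric check of the algebra with arbitrary scalar values:
x, mean, std = 2.0, 0.5, 0.25
new_x = (x - mean) / std      # 6.0
std_inv = 1 / std             # 4.0 (epsilon omitted for this check)
mean_inv = -mean * std_inv    # -2.0
(new_x - mean_inv) / std_inv  # (6.0 + 2.0) / 4.0 = 2.0, recovers x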
class InverseNormalize(Normalize):
    """
    Undoes the normalization and returns the reconstructed images in the input domain.
    """

    def __init__(self, normalizer):
        mean = normalizer.mean
        std = normalizer.std
        std_inv = 1 / (std + 1e-7)
        mean_inv = -mean * std_inv
        super().__init__(mean=mean_inv, std=std_inv)

    def __call__(self, tensor):
        return super().__call__(tensor.clone())

normalizer = Normalize(mean=torch.Tensor([0.1902, 0.2077, 0.1599]), std=torch.Tensor([0.1060, 0.1060, 0.1071]))
Let’s see an example:
torch.manual_seed(12)
a = torch.ones([3,2,2])
a
tensor([[[1., 1.],
[1., 1.]],
[[1., 1.],
[1., 1.]],
[[1., 1.],
[1., 1.]]])
normalizer(a)
tensor([[[7.6396, 7.6396],
[7.6396, 7.6396]],
[[7.4745, 7.4745],
[7.4745, 7.4745]],
[[7.8441, 7.8441],
[7.8441, 7.8441]]])
which, for a[0,0,0], is the same as:
(1 - 0.1902) / 0.1060
7.639622641509434
Inverse normalizer:
inv_normalizer = InverseNormalize(normalizer)
inv_normalizer
InverseNormalize(mean=tensor([-1.7943, -1.9594, -1.4930]), std=tensor([9.4340, 9.4340, 9.3371]))
Let’s see an example:
torch.manual_seed(12)
a = torch.randn(3,2,2)
a
tensor([[[-0.2138, -1.3780],
[-0.0546, 0.4515]],
[[ 0.7858, -1.0884],
[-0.5599, -0.9336]],
[[ 0.0479, -0.0844],
[-0.1471, 0.7590]]])
inv_normalizer(normalizer(a))
tensor([[[-0.2138, -1.3780],
[-0.0546, 0.4515]],
[[ 0.7858, -1.0884],
[-0.5599, -0.9336]],
[[ 0.0479, -0.0844],
[-0.1471, 0.7590]]])
torch.allclose(inv_normalizer(normalizer(a)), a)
True
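A typical use for this (a sketch): denormalize a tensor before converting it back to a PIL image for display:
img = inv_normalizer(normalizer(a))
_ = ToPILImage()(img.clamp(0, 1))  # clamp since these randn values fall outside [0, 1]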
Masking via masked_fill
torch.manual_seed(12)
N = 200
a = torch.rand(3,N,N)
b = torch.cat([torch.ones(N,N//2), torch.zeros(N,N//2)], dim=1).bool()  # masked_fill expects a boolean mask
_ = plot_pil_images([ToPILImage()(b.float()), ToPILImage()(a), ToPILImage()(a.masked_fill_(b, 0))])
Note that the shape of a is torch.Size([3, 200, 200]) and the shape of b is torch.Size([200, 200]), so b was broadcast in a.masked_fill_(b, 0).
masked_fill works similarly in 4 dimensions:
torch.manual_seed(12)
N = 300
a = torch.rand(2,3,N,N)
b1 = torch.cat([torch.ones(N,N//2), torch.zeros(N,N//2)], dim=1).bool()
b2 = torch.cat([torch.ones(N,N//3), torch.zeros(N,2*N//3)], dim=1).bool()
b = torch.stack([b1,b2])
print(b.shape)
print(a.shape)
b = b.unsqueeze(dim=1)  # adds the channel dimension of 1 needed for broadcasting in masked_fill
print(b.shape)
c = a.masked_fill(b, 0)
print(c.shape)
for i in range(2):
    _ = plot_pil_images([ToPILImage()(a[i,:,:,:]),
                         ToPILImage()(b[i,:,:,:].float()),
                         ToPILImage()(c[i,:,:,:])])
torch.Size([2, 300, 300])
torch.Size([2, 3, 300, 300])
torch.Size([2, 1, 300, 300])
torch.Size([2, 3, 300, 300])
Note that we had to add unsqueeze for b to achieve proper broadcasting.
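Broadcasting here follows the usual trailing-dimension rules; a minimal shape sketch with small arbitrary tensors:
a = torch.zeros(2, 3, 4, 4)
m1 = torch.ones(4, 4).bool()        # broadcasts over both leading dims
a.masked_fill(m1, 1).shape          # torch.Size([2, 3, 4, 4])
m2 = torch.ones(2, 1, 4, 4).bool()  # explicit size-1 channel dim, as above
a.masked_fill(m2, 1).shape          # torch.Size([2, 3, 4, 4])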
Masked mean and stdev
This is rather useless since final normalization results tend to be similar. Keeping it here in case I need it in the future.
Sometimes we don't want the mean of the whole image but only of the non-zero values. For that we need to limit the array to non-zero values first, then find the mean and stdev. Let's do this for one image, then extend it to 3 channels, and finally to a full batch:
n_images = 10
n_channels = 3
imgs = torch.randn([n_images, n_channels, 100,100])
Let's first flatten the tensor:
flatten_imgs = imgs.flatten(2,3)
flatten_imgs.shape
torch.Size([10, 3, 10000])
For one image, one channel:
array = flatten_imgs[0,0,:]
array = array[array > 0]
array.mean(), array.std()
(tensor(0.7873), tensor(0.6029))
For one image, all three channels:
array = flatten_imgs[0,:,:]
means_per_channel = torch.zeros(n_channels)
stdevs_per_channel = torch.zeros(n_channels)
for i in range(n_channels):
    array_per_channel = array[i, :]
    array_per_channel = array_per_channel[array_per_channel > 0]
    means_per_channel[i] = array_per_channel.mean()
    stdevs_per_channel[i] = array_per_channel.std()
means_per_channel, stdevs_per_channel
(tensor([0.7873, 0.8032, 0.7925]), tensor([0.6029, 0.5983, 0.5995]))
And now for the whole tensor:
means_per_image_per_channel = torch.zeros(n_images, n_channels)
stdevs_per_image_per_channel = torch.zeros(n_images, n_channels)
for i in range(n_images):
    for j in range(n_channels):
        array_per_image_per_channel = flatten_imgs[i, j, :]
        array_per_image_per_channel = array_per_image_per_channel[array_per_image_per_channel > 0]
        means_per_image_per_channel[i,j] = array_per_image_per_channel.mean()
        stdevs_per_image_per_channel[i,j] = array_per_image_per_channel.std()
means_per_image_per_channel, stdevs_per_image_per_channel
(tensor([[0.7873, 0.8032, 0.7925],
[0.7860, 0.7971, 0.8038],
[0.7988, 0.8052, 0.8056],
[0.7989, 0.8025, 0.7976],
[0.7914, 0.7798, 0.7897],
[0.7992, 0.8158, 0.7668],
[0.8038, 0.7937, 0.7977],
[0.7868, 0.7956, 0.8038],
[0.7891, 0.8032, 0.7991],
[0.7872, 0.8016, 0.8135]]),
tensor([[0.6029, 0.5983, 0.5995],
[0.5981, 0.5994, 0.6075],
[0.6066, 0.6124, 0.6055],
[0.6031, 0.5922, 0.6137],
[0.5902, 0.6009, 0.6004],
[0.6131, 0.6234, 0.5902],
[0.6138, 0.6055, 0.6082],
[0.5974, 0.5982, 0.6019],
[0.6044, 0.6084, 0.6079],
[0.5932, 0.6091, 0.6187]]))
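The double loop can also be vectorized with a mask, sums, and counts; a sketch that should reproduce the loop above (using the unbiased variance to match torch.std's default):
mask = flatten_imgs > 0
counts = mask.sum(dim=2)                 # positive pixels per image/channel
sums = (flatten_imgs * mask).sum(dim=2)
means = sums / counts                    # shape [n_images, n_channels]
sq_sums = (flatten_imgs ** 2 * mask).sum(dim=2)
stdevs = ((sq_sums - counts * means ** 2) / (counts - 1)).sqrt()
assert torch.allclose(means, means_per_image_per_channel, atol=1e-4)    # tolerance for float32 accumulation
assert torch.allclose(stdevs, stdevs_per_image_per_channel, atol=1e-4)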