Convert and scale a PIL Image to an Image in PyTorch

Buy Me a Coffee☕ Memos: My post explains ToImage(). My post explains OxfordIIITPet(). Compose(transforms=[ToImage(), ToDtype(torch.float32, scale=True)]) can convert a PIL(Pillow library) Image, tensor or ndarray to an Image and scale the values as shown below: Memos: ToTensor() can convert a PIL Image or ndarray to a tensor and scale the values but it's deprecated so Compose(transforms=[ToImage(), ToDtype(torch.float32, scale=True)]) should be used according to the doc. v2 is recommended to use according to V1 or V2? Which one should I use?. from torchvision.datasets import OxfordIIITPet from torchvision.transforms.v2 import Compose, ToImage, ToDtype import torch import numpy as np origin_data = OxfordIIITPet( # It's PIL Images. root="data", transform=None ) ImageScaleTrue_data = OxfordIIITPet( root="data", transform=Compose(transforms=[ToImage(), ToDtype(torch.float32, scale=True)]) ) ImageScaleFalse_data = OxfordIIITPet( root="data", transform=Compose(transforms=[ToImage(), ToDtype(torch.float32, scale=False)]) ) ImageScaleTrue_data # Dataset OxfordIIITPet # Number of datapoints: 3680 # Root location: data # StandardTransform # Transform: Compose( # ToImage() # ToDtype(scale=True) # ) ImageScaleTrue_data[0] # (Image([[[0.1451, 0.1373, 0.1412, ..., 0.9686, 0.9765, 0.9765], # [0.1373, 0.1373, 0.1451, ..., 0.9647, 0.9725, 0.9765], # ..., # [0.1098, 0.1098, 0.1059, ..., 0.2314, 0.2549, 0.2980]], # [[0.0784, 0.0706, 0.0745, ..., 0.9725, 0.9725, 0.9725], # [0.0706, 0.0706, 0.0784, ..., 0.9686, 0.9686, 0.9725], # ..., # [0.1059, 0.1059, 0.1059, ..., 0.3686, 0.4157, 0.4588]], # [[0.0471, 0.0392, 0.0431, ..., 0.9922, 0.9922, 0.9922], # [0.0392, 0.0392, 0.0471, ..., 0.9843, 0.9882, 0.9922], # ..., # [0.1373, 0.1373, 0.1373, ..., 0.8392, 0.9098, 0.8745]]],), 0) ImageScaleTrue_data[0][0].size() # torchtorch.Size([3, 500, 394]) ImageScaleTrue_data[0][0] # Image([[[0.1451, 0.1373, 0.1412, ..., 0.9686, 0.9765, 0.9765], # [0.1373, 0.1373, 0.1451, ..., 0.9647, 0.9725, 0.9765], # 
..., # [0.1098, 0.1098, 0.1059, ..., 0.2314, 0.2549, 0.2980]], # [[0.0784, 0.0706, 0.0745, ..., 0.9725, 0.9725, 0.9725], # [0.0706, 0.0706, 0.0784, ..., 0.9686, 0.9686, 0.9725], # ..., # [0.1059, 0.1059, 0.1059, ..., 0.3686, 0.4157, 0.4588]], # [[0.0471, 0.0392, 0.0431, ..., 0.9922, 0.9922, 0.9922], # [0.0392, 0.0392, 0.0471, ..., 0.9843, 0.9882, 0.9922], # ..., # [0.1373, 0.1373, 0.1373, ..., 0.8392, 0.9098, 0.8745]]],) ImageScaleTrue_data[0][1] # 0 import matplotlib.pyplot as plt plt.imshow(X=ImageScaleTrue_data[0][0]) # TypeError: Invalid shape (3, 500, 394) for image data c = Compose(transforms=[ToImage(), ToDtype(torch.float32, scale=True)]) c(origin_data[0][0]) # Image([[[0.1451, 0.1373, 0.1412, ..., 0.9686, 0.9765, 0.9765], # [0.1373, 0.1373, 0.1451, ..., 0.9647, 0.9725, 0.9765], # ..., # [0.1098, 0.1098, 0.1059, ..., 0.2314, 0.2549, 0.2980]], # [[0.0784, 0.0706, 0.0745, ..., 0.9725, 0.9725, 0.9725], # [0.0706, 0.0706, 0.0784, ..., 0.9686, 0.9686, 0.9725], # ..., # [0.1059, 0.1059, 0.1059, ..., 0.3686, 0.4157, 0.4588]], # [[0.0471, 0.0392, 0.0431, ..., 0.9922, 0.9922, 0.9922], # [0.0392, 0.0392, 0.0471, ..., 0.9843, 0.9882, 0.9922], # ..., # [0.1373, 0.1373, 0.1373, ..., 0.8392, 0.9098, 0.8745]]],) plt.imshow(c(origin_data[0][0])) # TypeError: Invalid shape (3, 500, 394) for image data c(torch.tensor([[0, 1, 2]])) # int64 c(torch.tensor([[0, 1, 2]], dtype=torch.int64)) c(torch.tensor([[[0, 1, 2]]])) # int64 # Image([[[0.0000e+00, 1.0842e-19, 2.1684e-19]]],) c(torch.tensor([[[[0, 1, 2]]]])) # int64 # Image([[[[0.0000e+00, 1.0842e-19, 2.1684e-19]]]],) c(torch.tensor([[[[[0, 1, 2]]]]])) # int64 # Image([[[[[0.0000e+00, 1.0842e-19, 2.1684e-19]]]]],) c(torch.tensor([[0, 1, 2]], dtype=torch.int32)) # Image([[[0.0000e+00, 4.6566e-10, 9.3132e-10]]],) c(torch.tensor([[0., 1., 2.]])) # float32 c(torch.tensor([[0., 1., 2.]], dtype=torch.float32)) c(torch.tensor([[0., 1., 2.]], dtype=torch.float64)) c(torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j]])) # complex64 
c(torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j]], dtype=torch.complex64)) c(torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j]], dtype=torch.complex32)) # Image([[[0., 1., 2.]]],) c(torch.tensor([[True, False, True]])) # bool c(torch.tensor([[True, False, True]], dtype=torch.bool)) # Image([[[1., 0., 1.]]],) c(np.array(2)) # int32 c(np.array(2, dtype=np.int32)) # Image([[[9.3132e-10]]],) c(np.array([0, 1, 2])) # int32 c(np.array([[0, 1, 2]])) # int32 # Image([[[0.0000e+00, 4.6566e-10, 9.3132e-10]]],) c(np.array([[[0, 1, 2]]])) # int32 # Image([[[0.0000e+00]

Apr 20, 2025 - 21:45

Convert and scale a PIL Image to an Image in PyTorch

*Memos:

My post explains ToImage().
My post explains OxfordIIITPet().

Compose(transforms=[ToImage(), ToDtype(torch.float32, scale=True)]) can convert a PIL (Pillow library) Image, tensor or ndarray to an Image and scale its values, as shown below:
*Memos:

ToTensor() can also convert a PIL Image or ndarray to a tensor and scale the values, but it is deprecated, so Compose(transforms=[ToImage(), ToDtype(torch.float32, scale=True)]) should be used instead, according to the doc.
Using v2 is recommended, according to the doc section "V1 or V2? Which one should I use?".

from torchvision.datasets import OxfordIIITPet
from torchvision.transforms.v2 import Compose, ToImage, ToDtype
import torch
import numpy as np

# Three views of the same OxfordIIITPet dataset rooted at "data":
#   - origin_data:          no transform (raw samples)
#   - ImageScaleTrue_data:  ToImage() + ToDtype(torch.float32, scale=True)
#   - ImageScaleFalse_data: ToImage() + ToDtype(torch.float32, scale=False)
# NOTE(review): no `download=True` is passed, so this presumably expects the
# dataset to already exist under "data" - confirm.
origin_data = OxfordIIITPet( # Samples are PIL Images.
    root="data",
    transform=None
)

# Same dataset, converted to an Image and cast to float32 with value scaling.
ImageScaleTrue_data = OxfordIIITPet(
    root="data",
    transform=Compose(transforms=[ToImage(),
                      ToDtype(torch.float32, scale=True)])
)

# Same dataset, converted to an Image and cast to float32 without scaling.
ImageScaleFalse_data = OxfordIIITPet(
    root="data",
    transform=Compose(transforms=[ToImage(),
                      ToDtype(torch.float32, scale=False)])
)

# Inspect the scale=True dataset. Each `# ...` block below is the output
# printed for the expression directly above it.
ImageScaleTrue_data
# Dataset OxfordIIITPet
#     Number of datapoints: 3680
#     Root location: data
#     StandardTransform
# Transform: Compose(
#                  ToImage()
#                  ToDtype(scale=True)
#            )

# A sample is an (image, label) tuple; the image values shown lie in [0, 1].
ImageScaleTrue_data[0]
# (Image([[[0.1451, 0.1373, 0.1412, ..., 0.9686, 0.9765, 0.9765],
#          [0.1373, 0.1373, 0.1451, ..., 0.9647, 0.9725, 0.9765],
#          ...,
#          [0.1098, 0.1098, 0.1059, ..., 0.2314, 0.2549, 0.2980]],
#         [[0.0784, 0.0706, 0.0745, ..., 0.9725, 0.9725, 0.9725],
#          [0.0706, 0.0706, 0.0784, ..., 0.9686, 0.9686, 0.9725],
#          ...,
#          [0.1059, 0.1059, 0.1059, ..., 0.3686, 0.4157, 0.4588]],
#         [[0.0471, 0.0392, 0.0431, ..., 0.9922, 0.9922, 0.9922],
#          [0.0392, 0.0392, 0.0471, ..., 0.9843, 0.9882, 0.9922],
#          ...,
#          [0.1373, 0.1373, 0.1373, ..., 0.8392, 0.9098, 0.8745]]],), 0)

# NOTE(review): the leading 3 is presumably the channel axis (C, H, W) - confirm.
ImageScaleTrue_data[0][0].size()
# torch.Size([3, 500, 394])

# The image part of the first sample.
ImageScaleTrue_data[0][0]
# Image([[[0.1451, 0.1373, 0.1412, ..., 0.9686, 0.9765, 0.9765],
#         [0.1373, 0.1373, 0.1451, ..., 0.9647, 0.9725, 0.9765],
#         ...,
#         [0.1098, 0.1098, 0.1059,  ..., 0.2314, 0.2549, 0.2980]],
#        [[0.0784, 0.0706, 0.0745,  ..., 0.9725, 0.9725, 0.9725],
#         [0.0706, 0.0706, 0.0784,  ..., 0.9686, 0.9686, 0.9725],
#         ...,
#         [0.1059, 0.1059, 0.1059,  ..., 0.3686, 0.4157, 0.4588]],
#        [[0.0471, 0.0392, 0.0431,  ..., 0.9922, 0.9922, 0.9922],
#         [0.0392, 0.0392, 0.0471,  ..., 0.9843, 0.9882, 0.9922],
#         ...,
#         [0.1373, 0.1373, 0.1373,  ..., 0.8392, 0.9098, 0.8745]]],)

# The label part of the first sample.
ImageScaleTrue_data[0][1]
# 0

import matplotlib.pyplot as plt

# Passing the transformed Image straight to imshow fails: the error reports
# the shape (3, 500, 394).
# NOTE(review): imshow presumably needs a channel-last (H, W, C) array, e.g.
# via `.permute(1, 2, 0)` - confirm against the matplotlib docs.
plt.imshow(X=ImageScaleTrue_data[0][0])
# TypeError: Invalid shape (3, 500, 394) for image data

# Use the scale=True pipeline directly as a callable instead of through the
# dataset. Where several calls are listed back to back, the single output
# comment beneath the group is the output shown for all of them.
c = Compose(transforms=[ToImage(),
                        ToDtype(torch.float32, scale=True)])
# Applied to the raw PIL sample; the output matches ImageScaleTrue_data[0][0].
c(origin_data[0][0])
# Image([[[0.1451, 0.1373, 0.1412, ..., 0.9686, 0.9765, 0.9765],
#         [0.1373, 0.1373, 0.1451, ..., 0.9647, 0.9725, 0.9765],
#         ...,
#         [0.1098, 0.1098, 0.1059,  ..., 0.2314, 0.2549, 0.2980]],
#        [[0.0784, 0.0706, 0.0745,  ..., 0.9725, 0.9725, 0.9725],
#         [0.0706, 0.0706, 0.0784,  ..., 0.9686, 0.9686, 0.9725],
#         ...,
#         [0.1059, 0.1059, 0.1059,  ..., 0.3686, 0.4157, 0.4588]],
#        [[0.0471, 0.0392, 0.0431,  ..., 0.9922, 0.9922, 0.9922],
#         [0.0392, 0.0392, 0.0471,  ..., 0.9843, 0.9882, 0.9922],
#         ...,
#         [0.1373, 0.1373, 0.1373,  ..., 0.8392, 0.9098, 0.8745]]],)

# Same imshow failure as with the dataset sample: shape is (3, 500, 394).
plt.imshow(c(origin_data[0][0]))
# TypeError: Invalid shape (3, 500, 394) for image data

# --- torch tensors -------------------------------------------------------
# Integer inputs come out as very small floats.
# NOTE(review): the magnitudes (~1e-19 for int64, ~1e-10 for int32) suggest
# scaling by roughly 1 / max-of-input-dtype - confirm against ToDtype docs.
# The 2D inputs print as 3D Images (a leading axis is added).
c(torch.tensor([[0, 1, 2]])) # int64
c(torch.tensor([[0, 1, 2]], dtype=torch.int64))
c(torch.tensor([[[0, 1, 2]]])) # int64
# Image([[[0.0000e+00, 1.0842e-19, 2.1684e-19]]],)

# Inputs with more than 3 dimensions keep their number of dimensions.
c(torch.tensor([[[[0, 1, 2]]]])) # int64
# Image([[[[0.0000e+00, 1.0842e-19, 2.1684e-19]]]],)

c(torch.tensor([[[[[0, 1, 2]]]]])) # int64
# Image([[[[[0.0000e+00, 1.0842e-19, 2.1684e-19]]]]],)

c(torch.tensor([[0, 1, 2]], dtype=torch.int32))
# Image([[[0.0000e+00, 4.6566e-10, 9.3132e-10]]],)

# Float and complex inputs: the values shown are numerically unchanged.
c(torch.tensor([[0., 1., 2.]])) # float32
c(torch.tensor([[0., 1., 2.]], dtype=torch.float32))
c(torch.tensor([[0., 1., 2.]], dtype=torch.float64))
c(torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j]])) # complex64
c(torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j]], dtype=torch.complex64))
c(torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j]], dtype=torch.complex32))
# Image([[[0., 1., 2.]]],)

# Bool inputs: True prints as 1., False as 0.
c(torch.tensor([[True, False, True]])) # bool
c(torch.tensor([[True, False, True]], dtype=torch.bool))
# Image([[[1., 0., 1.]]],)

# --- NumPy arrays --------------------------------------------------------
# NOTE(review): the "# int32" annotations assume NumPy's default integer is
# int32; that default is platform/version-dependent (often int64), in which
# case the un-annotated calls would presumably give the int64-scaled values.
c(np.array(2)) # int32
c(np.array(2, dtype=np.int32))
# Image([[[9.3132e-10]]],)

# 0D, 1D and 2D arrays all print as 3D Images.
c(np.array([0, 1, 2])) # int32
c(np.array([[0, 1, 2]])) # int32
# Image([[[0.0000e+00, 4.6566e-10, 9.3132e-10]]],)

# A (1, 1, 3) array prints as a (3, 1, 1) Image.
# NOTE(review): presumably ndarrays are read as (H, W, C) and converted to
# (C, H, W) - confirm against the ToImage docs.
c(np.array([[[0, 1, 2]]])) # int32
# Image([[[0.0000e+00]], [[4.6566e-10]], [[9.3132e-10]]],)

c(np.array([[0, 1, 2]], dtype=np.int64))
# Image([[[0.0000e+00, 1.0842e-19, 2.1684e-19]]],)

# Float and complex arrays: the values shown are numerically unchanged.
c(np.array([[0., 1., 2.]])) # float64
c(np.array([[0., 1., 2.]], dtype=np.float64))
c(np.array([[0., 1., 2.]], dtype=np.float32))
c(np.array([[0.+0.j, 1.+0.j, 2.+0.j]])) # complex128
c(np.array([[0.+0.j, 1.+0.j, 2.+0.j]], dtype=np.complex128))
c(np.array([[0.+0.j, 1.+0.j, 2.+0.j]], dtype=np.complex64))
# Image([[[0., 1., 2.]]],)

# Bool arrays: True prints as 1., False as 0.
c(np.array([[True, False, True]])) # bool
c(np.array([[True, False, True]], dtype=bool))
# Image([[[1., 0., 1.]]],)

# Inspect the scale=False dataset. Each `# ...` block below is the output
# printed for the expression directly above it.
ImageScaleFalse_data
# Dataset OxfordIIITPet
#     Number of datapoints: 3680
#     Root location: data
#     StandardTransform
# Transform: Compose(
#                  ToImage()
#                  ToDtype(scale=False)
#            )

# A sample is an (image, label) tuple; with scale=False the image values
# shown are floats in the original 0-255 range rather than [0, 1].
ImageScaleFalse_data[0]
# (Image([[[37., 35., 36., ..., 247., 249., 249.],
#          [35., 35., 37., ..., 246., 248., 249.],
#          ...,
#          [28., 28., 27., ..., 59., 65., 76.]],
#         [[20., 18., 19., ..., 248., 248., 248.],
#          [18., 18., 20., ..., 247., 247., 248.],
#          ...,
#          [27., 27., 27., ..., 94., 106., 117.]],
#         [[12., 10., 11., ..., 253., 253., 253.],
#          [10., 10., 12., ..., 251., 252., 253.],
#          ...,
#          [35., 35., 35., ..., 214., 232., 223.]]],), 0)

ImageScaleFalse_data[0][0].size()
# torch.Size([3, 500, 394])

# The image part of the first sample.
ImageScaleFalse_data[0][0]
# Image([[[37., 35., 36., ..., 247., 249., 249.],
#         [35., 35., 37., ..., 246., 248., 249.],
#         ...,
#         [28., 28., 27., ..., 59., 65., 76.]],
#        [[20., 18., 19., ..., 248., 248., 248.],
#         [18., 18., 20., ..., 247., 247., 248.],
#         ...,
#         [27., 27., 27., ..., 94., 106., 117.]],
#        [[12., 10., 11., ..., 253., 253., 253.],
#         [10., 10., 12., ..., 251., 252., 253.],
#         ...,
#         [35., 35., 35., ..., 214., 232., 223.]]],)

# The label part of the first sample.
ImageScaleFalse_data[0][1]
# 0

# Passing the Image straight to imshow fails: the error reports the shape
# (3, 500, 394).
# NOTE(review): imshow presumably needs a channel-last (H, W, C) array, e.g.
# via `.permute(1, 2, 0)` - confirm against the matplotlib docs.
plt.imshow(X=ImageScaleFalse_data[0][0])
# TypeError: Invalid shape (3, 500, 394) for image data

# Rebind `c` to the scale=False pipeline; from here on `c` only converts to
# an Image and casts to float32, and the outputs shown keep the input values.
# Where several calls are listed back to back, the single output comment
# beneath the group is the output shown for all of them.
c = Compose(transforms=[ToImage(),
                        ToDtype(torch.float32, scale=False)])
# Applied to the raw PIL sample; the output matches ImageScaleFalse_data[0][0].
c(origin_data[0][0])
# Image([[[37., 35., 36., ..., 247., 249., 249.],
#         [35., 35., 37., ..., 246., 248., 249.],
#         ...,
#         [28., 28., 27., ..., 59., 65., 76.]],
#        [[20., 18., 19., ..., 248., 248., 248.],
#         [18., 18., 20., ..., 247., 247., 248.],
#         ...,
#         [27., 27., 27., ..., 94., 106., 117.]],
#        [[12., 10., 11., ..., 253., 253., 253.],
#         [10., 10., 12., ..., 251., 252., 253.],
#         ...,
#         [35., 35., 35., ..., 214., 232., 223.]]],)

# imshow still fails with scale=False: the shape is unchanged, (3, 500, 394).
plt.imshow(c(origin_data[0][0]))
# TypeError: Invalid shape (3, 500, 394) for image data

# --- torch tensors -------------------------------------------------------
# Integer, float and complex inputs all print the same unscaled float values.
c(torch.tensor([[0, 1, 2]])) # int64
c(torch.tensor([[0, 1, 2]], dtype=torch.int64))
c(torch.tensor([[0, 1, 2]], dtype=torch.int32))
c(torch.tensor([[0., 1., 2.]])) # float32
c(torch.tensor([[0., 1., 2.]], dtype=torch.float32))
c(torch.tensor([[0., 1., 2.]], dtype=torch.float64))
c(torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j]])) # complex64
c(torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j]], dtype=torch.complex64))
c(torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j]], dtype=torch.complex32))
# Image([[[0., 1., 2.]]],)

# Inputs with more than 3 dimensions keep their number of dimensions.
c(torch.tensor([[[[0, 1, 2]]]])) # int64
# Image([[[[0., 1., 2.]]]],)

c(torch.tensor([[[[[0, 1, 2]]]]])) # int64
# Image([[[[[0., 1., 2.]]]]],)

# Bool inputs: True prints as 1., False as 0.
c(torch.tensor([[True, False, True]])) # bool
c(torch.tensor([[True, False, True]], dtype=torch.bool))
# Image([[[1., 0., 1.]]],)

# --- NumPy arrays --------------------------------------------------------
# NOTE(review): the "# int32" annotations assume NumPy's default integer is
# int32; that default is platform/version-dependent (often int64). With
# scale=False the outputs shown do not depend on the integer width.
c(np.array(2)) # int32
c(np.array(2, dtype=np.int32))
# Image([[[2.]]],)

# 1D and 2D arrays of any numeric dtype print as the same 3D Image.
c(np.array([0, 1, 2])) # int32
c(np.array([[0, 1, 2]])) # int32
c(np.array([[0, 1, 2]], dtype=np.int64))
c(np.array([[0., 1., 2.]])) # float64
c(np.array([[0., 1., 2.]], dtype=np.float64))
c(np.array([[0., 1., 2.]], dtype=np.float32))
c(np.array([[0.+0.j, 1.+0.j, 2.+0.j]])) # complex128
c(np.array([[0.+0.j, 1.+0.j, 2.+0.j]], dtype=np.complex128))
c(np.array([[0.+0.j, 1.+0.j, 2.+0.j]], dtype=np.complex64))
# Image([[[0., 1., 2.]]],)

# A (1, 1, 3) array prints as a (3, 1, 1) Image.
# NOTE(review): presumably ndarrays are read as (H, W, C) and converted to
# (C, H, W) - confirm against the ToImage docs.
c(np.array([[[0, 1, 2]]])) # int32
# Image([[[0.]], [[1.]], [[2.]]],)

# Bool arrays: True prints as 1., False as 0.
c(np.array([[True, False, True]])) # bool
c(np.array([[True, False, True]], dtype=bool))
# Image([[[1., 0., 1.]]],)