AutoAugment in PyTorch

Buy Me a Coffee☕ *Memos: My post explains OxfordIIITPet(). AutoAugment() can randomly augment an image with AutoAugmentPolicy as shown below: *Memos: The 1st argument for initialization is policy(Optional-Default:AutoAugmentPolicy.IMAGENET-Type:AutoAugmentPolicy). *AutoAugmentPolicy.IMAGENET, AutoAugmentPolicy.CIFAR10 or AutoAugmentPolicy.SVHN can be set to it. The 2nd argument for initialization is interpolation(Optional-Default:InterpolationMode.NEAREST-Type:InterpolationMode). *If the input is a tensor, only InterpolationMode.NEAREST, InterpolationMode.BILINEAR can be set to it. The 3rd argument for initialization is fill(Optional-Default:0-Type:int, float or tuple/list(int or float)): *Memos: It can change the background of an image. A tuple/list must be the 1D with 1 or 3 elements. The 1st argument is img(Required-Type:PIL Image or tensor(int)): *Memos: A tensor must be 2D or 3D. Don't use img=. v2 is recommended to use according to V1 or V2? Which one should I use?. from torchvision.datasets import OxfordIIITPet from torchvision.transforms.v2 import AutoAugment from torchvision.transforms.v2 import AutoAugmentPolicy from torchvision.transforms.functional import InterpolationMode aa = AutoAugment() aa = AutoAugment(policy=AutoAugmentPolicy.IMAGENET, interpolation = InterpolationMode.NEAREST, fill=None) aa # AutoAugment(interpolation=InterpolationMode.NEAREST, # policy=AutoAugmentPolicy.IMAGENET) aa.policy # aa.interpolation # print(aa.fill) # None origin_data = OxfordIIITPet( root="data", transform=None ) pIMAGENET_data = OxfordIIITPet( # `p` is policy. 
root="data", transform=AutoAugment(policy=AutoAugmentPolicy.IMAGENET) ) pCIFAR10_data = OxfordIIITPet( root="data", transform=AutoAugment(policy=AutoAugmentPolicy.CIFAR10) ) pSVHN_data = OxfordIIITPet( root="data", transform=AutoAugment(policy=AutoAugmentPolicy.SVHN) ) pIMAGENETf150_data = OxfordIIITPet( root="data", transform=AutoAugment(policy=AutoAugmentPolicy.IMAGENET, fill=150) ) pIMAGENETf160_32_240_data = OxfordIIITPet( root="data", transform=AutoAugment(policy=AutoAugmentPolicy.IMAGENET, fill=[160, 32, 240]) ) pCIFAR10f150_data = OxfordIIITPet( root="data", transform=AutoAugment(policy=AutoAugmentPolicy.CIFAR10, fill=150) ) pCIFAR10f160_32_240_data = OxfordIIITPet( root="data", transform=AutoAugment(policy=AutoAugmentPolicy.CIFAR10, fill=[160, 32, 240]) ) pSVHNf150_data = OxfordIIITPet( root="data", transform=AutoAugment(policy=AutoAugmentPolicy.SVHN, fill=150) ) pSVHNf160_32_240_data = OxfordIIITPet( root="data", transform=AutoAugment(policy=AutoAugmentPolicy.SVHN, fill=[160, 32, 240]) ) import matplotlib.pyplot as plt def show_images1(data, main_title=None): plt.figure(figsize=[10, 5]) plt.suptitle(t=main_title, y=0.8, fontsize=14) for i, (im, _) in zip(range(1, 6), data): plt.subplot(1, 5, i) plt.imshow(X=im) plt.xticks(ticks=[]) plt.yticks(ticks=[]) plt.tight_layout() plt.show() show_images1(data=origin_data, main_title="origin_data") print() show_images1(data=pIMAGENET_data, main_title="pIMAGENET_data") show_images1(data=pIMAGENET_data, main_title="pIMAGENET_data") show_images1(data=pIMAGENET_data, main_title="pIMAGENET_data") print() show_images1(data=pCIFAR10_data, main_title="pCIFAR10_data") show_images1(data=pCIFAR10_data, main_title="pCIFAR10_data") show_images1(data=pCIFAR10_data, main_title="pCIFAR10_data") print() show_images1(data=pSVHN_data, main_title="pSVHN_data") show_images1(data=pSVHN_data, main_title="pSVHN_data") show_images1(data=pSVHN_data, main_title="pSVHN_data") print() show_images1(data=pIMAGENETf150_data, 
main_title="pIMAGENETf150_data") show_images1(data=pIMAGENETf160_32_240_data, main_title="pIMAGENETf160_32_240_data") print() show_images1(data=pCIFAR10f150_data, main_title="pCIFAR10f150_data") show_images1(data=pCIFAR10f160_32_240_data, main_title="pCIFAR10f160_32_240_data") print() show_images1(data=pSVHNf150_data, main_title="pSVHNf150_data") show_images1(data=pSVHNf160_32_240_data, main_title="pSVHNf160_32_240_data") # ↓ ↓ ↓ ↓ ↓ ↓ The code below is identical to the code above. ↓ ↓ ↓ ↓ ↓ ↓ def show_images2(data, main_title=None, p=None, ip=InterpolationMode.NEAREST, f=None): plt.figure(figsize=[10, 5]) plt.suptitle(t=main_title, y=0.8, fontsize=14) if p != None: for i, (im, _) in zip(range(1, 6), data): plt.subplot(1, 5, i) aa = AutoAugment(policy=p, interpolation=ip, fill=f) plt.imshow(X=aa(im)) plt.xticks(ticks=[]) plt.yticks(ticks=[]) else: for i, (im, _) in zip(range(1, 6), data): plt.subplot(1, 5, i) plt.imshow(X=im)

Feb 21, 2025 - 01:39
 0
AutoAugment in PyTorch

Buy Me a Coffee

*Memos:

  • My post explains OxfordIIITPet().

AutoAugment() can randomly augment an image with AutoAugmentPolicy as shown below:

*Memos:

  • The 1st argument for initialization is policy(Optional-Default:AutoAugmentPolicy.IMAGENET-Type:AutoAugmentPolicy). *AutoAugmentPolicy.IMAGENET, AutoAugmentPolicy.CIFAR10 or AutoAugmentPolicy.SVHN can be set to it.
  • The 2nd argument for initialization is interpolation(Optional-Default:InterpolationMode.NEAREST-Type:InterpolationMode). *If the input is a tensor, only InterpolationMode.NEAREST or InterpolationMode.BILINEAR can be set to it.
  • The 3rd argument for initialization is fill(Optional-Default:None-Type:int, float or tuple/list(int or float)): *Memos:
    • It can change the background of an image.
    • A tuple/list must be 1D with 1 or 3 elements.
  • The 1st argument when calling the transform is img(Required-Type:PIL Image or tensor(int)): *Memos:
    • A tensor must be 2D or 3D.
    • Don't use img=.
  • Using v2 is recommended, according to "V1 or V2? Which one should I use?".
from torchvision.datasets import OxfordIIITPet
from torchvision.transforms.v2 import AutoAugment
from torchvision.transforms.v2 import AutoAugmentPolicy
from torchvision.transforms.functional import InterpolationMode

# Construct with defaults, then again with each argument passed explicitly.
aa = AutoAugment()
aa = AutoAugment(policy=AutoAugmentPolicy.IMAGENET,
                 interpolation = InterpolationMode.NEAREST,
                 fill=None)
# The bare expressions below are meant for a REPL/notebook, where their
# value is echoed; the comment under each one shows the expected output.
aa
# AutoAugment(interpolation=InterpolationMode.NEAREST,
#             policy=AutoAugmentPolicy.IMAGENET)

aa.policy
# <AutoAugmentPolicy.IMAGENET: 'imagenet'>

aa.interpolation
# <InterpolationMode.NEAREST: 'nearest'>

print(aa.fill)
# None

def _pets(**augment_kwargs):
    """Build an OxfordIIITPet dataset rooted at "data".

    With no keyword arguments the dataset gets `transform=None`; otherwise
    the keyword arguments are forwarded unchanged to `AutoAugment` and the
    resulting transform is attached to the dataset.
    """
    transform = AutoAugment(**augment_kwargs) if augment_kwargs else None
    return OxfordIIITPet(root="data", transform=transform)


# Naming: `p` is policy, `f` is fill (e.g. `f160_32_240` is fill=[160, 32, 240]).
origin_data = _pets()

pIMAGENET_data = _pets(policy=AutoAugmentPolicy.IMAGENET)
pCIFAR10_data = _pets(policy=AutoAugmentPolicy.CIFAR10)
pSVHN_data = _pets(policy=AutoAugmentPolicy.SVHN)

pIMAGENETf150_data = _pets(policy=AutoAugmentPolicy.IMAGENET, fill=150)
pIMAGENETf160_32_240_data = _pets(policy=AutoAugmentPolicy.IMAGENET,
                                  fill=[160, 32, 240])

pCIFAR10f150_data = _pets(policy=AutoAugmentPolicy.CIFAR10, fill=150)
pCIFAR10f160_32_240_data = _pets(policy=AutoAugmentPolicy.CIFAR10,
                                 fill=[160, 32, 240])

pSVHNf150_data = _pets(policy=AutoAugmentPolicy.SVHN, fill=150)
pSVHNf160_32_240_data = _pets(policy=AutoAugmentPolicy.SVHN,
                              fill=[160, 32, 240])

import matplotlib.pyplot as plt

def show_images1(data, main_title=None):
    """Plot the first five images of `data` side by side in one figure.

    Args:
        data: Iterable yielding `(image, label)` pairs; only the image of
            each pair is drawn.
        main_title: Text placed above the row of images.
    """
    columns = 5
    plt.figure(figsize=[10, 5])
    plt.suptitle(t=main_title, y=0.8, fontsize=14)
    # zip() with the range caps the loop at `columns` samples without
    # fetching an extra item from `data`.
    for slot, sample in zip(range(1, columns + 1), data):
        picture, _label = sample
        plt.subplot(1, columns, slot)
        plt.imshow(X=picture)
        # Empty tick lists hide the axis ticks on both axes.
        plt.xticks(ticks=[])
        plt.yticks(ticks=[])
    plt.tight_layout()
    plt.show()

# Figures are drawn group by group with an empty line printed between
# groups. The plain-policy datasets are drawn three times each; the
# fill (`f...`) variants once each.
_figure_groups1 = (
    [("origin_data", origin_data)],
    [("pIMAGENET_data", pIMAGENET_data)] * 3,
    [("pCIFAR10_data", pCIFAR10_data)] * 3,
    [("pSVHN_data", pSVHN_data)] * 3,
    [("pIMAGENETf150_data", pIMAGENETf150_data),
     ("pIMAGENETf160_32_240_data", pIMAGENETf160_32_240_data)],
    [("pCIFAR10f150_data", pCIFAR10f150_data),
     ("pCIFAR10f160_32_240_data", pCIFAR10f160_32_240_data)],
    [("pSVHNf150_data", pSVHNf150_data),
     ("pSVHNf160_32_240_data", pSVHNf160_32_240_data)],
)

for group_index, group in enumerate(_figure_groups1):
    if group_index > 0:
        print()  # separator between groups, none after the last one
    for title, dataset in group:
        show_images1(data=dataset, main_title=title)

# ↓ ↓ ↓ ↓ ↓ ↓ The code below is an alternative that produces equivalent figures to the code above. ↓ ↓ ↓ ↓ ↓ ↓
def show_images2(data, main_title=None, p=None,
                 ip=InterpolationMode.NEAREST,
                 f=None):
    """Plot the first five images of `data` in one row, optionally augmented.

    Args:
        data: Iterable yielding `(image, label)` pairs; only the image of
            each pair is drawn.
        main_title: Text placed above the row of images.
        p: `AutoAugment` policy. When `None`, the images are drawn as-is
            and `ip` and `f` are not used.
        ip: Interpolation mode forwarded to `AutoAugment`.
        f: Fill value forwarded to `AutoAugment`.
    """
    # Build the transform once instead of once per image; `None` means
    # "draw the images untouched". Identity test, not `!=`, per PEP 8.
    aa = None if p is None else AutoAugment(policy=p, interpolation=ip,
                                            fill=f)
    plt.figure(figsize=[10, 5])
    plt.suptitle(t=main_title, y=0.8, fontsize=14)
    # zip() with the range caps the loop at five samples.
    for i, (im, _) in zip(range(1, 6), data):
        plt.subplot(1, 5, i)
        plt.imshow(X=im if aa is None else aa(im))
        # Empty tick lists hide the axis ticks on both axes.
        plt.xticks(ticks=[])
        plt.yticks(ticks=[])
    plt.tight_layout()
    plt.show()

# Every figure is drawn from `origin_data`; the entries below hold the
# remaining keyword arguments of each show_images2() call. Figures are
# drawn group by group with an empty line printed between groups.
_figure_groups2 = (
    [{"main_title": "origin_data"}],
    [{"main_title": "pIMAGENET_data", "p": AutoAugmentPolicy.IMAGENET}] * 3,
    [{"main_title": "pCIFAR10_data", "p": AutoAugmentPolicy.CIFAR10}] * 3,
    [{"main_title": "pSVHN_data", "p": AutoAugmentPolicy.SVHN}] * 3,
    [{"main_title": "pIMAGENETf150_data",
      "p": AutoAugmentPolicy.IMAGENET, "f": 150},
     {"main_title": "pIMAGENETf160_32_240_data",
      "p": AutoAugmentPolicy.IMAGENET, "f": [160, 32, 240]}],
    [{"main_title": "pCIFAR10f150_data",
      "p": AutoAugmentPolicy.CIFAR10, "f": 150},
     {"main_title": "pCIFAR10f160_32_240_data",
      "p": AutoAugmentPolicy.CIFAR10, "f": [160, 32, 240]}],
    [{"main_title": "pSVHNf150_data",
      "p": AutoAugmentPolicy.SVHN, "f": 150},
     {"main_title": "pSVHNf160_32_240_data",
      "p": AutoAugmentPolicy.SVHN, "f": [160, 32, 240]}],
)

for group_index, group in enumerate(_figure_groups2):
    if group_index > 0:
        print()  # separator between groups, none after the last one
    for options in group:
        show_images2(data=origin_data, **options)

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description