本文介绍了十个实用的Python数据增强库;数据增强对提升模型性能至关重要。
原文标题:用于数据增强的十个Python库
原文作者:数据派THU
冷月清谈:
怜星夜思:
2、数据增强对模型的影响到底有多大?有没有具体例子?
3、有哪些实际场景是需要用到数据增强的?
原文内容
来源:DeepHub IMBA
本文约3800字,建议阅读10分钟。
本文将介绍数据增强的十个Python库,并为每个库提供代码片段和解释。
Augmentor
import Augmentor

# Build an augmentation pipeline over the images in the given directory.
p = Augmentor.Pipeline("path/to/your/images")
# Rotate with probability 0.7, by at most 25 degrees to the left or right.
p.rotate(probability=0.7, max_left_rotation=25, max_right_rotation=25)
# Mirror left-to-right with probability 0.5.
p.flip_left_right(probability=0.5)
# Draw 100 samples from the pipeline.
p.sample(100)
Albumentations
import albumentations as A

# Chain three transforms, each left at its default settings.
transform = A.Compose([
    A.RandomRotate90(),
    A.HorizontalFlip(),
    A.RandomBrightnessContrast(),
])
# `image` must be supplied by the caller (presumably a NumPy image array -- confirm).
# The transform returns a dict; the augmented image is stored under "image".
augmented_image = transform(image=image)["image"]
Imgaug
import imgaug.augmenters as iaa

augmenter = iaa.Sequential([
    # Horizontal flip with probability 0.5.
    iaa.Fliplr(0.5),
    # Apply Gaussian blur (sigma sampled from [0, 2.0]) to half of the images.
    iaa.Sometimes(0.5, iaa.GaussianBlur(sigma=(0, 2.0))),
    # LinearContrast replaces the deprecated ContrastNormalization.
    iaa.LinearContrast((0.5, 2.0)),
])
# `image` must be supplied by the caller.
augmented_image = augmenter.augment_image(image)
nlpaug
import nlpaug.augmenter.word as naw

# Contextual word-embedding augmenter that inserts words using the
# "bert-base-uncased" model.
aug = naw.ContextualWordEmbsAug(model_path="bert-base-uncased", action="insert")
augmented_text = aug.augment("This is a sample text.")
imgaugment
from imgaug import augmenters as iaa

seq = iaa.Sequential([
    # Horizontal flip with probability 0.5.
    iaa.Fliplr(0.5),
    # Apply Gaussian blur (sigma sampled from [0, 2.0]) to half of the images.
    iaa.Sometimes(0.5, iaa.GaussianBlur(sigma=(0, 2.0))),
    # LinearContrast replaces the deprecated ContrastNormalization.
    iaa.LinearContrast((0.5, 2.0)),
])
# `image` must be supplied by the caller.
augmented_image = seq(image=image)
TextAttack
from textattack.augmentation import WordNetAugmenter

# Augment a sentence with the WordNet-based augmenter at its default settings.
augmenter = WordNetAugmenter()
augmented_text = augmenter.augment("The quick brown fox")
TAAE
# NOTE(review): confirm that the `taae` package and `SynonymAugmenter` are
# available under these names in your environment.
from taae import SynonymAugmenter

augmenter = SynonymAugmenter()
augmented_text = augmenter.augment("This is a test sentence.")
Audiomentations
import audiomentations as A

augmenter = A.Compose([
    A.PitchShift(),
    A.TimeStretch(),
    # AddBackgroundNoise requires `sounds_path`: a file or directory of
    # background-noise recordings. Replace the placeholder with a real path.
    A.AddBackgroundNoise(sounds_path="path/to/background_noises"),
])
# `audio_data` and `sample_rate` must be supplied by the caller
# (presumably a float NumPy waveform and its sampling rate in Hz -- confirm).
augmented_audio = augmenter(samples=audio_data, sample_rate=sample_rate)
ImageDataAugmentor
# The two imports were fused onto one line in the original, which is a
# syntax error; each import goes on its own line.
from ImageDataAugmentor.image_data_augmentor import *
import tensorflow as tf

# `augmentor` must be defined by the caller before this point
# (presumably an augmentation pipeline such as an albumentations Compose -- confirm).
datagen = ImageDataAugmentor(
    augment=augmentor,
    preprocess_input=None,
)
# Stream batches of 32 images with binary labels from the "data/train" directory.
train_generator = datagen.flow_from_directory(
    "data/train", batch_size=32, class_mode="binary"
)
Keras ImageDataGenerator
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Configure random rotation, shifts, shear, zoom and horizontal flips;
# pixels exposed by a transform are filled with the "nearest" mode.
datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)
# Stream batches of 32 images from the "data/train" directory.
augmented_images = datagen.flow_from_directory("data/train", batch_size=32)


