An overview of five major deep learning models: RNN, CNN, Transformer, BERT, and GPT, covering their key techniques, data handling, and application scenarios.
Original title: 一网打尽!5大深度学习模型!RNN、CNN、Transformer、BERT、GPT
Original author: 数据派THU
Questions for thought:
2. The article walks through classic example use cases for each model. If you were to use one of these models to solve a real-world problem you care about, which would you choose, and why?
3. These models are all trained on large amounts of data. When data is scarce, what methods can help improve model performance?
Original content
This article is about 3,600 words; suggested reading time: 5 minutes.
This article introduces five major deep learning models.
Deep learning has delivered one advance after another in artificial intelligence. Five models in particular, RNN, CNN, Transformer, BERT, and GPT, have leveraged their distinctive strengths to achieve major breakthroughs in computer vision, natural language processing, and many other fields.
RNN example: sentiment classification on the IMDB movie-review dataset with PyTorch and torchtext:

import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.legacy import data, datasets
from torchtext.legacy.data import Field, LabelField

# Define the text field and the label field
TEXT = Field(tokenize='spacy', lower=True)
LABEL = LabelField(dtype=torch.float)

# Load the dataset and build the iterators
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, test_data),
    batch_size=64,
    device=device)

# Build the vocabularies; attach pre-trained GloVe word vectors to the text vocabulary
TEXT.build_vocab(train_data, max_size=10000, vectors="glove.6B.100d",
                 unk_init=torch.Tensor.normal_)
LABEL.build_vocab(train_data)

class RNN(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text):
        # Convert token indices into word embeddings
        embedded = self.embedding(text)
        # Run the RNN over the embedded sequence
        output, hidden = self.rnn(embedded)
        # The last output of the RNN equals the final hidden state
        assert torch.equal(output[-1, :, :], hidden.squeeze(0))
        # Classify from the final hidden state
        return self.fc(hidden.squeeze(0))

INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1

model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
# Copy the pre-trained GloVe vectors into the embedding layer
model.embedding.weight.data.copy_(TEXT.vocab.vectors)
predictor = model.to(device)
optimizer = optim.Adam(predictor.parameters())
criterion = nn.BCEWithLogitsLoss()

def train(model, iterator, optimizer, criterion):
    model.train()
    epoch_loss = 0
    for batch in iterator:
        text, labels = batch.text.to(device), batch.label.to(device)
        optimizer.zero_grad()
        predictions = model(text).squeeze(1)
        loss = criterion(predictions, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(iterator)

N_EPOCHS = 5
for epoch in range(N_EPOCHS):
    train_loss = train(predictor, train_iterator, optimizer, criterion)
    print(f'Epoch: {epoch+1:02}, Train Loss: {train_loss:.3f}')
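To sanity-check the trained classifier, here is a minimal inference sketch (not part of the original article). It assumes the TEXT vocabulary, device, and predictor defined above, and that the spaCy en_core_web_sm model is installed:

import spacy

nlp = spacy.load('en_core_web_sm')

def predict_sentiment(model, sentence):
    # Tokenize, map tokens to vocabulary indices, and add a batch dimension
    model.eval()
    tokens = [tok.text.lower() for tok in nlp(sentence)]
    indexed = [TEXT.vocab.stoi[t] for t in tokens]
    tensor = torch.LongTensor(indexed).unsqueeze(1).to(device)  # shape: [seq_len, 1]
    with torch.no_grad():
        # Sigmoid maps the raw logit to a probability of the positive class
        probability = torch.sigmoid(model(tensor))
    return probability.item()

print(predict_sentiment(predictor, "This film is absolutely wonderful."))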
CNN example: binary image classification with Keras, using on-the-fly data augmentation:

# Import the required libraries
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K

# Image dimensions
img_width, img_height = 150, 150

# Paths to the training and validation data
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'
nb_train_samples = 2000
nb_validation_samples = 800
epochs = 50
batch_size = 16

if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)

# Build the CNN model
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())  # flatten the 3D feature maps into a 1D feature vector
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))  # sigmoid activation for binary classification

# Compile the model
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Data augmentation to improve generalization
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

# Train the model
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size)

# Evaluate the model
score = model.evaluate_generator(validation_generator, nb_validation_samples // batch_size)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
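As a follow-up (not in the original article), a minimal sketch of scoring a single image with the trained model; the file path is a hypothetical placeholder:

from keras.preprocessing import image

# Load one image, apply the same rescaling as the generators above, and predict
img = image.load_img('data/validation/example.jpg', target_size=(img_width, img_height))
x = image.img_to_array(img) / 255.0
x = np.expand_dims(x, axis=0)          # add a batch dimension
probability = model.predict(x)[0][0]   # sigmoid output in [0, 1]
print('Predicted probability of the positive class:', probability)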
Text generation with a pre-trained GPT-2 model from Hugging Face Transformers (greedy decoding):

from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the pre-trained model and tokenizer
model_name = "gpt2-medium"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Input text
input_text = "The quick brown fox"

# Encode the input text
input_ids = tokenizer.encode(input_text, return_tensors="pt")

# Generate text
generated = model.generate(input_ids, max_length=50, num_return_sequences=1)

# Decode the generated text
output_text = tokenizer.decode(generated[0], skip_special_tokens=True)
print(output_text)
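Greedy decoding tends to produce repetitive continuations; below is a hedged sketch of sampling-based decoding with the same model. The top_k and top_p values are illustrative choices, not from the original article:

# Sample several continuations instead of decoding greedily
sampled = model.generate(
    input_ids,
    max_length=50,
    do_sample=True,          # sample from the distribution instead of taking the argmax
    top_k=50,                # restrict sampling to the 50 most likely next tokens
    top_p=0.95,              # nucleus sampling threshold
    num_return_sequences=3,
    pad_token_id=tokenizer.eos_token_id)
for seq in sampled:
    print(tokenizer.decode(seq, skip_special_tokens=True))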
BERT example: masked-language-model prediction, filling in the [MASK] token of a sentence:

import torch
from transformers import BertTokenizer, BertForMaskedLM

# Initialize the BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMaskedLM.from_pretrained('bert-base-uncased')
model.eval()

# Sentence containing the position to predict
sentence = "BERT is a powerful NLP model that can be used for a wide range of tasks, including text [MASK]. It is based on the Transformer architecture and has been pre-trained on a large corpus of text."

# Tokenize and encode the sentence
input_ids = torch.tensor([tokenizer.encode(sentence, add_special_tokens=True)])

# Locate the position to predict: the index of the [MASK] token in the sentence
masked_index = torch.where(input_ids == tokenizer.mask_token_id)[1]

# Run the BERT model to get predictions
outputs = model(input_ids)
predictions = outputs[0]

# Take the token with the highest predicted probability at the masked position
predicted_token = tokenizer.convert_ids_to_tokens(
    torch.argmax(predictions[0, masked_index], dim=-1).tolist())

# Print the prediction
print(f"Predicted token: {predicted_token}")
GPT example: continuing a prompt with GPT-2 and decoding only the newly generated tokens:

from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Initialize the tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
model = GPT2LMHeadModel.from_pretrained('gpt2-medium')

# Define the prompt to start generation from and convert it to token IDs
context = "The development of artificial intelligence has brought tremendous changes to society, "
input_ids = tokenizer.encode(context, return_tensors='pt')

# Set the number of tokens to generate
length = 100

# Switch to evaluation mode
model.eval()

# Generate text
with torch.no_grad():
    output = model.generate(input_ids, max_length=length + len(input_ids[0]),
                            pad_token_id=tokenizer.eos_token_id)

# Convert the generated token IDs back to text (skipping the prompt tokens)
generated_text = tokenizer.decode(output[0][len(input_ids[0]):], skip_special_tokens=True)

# Print the generated text
print(generated_text)