An overview of five major deep learning models, RNN, CNN, Transformer, BERT, and GPT, covering their key techniques, data handling, and typical application scenarios.
Original title: 一网打尽!5大深度学习模型!RNN、CNN、Transformer、BERT、GPT (All in one: the 5 major deep learning models)
Original author: 数据派THU
冷月清谈 (digest):
怜星夜思 (questions for reflection):
2. The article mentions classic use cases for each model. If you were to use one of these models to solve a real-world problem you care about, which would you choose, and why?
3. All of these models are trained on large amounts of data. When data is scarce, what methods can be used to improve model performance?
Original content
This article is about 3,600 words and takes roughly 5 minutes to read.
This article introduces five major deep learning models.
Deep learning continues to advance the field of artificial intelligence. Five models in particular, RNN, CNN, Transformer, BERT, and GPT, have each drawn on their distinctive strengths to achieve important breakthroughs in computer vision, natural language processing, and many other areas.
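Among the five, the Transformer is the architecture that the BERT and GPT examples later in this article build on; its core idea is to replace recurrence with self-attention over the whole sequence. As a rough, illustrative sketch, a tiny sequence classifier can be assembled from PyTorch's built-in nn.TransformerEncoder. The hyperparameters, the mean-pooling readout, and the omission of positional encodings below are simplifying assumptions for brevity, not details from the original article.
Python
import torch
import torch.nn as nn

class TransformerClassifier(nn.Module):
    def __init__(self, vocab_size, d_model=128, nhead=4, num_layers=2, num_classes=2):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        # A stack of self-attention + feed-forward layers (positional encodings omitted for brevity)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, batch_first=True)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, token_ids):
        x = self.embedding(token_ids)   # (batch, seq_len) -> (batch, seq_len, d_model)
        x = self.encoder(x)             # self-attention over the whole sequence
        return self.fc(x.mean(dim=1))   # mean-pool the sequence, then classify

clf = TransformerClassifier(vocab_size=10000)
dummy_batch = torch.randint(0, 10000, (8, 32))   # 8 random token sequences of length 32
print(clf(dummy_batch).shape)                    # torch.Size([8, 2])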
RNN example: sentiment classification on the IMDB movie-review dataset, implemented with PyTorch and the torchtext legacy API.
Python
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.legacy import data, datasets

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the text field and the label field
TEXT = data.Field(tokenize='spacy', lower=True)
LABEL = data.LabelField(dtype=torch.float)

# Load the dataset
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)

# Build the vocabularies; load pre-trained GloVe vectors for the text field
TEXT.build_vocab(train_data, max_size=10000, vectors="glove.6B.100d", unk_init=torch.Tensor.normal_)
LABEL.build_vocab(train_data)

# Build the iterators
train_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, test_data), batch_size=64, device=device)

class RNN(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text):
        # Convert token indices into word embeddings
        embedded = self.embedding(text)
        # Run the RNN over the embedded sequence
        output, hidden = self.rnn(embedded)
        # The final hidden state equals the last time step of the output
        assert torch.equal(output[-1, :, :], hidden.squeeze(0))
        # Classify from the final hidden state through the fully connected layer
        return self.fc(hidden.squeeze(0))

INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1

model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
# Initialize the embedding layer with the pre-trained GloVe vectors
model.embedding.weight.data.copy_(TEXT.vocab.vectors)
model = model.to(device)

optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()

def train(model, iterator, optimizer, criterion):
    model.train()
    epoch_loss = 0
    for batch in iterator:
        optimizer.zero_grad()
        predictions = model(batch.text).squeeze(1)
        loss = criterion(predictions, batch.label)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(iterator)

N_EPOCHS = 5
for epoch in range(N_EPOCHS):
    train_loss = train(model, train_iterator, optimizer, criterion)
    print(f'Epoch: {epoch+1:02}, Train Loss: {train_loss:.3f}')
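The training loop above only reports training loss; a minimal sketch of how the trained classifier might be evaluated on the held-out test iterator follows. The evaluate and binary_accuracy helpers are illustrative additions, not part of the original snippet.
Python
def binary_accuracy(predictions, labels):
    # Round the sigmoid outputs to 0/1 and compare with the true labels
    rounded = torch.round(torch.sigmoid(predictions))
    return (rounded == labels).float().mean().item()

def evaluate(model, iterator, criterion):
    model.eval()
    epoch_loss, epoch_acc = 0.0, 0.0
    with torch.no_grad():
        for batch in iterator:
            predictions = model(batch.text).squeeze(1)
            epoch_loss += criterion(predictions, batch.label).item()
            epoch_acc += binary_accuracy(predictions, batch.label)
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

test_loss, test_acc = evaluate(model, test_iterator, criterion)
print(f'Test Loss: {test_loss:.3f}, Test Acc: {test_acc:.3f}')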
CNN example: a small convolutional network for binary image classification in Keras, trained from folders of images with on-the-fly data augmentation.
Python
# Import the required libraries
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K

# Image dimensions
img_width, img_height = 150, 150

# Paths to the training and validation data
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'
nb_train_samples = 2000
nb_validation_samples = 800
epochs = 50
batch_size = 16

if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)

# Build the CNN model
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())  # flatten the 3D feature maps into a 1D feature vector
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))  # sigmoid activation for binary classification

# Compile the model
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Data augmentation to improve the model's generalization
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')
validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

# Train the model
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size)

# Evaluate the model
score = model.evaluate_generator(validation_generator, nb_validation_samples // batch_size)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
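Once trained, the model can be applied to a single image. The following sketch assumes the same legacy keras.preprocessing.image helpers used above; the file path 'data/test/example.jpg' is a placeholder.
Python
from keras.preprocessing.image import load_img, img_to_array

# Load one image at the size the network expects and scale pixel values to [0, 1]
img = load_img('data/test/example.jpg', target_size=(img_width, img_height))
x = img_to_array(img) / 255.0
x = np.expand_dims(x, axis=0)  # add a batch dimension: (1, 150, 150, 3)

# The sigmoid output is the probability of class 1 (class indices follow the folder order
# discovered by flow_from_directory; see train_generator.class_indices)
prob = model.predict(x)[0][0]
print('Predicted class:', int(prob >= 0.5), 'probability:', prob)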
Text generation with a pre-trained GPT-2 model through the Hugging Face transformers library.
Python
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the pre-trained model and tokenizer
model_name = "gpt2-medium"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Input text
input_text = "The quick brown fox"

# Encode the input text
input_ids = tokenizer.encode(input_text, return_tensors="pt")

# Generate text
generated = model.generate(input_ids, max_length=50, num_return_sequences=1)

# Decode the generated text
output_text = tokenizer.decode(generated[0], skip_special_tokens=True)
print(output_text)
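By default generate() decodes greedily, which often produces repetitive continuations for a short prompt; the sketch below turns on sampling instead. The particular top_k, top_p, and temperature values are arbitrary illustrative choices.
Python
# Draw three sampled continuations instead of always taking the most likely next token
sampled = model.generate(
    input_ids,
    max_length=50,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    temperature=0.8,
    num_return_sequences=3,
    pad_token_id=tokenizer.eos_token_id)

for i, sequence in enumerate(sampled):
    print(f"--- sample {i + 1} ---")
    print(tokenizer.decode(sequence, skip_special_tokens=True))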
BERT example: masked-token prediction with bert-base-uncased, filling in the [MASK] position in a sentence.
Python
import torch
from transformers import BertTokenizer, BertForMaskedLM

# Initialize the BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMaskedLM.from_pretrained('bert-base-uncased')
model.eval()

# Sentence containing the [MASK] token to be filled in
sentence = ("BERT is a powerful NLP [MASK] that can be used for a wide range of tasks, "
            "including text generation. It is based on the Transformer architecture and "
            "has been pre-trained on a large corpus of text.")

# Tokenize and encode the sentence
input_ids = torch.tensor([tokenizer.encode(sentence, add_special_tokens=True)])

# Locate the position of the "[MASK]" token that we want BERT to predict
masked_index = torch.where(input_ids == tokenizer.mask_token_id)[1]

# Run BERT to obtain token-level logits
with torch.no_grad():
    outputs = model(input_ids)
predictions = outputs[0]

# Take the highest-scoring token at the masked position
predicted_token = tokenizer.convert_ids_to_tokens(
    torch.argmax(predictions[0, masked_index], dim=-1).tolist())

# Print the prediction
print(f"Predicted token: {predicted_token}")
GPT example: open-ended text generation from a prompt with gpt2-medium (an English-language model, so the prompt here is in English).
Python
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Initialize the tokenizer and the model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
model = GPT2LMHeadModel.from_pretrained('gpt2-medium')

# Define the prompt to start generation from, and convert it to token IDs
context = "The development of artificial intelligence has brought enormous changes to society, "
input_ids = tokenizer.encode(context, return_tensors='pt')

# Length of the generated text (in tokens)
length = 100

# Switch to evaluation mode
model.eval()

# Generate text
with torch.no_grad():
    output = model.generate(input_ids, max_length=length + len(input_ids[0]),
                            pad_token_id=tokenizer.eos_token_id)

# Convert the generated token IDs (excluding the prompt) back into text
generated_text = tokenizer.decode(output[0][len(input_ids[0]):], skip_special_tokens=True)

# Print the generated text
print(generated_text)