RNN经典案例——人名分类器
- 一、数据处理
- 1.1 去掉语言中的重音标记
- 1.2 读取数据
- 1.3 构建人名类别与人名对应关系字典
- 1.4 将人名转换为对应的onehot张量
- 二、构建RNN模型
- 2.1 构建传统RNN模型
- 2.2 构建LSTM模型
- 2.3 构建GRU模型
- 三、构建训练函数并进行训练
- 3.1 从输出结果中获得指定类别函数
- 3.2 随机生成训练数据
- 3.3 构建传统的RNN训练函数
- 3.4 构建LSTM训练函数
- 3.5 构建GRU训练函数
- 3.6 构建时间计算函数
- 3.7 构建训练过程的日志打印函数
- 3.8 调用train函数, 进行模型的训练
- 四、构建评估模型并预测
- 4.1 构建传统RNN评估函数
- 4.2 构建LSTM评估函数
- 4.3 构建GRU评估函数
- 4.4 构建预测函数
一、数据处理
from io import open
import glob
import os
import string
import unicodedata
import random
import time
import math
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
1.1 去掉语言中的重音标记
# Alphabet we keep: ASCII letters plus a few punctuation characters.
all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)
print("all_letters:",all_letters)
print("n_letters:",n_letters)


def unicodeToAscii(s):
    """Strip accents/diacritics, e.g. 'Ślusàrski' -> 'Slusarski'.

    NFD normalization decomposes each character into its base character
    plus combining marks (Unicode category 'Mn', used for accents/tones);
    the marks are dropped and only characters in ``all_letters`` are kept.
    (Fix: the original bound the result to a local named ``ascii``,
    shadowing the builtin.)
    """
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn' and c in all_letters
    )
1.2 读取数据
# Directory holding one names file per language.
data_path = "./data/names/"


def readLines(filename):
    """Read *filename* (UTF-8, one name per line) and return the names
    with accents stripped via ``unicodeToAscii``.

    Fix: the original leaked the file handle (``open(...).read()`` with
    no close); the ``with`` block closes it deterministically.
    """
    with open(filename, encoding='utf-8') as f:
        lines = f.read().strip().split('\n')
    return [unicodeToAscii(line) for line in lines]
# Debug: sanity-check readLines on one language file.
filename = data_path + "Chinese.txt"
lines = readLines(filename)
print(lines)
1.3 构建人名类别与人名对应关系字典
# category_lines maps each language (category) to its list of names;
# all_category collects the category names themselves.
category_lines = {}
all_category = []

for path in glob.glob(data_path + '*.txt'):
    # The category is the file name with directory and extension removed.
    cat = os.path.splitext(os.path.basename(path))[0]
    all_category.append(cat)
    category_lines[cat] = readLines(path)

# Total number of categories (languages).
n_categories = len(all_category)
print("n_categories:",n_categories)
1.4 将人名转换为对应的onehot张量
def lineToTensor(line):tensor = torch.zeros(len(line),1,n_letters)for i,letter in enumerate(line):tensor[i][0][all_letters.find(letter)] = 1return tensor
# Debug: encode a short name and inspect the resulting one-hot tensor.
line = 'cui'
line_tensor = lineToTensor(line)
# Bare expression: displays the tensor when run as a notebook cell.
line_tensor
二、构建RNN模型
2.1 构建传统RNN模型
class RNN(nn.Module):def __init__(self,input_size,hidden_size,output_size,num_layers=1):super(RNN,self).__init__()self.hidden_size = hidden_sizeself.num_layers = num_layers# 实例化RNNself.rnn = nn.RNN(input_size,hidden_size,num_layers)# RNN 层的输出转换为最终的输出特征self.linear = nn.Linear(hidden_size,output_size)# 将全连接层的输出特征转换为概率分布self.softmax = nn.LogSoftmax(dim=-1)def forward(self,input,hidden):# input 形状为1*n_letters需要变换为三维张量input = input.unsqueeze(0)rr,hn = self.rnn(input,hidden)return self.softmax(self.linear(rr)),hn# 定义初始化隐藏状态def initHidden(self):return torch.zeros(self.num_layers,1,self.hidden_size)
2.2 构建LSTM模型
class LSTM(nn.Module):def __init__(self,input_size,hidden_size,output_size,num_layers=1):super(LSTM,self).__init__()self.hidden_size = hidden_sizeself.num_layers = num_layersself.lstm = nn.LSTM(input_size,hidden_size,num_layers)self.linear = nn.Linear(hidden_size,output_size)self.softmax = nn.LogSoftmax(dim=-1)def forward(self,input,hidden,c):input = input.unsqueeze(0)rr,(hn,c) = self.lstm(input,(hidden,c))return self.softmax(self.linear(rr)),hn,cdef initHidden(self):hidden = c = torch.zeros(self.num_layers,1,self.hidden_size)return hidden,c
2.3 构建GRU模型
class GRU(nn.Module):def __init__(self,input_size,hidden_size,output_size,num_layers=1):super(GRU,self).__init__()self.hidden_size = hidden_sizeself.num_layers = num_layersself.gru = nn.GRU(input_size,hidden_size,num_layers)self.linear = nn.Linear(hidden_size,output_size)self.softmax = nn.LogSoftmax(dim=-1)def forward(self,input,hidden):input = input.unsqueeze(0)rr,hn = self.gru(input,hidden)return self.softmax(self.linear(rr)),hndef initHidden(self):return torch.zeros(self.num_layers,1,self.hidden_size)
# Demo: instantiate the three models and run a single character through each.
input_size = n_letters
n_hidden = 128
output_size = n_categories

# One-hot encode a single character; squeeze off the seq_len axis so the
# models' forward (which re-adds it) receives a (1, n_letters) tensor.
input = lineToTensor('B').squeeze(0)
hidden = c = torch.zeros(1, 1, n_hidden)

rnn = RNN(n_letters, n_hidden, n_categories)
lstm = LSTM(n_letters, n_hidden, n_categories)
gru = GRU(n_letters, n_hidden, n_categories)

rnn_output, next_hidden = rnn(input, hidden)
print("rnn:", rnn_output)
lstm_output, next_hidden, c = lstm(input, hidden, c)
print("lstm:", lstm_output)
gru_output, next_hidden = gru(input, hidden)
print("gru:", gru_output)
三、构建训练函数并进行训练
3.1 从输出结果中获得指定类别函数
def categoryFromOutput(output):# 从输出张量中返回最大的值和索引top_n,top_i = output.topk(1)category_i = top_i[0].item()# 获取对应语言类别, 返回语⾔类别和索引值return all_category[category_i],category_i
# Debug: decode the GRU demo output into a category name and index.
category, category_i = categoryFromOutput(gru_output)
print("category:", category)
print("category_i:", category_i)
3.2 随机生成训练数据
def randomTrainingExample():# 随机获取一个类别category = random.choice(all_category)# 随机获取该类别中的名字line = random.choice(category_lines[category])# 将类别索引转换为tensor张量category_tensor = torch.tensor([all_category.index(category)],dtype=torch.long)# 对名字进行onehot编码line_tensor = lineToTensor(line)return category,line,category_tensor,line_tensor
# Debug: sample a few training examples and show their encodings.
for _ in range(10):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    print('category =', category, '/ line =', line,
          '/ category_tensor =', category_tensor,
          '/ line_tensor =', line_tensor)
3.3 构建传统的RNN训练函数
# NLLLoss pairs with the LogSoftmax output of the models.
criterion = nn.NLLLoss()
# Learning rate used by the manual SGD updates in the train functions.
learning_rate = 0.005
import torch.optim as optim
def trainRNN(category_tensor,line_tensor):# 实例化对象rnn初始化隐层张量hidden = rnn.initHidden()# 梯度清零optimizer = optim.SGD(rnn.parameters(),lr=0.01,momentum=0.9)optimizer.zero_grad()# 前向传播for i in range(line_tensor.size()[0]):# output 是 RNN 在每个时间步的输出。每个时间步的输出是一个隐藏状态,这些隐藏状态可以用于后续的处理,例如分类、回归等任务。# hidden是 RNN 在最后一个时间步的隐藏状态。这些隐藏状态可以用于捕获整个序列的信息,通常用于后续的处理,例如作为下一个层的输入。output,hidden = rnn(line_tensor[i],hidden)# 计算损失loss = criterion(output.squeeze(0),category_tensor)# 反向传播loss.backward()optimizer.step()# 更新模型中的参数#for p in rnn.parameters():#p.data.add_(-learning_rate,p.grad.data)return output,loss.item()
3.4 构建LSTM训练函数
def trainLSTM(category_tensor,line_tensor):hidden,c = lstm.initHidden()lstm.zero_grad()for i in range(line_tensor.size()[0]):output,hidden,c = lstm(line_tensor[i],hidden,c)loss = criterion(output.squeeze(0),category_tensor)loss.backward()for p in lstm.parameters():p.data.add_(-learning_rate,p.grad.data)return output,loss.item()
3.5 构建GRU训练函数
def trainGRU(category_tensor,line_tensor):hidden = gru.initHidden()gru.zero_grad()for i in range(line_tensor.size()[0]):output,hidden = gru(line_tensor[i],hidden)loss = criterion(output.squeeze(0),category_tensor)loss.backward()for p in gru.parameters():p.data.add_(-learning_rate,p.grad.data)return output,loss.item()
3.6 构建时间计算函数
# Elapsed-time formatter used by the training log.
def timeSince(since):
    """Format the seconds elapsed since timestamp *since* as 'Xm Ys'."""
    elapsed = time.time() - since
    minutes, seconds = divmod(elapsed, 60)
    return '%dm %ds' % (minutes, seconds)
3.7 构建训练过程的日志打印函数
# Total training iterations per model.
n_iters= 1000
# Emit a progress line every `print_every` iterations.
print_every = 50
# Record one averaged loss point every `plot_every` iterations.
plot_every = 10
def train(train_typr_fn):# 保存每个间隔的损失函数all_losses = []# 获得训练开始的时间戳start = time.time()# 设置当前间隔损失为0current_loss = 0# 循环训练for iter in range(1,n_iters+1):category,line,category_tensor,line_tensor = randomTrainingExample()output,loss = train_typr_fn(category_tensor,line_tensor)# 计算打印间隔的总损失current_loss += lossif iter % print_every == 0:# 获得预测的类别和索引guess,guess_i = categoryFromOutput(output)if guess == category:correct = '✓'else:correct = '✗(%s)' % categoryprint('%d %d%% (%s) %.4f %s / %s %s' % (iter, iter / n_iters *100, timeSince(start), loss, line, guess, correct))if iter % plot_every == 0:all_losses.append(current_loss / plot_every)current_loss = 0 return all_losses,int(time.time()-start)
3.8 调用train函数, 进行模型的训练
# Train each model, keeping its loss curve and wall-clock duration.
all_losses1, period1 = train(trainRNN)
all_losses2, period2 = train(trainLSTM)
all_losses3, period3 = train(trainGRU)
# Figure 0: loss-curve comparison.
plt.figure(0)
# Plot the three loss curves on shared axes.
plt.plot(all_losses1, label="RNN")
plt.plot(all_losses2, color="red", label="LSTM")
plt.plot(all_losses3, color="orange", label="GRU")
plt.legend(loc='upper left')# Figure 1: training-time comparison.
plt.figure(1)
x_data=["RNN", "LSTM", "GRU"]
y_data = [period1, period2, period3]
# Bar chart of total training seconds per model.
plt.bar(range(len(x_data)), y_data, tick_label=x_data)
四、构建评估模型并预测
4.1 构建传统RNN评估函数
# Evaluation helper: run a full name through the trained RNN (no updates).
def evaluateRNN(line_tensor):
    """Return the RNN's final log-probabilities for an encoded name."""
    hidden = rnn.initHidden()
    for step in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[step], hidden)
    # Drop the seq_len axis so the caller sees (1, n_categories).
    return output.squeeze(0)
4.2 构建LSTM评估函数
# Evaluation helper: run a full name through the trained LSTM (no updates).
def evaluateLSTM(line_tensor):
    """Return the LSTM's final log-probabilities for an encoded name."""
    hidden, c = lstm.initHidden()
    for step in range(line_tensor.size()[0]):
        output, hidden, c = lstm(line_tensor[step], hidden, c)
    # Drop the seq_len axis so the caller sees (1, n_categories).
    return output.squeeze(0)
4.3 构建GRU评估函数
# Evaluation helper: run a full name through the trained GRU (no updates).
def evaluateGRU(line_tensor):
    """Return the GRU's final log-probabilities for an encoded name."""
    hidden = gru.initHidden()
    for step in range(line_tensor.size()[0]):
        output, hidden = gru(line_tensor[step], hidden)
    # Drop the seq_len axis so the caller sees (1, n_categories).
    return output.squeeze(0)
# Debug: score one name with each trained model.
# Fixes vs. original: line 148 had two statements fused together
# (invalid syntax), and the lstm_output print was mislabeled "gru_output".
line = "Bai"
line_tensor = lineToTensor(line)
rnn_output = evaluateRNN(line_tensor)
lstm_output = evaluateLSTM(line_tensor)
gru_output = evaluateGRU(line_tensor)
print("rnn_output:", rnn_output)
print("lstm_output:", lstm_output)
print("gru_output:", gru_output)
4.4 构建预测函数
def predict(input_line,evaluate,n_predictions=3):print('\n> %s' % input_line)with torch.no_grad():output = evaluate(lineToTensor(input_line))topv,topi = output.topk(n_predictions,1,True)predictions = []for i in range(n_predictions):# 从topv中取出的output值value = topv[0][i].item()# 取出索引并找到对应的类别category_index = topi[0][i].item()# 打印ouput的值, 和对应的类别print('(%.2f) %s' % (value, all_category[category_index]))# 将结果装进predictions中predictions.append([value, all_category[category_index]])
for evaluate_fn in [evaluateRNN, evaluateLSTM, evaluateGRU]:print("-"*18)predict('Dovesky', evaluate_fn)predict('Jackson', evaluate_fn)predict('Satoshi', evaluate_fn)