Table of Contents
- Model
- Building the Data
- Numerical Differentiation (Gradient Computation)
- Model Wrapper
- Running the Test
- Results
This post walks through computing gradients with numerical differentiation, using them to run stochastic gradient descent on the parameters, wrapping everything into a simple linear regression model for easy debugging, and finally plotting the loss curve.
Model
y = XW + b
Here y is a scalar per sample and X has 2 columns (features). Mean squared error is used as the loss function.
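Written out for n samples, with ŷ_i denoting the model's prediction for sample i, the loss being minimized is

L = (1/n) Σ_{i=1..n} (ŷ_i − y_i)²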
Building the Data
The imports below cover the whole script (NumPy for the math, matplotlib for the loss plot, time for the timing at the end).

```python
import time

import numpy as np
import matplotlib.pyplot as plt


def build_data(weights, bias, num_examples):
    x = np.random.randn(num_examples, len(weights))
    y = x.dot(weights) + bias
    # add noise to y; the original used np.random.rand(1), a single uniform
    # constant that only shifts the bias, so small per-sample Gaussian
    # noise is used here instead
    y += 0.01 * np.random.randn(*y.shape)
    return x, y


def data_iter(features, labels, batch_size):
    num_examples = len(features)
    # build an index list over the samples
    indices = list(range(num_examples))
    # shuffle the indices so mini-batches are drawn in random order
    np.random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        batch_indices = np.array(indices[i:min(i + batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices]
```
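A quick shape check of these helpers (a minimal sketch added for illustration, not part of the original script; the weight and bias values are arbitrary):

```python
w = np.array([[2.0], [-1.0]])
b = np.array([0.5])
features, labels = build_data(w, b, 100)
print(features.shape, labels.shape)  # (100, 2) (100, 1)
for xb, yb in data_iter(features, labels, batch_size=10):
    print(xb.shape, yb.shape)        # (10, 2) (10, 1)
    break
```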
Numerical Differentiation (Gradient Computation)
This is simply computing partial derivatives numerically.
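Each partial derivative is approximated with the central difference quotient, where e_i is the unit vector for the i-th element and h is a small step (1e-4 in the code below):

∂f/∂x_i ≈ (f(x + h·e_i) − f(x − h·e_i)) / (2h)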
```python
# compute partial derivatives (the gradient) via numerical differentiation
# with the central difference method
def numerical_gradient(f, x):
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)

    # iterate over every element of x, whatever its shape
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]

        x[idx] = float(tmp_val) + h
        fxh1 = f(x)  # f(x + h)

        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x - h)

        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val  # restore the original value
        it.iternext()

    return grad
```
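As a sanity check (a sketch added for illustration, not part of the original post), the numerical gradient can be compared against the analytic gradient of the MSE loss, ∇_W L = (2/n) Xᵀ(XW + b − y); the two should agree to roughly 1e-8:

```python
x = np.random.randn(10, 2)
w = np.random.randn(2, 1)
b = np.array([0.5])
y = np.random.randn(10, 1)

# the lambda ignores its argument: numerical_gradient mutates w in place,
# and the closure re-reads the updated w each time the loss is evaluated
loss = lambda _: np.mean(np.square(y - (x.dot(w) + b)))
num_grad = numerical_gradient(loss, w)
ana_grad = 2 * x.T.dot(x.dot(w) + b - y) / len(x)
print(np.max(np.abs(num_grad - ana_grad)))  # ~1e-8 or smaller
```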
Model Wrapper
```python
class Network:
    def __init__(self, input_size, output_size, weight_init_std=0.01):
        # the original initialized w1 with np.random.rand and never used
        # weight_init_std; it is applied here so the argument is not
        # silently ignored
        self.params = {'w1': weight_init_std * np.random.randn(input_size, output_size),
                       'b1': np.array([0.0])}

    def predict(self, x):
        w1, b1 = self.params['w1'], self.params['b1']
        return x.dot(w1) + b1

    def loss(self, x, y):
        # mean squared error between predictions and targets
        pred_y = self.predict(x)
        return np.mean(np.square(y - pred_y))

    def numerical_gradient(self, x, y):
        # loss_w ignores its argument: numerical_gradient perturbs the
        # parameter arrays in place, and self.loss picks up the change
        loss_w = lambda w: self.loss(x, y)
        grads = dict()
        grads['w1'] = numerical_gradient(loss_w, self.params['w1'])
        grads['b1'] = numerical_gradient(loss_w, self.params['b1'])
        return grads
```
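A minimal usage sketch (the shapes match the training script below):

```python
net = Network(input_size=2, output_size=1)
x = np.random.randn(4, 2)
y = np.random.randn(4, 1)
print(net.loss(x, y))                        # scalar MSE
grads = net.numerical_gradient(x, y)
print(grads['w1'].shape, grads['b1'].shape)  # (2, 1) (1,)
```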
Running the Test
```python
if __name__ == '__main__':
    start = time.perf_counter()
    # np.random.seed(1)

    true_w1 = np.random.rand(2, 1)
    true_b1 = np.random.rand(1)
    # true_w1 = np.array([[3.0], [4.0]])
    # true_b1 = np.array([5.0])
    x_train, y_train = build_data(true_w1, true_b1, 5000)

    net = Network(2, 1, 0.01)
    init_loss = net.loss(x_train, y_train)
    print(net.params)

    loss_history = list()
    loss_history.append(init_loss)

    num_epochs = 2
    batch_size = 50
    learning_rate = 0.01
    for i in range(num_epochs):
        # running_loss = 0.0
        for x, y in data_iter(x_train, y_train, batch_size):
            grads = net.numerical_gradient(x, y)
            # stochastic gradient descent: step each parameter
            # against its gradient
            for key in grads:
                net.params[key] -= learning_rate * grads[key]
            # record the mini-batch loss after the update
            running_loss = net.loss(x, y)
            loss_history.append(running_loss)
        # current_loss = net.loss(x_train, y_train)
        # loss_history.append(current_loss)
        # print(f'epoch {i}: {net.params}')

    plt.title("Single-layer linear model via numerical differentiation (central difference)")
    # one loss value is recorded per mini-batch, so the x-axis counts
    # iterations rather than epochs
    plt.xlabel("iteration")
    plt.ylabel("loss")
    plt.plot(loss_history, linestyle='dotted')
    plt.show()

    # print(loss_history)
    print(f'Initial loss: {init_loss}')
    print(f'Final loss: {loss_history[-1]}')
    print()
    print(f'True parameters: true_w1={true_w1}, true_b1={true_b1}')
    print(f'Learned parameters: w1={net.params["w1"]}, b1={net.params["b1"]}')
    print()
    end = time.perf_counter()
    print(f"Elapsed time: {(end - start) * 1000} ms")
```
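A note on runtime: numerical differentiation evaluates the loss twice per scalar parameter, so this model (2 weights + 1 bias = 3 parameters) costs 6 loss evaluations per update. With 5000 samples, a batch size of 50, and 2 epochs, that is 2 × 100 = 200 updates and 1200 loss evaluations in total, which is why the elapsed time is printed. The cost grows linearly with the parameter count, which is why real frameworks use backpropagation instead.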
Results