Table of Contents
- Model
- Building the Data
- Numerical Differentiation (Gradient Computation)
- Model Wrapper
- Running the Test
- Results
This post walks through computing gradients with numerical differentiation, using them to run stochastic gradient descent on the parameters, wrapping everything into a simple linear regression model for easy debugging, and finally plotting the loss curve.
Model
y = XW + b
Here y is a scalar per sample and X has 2 columns (features). Mean squared error is used as the loss function.
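Written out for n samples, with ŷ_i denoting the model's prediction for sample i, the loss being minimized is

L = (1/n) Σ_{i=1..n} (ŷ_i − y_i)²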
Building the Data
The imports below cover the whole script (NumPy for the math, matplotlib for the loss plot, time for the timing at the end).

```python
import time

import numpy as np
import matplotlib.pyplot as plt


def build_data(weights, bias, num_examples):
    x = np.random.randn(num_examples, len(weights))
    y = x.dot(weights) + bias
    # add noise to y; the original used np.random.rand(1), a single uniform
    # constant that only shifts the bias, so small per-sample Gaussian
    # noise is used here instead
    y += 0.01 * np.random.randn(*y.shape)
    return x, y


def data_iter(features, labels, batch_size):
    num_examples = len(features)
    # build an index list over the samples
    indices = list(range(num_examples))
    # shuffle the indices so mini-batches are drawn in random order
    np.random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        batch_indices = np.array(indices[i:min(i + batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices]
```
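A quick shape check of these helpers (a minimal sketch added for illustration, not part of the original script; the weight and bias values are arbitrary):

```python
w = np.array([[2.0], [-1.0]])
b = np.array([0.5])
features, labels = build_data(w, b, 100)
print(features.shape, labels.shape)  # (100, 2) (100, 1)
for xb, yb in data_iter(features, labels, batch_size=10):
    print(xb.shape, yb.shape)        # (10, 2) (10, 1)
    break
```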
Numerical Differentiation (Gradient Computation)
This is simply computing partial derivatives numerically.
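Each partial derivative is approximated with the central difference quotient, where e_i is the unit vector for the i-th element and h is a small step (1e-4 in the code below):

∂f/∂x_i ≈ (f(x + h·e_i) − f(x − h·e_i)) / (2h)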
```python
# compute partial derivatives (the gradient) via numerical differentiation
# with the central difference method
def numerical_gradient(f, x):
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)

    # iterate over every element of x, whatever its shape
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]

        x[idx] = float(tmp_val) + h
        fxh1 = f(x)  # f(x + h)

        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x - h)

        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val  # restore the original value
        it.iternext()

    return grad
```
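As a sanity check (a sketch added for illustration, not part of the original post), the numerical gradient can be compared against the analytic gradient of the MSE loss, ∇_W L = (2/n) Xᵀ(XW + b − y); the two should agree to roughly 1e-8:

```python
x = np.random.randn(10, 2)
w = np.random.randn(2, 1)
b = np.array([0.5])
y = np.random.randn(10, 1)

# the lambda ignores its argument: numerical_gradient mutates w in place,
# and the closure re-reads the updated w each time the loss is evaluated
loss = lambda _: np.mean(np.square(y - (x.dot(w) + b)))
num_grad = numerical_gradient(loss, w)
ana_grad = 2 * x.T.dot(x.dot(w) + b - y) / len(x)
print(np.max(np.abs(num_grad - ana_grad)))  # ~1e-8 or smaller
```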
Model Wrapper
```python
class Network:
    def __init__(self, input_size, output_size, weight_init_std=0.01):
        # the original initialized w1 with np.random.rand and never used
        # weight_init_std; it is applied here so the argument is not
        # silently ignored
        self.params = {'w1': weight_init_std * np.random.randn(input_size, output_size),
                       'b1': np.array([0.0])}

    def predict(self, x):
        w1, b1 = self.params['w1'], self.params['b1']
        return x.dot(w1) + b1

    def loss(self, x, y):
        # mean squared error between predictions and targets
        pred_y = self.predict(x)
        return np.mean(np.square(y - pred_y))

    def numerical_gradient(self, x, y):
        # loss_w ignores its argument: numerical_gradient perturbs the
        # parameter arrays in place, and self.loss picks up the change
        loss_w = lambda w: self.loss(x, y)
        grads = dict()
        grads['w1'] = numerical_gradient(loss_w, self.params['w1'])
        grads['b1'] = numerical_gradient(loss_w, self.params['b1'])
        return grads
```
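A minimal usage sketch (the shapes match the training script below):

```python
net = Network(input_size=2, output_size=1)
x = np.random.randn(4, 2)
y = np.random.randn(4, 1)
print(net.loss(x, y))                        # scalar MSE
grads = net.numerical_gradient(x, y)
print(grads['w1'].shape, grads['b1'].shape)  # (2, 1) (1,)
```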
Running the Test
```python
if __name__ == '__main__':
    start = time.perf_counter()
    # np.random.seed(1)

    true_w1 = np.random.rand(2, 1)
    true_b1 = np.random.rand(1)
    # true_w1 = np.array([[3.0], [4.0]])
    # true_b1 = np.array([5.0])
    x_train, y_train = build_data(true_w1, true_b1, 5000)

    net = Network(2, 1, 0.01)
    init_loss = net.loss(x_train, y_train)
    print(net.params)

    loss_history = list()
    loss_history.append(init_loss)

    num_epochs = 2
    batch_size = 50
    learning_rate = 0.01
    for i in range(num_epochs):
        # running_loss = 0.0
        for x, y in data_iter(x_train, y_train, batch_size):
            grads = net.numerical_gradient(x, y)
            # stochastic gradient descent: step each parameter
            # against its gradient
            for key in grads:
                net.params[key] -= learning_rate * grads[key]
            # record the mini-batch loss after the update
            running_loss = net.loss(x, y)
            loss_history.append(running_loss)
        # current_loss = net.loss(x_train, y_train)
        # loss_history.append(current_loss)
        # print(f'epoch {i}: {net.params}')

    plt.title("Single-layer linear model via numerical differentiation (central difference)")
    # one loss value is recorded per mini-batch, so the x-axis counts
    # iterations rather than epochs
    plt.xlabel("iteration")
    plt.ylabel("loss")
    plt.plot(loss_history, linestyle='dotted')
    plt.show()

    # print(loss_history)
    print(f'Initial loss: {init_loss}')
    print(f'Final loss: {loss_history[-1]}')
    print()
    print(f'True parameters: true_w1={true_w1}, true_b1={true_b1}')
    print(f'Learned parameters: w1={net.params["w1"]}, b1={net.params["b1"]}')
    print()
    end = time.perf_counter()
    print(f"Elapsed time: {(end - start) * 1000} ms")
```
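A note on runtime: numerical differentiation evaluates the loss twice per scalar parameter, so this model (2 weights + 1 bias = 3 parameters) costs 6 loss evaluations per update. With 5000 samples, a batch size of 50, and 2 epochs, that is 2 × 100 = 200 updates and 1200 loss evaluations in total, which is why the elapsed time is printed. The cost grows linearly with the parameter count, which is why real frameworks use backpropagation instead.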
Results