
22. Deep Learning - The Self-Study Road - Upgrading the MNIST Program: tanh Activation for the Hidden Layer, softmax for the Output Layer

2025/2/21 3:15:38  Source: https://blog.csdn.net/weixin_43006478/article/details/145583726
import sys
import numpy as np
from keras.datasets import mnist

print("licheng:" + "19" + '\n')
(x_train, y_train), (x_test, y_test) = mnist.load_data()  # downloads MNIST on first run

# take the first 1000 training images, flatten each to 784 pixels, scale to [0, 1]
images, labels = (x_train[0:1000].reshape(1000, 28 * 28) / 255, y_train[0:1000])
# debug prints from an earlier draft, kept commented out:
# print("images:" + str(images))
# print("labels:" + str(labels))

one_hot_lables = np.zeros((len(labels), 10))  # 1000 x 10 zero matrix
for i, l in enumerate(labels):
    one_hot_lables[i][l] = 1  # one-hot encode each digit label
labels = one_hot_lables

test_images = x_test.reshape(len(x_test), 28 * 28) / 255
test_lables = np.zeros((len(y_test), 10))
for i, l in enumerate(y_test):
    test_lables[i][l] = 1

np.random.seed(1)

def tanh(x):
    return np.tanh(x)

def tanh2deriv(output):
    # derivative of tanh, expressed in terms of the forward output
    return 1 - (output ** 2)

def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

# the previous version used relu in the hidden layer:
# relu = lambda x: (x >= 0) * x
# relu2deriv = lambda x: x >= 0

batch_size = 100
alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.05, 300, 100, 784, 10)
weight_0_1 = 0.02 * np.random.random((pixels_per_image, hidden_size)) - 0.01
weight_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iterations):  # 300 training iterations in total
    error, correct_cnt = (0.0, 0)
    for i in range(int(len(images) / batch_size)):  # one pass over all mini-batches
        batch_start, batch_end = ((i * batch_size), ((i + 1) * batch_size))
        layer_0 = images[batch_start:batch_end]  # pixel rows for this batch
        layer_1 = tanh(np.dot(layer_0, weight_0_1))  # hidden layer, tanh activation
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2  # drop half the nodes, double the survivors
        layer_2 = softmax(np.dot(layer_1, weight_1_2))  # output layer, softmax activation
        for k in range(batch_size):  # accumulate the batch's correct predictions
            correct_cnt += int(np.argmax(layer_2[k:k + 1]) ==
                               np.argmax(labels[batch_start + k:batch_start + k + 1]))
        # output-layer delta, averaged over the batch
        layer_2_delta = (labels[batch_start:batch_end] - layer_2) / batch_size
        # hidden-layer delta, scaled by the tanh derivative
        layer_1_delta = layer_2_delta.dot(weight_1_2.T) * tanh2deriv(layer_1)
        layer_1_delta *= dropout_mask  # back-propagate only through surviving nodes
        weight_1_2 += alpha * layer_1.T.dot(layer_2_delta)  # update hidden-to-output weights
        weight_0_1 += alpha * layer_0.T.dot(layer_1_delta)  # update input-to-hidden weights

    # measure accuracy on the test set (no dropout at test time)
    text_correct_cnt = 0
    for i in range(len(test_images)):
        layer_0 = test_images[i:i + 1]
        layer_1 = tanh(np.dot(layer_0, weight_0_1))
        layer_2 = np.dot(layer_1, weight_1_2)
        text_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_lables[i:i + 1]))
    if (j % 10 == 0):
        print("\n" + "j" + str(j))
        sys.stdout.write("test-acc:" + str(text_correct_cnt / float(len(test_images))) +
                         " train-acc:" + str(correct_cnt / float(len(images))))
Run logs at several learning rates follow.

Run log, alpha = 0.001 (test accuracy plateaus around 0.69):

j     test-acc   train-acc
0     0.0805     0.089
10    0.2187     0.163
20    0.4167     0.316
30    0.5306     0.407
40    0.5863     0.517
50    0.6194     0.567
60    0.6385     0.628
70    0.6517     0.643
80    0.6587     0.663
90    0.6637     0.691
100   0.6668     0.696
110   0.6712     0.711
120   0.6743     0.696
130   0.6787     0.709
140   0.6819     0.722
150   0.6826     0.727
160   0.6839     0.723
170   0.6848     0.721
180   0.6845     0.721
190   0.6851     0.712
200   0.686      0.716
210   0.6872     0.714
220   0.6884     0.711
230   0.6893     0.734
240   0.69       0.731
250   0.6904     0.723
260   0.6902     0.74
270   0.6909     0.724
280   0.6922     0.725
290   0.6928     0.718
Run log, alpha = 0.01 (final test accuracy 0.8542):

j     test-acc   train-acc
0     0.2059     0.112
10    0.6728     0.688
20    0.6871     0.73
30    0.6949     0.719
40    0.7011     0.734
50    0.7144     0.752
60    0.7314     0.759
70    0.7507     0.786
80    0.7659     0.796
90    0.7795     0.812
100   0.791      0.821
110   0.8015     0.831
120   0.8102     0.829
130   0.8163     0.863
140   0.8214     0.862
150   0.8269     0.867
160   0.8303     0.872
170   0.8333     0.877
180   0.836      0.887
190   0.838      0.878
200   0.8409     0.888
210   0.8438     0.887
220   0.846      0.888
230   0.8475     0.905
240   0.8487     0.902
250   0.8495     0.9
260   0.8503     0.908
270   0.8518     0.915
280   0.852      0.911
290   0.8542     0.905
alpha = 0.02: test-acc 0.8701, train-acc 0.94
alpha = 0.03: test-acc 0.8769, train-acc 0.948
Run log, alpha = 0.05 (test accuracy about 0.8775; training accuracy climbs well above it, so some overfitting has started to appear):

j     test-acc   train-acc
0     0.5966     0.304
10    0.7186     0.74
20    0.7934     0.816
30    0.8263     0.864
40    0.8409     0.89
50    0.8497     0.904
60    0.8552     0.903
70    0.8616     0.931
80    0.8643     0.931
90    0.8663     0.934
100   0.868      0.942
110   0.8706     0.944
120   0.8711     0.935
130   0.8734     0.941
140   0.8739     0.953
150   0.874      0.954
160   0.8759     0.961
170   0.8762     0.96
180   0.8791     0.955
190   0.8782     0.953
200   0.8796     0.963
210   0.8788     0.969
220   0.8795     0.956
230   0.8788     0.966
240   0.8788     0.958
250   0.8796     0.955
260   0.8776     0.957
270   0.8775     0.953
280   0.8769     0.96
290   0.8775     0.95
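A convenient way to compare runs like the three above is to plot train and test accuracy against the iteration index: the gap between the two curves makes overfitting visible at a glance. The sketch below assumes matplotlib is available and that the training loop has been modified to append a (j, train_acc, test_acc) tuple to a history list every 10 iterations; neither is part of the original program.

import matplotlib.pyplot as plt

def plot_history(history):
    # history: hypothetical list of (iteration, train accuracy, test accuracy) tuples
    js = [h[0] for h in history]
    train_acc = [h[1] for h in history]
    test_acc = [h[2] for h in history]
    plt.plot(js, train_acc, label="train-acc")
    plt.plot(js, test_acc, label="test-acc")
    plt.xlabel("iteration j")
    plt.ylabel("accuracy")
    plt.legend()
    plt.show()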

Judging from these run results, the upgraded program raises the recognition rate to about 0.8775, a clear improvement over the earlier hidden-layer and output-layer design. The accuracy is also fairly stable across iterations, without severe overfitting.

The program also demonstrates how each activation function is handled in the forward pass and in the backward gradient computation.
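Two of those details are worth spelling out. First, tanh2deriv takes the layer's output rather than its input, because d/dx tanh(x) = 1 - tanh(x)^2 can be written purely in terms of the already-computed tanh(x). Second, the output delta (labels - layer_2) is exactly the negated gradient of cross-entropy loss through softmax, which is why no separate softmax derivative appears in the backward pass. The snippet below checks the first claim numerically; it is an illustrative check, not part of the original program.

import numpy as np

def tanh2deriv(output):
    # tanh'(x) expressed in terms of the forward output tanh(x)
    return 1 - output ** 2

x = np.linspace(-2.0, 2.0, 5)
eps = 1e-6
numeric = (np.tanh(x + eps) - np.tanh(x - eps)) / (2 * eps)  # finite differences
analytic = tanh2deriv(np.tanh(x))
print(np.max(np.abs(numeric - analytic)))  # ~1e-10: the two derivatives agree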
