tensorflow实践-LeNet-5模型处理mnist手写数字识别

时间 2020-08-01 标签 tensorflow 实践 lenet 5 模型处理 mnist 手写数字识别

LeNet-5模型处理mnist手写数字识别

模型结构定义

对比之前利用简单的神经网络训练的mnist手写数字识别，现在改用卷积神经网络进行训练。
这里利用经典的 LeNet-5 模型进行训练。
首先对模型框架进行定义：
tf_conv_sample.py

# -*- coding: utf-8 -*-
""" Created on Tue Oct 15 16:23:00 2019 @author: JustMo """

''' Simple LeNet-5 style convolutional network. 7 layers in total (conv,pool,conv,pool,dense,dense,dense) Input (28*28*1) | | | conv conv... out-size:28*28*32, param:5*5*1*32+32, link:28*28*32*(5*5*1+1) (32@5*5) (32@5*5) | | pool pool... out-size:14*14*32, link:14*14*32*(2*2) (2*2) (2*2) | | conv conv... out-size:14*14*64, param:5*5*32*64+64, link:14*14*64*(5*5*32+1) (64@5*5) (64@5*5) | | pool pool... out-size:7*7*64, link:7*7*64*(2*2) (2*2) (2*2) \ / dense(flatten) out-size:3136 (l*w*h)(3136) | dense(+dropout) out-size:512 (512) | dense out-size:10 (10) | Output(softmax) '''

import tensorflow as tf

# ---- Basic network configuration ----
IMAGE_SIZE = 28    # MNIST images are a fixed 28x28 pixels
NUM_CHANNELS = 1   # MNIST images have a single channel
NUM_LABELS = 10    # ten classes: the digits 0-9

# Derived sizes: flattened image length (28*28*1 = 784) and output width (10).
INPUT_NODE = IMAGE_SIZE * IMAGE_SIZE * NUM_CHANNELS
OUTPUT_NODE = NUM_LABELS

# First convolution: 5x5 filters, 32 output feature maps.
CONV1_SIZE = 5
CONV1_DEEP = 32

# Second convolution: 5x5 filters, 64 output feature maps.
CONV2_SIZE = 5
CONV2_DEEP = 64

# Number of units in the hidden fully connected layer.
DENSE_SIZE = 512

##首先根据 lenet-5 定义模型的前向传播框架
def inference(input_tensor, train, regularizer):
    """Build the forward pass of the LeNet-5 style network and return the logits.

    Layers: conv(5x5) -> max-pool(2x2) -> conv(5x5) -> max-pool(2x2)
    -> flatten -> dense(DENSE_SIZE, ReLU, optional dropout) -> dense(NUM_LABELS).

    Args:
        input_tensor: 4-D tensor whose first dimension is the batch and whose
            last dimension equals NUM_CHANNELS. The batch dimension may be
            unknown (None); height, width and depth must be statically known.
        train: truthy while training; enables dropout (keep probability 0.5)
            on the first fully connected layer. Falsy for evaluation.
        regularizer: callable applied to each fully connected weight matrix,
            its result is added to the 'losses' collection. Pass None to
            skip regularisation.

    Returns:
        The un-normalised logits produced by the last fully connected layer
        (NUM_LABELS values per example); softmax is left to the caller.
    """
    # Layer 1 - convolution. A variable scope keeps the variable names of the
    # different layers from clashing.
    with tf.variable_scope('layer1-conv1'):
        # Filter shape is [filter height, filter width, input depth, output depth].
        conv1_weights = tf.get_variable(
            name='weight',
            shape=[CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))

        # One bias per output feature map, so the activation is not forced
        # through the origin.
        conv1_biases = tf.get_variable(
            name='biases',
            shape=[CONV1_DEEP],
            initializer=tf.constant_initializer(0.0))

        # tf.nn.conv2d arguments: the 4-D input batch, the filter weights and
        # the strides (first and last stride entries must be 1).
        # Output size for an input of width W, filter F and stride S:
        #   padding='VALID': ceil((W - F + 1) / S)
        #   padding='SAME' : ceil(W / S)
        # With 'SAME' padding and stride 1 the spatial size is unchanged.
        conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
        print('input_tensor', input_tensor.get_shape().as_list())
        print('conv1_tensor', conv1.get_shape().as_list())

        # ReLU removes the linearity of the convolution output.
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))

    # Layer 2 - max pooling. A name_scope is enough here because pooling
    # creates no variables (name_scope does not affect tf.get_variable).
    with tf.name_scope('layer2-pool1'):
        # 2x2 window with stride 2 keeps the strongest response of each window.
        pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        print('pool1_tensor', pool1.get_shape().as_list())

    # Layer 3 - second convolution, again with a 5x5 window.
    with tf.variable_scope('layer3-conv2'):
        conv2_weights = tf.get_variable(
            name='weight',
            shape=[CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))

        conv2_biases = tf.get_variable(
            name='biases',
            shape=[CONV2_DEEP],
            initializer=tf.constant_initializer(0.0))

        conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')

        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))
        print('conv2_tensor', conv2.get_shape().as_list())

    # Layer 4 - second max pooling, identical to layer 2.
    with tf.name_scope('layer4-pool2'):
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        print('pool2_tensor', pool2.get_shape().as_list())

    # Layer 5 - flatten every example of the batch into one vector.
    with tf.name_scope('layer5-flatten'):
        # Static shape is [batch, height, width, depth].
        pool_shape = pool2.get_shape().as_list()

        # Length of the flattened vector: height * width * depth.
        nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]

        # -1 lets TensorFlow work out the batch dimension, so the network also
        # accepts placeholders whose batch size is None (the static batch size
        # would be None there and cannot be passed to tf.reshape).
        flatten_reshape = tf.reshape(pool2, [-1, nodes])
        print('flatten_tensor', flatten_reshape.get_shape().as_list())

    # Layer 6 - first fully connected layer: nodes -> DENSE_SIZE.
    with tf.variable_scope('layer6-fc1'):
        fc1_weights = tf.get_variable(
            name='weight',
            shape=[nodes, DENSE_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=0.1))

        # Only the fully connected weights are regularised.
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc1_weights))

        fc1_biases = tf.get_variable(
            name='bias',
            shape=[DENSE_SIZE],
            initializer=tf.constant_initializer(0.1))

        fc1 = tf.nn.relu(tf.matmul(flatten_reshape, fc1_weights) + fc1_biases)

        # Dropout randomly zeroes part of the outputs to limit over-fitting;
        # it is applied during training only, and only on this dense layer.
        if train:
            fc1 = tf.nn.dropout(fc1, keep_prob=0.5)

        print('fc1_tensor', fc1.get_shape().as_list())

    # Layer 7 - output layer: DENSE_SIZE -> NUM_LABELS logits.
    with tf.variable_scope('layer7-fc2'):
        fc2_weights = tf.get_variable(
            name='weight',
            shape=[DENSE_SIZE, NUM_LABELS],
            initializer=tf.truncated_normal_initializer(stddev=0.1))

        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc2_weights))

        fc2_biases = tf.get_variable(
            'bias',
            shape=[NUM_LABELS],
            initializer=tf.constant_initializer(0.1))

        logit = tf.matmul(fc1, fc2_weights) + fc2_biases
        print('fc2_tensor', logit.get_shape().as_list())

    # The logits of layer 7 are the network output.
    return logit

训练

训练过程与前面使用简单神经网络的训练步骤差不多，区别在于输入维度不同：由于模型是按 batch 处理数据的，所以训练的输入也要整理成一个 batch 的四维输入。
tf_conv_sample_train.py

# -*- coding: utf-8 -*-
""" Created on Fri Nov 22 08:54:10 2019 @author: JustMo """

''' DO: 用来训练mnist数据集 '''

import os
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

##加载在tf_minist_inter中定义的前向网络结构
import tf_conv_sample

# ---- Training loop parameters ----
BATCH_SIZE = 100     # examples per mini-batch
TRAINING_STRP = 30   # number of training iterations

# ---- Rates ----
LEARNING_RATE_BASE = 1e-2     # initial learning rate
LEARNING_RATE_DECAY = 0.99    # decay factor of the learning rate; usually set close to 1
MOVING_AVERAGE_DECAY = 0.99   # decay of the moving average; a large value keeps it stable
REGULARAZTION_RATE = 1e-4     # weight of the L2 regularisation term

# ---- Checkpoint location ----
# Author's note on the files written when saving a model:
#   checkpoint      - maintained automatically by tf.train.Saver; lists the
#                     model files of the directory
#   model.ckpt      - the values of the variables
#   model.ckpt.meta - the graph (network) structure
MODEL_NAME = 'model_conv.ckpt'
MODEL_SAVE_PATH = '/path/to/model/'

##定义模型的训练函数
def train(mnist):
    """Train the LeNet-5 model on MNIST and checkpoint it periodically.

    Builds the training graph (forward pass, L2-regularised cross-entropy
    loss, exponentially decayed learning rate, gradient descent and moving
    averages of the trainable variables), then runs TRAINING_STRP mini-batch
    steps. A checkpoint is written every 10 iterations and after the last one.

    Args:
        mnist: dataset object exposing ``train.next_batch(n)`` and
            ``train.num_examples`` with one-hot labels, as returned by
            ``input_data.read_data_sets(..., one_hot=True)``.
    """
    # The convolutional network takes a 4-D input: [batch, height, width, channels].
    input_shape = [
        BATCH_SIZE,
        tf_conv_sample.IMAGE_SIZE,
        tf_conv_sample.IMAGE_SIZE,
        tf_conv_sample.NUM_CHANNELS,
    ]
    x = tf.placeholder(dtype=tf.float32, shape=input_shape, name='x-input')
    y_ = tf.placeholder(dtype=tf.float32, shape=[None, tf_conv_sample.OUTPUT_NODE], name='y-input')

    # L2 regularisation of the fully connected weights.
    regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)

    # Forward pass in training mode (dropout enabled).
    y = tf_conv_sample.inference(x, True, regularizer)

    # Counts the training steps; must not be trained itself.
    global_step = tf.Variable(0, trainable=False)

    # Moving averages of every trainable variable.
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())

    # Cross-entropy per example (labels are one-hot, hence the argmax),
    # averaged over the batch, plus the collected regularisation terms.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))

    # Learning rate decays by LEARNING_RATE_DECAY once per pass over the
    # training set; plain gradient descent minimises the loss.
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step, mnist.train.num_examples/BATCH_SIZE, LEARNING_RATE_DECAY)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # One op that both updates the weights and refreshes their moving averages.
    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name='train')

    # Saver used to persist the model for later evaluation.
    saver = tf.train.Saver()
    checkpoint_prefix = os.path.join(MODEL_SAVE_PATH, MODEL_NAME)

    # Make sure the target directory exists before the first save, so a
    # missing directory cannot abort the run at checkpoint time.
    os.makedirs(MODEL_SAVE_PATH, exist_ok=True)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        for i in range(TRAINING_STRP):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            reshape_xs = np.reshape(xs, input_shape)
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: reshape_xs, y_: ys})

            # Report and checkpoint every 10 iterations, and once more after
            # the final iteration so the fully trained weights are not lost.
            is_last_iteration = (i == TRAINING_STRP - 1)
            if i % 10 == 0 or is_last_iteration:
                print('在训练 %d 轮时，模型此时的损失为 %g .' % (step, loss_value))
                saver.save(sess, checkpoint_prefix, global_step=global_step)

def main(avgv=None):
    """Entry point run through tf.app.run(): load MNIST, then train.

    Args:
        avgv: not used by this function (presumably meant to be ``argv``).
    """
    train(input_data.read_data_sets('/path/to/mnist_data', one_hot=True))


if __name__ == '__main__':
    tf.app.run()

训练过程：
git

验证

每隔10秒，利用训练好的最新模型对全部的验证数据进行一次预测，并计算准确率。
tf_conv_sample_val.py

# -*- coding: utf-8 -*-
""" Created on Mon Dec 2 08:39:43 2019 @author: JustMo """

''' DO： 用来从保存的模型中，选择模型进行数据测试与验证 '''

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

##加载在tf_minist_inter中定义的前向网络结构；以及训练部分
import tf_conv_sample
import tf_conv_sample_train

import time
import numpy as np

# Seconds to wait between two evaluation runs on the newest checkpoint.
EVAL_INTERVAL_SECS = 10

# Evaluation batch size.
BATCH_SIZE = 5000

def evaluate(mnist):
    """Repeatedly score the newest checkpoint on the MNIST validation set.

    Every EVAL_INTERVAL_SECS seconds the latest checkpoint found in
    ``tf_conv_sample_train.MODEL_SAVE_PATH`` is restored (using the moving
    averages of the variables) and its accuracy on the whole validation set
    is printed. The function returns as soon as no checkpoint is found;
    otherwise it loops forever.

    Args:
        mnist: dataset object exposing ``validation.images`` and one-hot
            ``validation.labels``, as returned by
            ``input_data.read_data_sets(..., one_hot=True)``.
    """
    # A fresh graph that is the default graph for everything built below.
    with tf.Graph().as_default():
        validation_images = mnist.validation.images
        print(validation_images.shape)

        # Size the input from the data itself, so a validation split of any
        # size works instead of only one with exactly 5000 images.
        input_shape = [
            validation_images.shape[0],
            tf_conv_sample.IMAGE_SIZE,
            tf_conv_sample.IMAGE_SIZE,
            tf_conv_sample.NUM_CHANNELS,
        ]
        x = tf.placeholder(dtype=tf.float32, shape=input_shape, name='x-input')
        y_ = tf.placeholder(dtype=tf.float32, shape=[None, tf_conv_sample.OUTPUT_NODE], name='y-input')

        # The whole validation set is fed as a single 4-D batch.
        reshape_xs = np.reshape(validation_images, input_shape)
        validata_feed = {x: reshape_xs, y_: mnist.validation.labels}

        # Forward pass in evaluation mode: no dropout, and no regulariser
        # because no loss is computed here.
        y = tf_conv_sample.inference(x, None, None)

        # Fraction of examples whose predicted class equals the label.
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Restore the moving-average values into the forward-pass variables
        # by mapping the shadow-variable names onto them.
        variable_averages = tf.train.ExponentialMovingAverage(tf_conv_sample_train.MOVING_AVERAGE_DECAY)
        variable_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variable_to_restore)

        # Evaluate the newest model every EVAL_INTERVAL_SECS seconds.
        while True:
            with tf.Session() as sess:
                # The 'checkpoint' file lists the most recent model file.
                ckpt = tf.train.get_checkpoint_state(tf_conv_sample_train.MODEL_SAVE_PATH)

                if ckpt and ckpt.model_checkpoint_path:
                    # Load the model.
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # The training step is the suffix after the last '-' of
                    # the checkpoint file name.
                    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]

                    accuracy_score = sess.run(accuracy, validata_feed)

                    print('在训练了 %s 轮后，验证集的准确率为 %g'%(global_step, accuracy_score))
                else:
                    print('没有找到 checkpoint文件')
                    return
            # Wait before looking for a newer checkpoint.
            time.sleep(EVAL_INTERVAL_SECS)

def main(argv=None):
    """Entry point run through tf.app.run(): load MNIST, then evaluate.

    Args:
        argv: not used by this function.
    """
    evaluate(input_data.read_data_sets('/path/to/mnist_data', one_hot=True))


if __name__ == '__main__':
    tf.app.run()