# 0. 简介

Tensorflow基础

1.1 常数和变量

1.2 Tensorflow中的图和会话

1.3 占位符和feed_dicts

Tensorflow中的神经网络

2.1 介绍

2.2 数据加载

2.3 创建一个简单的一层神经网络

2.4 Tensorflow的多个方面

2.5 创建LeNet5卷积神经网络

2.6 影响层输出大小的参数

2.7 调整LeNet5架构

2.8 学习速率和优化器的影响

Tensorflow中的深度神经网络

3.1 AlexNet

3.2 VGG Net-16

3.3 AlexNet性能

# 1. Tensorflow 基础

#### 1.1 常量与变量

Tensorflow中最基本的单元是常量、变量和占位符。

tf.constant()和tf.Variable()之间的区别很清楚；一个常量有着恒定不变的值，一旦设置了它，它的值不能被改变。而变量的值可以在设置完成后改变，但变量的数据类型和形状无法改变。

#We can create constants and variables of different types.

#However, the different types do not mix well together.

a = tf.constant(2, tf.int16)

b = tf.constant(4, tf.float32)

c = tf.constant(8, tf.float32)

d = tf.Variable(2, tf.int16)

e = tf.Variable(4, tf.float32)

f = tf.Variable(8, tf.float32)

#we can perform computations on variable of the same type: e + f

#but the following can not be done: d + e

#everything in Tensorflow is a tensor, these can have different dimensions:

#0D, 1D, 2D, 3D, 4D, or nD-tensors

g = tf.constant(np.zeros(shape=(2,2), dtype=np.float32)) #does work

h = tf.zeros([11], tf.int16)

i = tf.ones([2,2], tf.float32)

j = tf.zeros([1000,4,3], tf.float64)

k = tf.Variable(tf.zeros([2,2], tf.float32))

l = tf.Variable(tf.zeros([5,6,5], tf.float32))

# weight matrix of a fully connected layer: 256*256 flattened inputs -> 10 outputs
weights = tf.Variable(tf.truncated_normal([256 * 256, 10]))

# one bias per output unit
biases = tf.Variable(tf.zeros([10]))

# get_shape().as_list() returns the static shape as a plain Python list
print(weights.get_shape().as_list())

print(biases.get_shape().as_list())

>>>[65536, 10]

>>>[10]

#### 1.2 Tensorflow 中的图与会话

# Build an explicit graph, then evaluate its variables inside a session.
graph = tf.Graph()

with graph.as_default():
    a = tf.Variable(8, tf.float32)
    b = tf.Variable(tf.zeros([2,2], tf.float32))

with tf.Session(graph=graph) as session:
    # Variables must be initialized before they can be read.
    tf.global_variables_initializer().run()
    # BUG FIX: the original line "print(f) print(session.run(f))" fused two
    # statements on one line (a SyntaxError) and referenced f/k, which belong
    # to a different graph. Print the variables defined in *this* graph,
    # matching the sample output below (8 and a 2x2 zero matrix).
    print(session.run(a))
    print(session.run(b))

>>> 8

>>> [[ 0.  0.]

>>>  [ 0.  0.]]

#### 1.3 占位符 与 feed_dicts

# Pairs of 2-D points; each is reshaped to a [1, 2] row vector.
list_of_points1_ = [[1,2], [3,4], [5,6], [7,8]]

list_of_points2_ = [[15,16], [13,14], [11,12], [9,10]]

list_of_points1 = np.array([np.array(elem).reshape(1,2) for elem in list_of_points1_])

list_of_points2 = np.array([np.array(elem).reshape(1,2) for elem in list_of_points2_])

graph = tf.Graph()

with graph.as_default():
    #we should use a tf.placeholder() to create a variable whose value you will fill in later (during session.run()).
    #this can be done by 'feeding' the data into the placeholder.
    #below we see an example of a method which uses two placeholder arrays of size [1,2] to calculate the eucledian distance
    point1 = tf.placeholder(tf.float32, shape=(1, 2))
    point2 = tf.placeholder(tf.float32, shape=(1, 2))

    def calculate_eucledian_distance(point1, point2):
        """Return the Euclidean distance between two [1, 2] points."""
        difference = tf.subtract(point1, point2)
        power2 = tf.pow(difference, tf.constant(2.0, shape=(1,2)))
        # BUG FIX: the original returned the undefined name `eucledian_distance`.
        # The distance is sqrt(sum((p1 - p2)^2)), which matches the printed
        # sample output (e.g. sqrt(14^2 + 14^2) = 19.79899).
        eucledian_distance = tf.sqrt(tf.reduce_sum(power2))
        return eucledian_distance

    dist = calculate_eucledian_distance(point1, point2)

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    for ii in range(len(list_of_points1)):
        point1_ = list_of_points1[ii]
        point2_ = list_of_points2[ii]
        # feed one pair of points into the placeholders per run() call
        feed_dict = {point1 : point1_, point2 : point2_}
        distance = session.run([dist], feed_dict=feed_dict)
        print("the distance between {} and {} -> {}".format(point1_, point2_, distance))

>>> the distance between [[1 2]] and [[15 16]] -> [19.79899]

>>> the distance between [[3 4]] and [[13 14]] -> [14.142136]

>>> the distance between [[5 6]] and [[11 12]] -> [8.485281]

>>> the distance between [[7 8]] and [[ 9 10]] -> [2.8284271]

# 2. Tensorflow 中的神经网络

#### 2.2 数据加载

def randomize(dataset, labels):
    """Shuffle `dataset` and `labels` together with one random permutation.

    Both arrays are reindexed by the same permutation of the label axis, so
    image/label pairs stay aligned after shuffling.
    """
    order = np.random.permutation(labels.shape[0])
    return dataset[order, :, :], labels[order]

def one_hot_encode(np_array):
    """One-hot encode integer class labels into a (n, 10) float32 matrix."""
    classes = np.arange(10)
    # broadcasting (10,) against (n, 1) yields an (n, 10) boolean mask
    return np.equal(classes, np_array[:, None]).astype(np.float32)

def reformat_data(dataset, labels, image_width, image_height, image_depth):
    """Reshape flat image rows to (W, H, D), one-hot encode labels, shuffle both.

    Returns the shuffled (images, one_hot_labels) pair.
    """
    reshaped_images = np.array(
        [np.array(image_data).reshape(image_width, image_height, image_depth)
         for image_data in dataset])
    encoded_labels = one_hot_encode(np.array(labels, dtype=np.float32))
    return randomize(reshaped_images, encoded_labels)

def flatten_tf_array(array):
    """Flatten a 4-D tensor [batch, h, w, d] into [batch, h*w*d]."""
    batch, dim1, dim2, dim3 = array.get_shape().as_list()
    return tf.reshape(array, [batch, dim1 * dim2 * dim3])

def accuracy(predictions, labels):
    """Percentage of rows where the argmax of predictions matches the label."""
    hits = np.argmax(predictions, 1) == np.argmax(labels, 1)
    return 100.0 * np.sum(hits) / predictions.shape[0]

# MNIST dataset location and image geometry (28x28 grayscale, 10 classes).
mnist_folder = './data/mnist/'

mnist_image_width = 28

mnist_image_height = 28

mnist_image_depth = 1

mnist_num_labels = 10

# NOTE(review): mnist_train_dataset_/mnist_train_labels_ (and the test
# equivalents) plus mnist_image_size are used below but never defined in this
# snippet — presumably loaded via mndata.load_training()/load_testing() in the
# original source; confirm before running.
mndata = MNIST(mnist_folder)

mnist_train_dataset, mnist_train_labels = reformat_data(mnist_train_dataset_, mnist_train_labels_, mnist_image_size, mnist_image_size, mnist_image_depth)

mnist_test_dataset, mnist_test_labels = reformat_data(mnist_test_dataset_, mnist_test_labels_, mnist_image_size, mnist_image_size, mnist_image_depth)

print("There are {} images, each of size {}".format(len(mnist_train_dataset), len(mnist_train_dataset[0])))

print("Meaning each image has the size of 28*28*1 = {}".format(mnist_image_size*mnist_image_size*1))

print("The training set contains the following {} labels: {}".format(len(np.unique(mnist_train_labels_)), np.unique(mnist_train_labels_)))

print('Training set shape', mnist_train_dataset.shape, mnist_train_labels.shape)

print('Test set shape', mnist_test_dataset.shape, mnist_test_labels.shape)

# aliases used by later training snippets
train_dataset_mnist, train_labels_mnist = mnist_train_dataset, mnist_train_labels

test_dataset_mnist, test_labels_mnist = mnist_test_dataset, mnist_test_labels

######################################################################################

import pickle  # CIFAR-10 batches are Python pickle files

# CIFAR-10 dataset location and image geometry (32x32 RGB, 10 classes).
cifar10_folder = './data/cifar10/'
train_datasets = ['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5', ]
test_dataset = ['test_batch']
c10_image_height = 32
c10_image_width = 32
c10_image_depth = 3
c10_num_labels = 10

with open(cifar10_folder + test_dataset[0], 'rb') as f0:
    # BUG FIX: the dict was used but never loaded from the opened file.
    # encoding='bytes' matches the b'data'/b'labels' keys used below.
    c10_test_dict = pickle.load(f0, encoding='bytes')

c10_test_dataset, c10_test_labels = c10_test_dict[b'data'], c10_test_dict[b'labels']
# BUG FIX: c10_image_size was undefined; pass the explicit width/height.
test_dataset_cifar10, test_labels_cifar10 = reformat_data(c10_test_dataset, c10_test_labels, c10_image_width, c10_image_height, c10_image_depth)

# Accumulate the five training batches into one array / label list.
c10_train_dataset, c10_train_labels = [], []
for train_dataset in train_datasets:
    with open(cifar10_folder + train_dataset, 'rb') as f0:
        # BUG FIX: same missing pickle.load as for the test batch.
        c10_train_dict = pickle.load(f0, encoding='bytes')
    c10_train_dataset_, c10_train_labels_ = c10_train_dict[b'data'], c10_train_dict[b'labels']
    c10_train_dataset.append(c10_train_dataset_)
    c10_train_labels += c10_train_labels_

c10_train_dataset = np.concatenate(c10_train_dataset, axis=0)
train_dataset_cifar10, train_labels_cifar10 = reformat_data(c10_train_dataset, c10_train_labels, c10_image_width, c10_image_height, c10_image_depth)
# free the raw copies once reformatted
del c10_train_dataset
del c10_train_labels

print("The training set contains the following labels: {}".format(np.unique(c10_train_dict[b'labels'])))
print('Training set shape', train_dataset_cifar10.shape, train_labels_cifar10.shape)
print('Test set shape', test_dataset_cifar10.shape, test_labels_cifar10.shape)

#### 2.3 创建一个简单的一层神经网络

# parameters determining the model size
image_width = mnist_image_width
image_height = mnist_image_height
image_depth = mnist_image_depth
num_labels = mnist_num_labels

#the dataset
train_dataset = mnist_train_dataset
train_labels = mnist_train_labels
test_dataset = mnist_test_dataset
test_labels = mnist_test_labels

#number of iterations, batch size and learning rate
num_steps = 10001
display_step = 1000
learning_rate = 0.5
batch_size = 64  # BUG FIX: batch_size was used below but never defined

graph = tf.Graph()

with graph.as_default():
    #1) First we put the input data in a Tensorflow friendly form.
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_width, image_height, image_depth))
    tf_train_labels = tf.placeholder(tf.float32, shape = (batch_size, num_labels))
    tf_test_dataset = tf.constant(test_dataset, tf.float32)

    #2) Then, the weight matrices and bias vectors are initialized
    #as a default, tf.truncated_normal() is used for the weight matrix and tf.zeros() is used for the bias vector.
    weights = tf.Variable(tf.truncated_normal([image_width * image_height * image_depth, num_labels]), tf.float32)
    bias = tf.Variable(tf.zeros([num_labels]), tf.float32)

    #3) define the model:
    #A one layered fccd simply consists of a matrix multiplication
    def model(data, weights, bias):
        return tf.matmul(flatten_tf_array(data), weights) + bias

    logits = model(tf_train_dataset, weights, bias)

    #4) calculate the loss, which will be used in the optimization of the weights
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))

    #5) Choose an optimizer. Many are available.
    # BUG FIX: `optimizer` was referenced in the session below but never defined.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    #6) The predicted values for the images in the train dataset and test dataset are assigned to the variables train_prediction and test_prediction.
    #It is only necessary if you want to know the accuracy by comparing it with the actual values.
    train_prediction = tf.nn.softmax(logits)
    test_prediction = tf.nn.softmax(model(tf_test_dataset, weights, bias))

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized')
    for step in range(num_steps):
        # BUG FIX: the placeholders above must be fed on every run() call;
        # select a batch and pass it via feed_dict (same pattern as the
        # LeNet5 training loop later in this article).
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % display_step == 0):
            # train accuracy is measured on the current batch only
            train_accuracy = accuracy(predictions, batch_labels)
            test_accuracy = accuracy(test_prediction.eval(), test_labels)
            message = "step {:04d} : loss is {:06.2f}, accuracy on training set {:02.2f} %, accuracy on test set {:02.2f} %".format(step, l, train_accuracy, test_accuracy)
            print(message)

>>> Initialized
>>> step 0000 : loss is 2349.55, accuracy on training set 10.43 %, accuracy on test set 34.12 %
>>> step 0100 : loss is 3612.48, accuracy on training set 89.26 %, accuracy on test set 90.15 %
>>> step 0200 : loss is 2634.40, accuracy on training set 91.10 %, accuracy on test set 91.26 %
>>> step 0300 : loss is 2109.42, accuracy on training set 91.62 %, accuracy on test set 91.56 %
>>> step 0400 : loss is 2093.56, accuracy on training set 91.85 %, accuracy on test set 91.67 %
>>> step 0500 : loss is 2325.58, accuracy on training set 91.83 %, accuracy on test set 91.67 %
>>> step 0600 : loss is 22140.44, accuracy on training set 68.39 %, accuracy on test set 75.06 %
>>> step 0700 : loss is 5920.29, accuracy on training set 83.73 %, accuracy on test set 87.76 %
>>> step 0800 : loss is 9137.66, accuracy on training set 79.72 %, accuracy on test set 83.33 %
>>> step 0900 : loss is 15949.15, accuracy on training set 69.33 %, accuracy on test set 77.05 %
>>> step 1000 : loss is 1758.80, accuracy on training set 92.45 %, accuracy on test set 91.79 %

Sebastian Ruder有一篇不错的博文介绍了不同优化器之间的区别，通过这篇文章，你可以更详细地了解它们。

#### 2.4 Tensorflow的几个方面

Tensorflow包含许多抽象层，这意味着同一个操作可以在不同的抽象级别上完成。这里有一个简单的例子：操作

logits = tf.matmul(tf_train_dataset, weights) + biases

也可以等价地写成

logits = tf.nn.xw_plus_b(train_dataset, weights, biases)。

# BUG FIX: the module name is lowercase; "import Tensorflow" fails.
import tensorflow as tf

# Low-level API: build one conv + relu + max-pool layer by hand.
w1 = tf.Variable(tf.truncated_normal([filter_size, filter_size, image_depth, filter_depth], stddev=0.1))
b1 = tf.Variable(tf.zeros([filter_depth]))
layer1_conv = tf.nn.conv2d(data, w1, [1, 1, 1, 1], padding='SAME')
layer1_relu = tf.nn.relu(layer1_conv + b1)
# BUG FIX: the original pooled `layer1_pool` (its own, not-yet-assigned
# output); the relu output is what should be pooled.
layer1_pool = tf.nn.max_pool(layer1_relu, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

# Higher-level API: the same layer expressed with tflearn.
from tflearn.layers.conv import conv_2d, max_pool_2d
layer1_conv = conv_2d(data, filter_depth, filter_size, activation='relu')
# BUG FIX: `layer1_conv_relu` was undefined; conv_2d already applied the relu.
layer1_pool = max_pool_2d(layer1_conv, 2, strides=2)

#### 2.5 创建 LeNet5 卷积神经网络

LeNet5 CNN架构最早是在1998年由Yann LeCun（见论文）提出的。它是最早的CNN之一，专门用于对手写数字进行分类。尽管它在由大小为28 x 28的灰度图像组成的MNIST数据集上运行良好，但是如果用于其他包含更多图片、更大分辨率以及更多类别的数据集时，它的性能会低很多。对于这些较大的数据集，更深的ConvNets（如AlexNet、VGGNet或ResNet）会表现得更好。

Lenet5架构如下图所示：

LENET5_BATCH_SIZE = 32  # images per training batch

LENET5_PATCH_SIZE = 5  # convolution filter size (5x5)

LENET5_PATCH_DEPTH_1 = 6  # feature maps in the first conv layer

LENET5_PATCH_DEPTH_2 = 16  # feature maps in the second conv layer

LENET5_NUM_HIDDEN_1 = 120  # units in the first fully connected layer

LENET5_NUM_HIDDEN_2 = 84  # units in the second fully connected layer

def variables_lenet5(patch_size = LENET5_PATCH_SIZE, patch_depth1 = LENET5_PATCH_DEPTH_1,
                     patch_depth2 = LENET5_PATCH_DEPTH_2,
                     num_hidden1 = LENET5_NUM_HIDDEN_1, num_hidden2 = LENET5_NUM_HIDDEN_2,
                     image_depth = 1, num_labels = 10):
    """Create the weight and bias Variables for the LeNet-5 network.

    Returns a dict keyed 'w1'..'w5' / 'b1'..'b5' with two conv layers
    followed by three fully connected layers.
    """
    # conv layer 1: patch_size x patch_size filters over image_depth channels
    w1 = tf.Variable(tf.truncated_normal([patch_size, patch_size, image_depth, patch_depth1], stddev=0.1))
    b1 = tf.Variable(tf.zeros([patch_depth1]))
    # conv layer 2
    w2 = tf.Variable(tf.truncated_normal([patch_size, patch_size, patch_depth1, patch_depth2], stddev=0.1))
    b2 = tf.Variable(tf.constant(1.0, shape=[patch_depth2]))
    # fully connected layers; conv output is 5x5xpatch_depth2 after pooling
    w3 = tf.Variable(tf.truncated_normal([5*5*patch_depth2, num_hidden1], stddev=0.1))
    b3 = tf.Variable(tf.constant(1.0, shape = [num_hidden1]))
    w4 = tf.Variable(tf.truncated_normal([num_hidden1, num_hidden2], stddev=0.1))
    b4 = tf.Variable(tf.constant(1.0, shape = [num_hidden2]))
    # output layer
    w5 = tf.Variable(tf.truncated_normal([num_hidden2, num_labels], stddev=0.1))
    b5 = tf.Variable(tf.constant(1.0, shape = [num_labels]))
    return {
        'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5,
        'b1': b1, 'b2': b2, 'b3': b3, 'b4': b4, 'b5': b5
    }

def model_lenet5(data, variables):
    """LeNet-5 forward pass: two sigmoid conv/avg-pool stages, then three
    fully connected layers. Returns the (pre-softmax) logits."""
    conv1 = tf.nn.conv2d(data, variables['w1'], [1, 1, 1, 1], padding='SAME')
    act1 = tf.sigmoid(conv1 + variables['b1'])
    pool1 = tf.nn.avg_pool(act1, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

    conv2 = tf.nn.conv2d(pool1, variables['w2'], [1, 1, 1, 1], padding='VALID')
    act2 = tf.sigmoid(conv2 + variables['b2'])
    pool2 = tf.nn.avg_pool(act2, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

    flattened = flatten_tf_array(pool2)
    fc1 = tf.matmul(flattened, variables['w3']) + variables['b3']
    act3 = tf.nn.sigmoid(fc1)
    fc2 = tf.matmul(act3, variables['w4']) + variables['b4']
    act4 = tf.nn.sigmoid(fc2)
    return tf.matmul(act4, variables['w5']) + variables['b5']

#parameters determining the model size
image_size = mnist_image_size
num_labels = mnist_num_labels
# BUG FIX: image_depth is used below but was never set in this snippet.
image_depth = mnist_image_depth

#the datasets
train_dataset = mnist_train_dataset
train_labels = mnist_train_labels
test_dataset = mnist_test_dataset
test_labels = mnist_test_labels

#number of iterations, batch size and learning rate
num_steps = 10001
display_step = 1000
learning_rate = 0.001
batch_size = LENET5_BATCH_SIZE  # BUG FIX: batch_size was never defined

graph = tf.Graph()

with graph.as_default():
    #1) First we put the input data in a Tensorflow friendly form.
    # BUG FIX: this snippet defines image_size, not image_width/image_height.
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, image_depth))
    tf_train_labels = tf.placeholder(tf.float32, shape = (batch_size, num_labels))
    tf_test_dataset = tf.constant(test_dataset, tf.float32)

    #2) Then, the weight matrices and bias vectors are initialized
    variables = variables_lenet5(image_depth = image_depth, num_labels = num_labels)

    #3. The model used to calculate the logits (predicted labels)
    model = model_lenet5
    logits = model(tf_train_dataset, variables)

    #4. then we compute the softmax cross entropy between the logits and the (actual) labels
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))

    #5. The optimizer is used to calculate the gradients of the loss function
    # BUG FIX: `optimizer` was referenced in the session below but never defined.
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    test_prediction = tf.nn.softmax(model(tf_test_dataset, variables))

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized with learning_rate', learning_rate)
    for step in range(num_steps):
        #Since we are using stochastic gradient descent, we are selecting  small batches from the training dataset,
        #and training the convolutional neural network each time with a batch.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % display_step == 0:
            train_accuracy = accuracy(predictions, batch_labels)
            test_accuracy = accuracy(test_prediction.eval(), test_labels)
            message = "step {:04d} : loss is {:06.2f}, accuracy on training set {:02.2f} %, accuracy on test set {:02.2f} %".format(step, l, train_accuracy, test_accuracy)
            print(message)

>>> Initialized with learning_rate 0.1

>>> step 0000 : loss is 002.49, accuracy on training set 3.12 %, accuracy on test set 10.09 %

>>> step 1000 : loss is 002.29, accuracy on training set 21.88 %, accuracy on test set 9.58 %

>>> step 2000 : loss is 000.73, accuracy on training set 75.00 %, accuracy on test set 78.20 %

>>> step 3000 : loss is 000.41, accuracy on training set 81.25 %, accuracy on test set 86.87 %

>>> step 4000 : loss is 000.26, accuracy on training set 93.75 %, accuracy on test set 90.49 %

>>> step 5000 : loss is 000.28, accuracy on training set 87.50 %, accuracy on test set 92.79 %

>>> step 6000 : loss is 000.23, accuracy on training set 96.88 %, accuracy on test set 93.64 %

>>> step 7000 : loss is 000.18, accuracy on training set 90.62 %, accuracy on test set 95.14 %

>>> step 8000 : loss is 000.14, accuracy on training set 96.88 %, accuracy on test set 95.80 %

>>> step 9000 : loss is 000.35, accuracy on training set 90.62 %, accuracy on test set 96.33 %

>>> step 10000 : loss is 000.12, accuracy on training set 93.75 %, accuracy on test set 96.76 %

#### 2.7 调整 LeNet5 的架构

LENET5_LIKE_BATCH_SIZE = 32  # images per training batch

LENET5_LIKE_FILTER_SIZE = 5  # convolution filter size (5x5)

LENET5_LIKE_FILTER_DEPTH = 16  # feature maps in both conv layers

LENET5_LIKE_NUM_HIDDEN = 120  # units in each fully connected layer

def variables_lenet5_like(filter_size = LENET5_LIKE_FILTER_SIZE,
                          filter_depth = LENET5_LIKE_FILTER_DEPTH,
                          num_hidden = LENET5_LIKE_NUM_HIDDEN,
                          image_width = 28, image_depth = 1, num_labels = 10):
    """Create the Variables for the adjusted (ReLU-based) LeNet-5 variant.

    Returns a dict keyed 'w1'..'w5' / 'b1'..'b5'.
    """
    # two conv layers sharing the same filter depth
    w1 = tf.Variable(tf.truncated_normal([filter_size, filter_size, image_depth, filter_depth], stddev=0.1))
    b1 = tf.Variable(tf.zeros([filter_depth]))
    w2 = tf.Variable(tf.truncated_normal([filter_size, filter_size, filter_depth, filter_depth], stddev=0.1))
    b2 = tf.Variable(tf.constant(1.0, shape=[filter_depth]))
    # after two 2x2 poolings the spatial size is image_width // 4 per side
    w3 = tf.Variable(tf.truncated_normal([(image_width // 4)*(image_width // 4)*filter_depth , num_hidden], stddev=0.1))
    b3 = tf.Variable(tf.constant(1.0, shape = [num_hidden]))
    w4 = tf.Variable(tf.truncated_normal([num_hidden, num_hidden], stddev=0.1))
    b4 = tf.Variable(tf.constant(1.0, shape = [num_hidden]))
    # output layer
    w5 = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
    b5 = tf.Variable(tf.constant(1.0, shape = [num_labels]))
    return {
        'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5,
        'b1': b1, 'b2': b2, 'b3': b3, 'b4': b4, 'b5': b5
    }

def model_lenet5_like(data, variables):
    """Forward pass of the adjusted LeNet-5: ReLU activations, SAME padding
    on both conv layers. Returns the (pre-softmax) logits."""
    conv1 = tf.nn.conv2d(data, variables['w1'], [1, 1, 1, 1], padding='SAME')
    act1 = tf.nn.relu(conv1 + variables['b1'])
    pool1 = tf.nn.avg_pool(act1, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

    conv2 = tf.nn.conv2d(pool1, variables['w2'], [1, 1, 1, 1], padding='SAME')
    act2 = tf.nn.relu(conv2 + variables['b2'])
    pool2 = tf.nn.avg_pool(act2, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

    flattened = flatten_tf_array(pool2)
    fc1 = tf.matmul(flattened, variables['w3']) + variables['b3']
    act3 = tf.nn.relu(fc1)
    # optional dropout after act3 (disabled in the original): tf.nn.dropout(act3, 0.5)
    fc2 = tf.matmul(act3, variables['w4']) + variables['b4']
    act4 = tf.nn.relu(fc2)
    # optional dropout after act4 (disabled in the original): tf.nn.dropout(act4, 0.5)
    return tf.matmul(act4, variables['w5']) + variables['b5']

# 3. Tensorflow 中的深度神经网络

#### 3.1 AlexNet

# Oxford Flowers-17 dataset geometry (224x224 RGB, 17 classes).
ox17_image_width = 224
ox17_image_height = 224
ox17_image_depth = 3
ox17_num_labels = 17

import tflearn.datasets.oxflower17 as oxflower17
# BUG FIX: the dataset was sliced below but never loaded.
train_dataset_, train_labels_ = oxflower17.load_data(one_hot=True)
# first 1000 images for training, the rest for testing
train_dataset_ox17, train_labels_ox17 = train_dataset_[:1000,:,:,:], train_labels_[:1000,:]
test_dataset_ox17, test_labels_ox17 = train_dataset_[1000:,:,:,:], train_labels_[1000:,:]

print('Training set', train_dataset_ox17.shape, train_labels_ox17.shape)
print('Test set', test_dataset_ox17.shape, test_labels_ox17.shape)

# feature-map depths of the five AlexNet conv layers
ALEX_PATCH_DEPTH_1, ALEX_PATCH_DEPTH_2, ALEX_PATCH_DEPTH_3, ALEX_PATCH_DEPTH_4 = 96, 256, 384, 256

# filter sizes of the conv layers (11x11, 5x5, then 3x3)
ALEX_PATCH_SIZE_1, ALEX_PATCH_SIZE_2, ALEX_PATCH_SIZE_3, ALEX_PATCH_SIZE_4 = 11, 5, 3, 3

# units in the two fully connected hidden layers
ALEX_NUM_HIDDEN_1, ALEX_NUM_HIDDEN_2 = 4096, 4096

def variables_alexnet(patch_size1 = ALEX_PATCH_SIZE_1, patch_size2 = ALEX_PATCH_SIZE_2,
                      patch_size3 = ALEX_PATCH_SIZE_3, patch_size4 = ALEX_PATCH_SIZE_4,
                      patch_depth1 = ALEX_PATCH_DEPTH_1, patch_depth2 = ALEX_PATCH_DEPTH_2,
                      patch_depth3 = ALEX_PATCH_DEPTH_3, patch_depth4 = ALEX_PATCH_DEPTH_4,
                      num_hidden1 = ALEX_NUM_HIDDEN_1, num_hidden2 = ALEX_NUM_HIDDEN_2,
                      image_width = 224, image_height = 224, image_depth = 3, num_labels = 17):
    """Create the Variables for AlexNet: five conv layers ('w1'..'w5') and
    three fully connected layers ('w6'..'w8'), with matching biases."""
    # conv layers
    w1 = tf.Variable(tf.truncated_normal([patch_size1, patch_size1, image_depth, patch_depth1], stddev=0.1))
    b1 = tf.Variable(tf.zeros([patch_depth1]))
    w2 = tf.Variable(tf.truncated_normal([patch_size2, patch_size2, patch_depth1, patch_depth2], stddev=0.1))
    b2 = tf.Variable(tf.constant(1.0, shape=[patch_depth2]))
    w3 = tf.Variable(tf.truncated_normal([patch_size3, patch_size3, patch_depth2, patch_depth3], stddev=0.1))
    b3 = tf.Variable(tf.zeros([patch_depth3]))
    w4 = tf.Variable(tf.truncated_normal([patch_size4, patch_size4, patch_depth3, patch_depth3], stddev=0.1))
    b4 = tf.Variable(tf.constant(1.0, shape=[patch_depth3]))
    w5 = tf.Variable(tf.truncated_normal([patch_size4, patch_size4, patch_depth3, patch_depth3], stddev=0.1))
    b5 = tf.Variable(tf.zeros([patch_depth3]))

    # the spatial size is halved 5 times (3 poolings + 2 strided convs)
    # before the first fully connected layer
    pool_reductions = 3
    conv_reductions = 2
    no_reductions = pool_reductions + conv_reductions
    w6 = tf.Variable(tf.truncated_normal([(image_width // 2**no_reductions)*(image_height // 2**no_reductions)*patch_depth3, num_hidden1], stddev=0.1))
    b6 = tf.Variable(tf.constant(1.0, shape = [num_hidden1]))
    w7 = tf.Variable(tf.truncated_normal([num_hidden1, num_hidden2], stddev=0.1))
    b7 = tf.Variable(tf.constant(1.0, shape = [num_hidden2]))
    w8 = tf.Variable(tf.truncated_normal([num_hidden2, num_labels], stddev=0.1))
    b8 = tf.Variable(tf.constant(1.0, shape = [num_labels]))

    return {
        'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8,
        'b1': b1, 'b2': b2, 'b3': b3, 'b4': b4, 'b5': b5, 'b6': b6, 'b7': b7, 'b8': b8
    }

def model_alexnet(data, variables):
    """AlexNet forward pass: five conv layers (with max-pooling and local
    response normalization after layers 1, 2 and 5) followed by two
    tanh+dropout fully connected layers. Returns the (pre-softmax) logits."""
    layer1_conv = tf.nn.conv2d(data, variables['w1'], [1, 4, 4, 1], padding='SAME')
    layer1_relu = tf.nn.relu(layer1_conv + variables['b1'])
    layer1_pool = tf.nn.max_pool(layer1_relu, [1, 3, 3, 1], [1, 2, 2, 1], padding='SAME')
    layer1_norm = tf.nn.local_response_normalization(layer1_pool)

    layer2_conv = tf.nn.conv2d(layer1_norm, variables['w2'], [1, 1, 1, 1], padding='SAME')
    layer2_relu = tf.nn.relu(layer2_conv + variables['b2'])
    layer2_pool = tf.nn.max_pool(layer2_relu, [1, 3, 3, 1], [1, 2, 2, 1], padding='SAME')
    layer2_norm = tf.nn.local_response_normalization(layer2_pool)

    layer3_conv = tf.nn.conv2d(layer2_norm, variables['w3'], [1, 1, 1, 1], padding='SAME')
    layer3_relu = tf.nn.relu(layer3_conv + variables['b3'])
    layer4_conv = tf.nn.conv2d(layer3_relu, variables['w4'], [1, 1, 1, 1], padding='SAME')
    layer4_relu = tf.nn.relu(layer4_conv + variables['b4'])
    layer5_conv = tf.nn.conv2d(layer4_relu, variables['w5'], [1, 1, 1, 1], padding='SAME')
    layer5_relu = tf.nn.relu(layer5_conv + variables['b5'])
    # BUG FIX: the original pooled layer4_relu here, which silently dropped
    # the fifth conv layer from the graph; pool layer5_relu instead.
    layer5_pool = tf.nn.max_pool(layer5_relu, [1, 3, 3, 1], [1, 2, 2, 1], padding='SAME')
    layer5_norm = tf.nn.local_response_normalization(layer5_pool)

    flat_layer = flatten_tf_array(layer5_norm)
    layer6_fccd = tf.matmul(flat_layer, variables['w6']) + variables['b6']
    layer6_tanh = tf.tanh(layer6_fccd)
    layer6_drop = tf.nn.dropout(layer6_tanh, 0.5)
    layer7_fccd = tf.matmul(layer6_drop, variables['w7']) + variables['b7']
    layer7_tanh = tf.tanh(layer7_fccd)
    layer7_drop = tf.nn.dropout(layer7_tanh, 0.5)
    logits = tf.matmul(layer7_drop, variables['w8']) + variables['b8']
    return logits

#### 3.2 VGG Net-16

VGG Net于2014年由牛津大学的Karen Simonyan和Andrew Zisserman创建出来。 它包含了更多的层（16-19层），但是每一层的设计更为简单；所有卷积层都使用3×3、步长为1的过滤器，并且所有最大池化层的步长都为2。

#The VGGNET Neural Network

# all VGG-16 conv layers use 3x3 filters
VGG16_PATCH_SIZE_1, VGG16_PATCH_SIZE_2, VGG16_PATCH_SIZE_3, VGG16_PATCH_SIZE_4 = 3, 3, 3, 3

# feature-map depths of the four conv blocks
VGG16_PATCH_DEPTH_1, VGG16_PATCH_DEPTH_2, VGG16_PATCH_DEPTH_3, VGG16_PATCH_DEPTH_4 = 64, 128, 256, 512

# units in the two fully connected hidden layers
VGG16_NUM_HIDDEN_1, VGG16_NUM_HIDDEN_2 = 4096, 1000

def variables_vggnet16(patch_size1 = VGG16_PATCH_SIZE_1, patch_size2 = VGG16_PATCH_SIZE_2,
                       patch_size3 = VGG16_PATCH_SIZE_3, patch_size4 = VGG16_PATCH_SIZE_4,
                       patch_depth1 = VGG16_PATCH_DEPTH_1, patch_depth2 = VGG16_PATCH_DEPTH_2,
                       patch_depth3 = VGG16_PATCH_DEPTH_3, patch_depth4 = VGG16_PATCH_DEPTH_4,
                       num_hidden1 = VGG16_NUM_HIDDEN_1, num_hidden2 = VGG16_NUM_HIDDEN_2,
                       image_width = 224, image_height = 224, image_depth = 3, num_labels = 17):
    """Create the Variables for VGG Net-16: thirteen 3x3 conv layers
    ('w1'..'w13') and three fully connected layers ('w14'..'w16')."""
    # block 1: two conv layers at depth patch_depth1
    w1 = tf.Variable(tf.truncated_normal([patch_size1, patch_size1, image_depth, patch_depth1], stddev=0.1))
    b1 = tf.Variable(tf.zeros([patch_depth1]))
    w2 = tf.Variable(tf.truncated_normal([patch_size1, patch_size1, patch_depth1, patch_depth1], stddev=0.1))
    b2 = tf.Variable(tf.constant(1.0, shape=[patch_depth1]))
    # block 2: two conv layers at depth patch_depth2
    w3 = tf.Variable(tf.truncated_normal([patch_size2, patch_size2, patch_depth1, patch_depth2], stddev=0.1))
    b3 = tf.Variable(tf.constant(1.0, shape = [patch_depth2]))
    w4 = tf.Variable(tf.truncated_normal([patch_size2, patch_size2, patch_depth2, patch_depth2], stddev=0.1))
    b4 = tf.Variable(tf.constant(1.0, shape = [patch_depth2]))
    # block 3: three conv layers at depth patch_depth3
    w5 = tf.Variable(tf.truncated_normal([patch_size3, patch_size3, patch_depth2, patch_depth3], stddev=0.1))
    b5 = tf.Variable(tf.constant(1.0, shape = [patch_depth3]))
    w6 = tf.Variable(tf.truncated_normal([patch_size3, patch_size3, patch_depth3, patch_depth3], stddev=0.1))
    b6 = tf.Variable(tf.constant(1.0, shape = [patch_depth3]))
    w7 = tf.Variable(tf.truncated_normal([patch_size3, patch_size3, patch_depth3, patch_depth3], stddev=0.1))
    b7 = tf.Variable(tf.constant(1.0, shape=[patch_depth3]))
    # blocks 4 and 5: six conv layers at depth patch_depth4
    w8 = tf.Variable(tf.truncated_normal([patch_size4, patch_size4, patch_depth3, patch_depth4], stddev=0.1))
    b8 = tf.Variable(tf.constant(1.0, shape = [patch_depth4]))
    w9 = tf.Variable(tf.truncated_normal([patch_size4, patch_size4, patch_depth4, patch_depth4], stddev=0.1))
    b9 = tf.Variable(tf.constant(1.0, shape = [patch_depth4]))
    w10 = tf.Variable(tf.truncated_normal([patch_size4, patch_size4, patch_depth4, patch_depth4], stddev=0.1))
    b10 = tf.Variable(tf.constant(1.0, shape = [patch_depth4]))
    w11 = tf.Variable(tf.truncated_normal([patch_size4, patch_size4, patch_depth4, patch_depth4], stddev=0.1))
    b11 = tf.Variable(tf.constant(1.0, shape = [patch_depth4]))
    w12 = tf.Variable(tf.truncated_normal([patch_size4, patch_size4, patch_depth4, patch_depth4], stddev=0.1))
    b12 = tf.Variable(tf.constant(1.0, shape=[patch_depth4]))
    w13 = tf.Variable(tf.truncated_normal([patch_size4, patch_size4, patch_depth4, patch_depth4], stddev=0.1))
    b13 = tf.Variable(tf.constant(1.0, shape = [patch_depth4]))

    # five 2x2 max-poolings halve each spatial dimension five times
    no_pooling_layers = 5
    w14 = tf.Variable(tf.truncated_normal([(image_width // (2**no_pooling_layers))*(image_height // (2**no_pooling_layers))*patch_depth4 , num_hidden1], stddev=0.1))
    b14 = tf.Variable(tf.constant(1.0, shape = [num_hidden1]))
    w15 = tf.Variable(tf.truncated_normal([num_hidden1, num_hidden2], stddev=0.1))
    b15 = tf.Variable(tf.constant(1.0, shape = [num_hidden2]))
    w16 = tf.Variable(tf.truncated_normal([num_hidden2, num_labels], stddev=0.1))
    b16 = tf.Variable(tf.constant(1.0, shape = [num_labels]))

    return {
        'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8, 'w9': w9, 'w10': w10,
        'w11': w11, 'w12': w12, 'w13': w13, 'w14': w14, 'w15': w15, 'w16': w16,
        'b1': b1, 'b2': b2, 'b3': b3, 'b4': b4, 'b5': b5, 'b6': b6, 'b7': b7, 'b8': b8, 'b9': b9, 'b10': b10,
        'b11': b11, 'b12': b12, 'b13': b13, 'b14': b14, 'b15': b15, 'b16': b16
    }

def model_vggnet16(data, variables):
    """VGG Net-16 forward pass: five conv blocks (2-2-3-3-3 conv layers, each
    block ending in a 2x2 max-pool) followed by two relu+dropout fully
    connected layers. Returns the (pre-softmax) logits."""
    # block 1
    layer1_conv = tf.nn.conv2d(data, variables['w1'], [1, 1, 1, 1], padding='SAME')
    layer1_actv = tf.nn.relu(layer1_conv + variables['b1'])
    layer2_conv = tf.nn.conv2d(layer1_actv, variables['w2'], [1, 1, 1, 1], padding='SAME')
    layer2_actv = tf.nn.relu(layer2_conv + variables['b2'])
    layer2_pool = tf.nn.max_pool(layer2_actv, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

    # block 2
    layer3_conv = tf.nn.conv2d(layer2_pool, variables['w3'], [1, 1, 1, 1], padding='SAME')
    layer3_actv = tf.nn.relu(layer3_conv + variables['b3'])
    layer4_conv = tf.nn.conv2d(layer3_actv, variables['w4'], [1, 1, 1, 1], padding='SAME')
    layer4_actv = tf.nn.relu(layer4_conv + variables['b4'])
    # BUG FIX: the original pooled `layer4_pool` (its own, not-yet-assigned
    # output), which would raise a NameError; pool the relu output instead.
    layer4_pool = tf.nn.max_pool(layer4_actv, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

    # block 3
    layer5_conv = tf.nn.conv2d(layer4_pool, variables['w5'], [1, 1, 1, 1], padding='SAME')
    layer5_actv = tf.nn.relu(layer5_conv + variables['b5'])
    layer6_conv = tf.nn.conv2d(layer5_actv, variables['w6'], [1, 1, 1, 1], padding='SAME')
    layer6_actv = tf.nn.relu(layer6_conv + variables['b6'])
    layer7_conv = tf.nn.conv2d(layer6_actv, variables['w7'], [1, 1, 1, 1], padding='SAME')
    layer7_actv = tf.nn.relu(layer7_conv + variables['b7'])
    layer7_pool = tf.nn.max_pool(layer7_actv, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

    # block 4
    layer8_conv = tf.nn.conv2d(layer7_pool, variables['w8'], [1, 1, 1, 1], padding='SAME')
    layer8_actv = tf.nn.relu(layer8_conv + variables['b8'])
    layer9_conv = tf.nn.conv2d(layer8_actv, variables['w9'], [1, 1, 1, 1], padding='SAME')
    layer9_actv = tf.nn.relu(layer9_conv + variables['b9'])
    layer10_conv = tf.nn.conv2d(layer9_actv, variables['w10'], [1, 1, 1, 1], padding='SAME')
    layer10_actv = tf.nn.relu(layer10_conv + variables['b10'])
    layer10_pool = tf.nn.max_pool(layer10_actv, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

    # block 5
    layer11_conv = tf.nn.conv2d(layer10_pool, variables['w11'], [1, 1, 1, 1], padding='SAME')
    layer11_actv = tf.nn.relu(layer11_conv + variables['b11'])
    layer12_conv = tf.nn.conv2d(layer11_actv, variables['w12'], [1, 1, 1, 1], padding='SAME')
    layer12_actv = tf.nn.relu(layer12_conv + variables['b12'])
    layer13_conv = tf.nn.conv2d(layer12_actv, variables['w13'], [1, 1, 1, 1], padding='SAME')
    layer13_actv = tf.nn.relu(layer13_conv + variables['b13'])
    layer13_pool = tf.nn.max_pool(layer13_actv, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

    # classifier head
    flat_layer  = flatten_tf_array(layer13_pool)
    layer14_fccd = tf.matmul(flat_layer, variables['w14']) + variables['b14']
    layer14_actv = tf.nn.relu(layer14_fccd)
    layer14_drop = tf.nn.dropout(layer14_actv, 0.5)
    layer15_fccd = tf.matmul(layer14_drop, variables['w15']) + variables['b15']
    layer15_actv = tf.nn.relu(layer15_fccd)
    layer15_drop = tf.nn.dropout(layer15_actv, 0.5)
    logits = tf.matmul(layer15_drop, variables['w16']) + variables['b16']
    return logits