# 李航统计学习方法（二）-感知机算法

## 感知机算法

《统计学习方法》系列笔记的第二篇，对应原著第二章。文中大量引用原著讲解，并加入了自己的理解；对书中算法采用 Python 实现，并用 Matplotlib 将迭代过程可视化为动画。一套干货下来很是辛苦，希望这个系列能坚持下去。

##### 感知机模型

image

设输入空间（特征空间）为 $\mathcal{X} \subseteq \mathbf{R}^n$，输出空间为 $\mathcal{Y} = \{+1, -1\}$，x 和 y 分属这两个空间，那么由输入空间到输出空间的如下函数：

image

image

image

image

image

image

image

image

image

image

image

image

image

image

image

image

### 感知机学习算法

#### 原始形式

image

image

image

image

image

image

``````    import copy
from matplotlib import pyplot as plt
from matplotlib import animation

training_set = [[(3, 3), 1], [(4, 3), 1], [(1, 1), -1]]
w = [0, 0]
b = 0
history = []

def update(item):
"""
update parameters using stochastic gradient descent
:param item: an item which is classified into wrong class
:return: nothing
"""
global w, b, history
w[0] += 1 * item[1] * item[0][0]
w[1] += 1 * item[1] * item[0][1]
b += 1 * item[1]
print w, b
history.append([copy.copy(w), b])
# you can uncomment this line to check the process of stochastic gradient descent

def cal(item):
"""
calculate the functional distance between 'item' an the dicision surface. output yi(w*xi+b).
:param item:
:return:
"""
res = 0
for i in range(len(item[0])):
res += item[0][i] * w[i]
res += b
res *= item[1]
return res

def check():
"""
check if the hyperplane can classify the examples correctly
:return: true if it can
"""
flag = False
for item in training_set:
if cal(item) <= 0:
flag = True
update(item)
# draw a graph to show the process
if not flag:
print "RESULT: w: " + str(w) + " b: " + str(b)
return flag

if __name__ == "__main__":
for i in range(1000):
if not check(): break

# first set up the figure, the axis, and the plot element we want to animate
fig = plt.figure()
ax = plt.axes(xlim=(0, 2), ylim=(-2, 2))
line, = ax.plot([], [], 'g', lw=2)
label = ax.text([], [], '')

# initialization function: plot the background of each frame
def init():
line.set_data([], [])
x, y, x_, y_ = [], [], [], []
for p in training_set:
if p[1] > 0:
x.append(p[0][0])
y.append(p[0][1])
else:
x_.append(p[0][0])
y_.append(p[0][1])

plt.plot(x, y, 'bo', x_, y_, 'rx')
plt.axis([-6, 6, -6, 6])
plt.grid(True)
plt.xlabel('x')
plt.ylabel('y')
plt.title('Perceptron Algorithm (www.hankcs.com)')
return line, label

# animation function.  this is called sequentially
def animate(i):
global history, ax, line, label

w = history[i][0]
b = history[i][1]
if w[1] == 0: return line, label
x1 = -7
y1 = -(b + w[0] * x1) / w[1]
x2 = 7
y2 = -(b + w[0] * x2) / w[1]
line.set_data([x1, x2], [y1, y2])
x1 = 0
y1 = -(b + w[0] * x1) / w[1]
label.set_text(history[i])
label.set_position([x1, y1])
return line, label

# call the animator.  blit=true means only re-draw the parts that have changed.
print history
anim = animation.FuncAnimation(fig, animate, init_func=init, frames=len(history), interval=1000, repeat=True,
blit=True)
plt.show()
anim.save('perceptron.gif', fps=2, writer='imagemagick')
``````

image.png

### 算法的收敛性

image

，其最大长度为
image

，记感知机的参数向量
image

，设满足条件
image

image

#### 感知机学习算法的对偶形式

image

，这里
image

，则最终求解到的参数分别表示为：

image

image

image

``````    # -*- coding:utf-8 -*-
# Filename: train2.2.py
# Author：hankcs
# Date: 2015/1/31 15:15
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import animation

# An example in that book, the training set and parameters' sizes are fixed
training_set = np.array([[[3, 3], 1], [[4, 3], 1], [[1, 1], -1]])

a = np.zeros(len(training_set), np.float)
b = 0.0
Gram = None
y = np.array(training_set[:, 1])
x = np.empty((len(training_set), 2), np.float)
for i in range(len(training_set)):
x[i] = training_set[i][0]
history = []

def cal_gram():
"""
calculate the Gram matrix
:return:
"""
g = np.empty((len(training_set), len(training_set)), np.int)
for i in range(len(training_set)):
for j in range(len(training_set)):
g[i][j] = np.dot(training_set[i][0], training_set[j][0])
return g

def update(i):
"""
update parameters using stochastic gradient descent
:param i:
:return:
"""
global a, b
a[i] += 1
b = b + y[i]
history.append([np.dot(a * y, x), b])
# print a, b # you can uncomment this line to check the process of stochastic gradient descent

# calculate the judge condition
def cal(i):
global a, b, x, y

res = np.dot(a * y, Gram[i])
res = (res + b) * y[i]
return res

# check if the hyperplane can classify the examples correctly
def check():
global a, b, x, y
flag = False
for i in range(len(training_set)):
if cal(i) <= 0:
flag = True
update(i)
if not flag:

w = np.dot(a * y, x)
print "RESULT: w: " + str(w) + " b: " + str(b)
return False
return True

if __name__ == "__main__":
Gram = cal_gram()  # initialize the Gram matrix
for i in range(1000):
if not check(): break

# draw an animation to show how it works, the data comes from history
# first set up the figure, the axis, and the plot element we want to animate
fig = plt.figure()
ax = plt.axes(xlim=(0, 2), ylim=(-2, 2))
line, = ax.plot([], [], 'g', lw=2)
label = ax.text([], [], '')

# initialization function: plot the background of each frame
def init():
line.set_data([], [])
x, y, x_, y_ = [], [], [], []
for p in training_set:
if p[1] > 0:
x.append(p[0][0])
y.append(p[0][1])
else:
x_.append(p[0][0])
y_.append(p[0][1])

plt.plot(x, y, 'bo', x_, y_, 'rx')
plt.axis([-6, 6, -6, 6])
plt.grid(True)
plt.xlabel('x')
plt.ylabel('y')
plt.title('Perceptron Algorithm 2 (www.hankcs.com)')
return line, label

# animation function.  this is called sequentially
def animate(i):
global history, ax, line, label

w = history[i][0]
b = history[i][1]
if w[1] == 0: return line, label
x1 = -7.0
y1 = -(b + w[0] * x1) / w[1]
x2 = 7.0
y2 = -(b + w[0] * x2) / w[1]
line.set_data([x1, x2], [y1, y2])
x1 = 0.0
y1 = -(b + w[0] * x1) / w[1]
label.set_text(str(history[i][0]) + ' ' + str(b))
label.set_position([x1, y1])
return line, label

# call the animator.  blit=true means only re-draw the parts that have changed.
anim = animation.FuncAnimation(fig, animate, init_func=init, frames=len(history), interval=1000, repeat=True,
blit=True)
plt.show()
# anim.save('perceptron2.gif', fps=2, writer='imagemagick')
``````

#### 可视化

image

```python
training_set = np.array([[[3, 3], 1], [[4, 3], 1], [[1, 1], -1], [[5, 2], -1]])
```

image

## 读后感

### 推荐阅读更多精彩内容

• 感知机 概述 感知机是二类分类的线性分类模型，其输入为实例的特征向量，输出为实例的类别，取+1和-1二值。感知机学...
_Joe阅读 4,642评论 2 7
• 【概述】 1、感知机模型特征：感知机对应于输入空间中将实例划分为正负两类的分离超平面，属于判别模型。 2、感知机策...
sealaes阅读 2,430评论 2 3
• 【概述】 SVM训练分类器的方法是寻找到超平面，使正负样本在超平面的两侧（分类正确性即“分得开”），且样本到超平面...
sealaes阅读 8,091评论 0 7
• 注：题中所指的『机器学习』不包括『深度学习』。本篇文章以理论推导为主，不涉及代码实现。 前些日子定下了未来三年左右...
我偏笑_NSNirvana阅读 38,909评论 12 145
• 1、烤香蕉：把香蕉连着皮一起放进微波炉，转三分钟就可以了，烤出来的香蕉皮有点发黑了，剥了皮之后，里面的香蕉已经烤得...
彩云之南阅读 210评论 0 0
• 感恩祖国的繁荣安定，让我们每天都能自由自在的做自己想做的事 女儿昨天晚上说想早上去树木园跑步，于是我就早早的到女儿...
危志霞阅读 36评论 0 0
• 没有记录就没有发生（2017年7月8日～10月5日）第二个90天（6日～2018年1月4日） 没有反思的人生不值得...
Lisa_829e阅读 117评论 0 1