3-多层感知机

Score: 0.96486

Softmax Regression 和传统意义上的神经网络的最大区别是没有隐含层。这里实现的多层感知机实际上是在 Softmax Regression 的基础上加上一个隐含层。结构如下：

x=tf.placeholder(tf.float32,[None,784])
hidden1=tf.nn.relu(tf.matmul(x,W1)+b1)
hidden1_drop=tf.nn.dropout(hidden1,keep_prob)
y=tf.nn.softmax(tf.matmul(hidden1_drop,W2)+b2)
代价函数：交叉熵
最小化代价函数：AdagradOptimizer，学习率0.01

import pandas as pd

# Read the training set (which carries a 'label' column) and the test set
# from the CSV files in the working directory, in that order.
train_data, test_data = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))
train_data.shape, test_data.shape

((42000, 785), (28000, 784))

# Preview the first rows of the training frame.
train_data.head()

# Keep only the pixel columns: drop 'label' and take the underlying NumPy array.
pixel_frame = train_data.drop(['label'], axis=1)
images = pixel_frame.values
images.shape

(42000, 784)

# One-hot encode the 'label' column: one indicator column per distinct value.
labels = pd.get_dummies(train_data['label'])
print(labels.shape)
labels.head()

(42000, 10)

# Replace the one-hot DataFrame with its underlying NumPy array.
labels=labels.values
labels.shape

(42000, 10)

import numpy as np

# Rescale every pixel by 1/255 (presumably mapping 0-255 intensities into
# [0, 1] -- confirm against the raw data).
pixel_scale = 1.0 / 255.0
images = images * pixel_scale
images.shape

(42000, 784)

# The first TRAIN_SIZE rows are used for training; everything after them
# is held out for validation.
TRAIN_SIZE = 40000
train_images, val_images = images[:TRAIN_SIZE], images[TRAIN_SIZE:]
train_labels, val_labels = labels[:TRAIN_SIZE], labels[TRAIN_SIZE:]

train_images.shape, len(train_labels), val_images.shape, len(val_labels)

((40000, 784), 40000, (2000, 784), 2000)

# Show the first five rows of the training images.
train_images[:5]

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

# Show the first five one-hot training labels.
train_labels[:5]

array([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=uint8)

多层感知机

import tensorflow as tf

# Interactive session: later cells call `.run()` / `.eval()` on ops and
# tensors without passing a session explicitly.
sess=tf.InteractiveSession()

in_units = 784  # number of input units
h1_units = 300  # number of hidden-layer output units

# Hidden layer: weights drawn from a truncated normal (stddev 0.1), zero biases.
W1 = tf.Variable(tf.truncated_normal(shape=[in_units, h1_units], stddev=0.1))
b1 = tf.Variable(tf.zeros(shape=[h1_units]))

# Output (softmax) layer: weights and biases both start at zero.
# (The original note justified this by the sigmoid being most sensitive,
# with the largest gradient, near 0 -- although this layer uses softmax.)
W2 = tf.Variable(tf.zeros(shape=[h1_units, 10]))
b2 = tf.Variable(tf.zeros(shape=[10]))

# Graph inputs: a batch of flattened images and the dropout keep probability.
x = tf.placeholder(tf.float32, shape=[None, in_units])
keep_prob = tf.placeholder(tf.float32)  # keep_prob varies between runs, hence a placeholder

定义模型结构

# Forward pass: ReLU hidden layer -> dropout -> linear output layer.
hidden1 = tf.nn.relu(tf.matmul(x, W1) + b1)
hidden1_drop = tf.nn.dropout(hidden1, keep_prob)
logits = tf.matmul(hidden1_drop, W2) + b2
# Class probabilities; used for accuracy and prediction further down.
y = tf.nn.softmax(logits)

# One-hot target labels fed in alongside x.
y_ = tf.placeholder(tf.float32, [None, 10])
# Summed cross-entropy over the batch. It is computed from the logits rather
# than as -sum(y_ * log(y)): taking the log of an already-computed softmax
# yields log(0) = -inf (and NaN gradients) once a probability underflows.
# The sum reduction is kept so the loss scale, and therefore the effective
# learning rate, matches the original formulation.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
train_step = tf.train.AdagradOptimizer(0.01).minimize(cross_entropy)

训练模型

BATCH_SIZE = 100                    # examples drawn per training step
index_in_epoch = 0                  # read cursor into the training arrays
num_examples = len(train_images)    # number of rows in the training split

def next_batch(batch_size):
    """Return the next ``batch_size`` training examples as ``(images, labels)``.

    Reads consecutive slices of the module-level ``train_images`` and
    ``train_labels`` arrays, tracking the read position in the module-level
    ``index_in_epoch``. When the requested slice would run past
    ``num_examples``, a message is printed, both arrays are reordered with
    one shared random permutation, and the batch is taken from the start of
    the reshuffled data; any unread tail of the finished pass is skipped.

    Args:
        batch_size: number of examples to return.

    Returns:
        A tuple ``(images, labels)`` of two slices covering the same rows.

    Raises:
        AssertionError: if ``batch_size`` is larger than ``num_examples``.
    """
    global train_images
    global train_labels
    global index_in_epoch

    # Validate before touching any module state. Previously this check ran
    # only after the cursor had been advanced and the data reshuffled, so a
    # failing call still left the globals modified.
    assert batch_size <= num_examples, (
        'batch_size ({}) exceeds num_examples ({})'.format(batch_size, num_examples))

    start = index_in_epoch
    index_in_epoch += batch_size

    if index_in_epoch > num_examples:
        # Parenthesised single-argument form prints identically on Python 2
        # and is also valid on Python 3.
        print('epoches copleted!')
        # Pass finished: shuffle images and labels with the same permutation
        # so that each row stays paired with its label.
        perm = np.arange(num_examples)
        np.random.shuffle(perm)
        train_images = train_images[perm]
        train_labels = train_labels[perm]
        # Start the next pass from the beginning.
        start = 0
        index_in_epoch = batch_size
    end = index_in_epoch
    return train_images[start:end], train_labels[start:end]

# Initialise all variables defined so far.
init_op = tf.global_variables_initializer()
init_op.run()

# A prediction counts as correct when the arg-max class of y equals the
# arg-max of the one-hot target y_; accuracy is the mean of those matches.
predicted_class = tf.argmax(y, 1)
target_class = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_class, target_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))

# Fixed evaluation feed: the first BATCH_SIZE validation examples with
# dropout disabled (keep_prob 1.0). Reported below as "test_accuracy".
val_feed = {x: val_images[:BATCH_SIZE], y_: val_labels[:BATCH_SIZE], keep_prob: 1.0}

for i in range(4000):
    batch_xs, batch_ys = next_batch(BATCH_SIZE)
    if i % 200 == 0:
        # Every 200 steps, report accuracy on the current batch and on the
        # fixed validation slice, measured before this step's update.
        batch_feed = {x: batch_xs, y_: batch_ys, keep_prob: 1.0}
        train_accuracy = accuracy.eval(batch_feed)
        test_accuracy = accuracy.eval(val_feed)
        print('step:{}, train_accuracy={},test_accuracy={}'.format(i, train_accuracy, test_accuracy))
    # One optimisation step with dropout active (keep_prob 0.75).
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys, keep_prob: 0.75})

step:0, train_accuracy=0.109999999404,test_accuracy=0.070000000298
step:200, train_accuracy=0.959999978542,test_accuracy=0.889999985695
epoches copleted!
step:400, train_accuracy=0.939999997616,test_accuracy=0.889999985695
step:600, train_accuracy=0.910000026226,test_accuracy=0.920000016689
epoches copleted!
step:800, train_accuracy=0.959999978542,test_accuracy=0.920000016689
step:1000, train_accuracy=0.939999997616,test_accuracy=0.930000007153
epoches copleted!
step:1200, train_accuracy=0.949999988079,test_accuracy=0.939999997616
step:1400, train_accuracy=0.980000019073,test_accuracy=0.939999997616
epoches copleted!
step:1600, train_accuracy=0.959999978542,test_accuracy=0.930000007153
step:1800, train_accuracy=0.959999978542,test_accuracy=0.939999997616
epoches copleted!
step:2000, train_accuracy=0.97000002861,test_accuracy=0.97000002861
step:2200, train_accuracy=0.959999978542,test_accuracy=0.949999988079
epoches copleted!
step:2400, train_accuracy=0.990000009537,test_accuracy=0.97000002861
step:2600, train_accuracy=0.949999988079,test_accuracy=0.980000019073
epoches copleted!
step:2800, train_accuracy=0.97000002861,test_accuracy=0.97000002861
step:3000, train_accuracy=0.959999978542,test_accuracy=0.97000002861
epoches copleted!
step:3200, train_accuracy=0.97000002861,test_accuracy=0.97000002861
step:3400, train_accuracy=1.0,test_accuracy=0.980000019073
epoches copleted!
step:3600, train_accuracy=0.959999978542,test_accuracy=0.990000009537
step:3800, train_accuracy=0.990000009537,test_accuracy=0.990000009537

# Accuracy over the whole validation split, with dropout disabled.
full_val_feed = {x: val_images, y_: val_labels, keep_prob: 1.0}
test_accuracy = accuracy.eval(feed_dict=full_val_feed)
test_accuracy

0.96499997

# Preview the first rows of the test frame.
test_data.head()

# Scale the test pixels by the same 1/255 factor applied to the training pixels.
test_images = test_data.values * (1.0 / 255.0)
# Predicted class is the index of the largest softmax output; dropout is
# disabled (keep_prob 1.0) for inference.
predict = tf.argmax(y, 1)
predicted_labels = predict.eval(feed_dict={x: test_images, keep_prob: 1.0})
predicted_labels[:5]

array([2, 0, 9, 9, 3])

# Build the submission table: 1-based ImageId next to each predicted label,
# then write it to CSV without the index column.
image_ids = list(range(1, len(predicted_labels) + 1))
submissions = pd.DataFrame({"ImageId": image_ids, "Label": predicted_labels})
submissions.to_csv('submission_3.csv', index=False)

	label	...
0	1	...
1	0	...
2	1	...
3	4	...
4	0	...

	pixel0	pixel1	pixel2	pixel3	pixel4	pixel5	pixel6	pixel7	pixel8	pixel9	...	pixel774	pixel775	pixel776	pixel777	pixel778	pixel779	pixel780	pixel781	pixel782	pixel783
0	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
4	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0

	label	...
0	1	...
1	0	...
2	1	...
3	4	...
4	0	...

	pixel0	pixel1	pixel2	pixel3	pixel4	pixel5	pixel6	pixel7	pixel8	pixel9	...	pixel774	pixel775	pixel776	pixel777	pixel778	pixel779	pixel780	pixel781	pixel782	pixel783
0	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
4	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0

	label	...
0	1	...
1	0	...
2	1	...
3	4	...
4	0	...

	pixel0	pixel1	pixel2	pixel3	pixel4	pixel5	pixel6	pixel7	pixel8	pixel9	...	pixel774	pixel775	pixel776	pixel777	pixel778	pixel779	pixel780	pixel781	pixel782	pixel783
0	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
4	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0

	label	...
0	1	...
1	0	...
2	1	...
3	4	...
4	0	...

	pixel0	pixel1	pixel2	pixel3	pixel4	pixel5	pixel6	pixel7	pixel8	pixel9	...	pixel774	pixel775	pixel776	pixel777	pixel778	pixel779	pixel780	pixel781	pixel782	pixel783
0	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
4	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0