2-Softmax Regression

Score: 0.90971

y=softmax(xW+b)（特征缩放：特征/255）

详细分析见：TensorFlow (2): Softmax Regression识别手写数字

import pandas as pd

# Read the labelled training table and the unlabelled test table from the
# working directory, then show both shapes as the cell result.
train_data, test_df = pd.read_csv('train.csv'), pd.read_csv('test.csv')
train_data.shape, test_df.shape

((42000, 785), (28000, 784))

# Preview the first rows of the training DataFrame (shown as the cell output).
train_data.head()

# Separate the target column from the pixel columns: the labels stay a pandas
# Series for now, the remaining columns become a plain NumPy array.
labels = train_data['label']
images = train_data.drop('label', axis=1).values
len(labels), images.shape

(42000, (42000, 784))

# One-hot encode the digit labels: one indicator column per distinct label.
labels = pd.get_dummies(labels)
# Parenthesised print works as a statement on Python 2 and as a function call
# on Python 3, and prints the same tuple on both.
print(labels.shape)
labels.head()

(42000, 10)

# Replace the one-hot DataFrame with its underlying NumPy array so it can be
# sliced and shuffled together with the image array.
labels=labels.values
labels.shape

(42000, 10)

import numpy as np

# Feature scaling: multiply every pixel by 1/255 (maps the range 0-255 onto
# 0-1, assuming 8-bit grey values); the result is a float array.
images = images * (1.0 / 255.0)
images.shape

(42000, 784)

# The first TRAIN_SIZE rows are used for training; everything after them is
# held out for evaluation (named test_* here).
TRAIN_SIZE = 40000
train_images, test_images = images[:TRAIN_SIZE], images[TRAIN_SIZE:]
train_labels, test_labels = labels[:TRAIN_SIZE], labels[TRAIN_SIZE:]

train_images.shape, len(train_labels), test_images.shape, len(test_labels)

((40000, 784), 40000, (2000, 784), 2000)

# Show the first five scaled training images.
train_images[:5]

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

# Show the first five one-hot training labels.
train_labels[:5]

array([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=uint8)

import tensorflow as tf
# Model: one linear layer followed by softmax, y = softmax(xW + b).
x = tf.placeholder(tf.float32, [None, 784])  # batch of flattened images
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
logits = tf.matmul(x, W) + b
y = tf.nn.softmax(logits)  # class probabilities; later cells take argmax of this

y_ = tf.placeholder(tf.float32, [None, 10])  # one-hot ground-truth labels
# Compute the loss from the raw logits instead of log(softmax(...)): taking the
# log of an explicit softmax yields log(0) = -inf (and NaN gradients) as soon
# as a probability underflows to zero.  The fused op evaluates the same
# per-example cross-entropy in a numerically stable way; reduce_sum keeps the
# loss a sum over the batch, exactly as before, so the learning rate keeps its
# meaning.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))

# Mini-batch bookkeeping shared with next_batch().
BATCH_SIZE = 50
index_in_epoch = 0  # cursor: where the next batch starts in the training arrays
num_examples = len(train_images)  # number of training rows

def next_batch(batch_size):
    """Return the next mini-batch of training images and labels.

    Batches are consecutive slices of the module-level ``train_images`` and
    ``train_labels`` arrays, tracked by the ``index_in_epoch`` cursor.  When
    the requested batch would run past ``num_examples``, both arrays are
    reordered with one shared random permutation (so every image stays paired
    with its label), the cursor is rewound, and the batch is taken from the
    start of the reshuffled data.  Examples left over at the end of the
    finished pass are skipped for that pass.

    Args:
        batch_size: Number of examples to return; must not exceed
            ``num_examples``.

    Returns:
        A tuple ``(images, labels)`` holding ``batch_size`` rows each.

    Raises:
        AssertionError: If ``batch_size`` is larger than ``num_examples``.
    """
    global train_images
    global train_labels
    global index_in_epoch

    # Validate before touching any module state.  An oversized request always
    # ends up in the wrap-around branch below, so checking here fails in
    # exactly the same situations as checking there, but without first
    # reshuffling the data and moving the cursor.
    assert batch_size <= num_examples, 'batch_size must not exceed num_examples'

    start = index_in_epoch
    index_in_epoch += batch_size

    if index_in_epoch > num_examples:
        # Parenthesised so the line is valid on both Python 2 and Python 3.
        print('epoches copleted!')
        # The pass over the data is finished: shuffle before the next one.
        perm = np.arange(num_examples)  # index sequence 0..num_examples-1
        np.random.shuffle(perm)
        # Apply the same permutation to both arrays to keep pairs aligned.
        train_images = train_images[perm]
        train_labels = train_labels[perm]
        # Start the next epoch from the beginning of the shuffled data.
        start = 0
        index_in_epoch = batch_size
    end = index_in_epoch
    return train_images[start:end], train_labels[start:end]

# Plain gradient descent (learning rate 0.001) on the cross-entropy loss.
train_step = tf.train.GradientDescentOptimizer(0.001).minimize(cross_entropy)
# InteractiveSession installs itself as the default session, which is what
# lets the bare .run() / .eval() calls below work without passing a session.
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# Accuracy: fraction of rows whose most probable class matches the one-hot label.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))

for i in range(3000):
    batch_xs, batch_ys = next_batch(BATCH_SIZE)
    if i % 200 == 0:
        # Progress report, measured before this step's update is applied.
        train_accuracy = accuracy.eval({x: batch_xs, y_: batch_ys})
        # NOTE(review): only the first BATCH_SIZE held-out rows are scored
        # here, so this figure is a coarse estimate of held-out accuracy.
        test_accuracy = accuracy.eval({x: test_images[:BATCH_SIZE], y_: test_labels[:BATCH_SIZE]})
        # Parenthesised print is valid on both Python 2 and Python 3 and
        # produces the same line on each.
        print('step:{}, train_accuracy={},test_accuracy={}'.format(i, train_accuracy, test_accuracy))
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

step:0, train_accuracy=0.10000000149,test_accuracy=0.10000000149
step:200, train_accuracy=0.879999995232,test_accuracy=0.759999990463
step:400, train_accuracy=0.920000016689,test_accuracy=0.77999997139
step:600, train_accuracy=0.899999976158,test_accuracy=0.759999990463
epoches copleted!
step:800, train_accuracy=0.879999995232,test_accuracy=0.77999997139
step:1000, train_accuracy=0.879999995232,test_accuracy=0.800000011921
step:1200, train_accuracy=0.879999995232,test_accuracy=0.819999992847
step:1400, train_accuracy=0.860000014305,test_accuracy=0.800000011921
epoches copleted!
step:1600, train_accuracy=0.879999995232,test_accuracy=0.819999992847
step:1800, train_accuracy=0.819999992847,test_accuracy=0.800000011921
step:2000, train_accuracy=0.899999976158,test_accuracy=0.819999992847
step:2200, train_accuracy=0.899999976158,test_accuracy=0.77999997139
epoches copleted!
step:2400, train_accuracy=0.959999978542,test_accuracy=0.800000011921
step:2600, train_accuracy=0.860000014305,test_accuracy=0.860000014305
step:2800, train_accuracy=0.959999978542,test_accuracy=0.819999992847

# Score the trained model on the complete held-out split and display the result.
holdout_feed = {x: test_images, y_: test_labels}
test_accuracy = accuracy.eval(feed_dict=holdout_feed)
test_accuracy

0.90450001

# Preview the first rows of the unlabelled test DataFrame.
test_df.head()

# Scale the unlabelled test pixels by the same 1/255 factor used for training.
# NOTE(review): this rebinds test_images, which previously held the held-out
# split; re-running the earlier accuracy cell after this one would feed it
# rows that no longer line up with test_labels.
test_images = test_df.values * (1.0 / 255.0)
# Predicted digit = index of the largest softmax probability in each row.
predict = tf.argmax(y, 1)
predicted_labels = predict.eval(feed_dict={x: test_images})
predicted_labels[:5]

array([2, 0, 9, 9, 3])

# Build the submission table: a 1-based ImageId per test row next to its
# predicted digit, written without the DataFrame index column.
n_predictions = len(predicted_labels)
image_ids = list(range(1, n_predictions + 1))
submissions = pd.DataFrame({"ImageId": image_ids, "Label": predicted_labels})
submissions.to_csv('submission_2.csv', index=False)

	label	...
0	1	...
1	0	...
2	1	...
3	4	...
4	0	...

	pixel0	pixel1	pixel2	pixel3	pixel4	pixel5	pixel6	pixel7	pixel8	pixel9	...	pixel774	pixel775	pixel776	pixel777	pixel778	pixel779	pixel780	pixel781	pixel782	pixel783
0	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
4	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0

	label	...
0	1	...
1	0	...
2	1	...
3	4	...
4	0	...

	pixel0	pixel1	pixel2	pixel3	pixel4	pixel5	pixel6	pixel7	pixel8	pixel9	...	pixel774	pixel775	pixel776	pixel777	pixel778	pixel779	pixel780	pixel781	pixel782	pixel783
0	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
4	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0

	label	...
0	1	...
1	0	...
2	1	...
3	4	...
4	0	...

	pixel0	pixel1	pixel2	pixel3	pixel4	pixel5	pixel6	pixel7	pixel8	pixel9	...	pixel774	pixel775	pixel776	pixel777	pixel778	pixel779	pixel780	pixel781	pixel782	pixel783
0	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
4	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0

	label	...
0	1	...
1	0	...
2	1	...
3	4	...
4	0	...

	pixel0	pixel1	pixel2	pixel3	pixel4	pixel5	pixel6	pixel7	pixel8	pixel9	...	pixel774	pixel775	pixel776	pixel777	pixel778	pixel779	pixel780	pixel781	pixel782	pixel783
0	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
4	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0