Score: 0.96486
The biggest difference between Softmax Regression and a traditional neural network is the absence of a hidden layer. The multilayer perceptron implemented here is essentially Softmax Regression with one hidden layer added on top. The structure is as follows:
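In equation form (a summary of the TensorFlow code below; shapes are batch_size x features):

hidden1 = ReLU(x · W1 + b1)                 # [None, 784] -> [None, 300]
hidden1_drop = dropout(hidden1, keep_prob)  # applied only during training
y = softmax(hidden1_drop · W2 + b2)         # [None, 300] -> [None, 10]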
import pandas as pd

# Load the CSVs: each row of train.csv is a label plus a flattened
# 28x28 image (784 pixel columns).
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')
train_data.shape, test_data.shape
train_data.head()
# Separate pixels from labels; convert labels to one-hot vectors.
images = train_data.drop(['label'], axis=1).values
images.shape
labels = train_data['label']
labels = pd.get_dummies(labels)
print(labels.shape)
labels.head()
labels = labels.values
labels.shape
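As a quick aside, a toy illustration of what pd.get_dummies does here (the variable name is hypothetical, not part of the pipeline): each distinct label value becomes its own indicator column, i.e. one-hot encoding.

demo = pd.Series([0, 2, 1, 0])
pd.get_dummies(demo)
#    0  1  2
# 0  1  0  0
# 1  0  0  1
# 2  0  1  0
# 3  1  0  0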
import numpy as np

# Scale pixel values from [0, 255] to [0.0, 1.0].
images = np.multiply(images, 1.0 / 255.0)
images.shape

# Hold out everything after the first 40000 rows as a validation set.
TRAIN_SIZE = 40000
train_images = images[:TRAIN_SIZE]
train_labels = labels[:TRAIN_SIZE]
val_images = images[TRAIN_SIZE:]
val_labels = labels[TRAIN_SIZE:]
train_images.shape, len(train_labels), val_images.shape, len(val_labels)
train_images[:5]
train_labels[:5]
import tensorflow as tf
sess = tf.InteractiveSession()

in_units = 784   # number of input nodes (28x28 pixels)
h1_units = 300   # number of hidden-layer nodes
# Hidden layer: truncated-normal init breaks the symmetry between units.
W1 = tf.Variable(tf.truncated_normal([in_units, h1_units], stddev=0.1))
b1 = tf.Variable(tf.zeros([h1_units]))
# Output (softmax) layer: initialized to 0, since sigmoid-like functions
# are most sensitive (largest gradient) around 0.
W2 = tf.Variable(tf.zeros([h1_units, 10]))
b2 = tf.Variable(tf.zeros([10]))
x = tf.placeholder(tf.float32, [None, in_units])
# keep_prob differs between training and evaluation, so it is a placeholder.
keep_prob = tf.placeholder(tf.float32)

hidden1 = tf.nn.relu(tf.matmul(x, W1) + b1)
# tf.nn.dropout zeroes each unit with probability 1 - keep_prob and
# scales the survivors by 1/keep_prob.
hidden1_drop = tf.nn.dropout(hidden1, keep_prob)
y = tf.nn.softmax(tf.matmul(hidden1_drop, W2) + b2)
y_ = tf.placeholder(tf.float32, [None, 10])
# Cross-entropy loss, summed over the batch.
cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
train_step = tf.train.AdagradOptimizer(0.01).minimize(cross_entropy)
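One caveat about this loss: tf.log(y) yields NaN if any predicted probability hits exactly 0. A numerically safer variant (a sketch, not what the code above uses) feeds the pre-softmax logits to tf.nn.softmax_cross_entropy_with_logits, which fuses the softmax and the log; using reduce_mean instead of reduce_sum also keeps the effective learning rate independent of the batch size.

# Sketch of a more stable loss; `logits` are the pre-softmax activations
# of the model defined above.
logits = tf.matmul(hidden1_drop, W2) + b2
stable_cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))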
BATCH_SIZE = 100
index_in_epoch = 0
num_examples = train_images.shape[0]

def next_batch(batch_size):
    global train_images
    global train_labels
    global index_in_epoch
    start = index_in_epoch
    index_in_epoch += batch_size
    if index_in_epoch > num_examples:
        print('epoch completed!')
        # One pass over the data is done: shuffle it.
        perm = np.arange(num_examples)  # index array 0..num_examples-1
        np.random.shuffle(perm)
        train_images = train_images[perm]
        train_labels = train_labels[perm]
        # Start the next epoch.
        start = 0
        index_in_epoch = batch_size
        assert batch_size <= num_examples
    end = index_in_epoch
    return train_images[start:end], train_labels[start:end]
tf.global_variables_initializer().run()

# A prediction is correct when the argmax of y matches the argmax of the
# one-hot label y_; accuracy is the mean over the batch.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
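To see what these two lines compute, here is a tiny NumPy illustration (toy arrays, purely for explanation): the row-wise argmax turns probabilities and one-hot labels back into class indices, and the mean of the matches is the accuracy.

y_pred = np.array([[0.1, 0.7, 0.2],    # predicts class 1
                   [0.8, 0.1, 0.1]])   # predicts class 0
y_true = np.array([[0, 1, 0],          # true class 1
                   [0, 0, 1]])         # true class 2
correct = np.argmax(y_pred, 1) == np.argmax(y_true, 1)  # [True, False]
print(correct.mean())  # 0.5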
for i in range(4000):
    batch_xs, batch_ys = next_batch(BATCH_SIZE)
    if i % 200 == 0:
        # Evaluate with keep_prob=1.0 (dropout disabled).
        train_accuracy = accuracy.eval({x: batch_xs, y_: batch_ys, keep_prob: 1.0})
        test_accuracy = accuracy.eval({x: val_images[:BATCH_SIZE], y_: val_labels[:BATCH_SIZE], keep_prob: 1.0})
        print('step:{}, train_accuracy={}, test_accuracy={}'.format(i, train_accuracy, test_accuracy))
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys, keep_prob: 0.75})
# Accuracy on the full validation set.
test_accuracy = accuracy.eval({x: val_images, y_: val_labels, keep_prob: 1.0})
test_accuracy
test_data.head()

# Apply the same [0, 1] scaling to the test images, then predict.
test_images = test_data.values
test_images = np.multiply(test_images, 1.0 / 255.0)
predict = tf.argmax(y, 1)
predicted_labels = predict.eval(feed_dict={x: test_images, keep_prob: 1.0})
predicted_labels[:5]

# Write the submission file in the expected ImageId,Label format.
submissions = pd.DataFrame({"ImageId": list(range(1, len(predicted_labels) + 1)),
                            "Label": predicted_labels})
submissions.to_csv('submission_3.csv', index=False)