5-CNN (Kaggle MNIST score 0.99514, Top 12%, rank 198/1789)

Reference: https://www.kaggle.com/toregil/welcome-to-deep-learning-cnn-99

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
In [4]:
from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
Using TensorFlow backend.
In [5]:
# Load the Kaggle MNIST CSVs: train has a 'label' column + 784 pixel columns,
# test has the 784 pixel columns only.
train_data, test_data = (pd.read_csv(name) for name in ('train.csv', 'test.csv'))
train_data.shape, test_data.shape
Out[5]:
((42000, 785), (28000, 784))
In [6]:
# Peek at the first rows: 'label' holds the digit, pixel0..pixel783 the flattened image.
train_data.head()
Out[6]:
label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 ... pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 pixel780 pixel781 pixel782 pixel783
0 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3 4 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

5 rows × 785 columns

In [7]:
# Separate the target digit from the raw pixel matrix.
labels = train_data['label'].values
images = train_data.drop('label', axis=1).values
labels.shape, images.shape
Out[7]:
((42000,), (42000, 784))
In [8]:
# Hold out a small validation set (4% of 42,000 = 1,680 images).
# random_state pins the split so the run is reproducible; the original had no
# seed, so every re-run trained/validated on a different partition.
x_train, x_val, y_train, y_val = train_test_split(
    images, labels, test_size=0.04, random_state=42)
x_train.shape, x_val.shape
Out[8]:
((40320, 784), (1680, 784))
In [9]:
# Reshape flat 784-pixel rows into 28x28 single-channel images (NHWC layout).
x_train = np.reshape(x_train, (-1, 28, 28, 1))
x_val = np.reshape(x_val, (-1, 28, 28, 1))
In [10]:
# Scale pixel intensities from the 0-255 byte range into [0, 1] floats.
x_train = x_train.astype("float32")/255.
x_val = x_val.astype("float32")/255.
In [11]:
# One-hot encode the digit labels (10 classes), e.g. 9 -> [0,...,0,1].
y_train = to_categorical(y_train)
y_val = to_categorical(y_val)
# Example: show the encoding of the first training label.
print(y_train[0])
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
In [12]:
# CNN: two conv stages (16 then 32 filters) with batch-norm, max-pooling and
# dropout, followed by a two-layer fully-connected head and a 10-way softmax.
model = Sequential([
    Conv2D(filters=16, kernel_size=(3, 3), activation='relu',
           input_shape=(28, 28, 1)),
    BatchNormalization(),
    Conv2D(filters=16, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    MaxPool2D(strides=(2, 2)),
    Dropout(0.25),

    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    MaxPool2D(strides=(2, 2)),
    Dropout(0.25),

    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.25),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])
In [13]:
# Light data augmentation: small rotations, shifts and zooms keep the
# digits recognizable while multiplying the effective training set.
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
)
In [14]:
# Cross-entropy loss with Adam. Note the LearningRateScheduler callback
# defined below overrides this initial lr once training starts.
model.compile(optimizer=Adam(lr=1e-4),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
In [15]:
# Exponential decay: lr starts at 1e-3 and shrinks by 10% each epoch.
def lr_schedule(epoch):
    return 1e-3 * 0.9 ** epoch

annealer = LearningRateScheduler(lr_schedule)
In [ ]:
# Train on augmented batches (500 steps x 16 = 8,000 images per epoch);
# validate on a 400-sample subset of the hold-out set for speed.
hist = model.fit_generator(
    datagen.flow(x_train, y_train, batch_size=16),
    steps_per_epoch=500,
    epochs=100,      # increase this when not on a Kaggle kernel
    verbose=2,       # 1 for ETA, 0 for silent
    validation_data=(x_val[:400], y_val[:400]),
    callbacks=[annealer],
)
Epoch 1/100
36s - loss: 0.2783 - acc: 0.9293 - val_loss: 0.0618 - val_acc: 0.9850
Epoch 2/100
36s - loss: 0.2110 - acc: 0.9475 - val_loss: 0.0620 - val_acc: 0.9825
Epoch 3/100
35s - loss: 0.1766 - acc: 0.9544 - val_loss: 0.0609 - val_acc: 0.9875
Epoch 4/100
37s - loss: 0.1485 - acc: 0.9591 - val_loss: 0.0405 - val_acc: 0.9925
Epoch 5/100
35s - loss: 0.1412 - acc: 0.9633 - val_loss: 0.0227 - val_acc: 0.9900
Epoch 6/100
36s - loss: 0.1337 - acc: 0.9625 - val_loss: 0.0140 - val_acc: 0.9975
Epoch 7/100
36s - loss: 0.1251 - acc: 0.9631 - val_loss: 0.0368 - val_acc: 0.9950
Epoch 8/100
37s - loss: 0.1147 - acc: 0.9675 - val_loss: 0.0180 - val_acc: 0.9975
Epoch 9/100
36s - loss: 0.0984 - acc: 0.9730 - val_loss: 0.0152 - val_acc: 0.9975
Epoch 10/100
36s - loss: 0.0996 - acc: 0.9725 - val_loss: 0.0126 - val_acc: 0.9950
Epoch 11/100
37s - loss: 0.1031 - acc: 0.9726 - val_loss: 0.0254 - val_acc: 0.9950
Epoch 12/100
36s - loss: 0.0943 - acc: 0.9726 - val_loss: 0.0262 - val_acc: 0.9950
Epoch 13/100
36s - loss: 0.0888 - acc: 0.9726 - val_loss: 0.0222 - val_acc: 0.9950
Epoch 14/100
31s - loss: 0.0608 - acc: 0.9816 - val_loss: 0.0292 - val_acc: 0.9900
Epoch 15/100
29s - loss: 0.0797 - acc: 0.9778 - val_loss: 0.0268 - val_acc: 0.9950
Epoch 16/100
32s - loss: 0.0787 - acc: 0.9786 - val_loss: 0.0251 - val_acc: 0.9950
Epoch 17/100
29s - loss: 0.0733 - acc: 0.9778 - val_loss: 0.0170 - val_acc: 0.9950
Epoch 18/100
30s - loss: 0.0654 - acc: 0.9814 - val_loss: 0.0271 - val_acc: 0.9950
Epoch 19/100
29s - loss: 0.0827 - acc: 0.9764 - val_loss: 0.0234 - val_acc: 0.9950
Epoch 20/100
33s - loss: 0.0750 - acc: 0.9792 - val_loss: 0.0248 - val_acc: 0.9950
Epoch 21/100
29s - loss: 0.0690 - acc: 0.9819 - val_loss: 0.0305 - val_acc: 0.9950
Epoch 22/100
31s - loss: 0.0660 - acc: 0.9811 - val_loss: 0.0255 - val_acc: 0.9950
Epoch 23/100
30s - loss: 0.0693 - acc: 0.9812 - val_loss: 0.0197 - val_acc: 0.9950
Epoch 24/100
32s - loss: 0.0617 - acc: 0.9809 - val_loss: 0.0205 - val_acc: 0.9950
Epoch 25/100
30s - loss: 0.0532 - acc: 0.9853 - val_loss: 0.0236 - val_acc: 0.9950
Epoch 26/100
33s - loss: 0.0658 - acc: 0.9816 - val_loss: 0.0225 - val_acc: 0.9950
Epoch 27/100
36s - loss: 0.0640 - acc: 0.9814 - val_loss: 0.0189 - val_acc: 0.9950
Epoch 28/100
35s - loss: 0.0630 - acc: 0.9824 - val_loss: 0.0200 - val_acc: 0.9950
Epoch 29/100
36s - loss: 0.0662 - acc: 0.9816 - val_loss: 0.0223 - val_acc: 0.9950
Epoch 30/100
37s - loss: 0.0552 - acc: 0.9838 - val_loss: 0.0226 - val_acc: 0.9950
Epoch 31/100
36s - loss: 0.0492 - acc: 0.9841 - val_loss: 0.0237 - val_acc: 0.9950
Epoch 32/100
36s - loss: 0.0605 - acc: 0.9842 - val_loss: 0.0237 - val_acc: 0.9950
Epoch 33/100
33s - loss: 0.0563 - acc: 0.9832 - val_loss: 0.0221 - val_acc: 0.9950
Epoch 34/100
35s - loss: 0.0557 - acc: 0.9832 - val_loss: 0.0194 - val_acc: 0.9950
Epoch 35/100
37s - loss: 0.0644 - acc: 0.9814 - val_loss: 0.0208 - val_acc: 0.9950
Epoch 36/100
36s - loss: 0.0486 - acc: 0.9845 - val_loss: 0.0226 - val_acc: 0.9950
Epoch 37/100
38s - loss: 0.0538 - acc: 0.9823 - val_loss: 0.0216 - val_acc: 0.9950
Epoch 38/100
35s - loss: 0.0584 - acc: 0.9840 - val_loss: 0.0230 - val_acc: 0.9950
Epoch 39/100
36s - loss: 0.0589 - acc: 0.9834 - val_loss: 0.0209 - val_acc: 0.9950
Epoch 40/100
37s - loss: 0.0631 - acc: 0.9820 - val_loss: 0.0206 - val_acc: 0.9950
Epoch 41/100
34s - loss: 0.0570 - acc: 0.9834 - val_loss: 0.0209 - val_acc: 0.9950
Epoch 42/100
35s - loss: 0.0607 - acc: 0.9825 - val_loss: 0.0216 - val_acc: 0.9950
Epoch 43/100
34s - loss: 0.0622 - acc: 0.9816 - val_loss: 0.0211 - val_acc: 0.9950
Epoch 44/100
36s - loss: 0.0606 - acc: 0.9825 - val_loss: 0.0202 - val_acc: 0.9950
Epoch 45/100
36s - loss: 0.0517 - acc: 0.9845 - val_loss: 0.0204 - val_acc: 0.9950
Epoch 46/100
37s - loss: 0.0619 - acc: 0.9830 - val_loss: 0.0204 - val_acc: 0.9950
Epoch 47/100
37s - loss: 0.0493 - acc: 0.9856 - val_loss: 0.0206 - val_acc: 0.9950
Epoch 48/100
35s - loss: 0.0602 - acc: 0.9831 - val_loss: 0.0206 - val_acc: 0.9950
Epoch 49/100
39s - loss: 0.0491 - acc: 0.9866 - val_loss: 0.0207 - val_acc: 0.9950
Epoch 50/100
36s - loss: 0.0530 - acc: 0.9844 - val_loss: 0.0194 - val_acc: 0.9950
Epoch 51/100
34s - loss: 0.0599 - acc: 0.9831 - val_loss: 0.0203 - val_acc: 0.9950
Epoch 52/100
38s - loss: 0.0511 - acc: 0.9851 - val_loss: 0.0200 - val_acc: 0.9950
Epoch 53/100
35s - loss: 0.0543 - acc: 0.9856 - val_loss: 0.0200 - val_acc: 0.9950
Epoch 54/100
36s - loss: 0.0493 - acc: 0.9842 - val_loss: 0.0194 - val_acc: 0.9950
Epoch 55/100
38s - loss: 0.0548 - acc: 0.9846 - val_loss: 0.0196 - val_acc: 0.9950
Epoch 56/100
35s - loss: 0.0551 - acc: 0.9832 - val_loss: 0.0197 - val_acc: 0.9950
Epoch 57/100
36s - loss: 0.0612 - acc: 0.9830 - val_loss: 0.0202 - val_acc: 0.9950
Epoch 58/100
36s - loss: 0.0518 - acc: 0.9840 - val_loss: 0.0197 - val_acc: 0.9950
Epoch 59/100
38s - loss: 0.0546 - acc: 0.9835 - val_loss: 0.0201 - val_acc: 0.9950
Epoch 60/100
37s - loss: 0.0481 - acc: 0.9855 - val_loss: 0.0199 - val_acc: 0.9950
Epoch 61/100
33s - loss: 0.0567 - acc: 0.9839 - val_loss: 0.0198 - val_acc: 0.9950
Epoch 62/100
37s - loss: 0.0560 - acc: 0.9842 - val_loss: 0.0198 - val_acc: 0.9950
Epoch 63/100
33s - loss: 0.0546 - acc: 0.9851 - val_loss: 0.0204 - val_acc: 0.9950
Epoch 64/100
35s - loss: 0.0476 - acc: 0.9865 - val_loss: 0.0199 - val_acc: 0.9950
Epoch 65/100
36s - loss: 0.0601 - acc: 0.9825 - val_loss: 0.0200 - val_acc: 0.9950
Epoch 66/100
35s - loss: 0.0538 - acc: 0.9851 - val_loss: 0.0192 - val_acc: 0.9950
Epoch 67/100
35s - loss: 0.0535 - acc: 0.9844 - val_loss: 0.0194 - val_acc: 0.9950
Epoch 68/100
33s - loss: 0.0577 - acc: 0.9810 - val_loss: 0.0197 - val_acc: 0.9950
Epoch 69/100
36s - loss: 0.0586 - acc: 0.9820 - val_loss: 0.0200 - val_acc: 0.9950
Epoch 70/100
36s - loss: 0.0505 - acc: 0.9851 - val_loss: 0.0203 - val_acc: 0.9950
Epoch 71/100
35s - loss: 0.0522 - acc: 0.9854 - val_loss: 0.0200 - val_acc: 0.9950
Epoch 72/100
36s - loss: 0.0475 - acc: 0.9865 - val_loss: 0.0198 - val_acc: 0.9950
Epoch 73/100
35s - loss: 0.0538 - acc: 0.9840 - val_loss: 0.0192 - val_acc: 0.9950
Epoch 74/100
36s - loss: 0.0612 - acc: 0.9812 - val_loss: 0.0195 - val_acc: 0.9950
Epoch 75/100
38s - loss: 0.0519 - acc: 0.9839 - val_loss: 0.0198 - val_acc: 0.9950
Epoch 76/100
32s - loss: 0.0551 - acc: 0.9828 - val_loss: 0.0200 - val_acc: 0.9950
Epoch 77/100
36s - loss: 0.0413 - acc: 0.9866 - val_loss: 0.0194 - val_acc: 0.9950
Epoch 78/100
34s - loss: 0.0564 - acc: 0.9821 - val_loss: 0.0201 - val_acc: 0.9950
Epoch 79/100
32s - loss: 0.0635 - acc: 0.9832 - val_loss: 0.0202 - val_acc: 0.9950
Epoch 80/100
In [18]:
# Evaluate on the full held-out validation set (not just the 400-sample subset).
val_metrics = model.evaluate(x_val, y_val, verbose=0)
final_loss, final_acc = val_metrics
print("Final loss: {0:.4f}, final accuracy: {1:.4f}".format(final_loss, final_acc))
Final loss: 0.0135, final accuracy: 0.9952
In [19]:
# Confusion matrix on the validation set: rows = true digit, cols = predicted.
y_hat = model.predict(x_val)
y_pred = y_hat.argmax(axis=1)
y_true = y_val.argmax(axis=1)
cm = confusion_matrix(y_true, y_pred)
print(cm)
[[172   0   0   0   0   0   0   0   0   0]
 [  0 196   0   0   0   0   1   1   0   0]
 [  0   0 186   0   0   0   0   0   0   0]
 [  0   0   0 188   0   0   0   0   1   0]
 [  0   0   0   0 157   0   1   0   0   0]
 [  0   0   0   0   0 124   0   1   0   0]
 [  0   0   0   0   0   0 155   0   0   0]
 [  0   0   0   0   0   0   0 177   0   0]
 [  0   0   0   0   0   0   2   0 155   0]
 [  0   0   0   0   1   0   0   0   0 162]]
/usr/lib/python2.7/dist-packages/scipy/sparse/coo.py:200: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`.
  if np.rank(self.data) != 1 or np.rank(self.row) != 1 or np.rank(self.col) != 1:
In [21]:
# Convert the test frame to float pixels, reshape to NHWC and scale to [0, 1].
test_data = test_data.values.astype('float32')
x_test = test_data.reshape((-1, 28, 28, 1)) / 255.0
In [22]:
# Predict class probabilities in batches, then take the arg-max digit per row.
y_hat = model.predict(x_test, batch_size=64)
y_pred = y_hat.argmax(axis=1)
In [23]:
# Write the Kaggle submission file. ImageId is the 1-based row number.
# Replaces the original manual range(len(...)) index loop and "".join string
# assembly with enumerate(start=1) and str.format — identical output bytes.
with open('submission_5.csv', 'w') as f:
    f.write('ImageId,Label\n')
    for image_id, label in enumerate(y_pred, start=1):
        f.write('{0},{1}\n'.format(image_id, label))