import pandas as pd

# Read the training and test tables from the current working directory.
# The bare expressions below (head/shape) only display something when run
# interactively, e.g. as the last statement of a notebook cell.
train_df = pd.read_csv('train.csv')
train_df.head()
test_df = pd.read_csv('test.csv')
test_df.head()
train_df.shape

# Training on the full data set is slow, so only the first 5000 rows are used.
SAMPLE_COUNT = 5000
labels = train_df['label'].iloc[:SAMPLE_COUNT]
# Everything except the 'label' column is treated as the feature matrix.
images = train_df.drop(columns=['label']).iloc[:SAMPLE_COUNT]
len(labels), images.shape
from sklearn.model_selection import train_test_split
from sklearn import svm

# Hold out 20% of the sampled rows for validation.  The seed is fixed so the
# split -- and therefore every score reported below -- is reproducible from
# run to run; without it each execution evaluates on a different subset.
train_images, test_images, train_labels, test_labels = train_test_split(
    images,
    labels,
    train_size=0.8,
    random_state=0,
)
train_images.shape, test_images.shape, train_labels.shape, test_labels.shape

# Baseline: a default-parameter SVC fitted on the raw feature values.
# The score is the mean accuracy on the held-out rows.
clf = svm.SVC()
clf.fit(train_images, train_labels)
clf.score(test_images, test_labels)
# Binarize the features: every strictly positive value becomes 1, all other
# values are left untouched.  (This is thresholding rather than scaling.)
#
# DataFrame.mask returns new frames instead of writing into the ones produced
# by train_test_split.  Those frames are derived from a slice of the training
# table, and assigning into them in place with a boolean mask can raise
# pandas' SettingWithCopyWarning and relies on view-vs-copy behaviour.
train_images = train_images.mask(train_images > 0, 1)
test_images = test_images.mask(test_images > 0, 1)

# Refit the same classifier on the binarized features and re-score it on the
# binarized hold-out rows.
clf.fit(train_images, train_labels)
clf.score(test_images, test_labels)
# Apply the same binarization used for training to the test table (in place),
# so the classifier sees features on the scale it was fitted with.
positive_pixels = test_df > 0
test_df[positive_pixels] = 1

results = clf.predict(test_df)
results[:5]

# Build the submission table: a 1-based row id alongside each predicted label.
image_ids = list(range(1, len(results) + 1))
submissions = pd.DataFrame(
    {
        "ImageId": image_ids,
        "Label": results,
    }
)
submissions.to_csv('submission_1.csv', index=False)