KNN for CIFAR-10 Recognition
cs231n notes: http://cs231n.github.io/linear-classify/
Training set download: https://download.csdn.net/download/fanzonghao/10592049
A drawback of KNN: every test sample must be compared against the entire training set, one full pass per prediction.
The dataset consists of five data_batch files and one test_batch file. Code to inspect one batch:
import pickle
import numpy as np

fo = open('./datasets/cifar-10-batches-py/data_batch_1', 'rb')
dict = pickle.load(fo, encoding='bytes')
print(dict)
print(dict[b'data'].shape)
print(dict[b'labels'])
print(len(dict[b'labels']))
print(dict[b'filenames'])
print(len(dict[b'filenames']))
fo.close()

The output shows that each data_batch holds 10000 images of size 32×32×3 (each flattened into a 3072-value row), so the five batches give 50000 images; test_batch holds another 10000. In total: 50000 training samples and 10000 test samples.
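If you want to view a row as an image, the 3072 values are stored channel-first: 1024 red, then 1024 green, then 1024 blue, each a row-major 32×32 plane. The human-readable class names live in the batches.meta file next to the batches. A small sketch (assuming the same dataset path as above):

import pickle
import numpy as np

# reorder a flattened 3072-value row into a (32, 32, 3) image
def row_to_image(row):
    return row.reshape(3, 32, 32).transpose(1, 2, 0)

# batches.meta maps the integer labels 0..9 to class names
fo = open('./datasets/cifar-10-batches-py/batches.meta', 'rb')
meta = pickle.load(fo, encoding='bytes')
fo.close()
print(meta[b'label_names'])  # e.g. [b'airplane', b'automobile', ...]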
Code to merge the five training batches (and the test batch) into single arrays:
import pickle
import numpy as np

# unpack one batch file
def unpickle(file):
    fo = open(file, 'rb')
    dict = pickle.load(fo, encoding='bytes')
    fo.close()
    return dict

# merge the five data_batch files and the test_batch into four arrays
def load_cifar10(file):
    data_train = []
    label_train = []
    # merge the training batches
    for i in range(1, 6):
        dic = unpickle(file + 'data_batch_' + str(i))
        for i_data in dic[b'data']:
            data_train.append(i_data)
        for i_label in dic[b'labels']:
            label_train.append(i_label)
    # merge the test batch
    data_test = []
    label_test = []
    dic = unpickle(file + 'test_batch')
    for i_data in dic[b'data']:
        data_test.append(i_data)
    for i_label in dic[b'labels']:
        label_test.append(i_label)
    return (np.array(data_train), np.array(label_train),
            np.array(data_test), np.array(label_test))

path = './datasets/cifar-10-batches-py/'
# (50000,3072) (50000,) (10000,3072) (10000,)
(data_train, label_train, data_test, label_test) = load_cifar10(path)
print(data_train.shape)
print(label_train.shape)
print(label_train[:10])
print(data_test.shape)
print(label_test.shape)
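Since each batch's b'data' entry is already a (10000, 3072) NumPy array, the per-element append loops can be replaced with np.concatenate, which is shorter and much faster. A sketch (load_cifar10_fast is a hypothetical name, reusing the unpickle helper above):

def load_cifar10_fast(file):
    # stack the five (10000, 3072) batches into one (50000, 3072) array
    batches = [unpickle(file + 'data_batch_' + str(i)) for i in range(1, 6)]
    data_train = np.concatenate([b[b'data'] for b in batches])
    label_train = np.concatenate([np.array(b[b'labels']) for b in batches])
    test = unpickle(file + 'test_batch')
    return (data_train, label_train,
            np.array(test[b'data']), np.array(test[b'labels']))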
KNN code:

import numpy as np
import pickle

"""
Nearest-neighbor classification of the CIFAR-10 samples.
Accuracy is low and testing takes a long time.
"""

# unpack one batch file
def unpickle(file):
    fo = open(file, 'rb')
    dict = pickle.load(fo, encoding='bytes')
    # print(dict)  # debug output, very verbose
    fo.close()
    return dict

# merge the training and test batches into arrays (same as above)
def load_cifar10(file):
    data_train = []
    label_train = []
    # merge the training batches
    for i in range(1, 6):
        dic = unpickle(file + 'data_batch_' + str(i))
        for i_data in dic[b'data']:
            data_train.append(i_data)
        for i_label in dic[b'labels']:
            label_train.append(i_label)
    # merge the test batch
    data_test = []
    label_test = []
    dic = unpickle(file + 'test_batch')
    for i_data in dic[b'data']:
        data_test.append(i_data)
    for i_label in dic[b'labels']:
        label_test.append(i_label)
    return (np.array(data_train), np.array(label_train),
            np.array(data_test), np.array(label_test))

path = './datasets/cifar-10-batches-py/'
# (50000,3072) (50000,) (10000,3072) (10000,)
(data_train, label_train, data_test, label_test) = load_cifar10(path)
print(data_train.shape, label_train.shape, data_test.shape, label_test.shape)

"""
Nearest-neighbor prediction
"""
class NearestNeighbor:
    def __init__(self):
        pass

    def train(self, X, y):
        # "training" just memorizes the data
        self.Xtr = X
        self.ytr = y

    def predict(self, X):
        num_test = X.shape[0]
        # cast from uint8 once so the subtraction below cannot wrap around
        Xtr = self.Xtr.astype(np.int32)
        X = X.astype(np.int32)
        Y_pred = np.zeros(num_test, dtype=self.ytr.dtype)
        for i in range(num_test):
            # L1 (Manhattan) distance to every training sample
            distances = np.sum(np.abs(Xtr - X[i, :]), axis=1)
            # L2 alternative:
            # distances = np.sqrt(np.sum(np.square(Xtr - X[i, :]), axis=1))
            min_index = np.argmin(distances)
            Y_pred[i] = self.ytr[min_index]
            if i % 100 == 0:
                print('reached step {}'.format(i))
        return Y_pred

nn = NearestNeighbor()
nn.train(data_train, label_train)
Y_pred = nn.predict(data_test)
accuracy = np.mean(label_test == Y_pred)
print('accuracy={}'.format(accuracy))

Printed result: the accuracy is not high; neural networks are introduced later.
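Note that despite the title, the class above is 1-NN: each prediction copies the label of the single closest training image. A true k-nearest-neighbor variant takes a majority vote over the k closest samples; a sketch (predict_knn and the k parameter are my own additions, assuming the class and arrays above):

def predict_knn(self, X, k=5):
    num_test = X.shape[0]
    # cast from uint8 once so the subtraction cannot wrap around
    Xtr = self.Xtr.astype(np.int32)
    X = X.astype(np.int32)
    Y_pred = np.zeros(num_test, dtype=self.ytr.dtype)
    for i in range(num_test):
        # L1 distance to every training sample
        distances = np.sum(np.abs(Xtr - X[i, :]), axis=1)
        # indices of the k smallest distances
        nearest = np.argpartition(distances, k)[:k]
        # majority vote over the neighbors' labels
        Y_pred[i] = np.bincount(self.ytr[nearest]).argmax()
    return Y_pred

# attach and use it like the original predict:
# NearestNeighbor.predict_knn = predict_knn
# Y_pred = nn.predict_knn(data_test, k=5)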
SVM loss function:
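For one sample with score vector $s$ and correct class $y_i$, the multiclass SVM loss from the cs231n notes is

L_i = \sum_{j \neq y_i} \max(0, s_j - s_{y_i} + \Delta)

with $\Delta = 1$ below. One caveat about the code that follows: it takes np.mean over the entire margin matrix, which also divides by the number of classes. That only rescales the usual loss by a constant, so it does not change which W scores best.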
loss.py
import numpy as np

"""
Compute the SVM (hinge) loss over the whole training set
"""
def L(X, y, W):
    # X: [3073, 50000], one column per sample (bias row included)
    # y: (50000,) integer labels
    # W: [10, 3073]
    delta = 1.0
    # (10, 50000) class scores
    scores = np.dot(W, X)
    # scores[y, np.arange(...)] picks each sample's correct-class score
    margins = np.maximum(0, scores - scores[y, np.arange(scores.shape[1])] + delta)
    # the correct class itself contributes no loss
    margins[y, np.arange(scores.shape[1])] = 0
    # note: np.mean averages over classes as well as samples,
    # a constant rescaling of the usual per-sample SVM loss
    loss = np.mean(margins)
    return loss
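To convince yourself the function is right, run it on a case small enough to check by hand. A hypothetical sanity check (not part of the original post): two samples, three classes, two features.

import numpy as np
import loss

# 2 features, 2 samples (columns), 3 classes
X = np.array([[1.0, 0.0],
              [0.0, 1.0]])
y = np.array([0, 2])
W = np.array([[1.0, 0.0],
              [0.5, 0.5],
              [0.0, 1.0]])
# per-sample hinge losses are 0.5 and 0.5; loss.L averages the
# margin matrix over all 6 entries, so it prints 1/6 ≈ 0.1667
print(loss.L(X, y, W))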
optimizer_grand.py

import numpy as np
import pickle
import loss

"""
Find a good W (bias folded in) with random search and random
local search, scored by the SVM loss.
"""

# unpack one batch file
def unpickle(file):
    fo = open(file, 'rb')
    dict = pickle.load(fo, encoding='bytes')
    fo.close()
    return dict

# merge the training and test batches into arrays (same as above)
def load_cifar10(file):
    data_train = []
    label_train = []
    # merge the training batches
    for i in range(1, 6):
        dic = unpickle(file + 'data_batch_' + str(i))
        for i_data in dic[b'data']:
            data_train.append(i_data)
        for i_label in dic[b'labels']:
            label_train.append(i_label)
    # merge the test batch
    data_test = []
    label_test = []
    dic = unpickle(file + 'test_batch')
    for i_data in dic[b'data']:
        data_test.append(i_data)
    for i_label in dic[b'labels']:
        label_test.append(i_label)
    return (np.array(data_train), np.array(label_train),
            np.array(data_test), np.array(label_test))

path = './datasets/cifar-10-batches-py/'
# (50000,3072) (50000,) (10000,3072) (10000,)
(data_train, label_train, data_test, label_test) = load_cifar10(path)
print(data_train.shape, label_train.shape, data_test.shape, label_test.shape)

# (3072, 50000): one column per sample
train_data = np.transpose(data_train)
# append a row of ones so the bias is absorbed into W
bias = np.ones((1, train_data.shape[1]))
# (3073, 50000)
train_data = np.vstack((train_data, bias))
print(train_data.shape)

# random search: draw random weights and keep the best; returns the best W
def random_search():
    bestloss = float('inf')
    for number in range(1000):
        # redraw the weights at random each time, keep the better ones
        W = np.random.randn(10, 3073) * 0.0001
        # compute the loss value
        lost = loss.L(train_data, label_train, W)
        if lost < bestloss:
            bestloss = lost
            bestW = W
        if number % 100 == 0:
            print('number={}, loss={}, bestloss={}'.format(number, lost, bestloss))
    return bestW

# predict with the best random W and compare against the labels for accuracy
def random_search_accu():
    bestW = random_search()
    # (10, 50000)
    scores = np.dot(bestW, train_data)
    # index of the highest score in each column
    Y_predict = np.argmax(scores, axis=0)
    accuracy = np.mean(Y_predict == label_train)
    print('accuracy={}'.format(accuracy))

# local random search: perturb the current W and keep improvements
def random_local_search():
    W = np.random.randn(10, 3073) * 0.001
    bestloss = float('inf')
    for number in range(1000):
        step_size = 0.0001
        W_try = W + np.random.randn(10, 3073) * step_size
        # compute the loss value
        lost = loss.L(train_data, label_train, W_try)
        if lost < bestloss:
            bestloss = lost
            bestW = W_try
            # move to the better point so the search walks locally
            W = W_try
        if number % 100 == 0:
            print('number={}, loss={}, bestloss={}'.format(number, lost, bestloss))
    return bestW

# predict with the best locally-searched W and compute accuracy
def random_local_search_accu():
    bestW = random_local_search()
    # (10, 50000)
    scores = np.dot(bestW, train_data)
    # index of the highest score in each column
    Y_predict = np.argmax(scores, axis=0)
    accuracy = np.mean(Y_predict == label_train)
    print('accuracy={}'.format(accuracy))

if __name__ == '__main__':
    # random search
    # random_search_accu()
    # local random search
    random_local_search_accu()
    # gradient following (next step)

Printed results with the best random weights:
Results during the iterations, while the weights are still changing:
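The commented-out "gradient following" line in __main__ points at the next step: rather than guessing W, compute the gradient of the SVM loss and step against it. The post stops short of this, so the following is only a sketch in the spirit of the cs231n notes (svm_loss_grad is my own name; it uses the conventional per-sample average instead of loss.py's all-entries mean, reuses train_data and label_train from optimizer_grand.py, and the learning rate is an untuned guess):

import numpy as np

def svm_loss_grad(X, y, W, delta=1.0):
    # X: (D, N) one column per sample, y: (N,), W: (C, D)
    num_train = X.shape[1]
    scores = W.dot(X)                                  # (C, N)
    correct = scores[y, np.arange(num_train)]          # (N,)
    margins = np.maximum(0, scores - correct + delta)  # (C, N)
    margins[y, np.arange(num_train)] = 0
    data_loss = np.sum(margins) / num_train
    # each positive margin pushes its class weights toward +x
    # and the correct class weights toward -x
    binary = (margins > 0).astype(np.float64)
    binary[y, np.arange(num_train)] = -np.sum(binary, axis=0)
    dW = binary.dot(X.T) / num_train                   # (C, D)
    return data_loss, dW

W = np.random.randn(10, 3073) * 0.001
for step in range(100):
    lossval, dW = svm_loss_grad(train_data, label_train, W)
    W -= 1e-7 * dW  # step against the gradient
    if step % 10 == 0:
        print('step {}: loss={}'.format(step, lossval))

Unlike random search, every step here is guaranteed to point downhill; in practice you would also normalize the input data and tune the learning rate.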
Summary
That covers KNN recognition on the CIFAR-10 dataset, plus random and local random search with the SVM loss; hopefully it helps you solve the problems you run into.