python实现tsne
生活随笔
收集整理的這篇文章主要介紹了「python实现tsne」，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。
參考鏈接:
https://blog.csdn.net/zhangweiguo_717/article/details/70998959
原博客代碼完整如下:
# -*- coding: utf-8 -*-
"""Small t-SNE implementation built on NumPy, plus an Iris comparison demo.

The core functions (``cal_*`` and ``tsne``) only need NumPy.  The demo
``test_iris`` additionally imports scikit-learn and matplotlib, and does so
locally so that the core stays importable without them.

The Python 2 ``reload(sys)`` / ``sys.setdefaultencoding('utf-8')`` prologue of
the original listing is intentionally gone: neither name exists on Python 3.
"""
import sys
import time
import copy

import numpy

# The demo ``test_iris`` is deliberately left out of the star-import API.
__all__ = [
    'cal_matrix_P',
    'cal_p',
    'cal_entropy',
    'cal_matrix_Q',
    'cal_gradients',
    'cal_loss',
    'tsne',
]


def _squared_distances(A):
    """Return the matrix of squared Euclidean distances between the rows of A."""
    A = numpy.asarray(A, dtype=float)
    sq_norms = numpy.sum(A * A, axis=1)
    D = sq_norms[:, None] + sq_norms[None, :] - 2.0 * numpy.dot(A, A.T)
    # Round-off in the expansion above can leave tiny negative entries or a
    # non-zero self-distance; clean both up.
    numpy.maximum(D, 0.0, out=D)
    numpy.fill_diagonal(D, 0.0)
    return D


def cal_matrix_P(X, neighbors):
    """Compute the symmetric joint distribution P of the high-dimensional data.

    Args:
        X: 2-D array, one row per sample and one column per attribute.
        neighbors: target perplexity; each row's conditional distribution is
            tuned so that its entropy equals ``log(neighbors)``.

    Returns:
        An ``(n, n)`` array ``(P_cond + P_cond.T) / (2 * n)`` floored at
        ``1e-100`` so that later logarithms never see zero.
    """
    X = numpy.asarray(X, dtype=float)
    n1, _ = X.shape
    target_entropy = numpy.log(neighbors)
    D = _squared_distances(X)
    P = numpy.zeros((n1, n1))
    indices = numpy.arange(n1)
    for i in range(n1):
        # Exclude the point itself by index.  Dropping "the first entry of the
        # sorted row" is wrong when duplicate samples tie at distance 0.
        others = indices != i
        P[i, others] = cal_p(D[i, others], entropy=target_entropy)
    P = (P + P.T) / (2 * n1)
    return numpy.maximum(P, 1e-100)


def cal_p(D, entropy, K=50):
    """Find the conditional distribution over neighbours with a given entropy.

    Searches for the precision ``beta`` of ``exp(-D * beta)`` by doubling /
    halving and then bisection until the entropy is within ``1e-4`` of the
    target or the iteration budget is used up.

    Args:
        D: 1-D array of squared distances from one point to the others.
        entropy: target entropy (natural log units).
        K: the search stops once the iteration counter exceeds K.

    Returns:
        1-D array of probabilities summing to 1 (empty if D is empty).
    """
    D = numpy.asarray(D, dtype=float)
    if D.size == 0:
        return numpy.zeros_like(D)
    # Shifting by the minimum leaves the normalised distribution unchanged but
    # guarantees one weight equals exp(0) == 1, so the sum cannot underflow to 0.
    D = D - D.min()
    beta = 1.0
    beta_min = -numpy.inf
    beta_max = numpy.inf
    error = cal_entropy(D, beta) - entropy
    k = 0
    while numpy.abs(error) > 1e-4 and k <= K:
        if error > 0:
            # Entropy too high -> distribution too flat -> raise beta.
            beta_min = beta
            if beta_max == numpy.inf:
                beta = beta * 2
            else:
                beta = (beta + beta_max) / 2
        else:
            # Entropy too low -> distribution too peaked -> lower beta.
            beta_max = beta
            if beta_min == -numpy.inf:
                beta = beta / 2
            else:
                beta = (beta + beta_min) / 2
        error = cal_entropy(D, beta) - entropy
        k += 1
    P = numpy.exp(-D * beta)
    return P / numpy.sum(P)


def cal_entropy(D, beta):
    """Return the entropy of the distribution proportional to ``exp(-D * beta)``.

    Args:
        D: 1-D array of squared distances.
        beta: precision of the kernel.

    Returns:
        ``log(sum(P)) + beta * sum(D * P) / sum(P)`` with ``P = exp(-D * beta)``,
        evaluated on distances shifted by their minimum (the value is invariant
        under such a shift, and the shift avoids underflow).
    """
    D = numpy.asarray(D, dtype=float)
    if D.size:
        D = D - D.min()
    P = numpy.exp(-D * beta)
    sumP = numpy.maximum(numpy.sum(P), 1e-200)
    return numpy.log(sumP) + beta * numpy.sum(D * P) / sumP


def cal_matrix_Q(Y):
    """Compute the low-dimensional joint distribution Q with a Student-t kernel.

    The original post notes that swapping the kernel for one of
    ``1/(1+exp(D))``, ``1/(1+D**2)``, ``1/(1+2*D)`` or ``1/(1+0.5*D)`` switches
    between t-SNE and LargeVis-style variants; ``cal_gradients`` below is
    written for the ``1/(1+D)`` kernel used here.

    Args:
        Y: 2-D array of embedding coordinates, one row per sample.

    Returns:
        An ``(n, n)`` array whose off-diagonal entries sum to 1, floored at
        ``1e-100`` (so the diagonal is ``1e-100``).
    """
    W = 1.0 / (1.0 + _squared_distances(Y))
    numpy.fill_diagonal(W, 0.0)
    total = numpy.sum(W)
    if total > 0:
        # total is 0 only when there are no pairs at all (fewer than 2 points).
        W = W / total
    return numpy.maximum(W, 1e-100)


def cal_gradients(P, Q, Y):
    """Return the gradient of the KL loss with respect to the embedding Y.

    Row i is ``4 * sum_j (P[i, j] - Q[i, j]) * (Y[i] - Y[j]) / (1 + |Y[i] - Y[j]|^2)``.

    Args:
        P: ``(n, n)`` high-dimensional joint distribution.
        Q: ``(n, n)`` low-dimensional joint distribution.
        Y: ``(n, d)`` embedding.

    Returns:
        ``(n, d)`` array of gradients.
    """
    P = numpy.asarray(P, dtype=float)
    Q = numpy.asarray(Q, dtype=float)
    Y = numpy.asarray(Y, dtype=float)
    W = (P - Q) / (1.0 + _squared_distances(Y))
    # sum_j W_ij (y_i - y_j) == y_i * sum_j W_ij - (W @ Y)_i
    return 4.0 * (numpy.sum(W, axis=1)[:, None] * Y - numpy.dot(W, Y))


def cal_loss(P, Q):
    """Return the KL divergence ``sum(P * log(P / Q))``, used as the loss."""
    P = numpy.asarray(P, dtype=float)
    Q = numpy.asarray(Q, dtype=float)
    return numpy.sum(P * numpy.log(P / Q))


def _save_history(path, history):
    """Store the list of embeddings under key 'data' in a shelve file."""
    import shelve
    shelf = shelve.open(path)
    try:
        shelf['data'] = history
    finally:
        shelf.close()


def tsne(X, n=2, neighbors=30, max_iter=200, history_path='tsne.dat',
         verbose=True):
    """Embed X into n dimensions with gradient descent on the t-SNE loss.

    Args:
        X: 2-D array, one row per sample.
        n: number of embedding dimensions.
        neighbors: perplexity passed to ``cal_matrix_P``.
        max_iter: maximum number of iterations.
        history_path: shelve file that receives, under key ``'data'``, the
            embedding as it was at the start of every iteration.  Pass ``None``
            to skip writing.  The default keeps the original file name.
        verbose: print the loss and step size every 10th accepted iteration.

    Returns:
        ``(n_samples, n)`` array with the final embedding.
    """
    X = numpy.asarray(X, dtype=float)
    n1, _ = X.shape
    P = cal_matrix_P(X, neighbors)
    Y = numpy.random.randn(n1, n) * 1e-4
    Q = cal_matrix_Q(Y)
    DY = cal_gradients(P, Q, Y)
    A = 200.0  # step size, adapted during the run
    B = 0.1    # momentum coefficient
    history = []
    for i in range(max_iter):
        history.append(Y)
        if i < 2:
            # Two plain gradient steps seed the momentum term.  Q and the
            # gradient are refreshed after each step so that the second step
            # and the recorded losses are not based on stale values.
            Y = Y - A * DY
            Q = cal_matrix_Q(Y)
            DY = cal_gradients(P, Q, Y)
            loss = cal_loss(P, Q)
            if i == 0:
                Y1, error1 = Y, loss
            else:
                Y2, error2 = Y, loss
        else:
            YY = Y - A * DY + B * (Y2 - Y1)
            QQ = cal_matrix_Q(YY)
            error = cal_loss(P, QQ)
            if error > error2:
                # Step made things worse: reject it and shrink the step size.
                A = A * 0.7
                continue
            if (error - error2) > (error2 - error1):
                # Progress is slowing down: enlarge the step size.
                A = A * 1.2
            Y, Q = YY, QQ
            DY = cal_gradients(P, Q, Y)
            error1, error2 = error2, error
            Y1, Y2 = Y2, Y
            loss = error
        if loss < 1e-3:
            break
        if verbose and (i + 1) % 10 == 0:
            print('%s iterations the error is %s, A is %s'
                  % (str(i + 1), str(round(float(loss), 2)), str(round(A, 3))))
    # Saved on every exit path, including the early stop above.
    if history_path is not None:
        _save_history(history_path, history)
    return Y


def _plot_classes(plt, Y, target, title):
    """Scatter the three label groups of Y on the current axes."""
    for label, style in zip((0, 1, 2), ('ro', 'gx', 'b*')):
        members = Y[target == label]
        plt.plot(members[:, 0], members[:, 1], style, markersize=30)
    plt.title(title)


def test_iris():
    """Demo: embed Iris with this module and with scikit-learn, then plot both."""
    # Imported here so the numerical core works without these packages.
    import matplotlib.pyplot as plt
    from sklearn import manifold
    from sklearn.datasets import load_iris

    data = load_iris()
    X = data.data            # attributes
    target = data.target     # labels

    t1 = time.time()
    Y = tsne(X, n=2, max_iter=300, neighbors=20)
    t2 = time.time()
    print("Custom TSNE cost time: %s" % str(round(t2 - t1, 2)))

    plt.figure()
    plt.subplot(1, 2, 1)
    _plot_classes(plt, Y, target, 'CUSTOM')

    plt.subplot(1, 2, 2)
    t1 = time.time()
    Y1 = manifold.TSNE(n_components=2).fit_transform(X)
    t2 = time.time()
    print("Sklearn TSNE cost time: %s" % str(round(t2 - t1, 2)))
    _plot_classes(plt, Y1, target, 'SKLEARN')
    plt.show()


if __name__ == '__main__':
    test_iris()

# End of the code listing. The article's closing section ("總結" / "Summary") follows.
以上是生活随笔為你收集整理的python实现tsne的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 把windows键盘作为xfce环境中的
- 下一篇: sublime突然中文乱码