當(dāng)前位置：首頁 > 编程语言 > python >内容正文

python

python3源码剖析_T-SNE源码剖析(python版)

發(fā)布時間：2025/3/15 python 23 豆豆

生活随笔收集整理的這篇文章主要介紹了 python3源码剖析_T-SNE源码剖析(python版) 小編覺得挺不錯的,現(xiàn)在分享給大家,幫大家做個參考.

import numpy as np

import matplotlib.pyplot as plt

# 輸入為(n*m)的矩陣，表示n個樣本，m個屬性

# 返回一個距離矩陣

def cal_pairwise_dist(x):

# '''計算pairwise 距離, x是matrix

# (a-b)^2 = a^2 + b^2 - 2*a*b

# '''

sum_x = np.sum(np.square(x), 1)

# print -2 * np.dot(x, x.T)

# print np.add(-2 * np.dot(x, x.T), sum_x).T

dist = np.add(np.add(-2 * np.dot(x, x.T), sum_x).T, sum_x)

#返回任意兩個點(diǎn)之間距離的平方

return dist

# 計算困惑度，最終會選擇合適的beta，也就是每個點(diǎn)的方差啦

def cal_perplexity(dist, idx=0, beta=1.0):

# '''計算perplexity, D是距離向量，

# idx指dist中自己與自己距離的位置，beta是高斯分布參數(shù)

# 這里的perp僅計算了熵，方便計算

# '''

prob = np.exp(-dist * beta)

# 設(shè)置自身prob為0

prob[idx] = 0

sum_prob = np.sum(prob)

if sum_prob == 0:

prob = np.maximum(prob, 1e-12)

perp = -12

else:

prob /= sum_prob

perp = 0

for pj in prob:

if pj != 0:

perp += -pj*np.log(pj)

# 困惑度和pi\j的概率分布

return perp, prob

def seach_prob(x, tol=1e-5, perplexity=30.0):

# '''二分搜索尋找beta,并計算pairwise的prob

# '''

# 初始化參數(shù)

print("Computing pairwise distances...")

(n, d) = x.shape

dist = cal_pairwise_dist(x)

pair_prob = np.zeros((n, n))

beta = np.ones((n, 1))

# 取log，方便后續(xù)計算

base_perp = np.log(perplexity)

for i in range(n):

if i % 500 == 0:

print("Computing pair_prob for point%sof%s..." %(i,n))

betamin = -np.inf

betamax = np.inf

#dist[i]需要換不能是所有點(diǎn)

perp, this_prob = cal_perplexity(dist[i], i, beta[i])

# 二分搜索,尋找最佳sigma下的prob

perp_diff = perp - base_perp

tries = 0

while np.abs(perp_diff) > tol and tries < 50:

if perp_diff > 0:

betamin = beta[i].copy()

if betamax == np.inf or betamax == -np.inf:

beta[i] = beta[i] * 2

else:

beta[i] = (beta[i] + betamax) / 2

else:

betamax = beta[i].copy()

if betamin == np.inf or betamin == -np.inf:

beta[i] = beta[i] / 2

else:

beta[i] = (beta[i] + betamin) / 2

# 更新perb,prob值

perp, this_prob = cal_perplexity(dist[i], i, beta[i])

perp_diff = perp - base_perp

tries = tries + 1

# 記錄prob值

pair_prob[i,] = this_prob

print("Mean value of sigma: ", np.mean(np.sqrt(1 / beta)))

#每個點(diǎn)對其他點(diǎn)的條件概率分布pi\j

return pair_prob

def tsne(x, no_dims=2, initial_dims=50, perplexity=30.0, max_iter=800):

"""Runs t-SNE on the dataset in the NxD array xto reduce its dimensionality to no_dims dimensions.The syntaxis of the function is Y = tsne.tsne(x, no_dims, perplexity),where x is an NxD NumPy array."""

# Check inputs

if isinstance(no_dims, float):

print("Error: array x should have type float.")

return -1

if round(no_dims) != no_dims:

print("Error: number of dimensions should be an integer.")

return -1

(n, d) = x.shape

print (x.shape)

#動量

eta = 500

# 隨機(jī)初始化Y

y = np.random.randn(n, no_dims)

# dy梯度

dy = np.zeros((n, no_dims))

# 對稱化

P = seach_prob(x, 1e-5, perplexity)

P = P + np.transpose(P)

P = P / np.sum(P) #pij

# early exaggeration

# pi\j

P = P * 4

P = np.maximum(P, 1e-12)

# Run iterations

for iter in range(max_iter):

# Compute pairwise affinities

sum_y = np.sum(np.square(y), 1)

num = 1 / (1 + np.add(np.add(-2 * np.dot(y, y.T), sum_y).T, sum_y))

num[range(n), range(n)] = 0

Q = num / np.sum(num) #qij

Q = np.maximum(Q, 1e-12) #X與Y逐位比較取其大者

# Compute gradient

#pij-qij

PQ = P - Q

#梯度dy

for i in range(n):

dy[i,:] = np.sum(np.tile(PQ[:,i] * num[:,i], (no_dims, 1)).T * (y[i,:] - y), 0)

# 更新y

y = y - eta*dy

# 減去均值

y = y - np.tile(np.mean(y, 0), (n, 1))

# Compute current value of cost function

if (iter + 1) % 50 == 0:

if iter > 100:

C = np.sum(P * np.log(P / Q))

else:

C = np.sum( P/4 * np.log( P/4 / Q))

print("Iteration ", (iter + 1), ": error is ", C)

# Stop lying about P-values

if iter == 100:

P = P / 4

print("finished training!")

return y

if __name__ == "__main__":

print("Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset.")

print("Running example on 2,500 MNIST digits...")

X = np.loadtxt("mnist2500_X.txt")

labels = np.loadtxt("mnist2500_labels.txt")

Y = tsne(X, 2, 50, 20.0)

plt.scatter(Y[:, 0], Y[:, 1], 20, labels)

plt.show()

總結(jié)

以上是生活随笔為你收集整理的python3源码剖析_T-SNE源码剖析(python版)的全部內(nèi)容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網(wǎng)站內(nèi)容還不錯，歡迎將生活随笔推薦給好友。

上一篇：柠檬班python自动化视频百度云_py
下一篇：边坡稳定性分析软件slope/w用户指南

3atv精品不卡视频,97人人超碰国产精品最新,中文字幕av一区二区三区人妻少妇,久久久精品波多野结衣,日韩一区二区三区精品

python

python3源码剖析_T-SNE源码剖析(python版)

總結(jié)