word2vec代码_TensorFlow2.0 代码实战专栏(四):Word2Vec (Word Embedding)
生活随笔
收集整理的這篇文章主要介紹了
word2vec代码_TensorFlow2.0 代码实战专栏(四):Word2Vec (Word Embedding)
小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
作者 | Aymeric Damien  編輯 | 奇予紀  出品 | 磐創AI團隊
import?collections
import?os
import?random
import?urllib
import?zipfile
import?numpy?as?np
import?tensorflow?as?tf
# Training parameters.
# (The original pasted this block twice; one copy is enough.)
learning_rate = 0.1
batch_size = 128
num_steps = 3000000
display_step = 10000
eval_step = 200000

# Evaluation parameters: words whose nearest neighbors are printed during training.
eval_words = ['five', 'of', 'going', 'hardware', 'american', 'britain']

# Word2Vec parameters.
embedding_size = 200         # Dimension of each word's embedding vector.
max_vocabulary_size = 50000  # Maximum number of distinct words kept in the vocabulary.
min_occurrence = 10          # Drop words that occur fewer than this many times.
skip_window = 3              # How many words to consider on each side of the target.
num_skips = 2                # How many (target, context) pairs to draw per window.
num_sampled = 64             # Number of negative examples for NCE sampling.

# Download a small collection of Wikipedia articles (the text8 corpus).
# Bug fix: `urllib.urlretrieve` is the Python 2 API; under Python 3 the
# function lives in `urllib.request`.
import urllib.request

url = 'http://mattmahoney.net/dc/text8.zip'
data_path = 'text8.zip'
if not os.path.exists(data_path):
    print("Downloading the dataset... (It may take some time)")
    filename, _ = urllib.request.urlretrieve(url, data_path)
    print("Done!")

# Unzip the dataset file; the text inside is already pre-processed.
with zipfile.ZipFile(data_path) as f:
    text_words = f.read(f.namelist()[0]).lower().split()
#?構(gòu)建詞典并用?UNK?標記替換頻數(shù)較低的詞
count?=?[('UNK',?-1)]
#?檢索最常見的單詞
count.extend(collections.Counter(text_words).most_common(max_vocabulary_size?-?1))
#?刪除少于'min_occurrence'次數(shù)的樣本
for?i?in?range(len(count)?-?1,?-1,?-1):
????if?count[i][1]?????????count.pop(i)
????else:
????????#該集合是有序的,因此在當出現(xiàn)小于'min_occurrence'時停止
????????break
#?計算單詞表單詞個數(shù)
vocabulary_size?=?len(count)
#?為每一個詞分配id
word2id?=?dict()
for?i,?(word,?_)in?enumerate(count):
????word2id[word]?=?i
data?=?list()
unk_count?=?0
for?word?in?text_words:
?????#?檢索單詞id,或者如果不在字典中則為其指定索引0('UNK')
????index?=?word2id.get(word,?0)
????if?index?==?0:
????????unk_count?+=?1
????data.append(index)
count[0]?=?('UNK',?unk_count)
id2word?=?dict(zip(word2id.values(),?word2id.keys()))
print("Words?count:",?len(text_words))
print("Unique?words:",?len(set(text_words)))
print("Vocabulary?size:",?vocabulary_size)
print("Most?common?words:",?count[:10])
Unique?words:?253854
Vocabulary?size:?47135
Most?common?words:?[('UNK',?444176),?('the',?1061396),?('of',?593677),?('and',?416629),?('one',?411764),?('in',?372201),?('a',?325873),?('to',?316376),?('zero',?264975),?('nine',?250430)]
data_index = 0


def next_batch(batch_size, num_skips, skip_window):
    """Generate one training batch for the skip-gram model.

    Advances the module-level cursor `data_index` through `data` and returns
    (batch, labels): `batch` holds target word ids, `labels` holds one
    context word id per row drawn from the surrounding window.
    """
    global data_index
    assert batch_size % num_skips == 0
    assert num_skips <= 2 * skip_window
    # Window length: the current word plus skip_window words on each side.
    span = 2 * skip_window + 1
    batch = np.empty(batch_size, dtype=np.int32)
    labels = np.empty((batch_size, 1), dtype=np.int32)
    window = collections.deque(maxlen=span)
    if data_index + span > len(data):
        data_index = 0
    window.extend(data[data_index:data_index + span])
    data_index += span
    for group in range(batch_size // num_skips):
        candidates = [pos for pos in range(span) if pos != skip_window]
        chosen = random.sample(candidates, num_skips)
        base = group * num_skips
        for offset, pos in enumerate(chosen):
            batch[base + offset] = window[skip_window]
            labels[base + offset, 0] = window[pos]
        if data_index == len(data):
            window.extend(data[:span])
            data_index = span
        else:
            window.append(data[data_index])
            data_index += 1
    # Backtrack a little to avoid skipping words at the end of a batch.
    data_index = (data_index + len(data) - span) % len(data)
    return batch, labels
# Ensure the following ops/variables are allocated on the CPU
# (some of the ops used below are not GPU-compatible).
with tf.device('/cpu:0'):
    _param_shape = [vocabulary_size, embedding_size]
    # Embedding matrix: each row is the embedding vector of one word.
    embedding = tf.Variable(tf.random.normal(_param_shape))
    # Parameters of the NCE loss.
    nce_weights = tf.Variable(tf.random.normal(_param_shape))
    nce_biases = tf.Variable(tf.zeros([vocabulary_size]))
def get_embedding(x):
    """Look up the embedding vector for each sample id in x."""
    with tf.device('/cpu:0'):
        return tf.nn.embedding_lookup(embedding, x)
def nce_loss(x_embed, y):
    """Return the NCE loss for a batch, averaged over the batch."""
    with tf.device('/cpu:0'):
        targets = tf.cast(y, tf.int64)
        per_example = tf.nn.nce_loss(
            weights=nce_weights,
            biases=nce_biases,
            labels=targets,
            inputs=x_embed,
            num_sampled=num_sampled,
            num_classes=vocabulary_size,
        )
        return tf.reduce_mean(per_example)
# Evaluation.
def evaluate(x_embed):
    """Cosine similarity between the input embeddings and every vocabulary embedding.

    Bug fix: the original passed `tf.float32` as a second positional argument
    to `tf.sqrt`, which binds to its `name` parameter and raises a TypeError
    in TF2. The stray argument is removed; both operands are already float32.
    """
    with tf.device('/cpu:0'):
        x_embed = tf.cast(x_embed, tf.float32)
        x_embed_norm = x_embed / tf.sqrt(tf.reduce_sum(tf.square(x_embed)))
        embedding_norm = embedding / tf.sqrt(
            tf.reduce_sum(tf.square(embedding), 1, keepdims=True))
        cosine_sim_op = tf.matmul(x_embed_norm, embedding_norm, transpose_b=True)
        return cosine_sim_op
# Define the optimizer.
optimizer = tf.optimizers.SGD(learning_rate)


def run_optimization(x, y):
    """Run one SGD step on a (targets, labels) batch, updating all parameters."""
    with tf.device('/cpu:0'):
        trainable = [embedding, nce_weights, nce_biases]
        # Wrap the computation in a GradientTape for automatic differentiation.
        with tf.GradientTape() as tape:
            loss = nce_loss(get_embedding(x), y)
        # Compute gradients and apply them to the trainable variables.
        grads = tape.gradient(loss, trainable)
        optimizer.apply_gradients(zip(grads, trainable))
# Words used for evaluation.
x_test = np.array([word2id[w] for w in eval_words])

# Train for the given number of steps.
# Bug fix: `xrange` exists only in Python 2; TF2 requires Python 3, so all
# `xrange` calls are replaced with `range`.
for step in range(1, num_steps + 1):
    batch_x, batch_y = next_batch(batch_size, num_skips, skip_window)
    run_optimization(batch_x, batch_y)

    if step % display_step == 0 or step == 1:
        loss = nce_loss(get_embedding(batch_x), batch_y)
        print("step: %i, loss: %f" % (step, loss))

    # Evaluation: print the nearest neighbors of each test word.
    if step % eval_step == 0 or step == 1:
        print("Evaluation...")
        sim = evaluate(get_embedding(x_test)).numpy()
        for i in range(len(eval_words)):
            top_k = 8  # Number of nearest neighbors to display.
            nearest = (-sim[i, :]).argsort()[1:top_k + 1]
            log_str = '"%s" nearest neighbors:' % eval_words[i]
            for k in range(top_k):
                log_str = '%s %s,' % (log_str, id2word[nearest[k]])
            print(log_str)
step:?1,?loss:?504.444214
Evaluation...
"five"?nearest?neighbors:?censure,?stricken,?anglicanism,?stick,?streetcars,?shrines,?horrified,?sparkle,
"of"?nearest?neighbors:?jolly,?weary,?clinicians,?kerouac,?economist,?owls,?safe,?playoff,
"going"?nearest?neighbors:?filament,?platforms,?moderately,?micheal,?despotic,?krag,?disclosed,?your,
"hardware"?nearest?neighbors:?occupants,?paraffin,?vera,?reorganized,?rename,?declares,?prima,?condoned,
"american"?nearest?neighbors:?portfolio,?rhein,?aalto,?angle,?lifeson,?tucker,?sexton,?dench,
"britain"?nearest?neighbors:?indivisible,?disbelief,?scripture,?pepsi,?scriptores,?sighting,?napalm,?strike,
step:?10000,?loss:?117.166962
step:?20000,?loss:?65.478333
step:?30000,?loss:?46.580460
step:?40000,?loss:?25.563128
step:?50000,?loss:?50.924446
step:?60000,?loss:?51.696526
step:?70000,?loss:?17.272142
step:?80000,?loss:?32.579414
step:?90000,?loss:?68.372032
step:?100000,?loss:?36.026573
step:?110000,?loss:?22.502020
step:?120000,?loss:?15.788742
step:?130000,?loss:?31.832420
step:?140000,?loss:?25.096617
step:?150000,?loss:?12.013027
step:?160000,?loss:?20.574780
step:?170000,?loss:?12.201975
step:?180000,?loss:?20.983793
step:?190000,?loss:?11.366720
step:?200000,?loss:?19.431549
Evaluation...
"five"?nearest?neighbors:?three,?four,?eight,?six,?two,?seven,?nine,?zero,
"of"?nearest?neighbors:?the,?a,?and,?first,?with,?on,?but,?from,
"going"?nearest?neighbors:?have,?more,?used,?out,?be,?with,?on,?however,
"hardware"?nearest?neighbors:?be,?known,?system,?apollo,?and,?a,?such,?used,
"american"?nearest?neighbors:?UNK,?and,?from,?s,?at,?in,?after,?about,
"britain"?nearest?neighbors:?of,?and,?many,?the,?as,?used,?but,?such,
step:?210000,?loss:?16.361233
step:?220000,?loss:?17.529526
step:?230000,?loss:?16.805817
step:?240000,?loss:?6.365625
step:?250000,?loss:?8.083097
step:?260000,?loss:?11.262514
step:?270000,?loss:?9.842708
step:?280000,?loss:?6.363440
step:?290000,?loss:?8.732617
step:?300000,?loss:?10.484728
step:?310000,?loss:?12.099487
step:?320000,?loss:?11.496288
step:?330000,?loss:?9.283813
step:?340000,?loss:?10.777218
step:?350000,?loss:?16.310440
step:?360000,?loss:?7.495782
step:?370000,?loss:?9.287696
step:?380000,?loss:?6.982735
step:?390000,?loss:?8.549622
step:?400000,?loss:?8.388112
Evaluation...
"five"?nearest?neighbors:?four,?three,?six,?two,?seven,?eight,?one,?zero,
"of"?nearest?neighbors:?the,?a,?with,?also,?for,?and,?which,?by,
"going"?nearest?neighbors:?have,?are,?both,?called,?being,?a,?of,?had,
"hardware"?nearest?neighbors:?may,?de,?some,?have,?so,?which,?other,?also,
"american"?nearest?neighbors:?s,?british,?UNK,?from,?in,?including,?first,?see,
"britain"?nearest?neighbors:?against,?include,?including,?both,?british,?other,?an,?most,
step:?410000,?loss:?8.757725
step:?420000,?loss:?12.303110
step:?430000,?loss:?12.325478
step:?440000,?loss:?7.659882
step:?450000,?loss:?6.028089
step:?460000,?loss:?12.700299
step:?470000,?loss:?7.063077
step:?480000,?loss:?18.004183
step:?490000,?loss:?7.510474
step:?500000,?loss:?10.089376
step:?510000,?loss:?11.404436
step:?520000,?loss:?9.494527
step:?530000,?loss:?7.797963
step:?540000,?loss:?7.390718
step:?550000,?loss:?13.911215
step:?560000,?loss:?6.975731
step:?570000,?loss:?6.179163
step:?580000,?loss:?7.066525
step:?590000,?loss:?6.487288
step:?600000,?loss:?5.361528
Evaluation...
"five"?nearest?neighbors:?four,?six,?three,?seven,?two,?one,?eight,?zero,
"of"?nearest?neighbors:?the,?and,?from,?with,?a,?including,?in,?include,
"going"?nearest?neighbors:?have,?even,?they,?term,?who,?many,?which,?were,
"hardware"?nearest?neighbors:?include,?computer,?an,?which,?other,?each,?than,?may,
"american"?nearest?neighbors:?english,?french,?s,?german,?from,?in,?film,?see,
"britain"?nearest?neighbors:?several,?first,?modern,?part,?government,?german,?was,?were,
step:?610000,?loss:?4.144980
step:?620000,?loss:?5.865635
step:?630000,?loss:?6.826498
step:?640000,?loss:?8.376097
step:?650000,?loss:?7.117930
step:?660000,?loss:?7.639544
step:?670000,?loss:?5.973255
step:?680000,?loss:?4.908459
step:?690000,?loss:?6.164993
step:?700000,?loss:?7.360281
step:?710000,?loss:?12.693079
step:?720000,?loss:?6.410182
step:?730000,?loss:?7.499201
step:?740000,?loss:?6.509094
step:?750000,?loss:?10.625893
step:?760000,?loss:?7.177696
step:?770000,?loss:?12.639092
step:?780000,?loss:?8.441635
step:?790000,?loss:?7.529139
step:?800000,?loss:?6.579177
Evaluation...
"five"?nearest?neighbors:?four,?three,?six,?seven,?eight,?two,?one,?zero,
"of"?nearest?neighbors:?and,?with,?in,?the,?its,?from,?by,?including,
"going"?nearest?neighbors:?have,?they,?how,?include,?people,?however,?also,?their,
"hardware"?nearest?neighbors:?computer,?large,?include,?may,?or,?which,?other,?there,
"american"?nearest?neighbors:?born,?french,?british,?english,?german,?b,?john,?d,
"britain"?nearest?neighbors:?country,?including,?include,?general,?part,?various,?several,?by,
step:?810000,?loss:?6.934138
step:?820000,?loss:?5.686094
step:?830000,?loss:?7.310243
step:?840000,?loss:?5.028157
step:?850000,?loss:?7.079705
step:?860000,?loss:?6.768996
step:?870000,?loss:?5.604030
step:?880000,?loss:?8.208309
step:?890000,?loss:?6.301597
step:?900000,?loss:?5.733234
step:?910000,?loss:?6.577081
step:?920000,?loss:?6.774826
step:?930000,?loss:?7.068932
step:?940000,?loss:?6.694956
step:?950000,?loss:?7.944673
step:?960000,?loss:?5.988618
step:?970000,?loss:?6.651366
step:?980000,?loss:?4.595577
step:?990000,?loss:?6.564834
step:?1000000,?loss:?4.327858
Evaluation...
"five"?nearest?neighbors:?four,?three,?seven,?six,?eight,?two,?nine,?zero,
"of"?nearest?neighbors:?the,?first,?and,?became,?from,?under,?at,?with,
"going"?nearest?neighbors:?others,?has,?then,?have,?how,?become,?had,?also,
"hardware"?nearest?neighbors:?computer,?large,?systems,?these,?different,?either,?include,?using,
"american"?nearest?neighbors:?b,?born,?d,?UNK,?nine,?english,?german,?french,
"britain"?nearest?neighbors:?government,?island,?local,?country,?by,?including,?control,?within,
step:?1010000,?loss:?5.841236
step:?1020000,?loss:?5.805200
step:?1030000,?loss:?9.962063
step:?1040000,?loss:?6.281199
step:?1050000,?loss:?7.147995
step:?1060000,?loss:?5.721184
step:?1070000,?loss:?7.080662
step:?1080000,?loss:?6.638658
step:?1090000,?loss:?5.814178
step:?1100000,?loss:?5.195928
step:?1110000,?loss:?6.724787
step:?1120000,?loss:?6.503905
step:?1130000,?loss:?5.762966
step:?1140000,?loss:?5.790243
step:?1150000,?loss:?5.958191
step:?1160000,?loss:?5.997983
step:?1170000,?loss:?7.065348
step:?1180000,?loss:?6.073387
step:?1190000,?loss:?6.644097
step:?1200000,?loss:?5.934450
Evaluation...
"five"?nearest?neighbors:?three,?four,?six,?eight,?seven,?two,?nine,?zero,
"of"?nearest?neighbors:?the,?and,?including,?in,?its,?with,?from,?on,
"going"?nearest?neighbors:?others,?then,?through,?has,?had,?another,?people,?when,
"hardware"?nearest?neighbors:?computer,?control,?systems,?either,?these,?large,?small,?other,
"american"?nearest?neighbors:?born,?german,?john,?d,?british,?b,?UNK,?french,
"britain"?nearest?neighbors:?local,?against,?british,?island,?country,?general,?including,?within,
step:?1210000,?loss:?5.832344
step:?1220000,?loss:?6.453851
step:?1230000,?loss:?6.583966
step:?1240000,?loss:?5.571673
step:?1250000,?loss:?5.720917
step:?1260000,?loss:?7.663424
step:?1270000,?loss:?6.583741
step:?1280000,?loss:?8.503859
step:?1290000,?loss:?5.540640
step:?1300000,?loss:?6.703249
step:?1310000,?loss:?5.274101
step:?1320000,?loss:?5.846446
step:?1330000,?loss:?5.438172
step:?1340000,?loss:?6.367691
step:?1350000,?loss:?6.558622
step:?1360000,?loss:?9.822924
step:?1370000,?loss:?4.982378
step:?1380000,?loss:?6.159739
step:?1390000,?loss:?5.819083
step:?1400000,?loss:?7.775135
Evaluation...
"five"?nearest?neighbors:?four,?three,?six,?seven,?two,?eight,?one,?zero,
"of"?nearest?neighbors:?and,?the,?in,?with,?its,?within,?for,?including,
"going"?nearest?neighbors:?others,?through,?while,?has,?to,?how,?particularly,?their,
"hardware"?nearest?neighbors:?computer,?systems,?large,?control,?research,?using,?information,?either,
"american"?nearest?neighbors:?english,?french,?german,?born,?film,?british,?s,?former,
"britain"?nearest?neighbors:?british,?country,?europe,?local,?military,?island,?against,?western,
step:?1410000,?loss:?8.214248
step:?1420000,?loss:?4.696859
step:?1430000,?loss:?5.873761
step:?1440000,?loss:?5.971557
step:?1450000,?loss:?4.992722
step:?1460000,?loss:?5.197714
step:?1470000,?loss:?6.916918
step:?1480000,?loss:?6.441984
step:?1490000,?loss:?5.443647
step:?1500000,?loss:?5.178482
step:?1510000,?loss:?6.060414
step:?1520000,?loss:?6.373306
step:?1530000,?loss:?5.098322
step:?1540000,?loss:?6.674916
step:?1550000,?loss:?6.712685
step:?1560000,?loss:?5.280202
step:?1570000,?loss:?6.454964
step:?1580000,?loss:?4.896697
step:?1590000,?loss:?6.239226
step:?1600000,?loss:?5.709726
Evaluation...
"five"?nearest?neighbors:?three,?four,?two,?six,?seven,?eight,?one,?zero,
"of"?nearest?neighbors:?the,?and,?including,?in,?with,?within,?its,?following,
"going"?nearest?neighbors:?others,?people,?who,?they,?that,?far,?were,?have,
"hardware"?nearest?neighbors:?computer,?systems,?include,?high,?research,?some,?information,?large,
"american"?nearest?neighbors:?born,?english,?french,?british,?german,?d,?john,?b,
"britain"?nearest?neighbors:?country,?military,?china,?europe,?against,?local,?central,?british,
step:?1610000,?loss:?6.334940
step:?1620000,?loss:?5.093616
step:?1630000,?loss:?6.119366
step:?1640000,?loss:?4.975187
step:?1650000,?loss:?6.490408
step:?1660000,?loss:?7.464082
step:?1670000,?loss:?4.977184
step:?1680000,?loss:?5.658133
step:?1690000,?loss:?5.352454
step:?1700000,?loss:?6.810776
step:?1710000,?loss:?5.687447
step:?1720000,?loss:?5.992206
step:?1730000,?loss:?5.513011
step:?1740000,?loss:?5.548522
step:?1750000,?loss:?6.200248
step:?1760000,?loss:?13.070073
step:?1770000,?loss:?4.621058
step:?1780000,?loss:?5.301342
step:?1790000,?loss:?4.777030
step:?1800000,?loss:?6.912136
Evaluation...
"five"?nearest?neighbors:?three,?four,?six,?seven,?eight,?two,?nine,?zero,
"of"?nearest?neighbors:?the,?in,?first,?from,?became,?and,?following,?under,
"going"?nearest?neighbors:?others,?their,?through,?which,?therefore,?open,?how,?that,
"hardware"?nearest?neighbors:?computer,?systems,?include,?research,?standard,?different,?system,?small,
"american"?nearest?neighbors:?b,?d,?born,?actor,?UNK,?english,?nine,?german,
"britain"?nearest?neighbors:?china,?country,?europe,?against,?canada,?military,?island,?including,
step:?1810000,?loss:?5.584600
step:?1820000,?loss:?5.619820
step:?1830000,?loss:?6.078709
step:?1840000,?loss:?5.052518
step:?1850000,?loss:?5.430106
step:?1860000,?loss:?7.396770
step:?1870000,?loss:?5.344787
step:?1880000,?loss:?5.937998
step:?1890000,?loss:?5.706491
step:?1900000,?loss:?5.140662
step:?1910000,?loss:?5.607048
step:?1920000,?loss:?5.407231
step:?1930000,?loss:?6.238531
step:?1940000,?loss:?5.567973
step:?1950000,?loss:?4.894245
step:?1960000,?loss:?6.104193
step:?1970000,?loss:?5.282631
step:?1980000,?loss:?6.189069
step:?1990000,?loss:?6.169409
step:?2000000,?loss:?6.470152
Evaluation...
"five"?nearest?neighbors:?four,?three,?six,?seven,?eight,?two,?nine,?zero,
"of"?nearest?neighbors:?the,?its,?in,?with,?and,?including,?within,?against,
"going"?nearest?neighbors:?others,?only,?therefore,?will,?how,?a,?far,?though,
"hardware"?nearest?neighbors:?computer,?systems,?for,?network,?software,?program,?research,?system,
"american"?nearest?neighbors:?born,?actor,?d,?italian,?german,?john,?robert,?b,
"britain"?nearest?neighbors:?china,?country,?europe,?canada,?british,?former,?island,?france,
step:?2010000,?loss:?5.298714
step:?2020000,?loss:?5.494207
step:?2030000,?loss:?5.410875
step:?2040000,?loss:?6.228232
step:?2050000,?loss:?5.044596
step:?2060000,?loss:?4.624638
step:?2070000,?loss:?4.919327
step:?2080000,?loss:?4.639625
step:?2090000,?loss:?4.865627
step:?2100000,?loss:?4.951073
step:?2110000,?loss:?5.973768
step:?2120000,?loss:?7.366824
step:?2130000,?loss:?5.149571
step:?2140000,?loss:?7.846234
step:?2150000,?loss:?5.449315
step:?2160000,?loss:?5.359211
step:?2170000,?loss:?5.171029
step:?2180000,?loss:?6.106437
step:?2190000,?loss:?6.043995
step:?2200000,?loss:?5.642351
Evaluation...
"five"?nearest?neighbors:?four,?three,?six,?two,?eight,?seven,?zero,?one,
"of"?nearest?neighbors:?the,?and,?its,?see,?for,?in,?with,?including,
"going"?nearest?neighbors:?others,?therefore,?how,?even,?them,?your,?have,?although,
"hardware"?nearest?neighbors:?computer,?systems,?system,?network,?program,?research,?software,?include,
"american"?nearest?neighbors:?english,?french,?german,?canadian,?british,?film,?author,?italian,
"britain"?nearest?neighbors:?europe,?china,?country,?germany,?british,?england,?france,?throughout,
step:?2210000,?loss:?4.427110
step:?2220000,?loss:?6.240989
step:?2230000,?loss:?5.184978
step:?2240000,?loss:?8.035570
step:?2250000,?loss:?5.793781
step:?2260000,?loss:?4.908427
step:?2270000,?loss:?8.807668
step:?2280000,?loss:?6.083229
step:?2290000,?loss:?5.773360
step:?2300000,?loss:?5.613671
step:?2310000,?loss:?6.080076
step:?2320000,?loss:?5.288568
step:?2330000,?loss:?5.949232
step:?2340000,?loss:?5.479994
step:?2350000,?loss:?7.717686
step:?2360000,?loss:?5.163609
step:?2370000,?loss:?5.989407
step:?2380000,?loss:?5.785729
step:?2390000,?loss:?5.345478
step:?2400000,?loss:?6.627133
Evaluation...
"five"?nearest?neighbors:?three,?four,?six,?two,?seven,?eight,?zero,?nine,
"of"?nearest?neighbors:?the,?in,?and,?including,?from,?within,?its,?with,
"going"?nearest?neighbors:?therefore,?people,?they,?out,?only,?according,?your,?now,
"hardware"?nearest?neighbors:?computer,?systems,?network,?program,?system,?software,?run,?design,
"american"?nearest?neighbors:?author,?born,?actor,?english,?canadian,?british,?italian,?d,
"britain"?nearest?neighbors:?china,?europe,?country,?throughout,?france,?canada,?england,?western,
step:?2410000,?loss:?5.666146
step:?2420000,?loss:?5.316198
step:?2430000,?loss:?5.129625
step:?2440000,?loss:?5.247949
step:?2450000,?loss:?5.741394
step:?2460000,?loss:?5.833083
step:?2470000,?loss:?7.704844
step:?2480000,?loss:?5.398345
step:?2490000,?loss:?5.089633
step:?2500000,?loss:?5.620508
step:?2510000,?loss:?4.976034
step:?2520000,?loss:?5.884676
step:?2530000,?loss:?6.649922
step:?2540000,?loss:?5.002588
step:?2550000,?loss:?5.072144
step:?2560000,?loss:?5.165375
step:?2570000,?loss:?5.310089
step:?2580000,?loss:?5.481957
step:?2590000,?loss:?6.104440
step:?2600000,?loss:?5.339644
Evaluation...
"five"?nearest?neighbors:?three,?four,?six,?seven,?eight,?nine,?two,?zero,
"of"?nearest?neighbors:?the,?first,?from,?with,?became,?in,?following,?and,
"going"?nearest?neighbors:?how,?therefore,?back,?will,?through,?always,?your,?make,
"hardware"?nearest?neighbors:?computer,?systems,?system,?network,?program,?technology,?design,?software,
"american"?nearest?neighbors:?actor,?singer,?born,?b,?author,?d,?english,?writer,
"britain"?nearest?neighbors:?europe,?china,?throughout,?great,?england,?france,?country,?india,
step:?2610000,?loss:?7.754117
step:?2620000,?loss:?5.979313
step:?2630000,?loss:?5.394362
step:?2640000,?loss:?4.866740
step:?2650000,?loss:?5.219806
step:?2660000,?loss:?6.074809
step:?2670000,?loss:?6.216953
step:?2680000,?loss:?5.944881
step:?2690000,?loss:?5.863350
step:?2700000,?loss:?6.128705
step:?2710000,?loss:?5.502523
step:?2720000,?loss:?5.300839
step:?2730000,?loss:?6.358493
step:?2740000,?loss:?6.058306
step:?2750000,?loss:?4.689510
step:?2760000,?loss:?6.032880
step:?2770000,?loss:?5.844904
step:?2780000,?loss:?5.385874
step:?2790000,?loss:?5.370956
step:?2800000,?loss:?4.912577
Evaluation...
"five"?nearest?neighbors:?four,?six,?three,?eight,?seven,?two,?nine,?one,
"of"?nearest?neighbors:?in,?the,?and,?from,?including,?following,?with,?under,
"going"?nearest?neighbors:?your,?then,?through,?will,?how,?so,?back,?even,
"hardware"?nearest?neighbors:?computer,?systems,?program,?network,?design,?standard,?physical,?software,
"american"?nearest?neighbors:?actor,?singer,?born,?author,?writer,?canadian,?italian,?d,
"britain"?nearest?neighbors:?europe,?china,?england,?throughout,?france,?india,?great,?germany,
step:?2810000,?loss:?5.897756
step:?2820000,?loss:?7.194932
step:?2830000,?loss:?7.430175
step:?2840000,?loss:?7.258231
step:?2850000,?loss:?5.837617
step:?2860000,?loss:?5.496673
step:?2870000,?loss:?6.173716
step:?2880000,?loss:?6.095749
step:?2890000,?loss:?6.064944
step:?2900000,?loss:?5.560488
step:?2910000,?loss:?4.966107
step:?2920000,?loss:?5.789579
step:?2930000,?loss:?4.525987
step:?2940000,?loss:?6.704808
step:?2950000,?loss:?4.506433
step:?2960000,?loss:?6.251270
step:?2970000,?loss:?5.588204
step:?2980000,?loss:?5.423235
step:?2990000,?loss:?5.613834
step:?3000000,?loss:?5.137326
Evaluation...
"five"?nearest?neighbors:?four,?three,?six,?seven,?eight,?two,?zero,?one,
"of"?nearest?neighbors:?the,?including,?and,?with,?in,?its,?includes,?within,
"going"?nearest?neighbors:?how,?they,?when,?them,?make,?always,?your,?though,
"hardware"?nearest?neighbors:?computer,?systems,?network,?program,?physical,?design,?technology,?software,
"american"?nearest?neighbors:?canadian,?english,?australian,?british,?german,?film,?italian,?author,
"britain"?nearest?neighbors:?europe,?england,?china,?throughout,?india,?france,?great,?british,
Word2Vec (Word Embedding)
使用TensorFlow 2.0實現(xiàn)Word2Vec算法計算單詞的向量表示,這個例子是使用一小部分維基百科文章來訓(xùn)練的。
更多信息請查看論文:?Mikolov, Tomas et al. "Efficient Estimation of Word Representations in Vector Space.", 2013[1]from?__future__?import?division,?print_function,?absolute_importimport?collections
import?os
import?random
import?urllib
import?zipfile
import?numpy?as?np
import?tensorflow?as?tf
# Training parameters.
# (The original pasted this block twice; one copy is enough.)
learning_rate = 0.1
batch_size = 128
num_steps = 3000000
display_step = 10000
eval_step = 200000

# Evaluation parameters: words whose nearest neighbors are printed during training.
eval_words = ['five', 'of', 'going', 'hardware', 'american', 'britain']

# Word2Vec parameters.
embedding_size = 200         # Dimension of each word's embedding vector.
max_vocabulary_size = 50000  # Maximum number of distinct words kept in the vocabulary.
min_occurrence = 10          # Drop words that occur fewer than this many times.
skip_window = 3              # How many words to consider on each side of the target.
num_skips = 2                # How many (target, context) pairs to draw per window.
num_sampled = 64             # Number of negative examples for NCE sampling.

# Download a small collection of Wikipedia articles (the text8 corpus).
# Bug fix: `urllib.urlretrieve` is the Python 2 API; under Python 3 the
# function lives in `urllib.request`.
import urllib.request

url = 'http://mattmahoney.net/dc/text8.zip'
data_path = 'text8.zip'
if not os.path.exists(data_path):
    print("Downloading the dataset... (It may take some time)")
    filename, _ = urllib.request.urlretrieve(url, data_path)
    print("Done!")

# Unzip the dataset file; the text inside is already pre-processed.
with zipfile.ZipFile(data_path) as f:
    text_words = f.read(f.namelist()[0]).lower().split()
#?構(gòu)建詞典并用?UNK?標記替換頻數(shù)較低的詞
count?=?[('UNK',?-1)]
#?檢索最常見的單詞
count.extend(collections.Counter(text_words).most_common(max_vocabulary_size?-?1))
#?刪除少于'min_occurrence'次數(shù)的樣本
for?i?in?range(len(count)?-?1,?-1,?-1):
????if?count[i][1]?????????count.pop(i)
????else:
????????#該集合是有序的,因此在當出現(xiàn)小于'min_occurrence'時停止
????????break
#?計算單詞表單詞個數(shù)
vocabulary_size?=?len(count)
#?為每一個詞分配id
word2id?=?dict()
for?i,?(word,?_)in?enumerate(count):
????word2id[word]?=?i
data?=?list()
unk_count?=?0
for?word?in?text_words:
?????#?檢索單詞id,或者如果不在字典中則為其指定索引0('UNK')
????index?=?word2id.get(word,?0)
????if?index?==?0:
????????unk_count?+=?1
????data.append(index)
count[0]?=?('UNK',?unk_count)
id2word?=?dict(zip(word2id.values(),?word2id.keys()))
print("Words?count:",?len(text_words))
print("Unique?words:",?len(set(text_words)))
print("Vocabulary?size:",?vocabulary_size)
print("Most?common?words:",?count[:10])
output:
Words?count:?17005207Unique?words:?253854
Vocabulary?size:?47135
Most?common?words:?[('UNK',?444176),?('the',?1061396),?('of',?593677),?('and',?416629),?('one',?411764),?('in',?372201),?('a',?325873),?('to',?316376),?('zero',?264975),?('nine',?250430)]
data_index = 0


def next_batch(batch_size, num_skips, skip_window):
    """Generate one training batch for the skip-gram model.

    Advances the module-level cursor `data_index` through `data` and returns
    (batch, labels): `batch` holds target word ids, `labels` holds one
    context word id per row drawn from the surrounding window.
    """
    global data_index
    assert batch_size % num_skips == 0
    assert num_skips <= 2 * skip_window
    # Window length: the current word plus skip_window words on each side.
    span = 2 * skip_window + 1
    batch = np.empty(batch_size, dtype=np.int32)
    labels = np.empty((batch_size, 1), dtype=np.int32)
    window = collections.deque(maxlen=span)
    if data_index + span > len(data):
        data_index = 0
    window.extend(data[data_index:data_index + span])
    data_index += span
    for group in range(batch_size // num_skips):
        candidates = [pos for pos in range(span) if pos != skip_window]
        chosen = random.sample(candidates, num_skips)
        base = group * num_skips
        for offset, pos in enumerate(chosen):
            batch[base + offset] = window[skip_window]
            labels[base + offset, 0] = window[pos]
        if data_index == len(data):
            window.extend(data[:span])
            data_index = span
        else:
            window.append(data[data_index])
            data_index += 1
    # Backtrack a little to avoid skipping words at the end of a batch.
    data_index = (data_index + len(data) - span) % len(data)
    return batch, labels
# Ensure the following ops/variables are allocated on the CPU
# (some of the ops used below are not GPU-compatible).
with tf.device('/cpu:0'):
    _param_shape = [vocabulary_size, embedding_size]
    # Embedding matrix: each row is the embedding vector of one word.
    embedding = tf.Variable(tf.random.normal(_param_shape))
    # Parameters of the NCE loss.
    nce_weights = tf.Variable(tf.random.normal(_param_shape))
    nce_biases = tf.Variable(tf.zeros([vocabulary_size]))
def get_embedding(x):
    """Look up the embedding vector for each sample id in x."""
    with tf.device('/cpu:0'):
        return tf.nn.embedding_lookup(embedding, x)
def nce_loss(x_embed, y):
    """Return the NCE loss for a batch, averaged over the batch."""
    with tf.device('/cpu:0'):
        targets = tf.cast(y, tf.int64)
        per_example = tf.nn.nce_loss(
            weights=nce_weights,
            biases=nce_biases,
            labels=targets,
            inputs=x_embed,
            num_sampled=num_sampled,
            num_classes=vocabulary_size,
        )
        return tf.reduce_mean(per_example)
# Evaluation.
def evaluate(x_embed):
    """Cosine similarity between the input embeddings and every vocabulary embedding.

    Bug fix: the original passed `tf.float32` as a second positional argument
    to `tf.sqrt`, which binds to its `name` parameter and raises a TypeError
    in TF2. The stray argument is removed; both operands are already float32.
    """
    with tf.device('/cpu:0'):
        x_embed = tf.cast(x_embed, tf.float32)
        x_embed_norm = x_embed / tf.sqrt(tf.reduce_sum(tf.square(x_embed)))
        embedding_norm = embedding / tf.sqrt(
            tf.reduce_sum(tf.square(embedding), 1, keepdims=True))
        cosine_sim_op = tf.matmul(x_embed_norm, embedding_norm, transpose_b=True)
        return cosine_sim_op
# Define the optimizer.
optimizer = tf.optimizers.SGD(learning_rate)


def run_optimization(x, y):
    """Run one SGD step on a (targets, labels) batch, updating all parameters."""
    with tf.device('/cpu:0'):
        trainable = [embedding, nce_weights, nce_biases]
        # Wrap the computation in a GradientTape for automatic differentiation.
        with tf.GradientTape() as tape:
            loss = nce_loss(get_embedding(x), y)
        # Compute gradients and apply them to the trainable variables.
        grads = tape.gradient(loss, trainable)
        optimizer.apply_gradients(zip(grads, trainable))
# Words used for evaluation.
x_test = np.array([word2id[w] for w in eval_words])

# Train for the given number of steps.
# Bug fix: `xrange` exists only in Python 2; TF2 requires Python 3, so all
# `xrange` calls are replaced with `range`.
for step in range(1, num_steps + 1):
    batch_x, batch_y = next_batch(batch_size, num_skips, skip_window)
    run_optimization(batch_x, batch_y)

    if step % display_step == 0 or step == 1:
        loss = nce_loss(get_embedding(batch_x), batch_y)
        print("step: %i, loss: %f" % (step, loss))

    # Evaluation: print the nearest neighbors of each test word.
    if step % eval_step == 0 or step == 1:
        print("Evaluation...")
        sim = evaluate(get_embedding(x_test)).numpy()
        for i in range(len(eval_words)):
            top_k = 8  # Number of nearest neighbors to display.
            nearest = (-sim[i, :]).argsort()[1:top_k + 1]
            log_str = '"%s" nearest neighbors:' % eval_words[i]
            for k in range(top_k):
                log_str = '%s %s,' % (log_str, id2word[nearest[k]])
            print(log_str)
step:?1,?loss:?504.444214
Evaluation...
"five"?nearest?neighbors:?censure,?stricken,?anglicanism,?stick,?streetcars,?shrines,?horrified,?sparkle,
"of"?nearest?neighbors:?jolly,?weary,?clinicians,?kerouac,?economist,?owls,?safe,?playoff,
"going"?nearest?neighbors:?filament,?platforms,?moderately,?micheal,?despotic,?krag,?disclosed,?your,
"hardware"?nearest?neighbors:?occupants,?paraffin,?vera,?reorganized,?rename,?declares,?prima,?condoned,
"american"?nearest?neighbors:?portfolio,?rhein,?aalto,?angle,?lifeson,?tucker,?sexton,?dench,
"britain"?nearest?neighbors:?indivisible,?disbelief,?scripture,?pepsi,?scriptores,?sighting,?napalm,?strike,
step:?10000,?loss:?117.166962
step:?20000,?loss:?65.478333
step:?30000,?loss:?46.580460
step:?40000,?loss:?25.563128
step:?50000,?loss:?50.924446
step:?60000,?loss:?51.696526
step:?70000,?loss:?17.272142
step:?80000,?loss:?32.579414
step:?90000,?loss:?68.372032
step:?100000,?loss:?36.026573
step:?110000,?loss:?22.502020
step:?120000,?loss:?15.788742
step:?130000,?loss:?31.832420
step:?140000,?loss:?25.096617
step:?150000,?loss:?12.013027
step:?160000,?loss:?20.574780
step:?170000,?loss:?12.201975
step:?180000,?loss:?20.983793
step:?190000,?loss:?11.366720
step:?200000,?loss:?19.431549
Evaluation...
"five"?nearest?neighbors:?three,?four,?eight,?six,?two,?seven,?nine,?zero,
"of"?nearest?neighbors:?the,?a,?and,?first,?with,?on,?but,?from,
"going"?nearest?neighbors:?have,?more,?used,?out,?be,?with,?on,?however,
"hardware"?nearest?neighbors:?be,?known,?system,?apollo,?and,?a,?such,?used,
"american"?nearest?neighbors:?UNK,?and,?from,?s,?at,?in,?after,?about,
"britain"?nearest?neighbors:?of,?and,?many,?the,?as,?used,?but,?such,
step:?210000,?loss:?16.361233
step:?220000,?loss:?17.529526
step:?230000,?loss:?16.805817
step:?240000,?loss:?6.365625
step:?250000,?loss:?8.083097
step:?260000,?loss:?11.262514
step:?270000,?loss:?9.842708
step:?280000,?loss:?6.363440
step:?290000,?loss:?8.732617
step:?300000,?loss:?10.484728
step:?310000,?loss:?12.099487
step:?320000,?loss:?11.496288
step:?330000,?loss:?9.283813
step:?340000,?loss:?10.777218
step:?350000,?loss:?16.310440
step:?360000,?loss:?7.495782
step:?370000,?loss:?9.287696
step:?380000,?loss:?6.982735
step:?390000,?loss:?8.549622
step:?400000,?loss:?8.388112
Evaluation...
"five"?nearest?neighbors:?four,?three,?six,?two,?seven,?eight,?one,?zero,
"of"?nearest?neighbors:?the,?a,?with,?also,?for,?and,?which,?by,
"going"?nearest?neighbors:?have,?are,?both,?called,?being,?a,?of,?had,
"hardware"?nearest?neighbors:?may,?de,?some,?have,?so,?which,?other,?also,
"american"?nearest?neighbors:?s,?british,?UNK,?from,?in,?including,?first,?see,
"britain"?nearest?neighbors:?against,?include,?including,?both,?british,?other,?an,?most,
step:?410000,?loss:?8.757725
step:?420000,?loss:?12.303110
step:?430000,?loss:?12.325478
step:?440000,?loss:?7.659882
step:?450000,?loss:?6.028089
step:?460000,?loss:?12.700299
step:?470000,?loss:?7.063077
step:?480000,?loss:?18.004183
step:?490000,?loss:?7.510474
step:?500000,?loss:?10.089376
step:?510000,?loss:?11.404436
step:?520000,?loss:?9.494527
step:?530000,?loss:?7.797963
step:?540000,?loss:?7.390718
step:?550000,?loss:?13.911215
step:?560000,?loss:?6.975731
step:?570000,?loss:?6.179163
step:?580000,?loss:?7.066525
step:?590000,?loss:?6.487288
step:?600000,?loss:?5.361528
Evaluation...
"five"?nearest?neighbors:?four,?six,?three,?seven,?two,?one,?eight,?zero,
"of"?nearest?neighbors:?the,?and,?from,?with,?a,?including,?in,?include,
"going"?nearest?neighbors:?have,?even,?they,?term,?who,?many,?which,?were,
"hardware"?nearest?neighbors:?include,?computer,?an,?which,?other,?each,?than,?may,
"american"?nearest?neighbors:?english,?french,?s,?german,?from,?in,?film,?see,
"britain"?nearest?neighbors:?several,?first,?modern,?part,?government,?german,?was,?were,
step:?610000,?loss:?4.144980
step:?620000,?loss:?5.865635
step:?630000,?loss:?6.826498
step:?640000,?loss:?8.376097
step:?650000,?loss:?7.117930
step:?660000,?loss:?7.639544
step:?670000,?loss:?5.973255
step:?680000,?loss:?4.908459
step:?690000,?loss:?6.164993
step:?700000,?loss:?7.360281
step:?710000,?loss:?12.693079
step:?720000,?loss:?6.410182
step:?730000,?loss:?7.499201
step:?740000,?loss:?6.509094
step:?750000,?loss:?10.625893
step:?760000,?loss:?7.177696
step:?770000,?loss:?12.639092
step:?780000,?loss:?8.441635
step:?790000,?loss:?7.529139
step:?800000,?loss:?6.579177
Evaluation...
"five"?nearest?neighbors:?four,?three,?six,?seven,?eight,?two,?one,?zero,
"of"?nearest?neighbors:?and,?with,?in,?the,?its,?from,?by,?including,
"going"?nearest?neighbors:?have,?they,?how,?include,?people,?however,?also,?their,
"hardware"?nearest?neighbors:?computer,?large,?include,?may,?or,?which,?other,?there,
"american"?nearest?neighbors:?born,?french,?british,?english,?german,?b,?john,?d,
"britain"?nearest?neighbors:?country,?including,?include,?general,?part,?various,?several,?by,
step:?810000,?loss:?6.934138
step:?820000,?loss:?5.686094
step:?830000,?loss:?7.310243
step:?840000,?loss:?5.028157
step:?850000,?loss:?7.079705
step:?860000,?loss:?6.768996
step:?870000,?loss:?5.604030
step:?880000,?loss:?8.208309
step:?890000,?loss:?6.301597
step:?900000,?loss:?5.733234
step:?910000,?loss:?6.577081
step:?920000,?loss:?6.774826
step:?930000,?loss:?7.068932
step:?940000,?loss:?6.694956
step:?950000,?loss:?7.944673
step:?960000,?loss:?5.988618
step:?970000,?loss:?6.651366
step:?980000,?loss:?4.595577
step:?990000,?loss:?6.564834
step:?1000000,?loss:?4.327858
Evaluation...
"five"?nearest?neighbors:?four,?three,?seven,?six,?eight,?two,?nine,?zero,
"of"?nearest?neighbors:?the,?first,?and,?became,?from,?under,?at,?with,
"going"?nearest?neighbors:?others,?has,?then,?have,?how,?become,?had,?also,
"hardware"?nearest?neighbors:?computer,?large,?systems,?these,?different,?either,?include,?using,
"american"?nearest?neighbors:?b,?born,?d,?UNK,?nine,?english,?german,?french,
"britain"?nearest?neighbors:?government,?island,?local,?country,?by,?including,?control,?within,
step:?1010000,?loss:?5.841236
step:?1020000,?loss:?5.805200
step:?1030000,?loss:?9.962063
step:?1040000,?loss:?6.281199
step:?1050000,?loss:?7.147995
step:?1060000,?loss:?5.721184
step:?1070000,?loss:?7.080662
step:?1080000,?loss:?6.638658
step:?1090000,?loss:?5.814178
step:?1100000,?loss:?5.195928
step:?1110000,?loss:?6.724787
step:?1120000,?loss:?6.503905
step:?1130000,?loss:?5.762966
step:?1140000,?loss:?5.790243
step:?1150000,?loss:?5.958191
step:?1160000,?loss:?5.997983
step:?1170000,?loss:?7.065348
step:?1180000,?loss:?6.073387
step:?1190000,?loss:?6.644097
step:?1200000,?loss:?5.934450
Evaluation...
"five"?nearest?neighbors:?three,?four,?six,?eight,?seven,?two,?nine,?zero,
"of"?nearest?neighbors:?the,?and,?including,?in,?its,?with,?from,?on,
"going"?nearest?neighbors:?others,?then,?through,?has,?had,?another,?people,?when,
"hardware"?nearest?neighbors:?computer,?control,?systems,?either,?these,?large,?small,?other,
"american"?nearest?neighbors:?born,?german,?john,?d,?british,?b,?UNK,?french,
"britain"?nearest?neighbors:?local,?against,?british,?island,?country,?general,?including,?within,
step:?1210000,?loss:?5.832344
step:?1220000,?loss:?6.453851
step:?1230000,?loss:?6.583966
step:?1240000,?loss:?5.571673
step:?1250000,?loss:?5.720917
step:?1260000,?loss:?7.663424
step:?1270000,?loss:?6.583741
step:?1280000,?loss:?8.503859
step:?1290000,?loss:?5.540640
step:?1300000,?loss:?6.703249
step:?1310000,?loss:?5.274101
step:?1320000,?loss:?5.846446
step:?1330000,?loss:?5.438172
step:?1340000,?loss:?6.367691
step:?1350000,?loss:?6.558622
step:?1360000,?loss:?9.822924
step:?1370000,?loss:?4.982378
step:?1380000,?loss:?6.159739
step:?1390000,?loss:?5.819083
step:?1400000,?loss:?7.775135
Evaluation...
"five"?nearest?neighbors:?four,?three,?six,?seven,?two,?eight,?one,?zero,
"of"?nearest?neighbors:?and,?the,?in,?with,?its,?within,?for,?including,
"going"?nearest?neighbors:?others,?through,?while,?has,?to,?how,?particularly,?their,
"hardware"?nearest?neighbors:?computer,?systems,?large,?control,?research,?using,?information,?either,
"american"?nearest?neighbors:?english,?french,?german,?born,?film,?british,?s,?former,
"britain"?nearest?neighbors:?british,?country,?europe,?local,?military,?island,?against,?western,
step:?1410000,?loss:?8.214248
step:?1420000,?loss:?4.696859
step:?1430000,?loss:?5.873761
step:?1440000,?loss:?5.971557
step:?1450000,?loss:?4.992722
step:?1460000,?loss:?5.197714
step:?1470000,?loss:?6.916918
step:?1480000,?loss:?6.441984
step:?1490000,?loss:?5.443647
step:?1500000,?loss:?5.178482
step:?1510000,?loss:?6.060414
step:?1520000,?loss:?6.373306
step:?1530000,?loss:?5.098322
step:?1540000,?loss:?6.674916
step:?1550000,?loss:?6.712685
step:?1560000,?loss:?5.280202
step:?1570000,?loss:?6.454964
step:?1580000,?loss:?4.896697
step:?1590000,?loss:?6.239226
step:?1600000,?loss:?5.709726
Evaluation...
"five"?nearest?neighbors:?three,?four,?two,?six,?seven,?eight,?one,?zero,
"of"?nearest?neighbors:?the,?and,?including,?in,?with,?within,?its,?following,
"going"?nearest?neighbors:?others,?people,?who,?they,?that,?far,?were,?have,
"hardware"?nearest?neighbors:?computer,?systems,?include,?high,?research,?some,?information,?large,
"american"?nearest?neighbors:?born,?english,?french,?british,?german,?d,?john,?b,
"britain"?nearest?neighbors:?country,?military,?china,?europe,?against,?local,?central,?british,
step:?1610000,?loss:?6.334940
step:?1620000,?loss:?5.093616
step:?1630000,?loss:?6.119366
step:?1640000,?loss:?4.975187
step:?1650000,?loss:?6.490408
step:?1660000,?loss:?7.464082
step:?1670000,?loss:?4.977184
step:?1680000,?loss:?5.658133
step:?1690000,?loss:?5.352454
step:?1700000,?loss:?6.810776
step:?1710000,?loss:?5.687447
step:?1720000,?loss:?5.992206
step:?1730000,?loss:?5.513011
step:?1740000,?loss:?5.548522
step:?1750000,?loss:?6.200248
step:?1760000,?loss:?13.070073
step:?1770000,?loss:?4.621058
step:?1780000,?loss:?5.301342
step:?1790000,?loss:?4.777030
step:?1800000,?loss:?6.912136
Evaluation...
"five"?nearest?neighbors:?three,?four,?six,?seven,?eight,?two,?nine,?zero,
"of"?nearest?neighbors:?the,?in,?first,?from,?became,?and,?following,?under,
"going"?nearest?neighbors:?others,?their,?through,?which,?therefore,?open,?how,?that,
"hardware"?nearest?neighbors:?computer,?systems,?include,?research,?standard,?different,?system,?small,
"american"?nearest?neighbors:?b,?d,?born,?actor,?UNK,?english,?nine,?german,
"britain"?nearest?neighbors:?china,?country,?europe,?against,?canada,?military,?island,?including,
step:?1810000,?loss:?5.584600
step:?1820000,?loss:?5.619820
step:?1830000,?loss:?6.078709
step:?1840000,?loss:?5.052518
step:?1850000,?loss:?5.430106
step:?1860000,?loss:?7.396770
step:?1870000,?loss:?5.344787
step:?1880000,?loss:?5.937998
step:?1890000,?loss:?5.706491
step:?1900000,?loss:?5.140662
step:?1910000,?loss:?5.607048
step:?1920000,?loss:?5.407231
step:?1930000,?loss:?6.238531
step:?1940000,?loss:?5.567973
step:?1950000,?loss:?4.894245
step:?1960000,?loss:?6.104193
step:?1970000,?loss:?5.282631
step:?1980000,?loss:?6.189069
step:?1990000,?loss:?6.169409
step:?2000000,?loss:?6.470152
Evaluation...
"five"?nearest?neighbors:?four,?three,?six,?seven,?eight,?two,?nine,?zero,
"of"?nearest?neighbors:?the,?its,?in,?with,?and,?including,?within,?against,
"going"?nearest?neighbors:?others,?only,?therefore,?will,?how,?a,?far,?though,
"hardware"?nearest?neighbors:?computer,?systems,?for,?network,?software,?program,?research,?system,
"american"?nearest?neighbors:?born,?actor,?d,?italian,?german,?john,?robert,?b,
"britain"?nearest?neighbors:?china,?country,?europe,?canada,?british,?former,?island,?france,
step:?2010000,?loss:?5.298714
step:?2020000,?loss:?5.494207
step:?2030000,?loss:?5.410875
step:?2040000,?loss:?6.228232
step:?2050000,?loss:?5.044596
step:?2060000,?loss:?4.624638
step:?2070000,?loss:?4.919327
step:?2080000,?loss:?4.639625
step:?2090000,?loss:?4.865627
step:?2100000,?loss:?4.951073
step:?2110000,?loss:?5.973768
step:?2120000,?loss:?7.366824
step:?2130000,?loss:?5.149571
step:?2140000,?loss:?7.846234
step:?2150000,?loss:?5.449315
step:?2160000,?loss:?5.359211
step:?2170000,?loss:?5.171029
step:?2180000,?loss:?6.106437
step:?2190000,?loss:?6.043995
step:?2200000,?loss:?5.642351
Evaluation...
"five"?nearest?neighbors:?four,?three,?six,?two,?eight,?seven,?zero,?one,
"of"?nearest?neighbors:?the,?and,?its,?see,?for,?in,?with,?including,
"going"?nearest?neighbors:?others,?therefore,?how,?even,?them,?your,?have,?although,
"hardware"?nearest?neighbors:?computer,?systems,?system,?network,?program,?research,?software,?include,
"american"?nearest?neighbors:?english,?french,?german,?canadian,?british,?film,?author,?italian,
"britain"?nearest?neighbors:?europe,?china,?country,?germany,?british,?england,?france,?throughout,
step:?2210000,?loss:?4.427110
step:?2220000,?loss:?6.240989
step:?2230000,?loss:?5.184978
step:?2240000,?loss:?8.035570
step:?2250000,?loss:?5.793781
step:?2260000,?loss:?4.908427
step:?2270000,?loss:?8.807668
step:?2280000,?loss:?6.083229
step:?2290000,?loss:?5.773360
step:?2300000,?loss:?5.613671
step:?2310000,?loss:?6.080076
step:?2320000,?loss:?5.288568
step:?2330000,?loss:?5.949232
step:?2340000,?loss:?5.479994
step:?2350000,?loss:?7.717686
step:?2360000,?loss:?5.163609
step:?2370000,?loss:?5.989407
step:?2380000,?loss:?5.785729
step:?2390000,?loss:?5.345478
step:?2400000,?loss:?6.627133
Evaluation...
"five"?nearest?neighbors:?three,?four,?six,?two,?seven,?eight,?zero,?nine,
"of"?nearest?neighbors:?the,?in,?and,?including,?from,?within,?its,?with,
"going"?nearest?neighbors:?therefore,?people,?they,?out,?only,?according,?your,?now,
"hardware"?nearest?neighbors:?computer,?systems,?network,?program,?system,?software,?run,?design,
"american"?nearest?neighbors:?author,?born,?actor,?english,?canadian,?british,?italian,?d,
"britain"?nearest?neighbors:?china,?europe,?country,?throughout,?france,?canada,?england,?western,
step:?2410000,?loss:?5.666146
step:?2420000,?loss:?5.316198
step:?2430000,?loss:?5.129625
step:?2440000,?loss:?5.247949
step:?2450000,?loss:?5.741394
step:?2460000,?loss:?5.833083
step:?2470000,?loss:?7.704844
step:?2480000,?loss:?5.398345
step:?2490000,?loss:?5.089633
step:?2500000,?loss:?5.620508
step:?2510000,?loss:?4.976034
step:?2520000,?loss:?5.884676
step:?2530000,?loss:?6.649922
step:?2540000,?loss:?5.002588
step:?2550000,?loss:?5.072144
step:?2560000,?loss:?5.165375
step:?2570000,?loss:?5.310089
step:?2580000,?loss:?5.481957
step:?2590000,?loss:?6.104440
step:?2600000,?loss:?5.339644
Evaluation...
"five"?nearest?neighbors:?three,?four,?six,?seven,?eight,?nine,?two,?zero,
"of"?nearest?neighbors:?the,?first,?from,?with,?became,?in,?following,?and,
"going"?nearest?neighbors:?how,?therefore,?back,?will,?through,?always,?your,?make,
"hardware"?nearest?neighbors:?computer,?systems,?system,?network,?program,?technology,?design,?software,
"american"?nearest?neighbors:?actor,?singer,?born,?b,?author,?d,?english,?writer,
"britain"?nearest?neighbors:?europe,?china,?throughout,?great,?england,?france,?country,?india,
step:?2610000,?loss:?7.754117
step:?2620000,?loss:?5.979313
step:?2630000,?loss:?5.394362
step:?2640000,?loss:?4.866740
step:?2650000,?loss:?5.219806
step:?2660000,?loss:?6.074809
step:?2670000,?loss:?6.216953
step:?2680000,?loss:?5.944881
step:?2690000,?loss:?5.863350
step:?2700000,?loss:?6.128705
step:?2710000,?loss:?5.502523
step:?2720000,?loss:?5.300839
step:?2730000,?loss:?6.358493
step:?2740000,?loss:?6.058306
step:?2750000,?loss:?4.689510
step:?2760000,?loss:?6.032880
step:?2770000,?loss:?5.844904
step:?2780000,?loss:?5.385874
step:?2790000,?loss:?5.370956
step:?2800000,?loss:?4.912577
Evaluation...
"five"?nearest?neighbors:?four,?six,?three,?eight,?seven,?two,?nine,?one,
"of"?nearest?neighbors:?in,?the,?and,?from,?including,?following,?with,?under,
"going"?nearest?neighbors:?your,?then,?through,?will,?how,?so,?back,?even,
"hardware"?nearest?neighbors:?computer,?systems,?program,?network,?design,?standard,?physical,?software,
"american"?nearest?neighbors:?actor,?singer,?born,?author,?writer,?canadian,?italian,?d,
"britain"?nearest?neighbors:?europe,?china,?england,?throughout,?france,?india,?great,?germany,
step:?2810000,?loss:?5.897756
step:?2820000,?loss:?7.194932
step:?2830000,?loss:?7.430175
step:?2840000,?loss:?7.258231
step:?2850000,?loss:?5.837617
step:?2860000,?loss:?5.496673
step:?2870000,?loss:?6.173716
step:?2880000,?loss:?6.095749
step:?2890000,?loss:?6.064944
step:?2900000,?loss:?5.560488
step:?2910000,?loss:?4.966107
step:?2920000,?loss:?5.789579
step:?2930000,?loss:?4.525987
step:?2940000,?loss:?6.704808
step:?2950000,?loss:?4.506433
step:?2960000,?loss:?6.251270
step:?2970000,?loss:?5.588204
step:?2980000,?loss:?5.423235
step:?2990000,?loss:?5.613834
step:?3000000,?loss:?5.137326
Evaluation...
"five"?nearest?neighbors:?four,?three,?six,?seven,?eight,?two,?zero,?one,
"of"?nearest?neighbors:?the,?including,?and,?with,?in,?its,?includes,?within,
"going"?nearest?neighbors:?how,?they,?when,?them,?make,?always,?your,?though,
"hardware"?nearest?neighbors:?computer,?systems,?network,?program,?physical,?design,?technology,?software,
"american"?nearest?neighbors:?canadian,?english,?australian,?british,?german,?film,?italian,?author,
"britain"?nearest?neighbors:?europe,?england,?china,?throughout,?india,?france,?great,?british,
[1]: https://arxiv.org/pdf/1301.3781.pdf
還想看更多TensorFlow專欄文章?可在公眾號底部菜單欄子菜單“獨家原創”中找到TensorFlow系列文章,同步更新中,關注公眾號了解更多吧~或點擊下方“閱讀原文”,進入TensorFlow專欄,即可查看往期文章。
嗨,你還在看嗎?總結
以上是生活随笔為你收集整理的word2vec代码_TensorFlow2.0 代码实战专栏(四):Word2Vec (Word Embedding)的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: springboot 搭建分布式_爱了!
- 下一篇: @query传参_VueRouter之q