生活随笔收集整理的這篇文章主要介紹了「Python 統計《三國演義》中人物出現的頻次」,小編覺得挺不錯的,現在分享給大家,供大家參考。
方式一. 簡化版
安裝 jieba 庫和 numpy 庫,編程讀取《三國演義》電子書,統計並輸出出場次數最高的 10 個人物名字。
代碼注釋:
import numpy
import jieba

# numpy abbreviates long arrays with "..." by default; lift the threshold so
# the complete result is printed.
numpy.set_printoptions(threshold=numpy.inf)


def readFile(path):
    """Return the text of the UTF-8 file at *path*.

    Returns None when the file is empty.  When the file cannot be opened,
    read or decoded, prints "讀取文件失敗!" and returns None.
    """
    try:
        with open(path, mode='r', encoding='utf-8') as f:
            data = f.read()
    except (OSError, UnicodeError):
        print("讀取文件失敗!")
        return None
    # The original condition `data is not None or data != ''` was always true;
    # the intent was to reject empty content.
    return data if data else None


if __name__ == "__main__":
    # Read the text content.
    text = readFile('三國演義.txt')
    if text is None:
        # Nothing to analyse; any read failure was already reported above.
        raise SystemExit(1)

    # Search-engine mode: on top of precise mode, long words are cut again.
    arr = jieba.cut_for_search(text)

    obj = {}
    for name in arr:
        # Only tokens of length 2 or 3 are collected.
        if len(name) in (2, 3):
            # Create the entry on first sight and count its occurrences.
            obj[name] = obj.get(name, 0) + 1

    # Convert the mapping into a list of (name, count) pairs.
    items = list(obj.items())

    # Structured dtype for the result.  A numpy type string has three parts:
    # a byte-order character ('<' little-endian, '>' big-endian,
    # '|' not relevant), a type character code, and an integer size.
    # Type character codes:
    #   t  bit field (the integer gives the number of bits)
    #   b  Boolean (integer type whose values are only True or False)
    #   i  integer
    #   u  unsigned integer
    #   f  floating point
    #   c  complex floating point
    #   m  timedelta
    #   M  datetime
    #   O  object (memory holds a pointer to a PyObject)
    #   S  string (fixed-length sequence of char)
    #   U  Unicode (fixed-length sequence of characters)
    #   V  other (void * - each item is a fixed-size chunk of memory)
    # 'U3' is required because 3-character tokens are collected above; the
    # previous 'U2' silently truncated them to two characters.
    people = numpy.dtype([('name', 'U3'), ('count', int)])

    # Convert the list into a structured array.
    ar = numpy.array(items, dtype=people)

    # axis=0 sorts along the first axis, kind='mergesort' selects a stable
    # merge sort, order='count' sorts on the count field, and flipud()
    # reverses the ascending result so the most frequent tokens come first.
    print(numpy.flipud(numpy.sort(ar, axis=0, kind='mergesort', order='count')))
方式二. 詞云統計(轉自《Python 三國演義文本可視化(詞云,人物關系圖,主要人物出場次數,章回字數)》)
alice_mask.png
"""
Created on Wed Jun 23 11:41:01 2021

@author: 陳建兵
"""
import codecs
import random
from itertools import combinations

import imageio
import jieba.posseg as pseg
import matplotlib.pyplot as plt
import networkx as nx
import wordcloud
from pyecharts import options as opts
from pyecharts.charts import Bar
from pyecharts.charts import Line
from pyecharts.charts import WordCloud
# Number of top-ranked characters shown in the reports and charts.
mainTop = 15
def read_txt(filepath):
    """Return the entire contents of the UTF-8 text file at *filepath*."""
    # Plain read mode: the original 'r+' demanded write permission although
    # nothing is written.  The context manager closes the handle even when
    # reading or decoding fails.
    with open(filepath, 'r', encoding='utf-8') as f:
        return f.read()
# Full text of the novel, shared by the analysis functions below.
txt = read_txt('三國演義.txt')
def stopwordslist(filepath):
    """Return every line of the UTF-8 file at *filepath*, whitespace-stripped.

    Blank lines are kept as empty strings.
    """
    # The context manager closes the file; the original left the handle open.
    with open(filepath, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f]


# Words that must never be counted as a person name.  The duplicated entries
# of the original literal ('原來', '不如') are listed once.
excludes = {
    '將軍', '卻說', '令人', '趕來', '徐州', '不見', '下馬', '喊聲', '因此', '未知',
    '大敗', '百姓', '大事', '一軍', '之后', '接應', '起兵', '成都', '原來', '江東',
    '正是', '忽然', '大叫', '上馬', '天子', '一面', '太守', '不如', '忽報', '后人',
    '背后', '先主', '此人', '城中', '然后', '大軍', '何不', '先生', '何故', '夫人',
    '先鋒', '二人', '不可', '如何', '荊州', '不能', '如此', '主公', '軍士', '商議',
    '引兵', '次日', '大喜', '魏兵', '軍馬', '于是', '東吳', '今日', '左右', '天下',
    '不敢', '陛下', '人馬', '不知', '都督', '漢中', '一人', '眾將', '后主', '只見',
    '蜀兵', '馬軍', '黃巾', '立功', '白發', '大吉', '紅旗', '士卒', '錢糧', '于漢',
    '郎舅', '龍鳳', '古之', '白虎', '古人云', '爾乃', '馬飛報', '軒昂', '史官', '侍臣',
    '列陣', '玉璽', '車駕', '老夫', '伏兵', '都尉', '侍中', '西涼', '安民', '張曰',
    '文武', '白旗', '祖宗', '尋思',
}
# Maps a canonical person name to its number of occurrences; filled by
# getWordTimes().
counts = {}
# Courtesy names, titles and mis-segmented tokens mapped to the canonical
# name they are counted under.
_NAME_ALIASES = {
    '孔明': '諸葛亮',
    '孔明曰': '諸葛亮',
    '臥龍先生': '諸葛亮',
    '云長': '關羽',
    '關公曰': '關羽',
    '關公': '關羽',
    '玄德': '劉備',
    '玄德曰': '劉備',
    '玄德甚': '劉備',
    '玄德遂': '劉備',
    '玄德兵': '劉備',
    '玄德領': '劉備',
    '玄德同': '劉備',
    '劉豫州': '劉備',
    '劉玄德': '劉備',
    '孟德': '曹操',
    '丞相': '曹操',
    '曹賊': '曹操',
    '阿瞞': '曹操',
    '曹丞相': '曹操',
    '曹將軍': '曹操',
    '高祖': '劉邦',
    '光武': '劉秀',
    '桓帝': '劉志',
    '靈帝': '劉宏',
    '公瑾': '周瑜',
    '伯符': '孫策',
    '呂奉先': '呂布',
    '布乃': '呂布',
    '布大怒': '呂布',
    '呂布之': '呂布',
    '趙子龍': '趙云',
    '子龍': '趙云',
    '卓大喜': '董卓',
    '卓大怒': '董卓',
}


def getWordTimes():
    """Count person-name occurrences in ``txt`` into the global ``counts``.

    The text is segmented with ``pseg.cut``.  A token is counted only when
    its flag is 'nr', it is at least two characters long and it is not in
    ``excludes``.  Known aliases are folded into one canonical name before
    counting.
    """
    for w in pseg.cut(txt):
        word = w.word
        if w.flag != 'nr' or len(word) < 2 or word in excludes:
            continue
        # Unlisted tokens are counted under their own spelling.
        real_word = _NAME_ALIASES.get(word, word)
        counts[real_word] = counts.get(real_word, 0) + 1


getWordTimes()
# (name, count) pairs ordered from most to least frequent.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
def wordFreq(filepath, topn, entries=None):
    """Write the first *topn* (word, count) pairs to *filepath* as UTF-8.

    Each pair becomes one ``word:count`` line.

    Args:
        filepath: Destination file; it is overwritten.
        topn: Maximum number of pairs to write.  Fewer are written when
            fewer are available (the original raised IndexError instead).
        entries: Sequence of (word, count) pairs.  Defaults to the
            module-level ``items`` list.
    """
    if entries is None:
        entries = items
    with codecs.open(filepath, "w", "utf-8") as f:
        # max() keeps a negative topn writing nothing, as range() did.
        for word, count in entries[:max(topn, 0)]:
            f.write("{}:{}\n".format(word, count))
# Persist the 300 most frequent names, then load the file back.
wordFreq("三國演義詞頻_人名.txt", 300)

dic = {}
keys = []
with open('三國演義詞頻_人名.txt', 'r', encoding='utf-8') as fr:
    for line in fr:
        # Each line is "name:count"; the count is always the last field.
        v = line.strip().rsplit(':', 1)
        if len(v) != 2:
            # Skip blank or malformed lines instead of raising IndexError.
            continue
        # Store the count as a number so the charts receive numeric values.
        dic[v[0]] = int(v[1])
        keys.append(v[0])

print("人物出現次數TOP", mainTop)
print(list(dic.items())[:mainTop])

# Parallel lists of names and counts, in file (descending frequency) order.
list_name = list(dic.keys())
list_name_times = list(dic.values())
def creat_people_view():
    """Render a bar chart of the top ``mainTop`` characters to an HTML file."""
    bar = Bar()
    bar.add_xaxis(list_name[0:mainTop])
    # Slice the values like the labels; the original passed the whole list,
    # so the two axes had different lengths.
    bar.add_yaxis("人物出場次數", list_name_times[0:mainTop])
    bar.set_global_opts(
        title_opts=opts.TitleOpts(
            title="人物出場次數可視化圖",
            subtitle="三國人物TOP" + str(mainTop),
        ),
        toolbox_opts=opts.ToolboxOpts(is_show=True),
        xaxis_opts=opts.AxisOpts(axislabel_opts={"rotate": 45}),
    )
    bar.set_series_opts(label_opts=opts.LabelOpts(position="top"))
    # NOTE(review): the return value is discarded here; presumably this call
    # only matters as the last expression of a notebook cell - confirm.
    bar.render_notebook()
    bar.render("三國演義人物出場次數可視化圖.html")
def creat_wordcloud():
    """Draw a masked word cloud of the name counts, save it and show it."""
    bg_pic = imageio.imread(uri='alice_mask.png')
    wc = wordcloud.WordCloud(
        # Raw string: same path value, without relying on '\W', '\F' and
        # '\s' being left alone as unrecognised escape sequences.
        font_path=r'c:\Windows\Fonts\simhei.ttf',
        background_color='white',
        width=1000,
        height=800,
        max_words=500,
        mask=bg_pic,
    )
    wc.generate_from_frequencies(counts)
    wc.to_file('三國演義詞云_人名.png')
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
def creat_wordcloud_pyecharts():
    """Render a pyecharts word cloud of the names in ``dic`` to an HTML file."""
    wordsAndTimes = list(dic.items())
    (
        WordCloud()
        .add(
            series_name="人物次數",
            data_pair=wordsAndTimes,
            word_size_range=[20, 100],
            textstyle_opts=opts.TextStyleOpts(font_family="cursive"),
        )
        .set_global_opts(title_opts=opts.TitleOpts(title="三國演義詞云"))
        .render("三國演義詞云_人名.html")
    )
def chapter_word():
    """Render a line chart of the character count of each chapter.

    ``txt`` is split on the "------------" separator; each resulting piece
    is treated as one chapter and its length is plotted.
    """
    list2 = txt.split("------------")
    chapter_list = list(range(len(list2)))
    word_list = [len(chapter) for chapter in list2]
    (
        Line(init_opts=opts.InitOpts(width="1400px", height="700px"))
        .add_xaxis(xaxis_data=chapter_list)
        .add_yaxis(
            series_name="章回字數",
            y_axis=word_list,
            markpoint_opts=opts.MarkPointOpts(
                data=[
                    opts.MarkPointItem(type_="max", name="最大值"),
                    opts.MarkPointItem(type_="min", name="最小值"),
                ]
            ),
            markline_opts=opts.MarkLineOpts(
                data=[opts.MarkLineItem(type_="average", name="平均值")]
            ),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title="三國演義章回字數", subtitle=""),
            tooltip_opts=opts.TooltipOpts(trigger="axis"),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            xaxis_opts=opts.AxisOpts(type_="category", boundary_gap=False),
        )
        .render("三國演義章回字數.html")
    )
# Number of colours to generate: one per displayed character, which can be
# fewer than mainTop when fewer names were found.
colorNum = len(list_name[0:mainTop])
def randomcolor():
    """Return a random colour as a '#RRGGBB'-style hex string."""
    # NOTE(review): '0' is not among the digits, so components such as '00'
    # can never be produced - presumably an oversight; confirm before adding.
    colorArr = '123456789ABCDEF'
    return "#" + "".join(
        colorArr[random.randint(0, len(colorArr) - 1)] for _ in range(6)
    )
def color_list():
    """Return a list of ``colorNum`` random colour strings."""
    return [randomcolor() for _ in range(colorNum)]
# Use the SimHei font so matplotlib can render the Chinese labels.
plt.rcParams['font.sans-serif'] = ['SimHei']
def creat_relationship():
    """Draw the co-occurrence network of the top ``mainTop`` characters.

    Two characters are related each time both names appear in the same line
    of ``txt``.  Counts are normalised by the largest count and drawn as
    edges in three strength classes.  The figure is saved as a PNG and
    shown.  Nothing is drawn when no two names ever share a line.
    """
    Names = list_name[0:mainTop]

    relations = {}
    for text in txt.split('\n'):
        # Test each name once per line, then pair up the ones that occur.
        # Pairs keep the order of Names, so each unordered pair has one key.
        present = [name for name in Names if name in text]
        for pair in combinations(present, 2):
            relations[pair] = relations.get(pair, 0) + 1

    if not relations:
        # max() on an empty collection would raise ValueError.
        return

    maxRela = max(relations.values())
    relations = {k: v / maxRela for k, v in relations.items()}

    plt.figure(figsize=(15, 15))
    G = nx.Graph()
    for (name_0, name_1), weight in relations.items():
        G.add_edge(name_0, name_1, weight=weight)

    # Split the edges into strong, medium and weak relations.
    elarge = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] > 0.6]
    emidle = [(u, v) for (u, v, d) in G.edges(data=True)
              if 0.3 < d['weight'] <= 0.6]
    esmall = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] <= 0.3]

    pos = nx.spring_layout(G)

    # Names without any relation are not in the graph, so it can hold fewer
    # nodes than color_list() yields; pass exactly one colour per node.
    colors = color_list()[:G.number_of_nodes()]

    nx.draw_networkx_nodes(G, pos, alpha=0.8, node_size=1300, node_color=colors)
    nx.draw_networkx_edges(G, pos, edgelist=elarge, width=2.5, alpha=0.9,
                           edge_color='g')
    nx.draw_networkx_edges(G, pos, edgelist=emidle, width=1.5, alpha=0.6,
                           edge_color='y')
    nx.draw_networkx_edges(G, pos, edgelist=esmall, width=1, alpha=0.4,
                           edge_color='b', style='dashed')
    nx.draw_networkx_labels(G, pos, font_size=14)

    plt.title("《三國演義》主要人物社交關系網絡圖")
    plt.axis('off')
    plt.savefig('《三國演義》主要人物社交關系網絡圖.png', bbox_inches='tight')
    plt.show()


def main():
    """Produce every chart and image of the analysis in turn."""
    creat_people_view()
    creat_wordcloud()
    creat_wordcloud_pyecharts()
    creat_relationship()
    chapter_word()


if __name__ == '__main__':
    main()
總結
以上是生活随笔為你收集整理的python统计三国演义中人物出现的频次的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。