python去除停用词_python jieba分词如何去除停用词
展開全部
import jieba
# 創建停用詞list
def stopwordslist(filepath):
    """Load stopwords from a UTF-8 text file, one entry per line.

    Args:
        filepath: Path of the stopword file to read.

    Returns:
        A list with one whitespace-stripped string per line of the file,
        in file order. Blank lines are kept as empty strings.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # The context manager closes the handle even if decoding fails midway;
    # the original left the file open until garbage collection.
    with open(filepath, 'r', encoding='utf-8') as stopword_file:
        return [line.strip() for line in stopword_file]
# 對句子進行分詞
def seg_sentence(sentence, stopwords=None):
    """Segment a sentence with jieba and drop stopwords.

    Args:
        sentence: Text to segment; leading and trailing whitespace is
            stripped before segmentation.
        stopwords: Optional iterable of words to filter out. When omitted,
            the list is loaded from './test/stopwords.txt' on every call,
            as the original did. Pass a preloaded collection to avoid
            re-reading the file for each sentence.

    Returns:
        A string in which every kept token is followed by a single space
        (so a non-empty result ends with a space); '' if nothing is kept.
    """
    if stopwords is None:
        # Default path of the stopword file.
        stopwords = stopwordslist('./test/stopwords.txt')
    # A set gives constant-time membership tests instead of scanning the
    # whole stopword list once per token.
    stopword_set = set(stopwords)
    kept = [
        word
        for word in jieba.cut(sentence.strip())
        # Tab tokens are dropped explicitly, independent of the stopword list.
        if word not in stopword_set and word != '\t'
    ]
    # Build the result in one pass rather than with repeated `+=`.
    return ''.join(word + ' ' for word in kept)
# Segment every line of the input file and write the filtered tokens to the
# output file, one output line per input line.
# Both files are opened as UTF-8: the original wrote the output in the
# platform default encoding, which can fail on Chinese text or produce a
# file that does not match the input encoding. The `with` block also
# guarantees both handles are closed if segmentation raises.
with open('./test/input.txt', 'r', encoding='utf-8') as inputs, \
        open('./test/output.txt', 'w', encoding='utf-8') as outputs:
    for line in inputs:
        line_seg = seg_sentence(line)  # the return value is a string
        outputs.write(line_seg + '\n')
總結
以上是生活随笔為你收集整理的python去除停用词_python jieba分词如何去除停用词的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: ubuntu15.10下安装opencv
- 下一篇: ubuntu 15.10下cmake 的