import string


def read_file(file):
    """Read a text file and return its normalized Western-text content.

    The file is decoded as UTF-8. Characters whose code point is 256 or
    above (for example Chinese text) are dropped, the remaining text is
    lower-cased, and every character in ``string.punctuation`` is replaced
    by a single space.

    Args:
        file: Path of the text file to read.

    Returns:
        The cleaned, lower-cased text as one string.
    """
    with open(file, 'r', encoding='utf-8') as f:
        txt = f.read()
    # Filter before lower-casing: lower-casing some non-Latin characters can
    # produce ASCII letters, which would otherwise slip through the filter.
    english_only_txt = ''.join(ch for ch in txt if ord(ch) < 256)
    # One translate pass maps every punctuation character to a space.
    table = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    return english_only_txt.lower().translate(table)


if __name__ == '__main__':
    filename = 'Who Moved My Cheese.txt'  # input file name
    content = read_file(filename)  # cleaned text as a single string
    n = int(input())
    print(content[:n])
第2關:統計單詞數量
本關任務:編寫一個能計算單詞數量的小程序。
import string


def count_of_words(txt):
    """Count the total and the distinct words in a cleaned string.

    Args:
        txt: Text whose punctuation has already been replaced by spaces;
            words are obtained by splitting on whitespace.

    Returns:
        A ``(total_words, distinct_words)`` tuple.
    """
    words = txt.split()
    # Only the number of distinct words is needed, so a set is enough.
    return len(words), len(set(words))


def read_file(file):
    """Read a text file and return its normalized Western-text content.

    The file is decoded as UTF-8. Characters whose code point is 256 or
    above (for example Chinese text) are dropped, the remaining text is
    lower-cased, and every character in ``string.punctuation`` is replaced
    by a single space.

    Args:
        file: Path of the text file to read.

    Returns:
        The cleaned, lower-cased text as one string.
    """
    with open(file, 'r', encoding='utf-8') as novel:
        txt = novel.read()
    # Filter before lower-casing so the result matches the original order
    # of operations exactly.
    english_only_txt = ''.join(x for x in txt if ord(x) < 256)
    # One translate pass maps every punctuation character to a space.
    table = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    return english_only_txt.lower().translate(table)


if __name__ == '__main__':
    filename = 'Who Moved My Cheese.txt'  # input file name
    content = read_file(filename)  # cleaned text as a single string
    amount_results = count_of_words(content)
    print('文章共有單詞{}個,其中不重復單詞{}個'.format(*amount_results))
第3關:統計單詞出現的次數
預期輸出:
the 369
he 337
to 333
and 312
cheese 214
it 187
they 166
of 158
a 146
had 142
import string
from collections import Counter


def word_frequency(txt):
    """Count how many times each word occurs in a cleaned string.

    Args:
        txt: Text whose punctuation has already been replaced by spaces;
            words are obtained by splitting on whitespace.

    Returns:
        A plain ``dict`` mapping each word to its number of occurrences,
        with keys in order of first appearance.
    """
    # Convert back to a plain dict so the return type stays exactly ``dict``.
    return dict(Counter(txt.split()))


def top_ten_words(frequency, cnt):
    """Print the ``cnt`` most frequent words with their counts.

    One ``word count`` pair is printed per line, highest count first.
    Words with equal counts keep the order they have in ``frequency``.

    Args:
        frequency: Mapping of word to occurrence count.
        cnt: Number of entries to print.
    """
    # sorted() is stable, also with reverse=True, so ties keep dict order.
    ranked = sorted(frequency.items(), key=lambda item: item[1], reverse=True)
    for word, times in ranked[:cnt]:
        print(word, times)


def read_file(file):
    """Read a text file and return its normalized Western-text content.

    The file is decoded as UTF-8. Characters whose code point is 256 or
    above (for example Chinese text) are dropped, the remaining text is
    lower-cased, and every character in ``string.punctuation`` is replaced
    by a single space.

    Args:
        file: Path of the text file to read.

    Returns:
        The cleaned, lower-cased text as one string.
    """
    with open(file, 'r', encoding='utf-8') as novel:
        txt = novel.read()
    # Filter before lower-casing so the result matches the original order
    # of operations exactly.
    english_only_txt = ''.join(x for x in txt if ord(x) < 256)
    # One translate pass maps every punctuation character to a space.
    table = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    return english_only_txt.lower().translate(table)


if __name__ == '__main__':
    filename = 'Who Moved My Cheese.txt'  # input file name
    content = read_file(filename)  # cleaned text as a single string
    frequency_result = word_frequency(content)  # word -> occurrence count
    n = int(input())
    top_ten_words(frequency_result, n)