23 Jan 2017
我们拥有一个列表,需要统计元素的频度
# 创建一个列表 >>> from random import randint >>> data = [randint(0, 20) for _ in range(30)] >>> data [3, 4, 13, 20, 14, 0, 11, 17, 13, 18, 8, 18, 12, 12, 17, 10, 13, 9, 7, 0, 18, 15, 15, 17, 0, 2, 2, 5, 15, 8]
# 使用列表中的元素作为key,创造一个value全部为0的字典 >>> r = dict.fromkeys(data, 0) >>> r {0: 0, 2: 0, 3: 0, 4: 0, 5: 0, 7: 0, 8: 0, 9: 0, 10: 0, 11: 0, 12: 0, 13: 0, 14: 0, 15: 0, 17: 0, 18: 0, 20: 0} >>> for x in data: ... r[x] += 1 ... >>> r {0: 3, 2: 2, 3: 1, 4: 1, 5: 1, 7: 1, 8: 2, 9: 1, 10: 1, 11: 1, 12: 2, 13: 3, 14: 1, 15: 3, 17: 3, 18: 3, 20: 1} # 获取频度前八的值 >>> sorted(r.items(), key=lambda x:x[1])[-8:] [(2, 2), (8, 2), (12, 2), (0, 3), (13, 3), (15, 3), (17, 3), (18, 3)]
# 导入Counter >>> from collections import Counter >>> r2 = Counter(data) # 结果r2是一个counter对象 >>> r2 Counter({0: 3, 13: 3, 15: 3, 17: 3, 18: 3, 2: 2, 8: 2, 12: 2, 3: 1, 4: 1, 5: 1, 7: 1, 9: 1, 10: 1, 11: 1, 14: 1, 20: 1}) >>> type(r2) <class 'collections.Counter'> # 但是r2可以跟dict一样使用 >>> r2[0] 3 # 查询频度前八的值 >>> r2.most_common(8) [(0, 3), (13, 3), (15, 3), (17, 3), (18, 3), (2, 2), (8, 2), (12, 2)]
# 读取一个文件 >>> txt = open('/root/install.log').read() # 使用re模块,通过正则表达式对txt文本进行分割 >>> import re # 使用非字母的字符分割 >>> r3 = re.split('\W+', txt) >>> r3 = Counter(r3) >>> r3.most_common(8) [('Installing', 229), ('x86_64', 209), ('el6', 190), ('1', 149), ('2', 101), ('4', 75), ('0', 74), ('7', 52)]