协同过滤算法简单实现
生活随笔
收集整理的這篇文章主要介紹了
协同过滤算法简单实现
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
以下的代碼主要來自《推薦系統實踐》第二章,修正了書上的一些錯誤,簡單地實現了基於用戶的協同過濾算法(UserCF)和基於物品的協同過濾算法(ItemCF),可供參考:
import math
import random
from collections import defaultdict
from operator import itemgetter


def _unit_weight(group_size):
    """Return the unpenalised co-occurrence weight, which is always 1."""
    return 1


def _log_penalty(group_size):
    """Return ``1 / log(1 + group_size)``, the popularity/activity penalty."""
    return 1 / math.log(1 + group_size * 1.0)


def _user_cooccurrence(train, weight):
    """Count weighted co-rated items for every ordered pair of users.

    :param train: mapping ``user -> {item: rating}``
    :param weight: callable taking the number of users of an item and
        returning the contribution of that item to each user pair
    :return: ``(counts, n_items)`` where ``counts[u][v]`` is the accumulated
        weight and ``n_items[u]`` is the number of items user ``u`` has
    """
    # Inverted index: item -> set of users who interacted with it.
    item_users = {}
    for user, items in train.items():
        for item in items:
            item_users.setdefault(item, set()).add(user)

    counts = {}
    n_items = defaultdict(int)
    for users in item_users.values():
        # The weight is only needed when the item yields at least one pair.
        pair_weight = weight(len(users)) if len(users) > 1 else 0
        for u in users:
            n_items[u] += 1
            for v in users:
                if u != v:
                    counts.setdefault(u, defaultdict(int))[v] += pair_weight
    return counts, n_items


def _item_cooccurrence(train, weight):
    """Count weighted co-rating users for every ordered pair of items.

    :param train: mapping ``user -> {item: rating}``
    :param weight: callable taking the number of items of a user and
        returning the contribution of that user to each item pair
    :return: ``(counts, n_users)`` where ``counts[i][j]`` is the accumulated
        weight and ``n_users[i]`` is the number of users who have item ``i``
    """
    counts = {}
    n_users = defaultdict(int)
    for items in train.values():
        # Guarded so that a user with an empty item dict never evaluates
        # log(1) == 0 as a divisor.
        pair_weight = weight(len(items)) if len(items) > 1 else 0
        for i in items:
            n_users[i] += 1
            row = counts.setdefault(i, {})
            for j in items:
                if i != j:
                    row[j] = row.get(j, 0) + pair_weight
    return counts, n_users


def _cosine_similarity(counts, popularity, row_default):
    """Divide each co-occurrence weight by ``sqrt(popularity[a] * popularity[b])``.

    :param counts: nested mapping ``a -> {b: weight}``
    :param popularity: mapping ``a -> number of interactions of a``
    :param row_default: default factory of the returned ``defaultdict`` rows
        (``int`` for the user matrices, ``float`` for the item matrices, as in
        the original implementation)
    :return: similarity matrix ``a -> defaultdict {b: similarity}``; rows are
        only created for keys that have at least one neighbour
    """
    similarity = {}
    for a, related in counts.items():
        for b, weight in related.items():
            row = similarity.setdefault(a, defaultdict(row_default))
            row[b] = weight / math.sqrt(popularity[a] * popularity[b])
    return similarity


def user_similarity(train):
    """User-based collaborative filtering (UserCF).

    :param train: training set, mapping ``user -> {item: rating}``
    :return: user similarity matrix ``u -> {v: similarity}``
    """
    counts, n_items = _user_cooccurrence(train, _unit_weight)
    return _cosine_similarity(counts, n_items, int)


def user_similarity2(train):
    """UserCF-IIF: UserCF with a penalty factor for popular items.

    Each shared item contributes ``1 / log(1 + number of users of the item)``
    instead of 1.

    :param train: training set, mapping ``user -> {item: rating}``
    :return: user similarity matrix ``u -> {v: similarity}``
    """
    counts, n_items = _user_cooccurrence(train, _log_penalty)
    return _cosine_similarity(counts, n_items, int)


def item_similarity(train):
    """Item-based collaborative filtering (ItemCF).

    :param train: training set, mapping ``user -> {item: rating}``
    :return: item similarity matrix ``i -> {j: similarity}``
    """
    counts, n_users = _item_cooccurrence(train, _unit_weight)
    return _cosine_similarity(counts, n_users, float)


def item_similarity2(train):
    """ItemCF-IUF: ItemCF with a penalty factor for very active users.

    Each user contributes ``1 / log(1 + number of items of the user)`` to
    every item pair they have, instead of 1.

    :param train: training set, mapping ``user -> {item: rating}``
    :return: item similarity matrix ``i -> {j: similarity}``
    """
    counts, n_users = _item_cooccurrence(train, _log_penalty)
    return _cosine_similarity(counts, n_users, float)


def item_similarity3(train):
    """ItemCF-IUF with each row of the similarity matrix normalised.

    After computing the ItemCF-IUF similarities, every row is divided by its
    maximum value so that the best neighbour of each item has similarity 1.

    :param train: training set, mapping ``user -> {item: rating}``
    :return: normalised item similarity matrix ``i -> {j: similarity}``
    """
    counts, n_users = _item_cooccurrence(train, _log_penalty)
    similarity = _cosine_similarity(counts, n_users, float)
    for row in similarity.values():
        # Rows are never empty and weights are positive, so the max is > 0.
        max_value = max(row.values())
        for item in row:
            row[item] /= max_value
    return similarity


def recommend_by_item(train, user_id, w, k):
    """Rank unseen items for a user from an item similarity matrix.

    :param train: training set, mapping ``user -> {item: rating}``
    :param user_id: user to recommend for
    :param w: item similarity matrix ``i -> {j: similarity}``
    :param k: number of most similar items considered per item of the user
    :return: ``defaultdict(float)`` mapping candidate item -> score
    :raises KeyError: if ``user_id`` is not in ``train``
    """
    rank = defaultdict(float)
    seen = train[user_id]
    for item, rating in seen.items():
        # An item that never co-occurs with another one has no row in ``w``;
        # treat it as having no neighbours instead of raising KeyError.
        neighbours = sorted(
            w.get(item, {}).items(), key=itemgetter(1), reverse=True
        )[0:k]
        for other, sim in neighbours:
            if other in seen:
                continue
            rank[other] += rating * sim
    return rank


def recommend_by_user(user, train, w, k):
    """Rank unseen items for a user from a user similarity matrix.

    :param user: user to recommend for
    :param train: training set, mapping ``user -> {item: rating}``
    :param w: user similarity matrix ``u -> {v: similarity}``
    :param k: number of most similar users considered
    :return: ``defaultdict(float)`` mapping candidate item -> score
    :raises KeyError: if ``user`` is not in ``train``
    """
    rank = defaultdict(float)
    interacted_items = train[user]
    # A user sharing no item with anyone has no row in ``w``; treat that as
    # having no neighbours instead of raising KeyError.
    neighbours = sorted(
        w.get(user, {}).items(), key=itemgetter(1), reverse=True
    )[0:k]
    for other, sim in neighbours:
        for item, rating in train[other].items():
            if item in interacted_items:
                # Items the user already interacted with are filtered out.
                continue
            rank[item] += sim * rating
    return rank


def main():
    """Run the small demo data sets through every variant and print scores."""
    train = {
        'A': {'a': 1, 'b': 1, 'd': 1},
        'B': {'a': 1, 'c': 1},
        'C': {'b': 1, 'e': 1},
        'D': {'c': 1, 'd': 1, 'e': 1},
    }
    rank = recommend_by_user('A', train, user_similarity(train), 3)
    print('UserCF:', dict(rank))
    rank2 = recommend_by_user('A', train, user_similarity2(train), 3)
    print('UserCF-IIF:', dict(rank2))

    train2 = {
        'A': {'a': 1, 'b': 1, 'd': 1},
        'B': {'b': 1, 'c': 1, 'e': 1},
        'C': {'c': 1, 'd': 1},
        'D': {'b': 1, 'c': 1, 'd': 1},
        'E': {'a': 1, 'd': 1},
    }
    rank3 = recommend_by_item(train2, 'A', item_similarity(train2), 5)
    print('ItemCF:', dict(rank3))
    rank4 = recommend_by_item(train2, 'A', item_similarity2(train2), 5)
    print('ItemCF-IUF:', dict(rank4))
    rank5 = recommend_by_item(train2, 'A', item_similarity3(train2), 5)
    print('ItemCF-IUF+Normalization:', dict(rank5))


if __name__ == '__main__':
    main()
轉載于:https://www.cnblogs.com/goingforward/p/10191937.html
總結
以上是生活随笔為你收集整理的协同过滤算法简单实现的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: [Leetcode] 第306题 累加数
- 下一篇: 第五天总结 运算符 职业化 运算符优先