Source code for cornac.models.pmf.recom_pmf

# -*- coding: utf-8 -*-

"""
@author: Aghiles Salah
"""

import numpy as np
import scipy.sparse as sp
from cornac.models.pmf import pmf
from ..recommender import Recommender
from ...utils.common import sigmoid
from ...utils.common import scale
from ...exception import ScoreException


class PMF(Recommender):
    """Probabilistic Matrix Factorization.

    Parameters
    ----------
    k: int, optional, default: 5
        The dimension of the latent factors.

    max_iter: int, optional, default: 100
        Maximum number of iterations or the number of epochs for SGD.

    learning_rate: float, optional, default: 0.001
        The learning rate for SGD_RMSProp.

    gamma: float, optional, default: 0.9
        The weight for previous/current gradient in RMSProp.

    lamda: float, optional, default: 0.001
        The regularization parameter.

    name: string, optional, default: 'PMF'
        The name of the recommender model.

    variant: {"linear","non_linear"}, optional, default: 'non_linear'
        Pmf variant. If 'non_linear', the Gaussian mean is the output of a
        Sigmoid function. If 'linear' the Gaussian mean is the output of the
        identity function.

    trainable: boolean, optional, default: True
        When False, the model is not trained and Cornac assumes that the model
        is already pre-trained (U and V are not None).

    verbose: boolean, optional, default: False
        When True, some running logs are displayed.

    init_params: dictionary, optional, default: None
        Initial parameters, e.g., init_params = {'U': U, 'V': V}.
        U: a matrix of shape (n_users, k), containing the user latent factors.
        V: a matrix of shape (n_items, k), containing the item latent factors.
        Missing keys (or ``None`` for the whole argument) are treated as
        ``None``, i.e. equivalent to ``{'U': None, 'V': None}``.

    References
    ----------
    * Mnih, Andriy, and Ruslan R. Salakhutdinov. Probabilistic matrix
      factorization. In NIPS, pp. 1257-1264. 2008.
    """

    def __init__(self, k=5, max_iter=100, learning_rate=0.001, gamma=0.9, lamda=0.001,
                 name="PMF", variant='non_linear', trainable=True, verbose=False,
                 init_params=None):
        Recommender.__init__(self, name=name, trainable=trainable, verbose=verbose)

        # Work on a private copy: a dict literal as default argument would be
        # shared by every instance, and the caller's dict should not be aliased.
        # Both keys are always present afterwards because they are read below
        # and the dict is forwarded as-is to the training routine in fit().
        params = {} if init_params is None else dict(init_params)
        params.setdefault('U', None)
        params.setdefault('V', None)

        self.k = k
        self.init_params = params
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.lamda = lamda
        self.variant = variant

        self.ll = np.full(max_iter, 0)
        self.eps = 0.000000001
        self.U = params['U']  # matrix of user factors
        self.V = params['V']  # matrix of item factors

    # fit the recommender model to the training data
    def fit(self, train_set):
        """Fit the model to observations.

        Parameters
        ----------
        train_set: object of type TrainSet, required
            An object containing the user-item preference in csr scipy sparse
            format, as well as some useful attributes such as mappings to the
            original user/item ids. Please refer to the class TrainSet in the
            "data" module for details.

        Raises
        ------
        ValueError
            If the model is trainable and ``variant`` is neither "linear" nor
            "non_linear".
        """
        Recommender.fit(self, train_set)

        if not self.trainable:
            if self.verbose:
                print('%s is trained already (trainable = False)' % (self.name))
            return

        # Reject an unsupported variant before doing any conversion work.
        if self.variant == 'linear':
            learn = pmf.pmf_linear
        elif self.variant == 'non_linear':
            learn = pmf.pmf_non_linear
        else:
            raise ValueError('variant must be one of {"linear","non_linear"}')

        # converting data to the triplet format (needed for cython function pmf)
        (uid, iid, rat) = train_set.uir_tuple
        rat = np.array(rat, dtype='float32')

        if self.variant == 'non_linear':
            # the sigmoid output lives in [0,1], so the ratings are mapped there too
            min_rating = self.train_set.min_rating
            max_rating = self.train_set.max_rating
            if [min_rating, max_rating] != [0, 1]:
                if min_rating == max_rating:
                    # a single distinct rating value: use [0, max_rating] as the source range
                    rat = scale(rat, 0., 1., 0., max_rating)
                else:
                    rat = scale(rat, 0., 1., min_rating, max_rating)

        uid = np.array(uid, dtype='int32')
        iid = np.array(iid, dtype='int32')

        if self.verbose:
            print('Learning...')

        res = learn(uid, iid, rat, k=self.k,
                    n_users=train_set.num_users, n_items=train_set.num_items,
                    n_ratings=len(rat), n_epochs=self.max_iter,
                    lamda=self.lamda, learning_rate=self.learning_rate,
                    gamma=self.gamma, init_params=self.init_params)

        self.U = np.asarray(res['U'])
        self.V = np.asarray(res['V'])

        if self.verbose:
            print('Learning completed')

    def score(self, user_id, item_id=None):
        """Predict the scores/ratings of a user for an item.

        Parameters
        ----------
        user_id: int, required
            The index of the user for whom to perform score prediction.

        item_id: int, optional, default: None
            The index of the item for that to perform score prediction.
            If None, scores for all known items will be returned.

        Returns
        -------
        res : A scalar or a Numpy array
            Relative scores that the user gives to the item or to all known
            items. When ``item_id`` is None the raw factor dot products are
            returned (no sigmoid, no rescaling); for a single item under the
            'non_linear' variant the score is passed through the sigmoid and
            rescaled to the training rating range.

        Raises
        ------
        ScoreException
            If the user (or the item, when given) is unknown to the train set.
        """
        if item_id is None:
            if self.train_set.is_unk_user(user_id):
                raise ScoreException("Can't make score prediction for (user_id=%d)" % user_id)

            known_item_scores = self.V.dot(self.U[user_id, :])
            return known_item_scores

        if self.train_set.is_unk_user(user_id) or self.train_set.is_unk_item(item_id):
            raise ScoreException(
                "Can't make score prediction for (user_id=%d, item_id=%d)" % (user_id, item_id))

        user_pred = self.V[item_id, :].dot(self.U[user_id, :])

        if self.variant == "non_linear":
            user_pred = sigmoid(user_pred)
            # undo the [0,1] mapping applied to the ratings during training
            if self.train_set.min_rating == self.train_set.max_rating:
                user_pred = scale(user_pred, 0., self.train_set.max_rating, 0., 1.)
            else:
                user_pred = scale(user_pred, self.train_set.min_rating,
                                  self.train_set.max_rating, 0., 1.)

        return user_pred