Source code for cornac.eval_methods.base_method

# -*- coding: utf-8 -*-

"""
@author: Quoc-Tuan Truong <tuantq.vnu@gmail.com>
"""

from ..data import TextModule
from ..data import ImageModule
from ..data import GraphModule
from ..data import MultimodalTrainSet
from ..data import MultimodalTestSet
from ..utils.common import validate_format
from ..metrics.rating import RatingMetric
from ..metrics.ranking import RankingMetric
from ..experiment.result import Result
from collections import OrderedDict, defaultdict
import numpy as np
import tqdm
import time

VALID_DATA_FORMATS = ['UIR', 'UIRT']


[docs]class BaseMethod: """Base Evaluation Method Parameters ---------- data: array-like The original data. data_format: str, default: 'UIR' The format of given data. total_users: int, optional, default: None Total number of unique users in the data including train, val, and test sets. total_users: int, optional, default: None Total number of unique items in the data including train, val, and test sets. rating_threshold: float, optional, default: 1.0 The threshold to convert ratings into positive or negative feedback for ranking metrics. exclude_unknowns: bool, optional, default: False Ignore unknown users and items (cold-start) during evaluation. verbose: bool, optional, default: False Output running log """ def __init__(self, data=None, fmt='UIR', rating_threshold=1.0, exclude_unknowns=False, verbose=False, **kwargs): self._data = data self.data_format = validate_format(fmt, VALID_DATA_FORMATS) self.train_set = None self.test_set = None self.val_set = None self.rating_threshold = rating_threshold self.exclude_unknowns = exclude_unknowns self.verbose = verbose self.global_uid_map = OrderedDict() self.global_iid_map = OrderedDict() self.user_text = kwargs.get('user_text', None) self.user_image = kwargs.get('user_image', None) self.user_graph = kwargs.get('user_graph', None) self.item_text = kwargs.get('item_text', None) self.item_image = kwargs.get('item_image', None) self.item_graph = kwargs.get('item_graph', None) if verbose: print('rating_threshold = {:.1f}'.format(rating_threshold)) print('exclude_unknowns = {}'.format(exclude_unknowns)) @property def total_users(self): return len(self.global_uid_map) @property def total_items(self): return len(self.global_iid_map) @property def user_text(self): return self.__user_text @user_text.setter def user_text(self, input_module): if input_module is not None and not isinstance(input_module, TextModule): raise ValueError('input_module has to be instance of TextModule but {}'.format(type(input_module))) self.__user_text = input_module @property def user_image(self): return self.__user_image @user_image.setter def user_image(self, input_module): if input_module is not None and not isinstance(input_module, ImageModule): raise ValueError('input_module has to be instance of ImageModule but {}'.format(type(input_module))) self.__user_image = input_module @property def user_graph(self): return self.__user_graph @user_graph.setter def user_graph(self, input_module): if input_module is not None and not isinstance(input_module, GraphModule): raise ValueError('input_module has to be instance of GraphModule but {}'.format(type(input_module))) self.__user_graph = input_module @property def item_text(self): return self.__item_text @item_text.setter def item_text(self, input_module): if input_module is not None and not isinstance(input_module, TextModule): raise ValueError('input_module has to be instance of TextModule but {}'.format(type(input_module))) self.__item_text = input_module @property def item_image(self): return self.__item_image @item_image.setter def item_image(self, input_module): if input_module is not None and not isinstance(input_module, ImageModule): raise ValueError('input_module has to be instance of ImageModule but {}'.format(type(input_module))) self.__item_image = input_module @property def item_graph(self): return self.__item_graph @item_graph.setter def item_graph(self, input_module): if input_module is not None and not isinstance(input_module, GraphModule): raise ValueError('input_module has to be instance of GraphModule but {}'.format(type(input_module))) self.__item_graph = input_module def _organize_metrics(self, metrics): """Organize metrics according to their types (rating or raking) Parameters ---------- metrics: :obj:`iterable` List of metrics. """ if isinstance(metrics, dict): rating_metrics = metrics.get('rating', []) ranking_metrics = metrics.get('ranking', []) elif isinstance(metrics, list): rating_metrics = [mt for mt in metrics if isinstance(mt, RatingMetric)] ranking_metrics = [mt for mt in metrics if isinstance(mt, RankingMetric)] else: raise ValueError('Type of metrics has to be either dict or list!') return rating_metrics, ranking_metrics def _build_uir(self, train_data, test_data, val_data=None): if train_data is None: raise ValueError('train_data is required but None!') if test_data is None: raise ValueError('test_data is required but None!') global_ui_set = set() # avoid duplicate ratings in the data if self.verbose: print('Building training set') self.train_set = MultimodalTrainSet.from_uir( train_data, self.global_uid_map, self.global_iid_map, global_ui_set, self.verbose) if self.verbose: print('Building test set') self.test_set = MultimodalTestSet.from_uir( test_data, self.global_uid_map, self.global_iid_map, global_ui_set, self.verbose) if val_data is not None: if self.verbose: print('Building validation set') self.val_set = MultimodalTestSet.from_uir( val_data, self.global_uid_map, self.global_iid_map, global_ui_set, self.verbose) def _build_modules(self): for user_module in [self.user_text, self.user_image, self.user_graph]: if user_module is None: continue user_module.build(id_map=self.global_uid_map) for item_module in [self.item_text, self.item_image, self.item_graph]: if item_module is None: continue item_module.build(id_map=self.global_iid_map) for data_set in [self.train_set, self.test_set, self.val_set]: if data_set is None: continue data_set.add_modules(user_text=self.user_text, user_image=self.user_image, user_graph=self.user_graph, item_text=self.item_text, item_image=self.item_image, item_graph=self.item_graph) def build(self, train_data, test_data, val_data=None): self.global_uid_map.clear() self.global_iid_map.clear() if self.data_format == 'UIR': self._build_uir(train_data, test_data, val_data) self._build_modules()
[docs] def evaluate(self, model, metrics, user_based): """Evaluate given models according to given metrics Parameters ---------- model: :obj:`cornac.models.Recommender` Recommender model to be evaluated. metrics: :obj:`iterable` List of metrics. user_based: bool Evaluation mode. Whether results are averaging based on number of users or number of ratings. """ rating_metrics, ranking_metrics = self._organize_metrics(metrics) if self.train_set is None: raise ValueError('train_set is required but None!') if self.test_set is None: raise ValueError('test_set is required but None!') print('\n[{}] Training started!'.format(model.name)) start = time.time() model.fit(self.train_set) train_time = time.time() - start print('\n[{}] Evaluation started!'.format(model.name)) start = time.time() all_pd_ratings = [] all_gt_ratings = [] metric_user_results = defaultdict() for mt in (rating_metrics + ranking_metrics): metric_user_results[mt.name] = {} for user_id in tqdm.tqdm(self.test_set.users): # ignore unknown users when self.exclude_unknown if self.exclude_unknowns and self.train_set.is_unk_user(user_id): continue u_pd_ratings = [] u_gt_ratings = [] if self.exclude_unknowns: u_gt_pos = np.zeros(self.train_set.num_items, dtype=np.int) item_ids = None # all known items else: u_gt_pos = np.zeros(self.total_items, dtype=np.int) item_ids = np.arange(self.total_items) u_gt_neg = np.ones_like(u_gt_pos, dtype=np.int) if not self.train_set.is_unk_user(user_id): u_train_ratings = self.train_set.matrix[user_id].A.ravel() u_train_neg = np.where(u_train_ratings < self.rating_threshold, 1, 0) u_gt_neg[:len(u_train_neg)] = u_train_neg for item_id, rating in self.test_set.get_ratings(user_id): # ignore unknown items when self.exclude_unknown if self.exclude_unknowns and self.train_set.is_unk_item(item_id): continue if len(rating_metrics) > 0: all_gt_ratings.append(rating) u_gt_ratings.append(rating) rating_pred = model.rate(user_id, item_id) all_pd_ratings.append(rating_pred) u_pd_ratings.append(rating_pred) # constructing ranking ground-truth if rating >= self.rating_threshold: u_gt_pos[item_id] = 1 u_gt_neg[item_id] = 0 # per user evaluation of rating metrics if len(rating_metrics) > 0 and len(u_gt_ratings) > 0: for mt in rating_metrics: mt_score = mt.compute(gt_ratings=np.asarray(u_gt_ratings), pd_ratings=np.asarray(u_pd_ratings)) metric_user_results[mt.name][user_id] = mt_score # evaluation of ranking metrics if len(ranking_metrics) > 0 and u_gt_pos.sum() > 0: item_rank, item_scores = model.rank(user_id, item_ids) for mt in ranking_metrics: mt_score = mt.compute(gt_pos=u_gt_pos, gt_neg=u_gt_neg, pd_rank=item_rank, pd_scores=item_scores) metric_user_results[mt.name][user_id] = mt_score metric_avg_results = OrderedDict() # avg results of rating metrics for mt in rating_metrics: if user_based: # averaging over users user_results = list(metric_user_results[mt.name].values()) metric_avg_results[mt.name] = np.mean(user_results) else: # averaging over ratings metric_avg_results[mt.name] = mt.compute(gt_ratings=np.asarray(all_gt_ratings), pd_ratings=np.asarray(all_pd_ratings)) # avg results of ranking metrics for mt in ranking_metrics: user_results = list(metric_user_results[mt.name].values()) metric_avg_results[mt.name] = np.mean(user_results) test_time = time.time() - start metric_avg_results['Train (s)'] = train_time metric_avg_results['Test (s)'] = test_time return Result(model.name, metric_avg_results, metric_user_results)
[docs] @classmethod def from_splits(cls, train_data, test_data, val_data=None, data_format='UIR', rating_threshold=1.0, exclude_unknowns=False, verbose=False): """Constructing evaluation method given data. Parameters ---------- train_data: array-like Training data test_data: array-like Test data val_data: array-like Validation data data_format: str, default: 'UIR' The format of given data. rating_threshold: float, default: 1.0 Threshold to decide positive or negative preferences. exclude_unknowns: bool, default: False Whether to exclude unknown users/items in evaluation. verbose: bool, default: False The verbosity flag. Returns ------- method: :obj:`<cornac.eval_methods.BaseMethod>` Evaluation method object. """ method = cls(data_format=data_format, rating_threshold=rating_threshold, exclude_unknowns=exclude_unknowns, verbose=verbose) method.build(train_data=train_data, test_data=test_data, val_data=val_data) return method