Source code for cornac.data.graph

# -*- coding: utf-8 -*-

"""
@author: Aghiles Salah <asalah@smu.edu.sg>
"""

from . import FeatureModule
import scipy.sparse as sp
import numpy as np


[docs]class GraphModule(FeatureModule): """Graph module """ def __init__(self, **kwargs): super().__init__(**kwargs) self.raw_data = kwargs.get('data', None) self.matrix = None self.map_data = [] def _build_triplet(self, global_id_map): """Build adjacency matrix in sparse triplet format using mapped ids """ for i, j, val in self.raw_data: self.map_data.append([global_id_map[i], global_id_map[j], val]) self.map_data = np.asanyarray(self.map_data) #self.raw_data = None def _build_sparse_matrix(self, triplet): """Build sparse adjacency matrix """ n_rows = max(triplet[:, 0]) + 1 n_cols = max(triplet[:, 1]) + 1 # TODO: csr_matrix is more efficient for row slicing in batch function self.matrix = sp.csc_matrix((triplet[:, 2], (triplet[:, 0], triplet[:, 1])), shape=(n_rows, n_cols))
[docs] def get_train_triplet(self, train_row_ids, train_col_ids): """Get the training tuples """ train_triplet = [] # this makes operations much more efficient train_row_ids = np.asanyarray(list(train_row_ids)) train_col_ids = np.asanyarray(list(train_col_ids)) for i, j, val in self.map_data: if (i not in train_row_ids) or (j not in train_col_ids): continue train_triplet.append([i, j, val]) return np.asarray(train_triplet)
# TODO: id_map can be None to support GraphModule as an independent component
[docs] def build(self, id_map=None): self._build_triplet(id_map) self._build_sparse_matrix(self.map_data)
# TODO: add feature_fallback decorator and rename the API more meaningful
[docs] def batch(self, batch_ids): """Return batch of vectors from the sparse adjacency matrix corresponding to provided batch_ids. Parameters ---------- batch_ids: array, required An array contains the ids of rows to be returned from the sparse adjacency matrix. """ return self.matrix[batch_ids]