# -*- coding: utf-8 -*-
"""
@author: Quoc-Tuan Truong <tuantq.vnu@gmail.com>
MovieLens: https://grouplens.org/datasets/movielens/
"""
from ..utils import validate_format
from ..utils import cache
from ..data import reader
# Format codes that load_100k and load_1m hand to validate_format as the
# accepted set (presumably U=user, I=item, R=rating, T=timestamp -- confirm).
VALID_DATA_FORMATS = ['UIR', 'UIRT']
def load_100k(fmt='UIR'):
    """Load the MovieLens 100K dataset.

    Parameters
    ----------
    fmt: str, default: 'UIR'
        Data format to be returned. Must be accepted by ``validate_format``
        against ``VALID_DATA_FORMATS``.

    Returns
    -------
    data: array-like
        Data in the form of a list of tuples depending on the given data format.
        For 'UIR' this is the result of ``reader.read_uir``. For 'UIRT' it is
        a list of ``(user, item, rating, timestamp)`` tuples where ``rating``
        is a float and ``timestamp`` is an int.

    Raises
    ------
    ValueError
        If the validated format is neither 'UIR' nor 'UIRT', or if a
        non-blank line of the data file has fewer than four fields.
    """
    fmt = validate_format(fmt, VALID_DATA_FORMATS)
    fpath = cache(url='http://files.grouplens.org/datasets/movielens/ml-100k/u.data',
                  relative_path='ml-100k/u.data')

    if fmt == 'UIR':
        return reader.read_uir(fpath)

    if fmt == 'UIRT':
        # Previously this format fell through and silently returned None even
        # though it is advertised in VALID_DATA_FORMATS.
        # u.data is tab separated: user id, item id, rating, timestamp.
        # NOTE(review): ids are kept as strings, presumably matching what
        # reader.read_uir yields -- confirm against the reader module.
        data = []
        with open(fpath, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                user, item, rating, timestamp = line.split('\t')[:4]
                data.append((user, item, float(rating), int(timestamp)))
        return data

    raise ValueError('Unsupported data format: {}'.format(fmt))
def load_1m(fmt='UIR'):
    """Load the MovieLens 1M dataset.

    Parameters
    ----------
    fmt: str, default: 'UIR'
        Data format to be returned. Must be accepted by ``validate_format``
        against ``VALID_DATA_FORMATS``.

    Returns
    -------
    data: array-like
        Data in the form of a list of tuples depending on the given data format.
        For 'UIR' this is the result of ``reader.read_uir`` with ``sep='::'``.
        For 'UIRT' it is a list of ``(user, item, rating, timestamp)`` tuples
        where ``rating`` is a float and ``timestamp`` is an int.

    Raises
    ------
    ValueError
        If the validated format is neither 'UIR' nor 'UIRT', or if a
        non-blank line of the data file has fewer than four fields.
    """
    fmt = validate_format(fmt, VALID_DATA_FORMATS)
    fpath = cache(url='http://files.grouplens.org/datasets/movielens/ml-1m.zip',
                  relative_path='ml-1m/ratings.dat', unzip=True)

    if fmt == 'UIR':
        return reader.read_uir(fpath, sep='::')

    if fmt == 'UIRT':
        # Previously this format fell through and silently returned None even
        # though it is advertised in VALID_DATA_FORMATS.
        # ratings.dat lines look like UserID::MovieID::Rating::Timestamp.
        # NOTE(review): ids are kept as strings, presumably matching what
        # reader.read_uir yields -- confirm against the reader module.
        data = []
        with open(fpath, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                user, item, rating, timestamp = line.split('::')[:4]
                data.append((user, item, float(rating), int(timestamp)))
        return data

    raise ValueError('Unsupported data format: {}'.format(fmt))
def load_plot():
    """Load the plots of movies provided @ http://dm.postech.ac.kr/~cartopy/ConvMF/

    Returns
    -------
    movie_plots: Dict
        A dictionary whose keys are movie ids (as strings) and whose values
        are the corresponding plot texts.

    Raises
    ------
    ValueError
        If a non-blank line does not contain the '::' separator.
    """
    movie_plots = {}
    fpath = cache(url='https://static.preferred.ai/cornac/datasets/movielens/ml_plot.zip',
                  unzip=True, relative_path='movielens/ml_plot.dat')
    # NOTE(review): the file is opened with the platform default encoding,
    # as before; pin an explicit encoding once the file's encoding is confirmed.
    with open(fpath, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank (e.g. trailing) lines carry no record; unpacking them
                # used to raise ValueError.
                continue
            # Split on the first separator only, so a plot that itself
            # contains '::' is kept whole instead of breaking the unpacking.
            movie_id, plot = line.split('::', 1)
            movie_plots[movie_id] = plot
    return movie_plots