Simple example of building recommendations with RecTools

  • Building a simple model

  • Visually checking recommendations

[ ]:
import numpy as np
import pandas as pd

from implicit.nearest_neighbours import TFIDFRecommender

from rectools import Columns
from rectools.dataset import Dataset
from rectools.models import ImplicitItemKNNWrapperModel

Load data

[2]:
%%time
!wget -q https://files.grouplens.org/datasets/movielens/ml-1m.zip -O ml-1m.zip
!unzip -o ml-1m.zip
!rm ml-1m.zip
Archive:  ml-1m.zip
  inflating: ml-1m/movies.dat
  inflating: ml-1m/ratings.dat
  inflating: ml-1m/README
  inflating: ml-1m/users.dat
CPU times: user 134 ms, sys: 415 ms, total: 548 ms
Wall time: 4.39 s
[2]:
%%time
# Load the user-item ratings; the file has no header row, so the column
# names are supplied explicitly using the RecTools column constants.
rating_columns = [Columns.User, Columns.Item, Columns.Weight, Columns.Datetime]
ratings = pd.read_csv(
    "ml-1m/ratings.dat",
    sep="::",
    # The two-character separator requires the python parsing engine
    engine="python",
    header=None,
    names=rating_columns,
)
print(ratings.shape)
ratings.head()
(1000209, 4)
CPU times: user 5.76 s, sys: 409 ms, total: 6.17 s
Wall time: 6.16 s
[2]:
user_id item_id weight datetime
0 1 1193 5 978300760
1 1 661 3 978302109
2 1 914 3 978301968
3 1 3408 4 978300275
4 1 2355 5 978824291
[3]:
%%time
# Load movie metadata (item id, title, genres); the file has no header row.
# The file is Latin-1 encoded rather than UTF-8, so decode it explicitly:
# this keeps accented characters in titles instead of discarding the bytes
# that fail to decode.
movies = pd.read_csv(
    "ml-1m/movies.dat",
    sep="::",
    engine="python",  # Because of 2-chars separators
    header=None,
    names=[Columns.Item, "title", "genres"],
    encoding="latin-1",
)
print(movies.shape)
movies.head()
(3883, 3)
CPU times: user 9.55 ms, sys: 1.62 ms, total: 11.2 ms
Wall time: 10.4 ms
[3]:
item_id title genres
0 1 Toy Story (1995) Animation|Children's|Comedy
1 2 Jumanji (1995) Adventure|Children's|Fantasy
2 3 Grumpier Old Men (1995) Comedy|Romance
3 4 Waiting to Exhale (1995) Comedy|Drama
4 5 Father of the Bride Part II (1995) Comedy

Build model

[4]:
# Prepare a dataset to build a model: wrap the ratings interactions table
# (user, item, weight and datetime columns) into a RecTools Dataset
dataset = Dataset.construct(ratings)
[5]:
%%time
# Fit an item-KNN model (implicit's TFIDFRecommender with K=10, wrapped for
# RecTools) and generate 10 recommendations for every user in the ratings,
# with filter_viewed enabled
knn = TFIDFRecommender(K=10)
model = ImplicitItemKNNWrapperModel(knn)
model.fit(dataset)

all_users = ratings[Columns.User].unique()
recos = model.recommend(users=all_users, dataset=dataset, k=10, filter_viewed=True)
CPU times: user 6.05 s, sys: 274 ms, total: 6.32 s
Wall time: 1.42 s
[6]:
# Show the first rows of the recommendations table; for each user the rows
# are sorted by relevance (= rank)
recos.head()
[6]:
user_id item_id score rank
0 1 364 20.436578 1
1 1 1196 15.716834 2
2 1 318 15.625371 3
3 1 2096 14.876911 4
4 1 2571 12.718620 5

Check recommendations

[7]:
# Pick one user (a fixed id, chosen arbitrarily) and attach movie titles and
# genres to both their rating history and their recommendations
user_id = 3883
viewed_mask = ratings[Columns.User] == user_id
recos_mask = recos[Columns.User] == user_id
user_viewed = ratings[viewed_mask].merge(movies, on=Columns.Item)
user_recos = recos[recos_mask].merge(movies, on=Columns.Item)
[8]:
# Rating history restricted to films the user liked (rating above 3)
user_viewed[user_viewed[Columns.Weight] > 3]
[8]:
user_id item_id weight datetime title genres
0 3883 2997 5 967134212 Being John Malkovich (1999) Comedy
2 3883 1265 5 967134285 Groundhog Day (1993) Comedy|Romance
4 3883 2858 5 965822230 American Beauty (1999) Comedy|Drama
10 3883 2369 4 965822136 Desperately Seeking Susan (1985) Comedy|Romance
14 3883 3189 4 965822296 My Dog Skip (1999) Comedy
16 3883 1784 4 965822136 As Good As It Gets (1997) Comedy|Drama
17 3883 2599 4 967134250 Election (1999) Comedy
18 3883 34 4 967134285 Babe (1995) Children's|Comedy|Drama
[9]:
# Recommendations for the selected user, best rank first
user_recos.sort_values(by=Columns.Rank)
[9]:
user_id item_id score rank title genres
0 3883 2396 13.991358 1 Shakespeare in Love (1998) Comedy|Romance
1 3883 2762 10.249648 2 Sixth Sense, The (1999) Thriller
2 3883 318 7.728188 3 Shawshank Redemption, The (1994) Drama
3 3883 608 7.617913 4 Fargo (1996) Crime|Drama|Thriller
4 3883 356 5.674010 5 Forrest Gump (1994) Comedy|Romance|War
5 3883 2395 5.508895 6 Rushmore (1998) Comedy
6 3883 223 5.398012 7 Clerks (1994) Comedy
7 3883 593 5.335058 8 Silence of the Lambs, The (1991) Drama|Thriller
8 3883 296 4.828189 9 Pulp Fiction (1994) Crime|Drama
9 3883 2959 4.615653 10 Fight Club (1999) Drama

This is a simple example: we used only ratings to train the model, and we prepared recommendations only for users who have rated movies before. However, some models allow you to use explicit features, e.g. user age or item genre, and some models can generate recommendations for users who have not rated any movies before. See the documentation for details.

[ ]: