Simple example of building recommendations with RecTools

  • Building a simple model

  • Visually checking the recommendations

[2]:
import numpy as np
import pandas as pd

from implicit.nearest_neighbours import TFIDFRecommender

from rectools import Columns
from rectools.dataset import Dataset
from rectools.models import ImplicitItemKNNWrapperModel

Load data

[3]:
%%time
!wget https://files.grouplens.org/datasets/movielens/ml-1m.zip
!unzip ml-1m.zip
--2022-07-28 11:00:39--  https://files.grouplens.org/datasets/movielens/ml-1m.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5917549 (5,6M) [application/zip]
Saving to: ‘ml-1m.zip.1’

ml-1m.zip.1         100%[===================>]   5,64M  4,04MB/s    in 1,4s

2022-07-28 11:00:41 (4,04 MB/s) - ‘ml-1m.zip.1’ saved [5917549/5917549]

Archive:  ml-1m.zip
   creating: ml-1m/
  inflating: ml-1m/movies.dat
  inflating: ml-1m/ratings.dat
  inflating: ml-1m/README
  inflating: ml-1m/users.dat
CPU times: user 43.8 ms, sys: 28.6 ms, total: 72.4 ms
Wall time: 2.38 s
[4]:
%%time
ratings = pd.read_csv(
    "ml-1m/ratings.dat",
    sep="::",
    engine="python",  # Because of 2-chars separators
    header=None,
    names=[Columns.User, Columns.Item, Columns.Weight, Columns.Datetime],
)
print(ratings.shape)
ratings.head()
(1000209, 4)
CPU times: user 4 s, sys: 173 ms, total: 4.17 s
Wall time: 4.18 s
[4]:
user_id item_id weight datetime
0 1 1193 5 978300760
1 1 661 3 978302109
2 1 914 3 978301968
3 1 3408 4 978300275
4 1 2355 5 978824291
[5]:
%%time
movies = pd.read_csv(
    "ml-1m/movies.dat",
    sep="::",
    engine="python",  # Because of 2-chars separators
    header=None,
    names=[Columns.Item, "title", "genres"],
)
print(movies.shape)
movies.head()
(3883, 3)
CPU times: user 13.3 ms, sys: 1.91 ms, total: 15.2 ms
Wall time: 14 ms
[5]:
item_id title genres
0 1 Toy Story (1995) Animation|Children's|Comedy
1 2 Jumanji (1995) Adventure|Children's|Fantasy
2 3 Grumpier Old Men (1995) Comedy|Romance
3 4 Waiting to Exhale (1995) Comedy|Drama
4 5 Father of the Bride Part II (1995) Comedy

Build model

[6]:
# Wrap the raw interactions in a RecTools Dataset; the model is fitted on it
# and it is passed again when generating recommendations
dataset = Dataset.construct(interactions_df=ratings)
[7]:
%%time
# Fit model and generate recommendations for all users
model = ImplicitItemKNNWrapperModel(TFIDFRecommender(K=10))
model.fit(dataset)
recos = model.recommend(
    users=ratings[Columns.User].unique(),
    dataset=dataset,
    k=10,
    filter_viewed=True,
)
CPU times: user 2.35 s, sys: 55.5 ms, total: 2.41 s
Wall time: 2.42 s
[8]:
# Peek at the first rows of the recommendations table:
# within each user, rows are ordered by relevance (rank 1 = most relevant)
recos.head(n=5)
[8]:
user_id item_id score rank
0 1 364 20.436578 1
1 1 1196 15.716834 2
2 1 318 15.625371 3
3 1 2096 14.876911 4
4 1 2571 12.718620 5

Check recommendations

[9]:
# Pick one example user, then gather that user's rating history and recommendations,
# each enriched with movie titles and genres
user_id = 3883
user_viewed = ratings.loc[ratings[Columns.User] == user_id].merge(movies, on=Columns.Item)
user_recos = recos.loc[recos[Columns.User] == user_id].merge(movies, on=Columns.Item)
[10]:
# Viewing history restricted to films the user rated above 3 (i.e. liked)
user_viewed[user_viewed[Columns.Weight] > 3]
[10]:
user_id item_id weight datetime title genres
0 3883 2997 5 967134212 Being John Malkovich (1999) Comedy
2 3883 1265 5 967134285 Groundhog Day (1993) Comedy|Romance
4 3883 2858 5 965822230 American Beauty (1999) Comedy|Drama
10 3883 2369 4 965822136 Desperately Seeking Susan (1985) Comedy|Romance
14 3883 3189 4 965822296 My Dog Skip (1999) Comedy
16 3883 1784 4 965822136 As Good As It Gets (1997) Comedy|Drama
17 3883 2599 4 967134250 Election (1999) Comedy
18 3883 34 4 967134285 Babe (1995) Children's|Comedy|Drama
[11]:
# Recommended films for the same user, best rank first
user_recos.sort_values(by=Columns.Rank)
[11]:
user_id item_id score rank title genres
0 3883 2396 13.991358 1 Shakespeare in Love (1998) Comedy|Romance
1 3883 2762 10.249648 2 Sixth Sense, The (1999) Thriller
2 3883 318 7.728188 3 Shawshank Redemption, The (1994) Drama
3 3883 608 7.617913 4 Fargo (1996) Crime|Drama|Thriller
4 3883 356 5.674010 5 Forrest Gump (1994) Comedy|Romance|War
5 3883 2395 5.508895 6 Rushmore (1998) Comedy
6 3883 223 5.398012 7 Clerks (1994) Comedy
7 3883 593 5.335058 8 Silence of the Lambs, The (1991) Drama|Thriller
8 3883 296 4.828189 9 Pulp Fiction (1994) Crime|Drama
9 3883 2959 4.615653 10 Fight Club (1999) Drama
[ ]: