Simple example of building recommendations with RecTools
Building a simple model
Visually checking recommendations
[2]:
import numpy as np
import pandas as pd
from implicit.nearest_neighbours import TFIDFRecommender
from rectools import Columns
from rectools.dataset import Dataset
from rectools.models import ImplicitItemKNNWrapperModel
Load data
[3]:
%%time
!wget https://files.grouplens.org/datasets/movielens/ml-1m.zip
!unzip ml-1m.zip
--2022-07-28 11:00:39-- https://files.grouplens.org/datasets/movielens/ml-1m.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5917549 (5,6M) [application/zip]
Saving to: ‘ml-1m.zip.1’
ml-1m.zip.1 100%[===================>] 5,64M 4,04MB/s in 1,4s
2022-07-28 11:00:41 (4,04 MB/s) - ‘ml-1m.zip.1’ saved [5917549/5917549]
Archive: ml-1m.zip
creating: ml-1m/
inflating: ml-1m/movies.dat
inflating: ml-1m/ratings.dat
inflating: ml-1m/README
inflating: ml-1m/users.dat
CPU times: user 43.8 ms, sys: 28.6 ms, total: 72.4 ms
Wall time: 2.38 s
[4]:
%%time
ratings = pd.read_csv(
"ml-1m/ratings.dat",
sep="::",
engine="python", # Because of 2-chars separators
header=None,
names=[Columns.User, Columns.Item, Columns.Weight, Columns.Datetime],
)
print(ratings.shape)
ratings.head()
(1000209, 4)
CPU times: user 4 s, sys: 173 ms, total: 4.17 s
Wall time: 4.18 s
[4]:
| user_id | item_id | weight | datetime | |
|---|---|---|---|---|
| 0 | 1 | 1193 | 5 | 978300760 |
| 1 | 1 | 661 | 3 | 978302109 |
| 2 | 1 | 914 | 3 | 978301968 |
| 3 | 1 | 3408 | 4 | 978300275 |
| 4 | 1 | 2355 | 5 | 978824291 |
[5]:
%%time
movies = pd.read_csv(
"ml-1m/movies.dat",
sep="::",
engine="python", # Because of 2-chars separators
header=None,
names=[Columns.Item, "title", "genres"],
)
print(movies.shape)
movies.head()
(3883, 3)
CPU times: user 13.3 ms, sys: 1.91 ms, total: 15.2 ms
Wall time: 14 ms
[5]:
| item_id | title | genres | |
|---|---|---|---|
| 0 | 1 | Toy Story (1995) | Animation|Children's|Comedy |
| 1 | 2 | Jumanji (1995) | Adventure|Children's|Fantasy |
| 2 | 3 | Grumpier Old Men (1995) | Comedy|Romance |
| 3 | 4 | Waiting to Exhale (1995) | Comedy|Drama |
| 4 | 5 | Father of the Bride Part II (1995) | Comedy |
Build model
[6]:
# Build the RecTools dataset from the ratings table; this same object is
# passed to both `model.fit` and `model.recommend` when the model is built.
dataset = Dataset.construct(ratings)
[7]:
%%time
# Fit model and generate recommendations for all users
model = ImplicitItemKNNWrapperModel(TFIDFRecommender(K=10))
model.fit(dataset)
recos = model.recommend(
users=ratings[Columns.User].unique(),
dataset=dataset,
k=10,
filter_viewed=True,
)
CPU times: user 2.35 s, sys: 55.5 ms, total: 2.41 s
Wall time: 2.42 s
[8]:
# Peek at the first rows of the recommendations table; within each user the
# rows are ordered by relevance, i.e. by ascending rank.
recos.head(n=5)
[8]:
| user_id | item_id | score | rank | |
|---|---|---|---|---|
| 0 | 1 | 364 | 20.436578 | 1 |
| 1 | 1 | 1196 | 15.716834 | 2 |
| 2 | 1 | 318 | 15.625371 | 3 |
| 3 | 1 | 2096 | 14.876911 | 4 |
| 4 | 1 | 2571 | 12.718620 | 5 |
Check recommendations
[9]:
# Inspect one fixed user: collect their rating history and their
# recommendations, each joined with the movie titles and genres.
user_id = 3883
in_ratings = ratings[Columns.User] == user_id
in_recos = recos[Columns.User] == user_id
user_viewed = ratings[in_ratings].merge(movies, on=Columns.Item)
user_recos = recos[in_recos].merge(movies, on=Columns.Item)
[10]:
# Rating history restricted to the movies this user liked (rating above 3)
user_viewed[user_viewed[Columns.Weight] > 3]
[10]:
| user_id | item_id | weight | datetime | title | genres | |
|---|---|---|---|---|---|---|
| 0 | 3883 | 2997 | 5 | 967134212 | Being John Malkovich (1999) | Comedy |
| 2 | 3883 | 1265 | 5 | 967134285 | Groundhog Day (1993) | Comedy|Romance |
| 4 | 3883 | 2858 | 5 | 965822230 | American Beauty (1999) | Comedy|Drama |
| 10 | 3883 | 2369 | 4 | 965822136 | Desperately Seeking Susan (1985) | Comedy|Romance |
| 14 | 3883 | 3189 | 4 | 965822296 | My Dog Skip (1999) | Comedy |
| 16 | 3883 | 1784 | 4 | 965822136 | As Good As It Gets (1997) | Comedy|Drama |
| 17 | 3883 | 2599 | 4 | 967134250 | Election (1999) | Comedy |
| 18 | 3883 | 34 | 4 | 967134285 | Babe (1995) | Children's|Comedy|Drama |
[11]:
# Recommendations for the selected user, best-ranked first
user_recos.sort_values(by=Columns.Rank, ascending=True)
[11]:
| user_id | item_id | score | rank | title | genres | |
|---|---|---|---|---|---|---|
| 0 | 3883 | 2396 | 13.991358 | 1 | Shakespeare in Love (1998) | Comedy|Romance |
| 1 | 3883 | 2762 | 10.249648 | 2 | Sixth Sense, The (1999) | Thriller |
| 2 | 3883 | 318 | 7.728188 | 3 | Shawshank Redemption, The (1994) | Drama |
| 3 | 3883 | 608 | 7.617913 | 4 | Fargo (1996) | Crime|Drama|Thriller |
| 4 | 3883 | 356 | 5.674010 | 5 | Forrest Gump (1994) | Comedy|Romance|War |
| 5 | 3883 | 2395 | 5.508895 | 6 | Rushmore (1998) | Comedy |
| 6 | 3883 | 223 | 5.398012 | 7 | Clerks (1994) | Comedy |
| 7 | 3883 | 593 | 5.335058 | 8 | Silence of the Lambs, The (1991) | Drama|Thriller |
| 8 | 3883 | 296 | 4.828189 | 9 | Pulp Fiction (1994) | Crime|Drama |
| 9 | 3883 | 2959 | 4.615653 | 10 | Fight Club (1999) | Drama |
[ ]: