{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Simple example of building recommendations with RecTools\n", "\n", "\n", "- Building a simple model\n", "- Visually checking the recommendations" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "\n", "from implicit.nearest_neighbours import TFIDFRecommender\n", "\n", "from rectools import Columns\n", "from rectools.dataset import Dataset\n", "from rectools.models import ImplicitItemKNNWrapperModel" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Archive: ml-1m.zip\n", " inflating: ml-1m/movies.dat \n", " inflating: ml-1m/ratings.dat \n", " inflating: ml-1m/README \n", " inflating: ml-1m/users.dat \n", "CPU times: user 134 ms, sys: 415 ms, total: 548 ms\n", "Wall time: 4.39 s\n" ] } ], "source": [ "%%time\n", "!wget -q https://files.grouplens.org/datasets/movielens/ml-1m.zip -O ml-1m.zip\n", "!unzip -o ml-1m.zip\n", "!rm ml-1m.zip" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(1000209, 4)\n", "CPU times: user 5.76 s, sys: 409 ms, total: 6.17 s\n", "Wall time: 6.16 s\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_iditem_idweightdatetime
0111935978300760
116613978302109
219143978301968
3134084978300275
4123555978824291
\n", "
" ], "text/plain": [ " user_id item_id weight datetime\n", "0 1 1193 5 978300760\n", "1 1 661 3 978302109\n", "2 1 914 3 978301968\n", "3 1 3408 4 978300275\n", "4 1 2355 5 978824291" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "ratings = pd.read_csv(\n", " \"ml-1m/ratings.dat\", \n", " sep=\"::\",\n", " engine=\"python\", # Because of 2-chars separators\n", " header=None,\n", " names=[Columns.User, Columns.Item, Columns.Weight, Columns.Datetime],\n", ")\n", "print(ratings.shape)\n", "ratings.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(3883, 3)\n", "CPU times: user 9.55 ms, sys: 1.62 ms, total: 11.2 ms\n", "Wall time: 10.4 ms\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
item_idtitlegenres
01Toy Story (1995)Animation|Children's|Comedy
12Jumanji (1995)Adventure|Children's|Fantasy
23Grumpier Old Men (1995)Comedy|Romance
34Waiting to Exhale (1995)Comedy|Drama
45Father of the Bride Part II (1995)Comedy
\n", "
" ], "text/plain": [ " item_id title genres\n", "0 1 Toy Story (1995) Animation|Children's|Comedy\n", "1 2 Jumanji (1995) Adventure|Children's|Fantasy\n", "2 3 Grumpier Old Men (1995) Comedy|Romance\n", "3 4 Waiting to Exhale (1995) Comedy|Drama\n", "4 5 Father of the Bride Part II (1995) Comedy" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "movies = pd.read_csv(\n", " \"ml-1m/movies.dat\", \n", " sep=\"::\",\n", " engine=\"python\", # Because of 2-chars separators\n", " header=None,\n", " names=[Columns.Item, \"title\", \"genres\"],\n", " encoding_errors=\"ignore\",\n", ")\n", "print(movies.shape)\n", "movies.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Build model" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Prepare a dataset to build a model\n", "dataset = Dataset.construct(ratings)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 6.05 s, sys: 274 ms, total: 6.32 s\n", "Wall time: 1.42 s\n" ] } ], "source": [ "%%time\n", "# Fit model and generate recommendations for all users\n", "model = ImplicitItemKNNWrapperModel(TFIDFRecommender(K=10))\n", "model.fit(dataset)\n", "recos = model.recommend(\n", " users=ratings[Columns.User].unique(),\n", " dataset=dataset,\n", " k=10,\n", " filter_viewed=True,\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_iditem_idscorerank
0136420.4365781
11119615.7168342
2131815.6253713
31209614.8769114
41257112.7186205
\n", "
" ], "text/plain": [ " user_id item_id score rank\n", "0 1 364 20.436578 1\n", "1 1 1196 15.716834 2\n", "2 1 318 15.625371 3\n", "3 1 2096 14.876911 4\n", "4 1 2571 12.718620 5" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Sample of recommendations - it's sorted by relevance (= rank) for each user\n", "recos.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Check recommendations" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Pick one user by a fixed id, then collect that user's viewing history and recommendations\n", "user_id = 3883\n", "user_viewed = ratings.query(\"user_id == @user_id\").merge(movies, on=\"item_id\")\n", "user_recos = recos.query(\"user_id == @user_id\").merge(movies, on=\"item_id\")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_iditem_idweightdatetimetitlegenres
0388329975967134212Being John Malkovich (1999)Comedy
2388312655967134285Groundhog Day (1993)Comedy|Romance
4388328585965822230American Beauty (1999)Comedy|Drama
10388323694965822136Desperately Seeking Susan (1985)Comedy|Romance
14388331894965822296My Dog Skip (1999)Comedy
16388317844965822136As Good As It Gets (1997)Comedy|Drama
17388325994967134250Election (1999)Comedy
183883344967134285Babe (1995)Children's|Comedy|Drama
\n", "
" ], "text/plain": [ " user_id item_id weight datetime title \\\n", "0 3883 2997 5 967134212 Being John Malkovich (1999) \n", "2 3883 1265 5 967134285 Groundhog Day (1993) \n", "4 3883 2858 5 965822230 American Beauty (1999) \n", "10 3883 2369 4 965822136 Desperately Seeking Susan (1985) \n", "14 3883 3189 4 965822296 My Dog Skip (1999) \n", "16 3883 1784 4 965822136 As Good As It Gets (1997) \n", "17 3883 2599 4 967134250 Election (1999) \n", "18 3883 34 4 967134285 Babe (1995) \n", "\n", " genres \n", "0 Comedy \n", "2 Comedy|Romance \n", "4 Comedy|Drama \n", "10 Comedy|Romance \n", "14 Comedy \n", "16 Comedy|Drama \n", "17 Comedy \n", "18 Children's|Comedy|Drama " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# History, but only films that user likes\n", "user_viewed.query(\"weight > 3\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_iditem_idscoreranktitlegenres
03883239613.9913581Shakespeare in Love (1998)Comedy|Romance
13883276210.2496482Sixth Sense, The (1999)Thriller
238833187.7281883Shawshank Redemption, The (1994)Drama
338836087.6179134Fargo (1996)Crime|Drama|Thriller
438833565.6740105Forrest Gump (1994)Comedy|Romance|War
5388323955.5088956Rushmore (1998)Comedy
638832235.3980127Clerks (1994)Comedy
738835935.3350588Silence of the Lambs, The (1991)Drama|Thriller
838832964.8281899Pulp Fiction (1994)Crime|Drama
9388329594.61565310Fight Club (1999)Drama
\n", "
" ], "text/plain": [ " user_id item_id score rank title \\\n", "0 3883 2396 13.991358 1 Shakespeare in Love (1998) \n", "1 3883 2762 10.249648 2 Sixth Sense, The (1999) \n", "2 3883 318 7.728188 3 Shawshank Redemption, The (1994) \n", "3 3883 608 7.617913 4 Fargo (1996) \n", "4 3883 356 5.674010 5 Forrest Gump (1994) \n", "5 3883 2395 5.508895 6 Rushmore (1998) \n", "6 3883 223 5.398012 7 Clerks (1994) \n", "7 3883 593 5.335058 8 Silence of the Lambs, The (1991) \n", "8 3883 296 4.828189 9 Pulp Fiction (1994) \n", "9 3883 2959 4.615653 10 Fight Club (1999) \n", "\n", " genres \n", "0 Comedy|Romance \n", "1 Thriller \n", "2 Drama \n", "3 Crime|Drama|Thriller \n", "4 Comedy|Romance|War \n", "5 Comedy \n", "6 Comedy \n", "7 Drama|Thriller \n", "8 Crime|Drama \n", "9 Drama " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Recommendations\n", "user_recos.sort_values(\"rank\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This is a simple example: we used only ratings to train the model, and we prepared recommendations only for users who have rated movies before. However, some models allow you to use explicit features, e.g. user age or item genre, and some models can generate recommendations for users who have not rated any movies before. See the [documentation](https://rectools.readthedocs.io/en/stable/features.html#models) for details." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": true, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 1 }