Source code for rectools.models.vector

#  Copyright 2022-2025 MTS (Mobile Telesystems)
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""Base classes for vector models."""

import typing as tp

import attr
import numpy as np
from implicit.gpu import HAS_CUDA

from rectools import InternalIds
from rectools.dataset import Dataset
from rectools.types import InternalIdsArray

from .base import ModelBase, ModelConfig_T, Scores
from .rank import Distance, ImplicitRanker


@attr.s(auto_attribs=True)
class Factors:
    """
    Container for embeddings and optional biases of one kind of entity.

    Attributes
    ----------
    embeddings : np.ndarray
        Embedding matrix. `VectorModel` stacks it column-wise with biases,
        so presumably it is 2-D with one row per entity.
    biases : np.ndarray, optional, default ``None``
        Biases, or ``None`` when there are none. `VectorModel` indexes them as
        ``biases[:, np.newaxis]``, i.e. treats them as a 1-D array
        (presumably one value per row of `embeddings`).
    """

    embeddings: np.ndarray
    biases: tp.Optional[np.ndarray] = None
class VectorModel(ModelBase[ModelConfig_T]):
    """
    Base class for models that represent users and items as vectors.

    The class itself provides neither distances nor factors:
    `u2i_dist` and `i2i_dist` default to ``NotImplemented``,
    `recommend_n_threads` and `recommend_use_gpu_ranking` are only declared (not assigned)
    in ``__init__``, and `_get_users_factors` / `_get_items_factors` raise ``NotImplementedError``.
    All of them have to be supplied elsewhere (presumably by subclasses) before recommending.
    """

    # Distances passed to the ranker for user-to-item and item-to-item recommendations.
    u2i_dist: Distance = NotImplemented
    i2i_dist: Distance = NotImplemented

    def __init__(self, verbose: int = 0, **kwargs: tp.Any) -> None:
        """
        Initialize the base model.

        Parameters
        ----------
        verbose : int, default 0
            Forwarded to the parent constructor.
        **kwargs
            Accepted but not used here.
        """
        super().__init__(verbose=verbose)
        # Declarations only: no values are assigned here, they must be set before recommending.
        self.recommend_n_threads: int
        self.recommend_use_gpu_ranking: bool

    def _recommend_u2i(
        self,
        user_ids: InternalIdsArray,
        dataset: Dataset,
        k: int,
        filter_viewed: bool,
        sorted_item_ids_to_recommend: tp.Optional[InternalIdsArray],
    ) -> tp.Tuple[InternalIds, InternalIds, Scores]:
        """
        Rank items for the given users with `u2i_dist`.

        When `filter_viewed` is true, the rows of the user-item matrix that correspond
        to `user_ids` are passed to the ranker as pairs to filter;
        otherwise no filter is passed. Returns the result of ``ImplicitRanker.rank``.
        """
        viewed_pairs_csr = (
            dataset.get_user_item_matrix(include_weights=False)[user_ids] if filter_viewed else None
        )

        subject_vectors, object_vectors = self._get_u2i_vectors(dataset)

        u2i_ranker = ImplicitRanker(
            self.u2i_dist,
            subject_vectors,
            object_vectors,
            num_threads=self.recommend_n_threads,
            # GPU ranking is used only if it is both requested and CUDA is available
            use_gpu=self.recommend_use_gpu_ranking and HAS_CUDA,
        )
        return u2i_ranker.rank(
            subject_ids=user_ids,
            k=k,
            filter_pairs_csr=viewed_pairs_csr,
            sorted_object_whitelist=sorted_item_ids_to_recommend,
        )

    def _recommend_i2i(
        self,
        target_ids: InternalIdsArray,
        dataset: Dataset,
        k: int,
        sorted_item_ids_to_recommend: tp.Optional[InternalIdsArray],
    ) -> tp.Tuple[InternalIds, InternalIds, Scores]:
        """
        Rank items for the given target items with `i2i_dist`.

        No pairs filter is passed to the ranker. Returns the result of ``ImplicitRanker.rank``.
        """
        target_side_vectors, candidate_side_vectors = self._get_i2i_vectors(dataset)

        i2i_ranker = ImplicitRanker(
            self.i2i_dist,
            target_side_vectors,
            candidate_side_vectors,
            num_threads=self.recommend_n_threads,
            # GPU ranking is used only if it is both requested and CUDA is available
            use_gpu=self.recommend_use_gpu_ranking and HAS_CUDA,
        )
        return i2i_ranker.rank(
            subject_ids=target_ids,
            k=k,
            filter_pairs_csr=None,
            sorted_object_whitelist=sorted_item_ids_to_recommend,
        )

    def _process_biases_to_vectors(
        self,
        distance: Distance,
        subject_embeddings: np.ndarray,
        subject_biases: np.ndarray,
        object_embeddings: np.ndarray,
        object_biases: np.ndarray,
    ) -> tp.Tuple[np.ndarray, np.ndarray]:
        """
        Merge biases into embeddings as extra leading columns.

        For ``Distance.DOT`` subjects become ``[bias, 1, embedding]`` and objects become
        ``[1, bias, embedding]``, so the dot product of the resulting vectors equals
        ``subject_bias + object_bias + <subject_embedding, object_embedding>``.
        For ``Distance.COSINE`` and ``Distance.EUCLIDEAN`` the bias is simply prepended
        as one more coordinate on both sides.

        Returns
        -------
        tuple(np.ndarray, np.ndarray)
            Subject vectors and object vectors.

        Raises
        ------
        ValueError
            If `distance` is none of the distances listed above.
        """
        # TODO: make it possible to control if add biases or not (even if they are present)
        subject_bias_column = subject_biases[:, np.newaxis]

        if distance == Distance.DOT:
            subject_ones_column = np.ones((subject_biases.size, 1))
            with_subject_biases = np.hstack((subject_bias_column, subject_ones_column, subject_embeddings))

            object_ones_column = np.ones((object_biases.size, 1))
            object_bias_column = object_biases[:, np.newaxis]
            with_object_biases = np.hstack((object_ones_column, object_bias_column, object_embeddings))
            return with_subject_biases, with_object_biases

        if distance in (Distance.COSINE, Distance.EUCLIDEAN):
            with_subject_biases = np.hstack((subject_bias_column, subject_embeddings))
            with_object_biases = np.hstack((object_biases[:, np.newaxis], object_embeddings))
            return with_subject_biases, with_object_biases

        raise ValueError(f"Unexpected distance `{distance}`")

    def _get_u2i_vectors(self, dataset: Dataset) -> tp.Tuple[np.ndarray, np.ndarray]:
        """
        Return user and item vectors for user-to-item ranking.

        Biases are merged into the vectors only when both user and item biases are present;
        otherwise plain embeddings are returned.
        """
        users = self._get_users_factors(dataset)
        items = self._get_items_factors(dataset)

        user_embeddings, item_embeddings = users.embeddings, items.embeddings
        user_biases, item_biases = users.biases, items.biases

        if user_biases is None or item_biases is None:
            return user_embeddings, item_embeddings

        return self._process_biases_to_vectors(
            self.u2i_dist, user_embeddings, user_biases, item_embeddings, item_biases
        )

    def _get_i2i_vectors(self, dataset: Dataset) -> tp.Tuple[np.ndarray, np.ndarray]:
        """
        Return two sets of item vectors for item-to-item ranking.

        Without item biases both elements are the very same embeddings array;
        with biases the item factors are used as both subjects and objects of bias merging.
        """
        items = self._get_items_factors(dataset)
        embeddings = items.embeddings
        biases = items.biases

        if biases is None:
            return embeddings, embeddings

        return self._process_biases_to_vectors(self.i2i_dist, embeddings, biases, embeddings, biases)

    def _get_users_factors(self, dataset: Dataset) -> Factors:
        """Return user factors; not implemented in this base class."""
        raise NotImplementedError()

    def _get_items_factors(self, dataset: Dataset) -> Factors:
        """Return item factors; not implemented in this base class."""
        raise NotImplementedError()