# Copyright 2022-2025 MTS (Mobile Telesystems)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base classes for vector models."""
import typing as tp
import attr
import numpy as np
from implicit.gpu import HAS_CUDA
from rectools import InternalIds
from rectools.dataset import Dataset
from rectools.types import InternalIdsArray
from .base import ModelBase, ModelConfig_T, Scores
from .rank import Distance, ImplicitRanker
@attr.s(auto_attribs=True)
class Factors:
    """Embeddings and optional biases of a set of users or items.

    Attributes
    ----------
    embeddings : np.ndarray
        Embedding matrix.
        NOTE(review): presumably one row per user/item - confirm against producers.
    biases : np.ndarray, optional
        Bias values accompanying the embeddings, or ``None`` (the default)
        when there are no biases.
        NOTE(review): presumably 1-D with one value per embedding row - confirm.
    """

    embeddings: np.ndarray
    biases: tp.Optional[np.ndarray] = None
class VectorModel(ModelBase[ModelConfig_T]):
    """Base class for models that represent users and items as vectors.

    Recommendations are produced by handing user/item vectors to
    ``ImplicitRanker`` together with the configured distance.

    The base implementation leaves the following unimplemented, so they are
    expected to be provided elsewhere (presumably by subclasses):

    - ``u2i_dist`` and ``i2i_dist`` (``NotImplemented`` placeholders here);
    - ``_get_users_factors`` and ``_get_items_factors`` (raise
      ``NotImplementedError`` here);
    - ``recommend_n_threads`` and ``recommend_use_gpu_ranking`` (declared in
      ``__init__`` but never assigned there).
    """

    # Distances passed to the ranker for user-to-item and item-to-item ranking.
    u2i_dist: Distance = NotImplemented
    i2i_dist: Distance = NotImplemented

    def __init__(self, verbose: int = 0, **kwargs: tp.Any) -> None:
        """Initialize the base model.

        Parameters
        ----------
        verbose : int, default 0
            Forwarded to the parent class initializer.
        **kwargs : Any
            Accepted but not used by this initializer.
        """
        super().__init__(verbose=verbose)
        # Annotation-only declarations: no value is assigned here, yet both
        # attributes are read by `_recommend_u2i` and `_recommend_i2i`, so they
        # must be set before recommending.
        self.recommend_n_threads: int
        self.recommend_use_gpu_ranking: bool

    def _recommend_u2i(
        self,
        user_ids: InternalIdsArray,
        dataset: Dataset,
        k: int,
        filter_viewed: bool,
        sorted_item_ids_to_recommend: tp.Optional[InternalIdsArray],
    ) -> tp.Tuple[InternalIds, InternalIds, Scores]:
        """Rank items for the given users.

        Parameters
        ----------
        user_ids : InternalIdsArray
            Internal ids of the users to rank items for.
        dataset : Dataset
            Dataset used to obtain the vectors and, when `filter_viewed` is set,
            the user-item matrix.
        k : int
            Forwarded to the ranker as ``k``.
        filter_viewed : bool
            If true, the rows of the user-item matrix for `user_ids` are passed
            to the ranker as ``filter_pairs_csr``; otherwise ``None`` is passed.
        sorted_item_ids_to_recommend : InternalIdsArray, optional
            Forwarded to the ranker as ``sorted_object_whitelist``.

        Returns
        -------
        tuple
            The result of ``ImplicitRanker.rank``.
            NOTE(review): presumably (user ids, item ids, scores) - confirm.
        """
        if filter_viewed:
            user_items = dataset.get_user_item_matrix(include_weights=False)
            # Keep only the rows of the requested users.
            ui_csr_for_filter = user_items[user_ids]
        else:
            ui_csr_for_filter = None

        user_vectors, item_vectors = self._get_u2i_vectors(dataset)

        ranker = ImplicitRanker(
            self.u2i_dist,
            user_vectors,
            item_vectors,
            num_threads=self.recommend_n_threads,
            # GPU ranking is requested only when enabled AND CUDA is reported available.
            use_gpu=self.recommend_use_gpu_ranking and HAS_CUDA,
        )

        return ranker.rank(
            subject_ids=user_ids,
            k=k,
            filter_pairs_csr=ui_csr_for_filter,
            sorted_object_whitelist=sorted_item_ids_to_recommend,
        )

    def _recommend_i2i(
        self,
        target_ids: InternalIdsArray,
        dataset: Dataset,
        k: int,
        sorted_item_ids_to_recommend: tp.Optional[InternalIdsArray],
    ) -> tp.Tuple[InternalIds, InternalIds, Scores]:
        """Rank items for the given target items.

        Parameters
        ----------
        target_ids : InternalIdsArray
            Internal ids of the target items.
        dataset : Dataset
            Dataset used to obtain the item vectors.
        k : int
            Forwarded to the ranker as ``k``.
        sorted_item_ids_to_recommend : InternalIdsArray, optional
            Forwarded to the ranker as ``sorted_object_whitelist``.

        Returns
        -------
        tuple
            The result of ``ImplicitRanker.rank``.
            NOTE(review): presumably (target ids, item ids, scores) - confirm.
        """
        item_vectors_1, item_vectors_2 = self._get_i2i_vectors(dataset)

        ranker = ImplicitRanker(
            self.i2i_dist,
            item_vectors_1,
            item_vectors_2,
            num_threads=self.recommend_n_threads,
            # GPU ranking is requested only when enabled AND CUDA is reported available.
            use_gpu=self.recommend_use_gpu_ranking and HAS_CUDA,
        )

        return ranker.rank(
            subject_ids=target_ids,
            k=k,
            filter_pairs_csr=None,  # no pair filter is passed for i2i
            sorted_object_whitelist=sorted_item_ids_to_recommend,
        )

    def _process_biases_to_vectors(
        self,
        distance: Distance,
        subject_embeddings: np.ndarray,
        subject_biases: np.ndarray,
        object_embeddings: np.ndarray,
        object_biases: np.ndarray,
    ) -> tp.Tuple[np.ndarray, np.ndarray]:
        """Fold biases into the embeddings as extra leading columns.

        Parameters
        ----------
        distance : Distance
            Distance the resulting vectors will be compared with.
        subject_embeddings, object_embeddings : np.ndarray
            Embedding matrices; stacked column-wise with the bias columns.
        subject_biases, object_biases : np.ndarray
            Biases; indexed as 1-D arrays and turned into single columns.

        Returns
        -------
        tuple of np.ndarray
            ``(subject_vectors, object_vectors)`` with the biases included.

        Raises
        ------
        ValueError
            If `distance` is not DOT, COSINE or EUCLIDEAN.
        """
        # TODO: make it possible to control if add biases or not (even if they are present)
        if distance == Distance.DOT:
            # Subject rows are [bias_s, 1, emb_s] and object rows are [1, bias_o, emb_o],
            # so their dot product equals bias_s + bias_o + emb_s . emb_o.
            subject_vectors = np.hstack(
                (
                    subject_biases[:, np.newaxis],
                    np.ones((subject_biases.size, 1)),
                    subject_embeddings,
                )
            )
            object_vectors = np.hstack(
                (
                    np.ones((object_biases.size, 1)),
                    object_biases[:, np.newaxis],
                    object_embeddings,
                )
            )
        elif distance in (Distance.COSINE, Distance.EUCLIDEAN):
            # The bias is simply prepended as one additional coordinate on each side.
            subject_vectors = np.hstack((subject_biases[:, np.newaxis], subject_embeddings))
            object_vectors = np.hstack((object_biases[:, np.newaxis], object_embeddings))
        else:
            raise ValueError(f"Unexpected distance `{distance}`")
        return subject_vectors, object_vectors

    def _get_u2i_vectors(self, dataset: Dataset) -> tp.Tuple[np.ndarray, np.ndarray]:
        """Return ``(user_vectors, item_vectors)`` for user-to-item ranking.

        Biases are folded into the vectors only when BOTH user and item biases
        are present; otherwise the plain embeddings are returned.
        """
        user_factors = self._get_users_factors(dataset)
        item_factors = self._get_items_factors(dataset)

        user_vectors = user_factors.embeddings
        item_vectors = item_factors.embeddings
        user_biases = user_factors.biases
        item_biases = item_factors.biases

        if user_biases is not None and item_biases is not None:
            user_vectors, item_vectors = self._process_biases_to_vectors(
                self.u2i_dist, user_vectors, user_biases, item_vectors, item_biases
            )

        return user_vectors, item_vectors

    def _get_i2i_vectors(self, dataset: Dataset) -> tp.Tuple[np.ndarray, np.ndarray]:
        """Return the two item vector matrices used for item-to-item ranking.

        Without item biases both returned values are the same embeddings array.
        With biases, the item embeddings and biases are used for both the
        subject and the object side of ``_process_biases_to_vectors``.
        """
        item_factors = self._get_items_factors(dataset)
        item_vectors = item_factors.embeddings
        item_biases = item_factors.biases

        item_vectors_1 = item_vectors_2 = item_vectors
        if item_biases is not None:
            item_vectors_1, item_vectors_2 = self._process_biases_to_vectors(
                self.i2i_dist, item_vectors, item_biases, item_vectors, item_biases
            )

        return item_vectors_1, item_vectors_2

    def _get_users_factors(self, dataset: Dataset) -> Factors:
        """Return user factors for `dataset`; not implemented in the base class."""
        raise NotImplementedError()

    def _get_items_factors(self, dataset: Dataset) -> Factors:
        """Return item factors for `dataset`; not implemented in the base class."""
        raise NotImplementedError()