Source code for rectools.models.pure_svd

#  Copyright 2022-2025 MTS (Mobile Telesystems)
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""SVD Model."""

import typing as tp
import warnings

import numpy as np
import typing_extensions as tpe
from scipy.sparse.linalg import svds

from rectools.dataset import Dataset
from rectools.exceptions import NotFittedError
from rectools.models.base import ModelConfig
from rectools.models.rank import Distance
from rectools.models.vector import Factors, VectorModel

try:
    import cupy as cp
    from cupyx.scipy.sparse import csr_matrix as cp_csr_matrix
    from cupyx.scipy.sparse.linalg import svds as cupy_svds
except ImportError:  # pragma: no cover
    cupy_svds = None
    cp_csr_matrix = None
    cp = None


[docs]class PureSVDModelConfig(ModelConfig): """Config for `PureSVD` model.""" factors: int = 10 tol: float = 0 maxiter: tp.Optional[int] = None random_state: tp.Optional[int] = None use_gpu: tp.Optional[bool] = False recommend_n_threads: int = 0 recommend_use_gpu_ranking: bool = True
[docs]class PureSVDModel(VectorModel[PureSVDModelConfig]): """ PureSVD matrix factorization model. See https://dl.acm.org/doi/10.1145/1864708.1864721 Parameters ---------- factors : int, default ``10`` The number of latent factors to compute. tol : float, default 0 Tolerance for singular values. Zero means machine precision. maxiter : int, optional, default ``None`` Maximum number of iterations. random_state : int, optional, default ``None`` Pseudorandom number generator state used to generate resamples. Omitted if use_gpu is True. use_gpu : bool, default ``False`` If ``True``, `cupyx.scipy.sparse.linalg.svds()` is used instead of SciPy. CuPy is required. verbose : int, default ``0`` Degree of verbose output. If ``0``, no output will be provided. recommend_n_threads: int, default 0 Number of threads to use for recommendation ranking on CPU. Specifying ``0`` means to default to the number of cores on the machine. If you want to change this parameter after model is initialized, you can manually assign new value to model `recommend_n_threads` attribute. recommend_use_gpu_ranking: bool, default ``True`` Flag to use GPU for recommendation ranking. Please note that GPU and CPU ranking may provide different ordering of items with identical scores in recommendation table. If ``True``, `implicit.gpu.HAS_CUDA` will also be checked before ranking. If you want to change this parameter after model is initialized, you can manually assign new value to model `recommend_use_gpu_ranking` attribute. """ recommends_for_warm = False recommends_for_cold = False u2i_dist = Distance.DOT i2i_dist = Distance.COSINE config_class = PureSVDModelConfig def __init__( self, factors: int = 10, tol: float = 0, maxiter: tp.Optional[int] = None, random_state: tp.Optional[int] = None, use_gpu: tp.Optional[bool] = False, verbose: int = 0, recommend_n_threads: int = 0, recommend_use_gpu_ranking: bool = True, ): super().__init__(verbose=verbose) self.factors = factors self.tol = tol self.maxiter = maxiter self.random_state = random_state self._use_gpu = use_gpu # for making a config if use_gpu: # pragma: no cover if not cp: warnings.warn("Forced to use CPU. CuPy is not available.") use_gpu = False elif not cp.cuda.is_available(): warnings.warn("Forced to use CPU. GPU is not available.") use_gpu = False self.use_gpu = use_gpu self.recommend_n_threads = recommend_n_threads self.recommend_use_gpu_ranking = recommend_use_gpu_ranking self.user_factors: np.ndarray self.item_factors: np.ndarray def _get_config(self) -> PureSVDModelConfig: return PureSVDModelConfig( cls=self.__class__, factors=self.factors, tol=self.tol, maxiter=self.maxiter, random_state=self.random_state, use_gpu=self._use_gpu, verbose=self.verbose, recommend_n_threads=self.recommend_n_threads, recommend_use_gpu_ranking=self.recommend_use_gpu_ranking, ) @classmethod def _from_config(cls, config: PureSVDModelConfig) -> tpe.Self: return cls( factors=config.factors, tol=config.tol, maxiter=config.maxiter, random_state=config.random_state, use_gpu=config.use_gpu, verbose=config.verbose, recommend_n_threads=config.recommend_n_threads, recommend_use_gpu_ranking=config.recommend_use_gpu_ranking, ) def _fit(self, dataset: Dataset) -> None: # type: ignore ui_csr = dataset.get_user_item_matrix(include_weights=True) if self.use_gpu: # pragma: no cover ui_csr = cp_csr_matrix(ui_csr) # To prevent IndexError, we need to subtract 1 from factors u, sigma, vt = cupy_svds(ui_csr.toarray(), k=self.factors - 1, tol=self.tol, maxiter=self.maxiter) u = u.get() self.item_factors = (cp.diag(sigma) @ vt).T.get() else: u, sigma, vt = svds( ui_csr, k=self.factors, tol=self.tol, maxiter=self.maxiter, random_state=self.random_state ) self.item_factors = (np.diag(sigma) @ vt).T self.user_factors = u def _get_users_factors(self, dataset: Dataset) -> Factors: return Factors(self.user_factors) def _get_items_factors(self, dataset: Dataset) -> Factors: return Factors(self.item_factors)
[docs] def get_vectors(self) -> tp.Tuple[np.ndarray, np.ndarray]: """ Return user and item vector representations from fitted model. Returns ------- (np.ndarray, np.ndarray) User and item embeddings. Shapes are (n_users, n_factors) and (n_items, n_factors). """ if not self.is_fitted: raise NotFittedError(self.__class__.__name__) return self.user_factors, self.item_factors