Source code for rectools.models.pure_svd

#  Copyright 2022-2024 MTS (Mobile Telesystems)
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""SVD Model."""

import typing as tp

import numpy as np
import typing_extensions as tpe
from scipy.sparse.linalg import svds

from rectools.dataset import Dataset
from rectools.exceptions import NotFittedError
from rectools.models.base import ModelConfig
from rectools.models.rank import Distance
from rectools.models.vector import Factors, VectorModel


class PureSVDModelConfig(ModelConfig):
    """
    Config for `PureSVD` model.

    Declares the hyperparameters that `PureSVDModel` accepts in its constructor
    (apart from ``verbose``, which is not declared in this class).
    """

    # Number of latent factors to compute.
    factors: int = 10
    # Tolerance for singular values; zero means machine precision.
    tol: float = 0
    # Maximum number of solver iterations; ``None`` leaves the choice to the solver.
    maxiter: tp.Optional[int] = None
    # Pseudorandom number generator state handed to the solver.
    random_state: tp.Optional[int] = None
class PureSVDModel(VectorModel[PureSVDModelConfig]):
    """
    Recommender built on a truncated SVD of the user-item matrix (PureSVD).

    Reference: https://dl.acm.org/doi/10.1145/1864708.1864721

    Parameters
    ----------
    factors : int, default ``10``
        The number of latent factors to compute.
    tol : float, default ``0``
        Tolerance for singular values. Zero means machine precision.
    maxiter : int, optional, default ``None``
        Maximum number of iterations.
    random_state : int, optional, default ``None``
        Pseudorandom number generator state passed to the SVD solver.
    verbose : int, default ``0``
        Degree of verbose output. If ``0``, no output will be provided.
    """

    recommends_for_warm = False
    recommends_for_cold = False

    # Distances used for user-to-item and item-to-item ranking respectively.
    u2i_dist = Distance.DOT
    i2i_dist = Distance.COSINE

    config_class = PureSVDModelConfig

    def __init__(
        self,
        factors: int = 10,
        tol: float = 0,
        maxiter: tp.Optional[int] = None,
        random_state: tp.Optional[int] = None,
        verbose: int = 0,
    ):
        super().__init__(verbose=verbose)

        # Hyperparameters forwarded to the SVD solver at fit time.
        self.factors = factors
        self.tol = tol
        self.maxiter = maxiter
        self.random_state = random_state

        # Declared here, assigned only in `_fit`.
        self.user_factors: np.ndarray
        self.item_factors: np.ndarray

    def _get_config(self) -> PureSVDModelConfig:
        """Build a config object mirroring the current hyperparameters of the model."""
        return PureSVDModelConfig(
            cls=self.__class__,
            verbose=self.verbose,
            factors=self.factors,
            tol=self.tol,
            maxiter=self.maxiter,
            random_state=self.random_state,
        )

    @classmethod
    def _from_config(cls, config: PureSVDModelConfig) -> tpe.Self:
        """Create a model whose hyperparameters are taken from `config`."""
        return cls(
            factors=config.factors,
            tol=config.tol,
            maxiter=config.maxiter,
            random_state=config.random_state,
            verbose=config.verbose,
        )

    def _fit(self, dataset: Dataset) -> None:  # type: ignore
        """Factorize the weighted user-item matrix and store user and item factors."""
        interactions = dataset.get_user_item_matrix(include_weights=True)

        left_vectors, singular_values, right_vectors_t = svds(
            interactions,
            k=self.factors,
            tol=self.tol,
            maxiter=self.maxiter,
            random_state=self.random_state,
        )

        # Singular values are folded into the item side only; user factors stay unscaled.
        scaled_right_t = np.diag(singular_values) @ right_vectors_t

        self.user_factors = left_vectors
        self.item_factors = scaled_right_t.T

    def _get_users_factors(self, dataset: Dataset) -> Factors:
        """Wrap the stored user factors into `Factors`."""
        return Factors(self.user_factors)

    def _get_items_factors(self, dataset: Dataset) -> Factors:
        """Wrap the stored item factors into `Factors`."""
        return Factors(self.item_factors)

    def get_vectors(self) -> tp.Tuple[np.ndarray, np.ndarray]:
        """
        Return user and item vector representations from fitted model.

        Returns
        -------
        (np.ndarray, np.ndarray)
            User and item embeddings.
            Shapes are (n_users, n_factors) and (n_items, n_factors).

        Raises
        ------
        NotFittedError
            If the model has not been fitted yet.
        """
        if self.is_fitted:
            return self.user_factors, self.item_factors
        raise NotFittedError(self.__class__.__name__)