Source code for rectools.metrics.ranking

#  Copyright 2022-2025 MTS (Mobile Telesystems)
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""Ranking recommendations metrics."""

import typing as tp

import attr
import numpy as np
import pandas as pd
from scipy import sparse

from rectools import Columns
from rectools.metrics.base import merge_reco
from rectools.utils import log_at_base, select_by_type

from .debias import DebiasableMetrikAtK, calc_debiased_fit_task, debias_for_metric_configs, debias_interactions


@attr.s
class _RankingMetric(DebiasableMetrikAtK):
    """
    Common base class of ranking metrics.

    Warning: This class should not be used directly.
    Use derived classes instead.

    Parameters
    ----------
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).
    """

    def calc(self, reco: pd.DataFrame, interactions: pd.DataFrame) -> float:
        """
        Compute the metric averaged over users.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.

        Returns
        -------
        float
            Mean of the per-user values returned by `calc_per_user`.
        """
        return self.calc_per_user(reco, interactions).mean()

    def calc_per_user(self, reco: pd.DataFrame, interactions: pd.DataFrame) -> pd.Series:
        """
        Compute the metric separately for every user.

        Must be overridden by derived classes; the base implementation only raises.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.

        Returns
        -------
        pd.Series
            Values of metric (index - user id, values - metric value for every user).

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError()
@attr.s
class MAPFitted:
    """
    Container with meta data got from `MAP.fit` method.

    Parameters
    ----------
    precision_at_k : csr_matrix
        CSR matrix where rows correspond to users,
        columns correspond to all possible k from 0 to `k_max`,
        and values are weighted precisions for relevant recommended items.
    users : np.ndarray
        Array of user ids.
    n_relevant_items : np.ndarray
        Tally of relevant items for each user.
        Users are in the same order as in `precision_at_k` matrix.
    """

    precision_at_k: sparse.csr_matrix = attr.ib()
    users: np.ndarray = attr.ib()
    n_relevant_items: np.ndarray = attr.ib()
@attr.s
class MAP(_RankingMetric):
    r"""
    Mean Average Precision at k (MAP@k).

    Mean AP calculates as mean value of AP among all users.

    Average Precision estimates precision of recommendations
    taking into account their order.

    .. math::
        AP@k = (\sum_{i=1}^{k} p@i * rel(i)) / divider

    where
        - `p@i` is ``precision at i``, see `Precision` metric documentation for details;
        - `rel(i)` is an indicator function, it equals to ``1``
          if the item at rank ``i`` is relevant, ``0`` otherwise;
        - `divider` can be equal to ``k`` or be equal to number of relevant items
          per user, depending on `divide_by_k` parameter.

    Parameters
    ----------
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.
    divide_by_k : bool, default False
        If ``True``, ``k`` will be used as divider in ``AP@k``.
        If ``False``, number of relevant items for each user will be used.
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).

    Examples
    --------
    >>> reco = pd.DataFrame(
    ...     {
    ...         Columns.User: [1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4],
    ...         Columns.Item: [7, 8, 1, 2, 1, 2, 3, 4, 1, 2, 3],
    ...         Columns.Rank: [1, 2, 1, 2, 1, 2, 3, 4, 1, 2, 3],
    ...     }
    ... )
    >>> interactions = pd.DataFrame(
    ...     {
    ...         Columns.User: [1, 1, 2, 3, 3, 3, 4, 4, 4],
    ...         Columns.Item: [1, 2, 1, 1, 3, 4, 1, 2, 3],
    ...     }
    ... )
    >>> # Here
    >>> # - for user ``1`` we return non-relevant recommendations;
    >>> # - for user ``2`` we return 2 items and relevant is first;
    >>> # - for user ``3`` we return 4 items, 1st, 3rd and 4th are relevant;
    >>> # - for user ``4`` we return 3 items and all are relevant;
    >>> MAP(k=1).calc_per_user(reco, interactions).values
    array([0.        , 1.        , 0.33333333, 0.33333333])
    >>> MAP(k=3).calc_per_user(reco, interactions).values
    array([0.        , 1.        , 0.55555556, 1.        ])
    >>> MAP(k=1, divide_by_k=True).calc_per_user(reco, interactions).values
    array([0., 1., 1., 1.])
    >>> MAP(k=3, divide_by_k=True).calc_per_user(reco, interactions).values
    array([0.        , 0.33333333, 0.55555556, 1.        ])
    """

    divide_by_k: bool = attr.ib(default=False)

    @classmethod
    def fit(cls, merged: pd.DataFrame, k_max: int) -> MAPFitted:
        """
        Prepare intermediate data for effective calculation.

        You can use this method to prepare some intermediate data
        for later calculation. It can optimize calculations if
        you want calculate metric value for different `k`.

        Parameters
        ----------
        merged : pd.DataFrame
            Result of merging recommendations and interactions tables.
            Can be obtained using `merge_reco` function.
        k_max : int
            k is number of items at the top of recommendations list that will be used to calculate metric.
            So `k_max` is maximum number of items for which you want to calculate metric.

        Returns
        -------
        MAPFitted
            Precision matrix of shape ``(n_users, k_max + 1)`` together with user ids and
            per-user row counts of `merged`. When `merged` contains no users, the matrix is
            empty with shape ``(0, 0)``.
        """
        users = np.unique(merged[Columns.User])
        if users.size == 0:
            prec_at_k_csr = sparse.csr_matrix(np.array([]).reshape(0, 0))
            return MAPFitted(prec_at_k_csr, users, np.array([]))

        # Number of rows of `merged` per user, reordered to match `users`
        n_relevant_items = merged.groupby(Columns.User, sort=False)[Columns.Item].agg("size")[users].values

        user_to_idx_map = pd.Series(np.arange(users.size), index=users)
        # Rows whose rank is NaN or greater than `k_max` do not pass this comparison
        df_prepared = merged.query(f"{Columns.Rank} <= @k_max")
        # Indicator matrix: 1 at (user, rank) for every kept row.
        # NOTE(review): the cumulative sums below assume column indices are sorted within
        # each row; scipy appears to canonicalize when building CSR from COO triplets — confirm.
        csr = sparse.csr_matrix(
            (
                np.ones(len(df_prepared)),
                (
                    df_prepared[Columns.User].map(user_to_idx_map),
                    df_prepared[Columns.Rank].round().astype(int),
                ),
            ),
            shape=(users.size, k_max + 1),  # +1 because numeration from 0, but ranks from 1
        )

        # Now let calc cumulative ranks - it's equal to number of relevant items at k
        # Here rows - users, columns - all possible k
        full_cumsum = np.cumsum(csr.data)
        n_row_elements = np.diff(csr.indptr)
        row_sums = np.asarray(csr.sum(axis=1)).ravel()
        sum_n_elements_in_prev_rows = np.repeat(
            # add [0] because no elements before first row
            np.concatenate((np.array([0]), np.cumsum(row_sums)[:-1])),
            n_row_elements,
        )
        # Same matrix object under a more descriptive name; its data is replaced in place
        n_relevant_items_at_k = csr
        n_relevant_items_at_k.data = full_cumsum - sum_n_elements_in_prev_rows

        # And finally calculate precision for every k:
        # the column index of a stored element is its rank, which is the precision denominator
        counts = np.arange(k_max + 1)
        counts_indexed = counts[n_relevant_items_at_k.indices]
        prec_at_k = n_relevant_items_at_k
        prec_at_k.data = prec_at_k.data / counts_indexed

        return MAPFitted(prec_at_k, users, n_relevant_items)

    def calc_per_user(self, reco: pd.DataFrame, interactions: pd.DataFrame) -> pd.Series:
        """
        Calculate metric values for all users.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.

        Returns
        -------
        pd.Series
            Values of metric (index - user id, values - metric value for every user).
        """
        is_debiased = False
        if self.debias_config is not None:
            # Debias the interactions before merging, and remember that it was done
            interactions = debias_interactions(interactions, self.debias_config)
            is_debiased = True

        self._check(reco, interactions=interactions)
        merged_reco = merge_reco(reco, interactions)
        fitted = self.fit(merged_reco, k_max=self.k)
        return self.calc_per_user_from_fitted(fitted, is_debiased)

    def calc_per_user_from_fitted(self, fitted: MAPFitted, is_debiased: bool = False) -> pd.Series:
        """
        Calculate metric values for all users from fitted data.

        For parameters used result of `fit` method.

        Parameters
        ----------
        fitted : MAPFitted
            Meta data that got from `.fit` method.
            It should be fitted with ``k_max >= k``: only columns ``1..k`` of
            `fitted.precision_at_k` are summed, so a narrower matrix contributes fewer ranks.
        is_debiased : bool, default False
            An indicator of whether the debias transformation has been applied before or not.

        Returns
        -------
        pd.Series
            Values of metric (index - user id, values - metric value for every user).
        """
        self._check_debias(is_debiased, obj_name="MAPFitted")
        # Column 0 is skipped because ranks start from 1
        valid_precisions = fitted.precision_at_k[:, 1 : self.k + 1]
        sum_precisions = np.asarray(valid_precisions.sum(axis=1)).reshape(-1)
        divider = self.k if self.divide_by_k else fitted.n_relevant_items
        sum_precisions = sum_precisions / divider
        avg_precisions = pd.Series(sum_precisions, index=pd.Series(fitted.users, name=Columns.User)).rename(None)
        return avg_precisions

    def calc_from_fitted(self, fitted: MAPFitted, is_debiased: bool = False) -> float:
        """
        Calculate metric value from fitted data.

        For parameters used result of `fit` method.

        Parameters
        ----------
        fitted : MAPFitted
            Meta data that got from `.fit` method.
        is_debiased : bool, default False
            An indicator of whether the debias transformation has been applied before or not.

        Returns
        -------
        float
            Value of metric (average between users).
        """
        per_user = self.calc_per_user_from_fitted(fitted, is_debiased)
        return per_user.mean()
@attr.s
class NDCG(_RankingMetric):
    r"""
    Normalized Discounted Cumulative Gain at k (NDCG@k).

    Estimates relevance of recommendations taking in account their order.

    `"Discounted Gain"` means that original item relevance is being discounted based on this
    item's rank. The closer the item is to the top, the more gain is achieved.
    `"Cumulative"` means that all items discounted gains from ``k`` ranks are being summed.
    `"Normalized"` means that the actual value of DCG is being divided by the `"Ideal DCG"` (IDCG).
    This is the maximum possible value of `DCG@k`, used as normalization coefficient to ensure that
    `NDCG@k` values lie in ``[0, 1]``.

    .. math::
        NDCG@k=\frac{1}{|U|}\sum_{u \in U}\frac{DCG_u@k}{IDCG_u@k}

        DCG_u@k = \sum_{i=1}^{k} \frac{rel_u(i)}{log(i + 1)}

    where
        - :math:`IDCG_u@k = \sum_{i=1}^{k} \frac{1}{log(i + 1)}` when `divide_by_achievable` is set
          to ``False`` (default).
        - :math:`IDCG_u@k = \sum_{i=1}^{\min (|R_u|, k)} \frac{1}{log(i + 1)}` when
          `divide_by_achievable` is set to ``True``.
        - :math:`rel_u(i)` is `"Gain"`. Here it is an indicator function, it equals to ``1`` if the
          item at rank ``i`` is relevant to user ``u``, ``0`` otherwise.
        - :math:`|R_u|` is number of relevant (ground truth) items for user ``u``.

    Parameters
    ----------
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.
    log_base : int, default ``2``
        Base of logarithm used to weight relevant items.
    divide_by_achievable : bool, default ``False``
        When set to ``False`` (default) IDCG is calculated as one value for all of the users and
        equals to the maximum gain, achievable when all ``k`` positions are relevant.
        When set to ``True``, IDCG is calculated for each user individually, considering
        the maximum possible amount of user test items on top ``k`` positions.
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).

    Examples
    --------
    >>> reco = pd.DataFrame(
    ...     {
    ...         Columns.User: [1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4],
    ...         Columns.Item: [7, 8, 1, 2, 1, 2, 3, 4, 1, 2, 3],
    ...         Columns.Rank: [1, 2, 1, 2, 1, 2, 3, 4, 1, 2, 3],
    ...     }
    ... )
    >>> interactions = pd.DataFrame(
    ...     {
    ...         Columns.User: [1, 1, 2, 3, 3, 3, 4, 4, 4],
    ...         Columns.Item: [1, 2, 1, 1, 3, 4, 1, 2, 3],
    ...     }
    ... )
    >>> # Here
    >>> # - for user ``1`` we return non-relevant recommendations;
    >>> # - for user ``2`` we return 2 items and relevant is first;
    >>> # - for user ``3`` we return 4 items, 1st, 3rd and 4th are relevant;
    >>> # - for user ``4`` we return 3 items and all are relevant;
    >>> NDCG(k=1).calc_per_user(reco, interactions).values
    array([0., 1., 1., 1.])
    >>> NDCG(k=3).calc_per_user(reco, interactions).values
    array([0.        , 0.46927873, 0.70391809, 1.        ])
    """

    log_base: int = attr.ib(default=2)
    divide_by_achievable: bool = attr.ib(default=False)

    def calc_per_user(self, reco: pd.DataFrame, interactions: pd.DataFrame) -> pd.Series:
        """
        Calculate metric values for all users.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.

        Returns
        -------
        pd.Series
            Values of metric (index - user id, values - metric value for every user).
        """
        self._check(reco, interactions=interactions)
        merged_reco = merge_reco(reco, interactions)
        # Debiasing (if configured) is applied inside `calc_per_user_from_merged`
        return self.calc_per_user_from_merged(merged_reco)

    def calc_from_merged(self, merged: pd.DataFrame, is_debiased: bool = False) -> float:
        """
        Calculate metric value from merged recommendations.

        Parameters
        ----------
        merged : pd.DataFrame
            Result of merging recommendations and interactions tables.
            Can be obtained using `merge_reco` function.
        is_debiased : bool, default False
            An indicator of whether the debias transformation has been applied before or not.

        Returns
        -------
        float
            Value of metric (average between users).
        """
        per_user = self.calc_per_user_from_merged(merged, is_debiased)
        return per_user.mean()

    def calc_per_user_from_merged(self, merged: pd.DataFrame, is_debiased: bool = False) -> pd.Series:
        """
        Calculate metric values for all users from merged recommendations.

        The passed `merged` table is not modified: intermediate DCG values are kept
        in local objects instead of a temporary column on the input frame.

        Parameters
        ----------
        merged : pd.DataFrame
            Result of merging recommendations and interactions tables.
            Can be obtained using `merge_reco` function.
        is_debiased : bool, default False
            An indicator of whether the debias transformation has been applied before or not.

        Returns
        -------
        pd.Series
            Values of metric (index - user id, values - metric value for every user).
        """
        if not is_debiased and self.debias_config is not None:
            merged = debias_interactions(merged, self.debias_config)

        # DCG
        # Avoid division by 0 with `+1` for rank value in denominator before taking logarithm
        rank = merged[Columns.Rank]
        dcg_per_row = (rank <= self.k).astype(int) / log_at_base(rank + 1, self.log_base)

        # Gains of an ideal list: every position from 1 to k is relevant
        ranks = np.arange(1, self.k + 1)
        discounted_gains = 1 / log_at_base(ranks + 1, self.log_base)

        if self.divide_by_achievable:
            per_row = pd.DataFrame(
                {
                    Columns.User: merged[Columns.User],
                    Columns.Item: merged[Columns.Item],
                    "__DCG": dcg_per_row,
                }
            )
            grouped = per_row.groupby(Columns.User, sort=False)
            stats = grouped.agg(n_items=(Columns.Item, "count"), dcg=("__DCG", "sum"))

            # IDCG
            # Per-user ideal gain depends on how many items the user has, capped by k
            n_items_to_ndcg_map = dict(zip(ranks, discounted_gains.cumsum()))
            n_items_to_ndcg_map[0] = 0
            idcg = stats["n_items"].clip(upper=self.k).map(n_items_to_ndcg_map)

            # NDCG
            ndcg = stats["dcg"] / idcg
        else:
            # One IDCG value shared by all users
            idcg = discounted_gains.sum()
            ndcg = (
                pd.DataFrame({Columns.User: merged[Columns.User], "__ndcg": dcg_per_row / idcg})
                .groupby(Columns.User, sort=False)["__ndcg"]
                .sum()
            )

        return ndcg.rename(None)
class MRR(_RankingMetric):
    r"""
    Mean Reciprocal Rank at k (MRR@k).

    MRR calculates as mean value of reciprocal rank
    of first relevant recommendation among all users.

    Estimates relevance of recommendations taking in account their order.

    .. math::
        MRR@K = \frac{1}{|U|} \sum_{i=1}^{|U|} \frac{1}{rank_{i}}

    where
        - :math:`{|U|}` is a number of unique users;
        - :math:`rank_{i}` is a rank of first relevant recommendation starting from ``1``.

    If a user doesn't have any relevant recommendation then his metric value will be ``0``.

    Parameters
    ----------
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).

    Examples
    --------
    >>> reco = pd.DataFrame(
    ...     {
    ...         Columns.User: [1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4],
    ...         Columns.Item: [7, 8, 1, 2, 2, 1, 3, 4, 7, 8, 3],
    ...         Columns.Rank: [1, 2, 1, 2, 1, 2, 3, 4, 1, 2, 3],
    ...     }
    ... )
    >>> interactions = pd.DataFrame(
    ...     {
    ...         Columns.User: [1, 1, 2, 3, 3, 3, 4, 4, 4],
    ...         Columns.Item: [1, 2, 1, 1, 3, 4, 1, 2, 3],
    ...     }
    ... )
    >>> # Here
    >>> # - for user ``1`` we return non-relevant recommendations;
    >>> # - for user ``2`` we return 2 items and relevant is first;
    >>> # - for user ``3`` we return 4 items, 2nd, 3rd and 4th are relevant;
    >>> # - for user ``4`` we return 3 items and relevant is last;
    >>> MRR(k=1).calc_per_user(reco, interactions).values
    array([0., 1., 0., 0.])
    >>> MRR(k=3).calc_per_user(reco, interactions).values
    array([0.        , 1.        , 0.5       , 0.33333333])
    """

    def calc_per_user(self, reco: pd.DataFrame, interactions: pd.DataFrame) -> pd.Series:
        """
        Compute the reciprocal rank for every user.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.

        Returns
        -------
        pd.Series
            Values of metric (index - user id, values - metric value for every user).
        """
        self._check(reco, interactions=interactions)
        return self.calc_per_user_from_merged(merge_reco(reco, interactions))

    def calc_per_user_from_merged(self, merged: pd.DataFrame, is_debiased: bool = False) -> pd.Series:
        """
        Compute the reciprocal rank for every user from merged recommendations.

        Parameters
        ----------
        merged : pd.DataFrame
            Result of merging recommendations and interactions tables.
            Can be obtained using `merge_reco` function.
        is_debiased : bool, default False
            An indicator of whether the debias transformation has been applied before or not.

        Returns
        -------
        pd.Series
            Values of metric (index - user id, values - metric value for every user).
        """
        if not is_debiased and self.debias_config is not None:
            merged = debias_interactions(merged, self.debias_config)

        rank = merged[Columns.Rank]
        # Ranks beyond k are blanked out so that they cannot become the per-user minimum
        rank_within_k = np.where(rank <= self.k, rank, np.nan)
        per_row = pd.DataFrame({Columns.User: merged[Columns.User], "__cutted_rank": rank_within_k})
        first_hit_rank = per_row.groupby(Columns.User, sort=False)["__cutted_rank"].min()

        # Users without any hit in top k have NaN here and get 0
        reciprocal_rank = 1.0 / first_hit_rank
        return reciprocal_rank.fillna(0).rename(None)

    def calc_from_merged(self, merged: pd.DataFrame, is_debiased: bool = False) -> float:
        """
        Compute the metric averaged over users from merged recommendations.

        Parameters
        ----------
        merged : pd.DataFrame
            Result of merging recommendations and interactions tables.
            Can be obtained using `merge_reco` function.
        is_debiased : bool, default False
            An indicator of whether the debias transformation has been applied before or not.

        Returns
        -------
        float
            Value of metric (average between users).
        """
        return self.calc_per_user_from_merged(merged, is_debiased).mean()
# Type alias covering the ranking metric classes of this module;
# used to annotate the `metrics` argument of `calc_ranking_metrics`.
RankingMetric = tp.Union[NDCG, MAP, MRR]
def calc_ranking_metrics(
    metrics: tp.Dict[str, RankingMetric],
    merged: pd.DataFrame,
) -> tp.Dict[str, float]:
    """
    Calculate any ranking metrics (MAP, NDCG and MRR for now).

    Works with pre-prepared data.

    Warning: It is not recommended to use this function directly.
    Use `calc_metrics` instead.

    Parameters
    ----------
    metrics : dict(str -> (MAP | NDCG | MRR))
        Dict of metric objects to calculate,
        where key is metric name and value is metric object.
    merged : pd.DataFrame
        Result of merging recommendations and interactions tables.
        Can be obtained using `merge_reco` function.

    Returns
    -------
    dict(str->float)
        Dictionary where keys are the same with keys in `metrics`
        and values are metric calculation results.
    """
    results = {}

    # NDCG and MRR are calculated directly from the (debiased) merged table.
    # NOTE(review): `merged_debiased` is rebound on every iteration, so only the value
    # produced for MRR is handed to `calc_debiased_fit_task` below — confirm this is intended.
    merged_debiased = None
    for ranking_metric_cls in (NDCG, MRR):
        ranking_metrics: tp.Dict[str, tp.Union[NDCG, MRR]] = select_by_type(metrics, ranking_metric_cls)
        merged_debiased = debias_for_metric_configs(ranking_metrics.values(), merged)
        results.update(
            {
                name: metric.calc_from_merged(merged_debiased[metric.debias_config], is_debiased=True)
                for name, metric in ranking_metrics.items()
            }
        )

    map_metrics: tp.Dict[str, MAP] = select_by_type(metrics, MAP)
    if not map_metrics:
        return results

    # MAP metrics share one fit per debias config
    debiased_fit_task = calc_debiased_fit_task(map_metrics.values(), merged, merged_debiased)
    fitted_debiased = {
        debias_config: MAP.fit(merged_d, k_max_d) for debias_config, (k_max_d, merged_d) in debiased_fit_task.items()
    }
    for name, map_metric in map_metrics.items():
        results[name] = map_metric.calc_from_fitted(
            fitted=fitted_debiased[map_metric.debias_config],
            is_debiased=map_metric.debias_config is not None,
        )

    return results