Source code for rectools.metrics.classification

#  Copyright 2022-2024 MTS (Mobile Telesystems)
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""Classification recommendations metrics."""

import typing as tp

import attr
import numpy as np
import pandas as pd

from rectools import Columns

from .base import Catalog, merge_reco
from .debias import DebiasableMetrikAtK, debias_for_metric_configs, debias_interactions

TP = "__TP"
FP = "__FP"
FN = "__FN"
TN = "__TN"
LIKED = "__LIKED"


[docs]@attr.s class ClassificationMetric(DebiasableMetrikAtK): """ Classification metric base class. Warning: This class should not be used directly. Use derived classes instead. Parameters ---------- k : int Number of items at the top of recommendations list that will be used to calculate metric. debias_config : DebiasConfig, optional, default None Config with debias method parameters (iqr_coef, random_state). """
[docs] def calc(self, reco: pd.DataFrame, interactions: pd.DataFrame, catalog: Catalog) -> float: """ Calculate metric value. Parameters ---------- reco : pd.DataFrame Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`. interactions : pd.DataFrame Interactions table with columns `Columns.User`, `Columns.Item`. catalog : collection Collection of unique item ids that could be used for recommendations. Returns ------- float Value of metric (average between users). """ per_user = self.calc_per_user(reco, interactions, catalog) return per_user.mean()
[docs] def calc_per_user(self, reco: pd.DataFrame, interactions: pd.DataFrame, catalog: Catalog) -> pd.Series: """ Calculate metric values for all users. Parameters ---------- reco : pd.DataFrame Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`. interactions : pd.DataFrame Interactions table with columns `Columns.User`, `Columns.Item`. catalog : collection Collection of unique item ids that could be used for recommendations. Returns ------- pd.Series Values of metric (index - user id, values - metric value for every user). """ is_debiased = False if self.debias_config is not None: interactions = debias_interactions(interactions, self.debias_config) is_debiased = True self._check(reco, interactions=interactions) confusion_df = make_confusions(reco, interactions, self.k) return self.calc_per_user_from_confusion_df(confusion_df, catalog, is_debiased)
[docs] def calc_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog, is_debiased: bool = False) -> float: """ Calculate metric value from prepared confusion matrix. Parameters ---------- confusion_df : pd.DataFrame Table with some confusion values for every user. Columns are: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`. This table can be generated by `make_confusions` (or `calc_confusions`) function. See its description for details. catalog : collection Collection of unique item ids that could be used for recommendations. is_debiased : bool, default False An indicator of whether the debias transformation has been applied before or not. Returns ------- float Value of metric (average between users). """ per_user = self.calc_per_user_from_confusion_df(confusion_df, catalog, is_debiased) return per_user.mean()
[docs] def calc_per_user_from_confusion_df( self, confusion_df: pd.DataFrame, catalog: Catalog, is_debiased: bool = False ) -> pd.Series: """ Calculate metric values for all users from prepared confusion matrix. Parameters ---------- confusion_df : pd.DataFrame Table with some confusion values for every user. Columns are: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`. This table can be generated by `make_confusions` (or `calc_confusions`) function. See its description for details. catalog : collection Collection of unique item ids that could be used for recommendations. is_debiased : bool, default False An indicator of whether the debias transformation has been applied before or not. Returns ------- pd.Series Values of metric (index - user id, values - metric value for every user). """ self._check_debias(is_debiased, obj_name="confusion_df") if TN not in confusion_df: confusion_df[TN] = len(catalog) - self.k - confusion_df[FN] return self._calc_per_user_from_confusion_df(confusion_df, catalog).rename(None)
def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> pd.Series: raise NotImplementedError()
[docs]@attr.s class SimpleClassificationMetric(DebiasableMetrikAtK): """ Simple classification metric base class. Warning: This class should not be used directly. Use derived classes instead. Parameters ---------- k : int Number of items at the top of recommendations list that will be used to calculate metric. debias_config : DebiasConfig, optional, default None Config with debias method parameters (iqr_coef, random_state). """
[docs] def calc(self, reco: pd.DataFrame, interactions: pd.DataFrame) -> float: """ Calculate metric value. Parameters ---------- reco : pd.DataFrame Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`. interactions : pd.DataFrame Interactions table with columns `Columns.User`, `Columns.Item`. Returns ------- float Value of metric (average between users). """ per_user = self.calc_per_user(reco, interactions) return per_user.mean()
[docs] def calc_per_user(self, reco: pd.DataFrame, interactions: pd.DataFrame) -> pd.Series: """ Calculate metric values for all users. Parameters ---------- reco : pd.DataFrame Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`. interactions : pd.DataFrame Interactions table with columns `Columns.User`, `Columns.Item`. Returns ------- pd.Series Values of metric (index - user id, values - metric value for every user). """ is_debiased = False if self.debias_config is not None: interactions = debias_interactions(interactions, self.debias_config) is_debiased = True self._check(reco, interactions=interactions) confusion_df = make_confusions(reco, interactions, self.k) return self.calc_per_user_from_confusion_df(confusion_df, is_debiased)
[docs] def calc_from_confusion_df(self, confusion_df: pd.DataFrame, is_debiased: bool = False) -> float: """ Calculate metric value from prepared confusion matrix. Parameters ---------- confusion_df : pd.DataFrame Table with some confusion values for every user. Columns are: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`. This table can be generated by `make_confusions` (or `calc_confusions`) function. See its description for details. is_debiased : bool, default False An indicator of whether the debias transformation has been applied before or not. Returns ------- float Value of metric (average between users). """ per_user = self.calc_per_user_from_confusion_df(confusion_df, is_debiased) return per_user.mean()
[docs] def calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, is_debiased: bool = False) -> pd.Series: """ Calculate metric values for all users from prepared confusion matrix. Parameters ---------- confusion_df : pd.DataFrame Table with some confusion values for every user. Columns are: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`. This table can be generated by `make_confusions` (or `calc_confusions`) function. See its description for details. is_debiased : bool, default False An indicator of whether the debias transformation has been applied before or not. Returns ------- pd.Series Values of metric (index - user id, values - metric value for every user). """ self._check_debias(is_debiased, obj_name="confusion_df") return self._calc_per_user_from_confusion_df(confusion_df).rename(None)
def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series: raise NotImplementedError()
[docs]@attr.s class Precision(SimpleClassificationMetric): """ Ratio of relevant items among top-`k` recommended items. The Precision@k equals to ``tp / k`` where ``tp`` is the number of relevant recommendations among first ``k`` items in the top of recommendation list. The R-Precision equals to ``tp / min(k, tp+fn)`` where ``tp + fn`` is the total number of items in user test interactions. Parameters ---------- k : int Number of items in top of recommendations list that will be used to calculate metric. r_precision: bool, default `False` Whether to calculate R-Precision instead of simple Precision. If `True` number of user true positives (`tp`) in recommendations will be divided by minimum of `k` and number of user test positives (`tp+fn`) instead of division by `k`. debias_config : DebiasConfig, optional, default None Config with debias method parameters (iqr_coef, random_state). """ r_precision: bool = attr.ib(default=False) def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series: denominator = np.minimum(self.k, confusion_df[TP] + confusion_df[FN]) if self.r_precision else self.k return confusion_df[TP] / denominator
[docs]@attr.s class Recall(SimpleClassificationMetric): """ Ratio of relevant recommended items among all items user interacted with after recommendations were made. The recall@k equals to ``tp / liked`` where - ``tp`` is the number of relevant recommendations among first ``k`` items in the top of recommendation list; - ``liked`` is the number of items the user has interacted (bought, liked) with (in period after recommendations were given). Parameters ---------- k : int Number of items in top of recommendations list that will be used to calculate metric. debias_config : DebiasConfig, optional, default None Config with debias method parameters (iqr_coef, random_state). """ def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series: return confusion_df[TP] / confusion_df[LIKED]
[docs]@attr.s class Accuracy(ClassificationMetric): """ Ratio of correctly recommended items among all items. The accuracy@k equals to ``(tp + tn) / n_items`` where - ``tp`` is the number of relevant recommendations among the first ``k`` items in recommendation list; - ``tn`` is the number of items with which user has not interacted (bought, liked) with (in period after recommendations were given) and we do not recommend to him (in the top ``k`` items of recommendation list); - ``n_items`` - an overall number of items that could be used for recommendations. Parameters ---------- k : int Number of items at the top of recommendations list that will be used to calculate metric. debias_config : DebiasConfig, optional, default None Config with debias method parameters (iqr_coef, random_state). """ def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> pd.Series: accuracy = (confusion_df[TP] + confusion_df[TN]) / len(catalog) return accuracy
[docs]@attr.s class F1Beta(SimpleClassificationMetric): """ Fbeta score for k first recommendations. See more: https://en.wikipedia.org/wiki/F-score The f1_beta equals to ``(1 + beta_sqr) * p@k * r@k / (beta_sqr * p@k + r@k)`` where - beta_sqr equals to beta ** 2 - p@k: precision@k equals to ``tp / k`` where -``tp`` is the number of relevant recommendations among first ``k`` items in the top of recommendation list. - r@k: recall@k equals to ``tp / liked`` where - ``tp`` is the number of relevant recommendations among first ``k`` items in the top of recommendation list; - ``liked`` is the number of items the user has interacted (bought, liked) with (in period after recommendations were given). Parameters ---------- k : int Number of items in top of recommendations list that will be used to calculate metric. beta : float Weight of recall. Default value: beta = 1.0 debias_config : DebiasConfig, optional, default None Config with debias method parameters (iqr_coef, random_state). """ beta: float = attr.ib(default=1.0) def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series: beta_sqr = self.beta**2 p_k = confusion_df[TP] / self.k r_k = confusion_df[TP] / confusion_df[LIKED] f1 = (1 + beta_sqr) * p_k * r_k / (beta_sqr * p_k + r_k) f1.loc[(p_k == 0.0) & (r_k == 0.0)] = 0.0 return f1
[docs]@attr.s class MCC(ClassificationMetric): """ Matthew correlation coefficient calculates correlation between actual and predicted classification. Min value = -1 (negative correlation), Max value = 1 (positive correlation), zero means no correlation See more: https://en.wikipedia.org/wiki/Phi_coefficient The MCC equals to ``(tp * tn - fp * fn) / sqrt((tp + fp)(tp + fn)(tn + fp)(tn + fn))`` where - ``tp`` is the number of relevant recommendations among the first ``k`` items in recommendation list; - ``tn`` is the number of items with which user has not interacted (bought, liked) with (in period after recommendations were given) and we do not recommend to him (in the top ``k`` items of recommendation list); - ``fp`` - number of non-relevant recommendations among the first `k` items of recommendation list; - ``fn`` - number of items the user has interacted with but that weren't recommended (in top-`k`). Parameters ---------- k : int Number of items in top of recommendations list that will be used to calculate metric. debias_config : DebiasConfig, optional, default None Config with debias method parameters (iqr_coef, random_state). """ def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> pd.Series: tp_ = confusion_df[TP] tn_ = confusion_df[TN] fp_ = confusion_df[FP] fn_ = confusion_df[FN] mcc_numerator = tp_ * tn_ - fp_ * fn_ mcc_denominator = np.sqrt((tp_ + fp_) * (tp_ + fn_) * (tn_ + fp_) * (tn_ + fn_)) mcc = mcc_numerator / mcc_denominator mcc.loc[mcc_denominator == 0.0] = 0.0 # if denominator == 0 than numerator is also equals 0 return mcc
[docs]@attr.s class HitRate(SimpleClassificationMetric): """ HitRate calculates the fraction of users for which the correct answer is included in the recommendation list. The HitRate equals to ``1 if tp > 0, otherwise 0`` where - ``tp`` is the number of relevant recommendations among the first ``k`` items in recommendation list. Parameters ---------- k : int Number of items in top of recommendations list that will be used to calculate metric. debias_config : DebiasConfig, optional, default None Config with debias method parameters (iqr_coef, random_state). """ def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series: hit_rate = (confusion_df[TP] > 0).astype(float) return hit_rate
[docs]def calc_classification_metrics( metrics: tp.Dict[str, tp.Union[ClassificationMetric, SimpleClassificationMetric]], merged: pd.DataFrame, catalog: tp.Optional[Catalog] = None, ) -> tp.Dict[str, float]: """ Calculate any classification metrics. Works with prepared data. Warning: It is not recommended to use this function directly. Use `calc_metrics` instead. Parameters ---------- metrics : dict(str -> (ClassificationMetric | SimpleClassificationMetric)) Dict of metric objects to calculate, where key is a metric name and value is a metric object. merged : pd.DataFrame Result of merging recommendations and interactions tables. Can be obtained using `merge_reco` function. catalog : collection, optional Collection of unique item ids that could be used for recommendations. Obligatory only if `metrics` contains `ClassificationMetric` instances. Returns ------- dict(str->float) Dictionary where keys are the same as keys in `metrics` and values are metric calculation results. Raises ------ ValueError If `n_items` is not passed and `ClassificationMetric` is present in `metrics`. TypeError If unexpected metric is present in `metrics`. """ results = {} merged_debiased = debias_for_metric_configs(metrics.values(), merged) confusions = {} for metric_name, metric in metrics.items(): k, debias_config = metric.k, metric.debias_config confusion_task = (k, debias_config) is_debiased = debias_config is not None if confusion_task not in confusions: confusions[confusion_task] = calc_confusions(merged=merged_debiased[debias_config], k=k) confusion_df = confusions[confusion_task] if isinstance(metric, SimpleClassificationMetric): res = metric.calc_from_confusion_df(confusion_df, is_debiased=is_debiased) elif isinstance(metric, ClassificationMetric): if catalog is None: raise ValueError(f"For calculating '{metric.__class__.__name__}' it's necessary to set `catalog`") res = metric.calc_from_confusion_df(confusion_df, catalog, is_debiased=is_debiased) results[metric_name] = res return results
[docs]def calc_confusions(merged: pd.DataFrame, k: int) -> pd.DataFrame: """ Calculate some intermediate metrics from prepared data (it's a helper function). For each user (`Columns.User`) the following metrics are calculated: - `LIKED` - number of items the user has interacted (bought, liked) with; - `TP` - number of relevant recommendations among the first `k` items at the top of recommendation list; - `FP` - number of non-relevant recommendations among the first `k` items of recommendation list; - `FN` - number of items the user has interacted with but that weren't recommended (in top `k`). Parameters ---------- merged : pd.DataFrame Result of merging recommendations and interactions tables. Can be obtained using `merge_reco` function. k : int Number of items at the top of recommendations list that will be used to calculate metric. Returns ------- pd.DataFrame Table with columns: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`. Notes ----- left = all - K TP = sum(rank) FP = K - TP FN = liked - TP TN = all - K - FN = left - FN = left - liked + TP """ confusion_df = merged.groupby(Columns.User)[Columns.Item].agg("size").rename(LIKED).to_frame() confusion_df[TP] = merged.eval(f"__is_hit = {Columns.Rank} <= @k").groupby(Columns.User)["__is_hit"].agg("sum") confusion_df[FP] = k - confusion_df[TP] confusion_df[FN] = confusion_df[LIKED] - confusion_df[TP] return confusion_df
[docs]def make_confusions(reco: pd.DataFrame, interactions: pd.DataFrame, k: int) -> pd.DataFrame: """ Calculate some intermediate metrics from raw data (it's a helper function). For each user the following metrics are calculated: - `LIKED` - number of items the user has interacted (bought, liked) with; - `TP` - number of relevant recommendations among the first `k` items at the top of recommendation list; - `FP` - number of non-relevant recommendations among the first `k` items of recommendation list; - `FN` - number of items the user has interacted with but that weren't recommended (in top-`k`). Parameters ---------- reco : pd.DataFrame Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`. interactions : pd.DataFrame Interactions table with columns `Columns.User`, `Columns.Item`. k : int Number of items at the top of recommendations list that will be used to calculate metric. Returns ------- pd.DataFrame Table with columns: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`. """ merged = merge_reco(reco, interactions) confusion_df = calc_confusions(merged, k) return confusion_df