Source code for rectools.metrics.classification

#  Copyright 2022-2024 MTS (Mobile Telesystems)
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""Classification recommendations metrics."""

import typing as tp
from collections import defaultdict

import attr
import numpy as np
import pandas as pd

from rectools import Columns

from .base import Catalog, MetricAtK, merge_reco

# Names of the per-user columns of the confusion table (`confusion_df`)
# that is produced by `calc_confusions` and consumed by the metric classes.
TP = "__TP"  # true positives column
FP = "__FP"  # false positives column
FN = "__FN"  # false negatives column
TN = "__TN"  # true negatives column (added on demand by `ClassificationMetric`)
LIKED = "__LIKED"  # number of items the user interacted with


@attr.s
class ClassificationMetric(MetricAtK):
    """
    Classification metric base class.

    Metrics derived from this class need the full confusion matrix
    (including true negatives), so they require the items catalog.

    Warning: This class should not be used directly.
    Use derived classes instead.

    Parameters
    ----------
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.
    """

    def calc(self, reco: pd.DataFrame, interactions: pd.DataFrame, catalog: Catalog) -> float:
        """
        Calculate metric value.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.
        catalog : collection
            Collection of unique item ids that could be used for recommendations.

        Returns
        -------
        float
            Value of metric (average between users).
        """
        per_user = self.calc_per_user(reco, interactions, catalog)
        return per_user.mean()

    def calc_per_user(self, reco: pd.DataFrame, interactions: pd.DataFrame, catalog: Catalog) -> pd.Series:
        """
        Calculate metric values for all users.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.
        catalog : collection
            Collection of unique item ids that could be used for recommendations.

        Returns
        -------
        pd.Series
            Values of metric (index - user id, values - metric value for every user).
        """
        # Input validation is delegated to `_check`.
        self._check(reco, interactions=interactions)
        confusion_df = make_confusions(reco, interactions, self.k)
        return self.calc_per_user_from_confusion_df(confusion_df, catalog)

    def calc_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> float:
        """
        Calculate metric value from prepared confusion matrix.

        Parameters
        ----------
        confusion_df : pd.DataFrame
            Table with some confusion values for every user.
            Columns are: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.
            This table can be generated by `make_confusions` (or `calc_confusions`) function.
            See its description for details.
        catalog : collection
            Collection of unique item ids that could be used for recommendations.

        Returns
        -------
        float
            Value of metric (average between users).
        """
        per_user = self.calc_per_user_from_confusion_df(confusion_df, catalog)
        return per_user.mean()

    def calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> pd.Series:
        """
        Calculate metric values for all users from prepared confusion matrix.

        If `confusion_df` has no `TN` column, true negatives are computed as
        ``len(catalog) - k - FN``. The passed table is never modified.
        If a `TN` column is already present, it is used as is.

        Parameters
        ----------
        confusion_df : pd.DataFrame
            Table with some confusion values for every user.
            Columns are: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.
            This table can be generated by `make_confusions` (or `calc_confusions`) function.
            See its description for details.
        catalog : collection
            Collection of unique item ids that could be used for recommendations.

        Returns
        -------
        pd.Series
            Values of metric (index - user id, values - metric value for every user).
        """
        if TN not in confusion_df:
            # TN depends on both `catalog` and `self.k`. Writing it into the caller's
            # frame would make a later call with another catalog silently reuse a
            # stale value, so the column is added to a copy instead.
            true_negatives = len(catalog) - self.k - confusion_df[FN]
            confusion_df = confusion_df.assign(**{TN: true_negatives})
        # Drop the series name so the result does not carry an internal column name.
        return self._calc_per_user_from_confusion_df(confusion_df, catalog).rename(None)

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> pd.Series:
        """Compute per-user metric values; must be implemented by derived classes."""
        raise NotImplementedError()
@attr.s
class SimpleClassificationMetric(MetricAtK):
    """
    Simple classification metric base class.

    Metrics derived from this class are computed from the confusion table
    alone and do not need the items catalog.

    Warning: This class should not be used directly.
    Use derived classes instead.

    Parameters
    ----------
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.
    """

    def calc(self, reco: pd.DataFrame, interactions: pd.DataFrame) -> float:
        """
        Calculate metric value.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.

        Returns
        -------
        float
            Value of metric (average between users).
        """
        return self.calc_per_user(reco, interactions).mean()

    def calc_per_user(self, reco: pd.DataFrame, interactions: pd.DataFrame) -> pd.Series:
        """
        Calculate metric values for all users.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.

        Returns
        -------
        pd.Series
            Values of metric (index - user id, values - metric value for every user).
        """
        # Input validation is delegated to `_check`.
        self._check(reco, interactions=interactions)
        confusions = make_confusions(reco, interactions, self.k)
        return self.calc_per_user_from_confusion_df(confusions)

    def calc_from_confusion_df(self, confusion_df: pd.DataFrame) -> float:
        """
        Calculate metric value from prepared confusion matrix.

        Parameters
        ----------
        confusion_df : pd.DataFrame
            Table with some confusion values for every user.
            Columns are: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.
            This table can be generated by `make_confusions` (or `calc_confusions`) function.
            See its description for details.

        Returns
        -------
        float
            Value of metric (average between users).
        """
        return self.calc_per_user_from_confusion_df(confusion_df).mean()

    def calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        """
        Calculate metric values for all users from prepared confusion matrix.

        Parameters
        ----------
        confusion_df : pd.DataFrame
            Table with some confusion values for every user.
            Columns are: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.
            This table can be generated by `make_confusions` (or `calc_confusions`) function.
            See its description for details.

        Returns
        -------
        pd.Series
            Values of metric (index - user id, values - metric value for every user).
        """
        values = self._calc_per_user_from_confusion_df(confusion_df)
        # Drop the series name so the result does not carry an internal column name.
        return values.rename(None)

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        """Compute per-user metric values; must be implemented by derived classes."""
        raise NotImplementedError()
@attr.s
class Precision(SimpleClassificationMetric):
    """
    Ratio of relevant items among top-`k` recommended items.

    The Precision@k equals to ``tp / k``
    where ``tp`` is the number of relevant recommendations
    among first ``k`` items in the top of recommendation list.

    The R-Precision equals to ``tp / min(k, tp+fn)``
    where ``tp + fn`` is the total number of items in user test interactions.

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    r_precision: bool, default `False`
        Whether to calculate R-Precision instead of simple Precision.
        If `True` number of user true positives (`tp`) in recommendations
        will be divided by minimum of `k` and number of user test positives (`tp+fn`)
        instead of division by `k`.
    """

    r_precision: bool = attr.ib(default=False)

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        """Return per-user Precision@k, or R-Precision when `r_precision` is set."""
        hits = confusion_df[TP]
        if not self.r_precision:
            return hits / self.k
        # R-Precision: the denominator is capped by the number of user test positives.
        n_relevant = hits + confusion_df[FN]
        return hits / np.minimum(self.k, n_relevant)
@attr.s
class Recall(SimpleClassificationMetric):
    """
    Ratio of relevant recommended items among all items user interacted with
    after recommendations were made.

    The recall@k equals to ``tp / liked`` where

    - ``tp`` is the number of relevant recommendations
      among first ``k`` items in the top of recommendation list;
    - ``liked`` is the number of items the user has interacted (bought, liked) with
      (in period after recommendations were given).

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    """

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        """Return per-user recall@k as ``TP / LIKED``."""
        hits = confusion_df[TP]
        n_liked = confusion_df[LIKED]
        return hits / n_liked
@attr.s
class Accuracy(ClassificationMetric):
    """
    Ratio of correctly recommended items among all items.

    The accuracy@k equals to ``(tp + tn) / n_items`` where

    - ``tp`` is the number of relevant recommendations
      among the first ``k`` items in recommendation list;
    - ``tn`` is the number of items with which user has not interacted (bought, liked) with
      (in period after recommendations were given) and we do not recommend to him
      (in the top ``k`` items of recommendation list);
    - ``n_items`` - an overall number of items that could be used for recommendations.

    Parameters
    ----------
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.
    """

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> pd.Series:
        """Return per-user accuracy@k as ``(TP + TN) / len(catalog)``."""
        n_items = len(catalog)
        n_correct = confusion_df[TP] + confusion_df[TN]
        return n_correct / n_items
@attr.s
class F1Beta(SimpleClassificationMetric):
    """
    Fbeta score for k first recommendations.
    See more: https://en.wikipedia.org/wiki/F-score

    The f1_beta equals to ``(1 + beta_sqr) * p@k * r@k / (beta_sqr * p@k + r@k)`` where

    - beta_sqr equals to beta ** 2
    - p@k: precision@k equals to ``tp / k`` where
      ``tp`` is the number of relevant recommendations
      among first ``k`` items in the top of recommendation list.
    - r@k: recall@k equals to ``tp / liked`` where

      - ``tp`` is the number of relevant recommendations
        among first ``k`` items in the top of recommendation list;
      - ``liked`` is the number of items the user has interacted (bought, liked) with
        (in period after recommendations were given).

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    beta : float
        Weight of recall. Default value: beta = 1.0
    """

    beta: float = attr.ib(default=1.0)

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        """Return per-user F-beta score built from precision@k and recall@k."""
        hits = confusion_df[TP]
        precision = hits / self.k
        recall = hits / confusion_df[LIKED]

        beta_sqr = self.beta**2
        numerator = (1 + beta_sqr) * precision * recall
        denominator = beta_sqr * precision + recall
        score = numerator / denominator

        # When both precision and recall are zero the ratio above is 0 / 0;
        # the score is defined as 0 in that case.
        both_zero = (precision == 0.0) & (recall == 0.0)
        score.loc[both_zero] = 0.0
        return score
@attr.s
class MCC(ClassificationMetric):
    """
    Matthew correlation coefficient calculates correlation between actual and predicted classification.
    Min value = -1 (negative correlation), Max value = 1 (positive correlation), zero means no correlation
    See more: https://en.wikipedia.org/wiki/Phi_coefficient

    The MCC equals to ``(tp * tn - fp * fn) / sqrt((tp + fp)(tp + fn)(tn + fp)(tn + fn))`` where

    - ``tp`` is the number of relevant recommendations
      among the first ``k`` items in recommendation list;
    - ``tn`` is the number of items with which user has not interacted (bought, liked) with
      (in period after recommendations were given) and we do not recommend to him
      (in the top ``k`` items of recommendation list);
    - ``fp`` - number of non-relevant recommendations among the first `k` items of recommendation list;
    - ``fn`` - number of items the user has interacted with but that weren't recommended (in top-`k`).

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    """

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> pd.Series:
        """
        Return per-user MCC computed from the `TP`, `TN`, `FP` and `FN` columns.

        Users whose denominator is zero get a value of 0.
        """
        tp_ = confusion_df[TP]
        tn_ = confusion_df[TN]
        fp_ = confusion_df[FP]
        fn_ = confusion_df[FN]

        mcc_numerator = tp_ * tn_ - fp_ * fn_

        # The product of the four marginal sums grows like k * liked * n_items ** 2
        # and can exceed the int64 range for big catalogs, where fixed-width integers
        # wrap around silently. Casting the first factor to float makes the whole
        # product run in float64 (exact for products below 2 ** 53).
        marginals_product = (tp_ + fp_).astype(float) * (tp_ + fn_) * (tn_ + fp_) * (tn_ + fn_)
        mcc_denominator = np.sqrt(marginals_product)

        mcc = mcc_numerator / mcc_denominator
        # If the denominator equals 0 then the numerator also equals 0, so 0 / 0 is replaced by 0.
        mcc.loc[mcc_denominator == 0.0] = 0.0
        return mcc
@attr.s
class HitRate(SimpleClassificationMetric):
    """
    HitRate calculates the fraction of users for which the correct answer
    is included in the recommendation list.

    The HitRate equals to ``1 if tp > 0, otherwise 0`` where

    - ``tp`` is the number of relevant recommendations
      among the first ``k`` items in recommendation list.

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    """

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        """Return 1.0 for users with at least one true positive and 0.0 for the others."""
        has_hit = confusion_df[TP] > 0
        return has_hit.astype(float)
def calc_classification_metrics(
    metrics: tp.Dict[str, tp.Union[ClassificationMetric, SimpleClassificationMetric]],
    merged: pd.DataFrame,
    catalog: tp.Optional[Catalog] = None,
) -> tp.Dict[str, float]:
    """
    Calculate any classification metrics.

    Works with prepared data. The confusion table is built once per distinct `k`
    and shared between all metrics with that `k`.

    Warning: It is not recommended to use this function directly.
    Use `calc_metrics` instead.

    Parameters
    ----------
    metrics : dict(str -> (ClassificationMetric | SimpleClassificationMetric))
        Dict of metric objects to calculate,
        where key is a metric name and value is a metric object.
    merged : pd.DataFrame
        Result of merging recommendations and interactions tables.
        Can be obtained using `merge_reco` function.
    catalog : collection, optional
        Collection of unique item ids that could be used for recommendations.
        Obligatory only if `metrics` contains `ClassificationMetric` instances.

    Returns
    -------
    dict(str->float)
        Dictionary where keys are the same as keys in `metrics`
        and values are metric calculation results.

    Raises
    ------
    ValueError
        If `catalog` is not passed and `ClassificationMetric` is present in `metrics`.
    TypeError
        If unexpected metric is present in `metrics`.
    """
    # Group metric names by `k` so that each confusion table is computed only once.
    names_by_k = defaultdict(list)
    for metric_name, metric in metrics.items():
        names_by_k[metric.k].append(metric_name)

    results = {}
    for k, names in names_by_k.items():
        confusion_df = calc_confusions(merged, k)

        for metric_name in names:
            metric = metrics[metric_name]

            if isinstance(metric, SimpleClassificationMetric):
                results[metric_name] = metric.calc_from_confusion_df(confusion_df)
                continue

            if not isinstance(metric, ClassificationMetric):
                raise TypeError(f"Unexpected classification metric {metric}")

            # Metrics that use true negatives cannot be computed without the catalog.
            if catalog is None:
                raise ValueError(f"For calculating '{metric.__class__.__name__}' it's necessary to set `catalog`")
            results[metric_name] = metric.calc_from_confusion_df(confusion_df, catalog)

    return results
def calc_confusions(merged: pd.DataFrame, k: int) -> pd.DataFrame:
    """
    Calculate some intermediate metrics from prepared data (it's a helper function).

    For each user (`Columns.User`) the following metrics are calculated:

    - `LIKED` - number of items the user has interacted (bought, liked) with;
    - `TP` - number of relevant recommendations among the first `k` items at the top of recommendation list;
    - `FP` - number of non-relevant recommendations among the first `k` items of recommendation list;
    - `FN` - number of items the user has interacted with but that weren't recommended (in top `k`).

    Parameters
    ----------
    merged : pd.DataFrame
        Result of merging recommendations and interactions tables.
        Can be obtained using `merge_reco` function.
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.

    Returns
    -------
    pd.DataFrame
        Table with columns: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.

    Notes
    -----
    left = all - K
    TP = number of rows with rank <= K
    FP = K - TP
    FN = liked - TP
    TN = all - K - FN = left - FN = left - liked + TP
    """
    # One row of `merged` is counted as one liked item of the user.
    liked = merged.groupby(Columns.User)[Columns.Item].agg("size")

    # A row is a hit when its rank is within the top `k`.
    is_hit = merged[Columns.Rank] <= k
    true_positives = is_hit.groupby(merged[Columns.User]).agg("sum")

    confusion_df = liked.rename(LIKED).to_frame()
    confusion_df[TP] = true_positives
    confusion_df[FP] = k - confusion_df[TP]
    confusion_df[FN] = confusion_df[LIKED] - confusion_df[TP]
    return confusion_df
def make_confusions(reco: pd.DataFrame, interactions: pd.DataFrame, k: int) -> pd.DataFrame:
    """
    Calculate some intermediate metrics from raw data (it's a helper function).

    The recommendations and interactions tables are merged with `merge_reco`
    and the result is passed to `calc_confusions`.

    For each user the following metrics are calculated:

    - `LIKED` - number of items the user has interacted (bought, liked) with;
    - `TP` - number of relevant recommendations among the first `k` items at the top of recommendation list;
    - `FP` - number of non-relevant recommendations among the first `k` items of recommendation list;
    - `FN` - number of items the user has interacted with but that weren't recommended (in top-`k`).

    Parameters
    ----------
    reco : pd.DataFrame
        Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
    interactions : pd.DataFrame
        Interactions table with columns `Columns.User`, `Columns.Item`.
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.

    Returns
    -------
    pd.DataFrame
        Table with columns: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.
    """
    return calc_confusions(merge_reco(reco, interactions), k)