# Copyright 2022-2024 MTS (Mobile Telesystems)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classification recommendations metrics."""
import typing as tp
import attr
import numpy as np
import pandas as pd
from rectools import Columns
from .base import Catalog, merge_reco
from .debias import DebiasableMetrikAtK, debias_for_metric_configs, debias_interactions
# Column names of the per-user confusion table produced by `calc_confusions`.
# NOTE(review): the double-underscore prefix presumably avoids clashes with
# user-supplied column names - confirm.
TP = "__TP"  # relevant items recommended within the top-k
FP = "__FP"  # non-relevant items recommended within the top-k
FN = "__FN"  # relevant items that were not recommended within the top-k
TN = "__TN"  # non-relevant items not recommended; derived lazily in `ClassificationMetric`
LIKED = "__LIKED"  # number of items the user has interacted with
@attr.s
class ClassificationMetric(DebiasableMetrikAtK):
    """
    Classification metric base class for metrics that need the item catalog.

    Warning: This class should not be used directly.
    Use derived classes instead.

    Parameters
    ----------
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).
    """

    def calc(self, reco: pd.DataFrame, interactions: pd.DataFrame, catalog: Catalog) -> float:
        """
        Calculate metric value.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.
        catalog : collection
            Collection of unique item ids that could be used for recommendations.

        Returns
        -------
        float
            Value of metric (average between users).
        """
        per_user = self.calc_per_user(reco, interactions, catalog)
        return per_user.mean()

    def calc_per_user(self, reco: pd.DataFrame, interactions: pd.DataFrame, catalog: Catalog) -> pd.Series:
        """
        Calculate metric values for all users.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.
        catalog : collection
            Collection of unique item ids that could be used for recommendations.

        Returns
        -------
        pd.Series
            Values of metric (index - user id, values - metric value for every user).
        """
        is_debiased = False
        if self.debias_config is not None:
            # Debias the interactions first so the confusion table is built from them.
            interactions = debias_interactions(interactions, self.debias_config)
            is_debiased = True

        self._check(reco, interactions=interactions)
        confusion_df = make_confusions(reco, interactions, self.k)
        return self.calc_per_user_from_confusion_df(confusion_df, catalog, is_debiased)

    def calc_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog, is_debiased: bool = False) -> float:
        """
        Calculate metric value from prepared confusion matrix.

        Parameters
        ----------
        confusion_df : pd.DataFrame
            Table with some confusion values for every user.
            Columns are: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.
            This table can be generated by `make_confusions` (or `calc_confusions`) function.
            See its description for details.
        catalog : collection
            Collection of unique item ids that could be used for recommendations.
        is_debiased : bool, default False
            An indicator of whether the debias transformation has been applied before or not.

        Returns
        -------
        float
            Value of metric (average between users).
        """
        per_user = self.calc_per_user_from_confusion_df(confusion_df, catalog, is_debiased)
        return per_user.mean()

    def calc_per_user_from_confusion_df(
        self, confusion_df: pd.DataFrame, catalog: Catalog, is_debiased: bool = False
    ) -> pd.Series:
        """
        Calculate metric values for all users from prepared confusion matrix.

        The passed table is never modified: when the `TN` column is absent
        it is derived on a copy as ``len(catalog) - k - FN``.

        Parameters
        ----------
        confusion_df : pd.DataFrame
            Table with some confusion values for every user.
            Columns are: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.
            This table can be generated by `make_confusions` (or `calc_confusions`) function.
            See its description for details.
            If a `TN` column is already present, it is used as is.
        catalog : collection
            Collection of unique item ids that could be used for recommendations.
        is_debiased : bool, default False
            An indicator of whether the debias transformation has been applied before or not.

        Returns
        -------
        pd.Series
            Values of metric (index - user id, values - metric value for every user).
        """
        self._check_debias(is_debiased, obj_name="confusion_df")

        if TN not in confusion_df:
            # `assign` returns a new frame, so the caller's table is left untouched.
            # Writing `TN` in place would leak a column into a table that may be
            # shared between metrics and would go stale if reused with another catalog.
            confusion_df = confusion_df.assign(**{TN: len(catalog) - self.k - confusion_df[FN]})
        return self._calc_per_user_from_confusion_df(confusion_df, catalog).rename(None)

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> pd.Series:
        # Metric-specific formula; must be provided by derived classes.
        raise NotImplementedError()
@attr.s
class SimpleClassificationMetric(DebiasableMetrikAtK):
    """
    Base class for classification metrics computed without the item catalog.

    Warning: This class should not be used directly.
    Use derived classes instead.

    Parameters
    ----------
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).
    """

    def calc(self, reco: pd.DataFrame, interactions: pd.DataFrame) -> float:
        """
        Compute the metric averaged over users.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.

        Returns
        -------
        float
            Mean of the per-user metric values.
        """
        return self.calc_per_user(reco, interactions).mean()

    def calc_per_user(self, reco: pd.DataFrame, interactions: pd.DataFrame) -> pd.Series:
        """
        Compute the metric separately for every user.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.

        Returns
        -------
        pd.Series
            Metric values (index - user id, values - metric value for every user).
        """
        is_debiased = self.debias_config is not None
        if is_debiased:
            # Debiasing happens before validation and before the confusion table is built.
            interactions = debias_interactions(interactions, self.debias_config)

        self._check(reco, interactions=interactions)
        return self.calc_per_user_from_confusion_df(make_confusions(reco, interactions, self.k), is_debiased)

    def calc_from_confusion_df(self, confusion_df: pd.DataFrame, is_debiased: bool = False) -> float:
        """
        Compute the metric averaged over users from a ready confusion table.

        Parameters
        ----------
        confusion_df : pd.DataFrame
            Per-user confusion values.
            Columns are: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.
            Such a table is produced by `make_confusions` (or `calc_confusions`).
        is_debiased : bool, default False
            Whether the debias transformation was already applied to the data
            the table was built from.

        Returns
        -------
        float
            Mean of the per-user metric values.
        """
        return self.calc_per_user_from_confusion_df(confusion_df, is_debiased).mean()

    def calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, is_debiased: bool = False) -> pd.Series:
        """
        Compute the metric separately for every user from a ready confusion table.

        Parameters
        ----------
        confusion_df : pd.DataFrame
            Per-user confusion values.
            Columns are: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.
            Such a table is produced by `make_confusions` (or `calc_confusions`).
        is_debiased : bool, default False
            Whether the debias transformation was already applied to the data
            the table was built from.

        Returns
        -------
        pd.Series
            Metric values (index - user id, values - metric value for every user).
        """
        self._check_debias(is_debiased, obj_name="confusion_df")
        per_user = self._calc_per_user_from_confusion_df(confusion_df)
        # The resulting series is returned unnamed.
        return per_user.rename(None)

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        # Metric-specific formula; derived classes override this hook.
        raise NotImplementedError()
@attr.s
class Precision(SimpleClassificationMetric):
    """
    Share of relevant items among the top-`k` recommended items.

    The Precision@k equals to ``tp / k``
    where ``tp`` is the number of relevant recommendations
    among first ``k`` items in the top of recommendation list.

    The R-Precision equals to ``tp / min(k, tp+fn)``
    where ``tp + fn`` is the total number of items in user test interactions.

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    r_precision: bool, default `False`
        Whether to calculate R-Precision instead of simple Precision. If `True`, the number of
        user true positives (`tp`) is divided by the minimum of `k` and the number of user test
        positives (`tp+fn`) instead of by `k`.
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).
    """

    r_precision: bool = attr.ib(default=False)

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        hits = confusion_df[TP]
        if not self.r_precision:
            # Plain Precision@k: fixed denominator.
            return hits / self.k
        # R-Precision: the denominator is capped by the number of test positives.
        test_positives = hits + confusion_df[FN]
        return hits / np.minimum(self.k, test_positives)
@attr.s
class Recall(SimpleClassificationMetric):
    """
    Share of the items a user interacted with (after recommendations were made)
    that appear among the recommended items.

    The recall@k equals to ``tp / liked`` where

    - ``tp`` is the number of relevant recommendations
      among first ``k`` items in the top of recommendation list;
    - ``liked`` is the number of items the user has interacted
      (bought, liked) with (in period after recommendations were given).

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).
    """

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        hits = confusion_df[TP]
        liked = confusion_df[LIKED]
        return hits / liked
@attr.s
class Accuracy(ClassificationMetric):
    """
    Share of correctly classified items among all catalog items.

    The accuracy@k equals to ``(tp + tn) / n_items`` where

    - ``tp`` is the number of relevant recommendations
      among the first ``k`` items in recommendation list;
    - ``tn`` is the number of items the user has not interacted (bought, liked) with
      (in period after recommendations were given) and that were not recommended to them
      (in the top ``k`` items of recommendation list);
    - ``n_items`` - an overall number of items that could be used for recommendations.

    Parameters
    ----------
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).
    """

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> pd.Series:
        # Correct decisions are the recommended hits plus the rightly omitted items.
        correct = confusion_df[TP] + confusion_df[TN]
        return correct / len(catalog)
@attr.s
class F1Beta(SimpleClassificationMetric):
    """
    F-beta score over the first `k` recommendations.
    See more: https://en.wikipedia.org/wiki/F-score

    The f1_beta equals to ``(1 + beta_sqr) * p@k * r@k / (beta_sqr * p@k + r@k)``
    where

    - beta_sqr equals to beta ** 2
    - p@k: precision@k equals to ``tp / k`` where
        - ``tp`` is the number of relevant recommendations
          among first ``k`` items in the top of recommendation list.
    - r@k: recall@k equals to ``tp / liked`` where
        - ``tp`` is the number of relevant recommendations
          among first ``k`` items in the top of recommendation list;
        - ``liked`` is the number of items the user has interacted
          (bought, liked) with (in period after recommendations were given).

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    beta : float
        Weight of recall. Default value: beta = 1.0
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).
    """

    beta: float = attr.ib(default=1.0)

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        beta_sqr = self.beta**2
        precision = confusion_df[TP] / self.k
        recall = confusion_df[TP] / confusion_df[LIKED]

        numerator = (1 + beta_sqr) * precision * recall
        denominator = beta_sqr * precision + recall
        score = numerator / denominator

        # With no hits both precision and recall are zero and the ratio is 0 / 0;
        # the score is defined as 0 for such users.
        no_hits = (precision == 0.0) & (recall == 0.0)
        score.loc[no_hits] = 0.0
        return score
@attr.s
class MCC(ClassificationMetric):
    """
    Matthews correlation coefficient: correlation between actual and predicted classification.
    Min value = -1 (negative correlation), Max value = 1 (positive correlation), zero means no correlation.
    See more: https://en.wikipedia.org/wiki/Phi_coefficient

    The MCC equals to ``(tp * tn - fp * fn) / sqrt((tp + fp)(tp + fn)(tn + fp)(tn + fn))`` where

    - ``tp`` is the number of relevant recommendations
      among the first ``k`` items in recommendation list;
    - ``tn`` is the number of items the user has not interacted (bought, liked) with
      (in period after recommendations were given) and that were not recommended to them
      (in the top ``k`` items of recommendation list);
    - ``fp`` - number of non-relevant recommendations among the first `k` items of recommendation list;
    - ``fn`` - number of items the user has interacted with but that weren't recommended (in top-`k`).

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).
    """

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> pd.Series:
        tp_, tn_, fp_, fn_ = (confusion_df[column] for column in (TP, TN, FP, FN))

        numerator = tp_ * tn_ - fp_ * fn_
        denominator = np.sqrt((tp_ + fp_) * (tp_ + fn_) * (tn_ + fp_) * (tn_ + fn_))
        coefficient = numerator / denominator

        # A zero denominator means one of the sums is zero, which also zeroes the
        # numerator, so 0 / 0 is replaced by 0 (no correlation).
        coefficient.loc[denominator == 0.0] = 0.0
        return coefficient
@attr.s
class HitRate(SimpleClassificationMetric):
    """
    HitRate is the fraction of users who got at least one relevant item in their recommendation list.

    The per-user HitRate equals to ``1 if tp > 0, otherwise 0`` where

    - ``tp`` is the number of relevant recommendations
      among the first ``k`` items in recommendation list.

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).
    """

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        has_hit = confusion_df[TP] > 0
        # Cast the boolean flag to float so that averaging over users yields a rate.
        return has_hit.astype(float)
def calc_classification_metrics(
    metrics: tp.Dict[str, tp.Union[ClassificationMetric, SimpleClassificationMetric]],
    merged: pd.DataFrame,
    catalog: tp.Optional[Catalog] = None,
) -> tp.Dict[str, float]:
    """
    Calculate any classification metrics.

    Works with prepared data.

    Warning: It is not recommended to use this function directly.
    Use `calc_metrics` instead.

    Parameters
    ----------
    metrics : dict(str -> (ClassificationMetric | SimpleClassificationMetric))
        Dict of metric objects to calculate,
        where key is a metric name and value is a metric object.
    merged : pd.DataFrame
        Result of merging recommendations and interactions tables.
        Can be obtained using `merge_reco` function.
    catalog : collection, optional
        Collection of unique item ids that could be used for recommendations.
        Obligatory only if `metrics` contains `ClassificationMetric` instances.

    Returns
    -------
    dict(str->float)
        Dictionary where keys are the same as keys in `metrics`
        and values are metric calculation results.

    Raises
    ------
    ValueError
        If `catalog` is not passed and `ClassificationMetric` is present in `metrics`.
    TypeError
        If unexpected metric is present in `metrics`.
    """
    # Reject unsupported metrics before doing any work. Without this check an
    # unexpected metric would either hit an unbound result variable or silently
    # reuse the value calculated for the previous metric.
    for metric in metrics.values():
        if not isinstance(metric, (SimpleClassificationMetric, ClassificationMetric)):
            raise TypeError(f"Unexpected classification metric {metric}")

    results: tp.Dict[str, float] = {}
    merged_debiased = debias_for_metric_configs(metrics.values(), merged)

    # Confusion tables depend only on `k` and the debias config, so metrics
    # sharing both reuse a single table.
    confusions = {}
    for metric_name, metric in metrics.items():
        k, debias_config = metric.k, metric.debias_config
        confusion_task = (k, debias_config)
        is_debiased = debias_config is not None

        if confusion_task not in confusions:
            confusions[confusion_task] = calc_confusions(merged=merged_debiased[debias_config], k=k)
        confusion_df = confusions[confusion_task]

        if isinstance(metric, SimpleClassificationMetric):
            res = metric.calc_from_confusion_df(confusion_df, is_debiased=is_debiased)
        else:
            # Only `ClassificationMetric` can reach this branch (types were validated above).
            if catalog is None:
                raise ValueError(f"For calculating '{metric.__class__.__name__}' it's necessary to set `catalog`")
            res = metric.calc_from_confusion_df(confusion_df, catalog, is_debiased=is_debiased)

        results[metric_name] = res

    return results
def calc_confusions(merged: pd.DataFrame, k: int) -> pd.DataFrame:
    """
    Build per-user confusion counts from prepared (merged) data (helper function).

    For each user (`Columns.User`) the following values are calculated:

    - `LIKED` - number of items the user has interacted (bought, liked) with;
    - `TP` - number of relevant recommendations among the first `k` items at the top of recommendation list;
    - `FP` - number of non-relevant recommendations among the first `k` items of recommendation list;
    - `FN` - number of items the user has interacted with but that weren't recommended (in top `k`).

    Parameters
    ----------
    merged : pd.DataFrame
        Result of merging recommendations and interactions tables.
        Can be obtained using `merge_reco` function.
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.

    Returns
    -------
    pd.DataFrame
        Table indexed by `Columns.User` with columns `LIKED`, `TP`, `FP`, `FN`.

    Notes
    -----
    LIKED = number of rows of the user in `merged`
    TP = number of those rows with rank <= K
    FP = K - TP
    FN = LIKED - TP
    TN is not calculated here; with ``left = all - K`` it equals
    ``all - K - FN = left - FN = left - LIKED + TP``.
    """
    liked_per_user = merged.groupby(Columns.User)[Columns.Item].agg("size").rename(LIKED)
    confusion_df = liked_per_user.to_frame()

    # `@k` is resolved by pandas from this function's local scope.
    with_hit_flag = merged.eval(f"__is_hit = {Columns.Rank} <= @k")
    confusion_df[TP] = with_hit_flag.groupby(Columns.User)["__is_hit"].agg("sum")

    confusion_df[FP] = k - confusion_df[TP]
    confusion_df[FN] = confusion_df[LIKED] - confusion_df[TP]
    return confusion_df
def make_confusions(reco: pd.DataFrame, interactions: pd.DataFrame, k: int) -> pd.DataFrame:
    """
    Build per-user confusion counts from raw data (helper function).

    Recommendations and interactions are merged with `merge_reco` and the result
    is passed to `calc_confusions`. For each user the following values are calculated:

    - `LIKED` - number of items the user has interacted (bought, liked) with;
    - `TP` - number of relevant recommendations among the first `k` items at the top of recommendation list;
    - `FP` - number of non-relevant recommendations among the first `k` items of recommendation list;
    - `FN` - number of items the user has interacted with but that weren't recommended (in top-`k`).

    Parameters
    ----------
    reco : pd.DataFrame
        Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
    interactions : pd.DataFrame
        Interactions table with columns `Columns.User`, `Columns.Item`.
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.

    Returns
    -------
    pd.DataFrame
        Table with columns: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.
    """
    return calc_confusions(merge_reco(reco, interactions), k)