# Source code for rectools.metrics.classification

#  Copyright 2022-2024 MTS (Mobile Telesystems)
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""Classification recommendations metrics."""

import typing as tp

import attr
import numpy as np
import pandas as pd

from rectools import Columns

from .base import Catalog, merge_reco
from .debias import DebiasableMetrikAtK, debias_for_metric_configs, debias_interactions

# Column names of the per-user confusion table produced by `calc_confusions`.
TP = "__TP"  # relevant recommendations among the top-k
FP = "__FP"  # non-relevant recommendations among the top-k
FN = "__FN"  # interacted items that are missing from the top-k
TN = "__TN"  # not produced by `calc_confusions`; derived from the catalog size by `ClassificationMetric`
LIKED = "__LIKED"  # number of items the user interacted with


@attr.s
class ClassificationMetric(DebiasableMetrikAtK):
    """
    Base class for classification metrics that need the items catalog.

    Warning: This class should not be used directly.
    Use derived classes instead.

    Derived classes implement `_calc_per_user_from_confusion_df`.

    Parameters
    ----------
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).
    """

    def calc(self, reco: pd.DataFrame, interactions: pd.DataFrame, catalog: Catalog) -> float:
        """
        Calculate metric value.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.
        catalog : collection
            Collection of unique item ids that could be used for recommendations.

        Returns
        -------
        float
            Value of metric (average between users).
        """
        per_user = self.calc_per_user(reco, interactions, catalog)
        return per_user.mean()

    def calc_per_user(self, reco: pd.DataFrame, interactions: pd.DataFrame, catalog: Catalog) -> pd.Series:
        """
        Calculate metric values for all users.

        If `debias_config` is set, interactions are passed through `debias_interactions`
        before the confusion table is built.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.
        catalog : collection
            Collection of unique item ids that could be used for recommendations.

        Returns
        -------
        pd.Series
            Values of metric (index - user id, values - metric value for every user).
        """
        is_debiased = False
        if self.debias_config is not None:
            interactions = debias_interactions(interactions, self.debias_config)
            is_debiased = True

        self._check(reco, interactions=interactions)
        confusion_df = make_confusions(reco, interactions, self.k)
        return self.calc_per_user_from_confusion_df(confusion_df, catalog, is_debiased)

    def calc_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog, is_debiased: bool = False) -> float:
        """
        Calculate metric value from prepared confusion matrix.

        Parameters
        ----------
        confusion_df : pd.DataFrame
            Table with some confusion values for every user.
            Columns are: `Columns.User`, `LIKED`, `TP`, `FP`, `FN` and, optionally, `TN`.
            This table can be generated by `make_confusions` (or `calc_confusions`) function.
            See its description for details.
        catalog : collection
            Collection of unique item ids that could be used for recommendations.
        is_debiased : bool, default False
            An indicator of whether the debias transformation has been applied before or not.

        Returns
        -------
        float
            Value of metric (average between users).
        """
        per_user = self.calc_per_user_from_confusion_df(confusion_df, catalog, is_debiased)
        return per_user.mean()

    def calc_per_user_from_confusion_df(
        self, confusion_df: pd.DataFrame, catalog: Catalog, is_debiased: bool = False
    ) -> pd.Series:
        """
        Calculate metric values for all users from prepared confusion matrix.

        The passed table is never modified. If it has no `TN` column, the column is
        computed as ``len(catalog) - k - FN`` on a copy of the table.

        Parameters
        ----------
        confusion_df : pd.DataFrame
            Table with some confusion values for every user.
            Columns are: `Columns.User`, `LIKED`, `TP`, `FP`, `FN` and, optionally, `TN`.
            This table can be generated by `make_confusions` (or `calc_confusions`) function.
            See its description for details.
        catalog : collection
            Collection of unique item ids that could be used for recommendations.
        is_debiased : bool, default False
            An indicator of whether the debias transformation has been applied before or not.

        Returns
        -------
        pd.Series
            Values of metric (index - user id, values - metric value for every user).
        """
        self._check_debias(is_debiased, obj_name="confusion_df")
        if TN not in confusion_df:
            # `assign` returns a new frame, so the caller's table is left untouched and
            # a TN computed for one catalog can never leak into a call with another one.
            confusion_df = confusion_df.assign(**{TN: len(catalog) - self.k - confusion_df[FN]})
        return self._calc_per_user_from_confusion_df(confusion_df, catalog).rename(None)

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> pd.Series:
        # Metric-specific formula; must be provided by derived classes.
        raise NotImplementedError()


@attr.s
class SimpleClassificationMetric(DebiasableMetrikAtK):
    """
    Base class for classification metrics that are computed without the items catalog.

    Warning: This class should not be used directly.
    Use derived classes instead.

    Derived classes implement `_calc_per_user_from_confusion_df`.

    Parameters
    ----------
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).
    """

    def calc(self, reco: pd.DataFrame, interactions: pd.DataFrame) -> float:
        """
        Compute the metric averaged over users.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.

        Returns
        -------
        float
            Mean of the per-user metric values.
        """
        return self.calc_per_user(reco, interactions).mean()

    def calc_per_user(self, reco: pd.DataFrame, interactions: pd.DataFrame) -> pd.Series:
        """
        Compute the metric separately for every user.

        If `debias_config` is set, interactions are passed through `debias_interactions`
        before the confusion table is built.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.

        Returns
        -------
        pd.Series
            Metric values (index - user id, values - metric value for every user).
        """
        is_debiased = self.debias_config is not None
        if is_debiased:
            interactions = debias_interactions(interactions, self.debias_config)

        self._check(reco, interactions=interactions)
        return self.calc_per_user_from_confusion_df(
            make_confusions(reco, interactions, self.k),
            is_debiased,
        )

    def calc_from_confusion_df(self, confusion_df: pd.DataFrame, is_debiased: bool = False) -> float:
        """
        Compute the metric averaged over users from a prepared confusion table.

        Parameters
        ----------
        confusion_df : pd.DataFrame
            Table with some confusion values for every user.
            Columns are: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.
            This table can be generated by `make_confusions` (or `calc_confusions`) function.
            See its description for details.
        is_debiased : bool, default False
            An indicator of whether the debias transformation has been applied before or not.

        Returns
        -------
        float
            Mean of the per-user metric values.
        """
        return self.calc_per_user_from_confusion_df(confusion_df, is_debiased).mean()

    def calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, is_debiased: bool = False) -> pd.Series:
        """
        Compute the metric separately for every user from a prepared confusion table.

        Parameters
        ----------
        confusion_df : pd.DataFrame
            Table with some confusion values for every user.
            Columns are: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.
            This table can be generated by `make_confusions` (or `calc_confusions`) function.
            See its description for details.
        is_debiased : bool, default False
            An indicator of whether the debias transformation has been applied before or not.

        Returns
        -------
        pd.Series
            Metric values (index - user id, values - metric value for every user).
        """
        self._check_debias(is_debiased, obj_name="confusion_df")
        per_user = self._calc_per_user_from_confusion_df(confusion_df)
        # Drop whatever name the arithmetic left on the resulting series.
        return per_user.rename(None)

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        # Metric-specific formula; must be provided by derived classes.
        raise NotImplementedError()


@attr.s
class Precision(SimpleClassificationMetric):
    """
    Share of relevant items among the top-`k` recommended items.

    The Precision@k equals to ``tp / k``
    where ``tp`` is the number of relevant recommendations
    among first ``k`` items in the top of recommendation list.

    The R-Precision equals to ``tp / min(k, tp+fn)``
    where ``tp + fn`` is the total number of items in user test interactions.

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    r_precision: bool, default `False`
        Whether to calculate R-Precision instead of simple Precision. If `True` number of user
        true positives (`tp`) in recommendations will be divided by minimum of `k` and number of
        user test positives (`tp+fn`) instead of division by `k`.
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).
    """

    r_precision: bool = attr.ib(default=False)

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        hits = confusion_df[TP]
        if not self.r_precision:
            # Plain Precision@k: fixed denominator.
            return hits / self.k
        # R-Precision: the denominator cannot exceed the number of user's test positives.
        test_positives = hits + confusion_df[FN]
        return hits / np.minimum(self.k, test_positives)


@attr.s
class Recall(SimpleClassificationMetric):
    """
    Share of relevant recommended items among all items the user interacted with
    after recommendations were made.

    The recall@k equals to ``tp / liked`` where
        - ``tp`` is the number of relevant recommendations
          among first ``k`` items in the top of recommendation list;
        - ``liked`` is the number of items the user has interacted
          (bought, liked) with (in period after recommendations were given).

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).
    """

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        hits = confusion_df[TP]
        liked = confusion_df[LIKED]
        return hits / liked


@attr.s
class Accuracy(ClassificationMetric):
    """
    Share of correctly classified items among all catalog items.

    The accuracy@k equals to ``(tp + tn) / n_items`` where
        - ``tp`` is the number of relevant recommendations
          among the first ``k`` items in recommendation list;
        - ``tn`` is the number of items with which user has not interacted (bought, liked) with
          (in period after recommendations were given) and we do not recommend to him
          (in the top ``k`` items of recommendation list);
        - ``n_items`` - an overall number of items that could be used for recommendations.

    Parameters
    ----------
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).
    """

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> pd.Series:
        n_items = len(catalog)
        correct = confusion_df[TP] + confusion_df[TN]
        return correct / n_items


@attr.s
class F1Beta(SimpleClassificationMetric):
    """
    Fbeta score computed on the first k recommendations.
    See more: https://en.wikipedia.org/wiki/F-score

    The f1_beta equals to ``(1 + beta_sqr) * p@k * r@k / (beta_sqr * p@k + r@k)``
    where
        - beta_sqr equals to beta ** 2
        - p@k: precision@k equals to ``tp / k`` where
            - ``tp`` is the number of relevant recommendations
                among first ``k`` items in the top of recommendation list.
        - r@k: recall@k equals to ``tp / liked`` where
            - ``tp`` is the number of relevant recommendations
                among first ``k`` items in the top of recommendation list;
            - ``liked`` is the number of items the user has interacted
                (bought, liked) with (in period after recommendations were given).

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    beta : float
        Weight of recall. Default value: beta = 1.0
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).
    """

    beta: float = attr.ib(default=1.0)

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        hits = confusion_df[TP]
        precision = hits / self.k
        recall = hits / confusion_df[LIKED]
        weight = self.beta**2

        score = (1 + weight) * precision * recall / (weight * precision + recall)
        # With zero precision and zero recall the formula is 0 / 0; the score is defined as 0 there.
        no_hits = (precision == 0.0) & (recall == 0.0)
        return score.mask(no_hits, 0.0)


@attr.s
class MCC(ClassificationMetric):
    """
    Matthew correlation coefficient calculates correlation between actual and predicted classification.
    Min value = -1 (negative correlation), Max value = 1 (positive correlation), zero means no correlation
    See more: https://en.wikipedia.org/wiki/Phi_coefficient

    The MCC equals to ``(tp * tn - fp * fn) / sqrt((tp + fp)(tp + fn)(tn + fp)(tn + fn))`` where
        - ``tp`` is the number of relevant recommendations
          among the first ``k`` items in recommendation list;
        - ``tn`` is the number of items with which user has not interacted (bought, liked) with
          (in period after recommendations were given) and we do not recommend to him
          (in the top ``k`` items of recommendation list);
        - ``fp`` - number of non-relevant recommendations among the first `k` items of recommendation list;
        - ``fn`` - number of items the user has interacted with but that weren't recommended (in top-`k`).

    When the denominator is zero the metric value is defined as 0.

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).
    """

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> pd.Series:
        tp_ = confusion_df[TP]
        tn_ = confusion_df[TN]
        fp_ = confusion_df[FP]
        fn_ = confusion_df[FN]
        mcc_numerator = tp_ * tn_ - fp_ * fn_
        # The product of four count columns can exceed the int64 range for large catalogs
        # (two of the factors are of the order of the catalog size) and integer overflow
        # wraps silently, so the product is accumulated in float64 instead.
        mcc_denominator = np.sqrt((tp_ + fp_).astype(float) * (tp_ + fn_) * (tn_ + fp_) * (tn_ + fn_))
        mcc = mcc_numerator / mcc_denominator
        mcc.loc[mcc_denominator == 0.0] = 0.0  # if denominator == 0 then numerator also equals 0
        return mcc


@attr.s
class HitRate(SimpleClassificationMetric):
    """
    HitRate is the fraction of users for which the correct answer is included in the recommendation list.

    The HitRate equals to ``1 if tp > 0, otherwise 0`` where
        - ``tp`` is the number of relevant recommendations
          among the first ``k`` items in recommendation list.

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    debias_config : DebiasConfig, optional, default None
        Config with debias method parameters (iqr_coef, random_state).
    """

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        # Boolean "at least one hit" flag per user, converted to 0.0 / 1.0.
        has_hit = confusion_df[TP].gt(0)
        return has_hit.astype(float)


def calc_classification_metrics(
    metrics: tp.Dict[str, tp.Union[ClassificationMetric, SimpleClassificationMetric]],
    merged: pd.DataFrame,
    catalog: tp.Optional[Catalog] = None,
) -> tp.Dict[str, float]:
    """
    Calculate any classification metrics.

    Works with prepared data.

    Warning: It is not recommended to use this function directly.
    Use `calc_metrics` instead.

    Parameters
    ----------
    metrics : dict(str -> (ClassificationMetric | SimpleClassificationMetric))
        Dict of metric objects to calculate,
        where key is a metric name and value is a metric object.
    merged : pd.DataFrame
        Result of merging recommendations and interactions tables.
        Can be obtained using `merge_reco` function.
    catalog : collection, optional
        Collection of unique item ids that could be used for recommendations.
        Obligatory only if `metrics` contains `ClassificationMetric` instances.

    Returns
    -------
    dict(str->float)
        Dictionary where keys are the same as keys in `metrics`
        and values are metric calculation results.

    Raises
    ------
    ValueError
        If `catalog` is not passed and `ClassificationMetric` is present in `metrics`.
    TypeError
        If unexpected metric is present in `metrics`.
    """
    # Validate up front so an unexpected object fails with the documented TypeError
    # instead of silently reusing the previous metric's result (or raising NameError).
    for metric_name, metric in metrics.items():
        if not isinstance(metric, (SimpleClassificationMetric, ClassificationMetric)):
            raise TypeError(f"Unexpected classification metric '{metric_name}': {metric!r}")

    results = {}
    merged_debiased = debias_for_metric_configs(metrics.values(), merged)

    # Confusion tables depend only on (k, debias_config), so they are shared between metrics.
    confusions = {}
    for metric_name, metric in metrics.items():
        k, debias_config = metric.k, metric.debias_config
        confusion_task = (k, debias_config)
        is_debiased = debias_config is not None
        if confusion_task not in confusions:
            confusions[confusion_task] = calc_confusions(merged=merged_debiased[debias_config], k=k)

        confusion_df = confusions[confusion_task]
        if isinstance(metric, SimpleClassificationMetric):
            res = metric.calc_from_confusion_df(confusion_df, is_debiased=is_debiased)
        else:
            # Only `ClassificationMetric` instances can reach this branch after the validation above.
            if catalog is None:
                raise ValueError(f"For calculating '{metric.__class__.__name__}' it's necessary to set `catalog`")
            res = metric.calc_from_confusion_df(confusion_df, catalog, is_debiased=is_debiased)
        results[metric_name] = res
    return results


def calc_confusions(merged: pd.DataFrame, k: int) -> pd.DataFrame:
    """
    Build the per-user confusion table from prepared data (it's a helper function).

    For each user (`Columns.User`) the following values are calculated:
        - `LIKED` - number of items the user has interacted (bought, liked) with;
        - `TP` - number of relevant recommendations among the first `k` items at the top of recommendation list;
        - `FP` - number of non-relevant recommendations among the first `k` items of recommendation list;
        - `FN` - number of items the user has interacted with but that weren't recommended (in top `k`).

    Parameters
    ----------
    merged : pd.DataFrame
        Result of merging recommendations and interactions tables.
        Can be obtained using `merge_reco` function.
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.

    Returns
    -------
    pd.DataFrame
        Table with columns: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.

    Notes
    -----
    left = all - K
    TP = sum(rank)
    FP = K - TP
    FN = liked - TP
    TN = all - K - FN = left - FN = left - liked + TP
    """
    users = merged[Columns.User]

    # One row of `merged` per counted interaction: the group size is the number of liked items.
    liked = merged.groupby(Columns.User)[Columns.Item].agg("size")
    confusion_df = liked.to_frame(name=LIKED)

    # A row is a hit when its rank is within the top k.
    in_top_k = merged[Columns.Rank] <= k
    confusion_df[TP] = in_top_k.groupby(users).agg("sum")

    confusion_df[FP] = k - confusion_df[TP]
    confusion_df[FN] = confusion_df[LIKED] - confusion_df[TP]
    return confusion_df


def make_confusions(reco: pd.DataFrame, interactions: pd.DataFrame, k: int) -> pd.DataFrame:
    """
    Build the per-user confusion table from raw data (it's a helper function).

    Merges `reco` with `interactions` via `merge_reco` and passes the result to `calc_confusions`.

    For each user the following values are calculated:
        - `LIKED` - number of items the user has interacted (bought, liked) with;
        - `TP` - number of relevant recommendations among the first `k` items at the top of recommendation list;
        - `FP` - number of non-relevant recommendations among the first `k` items of recommendation list;
        - `FN` - number of items the user has interacted with but that weren't recommended (in top-`k`).

    Parameters
    ----------
    reco : pd.DataFrame
        Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
    interactions : pd.DataFrame
        Interactions table with columns `Columns.User`, `Columns.Item`.
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.

    Returns
    -------
    pd.DataFrame
        Table with columns: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.
    """
    return calc_confusions(merge_reco(reco, interactions), k)