# Source code for rectools.metrics.classification

#  Copyright 2022-2024 MTS (Mobile Telesystems)
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""Classification recommendations metrics."""

import typing as tp
from collections import defaultdict

import attr
import numpy as np
import pandas as pd

from rectools import Columns

from .base import Catalog, MetricAtK, merge_reco

# Column names of the per-user confusion table (see `calc_confusions`).
TP = "__TP"  # true positives: relevant recommendations among the top-`k`
FP = "__FP"  # false positives: non-relevant recommendations among the top-`k`
FN = "__FN"  # false negatives: items the user interacted with that are not in the top-`k`
TN = "__TN"  # true negatives: derived by `ClassificationMetric` as `len(catalog) - k - FN`
LIKED = "__LIKED"  # number of items the user has interacted with


@attr.s
class ClassificationMetric(MetricAtK):
    """
    Base class for classification metrics that require true negatives.

    Such metrics need the item catalog, because the number of true negatives
    is derived from the catalog size.

    Warning: This class should not be used directly.
    Use derived classes instead.

    Parameters
    ----------
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.
    """

    def calc(self, reco: pd.DataFrame, interactions: pd.DataFrame, catalog: Catalog) -> float:
        """
        Calculate metric value.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.
        catalog : collection
            Collection of unique item ids that could be used for recommendations.

        Returns
        -------
        float
            Value of metric (average between users).
        """
        per_user = self.calc_per_user(reco, interactions, catalog)
        return per_user.mean()

    def calc_per_user(self, reco: pd.DataFrame, interactions: pd.DataFrame, catalog: Catalog) -> pd.Series:
        """
        Calculate metric values for all users.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.
        catalog : collection
            Collection of unique item ids that could be used for recommendations.

        Returns
        -------
        pd.Series
            Values of metric (index - user id, values - metric value for every user).
        """
        self._check(reco, interactions=interactions)
        confusion_df = make_confusions(reco, interactions, self.k)
        return self.calc_per_user_from_confusion_df(confusion_df, catalog)

    def calc_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> float:
        """
        Calculate metric value from prepared confusion matrix.

        Parameters
        ----------
        confusion_df : pd.DataFrame
            Table with some confusion values for every user.
            Columns are: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.
            This table can be generated by `make_confusions` (or `calc_confusions`) function.
            See its description for details.
        catalog : collection
            Collection of unique item ids that could be used for recommendations.

        Returns
        -------
        float
            Value of metric (average between users).
        """
        per_user = self.calc_per_user_from_confusion_df(confusion_df, catalog)
        return per_user.mean()

    def calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> pd.Series:
        """
        Calculate metric values for all users from prepared confusion matrix.

        If the table has no `TN` column, it is derived as ``len(catalog) - k - FN``.
        The passed table itself is never modified.

        Parameters
        ----------
        confusion_df : pd.DataFrame
            Table with some confusion values for every user.
            Columns are: `Columns.User`, `LIKED`, `TP`, `FP`, `FN` and, optionally, `TN`.
            This table can be generated by `make_confusions` (or `calc_confusions`) function.
            See its description for details.
        catalog : collection
            Collection of unique item ids that could be used for recommendations.

        Returns
        -------
        pd.Series
            Values of metric (index - user id, values - metric value for every user).
        """
        if TN not in confusion_df:
            # Attach TN to a copy: writing into the caller's table would leak a column
            # that silently goes stale if the same table is reused with another catalog.
            true_negatives = len(catalog) - self.k - confusion_df[FN]
            confusion_df = confusion_df.assign(**{TN: true_negatives})
        return self._calc_per_user_from_confusion_df(confusion_df, catalog).rename(None)

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> pd.Series:
        """
        Compute per-user metric values; must be overridden by derived classes.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError()


@attr.s
class SimpleClassificationMetric(MetricAtK):
    """
    Base class for classification metrics that do not need the item catalog.

    Warning: This class should not be used directly.
    Use derived classes instead.

    Parameters
    ----------
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.
    """

    def calc(self, reco: pd.DataFrame, interactions: pd.DataFrame) -> float:
        """
        Compute the metric averaged over users.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.

        Returns
        -------
        float
            Mean of the per-user metric values.
        """
        return self.calc_per_user(reco, interactions).mean()

    def calc_per_user(self, reco: pd.DataFrame, interactions: pd.DataFrame) -> pd.Series:
        """
        Compute the metric separately for every user.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        interactions : pd.DataFrame
            Interactions table with columns `Columns.User`, `Columns.Item`.

        Returns
        -------
        pd.Series
            Metric values indexed by user id.
        """
        self._check(reco, interactions=interactions)
        confusions = make_confusions(reco, interactions, self.k)
        return self.calc_per_user_from_confusion_df(confusions)

    def calc_from_confusion_df(self, confusion_df: pd.DataFrame) -> float:
        """
        Compute the metric averaged over users from a prepared confusion table.

        Parameters
        ----------
        confusion_df : pd.DataFrame
            Per-user confusion values with columns
            `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.
            Can be produced by `make_confusions` (or `calc_confusions`).

        Returns
        -------
        float
            Mean of the per-user metric values.
        """
        return self.calc_per_user_from_confusion_df(confusion_df).mean()

    def calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        """
        Compute the metric separately for every user from a prepared confusion table.

        Parameters
        ----------
        confusion_df : pd.DataFrame
            Per-user confusion values with columns
            `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.
            Can be produced by `make_confusions` (or `calc_confusions`).

        Returns
        -------
        pd.Series
            Metric values indexed by user id; the series itself is unnamed.
        """
        values = self._calc_per_user_from_confusion_df(confusion_df)
        return values.rename(None)

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        """Per-user metric computation hook; derived classes must override it."""
        raise NotImplementedError()


@attr.s
class Precision(SimpleClassificationMetric):
    """
    Share of relevant items among the top-`k` recommended items.

    Precision@k is ``tp / k``, where ``tp`` is the number of relevant
    recommendations among the first ``k`` items of the recommendation list.

    R-Precision is ``tp / min(k, tp + fn)``, where ``tp + fn`` is the total
    number of items in the user's test interactions.

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    r_precision: bool, default `False`
        If `True`, compute R-Precision: the number of true positives (`tp`) is divided
        by the minimum of `k` and the number of user test positives (`tp+fn`)
        instead of by `k`.
    """

    r_precision: bool = attr.ib(default=False)

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        hits = confusion_df[TP]
        if not self.r_precision:
            return hits / self.k
        # R-Precision: the denominator is capped by the number of test positives.
        n_positives = hits + confusion_df[FN]
        return hits / np.minimum(self.k, n_positives)


@attr.s
class Recall(SimpleClassificationMetric):
    """
    Share of the items a user interacted with (after recommendations were made)
    that appear among the top-`k` recommendations.

    Recall@k is ``tp / liked`` where
        - ``tp`` is the number of relevant recommendations
          among the first ``k`` items of the recommendation list;
        - ``liked`` is the number of items the user has interacted with
          (bought, liked) in the period after recommendations were given.

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    """

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        hits = confusion_df[TP]
        n_liked = confusion_df[LIKED]
        return hits / n_liked


@attr.s
class Accuracy(ClassificationMetric):
    """
    Share of correctly classified items among all catalog items.

    Accuracy@k is ``(tp + tn) / n_items`` where
        - ``tp`` is the number of relevant recommendations
          among the first ``k`` items of the recommendation list;
        - ``tn`` is the number of items the user has not interacted with
          (in the period after recommendations were given) and that are not
          recommended to them in the top ``k``;
        - ``n_items`` is the overall number of items that could be used for recommendations.

    Parameters
    ----------
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.
    """

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> pd.Series:
        n_items = len(catalog)
        n_correct = confusion_df[TP] + confusion_df[TN]
        return n_correct / n_items


@attr.s
class F1Beta(SimpleClassificationMetric):
    """
    F-beta score over the first `k` recommendations.
    See more: https://en.wikipedia.org/wiki/F-score

    The score is ``(1 + beta_sqr) * p@k * r@k / (beta_sqr * p@k + r@k)``
    where
        - ``beta_sqr`` is ``beta ** 2``;
        - ``p@k`` (precision@k) is ``tp / k``;
        - ``r@k`` (recall@k) is ``tp / liked``;
        - ``tp`` is the number of relevant recommendations
          among the first ``k`` items of the recommendation list;
        - ``liked`` is the number of items the user has interacted with
          (bought, liked) in the period after recommendations were given.

    When both precision and recall are zero the score is defined as zero.

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    beta : float
        Weight of recall. Default value: beta = 1.0
    """

    beta: float = attr.ib(default=1.0)

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        hits = confusion_df[TP]
        precision = hits / self.k
        recall = hits / confusion_df[LIKED]
        weight = self.beta**2

        f_beta = (1 + weight) * precision * recall / (weight * precision + recall)
        # 0 / 0 above yields NaN for users without hits; their score is 0 by definition.
        no_hits = (precision == 0.0) & (recall == 0.0)
        return f_beta.mask(no_hits, 0.0)


@attr.s
class MCC(ClassificationMetric):
    """
    Matthew correlation coefficient calculates correlation between actual and predicted classification.
    Min value = -1 (negative correlation), Max value = 1 (positive correlation), zero means no correlation
    See more: https://en.wikipedia.org/wiki/Phi_coefficient

    The MCC equals to ``(tp * tn - fp * fn) / sqrt((tp + fp)(tp + fn)(tn + fp)(tn + fn))`` where
        - ``tp`` is the number of relevant recommendations
          among the first ``k`` items in recommendation list;
        - ``tn`` is the number of items with which user has not interacted (bought, liked) with
          (in period after recommendations were given) and we do not recommend to him
          (in the top ``k`` items of recommendation list);
        - ``fp`` - number of non-relevant recommendations among the first `k` items of recommendation list;
        - ``fn`` - number of items the user has interacted with but that weren't recommended (in top-`k`).

    If the denominator is zero, the metric value is defined as zero.

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    """

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> pd.Series:
        tp_ = confusion_df[TP]
        tn_ = confusion_df[TN]
        fp_ = confusion_df[FP]
        fn_ = confusion_df[FN]
        mcc_numerator = tp_ * tn_ - fp_ * fn_
        # The product of four counts can exceed the int64 range for large catalogs
        # (integer overflow wraps silently), so it is accumulated in float64.
        mcc_denominator = np.sqrt((tp_ + fp_).astype(float) * (tp_ + fn_) * (tn_ + fp_) * (tn_ + fn_))
        mcc = mcc_numerator / mcc_denominator
        # A zero denominator implies a zero numerator as well, so 0 / 0 is replaced with 0.
        mcc.loc[mcc_denominator == 0.0] = 0.0
        return mcc


@attr.s
class HitRate(SimpleClassificationMetric):
    """
    Fraction of users that have at least one relevant item in their recommendation list.

    The per-user value is ``1 if tp > 0, otherwise 0`` where
        - ``tp`` is the number of relevant recommendations
          among the first ``k`` items of the recommendation list.

    Parameters
    ----------
    k : int
        Number of items in top of recommendations list that will be used to calculate metric.
    """

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        has_hit = confusion_df[TP].gt(0)
        return has_hit.astype(float)


def calc_classification_metrics(
    metrics: tp.Dict[str, tp.Union[ClassificationMetric, SimpleClassificationMetric]],
    merged: pd.DataFrame,
    catalog: tp.Optional[Catalog] = None,
) -> tp.Dict[str, float]:
    """
    Calculate any classification metrics.

    Works with prepared data. Metrics sharing the same `k` reuse one confusion table.

    Warning: It is not recommended to use this function directly.
    Use `calc_metrics` instead.

    Parameters
    ----------
    metrics : dict(str -> (ClassificationMetric | SimpleClassificationMetric))
        Dict of metric objects to calculate,
        where key is a metric name and value is a metric object.
    merged : pd.DataFrame
        Result of merging recommendations and interactions tables.
        Can be obtained using `merge_reco` function.
    catalog : collection, optional
        Collection of unique item ids that could be used for recommendations.
        Obligatory only if `metrics` contains `ClassificationMetric` instances.

    Returns
    -------
    dict(str->float)
        Dictionary where keys are the same as keys in `metrics`
        and values are metric calculation results.

    Raises
    ------
    ValueError
        If `catalog` is not passed and `ClassificationMetric` is present in `metrics`.
    TypeError
        If unexpected metric is present in `metrics`.
    """
    # Validate every metric before any computation, so that invalid input fails fast
    # and an object of a wrong type is reported as TypeError even if it has no `k`.
    k_map: tp.DefaultDict[int, tp.List[str]] = defaultdict(list)
    for name, metric in metrics.items():
        if not isinstance(metric, (SimpleClassificationMetric, ClassificationMetric)):
            raise TypeError(f"Unexpected classification metric {metric}")
        needs_catalog = not isinstance(metric, SimpleClassificationMetric)
        if needs_catalog and catalog is None:
            raise ValueError(f"For calculating '{metric.__class__.__name__}' it's necessary to set `catalog`")
        k_map[metric.k].append(name)

    results: tp.Dict[str, float] = {}
    for k, k_metrics in k_map.items():
        # One confusion table per distinct `k`, shared by all metrics with that `k`.
        confusion_df = calc_confusions(merged, k)

        for metric_name in k_metrics:
            metric = metrics[metric_name]
            if isinstance(metric, SimpleClassificationMetric):
                res = metric.calc_from_confusion_df(confusion_df)
            else:
                res = metric.calc_from_confusion_df(confusion_df, catalog)
            results[metric_name] = res

    return results


def calc_confusions(merged: pd.DataFrame, k: int) -> pd.DataFrame:
    """
    Build the per-user confusion table from prepared (merged) data (helper function).

    The following values are computed for each user (`Columns.User`):
        - `LIKED` - number of items the user has interacted (bought, liked) with;
        - `TP` - number of relevant recommendations among the first `k` items at the top of recommendation list;
        - `FP` - number of non-relevant recommendations among the first `k` items of recommendation list;
        - `FN` - number of items the user has interacted with but that weren't recommended (in top `k`).

    Parameters
    ----------
    merged : pd.DataFrame
        Result of merging recommendations and interactions tables.
        Can be obtained using `merge_reco` function.
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.

    Returns
    -------
    pd.DataFrame
        Table indexed by `Columns.User` with columns `LIKED`, `TP`, `FP`, `FN`.

    Notes
    -----
    LIKED = number of rows of the user in `merged`
    TP = number of those rows with rank <= k
    FP = k - TP
    FN = LIKED - TP
    TN is not computed here; with ``all`` being the catalog size it equals ``all - k - FN``.
    """
    liked = merged.groupby(Columns.User)[Columns.Item].agg("size")
    confusion_df = liked.rename(LIKED).to_frame()

    # A row is a hit when its item is ranked within the top-k.
    is_hit = merged[Columns.Rank] <= k
    confusion_df[TP] = is_hit.groupby(merged[Columns.User]).agg("sum")

    confusion_df[FP] = k - confusion_df[TP]
    confusion_df[FN] = confusion_df[LIKED] - confusion_df[TP]
    return confusion_df


def make_confusions(reco: pd.DataFrame, interactions: pd.DataFrame, k: int) -> pd.DataFrame:
    """
    Build the per-user confusion table from raw data (helper function).

    Recommendations and interactions are merged with `merge_reco`
    and the result is passed to `calc_confusions`.

    The following values are computed for each user:
        - `LIKED` - number of items the user has interacted (bought, liked) with;
        - `TP` - number of relevant recommendations among the first `k` items at the top of recommendation list;
        - `FP` - number of non-relevant recommendations among the first `k` items of recommendation list;
        - `FN` - number of items the user has interacted with but that weren't recommended (in top-`k`).

    Parameters
    ----------
    reco : pd.DataFrame
        Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
    interactions : pd.DataFrame
        Interactions table with columns `Columns.User`, `Columns.Item`.
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.

    Returns
    -------
    pd.DataFrame
        Table with columns: `Columns.User`, `LIKED`, `TP`, `FP`, `FN`.
    """
    return calc_confusions(merge_reco(reco, interactions), k)