# Source code for rectools.metrics.catalog

#  Copyright 2025 MTS (Mobile Telesystems)
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""Catalog statistics recommendations metrics."""

import typing as tp

import attr
import pandas as pd

from rectools import Columns

from .base import Catalog, MetricAtK


@attr.s
class CatalogCoverage(MetricAtK):
    """
    Count (or share) of distinct items that appear in the top-``k`` recommendations of all users.

    The count is taken over the recommended items themselves: membership of
    those items in `catalog` is not checked, and `catalog` is only used as
    the denominator when `normalize` is ``True``.

    Parameters
    ----------
    k : int
        Number of items at the top of recommendations list that will be used to calculate metric.
    normalize : bool, default ``False``
        If ``True``, the number of distinct recommended items is divided by the catalog size,
        otherwise the raw count is returned.
    """

    normalize: bool = attr.ib(default=False)

    def calc(self, reco: pd.DataFrame, catalog: Catalog) -> float:
        """
        Calculate metric value.

        Parameters
        ----------
        reco : pd.DataFrame
            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
        catalog : collection
            Collection of unique item ids that could be used for recommendations.
            Only its length is used, and only when `normalize` is ``True``.

        Returns
        -------
        float
            Value of metric (aggregated for all users): the number of distinct items
            with rank not greater than ``k``, divided by ``len(catalog)`` if `normalize` is set.
            Without normalization the value is the integer count itself.

        Raises
        ------
        ZeroDivisionError
            If `normalize` is ``True`` and `catalog` is empty.
        """
        # Keep only rows inside the top-k cut-off, then count distinct item ids among them.
        res = reco.loc[reco[Columns.Rank] <= self.k, Columns.Item].nunique()
        if self.normalize:
            return res / len(catalog)
        return res


# Alias for the metric type accepted by `calc_catalog_metrics`; currently only `CatalogCoverage`.
CatalogMetric = CatalogCoverage


def calc_catalog_metrics(
    metrics: tp.Dict[str, CatalogMetric],
    reco: pd.DataFrame,
    catalog: Catalog,
) -> tp.Dict[str, float]:
    """
    Calculate metrics of catalog statistics for recommendations.

    Each metric object is evaluated independently by calling its ``calc(reco, catalog)``
    method; the same `reco` and `catalog` are passed to every metric.

    Warning: It is not recommended to use this function directly.
    Use `calc_metrics` instead.

    Parameters
    ----------
    metrics : dict(str -> CatalogMetric)
        Dict of metric objects to calculate,
        where key is a metric name and value is a metric object.
    reco : pd.DataFrame
        Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
    catalog : collection
        Collection of unique item ids that could be used for recommendations.

    Returns
    -------
    dict(str->float)
        Dictionary where keys are the same as keys in `metrics`
        and values are metric calculation results.
        An empty `metrics` dict yields an empty result.
    """
    return {metric_name: metric.calc(reco, catalog) for metric_name, metric in metrics.items()}