Source code for rectools.models.nn.transformers.utils

#  Copyright 2025 MTS (Mobile Telesystems)
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

import typing as tp

import numpy as np
import pandas as pd

from rectools import Columns, ExternalIds


def leave_one_out_mask(
    interactions: pd.DataFrame, val_users: tp.Union[ExternalIds, int, None] = None
) -> np.ndarray:
    """
    Create a boolean mask for leave-one-out validation by selecting the last interaction per user.

    The most recent interaction of every user is found by ranking the user's interactions
    by timestamp. Timestamp ties are broken by row order, so exactly one row per user is
    selected. The result can then be restricted to a subset of users with `val_users`,
    given either as a number of users to sample at random or as explicit user ids.

    Parameters
    ----------
    interactions : pd.DataFrame
        User-item interactions data. ``Columns.User`` and ``Columns.Datetime`` columns are
        used by this function.
    val_users : Optional[Union[ExternalIds, int]], default ``None``
        Validation user filter. Can be:

        - None: use all users
        - int (Python or NumPy integer): randomly sample N users from the unique user list
          without replacement, using the global NumPy random state
        - array-like: explicit list of user ids to include

    Returns
    -------
    np.ndarray
        Boolean array aligned with the rows of `interactions`, where True indicates that
        the interaction is the last one for its user and the user is in the validation set.

    Raises
    ------
    ValueError
        Raised by ``np.random.choice`` if an integer `val_users` exceeds the number of
        unique users.
    """
    groups = interactions.groupby(Columns.User)
    # method="first" assigns distinct ranks to tied timestamps (in row order), so the
    # highest rank within a user is always held by exactly one row.
    time_order = groups[Columns.Datetime].rank(method="first", ascending=True).astype(int)
    n_interactions = groups.transform("size").astype(int)
    # A row is the user's last interaction when its rank equals the user's row count.
    last_interact_mask = time_order == n_interactions

    if val_users is None:
        return last_interact_mask.values

    # NumPy integer scalars are accepted as a sample size too; otherwise they would be
    # passed to `isin`, which rejects non list-like values.
    if isinstance(val_users, (int, np.integer)):
        users = interactions[Columns.User].unique()
        val_users = np.random.choice(users, size=val_users, replace=False)

    mask = interactions[Columns.User].isin(val_users) & last_interact_mask
    return mask.values