| """Implements the feature tranformers of the VAEP framework.""" |
|
|
| from functools import wraps |
| from typing import Any, Callable, Union, no_type_check |
|
|
| import numpy as np |
| import pandas as pd |
| from pandera.typing import DataFrame |
|
|
| import socceraction.spadl.config as spadlcfg |
| from socceraction.atomic.spadl import AtomicSPADLSchema |
| from socceraction.spadl.schema import SPADLSchema |
|
|
| SPADLActions = DataFrame[SPADLSchema] |
| Actions = Union[DataFrame[SPADLSchema], DataFrame[AtomicSPADLSchema]] |
| GameStates = list[Actions] |
| Features = DataFrame[Any] |
| FeatureTransfomer = Callable[[GameStates], Features] |
|
|
|
|
| def feature_column_names(fs: list[FeatureTransfomer], nb_prev_actions: int = 3) -> list[str]: |
| """Return the names of the features generated by a list of transformers. |
| |
| Parameters |
| ---------- |
| fs : list(callable) |
| A list of feature transformers. |
| nb_prev_actions : int, default=3 # noqa: DAR103 |
| The number of previous actions included in the game state. |
| |
| Returns |
| ------- |
| list(str) |
| The name of each generated feature. |
| """ |
| spadlcolumns = [ |
| "game_id", |
| "original_event_id", |
| "action_id", |
| "period_id", |
| "time_seconds", |
| "team_id", |
| "player_id", |
| "start_x", |
| "start_y", |
| "end_x", |
| "end_y", |
| "result_id", |
| "result_name", |
| "bodypart_id", |
| "bodypart_name", |
| "type_id", |
| "type_name", |
| ] |
| dummy_actions = pd.DataFrame(np.zeros((10, len(spadlcolumns))), columns=spadlcolumns) |
| for c in spadlcolumns: |
| if "name" in c: |
| dummy_actions[c] = dummy_actions[c].astype(str) |
| gs = gamestates(dummy_actions, nb_prev_actions) |
| return list(pd.concat([f(gs) for f in fs], axis=1).columns.values) |
|
|
|
|
| def gamestates(actions: Actions, nb_prev_actions: int = 3) -> GameStates: |
| r"""Convert a dataframe of actions to gamestates. |
| |
| Each gamestate is represented as the <nb_prev_actions> previous actions. |
| |
| The list of gamestates is internally represented as a list of actions |
| dataframes :math:`[a_0,a_1,\ldots]` where each row in the a_i dataframe contains the |
| previous action of the action in the same row in the :math:`a_{i-1}` dataframe. |
| |
| Parameters |
| ---------- |
| actions : Actions |
| A DataFrame with the actions of a game. |
| nb_prev_actions : int, default=3 # noqa: DAR103 |
| The number of previous actions included in the game state. |
| |
| Raises |
| ------ |
| ValueError |
| If the number of actions is smaller 1. |
| |
| Returns |
| ------- |
| GameStates |
| The <nb_prev_actions> previous actions for each action. |
| """ |
| if nb_prev_actions < 1: |
| raise ValueError("The game state should include at least one preceding action.") |
| states = [actions] |
| for i in range(1, nb_prev_actions): |
| prev_actions = actions.groupby(["game_id", "period_id"], sort=False, as_index=False).apply( |
| lambda x: x.shift(i, fill_value=float("nan")).fillna(x.iloc[0]) |
| ) |
| prev_actions.index = actions.index.copy() |
| states.append(prev_actions) |
| return states |
|
|
|
|
| def play_left_to_right(gamestates: GameStates, home_team_id: int) -> GameStates: |
| """Perform all actions in a gamestate in the same playing direction. |
| |
| This changes the start and end location of each action in a gamestate, |
| such that all actions are performed as if the team that performs the first |
| action in the gamestate plays from left to right. |
| |
| Parameters |
| ---------- |
| gamestates : GameStates |
| The game states of a game. |
| home_team_id : int |
| The ID of the home team. |
| |
| Returns |
| ------- |
| GameStates |
| The game states with all actions performed left to right. |
| |
| See Also |
| -------- |
| socceraction.vaep.features.play_left_to_right : For transforming actions. |
| """ |
| a0 = gamestates[0] |
| away_idx = a0.team_id != home_team_id |
| for actions in gamestates: |
| for col in ["start_x", "end_x"]: |
| actions.loc[away_idx, col] = spadlcfg.field_length - actions[away_idx][col].values |
| for col in ["start_y", "end_y"]: |
| actions.loc[away_idx, col] = spadlcfg.field_width - actions[away_idx][col].values |
| return gamestates |
|
|
|
|
| @no_type_check |
| def simple(actionfn: Callable) -> FeatureTransfomer: |
| """Make a function decorator to apply actionfeatures to game states. |
| |
| Parameters |
| ---------- |
| actionfn : Callable |
| A feature transformer that operates on actions. |
| |
| Returns |
| ------- |
| FeatureTransfomer |
| A feature transformer that operates on game states. |
| """ |
|
|
| @wraps(actionfn) |
| def _wrapper(gamestates: list[Actions]) -> pd.DataFrame: |
| if not isinstance(gamestates, (list,)): |
| gamestates = [gamestates] |
| X = [] |
| for i, a in enumerate(gamestates): |
| Xi = actionfn(a) |
| Xi.columns = [c + "_a" + str(i) for c in Xi.columns] |
| X.append(Xi) |
| return pd.concat(X, axis=1) |
|
|
| return _wrapper |
|
|
|
|
| |
|
|
|
|
| @simple |
| def actiontype(actions: Actions) -> Features: |
| """Get the type of each action. |
| |
| Parameters |
| ---------- |
| actions : Actions |
| The actions of a game. |
| |
| Returns |
| ------- |
| Features |
| The 'type_id' of each action. |
| """ |
| X = pd.DataFrame(index=actions.index) |
| X["actiontype"] = pd.Categorical( |
| actions["type_id"].replace(spadlcfg.actiontypes_df().type_name.to_dict()), |
| categories=spadlcfg.actiontypes, |
| ordered=False, |
| ) |
| return X |
|
|
|
|
| @simple |
| def actiontype_onehot(actions: SPADLActions) -> Features: |
| """Get the one-hot-encoded type of each action. |
| |
| Parameters |
| ---------- |
| actions : SPADLActions |
| The actions of a game. |
| |
| Returns |
| ------- |
| Features |
| A one-hot encoding of each action's type. |
| """ |
| X = {} |
| for type_id, type_name in enumerate(spadlcfg.actiontypes): |
| col = "actiontype_" + type_name |
| X[col] = actions["type_id"] == type_id |
| return pd.DataFrame(X, index=actions.index) |
|
|
|
|
| @simple |
| def result(actions: SPADLActions) -> Features: |
| """Get the result of each action. |
| |
| Parameters |
| ---------- |
| actions : SPADLActions |
| The actions of a game. |
| |
| Returns |
| ------- |
| Features |
| The 'result_id' of each action. |
| """ |
| X = pd.DataFrame(index=actions.index) |
| X["result"] = pd.Categorical( |
| actions["result_id"].replace(spadlcfg.results_df().result_name.to_dict()), |
| categories=spadlcfg.results, |
| ordered=False, |
| ) |
| return X |
|
|
|
|
| @simple |
| def result_onehot(actions: SPADLActions) -> Features: |
| """Get the one-hot-encode result of each action. |
| |
| Parameters |
| ---------- |
| actions : SPADLActions |
| The actions of a game. |
| |
| Returns |
| ------- |
| Features |
| The one-hot encoding of each action's result. |
| """ |
| X = {} |
| for result_id, result_name in enumerate(spadlcfg.results): |
| col = "result_" + result_name |
| X[col] = actions["result_id"] == result_id |
| return pd.DataFrame(X, index=actions.index) |
|
|
|
|
| @simple |
| def actiontype_result_onehot(actions: SPADLActions) -> Features: |
| """Get a one-hot encoding of the combination between the type and result of each action. |
| |
| Parameters |
| ---------- |
| actions : SPADLActions |
| The actions of a game. |
| |
| Returns |
| ------- |
| Features |
| The one-hot encoding of each action's type and result. |
| """ |
| res = result_onehot.__wrapped__(actions) |
| tys = actiontype_onehot.__wrapped__(actions) |
| df = {} |
| for tyscol in list(tys.columns): |
| for rescol in list(res.columns): |
| df[tyscol + "_" + rescol] = tys[tyscol] & res[rescol] |
| return pd.DataFrame(df, index=actions.index) |
|
|
|
|
| @simple |
| def bodypart(actions: Actions) -> Features: |
| """Get the body part used to perform each action. |
| |
| This feature generator does not distinguish between the left and right foot. |
| |
| Parameters |
| ---------- |
| actions : Actions |
| The actions of a game. |
| |
| Returns |
| ------- |
| Features |
| The 'bodypart_id' of each action. |
| |
| See Also |
| -------- |
| bodypart_detailed : |
| An alternative version that splits between the left and right foot. |
| """ |
| X = pd.DataFrame(index=actions.index) |
| foot_id = spadlcfg.bodyparts.index("foot") |
| left_foot_id = spadlcfg.bodyparts.index("foot_left") |
| right_foot_id = spadlcfg.bodyparts.index("foot_right") |
| X["bodypart"] = pd.Categorical( |
| actions["bodypart_id"] |
| .replace([left_foot_id, right_foot_id], foot_id) |
| .replace(spadlcfg.bodyparts_df().bodypart_name.to_dict()), |
| categories=["foot", "head", "other", "head/other"], |
| ordered=False, |
| ) |
| return X |
|
|
|
|
| @simple |
| def bodypart_detailed(actions: Actions) -> Features: |
| """Get the body part with split by foot used to perform each action. |
| |
| This feature generator distinguishes between the left and right foot, if |
| supported by the dataprovider. |
| |
| Parameters |
| ---------- |
| actions : Actions |
| The actions of a game. |
| |
| Returns |
| ------- |
| Features |
| The 'bodypart_id' of each action. |
| |
| See Also |
| -------- |
| bodypart : |
| An alternative version that does not split between the left and right foot. |
| """ |
| X = pd.DataFrame(index=actions.index) |
| X["bodypart"] = pd.Categorical( |
| actions["bodypart_id"].replace(spadlcfg.bodyparts_df().bodypart_name.to_dict()), |
| categories=spadlcfg.bodyparts, |
| ordered=False, |
| ) |
| return X |
|
|
|
|
| @simple |
| def bodypart_onehot(actions: Actions) -> Features: |
| """Get the one-hot-encoded bodypart of each action. |
| |
| This feature generator does not distinguish between the left and right foot. |
| |
| Parameters |
| ---------- |
| actions : Actions |
| The actions of a game. |
| |
| Returns |
| ------- |
| Features |
| The one-hot encoding of each action's bodypart. |
| |
| See Also |
| -------- |
| bodypart_detailed_onehot : |
| An alternative version that splits between the left and right foot. |
| """ |
| X = {} |
| for bodypart_id, bodypart_name in enumerate(spadlcfg.bodyparts): |
| if bodypart_name in ("foot_left", "foot_right"): |
| continue |
| col = "bodypart_" + bodypart_name |
| if bodypart_name == "foot": |
| foot_id = spadlcfg.bodyparts.index("foot") |
| left_foot_id = spadlcfg.bodyparts.index("foot_left") |
| right_foot_id = spadlcfg.bodyparts.index("foot_right") |
| X[col] = actions["bodypart_id"].isin([foot_id, left_foot_id, right_foot_id]) |
| elif bodypart_name == "head/other": |
| head_id = spadlcfg.bodyparts.index("head") |
| other_id = spadlcfg.bodyparts.index("other") |
| head_other_id = spadlcfg.bodyparts.index("head/other") |
| X[col] = actions["bodypart_id"].isin([head_id, other_id, head_other_id]) |
| else: |
| X[col] = actions["bodypart_id"] == bodypart_id |
| return pd.DataFrame(X, index=actions.index) |
|
|
|
|
| @simple |
| def bodypart_detailed_onehot(actions: Actions) -> Features: |
| """Get the one-hot-encoded bodypart with split by foot of each action. |
| |
| This feature generator distinguishes between the left and right foot, if |
| supported by the dataprovider. |
| |
| Parameters |
| ---------- |
| actions : Actions |
| The actions of a game. |
| |
| Returns |
| ------- |
| Features |
| The one-hot encoding of each action's bodypart. |
| |
| See Also |
| -------- |
| bodypart_onehot : |
| An alternative version that does not split between the left and right foot. |
| """ |
| X = {} |
| for bodypart_id, bodypart_name in enumerate(spadlcfg.bodyparts): |
| col = "bodypart_" + bodypart_name |
| if bodypart_name == "foot": |
| foot_id = spadlcfg.bodyparts.index("foot") |
| left_foot_id = spadlcfg.bodyparts.index("foot_left") |
| right_foot_id = spadlcfg.bodyparts.index("foot_right") |
| X[col] = actions["bodypart_id"].isin([foot_id, left_foot_id, right_foot_id]) |
| elif bodypart_name == "head/other": |
| head_id = spadlcfg.bodyparts.index("head") |
| other_id = spadlcfg.bodyparts.index("other") |
| head_other_id = spadlcfg.bodyparts.index("head/other") |
| X[col] = actions["bodypart_id"].isin([head_id, other_id, head_other_id]) |
| else: |
| X[col] = actions["bodypart_id"] == bodypart_id |
| return pd.DataFrame(X, index=actions.index) |
|
|
|
|
| @simple |
| def time(actions: Actions) -> Features: |
| """Get the time when each action was performed. |
| |
| This generates the following features: |
| :period_id: |
| The ID of the period. |
| :time_seconds: |
| Seconds since the start of the period. |
| :time_seconds_overall: |
| Seconds since the start of the game. Stoppage time during previous |
| periods is ignored. |
| |
| Parameters |
| ---------- |
| actions : Actions |
| The actions of a game. |
| |
| Returns |
| ------- |
| Features |
| The 'period_id', 'time_seconds' and 'time_seconds_overall' when each |
| action was performed. |
| """ |
| match_time_at_period_start = {1: 0, 2: 45, 3: 90, 4: 105, 5: 120} |
| timedf = actions[["period_id", "time_seconds"]].copy() |
| timedf["time_seconds_overall"] = ( |
| timedf.period_id.map(match_time_at_period_start) * 60 |
| ) + timedf.time_seconds |
| return timedf |
|
|
|
|
| @simple |
| def startlocation(actions: SPADLActions) -> Features: |
| """Get the location where each action started. |
| |
| Parameters |
| ---------- |
| actions : SPADLActions |
| The actions of a game. |
| |
| Returns |
| ------- |
| Features |
| The 'start_x' and 'start_y' location of each action. |
| """ |
| return actions[["start_x", "start_y"]] |
|
|
|
|
| @simple |
| def endlocation(actions: SPADLActions) -> Features: |
| """Get the location where each action ended. |
| |
| Parameters |
| ---------- |
| actions : SPADLActions |
| The actions of a game. |
| |
| Returns |
| ------- |
| Features |
| The 'end_x' and 'end_y' location of each action. |
| """ |
| return actions[["end_x", "end_y"]] |
|
|
|
|
| _goal_x: float = spadlcfg.field_length |
| _goal_y: float = spadlcfg.field_width / 2 |
|
|
|
|
| @simple |
| def startpolar(actions: SPADLActions) -> Features: |
| """Get the polar coordinates of each action's start location. |
| |
| The center of the opponent's goal is used as the origin. |
| |
| Parameters |
| ---------- |
| actions : SPADLActions |
| The actions of a game. |
| |
| Returns |
| ------- |
| Features |
| The 'start_dist_to_goal' and 'start_angle_to_goal' of each action. |
| """ |
| polardf = pd.DataFrame(index=actions.index) |
| dx = (_goal_x - actions["start_x"]).abs().values |
| dy = (_goal_y - actions["start_y"]).abs().values |
| polardf["start_dist_to_goal"] = np.sqrt(dx**2 + dy**2) |
| with np.errstate(divide="ignore", invalid="ignore"): |
| polardf["start_angle_to_goal"] = np.nan_to_num(np.arctan(dy / dx)) |
| return polardf |
|
|
|
|
| @simple |
| def endpolar(actions: SPADLActions) -> Features: |
| """Get the polar coordinates of each action's end location. |
| |
| The center of the opponent's goal is used as the origin. |
| |
| Parameters |
| ---------- |
| actions : SPADLActions |
| The actions of a game. |
| |
| Returns |
| ------- |
| Features |
| The 'end_dist_to_goal' and 'end_angle_to_goal' of each action. |
| """ |
| polardf = pd.DataFrame(index=actions.index) |
| dx = (_goal_x - actions["end_x"]).abs().values |
| dy = (_goal_y - actions["end_y"]).abs().values |
| polardf["end_dist_to_goal"] = np.sqrt(dx**2 + dy**2) |
| with np.errstate(divide="ignore", invalid="ignore"): |
| polardf["end_angle_to_goal"] = np.nan_to_num(np.arctan(dy / dx)) |
| return polardf |
|
|
|
|
| @simple |
| def movement(actions: SPADLActions) -> Features: |
| """Get the distance covered by each action. |
| |
| Parameters |
| ---------- |
| actions : SPADLActions |
| The actions of a game. |
| |
| Returns |
| ------- |
| Features |
| The horizontal ('dx'), vertical ('dy') and total ('movement') distance |
| covered by each action. |
| """ |
| mov = pd.DataFrame(index=actions.index) |
| mov["dx"] = actions.end_x - actions.start_x |
| mov["dy"] = actions.end_y - actions.start_y |
| mov["movement"] = np.sqrt(mov.dx**2 + mov.dy**2) |
| return mov |
|
|
|
|
| @simple |
| def player_possession_time(actions: SPADLActions) -> Features: |
| """Get the time (sec) a player was in ball possession before attempting the action. |
| |
| We only look at the dribble preceding the action and reset the possession |
| time after a defensive interception attempt or a take-on. |
| |
| Parameters |
| ---------- |
| actions : SPADLActions |
| The actions of a game. |
| |
| Returns |
| ------- |
| Features |
| The 'player_possession_time' of each action. |
| """ |
| cur_action = actions[["period_id", "time_seconds", "player_id", "type_id"]] |
| prev_action = actions.copy().shift(1)[["period_id", "time_seconds", "player_id", "type_id"]] |
| df = cur_action.join(prev_action, rsuffix="_prev") |
| same_player = df.player_id == df.player_id_prev |
| same_period = df.period_id == df.period_id_prev |
| prev_dribble = df.type_id_prev == spadlcfg.actiontypes.index("dribble") |
| mask = same_period & same_player & prev_dribble |
| df.loc[mask, "player_possession_time"] = ( |
| df.loc[mask, "time_seconds"] - df.loc[mask, "time_seconds_prev"] |
| ) |
| return df[["player_possession_time"]].fillna(0.0) |
|
|
|
|
| |
|
|
|
|
| def team(gamestates: GameStates) -> Features: |
| """Check whether the possession changed during the game state. |
| |
| For each action in the game state, True if the team that performed the |
| action is the same team that performed the last action of the game state; |
| otherwise False. |
| |
| Parameters |
| ---------- |
| gamestates : GameStates |
| The game states of a game. |
| |
| Returns |
| ------- |
| Features |
| A dataframe with a column 'team_ai' for each <nb_prev_actions> indicating |
| whether the team that performed action a0 is in possession. |
| """ |
| a0 = gamestates[0] |
| teamdf = pd.DataFrame(index=a0.index) |
| for i, a in enumerate(gamestates[1:]): |
| teamdf["team_" + (str(i + 1))] = a.team_id == a0.team_id |
| return teamdf |
|
|
|
|
| def time_delta(gamestates: GameStates) -> Features: |
| """Get the number of seconds between the last and previous actions. |
| |
| Parameters |
| ---------- |
| gamestates : GameStates |
| The game states of a game. |
| |
| Returns |
| ------- |
| Features |
| A dataframe with a column 'time_delta_i' for each <nb_prev_actions> |
| containing the number of seconds between action ai and action a0. |
| """ |
| a0 = gamestates[0] |
| dt = pd.DataFrame(index=a0.index) |
| for i, a in enumerate(gamestates[1:]): |
| dt["time_delta_" + (str(i + 1))] = a0.time_seconds - a.time_seconds |
| return dt |
|
|
|
|
| def space_delta(gamestates: GameStates) -> Features: |
| """Get the distance covered between the last and previous actions. |
| |
| Parameters |
| ---------- |
| gamestates : GameStates |
| The gamestates of a game. |
| |
| Returns |
| ------- |
| Features |
| A dataframe with a column for the horizontal ('dx_a0i'), vertical |
| ('dy_a0i') and total ('mov_a0i') distance covered between each |
| <nb_prev_actions> action ai and action a0. |
| """ |
| a0 = gamestates[0] |
| spaced = pd.DataFrame(index=a0.index) |
| for i, a in enumerate(gamestates[1:]): |
| dx = a.end_x - a0.start_x |
| spaced["dx_a0" + (str(i + 1))] = dx |
| dy = a.end_y - a0.start_y |
| spaced["dy_a0" + (str(i + 1))] = dy |
| spaced["mov_a0" + (str(i + 1))] = np.sqrt(dx**2 + dy**2) |
| return spaced |
|
|
|
|
| def speed(gamestates: GameStates) -> Features: |
| """Get the speed at which the ball moved during the previous actions. |
| |
| Parameters |
| ---------- |
| gamestates : GameStates |
| The game states of a game. |
| |
| Returns |
| ------- |
| Features |
| A dataframe with columns 'speedx_a0i', 'speedy_a0i', 'speed_a0i' |
| for each <nb_prev_actions> containing the ball speed in m/s between |
| action ai and action a0. |
| """ |
| a0 = gamestates[0] |
| speed = pd.DataFrame(index=a0.index) |
| for i, a in enumerate(gamestates[1:]): |
| dx = a.end_x - a0.start_x |
| dy = a.end_y - a0.start_y |
| dt = a0.time_seconds - a.time_seconds |
| dt[dt <= 0] = 1e-6 |
| speed["speedx_a0" + (str(i + 1))] = dx.abs() / dt |
| speed["speedy_a0" + (str(i + 1))] = dy.abs() / dt |
| speed["speed_a0" + (str(i + 1))] = np.sqrt(dx**2 + dy**2) / dt |
| return speed |
|
|
|
|
| |
|
|
|
|
| def goalscore(gamestates: GameStates) -> Features: |
| """Get the number of goals scored by each team after the action. |
| |
| Parameters |
| ---------- |
| gamestates : GameStates |
| The gamestates of a game. |
| |
| Returns |
| ------- |
| Features |
| The number of goals scored by the team performing the last action of the |
| game state ('goalscore_team'), by the opponent ('goalscore_opponent'), |
| and the goal difference between both teams ('goalscore_diff'). |
| """ |
| actions = gamestates[0] |
| teamA = actions["team_id"].values[0] |
| goals = actions["type_name"].str.contains("shot") & ( |
| actions["result_id"] == spadlcfg.results.index("success") |
| ) |
| owngoals = actions["type_name"].str.contains("shot") & ( |
| actions["result_id"] == spadlcfg.results.index("owngoal") |
| ) |
| teamisA = actions["team_id"] == teamA |
| teamisB = ~teamisA |
| goalsteamA = (goals & teamisA) | (owngoals & teamisB) |
| goalsteamB = (goals & teamisB) | (owngoals & teamisA) |
| goalscoreteamA = goalsteamA.cumsum() - goalsteamA |
| goalscoreteamB = goalsteamB.cumsum() - goalsteamB |
|
|
| scoredf = pd.DataFrame(index=actions.index) |
| scoredf["goalscore_team"] = (goalscoreteamA * teamisA) + (goalscoreteamB * teamisB) |
| scoredf["goalscore_opponent"] = (goalscoreteamB * teamisA) + (goalscoreteamA * teamisB) |
| scoredf["goalscore_diff"] = scoredf["goalscore_team"] - scoredf["goalscore_opponent"] |
| return scoredf |
|
|