import numpy as np
import pandas as pd
from footix.data_io.data_reader import EloDataReader
from footix.models.team_elo import EloTeam
from footix.utils.typing import ProbaResult
# TODO: A dataclass for agnostic_probs?
[docs]
class EloDavidson:
def __init__(
self,
n_teams: int,
k0: int,
lambd: float,
sigma: int,
agnostic_probs: ProbaResult,
**kwargs,
):
self.n_teams = n_teams
self.check_probas(agnostic_probs)
agn_home_proba, agn_draw_proba, agn_away_proba = agnostic_probs
self.kappa = self.compute_kappa(P_H=agn_home_proba, P_D=agn_draw_proba, P_A=agn_away_proba)
self.eta = self.compute_eta(P_H=agn_home_proba, P_A=agn_away_proba)
self.k0 = k0
self.lamda = lambd
self.sigma = sigma
self.championnat: dict[str, EloTeam] = {}
# TODO: check the game.result warning
[docs]
def fit(self, X_train: pd.DataFrame | EloDataReader):
if isinstance(X_train, pd.DataFrame):
X_train = EloDataReader(df_data=X_train)
clubs = X_train.unique_teams()
if len(clubs) != self.n_teams:
raise ValueError(
"Number of teams in the training dataset is not the same as in the class"
"instanciation"
)
for club in clubs:
self.championnat[club] = EloTeam(club)
for game in X_train:
Home = game.home_team
Away = game.away_team
result = self.correspondance_result(game.result)
gamma = np.abs(game.home_goals - game.away_goals)
K = self.define_k_param(gamma)
self.update_rank(self.championnat[Home], self.championnat[Away], result, K)
[docs]
def reset(self):
self.championnat = {}
[docs]
@staticmethod
def compute_kappa(P_H: float, P_D: float, P_A: float) -> float:
return P_D / np.sqrt(P_H * P_A)
[docs]
@staticmethod
def compute_eta(P_H: float, P_A: float) -> float:
return np.log10(P_H / P_A)
[docs]
@staticmethod
def check_probas(agnostic_probs: ProbaResult) -> None:
if not np.isclose(np.sum(agnostic_probs), b=1.0):
raise ValueError("Probabilities do not sum to one.\n")
[docs]
def define_k_param(self, gamma: int) -> float:
return self.k0 * (1.0 + gamma) ** self.lamda
[docs]
@staticmethod
def correspondance_result(result: str) -> float:
if result not in ["D", "H", "A"]:
raise ValueError("result must be 'H', 'D' or 'A'")
if result == "D":
return 0.5
if result == "H":
return 1.0
return 0.0
[docs]
def estimated_res(self, difference: float) -> float:
denom = 0.5 * difference / self.sigma
return (10**denom + 0.5 * self.kappa) / (10**denom + 10 ** (-denom) + self.kappa)
[docs]
def update_rank(self, home_team: EloTeam, away_team: EloTeam, result: float, k: float) -> None:
diff_rank = home_team.rank - away_team.rank + self.eta * self.sigma
new_rankH = home_team.rank + k * (result - self.estimated_res(diff_rank))
new_rankA = away_team.rank + k * (1.0 - result - self.estimated_res(-diff_rank))
home_team.rank = new_rankH
away_team.rank = new_rankA
def __str__(self):
if hasattr(self, "championnat"):
classement = ""
sorted_championnat = {
k: v for k, v in sorted(self.championnat.items(), key=lambda item: -item[1].rank)
}
for i, k in enumerate(sorted_championnat.keys()):
classement += f"{i+1}. {k} : {sorted_championnat[k].rank} \n"
return classement
else:
return "{}"
[docs]
def predict(self, home_team: str, away_team: str) -> ProbaResult:
return self.compute_proba(self.championnat[home_team], self.championnat[away_team])
[docs]
def proba_w(self, diff: float) -> float:
num = 0.5 * diff / self.sigma
return 10 ** (num) / (10**num + 10 ** (-num) + self.kappa)
[docs]
def proba_d(self, diff: float) -> float:
num = 0.5 * diff / self.sigma
return self.kappa / (10**num + 10 ** (-num) + self.kappa)
[docs]
def compute_proba(self, home_team: EloTeam, away_team: EloTeam) -> ProbaResult:
diff = home_team.rank - away_team.rank
diff = diff + self.eta * self.sigma
probaH = self.proba_w(diff)
probaA = self.proba_w(-diff)
probaDraw = self.proba_d(diff)
return ProbaResult(proba_home=probaH, proba_draw=probaDraw, proba_away=probaA)