feat(metrics): Deflated Sharpe Ratio (Bailey & Lopez de Prado)

Aggiunge expected_max_sharpe e deflated_sharpe_ratio per correggere
l'effetto del multiple testing nella valutazione di strategie. Considera
skewness, kurtosis e numero di trial.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-09 19:21:35 +02:00
parent 2687ce7dd2
commit b61bbaf13c
2 changed files with 98 additions and 0 deletions
+66
View File
@@ -0,0 +1,66 @@
from __future__ import annotations
import numpy as np
import pandas as pd # type: ignore[import-untyped]
from scipy import stats # type: ignore[import-untyped]
from .basic import sharpe_ratio
# Euler-Mascheroni constant (gamma); weights the two normal quantiles in the
# expected-maximum-Sharpe approximation below.
EULER_MASCHERONI = 0.5772156649015329


def expected_max_sharpe(n_trials: int, sharpe_var: float) -> float:
    """Return the expected maximum Sharpe ratio over ``n_trials`` trials.

    Uses the Bailey & Lopez de Prado approximation::

        sqrt(sharpe_var) * ((1 - gamma) * Phi^-1(1 - 1/N)
                            + gamma * Phi^-1(1 - 1/(N * e)))

    where ``gamma`` is the Euler-Mascheroni constant, ``Phi^-1`` is the
    standard normal quantile function and ``N`` is ``n_trials``.

    Args:
        n_trials: Number of trials.
        sharpe_var: Variance of the Sharpe ratio across the trials.

    Returns:
        The expected maximum Sharpe ratio, or ``0.0`` when ``n_trials`` is
        smaller than 2.
    """
    if n_trials < 2:
        return 0.0

    gamma = EULER_MASCHERONI
    quantile_n = stats.norm.ppf(1 - 1.0 / n_trials)
    quantile_ne = stats.norm.ppf(1 - 1.0 / (n_trials * np.e))
    blended_quantile = (1 - gamma) * quantile_n + gamma * quantile_ne
    return float(np.sqrt(sharpe_var) * blended_quantile)
def deflated_sharpe_ratio(
    returns: pd.Series,
    n_trials: int,
    periods_per_year: int = 8760,
    sharpe_var: float = 1.0,
    skewness: float | None = None,
    kurtosis: float | None = None,
) -> tuple[float, float]:
    """Return the Deflated Sharpe Ratio (DSR) and its complementary p-value.

    The DSR (Bailey & Lopez de Prado) is the probability that the observed
    Sharpe ratio exceeds the maximum Sharpe ratio expected under the null
    after ``n_trials`` trials, with the Sharpe estimator's standard error
    adjusted for the skewness and kurtosis of the returns.

    Args:
        returns: Return series of the strategy (one value per period).
        n_trials: Number of trials, forwarded to ``expected_max_sharpe``.
        periods_per_year: Forwarded to ``sharpe_ratio`` and used to rescale
            both the observed Sharpe ratio and the expected maximum by
            ``1 / sqrt(periods_per_year)`` (the ``sharpe_ratio`` result is
            assumed to be annualized).
        sharpe_var: Variance of the Sharpe ratio across trials, forwarded to
            ``expected_max_sharpe``.
        skewness: Skewness of the returns; estimated from ``returns`` when
            ``None``.
        kurtosis: Non-excess (Pearson) kurtosis of the returns, i.e. 3 for a
            normal distribution; estimated from ``returns`` when ``None``.

    Returns:
        ``(dsr, p_value)`` where ``p_value`` is the complement of ``dsr``.
        Low p-values (e.g. < 0.05) indicate significance after the
        multiple-testing correction. ``(0.0, 1.0)`` is returned when fewer
        than 30 observations are available.
    """
    n = len(returns)
    if n < 30:
        return 0.0, 1.0

    annualization = np.sqrt(periods_per_year)
    sr_period = sharpe_ratio(returns, periods_per_year=periods_per_year) / annualization

    if skewness is None:
        skewness = float(stats.skew(returns, bias=False))
    if kurtosis is None:
        # The variance term below uses (kurtosis - 1) / 4, which is stated in
        # terms of the non-excess kurtosis (normal == 3). fisher=False returns
        # that convention; fisher=True would return excess kurtosis and
        # understate the variance.
        kurtosis = float(stats.kurtosis(returns, fisher=False, bias=False))

    sr_expected_max = expected_max_sharpe(n_trials, sharpe_var) / annualization

    # Variance of the Sharpe estimator under non-normal returns.
    sr_variance = (
        1 - skewness * sr_period + ((kurtosis - 1) / 4.0) * sr_period**2
    ) / (n - 1)
    # Floor the variance so the square root and the division stay well-defined.
    denom = np.sqrt(max(sr_variance, 1e-12))

    z = (sr_period - sr_expected_max) / denom
    dsr = float(stats.norm.cdf(z))
    # Survival function keeps precision in the upper tail, where 1 - cdf(z)
    # would round to 0.
    p_value = float(stats.norm.sf(z))
    return dsr, p_value