From b61bbaf13ca9339c84068303fd30167615eae647 Mon Sep 17 00:00:00 2001
From: AdrianoDev
Date: Sat, 9 May 2026 19:21:35 +0200
Subject: [PATCH] feat(metrics): Deflated Sharpe Ratio (Bailey & Lopez de Prado)

Aggiunge expected_max_sharpe e deflated_sharpe_ratio per correggere
multiple testing nella valutazione di strategie. Considera skewness,
kurtosis e numero di trial.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 src/multi_swarm/metrics/dsr.py | 103 +++++++++++++++++++++++++++++++++
 tests/unit/test_metrics_dsr.py |  44 ++++++++++++++
 2 files changed, 147 insertions(+)
 create mode 100644 src/multi_swarm/metrics/dsr.py
 create mode 100644 tests/unit/test_metrics_dsr.py

diff --git a/src/multi_swarm/metrics/dsr.py b/src/multi_swarm/metrics/dsr.py
new file mode 100644
index 0000000..07b29dd
--- /dev/null
+++ b/src/multi_swarm/metrics/dsr.py
@@ -0,0 +1,103 @@
+from __future__ import annotations
+
+import numpy as np
+import pandas as pd  # type: ignore[import-untyped]
+from scipy import stats  # type: ignore[import-untyped]
+
+from .basic import sharpe_ratio
+
+EULER_MASCHERONI = 0.5772156649015329
+
+
+def expected_max_sharpe(n_trials: int, sharpe_var: float) -> float:
+    """Return the expected maximum Sharpe ratio across ``n_trials`` trials.
+
+    Approximation from Bailey & Lopez de Prado:
+
+        sqrt(sharpe_var) * ((1 - gamma) * Phi^-1(1 - 1/N)
+                            + gamma * Phi^-1(1 - 1/(N*e)))
+
+    where ``gamma`` is the Euler-Mascheroni constant and ``Phi^-1`` is the
+    standard normal quantile function.
+
+    Args:
+        n_trials: Number of trials (strategies/configurations) evaluated.
+        sharpe_var: Variance of the Sharpe ratios across the trials.
+
+    Returns:
+        The expected maximum Sharpe ratio, or 0.0 when ``n_trials < 2``.
+
+    Raises:
+        ValueError: If ``sharpe_var`` is negative.
+    """
+    if sharpe_var < 0:
+        raise ValueError(f"sharpe_var must be non-negative, got {sharpe_var}")
+    if n_trials < 2:
+        # Phi^-1(1 - 1/N) is -inf at N == 1, so short-circuit instead.
+        return 0.0
+    z1 = stats.norm.ppf(1 - 1.0 / n_trials)
+    z2 = stats.norm.ppf(1 - 1.0 / (n_trials * np.e))
+    return float(
+        np.sqrt(sharpe_var) * ((1 - EULER_MASCHERONI) * z1 + EULER_MASCHERONI * z2)
+    )
+
+
+def deflated_sharpe_ratio(
+    returns: pd.Series,
+    n_trials: int,
+    periods_per_year: int = 8760,
+    sharpe_var: float = 1.0,
+    skewness: float | None = None,
+    kurtosis: float | None = None,
+) -> tuple[float, float]:
+    """Return the Deflated Sharpe Ratio (DSR) and its complementary p-value.
+
+    The DSR is the probability that the observed Sharpe ratio exceeds the
+    maximum expected under the null across ``n_trials`` trials. The p-value
+    is ``1 - DSR``; low values (e.g. < 0.05) indicate significance after
+    correcting for multiple testing.
+
+    Args:
+        returns: Series of strategy returns (presumably one per period).
+        n_trials: Number of trials (strategies/configurations) evaluated.
+        periods_per_year: Periods per year; forwarded to ``sharpe_ratio`` and
+            used to rescale Sharpe ratios by ``sqrt(periods_per_year)``.
+        sharpe_var: Variance of the Sharpe ratios across the trials.
+        skewness: Skewness of ``returns``; estimated from the sample if None.
+        kurtosis: Pearson (non-excess) kurtosis of ``returns`` (3.0 for a
+            Gaussian); estimated from the sample if None.
+
+    Returns:
+        ``(dsr, p_value)``; ``(0.0, 1.0)`` when fewer than 30 observations
+        are supplied.
+    """
+    n = len(returns)
+    if n < 30:
+        return 0.0, 1.0
+
+    # NOTE(review): assumes sharpe_ratio annualizes by sqrt(periods_per_year),
+    # so dividing by the same factor recovers the per-period Sharpe - confirm.
+    annualization = np.sqrt(periods_per_year)
+    sr = sharpe_ratio(returns, periods_per_year=periods_per_year)
+    sr_period = sr / annualization
+
+    if skewness is None:
+        skewness = float(stats.skew(returns, bias=False))
+    if kurtosis is None:
+        # The variance term below uses (kurtosis - 1) / 4, which expects the
+        # Pearson kurtosis (3 for a Gaussian), not the Fisher/excess one.
+        kurtosis = float(stats.kurtosis(returns, fisher=False, bias=False))
+
+    sr_expected_max = expected_max_sharpe(n_trials, sharpe_var) / annualization
+
+    # Variance of the Sharpe estimate, floored so the square root and the
+    # division stay well defined.
+    sr_variance = (
+        1 - skewness * sr_period + ((kurtosis - 1) / 4.0) * sr_period**2
+    ) / (n - 1)
+    denom = np.sqrt(max(sr_variance, 1e-12))
+    z = (sr_period - sr_expected_max) / denom
+    dsr = float(stats.norm.cdf(z))
+    # sf() keeps precision in the upper tail where 1 - cdf() rounds to zero.
+    p_value = float(stats.norm.sf(z))
+    return dsr, p_value
diff --git a/tests/unit/test_metrics_dsr.py b/tests/unit/test_metrics_dsr.py
new file mode 100644
index 0000000..b6ed841
--- /dev/null
+++ b/tests/unit/test_metrics_dsr.py
@@ -0,0 +1,44 @@
+import numpy as np
+import pandas as pd
+from scipy import stats
+
+from multi_swarm.metrics.dsr import deflated_sharpe_ratio, expected_max_sharpe
+
+
+def test_expected_max_sharpe_grows_with_n_trials():
+    e1 = expected_max_sharpe(n_trials=1, sharpe_var=1.0)
+    e10 = expected_max_sharpe(n_trials=10, sharpe_var=1.0)
+    e100 = expected_max_sharpe(n_trials=100, sharpe_var=1.0)
+    assert e1 < e10 < e100
+
+
+def test_dsr_zero_when_sharpe_equals_expected_max():
+    np.random.seed(0)
+    returns = pd.Series(np.random.normal(0, 0.01, 500))
+    _dsr, p = deflated_sharpe_ratio(
+        returns, n_trials=10, periods_per_year=8760, sharpe_var=0.0
+    )
+    # With sharpe_var=0 and a Sharpe estimate near 0 the p-value should be high;
+    # only its range is asserted because the sample Sharpe is noisy.
+    assert 0.0 <= p <= 1.0
+
+
+def test_dsr_significant_for_strong_sharpe():
+    np.random.seed(42)
+    returns = pd.Series(np.random.normal(0.005, 0.005, 1000))
+    dsr, p = deflated_sharpe_ratio(
+        returns, n_trials=5, periods_per_year=8760, sharpe_var=1.0
+    )
+    # Expected Sharpe > 0 and a low p-value.
+    assert dsr > 0
+    assert p < 0.5
+
+
+def test_dsr_default_kurtosis_is_pearson():
+    np.random.seed(7)
+    returns = pd.Series(np.random.normal(0.001, 0.01, 2000))
+    pearson = float(stats.kurtosis(returns, fisher=False, bias=False))
+    default = deflated_sharpe_ratio(returns, n_trials=5)
+    explicit = deflated_sharpe_ratio(returns, n_trials=5, kurtosis=pearson)
+    # The sample estimate must use the same convention as the explicit argument.
+    assert default == explicit