fix(data,protocol): paginazione OHLCV + macd accetta signal param
Run reale phase1-real-002 ha rivelato: 1. Cerbero/Deribit cap ~5000 candele per call. Una richiesta di 2 anni 1h (17500 candele) ritorna troncata. CerberoOHLCVLoader._fetch ora pagina in chunk da 4500 barre, concatena e dedupe. 2. _ind_macd accettava solo (df, fast, slow). Il prompt suggerisce "(indicator macd 12 26 9)" con 3 numeri (fast/slow/signal). Aggiunto signal=9 default e calcolo histogram (macd_line - signal_line). Test suite 122 PASSED, ruff e mypy clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -19,16 +19,15 @@ the three plausible shapes (object-of-records under ``candles``/``data``/
|
|||||||
``result``/``ohlcv``/``klines``/``bars``, array-of-arrays ccxt-style, or
|
``result``/``ohlcv``/``klines``/``bars``, array-of-arrays ccxt-style, or
|
||||||
a raw list at the top level) and raises a clear error if none matches.
|
a raw list at the top level) and raises a clear error if none matches.
|
||||||
|
|
||||||
Pagination is NOT yet implemented — Cerbero is assumed to accept the full
|
Cerbero/Deribit applicano un cap soft di ~5000 candele per call: il
|
||||||
date range and page internally. If a future live call shows a cap (e.g.
|
loader pagina internamente in chunk da 4500 barre, concatena e dedupe.
|
||||||
~1000 candles per call), add a chunked fetch in a follow-up.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime
|
from datetime import datetime, timedelta
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, ClassVar
|
from typing import Any, ClassVar
|
||||||
|
|
||||||
@@ -73,10 +72,38 @@ class CerberoOHLCVLoader:
|
|||||||
df.to_parquet(cache_file)
|
df.to_parquet(cache_file)
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
# Cerbero/Deribit enforce a soft cap of ~5000 candles per call, so long
# date ranges are fetched in smaller windows and stitched back together.
_CHUNK_BARS: ClassVar[int] = 4500

def _fetch(self, req: OHLCVRequest) -> pd.DataFrame:
    """Fetch OHLCV data for *req*, paginating around the per-call candle cap.

    The requested ``[start, end)`` interval is split into windows of at
    most ``_CHUNK_BARS`` bars; each window is fetched via the client,
    parsed, concatenated, de-duplicated on the timestamp index, and
    returned sorted chronologically.
    """
    # Seconds per bar; _timeframe_to_minutes presumably maps e.g. "1h" -> 60
    # minutes — TODO confirm against its definition.
    bar_seconds = _timeframe_to_minutes(req.timeframe) * 60
    window_seconds = self._CHUNK_BARS * bar_seconds

    frames: list[pd.DataFrame] = []
    cursor = req.start
    while cursor < req.end:
        window_end = min(req.end, cursor + timedelta(seconds=window_seconds))
        window_req = OHLCVRequest(
            symbol=req.symbol, timeframe=req.timeframe,
            start=cursor, end=window_end, exchange=req.exchange,
        )
        payload = self.client.call_tool(
            req.exchange, "get_historical", self._build_args(window_req)
        )
        frame = self._parse_response(payload)
        if frame.empty:
            # Nothing in this window — move on to the next one.
            cursor = window_end
            continue
        frames.append(frame)
        newest = frame.index[-1].to_pydatetime()
        # Advance one full bar past the newest candle to avoid overlap,
        # but never step backwards relative to the window boundary.
        cursor = max(newest + timedelta(seconds=bar_seconds), window_end)

    if not frames:
        # Preserve the expected schema even when nothing came back.
        return pd.DataFrame(columns=self._COLUMNS).set_index(
            pd.DatetimeIndex([], tz="UTC", name="ts")
        )

    merged = pd.concat(frames)
    # Windows may overlap by design; keep the first occurrence of each ts.
    return merged[~merged.index.duplicated(keep="first")].sort_index()
|
||||||
|
|
||||||
def _build_args(self, req: OHLCVRequest) -> dict[str, Any]:
|
def _build_args(self, req: OHLCVRequest) -> dict[str, Any]:
|
||||||
if req.exchange == "deribit":
|
if req.exchange == "deribit":
|
||||||
|
|||||||
@@ -77,8 +77,12 @@ def _ind_realized_vol(df: pd.DataFrame, window: int) -> pd.Series:
|
|||||||
return _realized_vol(df["close"], window)
|
return _realized_vol(df["close"], window)
|
||||||
|
|
||||||
|
|
||||||
def _ind_macd(
    df: pd.DataFrame, fast: int = 12, slow: int = 26, signal: int = 9,
) -> pd.Series:
    """Return the MACD histogram of ``df["close"]``.

    The MACD line is the difference between the *fast* and *slow* moving
    averages; the histogram is that line minus its own *signal*-period
    moving average.

    NOTE(review): this deliberately uses ``_sma`` for all three averages,
    whereas the classic MACD(12, 26, 9) is defined on EMAs — confirm this
    is the intended project convention before changing it.
    """
    fast_ma = _sma(df["close"], fast)
    slow_ma = _sma(df["close"], slow)
    macd_line = fast_ma - slow_ma
    return macd_line - _sma(macd_line, signal)
|
||||||
|
|
||||||
|
|
||||||
# Annotated as ``dict[str, Any]`` deliberately: each indicator has its own
|
# Annotated as ``dict[str, Any]`` deliberately: each indicator has its own
|
||||||
|
|||||||
Reference in New Issue
Block a user