refactor(protocol): swap S-expression grammar for strict JSON Schema

Sostituisce la grammatica S-expression con uno schema JSON stretto. La grammatica S-expression falliva il parsing nel 64% delle generazioni del modello Qwen3-235B sul run reale; JSON e' nativo per gli LLM moderni e si parsa con json.loads.

Cambiamenti principali:

- grammar.py: costanti rinominate LOGICAL_OPS / COMPARATOR_OPS / CROSSOVER_OPS / ACTION_VALUES / KIND_VALUES.
- parser.py: nuovo AST a dataclass tipizzato (OpNode, IndicatorNode, FeatureNode, LiteralNode, Rule, Strategy); parse_strategy ora consuma JSON tramite json.loads.
- validator.py: walk dispatchato per tipo (isinstance) invece di pattern-matching su 'kind'; arity check su operatori e indicator.
- compiler.py: traversal del nuovo AST tipizzato, dispatch per isinstance; logica indicator/feature/literal invariata.
- hypothesis.py: prompt SYSTEM riscritto con esempi JSON e vincoli espliciti su no-nesting; estrazione via fence ```json``` + fallback brace-balanced.
- __init__.py: re-export pubblico delle entita' del protocollo.
- Tutti i test (parser, validator, compiler, hypothesis_agent, falsification, adversarial, e2e, smoke_run) migrati a JSON.
- Rimossa dipendenza sexpdata da pyproject.toml + uv.lock.

Test: 135 passed (era 122; aggiunti casi parser/validator). ruff + mypy strict clean. Smoke run end-to-end OK.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 21:17:26 +02:00
parent df76906505
commit 44eb6436c1
16 changed files with 1082 additions and 392 deletions
@@ -1,96 +1,203 @@
+"""JSON-based parser per la strategia di trading (Phase 1).
+
+L'AST è una piccola gerarchia di dataclass:
+
+* :class:`Strategy` è il top-level (lista di :class:`Rule`).
+* :class:`Rule` accoppia una condizione (Node) ad un'azione (str).
+* :class:`Node` è un'unione: nodi operatore (:class:`OpNode`) e nodi leaf
+  (:class:`IndicatorNode`, :class:`FeatureNode`, :class:`LiteralNode`).
+
+Convenzione di shape sui dict in input:
+
+* Nodi operatore: ``{"op": "<name>", "args": [<node>, ...]}``.
+* Nodi indicator: ``{"kind": "indicator", "name": "<name>", "params": [<num>, ...]}``.
+* Nodi feature:   ``{"kind": "feature",   "name": "<name>"}``.
+* Nodi literal:   ``{"kind": "literal",   "value": <number>}``.
+"""
+
 from __future__ import annotations

+import json
 from dataclasses import dataclass, field
 from typing import Any

-import sexpdata  # type: ignore[import-untyped]
-
-from .grammar import ACTION_VERBS, VERBS
+from .grammar import (
+    ACTION_VALUES,
+    ALL_OPS,
+)


 class ParseError(Exception):
-    """Raised when an S-expression strategy cannot be parsed."""
+    """Raised when a JSON strategy cannot be parsed into a valid AST."""
+
+
+# ---------------------------------------------------------------------------
+# Dataclass AST
+# ---------------------------------------------------------------------------


@dataclass
-class Node:
-    kind: str
-    args: list[Any] = field(default_factory=list)
+class OpNode:
+    """Operator node: logical / comparator / crossover."""
+
+    op: str
+    args: list[Node] = field(default_factory=list)
+
+
+@dataclass
+class IndicatorNode:
+    """Leaf: indicatore tecnico calcolato sul dataframe OHLCV."""
+
+    name: str
+    params: list[float] = field(default_factory=list)
+
+
+@dataclass
+class FeatureNode:
+    """Leaf: colonna OHLCV (open/high/low/close/volume)."""
+
+    name: str
+
+
+@dataclass
+class LiteralNode:
+    """Leaf: costante numerica."""
+
+    value: float
+
+
+Node = OpNode | IndicatorNode | FeatureNode | LiteralNode


@dataclass
 class Rule:
-    kind: str  # always "when"
    condition: Node
-    action: Node
+    action: str


@dataclass
 class Strategy:
-    kind: str  # always "strategy"
    rules: list[Rule]


-def _to_node(token: Any) -> Node | float | int | str:
-    """Convert a sexpdata token tree into a Node (or scalar leaf)."""
-    if isinstance(token, sexpdata.Symbol):
-        name = str(token.value())
-        # Bare symbols inside expressions (e.g. `rsi` in (indicator rsi 14))
-        # are kept as Node-with-no-args so callers can introspect uniformly.
-        return Node(kind=name, args=[])
-    if isinstance(token, list):
-        if not token:
-            raise ParseError("Empty s-expression")
-        head = token[0]
-        if not isinstance(head, sexpdata.Symbol):
-            raise ParseError(f"Non-symbol head: {head!r}")
-        name = str(head.value())
-        if name not in VERBS:
-            raise ParseError(f"Unknown verb: {name}")
-        return Node(kind=name, args=[_to_node(arg) for arg in token[1:]])
-    # numeric / string literals pass through unchanged
-    return token  # type: ignore[no-any-return]
+# ---------------------------------------------------------------------------
+# Conversione dict -> Node
+# ---------------------------------------------------------------------------
+
+
+def _to_node(obj: Any) -> Node:
+    if not isinstance(obj, dict):
+        raise ParseError(f"Node must be a JSON object, got {type(obj).__name__}")
+
+    has_op = "op" in obj
+    has_kind = "kind" in obj
+    if has_op and has_kind:
+        raise ParseError(
+            "Node cannot define both 'op' and 'kind' (mutually exclusive)"
+        )
+    if not has_op and not has_kind:
+        raise ParseError("Node must define either 'op' or 'kind'")
+
+    if has_op:
+        op = obj["op"]
+        if not isinstance(op, str):
+            raise ParseError(f"'op' must be a string, got {type(op).__name__}")
+        if op not in ALL_OPS:
+            raise ParseError(f"Unknown op: {op!r}")
+        raw_args = obj.get("args")
+        if not isinstance(raw_args, list):
+            raise ParseError(f"Operator '{op}' missing 'args' list")
+        args = [_to_node(a) for a in raw_args]
+        return OpNode(op=op, args=args)
+
+    # leaf node
+    kind = obj["kind"]
+    if not isinstance(kind, str):
+        raise ParseError(f"'kind' must be a string, got {type(kind).__name__}")
+
+    if kind == "indicator":
+        name = obj.get("name")
+        if not isinstance(name, str):
+            raise ParseError("indicator node requires string 'name'")
+        raw_params = obj.get("params", [])
+        if not isinstance(raw_params, list):
+            raise ParseError("indicator 'params' must be a list")
+        params: list[float] = []
+        for p in raw_params:
+            if isinstance(p, bool) or not isinstance(p, (int, float)):
+                raise ParseError(
+                    f"indicator '{name}' params accept only numbers, got {p!r}"
+                )
+            params.append(float(p))
+        return IndicatorNode(name=name, params=params)
+
+    if kind == "feature":
+        name = obj.get("name")
+        if not isinstance(name, str):
+            raise ParseError("feature node requires string 'name'")
+        return FeatureNode(name=name)
+
+    if kind == "literal":
+        if "value" not in obj:
+            raise ParseError("literal node requires 'value'")
+        value = obj["value"]
+        if isinstance(value, bool) or not isinstance(value, (int, float)):
+            raise ParseError(f"literal value must be numeric, got {value!r}")
+        return LiteralNode(value=float(value))
+
+    raise ParseError(f"Unknown leaf kind: {kind!r}")
+
+
+# ---------------------------------------------------------------------------
+# Top-level parser
+# ---------------------------------------------------------------------------


 def parse_strategy(src: str) -> Strategy:
-    """Parse an S-expression strategy string into a Strategy AST.
+    """Parse a JSON strategy string into a :class:`Strategy` AST.

-    The grammar is documented in :mod:`multi_swarm.protocol.grammar` and is
-    intentionally tiny (15 verbs). We delegate raw S-expr lexing to
-    :mod:`sexpdata`, then validate the verb set ourselves.
+    Lo schema atteso è::
+
+        {
+          "rules": [
+            {"condition": <node>, "action": "<action-string>"},
+            ...
+          ]
+        }
+
+    Raise :class:`ParseError` su JSON malformato o struttura inattesa.
    """
    try:
-        parsed = sexpdata.loads(src)
-    except Exception as e:  # sexpdata raises various exception types
-        raise ParseError(f"sexp parse error: {e}") from e
+        parsed = json.loads(src)
+    except json.JSONDecodeError as e:
+        raise ParseError(f"invalid JSON: {e}") from e

-    if not isinstance(parsed, list) or not parsed:
-        raise ParseError("Top-level must be (strategy ...)")
-    head = parsed[0]
-    if not isinstance(head, sexpdata.Symbol) or str(head.value()) != "strategy":
-        raise ParseError("Top-level must start with 'strategy'")
-
-    raw_rules = parsed[1:]
+    if not isinstance(parsed, dict):
+        raise ParseError("Top-level must be a JSON object with 'rules'")
+    if "rules" not in parsed:
+        raise ParseError("Top-level object must contain 'rules' key")
+    raw_rules = parsed["rules"]
+    if not isinstance(raw_rules, list):
+        raise ParseError("'rules' must be a list")
    if not raw_rules:
        raise ParseError("Strategy must contain at least one rule")

    rules: list[Rule] = []
    for raw in raw_rules:
-        if not isinstance(raw, list) or len(raw) != 3:
-            raise ParseError(f"Rule must be (when <cond> <action>): {raw!r}")
-        head_r = raw[0]
-        if not isinstance(head_r, sexpdata.Symbol) or str(head_r.value()) != "when":
-            raise ParseError(f"Rule must start with 'when': {raw!r}")
-        cond = _to_node(raw[1])
-        action = _to_node(raw[2])
-        if not isinstance(cond, Node):
-            raise ParseError(f"Condition must be a node: {cond!r}")
-        if not isinstance(action, Node):
-            raise ParseError(f"Action must be a node: {action!r}")
-        if action.kind not in ACTION_VERBS:
+        if not isinstance(raw, dict):
+            raise ParseError(f"Rule must be a JSON object, got {raw!r}")
+        if "condition" not in raw or "action" not in raw:
            raise ParseError(
-                f"Action must be one of {sorted(ACTION_VERBS)}, got {action.kind!r}"
+                f"Rule must contain 'condition' and 'action' keys: {raw!r}"
            )
-        rules.append(Rule(kind="when", condition=cond, action=action))
+        action = raw["action"]
+        if not isinstance(action, str):
+            raise ParseError(f"action must be a string, got {action!r}")
+        if action not in ACTION_VALUES:
+            raise ParseError(
+                f"action must be one of {sorted(ACTION_VALUES)}, got {action!r}"
+            )
+        cond = _to_node(raw["condition"])
+        rules.append(Rule(condition=cond, action=action))

-    return Strategy(kind="strategy", rules=rules)
+    return Strategy(rules=rules)