diff --git a/src/multi_swarm/dashboard/aquarium.py b/src/multi_swarm/dashboard/aquarium.py index a94ba13..817f13e 100644 --- a/src/multi_swarm/dashboard/aquarium.py +++ b/src/multi_swarm/dashboard/aquarium.py @@ -1,8 +1,9 @@ """Aquarium 2D visualization helpers. -Builds a list of fish records from a merged DataFrame (evaluations + genomes) -and renders a self-contained HTML/JS canvas animation, embeddable in Streamlit -via ``streamlit.components.v1.html``. +Builds fish records (with full genome attributes + ancestor lineage) and +renders a self-contained HTML/JS canvas animation, embeddable in Streamlit +via ``streamlit.components.v1.html``. Includes a click handler that opens +an info panel showing genome details and BFS ancestor levels. """ from __future__ import annotations @@ -25,50 +26,6 @@ STYLE_COLORS: dict[str, str] = { DEFAULT_COLOR: str = "#94a3b8" -def build_fish_dataset(merged: pd.DataFrame, max_fish: int = 30) -> list[dict[str, Any]]: - """Build a list of fish records from a merged evaluations+genomes DataFrame. - - Expects columns: ``genome_id``, ``fitness``, ``cognitive_style``, ``n_trades``, - ``dsr``. Rows with NaN ``fitness`` are dropped. Result is sorted by fitness - descending and capped at ``max_fish`` entries. 
def _is_nan(v: Any) -> bool:
    """Return ``True`` when ``pd.isna`` reports ``v`` as a missing scalar.

    Inputs for which ``pd.isna`` does not reduce to a single boolean (for
    example a multi-element list, which yields an array) count as "not NaN".
    """
    try:
        return bool(pd.isna(v))
    # NOTE(review): the original handler line sits in the gap between two diff
    # hunks and is not visible here; ``(TypeError, ValueError)`` is assumed
    # because ``bool()`` of a multi-element array raises ``ValueError`` --
    # confirm against the full file.
    except (TypeError, ValueError):
        return False


def _safe_float(v: Any, default: float = 0.0) -> float:
    """Coerce ``v`` to ``float``, returning ``default`` when that is impossible.

    ``None``, NaN-like scalars, values ``float()`` rejects, and integers too
    large for a float all yield ``default``.
    """
    if v is None or _is_nan(v):
        return default
    try:
        return float(v)
    # OverflowError: ``float(10 ** 400)`` -- an int too large for a double.
    except (TypeError, ValueError, OverflowError):
        return default


def _safe_int(v: Any, default: int = 0) -> int:
    """Coerce ``v`` to ``int``, returning ``default`` when that is impossible.

    ``None``, NaN-like scalars, values ``int()`` rejects, and infinities all
    yield ``default``. Finite floats are truncated towards zero by ``int()``.
    """
    if v is None or _is_nan(v):
        return default
    try:
        return int(v)
    # OverflowError: ``int(float("inf"))`` -- infinity is not NaN, so it gets
    # past the guard above and must be caught here.
    except (TypeError, ValueError, OverflowError):
        return default


def _safe_str(v: Any, default: str = "") -> str:
    """Return ``str(v)``, or ``default`` when ``v`` is ``None`` or NaN-like."""
    if v is None or _is_nan(v):
        return default
    return str(v)


def _safe_list(v: Any) -> list[Any]:
    """Return ``v`` as a new list, or ``[]`` when it is missing or not iterable.

    * ``None`` and NaN-like scalars -> ``[]``
    * ``list`` -> shallow copy
    * ``str`` / ``bytes`` -> a single-item list (``[]`` when empty); a string
      is one value, not a sequence of characters
    * any other iterable (tuple, numpy array, ...) -> ``list(v)``
    * anything else -> ``[]``
    """
    if v is None:
        return []
    if isinstance(v, list):
        return list(v)
    if isinstance(v, (str, bytes)):
        # ``list("abc")`` would yield ``["a", "b", "c"]``.
        return [v] if v else []
    # pandas may store non-list iterables (e.g. numpy arrays) in object cells;
    # a NaN scalar standing in for "no value" is filtered out first.
    if _is_nan(v):
        return []
    try:
        return list(v)
    except TypeError:
        return []
def build_lineage_index(
    genomes_df: pd.DataFrame, evals_df: pd.DataFrame | None
) -> dict[str, dict[str, Any]]:
    """Build ``{genome_id: attrs}`` for every genome in the run.

    ``genomes_df`` must come from ``genomes_df(repo, run_id)`` (no gen filter):
    columns include ``id``, ``generation_idx``, ``system_prompt``,
    ``feature_access``, ``temperature``, ``top_p``, ``model_tier``,
    ``lookback_window``, ``cognitive_style``, ``parent_ids``, ``generation``.

    ``evals_df`` must come from ``evaluations_df(repo, run_id)``: columns
    include ``genome_id``, ``fitness``, ``dsr``, ``sharpe``, ``max_dd``,
    ``n_trades``. It may be ``None``, empty, or lack the ``genome_id`` join
    key; in all three cases the metric fields default to zero.

    Returns a dict keyed by genome id. Each value holds ``id``,
    ``generation``, ``fitness``, ``dsr``, ``sharpe``, ``max_dd``,
    ``n_trades``, ``cognitive_style``, ``system_prompt``, ``temperature``,
    ``lookback_window``, ``feature_access``, ``model_tier`` and
    ``parent_ids``. Missing or NaN cells are replaced by neutral defaults
    (``0``, ``0.0``, ``""`` or ``[]``). Rows without a usable ``id`` are
    skipped; an empty dict is returned when ``genomes_df`` is empty or has
    no ``id`` column.

    When a genome matches several evaluation rows the left join yields one
    row per evaluation and the last one in merge order wins.
    """
    if genomes_df is None or genomes_df.empty or "id" not in genomes_df.columns:
        return {}

    can_join = (
        evals_df is not None
        and not evals_df.empty
        and "genome_id" in evals_df.columns
    )
    if can_join:
        # Left join keeps genomes that were never evaluated. Overlapping
        # column names keep the genome-side value; the eval-side copy gets
        # the ``_eval`` suffix and is ignored below.
        merged = genomes_df.merge(
            evals_df,
            left_on="id",
            right_on="genome_id",
            how="left",
            suffixes=("", "_eval"),
        )
    else:
        merged = genomes_df.copy()
        for col in ("fitness", "dsr", "sharpe", "max_dd", "n_trades"):
            if col not in merged.columns:
                merged[col] = 0.0 if col != "n_trades" else 0

    index: dict[str, dict[str, Any]] = {}
    for row in merged.to_dict("records"):
        gid = _safe_str(row.get("id"))
        if not gid:
            continue
        # ``generation`` is the genome's evolutionary generation (from the
        # payload). If absent, fall back to ``generation_idx`` (column added
        # by the repository). Defensive: both may be missing in edge cases.
        gen_val: Any = row.get("generation")
        if gen_val is None or _is_nan(gen_val):
            gen_val = row.get("generation_idx", 0)
        index[gid] = {
            "id": gid,
            "generation": _safe_int(gen_val, 0),
            "fitness": _safe_float(row.get("fitness"), 0.0),
            "dsr": _safe_float(row.get("dsr"), 0.0),
            "sharpe": _safe_float(row.get("sharpe"), 0.0),
            "max_dd": _safe_float(row.get("max_dd"), 0.0),
            "n_trades": _safe_int(row.get("n_trades"), 0),
            "cognitive_style": _safe_str(row.get("cognitive_style"), ""),
            "system_prompt": _safe_str(row.get("system_prompt"), ""),
            "temperature": _safe_float(row.get("temperature"), 0.0),
            "lookback_window": _safe_int(row.get("lookback_window"), 0),
            "feature_access": _safe_list(row.get("feature_access")),
            "model_tier": _safe_str(row.get("model_tier"), ""),
            "parent_ids": _safe_list(row.get("parent_ids")),
        }
    return index
def _parent_id_list(entry: dict[str, Any]) -> list[Any]:
    """Return ``entry["parent_ids"]`` as a list, tolerating malformed values."""
    raw = entry.get("parent_ids")
    if raw is None:
        return []
    if isinstance(raw, str):
        # A bare string is a single id, not an iterable of characters.
        return [raw] if raw else []
    try:
        return list(raw)
    except TypeError:
        # Non-iterable placeholder (e.g. a NaN float): no known parents.
        return []


def trace_ancestors(
    genome_id: str,
    lineage_index: dict[str, dict[str, Any]],
    max_levels: int = 5,
) -> list[list[dict[str, Any]]]:
    """BFS over ``parent_ids`` returning levels of ancestors.

    ``levels[0]`` = direct parents, ``levels[1]`` = grandparents, etc. Each
    entry is a small dict (no ``system_prompt``, to keep the JSON payload
    light): ``{id, generation, fitness, cognitive_style}``.

    * At most ``max_levels`` levels are returned; ``max_levels <= 0`` and an
      unknown ``genome_id`` both yield ``[]``.
    * Cycles are guarded via a ``seen`` set, so every ancestor appears once,
      at the shallowest level where it is reachable.
    * Parents missing from ``lineage_index`` (not in this run) are stubbed
      with sentinel values (``generation=-1``, ``fitness=0.0``, empty style)
      so the lineage display still renders the relationship. Their own
      parents are unknown and are not followed.
    * A ``parent_ids`` value that is absent, ``None`` or non-iterable counts
      as "no parents"; a bare string counts as a single parent id.
    """
    levels: list[list[dict[str, Any]]] = []
    root = lineage_index.get(genome_id) or {}
    frontier: list[Any] = _parent_id_list(root)
    seen: set[Any] = {genome_id}

    for _ in range(max_levels):
        if not frontier:
            break
        level_entries: list[dict[str, Any]] = []
        next_frontier: list[Any] = []
        for pid in frontier:
            if pid in seen:
                continue
            seen.add(pid)
            entry = lineage_index.get(pid)
            if entry is None:
                level_entries.append(
                    {
                        "id": pid,
                        "generation": -1,
                        "fitness": 0.0,
                        "cognitive_style": "",
                    }
                )
                continue
            level_entries.append(
                {
                    "id": entry["id"],
                    "generation": entry["generation"],
                    "fitness": entry["fitness"],
                    "cognitive_style": entry["cognitive_style"],
                }
            )
            next_frontier.extend(_parent_id_list(entry))
        if not level_entries:
            # Everything on this level was already visited: nothing new above.
            break
        levels.append(level_entries)
        frontier = next_frontier
    return levels
def _row_genome_id(row: dict[str, Any]) -> str:
    """Return the row's genome id from ``genome_id``, falling back to ``id``."""
    # Resolve each column separately: ``nan or other`` evaluates to ``nan``
    # (NaN is truthy), so a plain ``or`` on the raw cells would never reach
    # ``id`` when ``genome_id`` is NaN.
    return _safe_str(row.get("genome_id")) or _safe_str(row.get("id"))


def _synthesize_lineage_index(
    rows: list[dict[str, Any]],
) -> dict[str, dict[str, Any]]:
    """Build a minimal lineage index from active rows (legacy call path).

    Rows without an id, or whose ``fitness`` is missing/NaN, are left out.
    When an id occurs more than once the last row wins.
    """
    synth: dict[str, dict[str, Any]] = {}
    for row in rows:
        gid = _row_genome_id(row)
        if not gid:
            continue
        fitness_val = _safe_float(row.get("fitness"), float("nan"))
        if math.isnan(fitness_val):
            continue
        synth[gid] = {
            "id": gid,
            "generation": _safe_int(row.get("generation"), 0),
            "fitness": fitness_val,
            "dsr": _safe_float(row.get("dsr"), 0.0),
            "sharpe": _safe_float(row.get("sharpe"), 0.0),
            "max_dd": _safe_float(row.get("max_dd"), 0.0),
            "n_trades": _safe_int(row.get("n_trades"), 0),
            "cognitive_style": _safe_str(row.get("cognitive_style"), "unknown"),
            "system_prompt": _safe_str(row.get("system_prompt"), ""),
            "temperature": _safe_float(row.get("temperature"), 0.0),
            "lookback_window": _safe_int(row.get("lookback_window"), 0),
            "feature_access": _safe_list(row.get("feature_access")),
            "model_tier": _safe_str(row.get("model_tier"), ""),
            "parent_ids": _safe_list(row.get("parent_ids")),
        }
    return synth


def build_fish_dataset(
    active_df: pd.DataFrame,
    lineage_index: dict[str, dict[str, Any]] | None = None,
    max_lineage_levels: int = 5,
    *,
    max_fish: int | None = None,
) -> list[dict[str, Any]]:
    """Build full fish records for each active genome.

    For every row in ``active_df`` the matching entry in ``lineage_index`` is
    looked up by ``genome_id`` (or ``id`` when ``genome_id`` is missing or
    NaN) and returned together with its BFS ancestor levels under the
    ``"ancestors"`` key. Rows whose id is not in the index, or whose indexed
    ``fitness`` is ``None``/NaN, are skipped. An entry with no ``fitness``
    key at all is kept.

    Args:
        active_df: Rows describing the genomes to display.
        lineage_index: ``{genome_id: attrs}`` as produced by
            ``build_lineage_index``. If ``None`` (legacy call site, e.g. test
            fixtures with simple merged DataFrames) a minimal index is
            synthesized from ``active_df`` itself.
        max_lineage_levels: Maximum number of ancestor levels to attach.
        max_fish: Optional cap kept for legacy callers. When given, only the
            ``max_fish`` records with the highest fitness are returned,
            sorted by fitness descending; negative values behave like ``0``.
            When ``None`` (default) every matching row is returned in
            ``active_df`` order.

    Returns:
        One dict per retained row: the index attributes plus ``"ancestors"``.
    """
    if active_df is None or active_df.empty:
        return []

    rows = active_df.to_dict("records")
    if lineage_index is None:
        lineage_index = _synthesize_lineage_index(rows)

    selected: list[tuple[str, dict[str, Any]]] = []
    for row in rows:
        gid = _row_genome_id(row)
        if not gid:
            continue
        attrs = lineage_index.get(gid)
        if attrs is None:
            continue
        # A missing key defaults to 0.0 (kept); ``None`` and NaN are dropped
        # rather than crashing ``math.isnan``.
        if math.isnan(_safe_float(attrs.get("fitness", 0.0), float("nan"))):
            continue
        selected.append((gid, attrs))

    if max_fish is not None:
        # Stable sort: ties keep their ``active_df`` order. Capping happens
        # before the ancestor trace so discarded rows cost nothing.
        selected.sort(key=lambda pair: _safe_float(pair[1].get("fitness")), reverse=True)
        selected = selected[: max(max_fish, 0)]

    return [
        {**attrs, "ancestors": trace_ancestors(gid, lineage_index, max_lineage_levels)}
        for gid, attrs in selected
    ]
+ synth: dict[str, dict[str, Any]] = {} + for _, row in active_df.iterrows(): + gid = _safe_str(row.get("genome_id") or row.get("id")) + if not gid: + continue + fitness_val = _safe_float(row.get("fitness"), float("nan")) + if math.isnan(fitness_val): + continue + synth[gid] = { + "id": gid, + "generation": _safe_int(row.get("generation"), 0), + "fitness": fitness_val, + "dsr": _safe_float(row.get("dsr"), 0.0), + "sharpe": _safe_float(row.get("sharpe"), 0.0), + "max_dd": _safe_float(row.get("max_dd"), 0.0), + "n_trades": _safe_int(row.get("n_trades"), 0), + "cognitive_style": _safe_str(row.get("cognitive_style"), "unknown"), + "system_prompt": _safe_str(row.get("system_prompt"), ""), + "temperature": _safe_float(row.get("temperature"), 0.0), + "lookback_window": _safe_int(row.get("lookback_window"), 0), + "feature_access": _safe_list(row.get("feature_access")), + "model_tier": _safe_str(row.get("model_tier"), ""), + "parent_ids": _safe_list(row.get("parent_ids")), + } + lineage_index = synth + + fish: list[dict[str, Any]] = [] + for _, row in active_df.iterrows(): + gid = _safe_str(row.get("genome_id") or row.get("id")) + if not gid: + continue + attrs = lineage_index.get(gid) + if attrs is None: + continue + if math.isnan(attrs.get("fitness", 0.0)): + continue + ancestors = trace_ancestors(gid, lineage_index, max_lineage_levels) + record = {**attrs, "ancestors": ancestors} + fish.append(record) + return fish + + def build_aquarium_html( fish: list[dict[str, Any]], canvas_w: int = 1000, canvas_h: int = 600, - show_labels: bool = False, ) -> str: - """Build the self-contained HTML/JS string for the aquarium canvas.""" + """Build the self-contained HTML/JS string for the aquarium canvas. + + The output embeds a click handler: tapping a fish opens an info panel + (top-right of the canvas) showing its genome attributes and BFS ancestor + levels. Labels are no longer rendered on the canvas itself. 
+ """ fish_json = json.dumps(fish) palette_json = json.dumps(STYLE_COLORS) default_color = DEFAULT_COLOR - show_labels_js = "true" if show_labels else "false" # All braces inside