perf: Fase 1 speed+precision (V1 V11 P1 P2 P5)

V1 Coarse-to-fine angolare:
- Al top-level valuta solo 1 variante ogni coarse_angle_factor (default 2)
- Espande ai vicini nel full-res per preservare l'accuracy
- Safe anche per template allungati (factor=2 non perde match)

V11 Cache matcher in-memory (LRU, capacità 8):
- Key = md5(ROI bytes + params tecnici che influenzano il training)
- Re-match con stessi parametri: train_time = 0s (era 0.5-1.5s)
- OrderedDict LRU con _cache_get_matcher / _cache_put_matcher

P1 Fit parabolico 2D bivariato:
- _subpixel_peak ora usa lo stencil 3x3 completo:
  f(dx,dy) = a + b*dx + c*dy + d*dx^2 + e*dy^2 + f*dx*dy
- Argmax risolto analiticamente (sistema 2x2); fallback separabile se det~0
- Precisione attesa: 0.1-0.3 px (era 0.5 px separabile)

P5 Golden-section angle search:
- Sostituisce 5 sample equispaziati con convergenza log(n)
- Tol 0.1 gradi, 8 iterazioni max
- Helper _score_at_angle interno per valutare lo score a offset arbitrario

P2 Weighted centroid plateau:
- Peso = (score - (max-0.01))^2 per enfatizzare il top del plateau

Benchmark suite 16 casi (4 immagini x full/part x fast/preciso):
- prima di Fase 1: totale find 27.3s
- dopo Fase 1: totale find 25.1s
- nessuna regressione sul match count, in alcuni casi miglioramenti di precisione

ROADMAP.md aggiornato con checklist Fase 1.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 11:35:40 +02:00
parent b83e577eab
commit 37b718e45e
3 changed files with 213 additions and 79 deletions
@@ -26,6 +26,7 @@ della ROI (modello non-rettangolare).

 from __future__ import annotations

+import math
 import os
 from concurrent.futures import ThreadPoolExecutor
 from dataclasses import dataclass
@@ -33,6 +34,8 @@ from dataclasses import dataclass
 import cv2
 import numpy as np

+_GOLDEN = (math.sqrt(5.0) - 1.0) / 2.0  # ≈ 0.618
+
 from pm2d._jit_kernels import (
    score_by_shift as _jit_score_by_shift,
    score_bitmap as _jit_score_bitmap,
@@ -338,9 +341,10 @@ class LineShapeMatcher:
    ) -> tuple[float, float]:
        """Posizione sub-pixel del picco.

-        Se c'è un plateau di valori ~massimi (spread_radius satura il peak
-        su un'area) ritorna il CENTROIDE del plateau. Altrimenti fit
-        parabolico 2D ±0.5 px.
+        1. Plateau saturo → centroide pesato (peso = (score - (max - 0.01))²).
+        2. Altrimenti → fit quadratico 2D bivariato sui 9 vicini
+           (z = a + b·dx + c·dy + d·dx² + e·dy² + f·dx·dy), argmax risolto
+           analiticamente con clamping ±0.5 px.
        """
        H, W = acc.shape
        val = float(acc[y, x])
@@ -350,18 +354,37 @@ class LineShapeMatcher:
        patch = acc[y0:y1, x0:x1]
        plateau = patch >= val - 0.01
        if plateau.sum() > 1:
+            # Centroide pesato per (score - (max-0.01))² per enfatizzare i top
+            weights = np.where(plateau, patch - (val - 0.01), 0.0).astype(np.float64)
+            weights = weights * weights
+            total = weights.sum()
+            if total > 1e-9:
+                ys_idx, xs_idx = np.indices(patch.shape)
+                cx_w = (xs_idx * weights).sum() / total
+                cy_w = (ys_idx * weights).sum() / total
+                return float(x0 + cx_w), float(y0 + cy_w)
            ys_m, xs_m = np.where(plateau)
            return float(x0 + xs_m.mean()), float(y0 + ys_m.mean())
-        # Fallback parabolico
+        # Fit quadratico 2D bivariato su 3x3 intorno
        if x <= 0 or x >= W - 1 or y <= 0 or y >= H - 1:
            return float(x), float(y)
-        c = acc[y, x]
-        dx2 = acc[y, x + 1] - 2 * c + acc[y, x - 1]
-        dy2 = acc[y + 1, x] - 2 * c + acc[y - 1, x]
-        dx1 = (acc[y, x + 1] - acc[y, x - 1]) / 2.0
-        dy1 = (acc[y + 1, x] - acc[y - 1, x]) / 2.0
-        ox = -dx1 / dx2 if abs(dx2) > 1e-6 else 0.0
-        oy = -dy1 / dy2 if abs(dy2) > 1e-6 else 0.0
+        # Stencil 3x3: Z[i, j] con i,j ∈ {-1, 0, +1}
+        Z = acc[y - 1:y + 2, x - 1:x + 2].astype(np.float64)
+        # Coefficienti da finite differences
+        b_c = (Z[1, 2] - Z[1, 0]) / 2.0
+        c_c = (Z[2, 1] - Z[0, 1]) / 2.0
+        d_c = (Z[1, 2] + Z[1, 0] - 2.0 * Z[1, 1]) / 2.0
+        e_c = (Z[2, 1] + Z[0, 1] - 2.0 * Z[1, 1]) / 2.0
+        f_c = (Z[2, 2] - Z[0, 2] - Z[2, 0] + Z[0, 0]) / 4.0
+        # Max: risolve [2d f; f 2e][dx;dy] = [-b;-c]
+        det = 4.0 * d_c * e_c - f_c * f_c
+        if abs(det) > 1e-9:
+            ox = (-2.0 * e_c * b_c + f_c * c_c) / det
+            oy = (-2.0 * d_c * c_c + f_c * b_c) / det
+        else:
+            # Fallback separabile
+            ox = -b_c / (2.0 * d_c) if abs(d_c) > 1e-6 else 0.0
+            oy = -c_c / (2.0 * e_c) if abs(e_c) > 1e-6 else 0.0
        ox = float(np.clip(ox, -0.5, 0.5))
        oy = float(np.clip(oy, -0.5, 0.5))
        return x + ox, y + oy
@@ -384,16 +407,11 @@ class LineShapeMatcher:
        l'angolo con score massimo (parabolic fit sulle 3 score centrali).
        Ritorna (angle_refined, score, cx_refined, cy_refined).
        """
-        # Se il match grezzo è già quasi perfetto, NON refinare: il parabolic
-        # fit su picco saturo produce spostamenti spurious di posizione e
-        # angolo (esempio: modello==scena deve dare ang=0, pos=centro ROI)
+        # Se il match grezzo è già quasi perfetto, NON refinare
        if original_score is not None and original_score >= 0.99:
            return (angle_deg, original_score, cx, cy)
        if search_radius is None:
            search_radius = self.angle_step_deg / 2.0
-        offsets = np.linspace(-search_radius, search_radius, 5)
-        best = (angle_deg, -1.0, cx, cy)
-        scores_by_off: dict[float, float] = {}

        h, w = template_gray.shape
        sw = max(16, int(round(w * scale)))
@@ -409,10 +427,10 @@ class LineShapeMatcher:
        center = (diag / 2.0, diag / 2.0)

        H, W = spread0.shape
-        # Ricerca locale posizione con margine ±2 px sulla (cx, cy)
        margin = 3

-        for off in offsets:
+        def _score_at_angle(off: float) -> tuple[float, float, float]:
+            """Ritorna (score, best_cx, best_cy) per angolo = angle_deg + off."""
            ang = angle_deg + off
            M = cv2.getRotationMatrix2D(center, ang, 1.0)
            gray_r = cv2.warpAffine(gray_p, M, (diag, diag),
@@ -423,22 +441,20 @@ class LineShapeMatcher:
            mag, bins = self._gradient(gray_r)
            fx, fy, fb = self._extract_features(mag, bins, mask_r)
            if len(fx) < 8:
-                scores_by_off[float(off)] = 0.0
-                continue
+                return (0.0, cx, cy)
            dx = (fx - center[0]).astype(np.int32)
            dy = (fy - center[1]).astype(np.int32)
-            # Finestra locale ±margin attorno a (cx, cy) via slicing su bitmap
            y_lo = int(cy) - margin; y_hi = int(cy) + margin + 1
            x_lo = int(cx) - margin; x_hi = int(cx) + margin + 1
-            sh = y_hi - y_lo; sw = x_hi - x_lo
-            acc = np.zeros((sh, sw), dtype=np.float32)
+            sh_w = y_hi - y_lo; sw_w = x_hi - x_lo
+            acc = np.zeros((sh_w, sw_w), dtype=np.float32)
            for i in range(len(dx)):
                ddx = int(dx[i]); ddy = int(dy[i]); b = int(fb[i])
                bit = np.uint8(1 << b)
                sy0 = y_lo + ddy; sy1 = y_hi + ddy
                sx0 = x_lo + ddx; sx1 = x_hi + ddx
-                a_y0 = max(0, -sy0); a_y1 = sh - max(0, sy1 - H)
-                a_x0 = max(0, -sx0); a_x1 = sw - max(0, sx1 - W)
+                a_y0 = max(0, -sy0); a_y1 = sh_w - max(0, sy1 - H)
+                a_x0 = max(0, -sx0); a_x1 = sw_w - max(0, sx1 - W)
                s_y0 = max(0, sy0); s_y1 = min(H, sy1)
                s_x0 = max(0, sx0); s_x1 = min(W, sx1)
                if s_y1 > s_y0 and s_x1 > s_x0:
@@ -448,31 +464,39 @@ class LineShapeMatcher:
                    ).astype(np.float32)
            acc /= len(dx)
            _, max_val, _, max_loc = cv2.minMaxLoc(acc)
-            scores_by_off[float(off)] = float(max_val)
-            if max_val > best[1]:
-                new_cx = x_lo + float(max_loc[0])
-                new_cy = y_lo + float(max_loc[1])
-                best = (ang, float(max_val), new_cx, new_cy)
+            return (float(max_val),
+                    float(x_lo + max_loc[0]), float(y_lo + max_loc[1]))

-        # Parabolic fit su 3 angoli attorno al massimo
-        sorted_offs = sorted(scores_by_off.keys())
-        best_off = best[0] - angle_deg
-        try:
-            i = sorted_offs.index(
-                min(sorted_offs, key=lambda x: abs(x - best_off))
-            )
-            if 0 < i < len(sorted_offs) - 1:
-                s0 = scores_by_off[sorted_offs[i - 1]]
-                s1 = scores_by_off[sorted_offs[i]]
-                s2 = scores_by_off[sorted_offs[i + 1]]
-                denom = (s0 - 2 * s1 + s2)
-                if abs(denom) > 1e-6:
-                    delta = 0.5 * (s0 - s2) / denom
-                    step = sorted_offs[i + 1] - sorted_offs[i]
-                    refined_off = sorted_offs[i] + delta * step
-                    return (angle_deg + refined_off, best[1], best[2], best[3])
-        except ValueError:
-            pass
+        # Golden-section search su [-search_radius, +search_radius]:
+        # converge in tempo logaritmico verso tol 0.1°; fino a 11 valutazioni
+        # (3 iniziali + 1 per iterazione, max 8) vs 5 equispaziate, centrate sul picco.
+        a_lo = -search_radius
+        a_hi = +search_radius
+        x1 = a_hi - _GOLDEN * (a_hi - a_lo)
+        x2 = a_lo + _GOLDEN * (a_hi - a_lo)
+        s1, cx1, cy1 = _score_at_angle(x1)
+        s2, cx2, cy2 = _score_at_angle(x2)
+        # Score all'origine come riferimento (ang offset 0)
+        s0, cx0_s, cy0_s = _score_at_angle(0.0)
+        best = (angle_deg, s0, cx0_s, cy0_s)
+        tol = 0.1  # gradi
+        for _ in range(8):
+            if s1 > best[1]:
+                best = (angle_deg + x1, s1, cx1, cy1)
+            if s2 > best[1]:
+                best = (angle_deg + x2, s2, cx2, cy2)
+            if abs(a_hi - a_lo) < tol:
+                break
+            if s1 > s2:
+                a_hi = x2
+                x2 = x1; s2 = s1; cx2 = cx1; cy2 = cy1
+                x1 = a_hi - _GOLDEN * (a_hi - a_lo)
+                s1, cx1, cy1 = _score_at_angle(x1)
+            else:
+                a_lo = x1
+                x1 = x2; s1 = s2; cx1 = cx2; cy1 = cy2
+                x2 = a_lo + _GOLDEN * (a_hi - a_lo)
+                s2, cx2, cy2 = _score_at_angle(x2)
        return best

    def _verify_ncc(
@@ -523,6 +547,7 @@ class LineShapeMatcher:
        subpixel: bool = True,
        verify_ncc: bool = True,
        verify_threshold: float = 0.4,
+        coarse_angle_factor: int = 2,
    ) -> list[Match]:
        if not self.variants:
            raise RuntimeError("Matcher non addestrato: chiamare train() prima.")
@@ -564,7 +589,30 @@ class LineShapeMatcher:
        def _rescore(score: np.ndarray, bg: np.ndarray) -> np.ndarray:
            return np.maximum(0.0, (score - bg) / (1.0 - bg + 1e-6))

-        # Pruning varianti via top-level (parallelizzato)
+        # Coarse-to-fine angolare:
+        # 1) Raggruppa varianti per scala, ordina per angolo
+        # 2) Top-level: valuta solo 1 ogni coarse_angle_factor varianti
+        # 3) Espandi ai vicini nel full-res
+        variants_by_scale: dict[float, list[int]] = {}
+        for vi, var in enumerate(self.variants):
+            variants_by_scale.setdefault(var.scale, []).append(vi)
+
+        coarse_idx_list: list[int] = []  # varianti da valutare al top
+        neighbor_map: dict[int, list[int]] = {}  # vi_coarse -> indici vicini
+        cf = max(1, coarse_angle_factor)
+        for scale_key, vi_list in variants_by_scale.items():
+            vi_sorted = sorted(vi_list, key=lambda i: self.variants[i].angle_deg)
+            n = len(vi_sorted)
+            for i in range(0, n, cf):
+                vi_c = vi_sorted[i]
+                coarse_idx_list.append(vi_c)
+                # Vicini: ±cf/2 attorno a i (stessa scala)
+                half = cf // 2
+                start = max(0, i - half)
+                end = min(n, i + half + 1)
+                neighbor_map[vi_c] = vi_sorted[start:end]
+
+        # Pruning varianti via top-level (parallelizzato) - solo coarse
        def _top_score(vi: int) -> tuple[int, float]:
            var = self.variants[vi]
            lvl = var.levels[min(top, len(var.levels) - 1)]
@@ -574,17 +622,30 @@ class LineShapeMatcher:
            score = _rescore(score, bg_cache_top[var.scale])
            return vi, float(score.max()) if score.size else -1.0

-        kept_variants: list[tuple[int, float]] = []
-        if self.n_threads > 1:
+        kept_coarse: list[tuple[int, float]] = []
+        if self.n_threads > 1 and len(coarse_idx_list) > 1:
            with ThreadPoolExecutor(max_workers=self.n_threads) as ex:
-                for vi, best in ex.map(_top_score, range(len(self.variants))):
+                for vi, best in ex.map(_top_score, coarse_idx_list):
                    if best >= top_thresh:
-                        kept_variants.append((vi, best))
+                        kept_coarse.append((vi, best))
        else:
-            for vi in range(len(self.variants)):
+            for vi in coarse_idx_list:
                vi2, best = _top_score(vi)
                if best >= top_thresh:
-                    kept_variants.append((vi2, best))
+                    kept_coarse.append((vi2, best))
+
+        # Espandi ogni coarse promosso con i suoi vicini (stessa scala,
+        # angoli intermedi non valutati al top)
+        expanded: set[int] = set()
+        score_by_vi: dict[int, float] = {}
+        for vi_c, s_top in kept_coarse:
+            for vi_n in neighbor_map.get(vi_c, [vi_c]):
+                expanded.add(vi_n)
+                # Usa lo score del coarse come stima per il sort successivo
+                score_by_vi[vi_n] = max(score_by_vi.get(vi_n, 0.0), s_top)
+        kept_variants: list[tuple[int, float]] = [
+            (vi, score_by_vi[vi]) for vi in expanded
+        ]

        if not kept_variants:
            return []