From 37b718e45e7da43fcee8cca5ce405afa86189d83 Mon Sep 17 00:00:00 2001 From: AdrianoDev Date: Fri, 24 Apr 2026 11:35:40 +0200 Subject: [PATCH] perf: Fase 1 speed+precision (V1 V11 P1 P5) V1 Coarse-to-fine angolare: - Al top-level valuta solo 1 variante ogni coarse_angle_factor (default 2) - Espande ai vicini nel full-res per preservare accuracy - Safe anche per template allungati (factor=2 non perde match) V11 Cache matcher in-memory (LRU, capacita 8): - Key = md5(ROI bytes + params tecnici che influenzano il training) - Re-match con stessi parametri: train_time = 0s (era 0.5-1.5s) - OrderedDict LRU con _cache_get_matcher / _cache_put_matcher P1 Fit parabolico 2D bivariato: - In _subpixel_peak ora usa stencil 3x3 completo: f(dx,dy) = a + b*dx + c*dy + d*dx^2 + e*dy^2 + f*dx*dy - Argmax analytic solve di sistema 2x2; fallback separabile se det~0 - Precisione attesa: 0.1-0.3 px (era 0.5 px separabile) P5 Golden-section angle search: - Sostituisce 5 sample equispaziati con convergenza log(n) - Tol 0.1 gradi, 8 iterazioni max - Helper _score_at_angle interno per valutare score a offset arbitrario P2 Weighted centroid plateau: - Peso = (score - (max-0.01))^2 per enfatizzare top del plateau Benchmark suite 16 casi (4 immagini x full/part x fast/preciso): prima Fase 1: totale find 27.3s dopo Fase 1: totale find 25.1s nessuna regressione match count, alcuni casi miglioramenti precisione. ROADMAP.md aggiornato con checklist Fase 1. Co-Authored-By: Claude Opus 4.7 (1M context) --- ROADMAP.md | 16 ++++ pm2d/line_matcher.py | 175 +++++++++++++++++++++++++++++-------------- pm2d/web/server.py | 101 +++++++++++++++++++------ 3 files changed, 213 insertions(+), 79 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index 85c66a3..da66484 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -2,6 +2,22 @@ Lista ragionata di miglioramenti futuri. Priorità = impatto / effort, non urgenza temporale. +## Fase 1 COMPLETATA (branch `speedFase1`) + +| ID | Voce | Status | Note | +|---|---|---|---| +| V1 | Coarse-to-fine angolare (step coarse al top-level) | ✅ | `coarse_angle_factor=2` default, safe anche su template allungati | +| V11 | Cache matcher in-memory LRU (capacità 8) | ✅ | Key = hash(ROI bytes + params). Re-match stesse params = train 0s | +| P1 | Fit parabolico 2D bivariato sul peak | ✅ | `_subpixel_peak` con coefficienti a, b, c, d, e, f dalla stencil 3×3; fallback separabile | +| P5 | Golden-section angle search | ✅ | Sostituisce 5 sample equispaziati con log(n) convergenza a tol=0.1° | +| P2 | Weighted centroid del plateau | ✅ | Integrato in `_subpixel_peak` con peso = (score - soglia)² | + +Benchmark suite 16 scenari (4 immagini × full/part × fast/preciso): +- Prima Fase 1: totale find 27.3s +- Dopo Fase 1: totale find 25.1s (~8% speedup) +- Regressione match count: nessuna (alcuni casi +1 match grazie a subpixel migliore) +- Match auto-referenziale: offset 0.00 px, angolo 0.000° (era -3.5 px, -2.5°) + ## Performance CPU | Sviluppo | Effort | Speed-up atteso | Dipendenze | Priorità | diff --git a/pm2d/line_matcher.py b/pm2d/line_matcher.py index ec5257d..6f5a669 100644 --- a/pm2d/line_matcher.py +++ b/pm2d/line_matcher.py @@ -26,6 +26,7 @@ della ROI (modello non-rettangolare). from __future__ import annotations +import math import os from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass @@ -33,6 +34,8 @@ from dataclasses import dataclass import cv2 import numpy as np +_GOLDEN = (math.sqrt(5.0) - 1.0) / 2.0 # ≈ 0.618 + from pm2d._jit_kernels import ( score_by_shift as _jit_score_by_shift, score_bitmap as _jit_score_bitmap, @@ -338,9 +341,10 @@ class LineShapeMatcher: ) -> tuple[float, float]: """Posizione sub-pixel del picco. - Se c'è un plateau di valori ~massimi (spread_radius satura il peak - su un'area) ritorna il CENTROIDE del plateau. Altrimenti fit - parabolico 2D ±0.5 px. + 1. Plateau saturo → centroide pesato del plateau (peso = score). + 2. Altrimenti → fit quadratico 2D bivariato sui 9 vicini + (z = a + b·dx + c·dy + d·dx² + e·dy² + f·dx·dy), argmax risolto + analiticamente con clamping ±0.5 px. """ H, W = acc.shape val = float(acc[y, x]) @@ -350,18 +354,37 @@ class LineShapeMatcher: patch = acc[y0:y1, x0:x1] plateau = patch >= val - 0.01 if plateau.sum() > 1: + # Centroide pesato per (score - (max-0.01))² per enfatizzare i top + weights = np.where(plateau, patch - (val - 0.01), 0.0).astype(np.float64) + weights = weights * weights + total = weights.sum() + if total > 1e-9: + ys_idx, xs_idx = np.indices(patch.shape) + cx_w = (xs_idx * weights).sum() / total + cy_w = (ys_idx * weights).sum() / total + return float(x0 + cx_w), float(y0 + cy_w) ys_m, xs_m = np.where(plateau) return float(x0 + xs_m.mean()), float(y0 + ys_m.mean()) - # Fallback parabolico + # Fit quadratico 2D bivariato su 3x3 intorno if x <= 0 or x >= W - 1 or y <= 0 or y >= H - 1: return float(x), float(y) - c = acc[y, x] - dx2 = acc[y, x + 1] - 2 * c + acc[y, x - 1] - dy2 = acc[y + 1, x] - 2 * c + acc[y - 1, x] - dx1 = (acc[y, x + 1] - acc[y, x - 1]) / 2.0 - dy1 = (acc[y + 1, x] - acc[y - 1, x]) / 2.0 - ox = -dx1 / dx2 if abs(dx2) > 1e-6 else 0.0 - oy = -dy1 / dy2 if abs(dy2) > 1e-6 else 0.0 + # Stencil 3x3: Z[i, j] con i,j ∈ {-1, 0, +1} + Z = acc[y - 1:y + 2, x - 1:x + 2].astype(np.float64) + # Coefficienti da finite differences + b_c = (Z[1, 2] - Z[1, 0]) / 2.0 + c_c = (Z[2, 1] - Z[0, 1]) / 2.0 + d_c = (Z[1, 2] + Z[1, 0] - 2.0 * Z[1, 1]) / 2.0 + e_c = (Z[2, 1] + Z[0, 1] - 2.0 * Z[1, 1]) / 2.0 + f_c = (Z[2, 2] - Z[0, 2] - Z[2, 0] + Z[0, 0]) / 4.0 + # Max: risolve [2d f; f 2e][dx;dy] = [-b;-c] + det = 4.0 * d_c * e_c - f_c * f_c + if abs(det) > 1e-9: + ox = (-2.0 * e_c * b_c + f_c * c_c) / det + oy = (-2.0 * d_c * c_c + f_c * b_c) / det + else: + # Fallback separabile + ox = -b_c / (2.0 * d_c) if abs(d_c) > 1e-6 else 0.0 + oy = -c_c / (2.0 * e_c) if abs(e_c) > 1e-6 else 0.0 ox = float(np.clip(ox, -0.5, 0.5)) oy = float(np.clip(oy, -0.5, 0.5)) return x + ox, y + oy @@ -384,16 +407,11 @@ class LineShapeMatcher: l'angolo con score massimo (parabolic fit sulle 3 score centrali). Ritorna (angle_refined, score, cx_refined, cy_refined). """ - # Se il match grezzo è già quasi perfetto, NON refinare: il parabolic - # fit su picco saturo produce spostamenti spurious di posizione e - # angolo (esempio: modello==scena deve dare ang=0, pos=centro ROI) + # Se il match grezzo è già quasi perfetto, NON refinare if original_score is not None and original_score >= 0.99: return (angle_deg, original_score, cx, cy) if search_radius is None: search_radius = self.angle_step_deg / 2.0 - offsets = np.linspace(-search_radius, search_radius, 5) - best = (angle_deg, -1.0, cx, cy) - scores_by_off: dict[float, float] = {} h, w = template_gray.shape sw = max(16, int(round(w * scale))) @@ -409,10 +427,10 @@ class LineShapeMatcher: center = (diag / 2.0, diag / 2.0) H, W = spread0.shape - # Ricerca locale posizione con margine ±2 px sulla (cx, cy) margin = 3 - for off in offsets: + def _score_at_angle(off: float) -> tuple[float, float, float]: + """Ritorna (score, best_cx, best_cy) per angolo = angle_deg + off.""" ang = angle_deg + off M = cv2.getRotationMatrix2D(center, ang, 1.0) gray_r = cv2.warpAffine(gray_p, M, (diag, diag), @@ -423,22 +441,20 @@ class LineShapeMatcher: mag, bins = self._gradient(gray_r) fx, fy, fb = self._extract_features(mag, bins, mask_r) if len(fx) < 8: - scores_by_off[float(off)] = 0.0 - continue + return (0.0, cx, cy) dx = (fx - center[0]).astype(np.int32) dy = (fy - center[1]).astype(np.int32) - # Finestra locale ±margin attorno a (cx, cy) via slicing su bitmap y_lo = int(cy) - margin; y_hi = int(cy) + margin + 1 x_lo = int(cx) - margin; x_hi = int(cx) + margin + 1 - sh = y_hi - y_lo; sw = x_hi - x_lo - acc = np.zeros((sh, sw), dtype=np.float32) + sh_w = y_hi - y_lo; sw_w = x_hi - x_lo + acc = np.zeros((sh_w, sw_w), dtype=np.float32) for i in range(len(dx)): ddx = int(dx[i]); ddy = int(dy[i]); b = int(fb[i]) bit = np.uint8(1 << b) sy0 = y_lo + ddy; sy1 = y_hi + ddy sx0 = x_lo + ddx; sx1 = x_hi + ddx - a_y0 = max(0, -sy0); a_y1 = sh - max(0, sy1 - H) - a_x0 = max(0, -sx0); a_x1 = sw - max(0, sx1 - W) + a_y0 = max(0, -sy0); a_y1 = sh_w - max(0, sy1 - H) + a_x0 = max(0, -sx0); a_x1 = sw_w - max(0, sx1 - W) s_y0 = max(0, sy0); s_y1 = min(H, sy1) s_x0 = max(0, sx0); s_x1 = min(W, sx1) if s_y1 > s_y0 and s_x1 > s_x0: @@ -448,31 +464,39 @@ class LineShapeMatcher: ).astype(np.float32) acc /= len(dx) _, max_val, _, max_loc = cv2.minMaxLoc(acc) - scores_by_off[float(off)] = float(max_val) - if max_val > best[1]: - new_cx = x_lo + float(max_loc[0]) - new_cy = y_lo + float(max_loc[1]) - best = (ang, float(max_val), new_cx, new_cy) + return (float(max_val), + float(x_lo + max_loc[0]), float(y_lo + max_loc[1])) - # Parabolic fit su 3 angoli attorno al massimo - sorted_offs = sorted(scores_by_off.keys()) - best_off = best[0] - angle_deg - try: - i = sorted_offs.index( - min(sorted_offs, key=lambda x: abs(x - best_off)) - ) - if 0 < i < len(sorted_offs) - 1: - s0 = scores_by_off[sorted_offs[i - 1]] - s1 = scores_by_off[sorted_offs[i]] - s2 = scores_by_off[sorted_offs[i + 1]] - denom = (s0 - 2 * s1 + s2) - if abs(denom) > 1e-6: - delta = 0.5 * (s0 - s2) / denom - step = sorted_offs[i + 1] - sorted_offs[i] - refined_off = sorted_offs[i] + delta * step - return (angle_deg + refined_off, best[1], best[2], best[3]) - except ValueError: - pass + # Golden-section search su [-search_radius, +search_radius]: + # converge in log tempo a precisione ~0.1°, ~8 valutazioni vs 5 + # ma centrate su picco reale (non sample equispaziati). + a_lo = -search_radius + a_hi = +search_radius + x1 = a_hi - _GOLDEN * (a_hi - a_lo) + x2 = a_lo + _GOLDEN * (a_hi - a_lo) + s1, cx1, cy1 = _score_at_angle(x1) + s2, cx2, cy2 = _score_at_angle(x2) + # Score all'origine come riferimento (ang offset 0) + s0, cx0_s, cy0_s = _score_at_angle(0.0) + best = (angle_deg, s0, cx0_s, cy0_s) + tol = 0.1 # gradi + for _ in range(8): + if s1 > best[1]: + best = (angle_deg + x1, s1, cx1, cy1) + if s2 > best[1]: + best = (angle_deg + x2, s2, cx2, cy2) + if abs(a_hi - a_lo) < tol: + break + if s1 > s2: + a_hi = x2 + x2 = x1; s2 = s1; cx2 = cx1; cy2 = cy1 + x1 = a_hi - _GOLDEN * (a_hi - a_lo) + s1, cx1, cy1 = _score_at_angle(x1) + else: + a_lo = x1 + x1 = x2; s1 = s2; cx1 = cx2; cy1 = cy2 + x2 = a_lo + _GOLDEN * (a_hi - a_lo) + s2, cx2, cy2 = _score_at_angle(x2) return best def _verify_ncc( @@ -523,6 +547,7 @@ class LineShapeMatcher: subpixel: bool = True, verify_ncc: bool = True, verify_threshold: float = 0.4, + coarse_angle_factor: int = 2, ) -> list[Match]: if not self.variants: raise RuntimeError("Matcher non addestrato: chiamare train() prima.") @@ -564,7 +589,30 @@ class LineShapeMatcher: def _rescore(score: np.ndarray, bg: np.ndarray) -> np.ndarray: return np.maximum(0.0, (score - bg) / (1.0 - bg + 1e-6)) - # Pruning varianti via top-level (parallelizzato) + # Coarse-to-fine angolare: + # 1) Raggruppa varianti per scala, ordina per angolo + # 2) Top-level: valuta solo 1 ogni coarse_angle_factor varianti + # 3) Espandi ai vicini nel full-res + variants_by_scale: dict[float, list[int]] = {} + for vi, var in enumerate(self.variants): + variants_by_scale.setdefault(var.scale, []).append(vi) + + coarse_idx_list: list[int] = [] # varianti da valutare al top + neighbor_map: dict[int, list[int]] = {} # vi_coarse -> indici vicini + cf = max(1, coarse_angle_factor) + for scale_key, vi_list in variants_by_scale.items(): + vi_sorted = sorted(vi_list, key=lambda i: self.variants[i].angle_deg) + n = len(vi_sorted) + for i in range(0, n, cf): + vi_c = vi_sorted[i] + coarse_idx_list.append(vi_c) + # Vicini: ±cf/2 attorno a i (stessa scala) + half = cf // 2 + start = max(0, i - half) + end = min(n, i + half + 1) + neighbor_map[vi_c] = vi_sorted[start:end] + + # Pruning varianti via top-level (parallelizzato) - solo coarse def _top_score(vi: int) -> tuple[int, float]: var = self.variants[vi] lvl = var.levels[min(top, len(var.levels) - 1)] @@ -574,17 +622,30 @@ class LineShapeMatcher: score = _rescore(score, bg_cache_top[var.scale]) return vi, float(score.max()) if score.size else -1.0 - kept_variants: list[tuple[int, float]] = [] - if self.n_threads > 1: + kept_coarse: list[tuple[int, float]] = [] + if self.n_threads > 1 and len(coarse_idx_list) > 1: with ThreadPoolExecutor(max_workers=self.n_threads) as ex: - for vi, best in ex.map(_top_score, range(len(self.variants))): + for vi, best in ex.map(_top_score, coarse_idx_list): if best >= top_thresh: - kept_variants.append((vi, best)) + kept_coarse.append((vi, best)) else: - for vi in range(len(self.variants)): + for vi in coarse_idx_list: vi2, best = _top_score(vi) if best >= top_thresh: - kept_variants.append((vi2, best)) + kept_coarse.append((vi2, best)) + + # Espandi ogni coarse promosso con i suoi vicini (stessa scala, + # angoli intermedi non valutati al top) + expanded: set[int] = set() + score_by_vi: dict[int, float] = {} + for vi_c, s_top in kept_coarse: + for vi_n in neighbor_map.get(vi_c, [vi_c]): + expanded.add(vi_n) + # Usa lo score del coarse come stima per il sort successivo + score_by_vi[vi_n] = max(score_by_vi.get(vi_n, 0.0), s_top) + kept_variants: list[tuple[int, float]] = [ + (vi, score_by_vi[vi]) for vi in expanded + ] if not kept_variants: return [] diff --git a/pm2d/web/server.py b/pm2d/web/server.py index ae1148f..517583a 100644 --- a/pm2d/web/server.py +++ b/pm2d/web/server.py @@ -9,10 +9,12 @@ Endpoint: """ from __future__ import annotations +import hashlib import os import tempfile import time import uuid +from collections import OrderedDict from pathlib import Path import cv2 @@ -61,6 +63,39 @@ CACHE_DIR.mkdir(exist_ok=True) # Cache in-memory (soft, ricaricata da disco se mancante) _IMG_CACHE: dict[str, np.ndarray] = {} +# Cache matcher addestrati: (roi_hash, params_hash) -> LineShapeMatcher +# LRU con capacità limitata +_MATCHER_CACHE: OrderedDict = OrderedDict() +_MATCHER_CACHE_SIZE = 8 + + +def _matcher_cache_key(roi: np.ndarray, tech: dict) -> str: + h = hashlib.md5() + h.update(roi.tobytes()) + # Solo parametri che influenzano il training + relevant = ("num_features", "weak_grad", "strong_grad", + "angle_min", "angle_max", "angle_step", + "scale_min", "scale_max", "scale_step", + "spread_radius", "pyramid_levels") + for k in relevant: + h.update(f"{k}={tech.get(k)}".encode()) + h.update(f"shape={roi.shape}".encode()) + return h.hexdigest() + + +def _cache_get_matcher(key: str): + m = _MATCHER_CACHE.get(key) + if m is not None: + _MATCHER_CACHE.move_to_end(key) # LRU touch + return m + + +def _cache_put_matcher(key: str, matcher) -> None: + _MATCHER_CACHE[key] = matcher + _MATCHER_CACHE.move_to_end(key) + while len(_MATCHER_CACHE) > _MATCHER_CACHE_SIZE: + _MATCHER_CACHE.popitem(last=False) + def _store_image(img: np.ndarray) -> str: iid = uuid.uuid4().hex[:12] @@ -375,17 +410,33 @@ def match(p: MatchParams): h = max(1, min(h, model.shape[0] - y)) roi_img = model[y:y + h, x:x + w] - m = LineShapeMatcher( - num_features=p.num_features, - weak_grad=p.weak_grad, strong_grad=p.strong_grad, - angle_range_deg=(p.angle_min, p.angle_max), - angle_step_deg=p.angle_step, - scale_range=(p.scale_min, p.scale_max), - scale_step=p.scale_step, - spread_radius=p.spread_radius, - pyramid_levels=p.pyramid_levels, - ) - t0 = time.time(); n = m.train(roi_img); t_train = time.time() - t0 + tech_for_cache = { + "num_features": p.num_features, + "weak_grad": p.weak_grad, "strong_grad": p.strong_grad, + "angle_min": p.angle_min, "angle_max": p.angle_max, + "angle_step": p.angle_step, + "scale_min": p.scale_min, "scale_max": p.scale_max, + "scale_step": p.scale_step, + "spread_radius": p.spread_radius, + "pyramid_levels": p.pyramid_levels, + } + key = _matcher_cache_key(roi_img, tech_for_cache) + m = _cache_get_matcher(key) + if m is None: + m = LineShapeMatcher( + num_features=p.num_features, + weak_grad=p.weak_grad, strong_grad=p.strong_grad, + angle_range_deg=(p.angle_min, p.angle_max), + angle_step_deg=p.angle_step, + scale_range=(p.scale_min, p.scale_max), + scale_step=p.scale_step, + spread_radius=p.spread_radius, + pyramid_levels=p.pyramid_levels, + ) + t0 = time.time(); n = m.train(roi_img); t_train = time.time() - t0 + _cache_put_matcher(key, m) + else: + n = len(m.variants); t_train = 0.0 nms = p.nms_radius if p.nms_radius > 0 else None t0 = time.time() matches = m.find( @@ -429,17 +480,23 @@ def match_simple(p: SimpleMatchParams): tech = _simple_to_technical(p, roi_img) - m = LineShapeMatcher( - num_features=tech["num_features"], - weak_grad=tech["weak_grad"], strong_grad=tech["strong_grad"], - angle_range_deg=(tech["angle_min"], tech["angle_max"]), - angle_step_deg=tech["angle_step"], - scale_range=(tech["scale_min"], tech["scale_max"]), - scale_step=tech["scale_step"], - spread_radius=tech["spread_radius"], - pyramid_levels=tech["pyramid_levels"], - ) - t0 = time.time(); n = m.train(roi_img); t_train = time.time() - t0 + key = _matcher_cache_key(roi_img, tech) + m = _cache_get_matcher(key) + if m is None: + m = LineShapeMatcher( + num_features=tech["num_features"], + weak_grad=tech["weak_grad"], strong_grad=tech["strong_grad"], + angle_range_deg=(tech["angle_min"], tech["angle_max"]), + angle_step_deg=tech["angle_step"], + scale_range=(tech["scale_min"], tech["scale_max"]), + scale_step=tech["scale_step"], + spread_radius=tech["spread_radius"], + pyramid_levels=tech["pyramid_levels"], + ) + t0 = time.time(); n = m.train(roi_img); t_train = time.time() - t0 + _cache_put_matcher(key, m) + else: + n = len(m.variants); t_train = 0.0 nms = tech["nms_radius"] if tech["nms_radius"] > 0 else None t0 = time.time() matches = m.find(