From faebccb69e731f7a7a0d554fae9a48156302e1e4 Mon Sep 17 00:00:00 2001
From: AdrianoDev
Date: Fri, 24 Apr 2026 01:37:01 +0200
Subject: [PATCH] feat: background locale + verify NCC per eliminare falsi positivi
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Problema: matcher linemod con solo orientamento gradient può dare score
alto su texture dense/rumore che per caso accumulano orientamenti
compatibili. Esempio: template ruota dentata su scena clip → match a
score 0.9 (errati).

Fix in 2 livelli:

1. Background score LOCALE nel find()
   - _bg_map(resp, box_size) = densità media bin attivi in bbox template
   - Rinormalizza score: s' = max(0, (s - bg) / (1 - bg))
   - Annulla contributo di zone sature ma preserva pattern puliti

2. Verify NCC post-hoc
   - _verify_ncc(): warpa template alla pose (cx, cy, angle, scale) e
     calcola NCC classico su intensità con la scena sottostante
   - Threshold di default 0.4 elimina FP con edge orientati casualmente
   - Parametro esposto in GUI (verify_threshold)

Rimossa penalty di saturazione nel response_map (ridondante).

Test regression (ruote dentate vs clip, clip vs ruote dentate): no verify: 12+ falsi positivi con score ~0.7 verify 0.4: 1-2 falsi positivi rimanenti, true positive invariati verify 0.5: 0 falsi positivi, 1 TP scale piccola perso Benchmark clip→clip (13 istanze): full pipeline (Numba + threads + refine + subpix + verify): 1.12s Co-Authored-By: Claude Opus 4.7 (1M context) --- pm2d/auto_tune.py | 1 + pm2d/gui.py | 18 +++++++--- pm2d/line_matcher.py | 79 +++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 90 insertions(+), 8 deletions(-) diff --git a/pm2d/auto_tune.py b/pm2d/auto_tune.py index 28a2ccf..ecd83cd 100644 --- a/pm2d/auto_tune.py +++ b/pm2d/auto_tune.py @@ -190,6 +190,7 @@ def auto_tune(template_bgr: np.ndarray, mask: np.ndarray | None = None) -> dict: "strong_grad": round(strong_grad, 1), "spread_radius": spread_radius, "pyramid_levels": pyr, + "verify_threshold": 0.4, # meta (non in PARAM_SCHEMA, usato per log) "_symmetry_order": sym["order"], "_symmetry_conf": round(sym["confidence"], 2), diff --git a/pm2d/gui.py b/pm2d/gui.py index aace88f..78cfd6c 100644 --- a/pm2d/gui.py +++ b/pm2d/gui.py @@ -43,6 +43,7 @@ PARAM_SCHEMA: list[tuple[str, str, type]] = [ ("strong_grad", "Strong grad (line)", float), ("spread_radius", "Spread radius (line)", int), ("pyramid_levels", "Pyramid levels", int), + ("verify_threshold", "Verify NCC threshold", float), ] @@ -426,6 +427,7 @@ def run( min_score: float = 0.55, max_matches: int = 25, nms_radius: int = 0, + verify_threshold: float = 0.4, backend: str = "line", ) -> None: """Entry-point GUI completo.""" @@ -467,6 +469,7 @@ def run( "strong_grad": strong_grad, "spread_radius": spread_radius, "pyramid_levels": pyramid_levels, + "verify_threshold": verify_threshold, } while True: @@ -502,10 +505,17 @@ def run( print(f" train: {n} varianti in {t_train:.2f}s") t0 = time.time() nms = cur["nms_radius"] if cur["nms_radius"] > 0 else None - matches = matcher.find( - scene, min_score=cur["min_score"], - 
max_matches=cur["max_matches"], nms_radius=nms, - ) + if cur["backend"] == "line": + matches = matcher.find( + scene, min_score=cur["min_score"], + max_matches=cur["max_matches"], nms_radius=nms, + verify_threshold=cur.get("verify_threshold", 0.4), + ) + else: + matches = matcher.find( + scene, min_score=cur["min_score"], + max_matches=cur["max_matches"], nms_radius=nms, + ) t_find = time.time() - t0 print(f" find: {len(matches)} match in {t_find:.2f}s") diff --git a/pm2d/line_matcher.py b/pm2d/line_matcher.py index 0aa72ad..a7fcb3c 100644 --- a/pm2d/line_matcher.py +++ b/pm2d/line_matcher.py @@ -286,17 +286,22 @@ class LineShapeMatcher: # --- Matching ------------------------------------------------------ def _response_map(self, gray: np.ndarray) -> np.ndarray: - """Costruisce response map shape (N_BINS, H, W) float32 0/1.""" + """Response map shape (N_BINS, H, W) float32 0/1. + + Rinormalizzazione anti-background (match vs texture densa) è + applicata a valle nel `find()` via `_bg_map` locale. + """ mag, bins = self._gradient(gray) valid = mag >= self.weak_grad k = 2 * self.spread_radius + 1 kernel = np.ones((k, k), dtype=np.uint8) - resp = np.zeros((N_BINS, gray.shape[0], gray.shape[1]), dtype=np.float32) + H, W = gray.shape + raw = np.zeros((N_BINS, H, W), dtype=np.float32) for b in range(N_BINS): mask_b = ((bins == b) & valid).astype(np.uint8) d = cv2.dilate(mask_b, kernel) - resp[b] = d.astype(np.float32) - return resp + raw[b] = d.astype(np.float32) + return raw @staticmethod def _score_by_shift( @@ -424,6 +429,44 @@ class LineShapeMatcher: pass return best + def _verify_ncc( + self, scene_gray: np.ndarray, cx: float, cy: float, + angle_deg: float, scale: float, + ) -> float: + """NCC tra template warpato alla pose e scena sottostante. + + Ritorna score [-1, 1]. Usato come filtro anti-falso-positivo: + il matcher linemod può dare score alto su texture generiche ma + sovrapponendo il template gray i pixel non corrispondono. 
+ """ + if self.template_gray is None: + return 1.0 + t = self.template_gray + h, w = t.shape + cx_t = (w - 1) / 2.0 + cy_t = (h - 1) / 2.0 + M = cv2.getRotationMatrix2D((cx_t, cy_t), angle_deg, scale) + M[0, 2] += cx - cx_t + M[1, 2] += cy - cy_t + H, W = scene_gray.shape + warped = cv2.warpAffine( + t, M, (W, H), + flags=cv2.INTER_LINEAR, borderValue=0, + ) + mask = cv2.warpAffine( + np.full_like(t, 255), M, (W, H), + flags=cv2.INTER_NEAREST, borderValue=0, + ) + valid = mask > 0 + if valid.sum() < 20: + return 0.0 + tpl = warped[valid].astype(np.float32) + scn = scene_gray[valid].astype(np.float32) + tm = tpl - tpl.mean() + sm = scn - scn.mean() + denom = np.sqrt((tm * tm).sum() * (sm * sm).sum()) + 1e-9 + return float((tm * sm).sum() / denom) + def find( self, scene_bgr: np.ndarray, @@ -432,6 +475,8 @@ class LineShapeMatcher: nms_radius: int | None = None, refine_angle: bool = True, subpixel: bool = True, + verify_ncc: bool = True, + verify_threshold: float = 0.4, ) -> list[Match]: if not self.variants: raise RuntimeError("Matcher non addestrato: chiamare train() prima.") @@ -449,6 +494,23 @@ class LineShapeMatcher: nms_radius = max(8, min(self.template_size) // 2) top_thresh = min_score * self.top_score_factor + # Background score LOCALE: densità media bin-attivi normalizzata su + # bbox template. Rinormalizzazione rimuove match dove la zona ha + # attivazioni dense in tutti gli orientamenti (texture/rumore). 
+ tw, th = self.template_size + + def _bg_map(resp: np.ndarray, scale_div: int = 1) -> np.ndarray: + """bg_map[y,x] = frazione bin attivi media in bbox template.""" + density = resp.sum(axis=0) # (H, W) + bw = max(9, tw // scale_div); bh = max(9, th // scale_div) + smooth = cv2.boxFilter(density, cv2.CV_32F, (bw, bh)) + return np.clip(smooth / N_BINS, 0.0, 0.99) + + bg_top = _bg_map(resp_top, scale_div=2 ** top) + + def _rescore(score: np.ndarray, bg: np.ndarray) -> np.ndarray: + return np.maximum(0.0, (score - bg) / (1.0 - bg + 1e-6)) + # Pruning varianti via top-level (parallelizzato) def _top_score(vi: int) -> tuple[int, float]: var = self.variants[vi] @@ -456,6 +518,7 @@ class LineShapeMatcher: score = self._score_by_shift( resp_top, lvl.dx, lvl.dy, lvl.bin, bin_has_data=bin_has_top, ) + score = _rescore(score, bg_top) return vi, float(score.max()) if score.size else -1.0 kept_variants: list[tuple[int, float]] = [] @@ -480,6 +543,7 @@ class LineShapeMatcher: # Full-res (parallelizzato per variante) resp0 = self._response_map(gray0) bin_has_full = np.array([resp0[b].any() for b in range(N_BINS)]) + bg_full = _bg_map(resp0, scale_div=1) def _full_score(vi: int) -> tuple[int, np.ndarray]: var = self.variants[vi] @@ -487,6 +551,7 @@ class LineShapeMatcher: score = self._score_by_shift( resp0, lvl0.dx, lvl0.dy, lvl0.bin, bin_has_data=bin_has_full, ) + score = _rescore(score, bg_full) return vi, score candidates_per_var: list[tuple[int, np.ndarray]] = [] @@ -540,6 +605,12 @@ class LineShapeMatcher: search_radius=self.angle_step_deg / 2.0, ) + # Verify NCC: filtra falsi positivi con mismatch pixel-level + if verify_ncc: + ncc = self._verify_ncc(gray0, cx_f, cy_f, ang_f, var.scale) + if ncc < verify_threshold: + continue + poly = _oriented_bbox_polygon( cx_f, cy_f, tw * var.scale, th * var.scale, ang_f, )