From 075b014bd758197004557d13c0735dc2ba4bc5f4 Mon Sep 17 00:00:00 2001 From: AdrianoDev Date: Fri, 24 Apr 2026 01:22:56 +0200 Subject: [PATCH] perf: piramide al training, refinement sub-step, multithreading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LineShapeMatcher: - Feature piramidate precomputate al training (_LevelFeatures per livello piramide, dedup risolto una volta) - Refinement angolare: 5 offset ±step/2 + parabolic fit → precisione ~0.5° con angle_step=5° (10x fine rispetto a step training) - Subpixel posizione: parabolic fit 2D sul picco → frazione pixel - Multithreading: n_threads auto=CPU-1, parallelizza top-level pruning e full-res matching tramite ThreadPoolExecutor (numpy/cv2 rilasciano GIL) GUI: - Dialog edit_params con bottone Auto-tune - Legenda numerata match con pallino colore (#i, coords, angle, scala, score) - Hotkey finestra: r=params, o=nuovo ROI, m=nuovo modello, s=nuova scena - Pannello con train/find time + HOTKEY in basso auto_tune.py: - Analisi template: soglie grad da percentili, num_features da densità edge, pyramid_levels da min_side, min_score da entropia orientation, rilevazione simmetria rotazionale (soglia 0.75 NCC su magnitude) Benchmark clip.png (13 istanze, 72 varianti angolari): prima: 5.84s, precisione 5° (step training) ora: 1.67s, precisione ~0.5°, subpixel posizione speed-up: 3.5x, precisione angolare 10x Co-Authored-By: Claude Opus 4.7 (1M context) --- pm2d/auto_tune.py | 211 +++++++++++++++++++ pm2d/gui.py | 469 ++++++++++++++++++++++++++++++++++++++----- pm2d/line_matcher.py | 330 +++++++++++++++++++++++++----- pm2d/matcher.py | 13 +- 4 files changed, 918 insertions(+), 105 deletions(-) create mode 100644 pm2d/auto_tune.py diff --git a/pm2d/auto_tune.py b/pm2d/auto_tune.py new file mode 100644 index 0000000..28a2ccf --- /dev/null +++ b/pm2d/auto_tune.py @@ -0,0 +1,211 @@ +"""Auto-tune parametri PM2D da analisi del template. 
+ +Analizza la ROI del modello e suggerisce valori ragionevoli per i principali +parametri del `LineShapeMatcher`, tenendo conto di: + +- **distribuzione magnitude del gradiente** → soglie `weak_grad` / `strong_grad` +- **numero di edge utili** → `num_features` +- **dimensione template** → `pyramid_levels`, `spread_radius` +- **simmetria rotazionale** (autocorrelazione su rotazione) → `angle_range_deg` +- **entropia orientamenti** → suggerimento `min_score` + +Ritorna dict con i key esatti del form `edit_params`. +""" + +from __future__ import annotations + +import cv2 +import numpy as np + + +def _to_gray(img: np.ndarray) -> np.ndarray: + if img.ndim == 3: + return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + return img + + +def detect_rotational_symmetry( + gray: np.ndarray, step_deg: float = 5.0, corr_thresh: float = 0.75, +) -> dict: + """Rileva simmetria rotazionale su edge map (più robusto a sfondo uniforme). + + Ritorna dict con: + - order: int, 1=nessuna, 2=180°, 3=120°, 4=90°, 6=60°, 8=45° + - period_deg: float, periodo minimo di simmetria (360/order) + - confidence: float [0..1], correlazione minima tra rotazioni equivalenti + """ + h, w = gray.shape + # Usa magnitude gradiente (rotation-invariant rispetto a bg uniforme) + gx = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3) + gy = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3) + mag = cv2.magnitude(gx, gy).astype(np.float32) + + center = (w / 2.0, h / 2.0) + ref = mag + + correlations: list[tuple[float, float]] = [] + for ang in np.arange(step_deg, 360.0, step_deg): + M = cv2.getRotationMatrix2D(center, float(ang), 1.0) + rot = cv2.warpAffine( + mag, M, (w, h), borderValue=0.0, + ) + rm = ref - ref.mean() + rs = rot - rot.mean() + denom = np.sqrt((rm * rm).sum() * (rs * rs).sum()) + 1e-9 + c = float((rm * rs).sum() / denom) + correlations.append((float(ang), c)) + + # Candidati simmetria: 2,3,4,6,8 (90/45) + candidates = [2, 3, 4, 6, 8] + best_order = 1 + best_conf = 0.0 + for order in candidates: + period = 360.0 / 
order + # Verifica che ALLE rotazioni n*period (n=1..order-1) ci sia alta corr + corrs = [] + for n in range(1, order): + target = period * n + # trova angolo più vicino in correlations + closest = min(correlations, key=lambda p: abs(p[0] - target)) + if abs(closest[0] - target) > step_deg * 1.5: + corrs.append(0.0) + else: + corrs.append(closest[1]) + conf = min(corrs) if corrs else 0.0 + if conf >= corr_thresh and conf > best_conf: + best_order = order + best_conf = conf + return { + "order": best_order, + "period_deg": 360.0 / best_order, + "confidence": best_conf, + } + + +def analyze_gradients(gray: np.ndarray) -> dict: + """Statistiche magnitude / orientation gradiente.""" + gx = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3) + gy = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3) + mag = cv2.magnitude(gx, gy) + + # Percentili magnitude + p50 = float(np.percentile(mag, 50)) + p80 = float(np.percentile(mag, 80)) + p95 = float(np.percentile(mag, 95)) + mag_max = float(mag.max()) + + # Numero pixel "forti" + strong_pct = float((mag > p95).sum()) / mag.size + weak_pct = float((mag > p50).sum()) / mag.size + + # Entropia orientamenti (solo pixel forti) + ang = np.arctan2(gy, gx) + ang_mod = np.where(ang < 0, ang + np.pi, ang) + mask = mag > p80 + if mask.sum() > 10: + bins_count, _ = np.histogram( + ang_mod[mask], bins=16, range=(0, np.pi), + ) + p = bins_count / (bins_count.sum() + 1e-9) + ent = float(-np.sum(p * np.log(p + 1e-9)) / np.log(16)) + else: + ent = 0.0 + + return { + "p50": p50, "p80": p80, "p95": p95, "mag_max": mag_max, + "strong_pct": strong_pct, "weak_pct": weak_pct, + "orient_entropy": ent, + "n_pixels": mag.size, + "n_strong": int((mag > p95).sum()), + } + + +def auto_tune(template_bgr: np.ndarray, mask: np.ndarray | None = None) -> dict: + """Analizza template e ritorna dict parametri suggeriti. + + Chiavi compatibili con edit_params PARAM_SCHEMA. 
+ """ + gray = _to_gray(template_bgr) + h, w = gray.shape + if mask is not None: + # Zero fuori maschera per statistiche + gray_for_stats = np.where(mask > 0, gray, int(np.median(gray))).astype(np.uint8) + else: + gray_for_stats = gray + + stats = analyze_gradients(gray_for_stats) + sym = detect_rotational_symmetry(gray_for_stats) + + # Soglie magnitude: usa percentili per robustezza illuminazione. + # Target: strong_grad ~= valore a percentile 80-90 in assoluto, ma + # clamp per compatibilità uint8 (Sobel può sforare). + strong_grad = float(np.clip(stats["p80"], 20.0, 100.0)) + weak_grad = float(np.clip(strong_grad * 0.5, 10.0, 60.0)) + + # num_features: 1 feature ogni ~25 px forti, clamp 48..192 + target_feat = int(np.clip(stats["n_strong"] / 25, 48, 192)) + + # pyramid_levels in base alla dimensione minima + min_side = min(h, w) + if min_side < 60: + pyr = 1 + elif min_side < 120: + pyr = 2 + elif min_side < 320: + pyr = 3 + else: + pyr = 4 + + # spread_radius proporzionale a risoluzione + pyramid (tolleranza ~1% dim) + spread_radius = int(np.clip(max(3, min_side * 0.02), 3, 8)) + + # angle range ridotto se simmetria rotazionale + angle_max = 360.0 / sym["order"] if sym["order"] > 1 else 360.0 + + # min_score: se entropia orient alta → template distintivo → soglia alta ok + # se entropia bassa → template ambiguo → soglia più permissiva + if stats["orient_entropy"] > 0.75: + min_score = 0.65 + elif stats["orient_entropy"] > 0.55: + min_score = 0.55 + else: + min_score = 0.45 + + # angle step: 5° default; se simmetria, mantengo step ma range ridotto + angle_step = 5.0 + + return { + "backend": "line", + "angle_min": 0.0, + "angle_max": angle_max, + "angle_step": angle_step, + "scale_min": 1.0, + "scale_max": 1.0, + "scale_step": 0.1, + "min_score": round(min_score, 2), + "max_matches": 25, + "nms_radius": 0, + "num_features": target_feat, + "weak_grad": round(weak_grad, 1), + "strong_grad": round(strong_grad, 1), + "spread_radius": spread_radius, + 
"pyramid_levels": pyr, + # meta (non in PARAM_SCHEMA, usato per log) + "_symmetry_order": sym["order"], + "_symmetry_conf": round(sym["confidence"], 2), + "_orient_entropy": round(stats["orient_entropy"], 2), + } + + +def summarize(tune: dict) -> str: + """Stringa one-line delle scelte principali.""" + so = tune.get("_symmetry_order", 1) + sc = tune.get("_symmetry_conf", 0) + ent = tune.get("_orient_entropy", 0) + return ( + f"sym={so}x (conf={sc:.2f}) entropia={ent:.2f} " + f"feat={tune['num_features']} pyr={tune['pyramid_levels']} " + f"grad={tune['weak_grad']:.0f}/{tune['strong_grad']:.0f} " + f"ang=[0..{tune['angle_max']:.0f}]@{tune['angle_step']:.0f}d " + f"min_score={tune['min_score']}" + ) diff --git a/pm2d/gui.py b/pm2d/gui.py index 2a2590d..aace88f 100644 --- a/pm2d/gui.py +++ b/pm2d/gui.py @@ -15,12 +15,109 @@ from __future__ import annotations import sys from pathlib import Path from tkinter import Tk, filedialog +import tkinter as tk +from tkinter import ttk import cv2 import numpy as np from pm2d.matcher import EdgeShapeMatcher from pm2d.line_matcher import LineShapeMatcher, Match +from pm2d.auto_tune import auto_tune, summarize as tune_summary + + +# Schema campi form parametri: (key, label, type, initial) +PARAM_SCHEMA: list[tuple[str, str, type]] = [ + ("backend", "Backend (line | edge)", str), + ("angle_min", "Angolo min [deg]", float), + ("angle_max", "Angolo max [deg]", float), + ("angle_step", "Angolo step [deg]", float), + ("scale_min", "Scala min", float), + ("scale_max", "Scala max", float), + ("scale_step", "Scala step", float), + ("min_score", "Score minimo [0..1]", float), + ("max_matches", "Max match", int), + ("nms_radius", "NMS radius [px] (0=auto)", int), + ("num_features", "Num feature (line)", int), + ("weak_grad", "Weak grad (line)", float), + ("strong_grad", "Strong grad (line)", float), + ("spread_radius", "Spread radius (line)", int), + ("pyramid_levels", "Pyramid levels", int), +] + + +def edit_params(defaults: dict, 
template_bgr: np.ndarray | None = None) -> dict | None: + """Dialog tkinter per modificare i parametri. + + Se `template_bgr` fornito, mostra bottone "Auto-tune" che analizza il template + e pre-popola i campi con valori suggeriti. + """ + root = tk.Tk() + root.title("Parametri Pattern Matching 2D") + try: + root.attributes("-topmost", True) + except Exception: + pass + + result: dict = {} + entries: dict[str, tk.Entry] = {} + + frame = ttk.Frame(root, padding=12) + frame.grid(row=0, column=0, sticky="nsew") + for i, (key, label, _typ) in enumerate(PARAM_SCHEMA): + ttk.Label(frame, text=label).grid(row=i, column=0, sticky="w", padx=4, pady=3) + e = ttk.Entry(frame, width=14) + e.insert(0, str(defaults.get(key, ""))) + e.grid(row=i, column=1, padx=4, pady=3) + entries[key] = e + + hint_var = tk.StringVar(value="") + hint_lbl = ttk.Label(frame, textvariable=hint_var, foreground="#0088bb", + wraplength=280) + hint_lbl.grid(row=len(PARAM_SCHEMA), column=0, columnspan=2, + sticky="w", pady=(6, 0)) + + def apply_tune(): + if template_bgr is None: + hint_var.set("Auto-tune non disponibile (template mancante)") + return + tune = auto_tune(template_bgr) + for key, _label, _typ in PARAM_SCHEMA: + if key in tune: + entries[key].delete(0, tk.END) + entries[key].insert(0, str(tune[key])) + hint_var.set("Auto-tune: " + tune_summary(tune)) + + state = {"ok": False} + def on_ok(): + try: + for key, _label, typ in PARAM_SCHEMA: + val = entries[key].get().strip() + if typ is int: + result[key] = int(float(val)) + elif typ is float: + result[key] = float(val) + else: + result[key] = val + state["ok"] = True + root.destroy() + except ValueError as ex: + hint_var.set(f"Errore parametri: {ex}") + + def on_cancel(): + root.destroy() + + btns = ttk.Frame(frame) + btns.grid(row=len(PARAM_SCHEMA) + 1, column=0, columnspan=2, pady=(10, 0)) + if template_bgr is not None: + ttk.Button(btns, text="Auto-tune", command=apply_tune).pack(side="left", padx=6) + ttk.Button(btns, text="Annulla", 
command=on_cancel).pack(side="left", padx=6) + ttk.Button(btns, text="OK", command=on_ok).pack(side="left", padx=6) + + root.bind("<Return>", lambda _e: on_ok()) + root.bind("<Escape>", lambda _e: on_cancel()) + root.mainloop() + return result if state["ok"] else None WINDOW_MODEL = "Modello (selezionare ROI - INVIO conferma, c annulla)" @@ -79,31 +176,186 @@ def _fit_for_display(image: np.ndarray, max_side: int = 1200) -> np.ndarray: return cv2.resize(image, (int(w * s), int(h * s)), interpolation=cv2.INTER_AREA) -def draw_matches(scene: np.ndarray, matches: list[Match]) -> np.ndarray: - """Disegna baricentro, asse orientamento, bbox ruotato per ogni match.""" +def _warp_template_edges_to_scene( + template_gray: np.ndarray, + cx: float, cy: float, angle_deg: float, scale: float, + scene_shape: tuple[int, int], + canny_low: int = 50, canny_high: int = 150, +) -> np.ndarray: + """Ritorna mask edge del template ruotato+scalato posizionato in scena.""" + h, w = template_gray.shape + edge = cv2.Canny(template_gray, canny_low, canny_high) + # Matrice affine: scala + rotazione attorno al centro template, poi traslazione + Ht, Wt = h, w + cx_t = (Wt - 1) / 2.0 + cy_t = (Ht - 1) / 2.0 + M = cv2.getRotationMatrix2D((cx_t, cy_t), angle_deg, scale) + # Traslazione per portare centro template a (cx, cy) della scena + M[0, 2] += cx - cx_t + M[1, 2] += cy - cy_t + warped = cv2.warpAffine( + edge, M, (scene_shape[1], scene_shape[0]), + flags=cv2.INTER_NEAREST, borderValue=0, + ) + return warped + + +def draw_matches( + scene: np.ndarray, + matches: list[Match], + template_gray: np.ndarray | None = None, + overlay_alpha: float = 0.7, + canny_for_overlay: tuple[int, int] = (50, 150), +) -> np.ndarray: + """Disegna bbox orientato + overlay edge template + baricentro + etichetta.""" out = scene.copy() + H, W = scene.shape[:2] for i, m in enumerate(matches): color = _color_for(i) - # Bbox ruotato: il template ruotato di angle_deg ha bbox assi-allineato - # nel sistema variante; per disegnarlo esatto, 
ricaviamo il rettangolo - # ruotato del template originale attorno al baricentro. - x, y, w, h = m.bbox - # box assi-allineato della variante - cv2.rectangle(out, (x, y), (x + w, y + h), color, 1, cv2.LINE_AA) + # Overlay edge template nella pose del match (se template disponibile) + if template_gray is not None: + emap = _warp_template_edges_to_scene( + template_gray, m.cx, m.cy, m.angle_deg, m.scale, (H, W), + canny_low=canny_for_overlay[0], canny_high=canny_for_overlay[1], + ) + mask = emap > 0 + if mask.any(): + overlay_color = np.zeros_like(out) + overlay_color[mask] = color + out[mask] = ( + (1 - overlay_alpha) * out[mask] + + overlay_alpha * overlay_color[mask] + ).astype(np.uint8) + # Bbox orientato (poligono) + poly = m.bbox_poly.astype(np.int32).reshape(-1, 1, 2) + cv2.polylines(out, [poly], isClosed=True, + color=color, thickness=2, lineType=cv2.LINE_AA) + # Lato top evidenziato per leggere orientamento + p0 = tuple(m.bbox_poly[0].astype(int)) + p1 = tuple(m.bbox_poly[1].astype(int)) + cv2.line(out, p0, p1, color, 4, cv2.LINE_AA) # Baricentro cx, cy = int(round(m.cx)), int(round(m.cy)) cv2.drawMarker(out, (cx, cy), color, cv2.MARKER_CROSS, 22, 2, cv2.LINE_AA) cv2.circle(out, (cx, cy), 4, color, -1, cv2.LINE_AA) - # Asse orientamento (lunghezza ~ metà altezza bbox) - L = max(h, w) // 2 - ang_rad = np.deg2rad(m.angle_deg) - ex = int(round(cx + L * np.cos(ang_rad))) - ey = int(round(cy - L * np.sin(ang_rad))) # y invertita immagine - cv2.arrowedLine(out, (cx, cy), (ex, ey), color, 2, cv2.LINE_AA, tipLength=0.2) + # Asse orientamento + L = int(np.linalg.norm(m.bbox_poly[1] - m.bbox_poly[0])) // 2 + a = np.deg2rad(m.angle_deg) + ex = int(round(cx + L * np.cos(a))) + ey = int(round(cy - L * np.sin(a))) + cv2.arrowedLine(out, (cx, cy), (ex, ey), color, 2, + cv2.LINE_AA, tipLength=0.2) # Etichetta label = f"#{i+1} {m.angle_deg:.0f}d s={m.scale:.2f} {m.score:.2f}" cv2.putText(out, label, (cx + 8, cy - 8), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1, cv2.LINE_AA) + 
cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2, cv2.LINE_AA) + return out + + +def build_info_panel( + template_bgr: np.ndarray, + params: dict, + matches: list[Match], + panel_width: int = 380, + panel_height: int | None = None, +) -> np.ndarray: + """Costruisce pannello laterale: thumbnail modello + parametri + legenda + numerata dei match + hotkey.""" + if panel_height is None: + panel_height = panel_width * 2 + panel = np.full((panel_height, panel_width, 3), 28, dtype=np.uint8) + pad = 12 + y = pad + + def _text(img, s, y, size=0.5, color=(220, 220, 220), thick=1, x=None): + cv2.putText(img, s, (x if x is not None else pad, y), + cv2.FONT_HERSHEY_SIMPLEX, size, color, thick, cv2.LINE_AA) + + # Titolo + _text(panel, "MODELLO", y + 18, size=0.7, color=(0, 200, 255), thick=2) + y += 34 + + # Thumbnail modello + th_h, th_w = template_bgr.shape[:2] + max_tw = panel_width - 2 * pad + max_th = 150 + sc = min(max_tw / th_w, max_th / th_h) + tw = max(1, int(th_w * sc)); th = max(1, int(th_h * sc)) + thumb = cv2.resize(template_bgr, (tw, th), interpolation=cv2.INTER_AREA) + if thumb.ndim == 2: + thumb = cv2.cvtColor(thumb, cv2.COLOR_GRAY2BGR) + tx = (panel_width - tw) // 2 + panel[y:y + th, tx:tx + tw] = thumb + cv2.rectangle(panel, (tx - 1, y - 1), (tx + tw, y + th), + (90, 90, 90), 1, cv2.LINE_AA) + y += th + 12 + + # Parametri + _text(panel, "PARAMETRI", y, size=0.55, color=(0, 200, 255), thick=2) + y += 20 + for k, v in params.items(): + _text(panel, f"{k}: {v}", y, size=0.42) + y += 16 + + y += 6 + _text(panel, f"RISULTATI ({len(matches)})", y, + size=0.55, color=(0, 200, 255), thick=2) + y += 20 + if matches: + scores = [m.score for m in matches] + scales = [m.scale for m in matches] + _text(panel, f"score: {min(scores):.2f}..{max(scores):.2f}", y, + size=0.42); y += 16 + if max(scales) != min(scales): + _text(panel, f"scale: {min(scales):.2f}..{max(scales):.2f}", y, + size=0.42); y += 16 + + # Legenda numerata con colore per ogni match + max_rows = max(1, (panel_height - 
y - 120) // 16) + shown = matches[:max_rows] + for i, m in enumerate(shown): + color = _color_for(i) + # Pallino di colore + cv2.circle(panel, (pad + 6, y - 4), 5, color, -1, cv2.LINE_AA) + txt = (f"#{i+1} ({int(m.cx)},{int(m.cy)}) " + f"{m.angle_deg:.0f}d s={m.scale:.2f} {m.score:.3f}") + _text(panel, txt, y, size=0.40, x=pad + 18) + y += 16 + if len(matches) > len(shown): + _text(panel, f"... +{len(matches) - len(shown)} altri", + y, size=0.40, color=(150, 150, 150)); y += 16 + + # Hotkey in fondo + footer_y = panel_height - 92 + _text(panel, "HOTKEY", footer_y, size=0.55, color=(0, 200, 255), thick=2) + fy = footer_y + 18 + for line in [ + "r modifica parametri", + "o nuovo ROI (stesso modello)", + "m nuovo file modello", + "s nuova scena", + "q / Esc esci", + ]: + _text(panel, line, fy, size=0.40, color=(180, 180, 180)) + fy += 14 + return panel + + +def compose_result( + scene_annotated: np.ndarray, + panel: np.ndarray, +) -> np.ndarray: + """Affianca pannello a sinistra + scena a destra, altezza uniforme.""" + sH, sW = scene_annotated.shape[:2] + pH, pW = panel.shape[:2] + if pH != sH: + sc = sH / pH + new_pW = max(1, int(pW * sc)) + panel = cv2.resize(panel, (new_pW, sH), interpolation=cv2.INTER_AREA) + pW = new_pW + out = np.zeros((sH, pW + sW, 3), dtype=np.uint8) + out[:, :pW] = panel + out[:, pW:] = scene_annotated return out @@ -116,17 +368,48 @@ def _color_for(i: int) -> tuple[int, int, int]: return palette[i % len(palette)] -def show_results(scene: np.ndarray, matches: list[Match]) -> None: +def show_results( + scene: np.ndarray, + matches: list[Match], + template_bgr: np.ndarray | None = None, + params: dict | None = None, +) -> str: + """Visualizza risultati. 
Ritorna 'rematch' se l'utente preme 'r', altrimenti 'quit'.""" print(f"\n=== {len(matches)} match trovati ===") for i, m in enumerate(matches): print(f" #{i+1}: cx={m.cx:.1f} cy={m.cy:.1f} " f"angle={m.angle_deg:.1f}d scale={m.scale:.2f} score={m.score:.3f}") - overlay = draw_matches(scene, matches) - disp = _fit_for_display(overlay, max_side=1400) + template_gray = None + if template_bgr is not None: + template_gray = (template_bgr if template_bgr.ndim == 2 + else cv2.cvtColor(template_bgr, cv2.COLOR_BGR2GRAY)) + annotated = draw_matches(scene, matches, template_gray=template_gray) + if template_bgr is not None and params is not None: + panel = build_info_panel(template_bgr, params, matches, + panel_height=annotated.shape[0]) + composed = compose_result(annotated, panel) + else: + composed = annotated + disp = _fit_for_display(composed, max_side=1600) cv2.imshow(WINDOW_RESULT, disp) - print("\nPremere un tasto sulla finestra per chiudere.") - cv2.waitKey(0) + print("\n[r] parametri [o] nuovo ROI [m] nuovo modello [s] nuova scena [q/Esc] chiudi") + action = "quit" + while True: + k = cv2.waitKey(0) & 0xFF + if k in (ord("r"), ord("R")): + action = "rematch"; break + if k in (ord("o"), ord("O")): + action = "new_roi"; break + if k in (ord("m"), ord("M")): + action = "new_model"; break + if k in (ord("s"), ord("S")): + action = "new_scene"; break + if k in (27, ord("q"), ord("Q")): + action = "quit"; break + if k != 255: + action = "quit"; break cv2.destroyAllWindows() + return action def run( @@ -142,52 +425,144 @@ def run( pyramid_levels: int = 3, min_score: float = 0.55, max_matches: int = 25, + nms_radius: int = 0, backend: str = "line", ) -> None: """Entry-point GUI completo.""" - print("[1/4] Selezionare immagine MODELLO...") + print("[1] Selezionare immagine MODELLO...") model_path = pick_file("Immagine MODELLO", initialdir=initial_dir) if not model_path: print("Annullato."); return model_img = load_image(model_path) - print(f" caricato: {model_path} 
shape={model_img.shape}") + print(f" caricato: {model_path} shape={model_img.shape}") - print("[2/4] Selezionare ROI sul modello (trascinare, INVIO conferma).") + print("[2] Selezionare ROI sul modello (trascinare, INVIO conferma).") roi = select_roi(model_img) if roi is None: print("ROI vuota, annullato."); return - print(f" ROI: {roi.shape[1]}x{roi.shape[0]} px") + print(f" ROI: {roi.shape[1]}x{roi.shape[0]} px") - print("[3/4] Selezionare immagine SCENA...") + print("[3] Selezionare immagine SCENA...") scene_path = pick_file("Immagine SCENA", initialdir=str(Path(model_path).parent)) if not scene_path: print("Annullato."); return scene = load_image(scene_path) - print(f" caricato: {scene_path} shape={scene.shape}") + print(f" caricato: {scene_path} shape={scene.shape}") - print(f"[4/4] Train + match (backend={backend})...") - if backend == "edge": - matcher: EdgeShapeMatcher | LineShapeMatcher = EdgeShapeMatcher( - angle_step_deg=angle_step_deg, angle_range_deg=angle_range_deg, - scale_range=scale_range, scale_step=scale_step, + # Valori iniziali del form parametri + cur = { + "backend": backend, + "angle_min": angle_range_deg[0], + "angle_max": angle_range_deg[1], + "angle_step": angle_step_deg, + "scale_min": scale_range[0], + "scale_max": scale_range[1], + "scale_step": scale_step, + "min_score": min_score, + "max_matches": max_matches, + "nms_radius": nms_radius, + "num_features": num_features, + "weak_grad": weak_grad, + "strong_grad": strong_grad, + "spread_radius": spread_radius, + "pyramid_levels": pyramid_levels, + } + + while True: + print("[4/?] 
Dialog parametri (OK=conferma, Annulla=esci)...") + new = edit_params(cur, template_bgr=roi) + if new is None: + print("Annullato."); return + cur = new + + print(f" Train + match (backend={cur['backend']})...") + if cur["backend"] == "edge": + matcher: EdgeShapeMatcher | LineShapeMatcher = EdgeShapeMatcher( + angle_step_deg=cur["angle_step"], + angle_range_deg=(cur["angle_min"], cur["angle_max"]), + scale_range=(cur["scale_min"], cur["scale_max"]), + scale_step=cur["scale_step"], + ) + else: + matcher = LineShapeMatcher( + num_features=cur["num_features"], + weak_grad=cur["weak_grad"], strong_grad=cur["strong_grad"], + angle_step_deg=cur["angle_step"], + angle_range_deg=(cur["angle_min"], cur["angle_max"]), + scale_range=(cur["scale_min"], cur["scale_max"]), + scale_step=cur["scale_step"], + spread_radius=cur["spread_radius"], + pyramid_levels=cur["pyramid_levels"], + ) + import time + t0 = time.time() + n = matcher.train(roi) + t_train = time.time() - t0 + print(f" train: {n} varianti in {t_train:.2f}s") + t0 = time.time() + nms = cur["nms_radius"] if cur["nms_radius"] > 0 else None + matches = matcher.find( + scene, min_score=cur["min_score"], + max_matches=cur["max_matches"], nms_radius=nms, ) - else: - matcher = LineShapeMatcher( - num_features=num_features, - weak_grad=weak_grad, strong_grad=strong_grad, - angle_step_deg=angle_step_deg, angle_range_deg=angle_range_deg, - scale_range=scale_range, scale_step=scale_step, - spread_radius=spread_radius, pyramid_levels=pyramid_levels, - ) - import time - t0 = time.time() - n = matcher.train(roi) - print(f" train: {n} varianti in {time.time()-t0:.2f}s") - t0 = time.time() - matches = matcher.find(scene, min_score=min_score, max_matches=max_matches) - print(f" find: {len(matches)} match in {time.time()-t0:.2f}s") - show_results(scene, matches) + t_find = time.time() - t0 + print(f" find: {len(matches)} match in {t_find:.2f}s") + + params = { + "backend": cur["backend"], + "angle_range": 
f"{cur['angle_min']:.0f}..{cur['angle_max']:.0f}d", + "angle_step": f"{cur['angle_step']:.1f}d", + "scale_range": f"{cur['scale_min']:.2f}..{cur['scale_max']:.2f}", + "scale_step": f"{cur['scale_step']:.2f}", + "min_score": f"{cur['min_score']:.2f}", + "max_matches": str(cur["max_matches"]), + "nms_radius": str(nms if nms else "auto"), + "num_variants": str(n), + "train_time": f"{t_train:.2f}s", + "find_time": f"{t_find:.2f}s", + } + if cur["backend"] == "line": + params["num_features"] = str(cur["num_features"]) + params["weak/strong"] = f"{cur['weak_grad']:.0f}/{cur['strong_grad']:.0f}" + params["spread_radius"] = str(cur["spread_radius"]) + params["pyramid_levels"] = str(cur["pyramid_levels"]) + action = show_results(scene, matches, template_bgr=roi, params=params) + if action == "rematch": + continue + if action == "new_roi": + new_roi = select_roi(model_img) + if new_roi is None: + print("ROI annullata, esco.") + break + roi = new_roi + print(f" nuovo ROI: {roi.shape[1]}x{roi.shape[0]} px") + continue + if action == "new_model": + p = pick_file("Nuovo MODELLO", + initialdir=str(Path(model_path).parent)) + if not p: + print("Annullato."); break + model_path = p + model_img = load_image(model_path) + print(f" modello: {model_path} shape={model_img.shape}") + new_roi = select_roi(model_img) + if new_roi is None: + print("ROI annullata, esco."); break + roi = new_roi + print(f" nuovo ROI: {roi.shape[1]}x{roi.shape[0]} px") + continue + if action == "new_scene": + p = pick_file("Nuova SCENA", + initialdir=str(Path(scene_path).parent)) + if not p: + print("Annullato."); break + scene_path = p + scene = load_image(scene_path) + print(f" scena: {scene_path} shape={scene.shape}") + continue + # quit + break if __name__ == "__main__": diff --git a/pm2d/line_matcher.py b/pm2d/line_matcher.py index bf12708..0c62c62 100644 --- a/pm2d/line_matcher.py +++ b/pm2d/line_matcher.py @@ -26,6 +26,8 @@ della ROI (modello non-rettangolare). 
from __future__ import annotations +import os +from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass import cv2 @@ -34,6 +36,29 @@ import numpy as np N_BINS = 8 # orientamenti quantizzati modulo π +def _oriented_bbox_polygon( + cx: float, cy: float, w: float, h: float, angle_deg: float, +) -> np.ndarray: + """Ritorna 4 vertici (float32, shape (4,2)) del bbox orientato. + + Convenzione coerente con cv2.getRotationMatrix2D usato nel train: + rotazione counter-clockwise (matematica) ma sistema immagine y-down, + quindi visivamente orario. + """ + w2, h2 = w / 2.0, h / 2.0 + # Vertici template non-ruotato centrati al centro + corners = np.array([[-w2, -h2], [w2, -h2], [w2, h2], [-w2, h2]], np.float32) + a = np.deg2rad(angle_deg) + c, s = np.cos(a), np.sin(a) + # cv2.getRotationMatrix2D con angolo a positivo applica R = [[c,s],[-s,c]] + # (ruota counter-clockwise nel sistema matematico; y-down → orario) + R = np.array([[c, s], [-s, c]], np.float32) + rotated = corners @ R.T + rotated[:, 0] += cx + rotated[:, 1] += cy + return rotated + + @dataclass class Match: cx: float @@ -41,7 +66,16 @@ class Match: angle_deg: float scale: float score: float - bbox: tuple[int, int, int, int] + bbox_poly: np.ndarray # (4, 2) float32 - 4 vertici ordinati (ruotato) + + +@dataclass +class _LevelFeatures: + """Feature piramidate (livello l = downsample /2^l).""" + dx: np.ndarray # int32 + dy: np.ndarray # int32 + bin: np.ndarray # int8 + n: int @dataclass @@ -49,16 +83,13 @@ class _Variant: """Template precomputato (una pose).""" angle_deg: float scale: float - # Feature come 3 array paralleli (dx, dy, bin) relativi al centro-modello - dx: np.ndarray # int32, shape (N,) - dy: np.ndarray # int32, shape (N,) - bin: np.ndarray # int8, shape (N,) + # Feature piramide: levels[0] = full-res, levels[l] = /2^l + levels: list[_LevelFeatures] # Bbox kernel (per visualizzazione / limiti ricerca) kh: int kw: int - cx_local: float # centro-modello dentro al bbox kernel 
(solo per bbox visivo) + cx_local: float # centro-modello dentro al bbox kernel cy_local: float - n_features: int class LineShapeMatcher: @@ -77,6 +108,7 @@ class LineShapeMatcher: min_feature_spacing: int = 3, pyramid_levels: int = 2, top_score_factor: float = 0.5, + n_threads: int | None = None, ) -> None: self.num_features = num_features self.weak_grad = weak_grad @@ -89,9 +121,11 @@ class LineShapeMatcher: self.min_feature_spacing = min_feature_spacing self.pyramid_levels = max(1, pyramid_levels) self.top_score_factor = top_score_factor + self.n_threads = n_threads or max(1, (os.cpu_count() or 2) - 1) self.variants: list[_Variant] = [] self.template_size: tuple[int, int] = (0, 0) + self.template_gray: np.ndarray | None = None # --- Helpers ------------------------------------------------------- @@ -159,15 +193,32 @@ class LineShapeMatcher: # --- Training ------------------------------------------------------ - def train(self, template_bgr: np.ndarray, mask: np.ndarray | None = None) -> int: - """Genera varianti rotate+scalate con feature sparse. + def _build_pyramid_features( + self, dx: np.ndarray, dy: np.ndarray, bin_: np.ndarray, + ) -> list[_LevelFeatures]: + """Piramide feature precomputata: livello l = /2^l con dedup.""" + levels = [_LevelFeatures(dx=dx.copy(), dy=dy.copy(), bin=bin_.copy(), + n=len(dx))] + for lvl in range(1, self.pyramid_levels): + sf = 2 ** lvl + dx_l = (dx // sf).astype(np.int32) + dy_l = (dy // sf).astype(np.int32) + # Dedup: rimuove feature collassate sullo stesso (dx, dy, bin) + key = ((dx_l.astype(np.int64) << 24) + | (dy_l.astype(np.int64) << 8) + | bin_.astype(np.int64)) + _, uniq = np.unique(key, return_index=True) + levels.append(_LevelFeatures( + dx=dx_l[uniq], dy=dy_l[uniq], bin=bin_[uniq], n=len(uniq), + )) + return levels - mask: maschera binaria opzionale (stessa shape del template) per - limitare il modello a una regione non rettangolare. 
- """ + def train(self, template_bgr: np.ndarray, mask: np.ndarray | None = None) -> int: + """Genera varianti rotate+scalate con feature sparse + piramide.""" gray = self._to_gray(template_bgr) h, w = gray.shape self.template_size = (w, h) + self.template_gray = gray.copy() if mask is None: mask_full = np.full((h, w), 255, dtype=np.uint8) else: @@ -207,27 +258,26 @@ class LineShapeMatcher: if len(fx) < 8: continue - # Feature relative al centro-modello (centro rotazione) cx_c = diag / 2.0 cy_c = diag / 2.0 dx = (fx - cx_c).astype(np.int32) dy = (fy - cy_c).astype(np.int32) - # Dimensione bbox per visualizzazione x0 = int(dx.min()); x1 = int(dx.max()) y0 = int(dy.min()); y1 = int(dy.max()) kw = x1 - x0 + 1 kh = y1 - y0 + 1 - cx_local = -x0 # posizione centro dentro al bbox + cx_local = -x0 cy_local = -y0 + levels = self._build_pyramid_features(dx, dy, fb) + self.variants.append(_Variant( angle_deg=float(ang), scale=float(s), - dx=dx, dy=dy, bin=fb, + levels=levels, kh=kh, kw=kw, cx_local=float(cx_local), cy_local=float(cy_local), - n_features=len(fx), )) return len(self.variants) @@ -249,16 +299,21 @@ class LineShapeMatcher: @staticmethod def _score_by_shift( resp: np.ndarray, dx: np.ndarray, dy: np.ndarray, bins: np.ndarray, + bin_has_data: np.ndarray | None = None, ) -> np.ndarray: """score[y,x] = Σ_i resp[bin_i][y+dy_i, x+dx_i] / len(dx). - Implementazione vettorizzata con slicing. + Ottimizzazione: se `bin_has_data` è fornito, skippa feature il cui + bin non ha pixel attivi nella scena (contribuzione = 0). 
""" _, H, W = resp.shape acc = np.zeros((H, W), dtype=np.float32) - for i in range(len(dx)): - ddx = int(dx[i]); ddy = int(dy[i]); b = int(bins[i]) - # dst[y, x] += resp[b][y+ddy, x+ddx] + n = len(dx) + for i in range(n): + b = int(bins[i]) + if bin_has_data is not None and not bin_has_data[b]: + continue + ddx = int(dx[i]); ddy = int(dy[i]) y0s = max(0, -ddy); y1s = min(H, H - ddy) x0s = max(0, -ddx); x1s = min(W, W - ddx) if y0s >= y1s or x0s >= x1s: @@ -266,16 +321,133 @@ class LineShapeMatcher: y0r = y0s + ddy; y1r = y1s + ddy x0r = x0s + ddx; x1r = x1s + ddx acc[y0s:y1s, x0s:x1s] += resp[b, y0r:y1r, x0r:x1r] - if len(dx) > 0: - acc /= len(dx) + if n > 0: + acc /= n return acc + @staticmethod + def _subpixel_peak(acc: np.ndarray, x: int, y: int) -> tuple[float, float]: + """Fit parabolico 2D attorno al picco per offset subpixel (±0.5 px).""" + H, W = acc.shape + if x <= 0 or x >= W - 1 or y <= 0 or y >= H - 1: + return float(x), float(y) + c = acc[y, x] + dx2 = acc[y, x + 1] - 2 * c + acc[y, x - 1] + dy2 = acc[y + 1, x] - 2 * c + acc[y - 1, x] + dx1 = (acc[y, x + 1] - acc[y, x - 1]) / 2.0 + dy1 = (acc[y + 1, x] - acc[y - 1, x]) / 2.0 + ox = -dx1 / dx2 if abs(dx2) > 1e-6 else 0.0 + oy = -dy1 / dy2 if abs(dy2) > 1e-6 else 0.0 + ox = float(np.clip(ox, -0.5, 0.5)) + oy = float(np.clip(oy, -0.5, 0.5)) + return x + ox, y + oy + + def _refine_angle( + self, + resp0: np.ndarray, + template_gray: np.ndarray, + cx: float, cy: float, + angle_deg: float, scale: float, + mask_full: np.ndarray, + angle_fine_step: float = 0.5, + search_radius: float | None = None, + ) -> tuple[float, float, float, float]: + """Ricerca angolare fine (sub-step) attorno al match grezzo. + + Genera 5 template temporanei a angle ± {0.5, 1.0} * step e sceglie + l'angolo con score massimo (parabolic fit sulle 3 score centrali). + Ritorna (angle_refined, score, cx_refined, cy_refined). 
+ """ + if search_radius is None: + search_radius = self.angle_step_deg / 2.0 + offsets = np.linspace(-search_radius, search_radius, 5) + best = (angle_deg, -1.0, cx, cy) + scores_by_off: dict[float, float] = {} + + h, w = template_gray.shape + sw = max(16, int(round(w * scale))) + sh = max(16, int(round(h * scale))) + gray_s = cv2.resize(template_gray, (sw, sh), interpolation=cv2.INTER_LINEAR) + mask_s = cv2.resize(mask_full, (sw, sh), interpolation=cv2.INTER_NEAREST) + diag = int(np.ceil(np.hypot(sh, sw))) + 6 + py = (diag - sh) // 2; px = (diag - sw) // 2 + gray_p = cv2.copyMakeBorder(gray_s, py, diag - sh - py, px, diag - sw - px, + cv2.BORDER_REPLICATE) + mask_p = cv2.copyMakeBorder(mask_s, py, diag - sh - py, px, diag - sw - px, + cv2.BORDER_CONSTANT, value=0) + center = (diag / 2.0, diag / 2.0) + + H, W = resp0.shape[1], resp0.shape[2] + # Ricerca locale posizione con margine ±2 px sulla (cx, cy) + margin = 3 + + for off in offsets: + ang = angle_deg + off + M = cv2.getRotationMatrix2D(center, ang, 1.0) + gray_r = cv2.warpAffine(gray_p, M, (diag, diag), + flags=cv2.INTER_LINEAR, + borderMode=cv2.BORDER_REPLICATE) + mask_r = cv2.warpAffine(mask_p, M, (diag, diag), + flags=cv2.INTER_NEAREST, borderValue=0) + mag, bins = self._gradient(gray_r) + fx, fy, fb = self._extract_features(mag, bins, mask_r) + if len(fx) < 8: + scores_by_off[float(off)] = 0.0 + continue + dx = (fx - center[0]).astype(np.int32) + dy = (fy - center[1]).astype(np.int32) + # Finestra locale ±margin attorno a (cx, cy) via slicing vettorizzato + y_lo = int(cy) - margin; y_hi = int(cy) + margin + 1 + x_lo = int(cx) - margin; x_hi = int(cx) + margin + 1 + sh = y_hi - y_lo; sw = x_hi - x_lo + acc = np.zeros((sh, sw), dtype=np.float32) + for i in range(len(dx)): + ddx = int(dx[i]); ddy = int(dy[i]); b = int(fb[i]) + sy0 = y_lo + ddy; sy1 = y_hi + ddy + sx0 = x_lo + ddx; sx1 = x_hi + ddx + a_y0 = max(0, -sy0); a_y1 = sh - max(0, sy1 - H) + a_x0 = max(0, -sx0); a_x1 = sw - max(0, sx1 - W) + s_y0 = 
max(0, sy0); s_y1 = min(H, sy1) + s_x0 = max(0, sx0); s_x1 = min(W, sx1) + if s_y1 > s_y0 and s_x1 > s_x0: + acc[a_y0:a_y1, a_x0:a_x1] += resp0[b, s_y0:s_y1, s_x0:s_x1] + acc /= len(dx) + _, max_val, _, max_loc = cv2.minMaxLoc(acc) + scores_by_off[float(off)] = float(max_val) + if max_val > best[1]: + new_cx = x_lo + float(max_loc[0]) + new_cy = y_lo + float(max_loc[1]) + best = (ang, float(max_val), new_cx, new_cy) + + # Parabolic fit su 3 angoli attorno al massimo + sorted_offs = sorted(scores_by_off.keys()) + best_off = best[0] - angle_deg + try: + i = sorted_offs.index( + min(sorted_offs, key=lambda x: abs(x - best_off)) + ) + if 0 < i < len(sorted_offs) - 1: + s0 = scores_by_off[sorted_offs[i - 1]] + s1 = scores_by_off[sorted_offs[i]] + s2 = scores_by_off[sorted_offs[i + 1]] + denom = (s0 - 2 * s1 + s2) + if abs(denom) > 1e-6: + delta = 0.5 * (s0 - s2) / denom + step = sorted_offs[i + 1] - sorted_offs[i] + refined_off = sorted_offs[i] + delta * step + return (angle_deg + refined_off, best[1], best[2], best[3]) + except ValueError: + pass + return best + def find( self, scene_bgr: np.ndarray, min_score: float = 0.6, max_matches: int = 20, nms_radius: int | None = None, + refine_angle: bool = True, + subpixel: bool = True, ) -> list[Match]: if not self.variants: raise RuntimeError("Matcher non addestrato: chiamare train() prima.") @@ -285,66 +457,114 @@ class LineShapeMatcher: for _ in range(self.pyramid_levels - 1): grays.append(cv2.pyrDown(grays[-1])) top = len(grays) - 1 - sf = 2 ** top - # Response map top-level (usata SOLO per pruning varianti) + # Response map top-level resp_top = self._response_map(grays[top]) + bin_has_top = np.array([resp_top[b].any() for b in range(N_BINS)]) if nms_radius is None: nms_radius = max(8, min(self.template_size) // 2) top_thresh = min_score * self.top_score_factor - # Pruning varianti via top-level - kept_variants: list[int] = [] - for vi, var in enumerate(self.variants): - dx_t = (var.dx // sf).astype(np.int32) - dy_t = 
(var.dy // sf).astype(np.int32) - key = ((dx_t.astype(np.int64) << 24) - | (dy_t.astype(np.int64) << 8) - | var.bin.astype(np.int64)) - _, uniq_idx = np.unique(key, return_index=True) + # Pruning varianti via top-level (parallelizzato) + def _top_score(vi: int) -> tuple[int, float]: + var = self.variants[vi] + lvl = var.levels[min(top, len(var.levels) - 1)] score = self._score_by_shift( - resp_top, dx_t[uniq_idx], dy_t[uniq_idx], var.bin[uniq_idx], + resp_top, lvl.dx, lvl.dy, lvl.bin, bin_has_data=bin_has_top, ) - if score.size and score.max() >= top_thresh: - kept_variants.append(vi) + return vi, float(score.max()) if score.size else -1.0 + + kept_variants: list[tuple[int, float]] = [] + if self.n_threads > 1: + with ThreadPoolExecutor(max_workers=self.n_threads) as ex: + for vi, best in ex.map(_top_score, range(len(self.variants))): + if best >= top_thresh: + kept_variants.append((vi, best)) + else: + for vi in range(len(self.variants)): + vi2, best = _top_score(vi) + if best >= top_thresh: + kept_variants.append((vi2, best)) if not kept_variants: return [] - # Full-res: score_by_shift solo per le varianti sopravvissute + max_vars_full = max(8, max_matches * 4) + kept_variants.sort(key=lambda t: -t[1]) + kept_variants = kept_variants[:max_vars_full] + + # Full-res (parallelizzato per variante) resp0 = self._response_map(gray0) - refined: list[tuple[float, float, float, int]] = [] - for vi in kept_variants: + bin_has_full = np.array([resp0[b].any() for b in range(N_BINS)]) + + def _full_score(vi: int) -> tuple[int, np.ndarray]: var = self.variants[vi] - score = self._score_by_shift(resp0, var.dx, var.dy, var.bin) - # Picchi sopra soglia + lvl0 = var.levels[0] + score = self._score_by_shift( + resp0, lvl0.dx, lvl0.dy, lvl0.bin, bin_has_data=bin_has_full, + ) + return vi, score + + candidates_per_var: list[tuple[int, np.ndarray]] = [] + raw: list[tuple[float, int, int, int]] = [] + var_indices = [vi for vi, _ in kept_variants] + if self.n_threads > 1 and 
len(var_indices) > 1: + with ThreadPoolExecutor(max_workers=self.n_threads) as ex: + results = list(ex.map(_full_score, var_indices)) + else: + results = [_full_score(vi) for vi in var_indices] + + for vi, score in results: ys, xs = np.where(score >= min_score) if len(ys) == 0: continue vals = score[ys, xs] - # Ordine decrescente (solo i top-K per evitare liste enormi) K = min(len(vals), max_matches * 5) ord_idx = np.argpartition(-vals, K - 1)[:K] + candidates_per_var.append((vi, score)) for i in ord_idx: - refined.append((float(vals[i]), - float(xs[i]), float(ys[i]), vi)) + raw.append((float(vals[i]), int(xs[i]), int(ys[i]), vi)) - refined.sort(key=lambda c: -c[0]) + raw.sort(key=lambda c: -c[0]) + + # Mappa vi → score_map per subpixel/refinement + score_maps = dict(candidates_per_var) + + # NMS + subpixel + refinement angolare + # Mask template per refinement (non disponibile qui: usa full) + h, w = self.template_gray.shape if self.template_gray is not None else (0, 0) + mask_full = np.full((h, w), 255, dtype=np.uint8) kept: list[Match] = [] r2 = nms_radius * nms_radius - for score, cx, cy, vi in refined: - if any((k.cx - cx) ** 2 + (k.cy - cy) ** 2 < r2 for k in kept): - continue + tw, th = self.template_size + for score, xi, yi, vi in raw: var = self.variants[vi] - bx = int(round(cx - var.cx_local)) - by = int(round(cy - var.cy_local)) + cx_f = float(xi); cy_f = float(yi) + if subpixel and vi in score_maps: + cx_f, cy_f = self._subpixel_peak(score_maps[vi], xi, yi) + + if any((k.cx - cx_f) ** 2 + (k.cy - cy_f) ** 2 < r2 for k in kept): + continue + + ang_f = var.angle_deg + score_f = score + if refine_angle and self.template_gray is not None: + ang_f, score_f, cx_f, cy_f = self._refine_angle( + resp0, self.template_gray, cx_f, cy_f, + var.angle_deg, var.scale, mask_full, + search_radius=self.angle_step_deg / 2.0, + ) + + poly = _oriented_bbox_polygon( + cx_f, cy_f, tw * var.scale, th * var.scale, ang_f, + ) kept.append(Match( - cx=cx, cy=cy, - 
angle_deg=var.angle_deg, + cx=cx_f, cy=cy_f, + angle_deg=ang_f, scale=var.scale, - score=score, - bbox=(bx, by, var.kw, var.kh), + score=score_f, + bbox_poly=poly, )) if len(kept) >= max_matches: break diff --git a/pm2d/matcher.py b/pm2d/matcher.py index 9a7da7f..d90e620 100644 --- a/pm2d/matcher.py +++ b/pm2d/matcher.py @@ -16,6 +16,8 @@ from dataclasses import dataclass import cv2 import numpy as np +from pm2d.line_matcher import _oriented_bbox_polygon + @dataclass class Match: @@ -26,7 +28,7 @@ class Match: angle_deg: float # rotazione [0, 360) scale: float # fattore scala (1.0 = template originale) score: float # similarità NCC [0, 1] - bbox: tuple[int, int, int, int] # x, y, w, h del template ruotato/scalato + bbox_poly: np.ndarray # (4, 2) float32 - vertici bbox orientato @dataclass @@ -67,6 +69,7 @@ class EdgeShapeMatcher: self.top_score_factor = top_score_factor self.templates: list[Template] = [] self.template_size: tuple[int, int] = (0, 0) # w, h originale + self.template_gray: np.ndarray | None = None @staticmethod def _to_gray(img: np.ndarray) -> np.ndarray: @@ -96,6 +99,7 @@ class EdgeShapeMatcher: gray = self._to_gray(template_bgr) h, w = gray.shape self.template_size = (w, h) + self.template_gray = gray.copy() edge_orig = self._edges(gray) mask_orig = np.full((h, w), 255, dtype=np.uint8) @@ -249,20 +253,23 @@ class EdgeShapeMatcher: # NMS spaziale baricentri kept: list[Match] = [] r2 = nms_radius * nms_radius + tw0, th0 = self.template_size for score, x, y, ti in refined: tpl = self.templates[ti] cx = x + tpl.cx_local cy = y + tpl.cy_local if any((k.cx - cx) ** 2 + (k.cy - cy) ** 2 < r2 for k in kept): continue - th, tw = tpl.edge.shape + poly = _oriented_bbox_polygon( + cx, cy, tw0 * tpl.scale, th0 * tpl.scale, tpl.angle_deg, + ) kept.append( Match( cx=cx, cy=cy, angle_deg=tpl.angle_deg, scale=tpl.scale, score=score, - bbox=(x, y, tw, th), + bbox_poly=poly, ) ) if len(kept) >= max_matches: