feat: PM2D standalone shape-based matcher

Programma standalone Pattern Matching 2D con GUI cv2/tk + algoritmo
puro riusabile. Due backend:

- LineShapeMatcher (default): porting Python di line2Dup (linemod-style)
  - Gradient orientation quantized 8-bin modulo π + spreading
  - Feature sparse top-magnitude con spacing minimo
  - Score via shift-add vettorizzato numpy (O(N_features·H·W))
  - Piramide multi-risoluzione con pruning varianti al top-level
  - Supporto mask binaria per modello non-rettangolare

- EdgeShapeMatcher (fallback): Canny + matchTemplate multi-rotazione

GUI separata da algoritmo. Benchmark clip.png (13 istanze):
  - Edge backend:  84s, 6/13 score ~0.3
  - Line backend:  4.1s, 13/13 score 0.98-1.00

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-24 00:46:59 +02:00
commit b9a4d51fac
14 changed files with 2499 additions and 0 deletions
+7
View File
@@ -0,0 +1,7 @@
# Public API of the pm2d package.
from pm2d.matcher import EdgeShapeMatcher, Match, Template
# line_matcher defines its own Match dataclass; alias it to avoid clashing
# with the EdgeShapeMatcher Match re-exported above.
from pm2d.line_matcher import LineShapeMatcher, Match as LineMatch
__all__ = [
"EdgeShapeMatcher", "Match", "Template",
"LineShapeMatcher", "LineMatch",
]
+195
View File
@@ -0,0 +1,195 @@
"""GUI standalone OpenCV per Pattern Matching 2D.
Flusso:
1. Apri immagine modello (file dialog tk)
2. Selezione ROI con cv2.selectROI
3. Apri immagine scena
4. Esegui matching
5. Visualizza risultati (baricentro, angolo, score, bbox)
Tutta la logica algoritmica vive in pm2d.matcher.EdgeShapeMatcher.
"""
from __future__ import annotations
import sys
from pathlib import Path
from tkinter import Tk, filedialog
import cv2
import numpy as np
from pm2d.matcher import EdgeShapeMatcher
from pm2d.line_matcher import LineShapeMatcher, Match
WINDOW_MODEL = "Modello (selezionare ROI - INVIO conferma, c annulla)"
WINDOW_RESULT = "Risultato matching"
def pick_file(title: str, initialdir: str | None = None) -> str | None:
    """Show a native file-open dialog (the Tk root window stays hidden).

    Returns the selected path, or None when the dialog is cancelled.
    """
    hidden_root = Tk()
    hidden_root.withdraw()
    chosen = filedialog.askopenfilename(
        title=title,
        initialdir=initialdir,
        filetypes=[
            ("Immagini", "*.png *.jpg *.jpeg *.bmp *.tif *.tiff"),
            ("Tutti i file", "*.*"),
        ],
    )
    hidden_root.destroy()
    return chosen or None
def load_image(path: str) -> np.ndarray:
    """Read *path* as a BGR image; raise FileNotFoundError if unreadable."""
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    if image is not None:
        return image
    raise FileNotFoundError(f"Impossibile leggere immagine: {path}")
def select_roi(image: np.ndarray) -> np.ndarray | None:
    """Let the user drag a ROI on *image*; return the BGR crop or None.

    The image is shown downscaled (longest side <= 1200 px) and the drawn
    rectangle is mapped back to original-resolution coordinates.
    """
    preview = _fit_for_display(image, max_side=1200)
    scale = preview.shape[1] / image.shape[1]
    rect = cv2.selectROI(WINDOW_MODEL, preview, showCrosshair=True, fromCenter=False)
    cv2.destroyWindow(WINDOW_MODEL)
    x, y, w, h = rect
    if w == 0 or h == 0:
        return None
    # Map the display-space rectangle back to full-resolution pixels,
    # clamping to the image bounds.
    ox = max(0, int(round(x / scale)))
    oy = max(0, int(round(y / scale)))
    ow = max(1, min(int(round(w / scale)), image.shape[1] - ox))
    oh = max(1, min(int(round(h / scale)), image.shape[0] - oy))
    return image[oy:oy + oh, ox:ox + ow].copy()
def _fit_for_display(image: np.ndarray, max_side: int = 1200) -> np.ndarray:
h, w = image.shape[:2]
m = max(h, w)
if m <= max_side:
return image
s = max_side / m
return cv2.resize(image, (int(w * s), int(h * s)), interpolation=cv2.INTER_AREA)
def draw_matches(scene: np.ndarray, matches: list[Match]) -> np.ndarray:
    """Overlay centroid, orientation arrow, bbox and label for every match."""
    canvas = scene.copy()
    for idx, match in enumerate(matches):
        color = _color_for(idx)
        # The stored bbox is the axis-aligned box of the rotated variant;
        # drawing the exact rotated rectangle would require re-deriving it
        # from the original template around the centroid.
        x, y, w, h = match.bbox
        cv2.rectangle(canvas, (x, y), (x + w, y + h), color, 1, cv2.LINE_AA)
        # Centroid marker: cross plus filled dot.
        cx = int(round(match.cx))
        cy = int(round(match.cy))
        cv2.drawMarker(canvas, (cx, cy), color, cv2.MARKER_CROSS, 22, 2, cv2.LINE_AA)
        cv2.circle(canvas, (cx, cy), 4, color, -1, cv2.LINE_AA)
        # Orientation arrow (length ~ half the larger bbox side); image y
        # grows downward, hence the minus sign on the sine term.
        half = max(h, w) // 2
        theta = np.deg2rad(match.angle_deg)
        tip_x = int(round(cx + half * np.cos(theta)))
        tip_y = int(round(cy - half * np.sin(theta)))
        cv2.arrowedLine(canvas, (cx, cy), (tip_x, tip_y), color, 2, cv2.LINE_AA, tipLength=0.2)
        # Text label: index, angle, scale, score.
        label = f"#{idx+1} {match.angle_deg:.0f}d s={match.scale:.2f} {match.score:.2f}"
        cv2.putText(canvas, label, (cx + 8, cy - 8),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1, cv2.LINE_AA)
    return canvas
def _color_for(i: int) -> tuple[int, int, int]:
palette = [
(0, 255, 0), (0, 200, 255), (255, 100, 100),
(255, 200, 0), (200, 0, 255), (100, 255, 200),
(255, 0, 0), (0, 255, 255),
]
return palette[i % len(palette)]
def show_results(scene: np.ndarray, matches: list[Match]) -> None:
    """Print a per-match summary to stdout and show the annotated scene."""
    print(f"\n=== {len(matches)} match trovati ===")
    for idx, m in enumerate(matches):
        print(f" #{idx+1}: cx={m.cx:.1f} cy={m.cy:.1f} "
              f"angle={m.angle_deg:.1f}d scale={m.scale:.2f} score={m.score:.3f}")
    annotated = draw_matches(scene, matches)
    preview = _fit_for_display(annotated, max_side=1400)
    cv2.imshow(WINDOW_RESULT, preview)
    print("\nPremere un tasto sulla finestra per chiudere.")
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def run(
    initial_dir: str | None = None,
    angle_step_deg: float = 5.0,
    angle_range_deg: tuple[float, float] = (0.0, 360.0),
    scale_range: tuple[float, float] = (1.0, 1.0),
    scale_step: float = 0.1,
    num_features: int = 96,
    weak_grad: float = 30.0,
    strong_grad: float = 60.0,
    spread_radius: int = 5,
    pyramid_levels: int = 3,
    min_score: float = 0.55,
    max_matches: int = 25,
    backend: str = "line",
) -> None:
    """Complete GUI entry-point: model -> ROI -> scene -> match -> display.

    initial_dir:     starting directory for the first file dialog
    angle_step_deg:  rotation sampling step [deg]
    angle_range_deg: half-open rotation range [deg]
    scale_range:     (min, max) scale factors; (1.0, 1.0) disables scaling
    scale_step:      scale sampling step
    num_features:    sparse features per variant (line backend only)
    weak_grad:       scene gradient threshold (line backend only)
    strong_grad:     template gradient threshold (line backend only)
    spread_radius:   orientation spreading radius [px] (line backend only)
    pyramid_levels:  pyramid depth for pruning (line backend only)
    min_score:       minimum accepted match score in [0, 1]
    max_matches:     maximum number of reported instances
    backend:         "edge" -> EdgeShapeMatcher; anything else -> LineShapeMatcher
    """
    print("[1/4] Selezionare immagine MODELLO...")
    model_path = pick_file("Immagine MODELLO", initialdir=initial_dir)
    if not model_path:
        print("Annullato."); return
    model_img = load_image(model_path)
    print(f" caricato: {model_path} shape={model_img.shape}")
    print("[2/4] Selezionare ROI sul modello (trascinare, INVIO conferma).")
    roi = select_roi(model_img)
    if roi is None:
        print("ROI vuota, annullato."); return
    print(f" ROI: {roi.shape[1]}x{roi.shape[0]} px")
    print("[3/4] Selezionare immagine SCENA...")
    scene_path = pick_file("Immagine SCENA",
                           initialdir=str(Path(model_path).parent))
    if not scene_path:
        print("Annullato."); return
    scene = load_image(scene_path)
    print(f" caricato: {scene_path} shape={scene.shape}")
    print(f"[4/4] Train + match (backend={backend})...")
    # Select the requested backend; both expose the same train()/find() API.
    if backend == "edge":
        matcher: EdgeShapeMatcher | LineShapeMatcher = EdgeShapeMatcher(
            angle_step_deg=angle_step_deg, angle_range_deg=angle_range_deg,
            scale_range=scale_range, scale_step=scale_step,
        )
    else:
        matcher = LineShapeMatcher(
            num_features=num_features,
            weak_grad=weak_grad, strong_grad=strong_grad,
            angle_step_deg=angle_step_deg, angle_range_deg=angle_range_deg,
            scale_range=scale_range, scale_step=scale_step,
            spread_radius=spread_radius, pyramid_levels=pyramid_levels,
        )
    import time  # local import: only needed for the timing printouts below
    t0 = time.time()
    n = matcher.train(roi)
    print(f" train: {n} varianti in {time.time()-t0:.2f}s")
    t0 = time.time()
    matches = matcher.find(scene, min_score=min_score, max_matches=max_matches)
    print(f" find: {len(matches)} match in {time.time()-t0:.2f}s")
    show_results(scene, matches)
if __name__ == "__main__":
    # Developer convenience: start the file dialogs in the local test-image
    # directory when it exists, otherwise fall back to the dialog default.
    test_dir = "/home/adriano/Documenti/Git_XYZ/VisionSuite/Shape_model_2d/Test"
    run(initial_dir=test_dir if Path(test_dir).is_dir() else None)
+351
View File
@@ -0,0 +1,351 @@
"""Shape-based matcher stile linemod (line2Dup) - Python puro + numpy/OpenCV.
Porting algoritmico dell'idea di `meiqua/shape_based_matching` (no MIPP/SIMD —
equivalente usando vettorizzazione numpy).
Training (costoso, fatto una volta per ricetta):
- Per ogni variante (angolo, scala) del template:
1. Sobel → magnitude + orientation
2. Quantizzazione orientation in N_BINS bin (modulo π, edge simmetrici)
3. Estrazione feature sparse top-magnitude con spacing minimo
4. Salvataggio feature = liste (dx, dy, bin) relative al centro-modello
Matching (veloce):
- Scena processata una sola volta per livello di piramide:
Sobel → magnitude → quant orientation → spread (dilate per bin) →
response map (N_BINS, H, W) — bit b acceso dove orientamento b presente.
- Per ogni variante:
score_map[y,x] = Σ resp[b_i][y+dy_i, x+dx_i] / N_features
implementato con shift-add vettorizzato (numpy).
- Piramide: matching top-level (basso costo, soglia ridotta) +
refinement a risoluzione piena attorno ai candidati.
Il training supporta una `mask` binaria per modellare solo una regione parziale
della ROI (modello non-rettangolare).
"""
from __future__ import annotations
from dataclasses import dataclass
import cv2
import numpy as np
N_BINS = 8 # orientamenti quantizzati modulo π
@dataclass
class Match:
    """A single detected template instance in the scene."""
    cx: float  # centroid x [px] in scene coordinates
    cy: float  # centroid y [px] in scene coordinates
    angle_deg: float  # rotation of the matched variant [deg]
    scale: float  # scale of the matched variant
    score: float  # similarity score in [0, 1]
    bbox: tuple[int, int, int, int]  # x, y, w, h of the feature bounding box
@dataclass
class _Variant:
    """Precomputed template for a single (angle, scale) pose."""
    angle_deg: float
    scale: float
    # Features as 3 parallel arrays (dx, dy, bin), relative to the model centre
    dx: np.ndarray  # int32, shape (N,)
    dy: np.ndarray  # int32, shape (N,)
    bin: np.ndarray  # int8, shape (N,)
    # Kernel bounding box (for visualization / search limits)
    kh: int
    kw: int
    cx_local: float  # model centre inside the kernel bbox (visual bbox only)
    cy_local: float
    n_features: int
class LineShapeMatcher:
    """Shape-based matcher, linemod-style — pure Python/numpy, no SIMD.

    train() precomputes sparse gradient-orientation features for every
    (angle, scale) template variant; find() builds per-bin spread response
    maps of the scene once per pyramid level, prunes variants at the top
    level, then scores the survivors at full resolution with a vectorized
    shift-add.
    """
    def __init__(
        self,
        num_features: int = 96,
        weak_grad: float = 30.0,
        strong_grad: float = 60.0,
        angle_range_deg: tuple[float, float] = (0.0, 360.0),
        angle_step_deg: float = 5.0,
        scale_range: tuple[float, float] = (1.0, 1.0),
        scale_step: float = 0.1,
        spread_radius: int = 4,
        min_feature_spacing: int = 3,
        pyramid_levels: int = 2,
        top_score_factor: float = 0.5,
    ) -> None:
        """Store matching parameters.

        num_features:        max sparse features kept per variant
        weak_grad:           scene gradient-magnitude acceptance threshold
        strong_grad:         template gradient-magnitude feature threshold
        angle_range_deg:     half-open rotation range [deg]
        angle_step_deg:      rotation sampling step [deg]
        scale_range:         (min, max) scale factors
        scale_step:          scale sampling step
        spread_radius:       orientation spreading radius [px] (dilation)
        min_feature_spacing: minimum spacing between picked features [px]
        pyramid_levels:      pyramid depth (clamped to >= 1) for pruning
        top_score_factor:    fraction of min_score required at the top level
        """
        self.num_features = num_features
        self.weak_grad = weak_grad
        self.strong_grad = strong_grad
        self.angle_range_deg = angle_range_deg
        self.angle_step_deg = angle_step_deg
        self.scale_range = scale_range
        self.scale_step = scale_step
        self.spread_radius = spread_radius
        self.min_feature_spacing = min_feature_spacing
        self.pyramid_levels = max(1, pyramid_levels)
        self.top_score_factor = top_score_factor
        self.variants: list[_Variant] = []
        self.template_size: tuple[int, int] = (0, 0)

    # --- Helpers -------------------------------------------------------
    @staticmethod
    def _to_gray(img: np.ndarray) -> np.ndarray:
        """Convert BGR to grayscale; pass grayscale input through."""
        if img.ndim == 3:
            return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        return img

    @staticmethod
    def _gradient(gray: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Sobel gradient -> (magnitude, orientation bin in [0, N_BINS))."""
        gx = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3)
        gy = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3)
        mag = cv2.magnitude(gx, gy)
        ang = np.arctan2(gy, gx)
        # Fold orientation modulo pi so opposite edge polarities share a bin.
        ang_mod = np.where(ang < 0, ang + np.pi, ang)
        bins = np.floor(ang_mod / np.pi * N_BINS).astype(np.int16)
        bins = np.clip(bins, 0, N_BINS - 1)
        return mag, bins

    def _extract_features(
        self, mag: np.ndarray, bins: np.ndarray, mask: np.ndarray | None,
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Pick up to num_features strong-gradient pixels, greedily by
        descending magnitude, enforcing min_feature_spacing between picks.

        Returns parallel arrays (x, y, bin) as (int32, int32, int8).
        """
        if mask is not None:
            mag = np.where(mask > 0, mag, 0)
        strong = mag >= self.strong_grad
        ys, xs = np.where(strong)
        if len(xs) == 0:
            # Fixed: the empty return now matches the dtypes of the
            # non-empty path (the old code returned three int32 arrays,
            # while the bin array is documented as int8).
            return (np.zeros(0, np.int32),
                    np.zeros(0, np.int32),
                    np.zeros(0, np.int8))
        vals = mag[ys, xs]
        order = np.argsort(-vals)
        spc = max(1, self.min_feature_spacing)
        occupied = np.zeros(mag.shape, dtype=bool)
        picked_x: list[int] = []
        picked_y: list[int] = []
        picked_b: list[int] = []
        for idx in order:
            y, x = int(ys[idx]), int(xs[idx])
            if occupied[y, x]:
                continue
            picked_x.append(x); picked_y.append(y)
            picked_b.append(int(bins[y, x]))
            # Block a (2*spc+1)^2 neighbourhood around the pick.
            y0 = max(0, y - spc); y1 = min(mag.shape[0], y + spc + 1)
            x0 = max(0, x - spc); x1 = min(mag.shape[1], x + spc + 1)
            occupied[y0:y1, x0:x1] = True
            if len(picked_x) >= self.num_features:
                break
        return (np.array(picked_x, np.int32),
                np.array(picked_y, np.int32),
                np.array(picked_b, np.int8))

    def _scale_list(self) -> list[float]:
        """Sampled scales in [s0, s1] with step scale_step (at least [s0])."""
        s0, s1 = self.scale_range
        if s0 >= s1 or self.scale_step <= 0:
            return [float(s0)]
        n = int(np.floor((s1 - s0) / self.scale_step)) + 1
        return [float(s0 + i * self.scale_step) for i in range(n)]

    def _angle_list(self) -> list[float]:
        """Sampled angles in [a0, a1); the end is excluded so 0 deg and
        360 deg are not both generated."""
        a0, a1 = self.angle_range_deg
        if self.angle_step_deg <= 0 or a0 >= a1:
            return [float(a0)]
        n = int(np.floor((a1 - a0) / self.angle_step_deg))
        return [float(a0 + i * self.angle_step_deg) for i in range(n)]

    # --- Training ------------------------------------------------------
    def train(self, template_bgr: np.ndarray, mask: np.ndarray | None = None) -> int:
        """Generate rotated + scaled variants with sparse features.

        template_bgr: BGR (or grayscale) model image.
        mask:         optional binary mask (same shape as the template) to
                      restrict the model to a non-rectangular region.
        Returns the number of variants that yielded enough features.
        """
        gray = self._to_gray(template_bgr)
        h, w = gray.shape
        self.template_size = (w, h)
        if mask is None:
            mask_full = np.full((h, w), 255, dtype=np.uint8)
        else:
            mask_full = (mask > 0).astype(np.uint8) * 255
        self.variants.clear()
        for s in self._scale_list():
            sw = max(16, int(round(w * s)))
            sh = max(16, int(round(h * s)))
            gray_s = cv2.resize(gray, (sw, sh), interpolation=cv2.INTER_LINEAR)
            mask_s = cv2.resize(mask_full, (sw, sh), interpolation=cv2.INTER_NEAREST)
            # Pad to the diagonal so rotation never crops the template.
            diag = int(np.ceil(np.hypot(sh, sw))) + 6
            py = (diag - sh) // 2
            px = (diag - sw) // 2
            gray_p = cv2.copyMakeBorder(
                gray_s, py, diag - sh - py, px, diag - sw - px,
                cv2.BORDER_REPLICATE,
            )
            mask_p = cv2.copyMakeBorder(
                mask_s, py, diag - sh - py, px, diag - sw - px,
                cv2.BORDER_CONSTANT, value=0,
            )
            center = (diag / 2.0, diag / 2.0)
            for ang in self._angle_list():
                M = cv2.getRotationMatrix2D(center, ang, 1.0)
                gray_r = cv2.warpAffine(
                    gray_p, M, (diag, diag),
                    flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE,
                )
                mask_r = cv2.warpAffine(
                    mask_p, M, (diag, diag),
                    flags=cv2.INTER_NEAREST, borderValue=0,
                )
                mag, bins = self._gradient(gray_r)
                fx, fy, fb = self._extract_features(mag, bins, mask_r)
                if len(fx) < 8:
                    # Too few features to score reliably; skip this pose.
                    continue
                # Features relative to the model centre (= rotation centre).
                cx_c = diag / 2.0
                cy_c = diag / 2.0
                dx = (fx - cx_c).astype(np.int32)
                dy = (fy - cy_c).astype(np.int32)
                # Bounding box, used only for visualization.
                x0 = int(dx.min()); x1 = int(dx.max())
                y0 = int(dy.min()); y1 = int(dy.max())
                kw = x1 - x0 + 1
                kh = y1 - y0 + 1
                cx_local = -x0  # centre position inside the bbox
                cy_local = -y0
                self.variants.append(_Variant(
                    angle_deg=float(ang),
                    scale=float(s),
                    dx=dx, dy=dy, bin=fb,
                    kh=kh, kw=kw,
                    cx_local=float(cx_local), cy_local=float(cy_local),
                    n_features=len(fx),
                ))
        return len(self.variants)

    # --- Matching ------------------------------------------------------
    def _response_map(self, gray: np.ndarray) -> np.ndarray:
        """Build the response map, shape (N_BINS, H, W), float32 0/1.

        Bit/bin b is 1 wherever orientation b is present within
        spread_radius (orientation spreading via per-bin dilation).
        """
        mag, bins = self._gradient(gray)
        valid = mag >= self.weak_grad
        k = 2 * self.spread_radius + 1
        kernel = np.ones((k, k), dtype=np.uint8)
        resp = np.zeros((N_BINS, gray.shape[0], gray.shape[1]), dtype=np.float32)
        for b in range(N_BINS):
            mask_b = ((bins == b) & valid).astype(np.uint8)
            d = cv2.dilate(mask_b, kernel)
            resp[b] = d.astype(np.float32)
        return resp

    @staticmethod
    def _score_by_shift(
        resp: np.ndarray, dx: np.ndarray, dy: np.ndarray, bins: np.ndarray,
    ) -> np.ndarray:
        """score[y,x] = sum_i resp[bin_i][y+dy_i, x+dx_i] / len(dx).

        Vectorized via numpy slicing: each feature contributes one shifted
        slice of its bin's response plane.
        """
        _, H, W = resp.shape
        acc = np.zeros((H, W), dtype=np.float32)
        for i in range(len(dx)):
            ddx = int(dx[i]); ddy = int(dy[i]); b = int(bins[i])
            # dst[y, x] += resp[b][y+ddy, x+ddx], clipped to the frame.
            y0s = max(0, -ddy); y1s = min(H, H - ddy)
            x0s = max(0, -ddx); x1s = min(W, W - ddx)
            if y0s >= y1s or x0s >= x1s:
                continue
            y0r = y0s + ddy; y1r = y1s + ddy
            x0r = x0s + ddx; x1r = x1s + ddx
            acc[y0s:y1s, x0s:x1s] += resp[b, y0r:y1r, x0r:x1r]
        if len(dx) > 0:
            acc /= len(dx)
        return acc

    def find(
        self,
        scene_bgr: np.ndarray,
        min_score: float = 0.6,
        max_matches: int = 20,
        nms_radius: int | None = None,
    ) -> list[Match]:
        """Find template instances in the scene.

        min_score:   minimum accepted score in [0, 1]
        max_matches: maximum number of returned instances
        nms_radius:  centroid NMS radius [px]; defaults to
                     max(8, min(template w, h) // 2)

        Raises RuntimeError if train() has not been called.
        """
        if not self.variants:
            raise RuntimeError("Matcher non addestrato: chiamare train() prima.")
        gray0 = self._to_gray(scene_bgr)
        grays = [gray0]
        for _ in range(self.pyramid_levels - 1):
            grays.append(cv2.pyrDown(grays[-1]))
        top = len(grays) - 1
        sf = 2 ** top
        # Top-level response map (used ONLY for variant pruning).
        resp_top = self._response_map(grays[top])
        if nms_radius is None:
            nms_radius = max(8, min(self.template_size) // 2)
        top_thresh = min_score * self.top_score_factor
        # Prune variants via the top pyramid level.
        kept_variants: list[int] = []
        for vi, var in enumerate(self.variants):
            dx_t = (var.dx // sf).astype(np.int32)
            dy_t = (var.dy // sf).astype(np.int32)
            # Deduplicate features that collapse onto the same coarse cell.
            # BUGFIX: the former bit-packed key
            #     (dx << 24) | (dy << 8) | bin
            # is broken for negative offsets: OR-ing two's-complement int64
            # values collapses distinct (dx, dy, bin) triples into one key
            # (any negative dy sets nearly all key bits regardless of dx),
            # silently dropping features from the top-level score.
            # Compare the triples row-wise instead.
            triples = np.stack([dx_t, dy_t, var.bin.astype(np.int32)], axis=1)
            _, uniq_idx = np.unique(triples, axis=0, return_index=True)
            score = self._score_by_shift(
                resp_top, dx_t[uniq_idx], dy_t[uniq_idx], var.bin[uniq_idx],
            )
            if score.size and score.max() >= top_thresh:
                kept_variants.append(vi)
        if not kept_variants:
            return []
        # Full resolution: shift-add scoring only for surviving variants.
        resp0 = self._response_map(gray0)
        refined: list[tuple[float, float, float, int]] = []
        for vi in kept_variants:
            var = self.variants[vi]
            score = self._score_by_shift(resp0, var.dx, var.dy, var.bin)
            # Peaks above the acceptance threshold.
            ys, xs = np.where(score >= min_score)
            if len(ys) == 0:
                continue
            vals = score[ys, xs]
            # Keep only the top-K candidates to bound the list size.
            K = min(len(vals), max_matches * 5)
            ord_idx = np.argpartition(-vals, K - 1)[:K]
            for i in ord_idx:
                refined.append((float(vals[i]),
                                float(xs[i]), float(ys[i]), vi))
        refined.sort(key=lambda c: -c[0])
        # Greedy spatial NMS on centroids, best score first.
        kept: list[Match] = []
        r2 = nms_radius * nms_radius
        for score, cx, cy, vi in refined:
            if any((k.cx - cx) ** 2 + (k.cy - cy) ** 2 < r2 for k in kept):
                continue
            var = self.variants[vi]
            bx = int(round(cx - var.cx_local))
            by = int(round(cy - var.cy_local))
            kept.append(Match(
                cx=cx, cy=cy,
                angle_deg=var.angle_deg,
                scale=var.scale,
                score=score,
                bbox=(bx, by, var.kw, var.kh),
            ))
            if len(kept) >= max_matches:
                break
        return kept
+320
View File
@@ -0,0 +1,320 @@
"""Pattern Matching 2D shape-based via edge template matching multi-rotazione/scala.
Algoritmo equivalente a Fase Alpha del documento tecnico Vision Suite:
- Estrazione edge Canny dal template (invarianza illuminazione)
- Generazione varianti del template edge per ogni (angolo, scala)
- matchTemplate NCC sulla scena edge per ogni variante
- Picchi locali con NMS spaziale per multi-istanza
Uso: vedi `EdgeShapeMatcher.train` e `EdgeShapeMatcher.find`.
"""
from __future__ import annotations
from dataclasses import dataclass
import cv2
import numpy as np
@dataclass
class Match:
    """A single instance found in the scene."""
    cx: float  # centroid x [px] in the scene
    cy: float  # centroid y [px] in the scene
    angle_deg: float  # rotation [0, 360)
    scale: float  # scale factor (1.0 = original template)
    score: float  # NCC similarity [0, 1]
    bbox: tuple[int, int, int, int]  # x, y, w, h of the rotated/scaled template
@dataclass
class Template:
    """Precomputed template variant at a given (angle, scale)."""
    angle_deg: float
    scale: float
    edge: np.ndarray  # rotated+scaled edge image (uint8 0/255)
    mask: np.ndarray  # support mask (uint8 0/255)
    cx_local: float  # centroid in the variant's local frame
    cy_local: float
class EdgeShapeMatcher:
    """Shape-based matcher on Canny edges with precomputed rotation + scale.

    train() generates edge-template variants for every (angle, scale);
    find() runs a pyramidal brute-force search followed by a local
    full-resolution refinement; save()/load() persist the trained model.
    """
    def __init__(
        self,
        canny_low: int = 50,
        canny_high: int = 150,
        angle_step_deg: float = 5.0,
        angle_range_deg: tuple[float, float] = (0.0, 360.0),
        scale_range: tuple[float, float] = (1.0, 1.0),
        scale_step: float = 0.1,
        match_method: int = cv2.TM_CCOEFF_NORMED,
        pyramid_levels: int = 3,
        top_score_factor: float = 0.6,
    ) -> None:
        """Store parameters.

        canny_low/canny_high: Canny hysteresis thresholds
        angle_step_deg:       rotation sampling step [deg]
        angle_range_deg:      half-open rotation range [deg]
        scale_range:          (min, max) scale factors
        scale_step:           scale sampling step
        match_method:         cv2.matchTemplate method (must support mask)
        pyramid_levels:       pyramid depth (clamped to >= 1)
        top_score_factor:     fraction of min_score required at the top level
        """
        self.canny_low = canny_low
        self.canny_high = canny_high
        self.angle_step_deg = angle_step_deg
        self.angle_range_deg = angle_range_deg
        self.scale_range = scale_range
        self.scale_step = scale_step
        self.match_method = match_method
        self.pyramid_levels = max(1, pyramid_levels)
        self.top_score_factor = top_score_factor
        self.templates: list[Template] = []
        self.template_size: tuple[int, int] = (0, 0)  # original (w, h)

    @staticmethod
    def _to_gray(img: np.ndarray) -> np.ndarray:
        """Convert BGR to grayscale; pass grayscale input through."""
        if img.ndim == 3:
            return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        return img

    def _edges(self, gray: np.ndarray) -> np.ndarray:
        """Canny edge map with the configured thresholds."""
        return cv2.Canny(gray, self.canny_low, self.canny_high)

    def _scale_list(self) -> list[float]:
        """Sampled scales in [s0, s1] with step scale_step (at least [s0])."""
        s0, s1 = self.scale_range
        if s0 >= s1 or self.scale_step <= 0:
            return [float(s0)]
        n = int(np.floor((s1 - s0) / self.scale_step)) + 1
        return [float(s0 + i * self.scale_step) for i in range(n)]

    def _angle_list(self) -> list[float]:
        """Sampled angles in [a0, a1); the end is excluded so 0 deg and
        360 deg are not both generated."""
        a0, a1 = self.angle_range_deg
        if self.angle_step_deg <= 0 or a0 >= a1:
            return [float(a0)]
        n = int(np.floor((a1 - a0) / self.angle_step_deg))
        return [float(a0 + i * self.angle_step_deg) for i in range(n)]

    def train(self, template_bgr: np.ndarray) -> int:
        """Generate edge-template variants for all (angle, scale) pairs.

        Returns the number of stored variants.
        """
        gray = self._to_gray(template_bgr)
        h, w = gray.shape
        self.template_size = (w, h)
        edge_orig = self._edges(gray)
        mask_orig = np.full((h, w), 255, dtype=np.uint8)
        self.templates.clear()
        scales = self._scale_list()
        angles = self._angle_list()
        for s in scales:
            sw = max(8, int(round(w * s)))
            sh = max(8, int(round(h * s)))
            edge_s = cv2.resize(edge_orig, (sw, sh), interpolation=cv2.INTER_LINEAR)
            mask_s = cv2.resize(mask_orig, (sw, sh), interpolation=cv2.INTER_NEAREST)
            # Re-threshold after the interpolated resize to stay binary.
            _, edge_s = cv2.threshold(edge_s, 64, 255, cv2.THRESH_BINARY)
            # Diagonal padding so rotation never crops the template.
            diag = int(np.ceil(np.hypot(sh, sw))) + 4
            pad_y = (diag - sh) // 2
            pad_x = (diag - sw) // 2
            edge_p = cv2.copyMakeBorder(
                edge_s, pad_y, diag - sh - pad_y, pad_x, diag - sw - pad_x,
                cv2.BORDER_CONSTANT, value=0,
            )
            mask_p = cv2.copyMakeBorder(
                mask_s, pad_y, diag - sh - pad_y, pad_x, diag - sw - pad_x,
                cv2.BORDER_CONSTANT, value=0,
            )
            center = (diag / 2.0, diag / 2.0)
            for ang in angles:
                M = cv2.getRotationMatrix2D(center, ang, 1.0)
                edge_r = cv2.warpAffine(
                    edge_p, M, (diag, diag),
                    flags=cv2.INTER_LINEAR, borderValue=0,
                )
                mask_r = cv2.warpAffine(
                    mask_p, M, (diag, diag),
                    flags=cv2.INTER_NEAREST, borderValue=0,
                )
                # Crop to the mask's bounding box.
                ys, xs = np.where(mask_r > 0)
                if len(xs) == 0:
                    continue
                x0, x1 = xs.min(), xs.max() + 1
                y0, y1 = ys.min(), ys.max() + 1
                edge_c = edge_r[y0:y1, x0:x1]
                mask_c = mask_r[y0:y1, x0:x1]
                cx_local = (mask_c.shape[1] - 1) / 2.0
                cy_local = (mask_c.shape[0] - 1) / 2.0
                self.templates.append(
                    Template(
                        angle_deg=float(ang),
                        scale=float(s),
                        edge=edge_c,
                        mask=mask_c,
                        cx_local=cx_local,
                        cy_local=cy_local,
                    )
                )
        return len(self.templates)

    def _pyrdown_binary(self, img: np.ndarray) -> np.ndarray:
        """pyrDown + re-threshold to keep a binary 0/255 image."""
        d = cv2.pyrDown(img)
        _, d = cv2.threshold(d, 32, 255, cv2.THRESH_BINARY)
        return d

    def find(
        self,
        scene_bgr: np.ndarray,
        min_score: float = 0.5,
        max_matches: int = 10,
        nms_radius: int | None = None,
    ) -> list[Match]:
        """Search template instances in the scene with a pyramidal strategy.

        - Top level: brute-force matching at low resolution (fast, with a
          relaxed threshold min_score * top_score_factor).
        - Refinement: local full-resolution re-match around each candidate.

        Raises RuntimeError if train() has not been called.
        """
        if not self.templates:
            raise RuntimeError("Matcher non addestrato: chiamare train() prima.")
        gray = self._to_gray(scene_bgr)
        scene_edge0 = self._edges(gray)
        # Edge pyramid of the scene.
        scene_pyr = [scene_edge0]
        for _ in range(self.pyramid_levels - 1):
            scene_pyr.append(self._pyrdown_binary(scene_pyr[-1]))
        top = len(scene_pyr) - 1
        sf = 2 ** top  # scale factor: top level -> level 0
        scene_top = scene_pyr[top]
        if nms_radius is None:
            nms_radius = max(8, min(self.template_size) // 2)
        top_thresh = min_score * self.top_score_factor
        # Brute-force at the top pyramid level.
        candidates: list[tuple[float, int, int, int]] = []
        for ti, tpl in enumerate(self.templates):
            edge_top = tpl.edge.copy()
            mask_top = tpl.mask.copy()
            for _ in range(top):
                edge_top = self._pyrdown_binary(edge_top)
                mask_top = self._pyrdown_binary(mask_top)
            th, tw = edge_top.shape
            if th < 6 or tw < 6:
                continue  # too small to match reliably at this level
            if scene_top.shape[0] < th or scene_top.shape[1] < tw:
                continue
            res = cv2.matchTemplate(
                scene_top, edge_top, self.match_method, mask=mask_top,
            )
            # Masked NCC can produce NaN/Inf on degenerate windows.
            res = np.nan_to_num(res, nan=-1.0, posinf=-1.0, neginf=-1.0)
            ys, xs = np.where(res >= top_thresh)
            for y, x in zip(ys, xs):
                candidates.append((float(res[y, x]), int(x), int(y), ti))
        # Full-resolution refinement: local window around each candidate.
        refined: list[tuple[float, int, int, int]] = []
        margin = sf + 4
        for _, xt, yt, ti in candidates:
            tpl = self.templates[ti]
            th, tw = tpl.edge.shape
            x0 = xt * sf
            y0 = yt * sf
            sx0 = max(0, x0 - margin)
            sy0 = max(0, y0 - margin)
            sx1 = min(scene_edge0.shape[1], x0 + tw + margin)
            sy1 = min(scene_edge0.shape[0], y0 + th + margin)
            roi = scene_edge0[sy0:sy1, sx0:sx1]
            if roi.shape[0] < th or roi.shape[1] < tw:
                continue
            res = cv2.matchTemplate(
                roi, tpl.edge, self.match_method, mask=tpl.mask,
            )
            res = np.nan_to_num(res, nan=-1.0, posinf=-1.0, neginf=-1.0)
            _, max_val, _, max_loc = cv2.minMaxLoc(res)
            if max_val < min_score:
                continue
            bx = sx0 + max_loc[0]
            by = sy0 + max_loc[1]
            refined.append((float(max_val), bx, by, ti))
        refined.sort(key=lambda c: -c[0])
        # Spatial NMS on centroids, best score first.
        kept: list[Match] = []
        r2 = nms_radius * nms_radius
        for score, x, y, ti in refined:
            tpl = self.templates[ti]
            cx = x + tpl.cx_local
            cy = y + tpl.cy_local
            if any((k.cx - cx) ** 2 + (k.cy - cy) ** 2 < r2 for k in kept):
                continue
            th, tw = tpl.edge.shape
            kept.append(
                Match(
                    cx=cx, cy=cy,
                    angle_deg=tpl.angle_deg,
                    scale=tpl.scale,
                    score=score,
                    bbox=(x, y, tw, th),
                )
            )
            if len(kept) >= max_matches:
                break
        return kept

    # --- Model persistence ---
    def save(self, path: str) -> None:
        """Persist the trained matcher to disk as a compressed .npz."""
        meta = np.array(
            [(t.angle_deg, t.scale, t.cx_local, t.cy_local) for t in self.templates],
            dtype=np.float32,
        )
        # Scalar parameters packed as float32; load() round-trips the ints.
        params = np.array(
            [self.canny_low, self.canny_high, self.angle_step_deg,
             self.angle_range_deg[0], self.angle_range_deg[1],
             self.scale_range[0], self.scale_range[1], self.scale_step,
             self.template_size[0], self.template_size[1], self.match_method,
             self.pyramid_levels, self.top_score_factor],
            dtype=np.float32,
        )
        arrays = {f"edge_{i}": t.edge for i, t in enumerate(self.templates)}
        arrays.update({f"mask_{i}": t.mask for i, t in enumerate(self.templates)})
        np.savez_compressed(path, params=params, meta=meta, **arrays)

    @classmethod
    def load(cls, path: str) -> "EdgeShapeMatcher":
        """Load a matcher previously stored with save().

        BUGFIX: np.load on an .npz returns an NpzFile that keeps the
        archive file handle open; the old code never closed it.  Using it
        as a context manager closes the handle once all arrays are read.
        """
        with np.load(path) as z:
            p = z["params"]
            m = cls(
                canny_low=int(p[0]),
                canny_high=int(p[1]),
                angle_step_deg=float(p[2]),
                angle_range_deg=(float(p[3]), float(p[4])),
                scale_range=(float(p[5]), float(p[6])),
                scale_step=float(p[7]),
                match_method=int(p[10]),
                # Older saves may lack the last two parameters.
                pyramid_levels=int(p[11]) if len(p) > 11 else 3,
                top_score_factor=float(p[12]) if len(p) > 12 else 0.6,
            )
            m.template_size = (int(p[8]), int(p[9]))
            meta = z["meta"]
            for i in range(len(meta)):
                m.templates.append(
                    Template(
                        angle_deg=float(meta[i, 0]),
                        scale=float(meta[i, 1]),
                        edge=z[f"edge_{i}"],
                        mask=z[f"mask_{i}"],
                        cx_local=float(meta[i, 2]),
                        cy_local=float(meta[i, 3]),
                    )
                )
        return m