perf: Fase 1 speed+precision (V1 V11 P1 P2 P5)
V1 Coarse-to-fine angolare:
- Al top-level valuta solo 1 variante ogni coarse_angle_factor (default 2)
- Espande ai vicini nel full-res per preservare accuracy
- Safe anche per template allungati (factor=2 non perde match)
V11 Cache matcher in-memory (LRU, capacità 8):
- Key = md5(ROI bytes + params tecnici che influenzano il training)
- Re-match con stessi parametri: train_time = 0s (era 0.5-1.5s)
- OrderedDict LRU con _cache_get_matcher / _cache_put_matcher
P1 Fit parabolico 2D bivariato:
- In _subpixel_peak ora usa lo stencil 3x3 completo:
z(dx,dy) = a + b*dx + c*dy + d*dx^2 + e*dy^2 + f*dx*dy
- Argmax risolto analiticamente (sistema lineare 2x2); fallback separabile se det~0
- Precisione attesa: 0.1-0.3 px (era 0.5 px separabile)
P5 Golden-section angle search:
- Sostituisce i 5 campioni equispaziati con una ricerca a sezione aurea: l'intervallo di ricerca si riduce di un fattore ≈0.618 a ogni iterazione
- Tol 0.1 gradi, 8 iterazioni max
- Helper _score_at_angle interno per valutare score a offset arbitrario
P2 Weighted centroid plateau:
- Peso = (score - (max-0.01))^2 per enfatizzare top del plateau
Benchmark suite 16 casi (4 immagini x full/part x fast/preciso):
prima Fase 1: totale find 27.3s
dopo Fase 1: totale find 25.1s
Nessuna regressione sul numero di match; in alcuni casi la precisione migliora.
ROADMAP.md aggiornato con checklist Fase 1.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+118
-57
@@ -26,6 +26,7 @@ della ROI (modello non-rettangolare).
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
import os
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from dataclasses import dataclass
|
||||
@@ -33,6 +34,8 @@ from dataclasses import dataclass
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
_GOLDEN = (math.sqrt(5.0) - 1.0) / 2.0 # ≈ 0.618
|
||||
|
||||
from pm2d._jit_kernels import (
|
||||
score_by_shift as _jit_score_by_shift,
|
||||
score_bitmap as _jit_score_bitmap,
|
||||
@@ -338,9 +341,10 @@ class LineShapeMatcher:
|
||||
) -> tuple[float, float]:
|
||||
"""Posizione sub-pixel del picco.
|
||||
|
||||
Se c'è un plateau di valori ~massimi (spread_radius satura il peak
|
||||
su un'area) ritorna il CENTROIDE del plateau. Altrimenti fit
|
||||
parabolico 2D ±0.5 px.
|
||||
1. Plateau saturo → centroide pesato del plateau (peso = (score - (max - 0.01))²).
|
||||
2. Altrimenti → fit quadratico 2D bivariato sui 9 vicini
|
||||
(z = a + b·dx + c·dy + d·dx² + e·dy² + f·dx·dy), argmax risolto
|
||||
analiticamente con clamping ±0.5 px.
|
||||
"""
|
||||
H, W = acc.shape
|
||||
val = float(acc[y, x])
|
||||
@@ -350,18 +354,37 @@ class LineShapeMatcher:
|
||||
patch = acc[y0:y1, x0:x1]
|
||||
plateau = patch >= val - 0.01
|
||||
if plateau.sum() > 1:
|
||||
# Centroide pesato per (score - (max-0.01))² per enfatizzare i top
|
||||
weights = np.where(plateau, patch - (val - 0.01), 0.0).astype(np.float64)
|
||||
weights = weights * weights
|
||||
total = weights.sum()
|
||||
if total > 1e-9:
|
||||
ys_idx, xs_idx = np.indices(patch.shape)
|
||||
cx_w = (xs_idx * weights).sum() / total
|
||||
cy_w = (ys_idx * weights).sum() / total
|
||||
return float(x0 + cx_w), float(y0 + cy_w)
|
||||
ys_m, xs_m = np.where(plateau)
|
||||
return float(x0 + xs_m.mean()), float(y0 + ys_m.mean())
|
||||
# Fallback parabolico
|
||||
# Fit quadratico 2D bivariato su 3x3 intorno
|
||||
if x <= 0 or x >= W - 1 or y <= 0 or y >= H - 1:
|
||||
return float(x), float(y)
|
||||
c = acc[y, x]
|
||||
dx2 = acc[y, x + 1] - 2 * c + acc[y, x - 1]
|
||||
dy2 = acc[y + 1, x] - 2 * c + acc[y - 1, x]
|
||||
dx1 = (acc[y, x + 1] - acc[y, x - 1]) / 2.0
|
||||
dy1 = (acc[y + 1, x] - acc[y - 1, x]) / 2.0
|
||||
ox = -dx1 / dx2 if abs(dx2) > 1e-6 else 0.0
|
||||
oy = -dy1 / dy2 if abs(dy2) > 1e-6 else 0.0
|
||||
# Stencil 3x3: Z[i, j] con i,j ∈ {-1, 0, +1}
|
||||
Z = acc[y - 1:y + 2, x - 1:x + 2].astype(np.float64)
|
||||
# Coefficienti da finite differences
|
||||
b_c = (Z[1, 2] - Z[1, 0]) / 2.0
|
||||
c_c = (Z[2, 1] - Z[0, 1]) / 2.0
|
||||
d_c = (Z[1, 2] + Z[1, 0] - 2.0 * Z[1, 1]) / 2.0
|
||||
e_c = (Z[2, 1] + Z[0, 1] - 2.0 * Z[1, 1]) / 2.0
|
||||
f_c = (Z[2, 2] - Z[0, 2] - Z[2, 0] + Z[0, 0]) / 4.0
|
||||
# Max: risolve [2d f; f 2e][dx;dy] = [-b;-c]
|
||||
det = 4.0 * d_c * e_c - f_c * f_c
|
||||
if abs(det) > 1e-9:
|
||||
ox = (-2.0 * e_c * b_c + f_c * c_c) / det
|
||||
oy = (-2.0 * d_c * c_c + f_c * b_c) / det
|
||||
else:
|
||||
# Fallback separabile
|
||||
ox = -b_c / (2.0 * d_c) if abs(d_c) > 1e-6 else 0.0
|
||||
oy = -c_c / (2.0 * e_c) if abs(e_c) > 1e-6 else 0.0
|
||||
ox = float(np.clip(ox, -0.5, 0.5))
|
||||
oy = float(np.clip(oy, -0.5, 0.5))
|
||||
return x + ox, y + oy
|
||||
@@ -384,16 +407,11 @@ class LineShapeMatcher:
|
||||
l'angolo con score massimo (ricerca a sezione aurea sull'offset angolare).
|
||||
Ritorna (angle_refined, score, cx_refined, cy_refined).
|
||||
"""
|
||||
# Se il match grezzo è già quasi perfetto, NON refinare: il parabolic
|
||||
# fit su picco saturo produce spostamenti spurious di posizione e
|
||||
# angolo (esempio: modello==scena deve dare ang=0, pos=centro ROI)
|
||||
# Se il match grezzo è già quasi perfetto, NON refinare
|
||||
if original_score is not None and original_score >= 0.99:
|
||||
return (angle_deg, original_score, cx, cy)
|
||||
if search_radius is None:
|
||||
search_radius = self.angle_step_deg / 2.0
|
||||
offsets = np.linspace(-search_radius, search_radius, 5)
|
||||
best = (angle_deg, -1.0, cx, cy)
|
||||
scores_by_off: dict[float, float] = {}
|
||||
|
||||
h, w = template_gray.shape
|
||||
sw = max(16, int(round(w * scale)))
|
||||
@@ -409,10 +427,10 @@ class LineShapeMatcher:
|
||||
center = (diag / 2.0, diag / 2.0)
|
||||
|
||||
H, W = spread0.shape
|
||||
# Ricerca locale posizione con margine ±3 px sulla (cx, cy)
|
||||
margin = 3
|
||||
|
||||
for off in offsets:
|
||||
def _score_at_angle(off: float) -> tuple[float, float, float]:
|
||||
"""Ritorna (score, best_cx, best_cy) per angolo = angle_deg + off."""
|
||||
ang = angle_deg + off
|
||||
M = cv2.getRotationMatrix2D(center, ang, 1.0)
|
||||
gray_r = cv2.warpAffine(gray_p, M, (diag, diag),
|
||||
@@ -423,22 +441,20 @@ class LineShapeMatcher:
|
||||
mag, bins = self._gradient(gray_r)
|
||||
fx, fy, fb = self._extract_features(mag, bins, mask_r)
|
||||
if len(fx) < 8:
|
||||
scores_by_off[float(off)] = 0.0
|
||||
continue
|
||||
return (0.0, cx, cy)
|
||||
dx = (fx - center[0]).astype(np.int32)
|
||||
dy = (fy - center[1]).astype(np.int32)
|
||||
# Finestra locale ±margin attorno a (cx, cy) via slicing su bitmap
|
||||
y_lo = int(cy) - margin; y_hi = int(cy) + margin + 1
|
||||
x_lo = int(cx) - margin; x_hi = int(cx) + margin + 1
|
||||
sh = y_hi - y_lo; sw = x_hi - x_lo
|
||||
acc = np.zeros((sh, sw), dtype=np.float32)
|
||||
sh_w = y_hi - y_lo; sw_w = x_hi - x_lo
|
||||
acc = np.zeros((sh_w, sw_w), dtype=np.float32)
|
||||
for i in range(len(dx)):
|
||||
ddx = int(dx[i]); ddy = int(dy[i]); b = int(fb[i])
|
||||
bit = np.uint8(1 << b)
|
||||
sy0 = y_lo + ddy; sy1 = y_hi + ddy
|
||||
sx0 = x_lo + ddx; sx1 = x_hi + ddx
|
||||
a_y0 = max(0, -sy0); a_y1 = sh - max(0, sy1 - H)
|
||||
a_x0 = max(0, -sx0); a_x1 = sw - max(0, sx1 - W)
|
||||
a_y0 = max(0, -sy0); a_y1 = sh_w - max(0, sy1 - H)
|
||||
a_x0 = max(0, -sx0); a_x1 = sw_w - max(0, sx1 - W)
|
||||
s_y0 = max(0, sy0); s_y1 = min(H, sy1)
|
||||
s_x0 = max(0, sx0); s_x1 = min(W, sx1)
|
||||
if s_y1 > s_y0 and s_x1 > s_x0:
|
||||
@@ -448,31 +464,39 @@ class LineShapeMatcher:
|
||||
).astype(np.float32)
|
||||
acc /= len(dx)
|
||||
_, max_val, _, max_loc = cv2.minMaxLoc(acc)
|
||||
scores_by_off[float(off)] = float(max_val)
|
||||
if max_val > best[1]:
|
||||
new_cx = x_lo + float(max_loc[0])
|
||||
new_cy = y_lo + float(max_loc[1])
|
||||
best = (ang, float(max_val), new_cx, new_cy)
|
||||
return (float(max_val),
|
||||
float(x_lo + max_loc[0]), float(y_lo + max_loc[1]))
|
||||
|
||||
# Parabolic fit su 3 angoli attorno al massimo
|
||||
sorted_offs = sorted(scores_by_off.keys())
|
||||
best_off = best[0] - angle_deg
|
||||
try:
|
||||
i = sorted_offs.index(
|
||||
min(sorted_offs, key=lambda x: abs(x - best_off))
|
||||
)
|
||||
if 0 < i < len(sorted_offs) - 1:
|
||||
s0 = scores_by_off[sorted_offs[i - 1]]
|
||||
s1 = scores_by_off[sorted_offs[i]]
|
||||
s2 = scores_by_off[sorted_offs[i + 1]]
|
||||
denom = (s0 - 2 * s1 + s2)
|
||||
if abs(denom) > 1e-6:
|
||||
delta = 0.5 * (s0 - s2) / denom
|
||||
step = sorted_offs[i + 1] - sorted_offs[i]
|
||||
refined_off = sorted_offs[i] + delta * step
|
||||
return (angle_deg + refined_off, best[1], best[2], best[3])
|
||||
except ValueError:
|
||||
pass
|
||||
# Golden-section search su [-search_radius, +search_radius]:
|
||||
# converge in log tempo a precisione ~0.1°, ~8 valutazioni vs 5
|
||||
# ma centrate su picco reale (non sample equispaziati).
|
||||
a_lo = -search_radius
|
||||
a_hi = +search_radius
|
||||
x1 = a_hi - _GOLDEN * (a_hi - a_lo)
|
||||
x2 = a_lo + _GOLDEN * (a_hi - a_lo)
|
||||
s1, cx1, cy1 = _score_at_angle(x1)
|
||||
s2, cx2, cy2 = _score_at_angle(x2)
|
||||
# Score all'origine come riferimento (ang offset 0)
|
||||
s0, cx0_s, cy0_s = _score_at_angle(0.0)
|
||||
best = (angle_deg, s0, cx0_s, cy0_s)
|
||||
tol = 0.1 # gradi
|
||||
for _ in range(8):
|
||||
if s1 > best[1]:
|
||||
best = (angle_deg + x1, s1, cx1, cy1)
|
||||
if s2 > best[1]:
|
||||
best = (angle_deg + x2, s2, cx2, cy2)
|
||||
if abs(a_hi - a_lo) < tol:
|
||||
break
|
||||
if s1 > s2:
|
||||
a_hi = x2
|
||||
x2 = x1; s2 = s1; cx2 = cx1; cy2 = cy1
|
||||
x1 = a_hi - _GOLDEN * (a_hi - a_lo)
|
||||
s1, cx1, cy1 = _score_at_angle(x1)
|
||||
else:
|
||||
a_lo = x1
|
||||
x1 = x2; s1 = s2; cx1 = cx2; cy1 = cy2
|
||||
x2 = a_lo + _GOLDEN * (a_hi - a_lo)
|
||||
s2, cx2, cy2 = _score_at_angle(x2)
|
||||
return best
|
||||
|
||||
def _verify_ncc(
|
||||
@@ -523,6 +547,7 @@ class LineShapeMatcher:
|
||||
subpixel: bool = True,
|
||||
verify_ncc: bool = True,
|
||||
verify_threshold: float = 0.4,
|
||||
coarse_angle_factor: int = 2,
|
||||
) -> list[Match]:
|
||||
if not self.variants:
|
||||
raise RuntimeError("Matcher non addestrato: chiamare train() prima.")
|
||||
@@ -564,7 +589,30 @@ class LineShapeMatcher:
|
||||
def _rescore(score: np.ndarray, bg: np.ndarray) -> np.ndarray:
|
||||
return np.maximum(0.0, (score - bg) / (1.0 - bg + 1e-6))
|
||||
|
||||
# Pruning varianti via top-level (parallelizzato)
|
||||
# Coarse-to-fine angolare:
|
||||
# 1) Raggruppa varianti per scala, ordina per angolo
|
||||
# 2) Top-level: valuta solo 1 ogni coarse_angle_factor varianti
|
||||
# 3) Espandi ai vicini nel full-res
|
||||
variants_by_scale: dict[float, list[int]] = {}
|
||||
for vi, var in enumerate(self.variants):
|
||||
variants_by_scale.setdefault(var.scale, []).append(vi)
|
||||
|
||||
coarse_idx_list: list[int] = [] # varianti da valutare al top
|
||||
neighbor_map: dict[int, list[int]] = {} # vi_coarse -> indici vicini
|
||||
cf = max(1, coarse_angle_factor)
|
||||
for scale_key, vi_list in variants_by_scale.items():
|
||||
vi_sorted = sorted(vi_list, key=lambda i: self.variants[i].angle_deg)
|
||||
n = len(vi_sorted)
|
||||
for i in range(0, n, cf):
|
||||
vi_c = vi_sorted[i]
|
||||
coarse_idx_list.append(vi_c)
|
||||
# Vicini: ±cf/2 attorno a i (stessa scala)
|
||||
half = cf // 2
|
||||
start = max(0, i - half)
|
||||
end = min(n, i + half + 1)
|
||||
neighbor_map[vi_c] = vi_sorted[start:end]
|
||||
|
||||
# Pruning varianti via top-level (parallelizzato) - solo coarse
|
||||
def _top_score(vi: int) -> tuple[int, float]:
|
||||
var = self.variants[vi]
|
||||
lvl = var.levels[min(top, len(var.levels) - 1)]
|
||||
@@ -574,17 +622,30 @@ class LineShapeMatcher:
|
||||
score = _rescore(score, bg_cache_top[var.scale])
|
||||
return vi, float(score.max()) if score.size else -1.0
|
||||
|
||||
kept_variants: list[tuple[int, float]] = []
|
||||
if self.n_threads > 1:
|
||||
kept_coarse: list[tuple[int, float]] = []
|
||||
if self.n_threads > 1 and len(coarse_idx_list) > 1:
|
||||
with ThreadPoolExecutor(max_workers=self.n_threads) as ex:
|
||||
for vi, best in ex.map(_top_score, range(len(self.variants))):
|
||||
for vi, best in ex.map(_top_score, coarse_idx_list):
|
||||
if best >= top_thresh:
|
||||
kept_variants.append((vi, best))
|
||||
kept_coarse.append((vi, best))
|
||||
else:
|
||||
for vi in range(len(self.variants)):
|
||||
for vi in coarse_idx_list:
|
||||
vi2, best = _top_score(vi)
|
||||
if best >= top_thresh:
|
||||
kept_variants.append((vi2, best))
|
||||
kept_coarse.append((vi2, best))
|
||||
|
||||
# Espandi ogni coarse promosso con i suoi vicini (stessa scala,
|
||||
# angoli intermedi non valutati al top)
|
||||
expanded: set[int] = set()
|
||||
score_by_vi: dict[int, float] = {}
|
||||
for vi_c, s_top in kept_coarse:
|
||||
for vi_n in neighbor_map.get(vi_c, [vi_c]):
|
||||
expanded.add(vi_n)
|
||||
# Usa lo score del coarse come stima per il sort successivo
|
||||
score_by_vi[vi_n] = max(score_by_vi.get(vi_n, 0.0), s_top)
|
||||
kept_variants: list[tuple[int, float]] = [
|
||||
(vi, score_by_vi[vi]) for vi in expanded
|
||||
]
|
||||
|
||||
if not kept_variants:
|
||||
return []
|
||||
|
||||
Reference in New Issue
Block a user