perf: spread bitmap uint8 + pre-NMS prima refine (3.5x globale, 49x worst case)
Due ottimizzazioni chiave:
1. Spread bitmap uint8 invece di response map (N_BINS, H, W) float32
- 32x meno memoria, cache-friendly
- Nuovi kernel Numba: _jit_score_bitmap, _jit_popcount_density
- Formato: spread[y,x] bit b = bin b attivo nel raggio di spread
- _refine_angle usa slicing su bitmap con mask & bit
2. Pre-NMS prima di refine_angle/verify_ncc
- Problema: loop 'for raw in candidati' applicava refine+verify A OGNI
candidato prima del check NMS → 2000+ refine chiamati per ~25 match
- Fix: pre-NMS su (cx, cy) subpixel, limita a max_matches*3 candidati,
poi refine + verify solo su quelli
- Esempio worst case: lama_full_fast 55.9s → 1.13s (49x)
Benchmark suite 16 scenari (4 casi su 3 immagini x ROI full/part x config fast/preciso):
prima: totale find 94.6s
dopo: totale find 27.3s (3.5x globale)
casi peggiori <5s (prima erano >50s)
ROI parziali (solo metà oggetto) funzionano in tutti i casi.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,96 @@
|
|||||||
|
"""Test suite esaustivo su Test/*.png con varie configurazioni.
|
||||||
|
|
||||||
|
Esegue matrix (immagine, ROI completa/parziale, config) e stampa tempi/match.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from pm2d import LineShapeMatcher
|
||||||
|
from pm2d.gui import draw_matches
|
||||||
|
|
||||||
|
|
||||||
|
# Input images live in the "Test" folder inside the parent of this file's directory.
TEST_DIR = Path(__file__).parent.parent / "Test"
# Overlay images are written here; the directory is created at import time.
OUT_DIR = Path("/tmp/pm2d_suite"); OUT_DIR.mkdir(exist_ok=True)

# Cases: (name, image file, (y0, y1, x0, x1) full ROI, (y0, y1, x0, x1) partial ROI)
CASES = [
    ("clip", "clip.png", ( 60, 200, 90, 290), ( 60, 135, 90, 290)),
    ("ruota", "rings_and_nuts.png", ( 55, 175, 90, 215), ( 55, 115, 90, 215)),
    ("dado", "rings_and_nuts.png", (255, 375, 40, 170), (255, 315, 40, 170)),
    ("lama", "razors2.png", ( 90, 370, 120, 160), ( 90, 230, 120, 160)),
]

# Matcher configurations: (name, keyword arguments forwarded to LineShapeMatcher).
CONFIGS = [
    ("fast", dict(angle_step_deg=10.0, scale_range=(1.0, 1.0),
                  pyramid_levels=3, num_features=64)),
    ("preciso", dict(angle_step_deg=5.0, scale_range=(0.5, 1.1), scale_step=0.05,
                     pyramid_levels=3, num_features=96)),
]
|
||||||
|
|
||||||
|
|
||||||
|
def bench(case_name: str, img_path: str, roi_box: tuple, roi_kind: str,
          cfg_name: str, cfg: dict) -> dict:
    """Train on one ROI, time a search over the whole scene, save an overlay.

    Args:
        case_name: Short label of the test case (used in the output tag).
        img_path: Image file name, resolved relative to ``TEST_DIR``.
        roi_box: ``(y0, y1, x0, x1)`` crop of the scene used as template.
        roi_kind: Label of the ROI flavour (e.g. ``"full"`` / ``"part"``).
        cfg_name: Label of the matcher configuration.
        cfg: Extra keyword arguments forwarded to ``LineShapeMatcher``.

    Returns:
        A dict with the keys ``case``, ``roi``, ``variants``, ``train_s``,
        ``find_s``, ``n_match`` and ``score_range``.

    Raises:
        FileNotFoundError: If the scene image cannot be read.
    """
    scene_path = TEST_DIR / img_path
    scene = cv2.imread(str(scene_path))
    if scene is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than on the slicing below.
        raise FileNotFoundError(f"cannot read test image: {scene_path}")

    y0, y1, x0, x1 = roi_box
    roi = scene[y0:y1, x0:x1].copy()
    m = LineShapeMatcher(
        angle_range_deg=(0.0, 360.0),
        weak_grad=30, strong_grad=60,
        spread_radius=5, n_threads=4, **cfg,
    )

    # perf_counter is monotonic and high resolution, unlike time.time().
    t0 = time.perf_counter()
    n_var = m.train(roi)
    t_train = time.perf_counter() - t0

    # Warm-up: the first call pays for JIT compilation, so it is not timed.
    m.find(scene, min_score=0.55, max_matches=3, refine_angle=False)

    t0 = time.perf_counter()
    matches = m.find(
        scene, min_score=0.55, max_matches=25, nms_radius=None,
        refine_angle=True, subpixel=True, verify_threshold=0.4,
    )
    t_find = time.perf_counter() - t0

    tag = f"{case_name}_{roi_kind}_{cfg_name}"
    overlay = draw_matches(scene, matches,
                           template_gray=cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY))
    cv2.imwrite(str(OUT_DIR / f"{tag}.png"), overlay)

    # Collect the scores once instead of walking the matches twice.
    scores = [x.score for x in matches]
    score_range = f"{min(scores):.2f}..{max(scores):.2f}" if scores else "-"

    return {
        "case": tag,
        "roi": f"{roi.shape[1]}x{roi.shape[0]}",
        "variants": n_var,
        "train_s": t_train,
        "find_s": t_find,
        "n_match": len(matches),
        "score_range": score_range,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run every (case, ROI kind, config) combination and print a timing table."""
    header = (f"{'case':30s} {'roi':>9s} {'var':>4s} "
              f"{'train':>6s} {'find':>6s} {'n':>3s} score")
    separator = "-" * 85
    print(header)
    print(separator)

    find_times = []
    for case_name, img, roi_full, roi_part in CASES:
        rois = (("full", roi_full), ("part", roi_part))
        for roi_kind, roi_box in rois:
            for cfg_name, cfg in CONFIGS:
                row = bench(case_name, img, roi_box, roi_kind, cfg_name, cfg)
                line = (f"{row['case']:30s} {row['roi']:>9s} {row['variants']:>4d} "
                        f"{row['train_s']:>5.2f}s {row['find_s']:>5.2f}s "
                        f"{row['n_match']:>3d} {row['score_range']}")
                print(line)
                find_times.append(row["find_s"])

    # Summed left to right from 0.0, the same order as a running accumulator.
    total_find = sum(find_times, 0.0)
    print(separator)
    print(f"totale find: {total_find:.1f}s overlay salvati in {OUT_DIR}/")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
+96
-4
@@ -45,13 +45,12 @@ if HAS_NUMBA:
|
|||||||
resp: np.ndarray, # float32 (N_BINS, H, W)
|
resp: np.ndarray, # float32 (N_BINS, H, W)
|
||||||
dx: np.ndarray, # int32 (N,)
|
dx: np.ndarray, # int32 (N,)
|
||||||
dy: np.ndarray, # int32 (N,)
|
dy: np.ndarray, # int32 (N,)
|
||||||
bins: np.ndarray, # int8 or int32 (N,)
|
bins: np.ndarray, # int8 (N,)
|
||||||
bin_active: np.ndarray, # bool_ (N_BINS,)
|
bin_active: np.ndarray, # bool_ (N_BINS,)
|
||||||
) -> np.ndarray:
|
) -> np.ndarray:
|
||||||
n_bins, H, W = resp.shape
|
_, H, W = resp.shape
|
||||||
N = dx.shape[0]
|
N = dx.shape[0]
|
||||||
acc = np.zeros((H, W), dtype=np.float32)
|
acc = np.zeros((H, W), dtype=np.float32)
|
||||||
# Parallelizza per riga: niente race (ogni y scrive solo acc[y, :])
|
|
||||||
for y in nb.prange(H):
|
for y in nb.prange(H):
|
||||||
for i in range(N):
|
for i in range(N):
|
||||||
b = bins[i]
|
b = bins[i]
|
||||||
@@ -73,7 +72,59 @@ if HAS_NUMBA:
|
|||||||
acc[y, x] *= inv
|
acc[y, x] *= inv
|
||||||
return acc
|
return acc
|
||||||
|
|
||||||
# Warmup: precompila con dummy data
|
@nb.njit(cache=True, parallel=True, fastmath=True, boundscheck=False)
def _jit_score_bitmap(
    spread: np.ndarray,  # uint8 (H, W); bit b set = bin b active
    dx: np.ndarray,  # int32 (N,)
    dy: np.ndarray,  # int32 (N,)
    bins: np.ndarray,  # int8 (N,) bin of each feature
    bit_active: np.uint8,  # bitmask of the bins active in the scene
) -> np.ndarray:
    """Score every pixel by the fraction of template features that hit.

    score[y, x] = (sum over i of [bit bins[i] is set in
    spread[y + dy_i, x + dx_i]]) / N.

    Features whose bin is not set in ``bit_active`` are skipped, as are
    samples that fall outside the image; both still count in the divisor N.

    The bitmap uses one byte per pixel, 32x less memory than the float32
    (8 bins, H, W) response map, which makes it cache-friendly.

    Returns a float32 array of shape (H, W).
    """
    H, W = spread.shape
    N = dx.shape[0]
    acc = np.zeros((H, W), dtype=np.float32)
    # Parallel over rows: each iteration writes only acc[y, :].
    for y in nb.prange(H):
        for i in range(N):
            b = bins[i]
            mask = np.uint8(1) << b
            # Skip features whose bin is not flagged active in the scene.
            if (bit_active & mask) == 0:
                continue
            ddy = dy[i]
            yy = y + ddy
            # Source row outside the image: no contribution for this row.
            if yy < 0 or yy >= H:
                continue
            ddx = dx[i]
            # Clamp the x range so that x + ddx stays inside [0, W).
            x_lo = 0 if ddx >= 0 else -ddx
            x_hi = W if ddx <= 0 else W - ddx
            for x in range(x_lo, x_hi):
                if spread[yy, x + ddx] & mask:
                    acc[y, x] += 1.0
    # Normalise the hit counts by the number of features.
    if N > 0:
        inv = 1.0 / N
        for y in nb.prange(H):
            for x in range(W):
                acc[y, x] *= inv
    return acc
|
||||||
|
|
||||||
|
@nb.njit(cache=True, parallel=True, fastmath=True, boundscheck=False)
def _jit_popcount_density(spread: np.ndarray) -> np.ndarray:
    """Count the set bits of each pixel; returns (H, W) float32 in [0..8].

    Only the low 8 bits of each value are counted (the masks are 8-bit),
    which matches the uint8 bitmap the callers pass in.
    """
    H, W = spread.shape
    out = np.zeros((H, W), dtype=np.float32)
    # Parallel over rows: each iteration writes only out[y, :].
    for y in nb.prange(H):
        for x in range(W):
            v = spread[y, x]
            # Manual popcount: add neighbouring 1-bit, 2-bit, then 4-bit fields.
            v = (v & 0x55) + ((v >> 1) & 0x55)
            v = (v & 0x33) + ((v >> 2) & 0x33)
            v = (v & 0x0F) + ((v >> 4) & 0x0F)
            out[y, x] = float(v)
    return out
|
||||||
|
|
||||||
def _warmup():
|
def _warmup():
|
||||||
resp = np.zeros((8, 32, 32), dtype=np.float32)
|
resp = np.zeros((8, 32, 32), dtype=np.float32)
|
||||||
dx = np.zeros(1, dtype=np.int32)
|
dx = np.zeros(1, dtype=np.int32)
|
||||||
@@ -81,16 +132,57 @@ if HAS_NUMBA:
|
|||||||
b = np.zeros(1, dtype=np.int8)
|
b = np.zeros(1, dtype=np.int8)
|
||||||
ba = np.ones(8, dtype=np.bool_)
|
ba = np.ones(8, dtype=np.bool_)
|
||||||
_jit_score_by_shift(resp, dx, dy, b, ba)
|
_jit_score_by_shift(resp, dx, dy, b, ba)
|
||||||
|
spread = np.zeros((32, 32), dtype=np.uint8)
|
||||||
|
_jit_score_bitmap(spread, dx, dy, b, np.uint8(0xFF))
|
||||||
|
_jit_popcount_density(spread)
|
||||||
|
|
||||||
else: # pragma: no cover
|
else: # pragma: no cover
|
||||||
|
|
||||||
def _jit_score_by_shift(resp, dx, dy, bins, bin_active):
|
def _jit_score_by_shift(resp, dx, dy, bins, bin_active):
|
||||||
raise RuntimeError("numba non disponibile")
|
raise RuntimeError("numba non disponibile")
|
||||||
|
|
||||||
|
def _jit_score_bitmap(spread, dx, dy, bins, bit_active):
|
||||||
|
raise RuntimeError("numba non disponibile")
|
||||||
|
|
||||||
|
def _jit_popcount_density(spread):
|
||||||
|
raise RuntimeError("numba non disponibile")
|
||||||
|
|
||||||
def _warmup():
|
def _warmup():
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def score_bitmap(
    spread: np.ndarray, dx: np.ndarray, dy: np.ndarray, bins: np.ndarray,
    bit_active: int,
) -> np.ndarray:
    """Score a template against a spread bitmap (JIT if available, else NumPy).

    Args:
        spread: (H, W) bitmap; bit b of a pixel marks bin b as present.
        dx: Per-feature x offsets.
        dy: Per-feature y offsets.
        bins: Per-feature orientation bin index.
        bit_active: Bitmask of the bins to take into account; features whose
            bin is not set in this mask contribute nothing to the score.

    Returns:
        The score map produced by the selected backend.
    """
    if HAS_NUMBA and len(dx) > 0:
        return _jit_score_bitmap(
            np.ascontiguousarray(spread, dtype=np.uint8),
            np.ascontiguousarray(dx, dtype=np.int32),
            np.ascontiguousarray(dy, dtype=np.int32),
            np.ascontiguousarray(bins, dtype=np.int8),
            np.uint8(bit_active),
        )
    # NumPy fallback (slow): expand the bitmap into a 3D response map.
    H, W = spread.shape
    resp = np.zeros((8, H, W), dtype=np.float32)
    active = int(bit_active)
    for b in range(8):
        # Leave masked-out bins at zero so the fallback honours bit_active
        # exactly like the JIT kernel, which skips those features.
        if (active >> b) & 1:
            resp[b] = ((spread >> b) & 1).astype(np.float32)
    return _numpy_score_by_shift(resp, dx, dy, bins, None)
|
||||||
|
|
||||||
|
|
||||||
|
def popcount_density(spread: np.ndarray) -> np.ndarray:
    """Return, per pixel, how many of the 8 bitmap bits are set (float32)."""
    if not HAS_NUMBA:
        # Pure NumPy path: accumulate the eight bit planes one at a time.
        rows, cols = spread.shape
        density = np.zeros((rows, cols), dtype=np.float32)
        for shift in range(8):
            bit_plane = (spread >> shift) & 1
            density += bit_plane.astype(np.float32)
        return density
    bitmap = np.ascontiguousarray(spread, dtype=np.uint8)
    return _jit_popcount_density(bitmap)
|
||||||
|
|
||||||
|
|
||||||
def score_by_shift(
|
def score_by_shift(
|
||||||
resp: np.ndarray, dx: np.ndarray, dy: np.ndarray, bins: np.ndarray,
|
resp: np.ndarray, dx: np.ndarray, dy: np.ndarray, bins: np.ndarray,
|
||||||
bin_has_data: np.ndarray | None = None,
|
bin_has_data: np.ndarray | None = None,
|
||||||
|
|||||||
+70
-34
@@ -33,7 +33,12 @@ from dataclasses import dataclass
|
|||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from pm2d._jit_kernels import score_by_shift as _jit_score_by_shift, HAS_NUMBA
|
from pm2d._jit_kernels import (
|
||||||
|
score_by_shift as _jit_score_by_shift,
|
||||||
|
score_bitmap as _jit_score_bitmap,
|
||||||
|
popcount_density as _jit_popcount,
|
||||||
|
HAS_NUMBA,
|
||||||
|
)
|
||||||
|
|
||||||
N_BINS = 8 # orientamenti quantizzati modulo π
|
N_BINS = 8 # orientamenti quantizzati modulo π
|
||||||
|
|
||||||
@@ -286,11 +291,7 @@ class LineShapeMatcher:
|
|||||||
# --- Matching ------------------------------------------------------
|
# --- Matching ------------------------------------------------------
|
||||||
|
|
||||||
def _response_map(self, gray: np.ndarray) -> np.ndarray:
|
def _response_map(self, gray: np.ndarray) -> np.ndarray:
|
||||||
"""Response map shape (N_BINS, H, W) float32 0/1.
|
"""Response map shape (N_BINS, H, W) float32 (legacy path)."""
|
||||||
|
|
||||||
Rinormalizzazione anti-background (match vs texture densa) è
|
|
||||||
applicata a valle nel `find()` via `_bg_map` locale.
|
|
||||||
"""
|
|
||||||
mag, bins = self._gradient(gray)
|
mag, bins = self._gradient(gray)
|
||||||
valid = mag >= self.weak_grad
|
valid = mag >= self.weak_grad
|
||||||
k = 2 * self.spread_radius + 1
|
k = 2 * self.spread_radius + 1
|
||||||
@@ -303,6 +304,23 @@ class LineShapeMatcher:
|
|||||||
raw[b] = d.astype(np.float32)
|
raw[b] = d.astype(np.float32)
|
||||||
return raw
|
return raw
|
||||||
|
|
||||||
|
def _spread_bitmap(self, gray: np.ndarray) -> np.ndarray:
    """Build the uint8 spread bitmap of a grayscale image.

    Bit b of a pixel is set when orientation bin b occurs, with gradient
    magnitude of at least ``weak_grad``, inside the square window of
    radius ``spread_radius`` around that pixel.

    One byte per pixel makes this 32x more compact than the float32
    (N_BINS, H, W) response map.
    """
    magnitude, bin_idx = self._gradient(gray)
    strong_enough = magnitude >= self.weak_grad

    side = 2 * self.spread_radius + 1
    box = np.ones((side, side), dtype=np.uint8)

    height, width = gray.shape
    bitmap = np.zeros((height, width), dtype=np.uint8)
    for b in range(N_BINS):
        hits = ((bin_idx == b) & strong_enough).astype(np.uint8)
        # Dilation spreads each hit over the window; the shift moves it to bit b.
        spread_hits = cv2.dilate(hits, box)
        bitmap |= (spread_hits << b)
    return bitmap
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _score_by_shift(
|
def _score_by_shift(
|
||||||
resp: np.ndarray, dx: np.ndarray, dy: np.ndarray, bins: np.ndarray,
|
resp: np.ndarray, dx: np.ndarray, dy: np.ndarray, bins: np.ndarray,
|
||||||
@@ -333,7 +351,8 @@ class LineShapeMatcher:
|
|||||||
|
|
||||||
def _refine_angle(
|
def _refine_angle(
|
||||||
self,
|
self,
|
||||||
resp0: np.ndarray,
|
spread0: np.ndarray, # bitmap uint8 (H, W)
|
||||||
|
bit_active: int,
|
||||||
template_gray: np.ndarray,
|
template_gray: np.ndarray,
|
||||||
cx: float, cy: float,
|
cx: float, cy: float,
|
||||||
angle_deg: float, scale: float,
|
angle_deg: float, scale: float,
|
||||||
@@ -366,7 +385,7 @@ class LineShapeMatcher:
|
|||||||
cv2.BORDER_CONSTANT, value=0)
|
cv2.BORDER_CONSTANT, value=0)
|
||||||
center = (diag / 2.0, diag / 2.0)
|
center = (diag / 2.0, diag / 2.0)
|
||||||
|
|
||||||
H, W = resp0.shape[1], resp0.shape[2]
|
H, W = spread0.shape
|
||||||
# Ricerca locale posizione con margine ±2 px sulla (cx, cy)
|
# Ricerca locale posizione con margine ±2 px sulla (cx, cy)
|
||||||
margin = 3
|
margin = 3
|
||||||
|
|
||||||
@@ -385,13 +404,14 @@ class LineShapeMatcher:
|
|||||||
continue
|
continue
|
||||||
dx = (fx - center[0]).astype(np.int32)
|
dx = (fx - center[0]).astype(np.int32)
|
||||||
dy = (fy - center[1]).astype(np.int32)
|
dy = (fy - center[1]).astype(np.int32)
|
||||||
# Finestra locale ±margin attorno a (cx, cy) via slicing vettorizzato
|
# Finestra locale ±margin attorno a (cx, cy) via slicing su bitmap
|
||||||
y_lo = int(cy) - margin; y_hi = int(cy) + margin + 1
|
y_lo = int(cy) - margin; y_hi = int(cy) + margin + 1
|
||||||
x_lo = int(cx) - margin; x_hi = int(cx) + margin + 1
|
x_lo = int(cx) - margin; x_hi = int(cx) + margin + 1
|
||||||
sh = y_hi - y_lo; sw = x_hi - x_lo
|
sh = y_hi - y_lo; sw = x_hi - x_lo
|
||||||
acc = np.zeros((sh, sw), dtype=np.float32)
|
acc = np.zeros((sh, sw), dtype=np.float32)
|
||||||
for i in range(len(dx)):
|
for i in range(len(dx)):
|
||||||
ddx = int(dx[i]); ddy = int(dy[i]); b = int(fb[i])
|
ddx = int(dx[i]); ddy = int(dy[i]); b = int(fb[i])
|
||||||
|
bit = np.uint8(1 << b)
|
||||||
sy0 = y_lo + ddy; sy1 = y_hi + ddy
|
sy0 = y_lo + ddy; sy1 = y_hi + ddy
|
||||||
sx0 = x_lo + ddx; sx1 = x_hi + ddx
|
sx0 = x_lo + ddx; sx1 = x_hi + ddx
|
||||||
a_y0 = max(0, -sy0); a_y1 = sh - max(0, sy1 - H)
|
a_y0 = max(0, -sy0); a_y1 = sh - max(0, sy1 - H)
|
||||||
@@ -399,7 +419,10 @@ class LineShapeMatcher:
|
|||||||
s_y0 = max(0, sy0); s_y1 = min(H, sy1)
|
s_y0 = max(0, sy0); s_y1 = min(H, sy1)
|
||||||
s_x0 = max(0, sx0); s_x1 = min(W, sx1)
|
s_x0 = max(0, sx0); s_x1 = min(W, sx1)
|
||||||
if s_y1 > s_y0 and s_x1 > s_x0:
|
if s_y1 > s_y0 and s_x1 > s_x0:
|
||||||
acc[a_y0:a_y1, a_x0:a_x1] += resp0[b, s_y0:s_y1, s_x0:s_x1]
|
region = spread0[s_y0:s_y1, s_x0:s_x1]
|
||||||
|
acc[a_y0:a_y1, a_x0:a_x1] += (
|
||||||
|
(region & bit) != 0
|
||||||
|
).astype(np.float32)
|
||||||
acc /= len(dx)
|
acc /= len(dx)
|
||||||
_, max_val, _, max_loc = cv2.minMaxLoc(acc)
|
_, max_val, _, max_loc = cv2.minMaxLoc(acc)
|
||||||
scores_by_off[float(off)] = float(max_val)
|
scores_by_off[float(off)] = float(max_val)
|
||||||
@@ -487,18 +510,19 @@ class LineShapeMatcher:
|
|||||||
grays.append(cv2.pyrDown(grays[-1]))
|
grays.append(cv2.pyrDown(grays[-1]))
|
||||||
top = len(grays) - 1
|
top = len(grays) - 1
|
||||||
|
|
||||||
# Response map top-level
|
# Spread bitmap (uint8) al top level: 32× meno memoria della response
|
||||||
resp_top = self._response_map(grays[top])
|
# map float32 → MOLTO più cache-friendly per _score_by_shift.
|
||||||
bin_has_top = np.array([resp_top[b].any() for b in range(N_BINS)])
|
spread_top = self._spread_bitmap(grays[top])
|
||||||
|
bit_active_top = int(
|
||||||
|
sum(1 << b for b in range(N_BINS)
|
||||||
|
if (spread_top & np.uint8(1 << b)).any())
|
||||||
|
)
|
||||||
if nms_radius is None:
|
if nms_radius is None:
|
||||||
nms_radius = max(8, min(self.template_size) // 2)
|
nms_radius = max(8, min(self.template_size) // 2)
|
||||||
top_thresh = min_score * self.top_score_factor
|
top_thresh = min_score * self.top_score_factor
|
||||||
|
|
||||||
# Background map PER-SCALA: densità media bin attivi normalizzata
|
|
||||||
# su bbox template scalata. Rinormalizza score per isolare contributo
|
|
||||||
# non-random e riduce FP in zone con attivazione densa.
|
|
||||||
tw, th = self.template_size
|
tw, th = self.template_size
|
||||||
density_top = resp_top.sum(axis=0)
|
density_top = _jit_popcount(spread_top)
|
||||||
sf_top = 2 ** top
|
sf_top = 2 ** top
|
||||||
bg_cache_top: dict[float, np.ndarray] = {}
|
bg_cache_top: dict[float, np.ndarray] = {}
|
||||||
bg_cache_full: dict[float, np.ndarray] = {}
|
bg_cache_full: dict[float, np.ndarray] = {}
|
||||||
@@ -521,8 +545,8 @@ class LineShapeMatcher:
|
|||||||
def _top_score(vi: int) -> tuple[int, float]:
|
def _top_score(vi: int) -> tuple[int, float]:
|
||||||
var = self.variants[vi]
|
var = self.variants[vi]
|
||||||
lvl = var.levels[min(top, len(var.levels) - 1)]
|
lvl = var.levels[min(top, len(var.levels) - 1)]
|
||||||
score = self._score_by_shift(
|
score = _jit_score_bitmap(
|
||||||
resp_top, lvl.dx, lvl.dy, lvl.bin, bin_has_data=bin_has_top,
|
spread_top, lvl.dx, lvl.dy, lvl.bin, bit_active_top,
|
||||||
)
|
)
|
||||||
score = _rescore(score, bg_cache_top[var.scale])
|
score = _rescore(score, bg_cache_top[var.scale])
|
||||||
return vi, float(score.max()) if score.size else -1.0
|
return vi, float(score.max()) if score.size else -1.0
|
||||||
@@ -549,18 +573,21 @@ class LineShapeMatcher:
|
|||||||
max_vars_full = max(max_matches * 8, len(self.variants) // 2)
|
max_vars_full = max(max_matches * 8, len(self.variants) // 2)
|
||||||
kept_variants = kept_variants[:max_vars_full]
|
kept_variants = kept_variants[:max_vars_full]
|
||||||
|
|
||||||
# Full-res (parallelizzato per variante)
|
# Full-res (parallelizzato) con bitmap
|
||||||
resp0 = self._response_map(gray0)
|
spread0 = self._spread_bitmap(gray0)
|
||||||
bin_has_full = np.array([resp0[b].any() for b in range(N_BINS)])
|
bit_active_full = int(
|
||||||
density_full = resp0.sum(axis=0)
|
sum(1 << b for b in range(N_BINS)
|
||||||
|
if (spread0 & np.uint8(1 << b)).any())
|
||||||
|
)
|
||||||
|
density_full = _jit_popcount(spread0)
|
||||||
for sc in unique_scales:
|
for sc in unique_scales:
|
||||||
bg_cache_full[sc] = _bg_for_scale(density_full, sc, 1)
|
bg_cache_full[sc] = _bg_for_scale(density_full, sc, 1)
|
||||||
|
|
||||||
def _full_score(vi: int) -> tuple[int, np.ndarray]:
|
def _full_score(vi: int) -> tuple[int, np.ndarray]:
|
||||||
var = self.variants[vi]
|
var = self.variants[vi]
|
||||||
lvl0 = var.levels[0]
|
lvl0 = var.levels[0]
|
||||||
score = self._score_by_shift(
|
score = _jit_score_bitmap(
|
||||||
resp0, lvl0.dx, lvl0.dy, lvl0.bin, bin_has_data=bin_has_full,
|
spread0, lvl0.dx, lvl0.dy, lvl0.bin, bit_active_full,
|
||||||
)
|
)
|
||||||
score = _rescore(score, bg_cache_full[var.scale])
|
score = _rescore(score, bg_cache_full[var.scale])
|
||||||
return vi, score
|
return vi, score
|
||||||
@@ -595,28 +622,37 @@ class LineShapeMatcher:
|
|||||||
h, w = self.template_gray.shape if self.template_gray is not None else (0, 0)
|
h, w = self.template_gray.shape if self.template_gray is not None else (0, 0)
|
||||||
mask_full = np.full((h, w), 255, dtype=np.uint8)
|
mask_full = np.full((h, w), 255, dtype=np.uint8)
|
||||||
|
|
||||||
kept: list[Match] = []
|
# Pre-NMS rapido su raw (solo subpixel, no refine/verify): riduce
|
||||||
|
# i candidati a ~max_matches*3 prima di operazioni costose (refine,
|
||||||
|
# verify) che erano chiamate per ogni raw causando lentezze 100x.
|
||||||
r2 = nms_radius * nms_radius
|
r2 = nms_radius * nms_radius
|
||||||
tw, th = self.template_size
|
preliminary: list[tuple[float, float, float, int]] = []
|
||||||
|
pre_cap = max(max_matches * 3, max_matches + 10)
|
||||||
for score, xi, yi, vi in raw:
|
for score, xi, yi, vi in raw:
|
||||||
var = self.variants[vi]
|
|
||||||
cx_f = float(xi); cy_f = float(yi)
|
|
||||||
if subpixel and vi in score_maps:
|
if subpixel and vi in score_maps:
|
||||||
cx_f, cy_f = self._subpixel_peak(score_maps[vi], xi, yi)
|
cx_f, cy_f = self._subpixel_peak(score_maps[vi], xi, yi)
|
||||||
|
else:
|
||||||
if any((k.cx - cx_f) ** 2 + (k.cy - cy_f) ** 2 < r2 for k in kept):
|
cx_f, cy_f = float(xi), float(yi)
|
||||||
|
if any((k[1] - cx_f) ** 2 + (k[2] - cy_f) ** 2 < r2
|
||||||
|
for k in preliminary):
|
||||||
continue
|
continue
|
||||||
|
preliminary.append((score, cx_f, cy_f, vi))
|
||||||
|
if len(preliminary) >= pre_cap:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Ora refine + verify solo sui candidati pre-NMS
|
||||||
|
kept: list[Match] = []
|
||||||
|
tw, th = self.template_size
|
||||||
|
for score, cx_f, cy_f, vi in preliminary:
|
||||||
|
var = self.variants[vi]
|
||||||
ang_f = var.angle_deg
|
ang_f = var.angle_deg
|
||||||
score_f = score
|
score_f = score
|
||||||
if refine_angle and self.template_gray is not None:
|
if refine_angle and self.template_gray is not None:
|
||||||
ang_f, score_f, cx_f, cy_f = self._refine_angle(
|
ang_f, score_f, cx_f, cy_f = self._refine_angle(
|
||||||
resp0, self.template_gray, cx_f, cy_f,
|
spread0, bit_active_full, self.template_gray, cx_f, cy_f,
|
||||||
var.angle_deg, var.scale, mask_full,
|
var.angle_deg, var.scale, mask_full,
|
||||||
search_radius=self.angle_step_deg / 2.0,
|
search_radius=self.angle_step_deg / 2.0,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Verify NCC: filtra falsi positivi con mismatch pixel-level
|
|
||||||
if verify_ncc:
|
if verify_ncc:
|
||||||
ncc = self._verify_ncc(gray0, cx_f, cy_f, ang_f, var.scale)
|
ncc = self._verify_ncc(gray0, cx_f, cy_f, ang_f, var.scale)
|
||||||
if ncc < verify_threshold:
|
if ncc < verify_threshold:
|
||||||
|
|||||||
Reference in New Issue
Block a user