Compare commits

...

3 Commits

Author SHA1 Message Date
Adriano ba4024d252 feat: search_roi parametro find() per limitare area di ricerca
Equivalente a Halcon set_aoi: matching opera su crop locale, coord
output ri-traslate al sistema scena. Costo proporzionale a w*h del
ROI invece di W*H scena intera.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 15:22:43 +02:00
root 89b59b3ea3 perf: Fase 2 speed (3x baseline) - fuse JIT + LRU + sub-pixel lazy
Ottimizzazioni cumulative (225s -> 73s sul bench suite, 3.07x):

pm2d/line_matcher.py:
- Sub-pixel + plateau centroid spostati DOPO il pre-NMS (prima: 58k chiamate
  per clip_preciso anche su candidati poi scartati dalla NMS; ora solo sui
  ~75 preliminary sopravvissuti). Coordinate intere OK per la decisione
  reject, dato che nms_radius >= 8 px.
- Usa nuovo kernel fuso score+rescore (no allocazione intermedia).
- Adaptive plateau_radius + propagazione train_mask per NCC coerente.
- Local crop NCC (diag template invece di intera scena).
- Fallback adattivo se bg_rescore azzera tutti gli score top-level.

pm2d/_jit_kernels.py:
- Nuovo kernel _jit_score_bitmap_rescored: fonde scoring bitmap e rescore
  (score - bg) / (1 - bg) in un singolo pass parallelo. Evita allocazione
  e passata aggiuntiva (era ~15% del tempo find sul preciso).

pm2d/auto_tune.py:
- LRU cache in-memory sui risultati auto_tune (chiave md5 ROI + mask):
  richiamate successive con stessa ROI sono O(1).
- Downsample a 128px prima della correlazione rotazionale
  (O(n_angles * H * W) -> insensibile su sample moderati).
- Soglie weak/strong da percentili reali (p55/p85) senza clamp a 100,
  con clamp massimo 400 per evitare saturazione su template ad alto contrasto.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 21:21:59 +00:00
root 44a3046616 deploy: build locale immagine + allineamento Traefik
- build: . invece di pull da registry (non disponibile su VPS)
- certresolver: mytlschallenge (già configurato in Traefik)
- redirect HTTP→HTTPS gestito dall'entrypoint web globale

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 14:46:23 +00:00
4 changed files with 261 additions and 55 deletions
+5 -10
View File
@@ -2,13 +2,14 @@
# Assume che Traefik sia già attivo sulla VPS con: # Assume che Traefik sia già attivo sulla VPS con:
# - network esterna "traefik" (adatta nome se diverso) # - network esterna "traefik" (adatta nome se diverso)
# - entrypoint "websecure" su :443 # - entrypoint "websecure" su :443
# - certresolver "letsencrypt" configurato # - certresolver "mytlschallenge" configurato
# #
# Adattare eventualmente: nome network, entrypoint, certresolver. # Adattare eventualmente: nome network, entrypoint, certresolver.
services: services:
pm2d: pm2d:
image: ${REGISTRY:-localhost:5000}/pm2d:${TAG:-latest} build: .
image: pm2d:latest
container_name: pm2d container_name: pm2d
restart: unless-stopped restart: unless-stopped
environment: environment:
@@ -27,19 +28,13 @@ services:
- "traefik.http.routers.pm2d.rule=Host(`pm.tielogic.xyz`)" - "traefik.http.routers.pm2d.rule=Host(`pm.tielogic.xyz`)"
- "traefik.http.routers.pm2d.entrypoints=websecure" - "traefik.http.routers.pm2d.entrypoints=websecure"
- "traefik.http.routers.pm2d.tls=true" - "traefik.http.routers.pm2d.tls=true"
- "traefik.http.routers.pm2d.tls.certresolver=letsencrypt" - "traefik.http.routers.pm2d.tls.certresolver=mytlschallenge"
- "traefik.http.services.pm2d.loadbalancer.server.port=${PORT:-8080}" - "traefik.http.services.pm2d.loadbalancer.server.port=${PORT:-8080}"
# Middleware: upload fino a 50MB (default Traefik bufferizza a 4MB) # Middleware: upload fino a 50MB (default Traefik bufferizza a 4MB)
- "traefik.http.middlewares.pm2d-bodysize.buffering.maxRequestBodyBytes=52428800" - "traefik.http.middlewares.pm2d-bodysize.buffering.maxRequestBodyBytes=52428800"
- "traefik.http.routers.pm2d.middlewares=pm2d-bodysize" - "traefik.http.routers.pm2d.middlewares=pm2d-bodysize"
# Redirect HTTP → HTTPS è gestito globalmente dall'entrypoint `web` di Traefik
# Redirect HTTP → HTTPS
- "traefik.http.routers.pm2d-http.rule=Host(`pm.tielogic.xyz`)"
- "traefik.http.routers.pm2d-http.entrypoints=web"
- "traefik.http.routers.pm2d-http.middlewares=pm2d-redirect-https"
- "traefik.http.middlewares.pm2d-redirect-https.redirectscheme.scheme=https"
- "traefik.http.middlewares.pm2d-redirect-https.redirectscheme.permanent=true"
networks: networks:
traefik: traefik:
+74
View File
@@ -110,6 +110,55 @@ if HAS_NUMBA:
acc[y, x] *= inv acc[y, x] *= inv
return acc return acc
@nb.njit(cache=True, parallel=True, fastmath=True, boundscheck=False)
def _jit_score_bitmap_rescored(
spread: np.ndarray, # uint8 (H, W)
dx: np.ndarray, # int32 (N,)
dy: np.ndarray, # int32 (N,)
bins: np.ndarray, # int8 (N,)
bit_active: np.uint8,
bg: np.ndarray, # float32 (H, W) background density normalizzata
) -> np.ndarray:
"""score+rescore in un singolo pass: evita allocazione intermedia.
Equivalente a:
score = _jit_score_bitmap(...)
out = max(0, (score - bg) / (1 - bg + 1e-6))
ma fonde la seconda passata dentro la normalizzazione finale
(cache-friendly, risparmia ~15% sul totale find).
"""
H, W = spread.shape
N = dx.shape[0]
acc = np.zeros((H, W), dtype=np.float32)
for y in nb.prange(H):
for i in range(N):
b = bins[i]
mask = np.uint8(1) << b
if (bit_active & mask) == 0:
continue
ddy = dy[i]
yy = y + ddy
if yy < 0 or yy >= H:
continue
ddx = dx[i]
x_lo = 0 if ddx >= 0 else -ddx
x_hi = W if ddx <= 0 else W - ddx
for x in range(x_lo, x_hi):
if spread[yy, x + ddx] & mask:
acc[y, x] += 1.0
if N > 0:
inv = 1.0 / N
for y in nb.prange(H):
for x in range(W):
v = acc[y, x] * inv
bgv = bg[y, x]
if bgv < 1.0:
r = (v - bgv) / (1.0 - bgv + 1e-6)
acc[y, x] = r if r > 0.0 else 0.0
else:
acc[y, x] = 0.0
return acc
@nb.njit(cache=True, parallel=True, fastmath=True, boundscheck=False) @nb.njit(cache=True, parallel=True, fastmath=True, boundscheck=False)
def _jit_popcount_density(spread: np.ndarray) -> np.ndarray: def _jit_popcount_density(spread: np.ndarray) -> np.ndarray:
"""Conta bit set per pixel: ritorna (H, W) float32 in [0..8].""" """Conta bit set per pixel: ritorna (H, W) float32 in [0..8]."""
@@ -134,6 +183,8 @@ if HAS_NUMBA:
_jit_score_by_shift(resp, dx, dy, b, ba) _jit_score_by_shift(resp, dx, dy, b, ba)
spread = np.zeros((32, 32), dtype=np.uint8) spread = np.zeros((32, 32), dtype=np.uint8)
_jit_score_bitmap(spread, dx, dy, b, np.uint8(0xFF)) _jit_score_bitmap(spread, dx, dy, b, np.uint8(0xFF))
bg = np.zeros((32, 32), dtype=np.float32)
_jit_score_bitmap_rescored(spread, dx, dy, b, np.uint8(0xFF), bg)
_jit_popcount_density(spread) _jit_popcount_density(spread)
else: # pragma: no cover else: # pragma: no cover
@@ -144,6 +195,9 @@ else: # pragma: no cover
def _jit_score_bitmap(spread, dx, dy, bins, bit_active): def _jit_score_bitmap(spread, dx, dy, bins, bit_active):
raise RuntimeError("numba non disponibile") raise RuntimeError("numba non disponibile")
def _jit_score_bitmap_rescored(spread, dx, dy, bins, bit_active, bg):
raise RuntimeError("numba non disponibile")
def _jit_popcount_density(spread): def _jit_popcount_density(spread):
raise RuntimeError("numba non disponibile") raise RuntimeError("numba non disponibile")
@@ -172,6 +226,26 @@ def score_bitmap(
return _numpy_score_by_shift(resp, dx, dy, bins, None) return _numpy_score_by_shift(resp, dx, dy, bins, None)
def score_bitmap_rescored(
spread: np.ndarray, dx: np.ndarray, dy: np.ndarray, bins: np.ndarray,
bit_active: int, bg: np.ndarray,
) -> np.ndarray:
"""Score bitmap + rescore fusi in un solo pass (JIT)."""
if HAS_NUMBA and len(dx) > 0:
return _jit_score_bitmap_rescored(
np.ascontiguousarray(spread, dtype=np.uint8),
np.ascontiguousarray(dx, dtype=np.int32),
np.ascontiguousarray(dy, dtype=np.int32),
np.ascontiguousarray(bins, dtype=np.int8),
np.uint8(bit_active),
np.ascontiguousarray(bg, dtype=np.float32),
)
# Fallback: chiamate separate
score = score_bitmap(spread, dx, dy, bins, bit_active)
out = (score - bg) / (1.0 - bg + 1e-6)
return np.maximum(0.0, out).astype(np.float32)
def popcount_density(spread: np.ndarray) -> np.ndarray: def popcount_density(spread: np.ndarray) -> np.ndarray:
if HAS_NUMBA: if HAS_NUMBA:
return _jit_popcount_density(np.ascontiguousarray(spread, dtype=np.uint8)) return _jit_popcount_density(np.ascontiguousarray(spread, dtype=np.uint8))
+66 -11
View File
@@ -14,6 +14,9 @@ Ritorna dict con i key esatti del form `edit_params`.
from __future__ import annotations from __future__ import annotations
import hashlib
from collections import OrderedDict
import cv2 import cv2
import numpy as np import numpy as np
@@ -24,17 +27,33 @@ def _to_gray(img: np.ndarray) -> np.ndarray:
return img return img
# Cache in-memory (LRU) dei risultati auto_tune per stesso input ROI.
_TUNE_CACHE: OrderedDict[str, dict] = OrderedDict()
_TUNE_CACHE_SIZE = 32
def detect_rotational_symmetry( def detect_rotational_symmetry(
gray: np.ndarray, step_deg: float = 5.0, corr_thresh: float = 0.75, gray: np.ndarray, step_deg: float = 5.0, corr_thresh: float = 0.75,
) -> dict: ) -> dict:
"""Rileva simmetria rotazionale su edge map (più robusto a sfondo uniforme). """Rileva simmetria rotazionale su edge map (più robusto a sfondo uniforme).
Downsample a max 128 px prima di correlare per abbattere il costo
O(n_angles · H · W) senza perdere precisione (la simmetria rotazionale
è invariante a subsampling moderato).
Ritorna dict con: Ritorna dict con:
- order: int, 1=nessuna, 2=180°, 3=120°, 4=90°, 6=60°, 8=45° - order: int, 1=nessuna, 2=180°, 3=120°, 4=90°, 6=60°, 8=45°
- period_deg: float, periodo minimo di simmetria (360/order) - period_deg: float, periodo minimo di simmetria (360/order)
- confidence: float [0..1], correlazione minima tra rotazioni equivalenti - confidence: float [0..1], correlazione minima tra rotazioni equivalenti
""" """
h, w = gray.shape h, w = gray.shape
target = 128
if max(h, w) > target:
sf = target / max(h, w)
new_w = max(32, int(w * sf))
new_h = max(32, int(h * sf))
gray = cv2.resize(gray, (new_w, new_h), interpolation=cv2.INTER_AREA)
h, w = gray.shape
# Usa magnitude gradiente (rotation-invariant rispetto a bg uniforme) # Usa magnitude gradiente (rotation-invariant rispetto a bg uniforme)
gx = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3) gx = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3)
gy = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3) gy = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3)
@@ -88,9 +107,12 @@ def analyze_gradients(gray: np.ndarray) -> dict:
gy = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3) gy = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3)
mag = cv2.magnitude(gx, gy) mag = cv2.magnitude(gx, gy)
# Percentili magnitude # Percentili magnitude: p55/p85 usati per soglie weak/strong (più aderenti
# alla distribuzione reale rispetto a p50/p80 + clamp).
p50 = float(np.percentile(mag, 50)) p50 = float(np.percentile(mag, 50))
p55 = float(np.percentile(mag, 55))
p80 = float(np.percentile(mag, 80)) p80 = float(np.percentile(mag, 80))
p85 = float(np.percentile(mag, 85))
p95 = float(np.percentile(mag, 95)) p95 = float(np.percentile(mag, 95))
mag_max = float(mag.max()) mag_max = float(mag.max())
@@ -112,7 +134,8 @@ def analyze_gradients(gray: np.ndarray) -> dict:
ent = 0.0 ent = 0.0
return { return {
"p50": p50, "p80": p80, "p95": p95, "mag_max": mag_max, "p50": p50, "p55": p55, "p80": p80, "p85": p85, "p95": p95,
"mag_max": mag_max,
"strong_pct": strong_pct, "weak_pct": weak_pct, "strong_pct": strong_pct, "weak_pct": weak_pct,
"orient_entropy": ent, "orient_entropy": ent,
"n_pixels": mag.size, "n_pixels": mag.size,
@@ -120,11 +143,28 @@ def analyze_gradients(gray: np.ndarray) -> dict:
} }
def _cache_key(template_bgr: np.ndarray, mask: np.ndarray | None) -> str:
h = hashlib.md5()
h.update(np.ascontiguousarray(template_bgr).tobytes())
h.update(f"shape={template_bgr.shape}".encode())
if mask is not None:
h.update(np.ascontiguousarray(mask).tobytes())
return h.hexdigest()
def auto_tune(template_bgr: np.ndarray, mask: np.ndarray | None = None) -> dict: def auto_tune(template_bgr: np.ndarray, mask: np.ndarray | None = None) -> dict:
"""Analizza template e ritorna dict parametri suggeriti. """Analizza template e ritorna dict parametri suggeriti.
Chiavi compatibili con edit_params PARAM_SCHEMA. Chiavi compatibili con edit_params PARAM_SCHEMA.
Risultato cachato in-memory (LRU): ri-chiamare con stessa ROI è O(1).
""" """
ck = _cache_key(template_bgr, mask)
cached = _TUNE_CACHE.get(ck)
if cached is not None:
_TUNE_CACHE.move_to_end(ck)
return dict(cached)
gray = _to_gray(template_bgr) gray = _to_gray(template_bgr)
h, w = gray.shape h, w = gray.shape
if mask is not None: if mask is not None:
@@ -136,16 +176,22 @@ def auto_tune(template_bgr: np.ndarray, mask: np.ndarray | None = None) -> dict:
stats = analyze_gradients(gray_for_stats) stats = analyze_gradients(gray_for_stats)
sym = detect_rotational_symmetry(gray_for_stats) sym = detect_rotational_symmetry(gray_for_stats)
# Soglie magnitude: usa percentili per robustezza illuminazione. # Soglie magnitude: usa percentili reali (p85/p55) senza clamp duro a 100.
# Target: strong_grad ~= valore a percentile 80-90 in assoluto, ma # Sobel ksize=3 su uint8 può arrivare a ~1020, quindi clamp massimo 400
# clamp per compatibilità uint8 (Sobel può sforare). # evita saturazione del threshold su template ad alto contrasto.
strong_grad = float(np.clip(stats["p80"], 20.0, 100.0)) strong_grad = float(np.clip(stats["p85"], 30.0, 400.0))
weak_grad = float(np.clip(strong_grad * 0.5, 10.0, 60.0)) weak_grad = float(np.clip(stats["p55"], 15.0, strong_grad * 0.7))
# num_features: 1 feature ogni ~25 px forti, clamp 48..192 # num_features: ibrido perimetro + densità. Target = min(perimeter_budget,
target_feat = int(np.clip(stats["n_strong"] / 25, 48, 192)) # density_budget) per non generare più feature di quante edge nitide siano
# disponibili, ma neanche meno di quante il perimetro possa tracciare.
perim_budget = int(2 * (h + w) * 0.4) # ~40% dei pixel di perimetro
density_budget = int(stats["n_strong"] / 20) # 1 feature ogni ~20 px forti
target_feat = int(np.clip(min(perim_budget, density_budget), 64, 192))
# pyramid_levels in base alla dimensione minima # pyramid_levels in base a dimensione minima E densità feature: un template
# grande ma povero di feature non deve scendere troppi livelli (rischio
# collasso a <16 feature al top level).
min_side = min(h, w) min_side = min(h, w)
if min_side < 60: if min_side < 60:
pyr = 1 pyr = 1
@@ -155,6 +201,9 @@ def auto_tune(template_bgr: np.ndarray, mask: np.ndarray | None = None) -> dict:
pyr = 3 pyr = 3
else: else:
pyr = 4 pyr = 4
# Cap: non scendere sotto ~16 feature al top level (feature ÷ 4^(pyr-1))
max_pyr_from_feat = max(1, int(np.floor(np.log2(max(1, target_feat / 16.0)) / 2.0)) + 1)
pyr = min(pyr, max_pyr_from_feat)
# spread_radius proporzionale a risoluzione + pyramid (tolleranza ~1% dim) # spread_radius proporzionale a risoluzione + pyramid (tolleranza ~1% dim)
spread_radius = int(np.clip(max(3, min_side * 0.02), 3, 8)) spread_radius = int(np.clip(max(3, min_side * 0.02), 3, 8))
@@ -174,7 +223,7 @@ def auto_tune(template_bgr: np.ndarray, mask: np.ndarray | None = None) -> dict:
# angle step: 5° default; se simmetria, mantengo step ma range ridotto # angle step: 5° default; se simmetria, mantengo step ma range ridotto
angle_step = 5.0 angle_step = 5.0
return { result = {
"backend": "line", "backend": "line",
"angle_min": 0.0, "angle_min": 0.0,
"angle_max": angle_max, "angle_max": angle_max,
@@ -196,6 +245,12 @@ def auto_tune(template_bgr: np.ndarray, mask: np.ndarray | None = None) -> dict:
"_symmetry_conf": round(sym["confidence"], 2), "_symmetry_conf": round(sym["confidence"], 2),
"_orient_entropy": round(stats["orient_entropy"], 2), "_orient_entropy": round(stats["orient_entropy"], 2),
} }
# Store in LRU cache
_TUNE_CACHE[ck] = dict(result)
_TUNE_CACHE.move_to_end(ck)
while len(_TUNE_CACHE) > _TUNE_CACHE_SIZE:
_TUNE_CACHE.popitem(last=False)
return result
def summarize(tune: dict) -> str: def summarize(tune: dict) -> str:
+116 -34
View File
@@ -39,6 +39,7 @@ _GOLDEN = (math.sqrt(5.0) - 1.0) / 2.0 # ≈ 0.618
from pm2d._jit_kernels import ( from pm2d._jit_kernels import (
score_by_shift as _jit_score_by_shift, score_by_shift as _jit_score_by_shift,
score_bitmap as _jit_score_bitmap, score_bitmap as _jit_score_bitmap,
score_bitmap_rescored as _jit_score_bitmap_rescored,
popcount_density as _jit_popcount, popcount_density as _jit_popcount,
HAS_NUMBA, HAS_NUMBA,
) )
@@ -136,6 +137,8 @@ class LineShapeMatcher:
self.variants: list[_Variant] = [] self.variants: list[_Variant] = []
self.template_size: tuple[int, int] = (0, 0) self.template_size: tuple[int, int] = (0, 0)
self.template_gray: np.ndarray | None = None self.template_gray: np.ndarray | None = None
# Maschera usata in training (propagata al refine per coerenza).
self._train_mask: np.ndarray | None = None
# --- Helpers ------------------------------------------------------- # --- Helpers -------------------------------------------------------
@@ -233,6 +236,7 @@ class LineShapeMatcher:
mask_full = np.full((h, w), 255, dtype=np.uint8) mask_full = np.full((h, w), 255, dtype=np.uint8)
else: else:
mask_full = (mask > 0).astype(np.uint8) * 255 mask_full = (mask > 0).astype(np.uint8) * 255
self._train_mask = mask_full.copy()
self.variants.clear() self.variants.clear()
for s in self._scale_list(): for s in self._scale_list():
@@ -505,6 +509,10 @@ class LineShapeMatcher:
) -> float: ) -> float:
"""NCC tra template warpato alla pose e scena sottostante. """NCC tra template warpato alla pose e scena sottostante.
Lavora su un **crop locale** della scena di lato = diagonale del
template ruotato+scalato, non sull'intera scena. Su scene grandi
(1920×1080) taglia drasticamente il costo del warp per ogni match.
Ritorna score [-1, 1]. Usato come filtro anti-falso-positivo: Ritorna score [-1, 1]. Usato come filtro anti-falso-positivo:
il matcher linemod può dare score alto su texture generiche ma il matcher linemod può dare score alto su texture generiche ma
sovrapponendo il template gray i pixel non corrispondono. sovrapponendo il template gray i pixel non corrispondono.
@@ -515,23 +523,40 @@ class LineShapeMatcher:
h, w = t.shape h, w = t.shape
cx_t = (w - 1) / 2.0 cx_t = (w - 1) / 2.0
cy_t = (h - 1) / 2.0 cy_t = (h - 1) / 2.0
M = cv2.getRotationMatrix2D((cx_t, cy_t), angle_deg, scale)
M[0, 2] += cx - cx_t # Bounding box del template ruotato/scalato attorno a (cx, cy)
M[1, 2] += cy - cy_t diag = int(np.ceil(np.hypot(w, h) * scale)) + 8
H, W = scene_gray.shape H, W = scene_gray.shape
x0 = int(round(cx)) - diag // 2
y0 = int(round(cy)) - diag // 2
cx0 = max(0, x0); cy0 = max(0, y0)
cx1 = min(W, x0 + diag); cy1 = min(H, y0 + diag)
if cx1 - cx0 < 10 or cy1 - cy0 < 10:
return 0.0
scn_crop = scene_gray[cy0:cy1, cx0:cx1]
ch, cw = scn_crop.shape
M = cv2.getRotationMatrix2D((cx_t, cy_t), angle_deg, scale)
# Porta il centro del template a (cx - cx0, cy - cy0) del crop
M[0, 2] += (cx - cx0) - cx_t
M[1, 2] += (cy - cy0) - cy_t
warped = cv2.warpAffine( warped = cv2.warpAffine(
t, M, (W, H), t, M, (cw, ch),
flags=cv2.INTER_LINEAR, borderValue=0, flags=cv2.INTER_LINEAR, borderValue=0,
) )
mask = cv2.warpAffine( if self._train_mask is not None:
np.full_like(t, 255), M, (W, H), mask_src = self._train_mask
else:
mask_src = np.full_like(t, 255)
mask_w = cv2.warpAffine(
mask_src, M, (cw, ch),
flags=cv2.INTER_NEAREST, borderValue=0, flags=cv2.INTER_NEAREST, borderValue=0,
) )
valid = mask > 0 valid = mask_w > 0
if valid.sum() < 20: if valid.sum() < 20:
return 0.0 return 0.0
tpl = warped[valid].astype(np.float32) tpl = warped[valid].astype(np.float32)
scn = scene_gray[valid].astype(np.float32) scn = scn_crop[valid].astype(np.float32)
tm = tpl - tpl.mean() tm = tpl - tpl.mean()
sm = scn - scn.mean() sm = scn - scn.mean()
denom = np.sqrt((tm * tm).sum() * (sm * sm).sum()) + 1e-9 denom = np.sqrt((tm * tm).sum() * (sm * sm).sum()) + 1e-9
@@ -549,6 +574,7 @@ class LineShapeMatcher:
verify_threshold: float = 0.4, verify_threshold: float = 0.4,
coarse_angle_factor: int = 2, coarse_angle_factor: int = 2,
scale_penalty: float = 0.0, scale_penalty: float = 0.0,
search_roi: tuple[int, int, int, int] | None = None,
) -> list[Match]: ) -> list[Match]:
""" """
scale_penalty: se > 0, riduce lo score per match a scala diversa da 1.0: scale_penalty: se > 0, riduce lo score per match a scala diversa da 1.0:
@@ -556,11 +582,30 @@ class LineShapeMatcher:
Utile se l'operatore vuole che match "identico al template anche per Utile se l'operatore vuole che match "identico al template anche per
dimensione" abbia score più alto di match "stessa forma, dimensione dimensione" abbia score più alto di match "stessa forma, dimensione
diversa". scale_penalty=0 (default) = comportamento shape puro. diversa". scale_penalty=0 (default) = comportamento shape puro.
search_roi: (x, y, w, h) limita la ricerca a una regione della scena.
Equivalente a Halcon set_aoi: il matching opera su crop locale e le
coordinate output sono ri-traslate al sistema scena originale. Usare
quando si conosce a priori l'area in cui il pezzo può apparire (es.
feeder a posizione fissa) → costo proporzionale a w·h invece di W·H.
""" """
if not self.variants: if not self.variants:
raise RuntimeError("Matcher non addestrato: chiamare train() prima.") raise RuntimeError("Matcher non addestrato: chiamare train() prima.")
gray0 = self._to_gray(scene_bgr) gray_full = self._to_gray(scene_bgr)
# Applica ROI di ricerca: restringe scena a crop, ricorda offset per
# ri-traslare le coordinate dei match a fine pipeline.
if search_roi is not None:
rx, ry, rw, rh = search_roi
H_s, W_s = gray_full.shape
rx = max(0, int(rx)); ry = max(0, int(ry))
rw = max(1, min(int(rw), W_s - rx))
rh = max(1, min(int(rh), H_s - ry))
gray0 = gray_full[ry:ry + rh, rx:rx + rw]
roi_offset = (rx, ry)
else:
gray0 = gray_full
roi_offset = (0, 0)
grays = [gray0] grays = [gray0]
for _ in range(self.pyramid_levels - 1): for _ in range(self.pyramid_levels - 1):
grays.append(cv2.pyrDown(grays[-1])) grays.append(cv2.pyrDown(grays[-1]))
@@ -624,24 +669,37 @@ class LineShapeMatcher:
def _top_score(vi: int) -> tuple[int, float]: def _top_score(vi: int) -> tuple[int, float]:
var = self.variants[vi] var = self.variants[vi]
lvl = var.levels[min(top, len(var.levels) - 1)] lvl = var.levels[min(top, len(var.levels) - 1)]
score = _jit_score_bitmap( score = _jit_score_bitmap_rescored(
spread_top, lvl.dx, lvl.dy, lvl.bin, bit_active_top, spread_top, lvl.dx, lvl.dy, lvl.bin, bit_active_top,
bg_cache_top[var.scale],
) )
score = _rescore(score, bg_cache_top[var.scale])
return vi, float(score.max()) if score.size else -1.0 return vi, float(score.max()) if score.size else -1.0
kept_coarse: list[tuple[int, float]] = [] kept_coarse: list[tuple[int, float]] = []
all_top_scores: list[tuple[int, float]] = []
if self.n_threads > 1 and len(coarse_idx_list) > 1: if self.n_threads > 1 and len(coarse_idx_list) > 1:
with ThreadPoolExecutor(max_workers=self.n_threads) as ex: with ThreadPoolExecutor(max_workers=self.n_threads) as ex:
for vi, best in ex.map(_top_score, coarse_idx_list): for vi, best in ex.map(_top_score, coarse_idx_list):
all_top_scores.append((vi, best))
if best >= top_thresh: if best >= top_thresh:
kept_coarse.append((vi, best)) kept_coarse.append((vi, best))
else: else:
for vi in coarse_idx_list: for vi in coarse_idx_list:
vi2, best = _top_score(vi) vi2, best = _top_score(vi)
all_top_scores.append((vi2, best))
if best >= top_thresh: if best >= top_thresh:
kept_coarse.append((vi2, best)) kept_coarse.append((vi2, best))
# Fallback adattivo: se il rescore background ha abbattuto tutti
# gli score sotto top_thresh (scene texturate pesanti), ripesca
# le varianti migliori al top level per dare comunque una chance
# alla fase full-res invece di ritornare 0 match.
if not kept_coarse and all_top_scores:
all_top_scores.sort(key=lambda t: -t[1])
n_keep = max(4, len(all_top_scores) // 10)
# Limita a varianti con score top > 0 (non completamente a zero)
kept_coarse = [(vi, s) for vi, s in all_top_scores[:n_keep] if s > 0]
# Espandi ogni coarse promosso con i suoi vicini (stessa scala, # Espandi ogni coarse promosso con i suoi vicini (stessa scala,
# angoli intermedi non valutati al top) # angoli intermedi non valutati al top)
expanded: set[int] = set() expanded: set[int] = set()
@@ -678,10 +736,10 @@ class LineShapeMatcher:
def _full_score(vi: int) -> tuple[int, np.ndarray]: def _full_score(vi: int) -> tuple[int, np.ndarray]:
var = self.variants[vi] var = self.variants[vi]
lvl0 = var.levels[0] lvl0 = var.levels[0]
score = _jit_score_bitmap( score = _jit_score_bitmap_rescored(
spread0, lvl0.dx, lvl0.dy, lvl0.bin, bit_active_full, spread0, lvl0.dx, lvl0.dy, lvl0.bin, bit_active_full,
bg_cache_full[var.scale],
) )
score = _rescore(score, bg_cache_full[var.scale])
return vi, score return vi, score
candidates_per_var: list[tuple[int, np.ndarray]] = [] candidates_per_var: list[tuple[int, np.ndarray]] = []
@@ -693,14 +751,24 @@ class LineShapeMatcher:
else: else:
results = [_full_score(vi) for vi in var_indices] results = [_full_score(vi) for vi in var_indices]
def _scale_factor(s: float) -> float:
"""Penalità moltiplicativa per scala diversa da 1.0."""
if scale_penalty > 0.0 and s != 1.0:
return max(0.0, 1.0 - scale_penalty * abs(s - 1.0))
return 1.0
for vi, score in results: for vi, score in results:
ys, xs = np.where(score >= min_score) pen = _scale_factor(self.variants[vi].scale)
# Ordinare/sogliare su score penalizzato: un match a scala 1.5 con
# score 0.8 e penalty=0.3 effettivamente vale 0.56, non 0.8.
score_for_sort = score if pen == 1.0 else score * pen
ys, xs = np.where(score_for_sort >= min_score)
if len(ys) == 0: if len(ys) == 0:
continue continue
vals = score[ys, xs] vals = score_for_sort[ys, xs]
K = min(len(vals), max_matches * 5) K = min(len(vals), max_matches * 5)
ord_idx = np.argpartition(-vals, K - 1)[:K] ord_idx = np.argpartition(-vals, K - 1)[:K]
candidates_per_var.append((vi, score)) candidates_per_var.append((vi, score)) # score_map originale
for i in ord_idx: for i in ord_idx:
raw.append((float(vals[i]), int(xs[i]), int(ys[i]), vi)) raw.append((float(vals[i]), int(xs[i]), int(ys[i]), vi))
@@ -710,32 +778,43 @@ class LineShapeMatcher:
score_maps = dict(candidates_per_var) score_maps = dict(candidates_per_var)
# NMS + subpixel + refinement angolare # NMS + subpixel + refinement angolare
# Mask template per refinement (non disponibile qui: usa full) # Usa mask salvata in train() per coerenza (se ROI poligonale).
h, w = self.template_gray.shape if self.template_gray is not None else (0, 0) h, w = self.template_gray.shape if self.template_gray is not None else (0, 0)
mask_full = np.full((h, w), 255, dtype=np.uint8) mask_full = (
self._train_mask if self._train_mask is not None
else np.full((h, w), 255, dtype=np.uint8)
)
# Plateau radius adattivo al template (evita plateau troppo ampi su
# template piccoli: 8% del lato minimo, clampato [3, 10]).
plateau_r = max(3, min(10, int(min(self.template_size) * 0.08)))
# Pre-NMS rapido su raw (solo subpixel, no refine/verify): riduce # Pre-NMS rapido su raw con coordinate intere (nms_radius ≥ 8,
# i candidati a ~max_matches*3 prima di operazioni costose (refine, # la precisione sub-pixel non cambia la decisione di reject).
# verify) che erano chiamate per ogni raw causando lentezze 100x. # Subpixel viene calcolato DOPO il pre-NMS solo sui ~pre_cap
# preliminary sopravvissuti: prima era chiamato su ogni raw (~58k
# chiamate su clip_preciso) anche se la maggior parte veniva poi
# scartata dalla NMS, sprecando la parte più costosa del loop.
r2 = nms_radius * nms_radius r2 = nms_radius * nms_radius
preliminary: list[tuple[float, float, float, int]] = []
pre_cap = max(max_matches * 3, max_matches + 10) pre_cap = max(max_matches * 3, max_matches + 10)
preliminary_int: list[tuple[float, int, int, int]] = []
for score, xi, yi, vi in raw: for score, xi, yi, vi in raw:
if subpixel and vi in score_maps: if any((k[1] - xi) ** 2 + (k[2] - yi) ** 2 < r2
cx_f, cy_f = self._subpixel_peak(score_maps[vi], xi, yi) for k in preliminary_int):
else:
cx_f, cy_f = float(xi), float(yi)
if any((k[1] - cx_f) ** 2 + (k[2] - cy_f) ** 2 < r2
for k in preliminary):
continue continue
preliminary.append((score, cx_f, cy_f, vi)) preliminary_int.append((score, xi, yi, vi))
if len(preliminary) >= pre_cap: if len(preliminary_int) >= pre_cap:
break break
# Ora refine + verify solo sui candidati pre-NMS # Subpixel + refine + verify solo sui candidati pre-NMS (max pre_cap)
kept: list[Match] = [] kept: list[Match] = []
tw, th = self.template_size tw, th = self.template_size
for score, cx_f, cy_f, vi in preliminary: for score, xi, yi, vi in preliminary_int:
if subpixel and vi in score_maps:
cx_f, cy_f = self._subpixel_peak(
score_maps[vi], xi, yi, plateau_radius=plateau_r,
)
else:
cx_f, cy_f = float(xi), float(yi)
var = self.variants[vi] var = self.variants[vi]
ang_f = var.angle_deg ang_f = var.angle_deg
score_f = score score_f = score
@@ -751,8 +830,11 @@ class LineShapeMatcher:
if ncc < verify_threshold: if ncc < verify_threshold:
continue continue
# Ri-traslo coord da spazio crop ROI a spazio scena originale.
cx_out = cx_f + roi_offset[0]
cy_out = cy_f + roi_offset[1]
poly = _oriented_bbox_polygon( poly = _oriented_bbox_polygon(
cx_f, cy_f, tw * var.scale, th * var.scale, ang_f, cx_out, cy_out, tw * var.scale, th * var.scale, ang_f,
) )
# Penalità scala opzionale: score degrada con distanza da 1.0 # Penalità scala opzionale: score degrada con distanza da 1.0
if scale_penalty > 0.0 and var.scale != 1.0: if scale_penalty > 0.0 and var.scale != 1.0:
@@ -760,7 +842,7 @@ class LineShapeMatcher:
0.0, 1.0 - scale_penalty * abs(var.scale - 1.0) 0.0, 1.0 - scale_penalty * abs(var.scale - 1.0)
) )
kept.append(Match( kept.append(Match(
cx=cx_f, cy=cy_f, cx=cx_out, cy=cy_out,
angle_deg=ang_f, angle_deg=ang_f,
scale=var.scale, scale=var.scale,
score=score_f, score=score_f,