diff --git a/.gitignore b/.gitignore
index 7d85b60..45a148e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,13 +1,19 @@
 __pycache__
 *.vscode
-cfg.yaml
+cfg.yaml
 *.pth
 *.pkl
 *.pyc
 *.so
 *.egg-info
+g.txt
 # Ignore weight files but keep the folder structure
 weights/*
-!weights/.gitkeep
 output/
-.claude/
\ No newline at end of file
+.claude/
+*.ply
+data/mono/*
+data/pattern_cube/*
+reports/*
+
+*.bak
diff --git a/Utils.py b/Utils.py
index 1f12387..d3aaaf8 100755
--- a/Utils.py
+++ b/Utils.py
@@ -1,4 +1,4 @@
-import os, sys, torch, imageio, logging, importlib, argparse
+import os, sys, torch, logging, importlib #, argparse, imageio
 import cv2
 import numpy as np
 import yaml
diff --git a/core/foundation_stereo.py b/core/foundation_stereo.py
index b1377d4..27f6c1a 100755
--- a/core/foundation_stereo.py
+++ b/core/foundation_stereo.py
@@ -1,4 +1,4 @@
-import torch,pdb,logging,timm
+import torch #,pdb,logging,timm
 import torch.nn as nn
 import torch.nn.functional as F
 import numpy as np
diff --git a/core/submodule.py b/core/submodule.py
index 6764d64..ce32622 100755
--- a/core/submodule.py
+++ b/core/submodule.py
@@ -619,7 +619,8 @@ def __init__(self, in_planes, ratio=16):
 
     def forward(self, x):
         avg_out = self.fc(self.avg_pool(x))
-        max_out = self.fc(self.max_pool(x))
+        #max_out = self.fc(torch.amax(x, dim=(-2, -1), keepdim=True))
+        max_out = self.fc(self.max_pool(x)) # niv
         out = avg_out + max_out
         return self.sigmoid(out)
 
diff --git a/data/faro/index_0060/img_depth_faro.png b/data/faro/index_0060/img_depth_faro.png
new file mode 100644
index 0000000..db7678a
Binary files /dev/null and b/data/faro/index_0060/img_depth_faro.png differ
diff --git a/data/faro/index_0060/img_depth_rs.png b/data/faro/index_0060/img_depth_rs.png
new file mode 100644
index 0000000..1673727
Binary files /dev/null and b/data/faro/index_0060/img_depth_rs.png differ
diff --git a/data/faro/index_0060/img_left.png b/data/faro/index_0060/img_left.png
new file mode 100644
index 0000000..3a9780d
Binary files /dev/null and b/data/faro/index_0060/img_left.png differ
diff --git a/data/faro/index_0060/img_rgb.png b/data/faro/index_0060/img_rgb.png
new file mode 100644
index 0000000..03ff669
Binary files /dev/null and b/data/faro/index_0060/img_rgb.png differ
diff --git a/data/faro/index_0060/img_right.png b/data/faro/index_0060/img_right.png
new file mode 100644
index 0000000..6ad0743
Binary files /dev/null and b/data/faro/index_0060/img_right.png differ
diff --git a/data/faro/index_0441/img_depth_faro.png b/data/faro/index_0441/img_depth_faro.png
new file mode 100644
index 0000000..758b0d1
Binary files /dev/null and b/data/faro/index_0441/img_depth_faro.png differ
diff --git a/data/faro/index_0441/img_depth_rs.png b/data/faro/index_0441/img_depth_rs.png
new file mode 100644
index 0000000..7e57812
Binary files /dev/null and b/data/faro/index_0441/img_depth_rs.png differ
diff --git a/data/faro/index_0441/img_left.png b/data/faro/index_0441/img_left.png
new file mode 100644
index 0000000..d6f7afc
Binary files /dev/null and b/data/faro/index_0441/img_left.png differ
diff --git a/data/faro/index_0441/img_rgb.png b/data/faro/index_0441/img_rgb.png
new file mode 100644
index 0000000..f5a9da5
Binary files /dev/null and b/data/faro/index_0441/img_rgb.png differ
diff --git a/data/faro/index_0441/img_right.png b/data/faro/index_0441/img_right.png
new file mode 100644
index 0000000..03a35c8
Binary files /dev/null and b/data/faro/index_0441/img_right.png differ
diff --git a/data/faro/index_0742/img_depth_faro.png b/data/faro/index_0742/img_depth_faro.png
new file mode 100644
index 0000000..c1fe53d
Binary files /dev/null and b/data/faro/index_0742/img_depth_faro.png differ
diff --git a/data/faro/index_0742/img_depth_rs.png b/data/faro/index_0742/img_depth_rs.png
new file mode 100644
index 0000000..3cc14f5
Binary files /dev/null and b/data/faro/index_0742/img_depth_rs.png differ
diff --git a/data/faro/index_0742/img_left.png b/data/faro/index_0742/img_left.png
new file mode 100644
index 0000000..c4a4812
Binary files /dev/null and b/data/faro/index_0742/img_left.png differ
diff --git a/data/faro/index_0742/img_rgb.png b/data/faro/index_0742/img_rgb.png
new file mode 100644
index 0000000..5ed54d7
Binary files /dev/null and b/data/faro/index_0742/img_rgb.png differ
diff --git a/data/faro/index_0742/img_right.png b/data/faro/index_0742/img_right.png
new file mode 100644
index 0000000..deb8d1c
Binary files /dev/null and b/data/faro/index_0742/img_right.png differ
diff --git a/data/faro/index_1241/img_depth_faro.png b/data/faro/index_1241/img_depth_faro.png
new file mode 100644
index 0000000..a0929fa
Binary files /dev/null and b/data/faro/index_1241/img_depth_faro.png differ
diff --git a/data/faro/index_1241/img_depth_rs.png b/data/faro/index_1241/img_depth_rs.png
new file mode 100644
index 0000000..6f8c2a6
Binary files /dev/null and b/data/faro/index_1241/img_depth_rs.png differ
diff --git a/data/faro/index_1241/img_left.png b/data/faro/index_1241/img_left.png
new file mode 100644
index 0000000..869d2da
Binary files /dev/null and b/data/faro/index_1241/img_left.png differ
diff --git a/data/faro/index_1241/img_rgb.png b/data/faro/index_1241/img_rgb.png
new file mode 100644
index 0000000..9892d74
Binary files /dev/null and b/data/faro/index_1241/img_rgb.png differ
diff --git a/data/faro/index_1241/img_right.png b/data/faro/index_1241/img_right.png
new file mode 100644
index 0000000..47c4e79
Binary files /dev/null and b/data/faro/index_1241/img_right.png differ
diff --git a/data/faro/index_1982/img_depth_faro.png b/data/faro/index_1982/img_depth_faro.png
new file mode 100644
index 0000000..003b4fd
Binary files /dev/null and b/data/faro/index_1982/img_depth_faro.png differ
diff --git a/data/faro/index_1982/img_depth_rs.png b/data/faro/index_1982/img_depth_rs.png
new file mode 100644
index 0000000..64527df
Binary files /dev/null and b/data/faro/index_1982/img_depth_rs.png differ
diff --git a/data/faro/index_1982/img_left.png b/data/faro/index_1982/img_left.png
new file mode 100644
index 0000000..dae2c9c
Binary files /dev/null and b/data/faro/index_1982/img_left.png differ
diff --git a/data/faro/index_1982/img_rgb.png b/data/faro/index_1982/img_rgb.png
new file mode 100644
index 0000000..af300b8
Binary files /dev/null and b/data/faro/index_1982/img_rgb.png differ
diff --git a/data/faro/index_1982/img_right.png b/data/faro/index_1982/img_right.png
new file mode 100644
index 0000000..6443dfd
Binary files /dev/null and b/data/faro/index_1982/img_right.png differ
diff --git a/data/faro/index_2441/img_depth_faro.png b/data/faro/index_2441/img_depth_faro.png
new file mode 100644
index 0000000..f656010
Binary files /dev/null and b/data/faro/index_2441/img_depth_faro.png differ
diff --git a/data/faro/index_2441/img_depth_rs.png b/data/faro/index_2441/img_depth_rs.png
new file mode 100644
index 0000000..2464325
Binary files /dev/null and b/data/faro/index_2441/img_depth_rs.png differ
diff --git a/data/faro/index_2441/img_left.png b/data/faro/index_2441/img_left.png
new file mode 100644
index 0000000..be71db9
Binary files /dev/null and b/data/faro/index_2441/img_left.png differ
diff --git a/data/faro/index_2441/img_rgb.png b/data/faro/index_2441/img_rgb.png
new file mode 100644
index 0000000..09c4d45
Binary files /dev/null and b/data/faro/index_2441/img_rgb.png differ
diff --git a/data/faro/index_2441/img_right.png b/data/faro/index_2441/img_right.png
new file mode 100644
index 0000000..46e3ffe
Binary files /dev/null and b/data/faro/index_2441/img_right.png differ
diff --git a/data/faro/index_2537/img_depth_faro.png b/data/faro/index_2537/img_depth_faro.png
new file mode 100644
index 0000000..e5a8098
Binary files /dev/null and b/data/faro/index_2537/img_depth_faro.png differ
diff --git a/data/faro/index_2537/img_depth_rs.png b/data/faro/index_2537/img_depth_rs.png
new file mode 100644
index 0000000..af0734a
Binary files /dev/null and b/data/faro/index_2537/img_depth_rs.png differ
diff --git a/data/faro/index_2537/img_left.png b/data/faro/index_2537/img_left.png
new file mode 100644
index 0000000..0af09b8
Binary files /dev/null and b/data/faro/index_2537/img_left.png differ
diff --git a/data/faro/index_2537/img_rgb.png b/data/faro/index_2537/img_rgb.png
new file mode 100644
index 0000000..d9c6041
Binary files /dev/null and b/data/faro/index_2537/img_rgb.png differ
diff --git a/data/faro/index_2537/img_right.png b/data/faro/index_2537/img_right.png
new file mode 100644
index 0000000..82602e9
Binary files /dev/null and b/data/faro/index_2537/img_right.png differ
diff --git a/data/faro/index_3049/img_depth_faro.png b/data/faro/index_3049/img_depth_faro.png
new file mode 100644
index 0000000..5d1845f
Binary files /dev/null and b/data/faro/index_3049/img_depth_faro.png differ
diff --git a/data/faro/index_3049/img_depth_rs.png b/data/faro/index_3049/img_depth_rs.png
new file mode 100644
index 0000000..a8a1c82
Binary files /dev/null and b/data/faro/index_3049/img_depth_rs.png differ
diff --git a/data/faro/index_3049/img_left.png b/data/faro/index_3049/img_left.png
new file mode 100644
index 0000000..82c8303
Binary files /dev/null and b/data/faro/index_3049/img_left.png differ
diff --git a/data/faro/index_3049/img_rgb.png b/data/faro/index_3049/img_rgb.png
new file mode 100644
index 0000000..6910b12
Binary files /dev/null and b/data/faro/index_3049/img_rgb.png differ
diff --git a/data/faro/index_3049/img_right.png b/data/faro/index_3049/img_right.png
new file mode 100644
index 0000000..4467644
Binary files /dev/null and b/data/faro/index_3049/img_right.png differ
diff --git a/data/faro/index_3375/img_depth_faro.png b/data/faro/index_3375/img_depth_faro.png
new file mode 100644
index 0000000..cb3b76c
Binary files /dev/null and b/data/faro/index_3375/img_depth_faro.png differ
diff --git a/data/faro/index_3375/img_depth_rs.png b/data/faro/index_3375/img_depth_rs.png
new file mode 100644
index 0000000..f6ececf
Binary files /dev/null and b/data/faro/index_3375/img_depth_rs.png differ
diff --git a/data/faro/index_3375/img_left.png b/data/faro/index_3375/img_left.png
new file mode 100644
index 0000000..f4a3989
Binary files /dev/null and b/data/faro/index_3375/img_left.png differ
diff --git a/data/faro/index_3375/img_rgb.png b/data/faro/index_3375/img_rgb.png
new file mode 100644
index 0000000..2152e5d
Binary files /dev/null and b/data/faro/index_3375/img_rgb.png differ
diff --git a/data/faro/index_3375/img_right.png b/data/faro/index_3375/img_right.png
new file mode 100644
index 0000000..24d9835
Binary files /dev/null and b/data/faro/index_3375/img_right.png differ
diff --git a/data/faro/index_3617/img_depth_faro.png b/data/faro/index_3617/img_depth_faro.png
new file mode 100644
index 0000000..3bd6423
Binary files /dev/null and b/data/faro/index_3617/img_depth_faro.png differ
diff --git a/data/faro/index_3617/img_depth_rs.png b/data/faro/index_3617/img_depth_rs.png
new file mode 100644
index 0000000..84166c8
Binary files /dev/null and b/data/faro/index_3617/img_depth_rs.png differ
diff --git a/data/faro/index_3617/img_left.png b/data/faro/index_3617/img_left.png
new file mode 100644
index 0000000..ce52d8a
Binary files /dev/null and b/data/faro/index_3617/img_left.png differ
diff --git a/data/faro/index_3617/img_rgb.png b/data/faro/index_3617/img_rgb.png
new file mode 100644
index 0000000..dcf6a2e
Binary files /dev/null and b/data/faro/index_3617/img_rgb.png differ
diff --git a/data/faro/index_3617/img_right.png b/data/faro/index_3617/img_right.png
new file mode 100644
index 0000000..e4e707b
Binary files /dev/null and b/data/faro/index_3617/img_right.png differ
diff --git a/data/faro/index_3629/img_depth_faro.png b/data/faro/index_3629/img_depth_faro.png
new file mode 100644
index 0000000..d324e1e
Binary files /dev/null and b/data/faro/index_3629/img_depth_faro.png differ
diff --git a/data/faro/index_3629/img_depth_rs.png b/data/faro/index_3629/img_depth_rs.png
new file mode 100644
index 0000000..bc17188
Binary files /dev/null and b/data/faro/index_3629/img_depth_rs.png differ
diff --git a/data/faro/index_3629/img_left.png b/data/faro/index_3629/img_left.png
new file mode 100644
index 0000000..c3a56bc
Binary files /dev/null and b/data/faro/index_3629/img_left.png differ
diff --git a/data/faro/index_3629/img_rgb.png b/data/faro/index_3629/img_rgb.png
new file mode 100644
index 0000000..8e4fb95
Binary files /dev/null and b/data/faro/index_3629/img_rgb.png differ
diff --git a/data/faro/index_3629/img_right.png b/data/faro/index_3629/img_right.png
new file mode 100644
index 0000000..d5f5289
Binary files /dev/null and b/data/faro/index_3629/img_right.png differ
diff --git a/data/faro/index_3990/img_depth_faro.png b/data/faro/index_3990/img_depth_faro.png
new file mode 100644
index 0000000..0f75d0d
Binary files /dev/null and b/data/faro/index_3990/img_depth_faro.png differ
diff --git a/data/faro/index_3990/img_depth_rs.png b/data/faro/index_3990/img_depth_rs.png
new file mode 100644
index 0000000..e27d036
Binary files /dev/null and b/data/faro/index_3990/img_depth_rs.png differ
diff --git a/data/faro/index_3990/img_left.png b/data/faro/index_3990/img_left.png
new file mode 100644
index 0000000..dd45226
Binary files /dev/null and b/data/faro/index_3990/img_left.png differ
diff --git a/data/faro/index_3990/img_rgb.png b/data/faro/index_3990/img_rgb.png
new file mode 100644
index 0000000..0b5db51
Binary files /dev/null and b/data/faro/index_3990/img_rgb.png differ
diff --git a/data/faro/index_3990/img_right.png b/data/faro/index_3990/img_right.png
new file mode 100644
index 0000000..bee1ba9
Binary files /dev/null and b/data/faro/index_3990/img_right.png differ
diff --git a/data/faro/index_4426/img_depth_faro.png b/data/faro/index_4426/img_depth_faro.png
new file mode 100644
index 0000000..f1cb1a3
Binary files /dev/null and b/data/faro/index_4426/img_depth_faro.png differ
diff --git a/data/faro/index_4426/img_depth_rs.png b/data/faro/index_4426/img_depth_rs.png
new file mode 100644
index 0000000..3ce8f07
Binary files /dev/null and b/data/faro/index_4426/img_depth_rs.png differ
diff --git a/data/faro/index_4426/img_left.png b/data/faro/index_4426/img_left.png
new file mode 100644
index 0000000..455a0c6
Binary files /dev/null and b/data/faro/index_4426/img_left.png differ
diff --git a/data/faro/index_4426/img_rgb.png b/data/faro/index_4426/img_rgb.png
new file mode 100644
index 0000000..f365397
Binary files /dev/null and b/data/faro/index_4426/img_rgb.png differ
diff --git a/data/faro/index_4426/img_right.png b/data/faro/index_4426/img_right.png
new file mode 100644
index 0000000..e603b9d
Binary files /dev/null and b/data/faro/index_4426/img_right.png differ
diff --git a/data/faro/index_4609/img_depth_faro.png b/data/faro/index_4609/img_depth_faro.png
new file mode 100644
index 0000000..3929700
Binary files /dev/null and b/data/faro/index_4609/img_depth_faro.png differ
diff --git a/data/faro/index_4609/img_depth_rs.png b/data/faro/index_4609/img_depth_rs.png
new file mode 100644
index 0000000..1d2fd54
Binary files /dev/null and b/data/faro/index_4609/img_depth_rs.png differ
diff --git a/data/faro/index_4609/img_left.png b/data/faro/index_4609/img_left.png
new file mode 100644
index 0000000..1d1a045
Binary files /dev/null and b/data/faro/index_4609/img_left.png differ
diff --git a/data/faro/index_4609/img_rgb.png b/data/faro/index_4609/img_rgb.png
new file mode 100644
index 0000000..a71aa9a
Binary files /dev/null and b/data/faro/index_4609/img_rgb.png differ
diff --git a/data/faro/index_4609/img_right.png b/data/faro/index_4609/img_right.png
new file mode 100644
index 0000000..81250da
Binary files /dev/null and b/data/faro/index_4609/img_right.png differ
diff --git a/data/faro/index_4788/img_depth_faro.png b/data/faro/index_4788/img_depth_faro.png
new file mode 100644
index 0000000..6f741f8
Binary files /dev/null and b/data/faro/index_4788/img_depth_faro.png differ
diff --git a/data/faro/index_4788/img_depth_rs.png b/data/faro/index_4788/img_depth_rs.png
new file mode 100644
index 0000000..7f50877
Binary files /dev/null and b/data/faro/index_4788/img_depth_rs.png differ
diff --git a/data/faro/index_4788/img_left.png b/data/faro/index_4788/img_left.png
new file mode 100644
index 0000000..677facd
Binary files /dev/null and b/data/faro/index_4788/img_left.png differ
diff --git a/data/faro/index_4788/img_rgb.png b/data/faro/index_4788/img_rgb.png
new file mode 100644
index 0000000..80e0061
Binary files /dev/null and b/data/faro/index_4788/img_rgb.png differ
diff --git a/data/faro/index_4788/img_right.png b/data/faro/index_4788/img_right.png
new file mode 100644
index 0000000..8f948db
Binary files /dev/null and b/data/faro/index_4788/img_right.png differ
diff --git a/data/faro/index_5074/img_depth_faro.png b/data/faro/index_5074/img_depth_faro.png
new file mode 100644
index 0000000..d28a077
Binary files /dev/null and b/data/faro/index_5074/img_depth_faro.png differ
diff --git a/data/faro/index_5074/img_depth_rs.png b/data/faro/index_5074/img_depth_rs.png
new file mode 100644
index 0000000..8b88004
Binary files /dev/null and b/data/faro/index_5074/img_depth_rs.png differ
diff --git a/data/faro/index_5074/img_left.png b/data/faro/index_5074/img_left.png
new file mode 100644
index 0000000..debdd5d
Binary files /dev/null and b/data/faro/index_5074/img_left.png differ
diff --git a/data/faro/index_5074/img_rgb.png b/data/faro/index_5074/img_rgb.png
new file mode 100644
index 0000000..5a085b4
Binary files /dev/null and b/data/faro/index_5074/img_rgb.png differ
diff --git a/data/faro/index_5074/img_right.png b/data/faro/index_5074/img_right.png
new file mode 100644
index 0000000..87b4ba9
Binary files /dev/null and b/data/faro/index_5074/img_right.png differ
diff --git a/data/faro/index_5646/img_depth_faro.png b/data/faro/index_5646/img_depth_faro.png
new file mode 100644
index 0000000..25b3c4a
Binary files /dev/null and b/data/faro/index_5646/img_depth_faro.png differ
diff --git a/data/faro/index_5646/img_depth_rs.png b/data/faro/index_5646/img_depth_rs.png
new file mode 100644
index 0000000..9772370
Binary files /dev/null and b/data/faro/index_5646/img_depth_rs.png differ
diff --git a/data/faro/index_5646/img_left.png b/data/faro/index_5646/img_left.png
new file mode 100644
index 0000000..2eaac0c
Binary files /dev/null and b/data/faro/index_5646/img_left.png differ
diff --git a/data/faro/index_5646/img_rgb.png b/data/faro/index_5646/img_rgb.png
new file mode 100644
index 0000000..8b1dd1e
Binary files /dev/null and b/data/faro/index_5646/img_rgb.png differ
diff --git a/data/faro/index_5646/img_right.png b/data/faro/index_5646/img_right.png
new file mode 100644
index 0000000..0d98e0c
Binary files /dev/null and b/data/faro/index_5646/img_right.png differ
diff --git a/data/faro/index_5661/img_depth_faro.png b/data/faro/index_5661/img_depth_faro.png
new file mode 100644
index 0000000..cb58031
Binary files /dev/null and b/data/faro/index_5661/img_depth_faro.png differ
diff --git a/data/faro/index_5661/img_depth_rs.png b/data/faro/index_5661/img_depth_rs.png
new file mode 100644
index 0000000..812413c
Binary files /dev/null and b/data/faro/index_5661/img_depth_rs.png differ
diff --git a/data/faro/index_5661/img_left.png b/data/faro/index_5661/img_left.png
new file mode 100644
index 0000000..f978787
Binary files /dev/null and b/data/faro/index_5661/img_left.png differ
diff --git a/data/faro/index_5661/img_rgb.png b/data/faro/index_5661/img_rgb.png
new file mode 100644
index 0000000..4a4bb82
Binary files /dev/null and b/data/faro/index_5661/img_rgb.png differ
diff --git a/data/faro/index_5661/img_right.png b/data/faro/index_5661/img_right.png
new file mode 100644
index 0000000..ab6fa1c
Binary files /dev/null and b/data/faro/index_5661/img_right.png differ
diff --git a/data/faro/index_5808/img_depth_faro.png b/data/faro/index_5808/img_depth_faro.png
new file mode 100644
index 0000000..161d61d
Binary files /dev/null and b/data/faro/index_5808/img_depth_faro.png differ
diff --git a/data/faro/index_5808/img_depth_rs.png b/data/faro/index_5808/img_depth_rs.png
new file mode 100644
index 0000000..f9efd39
Binary files /dev/null and b/data/faro/index_5808/img_depth_rs.png differ
diff --git a/data/faro/index_5808/img_left.png b/data/faro/index_5808/img_left.png
new file mode 100644
index 0000000..177ba6e
Binary files /dev/null and b/data/faro/index_5808/img_left.png differ
diff --git a/data/faro/index_5808/img_rgb.png b/data/faro/index_5808/img_rgb.png
new file mode 100644
index 0000000..11c4db5
Binary files /dev/null and b/data/faro/index_5808/img_rgb.png differ
diff --git a/data/faro/index_5808/img_right.png b/data/faro/index_5808/img_right.png
new file mode 100644
index 0000000..2bce5ef
Binary files /dev/null and b/data/faro/index_5808/img_right.png differ
diff --git a/data/faro/index_5881/img_depth_faro.png b/data/faro/index_5881/img_depth_faro.png
new file mode 100644
index 0000000..b5788ff
Binary files /dev/null and b/data/faro/index_5881/img_depth_faro.png differ
diff --git a/data/faro/index_5881/img_depth_rs.png b/data/faro/index_5881/img_depth_rs.png
new file mode 100644
index 0000000..5c9b762
Binary files /dev/null and b/data/faro/index_5881/img_depth_rs.png differ
diff --git a/data/faro/index_5881/img_left.png b/data/faro/index_5881/img_left.png
new file mode 100644
index 0000000..331cd40
Binary files /dev/null and b/data/faro/index_5881/img_left.png differ
diff --git a/data/faro/index_5881/img_rgb.png b/data/faro/index_5881/img_rgb.png
new file mode 100644
index 0000000..de68e00
Binary files /dev/null and b/data/faro/index_5881/img_rgb.png differ
diff --git a/data/faro/index_5881/img_right.png b/data/faro/index_5881/img_right.png
new file mode 100644
index 0000000..fd0d235
Binary files /dev/null and b/data/faro/index_5881/img_right.png differ
diff --git a/data/faro/index_5964/img_depth_faro.png b/data/faro/index_5964/img_depth_faro.png
new file mode 100644
index 0000000..40116a6
Binary files /dev/null and b/data/faro/index_5964/img_depth_faro.png differ
diff --git a/data/faro/index_5964/img_depth_rs.png b/data/faro/index_5964/img_depth_rs.png
new file mode 100644
index 0000000..fe2d513
Binary files /dev/null and b/data/faro/index_5964/img_depth_rs.png differ
diff --git a/data/faro/index_5964/img_left.png b/data/faro/index_5964/img_left.png
new file mode 100644
index 0000000..b7fb250
Binary files /dev/null and b/data/faro/index_5964/img_left.png differ
diff --git a/data/faro/index_5964/img_rgb.png b/data/faro/index_5964/img_rgb.png
new file mode 100644
index 0000000..5620d7c
Binary files /dev/null and b/data/faro/index_5964/img_rgb.png differ
diff --git a/data/faro/index_5964/img_right.png b/data/faro/index_5964/img_right.png
new file mode 100644
index 0000000..61a5c99
Binary files /dev/null and b/data/faro/index_5964/img_right.png differ
diff --git a/data/faro/index_6422/img_depth_faro.png b/data/faro/index_6422/img_depth_faro.png
new file mode 100644
index 0000000..3ff7b67
Binary files /dev/null and b/data/faro/index_6422/img_depth_faro.png differ
diff --git a/data/faro/index_6422/img_depth_rs.png b/data/faro/index_6422/img_depth_rs.png
new file mode 100644
index 0000000..2d13beb
Binary files /dev/null and b/data/faro/index_6422/img_depth_rs.png differ
diff --git a/data/faro/index_6422/img_left.png b/data/faro/index_6422/img_left.png
new file mode 100644
index 0000000..e19b559
Binary files /dev/null and b/data/faro/index_6422/img_left.png differ
diff --git a/data/faro/index_6422/img_rgb.png b/data/faro/index_6422/img_rgb.png
new file mode 100644
index 0000000..cd9ddbb
Binary files /dev/null and b/data/faro/index_6422/img_rgb.png differ
diff --git a/data/faro/index_6422/img_right.png b/data/faro/index_6422/img_right.png
new file mode 100644
index 0000000..f3dc911
Binary files /dev/null and b/data/faro/index_6422/img_right.png differ
diff --git a/data/faro/index_6461/img_depth_faro.png b/data/faro/index_6461/img_depth_faro.png
new file mode 100644
index 0000000..9e444fd
Binary files /dev/null and b/data/faro/index_6461/img_depth_faro.png differ
diff --git a/data/faro/index_6461/img_depth_rs.png b/data/faro/index_6461/img_depth_rs.png
new file mode 100644
index 0000000..3e31eb3
Binary files /dev/null and b/data/faro/index_6461/img_depth_rs.png differ
diff --git a/data/faro/index_6461/img_left.png b/data/faro/index_6461/img_left.png
new file mode 100644
index 0000000..8ac7e85
Binary files /dev/null and b/data/faro/index_6461/img_left.png differ
diff --git a/data/faro/index_6461/img_rgb.png b/data/faro/index_6461/img_rgb.png
new file mode 100644
index 0000000..f6f3cef
Binary files /dev/null and b/data/faro/index_6461/img_rgb.png differ
diff --git a/data/faro/index_6461/img_right.png b/data/faro/index_6461/img_right.png
new file mode 100644
index 0000000..e9e3f7e
Binary files /dev/null and b/data/faro/index_6461/img_right.png differ
diff --git a/data/faro/index_6539/img_depth_faro.png b/data/faro/index_6539/img_depth_faro.png
new file mode 100644
index 0000000..fee8747
Binary files /dev/null and b/data/faro/index_6539/img_depth_faro.png differ
diff --git a/data/faro/index_6539/img_depth_rs.png b/data/faro/index_6539/img_depth_rs.png
new file mode 100644
index 0000000..44594e4
Binary files /dev/null and b/data/faro/index_6539/img_depth_rs.png differ
diff --git a/data/faro/index_6539/img_left.png b/data/faro/index_6539/img_left.png
new file mode 100644
index 0000000..5127108
Binary files /dev/null and b/data/faro/index_6539/img_left.png differ
diff --git a/data/faro/index_6539/img_rgb.png b/data/faro/index_6539/img_rgb.png
new file mode 100644
index 0000000..73a3ae9
Binary files /dev/null and b/data/faro/index_6539/img_rgb.png differ
diff --git a/data/faro/index_6539/img_right.png b/data/faro/index_6539/img_right.png
new file mode 100644
index 0000000..d56e3f2
Binary files /dev/null and b/data/faro/index_6539/img_right.png differ
diff --git a/demo_data/camera.txt b/demo_data/camera.txt
new file mode 100644
index 0000000..d193d89
--- /dev/null
+++ b/demo_data/camera.txt
@@ -0,0 +1,4 @@
+focal_px=901.0471
+baseline_m=0.054903
+source=FARO DA_RGB_Kitchen_1 frame 100
+resolution=1280x720
diff --git a/demo_data/depth.png b/demo_data/depth.png
new file mode 100644
index 0000000..59f8c99
Binary files /dev/null and b/demo_data/depth.png differ
diff --git a/demo_data/imageD_d16_000.png b/demo_data/imageD_d16_000.png
new file mode 100644
index 0000000..f6bec18
Binary files /dev/null and b/demo_data/imageD_d16_000.png differ
diff --git a/demo_data/imageL_d16_000.png b/demo_data/imageL_d16_000.png
new file mode 100644
index 0000000..6ea2df4
Binary files /dev/null and b/demo_data/imageL_d16_000.png differ
diff --git a/demo_data/imageR_d16_000.png b/demo_data/imageR_d16_000.png
new file mode 100644
index 0000000..747fd3b
Binary files /dev/null and b/demo_data/imageR_d16_000.png differ
diff --git a/demo_data/left_faro.png b/demo_data/left_faro.png
new file mode 100644
index 0000000..e8a1d90
Binary files /dev/null and b/demo_data/left_faro.png differ
diff --git a/demo_data/right_faro.png b/demo_data/right_faro.png
new file mode 100644
index 0000000..cf10ef1
Binary files /dev/null and b/demo_data/right_faro.png differ
diff --git a/demo_data_out/depth.png b/demo_data_out/depth.png
new file mode 100644
index 0000000..5fbbba8
Binary files /dev/null and b/demo_data_out/depth.png differ
diff --git a/demo_data_out/depth_colorized.png b/demo_data_out/depth_colorized.png
new file mode 100644
index 0000000..950c0da
Binary files /dev/null and b/demo_data_out/depth_colorized.png differ
diff --git a/demo_data_out/depth_meter.npy b/demo_data_out/depth_meter.npy
new file mode 100644
index 0000000..b377082
Binary files /dev/null and b/demo_data_out/depth_meter.npy differ
diff --git a/demo_data_out/disp_0044.png b/demo_data_out/disp_0044.png
new file mode 100644
index 0000000..a1a632e
Binary files /dev/null and b/demo_data_out/disp_0044.png differ
diff --git a/demo_data_out/disp_0066.png b/demo_data_out/disp_0066.png
new file mode 100644
index 0000000..e121744
Binary files /dev/null and b/demo_data_out/disp_0066.png differ
diff --git a/demo_data_out/disp_0074.png b/demo_data_out/disp_0074.png
new file mode 100644
index 0000000..1c58a6f
Binary files /dev/null and b/demo_data_out/disp_0074.png differ
diff --git a/docs/docs_niv/superpowers/plans/2026-05-18-image-server.md b/docs/docs_niv/superpowers/plans/2026-05-18-image-server.md
new file mode 100644
index 0000000..7e6a95e
--- /dev/null
+++ b/docs/docs_niv/superpowers/plans/2026-05-18-image-server.md
@@ -0,0 +1,469 @@
+# Image RGB-to-Grayscale TCP Server Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Build a threaded TCP server that receives JPEG images, converts RGB to grayscale with OpenCV, and returns the result — plus a client module and integration tests.
+
+**Architecture:** `socketserver.ThreadingTCPServer` with one handler thread per client. Both directions use the same framing: 4-byte big-endian length prefix followed by JPEG bytes. Protocol helpers live in `image_server.py` and are imported by `image_client.py`.
+
+**Tech Stack:** Python 3, `socketserver` (stdlib), `socket` (stdlib), `struct` (stdlib), `threading` (stdlib), `opencv-python` (`cv2`), `numpy`, `unittest` (stdlib).
+
+---
+
+## File Map
+
+| File | Role |
+|------|------|
+| `image_server.py` | `send_msg`, `recv_msg`, `_recv_exact`, `ImageHandler`, `main()` |
+| `image_client.py` | `ImageClient` class, `main()` CLI entry point |
+| `test_server.py` | `TestProtocolHelpers`, `TestImageServer`, `TestImageClient` |
+
+---
+
+### Task 1: Protocol framing helpers
+
+**Files:**
+- Create: `image_server.py`
+- Create: `test_server.py`
+
+- [ ] **Step 1: Write the failing tests**
+
+Create `test_server.py`:
+
+```python
+import socket
+import unittest
+from image_server import send_msg, recv_msg
+
+
+class TestProtocolHelpers(unittest.TestCase):
+    def _pair(self):
+        return socket.socketpair()
+
+    def test_roundtrip_small(self):
+        a, b = self._pair()
+        try:
+            payload = b'hello world'
+            send_msg(a, payload)
+            result = recv_msg(b)
+            self.assertEqual(result, payload)
+        finally:
+            a.close()
+            b.close()
+
+    def test_roundtrip_binary(self):
+        a, b = self._pair()
+        try:
+            payload = bytes(range(256)) * 100
+            send_msg(a, payload)
+            result = recv_msg(b)
+            self.assertEqual(result, payload)
+        finally:
+            a.close()
+            b.close()
+
+    def test_recv_returns_none_on_closed_socket(self):
+        a, b = self._pair()
+        a.close()
+        result = recv_msg(b)
+        self.assertIsNone(result)
+        b.close()
+
+
+if __name__ == '__main__':
+    unittest.main()
+```
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+```bash
+cd /home/adiroha/repos/niv/d1_min
+python -m pytest test_server.py::TestProtocolHelpers -v
+```
+
+Expected: `ModuleNotFoundError: No module named 'image_server'`
+
+- [ ] **Step 3: Implement the protocol helpers**
+
+Create `image_server.py`:
+
+```python
+import socket
+import socketserver
+import struct
+import logging
+import cv2
+import numpy as np
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
+logger = logging.getLogger(__name__)
+
+
+def send_msg(sock: socket.socket, data: bytes) -> None:
+    header = struct.pack('>I', len(data))
+    sock.sendall(header + data)
+
+
+def recv_msg(sock: socket.socket) -> bytes | None:
+    header = _recv_exact(sock, 4)
+    if header is None:
+        return None
+    length = struct.unpack('>I', header)[0]
+    return _recv_exact(sock, length)
+
+
+def _recv_exact(sock: socket.socket, n: int) -> bytes | None:
+    buf = b''
+    while len(buf) < n:
+        chunk = sock.recv(n - len(buf))
+        if not chunk:
+            return None
+        buf += chunk
+    return buf
+```
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+```bash
+python -m pytest test_server.py::TestProtocolHelpers -v
+```
+
+Expected:
+```
+PASSED test_server.py::TestProtocolHelpers::test_roundtrip_small
+PASSED test_server.py::TestProtocolHelpers::test_roundtrip_binary
+PASSED test_server.py::TestProtocolHelpers::test_recv_returns_none_on_closed_socket
+```
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add image_server.py test_server.py
+git commit -m "feat: add TCP framing helpers with tests"
+```
+
+---
+
+### Task 2: Server handler and main
+
+**Files:**
+- Modify: `image_server.py` — add `ImageHandler`, `main()`
+- Modify: `test_server.py` — add `TestImageServer` with synthetic image test
+
+- [ ] **Step 1: Add the failing integration test**
+
+Append to `test_server.py` (before `if __name__ == '__main__':`):
+
+```python
+import threading
+import socketserver
+import cv2
+import numpy as np
+from image_server import send_msg, recv_msg, ImageHandler
+
+
+def _make_server() -> socketserver.ThreadingTCPServer:
+    server = socketserver.ThreadingTCPServer(('127.0.0.1', 0), ImageHandler)
+    server.allow_reuse_address = True
+    return server
+
+
+class TestImageServer(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.server = _make_server()
+        cls.port = cls.server.server_address[1]
+        cls.thread = threading.Thread(target=cls.server.serve_forever, daemon=True)
+        cls.thread.start()
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.server.shutdown()
+        cls.thread.join(timeout=2)
+
+    def _connect(self) -> socket.socket:
+        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        sock.connect(('127.0.0.1', self.port))
+        return sock
+
+    def _send_image(self, sock: socket.socket, img: np.ndarray) -> None:
+        _, jpeg = cv2.imencode('.jpg', img)
+        send_msg(sock, jpeg.tobytes())
+
+    def _recv_gray(self, sock: socket.socket) -> np.ndarray:
+        data = recv_msg(sock)
+        self.assertIsNotNone(data, 'Server returned no data')
+        gray = cv2.imdecode(np.frombuffer(data, dtype=np.uint8), cv2.IMREAD_GRAYSCALE)
+        self.assertIsNotNone(gray, 'Response could not be decoded as image')
+        return gray
+
+    def test_synthetic_image_becomes_grayscale(self):
+        img = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
+        with self._connect() as sock:
+            self._send_image(sock, img)
+            gray = self._recv_gray(sock)
+        self.assertEqual(gray.shape, (100, 100))
+```
+
+- [ ] **Step 2: Run the test to verify it fails**
+
+```bash
+python -m pytest test_server.py::TestImageServer::test_synthetic_image_becomes_grayscale -v
+```
+
+Expected: `ImportError: cannot import name 'ImageHandler' from 'image_server'`
+
+- [ ] **Step 3: Implement ImageHandler and main()**
+
+Append to `image_server.py`:
+
+```python
+class ImageHandler(socketserver.BaseRequestHandler):
+    def handle(self) -> None:
+        logger.info('Connection from %s', self.client_address)
+        try:
+            while True:
+                data = recv_msg(self.request)
+                if data is None:
+                    break
+                img = cv2.imdecode(np.frombuffer(data, dtype=np.uint8), cv2.IMREAD_COLOR)
+                if img is None:
+                    logger.warning('Could not decode image, closing connection')
+                    break
+                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+                _, jpeg = cv2.imencode('.jpg', gray)
+                send_msg(self.request, jpeg.tobytes())
+        except (ConnectionResetError, BrokenPipeError, OSError) as exc:
+            logger.warning('Connection error: %s', exc)
+        finally:
+            logger.info('Connection closed: %s', self.client_address)
+
+
+def main() -> None:
+    host, port = '0.0.0.0', 9999
+    with socketserver.ThreadingTCPServer((host, port), ImageHandler) as server:
+        server.allow_reuse_address = True
+        logger.info('Listening on %s:%d', host, port)
+        try:
+            server.serve_forever()
+        except KeyboardInterrupt:
+            logger.info('Shutting down')
+
+
+if __name__ == '__main__':
+    main()
+```
+
+- [ ] **Step 4: Run the test to verify it passes**
+
+```bash
+python -m pytest test_server.py::TestImageServer::test_synthetic_image_becomes_grayscale -v
+```
+
+Expected: `PASSED`
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add image_server.py test_server.py
+git commit -m "feat: add threaded TCP image server with grayscale conversion"
+```
+
+---
+
+### Task 3: Multiple-image and real-image tests
+
+**Files:**
+- Modify: `test_server.py` — add two more test methods to `TestImageServer`
+
+- [ ] **Step 1: Add the tests**
+
+Add these two methods inside the `TestImageServer` class in `test_server.py`:
+
+```python
+    def test_multiple_images_same_connection(self):
+        with self._connect() as sock:
+            for _ in range(3):
+                img = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
+                self._send_image(sock, img)
+                gray = self._recv_gray(sock)
+                self.assertEqual(gray.shape, (64, 64))
+
+    def test_real_image(self):
+        img = cv2.imread('sample/left.png')
+        self.assertIsNotNone(img, 'sample/left.png must be readable')
+        h, w = img.shape[:2]
+        with self._connect() as sock:
+            self._send_image(sock, img)
+            gray = self._recv_gray(sock)
+        self.assertEqual(gray.shape, (h, w))
+```
+
+- [ ] **Step 2: Run the new tests**
+
+```bash
+python -m pytest test_server.py::TestImageServer -v
+```
+
+Expected: all three tests in `TestImageServer` pass.
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add test_server.py
+git commit -m "test: add multi-image and real-image integration tests"
+```
+
+---
+
+### Task 4: Client module
+
+**Files:**
+- Create: `image_client.py`
+- Modify: `test_server.py` — add `TestImageClient`
+
+- [ ] **Step 1: Add the failing client test**
+
+Append to `test_server.py` (before `if __name__ == '__main__':`):
+
+```python
+class TestImageClient(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.server = _make_server()
+        cls.port = cls.server.server_address[1]
+        cls.thread = threading.Thread(target=cls.server.serve_forever, daemon=True)
+        cls.thread.start()
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.server.shutdown()
+        cls.thread.join(timeout=2)
+
+    def test_client_process_returns_grayscale(self):
+        from image_client import ImageClient
+        img = np.random.randint(0, 256, (80, 80, 3), dtype=np.uint8)
+        with ImageClient('127.0.0.1', self.port) as client:
+            gray = client.process(img)
+        self.assertEqual(gray.shape, (80, 80))
+
+    def test_client_process_multiple_images(self):
+        from image_client import ImageClient
+        with ImageClient('127.0.0.1', self.port) as client:
+            for size in [32, 64, 128]:
+                img = np.random.randint(0, 256, (size, size, 3), dtype=np.uint8)
+                gray = client.process(img)
+                self.assertEqual(gray.shape, (size, size))
+```
+
+- [ ] **Step 2: Run to verify failure**
+
+```bash
+python -m pytest test_server.py::TestImageClient -v
+```
+
+Expected: `ModuleNotFoundError: No module named 'image_client'`
+
+- [ ] **Step 3: Implement image_client.py**
+
+Create `image_client.py`:
+
+```python
+import socket
+import sys
+import cv2
+import numpy as np
+from image_server import send_msg, recv_msg
+
+
+class ImageClient:
+    def __init__(self, host: str, port: int) -> None:
+        self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        self._sock.connect((host, port))
+
+    def process(self, img: np.ndarray) -> np.ndarray:
+        _, jpeg = cv2.imencode('.jpg', img)
+        send_msg(self._sock, jpeg.tobytes())
+        data = recv_msg(self._sock)
+        if data is None:
+            raise RuntimeError('Server closed the connection unexpectedly')
+        gray = cv2.imdecode(np.frombuffer(data, dtype=np.uint8), cv2.IMREAD_GRAYSCALE)
+        if gray is None:
+            raise RuntimeError('Server response could not be decoded as an image')
+        return gray
+
+    def close(self) -> None:
+        self._sock.close()
+
+    def __enter__(self) -> 'ImageClient':
+        return self
+
+    def __exit__(self, *_) -> None:
+        self.close()
+
+
+def main() -> None:
+    if len(sys.argv) != 3:
+        print(f'Usage: {sys.argv[0]} <input_image> <output_image>')
+        sys.exit(1)
+    img = cv2.imread(sys.argv[1])
+    if img is None:
+        print(f'Cannot read image: {sys.argv[1]}')
+        sys.exit(1)
+    with ImageClient('127.0.0.1', 9999) as client:
+        gray = client.process(img)
+    cv2.imwrite(sys.argv[2], gray)
+    print(f'Saved: {sys.argv[2]}')
+
+
+if __name__ == '__main__':
+    main()
+```
+
+- [ ] **Step 4: Run the client tests**
+
+```bash
+python -m pytest test_server.py::TestImageClient -v
+```
+
+Expected:
+```
+PASSED test_server.py::TestImageClient::test_client_process_returns_grayscale
+PASSED test_server.py::TestImageClient::test_client_process_multiple_images
+```
+
+- [ ] **Step 5: Run the full test suite**
+
+```bash
+python -m pytest test_server.py -v
+```
+
+Expected: all 8 tests pass, no failures.
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add image_client.py test_server.py
+git commit -m "feat: add ImageClient with integration tests"
+```
+
+---
+
+## Usage
+
+**Run the server:**
+```bash
+python image_server.py
+# Listening on 0.0.0.0:9999
+```
+
+**Run the client (in another terminal):**
+```bash
+python image_client.py sample/left.png output_gray.png
+# Saved: output_gray.png
+```
+
+**Run all tests:**
+```bash
+python -m pytest test_server.py -v
+```
diff --git a/docs/docs_niv/superpowers/specs/2026-05-18-image-server-design.md b/docs/docs_niv/superpowers/specs/2026-05-18-image-server-design.md
new file mode 100644
index 0000000..dde7220
--- /dev/null
+++ b/docs/docs_niv/superpowers/specs/2026-05-18-image-server-design.md
@@ -0,0 +1,87 @@
+# Image RGB-to-Grayscale TCP Server — Design Spec
+
+**Date:** 2026-05-18  
+**Status:** Approved
+
+---
+
+## Overview
+
+A Python TCP server that accepts image data from remote clients, converts each image from RGB to grayscale using OpenCV, and returns the result. Uses the existing `opencv-python` dependency — no new packages required.
+
+---
+
+## Files
+
+| File | Purpose |
+|------|---------|
+| `image_server.py` | Server: listens for connections, receives images, converts, sends back |
+| `image_client.py` | Client: sends a color image, receives and saves the grayscale result |
+| `test_server.py` | Tests: integration tests using a real server on a free port |
+
+---
+
+## Architecture
+
+`socketserver.ThreadingTCPServer` with a `BaseRequestHandler` subclass. Each client connection runs in its own thread, allowing multiple concurrent clients. Image conversion is handled by `cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)`.
+
+```
+Client                          Server (one thread per connection)
+------                          ----------------------------------
+encode image as JPEG
+send [4-byte length][JPEG]  -->  recv length header
+                                 recv N bytes of JPEG
+                                 decode JPEG → BGR numpy array
+                                 cvtColor BGR → GRAY
+                                 encode GRAY as JPEG
+                            <--  send [4-byte length][JPEG]
+recv length header
+recv N bytes of JPEG
+decode → grayscale image
+```
+
+A single connection can carry multiple request/response pairs before closing.
+
+---
+
+## Wire Protocol
+
+Every message in both directions:
+
+```
+[4 bytes, big-endian uint32: payload length][N bytes: JPEG image data]
+```
+
+- **Length header:** `struct.pack('>I', len(jpeg_bytes))` / `struct.unpack('>I', header)[0]`
+- **Image encoding:** JPEG (OpenCV default quality)
+- **Direction:** identical framing for both client→server and server→client
+
+---
+
+## Error Handling
+
+| Scenario | Behavior |
+|----------|----------|
+| Client disconnects mid-transfer | Handler catches `ConnectionResetError` / partial read, logs, closes socket |
+| Corrupt or non-image bytes | `cv2.imdecode` returns `None`; handler logs a warning and closes connection |
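+| `KeyboardInterrupt` on server | `serve_forever()` is interrupted; the `except KeyboardInterrupt` handler logs the shutdown and the enclosing `with` block closes the server socket; exits cleanly |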
+
+---
+
+## Testing (`test_server.py`)
+
+Uses Python `unittest` — no new dependencies. Each test class starts a real `ThreadingTCPServer` on a random free port (port `0`) in a `setUpClass` background thread and shuts it down in `tearDownClass`.
+
+| Test | What it verifies |
+|------|-----------------|
+| `test_synthetic_image_becomes_grayscale` | 100×100 synthetic RGB → response decodes as valid grayscale (shape `(100, 100)`) |
+| `test_multiple_images_same_connection` | 3 images sent sequentially on one socket, all responses valid |
+| `test_real_image` | Sends `sample/left.png` (known to exist); verifies grayscale response |
+
+---
+
+## Non-Goals
+
+- No authentication or TLS (local / trusted network assumed)
+- No image format negotiation (JPEG only)
+- No retry logic on the client
diff --git a/docs/superpowers/specs/2026-04-30-depth-fusion-design.md b/docs/superpowers/specs/2026-04-30-depth-fusion-design.md
new file mode 100644
index 0000000..ad27f71
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-30-depth-fusion-design.md
@@ -0,0 +1,239 @@
+# RealSense Depth Fusion — Design Spec
+
+**Date:** 2026-04-30  
+**Status:** Approved for implementation  
+**Goal:** Fuse RealSense RGB-D depth into FastFoundationStereo at three pipeline stages to improve disparity accuracy, with no latency constraint and using fine-tuning of the pretrained model.
+
+---
+
+## 1. Overview
+
+The modified model takes three inputs: `image1` (left RGB), `image2` (right RGB), and `depth` (RealSense depth map, single-channel metric in meters, hardware-registered to the left RGB camera). Depth is fused at three stages: early (feature level), mid (cost volume logits), and late (GRU initialization).
+
+**Modified forward signature:**
+```python
+def forward(self, image1, image2, depth=None, focal=None, baseline=None, iters=12, ...)
+```
+
+When `depth=None`, all three stages are skipped and the model behaves identically to the original. No existing code paths are changed.
+
+**Architecture diagram:**
+```
+INPUTS
+  image1 (B,3,H,W) ──┐
+  image2 (B,3,H,W) ──┤
+  depth  (B,1,H,W) ──┼──────────────────────────────────────────────────┐
+  focal, baseline    │                                                   │
+                     │                                                   │
+STAGE 1 — EARLY FEATURE FUSION                                         │
+                     │                                                   │
+  image1+2 ──► Feature (EdgeNeXt)      depth ──► DepthEncoder [NEW]    │
+               [x4,x8,x16,x32]                   [d4,d8,d16,d32]       │
+                    │                                    │               │
+                    └──── DepthFusionModule [NEW] ───────┘               │
+                         (concat + 1×1 conv per scale)                  │
+                         → fused_left[0..3]                             │
+                                                                        │
+COST VOLUME (unchanged)                                                 │
+                                                                        │
+  fused_left[0] + features_right[0]                                    │
+       ├── GWC volume                                                   │
+       └── Concat volume                                                │
+                │                                                       │
+           corr_stem → corr_feature_att                                 │
+           hourglass (3D, FeatureAtt + CostVolumeDisparityAttention)   │
+           classifier → logits (B, D, H/4, W/4)                        │
+                │                                                       │
+STAGE 2 — DISPARITY PRIOR ON LOGITS                                    │
+                │                                                       │
+                │    depth ──► disp_prior = focal·baseline/depth ◄──────┘
+                │                  │
+                │    prior_bias = Gaussian(μ=disp_prior, σ=σ_learned)
+                │                  │  (zeroed where depth is invalid)
+                └──► logits + α·prior_bias   [NEW α, σ learnable]
+                              │
+                     softmax → prob → soft-argmax → stereo_init
+                                                          │
+STAGE 3 — GRU INITIALIZATION BLEND                       │
+                                                          │
+  disp_prior_14 ──┐                                       │
+  validity_mask ──┼──► DepthInitBlend [NEW] ──► blend_w ─┤
+  stereo_init ────┘    (3-layer CNN → sigmoid)            │
+                                                          │
+  init_disp = blend_w·disp_prior + (1-blend_w)·stereo_init ◄──────────┘
+
+GRU ITERATIVE REFINEMENT (unchanged internals)
+  for itr in range(iters):
+      geo_feat = Combined_Geo_Encoding_Volume(disp)
+      net_list, mask_feat_4, delta_disp = update_block(...)
+      disp = disp + delta_disp
+      disp_up = upsample_disp(disp, mask_feat_4, stem_2x)
+
+OUTPUT: final disparity (B,1,H,W)
+```
+
+---
+
+## 2. Depth Preprocessing
+
+Applied before `DepthEncoder` and before computing `disp_prior`. Shared logic, computed once in `forward`.
+
+- **Validity mask:** `M = (depth > 0) & torch.isfinite(depth)` → (B,1,H,W) float, 1=valid, 0=invalid
+- **Normalization:** `d_norm = (depth - DEPTH_MEAN) / DEPTH_STD` applied only to valid pixels; invalid pixels set to 0
+- **Encoder input:** `torch.cat([d_norm, M], dim=1)` → (B,2,H,W)
+- **`DEPTH_MEAN`, `DEPTH_STD`:** dataset statistics, stored as registered buffers (not trained)
+
+---
+
+## 3. Stage 1 — DepthEncoder + DepthFusionModule
+
+**File:** `core/extractor.py`
+
+### DepthEncoder
+
+Lightweight 4-scale CNN built from existing `BasicConv` primitives.
+
+```
+Input: (B, 2, H, W)  [normalized depth + validity mask]
+
+stem:   BasicConv(2  → 32, k=3, stride=2, BN+ReLU)   → (B, 32, H/2, W/2)
+stage1: BasicConv(32 → 32, k=3, stride=2, BN+ReLU)   → depth_x4  (B, 32,  H/4,  W/4)
+stage2: BasicConv(32 → 64, k=3, stride=2, BN+ReLU)   → depth_x8  (B, 64,  H/8,  W/8)
+stage3: BasicConv(64 → 96, k=3, stride=2, BN+ReLU)   → depth_x16 (B, 96,  H/16, W/16)
+stage4: BasicConv(96 → 128,k=3, stride=2, BN+ReLU)   → depth_x32 (B, 128, H/32, W/32)
+
+depth_chans = [32, 64, 96, 128]
+```
+
+Each stage's output is multiplied by a downsampled validity mask so invalid regions produce near-zero features and do not corrupt fusion.
+
+### DepthFusionModule
+
+One 1×1 conv per scale. Input is concat of left feature and depth feature; output has the same shape as the left feature.
+
+```
+d_out = Feature.d_out = [96+vit_feat_dim, 192, 320, 304]
+
+fusion_4:  Conv2d(d_out[0]+32,  d_out[0],  1)
+fusion_8:  Conv2d(d_out[1]+64,  d_out[1],  1)
+fusion_16: Conv2d(d_out[2]+96,  d_out[2],  1)
+fusion_32: Conv2d(d_out[3]+128, d_out[3],  1)
+```
+
+Applied only to `features_left`. Right image features are not fused — RealSense depth is registered to the left camera only.
+
+**In `FastFoundationStereo.forward`:**
+```python
+if depth is not None:
+    depth_input = self.preprocess_depth(depth)          # normalize + validity mask
+    depth_feats = self.depth_encoder(depth_input)
+    features_left = self.depth_fusion(features_left, depth_feats)
+```
+
+---
+
+## 4. Stage 2 — Disparity Prior on Logits
+
+**File:** `core/foundation_stereo.py`
+
+Convert depth to disparity at 1/4 resolution, build a per-pixel Gaussian prior over the disparity axis, and add it as an additive bias to the cost volume logits before softmax.
+
+```python
+# depth → disparity prior
+disp_prior = (focal * baseline) / depth.clamp(min=1e-3)    # (B,1,H,W)
+disp_prior_14 = F.interpolate(disp_prior / 4, (H//4, W//4), mode='nearest')
+validity_14   = F.interpolate(M.float(), (H//4, W//4), mode='nearest')
+
+# Gaussian bias over disparity axis
+d_indices = torch.arange(D, device=disp_prior.device).view(1, D, 1, 1)
+prior_bias = -0.5 * ((d_indices - disp_prior_14) / self.depth_sigma.abs()) ** 2
+prior_bias = prior_bias * validity_14    # flat (zero) where depth is invalid
+
+# Inject into logits
+logits = self.classifier(comb_volume).squeeze(1)
+if depth is not None:
+    logits = logits + self.depth_prior_scale * prior_bias
+prob = F.softmax(logits, dim=1)
+init_disp_stereo = disparity_regression(prob, self.args.max_disp // 4)
+```
+
+**New parameters:**
+- `self.depth_sigma`: `nn.Parameter(torch.tensor(4.0))` — learned std in disparity units at 1/4 scale
+- `self.depth_prior_scale`: `nn.Parameter(torch.tensor(0.1))` — initialized small so training starts near original stereo behavior
+
+---
+
+## 5. Stage 3 — GRU Initialization Blend
+
+**File:** `core/foundation_stereo.py`
+
+A small 3-layer CNN learns per-pixel blend weights between the stereo soft-argmax estimate and the depth-derived disparity prior.
+
+```python
+class DepthInitBlend(nn.Module):
+    # Input:  concat(stereo_init, disp_prior_14, validity_14) → (B, 3, H/4, W/4)
+    # Layers: BasicConv(3→16), BasicConv(16→16), Conv2d(16→1)
+    # Output: blend_w ∈ [0,1] via sigmoid, masked to 0 where validity_14=0
+```
+
+**In `FastFoundationStereo.forward`:**
+```python
+if depth is not None:
+    blend_w = self.depth_init_blend(
+        torch.cat([init_disp_stereo, disp_prior_14, validity_14], dim=1)
+    ) * validity_14
+    init_disp = blend_w * disp_prior_14 + (1 - blend_w) * init_disp_stereo
+else:
+    init_disp = init_disp_stereo
+```
+
+The blend learns *where* to trust depth (nearby objects, textureless regions) vs. stereo (edges, far range), giving the GRU a better warm start.
+
+---
+
+## 6. New Modules Summary
+
+| Module | File | Est. Params | Role |
+|---|---|---|---|
+| `DepthEncoder` | `core/extractor.py` | ~200K | Encode raw depth to 4-scale features |
+| `DepthFusionModule` | `core/extractor.py` | ~80K | Fuse depth features into left image features |
+| `depth_sigma`, `depth_prior_scale` | `core/foundation_stereo.py` | 2 | Learned prior sharpness and weight |
+| `DepthInitBlend` | `core/foundation_stereo.py` | ~5K | Per-pixel GRU init blend |
+
+---
+
+## 7. Training Strategy
+
+**Frozen — do not update:**
+- `self.feature.stem` and `self.feature.stages` (EdgeNeXt backbone, ~7M params)
+- Cost volume builders (no parameters)
+
+**New modules — train from scratch at full LR:**
+- `DepthEncoder`
+- `DepthFusionModule`
+- `depth_sigma`, `depth_prior_scale`
+- `DepthInitBlend`
+
+**Existing modules — fine-tune at 0.1× base LR:**
+- `self.feature.deconv32_16`, `deconv16_8`, `deconv8_4`, `conv4`
+- `self.cost_agg` (hourglass, all layers)
+- `self.update_block` (GRU)
+- `self.cnet`, `self.context_zqr_convs`
+- `self.classifier`
+
+**Loss:** standard sequence disparity loss (smooth L1 on all GRU iteration outputs, same as original training). No changes needed.
+
+**Dataset requirement:** 4-tuples of `(left_RGB, right_RGB, RealSense_depth, GT_disparity)`. GT disparity can come from LiDAR accumulation, structured-light ground truth at close range, or synthetic renders with simulated RealSense noise (holes at edges and reflective surfaces).
+
+---
+
+## 8. Depth Hole Handling Summary
+
+Invalid pixels (holes, out-of-range readings) are handled consistently at each stage:
+
+| Stage | Invalid pixel behavior |
+|---|---|
+| DepthEncoder | Features multiplied by downsampled validity mask → near-zero output |
+| DepthFusionModule | 1×1 conv learns to ignore near-zero depth features |
+| Stage 2 prior bias | `prior_bias * validity_14` → flat (zero additive bias) for invalid pixels |
+| Stage 3 blend | `blend_w * validity_14` → blend_w=0, falls back to stereo init |
diff --git a/docs/superpowers/specs/2026-05-04-depth-fusion-v2-design.md b/docs/superpowers/specs/2026-05-04-depth-fusion-v2-design.md
new file mode 100644
index 0000000..9131e9c
--- /dev/null
+++ b/docs/superpowers/specs/2026-05-04-depth-fusion-v2-design.md
@@ -0,0 +1,161 @@
+# RealSense Depth Fusion v2 — Design Spec
+
+**Date:** 2026-05-04  
+**Status:** Approved for implementation  
+**Goal:** Fuse RealSense depth into FastFoundationStereo without DepthEncoder or DepthFusionModule; fully freeze feature extraction; add a new post-GRU full-resolution output blend stage.
+
+---
+
+## 1. Overview
+
+v1 (`finetune_inbolt_depthrs.py`) performed feature-level fusion via a learned depth encoder and per-scale residual projections. Even after all initialization bugs and unit errors were fixed, the 13-sample Inbolt dataset was too small for the new modules to learn from scratch, and the model matched but did not beat the stereo fine-tuned baseline.
+
+v2 removes the heavy early-fusion modules entirely and instead acts only at the **cost volume** and **disparity output** stages, where the depth prior carries direct geometric meaning with no learned feature extraction required.
+
+**Forward signature (unchanged externally):**
+```python
+def forward(self, image1, image2, depth_rs_mm=None, iters=12, test_mode=False, ...)
+```
+
+When `depth_rs_mm=None`, all depth stages are skipped and the model behaves identically to the wrapped pretrained stereo model.
+
+---
+
+## 2. Architecture Diagram
+
+```
+INPUTS
+  left_IR     (B,3,H,W) ──────────────────────────────────────────────────────┐
+  right_IR    (B,3,H,W) ──────────────────────────────────────────────────────┤
+  depth_rs_mm (B,1,H,W) ──────────────────────────────────────────────────────┤
+                                                                                │
+FEATURE EXTRACTION  [FULLY FROZEN]                                             │
+  left+right → Feature (EdgeNeXt backbone + FPN decoder)                      │
+  → feat_left[x4,x8,x16,x32], feat_right[x4,x8,x16,x32]                     │
+  → stem_2x   (for convex upsampling)                                         │
+                                                                                │
+DEPTH PREPROCESSING  [no learned params]                                       │
+  validity M = (depth_rs_mm > 0) & isfinite(depth_rs_mm)                      │
+  depth_rs_m = depth_rs_mm / 1000.0 · M                                       │
+  disp_prior = where(M, focal·baseline_m / depth_rs_m, 0)  ← full-res px     │
+  disp_prior_14 = F.interpolate(disp_prior / 4, H/4, W/4, 'nearest')         │
+  validity_14   = F.interpolate(M, H/4, W/4, 'nearest')                       │
+                                                                                │
+COST VOLUME  [fine-tune 0.1× LR]                                               │
+  feat_left[0] + feat_right[0]                                                 │
+       ├── GWC volume                                                           │
+       └── Concat volume                                                        │
+               │                                                                │
+          corr_stem → corr_feature_att → hourglass → logits (B,D,H/4,W/4)    │
+                                                                                │
+STAGE 2 — GAUSSIAN LOGIT PRIOR  [learnable σ, α — full LR]                   │
+  d_idx = arange(D).view(1,D,1,1)                                              │
+  prior_bias = −½·((d_idx − disp_prior_14) / σ.abs().clamp(0.1))²            │
+  prior_bias = prior_bias · validity_14                                         │
+  logits' = logits + α · prior_bias                                            │
+  → prob = softmax(logits')                                                     │
+  → stereo_init = disparity_regression(prob, max_disp//4)   (B,1,H/4,W/4)    │
+                                                                                │
+STAGE 3a — GRU INIT BLEND  [DepthInitBlend ~5K — full LR]                    │
+  x = cat(stereo_init, disp_prior_14, validity_14)    (B,3,H/4,W/4)          │
+  blend_w = sigmoid(3-layer CNN(x)) · validity_14    [bias=-5 at init → 0]   │
+  init_disp = blend_w·disp_prior_14 + (1−blend_w)·stereo_init                │
+                                                                                │
+CONTEXT NETWORK + GRU ITERATIONS  [fine-tune 0.1× LR]                         │
+  cnet(feat_left) → net_list, inp_list, att                                    │
+  for itr in 0..iters-1:                                                       │
+      geo_feat = Geo_Encoding_Volume(disp, coords)                             │
+      Δdisp = update_block(net_list, inp_list, geo_feat, disp, att)           │
+      disp += Δdisp                                                             │
+      disp_up = upsample_disp(disp, mask_feat_4, stem_2x)     (B,1,H,W)      │
+                                                                                │
+STAGE 3b — OUTPUT BLEND  [DepthOutputBlend ~8K — full LR]  ← NEW            ←┘
+  for each disp_up in disp_preds:
+      x = cat(disp_up, disp_prior_full, validity)     (B,3,H,W)
+      blend_w = sigmoid(3-layer CNN(x)) · validity    [bias=-5 at init → 0]
+      disp_final = disp_up + blend_w·(disp_prior_full − disp_up)
+
+OUTPUT: disp_final (B,1,H,W)
+```
+
+---
+
+## 3. New Modules
+
+### DepthInitBlend (Stage 3a, ~5 K params)
+```
+Input:  cat(stereo_init, disp_prior_14, validity_14)  (B,3,H/4,W/4)
+Layers: BasicConv(3→16, k=3, BN+ReLU)
+        BasicConv(16→16, k=3, BN+ReLU)
+        Conv2d(16→1, k=1)                     ← weight=0, bias=−5 at init
+Output: blend_w = sigmoid(net(x)) · validity_14
+Return: blend_w·disp_prior_14 + (1−blend_w)·stereo_init
+```
+
+### DepthOutputBlend (Stage 3b, ~8 K params)  — NEW
+```
+Input:  cat(disp_stereo, disp_prior_full, validity)  (B,3,H,W)
+Layers: BasicConv(3→32, k=3, BN+ReLU)
+        BasicConv(32→32, k=3, BN+ReLU)
+        Conv2d(32→1, k=1)                     ← weight=0, bias=−5 at init
+Output: blend_w = sigmoid(net(x)) · validity
+Return: disp_stereo + blend_w·(disp_prior_full − disp_stereo)   [residual]
+```
+
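+Both modules start with near-zero blend weight (sigmoid(−5) ≈ 0.007), so at epoch 0 Stages 3a and 3b leave the output of the wrapped pretrained stereo model almost unchanged (not bit-identical: at valid pixels about 0.7% of the gap to the depth prior is blended in, and the Stage 2 prior is active with α = 0.1). Depth fusion is learned gradually.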
+
+Applied in **training mode** to every GRU iteration prediction (8 predictions per step → 8 sequence-loss terms, all passing through Stage 3b). Applied in **test mode** only to the final upsampled disparity.
+
+---
+
+## 4. Depth Preprocessing
+
+All depth-prior computations use a consistent unit system:
+
+```python
+depth_rs_m  = depth_rs_mm / 1000.0 · validity         # metres, invalid → 0
+disp_prior  = where(validity, focal_px·baseline_m / depth_rs_m.clamp(1e-3), 0)
+              # focal(px) · baseline(m) / depth(m) = pixels
+```
+
+No separate normalization step or `DEPTH_NORM_M` constant needed (no DepthEncoder).
+
+---
+
+## 5. Parameter Groups
+
+| Group | Modules | LR |
+|---|---|---|
+| Frozen (no grad) | `stereo.feature` (all submodules), `stereo.stem_2` | 0 |
+| New — full LR | `depth_init_blend`, `depth_output_blend`, `depth_sigma`, `depth_prior_scale` | 2e-5 |
+| Fine-tune — 0.1× LR | All remaining trainable stereo params | 2e-6 |
+
+New learnable params: `depth_sigma` (σ, 4.0), `depth_prior_scale` (α, 0.1), plus ~13 K CNN params.  
+Fully frozen: entire `Feature` module (backbone + FPN decoder) + `stem_2`.
+
+---
+
+## 6. Zero-Init Guarantee
+
+At initialization, with the final 1×1 conv of each new blend module set to weight 0 and bias −5:
+
+- **Stage 2**: `α = 0.1` → small Gaussian prior added to logits. The prior is centered at the depth-derived disparity and has standard deviation σ=4 (in 1/4-scale pixels ≈ 16 full-scale pixels), giving a gentle push toward the depth estimate without overriding the cost volume.
+- **Stage 3a**: `blend_w ≈ 0.007` → `init_disp ≈ stereo_init`. GRU warm start is unchanged.
+- **Stage 3b**: `blend_w ≈ 0.007` → `disp_final ≈ disp_up`. Output is unchanged.
+
+Epoch 1 training loss should be < 1.0 (same order as the fine-tuned stereo baseline).
+
+---
+
+## 7. Key Differences from v1
+
+| | v1 | v2 |
+|---|---|---|
+| DepthEncoder | ✓ (4-scale CNN) | ✗ removed |
+| DepthFusionModule | ✓ (residual proj) | ✗ removed |
+| Feature extraction frozen | stem+stages only | entire Feature + stem_2 |
+| Stage 2 logit prior | ✓ | ✓ |
+| Stage 3a GRU init blend | ✓ | ✓ |
+| Stage 3b output blend | ✗ | ✓ NEW |
+| New params | ~288 K | ~13 K |
+| Fine-tune params | ~12.4 M | ~11.4 M |
diff --git a/model_architecture_report.html b/model_architecture_report.html
new file mode 100644
index 0000000..3b1aa11
--- /dev/null
+++ b/model_architecture_report.html
@@ -0,0 +1,917 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <title>FastFoundationStereo — Architecture Report</title>
+  <style>
+    :root {
+      --bg:       #0f1117;
+      --surface:  #1a1d27;
+      --surface2: #22263a;
+      --border:   #2e3450;
+      --accent:   #4f7cff;
+      --accent2:  #7c4fff;
+      --green:    #3ecf8e;
+      --yellow:   #f5a623;
+      --red:      #ff5c5c;
+      --text:     #e2e8f0;
+      --muted:    #8892a4;
+      --code-bg:  #111827;
+    }
+    * { box-sizing: border-box; margin: 0; padding: 0; }
+    body {
+      background: var(--bg);
+      color: var(--text);
+      font-family: 'Segoe UI', system-ui, sans-serif;
+      font-size: 15px;
+      line-height: 1.7;
+    }
+
+    /* ── Header ── */
+    header {
+      background: linear-gradient(135deg, #1a1d27 0%, #0d1b4b 100%);
+      border-bottom: 1px solid var(--border);
+      padding: 48px 40px 36px;
+    }
+    header h1 {
+      font-size: 2.2rem;
+      font-weight: 700;
+      background: linear-gradient(90deg, #6ab0ff, #a78bfa);
+      -webkit-background-clip: text;
+      -webkit-text-fill-color: transparent;
+      background-clip: text;
+    }
+    header .subtitle {
+      color: var(--muted);
+      margin-top: 6px;
+      font-size: 0.95rem;
+    }
+    .badges {
+      display: flex;
+      gap: 10px;
+      margin-top: 18px;
+      flex-wrap: wrap;
+    }
+    .badge {
+      padding: 4px 12px;
+      border-radius: 20px;
+      font-size: 0.78rem;
+      font-weight: 600;
+      border: 1px solid;
+    }
+    .badge-blue  { background: #1e3a8a22; border-color: #3b82f6; color: #93c5fd; }
+    .badge-green { background: #06402022; border-color: #22c55e; color: #86efac; }
+    .badge-purple{ background: #4c1d9522; border-color: #a78bfa; color: #c4b5fd; }
+
+    /* ── Layout ── */
+    main { max-width: 1100px; margin: 0 auto; padding: 40px 32px 80px; }
+
+    /* ── Section headings ── */
+    .section-title {
+      font-size: 1.25rem;
+      font-weight: 700;
+      color: var(--text);
+      margin: 48px 0 20px;
+      padding-bottom: 10px;
+      border-bottom: 1px solid var(--border);
+      display: flex;
+      align-items: center;
+      gap: 10px;
+    }
+    .section-title .icon {
+      width: 28px; height: 28px;
+      background: linear-gradient(135deg, var(--accent), var(--accent2));
+      border-radius: 8px;
+      display: flex; align-items: center; justify-content: center;
+      font-size: 14px;
+    }
+
+    /* ── Pipeline diagram ── */
+    .pipeline {
+      display: flex;
+      flex-direction: column;
+      gap: 0;
+    }
+    .pipe-step {
+      display: flex;
+      align-items: stretch;
+      gap: 0;
+    }
+    .pipe-connector {
+      display: flex;
+      flex-direction: column;
+      align-items: center;
+      width: 48px;
+      flex-shrink: 0;
+    }
+    .pipe-dot {
+      width: 14px; height: 14px;
+      border-radius: 50%;
+      background: var(--accent);
+      border: 2px solid var(--bg);
+      flex-shrink: 0;
+      margin-top: 22px;
+    }
+    .pipe-line {
+      width: 2px;
+      background: linear-gradient(to bottom, var(--accent), var(--accent2));
+      flex: 1;
+      min-height: 20px;
+    }
+    .pipe-line.last { background: transparent; }
+    .pipe-card {
+      flex: 1;
+      background: var(--surface);
+      border: 1px solid var(--border);
+      border-radius: 12px;
+      padding: 18px 22px;
+      margin: 6px 0;
+      transition: border-color 0.2s;
+    }
+    .pipe-card:hover { border-color: var(--accent); }
+    .pipe-card.highlight { border-color: var(--red); background: #1a0f0f; }
+    .pipe-header {
+      display: flex;
+      align-items: center;
+      justify-content: space-between;
+      gap: 12px;
+      flex-wrap: wrap;
+    }
+    .pipe-num {
+      font-size: 0.72rem;
+      color: var(--muted);
+      font-weight: 600;
+      letter-spacing: 0.08em;
+      text-transform: uppercase;
+    }
+    .pipe-name {
+      font-size: 1rem;
+      font-weight: 700;
+      color: var(--text);
+      margin-top: 2px;
+    }
+    .pipe-file {
+      font-size: 0.78rem;
+      color: var(--muted);
+      font-family: 'Cascadia Code', 'Fira Code', monospace;
+      background: var(--code-bg);
+      padding: 2px 8px;
+      border-radius: 4px;
+      border: 1px solid var(--border);
+    }
+    .pipe-desc {
+      color: var(--muted);
+      font-size: 0.88rem;
+      margin-top: 10px;
+    }
+    .pipe-shapes {
+      display: flex;
+      gap: 6px;
+      margin-top: 12px;
+      align-items: center;
+      flex-wrap: wrap;
+    }
+    .shape-tag {
+      font-family: 'Cascadia Code', 'Fira Code', monospace;
+      font-size: 0.78rem;
+      padding: 3px 10px;
+      border-radius: 6px;
+      background: var(--code-bg);
+      border: 1px solid var(--border);
+      color: #7dd3fc;
+    }
+    .arrow-tag { color: var(--muted); font-size: 0.85rem; }
+    .pipe-cost {
+      margin-top: 14px;
+    }
+    .cost-label {
+      font-size: 0.75rem;
+      color: var(--muted);
+      margin-bottom: 5px;
+      font-weight: 600;
+    }
+    .cost-bar-wrap {
+      background: var(--code-bg);
+      border-radius: 6px;
+      height: 8px;
+      width: 100%;
+      overflow: hidden;
+    }
+    .cost-bar {
+      height: 100%;
+      border-radius: 6px;
+      transition: width 1s ease;
+    }
+    .cost-pct {
+      font-size: 0.78rem;
+      color: var(--muted);
+      margin-top: 4px;
+    }
+    .cost-high   { background: linear-gradient(90deg, #ef4444, #f97316); }
+    .cost-med    { background: linear-gradient(90deg, #f59e0b, #fbbf24); }
+    .cost-low    { background: linear-gradient(90deg, #3b82f6, #60a5fa); }
+    .cost-tiny   { background: linear-gradient(90deg, #6b7280, #9ca3af); }
+
+    /* ── Sub-blocks inside a card ── */
+    .sub-blocks {
+      margin-top: 14px;
+      display: flex;
+      flex-direction: column;
+      gap: 6px;
+    }
+    .sub-block {
+      display: flex;
+      align-items: flex-start;
+      gap: 10px;
+      padding: 8px 12px;
+      background: var(--code-bg);
+      border-radius: 8px;
+      border: 1px solid var(--border);
+      font-size: 0.83rem;
+    }
+    .sub-block-icon { font-size: 1rem; flex-shrink: 0; margin-top: 1px; }
+    .sub-block-text { color: var(--muted); line-height: 1.5; }
+    .sub-block-text strong { color: var(--text); }
+
+    /* ── Hourglass diagram ── */
+    .hourglass-diagram {
+      background: var(--code-bg);
+      border: 1px solid var(--border);
+      border-radius: 10px;
+      padding: 18px 20px;
+      font-family: 'Cascadia Code', 'Fira Code', monospace;
+      font-size: 0.8rem;
+      color: #a5b4fc;
+      line-height: 1.8;
+      overflow-x: auto;
+      white-space: pre;
+    }
+
+    /* ── Performance table ── */
+    .perf-table-wrap { overflow-x: auto; }
+    table {
+      width: 100%;
+      border-collapse: collapse;
+      font-size: 0.875rem;
+    }
+    thead tr {
+      background: var(--surface2);
+      text-transform: uppercase;
+      font-size: 0.72rem;
+      letter-spacing: 0.08em;
+      color: var(--muted);
+    }
+    th { padding: 10px 16px; text-align: left; font-weight: 600; }
+    td { padding: 11px 16px; border-top: 1px solid var(--border); vertical-align: middle; }
+    tr:hover td { background: var(--surface2); }
+    .rank-1 td:first-child { color: var(--red); font-weight: 700; }
+    .rank-2 td:first-child { color: var(--yellow); font-weight: 700; }
+    .rank-3 td:first-child { color: #60a5fa; font-weight: 700; }
+    .mini-bar-wrap {
+      width: 120px;
+      background: var(--code-bg);
+      border-radius: 4px;
+      height: 7px;
+      overflow: hidden;
+      display: inline-block;
+      vertical-align: middle;
+    }
+    .mini-bar { height: 100%; border-radius: 4px; }
+
+    /* ── Info box ── */
+    .info-box {
+      background: #0d2040;
+      border: 1px solid #1d4ed8;
+      border-left: 4px solid #3b82f6;
+      border-radius: 8px;
+      padding: 16px 20px;
+      margin: 24px 0;
+      font-size: 0.9rem;
+    }
+    .warn-box {
+      background: #1a0f00;
+      border: 1px solid #92400e;
+      border-left: 4px solid var(--yellow);
+      border-radius: 8px;
+      padding: 16px 20px;
+      margin: 24px 0;
+      font-size: 0.9rem;
+    }
+    .warn-box .warn-title, .info-box .info-title {
+      font-weight: 700;
+      margin-bottom: 6px;
+      font-size: 0.95rem;
+    }
+    .warn-box .warn-title { color: var(--yellow); }
+    .info-box .info-title { color: #60a5fa; }
+
+    /* ── Param count grid ── */
+    .stat-grid {
+      display: grid;
+      grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
+      gap: 14px;
+      margin: 20px 0;
+    }
+    .stat-card {
+      background: var(--surface);
+      border: 1px solid var(--border);
+      border-radius: 10px;
+      padding: 18px;
+      text-align: center;
+    }
+    .stat-val {
+      font-size: 1.8rem;
+      font-weight: 800;
+      background: linear-gradient(90deg, var(--accent), var(--accent2));
+      -webkit-background-clip: text;
+      -webkit-text-fill-color: transparent;
+      background-clip: text;
+    }
+    .stat-label { font-size: 0.8rem; color: var(--muted); margin-top: 4px; }
+
+    /* ── Footer ── */
+    footer {
+      border-top: 1px solid var(--border);
+      padding: 24px 40px;
+      color: var(--muted);
+      font-size: 0.82rem;
+      text-align: center;
+    }
+
+    code {
+      font-family: 'Cascadia Code', 'Fira Code', monospace;
+      background: var(--code-bg);
+      padding: 1px 6px;
+      border-radius: 4px;
+      font-size: 0.85em;
+      color: #7dd3fc;
+    }
+  </style>
+</head>
+<body>
+
+<header>
+  <h1>FastFoundationStereo</h1>
+  <div class="subtitle">Neural Network Architecture &amp; Processing Cost Report</div>
+  <div class="badges">
+    <span class="badge badge-blue">Stereo Depth Estimation</span>
+    <span class="badge badge-green">EdgeNeXt Backbone</span>
+    <span class="badge badge-purple">GRU Iterative Refinement</span>
+    <span class="badge badge-blue">3D Cost Aggregation</span>
+    <span class="badge badge-purple">Transformer Attention</span>
+  </div>
+</header>
+
+<main>
+
+  <!-- Overview stats -->
+  <div class="section-title">
+    <div class="icon">📊</div>
+    Model Overview
+  </div>
+
+  <div class="stat-grid">
+    <div class="stat-card">
+      <div class="stat-val">~23M</div>
+      <div class="stat-label">Total Parameters</div>
+    </div>
+    <div class="stat-card">
+      <div class="stat-val">480×640</div>
+      <div class="stat-label">Typical Input Resolution</div>
+    </div>
+    <div class="stat-card">
+      <div class="stat-val">192</div>
+      <div class="stat-label">Max Disparity (pixels)</div>
+    </div>
+    <div class="stat-card">
+      <div class="stat-val">8–12</div>
+      <div class="stat-label">GRU Refinement Iterations</div>
+    </div>
+    <div class="stat-card">
+      <div class="stat-val">1/4</div>
+      <div class="stat-label">Internal Resolution Scale</div>
+    </div>
+    <div class="stat-card">
+      <div class="stat-val">fp16</div>
+      <div class="stat-label">AMP Inference Dtype</div>
+    </div>
+  </div>
+
+  <div class="info-box">
+    <div class="info-title">🔁 Pipeline Summary</div>
+    FastFoundationStereo follows a classic stereo matching flow: <strong>feature extraction → cost volume → cost aggregation → iterative GRU refinement → upsampling</strong>.
+    The key design choices are a <em>3D hourglass</em> with per-scale image feature guidance for cost aggregation, and a <em>Selective ConvGRU</em> that blends small and large receptive fields based on a spatial attention map.
+  </div>
+
+  <!-- Pipeline -->
+  <div class="section-title">
+    <div class="icon">⛓</div>
+    Block-by-Block Pipeline
+  </div>
+
+  <div class="pipeline">
+
+    <!-- Block 1 -->
+    <div class="pipe-step">
+      <div class="pipe-connector">
+        <div class="pipe-dot"></div>
+        <div class="pipe-line"></div>
+      </div>
+      <div class="pipe-card">
+        <div class="pipe-header">
+          <div>
+            <div class="pipe-num">Block 1</div>
+            <div class="pipe-name">Feature Extraction</div>
+          </div>
+          <span class="pipe-file">core/extractor.py · Feature</span>
+        </div>
+        <div class="pipe-shapes">
+          <span class="shape-tag">2 × (B, 3, H, W)</span>
+          <span class="arrow-tag">→</span>
+          <span class="shape-tag">(B, 128, H/4, W/4)</span>
+          <span class="shape-tag">(B, 192, H/8, W/8)</span>
+          <span class="shape-tag">(B, 320, H/16, W/16)</span>
+          <span class="shape-tag">(B, 304, H/32, W/32)</span>
+        </div>
+        <div class="sub-blocks">
+          <div class="sub-block">
+            <span class="sub-block-icon">🧠</span>
+            <span class="sub-block-text"><strong>EdgeNeXt-small backbone (pretrained)</strong> — 4-stage hierarchical encoder producing multi-scale features at strides 4, 8, 16, 32. Channel widths: 48 → 96 → 160 → 304.</span>
+          </div>
+          <div class="sub-block">
+            <span class="sub-block-icon">🔺</span>
+            <span class="sub-block-text"><strong>FPN-style deconvolution</strong> — deconv32→16, deconv16→8, deconv8→4 with lateral skip connections. Final Conv1×1 projects to 128ch at stride-4 (includes ViT feature dim offset).</span>
+          </div>
+          <div class="sub-block">
+            <span class="sub-block-icon">⚡</span>
+            <span class="sub-block-text"><strong>Efficiency trick</strong> — left and right images concatenated into one batch <code>(2B, 3, H, W)</code> for a single forward pass through the shared backbone, then split.</span>
+          </div>
+        </div>
+        <div class="pipe-cost">
+          <div class="cost-label">Estimated compute share</div>
+          <div class="cost-bar-wrap"><div class="cost-bar cost-med" style="width:13%"></div></div>
+          <div class="cost-pct">~10–15%</div>
+        </div>
+      </div>
+    </div>
+
+    <!-- Block 2 -->
+    <div class="pipe-step">
+      <div class="pipe-connector">
+        <div class="pipe-dot"></div>
+        <div class="pipe-line"></div>
+      </div>
+      <div class="pipe-card">
+        <div class="pipe-header">
+          <div>
+            <div class="pipe-num">Block 2</div>
+            <div class="pipe-name">Cost Volume Construction</div>
+          </div>
+          <span class="pipe-file">core/submodule.py · build_gwc / build_concat</span>
+        </div>
+        <div class="pipe-shapes">
+          <span class="shape-tag">(B, 128, H/4, W/4) × 2</span>
+          <span class="arrow-tag">→</span>
+          <span class="shape-tag">(B, 32, D/4, H/4, W/4)</span>
+        </div>
+        <div class="sub-blocks">
+          <div class="sub-block">
+            <span class="sub-block-icon">📐</span>
+            <span class="sub-block-text"><strong>GWC Volume</strong> (Group-wise Correlation) — features split into 8 groups, dot-product correlation for every disparity shift <code>d=0..D/4-1</code>. Shape: <code>(B, 8, D/4, H/4, W/4)</code>. Optimised via PyTorch1 or Triton kernel.</span>
+          </div>
+          <div class="sub-block">
+            <span class="sub-block-icon">🧩</span>
+            <span class="sub-block-text"><strong>Concat Volume</strong> — <code>proj_cmb</code> (Conv1×1: 128→12ch) projects each side, then left/right feature pairs concatenated for each shift. Shape: <code>(B, 24, D/4, H/4, W/4)</code>.</span>
+          </div>
+          <div class="sub-block">
+            <span class="sub-block-icon">🔗</span>
+            <span class="sub-block-text"><strong>Combined</strong> — channel-wise cat → <code>(B, 32, 48, H/4, W/4)</code>. At 480×640 this tensor holds ~29.5M values per sample (32 × 48 × 120 × 160) — the single largest allocation.</span>
+          </div>
+        </div>
+        <div class="pipe-cost">
+          <div class="cost-label">Estimated compute share</div>
+          <div class="cost-bar-wrap"><div class="cost-bar cost-med" style="width:13%"></div></div>
+          <div class="cost-pct">~10–15%</div>
+        </div>
+      </div>
+    </div>
+
+    <!-- Block 3 -->
+    <div class="pipe-step">
+      <div class="pipe-connector">
+        <div class="pipe-dot"></div>
+        <div class="pipe-line"></div>
+      </div>
+      <div class="pipe-card">
+        <div class="pipe-header">
+          <div>
+            <div class="pipe-num">Block 3</div>
+            <div class="pipe-name">Cost Volume Stem</div>
+          </div>
+          <span class="pipe-file">core/foundation_stereo.py · corr_stem + corr_feature_att</span>
+        </div>
+        <div class="pipe-shapes">
+          <span class="shape-tag">(B, 32, 48, H/4, W/4)</span>
+          <span class="arrow-tag">→</span>
+          <span class="shape-tag">(B, 28, 48, H/4, W/4)</span>
+        </div>
+        <div class="sub-blocks">
+          <div class="sub-block">
+            <span class="sub-block-icon">🔧</span>
+            <span class="sub-block-text"><strong>corr_stem</strong> — Conv3d 1×1: 32→28ch, then BasicConv 3D 3×3×3, then 2 × ResnetBasicBlock3D (28ch). Projects and refines the raw cost volume.</span>
+          </div>
+          <div class="sub-block">
+            <span class="sub-block-icon">👁</span>
+            <span class="sub-block-text"><strong>FeatureAtt</strong> — squeeze-excite style: global-avg-pool the 2D image features (128ch), FC → scale factors applied to cost volume channels. Modulates cost volume by image content at stride-4.</span>
+          </div>
+        </div>
+        <div class="pipe-cost">
+          <div class="cost-label">Estimated compute share</div>
+          <div class="cost-bar-wrap"><div class="cost-bar cost-low" style="width:3%"></div></div>
+          <div class="cost-pct">~3%</div>
+        </div>
+      </div>
+    </div>
+
+    <!-- Block 4 — HIGHLIGHT -->
+    <div class="pipe-step">
+      <div class="pipe-connector">
+        <div class="pipe-dot" style="background:var(--red);"></div>
+        <div class="pipe-line"></div>
+      </div>
+      <div class="pipe-card highlight">
+        <div class="pipe-header">
+          <div>
+            <div class="pipe-num">Block 4 — ⚠ Bottleneck</div>
+            <div class="pipe-name">Hourglass Cost Aggregation</div>
+          </div>
+          <span class="pipe-file">core/foundation_stereo.py · hourglass</span>
+        </div>
+        <div class="pipe-shapes">
+          <span class="shape-tag">(B, 28, 48, H/4, W/4)</span>
+          <span class="arrow-tag">→</span>
+          <span class="shape-tag">(B, 28, 48, H/4, W/4)</span>
+        </div>
+        <div class="hourglass-diagram">
+ Input  (28ch, D/4, H/4, W/4)
+   │
+   ├─[Encoder]──────────────────────────────────────────────
+   │  conv1: stride-2 3D + Conv3dNormActReduced  →  56ch  D/8  H/8  W/8
+   │    └─ FeatureAtt_8  (guided by x8 img feats, 192ch)
+   │  conv2: stride-2 3D + Conv3dNormActReduced  →  112ch D/16 H/16 W/16
+   │    └─ FeatureAtt_16 (guided by x16 img feats, 320ch)
+   │  conv3: stride-2 3D + Conv3dNormActReduced  →  168ch D/32 H/32 W/32
+   │    └─ FeatureAtt_32 (guided by x32 img feats, 304ch)
+   │
+   ├─[Bottleneck Attention]─────────────────────────────────
+   │  conv_patch: 4×4×4 strided → (28ch, D/16, H/16, W/16)
+   │  CostVolumeDisparityAttention:
+   │    4-head Transformer × 4 layers  (in disparity dimension)
+   │    + sinusoidal positional embeddings
+   │  interpolate ×4 → (28ch, D/4, H/4, W/4)  [residual add]
+   │
+   ├─[Decoder]──────────────────────────────────────────────
+   │  conv3_up: deconv  →  112ch
+   │    cat with enc skip → 224ch → agg_0 (2×Conv3dNormActReduced) → 112ch
+   │    └─ FeatureAtt_up_16
+   │  conv2_up: deconv  →  56ch
+   │    cat with enc skip → 112ch → agg_1 (2×Conv3dNormActReduced) → 56ch
+   │    └─ FeatureAtt_up_8
+   │  conv1_up: deconv  →  28ch
+   │    + bottleneck residual
+   │  conv_out: 2×Conv3dNormActReduced → 28ch
+   │
+ Output (28ch, D/4, H/4, W/4)</div>
+        <div class="pipe-cost">
+          <div class="cost-label">Estimated compute share</div>
+          <div class="cost-bar-wrap"><div class="cost-bar cost-high" style="width:55%"></div></div>
+          <div class="cost-pct">~50–60% — dominant bottleneck due to 3D conv over 4D volume at multiple scales</div>
+        </div>
+      </div>
+    </div>
+
+    <!-- Block 5 -->
+    <div class="pipe-step">
+      <div class="pipe-connector">
+        <div class="pipe-dot"></div>
+        <div class="pipe-line"></div>
+      </div>
+      <div class="pipe-card">
+        <div class="pipe-header">
+          <div>
+            <div class="pipe-num">Block 5</div>
+            <div class="pipe-name">Classifier + Initial Disparity</div>
+          </div>
+          <span class="pipe-file">core/foundation_stereo.py · classifier</span>
+        </div>
+        <div class="pipe-shapes">
+          <span class="shape-tag">(B, 28, 48, H/4, W/4)</span>
+          <span class="arrow-tag">→</span>
+          <span class="shape-tag">(B, 1, H/4, W/4)</span>
+        </div>
+        <div class="sub-blocks">
+          <div class="sub-block">
+            <span class="sub-block-icon">🎯</span>
+            <span class="sub-block-text"><strong>Classifier</strong> — BasicConv 3D: 28→14ch, ResnetBasicBlock3D, Conv3d 7×7×7: 14→1ch → logits <code>(B, 48, H/4, W/4)</code>.</span>
+          </div>
+          <div class="sub-block">
+            <span class="sub-block-icon">📉</span>
+            <span class="sub-block-text"><strong>Soft-argmax</strong> — softmax over disparity dim, weighted sum <code>Σ p(d) · d</code> → smooth initial disparity estimate (used as seed for GRU iterations).</span>
+          </div>
+        </div>
+        <div class="pipe-cost">
+          <div class="cost-label">Estimated compute share</div>
+          <div class="cost-bar-wrap"><div class="cost-bar cost-low" style="width:2%"></div></div>
+          <div class="cost-pct">~2%</div>
+        </div>
+      </div>
+    </div>
+
+    <!-- Block 6 -->
+    <div class="pipe-step">
+      <div class="pipe-connector">
+        <div class="pipe-dot"></div>
+        <div class="pipe-line"></div>
+      </div>
+      <div class="pipe-card">
+        <div class="pipe-header">
+          <div>
+            <div class="pipe-num">Block 6</div>
+            <div class="pipe-name">Context Network + Attention</div>
+          </div>
+          <span class="pipe-file">core/extractor.py · ContextNetSharedBackbone</span>
+        </div>
+        <div class="pipe-shapes">
+          <span class="shape-tag">(B, 128, H/4, W/4)</span>
+          <span class="arrow-tag">→</span>
+          <span class="shape-tag">net (128ch) + inp (128ch)</span>
+        </div>
+        <div class="sub-blocks">
+          <div class="sub-block">
+            <span class="sub-block-icon">🧵</span>
+            <span class="sub-block-text"><strong>ContextNet</strong> — two parallel Conv2d 3×3 projections on <code>x4</code>: one for hidden state <code>net</code> (tanh), one for context <code>inp</code> (ReLU). Uses the same left features already computed in Block 1.</span>
+          </div>
+          <div class="sub-block">
+            <span class="sub-block-icon">🔍</span>
+            <span class="sub-block-text"><strong>CAM</strong> (Channel Attention Enhancement) — squeeze-excite on <code>inp</code>. <strong>SAM</strong> (Spatial Attention Extractor) — produces spatial attention map used by SelectiveConvGRU. Both are very lightweight.</span>
+          </div>
+        </div>
+        <div class="pipe-cost">
+          <div class="cost-label">Estimated compute share</div>
+          <div class="cost-bar-wrap"><div class="cost-bar cost-tiny" style="width:1%"></div></div>
+          <div class="cost-pct">~1%</div>
+        </div>
+      </div>
+    </div>
+
+    <!-- Block 7 -->
+    <div class="pipe-step">
+      <div class="pipe-connector">
+        <div class="pipe-dot"></div>
+        <div class="pipe-line"></div>
+      </div>
+      <div class="pipe-card">
+        <div class="pipe-header">
+          <div>
+            <div class="pipe-num">Block 7</div>
+            <div class="pipe-name">Geometry Encoding Volume</div>
+          </div>
+          <span class="pipe-file">core/geometry.py · Combined_Geo_Encoding_Volume</span>
+        </div>
+        <div class="pipe-shapes">
+          <span class="shape-tag">features_left/right[0] + cost vol</span>
+          <span class="arrow-tag">→</span>
+          <span class="shape-tag">2-level pyramid (sampled per iter)</span>
+        </div>
+        <div class="sub-blocks">
+          <div class="sub-block">
+            <span class="sub-block-icon">🗺</span>
+            <span class="sub-block-text"><strong>Full correlation map</strong> — einsum dot-product between all left×right feature positions: <code>(B, H/4, W/4, 1, W/4)</code>. Normalised L2. Built once.</span>
+          </div>
+          <div class="sub-block">
+            <span class="sub-block-icon">🔭</span>
+            <span class="sub-block-text"><strong>2-level pyramid</strong> — cost volume and correlation map both avg-pooled in disparity dim × 2 for coarse/fine sampling. At each GRU iter, 1D bilinear sampling at <code>disp ± corr_radius=4</code> → 9 samples × 2 levels × 2 sources = feature vector.</span>
+          </div>
+        </div>
+        <div class="pipe-cost">
+          <div class="cost-label">Estimated compute share (init + 8 lookups)</div>
+          <div class="cost-bar-wrap"><div class="cost-bar cost-low" style="width:7%"></div></div>
+          <div class="cost-pct">~5–8%</div>
+        </div>
+      </div>
+    </div>
+
+    <!-- Block 8 -->
+    <div class="pipe-step">
+      <div class="pipe-connector">
+        <div class="pipe-dot" style="background:var(--yellow);"></div>
+        <div class="pipe-line"></div>
+      </div>
+      <div class="pipe-card">
+        <div class="pipe-header">
+          <div>
+            <div class="pipe-num">Block 8 — Iterative (×8 default)</div>
+            <div class="pipe-name">Selective GRU Update Block</div>
+          </div>
+          <span class="pipe-file">core/update.py · BasicSelectiveMultiUpdateBlock</span>
+        </div>
+        <div class="pipe-shapes">
+          <span class="shape-tag">net (128ch) + inp (128ch) + geo_feat</span>
+          <span class="arrow-tag">→</span>
+          <span class="shape-tag">Δdisp (1ch) + mask (32ch)</span>
+        </div>
+        <div class="sub-blocks">
+          <div class="sub-block">
+            <span class="sub-block-icon">🏃</span>
+            <span class="sub-block-text"><strong>BasicMotionEncoder</strong> — corr branch: Conv2d 1×1 + 3×3 → 256ch. Disp branch: Conv2d 7×7 + 3×3 → 64ch. Fuse: Conv2d 1×1 → 127ch + concat raw disp = 128ch motion features.</span>
+          </div>
+          <div class="sub-block">
+            <span class="sub-block-icon">🔀</span>
+            <span class="sub-block-text"><strong>SelectiveConvGRU</strong> — blends two ConvGRU cells: <em>small_gru</em> (1×1 conv, narrow RF) and <em>large_gru</em> (3×3 conv, wider RF), mixed by spatial attention <code>att</code>. Adaptive receptive field per pixel.</span>
+          </div>
+          <div class="sub-block">
+            <span class="sub-block-icon">📏</span>
+            <span class="sub-block-text"><strong>DispHead</strong> — Conv2d + ReLU, then 2 × EdgeNextConvEncoder (DWConv 7×7, expansion×4), Conv2d → Δdisp (1ch). Predicts the additive disparity update.</span>
+          </div>
+          <div class="sub-block">
+            <span class="sub-block-icon">🧮</span>
+            <span class="sub-block-text"><strong>Mask head</strong> — 2 × Conv2d → 32ch. Produces per-pixel upsampling weights used in Block 9.</span>
+          </div>
+        </div>
+        <div class="pipe-cost">
+          <div class="cost-label">Estimated compute share (×8 iters total)</div>
+          <div class="cost-bar-wrap"><div class="cost-bar cost-med" style="width:13%"></div></div>
+          <div class="cost-pct">~10–15% total (cheap per iter, accumulates over iterations)</div>
+        </div>
+      </div>
+    </div>
+
+    <!-- Block 9 -->
+    <div class="pipe-step">
+      <div class="pipe-connector">
+        <div class="pipe-dot"></div>
+        <div class="pipe-line last"></div>
+      </div>
+      <div class="pipe-card">
+        <div class="pipe-header">
+          <div>
+            <div class="pipe-num">Block 9</div>
+            <div class="pipe-name">Disparity Upsampling</div>
+          </div>
+          <span class="pipe-file">core/foundation_stereo.py · upsample_disp</span>
+        </div>
+        <div class="pipe-shapes">
+          <span class="shape-tag">(B, 1, H/4, W/4)</span>
+          <span class="arrow-tag">→</span>
+          <span class="shape-tag">(B, 1, H, W)</span>
+        </div>
+        <div class="sub-blocks">
+          <div class="sub-block">
+            <span class="sub-block-icon">🔼</span>
+            <span class="sub-block-text"><strong>stem_2x</strong> — a 2-layer Conv2d branch (<code>stem_2</code>) produces 32ch features at H/2 × W/2 directly from the input image. Gives high-resolution texture cues.</span>
+          </div>
+          <div class="sub-block">
+            <span class="sub-block-icon">⬆</span>
+            <span class="sub-block-text"><strong>Conv2x deconv</strong> (<code>spx_2_gru</code>) — upsample mask_feat_4 (32ch, H/4) with stem_2x (32ch, H/2) → 64ch at H/2. Then ConvTranspose2d → 9ch at full resolution.</span>
+          </div>
+          <div class="sub-block">
+            <span class="sub-block-icon">🎨</span>
+            <span class="sub-block-text"><strong>context_upsample</strong> — the 9 channels are per-pixel soft weights for a 3×3 neighbourhood of the disparity map. Weighted sum produces sharp, edge-aware upsampled disparity at full H × W.</span>
+          </div>
+        </div>
+        <div class="pipe-cost">
+          <div class="cost-label">Estimated compute share</div>
+          <div class="cost-bar-wrap"><div class="cost-bar cost-tiny" style="width:1%"></div></div>
+          <div class="cost-pct">~1%</div>
+        </div>
+      </div>
+    </div>
+
+  </div><!-- /pipeline -->
+
+  <!-- Performance table -->
+  <div class="section-title">
+    <div class="icon">⏱</div>
+    Relative Processing Cost (480×640, max_disp=192, iters=8)
+  </div>
+
+  <div class="warn-box">
+    <div class="warn-title">⚠ Note on measurements</div>
+    The training job <code>finetune_faro.py</code> is currently occupying the GPU (~14.9 GiB / 16 GiB), preventing live profiling.
+    Estimates below are derived from code analysis (op counts, tensor sizes, kernel types) and the partial CUDA kernel profiler output obtained before OOM.
+  </div>
+
+  <div class="perf-table-wrap">
+    <table>
+      <thead>
+        <tr>
+          <th>#</th>
+          <th>Block</th>
+          <th>Type</th>
+          <th>Calls/forward</th>
+          <th>Relative cost</th>
+          <th>Visual</th>
+        </tr>
+      </thead>
+      <tbody>
+        <tr class="rank-1">
+          <td>4</td>
+          <td>Hourglass Cost Aggregation</td>
+          <td>3D Conv enc-dec + Transformer</td>
+          <td>1×</td>
+          <td>50–60%</td>
+          <td><div class="mini-bar-wrap"><div class="mini-bar cost-high" style="width:100%"></div></div></td>
+        </tr>
+        <tr class="rank-2">
+          <td>1</td>
+          <td>Feature Extraction</td>
+          <td>2D Conv (EdgeNeXt + FPN)</td>
+          <td>1× (both images)</td>
+          <td>10–15%</td>
+          <td><div class="mini-bar-wrap"><div class="mini-bar cost-med" style="width:25%"></div></div></td>
+        </tr>
+        <tr class="rank-2">
+          <td>2</td>
+          <td>Cost Volume Construction</td>
+          <td>GWC + concat, O(D·H·W)</td>
+          <td>1×</td>
+          <td>10–15%</td>
+          <td><div class="mini-bar-wrap"><div class="mini-bar cost-med" style="width:25%"></div></div></td>
+        </tr>
+        <tr class="rank-2">
+          <td>8</td>
+          <td>GRU Update Block</td>
+          <td>2D Conv + ConvGRU</td>
+          <td>8× (one per iter)</td>
+          <td>10–15%</td>
+          <td><div class="mini-bar-wrap"><div class="mini-bar cost-med" style="width:25%"></div></div></td>
+        </tr>
+        <tr class="rank-3">
+          <td>7</td>
+          <td>Geo Encoding Volume lookup</td>
+          <td>Bilinear sampling</td>
+          <td>8× (one per iter)</td>
+          <td>5–8%</td>
+          <td><div class="mini-bar-wrap"><div class="mini-bar cost-low" style="width:13%"></div></div></td>
+        </tr>
+        <tr>
+          <td>3</td>
+          <td>Cost Volume Stem</td>
+          <td>3D Conv + ResNet</td>
+          <td>1×</td>
+          <td>~3%</td>
+          <td><div class="mini-bar-wrap"><div class="mini-bar cost-low" style="width:6%"></div></div></td>
+        </tr>
+        <tr>
+          <td>5</td>
+          <td>Classifier + Soft-argmax</td>
+          <td>3D Conv (small)</td>
+          <td>1×</td>
+          <td>~2%</td>
+          <td><div class="mini-bar-wrap"><div class="mini-bar cost-tiny" style="width:4%"></div></div></td>
+        </tr>
+        <tr>
+          <td>6</td>
+          <td>Context Network + CAM/SAM</td>
+          <td>2D Conv (tiny)</td>
+          <td>1×</td>
+          <td>~1%</td>
+          <td><div class="mini-bar-wrap"><div class="mini-bar cost-tiny" style="width:2%"></div></div></td>
+        </tr>
+        <tr>
+          <td>9</td>
+          <td>Disparity Upsampling</td>
+          <td>2D ConvTranspose + softmax</td>
+          <td>1× (test) / N× (train)</td>
+          <td>~1%</td>
+          <td><div class="mini-bar-wrap"><div class="mini-bar cost-tiny" style="width:2%"></div></div></td>
+        </tr>
+      </tbody>
+    </table>
+  </div>
+
+  <!-- Optimisation hints -->
+  <div class="section-title">
+    <div class="icon">🚀</div>
+    Optimisation Targets
+  </div>
+
+  <div class="info-box">
+    <div class="info-title">Highest-impact targets for speed improvement</div>
+    <ul style="margin-top:10px; padding-left:20px; line-height:2.0; color: var(--muted);">
+      <li><strong style="color:var(--red)">Block 4 — Hourglass (50–60%):</strong> Reduce number of hourglass levels, decrease <code>volume_dim</code> (default 28), reduce Transformer depth (<code>num_transformer=4</code>), use half-precision 3D convolutions, or replace 3D convs with 2D factorized convs.</li>
+      <li><strong style="color:var(--yellow)">Block 2 — Cost Volume (10–15%):</strong> Reduce <code>max_disp</code> or use the Triton-optimised kernel (<code>optimize_build_volume='triton'</code>). Lower <code>cv_group</code> (default 8) reduces GWC channels.</li>
+      <li><strong style="color:var(--yellow)">Block 8 — GRU Iterations (10–15%):</strong> Reduce <code>valid_iters</code> from 8 to 4–6. Quality vs. speed tradeoff. Hierarchical inference (<code>run_hierachical</code>) can help for high-res inputs.</li>
+      <li><strong style="color:#60a5fa">Block 1 — Feature Extraction (10–15%):</strong> Switch to a smaller EdgeNeXt variant or prune the FPN deconv layers. Already shared between left/right via batch trick.</li>
+    </ul>
+  </div>
+
+</main>
+
+<footer>
+  FastFoundationStereo Architecture Report — generated 2026-03-24 &nbsp;|&nbsp; Fast-FoundationStereo project
+</footer>
+
+</body>
+</html>
diff --git a/niv/Utils.py b/niv/Utils.py
new file mode 100644
index 0000000..1f12387
--- /dev/null
+++ b/niv/Utils.py
@@ -0,0 +1,88 @@
+import os, sys, torch, imageio, logging, importlib, argparse
+import cv2
+import numpy as np
+import yaml
+try:  # open3d is optional; only toOpen3dCloud needs it
+  import open3d as o3d
+except Exception:  # any import-time failure disables it, but Ctrl-C / SystemExit still propagate
+  o3d = None
+
+AMP_DTYPE = torch.float16
+
+def set_logging_format(level=logging.INFO):
+  """Reload the logging module, then configure the root logger to emit bare messages at `level`."""
+  importlib.reload(logging)  # presumably so basicConfig is not ignored when logging was already set up
+  logging.basicConfig(format='%(message)s', datefmt='%m-%d|%H:%M:%S', level=level)
+
+
+def set_seed(random_seed):
+  """Seed NumPy, Python `random` and torch (CPU + all CUDA devices); put cuDNN in deterministic mode."""
+  import random
+  import torch
+  for seeder in (np.random.seed, random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
+    seeder(random_seed)
+  cudnn = torch.backends.cudnn
+  cudnn.deterministic, cudnn.benchmark = True, False
+
+
+def toOpen3dCloud(points,colors=None,normals=None):
+  """Build an o3d.geometry.PointCloud from points, with optional colors (divided by 255 when max > 1) and normals."""
+  vec3d = o3d.utility.Vector3dVector
+  pcd = o3d.geometry.PointCloud()
+  pcd.points = vec3d(points.astype(np.float64))
+  if colors is not None:
+    pcd.colors = vec3d((colors/255.0 if colors.max()>1 else colors).astype(np.float64))
+  if normals is not None:
+    pcd.normals = vec3d(normals.astype(np.float64))
+  return pcd
+
+
+
+def depth2xyzmap(depth:np.ndarray, K, uvs:np.ndarray=None, zmin=0.1):
+  """Back-project a depth image to an (H,W,3) float32 XYZ map using the intrinsics K.
+
+  Pixels with depth < zmin come out as zeros. If uvs (N,2; u=column, v=row) is given,
+  only those (rounded) pixels are filled.
+  """
+  H,W = depth.shape[:2]
+  if uvs is not None:
+    pix = uvs.round().astype(int)
+    us,vs = pix[:,0],pix[:,1]
+  else:
+    vs,us = (g.reshape(-1) for g in np.meshgrid(np.arange(0,H),np.arange(0,W), sparse=False, indexing='ij'))
+  zs = depth[vs,us]
+  fx,fy,cx,cy = K[0,0],K[1,1],K[0,2],K[1,2]
+  pts = np.stack(((us-cx)*zs/fx,(vs-cy)*zs/fy,zs), 1)  #(N,3)
+  xyz_map = np.zeros((H,W,3), dtype=np.float32)
+  xyz_map[vs,us] = pts
+  too_close = depth<zmin
+  xyz_map[too_close] = 0
+  return xyz_map
+
+
+def vis_disparity(disp, min_val=None, max_val=None, invalid_thres=np.inf, color_map=cv2.COLORMAP_TURBO, cmap=None, other_output=None):
+  """
+  Colourise a disparity map into an (H,W,3) image; invalid pixels are black.
+  @disp: np array (H,W)
+  @invalid_thres: >= thres is invalid
+  @other_output: optional dict that receives the 'min_val'/'max_val' actually used
+  """
+  other_output = {} if other_output is None else other_output  # was a shared mutable default
+  disp = disp.copy()
+  invalid_mask = disp>=invalid_thres
+  if invalid_mask.all():
+    other_output.update(min_val=None, max_val=None)
+    return np.zeros(disp.shape[:2]+(3,))
+  if min_val is None:
+    min_val = disp[~invalid_mask].min()
+  if max_val is None:
+    max_val = disp[~invalid_mask].max()
+  other_output.update(min_val=min_val, max_val=max_val)
+  span = (max_val-min_val) or 1  # constant map: avoid 0/0 -> NaN before the uint8 cast
+  vis = ((disp-min_val)/span).clip(0,1) * 255
+  if cmap is None:
+    vis = cv2.applyColorMap(vis.astype(np.uint8), color_map)[...,::-1]
+  else:
+    vis = cmap(vis.astype(np.uint8))[...,:3]*255
+  vis[invalid_mask] = 0
+  return vis.astype(np.uint8)
diff --git a/niv/depth_prior_explainer.py b/niv/depth_prior_explainer.py
new file mode 100644
index 0000000..625bf11
--- /dev/null
+++ b/niv/depth_prior_explainer.py
@@ -0,0 +1,296 @@
+#!/usr/bin/env python3
+"""
+D1-EdgeNeXt-FFS: RS depth prior injection points
+=================================================
+
+Shows exactly where rs_disp and conf enter the network during inference.
+Two injection points:
+  1. Sensor-gated disparity initialisation (before first GRU iteration)
+  2. GRU motion encoder — prior fed at every iteration
+
+Run:
+    python3 niv/depth_prior_explainer.py --pt weights/weights_niv/stage3_best.pt
+
+Bundled sample images (demo_data/) are used when --left/--right are omitted.
+The predicted depth map is written to --output-dir (default: demo_data_out/).
+"""
+
+import argparse
+import os
+import sys
+import cv2  #matlib
+#matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+
+_HERE     = os.path.dirname(os.path.abspath(__file__))  # directory containing this script
+_FFS_ROOT = os.path.abspath(os.path.join(_HERE, '..'))  # its parent, used as the repo root
+if _FFS_ROOT not in sys.path:
+    sys.path.insert(0, _FFS_ROOT)  # makes the `niv.*` imports below resolvable when run as a script
+
+from niv.smart_lite.model import build_gwc_volume
+from niv.smart_lite.ablation.d1_models import build_d1_model  # must stay after the sys.path setup above
+
+device_type     = "cuda" if torch.cuda.is_available() else "cpu"
+device          = torch.device(device_type)  # module-level device read by to_tensor() and main()
+
+MODEL_H, MODEL_W = 384, 512  # in this file only referenced by the commented-out crop code
+
+
+# =============================================================================
+# Input helpers
+# =============================================================================
+
+def depth_mm_to_prior(depth_mm, focal_px, baseline_m):
+    """Convert an (H,W) depth map in mm into the RS prior pair, each float32 (1,1,H,W).
+
+    rs_disp = focal_px * baseline_m / depth_m where depth > 0, else 0; conf is the 0/1
+    validity mask. The input is used at its native size (the centre crop is disabled).
+    """
+    depth_m = depth_mm / 1000.0
+    has_depth = depth_m > 0.0
+    disparity = focal_px * baseline_m / np.maximum(depth_m, 1e-6)  # clamp avoids divide-by-zero on holes
+    disparity = np.where(has_depth, disparity, 0.0).astype(np.float32)
+    return disparity[None, None], has_depth.astype(np.float32)[None, None]
+
+
+def _load_image(path):
+    """Read an image file and return it as an (H,W,3) uint8 array.
+
+    A ".mat" path is read with scipy.io.loadmat; the first of the keys Il/Ir/img/image
+    found is cast to uint8 and replicated to three channels. Any other path goes
+    through cv2.imread. Exits via SystemExit when nothing usable is found.
+    """
+    if not path.endswith(".mat"):
+        img = cv2.imread(path)
+        if img is None:
+            raise SystemExit(f"Cannot read image: {path}")
+        return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) if img.ndim == 2 else img
+    import scipy.io as sio  # lazy: scipy is only needed for .mat inputs
+    mat = sio.loadmat(path)
+    hit = next((k for k in ("Il", "Ir", "img", "image") if k in mat), None)
+    if hit is None:
+        raise SystemExit(f"Unknown keys in .mat: {list(mat.keys())}")
+    return np.stack([mat[hit].astype(np.uint8)] * 3, axis=-1)
+
+
+def _load_depth_mm(path):
+    """Load a depth map as (H,W) uint16: a PNG via cv2 (IMREAD_ANYDEPTH) or a .mat
+    file (first of Z_im/depth_mm/depth). Raises SystemExit if it cannot be read."""
+    if path.endswith(".mat"):
+        import scipy.io as sio
+        mat = sio.loadmat(path)
+        found = [mat[k] for k in ("Z_im", "depth_mm", "depth") if k in mat]
+        if not found:
+            raise SystemExit(f"Unknown keys in .mat: {list(mat.keys())}")
+        return found[0].astype(np.uint16)
+    d = cv2.imread(path, cv2.IMREAD_ANYDEPTH)
+    if d is None:
+        raise SystemExit(f"Cannot read depth: {path}")
+    return d.astype(np.uint16)
+
+
+def to_tensor(bgr):
+    """Turn an (H,W,3) BGR image into a (1,3,H,W) float32 RGB tensor, scaled by 1/255, on `device`."""
+    # The centre crop to MODEL_H x MODEL_W is disabled, so the image keeps its size.
+    rgb = bgr[..., ::-1].astype(np.float32) / 255.0
+    chw = np.ascontiguousarray(rgb.transpose(2, 0, 1))  # contiguous channel-first copy
+    return torch.from_numpy(chw).unsqueeze(0).to(device)
+
+# =============================================================================
+# Annotated forward pass — two injection points
+# =============================================================================
+
+def annotated_forward(model, left_t, right_t, rs_disp_t, conf_t, num_iters=8):
+    """
+    Runs inference, prints where rs_disp / conf affect computation, and returns the last iteration's prediction.
+
+    INJECTION POINT 1 — disparity initialisation
+        Before any GRU iteration, an initial disparity estimate is formed.
+        Where RS sensor has data (conf > 0), the initial disparity is taken
+        directly from the sensor.  Where there are holes, it falls back to
+        a coarse cost-volume regression.
+
+        has_sensor = conf_q4 > 0
+        disp_init  = has_sensor * rs_q4  +  (1 - has_sensor) * coarse_disp
+
+    INJECTION POINT 2 — GRU motion encoder (every iteration)
+        At each of the num_iters GRU steps, the prior is concatenated into
+        the motion encoder alongside the current disparity and correlation:
+
+            motion = motion_encoder(current_disp, corr, prior)
+
+        Inside MotionEncoder:
+            p      = prior_net(prior)               # 2-ch → prior_w-ch
+            fused  = relu(fuse(cat([corr, disp, p])))
+            motion = cat([fused, disp], dim=1)
+
+        The prior is therefore not a one-time warm-start — it acts as a
+        persistent guidance channel at every refinement step.
+    """
+    with torch.no_grad():
+        B, _, H, W = left_t.shape
+        Hq, Wq = H // 4, W // 4  # size of the 1/4-resolution grid the prior is resampled to
+
+        # Feature extraction and cost volume (no prior here)
+        feat_left, feat_right = model._extract(left_t, right_t)
+        
+        gwc_volume = build_gwc_volume(feat_left, feat_right,  model.max_disp_q4, model.num_groups)
+
+        # Downsample prior to GRU resolution (1/4), scale disparity accordingly
+        rs_q4   = F.interpolate(rs_disp_t, (Hq, Wq), mode='bilinear', align_corners=False) * 0.25
+        conf_q4 = F.interpolate(conf_t,    (Hq, Wq), mode='bilinear', align_corners=False)
+        prior   = torch.cat([rs_q4, conf_q4], dim=1)   # (B, 2, Hq, Wq)
+
+        # ── INJECTION POINT 1: sensor-gated disparity initialisation ──────────
+        coarse_disp = model.coarse_head(gwc_volume)
+        has_sensor  = (conf_q4 > 0).float()  # hard 0/1 gate: any interpolated confidence > 0 counts as sensor data
+        disp_init   = has_sensor * rs_q4 + (1.0 - has_sensor) * coarse_disp
+
+        sensor_pct = has_sensor.mean().item() * 100
+        print(f"\n  [INJECTION 1] disparity initialisation")
+        print(f"    RS-seeded pixels : {sensor_pct:.1f}%   (conf > 0 → init from sensor)")
+        print(f"    cost-vol pixels  : {100-sensor_pct:.1f}%   (holes → init from coarse regression)")
+        print(f"    init disp range  : [{disp_init.min():.2f}, {disp_init.max():.2f}] px  (at 1/4 res)")
+
+        # Context features (no prior)
+        net, inp, att = model.context_net(feat_left)
+
+        # ── INJECTION POINT 2: prior injected into motion encoder every step ──
+        print(f"\n  [INJECTION 2] GRU iterations — prior in motion encoder each step")
+        print(f"    prior shape fed to motion_encoder: {tuple(prior.shape)}  (rs_q4 + conf_q4)")
+        disp       = disp_init
+        disp_preds = []
+        for i in range(num_iters):
+            disp   = disp.detach()  # already gradient-free inside torch.no_grad()
+            corr   = model.corr_lookup(gwc_volume, disp)
+            motion = model.motion_encoder(disp, corr, prior)   # ← prior here
+            net    = model.gru(net, torch.cat([motion, inp * att], dim=1))
+            delta  = model.disp_head(net)
+            disp   = disp + delta
+            pred   = model.upsample(net, disp)
+            disp_preds.append(pred)  # every iteration is kept, but only the last one is returned
+            print(f"    iter {i+1}/{num_iters}:  delta_mean={delta.abs().mean():.4f} px")
+
+        final = disp_preds[-1]
+        print(f"\n  final disparity: {tuple(final.shape)}  "
+              f"range [{final.min():.2f}, {final.max():.2f}] px")
+    return final
+
+
+# =============================================================================
+# Output saving
+# =============================================================================
+
+def _save_output(out_dir, disp, focal_px, baseline_m):
+    """Convert disp[0, 0] to depth (focal_px * baseline_m / disparity, for disparity > 0.5) and
+    write depth.png (uint16 mm) plus depth_colorized.png (plasma colormap) into out_dir."""
+    os.makedirs(out_dir, exist_ok=True)
+    d           = disp[0, 0].cpu().numpy()
+    valid       = d > 0.5  # smaller disparities are left at depth 0
+    depth_m     = np.zeros_like(d)
+    depth_m[valid] = focal_px * baseline_m / np.maximum(d[valid], 1e-4)
+
+    depth_mm = (depth_m * 1000).clip(0, 65535).astype(np.uint16)  # clip to the uint16 range before the cast
+    cv2.imwrite(os.path.join(out_dir, "depth.png"), depth_mm)
+
+    fig, ax = plt.subplots(figsize=(8, 5))
+    vmax = np.percentile(depth_m[valid], 95) if valid.any() else 5.0  # colour scale capped at the 95th percentile
+    im = ax.imshow(depth_m, cmap="plasma", vmin=0, vmax=vmax)
+    ax.set_title("D1-EdgeNeXt-FFS depth (real RS prior)", fontsize=11)
+    ax.axis("off")
+    fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label="depth (m)")
+    fig.tight_layout()
+    fig.savefig(os.path.join(out_dir, "depth_colorized.png"), dpi=120)
+    plt.close(fig)  # release the figure once it is on disk
+
+    print(f"\n  Saved → {out_dir}/depth.png  (uint16 mm)")
+    print(f"         → {out_dir}/depth_colorized.png")
+
+
+# =============================================================================
+# Main
+# =============================================================================
+
+def main():
+    """CLI entry point: load the D1 model, build the RS prior, run annotated_forward and save the depth map."""
+    global device  # to_tensor() reads the module-level device, so --device must rebind it
+    p = argparse.ArgumentParser(
+        description="D1-EdgeNeXt-FFS: RS prior injection point explainer")
+    p.add_argument("--pt",       default = f'{_FFS_ROOT}/weights/weights_niv/stage3_best.pt',                help="Path to stage3_best.pt")
+    p.add_argument("--ffs-ckpt", default = f'{_FFS_ROOT}/weights/weights_niv/model_best_bp2_serialize.pth',  help="FFS backbone weights (default: weights/weights_niv/model_best_bp2_serialize.pth)")
+    p.add_argument("--device",   default="cuda" if torch.cuda.is_available() else "cpu")
+    p.add_argument("--left",     metavar="FILE", help="Left IR/RGB image (PNG or FARO .mat)")
+    p.add_argument("--right",    metavar="FILE", help="Right IR/RGB image")
+    p.add_argument("--depth",    metavar="FILE", help="Hardware depth: PNG uint16 mm or FARO .mat")
+    p.add_argument("--focal",    type=float, default=420.0, metavar="PX")
+    p.add_argument("--baseline", type=float, default=0.05,  metavar="M")
+    p.add_argument("--output-dir", default = f'{_FFS_ROOT}/demo_data_out')
+    args = p.parse_args()
+
+    device = torch.device(args.device)  # --device used to be parsed but ignored
+
+    # Load model
+    model           = build_d1_model("edgenext", max_disp=192, num_iters=8, ffs_ckpt=args.ffs_ckpt)
+    ck              = torch.load(args.pt, map_location="cpu", weights_only=False)  # full unpickling: trusted checkpoints only
+    model.load_state_dict(ck.get("model", ck), strict=False)
+    model.eval().to(device)
+    total           = sum(w.numel() for w in model.parameters()) / 1e6
+    frozen          = sum(w.numel() for w in model.parameters() if not w.requires_grad) / 1e6
+    print(f"Model: {total:.1f}M params  ({frozen:.1f}M frozen backbone, {total-frozen:.1f}M trained)")
+
+    # Input data
+    if args.left and args.right:
+        left_bgr  = _load_image(args.left)
+        right_bgr = _load_image(args.right)
+        focal_px, baseline_m = args.focal, args.baseline
+        stream_h, stream_w = left_bgr.shape[:2]
+        depth_mm = _load_depth_mm(args.depth) if args.depth else np.zeros((stream_h, stream_w), dtype=np.uint16)
+        print(f"Input: {stream_w}×{stream_h}  focal={focal_px:.1f}px  baseline={baseline_m*1000:.1f}mm  "
+              f"depth valid={100*(depth_mm>0).mean():.1f}%")
+    else:
+        _sample_dir  = os.path.join(_FFS_ROOT, "demo_data")
+        _sample_l    = os.path.join(_sample_dir, "imageL_d16_000.png")
+        _sample_r    = os.path.join(_sample_dir, "imageR_d16_000.png")
+        _sample_d    = os.path.join(_sample_dir, "imageD_d16_000.png")
+        _sample_cam  = os.path.join(_sample_dir, "camera.txt")
+        if os.path.isfile(_sample_l) and os.path.isfile(_sample_r):
+            left_bgr  = _load_image(_sample_l)
+            right_bgr = _load_image(_sample_r)
+            depth_mm  = _load_depth_mm(_sample_d) if os.path.isfile(_sample_d) else np.zeros(left_bgr.shape[:2], dtype=np.uint16)
+            focal_px, baseline_m = 420.0, 0.05
+            if os.path.isfile(_sample_cam):
+                with open(_sample_cam) as cam_file:  # close the handle deterministically
+                    for line in cam_file:
+                        if line.startswith("focal_px="):    focal_px    = float(line.split("=")[1])
+                        elif line.startswith("baseline_m="): baseline_m = float(line.split("=")[1])
+            stream_h, stream_w = left_bgr.shape[:2]
+            print(f"Using bundled sample images ({stream_w}×{stream_h})")
+        else:
+            raise SystemExit("Provide --left / --right, or ensure the demo_data/ directory is present.")
+
+    # Depth mm → prior tensors
+    rs_disp_np, conf_np = depth_mm_to_prior(depth_mm, focal_px, baseline_m)
+    rs_disp_t = torch.from_numpy(rs_disp_np).to(device)
+    conf_t    = torch.from_numpy(conf_np).to(device)
+
+    left_t  = to_tensor(left_bgr)
+    right_t = to_tensor(right_bgr)
+
+    print("\n" + "=" * 55)
+    print("RS prior injection points")
+    #print("=" * 55)
+    final = annotated_forward(model, left_t, right_t, rs_disp_t, conf_t, num_iters=8)
+
+    if args.output_dir:
+        _save_output(args.output_dir, final, focal_px, baseline_m)
+
+
+if __name__ == "__main__":
+    # python3 niv/depth_prior_explainer.py --pt weights/weights_niv/stage3_best.pt
+    main()
diff --git a/niv/image_client.py b/niv/image_client.py
new file mode 100644
index 0000000..e0f26b5
--- /dev/null
+++ b/niv/image_client.py
@@ -0,0 +1,55 @@
+import socket
+import sys
+import cv2
+import numpy as np
+from niv.image_server import send_msg, recv_msg
+
+
+class ImageClient:
+    """Client for the length-prefixed JPEG protocol: process() sends a frame and returns the decoded grayscale reply."""
+    def __init__(self, host: str, port: int) -> None:
+        self._sock = socket.create_connection((host, port))  # closes the socket itself if connecting fails
+
+    def process(self, img: np.ndarray) -> np.ndarray:
+        _, jpeg = cv2.imencode('.jpg', img)
+        send_msg(self._sock, jpeg.tobytes())
+        data = recv_msg(self._sock)
+        if data is None:  # recv_msg signals a closed or truncated connection with None
+            raise RuntimeError('Server closed the connection unexpectedly')
+        gray = cv2.imdecode(np.frombuffer(data, dtype=np.uint8), cv2.IMREAD_GRAYSCALE)
+        if gray is None:
+            raise RuntimeError('Server response could not be decoded as an image')
+        return gray
+
+    def close(self) -> None:
+        self._sock.close()
+
+    def __enter__(self) -> 'ImageClient':
+        return self
+
+    def __exit__(self, *_) -> None:
+        self.close()
+
+
+def main() -> None:
+    """Stream webcam frames to the server at 127.0.0.1:9999 and show each frame next to its reply; 'q' quits."""
+    cap = cv2.VideoCapture(0)
+    if not cap.isOpened():
+        sys.exit('Cannot open webcam')  # message goes to stderr, exit status 1
+    with ImageClient('127.0.0.1', 9999) as client:
+        while True:
+            grabbed, frame = cap.read()
+            if not grabbed:
+                print('Failed to capture frame', file=sys.stderr)
+                break
+            gray = client.process(frame)
+            cv2.imshow('Original', frame)
+            cv2.imshow('Grayscale (server)', gray)
+            if cv2.waitKey(1) & 0xFF == ord('q'):
+                break
+    cap.release()
+    cv2.destroyAllWindows()
+
+ 
+if __name__ == '__main__':
+    main()
diff --git a/niv/image_server.py b/niv/image_server.py
new file mode 100644
index 0000000..fccea2c
--- /dev/null
+++ b/niv/image_server.py
@@ -0,0 +1,71 @@
+import socket
+import struct
+import logging
+import socketserver
+import cv2
+import numpy as np
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
+logger = logging.getLogger(__name__)
+
+  
+def send_msg(sock: socket.socket, data: bytes) -> None:
+    """Send data prefixed with its length as a 4-byte big-endian unsigned int."""
+    sock.sendall(struct.pack('>I', len(data)) + data)
+
+
+def recv_msg(sock: socket.socket) -> bytes | None:
+    """Read one length-prefixed message; None when the peer closed (even mid-frame)."""
+    header = _recv_exact(sock, 4)
+    if header is None:
+        return None
+    return _recv_exact(sock, struct.unpack('>I', header)[0])
+
+
+def _recv_exact(sock: socket.socket, n: int) -> bytes | None:
+    """Read exactly n bytes, or return None if the connection ends first.
+    None cannot tell a clean close from a truncated frame, so callers should
+    treat it as "connection gone" and terminate the session."""
+    buf = bytearray()  # in-place appends; bytes += would re-copy the buffer on every chunk
+    while len(buf) < n:
+        chunk = sock.recv(n - len(buf))
+        if not chunk:
+            return None
+        buf += chunk
+    return bytes(buf)
+
+
+class ImageHandler(socketserver.BaseRequestHandler):
+    """Per-connection handler: answers each received JPEG with its grayscale JPEG."""
+    def handle(self) -> None:
+        """Serve frames until the client disconnects or sends something undecodable."""
+        logger.info('Connection from %s', self.client_address)
+        try:
+            # recv_msg returns None once the peer has closed the connection
+            while (data := recv_msg(self.request)) is not None:
+                img = cv2.imdecode(np.frombuffer(data, dtype=np.uint8), cv2.IMREAD_COLOR)
+                if img is None:
+                    logger.warning('Could not decode image, closing connection')
+                    break
+                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+                _, jpeg = cv2.imencode('.jpg', gray)
+                send_msg(self.request, jpeg.tobytes())
+        except OSError as exc:  # also covers ConnectionResetError and BrokenPipeError
+            logger.warning('Connection error: %s', exc)
+        finally:
+            logger.info('Connection closed: %s', self.client_address)
+
+
+def main() -> None:
+    """Run the threaded image server on 0.0.0.0:9999 until interrupted with Ctrl-C."""
+    socketserver.ThreadingTCPServer.allow_reuse_address = True  # class attribute, consulted when binding
+    with socketserver.ThreadingTCPServer(('0.0.0.0', 9999), ImageHandler) as server:
+        logger.info('Listening on %s:%d', *server.server_address)
+        try:
+            server.serve_forever()
+        except KeyboardInterrupt:
+            logger.info('Shutting down')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/niv/requirements.txt b/niv/requirements.txt
new file mode 100644
index 0000000..9db4ce5
--- /dev/null
+++ b/niv/requirements.txt
@@ -0,0 +1,7 @@
+torch>=2.0          # install from https://pytorch.org with your CUDA version
+numpy
+timm
+opencv-python
+pyyaml
+matplotlib
+scipy               # only needed if passing .mat files via --left/--right/--depth
diff --git a/niv/run_fast_foundation_niv_with_rs.py b/niv/run_fast_foundation_niv_with_rs.py
new file mode 100644
index 0000000..0161a4e
--- /dev/null
+++ b/niv/run_fast_foundation_niv_with_rs.py
@@ -0,0 +1,875 @@
+
+'''
+python scripts/run_fast_foundation_with_rs.py - github
+
+Environment:
+    ffs - GPU laptop
+
+Installation:
+    python -m venv ./envs/ffs
+    source ./envs/ffs/bin/activate
+    pip install torch==2.6.0 torchvision==0.21.0 xformers --index-url https://download.pytorch.org/whl/cu124
+    cd Fast-FoundationStereo
+    pip install -r requirements.txt
+    pip install pyrealsense2
+    download weights : https://drive.google.com/drive/folders/1HuTt7UIp7gQsMiDvJwVuWmKpvFzIIMap
+
+    python scripts/run_demo.py --model_dir weights/20-30-48/model_best_bp2_serialize.pth --left_file demo_data/left.png --right_file demo_data/right.png --intrinsic_file assets/K.txt --out_dir output/ --remove_invisible 0 --denoise_cloud 1  --scale 1 --get_pc 1 --valid_iters 8 --max_disp 192 --zfar 100
+
+
+'''
+
+
+
+
+import numpy as np
+from argparse import ArgumentParser
+
+import os,sys
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+from omegaconf import OmegaConf
+from core.utils.utils import InputPadder
+import argparse, torch, logging, yaml
+import numpy as np
+from Utils import (
+    AMP_DTYPE, set_logging_format, set_seed, vis_disparity,
+    depth2xyzmap, toOpen3dCloud, o3d,
+)
+import cv2
+#from faro_data_manager import DataSource
+from scripts.data_manager_faro_laptop import DataSource
+from scripts.opencv_realsense_camera import RealSense
+
+
+def process_arguments():
+    """Parse the demo's command-line options (model, input files, output directory, point-cloud settings)."""
+    code_dir = os.path.dirname(os.path.realpath(__file__))
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model_dir', default=f'{code_dir}/../weights/20-30-48/model_best_bp2_serialize.pth', type=str)
+    parser.add_argument('--left_file', default=f'{code_dir}/../demo_data/left.png', type=str)
+    parser.add_argument('--right_file', default=f'{code_dir}/../demo_data/right.png', type=str)
+    parser.add_argument('--intrinsic_file', default=f'{code_dir}/../demo_data/K.txt', type=str, help='camera intrinsic matrix and baseline file')
+    parser.add_argument('--out_dir', default='/home/bowen/debug/stereo_output', type=str)  # NOTE(review): machine-specific default; foundation_stereo_algo_init wipes this directory
+    parser.add_argument('--remove_invisible', default=1, type=int)
+    parser.add_argument('--denoise_cloud', default=0, type=int)
+    parser.add_argument('--denoise_nb_points', type=int, default=30, help='number of points to consider for radius outlier removal')
+    parser.add_argument('--denoise_radius', type=float, default=0.03, help='radius to use for outlier removal')
+    parser.add_argument('--scale', default=1, type=float)
+    parser.add_argument('--hiera', default=0, type=int)
+    parser.add_argument('--get_pc', type=int, default=1, help='save point cloud output')
+    parser.add_argument('--valid_iters', type=int, default=8, help='number of flow-field updates during forward pass')
+    parser.add_argument('--max_disp', type=int, default=192, help='maximum disparity')
+    parser.add_argument('--zfar', type=float, default=100, help="max depth to include in point cloud")
+
+    return parser.parse_args()
+
+# Original -------------------------------------------
+
+# Original Split -------------------------------------------
+#from opencv_realsense_camera import RealSense
+import time
+
+def foundation_stereo_algo_init(args):
+    """Load the serialized stereo model onto the GPU and recreate ``args.out_dir`` empty.
+
+    Non-None CLI values are overlaid on the ``cfg.yaml`` stored beside the model file; the merged
+    config is logged and supplies valid_iters / max_disp.  Autograd is disabled globally."""
+    import shutil  # function-scope import so the fix needs no change to the file's import block
+    set_logging_format()
+    set_seed(0)
+    torch.autograd.set_grad_enabled(False)
+    # Plain filesystem calls instead of a shell `rm -rf`: no quoting issues or injection via out_dir.
+    shutil.rmtree(args.out_dir, ignore_errors=True)
+    os.makedirs(args.out_dir, exist_ok=True)
+    with open(f'{os.path.dirname(args.model_dir)}/cfg.yaml', 'r') as ff:
+        cfg:dict = yaml.safe_load(ff)
+    cfg.update({k: v for k, v in vars(args).items() if v is not None})
+    args = OmegaConf.create(cfg)
+    logging.info(f"args:\n{args}")
+    model = torch.load(args.model_dir, map_location='cpu', weights_only=False)  # NOTE: full unpickle; load trusted checkpoints only
+    model.args.valid_iters = args.valid_iters
+    model.args.max_disp = args.max_disp
+    model.cuda().eval()
+    return model
+
+def foundation_stereo_algo(args, model, np_left, np_right):
+    """Run one stereo forward pass and return disparity at the resolution of ``np_left``.
+
+    Args:
+        args: options object; ``scale``, ``hiera`` and ``valid_iters`` are read.
+        model: loaded stereo network exposing ``forward`` and ``run_hierachical``.
+        np_left, np_right: H x W (grayscale) or H x W x C image arrays.
+
+    Returns:
+        2-D disparity array with the height and width of ``np_left``.  Values are
+        multiplied by ``1 / args.scale`` to compensate for the internal resize,
+        and negative values are clipped to 0.
+    """
+    scale = args.scale
+
+    left, right = np_left, np_right
+    if left.ndim == 2:
+        # grayscale input: replicate the single plane into three channels
+        left  = np.tile(left[..., None], (1, 1, 3))
+        right = np.tile(right[..., None], (1, 1, 3))
+    left, right = left[..., :3], right[..., :3]    # keep at most three channels
+
+    # The left image is resized by `scale`; the right image is forced to the same size.
+    left  = cv2.resize(left, fx=scale, fy=scale, dsize=None)
+    net_h, net_w = left.shape[:2]
+    right = cv2.resize(right, dsize=(net_w, net_h))
+
+    def _as_batch(image):
+        # H x W x C numpy image -> 1 x C x H x W float tensor on the GPU
+        return torch.as_tensor(image).cuda().float()[None].permute(0, 3, 1, 2)
+
+    left_t, right_t = _as_batch(left), _as_batch(right)
+    padder = InputPadder(left_t.shape, divis_by=32, force_square=False)
+    left_t, right_t = padder.pad(left_t, right_t)
+
+    # The first call can be slow due to compilation.
+    with torch.amp.autocast('cuda', enabled=True, dtype=AMP_DTYPE):
+        if args.hiera:
+            disp = model.run_hierachical(left_t, right_t, iters=args.valid_iters, test_mode=True, small_ratio=0.5)
+        else:
+            disp = model.forward(left_t, right_t, iters=args.valid_iters, test_mode=True, optimize_build_volume='pytorch1')
+
+    disp = padder.unpad(disp.float()).data.cpu().numpy().reshape(net_h, net_w)
+    disp = (disp * (1/scale)).clip(0, None)
+
+    # Back to the caller's resolution; cv2.resize takes (width, height).
+    out_size = (np_left.shape[1], np_left.shape[0])
+    disp_out = cv2.resize(disp, out_size, interpolation=cv2.INTER_NEAREST)
+
+    # ---- disabled debug / optional steps, kept verbatim for reference ----
+    # cmap    = None
+    # min_val = None
+    # max_val = None
+    # vis     = vis_disparity(disp, min_val=min_val, max_val=max_val, cmap=cmap, color_map=cv2.COLORMAP_TURBO)
+    # vis     = np.concatenate([img0_ori, img1_ori, vis], axis=1)
+    # imageio.imwrite(f'{args.out_dir}/disp_vis.png', vis)
+    # s = 1280/vis.shape[1]
+    # resized_vis = cv2.resize(vis, (int(vis.shape[1]*s), int(vis.shape[0]*s)))
+    # cv2.imshow('disp', resized_vis[:,:,::-1])
+    # cv2.waitKey(0)
+
+    # if args.remove_invisible:
+    #     yy,xx = np.meshgrid(np.arange(disp.shape[0]), np.arange(disp.shape[1]), indexing='ij')
+    #     us_right = xx-disp
+    #     invalid = us_right<0
+    #     disp[invalid] = np.inf
+
+    # debug and show
+    #show_point_cloud(args, disp, img0_ori)
+    #disp = disp * 1000 # must be in mm
+
+    return disp_out
+
+def convert_disparity_to_depth(BF, disparity):
+    """Convert disparity to uint16 depth as BF / disparity; disparity <= 0.1 maps to 0 (from GIL).
+
+    Depth is saturated at the uint16 maximum so far points cannot wrap around in the cast."""
+    disparity           = disparity.astype(np.float32)
+    depth               = np.zeros_like(disparity)
+    disparity_valid     = disparity > 0.1
+    depth[disparity_valid]   = BF / disparity[disparity_valid]
+    return np.clip(depth, 0, np.iinfo(np.uint16).max).astype(np.uint16)
+
+def depth_opencv_rs_merge(depth_rs, depth_cv, near_min=80, near_max=550):
+    """Merge two depth maps: take ``depth_cv`` where it reports a close range, else ``depth_rs``.
+
+    A pixel is replaced when ``near_min < depth_cv < near_max`` (both bounds exclusive).
+    The defaults (80, 550) are the thresholds that used to be hard-coded here; the
+    original note cites 520 as the minimum Z of the D455 in HD mode.
+    Returns a new array with the dtype of ``depth_rs``; the inputs are not modified.
+    (An earlier variant also filled invalid RS pixels from ``depth_cv``; it is disabled.)
+    """
+    depth_merged = np.copy(depth_rs)
+    mask = (depth_cv > near_min) & (depth_cv < near_max)
+    depth_merged[mask] = depth_cv[mask]
+    return depth_merged
+
+def depth_fs_rs_error(depth_rs, depth_fs):
+    """Return the per-pixel absolute difference |depth_rs - depth_fs| as float32.
+
+    Pixels where ``depth_rs`` is below 2 are given an error of 0, so locations
+    without a usable ``depth_rs`` value do not show up in the error map.
+    """
+    reference = depth_rs.astype(np.float32)
+    estimate  = depth_fs.astype(np.float32)
+    # np.where does the zeroing and the subtraction in a single expression
+    return np.where(reference < 2, np.float32(0), np.abs(reference - estimate))
+
+def preprocess(frame, scale_factor = 0.5, crop_size=(512, 384)):
+    """Split a 3-channel frame into (left, right, depth) float32 planes and center-crop them.
+
+    Args:
+        frame: H x W x 3 array; channel 0 is used as the left image, channel 1
+            as the right image and channel 2 as the depth map.
+        scale_factor: accepted for backward compatibility but currently ignored
+            (the resize step that used it is disabled).
+        crop_size: (width, height) of the centered crop window.
+
+    Returns:
+        Tuple ``(imgL, imgR, imgD)`` of float32 arrays with identical shapes.
+        If the frame is smaller than ``crop_size`` along an axis, that axis is
+        returned uncropped.
+    """
+    planes          = frame.astype(np.float32)
+    crop_w, crop_h  = crop_size
+    H, W            = planes.shape[:2]
+    # Clamp at 0: a negative offset would be read as "from the end" by slicing
+    # and silently produce a wrong (often empty) window for small frames.
+    y0              = max((H - crop_h) // 2, 0)
+    x0              = max((W - crop_w) // 2, 0)
+    window          = planes[y0:y0+crop_h, x0:x0+crop_w]
+
+    return window[:,:,0], window[:,:,1], window[:,:,2]
+
+def show_images_depth(imgD = None, imgC = None,  fig_num = 1, fig_name = 'Depth Image', vmax = 1500):
+    """Display one image, or two equally-shaped images side by side, in an OpenCV window.
+
+    Args:
+        imgD: first image (typically a depth map) or None.
+        imgC: second image or None; placed to the right of ``imgD`` when both
+            are given.
+        fig_num: unused; kept for backward compatibility.
+        fig_name: window title prefix.
+        vmax: unused; kept for backward compatibility.
+
+    Returns:
+        True when 'q' was pressed during the 5 ms key poll, False otherwise,
+        including when there was nothing displayable.
+    """
+    if (imgD is None) and (imgC is None):
+        print('No images found')
+        return False
+
+    if imgD is None:        # only the second image is available
+        img_show = imgC
+    elif imgC is None:      # only the first image is available
+        img_show = imgD
+    elif imgD.shape == imgC.shape:
+        img_show = np.concatenate((imgD, imgC), axis = 1)
+    else:
+        # Previously this case fell through with `img_show` unbound and raised
+        # UnboundLocalError; report it and skip the frame instead.
+        print(f'Image shapes differ: {imgD.shape} vs {imgC.shape}')
+        return False
+
+    # uint16 / float32 data is scaled by 0.1 into 8 bits and shown with the TURBO colormap
+    if img_show.dtype == 'uint16' or img_show.dtype == 'float32':
+        img_show    = cv2.convertScaleAbs(img_show, alpha=0.1)
+        img_show    = cv2.applyColorMap(img_show, cv2.COLORMAP_TURBO)
+
+    # very wide images (more than 2400 columns) are downsampled once with pyrDown
+    if img_show.shape[1] > 2400:
+        img_show = cv2.pyrDown(img_show)
+
+    cv2.imshow(f'{fig_name} (q-exit)', img_show)
+    ch = cv2.waitKey(5)
+    ret = ch == ord('q')
+    return ret
+
+def show_point_cloud(args, disp, img0_ori):
+    """Back-project a disparity map to a colored point cloud and show it (blocking) in Open3D.
+
+    Does nothing unless ``args.get_pc`` is set.  ``args.intrinsic_file`` must hold the 3x3
+    intrinsic matrix on its first line (9 numbers) and the baseline on its second line.
+    Side effect: writes ``depth_meter.npy`` into ``args.out_dir``.
+    """
+    if not args.get_pc:
+        return
+    with open(args.intrinsic_file, 'r') as f:   # only the read needs the file open
+        lines = f.readlines()
+    K = np.array(list(map(float, lines[0].rstrip().split()))).astype(np.float32).reshape(3,3)
+    baseline = float(lines[1])
+    K[:2] *= args.scale                          # first two rows of K are scaled by args.scale
+    depth = K[0,0]*baseline/disp                 # depth = focal * baseline / disparity
+    np.save(f'{args.out_dir}/depth_meter.npy', depth)
+    xyz_map = depth2xyzmap(depth, K)
+    pcd = toOpen3dCloud(xyz_map.reshape(-1,3), img0_ori.reshape(-1,3))
+    zs = np.asarray(pcd.points)[:,2]
+    pcd = pcd.select_by_index(np.flatnonzero((zs > 0) & (zs <= args.zfar)))
+
+    if args.denoise_cloud:
+        logging.info("[Optional step] denoise point cloud...")
+        _, ind = pcd.remove_radius_outlier(nb_points=args.denoise_nb_points, radius=args.denoise_radius)
+        pcd = pcd.select_by_index(ind)
+    pts = np.asarray(pcd.points)
+    if len(pts) == 0:   # argmin below would raise on an empty cloud
+        logging.warning("Point cloud is empty after filtering; nothing to visualize.")
+        return
+    logging.info("Visualizing point cloud. Press ESC to exit.")
+    vis = o3d.visualization.Visualizer()
+    vis.create_window()
+    vis.add_geometry(pcd)
+    vis.get_render_option().point_size = 1.0
+    vis.get_render_option().background_color = np.array([0.5, 0.5, 0.5])
+    ctr = vis.get_view_control()
+    ctr.set_front([0, 0, -1])
+    ctr.set_lookat(pts[pts[:,2].argmin()])       # look at the point with the smallest z
+    ctr.set_up([0, -1, 0])
+    vis.run()
+    vis.destroy_window()
+
+def depth2xyz(depth:np.ndarray, K, us=None, vs=None, zmin=0.1):
+    """Back-project a depth image to an (N, 3) point array with the pinhole model.
+
+    ``us`` / ``vs`` are flat column / row indices; when ``us`` is None they are built for
+    every pixel and returned so the caller can pass them back in on later calls.
+    Depths below ``zmin`` are replaced by ``zmin``.  Returns ``(pts, us, vs)``.
+    """
+    if us is None:
+        rows, cols = np.indices(depth.shape[:2])
+        vs, us = rows.reshape(-1), cols.reshape(-1)
+    z = depth[vs, us]                    # fancy indexing copies, so `depth` is left untouched
+    z[z < zmin] = zmin
+    fx, fy, cx, cy = K[0,0], K[1,1], K[0,2], K[1,2]
+    x = (us - cx) * z / fx
+    y = (vs - cy) * z / fy
+    return np.stack((x.reshape(-1), y.reshape(-1), z.reshape(-1)), 1), us, vs
+
+# ----------------------------------------
+import open3d as o3d
+import numpy as np
+import time
+
+def test_point_cloud_rt():
+    """Demo: grow a random point cloud in a non-blocking Open3D window.
+
+    Every ``add_period`` seconds ``batch_size`` random points are appended to
+    the cloud.  The loop ends when the window is closed.
+    """
+    # Source - https://stackoverflow.com/a/74669788
+    # Posted by Javier TG, modified by community. See post 'Timeline' for change history
+    # Retrieved 2026-02-10, License - CC BY-SA 4.0
+
+    add_period = 0.01      # seconds between two insertions
+    batch_size = 10        # points appended per insertion
+
+    # Visualizer and its window.
+    viewer = o3d.visualization.Visualizer()
+    viewer.create_window(height=480, width=640)
+
+    # The cloud is registered with the visualizer before the non-blocking
+    # loop starts; it is seeded with ten random points.
+    cloud = o3d.geometry.PointCloud()
+    cloud.points = o3d.utility.Vector3dVector(np.random.rand(10, 3))
+    viewer.add_geometry(cloud)
+
+    # timestamp of the most recent insertion
+    last_insert = time.time()
+
+    # Non-blocking visualization.
+    # To exit, press 'q' or click the 'x' of the window.
+    while True:
+        if time.time() - last_insert > add_period:
+            # 1) extend with an ndarray (active).
+            cloud.points.extend(np.random.rand(batch_size, 3))
+
+            # 2) alternative: extend with a Vector3dVector instance.
+            # cloud.points.extend(
+            #     o3d.utility.Vector3dVector(np.random.rand(batch_size, 3)))
+
+            # 3) alternative: any other iterable of rows, e.g.
+            # cloud.points.extend(np.random.rand(batch_size, 3).tolist())
+
+            viewer.update_geometry(cloud)
+            last_insert = time.time()
+
+        # poll_events() returns a falsy value once the window has been closed
+        window_open = viewer.poll_events()
+        viewer.update_renderer()
+        if not window_open:
+            break
+
+    viewer.destroy_window()
+
+def test_video_stream_rs_and_foundation():
+    """Stream RealSense frames through the stereo model; show RS, model and merged depth.
+
+    Runs until 'q' is pressed in one of the windows or the camera stops delivering frames.
+    """
+    d           = RealSense(mode = 'd16', use_ir = True, frame_size = (1280,720))
+    try:
+        # optional camera tuning, currently disabled:
+        #d.load_preset_from_file('vpi/preset_rsm.json')
+        d.switch_projector(True)
+        camera_bf   = d.get_bf() # passed to convert_disparity_to_depth as BF
+        args        = process_arguments()
+
+        # init
+        model       = foundation_stereo_algo_init(args)
+
+        ret  = False
+        while not ret:
+            # frame is I1,I2, D data
+            retf, frame = d.read()
+            if retf is False:
+                print('is your camera open/connected?')
+                break
+
+            # extract
+            img_left, img_right, img_depth_rs = preprocess(frame, scale_factor=1.0)
+
+            t_start         = time.time()
+            img_disparity   = foundation_stereo_algo(args, model, img_left, img_right)
+            print(f'Disp : {img_disparity.min()} - {img_disparity.max()}')
+            img_depth_fs    = convert_disparity_to_depth(camera_bf, img_disparity)
+            print(f'Depth : {img_depth_fs.min()} - {img_depth_fs.max()}')
+            depth_merged    = depth_opencv_rs_merge(img_depth_rs, img_depth_fs)
+            print(f'Proces time : {time.time()-t_start} sec')
+
+            # show
+            ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+            ret2 = show_images_depth(img_depth_fs, None, fig_name='Depth FS')
+            ret3 = show_images_depth(depth_merged, None, fig_name='Depth Merged')
+            ret4 = show_images_depth(img_left.astype(np.uint8), img_right.astype(np.uint8), fig_name='Images L-R')
+            ret = ret1 or ret2 or ret3 or ret4
+    finally:
+        # release the camera even if the model or the display code raises
+        d.release()
+
+def test_video_error_rs_versus_foundation():
+    """Stream RealSense frames through the stereo model and display the RS-vs-model depth error.
+
+    The error map comes from depth_fs_rs_error (absolute difference, zeroed where the RS
+    depth is below 2).  Runs until 'q' is pressed in one of the windows or the camera
+    stops delivering frames.
+    """
+    d           = RealSense(mode = 'd16', use_ir = True, frame_size = (1280,720))
+    try:
+        # optional camera tuning, currently disabled:
+        #d.load_preset_from_file('vpi/preset_rsm.json')
+        #d.set_exposure(100000) # 10ms
+        #d.switch_projector(False)
+        camera_bf   = d.get_bf() # passed to convert_disparity_to_depth as BF
+        args        = process_arguments()
+
+        # init
+        model       = foundation_stereo_algo_init(args)
+
+        ret  = False
+        while not ret:
+            # frame is I1,I2, D data
+            retf, frame = d.read()
+            if retf is False:
+                print('is your camera open/connected?')
+                break
+
+            # extract
+            img_left, img_right, img_depth_rs = preprocess(frame, scale_factor=1.0)
+
+            t_start         = time.time()
+            img_disparity   = foundation_stereo_algo(args, model, img_left, img_right)
+            img_depth_fs    = convert_disparity_to_depth(camera_bf, img_disparity)
+            depth_error     = depth_fs_rs_error(img_depth_rs, img_depth_fs)
+            print(f'Proces time : {time.time()-t_start} sec')
+
+            # show
+            ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+            ret2 = show_images_depth(img_depth_fs, None, fig_name='Depth FS')
+            ret3 = show_images_depth(depth_error, None, fig_name='Depth Error', vmax = 100)
+            ret4 = show_images_depth(img_left.astype(np.uint8), img_right.astype(np.uint8), fig_name='Images L-R')
+            ret = ret1 or ret2 or ret3 or ret4
+
+            # optional per-frame dump, currently disabled:
+            # d.save_image(img_depth_rs,fname='depth_rs')
+            # d.save_image(img_depth_fs,fname='depth_fs')
+            # d.save_image(img_left,fname='img_left')
+            # d.save_image(img_right,fname='img_right')
+    finally:
+        # release the camera even if the model or the display code raises
+        d.release()
+
+def test_point_cloud_rs_versus_foundation():
+    """Stream RealSense frames through the stereo model and show its depth as a live point cloud.
+
+    An Open3D window (point cloud) and several OpenCV windows (depth maps, error map, input
+    images) are refreshed once per frame.  The loop ends when 'q' is pressed in an OpenCV
+    window, the Open3D window is closed, or the camera stops delivering frames.
+    """
+    d           = RealSense(mode = 'd16', use_ir = True, frame_size = (1280,720))
+    vis         = None
+    try:
+        # optional camera tuning, currently disabled:
+        #d.load_preset_from_file('vpi/preset_rsm.json')
+        #d.switch_projector(True)
+        camera_bf   = d.get_bf() # passed to convert_disparity_to_depth as BF
+        #camera_k    = d.get_camera_intrinsics()
+        args        = process_arguments()
+        # NOTE(review): hard-coded intrinsics with the principal point at the centre of the full
+        # 1280x720 frame, while preprocess() centre-crops to 512x384 by default -- confirm cx/cy.
+        K           = np.array([[637,0,640],[0,637,360],[0,0,1]])
+
+        # init
+        model       = foundation_stereo_algo_init(args)
+
+        # create visualizer and window.
+        vis         = o3d.visualization.Visualizer()
+        vis.create_window(height=720, width=1280)
+
+        # The displayed cloud is registered once, before the non-blocking loop;
+        # it starts with a few random placeholder points.
+        pcd         = o3d.geometry.PointCloud()
+        pcd.points  = o3d.utility.Vector3dVector(np.random.rand(10, 3)*1000)
+        vis.add_geometry(pcd)
+
+        # pixel-index cache: built by the first depth2xyz call and reused afterwards
+        us, vs      = None, None
+
+        ret  = False
+        while not ret:
+            # frame is I1,I2, D data
+            retf, frame = d.read()
+            if retf is False:
+                print('is your camera open/connected?')
+                break
+
+            # extract
+            img_left, img_right, img_depth_rs = preprocess(frame, scale_factor=1.0)
+
+            t_start         = time.time()
+            img_disparity   = foundation_stereo_algo(args, model, img_left, img_right)
+            print(f'Disp : {img_disparity.min()} - {img_disparity.max()}')
+            img_depth_fs    = convert_disparity_to_depth(camera_bf, img_disparity)
+            print(f'Depth : {img_depth_fs.min()} - {img_depth_fs.max()}')
+            depth_error     = depth_fs_rs_error(img_depth_rs, img_depth_fs)
+            print(f'Proces time : {time.time()-t_start} sec')
+
+            # back-project the model depth; depth2xyz raises depths below 100 to 100
+            pts, us, vs     = depth2xyz(img_depth_fs, K, us, vs, zmin=100)
+
+            # Downsample in a scratch cloud, then copy the result into the cloud that was added
+            # to the visualizer: update_geometry() refreshes geometry registered through
+            # add_geometry(), so passing it the downsampled copy never changed the display.
+            # NOTE(review): voxel_size=0.1 looks tiny if the points are in millimetres -- confirm units.
+            scratch         = o3d.geometry.PointCloud()
+            scratch.points  = o3d.utility.Vector3dVector(pts)
+            pcd.points      = scratch.voxel_down_sample(voxel_size=0.1).points
+            vis.update_geometry(pcd)
+
+            # process window events; stop when the Open3D window has been closed
+            keep_running = vis.poll_events()
+            if not keep_running: break
+            vis.update_renderer()
+
+            # show
+            ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+            ret2 = show_images_depth(img_depth_fs, None, fig_name='Depth FS')
+            ret3 = show_images_depth(depth_error, None, fig_name='Depth Error')
+            ret4 = show_images_depth(img_left.astype(np.uint8), img_right.astype(np.uint8), fig_name='Images L-R')
+            ret = ret1 or ret2 or ret3 or ret4
+    finally:
+        # release the camera and close the Open3D window even if processing
+        # or display code raises part-way through a frame
+        d.release()
+        if vis is not None:
+            vis.destroy_window()
+
+def test_video_stream_rs_versus_foundation_x2():
+    """Like test_video_error_rs_versus_foundation, but with ``args.scale`` forced to 0.5 (speed check).
+
+    Runs until 'q' is pressed in one of the windows or the camera stops delivering frames.
+    """
+    d           = RealSense(mode = 'd16', use_ir = True, frame_size = (1280,720))
+    try:
+        # optional camera tuning, currently disabled:
+        #d.load_preset_from_file('vpi/preset_rsm.json')
+        #d.set_exposure(100000) # 10ms
+        #d.switch_projector(False)
+        camera_bf   = d.get_bf() # passed to convert_disparity_to_depth as BF
+        args        = process_arguments()
+        args.scale  = 0.5        # overrides --scale: the model sees half-resolution images
+
+        # init
+        model       = foundation_stereo_algo_init(args)
+
+        ret  = False
+        while not ret:
+            # frame is I1,I2, D data
+            retf, frame = d.read()
+            if retf is False:
+                print('is your camera open/connected?')
+                break
+
+            # extract
+            img_left, img_right, img_depth_rs = preprocess(frame, scale_factor=1.0)
+
+            t_start         = time.time()
+            img_disparity   = foundation_stereo_algo(args, model, img_left, img_right)
+            img_depth_fs    = convert_disparity_to_depth(camera_bf, img_disparity)
+            depth_error     = depth_fs_rs_error(img_depth_rs, img_depth_fs)
+            print(f'Proces time : {time.time()-t_start} sec')
+
+            # show
+            ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+            ret2 = show_images_depth(img_depth_fs, None, fig_name='Depth FS')
+            ret3 = show_images_depth(depth_error, None, fig_name='Depth Error', vmax = 100)
+            ret4 = show_images_depth(img_left.astype(np.uint8), img_right.astype(np.uint8), fig_name='Images L-R')
+            ret = ret1 or ret2 or ret3 or ret4
+
+            # optional per-frame dump, currently disabled:
+            # d.save_image(img_depth_rs,fname='depth_rs')
+            # d.save_image(img_depth_fs,fname='depth_fs')
+            # d.save_image(img_left,fname='img_left')
+            # d.save_image(img_right,fname='img_right')
+    finally:
+        d.release()   # always hand the camera back, even after an exception
+
+def merge_fs_rs(depth_rs, depth_fs):
+    """Fill horizontal holes of ``depth_rs`` with ``depth_fs`` where FS is consistent across the hole.
+
+    A hole is a run of invalid pixels (``depth_rs <= 1``) bounded, on the same row, by a valid
+    pixel ``s`` on the left and the next valid pixel ``f`` on the right.  ``depth_rs_out[r, s:f]``
+    is overwritten with ``depth_fs`` when the FS depths at ``s`` and ``f`` differ by less than
+    10% of the FS depth at ``s``.  Returns a copy; the inputs are not modified.
+    """
+    depth_rs_out = depth_rs.copy()
+    valid_rs     = depth_rs > 1
+    # start: pixel k valid and k+1 invalid; stop: pixel k-1 invalid and k valid
+    start_rs     = np.zeros_like(valid_rs)
+    start_rs[:,:-1] = valid_rs[:,:-1] & ~valid_rs[:,1:]
+    stop_rs      = np.zeros_like(valid_rs)
+    stop_rs[:,1:] = ~valid_rs[:,:-1] & valid_rs[:,1:]
+    fs           = depth_fs.astype(np.float32)   # signed math: uint16 differences would wrap around
+    for r in range(depth_rs.shape[0]):
+        stop_ind = np.flatnonzero(stop_rs[r])
+        for s in np.flatnonzero(start_rs[r]):
+            later = stop_ind[stop_ind > s]
+            if later.size and abs(fs[r,s] - fs[r,later[0]]) < 0.1*fs[r,s]:
+                f = later[0]                     # first hole end right of s (was always stop_ind[0])
+                depth_rs_out[r,s:f] = depth_fs[r,s:f]
+                print('.')
+    return depth_rs_out
+      
+def test_video_stream_rs_fs_merge():
+    """Stream RealSense frames, fill RS depth holes with the stereo model's depth, display and save.
+
+    Every frame's RS depth, model depth and left/right images are passed to ``d.save_image``.
+    Runs until 'q' is pressed in one of the windows or the camera stops delivering frames.
+    """
+    d           = RealSense(mode = 'd16', use_ir = True, frame_size = (1280,720))
+    try:
+        # optional camera tuning, currently disabled:
+        #d.load_preset_from_file('vpi/preset_rsm.json')
+        #d.set_exposure(100000) # 10ms
+        #d.switch_projector(False)
+        camera_bf   = d.get_bf() # passed to convert_disparity_to_depth as BF
+        args        = process_arguments()
+        args.scale  = 0.5        # overrides --scale: the model sees half-resolution images
+
+        # init
+        model       = foundation_stereo_algo_init(args)
+
+        ret  = False
+        while not ret:
+            # frame is I1,I2, D data
+            retf, frame = d.read()
+            if retf is False:
+                print('is your camera open/connected?')
+                break
+
+            # extract
+            img_left, img_right, img_depth_rs = preprocess(frame, scale_factor=1.0)
+
+            t_start         = time.time()
+            img_disparity   = foundation_stereo_algo(args, model, img_left, img_right)
+            img_depth_fs    = convert_disparity_to_depth(camera_bf, img_disparity)
+            img_depth_merge = merge_fs_rs(img_depth_rs, img_depth_fs)
+            print(f'Proces time : {time.time()-t_start} sec')
+
+            # show
+            ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+            ret2 = show_images_depth(img_depth_fs, None, fig_name='Depth FS')
+            ret3 = show_images_depth(img_depth_merge, None, fig_name='Depth Merge', vmax = 100)
+            ret4 = show_images_depth(img_left.astype(np.uint8), img_right.astype(np.uint8), fig_name='Images L-R')
+            ret = ret1 or ret2 or ret3 or ret4
+
+            d.save_image(img_depth_rs,fname='depth_rs')
+            d.save_image(img_depth_fs,fname='depth_fs')
+            d.save_image(img_left,fname='img_left')
+            d.save_image(img_right,fname='img_right')
+    finally:
+        d.release()   # always hand the camera back, even after an exception
+
+def test_faro_rs_fs_error():
+    """Compare the stereo model's depth with RealSense and FARO depth on random dataset samples.
+
+    Eight indices are drawn at random (repeats possible) from the ``DataSource``; for each one
+    the model depth, both reference depths and the two absolute-error maps are displayed.
+    Pressing 'q' in any window stops early.
+    """
+    d           = DataSource()
+    img_num     = d.init_directory()
+    if img_num < 1:
+        # np.random.randint(0, 0, 8) below would raise ValueError on an empty dataset
+        print('No images found')
+        return
+    camera_bf   = d.get_bf() # passed to convert_disparity_to_depth as BF
+
+    args        = process_arguments()
+    args.scale  = 0.5
+    # NOTE(review): machine-specific absolute path; it overrides whatever --model_dir was given.
+    args.model_dir = '/home/administrato/dev/Fast-FoundationStereo/weights/20-30-48/model_finetuned_faro.pth'
+
+    # init
+    model       = foundation_stereo_algo_init(args)
+
+    img_index   = np.random.randint(0,img_num,8)
+    for k in img_index:
+        # one sample: left / right images plus RealSense and FARO depth
+        out_data       = d.get_item(k, debug = True)
+        img_left, img_right          = out_data["img_left"], out_data["img_right"]
+        img_depth_rs, img_depth_faro = out_data["img_depth_rs"], out_data["img_depth_faro"]
+        print(img_left.shape, img_right.shape)
+
+        # process
+        t_start         = time.time()
+        img_disparity   = foundation_stereo_algo(args, model, img_left, img_right)
+        img_depth_fs    = convert_disparity_to_depth(camera_bf, img_disparity)
+        img_error_fs    = depth_fs_rs_error(img_depth_rs, img_depth_fs)
+        img_error_faro  = depth_fs_rs_error(img_depth_faro, img_depth_fs)
+        print(f'Proces time : {time.time()-t_start} sec')
+
+        # show
+        ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+        ret2 = show_images_depth(img_depth_fs, None, fig_name='Depth FS', vmax = 400)
+        ret3 = show_images_depth(img_depth_faro, None, fig_name='Depth Faro')
+        ret4 = show_images_depth(img_error_fs.astype(np.uint8),     None, fig_name='Error RS-FS', vmax = 100)
+        ret5 = show_images_depth(img_error_faro.astype(np.uint8), None, fig_name='Error Faro-FS', vmax = 100)
+        ret6 = show_images_depth(img_left.astype(np.uint8), None, fig_name='Images L')
+        ret7 = show_images_depth(img_right.astype(np.uint8), None, fig_name='Images R')
+        ret = ret1 or ret2 or ret3 or ret4 or ret5 or ret6 or ret7
+        if ret: break
+
+        # optional per-sample dump, currently disabled:
+        # d.save_image(img_depth_rs,fname='depth_rs')
+        # d.save_image(img_depth_fs,fname='depth_fs')
+        # d.save_image(img_left,fname='img_left')
+        # d.save_image(img_right,fname='img_right')
+
+
+# ─── D1-EdgeNeXt-FFS: fast foundation stereo with RS depth prior ─────────────
+from niv.smart_lite.ablation.d1_models import build_d1_model
+from niv.smart_lite.model import build_gwc_volume as _build_gwc_volume
+import torch.nn.functional as _F
+from niv.depth_prior_explainer import depth_mm_to_prior
+
+
+def d1_model_init(ffs_ckpt=None, d1_ckpt=None, max_disp=192, num_iters=8):
+    """Build the D1-EdgeNeXt-FFS model, load the D1 checkpoint and return it in eval mode on the GPU."""
+    weights_dir = f'{code_dir}/../weights/weights_niv'   # default location of both checkpoints
+    ffs_ckpt = f'{weights_dir}/model_best_bp2_serialize.pth' if ffs_ckpt is None else ffs_ckpt
+    d1_ckpt  = f'{weights_dir}/stage3_best.pt' if d1_ckpt is None else d1_ckpt
+    model = build_d1_model("edgenext", max_disp=max_disp, num_iters=num_iters, ffs_ckpt=ffs_ckpt)
+    ck    = torch.load(d1_ckpt, map_location="cpu", weights_only=False)  # NOTE: full unpickle; load trusted checkpoints only
+    mismatch = model.load_state_dict(ck.get("model", ck), strict=False)  # strict=False tolerates key mismatches, so report them
+    logging.info("D1 checkpoint loaded: %d missing keys, %d unexpected keys", len(mismatch.missing_keys), len(mismatch.unexpected_keys))
+    return model.eval().cuda()
+
+
+def d1_stereo_algo(model, left_bgr, right_bgr, depth_mm, focal_px, baseline_m, num_iters=8):
+    """Run the D1-EdgeNeXt-FFS forward pass with a sensor-depth prior; return disparity as an (H, W) array.
+
+    The prior (disparity + confidence from ``depth_mm_to_prior``) seeds the quarter-resolution
+    disparity wherever confidence > 0; the coarse head fills the rest.  ``num_iters`` GRU
+    updates refine it before ``model.upsample`` produces the returned map.
+    """
+    prior_disp, prior_conf = depth_mm_to_prior(depth_mm, focal_px, baseline_m)
+    prior_disp = torch.from_numpy(prior_disp).cuda()
+    prior_conf = torch.from_numpy(prior_conf).cuda()
+
+    def _to_tensor(image):
+        # gray -> 3 channels, reverse channel order (BGR -> RGB), divide by 255, HWC -> 1xCxHxW on GPU
+        if image.ndim == 2:
+            image = np.tile(image[..., None], (1, 1, 3))
+        chw = (image[..., ::-1].astype(np.float32) / 255.0).transpose(2, 0, 1)
+        return torch.from_numpy(np.ascontiguousarray(chw)).unsqueeze(0).cuda()
+
+    left_t, right_t = _to_tensor(left_bgr), _to_tensor(right_bgr)
+    with torch.no_grad():
+        quarter = (left_t.shape[2] // 4, left_t.shape[3] // 4)
+        feat_left, feat_right = model._extract(left_t, right_t)
+        gwc_vol = _build_gwc_volume(feat_left, feat_right, model.max_disp_q4, model.num_groups)
+
+        # the prior disparity is resampled to quarter size and its values scaled by 0.25
+        rs_q4   = _F.interpolate(prior_disp, quarter, mode='bilinear', align_corners=False) * 0.25
+        conf_q4 = _F.interpolate(prior_conf, quarter, mode='bilinear', align_corners=False)
+        prior   = torch.cat([rs_q4, conf_q4], dim=1)
+        has_sensor = (conf_q4 > 0).float()
+        disp = has_sensor * rs_q4 + (1.0 - has_sensor) * model.coarse_head(gwc_vol)
+
+        net, inp, att = model.context_net(feat_left)
+        for _ in range(num_iters):
+            disp   = disp.detach()
+            motion = model.motion_encoder(disp, model.corr_lookup(gwc_vol, disp), prior)
+            net    = model.gru(net, torch.cat([motion, inp * att], dim=1))
+            disp   = disp + model.disp_head(net)
+        final = model.upsample(net, disp)
+
+    return final[0, 0].cpu().numpy()
+
+
+def test_video_stream_rs_d1():
+    """Stream RealSense frames through D1-EdgeNeXt-FFS (RS depth as prior) and display the result.
+
+    Runs until 'q' is pressed in one of the windows or the camera stops delivering frames.
+    """
+    d          = RealSense(mode='d16', use_ir=True, frame_size=(1280, 720))
+    try:
+        #d.switch_projector(True)
+        camera_bf  = d.get_bf()   # focal_px * baseline_mm
+        # NOTE(review): hard-coded intrinsics -- confirm they match the connected camera
+        K          = np.array([[637, 0, 640], [0, 637, 360], [0, 0, 1]], dtype=np.float32)
+        focal_px   = K[0, 0]
+        baseline_m = camera_bf / focal_px / 1000.0
+        model      = d1_model_init()
+
+        ret = False
+        while not ret:
+            retf, frame = d.read()
+            if not retf:
+                print('is your camera open/connected?')
+                break
+
+            img_left, img_right, img_depth_rs = preprocess(frame, scale_factor=1.0)
+
+            t_start       = time.time()
+            img_disparity = d1_stereo_algo(
+                model, img_left.astype(np.uint8), img_right.astype(np.uint8),
+                img_depth_rs, focal_px, baseline_m,
+            )
+            img_depth_d1 = convert_disparity_to_depth(camera_bf, img_disparity)
+            print(f'Process time: {time.time() - t_start:.3f}s')
+
+            ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+            ret2 = show_images_depth(img_depth_d1, None, fig_name='Depth D1-FFS')
+            ret3 = show_images_depth(img_left.astype(np.uint8), img_right.astype(np.uint8), fig_name='Images L-R')
+            ret  = ret1 or ret2 or ret3
+    finally:
+        d.release()   # always hand the camera back, even after an exception
+
+
+if __name__ == '__main__':
+    # Manual test selector: uncomment exactly one entry point ("ok" marks the ones the author noted as working).
+    #test_video_stream_rs_and_foundation() # ok
+    #test_video_error_rs_versus_foundation() # ok
+    # test_point_cloud_rt() # ok
+    #test_point_cloud_rs_versus_foundation()
+    #test_video_stream_rs_versus_foundation_x2() # ok
+    #test_video_stream_rs_fs_merge()
+    #test_faro_rs_fs_error()
+    test_video_stream_rs_d1()
+
+
+
+
+
+
+
+
+    
\ No newline at end of file
diff --git a/niv/smart_lite/__init__.py b/niv/smart_lite/__init__.py
new file mode 100644
index 0000000..d4364e7
--- /dev/null
+++ b/niv/smart_lite/__init__.py
@@ -0,0 +1 @@
+from .model import SmartLiteStereo
diff --git a/niv/smart_lite/ablation/__init__.py b/niv/smart_lite/ablation/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/niv/smart_lite/ablation/d1_models.py b/niv/smart_lite/ablation/d1_models.py
new file mode 100644
index 0000000..f7a39a5
--- /dev/null
+++ b/niv/smart_lite/ablation/d1_models.py
@@ -0,0 +1,495 @@
+"""
+Direction-1 ablation models: SmartLite-L base, one FFS component swapped in.
+
+Each class replaces exactly one sub-system with its FFS counterpart so we
+can measure the per-component accuracy delta.
+
+Variants
+--------
+D1_NoRsPrior   – eval only; zeros rs_disp / conf  (baseline minus prior)
+D1_SPX         – replaces ConvexUpsample with FFS SPX upsample
+D1_SelGRU      – replaces DS-ConvGRU with FFS SelectiveConvGRU
+D1_RichVolume  – replaces tiny CoarseDisparityHead with combined GWC+concat
+                 volume + 3-D stem + classifier (no hourglass, no multi-scale
+                 features needed)
+D1_EdgeNeXt    – replaces DS-conv backbone with FFS EdgeNeXt Feature extractor
+                 + 1×1 adapter conv
+
+Usage
+-----
+    from niv.smart_lite.ablation.d1_models import build_d1_model
+    model = build_d1_model('spx', max_disp=192)
+    model = build_d1_model('edgenext', max_disp=192,
+                           ffs_ckpt='../../weights/model_best_bp2.pth')
+"""
+
+import os
+import sys
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+_HERE = os.path.dirname(os.path.realpath(__file__))
+_FFS_ROOT = os.path.abspath(os.path.join(_HERE, '..', '..', '..'))  # repo root: the directory holding niv/ and core/
+if _FFS_ROOT not in sys.path:
+    sys.path.insert(0, _FFS_ROOT)  # makes the `niv.*` and `core.*` imports below resolve from any working directory
+
+from niv.smart_lite.model import (
+    SmartLiteStereo, build_gwc_volume, MODEL_CONFIGS,
+    DepthwiseSeparableConv2d, DepthwiseSeparableConvGRU,
+    MotionEncoder,
+)
+from core.submodule import (
+    Conv2x,
+    context_upsample,
+    build_gwc_volume_optimized_pytorch1,
+    build_concat_volume_optimized_pytorch1,
+    disparity_regression,
+    BasicConv_IN,
+)
+from core.update import SelectiveConvGRU
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _L_cfg():
+    """Return the SmartLite-L preset, i.e. ``MODEL_CONFIGS['L']`` itself (the shared dict, not a copy)."""
+    return MODEL_CONFIGS['L']
+
+
+def _load_ffs(ffs_ckpt: str, device='cpu'):
+    """Load FastFoundationStereo from *ffs_ckpt*; return ``(ffs, cfg)`` with ffs frozen, in eval mode, on *device*.
+
+    Supports two checkpoint formats:
+      - model_best_bp2_serialize.pth : serialized FastFoundationStereo object
+        (pure EdgeNeXt, no DINOv2 — preferred)
+      - model_best_bp2.pth : state-dict checkpoint (contains DINOv2 + EdgeNeXt);
+        a model is built from the cfg.yaml next to it and only name+shape-matching keys are loaded
+    """
+    obj = torch.load(ffs_ckpt, map_location='cpu', weights_only=False)  # NOTE: full unpickling — load trusted checkpoints only
+
+    if hasattr(obj, 'state_dict'):
+        # Serialized FastFoundationStereo — pure EdgeNeXt, no DINOv2
+        ffs = obj
+        print(f'[_load_ffs] loaded serialized FastFoundationStereo from {ffs_ckpt}')
+    else:
+        # State-dict checkpoint: build model from cfg.yaml then load weights
+        from omegaconf import OmegaConf
+        cfg_path = os.path.join(os.path.dirname(ffs_ckpt), 'cfg.yaml')
+        cfg  = OmegaConf.load(cfg_path)
+        state = obj.get('model', obj.get('state_dict', obj))  # weights may sit under 'model', 'state_dict', or at top level
+        from core.foundation_stereo import FastFoundationStereo
+        ffs  = FastFoundationStereo(cfg)
+        model_state = ffs.state_dict()
+        compatible = {k: v for k, v in state.items()
+                      if k in model_state and v.shape == model_state[k].shape}
+        skipped = len(state) - len(compatible)  # also counts keys unknown to the model, not only shape mismatches
+        if skipped:
+            print(f'[_load_ffs] skipped {skipped} shape-mismatched keys '
+                  f'(DINOv2/dim mismatches — harmless, only feature.* extracted)')
+        missing, unexpected = ffs.load_state_dict(compatible, strict=False)
+        feature_missing = [k for k in missing if k.startswith('feature.')]
+        if feature_missing:
+            print(f'[_load_ffs] WARNING: feature keys missing: {feature_missing[:5]}')  # prints at most the first five
+
+    ffs = ffs.to(device).eval()
+    for p in ffs.parameters():
+        p.requires_grad = False  # freeze every parameter of the loaded model
+    cfg = getattr(ffs, 'cfg', None)  # config as stored on the model object; None if it has no such attribute
+    return ffs, cfg
+
+
+# ---------------------------------------------------------------------------
+# D1_NoRsPrior — eval only, zeros the sensor prior
+# ---------------------------------------------------------------------------
+
+class D1_NoRsPrior(SmartLiteStereo):
+    """Evaluation-only ablation: SmartLite-L run with the sensor prior blanked out."""
+
+    def forward(self, left_rgb, right_rgb, rs_disp, conf,
+                num_iters=None, test_mode=False):
+        # Swap both prior inputs for all-zero tensors of matching shape/dtype/device,
+        # then defer to the unmodified parent forward pass.
+        blank_disp = torch.zeros_like(rs_disp)
+        blank_conf = torch.zeros_like(conf)
+        return super().forward(left_rgb, right_rgb, blank_disp, blank_conf,
+                               num_iters=num_iters, test_mode=test_mode)
+
+
+# ---------------------------------------------------------------------------
+# D1_SPX — FFS SPX upsample instead of ConvexUpsample
+# ---------------------------------------------------------------------------
+
+class D1_SPX(SmartLiteStereo):
+    """SmartLite-L with FFS SPX upsampling.
+
+    New modules (need training): stem_2, spx_2_gru, spx_gru
+    Removed: upsample (ConvexUpsample)
+
+    SPX uses the original-resolution image (1/2-res stem features) to sharpen
+    depth-discontinuity boundaries, which ConvexUpsample cannot.
+    """
+
+    def __init__(self, max_disp=192, **sl_kwargs):
+        super().__init__(max_disp=max_disp, **sl_kwargs)
+        hidden_dim = sl_kwargs.get('hidden_dim', _L_cfg()['hidden_dim'])  # NOTE(review): falls back to the L preset, not the parent's own default — confirm they agree
+        del self.upsample  # replaced
+
+        # SPX modules matching FFS architecture
+        self.stem_2 = nn.Sequential(
+            BasicConv_IN(3, 32, kernel_size=3, stride=2, padding=1),
+            nn.Conv2d(32, 32, 3, 1, 1, bias=False),
+            nn.InstanceNorm2d(32), nn.ReLU(),
+        )
+        self.spx_2_gru = Conv2x(hidden_dim, 32, deconv=True, bn=False, concat=True)
+        self.spx_gru   = nn.ConvTranspose2d(2 * 32, 9, kernel_size=4,
+                                            stride=2, padding=1)  # 9 output channels = one weight per 3×3 neighbour
+
+    def _upsample(self, net, disp_low, stem_2x):
+        xspx     = self.spx_2_gru(net, stem_2x)  # combine the GRU hidden state with the stem features
+        spx_pred = F.softmax(self.spx_gru(xspx), dim=1)  # normalise the 9 weights over the channel axis
+        return context_upsample(disp_low * 4., spx_pred).unsqueeze(1)  # ×4 presumably rescales 1/4-res disparity values to full res — confirm
+
+    def forward(self, left_rgb, right_rgb, rs_disp, conf,
+                num_iters=None, test_mode=False):
+        iters = num_iters if num_iters is not None else self.num_iters  # per-call override of the iteration count
+        B, _, H, W = left_rgb.shape
+        Hq, Wq = H // 4, W // 4  # size the prior maps are resampled to
+
+        stem_2x = self.stem_2(left_rgb)  # stride-2 stem features of the left image, reused by _upsample
+
+        feat_left, feat_right = self.backbone(left_rgb, right_rgb)
+        gwc_volume = build_gwc_volume(feat_left, feat_right,
+                                      self.max_disp_q4, self.num_groups)
+
+        rs_q4   = F.interpolate(rs_disp, (Hq, Wq), mode='bilinear',
+                                align_corners=False) * 0.25  # scale disparity values along with the map
+        conf_q4 = F.interpolate(conf,    (Hq, Wq), mode='bilinear',
+                                align_corners=False)
+        prior   = torch.cat([rs_q4, conf_q4], dim=1)  # prior disparity and confidence stacked on the channel axis
+
+        coarse_disp = self.coarse_head(gwc_volume)
+        has_sensor  = (conf_q4 > 0).float()  # 1.0 wherever the resampled confidence is positive
+        disp = has_sensor * rs_q4 + (1.0 - has_sensor) * coarse_disp  # start from the prior where present, else the coarse estimate
+
+        net, inp, att = self.context_net(feat_left)
+
+        disp_preds = []
+        for i in range(iters):
+            disp  = disp.detach()  # cut the gradient path between iterations
+            corr  = self.corr_lookup(gwc_volume, disp)
+            motion = self.motion_encoder(disp, corr, prior)
+            net   = self.gru(net, torch.cat([motion, inp * att], dim=1))
+            disp  = disp + self.disp_head(net)  # residual update of the low-res disparity
+            if test_mode and i < iters - 1:
+                continue  # test mode upsamples only the final iteration
+            disp_preds.append(self._upsample(net, disp, stem_2x))
+
+        return disp_preds[-1] if test_mode else (coarse_disp, disp_preds)  # test: last prediction; otherwise coarse map + all predictions
+
+
+# ---------------------------------------------------------------------------
+# D1_SelGRU — FFS SelectiveConvGRU cell, keep MotionEncoder + prior
+# ---------------------------------------------------------------------------
+
+class _GRUAttProj(nn.Module):
+    """Map a GRU hidden state to one sigmoid-squashed attention channel per pixel."""
+    def __init__(self, hidden_dim):
+        super().__init__()
+        squeezed = hidden_dim // 2  # bottleneck width between the two convs
+        layers = [nn.Conv2d(hidden_dim, squeezed, kernel_size=3, padding=1),
+                  nn.ReLU(inplace=True),
+                  nn.Conv2d(squeezed, 1, kernel_size=1),
+                  nn.Sigmoid()]
+        self.proj = nn.Sequential(*layers)
+    def forward(self, h):
+        return self.proj(h)
+
+
+class D1_SelGRU(SmartLiteStereo):
+    """SmartLite-L with FFS SelectiveConvGRU replacing DS-ConvGRU.
+
+    New modules (need training): sel_gru, gru_att_proj
+    Removed: gru (DepthwiseSeparableConvGRU)
+
+    SelectiveConvGRU blends a 1×1 (fast, pointwise) and 3×3 (spatial) GRU cell
+    based on per-pixel attention — more expressive than a fixed DS-GRU.
+    """
+
+    def __init__(self, max_disp=192, **sl_kwargs):
+        super().__init__(max_disp=max_disp, **sl_kwargs)
+        cfg = _L_cfg()
+        hidden_dim  = sl_kwargs.get('hidden_dim',  cfg['hidden_dim'])  # NOTE(review): falls back to the L preset, not the parent's own default — confirm they agree
+        context_dim = sl_kwargs.get('context_dim', cfg['context_dim'])
+        input_dim   = hidden_dim + context_dim   # motion + inp*att
+        del self.gru  # replaced
+
+        self.sel_gru      = SelectiveConvGRU(hidden_dim=hidden_dim,
+                                             input_dim=input_dim)
+        self.gru_att_proj = _GRUAttProj(hidden_dim)  # attention is predicted from the hidden state itself
+
+    def forward(self, left_rgb, right_rgb, rs_disp, conf,
+                num_iters=None, test_mode=False):
+        iters = num_iters if num_iters is not None else self.num_iters  # per-call override of the iteration count
+        B, _, H, W = left_rgb.shape
+        Hq, Wq = H // 4, W // 4  # size the prior maps are resampled to
+
+        feat_left, feat_right = self.backbone(left_rgb, right_rgb)
+        gwc_volume = build_gwc_volume(feat_left, feat_right,
+                                      self.max_disp_q4, self.num_groups)
+
+        rs_q4   = F.interpolate(rs_disp, (Hq, Wq), mode='bilinear',
+                                align_corners=False) * 0.25  # scale disparity values along with the map
+        conf_q4 = F.interpolate(conf,    (Hq, Wq), mode='bilinear',
+                                align_corners=False)
+        prior   = torch.cat([rs_q4, conf_q4], dim=1)  # prior disparity and confidence stacked on the channel axis
+
+        coarse_disp = self.coarse_head(gwc_volume)
+        has_sensor  = (conf_q4 > 0).float()  # 1.0 wherever the resampled confidence is positive
+        disp = has_sensor * rs_q4 + (1.0 - has_sensor) * coarse_disp  # start from the prior where present, else the coarse estimate
+
+        net, inp, att = self.context_net(feat_left)
+
+        disp_preds = []
+        for i in range(iters):
+            disp   = disp.detach()  # cut the gradient path between iterations
+            corr   = self.corr_lookup(gwc_volume, disp)
+            motion = self.motion_encoder(disp, corr, prior)
+            x_in   = torch.cat([motion, inp * att], dim=1)
+            gru_att = self.gru_att_proj(net)           # [B,1,Hq,Wq]
+            net    = self.sel_gru(gru_att, net, x_in)
+            disp   = disp + self.disp_head(net)  # residual update of the low-res disparity
+            if test_mode and i < iters - 1:
+                continue  # test mode upsamples only the final iteration
+            disp_preds.append(self.upsample(net, disp))
+
+        return disp_preds[-1] if test_mode else (coarse_disp, disp_preds)  # test: last prediction; otherwise coarse map + all predictions
+
+
+# ---------------------------------------------------------------------------
+# D1_RichVolume — combined GWC+concat volume + 3-D stem + classifier
+#                 (FFS cost-agg init without the hourglass)
+# ---------------------------------------------------------------------------
+
+class D1_RichVolume(SmartLiteStereo):
+    """SmartLite-L with a richer coarse init from GWC+concat+3D-stem.
+
+    New modules (need training): proj_cmb, corr_stem, ffs_classifier
+    Removed: coarse_head
+
+    Note: hourglass and FeatureAtt are skipped because they require multi-scale
+    features that SmartLite backbone doesn't produce.  This still tests whether
+    a richer (combined) cost volume gives a better warm start over the tiny 2-
+    layer 3D-conv head.
+    """
+
+    _CONCAT_HALF = 12    # FFS concat_channel // 2
+
+    def __init__(self, max_disp=192, **sl_kwargs):
+        super().__init__(max_disp=max_disp, **sl_kwargs)
+        feature_ch = sl_kwargs.get('feature_ch', _L_cfg()['feature_ch'])
+        num_groups  = sl_kwargs.get('num_groups', _L_cfg()['num_groups'])
+        del self.coarse_head  # replaced
+
+        volume_dim = 28   # FFS default
+        ch_half    = self._CONCAT_HALF
+
+        self.proj_cmb = nn.Conv2d(feature_ch, ch_half, 1, bias=False)  # per-view projection used for the concat volume
+
+        from core.submodule import BasicConv, ResnetBasicBlock3D
+        self.corr_stem = nn.Sequential(
+            nn.Conv3d(ch_half * 2 + num_groups, volume_dim, kernel_size=1),
+            BasicConv(volume_dim, volume_dim, kernel_size=3, padding=1, is_3d=True),
+            ResnetBasicBlock3D(volume_dim, volume_dim, kernel_size=3,
+                               stride=1, padding=1),
+            ResnetBasicBlock3D(volume_dim, volume_dim, kernel_size=3,
+                               stride=1, padding=1),
+        )
+        # Classifier head: halve the channels, one residual block, then a single-channel 3-D conv.
+        self.ffs_classifier = nn.Sequential(
+            BasicConv(volume_dim, volume_dim // 2, kernel_size=3,
+                      padding=1, is_3d=True),
+            ResnetBasicBlock3D(volume_dim // 2, volume_dim // 2, kernel_size=3,
+                               stride=1, padding=1),
+            nn.Conv3d(volume_dim // 2, 1, kernel_size=7, padding=3),
+        )
+
+    def _coarse_init(self, feat_left, feat_right):
+        gwc = build_gwc_volume_optimized_pytorch1(
+            feat_left, feat_right, self.max_disp_q4,
+            self.num_groups, normalize=True)
+        lc = self.proj_cmb(feat_left)
+        rc = self.proj_cmb(feat_right)  # same projection weights for both views
+        cat_vol = build_concat_volume_optimized_pytorch1(
+            lc, rc, maxdisp=self.max_disp_q4)
+        comb = self.corr_stem(torch.cat([gwc, cat_vol], dim=1))  # stack both volumes on the channel axis
+        logits = self.ffs_classifier(comb).squeeze(1)  # drop the single output channel
+        return disparity_regression(F.softmax(logits, dim=1), self.max_disp_q4)
+
+    def forward(self, left_rgb, right_rgb, rs_disp, conf,
+                num_iters=None, test_mode=False):
+        iters = num_iters if num_iters is not None else self.num_iters  # per-call override of the iteration count
+        _, _, H, W = left_rgb.shape
+        Hq, Wq = H // 4, W // 4  # size the prior maps are resampled to
+
+        feat_left, feat_right = self.backbone(left_rgb, right_rgb)
+        gwc_volume = build_gwc_volume(feat_left, feat_right,
+                                      self.max_disp_q4, self.num_groups)
+
+        rs_q4   = F.interpolate(rs_disp, (Hq, Wq), mode='bilinear',
+                                align_corners=False) * 0.25  # scale disparity values along with the map
+        conf_q4 = F.interpolate(conf,    (Hq, Wq), mode='bilinear',
+                                align_corners=False)
+        prior   = torch.cat([rs_q4, conf_q4], dim=1)  # prior disparity and confidence stacked on the channel axis
+
+        coarse_disp = self._coarse_init(feat_left, feat_right)
+        has_sensor  = (conf_q4 > 0).float()  # 1.0 wherever the resampled confidence is positive
+        disp = has_sensor * rs_q4 + (1.0 - has_sensor) * coarse_disp  # start from the prior where present, else the coarse estimate
+
+        net, inp, att = self.context_net(feat_left)
+
+        disp_preds = []
+        for i in range(iters):
+            disp   = disp.detach()  # cut the gradient path between iterations
+            corr   = self.corr_lookup(gwc_volume, disp)
+            motion = self.motion_encoder(disp, corr, prior)
+            net    = self.gru(net, torch.cat([motion, inp * att], dim=1))
+            disp   = disp + self.disp_head(net)  # residual update of the low-res disparity
+            if test_mode and i < iters - 1:
+                continue  # test mode upsamples only the final iteration
+            disp_preds.append(self.upsample(net, disp))
+
+        return disp_preds[-1] if test_mode else (coarse_disp, disp_preds)  # test: last prediction; otherwise coarse map + all predictions
+
+
+# ---------------------------------------------------------------------------
+# D1_EdgeNeXt — FFS EdgeNeXt backbone + 1×1 adapter, keep everything else
+# ---------------------------------------------------------------------------
+
+class D1_EdgeNeXt(SmartLiteStereo):
+    """SmartLite-L with FFS EdgeNeXt Feature extractor replacing DS backbone.
+
+    New modules (need training): feat_adapter (1×1 conv)
+    Frozen (only if ffs_ckpt provided): ffs_feature, taken from the loaded FFS model; without a checkpoint it is built fresh and stays trainable
+    Removed: backbone
+
+    The EdgeNeXt backbone is pretrained on ImageNet and outputs 128-160ch
+    features at 1/4 resolution.  A 1×1 conv adapts this to SmartLite's
+    expected feature_ch=64, so the downstream GWC / context_net / GRU
+    continue to operate unchanged.
+    """
+
+    def __init__(self, ffs_ckpt: str = None, max_disp=192, **sl_kwargs):
+        super().__init__(max_disp=max_disp, **sl_kwargs)
+        feature_ch = sl_kwargs.get('feature_ch', _L_cfg()['feature_ch'])  # NOTE(review): falls back to the L preset, not the parent's own default — confirm they agree
+        del self.backbone
+
+        from core.extractor import Feature as FFSFeature
+        if ffs_ckpt is not None:
+            ffs_model, ffs_cfg = _load_ffs(ffs_ckpt)  # returned model has requires_grad=False on every parameter
+            self.ffs_feature = ffs_model.feature
+        else:
+            from omegaconf import OmegaConf
+            # Minimal stub args if no checkpoint provided
+            ffs_cfg = OmegaConf.create({
+                'vit_size': 'vits', 'mixed_precision': False,
+            })
+            self.ffs_feature = FFSFeature(ffs_cfg)
+
+        ffs_x4_ch = self.ffs_feature.d_out[0]  # channel count of the extractor's first output
+        self.feat_adapter = nn.Conv2d(ffs_x4_ch, feature_ch, 1, bias=False)
+
+    def _extract(self, left_rgb, right_rgb):
+        imgs = torch.cat([left_rgb * 255.0, right_rgb * 255.0], dim=0)  # both views in one batch; ×255 presumably maps [0,1] inputs to 0–255 — confirm what the extractor expects
+        out  = self.ffs_feature(imgs)
+        B    = left_rgb.shape[0]
+        fl   = self.feat_adapter(out[0][:B])  # first B entries are the left images
+        fr   = self.feat_adapter(out[0][B:])  # remaining entries are the right images
+        return fl.contiguous(), fr.contiguous()
+
+    def forward(self, left_rgb, right_rgb, rs_disp, conf,
+                num_iters=None, test_mode=False):
+        iters = num_iters if num_iters is not None else self.num_iters  # per-call override of the iteration count
+        B, _, H, W = left_rgb.shape
+        Hq, Wq = H // 4, W // 4  # size the prior maps are resampled to
+
+        feat_left, feat_right = self._extract(left_rgb, right_rgb)
+        gwc_volume = build_gwc_volume(feat_left, feat_right,
+                                      self.max_disp_q4, self.num_groups)
+
+        rs_q4   = F.interpolate(rs_disp, (Hq, Wq), mode='bilinear',
+                                align_corners=False) * 0.25  # scale disparity values along with the map
+        conf_q4 = F.interpolate(conf,    (Hq, Wq), mode='bilinear',
+                                align_corners=False)
+        prior   = torch.cat([rs_q4, conf_q4], dim=1)  # prior disparity and confidence stacked on the channel axis
+
+        coarse_disp = self.coarse_head(gwc_volume)
+        has_sensor  = (conf_q4 > 0).float()  # 1.0 wherever the resampled confidence is positive
+        disp = has_sensor * rs_q4 + (1.0 - has_sensor) * coarse_disp  # start from the prior where present, else the coarse estimate
+
+        net, inp, att = self.context_net(feat_left)
+
+        disp_preds = []
+        for i in range(iters):
+            disp   = disp.detach()  # cut the gradient path between iterations
+            corr   = self.corr_lookup(gwc_volume, disp)
+            motion = self.motion_encoder(disp, corr, prior)
+            net    = self.gru(net, torch.cat([motion, inp * att], dim=1))
+            disp   = disp + self.disp_head(net)  # residual update of the low-res disparity
+            if test_mode and i < iters - 1:
+                continue  # test mode upsamples only the final iteration
+            disp_preds.append(self.upsample(net, disp))
+
+        return disp_preds[-1] if test_mode else (coarse_disp, disp_preds)  # test: last prediction; otherwise coarse map + all predictions
+
+
+# ---------------------------------------------------------------------------
+# Factory
+# ---------------------------------------------------------------------------
+
+_D1_CLASSES = {  # variant name accepted by build_d1_model -> model class
+    'no_prior':   D1_NoRsPrior,
+    'spx':        D1_SPX,
+    'selgru':     D1_SelGRU,
+    'rich_volume': D1_RichVolume,
+    'edgenext':   D1_EdgeNeXt,
+}
+
+
+def build_d1_model(variant: str, max_disp: int = 192,
+                   ffs_ckpt: str = None,
+                   num_iters: int = None,
+                   feature_ch: int = None) -> SmartLiteStereo:
+    """Instantiate one Direction-1 ablation variant on top of the SmartLite-L preset.
+
+    Args:
+        variant:    key of ``_D1_CLASSES``:
+                    no_prior | spx | selgru | rich_volume | edgenext.
+        max_disp:   maximum disparity, forwarded to the model constructor.
+        ffs_ckpt:   FFS checkpoint path; forwarded only for ``edgenext`` and
+                    ignored by every other variant.
+        num_iters:  GRU iteration count; ``None`` keeps the preset value.
+        feature_ch: feature width; ``None`` keeps the preset value.
+
+    Returns:
+        The constructed model instance.
+
+    Raises:
+        ValueError: if *variant* is not a known key.
+    """
+    model_cls = _D1_CLASSES.get(variant)
+    if model_cls is None:
+        raise ValueError(f"Unknown D1 variant '{variant}'. "
+                         f"Choose: {list(_D1_CLASSES)}")
+    # Copy the preset (its keys are exactly the constructor kwargs), then apply overrides.
+    ctor_kwargs = dict(_L_cfg())
+    if feature_ch is not None:
+        ctor_kwargs['feature_ch'] = feature_ch
+    if num_iters is not None:
+        ctor_kwargs['num_iters'] = num_iters
+
+    if variant == 'edgenext':
+        ctor_kwargs['ffs_ckpt'] = ffs_ckpt
+    return model_cls(max_disp=max_disp, **ctor_kwargs)
diff --git a/niv/smart_lite/model.py b/niv/smart_lite/model.py
new file mode 100644
index 0000000..266334b
--- /dev/null
+++ b/niv/smart_lite/model.py
@@ -0,0 +1,531 @@
+"""
+Smart Lite Stereo — lightweight stereo-matching with sensor-prior fusion.
+Three sizes (S / M / L) share the same 4-stage architecture; only channel
+widths and GRU iteration counts differ.
+
+Stages:
+  1. Depthwise-separable Siamese feature backbone  (→ 1/4 res)
+  2. 8-group GWC cost volume + tiny 3-D head → coarse disparity (soft-argmax)
+  3. DS-ConvGRU iterative refinement with 1-D bilinear correlation lookup
+  4. Learned convex upsampling (3×3 mask head)
+
+Quick start:
+    from niv.smart_lite.model import build_model
+    model = build_model("M")          # or "S" / "L"
+    model = build_model("L", max_disp=192)
+"""
+
+from __future__ import annotations
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+# ---------------------------------------------------------------------------
+# Size presets
+# ---------------------------------------------------------------------------
+
+MODEL_CONFIGS = {  # preset name -> keyword arguments that build_model passes to SmartLiteStereo
+    # ── SmartLite-S ─────────────────────────────────────────────────────────
+    # Baseline distillation target.  Fastest inference; fits comfortably on
+    # Jetson Orin NX 8 GB.
+    "S": dict(
+        feature_ch=32,
+        backbone_mid1=16, backbone_mid2=24, backbone_extra_blocks=0,
+        hidden_dim=16, context_dim=24, context_mid=64, motion_ch=64,
+        num_groups=8, corr_radius=4, num_iters=4,
+    ),
+    # ── SmartLite-M ─────────────────────────────────────────────────────────
+    # Better accuracy, still real-time on a desktop GPU.
+    "M": dict(
+        feature_ch=48,
+        backbone_mid1=24, backbone_mid2=36, backbone_extra_blocks=0,
+        hidden_dim=32, context_dim=48, context_mid=96, motion_ch=96,
+        num_groups=8, corr_radius=4, num_iters=6,
+    ),
+    # ── SmartLite-L ─────────────────────────────────────────────────────────
+    # Highest accuracy among the three; one extra backbone refinement block.
+    # Still far lighter than FoundationStereo.
+    "L": dict(
+        feature_ch=64,
+        backbone_mid1=32, backbone_mid2=48, backbone_extra_blocks=1,
+        hidden_dim=64, context_dim=64, context_mid=128, motion_ch=128,
+        num_groups=8, corr_radius=4, num_iters=8,
+    ),
+}
+
+
+def build_model(size: str = "S", max_disp: int = 192) -> "SmartLiteStereo":
+    """Instantiate SmartLiteStereo from the S / M / L preset; raise ValueError for any other size."""
+    preset = MODEL_CONFIGS.get(size)
+    if preset is None:
+        raise ValueError(f"Unknown model size '{size}'. Choose from: {list(MODEL_CONFIGS)}")
+    return SmartLiteStereo(max_disp=max_disp, **preset)
+
+
+# ---------------------------------------------------------------------------
+# Building blocks
+# ---------------------------------------------------------------------------
+
+class DepthwiseSeparableConv2d(nn.Module):
+    """Per-channel k×k conv (always bias-free) followed by a 1×1 channel-mixing conv."""
+
+    def __init__(self, in_ch, out_ch, kernel_size=3, stride=1, padding=1, bias=False):
+        super().__init__()
+        spatial = dict(kernel_size=kernel_size, stride=stride, padding=padding)
+        self.depthwise = nn.Conv2d(in_ch, in_ch, groups=in_ch, bias=False, **spatial)
+        self.pointwise = nn.Conv2d(in_ch, out_ch, kernel_size=1, bias=bias)
+
+    def forward(self, x):
+        return self.pointwise(self.depthwise(x))
+
+
+class DSConvBnRelu(nn.Module):
+    """DepthwiseSeparableConv2d, then BatchNorm2d, then in-place ReLU."""
+    def __init__(self, in_ch, out_ch, kernel_size=3, stride=1, padding=1):
+        super().__init__()
+        self.conv = DepthwiseSeparableConv2d(in_ch, out_ch, kernel_size=kernel_size, stride=stride, padding=padding)
+        self.bn = nn.BatchNorm2d(out_ch)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        y = self.conv(x)
+        return self.relu(self.bn(y))
+
+
+# ---------------------------------------------------------------------------
+# Stage 1 — Feature Backbone (Siamese, depthwise-separable)
+# ---------------------------------------------------------------------------
+
+class FeatureBackbone(nn.Module):
+    """
+    Shared-weight (Siamese) backbone.  ``forward`` receives the left and right
+    images separately, concatenates them along the height axis so one pass
+    serves both, and returns one feature map per image at 1/4 resolution.
+
+    Layers:
+        stem   : 3 → mid1_ch, stride 2
+        stage1 : mid1_ch → mid2_ch, stride 2
+        stage2 : mid2_ch → out_ch, then (1 + extra_blocks) × (out_ch → out_ch), stride 1
+    """
+
+    def __init__(self, out_ch: int = 32, mid1_ch: int = 16, mid2_ch: int = 24,
+                 extra_blocks: int = 0):
+        super().__init__()
+        self.stem = DSConvBnRelu(3, mid1_ch, stride=2)
+        self.stage1 = DSConvBnRelu(mid1_ch, mid2_ch, stride=2)
+        # Two fixed blocks first, then the optional same-width extras.
+        fixed = [DSConvBnRelu(mid2_ch, out_ch, stride=1),
+                 DSConvBnRelu(out_ch, out_ch, stride=1)]
+        extras = [DSConvBnRelu(out_ch, out_ch, stride=1) for _ in range(extra_blocks)]
+        self.stage2 = nn.Sequential(*fixed, *extras)
+
+    def forward(self, left_rgb, right_rgb):
+        # Rows 0..H-1 hold the left image, rows H..2H-1 the right one: [B, 3, 2H, W].
+        both = torch.cat([left_rgb, right_rgb], dim=2)
+        feats = self.stem(both)
+        feats = self.stage1(feats)
+        feats = self.stage2(feats)
+        # NOTE: the padded 3×3 convs also run across the seam between the two
+        # images, so feature rows next to it see pixels from both views.
+        # chunk() halves the height axis again: first half left, second half right.
+        top, bottom = feats.chunk(2, dim=2)
+        return top.contiguous(), bottom.contiguous()
+
+
+# ---------------------------------------------------------------------------
+# Stage 1b — Group-wise Correlation (GWC) Volume
+# ---------------------------------------------------------------------------
+
+def build_gwc_volume(feat_left, feat_right, max_disp, num_groups=8):
+    """
+    Group-wise correlation between the left features and the right features
+    shifted by every candidate disparity d in [0, max_disp).
+
+    Features are L2-normalised over the whole channel axis before being split
+    into ``num_groups`` groups; columns x < d have no match and stay zero.
+
+    Returns:
+        volume: [B, num_groups, max_disp, H, W]
+    """
+    B, C, H, W = feat_left.shape
+    assert C % num_groups == 0
+    group_shape = (B, num_groups, C // num_groups, H, W)
+    lhs = F.normalize(feat_left, dim=1).view(group_shape)
+    rhs = F.normalize(feat_right, dim=1).view(group_shape)
+
+    volume = feat_left.new_zeros(B, num_groups, max_disp, H, W)
+    for d in range(min(max_disp, W)):   # a shift of W or more leaves no overlapping columns
+        volume[:, :, d, :, d:] = (lhs[..., d:] * rhs[..., :W - d]).sum(dim=2)
+    return volume
+
+
+# ---------------------------------------------------------------------------
+# Stage 2 — Coarse Disparity Regression ("Warm Start")
+# ---------------------------------------------------------------------------
+
+class CoarseDisparityHead(nn.Module):
+    """
+    Two 3-D convolutions that score every disparity bin, followed by a
+    softmax-weighted average of the bin indices (soft-argmax).
+
+    Input:  GWC volume [B, G, D, H, W]  (D is expected to equal ``max_disp``)
+    Output: coarse disparity [B, 1, H, W]
+    """
+
+    def __init__(self, in_ch=8, mid_ch=8, max_disp=48):
+        super().__init__()
+        self.max_disp = max_disp
+        hidden_layers = [nn.Conv3d(in_ch, mid_ch, kernel_size=3, padding=1),
+                         nn.BatchNorm3d(mid_ch),
+                         nn.ReLU(inplace=True)]
+        self.conv1 = nn.Sequential(*hidden_layers)
+        self.conv2 = nn.Conv3d(mid_ch, 1, kernel_size=3, padding=1)
+
+    def forward(self, volume):
+        scores = self.conv2(self.conv1(volume)).squeeze(1)   # [B, D, H, W]
+        weights = F.softmax(scores, dim=1)
+        # Bin indices 0 .. max_disp-1, shaped to broadcast along the D axis.
+        bins = torch.arange(0, self.max_disp, dtype=weights.dtype, device=weights.device)
+        expected = weights * bins.view(1, -1, 1, 1)
+        return expected.sum(dim=1, keepdim=True)
+
+
+# ---------------------------------------------------------------------------
+# Stage 2b — Context Network
+# ---------------------------------------------------------------------------
+
+class ContextNet(nn.Module):
+    """
+    Turns the *left* feature map into the GRU's starting tensors.
+
+    Pipeline: conv1 → channel gate (cam) → conv2 → spatial gate (sam) → 1×1 proj;
+    the projection's channels are then sliced into
+        net0 [B, hidden_dim, H, W]    tanh
+        inp0 [B, context_dim, H, W]   ReLU
+        att0 [B, 1, H, W]             sigmoid of the last channel
+    """
+
+    def __init__(self, in_ch=32, hidden_dim=16, context_dim=24, mid_ch=64):
+        super().__init__()
+
+        def conv_bn_relu(c_in, c_out):
+            return nn.Sequential(nn.Conv2d(c_in, c_out, 3, padding=1),
+                                 nn.BatchNorm2d(c_out), nn.ReLU(True))
+
+        self.conv1 = conv_bn_relu(in_ch, mid_ch)
+        self.conv2 = conv_bn_relu(mid_ch, mid_ch)
+        self.proj = nn.Conv2d(mid_ch, hidden_dim + context_dim + 1, 1)
+        squeeze_ch = max(1, mid_ch // 4)   # bottleneck width of the channel gate
+        self.cam = nn.Sequential(
+            nn.AdaptiveAvgPool2d(1),
+            nn.Conv2d(mid_ch, squeeze_ch, 1), nn.ReLU(True),
+            nn.Conv2d(squeeze_ch, mid_ch, 1), nn.Sigmoid(),
+        )
+        self.sam = nn.Sequential(nn.Conv2d(2, 1, 7, padding=3), nn.Sigmoid())
+        self.hidden_dim = hidden_dim
+        self.context_dim = context_dim
+
+    def forward(self, feat_left):
+        feats = self.conv1(feat_left)
+        feats = self.conv2(feats * self.cam(feats))
+        # The spatial gate is computed from the per-pixel channel mean and channel max.
+        pooled = torch.cat([feats.mean(1, keepdim=True), feats.amax(1, keepdim=True)], dim=1)
+        out = self.proj(feats * self.sam(pooled))
+        h, c = self.hidden_dim, self.context_dim
+        net0 = torch.tanh(out[:, :h])
+        inp0 = torch.relu(out[:, h:h + c])
+        att0 = torch.sigmoid(out[:, -1:])
+        return net0, inp0, att0
+
+
+# ---------------------------------------------------------------------------
+# Stage 3 — 1-D Bilinear Correlation Lookup
+# ---------------------------------------------------------------------------
+
+class CorrLookup1D(nn.Module):
+    """
+    Sample 2*radius+1 correlation values from the GWC volume around
+    the current disparity estimate via bilinear interpolation.
+
+    Returns: corr_features [B, G*(2*radius+1), H, W]
+    """
+
+    def __init__(self, radius=4, num_groups=8):
+        super().__init__()
+        self.radius     = radius
+        self.num_groups = num_groups  # stored only; forward() reads the group count from the volume's shape
+        offsets = torch.arange(-radius, radius + 1, dtype=torch.float32)  # -radius .. +radius
+        self.register_buffer("offsets", offsets)  # buffer, so it moves with the module across devices
+
+    def forward(self, volume, disp):
+        B, G, D, H, W = volume.shape
+        r = self.radius
+
+        sample_d      = disp + self.offsets.view(1, -1, 1, 1)  # assumes disp is [B,1,H,W] -> [B, 2r+1, H, W]; confirm with caller
+        sample_d_norm = 2.0 * sample_d / max(D - 1, 1) - 1.0  # disparity index 0..D-1 -> [-1, 1]; max() guards D == 1
+
+        vol_flat  = volume.view(B * G, 1, D, H * W)  # each group becomes a 1-channel map: height = disparity, width = flattened pixel
+        S         = H * W
+        n_samples = 2 * r + 1
+
+        spatial_norm = torch.linspace(-1, 1, S, device=volume.device)  # x coordinate of every flattened pixel
+        spatial_norm = spatial_norm.view(1, 1, 1, S).expand(B, n_samples, 1, S)
+        d_norm = sample_d_norm.view(B, n_samples, H, W).reshape(B, n_samples, 1, S)
+
+        grid = torch.stack(
+            [spatial_norm.expand(B, n_samples, 1, S).reshape(B, n_samples, S),
+             d_norm.reshape(B, n_samples, S)],
+            dim=-1,
+        )  # last axis is (x = pixel position, y = disparity position)
+        grid = grid.unsqueeze(1).expand(B, G, n_samples, S, 2).reshape(B * G, n_samples, S, 2)  # same grid for every group
+
+        sampled = F.grid_sample(vol_flat, grid, mode='bilinear',
+                                padding_mode='zeros', align_corners=True)  # samples outside the volume contribute zeros
+        return sampled.view(B, G, n_samples, H, W).view(B, G * n_samples, H, W)  # fold the samples into the channel axis
+
+
+# ---------------------------------------------------------------------------
+# Stage 3 — Motion Encoder + DS-ConvGRU
+# ---------------------------------------------------------------------------
+
+class MotionEncoder(nn.Module):
+    """
+    Encodes correlation features + current disparity + sensor prior
+    into a motion feature for the GRU (hidden_dim - 1 fused channels + raw disp).
+
+    motion_ch sets the corr path width; disp/prior paths use max(4, motion_ch // 4).
+    """
+
+    def __init__(self, corr_ch, prior_ch=2, hidden_dim=16, motion_ch=64):
+        super().__init__()
+        disp_ch  = max(4, motion_ch // 4)  # width of the disparity path
+        prior_w  = max(4, motion_ch // 4)  # width of the sensor-prior path
+        fuse_in  = motion_ch + disp_ch + prior_w  # channels after concatenating the three paths
+
+        self.corr_net = nn.Sequential(
+            nn.Conv2d(corr_ch, motion_ch, 1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(motion_ch, motion_ch, 3, padding=1),
+            nn.ReLU(inplace=True),
+        )
+        self.disp_net = nn.Sequential(
+            nn.Conv2d(1, disp_ch, 7, padding=3),  # disparity input is single-channel
+            nn.ReLU(inplace=True),
+            nn.Conv2d(disp_ch, disp_ch, 3, padding=1),
+            nn.ReLU(inplace=True),
+        )
+        self.prior_net = nn.Sequential(
+            nn.Conv2d(prior_ch, prior_w, 3, padding=1),
+            nn.ReLU(inplace=True),
+        )
+        self.fuse = nn.Conv2d(fuse_in, hidden_dim - 1, 1)  # one channel is left for the raw disparity
+
+    def forward(self, disp, corr, prior):
+        c = self.corr_net(corr)
+        d = self.disp_net(disp)
+        p = self.prior_net(prior)
+        fused = F.relu(self.fuse(torch.cat([c, d, p], dim=1)))
+        return torch.cat([fused, disp], dim=1)  # disparity is appended unprocessed as the last channel
+
+
+class DepthwiseSeparableConvGRU(nn.Module):
+    """ConvGRU cell using depthwise-separable convolutions; forward(h, x) returns the new hidden state."""
+
+    def __init__(self, hidden_dim, input_dim, kernel_size=3):
+        super().__init__()
+        total = hidden_dim + input_dim  # every gate convolves h and x concatenated on channels
+        pad   = kernel_size // 2  # presumably 'same' padding for odd kernels — DepthwiseSeparableConv2d is defined elsewhere
+        self.convz = DepthwiseSeparableConv2d(total, hidden_dim, kernel_size, padding=pad)
+        self.convr = DepthwiseSeparableConv2d(total, hidden_dim, kernel_size, padding=pad)
+        self.convq = DepthwiseSeparableConv2d(total, hidden_dim, kernel_size, padding=pad)
+
+    def forward(self, h, x):
+        hx = torch.cat([h, x], dim=1)
+        z  = torch.sigmoid(self.convz(hx))  # update gate
+        r  = torch.sigmoid(self.convr(hx))  # reset gate
+        q  = torch.tanh(self.convq(torch.cat([r * h, x], dim=1)))  # candidate state from the reset-gated h
+        return (1 - z) * h + z * q  # blend old state and candidate by the update gate
+
+
+class DispHead(nn.Module):
+    """Predicts a residual disparity delta (one channel, unbounded) from GRU hidden state."""
+
+    def __init__(self, hidden_dim):
+        super().__init__()
+        self.conv = nn.Sequential(
+            nn.Conv2d(hidden_dim, hidden_dim, 3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(hidden_dim, 1, 3, padding=1),  # no activation after this layer, so the delta may take either sign
+        )
+
+    def forward(self, h):
+        return self.conv(h)  # spatial size is preserved (3x3 kernels with padding=1)
+
+
+# ---------------------------------------------------------------------------
+# Stage 4 — Learned Convex Upsampling (3×3 mask head)
+# ---------------------------------------------------------------------------
+
+class ConvexUpsample(nn.Module):
+    """
+    Predict 9 softmax weights for each of the f*f sub-pixels of a coarse pixel (f = upsample_factor, default 4).
+    """
+
+    def __init__(self, hidden_dim, upsample_factor=4):
+        super().__init__()
+        self.factor    = upsample_factor
+        self.mask_head = nn.Conv2d(hidden_dim, upsample_factor ** 2 * 9, 3, padding=1)  # 9 weights x f*f sub-pixels
+
+    def forward(self, h, disp_low):
+        B, _, H, W = disp_low.shape
+        f = self.factor
+
+        mask = self.mask_head(h).view(B, 1, 9, f, f, H, W)
+        mask = F.softmax(mask, dim=2)  # the 9 neighbour weights are non-negative and sum to 1
+
+        disp_unfold = F.unfold(disp_low * f, 3, padding=1).view(B, 1, 9, 1, 1, H, W)  # 3x3 coarse neighbourhood, values scaled by f
+        up = (mask * disp_unfold).sum(dim=2)  # [B, 1, f, f, H, W]
+        return up.permute(0, 1, 4, 2, 5, 3).reshape(B, 1, H * f, W * f)  # interleave sub-pixels into the fine grid
+
+
+# ---------------------------------------------------------------------------
+# Full Model
+# ---------------------------------------------------------------------------
+
+class SmartLiteStereo(nn.Module):
+    """
+    Smart Lite Stereo network.  Use ``build_model(size)`` for the standard
+    S / M / L presets, or construct directly for custom configurations.
+
+    Args:
+        max_disp:              max disparity at full resolution (default 192)
+        feature_ch:            backbone output channels
+        backbone_mid1:         backbone stem output channels
+        backbone_mid2:         backbone stage-1 output channels
+        backbone_extra_blocks: extra refinement blocks in backbone stage-2
+        hidden_dim:            GRU hidden-state channels
+        context_dim:           context channels injected each GRU iteration
+        context_mid:           internal width of ContextNet convolutions
+        motion_ch:             internal width of MotionEncoder paths
+        num_groups:            GWC correlation groups
+        corr_radius:           1-D lookup radius (2r+1 samples per pixel)
+        num_iters:             default GRU iterations
+    """
+
+    def __init__(
+        self,
+        max_disp: int = 192,
+        feature_ch: int = 32,
+        backbone_mid1: int = 16,
+        backbone_mid2: int = 24,
+        backbone_extra_blocks: int = 0,
+        hidden_dim: int = 16,
+        context_dim: int = 24,
+        context_mid: int = 64,
+        motion_ch: int = 64,
+        num_groups: int = 8,
+        corr_radius: int = 4,
+        num_iters: int = 4,
+    ):
+        super().__init__()
+        self.max_disp    = max_disp
+        self.max_disp_q4 = max_disp // 4
+        self.hidden_dim  = hidden_dim
+        self.context_dim = context_dim
+        self.num_groups  = num_groups
+        self.num_iters   = num_iters
+
+        self.backbone = FeatureBackbone(
+            out_ch=feature_ch,
+            mid1_ch=backbone_mid1,
+            mid2_ch=backbone_mid2,
+            extra_blocks=backbone_extra_blocks,
+        )
+        self.coarse_head = CoarseDisparityHead(
+            in_ch=num_groups, mid_ch=num_groups, max_disp=self.max_disp_q4,
+        )
+        self.context_net = ContextNet(
+            in_ch=feature_ch, hidden_dim=hidden_dim,
+            context_dim=context_dim, mid_ch=context_mid,
+        )
+
+        corr_ch = num_groups * (2 * corr_radius + 1)
+        self.motion_encoder = MotionEncoder(
+            corr_ch=corr_ch, prior_ch=2,
+            hidden_dim=hidden_dim, motion_ch=motion_ch,
+        )
+        self.gru = DepthwiseSeparableConvGRU(
+            hidden_dim=hidden_dim, input_dim=hidden_dim + context_dim,
+        )
+        self.disp_head  = DispHead(hidden_dim)
+        self.corr_lookup = CorrLookup1D(radius=corr_radius, num_groups=num_groups)
+        self.upsample    = ConvexUpsample(hidden_dim=hidden_dim, upsample_factor=4)
+
+    def param_count(self) -> int:
+        """Return the total number of elements across all parameters."""
+        return sum(p.numel() for p in self.parameters())
+
+    def forward(
+        self,
+        left_rgb: torch.Tensor,
+        right_rgb: torch.Tensor,
+        rs_disp: torch.Tensor,
+        conf: torch.Tensor,
+        num_iters: int | None = None,
+        test_mode: bool = False,
+    ):
+        """
+        Args:
+            left_rgb:  [B, 3, H, W]
+            right_rgb: [B, 3, H, W]
+            rs_disp:   [B, 1, H, W]   RealSense raw disparity (0 = invalid)
+            conf:      [B, 1, H, W]   confidence (0 = invalid)
+            num_iters: override self.num_iters at inference
+            test_mode: True → return only final full-res disparity
+        Returns:
+            test_mode=True  → disparity [B, 1, H, W]
+            test_mode=False → (coarse [B,1,H/4,W/4], list of full-res preds)
+        Raises:
+            ValueError: test_mode=True with fewer than one GRU iteration.
+        """
+        iters = num_iters if num_iters is not None else self.num_iters
+        if test_mode and iters < 1:
+            raise ValueError(f"test_mode requires num_iters >= 1, got {iters}")
+        _, _, H, W = left_rgb.shape
+        Hq, Wq = H // 4, W // 4
+
+        feat_left, feat_right = self.backbone(left_rgb, right_rgb)
+        gwc_volume = build_gwc_volume(feat_left, feat_right, self.max_disp_q4, self.num_groups)
+
+        # NOTE(review): bilinear resampling mixes invalid (0) pixels into valid
+        # neighbours at hole borders while conf_q4 stays > 0 — confirm intended.
+        rs_disp_q4 = F.interpolate(rs_disp, (Hq, Wq), mode='bilinear', align_corners=False) * 0.25
+        conf_q4    = F.interpolate(conf, (Hq, Wq), mode='bilinear', align_corners=False)
+        prior = torch.cat([rs_disp_q4, conf_q4], dim=1)
+
+        coarse_disp = self.coarse_head(gwc_volume)
+        has_sensor  = (conf_q4 > 0).float()
+        disp = has_sensor * rs_disp_q4 + (1.0 - has_sensor) * coarse_disp
+
+        net, inp, att = self.context_net(feat_left)
+
+        disp_preds = []
+        for i in range(iters):
+            disp  = disp.detach()
+            corr  = self.corr_lookup(gwc_volume, disp)
+            motion = self.motion_encoder(disp, corr, prior)
+            net   = self.gru(net, torch.cat([motion, inp * att], dim=1))
+            disp  = disp + self.disp_head(net)
+
+            if test_mode and i < iters - 1:
+                continue
+
+            disp_up = self.upsample(net, disp)
+            disp_preds.append(disp_up)
+
+        if test_mode:
+            return disp_up
+
+        return coarse_disp, disp_preds
diff --git a/niv/test_server.py b/niv/test_server.py
new file mode 100644
index 0000000..7f40fd8
--- /dev/null
+++ b/niv/test_server.py
@@ -0,0 +1,195 @@
+import os
+import socket
+import threading
+import unittest
+import socketserver
+import cv2
+import numpy as np
+from niv.image_server import send_msg, recv_msg, ImageHandler
+
+_TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+_FIXTURE_LEFT = os.path.join(_TEST_DIR, 'sample', 'left.png')
+
+
+class TestProtocolHelpers(unittest.TestCase):
+    """Round-trip checks for the send_msg / recv_msg helpers over a socket pair."""
+
+    def _pair(self):
+        """Return a connected socket pair that is closed when the test ends."""
+        a, b = socket.socketpair()
+        # addCleanup runs even if an assertion fails, so neither end can leak;
+        # socket.close() is idempotent, so a test may still close an end itself.
+        self.addCleanup(a.close)
+        self.addCleanup(b.close)
+        return a, b
+
+    def _assert_roundtrip(self, payload):
+        """Send payload on one end and expect the identical bytes on the other."""
+        a, b = self._pair()
+        send_msg(a, payload)
+        self.assertEqual(recv_msg(b), payload)
+
+    def test_roundtrip_small(self):
+        """A short ASCII payload survives the round trip."""
+        self._assert_roundtrip(b'hello world')
+
+    def test_roundtrip_binary(self):
+        """All 256 byte values, repeated 100 times (25,600 bytes)."""
+        self._assert_roundtrip(bytes(range(256)) * 100)
+
+    def test_recv_returns_none_on_closed_socket(self):
+        """recv_msg reports a closed peer as None."""
+        a, b = self._pair()
+        a.close()  # peer goes away before anything was sent
+        # The receiving end is released by the cleanup registered in _pair.
+        self.assertIsNone(recv_msg(b))
+
+
+def _make_server() -> socketserver.ThreadingTCPServer:
+    # Local subclass: enables SO_REUSEADDR without mutating the shared stdlib class; port 0 = ephemeral.
+    cls = type('_ReusableServer', (socketserver.ThreadingTCPServer,), {'allow_reuse_address': True})
+    return cls(('127.0.0.1', 0), ImageHandler)
+
+
+class TestImageServer(unittest.TestCase):
+    """Talks to a live ImageHandler server over TCP using the raw protocol helpers."""
+
+    @classmethod
+    def setUpClass(cls):
+        cls.server = _make_server()
+        cls.port = cls.server.server_address[1]
+        cls.thread = threading.Thread(target=cls.server.serve_forever, daemon=True)
+        cls.thread.start()
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.server.shutdown()
+        cls.server.server_close()  # shutdown() only stops the loop; this releases the listening socket
+        cls.thread.join(timeout=2)
+
+    def _connect(self) -> socket.socket:
+        return socket.create_connection(('127.0.0.1', self.port))
+
+    def _send_image(self, sock: socket.socket, img: np.ndarray) -> None:
+        ok, jpeg = cv2.imencode('.jpg', img)
+        self.assertTrue(ok, 'cv2.imencode failed to encode the test image')
+        send_msg(sock, jpeg.tobytes())
+
+    def _recv_gray(self, sock: socket.socket) -> np.ndarray:
+        data = recv_msg(sock)
+        self.assertIsNotNone(data, 'Server returned no data')
+        gray = cv2.imdecode(np.frombuffer(data, dtype=np.uint8), cv2.IMREAD_GRAYSCALE)
+        self.assertIsNotNone(gray, 'Response could not be decoded as image')
+        return gray
+
+    def test_synthetic_image_becomes_grayscale(self):
+        img = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
+        with self._connect() as sock:
+            self._send_image(sock, img)
+            gray = self._recv_gray(sock)
+        self.assertEqual(gray.shape, (100, 100))
+
+    def test_multiple_images_same_connection(self):
+        with self._connect() as sock:
+            for _ in range(3):  # three request/response pairs on one connection
+                img = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
+                self._send_image(sock, img)
+                self.assertEqual(self._recv_gray(sock).shape, (64, 64))
+
+    @unittest.skipUnless(os.path.exists(_FIXTURE_LEFT), 'sample/left.png fixture not present')
+    def test_real_image(self):
+        img = cv2.imread(_FIXTURE_LEFT)
+        self.assertIsNotNone(img, 'sample/left.png must be readable')
+        with self._connect() as sock:
+            self._send_image(sock, img)
+            gray = self._recv_gray(sock)
+        self.assertEqual(gray.shape, img.shape[:2])
+
+
+class TestImageClient(unittest.TestCase):  # drives the ImageClient wrapper against a live server
+    @classmethod
+    def setUpClass(cls):
+        cls.server = _make_server()
+        cls.port = cls.server.server_address[1]
+        cls.thread = threading.Thread(target=cls.server.serve_forever, daemon=True)
+        cls.thread.start()
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.server.shutdown()
+        cls.server.server_close()  # shutdown() only stops the loop; this releases the listening socket
+        cls.thread.join(timeout=2)
+
+    def test_client_process_returns_grayscale(self):
+        from niv.image_client import ImageClient  # imported lazily, as in the other client tests
+        img = np.random.randint(0, 256, (80, 80, 3), dtype=np.uint8)
+        with ImageClient('127.0.0.1', self.port) as client:
+            self.assertEqual(client.process(img).shape, (80, 80))
+
+    def test_client_process_multiple_images(self):
+        from niv.image_client import ImageClient
+        with ImageClient('127.0.0.1', self.port) as client:
+            for size in [32, 64, 128]:  # differently sized frames over one client connection
+                img = np.random.randint(0, 256, (size, size, 3), dtype=np.uint8)
+                gray = client.process(img)
+                self.assertEqual(gray.shape, (size, size))
+
+
+class TestImageClientWebcam(unittest.TestCase):
+    """Interactive webcam test.
+
+    Disabled by default because it requires a physical camera and a display.
+    Enable by setting the environment variable NIV_WEBCAM_TEST=1.
+    Press 'q' in the display window to finish the test.
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        if os.environ.get('NIV_WEBCAM_TEST') != '1':
+            raise unittest.SkipTest('Set NIV_WEBCAM_TEST=1 to run the webcam test')
+        cls.server = _make_server()
+        cls.port = cls.server.server_address[1]
+        cls.thread = threading.Thread(target=cls.server.serve_forever, daemon=True)
+        cls.thread.start()
+
+    @classmethod
+    def tearDownClass(cls):
+        # unittest does not call this when setUpClass raised SkipTest, so no guard is needed.
+        cls.server.shutdown()
+        cls.server.server_close()  # release the listening socket
+        cls.thread.join(timeout=2)
+
+    def test_webcam_roundtrip_display(self):
+        from niv.image_client import ImageClient
+        cam_index = int(os.environ.get('NIV_WEBCAM_INDEX', '0'))
+        cap = cv2.VideoCapture(cam_index)
+        if not cap.isOpened():
+            self.skipTest(f'Cannot open webcam at index {cam_index}')
+
+        frames_processed = 0
+        try:
+            with ImageClient('127.0.0.1', self.port) as client:
+                while True:
+                    ret, frame = cap.read()
+                    if not ret:
+                        self.fail('Failed to capture frame from webcam')
+                    gray = client.process(frame)
+                    self.assertEqual(gray.shape, frame.shape[:2])
+                    cv2.imshow('Webcam (original)', frame)
+                    cv2.imshow('Server response (grayscale)', gray)
+                    frames_processed += 1
+                    if cv2.waitKey(1) & 0xFF == ord('q'):
+                        break
+        finally:
+            cap.release()
+            cv2.destroyAllWindows()
+
+        self.assertGreater(frames_processed, 0)
+
+
+if __name__ == '__main__':
+    # Go through the unittest runner so class fixtures, skips and failure
+    # reporting are honoured. To run only the interactive webcam test:
+    #   NIV_WEBCAM_TEST=1 python -m unittest niv.test_server.TestImageClientWebcam
+    # (NIV_WEBCAM_INDEX selects the camera; the default index is 0.)
+    unittest.main()
diff --git a/reports/faro_benchmark/close_range_analysis.png b/reports/faro_benchmark/close_range_analysis.png
new file mode 100644
index 0000000..547bead
Binary files /dev/null and b/reports/faro_benchmark/close_range_analysis.png differ
diff --git a/reports/faro_benchmark/coverage_heatmaps.png b/reports/faro_benchmark/coverage_heatmaps.png
new file mode 100644
index 0000000..4e84b62
Binary files /dev/null and b/reports/faro_benchmark/coverage_heatmaps.png differ
diff --git a/reports/faro_benchmark/depth_comparison.png b/reports/faro_benchmark/depth_comparison.png
new file mode 100644
index 0000000..50eeb3d
Binary files /dev/null and b/reports/faro_benchmark/depth_comparison.png differ
diff --git a/reports/faro_benchmark/distance_error_curve.png b/reports/faro_benchmark/distance_error_curve.png
new file mode 100644
index 0000000..a30bfe9
Binary files /dev/null and b/reports/faro_benchmark/distance_error_curve.png differ
diff --git a/reports/faro_benchmark/error_histograms.png b/reports/faro_benchmark/error_histograms.png
new file mode 100644
index 0000000..3992406
Binary files /dev/null and b/reports/faro_benchmark/error_histograms.png differ
diff --git a/reports/faro_benchmark/error_maps.png b/reports/faro_benchmark/error_maps.png
new file mode 100644
index 0000000..24647a2
Binary files /dev/null and b/reports/faro_benchmark/error_maps.png differ
diff --git a/reports/faro_benchmark/index.html b/reports/faro_benchmark/index.html
new file mode 100644
index 0000000..7633a64
--- /dev/null
+++ b/reports/faro_benchmark/index.html
@@ -0,0 +1,141 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>Depth Quality Report — rs-enhanced-depth</title>
+  <style>
+    * { box-sizing: border-box; margin: 0; padding: 0; }
+    body { font-family: system-ui, -apple-system, sans-serif; background: #f0f2f5;
+            color: #222; max-width: 1200px; margin: 0 auto; padding: 24px 16px; }
+    h1 { font-size: 1.8em; color: #1a2e4a; margin-bottom: 4px; }
+    .subtitle { color: #555; font-size: .95em; margin-bottom: 8px; }
+    h2 { font-size: 1.15em; color: #1a2e4a; border-bottom: 2px solid #0f3460;
+          padding-bottom: 6px; margin-bottom: 16px; }
+    .meta { background: white; border-radius: 10px; padding: 16px 20px;
+             margin-bottom: 24px; box-shadow: 0 2px 6px rgba(0,0,0,.08); }
+    .meta table { border-collapse: collapse; width: 100%; font-size: .9em; }
+    .meta td { padding: 5px 12px; }
+    .meta tr:nth-child(even) { background: #f5f7fa; }
+    .section { background: white; border-radius: 10px; padding: 20px 24px;
+                margin-bottom: 24px; box-shadow: 0 2px 6px rgba(0,0,0,.08); }
+    .figure-wrapper { text-align: center; }
+    .figure-wrapper img { max-width: 100%; height: auto;
+                           border: 1px solid #e0e0e0; border-radius: 6px; }
+    .caption { font-size: .83em; color: #666; margin-top: 10px; }
+    .legend { background: #e8f4fd; border: 1px solid #b3d7f0; border-radius: 10px;
+               padding: 18px 22px; margin-bottom: 24px; }
+    .legend h3 { font-size: 1em; color: #1a5276; margin-bottom: 10px; }
+    .legend table { border-collapse: collapse; width: 100%; font-size: .85em; }
+    .legend td { padding: 5px 10px; vertical-align: top; }
+    .legend td:first-child { white-space: nowrap; width: 110px; }
+    .legend tr:nth-child(even) { background: rgba(255,255,255,.5); }
+    footer { text-align: center; color: #999; font-size: .8em; padding: 24px 0 8px; }
+    .pdf-btn {
+      display: inline-flex; align-items: center; gap: 7px;
+      background: #0f3460; color: white; border: none; border-radius: 7px;
+      padding: 9px 20px; font-size: .95em; font-weight: 600; cursor: pointer;
+      margin: 12px 0 20px; text-decoration: none; transition: background .15s;
+    }
+    .pdf-btn:hover { background: #16213e; }
+    @media print {
+      .pdf-btn { display: none; }
+      body { background: white; padding: 0; }
+      .section { box-shadow: none; border: 1px solid #ddd; break-inside: avoid; }
+      .meta { box-shadow: none; border: 1px solid #ddd; }
+    }
+  </style>
+</head>
+<body>
+  <h1>Depth Quality Report</h1>
+  <p class="subtitle">rs-enhanced-depth — multi-method depth quality analysis</p>
+  <button class="pdf-btn" onclick="window.print()">&#x1F4E5; Export as PDF</button>
+  <div class="meta"><table><tr><td><b>Source</b></td><td>FARO dataset (/mnt/algonas/Local/Data/Stereo/Faro/FARO_DATA_BASE)</td></tr><tr><td><b>Frames</b></td><td>702</td></tr><tr><td><b>Resolution</b></td><td>1280×720</td></tr><tr><td><b>Ground truth</b></td><td>FARO GT</td></tr><tr><td><b>Methods evaluated</b></td><td>FARO GT, Fine-tuned on FARO, Original model</td></tr><tr><td><b>Generated</b></td><td>2026-03-29 15:13:49</td></tr></table></div>
+
+    <div class="section">
+      <h2>Model Configuration</h2>
+      <table style="border-collapse:collapse;width:100%;font-size:.9em;">
+        <tr style="background:#2c3e50;color:white;font-weight:bold;"><td style="padding:6px 12px;">Method</td><td style="padding:6px 12px;">max_disp</td><td style="padding:6px 12px;">valid_iters</td><td style="padding:6px 12px;">Engine Resolution</td><td style="padding:6px 12px;">Engine Dir</td></tr>
+        <tr><td style="padding:6px 12px;">Original model</td><td style="padding:6px 12px;">—</td><td style="padding:6px 12px;">—</td><td style="padding:6px 12px;">—</td><td style="padding:6px 12px;font-size:.8em;">—</td></tr>
+        <tr><td style="padding:6px 12px;">Fine-tuned on FARO</td><td style="padding:6px 12px;">—</td><td style="padding:6px 12px;">—</td><td style="padding:6px 12px;">—</td><td style="padding:6px 12px;font-size:.8em;">—</td></tr>
+      </table>
+    </div>
+
+    <div class="section">
+      <h2>Depth Comparison</h2>
+      <div class="figure-wrapper">
+        <img src="depth_comparison.png" alt="Depth Comparison">
+        <p class="caption">Side-by-side depth maps from a single representative frame. Invalid pixels are black.</p>
+      </div>
+    </div>
+    <div class="section">
+      <h2>Error Maps</h2>
+      <div class="figure-wrapper">
+        <img src="error_maps.png" alt="Error Maps">
+        <p class="caption">Per-pixel absolute error |pred − GT| clipped at 0.5 m. Brighter = more error.</p>
+      </div>
+    </div>
+    <div class="section">
+      <h2>Coverage Heatmaps</h2>
+      <div class="figure-wrapper">
+        <img src="coverage_heatmaps.png" alt="Coverage Heatmaps">
+        <p class="caption">Fraction of frames each pixel has valid depth, averaged over all benchmark frames.</p>
+      </div>
+    </div>
+    <div class="section">
+      <h2>Distance Error Curve</h2>
+      <div class="figure-wrapper">
+        <img src="distance_error_curve.png" alt="Distance Error Curve">
+        <p class="caption">Mean Absolute Error (MAE) broken down by distance range.</p>
+      </div>
+    </div>
+    <div class="section">
+      <h2>Error Histograms</h2>
+      <div class="figure-wrapper">
+        <img src="error_histograms.png" alt="Error Histograms">
+        <p class="caption">Distribution of per-pixel absolute errors from the stored visualisation frames.</p>
+      </div>
+    </div>
+    <div class="section">
+      <h2>Summary Table</h2>
+      <div class="figure-wrapper">
+        <img src="summary_table.png" alt="Summary Table">
+        <p class="caption">Aggregate quality metrics — see legend below the table for column explanations.</p>
+      </div>
+    </div>
+    <div class="legend">
+      <h3>How to Read the Summary Table</h3>
+      <table>
+        <tr><td><b>MRE* (%)</b></td>
+            <td><b>Overall score (recommended).</b> Mean Relative Error with hole penalty &mdash; pixels where the method has no depth but ground truth does count as 100% error. This is the fairest single metric because it penalises both inaccuracy and missing coverage. <b>Lower is better.</b></td></tr>
+        <tr><td><b>MRE (%)</b></td>
+            <td>Mean Relative Error over valid pixels only (holes ignored). 5% means each measured pixel is ~5% off on average. <b>Lower is better.</b></td></tr>
+        <tr><td><b>MAE (m)</b></td>
+            <td>Mean Absolute Error in meters, valid pixels only. <b>Lower is better.</b></td></tr>
+        <tr><td><b>&delta;1 (%)</b></td>
+            <td>Percentage of valid pixels within 1.25&times; of ground truth depth. <b>Higher is better.</b> 100% is perfect.</td></tr>
+        <tr><td><b>Coverage (%)</b></td>
+            <td>Percentage of pixels that produced valid depth. <b>Higher is better.</b> MinZ improves this at close range (&lt;0.55m) by filling holes the hardware camera cannot see.</td></tr>
+        <tr><td><b>FPS</b></td>
+            <td>Processing speed (frames per second). <b>Higher is faster.</b></td></tr>
+        <tr><td><b>GT?</b></td>
+            <td>&starf; GT marks the ground truth method (NNDepth accurate). Its error columns show "&mdash;" because you don't compare ground truth to itself.</td></tr>
+      </table>
+    </div>
+    <div class="section">
+      <h2>Close Range Analysis</h2>
+      <div class="figure-wrapper">
+        <img src="close_range_analysis.png" alt="Close Range Analysis">
+        <p class="caption">Coverage and stability for objects closer than 0.55 m. Highlights MinZ benefit.</p>
+      </div>
+    </div>
+    <div class="section">
+      <h2>Timing Bars</h2>
+      <div class="figure-wrapper">
+        <img src="timing_bars.png" alt="Timing Bars">
+        <p class="caption">Processing speed in FPS. Hardware baseline is fixed at ~30 FPS (camera frame rate).</p>
+      </div>
+    </div>
+  <footer>Generated by <code>tests/depth_report/</code> on 2026-03-29 15:13:49</footer>
+</body>
+</html>
\ No newline at end of file
diff --git a/reports/faro_benchmark/results.json b/reports/faro_benchmark/results.json
new file mode 100644
index 0000000..68093c4
--- /dev/null
+++ b/reports/faro_benchmark/results.json
@@ -0,0 +1,64 @@
+{
+  "source": "FARO dataset (/mnt/algonas/Local/Data/Stereo/Faro/FARO_DATA_BASE)",
+  "n_frames": 702,
+  "resolution": "1280x720",
+  "ground_truth": "faro_gt",
+  "distance_bins": [
+    "0\u20131 m",
+    "1\u20132 m",
+    "2\u20133 m",
+    "3\u20135 m"
+  ],
+  "model_configs": {
+    "original": {
+      "model_path": "/home/adiroha/repos/Fast-FoundationStereo/scripts/../weights/20-30-48/model_best_bp2_serialize.pth"
+    },
+    "finetuned": {
+      "model_path": "/home/adiroha/repos/Fast-FoundationStereo/scripts/../weights/20-30-48/model_finetuned_faro_kitchen.pth"
+    }
+  },
+  "methods": {
+    "faro_gt": {
+      "label": "FARO GT",
+      "mae_mean": 0.0,
+      "mae_std": 0.0,
+      "rmse_mean": 0.0,
+      "mre_mean": 0.0,
+      "delta1_mean": 100.0,
+      "coverage_mean": 99.32362520773978,
+      "fps_mean": -1,
+      "time_ms_mean": 0.0,
+      "close_range_coverage": 25.356125356125357,
+      "gpu_load_mean": 0.0,
+      "is_ground_truth": true
+    },
+    "finetuned": {
+      "label": "Fine-tuned on FARO",
+      "mae_mean": 0.009571012443838975,
+      "mae_std": 0.0034880630610683075,
+      "rmse_mean": 0.03290741254795778,
+      "mre_mean": 0.005161342896292904,
+      "delta1_mean": 99.92280284364074,
+      "coverage_mean": 99.32361763389324,
+      "fps_mean": 6.596843296084722,
+      "time_ms_mean": 151.58765414262726,
+      "close_range_coverage": 25.356125356125357,
+      "gpu_load_mean": 0.0,
+      "is_ground_truth": false
+    },
+    "original": {
+      "label": "Original model",
+      "mae_mean": 0.02072368732167997,
+      "mae_std": 0.007662992898661906,
+      "rmse_mean": 0.06485350047400025,
+      "mre_mean": 0.011096837766537512,
+      "delta1_mean": 99.66651418755849,
+      "coverage_mean": 99.32362520773978,
+      "fps_mean": 6.876085113326082,
+      "time_ms_mean": 145.43159130796195,
+      "close_range_coverage": 25.356125356125357,
+      "gpu_load_mean": 0.0,
+      "is_ground_truth": false
+    }
+  }
+}
\ No newline at end of file
diff --git a/reports/faro_benchmark/summary_table.png b/reports/faro_benchmark/summary_table.png
new file mode 100644
index 0000000..f939d52
Binary files /dev/null and b/reports/faro_benchmark/summary_table.png differ
diff --git a/reports/faro_benchmark/timing_bars.png b/reports/faro_benchmark/timing_bars.png
new file mode 100644
index 0000000..cd04e94
Binary files /dev/null and b/reports/faro_benchmark/timing_bars.png differ
diff --git a/reports/faro_benchmark_office/close_range_analysis.png b/reports/faro_benchmark_office/close_range_analysis.png
new file mode 100644
index 0000000..2fd72b3
Binary files /dev/null and b/reports/faro_benchmark_office/close_range_analysis.png differ
diff --git a/reports/faro_benchmark_office/coverage_heatmaps.png b/reports/faro_benchmark_office/coverage_heatmaps.png
new file mode 100644
index 0000000..24fcb8b
Binary files /dev/null and b/reports/faro_benchmark_office/coverage_heatmaps.png differ
diff --git a/reports/faro_benchmark_office/depth_comparison.png b/reports/faro_benchmark_office/depth_comparison.png
new file mode 100644
index 0000000..492edf7
Binary files /dev/null and b/reports/faro_benchmark_office/depth_comparison.png differ
diff --git a/reports/faro_benchmark_office/distance_error_curve.png b/reports/faro_benchmark_office/distance_error_curve.png
new file mode 100644
index 0000000..8df6473
Binary files /dev/null and b/reports/faro_benchmark_office/distance_error_curve.png differ
diff --git a/reports/faro_benchmark_office/error_histograms.png b/reports/faro_benchmark_office/error_histograms.png
new file mode 100644
index 0000000..bde7364
Binary files /dev/null and b/reports/faro_benchmark_office/error_histograms.png differ
diff --git a/reports/faro_benchmark_office/error_maps.png b/reports/faro_benchmark_office/error_maps.png
new file mode 100644
index 0000000..8735645
Binary files /dev/null and b/reports/faro_benchmark_office/error_maps.png differ
diff --git a/reports/faro_benchmark_office/index.html b/reports/faro_benchmark_office/index.html
new file mode 100644
index 0000000..6c14cb4
--- /dev/null
+++ b/reports/faro_benchmark_office/index.html
@@ -0,0 +1,141 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>Depth Quality Report — rs-enhanced-depth</title>
+  <style>
+    * { box-sizing: border-box; margin: 0; padding: 0; }
+    body { font-family: system-ui, -apple-system, sans-serif; background: #f0f2f5;
+            color: #222; max-width: 1200px; margin: 0 auto; padding: 24px 16px; }
+    h1 { font-size: 1.8em; color: #1a2e4a; margin-bottom: 4px; }
+    .subtitle { color: #555; font-size: .95em; margin-bottom: 8px; }
+    h2 { font-size: 1.15em; color: #1a2e4a; border-bottom: 2px solid #0f3460;
+          padding-bottom: 6px; margin-bottom: 16px; }
+    .meta { background: white; border-radius: 10px; padding: 16px 20px;
+             margin-bottom: 24px; box-shadow: 0 2px 6px rgba(0,0,0,.08); }
+    .meta table { border-collapse: collapse; width: 100%; font-size: .9em; }
+    .meta td { padding: 5px 12px; }
+    .meta tr:nth-child(even) { background: #f5f7fa; }
+    .section { background: white; border-radius: 10px; padding: 20px 24px;
+                margin-bottom: 24px; box-shadow: 0 2px 6px rgba(0,0,0,.08); }
+    .figure-wrapper { text-align: center; }
+    .figure-wrapper img { max-width: 100%; height: auto;
+                           border: 1px solid #e0e0e0; border-radius: 6px; }
+    .caption { font-size: .83em; color: #666; margin-top: 10px; }
+    .legend { background: #e8f4fd; border: 1px solid #b3d7f0; border-radius: 10px;
+               padding: 18px 22px; margin-bottom: 24px; }
+    .legend h3 { font-size: 1em; color: #1a5276; margin-bottom: 10px; }
+    .legend table { border-collapse: collapse; width: 100%; font-size: .85em; }
+    .legend td { padding: 5px 10px; vertical-align: top; }
+    .legend td:first-child { white-space: nowrap; width: 110px; }
+    .legend tr:nth-child(even) { background: rgba(255,255,255,.5); }
+    footer { text-align: center; color: #999; font-size: .8em; padding: 24px 0 8px; }
+    .pdf-btn {
+      display: inline-flex; align-items: center; gap: 7px;
+      background: #0f3460; color: white; border: none; border-radius: 7px;
+      padding: 9px 20px; font-size: .95em; font-weight: 600; cursor: pointer;
+      margin: 12px 0 20px; text-decoration: none; transition: background .15s;
+    }
+    .pdf-btn:hover { background: #16213e; }
+    @media print {
+      .pdf-btn { display: none; }
+      body { background: white; padding: 0; }
+      .section { box-shadow: none; border: 1px solid #ddd; break-inside: avoid; }
+      .meta { box-shadow: none; border: 1px solid #ddd; }
+    }
+  </style>
+</head>
+<body>
+  <h1>Depth Quality Report</h1>
+  <p class="subtitle">rs-enhanced-depth — multi-method depth quality analysis</p>
+  <button class="pdf-btn" onclick="window.print()">&#x1F4E5; Export as PDF</button>
+  <div class="meta"><table><tr><td><b>Source</b></td><td>FARO dataset (/mnt/algonas/Local/Data/Stereo/Faro/FARO_DATA_BASE)</td></tr><tr><td><b>Frames</b></td><td>445</td></tr><tr><td><b>Resolution</b></td><td>1280×720</td></tr><tr><td><b>Ground truth</b></td><td>FARO GT</td></tr><tr><td><b>Methods evaluated</b></td><td>FARO GT, Fine-tuned on FARO, Original model</td></tr><tr><td><b>Generated</b></td><td>2026-04-12 14:19:16</td></tr></table></div>
+
+    <div class="section">
+      <h2>Model Configuration</h2>
+      <table style="border-collapse:collapse;width:100%;font-size:.9em;">
+        <tr style="background:#2c3e50;color:white;font-weight:bold;"><td style="padding:6px 12px;">Method</td><td style="padding:6px 12px;">max_disp</td><td style="padding:6px 12px;">valid_iters</td><td style="padding:6px 12px;">Engine Resolution</td><td style="padding:6px 12px;">Engine Dir</td></tr>
+        <tr><td style="padding:6px 12px;">Original model</td><td style="padding:6px 12px;">—</td><td style="padding:6px 12px;">—</td><td style="padding:6px 12px;">—</td><td style="padding:6px 12px;font-size:.8em;">—</td></tr>
+        <tr><td style="padding:6px 12px;">Fine-tuned on FARO</td><td style="padding:6px 12px;">—</td><td style="padding:6px 12px;">—</td><td style="padding:6px 12px;">—</td><td style="padding:6px 12px;font-size:.8em;">—</td></tr>
+      </table>
+    </div>
+
+    <div class="section">
+      <h2>Depth Comparison</h2>
+      <div class="figure-wrapper">
+        <img src="depth_comparison.png" alt="Depth Comparison">
+        <p class="caption">Side-by-side depth maps from a single representative frame. Invalid pixels are black.</p>
+      </div>
+    </div>
+    <div class="section">
+      <h2>Error Maps</h2>
+      <div class="figure-wrapper">
+        <img src="error_maps.png" alt="Error Maps">
+        <p class="caption">Per-pixel absolute error |pred − GT| clipped at 0.5 m. Brighter = more error.</p>
+      </div>
+    </div>
+    <div class="section">
+      <h2>Coverage Heatmaps</h2>
+      <div class="figure-wrapper">
+        <img src="coverage_heatmaps.png" alt="Coverage Heatmaps">
+        <p class="caption">Fraction of frames each pixel has valid depth, averaged over all benchmark frames.</p>
+      </div>
+    </div>
+    <div class="section">
+      <h2>Distance Error Curve</h2>
+      <div class="figure-wrapper">
+        <img src="distance_error_curve.png" alt="Distance Error Curve">
+        <p class="caption">Mean Absolute Error (MAE) broken down by distance range.</p>
+      </div>
+    </div>
+    <div class="section">
+      <h2>Error Histograms</h2>
+      <div class="figure-wrapper">
+        <img src="error_histograms.png" alt="Error Histograms">
+        <p class="caption">Distribution of per-pixel absolute errors from the stored visualisation frames.</p>
+      </div>
+    </div>
+    <div class="section">
+      <h2>Summary Table</h2>
+      <div class="figure-wrapper">
+        <img src="summary_table.png" alt="Summary Table">
+        <p class="caption">Aggregate quality metrics — see legend below the table for column explanations.</p>
+      </div>
+    </div>
+    <div class="legend">
+      <h3>How to Read the Summary Table</h3>
+      <table>
+        <tr><td><b>MRE* (%)</b></td>
+            <td><b>Overall score (recommended).</b> Mean Relative Error with hole penalty &mdash; pixels where the method has no depth but ground truth does count as 100% error. This is the fairest single metric because it penalises both inaccuracy and missing coverage. <b>Lower is better.</b></td></tr>
+        <tr><td><b>MRE (%)</b></td>
+            <td>Mean Relative Error over valid pixels only (holes ignored). 5% means each measured pixel is ~5% off on average. <b>Lower is better.</b></td></tr>
+        <tr><td><b>MAE (m)</b></td>
+            <td>Mean Absolute Error in meters, valid pixels only. <b>Lower is better.</b></td></tr>
+        <tr><td><b>&delta;1 (%)</b></td>
+            <td>Percentage of valid pixels within 1.25&times; of ground truth depth. <b>Higher is better.</b> 100% is perfect.</td></tr>
+        <tr><td><b>Coverage (%)</b></td>
+            <td>Percentage of pixels that produced valid depth. <b>Higher is better.</b> MinZ improves this at close range (&lt;0.55m) by filling holes the hardware camera cannot see.</td></tr>
+        <tr><td><b>FPS</b></td>
+            <td>Processing speed (frames per second). <b>Higher is faster.</b></td></tr>
+        <tr><td><b>GT?</b></td>
+            <td>&starf; GT marks the ground truth method (NNDepth accurate). Its error columns show "&mdash;" because you don't compare ground truth to itself.</td></tr>
+      </table>
+    </div>
+    <div class="section">
+      <h2>Close Range Analysis</h2>
+      <div class="figure-wrapper">
+        <img src="close_range_analysis.png" alt="Close Range Analysis">
+        <p class="caption">Coverage and stability for objects closer than 0.55 m. Highlights MinZ benefit.</p>
+      </div>
+    </div>
+    <div class="section">
+      <h2>Timing Bars</h2>
+      <div class="figure-wrapper">
+        <img src="timing_bars.png" alt="Timing Bars">
+        <p class="caption">Processing speed in FPS. Hardware baseline is fixed at ~30 FPS (camera frame rate).</p>
+      </div>
+    </div>
+  <footer>Generated by <code>tests/depth_report/</code> on 2026-04-12 14:19:16</footer>
+</body>
+</html>
\ No newline at end of file
diff --git a/reports/faro_benchmark_office/results.json b/reports/faro_benchmark_office/results.json
new file mode 100644
index 0000000..30ba0a2
--- /dev/null
+++ b/reports/faro_benchmark_office/results.json
@@ -0,0 +1,64 @@
+{
+  "source": "FARO dataset (/mnt/algonas/Local/Data/Stereo/Faro/FARO_DATA_BASE)",
+  "n_frames": 445,
+  "resolution": "1280x720",
+  "ground_truth": "faro_gt",
+  "distance_bins": [
+    "0\u20131 m",
+    "1\u20132 m",
+    "2\u20133 m",
+    "3\u20135 m"
+  ],
+  "model_configs": {
+    "original": {
+      "model_path": "/home/adiroha/repos/Fast-FoundationStereo/scripts/../weights/20-30-48/model_best_bp2_serialize.pth"
+    },
+    "finetuned": {
+      "model_path": "/home/adiroha/repos/Fast-FoundationStereo/scripts/../weights/20-30-48/model_finetuned_faro_kitchen.pth"
+    }
+  },
+  "methods": {
+    "faro_gt": {
+      "label": "FARO GT",
+      "mae_mean": 0.0,
+      "mae_std": 0.0,
+      "rmse_mean": 0.0,
+      "mre_mean": 0.0,
+      "delta1_mean": 100.0,
+      "coverage_mean": 99.13976791705682,
+      "fps_mean": -1,
+      "time_ms_mean": 0.0,
+      "close_range_coverage": 8.539325842696629,
+      "gpu_load_mean": 0.0,
+      "is_ground_truth": true
+    },
+    "finetuned": {
+      "label": "Fine-tuned on FARO",
+      "mae_mean": 0.017334664322101,
+      "mae_std": 0.0018994669092165116,
+      "rmse_mean": 0.06851478966601779,
+      "mre_mean": 0.010713689330588565,
+      "delta1_mean": 99.3566269030714,
+      "coverage_mean": 99.13976084581773,
+      "fps_mean": 6.591002457733087,
+      "time_ms_mean": 151.72198863720962,
+      "close_range_coverage": 8.539325842696629,
+      "gpu_load_mean": 0.0,
+      "is_ground_truth": false
+    },
+    "original": {
+      "label": "Original model",
+      "mae_mean": 0.019633635941348718,
+      "mae_std": 0.0025874206605145706,
+      "rmse_mean": 0.07497013829899637,
+      "mre_mean": 0.012076542543226414,
+      "delta1_mean": 99.20257452337185,
+      "coverage_mean": 99.13975767595193,
+      "fps_mean": 6.838218937662999,
+      "time_ms_mean": 146.2369089255507,
+      "close_range_coverage": 8.539325842696629,
+      "gpu_load_mean": 0.0,
+      "is_ground_truth": false
+    }
+  }
+}
\ No newline at end of file
diff --git a/reports/faro_benchmark_office/summary_table.png b/reports/faro_benchmark_office/summary_table.png
new file mode 100644
index 0000000..7a156d5
Binary files /dev/null and b/reports/faro_benchmark_office/summary_table.png differ
diff --git a/reports/faro_benchmark_office/timing_bars.png b/reports/faro_benchmark_office/timing_bars.png
new file mode 100644
index 0000000..b0be1d1
Binary files /dev/null and b/reports/faro_benchmark_office/timing_bars.png differ
diff --git a/reports/inbolt_benchmark/close_range_analysis.png b/reports/inbolt_benchmark/close_range_analysis.png
new file mode 100644
index 0000000..e543bb0
Binary files /dev/null and b/reports/inbolt_benchmark/close_range_analysis.png differ
diff --git a/reports/inbolt_benchmark/coverage_heatmaps.png b/reports/inbolt_benchmark/coverage_heatmaps.png
new file mode 100644
index 0000000..273f28c
Binary files /dev/null and b/reports/inbolt_benchmark/coverage_heatmaps.png differ
diff --git a/reports/inbolt_benchmark/depth_comparison.png b/reports/inbolt_benchmark/depth_comparison.png
new file mode 100644
index 0000000..6bd5cd9
Binary files /dev/null and b/reports/inbolt_benchmark/depth_comparison.png differ
diff --git a/reports/inbolt_benchmark/depth_scale_comparison.png b/reports/inbolt_benchmark/depth_scale_comparison.png
new file mode 100644
index 0000000..27c0799
Binary files /dev/null and b/reports/inbolt_benchmark/depth_scale_comparison.png differ
diff --git a/reports/inbolt_benchmark/depth_vs_distance.png b/reports/inbolt_benchmark/depth_vs_distance.png
new file mode 100644
index 0000000..2f49bde
Binary files /dev/null and b/reports/inbolt_benchmark/depth_vs_distance.png differ
diff --git a/reports/inbolt_benchmark/distance_error_curve.png b/reports/inbolt_benchmark/distance_error_curve.png
new file mode 100644
index 0000000..b800cb1
Binary files /dev/null and b/reports/inbolt_benchmark/distance_error_curve.png differ
diff --git a/reports/inbolt_benchmark/error_histograms.png b/reports/inbolt_benchmark/error_histograms.png
new file mode 100644
index 0000000..5c1b221
Binary files /dev/null and b/reports/inbolt_benchmark/error_histograms.png differ
diff --git a/reports/inbolt_benchmark/error_maps.png b/reports/inbolt_benchmark/error_maps.png
new file mode 100644
index 0000000..57dfcc9
Binary files /dev/null and b/reports/inbolt_benchmark/error_maps.png differ
diff --git a/reports/inbolt_benchmark/index.html b/reports/inbolt_benchmark/index.html
new file mode 100644
index 0000000..ac8de6d
--- /dev/null
+++ b/reports/inbolt_benchmark/index.html
@@ -0,0 +1,141 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>Depth Quality Report — rs-enhanced-depth</title>
+  <style>
+    * { box-sizing: border-box; margin: 0; padding: 0; }
+    body { font-family: system-ui, -apple-system, sans-serif; background: #f0f2f5;
+            color: #222; max-width: 1200px; margin: 0 auto; padding: 24px 16px; }
+    h1 { font-size: 1.8em; color: #1a2e4a; margin-bottom: 4px; }
+    .subtitle { color: #555; font-size: .95em; margin-bottom: 8px; }
+    h2 { font-size: 1.15em; color: #1a2e4a; border-bottom: 2px solid #0f3460;
+          padding-bottom: 6px; margin-bottom: 16px; }
+    .meta { background: white; border-radius: 10px; padding: 16px 20px;
+             margin-bottom: 24px; box-shadow: 0 2px 6px rgba(0,0,0,.08); }
+    .meta table { border-collapse: collapse; width: 100%; font-size: .9em; }
+    .meta td { padding: 5px 12px; }
+    .meta tr:nth-child(even) { background: #f5f7fa; }
+    .section { background: white; border-radius: 10px; padding: 20px 24px;
+                margin-bottom: 24px; box-shadow: 0 2px 6px rgba(0,0,0,.08); }
+    .figure-wrapper { text-align: center; }
+    .figure-wrapper img { max-width: 100%; height: auto;
+                           border: 1px solid #e0e0e0; border-radius: 6px; }
+    .caption { font-size: .83em; color: #666; margin-top: 10px; }
+    .legend { background: #e8f4fd; border: 1px solid #b3d7f0; border-radius: 10px;
+               padding: 18px 22px; margin-bottom: 24px; }
+    .legend h3 { font-size: 1em; color: #1a5276; margin-bottom: 10px; }
+    .legend table { border-collapse: collapse; width: 100%; font-size: .85em; }
+    .legend td { padding: 5px 10px; vertical-align: top; }
+    .legend td:first-child { white-space: nowrap; width: 110px; }
+    .legend tr:nth-child(even) { background: rgba(255,255,255,.5); }
+    footer { text-align: center; color: #999; font-size: .8em; padding: 24px 0 8px; }
+    .pdf-btn {
+      display: inline-flex; align-items: center; gap: 7px;
+      background: #0f3460; color: white; border: none; border-radius: 7px;
+      padding: 9px 20px; font-size: .95em; font-weight: 600; cursor: pointer;
+      margin: 12px 0 20px; text-decoration: none; transition: background .15s;
+    }
+    .pdf-btn:hover { background: #16213e; }
+    @media print {
+      .pdf-btn { display: none; }
+      body { background: white; padding: 0; }
+      .section { box-shadow: none; border: 1px solid #ddd; break-inside: avoid; }
+      .meta { box-shadow: none; border: 1px solid #ddd; }
+    }
+  </style>
+</head>
+<body>
+  <h1>Depth Quality Report</h1>
+  <p class="subtitle">rs-enhanced-depth — multi-method depth quality analysis</p>
+  <button class="pdf-btn" onclick="window.print()">&#x1F4E5; Export as PDF</button>
+  <div class="meta"><table><tr><td><b>Source</b></td><td>INBOLT dataset (/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260322T091926Z-1-001/Data Collection)</td></tr><tr><td><b>Frames</b></td><td>18</td></tr><tr><td><b>Resolution</b></td><td>640×480</td></tr><tr><td><b>Ground truth</b></td><td>INBOLT GT</td></tr><tr><td><b>Methods evaluated</b></td><td>INBOLT GT, Fine-tuned on INBOLT, Original model</td></tr><tr><td><b>Generated</b></td><td>2026-03-25 12:01:18</td></tr></table></div>
+
+    <div class="section">
+      <h2>Model Configuration</h2>
+      <table style="border-collapse:collapse;width:100%;font-size:.9em;">
+        <tr style="background:#2c3e50;color:white;font-weight:bold;"><td style="padding:6px 12px;">Method</td><td style="padding:6px 12px;">max_disp</td><td style="padding:6px 12px;">valid_iters</td><td style="padding:6px 12px;">Engine Resolution</td><td style="padding:6px 12px;">Engine Dir</td></tr>
+        <tr><td style="padding:6px 12px;">Original model</td><td style="padding:6px 12px;">—</td><td style="padding:6px 12px;">—</td><td style="padding:6px 12px;">—</td><td style="padding:6px 12px;font-size:.8em;">—</td></tr>
+        <tr><td style="padding:6px 12px;">Fine-tuned on INBOLT</td><td style="padding:6px 12px;">—</td><td style="padding:6px 12px;">—</td><td style="padding:6px 12px;">—</td><td style="padding:6px 12px;font-size:.8em;">—</td></tr>
+      </table>
+    </div>
+
+    <div class="section">
+      <h2>Depth Comparison</h2>
+      <div class="figure-wrapper">
+        <img src="depth_comparison.png" alt="Depth Comparison">
+        <p class="caption">Side-by-side depth maps from a single representative frame. Invalid pixels are black.</p>
+      </div>
+    </div>
+    <div class="section">
+      <h2>Error Maps</h2>
+      <div class="figure-wrapper">
+        <img src="error_maps.png" alt="Error Maps">
+        <p class="caption">Per-pixel absolute error |pred − GT| clipped at 0.5 m. Brighter = more error.</p>
+      </div>
+    </div>
+    <div class="section">
+      <h2>Coverage Heatmaps</h2>
+      <div class="figure-wrapper">
+        <img src="coverage_heatmaps.png" alt="Coverage Heatmaps">
+        <p class="caption">Fraction of frames each pixel has valid depth, averaged over all benchmark frames.</p>
+      </div>
+    </div>
+    <div class="section">
+      <h2>Distance Error Curve</h2>
+      <div class="figure-wrapper">
+        <img src="distance_error_curve.png" alt="Distance Error Curve">
+        <p class="caption">Mean Absolute Error (MAE) broken down by distance range.</p>
+      </div>
+    </div>
+    <div class="section">
+      <h2>Error Histograms</h2>
+      <div class="figure-wrapper">
+        <img src="error_histograms.png" alt="Error Histograms">
+        <p class="caption">Distribution of per-pixel absolute errors from the stored visualisation frames.</p>
+      </div>
+    </div>
+    <div class="section">
+      <h2>Summary Table</h2>
+      <div class="figure-wrapper">
+        <img src="summary_table.png" alt="Summary Table">
+        <p class="caption">Aggregate quality metrics — see legend below the table for column explanations.</p>
+      </div>
+    </div>
+    <div class="legend">
+      <h3>How to Read the Summary Table</h3>
+      <table>
+        <tr><td><b>MRE* (%)</b></td>
+            <td><b>Overall score (recommended).</b> Mean Relative Error with hole penalty &mdash; pixels where the method has no depth but ground truth does count as 100% error. This is the fairest single metric because it penalises both inaccuracy and missing coverage. <b>Lower is better.</b></td></tr>
+        <tr><td><b>MRE (%)</b></td>
+            <td>Mean Relative Error over valid pixels only (holes ignored). 5% means each measured pixel is ~5% off on average. <b>Lower is better.</b></td></tr>
+        <tr><td><b>MAE (m)</b></td>
+            <td>Mean Absolute Error in meters, valid pixels only. <b>Lower is better.</b></td></tr>
+        <tr><td><b>&delta;1 (%)</b></td>
+            <td>Percentage of valid pixels within 1.25&times; of ground truth depth. <b>Higher is better.</b> 100% is perfect.</td></tr>
+        <tr><td><b>Coverage (%)</b></td>
+            <td>Percentage of pixels that produced valid depth. <b>Higher is better.</b> MinZ improves this at close range (&lt;0.55m) by filling holes the hardware camera cannot see.</td></tr>
+        <tr><td><b>FPS</b></td>
+            <td>Processing speed (frames per second). <b>Higher is faster.</b></td></tr>
+        <tr><td><b>GT?</b></td>
+            <td>&starf; GT marks the ground truth method (NNDepth accurate). Its error columns show "&mdash;" because you don't compare ground truth to itself.</td></tr>
+      </table>
+    </div>
+    <div class="section">
+      <h2>Close Range Analysis</h2>
+      <div class="figure-wrapper">
+        <img src="close_range_analysis.png" alt="Close Range Analysis">
+        <p class="caption">Coverage and stability for objects closer than 0.55 m. Highlights MinZ benefit.</p>
+      </div>
+    </div>
+    <div class="section">
+      <h2>Timing Bars</h2>
+      <div class="figure-wrapper">
+        <img src="timing_bars.png" alt="Timing Bars">
+        <p class="caption">Processing speed in FPS. Hardware baseline is fixed at ~30 FPS (camera frame rate).</p>
+      </div>
+    </div>
+  <footer>Generated by <code>tests/depth_report/</code> on 2026-03-25 12:01:18</footer>
+</body>
+</html>
\ No newline at end of file
diff --git a/reports/inbolt_benchmark/results.json b/reports/inbolt_benchmark/results.json
new file mode 100644
index 0000000..33e451b
--- /dev/null
+++ b/reports/inbolt_benchmark/results.json
@@ -0,0 +1,64 @@
+{
+  "source": "INBOLT dataset (/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260322T091926Z-1-001/Data Collection)",
+  "n_frames": 18,
+  "resolution": "640x480",
+  "ground_truth": "inbolt_gt",
+  "distance_bins": [
+    "0\u20131 m",
+    "1\u20132 m",
+    "2\u20133 m",
+    "3\u20135 m"
+  ],
+  "model_configs": {
+    "original": {
+      "model_path": "/home/adiroha/repos/Fast-FoundationStereo/scripts/../weights/20-30-48/model_best_bp2_serialize.pth"
+    },
+    "finetuned": {
+      "model_path": "/home/adiroha/repos/Fast-FoundationStereo/scripts/../weights/20-30-48/model_finetuned_inbolt.pth"
+    }
+  },
+  "methods": {
+    "inbolt_gt": {
+      "label": "INBOLT GT",
+      "mae_mean": 0.0,
+      "mae_std": 0.0,
+      "rmse_mean": 0.0,
+      "mre_mean": 0.0,
+      "delta1_mean": 100.0,
+      "coverage_mean": 82.82678674768518,
+      "fps_mean": -1,
+      "time_ms_mean": 0.0,
+      "close_range_coverage": 11.11111111111111,
+      "gpu_load_mean": 0.0,
+      "is_ground_truth": true
+    },
+    "finetuned": {
+      "label": "Fine-tuned on INBOLT",
+      "mae_mean": 1.6766890618536208,
+      "mae_std": 0.5363837577208318,
+      "rmse_mean": 1.706638468636407,
+      "mre_mean": 1.5533304479387071,
+      "delta1_mean": 0.0,
+      "coverage_mean": 82.82678674768518,
+      "fps_mean": 5.817052135797856,
+      "time_ms_mean": 171.9083784458538,
+      "close_range_coverage": 11.11111111111111,
+      "gpu_load_mean": 0.0,
+      "is_ground_truth": false
+    },
+    "original": {
+      "label": "Original model",
+      "mae_mean": 2.0390112466282315,
+      "mae_std": 0.5198313899331768,
+      "rmse_mean": 2.0891922778553433,
+      "mre_mean": 1.9439863959948223,
+      "delta1_mean": 0.0,
+      "coverage_mean": 82.82678674768518,
+      "fps_mean": 21.39207336477468,
+      "time_ms_mean": 46.74628695162634,
+      "close_range_coverage": 11.11111111111111,
+      "gpu_load_mean": 0.0,
+      "is_ground_truth": false
+    }
+  }
+}
\ No newline at end of file
diff --git a/reports/inbolt_benchmark/summary_table.png b/reports/inbolt_benchmark/summary_table.png
new file mode 100644
index 0000000..0b6f4c4
Binary files /dev/null and b/reports/inbolt_benchmark/summary_table.png differ
diff --git a/reports/inbolt_benchmark/timing_bars.png b/reports/inbolt_benchmark/timing_bars.png
new file mode 100644
index 0000000..b7d5f9e
Binary files /dev/null and b/reports/inbolt_benchmark/timing_bars.png differ
diff --git a/reports/inbolt_ffs_benchmark/depth_comparison_zivid_rs_ffs.png b/reports/inbolt_ffs_benchmark/depth_comparison_zivid_rs_ffs.png
new file mode 100644
index 0000000..f749725
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/depth_comparison_zivid_rs_ffs.png differ
diff --git a/reports/inbolt_ffs_benchmark/depth_noise_comparison_ffs.png b/reports/inbolt_ffs_benchmark/depth_noise_comparison_ffs.png
new file mode 100644
index 0000000..7933c80
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/depth_noise_comparison_ffs.png differ
diff --git a/reports/inbolt_ffs_benchmark/depth_scale_comparison.png b/reports/inbolt_ffs_benchmark/depth_scale_comparison.png
new file mode 100644
index 0000000..8deb338
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/depth_scale_comparison.png differ
diff --git a/reports/inbolt_ffs_benchmark/depth_scale_comparison_ffs.png b/reports/inbolt_ffs_benchmark/depth_scale_comparison_ffs.png
new file mode 100644
index 0000000..8deb338
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/depth_scale_comparison_ffs.png differ
diff --git a/reports/inbolt_ffs_benchmark/depth_scale_comparison_inbolt.png b/reports/inbolt_ffs_benchmark/depth_scale_comparison_inbolt.png
new file mode 100644
index 0000000..3df6090
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/depth_scale_comparison_inbolt.png differ
diff --git a/reports/inbolt_ffs_benchmark/depth_scale_comparison_wrong_bf.png b/reports/inbolt_ffs_benchmark/depth_scale_comparison_wrong_bf.png
new file mode 100644
index 0000000..0f1fbe6
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/depth_scale_comparison_wrong_bf.png differ
diff --git a/reports/inbolt_ffs_benchmark/error_000_inputs.png b/reports/inbolt_ffs_benchmark/error_000_inputs.png
new file mode 100644
index 0000000..8f964e9
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/error_000_inputs.png differ
diff --git a/reports/inbolt_ffs_benchmark/error_001_inputs.png b/reports/inbolt_ffs_benchmark/error_001_inputs.png
new file mode 100644
index 0000000..ddfb697
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/error_001_inputs.png differ
diff --git a/reports/inbolt_ffs_benchmark/error_002_inputs.png b/reports/inbolt_ffs_benchmark/error_002_inputs.png
new file mode 100644
index 0000000..23f4e41
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/error_002_inputs.png differ
diff --git a/reports/inbolt_ffs_benchmark/error_003_inputs.png b/reports/inbolt_ffs_benchmark/error_003_inputs.png
new file mode 100644
index 0000000..f205448
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/error_003_inputs.png differ
diff --git a/reports/inbolt_ffs_benchmark/error_004_inputs.png b/reports/inbolt_ffs_benchmark/error_004_inputs.png
new file mode 100644
index 0000000..c5d5278
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/error_004_inputs.png differ
diff --git a/reports/inbolt_ffs_benchmark/error_005_inputs.png b/reports/inbolt_ffs_benchmark/error_005_inputs.png
new file mode 100644
index 0000000..704e62a
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/error_005_inputs.png differ
diff --git a/reports/inbolt_ffs_benchmark/error_006_inputs.png b/reports/inbolt_ffs_benchmark/error_006_inputs.png
new file mode 100644
index 0000000..220bb27
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/error_006_inputs.png differ
diff --git a/reports/inbolt_ffs_benchmark/error_007_inputs.png b/reports/inbolt_ffs_benchmark/error_007_inputs.png
new file mode 100644
index 0000000..197fdd9
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/error_007_inputs.png differ
diff --git a/reports/inbolt_ffs_benchmark/error_008_inputs.png b/reports/inbolt_ffs_benchmark/error_008_inputs.png
new file mode 100644
index 0000000..7edac6e
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/error_008_inputs.png differ
diff --git a/reports/inbolt_ffs_benchmark/sample_000_inputs.png b/reports/inbolt_ffs_benchmark/sample_000_inputs.png
new file mode 100644
index 0000000..4cd3ccd
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/sample_000_inputs.png differ
diff --git a/reports/inbolt_ffs_benchmark/sample_001_inputs.png b/reports/inbolt_ffs_benchmark/sample_001_inputs.png
new file mode 100644
index 0000000..5abc63d
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/sample_001_inputs.png differ
diff --git a/reports/inbolt_ffs_benchmark/sample_002_inputs.png b/reports/inbolt_ffs_benchmark/sample_002_inputs.png
new file mode 100644
index 0000000..4f4c761
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/sample_002_inputs.png differ
diff --git a/reports/inbolt_ffs_benchmark/sample_003_inputs.png b/reports/inbolt_ffs_benchmark/sample_003_inputs.png
new file mode 100644
index 0000000..e9f3ced
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/sample_003_inputs.png differ
diff --git a/reports/inbolt_ffs_benchmark/sample_004_inputs.png b/reports/inbolt_ffs_benchmark/sample_004_inputs.png
new file mode 100644
index 0000000..ecfaae1
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/sample_004_inputs.png differ
diff --git a/reports/inbolt_ffs_benchmark/sample_005_inputs.png b/reports/inbolt_ffs_benchmark/sample_005_inputs.png
new file mode 100644
index 0000000..a0d3082
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/sample_005_inputs.png differ
diff --git a/reports/inbolt_ffs_benchmark/sample_006_inputs.png b/reports/inbolt_ffs_benchmark/sample_006_inputs.png
new file mode 100644
index 0000000..b8f30c6
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/sample_006_inputs.png differ
diff --git a/reports/inbolt_ffs_benchmark/sample_007_inputs.png b/reports/inbolt_ffs_benchmark/sample_007_inputs.png
new file mode 100644
index 0000000..3d567de
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/sample_007_inputs.png differ
diff --git a/reports/inbolt_ffs_benchmark/sample_008_inputs.png b/reports/inbolt_ffs_benchmark/sample_008_inputs.png
new file mode 100644
index 0000000..5ce370f
Binary files /dev/null and b/reports/inbolt_ffs_benchmark/sample_008_inputs.png differ
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 0000000..f775d3d
--- /dev/null
+++ b/scripts/__init__.py
@@ -0,0 +1 @@
+# Marks scripts/ as a Python package so its modules can be imported (e.g. scripts.data_manager_faro).
\ No newline at end of file
diff --git a/scripts/benchmark_faro.py b/scripts/benchmark_faro.py
new file mode 100644
index 0000000..bdb6a90
--- /dev/null
+++ b/scripts/benchmark_faro.py
@@ -0,0 +1,225 @@
+"""Benchmark original vs fine-tuned FastFoundationStereo on the FARO dataset.
+
+Loads both models, runs inference on all FARO samples, computes depth quality
+metrics against FARO scanner ground truth, and produces an HTML report.
+
+Usage:
+  cd <repo-root>   # e.g. /home/adiroha/repos/Fast-FoundationStereo
+  python scripts/benchmark_faro.py [--out_dir reports/faro_benchmark]
+"""
+
+import argparse
+import logging
+import os
+import sys
+import time
+from pathlib import Path
+
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+sys.path.append(code_dir)
+
+import numpy as np
+import torch
+
+from core.utils.utils import InputPadder
+import Utils as U
+from scripts.data_manager_faro import DataSource
+from metrics import (
+    BenchmarkResults,
+    FrameMetrics,
+    compute_metrics,
+    compute_bin_mae,
+    aggregate,
+    CLOSE_RANGE_THRESHOLD_M,
+)
+from report import ReportGenerator
+
+
+# ── constants ────────────────────────────────────────────────────────────────
+
+FARO_DIR       = r'/mnt/algonas/Local/Data/Stereo/Faro/FARO_DATA_BASE'
+# FARO_DIR     = r'data/faro'  # alternative: local copy of the FARO dataset (disabled)
+ORIGINAL_PATH  = f'{code_dir}/../weights/20-30-48/model_best_bp2_serialize.pth'
+FINETUNED_PATH = f'{code_dir}/../weights/20-30-48/model_finetuned_faro_kitchen_epoch_006_epoch_013.pth'
+DEFAULT_OUT    = f'{code_dir}/../reports/benchmark_faro_office_013'
+
+BF     = 49470.45   # focal_px * baseline_mm  (calibrated from camera)
+ITERS  = 8          # GRU iterations
+N_VIZ  = 5         # number of frames saved for visual comparison in report
+
+METHODS = {
+    "original":  {"label": "Original model",     "color": "#2980b9"},
+    "finetuned": {"label": "Fine-tuned on FARO", "color": "#e74c3c"},
+    "faro_gt":   {"label": "FARO GT",            "color": "#27ae60"},
+}
+GT_NAME = "faro_gt"
+
+
+# ── inference helpers ─────────────────────────────────────────────────────────
+
+def _preprocess_ir(left: np.ndarray, right: np.ndarray):
+    """Convert uint16 IR images to CUDA float tensors (3-channel pseudo-RGB); values are clipped to [0, 255], not rescaled."""
+    left  = np.clip(left.astype(np.float32),  0, 255)
+    right = np.clip(right.astype(np.float32), 0, 255)
+    left  = np.stack([left,  left,  left],  axis=-1)   # H×W×3
+    right = np.stack([right, right, right], axis=-1)
+    left_t  = torch.as_tensor(left).float()[None].permute(0, 3, 1, 2).cuda()
+    right_t = torch.as_tensor(right).float()[None].permute(0, 3, 1, 2).cuda()
+    return left_t, right_t
+
+
+@torch.no_grad()
+def infer_depth_m(model, left: np.ndarray, right: np.ndarray) -> np.ndarray:
+    """Run stereo inference on an IR pair; return depth in metres (H×W float32, 0 where disparity is not positive)."""
+    left_t, right_t = _preprocess_ir(left, right)
+    padder = InputPadder(left_t.shape, divis_by=32, force_square=False)
+    left_t, right_t = padder.pad(left_t, right_t)
+
+    with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+        disp = model.forward(left_t, right_t, iters=ITERS, test_mode=True)
+
+    disp = padder.unpad(disp.float())
+    disp_np = disp.cpu().numpy().reshape(left.shape[:2]).clip(0, None)
+
+    depth_m = np.zeros_like(disp_np)
+    valid = disp_np > 0
+    depth_m[valid] = (BF / disp_np[valid]) / 1000.0   # disparity → mm → m
+    return depth_m
+
+
+def load_model(path: str):
+    logging.info(f"Loading model from {path}")
+    model = torch.load(path, map_location='cpu', weights_only=False)
+    model.cuda().eval()
+    return model
+
+
+# ── main ─────────────────────────────────────────────────────────────────────
+
+def main():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('--out_dir',   default=DEFAULT_OUT, help='Output directory for the report')
+    parser.add_argument('--faro_dir',  default=FARO_DIR, help='Path to FARO dataset root')
+    parser.add_argument('--original',  default=ORIGINAL_PATH, help='Path to original model weights')
+    parser.add_argument('--finetuned', default=FINETUNED_PATH, help='Path to fine-tuned model weights')
+    parser.add_argument('--n_viz', type=int, default=N_VIZ, help='Frames saved for visual comparison')
+    args = parser.parse_args()
+
+    U.set_logging_format()
+    out_dir = Path(args.out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # ── load models ──────────────────────────────────────────────────────────
+    models = {}
+    if Path(args.finetuned).exists():
+        models["finetuned"] = load_model(args.finetuned)
+    else:
+        logging.warning(f"Fine-tuned model not found at {args.finetuned} — skipping")
+
+    models["original"] = load_model(args.original)
+
+    active_methods = [GT_NAME] + list(models.keys())
+
+    # ── dataset ───────────────────────────────────────────────────────────────
+    source = DataSource()
+    n = source.init_directory(input_rectified=args.faro_dir,test_keywords=['OFFICE'], split='train')
+    logging.info(f"Found {n} samples in {args.faro_dir}")
+    if n == 0:
+        logging.error("No samples found — check FARO_DIR path")
+        return
+
+    # ── accumulators ──────────────────────────────────────────────────────────
+    all_metrics       = []
+    viz_frames        = []
+    valid_acc         = {}     # initialised on first frame
+    dist_bin_mae      = {m: [] for m in active_methods}
+    close_range_valid = {m: [] for m in active_methods}
+    timing_ms_raw     = {m: [] for m in models}
+    H = W = None
+
+    for idx in range(n):
+        data  = source.get_item(idx)
+        left  = data['left']
+        right = data['right']
+        gt_mm = data['depth_faro'].astype(np.float32)
+
+        if H is None:
+            H, W = gt_mm.shape[:2]
+            for m in active_methods:
+                valid_acc[m] = np.zeros((H, W), np.float32)
+
+        gt_m = gt_mm / 1000.0   # mm → m
+
+        # run inference for each model
+        frame_depths = {GT_NAME: gt_m}
+        for mname, model in models.items():
+            t0 = time.monotonic()
+            frame_depths[mname] = infer_depth_m(model, left, right)
+            timing_ms_raw[mname].append((time.monotonic() - t0) * 1000.0)
+
+        # per-frame metrics
+        gt_close_mask = (gt_m > 0) & (gt_m < CLOSE_RANGE_THRESHOLD_M)
+        n_close = int(gt_close_mask.sum())
+
+        for mname in active_methods:
+            pred = frame_depths[mname]
+            valid_acc[mname] += (pred > 0).astype(np.float32)
+
+            if mname == GT_NAME:
+                fm = FrameMetrics(GT_NAME, 0.0, 0.0, 0.0, 100.0,
+                                  float((pred > 0).mean()) * 100.0, 0.0,
+                                  mae_pen=0.0, mre_pen=0.0)
+            else:
+                fm = compute_metrics(pred, gt_m, timing_ms_raw[mname][-1], mname)
+            all_metrics.append(fm)
+
+            dist_bin_mae[mname].append(compute_bin_mae(pred, gt_m))
+
+            close_cov = float((pred[gt_close_mask] > 0).mean()) * 100.0 if n_close > 0 else 0.0
+            close_range_valid[mname].append(close_cov)
+
+        if idx < args.n_viz:
+            viz_frames.append({k: v.copy() for k, v in frame_depths.items()})
+
+        if (idx + 1) % 200 == 0 or (idx + 1) == n:
+            logging.info(f"  {idx + 1}/{n} frames processed")
+
+    # normalise coverage maps to [0, 1]
+    for m in active_methods:
+        valid_acc[m] /= max(n, 1)
+
+    # ── aggregate timing ──────────────────────────────────────────────────────
+    mean_timing = {m: float(np.mean(ts)) if ts else 0.0 for m, ts in timing_ms_raw.items()}
+    mean_timing[GT_NAME] = 0.0
+
+    # ── build BenchmarkResults ────────────────────────────────────────────────
+    results = BenchmarkResults(
+        method_names=active_methods,
+        method_labels={m: METHODS[m]["label"] for m in active_methods},
+        method_colors={m: METHODS[m]["color"] for m in active_methods},
+        ground_truth_name=GT_NAME,
+        n_frames=n,
+        width=W,
+        height=H,
+        all_metrics=all_metrics,
+        viz_frames=viz_frames,
+        coverage_maps=valid_acc,
+        dist_bin_mae=dist_bin_mae,
+        close_range_valid=close_range_valid,
+        source=f"FARO dataset ({args.faro_dir})",
+        method_configs={
+            "original":  {"model_path": args.original},
+            "finetuned": {"model_path": args.finetuned},
+        },
+    )
+
+    stats = aggregate(results, mean_timing)
+
+    # ── generate report ───────────────────────────────────────────────────────
+    reporter = ReportGenerator(results, stats, out_dir)
+    reporter.generate()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/benchmark_faro_rs.py b/scripts/benchmark_faro_rs.py
new file mode 100644
index 0000000..64a8601
--- /dev/null
+++ b/scripts/benchmark_faro_rs.py
@@ -0,0 +1,585 @@
+"""Benchmark stereo models + RealSense hardware depth against FARO scanner ground truth.
+
+All depth values are in millimetres (mm) throughout this script.
+
+Metric descriptions
+-------------------
+MAE (mm)            Mean Absolute Error — average |predicted − GT| in mm over valid pixels.
+                    Lower is better.  Sensitive to large outlier errors.
+
+MAE* / MRE* (pen.)  Penalised variants of MAE / MRE.  Pixels where the model produces
+                    no depth (holes) are penalised: MAE* counts the full GT depth as the
+                    error, MRE* counts a 100% relative error.  Rewards methods with high
+                    coverage — a sensor that refuses to predict on anything has 100% MRE*.
+
+RMSE (mm)           Root Mean Square Error — sqrt(mean(|pred − GT|²)).
+                    Penalises large individual errors more than MAE.  Lower is better.
+
+MRE (%)             Mean Relative Error — mean(|pred − GT| / GT) × 100.
+                    Scale-independent; allows fair comparison across depth ranges.
+                    Lower is better.
+
+δ1 (%)              Inlier accuracy — % of valid pixels where
+                    max(pred/GT, GT/pred) < 1.25.  Higher is better.
+                    A single threshold commonly used in stereo depth benchmarks.
+
+Coverage (%)        % of image pixels where both prediction and GT are non-zero.
+                    Higher is better.  Sensors with many holes score lower.
+
+Close-range cov.    Coverage restricted to pixels where GT < 550 mm.  Important for
+                    near-field robotics and manipulation tasks.  Higher is better.
+
+Latency (ms)        Wall-clock time per frame of the full inference call (pre-processing, forward pass, depth conversion; data loading excluded).
+                    Hardware sensors (RealSense) are treated as fixed 30 FPS devices.
+
+FPS                 Frames per second = 1000 / latency_ms.
+
+Usage:
+  cd <repo-root>   # e.g. /home/adiroha/repos/Fast-FoundationStereo
+  python scripts/benchmark_faro_rs.py [--out_dir reports/benchmark_faro_rs]
+"""
+
+import argparse
+import logging
+import os
+import sys
+import time
+from pathlib import Path
+from typing import Dict, List, Tuple
+
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+sys.path.append(code_dir)
+
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+
+from core.utils.utils import InputPadder
+import Utils as U
+from scripts.data_manager_faro import DataSource
+from metrics import (
+    BenchmarkResults,
+    FrameMetrics,
+    compute_metrics,
+    aggregate,
+)
+from report import ReportGenerator
+
+
+# ── constants ─────────────────────────────────────────────────────────────────
+
+FARO_DIR       = r'/mnt/algonas/Local/Data/Stereo/Faro/FARO_DATA_BASE'
+ORIGINAL_PATH  = f'{code_dir}/../weights/20-30-48/model_best_bp2_serialize.pth'
+FINETUNED_PATH = f'{code_dir}/../weights/20-30-48/model_finetuned_faro_kitchen_epoch_006_epoch_013.pth'
+DEFAULT_OUT    = f'{code_dir}/../reports/benchmark_faro_rs_bathroom'
+DATASET_KEYWORDS = ['BATHROOM'] # ['OFFICE', 'KITCHEN']  # filter dataset paths containing any of these (case-insensitive)
+
+BF     = 49470.45   # focal_px × baseline_mm  (calibrated from RealSense stereo pair)
+ITERS  = 8          # GRU update iterations
+N_VIZ  = 5         # frames saved for visual comparison in the report
+
+# Depth threshold for the "close-range" coverage metric — in mm
+CLOSE_RANGE_THRESHOLD_MM = 550.0
+
+# Distance bins used for the per-bin MAE curve — all in mm
+DIST_BINS_MM: List[Tuple[float, float]] = [
+    (0.0,    500.0),
+    (500.0,  1000.0),
+    (1000.0, 2000.0),
+    (2000.0, 3000.0)
+]
+BIN_LABELS_MM  = ["0–500 mm", "500–1000 mm", "1000–2000 mm", "2000–3000 mm"]
+BIN_CENTERS_MM = [250.0, 750.0, 1500.0, 2500.0]
+
+METHODS: Dict[str, Dict[str, str]] = {
+    "original":  {"label": "FFS Original",            "color": "#2980b9"},
+    "finetuned": {"label": "FFS Fine-tuned (FARO)",    "color": "#e74c3c"},
+    "depth_rs":  {"label": "RealSense Hardware Depth", "color": "#f39c12"},
+    "faro_gt":   {"label": "FARO GT",                  "color": "#27ae60"},
+}
+GT_NAME = "faro_gt"
+RS_NAME = "depth_rs"   # pre-recorded RealSense active-stereo depth from the dataset
+
+
+# ── mm-based metric helpers ───────────────────────────────────────────────────
+
+def compute_bin_mae_mm(pred_mm: np.ndarray, gt_mm: np.ndarray) -> List[float]:
+    """MAE (mm) per distance bin; returns NaN for bins with no valid GT pixels."""
+    result = []
+    for lo, hi in DIST_BINS_MM:
+        mask = (gt_mm >= lo) & (gt_mm < hi) & (gt_mm > 0) & (pred_mm > 0)
+        if mask.sum() == 0:
+            result.append(float("nan"))
+        else:
+            result.append(float(np.abs(pred_mm[mask] - gt_mm[mask]).mean()))
+    return result
+
+
+# ── inference helpers ─────────────────────────────────────────────────────────
+
+def _preprocess_ir(left: np.ndarray, right: np.ndarray):
+    """Convert uint16 IR images to CUDA float tensors (3-channel pseudo-RGB); values are clipped to [0, 255], not rescaled."""
+    left  = np.clip(left.astype(np.float32),  0, 255)
+    right = np.clip(right.astype(np.float32), 0, 255)
+    left  = np.stack([left,  left,  left],  axis=-1)   # H×W×3
+    right = np.stack([right, right, right], axis=-1)
+    left_t  = torch.as_tensor(left).float()[None].permute(0, 3, 1, 2).cuda()
+    right_t = torch.as_tensor(right).float()[None].permute(0, 3, 1, 2).cuda()
+    return left_t, right_t
+
+
+@torch.no_grad()
+def infer_depth_mm(model, left: np.ndarray, right: np.ndarray) -> np.ndarray:
+    """Run stereo inference on an IR pair; return depth map in mm (H×W float32).
+
+    BF = focal_px × baseline_mm, so  depth_mm = BF / disparity_px.
+    No unit conversion needed — result is already in mm.
+    """
+    left_t, right_t = _preprocess_ir(left, right)
+    padder = InputPadder(left_t.shape, divis_by=32, force_square=False)
+    left_t, right_t = padder.pad(left_t, right_t)
+
+    with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+        disp = model.forward(left_t, right_t, iters=ITERS, test_mode=True)
+
+    disp = padder.unpad(disp.float())
+    disp_np = disp.cpu().numpy().reshape(left.shape[:2]).clip(0, None)
+
+    depth_mm = np.zeros_like(disp_np)
+    valid = disp_np > 0
+    depth_mm[valid] = BF / disp_np[valid]   # disparity (px) → depth (mm); BF = focal_px × baseline_mm
+    return depth_mm
+
+
+def load_model(path: str):
+    logging.info(f"Loading model from {path}")
+    model = torch.load(path, map_location='cpu', weights_only=False)
+    model.cuda().eval()
+    return model
+
+
+# ── mm-aware report generator ─────────────────────────────────────────────────
+
+class ReportGeneratorMM(ReportGenerator):
+    """ReportGenerator subclass with all axis labels and colorbars in mm."""
+
+    # Distance-bin labels and centres, taken from this script's module-level constants
+    _bin_labels  = BIN_LABELS_MM
+    _bin_centers = BIN_CENTERS_MM
+
+    def __init__(self, results, stats, output_dir) -> None:
+        super().__init__(results, stats, output_dir)
+        self._selected_viz_indices: List[int] = []
+
+    def _get_selected_viz_indices(self, n_pick: int = 4) -> List[int]:
+        """Return frame indices sampled once with a fixed seed and then cached; n_pick only affects the first call."""
+        if self._selected_viz_indices:
+            return self._selected_viz_indices
+
+        n_total = len(self._r.viz_frames)
+        if n_total == 0:
+            self._selected_viz_indices = []
+            return self._selected_viz_indices
+
+        n = min(n_pick, n_total)
+        # fixed seed for reproducible reports
+        rng = np.random.default_rng(42)
+        self._selected_viz_indices = sorted(rng.choice(n_total, size=n, replace=False).tolist())
+        return self._selected_viz_indices
+
+    def _fig_depth_comparison(self) -> str:
+        if not self._r.viz_frames:
+            return self._empty_fig("depth_comparison.png", "No viz frames")
+
+        sel = self._get_selected_viz_indices(n_pick=4)
+        if not sel:
+            return self._empty_fig("depth_comparison.png", "No viz frames")
+
+        vf0 = self._r.viz_frames[sel[0]]
+        method_names = [n for n in self._r.method_names if n in vf0]
+        nrows = len(sel)
+        ncols = len(method_names)
+        fig, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 3.8 * nrows))
+        axes = np.atleast_2d(axes)
+        cmap = self._depth_cmap()
+
+        for r, frame_idx in enumerate(sel):
+            vf = self._r.viz_frames[frame_idx]
+            for c, name in enumerate(method_names):
+                ax = axes[r, c]
+                if name not in vf:
+                    ax.axis("off")
+                    continue
+                im = ax.imshow(vf[name], cmap=cmap, vmin=1.0, vmax=5000.0)
+                plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label="mm")
+                title = self._r.method_labels.get(name, name)
+                if c == 0:
+                    title = f"Frame {frame_idx + 1} • {title}"
+                ax.set_title(title, fontsize=9, wrap=True)
+                ax.axis("off")
+
+        fig.suptitle("Depth Map Comparison (4 random frames) — values in mm",
+                     fontsize=11, y=1.01)
+        fig.tight_layout()
+        return self._save(fig, "depth_comparison.png")
+
+    def _fig_error_maps(self) -> str:
+        if not self._r.viz_frames or not self._non_gt:
+            return self._empty_fig("error_maps.png", "No comparison methods")
+
+        sel = self._get_selected_viz_indices(n_pick=4)
+        if not sel:
+            return self._empty_fig("error_maps.png", "No viz frames")
+
+        vf0 = self._r.viz_frames[sel[0]]
+        names = ([self._gt] if self._gt in vf0 else []) + [n for n in self._non_gt if n in vf0]
+        if not names:
+            return self._empty_fig("error_maps.png", "Ground truth not available in viz frame")
+
+        nrows = len(sel)
+        ncols = len(names)
+        cmap = plt.get_cmap("hot").copy()
+        cmap.set_under("#222222")
+        fig, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 3.8 * nrows))
+        axes = np.atleast_2d(axes)
+
+        for r, frame_idx in enumerate(sel):
+            vf = self._r.viz_frames[frame_idx]
+            gt = vf.get(self._gt)
+            if gt is None:
+                for c in range(ncols):
+                    axes[r, c].axis("off")
+                continue
+
+            for c, name in enumerate(names):
+                ax = axes[r, c]
+                if name not in vf:
+                    ax.axis("off")
+                    continue
+                pred = vf[name]
+                valid = (gt > 0) & (pred > 0)
+                err = np.where(valid, np.abs(pred - gt), 0.0).astype(np.float32)
+                im = ax.imshow(err, cmap=cmap, vmin=1.0, vmax=100.0)
+                plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label="|error| (mm)")
+                mean_err = float(np.abs(pred[valid] - gt[valid]).mean()) if valid.any() else 0.0
+                label = self._r.method_labels.get(name, name)
+                if c == 0:
+                    ax.set_title(f"Frame {frame_idx + 1} • {label}\nMAE={mean_err:.0f} mm", fontsize=9)
+                else:
+                    ax.set_title(f"{label}\nMAE={mean_err:.0f} mm", fontsize=9)
+                ax.axis("off")
+
+        gt_label = self._r.method_labels.get(self._gt, self._gt)
+        fig.suptitle(f"Absolute Error vs {gt_label} (4 random frames, mm)", fontsize=11, y=1.01)
+        fig.tight_layout()
+        return self._save(fig, "error_maps.png")
+
+    def _fig_distance_error_curve(self) -> str:
+        if not self._non_gt:
+            return self._empty_fig("distance_error_curve.png", "No comparison methods")
+        fig, ax = plt.subplots(figsize=(8, 5))
+        for name in self._non_gt:
+            bin_data = self._r.dist_bin_mae.get(name, [])
+            if not bin_data:
+                continue
+            arr = np.array(bin_data)
+            mean_per_bin = np.array([
+                np.nanmean(arr[:, i]) if np.any(~np.isnan(arr[:, i])) else 0.0
+                for i in range(arr.shape[1])
+            ])
+            color = self._r.method_colors.get(name, "#888")
+            label = self._r.method_labels.get(name, name)
+            ax.plot(self._bin_centers, mean_per_bin, marker="o", color=color,
+                    label=label, linewidth=2, markersize=7)
+        ax.set_xticks(self._bin_centers)
+        ax.set_xticklabels(self._bin_labels, fontsize=9)
+        ax.set_xlabel("Distance range", fontsize=10)
+        ax.set_ylabel("Mean Absolute Error (mm)", fontsize=10)
+        ax.set_title("Depth Error vs Distance", fontsize=12)
+        ax.legend(fontsize=9)
+        ax.grid(alpha=0.3)
+        fig.tight_layout()
+        return self._save(fig, "distance_error_curve.png")
+
+    def _fig_error_histograms(self) -> str:
+        if not self._non_gt or not self._r.viz_frames:
+            return self._empty_fig("error_histograms.png", "No comparison data")
+        names = [n for n in self._non_gt
+                 if any(n in vf and self._gt in vf for vf in self._r.viz_frames)]
+        if not names:
+            return self._empty_fig("error_histograms.png", "No viz data for comparison")
+        n = len(names)
+        nrows, ncols = self._grid_layout(n)
+        fig, axes = plt.subplots(nrows, ncols, figsize=(5 * ncols, 4 * nrows))
+        axes = np.asarray(axes).flatten().tolist()
+        for ax, name in zip(axes, names):
+            errors = []
+            for vf in self._r.viz_frames:
+                if name not in vf or self._gt not in vf:
+                    continue
+                pred, gt = vf[name], vf[self._gt]
+                valid = (gt > 0) & (pred > 0)
+                if valid.any():
+                    errors.extend(np.abs(pred[valid] - gt[valid]).tolist())
+            if not errors:
+                ax.text(0.5, 0.5, "No data", transform=ax.transAxes,
+                        ha="center", va="center", color="gray")
+                continue
+            color = self._r.method_colors.get(name, "#888")
+            label = self._r.method_labels.get(name, name)
+            ax.hist(errors, bins=50, range=(0.0, 500.0), color=color,
+                    alpha=0.8, edgecolor="none")
+            mean_e = float(np.mean(errors))
+            ax.axvline(mean_e, color="red", linestyle="--", linewidth=1.5,
+                       label=f"mean={mean_e:.0f} mm")
+            ax.set_xlabel("Absolute error (mm)", fontsize=9)
+            ax.set_ylabel("Pixel count", fontsize=9)
+            ax.set_title(label, fontsize=9)
+            ax.legend(fontsize=8)
+        for ax in axes[n:]:
+            ax.axis("off")
+        fig.suptitle("Per-Pixel Error Distribution (vs GT, viz frames)", fontsize=11)
+        fig.tight_layout()
+        return self._save(fig, "error_histograms.png")
+
+    def _fig_summary_table(self) -> str:
+        if not self._stats:
+            return self._empty_fig("summary_table.png", "No stats")
+        cols = ["Method", "MRE* (%)", "MRE (%)", "MAE (mm)", "δ1 (%)",
+                "Coverage (%)", "FPS", "GPU %", "GT?"]
+        gt_rows, other_rows = [], []
+        for name, s in self._stats.items():
+            is_gt = (name == self._gt)
+            row = [
+                s.label,
+                "—" if is_gt else f"{s.mre_pen_mean * 100:.1f}",
+                "—" if is_gt else f"{s.mre_mean * 100:.1f}",
+                "—" if is_gt else f"{s.mae_mean:.1f}",
+                "—" if is_gt else f"{s.delta1_mean:.1f}",
+                f"{s.coverage_mean:.1f}",
+                f"{s.fps_mean:.1f}" if s.fps_mean < 999 else "≈30",
+                f"{s.gpu_load_mean:.0f}" if s.gpu_load_mean > 0 else "—",
+                "★ GT" if is_gt else "",
+            ]
+            (gt_rows if is_gt else other_rows).append((name, row))
+        ordered = gt_rows + other_rows
+        cell_text = [r for _, r in ordered]
+        n = len(ordered)
+        fig, ax = plt.subplots(figsize=(14, 1.0 + 0.55 * n))
+        ax.axis("off")
+        table = ax.table(cellText=cell_text, colLabels=cols,
+                         cellLoc="center", loc="center")
+        table.auto_set_font_size(False)
+        table.set_fontsize(9)
+        table.scale(1, 1.6)
+        for (row, col), cell in table.get_celld().items():
+            if row == 0:
+                cell.set_facecolor("#2c3e50")
+                cell.set_text_props(color="white", fontweight="bold")
+            elif row % 2 == 0:
+                cell.set_facecolor("#f7f7f7")
+            cell.set_edgecolor("#cccccc")
+            if row > 0 and cell_text[row - 1][-1] == "★ GT":
+                cell.set_facecolor("#d5f5d5")
+        ax.set_title("Depth Quality Summary (errors in mm)", fontsize=12,
+                     pad=10, fontweight="bold")
+        fig.tight_layout()
+        return self._save(fig, "summary_table.png")
+
+    def _fig_close_range_analysis(self) -> str:
+        names = list(self._r.method_names)
+        if not names:
+            return self._empty_fig("close_range_analysis.png", "No methods")
+        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
+        labels    = [self._r.method_labels.get(n, n) for n in names]
+        coverages = [self._stats[n].close_range_coverage if n in self._stats else 0.0
+                     for n in names]
+        colors    = [self._r.method_colors.get(n, "#888") for n in names]
+        bars = ax1.bar(labels, coverages, color=colors, alpha=0.85, edgecolor="white")
+        ax1.bar_label(bars, labels=[f"{v:.1f}%" for v in coverages], padding=3, fontsize=7)
+        ax1.set_ylabel(f"Coverage at < {CLOSE_RANGE_THRESHOLD_MM:.0f} mm (%)", fontsize=10)
+        ax1.set_title(f"Close-Range Coverage (< {CLOSE_RANGE_THRESHOLD_MM:.0f} mm)", fontsize=11)
+        ax1.tick_params(axis="x", rotation=45, labelsize=7)
+        ax1.set_xticklabels(labels, ha="right")
+        ax1.set_ylim(0, 115)
+        ax1.grid(axis="y", alpha=0.3)
+        for name in names:
+            vals = self._r.close_range_valid.get(name, [])
+            if not vals:
+                continue
+            color = self._r.method_colors.get(name, "#888")
+            label = self._r.method_labels.get(name, name)
+            ax2.plot(range(1, len(vals) + 1), vals, color=color,
+                     label=label, alpha=0.8, linewidth=1.5)
+        ax2.set_xlabel("Frame", fontsize=10)
+        ax2.set_ylabel(f"Coverage at < {CLOSE_RANGE_THRESHOLD_MM:.0f} mm (%)", fontsize=10)
+        ax2.set_title("Close-Range Coverage per Frame", fontsize=11)
+        ax2.legend(fontsize=6, loc="best")
+        ax2.grid(alpha=0.3)
+        ax2.set_ylim(-5, 115)
+        fig.suptitle("Close-Range Depth Analysis", fontsize=13, fontweight="bold")
+        fig.tight_layout()
+        return self._save(fig, "close_range_analysis.png")
+
+
+# ── main ──────────────────────────────────────────────────────────────────────
+
+def main():
+    parser = argparse.ArgumentParser(description=__doc__,
+                                     formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument('--out_dir',   default=DEFAULT_OUT,    help='Output directory for the report')
+    parser.add_argument('--faro_dir',  default=FARO_DIR,       help='Path to FARO dataset root')
+    parser.add_argument('--original',  default=ORIGINAL_PATH,  help='Path to original model weights')
+    parser.add_argument('--finetuned', default=FINETUNED_PATH, help='Path to fine-tuned model weights')
+    parser.add_argument('--n_viz', type=int, default=N_VIZ,    help='Frames saved for visual comparison')
+    parser.add_argument('--keywords',  nargs='+', default=DATASET_KEYWORDS,  help='Keywords to filter dataset paths (case-insensitive)')
+    parser.add_argument('--split', default='train', choices=['train', 'test', 'all'],
+                        help='Dataset split to evaluate')
+    args = parser.parse_args()
+
+    U.set_logging_format()
+    out_dir = Path(args.out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # ── load stereo models ────────────────────────────────────────────────────
+    models = {}
+    if Path(args.finetuned).exists():
+        models["finetuned"] = load_model(args.finetuned)
+    else:
+        logging.warning(f"Fine-tuned model not found at {args.finetuned} — skipping")
+
+    models["original"] = load_model(args.original)
+
+    # active_methods includes GT, RS hardware, and all NN models
+    active_methods = [GT_NAME, RS_NAME] + list(models.keys())
+
+    # ── dataset ───────────────────────────────────────────────────────────────
+    source = DataSource()
+    n = source.init_directory(
+        input_rectified=args.faro_dir,
+        test_keywords=args.keywords,
+        split=args.split,
+    )
+    logging.info(f"Found {n} samples in {args.faro_dir}")
+    if n == 0:
+        logging.error("No samples found — check FARO_DIR path")
+        return
+
+    # ── accumulators ──────────────────────────────────────────────────────────
+    all_metrics       = []
+    viz_frames        = []
+    valid_acc         = {}     # initialised on first frame
+    dist_bin_mae      = {m: [] for m in active_methods}
+    close_range_valid = {m: [] for m in active_methods}
+    timing_ms_raw     = {m: [] for m in models}   # only NN models have inference latency
+    H = W = None
+
+    for idx in range(n):
+        data  = source.get_item(idx)
+        left  = data['left']
+        right = data['right']
+        gt_mm = data['depth_faro'].astype(np.float32)   # FARO GT — already in mm
+        rs_mm = data['depth_rs'].astype(np.float32)     # RealSense hardware — already in mm
+
+        if H is None:
+            H, W = gt_mm.shape[:2]
+            for m in active_methods:
+                valid_acc[m] = np.zeros((H, W), np.float32)
+
+        # ── run NN inference (returns mm) ─────────────────────────────────────
+        frame_depths = {GT_NAME: gt_mm, RS_NAME: rs_mm}
+        for mname, model in models.items():
+            t0 = time.monotonic()
+            frame_depths[mname] = infer_depth_mm(model, left, right)
+            timing_ms_raw[mname].append((time.monotonic() - t0) * 1000.0)
+
+        # ── per-frame metrics (all values in mm) ──────────────────────────────
+        gt_close_mask = (gt_mm > 0) & (gt_mm < CLOSE_RANGE_THRESHOLD_MM)
+        n_close = int(gt_close_mask.sum())
+
+        for mname in active_methods:
+            pred = frame_depths[mname]
+            valid_acc[mname] += (pred > 0).astype(np.float32)
+
+            if mname == GT_NAME:
+                fm = FrameMetrics(
+                    GT_NAME, 0.0, 0.0, 0.0, 100.0,
+                    float((pred > 0).mean()) * 100.0, 0.0,
+                    mae_pen=0.0, mre_pen=0.0,
+                )
+            elif mname == RS_NAME:
+                # RealSense hardware depth — compare against FARO GT (both in mm)
+                fm = compute_metrics(pred, gt_mm, elapsed_ms=0.0, method_name=RS_NAME)
+            else:
+                fm = compute_metrics(pred, gt_mm, timing_ms_raw[mname][-1], mname)
+
+            all_metrics.append(fm)
+            dist_bin_mae[mname].append(compute_bin_mae_mm(pred, gt_mm))
+
+            close_cov = (
+                float((pred[gt_close_mask] > 0).mean()) * 100.0
+                if n_close > 0 else 0.0
+            )
+            close_range_valid[mname].append(close_cov)
+
+        if idx < args.n_viz:
+            viz_frames.append({k: v.copy() for k, v in frame_depths.items()})
+
+        if (idx + 1) % 200 == 0 or (idx + 1) == n:
+            logging.info(f"  {idx + 1}/{n} frames processed")
+
+    # normalise coverage maps to [0, 1]
+    for m in active_methods:
+        valid_acc[m] /= max(n, 1)
+
+    # ── aggregate timing ──────────────────────────────────────────────────────
+    mean_timing: Dict[str, float] = {
+        m: float(np.mean(ts)) if ts else 0.0
+        for m, ts in timing_ms_raw.items()
+    }
+    # FARO GT has no latency; RealSense is hardware 30 FPS
+    mean_timing[GT_NAME] = 0.0
+    mean_timing[RS_NAME] = 1000.0 / 30.0   # ~33 ms per frame at native 30 FPS
+
+    # ── build BenchmarkResults ────────────────────────────────────────────────
+    method_configs = {
+        "original": {"model_path": args.original},
+    }
+    if "finetuned" in models:
+        method_configs["finetuned"] = {"model_path": args.finetuned}
+    method_configs[RS_NAME]  = {"source": "RealSense D435 hardware depth (30 FPS)"}
+    method_configs[GT_NAME]  = {"source": "FARO Focus 3D scanner — reference GT"}
+
+    results = BenchmarkResults(
+        method_names=active_methods,
+        method_labels={m: METHODS[m]["label"] for m in active_methods},
+        method_colors={m: METHODS[m]["color"] for m in active_methods},
+        ground_truth_name=GT_NAME,
+        n_frames=n,
+        width=W,
+        height=H,
+        all_metrics=all_metrics,
+        viz_frames=viz_frames,
+        coverage_maps=valid_acc,
+        dist_bin_mae=dist_bin_mae,
+        close_range_valid=close_range_valid,
+        source=f"FARO dataset  •  {args.faro_dir}  •  split={args.split}  •  filter={args.keywords}",
+        method_configs=method_configs,
+    )
+
+    # aggregate() computes per-method summaries; pass RS as hardware (30 fps)
+    stats = aggregate(results, mean_timing)
+    # Fix fps for hardware depth sensor (aggregate() uses 1000/t_ms)
+    if RS_NAME in stats:
+        stats[RS_NAME].fps_mean = 30.0
+
+    # ── generate report ───────────────────────────────────────────────────────
+    reporter = ReportGeneratorMM(results, stats, out_dir)
+    reporter.generate()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/benchmark_inbolt.py b/scripts/benchmark_inbolt.py
new file mode 100644
index 0000000..aade923
--- /dev/null
+++ b/scripts/benchmark_inbolt.py
@@ -0,0 +1,690 @@
+"""Benchmark original vs fine-tuned FastFoundationStereo on the Inbolt dataset.
+
+Loads both models, runs inference on all Inbolt samples, computes depth quality
+metrics against Zivid scanner ground truth, and produces an HTML report.
+
+Also generates depth accuracy and noise plots comparing RealSense stereo predictions
+(model output) against Zivid ground-truth depth across distance bins.
+
+Usage:
+  cd /home/adiroha/repos/Fast-FoundationStereo
+  python scripts/benchmark_inbolt.py [--out_dir reports/inbolt_benchmark]
+"""
+
+import argparse
+import logging
+import os
+import sys
+import time
+from pathlib import Path
+
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+sys.path.append(code_dir)
+
+import numpy as np
+import torch
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+
+from core.utils.utils import InputPadder
+import Utils as U
+
+from scripts.data_manager_inbolt import DataSource
+
+from metrics import (
+    BenchmarkResults,
+    FrameMetrics,
+    compute_metrics,
+    compute_bin_mae,
+    aggregate,
+    CLOSE_RANGE_THRESHOLD_M,
+)
+from report import ReportGenerator
+
+
+# ── constants ────────────────────────────────────────────────────────────────
+
+# Default dataset root for --data_dir.  The commented-out path is an earlier capture.
+#DATA_DIR         = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260322T091926Z-1-001/Data Collection' 
+DATA_DIR         = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260415T084601Z-3-001/Data Collection'  # local path to the dataset
+# NOTE(review): MODEL_PATH and FINETUNED_PATH are each assigned twice below.  The
+# second pair (weights/23-36-37) overrides the first (weights/20-30-48), so only
+# the later values are ever used as the --original / --finetuned defaults.
+MODEL_PATH      = f'{code_dir}/../weights/20-30-48/model_best_bp2_serialize.pth'
+FINETUNED_PATH  = f'{code_dir}/../weights/20-30-48/model_finetuned_inbolt-20260415.pth'
+MODEL_PATH      = f'{code_dir}/../weights/23-36-37/model_best_bp2_serialize.pth'
+FINETUNED_PATH  = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt-20260415.pth'
+# Default for --out_dir.
+DEFAULT_OUT     = f'{code_dir}/../reports/inbolt_benchmark'
+# Leftovers from the FARO variant of this script, kept commented out.
+#FARO_DIR       = r'data/faro'  # local path to FARO dataset --- IGNORE ---
+# ORIGINAL_PATH  = f'{code_dir}/../weights/20-30-48/model_best_bp2_serialize.pth'
+# FINETUNED_PATH = f'{code_dir}/../weights/20-30-48/model_finetuned_faro.pth'
+# DEFAULT_OUT    = f'{code_dir}/../reports/faro_benchmark'
+
+# Disparity → depth factor: infer_depth_m computes depth_mm = BF / disparity.
+# NOTE(review): the two factors look like baseline_mm (~50.1) × focal_px (~385.5);
+# the trailing 49470.45 does not equal this product and is presumably a previous
+# value — confirm against the camera calibration.
+BF              = 50.102706998586 * 385.509887695312  #49470.45   # focal_px * baseline_mm  (calibrated from camera)
+ITERS           = 8          # GRU iterations
+N_VIZ           = 5         # number of frames saved for visual comparison in report
+
+DEPTH_BIN_SIZE_M = 0.1       # width of each distance bin for accuracy/noise plots
+MAX_DEPTH_M      = 2.0       # maximum depth considered in plots
+
+# Per-method display metadata (report label and plot colour), keyed by method name.
+METHODS = {
+    "original":  {"label": "Original model",    "color": "#2980b9"},
+    "finetuned": {"label": "Fine-tuned on INBOLT", "color": "#e74c3c"},
+    "inbolt_gt":   {"label": "INBOLT GT",            "color": "#27ae60"},
+}
+# Key in METHODS that identifies the ground-truth pseudo-method.
+GT_NAME = "inbolt_gt"
+
+
+# ── depth-vs-distance analysis ────────────────────────────────────────────────
+
+class DepthBinAccumulator:
+    """Accumulate the mean and std-dev of depth values per GT-distance bin.
+
+    Bin ``k`` covers GT depths in ``[k * bin_size, (k + 1) * bin_size)``.  The
+    accumulated values can be model predictions or the GT depths themselves
+    (the latter measures how much the GT varies inside each bin).
+
+    Pixels whose GT depth lies at or beyond ``n_bins * bin_size`` are ignored.
+    Clamping them into the last bin would mix arbitrarily distant samples into
+    that bin and corrupt its mean / std-dev.
+    """
+
+    def __init__(self, bin_size_m: float = DEPTH_BIN_SIZE_M, max_depth_m: float = MAX_DEPTH_M):
+        self.bin_size   = bin_size_m
+        self.n_bins     = int(np.ceil(max_depth_m / bin_size_m))
+        # Running per-bin sample count, sum and sum of squares (float64 to limit
+        # rounding error when many frames are accumulated).
+        self.count      = np.zeros(self.n_bins, dtype=np.float64)
+        self.sum_       = np.zeros(self.n_bins, dtype=np.float64)
+        self.sum_sq     = np.zeros(self.n_bins, dtype=np.float64)
+
+    def update(self, values_m: np.ndarray, gt_m: np.ndarray):
+        """Add one frame of data.
+
+        Parameters
+        ----------
+        values_m : array of the depth values to accumulate (model or GT).
+        gt_m     : array of GT depths, same shape as *values_m*, that decides
+                   which bin each pixel falls in.
+
+        Pixels are skipped when either depth is non-positive or non-finite, or
+        when the GT depth falls outside the binned range.
+        """
+        valid = (gt_m > 0) & (values_m > 0) & np.isfinite(gt_m) & np.isfinite(values_m)
+        if not valid.any():
+            return
+        v_vals = values_m[valid].ravel().astype(np.float64)
+        v_gt   = gt_m[valid].ravel().astype(np.float64)
+
+        # Decide range membership on the floating-point bin index, before the
+        # integer cast, so very large GT depths cannot overflow the cast.
+        bin_idx  = np.floor(v_gt / self.bin_size)
+        in_range = bin_idx < self.n_bins
+        if not in_range.any():
+            return
+        bins   = bin_idx[in_range].astype(np.intp)
+        v_vals = v_vals[in_range]
+
+        # np.add.at accumulates correctly when the same bin index repeats.
+        np.add.at(self.count,  bins, 1.0)
+        np.add.at(self.sum_,   bins, v_vals)
+        np.add.at(self.sum_sq, bins, v_vals ** 2)
+
+    @property
+    def bin_centers(self) -> np.ndarray:
+        """Centre of every bin, in the same unit as ``bin_size``."""
+        return (np.arange(self.n_bins) + 0.5) * self.bin_size
+
+    def mean(self) -> np.ndarray:
+        """Per-bin mean of the accumulated values; NaN for empty bins."""
+        c = np.maximum(self.count, 1)   # avoid 0/0 for empty bins
+        return np.where(self.count > 0, self.sum_ / c, np.nan)
+
+    def std(self) -> np.ndarray:
+        """Per-bin population std-dev of the accumulated values; NaN for empty bins."""
+        c = np.maximum(self.count, 1)   # avoid 0/0 for empty bins
+        m = np.where(self.count > 0, self.sum_ / c, np.nan)
+        v = np.where(self.count > 0, self.sum_sq / c - m ** 2, np.nan)
+        # E[x²] − E[x]² can dip slightly below zero through rounding.
+        return np.sqrt(np.maximum(v, 0.0))
+
+def plot_depth_vs_distance(
+    accumulators: dict,          # {label: DepthBinAccumulator}
+    colors: dict,                # {label: color_str}
+    out_path: Path,
+    min_count: int = 100,        # bins with fewer samples are hidden
+):
+    """Save a two-panel accuracy / noise figure built from per-bin statistics.
+
+    Left panel:  per-bin mean of the measured depth against the bin centre,
+                 with a ±1 std-dev band and the ideal ``y = x`` line.
+    Right panel: per-bin std-dev (converted to mm) against the bin centre.
+
+    Only bins holding at least *min_count* samples are drawn.  Both x-axes
+    span ``[0, MAX_DEPTH_M]``.  The figure is written to *out_path* and closed.
+    """
+    fig, (ax_acc, ax_noise) = plt.subplots(1, 2, figsize=(14, 6))
+
+    for name in accumulators:
+        acc = accumulators[name]
+        line_color = colors.get(name, None)
+
+        # Hide sparsely populated bins.
+        keep = acc.count >= min_count
+        centers = acc.bin_centers[keep]
+        mean_m = acc.mean()[keep]
+        std_m = acc.std()[keep]
+
+        ax_acc.plot(centers, mean_m, label=name, color=line_color, linewidth=1.5)
+        ax_acc.fill_between(centers, mean_m - std_m, mean_m + std_m, alpha=0.15, color=line_color)
+
+        ax_noise.plot(centers, std_m * 1000, label=name, color=line_color, linewidth=1.5)
+
+    # Reference diagonal: a perfect sensor would lie on y = x.
+    upper = MAX_DEPTH_M
+    ax_acc.plot([0, upper], [0, upper], 'k--', linewidth=1, label='ideal (y = x)')
+    ax_acc.set_xlim(0, upper)
+    ax_acc.set_ylim(0, upper)
+    ax_acc.set_xlabel('Actual depth — Zivid GT (m)')
+    ax_acc.set_ylabel('Measured depth (m)')
+    ax_acc.set_title('Depth Accuracy: Actual vs Measured\n(shaded band = ±1 std dev)')
+    ax_acc.legend(fontsize=9)
+    ax_acc.grid(True, alpha=0.4)
+
+    ax_noise.set_xlim(0, upper)
+    ax_noise.set_xlabel('Actual depth — Zivid GT (m)')
+    ax_noise.set_ylabel('Noise / Std Dev (mm)')
+    ax_noise.set_title('Depth Noise per Distance Bin')
+    ax_noise.legend(fontsize=9)
+    ax_noise.grid(True, alpha=0.4)
+
+    plt.tight_layout()
+    plt.savefig(out_path, dpi=150, bbox_inches='tight')
+    plt.close(fig)
+    logging.info(f"Saved depth-vs-distance plot → {out_path}")
+
+
+def _to_1d_float_array(values, name: str) -> np.ndarray:
+    """Flatten *values* into a non-empty, all-finite float64 vector.
+
+    *name* is only used to label the error message.  Raises ``ValueError``
+    when the input is empty or contains NaN / infinite entries.
+    """
+    flat = np.ravel(np.asarray(values, dtype=np.float64))
+    if not flat.size:
+        raise ValueError(f"{name} must contain at least one value")
+    if not np.isfinite(flat).all():
+        raise ValueError(f"{name} contains NaN or infinite values")
+    return flat
+
+def fit_depth_scale_regression(
+    gt_delta_mm,
+    measured_delta_mm,
+    fit_intercept: bool = False,
+) -> dict:
+    """Fit a linear depth-scale regression and compute residual statistics.
+
+    Parameters
+    ----------
+    gt_delta_mm : array-like
+        Ground-truth floor/depth deltas in millimetres.
+    measured_delta_mm : array-like
+        Measured deltas from one sensor/model in millimetres.
+    fit_intercept : bool, default=False
+        If False, fits the through-origin model ``y = slope * x`` by least
+        squares.  If True, fits ``y = slope * x + intercept``.
+
+    Returns
+    -------
+    dict
+        Keys: ``gt_delta_mm`` and ``measured_delta_mm`` (the flattened float64
+        inputs), ``slope``, ``intercept``, ``fitted_mm``, ``residuals_mm``
+        (measured − fitted), ``rmse_mm`` and ``fit_intercept``.
+
+    Raises
+    ------
+    ValueError
+        If either input is empty or contains NaN/inf, if the flattened inputs
+        differ in length, if fewer than two samples are given, or if a
+        through-origin fit is requested while all GT deltas are zero.
+    """
+    # _to_1d_float_array already rejects empty and non-finite input, so no
+    # additional NaN/inf filtering is required afterwards.
+    x = _to_1d_float_array(gt_delta_mm, "gt_delta_mm")
+    y = _to_1d_float_array(measured_delta_mm, "measured_delta_mm")
+
+    if x.shape != y.shape:
+        raise ValueError("gt_delta_mm and measured_delta_mm must have the same shape")
+    if x.size < 2:
+        raise ValueError("At least two samples are required for regression")
+
+    if fit_intercept:
+        slope, intercept = np.polyfit(x, y, deg=1)
+    else:
+        # Closed-form least squares through the origin: slope = Σxy / Σx².
+        denom = float(np.dot(x, x))
+        if denom <= 0:
+            raise ValueError("Cannot fit a through-origin regression when gt deltas are all zero")
+        slope = float(np.dot(x, y) / denom)
+        intercept = 0.0
+
+    fitted = slope * x + intercept
+    residuals = y - fitted
+    rmse = float(np.sqrt(np.mean(residuals ** 2)))
+
+    return {
+        "gt_delta_mm": x,
+        "measured_delta_mm": y,
+        "slope": float(slope),
+        "intercept": float(intercept),
+        "fitted_mm": fitted,
+        "residuals_mm": residuals,
+        "rmse_mm": rmse,
+        "fit_intercept": fit_intercept,
+    }
+
+
+def build_example_depth_scale_regression_series(gt_delta_mm=None, rs_delta_mm=None, zv_delta_mm=None) -> dict:
+    """Build the ``series_map`` consumed by ``plot_depth_scale_regression``.
+
+    Each argument is a sequence of depth deltas in millimetres.  Any argument
+    left as ``None`` is replaced by a built-in example series, so calling the
+    function with no arguments yields a self-contained demo data set in which
+    the RealSense series has a noticeable scale bias and the Zivid series
+    stays close to the ideal slope of 1.
+
+    Returns
+    -------
+    dict
+        ``{"realsense": {...}, "zivid": {...}}``; each entry holds
+        ``gt_delta_mm``, ``measured_delta_mm``, ``color``, ``marker`` and
+        ``label``.  Both entries share the same ``gt_delta_mm`` object.
+    """
+    if gt_delta_mm is None:
+        gt_delta_mm = np.array([0, 100, 200, 300, 400, 500, 600, 700], dtype=np.float64)
+    if rs_delta_mm is None:
+        rs_delta_mm = np.array([0.0, 104.0, 218.0, 323.0, 433.0, 542.0, 664.0, 754.0], dtype=np.float64)
+    if zv_delta_mm is None:
+        zv_delta_mm = np.array([0.0, 101.0, 201.0, 301.0, 401.0, 502.0, 602.0, 707.0], dtype=np.float64)
+
+    return {
+        "realsense": {
+            "gt_delta_mm": gt_delta_mm,
+            "measured_delta_mm": rs_delta_mm,
+            "color": "#e74c3c",
+            "marker": "s",
+            "label": "realsense",
+        },
+        "zivid": {
+            "gt_delta_mm": gt_delta_mm,
+            "measured_delta_mm": zv_delta_mm,
+            "color": "#2980b9",
+            "marker": "o",
+            "label": "zivid",
+        },
+    }
+
+
+def plot_depth_scale_regression(
+    series_map: dict,
+    out_path: Path,
+    title: str = "Depth Scale Regression — dataset_depth_bias",
+    fit_intercept: bool = False,
+    ideal_slope: float = 1.0,
+):
+    """Create the two-panel regression + residuals figure from paired series.
+
+    The left panel shows the raw points, each series' fitted line and the
+    ideal reference line; the right panel shows the residuals of each fit.
+
+    Parameters
+    ----------
+    series_map : dict
+        Mapping of series name to configuration dict. Each entry should provide:
+          - gt_delta_mm
+          - measured_delta_mm
+        and may optionally include:
+          - label   (defaults to the series name)
+          - color   (defaults to matplotlib's choice)
+          - marker  (defaults to "o")
+    out_path : Path
+        Destination PNG path.
+    title : str
+        Figure title.
+    fit_intercept : bool
+        Whether to fit a free intercept. Defaults to a through-origin fit.
+    ideal_slope : float
+        Slope of the ideal reference line shown on the left panel.
+
+    Raises
+    ------
+    ValueError
+        If *series_map* is empty, or if ``fit_depth_scale_regression`` rejects
+        one of the series.
+    """
+    if not series_map:
+        raise ValueError("series_map must contain at least one series")
+
+    # Fit every series before a figure exists, so invalid input cannot leave
+    # an orphaned figure behind.
+    fit_results = []
+    max_x = 0.0
+    max_y = 0.0
+    for default_name, cfg in series_map.items():
+        result = fit_depth_scale_regression(
+            gt_delta_mm=cfg["gt_delta_mm"],
+            measured_delta_mm=cfg["measured_delta_mm"],
+            fit_intercept=fit_intercept,
+        )
+        result["label"] = cfg.get("label", default_name)
+        result["color"] = cfg.get("color", None)
+        result["marker"] = cfg.get("marker", "o")
+        fit_results.append(result)
+        max_x = max(max_x, float(np.max(result["gt_delta_mm"])))
+        max_y = max(max_y, float(np.max(result["measured_delta_mm"])))
+
+    # Common axis extent, and the x samples used to draw the fitted lines.
+    lim = max(max_x, max_y)
+    fit_x = np.linspace(0.0, lim, 200)
+
+    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
+    try:
+        for result in fit_results:
+            label = result["label"]
+            color = result["color"]
+            marker = result["marker"]
+            x = result["gt_delta_mm"]
+            y = result["measured_delta_mm"]
+            slope = result["slope"]
+            intercept = result["intercept"]
+            rmse = result["rmse_mm"]
+
+            axes[0].scatter(x, y, color=color, marker=marker, s=70, label=f"{label} (raw)", zorder=3)
+            axes[0].plot(
+                fit_x,
+                slope * fit_x + intercept,
+                color=color,
+                linewidth=2.0,
+                label=(
+                    f"{label} fit: slope={slope:.3f}, intercept={intercept:.1f}mm, RMSE={rmse:.1f}mm"
+                    if fit_intercept else
+                    f"{label} fit: slope={slope:.3f}, RMSE={rmse:.1f}mm"
+                ),
+            )
+
+            axes[1].scatter(
+                x,
+                result["residuals_mm"],
+                color=color,
+                marker=marker,
+                s=70,
+                label=f"{label} (RMSE={rmse:.1f}mm)",
+                zorder=3,
+            )
+
+        axes[0].plot(
+            fit_x,
+            ideal_slope * fit_x,
+            linestyle="--",
+            color="gray",
+            linewidth=1.5,
+            label=f"ideal (slope={ideal_slope:.1f})",
+        )
+        axes[0].set_xlabel("Ground Truth Delta (mm)")
+        axes[0].set_ylabel("Measured Depth Delta (mm)")
+        axes[0].set_title("Floor Depth Delta: Measured vs Ground Truth")
+        axes[0].grid(True, alpha=0.3)
+        axes[0].legend(fontsize=9, loc="upper left")
+
+        axes[1].axhline(0.0, linestyle="--", color="gray", linewidth=1.2)
+        axes[1].set_xlabel("Ground Truth Delta (mm)")
+        axes[1].set_ylabel("Residual (mm)")
+        axes[1].set_title("Residuals (Measured − Fit)")
+        axes[1].grid(True, alpha=0.3)
+        axes[1].legend(fontsize=9, loc="upper left")
+
+        # Small left margin so the points at x = 0 are not glued to the axis.
+        axes[0].set_xlim(-0.05 * max(lim, 1.0), lim * 1.05)
+        axes[1].set_xlim(-0.05 * max(lim, 1.0), lim * 1.05)
+
+        # Symmetric residual range of at least ±1 mm, plus 15 % headroom.
+        residual_values = np.concatenate([r["residuals_mm"] for r in fit_results])
+        residual_abs_max = max(1.0, float(np.max(np.abs(residual_values))))
+        axes[1].set_ylim(-residual_abs_max * 1.15, residual_abs_max * 1.15)
+
+        fig.suptitle(title, fontsize=18, fontweight="bold")
+        fig.tight_layout(rect=[0, 0, 1, 0.96])   # leave room for the suptitle
+        fig.savefig(out_path, dpi=150, bbox_inches="tight")
+    finally:
+        # Release the figure even when drawing or saving fails.
+        plt.close(fig)
+    logging.info(f"Saved depth-scale regression plot → {out_path}")
+
+
+
+# ── inference helpers ─────────────────────────────────────────────────────────
+
+def _preprocess_ir(left: np.ndarray, right: np.ndarray):
+    """Turn a pair of single-channel IR images into CUDA float32 batches.
+
+    Each image is cast to float32, clipped to [0, 255], replicated into three
+    identical channels and returned as a 1×3×H×W tensor on the CUDA device.
+    NOTE(review): inputs are presumably uint16 H×W arrays — confirm with the loader.
+    """
+    def _as_cuda_batch(img: np.ndarray):
+        clipped = np.clip(img.astype(np.float32), 0, 255)
+        pseudo_rgb = np.stack([clipped] * 3, axis=-1)   # H×W×3
+        # add the batch axis, then reorder to channels-first
+        return torch.as_tensor(pseudo_rgb).float()[None].permute(0, 3, 1, 2).cuda()
+
+    return _as_cuda_batch(left), _as_cuda_batch(right)
+
+
+@torch.no_grad()
+def infer_depth_m(model, left: np.ndarray, right: np.ndarray) -> np.ndarray:
+    """Predict disparity for an IR pair and convert it to depth in metres.
+
+    The images are preprocessed, padded so both spatial sizes are divisible by
+    32, and passed through ``model.forward`` under CUDA autocast.  The output
+    is unpadded, reshaped to the input's H×W and clipped at zero.  Depth is
+    ``BF / disparity`` (millimetres) divided by 1000; pixels with non-positive
+    disparity stay at 0.
+    """
+    left_t, right_t = _preprocess_ir(left, right)
+    padder = InputPadder(left_t.shape, divis_by=32, force_square=False)
+    left_t, right_t = padder.pad(left_t, right_t)
+
+    with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+        raw_disp = model.forward(left_t, right_t, iters=ITERS, test_mode=True)
+
+    disp_np = padder.unpad(raw_disp.float()).cpu().numpy()
+    disp_np = disp_np.reshape(left.shape[:2]).clip(0, None)
+
+    depth_m = np.zeros_like(disp_np)
+    # Divide only where the disparity is positive; other pixels remain 0.
+    np.divide(BF, disp_np, out=depth_m, where=disp_np > 0)   # disparity → mm
+    depth_m /= 1000.0                                        # mm → m
+    return depth_m
+
+
+def load_model(path: str):
+    """Load a serialized model from *path*, move it to CUDA and set eval mode."""
+    logging.info(f"Loading model from {path}")
+    # NOTE(security): weights_only=False unpickles arbitrary Python objects, so
+    # only checkpoints from a trusted source must be passed in here.
+    net = torch.load(path, map_location='cpu', weights_only=False)
+    net.cuda().eval()
+    return net
+
+
+@torch.no_grad()
+def infer_depth_nobf_m(model, left: np.ndarray, right: np.ndarray) -> np.ndarray:
+    """Run a model that outputs depth directly and return it in metres.
+
+    Same pipeline as the disparity variant (preprocess, pad to a multiple of
+    32, forward under CUDA autocast, unpad, reshape to H×W, clip at zero), but
+    the model output is only divided by 1000 instead of being converted via
+    the BF factor.  Pixels with non-positive output stay at 0.
+    """
+    left_t, right_t = _preprocess_ir(left, right)
+    padder = InputPadder(left_t.shape, divis_by=32, force_square=False)
+    left_t, right_t = padder.pad(left_t, right_t)
+
+    with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+        raw_depth = model.forward(left_t, right_t, iters=ITERS, test_mode=True)
+
+    depth_np = padder.unpad(raw_depth.float()).cpu().numpy()
+    depth_np = depth_np.reshape(left.shape[:2]).clip(0, None)
+
+    depth_m = np.zeros_like(depth_np)
+    # Convert only strictly positive pixels; everything else remains 0.
+    np.divide(depth_np, 1000.0, out=depth_m, where=depth_np > 0)   # mm → m
+    return depth_m
+
+
+
+# ── inbolt graphs ─────────────────────────────────────────────────────────────────────
+
+def main_inbolt_graphs():
+    """Plot the depth-scale regression of RealSense and Zivid depth on the Inbolt data.
+
+    For every sample, the mean depth of the far region (pixels deeper than 80 %
+    of the frame's maximum) is taken for both sensors.  The change of that mean
+    relative to the first sample is regressed against the nominal ground-truth
+    delta, and the resulting figure is written to ``<out_dir>/depth_scale_comparison.png``.
+
+    NOTE(review): the ground-truth delta is hard-coded as 100 mm per sample
+    index, i.e. it assumes the samples are ordered in 100 mm steps — confirm
+    against the capture protocol.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('--out_dir', default=DEFAULT_OUT, help='Output directory for the report')
+    parser.add_argument('--data_dir', default=DATA_DIR, help='Path to dataset root')
+    # The remaining options are not used here; they are kept so that the command
+    # line stays interchangeable with main().
+    parser.add_argument('--original', default=MODEL_PATH, help='Path to original model weights')
+    parser.add_argument('--finetuned', default=FINETUNED_PATH, help='Path to fine-tuned model weights')
+    parser.add_argument('--n_viz', type=int, default=N_VIZ, help='Frames saved for visual comparison')
+    args = parser.parse_args()
+
+    U.set_logging_format()
+    out_dir = Path(args.out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # ── dataset ───────────────────────────────────────────────────────────────
+    source = DataSource()
+    n = source.init_directory(input_rectified=args.data_dir)
+    logging.info(f"Found {n} samples in {args.data_dir}")
+    if n == 0:
+        logging.error("No samples found — check DATA_DIR path")
+        return
+
+    def _far_region_mean_mm(depth_mm: np.ndarray) -> float:
+        """Mean of the finite pixels deeper than 80 % of the frame maximum (NaN if none)."""
+        finite = np.isfinite(depth_mm)
+        if not finite.any():
+            return float('nan')
+        far = finite & (depth_mm > depth_mm[finite].max() * 0.8)
+        return float(depth_mm[far].mean()) if far.any() else float('nan')
+
+    # Float arrays: integer arrays would truncate the sub-millimetre part of
+    # every delta and cannot hold NaN for frames without valid pixels.
+    gt_depth_diff = np.arange(n, dtype=np.float64) * 100.0   # mm
+    rs_depth_diff = np.zeros(n, dtype=np.float64)            # mm
+    zv_depth_diff = np.zeros(n, dtype=np.float64)            # zivid mm
+    rs_ref = None
+    zv_ref = None
+    for idx in range(n):
+        data  = source.get_item(idx)
+        zv_mm = data['depth_zivid'].astype(np.float32)   # Zivid GT in mm
+        rs_mm = data['depth_rs'].astype(np.float32)      # RealSense depth in mm
+
+        rs_level = _far_region_mean_mm(rs_mm)
+        zv_level = _far_region_mean_mm(zv_mm)
+        if idx == 0:
+            # The first sample is the reference; its own delta stays 0.
+            rs_ref = rs_level
+            zv_ref = zv_level
+        else:
+            rs_depth_diff[idx] = rs_level - rs_ref
+            zv_depth_diff[idx] = zv_level - zv_ref
+
+    # Frames without any usable pixel yield NaN; drop them so the fit does not abort.
+    usable = np.isfinite(rs_depth_diff) & np.isfinite(zv_depth_diff)
+    if not usable.all():
+        logging.warning(f"Dropping {int((~usable).sum())} sample(s) without valid depth pixels")
+
+    sm = build_example_depth_scale_regression_series(
+        gt_depth_diff[usable], rs_depth_diff[usable], zv_depth_diff[usable])
+    # Write into the directory that was created above (and is logged below).
+    plot_depth_scale_regression(sm, out_path=out_dir / "depth_scale_comparison.png", title="Depth Scale Comparison")
+
+    logging.info(f"All outputs written to {out_dir}")
+
+
+# ── main ─────────────────────────────────────────────────────────────────────
+
+def main():
+    """Benchmark the original and (if present) fine-tuned model on the Inbolt data.
+
+    Steps: parse the command line, load the models, run inference on every
+    sample, collect per-frame metrics against the Zivid depth, aggregate them,
+    generate the report via ``ReportGenerator`` and finally save the
+    depth-vs-distance plot to ``<out_dir>/depth_vs_distance.png``.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('--out_dir', default=DEFAULT_OUT, help='Output directory for the report')
+    parser.add_argument('--data_dir', default=DATA_DIR, help='Path to dataset root')
+    parser.add_argument('--original', default=MODEL_PATH, help='Path to original model weights')
+    parser.add_argument('--finetuned', default=FINETUNED_PATH, help='Path to fine-tuned model weights')
+    parser.add_argument('--n_viz', type=int, default=N_VIZ, help='Frames saved for visual comparison')
+    args = parser.parse_args()
+
+    U.set_logging_format()
+    out_dir = Path(args.out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # ── load models ──────────────────────────────────────────────────────────
+    # The fine-tuned model is optional; the original model is always required.
+    models = {}
+    if Path(args.finetuned).exists():
+        models["finetuned"] = load_model(args.finetuned)
+    else:
+        logging.warning(f"Fine-tuned model not found at {args.finetuned} — skipping")
+
+    models["original"] = load_model(args.original)
+
+    active_methods = [GT_NAME] + list(models.keys())
+
+    # ── dataset ───────────────────────────────────────────────────────────────
+    source = DataSource()
+    n = source.init_directory(input_rectified=args.data_dir)
+    logging.info(f"Found {n} samples in {args.data_dir}")
+    if n == 0:
+        logging.error("No samples found — check DATA_DIR path")
+        return
+
+    # ── accumulators ──────────────────────────────────────────────────────────
+    all_metrics       = []
+    viz_frames        = []
+    valid_acc         = {}     # will be init on first frame
+    dist_bin_mae      = {m: [] for m in active_methods}
+    close_range_valid = {m: [] for m in active_methods}
+    timing_ms_raw     = {m: [] for m in models}
+    H = W = None
+
+    # depth-vs-distance accumulators:
+    #   "zivid_gt" – Zivid depth values binned by Zivid GT (shows intra-bin spatial spread)
+    #   one entry per stereo model – model predictions binned by Zivid GT
+    depth_acc_keys = ["zivid_gt"] + list(models.keys())
+    depth_accs = {k: DepthBinAccumulator() for k in depth_acc_keys}
+
+    for idx in range(n):
+        data  = source.get_item(idx)
+        left  = data['left']
+        right = data['right']
+        gt_mm = data['depth_zivid'].astype(np.float32)   # Zivid GT in mm
+
+        # The GT must match the IR resolution for the pixel-level comparison below.
+        # NOTE(review): a mismatch is only reported, not corrected (the resize is
+        # disabled), so the per-pixel operations further down will fail on it.
+        rs_h, rs_w = left.shape[:2]
+        if gt_mm.shape != (rs_h, rs_w):
+            #gt_mm = _cv2.resize(gt_mm, (rs_w, rs_h), interpolation=_cv2.INTER_NEAREST)
+            logging.warning(f"Shape mismatch: gt_mm {gt_mm.shape} vs rs {rs_h, rs_w}")
+
+        if H is None:
+            # The first frame fixes the resolution of the coverage maps.
+            H, W = rs_h, rs_w
+            for m in active_methods:
+                valid_acc[m] = np.zeros((H, W), np.float32)
+
+        gt_m = gt_mm / 1000.0   # mm → m
+
+        # run inference for each model
+        frame_depths = {GT_NAME: gt_m}
+        for mname, model in models.items():
+            t0 = time.monotonic()
+            frame_depths[mname] = infer_depth_m(model, left, right)
+            timing_ms_raw[mname].append((time.monotonic() - t0) * 1000.0)
+
+        # per-frame metrics
+        gt_close_mask = (gt_m > 0) & (gt_m < CLOSE_RANGE_THRESHOLD_M)
+        n_close = int(gt_close_mask.sum())
+
+        for mname in active_methods:
+            pred = frame_depths[mname]
+            valid_acc[mname] += (pred > 0).astype(np.float32)
+
+            if mname == GT_NAME:
+                # The GT is the reference itself: only its valid-pixel ratio is measured.
+                fm = FrameMetrics(GT_NAME, 0.0, 0.0, 0.0, 100.0,
+                                  float((pred > 0).mean()) * 100.0, 0.0,
+                                  mae_pen=0.0, mre_pen=0.0)
+            else:
+                fm = compute_metrics(pred, gt_m, timing_ms_raw[mname][-1], mname)
+
+            all_metrics.append(fm)
+
+            dist_bin_mae[mname].append(compute_bin_mae(pred, gt_m))
+
+            # Share of close-range GT pixels for which this method has a valid depth.
+            close_cov = float((pred[gt_close_mask] > 0).mean()) * 100.0 if n_close > 0 else 0.0
+            close_range_valid[mname].append(close_cov)
+
+        # depth-vs-distance: accumulate per-bin stats
+        depth_accs["zivid_gt"].update(gt_m, gt_m)   # GT vs itself → intra-bin spread
+        for mname in models:
+            depth_accs[mname].update(frame_depths[mname], gt_m)
+
+        if idx < args.n_viz:
+            viz_frames.append({k: v.copy() for k, v in frame_depths.items()})
+
+        if (idx + 1) % 200 == 0 or (idx + 1) == n:
+            logging.info(f"  {idx + 1}/{n} frames processed")
+
+    # normalise coverage maps to [0, 1]
+    for m in active_methods:
+        valid_acc[m] /= max(n, 1)
+
+    # ── aggregate timing ──────────────────────────────────────────────────────
+    mean_timing = {m: float(np.mean(ts)) if ts else 0.0 for m, ts in timing_ms_raw.items()}
+    mean_timing[GT_NAME] = 0.0
+
+    # ── build BenchmarkResults ────────────────────────────────────────────────
+    # Only describe the models that were actually loaded and evaluated.
+    method_configs = {
+        "original": {"model_path": args.original},
+    }
+    if "finetuned" in models:
+        method_configs["finetuned"] = {"model_path": args.finetuned}
+
+    results = BenchmarkResults(
+        method_names=active_methods,
+        method_labels={m: METHODS[m]["label"] for m in active_methods},
+        method_colors={m: METHODS[m]["color"] for m in active_methods},
+        ground_truth_name=GT_NAME,
+        n_frames=n,
+        width=W,
+        height=H,
+        all_metrics=all_metrics,
+        viz_frames=viz_frames,
+        coverage_maps=valid_acc,
+        dist_bin_mae=dist_bin_mae,
+        close_range_valid=close_range_valid,
+        source=f"INBOLT dataset ({args.data_dir})",
+        method_configs=method_configs,
+    )
+
+    stats = aggregate(results, mean_timing)
+
+    # ── generate report ───────────────────────────────────────────────────────
+    reporter = ReportGenerator(results, stats, out_dir)
+    reporter.generate()
+
+    # ── depth-vs-distance comparison plot ────────────────────────────────────
+    plot_colors = {
+        "zivid_gt": METHODS[GT_NAME]["color"],
+        **{m: METHODS[m]["color"] for m in models if m in METHODS},
+    }
+    plot_labels = {
+        "zivid_gt":  "Zivid GT (spatial spread)",
+        "original":  METHODS["original"]["label"],
+        "finetuned": METHODS["finetuned"]["label"],
+    }
+    # rename keys to human-readable labels for the plot; skip empty accumulators
+    labeled_accs = {plot_labels.get(k, k): v
+                    for k, v in depth_accs.items()
+                    if depth_accs[k].count.sum() > 0}
+    labeled_colors = {plot_labels.get(k, k): plot_colors.get(k)
+                      for k in depth_accs if depth_accs[k].count.sum() > 0}
+
+    plot_depth_vs_distance(
+        accumulators=labeled_accs,
+        colors=labeled_colors,
+        out_path=out_dir / "depth_vs_distance.png",
+    )
+    logging.info(f"All outputs written to {out_dir}")
+
+
+
+
+# Script entry point.  Exactly one of the three modes below is active; the
+# other two are kept commented out and are switched by editing this block.
+# NOTE(review): the module docstring describes the full benchmark + report,
+# which is mode 3 (main()), but the active mode is 2 (regression plot only).
+if __name__ == '__main__':
+    # 1. works
+    #    (demo plot built from the hard-coded example series)
+    #sm = build_example_depth_scale_regression_series()
+    #plot_depth_scale_regression(sm, out_path=Path(DEFAULT_OUT) / "depth_scale_regression_example.png", title="Example Depth Scale Regression")
+
+    # 2. inbolt data
+    #    (depth-scale regression plot from the dataset's RealSense / Zivid depth)
+    main_inbolt_graphs()
+
+    # 3. full benchmark + report
+    #main()
diff --git a/scripts/benchmark_inbolt_fast_foundation.py b/scripts/benchmark_inbolt_fast_foundation.py
new file mode 100644
index 0000000..540425d
--- /dev/null
+++ b/scripts/benchmark_inbolt_fast_foundation.py
@@ -0,0 +1,1213 @@
+"""Benchmark original vs fine-tuned FastFoundationStereo on the Inbolt dataset.
+
+Loads both models, runs inference on all Inbolt samples, computes depth quality
+metrics against Zivid scanner ground truth, and produces an HTML report.
+
+Also generates depth accuracy and noise plots comparing RealSense stereo predictions
+(model output) against Zivid ground-truth depth across distance bins.
+
+Usage:
+  cd /home/adiroha/repos/Fast-FoundationStereo
+  python scripts/benchmark_inbolt_fast_foundation.py [--out_dir reports/inbolt_benchmark]
+"""
+
+import argparse
+import logging
+import os
+import sys
+import time
+from pathlib import Path
+import cv2
+
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+sys.path.append(code_dir)
+
+import numpy as np
+import torch
+# import matplotlib
+# matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+
+from core.utils.utils import InputPadder
+import Utils as U
+
+from scripts.data_manager_inbolt import DataSource
+
+from metrics import (
+    BenchmarkResults,
+    FrameMetrics,
+    compute_metrics,
+    compute_bin_mae,
+    aggregate,
+    CLOSE_RANGE_THRESHOLD_M,
+)
+from report import ReportGenerator
+
+
+# ── constants ────────────────────────────────────────────────────────────────
+#DATA_DIR         = r'C:\Work\Data\Depth\Data Collection'  # local path to the dataset
+#DATA_DIR        = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260322T091926Z-1-001/Data Collection'  # superseded by the assignment below
+DATA_DIR         = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260415T084601Z-3-001/Data Collection'  # local path to the dataset
+
+MODEL_PATH      = f'{code_dir}/../weights/20-30-48/model_best_bp2_serialize.pth'
+FINETUNED_PATH  = f'{code_dir}/../weights/20-30-48/model_finetuned_inbolt-20260415_epoch_030.pth'
+DEFAULT_OUT     = f'{code_dir}/../reports/inbolt_ffs_benchmark'
+#FARO_DIR       = r'data/faro'  # local path to FARO dataset --- IGNORE ---
+# ORIGINAL_PATH  = f'{code_dir}/../weights/20-30-48/model_best_bp2_serialize.pth'
+# FINETUNED_PATH = f'{code_dir}/../weights/20-30-48/model_finetuned_faro.pth'
+# DEFAULT_OUT    = f'{code_dir}/../reports/faro_benchmark'
+
+BF              = 49470.45   # focal_px * baseline_mm  (calibrated from camera)
+BF_RS           = 49.8624*385.73 #- true # D435 - focal_px * baseline_mm (calibrated from camera)
+ITERS           = 8          # GRU iterations
+N_VIZ           = 5         # number of frames saved for visual comparison in report
+
+DEPTH_BIN_SIZE_M = 0.1       # width of each distance bin for accuracy/noise plots
+MAX_DEPTH_M      = 6.0       # maximum depth considered in plots
+
+METHODS = {
+    "original":  {"label": "Original model",    "color": "#2980b9"},
+    "finetuned": {"label": "Fine-tuned on INBOLT", "color": "#e74c3c"},
+    "inbolt_gt":   {"label": "INBOLT GT",            "color": "#27ae60"},
+}
+GT_NAME = "inbolt_gt"
+
+CAMERA_MATRIX_RS = np.array([
+    [385.5098876953125, 0, 328.31732177734375],
+    [0, 385.5098876953125, 235.6382141113281],
+    [0, 0, 1]
+])
+
+DIST_COEFFS_RS = np.array([
+    0.0,
+    -0.0,
+    -0.0,
+    0.0,
+    -0.0
+])
+
+CAMERA_MATRIX_ZIVID = np.array([
+    [1241.8536376953125, 0, 609.9681922158361],
+    [0, 1241.6390380859375, 513.828698754003],
+    [0, 0, 1]
+])
+
+DIST_COEFFS_ZIVID = np.array([
+    0.045981280505657196,
+    -0.0316404290497303,
+    -0.00012756904470734298,
+    0.0001183780113933608,
+    -0.17966397106647491
+])
+
+# ── projection helpers ─────────────────────────────────────────────────────────
+
+def save_to_ply(points: np.ndarray, filename: str):
+    """Write XYZ points to *filename* as an ASCII PLY point cloud."""
+    header = [
+        'ply',
+        'format ascii 1.0',
+        f'element vertex {len(points)}',
+        'property float x', 'property float y', 'property float z',
+        'end_header',
+    ]
+    with open(filename, 'w') as ply_file:
+        vertex_rows = [f'{px} {py} {pz}' for px, py, pz in points]
+        ply_file.write('\n'.join(header + vertex_rows) + '\n')
+
+def project_camera_to_3d(depth_img_mm: np.ndarray, cam_matrix: np.ndarray, dist_coeffs: np.ndarray) -> np.ndarray:
+    """Back-project a depth image into an (N, 3) float32 point cloud.
+
+    Each pixel is passed through ``cv2.undistortPoints`` and the resulting
+    coordinates are multiplied by the pixel's depth to obtain X and Y; Z is the
+    depth itself.  Pixels with non-finite or non-positive depth are dropped, and
+    an empty (0, 3) array is returned when no pixel is valid.
+    """
+    rows, cols = depth_img_mm.shape
+    grid_x, grid_y = np.meshgrid(
+        np.arange(cols, dtype=np.float32), np.arange(rows, dtype=np.float32), indexing='xy')
+
+    # cv2.undistortPoints takes an (N, 1, 2) float array of (x, y) pixel coordinates.
+    pixels = np.stack([grid_x, grid_y], axis=-1).reshape(-1, 1, 2).astype(np.float32)
+    rays = cv2.undistortPoints(
+        pixels, cam_matrix.astype(np.float32), dist_coeffs.astype(np.float32)).reshape(-1, 2)
+
+    depth = depth_img_mm.reshape(-1).astype(np.float32)
+    keep = np.isfinite(depth) & (depth > 0)
+    if not np.any(keep):
+        return np.zeros((0, 3), dtype=np.float32)
+
+    depth = depth[keep]
+    return np.column_stack([rays[keep] * depth[:, None], depth]).astype(np.float32)
+
+def project_3d_to_camera(points_3d: np.ndarray, cam_matrix: np.ndarray, dist_coeffs: np.ndarray, frame_size = (480,640)) -> np.ndarray:
+    """Rasterize camera-space 3D points into an (H, W) float32 depth image.
+
+    Points are projected with ``cv2.projectPoints`` using zero rotation and
+    translation, rounded to the nearest pixel, and z-buffered so that the
+    smallest Z wins per pixel.  Pixels that receive no point are 0.
+    ``frame_size`` is ``(height, width)``.  Raises ValueError unless
+    ``points_3d`` has shape (N, 3).
+    """
+    if points_3d.ndim != 2 or points_3d.shape[1] != 3:
+        raise ValueError("Input points_3d must have shape (N, 3)")
+    h_rs, w_rs = frame_size
+    empty_depth = np.zeros((h_rs, w_rs), dtype=np.float32)
+
+    # Only finite points in front of the camera may contribute a depth; this also
+    # handles an empty cloud without calling into OpenCV.
+    in_front = np.isfinite(points_3d).all(axis=1) & (points_3d[:, 2] > 0)
+    points_3d = points_3d[in_front]
+    if points_3d.shape[0] == 0:
+        return empty_depth
+
+    no_motion = np.zeros(3, dtype=np.float32)
+    projected_pts, _ = cv2.projectPoints(
+        points_3d.reshape(-1, 1, 3), no_motion, no_motion,
+        cam_matrix.astype(np.float32), dist_coeffs.astype(np.float32))
+    u_idx, v_idx = np.rint(projected_pts.reshape(-1, 2)).astype(np.int32).T
+    in_bounds = (u_idx >= 0) & (u_idx < w_rs) & (v_idx >= 0) & (v_idx < h_rs)
+
+    # Z-buffer: np.minimum.at keeps the closest depth when points share a pixel.
+    depth_buffer = np.full(h_rs * w_rs, np.inf, dtype=np.float32)
+    np.minimum.at(depth_buffer, v_idx[in_bounds] * w_rs + u_idx[in_bounds], points_3d[in_bounds, 2])
+    return np.where(np.isfinite(depth_buffer), depth_buffer, 0.0).astype(np.float32).reshape(h_rs, w_rs)
+
+# Re-render the Zivid depth map with the RealSense intrinsics ("Zivid GT as seen by
+# RealSense") so the two depth maps can be compared pixel by pixel.
+def project_depth_zivid_to_rs(depth_zivid_mm: np.ndarray, depth_rs_mm: np.ndarray, finx = 0) -> np.ndarray:
+    """Return the Zivid depth rasterized onto the pixel grid of ``depth_rs_mm``.
+
+    ``depth_rs_mm`` only supplies the output (H, W); ``finx`` is a frame index that
+    is used solely by the optional PLY debug dump below.
+    NOTE(review): no Zivid-to-RealSense extrinsics are applied here — confirm
+    that the two cameras are meant to share one coordinate frame.
+    """
+    # Zivid depth image -> 3D points in the Zivid camera frame.
+    XYZ = project_camera_to_3d(depth_zivid_mm, CAMERA_MATRIX_ZIVID, DIST_COEFFS_ZIVID)
+    #save_to_ply(XYZ/1000, f'zivid_original_points_{finx:03d}.ply') # debug dump, in metres
+    # 3D points -> depth image in RealSense pixel space.
+    return project_3d_to_camera(XYZ, CAMERA_MATRIX_RS, DIST_COEFFS_RS, frame_size = depth_rs_mm.shape)
+
+
+# ── depth-vs-distance analysis ────────────────────────────────────────────────
+
+class DepthBinAccumulator:
+    """Running per-bin mean and std-dev of depth values, binned by GT distance.
+
+    Bin k covers GT depths in [k * bin_size, (k+1) * bin_size).  Samples whose GT
+    depth is non-finite or beyond the last bin are ignored rather than clipped
+    into an edge bin.  The accumulated values may be model predictions or the GT
+    itself (the latter estimates the within-bin spatial spread).
+    """
+
+    def __init__(self, bin_size_m: float = DEPTH_BIN_SIZE_M, max_depth_m: float = MAX_DEPTH_M):
+        self.bin_size   = bin_size_m
+        self.n_bins     = int(np.ceil(max_depth_m / bin_size_m))
+        self.count      = np.zeros(self.n_bins, dtype=np.float64)  # samples per bin
+        self.sum_       = np.zeros(self.n_bins, dtype=np.float64)  # sum of values per bin
+        self.sum_sq     = np.zeros(self.n_bins, dtype=np.float64)  # sum of squared values per bin
+
+    def update(self, values_m: np.ndarray, gt_m: np.ndarray):
+        """Add one frame of data.
+
+        Parameters
+        ----------
+        values_m : array of the depth values to accumulate (model or GT).
+        gt_m     : same-shape array of GT depths that selects each pixel's bin.
+        """
+        values_m = np.asarray(values_m, dtype=np.float64)
+        gt_m = np.asarray(gt_m, dtype=np.float64)
+        # Keep finite, strictly positive pairs whose GT lies inside the binned range;
+        # clipping far pixels into the last bin would bias that bin's statistics.
+        valid = np.isfinite(values_m) & (values_m > 0) & (gt_m > 0) & (gt_m < self.n_bins * self.bin_size)
+        if not valid.any():
+            return
+        v_vals = values_m[valid]
+        # np.minimum guards against floating-point round-up at the upper edge.
+        bins = np.minimum(np.floor(gt_m[valid] / self.bin_size).astype(np.int64), self.n_bins - 1)
+        self.count  += np.bincount(bins, minlength=self.n_bins)
+        self.sum_   += np.bincount(bins, weights=v_vals, minlength=self.n_bins)
+        self.sum_sq += np.bincount(bins, weights=v_vals ** 2, minlength=self.n_bins)
+
+    @property
+    def bin_centers(self) -> np.ndarray:
+        return (np.arange(self.n_bins) + 0.5) * self.bin_size
+
+    def mean(self) -> np.ndarray:
+        """Per-bin mean of the accumulated values (NaN for empty bins)."""
+        return np.where(self.count > 0, self.sum_ / np.maximum(self.count, 1), np.nan)
+
+    def std(self) -> np.ndarray:
+        """Per-bin population std-dev, sqrt(E[x^2] - E[x]^2) (NaN for empty bins)."""
+        mean_sq = np.where(self.count > 0, self.sum_sq / np.maximum(self.count, 1), np.nan)
+        return np.sqrt(np.maximum(mean_sq - self.mean() ** 2, 0.0))
+
+def plot_depth_vs_distance(
+    accumulators: dict,          # {label: DepthBinAccumulator}
+    colors: dict,                # {label: color_str}
+    out_path: Path,
+    min_count: int = 100,        # bins with fewer samples are hidden
+):
+    """Save a two-panel figure comparing the accumulators over GT distance.
+
+    Left panel: per-bin mean depth with a ±1 std-dev band plus the ideal y = x
+    line.  Right panel: per-bin std-dev converted to millimetres.
+    """
+    fig, (ax_acc, ax_noise) = plt.subplots(1, 2, figsize=(14, 6))
+
+    for label, acc in accumulators.items():
+        line_color = colors.get(label, None)
+        shown = acc.count >= min_count          # hide sparsely populated bins
+        centers = acc.bin_centers[shown]
+        mean_m = acc.mean()[shown]
+        std_m = acc.std()[shown]
+
+        ax_acc.plot(centers, mean_m, label=label, color=line_color, linewidth=1.5)
+        ax_acc.fill_between(centers, mean_m - std_m, mean_m + std_m, alpha=0.15, color=line_color)
+        ax_noise.plot(centers, std_m * 1000, label=label, color=line_color, linewidth=1.5)
+
+    depth_limit = MAX_DEPTH_M
+    gt_axis_label = 'Actual depth — Zivid GT (m)'
+
+    # Accuracy panel, including the ideal reference line.
+    ax_acc.plot([0, depth_limit], [0, depth_limit], 'k--', linewidth=1, label='ideal (y = x)')
+    ax_acc.set_xlim(0, depth_limit)
+    ax_acc.set_ylim(0, depth_limit)
+    ax_acc.set_xlabel(gt_axis_label)
+    ax_acc.set_ylabel('Measured depth (m)')
+    ax_acc.set_title('Depth Accuracy: Actual vs Measured\n(shaded band = ±1 std dev)')
+
+    # Noise panel.
+    ax_noise.set_xlim(0, depth_limit)
+    ax_noise.set_xlabel(gt_axis_label)
+    ax_noise.set_ylabel('Noise / Std Dev (mm)')
+    ax_noise.set_title('Depth Noise per Distance Bin')
+
+    for panel in (ax_acc, ax_noise):
+        panel.legend(fontsize=9)
+        panel.grid(True, alpha=0.4)
+
+    fig.tight_layout()
+    fig.savefig(out_path, dpi=150, bbox_inches='tight')
+    plt.close(fig)
+    logging.info(f"Saved depth-vs-distance plot → {out_path}")
+
+def _to_1d_float_array(values, name: str) -> np.ndarray:
+    """Flatten *values* to float64; raise ValueError if empty or non-finite."""
+    flat = np.ravel(np.asarray(values, dtype=np.float64))
+    if not flat.size:
+        raise ValueError(f"{name} must contain at least one value")
+    if np.isnan(flat).any() or np.isinf(flat).any():
+        raise ValueError(f"{name} contains NaN or infinite values")
+    return flat
+
+def fit_depth_scale_regression(
+    gt_delta_mm,
+    measured_delta_mm,
+    fit_intercept: bool = True,
+) -> dict:
+    """Fit a linear depth-scale regression and compute residual statistics.
+
+    Parameters
+    ----------
+    gt_delta_mm : array-like
+        Ground-truth floor/depth deltas in millimetres.
+    measured_delta_mm : array-like
+        Measured deltas from one sensor/model in millimetres.
+    fit_intercept : bool, default=True
+        If True, fits `y = slope * x + intercept`.  If False, uses a
+        through-origin fit `y = slope * x` and reports an intercept of 0.
+
+    Returns
+    -------
+    dict
+        Keys: ``gt_delta_mm`` and ``measured_delta_mm`` (the flattened float64
+        inputs), ``slope``, ``intercept``, ``fitted_mm``, ``residuals_mm``
+        (measured minus fitted), ``rmse_mm`` and ``fit_intercept``.
+
+    Raises
+    ------
+    ValueError
+        If an input is empty or non-finite, the inputs differ in size, fewer than
+        two samples are given, or a through-origin fit has all-zero gt deltas.
+    """
+    # _to_1d_float_array already rejects empty and non-finite input, so no extra
+    # finite-value filtering is required afterwards.
+    x = _to_1d_float_array(gt_delta_mm, "gt_delta_mm")
+    y = _to_1d_float_array(measured_delta_mm, "measured_delta_mm")
+
+    if x.shape != y.shape:
+        raise ValueError("gt_delta_mm and measured_delta_mm must have the same shape")
+    if x.size < 2:
+        raise ValueError("At least two samples are required for regression")
+
+    if fit_intercept:
+        slope, intercept = np.polyfit(x, y, deg=1)
+    else:
+        denom = float(np.dot(x, x))
+        if denom <= 0:
+            raise ValueError("Cannot fit a through-origin regression when gt deltas are all zero")
+        slope, intercept = float(np.dot(x, y) / denom), 0.0
+
+    fitted = slope * x + intercept
+    residuals = y - fitted
+    return {
+        "gt_delta_mm": x,
+        "measured_delta_mm": y,
+        "slope": float(slope),
+        "intercept": float(intercept),
+        "fitted_mm": fitted,
+        "residuals_mm": residuals,
+        "rmse_mm": float(np.sqrt(np.mean(residuals ** 2))),
+        "fit_intercept": fit_intercept,
+    }
+
+def fit_plane_and_compute_error(depth_diff_mm: np.ndarray, valid_mask: np.ndarray) -> dict:
+    """Least-squares fit of the plane z = a*x + b*y + c over the valid pixels.
+
+    Parameters
+    ----------
+    depth_diff_mm : np.ndarray
+        2D depth-difference image in millimetres; x is the column index and y
+        the row index of each pixel.
+    valid_mask : np.ndarray
+        Same-shape mask; truthy entries with a finite depth value are used.
+
+    Returns
+    -------
+    dict
+        ``coeffs`` (a, b, c), ``rmse_mm``, ``mae_mm``, ``residuals_mm`` (data minus
+        plane, one entry per used pixel) and ``n_valid``.  With fewer than three
+        usable pixels the statistics are NaN and ``residuals_mm`` is empty.
+
+    Raises
+    ------
+    ValueError
+        If ``depth_diff_mm`` is not 2D or the mask shape differs from it.
+    """
+    if depth_diff_mm.ndim != 2:
+        raise ValueError("depth_diff_mm must be a 2D array")
+    if valid_mask.shape != depth_diff_mm.shape:
+        raise ValueError("valid_mask must have the same shape as depth_diff_mm")
+
+    usable = np.isfinite(depth_diff_mm) & valid_mask.astype(bool)
+    n_valid = int(np.count_nonzero(usable))
+    fit = {
+        "coeffs": np.full(3, np.nan, dtype=np.float64),
+        "rmse_mm": np.nan,
+        "mae_mm": np.nan,
+        "residuals_mm": np.array([], dtype=np.float64),
+        "n_valid": n_valid,
+    }
+    if n_valid < 3:
+        return fit
+
+    row_idx, col_idx = np.nonzero(usable)
+    z = depth_diff_mm[usable].astype(np.float64)
+    # Design matrix columns: x (column index), y (row index), constant term.
+    design = np.column_stack([col_idx.astype(np.float64), row_idx.astype(np.float64), np.ones_like(z)])
+    coeffs = np.linalg.lstsq(design, z, rcond=None)[0]
+    residuals = z - design @ coeffs
+    fit.update(coeffs=coeffs, residuals_mm=residuals,
+               rmse_mm=float(np.sqrt(np.mean(residuals ** 2))),
+               mae_mm=float(np.mean(np.abs(residuals))))
+    return fit
+
+def build_example_depth_scale_regression_series(
+    gt_delta_mm,
+    rs_delta_mm,
+    zv_delta_mm,
+    fs_delta_mm=None,
+    ft_delta_mm=None,
+    rs_rsme_mm=None,
+    zv_rsme_mm=None,
+    fs_rsme_mm=None,
+    ft_rsme_mm=None
+) -> dict:
+    """Bundle per-sensor depth-delta series into a ``series_map`` dict.
+
+    Each entry carries the shared ground-truth deltas, the sensor's measured
+    deltas, an optional RMSE value and its plot styling (color, marker, label).
+
+    Parameters
+    ----------
+    gt_delta_mm : array-like or None
+        Ground-truth deltas; ``None`` selects 0, 100, ..., 700.
+    rs_delta_mm, zv_delta_mm : array-like
+        Measured deltas stored under the ``"realsense"`` and ``"zivid"`` keys.
+    fs_delta_mm, ft_delta_mm : array-like, optional
+        When given, add the ``"ffs"`` / ``"ftn"`` series.
+    rs_rsme_mm, zv_rsme_mm, fs_rsme_mm, ft_rsme_mm : optional
+        Stored unchanged as the ``"rmse_mm"`` field of the matching series.
+
+    Returns
+    -------
+    dict
+        Series name -> configuration dict, in insertion order
+        realsense, zivid, then ffs and ftn when present.
+    """
+    if gt_delta_mm is None:
+        gt_delta_mm = np.array([0, 100, 200, 300, 400, 500, 600, 700], dtype=np.float64)
+
+    def make_series(measured, rmse, color, marker, label):
+        # All series share the same ground-truth axis.
+        return {
+            "gt_delta_mm": gt_delta_mm,
+            "measured_delta_mm": measured,
+            "rmse_mm": rmse,
+            "color": color,
+            "marker": marker,
+            "label": label,
+        }
+
+    series_map = {
+        "realsense": make_series(rs_delta_mm, rs_rsme_mm, "#e74c3c", "s", "realsense"),
+        "zivid": make_series(zv_delta_mm, zv_rsme_mm, "#2980b9", "o", "zivid"),
+    }
+    optional_series = (
+        ("ffs", fs_delta_mm, fs_rsme_mm, "#27ae60", "d"),
+        ("ftn", ft_delta_mm, ft_rsme_mm, "#f39c12", "^"),
+    )
+    for key, measured, rmse, color, marker in optional_series:
+        if measured is not None:
+            series_map[key] = make_series(measured, rmse, color, marker, key)
+    return series_map
+
+def plot_depth_scale_regression(
+    series_map: dict,
+    out_path: Path,
+    title: str = "Depth Scale Regression — dataset_depth_bias",
+    fit_intercept: bool = True,
+    ideal_slope: float = 1.0,
+):
+    """Create the two-panel depth-delta error + RMSE figure from paired series.
+
+    The left panel scatters ``measured - ground truth`` against the ground-truth
+    delta; the right panel scatters one RMSE value per ground-truth delta.
+
+    Parameters
+    ----------
+    series_map : dict
+        Mapping of series name to configuration dict. Each entry should provide:
+          - gt_delta_mm
+          - measured_delta_mm
+        and may optionally include:
+          - label
+          - color
+          - marker
+          - rmse_mm : scalar or per-sample array shown on the right panel;
+            when missing or None, the RMSE of the linear fit is used instead.
+    out_path : Path
+        Destination PNG path.
+    title : str
+        Figure title.
+    fit_intercept : bool
+        Forwarded to ``fit_depth_scale_regression``; the default fits a free
+        intercept.
+    ideal_slope : float
+        Kept for interface compatibility; no reference line is drawn from it.
+
+    Raises
+    ------
+    ValueError
+        If ``series_map`` is empty, a series cannot be fitted, or an ``rmse_mm``
+        array cannot be broadcast to the number of samples of its series.
+
+    Notes
+    -----
+    Both panels use a fixed y-range of -20..70 mm, and the x-range is derived
+    from the largest ground-truth delta over all series.
+    """
+    if not series_map:
+        raise ValueError("series_map must contain at least one series")
+
+    # Fit every series before creating the figure, so that a series that fails
+    # validation does not leave an open figure behind.
+    prepared = []
+    for default_name, cfg in series_map.items():
+        result = fit_depth_scale_regression(
+            gt_delta_mm=cfg["gt_delta_mm"],
+            measured_delta_mm=cfg["measured_delta_mm"],
+            fit_intercept=fit_intercept,
+        )
+        x = result["gt_delta_mm"]
+
+        rmse = cfg.get("rmse_mm")
+        if rmse is None:
+            # No externally supplied error: fall back to the RMSE of the fit.
+            rmse = result["rmse_mm"]
+        # scatter() needs one y per x, so expand a scalar RMSE to every sample.
+        rmse = np.broadcast_to(np.asarray(rmse, dtype=np.float64), x.shape)
+        logging.debug(f"{default_name}: rmse_mm={rmse}")
+
+        prepared.append({
+            "x": x,
+            "error": result["measured_delta_mm"] - x,
+            "rmse": rmse,
+            "label": cfg.get("label", default_name),
+            "color": cfg.get("color", None),
+            "marker": cfg.get("marker", "o"),
+        })
+
+    # Largest ground-truth delta over all series (never below 0).
+    lim = max([0.0] + [float(np.max(series["x"])) for series in prepared])
+
+    fig, (ax_error, ax_rmse) = plt.subplots(1, 2, figsize=(14, 6))
+    for series in prepared:
+        ax_error.scatter(
+            series["x"],
+            series["error"],
+            color=series["color"],
+            marker=series["marker"],
+            s=70,
+            label=f"{series['label']} (raw)",
+            zorder=3,
+        )
+        ax_rmse.scatter(
+            series["x"],
+            series["rmse"],
+            color=series["color"],
+            marker=series["marker"],
+            s=70,
+            label=f"{series['label']} RMSE mm",
+            zorder=3,
+        )
+
+    ax_error.axhline(0.0, linestyle="--", color="gray", linewidth=1.2)
+    ax_error.set_xlabel("Ground Truth Delta (mm)")
+    ax_error.set_ylabel("Measured Depth Delta (mm) - Ground Truth Delta (mm)")
+    ax_error.set_title("Depth Delta: Measured from Ground Truth")
+    ax_error.grid(True, alpha=0.3)
+    ax_error.legend(fontsize=9, loc="upper left")
+
+    ax_rmse.axhline(0.0, linestyle="--", color="gray", linewidth=1.2)
+    ax_rmse.set_xlabel("Ground Truth Delta (mm)")
+    ax_rmse.set_ylabel("Residual RMSE (mm)")
+    ax_rmse.set_title("Residuals (Measured − Plane Fit)")
+    ax_rmse.grid(True, alpha=0.3)
+    ax_rmse.legend(fontsize=9, loc="upper left")
+
+    # Shared axis ranges for both panels.
+    for panel in (ax_error, ax_rmse):
+        panel.set_xlim(-0.05 * max(lim, 1.0), lim * 1.05)
+        panel.set_ylim(-20, 70)
+
+    fig.suptitle(title, fontsize=18, fontweight="bold")
+    plt.tight_layout(rect=[0, 0, 1, 0.96])
+    plt.savefig(out_path, dpi=150, bbox_inches="tight")
+    plt.close(fig)
+    logging.info(f"Saved depth-scale regression plot → {out_path}")
+
+
+# ── inference helpers ─────────────────────────────────────────────────────────
+
+def _preprocess_ir(left: np.ndarray, right: np.ndarray):
+    """Turn a pair of single-channel IR images into 1×3×H×W float CUDA tensors.
+
+    Values are clipped to [0, 255] and the single channel is repeated three times.
+    """
+    def to_tensor(ir_img: np.ndarray):
+        gray = np.clip(ir_img.astype(np.float32), 0, 255)
+        return torch.as_tensor(np.stack([gray] * 3, axis=-1)).float()[None].permute(0, 3, 1, 2).cuda()
+    return to_tensor(left), to_tensor(right)
+
+@torch.no_grad()
+def _infer_disparity(model, left: np.ndarray, right: np.ndarray) -> np.ndarray:
+    """Run the stereo model on an IR pair; return H×W float32 disparity clipped at 0."""
+    left_t, right_t = _preprocess_ir(left, right)
+    # Pad the pair for the network (divis_by=32); the padding is removed from the output below.
+    padder = InputPadder(left_t.shape, divis_by=32, force_square=False)
+    left_t, right_t = padder.pad(left_t, right_t)
+
+    with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+        disp = model.forward(left_t, right_t, iters=ITERS, test_mode=True)
+
+    disp = padder.unpad(disp.float())
+    return disp.cpu().numpy().reshape(left.shape[:2]).clip(0, None)
+
+
+def _disparity_to_depth_mm(disp_np: np.ndarray, bf: float) -> np.ndarray:
+    """Convert disparity to depth as ``bf / disparity``; zero disparity maps to 0."""
+    depth_mm = np.zeros_like(disp_np)
+    valid = disp_np > 0
+    depth_mm[valid] = bf / disp_np[valid]
+    return depth_mm
+
+
+def infer_depth_m(model, left: np.ndarray, right: np.ndarray) -> np.ndarray:
+    """Run stereo inference on an IR pair; return depth map in metres (H×W float32).
+
+    Uses the module-level ``BF`` (mm) and converts the result from mm to metres.
+    """
+    return _disparity_to_depth_mm(_infer_disparity(model, left, right), BF) / 1000.0
+
+
+def infer_depth_rs_mm(model, left: np.ndarray, right: np.ndarray) -> np.ndarray:
+    """Run stereo inference on an IR pair; return depth map in millimetres (H×W float32)."""
+    # BF_RS is expressed in pixel·mm, so the quotient is already in millimetres.
+    return _disparity_to_depth_mm(_infer_disparity(model, left, right), BF_RS)
+
+def load_model(path: str):  # returns the object loaded from *path* after .cuda().eval() was called on it
+    logging.info(f"Loading model from {path}")
+    model = torch.load(path, map_location='cpu', weights_only=False)  # NOTE: weights_only=False unpickles arbitrary objects — only load trusted files
+    model.cuda().eval()
+    return model
+
+
+# ── inbolt graphs ─────────────────────────────────────────────────────────────────────
+
+def main_inbolt_graphs():
+    """Plot RealSense and Zivid depth deltas of the far region against nominal GT steps.
+
+    For every sample the mean depth of the pixels deeper than 80 % of the frame's
+    maximum is taken per sensor; deltas are reported relative to sample 0 and the
+    ground truth is assumed to advance 100 mm per sample.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('--out_dir', default=DEFAULT_OUT, help='Output directory for the report')
+    parser.add_argument('--data_dir', default=DATA_DIR, help='Path to dataset root')
+    parser.add_argument('--original', default=MODEL_PATH, help='Path to original model weights')
+    parser.add_argument('--finetuned', default=FINETUNED_PATH, help='Path to fine-tuned model weights')
+    parser.add_argument('--n_viz', type=int, default=N_VIZ, help='Frames saved for visual comparison')
+    args = parser.parse_args()
+
+    U.set_logging_format()
+    out_dir = Path(args.out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # ── dataset ───────────────────────────────────────────────────────────────
+    source = DataSource()
+    n = source.init_directory(input_rectified=args.data_dir)
+    logging.info(f"Found {n} samples in {args.data_dir}")
+    if n == 0:
+        logging.error("No samples found — check DATA_DIR path")
+        return
+
+    def far_region_mean(depth_mm: np.ndarray) -> float:
+        """Mean depth of the pixels deeper than 80 % of the frame maximum."""
+        return float(np.nanmean(depth_mm[depth_mm > depth_mm.max() * 0.8]))
+
+    # Float buffers: integer arrays would silently truncate the fractional deltas.
+    gt_depth_diff = np.arange(n, dtype=np.float64) * 100   # mm, nominal step per sample
+    rs_depth_diff = np.zeros(n, dtype=np.float64)          # mm
+    zv_depth_diff = np.zeros(n, dtype=np.float64)          # zivid mm
+    rs_ref = None
+    zv_ref = None
+    for idx in range(n):
+        data  = source.get_item(idx)
+        zv_mm = data['depth_zivid'].astype(np.float32)   # Zivid GT in mm
+        rs_mm = data['depth_rs'].astype(np.float32)      # RealSense depth in mm
+        rs_mean = far_region_mean(rs_mm)
+        zv_mean = far_region_mean(zv_mm)
+        if idx == 0:
+            # Sample 0 is the reference every later delta is measured from.
+            rs_ref, zv_ref = rs_mean, zv_mean
+        else:
+            rs_depth_diff[idx] = rs_mean - rs_ref
+            zv_depth_diff[idx] = zv_mean - zv_ref
+
+    sm = build_example_depth_scale_regression_series(gt_depth_diff, rs_depth_diff, zv_depth_diff)
+    # Write into the directory selected by --out_dir (it is the one created above).
+    plot_depth_scale_regression(sm, out_path=out_dir / "depth_scale_comparison.png", title="Depth Scale Comparison")
+    logging.info(f"All outputs written to {out_dir}")
+
+# ── inbolt graphs with Zivid projection ─────────────────────────────────────────────────────────────────────
+
+def main_inbolt_graphs_with_projection():
+    """Compare RealSense depth with Zivid depth re-projected into the RealSense image.
+
+    Sample 0 provides the reference depth maps.  For each later sample the
+    per-pixel difference to the reference is averaged over a near-surface mask
+    (depth delta) and a plane is fitted to it (residual RMSE).  Both series are
+    plotted against a nominal ground-truth step of 100 mm per sample.
+
+    Writes ``depth_scale_comparison_inbolt.png`` into the ``--out_dir`` directory.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('--out_dir', default=DEFAULT_OUT, help='Output directory for the report')
+    parser.add_argument('--data_dir', default=DATA_DIR, help='Path to dataset root')
+    parser.add_argument('--original', default=MODEL_PATH, help='Path to original model weights')
+    parser.add_argument('--finetuned', default=FINETUNED_PATH, help='Path to fine-tuned model weights')
+    parser.add_argument('--n_viz', type=int, default=N_VIZ, help='Frames saved for visual comparison')
+    args = parser.parse_args()
+
+    U.set_logging_format()
+    out_dir = Path(args.out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # ── dataset ───────────────────────────────────────────────────────────────
+    source = DataSource()
+    n = source.init_directory(input_rectified=args.data_dir)
+    logging.info(f"Found {n} samples in {args.data_dir}")
+    if n == 0:
+        logging.error("No samples found — check DATA_DIR path")
+        return
+
+    # Float buffers: integer arrays would truncate the fractional deltas and
+    # cannot store the NaN RMSE that the plane fit reports for tiny masks.
+    gt_depth_diff = np.arange(n, dtype=np.float64) * 100   # mm, nominal step per sample
+    rs_depth_diff = np.zeros(n, dtype=np.float64)          # mm
+    zv_depth_diff = np.zeros(n, dtype=np.float64)          # zivid mm
+    rs_depth_rsme = np.zeros(n, dtype=np.float64)          # mm
+    zv_depth_rsme = np.zeros(n, dtype=np.float64)          # zivid mm
+    rs_ref = None
+    zv_ref = None
+    for idx in range(n):
+        data                = source.get_item(idx)
+        zv_mm               = data['depth_zivid'].astype(np.float32)   # Zivid GT in mm
+        rs_mm               = data['depth_rs'].astype(np.float32)   # RealSense depth in mm
+
+        # project zivid on rs
+        zv_prj_mm           = project_depth_zivid_to_rs(zv_mm, rs_mm, finx = idx)
+
+        # Mask: pixels deeper than 10 mm that lie within 10 % (RealSense) and 5 %
+        # (projected Zivid) of the nearest such depth; the final mask is their intersection.
+        rs_valid           = (10 < rs_mm)
+        rs_valid           = rs_valid & (rs_mm < rs_mm[rs_valid].min()*1.1)
+        zv_valid           = (10 < zv_prj_mm)
+        zv_valid           = zv_valid & (zv_prj_mm < zv_prj_mm[zv_valid].min()*1.05) & rs_valid
+        if idx == 0:
+            # Sample 0 is the reference; its own delta and RMSE stay 0.
+            rs_ref = rs_mm
+            zv_ref = zv_prj_mm
+            continue
+
+        # NOTE(review): the mask comes from the current sample only, so reference
+        # pixels without a valid depth are not excluded — confirm this is intended.
+        rs_diff_map = rs_mm     - rs_ref
+        zv_diff_map = zv_prj_mm - zv_ref
+
+        # Interactive debug view of the difference maps and the mask.
+        fig, axes = plt.subplots(1, 3, sharey=True, sharex=True, figsize=(8,4))
+        axes[0].imshow(rs_diff_map, vmin=-10, vmax=1000)
+        axes[0].set_title("RealSense Depth Diff (mm)")
+        axes[1].imshow(zv_diff_map, vmin=-10, vmax=1000)
+        axes[1].set_title("Zivid Projected Depth Diff (mm)")
+        axes[2].imshow(zv_valid, cmap='gray')
+        axes[2].set_title("Valid Mask (Zivid Projection)")
+        plt.suptitle(f"Sample {idx:03d} Depth Difference Maps and Valid Mask", fontsize=16)
+        plt.tight_layout()
+        plt.show()
+        plt.close(fig)   # release the figure so they do not pile up across samples
+
+        # Mean depth deltas over the common valid support.
+        rs_depth_diff[idx] = float(np.mean(rs_diff_map[zv_valid]))
+        zv_depth_diff[idx] = float(np.mean(zv_diff_map[zv_valid]))
+
+        # Fit a plane to each difference map and use fit residual RMSE as error.
+        rs_depth_rsme[idx] = fit_plane_and_compute_error(rs_diff_map, zv_valid)["rmse_mm"]
+        zv_depth_rsme[idx] = fit_plane_and_compute_error(zv_diff_map, zv_valid)["rmse_mm"]
+
+    sm = build_example_depth_scale_regression_series(gt_depth_diff, rs_depth_diff, zv_depth_diff, rs_rsme_mm=rs_depth_rsme, zv_rsme_mm=zv_depth_rsme)
+    # Write into the directory selected by --out_dir instead of always using DEFAULT_OUT.
+    plot_depth_scale_regression(sm, out_path=out_dir / "depth_scale_comparison_inbolt.png", title="Depth Scale Comparison")
+
+    logging.info(f"All outputs written to {out_dir}")
+
+# ── inbolt and FFS graphs ─────────────────────────────────────────────────────────────────────
+
+def main_inbolt_ffs_graphs_with_projection():
+    """Plot RealSense / FFS / fine-tuned FFS depth error against Zivid depth projected into the RealSense image.
+
+    Parses the CLI arguments, evaluates every sample and writes the per-sample error figures and the summary plot to --out_dir.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('--out_dir', default=DEFAULT_OUT, help='Output directory for the report')
+    parser.add_argument('--data_dir', default=DATA_DIR, help='Path to dataset root')
+    parser.add_argument('--original', default=MODEL_PATH, help='Path to original model weights')
+    parser.add_argument('--finetuned', default=FINETUNED_PATH, help='Path to fine-tuned model weights')
+    parser.add_argument('--n_viz', type=int, default=N_VIZ, help='Frames saved for visual comparison')
+    args = parser.parse_args()
+
+    U.set_logging_format()
+    out_dir = Path(args.out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # ── dataset ───────────────────────────────────────────────────────────────
+    source = DataSource()
+    n = source.init_directory(input_rectified=args.data_dir)
+    logging.info(f"Found {n} samples in {args.data_dir}")
+    if n == 0:
+        logging.error("No samples found — check DATA_DIR path")
+        return
+
+    # ── load models ──────────────────────────────────────────────────────────
+    models = {}
+    if not Path(args.finetuned).exists():
+        # Both models are run on every frame below, so a missing checkpoint cannot simply be skipped.
+        logging.error(f"Fine-tuned model not found at {args.finetuned} — aborting")
+        return
+    models["finetuned"] = load_model(args.finetuned)
+    models["original"] = load_model(args.original)
+
+    gt_depth_diff = np.arange(n)*1 # no meaningful GT depth delta for this dataset; the sample index (1 mm steps) is a placeholder x-axis
+    rs_depth_diff = np.zeros(n, dtype=np.float64) # RealSense mm (float: an int array would truncate the stored means)
+    zv_depth_diff = np.zeros(n, dtype=np.float64) # zivid mm
+    ffs_depth_diff = np.zeros(n, dtype=np.float64) # ffs mm
+    ftn_depth_diff = np.zeros(n, dtype=np.float64) # ftn mm
+    rs_depth_rsme = np.zeros(n, dtype=np.float64) # RealSense mm
+    zv_depth_rsme = np.zeros(n, dtype=np.float64) # zivid mm
+    ffs_depth_rsme = np.zeros(n, dtype=np.float64) # ffs mm
+    ftn_depth_rsme = np.zeros(n, dtype=np.float64) # ftn mm
+
+    for idx in range(n):
+        data            = source.get_item(idx)
+        left            = data['left']
+        right           = data['right']
+        zv_mm           = data['depth_zivid'].astype(np.float32)   # Zivid GT in mm
+        rs_mm           = data['depth_rs'].astype(np.float32)   # RealSense depth in mm
+        ffs_mm          = infer_depth_rs_mm(models["original"], left, right)
+        ftn_mm          = infer_depth_rs_mm(models["finetuned"], left, right)
+        # project zivid on rs
+        zv_prj_mm       = project_depth_zivid_to_rs(zv_mm, rs_mm, finx = idx)
+
+        rs_valid        = (10 < rs_mm) 
+        zv_valid        = (10 < zv_prj_mm) 
+        ffs_valid       = (10 < ffs_mm)  
+        ftn_valid       = (10 < ftn_mm)  
+
+        zv_zv_error     = source.compute_depth_error(zv_prj_mm, zv_prj_mm, depth_mask=zv_valid & rs_valid)
+        rs_zv_error     = source.compute_depth_error(rs_mm,     zv_prj_mm, depth_mask=zv_valid & rs_valid)
+        ffs_zv_error    = source.compute_depth_error(ffs_mm,    zv_prj_mm, depth_mask=zv_valid & ffs_valid)
+        ftn_zv_error    = source.compute_depth_error(ftn_mm,    zv_prj_mm, depth_mask=zv_valid & ftn_valid)
+
+        # debug
+        # img_list = [left, right, rs_mm, zv_prj_mm, ffs_mm, ftn_mm]
+        # ttl_list = ['left (RS)', 'right (RS)', 'depth RS (mm)', 'depth Zivid (mm)', 'depth FFS (mm)', 'depth FTN (mm)']
+        # source.show_subset(img_list, ttl_list, save_path=DEFAULT_OUT , fig_name = f"sample_{idx:03d}_inputs.png")
+        img_list = [zv_zv_error, rs_zv_error, ffs_zv_error, ftn_zv_error]
+        ttl_list = ['Zivid Error', 'RS Error', 'FFS Error', 'FTN Error']
+        source.show_subset(img_list, ttl_list, save_path=str(out_dir), fig_name = f"error_{idx:03d}_inputs.png")  # honour --out_dir
+
+        zv_depth_diff[idx] = np.mean(zv_zv_error) 
+        rs_depth_diff[idx] = np.mean(rs_zv_error)
+        ffs_depth_diff[idx] = np.mean(ffs_zv_error)
+        ftn_depth_diff[idx] = np.mean(ftn_zv_error)        
+
+        zv_depth_rsme[idx] = np.sqrt(np.mean(zv_zv_error**2)) 
+        rs_depth_rsme[idx] = np.sqrt(np.mean(rs_zv_error**2))
+        ffs_depth_rsme[idx] = np.sqrt(np.mean(ffs_zv_error**2))
+        ftn_depth_rsme[idx] = np.sqrt(np.mean(ftn_zv_error**2))
+
+    sm = build_example_depth_scale_regression_series(gt_depth_diff, rs_depth_diff, zv_depth_diff, ffs_depth_diff, ftn_depth_diff, rs_rsme_mm=rs_depth_rsme, zv_rsme_mm=zv_depth_rsme, fs_rsme_mm=ffs_depth_rsme, ft_rsme_mm=ftn_depth_rsme)
+    plot_depth_scale_regression(sm, out_path=out_dir / "depth_scale_comparison_ffs.png", title="Depth Scale Comparison")
+
+    logging.info(f"All outputs written to {out_dir}")
+
+def main_inbolt_ffs_graphs_with_projection_biased_dataset():
+    """Compare every sample with the first sample of the same source (Zivid, RealSense, FFS, fine-tuned FFS).
+
+    Per-sample mean deltas and plane-fit residual RMSEs are plotted against a nominal 100-per-sample step and written to --out_dir.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('--out_dir', default=DEFAULT_OUT, help='Output directory for the report')
+    parser.add_argument('--data_dir', default=DATA_DIR, help='Path to dataset root')
+    parser.add_argument('--original', default=MODEL_PATH, help='Path to original model weights')
+    parser.add_argument('--finetuned', default=FINETUNED_PATH, help='Path to fine-tuned model weights')
+    parser.add_argument('--n_viz', type=int, default=N_VIZ, help='Frames saved for visual comparison')
+    args = parser.parse_args()
+
+    U.set_logging_format()
+    out_dir = Path(args.out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # ── dataset ───────────────────────────────────────────────────────────────
+    source      = DataSource()
+    n           = source.init_directory(input_rectified=args.data_dir)
+    logging.info(f"Found {n} samples in {args.data_dir}")
+    if n == 0:
+        logging.error("No samples found — check DATA_DIR path")
+        return
+
+    # ── load models ──────────────────────────────────────────────────────────
+    models = {}
+    if not Path(args.finetuned).exists():
+        # Both models are run on every frame below, so a missing checkpoint cannot simply be skipped.
+        logging.error(f"Fine-tuned model not found at {args.finetuned} — aborting")
+        return
+    models["finetuned"] = load_model(args.finetuned)
+    models["original"] = load_model(args.original)
+
+    gt_depth_diff = np.arange(n)*100 # nominal x-axis: 100 per sample index (presumably mm — TODO confirm against the capture protocol)
+    rs_depth_diff = np.zeros(n, dtype=np.float64) # RealSense mm (float: an int array would truncate the stored means)
+    zv_depth_diff = np.zeros(n, dtype=np.float64) # zivid mm
+    ffs_depth_diff = np.zeros(n, dtype=np.float64) # ffs mm
+    ftn_depth_diff = np.zeros(n, dtype=np.float64) # ftn mm
+    rs_depth_rsme = np.zeros(n, dtype=np.float64) # RealSense mm
+    zv_depth_rsme = np.zeros(n, dtype=np.float64) # zivid mm
+    ffs_depth_rsme = np.zeros(n, dtype=np.float64) # ffs mm
+    ftn_depth_rsme = np.zeros(n, dtype=np.float64) # ftn mm
+    rs_ref = None
+    zv_ref = None
+    fs_ref = None
+    ft_ref = None    
+    for idx in range(n):
+        data            = source.get_item(idx)
+        left            = data['left']
+        right           = data['right']
+        zv_mm           = data['depth_zivid'].astype(np.float32)   # Zivid GT in mm
+        rs_mm           = data['depth_rs'].astype(np.float32)   # RealSense depth in mm
+        ffs_mm          = infer_depth_rs_mm(models["original"], left, right)
+        ftn_mm          = infer_depth_rs_mm(models["finetuned"], left, right)
+        # project zivid on rs
+        zv_prj_mm       = project_depth_zivid_to_rs(zv_mm, rs_mm, finx = idx)
+
+        if idx == 0:
+            zv_ref = zv_prj_mm
+            rs_ref = rs_mm
+            fs_ref = ffs_mm
+            ft_ref = ftn_mm
+
+        # save ply of the point cloud for visualization 
+        if idx == 1:
+            XYZ_FS = project_camera_to_3d(ffs_mm, CAMERA_MATRIX_RS, DIST_COEFFS_RS)  # only consumed by the disabled export below
+            # save to ply point cloud for visualization
+            #save_to_ply(XYZ_FS/1000, f'ffs_projected_points_{idx:03d}.ply') # save in meters for visualization
+
+        # Keep pixels within a band above the closest valid depth (+50 for Zivid, +100 for the others), all limited to the Zivid mask.
+        zv_valid           = (10 < zv_prj_mm) 
+        zv_valid           = (zv_prj_mm < (zv_prj_mm[zv_valid].min()*1.0+50)) & zv_valid
+        rs_valid           = (10 < rs_mm) 
+        rs_valid           = (rs_mm < (rs_mm[rs_valid].min()*1.0+100)) & rs_valid & zv_valid
+        fs_valid           = (10 < ffs_mm)
+        fs_valid           = (ffs_mm < ffs_mm[fs_valid].min()*1.0+100) & fs_valid & zv_valid
+        ft_valid           = (10 < ftn_mm) 
+        ft_valid           = (ftn_mm < ftn_mm[ft_valid].min()*1.0+100) & ft_valid & zv_valid
+
+        zv_zv_error         = source.compute_depth_error(zv_prj_mm, zv_ref, depth_mask= zv_valid)
+        rs_zv_error         = source.compute_depth_error(rs_mm,     rs_ref, depth_mask= rs_valid)
+        ffs_zv_error        = source.compute_depth_error(ffs_mm,    fs_ref, depth_mask= fs_valid)
+        ftn_zv_error        = source.compute_depth_error(ftn_mm,    ft_ref, depth_mask= ft_valid)
+
+        # debug
+        img_list            = [left, right, rs_mm, zv_prj_mm, ffs_mm, ftn_mm]
+        ttl_list            = ['left (RS)', 'right (RS)', 'depth RS (mm)', 'depth Zivid (mm)', 'depth FFS (mm)', 'depth FTN (mm)']
+        source.show_subset(img_list, ttl_list, save_path=str(out_dir), fig_name = f"sample_{idx:03d}_inputs")  # honour --out_dir
+        img_list            = [zv_zv_error, rs_zv_error, ffs_zv_error, rs_valid.astype(np.float32)*100]
+        ttl_list            = ['Zivid Error', 'RS Error', 'FFS Error', 'RS Valid Mask']
+        source.show_subset(img_list, ttl_list, vmin=0, vmax=100, save_path=str(out_dir), fig_name = f"error_{idx:03d}_inputs")
+
+        zv_count            = np.sum(zv_valid)
+        rs_count            = np.sum(rs_valid)
+        fs_count            = np.sum(fs_valid)
+        ft_count            = np.sum(ft_valid)
+
+        zv_depth_diff[idx]   = np.sum(zv_zv_error) / zv_count
+        rs_depth_diff[idx]   = np.sum(rs_zv_error) / rs_count
+        ffs_depth_diff[idx]  = np.sum(ffs_zv_error) / fs_count
+        ftn_depth_diff[idx]  = np.sum(ftn_zv_error) / ft_count        
+
+        # zv_depth_rsme[idx]  = np.sqrt(np.sum(zv_zv_error**2) / zv_count)
+        # rs_depth_rsme[idx]  = np.sqrt(np.sum(rs_zv_error**2) / rs_count)
+        # ffs_depth_rsme[idx] = np.sqrt(np.sum(ffs_zv_error**2) / fs_count)
+        # ftn_depth_rsme[idx] = np.sqrt(np.sum(ftn_zv_error**2) / ft_count)
+
+        # Fit a plane to each difference map and use fit residual RMSE as error.
+        zv_plane_fit         = fit_plane_and_compute_error(zv_zv_error, zv_valid)        
+        rs_plane_fit         = fit_plane_and_compute_error(rs_zv_error, rs_valid)
+        ffs_plane_fit        = fit_plane_and_compute_error(ffs_zv_error, fs_valid)        
+        ftn_plane_fit        = fit_plane_and_compute_error(ftn_zv_error, ft_valid)
+        rs_depth_rsme[idx]   = rs_plane_fit["rmse_mm"]
+        zv_depth_rsme[idx]   = zv_plane_fit["rmse_mm"]
+        ffs_depth_rsme[idx]  = ffs_plane_fit["rmse_mm"]
+        ftn_depth_rsme[idx]  = ftn_plane_fit["rmse_mm"]
+
+    sm = build_example_depth_scale_regression_series(gt_depth_diff, rs_depth_diff, zv_depth_diff, ffs_depth_diff, ftn_depth_diff, rs_rsme_mm=rs_depth_rsme, zv_rsme_mm=zv_depth_rsme, fs_rsme_mm=ffs_depth_rsme, ft_rsme_mm=ftn_depth_rsme)
+    plot_depth_scale_regression(sm, out_path=out_dir / "depth_comparison_zivid_rs_ffs.png", title="Depth Comparison")
+
+    logging.info(f"All outputs written to {out_dir}")
+
+
+# ── main ─────────────────────────────────────────────────────────────────────
+
+def main():
+    """Run the Inbolt benchmark: per-frame metrics for each loaded model vs. Zivid depth, then write the report and depth-vs-distance plot."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('--out_dir', default=DEFAULT_OUT, help='Output directory for the report')
+    parser.add_argument('--data_dir', default=DATA_DIR, help='Path to dataset root')
+    parser.add_argument('--original', default=MODEL_PATH, help='Path to original model weights')
+    parser.add_argument('--finetuned', default=FINETUNED_PATH, help='Path to fine-tuned model weights')
+    parser.add_argument('--n_viz', type=int, default=N_VIZ, help='Frames saved for visual comparison')
+    args = parser.parse_args()
+
+    U.set_logging_format()
+    out_dir = Path(args.out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # ── load models ──────────────────────────────────────────────────────────
+    models = {}
+    if Path(args.finetuned).exists():
+        models["finetuned"] = load_model(args.finetuned)
+    else:
+        logging.warning(f"Fine-tuned model not found at {args.finetuned} — skipping")
+
+    models["original"] = load_model(args.original)
+
+    active_methods = [GT_NAME] + list(models.keys())
+
+    # ── dataset ───────────────────────────────────────────────────────────────
+    source = DataSource()
+    n = source.init_directory(input_rectified=args.data_dir)
+    logging.info(f"Found {n} samples in {args.data_dir}")
+    if n == 0:
+        logging.error("No samples found — check DATA_DIR path")
+        return
+
+    # ── accumulators ──────────────────────────────────────────────────────────
+    all_metrics       = []     # one FrameMetrics per (frame, method)
+    viz_frames        = []     # copies of the first --n_viz frames' depth maps
+    valid_acc         = {}     # will be init on first frame
+    dist_bin_mae      = {m: [] for m in active_methods}
+    close_range_valid = {m: [] for m in active_methods}
+    timing_ms_raw     = {m: [] for m in models}   # wall-clock inference time per frame, stereo models only
+    H = W = None
+
+    # depth-vs-distance accumulators:
+    #   "zivid_gt" – Zivid depth values binned by Zivid GT (shows intra-bin spatial spread)
+    #   one entry per stereo model – model predictions binned by Zivid GT
+    depth_acc_keys = ["zivid_gt"] + list(models.keys())
+    depth_accs = {k: DepthBinAccumulator() for k in depth_acc_keys}
+
+    for idx in range(n):
+        data  = source.get_item(idx)
+        left  = data['left']
+        right = data['right']
+        gt_mm = data['depth_zivid'].astype(np.float32)   # Zivid GT in mm
+        rs_mm = data['depth_rs'].astype(np.float32)   # RealSense depth in mm
+
+        # Resize Zivid depth to match RealSense IR image resolution for pixel-level comparison
+        rs_h, rs_w = left.shape[:2]
+        if gt_mm.shape != (rs_h, rs_w):
+            #gt_mm = _cv2.resize(gt_mm, (rs_w, rs_h), interpolation=_cv2.INTER_NEAREST)
+            print(f"Shape mismatch: gt_mm {gt_mm.shape} vs rs {rs_h, rs_w}")   # only reported; gt_mm is not resized here
+
+        if H is None:
+            H, W = rs_h, rs_w
+            for m in active_methods:
+                valid_acc[m] = np.zeros((H, W), np.float32)
+
+        gt_m = gt_mm / 1000.0   # mm → m
+        rs_m = rs_mm / 1000.0   # mm → m (not used further in this function)
+
+        # NOTE(review): infer_depth_rs_mm results are treated as millimetres elsewhere in this file while gt_m is in metres — confirm units.
+        # run inference for each model
+        frame_depths = {GT_NAME: gt_m}
+        for mname, model in models.items():
+            t0 = time.monotonic()
+            frame_depths[mname] = infer_depth_rs_mm(model, left, right)
+            timing_ms_raw[mname].append((time.monotonic() - t0) * 1000.0)
+
+        # per-frame metrics
+        gt_close_mask = (gt_m > 0) & (gt_m < CLOSE_RANGE_THRESHOLD_M)
+        n_close = int(gt_close_mask.sum())
+
+        for mname in active_methods:
+            pred = frame_depths[mname]
+            valid_acc[mname] += (pred > 0).astype(np.float32)   # count frames in which each pixel has a positive depth
+
+            if mname == GT_NAME:
+                fm = FrameMetrics(GT_NAME, 0.0, 0.0, 0.0, 100.0,
+                                  float((pred > 0).mean()) * 100.0, 0.0,
+                                  mae_pen=0.0, mre_pen=0.0)
+            else:
+                fm = compute_metrics(pred, gt_m, timing_ms_raw[mname][-1], mname)
+
+            all_metrics.append(fm)
+
+            dist_bin_mae[mname].append(compute_bin_mae(pred, gt_m))
+
+            close_cov = float((pred[gt_close_mask] > 0).mean()) * 100.0 if n_close > 0 else 0.0   # % of close-range GT pixels with a positive prediction
+            close_range_valid[mname].append(close_cov)
+
+        # depth-vs-distance: accumulate per-bin stats
+        depth_accs["zivid_gt"].update(gt_m, gt_m)   # GT vs itself → intra-bin spread
+        for mname in models:
+            depth_accs[mname].update(frame_depths[mname], gt_m)
+
+        if idx < args.n_viz:
+            viz_frames.append({k: v.copy() for k, v in frame_depths.items()})
+
+        if (idx + 1) % 200 == 0 or (idx + 1) == n:
+            logging.info(f"  {idx + 1}/{n} frames processed")
+
+    # normalise coverage maps to [0, 1]
+    for m in active_methods:
+        valid_acc[m] /= max(n, 1)
+
+    # ── aggregate timing ──────────────────────────────────────────────────────
+    mean_timing = {m: float(np.mean(ts)) if ts else 0.0 for m, ts in timing_ms_raw.items()}
+    mean_timing[GT_NAME] = 0.0
+
+    # ── build BenchmarkResults ────────────────────────────────────────────────
+    results = BenchmarkResults(
+        method_names=active_methods,
+        method_labels={m: METHODS[m]["label"] for m in active_methods},
+        method_colors={m: METHODS[m]["color"] for m in active_methods},
+        ground_truth_name=GT_NAME,
+        n_frames=n,
+        width=W,
+        height=H,
+        all_metrics=all_metrics,
+        viz_frames=viz_frames,
+        coverage_maps=valid_acc,
+        dist_bin_mae=dist_bin_mae,
+        close_range_valid=close_range_valid,
+        source=f"INBOLT dataset ({args.data_dir})",
+        method_configs={
+            "original":  {"model_path": args.original},
+            "finetuned": {"model_path": args.finetuned},
+        },
+    )
+
+    stats = aggregate(results, mean_timing)
+
+    # ── generate report ───────────────────────────────────────────────────────
+    reporter = ReportGenerator(results, stats, out_dir)
+    reporter.generate()
+
+    # ── depth-vs-distance comparison plot ────────────────────────────────────
+    plot_colors = {
+        "zivid_gt": METHODS[GT_NAME]["color"],
+        **{m: METHODS[m]["color"] for m in models if m in METHODS},
+    }
+    plot_labels = {
+        "zivid_gt":  "Zivid GT (spatial spread)",
+        "original":  METHODS["original"]["label"],
+        "finetuned": METHODS["finetuned"]["label"],
+    }
+    # rename keys to human-readable labels for the plot; accumulators with a zero total count are left out
+    labeled_accs = {plot_labels.get(k, k): v
+                    for k, v in depth_accs.items()
+                    if depth_accs[k].count.sum() > 0}
+    labeled_colors = {plot_labels.get(k, k): plot_colors.get(k)
+                      for k in depth_accs if depth_accs[k].count.sum() > 0}
+
+    plot_depth_vs_distance(
+        accumulators=labeled_accs,
+        colors=labeled_colors,
+        out_path=out_dir / "depth_vs_distance.png",
+    )
+    logging.info(f"All outputs written to {out_dir}")
+
+
+if __name__ == '__main__':
+    # 1. built-in example series (author note: works)
+    #sm = build_example_depth_scale_regression_series()
+    #plot_depth_scale_regression(sm, out_path=Path(DEFAULT_OUT) / "depth_scale_regression_example.png", title="Example Depth Scale Regression")
+
+    # 2. inbolt data
+    #main_inbolt_graphs()
+
+    # 3. full benchmark + report
+    #main()
+
+    # 4. inbolt with ffs
+    #main_inbolt_ffs_graphs_with_projection()
+
+    # 5. inbolt with zivid projection
+    #main_inbolt_graphs_with_projection()
+
+    # 6. inbolt with zivid projection on biased dataset (the entry point that currently runs)
+    main_inbolt_ffs_graphs_with_projection_biased_dataset()
diff --git a/scripts/benchmark_inbolt_fs.py b/scripts/benchmark_inbolt_fs.py
new file mode 100644
index 0000000..392de0a
--- /dev/null
+++ b/scripts/benchmark_inbolt_fs.py
@@ -0,0 +1,419 @@
+"""Benchmark FastFoundationStereo models + RealSense hardware depth on the Inbolt dataset.
+
+This benchmark mirrors the structure of ``benchmark_faro_rs.py`` but uses the
+Inbolt dataset and the meter-based reporting pipeline already used by
+``benchmark_inbolt.py``.
+
+For fair pixel-wise comparison against the RealSense stereo pair and hardware
+RealSense depth map, Zivid ground-truth depth is projected into RealSense image
+space via ``DataSource.get_item_projected()``.
+
+Usage:
+  cd /home/adiroha/repos/Fast-FoundationStereo
+  python scripts/benchmark_inbolt_fs.py [--out_dir reports/inbolt_ffs_benchmark]
+"""
+
+import argparse
+import logging
+import os
+import sys
+import time
+import cv2
+from pathlib import Path
+from typing import Dict, Optional
+
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+sys.path.append(code_dir)
+
+import numpy as np
+
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+
+import Utils as U
+from benchmark_inbolt import DepthBinAccumulator, infer_depth_m, load_model, plot_depth_vs_distance
+from scripts.data_manager_inbolt import DataSource, CAMERA_MATRIX_RS, DIST_COEFFS_RS
+from metrics import (
+    BenchmarkResults,
+    FrameMetrics,
+    compute_bin_mae,
+    compute_metrics,
+    aggregate,
+    CLOSE_RANGE_THRESHOLD_M,
+)
+from report import ReportGenerator
+
+
+# ── custom report generator ──────────────────────────────────────────────────
+
+class ReportGeneratorInbolt(ReportGenerator):
+    """Custom report generator that shows up to 4 frames in depth comparison and error maps."""
+
+    def __init__(self, results, stats, output_dir) -> None:
+        super().__init__(results, stats, output_dir)
+        self._selected_viz_indices = []   # filled lazily by _get_selected_viz_indices()
+
+    def _get_selected_viz_indices(self, n_pick: int = 4):
+        """Return cached random frame indices used consistently across report sections."""
+        if self._selected_viz_indices:
+            return self._selected_viz_indices
+
+        n_total = len(self._r.viz_frames)
+        if n_total == 0:
+            self._selected_viz_indices = []
+            return self._selected_viz_indices
+
+        n = min(n_pick, n_total)
+        rng = np.random.default_rng(42)   # fixed seed: the same frames are picked on every run
+        self._selected_viz_indices = sorted(rng.choice(n_total, size=n, replace=False).tolist())
+        return self._selected_viz_indices
+
+    def _fig_depth_comparison(self) -> str:
+        """Draw one row per selected frame and one column per method; return the result of ``_save`` for depth_comparison.png."""
+        if not self._r.viz_frames:
+            return self._empty_fig("depth_comparison.png", "No viz frames")
+        sel = self._get_selected_viz_indices(n_pick=4)
+        if not sel:
+            return self._empty_fig("depth_comparison.png", "No viz frames")
+
+        vf0 = self._r.viz_frames[sel[0]]
+        method_names = [n for n in self._r.method_names if n in vf0]
+        nrows = len(sel)
+        ncols = len(method_names)
+        # squeeze=False keeps `axes` 2-D for a single row or column (np.atleast_2d transposed an N x 1 grid to 1 x N).
+        fig, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 3.8 * nrows), squeeze=False)
+        cmap = self._depth_cmap()
+
+        for r, frame_idx in enumerate(sel):
+            vf = self._r.viz_frames[frame_idx]
+            for c, name in enumerate(method_names):
+                ax = axes[r, c]
+                if name not in vf:
+                    ax.axis("off")
+                    continue
+                im = ax.imshow(vf[name], cmap=cmap, vmin=0.1, vmax=2.0)
+                plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label="m")
+                title = self._r.method_labels.get(name, name)
+                if c == 0:
+                    title = f"Frame {frame_idx + 1} • {title}"
+                ax.set_title(title, fontsize=9, wrap=True)
+                ax.axis("off")
+
+        fig.suptitle(f"Depth Map Comparison ({nrows} random frames) — values in meters",
+                     fontsize=11, y=1.01)
+        fig.tight_layout()
+        return self._save(fig, "depth_comparison.png")
+
+    def _fig_error_maps(self) -> str:
+        """Draw |prediction - ground truth| per selected frame and method; return the result of ``_save`` for error_maps.png."""
+        if not self._r.viz_frames or not self._non_gt:
+            return self._empty_fig("error_maps.png", "No comparison methods")
+        sel = self._get_selected_viz_indices(n_pick=4)
+        if not sel:
+            return self._empty_fig("error_maps.png", "No viz frames")
+
+        vf0 = self._r.viz_frames[sel[0]]
+        names = ([self._gt] if self._gt in vf0 else []) + [n for n in self._non_gt if n in vf0]
+        if not names:
+            return self._empty_fig("error_maps.png", "Ground truth not available in viz frame")
+
+        nrows = len(sel)
+        ncols = len(names)
+        cmap = plt.get_cmap("hot").copy()
+        cmap.set_under("#222222")   # values below vmin (invalid pixels are written as 0) get a dark grey
+        # squeeze=False keeps `axes` 2-D for a single row or column (np.atleast_2d transposed an N x 1 grid to 1 x N).
+        fig, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 3.8 * nrows), squeeze=False)
+
+        for r, frame_idx in enumerate(sel):
+            vf = self._r.viz_frames[frame_idx]
+            gt = vf.get(self._gt)
+            if gt is None:
+                for c in range(ncols):
+                    axes[r, c].axis("off")
+                continue
+
+            for c, name in enumerate(names):
+                ax = axes[r, c]
+                if name not in vf:
+                    ax.axis("off")
+                    continue
+                pred = vf[name]
+                valid = (gt > 0) & (pred > 0)
+                err = np.where(valid, np.abs(pred - gt), 0.0).astype(np.float32)
+                im = ax.imshow(err, cmap=cmap, vmin=0.001, vmax=0.1)
+                plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label="|error| (m)")
+                mean_err = float(np.abs(pred[valid] - gt[valid]).mean()) if valid.any() else 0.0
+                label = self._r.method_labels.get(name, name)
+                if c == 0:
+                    ax.set_title(f"Frame {frame_idx + 1} • {label}\nMAE={mean_err:.4f} m", fontsize=9)
+                else:
+                    ax.set_title(f"{label}\nMAE={mean_err:.4f} m", fontsize=9)
+                ax.axis("off")
+
+        gt_label = self._r.method_labels.get(self._gt, self._gt)
+        fig.suptitle(f"Absolute Error vs {gt_label} ({nrows} random frames, m)", fontsize=11, y=1.01)
+        fig.tight_layout()
+        return self._save(fig, "error_maps.png")
+
+
+# ── constants ────────────────────────────────────────────────────────────────
+
+DATA_DIR       = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260415T084601Z-3-001/Data Collection'  # default for --data_dir
+ORIGINAL_PATH  = f'{code_dir}/../weights/23-36-37/model_best_bp2_serialize.pth'  # default for --original
+# FINETUNED_PATH  = f'{code_dir}/../weights/20-30-48/model_finetuned_inbolt-20260415_epoch_030.pth'
+# MODEL_PATH      = f'{code_dir}/../weights/23-36-37/model_best_bp2_serialize.pth'
+#FINETUNED_PATH  = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt-20260415_epoch_111.pth'
+#DEFAULT_OUT     = f'{code_dir}/../reports/inbolt_ffs_benchmark-model37-111-set-20260414_142239'
+FINETUNED_PATH  = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt_planes_epoch_120.pth'  # default for --finetuned
+DEFAULT_OUT     = f'{code_dir}/../reports/inbolt_ffs_benchmark-planes_epoch_120'  # default for --out_dir
+N_VIZ = 5  # default for --n_viz
+
+# Display label and plot colour for every method key used in the benchmark.
+METHODS: Dict[str, Dict[str, str]] = {
+    'original': {'label': 'FFS Original', 'color': '#2980b9'},
+    'finetuned': {'label': 'FFS Fine-tuned (INBOLT)', 'color': '#e74c3c'},
+    'depth_rs': {'label': 'RealSense Hardware Depth', 'color': '#f39c12'},
+    'zivid_gt': {'label': 'Zivid GT (projected to RS)', 'color': '#27ae60'},
+}
+GT_NAME = 'zivid_gt'  # METHODS key treated as ground truth
+RS_NAME = 'depth_rs'  # METHODS key of the RealSense depth map
+RS_FPS = 30.0  # NOTE(review): no use visible in this part of the file — confirm it is still needed
+
+
+def resolve_finetuned_model_path(preferred_path: str) -> Optional[str]:
+    """Locate a fine-tuned Inbolt checkpoint, preferring ``preferred_path``.
+
+    Falls back to known file names, then to any ``model_finetuned_inbolt*.pth`` under
+    ``weights/`` (last in sorted order); returns ``None`` when nothing is found.
+    """
+    wanted = Path(preferred_path)
+    if wanted.exists():
+        return str(wanted)
+
+    search_root = Path(code_dir) / '..' / 'weights'
+
+    # 1) Known checkpoint names, in priority order, anywhere below weights/.
+    for known in ('model_finetuned_inbolt.pth', 'model_finetuned_inbolt-20260415_epoch_030.pth'):
+        hits = sorted(search_root.glob(f'**/{known}'))
+        if not hits:
+            continue
+        fallback = hits[0]
+        logging.warning(f'Preferred fine-tuned model not found at {wanted}. Using fallback {fallback}')
+        return str(fallback)
+
+    # 2) Any Inbolt fine-tuned checkpoint; the last entry in sorted order wins.
+    discovered = sorted(search_root.glob('**/model_finetuned_inbolt*.pth'))
+    if not discovered:
+        return None
+
+    newest = discovered[-1]
+    logging.warning(
+        f'Preferred fine-tuned model not found at {wanted}. Using discovered checkpoint {newest}'
+    )
+    return str(newest)
+
+
+# ── main ─────────────────────────────────────────────────────────────────────
+
+def main():
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument('--out_dir', default=DEFAULT_OUT, help='Output directory for the report')
+    parser.add_argument('--data_dir', default=DATA_DIR, help='Path to dataset root')
+    parser.add_argument('--original', default=ORIGINAL_PATH, help='Path to original model weights')
+    parser.add_argument('--finetuned', default=FINETUNED_PATH, help='Path to fine-tuned model weights')
+    parser.add_argument('--n_viz', type=int, default=N_VIZ, help='Frames saved for visual comparison')
+    args = parser.parse_args()
+
+    U.set_logging_format()
+    out_dir = Path(args.out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # ── load stereo models ───────────────────────────────────────────────────
+    models = {}
+    finetuned_path = resolve_finetuned_model_path(args.finetuned)
+    if finetuned_path is not None:
+        models['finetuned'] = load_model(finetuned_path)
+    else:
+        logging.warning(
+            f'Fine-tuned model not found (preferred: {args.finetuned}) and no fallback checkpoint found — skipping'
+        )
+
+    models['original'] = load_model(args.original)
+
+    active_methods = [GT_NAME, RS_NAME] + list(models.keys())
+
+    # ── dataset ──────────────────────────────────────────────────────────────
+    source = DataSource(train_mode = False)
+    n = source.init_directory(input_rectified=args.data_dir)
+    logging.info(f'Found {n} samples in {args.data_dir}')
+    if n == 0:
+        logging.error('No samples found — check DATA_DIR path')
+        return
+
+    # ── accumulators ─────────────────────────────────────────────────────────
+    all_metrics = []
+    viz_frames = []
+    valid_acc = {}
+    dist_bin_mae = {m: [] for m in active_methods}
+    close_range_valid = {m: [] for m in active_methods}
+    timing_ms_raw = {m: [] for m in models}
+    H = W = None
+
+    depth_acc_keys = ['zivid_gt', RS_NAME] + list(models.keys())
+    depth_accs = {k: DepthBinAccumulator() for k in depth_acc_keys}
+
+    for idx in range(n):
+        data = source.get_item_projected(idx)
+        left = data['left']
+        right = data['right']
+        gt_mm = data['depth_zivid'].astype(np.float32)
+        rs_mm = data['depth_rs'].astype(np.float32)
+
+        if H is None:
+            H, W = gt_mm.shape[:2]
+            for m in active_methods:
+                valid_acc[m] = np.zeros((H, W), np.float32)
+
+        gt_m = gt_mm / 1000.0
+        rs_m = rs_mm / 1000.0
+
+        frame_depths = {GT_NAME: gt_m, RS_NAME: rs_m}
+        for mname, model in models.items():
+            t0 = time.monotonic()
+            frame_depths[mname] = infer_depth_m(model, left, right)
+            # save raw data to p.g images 16 bit PNGs for later analysis if needed
+            #cv2.imwrite(str(out_dir / f'{mname}_{idx:03d}.png'), (frame_depths[mname] * 1000.0).astype(np.uint16))
+            timing_ms_raw[mname].append((time.monotonic() - t0) * 1000.0)
+
+        gt_close_mask = (gt_m > 0) & (gt_m < CLOSE_RANGE_THRESHOLD_M)
+        n_close = int(gt_close_mask.sum())
+
+        # # create point clouds for visualization
+        # if idx % 10 == 0:
+        #     for mname in active_methods:
+        #         pred = frame_depths[mname]
+
+        #         XYZ = source.project_camera_to_3d(pred, CAMERA_MATRIX_RS, DIST_COEFFS_RS)  # (N, 3) array of 3D points in Zivid camera space
+        #         mname_path = os.path.join(out_dir, f'{mname}_{idx:03d}.ply')
+        #         source.save_to_ply(XYZ/1000, mname_path) # save in meters for visualization
+
+
+        for mname in active_methods:
+            pred = frame_depths[mname]
+            valid_acc[mname] += (pred > 0).astype(np.float32)
+
+            if mname == GT_NAME:
+                fm = FrameMetrics(
+                    GT_NAME,
+                    0.0,
+                    0.0,
+                    0.0,
+                    100.0,
+                    float((pred > 0).mean()) * 100.0,
+                    0.0,
+                    mae_pen=0.0,
+                    mre_pen=0.0,
+                )
+            elif mname == RS_NAME:
+                fm = compute_metrics(pred, gt_m, elapsed_ms=0.0, method_name=RS_NAME)
+            else:
+                fm = compute_metrics(pred, gt_m, timing_ms_raw[mname][-1], mname)
+
+            all_metrics.append(fm)
+            dist_bin_mae[mname].append(compute_bin_mae(pred, gt_m))
+
+            close_cov = (
+                float((pred[gt_close_mask] > 0).mean()) * 100.0
+                if n_close > 0 else 0.0
+            )
+            close_range_valid[mname].append(close_cov)
+
+        depth_accs['zivid_gt'].update(gt_m, gt_m)
+        depth_accs[RS_NAME].update(rs_m, gt_m)
+        for mname in models:
+            depth_accs[mname].update(frame_depths[mname], gt_m)
+
+        if idx < args.n_viz:
+            viz_frames.append({k: v.copy() for k, v in frame_depths.items()})
+
+        if (idx + 1) % 200 == 0 or (idx + 1) == n:
+            logging.info(f'  {idx + 1}/{n} frames processed')
+
+    for m in active_methods:
+        valid_acc[m] /= max(n, 1)
+
+    mean_timing = {
+        m: float(np.mean(ts)) if ts else 0.0
+        for m, ts in timing_ms_raw.items()
+    }
+    mean_timing[GT_NAME] = 0.0
+    mean_timing[RS_NAME] = 1000.0 / RS_FPS
+
+    method_configs = {
+        'original': {'model_path': args.original},
+        RS_NAME: {'source': f'RealSense hardware depth (~{RS_FPS:.0f} FPS)'},
+        GT_NAME: {'source': 'Projected Zivid depth map used as Inbolt ground truth'},
+    }
+    if 'finetuned' in models and finetuned_path is not None:
+        method_configs['finetuned'] = {'model_path': finetuned_path}
+
+    results = BenchmarkResults(
+        method_names=active_methods,
+        method_labels={m: METHODS[m]['label'] for m in active_methods},
+        method_colors={m: METHODS[m]['color'] for m in active_methods},
+        ground_truth_name=GT_NAME,
+        n_frames=n,
+        width=W,
+        height=H,
+        all_metrics=all_metrics,
+        viz_frames=viz_frames,
+        coverage_maps=valid_acc,
+        dist_bin_mae=dist_bin_mae,
+        close_range_valid=close_range_valid,
+        source=f'INBOLT dataset ({args.data_dir})',
+        method_configs=method_configs,
+    )
+
+    stats = aggregate(results, mean_timing)
+    if RS_NAME in stats:
+        stats[RS_NAME].fps_mean = RS_FPS
+
+    reporter = ReportGeneratorInbolt(results, stats, out_dir)
+    reporter.generate()
+
+    plot_colors = {
+        'zivid_gt': METHODS[GT_NAME]['color'],
+        RS_NAME: METHODS[RS_NAME]['color'],
+        **{m: METHODS[m]['color'] for m in models if m in METHODS},
+    }
+    plot_labels = {
+        'zivid_gt': 'Zivid GT (spatial spread)',
+        RS_NAME: METHODS[RS_NAME]['label'],
+        'original': METHODS['original']['label'],
+        'finetuned': METHODS['finetuned']['label'],
+    }
+    labeled_accs = {
+        plot_labels.get(k, k): v
+        for k, v in depth_accs.items()
+        if depth_accs[k].count.sum() > 0
+    }
+    labeled_colors = {
+        plot_labels.get(k, k): plot_colors.get(k)
+        for k in depth_accs
+        if depth_accs[k].count.sum() > 0
+    }
+
+    plot_depth_vs_distance(
+        accumulators=labeled_accs,
+        colors=labeled_colors,
+        out_path=out_dir / 'depth_vs_distance.png',
+    )
+    logging.info(f'All outputs written to {out_dir}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/benchmark_inbolt_fs_depthrs.py b/scripts/benchmark_inbolt_fs_depthrs.py
new file mode 100644
index 0000000..df18044
--- /dev/null
+++ b/scripts/benchmark_inbolt_fs_depthrs.py
@@ -0,0 +1,386 @@
+"""Benchmark FastFoundationStereo + RealSense depth fusion vs baselines on the Inbolt dataset.
+
+Extends benchmark_inbolt_fs.py by adding depth-fusion models as additional methods.
+
+Methods compared
+----------------
+  original          : FFS pretrained, no fine-tuning, stereo only
+  finetuned         : FFS fine-tuned on Inbolt (stereo only)
+  depthrs_finetuned : FFS + RS Depth Fusion v1 (DepthEncoder + DepthFusionModule)
+  depthrs_v2        : FFS + RS Output Blend v2 (Init Blend + Output Blend, frozen features) — currently disabled in this script
+  depth_rs          : RealSense hardware depth (no stereo)
+  zivid_gt          : Projected Zivid depth (ground truth)
+
+Usage:
+  cd /home/adiroha/repos/Fast-FoundationStereo
+  python scripts/benchmark_inbolt_fs_depthrs.py [--out_dir reports/inbolt_ffs_depthrs]
+"""
+
+import argparse
+import logging
+import os
+import sys
+import time
+from pathlib import Path
+from typing import Dict, Optional
+
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+sys.path.append(code_dir)
+
+import numpy as np
+import torch
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+
+import Utils as U
+from core.utils.utils import InputPadder
+
+# Must be imported before torch.load so the depthrs models can be unpickled
+from scripts.finetune_inbolt_depthrs import (    # noqa: F401
+    FastFoundationStereoDepthRS,
+    DepthEncoder,
+    DepthFusionModule,
+    DepthInitBlend,
+)
+from scripts.finetune_inbolt_depthrs_2 import (  # noqa: F401
+    FastFoundationStereoDepthRS_v2,
+    DepthInitBlend as DepthInitBlend_v2,
+    DepthOutputBlend,
+)
+
+from benchmark_inbolt import (
+    DepthBinAccumulator,
+    _preprocess_ir,
+    infer_depth_m,
+    load_model,
+    plot_depth_vs_distance,
+    BF,
+    ITERS,
+)
+from benchmark_inbolt_fs import ReportGeneratorInbolt, resolve_finetuned_model_path
+from scripts.data_manager_inbolt import DataSource, CAMERA_MATRIX_RS, DIST_COEFFS_RS
+from metrics import (
+    BenchmarkResults,
+    FrameMetrics,
+    compute_bin_mae,
+    compute_metrics,
+    aggregate,
+    CLOSE_RANGE_THRESHOLD_M,
+)
+from report import ReportGenerator
+
+
+# ── constants ────────────────────────────────────────────────────────────────
+
+DATA_DIR       = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260415T084601Z-3-001/Data Collection'
+ORIGINAL_PATH  = f'{code_dir}/../weights/23-36-37/model_best_bp2_serialize.pth'
+FINETUNED_PATH = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt-20260415_epoch_111.pth'
+DEPTHRS_PATH   = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt_depthrs_epoch_001.pth'
+#DEPTHRS_V2_PATH = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt_depthrs_v2_epoch_014.pth'
+DEFAULT_OUT    = f'{code_dir}/../reports/inbolt_ffs_depthrs_benchmark'
+N_VIZ          = 5  # default for --n_viz: frames kept for the visual comparison figures
+
+METHODS: Dict[str, Dict[str, str]] = {  # per-method display label and plot colour, keyed by method name
+    'original':          {'label': 'FFS Original',                         'color': '#2980b9'},
+    'finetuned':         {'label': 'FFS Fine-tuned (Inbolt)',              'color': '#e74c3c'},
+    'depthrs_finetuned': {'label': 'FFS + RS Depth Fusion v1 (Inbolt)',   'color': '#8e44ad'},
+    #'depthrs_v2':        {'label': 'FFS + RS Output Blend v2 (Inbolt)',   'color': '#1abc9c'},
+    'depth_rs':          {'label': 'RealSense Hardware Depth',             'color': '#f39c12'},
+    'zivid_gt':          {'label': 'Zivid GT (projected to RS)',           'color': '#27ae60'},
+}
+GT_NAME = 'zivid_gt'  # METHODS key of the ground-truth depth
+RS_NAME = 'depth_rs'  # METHODS key of the RealSense hardware depth
+RS_FPS  = 30.0        # nominal RealSense frame rate; used for its reported timing/FPS instead of a measurement
+
+
+# ── depth-fusion inference ────────────────────────────────────────────────────
+
+def load_depthrs_model(path: str):
+    """Load a pickled depth-fusion model object from *path*, move it to CUDA and switch it to eval mode."""
+    logging.info(f"Loading depth-fusion model from {path}")
+    model = torch.load(path, map_location='cpu', weights_only=False)  # weights_only=False unpickles arbitrary objects: trusted files only
+    model.cuda().eval()
+    return model
+
+
+def resolve_depthrs_model_path(preferred_path: str) -> Optional[str]:
+    """Resolve the depthrs v1 checkpoint: the preferred file, else a discovered one, else None."""
+    wanted = Path(preferred_path)
+    if wanted.exists():
+        return str(wanted)
+
+    # Fallback: search weights/ recursively and take the lexicographically last match.
+    search_root = Path(code_dir) / '..' / 'weights'
+    matches = sorted(search_root.glob('**/model_finetuned_inbolt_depthrs_epoch_*.pth'))
+    if not matches:
+        return None
+
+    logging.warning(f'Preferred depthrs model not found at {wanted}. Using {matches[-1]}')
+    return str(matches[-1])
+
+
+def resolve_depthrs_v2_model_path(preferred_path: str) -> Optional[str]:
+    """Resolve the depthrs v2 checkpoint: the preferred file, else a discovered one, else None."""
+    wanted = Path(preferred_path)
+    if wanted.exists():
+        return str(wanted)
+
+    # Fallback: search weights/ recursively and take the lexicographically last match.
+    search_root = Path(code_dir) / '..' / 'weights'
+    matches = sorted(search_root.glob('**/model_finetuned_inbolt_depthrs_v2_epoch_*.pth'))
+    if not matches:
+        return None
+
+    logging.warning(f'Preferred depthrs_v2 model not found at {wanted}. Using {matches[-1]}')
+    return str(matches[-1])
+
+
+@torch.no_grad()
+def infer_depth_m_depthrs(
+    model,
+    left: np.ndarray,
+    right: np.ndarray,
+    depth_rs_mm: np.ndarray,
+    zero_rs_depth: bool = False,
+) -> np.ndarray:
+    """Run depth-fusion inference; return depth map in metres (H×W float32).
+
+    depth_rs_mm: (H, W) RealSense depth in millimetres, forwarded to the model.
+    zero_rs_depth: ablation switch — feed an all-zero RS depth map instead.
+    """
+    left_t, right_t = _preprocess_ir(left, right)
+    if zero_rs_depth:  # opt-in only: a leftover debug line used to zero the RS depth on every call
+        depth_rs_mm = np.zeros_like(depth_rs_mm)
+    depth_rs_t = torch.as_tensor(depth_rs_mm.astype(np.float32))[None, None].cuda()  # (1,1,H,W)
+
+    padder = InputPadder(left_t.shape, divis_by=32, force_square=False)
+    left_t, right_t, depth_rs_t = padder.pad(left_t, right_t, depth_rs_t)
+
+    with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+        disp = model.forward(
+            left_t, right_t,
+            depth_rs_mm=depth_rs_t,
+            iters=ITERS, test_mode=True,
+        )
+
+    disp    = padder.unpad(disp.float())
+    disp_np = disp.cpu().numpy().reshape(left.shape[:2]).clip(0, None)
+
+    depth_m        = np.zeros_like(disp_np)
+    valid          = disp_np > 0          # non-positive disparity has no finite depth; left as 0
+    depth_m[valid] = (BF / disp_np[valid]) / 1000.0   # disparity → mm → m
+    return depth_m
+
+
+# ── main ─────────────────────────────────────────────────────────────────────
+
+def main():  # CLI entry point: load models, run every method over the dataset, write the report and the depth-vs-distance plot
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument('--out_dir',   default=DEFAULT_OUT,    help='Output directory')
+    parser.add_argument('--data_dir',  default=DATA_DIR,       help='Dataset root')
+    parser.add_argument('--original',  default=ORIGINAL_PATH,  help='Original FFS weights')
+    parser.add_argument('--finetuned', default=FINETUNED_PATH, help='Stereo-only fine-tuned weights')
+    parser.add_argument('--depthrs',    default=DEPTHRS_PATH,    help='Depth-fusion v1 weights')
+    #parser.add_argument('--depthrs_v2', default=DEPTHRS_V2_PATH, help='Depth-fusion v2 weights')
+    parser.add_argument('--n_viz', type=int, default=N_VIZ,     help='Frames saved for visual comparison')
+    args = parser.parse_args()
+
+    U.set_logging_format()
+    out_dir = Path(args.out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # ── load models ───────────────────────────────────────────────────────────
+    models       = {}   # name → model  (stereo-only interface)
+    depthrs_models = {} # name → model  (depth-fusion interface)
+
+    finetuned_path = resolve_finetuned_model_path(args.finetuned)
+    if finetuned_path is not None:
+        models['finetuned'] = load_model(finetuned_path)
+    else:
+        logging.warning(f'Stereo fine-tuned model not found at {args.finetuned} — skipping')
+
+    models['original'] = load_model(args.original)  # always loaded; no existence check or fallback for this path
+
+    depthrs_path = resolve_depthrs_model_path(args.depthrs)
+    if depthrs_path is not None:
+        depthrs_models['depthrs_finetuned'] = load_depthrs_model(depthrs_path)
+    else:
+        logging.warning(f'Depth-fusion v1 model not found at {args.depthrs} — skipping')
+
+    # depthrs_v2_path = resolve_depthrs_v2_model_path(args.depthrs_v2)
+    # if depthrs_v2_path is not None:
+    #     depthrs_models['depthrs_v2'] = load_depthrs_model(depthrs_v2_path)
+    # else:
+    #     logging.warning(f'Depth-fusion v2 model not found at {args.depthrs_v2} — skipping')
+
+    all_model_names = list(models.keys()) + list(depthrs_models.keys())
+    active_methods  = [GT_NAME, RS_NAME] + all_model_names  # every name here must also be a key of METHODS
+
+    # ── dataset ───────────────────────────────────────────────────────────────
+    source = DataSource()  # NOTE(review): the sibling benchmark scripts pass train_mode=False here — confirm the default is intended
+    n = source.init_directory(input_rectified=args.data_dir)
+    logging.info(f'Found {n} samples in {args.data_dir}')
+    if n == 0:
+        logging.error('No samples found — check DATA_DIR path')
+        return
+
+    # ── accumulators ──────────────────────────────────────────────────────────
+    all_metrics        = []
+    viz_frames         = []
+    valid_acc          = {}
+    dist_bin_mae       = {m: [] for m in active_methods}
+    close_range_valid  = {m: [] for m in active_methods}
+    timing_ms_raw      = {m: [] for m in all_model_names}
+    H = W = None
+
+    depth_acc_keys = [GT_NAME, RS_NAME] + all_model_names
+    depth_accs     = {k: DepthBinAccumulator() for k in depth_acc_keys}
+
+    for idx in range(n):
+        data  = source.get_item_projected(idx)
+        left  = data['left']
+        right = data['right']
+        gt_mm = data['depth_zivid'].astype(np.float32)
+        rs_mm = data['depth_rs'].astype(np.float32)
+
+        h, w = gt_mm.shape[:2]
+        if rs_mm.shape != (h, w):  # bring the RS depth onto the GT pixel grid before any per-pixel comparison
+            import cv2  # local import: only needed when a resize is required
+            rs_mm = cv2.resize(rs_mm, (w, h), interpolation=cv2.INTER_NEAREST)
+
+        if H is None:  # the first frame fixes the size of the per-method coverage maps
+            H, W = h, w
+            for m in active_methods:
+                valid_acc[m] = np.zeros((H, W), np.float32)
+
+        gt_m = gt_mm / 1000.0
+        rs_m = rs_mm / 1000.0
+
+        frame_depths = {GT_NAME: gt_m, RS_NAME: rs_m}
+
+        # stereo-only models
+        for mname, model in models.items():
+            t0 = time.monotonic()
+            frame_depths[mname] = infer_depth_m(model, left, right)
+            timing_ms_raw[mname].append((time.monotonic() - t0) * 1000.0)
+
+        # depth-fusion models
+        for mname, model in depthrs_models.items():
+            t0 = time.monotonic()
+            frame_depths[mname] = infer_depth_m_depthrs(model, left, right, rs_mm)
+            timing_ms_raw[mname].append((time.monotonic() - t0) * 1000.0)
+
+        gt_close_mask = (gt_m > 0) & (gt_m < CLOSE_RANGE_THRESHOLD_M)  # GT pixels with a positive depth below the close-range threshold
+        n_close = int(gt_close_mask.sum())
+
+        for mname in active_methods:
+            pred = frame_depths[mname]
+            valid_acc[mname] += (pred > 0).astype(np.float32)  # per-pixel count of frames with a positive depth
+
+            if mname == GT_NAME:
+                fm = FrameMetrics(  # hand-built entry for the GT row itself; compute_metrics is not called for it
+                    GT_NAME, 0.0, 0.0, 0.0, 100.0,
+                    float((pred > 0).mean()) * 100.0, 0.0,
+                    mae_pen=0.0, mre_pen=0.0,
+                )
+            elif mname == RS_NAME:
+                fm = compute_metrics(pred, gt_m, elapsed_ms=0.0, method_name=RS_NAME)  # no per-frame timing is measured for RS
+            else:
+                fm = compute_metrics(pred, gt_m, timing_ms_raw[mname][-1], mname)
+
+            all_metrics.append(fm)
+            dist_bin_mae[mname].append(compute_bin_mae(pred, gt_m))
+
+            close_cov = (  # percentage of close-range GT pixels where this method has a positive depth
+                float((pred[gt_close_mask] > 0).mean()) * 100.0
+                if n_close > 0 else 0.0
+            )
+            close_range_valid[mname].append(close_cov)
+
+        depth_accs[GT_NAME].update(gt_m, gt_m)
+        depth_accs[RS_NAME].update(rs_m, gt_m)
+        for mname in all_model_names:
+            depth_accs[mname].update(frame_depths[mname], gt_m)
+
+        if idx < args.n_viz:  # keep copies of only the first n_viz frames for the report figures
+            viz_frames.append({k: v.copy() for k, v in frame_depths.items()})
+
+        if (idx + 1) % 20 == 0 or (idx + 1) == n:
+            logging.info(f'  {idx + 1}/{n} frames processed')
+
+    for m in active_methods:
+        valid_acc[m] /= max(n, 1)  # counts → fraction of frames with a positive depth at each pixel
+
+    mean_timing = {
+        m: float(np.mean(ts)) if ts else 0.0
+        for m, ts in timing_ms_raw.items()
+    }
+    mean_timing[GT_NAME] = 0.0
+    mean_timing[RS_NAME] = 1000.0 / RS_FPS  # ms per frame at the nominal RS frame rate, not a measurement
+
+    method_configs = {
+        'original':  {'model_path': args.original},
+        RS_NAME:     {'source': f'RealSense hardware depth (~{RS_FPS:.0f} FPS)'},
+        GT_NAME:     {'source': 'Projected Zivid depth map (ground truth)'},
+    }
+    if 'finetuned' in models and finetuned_path:
+        method_configs['finetuned'] = {'model_path': finetuned_path}
+    if 'depthrs_finetuned' in depthrs_models and depthrs_path:
+        method_configs['depthrs_finetuned'] = {'model_path': depthrs_path}
+    # if 'depthrs_v2' in depthrs_models and depthrs_v2_path:
+    #     method_configs['depthrs_v2'] = {'model_path': depthrs_v2_path}
+
+    results = BenchmarkResults(
+        method_names=active_methods,
+        method_labels={m: METHODS[m]['label'] for m in active_methods},
+        method_colors={m: METHODS[m]['color'] for m in active_methods},
+        ground_truth_name=GT_NAME,
+        n_frames=n,
+        width=W,
+        height=H,
+        all_metrics=all_metrics,
+        viz_frames=viz_frames,
+        coverage_maps=valid_acc,
+        dist_bin_mae=dist_bin_mae,
+        close_range_valid=close_range_valid,
+        source=f'INBOLT dataset ({args.data_dir})',
+        method_configs=method_configs,
+    )
+
+    stats = aggregate(results, mean_timing)
+    if RS_NAME in stats:
+        stats[RS_NAME].fps_mean = RS_FPS  # overwrite the aggregated value with the nominal RS frame rate
+
+    reporter = ReportGeneratorInbolt(results, stats, out_dir)
+    reporter.generate()
+
+    # depth-vs-distance accuracy plot
+    plot_labels = {k: METHODS[k]['label'] for k in METHODS}
+    plot_colors = {k: METHODS[k]['color'] for k in METHODS}
+
+    labeled_accs = {  # accumulators re-keyed by display label; methods with no accumulated samples are dropped
+        plot_labels.get(k, k): v
+        for k, v in depth_accs.items()
+        if depth_accs[k].count.sum() > 0
+    }
+    labeled_colors = {
+        plot_labels.get(k, k): plot_colors.get(k)
+        for k in depth_accs
+        if depth_accs[k].count.sum() > 0
+    }
+
+    plot_depth_vs_distance(
+        accumulators=labeled_accs,
+        colors=labeled_colors,
+        out_path=out_dir / 'depth_vs_distance.png',
+    )
+    logging.info(f'All outputs written to {out_dir}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/benchmark_inbolt_fs_plane_bf.py b/scripts/benchmark_inbolt_fs_plane_bf.py
new file mode 100644
index 0000000..1b7e3d6
--- /dev/null
+++ b/scripts/benchmark_inbolt_fs_plane_bf.py
@@ -0,0 +1,457 @@
+"""Benchmark FastFoundationStereo models + RealSense hardware depth on the Inbolt dataset.
+
+This benchmark mirrors the structure of ``benchmark_faro_rs.py`` but uses the
+Inbolt dataset and the meter-based reporting pipeline already used by
+``benchmark_inbolt.py``.
+
+For fair pixel-wise comparison against the RealSense stereo pair and hardware
+RealSense depth map, Zivid ground-truth depth is projected into RealSense image
+space via ``DataSource.get_item_projected()``.
+
+Usage:
+  cd /home/adiroha/repos/Fast-FoundationStereo
+  python scripts/benchmark_inbolt_fs_plane_bf.py [--out_dir reports/inbolt_ffs_benchmark_planes_bf]
+"""
+
+import argparse
+import logging
+import os
+import sys
+import time
+import cv2
+from pathlib import Path
+from typing import Dict, Optional
+
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+sys.path.append(code_dir)
+
+import numpy as np
+import torch
+from core.utils.utils import InputPadder
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+from scripts.finetune_inbolt_planes_bf import FastFoundationStereoWithDepth, DepthHead
+import Utils as U
+from benchmark_inbolt import DepthBinAccumulator, infer_depth_m, load_model, plot_depth_vs_distance #, infer_depth_nobf_m
+from scripts.data_manager_inbolt import DataSource, CAMERA_MATRIX_RS, DIST_COEFFS_RS
+from metrics import (
+    BenchmarkResults,
+    FrameMetrics,
+    compute_bin_mae,
+    compute_metrics,
+    aggregate,
+    CLOSE_RANGE_THRESHOLD_M,
+)
+from report import ReportGenerator
+from finetune_inbolt_planes import find_flat_regions
+
+
+# ── custom report generator ──────────────────────────────────────────────────
+
+class ReportGeneratorInbolt(ReportGenerator):
+    """Custom report generator that shows up to 4 frames in depth comparison and error maps."""
+
+    def __init__(self, results, stats, output_dir) -> None:
+        super().__init__(results, stats, output_dir)
+        self._selected_viz_indices = []  # filled lazily by _get_selected_viz_indices()
+
+    def _get_selected_viz_indices(self, n_pick: int = 4):
+        """Return cached random frame indices shared by all report sections (n_pick is ignored once a selection is cached)."""
+        if self._selected_viz_indices:
+            return self._selected_viz_indices
+
+        n_total = len(self._r.viz_frames)
+        if n_total == 0:
+            self._selected_viz_indices = []
+            return self._selected_viz_indices
+
+        n = min(n_pick, n_total)
+        rng = np.random.default_rng(42)  # fixed seed: the same frames are picked on every run
+        self._selected_viz_indices = sorted(rng.choice(n_total, size=n, replace=False).tolist())
+        return self._selected_viz_indices
+
+    def _fig_depth_comparison(self) -> str:
+        """Render a (selected frames × methods) grid of depth maps; return the result of ``self._save``."""
+        if not self._r.viz_frames:
+            return self._empty_fig("depth_comparison.png", "No viz frames")
+
+        sel = self._get_selected_viz_indices(n_pick=4)
+        if not sel:
+            return self._empty_fig("depth_comparison.png", "No viz frames")
+
+        vf0 = self._r.viz_frames[sel[0]]
+        method_names = [n for n in self._r.method_names if n in vf0]
+        nrows = len(sel)
+        ncols = len(method_names)
+        fig, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 3.8 * nrows), squeeze=False)  # always 2-D, even for one row/column
+        cmap = self._depth_cmap()
+
+        for r, frame_idx in enumerate(sel):
+            vf = self._r.viz_frames[frame_idx]
+            for c, name in enumerate(method_names):
+                ax = axes[r, c]
+                if name not in vf:
+                    ax.axis("off")
+                    continue
+                im = ax.imshow(vf[name], cmap=cmap, vmin=0.1, vmax=2.0)
+                plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label="m")
+                title = self._r.method_labels.get(name, name)
+                if c == 0:
+                    title = f"Frame {frame_idx + 1} • {title}"
+                ax.set_title(title, fontsize=9, wrap=True)
+                ax.axis("off")
+
+        fig.suptitle(f"Depth Map Comparison ({nrows} random frames) — values in meters",
+                     fontsize=11, y=1.01)
+        fig.tight_layout()
+        return self._save(fig, "depth_comparison.png")
+
+    def _fig_error_maps(self) -> str:
+        """Render a (selected frames × methods) grid of |pred − GT| maps; return the result of ``self._save``."""
+        if not self._r.viz_frames or not self._non_gt:
+            return self._empty_fig("error_maps.png", "No comparison methods")
+
+        sel = self._get_selected_viz_indices(n_pick=4)
+        if not sel:
+            return self._empty_fig("error_maps.png", "No viz frames")
+
+        vf0 = self._r.viz_frames[sel[0]]
+        names = ([self._gt] if self._gt in vf0 else []) + [n for n in self._non_gt if n in vf0]
+        if not names:
+            return self._empty_fig("error_maps.png", "Ground truth not available in viz frame")
+
+        nrows = len(sel)
+        ncols = len(names)
+        cmap = plt.get_cmap("hot").copy()
+        cmap.set_under("#222222")  # values below vmin (err is 0 where GT or prediction is missing) are drawn dark grey
+        fig, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 3.8 * nrows), squeeze=False)  # always 2-D, even for one row/column
+
+        for r, frame_idx in enumerate(sel):
+            vf = self._r.viz_frames[frame_idx]
+            gt = vf.get(self._gt)
+            if gt is None:
+                for c in range(ncols):
+                    axes[r, c].axis("off")
+                continue
+
+            for c, name in enumerate(names):
+                ax = axes[r, c]
+                if name not in vf:
+                    ax.axis("off")
+                    continue
+                pred = vf[name]
+                valid = (gt > 0) & (pred > 0)
+                err = np.where(valid, np.abs(pred - gt), 0.0).astype(np.float32)
+                im = ax.imshow(err, cmap=cmap, vmin=0.001, vmax=0.1)
+                plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label="|error| (m)")
+                mean_err = float(np.abs(pred[valid] - gt[valid]).mean()) if valid.any() else 0.0
+                label = self._r.method_labels.get(name, name)
+                if c == 0:
+                    ax.set_title(f"Frame {frame_idx + 1} • {label}\nMAE={mean_err:.4f} m", fontsize=9)
+                else:
+                    ax.set_title(f"{label}\nMAE={mean_err:.4f} m", fontsize=9)
+                ax.axis("off")
+
+        gt_label = self._r.method_labels.get(self._gt, self._gt)
+        fig.suptitle(f"Absolute Error vs {gt_label} ({nrows} random frames, m)", fontsize=11, y=1.01)
+        fig.tight_layout()
+        return self._save(fig, "error_maps.png")
+
+
+# ── constants ────────────────────────────────────────────────────────────────
+
+DATA_DIR       = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260415T084601Z-3-001/Data Collection'
+ORIGINAL_PATH  = f'{code_dir}/../weights/23-36-37/model_best_bp2_serialize.pth'
+# FINETUNED_PATH  = f'{code_dir}/../weights/20-30-48/model_finetuned_inbolt-20260415_epoch_030.pth'
+# MODEL_PATH      = f'{code_dir}/../weights/23-36-37/model_best_bp2_serialize.pth'
+#FINETUNED_PATH  = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt-20260415_epoch_111.pth'
+#DEFAULT_OUT     = f'{code_dir}/../reports/inbolt_ffs_benchmark-model37-111-set-20260414_142239'
+FINETUNED_PATH  = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt_planes_bf_epoch_093.pth'
+DEFAULT_OUT     = f'{code_dir}/../reports/inbolt_ffs_benchmark_planes_bf'
+N_VIZ = 5
+
+METHODS: Dict[str, Dict[str, str]] = {
+    'original': {'label': 'FFS Original', 'color': '#2980b9'},
+    'finetuned': {'label': 'FFS Fine-tuned (INBOLT)', 'color': '#e74c3c'},
+    'depth_rs': {'label': 'RealSense Hardware Depth', 'color': '#f39c12'},
+    'zivid_gt': {'label': 'Zivid GT (projected to RS)', 'color': '#27ae60'},
+}
+GT_NAME = 'zivid_gt'
+RS_NAME = 'depth_rs'
+RS_FPS = 30.0
+ITERS  = 8
+
+# ── inference helpers ─────────────────────────────────────────────────────────
+
+def _preprocess_ir(left: np.ndarray, right: np.ndarray):
+    """Turn an IR image pair into two 3-channel float tensors on CUDA (batch dimension added)."""
+    def _as_cuda_rgb(ir):
+        gray = np.clip(ir.astype(np.float32), 0, 255)
+        rgb = np.stack((gray,) * 3, axis=-1)  # replicate the IR channel three times along a new last axis
+        return torch.as_tensor(rgb).float()[None].permute(0, 3, 1, 2).cuda()
+    return _as_cuda_rgb(left), _as_cuda_rgb(right)
+
+def resolve_finetuned_model_path(preferred_path: str) -> Optional[str]:
+    """Pick the fine-tuned checkpoint to load: the preferred path, then fallbacks, else None."""
+    preferred = Path(preferred_path)
+    if preferred.exists():
+        return str(preferred)
+
+    weights_dir = Path(code_dir) / '..' / 'weights'
+
+    # 1) Known checkpoint file names, searched recursively under weights/ in priority order;
+    #    for the first name with any hit, the lexicographically smallest path wins.
+    for name in (
+        'model_finetuned_inbolt.pth',
+        'model_finetuned_inbolt-20260415_epoch_030.pth',
+    ):
+        hits = sorted(weights_dir.glob(f'**/{name}'))
+        if hits:
+            logging.warning(
+                f'Preferred fine-tuned model not found at {preferred}. Using fallback {hits[0]}'
+            )
+            return str(hits[0])
+
+    # 2) Otherwise any Inbolt fine-tuned checkpoint; the lexicographically last path is taken.
+    # NOTE(review): this pattern also matches other variants (e.g. *_depthrs_*, *_planes_bf_*).
+    discovered = sorted(weights_dir.glob('**/model_finetuned_inbolt*.pth'))
+    if not discovered:
+        return None
+
+    logging.warning(
+        f'Preferred fine-tuned model not found at {preferred}. Using discovered checkpoint {discovered[-1]}'
+    )
+    return str(discovered[-1])
+
+@torch.no_grad()
+def infer_depth_nobf_m(model:FastFoundationStereoWithDepth, left: np.ndarray, right: np.ndarray) -> np.ndarray:
+    """Infer depth with a model that outputs depth directly (no disparity conversion here); H×W float32 metres."""
+    ir_left, ir_right = _preprocess_ir(left, right)
+    padder = InputPadder(ir_left.shape, divis_by=32, force_square=False)
+    ir_left, ir_right = padder.pad(ir_left, ir_right)
+
+    with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+        depth_t, _ = model.forward(ir_left, ir_right, iters=ITERS, test_mode=True)  # second output is unused
+
+    # Strip the padding, move to NumPy at the input resolution and clamp negatives to zero.
+    depth_mm = padder.unpad(depth_t.float()).cpu().numpy().reshape(left.shape[:2]).clip(0, None)
+
+    depth_m = np.zeros_like(depth_mm)
+    positive = depth_mm > 0
+    depth_m[positive] = depth_mm[positive] / 1000.0   # mm → m
+    return depth_m
+
+# ── main ─────────────────────────────────────────────────────────────────────
+
+def main():
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument('--out_dir', default=DEFAULT_OUT, help='Output directory for the report')
+    parser.add_argument('--data_dir', default=DATA_DIR, help='Path to dataset root')
+    parser.add_argument('--original', default=ORIGINAL_PATH, help='Path to original model weights')
+    parser.add_argument('--finetuned', default=FINETUNED_PATH, help='Path to fine-tuned model weights')
+    parser.add_argument('--n_viz', type=int, default=N_VIZ, help='Frames saved for visual comparison')
+    args = parser.parse_args()
+
+    U.set_logging_format()
+    out_dir = Path(args.out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # ── load stereo models ───────────────────────────────────────────────────
+    models = {}
+    finetuned_path = resolve_finetuned_model_path(args.finetuned)
+    if finetuned_path is not None:
+        models['finetuned'] = load_model(finetuned_path)
+    else:
+        logging.warning(
+            f'Fine-tuned model not found (preferred: {args.finetuned}) and no fallback checkpoint found — skipping'
+        )
+
+    models['original'] = load_model(args.original)
+
+    active_methods = [GT_NAME, RS_NAME] + list(models.keys())
+
+    # ── dataset ──────────────────────────────────────────────────────────────
+    source = DataSource(train_mode = False)
+    n = source.init_directory(input_rectified=args.data_dir)
+    logging.info(f'Found {n} samples in {args.data_dir}')
+    if n == 0:
+        logging.error('No samples found — check DATA_DIR path')
+        return
+
+    # ── accumulators ─────────────────────────────────────────────────────────
+    all_metrics = []
+    viz_frames = []
+    valid_acc = {}
+    dist_bin_mae = {m: [] for m in active_methods}
+    close_range_valid = {m: [] for m in active_methods}
+    timing_ms_raw = {m: [] for m in models}
+    H = W = None
+
+    depth_acc_keys = ['zivid_gt', RS_NAME] + list(models.keys())
+    depth_accs = {k: DepthBinAccumulator() for k in depth_acc_keys}
+
+    for idx in range(n):
+        data = source.get_item_projected(idx)
+        left = data['left']
+        right = data['right']
+        gt_mm = data['depth_zivid'].astype(np.float32)
+        rs_mm = data['depth_rs'].astype(np.float32)
+
+        if H is None:
+            H, W = gt_mm.shape[:2]
+            for m in active_methods:
+                valid_acc[m] = np.zeros((H, W), np.float32)
+
+        gt_m = gt_mm / 1000.0
+        rs_m = rs_mm / 1000.0
+
+        # valid only for flat regions
+        valid = (gt_m > 0) 
+        valid = find_flat_regions(gt_mm, valid)
+        gt_m[valid == False] = 0.0
+
+        frame_depths = {GT_NAME: gt_m, RS_NAME: rs_m}
+        for mname, model in models.items():
+            t0 = time.monotonic()
+            if mname == 'finetuned':
+                frame_depths[mname] = infer_depth_nobf_m(model, left, right)
+            else:
+                frame_depths[mname] = infer_depth_m(model, left, right)
+            # save raw depth as 16-bit PNG images (millimeters) for later analysis if needed
+            #cv2.imwrite(str(out_dir / f'{mname}_{idx:03d}.png'), (frame_depths[mname] * 1000.0).astype(np.uint16))
+            timing_ms_raw[mname].append((time.monotonic() - t0) * 1000.0)
+
+        gt_close_mask = (gt_m > 0) & (gt_m < CLOSE_RANGE_THRESHOLD_M)
+        n_close = int(gt_close_mask.sum())
+
+        # # create point clouds for visualization
+        # if idx % 10 == 0:
+        #     for mname in active_methods:
+        #         pred = frame_depths[mname]
+
+        #         XYZ = source.project_camera_to_3d(pred, CAMERA_MATRIX_RS, DIST_COEFFS_RS)  # (N, 3) array of 3D points in Zivid camera space
+        #         mname_path = os.path.join(out_dir, f'{mname}_{idx:03d}.ply')
+        #         source.save_to_ply(XYZ/1000, mname_path) # save in meters for visualization
+
+
+        for mname in active_methods:
+            pred = frame_depths[mname]
+
+            valid_acc[mname] += (pred > 0).astype(np.float32)
+
+            if mname == GT_NAME:
+                fm = FrameMetrics(
+                    GT_NAME,
+                    0.0,
+                    0.0,
+                    0.0,
+                    100.0,
+                    float((pred > 0).mean()) * 100.0,
+                    0.0,
+                    mae_pen=0.0,
+                    mre_pen=0.0,
+                )
+            elif mname == RS_NAME:
+                fm = compute_metrics(pred, gt_m, elapsed_ms=0.0, method_name=RS_NAME)
+            else:
+                fm = compute_metrics(pred, gt_m, timing_ms_raw[mname][-1], mname)
+
+            all_metrics.append(fm)
+            dist_bin_mae[mname].append(compute_bin_mae(pred, gt_m))
+
+            close_cov = (
+                float((pred[gt_close_mask] > 0).mean()) * 100.0
+                if n_close > 0 else 0.0
+            )
+            close_range_valid[mname].append(close_cov)
+
+        depth_accs['zivid_gt'].update(gt_m, gt_m)
+        depth_accs[RS_NAME].update(rs_m, gt_m)
+        for mname in models:
+            depth_accs[mname].update(frame_depths[mname], gt_m)
+
+        if idx < args.n_viz:
+            viz_frames.append({k: v.copy() for k, v in frame_depths.items()})
+
+        if (idx + 1) % 200 == 0 or (idx + 1) == n:
+            logging.info(f'  {idx + 1}/{n} frames processed')
+
+    for m in active_methods:
+        valid_acc[m] /= max(n, 1)
+
+    mean_timing = {
+        m: float(np.mean(ts)) if ts else 0.0
+        for m, ts in timing_ms_raw.items()
+    }
+    mean_timing[GT_NAME] = 0.0
+    mean_timing[RS_NAME] = 1000.0 / RS_FPS
+
+    method_configs = {
+        'original': {'model_path': args.original},
+        RS_NAME: {'source': f'RealSense hardware depth (~{RS_FPS:.0f} FPS)'},
+        GT_NAME: {'source': 'Projected Zivid depth map used as Inbolt ground truth'},
+    }
+    if 'finetuned' in models and finetuned_path is not None:
+        method_configs['finetuned'] = {'model_path': finetuned_path}
+
+    results = BenchmarkResults(
+        method_names=active_methods,
+        method_labels={m: METHODS[m]['label'] for m in active_methods},
+        method_colors={m: METHODS[m]['color'] for m in active_methods},
+        ground_truth_name=GT_NAME,
+        n_frames=n,
+        width=W,
+        height=H,
+        all_metrics=all_metrics,
+        viz_frames=viz_frames,
+        coverage_maps=valid_acc,
+        dist_bin_mae=dist_bin_mae,
+        close_range_valid=close_range_valid,
+        source=f'INBOLT dataset ({args.data_dir})',
+        method_configs=method_configs,
+    )
+
+    stats = aggregate(results, mean_timing)
+    if RS_NAME in stats:
+        stats[RS_NAME].fps_mean = RS_FPS
+
+    reporter = ReportGeneratorInbolt(results, stats, out_dir)
+    reporter.generate()
+
+    plot_colors = {
+        'zivid_gt': METHODS[GT_NAME]['color'],
+        RS_NAME: METHODS[RS_NAME]['color'],
+        **{m: METHODS[m]['color'] for m in models if m in METHODS},
+    }
+    plot_labels = {
+        'zivid_gt': 'Zivid GT (spatial spread)',
+        RS_NAME: METHODS[RS_NAME]['label'],
+        'original': METHODS['original']['label'],
+        'finetuned': METHODS['finetuned']['label'],
+    }
+    labeled_accs = {
+        plot_labels.get(k, k): v
+        for k, v in depth_accs.items()
+        if depth_accs[k].count.sum() > 0
+    }
+    labeled_colors = {
+        plot_labels.get(k, k): plot_colors.get(k)
+        for k in depth_accs
+        if depth_accs[k].count.sum() > 0
+    }
+
+    plot_depth_vs_distance(
+        accumulators=labeled_accs,
+        colors=labeled_colors,
+        out_path=out_dir / 'depth_vs_distance.png',
+    )
+    logging.info(f'All outputs written to {out_dir}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/benchmark_inbolt_fs_planes.py b/scripts/benchmark_inbolt_fs_planes.py
new file mode 100644
index 0000000..06d5646
--- /dev/null
+++ b/scripts/benchmark_inbolt_fs_planes.py
@@ -0,0 +1,426 @@
+"""Benchmark FastFoundationStereo models + RealSense hardware depth on the Inbolt dataset.
+
+This benchmark mirrors the structure of ``benchmark_faro_rs.py`` but uses the
+Inbolt dataset and the meter-based reporting pipeline already used by
+``benchmark_inbolt.py``.
+
+For fair pixel-wise comparison against the RealSense stereo pair and hardware
+RealSense depth map, Zivid ground-truth depth is projected into RealSense image
+space via ``DataSource.get_item_projected()``.
+
+Usage:
+  cd /home/adiroha/repos/Fast-FoundationStereo
+  python scripts/benchmark_inbolt_fs_planes.py [--out_dir reports/inbolt_ffs_benchmark-planes_25]
+"""
+
+import argparse
+import logging
+import os
+import sys
+import time
+import cv2
+from pathlib import Path
+from typing import Dict, Optional
+
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+sys.path.append(code_dir)
+
+import numpy as np
+
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+
+import Utils as U
+from benchmark_inbolt import DepthBinAccumulator, infer_depth_m, load_model, plot_depth_vs_distance
+from scripts.data_manager_inbolt import DataSource, CAMERA_MATRIX_RS, DIST_COEFFS_RS
+from metrics import (
+    BenchmarkResults,
+    FrameMetrics,
+    compute_bin_mae,
+    compute_metrics,
+    aggregate,
+    CLOSE_RANGE_THRESHOLD_M,
+)
+from report import ReportGenerator
+from finetune_inbolt_planes import find_flat_regions
+
+
+# ── custom report generator ──────────────────────────────────────────────────
+
+class ReportGeneratorInbolt(ReportGenerator):
+    """Report generator that shows up to 4 viz frames in the depth comparison and error maps."""
+
+    def __init__(self, results, stats, output_dir) -> None:
+        super().__init__(results, stats, output_dir)
+        self._selected_viz_indices = []  # filled lazily by _get_selected_viz_indices()
+
+    def _get_selected_viz_indices(self, n_pick: int = 4):
+        """Return the cached, sorted frame indices used consistently across report sections.
+
+        The first non-empty pick (fixed seed 42) is cached, so ``n_pick`` only matters on
+        the call that fills the cache. Returns ``[]`` when there are no viz frames.
+        """
+        if self._selected_viz_indices:
+            return self._selected_viz_indices
+
+        n_total = len(self._r.viz_frames)
+        if n_total > 0:
+            rng = np.random.default_rng(42)
+            picked = rng.choice(n_total, size=min(n_pick, n_total), replace=False)
+            self._selected_viz_indices = sorted(picked.tolist())
+        return self._selected_viz_indices
+
+    def _fig_depth_comparison(self) -> str:
+        """Draw one row per selected viz frame and one column per method (depth in meters)."""
+        sel = self._get_selected_viz_indices(n_pick=4)
+        if not sel:  # also true when no viz frames were recorded
+            return self._empty_fig("depth_comparison.png", "No viz frames")
+
+        vf0 = self._r.viz_frames[sel[0]]
+        method_names = [n for n in self._r.method_names if n in vf0]
+        nrows = len(sel)
+        ncols = len(method_names)
+        # squeeze=False keeps axes 2-D (nrows, ncols) even for a single row or column.
+        fig, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 3.8 * nrows), squeeze=False)
+        cmap = self._depth_cmap()
+
+        for r, frame_idx in enumerate(sel):
+            vf = self._r.viz_frames[frame_idx]
+            for c, name in enumerate(method_names):
+                ax = axes[r, c]
+                if name not in vf:
+                    ax.axis("off")
+                    continue
+                im = ax.imshow(vf[name], cmap=cmap, vmin=0.1, vmax=2.0)
+                plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label="m")
+                title = self._r.method_labels.get(name, name)
+                if c == 0:
+                    title = f"Frame {frame_idx + 1} • {title}"
+                ax.set_title(title, fontsize=9, wrap=True)
+                ax.axis("off")
+
+        fig.suptitle(f"Depth Map Comparison ({nrows} random frame{'s' if nrows != 1 else ''}) — values in meters",
+                     fontsize=11, y=1.01)
+        fig.tight_layout()
+        return self._save(fig, "depth_comparison.png")
+
+    def _fig_error_maps(self) -> str:
+        """Draw |pred - GT| per selected viz frame (rows) and method (columns), in meters."""
+        if not self._r.viz_frames or not self._non_gt:
+            return self._empty_fig("error_maps.png", "No comparison methods")
+
+        sel = self._get_selected_viz_indices(n_pick=4)
+        if not sel:
+            return self._empty_fig("error_maps.png", "No viz frames")
+
+        vf0 = self._r.viz_frames[sel[0]]
+        names = ([self._gt] if self._gt in vf0 else []) + [n for n in self._non_gt if n in vf0]
+        if not names:
+            return self._empty_fig("error_maps.png", "Ground truth not available in viz frame")
+
+        nrows = len(sel)
+        ncols = len(names)
+        cmap = plt.get_cmap("hot").copy()
+        cmap.set_under("#222222")  # values below vmin (incl. invalid pixels forced to 0) render dark grey
+        fig, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 3.8 * nrows), squeeze=False)
+
+        for r, frame_idx in enumerate(sel):
+            vf = self._r.viz_frames[frame_idx]
+            gt = vf.get(self._gt)
+            if gt is None:
+                for c in range(ncols):
+                    axes[r, c].axis("off")
+                continue
+
+            for c, name in enumerate(names):
+                ax = axes[r, c]
+                if name not in vf:
+                    ax.axis("off")
+                    continue
+                pred = vf[name]
+                # Error is only defined where both GT and prediction have a positive depth.
+                valid = (gt > 0) & (pred > 0)
+                err = np.where(valid, np.abs(pred - gt), 0.0).astype(np.float32)
+                im = ax.imshow(err, cmap=cmap, vmin=0.001, vmax=0.1)
+                plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label="|error| (m)")
+                mean_err = float(np.abs(pred[valid] - gt[valid]).mean()) if valid.any() else 0.0
+                label = self._r.method_labels.get(name, name)
+                if c == 0:
+                    ax.set_title(f"Frame {frame_idx + 1} • {label}\nMAE={mean_err:.4f} m", fontsize=9)
+                else:
+                    ax.set_title(f"{label}\nMAE={mean_err:.4f} m", fontsize=9)
+                ax.axis("off")
+
+        gt_label = self._r.method_labels.get(self._gt, self._gt)
+        fig.suptitle(f"Absolute Error vs {gt_label} ({nrows} random frame{'s' if nrows != 1 else ''}, m)", fontsize=11, y=1.01)
+        fig.tight_layout()
+        return self._save(fig, "error_maps.png")
+
+
+# ── constants ────────────────────────────────────────────────────────────────
+
+DATA_DIR       = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260415T084601Z-3-001/Data Collection'
+ORIGINAL_PATH  = f'{code_dir}/../weights/23-36-37/model_best_bp2_serialize.pth'
+# FINETUNED_PATH  = f'{code_dir}/../weights/20-30-48/model_finetuned_inbolt-20260415_epoch_030.pth'
+# MODEL_PATH      = f'{code_dir}/../weights/23-36-37/model_best_bp2_serialize.pth'
+#FINETUNED_PATH  = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt-20260415_epoch_111.pth'
+#DEFAULT_OUT     = f'{code_dir}/../reports/inbolt_ffs_benchmark-model37-111-set-20260414_142239'
+FINETUNED_PATH  = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt_planes_25_epoch_012.pth'
+DEFAULT_OUT     = f'{code_dir}/../reports/inbolt_ffs_benchmark-planes_25'
+N_VIZ = 5
+
+METHODS: Dict[str, Dict[str, str]] = {
+    'original': {'label': 'FFS Original', 'color': '#2980b9'},
+    'finetuned': {'label': 'FFS Fine-tuned (INBOLT)', 'color': '#e74c3c'},
+    'depth_rs': {'label': 'RealSense Hardware Depth', 'color': '#f39c12'},
+    'zivid_gt': {'label': 'Zivid GT (projected to RS)', 'color': '#27ae60'},
+}
+GT_NAME = 'zivid_gt'
+RS_NAME = 'depth_rs'
+RS_FPS = 30.0
+
+
+def resolve_finetuned_model_path(preferred_path: str) -> Optional[str]:
+    """Return an existing fine-tuned Inbolt checkpoint path, or None if not found.
+
+    Order: ``preferred_path``, then known file names anywhere under ``weights/``,
+    then the lexicographically last ``model_finetuned_inbolt*.pth``. Checkpoints
+    named ``*with_confidence*`` are never picked by the generic fallback.
+    """
+    preferred = Path(preferred_path)
+    if preferred.exists():
+        return str(preferred)
+
+    weights_dir = Path(code_dir) / '..' / 'weights'
+    known_names = ('model_finetuned_inbolt.pth', 'model_finetuned_inbolt-20260415_epoch_030.pth')
+    for name in known_names:
+        found = sorted(weights_dir.glob(f'**/{name}'))
+        if found:
+            logging.warning(
+                f'Preferred fine-tuned model not found at {preferred}. Using fallback {found[0]}'
+            )
+            return str(found[0])
+
+    # Confidence-head checkpoints share this prefix but are a different model class; skip them.
+    generic = sorted(p for p in weights_dir.glob('**/model_finetuned_inbolt*.pth')
+                     if 'with_confidence' not in p.name)
+    if generic:
+        logging.warning(
+            f'Preferred fine-tuned model not found at {preferred}. Using discovered checkpoint {generic[-1]}'
+        )
+        return str(generic[-1])
+
+    return None
+
+
+# ── main ─────────────────────────────────────────────────────────────────────
+
+def main():
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument('--out_dir', default=DEFAULT_OUT, help='Output directory for the report')
+    parser.add_argument('--data_dir', default=DATA_DIR, help='Path to dataset root')
+    parser.add_argument('--original', default=ORIGINAL_PATH, help='Path to original model weights')
+    parser.add_argument('--finetuned', default=FINETUNED_PATH, help='Path to fine-tuned model weights')
+    parser.add_argument('--n_viz', type=int, default=N_VIZ, help='Frames saved for visual comparison')
+    args = parser.parse_args()
+
+    U.set_logging_format()
+    out_dir = Path(args.out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # ── load stereo models ───────────────────────────────────────────────────
+    models = {}
+    finetuned_path = resolve_finetuned_model_path(args.finetuned)
+    if finetuned_path is not None:
+        models['finetuned'] = load_model(finetuned_path)
+    else:
+        logging.warning(
+            f'Fine-tuned model not found (preferred: {args.finetuned}) and no fallback checkpoint found — skipping'
+        )
+
+    models['original'] = load_model(args.original)
+
+    active_methods = [GT_NAME, RS_NAME] + list(models.keys())
+
+    # ── dataset ──────────────────────────────────────────────────────────────
+    source = DataSource(train_mode = False)
+    n = source.init_directory(input_rectified=args.data_dir)
+    logging.info(f'Found {n} samples in {args.data_dir}')
+    if n == 0:
+        logging.error('No samples found — check DATA_DIR path')
+        return
+
+    # ── accumulators ─────────────────────────────────────────────────────────
+    all_metrics = []
+    viz_frames = []
+    valid_acc = {}
+    dist_bin_mae = {m: [] for m in active_methods}
+    close_range_valid = {m: [] for m in active_methods}
+    timing_ms_raw = {m: [] for m in models}
+    H = W = None
+
+    depth_acc_keys = ['zivid_gt', RS_NAME] + list(models.keys())
+    depth_accs = {k: DepthBinAccumulator() for k in depth_acc_keys}
+
+    for idx in range(n):
+        data = source.get_item_projected(idx)
+        left = data['left']
+        right = data['right']
+        gt_mm = data['depth_zivid'].astype(np.float32)
+        rs_mm = data['depth_rs'].astype(np.float32)
+
+        if H is None:
+            H, W = gt_mm.shape[:2]
+            for m in active_methods:
+                valid_acc[m] = np.zeros((H, W), np.float32)
+
+        gt_m = gt_mm / 1000.0
+        rs_m = rs_mm / 1000.0
+
+        # valid only for flat regions
+        valid = (gt_m > 0) 
+        valid = find_flat_regions(gt_mm, valid)
+        gt_m[valid == False] = 0.0
+
+        frame_depths = {GT_NAME: gt_m, RS_NAME: rs_m}
+        for mname, model in models.items():
+            t0 = time.monotonic()
+            frame_depths[mname] = infer_depth_m(model, left, right)
+            # save raw depth as 16-bit PNG images (millimeters) for later analysis if needed
+            #cv2.imwrite(str(out_dir / f'{mname}_{idx:03d}.png'), (frame_depths[mname] * 1000.0).astype(np.uint16))
+            timing_ms_raw[mname].append((time.monotonic() - t0) * 1000.0)
+
+        gt_close_mask = (gt_m > 0) & (gt_m < CLOSE_RANGE_THRESHOLD_M)
+        n_close = int(gt_close_mask.sum())
+
+        # # create point clouds for visualization
+        # if idx % 10 == 0:
+        #     for mname in active_methods:
+        #         pred = frame_depths[mname]
+
+        #         XYZ = source.project_camera_to_3d(pred, CAMERA_MATRIX_RS, DIST_COEFFS_RS)  # (N, 3) array of 3D points in Zivid camera space
+        #         mname_path = os.path.join(out_dir, f'{mname}_{idx:03d}.ply')
+        #         source.save_to_ply(XYZ/1000, mname_path) # save in meters for visualization
+
+
+        for mname in active_methods:
+            pred = frame_depths[mname]
+
+            valid_acc[mname] += (pred > 0).astype(np.float32)
+
+            if mname == GT_NAME:
+                fm = FrameMetrics(
+                    GT_NAME,
+                    0.0,
+                    0.0,
+                    0.0,
+                    100.0,
+                    float((pred > 0).mean()) * 100.0,
+                    0.0,
+                    mae_pen=0.0,
+                    mre_pen=0.0,
+                )
+            elif mname == RS_NAME:
+                fm = compute_metrics(pred, gt_m, elapsed_ms=0.0, method_name=RS_NAME)
+            else:
+                fm = compute_metrics(pred, gt_m, timing_ms_raw[mname][-1], mname)
+
+            all_metrics.append(fm)
+            dist_bin_mae[mname].append(compute_bin_mae(pred, gt_m))
+
+            close_cov = (
+                float((pred[gt_close_mask] > 0).mean()) * 100.0
+                if n_close > 0 else 0.0
+            )
+            close_range_valid[mname].append(close_cov)
+
+        depth_accs['zivid_gt'].update(gt_m, gt_m)
+        depth_accs[RS_NAME].update(rs_m, gt_m)
+        for mname in models:
+            depth_accs[mname].update(frame_depths[mname], gt_m)
+
+        if idx < args.n_viz:
+            viz_frames.append({k: v.copy() for k, v in frame_depths.items()})
+
+        if (idx + 1) % 200 == 0 or (idx + 1) == n:
+            logging.info(f'  {idx + 1}/{n} frames processed')
+
+    for m in active_methods:
+        valid_acc[m] /= max(n, 1)
+
+    mean_timing = {
+        m: float(np.mean(ts)) if ts else 0.0
+        for m, ts in timing_ms_raw.items()
+    }
+    mean_timing[GT_NAME] = 0.0
+    mean_timing[RS_NAME] = 1000.0 / RS_FPS
+
+    method_configs = {
+        'original': {'model_path': args.original},
+        RS_NAME: {'source': f'RealSense hardware depth (~{RS_FPS:.0f} FPS)'},
+        GT_NAME: {'source': 'Projected Zivid depth map used as Inbolt ground truth'},
+    }
+    if 'finetuned' in models and finetuned_path is not None:
+        method_configs['finetuned'] = {'model_path': finetuned_path}
+
+    results = BenchmarkResults(
+        method_names=active_methods,
+        method_labels={m: METHODS[m]['label'] for m in active_methods},
+        method_colors={m: METHODS[m]['color'] for m in active_methods},
+        ground_truth_name=GT_NAME,
+        n_frames=n,
+        width=W,
+        height=H,
+        all_metrics=all_metrics,
+        viz_frames=viz_frames,
+        coverage_maps=valid_acc,
+        dist_bin_mae=dist_bin_mae,
+        close_range_valid=close_range_valid,
+        source=f'INBOLT dataset ({args.data_dir})',
+        method_configs=method_configs,
+    )
+
+    stats = aggregate(results, mean_timing)
+    if RS_NAME in stats:
+        stats[RS_NAME].fps_mean = RS_FPS
+
+    reporter = ReportGeneratorInbolt(results, stats, out_dir)
+    reporter.generate()
+
+    plot_colors = {
+        'zivid_gt': METHODS[GT_NAME]['color'],
+        RS_NAME: METHODS[RS_NAME]['color'],
+        **{m: METHODS[m]['color'] for m in models if m in METHODS},
+    }
+    plot_labels = {
+        'zivid_gt': 'Zivid GT (spatial spread)',
+        RS_NAME: METHODS[RS_NAME]['label'],
+        'original': METHODS['original']['label'],
+        'finetuned': METHODS['finetuned']['label'],
+    }
+    labeled_accs = {
+        plot_labels.get(k, k): v
+        for k, v in depth_accs.items()
+        if depth_accs[k].count.sum() > 0
+    }
+    labeled_colors = {
+        plot_labels.get(k, k): plot_colors.get(k)
+        for k in depth_accs
+        if depth_accs[k].count.sum() > 0
+    }
+
+    plot_depth_vs_distance(
+        accumulators=labeled_accs,
+        colors=labeled_colors,
+        out_path=out_dir / 'depth_vs_distance.png',
+    )
+    logging.info(f'All outputs written to {out_dir}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/benchmark_inbolt_fs_with_confidence.py b/scripts/benchmark_inbolt_fs_with_confidence.py
new file mode 100644
index 0000000..eeae811
--- /dev/null
+++ b/scripts/benchmark_inbolt_fs_with_confidence.py
@@ -0,0 +1,477 @@
+"""Benchmark FastFoundationStereo models on the Inbolt dataset, including the
+confidence-head variant produced by finetune_inbolt_with_confidence.py.
+
+Methods compared:
+  original   – pretrained FFS (no fine-tuning)
+  finetuned  – FFS fine-tuned on INBOLT (standard loss)
+  confidence – FFS fine-tuned on INBOLT with confidence head
+  depth_rs   – RealSense hardware depth (baseline)
+  zivid_gt   – Zivid projected ground truth
+
+Confidence maps (one per frame) are stored alongside depth maps and rendered in
+an extra report figure.
+
+Usage:
+  cd /home/adiroha/repos/Fast-FoundationStereo
+  python scripts/benchmark_inbolt_fs_with_confidence.py [--out_dir reports/...]
+"""
+
+import argparse
+import logging
+import os
+import sys
+import time
+from pathlib import Path
+from typing import Dict, Optional
+
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+sys.path.append(code_dir)
+
+import numpy as np
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+import torch
+
+import Utils as U
+from core.utils.utils import InputPadder
+
+# Import confidence model classes into __main__ scope so torch.load can
+# find them when unpickling a model saved during training (where they were
+# also in __main__).
+from scripts.finetune_inbolt_with_confidence import FastFoundationStereoWithConfidence, ConfidenceHead  # noqa: F401
+
+from benchmark_inbolt import DepthBinAccumulator, plot_depth_vs_distance
+from benchmark_inbolt_fs import ReportGeneratorInbolt, resolve_finetuned_model_path
+from scripts.data_manager_inbolt import DataSource, CAMERA_MATRIX_RS, DIST_COEFFS_RS
+from metrics import (
+    BenchmarkResults,
+    FrameMetrics,
+    compute_bin_mae,
+    compute_metrics,
+    aggregate,
+    CLOSE_RANGE_THRESHOLD_M,
+)
+
+
+# ── constants ────────────────────────────────────────────────────────────────
+
+DATA_DIR         = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260415T084601Z-3-001/Data Collection'
+ORIGINAL_PATH    = f'{code_dir}/../weights/23-36-37/model_best_bp2_serialize.pth'
+FINETUNED_PATH   = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt-20260415_epoch_111.pth'
+CONFIDENCE_PATH  = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt_with_confidence-20260507_epoch_026.pth'
+DEFAULT_OUT      = f'{code_dir}/../reports/inbolt_ffs_confidence_benchmark'
+N_VIZ  = 5
+ITERS  = 8
+BF     = 50.102706998586 * 385.509887695312   # focal_px * baseline_mm
+RS_FPS = 30.0
+
+CONF_VIZ_KEY = '_conf_map'   # suffix appended to 'confidence' in viz_frames
+
+METHODS: Dict[str, Dict] = {
+    'original':   {'label': 'FFS Original',                 'color': '#2980b9'},
+    'finetuned':  {'label': 'FFS Fine-tuned (INBOLT)',      'color': '#e74c3c'},
+    'confidence': {'label': 'FFS + Confidence Head',        'color': '#8e44ad'},
+    'depth_rs':   {'label': 'RealSense Hardware Depth',     'color': '#f39c12'},
+    'zivid_gt':   {'label': 'Zivid GT (projected to RS)',   'color': '#27ae60'},
+}
+GT_NAME = 'zivid_gt'
+RS_NAME = 'depth_rs'
+
+
+# ── inference helpers ─────────────────────────────────────────────────────────
+
+def _preprocess_ir(left: np.ndarray, right: np.ndarray):
+    """Turn a single-channel (H, W) image pair into (1, 3, H, W) float tensors on CUDA."""
+    def _as_tensor(gray):
+        clipped = np.clip(gray.astype(np.float32), 0, 255)
+        rgb = np.repeat(clipped[..., None], 3, axis=-1)  # replicate the channel three times
+        return torch.as_tensor(rgb).float()[None].permute(0, 3, 1, 2).cuda()
+    return _as_tensor(left), _as_tensor(right)
+
+
+@torch.no_grad()
+def infer_depth_m(model, left: np.ndarray, right: np.ndarray) -> np.ndarray:
+    """Run a disparity-only model on one pair and return depth in meters (0 = invalid)."""
+    img_l, img_r = _preprocess_ir(left, right)
+    padder = InputPadder(img_l.shape, divis_by=32, force_square=False)
+    img_l, img_r = padder.pad(img_l, img_r)
+    with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+        disp = model.forward(img_l, img_r, iters=ITERS, test_mode=True)
+    disp_np = padder.unpad(disp.float()).cpu().numpy().reshape(left.shape[:2]).clip(0, None)
+    positive = disp_np > 0
+    # BF / disparity gives millimeters; non-positive disparities keep depth 0.
+    depth_m = np.zeros_like(disp_np)
+    depth_m[positive] = (BF / disp_np[positive]) / 1000.0
+    return depth_m
+
+
+@torch.no_grad()
+def infer_depth_and_conf_m(
+    model: FastFoundationStereoWithConfidence,
+    left: np.ndarray,
+    right: np.ndarray,
+) -> tuple[np.ndarray, np.ndarray]:
+    """Run the confidence model on one pair; return (depth in meters, confidence clipped to [0, 1])."""
+    img_l, img_r = _preprocess_ir(left, right)
+    padder = InputPadder(img_l.shape, divis_by=32, force_square=False)
+    img_l, img_r = padder.pad(img_l, img_r)
+    with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+        disp, conf = model.forward(img_l, img_r, iters=ITERS, test_mode=True)
+    out_shape = left.shape[:2]
+    disp_np = padder.unpad(disp.float()).cpu().numpy().reshape(out_shape).clip(0, None)
+    conf_np = padder.unpad(conf.float()).cpu().numpy().reshape(out_shape).clip(0.0, 1.0)
+    positive = disp_np > 0
+    # BF / disparity gives millimeters; non-positive disparities keep depth 0.
+    depth_m = np.zeros_like(disp_np)
+    depth_m[positive] = (BF / disp_np[positive]) / 1000.0
+    return depth_m, conf_np
+
+
+# ── model loading ─────────────────────────────────────────────────────────────
+
+def load_model(path: str):  # returns the object stored in the checkpoint, on CUDA and in eval mode
+    logging.info(f"Loading model from {path}")
+    model = torch.load(path, map_location='cpu', weights_only=False)  # full unpickle (runs pickle code): trusted files only
+    model.cuda().eval()  # move to the GPU and switch to inference mode
+    return model
+
+
+def resolve_confidence_model_path(preferred: str) -> Optional[str]:
+    """Return ``preferred`` if it exists, else the last-sorted confidence checkpoint under weights/, else None."""
+    preferred_file = Path(preferred)
+    if preferred_file.exists():
+        return str(preferred_file)
+    pattern = '**/model_finetuned_inbolt_with_confidence*.pth'
+    matches = sorted((Path(code_dir) / '..' / 'weights').glob(pattern))
+    if not matches:
+        return None
+    fallback = matches[-1]
+    logging.warning(f"Preferred confidence model not found at {preferred}. Using {fallback}")
+    return str(fallback)
+
+
+# ── custom report generator ───────────────────────────────────────────────────
+
+class ReportGeneratorWithConfidence(ReportGeneratorInbolt):
+    """Extends the INBOLT report with a confidence map visualisation panel."""
+
+    def generate(self) -> None:
+        fig_paths = [
+            self._fig_depth_comparison(),
+            self._fig_error_maps(),
+            self._fig_confidence_maps(),
+            self._fig_coverage_heatmaps(),
+            self._fig_distance_error_curve(),
+            self._fig_error_histograms(),
+            self._fig_summary_table(),
+            self._fig_close_range_analysis(),
+            self._fig_timing_bars(),
+        ]
+        self._write_json()
+        self._write_html([p for p in fig_paths if p])
+        print(f"\nReport written to: {self._out / 'index.html'}")
+
+    def _fig_confidence_maps(self) -> str:
+        """Render per-frame confidence maps for the confidence model."""
+        conf_key = f'confidence{CONF_VIZ_KEY}'
+        frames_with_conf = [vf for vf in self._r.viz_frames if conf_key in vf]
+        if not frames_with_conf:
+            return self._empty_fig("confidence_maps.png", "No confidence maps recorded")
+
+        sel = self._get_selected_viz_indices(n_pick=4)
+        sel = [i for i in sel if conf_key in self._r.viz_frames[i]]
+        if not sel:
+            return self._empty_fig("confidence_maps.png", "No confidence maps in selected frames")
+
+        ncols = 3   # confidence-model depth, confidence map, zivid_gt (for reference)
+        nrows = len(sel)
+        fig, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 3.8 * nrows))
+        axes = np.atleast_2d(axes)
+
+        depth_cmap = self._depth_cmap()
+        conf_cmap  = plt.get_cmap('RdYlGn')   # red = low conf, green = high conf
+
+        col_titles = [
+            METHODS['confidence']['label'],
+            'Confidence (0 = invalid, 1 = valid)',
+            METHODS[GT_NAME]['label'],
+        ]
+
+        for row_idx, frame_idx in enumerate(sel):
+            vf = self._r.viz_frames[frame_idx]
+            depth_conf = vf.get('confidence')
+            conf_map   = vf.get(conf_key)
+            depth_gt   = vf.get(GT_NAME)
+
+            # Column 0: confidence model depth
+            ax = axes[row_idx, 0]
+            if depth_conf is not None:
+                im = ax.imshow(depth_conf, cmap=depth_cmap, vmin=0.1, vmax=2.0)
+                plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label='m')
+            else:
+                ax.axis('off')
+            if row_idx == 0:
+                ax.set_title(col_titles[0], fontsize=8)
+            ax.set_ylabel(f'Frame {frame_idx + 1}', fontsize=8)
+            ax.set_xticks([])
+            ax.set_yticks([])
+
+            # Column 1: confidence map
+            ax = axes[row_idx, 1]
+            if conf_map is not None:
+                im = ax.imshow(conf_map, cmap=conf_cmap, vmin=0.0, vmax=1.0)
+                plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
+                mean_conf = float(conf_map.mean())
+                ax.set_title(f'{col_titles[1]}\nmean={mean_conf:.3f}', fontsize=8)
+            else:
+                ax.axis('off')
+                if row_idx == 0:
+                    ax.set_title(col_titles[1], fontsize=8)
+            ax.set_xticks([])
+            ax.set_yticks([])
+
+            # Column 2: Zivid GT
+            ax = axes[row_idx, 2]
+            if depth_gt is not None:
+                im = ax.imshow(depth_gt, cmap=depth_cmap, vmin=0.1, vmax=2.0)
+                plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label='m')
+            else:
+                ax.axis('off')
+            if row_idx == 0:
+                ax.set_title(col_titles[2], fontsize=8)
+            ax.set_xticks([])
+            ax.set_yticks([])
+
+        fig.suptitle(
+            'Confidence Head Output — Depth, Confidence Map, and Zivid GT',
+            fontsize=11, y=1.01,
+        )
+        fig.tight_layout()
+        return self._save(fig, 'confidence_maps.png')
+
+
+# ── main ──────────────────────────────────────────────────────────────────────
+
+def main():  # CLI entry point: load models, run every method over the dataset, write report and plots
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument('--out_dir',    default=DEFAULT_OUT,       help='Output directory for the report')
+    parser.add_argument('--data_dir',   default=DATA_DIR,          help='Path to dataset root')
+    parser.add_argument('--original',   default=ORIGINAL_PATH,     help='Path to original model weights')
+    parser.add_argument('--finetuned',  default=FINETUNED_PATH,    help='Path to standard fine-tuned model weights')
+    parser.add_argument('--confidence', default=CONFIDENCE_PATH,   help='Path to confidence-head model weights')
+    parser.add_argument('--n_viz',      type=int, default=N_VIZ,   help='Frames saved for visual comparison')
+    args = parser.parse_args()
+
+    U.set_logging_format()
+    out_dir = Path(args.out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # ── load models ───────────────────────────────────────────────────────────
+    regular_models: Dict[str, object] = {}  # models run through infer_depth_m (depth only)
+    conf_models:    Dict[str, object] = {}  # models run through infer_depth_and_conf_m (depth + confidence)
+
+    regular_models['original'] = load_model(args.original)
+
+    ft_path = resolve_finetuned_model_path(args.finetuned)
+    if ft_path:
+        regular_models['finetuned'] = load_model(ft_path)
+    else:
+        logging.warning(f'Standard fine-tuned model not found at {args.finetuned} — skipping')
+        ft_path = args.finetuned  # keeps ft_path a str; not reported below because 'finetuned' is absent
+
+    conf_path = resolve_confidence_model_path(args.confidence)
+    if conf_path:
+        conf_models['confidence'] = load_model(conf_path)
+    else:
+        logging.warning(f'Confidence model not found at {args.confidence} — skipping')
+        conf_path = args.confidence  # same fallback as ft_path above
+
+    all_nn_models   = {**regular_models, **conf_models}
+    active_methods  = [GT_NAME, RS_NAME] + list(all_nn_models.keys())  # GT and RealSense first, then networks
+
+    # ── dataset ───────────────────────────────────────────────────────────────
+    source = DataSource()
+    n = source.init_directory(input_rectified=args.data_dir)
+    logging.info(f'Found {n} samples in {args.data_dir}')
+    if n == 0:
+        logging.error('No samples found — check DATA_DIR path')
+        return
+
+    # ── accumulators ──────────────────────────────────────────────────────────
+    all_metrics:        list[FrameMetrics]         = []
+    viz_frames:         list[dict]                 = []
+    valid_acc:          Dict[str, np.ndarray]      = {}
+    dist_bin_mae:       Dict[str, list]            = {m: [] for m in active_methods}
+    close_range_valid:  Dict[str, list]            = {m: [] for m in active_methods}
+    timing_ms_raw:      Dict[str, list]            = {m: [] for m in all_nn_models}
+    H = W = None  # filled from the first ground-truth frame
+
+    depth_acc_keys = [GT_NAME, RS_NAME] + list(all_nn_models.keys())
+    depth_accs = {k: DepthBinAccumulator() for k in depth_acc_keys}
+
+    for idx in range(n):
+        data   = source.get_item_projected(idx)
+        left   = data['left']
+        right  = data['right']
+        gt_mm  = data['depth_zivid'].astype(np.float32)
+        rs_mm  = data['depth_rs'].astype(np.float32)
+
+        if H is None:
+            H, W = gt_mm.shape[:2]
+            for m in active_methods:
+                valid_acc[m] = np.zeros((H, W), np.float32)
+
+        gt_m = gt_mm / 1000.0  # presumably millimetres → metres (per the _mm/_m names) — TODO confirm
+        rs_m = rs_mm / 1000.0
+
+        frame_depths: Dict[str, np.ndarray] = {GT_NAME: gt_m, RS_NAME: rs_m}
+        frame_confs:  Dict[str, np.ndarray] = {}
+
+        for mname, model in regular_models.items():
+            t0 = time.monotonic()
+            frame_depths[mname] = infer_depth_m(model, left, right)
+            timing_ms_raw[mname].append((time.monotonic() - t0) * 1000.0)  # wall-clock ms for this frame
+
+        for mname, model in conf_models.items():
+            t0 = time.monotonic()
+            depth_m, conf_np = infer_depth_and_conf_m(model, left, right)
+            timing_ms_raw[mname].append((time.monotonic() - t0) * 1000.0)
+            frame_depths[mname] = depth_m
+            frame_confs[mname]  = conf_np
+
+        gt_close_mask = (gt_m > 0) & (gt_m < CLOSE_RANGE_THRESHOLD_M)  # valid GT pixels nearer than the threshold
+        n_close = int(gt_close_mask.sum())
+
+        for mname in active_methods:
+            pred = frame_depths[mname]
+            valid_acc[mname] += (pred > 0).astype(np.float32)  # per-pixel count of frames with a positive depth
+
+            if mname == GT_NAME:
+                # GT is the reference: constants instead of computed errors; only the valid-pixel % is measured
+                fm = FrameMetrics(
+                    GT_NAME, 0.0, 0.0, 0.0, 100.0,
+                    float((pred > 0).mean()) * 100.0, 0.0,
+                    mae_pen=0.0, mre_pen=0.0,
+                )
+            elif mname == RS_NAME:
+                # RealSense is not timed here; its timing/FPS are set from RS_FPS after the loop
+                fm = compute_metrics(pred, gt_m, elapsed_ms=0.0, method_name=RS_NAME)
+            else:
+                fm = compute_metrics(pred, gt_m, timing_ms_raw[mname][-1], mname)  # [-1] = this frame's timing
+
+            all_metrics.append(fm)
+            dist_bin_mae[mname].append(compute_bin_mae(pred, gt_m))
+
+            close_cov = (
+                float((pred[gt_close_mask] > 0).mean()) * 100.0
+                if n_close > 0 else 0.0  # no close-range GT pixels in this frame → 0 %
+            )
+            close_range_valid[mname].append(close_cov)
+
+        depth_accs[GT_NAME].update(gt_m, gt_m)
+        depth_accs[RS_NAME].update(rs_m, gt_m)
+        for mname in all_nn_models:
+            depth_accs[mname].update(frame_depths[mname], gt_m)
+
+        if idx < args.n_viz:  # only the first n_viz frames are kept for the figures
+            vf = {k: v.copy() for k, v in frame_depths.items()}
+            # Store confidence maps under a separate key so they don't pollute depth metrics
+            for mname, conf_np in frame_confs.items():
+                vf[f'{mname}{CONF_VIZ_KEY}'] = conf_np.copy()
+            viz_frames.append(vf)
+
+        if (idx + 1) % 200 == 0 or (idx + 1) == n:
+            logging.info(f'  {idx + 1}/{n} frames processed')
+
+    for m in active_methods:
+        valid_acc[m] /= max(n, 1)  # frame counts → per-pixel fraction of frames with valid depth
+
+    mean_timing = {m: float(np.mean(ts)) if ts else 0.0 for m, ts in timing_ms_raw.items()}
+    mean_timing[GT_NAME] = 0.0
+    mean_timing[RS_NAME] = 1000.0 / RS_FPS  # nominal frame period in ms, not a measurement
+
+    method_configs = {
+        'original':  {'model_path': args.original},
+        RS_NAME:     {'source': f'RealSense hardware depth (~{RS_FPS:.0f} FPS)'},
+        GT_NAME:     {'source': 'Projected Zivid depth map used as INBOLT ground truth'},
+    }
+    if 'finetuned' in regular_models and ft_path:
+        method_configs['finetuned'] = {'model_path': ft_path}
+    if 'confidence' in conf_models and conf_path:
+        method_configs['confidence'] = {'model_path': conf_path}
+
+    results = BenchmarkResults(
+        method_names   = active_methods,
+        method_labels  = {m: METHODS[m]['label'] for m in active_methods},
+        method_colors  = {m: METHODS[m]['color']  for m in active_methods},
+        ground_truth_name = GT_NAME,
+        n_frames       = n,
+        width          = W,
+        height         = H,
+        all_metrics    = all_metrics,
+        viz_frames     = viz_frames,
+        coverage_maps  = valid_acc,
+        dist_bin_mae   = dist_bin_mae,
+        close_range_valid = close_range_valid,
+        source         = f'INBOLT dataset ({args.data_dir})',
+        method_configs = method_configs,
+    )
+
+    stats = aggregate(results, mean_timing)
+    if RS_NAME in stats:
+        stats[RS_NAME].fps_mean = RS_FPS  # overwrite with the nominal RealSense rate
+
+    reporter = ReportGeneratorWithConfidence(results, stats, out_dir)
+    reporter.generate()
+
+    # ── depth vs distance plot ─────────────────────────────────────────────────
+    plot_colors = {
+        GT_NAME:      METHODS[GT_NAME]['color'],
+        RS_NAME:      METHODS[RS_NAME]['color'],
+        **{m: METHODS[m]['color'] for m in all_nn_models if m in METHODS},
+    }
+    plot_labels = {
+        GT_NAME:      'Zivid GT (spatial spread)',
+        RS_NAME:      METHODS[RS_NAME]['label'],
+        'original':   METHODS['original']['label'],
+        'finetuned':  METHODS['finetuned']['label'],
+        'confidence': METHODS['confidence']['label'],
+    }
+    labeled_accs = {
+        plot_labels.get(k, k): v
+        for k, v in depth_accs.items()
+        if depth_accs[k].count.sum() > 0  # skip methods whose accumulator holds no samples
+    }
+    labeled_colors = {
+        plot_labels.get(k, k): plot_colors.get(k)
+        for k in depth_accs
+        if depth_accs[k].count.sum() > 0
+    }
+    plot_depth_vs_distance(
+        accumulators=labeled_accs,
+        colors=labeled_colors,
+        out_path=out_dir / 'depth_vs_distance.png',
+    )
+
+    # ── console summary ───────────────────────────────────────────────────────
+    logging.info('\n── Depth summary (test set) ─────────────────────────────────────────')
+    for mname in active_methods:
+        if mname == GT_NAME:
+            continue  # the reference is not summarised against itself
+        s = stats.get(mname)
+        if s is None:
+            continue
+        logging.info(
+            f"  {s.label:<35}  MAE={s.mae_mean*1000:.1f} mm  "
+            f"MRE*={s.mre_pen_mean*100:.2f}%  coverage={s.coverage_mean:.1f}%  "
+            f"FPS={s.fps_mean:.1f}"
+        )
+
+    logging.info(f'\nAll outputs written to {out_dir}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/benchmark_inbolt_trt.py b/scripts/benchmark_inbolt_trt.py
new file mode 100644
index 0000000..f30f6ab
--- /dev/null
+++ b/scripts/benchmark_inbolt_trt.py
@@ -0,0 +1,553 @@
+"""Benchmark original + fine-tuned FFS (PyTorch) and fine-tuned FFS-TRT (TensorRT FP16) on Inbolt.
+
+Extends ``benchmark_inbolt_fs.py`` by adding a TensorRT FP16 model alongside the
+PyTorch models and RealSense hardware depth, so you can compare accuracy and speed
+between the full-precision PyTorch path and the compiled TRT engine.
+
+Pass --rebuild_trt to export ONNX and compile TRT engines from the fine-tuned model
+at the start of the run (requires ~10 min on first compile).  The engines are written
+to --trt_dir and reused on subsequent runs without --rebuild_trt.
+
+Usage:
+  cd /home/adiroha/repos/Fast-FoundationStereo
+  # First run: build engines then benchmark
+  python scripts/benchmark_inbolt_trt.py --rebuild_trt [--trt_dir output/onnx_trt_ft]
+  # Subsequent runs: reuse existing engines
+  python scripts/benchmark_inbolt_trt.py [--trt_dir output/onnx_trt_ft] [--out_dir reports/inbolt_trt_benchmark]
+"""
+
+import argparse
+import logging
+import os
+import sys
+import time
+import warnings
+import cv2
+from pathlib import Path
+from typing import Dict, Optional
+
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+sys.path.append(code_dir)
+
+import numpy as np
+import torch
+import yaml
+from omegaconf import OmegaConf
+
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+
+import Utils as U
+from benchmark_inbolt import DepthBinAccumulator, plot_depth_vs_distance, BF, ITERS
+from benchmark_inbolt import infer_depth_m, load_model
+from scripts.data_manager_inbolt import DataSource, CAMERA_MATRIX_RS, DIST_COEFFS_RS
+from metrics import (
+    BenchmarkResults,
+    FrameMetrics,
+    compute_bin_mae,
+    compute_metrics,
+    aggregate,
+    CLOSE_RANGE_THRESHOLD_M,
+)
+from report import ReportGenerator
+from core.foundation_stereo import TrtRunner, TrtFeatureRunner, TrtPostRunner, build_gwc_volume_triton
+from make_trt_engine import build_engine
+
+
+# ── constants ────────────────────────────────────────────────────────────────
+
+DATA_DIR       = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260415T084601Z-3-001/Data Collection'
+ORIGINAL_PATH  = f'{code_dir}/../weights/20-30-48/model_best_bp2_serialize.pth'
+FINETUNED_PATH = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt-20260415_epoch_111.pth'
+DEFAULT_TRT_DIR = f'{code_dir}/../output/onnx_trt'
+DEFAULT_OUT    = f'{code_dir}/../reports/inbolt_trt_benchmark'
+N_VIZ = 5
+
+RS_FPS = 30.0
+
+METHODS: Dict[str, Dict[str, str]] = {
+    'original':      {'label': 'FFS Original',                  'color': '#2980b9'},
+    'finetuned':     {'label': 'FFS Fine-tuned (INBOLT)',        'color': '#e74c3c'},
+    'finetuned_trt': {'label': 'FFS Fine-tuned TRT (FP16)',      'color': '#8e44ad'},
+    'depth_rs':      {'label': 'RealSense Hardware Depth',       'color': '#f39c12'},
+    'zivid_gt':      {'label': 'Zivid GT (projected to RS)',     'color': '#27ae60'},
+}
+GT_NAME = 'zivid_gt'
+RS_NAME = 'depth_rs'
+
+
+# ── TRT build ────────────────────────────────────────────────────────────────
+
+def rebuild_trt_engines(model_path: str, trt_dir: str, height: int, width: int) -> None:
+    """Export the model at *model_path* to ONNX, then compile both TRT engines.
+
+    Writes to *trt_dir*:
+        feature_runner.onnx / feature_runner.engine
+        post_runner.onnx    / post_runner.engine
+        onnx.yaml
+    """
+    assert height % 32 == 0 and width % 32 == 0, \
+        f"height and width must be divisible by 32, got {height}x{width}"
+
+    os.makedirs(trt_dir, exist_ok=True)
+    logging.info(f'[rebuild] Loading model from {model_path}')
+    model = torch.load(model_path, map_location='cpu', weights_only=False)
+    model.cuda().eval()
+
+    feature_runner = TrtFeatureRunner(model).cuda().eval()
+    post_runner    = TrtPostRunner(model).cuda().eval()
+
+    dummy_left  = torch.randn(1, 3, height, width, device='cuda').float() * 255
+    dummy_right = torch.randn(1, 3, height, width, device='cuda').float() * 255
+
+    # ── feature runner → ONNX ────────────────────────────────────────────────
+    feature_onnx = os.path.join(trt_dir, 'feature_runner.onnx')
+    logging.info(f'[rebuild] Exporting feature_runner → {feature_onnx}')
+    with warnings.catch_warnings():
+        warnings.simplefilter('ignore')
+        torch.onnx.export(
+            feature_runner,
+            (dummy_left, dummy_right),
+            feature_onnx,
+            opset_version=17,
+            input_names=['left', 'right'],
+            output_names=['features_left_04', 'features_left_08', 'features_left_16',
+                          'features_left_32', 'features_right_04', 'stem_2x'],
+            do_constant_folding=True,
+            dynamo=True,
+        )
+
+    # ── post runner → ONNX ───────────────────────────────────────────────────
+    with torch.no_grad():
+        feats = feature_runner(dummy_left, dummy_right)
+        f04, f08, f16, f32, fr04, stem_2x = feats
+        cv_group = getattr(model, 'cv_group', 8)
+        gwc_volume = build_gwc_volume_triton(
+            f04.half(), fr04.half(), model.args.max_disp // 4, cv_group
+        )
+
+    post_onnx = os.path.join(trt_dir, 'post_runner.onnx')
+    logging.info(f'[rebuild] Exporting post_runner → {post_onnx}')
+    with warnings.catch_warnings():
+        warnings.simplefilter('ignore')
+        torch.onnx.export(
+            post_runner,
+            (f04.float(), f08.float(), f16.float(), f32.float(),
+             fr04.float(), stem_2x.float(), gwc_volume.float()),
+            post_onnx,
+            opset_version=17,
+            input_names=['features_left_04', 'features_left_08', 'features_left_16',
+                         'features_left_32', 'features_right_04', 'stem_2x', 'gwc_volume'],
+            output_names=['disp'],
+            do_constant_folding=True,
+            dynamo=True,
+        )
+
+    # ── save model config ─────────────────────────────────────────────────────
+    yaml_path = os.path.join(trt_dir, 'onnx.yaml')
+    with open(yaml_path, 'w') as f:
+        yaml.safe_dump(OmegaConf.to_container(model.args), f)
+    logging.info(f'[rebuild] Saved onnx.yaml → {yaml_path}')
+
+    # ── compile TRT engines ───────────────────────────────────────────────────
+    del model, feature_runner, post_runner  # free GPU memory before TRT build
+    torch.cuda.empty_cache()
+
+    for name in ('feature_runner', 'post_runner'):
+        onnx_path   = os.path.join(trt_dir, f'{name}.onnx')
+        engine_path = os.path.join(trt_dir, f'{name}.engine')
+        logging.info(f'[rebuild] Compiling {name}.engine (this may take several minutes) …')
+        build_engine(onnx_path, engine_path, fp16=True, workspace_gb=4)
+
+    logging.info(f'[rebuild] TRT engines ready in {trt_dir}')
+
+
+# ── TRT helpers ───────────────────────────────────────────────────────────────
+
+def load_trt_model(trt_dir: str) -> Optional[TrtRunner]:
+    """Load TRT engines from *trt_dir*; return None if engines are missing."""
+    feature_engine = os.path.join(trt_dir, 'feature_runner.engine')
+    post_engine    = os.path.join(trt_dir, 'post_runner.engine')
+    yaml_path      = os.path.join(trt_dir, 'onnx.yaml')
+
+    for p in (feature_engine, post_engine, yaml_path):
+        if not os.path.exists(p):
+            logging.warning(f'TRT file not found: {p} — skipping TRT model')
+            return None
+
+    with open(yaml_path) as f:
+        cfg = yaml.safe_load(f)
+    args = OmegaConf.create(cfg)
+
+    logging.info(f'Loading TRT engines from {trt_dir}')
+    return TrtRunner(args, feature_engine, post_engine)
+
+
+@torch.no_grad()
+def infer_depth_m_trt(
+    trt_model: TrtRunner,
+    trt_h: int,
+    trt_w: int,
+    left: np.ndarray,
+    right: np.ndarray,
+) -> np.ndarray:
+    """Run TRT stereo inference; return depth map in metres (H×W float32).
+
+    Images are resized to the fixed TRT engine resolution, disparity is scaled
+    back to original pixel units before the BF depth conversion.
+    """
+    orig_h, orig_w = left.shape[:2]
+    fx = trt_w / orig_w
+
+    left_r  = cv2.resize(left.astype(np.float32),  (trt_w, trt_h))
+    right_r = cv2.resize(right.astype(np.float32), (trt_w, trt_h))
+
+    # pseudo-RGB (same as _preprocess_ir in benchmark_inbolt.py)
+    left_r  = np.stack([left_r,  left_r,  left_r],  axis=-1)
+    right_r = np.stack([right_r, right_r, right_r], axis=-1)
+
+    left_t  = torch.as_tensor(left_r).float()[None].permute(0, 3, 1, 2).cuda()
+    right_t = torch.as_tensor(right_r).float()[None].permute(0, 3, 1, 2).cuda()
+
+    disp = trt_model.forward(left_t, right_t)
+    disp_np = disp.cpu().numpy().reshape(trt_h, trt_w).clip(0, None)
+
+    # resize back to original resolution; divide by fx to restore pixel-unit disparity
+    disp_orig = cv2.resize(disp_np, (orig_w, orig_h), interpolation=cv2.INTER_LINEAR) / fx
+
+    depth_m = np.zeros_like(disp_orig)
+    valid = disp_orig > 0
+    depth_m[valid] = (BF / disp_orig[valid]) / 1000.0
+    return depth_m
+
+
+# ── report generator ──────────────────────────────────────────────────────────
+
+class ReportGeneratorInbolt(ReportGenerator):
+    """4-frame depth comparison and error maps."""
+
+    def __init__(self, results, stats, output_dir) -> None:
+        super().__init__(results, stats, output_dir)
+        self._selected_viz_indices = []
+
+    def _get_selected_viz_indices(self, n_pick: int = 4):
+        if self._selected_viz_indices:
+            return self._selected_viz_indices
+        n_total = len(self._r.viz_frames)
+        if n_total == 0:
+            self._selected_viz_indices = []
+            return self._selected_viz_indices
+        n = min(n_pick, n_total)
+        rng = np.random.default_rng(42)
+        self._selected_viz_indices = sorted(rng.choice(n_total, size=n, replace=False).tolist())
+        return self._selected_viz_indices
+
+    def _fig_depth_comparison(self) -> str:
+        if not self._r.viz_frames:
+            return self._empty_fig('depth_comparison.png', 'No viz frames')
+        sel = self._get_selected_viz_indices(n_pick=4)
+        if not sel:
+            return self._empty_fig('depth_comparison.png', 'No viz frames')
+        vf0 = self._r.viz_frames[sel[0]]
+        method_names = [n for n in self._r.method_names if n in vf0]
+        nrows, ncols = len(sel), len(method_names)
+        fig, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 3.8 * nrows))
+        axes = np.atleast_2d(axes)
+        cmap = self._depth_cmap()
+        for r, frame_idx in enumerate(sel):
+            vf = self._r.viz_frames[frame_idx]
+            for c, name in enumerate(method_names):
+                ax = axes[r, c]
+                if name not in vf:
+                    ax.axis('off')
+                    continue
+                im = ax.imshow(vf[name], cmap=cmap, vmin=0.1, vmax=2.0)
+                plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label='m')
+                title = self._r.method_labels.get(name, name)
+                if c == 0:
+                    title = f'Frame {frame_idx + 1} • {title}'
+                ax.set_title(title, fontsize=9, wrap=True)
+                ax.axis('off')
+        fig.suptitle('Depth Map Comparison (4 random frames) — values in metres',
+                     fontsize=11, y=1.01)
+        fig.tight_layout()
+        return self._save(fig, 'depth_comparison.png')
+
+    def _fig_error_maps(self) -> str:
+        if not self._r.viz_frames or not self._non_gt:
+            return self._empty_fig('error_maps.png', 'No comparison methods')
+        sel = self._get_selected_viz_indices(n_pick=4)
+        if not sel:
+            return self._empty_fig('error_maps.png', 'No viz frames')
+        vf0 = self._r.viz_frames[sel[0]]
+        names = ([self._gt] if self._gt in vf0 else []) + [n for n in self._non_gt if n in vf0]
+        if not names:
+            return self._empty_fig('error_maps.png', 'Ground truth not in viz frame')
+        nrows, ncols = len(sel), len(names)
+        cmap = plt.get_cmap('hot').copy()
+        cmap.set_under('#222222')
+        fig, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 3.8 * nrows))
+        axes = np.atleast_2d(axes)
+        for r, frame_idx in enumerate(sel):
+            vf = self._r.viz_frames[frame_idx]
+            gt = vf.get(self._gt)
+            if gt is None:
+                for c in range(ncols):
+                    axes[r, c].axis('off')
+                continue
+            for c, name in enumerate(names):
+                ax = axes[r, c]
+                if name not in vf:
+                    ax.axis('off')
+                    continue
+                pred  = vf[name]
+                valid = (gt > 0) & (pred > 0)
+                err   = np.where(valid, np.abs(pred - gt), 0.0).astype(np.float32)
+                im    = ax.imshow(err, cmap=cmap, vmin=0.001, vmax=0.1)
+                plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label='|error| (m)')
+                mean_err = float(np.abs(pred[valid] - gt[valid]).mean()) if valid.any() else 0.0
+                label = self._r.method_labels.get(name, name)
+                title = f'Frame {frame_idx + 1} • {label}\nMAE={mean_err:.4f} m' if c == 0 \
+                        else f'{label}\nMAE={mean_err:.4f} m'
+                ax.set_title(title, fontsize=9)
+                ax.axis('off')
+        gt_label = self._r.method_labels.get(self._gt, self._gt)
+        fig.suptitle(f'Absolute Error vs {gt_label} (4 random frames, m)', fontsize=11, y=1.01)
+        fig.tight_layout()
+        return self._save(fig, 'error_maps.png')
+
+
+# ── misc helpers ──────────────────────────────────────────────────────────────
+
+def resolve_finetuned_model_path(preferred_path: str) -> Optional[str]:
+    preferred = Path(preferred_path)
+    if preferred.exists():
+        return str(preferred)
+    weights_dir = Path(code_dir) / '..' / 'weights'
+    for name in ('model_finetuned_inbolt.pth', 'model_finetuned_inbolt-20260415_epoch_030.pth'):
+        found = sorted(weights_dir.glob(f'**/{name}'))
+        if found:
+            logging.warning(f'Preferred fine-tuned model not found. Using fallback {found[0]}')
+            return str(found[0])
+    generic = sorted(weights_dir.glob('**/model_finetuned_inbolt*.pth'))
+    if generic:
+        chosen = generic[-1]
+        logging.warning(f'Preferred fine-tuned model not found. Using discovered checkpoint {chosen}')
+        return str(chosen)
+    return None
+
+
+# ── main ──────────────────────────────────────────────────────────────────────
+
+def main():
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument('--out_dir',   default=DEFAULT_OUT,    help='Output directory')
+    parser.add_argument('--data_dir',  default=DATA_DIR,       help='Path to dataset root')
+    parser.add_argument('--original',  default=ORIGINAL_PATH,  help='Original model weights')
+    parser.add_argument('--finetuned', default=FINETUNED_PATH, help='Fine-tuned model weights')
+    parser.add_argument('--trt_dir',   default=DEFAULT_TRT_DIR,
+                        help='Directory with feature_runner.engine, post_runner.engine, onnx.yaml')
+    parser.add_argument('--trt_height', type=int, default=448, help='TRT engine input height')
+    parser.add_argument('--trt_width',  type=int, default=640, help='TRT engine input width')
+    parser.add_argument('--rebuild_trt', action='store_true',
+                        help='Re-export ONNX and recompile TRT engines from --finetuned before benchmarking')
+    parser.add_argument('--n_viz', type=int, default=N_VIZ, help='Frames saved for visual comparison')
+    args = parser.parse_args()
+
+    U.set_logging_format()
+    out_dir = Path(args.out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # ── optionally rebuild TRT engines ────────────────────────────────────────
+    if args.rebuild_trt:
+        build_src = resolve_finetuned_model_path(args.finetuned) or args.original
+        logging.info(f'[rebuild_trt] Building TRT engines from {build_src} → {args.trt_dir}')
+        rebuild_trt_engines(build_src, args.trt_dir, args.trt_height, args.trt_width)
+
+    # ── load PyTorch models ───────────────────────────────────────────────────
+    pt_models = {}
+    finetuned_path = resolve_finetuned_model_path(args.finetuned)
+    if finetuned_path is not None:
+        pt_models['finetuned'] = load_model(finetuned_path)
+    else:
+        logging.warning(f'Fine-tuned model not found (preferred: {args.finetuned}) — skipping')
+    pt_models['original'] = load_model(args.original)
+
+    # ── load TRT model ────────────────────────────────────────────────────────
+    trt_model = load_trt_model(args.trt_dir)
+    trt_h, trt_w = args.trt_height, args.trt_width
+
+    # ── assemble active methods ───────────────────────────────────────────────
+    active_methods = [GT_NAME, RS_NAME] + list(pt_models.keys())
+    if trt_model is not None:
+        active_methods.append('finetuned_trt')
+
+    logging.info(f'Active methods: {active_methods}')
+
+    # ── dataset ───────────────────────────────────────────────────────────────
+    source = DataSource()
+    n = source.init_directory(input_rectified=args.data_dir)
+    logging.info(f'Found {n} samples in {args.data_dir}')
+    if n == 0:
+        logging.error('No samples found — check DATA_DIR path')
+        return
+
+    # ── accumulators ──────────────────────────────────────────────────────────
+    all_metrics       = []
+    viz_frames        = []
+    valid_acc         = {}
+    dist_bin_mae      = {m: [] for m in active_methods}
+    close_range_valid = {m: [] for m in active_methods}
+    timing_ms_raw     = {m: [] for m in active_methods if m not in (GT_NAME, RS_NAME)}
+    H = W = None
+
+    depth_acc_keys = [GT_NAME, RS_NAME] + [m for m in active_methods if m not in (GT_NAME, RS_NAME)]
+    depth_accs = {k: DepthBinAccumulator() for k in depth_acc_keys}
+
+    for idx in range(n):
+        data   = source.get_item_projected(idx)
+        left   = data['left']
+        right  = data['right']
+        gt_mm  = data['depth_zivid'].astype(np.float32)
+        rs_mm  = data['depth_rs'].astype(np.float32)
+
+        if H is None:
+            H, W = gt_mm.shape[:2]
+            for m in active_methods:
+                valid_acc[m] = np.zeros((H, W), np.float32)
+
+        gt_m = gt_mm / 1000.0
+        rs_m = rs_mm / 1000.0
+
+        frame_depths = {GT_NAME: gt_m, RS_NAME: rs_m}
+
+        # PyTorch models
+        for mname, model in pt_models.items():
+            t0 = time.monotonic()
+            frame_depths[mname] = infer_depth_m(model, left, right)
+            cv2.imwrite(str(out_dir / f'{mname}_{idx:03d}.png'),
+                        (frame_depths[mname] * 1000.0).astype(np.uint16))
+            timing_ms_raw[mname].append((time.monotonic() - t0) * 1000.0)
+
+        # TRT model
+        if trt_model is not None:
+            t0 = time.monotonic()
+            frame_depths['finetuned_trt'] = infer_depth_m_trt(trt_model, trt_h, trt_w, left, right)
+            cv2.imwrite(str(out_dir / f'finetuned_trt_{idx:03d}.png'),
+                        (frame_depths['finetuned_trt'] * 1000.0).astype(np.uint16))
+            timing_ms_raw['finetuned_trt'].append((time.monotonic() - t0) * 1000.0)
+
+        gt_close_mask = (gt_m > 0) & (gt_m < CLOSE_RANGE_THRESHOLD_M)
+        n_close = int(gt_close_mask.sum())
+
+        for mname in active_methods:
+            pred = frame_depths[mname]
+            valid_acc[mname] += (pred > 0).astype(np.float32)
+
+            if mname == GT_NAME:
+                fm = FrameMetrics(GT_NAME, 0.0, 0.0, 0.0, 100.0,
+                                  float((pred > 0).mean()) * 100.0, 0.0,
+                                  mae_pen=0.0, mre_pen=0.0)
+            elif mname == RS_NAME:
+                fm = compute_metrics(pred, gt_m, elapsed_ms=0.0, method_name=RS_NAME)
+            else:
+                fm = compute_metrics(pred, gt_m, timing_ms_raw[mname][-1], mname)
+
+            all_metrics.append(fm)
+            dist_bin_mae[mname].append(compute_bin_mae(pred, gt_m))
+            close_cov = (float((pred[gt_close_mask] > 0).mean()) * 100.0
+                         if n_close > 0 else 0.0)
+            close_range_valid[mname].append(close_cov)
+
+        depth_accs[GT_NAME].update(gt_m, gt_m)
+        depth_accs[RS_NAME].update(rs_m, gt_m)
+        for mname in active_methods:
+            if mname not in (GT_NAME, RS_NAME):
+                depth_accs[mname].update(frame_depths[mname], gt_m)
+
+        if idx < args.n_viz:
+            viz_frames.append({k: v.copy() for k, v in frame_depths.items()})
+
+        if (idx + 1) % 200 == 0 or (idx + 1) == n:
+            logging.info(f'  {idx + 1}/{n} frames processed')
+
+    for m in active_methods:
+        valid_acc[m] /= max(n, 1)
+
+    mean_timing = {m: float(np.mean(ts)) if ts else 0.0
+                   for m, ts in timing_ms_raw.items()}
+    mean_timing[GT_NAME] = 0.0
+    mean_timing[RS_NAME] = 1000.0 / RS_FPS
+
+    method_configs = {
+        'original':  {'model_path': args.original},
+        RS_NAME:     {'source': f'RealSense hardware depth (~{RS_FPS:.0f} FPS)'},
+        GT_NAME:     {'source': 'Projected Zivid depth map used as Inbolt ground truth'},
+    }
+    if 'finetuned' in pt_models and finetuned_path is not None:
+        method_configs['finetuned'] = {'model_path': finetuned_path}
+    if trt_model is not None:
+        method_configs['finetuned_trt'] = {
+            'engine_dir': args.trt_dir,
+            'input_size': f'{trt_h}x{trt_w}',
+            'precision':  'FP16',
+        }
+
+    results = BenchmarkResults(
+        method_names=active_methods,
+        method_labels={m: METHODS[m]['label'] for m in active_methods},
+        method_colors={m: METHODS[m]['color'] for m in active_methods},
+        ground_truth_name=GT_NAME,
+        n_frames=n,
+        width=W,
+        height=H,
+        all_metrics=all_metrics,
+        viz_frames=viz_frames,
+        coverage_maps=valid_acc,
+        dist_bin_mae=dist_bin_mae,
+        close_range_valid=close_range_valid,
+        source=f'INBOLT dataset ({args.data_dir})',
+        method_configs=method_configs,
+    )
+
+    stats = aggregate(results, mean_timing)
+    if RS_NAME in stats:
+        stats[RS_NAME].fps_mean = RS_FPS
+
+    reporter = ReportGeneratorInbolt(results, stats, out_dir)
+    reporter.generate()
+
+    # ── depth-vs-distance plot ────────────────────────────────────────────────
+    plot_colors = {m: METHODS[m]['color'] for m in active_methods if m in METHODS}
+    plot_labels = {
+        GT_NAME:         'Zivid GT (spatial spread)',
+        RS_NAME:         METHODS[RS_NAME]['label'],
+        'original':      METHODS['original']['label'],
+        'finetuned':     METHODS['finetuned']['label'],
+        'finetuned_trt': METHODS['finetuned_trt']['label'],
+    }
+    labeled_accs = {
+        plot_labels.get(k, k): v
+        for k, v in depth_accs.items()
+        if depth_accs[k].count.sum() > 0
+    }
+    labeled_colors = {
+        plot_labels.get(k, k): plot_colors.get(k)
+        for k in depth_accs
+        if depth_accs[k].count.sum() > 0
+    }
+    plot_depth_vs_distance(
+        accumulators=labeled_accs,
+        colors=labeled_colors,
+        out_path=out_dir / 'depth_vs_distance.png',
+    )
+    logging.info(f'All outputs written to {out_dir}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/benchmark_office_fs.py b/scripts/benchmark_office_fs.py
new file mode 100644
index 0000000..8c1fee0
--- /dev/null
+++ b/scripts/benchmark_office_fs.py
@@ -0,0 +1,409 @@
+"""Benchmark FastFoundationStereo models + RealSense hardware depth on the Office dataset.
+
+This benchmark mirrors the structure of ``benchmark_faro_rs.py`` but uses the
+Office dataset and the meter-based reporting pipeline already used by
+``benchmark_inbolt.py``.
+
+Depth predictions from stereo and RealSense hardware depth are compared using
+RealSense depth as the baseline reference.
+
+Usage:
+  cd /home/adiroha/repos/Fast-FoundationStereo
+  python scripts/benchmark_office_fs.py [--out_dir reports/office_ffs_benchmark-model37]
+"""
+
+import argparse
+import logging
+import os
+import sys
+import time
+import cv2
+from pathlib import Path
+from typing import Dict, Optional
+
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+sys.path.append(code_dir)
+
+import numpy as np
+
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+
+import Utils as U
+from benchmark_inbolt import DepthBinAccumulator, infer_depth_m, load_model, plot_depth_vs_distance
+from data_manager_office import DataSource, CAMERA_MATRIX_RS, DIST_COEFFS_RS
+from metrics import (
+    BenchmarkResults,
+    FrameMetrics,
+    compute_bin_mae,
+    compute_metrics,
+    aggregate,
+    CLOSE_RANGE_THRESHOLD_M,
+)
+from report import ReportGenerator
+
+
+# ── custom report generator ──────────────────────────────────────────────────
+
+class ReportGeneratorInbolt(ReportGenerator):
+    """Report generator that shows up to 4 frames in the depth-comparison and error-map figures.
+
+    Both figures use one row per selected frame and one column per method.
+    The frame selection is made once and reused, so the rows of the two
+    figures refer to the same frames.
+
+    Relies on ``self._r``, ``self._gt``, ``self._non_gt``, ``_empty_fig``,
+    ``_save`` and ``_depth_cmap`` — presumably provided by ``ReportGenerator``;
+    verify against report.py.
+    """
+
+    def __init__(self, results, stats, output_dir) -> None:
+        super().__init__(results, stats, output_dir)
+        # Filled lazily by _get_selected_viz_indices(); empty means "not chosen yet".
+        self._selected_viz_indices = []
+
+    def _get_selected_viz_indices(self, n_pick: int = 4):
+        """Return the cached, sorted frame indices used consistently across report sections.
+
+        On first use, draws ``min(n_pick, len(viz_frames))`` distinct indices
+        with a fixed seed (42), so the selection is reproducible between runs.
+        Returns an empty list when there are no viz frames.
+        """
+        if self._selected_viz_indices:
+            return self._selected_viz_indices
+
+        n_total = len(self._r.viz_frames)
+        if n_total == 0:
+            self._selected_viz_indices = []
+            return self._selected_viz_indices
+
+        n = min(n_pick, n_total)
+        rng = np.random.default_rng(42)
+        self._selected_viz_indices = sorted(rng.choice(n_total, size=n, replace=False).tolist())
+        return self._selected_viz_indices
+
+    def _fig_depth_comparison(self) -> str:
+        """Render the depth maps of every method for the selected frames.
+
+        Columns are the methods present in the first selected frame; a method
+        missing from a later frame gets a blank cell. Returns the result of
+        ``self._save`` (or ``self._empty_fig`` when there is nothing to draw).
+        """
+        if not self._r.viz_frames:
+            return self._empty_fig("depth_comparison.png", "No viz frames")
+
+        sel = self._get_selected_viz_indices(n_pick=4)
+        if not sel:
+            return self._empty_fig("depth_comparison.png", "No viz frames")
+
+        vf0 = self._r.viz_frames[sel[0]]
+        method_names = [n for n in self._r.method_names if n in vf0]
+        if not method_names:
+            # plt.subplots() rejects a zero-column grid.
+            return self._empty_fig("depth_comparison.png", "No methods in viz frames")
+
+        nrows = len(sel)
+        ncols = len(method_names)
+        # squeeze=False always yields a (nrows, ncols) array. np.atleast_2d()
+        # would turn an (nrows,) array into (1, nrows) when ncols == 1 and
+        # break the axes[r, c] indexing below.
+        fig, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 3.8 * nrows), squeeze=False)
+        cmap = self._depth_cmap()
+
+        for r, frame_idx in enumerate(sel):
+            vf = self._r.viz_frames[frame_idx]
+            for c, name in enumerate(method_names):
+                ax = axes[r, c]
+                if name not in vf:
+                    ax.axis("off")
+                    continue
+                # Fixed colour range (0.1 .. 2.0) so all panels are comparable.
+                im = ax.imshow(vf[name], cmap=cmap, vmin=0.1, vmax=2.0)
+                plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label="m")
+                title = self._r.method_labels.get(name, name)
+                if c == 0:
+                    # Only the first column carries the (1-based) frame number.
+                    title = f"Frame {frame_idx + 1} • {title}"
+                ax.set_title(title, fontsize=9, wrap=True)
+                ax.axis("off")
+
+        fig.suptitle("Depth Map Comparison (4 random frames) — values in meters",
+                     fontsize=11, y=1.01)
+        fig.tight_layout()
+        return self._save(fig, "depth_comparison.png")
+
+    def _fig_error_maps(self) -> str:
+        """Render per-pixel absolute error against the ground truth for the selected frames.
+
+        Columns are the ground truth (if present in the first selected frame)
+        followed by the non-GT methods present there. A pixel contributes only
+        where both the ground truth and the prediction are > 0; all other
+        pixels get error 0, which falls below ``vmin`` and is drawn in the
+        colormap's "under" colour.
+        """
+        if not self._r.viz_frames or not self._non_gt:
+            return self._empty_fig("error_maps.png", "No comparison methods")
+
+        sel = self._get_selected_viz_indices(n_pick=4)
+        if not sel:
+            return self._empty_fig("error_maps.png", "No viz frames")
+
+        vf0 = self._r.viz_frames[sel[0]]
+        names = ([self._gt] if self._gt in vf0 else []) + [n for n in self._non_gt if n in vf0]
+        if not names:
+            return self._empty_fig("error_maps.png", "Ground truth not available in viz frame")
+
+        nrows = len(sel)
+        ncols = len(names)
+        cmap = plt.get_cmap("hot").copy()
+        cmap.set_under("#222222")
+        # squeeze=False keeps axes 2-D for every (nrows, ncols) combination.
+        fig, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 3.8 * nrows), squeeze=False)
+
+        for r, frame_idx in enumerate(sel):
+            vf = self._r.viz_frames[frame_idx]
+            gt = vf.get(self._gt)
+            if gt is None:
+                # No reference for this frame: blank the whole row.
+                for c in range(ncols):
+                    axes[r, c].axis("off")
+                continue
+
+            for c, name in enumerate(names):
+                ax = axes[r, c]
+                if name not in vf:
+                    ax.axis("off")
+                    continue
+                pred = vf[name]
+                valid = (gt > 0) & (pred > 0)
+                err = np.where(valid, np.abs(pred - gt), 0.0).astype(np.float32)
+                im = ax.imshow(err, cmap=cmap, vmin=0.001, vmax=0.1)
+                plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label="|error| (m)")
+                mean_err = float(np.abs(pred[valid] - gt[valid]).mean()) if valid.any() else 0.0
+                label = self._r.method_labels.get(name, name)
+                if c == 0:
+                    ax.set_title(f"Frame {frame_idx + 1} • {label}\nMAE={mean_err:.4f} m", fontsize=9)
+                else:
+                    ax.set_title(f"{label}\nMAE={mean_err:.4f} m", fontsize=9)
+                ax.axis("off")
+
+        gt_label = self._r.method_labels.get(self._gt, self._gt)
+        fig.suptitle(f"Absolute Error vs {gt_label} (4 random frames, m)", fontsize=11, y=1.01)
+        fig.tight_layout()
+        return self._save(fig, "error_maps.png")
+
+
+# ── constants ────────────────────────────────────────────────────────────────
+
+# Defaults for the command-line flags declared in main(); all paths are
+# resolved relative to this script's directory (code_dir).
+#DATA_DIR            = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260415T084601Z-3-001/Data Collection'
+DATA_DIR            = f'{code_dir}/../data/pattern_cube'
+#ORIGINAL_PATH  = f'{code_dir}/../weights/20-30-48/model_best_bp2_serialize.pth'
+# FINETUNED_PATH  = f'{code_dir}/../weights/20-30-48/model_finetuned_inbolt-20260415_epoch_030.pth'
+ORIGINAL_PATH       = f'{code_dir}/../weights/23-36-37/model_best_bp2_serialize.pth'
+FINETUNED_PATH      = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt-20260415_epoch_030.pth'
+DEFAULT_OUT         = f'{code_dir}/../reports/office_ffs_benchmark-model37'
+# Default for --n_viz: number of leading frames kept for the visual comparison figures.
+N_VIZ = 5
+
+# Display label and plot colour for every method key used in the report.
+METHODS: Dict[str, Dict[str, str]] = {
+    'original': {'label': 'FFS Original', 'color': '#2980b9'},
+    'finetuned': {'label': 'FFS Fine-tuned (INBOLT)', 'color': '#e74c3c'},
+    'depth_rs': {'label': 'RealSense Hardware Depth', 'color': '#f39c12'},
+}
+# RealSense hardware depth is used as the reference in this benchmark, so the
+# ground-truth key and the RealSense key are the same string.
+# NOTE(review): any list built from [GT_NAME, RS_NAME] therefore contains
+# 'depth_rs' twice.
+GT_NAME = 'depth_rs'
+RS_NAME = 'depth_rs'
+# Frame rate reported for the RealSense hardware depth (it is not timed here).
+RS_FPS = 30.0
+
+
+def resolve_finetuned_model_path(preferred_path: str) -> Optional[str]:
+    """Locate a fine-tuned Inbolt checkpoint, preferring ``preferred_path``.
+
+    Search order:
+      1. ``preferred_path`` itself, if it exists.
+      2. The first (sorted) match of each well-known file name anywhere
+         under ``<script dir>/../weights``.
+      3. The last (sorted) match of ``model_finetuned_inbolt*.pth`` there.
+
+    A warning is logged whenever a fallback is used.
+
+    Returns:
+        The checkpoint path as a string, or None if nothing was found.
+    """
+    wanted = Path(preferred_path)
+    if wanted.exists():
+        return str(wanted)
+
+    search_root = Path(code_dir) / '..' / 'weights'
+
+    # Pass 1: exact, known checkpoint names (first sorted hit wins).
+    for fname in ('model_finetuned_inbolt.pth',
+                  'model_finetuned_inbolt-20260415_epoch_030.pth'):
+        hits = sorted(search_root.glob(f'**/{fname}'))
+        if not hits:
+            continue
+        first_hit = hits[0]
+        logging.warning(
+            f'Preferred fine-tuned model not found at {wanted}. Using fallback {first_hit}'
+        )
+        return str(first_hit)
+
+    # Pass 2: any Inbolt fine-tuned checkpoint; take the lexicographically last one.
+    wildcard_hits = sorted(search_root.glob('**/model_finetuned_inbolt*.pth'))
+    if not wildcard_hits:
+        return None
+
+    latest = wildcard_hits[-1]
+    logging.warning(
+        f'Preferred fine-tuned model not found at {wanted}. Using discovered checkpoint {latest}'
+    )
+    return str(latest)
+
+
+# ── main ─────────────────────────────────────────────────────────────────────
+
+def main():
+    """Run the Office benchmark end to end.
+
+    Steps: parse CLI flags, load the stereo model(s), iterate over every
+    dataset sample (inference, per-frame metrics, PLY export), then generate
+    the report and the depth-vs-distance plot in ``--out_dir``.
+
+    The fine-tuned model is optional: if no checkpoint can be resolved it is
+    skipped with a warning. The original model is always loaded.
+    """
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument('--out_dir', default=DEFAULT_OUT, help='Output directory for the report')
+    parser.add_argument('--data_dir', default=DATA_DIR, help='Path to dataset root')
+    parser.add_argument('--original', default=ORIGINAL_PATH, help='Path to original model weights')
+    parser.add_argument('--finetuned', default=FINETUNED_PATH, help='Path to fine-tuned model weights')
+    parser.add_argument('--n_viz', type=int, default=N_VIZ, help='Frames saved for visual comparison')
+    args = parser.parse_args()
+
+    U.set_logging_format()
+    out_dir = Path(args.out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # ── load stereo models ───────────────────────────────────────────────────
+    models = {}
+    finetuned_path = resolve_finetuned_model_path(args.finetuned)
+    if finetuned_path is not None:
+        models['finetuned'] = load_model(finetuned_path)
+    else:
+        logging.warning(
+            f'Fine-tuned model not found (preferred: {args.finetuned}) and no fallback checkpoint found — skipping'
+        )
+
+    models['original'] = load_model(args.original)
+
+    # GT_NAME and RS_NAME may be the same key (both are 'depth_rs' here).
+    # De-duplicate while keeping order; otherwise that method is accumulated,
+    # normalised and exported twice per frame and appears twice in the report.
+    active_methods = list(dict.fromkeys([GT_NAME, RS_NAME, *models]))
+
+    # ── dataset ──────────────────────────────────────────────────────────────
+    source = DataSource()
+    n = source.init_directory(input_rectified=args.data_dir)
+    logging.info(f'Found {n} samples in {args.data_dir}')
+    if n == 0:
+        logging.error('No samples found — check DATA_DIR path')
+        return
+
+    # ── accumulators ─────────────────────────────────────────────────────────
+    all_metrics = []
+    viz_frames = []
+    valid_acc = {}  # per-method count of frames in which each pixel was > 0
+    dist_bin_mae = {m: [] for m in active_methods}
+    close_range_valid = {m: [] for m in active_methods}
+    timing_ms_raw = {m: [] for m in models}
+    H = W = None  # taken from the first frame
+
+    depth_acc_keys = [RS_NAME] + list(models.keys())
+    depth_accs = {k: DepthBinAccumulator() for k in depth_acc_keys}
+
+    for idx in range(n):
+        data = source.get_item(idx)
+        left = data['left']
+        right = data['right']
+        # The RealSense depth serves as both the reference and the RS method.
+        gt_mm = data['depth_rs'].astype(np.float32)
+        rs_mm = data['depth_rs'].astype(np.float32)
+
+        if H is None:
+            H, W = gt_mm.shape[:2]
+            for m in active_methods:
+                valid_acc[m] = np.zeros((H, W), np.float32)
+
+        gt_m = gt_mm / 1000.0
+        rs_m = rs_mm / 1000.0
+
+        frame_depths = {GT_NAME: gt_m, RS_NAME: rs_m}
+        for mname, model in models.items():
+            t0 = time.monotonic()
+            frame_depths[mname] = infer_depth_m(model, left, right)
+            timing_ms_raw[mname].append((time.monotonic() - t0) * 1000.0)
+
+        gt_close_mask = (gt_m > 0) & (gt_m < CLOSE_RANGE_THRESHOLD_M)
+        n_close = int(gt_close_mask.sum())
+
+        # create point clouds for visualization
+        for mname in active_methods:
+            pred = frame_depths[mname]
+
+            XYZ = source.project_camera_to_3d(pred, CAMERA_MATRIX_RS, DIST_COEFFS_RS)  # (N, 3) array of 3D points in RealSense camera space
+            mname_path = os.path.join(out_dir, f'{mname}_{idx}.ply')
+            # NOTE(review): `pred` was already divided by 1000 above; confirm
+            # that project_camera_to_3d() returns values that still need the
+            # /1000 scaling, otherwise the exported cloud is 1000x too small.
+            source.save_to_ply(XYZ/1000, mname_path) # save in meters for visualization
+
+
+        for mname in active_methods:
+            pred = frame_depths[mname]
+            valid_acc[mname] += (pred > 0).astype(np.float32)
+
+            if mname == GT_NAME:
+                # The reference is not compared with itself: only its
+                # coverage (percentage of pixels > 0) is computed.
+                fm = FrameMetrics(
+                    GT_NAME,
+                    0.0,
+                    0.0,
+                    0.0,
+                    100.0,
+                    float((pred > 0).mean()) * 100.0,
+                    0.0,
+                    mae_pen=0.0,
+                    mre_pen=0.0,
+                )
+            elif mname == RS_NAME:
+                # Reached only when RS_NAME differs from GT_NAME.
+                fm = compute_metrics(pred, gt_m, elapsed_ms=0.0, method_name=RS_NAME)
+            else:
+                fm = compute_metrics(pred, gt_m, timing_ms_raw[mname][-1], mname)
+
+            all_metrics.append(fm)
+            dist_bin_mae[mname].append(compute_bin_mae(pred, gt_m))
+
+            # Percentage of close-range reference pixels for which this method has depth.
+            close_cov = (
+                float((pred[gt_close_mask] > 0).mean()) * 100.0
+                if n_close > 0 else 0.0
+            )
+            close_range_valid[mname].append(close_cov)
+
+        depth_accs[RS_NAME].update(rs_m, gt_m)
+        for mname in models:
+            depth_accs[mname].update(frame_depths[mname], gt_m)
+
+        # Keep only the first n_viz frames for the figures.
+        if idx < args.n_viz:
+            viz_frames.append({k: v.copy() for k, v in frame_depths.items()})
+
+        if (idx + 1) % 200 == 0 or (idx + 1) == n:
+            logging.info(f'  {idx + 1}/{n} frames processed')
+
+    # Turn the per-pixel valid counts into a fraction of frames.
+    for m in active_methods:
+        valid_acc[m] /= max(n, 1)
+
+    mean_timing = {
+        m: float(np.mean(ts)) if ts else 0.0
+        for m, ts in timing_ms_raw.items()
+    }
+    mean_timing[GT_NAME] = 0.0
+    # Assigned after GT_NAME so the RealSense figure wins when both keys coincide.
+    mean_timing[RS_NAME] = 1000.0 / RS_FPS
+
+    method_configs = {
+        'original': {'model_path': args.original},
+        RS_NAME: {'source': f'RealSense hardware depth (~{RS_FPS:.0f} FPS)'},
+    }
+    if 'finetuned' in models and finetuned_path is not None:
+        method_configs['finetuned'] = {'model_path': finetuned_path}
+
+    results = BenchmarkResults(
+        method_names=active_methods,
+        method_labels={m: METHODS[m]['label'] for m in active_methods},
+        method_colors={m: METHODS[m]['color'] for m in active_methods},
+        ground_truth_name=GT_NAME,
+        n_frames=n,
+        width=W,
+        height=H,
+        all_metrics=all_metrics,
+        viz_frames=viz_frames,
+        coverage_maps=valid_acc,
+        dist_bin_mae=dist_bin_mae,
+        close_range_valid=close_range_valid,
+        source=f'INBOLT dataset ({args.data_dir})',
+        method_configs=method_configs,
+    )
+
+    stats = aggregate(results, mean_timing)
+    if RS_NAME in stats:
+        stats[RS_NAME].fps_mean = RS_FPS
+
+    reporter = ReportGeneratorInbolt(results, stats, out_dir)
+    reporter.generate()
+
+    # ── depth-vs-distance plot ───────────────────────────────────────────────
+    plot_colors = {
+        RS_NAME: METHODS[RS_NAME]['color'],
+        **{m: METHODS[m]['color'] for m in models if m in METHODS},
+    }
+    plot_labels = {
+        RS_NAME: METHODS[RS_NAME]['label'],
+        'original': METHODS['original']['label'],
+        'finetuned': METHODS['finetuned']['label'],
+    }
+    # Only accumulators that received at least one sample are plotted.
+    labeled_accs = {
+        plot_labels.get(k, k): v
+        for k, v in depth_accs.items()
+        if depth_accs[k].count.sum() > 0
+    }
+    labeled_colors = {
+        plot_labels.get(k, k): plot_colors.get(k)
+        for k in depth_accs
+        if depth_accs[k].count.sum() > 0
+    }
+
+    plot_depth_vs_distance(
+        accumulators=labeled_accs,
+        colors=labeled_colors,
+        out_path=out_dir / 'depth_vs_distance.png',
+    )
+    logging.info(f'All outputs written to {out_dir}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/benchmark_runner.py b/scripts/benchmark_runner.py
new file mode 100644
index 0000000..270a31d
--- /dev/null
+++ b/scripts/benchmark_runner.py
@@ -0,0 +1,280 @@
+"""BenchmarkRunner — orchestrates warm-up, frame collection and metric accumulation."""
+
+from __future__ import annotations
+
+import time
+from typing import Dict, List, Optional
+
+import numpy as np
+
+from .metrics import (
+    CLOSE_RANGE_THRESHOLD_M,
+    DIST_BINS,
+    BenchmarkResults,
+    FrameMetrics,
+    compute_bin_mae,
+    compute_metrics,
+)
+from .methods import DepthMethod
+
+
+# Cached sysfs node: None = not probed yet, "" = probed but nothing found.
+_GPU_LOAD_PATH = None
+
+def _read_gpu_load() -> float:
+    """Read GPU utilisation from sysfs (Jetson Orin). Returns 0-100 or -1 on failure.
+
+    The sysfs node is located on the first call and cached in
+    ``_GPU_LOAD_PATH``. The integer read from the node is divided by 10
+    (presumably the node reports tenths of a percent — confirm on target).
+    Any read or parse error yields -1.0.
+    """
+    global _GPU_LOAD_PATH
+    if _GPU_LOAD_PATH is None:
+        import glob
+        patterns = (
+            "/sys/devices/platform/bus@0/*/load",
+            "/sys/devices/platform/gpu*/load",
+        )
+        nodes = [path for pattern in patterns for path in glob.glob(pattern)]
+        # First node that looks like the GPU; "" records an unsuccessful probe.
+        _GPU_LOAD_PATH = next(
+            (path for path in nodes if "gpu" in path.lower() or "17000000" in path),
+            "",
+        )
+
+    if _GPU_LOAD_PATH:
+        try:
+            with open(_GPU_LOAD_PATH) as fh:
+                return int(fh.read().strip()) / 10.0
+        except Exception:
+            pass
+    return -1.0
+
+
+class BenchmarkRunner:
+    """Runs the benchmark loop and collects all data needed by ReportGenerator.
+
+    Only methods whose ``available`` attribute is truthy are executed. A method
+    that raises during processing never aborts the benchmark (best-effort).
+
+    Usage::
+
+        runner = BenchmarkRunner(capture, methods, ground_truth_name="nn_accurate")
+        results = runner.run(n_frames=30, warmup=5, n_viz=3)
+        timing  = runner.measure_timing(n_frames=5)
+    """
+
+    def __init__(
+        self,
+        capture,
+        methods: List[DepthMethod],
+        ground_truth_name: str,
+    ) -> None:
+        """Store the capture source and methods and flag the ground-truth method.
+
+        Args:
+            capture: frame source; must provide ``get_frames()``.
+            methods: all candidate methods (available or not).
+            ground_truth_name: ``name`` of the method used as reference.
+        """
+        self._capture = capture
+        self._methods = methods
+        self._available = [m for m in methods if m.available]
+        self._gt_name = ground_truth_name
+        # Mark ground-truth flag on each method
+        for m in self._available:
+            m.is_ground_truth = m.name == ground_truth_name
+
+    # ------------------------------------------------------------------ public
+
+    def run(
+        self,
+        n_frames: int = 30,
+        warmup: int = 5,
+        n_viz: int = 3,
+    ) -> BenchmarkResults:
+        """Warm up, process ``n_frames`` frames and return the collected results.
+
+        Args:
+            n_frames: number of frames to benchmark.
+            warmup: frames processed (and discarded) before measuring.
+            n_viz: number of leading frames whose depth maps are kept for
+                visualisation (capped at ``n_frames``).
+        """
+        W, H = self._frame_size()
+        n_viz = min(n_viz, n_frames)
+
+        self._print_plan()
+        self._warmup(warmup)
+
+        all_metrics: List[FrameMetrics] = []
+        viz_frames: List[Dict] = []
+        valid_acc = {m.name: np.zeros((H, W), np.float32) for m in self._available}
+        dist_bin_mae: Dict[str, List[List[float]]] = {m.name: [] for m in self._available}
+        close_range_valid: Dict[str, List[float]] = {m.name: [] for m in self._available}
+
+        print(f"\nRunning benchmark ({n_frames} frames)...")
+        for frame_idx in range(n_frames):
+            frame_depths = self._process_frame()
+
+            # An all-zero reference is used when the GT method is not available.
+            gt_m = frame_depths.get(self._gt_name, np.zeros((H, W), np.float32))
+            self._accumulate(
+                frame_idx, frame_depths, gt_m, H, W,
+                all_metrics, valid_acc, dist_bin_mae, close_range_valid,
+            )
+            if frame_idx < n_viz:
+                viz_frames.append({k: v.copy() for k, v in frame_depths.items()})
+            if (frame_idx + 1) % 10 == 0 or (frame_idx + 1) == n_frames:
+                print(f"  Frame {frame_idx + 1}/{n_frames}")
+
+        # Normalise coverage maps to [0, 1]
+        for name in valid_acc:
+            valid_acc[name] /= max(n_frames, 1)
+
+        return BenchmarkResults(
+            method_names=[m.name for m in self._available],
+            method_labels={m.name: m.label for m in self._methods},
+            method_colors={m.name: m.color for m in self._methods},
+            ground_truth_name=self._gt_name,
+            n_frames=n_frames,
+            width=W,
+            height=H,
+            all_metrics=all_metrics,
+            viz_frames=viz_frames,
+            coverage_maps=valid_acc,
+            dist_bin_mae=dist_bin_mae,
+            close_range_valid=close_range_valid,
+            source=self._source_label(),
+            method_configs={m.name: m.config for m in self._available if m.config},
+        )
+
+    def measure_timing(self, n_frames: int = 5) -> Dict[str, float]:
+        """Return mean processing time (ms) per available method.
+
+        Calls that raise are not counted as timing samples; a method with no
+        successful call is reported as 0.0.
+        """
+        times: Dict[str, List[float]] = {m.name: [] for m in self._available}
+        for _ in range(n_frames):
+            fs = self._capture.get_frames()
+            for m in self._available:
+                t0 = time.monotonic()
+                try:
+                    m.process_fn(fs)
+                except Exception:
+                    # Best-effort: a failing method must not abort the run,
+                    # but the duration of an aborted call is not a valid
+                    # processing time and would skew the mean.
+                    continue
+                times[m.name].append((time.monotonic() - t0) * 1000.0)
+        return {name: float(np.mean(ts)) if ts else 0.0
+                for name, ts in times.items()}
+
+    def measure_gpu_load(
+        self, duration_sec: float = 60.0, cooldown_sec: float = 60.0,
+    ) -> Dict[str, float]:
+        """Measure GPU load per method in isolation (one method at a time).
+
+        For each method, idles for *cooldown_sec* to let the GPU settle, then
+        runs the method for *duration_sec* while a background thread samples
+        GPU utilisation every 50 ms.  This gives a realistic sustained GPU
+        load, not a peak snapshot.
+
+        No cooldown is applied before the first method. Returns the mean of
+        the valid samples per method name (0.0 when no sample could be read).
+        """
+        import threading
+
+        gpu_loads: Dict[str, float] = {}
+
+        for i, m in enumerate(self._available):
+            # Cooldown — let GPU idle between methods
+            if cooldown_sec > 0 and i > 0:
+                print(f"  Cooldown {cooldown_sec:.0f}s...")
+                time.sleep(cooldown_sec)
+
+            # Baseline reading before inference starts
+            baseline = _read_gpu_load()
+
+            samples: List[float] = []
+            stop_event = threading.Event()
+
+            def _sampler():
+                while not stop_event.is_set():
+                    val = _read_gpu_load()
+                    if val >= 0:  # negative means the reading failed
+                        samples.append(val)
+                    stop_event.wait(0.05)
+
+            # Start sampling
+            t = threading.Thread(target=_sampler, daemon=True)
+            t.start()
+
+            # Run method in isolation for duration_sec
+            t_end = time.monotonic() + duration_sec
+            n = 0
+            while time.monotonic() < t_end:
+                fs = self._capture.get_frames()
+                try:
+                    m.process_fn(fs)
+                except Exception:
+                    pass  # best-effort: keep loading the GPU for the full duration
+                n += 1
+
+            # Stop sampling
+            stop_event.set()
+            t.join(timeout=1.0)
+
+            gpu_loads[m.name] = float(np.mean(samples)) if samples else 0.0
+            print(f"  {m.label}: GPU {gpu_loads[m.name]:.0f}% "
+                  f"(baseline {baseline:.0f}%, {n} frames in {duration_sec:.0f}s)")
+
+        return gpu_loads
+
+    # ------------------------------------------------------------------ private
+
+    def _frame_size(self):
+        """Return ``(W, H)`` read from the capture object.
+
+        Tries ``_width``/``_height`` first, then ``_w``/``_h``, and falls back
+        to 640x480.
+        """
+        W = getattr(self._capture, "_width", None) or getattr(self._capture, "_w", 640)
+        H = getattr(self._capture, "_height", None) or getattr(self._capture, "_h", 480)
+        return W, H
+
+    def _print_plan(self) -> None:
+        """Print which methods will run, which are skipped and which is the GT."""
+        print(f"Ground truth: {self._gt_name}")
+        for m in self._methods:
+            if m.available:
+                tag = " [GT]" if m.is_ground_truth else ""
+                print(f"  [OK] {m.label}{tag}")
+            else:
+                print(f"  [--] {m.label}  ({m.skip_reason})")
+
+    def _warmup(self, n: int) -> None:
+        """Run every available method on ``n`` frames and discard the output."""
+        if n <= 0:
+            return
+        print(f"\nWarming up ({n} frames)...")
+        for _ in range(n):
+            fs = self._capture.get_frames()
+            for m in self._available:
+                try:
+                    m.process_fn(fs)
+                except Exception:
+                    pass  # warm-up results are discarded anyway
+
+    def _process_frame(self) -> Dict[str, np.ndarray]:
+        """Grab one frame set and return ``{method name: float32 depth map}``.
+
+        A method that raises contributes an all-zero map. Maps whose size
+        differs from the capture resolution are resized to it.
+        """
+        import cv2
+        fs = self._capture.get_frames()
+        W, H = self._frame_size()
+        result: Dict[str, np.ndarray] = {}
+        for m in self._available:
+            try:
+                pred = m.process_fn(fs)
+            except Exception:
+                pred = np.zeros((H, W), dtype=np.float32)
+            pred = pred.astype(np.float32) if pred.dtype != np.float32 else pred
+            # Resize to camera resolution if method returns different size
+            # NOTE(review): cv2.resize defaults to bilinear interpolation,
+            # which blends zero (invalid) pixels with valid depth; consider
+            # interpolation=cv2.INTER_NEAREST for depth maps.
+            if pred.shape[:2] != (H, W):
+                pred = cv2.resize(pred, (W, H))
+            result[m.name] = pred
+        return result
+
+    def _accumulate(
+        self,
+        frame_idx: int,
+        frame_depths: Dict[str, np.ndarray],
+        gt_m: np.ndarray,
+        H: int, W: int,
+        all_metrics: List[FrameMetrics],
+        valid_acc: Dict,
+        dist_bin_mae: Dict,
+        close_range_valid: Dict,
+    ) -> None:
+        """Append one frame's metrics for every available method (in place).
+
+        ``frame_idx``, ``H`` and ``W`` are currently unused. ``all_metrics``,
+        ``valid_acc``, ``dist_bin_mae`` and ``close_range_valid`` are mutated.
+        """
+        gt_close_mask = (gt_m > 0) & (gt_m < CLOSE_RANGE_THRESHOLD_M)
+        n_close = int(gt_close_mask.sum())
+
+        for m in self._available:
+            pred = frame_depths[m.name]
+            valid_acc[m.name] += (pred > 0).astype(np.float32)
+
+            if m.is_ground_truth:
+                # The reference is not compared with itself; only its
+                # coverage (percentage of pixels > 0) is computed.
+                fm = FrameMetrics(
+                    m.name, 0.0, 0.0, 0.0, 100.0,
+                    float((pred > 0).mean()) * 100.0, 0.0,
+                    mae_pen=0.0, mre_pen=0.0,
+                )
+            else:
+                fm = compute_metrics(pred, gt_m, 0.0, m.name)
+            all_metrics.append(fm)
+
+            dist_bin_mae[m.name].append(compute_bin_mae(pred, gt_m))
+
+            # Percentage of close-range GT pixels for which this method has depth.
+            if n_close > 0:
+                close_cov = float((pred[gt_close_mask] > 0).mean()) * 100.0
+            else:
+                close_cov = 0.0
+            close_range_valid[m.name].append(close_cov)
+
+    def _source_label(self) -> str:
+        """Describe the capture source: "synthetic", "bag:<path>" or "live"."""
+        from .capture import SyntheticCapture
+        if isinstance(self._capture, SyntheticCapture):
+            return "synthetic"
+        bag = getattr(self._capture, "_bag", None)
+        return f"bag:{bag}" if bag else "live"
\ No newline at end of file
diff --git a/scripts/data_manager_faro.py b/scripts/data_manager_faro.py
new file mode 100644
index 0000000..9304e76
--- /dev/null
+++ b/scripts/data_manager_faro.py
@@ -0,0 +1,289 @@
+''' 
+
+Dataset management for different image sources.
+Can read ROS bag files, bin files, mp4 files and even an image stream from the camera.
+
+Output : 
+    Depth, Left, Right or other image types
+
+Usage:
+
+  Usage examples:
+  # Training set — excludes KITCHEN paths
+  ds = DataSource()
+  ds.init_directory(root, test_keywords=['KITCHEN'], split='train')
+
+  # Test set — only KITCHEN paths
+  ds = DataSource()
+  ds.init_directory(root, test_keywords=['KITCHEN'], split='test')
+
+  # Access counts directly
+  ds.train_imgs  # list of train paths
+  ds.test_imgs   # list of test paths
+
+Environment : 
+    C:\\Users\\udubin\\Documents\\Envs\\barcode
+
+Install : 
+    See README.md
+
+
+'''
+
+from copyreg import pickle
+import numpy as np
+import cv2 
+import matplotlib.pyplot as plt
+import os
+import glob
+import re
+import scipy.io as sio
+import unittest
+
+ # importing common Use modules 
+# import sys 
+# sys.path.append(r'..\Utils\src')
+#from logger import log
+import logging as log
+
+# --------------------------------
+#%% Data source
+class DataSource:
+
+    def __init__(self):
+        """Create an empty data source; call init_directory() to populate it."""
+        # params
+        self.gray_scale_input = False
+        self.imgs = []
+        # Define the split lists up front so that reading them before
+        # init_directory() yields an empty list instead of AttributeError.
+        self.train_imgs = []
+        self.test_imgs = []
+
+        log.info('Source is defined')
+
+    def init_directory(self, input_rectified = '', gray_scale_input = False, sub_indexes = None,
+                       test_keywords = None, split = 'all'):
+        """Load directory and optionally split into train/test by path keywords.
+
+        Recursively collects every ``L_images/L_Img_*.mat`` file below
+        ``input_rectified`` and stores the selected subset in ``self.imgs``.
+
+        Args:
+            input_rectified: dataset root; a value shorter than 3 characters
+                           selects the built-in default root.
+            gray_scale_input: stored on the instance as ``gray_scale_input``.
+            sub_indexes: optional iterable of positions into the selected split;
+                           when given, only those images are kept.
+            test_keywords: list of strings (case-insensitive); paths containing any
+                           of these keywords are assigned to the test split.
+                           E.g. ['KITCHEN', 'BATHROOM']
+            split: 'all'   – return all images (default, backward-compatible)
+                   'train' – return only images whose path matches no keyword
+                   'test'  – return only images whose path matches at least one keyword
+
+        Returns:
+            Number of images in ``self.imgs``.
+        """
+        if len(input_rectified) < 3:
+            input_rectified = r'/mnt/algonas/Local'
+
+        all_imgs = glob.glob(os.path.join(input_rectified, "**/L_images/L_Img_**.mat"), recursive=True)
+        self.gray_scale_input = gray_scale_input
+
+        if test_keywords:
+            keywords_upper = [kw.upper() for kw in test_keywords]
+            def is_test(path):
+                p = path.upper()
+                return any(kw in p for kw in keywords_upper)
+
+            # Keyword matches go to the test split, everything else to train
+            # (the two lists were previously swapped).
+            self.train_imgs = [p for p in all_imgs if not is_test(p)]
+            self.test_imgs  = [p for p in all_imgs if     is_test(p)]
+        else:
+            self.train_imgs = all_imgs
+            self.test_imgs  = []
+
+        if split == 'train':
+            self.imgs = self.train_imgs
+        elif split == 'test':
+            self.imgs = self.test_imgs
+        else:  # 'all'
+            self.imgs = all_imgs
+
+        if sub_indexes is not None:
+            self.imgs = [self.imgs[idx] for idx in sub_indexes]
+
+        log.info(f"Split='{split}': {len(self.imgs)} images "
+                 f"(train={len(self.train_imgs)}, test={len(self.test_imgs)})")
+        return len(self.imgs)
+
+
+    def get_item(self, index: int, debug: bool = False):
+        """Load one sample (stereo pair, RGB and both depth maps) from the dataset.
+
+        The sibling ``.mat`` files are located by substituting the folder and
+        file-name prefix in the left-image path ``self.imgs[index]``.
+
+        Args:
+            index: position in ``self.imgs``.
+            debug: when True, also display the images and the RS-vs-Faro
+                depth error via ``show_subset``.
+
+        Returns:
+            dict with keys ``left``, ``right``, ``depth_faro``, ``depth_rs``
+            and ``rgb``. Both depth maps are converted to float32.
+
+        Note:
+            ``self.gray_scale_input`` is not applied here.
+        """
+        output_str          = {"left": [],  "right": [],   "depth_faro": [],   "depth_rs": [],   "rgb": []  }
+
+        # find path
+        left_path           = self.imgs[index]
+        right_path          = left_path.replace("L_images", "R_images").replace("L_Img", "R_Img")
+        rgb_path            = left_path.replace("L_images", "RGB_images").replace("L_Img", "RGB_Img")
+        depth_faro_path     = left_path.replace("L_images", "Depth").replace("L_Img", "Depth_Img")
+        depth_rs_path       = left_path.replace("L_images", "Z_Intel_Depth").replace("L_Img", "Z_Intel_Img")
+
+        # right image: parse the numeric frame index from the file name.
+        # Raw string avoids the invalid "\d" escape sequence. The index is
+        # currently unused; parsing it still rejects unexpected file names.
+        filename_r          = os.path.basename(right_path)
+        filename_r_wo_ext   = os.path.splitext(filename_r)[0]
+        m                   = re.search(r'R_Img_(\d+)', filename_r_wo_ext)
+        file_idx            = int(m.group(1))
+
+        # read img
+        left_img            = sio.loadmat(left_path)['Il']
+        right_img           = sio.loadmat(right_path)['Ir']
+        rgb_img             = sio.loadmat(rgb_path)['I_RGB']
+        depth_faro_img      = sio.loadmat(depth_faro_path)['depth']
+        depth_rs_img        = sio.loadmat(depth_rs_path)['Z_im']
+
+        if left_img is None or right_img is None or rgb_img is None:
+            return output_str
+
+        # Depth maps are handed out as float32; the images keep their stored dtype.
+        depth_rs_img        = depth_rs_img.astype(np.float32)
+        depth_faro_img      = depth_faro_img.astype(np.float32)
+
+        output_str["left"]          = left_img
+        output_str["right"]         = right_img
+        output_str["depth_faro"]    = depth_faro_img
+        output_str["depth_rs"]      = depth_rs_img
+        output_str["rgb"]           = rgb_img
+
+        if debug:
+            depth_error     = self.compute_depth_error(depth_rs_img, depth_faro_img)
+            img_list        = [left_img, right_img, rgb_img, depth_rs_img, depth_faro_img, depth_error]
+            ttl_list        = ['left','right','rgb','depth rs','depth faro','depth error']
+            self.show_subset(img_list, ttl_list)
+
+        return output_str
+
+    def compute_depth_error(self, depth_rs_img, depth_faro_img, depth_mask = None) :
+        """Return the per-pixel absolute difference between two depth maps.
+
+        Both inputs are cast to float32.  A pixel contributes only where both
+        maps are strictly positive and ``depth_mask`` (all True when omitted)
+        is set; every other pixel of the result is 0.
+        """
+        rs_depth    = depth_rs_img.astype(np.float32)
+        faro_depth  = depth_faro_img.astype(np.float32)
+        if depth_mask is None:
+            depth_mask = np.ones_like(rs_depth, dtype=bool)
+
+        # pixels that are positive in both maps and selected by the mask
+        usable      = np.logical_and(rs_depth > 0, np.logical_and(faro_depth > 0, depth_mask))
+
+        error_map   = np.zeros_like(rs_depth)
+        error_map[usable] = np.abs(rs_depth[usable] - faro_depth[usable])
+        return error_map
+    
+    def show_subset(self, img_list, ttl_list, vmin=None, vmax=None, save_path='', fig_name=''):
+        """Show a list of images as a grid of subplots in one figure.
+
+        Args:
+            img_list: images accepted by ``Axes.imshow``.
+            ttl_list: one title per image, in the order of ``img_list``.
+            vmin, vmax: color limits forwarded to every ``imshow`` call.
+            save_path: existing directory in which the figure is also saved;
+                nothing is saved when the path does not exist.
+            fig_name: file name (without ".png") used when saving.
+        """
+        img_num  = len(img_list)
+        if img_num == 0:
+            return      # nothing to draw, and an empty grid cannot be created
+
+        # one extra row for every four images; the column count is rounded up
+        # so that the grid has a cell for every image (5 images -> 2 x 3)
+        row_num  = img_num // 4 + 1
+        col_num  = (img_num + row_num - 1) // row_num
+
+        # squeeze=False: always get a 2-D array of axes, even for a 1 x 1 grid
+        fig, axes = plt.subplots(row_num, col_num, sharey=True, sharex=True, squeeze=False)
+        for k in range(img_num):
+            ri, ci = divmod(k, col_num)
+            axes[ri, ci].imshow(img_list[k], vmin=vmin, vmax=vmax)
+            axes[ri, ci].set_title(ttl_list[k])
+
+        # hide the grid cells that received no image
+        for k in range(img_num, row_num * col_num):
+            ri, ci = divmod(k, col_num)
+            axes[ri, ci].axis('off')
+
+        if os.path.exists(save_path):
+            fig.savefig(os.path.join(save_path, fig_name + ".png"))
+
+        plt.show(block=False)
+
+    def save_data_to_folder(self, output_str, output_directory):
+        """Save the images of one sample as uint16 PNG files.
+
+        Args:
+            output_str: sample dict with the array entries "left", "right",
+                "depth_faro", "depth_rs" and "rgb".
+            output_directory: destination folder; created when missing.
+
+        Returns:
+            True only when every file was written successfully.
+        """
+        # exist_ok=True prevents an error if the directory already exists
+        os.makedirs(output_directory, exist_ok=True)
+
+        # output file name -> key of the sample dict
+        file_keys = {
+            "img_left.png":         "left",
+            "img_right.png":        "right",
+            "img_depth_faro.png":   "depth_faro",
+            "img_depth_rs.png":     "depth_rs",
+            "img_rgb.png":          "rgb",
+        }
+
+        # convert everything first so that a missing entry fails before any write
+        images  = {name: output_str[key].astype(np.uint16) for name, key in file_keys.items()}
+
+        success = True
+        for file_name, image in images.items():
+            written = cv2.imwrite(os.path.join(output_directory, file_name), image, [cv2.IMWRITE_PNG_COMPRESSION, 0])
+            # remember every failure instead of reporting only the last write
+            success = success and bool(written)
+
+        return success
+    
+ 
+    
+
+# --------------------------------        
+#%% Tests
+class TestDataSource(unittest.TestCase):
+    """Manual checks of DataSource; they read the dataset from disk and open figures."""
+
+    def test_init_directory(self):
+        "the data source finds at least one sample"
+        source      = DataSource()
+        sample_num  = source.init_directory()
+        self.assertTrue(sample_num > 0)
+
+    def test_get_item(self):
+        "sample 7 is loaded and displayed with a non-empty left image"
+        source      = DataSource()
+        source.init_directory()
+        sample      = source.get_item(7, debug = True)
+        self.assertTrue(len(sample["left"]) > 0)
+
+    def test_show_images(self):
+        "load and display 8 randomly chosen samples"
+        source      = DataSource()
+        sample_num  = source.init_directory()
+        if sample_num == 0:
+            log.warning("No images found in the directory.")
+            return
+        for idx in np.random.randint(0, high=sample_num, size=8):
+            sample  = source.get_item(idx, debug = True)
+            self.assertTrue(len(sample["left"]) > 0)
+        plt.show()
+
+    def test_show_images_and_save(self):
+        "load and display 16 randomly chosen samples and save each of them to disk"
+        source      = DataSource()
+        sample_num  = source.init_directory()
+        if sample_num == 0:
+            log.warning("No images found in the directory.")
+            return
+        for idx in np.random.randint(0, high=sample_num, size=16):
+            sample      = source.get_item(idx, debug = True)
+            target_dir  = f"C:\\Work\\Projects\\Deploy\\data\\fs\\index_{idx:04d}"
+            saved       = source.save_data_to_folder(sample, output_directory = target_dir)
+            self.assertTrue(saved)
+        plt.show()
+
+# --------------------------------
+#%% Run Test
+def RunTest():
+    "Run the selected manual test directly, without the unittest runner."
+    # other available checks: test_init_directory, test_show_images,
+    # test_show_images_and_save
+    checks = TestDataSource()
+    checks.test_get_item()
+
+
+#%%
+if __name__ == "__main__":
+    # executed as a script: run the manually selected test
+    RunTest()
\ No newline at end of file
diff --git a/scripts/data_manager_faro_laptop.py b/scripts/data_manager_faro_laptop.py
new file mode 100644
index 0000000..10fa007
--- /dev/null
+++ b/scripts/data_manager_faro_laptop.py
@@ -0,0 +1,278 @@
+''' 
+
+Dataset management from different image source.
+Can read png files created from FARO dataset
+
+Output : 
+    Depth, Left, Right or other image types
+
+Usage:
+
+Environment : 
+    docker fs
+
+Install : 
+    See README.md
+
+
+'''
+
+import numpy as np
+import cv2 
+import matplotlib.pyplot as plt
+import os
+import unittest
+#from torch.utils.data import DataLoader
+
+ # importing common Use modules
+
+
+
+# --------------------------------
+#%% Data source
+class DataSource:
+    """Reader for FARO samples that were exported to PNG files.
+
+    init_directory() collects the sample directories (names starting with
+    "index") and get_item() loads the five images of one of them.
+    """
+
+    def __init__(self):
+
+        # params
+        self.input_dir        = ''      # dataset root, set by init_directory()
+        self.gray_scale_input = False   # stored by init_directory()
+        self.dirs             = []      # names of the sample directories
+        self.count            = 0       # running number used by save_image()
+
+        print('Source is defined')
+
+    def get_bf(self):
+        "image baseline for faro"
+        # NOTE(review): presumably baseline * focal length, used to convert
+        # between disparity and depth - confirm meaning and units
+        return 49470.45
+
+    def init_directory(self,  gray_scale_input = False, sub_indexes = None, input_rectified = r'./data/faro'):
+        """Collect the sample directories found under the dataset root.
+
+        Args:
+            gray_scale_input: flag stored on the instance.
+            sub_indexes: optional indexes into the sorted directory list;
+                only these samples are kept, in the given order.
+            input_rectified: dataset root that holds the "index*" directories.
+
+        Returns:
+            Number of sample directories that were kept.
+        """
+        # os.listdir() returns the names in arbitrary order; sort them so that
+        # an index always refers to the same sample
+        dir_list             = sorted(os.listdir(input_rectified))
+        self.dirs            = [name for name in dir_list if name.startswith('index')]
+
+        self.gray_scale_input = gray_scale_input
+        if sub_indexes is not None:
+            self.dirs = [self.dirs[idx] for idx in sub_indexes]
+
+        self.input_dir      = input_rectified
+        print(f'Total directories {len(self.dirs)}')
+        return len(self.dirs)
+
+    def get_image_from_directory(self, dir_path):
+        """Read one depth/left/right triplet from the PNG files in ``dir_path``.
+
+        Returns (True, array), where the array stacks left, right and depth
+        along the third axis, or (False, None) when no further PNG file is
+        available.
+
+        NOTE(review): this method reads self.file_names, self.frame_count and
+        self.direct_count, none of which is set in __init__; unless the caller
+        assigns them first the call fails with AttributeError.
+        """
+
+        # check if initialized
+        fpath               = dir_path
+
+        #print(f'Reading files from {fpath}.....')
+        files               = os.listdir(fpath)
+        file_extensions     = ['.png'] #['.png','.jpg','.bmp','z.v_0.png']
+        file_name_part     = self.file_names[0] # depth name
+        filtered_files      = [file for file in files if file.endswith(tuple(file_extensions))]
+        file_list           = filtered_files
+        frame_count         = 0
+        print('Found %d.' %len(filtered_files))
+
+        file_num            = len(file_list)
+        if file_num < 1 or self.frame_count >= file_num:
+            print('No image files are found')
+            return False, None
+
+        # Iterate over files and process them
+        file_name           = file_list[self.frame_count]
+        file_path           = os.path.join(fpath, file_name)
+        img_array_d         = cv2.imread(file_path, cv2.IMREAD_UNCHANGED) 
+
+        # depth with left
+        file_path           = os.path.join(fpath, file_name.replace(self.file_names[0],self.file_names[1]))
+        img_array_l         = cv2.imread(file_path, cv2.IMREAD_UNCHANGED).astype(img_array_d.dtype) 
+
+        # depth with right
+        file_path           = os.path.join(fpath, file_name.replace(self.file_names[0],self.file_names[2]))
+        img_array_r         = cv2.imread(file_path, cv2.IMREAD_UNCHANGED).astype(img_array_d.dtype) 
+
+        # assign data
+        self.frame_gray     = img_array_d               
+        self.frame_left     = img_array_l
+        self.frame_right    = img_array_r        
+        img_array           = np.stack((img_array_l,img_array_r,img_array_d),2)
+
+
+        # check the number of files : self.direct_count can be 0,+1,-1
+        self.frame_count    = (self.frame_count + self.direct_count) #% file_num
+        self.frame_name     = file_name
+
+        return True, img_array   
+
+
+    def get_item(self, index: int, debug: bool = False):
+        """Load the five images of the sample directory at ``index``.
+
+        Returns a dict with the keys img_left, img_right, img_depth_faro,
+        img_depth_rs and img_rgb holding the arrays returned by cv2.imread
+        (read with IMREAD_UNCHANGED).  All values stay empty lists when the
+        index is out of range or the left, right or rgb image cannot be read.
+        With ``debug`` the images and the depth error are also displayed.
+        """
+        output_str          = {"img_left": [],  "img_right": [],   "img_depth_faro": [],   "img_depth_rs": [],   "img_rgb": []  }    
+
+        # negative indexes count from the end, as for any Python list
+        dir_num             = len(self.dirs)
+        if not -dir_num <= index < dir_num:
+            print(f'bad index {index}')
+            return output_str
+
+        # find path
+        base_folder         = os.path.join(self.input_dir ,self.dirs[index])
+        left_img            = cv2.imread(os.path.join(base_folder, "img_left.png"), cv2.IMREAD_UNCHANGED)
+        right_img           = cv2.imread(os.path.join(base_folder, "img_right.png"), cv2.IMREAD_UNCHANGED)
+        depth_faro_img      = cv2.imread(os.path.join(base_folder, "img_depth_faro.png"), cv2.IMREAD_UNCHANGED)
+        depth_rs_img        = cv2.imread(os.path.join(base_folder, "img_depth_rs.png"), cv2.IMREAD_UNCHANGED)
+        rgb_img             = cv2.imread(os.path.join(base_folder, "img_rgb.png"), cv2.IMREAD_UNCHANGED)
+
+        # cv2.imread returns None for a missing or unreadable file
+        if left_img is None or right_img is None or rgb_img is None:
+            print(f'bad directory {base_folder}')
+            return output_str
+        if len(left_img)<1  or len(right_img) < 1:
+            print(f'bad directory {base_folder}')
+            return output_str
+
+        output_str["img_left"]          = left_img
+        output_str["img_right"]         = right_img
+        output_str["img_depth_faro"]    = depth_faro_img
+        output_str["img_depth_rs"]      = depth_rs_img
+        output_str["img_rgb"]           = rgb_img
+
+        if debug:
+            depth_error     = self.compute_depth_error(depth_rs_img, depth_faro_img)
+            img_list        = [left_img, right_img, rgb_img, depth_rs_img, depth_faro_img, depth_error]
+            ttl_list        = ['left','right','rgb','depth rs','depth faro','depth error']
+            self.show_subset(img_list, ttl_list)
+
+        return output_str
+
+    def load_specific_files(self, debug = True):
+        """Load a fixed pair of files from ./data/roi into a sample dict.
+
+        2_Infrared.png is used for the left and depth_rs entries and
+        1_Color.png for the right, depth_faro and rgb entries; the depth
+        entries keep only the first channel.
+
+        NOTE(review): the ``[:,:,0]`` indexing requires both files to be read as
+        3-dimensional arrays, and the debug display compares the two full
+        images, so they presumably must have the same shape - confirm.
+        """
+
+        # find path
+        base_folder         = r'./data/roi'
+        #print(f'Reading data from {base_folder}')
+        img_path            = os.path.join(base_folder, "2_Infrared.png")
+        left_img            = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
+        img_path            = os.path.join(base_folder, "1_Color.png")
+        right_img           = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
+        img_path            = os.path.join(base_folder, "1_Color.png")
+        depth_faro_img      = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
+        img_path            = os.path.join(base_folder, "2_Infrared.png")
+        depth_rs_img        = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
+        img_path            = os.path.join(base_folder, "1_Color.png")
+        rgb_img             = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)          
+
+        output_str          = {"img_left": [],  "img_right": [],   "img_depth_faro": [],   "img_depth_rs": [],   "img_rgb": []  }   
+        output_str["img_left"]          = left_img
+        output_str["img_right"]         = right_img
+        output_str["img_depth_faro"]    = depth_faro_img[:,:,0]
+        output_str["img_depth_rs"]      = depth_rs_img[:,:,0]
+        output_str["img_rgb"]           = rgb_img
+
+        if debug:
+            depth_error     = self.compute_depth_error(depth_rs_img, depth_faro_img)
+            img_list        = [left_img, right_img, rgb_img, depth_rs_img, depth_faro_img, depth_error]
+            ttl_list        = ['left','right','rgb','depth rs','depth faro','depth error']
+            self.show_subset(img_list, ttl_list)        
+
+        return output_str  
+
+    def compute_depth_error(self, depth_rs_img, depth_faro_img, depth_mask = None) :
+        """Return the per-pixel absolute difference between two depth maps.
+
+        Both inputs are cast to float32.  The error is computed only where
+        both maps are strictly positive and ``depth_mask`` (all True when
+        omitted) is set; every other pixel of the result is 0.
+        """
+        depth_rs_img, depth_faro_img = depth_rs_img.astype(np.float32), depth_faro_img.astype(np.float32)
+        depth_error = np.zeros_like(depth_rs_img)
+        depth_mask  = np.ones_like(depth_rs_img,dtype=bool) if depth_mask is None else depth_mask
+
+        depth_valid = np.logical_and(depth_faro_img > 0, depth_mask)
+        depth_valid = np.logical_and(depth_rs_img > 0, depth_valid)
+        depth_error[depth_valid] = np.abs(depth_rs_img[depth_valid] - depth_faro_img[depth_valid])
+        return depth_error
+
+    def show_subset(self, img_list, ttl_list, vmin=None, vmax=None, save_path='', fig_name=''):
+        """Show a list of images as a grid of subplots and block until closed.
+
+        Args:
+            img_list: images accepted by ``Axes.imshow``.
+            ttl_list: one title per image, in the order of ``img_list``.
+            vmin, vmax: color limits forwarded to every ``imshow`` call.
+            save_path: existing directory in which the figure is also saved;
+                nothing is saved when the path does not exist.
+            fig_name: file name (without ".png") used when saving.
+        """
+        img_num  = len(img_list)
+        if img_num == 0:
+            return      # nothing to draw, and an empty grid cannot be created
+
+        # one extra row for every four images; the column count is rounded up
+        # so that the grid has a cell for every image (5 images -> 2 x 3)
+        row_num  = img_num // 4 + 1
+        col_num  = (img_num + row_num - 1) // row_num
+
+        # squeeze=False: always get a 2-D array of axes, even for a 1 x 1 grid
+        fig, axes = plt.subplots(row_num, col_num, sharey=True, sharex=True, squeeze=False)
+        for k in range(img_num):
+            ri, ci = divmod(k, col_num)
+            axes[ri, ci].imshow(img_list[k], vmin=vmin, vmax=vmax)
+            axes[ri, ci].set_title(ttl_list[k])
+
+        # hide the grid cells that received no image
+        for k in range(img_num, row_num * col_num):
+            ri, ci = divmod(k, col_num)
+            axes[ri, ci].axis('off')
+
+        if os.path.exists(save_path):
+            fig.savefig(os.path.join(save_path, fig_name + ".png"))
+
+        #plt.show(block=False)
+        plt.show()
+
+    def save_image(self, frame, fname = ''):
+        "save a frame as uint16 png in the working directory, numbered by self.count"
+        fn = './image_%03d_%s.png' % (self.count, fname)
+        frame = frame.astype(np.uint16) #cv.cvtColor(frame, cv.CV_16U)
+        cv2.imwrite(fn, frame, [cv2.IMWRITE_PNG_COMPRESSION, 0])
+        print(fn, 'saved')
+        self.count += 1
+    
+ 
+    
+
+# --------------------------------        
+#%% Tests
+class TestDataSource(unittest.TestCase):
+    """Manual checks of DataSource; they read ./data/faro and open figures."""
+
+    def test_init_directory(self):
+        "the data source finds at least one sample directory"
+        source      = DataSource()
+        dir_num     = source.init_directory()
+        self.assertTrue(dir_num > 0)
+
+    def test_get_item(self):
+        "sample 7 is loaded and displayed with a non-empty left image"
+        source      = DataSource()
+        source.init_directory()
+        sample      = source.get_item(7, debug = True)
+        self.assertTrue(len(sample["img_left"]) > 0)
+
+    def test_show_images(self):
+        "load and display 8 randomly chosen samples"
+        source      = DataSource()
+        dir_num     = source.init_directory()
+        for idx in np.random.randint(0,dir_num,8):
+            sample  = source.get_item(idx, debug = True)
+            self.assertTrue(len(sample["img_left"]) > 0)
+        plt.show()
+
+
+
+# --------------------------------
+#%% Run Test
+def RunTest():
+    "Run the selected manual test directly, without the unittest runner."
+    # other available checks: test_init_directory, test_get_item
+    checks = TestDataSource()
+    checks.test_show_images()
+
+
+#%%
+if __name__ == "__main__":
+    # executed as a script: run the manually selected test
+    RunTest()
\ No newline at end of file
diff --git a/scripts/data_manager_inbolt.py b/scripts/data_manager_inbolt.py
new file mode 100644
index 0000000..f4ef3ce
--- /dev/null
+++ b/scripts/data_manager_inbolt.py
@@ -0,0 +1,485 @@
+'''
+
+Dataset management for Inbolt stereo dataset.
+
+Loads RealSense IR stereo pairs (mono0/mono1) as left/right images
+and Zivid depthmap as ground-truth depth.
+
+Expected directory layout (one or more session folders under root):
+    <root>/
+      <session>/
+        realsense/
+          <idx>/
+            mono0.png          # left IR image  (uint8 or uint16)
+            mono1.png          # right IR image (uint8 or uint16)
+            depthmap_mm.png    # RealSense depth in mm
+            metadata.yaml      # optional
+        zivid/
+          <idx>/
+            depthmap_mm.png    # GT depth in mm (uint16)
+            color.png          # optional RGB
+            metadata.yaml      # optional
+
+Only samples that have BOTH a realsense capture with its depth map AND a
+matching zivid depth map are included. Sessions that lack a realsense or a
+zivid subfolder (e.g. freedrive-only captures) are silently skipped.
+
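+Output dict keys (same layout as faro_data_manager, except that the ground
+truth is stored under depth_zivid and the metadata entries are added):
+    left        : numpy array  (H, W)   uint8/uint16 IR
+    right       : numpy array  (H, W)   uint8/uint16 IR
+    depth_zivid : numpy array  (H, W)   float32, mm  ← Zivid GT
+    depth_rs    : numpy array  (H, W)   float32, mm  ← RealSense depth
+    rgb         : numpy array  (H, W, 3) uint8        (Zivid color, or an empty array)
+    metadata_rs : parsed realsense metadata.yaml, or None
+    metadata_zv : parsed zivid metadata.yaml, or None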
+
+'''
+
+import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+import os
+import glob
+import unittest
+import logging as log
+import yaml
+
+# format logger
+# (configures the root logger as a side effect of importing this module)
+log.basicConfig(format='[%(asctime)s] %(levelname)s: %(message)s', level=log.INFO)
+
+# --------------------------------
+# Camera calibration, passed below as camera matrix / distortion coefficients
+# to cv2.undistortPoints and cv2.projectPoints.
+
+# RealSense camera matrix
+CAMERA_MATRIX_RS = np.array([
+    [385.5098876953125, 0, 328.31732177734375],
+    [0, 385.5098876953125, 235.6382141113281],
+    [0, 0, 1]
+])
+
+# RealSense distortion coefficients: all zero, i.e. no distortion is applied
+DIST_COEFFS_RS = np.array([
+    0.0,
+    -0.0,
+    -0.0,
+    0.0,
+    -0.0
+])
+
+# Zivid camera matrix
+CAMERA_MATRIX_ZIVID = np.array([
+    [1241.853637, 0, 609.9444419],
+    [0, 1241.853637, 513.6974808515621],
+    [0, 0, 1]
+])
+# Zivid distortion coefficients.
+# NOTE(review): the leading "- " on every entry looks like a YAML list marker
+# that was pasted into Python, where it acts as a unary minus and flips the
+# sign of each coefficient ("- -0.036" evaluates to +0.036).  Verify the signs
+# against the original calibration file before relying on these values.
+DIST_COEFFS_ZIVID = np.array([
+    - 0.04514386132359505,
+    - -0.03609563037753105,
+    - -6.156915333122015e-05,
+    - 0.00015102965699043125,
+    - -0.17297066748142242
+])
+
+
+
+
+# --------------------------------
+#%% Data source
+class DataSource:
+
+    def __init__(self, train_mode = True):
+        """Create an empty data source; call init_directory() to fill it.
+
+        ``train_mode`` selects which sessions init_directory() scans.
+        """
+        self.train_mode = train_mode
+        self.gray_scale_input = False
+        # one dict of file paths per sample, filled by init_directory()
+        self.imgs = []
+        log.info('Source is defined')
+
+    def init_directory(self, input_rectified='', gray_scale_input=False, sub_indexes=None):
+        """Scan the dataset root for complete samples and populate ``self.imgs``.
+
+        A sample is kept when its realsense folder holds mono0.png, mono1.png
+        and depthmap_mm.png and the zivid folder with the same index holds
+        depthmap_mm.png.  color.png and the two metadata.yaml files are
+        optional and recorded as None when absent.
+
+        Args:
+            input_rectified: dataset root; the default network path is used
+                when the string is shorter than 3 characters.
+            gray_scale_input: flag stored on the instance.
+            sub_indexes: optional indexes into the full sample list; only
+                these samples are kept, in the given order.
+
+        Returns:
+            Number of samples in ``self.imgs``; 0 when the root does not exist.
+        """
+        if len(input_rectified) < 3:
+            input_rectified = (
+                r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/'
+                r'Inbolt_datasets/Data Collection-20260322T091926Z-1-001/Data Collection'
+            )
+
+        self.gray_scale_input = gray_scale_input
+        self.imgs = []
+
+        # Sessions listed here are held out: train mode skips them, while
+        # non-train mode loads ONLY them.
+        IGNORED_SESSIONS = {'20260414_142239'}
+
+        try:
+            session_names = sorted(os.listdir(input_rectified))
+        except FileNotFoundError:
+            log.error(f"Directory not found: {input_rectified}")
+            return 0
+
+        # Each immediate sub-directory is a session
+        train_mode = bool(self.train_mode)
+        sessions = [
+            os.path.join(input_rectified, name)
+            for name in session_names
+            if os.path.isdir(os.path.join(input_rectified, name))
+            and (name in IGNORED_SESSIONS) != train_mode
+        ]
+
+        for session in sessions:
+            rs_root    = os.path.join(session, 'realsense')
+            zivid_root = os.path.join(session, 'zivid')
+            if not (os.path.isdir(rs_root) and os.path.isdir(zivid_root)):
+                continue  # session has no stereo+GT pair
+
+            # every realsense/<idx>/mono0.png is a candidate; match by folder name
+            for left_path in sorted(glob.glob(os.path.join(rs_root, '*', 'mono0.png'))):
+                idx    = os.path.basename(os.path.dirname(left_path))
+                rs_dir = os.path.join(rs_root, idx)
+                zv_dir = os.path.join(zivid_root, idx)
+
+                required = {
+                    'left':        left_path,
+                    'right':       os.path.join(rs_dir, 'mono1.png'),
+                    'depth_rs':    os.path.join(rs_dir, 'depthmap_mm.png'),
+                    'depth_zivid': os.path.join(zv_dir, 'depthmap_mm.png'),
+                }
+                if not all(os.path.isfile(path) for path in required.values()):
+                    continue  # skip incomplete samples
+
+                optional = {
+                    'rgb':         os.path.join(zv_dir, 'color.png'),
+                    'metadata_rs': os.path.join(rs_dir, 'metadata.yaml'),
+                    'metadata_zv': os.path.join(zv_dir, 'metadata.yaml'),
+                }
+                sample = dict(required)
+                for key, path in optional.items():
+                    sample[key] = path if os.path.isfile(path) else None
+                self.imgs.append(sample)
+
+        if sub_indexes is not None:
+            self.imgs = [self.imgs[i] for i in sub_indexes]
+
+        log.info(f"DataSource: found {len(self.imgs)} samples in {input_rectified}")
+        return len(self.imgs)
+
+    def get_item(self, index: int, debug: bool = False):
+        """Load the sample at ``index`` and return it as a dict.
+
+        Keys: left, right (images as stored on disk), depth_zivid and depth_rs
+        (float32), rgb (color image, or an empty array when unavailable),
+        metadata_rs and metadata_zv (parsed YAML, or None).  When one of the
+        four mandatory images cannot be read a warning is logged and the dict
+        is returned with its initial empty values.  With ``debug`` the images
+        are also displayed.
+        """
+        output_str = {"left": [], "right": [], "depth_zivid": [], "depth_rs": [], "rgb": [], "metadata_rs": None, "metadata_zv": None}
+
+        entry = self.imgs[index]
+
+        # mandatory images, read without any conversion
+        left_img, right_img, depth_rs_img, depth_zivid_img = (
+            cv2.imread(entry[key], cv2.IMREAD_UNCHANGED)
+            for key in ('left', 'right', 'depth_rs', 'depth_zivid')
+        )
+        if any(img is None for img in (left_img, right_img, depth_rs_img, depth_zivid_img)):
+            log.warning(f"Failed to load sample {index}: {entry}")
+            return output_str
+
+        # optional color image; an empty array stands for "not available"
+        rgb_img = None
+        if entry['rgb'] is not None:
+            rgb_img = cv2.imread(entry['rgb'], cv2.IMREAD_COLOR)
+        if rgb_img is None:
+            rgb_img = np.array([], dtype=np.uint8)
+
+        depth_zivid = depth_zivid_img.astype(np.float32)   # Zivid GT
+        depth_rs    = depth_rs_img.astype(np.float32)
+
+        # optional metadata files
+        metadata = {'metadata_rs': None, 'metadata_zv': None}
+        for key in metadata:
+            if entry.get(key) is not None:
+                with open(entry[key], 'r') as f:
+                    metadata[key] = yaml.safe_load(f)
+
+        output_str.update({
+            "left":        left_img,
+            "right":       right_img,
+            "depth_zivid": depth_zivid,
+            "depth_rs":    depth_rs,
+            "rgb":         rgb_img,
+            "metadata_rs": metadata['metadata_rs'],
+            "metadata_zv": metadata['metadata_zv'],
+        })
+
+        if debug:
+            img_list = [left_img, right_img, depth_rs, depth_zivid]
+            ttl_list = ['left (RS)', 'right (RS)', 'depth RS (mm)', 'depth Zivid (mm)']
+            if rgb_img.size > 0:
+                img_list.append(rgb_img)
+                ttl_list.append('rgb (Zivid)')
+            self.show_subset(img_list, ttl_list)
+
+        return output_str
+    
+    def get_item_projected(self, index: int, debug: bool = False):
+        """Load the sample at ``index`` with the Zivid depth projected to the RS view.
+
+        Same dict as get_item(), except that ``depth_zivid`` holds the output
+        of project_depth_zivid_to_rs().  The projection is cached next to the
+        Zivid depth map as "<name>_projected.png" (uint16) and reused on later
+        calls; it is recomputed when the cached file is missing or unreadable.
+
+        When one of the four mandatory images cannot be read a warning is
+        logged and the dict is returned with its initial empty values.  With
+        ``debug`` the images are also displayed.
+        """
+        output_str = {"left": [], "right": [], "depth_zivid": [], "depth_rs": [], "rgb": [], "metadata_rs": None, "metadata_zv": None}
+
+        entry           = self.imgs[index]
+
+        left_img        = cv2.imread(entry['left'],  cv2.IMREAD_UNCHANGED)
+        right_img       = cv2.imread(entry['right'], cv2.IMREAD_UNCHANGED)
+        depth_rs_img    = cv2.imread(entry['depth_rs'], cv2.IMREAD_UNCHANGED)
+        depth_zivid_img = cv2.imread(entry['depth_zivid'], cv2.IMREAD_UNCHANGED)
+
+        if left_img is None or right_img is None or depth_rs_img is None or depth_zivid_img is None:
+            log.warning(f"Failed to load sample {index}: {entry}")
+            return output_str
+
+        # optional color image; an empty array stands for "not available"
+        rgb_img = np.array([], dtype=np.uint8)
+        if entry['rgb'] is not None:
+            rgb_img = cv2.imread(entry['rgb'], cv2.IMREAD_COLOR)
+            if rgb_img is None:
+                rgb_img = np.array([], dtype=np.uint8)
+
+        depth_rs    = depth_rs_img.astype(np.float32)
+        depth_zivid = depth_zivid_img.astype(np.float32)
+
+        # reuse the cached projection when it exists and can be read
+        zivid_projected_path = entry['depth_zivid'].replace('.png', '_projected.png')
+        cached = None
+        if os.path.exists(zivid_projected_path):
+            cached = cv2.imread(zivid_projected_path, cv2.IMREAD_UNCHANGED)
+        if cached is not None:
+            depth_zivid_projected = cached.astype(np.float32)
+        else:
+            depth_zivid_projected = self.project_depth_zivid_to_rs(depth_zivid, depth_rs, finx = index)
+            cv2.imwrite(zivid_projected_path, depth_zivid_projected.astype(np.uint16), [cv2.IMWRITE_PNG_COMPRESSION, 0])
+
+        metadata_rs = None
+        if entry.get('metadata_rs') is not None:
+            with open(entry['metadata_rs'], 'r') as f:
+                metadata_rs = yaml.safe_load(f)
+
+        metadata_zv = None
+        if entry.get('metadata_zv') is not None:
+            with open(entry['metadata_zv'], 'r') as f:
+                metadata_zv = yaml.safe_load(f)
+
+        output_str["left"]        = left_img
+        output_str["right"]       = right_img
+        output_str["depth_zivid"] = depth_zivid_projected   # Zivid GT
+        output_str["depth_rs"]    = depth_rs
+        output_str["rgb"]         = rgb_img
+        output_str["metadata_rs"] = metadata_rs
+        output_str["metadata_zv"] = metadata_zv
+
+        if debug:
+            img_list = [left_img, right_img, depth_rs, depth_zivid_projected]
+            ttl_list = ['left (RS)', 'right (RS)', 'depth RS (mm)', 'depth Zivid (mm)']
+            self.show_subset(img_list, ttl_list)
+            # To inspect the depth maps as point clouds, convert them with
+            # project_camera_to_3d(depth, CAMERA_MATRIX_RS, DIST_COEFFS_RS) and
+            # write the result (divided by 1000 for meters) with save_to_ply().
+
+        return output_str
+
+    def compute_depth_error(self, depth_pred, depth_gt, depth_mask=None):
+        """Return the signed per-pixel depth error ``depth_pred - depth_gt``.
+
+        Both inputs are cast to float32.  The error is computed only where
+        both maps are strictly positive and ``depth_mask`` (all True when
+        omitted) is set; every other pixel of the result is 0.
+        """
+        pred = depth_pred.astype(np.float32)
+        gt   = depth_gt.astype(np.float32)
+        if depth_mask is None:
+            depth_mask = np.ones_like(pred, dtype=bool)
+
+        usable = np.logical_and(pred > 0, np.logical_and(gt > 0, depth_mask))
+
+        error_map = np.zeros_like(pred)
+        error_map[usable] = pred[usable] - gt[usable]
+        return error_map
+
+    def show_subset(self, img_list, ttl_list, vmin=None, vmax=None, save_path='', fig_name=''):
+        """Show the images in a grid of at most three columns (non-blocking).
+
+        ``ttl_list`` supplies one title per image and ``vmin``/``vmax`` are
+        forwarded to every imshow call.  The figure is additionally saved as
+        "<fig_name>.png" when ``save_path`` names an existing path.
+        """
+        img_num = len(img_list)
+        col_num = min(img_num, 3)
+        row_num = -(-img_num // col_num)     # ceiling division
+        fig, axes = plt.subplots(row_num, col_num, sharey=True, sharex=True)
+
+        # subplots may return a single Axes or a 1-D array; use a flat,
+        # row-major array of all grid cells in every case
+        cells = np.array(axes).reshape(row_num, col_num).reshape(-1)
+        for k, img in enumerate(img_list):
+            cells[k].imshow(img, vmin=vmin, vmax=vmax)
+            cells[k].set_title(ttl_list[k])
+        # blank the cells left over in the last row
+        for cell in cells[img_num:]:
+            cell.axis('off')
+
+        if save_path and os.path.exists(save_path):
+            fig.savefig(os.path.join(save_path, fig_name + ".png"))
+        plt.show(block=False)
+
+    def save_data_to_folder(self, output_str, output_directory):
+        """Save one sample dict as PNG files in ``output_directory``.
+
+        The left and right images are written as they are, the two depth maps
+        are converted to uint16 and the color image is written only when it
+        holds data.  The directory is created when missing.
+
+        Returns:
+            True only when every attempted write succeeded, including the
+            optional color image.
+        """
+        os.makedirs(output_directory, exist_ok=True)
+
+        images = {
+            "img_left.png":        output_str["left"],
+            "img_right.png":       output_str["right"],
+            "img_depth_zivid.png": output_str["depth_zivid"].astype(np.uint16),
+            "img_depth_rs.png":    output_str["depth_rs"].astype(np.uint16),
+        }
+        # the color image is optional: None or an empty array means "absent"
+        rgb = output_str["rgb"]
+        if rgb is not None and np.asarray(rgb).size > 0:
+            images["img_rgb.png"] = rgb
+
+        success = True
+        for fname, img in images.items():
+            written = cv2.imwrite(os.path.join(output_directory, fname), img,
+                                  [cv2.IMWRITE_PNG_COMPRESSION, 0])
+            success = success and bool(written)
+
+        return success
+    
+
+    def save_to_ply(self, points: np.ndarray, filename: str):
+        """Write a sequence of (x, y, z) points to ``filename`` as an ASCII PLY file."""
+        header = (
+            'ply\n'
+            'format ascii 1.0\n'
+            f'element vertex {len(points)}\n'
+            'property float x\n'
+            'property float y\n'
+            'property float z\n'
+            'end_header\n'
+        )
+        with open(filename, 'w') as ply_file:
+            ply_file.write(header)
+            # one "x y z" line per point
+            ply_file.writelines(f'{x} {y} {z}\n' for x, y, z in points)
+        log.info(f"Saved point cloud to {filename}")
+
+    def project_camera_to_3d(self, depth_img_mm: np.ndarray, cam_matrix: np.ndarray, dist_coeffs: np.ndarray) -> np.ndarray:
+        """Back-project every valid pixel of a depth image to a 3D point in the camera frame.
+
+        Pixels whose depth is non-finite or <= 0 are dropped. Returns a float32 (N, 3)
+        array of X, Y, Z in the units of `depth_img_mm` (possibly with N == 0).
+        """
+        rows, cols = depth_img_mm.shape
+        # Pixel grid as (x, y) pairs in row-major order, shaped Nx1x2 float32 for OpenCV.
+        grid_x, grid_y = np.meshgrid(np.arange(cols, dtype=np.float32), np.arange(rows, dtype=np.float32), indexing='xy')
+        pixels = np.stack([grid_x, grid_y], axis=-1).reshape(-1, 1, 2).astype(np.float32)
+
+        # undistortPoints called without a new projection matrix yields normalized image coordinates.
+        normalized = cv2.undistortPoints(pixels, cam_matrix.astype(np.float32), dist_coeffs.astype(np.float32)).reshape(-1, 2)
+
+        depth = depth_img_mm.reshape(-1).astype(np.float32)
+        keep = np.isfinite(depth) & (depth > 0)
+        if not keep.any():
+            return np.zeros((0, 3), dtype=np.float32)
+
+        depth = depth[keep]
+        # Scale the normalized ray (x, y, 1) by the depth to obtain X, Y, Z.
+        rays = normalized[keep]
+
+        return np.stack([rays[:, 0] * depth, rays[:, 1] * depth, depth], axis=1).astype(np.float32)
+
+    def project_3d_to_camera(self, points_3d: np.ndarray, cam_matrix: np.ndarray, dist_coeffs: np.ndarray, frame_size = (480,640)) -> np.ndarray:
+        """Rasterize camera-frame 3D points into a depth image of shape `frame_size`.
+
+        Points are projected with cv2.projectPoints using zero rotation/translation,
+        rounded to the nearest pixel and z-buffered (the smallest Z wins per pixel);
+        pixels that receive no point are 0. Points with a non-finite coordinate or
+        Z <= 0 are ignored, and an empty point set yields an all-zero image.
+
+        Raises:
+            ValueError: if `points_3d` is not of shape (N, 3).
+        """
+        if points_3d.ndim != 2 or points_3d.shape[1] != 3:
+            raise ValueError("Input points_3d must have shape (N, 3)")
+        h_rs, w_rs = frame_size
+        # Points at or behind the camera plane can still land inside the image after
+        # projection and would win the min z-buffer with a non-positive depth: drop them.
+        points_3d = points_3d[np.isfinite(points_3d).all(axis=1) & (points_3d[:, 2] > 0)]
+        if len(points_3d) == 0:  # nothing to project; also avoids calling OpenCV on an empty set
+            return np.zeros((h_rs, w_rs), dtype=np.float32)
+
+        zero3 = np.zeros(3, dtype=np.float32)  # rvec = tvec = 0: no extrinsic transform is applied
+        projected_pts, _ = cv2.projectPoints(points_3d.reshape(-1, 1, 3), zero3, zero3,
+                                             cam_matrix.astype(np.float32), dist_coeffs.astype(np.float32))
+        uv = np.rint(projected_pts.reshape(-1, 2))
+        u_idx, v_idx = uv[:, 0].astype(np.int32), uv[:, 1].astype(np.int32)
+        in_bounds = (u_idx >= 0) & (u_idx < w_rs) & (v_idx >= 0) & (v_idx < h_rs)
+
+        # Rasterize by nearest pixel; if multiple points hit a pixel, keep the closest depth.
+        depth_buffer = np.full(h_rs * w_rs, np.inf, dtype=np.float32)
+        np.minimum.at(depth_buffer, v_idx[in_bounds] * w_rs + u_idx[in_bounds], points_3d[in_bounds, 2])
+        depth_buffer[~np.isfinite(depth_buffer)] = 0.0  # pixels that were never hit
+        return depth_buffer.reshape(h_rs, w_rs)
+
+    # Re-render the Zivid depth map through the RealSense intrinsics/distortion to get
+    # "Zivid GT as seen by RealSense" for pixel-level comparison.
+    def project_depth_zivid_to_rs(self,depth_zivid_mm: np.ndarray, depth_rs_mm: np.ndarray, finx = 0) -> np.ndarray:
+        """Return the Zivid depth (mm) resampled onto the pixel grid of `depth_rs_mm`.
+
+        `depth_rs_mm` is only used for its shape; `finx` is only used by the optional
+        (commented-out) PLY debug dump. No extrinsic transform is applied between the
+        two cameras - NOTE(review): confirm the Zivid depth is already in the RS frame.
+        """
+        # Zivid pixels -> (N, 3) points in the Zivid camera frame.
+        XYZ = self.project_camera_to_3d(depth_zivid_mm, CAMERA_MATRIX_ZIVID, DIST_COEFFS_ZIVID)
+        #self.save_to_ply(XYZ/1000, f'zivid_original_points_{finx:03d}.ply') # save in meters for visualization
+
+        # Project the points onto the RealSense image plane -> (H, W) depth map.
+        return self.project_3d_to_camera(XYZ, CAMERA_MATRIX_RS, DIST_COEFFS_RS, frame_size = depth_rs_mm.shape)
+    
+    def show_projection(self, rs_map, zv_map, zv_valid, idx):  # 1x3 debug figure for sample `idx`
+        fig, axes = plt.subplots(1, 3, sharey=True, sharex=True, figsize=(8,4))  # the three panels share both axes
+        axes[0].imshow(rs_map, vmin=-10, vmax=1000),axes[0].set_title(f"RealSense Depth Diff (mm)"),  # NOTE: the commas build a throw-away tuple; only the side effects matter
+        axes[1].imshow(zv_map, vmin=-10, vmax=1000),axes[1].set_title(f"Zivid Projected Depth Diff (mm)"),  # same colour range as the RealSense panel
+        axes[2].imshow(zv_valid, cmap='gray'),axes[2].set_title(f"Valid Mask (Zivid Projection)"),  # mask is drawn in grayscale
+        plt.suptitle(f"Sample {idx:03d} Depth Difference Maps and Valid Mask", fontsize=16)
+        plt.tight_layout()
+        plt.show()  # no block=False here, unlike show_subset
+
+
+# --------------------------------
+#%% Tests
+class TestDataSource(unittest.TestCase):
+    """Smoke tests for DataSource; they read recorded samples from local disk."""
+    def test_init_directory(self):
+        source    = DataSource()
+        n_samples = source.init_directory()
+        self.assertTrue(n_samples > 0)
+
+    def test_get_item(self):
+        source    = DataSource()
+        n_samples = source.init_directory()
+        self.assertTrue(n_samples > 0)
+        sample = source.get_item(0, debug=True)
+        self.assertTrue(len(sample["left"]) > 0)
+
+    def test_show_images(self):
+        source    = DataSource()
+        n_samples = source.init_directory(r'C:\Work\Data\Depth\Data Collection-02')
+        if n_samples == 0:
+            log.warning("No images found.")
+            return
+        titles = ['left (RS)', 'right (RS)', 'depth Zivid (mm)', 'depth RS (mm)', 'rgb (Zivid)']
+        for k in np.random.randint(0, n_samples, size=min(8, n_samples)):
+            sample = source.get_item(int(k), debug=True)
+            self.assertTrue(len(sample["left"]) > 0)
+            source.show_subset([sample["left"], sample["right"], sample["depth_zivid"], sample["depth_rs"], sample["rgb"]], titles)
+
+        plt.show()
+
+    def test_get_item_projected(self):
+        source    = DataSource()
+        n_samples = source.init_directory(r'C:\Work\Data\Depth\Data Collection-02')
+        self.assertTrue(n_samples > 0)
+        titles = ['left (RS)', 'right (RS)', 'depth Zivid (mm)', 'depth RS (mm)', 'error (mm)']
+        #for k in np.random.randint(0, img_num, size=min(12, img_num)):
+        for k in range(0, n_samples):
+            sample = source.get_item_projected(int(k), debug=False)
+            err = source.compute_depth_error(sample["depth_rs"], sample["depth_zivid"])
+            self.assertTrue(len(sample["left"]) > 0)
+            source.show_subset([sample["left"], sample["right"], sample["depth_zivid"], sample["depth_rs"], err], titles)
+        plt.show()
+
+
+# --------------------------------
+#%% Run Test
+def RunTest():  # manual entry point: calls selected test methods directly, without the unittest runner
+    tst = TestDataSource()
+    #tst.test_get_item()
+    #tst.test_show_images()
+    tst.test_get_item_projected()
+
+
+if __name__ == '__main__':  # executed only when the file is run as a script
+    RunTest()
diff --git a/scripts/data_manager_office.py b/scripts/data_manager_office.py
new file mode 100644
index 0000000..3b80123
--- /dev/null
+++ b/scripts/data_manager_office.py
@@ -0,0 +1,274 @@
+r'''
+Dataset management for packed Office stereo frames.
+
+Reads `image_d16_*.png` files where channels are packed as:
+    channel 0 -> left IR
+    channel 1 -> right IR
+    channel 2 -> depth (mm)
+
+Expected directory layout:
+    <root>/
+      image_d16_000.png
+      image_d16_001.png
+      ...
+
+Default root:
+    C:\Work\Data\DepthRS\data\pattern_cube
+
+Output dict keys (kept compatible with existing benchmark code):
+    left         : numpy array (H, W) uint16
+    right        : numpy array (H, W) uint16
+    depth_rs     : numpy array (H, W) float32, mm
+    depth_zivid  : numpy array (H, W) float32, mm (mirrors depth_rs for this dataset)
+    rgb          : empty array (no RGB in packed d16 files)
+'''
+
+import glob
+import logging as log
+import os
+import unittest
+
+import cv2
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+log.basicConfig(format='[%(asctime)s] %(levelname)s: %(message)s', level=log.INFO)
+
+# ---------------------------------
+# D405
+CAMERA_MATRIX_RS = np.array([  # 3x3 matrix; presumably pinhole intrinsics [[fx, 0, cx], [0, fy, cy], [0, 0, 1]] - TODO confirm
+    [638.77, 0, 644.23],
+    [0, 638.77, 358.049],
+    [0, 0, 1]
+])
+
+DIST_COEFFS_RS = np.array([  # five zero coefficients; presumably (k1, k2, p1, p2, k3) in OpenCV order - TODO confirm
+    0.0,
+    -0.0,
+    -0.0,
+    0.0,
+    -0.0
+])
+
+class DataSource:
+    """Reader for packed `image_d16_*.png` stereo frames (left IR, right IR, depth in mm)."""
+    def __init__(self):
+        self.gray_scale_input = False  # overwritten by init_directory; not read elsewhere in this class
+        self.imgs = []  # list of packed d16 PNG paths
+        log.info('Source is defined')
+
+    def init_directory(self, input_rectified='', gray_scale_input=False, sub_indexes=None):
+        """Scan `input_rectified` recursively for `image_d16_*.png` files and store the sorted paths.
+
+        A root shorter than 3 characters selects the default dataset path; `sub_indexes`, when
+        given, keeps only those positions of the sorted list. Returns the number of samples
+        (0 when the directory does not exist).
+        """
+        if len(input_rectified) < 3:
+            input_rectified = r'C:\Work\Data\DepthRS\data\pattern_cube'
+        self.gray_scale_input = gray_scale_input
+        self.imgs = []
+        if not os.path.isdir(input_rectified):
+            log.error(f"Directory not found: {input_rectified}")
+            return 0
+        # '**' with recursive=True matches both flat and nested layouts.
+        self.imgs = sorted(glob.glob(os.path.join(input_rectified, '**', 'image_d16_*.png'), recursive=True))
+        if sub_indexes is not None:
+            self.imgs = [self.imgs[i] for i in sub_indexes]
+        log.info(f"DataSource: found {len(self.imgs)} samples in {input_rectified}")
+        return len(self.imgs)
+
+    def save_to_ply(self, points: np.ndarray, filename: str):
+        """Dump a point cloud to an ASCII PLY file.
+
+        `points` is iterated row by row; every row must unpack into x, y, z.
+        """
+        with open(filename, 'w') as f:
+            f.write(
+                f'ply\nformat ascii 1.0\nelement vertex {len(points)}\n'
+                'property float x\nproperty float y\nproperty float z\nend_header\n'
+            )
+            # one "x y z" text line per vertex
+            f.writelines(f'{x} {y} {z}\n' for x, y, z in points)
+        log.info(f"Saved point cloud to {filename}")
+
+    def project_camera_to_3d(self, depth_img_mm: np.ndarray, cam_matrix: np.ndarray, dist_coeffs: np.ndarray) -> np.ndarray:
+        """Back-project every valid pixel of a depth image to a 3D point in the camera frame.
+
+        Pixels whose depth is non-finite or <= 0 are dropped. Returns a float32 (N, 3)
+        array of X, Y, Z in the units of `depth_img_mm` (possibly with N == 0).
+        """
+        rows, cols = depth_img_mm.shape
+        # Pixel grid as (x, y) pairs in row-major order, shaped Nx1x2 float32 for OpenCV.
+        grid_x, grid_y = np.meshgrid(np.arange(cols, dtype=np.float32), np.arange(rows, dtype=np.float32), indexing='xy')
+        pixels = np.stack([grid_x, grid_y], axis=-1).reshape(-1, 1, 2).astype(np.float32)
+
+        # undistortPoints called without a new projection matrix yields normalized image coordinates.
+        normalized = cv2.undistortPoints(pixels, cam_matrix.astype(np.float32), dist_coeffs.astype(np.float32)).reshape(-1, 2)
+
+        depth = depth_img_mm.reshape(-1).astype(np.float32)
+        keep = np.isfinite(depth) & (depth > 0)
+        if not keep.any():
+            return np.zeros((0, 3), dtype=np.float32)
+
+        depth = depth[keep]
+        # Scale the normalized ray (x, y, 1) by the depth to obtain X, Y, Z.
+        rays = normalized[keep]
+
+        return np.stack([rays[:, 0] * depth, rays[:, 1] * depth, depth], axis=1).astype(np.float32)
+
+    def get_item(self, index: int, debug: bool = False):
+        """Return one sample unpacked from a packed d16 file.
+
+        The dict always carries left, right, depth_rs, depth_zivid and rgb so that code
+        written against the Zivid-based data managers keeps working: depth_zivid is a
+        copy of depth_rs and rgb is an empty array. If the file cannot be read or is not
+        a 3-channel image, a warning is logged and every entry is left empty.
+        """
+        output_str = {
+            "left": [],
+            "right": [],
+            "depth_rs": [],
+            "depth_zivid": [],
+            "rgb": np.array([]),
+        }
+        packed_path = self.imgs[index]
+        packed_img = cv2.imread(packed_path, cv2.IMREAD_UNCHANGED)
+        if packed_img is None:
+            log.warning(f"Failed to load sample {index}: {packed_path}")
+            return output_str
+        if packed_img.ndim != 3 or packed_img.shape[2] < 3:
+            log.warning(f"Expected 3-channel packed image, got shape={packed_img.shape} at: {packed_path}")
+            return output_str
+
+        # NOTE(review): cv2.imread decodes colour images in BGR order - confirm that
+        # channel 0 of the decoded array really is the left IR image.
+        output_str["left"] = packed_img[:, :, 0]
+        output_str["right"] = packed_img[:, :, 1]
+        output_str["depth_rs"] = packed_img[:, :, 2].astype(np.float32)
+        output_str["depth_zivid"] = output_str["depth_rs"].copy()  # no Zivid GT in this dataset
+
+        if debug:
+            self.show_subset([output_str["left"], output_str["right"], output_str["depth_rs"]],
+                             ['left (packed ch0)', 'right (packed ch1)', 'depth RS (packed ch2, mm)'])
+        return output_str
+
+    def get_item_projected(self, index: int, debug: bool = False):
+        """Compatibility wrapper for datasets without Zivid.
+
+        depth_zivid already mirrors depth_rs in get_item, so no re-projection is needed.
+        """
+        return self.get_item(index=index, debug=debug)
+
+    def compute_depth_error(self, depth_pred, depth_gt, depth_mask=None):
+        """Return the signed error pred - gt (mm); masked-out pixels and pixels where either depth is <= 0 are 0."""
+        depth_pred = depth_pred.astype(np.float32)
+        depth_gt = depth_gt.astype(np.float32)
+        valid = (depth_gt > 0) & (depth_pred > 0)
+        if depth_mask is not None:
+            valid = np.logical_and(valid, depth_mask)
+        depth_error = np.zeros_like(depth_pred)
+        depth_error[valid] = depth_pred[valid] - depth_gt[valid]
+        return depth_error
+
+    def show_subset(self, img_list, ttl_list, vmin=None, vmax=None, save_path='', fig_name=''):
+        """Display a list of images in a grid of at most 3 columns (non-blocking).
+
+        Saves `<save_path>/<fig_name>.png` when `save_path` exists; an empty `img_list` is a no-op.
+        """
+        img_num = len(img_list)
+        if img_num == 0:  # the column count below would be 0 and the row computation would divide by it
+            return
+        col_num = min(img_num, 3)
+        row_num = (img_num + col_num - 1) // col_num
+        fig, axes = plt.subplots(row_num, col_num, sharey=True, sharex=True)
+        for k, ax in enumerate(np.array(axes).reshape(-1)):  # flat, row-major walk over the grid
+            if k < img_num:
+                ax.imshow(img_list[k], vmin=vmin, vmax=vmax)
+                ax.set_title(ttl_list[k])
+            else:
+                ax.axis('off')  # unused trailing cell
+        if save_path and os.path.exists(save_path):
+            fig.savefig(os.path.join(save_path, fig_name + '.png'))
+        plt.show(block=False)
+
+    def save_data_to_folder(self, output_str, output_directory):
+        """Save a sample dict as PNG files in `output_directory` (created when missing).
+
+        Writes img_left.png, img_right.png and img_depth_rs.png (depth cast to uint16),
+        plus img_rgb.png when the sample carries a non-empty 'rgb' entry.
+
+        Returns:
+            True when every mandatory image was written successfully.
+        """
+        os.makedirs(output_directory, exist_ok=True)
+        png_flags = [cv2.IMWRITE_PNG_COMPRESSION, 0]
+        paths = {
+            'img_left.png': output_str['left'],
+            'img_right.png': output_str['right'],
+            'img_depth_rs.png': output_str['depth_rs'].astype(np.uint16),
+        }
+        success = True
+        for fname, img in paths.items():
+            success = bool(cv2.imwrite(os.path.join(output_directory, fname), img, png_flags)) and success
+        rgb = output_str.get('rgb')  # this dataset has no RGB, so the key may be absent
+        if rgb is not None and np.asarray(rgb).size > 0:
+            cv2.imwrite(os.path.join(output_directory, 'img_rgb.png'), rgb, png_flags)
+        return success
+
+
+class TestDataSource(unittest.TestCase):
+    def test_init_directory(self):
+        source = DataSource()
+        n_samples = source.init_directory(r'C:\Work\Data\DepthRS\data\pattern_cube')
+        self.assertTrue(n_samples > 0)
+
+    def test_get_item(self):
+        source = DataSource()
+        n_samples = source.init_directory(r'C:\Work\Data\DepthRS\data\pattern_cube')
+        self.assertTrue(n_samples > 0)
+        sample = source.get_item(0, debug=True)
+        self.assertTrue(len(sample['left']) > 0)
+
+    def test_show_images(self):
+        """Visual check on up to 8 randomly drawn samples; returns with a warning when no data is found."""
+        source = DataSource()
+        n_samples = source.init_directory(r'C:\Work\Data\DepthRS\data\pattern_cube')
+        if n_samples == 0:
+            log.warning('No images found.')
+            return
+
+        titles = ['left', 'right', 'depth_rs(mm)']
+        for k in np.random.randint(0, n_samples, size=min(8, n_samples)):
+            sample = source.get_item(int(k), debug=True)
+            self.assertTrue(len(sample['left']) > 0)
+            images = [sample['left'], sample['right'], sample['depth_rs']]
+            source.show_subset(images, titles)
+
+        plt.show()
+
+    def test_get_item_projected(self):
+        """Visual check of the signed depth error on up to 6 randomly drawn samples."""
+        source = DataSource()
+        n_samples = source.init_directory(r'C:\Work\Data\DepthRS\data\pattern_cube')
+        self.assertTrue(n_samples > 0)
+        titles = ['left', 'right', 'depth_zivid(mm)', 'depth_rs(mm)', 'error(mm)']
+        for k in np.random.randint(0, n_samples, size=min(6, n_samples)):
+            sample = source.get_item_projected(int(k), debug=True)
+            err = source.compute_depth_error(sample['depth_rs'], sample['depth_zivid'])
+            self.assertTrue(len(sample['left']) > 0)
+            images = [sample['left'], sample['right'], sample['depth_zivid'], sample['depth_rs'], err]
+            source.show_subset(images, titles)
+        plt.show()
+
+
+def RunTest():  # manual entry point: calls selected test methods directly, without the unittest runner
+    tst = TestDataSource()
+    # tst.test_get_item()
+    tst.test_show_images()
+    #tst.test_get_item_projected()
+
+
+if __name__ == '__main__':  # executed only when the file is run as a script
+    RunTest()
diff --git a/scripts/finetune_faro.py b/scripts/finetune_faro.py
new file mode 100644
index 0000000..495dc17
--- /dev/null
+++ b/scripts/finetune_faro.py
@@ -0,0 +1,177 @@
+"""
+Fine-tune FastFoundationStereo on the FARO dataset.
+
+The FARO dataset provides:
+  - img_left.png / img_right.png : uint16 IR stereo images
+  - img_depth_faro.png           : ground-truth depth in mm (FARO scanner)
+
+Strategy:
+  - Freeze the ViT-L backbone (model.feature) to prevent overfitting on 24 samples.
+  - Train everything else with RAFT-style sequence loss over GRU iterations.
+  - IR uint16 images are clipped to [0,255] and replicated to 3 channels.
+  - Depth is converted to disparity: disp = BF / depth_mm  (BF = 49470.45).
+
+Usage:
+  cd /home/administrato/dev/Fast-FoundationStereo
+  python scripts/finetune_faro.py
+"""
+
+import os, sys, logging
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+sys.path.append(code_dir)
+
+import torch
+import torch.nn.functional as F
+import numpy as np
+import cv2
+from torch.utils.data import Dataset, DataLoader
+from core.utils.utils import InputPadder
+import Utils as U
+from scripts.data_manager_faro import DataSource
+
+
+# ── constants ────────────────────────────────────────────────────────────────
+
+FARO_DIR   = r'/mnt/algonas/Local/Data/Stereo/Faro/FARO_DATA_BASE'  # local path to the dataset
+MODEL_PATH = f'{code_dir}/../weights/20-30-48/model_best_bp2_serialize.pth'  # pickled model object loaded by main()
+OUT_PATH   = f'{code_dir}/../weights/20-30-48/model_finetuned_faro_office.pth'  # base name; main() inserts an _epoch_NNN suffix before .pth
+#MODEL_PATH = f'{code_dir}/../weights/20-30-48/model_finetuned_faro_kitchen.pth'
+
+BF         = 49470.45   # focal_px * baseline_mm (calibrated from camera)
+EPOCHS     = 15         # number of passes over the dataloader
+LR         = 2e-5       # AdamW learning rate
+ITERS      = 8          # GRU iterations (same as inference)
+GAMMA      = 0.9        # sequence loss weight decay
+
+
+# ── dataset ──────────────────────────────────────────────────────────────────
+
+class FaroDataset(Dataset):
+    """Torch dataset over the FARO samples that DataSource returns for split='train'."""
+    def __init__(self, root):
+        self.source = DataSource()
+        found = self.source.init_directory(input_rectified=root, test_keywords=['OFFICE'], split='train')
+        logging.info(f"DataSource found {found} samples in {root}")
+
+    def __len__(self):
+        return len(self.source.imgs)
+
+    def __getitem__(self, idx):
+        """Return (left, right, disparity, valid, idx) for sample `idx`."""
+        sample = self.source.get_item(idx)
+        depth  = sample['depth_faro']   # float32, mm
+
+        # uint16 IR → float32 clipped to [0, 255], replicated to a 3-channel (3, H, W) tensor
+        def to_tensor(ir):
+            gray = np.clip(ir.astype(np.float32), 0, 255)
+            return torch.from_numpy(np.stack([gray] * 3, axis=-1)).permute(2, 0, 1).float()
+
+        # depth (mm) → disparity (pixels):  disp = focal * baseline / depth, 0 where depth is invalid
+        valid = depth > 0
+        disp  = np.zeros_like(depth, dtype=np.float32)
+        disp[valid] = BF / depth[valid]
+
+        return (
+            to_tensor(sample['left']),
+            to_tensor(sample['right']),
+            torch.from_numpy(disp).unsqueeze(0).float(),   # (1, H, W)
+            torch.from_numpy(valid).unsqueeze(0),          # (1, H, W) bool
+            idx,
+        )
+
+
+# ── loss ─────────────────────────────────────────────────────────────────────
+
+def sequence_loss(disp_preds, disp_gt, valid, gamma=GAMMA):
+    """RAFT-style weighted sum of smooth-L1 losses over GRU iterations (weight gamma**(n-1-i)).
+
+    With no valid pixel a term is a graph-attached zero, not the NaN mean of an empty selection."""
+    n, loss = len(disp_preds), 0.0
+    for i, pred in enumerate(disp_preds):
+        gt, v = disp_gt, valid
+        if pred.shape[-2:] != gt.shape[-2:]:
+            gt = F.interpolate(gt, size=pred.shape[-2:], mode='nearest')
+            v  = F.interpolate(valid.float(), size=pred.shape[-2:], mode='nearest').bool()
+        term = F.smooth_l1_loss(pred[v], gt[v]) if v.any() else pred.sum() * 0.0
+        loss = loss + gamma ** (n - 1 - i) * term
+    return loss
+
+
+# ── main ─────────────────────────────────────────────────────────────────────
+
+def main():
+    """Fine-tune the model on the FARO data and checkpoint every epoch that improves the mean training loss."""
+    U.set_logging_format()
+    U.set_seed(0)
+
+    # load full model object (weights + architecture)
+    logging.info(f"Loading model from {MODEL_PATH}")
+    model = torch.load(MODEL_PATH, map_location='cpu', weights_only=False)
+
+    # freeze the ViT-L backbone — with only 24 samples it would overfit
+    for param in model.feature.parameters():
+        param.requires_grad = False
+    logging.info("ViT backbone frozen.")
+
+    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
+    total     = sum(p.numel() for p in model.parameters())
+    logging.info(f"Trainable: {trainable:,} / {total:,} parameters")
+
+    device_ids = [0]
+    model = torch.nn.DataParallel(model, device_ids=device_ids)
+    model.cuda().train()
+    logging.info(f"Using DataParallel on GPU(s) {device_ids}.")  # report the devices actually in use
+
+    optimizer = torch.optim.AdamW(
+        [p for p in model.module.parameters() if p.requires_grad], lr=LR, weight_decay=1e-4
+    )
+    scaler = torch.amp.GradScaler('cuda')
+
+    dataset    = FaroDataset(FARO_DIR)
+    dataloader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=4)
+
+    best_loss, best_path = float('inf'), None  # best_path: checkpoint file of the best epoch so far
+    for epoch in range(0,EPOCHS):
+        epoch_loss = 0.0
+
+        for left, right, disp_gt, valid, idx in dataloader:
+            left, right = left.cuda(), right.cuda()
+            disp_gt, valid = disp_gt.cuda(), valid.cuda()
+
+            # pad so H and W are divisible by 32
+            padder = InputPadder(left.shape, divis_by=32, force_square=False)
+            left_p, right_p = padder.pad(left, right)
+
+            optimizer.zero_grad()
+
+            with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+                _init_disp, disp_preds = model.forward(left_p, right_p, iters=ITERS, test_mode=False)
+                disp_preds = [padder.unpad(p) for p in disp_preds]
+                loss = sequence_loss(disp_preds, disp_gt, valid)
+
+            scaler.scale(loss).backward()
+            scaler.unscale_(optimizer)  # unscale first so clipping sees the true gradient norm
+            torch.nn.utils.clip_grad_norm_(model.module.parameters(), max_norm=1.0)
+            scaler.step(optimizer)
+            scaler.update()
+
+            epoch_loss += loss.item()
+            if idx[0] % 10 == 0:
+                logging.info(f"Epoch {epoch+1:3d}/{EPOCHS}  sample {idx[0]:3d}  loss={loss.item():.4f}")
+
+        avg = epoch_loss / len(dataloader)
+        logging.info(f"Epoch {epoch+1:3d}/{EPOCHS}  loss={avg:.4f}")
+
+        if avg < best_loss:
+            best_loss = avg
+            best_path = OUT_PATH.replace('.pth', f'_epoch_{epoch+1:03d}.pth')
+            torch.save(model.module, best_path)
+            logging.info(f"  → saved best model (loss={best_loss:.4f})")
+
+    logging.info(f"Training complete. Best loss: {best_loss:.4f}")
+    logging.info(f"Best checkpoint saved to {best_path}")  # OUT_PATH itself is never written
+
+
+if __name__ == '__main__':  # executed only when the file is run as a script
+    main()
diff --git a/scripts/finetune_inbolt.py b/scripts/finetune_inbolt.py
new file mode 100644
index 0000000..9187190
--- /dev/null
+++ b/scripts/finetune_inbolt.py
@@ -0,0 +1,244 @@
+"""
+Fine-tune FastFoundationStereo on the Inbolt dataset.
+
+The Inbolt dataset provides:
+  - realsense/{idx}/mono0.png  : left IR image  (uint8, 480x640)
+  - realsense/{idx}/mono1.png  : right IR image (uint8, 480x640)
+  - zivid/{idx}/depthmap_mm.png: ground-truth depth in mm (Zivid scanner, 1024x1224)
+
+Strategy:
+  - Freeze the ViT-L backbone (model.feature) to prevent overfitting on small datasets.
+  - Train everything else with RAFT-style sequence loss over GRU iterations.
+  - IR uint8 images are replicated to 3 channels.
+  - Zivid depth is resized to RealSense image resolution before disparity conversion.
+  - Depth is converted to disparity: disp = BF / depth_mm.
+
+Usage:
+  cd /path/to/Fast-FoundationStereo
+  python scripts/finetune_inbolt.py
+"""
+
+import os, sys, logging
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+sys.path.append(code_dir)
+
+import torch
+import torch.nn.functional as F
+import numpy as np
+import cv2
+from torch.utils.data import Dataset, DataLoader, random_split
+from core.utils.utils import InputPadder
+import Utils as U
+from scripts.data_manager_inbolt import DataSource
+
+
+# ── constants ────────────────────────────────────────────────────────────────
+
+#INBOLT_DIR   = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260322T091926Z-1-001/Data Collection'  # local path to the dataset
+INBOLT_DIR   = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260415T084601Z-3-001/Data Collection'  # dataset root handed to DataSource.init_directory
+# MODEL_PATH = f'{code_dir}/../weights/20-30-48/model_best_bp2_serialize.pth'
+# OUT_PATH   = f'{code_dir}/../weights/20-30-48/model_finetuned_inbolt-20260415.pth'
+MODEL_PATH = f'{code_dir}/../weights/23-36-37/model_best_bp2_serialize.pth'  # pickled model object loaded by main()
+OUT_PATH   = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt-20260415.pth'  # base name; main() inserts an _epoch_NNN suffix before .pth
+
+
+# BF         = 49.8624*385.73  # D435 - focal_px * baseline_mm (calibrated from camera)
+BF         = 50.102706998586 * 385.509887695312 # new data; used as disp = BF / depth_mm (presumably baseline_mm * focal_px - TODO confirm)
+EPOCHS     = 120        # number of passes over the training loader
+LR         = 2e-5       # AdamW learning rate
+ITERS      = 8          # GRU iterations (same as inference)
+GAMMA      = 0.9        # sequence loss weight decay
+TRAIN_RATIO = 0.75      # fraction of samples assigned to the training split
+SPLIT_SEED  = 0         # seed of the generator used by random_split
+
+
+# ── dataset ──────────────────────────────────────────────────────────────────
+
+class InboltDataset(Dataset):
+    """Torch dataset over the Inbolt samples served by DataSource.get_item_projected."""
+    def __init__(self, root):
+        self.source = DataSource()
+        found = self.source.init_directory(input_rectified=root)
+        logging.info(f"DataSource found {found} samples in {root}")
+
+    def __len__(self):
+        return len(self.source.imgs)
+
+    def __getitem__(self, idx):
+        """Return (left, right, disparity, valid) tensors for sample `idx`."""
+        sample = self.source.get_item_projected(idx)
+        left, right = sample['left'], sample['right']
+        depth = sample['depth_zivid']   # float32, mm
+
+        # Bring the GT depth to the stereo image resolution when the shapes differ (nearest neighbour).
+        target_hw = left.shape[:2]
+        if depth.shape != target_hw:
+            depth = cv2.resize(depth, target_hw[::-1], interpolation=cv2.INTER_NEAREST)
+
+        # IR uint8 → float32 in [0, 255], replicated to a 3-channel (3, H, W) tensor
+        def to_tensor(ir):
+            gray = np.clip(ir.astype(np.float32), 0, 255)
+            return torch.from_numpy(np.stack([gray] * 3, axis=-1)).permute(2, 0, 1).float()
+
+        # depth (mm) → disparity (pixels):  disp = focal * baseline / depth, 0 where depth is invalid
+        valid = depth > 0
+        disp  = np.zeros_like(depth, dtype=np.float32)
+        disp[valid] = BF / depth[valid]
+
+        return (
+            to_tensor(left),
+            to_tensor(right),
+            torch.from_numpy(disp).unsqueeze(0).float(),   # (1, H, W)
+            torch.from_numpy(valid).unsqueeze(0),          # (1, H, W) bool
+        )
+
+
+# ── loss ─────────────────────────────────────────────────────────────────────
+
+def sequence_loss(disp_preds, disp_gt, valid, gamma=GAMMA):
+    """RAFT-style weighted sum of smooth-L1 losses over GRU iterations (weight gamma**(n-1-i)).
+
+    With no valid pixel a term is a graph-attached zero, not the NaN mean of an empty selection."""
+    n, loss = len(disp_preds), 0.0
+    for i, pred in enumerate(disp_preds):
+        gt, v = disp_gt, valid
+        if pred.shape[-2:] != gt.shape[-2:]:
+            gt = F.interpolate(gt, size=pred.shape[-2:], mode='nearest')
+            v  = F.interpolate(valid.float(), size=pred.shape[-2:], mode='nearest').bool()
+        term = F.smooth_l1_loss(pred[v], gt[v]) if v.any() else pred.sum() * 0.0
+        loss = loss + gamma ** (n - 1 - i) * term
+    return loss
+
+
+def evaluate_split_loss(model, dataloader):
+    """Return the mean sequence loss over `dataloader` without updating weights.
+
+    An empty loader yields NaN. The model is switched to eval mode for the pass and
+    left in train mode afterwards.
+    """
+    n_batches = len(dataloader)
+    if n_batches == 0:
+        return float('nan')
+
+    model.eval()
+    batch_losses = []
+
+    with torch.no_grad():
+        for left, right, disp_gt, valid in dataloader:
+            left, right = left.cuda(), right.cuda()
+            disp_gt, valid = disp_gt.cuda(), valid.cuda()
+            padder = InputPadder(left.shape, divis_by=32, force_square=False)
+            left_p, right_p = padder.pad(left, right)
+
+            with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+                _, predictions = model.forward(left_p, right_p, iters=ITERS, test_mode=False)
+                predictions = [padder.unpad(p) for p in predictions]
+                batch_losses.append(sequence_loss(predictions, disp_gt, valid).item())
+
+    model.train()
+    return sum(batch_losses) / n_batches
+
+
+# ── main ─────────────────────────────────────────────────────────────────────
+
+def main():
+    """Fine-tune on a random train/test split of the Inbolt data; checkpoint every epoch that improves the test loss."""
+    U.set_logging_format()
+    U.set_seed(0)
+
+    # load full model object (weights + architecture)
+    logging.info(f"Loading model from {MODEL_PATH}")
+    model = torch.load(MODEL_PATH, map_location='cuda', weights_only=False)
+
+    # freeze the backbone (model.feature) to limit overfitting on a small dataset
+    for param in model.feature.parameters():
+        param.requires_grad = False
+    logging.info("ViT backbone frozen.")
+
+    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
+    total     = sum(p.numel() for p in model.parameters())
+    logging.info(f"Trainable: {trainable:,} / {total:,} parameters")
+
+    model = model.cuda().train()
+    logging.info("Model on single GPU.")
+
+    optimizer = torch.optim.AdamW(
+        [p for p in model.parameters() if p.requires_grad], lr=LR, weight_decay=1e-4
+    )
+    scaler = torch.amp.GradScaler('cuda')
+
+    dataset = InboltDataset(INBOLT_DIR)
+    n_total = len(dataset)
+
+    if n_total < 2:
+        raise RuntimeError(f"Need at least 2 samples for a train/test split (TRAIN_RATIO={TRAIN_RATIO}), got {n_total}.")
+
+    n_train = int(round(TRAIN_RATIO * n_total))
+    n_train = min(max(1, n_train), n_total - 1)  # keep at least one sample on each side
+    n_test = n_total - n_train
+
+    split_generator = torch.Generator().manual_seed(SPLIT_SEED)
+    train_set, test_set = random_split(dataset, [n_train, n_test], generator=split_generator)
+
+    train_loader = DataLoader(train_set, batch_size=1, shuffle=True, num_workers=0)
+    test_loader = DataLoader(test_set, batch_size=1, shuffle=False, num_workers=0)
+
+    logging.info(
+        f"Random split with seed={SPLIT_SEED}: total={n_total}, train={len(train_set)} ({100.0*len(train_set)/n_total:.1f}%), "
+        f"test={len(test_set)} ({100.0*len(test_set)/n_total:.1f}%)"
+    )
+
+    best_loss, best_path = float('inf'), None  # best_path: checkpoint file of the best epoch so far
+
+    for epoch in range(EPOCHS):
+        epoch_loss = 0.0
+
+        for left, right, disp_gt, valid in train_loader:
+            left, right = left.cuda(), right.cuda()
+            disp_gt, valid = disp_gt.cuda(), valid.cuda()
+
+            # pad so H and W are divisible by 32
+            padder = InputPadder(left.shape, divis_by=32, force_square=False)
+            left_p, right_p = padder.pad(left, right)
+
+            optimizer.zero_grad(set_to_none=True)
+
+            with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+                _init_disp, disp_preds = model.forward(left_p, right_p, iters=ITERS, test_mode=False)
+                disp_preds = [padder.unpad(p) for p in disp_preds]
+                loss = sequence_loss(disp_preds, disp_gt, valid)
+
+            scaler.scale(loss).backward()
+            scaler.unscale_(optimizer)  # unscale first so clipping sees the true gradient norm
+            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
+            scaler.step(optimizer)
+            scaler.update()
+
+            epoch_loss += loss.item()
+
+        train_loss = epoch_loss / len(train_loader)
+        train_eval_error = evaluate_split_loss(model, train_loader)
+        test_eval_error = evaluate_split_loss(model, test_loader)
+
+        logging.info(
+            f"Epoch {epoch+1:3d}/{EPOCHS}  train_loss={train_loss:.4f}  "
+            f"train_eval_error={train_eval_error:.4f}  test_eval_error={test_eval_error:.4f}"
+        )
+
+        if test_eval_error < best_loss:
+            best_loss = test_eval_error
+            best_path = OUT_PATH.replace('.pth', f'_epoch_{epoch+1:03d}.pth')
+            torch.save(model, best_path)
+            logging.info(f"  → saved best model (test_eval_error={best_loss:.4f})")
+
+    final_train_error = evaluate_split_loss(model, train_loader)
+    final_test_error = evaluate_split_loss(model, test_loader)
+    logging.info(f"Final train error: {final_train_error:.4f}")
+    logging.info(f"Final test error:  {final_test_error:.4f}")
+    logging.info(f"Training complete. Best test error: {best_loss:.4f}")
+    logging.info(f"Best checkpoint saved to {best_path}")  # OUT_PATH itself is never written
+
+
+if __name__ == '__main__':  # executed only when the file is run as a script
+    main()
diff --git a/scripts/finetune_inbolt_depthrs.py b/scripts/finetune_inbolt_depthrs.py
new file mode 100644
index 0000000..22f06d0
--- /dev/null
+++ b/scripts/finetune_inbolt_depthrs.py
@@ -0,0 +1,553 @@
+"""
+Fine-tune FastFoundationStereo + RealSense depth fusion on the Inbolt dataset.
+
+Extends finetune_inbolt.py with three-stage depth fusion as specified in
+docs/superpowers/specs/2026-04-30-depth-fusion-design.md:
+
+  Stage 1 - DepthEncoder features fused into left image features (early)
+  Stage 2 - Disparity prior Gaussian bias on cost-volume logits (mid)
+  Stage 3 - Learned blend of stereo soft-argmax and depth prior for GRU init (late)
+
+The Inbolt dataset provides:
+  - realsense/{idx}/mono0.png        : left IR image  (uint8, 480x640)
+  - realsense/{idx}/mono1.png        : right IR image (uint8, 480x640)
+  - realsense/{idx}/depthmap_mm.png  : RealSense depth in mm  ← fusion input
+  - zivid/{idx}/depthmap_mm.png      : GT depth in mm (Zivid scanner)
+
+Freezing strategy:
+  Frozen    : EdgeNeXt backbone (stereo.feature.stem, stereo.feature.stages)
+  Full LR   : DepthEncoder, DepthFusionModule, DepthInitBlend, depth_sigma, depth_prior_scale
+  0.1x LR   : all other trainable stereo parameters
+
+Usage:
+  cd /path/to/Fast-FoundationStereo
+  python scripts/finetune_inbolt_depthrs.py
+"""
+
+import os, sys, logging
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+sys.path.append(code_dir)
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+import cv2
+from torch.utils.data import Dataset, DataLoader, random_split
+from core.utils.utils import InputPadder
+from core.submodule import (
+    BasicConv,
+    disparity_regression,
+    build_gwc_volume_optimized_pytorch1,
+    build_concat_volume_optimized_pytorch1,
+)
+from core.geometry import Combined_Geo_Encoding_Volume
+from core.foundation_stereo import normalize_image
+import Utils as U
+from scripts.data_manager_inbolt import DataSource
+
+
+# ── constants ────────────────────────────────────────────────────────────────
+
+INBOLT_DIR  = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260415T084601Z-3-001/Data Collection'
+MODEL_PATH  = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt-20260415_epoch_111.pth'
+OUT_PATH    = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt_depthrs.pth'
+
+FOCAL_PX      = 385.509887695312                    # focal length in pixels
+BASELINE_MM   = 50.102706998586                     # stereo baseline in millimetres
+BF            = BASELINE_MM * FOCAL_PX              # focal_px * baseline_mm; disparity_px = BF / depth_mm
+
+DEPTH_NORM_M  = 5.0    # RealSense depth clipped and normalised to [0, 1] over 0–5 m
+
+EPOCHS        = 120
+LR            = 2e-5
+ITERS         = 8
+GAMMA         = 0.9
+TRAIN_RATIO   = 0.75
+SPLIT_SEED    = 0
+
+
+# ── depth fusion modules ──────────────────────────────────────────────────────
+
+class DepthEncoder(nn.Module):
+    """Strided-conv encoder turning [normalised depth, validity] into 4 feature maps.
+
+    Outputs are at 1/4, 1/8, 1/16 and 1/32 of the input size, each multiplied by
+    the validity mask max-pooled down to the same scale.
+    """
+    depth_chans = [32, 64, 96, 128]   # output channels of stage1..stage4
+
+    def __init__(self):
+        super().__init__()
+        conv = dict(kernel_size=3, stride=2, padding=1, bn=True, relu=True)
+        self.stem   = BasicConv(2,   32,  **conv)
+        self.stage1 = BasicConv(32,  32,  **conv)
+        self.stage2 = BasicConv(32,  64,  **conv)
+        self.stage3 = BasicConv(64,  96,  **conv)
+        self.stage4 = BasicConv(96,  128, **conv)
+
+    def forward(self, x):
+        # x: (B, 2, H, W) — ch0: normalised depth, ch1: validity mask
+        mask = F.max_pool2d(x[:, 1:2], kernel_size=2, stride=2)   # validity at H/2
+        feat = self.stem(x)                                      # (B, 32, H/2, W/2), not masked
+
+        pyramid = []
+        for stage in (self.stage1, self.stage2, self.stage3, self.stage4):
+            # every stage halves the resolution; pool the mask in lock-step
+            mask = F.max_pool2d(mask, kernel_size=2, stride=2)
+            feat = stage(feat) * mask
+            pyramid.append(feat)
+
+        return pyramid   # [H/4, H/8, H/16, H/32] with 32, 64, 96, 128 channels
+
+
+class DepthFusionModule(nn.Module):
+    """Adds 1x1-projected depth features to the left-image features at each of 4 scales."""
+
+    def __init__(self, feat_dims):
+        super().__init__()
+        c4, c8, c16, c32 = DepthEncoder.depth_chans
+        self.proj_4  = nn.Conv2d(c4,  feat_dims[0], 1)
+        self.proj_8  = nn.Conv2d(c8,  feat_dims[1], 1)
+        self.proj_16 = nn.Conv2d(c16, feat_dims[2], 1)
+        self.proj_32 = nn.Conv2d(c32, feat_dims[3], 1)
+        # Zero weights and biases: every projection outputs 0 until it is trained.
+        for param in self.parameters():
+            nn.init.zeros_(param)
+
+    def forward(self, left_feats, depth_feats):
+        heads = (self.proj_4, self.proj_8, self.proj_16, self.proj_32)
+        return [left_feats[i] + head(depth_feats[i]) for i, head in enumerate(heads)]
+
+
+class DepthInitBlend(nn.Module):
+    """Small CNN predicting a per-pixel weight that mixes the depth prior into the GRU init."""
+
+    def __init__(self):
+        super().__init__()
+        conv_a = BasicConv(3, 16, kernel_size=3, padding=1, bn=True, relu=True)
+        conv_b = BasicConv(16, 16, kernel_size=3, padding=1, bn=True, relu=True)
+        head   = nn.Conv2d(16, 1, kernel_size=1)
+        # Zero weight + bias of -5 → sigmoid(-5) ≈ 0.007, i.e. almost pure stereo at the start.
+        nn.init.zeros_(head.weight)
+        nn.init.constant_(head.bias, -5.0)
+        self.net = nn.Sequential(conv_a, conv_b, head)
+
+    def forward(self, stereo_init, disp_prior, validity):
+        # all inputs: (B, 1, H/4, W/4)
+        stacked = torch.cat([stereo_init, disp_prior, validity], dim=1)  # (B, 3, H/4, W/4)
+        weight = torch.sigmoid(self.net(stacked)) * validity   # forced to 0 where depth is invalid
+        # convex combination: weight → 1 trusts the depth prior, weight → 0 keeps the stereo estimate
+        return weight * disp_prior + (1.0 - weight) * stereo_init
+
+
+# ── model wrapper ─────────────────────────────────────────────────────────────
+
+class FastFoundationStereoDepthRS(nn.Module):
+    """
+    Wraps a pretrained FastFoundationStereo and adds RealSense depth fusion
+    at three stages of the pipeline.
+
+    The wrapped stereo model is stored as self.stereo; all new modules live
+    directly on this wrapper so parameter groups are easy to separate.
+    """
+
+    def __init__(self, stereo_model):
+        super().__init__()
+        self.stereo = stereo_model        # wrapped base model, used as `s` in forward()
+        self.args   = stereo_model.args   # base model's config (max_disp, normalize, mixed_precision, ...)
+
+        feat_dims = stereo_model.feature.d_out   # per-scale channels; [224, 192, 320, 304] for vitl per the original note — TODO confirm
+        self.depth_encoder    = DepthEncoder()
+        self.depth_fusion     = DepthFusionModule(feat_dims)
+        self.depth_init_blend = DepthInitBlend()
+
+        # Stage-2 learned scalars: prior sharpness (σ) and weight (α)
+        self.depth_sigma       = nn.Parameter(torch.tensor(4.0))   # disparity units at 1/4 scale
+        self.depth_prior_scale = nn.Parameter(torch.tensor(0.1))   # starts small → near-stereo behaviour
+
+    # ------------------------------------------------------------------
+    def _preprocess_depth(self, depth_rs_mm):
+        """Convert RealSense depth (mm) to metres, a validity mask and the encoder input.
+
+        depth_rs_mm is (B, 1, H, W); pixels that are <= 0, NaN or ±inf count as invalid.
+        Returns depth_rs_m (invalid = 0), validity (0/1 float), enc_input = [d_norm, validity].
+        """
+        is_valid    = (depth_rs_mm > 0) & torch.isfinite(depth_rs_mm)
+        validity    = is_valid.float()
+        # masked_fill, not `* validity`: NaN * 0 and inf * 0 are NaN and would poison the encoder.
+        depth_rs_m  = (depth_rs_mm / 1000.0).masked_fill(~is_valid, 0.0)
+        d_norm      = depth_rs_m.clamp(0, DEPTH_NORM_M) / DEPTH_NORM_M
+        enc_input   = torch.cat([d_norm, validity], dim=1)
+        return depth_rs_m, validity, enc_input
+
+    # ------------------------------------------------------------------
+    def upsample_disp(self, disp, mask_feat_4, stem_2x):
+        return self.stereo.upsample_disp(disp, mask_feat_4, stem_2x)   # pure delegation to the wrapped stereo model
+
+    # ------------------------------------------------------------------
+    def forward(
+        self,
+        image1,
+        image2,
+        depth_rs_mm=None,
+        focal=FOCAL_PX,
+        baseline_mm=BASELINE_MM,
+        iters=12,
+        test_mode=False,
+        low_memory=False,
+        optimize_build_volume='pytorch1',   # accepted but never read in this method
+    ):
+        """Predict disparity for a stereo pair; depth_rs_mm (in mm) enables fusion stages 1-3.
+
+        Returns the last upsampled disparity if test_mode, else (init_disp, disp_preds).
+        """
+        s = self.stereo   # shorthand
+        B, C, H, W = image1.shape
+        low_memory  = low_memory or self.args.get('low_memory', False)
+        image1 = normalize_image(image1)
+        image2 = normalize_image(image2)
+        with torch.amp.autocast('cuda', enabled=self.args.mixed_precision, dtype=U.AMP_DTYPE):
+            # ── feature extraction ────────────────────────────────────
+            out            = s.feature(torch.cat([image1, image2], dim=0))   # left and right batched through one call
+            features_left  = [o[:B] for o in out]
+            features_right = [o[B:] for o in out]
+            stem_2x        = s.stem_2(image1)
+
+            # ── Stage 1: depth feature fusion ─────────────────────────
+            disp_prior_14 = None
+            validity_14   = None
+            if depth_rs_mm is not None:
+                depth_rs_m, validity, enc_input = self._preprocess_depth(depth_rs_mm)
+                depth_feats    = self.depth_encoder(enc_input)
+                features_left  = self.depth_fusion(features_left, depth_feats)   # only the left features receive depth
+
+            # ── cost volume (unchanged) ───────────────────────────────
+            gwc_volume = build_gwc_volume_optimized_pytorch1(
+                features_left[0], features_right[0],
+                self.args.max_disp // 4, s.cv_group,
+                normalize=self.args.normalize,
+            )
+            left_tmp       = s.proj_cmb(features_left[0])
+            right_tmp      = s.proj_cmb(features_right[0])
+            concat_volume  = build_concat_volume_optimized_pytorch1(left_tmp, right_tmp, maxdisp=self.args.max_disp // 4)
+            del left_tmp, right_tmp
+
+            comb_volume = torch.cat([gwc_volume, concat_volume], dim=1)
+            del concat_volume, gwc_volume
+
+            comb_volume = s.corr_stem(comb_volume)
+            comb_volume = s.corr_feature_att(comb_volume, features_left[0])
+            comb_volume = s.cost_agg(comb_volume, features_left)
+
+            # ── Stage 2: disparity prior bias on logits ───────────────
+            logits = s.classifier(comb_volume).squeeze(1)   # (B, D, H/4, W/4)
+
+            if depth_rs_mm is not None:
+                D = logits.shape[1]
+                # Use torch.where so invalid pixels get disp_prior=0.
+                # depth_rs_m is in metres; baseline_mm is in mm → divide by 1000
+                # for consistent units: focal(px) * baseline_m(m) / depth_m(m) = px.
+                disp_prior    = torch.where(
+                    validity > 0.5,
+                    (focal * baseline_mm / 1000.0) / depth_rs_m.clamp(min=1e-3),
+                    torch.zeros_like(depth_rs_m),
+                )
+                disp_prior_14 = F.interpolate(disp_prior / 4.0, size=(H // 4, W // 4), mode='nearest')   # /4: disparity in 1/4-scale pixels
+                validity_14   = F.interpolate(validity,          size=(H // 4, W // 4), mode='nearest')
+
+                d_idx      = torch.arange(D, device=logits.device, dtype=logits.dtype).view(1, D, 1, 1)
+                sigma      = self.depth_sigma.abs().clamp(min=0.1)   # keeps the learned σ positive and away from 0
+                prior_bias = -0.5 * ((d_idx - disp_prior_14) / sigma) ** 2   # NOTE(review): disp_prior_14 is not clamped to [0, D-1]
+                prior_bias = prior_bias * validity_14      # flat where depth invalid
+
+                logits = logits + self.depth_prior_scale * prior_bias
+
+            prob        = F.softmax(logits, dim=1)
+            stereo_init = disparity_regression(prob, self.args.max_disp // 4)  # (B,1,H/4,W/4)
+
+            # ── Stage 3: GRU init blend ───────────────────────────────
+            if depth_rs_mm is not None:
+                init_disp = self.depth_init_blend(stereo_init, disp_prior_14, validity_14)
+            else:
+                init_disp = stereo_init
+
+            # ── context network (unchanged) ───────────────────────────
+            cnet_list = s.cnet(features_left[0], features_left[1], features_left[2])
+            cnet_list = list(cnet_list)
+            net_list  = [torch.tanh(x[0]) for x in cnet_list]
+            inp_list  = [torch.relu(x[1]) for x in cnet_list]
+            inp_list  = [s.cam(x) * x for x in inp_list]
+            att       = [s.sam(x) for x in inp_list]
+
+        # ── geometry encoding volume ──────────────────────────────────
+        geo_fn = Combined_Geo_Encoding_Volume(
+            features_left[0].to(s.dtype),
+            features_right[0].to(s.dtype),
+            comb_volume.to(s.dtype),
+            num_levels=self.args.corr_levels,
+        )
+        b, c, h, w = features_left[0].shape
+        coords = torch.arange(w, dtype=torch.float, device=init_disp.device).reshape(1, 1, w, 1).repeat(b, h, 1, 1)
+        disp   = init_disp.to(s.dtype)
+        disp_preds = []
+
+        del comb_volume, features_left, features_right, cnet_list   # drop local references before the GRU loop
+
+        # ── GRU iterations (unchanged) ────────────────────────────────
+        for itr in range(iters):
+            disp     = disp.detach()   # no gradient flows through disp from one iteration to the next
+            geo_feat = geo_fn(disp, coords, dx=s.dx, low_memory=low_memory)
+
+            with torch.amp.autocast('cuda', enabled=self.args.mixed_precision, dtype=U.AMP_DTYPE):
+                net_list, mask_feat_4, delta_disp = s.update_block(
+                    net_list, inp_list, geo_feat.to(s.dtype), disp, att
+                )
+
+            disp = disp + delta_disp.to(s.dtype)
+            if test_mode and itr < iters - 1:   # test mode only upsamples the final iteration
+                continue
+
+            disp_up = self.upsample_disp(disp.to(s.dtype), mask_feat_4.to(s.dtype), stem_2x.to(s.dtype))
+            disp_preds.append(disp_up)
+
+        if test_mode:
+            return disp_up   # NOTE(review): unbound if iters == 0
+
+        return init_disp, disp_preds
+
+
+# ── dataset ───────────────────────────────────────────────────────────────────
+
+class InboltDepthDataset(Dataset):
+    """
+    Inbolt stereo samples plus the RealSense depth map (mm), which is returned as
+    the fifth tensor and used as the depth fusion input during training.
+    """
+
+    def __init__(self, root):
+        self.source = DataSource()
+        n = self.source.init_directory(input_rectified=root)
+        logging.info(f"DataSource found {n} samples in {root}")
+
+    def __len__(self):
+        return len(self.source.imgs)
+
+    @staticmethod
+    def _fit(depth, h, w):
+        """Nearest-neighbour resize of a depth map to (h, w) when its shape differs."""
+        if depth.shape == (h, w):
+            return depth
+        return cv2.resize(depth, (w, h), interpolation=cv2.INTER_NEAREST)
+
+    @staticmethod
+    def _ir_to_chw(img):
+        """IR image → float32 clipped to [0, 255], replicated into a 3-channel CHW tensor."""
+        img = np.clip(img.astype(np.float32), 0, 255)
+        return torch.from_numpy(np.stack([img, img, img], axis=-1)).permute(2, 0, 1).float()
+
+    def __getitem__(self, idx):
+        """Return (left, right, disp_gt, valid, depth_rs_mm) tensors for sample `idx`."""
+        sample = self.source.get_item_projected(idx)
+        h, w = sample['left'].shape[:2]
+        gt_mm = self._fit(sample['depth_zivid'], h, w)   # Zivid GT depth, mm
+        rs_mm = self._fit(sample['depth_rs'], h, w)      # RealSense depth, mm (model input)
+
+        # Zivid GT depth (mm) → disparity (pixels); pixels without GT keep disparity 0
+        has_gt = gt_mm > 0
+        disp = np.zeros_like(gt_mm, dtype=np.float32)
+        disp[has_gt] = BF / gt_mm[has_gt]
+
+        return (
+            self._ir_to_chw(sample['left']),                 # (3, H, W)
+            self._ir_to_chw(sample['right']),                # (3, H, W)
+            torch.from_numpy(disp).unsqueeze(0).float(),     # (1, H, W)
+            torch.from_numpy(has_gt).unsqueeze(0),           # (1, H, W) bool
+            torch.from_numpy(rs_mm).unsqueeze(0).float(),    # (1, H, W) mm
+        )
+
+
+# ── loss ──────────────────────────────────────────────────────────────────────
+
+def sequence_loss(disp_preds, disp_gt, valid, gamma=GAMMA):
+    """RAFT-style weighted sum of smooth-L1 losses over GRU iterations (a tensor for non-empty disp_preds)."""
+    n    = len(disp_preds)
+    loss = 0.0
+    for i, pred in enumerate(disp_preds):
+        w = gamma ** (n - 1 - i)   # later iterations weigh more
+        gt, v = disp_gt, valid
+        if pred.shape[-2:] != gt.shape[-2:]:
+            gt = F.interpolate(gt, size=pred.shape[-2:], mode='nearest')
+            v  = F.interpolate(valid.float(), size=pred.shape[-2:], mode='nearest').bool()
+        # No valid pixel: smooth_l1 over an empty selection would be NaN, and skipping would leave
+        # `loss` a plain float that breaks .item()/.backward(); add the graph-connected zero instead.
+        term = F.smooth_l1_loss(pred[v], gt[v]) if v.any() else pred[v].sum()
+        loss = loss + w * term
+    return loss
+
+
+def evaluate_split_loss(model, dataloader):
+    """Average sequence loss over a dataloader (no gradient updates); NaN if empty, model mode restored on exit."""
+    if len(dataloader) == 0:
+        return float('nan')
+
+    was_training = model.training   # restored in `finally` instead of forcing train()
+    model.eval()
+    total_loss = 0.0
+
+    try:
+        with torch.no_grad():
+            for left, right, disp_gt, valid, depth_rs in dataloader:
+                left, right, depth_rs = left.cuda(), right.cuda(), depth_rs.cuda()
+                disp_gt, valid        = disp_gt.cuda(), valid.cuda()
+                padder = InputPadder(left.shape, divis_by=32, force_square=False)
+                left_p, right_p, depth_rs_p = padder.pad(left, right, depth_rs)
+
+                with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+                    _init_disp, disp_preds = model.forward(
+                        left_p, right_p, depth_rs_mm=depth_rs_p, iters=ITERS, test_mode=False
+                    )
+                    disp_preds = [padder.unpad(p) for p in disp_preds]
+                    loss = sequence_loss(disp_preds, disp_gt, valid)
+
+                total_loss += float(loss)   # float() accepts a 0-dim tensor and a plain number alike
+    finally:
+        model.train(was_training)
+    return total_loss / len(dataloader)
+
+
+# ── main ──────────────────────────────────────────────────────────────────────
+
+def main():
+    """Fine-tune the depth-fusion wrapper on the Inbolt dataset; saves a checkpoint on every test improvement and at the end."""
+    U.set_logging_format()
+    U.set_seed(0)
+
+    # ── load and wrap pretrained stereo model ─────────────────────────
+    logging.info(f"Loading base stereo model from {MODEL_PATH}")
+    stereo_model = torch.load(MODEL_PATH, map_location='cuda', weights_only=False)   # unpickles a whole object — only load trusted checkpoint files
+    model = FastFoundationStereoDepthRS(stereo_model).cuda()
+    logging.info("Model wrapped with depth fusion modules.")
+
+    # ── freeze EdgeNeXt backbone ──────────────────────────────────────
+    for param in model.stereo.feature.stem.parameters():
+        param.requires_grad = False
+    for param in model.stereo.feature.stages.parameters():
+        param.requires_grad = False
+    logging.info("EdgeNeXt backbone (stem + stages) frozen.")
+
+    # ── two-group optimizer: new modules at full LR, rest at 0.1× LR ─
+    new_params = (
+        list(model.depth_encoder.parameters())    +
+        list(model.depth_fusion.parameters())     +
+        list(model.depth_init_blend.parameters()) +
+        [model.depth_sigma, model.depth_prior_scale]
+    )
+    new_param_ids = {id(p) for p in new_params}   # identity-based lookup to exclude the new params below
+    finetune_params = [
+        p for p in model.parameters()
+        if p.requires_grad and id(p) not in new_param_ids
+    ]
+
+    trainable_new      = sum(p.numel() for p in new_params)
+    trainable_finetune = sum(p.numel() for p in finetune_params)
+    total              = sum(p.numel() for p in model.parameters())
+    logging.info(
+        f"Parameters — new (full LR): {trainable_new:,}  "
+        f"fine-tune (0.1× LR): {trainable_finetune:,}  "
+        f"frozen: {total - trainable_new - trainable_finetune:,}  "
+        f"total: {total:,}"
+    )
+
+    optimizer = torch.optim.AdamW(
+        [
+            {'params': new_params,       'lr': LR},
+            {'params': finetune_params,  'lr': LR * 0.1},
+        ],
+        weight_decay=1e-4,
+    )
+    scaler = torch.amp.GradScaler('cuda')
+
+    # ── dataset and splits ────────────────────────────────────────────
+    dataset = InboltDepthDataset(INBOLT_DIR)
+    n_total = len(dataset)
+    if n_total < 2:
+        raise RuntimeError(f"Need at least 2 samples for train/test split, got {n_total}.")
+
+    n_train = min(max(1, int(round(TRAIN_RATIO * n_total))), n_total - 1)   # at least one sample on each side of the split
+    n_test  = n_total - n_train
+
+    split_gen = torch.Generator().manual_seed(SPLIT_SEED)   # dedicated generator seeded with SPLIT_SEED
+    train_set, test_set = random_split(dataset, [n_train, n_test], generator=split_gen)
+
+    train_loader = DataLoader(train_set, batch_size=1, shuffle=True,  num_workers=0)
+    test_loader  = DataLoader(test_set,  batch_size=1, shuffle=False, num_workers=0)
+
+    logging.info(
+        f"Split (seed={SPLIT_SEED}): total={n_total}, "
+        f"train={len(train_set)} ({100.0*len(train_set)/n_total:.1f}%), "
+        f"test={len(test_set)} ({100.0*len(test_set)/n_total:.1f}%)"
+    )
+    model.train()
+    best_loss = float('inf')
+
+    # ── training loop ─────────────────────────────────────────────────
+    for epoch in range(EPOCHS):
+        epoch_loss = 0.0
+
+        for left, right, disp_gt, valid, depth_rs in train_loader:
+            left, right    = left.cuda(), right.cuda()
+            disp_gt, valid = disp_gt.cuda(), valid.cuda()
+            depth_rs       = depth_rs.cuda()
+
+            padder = InputPadder(left.shape, divis_by=32, force_square=False)
+            left_p, right_p, depth_rs_p = padder.pad(left, right, depth_rs)   # depth is padded together with the images
+
+            optimizer.zero_grad(set_to_none=True)
+
+            with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+                _init_disp, disp_preds = model.forward(
+                    left_p, right_p, depth_rs_mm=depth_rs_p, iters=ITERS, test_mode=False
+                )
+                disp_preds = [padder.unpad(p) for p in disp_preds]   # unpad each prediction before comparing with disp_gt
+                loss = sequence_loss(disp_preds, disp_gt, valid)
+
+            scaler.scale(loss).backward()
+            scaler.unscale_(optimizer)   # unscale first so clipping sees true gradient magnitudes
+            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
+            scaler.step(optimizer)
+            scaler.update()
+
+            epoch_loss += loss.item()
+
+        train_loss       = epoch_loss / len(train_loader)
+        train_eval_error = evaluate_split_loss(model, train_loader)   # extra full pass over the training split every epoch
+        test_eval_error  = evaluate_split_loss(model, test_loader)
+
+        logging.info(
+            f"Epoch {epoch+1:3d}/{EPOCHS}  "
+            f"train_loss={train_loss:.4f}  "
+            f"train_eval={train_eval_error:.4f}  "
+            f"test_eval={test_eval_error:.4f}  "
+            f"depth_sigma={model.depth_sigma.item():.3f}  "
+            f"depth_prior_scale={model.depth_prior_scale.item():.4f}"
+        )
+
+        if test_eval_error < best_loss:   # a NaN test error never compares as smaller, so it is never saved as best
+            best_loss = test_eval_error
+            save_path = OUT_PATH.replace('.pth', f'_epoch_{epoch+1:03d}.pth')
+            torch.save(model, save_path)   # pickles the entire wrapper module, not just a state_dict
+            logging.info(f"  → saved best model (test_eval={best_loss:.4f}) to {save_path}")
+
+    final_train = evaluate_split_loss(model, train_loader)
+    final_test  = evaluate_split_loss(model, test_loader)
+    logging.info(f"Final train error: {final_train:.4f}")
+    logging.info(f"Final test  error: {final_test:.4f}")
+    logging.info(f"Best  test  error: {best_loss:.4f}")
+    torch.save(model, OUT_PATH)   # last-epoch weights, which may differ from the best checkpoint
+    logging.info(f"Final model saved to {OUT_PATH}")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/finetune_inbolt_depthrs_2.py b/scripts/finetune_inbolt_depthrs_2.py
new file mode 100644
index 0000000..bd0f9eb
--- /dev/null
+++ b/scripts/finetune_inbolt_depthrs_2.py
@@ -0,0 +1,530 @@
+"""
+Fine-tune FastFoundationStereo + RealSense depth fusion v2 on the Inbolt dataset.
+
+Architecture (no DepthEncoder / no DepthFusionModule — feature extraction fully frozen):
+
+  INPUTS
+    left_IR  (B,3,H,W)
+    right_IR (B,3,H,W)
+    depth_rs_mm (B,1,H,W)
+
+  FEATURE EXTRACTION  [FROZEN — stereo.feature + stereo.stem_2]
+    EdgeNeXt backbone + FPN decoder
+    → feat_left[x4,x8,x16,x32], feat_right[x4,x8,x16,x32], stem_2x
+
+  DEPTH PREPROCESSING  [no learned params]
+    validity M = (depth_rs_mm > 0) & isfinite
+    depth_rs_m = depth_rs_mm / 1000.0 · M
+    disp_prior = focal·baseline_m / depth_rs_m   (full-res pixels)
+    disp_prior_14 = disp_prior / 4               (1/4-scale pixels)
+    validity_14   = F.interpolate(M, H/4, W/4)
+
+  COST VOLUME  [fine-tune 0.1× LR]
+    feat_left[0]+feat_right[0] → GWC+Concat → hourglass → logits (B,D,H/4,W/4)
+
+  STAGE 2 — GAUSSIAN LOGIT PRIOR  [learnable σ, α — full LR]
+    prior_bias = −½·((d_idx − disp_prior_14) / σ)² · validity_14
+    logits' = logits + α·prior_bias
+    → stereo_init = softmax → disparity_regression   (B,1,H/4,W/4)
+
+  STAGE 3a — GRU INIT BLEND  [DepthInitBlend ~5K — full LR]
+    concat(stereo_init, disp_prior_14, validity_14)
+    → sigmoid (3-layer CNN) → blend_w · validity_14
+    → init_disp = blend_w·disp_prior_14 + (1−blend_w)·stereo_init
+
+  CONTEXT NETWORK + GRU ITERATIONS  [fine-tune 0.1× LR]
+    init_disp → 8× GRU update → disp_up (B,1,H,W)
+
+  STAGE 3b — OUTPUT BLEND  [DepthOutputBlend ~8K — full LR]  ← NEW
+    concat(disp_up, disp_prior_full, validity)
+    → sigmoid (3-layer CNN) → blend_w_out · validity
+    → disp_final = disp_up + blend_w_out·(disp_prior_full − disp_up)
+
+  OUTPUT: disp_final (B,1,H,W)
+
+Freezing strategy:
+  Frozen    : stereo.feature (all submodules), stereo.stem_2
+  Full LR   : DepthInitBlend, DepthOutputBlend, depth_sigma, depth_prior_scale
+  0.1× LR   : all other trainable stereo params
+
+Usage:
+  cd /path/to/Fast-FoundationStereo
+  python scripts/finetune_inbolt_depthrs_2.py
+"""
+
+import os, sys, logging
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+sys.path.append(code_dir)
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+import cv2
+from torch.utils.data import Dataset, DataLoader, random_split
+from core.utils.utils import InputPadder
+from core.submodule import (
+    BasicConv,
+    disparity_regression,
+    build_gwc_volume_optimized_pytorch1,
+    build_concat_volume_optimized_pytorch1,
+)
+from core.geometry import Combined_Geo_Encoding_Volume
+from core.foundation_stereo import normalize_image
+import Utils as U
+from scripts.data_manager_inbolt import DataSource
+
+
+# ── constants ────────────────────────────────────────────────────────────────
+
+INBOLT_DIR  = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260415T084601Z-3-001/Data Collection'
+MODEL_PATH  = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt-20260415_epoch_111.pth'
+OUT_PATH    = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt_depthrs_v2.pth'
+
+FOCAL_PX      = 385.509887695312                    # focal length in pixels
+BASELINE_MM   = 50.102706998586                     # stereo baseline in millimetres
+BF            = BASELINE_MM * FOCAL_PX              # focal_px * baseline_mm; disparity_px = BF / depth_mm
+
+EPOCHS        = 120
+LR            = 2e-5
+ITERS         = 8
+GAMMA         = 0.9
+TRAIN_RATIO   = 0.75
+SPLIT_SEED    = 0
+
+
+# ── depth fusion modules ──────────────────────────────────────────────────────
+
+class DepthInitBlend(nn.Module):
+    """Mixes the 1/4-scale depth prior into the stereo soft-argmax to warm-start the GRU."""
+
+    def __init__(self):
+        super().__init__()
+        layers = [
+            BasicConv(3, 16, kernel_size=3, padding=1, bn=True, relu=True),
+            BasicConv(16, 16, kernel_size=3, padding=1, bn=True, relu=True),
+            nn.Conv2d(16, 1, kernel_size=1),
+        ]
+        nn.init.zeros_(layers[-1].weight)
+        nn.init.constant_(layers[-1].bias, -5.0)   # sigmoid(-5) ≈ 0.007 → almost no blend at init
+        self.net = nn.Sequential(*layers)
+
+    def forward(self, stereo_init, disp_prior, validity):
+        # all inputs: (B, 1, H/4, W/4); the weight is forced to 0 wherever depth is invalid
+        gate = torch.sigmoid(self.net(torch.cat([stereo_init, disp_prior, validity], dim=1))) * validity
+        return gate * disp_prior + (1.0 - gate) * stereo_init
+
+
+class DepthOutputBlend(nn.Module):
+    """Pulls the full-resolution GRU disparity towards the depth prior by a learned per-pixel weight."""
+
+    def __init__(self):
+        super().__init__()
+        hidden = 32
+        self.net = nn.Sequential(
+            BasicConv(3, hidden, kernel_size=3, padding=1, bn=True, relu=True),
+            BasicConv(hidden, hidden, kernel_size=3, padding=1, bn=True, relu=True),
+            nn.Conv2d(hidden, 1, kernel_size=1),
+        )
+        nn.init.zeros_(self.net[-1].weight)
+        nn.init.constant_(self.net[-1].bias, -5.0)   # weight starts at sigmoid(-5) ≈ 0.007: near-zero correction
+
+    def forward(self, disp_stereo, disp_prior_full, validity):
+        net_in = torch.cat([disp_stereo, disp_prior_full, validity], dim=1)   # all inputs: (B, 1, H, W)
+        alpha = torch.sigmoid(self.net(net_in)) * validity                    # 0 where depth is invalid
+        return disp_stereo + alpha * (disp_prior_full - disp_stereo)
+
+
+# ── model wrapper ─────────────────────────────────────────────────────────────
+
+class FastFoundationStereoDepthRS_v2(nn.Module):
+    """
+    Wraps a pretrained FastFoundationStereo and adds RealSense depth fusion
+    at three pipeline stages.  Feature extraction (stereo.feature + stereo.stem_2)
+    is expected to be frozen before training.
+
+    New modules (no DepthEncoder / no DepthFusionModule):
+      depth_sigma, depth_prior_scale  — Stage 2 Gaussian prior on logits
+      DepthInitBlend                  — Stage 3a GRU warm-start blend
+      DepthOutputBlend                — Stage 3b full-resolution output correction
+    """
+
+    def __init__(self, stereo_model):
+        super().__init__()
+        self.stereo = stereo_model        # wrapped base model, used as `s` in forward()
+        self.args   = stereo_model.args   # base model's config (max_disp, normalize, mixed_precision, ...)
+
+        self.depth_init_blend   = DepthInitBlend()     # Stage 3a: GRU warm-start blend
+        self.depth_output_blend = DepthOutputBlend()   # Stage 3b: full-resolution output correction
+
+        self.depth_sigma       = nn.Parameter(torch.tensor(4.0))   # Stage 2: σ of the Gaussian prior on the logits
+        self.depth_prior_scale = nn.Parameter(torch.tensor(0.1))   # Stage 2: weight multiplying the prior bias
+
+    # ------------------------------------------------------------------
+    def _preprocess_depth(self, depth_rs_mm):
+        """Convert RealSense depth (mm) to metres, a validity mask and a disparity prior.
+
+        Returns:
+          depth_rs_m  (B,1,H,W) metres, zeros where invalid
+          validity    (B,1,H,W) float 0/1 (depth > 0 and finite)
+          disp_prior  (B,1,H,W) full-res disparity in pixels, zeros where invalid
+        """
+        is_valid   = (depth_rs_mm > 0) & torch.isfinite(depth_rs_mm)
+        validity   = is_valid.float()
+        # masked_fill rather than `* validity`: NaN * 0 and inf * 0 are NaN, not 0.
+        depth_rs_m = (depth_rs_mm / 1000.0).masked_fill(~is_valid, 0.0)
+        # focal(px) * baseline(m) / depth(m) = disparity(px); the clamp only guards the division.
+        disp_prior = ((FOCAL_PX * BASELINE_MM / 1000.0) / depth_rs_m.clamp(min=1e-3)).masked_fill(~is_valid, 0.0)
+        return depth_rs_m, validity, disp_prior
+
+    # ------------------------------------------------------------------
+    def upsample_disp(self, disp, mask_feat_4, stem_2x):
+        return self.stereo.upsample_disp(disp, mask_feat_4, stem_2x)   # pure delegation to the wrapped stereo model
+
+    # ------------------------------------------------------------------
+    def forward(
+        self,
+        image1,
+        image2,
+        depth_rs_mm=None,
+        iters=12,
+        test_mode=False,
+        low_memory=False,
+    ):
+        """
+        Run the wrapped stereo network, optionally guided by a sensor depth map.
+
+        When ``depth_rs_mm`` is given it is used in three places:
+          * Stage 2  - a Gaussian bias centred on the prior disparity is added
+            to the cost-volume logits before the softmax;
+          * Stage 3a - ``self.depth_init_blend`` combines the regressed initial
+            disparity with the 1/4-resolution prior;
+          * Stage 3b - ``self.depth_output_blend`` is applied to every
+            upsampled prediction at full resolution.
+        When it is ``None`` all three stages are bypassed.
+
+        Args:
+          image1, image2: image batches; ``image1`` is unpacked as (B, C, H, W).
+          depth_rs_mm:    optional depth in millimetres, handed to
+                          ``_preprocess_depth``.
+          iters:          number of GRU update iterations.
+          test_mode:      if True only the last iteration is upsampled and
+                          returned.
+          low_memory:     OR-ed with ``self.args.get('low_memory', False)`` and
+                          forwarded to the geometry lookup.
+
+        Returns:
+          ``disp_preds[-1]`` when ``test_mode`` is True, otherwise the tuple
+          ``(init_disp, disp_preds)`` where ``disp_preds`` holds one upsampled
+          prediction per iteration.
+        """
+        s = self.stereo
+        B, C, H, W = image1.shape
+        # either the call argument or the config flag switches low-memory mode on
+        low_memory = low_memory or self.args.get('low_memory', False)
+
+        image1 = normalize_image(image1)
+        image2 = normalize_image(image2)
+
+        with torch.amp.autocast('cuda', enabled=self.args.mixed_precision, dtype=U.AMP_DTYPE):
+
+            # ── feature extraction (frozen) ───────────────────────────
+            # left and right images share one batched pass, then are split at index B
+            out            = s.feature(torch.cat([image1, image2], dim=0))
+            features_left  = [o[:B] for o in out]
+            features_right = [o[B:] for o in out]
+            stem_2x        = s.stem_2(image1)
+
+            # ── depth preprocessing ───────────────────────────────────
+            disp_prior_14 = None
+            validity_14   = None
+            disp_prior_full = None
+            validity_full   = None
+
+            if depth_rs_mm is not None:
+                depth_rs_m, validity_full, disp_prior_full = self._preprocess_depth(depth_rs_mm)
+                # disparity scales with image width, hence the /4 before resizing to (H//4, W//4)
+                disp_prior_14 = F.interpolate(disp_prior_full / 4.0, size=(H // 4, W // 4), mode='nearest')
+                validity_14   = F.interpolate(validity_full,          size=(H // 4, W // 4), mode='nearest')
+
+            # ── cost volume ───────────────────────────────────────────
+            gwc_volume = build_gwc_volume_optimized_pytorch1(
+                features_left[0], features_right[0],
+                self.args.max_disp // 4, s.cv_group,
+                normalize=self.args.normalize,
+            )
+            left_tmp      = s.proj_cmb(features_left[0])
+            right_tmp     = s.proj_cmb(features_right[0])
+            concat_volume = build_concat_volume_optimized_pytorch1(
+                left_tmp, right_tmp, maxdisp=self.args.max_disp // 4
+            )
+            del left_tmp, right_tmp
+
+            comb_volume = torch.cat([gwc_volume, concat_volume], dim=1)
+            del concat_volume, gwc_volume
+
+            comb_volume = s.corr_stem(comb_volume)
+            comb_volume = s.corr_feature_att(comb_volume, features_left[0])
+            comb_volume = s.cost_agg(comb_volume, features_left)
+
+            # ── Stage 2: Gaussian prior on logits ─────────────────────
+            logits = s.classifier(comb_volume).squeeze(1)   # (B, D, H/4, W/4)
+
+            if depth_rs_mm is not None:
+                D     = logits.shape[1]
+                d_idx = torch.arange(D, device=logits.device, dtype=logits.dtype).view(1, D, 1, 1)
+                # learnable width, forced positive and never below 0.1
+                sigma = self.depth_sigma.abs().clamp(min=0.1)
+                # unnormalised Gaussian log-density around the prior disparity ...
+                prior_bias = -0.5 * ((d_idx - disp_prior_14) / sigma) ** 2
+                # ... zeroed wherever the sensor depth is invalid, so those pixels keep their logits
+                prior_bias = prior_bias * validity_14
+                logits = logits + self.depth_prior_scale * prior_bias
+
+            prob        = F.softmax(logits, dim=1)
+            stereo_init = disparity_regression(prob, self.args.max_disp // 4)  # (B,1,H/4,W/4)
+
+            # ── Stage 3a: GRU init blend ──────────────────────────────
+            if depth_rs_mm is not None:
+                init_disp = self.depth_init_blend(stereo_init, disp_prior_14, validity_14)
+            else:
+                init_disp = stereo_init
+
+            # ── context network ───────────────────────────────────────
+            cnet_list = s.cnet(features_left[0], features_left[1], features_left[2])
+            cnet_list = list(cnet_list)
+            net_list  = [torch.tanh(x[0]) for x in cnet_list]
+            inp_list  = [torch.relu(x[1]) for x in cnet_list]
+            inp_list  = [s.cam(x) * x for x in inp_list]
+            att       = [s.sam(x) for x in inp_list]
+
+        # ── geometry encoding volume ──────────────────────────────────
+        # built outside autocast, with inputs cast explicitly to s.dtype
+        geo_fn = Combined_Geo_Encoding_Volume(
+            features_left[0].to(s.dtype),
+            features_right[0].to(s.dtype),
+            comb_volume.to(s.dtype),
+            num_levels=self.args.corr_levels,
+        )
+        b, c, h, w = features_left[0].shape
+        # column index grid of shape (b, h, w, 1)
+        coords = torch.arange(w, dtype=torch.float, device=init_disp.device).reshape(1, 1, w, 1).repeat(b, h, 1, 1)
+        disp   = init_disp.to(s.dtype)
+        disp_preds = []
+
+        # drop references to large intermediates before the iterative stage
+        del comb_volume, features_left, features_right, cnet_list
+
+        # ── GRU iterations ────────────────────────────────────────────
+        for itr in range(iters):
+            # no gradient flows through the disparity from one iteration to the next
+            disp     = disp.detach()
+            geo_feat = geo_fn(disp, coords, dx=s.dx, low_memory=low_memory)
+
+            with torch.amp.autocast('cuda', enabled=self.args.mixed_precision, dtype=U.AMP_DTYPE):
+                net_list, mask_feat_4, delta_disp = s.update_block(
+                    net_list, inp_list, geo_feat.to(s.dtype), disp, att
+                )
+
+            disp    = disp + delta_disp.to(s.dtype)
+            # in test mode, skip upsampling for every iteration except the last
+            if test_mode and itr < iters - 1:
+                continue
+
+            disp_up = self.upsample_disp(disp.to(s.dtype), mask_feat_4.to(s.dtype), stem_2x.to(s.dtype))
+            disp_preds.append(disp_up)
+
+        # ── Stage 3b: output blend (full resolution) ──────────────────
+        if depth_rs_mm is not None:
+            disp_preds = [
+                self.depth_output_blend(p, disp_prior_full, validity_full)
+                for p in disp_preds
+            ]
+
+        # NOTE(review): with iters == 0 the list is empty and [-1] raises IndexError
+        if test_mode:
+            return disp_preds[-1]
+
+        return init_disp, disp_preds
+
+
+# ── dataset ───────────────────────────────────────────────────────────────────
+
+class InboltDepthDataset(Dataset):
+    """Returns (left, right, disp_gt, valid, depth_rs_mm) for each sample.
+
+    Samples are read through ``DataSource.get_item_projected``. Both depth maps
+    are resized (nearest neighbour) to the left image size when they differ,
+    the two images are replicated to three channels, and the Zivid depth is
+    converted to a ground-truth disparity with ``disp = BF / depth``.
+    """
+
+    def __init__(self, root):
+        """Index the samples found under ``root`` and log how many there are."""
+        self.source = DataSource()
+        n = self.source.init_directory(input_rectified=root)
+        logging.info(f"DataSource found {n} samples in {root}")
+
+    def __len__(self):
+        """Number of samples indexed by the data source."""
+        return len(self.source.imgs)
+
+    def __getitem__(self, idx):
+        """Build the tensors for sample ``idx``.
+
+        Returns:
+          left_t, right_t : (3, H, W) float tensors clipped to [0, 255]
+          disp_t          : (1, H, W) float disparity, zero where invalid
+          valid_t         : (1, H, W) bool mask of usable ground-truth pixels
+          depth_rs_t      : (1, H, W) float32 copy of ``data['depth_rs']``
+        """
+        data        = self.source.get_item_projected(idx)
+        left        = data['left']
+        right       = data['right']
+        depth_zivid = data['depth_zivid']
+        depth_rs    = data['depth_rs']
+
+        h, w = left.shape[:2]
+        # nearest neighbour keeps depth values unblended and holes as holes
+        if depth_zivid.shape != (h, w):
+            depth_zivid = cv2.resize(depth_zivid, (w, h), interpolation=cv2.INTER_NEAREST)
+        if depth_rs.shape != (h, w):
+            depth_rs = cv2.resize(depth_rs, (w, h), interpolation=cv2.INTER_NEAREST)
+
+        # single-channel image -> float, replicated to 3 channels (H x W x 3)
+        left  = np.clip(left.astype(np.float32),  0, 255)
+        right = np.clip(right.astype(np.float32), 0, 255)
+        left  = np.stack([left,  left,  left],  axis=-1)
+        right = np.stack([right, right, right], axis=-1)
+
+        disp        = np.zeros_like(depth_zivid, dtype=np.float32)
+        # "> 0" alone would accept +inf and mark a disparity of 0 as valid
+        valid       = np.isfinite(depth_zivid) & (depth_zivid > 0)
+        disp[valid] = BF / depth_zivid[valid]
+
+        # Convert to float32 before handing over to torch: from_numpy rejects
+        # some NumPy dtypes outright and needs a well-formed contiguous array.
+        depth_rs = np.ascontiguousarray(depth_rs, dtype=np.float32)
+
+        left_t     = torch.from_numpy(left).permute(2, 0, 1).float()
+        right_t    = torch.from_numpy(right).permute(2, 0, 1).float()
+        disp_t     = torch.from_numpy(disp).unsqueeze(0).float()
+        valid_t    = torch.from_numpy(valid).unsqueeze(0)
+        depth_rs_t = torch.from_numpy(depth_rs).unsqueeze(0)
+
+        return left_t, right_t, disp_t, valid_t, depth_rs_t
+
+
+# ── loss ──────────────────────────────────────────────────────────────────────
+
+def sequence_loss(disp_preds, disp_gt, valid, gamma=GAMMA):
+    """RAFT-style weighted smooth-L1 sum over GRU iterations.
+
+    Prediction ``i`` of ``n`` is weighted by ``gamma ** (n - 1 - i)``, so the
+    last prediction has weight 1. Ground truth and mask are resized (nearest)
+    when a prediction has a different spatial size.
+
+    Args:
+      disp_preds: list of predicted disparity tensors.
+      disp_gt:    ground-truth disparity tensor.
+      valid:      boolean mask of pixels that contribute to the loss.
+      gamma:      per-iteration weight decay.
+
+    Returns:
+      Always a 0-dim tensor. If a prediction has no valid pixel it contributes
+      a zero that is still attached to the autograd graph, so callers can use
+      ``.item()`` and ``.backward()`` even for a sample without ground truth.
+    """
+    n    = len(disp_preds)
+    loss = 0.0
+    for i, pred in enumerate(disp_preds):
+        w = gamma ** (n - 1 - i)
+        gt = disp_gt
+        v  = valid
+        if pred.shape[-2:] != gt.shape[-2:]:
+            gt = F.interpolate(gt, size=pred.shape[-2:], mode='nearest')
+            v  = F.interpolate(valid.float(), size=pred.shape[-2:], mode='nearest').bool()
+        if not v.any():
+            # The mean over an empty selection would be NaN. Multiply before
+            # summing so a large half-precision sum cannot overflow to inf.
+            loss = loss + (pred * 0.0).sum()
+            continue
+        loss = loss + w * F.smooth_l1_loss(pred[v], gt[v])
+    if not torch.is_tensor(loss):
+        # no predictions at all: still hand back a tensor, not a Python float
+        loss = disp_gt.new_zeros(())
+    return loss
+
+
+def evaluate_split_loss(model, dataloader):
+    """Return the mean ``sequence_loss`` of ``model`` over ``dataloader``.
+
+    Runs without gradients and in eval mode. The mode the model was in on
+    entry is restored on exit, including when an exception is raised, instead
+    of unconditionally switching to train mode.
+
+    Args:
+      model:      module whose ``forward`` accepts ``depth_rs_mm``, ``iters``
+                  and ``test_mode``.
+      dataloader: yields ``(left, right, disp_gt, valid, depth_rs)`` batches.
+
+    Returns:
+      Average loss per batch as a float, or NaN for an empty dataloader.
+    """
+    if len(dataloader) == 0:
+        return float('nan')
+
+    was_training = model.training
+    model.eval()
+    total = 0.0
+    try:
+        with torch.no_grad():
+            for left, right, disp_gt, valid, depth_rs in dataloader:
+                left, right    = left.cuda(), right.cuda()
+                disp_gt, valid = disp_gt.cuda(), valid.cuda()
+                depth_rs       = depth_rs.cuda()
+
+                # pad so H and W are divisible by 32; predictions are unpadded below
+                padder = InputPadder(left.shape, divis_by=32, force_square=False)
+                left_p, right_p, dr_p = padder.pad(left, right, depth_rs)
+
+                with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+                    _init, preds = model.forward(left_p, right_p, depth_rs_mm=dr_p,
+                                                 iters=ITERS, test_mode=False)
+                    preds = [padder.unpad(p) for p in preds]
+                    # float() accepts both a 0-dim tensor and a plain Python float
+                    total += float(sequence_loss(preds, disp_gt, valid))
+    finally:
+        model.train(was_training)
+
+    return total / len(dataloader)
+
+
+# ── main ──────────────────────────────────────────────────────────────────────
+
+def main():
+    """Fine-tune the depth-prior stereo wrapper on the Inbolt dataset.
+
+    Loads the serialized base stereo model, wraps it in
+    ``FastFoundationStereoDepthRS_v2``, freezes ``stereo.feature`` and
+    ``stereo.stem_2``, then trains the prior/blend parameters at ``LR`` and
+    every other trainable parameter at ``0.1 * LR``. A checkpoint is written
+    whenever the test-split loss improves, and the final model goes to
+    ``OUT_PATH``.
+    """
+    U.set_logging_format()
+    U.set_seed(0)
+
+    logging.info(f"Loading base stereo model from {MODEL_PATH}")
+    # NOTE(security): weights_only=False unpickles arbitrary Python objects;
+    # only load checkpoints that come from a trusted source.
+    stereo_model = torch.load(MODEL_PATH, map_location='cuda', weights_only=False)
+    model = FastFoundationStereoDepthRS_v2(stereo_model).cuda()
+
+    # ── freeze entire feature extractor (backbone + FPN + stem_2) ─────
+    for param in model.stereo.feature.parameters():
+        param.requires_grad = False
+    for param in model.stereo.stem_2.parameters():
+        param.requires_grad = False
+    logging.info("Feature extractor (stereo.feature + stereo.stem_2) fully frozen.")
+    # NOTE(review): model.train() below also puts the frozen modules in train
+    # mode; if they contain BatchNorm/Dropout their behaviour still changes.
+
+    # ── parameter groups ──────────────────────────────────────────────
+    new_params = (
+        list(model.depth_init_blend.parameters())   +
+        list(model.depth_output_blend.parameters()) +
+        [model.depth_sigma, model.depth_prior_scale]
+    )
+    new_param_ids   = {id(p) for p in new_params}
+    finetune_params = [
+        p for p in model.parameters()
+        if p.requires_grad and id(p) not in new_param_ids
+    ]
+
+    n_new      = sum(p.numel() for p in new_params)
+    n_ft       = sum(p.numel() for p in finetune_params)
+    n_total    = sum(p.numel() for p in model.parameters())
+    logging.info(
+        f"Parameters — new (full LR): {n_new:,}  "
+        f"fine-tune (0.1× LR): {n_ft:,}  "
+        f"frozen: {n_total - n_new - n_ft:,}  "
+        f"total: {n_total:,}"
+    )
+
+    optimizer = torch.optim.AdamW(
+        [
+            {'params': new_params,      'lr': LR},
+            {'params': finetune_params, 'lr': LR * 0.1},
+        ],
+        weight_decay=1e-4,
+    )
+    scaler = torch.amp.GradScaler('cuda')
+
+    # ── dataset ───────────────────────────────────────────────────────
+    dataset = InboltDepthDataset(INBOLT_DIR)
+    n_total_data = len(dataset)
+    if n_total_data < 2:
+        raise RuntimeError(f"Need at least 2 samples, got {n_total_data}.")
+
+    # keep at least one sample on each side of the split
+    n_train = min(max(1, int(round(TRAIN_RATIO * n_total_data))), n_total_data - 1)
+    n_test  = n_total_data - n_train
+
+    split_gen = torch.Generator().manual_seed(SPLIT_SEED)
+    train_set, test_set = random_split(dataset, [n_train, n_test], generator=split_gen)
+
+    train_loader = DataLoader(train_set, batch_size=1, shuffle=True,  num_workers=0)
+    test_loader  = DataLoader(test_set,  batch_size=1, shuffle=False, num_workers=0)
+
+    logging.info(
+        f"Split (seed={SPLIT_SEED}): total={n_total_data}, "
+        f"train={len(train_set)} ({100.0*len(train_set)/n_total_data:.1f}%), "
+        f"test={len(test_set)} ({100.0*len(test_set)/n_total_data:.1f}%)"
+    )
+
+    model.train()
+    best_loss = float('inf')
+
+    # ── training loop ─────────────────────────────────────────────────
+    for epoch in range(EPOCHS):
+        epoch_loss = 0.0
+        n_steps    = 0
+
+        for left, right, disp_gt, valid, depth_rs in train_loader:
+            left, right    = left.cuda(), right.cuda()
+            disp_gt, valid = disp_gt.cuda(), valid.cuda()
+            depth_rs       = depth_rs.cuda()
+
+            # pad so H and W are divisible by 32; predictions are unpadded below
+            padder = InputPadder(left.shape, divis_by=32, force_square=False)
+            left_p, right_p, dr_p = padder.pad(left, right, depth_rs)
+
+            optimizer.zero_grad(set_to_none=True)
+
+            with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+                _init, preds = model.forward(
+                    left_p, right_p, depth_rs_mm=dr_p, iters=ITERS, test_mode=False
+                )
+                preds = [padder.unpad(p) for p in preds]
+                loss  = sequence_loss(preds, disp_gt, valid)
+
+            # A sample with no valid ground-truth pixel can yield a loss that
+            # carries no autograd graph (e.g. a plain float). scale()/backward()
+            # would fail on it, so skip the sample instead of aborting the run.
+            if not (torch.is_tensor(loss) and loss.requires_grad):
+                logging.warning("Skipping training sample without a differentiable loss.")
+                continue
+
+            scaler.scale(loss).backward()
+            # unscale first so the clipping threshold applies to true gradients
+            scaler.unscale_(optimizer)
+            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
+            scaler.step(optimizer)
+            scaler.update()
+
+            epoch_loss += loss.item()
+            n_steps    += 1
+
+        # average over the steps actually taken (equals len(train_loader) when nothing is skipped)
+        train_loss = epoch_loss / max(n_steps, 1)
+        train_eval = evaluate_split_loss(model, train_loader)
+        test_eval  = evaluate_split_loss(model, test_loader)
+
+        logging.info(
+            f"Epoch {epoch+1:3d}/{EPOCHS}  "
+            f"train_loss={train_loss:.4f}  "
+            f"train_eval={train_eval:.4f}  "
+            f"test_eval={test_eval:.4f}  "
+            f"depth_sigma={model.depth_sigma.item():.3f}  "
+            f"depth_prior_scale={model.depth_prior_scale.item():.4f}"
+        )
+
+        # a NaN test_eval compares False here, so it never replaces the best model
+        if test_eval < best_loss:
+            best_loss = test_eval
+            save_path = OUT_PATH.replace('.pth', f'_epoch_{epoch+1:03d}.pth')
+            torch.save(model, save_path)
+            logging.info(f"  → saved best model (test_eval={best_loss:.4f}) to {save_path}")
+
+    final_train = evaluate_split_loss(model, train_loader)
+    final_test  = evaluate_split_loss(model, test_loader)
+    logging.info(f"Final train error: {final_train:.4f}")
+    logging.info(f"Final test  error: {final_test:.4f}")
+    logging.info(f"Best  test  error: {best_loss:.4f}")
+    torch.save(model, OUT_PATH)
+    logging.info(f"Final model saved to {OUT_PATH}")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/finetune_inbolt_planes.py b/scripts/finetune_inbolt_planes.py
new file mode 100644
index 0000000..968199d
--- /dev/null
+++ b/scripts/finetune_inbolt_planes.py
@@ -0,0 +1,307 @@
+"""
+Fine-tune FastFoundationStereo on the Inbolt dataset.
+
+The Inbolt dataset provides:
+  - realsense/{idx}/mono0.png  : left IR image  (uint8, 480x640)
+  - realsense/{idx}/mono1.png  : right IR image (uint8, 480x640)
+  - zivid/{idx}/depthmap_mm.png: ground-truth depth in mm (Zivid scanner, 1024x1224)
+
+Strategy:
+  - Freeze the ViT-L backbone (model.feature) to prevent overfitting on small datasets.
+  - Train everything else with RAFT-style sequence loss over GRU iterations.
+  - IR uint8 images are replicated to 3 channels.
+  - Zivid depth is resized to RealSense image resolution before disparity conversion.
+  - Depth is converted to disparity: disp = BF / depth_mm.
+
+Usage:
+  cd /path/to/Fast-FoundationStereo
+  python scripts/finetune_inbolt_planes.py
+"""
+
+import os, sys, logging
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+sys.path.append(code_dir)
+
+import torch
+import torch.nn.functional as F
+import numpy as np
+import cv2
+from torch.utils.data import Dataset, DataLoader, random_split
+from core.utils.utils import InputPadder
+import Utils as U
+from scripts.data_manager_inbolt import DataSource
+
+
+# ── constants ────────────────────────────────────────────────────────────────
+
+#INBOLT_DIR   = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260322T091926Z-1-001/Data Collection'  # local path to the dataset
+INBOLT_DIR   = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260415T084601Z-3-001/Data Collection' 
+# MODEL_PATH = f'{code_dir}/../weights/20-30-48/model_best_bp2_serialize.pth'
+# OUT_PATH   = f'{code_dir}/../weights/20-30-48/model_finetuned_inbolt-20260415.pth'
+MODEL_PATH = f'{code_dir}/../weights/23-36-37/model_best_bp2_serialize.pth'
+OUT_PATH   = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt_planes_25.pth'
+
+
+# BF         = 49.8624*385.73  # D435: baseline_mm * focal_px (calibrated from camera)
+BF         = 50.102706998586 * 385.509887695312 # new data
+EPOCHS     = 120
+LR         = 2e-5
+ITERS      = 8          # GRU iterations (same as inference)
+GAMMA      = 0.9        # sequence loss weight decay
+TRAIN_RATIO = 0.75
+SPLIT_SEED  = 0
+
+# -- Helpers -------------------------------
+
+def measure_variability(img, levele_num = 2, kernel_size = 7):
+    """
+    Estimate local variability as the max-min spread inside a square window.
+
+    The image is reduced ``levele_num`` times with ``cv2.pyrDown``, the local
+    minimum (erode) and maximum (dilate) are taken over a
+    ``kernel_size`` x ``kernel_size`` window, and their absolute difference is
+    brought back to the input size with ``cv2.pyrUp`` plus a final resize.
+
+    The computation stays in float32. Casting to uint8, as done previously,
+    corrupts any input above 255 (for example depth in millimetres) and so
+    produces a meaningless spread.
+
+    Args:
+        img: single-channel image as a NumPy array.
+        levele_num: number of pyramid levels to go down before filtering.
+        kernel_size: side length of the min/max window, in reduced-image pixels.
+
+    Returns:
+        float32 array with the same height and width as ``img`` holding the
+        local (max - min) spread, in the units of ``img``.
+    """
+    full_h, full_w = img.shape[:2]
+
+    reduced = np.asarray(img, dtype=np.float32)
+    for _ in range(levele_num):
+        reduced = cv2.pyrDown(reduced)
+
+    # Create a kernel of ones for min/max filtering
+    kernel = np.ones((kernel_size, kernel_size), np.uint8)
+
+    # erode = local minimum, dilate = local maximum within the kernel
+    min_values = cv2.erode(reduced, kernel)
+    max_values = cv2.dilate(reduced, kernel)
+    max_diff   = cv2.absdiff(max_values, min_values)
+
+    for _ in range(levele_num):
+        max_diff = cv2.pyrUp(max_diff)
+
+    # pyrDown/pyrUp round odd sizes, so force the exact input size; cv2 wants (width, height)
+    max_diff = cv2.resize(max_diff, (full_w, full_h))
+
+    return max_diff.astype(np.float32)
+
+def find_flat_regions(disp_gt, valid, max_variability=50.0):
+    """Restrict a validity mask to pixels with low local variability.
+
+    No plane fitting is performed: a pixel is kept when it is already valid and
+    the local spread reported by ``measure_variability`` (2 pyramid levels) is
+    below ``max_variability``.
+
+    Args:
+        disp_gt: (H, W) NumPy map to analyse. Despite the name, the dataset in
+            this file passes the depth map rather than the disparity.
+        valid: (H, W) boolean mask of pixels that have ground truth.
+        max_variability: exclusive upper bound on the local spread, in the
+            units returned by ``measure_variability``.
+
+    Returns:
+        (H, W) boolean mask, a subset of ``valid``.
+    """
+    local_spread = measure_variability(disp_gt, levele_num=2)
+    return valid & (local_spread < max_variability)
+
+
+# ── dataset ──────────────────────────────────────────────────────────────────
+
+class InboltDataset(Dataset):
+    """Stereo pairs with Zivid-derived disparity, masked to low-variability pixels."""
+
+    def __init__(self, root,train_mode=True):
+        """Index the samples under ``root`` through a ``DataSource``."""
+        self.source = DataSource(train_mode=train_mode)
+        sample_count = self.source.init_directory(input_rectified=root)
+        logging.info(f"DataSource found {sample_count} samples in {root}")
+
+    def __len__(self):
+        """Number of samples indexed by the data source."""
+        return len(self.source.imgs)
+
+    def __getitem__(self, idx):
+        """Return ``(left, right, disp, valid)`` tensors for sample ``idx``."""
+        sample   = self.source.get_item_projected(idx)
+        ir_left  = sample['left']
+        ir_right = sample['right']
+        depth_mm = sample['depth_zivid']
+
+        # bring the depth map onto the stereo image grid (nearest neighbour)
+        rows, cols = ir_left.shape[:2]
+        if depth_mm.shape != (rows, cols):
+            depth_mm = cv2.resize(depth_mm, (cols, rows), interpolation=cv2.INTER_NEAREST)
+
+        def _as_rgb_tensor(gray):
+            # clip to [0, 255] as float32, replicate to 3 channels, move channels first
+            gray = np.clip(gray.astype(np.float32), 0, 255)
+            rgb  = np.stack([gray] * 3, axis=-1)
+            return torch.from_numpy(rgb).permute(2, 0, 1).float()
+
+        # disparity = BF / depth wherever a positive depth exists, zero elsewhere
+        has_depth = depth_mm > 0
+        disparity = np.zeros_like(depth_mm, dtype=np.float32)
+        disparity[has_depth] = BF / depth_mm[has_depth]
+
+        # keep only the pixels that find_flat_regions accepts on the depth map
+        flat_mask = find_flat_regions(depth_mm, has_depth)
+
+        return (
+            _as_rgb_tensor(ir_left),
+            _as_rgb_tensor(ir_right),
+            torch.from_numpy(disparity).unsqueeze(0).float(),
+            torch.from_numpy(flat_mask).unsqueeze(0),
+        )
+
+
+# ── loss ─────────────────────────────────────────────────────────────────────
+
+
+def sequence_loss(disp_preds, disp_gt, valid, gamma=GAMMA):
+    """RAFT-style weighted sum of smooth-L1 losses over GRU iterations.
+
+    Prediction ``i`` of ``n`` is weighted by ``gamma ** (n - 1 - i)``, so the
+    last prediction has weight 1. Ground truth and mask are resized (nearest)
+    when a prediction has a different spatial size.
+
+    Args:
+        disp_preds: list of predicted disparity tensors.
+        disp_gt: ground-truth disparity tensor.
+        valid: boolean mask of pixels that contribute to the loss.
+        gamma: per-iteration weight decay.
+
+    Returns:
+        A 0-dim tensor. A prediction without any valid pixel contributes a
+        graph-attached zero instead of the NaN that ``smooth_l1_loss`` returns
+        for an empty selection.
+    """
+    n    = len(disp_preds)
+    loss = 0.0
+    for i, pred in enumerate(disp_preds):
+        w  = gamma ** (n - 1 - i)
+        gt = disp_gt
+        v  = valid
+        if pred.shape[-2:] != gt.shape[-2:]:
+            gt = F.interpolate(gt, size=pred.shape[-2:], mode='nearest')
+            v  = F.interpolate(valid.float(), size=pred.shape[-2:], mode='nearest').bool()
+        if not v.any():
+            # Multiply before summing so a large half-precision sum cannot
+            # overflow to inf and turn the zero into NaN.
+            loss = loss + (pred * 0.0).sum()
+            continue
+        loss = loss + w * F.smooth_l1_loss(pred[v], gt[v])
+    if not torch.is_tensor(loss):
+        # no predictions at all: still hand back a tensor, not a Python float
+        loss = disp_gt.new_zeros(())
+    return loss
+
+
+def evaluate_split_loss(model, dataloader):
+    """Evaluate average sequence loss over a dataloader (no gradient updates).
+
+    The model is switched to eval mode for the pass. The mode it was in on
+    entry is restored afterwards, including when an exception is raised.
+
+    Args:
+        model: stereo module whose ``forward`` accepts ``iters`` and ``test_mode``.
+        dataloader: yields ``(left, right, disp_gt, valid)`` batches.
+
+    Returns:
+        Mean loss per batch as a float, or NaN for an empty dataloader.
+    """
+    if len(dataloader) == 0:
+        return float('nan')
+
+    was_training = model.training
+    model.eval()
+    total_loss = 0.0
+
+    try:
+        with torch.no_grad():
+            for left, right, disp_gt, valid in dataloader:
+                left, right = left.cuda(), right.cuda()
+                disp_gt, valid = disp_gt.cuda(), valid.cuda()
+
+                # pad so H and W are divisible by 32; predictions are unpadded below
+                padder = InputPadder(left.shape, divis_by=32, force_square=False)
+                left_p, right_p = padder.pad(left, right)
+
+                with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+                    _init_disp, disp_preds = model.forward(
+                        left_p, right_p, iters=ITERS, test_mode=False
+                    )
+                    disp_preds = [padder.unpad(p) for p in disp_preds]
+                    loss = sequence_loss(disp_preds, disp_gt, valid)
+
+                # float() accepts both a 0-dim tensor and a plain Python float
+                total_loss += float(loss)
+    finally:
+        model.train(was_training)
+
+    return total_loss / len(dataloader)
+
+
+# ── main ─────────────────────────────────────────────────────────────────────
+
+def main():
+    """Fine-tune the serialized stereo model on the Inbolt dataset.
+
+    Loads the full model object from ``MODEL_PATH``, freezes ``model.feature``,
+    trains the remaining parameters with AdamW and mixed precision, saves a
+    checkpoint whenever the test-split loss improves, and finally writes the
+    last model to ``OUT_PATH``.
+    """
+    U.set_logging_format()
+    U.set_seed(0)
+
+    # load full model object (weights + architecture)
+    logging.info(f"Loading model from {MODEL_PATH}")
+    # NOTE(security): weights_only=False unpickles arbitrary Python objects;
+    # only load checkpoints that come from a trusted source.
+    model = torch.load(MODEL_PATH, map_location='cuda', weights_only=False)
+
+    # freeze the backbone so a small dataset cannot overfit it
+    for param in model.feature.parameters():
+        param.requires_grad = False
+    logging.info("ViT backbone frozen.")
+
+    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
+    total     = sum(p.numel() for p in model.parameters())
+    logging.info(f"Trainable: {trainable:,} / {total:,} parameters")
+
+    model = model.cuda().train()
+    logging.info("Model on single GPU.")
+
+    optimizer = torch.optim.AdamW(
+        [p for p in model.parameters() if p.requires_grad], lr=LR, weight_decay=1e-4
+    )
+    scaler = torch.amp.GradScaler('cuda')
+
+    dataset = InboltDataset(INBOLT_DIR, train_mode=True)
+    n_total = len(dataset)
+
+    if n_total < 2:
+        raise RuntimeError(f"Need at least 2 samples for a 75/25 train/test split, got {n_total}.")
+
+    # keep at least one sample on each side of the split
+    n_train = int(round(TRAIN_RATIO * n_total))
+    n_train = min(max(1, n_train), n_total - 1)
+    n_test = n_total - n_train
+
+    split_generator = torch.Generator().manual_seed(SPLIT_SEED)
+    train_set, test_set = random_split(dataset, [n_train, n_test], generator=split_generator)
+
+    train_loader = DataLoader(train_set, batch_size=1, shuffle=True, num_workers=0)
+    test_loader = DataLoader(test_set, batch_size=1, shuffle=False, num_workers=0)
+
+    logging.info(
+        f"Random split with seed={SPLIT_SEED}: total={n_total}, train={len(train_set)} ({100.0*len(train_set)/n_total:.1f}%), "
+        f"test={len(test_set)} ({100.0*len(test_set)/n_total:.1f}%)"
+    )
+
+    best_loss = float('inf')
+
+    for epoch in range(EPOCHS):
+        epoch_loss = 0.0
+        n_steps = 0
+
+        for left, right, disp_gt, valid in train_loader:
+            left, right = left.cuda(), right.cuda()
+            disp_gt, valid = disp_gt.cuda(), valid.cuda()
+
+            # pad so H and W are divisible by 32
+            padder = InputPadder(left.shape, divis_by=32, force_square=False)
+            left_p, right_p = padder.pad(left, right)
+
+            optimizer.zero_grad(set_to_none=True)
+
+            with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+                _init_disp, disp_preds = model.forward(
+                    left_p, right_p, iters=ITERS, test_mode=False
+                )
+                disp_preds = [padder.unpad(p) for p in disp_preds]
+                loss = sequence_loss(disp_preds, disp_gt, valid)
+
+            # A sample whose mask selects no pixel can produce a non-finite or
+            # graph-less loss. Skip it so it neither poisons the epoch average
+            # nor triggers a GradScaler back-off.
+            if not (torch.is_tensor(loss) and bool(torch.isfinite(loss))):
+                logging.warning("Skipping training sample with an unusable loss.")
+                continue
+
+            scaler.scale(loss).backward()
+            # unscale first so the clipping threshold applies to true gradients
+            scaler.unscale_(optimizer)
+            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
+            scaler.step(optimizer)
+            scaler.update()
+
+            epoch_loss += loss.item()
+            n_steps += 1
+
+        # average over the steps actually taken (equals len(train_loader) when nothing is skipped)
+        train_loss = epoch_loss / max(n_steps, 1)
+        train_eval_error = evaluate_split_loss(model, train_loader)
+        test_eval_error = evaluate_split_loss(model, test_loader)
+
+        logging.info(
+            f"Epoch {epoch+1:3d}/{EPOCHS}  train_loss={train_loss:.4f}  "
+            f"train_eval_error={train_eval_error:.4f}  test_eval_error={test_eval_error:.4f}"
+        )
+
+        # a NaN test error compares False here, so it never replaces the best model
+        if test_eval_error < best_loss:
+            best_loss = test_eval_error
+            torch.save(model, OUT_PATH.replace('.pth', f'_epoch_{epoch+1:03d}.pth'))
+            logging.info(f"  → saved best model (test_eval_error={best_loss:.4f})")
+
+    final_train_error = evaluate_split_loss(model, train_loader)
+    final_test_error = evaluate_split_loss(model, test_loader)
+    logging.info(f"Final train error: {final_train_error:.4f}")
+    logging.info(f"Final test error:  {final_test_error:.4f}")
+    logging.info(f"Training complete. Best test error: {best_loss:.4f}")
+    # the message below used to be logged without the model ever being written
+    torch.save(model, OUT_PATH)
+    logging.info(f"Model saved to {OUT_PATH}")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/finetune_inbolt_planes_bf.py b/scripts/finetune_inbolt_planes_bf.py
new file mode 100644
index 0000000..926c10c
--- /dev/null
+++ b/scripts/finetune_inbolt_planes_bf.py
@@ -0,0 +1,371 @@
+"""
+Fine-tune FastFoundationStereo on the Inbolt dataset.
+The loss is computed on depth rather than disparity, since depth is more directly related to the Zivid ground truth and less sensitive to focal-length / baseline calibration errors.
+
+The Inbolt dataset provides:
+  - realsense/{idx}/mono0.png  : left IR image  (uint8, 480x640)
+  - realsense/{idx}/mono1.png  : right IR image (uint8, 480x640)
+  - zivid/{idx}/depthmap_mm.png: ground-truth depth in mm (Zivid scanner, 1024x1224)
+
+Strategy:
+  - Freeze the ViT-L backbone (model.feature) to prevent overfitting on small datasets.
+  - Train everything else with RAFT-style sequence loss over GRU iterations.
+  - IR uint8 images are replicated to 3 channels.
+  - Zivid depth is resized to RealSense image resolution and kept in mm as the training target.
+  - Predicted disparity is converted to depth for the loss: depth_mm = BF / disparity.
+
+Usage:
+  cd /path/to/Fast-FoundationStereo
+  python scripts/finetune_inbolt_planes_bf.py
+"""
+
+import os, sys, logging
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+sys.path.append(code_dir)
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+import cv2
+from torch.utils.data import Dataset, DataLoader, random_split
+from core.utils.utils import InputPadder
+import Utils as U
+from scripts.data_manager_inbolt import DataSource
+
+
+# ── constants ────────────────────────────────────────────────────────────────
+
+#INBOLT_DIR   = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260322T091926Z-1-001/Data Collection'  # local path to the dataset
+INBOLT_DIR   = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260415T084601Z-3-001/Data Collection' 
+# MODEL_PATH = f'{code_dir}/../weights/20-30-48/model_best_bp2_serialize.pth'
+# OUT_PATH   = f'{code_dir}/../weights/20-30-48/model_finetuned_inbolt-20260415.pth'
+MODEL_PATH = f'{code_dir}/../weights/23-36-37/model_best_bp2_serialize.pth'
+OUT_PATH   = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt_planes_bf.pth'
+
+
+# BF         = 49.8624*385.73  # D435 - focal_px * baseline_mm (calibrated from camera)
+BF         = 50.102706998586 * 385.509887695312 # product of focal length (px) and baseline (mm) for the new data
+EPOCHS     = 120        # number of passes over the training split
+LR         = 2e-5       # AdamW learning rate
+ITERS      = 8          # GRU iterations (same as inference)
+GAMMA      = 0.9        # sequence loss weight decay
+TRAIN_RATIO = 0.75      # fraction of samples assigned to the training split
+SPLIT_SEED  = 0         # seed of the generator used for the random train/test split
+
+# -- Helpers -------------------------------
+
+def measure_variability(img, levele_num = 2):
+    """Return a per-pixel local range (max - min) map of a single-channel image.
+
+    The image is reduced `levele_num` times with `cv2.pyrDown`, the minimum and
+    maximum inside a 7x7 window are taken with erode / dilate, and their
+    absolute difference is brought back to the input resolution.
+
+    The filtering runs in float32. The earlier cast to uint8 did not preserve
+    values above 255 (e.g. depth in mm), which produced false large differences
+    wherever the input crossed the uint8 range.
+
+    Args:
+        img: 2-D NumPy array of shape (H, W).
+        levele_num: number of pyramid levels to descend before filtering.
+
+    Returns:
+        float32 array of shape (H, W); larger values mean a less flat
+        neighbourhood.
+    """
+    img_size    = img.shape
+    kernel_size = 7
+
+    # Work in float32 so values outside [0, 255] keep their magnitude.
+    img         = np.asarray(img, dtype=np.float32)
+    for _ in range(levele_num):
+        img     = cv2.pyrDown(img)
+
+    # All-ones structuring element: erode / dilate become min / max filters.
+    kernel      = np.ones((kernel_size, kernel_size), np.uint8)
+    min_values  = cv2.erode(img, kernel)
+    max_values  = cv2.dilate(img, kernel)
+
+    # Local range inside each window.
+    max_diff    = cv2.absdiff(max_values, min_values)
+
+    # Undo the pyramid, then resize because pyrUp does not necessarily
+    # reproduce the original size when a dimension was odd on the way down.
+    for _ in range(levele_num):
+        max_diff = cv2.pyrUp(max_diff)
+
+    # cv2.resize expects (width, height), hence the reversed shape.
+    max_diff    = cv2.resize(max_diff, img_size[::-1])
+    return max_diff.astype(np.float32)
+
+def find_flat_regions(disp_gt, valid):
+    """Restrict `valid` to pixels whose local variability is below 50.0.
+
+    No plane is fitted: the map returned by `measure_variability` (two pyramid
+    levels) is thresholded and AND-ed with the incoming mask.
+
+    `disp_gt` is the (H, W) ground-truth map; `valid` is a boolean mask of the same shape.
+    """
+    local_range = measure_variability(disp_gt, levele_num=2)  # (H, W) max-min measure
+    is_flat     = local_range < 50.0                          # low-variability pixels only
+    return valid & is_flat
+
+
+# ── dataset ──────────────────────────────────────────────────────────────────
+
+class InboltDataset(Dataset):
+    """Inbolt stereo samples: left/right IR images, Zivid depth and a flat-region mask."""
+
+    def __init__(self, root,train_mode=True):
+        self.source = DataSource(train_mode=train_mode)
+        n = self.source.init_directory(input_rectified=root)
+        logging.info(f"DataSource found {n} samples in {root}")
+
+    def __len__(self):
+        return len(self.source.imgs)
+
+    def __getitem__(self, idx):
+        """Return (left, right, depth, valid) tensors for sample `idx`.
+
+        left / right are (3, H, W) float images, depth is (1, H, W) float and
+        valid is the (1, H, W) mask of flat pixels with depth > 0.
+        """
+        def to_pseudo_rgb(ir):
+            # clip to [0, 255] as float32, then replicate into three channels (H x W x 3)
+            ir = np.clip(ir.astype(np.float32), 0, 255)
+            return np.stack([ir, ir, ir], axis=-1)
+
+        def to_chw(hwc):
+            return torch.from_numpy(hwc).permute(2, 0, 1).float()   # (3, H, W)
+
+        sample = self.source.get_item_projected(idx)
+        depth  = sample['depth_zivid']   # float32, mm  (Zivid resolution)
+
+        # bring the depth map to the stereo image resolution when they differ
+        target_hw = tuple(sample['left'].shape[:2])
+        if depth.shape != target_hw:
+            depth = cv2.resize(depth, target_hw[::-1], interpolation=cv2.INTER_NEAREST)
+
+        # supervise only flat regions that have a depth measurement
+        valid   = find_flat_regions(depth, depth > 0)
+        left_t  = to_chw(to_pseudo_rgb(sample['left']))
+        right_t = to_chw(to_pseudo_rgb(sample['right']))
+        depth_t = torch.from_numpy(depth).unsqueeze(0).float()   # (1, H, W)
+        valid_t = torch.from_numpy(valid).unsqueeze(0)           # (1, H, W) bool
+        return left_t, right_t, depth_t, valid_t
+
+
+# ── loss ─────────────────────────────────────────────────────────────────────
+
+
+def sequence_loss(depth_preds, depth_gt, valid, gamma=GAMMA):
+    """RAFT-style smooth-L1 over `depth_preds`: item i is weighted gamma**(n-1-i); GT and mask are
+    nearest-resized to a prediction's size on mismatch. An empty mask adds 0 instead of NaN."""
+    n, loss = len(depth_preds), 0.0
+    for i, pred in enumerate(depth_preds):
+        gt, v = depth_gt, valid
+        if pred.shape[-2:] != gt.shape[-2:]:
+            gt = F.interpolate(gt, size=pred.shape[-2:], mode='nearest')
+            v  = F.interpolate(valid.float(), size=pred.shape[-2:], mode='nearest').bool()
+        # the mean over zero selected pixels is NaN, so an empty mask adds a graph-connected zero
+        term = F.smooth_l1_loss(pred[v], gt[v]) if v.any() else pred.sum() * 0.0
+        loss = loss + gamma ** (n - 1 - i) * term
+    return loss
+
+
+def evaluate_split_loss(model, dataloader):
+    """Return the mean `sequence_loss` over `dataloader`, or NaN when it has no batches.
+
+    The pass runs in eval mode without gradients; the model is switched to train
+    mode before returning.
+    """
+    n_batches = len(dataloader)
+    if n_batches == 0:
+        return float('nan')
+
+    model.eval()
+    batch_losses = []
+    with torch.no_grad():
+        for batch in dataloader:
+            left, right, depth_gt, valid = (t.cuda() for t in batch)
+
+            # pad so H and W are divisible by 32; predictions are unpadded before the loss
+            padder = InputPadder(left.shape, divis_by=32, force_square=False)
+            left_p, right_p = padder.pad(left, right)
+
+            with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+                _init_disp, _disp_preds, depth_preds = model.forward(left_p, right_p, iters=ITERS, test_mode=False)
+                depth_preds = [padder.unpad(p) for p in depth_preds]
+                batch_losses.append(sequence_loss(depth_preds, depth_gt, valid).item())
+
+    model.train()
+    return sum(batch_losses) / n_batches
+
+# ── depth head ───────────────────────────────────────────────────────────────
+
+class DepthHead(nn.Module):
+    """Convert predicted disparity to depth via depth = BF / max(disparity, eps).
+
+    `weight` is a learnable scalar kept for a (BF + weight) calibration offset, but the
+    current forward pass does not apply it, so it receives no gradient.
+    """
+
+    def __init__(self, bf: float, eps: float = 1.0):
+        super().__init__()
+        self.register_buffer('bf', torch.tensor(float(bf)))
+        self.weight = nn.Parameter(torch.zeros(1))
+        self.eps = eps  # lower clamp on disparity (was hard-coded to 1); bounds depth at bf / eps
+
+    def forward(self, disparity: torch.Tensor) -> torch.Tensor:
+        # offset variant, currently disabled: (self.bf + self.weight) / disparity.clamp(min=self.eps)
+        return self.bf / disparity.clamp(min=self.eps)
+
+
+# ── model wrapper ─────────────────────────────────────────────────────────────
+
+class FastFoundationStereoWithDepth(nn.Module):
+    """
+    Pretrained FastFoundationStereo followed by a DepthHead that turns each
+    predicted disparity map into depth.
+
+    forward(..., test_mode=False) → (init_disp, disp_preds, depth_preds)
+    forward(..., test_mode=True)  → (depth, disp_up)
+    """
+
+    def __init__(self, base_model: nn.Module, bf: float = BF):
+        super().__init__()
+        self.base = base_model
+        self.depth_head = DepthHead(bf=bf)
+
+    @property
+    def feature(self):
+        """Backbone of the wrapped model, exposed so main() can freeze `model.feature`."""
+        return self.base.feature
+
+    def forward(
+        self,
+        image1: torch.Tensor,
+        image2: torch.Tensor,
+        iters: int = 12,
+        test_mode: bool = False,
+        **kwargs,
+    ):
+        """Run the base model, then convert its disparity output(s) to depth from float32 inputs."""
+        base_out = self.base.forward(image1, image2, iters=iters, test_mode=test_mode, **kwargs)
+
+        if not test_mode:
+            # training: the base output unpacks into the initial disparity and a list of predictions
+            init_disp, disp_preds = base_out
+            return init_disp, disp_preds, [self.depth_head(p.float()) for p in disp_preds]
+
+        # inference: the base output is used as a single disparity tensor; depth comes first
+        return self.depth_head(base_out.float()), base_out
+
+
+
+# ── main ─────────────────────────────────────────────────────────────────────
+
+def main():
+    """Fine-tune the depth wrapper on the Inbolt split and checkpoint every epoch that improves the test loss."""
+    U.set_logging_format()
+    U.set_seed(0)
+
+    # load full model object (weights + architecture)
+    logging.info(f"Loading model from {MODEL_PATH}")
+    base_model = torch.load(MODEL_PATH, map_location='cuda', weights_only=False)
+
+    model = FastFoundationStereoWithDepth(base_model)
+
+    # freeze the backbone (model.feature) to limit overfitting on a small dataset
+    for param in model.feature.parameters():
+        param.requires_grad = False
+    logging.info("ViT backbone frozen.")
+
+    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
+    total     = sum(p.numel() for p in model.parameters())
+    logging.info(f"Trainable: {trainable:,} / {total:,} parameters")
+
+    model = model.cuda().train()
+    logging.info("Model on single GPU.")
+
+    optimizer = torch.optim.AdamW(
+        [p for p in model.parameters() if p.requires_grad], lr=LR, weight_decay=1e-4
+    )
+    scaler = torch.amp.GradScaler('cuda')
+
+    dataset = InboltDataset(INBOLT_DIR, train_mode=True)
+    n_total = len(dataset)
+
+    if n_total < 2:
+        raise RuntimeError(f"Need at least 2 samples for a 75/25 train/test split, got {n_total}.")
+
+    n_train = int(round(TRAIN_RATIO * n_total))
+    n_train = min(max(1, n_train), n_total - 1)  # keep at least one sample on each side
+    n_test = n_total - n_train
+
+    split_generator = torch.Generator().manual_seed(SPLIT_SEED)
+    train_set, test_set = random_split(dataset, [n_train, n_test], generator=split_generator)
+
+    train_loader = DataLoader(train_set, batch_size=1, shuffle=True, num_workers=0)
+    test_loader = DataLoader(test_set, batch_size=1, shuffle=False, num_workers=0)
+
+    logging.info(
+        f"Random split with seed={SPLIT_SEED}: total={n_total}, train={len(train_set)} ({100.0*len(train_set)/n_total:.1f}%), "
+        f"test={len(test_set)} ({100.0*len(test_set)/n_total:.1f}%)"
+    )
+
+    best_loss, best_path = float('inf'), None  # best_path: checkpoint file of the best epoch so far
+
+    for epoch in range(EPOCHS):
+        epoch_loss = 0.0
+
+        for left, right, depth_gt, valid in train_loader:
+            left, right = left.cuda(), right.cuda()
+            depth_gt, valid = depth_gt.cuda(), valid.cuda()
+
+            # pad so H and W are divisible by 32
+            padder = InputPadder(left.shape, divis_by=32, force_square=False)
+            left_p, right_p = padder.pad(left, right)
+
+            optimizer.zero_grad(set_to_none=True)
+
+            with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+                _init_disp, disp_preds, depth_preds = model.forward(left_p, right_p, iters=ITERS, test_mode=False)
+                depth_preds = [padder.unpad(p) for p in depth_preds]
+                loss = sequence_loss(depth_preds, depth_gt, valid)
+
+            scaler.scale(loss).backward()
+            scaler.unscale_(optimizer)  # unscale first so clipping sees the true gradient norm
+            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
+            scaler.step(optimizer)
+            scaler.update()
+
+            epoch_loss += loss.item()
+
+        train_loss = epoch_loss / len(train_loader)
+        train_eval_error = evaluate_split_loss(model, train_loader)
+        test_eval_error = evaluate_split_loss(model, test_loader)
+
+        logging.info(
+            f"Epoch {epoch+1:3d}/{EPOCHS}  train_loss={train_loss:.4f}  "
+            f"train_eval_error={train_eval_error:.4f}  test_eval_error={test_eval_error:.4f}"
+        )
+
+        if test_eval_error < best_loss:
+            best_loss = test_eval_error
+            best_path = OUT_PATH.replace('.pth', f'_epoch_{epoch+1:03d}.pth')
+            torch.save(model, best_path)
+            logging.info(f"  → saved best model (test_eval_error={best_loss:.4f})")
+
+    final_train_error = evaluate_split_loss(model, train_loader)
+    final_test_error = evaluate_split_loss(model, test_loader)
+    logging.info(f"Final train error: {final_train_error:.4f}")
+    logging.info(f"Final test error:  {final_test_error:.4f}")
+    logging.info(f"Training complete. Best test error: {best_loss:.4f}")
+    logging.info(f"Best model saved to {best_path}" if best_path else "No checkpoint was saved.")
+
+
+if __name__ == '__main__':  # run main() only when the file is executed as a script
+    main()
diff --git a/scripts/finetune_inbolt_with_confidence.py b/scripts/finetune_inbolt_with_confidence.py
new file mode 100644
index 0000000..4b75be1
--- /dev/null
+++ b/scripts/finetune_inbolt_with_confidence.py
@@ -0,0 +1,448 @@
+"""
+Fine-tune FastFoundationStereo on the Inbolt dataset with an additional confidence head.
+
+The confidence head predicts pixel validity from the stereo pair:
+  confidence = 1 → pixel has a valid Zivid depth measurement
+  confidence = 0 → pixel has no valid Zivid measurement (specular, transparent, OOB)
+
+Loss:
+  - RAFT-style smooth-L1 sequence loss on valid pixels  (disparity)
+  - Binary cross-entropy on all pixels                   (confidence, target = valid_mask)
+
+At the end of training an evaluation section compares depth performance on the test split
+between the original model and the newly trained confidence model.
+
+Usage:
+  cd /path/to/Fast-FoundationStereo
+  python scripts/finetune_inbolt_with_confidence.py
+"""
+
+import os, sys, logging
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+sys.path.append(code_dir)
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+import cv2
+from torch.utils.data import Dataset, DataLoader, random_split
+from core.utils.utils import InputPadder
+from core.foundation_stereo import normalize_image
+import Utils as U
+from scripts.data_manager_inbolt import DataSource
+
+
+# ── constants ────────────────────────────────────────────────────────────────
+
+INBOLT_DIR  = r'/mnt/algonas/Local/Data/new_depth_stereo_datasets/Inbolt_datasets/Data Collection-20260415T084601Z-3-001/Data Collection'
+MODEL_PATH  = f'{code_dir}/../weights/23-36-37/model_best_bp2_serialize.pth'
+OUT_PATH    = f'{code_dir}/../weights/23-36-37/model_finetuned_inbolt_with_confidence-20260507.pth'
+
+BF          = 50.102706998586 * 385.509887695312   # focal_px * baseline_mm
+EPOCHS      = 120    # number of passes over the training split
+LR          = 2e-5   # AdamW learning rate
+ITERS       = 8      # iteration count passed to the model as `iters`
+GAMMA       = 0.9    # per-prediction decay of the sequence-loss weights
+CONF_WEIGHT = 1.0    # weight of confidence BCE loss relative to disparity loss
+TRAIN_RATIO = 0.75   # fraction of samples assigned to the training split
+SPLIT_SEED  = 0      # seed of the generator used for the random train/test split
+
+
+# ── confidence head ───────────────────────────────────────────────────────────
+
+class ConfidenceHead(nn.Module):
+    """Small conv head: input features → single-channel map at twice the resolution, in (0, 1)."""
+    def __init__(self, in_channels: int = 16):
+        super().__init__()
+        layers = [
+            nn.Conv2d(in_channels, 32, kernel_size=3, padding=1),
+            nn.BatchNorm2d(32),
+            nn.ReLU(inplace=True),
+            nn.ConvTranspose2d(32, 16, kernel_size=4, stride=2, padding=1),  # doubles H and W
+            nn.BatchNorm2d(16),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(16, 1, kernel_size=1),
+            nn.Sigmoid(),   # squashes the single output channel into (0, 1)
+        ]
+        self.net = nn.Sequential(*layers)  # same layer order, so state-dict keys are net.0 … net.7
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.net(x)
+
+
+# ── model wrapper ─────────────────────────────────────────────────────────────
+
+class FastFoundationStereoWithConfidence(nn.Module):
+    """
+    Pretrained FastFoundationStereo plus a ConfidenceHead fed by the base model's stem_2.
+
+    forward(..., test_mode=False) → (init_disp, disp_preds, conf)
+    forward(..., test_mode=True)  → (disp_up, conf)
+    """
+
+    def __init__(self, base_model: nn.Module):
+        super().__init__()
+        self.base = base_model
+        # in_channels must equal the channel count of base.stem_2's output (16 per the original note)
+        self.conf_head = ConfidenceHead(in_channels=16)
+
+    @property
+    def feature(self):
+        """Backbone of the wrapped model, exposed so main() can freeze `model.feature`."""
+        return self.base.feature
+
+    def forward(
+        self,
+        image1: torch.Tensor,
+        image2: torch.Tensor,
+        iters: int = 12,
+        test_mode: bool = False,
+        **kwargs,
+    ):
+        """Run the base model and predict a confidence map from `image1`."""
+        base_out = self.base.forward(image1, image2, iters=iters, test_mode=test_mode, **kwargs)
+
+        # stem_2 is evaluated again on the normalized `image1` under the base model's own
+        # mixed-precision setting; its output is cast to float32 before entering the head.
+        with torch.amp.autocast('cuda', enabled=self.base.args.mixed_precision, dtype=U.AMP_DTYPE):
+            stem_feats = self.base.stem_2(normalize_image(image1))
+        conf = self.conf_head(stem_feats.float())
+
+        if not test_mode:
+            init_disp, disp_preds = base_out
+            return init_disp, disp_preds, conf
+        return base_out, conf    # (disp_up, conf)
+
+
+# ── dataset ───────────────────────────────────────────────────────────────────
+
+class InboltDataset(Dataset):
+    """Inbolt stereo samples with Zivid depth converted to ground-truth disparity."""
+
+    def __init__(self, root: str):
+        self.source = DataSource()
+        n = self.source.init_directory(input_rectified=root)
+        logging.info(f"DataSource found {n} samples in {root}")
+
+    def __len__(self):
+        return len(self.source.imgs)
+
+    def __getitem__(self, idx):
+        """Return (left, right, disp, valid): two (3, H, W) images and (1, H, W) disparity / mask."""
+        def to_chw_rgb(ir):
+            # float32 in [0, 255], replicated to three channels, then H x W x 3 → (3, H, W)
+            ir = np.clip(ir.astype(np.float32), 0, 255)
+            return torch.from_numpy(np.stack([ir, ir, ir], axis=-1)).permute(2, 0, 1).float()
+
+        sample = self.source.get_item_projected(idx)
+        depth  = sample['depth_zivid']   # float32, mm  (Zivid resolution)
+
+        # bring the depth map to the stereo image resolution when they differ
+        target_hw = tuple(sample['left'].shape[:2])
+        if depth.shape != target_hw:
+            depth = cv2.resize(depth, target_hw[::-1], interpolation=cv2.INTER_NEAREST)
+
+        # disp = BF / depth where a measurement exists (depth > 0), 0 elsewhere
+        valid = depth > 0
+        disp  = np.zeros_like(depth, dtype=np.float32)
+        disp[valid] = BF / depth[valid]
+
+        disp_t  = torch.from_numpy(disp).unsqueeze(0).float()
+        valid_t = torch.from_numpy(valid).unsqueeze(0)
+        return to_chw_rgb(sample['left']), to_chw_rgb(sample['right']), disp_t, valid_t
+
+
+# ── loss ──────────────────────────────────────────────────────────────────────
+
+def sequence_loss(disp_preds, disp_gt, valid, gamma=GAMMA):
+    """RAFT-style smooth-L1 over `disp_preds`: item i is weighted gamma**(n-1-i); GT and mask are
+    nearest-resized to a prediction's size on mismatch. An empty mask adds 0 instead of NaN."""
+    n, loss = len(disp_preds), 0.0
+    for i, pred in enumerate(disp_preds):
+        gt, v = disp_gt, valid
+        if pred.shape[-2:] != gt.shape[-2:]:
+            gt = F.interpolate(gt, size=pred.shape[-2:], mode='nearest')
+            v  = F.interpolate(valid.float(), size=pred.shape[-2:], mode='nearest').bool()
+        # the mean over zero selected pixels is NaN, so an empty mask adds a graph-connected zero
+        term = F.smooth_l1_loss(pred[v], gt[v]) if v.any() else pred.sum() * 0.0
+        loss = loss + gamma ** (n - 1 - i) * term
+    return loss
+
+
+def sequence_loss_with_confidence(disp_preds, conf, disp_gt, valid, gamma=GAMMA, conf_weight=CONF_WEIGHT):
+    """
+    Return (total, disp_loss, conf_loss) with total = disp_loss + conf_weight * conf_loss.
+
+      - disp_loss: `sequence_loss` of the disparity predictions on valid pixels.
+      - conf_loss: BCE of `conf` against the validity mask (1 = valid) over all pixels.
+    """
+    # the mask is nearest-resized when its spatial size differs from the confidence map
+    target = valid.float()
+    if target.shape[-2:] != conf.shape[-2:]:
+        target = F.interpolate(target, size=conf.shape[-2:], mode='nearest')
+
+    disp_term = sequence_loss(disp_preds, disp_gt, valid, gamma)
+    with torch.amp.autocast('cuda', enabled=False):   # BCE is kept out of autocast, on float32 inputs
+        conf_term = F.binary_cross_entropy(conf.float(), target.float())
+    return disp_term + conf_weight * conf_term, disp_term, conf_term
+
+
+# ── evaluation helpers ────────────────────────────────────────────────────────
+
+def evaluate_split_loss(model, dataloader):
+    """Return the mean combined (disparity + confidence) loss over `dataloader`; NaN if it has no batches.
+
+    Runs in eval mode without gradients and switches the model to train mode before returning.
+    """
+    if len(dataloader) == 0:
+        return float('nan')
+
+    model.eval()
+    total_loss = 0.0
+    with torch.no_grad():
+        for left, right, disp_gt, valid in dataloader:
+            left, right = left.cuda(), right.cuda()
+            disp_gt, valid = disp_gt.cuda(), valid.cuda()
+
+            padder = InputPadder(left.shape, divis_by=32, force_square=False)
+            left_p, right_p = padder.pad(left, right)
+
+            with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+                _init_disp, disp_preds, conf = model.forward(left_p, right_p, iters=ITERS, test_mode=False)
+                disp_preds = [padder.unpad(p) for p in disp_preds]
+                conf = padder.unpad(conf)  # align with `valid`; a padded map made the loss rescale the mask
+                loss, _, _ = sequence_loss_with_confidence(disp_preds, conf, disp_gt, valid)
+            total_loss += loss.item()
+
+    model.train()
+    return total_loss / len(dataloader)
+
+
+def _depth_mae_m(model_or_fn, dataloader, with_confidence: bool):
+    """
+    Depth MAE in millimetres over pixels with a valid GT and a positive predicted disparity.
+
+    model_or_fn: a FastFoundationStereoWithConfidence (with_confidence=True), whose
+                 test-mode output is unpacked as (disp, conf), or the original
+                 FastFoundationStereo (with_confidence=False), used as disparity only.
+    The model's train/eval mode is not changed here.
+    Returns (mae_mm, coverage_pct); coverage is evaluated pixels / valid GT pixels.
+    """
+    def disp_to_depth_mm(disp):
+        # depth = BF / disp where disp > 0 and 0 elsewhere; the disp > 0 mask is returned too
+        positive = disp > 0
+        depth = np.zeros_like(disp)
+        depth[positive] = BF / disp[positive]  # mm
+        return depth, positive
+
+    abs_err_sum = 0.0
+    n_evaluated = 0
+    n_valid_gt = 0
+
+    with torch.no_grad():
+        for left, right, disp_gt, valid in dataloader:
+            left, right = left.cuda(), right.cuda()
+            disp_gt, valid = disp_gt.cuda(), valid.cuda()
+
+            padder = InputPadder(left.shape, divis_by=32, force_square=False)
+            left_p, right_p = padder.pad(left, right)
+
+            with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+                disp_up = model_or_fn.forward(left_p, right_p, iters=ITERS, test_mode=True)
+                if with_confidence:
+                    disp_up, _conf = disp_up
+
+            disp_up = padder.unpad(disp_up.float())  # (1, 1, H, W)
+
+            # negative predictions are clipped to 0 and therefore never evaluated
+            pred_depth, pred_ok = disp_to_depth_mm(disp_up.squeeze().cpu().numpy().clip(0, None))
+            gt_depth, _gt_ok = disp_to_depth_mm(disp_gt.squeeze().cpu().numpy())
+            valid_np = valid.squeeze().cpu().numpy().astype(bool)
+
+            # Only evaluate where GT is valid and the prediction is usable
+            mask = valid_np & pred_ok
+            n_valid_gt += int(valid_np.sum())
+            if mask.any():
+                abs_err_sum += float(np.abs(pred_depth[mask] - gt_depth[mask]).sum())
+                n_evaluated += int(mask.sum())
+
+    # max(..., 1) keeps both ratios defined when nothing was evaluated
+    mae_mm = abs_err_sum / max(n_evaluated, 1)
+    coverage = 100.0 * n_evaluated / max(n_valid_gt, 1)
+    return mae_mm, coverage
+
+
+def _confidence_metrics(model, dataloader):
+    """Return (accuracy, precision, recall) in percent, counting conf > 0.5 as a predicted-valid pixel."""
+    true_pos = true_neg = false_pos = false_neg = 0
+
+    with torch.no_grad():
+        for left, right, _disp_gt, valid in dataloader:
+            left, right, valid = left.cuda(), right.cuda(), valid.cuda()
+
+            padder = InputPadder(left.shape, divis_by=32, force_square=False)
+            left_p, right_p = padder.pad(left, right)
+
+            with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+                _disp_up, conf = model.forward(left_p, right_p, iters=ITERS, test_mode=True)
+
+            # threshold the unpadded confidence and compare it with the GT mask on the CPU
+            predicted = (padder.unpad(conf) > 0.5).squeeze().cpu()
+            actual    = valid.squeeze().cpu().bool()
+
+            true_pos  += int((predicted & actual).sum())
+            true_neg  += int((~predicted & ~actual).sum())
+            false_pos += int((predicted & ~actual).sum())
+            false_neg += int((~predicted & actual).sum())
+
+    # max(..., 1) keeps each ratio defined when its denominator would be zero
+    accuracy  = 100.0 * (true_pos + true_neg) / max(true_pos + true_neg + false_pos + false_neg, 1)
+    precision = 100.0 * true_pos / max(true_pos + false_pos, 1)
+    recall    = 100.0 * true_pos / max(true_pos + false_neg, 1)
+    return accuracy, precision, recall
+
+
+# ── main ──────────────────────────────────────────────────────────────────────
+
+def main():
+    """Fine-tune the confidence wrapper, checkpoint improving epochs, then compare depth MAE with the original model."""
+    U.set_logging_format()
+    U.set_seed(0)
+
+    logging.info(f"Loading base model from {MODEL_PATH}")
+    base_model = torch.load(MODEL_PATH, map_location='cuda', weights_only=False)
+
+    model = FastFoundationStereoWithConfidence(base_model)
+
+    # freeze the backbone (model.feature) — conf_head and the rest of the model will train
+    for param in model.feature.parameters():
+        param.requires_grad = False
+    logging.info("ViT backbone frozen.")
+
+    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
+    total     = sum(p.numel() for p in model.parameters())
+    logging.info(f"Trainable: {trainable:,} / {total:,} parameters")
+
+    model = model.cuda().train()
+
+    optimizer = torch.optim.AdamW(
+        [p for p in model.parameters() if p.requires_grad], lr=LR, weight_decay=1e-4
+    )
+    scaler = torch.amp.GradScaler('cuda')
+
+    dataset  = InboltDataset(INBOLT_DIR)
+    n_total  = len(dataset)
+
+    if n_total < 2:
+        raise RuntimeError(f"Need at least 2 samples, got {n_total}.")
+
+    n_train = min(max(1, int(round(TRAIN_RATIO * n_total))), n_total - 1)  # at least one sample per side
+    n_test  = n_total - n_train
+
+    split_gen = torch.Generator().manual_seed(SPLIT_SEED)
+    train_set, test_set = random_split(dataset, [n_train, n_test], generator=split_gen)
+
+    train_loader = DataLoader(train_set, batch_size=1, shuffle=True,  num_workers=0)
+    test_loader  = DataLoader(test_set,  batch_size=1, shuffle=False, num_workers=0)
+
+    logging.info(
+        f"Split seed={SPLIT_SEED}: total={n_total}, "
+        f"train={len(train_set)} ({100.0*len(train_set)/n_total:.1f}%), "
+        f"test={len(test_set)} ({100.0*len(test_set)/n_total:.1f}%)"
+    )
+
+    best_loss    = float('inf')
+    best_ckpt    = None   # checkpoint file of the best epoch so far
+
+    # ── training loop ─────────────────────────────────────────────────────────
+    for epoch in range(EPOCHS):
+        epoch_loss = epoch_disp_loss = epoch_conf_loss = 0.0
+
+        for left, right, disp_gt, valid in train_loader:
+            left, right = left.cuda(), right.cuda()
+            disp_gt, valid = disp_gt.cuda(), valid.cuda()
+
+            padder = InputPadder(left.shape, divis_by=32, force_square=False)
+            left_p, right_p = padder.pad(left, right)
+
+            optimizer.zero_grad(set_to_none=True)
+
+            with torch.amp.autocast('cuda', enabled=True, dtype=U.AMP_DTYPE):
+                _init_disp, disp_preds, conf = model.forward(left_p, right_p, iters=ITERS, test_mode=False)
+                disp_preds = [padder.unpad(p) for p in disp_preds]
+                conf = padder.unpad(conf)  # align with `valid`; a padded map made the loss rescale the mask
+                loss, d_loss, c_loss = sequence_loss_with_confidence(
+                    disp_preds, conf, disp_gt, valid
+                )
+
+            scaler.scale(loss).backward()
+            scaler.unscale_(optimizer)  # unscale first so clipping sees the true gradient norm
+            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
+            scaler.step(optimizer)
+            scaler.update()
+
+            epoch_loss      += loss.item()
+            epoch_disp_loss += d_loss.item()
+            epoch_conf_loss += c_loss.item()
+
+        n_batches = len(train_loader)
+        train_loss = epoch_loss      / n_batches
+        train_eval = evaluate_split_loss(model, train_loader)
+        test_eval  = evaluate_split_loss(model, test_loader)
+
+        logging.info(
+            f"Epoch {epoch+1:3d}/{EPOCHS}  "
+            f"train_loss={train_loss:.4f}  "
+            f"(disp={epoch_disp_loss/n_batches:.4f}  conf={epoch_conf_loss/n_batches:.4f})  "
+            f"train_eval={train_eval:.4f}  test_eval={test_eval:.4f}"
+        )
+
+        if test_eval < best_loss:
+            best_loss = test_eval
+            best_ckpt = OUT_PATH.replace('.pth', f'_epoch_{epoch+1:03d}.pth')
+            torch.save(model, best_ckpt)
+            logging.info(f"  → saved best model (test_eval={best_loss:.4f})")
+
+    logging.info(f"Training complete. Best test_eval={best_loss:.4f}")
+
+    # ── depth performance comparison ──────────────────────────────────────────
+    logging.info("\n── Depth performance comparison (test split) ──────────────────────────")
+
+    model.eval()
+
+    # confidence model (best checkpoint or final model)
+    eval_model = model
+    if best_ckpt is not None:
+        logging.info(f"Loading best checkpoint for evaluation: {best_ckpt}")
+        eval_model = torch.load(best_ckpt, map_location='cuda', weights_only=False)
+        eval_model.eval()
+
+    conf_mae, conf_cov = _depth_mae_m(eval_model, test_loader, with_confidence=True)
+    conf_acc, conf_prec, conf_rec = _confidence_metrics(eval_model, test_loader)
+    logging.info(
+        f"[Confidence model]  depth MAE={conf_mae:.2f} mm  coverage={conf_cov:.1f}%  "
+        f"conf_acc={conf_acc:.1f}%  conf_prec={conf_prec:.1f}%  conf_rec={conf_rec:.1f}%"
+    )
+
+    # original model for comparison
+    logging.info(f"Loading original model for comparison: {MODEL_PATH}")
+    orig_model = torch.load(MODEL_PATH, map_location='cuda', weights_only=False)
+    orig_model.eval()
+    orig_mae, orig_cov = _depth_mae_m(orig_model, test_loader, with_confidence=False)
+    logging.info(
+        f"[Original model]    depth MAE={orig_mae:.2f} mm  coverage={orig_cov:.1f}%"
+    )
+
+    mae_delta = conf_mae - orig_mae
+    sign = "+" if mae_delta >= 0 else ""
+    logging.info(
+        f"\nDepth MAE delta (confidence − original): {sign}{mae_delta:.2f} mm  "
+        f"(negative = confidence model is better)"
+    )
+    logging.info(f"Best checkpoint: {best_ckpt}")  # OUT_PATH itself is never written, only epoch-suffixed files
+
+
+if __name__ == '__main__':  # run main() only when the file is executed as a script
+    main()
diff --git a/scripts/make_onnx.py b/scripts/make_onnx.py
index aaa9915..3361024 100755
--- a/scripts/make_onnx.py
+++ b/scripts/make_onnx.py
@@ -63,7 +63,8 @@ def forward(self, left, right):
         opset_version=17,
         input_names = ['left', 'right'],
         output_names = ['features_left_04', 'features_left_08', 'features_left_16', 'features_left_32', 'features_right_04', 'stem_2x'],
-        do_constant_folding=True
+        do_constant_folding=True,
+        dynamo=True,
     )
 
     features_left_04, features_left_08, features_left_16, features_left_32, features_right_04, stem_2x = feature_runner(left_img, right_img)
@@ -77,7 +78,8 @@ def forward(self, left, right):
         opset_version=17,
         input_names = ['features_left_04', 'features_left_08', 'features_left_16', 'features_left_32', 'features_right_04', 'stem_2x', 'gwc_volume'],
         output_names = ['disp'],
-        do_constant_folding=True
+        do_constant_folding=True,
+        dynamo=True,
     )
 
     with open(f'{args.save_path}/onnx.yaml', 'w') as f:
diff --git a/scripts/make_trt_engine.py b/scripts/make_trt_engine.py
new file mode 100644
index 0000000..568d8d7
--- /dev/null
+++ b/scripts/make_trt_engine.py
@@ -0,0 +1,52 @@
+"""Convert ONNX models to TensorRT engines using the Python TRT API."""
+import argparse
+import os
+import tensorrt as trt
+
+TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
+
+
+def build_engine(onnx_path: str, engine_path: str, fp16: bool = True, workspace_gb: int = 4):
+    builder = trt.Builder(TRT_LOGGER)
+    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
+    config = builder.create_builder_config()
+    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_gb << 30)
+
+    parser = trt.OnnxParser(network, TRT_LOGGER)
+    if not parser.parse_from_file(onnx_path):
+        for i in range(parser.num_errors):
+            print(f"ONNX parse error {i}: {parser.get_error(i)}")
+        raise RuntimeError(f"Failed to parse ONNX: {onnx_path}")
+
+    if fp16 and builder.platform_has_fast_fp16:
+        config.set_flag(trt.BuilderFlag.FP16)
+        print("FP16 enabled")
+
+    print(f"Building TRT engine from {onnx_path} …")
+    serialized = builder.build_serialized_network(network, config)
+    if serialized is None:
+        raise RuntimeError("build_serialized_network returned None")
+
+    with open(engine_path, "wb") as f:
+        f.write(serialized)
+    print(f"Engine saved -> {engine_path}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--onnx_dir", type=str, default="output/onnx_trt",
+                        help="Directory containing feature_runner.onnx and post_runner.onnx")
+    parser.add_argument("--engine_dir", type=str, default=None,
+                        help="Output directory for .engine files (defaults to onnx_dir)")
+    parser.add_argument("--fp16", dest="fp16", action="store_true", default=True)
+    parser.add_argument("--no_fp16", dest="fp16", action="store_false", help="disable FP16 (on by default)")
+    parser.add_argument("--workspace_gb", type=int, default=4)
+    args = parser.parse_args()
+
+    engine_dir = args.engine_dir or args.onnx_dir
+    os.makedirs(engine_dir, exist_ok=True)
+
+    for name in ("feature_runner", "post_runner"):
+        onnx_path = os.path.join(args.onnx_dir, f"{name}.onnx")
+        engine_path = os.path.join(engine_dir, f"{name}.engine")
+        build_engine(onnx_path, engine_path, fp16=args.fp16, workspace_gb=args.workspace_gb)
diff --git a/scripts/metrics.py b/scripts/metrics.py
new file mode 100644
index 0000000..ac92fe6
--- /dev/null
+++ b/scripts/metrics.py
@@ -0,0 +1,193 @@
+"""Depth quality metric dataclasses and computation helpers."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional, Tuple
+
+import numpy as np
+
+
+# Distance bins used throughout the report
+DIST_BINS: List[Tuple[float, float]] = [
+    (0.05, 0.25),
+    (0.25, 0.5),
+    (0.5, 0.75),
+    (0.75, 1.0),
+    (1.0, 1.5),
+    (1.5, 2.0),
+]
+BIN_LABELS = ["0.05–0.25 m", "0.25–0.5 m", "0.5–0.75 m", "0.75–1 m", "1–1.5 m", "1.5–2 m"]
+BIN_CENTERS = [0.125, 0.375, 0.625, 0.875, 1.25, 1.75]
+CLOSE_RANGE_THRESHOLD_M = 0.05
+
+
+@dataclass
+class FrameMetrics:
+    """Quality metrics for a single (method, frame) pair vs ground truth."""
+    method_name: str
+    mae: float           # mean absolute error (m) — valid-only
+    rmse: float          # root mean squared error (m) — valid-only
+    mre: float           # mean relative error (dimensionless) — valid-only
+    delta1: float        # % pixels within 1.25× of GT — valid-only
+    coverage: float      # % valid pixels (pred > 0 AND gt > 0)
+    time_ms: float       # wall-clock processing time
+    # Penalised metrics: missing pixels (pred=0 where GT>0) count as 100% error
+    mae_pen: float = 0.0
+    mre_pen: float = 0.0
+
+
+@dataclass
+class AggregateStats:
+    """Per-method aggregate statistics over all benchmark frames."""
+    method_name: str
+    label: str
+    color: str
+    mae_mean: float
+    mae_std: float
+    rmse_mean: float
+    mre_mean: float
+    delta1_mean: float
+    coverage_mean: float
+    fps_mean: float
+    time_ms_mean: float
+    time_ms_std: float
+    close_range_coverage: float   # % valid where GT < CLOSE_RANGE_THRESHOLD_M
+    mae_pen_mean: float = 0.0    # penalised MAE (holes count as full error)
+    mre_pen_mean: float = 0.0    # penalised MRE (holes count as 100% error)
+    gpu_load_mean: float = 0.0   # GPU utilisation % during inference
+
+
+@dataclass
+class BenchmarkResults:
+    """All data collected during a benchmark run."""
+    method_names: List[str]
+    method_labels: Dict[str, str]
+    method_colors: Dict[str, str]
+    ground_truth_name: str
+    n_frames: int
+    width: int
+    height: int
+    all_metrics: List[FrameMetrics]
+    viz_frames: List[Dict[str, np.ndarray]]    # [{method_name: float32_m_array}, ...]
+    coverage_maps: Dict[str, np.ndarray]        # method_name -> (H,W) float [0,1]
+    dist_bin_mae: Dict[str, List[List[float]]] # method_name -> [frame][bin]
+    close_range_valid: Dict[str, List[float]]  # method_name -> per-frame %
+    source: str
+    method_configs: Dict[str, Dict[str, str]] = field(default_factory=dict)  # method_name -> config dict
+
+
+def compute_metrics(
+    pred_m: np.ndarray,
+    gt_m: np.ndarray,
+    elapsed_ms: float,
+    method_name: str,
+) -> FrameMetrics:
+    """Compute per-frame quality metrics between prediction and ground truth.
+
+    Two sets of error metrics are computed:
+      - **valid-only** (mae, rmse, mre, delta1): pixels where both pred and GT > 0.
+      - **penalised** (mae_pen, mre_pen): over all GT > 0 pixels.  Every GT pixel
+        without a usable prediction (pred <= 0 or NaN) is a hole whose error
+        equals the GT depth itself (i.e. 100% relative error), so methods with
+        poor coverage pay a price in the error scores, not just in coverage %.
+    """
+    gt_mask = gt_m > 0.0
+    valid = gt_mask & (pred_m > 0.0)
+    n_gt = int(gt_mask.sum())
+    n_valid = int(valid.sum())
+
+    if n_valid == 0:
+        # Zero coverage: valid-only metrics stay 0.0, but every GT pixel is a hole.
+        mae_pen = float(gt_m[gt_mask].mean()) if n_gt > 0 else 0.0
+        mre_pen = 1.0 if n_gt > 0 else 0.0
+        return FrameMetrics(method_name, 0.0, 0.0, 0.0, 0.0, 0.0, elapsed_ms,
+                            mae_pen=mae_pen, mre_pen=mre_pen)
+
+    # --- valid-only metrics ---
+    p, g = pred_m[valid], gt_m[valid]
+    diff = np.abs(p - g)
+    rel = diff / (g + 1e-6)
+    mae = float(diff.mean())
+    rmse = float(np.sqrt((diff ** 2).mean()))
+    mre = float(rel.mean())
+    ratio = np.maximum(p / (g + 1e-6), g / (p + 1e-6))
+    delta1 = float((ratio < 1.25).mean()) * 100.0
+    coverage = float(n_valid / gt_m.size) * 100.0   # share of ALL pixels, not of GT pixels
+
+    # --- penalised metrics (n_valid > 0 guarantees n_gt > 0 here) ---
+    # `~valid` rather than `pred_m <= 0` so NaN predictions count as holes
+    # instead of silently contributing zero error.
+    missing = gt_mask & ~valid
+    pen_abs = np.zeros_like(gt_m)
+    pen_abs[valid] = diff
+    pen_abs[missing] = gt_m[missing]   # absolute error of a hole = GT depth
+    mae_pen = float(pen_abs[gt_mask].mean())
+    pen_rel = np.zeros_like(gt_m)
+    pen_rel[valid] = rel
+    pen_rel[missing] = 1.0             # 100% relative error for holes
+    mre_pen = float(pen_rel[gt_mask].mean())
+
+    return FrameMetrics(method_name, mae, rmse, mre, delta1, coverage, elapsed_ms,
+                        mae_pen=mae_pen, mre_pen=mre_pen)
+
+
+def compute_bin_mae(
+    pred_m: np.ndarray,
+    gt_m: np.ndarray,
+) -> List[float]:
+    """Return the MAE for each DIST_BINS range [lo, hi) of ground-truth depth;
+    NaN for a bin holding no pixel with both GT > 0 and pred > 0."""
+    # validity does not depend on the bin, so build that mask once
+    usable = (gt_m > 0) & (pred_m > 0)
+    per_bin: List[float] = []
+    for lo, hi in DIST_BINS:
+        sel = (gt_m >= lo) & (gt_m < hi) & usable
+        per_bin.append(float(np.abs(pred_m[sel] - gt_m[sel]).mean()) if sel.sum() != 0 else float("nan"))
+    return per_bin
+
+
+def aggregate(
+    results: BenchmarkResults,
+    timing_ms: Dict[str, float],
+    gpu_load: Optional[Dict[str, float]] = None,
+) -> Dict[str, AggregateStats]:
+    """Compute per-method aggregate statistics from raw benchmark results.
+
+    Methods without any FrameMetrics are skipped.  "hardware" is pinned to
+    30 FPS; any other method with a missing/non-positive time gets fps = inf.
+    """
+    stats: Dict[str, AggregateStats] = {}
+    gpu_load = gpu_load or {}   # lets the lookup below be a plain .get()
+
+    for name in results.method_names:
+        fm_list = [m for m in results.all_metrics if m.method_name == name]
+        if not fm_list:
+            continue
+
+        if name == "hardware":
+            fps, t_ms = 30.0, 1000.0 / 30.0
+        else:
+            t_ms = timing_ms.get(name, 0.0)
+            fps = 1000.0 / t_ms if t_ms > 0 else float("inf")
+
+        cr_vals = results.close_range_valid.get(name, [0.0])
+        stats[name] = AggregateStats(
+            method_name=name,
+            label=results.method_labels.get(name, name),
+            color=results.method_colors.get(name, "#888888"),
+            mae_mean=float(np.mean([m.mae for m in fm_list])),
+            mae_std=float(np.std([m.mae for m in fm_list])),
+            rmse_mean=float(np.mean([m.rmse for m in fm_list])),
+            mre_mean=float(np.mean([m.mre for m in fm_list])),
+            delta1_mean=float(np.mean([m.delta1 for m in fm_list])),
+            coverage_mean=float(np.mean([m.coverage for m in fm_list])),
+            fps_mean=fps,
+            time_ms_mean=t_ms,
+            time_ms_std=0.0,
+            close_range_coverage=float(np.mean(cr_vals)) if cr_vals else 0.0,
+            mae_pen_mean=float(np.mean([m.mae_pen for m in fm_list])),
+            mre_pen_mean=float(np.mean([m.mre_pen for m in fm_list])),
+            gpu_load_mean=gpu_load.get(name, 0.0),
+        )
+    return stats
\ No newline at end of file
diff --git a/scripts/object_chessboard.py b/scripts/object_chessboard.py
new file mode 100644
index 0000000..3fac5bf
--- /dev/null
+++ b/scripts/object_chessboard.py
@@ -0,0 +1,763 @@
+'''
+
+Chessboard Object Manager.
+Supports detection of a chessboard object in images and video. It is used for testing and demonstrating pose estimation.
+
+
+
+Usage :
+    python object_chessboard.py
+
+
+Environment : 
+    .\\envs\\pyqt5g
+
+Install : 
+
+
+'''
+
+
+import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+
+import sys, os
+current_dir = os.path.dirname(__file__)
+parent_dir  = os.path.abspath(os.path.join(current_dir, '..'))
+sys.path.append(parent_dir)
+
+ # importing common Use modules 
+#from src.logger                     import log
+import logging as log
+
+
+# --------------------------------
+#%% ObjectManager - manages the entire experiment
+class ObjectChessboard:
+
+    def __init__(self, config = None):
+
+        # params
+        self.config             = config
+        self.name               = 'chessboard'
+        self.frame              = []
+        self.resolution         = (1280,720)
+        self.square_size         = 21.0                  # size in mm of the pattern square         
+        self.debug_on            = True
+        self.pattern_size       = (9,6)
+        self._rt_plot            = None
+        
+        
+        self.init()
+        log.info("Chess Object is Created")
+
+            
+    def init(self):
+        # can be also string from file    
+        
+        # should not be done - connect to external
+        if self.config is None:
+            log.info("Configuration is not connected")
+        else:
+            self.pattern_size       = self.config.get('chessboard_size', self.pattern_size)
+            
+        ret = True
+        return ret
+            
+    def set_square_size(self,sqSize = 21.8):
+        """Set the square edge length in mm; return True if applied, False if out of range."""
+        # Enforce the advertised 0.1..100 range (0 would collapse all object points onto the origin).
+        if not (0.1 <= sqSize <= 100):
+            log.info('Square size should be in range 0.1:100 mm')
+            return False
+        self.square_size = sqSize
+        log.info('Square size is %4.2f mm' % self.square_size)
+        return True
+        
+    def set_pattern_size(self, pattern_size = (9,6)):
+        # set chessboard pattern size
+        if pattern_size[0] < 2 or pattern_size[1] < 2:
+            log.info('Pattern size should be at least 2x2')
+            return False
+        
+        self.pattern_size = pattern_size # (9,6) for 9x6 chessboard
+        
+        log.info('Pattern size is %dx%d' % self.pattern_size)
+        return True
+
+    def get_object_points(self):
+        # Board-frame corner coordinates on the z = 0 plane, x varying fastest: (0,0,0), (s,0,0), (2s,0,0), ...
+        a = self.pattern_size[0]
+        b = self.pattern_size[1]
+        s = self.square_size # scale factor: points come out in the units of square_size (no metre conversion is done)
+         
+        objCorners        = np.zeros((b*a,3), np.float32)
+        objCorners[:,:2]  = np.mgrid[0:a,0:b].T.reshape(-1,2)*s 
+        return objCorners
+    
+    def get_grid_points(self, grid_size=0.1):
+        # Dense planar grid (z = 0) with spacing grid_size, in the units of square_size:
+        # (0,0,0), (g,0,0), (2g,0,0), ... ; the point count per axis is truncated by int().
+        scale_factor = self.square_size / grid_size
+        a = int(self.pattern_size[0]*scale_factor)
+        b = int(self.pattern_size[1]*scale_factor)
+        s = grid_size # spacing between neighbouring grid points
+         
+        grid_corners        = np.zeros((b*a,3), np.float32)
+        grid_corners[:,:2]  = np.mgrid[0:a,0:b].T.reshape(-1,2)*s 
+        return grid_corners    
+
+    def get_image_points(self, img):
+        "detect corners in the image and return their coordinates"
+
+        # reduce size
+        if len(img.shape) > 2: 
+            gray  = cv2.cvtColor(img.astype(np.uint8),cv2.COLOR_BGR2GRAY)
+        else:
+            gray = img.astype(np.uint8)
+
+        # find the chess board (calibration pattern) corners
+        flags           = cv2.CALIB_CB_NORMALIZE_IMAGE | cv2.CALIB_CB_EXHAUSTIVE | cv2.CALIB_CB_ACCURACY
+        ret, imgCorners = cv2.findChessboardCornersSB(gray, self.pattern_size, flags=flags)
+        if ret:
+            # Refine the corners of the detected corners
+            criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
+            imgCorners2 = cv2.cornerSubPix(gray,imgCorners,(11,11),(-1,-1),criteria)
+        else:
+            imgCorners2= []
+            #log.info('Chessboard corners not found in the image')
+        return imgCorners2
+
+    def detect(self, img):
+        """Detect chessboard corners and return a unified detection dictionary.
+
+        Args:
+            img: BGR image.
+
+        Returns:
+            dict: Detection result with keys:
+                - success (bool)
+                - image_points (np.ndarray or list)
+                - object_points (np.ndarray)
+                - quality (float)
+                - reason (str, when failed)
+        """
+        if img is None:
+            return {
+                'success': False,
+                'reason': 'input image is None',
+                'image_points': [],
+                'object_points': self.get_object_points(),
+                'quality': 0.0,
+            }
+
+        try:
+            img_points = self.get_image_points(img)
+        except Exception as e:
+            return {
+                'success': False,
+                'reason': f'chessboard detection error: {e}',
+                'image_points': [],
+                'object_points': self.get_object_points(),
+                'quality': 0.0,
+            }
+
+        has_points = len(img_points) > 0
+        detected_count = int(len(img_points)) if has_points else 0
+        expected_count = int(self.pattern_size[0] * self.pattern_size[1])
+        quality = float(detected_count / max(expected_count, 1))
+
+        result = {
+            'success': has_points,
+            'image_points': img_points,
+            'object_points': self.get_object_points(),
+            'quality': quality,
+        }
+        if not has_points:
+            result['reason'] = 'chessboard corners not found'
+
+        return result
+
+    def estimate_camera_pose(self, img, camera_matrix, dist_coeffs=None):
+        """Detect chessboard and estimate camera pose with solvePnP.
+
+        Args:
+            img: BGR image.
+            camera_matrix: Intrinsic matrix (3x3).
+            dist_coeffs: Distortion coefficients (optional).
+
+        Returns:
+            dict: {
+                success (bool), reason (str), image_points, object_points,
+                rvec, tvec, rotation_matrix, camera_position
+            }
+            camera_position is in chessboard coordinates, same unit as square_size.
+        """
+        detection = self.detect(img)
+        if not detection.get('success', False):
+            return {
+                'success': False,
+                'reason': detection.get('reason', 'chessboard detection failed'),
+                'image_points': detection.get('image_points', []),
+                'object_points': detection.get('object_points', self.get_object_points()),
+            }
+
+        obj_points = detection['object_points'].astype(np.float32)
+        img_points = np.asarray(detection['image_points'], dtype=np.float32)
+
+        if img_points.ndim == 3 and img_points.shape[1] == 1:
+            img_points = img_points.reshape(-1, 2)
+
+        cam_mtx = np.asarray(camera_matrix, dtype=np.float32)
+        dist = None if dist_coeffs is None else np.asarray(dist_coeffs, dtype=np.float32)
+
+        ok, rvec, tvec = cv2.solvePnP(obj_points, img_points, cam_mtx, dist, flags=cv2.SOLVEPNP_ITERATIVE)
+        if not ok:
+            return {
+                'success': False,
+                'reason': 'solvePnP failed',
+                'image_points': img_points,
+                'object_points': obj_points,
+            }
+
+        rot_mtx, _ = cv2.Rodrigues(rvec)
+        # Camera center in object/chessboard coordinates: C = -R^T * t
+        camera_position = -rot_mtx.T @ tvec
+
+        return {
+            'success': True,
+            'image_points': img_points,
+            'object_points': obj_points,
+            'rvec': rvec,
+            'tvec': tvec,
+            'rotation_matrix': rot_mtx,
+            'camera_position': camera_position.reshape(3),
+        }
+
+    def estimate_board_pose_in_camera(self, img, camera_matrix, dist_coeffs=None):
+        """Detect chessboard and estimate the board pose in the camera coordinate system.
+
+        solvePnP returns rvec/tvec that transform points from object (board) space into
+        camera space, so tvec is already the board origin expressed in the camera frame,
+        and rot_mtx columns are the board X/Y/Z axes expressed in the camera frame.
+
+        Args:
+            img: grayscale or BGR image.
+            camera_matrix: Intrinsic matrix (3x3).
+            dist_coeffs: Distortion coefficients (optional).
+
+        Returns:
+            dict on success:
+                success          : bool
+                image_points     : (N, 2) detected corners in the image
+                object_points    : (N, 3) 3-D corners in board frame
+                rvec             : (3, 1) rotation vector (board → camera)
+                tvec             : (3, 1) translation vector (board → camera)
+                rotation_matrix  : (3, 3) rotation matrix R (board → camera)
+                board_position   : (3,)  position of the board origin in camera frame (= tvec)
+                board_center     : (3,)  position of the board geometric centre in camera frame
+                board_x_axis     : (3,)  board X axis expressed in camera frame
+                board_y_axis     : (3,)  board Y axis expressed in camera frame
+                board_z_axis     : (3,)  board normal expressed in camera frame
+            dict with success=False and reason on failure.
+        """
+        detection = self.detect(img)
+        if not detection.get('success', False):
+            return {
+                'success': False,
+                'reason': detection.get('reason', 'chessboard detection failed'),
+                'image_points': detection.get('image_points', []),
+                'object_points': detection.get('object_points', self.get_object_points()),
+            }
+
+        obj_points = detection['object_points'].astype(np.float32)
+        img_points = np.asarray(detection['image_points'], dtype=np.float32)
+        if img_points.ndim == 3 and img_points.shape[1] == 1:
+            img_points = img_points.reshape(-1, 2)
+
+        cam_mtx = np.asarray(camera_matrix, dtype=np.float32)
+        dist = None if dist_coeffs is None else np.asarray(dist_coeffs, dtype=np.float32)
+
+        ok, rvec, tvec = cv2.solvePnP(obj_points, img_points, cam_mtx, dist, flags=cv2.SOLVEPNP_ITERATIVE)
+        if not ok:
+            return {
+                'success': False,
+                'reason': 'solvePnP failed',
+                'image_points': img_points,
+                'object_points': obj_points,
+            }
+
+        rot_mtx, _ = cv2.Rodrigues(rvec)
+
+        # Board origin and axes in camera frame.
+        board_position = tvec.reshape(3)                  # origin corner [0,0] in camera frame
+        board_x_axis   = rot_mtx[:, 0]                   # board +X in camera frame
+        board_y_axis   = rot_mtx[:, 1]                   # board +Y in camera frame
+        board_z_axis   = rot_mtx[:, 2]                   # board normal in camera frame
+
+        # Geometric centre: average of all object points transformed into camera frame.
+        obj_center_board  = obj_points.mean(axis=0).reshape(3, 1).astype(np.float32)
+        board_center       = (rot_mtx @ obj_center_board + tvec).reshape(3)
+
+        return {
+            'success': True,
+            'image_points': img_points,
+            'object_points': obj_points,
+            'rvec': rvec,
+            'tvec': tvec,
+            'rotation_matrix': rot_mtx,
+            'board_position': board_position,
+            'board_center': board_center,
+            'board_x_axis': board_x_axis,
+            'board_y_axis': board_y_axis,
+            'board_z_axis': board_z_axis,
+        }
+
+    def get_grid_in_camera_coordinates(self, rvec, tvec, camera_matrix, dist_coeffs=None):
+        """Map the dense board grid into the camera frame and project it onto the image.
+
+        Args:
+            rvec: rotation vector from solvePnP (board → camera).
+            tvec: translation vector from solvePnP (board → camera); any shape holding 3 values.
+            camera_matrix: Intrinsic matrix (3x3).
+            dist_coeffs: Distortion coefficients; None means no lens distortion.
+
+        Returns:
+            grid_transformed: (N, 3) grid points expressed in the camera frame.
+            projected_points: (N, 2) grid points in image coordinates.
+        """
+        grid_points         = self.get_grid_points()
+        cam_mtx             = np.asarray(camera_matrix, dtype=np.float32)
+        # np.asarray(None, float32) is a NaN scalar that would poison the projection,
+        # so None is forwarded as-is, like the estimate_* methods do for solvePnP.
+        dist                = None if dist_coeffs is None else np.asarray(dist_coeffs, dtype=np.float32)
+        rot_mtx, _          = cv2.Rodrigues(rvec)
+
+        # board frame -> camera frame, row-vector form of P_cam = R * P_board + t
+        grid_transformed    = grid_points @ rot_mtx.T + np.asarray(tvec).reshape(1, 3)
+
+        # projection onto the image plane, including distortion when coefficients are given
+        projected_points, _ = cv2.projectPoints(grid_points.astype(np.float32), rvec, tvec, cam_mtx, dist)
+        return grid_transformed, projected_points.reshape(-1, 2)
+
+    def render_board_and_camera(self, object_points, camera_position, rvec=None, axis_length=None, show=True):
+        """Render chessboard points and estimated camera position in 3D (matplotlib)."""
+        a, b = self.pattern_size
+        s = float(self.square_size)
+        if axis_length is None:
+            axis_length = max(2.0 * s, 1.0)
+
+        pts = np.asarray(object_points, dtype=np.float32).reshape(-1, 3)
+        cam = np.asarray(camera_position, dtype=np.float32).reshape(3)
+
+        fig = plt.figure(figsize=(9, 7))
+        ax = fig.add_subplot(111, projection='3d')
+
+        # Draw board grid as wireframe in object coordinates.
+        grid = pts.reshape(b, a, 3)
+        ax.plot_wireframe(grid[:, :, 0], grid[:, :, 1], grid[:, :, 2], color='tab:blue', linewidth=1.0)
+        ax.scatter(pts[:, 0], pts[:, 1], pts[:, 2], c='tab:cyan', s=15, label='Chessboard corners')
+
+        # Camera center.
+        ax.scatter([cam[0]], [cam[1]], [cam[2]], c='tab:red', s=70, marker='^', label='Camera center')
+        ax.text(cam[0], cam[1], cam[2], ' camera', color='tab:red')
+
+        # Optional camera orientation axes (in object frame).
+        if rvec is not None:
+            rot_mtx, _ = cv2.Rodrigues(np.asarray(rvec, dtype=np.float32))
+            # camera frame unit axes expressed in object frame = columns of R^T
+            cam_axes = rot_mtx.T
+            colors = ['r', 'g', 'b']
+            labels = ['Xc', 'Yc', 'Zc']
+            for i in range(3):
+                end = cam + cam_axes[:, i] * axis_length
+                ax.plot([cam[0], end[0]], [cam[1], end[1]], [cam[2], end[2]], color=colors[i], linewidth=2)
+                ax.text(end[0], end[1], end[2], labels[i], color=colors[i])
+
+        # Keep axes visually balanced.
+        x_vals = np.concatenate([pts[:, 0], np.array([cam[0]], dtype=np.float32)])
+        y_vals = np.concatenate([pts[:, 1], np.array([cam[1]], dtype=np.float32)])
+        z_vals = np.concatenate([pts[:, 2], np.array([cam[2]], dtype=np.float32)])
+        max_range = max(np.ptp(x_vals), np.ptp(y_vals), np.ptp(z_vals), axis_length) * 0.6
+        center = np.array([np.mean(x_vals), np.mean(y_vals), np.mean(z_vals)], dtype=np.float32)
+        ax.set_xlim(center[0] - max_range, center[0] + max_range)
+        ax.set_ylim(center[1] - max_range, center[1] + max_range)
+        ax.set_zlim(center[2] - max_range, center[2] + max_range)
+
+        ax.set_xlabel(f'X [{self.square_size} mm units]')
+        ax.set_ylabel(f'Y [{self.square_size} mm units]')
+        ax.set_zlabel(f'Z [{self.square_size} mm units]')
+        ax.set_title('Chessboard and estimated camera pose')
+        ax.legend(loc='best')
+        plt.tight_layout()
+
+        if show:
+            plt.show(block=False)
+
+        return fig, ax
+
+    def render_board_and_camera_real_time(self, object_points, camera_position, rvec=None,
+                                          axis_length=None, show=True, pause_sec=0.001):
+        """Realtime version of render_board_and_camera.
+
+        Reuses the same matplotlib figure/artists and updates their data in-place,
+        so repeated calls can refresh visualization without creating a new plot.
+        """
+        a, b = self.pattern_size
+        s = float(self.square_size)
+        if axis_length is None:
+            axis_length = max(2.0 * s, 1.0)
+
+        pts = np.asarray(object_points, dtype=np.float32).reshape(-1, 3)
+        cam = np.asarray(camera_position, dtype=np.float32).reshape(3)
+        grid = pts.reshape(b, a, 3)
+
+        # Create plot once, then update artists only.
+        need_init = (
+            self._rt_plot is None
+            or self._rt_plot.get('fig', None) is None
+            or not plt.fignum_exists(self._rt_plot['fig'].number)
+        )
+
+        if need_init:
+            plt.ion()
+            fig = plt.figure(figsize=(9, 7))
+            ax = fig.add_subplot(111, projection='3d')
+
+            # Board as persistent line artists (rows + cols), easier to update than wireframe collection.
+            grid_lines = []
+            for ri in range(b):
+                line, = ax.plot(grid[ri, :, 0], grid[ri, :, 1], grid[ri, :, 2], color='tab:blue', linewidth=1.0)
+                grid_lines.append(line)
+            for ci in range(a):
+                line, = ax.plot(grid[:, ci, 0], grid[:, ci, 1], grid[:, ci, 2], color='tab:blue', linewidth=1.0)
+                grid_lines.append(line)
+
+            board_scatter = ax.scatter(pts[:, 0], pts[:, 1], pts[:, 2], c='tab:cyan', s=15, label='Chessboard corners')
+            cam_scatter = ax.scatter([cam[0]], [cam[1]], [cam[2]], c='tab:red', s=70, marker='^', label='Camera center')
+            cam_text = ax.text(cam[0], cam[1], cam[2], ' camera', color='tab:red')
+
+            cam_axes_lines = []
+            colors = ['r', 'g', 'b']
+            labels = ['Xc', 'Yc', 'Zc']
+            for i in range(3):
+                line, = ax.plot([cam[0], cam[0]], [cam[1], cam[1]], [cam[2], cam[2]], color=colors[i], linewidth=2)
+                txt = ax.text(cam[0], cam[1], cam[2], labels[i], color=colors[i])
+                cam_axes_lines.append((line, txt))
+
+            ax.set_xlabel(f'X [{self.square_size} mm units]')
+            ax.set_ylabel(f'Y [{self.square_size} mm units]')
+            ax.set_zlabel(f'Z [{self.square_size} mm units]')
+            ax.set_title('Chessboard and estimated camera pose (real-time)')
+            ax.legend(loc='best')
+            plt.tight_layout()
+
+            self._rt_plot = {
+                'fig': fig,
+                'ax': ax,
+                'grid_lines': grid_lines,
+                'board_scatter': board_scatter,
+                'cam_scatter': cam_scatter,
+                'cam_text': cam_text,
+                'cam_axes_lines': cam_axes_lines,
+            }
+        else:
+            fig = self._rt_plot['fig']
+            ax = self._rt_plot['ax']
+
+        # --- Update board lines ---
+        line_idx = 0
+        for ri in range(b):
+            line = self._rt_plot['grid_lines'][line_idx]
+            line.set_data_3d(grid[ri, :, 0], grid[ri, :, 1], grid[ri, :, 2])
+            line_idx += 1
+        for ci in range(a):
+            line = self._rt_plot['grid_lines'][line_idx]
+            line.set_data_3d(grid[:, ci, 0], grid[:, ci, 1], grid[:, ci, 2])
+            line_idx += 1
+
+        # --- Update scatters ---
+        self._rt_plot['board_scatter']._offsets3d = (pts[:, 0], pts[:, 1], pts[:, 2])
+        self._rt_plot['cam_scatter']._offsets3d = (np.array([cam[0]]), np.array([cam[1]]), np.array([cam[2]]))
+
+        # --- Update camera label text ---
+        old_text = self._rt_plot.get('cam_text', None)
+        if old_text is not None:
+            old_text.remove()
+        self._rt_plot['cam_text'] = ax.text(cam[0], cam[1], cam[2], ' camera', color='tab:red')
+
+        # --- Update camera orientation axes ---
+        if rvec is not None:
+            rot_mtx, _ = cv2.Rodrigues(np.asarray(rvec, dtype=np.float32))
+            cam_axes = rot_mtx.T
+            labels = ['Xc', 'Yc', 'Zc']
+            for i in range(3):
+                line, txt = self._rt_plot['cam_axes_lines'][i]
+                end = cam + cam_axes[:, i] * axis_length
+                line.set_data_3d([cam[0], end[0]], [cam[1], end[1]], [cam[2], end[2]])
+                txt.remove()
+                color = ['r', 'g', 'b'][i]
+                self._rt_plot['cam_axes_lines'][i] = (line, ax.text(end[0], end[1], end[2], labels[i], color=color))
+
+        # Keep axes balanced around board + camera.
+        x_vals = np.concatenate([pts[:, 0], np.array([cam[0]], dtype=np.float32)])
+        y_vals = np.concatenate([pts[:, 1], np.array([cam[1]], dtype=np.float32)])
+        z_vals = np.concatenate([pts[:, 2], np.array([cam[2]], dtype=np.float32)])
+        max_range = max(np.ptp(x_vals), np.ptp(y_vals), np.ptp(z_vals), axis_length) * 0.6
+        center = np.array([np.mean(x_vals), np.mean(y_vals), np.mean(z_vals)], dtype=np.float32)
+        ax.set_xlim(center[0] - max_range, center[0] + max_range)
+        ax.set_ylim(center[1] - max_range, center[1] + max_range)
+        ax.set_zlim(center[2] - max_range, center[2] + max_range)
+
+        if show:
+            fig.canvas.draw_idle()
+            fig.canvas.flush_events()
+            plt.pause(pause_sec)
+
+        return fig, ax
+
+    def detect_estimate_and_render(self, img, camera_matrix, dist_coeffs=None, axis_length=None, show=True):
+        """Detect chessboard, estimate camera 3D pose, and render board + camera.
+
+        Returns the dict from estimate_camera_pose(). On failure the reason is logged and
+        the dict is returned as-is; on success 'figure' and 'axes' entries are added to it.
+        """
+        pose = self.estimate_camera_pose(img, camera_matrix, dist_coeffs)
+        if not pose.get('success', False):
+            log.info(f"Pose estimation failed: {pose.get('reason', 'unknown reason')}")
+            return pose
+
+        fig, ax = self.render_board_and_camera(
+            object_points=pose['object_points'],
+            camera_position=pose['camera_position'],
+            rvec=pose['rvec'],
+            axis_length=axis_length,
+            show=show,
+        )
+        pose['figure'] = fig
+        pose['axes'] = ax
+        return pose
+
+    def draw_corners(self, img, corners):
+        "Draw the detected chessboard corners on img; img is returned untouched when corners is None or empty."
+        if corners is None or len(corners) == 0:
+            return img
+
+        img_drawn = cv2.drawChessboardCorners(img, self.pattern_size, corners, True)
+        return img_drawn
+
+    def show_corners(self, img, corners):
+        "Show img with the corners drawn; returns True only when 'q' is pressed, False when debug_on is off."
+        if not self.debug_on:
+            return False
+
+        img = self.draw_corners(img, corners)
+
+        cv2.imshow('Image with Corners',img)
+
+        # waitKey(0) waits for a key press; the result is True only for 'q' (used by callers to end a loop)
+        ret = cv2.waitKey(0) & 0xFF == ord('q')
+        return ret
+
+
+# ----------------------
+#%% Tests
+class TestObjectChessboard():
+
+    def __init__(self):
+        "init test"
+        self.s = ObjectChessboard()
+        log.info('TestObjectChessboard tests started')
+
+    def assertTrue(self, isOk = True):
+        "assert true"
+        if not isOk:
+            raise AssertionError("Test failed")
+
+    def assertFalse(self, isOk = False):
+        "assert false"
+        if isOk:
+            raise AssertionError("Test failed")        
+
+
+    def test_object_detect_single_image(self):
+        """
+        Function that loads images, does measurement and shows final result
+        """
+        file_path           = r"data\calib_robot_0006.jpg"
+        img                 = cv2.imread(file_path)
+        self.assertTrue(img is not None)
+
+        img_points         = self.s.get_image_points(img)
+        self.assertTrue(len(img_points) > 0)
+        
+        isOk                = self.s.show_corners(img, img_points)
+        self.assertTrue(isOk)
+
+    def test_render_board_and_camera(self):
+        """Test chessboard pose estimation and 3D rendering on a calibration image."""
+        file_path = r"C:\Work\Code\robot_vision\pose6d\data\camera_calibration\calib_robot_0001.jpg"
+        img = cv2.imread(file_path)
+        self.assertTrue(img is not None)
+
+        h, w = img.shape[:2]
+        # Approximate intrinsics for test robustness; replace with calibrated values when available.
+        fx = 600.0
+        fy = 600.0
+        cx = w / 2.0
+        cy = h / 2.0
+        camera_matrix = np.array([
+            [fx, 0.0, cx],
+            [0.0, fy, cy],
+            [0.0, 0.0, 1.0],
+        ], dtype=np.float32)
+        dist_coeffs = np.zeros((5, 1), dtype=np.float32)
+
+        pose = self.s.estimate_camera_pose(img, camera_matrix, dist_coeffs)
+        self.assertTrue(pose.get('success', False))
+
+        fig, ax = self.s.render_board_and_camera(
+            object_points=pose['object_points'],
+            camera_position=pose['camera_position'],
+            rvec=pose.get('rvec', None),
+            show=True,
+        )
+
+        self.assertTrue(fig is not None)
+        self.assertTrue(ax is not None)
+        plt.close(fig)
+
+    def test_object_detect_video(self):
+        """
+        Function that does processing using video file
+        """
+        object_path         = r"D:\RobotAI\Customers\Plasel\Objects\plasel_gray-01"
+        file_path           = r"D:\RobotAI\Customers\Plasel\Objects\plasel_gray-01\videos\object_0002.mp4"
+
+        isOk                = self.s.pose6d.ObjectSelectSingle(object_path)
+        self.assertTrue(isOk)
+        
+        isOk                = self.s.pose6d.TestRunFile(file_path)
+        self.assertTrue(isOk)
+
+    def test_rs_camera_connection(self):
+        """
+        Function that connects to RS camera and shows live stream,
+        chessboard detection and real-time 3D pose rendering.
+        """
+        import importlib.util
+        #from opencv_realsense_camera import RealSense
+
+        cam_module_path = r"C:\Work\Code\Fast-FoundationStereo\scripts\opencv_realsense_camera.py"
+        self.assertTrue(os.path.isfile(cam_module_path))
+
+        spec = importlib.util.spec_from_file_location("opencv_realsense_camera", cam_module_path)
+        rs_mod = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(rs_mod)
+        RealSense = rs_mod.RealSense
+
+        cap = RealSense(frame_size=(1280, 720), use_ir=False, mode = 'd16')
+        self.assertTrue(cap is not None)
+
+        # intr = cap.config.resolve(rs_mod.rs.pipeline_wrapper(cap.pipeline)) \
+        #     .get_stream(rs_mod.rs.infrared, 1).as_video_stream_profile().get_intrinsics()
+        # intr = cap.intr #get_camera_intrinsics(1)
+        # camera_matrix = np.array([
+        #     [intr.fx, 0.0, intr.ppx],
+        #     [0.0, intr.fy, intr.ppy],
+        #     [0.0, 0.0, 1.0],
+        # ], dtype=np.float32)
+
+        h, w = 720, 1280
+        fx = 600.0
+        fy = 600.0
+        cx = w / 2.0
+        cy = h / 2.0
+        camera_matrix = np.array([
+            [fx, 0.0, cx],
+            [0.0, fy, cy],
+            [0.0, 0.0, 1.0],
+        ], dtype=np.float32)        
+        #dist_coeffs = np.array(intr.coeffs, dtype=np.float32).reshape(-1, 1)
+        dist_coeffs = np.zeros((5, 1), dtype=np.float32)
+
+        # Use a finite loop for test-style behavior; press 'q' in OpenCV window to exit early.
+        try:
+            for _ in range(300):
+                ret, _ = cap.read()
+                self.assertTrue(ret)
+
+                # Use left IR image for chessboard detection.
+                ir_left = cap.img_l
+                pose = self.s.estimate_camera_pose(ir_left, camera_matrix, dist_coeffs)
+                if pose.get('success', False):
+                    self.s.render_board_and_camera_real_time(
+                        object_points=pose['object_points'],
+                        camera_position=pose['camera_position'],
+                        rvec=pose.get('rvec', None),
+                        show=True,
+                        pause_sec=0.001,
+                    )
+
+                # Keep OpenCV feed visible and allow keyboard control ('q' to break).
+                should_exit = cap.show_image(cap.img_l)
+                if should_exit:
+                    break
+        finally:
+            cap.close()
+            cv2.destroyAllWindows()
+           
+    def test_get_grid_in_camera_coordinates(self):
+        """Test projecting a 3D grid onto the camera image using the estimated pose."""
+        file_path = r"C:\Work\Code\robot_vision\pose6d\data\camera_calibration\calib_robot_0001.jpg"
+        img = cv2.imread(file_path)
+        self.assertTrue(img is not None)
+
+        h, w = img.shape[:2]
+        fx = 600.0
+        fy = 600.0
+        cx = w / 2.0
+        cy = h / 2.0
+        camera_matrix = np.array([
+            [fx, 0.0, cx],
+            [0.0, fy, cy],
+            [0.0, 0.0, 1.0],
+        ], dtype=np.float32)
+        dist_coeffs = np.zeros((5, 1), dtype=np.float32)
+
+        pose = self.s.estimate_camera_pose(img, camera_matrix, dist_coeffs)
+        self.assertTrue(pose.get('success', False))
+
+        XYZ, projected_points = self.s.get_grid_in_camera_coordinates(
+            rvec=pose['rvec'],
+            tvec=pose['tvec'],
+            camera_matrix=camera_matrix,
+            dist_coeffs=dist_coeffs,
+        )
+
+        # Draw projected grid points on the image.
+        for pt in projected_points:
+            cv2.circle(img, (int(pt[0]), int(pt[1])), radius=1, color=(0, 255, 0), thickness=-1)
+
+        cv2.imshow('Projected Grid', img)
+        cv2.waitKey(0)
+        cv2.destroyAllWindows()
+
+# ----------------------------------------------------
+#%% Run Test
+def RunTest():
+    "Run the currently enabled TestObjectChessboard test; the other tests are commented out below"
+    tst = TestObjectChessboard()
+
+    # tst.test_object_detect_single_image()  # interactive (waits for key press)
+    #tst.test_render_board_and_camera()
+    tst.test_rs_camera_connection()
+    #tst.test_get_grid_in_camera_coordinates()
+
+    
+
+#%% Run ALL
+if __name__ == '__main__':
+    #print(__doc__)
+    RunTest()
diff --git a/scripts/opencv_realsense_camera.py b/scripts/opencv_realsense_camera.py
new file mode 100644
index 0000000..e1e1e9d
--- /dev/null
+++ b/scripts/opencv_realsense_camera.py
@@ -0,0 +1,1023 @@
+
+'''
+OpenCV like wrapper for Real Sense Camera
+
+==================
+
+Allows to read, display store video and images of RGB - Depth combinations in different formats.  
+Can extract left and right IR images.
+Aligns RGB and Depth data.
+Can save data as mp4 or single images.
+Can control laser power, exposure and other parameters.
+Mouse point clicks will show depth value at the point
+
+Usage:
+    python opencv_realsense_camera.py 
+    will run the camera and open the image window with live stream.
+    Use the keys outlined in the test() function to switch between modes
+
+    Press 'd' to show different display options
+    Press 's' to save the current image
+    Press 't' to save the left and right images in separate files
+    Press 'r' to start recording and one more time 'r' to stop video recording
+    Press 'e' to change exposure (1-9)
+    Press 'g' to change gain (1-9)
+    Press 'q' to exit
+    
+                                        
+Environment : 
+    C:\\Users\\udubin\\Documents\\Envs\\barcode
+
+Install : 
+    pip install pyrealsense2-2.56.0.7981-cp310-cp310-win_amd64.whl
+
+'''
+import os
+#import profile
+#from flask import ctx
+import pyrealsense2 as rs
+import numpy as np
+import cv2 as cv
+import time
+
+
+#%% Draw
+
+def draw_str(dst, target, s):
+    "Draw string s on dst at target=(x, y): a thicker black copy offset by 1 px, then white text on top."
+    x, y = target
+    shadowed = cv.putText(dst, s, (x+1, y+1), cv.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0), thickness = 2, lineType=cv.LINE_AA)
+    return cv.putText(shadowed, s, (x, y), cv.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), lineType=cv.LINE_AA)
+
+#%% Helper
+DS5_product_ids = ["0AD1", "0AD2", "0AD3", "0AD4", "0AD5", "0AF6", "0AFE", "0AFF", "0B00", "0B01", "0B03", "0B07", "0B3A", "0B5C", "0B5B"]
+
+def find_device_that_supports_advanced_mode() :
+    "Return the first connected device whose product id is listed in DS5_product_ids; raise Exception if none."
+    ctx = rs.context()
+    devices = ctx.query_devices()
+    for dev in devices:
+        if dev.supports(rs.camera_info.product_id) and str(dev.get_info(rs.camera_info.product_id)) in DS5_product_ids:
+            if dev.supports(rs.camera_info.name):
+                print("Found device that supports advanced mode:", dev.get_info(rs.camera_info.name))
+            return dev
+    raise Exception("No D400 product line device that supports advanced mode was found")
+
+
+#%% Main
+class RealSense(object):
+    def __init__(self,  mode = 'rgb', frame_size = None, use_ir = None, use_advanced = None):
+        "Store the options, resolve the device, start streaming and apply the initial projector/safety settings."
+        self.frame_size     = (1280, 720) if frame_size is None else frame_size  #frame_size #(1280, 720)#(640,480)
+        self.display_mode   = 'rgb' if mode is None else mode
+        self.use_ir         = True if use_ir is None else use_ir
+        self.use_projector  = False
+        self.use_advanced   = False if use_advanced is None else use_advanced  # advanced mode is enabled
+        self.control_mode   = 'no controls'
+        self.DISPLAY_MODES  = ['rgb','irl','irr','rgd','ddd','d16','gdd','scl','sc2','iid','ii2','iig','iir','gd','ggd','dep']
+
+        self.fig_title      = 'RS Camera (d,e,g,f,p,o,g,m,s,t,r,k: q - to exit)'
+        self.mouse_x        = 0
+        self.mouse_y        = 0
+        self.mouse_value    = -1
+
+
+        # noise measurement
+        self.img_int_mean   = None
+        self.img_int_std    = None
+        self.use_measure    = False
+        self.rect           = None
+        self.depth_scale    = 0.001      # scale depth to meter
+
+        # Configure depth and color streams
+        self.pipeline       = rs.pipeline()
+        self.config         = rs.config()
+
+        #  Get device product line for setting a supporting resolution
+        device_name         = self.get_device_name()
+        self.set_frame_size(device_name)
+
+        # set visual preset
+        #self.set_visual_preset('RS2_RS400_VISUAL_PRESET_CUSTOM')
+        #self.set_visual_preset('RS2_RS400_VISUAL_PRESET_HIGH_ACCURACY')
+
+        # scale factor
+        self.depth_scale    = self.get_depth_scale_factor()
+
+        # start streaming
+        self.set_start_streaming()
+
+        # set advanced mode - disparity in pixels
+        self.set_advanced_mode()
+        self.advance_mode = None
+
+        # turn emitter on-off ('in' also matches a model tag at the very start of the name, unlike find() > 0)
+        self.has_projector       = any(tag in device_name for tag in ('D455', 'D555', 'D435', 'D585'))
+        self.switch_projector()
+
+        # if the safety camera - to modify exposure, first change safety mode to service
+        self.set_safety_mode(device_name)
+        # if device_name.find('D585') > 0:
+        #     ctx                 = rs.context()
+        #     dev                 = ctx.query_devices().front()
+        #     safety_sensor       = dev.first_safety_sensor()
+        #     safety_sensor.set_option(rs.option.safety_mode, rs.safety_mode.service)
+        #self.intr             = self.get_camera_intrinsics(3) # depth intrinsics
+
+
+
+        # Depth controls to defaults
+        #self.set_exposure()
+        #self.set_gain()
+        #self.set_laser_power()
+
+        # Create an align object
+        # rs.align allows us to perform alignment of depth frames to others frames
+        # The "align_to" is the stream type to which we plan to align depth frames.
+        # align_to        = rs.stream.color
+        # self.align      = rs.align(align_to)
+        align_to        = rs.stream.depth
+        self.align      = rs.align(align_to)
+
+        # output support
+        self.output_range = [0,255]  # extract range to map 16 bit to 8
+
+        # record video
+        self.vout       = None
+        self.record_on  = False # toggle recording
+        self.count      = 0
+
+        # save images - one independent list per image (a chained '= []' would alias all four to one list)
+        self.img_d, self.img_c, self.img_l, self.img_r = [], [], [], []
+
+    def render(self, dst):
+        pass
+
+    def get_device_name(self):
+        "Resolve the configured pipeline and return the device name; returns '' when resolving fails."
+        device_name         = ''
+        pipeline_wrapper    = rs.pipeline_wrapper(self.pipeline)
+        try: # 545
+            pipeline_profile    = self.config.resolve(pipeline_wrapper)
+            device              = pipeline_profile.get_device()
+            device_product_line = str(device.get_info(rs.camera_info.product_line))
+            device_name         = device.get_info(rs.camera_info.name)
+            print('Device name : ', device_name)
+            print('Device product line : ', device_product_line)
+        except Exception as e:
+            print('Real Sense new version - possibly will require a new driver version')
+            print(e)
+
+        return device_name
+    
+    def get_depth_scale_factor(self):
+        "Return the depth scale reported by the first depth sensor; falls back to 0.001 when the query fails."
+
+        pipeline_wrapper    = rs.pipeline_wrapper(self.pipeline)
+        try: 
+            pipeline_profile    = self.config.resolve(pipeline_wrapper)
+            depth_sensor        = pipeline_profile.get_device().first_depth_sensor()
+            depth_scale         = depth_sensor.get_depth_scale()
+
+            print(f'Device depth scale factor : {depth_scale}')
+        except Exception as e:
+            print('Real Sense new version - possibly will require a new driver version')
+            print(e)
+            depth_scale         = 0.001
+
+        return depth_scale    
+    
+    def set_safety_mode(self, device_name):
+        "For a D585 device, switch the safety sensor of the first connected device to service mode."
+        if 'D585' in device_name:
+            dev                 = rs.context().query_devices().front()
+            safety_sensor       = dev.first_safety_sensor()
+            safety_sensor.set_option(rs.option.safety_mode, rs.safety_mode.service)
+
+    def set_visual_preset(self, preset_name = 'RS2_RS400_VISUAL_PRESET_HIGH_ACCURACY'):
+        "Set the visual preset of the first device's depth sensor by name; an unknown name raises KeyError."
+        VISUAL_PRESET = {'RS2_RS400_VISUAL_PRESET_CUSTOM': 0,
+                        'RS2_RS400_VISUAL_PRESET_DEFAULT': 1,
+                        'RS2_RS400_VISUAL_PRESET_HAND': 2,
+                        'RS2_RS400_VISUAL_PRESET_HIGH_ACCURACY': 3,
+                        'RS2_RS400_VISUAL_PRESET_HIGH_DENSITY': 4,
+                        'RS2_RS400_VISUAL_PRESET_MEDIUM_DENSITY': 5,
+                        'RS2_RS400_VISUAL_PRESET_REMOVE_IR_PATTERN': 6,
+                        'RS2_RS400_VISUAL_PRESET_COUNT': 7
+        }
+        # Resolve the name first so a wrong preset name fails before the device is queried.
+        preset_value = VISUAL_PRESET[preset_name]
+
+        ctx = rs.context()
+        dev = ctx.query_devices().front()
+        depth_sensor = dev.first_depth_sensor()
+        depth_sensor.set_option(rs.option.visual_preset, preset_value)
+        # Read the option back and print it to confirm the value now set on the sensor.
+        vp      = depth_sensor.get_option(rs.option.visual_preset)
+        print(vp)
+
+    def set_frame_size(self, device_name):
+        "Force a 1280x720 frame size for D585 / D555 devices, then print the frame size in use."
+        if 'D585' in device_name or 'D555' in device_name:
+            print(f'Configured for {device_name}')
+            self.frame_size = (1280, 720)
+
+        print(f'Frame size  : {self.frame_size[0]} x {self.frame_size[1]}')
+
+    def set_start_streaming(self):
+        "Enable depth (z16) and color (bgr8) streams at frame_size / 30 fps, optionally both IR streams, and start the pipeline."
+        self.config.enable_stream(rs.stream.depth, self.frame_size[0], self.frame_size[1], rs.format.z16, 30)
+        self.config.enable_stream(rs.stream.color, self.frame_size[0], self.frame_size[1], rs.format.bgr8, 30)
+        
+        if self.use_ir:
+            self.config.enable_stream(rs.stream.infrared, 1)
+            self.config.enable_stream(rs.stream.infrared, 2)
+            print('IR is enabled')
+        else:
+            print('IR is disabled')                 
+
+
+        # Start streaming
+        profile             = self.pipeline.start(self.config)
+
+        # Keep the depth sensor handle: the exposure / gain / laser / projector setters use self.depth_sensor
+        self.depth_sensor   = profile.get_device().first_depth_sensor()
+        #depth_scale         = self.depth_sensor.get_depth_scale()
+        #print("Depth Scale is: " , depth_scale) 
+        # 
+    def set_disparity_mode(self):
+        "Enable advanced mode if needed, print the advanced controls and set the depth table to disparity output."
+
+        try:
+            dev         = find_device_that_supports_advanced_mode()
+            advnc_mode  = rs.rs400_advanced_mode(dev)
+            print("Advanced mode is", "enabled" if advnc_mode.is_enabled() else "disabled")
+
+            # Loop until we successfully enable advanced mode (no retry limit: repeats every 5 s until enabled)
+            while not advnc_mode.is_enabled():
+                print("Trying to enable advanced mode...")
+                advnc_mode.toggle_advanced_mode(True)
+                # At this point the device will disconnect and re-connect.
+                print("Sleeping for 5 seconds...")
+                time.sleep(5)
+                # The 'dev' object will become invalid and we need to initialize it again
+                dev = find_device_that_supports_advanced_mode()
+                advnc_mode = rs.rs400_advanced_mode(dev)
+                print("Advanced mode is", "enabled" if advnc_mode.is_enabled() else "disabled")
+
+            # Get each control's current value
+            print("Depth Control: \n", advnc_mode.get_depth_control())
+            print("RSM: \n", advnc_mode.get_rsm())
+            print("RAU Support Vector Control: \n", advnc_mode.get_rau_support_vector_control())
+            print("Color Control: \n", advnc_mode.get_color_control())
+            print("RAU Thresholds Control: \n", advnc_mode.get_rau_thresholds_control())
+            print("SLO Color Thresholds Control: \n", advnc_mode.get_slo_color_thresholds_control())
+            print("SLO Penalty Control: \n", advnc_mode.get_slo_penalty_control())
+            print("HDAD: \n", advnc_mode.get_hdad())
+            print("Color Correction: \n", advnc_mode.get_color_correction())
+            print("Depth Table: \n", advnc_mode.get_depth_table())
+            print("Auto Exposure Control: \n", advnc_mode.get_ae_control())
+            print("Census: \n", advnc_mode.get_census())
+
+        except Exception as e:
+            # Any failure above is only printed; the depth table is then left unchanged.
+            print(e)
+            return   
+
+        #UD - enable disparity mode output
+        depth_table = advnc_mode.get_depth_table()
+        depth_table.disparityMode = 1   # 0-depth,1-disparity
+        advnc_mode.set_depth_table(depth_table)
+        print("Depth Table: \n", advnc_mode.get_depth_table()) # confirm the settings               
+
+    def set_advanced_mode(self):
+        "When use_advanced is set: enable advanced mode, switch to disparity output and apply the tuned controls."
+        if not self.use_advanced:
+            return
+
+        try:
+            dev         = find_device_that_supports_advanced_mode()
+            advnc_mode  = rs.rs400_advanced_mode(dev)
+            print("Advanced mode is", "enabled" if advnc_mode.is_enabled() else "disabled")
+
+            # Loop until we successfully enable advanced mode
+            while not advnc_mode.is_enabled():
+                print("Trying to enable advanced mode...")
+                advnc_mode.toggle_advanced_mode(True)
+                # At this point the device will disconnect and re-connect.
+                print("Sleeping for 5 seconds...")
+                time.sleep(5)
+                # The 'dev' object will become invalid and we need to initialize it again
+                dev = find_device_that_supports_advanced_mode()
+                advnc_mode = rs.rs400_advanced_mode(dev)
+                print("Advanced mode is", "enabled" if advnc_mode.is_enabled() else "disabled")
+
+            # Get each control's current value
+            print("Depth Control: \n", advnc_mode.get_depth_control())
+            print("RSM: \n", advnc_mode.get_rsm())
+            print("RAU Support Vector Control: \n", advnc_mode.get_rau_support_vector_control())
+            print("Color Control: \n", advnc_mode.get_color_control())
+            print("RAU Thresholds Control: \n", advnc_mode.get_rau_thresholds_control())
+            print("SLO Color Thresholds Control: \n", advnc_mode.get_slo_color_thresholds_control())
+            print("SLO Penalty Control: \n", advnc_mode.get_slo_penalty_control())
+            print("HDAD: \n", advnc_mode.get_hdad())
+            print("Color Correction: \n", advnc_mode.get_color_correction())
+            print("Depth Table: \n", advnc_mode.get_depth_table())
+            print("Auto Exposure Control: \n", advnc_mode.get_ae_control())
+            print("Census: \n", advnc_mode.get_census())
+
+        except Exception as e:
+            print(e)
+            return
+
+        #UD - enable disparity mode output
+        depth_table = advnc_mode.get_depth_table()
+        depth_table.disparityMode = 1   # 0-depth,1-disparity
+        advnc_mode.set_depth_table(depth_table)
+        print("Depth Table: \n", advnc_mode.get_depth_table()) # confirm the settings
+
+
+        # #UD - Simulator settings
+        hdad = advnc_mode.get_hdad()
+        hdad.ignoreSAD = 1
+        advnc_mode.set_hdad(hdad)
+        print("HDAD: \n", advnc_mode.get_hdad())
+
+        color_cntrl = advnc_mode.get_color_control()
+        color_cntrl.disableSADColor = 1
+        color_cntrl.disableRAUColor = 1
+        advnc_mode.set_color_control(color_cntrl)
+        print("Color Control: \n", advnc_mode.get_color_control())  # label now matches the control read back
+
+        # no difference
+        # rau_cntrl = advnc_mode.get_rau_support_vector_control()
+        # rau_cntrl.minWEsum = 1
+        # rau_cntrl.minNSsum = 1
+        # advnc_mode.set_rau_support_vector_control(rau_cntrl)
+        # print("RAU Support Vector Control: \n", advnc_mode.get_color_control())
+
+        rsm = advnc_mode.get_rsm()
+        rsm.rsmBypass = 1
+        advnc_mode.set_rsm(rsm)
+        print("RSM: \n", advnc_mode.get_rsm())
+
+        depth_cntrl = advnc_mode.get_depth_control()
+        depth_cntrl.scoreThreshA = 0
+        depth_cntrl.deepSeaSecondPeakThreshold = 50
+        advnc_mode.set_depth_control(depth_cntrl)
+        print("Depth Control: \n", advnc_mode.get_depth_control())
+
+        slo_cntrl = advnc_mode.get_slo_penalty_control()
+        slo_cntrl.sloK1Penalty = 400
+        slo_cntrl.sloK2Penalty = 511
+        advnc_mode.set_slo_penalty_control(slo_cntrl)
+        print("SLO Penalty Control: \n", advnc_mode.get_slo_penalty_control())
+
+        #self.depth_sensor = dev
+        #return dev    
+
+    def load_preset_from_file(self, filename = 'depth_preset.json'):
+        "Load a JSON preset file onto the advanced-mode device; a missing file or a load error is only printed."
+        #json_config_string = filename
+        if not os.path.isfile(filename):
+            print(f'File {filename} not found')
+            return
+        
+        with open(filename) as f:
+            json_config_string = f.read().strip()
+
+        # Loading json file onto device
+        dev = find_device_that_supports_advanced_mode()
+        advanced_mode = rs.rs400_advanced_mode(dev)
+        print("Advanced mode is", "enabled" if advanced_mode.is_enabled() else "disabled")
+        try:
+            advanced_mode.load_json(json_config_string)
+            print('Preset loaded from file : ', filename)
+        except Exception as e:
+            print(e)
+        return
+
+
+
+
+    def set_exposure(self, exposure_value = None, exposure_percent = None):
+        "Set exposure: exposure_value is used directly; exposure_percent is a step index (0-9) into a log-spaced table."
+
+        if not self.depth_sensor.supports(rs.option.exposure):
+            print('Exposure has no support')
+            return
+
+        exp_range       = self.depth_sensor.get_option_range(rs.option.exposure)  # not named 'range': keeps the builtin usable
+        if exposure_percent is None:
+            # old way - set directly (the sensor default is used when no value is given)
+            exposure_value  = exposure_value if exposure_value is not None else exp_range.default
+        else:
+            value_range     = 2**(np.linspace(np.log2(exp_range.min), np.log2(exp_range.max/10) , 10))
+            exposure_index  = min(max(int(exposure_percent), 0), len(value_range) - 1)  # clamp: no IndexError / wrap-around
+            exposure_value  = value_range[exposure_index]
+
+        # Clamp into the supported range before sending the value to the sensor.
+        exposure_value  = min(exp_range.max, max(exp_range.min, exposure_value))
+
+        self.depth_sensor.set_option(rs.option.exposure, int(exposure_value))
+        print(f'Exposure is : {exposure_value}')
+
+    def set_gain(self,gain_value = None):
+        "Set the depth sensor gain, clamped to the supported range; None selects the sensor default."
+        if self.depth_sensor.supports(rs.option.gain):
+            gain_range  = self.depth_sensor.get_option_range(rs.option.gain)  # not named 'range': keeps the builtin usable
+            gain_value  = gain_range.default if gain_value is None else gain_value
+            # Clamp into [min, max] so set_option is never given an out-of-range value.
+            gain_value  = min(gain_range.max, max(gain_range.min, gain_value))
+
+            self.depth_sensor.set_option(rs.option.gain, int(gain_value))
+            print(f'Gain is : {gain_value}')
+        else:
+            print('Gain has no support')
+
+    def set_output_range(self, range_value = 0):
+        "Select the 255-wide window [range_value*255, range_value*255 + 255] stored in self.output_range (16 bit to 8 mapping)"
+        range_value             = range_value * 255
+        self.output_range[0]    = range_value
+        self.output_range[1]    = range_value + 255
+        print(f'Output range is set to min {self.output_range[0]} and max {self.output_range[1]}')  
+
+    def get_baseline(self):
+        "Read the stereo_baseline option from the depth sensor, print it and return it"
+        B = self.depth_sensor.get_option(rs.option.stereo_baseline)
+        print(f'Baseline is : {B} mm')
+        return B
+    
+    def get_focal_length(self):
+        "Resolve the pipeline profile, read the depth stream intrinsics, print and return their fx"
+        pipeline_wrapper    = rs.pipeline_wrapper(self.pipeline)
+        pipeline_profile    = self.config.resolve(pipeline_wrapper)        
+        intr                = pipeline_profile.get_stream(rs.stream.depth).as_video_stream_profile().get_intrinsics()
+        print(f'Intrinsics Fx is : {intr.fx} ')
+        return intr.fx
+    
+    def get_camera_intrinsics(self, stream_type = 1):
+        "Print and return the intrinsics of a stream: 1 - depth, 2 - color, 3 - infrared (other values are passed as is)"
+        if stream_type == 1:
+            stream_type = rs.stream.depth
+        elif stream_type == 2:
+            stream_type = rs.stream.color
+        elif stream_type == 3:
+            stream_type = rs.stream.infrared                        
+
+        pipeline_wrapper    = rs.pipeline_wrapper(self.pipeline)
+        pipeline_profile    = self.config.resolve(pipeline_wrapper)        
+        intr                = pipeline_profile.get_stream(stream_type).as_video_stream_profile().get_intrinsics()
+        #print(f'Intrinsics Fx is : {intr.fx} ')
+        print(intr)
+        return intr   
+    
+    def get_bf(self):
+        "Return baseline * focal length (from get_baseline and get_focal_length), for inverse depth compute"
+        b = self.get_baseline()
+        f = self.get_focal_length()
+        print(f'Total BF is : {b*f} ')
+        return b*f
+    
+    def get_camera_params(self, value_in = 0):
+        "Select which camera params to print: 0 - baseline * focal length, 1 - depth intrinsics; other values do nothing"
+        if value_in == 0:
+            self.get_bf()
+        elif value_in == 1:
+            self.get_camera_intrinsics()
+
+    def set_laser_power(self, laser_power_value = None):
+        "Set the laser power, clamped to the supported range; None selects the sensor default."
+
+        if self.depth_sensor.supports(rs.option.laser_power):
+            power_range        = self.depth_sensor.get_option_range(rs.option.laser_power)  # not named 'range': keeps the builtin usable
+            laser_power_value  = power_range.default if laser_power_value is None else laser_power_value
+            # Clamp into [min, max] so set_option is never given an out-of-range value.
+            laser_power_value  = min(power_range.max, max(power_range.min, laser_power_value))
+
+            self.depth_sensor.set_option(rs.option.laser_power, int(laser_power_value))
+            print(f'Laser power is : {laser_power_value}')
+        else:
+            print('Laser power has no support')
+
+    def switch_projector(self, projector_on = True):
+        "Switch the emitter on or off (emitter_enabled option); only prints a notice when there is no projector."
+        if not self.has_projector:
+            print('Camera is without projector')
+        else:
+            # Report the state actually requested: self.use_projector is not updated by this method.
+            #self.depth_sensor.set_option(rs.option.emitter_always_on, projector_on)
+            self.depth_sensor.set_option(rs.option.emitter_enabled, projector_on)
+
+            time.sleep(0.1) # wait for camera on - off
+            print('Camera projector : %s' %str(projector_on))
+
+    def switch_disparity(self):
+        "Toggle the depth table disparityMode (0-depth, 1-disparity); advanced mode is enabled first when needed."
+        # the advanced mode handle is created on the first call only and kept in self.advance_mode
+        if self.advance_mode is None:
+            dev         = find_device_that_supports_advanced_mode()
+            advnc_mode  = rs.rs400_advanced_mode(dev)
+            print("Advanced mode is", "enabled" if advnc_mode.is_enabled() else "disabled")
+            # NOTE(review): the loop below has no retry limit - it spins forever if the mode never gets enabled
+            # Loop until we successfully enable advanced mode
+            while not advnc_mode.is_enabled():
+                print("Trying to enable advanced mode...")
+                advnc_mode.toggle_advanced_mode(True)
+                # At this point the device will disconnect and re-connect.
+                print("Sleeping for 5 seconds...")
+                time.sleep(5)
+                # The 'dev' object will become invalid and we need to initialize it again
+                dev = find_device_that_supports_advanced_mode()
+                advnc_mode = rs.rs400_advanced_mode(dev)
+                print("Advanced mode is", "enabled" if advnc_mode.is_enabled() else "disabled")
+
+            self.advance_mode       = advnc_mode
+
+        depth_table                 = self.advance_mode.get_depth_table()
+        depth_table.disparityMode   = 1 - depth_table.disparityMode  # 0-depth,1-disparity - switch
+        self.advance_mode.set_depth_table(depth_table)
+        print("Depth Table: \n", self.advance_mode.get_depth_table()) # confirm the settings
+            
+    def set_display_mode(self, mode = 'rgb'):
+        "changes display mode by umber or by string"
+        
+        if isinstance(mode,int): # integer
+            mode  = mode % len(self.DISPLAY_MODES)
+            mode  = self.DISPLAY_MODES[mode]
+
+        if not(mode in self.DISPLAY_MODES):
+             print(f'Not supported mode = {mode}')
+               
+        self.display_mode = mode  
+        print(f'Current mode {mode}')
+
+    def set_controls(self, value_in = 0):
+        "implements differnt controls according to the selected control mode. Input is an integer from 0-9"
+        if self.control_mode == 'display':
+            self.set_display_mode(value_in)
+
+        elif self.control_mode == 'exposure':
+            self.set_exposure( None, exposure_percent=value_in)
+
+        elif self.control_mode == 'gain':
+            self.set_gain(value_in*10)      
+
+        elif self.control_mode == 'projector':
+            self.use_projector = value_in == 1
+            self.switch_projector(value_in == 1)    
+
+        elif self.control_mode == 'disparity':     
+            self.switch_disparity()   
+
+        elif self.control_mode == 'range':     
+            self.set_output_range(value_in) 
+
+        elif self.control_mode == 'focal':     
+            self.get_camera_params(value_in)                           
+        else:
+            pass        
+
+    def convert_depth_to_disparity(self, img_depth):
+        "from GIL"
+        focal_len           = 175.910019
+        baseline            = 94.773
+        #replacementDepth    = focal_len *  baseline / (RectScaledInfra1.x - (maxLoc.x + RectScaledInfra2.x));
+        img_disparity       = img_depth.copy()
+        valid               = img_depth > 0
+        img_disparity[valid]= focal_len*baseline/img_depth[valid]*32
+        return img_disparity
+
+    def measure_noise(self, img):
+        "makes integration over ROI"
+        x0, y0, x1, y1  = self.rect
+        if len(img.shape) < 3:
+            img_roi         = img[y0:y1,x0:x1].astype(np.float32)
+        else: # protect from rgb display
+            img_roi         = img[y0:y1,x0:x1,0].astype(np.float32)
+
+        if self.img_int_mean is None:
+            self.img_int_mean = img_roi
+            self.img_int_std  = np.zeros_like(img_roi)
+        elif self.img_int_mean.shape[1] != img_roi.shape[1]: # image display is changed
+            self.img_int_mean = None
+            return 0
+        
+        valid_bool        = img_roi > 0
+        #valid_num         = valid_bool.sum()
+        #nr,nc             = img_roi.shape[:2]
+
+        self.img_int_mean += 0.1*(img_roi - self.img_int_mean)
+        self.img_int_std  += 0.1*(np.abs(img_roi - self.img_int_mean) - self.img_int_std)
+
+        err_std_valid      = self.img_int_std.copy()
+        #err_std_valid[~valid_bool]    = 100
+        err_std            = err_std_valid[valid_bool].mean()        
+
+        return err_std
+    
+    # Callback function
+    def track_mouse(self, event, x, y, flags, param):
+        if event == cv.EVENT_MOUSEMOVE:
+            self.mouse_x, self.mouse_y = x, y
+            self.mouse_value = self.img_d[y, x]    
+
+    def create_output_image(self, depth_image, color_image, irl_image, irr_image):
+        "defines the output image"
+
+        if self.display_mode == 'rgb':
+            image_out       = color_image
+        elif self.display_mode == 'irl':
+            image_out       = irl_image
+        elif self.display_mode == 'irr':
+            image_out       = irr_image                      
+        elif self.display_mode == 'ddd':
+            # Apply colormap on depth image (image must be converted to 8-bit per pixel first)
+            depth_scaled    = cv.convertScaleAbs(depth_image, alpha=0.03)
+            depth_colormap  = cv.applyColorMap(depth_scaled, cv.COLORMAP_JET)            
+            image_out       = depth_scaled
+        elif self.display_mode == 'rgd':
+            depth_scaled    = cv.convertScaleAbs(depth_image, alpha=0.03)
+            image_out       = np.concatenate((color_image[:,:,:2], depth_scaled[:,:,np.newaxis] ), axis = 2)
+        elif self.display_mode == 'gd':
+            gray_image      = cv.cvtColor(color_image, cv.COLOR_RGB2GRAY)
+            depth_scaled    = cv.convertScaleAbs(depth_image, alpha=0.03)
+            image_out       = np.concatenate((gray_image, depth_scaled ), axis = 1)
+        elif self.display_mode == 'ggd':
+            gray_image      = cv.cvtColor(color_image, cv.COLOR_RGB2GRAY)
+            depth_scaled    = cv.convertScaleAbs(depth_image, alpha=0.03)
+            image_out       = np.stack((gray_image, gray_image, depth_scaled ), axis = 2)            
+        elif self.display_mode == 'gdd':
+            gray_image      = cv.cvtColor(color_image, cv.COLOR_RGB2GRAY)
+            depth_scaled    = cv.convertScaleAbs(depth_image, alpha=0.03)
+            image_out       = np.stack((gray_image, depth_scaled, depth_scaled ), axis = 2) 
+        elif self.display_mode == 'scl':
+            depth_scaled    = cv.convertScaleAbs(depth_image, alpha=0.05)
+            image_out       = cv.applyColorMap(depth_scaled, cv.COLORMAP_JET)    
+        elif self.display_mode == 'sc2':
+            depth_scaled    = cv.convertScaleAbs(depth_image, alpha=0.1)
+            image_out       = cv.applyColorMap(depth_scaled, cv.COLORMAP_JET)    
+        elif self.display_mode == 'ii2':
+            image_out       = np.concatenate((irl_image, irr_image), axis = 1)  
+        elif self.display_mode == 'iid':
+            #print(f'Depth {depth_image.min()} - {depth_image.max()}')
+            depth_scaled    = cv.convertScaleAbs(depth_image, alpha=0.1)
+            image_out       = np.stack((irl_image, irr_image, depth_scaled), axis = 2)  
+            #image_out       = np.concatenate((irl_image, depth_scaled), axis = 1) 
+        elif self.display_mode == 'd16':
+            image_out       = np.stack((irl_image.astype(np.uint16), irr_image.astype(np.uint16), depth_image), axis = 2)              
+        elif self.display_mode == 'iig':
+            image_out       = np.stack((irl_image, irr_image, color_image[:,:,1]), axis = 2)                  
+        elif self.display_mode == 'iir':
+            image_out       = np.stack((irl_image, irr_image, color_image[:,:,0]), axis = 2) 
+            #image_out       = np.concatenate((irl_image, color_image[:,:,0]), axis = 1) 
+        elif self.display_mode == 'dep':
+            image_out       = depth_image.astype(np.float32)
+            image_out       = image_out - self.output_range[0]
+            image_out[image_out < 0]   = 0
+            image_out[image_out > 255] = 255
+
+            
+            #image_out       = depth_image / 32 * 4  # 10 for scaling  
+            #image_out = self.convert_depth_to_disparity(depth_image)             
+        return image_out        
+
+    def read_aligned(self, dst=None):
+        "with frame alignments and color space transformations"
+        #self.use_projector = not self.use_projector # testing
+        w, h                = self.frame_size
+
+        # Wait for a coherent pair of frames: depth and color
+        frames              = self.pipeline.wait_for_frames()
+        # Align the depth frame to color frame
+        aligned_frames      = self.align.process(frames)
+
+        # Get aligned frames
+        depth_frame         = aligned_frames.get_depth_frame() # aligned_depth_frame is a 640x480 depth image
+        color_frame         = aligned_frames.get_color_frame()
+        if not depth_frame or not color_frame:
+            return False, None
+
+        # Convert images to numpy arrays
+        depth_image         = np.asanyarray(depth_frame.get_data())
+        color_image         = np.asanyarray(color_frame.get_data())
+        #color_image = cv.cvtColor(depth_image, cv.COLOR_GRAY2RGB)
+        #depth_image = cv.cvtColor(color_image, cv.COLOR_RGB2GRAY)
+
+        # Apply colormap on depth image (image must be converted to 8-bit per pixel first)
+        depth_scaled        = cv.convertScaleAbs(depth_image, alpha=0.03)
+        depth_colormap      = cv.applyColorMap(depth_scaled, cv.COLORMAP_JET)
+
+        depth_colormap_dim = depth_colormap.shape
+        color_colormap_dim = color_image.shape
+
+        #If depth and color resolutions are different, resize color image to match depth image for display
+        if depth_colormap_dim != color_colormap_dim:
+            raise ValueError('depth and image size missmatch')
+            #color_image = cv.resize(color_image, dsize=(depth_colormap_dim[1], depth_colormap_dim[0]), interpolation=cv.INTER_AREA)
+            #images = np.hstack((resized_color_image, depth_colormap))
+        # else:
+        #     images = np.hstack((color_image, depth_colormap))
+
+        if self.use_ir:
+            ir_left     = aligned_frames.get_infrared_frame(1)
+            irl_image   = np.asanyarray(ir_left.get_data())
+            ir_right    = aligned_frames.get_infrared_frame(2)
+            irr_image   = np.asanyarray(ir_right.get_data())
+        else:
+            #print('Enable IR use at the start. use_ir = True')    
+            irl_image   = color_image[:,:,0]
+            irr_image   = color_image[:,:,1]
+            image_out   = color_image            
+
+        self.img_d, self.img_c, self.img_l, self.img_r = depth_image, color_image, irl_image, irr_image
+        image_out =  self.create_output_image(depth_image, color_image, irl_image, irr_image)                
+        return True, image_out
+
+    def read(self, dst=None):
+        "color and depth are not aligned"
+        w, h = self.frame_size
+
+        # Wait for a coherent pair of frames: depth and color
+        frames = self.pipeline.wait_for_frames()
+        depth_frame = frames.get_depth_frame()
+        color_frame = frames.get_color_frame()
+        if not depth_frame or not color_frame:
+            return False, None
+
+        # Convert images to numpy arrays
+        depth_image     = np.asanyarray(depth_frame.get_data())
+        color_image     = np.asanyarray(color_frame.get_data())
+        #color_image = cv.cvtColor(depth_image, cv.COLOR_GRAY2RGB)
+        #depth_image = cv.cvtColor(color_image, cv.COLOR_RGB2GRAY)
+
+        # convert depth to mm
+        depth_image     = (depth_image * self.depth_scale * 1000).astype(np.uint16)
+
+        # Apply colormap on depth image (image must be converted to 8-bit per pixel first)
+        depth_scaled    = cv.convertScaleAbs(depth_image, alpha=0.03)
+        depth_colormap  = cv.applyColorMap(depth_scaled, cv.COLORMAP_JET)
+
+        depth_colormap_dim = depth_colormap.shape
+        color_colormap_dim = color_image.shape
+
+        if depth_colormap_dim != color_colormap_dim:
+            raise ValueError('depth and image size missmatch')
+            #color_image = cv.resize(color_image, dsize=(depth_colormap_dim[1], depth_colormap_dim[0]), interpolation=cv.INTER_AREA)
+            #images = np.hstack((resized_color_image, depth_colormap))
+        # else:
+        #     images = np.hstack((color_image, depth_colormap))
+
+        if self.use_ir:
+            ir_left     = frames.get_infrared_frame(1)
+            irl_image   = np.asanyarray(ir_left.get_data())
+            ir_right    = frames.get_infrared_frame(2)
+            irr_image   = np.asanyarray(ir_right.get_data())
+        else:
+            #print('Enable IR use at the start. use_ir = True')    
+            irl_image   = color_image[:,:,0]
+            irr_image   = color_image[:,:,1]
+            image_out   = color_image            
+
+        # save if needed
+        self.img_d, self.img_c, self.img_l, self.img_r = depth_image, color_image, irl_image, irr_image
+        image_out =  self.create_output_image(depth_image, color_image, irl_image, irr_image)                
+        return True, image_out        
+
+
+
+    def isOpened(self):
+        "OpenCV compatibility: always reports True (no device state is checked here)."
+        return True
+    
+    def save_image(self, frame, fname = ''):
+        fn = '/home/administrato/videos/image_%s_%03d.png' % (fname, self.count)
+        frame = frame.astype(np.uint16) #cv.cvtColor(frame, cv.CV_16U)
+        cv.imwrite(fn, frame, [cv.IMWRITE_PNG_COMPRESSION, 0])
+        print(fn, 'saved')
+        self.count += 1   
+
+    def save_two_images(self, frame):
+        "saves two differnet files"
+        if len(frame.shape)  < 3:
+            print('Image should have 3 chnnels. Try differnet display options')
+            return
+
+        fl = '.\\imageL_%s_%03d.png' % (self.display_mode, self.count)
+        cv.imwrite(fl, frame[:,:,0])
+        fr = '.\\imageR_%s_%03d.png' % (self.display_mode, self.count)
+        cv.imwrite(fr, frame[:,:,1])
+        print('Saving %s and %s' %(fl,fr))
+        self.count += 1          
+
+    def record_video(self, frame):
+        # record video to a file is switched on
+        if (self.vout is None) and (self.record_on is True):
+            fourcc  = cv.VideoWriter_fourcc(*'mp4v')
+            k       = 0
+            fname   = '.\\video_%s_%03d.mp4' % (self.display_mode,k)
+            while os.path.exists(fname):
+                k      +=1
+                fname   = '.\\video_%s_%03d.mp4' % (self.display_mode,k)
+
+            self.vout     = cv.VideoWriter(fname, fourcc, 20.0, self.frame_size)
+            print('Writing video to file %s' %fname)
+            self.count = 0
+
+        # write frame
+        if (self.vout is not None) and (self.record_on is True):
+            ""
+            if len(frame.shape) < 3:
+                frame = frame[:self.frame_size[1],:self.frame_size[0]]
+                frame = cv.cvtColor(frame, cv.COLOR_GRAY2RGB)
+
+            self.vout.write(frame)
+            self.count += 1  
+            if self.count % 100 == 0:
+                print('Writing frame %s' %str(self.count))
+
+        # record on is switched off
+        if (self.vout is not None) and (self.record_on is False):
+            self.vout.release()
+            self.vout = None
+            print('Video file created')
+
+    def record_release(self):
+        "Finish a running video record: release the writer, if any, and forget it."
+        if self.vout is not None:
+            self.vout.release()
+            self.vout = None    # record_video() opens a new file when it finds no writer
+            print('Video file created')
+
+    def show_mouse_info(self, frame):
+        # Prepare the display text
+        if self.mouse_value < 0:
+            return frame
+        coord_text = f"X: {self.mouse_x}, Y: {self.mouse_y}"
+        color_text = f"V: {self.mouse_value}"
+        # Draw a background rectangle for better text visibility
+        cv.rectangle(frame, (self.mouse_x + 10, self.mouse_y - 45), (self.mouse_x + 180, self.mouse_y + 5), (0, 0, 0), -1)
+        # Draw the text on the frame
+        cv.putText(frame, coord_text, (self.mouse_x + 15, self.mouse_y - 25), 
+                    cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
+        cv.putText(frame, color_text, (self.mouse_x + 15, self.mouse_y - 5), 
+                    cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
+        # Draw a circle that changes color based on what it's hovering over     
+        #cv.circle(frame, (self.mouse_x, self.mouse_y), 6, (int(self.mouse_value), int(self.mouse_value), int(self.mouse_value)), -1)
+        # Add a white border to the circle so it's visible on dark colors
+        cv.circle(frame, (self.mouse_x, self.mouse_y), 6, (int(255-self.mouse_value), 255, int(255-self.mouse_value)), 1)
+
+        return frame
+
+    def show_controls(self, frame):
+        "show image on opencv window"
+        if self.control_mode == 'display':
+            frame = cv.putText(frame, 'Display (0-RGB, 1,2,3...9-I1+I2)', (10, 30), cv.FONT_HERSHEY_SIMPLEX, 0.9, (200,200,12), 2)
+
+        elif self.control_mode == 'exposure':
+            frame = cv.putText(frame, 'Exposure (1,2,3...9) ', (10, 30), cv.FONT_HERSHEY_SIMPLEX, 0.9, (200,200,200), 2)
+
+        elif self.control_mode == 'gain':
+            frame = cv.putText(frame, 'Gain (1,2,3...9) ', (10, 30), cv.FONT_HERSHEY_SIMPLEX, 0.9, (200,200,200), 2)     
+
+        elif self.control_mode == 'projector':
+            frame = cv.putText(frame, 'Projector (0,1) ', (10, 30), cv.FONT_HERSHEY_SIMPLEX, 0.9, (200,200,200), 2)                    
+
+        elif self.control_mode == 'disparity':
+            frame = cv.putText(frame, 'Disparity Out (0,1) ', (10, 30), cv.FONT_HERSHEY_SIMPLEX, 0.9, (200,200,200), 2)  
+
+        elif self.control_mode == 'range':
+            frame = cv.putText(frame, 'Bit Range Out (0,1,2...9) ', (10, 30), cv.FONT_HERSHEY_SIMPLEX, 0.9, (200,200,200), 2)  
+       
+        elif self.control_mode == 'focal':
+            frame = cv.putText(frame, 'Camera Params (0-BL+F,1-Cam Mtrx+Dist, 2..) ', (10, 30), cv.FONT_HERSHEY_SIMPLEX, 0.9, (200,200,200), 2)  
+        else:
+            pass
+
+        return frame
+
+    def show_measurements(self,frame):
+        "show measurements of the noise"
+        if not self.use_measure:
+            self.img_int_mean = None  # reset when enabled
+            return frame
+        
+        if self.rect is None:         
+            h,w          = frame.shape[0]>>1, frame.shape[1]>>1
+            h2,w2        = h>>2,w>>2
+            self.rect    = [w-h2, h-h2, w+h2, h+h2]        
+                
+        err_std          = self.measure_noise(frame)
+        # show  min and max
+        print(f'Frame min {frame.min()} and max {frame.max()}')
+
+        x0, y0, x1, y1   = self.rect
+        clr = (0, 0, 0) if frame[y0:y1,x0:x1].mean() > 128 else (240,240,240)
+        frame           = cv.rectangle(frame, (x0, y0), (x1, y1), clr, 2)
+        frame           = draw_str(frame,(x0,y0-10),str(err_std))
+        
+        return frame
+
+    def show_image(self, frame):
+        "show image on opencv window"
+        do_exit     = False
+        frame_show  = np.uint8(frame.copy())
+
+        #frame_show  = self.show_controls(frame_show)
+
+        cv.imshow(self.fig_title, frame_show)
+        ch = cv.waitKeyEx(1) & 0xff
+        if ch == ord('q') or ch == 27:
+            do_exit = True 
+        elif ch in np.arange(48,58) : # numbers only
+            self.set_controls(ch - 48)
+        elif ch in np.arange(65,75) : # 2 digit numbers using SHIFT key and keys a,b,c,d,e,f,g
+            self.set_controls(ch - 55)            
+        elif ch == ord('d'): # depth image
+            self.control_mode = 'no controls' if self.control_mode == 'display' else 'display'    
+        elif ch == ord('e'): # exposure control
+            self.control_mode = 'no controls' if self.control_mode == 'exposure' else 'exposure'     
+        elif ch == ord('g'): # exposure control
+            self.control_mode = 'no controls' if self.control_mode == 'gain' else 'gain'    
+        elif ch == ord('p'): 
+            self.control_mode = 'no controls' if self.control_mode == 'projector' else 'projector'  
+        elif ch == ord('o'): 
+            self.control_mode = 'no controls' if self.control_mode == 'range' else 'range'     
+        elif ch == ord('x'): 
+            self.control_mode = 'no controls' if self.control_mode == 'disparity' else 'disparity'        
+        elif ch == ord('f'): 
+            self.control_mode = 'no controls' if self.control_mode == 'focal' else 'focal'                            
+        elif ch == ord('m'):   
+            self.use_measure = not self.use_measure
+            print(f'Noise measurement is {self.use_measure}')
+        elif ch == ord('s'):
+            self.save_image(frame) 
+        elif ch == ord('t'):
+            self.save_two_images(frame) 
+        elif ch == 2490368: # Left: 2424832 Up: 2490368 Right: 2555904 Down: 2621440
+            pass
+        elif ch == ord('a'): # enable advanced mode
+            self.use_advanced = not self.use_advanced
+            self.set_advanced_mode()                                    
+        elif ch == ord('r'):
+            self.record_on = not self.record_on
+            print('Video record %s' %str(self.record_on))
+        elif ch == ord('k'):
+            if self.mouse_value < 0: # enable mouse tracking
+                cv.setMouseCallback(self.fig_title, self.track_mouse)    
+            else:
+                cv.setMouseCallback(self.fig_title, lambda *args : None)    
+                self.mouse_value = -1  # disable mouse tracking                         
+        elif ch != 255:
+            print(f'Unrecognized key {ch} - check your language setttings on the keyboard, must be English.')
+
+        return do_exit
+          
+    def close(self):
+        "Finish a running video record and stop the pipeline."
+        self.record_release()
+
+        # Stop streaming
+        self.pipeline.stop()
+        #self.depth_sensor.stop()
+        #self.depth_sensor.close()
+        print('closed')
+
+    def release(self):
+        "OpenCV compatibility: same as close()."
+        self.close()
+
+    def test_intrinsics(self):
+        "check if left right and depth intrinsics"
+        c1 = self.get_camera_intrinsics(1)
+        c2 = self.get_camera_intrinsics(2)
+        c3 = self.get_camera_intrinsics(3)
+        #get_video_stream_intrinsics
+
+    def test(self):
+        while True:
+            ret, frame = self.read()
+            #ret, frame = self.read_aligned()
+            if ret is False:
+                break
+
+            frame = self.show_mouse_info(frame)
+            frame = self.show_measurements(frame)
+            frame = self.show_controls(frame)
+            ret   = self.show_image(frame)
+            if ret :
+                break  
+
+            # check if record is required
+            self.record_video(frame)   
+
+        if ret is False:
+            print('Failed to read image')
+        else:
+            self.close()
+        cv.destroyAllWindows()
+
+if __name__ == '__main__':
+    cap = RealSense(frame_size=(1280,720))
+    # alternative resolution: cap = RealSense(frame_size=(640,360))
+    # cap.test() runs the interactive viewer loop instead of the intrinsics check
+    cap.test_intrinsics()
\ No newline at end of file
diff --git a/scripts/report.py b/scripts/report.py
new file mode 100644
index 0000000..0dfa621
--- /dev/null
+++ b/scripts/report.py
@@ -0,0 +1,522 @@
+"""ReportGenerator — produces all PNG figures, HTML index, and JSON results."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Dict, List
+
+# import matplotlib
+# matplotlib.use("Agg")  # headless — must be before pyplot import
+import matplotlib.pyplot as plt
+import numpy as np
+
+try:
+    from .metrics import (
+        CLOSE_RANGE_THRESHOLD_M,
+        BIN_CENTERS,
+        BIN_LABELS,
+        AggregateStats,
+        BenchmarkResults,
+    )
+except ImportError:
+    import sys as _sys
+    from pathlib import Path as _Path
+    _sys.path.insert(0, str(_Path(__file__).parent))
+    from metrics import (
+        CLOSE_RANGE_THRESHOLD_M,
+        BIN_CENTERS,
+        BIN_LABELS,
+        AggregateStats,
+        BenchmarkResults,
+    )
+
+
+class ReportGenerator:
+    """Generates the full depth quality report in *output_dir*.
+
+    Output structure::
+
+        output_dir/
+          index.html               ← self-contained HTML (relative img srcs)
+          results.json             ← aggregate stats
+          depth_comparison.png     ← side-by-side depth maps
+          error_maps.png           ← |pred − GT| per method
+          coverage_heatmaps.png    ← valid-pixel fraction over all frames
+          distance_error_curve.png ← MAE vs distance bins
+          error_histograms.png     ← per-pixel error distributions
+          summary_table.png        ← statistics table
+          close_range_analysis.png ← < 0.55 m coverage bar + per-frame curve
+          timing_bars.png          ← FPS per method
+    """
+
+    def __init__(
+        self,
+        results: BenchmarkResults,
+        stats: Dict[str, AggregateStats],
+        output_dir: Path,
+    ) -> None:
+        self._r = results
+        self._stats = stats
+        self._out = Path(output_dir)
+        self._out.mkdir(parents=True, exist_ok=True)
+        self._gt = results.ground_truth_name
+        self._non_gt = [n for n in results.method_names if n != self._gt]
+
+    def generate(self) -> None:
+        """Generate all figures, JSON, and HTML index."""
+        fig_paths = [
+            self._fig_depth_comparison(),
+            self._fig_error_maps(),
+            self._fig_coverage_heatmaps(),
+            self._fig_distance_error_curve(),
+            self._fig_error_histograms(),
+            self._fig_summary_table(),
+            self._fig_close_range_analysis(),
+            self._fig_timing_bars(),
+        ]
+        self._write_json()
+        self._write_html([p for p in fig_paths if p])
+        print(f"\nReport written to: {self._out / 'index.html'}")
+
+    # ------------------------------------------------------------------ figures
+
+    def _save(self, fig, name: str) -> str:  # writes the figure to <output dir>/<name> and returns name unchanged
+        fig.savefig(self._out / name, dpi=150, bbox_inches="tight")
+        plt.close(fig)  # the figure is closed once it has been written
+        return name  # bare file name, without the output directory
+
+    def _empty_fig(self, filename: str, msg: str) -> str:
+        fig, ax = plt.subplots(figsize=(6, 3), dpi=100)
+        ax.axis("off")
+        ax.text(0.5, 0.5, msg, transform=ax.transAxes,
+                ha="center", va="center", fontsize=12, color="gray")
+        return self._save(fig, filename)
+
+    def _depth_cmap(self):  # returns a copy of the "plasma" colormap with a black under-range color
+        cmap = plt.get_cmap("plasma").copy()  # work on a copy of the colormap returned by get_cmap
+        cmap.set_under("black")  # color for values below the vmin given at the imshow call
+        return cmap
+
+    def _grid_layout(self, n, max_cols=4):
+        """Return (nrows, ncols) for a grid that fits n items with at most max_cols columns."""
+        ncols = min(n, max_cols)
+        nrows = (n + ncols - 1) // ncols
+        return nrows, ncols
+
+    def _fig_depth_comparison(self) -> str:
+        if not self._r.viz_frames:
+            return self._empty_fig("depth_comparison.png", "No viz frames")
+        vf = self._r.viz_frames[0]
+        names = [n for n in self._r.method_names if n in vf]
+        n = len(names)
+        nrows, ncols = self._grid_layout(n)
+        fig, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 4 * nrows))
+        axes = np.asarray(axes).flatten().tolist()
+        cmap = self._depth_cmap()
+        for i, (ax, name) in enumerate(zip(axes[:n], names)):
+            im = ax.imshow(vf[name], cmap=cmap, vmin=1e-4, vmax=5.0)
+            plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label="meters")
+            ax.set_title(self._r.method_labels.get(name, name), fontsize=9, wrap=True)
+            ax.axis("off")
+        for ax in axes[n:]:
+            ax.axis("off")
+        fig.suptitle("Depth Map Comparison (single frame)", fontsize=11, y=1.02)
+        fig.tight_layout()
+        return self._save(fig, "depth_comparison.png")
+
+    def _fig_error_maps(self) -> str:
+        if not self._r.viz_frames or not self._non_gt:
+            return self._empty_fig("error_maps.png", "No comparison methods")
+        vf = self._r.viz_frames[0]
+        gt = vf.get(self._gt)
+        # Include GT itself first (MAE=0 sanity check), then all other methods
+        names = ([self._gt] if self._gt in vf else []) + [n for n in self._non_gt if n in vf]
+        if gt is None or not names:
+            return self._empty_fig("error_maps.png", "Ground truth not available in viz frame")
+        n = len(names)
+        nrows, ncols = self._grid_layout(n)
+        cmap = plt.get_cmap("hot").copy()
+        cmap.set_under("#222222")
+        fig, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 4 * nrows))
+        axes = np.asarray(axes).flatten().tolist()
+        for ax, name in zip(axes[:n], names):
+            pred = vf[name]
+            valid = (gt > 0) & (pred > 0)
+            err = np.where(valid, np.abs(pred - gt), 0.0).astype(np.float32)
+            im = ax.imshow(err, cmap=cmap, vmin=1e-4, vmax=0.5)
+            plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label="|error| (m)")
+            mean_err = float(np.abs(pred[valid] - gt[valid]).mean()) if valid.any() else 0.0
+            label = self._r.method_labels.get(name, name)
+            ax.set_title(f"{label}\nMAE={mean_err:.3f}m", fontsize=9)
+            ax.axis("off")
+        for ax in axes[n:]:
+            ax.axis("off")
+        gt_label = self._r.method_labels.get(self._gt, self._gt)
+        fig.suptitle(f"Absolute Error vs {gt_label}", fontsize=11, y=1.02)
+        fig.tight_layout()
+        return self._save(fig, "error_maps.png")
+
+    def _fig_coverage_heatmaps(self) -> str:
+        names = list(self._r.coverage_maps.keys())
+        n = len(names)
+        if n == 0:
+            return self._empty_fig("coverage_heatmaps.png", "No coverage data")
+        nrows, ncols = self._grid_layout(n)
+        fig, axes = plt.subplots(nrows, ncols, figsize=(3.5 * ncols, 3.5 * nrows))
+        axes = np.asarray(axes).flatten().tolist()
+        for ax, name in zip(axes[:n], names):
+            cov = self._r.coverage_maps[name]
+            im = ax.imshow(cov, cmap="viridis", vmin=0, vmax=1)
+            plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04, label="Valid fraction")
+            mean_cov = float(cov.mean()) * 100.0
+            label = self._r.method_labels.get(name, name)
+            ax.set_title(f"{label}\n{mean_cov:.1f}% mean", fontsize=9)
+            ax.axis("off")
+        for ax in axes[n:]:
+            ax.axis("off")
+        fig.suptitle("Valid Pixel Coverage Heatmap (all frames avg)", fontsize=11, y=1.02)
+        fig.tight_layout()
+        return self._save(fig, "coverage_heatmaps.png")
+
+    def _fig_distance_error_curve(self) -> str:
+        """Plot mean absolute error per distance bin, one curve per method in ``_non_gt``."""
+        if not self._non_gt:
+            return self._empty_fig("distance_error_curve.png", "No comparison methods")
+        fig, ax = plt.subplots(figsize=(8, 5))
+        for name in self._non_gt:
+            bin_data = self._r.dist_bin_mae.get(name, [])
+            if not bin_data:
+                continue
+            arr = np.array(bin_data, dtype=float)
+            # Empty bins stay NaN (a gap in the curve) instead of a misleading 0 m error.
+            has_data = ~np.isnan(arr).all(axis=0)
+            mean_per_bin = np.full(arr.shape[1], np.nan)
+            mean_per_bin[has_data] = np.nanmean(arr[:, has_data], axis=0)
+            ax.plot(BIN_CENTERS, mean_per_bin, marker="o", linewidth=2, markersize=7,
+                    color=self._r.method_colors.get(name, "#888"),
+                    label=self._r.method_labels.get(name, name))
+        ax.set_xticks(BIN_CENTERS)
+        ax.set_xticklabels(BIN_LABELS, fontsize=9)
+        ax.set_xlabel("Distance range", fontsize=10)
+        ax.set_ylabel("Mean Absolute Error (m)", fontsize=10)
+        ax.set_title("Depth Error vs Distance", fontsize=12)
+        ax.legend(fontsize=9)
+        ax.grid(alpha=0.3)
+        fig.tight_layout()
+        return self._save(fig, "distance_error_curve.png")
+
+    def _fig_error_histograms(self) -> str:
+        """Plot a histogram of per-pixel |pred - GT| for each compared method."""
+        if not self._non_gt or not self._r.viz_frames:
+            return self._empty_fig("error_histograms.png", "No comparison data")
+        names = [n for n in self._non_gt
+                 if any(n in vf and self._gt in vf for vf in self._r.viz_frames)]
+        if not names:
+            return self._empty_fig("error_histograms.png", "No viz data for comparison")
+        nrows, ncols = self._grid_layout(len(names))
+        fig, axes = plt.subplots(nrows, ncols, figsize=(5 * ncols, 4 * nrows))
+        axes = np.asarray(axes).flatten().tolist()
+        for ax, name in zip(axes, names):
+            # Keep one float64 ndarray per frame instead of boxing every pixel in a list.
+            chunks = []
+            for vf in self._r.viz_frames:
+                if name not in vf or self._gt not in vf:
+                    continue
+                pred, gt = vf[name], vf[self._gt]
+                valid = (gt > 0) & (pred > 0)  # both maps must hold a positive depth
+                if valid.any():
+                    chunks.append(np.asarray(np.abs(pred[valid] - gt[valid]), dtype=np.float64))
+            if not chunks:
+                ax.text(0.5, 0.5, "No data", transform=ax.transAxes,
+                        ha="center", va="center", color="gray")
+                continue
+            errors = np.concatenate(chunks)
+            ax.hist(errors, bins=50, range=(0.0, 1.0), alpha=0.8, edgecolor="none",
+                    color=self._r.method_colors.get(name, "#888"))
+            mean_e = float(np.mean(errors))  # mean over ALL errors, not only the 0-1 m shown
+            ax.axvline(mean_e, color="red", linestyle="--", linewidth=1.5,
+                       label=f"mean={mean_e:.3f}m")
+            ax.set_xlabel("Absolute error (m)", fontsize=9)
+            ax.set_ylabel("Pixel count", fontsize=9)
+            ax.set_title(self._r.method_labels.get(name, name), fontsize=9)
+            ax.legend(fontsize=8)
+        for ax in axes[len(names):]:
+            ax.axis("off")
+        fig.suptitle("Per-Pixel Error Distribution (vs GT, viz frames)", fontsize=11)
+        fig.tight_layout()
+        return self._save(fig, "error_histograms.png")
+
+    def _fig_summary_table(self) -> str:  # render the per-method metrics as a table figure
+        if not self._stats:
+            return self._empty_fig("summary_table.png", "No stats")
+        cols = ["Method", "MRE* (%)", "MRE (%)", "MAE (m)", "δ1 (%)", "Coverage (%)", "FPS", "GPU %", "GT?"]
+        gt_rows, other_rows = [], []
+        for name, s in self._stats.items():
+            is_gt = (name == self._gt)
+            row = [  # one cell per entry of `cols`, same order
+                s.label,
+                "—" if is_gt else f"{s.mre_pen_mean * 100:.1f}",  # scaled by 100 for the "%" column
+                "—" if is_gt else f"{s.mre_mean * 100:.1f}",  # scaled by 100 for the "%" column
+                "—" if is_gt else f"{s.mae_mean:.4f}",
+                "—" if is_gt else f"{s.delta1_mean:.1f}",  # printed as stored (no x100 here)
+                f"{s.coverage_mean:.1f}",
+                f"{s.fps_mean:.1f}" if s.fps_mean < 999 else "≈30",  # >= 999 presumably a sentinel for the camera stream - confirm
+                f"{s.gpu_load_mean:.0f}" if s.gpu_load_mean > 0 else "—",
+                "★ GT" if is_gt else "",  # marker also used below to highlight the row
+            ]
+            (gt_rows if is_gt else other_rows).append((name, row))
+        ordered = gt_rows + other_rows  # ground-truth row(s) first
+        cell_text = [r for _, r in ordered]
+        n = len(ordered)
+        fig, ax = plt.subplots(figsize=(13, 1.0 + 0.55 * n))  # figure height grows with the row count
+        ax.axis("off")
+        table = ax.table(cellText=cell_text, colLabels=cols,
+                         cellLoc="center", loc="center")
+        table.auto_set_font_size(False)
+        table.set_fontsize(9)
+        table.scale(1, 1.6)
+        for (row, col), cell in table.get_celld().items():
+            if row == 0:  # header row
+                cell.set_facecolor("#2c3e50")
+                cell.set_text_props(color="white", fontweight="bold")
+            elif row % 2 == 0:  # zebra striping on even table rows
+                cell.set_facecolor("#f7f7f7")
+            cell.set_edgecolor("#cccccc")
+            if row > 0 and cell_text[row - 1][-1] == "★ GT":  # table row r maps to cell_text[r - 1]
+                cell.set_facecolor("#d5f5d5")  # applied last, so it overrides the striping
+        ax.set_title("Depth Quality Summary", fontsize=12, pad=10, fontweight="bold")
+        fig.tight_layout()
+        return self._save(fig, "summary_table.png")
+
+    def _fig_close_range_analysis(self) -> str:  # two panels: coverage bars and per-frame coverage curves
+        names = list(self._r.method_names)
+        if not names:
+            return self._empty_fig("close_range_analysis.png", "No methods")
+        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
+        # Left panel: one bar per method; methods missing from self._stats are drawn as 0.0.
+        labels = [self._r.method_labels.get(n, n) for n in names]
+        coverages = [self._stats[n].close_range_coverage if n in self._stats else 0.0
+                     for n in names]
+        colors = [self._r.method_colors.get(n, "#888") for n in names]
+        bars = ax1.bar(labels, coverages, color=colors, alpha=0.85, edgecolor="white")
+        ax1.bar_label(bars, labels=[f"{v:.1f}%" for v in coverages], padding=3, fontsize=7)
+        ax1.set_ylabel(f"Coverage at < {CLOSE_RANGE_THRESHOLD_M}m (%)", fontsize=10)
+        ax1.set_title(f"Close-Range Coverage (< {CLOSE_RANGE_THRESHOLD_M} m)", fontsize=11)
+        ax1.tick_params(axis="x", rotation=45, labelsize=7)
+        ax1.set_xticklabels(labels, ha="right")  # re-applied only to set the horizontal alignment
+        ax1.set_ylim(0, 115)  # headroom above 100 for the bar labels
+        ax1.grid(axis="y", alpha=0.3)
+        # Right panel: one line per method from close_range_valid; x is the 1-based frame index.
+        for name in names:
+            vals = self._r.close_range_valid.get(name, [])
+            if not vals:  # methods without per-frame data get no line
+                continue
+            color = self._r.method_colors.get(name, "#888")
+            label = self._r.method_labels.get(name, name)
+            ax2.plot(range(1, len(vals) + 1), vals, color=color,
+                     label=label, alpha=0.8, linewidth=1.5)
+        ax2.set_xlabel("Frame", fontsize=10)
+        ax2.set_ylabel(f"Coverage at < {CLOSE_RANGE_THRESHOLD_M}m (%)", fontsize=10)
+        ax2.set_title("Close-Range Coverage per Frame", fontsize=11)
+        ax2.legend(fontsize=6, loc="best")
+        ax2.grid(alpha=0.3)
+        ax2.set_ylim(-5, 115)
+
+        fig.suptitle("Close-Range Depth Analysis", fontsize=13, fontweight="bold")
+        fig.tight_layout()
+        return self._save(fig, "close_range_analysis.png")
+
+    def _fig_timing_bars(self) -> str:
+        """Draw one horizontal FPS bar per method, with values clamped to 200 for display."""
+        if not self._stats:
+            return self._empty_fig("timing_bars.png", "No timing data")
+        entries = list(self._stats.values())
+        shown_fps = [min(entry.fps_mean, 200.0) for entry in entries]
+        fig, ax = plt.subplots(figsize=(8, 1.0 + 0.6 * len(entries)))
+        bars = ax.barh([entry.label for entry in entries], shown_fps, alpha=0.85,
+                       color=[entry.color for entry in entries], edgecolor="white")
+        ax.bar_label(bars, labels=[f"{fps:.1f}" for fps in shown_fps], padding=4, fontsize=9)
+        ax.axvline(30, color="gray", linestyle="--", alpha=0.5, label="30 FPS target")
+        ax.set_xlabel("Throughput (FPS)", fontsize=10)
+        ax.set_title("Processing Speed by Method", fontsize=11)
+        ax.legend(fontsize=8)
+        ax.grid(axis="x", alpha=0.3)
+        fig.tight_layout()
+        return self._save(fig, "timing_bars.png")
+
+    # ------------------------------------------------------------------ JSON / HTML
+
+    def _write_json(self) -> None:
+        """Serialize run metadata and per-method stats to ``results.json``."""
+        methods = {}
+        for name, s in self._stats.items():
+            methods[name] = {
+                "label": s.label,
+                "mae_mean": s.mae_mean, "mae_std": s.mae_std,
+                "rmse_mean": s.rmse_mean,
+                "mre_mean": s.mre_mean,
+                "delta1_mean": s.delta1_mean,
+                "coverage_mean": s.coverage_mean,
+                "fps_mean": s.fps_mean if s.fps_mean < 1e6 else -1,
+                "time_ms_mean": s.time_ms_mean,
+                "close_range_coverage": s.close_range_coverage,
+                "gpu_load_mean": s.gpu_load_mean,
+                "is_ground_truth": name == self._gt,
+            }
+        data = {
+            "source": self._r.source,
+            "n_frames": self._r.n_frames,
+            "resolution": f"{self._r.width}x{self._r.height}",
+            "ground_truth": self._r.ground_truth_name,
+            "distance_bins": BIN_LABELS,
+            "model_configs": self._r.method_configs,
+            "methods": methods,
+        }
+        (self._out / "results.json").write_text(json.dumps(data, indent=2))
+
+    def _write_html(self, fig_paths: List[str]) -> None:
+        """Write index.html: run metadata, optional model-config table, one section per figure."""
+        import datetime
+        ts = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        method_list = ", ".join(self._r.method_labels.get(n, n) for n in self._r.method_names)
+        meta_rows = (
+            f"<tr><td><b>Source</b></td><td>{self._r.source}</td></tr>"
+            f"<tr><td><b>Frames</b></td><td>{self._r.n_frames}</td></tr>"
+            f"<tr><td><b>Resolution</b></td><td>{self._r.width}×{self._r.height}</td></tr>"
+            f"<tr><td><b>Ground truth</b></td><td>{self._r.method_labels.get(self._gt, self._gt)}</td></tr>"
+            f"<tr><td><b>Methods evaluated</b></td><td>{method_list}</td></tr>"
+            f"<tr><td><b>Generated</b></td><td>{ts}</td></tr>"
+        )
+        # Model configuration details for NN methods
+        config_html = ""
+        if self._r.method_configs:
+            config_html = '\n    <div class="section">\n      <h2>Model Configuration</h2>\n      <table style="border-collapse:collapse;width:100%;font-size:.9em;">'
+            config_html += '\n        <tr style="background:#2c3e50;color:white;font-weight:bold;"><td style="padding:6px 12px;">Method</td><td style="padding:6px 12px;">max_disp</td><td style="padding:6px 12px;">valid_iters</td><td style="padding:6px 12px;">Engine Resolution</td><td style="padding:6px 12px;">Engine Dir</td></tr>'
+            for name, cfg in self._r.method_configs.items():
+                label = self._r.method_labels.get(name, name)
+                md = cfg.get("max_disp", "—")
+                vi = cfg.get("valid_iters", "—")
+                res = cfg.get("engine_resolution", "—")
+                edir = cfg.get("engine_dir", "—")
+                config_html += f'\n        <tr><td style="padding:6px 12px;">{label}</td><td style="padding:6px 12px;">{md}</td><td style="padding:6px 12px;">{vi}</td><td style="padding:6px 12px;">{res}</td><td style="padding:6px 12px;font-size:.8em;">{edir}</td></tr>'
+            config_html += '\n      </table>\n    </div>'
+        captions = {
+            "depth_comparison.png":
+                "Side-by-side depth maps from a single representative frame. Invalid pixels are black.",
+            "error_maps.png":
+                "Per-pixel absolute error |pred − GT| clipped at 0.1 m. Brighter = more error.",
+            "coverage_heatmaps.png":
+                "Fraction of frames each pixel has valid depth, averaged over all benchmark frames.",
+            "distance_error_curve.png":
+                "Mean Absolute Error (MAE) broken down by distance range.",
+            "error_histograms.png":
+                "Distribution of per-pixel absolute errors from the stored visualisation frames.",
+            "summary_table.png":
+                "Aggregate quality metrics — see legend below the table for column explanations.",
+            "close_range_analysis.png":
+                f"Coverage and stability for objects closer than {CLOSE_RANGE_THRESHOLD_M} m. Highlights MinZ benefit.",
+            "timing_bars.png":
+                "Processing speed in FPS. Hardware baseline is fixed at ~30 FPS (camera frame rate).",
+        }
+        # Plain literals (neither f-strings nor %-formatted): a doubled "%%" would be emitted verbatim.
+        metric_legend = (
+            '\n    <div class="legend">'
+            "\n      <h3>How to Read the Summary Table</h3>"
+            "\n      <table>"
+            "\n        <tr><td><b>MRE* (%)</b></td>"
+            "\n            <td><b>Overall score (recommended).</b> Mean Relative Error with hole penalty &mdash; "
+            "pixels where the method has no depth but ground truth does count as 100% error. "
+            "This is the fairest single metric because it penalises both inaccuracy and missing coverage. <b>Lower is better.</b></td></tr>"
+            "\n        <tr><td><b>MRE (%)</b></td>"
+            "\n            <td>Mean Relative Error over valid pixels only (holes ignored). "
+            "5% means each measured pixel is ~5% off on average. <b>Lower is better.</b></td></tr>"
+            "\n        <tr><td><b>MAE (m)</b></td>"
+            "\n            <td>Mean Absolute Error in meters, valid pixels only. <b>Lower is better.</b></td></tr>"
+            '\n        <tr><td><b>&delta;1 (%)</b></td>'
+            "\n            <td>Percentage of valid pixels within 1.25&times; of ground truth depth. <b>Higher is better.</b> 100% is perfect.</td></tr>"
+            "\n        <tr><td><b>Coverage (%)</b></td>"
+            "\n            <td>Percentage of pixels that produced valid depth. <b>Higher is better.</b> "
+            "MinZ improves this at close range (&lt;0.55m) by filling holes the hardware camera cannot see.</td></tr>"
+            "\n        <tr><td><b>FPS</b></td>"
+            "\n            <td>Processing speed (frames per second). <b>Higher is faster.</b></td></tr>"
+            '\n        <tr><td><b>GT?</b></td>'
+            '\n            <td>&starf; GT marks the ground truth method (NNDepth accurate). Its error columns show "&mdash;" because you don\'t compare ground truth to itself.</td></tr>'
+            "\n      </table>"
+            "\n    </div>"
+        )
+        sections = ""
+        for path in fig_paths:
+            caption = captions.get(path, "")
+            title = path.replace(".png", "").replace("_", " ").title()
+            sections += (
+                f'\n    <div class="section">'
+                f"\n      <h2>{title}</h2>"
+                f'\n      <div class="figure-wrapper">'
+                f'\n        <img src="{path}" alt="{title}">'
+                f'\n        <p class="caption">{caption}</p>'
+                f"\n      </div>"
+                f"\n    </div>"
+            )
+            if path == "summary_table.png":
+                sections += metric_legend
+        html = f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>Depth Quality Report — rs-enhanced-depth</title>
+  <style>
+    * {{ box-sizing: border-box; margin: 0; padding: 0; }}
+    body {{ font-family: system-ui, -apple-system, sans-serif; background: #f0f2f5;
+            color: #222; max-width: 1200px; margin: 0 auto; padding: 24px 16px; }}
+    h1 {{ font-size: 1.8em; color: #1a2e4a; margin-bottom: 4px; }}
+    .subtitle {{ color: #555; font-size: .95em; margin-bottom: 8px; }}
+    h2 {{ font-size: 1.15em; color: #1a2e4a; border-bottom: 2px solid #0f3460;
+          padding-bottom: 6px; margin-bottom: 16px; }}
+    .meta {{ background: white; border-radius: 10px; padding: 16px 20px;
+             margin-bottom: 24px; box-shadow: 0 2px 6px rgba(0,0,0,.08); }}
+    .meta table {{ border-collapse: collapse; width: 100%; font-size: .9em; }}
+    .meta td {{ padding: 5px 12px; }}
+    .meta tr:nth-child(even) {{ background: #f5f7fa; }}
+    .section {{ background: white; border-radius: 10px; padding: 20px 24px;
+                margin-bottom: 24px; box-shadow: 0 2px 6px rgba(0,0,0,.08); }}
+    .figure-wrapper {{ text-align: center; }}
+    .figure-wrapper img {{ max-width: 100%; height: auto;
+                           border: 1px solid #e0e0e0; border-radius: 6px; }}
+    .caption {{ font-size: .83em; color: #666; margin-top: 10px; }}
+    .legend {{ background: #e8f4fd; border: 1px solid #b3d7f0; border-radius: 10px;
+               padding: 18px 22px; margin-bottom: 24px; }}
+    .legend h3 {{ font-size: 1em; color: #1a5276; margin-bottom: 10px; }}
+    .legend table {{ border-collapse: collapse; width: 100%; font-size: .85em; }}
+    .legend td {{ padding: 5px 10px; vertical-align: top; }}
+    .legend td:first-child {{ white-space: nowrap; width: 110px; }}
+    .legend tr:nth-child(even) {{ background: rgba(255,255,255,.5); }}
+    footer {{ text-align: center; color: #999; font-size: .8em; padding: 24px 0 8px; }}
+    .pdf-btn {{
+      display: inline-flex; align-items: center; gap: 7px;
+      background: #0f3460; color: white; border: none; border-radius: 7px;
+      padding: 9px 20px; font-size: .95em; font-weight: 600; cursor: pointer;
+      margin: 12px 0 20px; text-decoration: none; transition: background .15s;
+    }}
+    .pdf-btn:hover {{ background: #16213e; }}
+    @media print {{
+      .pdf-btn {{ display: none; }}
+      body {{ background: white; padding: 0; }}
+      .section {{ box-shadow: none; border: 1px solid #ddd; break-inside: avoid; }}
+      .meta {{ box-shadow: none; border: 1px solid #ddd; }}
+    }}
+  </style>
+</head>
+<body>
+  <h1>Depth Quality Report</h1>
+  <p class="subtitle">rs-enhanced-depth — multi-method depth quality analysis</p>
+  <button class="pdf-btn" onclick="window.print()">&#x1F4E5; Export as PDF</button>
+  <div class="meta"><table>{meta_rows}</table></div>
+{config_html}
+{sections}
+  <footer>Generated by <code>tests/depth_report/</code> on {ts}</footer>
+</body>
+</html>"""
+        # The page declares <meta charset="utf-8">, so write it as UTF-8 whatever the locale is.
+        (self._out / "index.html").write_text(html, encoding="utf-8")
\ No newline at end of file
diff --git a/scripts/run_box_detector_fs.py b/scripts/run_box_detector_fs.py
new file mode 100644
index 0000000..6165c75
--- /dev/null
+++ b/scripts/run_box_detector_fs.py
@@ -0,0 +1,2710 @@
+#!/usr/bin/env python
+
+'''
+Tester for the multi-planar plane detector with foundation stereo
+==================
+
+Using depth image to compute depth planes locally for specific ROI.
+
+
+Usage:
+
+Environment : 
+    ..\\docker
+
+Install : 
+
+
+
+'''
+
+import sys 
+import numpy as np
+import cv2 as cv
+import random
+import unittest
+#from scipy.spatial.transform import Rotation as Rot
+import matplotlib.pyplot as plt
+import logging 
+log = logging.getLogger("robot")
+log.setLevel(logging.DEBUG)
+from opencv_realsense_camera import RealSense, draw_str
+from run_fast_foundation_with_rs import convert_disparity_to_depth, foundation_stereo_algo_init, foundation_stereo_algo, process_arguments
+
+#!/usr/bin/env python
+# for rotations
+from scipy.spatial.transform import Rotation as R
+
+
+
+
+
+#%% Main
+class PlaneDetector:
+    def __init__(self, detect_type = 'p', image_size = (1280,720)):
+        "sets a default camera matrix (f=1000, center of the frame, zero distortion) and empty detector state"
+        self.detect_type    = detect_type   # plane
+
+        self.frame_size     = image_size    # (width, height)
+        self.img            = None
+        self.cam_matrix     = np.array([[1000,0,self.frame_size[0]/2],[0,1000,self.frame_size[1]/2],[0,0,1]], dtype = np.float32)
+        self.cam_distort    = np.array([0,0,0,0,0],dtype = np.float32)
+
+        self.img3d          = None  # contains x,y and depth planes
+        self.img_xyz        = None  # contains X,Y,Z information after depth image to XYZ transform
+        self.img_mask       = None  # which pixels belong to the plane
+        self.rect           = None  # roi
+        self.img_roi        = None  # roi image
+        self.img_roi_normal = None  # normals at roi image
+
+        # detector state
+        self.matrix_inv     = None     # holds inverse params (only referenced by a commented-out pinv in fit_plane_init)
+        self.rect_z         = None     # flat z for ROI         
+        self.rect_dir       = None     # direct u,v,1 for ROI
+        self.rect_xyz       = None     # direct u,v,1 multiplied by z ROI 
+        self.full_dir       = None     # direct u,v,1 for entire image
+        self.full_xyz       = None     # direct u,v,1 multiplied by z entire image         
+        self.roi_index      = None     # index of the points of an ROI in the original image     
+        self.plane_params   = None     # rvec not normalized
+        self.plane_center   = None     # tvec
+        self.plane_confidence = 0      # reliability of the detection
+
+        #self.corner_ind     = [0, 10, 40, 50]  # corner of the rectangle for the projection
+        self.rect_3d        = None    # roi but projected on 3D 
+
+        # for the box
+        self.contour        = None
+        self.hull           = None
+        self.floor_z        = 10000
+
+        # params
+        self.MIN_SPLIT_SIZE  = 32
+        self.MIN_STD_ERROR   = 0.01
+
+        # color for the mask
+        self.color_mask     = np.random.randint(0,255,3) # random color (upper bound is exclusive : values 0..254)
+
+        # helper variable
+        self.ang_vec     = np.zeros((3,1))  # helper variable
+
+    def init_image(self, img = None):
+        "stores the frame, records its size as (width, height) and resets the mask to zeros"
+
+        self.img            = img
+        rows, cols          = img.shape[:2]
+        self.frame_size     = cols, rows
+        self.img_mask       = np.zeros(shape = (rows, cols), dtype = np.uint8)
+        return True
+
+    def init_roi(self, roi_type = 1):
+        """load the test case : picks a hard-coded ROI preset and stores it in self.rect
+
+        roi_type values without a preset keep the full frame [0,0,w,h]"""
+        w,h     = self.frame_size[0],self.frame_size[1]
+        w2,h2   = w>>1,h>>1
+        roi     = [0,0,w,h]
+        if roi_type == 1:
+            roi = [w2-3,h2-3,w2+3,h2+3] # xlu, ylu, xrb, yrb
+        elif roi_type == 2:
+            roi = [300,220,340,260] # xlu, ylu, xrb, yrb
+        elif roi_type == 3:
+            roi = [280,200,360,280] # xlu, ylu, xrb, yrb
+        elif roi_type == 4:
+            roi = [220,140,420,340] # xlu, ylu, xrb, yrb
+        elif roi_type == 5:
+            roi = [200,120,440,360] # xlu, ylu, xrb, yrb
+        elif roi_type == 11:
+            roi = [w2-16,h2-16,w2+16,h2+16] # xlu, ylu, xrb, yrb
+        elif roi_type == 12:
+            roi = [w2-32,h2-32,w2+32,h2+32] # xlu, ylu, xrb, yrb
+        elif roi_type == 13:
+            roi = [w2-64,h2-64,w2+64,h2+64] # xlu, ylu, xrb, yrb
+        elif roi_type == 14:
+            roi = [w2-64,h2-48,w2+64,h2+48] # xlu, ylu, xrb, yrb
+        elif roi_type == 21: # lower center image position
+            roi = [w2-64,h2+128,w2+64,h2+196] # xlu, ylu, xrb, yrb
+        elif roi_type == 22: # lower left image position
+            roi = [w2-400,h2+128,w2-272,h2+196] # xlu, ylu, xrb, yrb
+        elif roi_type == 23: # lower right image position
+            roi = [w2+272,h2+128,w2+400,h2+196] # xlu, ylu, xrb, yrb
+        elif roi_type == 31: # upper center image position
+            #roi = [w2-64,h2-196,w2+64,h2-128] # xlu, ylu, xrb, yrb
+            roi = [w2+64,h2-128,w2+128,h2-64] # xlu, ylu, xrb, yrb
+        elif roi_type == 32: # upper left image position
+            roi = [w2-400,h2-196,w2-272,h2-128] # xlu, ylu, xrb, yrb
+        elif roi_type == 33: # upper right image position
+            roi = [w2+302,h2-196,w2+430,h2-128] # xlu, ylu, xrb, yrb
+        elif roi_type == 41: # center center image position
+            #roi = [w2-64,h2+32,w2+64,h2+96] # xlu, ylu, xrb, yrb
+            roi = [w2-128,h2+64,w2+128,h2+196] # xlu, ylu, xrb, yrb
+        elif roi_type == 42: # center left image position
+            roi = [w2-200,h2+32,w2-72,h2+96] # xlu, ylu, xrb, yrb
+        elif roi_type == 43: # center right image position
+            roi = [w2+72,h2+32,w2+200,h2+96] # xlu, ylu, xrb, yrb
+        elif roi_type == 52: # upper left image position for data 432
+            roi = [w2-256,h2-256,w2-128,h2-192] # xlu, ylu, xrb, yrb
+        elif roi_type == 53: # band below the image center (rows h2+16 .. h2+300)
+            roi = [w2-128,h2+16,w2+128,h2+300] # xlu, ylu, xrb, yrb
+
+        elif roi_type == 60: # 422 on cube smaller
+            roi = [645,395,665,415] # xlu, ylu, xrb, yrb
+        elif roi_type == 61: # 422 on cube
+            roi = [640,390,670,420] # xlu, ylu, xrb, yrb
+        elif roi_type == 62: # 422 right cube side
+            roi = [690,390,720,420] # xlu, ylu, xrb, yrb
+        elif roi_type == 71: # set 422 - cube
+            roi = [630,h2+72,690,h2+128] # xlu, ylu, xrb, yrb
+        elif roi_type == 72: # set 422 - cube
+            roi = [430,h2-30,890,h2+228] # xlu, ylu, xrb, yrb
+        elif roi_type == 73: # set 422 - down side of the cube
+            roi = [630,h2+140,690,h2+190] # xlu, ylu, xrb, yrb
+        elif roi_type == 74: # set 422 - right side of the cube
+            roi = [700,h2+64,800,h2+164] # xlu, ylu, xrb, yrb
+        elif roi_type == 75: # set 422 - up side of the cube
+            roi = [700,h2+20,800,h2+96] # xlu, ylu, xrb, yrb
+        self.rect = roi
+        #self.rect_3d        = [[-w,-h,0],[w,-h,0],[w,h,0],[-w,h,0],[-w,-h,0]]
+        log.info(f'Using ROI : {roi}')
+        return roi
+
+    def preprocess(self, img = None):
+        "lazily inits mask / ROI / ray grid, then returns the whole frame (channel 2 if multi-channel) as float32; False if img is None"
+        if img is None:
+            log.info('No image provided')
+            return False
+
+        if self.img_mask is None:
+            self.init_image(img)
+
+        if self.rect is None: # no ROI selected yet - fall back to preset 4
+            self.init_roi(4)
+
+        # init params of the inverse
+        if self.full_dir is None:
+            self.fit_plane_init()
+
+        # the ROI crop is disabled (commented out below) : the full frame is converted
+        if len(img.shape) > 2:
+            #img_roi        = img[y0:y1,x0:x1,2].astype(np.float32)
+            img_roi        = img[:,:,2].astype(np.float32)
+        else:
+            #img_roi        = img[y0:y1,x0:x1].astype(np.float32)
+            img_roi        = img.astype(np.float32)
+        return img_roi
+
+    def init_img3d(self, img = None):
+        """initializes the per-pixel grid self.img3d = stack(u, v, z) and resets self.img_mask
+
+        u,v come from cv.undistortPoints on every pixel; z is img cast to float32.
+        returns self.img3d (shape h x w x 3)
+        """
+        img     = self.img if img is None else img
+        h,w     = img.shape[:2]
+        x       = np.arange(w)
+        y       = np.arange(h)
+        x,y     = np.meshgrid(x,y)
+
+        # N x 1 x 2 float32 list of pixel coordinates, row by row
+        xy      = np.hstack((x.reshape(-1,1),y.reshape(-1,1)))
+        xy      = np.expand_dims(xy, axis=1).astype(np.float32)
+        # no projection matrix is passed, so the outputs are normalized image coordinates
+        xy_undistorted = cv.undistortPoints(xy, self.cam_matrix, self.cam_distort)
+
+        u       = xy_undistorted[:,0,0].reshape((h,w))
+        v       = xy_undistorted[:,0,1].reshape((h,w))
+        z3d     = img.astype(np.float32)
+
+        # u,v are stored unscaled; compute_img3d multiplies them by depth per frame.
+        # (a depth-scaled x/y copy used to be computed here but was never stored)
+        self.img3d      = np.stack((u,v,z3d), axis = 2)
+
+        # NOTE(review): float64 mask here, while init_image allocates uint8 - confirm
+        self.img_mask   = np.zeros((h,w))
+
+        return self.img3d
+    
+    def compute_img3d(self, img = None):
+        """compute xyz coordinates for each pixel using the grid cached in self.img3d
+
+        img defaults to self.img. Returns the h x w x 3 array that is also stored in
+        self.img_xyz, or None when the frame size differs from the cached grid.
+        """
+        img         = self.img if img is None else img
+
+        grid        = self.img3d
+        if grid is None:
+            grid    = self.init_img3d(img)
+
+        # the cached grid is not rebuilt on a size change - the frame is rejected
+        if img.shape[:2] != grid.shape[:2]:
+            print('Image dimension change')
+            return
+
+        depth       = img.astype(np.float32)
+        points      = self.img3d.copy()
+
+        # filter bad z values : only depth above 15 scales the u,v entries,
+        # the remaining pixels keep the unscaled u,v of the grid
+        usable      = depth > 15
+        points[usable, 0] *= depth[usable]
+        points[usable, 1] *= depth[usable]
+
+        # third channel is the float32 depth itself (all pixels)
+        points[:,:,2]     = depth
+
+        self.img_xyz = points
+        return points
+
+    def check_error(self, xyz1_mtrx, vnorm):
+        "spread (std) of the products xyz1_mtrx . vnorm"
+        # the std is taken over all elements of the dot product
+        residuals   = np.dot(xyz1_mtrx, vnorm)
+        return np.std(residuals)
+    
+    def get_plane_params(self):
+        "for external interface support : returns (plane_center, plane_params, plane_confidence)"
+        # tuple order is tvec, rvec, confidence
+        return (self.plane_center,
+                self.plane_params,
+                self.plane_confidence)
+
+    def convert_plane_params(self, plane_equation):
+        """convert a plane normal (A,B,C) to the rvec that rotates the z axis onto it
+
+        Only the first three entries of plane_equation are used. Returns None for a
+        zero normal, a (3,) vector when the normal is (anti)parallel to z and a
+        (3,1) axis-angle vector otherwise.
+        """
+        # Normalize the plane normal vector
+        normal      = np.asarray(plane_equation, dtype = np.float64).flatten()[:3]
+        normal_norm = np.linalg.norm(normal)
+        if normal_norm == 0:
+            log.error("Error: Zero norm for plane normal vector.")
+            return None
+        normal = normal / normal_norm
+
+        z_axis         = np.array([0.0, 0.0, 1.0])
+        rotation_axis  = np.cross(z_axis, normal)
+        axis_norm      = np.linalg.norm(rotation_axis)   # = sin(rotation angle)
+        # clip keeps arccos defined when rounding pushes |dot| slightly above 1
+        rotation_angle = np.arccos(np.clip(np.dot(z_axis, normal), -1.0, 1.0))
+
+        # Handle the case where the rotation axis is zero (normal is parallel to z-axis)
+        if axis_norm < 1e-6:
+            if normal[2] > 0:
+                rvec = np.zeros(3)  # Rotation is identity
+            else:
+                rvec = np.array([0, np.pi, 0]) # 180 degrees about the Y axis
+        else:
+            # axis-angle needs a UNIT axis; the raw cross product has length sin(angle)
+            rvec = (rotation_axis / axis_norm * rotation_angle).reshape(3, 1)
+
+        return rvec
+
+    def convert_plane_params_to_pose(self, plane_params = None, plane_center = None):
+        "builds a single-row pose [center | unit-length plane vector]; arguments default to the stored plane"
+        if plane_params is None:
+            normal = self.plane_params
+        else:
+            normal = plane_params[:3].flatten()
+        if plane_center is None:
+            center = self.plane_center
+        else:
+            center = plane_center[:3].flatten()
+        center_row = center.reshape((1,-1))
+        direction  = normal.reshape((1,-1))
+        return np.hstack((center_row, direction/np.linalg.norm(direction.flatten())))
+
+    def fit_plane_init(self):
+        """prepares data for real time fit a*x+b*y+c = z
+
+        fills self.full_dir with one (u, v, 1) row per pixel of the whole frame,
+        pixels ordered row by row; u,v are the outputs of cv.undistortPoints
+        """
+        if self.cam_matrix is None:
+            cx, cy            = self.frame_size[0]/2, self.frame_size[1]/2
+            self.cam_matrix   = np.array([[650,0,cx],[0,650,cy],[0,0,1]], dtype = np.float32)
+            self.cam_distort  = np.array([0,0,0,0,0],dtype = np.float32)
+            log.info('Camera matrix is initialized to default.')
+
+        # the grid always spans the full frame, self.rect is not consulted here
+        w, h            = self.frame_size[0], self.frame_size[1]
+        cols, rows      = np.meshgrid(np.arange(0, w, 1), np.arange(0, h, 1))
+
+        # camera coordinates : N x 1 x 2 float32 pixel list -> undistorted points
+        pixels          = np.column_stack((cols.reshape(-1), rows.reshape(-1)))
+        pixels          = pixels[:, np.newaxis, :].astype(np.float32)
+        rays            = cv.undistortPoints(pixels, self.cam_matrix, self.cam_distort)
+
+        # column vectors, one entry per pixel
+        u               = rays[:,0,0].reshape(-1,1)
+        v               = rays[:,0,1].reshape(-1,1)
+
+        # check
+        #u, v            = u*self.cam_matrix[0,0], v*self.cam_matrix[1,1]
+
+        # third column is the constant 1 of the (u, v, 1) direction
+        ones            = np.ones_like(u)
+
+        self.full_dir   = np.hstack((u, v, ones))
+        #self.matrix_inv = np.linalg.pinv(self.rect_dir)
+
+    def fit_plane_init_old(self):
+        """Legacy setup: precompute viewing directions (u, v, 1) for the pixels of ``self.rect``.
+
+        A default camera model (focal length 650, principal point at the frame
+        centre, zero distortion) is installed only when no camera matrix is
+        set, so an already loaded calibration is no longer overwritten; this
+        matches fit_plane_init().
+
+        Side effects:
+            self.rect_3d  - closed rectangle outline [[-w,-h,0], ...] built from
+                            the full ROI width and height.
+            self.rect_dir - one (u, v, 1) row per ROI pixel, row-major.
+        """
+        if self.cam_matrix is None:
+            self.cam_matrix   = np.array([[650,0,self.frame_size[0]/2],[0,650,self.frame_size[1]/2],[0,0,1]], dtype = np.float32)
+            self.cam_distort  = np.array([0,0,0,0,0],dtype = np.float32)
+
+        x0,y0,x1,y1     = self.rect
+        h,w             = y1-y0, x1-x0
+        x, y            = np.meshgrid(np.arange(x0, x1, 1), np.arange(y0, y1, 1))
+
+        # rectangle outline used for display
+        self.rect_3d    = [[-w,-h,0],[w,-h,0],[w,h,0],[-w,h,0],[-w,-h,0]]
+
+        # pixel coordinates as N x 1 x 2 float32, the layout OpenCV expects
+        xy              = np.hstack((x.reshape(-1,1),y.reshape(-1,1)))
+        xy              = np.expand_dims(xy, axis=1).astype(np.float32)
+        xy_undistorted  = cv.undistortPoints(xy, self.cam_matrix, self.cam_distort)
+
+        u               = xy_undistorted[:,0,0].reshape(-1,1)
+        v               = xy_undistorted[:,0,1].reshape(-1,1)
+
+        self.rect_dir   = np.hstack((u,v,u*0+1))
+
+    def convert_roi_to_points(self, img, point_num = 30, step_size = 1, roi_rect = None):
+        """Convert the valid depth pixels of a ROI into XYZ points (N x 3 array).
+
+        Args:
+            img: depth image indexed as img[y, x]; pixels with value > 1 are
+                treated as valid.  Assumed to be single channel and to match
+                self.frame_size - TODO confirm against callers.
+            point_num: target number of points; valid pixels are decimated with
+                a stride of int(valid_count / point_num).
+            step_size: minimal decimation stride.
+            roi_rect: [x0, y0, x1, y1] with x1/y1 exclusive; self.rect when None.
+
+        Returns:
+            N x 3 array (viewing direction times depth).  An empty (0, 3) array
+            is returned when fewer than 5 valid pixels are found; in that case
+            only self.rect_3d is updated.
+
+        Side effects:
+            Updates self.rect_3d, self.rect_z, self.rect_dir, self.rect_xyz,
+            self.roi_index (flat frame indices of the returned points) and
+            self.img_roi (the ROI crop of img).
+        """
+        # init params of the inverse
+        if self.full_dir is None:
+            self.fit_plane_init()
+
+        roi_rect            = self.rect if roi_rect is None else roi_rect
+        x0, y0, x1, y1      = roi_rect
+
+        # rectangle outline for display - built from the requested ROI
+        h,w                 = (y1-y0)>>1, (x1-x0)>>1
+        self.rect_3d        = [[-w,-h,0],[w,-h,0],[w,h,0],[-w,h,0],[-w,-h,0]]
+
+        # Clamp the ROI to the frame and to the image.  The image slice below is
+        # clipped silently by numpy; without the same clipping on the index grid
+        # the two would have different sizes and depths would be paired with the
+        # directions of the wrong pixels.
+        x_lim               = min(self.frame_size[0], img.shape[1])
+        y_lim               = min(self.frame_size[1], img.shape[0])
+        x0, x1              = max(0, min(x0, x_lim)), max(0, min(x1, x_lim))
+        y0, y1              = max(0, min(y0, y_lim)), max(0, min(y1, y_lim))
+
+        # flat (row-major) frame index of every ROI pixel; arange excludes x1, y1
+        x, y                = np.meshgrid(np.arange(x0, x1, 1), np.arange(y0, y1, 1))
+        flat_indices        = (y * self.frame_size[0] + x).ravel().astype(np.int32)
+
+        # valid pixels in the roi
+        img_crop            = img[y0:y1,x0:x1]
+        img_roi             = img_crop.flatten()
+        ii                  = np.where(img_roi > 1)[0]
+
+        valid_point_num     = len(ii)
+        if valid_point_num < 5:
+            return np.zeros((0,3))
+
+        # decimate to roughly point_num points
+        step_size           = np.maximum(step_size, np.int32(valid_point_num/point_num))
+        ii                  = ii[::step_size]
+
+        # scale the (u, v, 1) direction of each kept pixel by its depth
+        z                   = img_roi[ii].reshape((-1,1))
+        jj                  = flat_indices[ii].flatten()
+        uv1_matrix          = self.full_dir[jj,:]
+        xyz_matrix          = uv1_matrix[:,:3]*z
+
+        self.rect_z          = z
+        self.rect_dir        = uv1_matrix
+        self.rect_xyz        = xyz_matrix
+        self.roi_index       = jj
+        self.img_roi         = img_crop
+
+        return xyz_matrix
+
+
+    def convert_roi_to_points_old(self, img_roi, point_num = 30, step_size = 1):
+        """Legacy conversion of an already cropped ROI into XYZ points (N x 3 array).
+
+        Args:
+            img_roi: depth crop; pixels > 0 are treated as valid.  Its pixel
+                order must match the rows of self.rect_dir.
+            point_num: target number of points, reached by striding over the
+                valid pixels.
+            step_size: minimal stride.
+
+        Returns:
+            N x 3 array of points, or None when fewer than 5 pixels are valid.
+
+        Side effects:
+            Updates self.rect_3d and self.rect_xyz (only when points are returned).
+        """
+        # lazily build the per-pixel directions of the ROI
+        if self.rect_dir is None:
+            self.fit_plane_init_old()
+
+        # the unpacking also rejects inputs with fewer than two dimensions
+        _rows, _cols        = img_roi.shape[:2]
+        depth_column        = img_roi.reshape((-1,1))
+
+        valid_ids           = np.where((depth_column > 0).flatten())[0]
+        valid_count         = len(valid_ids)
+        if valid_count < 5:
+            return None
+
+        stride              = np.maximum(step_size, np.int32(valid_count/point_num))
+        picked              = valid_ids[::stride]
+
+        # fancy indexing copies, so self.rect_dir itself is left untouched
+        points              = self.rect_dir[picked,:]
+        points[:,:3]        = points[:,:3]*depth_column[picked]
+
+        # rectangle outline used for display
+        left, top, right, bottom = self.rect
+        height, width       = bottom-top, right-left
+        self.rect_3d        = [[-width,-height,0],[width,-height,0],[width,height,0],[-width,height,0],[-width,-height,0]]
+
+        self.rect_xyz       = points
+        return points
+
+    def fit_plane_svd(self, img_roi):
+        """Fit a plane to the ROI points with SVD; return mean depth and fit error.
+
+        Args:
+            img_roi: depth image handed to convert_roi_to_points().
+
+        Returns:
+            (img_mean, img_std): z of the point centroid and standard deviation
+            of the point-to-plane distances, or (0, 0) when fewer than two
+            points are available.
+
+        Side effects:
+            Stores the unit normal in self.plane_params and the centroid in
+            self.plane_center.
+        """
+        # roi converted to points with step size on the grid
+        xyz_matrix          = self.convert_roi_to_points(img_roi, point_num = 600, step_size = 1)
+
+        # some problem with points
+        if xyz_matrix.shape[0] < 2:
+            log.warning('Not enough points in the ROI')
+            return 0, 0
+
+        # subtract mean
+        xyz_center          = xyz_matrix[:,:3].mean(axis=0)
+        xyz_matrix          = xyz_matrix - xyz_center
+
+        # Reduced SVD: only the right singular vectors are needed, so the
+        # N x N matrix U of the full decomposition is never built.
+        _, S, Vh            = np.linalg.svd(xyz_matrix, full_matrices=False)
+        vnorm               = Vh[np.argmin(S),:]
+
+        # Keep orientation: the normal points toward +Z.  An explicit test is
+        # used because multiplying by sign(0) would zero the whole normal.
+        plane_params       = -vnorm if vnorm[2] < 0 else vnorm
+
+        # estimate error - signed distance of every point to the plane
+        err                = np.dot(xyz_matrix,plane_params)
+
+        img_mean           = xyz_center[2]
+        img_std            = err.std()
+        self.plane_params  = plane_params[:3].flatten()
+        self.plane_center  = xyz_center.flatten()
+
+        return img_mean, img_std
+    
+    def fit_plane_svd_old(self, img_roi):
+        """Legacy SVD plane fit on a cropped ROI; return mean depth and fit error.
+
+        Points come from convert_roi_to_points_old(), which returns None when
+        the ROI has too few valid pixels; that case is reported as (0, 0)
+        instead of failing on the missing array.
+
+        Args:
+            img_roi: depth crop handed to convert_roi_to_points_old().
+
+        Returns:
+            (img_mean, img_std): z of the point centroid and standard deviation
+            of the point-to-plane distances, or (0, 0) when there are not
+            enough points.
+
+        Side effects:
+            Stores the unit normal in self.plane_params and the centroid in
+            self.plane_center.
+        """
+        # roi converted to points with step size on the grid
+        xyz_matrix          = self.convert_roi_to_points_old(img_roi, point_num = 1e4, step_size = 1)
+
+        # some problem with points
+        if xyz_matrix is None or xyz_matrix.shape[0] < 2:
+            log.warning('Not enough points in the ROI')
+            return 0, 0
+
+        # subtract mean
+        xyz_center          = xyz_matrix[:,:3].mean(axis=0)
+        xyz_matrix          = xyz_matrix - xyz_center
+
+        # Reduced SVD: with up to ~1e4 points the full decomposition would
+        # allocate an N x N matrix U that is never used.
+        _, S, Vh            = np.linalg.svd(xyz_matrix, full_matrices=False)
+        vnorm               = Vh[np.argmin(S),:]
+
+        # Keep orientation: the normal points toward +Z.  An explicit test is
+        # used because multiplying by sign(0) would zero the whole normal.
+        plane_params       = -vnorm if vnorm[2] < 0 else vnorm
+
+        # estimate error - signed distance of every point to the plane
+        err                = np.dot(xyz_matrix,plane_params)
+
+        img_mean           = xyz_center[2]
+        img_std            = err.std()
+        self.plane_params  = plane_params[:3].flatten()
+        self.plane_center  = xyz_center.flatten()
+
+        return img_mean, img_std
+        
+    def fit_plane_svd_weighted(self, img_roi):
+        """Estimate plane parameters from a least-squares fit on inverse depth.
+
+        The ROI is converted with convert_roi_to_points(); that call also
+        refreshes self.rect_z, self.rect_dir and self.rect_xyz, which are the
+        arrays actually used by the fit.  Original note: the fit is weighted
+        assuming Sigma(z) = a*z.
+
+        Args:
+            img_roi: depth image handed to convert_roi_to_points().
+
+        Returns:
+            (img_mean, img_std): mean of ``z - err`` and standard deviation of
+            ``err`` (see the inline notes on how ``err`` is formed), or (0, 0)
+            when fewer than two points are available.
+
+        Side effects:
+            Stores the normalized solution in self.plane_params and the mean of
+            self.rect_xyz in self.plane_center; logs both at info level.
+        """
+        # roi converted to points with step size on the grid
+        # (the returned array is only used for the point-count check below)
+        xyz_matrix          = self.convert_roi_to_points(img_roi, point_num = 500, step_size = 1)    
+
+        # some problem with points
+        if xyz_matrix.shape[0] < 2:
+            log.warning('Not enough points in the ROI')
+            return 0, 0 
+        
+        # # subtract mean
+        # xyz_center          = xyz_matrix[:,:3].mean(axis=0)
+        # xyz_matrix          = xyz_matrix - xyz_center          
+
+        # plane params - using only valid
+        # z: depth column and uv1_matrix: (u, v, 1) rows, both stored by convert_roi_to_points()
+        z                   = self.rect_z 
+        uv1_matrix          = self.rect_dir  # (x - x0)/f, (y - y0)/f, 1
+ 
+        # minimization function min |ua/f + vb/f + c + dz|^2/|z|^2
+        f                   = self.cam_matrix[0,0]
+        # uvf_matrix          = np.dot(uv1_matrix , np.diag([1/f,1/f,1]))
+
+        # least-squares solution of uv1_matrix * p = f/z via the pseudo inverse
+        mtrx_inv            = np.linalg.pinv(uv1_matrix)
+        b                   = f/z
+        plane_params        = np.dot(mtrx_inv,b)
+        # the solution is rescaled to unit length, so it no longer solves the system above
+        plane_params        = plane_params/np.linalg.norm(plane_params)
+
+        # center
+        xyz_matrix          = self.rect_xyz  #uv1_matrix[:,:3]*z  # keep 1 intact
+        xyz_center          = xyz_matrix.mean(axis=0)                
+
+        # estimate error
+        # NOTE(review): err is the projection of each direction on the normalized
+        # parameters, not the residual (uv1_matrix * p - b) of the solved system;
+        # confirm that this is the intended error measure.
+        err                = np.dot(uv1_matrix,plane_params) 
+        z_est              = z - err
+
+        img_mean           = z_est.mean()
+        img_std            = err.std()
+        self.plane_params  = plane_params[:3].flatten()
+        self.plane_center  = xyz_center.flatten()
+
+        log.info(f'Plane : {self.plane_params}, error {img_std:.3f}')
+        
+        return img_mean, img_std 
+
+    def fit_plane_with_outliers(self, img_roi):
+        """Fit a plane to the ROI twice: once on all points, once on the inliers.
+
+        The first SVD fit yields the point-to-plane distances; points within
+        three standard deviations are kept and fitted again.
+
+        Args:
+            img_roi: depth image handed to convert_roi_to_points().
+
+        Returns:
+            (img_mean, img_std): z of the inlier centroid and standard deviation
+            of the inlier distances to the final plane, or (0, 0) when fewer
+            than two points are available.
+
+        Side effects:
+            Stores the unit normal in self.plane_params and the inlier centroid
+            in self.plane_center; logs the error of both iterations.
+        """
+        # roi converted to points with step size on the grid
+        xyz_matrix          = self.convert_roi_to_points(img_roi, point_num = 1500, step_size = 1)
+
+        # some problem with points
+        if xyz_matrix.shape[0] < 2:
+            log.warning('Not enough points in the ROI')
+            return 0, 0
+
+        # iteration 1 - all the points
+        xyz_center_1 = xyz_matrix[:,:3].mean(axis=0)
+        xyz_1        = xyz_matrix - xyz_center_1
+
+        # reduced SVD: the N x N matrix U of the full decomposition is not needed
+        _, S, Vh     = np.linalg.svd(xyz_1, full_matrices=False)
+        vnorm        = Vh[np.argmin(S),:]
+
+        # keep orientation toward +Z; sign(0) would zero the whole normal
+        plane_params = -vnorm if vnorm[2] < 0 else vnorm
+
+        # estimate error
+        err          = np.dot(xyz_1,plane_params)
+        err_std      = err.std()
+        log.info('Fit error iteration 1: %s' %str(err_std))
+
+        # Keep the points within 3 sigma.  The comparison is inclusive so that
+        # an exact fit (err_std == 0) keeps its points instead of rejecting
+        # all of them.
+        inlier_ind   = np.abs(err) <= 3*err_std
+
+        # iteration 2 - inliers only
+        xyz_center_2 = xyz_matrix[inlier_ind,:3].mean(axis=0)
+        xyz_2        = xyz_matrix[inlier_ind,:] - xyz_center_2
+
+        _, S, Vh     = np.linalg.svd(xyz_2, full_matrices=False)
+        vnorm        = Vh[np.argmin(S),:]
+        plane_params = -vnorm if vnorm[2] < 0 else vnorm
+
+        # checking error
+        err          = np.dot(xyz_2, plane_params)
+        err_std      = err.std()
+        log.info('Fit error iteration 2: %s' %str(err_std))
+
+        img_mean           = xyz_center_2[2]
+        img_std            = err_std
+        self.plane_params  = plane_params[:3].flatten()
+        self.plane_center  = xyz_center_2.flatten()
+
+        return img_mean, img_std
+    
+    def fit_plane_ransac(self, img_roi):
+        """Fit a plane to the ROI points with RANSAC.
+
+        Three random points define a candidate plane; the candidate with the
+        most points within ``thresh`` (1.05, in depth units) after
+        ``maxIteration`` (100) draws wins.
+
+        Args:
+            img_roi: depth image handed to convert_roi_to_points().
+
+        Returns:
+            (img_mean, img_std): z of the centroid of all the points and the
+            standard deviation of their distances along the winning normal.
+            (0, 0) when there are fewer than three points or when every drawn
+            triple was degenerate.
+
+        Side effects:
+            Stores the winning unit normal in self.plane_params and the
+            centroid of all the points in self.plane_center.
+        """
+        # roi converted to points with step size on the grid
+        xyz_matrix     = self.convert_roi_to_points(img_roi, point_num = 250, step_size = 1)
+
+        # convert_roi_to_points reports an empty ROI as an empty array, not None
+        if xyz_matrix is None or xyz_matrix.shape[0] < 3:
+            log.error('No points in the ROI')
+            return 0, 0
+
+        thresh          = 1.05
+        maxIteration    = 100
+
+        n_points        = xyz_matrix.shape[0]
+        best_normal     = None
+        best_inlier_num = 0
+
+        for _ in range(maxIteration):
+
+            # Samples 3 random points
+            id_samples  = random.sample(range(0, n_points), 3)
+            pt_samples  = xyz_matrix[id_samples,:]
+
+            # two in-plane vectors and their cross product - the plane normal
+            vecA        = pt_samples[1, :] - pt_samples[0, :]
+            vecB        = pt_samples[2, :] - pt_samples[0, :]
+            vecC        = np.cross(vecA, vecB)
+            vecC_norm   = np.linalg.norm(vecC)
+
+            # protect from the close spaced or collinear points
+            if vecC_norm < 10e-6:
+                continue
+
+            # Unit normal with positive Z direction.  An explicit test is used
+            # because multiplying by sign(0) would produce a zero normal that
+            # counts every point as an inlier.
+            vecC        = vecC / vecC_norm
+            if vecC[2] < 0:
+                vecC    = -vecC
+
+            # plane equation: dot(vecC, p) + k = 0, k found from a sample point
+            k           = -np.dot(vecC, pt_samples[1, :])
+
+            # signed distance from every point to the plane
+            dist_pt     = np.dot(xyz_matrix, vecC) + k
+
+            # keep the candidate with the largest number of inliers
+            inlier_num  = np.count_nonzero(np.abs(dist_pt) <= thresh)
+            if inlier_num > best_inlier_num:
+                best_normal     = vecC
+                best_inlier_num = inlier_num
+
+        if best_normal is None:
+            log.warning('Not enough points in the ROI')
+            return 0, 0
+
+        # transform to pose output - centroid of all the points
+        tvec            = xyz_matrix.mean(axis=0)
+        pts_best        = xyz_matrix - tvec
+
+        # checking error
+        err             = np.dot(pts_best, best_normal)
+        err_std         = err.std()
+        log.info('Fit error ransac: %s' %str(err_std))
+
+        img_mean           = tvec[2]
+        img_std            = err_std
+        self.plane_params  = best_normal.flatten()
+        self.plane_center  = tvec.flatten()
+
+        return img_mean, img_std
+    
+    def estimate_normals_from_depth_map(self,depth_map):
+        """
+        Compute a per-pixel surface normal from the image gradient of a depth map.
+
+        The depth map is smoothed with a 5x5 Gaussian, differentiated with the
+        Sobel operator in both image directions, and the direction
+        (-dz/du, -dz/dv, 1) is scaled to unit length.
+
+        Args:
+            depth_map (np.ndarray): single-channel depth image; converted to
+                                    float32 when it has another dtype.
+
+        Returns:
+            np.ndarray: (H, W, 3) float32 image holding the unit vector
+                        (nx, ny, nz) of every pixel.
+        """
+        # gradients are computed in float32
+        depth = depth_map if depth_map.dtype == np.float32 else depth_map.astype(np.float32)
+        depth = cv.GaussianBlur(depth, (5, 5), 0)
+
+        # ksize=1: plain 3x1 / 1x3 derivative kernels, no extra smoothing
+        sobel_args = dict(ksize=1, borderType=cv.BORDER_DEFAULT)
+        dz_du = cv.Sobel(depth, cv.CV_32F, 1, 0, **sobel_args)   # horizontal derivative
+        dz_dv = cv.Sobel(depth, cv.CV_32F, 0, 1, **sobel_args)   # vertical derivative
+
+        # direction proportional to the normal: (-dz/du, -dz/dv, 1)
+        direction = np.stack((-dz_du, -dz_dv, np.ones_like(dz_du)), axis=2)
+
+        # scale to unit length; entries with zero length stay (0, 0, 0)
+        length = np.linalg.norm(direction, axis=2, keepdims=True)
+        unit_normals = np.zeros_like(direction)
+        np.divide(direction, length, out=unit_normals, where=length != 0)
+
+        return unit_normals
+    
+    def estimate_normals_using_box_filters(self,img_roi):
+        """
+        Estimate a per-pixel surface normal from box-filtered depth differences.
+
+        The depth is averaged over the valid (> 0) pixels of a 5x5 window, and
+        the gradient is the difference of these local means two pixels to
+        either side of each pixel.  Near the border (within two pixels of the
+        edge along the differencing direction) no difference can be formed;
+        the gradient is zero there, so those normals are not polluted by an
+        initialisation value.
+
+        Args:
+            img_roi (np.ndarray): single-channel depth image; 0 marks an
+                                  invalid pixel.
+
+        Returns:
+            np.ndarray: (H, W, 3) float64 image holding the unit vector
+                        (nx, ny, nz) of every pixel.
+        """
+        img_roi                 = img_roi.astype(np.float32)
+        roih, roiw              = img_roi.shape[:2]
+        kernel_size             = 5
+
+        # count good points
+        img_roi_mask            = (img_roi > 0).astype(np.float32)
+
+        # windowed sum of the depth and of the valid-pixel count
+        img_roi_sum             = cv.boxFilter(img_roi, -1,      (kernel_size, kernel_size), normalize=False)
+        img_roi_count           = cv.boxFilter(img_roi_mask, -1, (kernel_size, kernel_size), normalize=False)
+
+        # protect from non valid - avoid division by zero
+        img_roi_count[img_roi_count < 1] = 1
+        img_roi_mean            = img_roi_sum/img_roi_count
+
+        # central differences spanning 2*shift pixels
+        shift                   = kernel_size>>1
+        shift2                  = shift<<1
+
+        # direction (-dz/dx, -dz/dy, 1); zero gradient where it is not computed
+        img_normal              = np.zeros((roih,roiw,3))
+        img_normal[:,:,2]       = 1
+        img_normal[:,shift:-shift,0]    = img_roi_mean[:,:-shift2]   - img_roi_mean[:,shift2:]
+        img_normal[shift:-shift,:,1]    = img_roi_mean[:-shift2,:]   - img_roi_mean[shift2:,:]
+
+        # normalize each vector to unit length (length >= 1, the z part is 1)
+        norm2              = np.sqrt(np.sum(img_normal**2, axis=2))
+        img_normal         = img_normal / norm2[:,:,np.newaxis]
+        return img_normal
+
+    def fit_plane_using_gradients(self, img_full, roi_rect = None):
+        """Estimate the plane normal of a ROI as the average of per-pixel normals.
+
+        The per-pixel normals come from estimate_normals_from_depth_map()
+        applied to the ROI crop stored by convert_roi_to_points().  Their mean
+        is re-normalized to unit length, so the reported error is a true
+        distance and self.plane_params is a unit vector.
+
+        Args:
+            img_full: depth image of the whole frame.
+            roi_rect: [x0, y0, x1, y1]; forwarded to convert_roi_to_points().
+
+        Returns:
+            (img_mean, img_std): z of the point centroid and standard deviation
+            of the point-to-plane distances, or (0, 0) when the ROI crop is
+            missing or fewer than two points are available.
+
+        Side effects:
+            Stores self.plane_params, self.plane_center and self.img_roi_normal
+            (the per-pixel normals, kept for debug and display).
+        """
+        # roi converted to points with step size on the grid
+        xyz_roi             = self.convert_roi_to_points(img_full, point_num = 500, step_size = 1, roi_rect = roi_rect)
+        if self.img_roi is None:
+            log.error('No ROI in image')
+            return 0, 0
+
+        # Checked before the normals are computed: when the conversion found too
+        # few points it did not refresh self.img_roi, so the crop may be stale.
+        if xyz_roi.shape[0] < 2:
+            log.warning('Not enough points in the ROI')
+            return 0, 0
+
+        img_roi             = self.img_roi.astype(np.float32)
+        img_normal          = self.estimate_normals_from_depth_map(img_roi)
+
+        # plane normal - the mean of unit vectors is shorter than 1, rescale it
+        plane_params       = img_normal.mean(axis=(0,1))
+        normal_length      = np.linalg.norm(plane_params)
+        if normal_length > 0:
+            plane_params   = plane_params/normal_length
+
+        # estimate error - signed distance of every point to the plane
+        xyz_center         = xyz_roi[:,:3].mean(axis=0)
+        xyz_matrix         = xyz_roi - xyz_center
+        err                = np.dot(xyz_matrix,plane_params)
+
+        img_mean           = xyz_center[2]
+        img_std            = err.std()
+        self.plane_params  = plane_params[:3].flatten()
+        self.plane_center  = xyz_center.flatten()
+
+        self.img_roi_normal = img_normal # save for debug and display
+
+        log.info(f'Plane : {self.plane_params}, error {img_std:.3f}')
+
+        return img_mean, img_std
+    
+    def fit_plane_ransac_and_grow(self, img_full):
+        """Grow the ROI mask with the pixels that lie on the current plane.
+
+        No fit is performed here: the plane stored in self.plane_params and
+        self.plane_center is used.  The bounding box of the mask (> 0.7) is
+        enlarged by one pixel on every side, its points are tested against the
+        plane, and the mask is decayed (x0.95) and then pulled half way toward
+        1 at the pixels within 3.5 depth units of the plane.
+
+        Args:
+            img_full: depth image of the whole frame; for a multi-channel
+                image channel 2 is used.
+
+        Returns:
+            (img_mean, img_std): mean z of the on-plane points and standard
+            deviation of the absolute distances of all the tested points.
+            Each value is 0 when there is nothing to compute it from (no
+            points, or no point on the plane).
+
+        Side effects:
+            Updates self.img_mask (created through init_image() when None).
+        """
+        if len(img_full.shape) > 2:
+            img_full        = img_full[:,:,2].astype(np.float32)
+
+        # start from the original ROI
+        if self.img_mask is None:
+            self.init_image(img_full)
+
+        # make sure that mask is not empty - initial rectangle
+        x0, y0, x1, y1              = self.rect
+        self.img_mask[y0:y1,x0:x1]  = 1
+
+        # bounding box of the mask, grown by one pixel and clipped to the image
+        # (+2 on the far side: +1 for the exclusive bound, +1 for the growth)
+        y,x                         = np.where(self.img_mask > 0.7)
+        y_min, y_max                = np.maximum(0,y.min()-1), np.minimum(self.img_mask.shape[0],y.max()+2)
+        x_min, x_max                = np.maximum(0,x.min()-1), np.minimum(self.img_mask.shape[1],x.max()+2)
+
+        # extract ROI
+        roi_rect                    = [x_min, y_min, x_max, y_max]
+        xyz_matrix                  = self.convert_roi_to_points(img_full, point_num = 5000, step_size = 1, roi_rect = roi_rect)
+
+        # distance to the plane: the offset of the plane center is removed from
+        # the distances instead of from every point
+        vecC                        = self.plane_params[:3]
+        dist_offset                 = np.dot(self.plane_center, vecC)
+        err                         = np.abs(np.dot(xyz_matrix, vecC) - dist_offset)
+
+        # points close enough to the plane
+        thresh                      = 3.5
+        i2                          = np.where(err <= thresh)[0]
+
+        # flat image index of the accepted points
+        ii                          = self.roi_index[i2]
+
+        # update mask according to the valid pixels
+        self.img_mask               = 0.95*self.img_mask
+        self.img_mask.flat[ii]      = self.img_mask.flat[ii] + 0.5*(1 - self.img_mask.flat[ii])
+
+        # output - guarded so that an empty selection yields 0 and not NaN
+        img_std                    = err.std() if err.size > 0 else 0
+        img_mean                   = xyz_matrix[i2].mean(axis=0)[2] if i2.size > 0 else 0
+
+        return img_mean, img_std
+        
+    def fit_and_split_roi_recursively(self, roi, level = 0):
+        """Fit a plane to ``roi`` and recursively try to split it into four quadrants.
+
+        Args:
+            roi: [x0, y0, x1, y1] region handed to self.fit_plane().
+            level: recursion depth, used for logging only.
+
+        Returns:
+            List of fit results (dicts with an 'error' entry).  It holds the
+            single fit of ``roi`` unless the split is accepted, in which case
+            it holds the results collected from the four quadrants.  The split
+            is accepted when at least one collected result has an error below
+            half of the error of ``roi``.
+        """
+        x0,y0,x1,y1     = roi
+        log.info('Processing level %d, region x = %d, y = %d' %(level,x0,y0))
+
+        # fit of the whole region - good enough means no split at all
+        whole_fit       = self.fit_plane(roi)
+        if whole_fit['error'] < self.MIN_STD_ERROR:
+            log.info('Fit is good enough x = %d, y = %d' %(x0,y0))
+            return [whole_fit]
+
+        # split point; stop when a quadrant would become too small
+        xs, ys          = int((x1 + x0)/2), int((y1 + y0)/2)
+        if (xs - x0) < self.MIN_SPLIT_SIZE or (ys - y0) < self.MIN_SPLIT_SIZE:
+            log.info('Min size is reached x = %d, y = %d' %(x0,y0))
+            return [whole_fit]
+
+        # collect the results of the four quadrants
+        quadrant_fits   = []
+        for quadrant in ([x0,y0,xs,ys],[x0,ys,xs,y1],[xs,y0,x1,ys],[xs,ys,x1,y1]):
+            quadrant_fits.extend(self.fit_and_split_roi_recursively(quadrant, level + 1))
+
+        # accept the split if one error is twice lower (noise of 4 split should be 2)
+        split_accepted  = any(part['error'] < whole_fit['error']/2 for part in quadrant_fits)
+
+        if not split_accepted:
+            log.info('No split level %d, region x = %d, y = %d' %(level,x0,y0))
+            return [whole_fit]
+
+        log.info('Split at level %d, region x = %d, y = %d' %(level,x0,y0))
+        return quadrant_fits
+    
+    def fit_plane_svd_weighted_4d(self, img_roi):
+        """
+        Estimate the plane by SVD of a 4 column matrix built from the ROI directions and 1/z.
+
+        The matrix is [self.rect_dir, 1/self.rect_z] with its third column multiplied by
+        cam_matrix[0,0]. The right singular vector of the smallest singular value is
+        taken as the solution. On success the first three components are stored in
+        ``self.plane_params`` and the ROI centroid in ``self.plane_center``.
+
+        :param img_roi: ROI data passed to ``convert_roi_to_points``
+        :return: (img_mean, img_std) - z of the ROI centroid and std of the points
+                 projected on the estimated normal; (0, 0) when the fit is not possible
+        """
+
+        # roi converted to points with step size on the grid
+        xyz_matrix          = self.convert_roi_to_points(img_roi, point_num = 350, step_size = 1)
+
+        # some problem with points
+        if xyz_matrix.shape[0] < 2:
+            log.warning('Not enough points in the ROI')
+            return 0, 0
+
+        if self.roi_index is None:
+            log.error('No ROI index found')
+            return 0, 0
+
+        # form matrix for svd - NOTE(review): rect_z must be a column (N x 1) for hstack, confirm
+        f                   = self.cam_matrix[0,0]
+        z                   = self.rect_z
+        uv1z_matrix         = np.hstack((self.rect_dir, 1/z))
+        uv1z_matrix[:,2:3] *= f
+
+        # only Vh is used: the reduced SVD returns the same leading rows of Vh
+        # without allocating the N x N matrix U
+        _, S, Vh            = np.linalg.svd(uv1z_matrix, full_matrices=False)
+        vnorm               = Vh[np.argmin(S),:]
+
+        # keep orientation; np.sign() returns 0 for a zero component and
+        # multiplying by it would wipe the whole solution
+        orientation         = np.sign(vnorm[2])
+        plane_params        = vnorm if orientation == 0 else vnorm*orientation
+
+        # estimate error
+        err                 = np.dot(xyz_matrix, plane_params[:3])
+
+        # patch mean
+        xyz_center          = xyz_matrix[:,:3].mean(axis=0)
+        img_mean            = xyz_center[2]
+        img_std             = err.std()
+        self.plane_params   = plane_params[:3].flatten()
+        self.plane_center   = xyz_center.flatten()
+
+        log.info(f'Plane : {self.plane_params}, error {img_std:.3f}')
+
+        return img_mean, img_std
+
+    
+    def fit_plane_and_project_the_image(self, img_full):
+        """
+        Mark in ``self.img_mask`` the image points that lie close to the current plane.
+
+        The plane is taken from ``self.plane_params`` / ``self.plane_center``.
+
+        :param img_full: full image; when it has more than 2 dimensions channel 2 is used
+        :return: (img_mean, img_std) - mean z of the points within the threshold and
+                 std of the absolute point-to-plane distances of all the points
+        """
+        rows, cols                  = img_full.shape[:2]
+        if img_full.ndim > 2:
+            img_full                = img_full[:,:,2].astype(np.float32)
+
+        # first call - NOTE(review): init_image is expected to create the mask, confirm
+        if self.img_mask is None:
+            self.init_image(img_full)
+
+        # sample the image interior, leaving a 50 pixel border
+        inner_rect                  = [50, 50, cols-50, rows-50]
+        points                      = self.convert_roi_to_points(img_full, point_num = 500000, step_size = 1, roi_rect = inner_rect)
+
+        # distance to the plane: the plane center enters only as a scalar offset,
+        # it is not subtracted from every point
+        normal                      = self.plane_params[:3]
+        offset                      = np.dot(self.plane_center, normal)
+        abs_dist                    = np.abs(np.dot(points, normal) - offset)
+
+        # points within the distance threshold
+        max_dist                    = 2.5
+        inliers                     = np.nonzero(abs_dist <= max_dist)[0]
+
+        # roi_index converts point numbers to flat indexes of the image
+        self.img_mask.flat[self.roi_index[inliers]] = 1
+
+        return points[inliers].mean(axis=0)[2], abs_dist.std()
+
+    
+    def fit_box(self):
+        """
+        Find the contour with the most points in the mask and its convex hull.
+
+        The results are stored in ``self.contour`` and ``self.hull``; both stay None
+        when nothing is found.
+
+        :return: True when a contour was found, otherwise False
+        """
+        self.contour        = None
+        self.hull           = None
+
+        if self.img_mask is None:
+            log.warning('No mask found - can not fit the box')
+            return False
+
+        # findContours needs an 8-bit single channel image while the mask may be
+        # kept as float; any non-zero pixel is treated as foreground
+        mask_bin            = (np.asarray(self.img_mask) > 0).astype(np.uint8)
+
+        # [-2:] keeps the call valid for both the 2 and the 3 value return signatures
+        contours, hierarchy = cv.findContours(mask_bin, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)[-2:]
+        if len(contours) < 1:
+            log.warning('No contours found in the mask')
+            return False
+
+        # select the contour with the largest number of points
+        cnt                 = max(contours, key=len)
+
+        # find the convex hull
+        self.contour        = cnt
+        self.hull           = cv.convexHull(cnt)
+        return True
+
+
+     
+        
+    def fit_plane_and_project_the_image_using_gradients(self, img_full):
+        """
+        Update ``self.img_mask`` by comparing the per pixel normals with the current plane normal.
+
+        :param img_full: full image; when it has more than 2 dimensions channel 2 is used
+        :return: (img_mean, img_std) - img_mean is always 0, img_std is the std of the
+                 absolute dot products between the pixel normals and the plane normal
+        """
+        if img_full.ndim > 2:
+            img_full                = img_full[:,:,2].astype(np.float32)
+
+        # first call - NOTE(review): init_image is expected to create the mask, confirm
+        if self.img_mask is None:
+            self.init_image(img_full)
+
+        # normals of the entire image projected on the plane normal
+        normals                     = self.estimate_normals_from_depth_map(img_full)
+        alignment                   = np.abs(np.dot(normals, self.plane_params[:3]))
+
+        # boolean image of the pixels above the threshold
+        min_alignment               = 0.1
+        selected                    = alignment > min_alignment
+
+        # make sure that mask is not empty - initial rectangle
+        x0, y0, x1, y1              = self.rect
+        self.img_mask[y0:y1,x0:x1]  = 1
+
+        # decay the whole mask, then move the selected pixels half way towards 1
+        self.img_mask               = 0.95*self.img_mask
+        self.img_mask[selected]    += 0.5*(1 - self.img_mask[selected])
+
+        return 0, alignment.std()
+
+        
+    def growingStep(self, depths:np.ndarray, step:int, direction:str, mean:float, std:float,
+                    left:int, top:int, right:int, bottom:int, numConfInt:int=3, testRatio:float=0.95):
+        """
+        testing whether growing in a given direction and size is acceptable
+
+        Rows of the segment are bottom..top and columns are left..right, all bounds
+        inclusive (bottom is the smaller row index). Zero depths are treated as
+        invalid: they never pass the test but still count in the strip size.
+
+        :param depths:  uncropped depth image
+        :param step: step size to test; 0 means nothing to grow and is always denied
+        :param direction: growing direction. should be either: 'left', 'right', 'top' or  'bottom'
+        :param mean: the current mean depth of the segment
+        :param std: the current standard deviation of the segment's depths
+        :param left: the current left bound of the segment
+        :param top: the current top bound of the segment
+        :param right: the current right bound of the segment
+        :param bottom: the current bottom bound of the segment
+        :param numConfInt: number of confidence intervals threshold
+        :param testRatio: ratio of elements in growing area to be passed the confidence interval threshold
+        :return: true if growing is accepted, otherwise false
+        :raises ValueError: on an unknown direction, negative step, numConfInt < 1
+                            or testRatio outside [0, 1]
+        """
+        if direction not in ('left', 'right', 'bottom', 'top'):
+            raise ValueError('direction must be one of "left", "right", "bottom", "top"')
+
+        if step < 0:
+            raise ValueError('step must be non-negative')
+
+        if numConfInt < 1:
+            raise ValueError('numConfInt must be at least 1')
+
+        if testRatio > 1 or testRatio < 0:
+            raise ValueError('testRatio must be between 0 and 1')
+
+        if step == 0:
+            return False    # nothing to grow into
+
+        # only the requested strip is extracted; max(..., 0) prevents a negative
+        # slice start from wrapping around to the other side of the image
+        if direction == 'left':
+            testData = depths[bottom:top + 1, max(left - step, 0):left]
+        elif direction == 'right':
+            testData = depths[bottom:top + 1, right + 1:right + step + 1]
+        elif direction == 'top':
+            testData = depths[top + 1:top + step + 1, left:right + 1]
+        else:
+            testData = depths[max(bottom - step, 0):bottom, left:right + 1]
+
+        if testData.size == 0:
+            return False    # the strip is outside of the image
+
+        # deviation of the valid (nonzero) elements from the segment mean
+        deviation = np.abs(testData[np.nonzero(testData)] - mean)
+        if std == 0:
+            # perfectly uniform segment: only identical depths are inside the interval
+            passed = deviation == 0
+        else:
+            passed = deviation / std < numConfInt
+
+        return bool(np.count_nonzero(passed) / testData.size > testRatio)
+
+
+    def findMaxROI(self, depths:np.ndarray, initialRoi) -> tuple[int, int, int, int]:
+        """
+        Find maximum ROI in depth image using region growing from an initial ROI
+
+        Each side grows independently: an accepted step is doubled for the next
+        iteration, a rejected step larger than 1 falls back to 1 and a rejected
+        step of 1 stops the growing of that side. All bounds are inclusive and
+        never leave the image (bottom is the smaller row index).
+
+        :param depths: entire depth image
+        :param initialRoi: initial ROI to grow from as list/tuple in the order: left, top, right, bottom
+        :return: final ROI found as a tuple in the order: left, top, right, bottom
+        """
+        height, width = depths.shape
+        left, top, right, bottom = initialRoi
+        bounds = {'left': left, 'top': top, 'right': right, 'bottom': bottom}
+        steps  = {'left': 1, 'right': 1, 'bottom': 1, 'top': 1}     # initializing growing steps
+
+        # growing sign and the last valid index for every side
+        limits = {'left': (-1, 0), 'right': (1, width - 1), 'bottom': (-1, 0), 'top': (1, height - 1)}
+
+        while sum(steps.values()) > 0:
+            # statistics of the valid (nonzero) depths of the current segment
+            currentData = depths[bounds['bottom']:bounds['top'] + 1, bounds['left']:bounds['right'] + 1]
+            validData   = currentData[np.nonzero(currentData)]
+            currentMean = validData.mean()
+            currentStd  = validData.std()
+
+            for direction in ('left', 'right', 'bottom', 'top'):
+                step = steps[direction]
+                if step <= 0:
+                    continue
+
+                sign, limit = limits[direction]
+                target      = bounds[direction] + sign * step
+                insideImage = target >= limit if sign < 0 else target <= limit
+
+                # the bounds already updated in this iteration are used by the next sides
+                if insideImage and self.growingStep(depths, step, direction, currentMean, currentStd,
+                                                    bounds['left'], bounds['top'], bounds['right'], bounds['bottom']):
+                    bounds[direction] = target
+                    steps[direction]  = step * 2   # increasing growing step for next iteration
+                else:
+                    steps[direction]  = 1 if step > 1 else 0
+
+        return bounds['left'], bounds['top'], bounds['right'], bounds['bottom']
+
+    
+    def find_planes(self, img):
+        """
+        Run the plane detection selected by ``self.detect_type`` (case insensitive).
+
+        The final measurements are stored in ``self.img_mean`` / ``self.img_std``;
+        both are 0 when the detect type is not recognized.
+
+        :param img: input image
+        :return: always True
+        """
+        mode                = self.detect_type.upper()
+
+        # detectors that work on the preprocessed ROI only
+        roi_fitters         = {
+            'P': 'fit_plane_svd',
+            'W': 'fit_plane_svd_weighted',
+            'O': 'fit_plane_with_outliers',
+            'T': 'fit_plane_svd_weighted_4d',   # weighted by depth
+            'R': 'fit_plane_ransac',
+            'G': 'fit_plane_using_gradients',   # gradients
+        }
+
+        # detectors that fit the initial ROI plane with fit_plane_svd first
+        # and then process the entire image
+        image_fitters       = {
+            'F': 'fit_plane_ransac_and_grow',
+            'S': 'fit_plane_and_project_the_image',
+            'N': 'fit_plane_and_project_the_image_using_gradients',
+            'B': 'fit_plane_and_project_the_image',   # box detector
+        }
+
+        img_mean, img_std   = 0, 0
+        if mode in roi_fitters:
+            img_roi             = self.preprocess(img)
+            img_mean, img_std   = getattr(self, roi_fitters[mode])(img_roi)
+
+        elif mode in image_fitters:
+            img_roi             = self.preprocess(img)
+            img_mean, img_std   = self.fit_plane_svd(img_roi)   # initial ROI plane
+            img_mean, img_std   = getattr(self, image_fitters[mode])(img)
+            if mode == 'B':
+                self.fit_box()
+
+        # final measurements per frame
+        self.img_mean       = img_mean
+        self.img_std        = img_std
+        return True
+
+
+    def process_frame(self, img):
+        """
+        Process a frame: preprocess, build the 3D data, fit the plane and return its pose.
+
+        :param img: input image
+        :return: result of ``convert_plane_params_to_pose``
+        """
+        roi_data            = self.preprocess(img)
+
+        # the return values of the 3D steps are not used here
+        self.init_img3d(roi_data)
+        self.compute_img3d(roi_data)
+
+        roi_mean, roi_std   = self.fit_plane_with_outliers(roi_data)
+        return self.convert_plane_params_to_pose()
+
+
+#%% Helpers
+def draw_axis(img, rvec, tvec, cam_mtrx, cam_dist, len = 10):
+    """
+    Draw the three coordinate axes of a pose on the image.
+
+    Any failure is printed and the image is returned as it is at that point.
+
+    :param img: image to draw on
+    :param rvec: rotation vector of the pose
+    :param tvec: translation vector of the pose
+    :param cam_mtrx: camera matrix
+    :param cam_dist: camera distortion coefficients
+    :param len: axis length, unit is mm
+    :return: image with the axes
+    """
+    try:
+        axis_ends       = np.float32([[len, 0, 0], [0, len, 0], [0, 0, len], [0, 0, 0]]).reshape(-1, 3)
+        projected, _    = cv.projectPoints(axis_ends, rvec.astype(np.float32), tvec.astype(np.float32), cam_mtrx, cam_dist)
+        projected       = projected.squeeze().astype(np.int32)
+
+        # the last projected point is the origin, the first three are the axis tips
+        origin          = tuple(projected[3].ravel())
+        for tip, color in zip(projected[:3], ((0,0,255), (0,255,0), (255,0,0))):
+            img = cv.line(img, origin, tuple(tip.ravel()), color, 3)
+    except Exception as e:
+        print(e)
+        print(rvec, tvec, img.shape)
+    return img
+
+def draw_polygon(img, rvec, tvec, cam_mtrx, cam_dist, points3d):
+    """
+    Project 3D points on the image and connect them with a closed polyline.
+
+    :param img: image to draw on
+    :param rvec: rotation vector of the pose
+    :param tvec: translation vector of the pose
+    :param cam_mtrx: camera matrix
+    :param cam_dist: camera distortion coefficients
+    :param points3d: polygon vertices, unit is mm
+    :return: image with the polygon
+    """
+    vertices        = np.float32(points3d).reshape(-1, 3)
+    projected, _    = cv.projectPoints(vertices, rvec, tvec, cam_mtrx, cam_dist)
+    outline         = projected.squeeze().astype(np.int32)
+
+    # closed outline, 1 pixel thick
+    return cv.polylines(img, [outline], True, (0, 200, 200), 1)
+
+
+def draw_cube(img, corners, imgpts):
+    """
+    Draw a cube from 8 projected points: filled floor, 4 pillars and the top outline.
+
+    :param img: image to draw on
+    :param corners: not used
+    :param imgpts: projected cube points, the first 4 are the floor and the last 4 the top
+    :return: image with the cube
+    """
+    pts         = np.int32(imgpts).reshape(-1,2)
+    floor, roof = pts[:4], pts[4:]
+
+    # ground floor in green, negative thickness fills the contour
+    img = cv.drawContours(img, [floor], -1, (0,255,0), -3)
+
+    # pillars in blue color connect floor point k with top point k + 4
+    for k in range(4):
+        img = cv.line(img, tuple(pts[k]), tuple(pts[k + 4]), (255), 3)
+
+    # top layer in red color
+    img = cv.drawContours(img, [roof], -1, (0,0,255), 3)
+    return img
+
+
+#%% ROI selector from OpenCV
+class RectSelector:
+    """
+    Rectangle selection with the mouse in an OpenCV window.
+
+    While the left button is held the current rectangle is kept in ``drag_rect``
+    as (x0, y0, x1, y1); on release ``callback`` is called with the last rectangle.
+    """
+    def __init__(self, win, callback):
+        self.win = win
+        self.callback = callback
+        cv.setMouseCallback(win, self.onmouse)
+        self.drag_start = None
+        self.drag_rect = None
+
+    def onmouse(self, event, x, y, flags, param):
+        "mouse handler registered for the window"
+        x, y = np.int16([x, y]) # BUG
+
+        # button pressed - remember where the drag starts
+        if event == cv.EVENT_LBUTTONDOWN:
+            self.drag_start = (x, y)
+            return
+
+        if not self.drag_start:
+            return
+
+        # button released - report the last rectangle, if any
+        if not (flags & cv.EVENT_FLAG_LBUTTON):
+            finished = self.drag_rect
+            self.drag_start = None
+            self.drag_rect = None
+            if finished:
+                self.callback(finished)
+            return
+
+        # dragging - rectangle between the start and the current point,
+        # empty rectangles are dropped
+        anchor = list(self.drag_start)
+        x0, y0 = np.minimum(anchor, [x, y])
+        x1, y1 = np.maximum(anchor, [x, y])
+        self.drag_rect = (x0, y0, x1, y1) if (x1-x0 > 0 and y1-y0 > 0) else None
+
+    def draw(self, vis):
+        "draws the current rectangle on vis, returns False when there is none"
+        if not self.drag_rect:
+            return False
+        x0, y0, x1, y1 = self.drag_rect
+        cv.rectangle(vis, (x0, y0), (x1, y1), (0, 255, 0), 2)
+        return True
+
+    @property
+    def dragging(self):
+        "True while a rectangle is being dragged"
+        return self.drag_rect is not None
+
+
+#%% Data Generator
+class DataGen:
+    """
+    Creates synthetic (or loads stored) images and predefined ROIs for the tests.
+
+    ``frame_size`` is kept as (width, height).
+    """
+    def __init__(self, img_size = (640,480)):
+
+        self.frame_size     = img_size  # (width, height)
+        self.img            = None
+        self.rect           = None  # roi
+
+    def add_noise(self, img_gray, noise_percentage = 0.01):
+        """
+        Add -10 / +10 impulse noise to randomly selected pixels.
+
+        :param img_gray: input image
+        :param noise_percentage: fraction of the pixels that get noise;
+                                 below 0.001 the input is returned untouched
+        :return: noisy copy of the image (the input itself when no noise is requested)
+        """
+        if noise_percentage < 0.001:
+            return img_gray
+
+        # number of the noisy pixels according to the noise percentage
+        img_size        = img_gray.size
+        noise_size      = int(noise_percentage*img_size)
+
+        # random flat positions (drawn with replacement) and the noise values
+        random_indices  = np.random.choice(img_size, noise_size)
+        noise           = np.random.choice([-10, 10], noise_size)
+
+        # the noise is added to a copy, the input image stays intact
+        img_noised      = img_gray.copy()
+        img_noised.flat[random_indices] += noise
+
+        log.info('adding image noise')
+        return img_noised
+
+    def init_image(self, img_type = 1):
+        """
+        Create or load a test image and store it in ``self.img``.
+
+        For an unknown img_type the previously stored image is kept.
+        NOTE(review): types 11, 12, 13 and 21 read files from machine specific paths.
+
+        :param img_type: 1-8 and 10 synthetic surfaces, 11-13 and 21 images from files
+        :return: the image
+        :raises ValueError: when no image is available (failed read or unknown type on the first call)
+        """
+        w,h             = self.frame_size
+        if img_type == 1: # /
+
+            self.img        = np.tile(np.linspace(100, 300, w), (h,1))
+
+        elif img_type == 2: # /|/
+
+            self.img        = np.tile(np.linspace(100, 200, int(w/2)), (h,2))
+
+        elif img_type == 3: # |_|
+
+            self.img        = np.tile(np.linspace(100, 200, h).reshape((-1,1)), (1,w))
+
+        elif img_type == 4: # /\
+
+            self.img        = np.tile(np.hstack((np.linspace(300, 500, w>>1),np.linspace(500, 300, w>>1))), (h,1))
+
+        elif img_type == 5: # dome
+
+            x,y             = np.meshgrid(np.arange(w),np.arange(h))
+            self.img        = (np.abs(x - w/2) + np.abs(y - h/2))/10 + 200 # less slope
+
+        elif img_type == 6: # sphere
+
+            x,y             = np.meshgrid(np.arange(w),np.arange(h))
+            self.img        = np.sqrt((x - w/2)**2 + (y - h/2)**2)/10 + 200 # less slope
+
+        elif img_type == 7: # stair
+
+            x,y             = np.meshgrid(np.arange(w),np.arange(h))
+            self.img        = (np.sign(x - w/2) + np.sign(y - h/2))*5 + 200 # less slope
+
+        elif img_type == 8: # corner
+
+            x,y             = np.meshgrid(np.arange(w),np.arange(h))
+            self.img        = np.ones((h,w))*250
+            img_bool        = np.logical_and((x - w/2) < 0, (y - h/2) < 0)
+            self.img[img_bool] = 230 # quarter
+
+        elif img_type == 10: # flat
+
+            self.img        = np.ones((h,w))*500
+
+        elif img_type == 11: # chess board
+            fname           = r"C:\Users\udubin\Documents\Code\opencv-4x\samples\data\left04.jpg"
+            self.img        = cv.imread(fname)
+
+        elif img_type == 12:
+            self.img = cv.imread('image_scl_001.png', cv.IMREAD_GRAYSCALE)
+
+        elif img_type == 13:
+            self.img = cv.imread(r"wrappers\python\applications\planes\data\image_ddd_000.png", cv.IMREAD_GRAYSCALE)
+
+        elif img_type == 21:
+            self.img = cv.imread(r"C:\Data\Depth\Plane\image_scl_000.png", cv.IMREAD_GRAYSCALE)
+
+        # cv.imread returns None when the file can not be read
+        if self.img is None:
+            raise ValueError(f'No image is available for img_type {img_type}')
+
+        self.img = self.add_noise(self.img, 0)
+
+        # keep the (width, height) order expected by __init__, init_image and init_roi
+        self.frame_size = (self.img.shape[1], self.img.shape[0])
+        return self.img
+
+    def init_roi(self, test_type = 1):
+        """
+        Load the ROI of the test case.
+
+        :param test_type: 1-5 select centered ROIs of growing size,
+                          any other value selects the entire frame
+        :return: ROI as [xlu, ylu, xrb, yrb]
+        """
+        roi = [0,0,self.frame_size[0],self.frame_size[1]]
+        if test_type == 1:
+            roi = [310,230,330,250] # xlu, ylu, xrb, yrb
+        elif test_type == 2:
+            roi = [300,220,340,260] # xlu, ylu, xrb, yrb
+        elif test_type == 3:
+            roi = [280,200,360,280] # xlu, ylu, xrb, yrb
+        elif test_type == 4:
+            roi = [220,140,420,340] # xlu, ylu, xrb, yrb
+        elif test_type == 5:
+            roi = [200,120,440,360] # xlu, ylu, xrb, yrb
+        return roi
+
+    def test_image(self):
+        "test single image depth"
+        img  = self.init_image(1)
+        roi  = self.init_roi(1)
+
+
+#%% Adds display functionality to the PlaneDetector
+class PlaneDetectorDisplay(PlaneDetector):
+    def __init__(self, detect_type='p',image_size = (1280,720)):
+        """
+        :param detect_type: detection type, passed to the base class and stored in ``detect_type``
+        :param image_size: image size, passed to the base class and stored in ``frame_size``
+        """
+        super().__init__(detect_type, image_size=image_size)
+        self.detect_type, self.frame_size = detect_type, image_size
+        self.img            = None
+        self.show_dict      = dict()    # show figures in 3D
+
+    def show_image_data(self, frame, display_mode = 1):
+        """
+        Convert one channel of the frame to an 8 bit BGR image for display.
+
+        :param frame: image with at least 3 channels
+        :param display_mode: 1 - channel 0 (left), 2 - channel 1 (right),
+                             3 - channel 2 scaled by 0.1, otherwise channel 2 (depth) scaled by 0.03
+        :return: 3 channel uint8 image
+        """
+        if display_mode in (1, 2):
+            gray        = frame[:,:,display_mode - 1]
+        else:
+            depth_gain  = 0.1 if display_mode == 3 else 0.03
+            gray        = cv.convertScaleAbs(frame[:,:,2], alpha=depth_gain)
+
+        return cv.cvtColor(np.uint8(gray), cv.COLOR_GRAY2BGR)
+
+    def show_data(self, frame = None, ttl = 'Depth'):
+        """
+        Show the frame in an OpenCV window, rescaled to a convenient size.
+
+        :param frame: 2 channel image (shown side by side), uint16 image (scaled by 0.03)
+                      or any other image (cast to uint8)
+        :param ttl: window title prefix
+        :return: False when there is no frame or 'q' was pressed, otherwise True
+        """
+        if frame is None:
+            log.info('No images found')
+            return False
+
+        is_pair     = len(frame.shape) > 2 and frame.shape[2]==2
+        is_depth16  = len(frame.shape) == 2 and frame.dtype == 'uint16'
+        if is_pair:
+            # the two channels side by side
+            img_show    = np.uint8(np.concatenate((frame[:,:,0], frame[:,:,1]), axis = 1))
+        elif is_depth16:
+            img_show    = np.uint8(cv.convertScaleAbs(frame, alpha=0.03))
+        else:
+            img_show    = np.uint8(frame)
+
+        # halve the image until it is not wider than 2000 pixels
+        while img_show.shape[1] > 2000:
+            img_show    = cv.resize(img_show, (img_show.shape[1]>>1,img_show.shape[0]>>1), interpolation=cv.INTER_LINEAR)
+
+        # double the image until it has at least 300 rows
+        while img_show.shape[0] < 300:
+            img_show    = cv.resize(img_show, (img_show.shape[1]<<1,img_show.shape[0]<<1), interpolation=cv.INTER_LINEAR)
+
+        cv.imshow(ttl + ' (q-Quit)', img_show)
+        return cv.waitKey(10) != ord('q')
+
+    def show_image_with_axis(self, img, poses = []):
+        """
+        Draw the axes of every pose on the image and show it until a key is pressed.
+
+        :param img: image; a 2D image is converted to colors with the JET color map
+        :param poses: list of 6D vectors - translation followed by 'xyz' euler angles in degrees
+        """
+        if len(poses) < 1:
+            log.error('No poses found')
+
+        # deal with black and white
+        img_show = np.uint8(img)
+        if len(img.shape) < 3:
+            img_show = cv.applyColorMap(img_show, cv.COLORMAP_JET)
+
+        for pose in poses:
+            # orientation in degrees -> rotation vector
+            rvec        = Rot.from_euler('xyz', pose[3:][:3], degrees=True).as_rotvec()
+            tvec        = np.array(pose[:3])
+            img_show    = draw_axis(img_show, rvec, tvec, self.cam_matrix, self.cam_distort, len = 10)
+
+        cv.imshow('Image & Axis', img_show)
+        log.info('show done')
+        cv.waitKey()
+
+    def show_image_with_rois(self, img, roi_params_ret = []):
+        """
+        Draw the axes of every ROI fit on the image and show it until a key is pressed.
+
+        :param img: image; a 2D image is converted to colors with the JET color map
+        :param roi_params_ret: list of ROI fit results accepted by ``convert_roi_params_to_pose``
+        """
+        if len(roi_params_ret) < 1:
+            print('No poses found')
+
+        # deal with black and white
+        img_show = np.uint8(img)
+        if len(img.shape) < 3:
+            img_show = cv.applyColorMap(img_show, cv.COLORMAP_JET)
+
+        for roi_p in roi_params_ret:
+            pose        = self.convert_roi_params_to_pose(roi_p)
+
+            # pose : center of the patch, 'zyx' euler angles in degrees and the level
+            angles, level = pose[3:6], pose[6]
+            rvec, _     = cv.Rodrigues(Rot.from_euler('zyx', angles, degrees = True).as_matrix())
+            tvec        = np.array(pose[:3], dtype = np.float32).reshape(rvec.shape)
+
+            # the level is used as the axis length
+            img_show    = draw_axis(img_show, rvec, tvec, self.cam_matrix, self.cam_distort, len = level)
+
+        cv.imshow('Image & Axis', img_show)
+        log.info('show done')
+        cv.waitKey()
+
+    def show_points_3d_with_normal(self, img3d, pose = None):
+        """
+        Show the points as a 3D scatter plot with an optional normal line.
+
+        :param img3d: 2D array of points, columns 0..2 are plotted as X, Y, Z
+        :param pose: optional array; elements 0..2 are the start of the line and
+                     elements 3..5 its direction (drawn 10 times longer)
+        """
+        fig         = plt.figure()
+        ax          = fig.add_subplot(projection='3d')
+
+        xs, ys, zs  = (img3d[:,k].reshape((-1,1)) for k in range(3))
+        ax.scatter(xs, ys, zs, marker='.')
+
+        if pose is not None:
+            pose    = pose.flatten()
+            start   = pose[:3]
+            tip     = start + pose[3:6].flatten()*10
+            ax.plot([start[0], tip[0]], [start[1], tip[1]], [start[2], tip[2]], 'r', label='Normal')
+
+        ax.set_xlabel('X [mm]')
+        ax.set_ylabel('Y [mm]')
+        ax.set_zlabel('Z [mm]')
+        ax.set_aspect('equal', 'box')
+        plt.show()
+
+    def show_rois_3d_with_normals(self, roi_params_ret = [], roi_init = None):
+        """
+        Show the 3D points of a region together with the normal of every ROI fit.
+
+        :param roi_params_ret: list of ROI fit results accepted by ``convert_roi_params_to_pose``
+        :param roi_init: region to show as [x0, y0, x1, y1]; by default
+                         [0, 0, frame_size[1], frame_size[0]]
+        """
+        if len(roi_params_ret) < 1:
+            log.info('roi_params_ret is empty')
+            return
+
+        # the initial ROI makes the show more compact
+        if roi_init is None:
+            roi_init    = [0,0,self.frame_size[1], self.frame_size[0]]
+        x0,y0,x1,y1     = roi_init
+
+        if self.img_xyz is None:
+            log.info('Need init')
+            return
+
+        region          = self.img_xyz[y0:y1,x0:x1,:]
+        xs, ys, zs      = (region[:,:,k].reshape((-1,1)) for k in range(3))
+
+        fig             = plt.figure()
+        ax              = fig.add_subplot(projection='3d')
+        ax.scatter(xs, ys, zs, marker='.')
+
+        for roi_p in roi_params_ret:
+            pose        = self.convert_roi_params_to_pose(roi_p).flatten()
+
+            # line from the pose position along elements 3..5 scaled by element 6
+            start       = pose[:3]
+            tip         = start + pose[3:6]*pose[6]
+            ax.plot([start[0], tip[0]], [start[1], tip[1]], [start[2], tip[2]], 'r', label='Normal')
+
+        ax.set_xlabel('X [mm]')
+        ax.set_ylabel('Y [mm]')
+        ax.set_zlabel('Z [mm]')
+        ax.set_aspect('equal', 'box')
+        plt.show()
+
+
+    def show_3d_point_cloud(self):
+        "displays point cloud in real time"
+        if self.rect is None or self.rect_xyz is None:
+            return 
+        X = self.rect_xyz[:,:3]
+        if not 'line' in self.show_dict : #len(self.show_dict) < 1:
+
+            fig_num     = int(self.rect[0]+self.rect[1])
+            fig         = plt.figure(fig_num)
+            plt.clf() 
+            #fig.canvas.set_window_title('3D Scene')
+            try:
+                ax = fig.gca(projection='3d')
+            except:
+                ax = fig.add_subplot(projection = '3d')
+            fig.tight_layout()
+            fig.suptitle(f'ROI : {self.rect[0]},{self.rect[1]}')
+            ax.set_proj_type('ortho')
+            #lineGray,      = ax.plot3D(X[:,0], X[:,1], X[:,2], color='k')
+            lineGray    = ax.scatter(X[:,0], X[:,1], X[:,2])
+
+            ax.xaxis.set_pane_color((0.0, 0.0, 0.0, 0.0))
+            ax.yaxis.set_pane_color((0.0, 0.0, 0.0, 0.0))
+            ax.zaxis.set_pane_color((0.0, 0.0, 0.0, 0.0))             
+
+            ax.set_xlabel('x')
+            ax.set_ylabel('y')
+            ax.set_zlabel('z')
+            ax.invert_yaxis()
+            ax.view_init(elev=-70, azim=-90)
+         
+            lims = 500
+            ax.set_xlim(X[:,0].min()*0.8, X[:,0].max()*1.2)
+            ax.set_ylim(X[:,1].min()*0.8, X[:,1].max()*1.2)
+            ax.set_zlim(X[:,2].min()*0.9, X[:,2].max()*1.2)
+            plt.ion()
+            plt.show(block = False)
+            self.show_dict = {'fig':fig, 'ax':ax, 'line':lineGray}
+        else:
+            #self.show_dict['line'].set_data(X[:,0], X[:,1])
+            #self.show_dict['line'].set_3d_properties(X[:,2])
+            self.show_dict['line']._offsets3d = (X[:,0], X[:,1], X[:,2])
+            #self.show_dict['ax'].set_ylim(low_limit, high_limit)
+        
+        self.show_dict['fig'].canvas.draw_idle()
+        self.show_dict['fig'].canvas.flush_events()
+        return              
+
+    def show_axis(self, vis):
+        "draw axis after plane estimation"
+        if self.plane_params is None:
+            return vis
+        
+        #rvec = self.plane_params/np.sum(self.plane_params**2) # normalize
+        rvec = self.convert_plane_params(self.plane_params)
+        #rvec = self.convert_plane_to_rvec(self.plane_params)
+        
+        tvec = self.plane_center
+        vis  = draw_axis(vis, rvec, tvec, self.cam_matrix, self.cam_distort, len = 50)
+        return vis
+    
+    def show_text(self, vis):
+        "draw text plane estimation"
+        err_mean, err_std = self.img_mean, self.img_std
+        if err_mean is None:
+            return vis
+        
+        if self.rect is None:
+            return vis
+        
+        x0, y0, x1, y1 = self.rect
+        txt = f'{self.detect_type}:{err_mean:.2f}:{err_std:.3f}'
+        #if self.detect_type == 'F':
+        #    txt = f'{self.detect_type}:{self.img_fill:.2f} %'
+        vis = draw_str(vis,(x0,y0-10),txt)
+
+        return vis 
+
+    def show_rect_and_text(self, vis):
+        "draw axis after plane estimation"
+        err_mean, err_std = self.img_mean, self.img_std
+        if err_mean is None:
+            return vis
+        
+        if self.rect is None:
+            return vis
+        
+        x0, y0, x1, y1 = self.rect
+        clr = (0, 0, 0) if vis[y0:y1,x0:x1].mean() > 128 else (240,240,240)
+        vis = cv.rectangle(vis, (x0, y0), (x1, y1), clr, 2)
+        txt = f'{self.detect_type}:{err_mean:.2f}-{err_std:.3f}'
+        if self.detect_type == 'F':
+            txt = f'{self.detect_type}:{self.img_fill:.2f} %'
+        vis = draw_str(vis,(x0,y0-10),txt)
+
+        return vis 
+
+    def show_rect_and_axis_projected(self, vis):
+        "projects rectangle on the plane"
+        if self.rect is None:
+            return vis
+        if self.plane_params is None:
+            return vis
+        
+        rvec = self.convert_plane_params(self.plane_params)
+        tvec = self.plane_center
+
+        vis  = draw_axis(vis, rvec, tvec, self.cam_matrix, self.cam_distort, len = 50)        
+        vis  = draw_polygon(vis, rvec, tvec, self.cam_matrix, self.cam_distort, self.rect_3d)
+    
+        return vis 
+
+    def show_mask(self, img):
+        "draw image mask"
+
+        # deal with black and white
+        img_show = np.uint8(img) #.copy()
+        if len(img.shape) < 3:
+            img_show = cv.applyColorMap(img_show, cv.COLORMAP_JET)
+
+        if not np.all(self.img_mask.shape[:2] == img_show.shape[:2]):
+            log.error('mask and image size are not equal')
+            return img_show
+        
+        img_show[self.img_mask > 0.75] = self.color_mask
+        return img_show
+    
+    def show_polygon(self, img):
+        "shows polygon on top of the image"
+        # Define polygon vertices (e.g., a triangle)
+        if self.polygon is None:
+            return img
+        
+        polygon = np.array(self.polygon, np.int32)
+
+        # Reshape for OpenCV (required shape: (n_points, 1, 2))
+        polygon = polygon.reshape((-1, 1, 2))
+
+        # Draw the polygon on the image
+        img     = cv.polylines(img, [polygon], isClosed=True, color=(0, 255, 0), thickness=2)
+
+        return img    
+    
+    def show_box(self, img):
+        "shows rectangular on top of the image"
+        # Define polygon vertices (e.g., a triangle)
+        if self.contour is None:
+            return img
+        
+        # # check the tracker distance from the floor - dont show the floor
+        # plane_z        = self.plane_center[2]
+        # if self.floor_z + 20 > plane_z:
+        #     return img
+        
+        # make mask not persistent
+        #self.img_mask[:,:] = 0
+
+
+        # Draw the original contour (optional, in green)
+        #cv.drawContours(img, [self.contour], 0, (0, 255, 0), 2) 
+
+        #epsilon       = 0.1*cv.arcLength(self.contour,True)
+        #box_approx    = cv.approxPolyDP(self.contour,epsilon,True)        
+
+        # Draw the convex hull (in red)
+        cv.drawContours(img, [self.hull], 0, (0, 0, 255), 1)   
+
+        # 5. Min/Max Axis (using minAreaRect)
+        rect            = cv.minAreaRect(self.contour) # (center(x,y), (width, height), angle - (0:-90))
+        box             = cv.boxPoints(rect)           # Get 4 corners
+        box             = np.array(box).astype(int)
+        cv.drawContours(img, [box], 0, (0, 255, 255), 2) # Draw oriented box   
+        #print(rect)
+
+        # 1. Euler angles (pitch, roll, yaw) in radians
+        euler_angles    = [0, 0, np.deg2rad(rect[2])]
+
+        # 2. Convert Euler to Rotation Matrix (e.g., ZYX order)
+        rotation        = R.from_euler('xyz', euler_angles, degrees=False)
+        rot_matrix      = rotation.as_matrix()
+
+        # 3. Convert Rotation Matrix to rvec (Rodrigues)
+        rvec, _         = cv.Rodrigues(rot_matrix)  
+        tvec            = self.plane_center    
+
+        # save it to draw axis   
+        img             = draw_axis(img, rvec, tvec, self.cam_matrix, self.cam_distort, len = 50)        
+           
+        return img         
+
+    
+    def show_image_colored_by_normals(self, img = None):
+        "draw results : show normals at each point of ROI"
+        if img is None:
+            log.error('No image found')
+            return img
+        if self.rect is None:  # roi
+            log.error('No ROI found')
+            return img
+        if self.img_roi_normal is None:
+            log.error('No normals found')
+            return img
+
+        # deal with black and white
+        img_show    = np.uint8(img).copy()
+        if len(img.shape) < 3:
+            #img_show = cv.applyColorMap(img_show, cv.COLORMAP_JET)
+            img_show = np.tile(img_show[:,:,np.newaxis], (1,1,3))
+
+        # scale normals to fir RGB color space
+        normals                  = (self.img_roi_normal + 1)*127.5
+        x0,y0,x1,y1              = self.rect
+        #img_show[y0:y1,x0:x1, :] = np.uint8(normals)
+        img_show[y0:y1,x0:x1, :] = cv.addWeighted(img_show[y0:y1,x0:x1, :], 0.2, np.uint8(normals), 0.8, 0)
+
+        # make black outside roi or non valid
+        non_valid_pixels = img[:,:,2] < 1   
+        img_show[non_valid_pixels,0] = 0     
+        img_show[non_valid_pixels,1] = 0     
+        img_show[non_valid_pixels,2] = 0     
+         
+        #cv.imshow('Image & Normals', img_show)
+        #log.info('show done')
+        #ch = cv.waitKey(0) 
+        return img_show      
+
+    def show_scene(self, vis):
+        "draw ROI and Info"
+
+        #vis = self.show_rect_and_text(vis)
+        #vis = self.show_axis(vis)
+
+        vis = self.show_mask(vis)
+        vis = self.show_box(vis)
+        #vis = self.show_image_colored_by_normals(vis)        
+        #vis = self.show_rect_and_axis_projected(vis)
+        vis = self.show_text(vis)
+
+        return vis
+    
+
+
+
+
+
+        
+
+# ----------------------
+#%% Tests
+class TestPlaneDetector(unittest.TestCase):
+
+    def test_image_show(self):
+        "checking image show"
+        d       = DataGen()
+        img     = d.init_image(1)
+        p       = PlaneDetectorDisplay()
+        poses   = [[0,0,100,0,0,45,10]]
+        p.show_image_with_axis(img,poses)
+        self.assertFalse(d.img is None)    
+
+    def test_init_img3d(self):
+        "XYZ point cloud structure init"
+        d       = DataGen()
+        img     = d.init_image(1)
+        p       = PlaneDetectorDisplay()
+        isOk    = p.init_image(img)
+        img3d   = p.init_img3d()
+        self.assertFalse(img3d is None)    
+
+    def test_compute_img3d(self):
+        "XYZ point cloud structure init and compute"
+        d       = DataGen()
+        img     = d.init_image(1)        
+        p       = PlaneDetectorDisplay()
+        img3d   = p.init_img3d(img)
+        imgXYZ  = p.compute_img3d(img)
+        self.assertFalse(imgXYZ is None)     
+
+    def test_show_img3d(self):
+        "XYZ point cloud structure init and compute"
+        d       = DataGen()
+        img     = d.init_image(1)        
+        p       = PlaneDetectorDisplay()
+        img3d   = p.init_img3d(img)
+        imgXYZ  = p.compute_img3d(img)
+        roi     = p.init_roi(1)
+        x0,y0,x1,y1 = roi
+        roiXYZ    = imgXYZ[y0:y1,x0:x1,:]
+        p.show_points_3d_with_normal(roiXYZ)
+        self.assertFalse(imgXYZ is None)  
+
+    def test_convert_roi_to_points(self):
+        "computes 3d points of the ROI"
+        im_size     = (640,480)
+        d           = DataGen(img_size=im_size)
+        img         = d.init_image(1)        
+        p           = PlaneDetectorDisplay(image_size=im_size)
+        roi         = p.init_roi(1)
+        rect3d      = p.convert_roi_to_points(img, roi_rect=roi)
+        self.assertTrue(rect3d.shape[0] > 4)
+                     
+    def test_fit_plane_svd(self):
+        "computes normal to the ROI"
+        d           = DataGen()
+        img         = d.init_image(5)        
+        p           = PlaneDetectorDisplay()
+        roi         = p.init_roi(4)
+        img_roi     = p.preprocess(img)
+        roim,rois   = p.fit_plane_svd(img_roi)
+        pose        = p.convert_plane_params_to_pose()
+        p.show_image_with_axis(img, pose)
+        p.show_points_3d_with_normal(p.matrix_xyz, pose)
+        self.assertTrue(pose[0][2] > 0.01)         
+
+    def test_fit_plane_depth_image(self):
+        "computes normal to the ROI"
+        d           = DataGen()
+        img         = d.init_image(13)        
+        p           = PlaneDetectorDisplay()
+        roi         = p.init_roi(4)
+        img_roi     = p.preprocess(img)
+        roim,rois   = p.fit_plane_svd(img_roi)
+        pose        = p.convert_plane_params_to_pose()
+        p.show_image_with_axis(img, pose)
+        p.show_points_3d_with_normal(p.rect_xyz, pose)
+        self.assertTrue(pose[0][2] > 0.01)  
+
+    def test_fit_plane_with_outliers(self):
+        "computes normal to the ROI"
+        d           = DataGen()
+        img         = d.init_image(13)        
+        p           = PlaneDetectorDisplay()
+        roi         = p.init_roi(4)
+        img_roi     = p.preprocess(img)
+        roim,rois   = p.fit_plane_with_outliers(img_roi)
+        pose        = p.convert_plane_params_to_pose()
+        p.show_image_with_axis(img, pose)
+        p.show_points_3d_with_normal(p.rect_xyz, pose)
+        self.assertTrue(pose[0][2] > 0.01)  
+
+    def test_fit_plane_ransac(self):
+        "computes with ransac"
+        d           = DataGen()
+        img         = d.init_image(6)        
+        p           = PlaneDetectorDisplay()
+        roi         = p.init_roi(4)
+        img_roi     = p.preprocess(img)
+        roim,rois   = p.fit_plane_ransac(img_roi)
+        pose        = p.convert_plane_params_to_pose()
+        p.show_image_with_axis(img, pose)
+        p.show_points_3d_with_normal(p.rect_xyz, pose)
+        self.assertTrue(pose[0][2] > 0.01)  
+
+    def test_fit_plane_using_gradients(self):
+        "computes normal of the ROI using gradients and then cross product"
+        im_size     = (640,480)
+        d           = DataGen(img_size=im_size)
+        img         = d.init_image(1)     # corner     
+        p           = PlaneDetectorDisplay(image_size=im_size)        
+        roi         = p.init_roi(13)      # image center
+        roim,rois   = p.fit_plane_using_gradients(img, roi)
+        pose        = p.convert_plane_params_to_pose()
+        p.show_image_colored_by_normals(img)
+        p.show_image_with_axis(img, pose)
+        p.show_points_3d_with_normal(p.rect_xyz, pose)
+        self.assertTrue(pose[0][2] > 0.01)         
+
+    def test_split_roi(self):
+        "computes ROIS and splits if needed"
+        p       = PlaneDetector()
+        p.MIN_STD_ERROR = 0.1
+        img     = p.init_image(13)
+        roi     = p.init_roi(4)
+        img3d   = p.init_img3d(img)
+        imgXYZ  = p.compute_img3d(img)
+        roi_list= p.fit_and_split_roi_recursively(roi)
+        p.show_rois_3d_with_normals(roi_list, roi)
+        p.show_image_with_rois(p.img, roi_list)
+
+        for roi_s in roi_list:
+            self.assertFalse(roi_s['error'] > 0.01) 
+
+    def test_plane_fit(self):
+        "plane fit data"
+        d               = DataSourceMovie()
+        srcid           = 422            # 421,422, 423-ok
+        ret             = d.init_video(srcid)
+        p               = PlaneDetectorDisplay('P') #estimator_type=self.estim_type, estimator_id=estim_ind)
+        roi             = p.init_roi(21)  # center image
+        while ret:
+            ret,img     = d.get_data() 
+            if not ret: break        
+            retp        = p.find_planes(img)
+            vis         = p.show_image_data(img, display_mode=3)
+            vis         = p.show_scene(vis)
+            ret         = p.show_data(vis) & ret
+        d.finish()
+        self.assertFalse(ret)     
+
+    def test_plane_fit_weighted(self):
+        "plane fit data"
+        d               = DataSourceMovie()
+        srcid           = 422            # 421,422, 423-ok
+        ret             = d.init_video(srcid)
+        p               = PlaneDetectorDisplay('W') #estimator_type=self.estim_type, estimator_id=estim_ind)
+        roi             = p.init_roi(21)  # center image
+        while ret:
+            ret,img     = d.get_data()  
+            if not ret: break      
+            retp        = p.find_planes(img)
+            vis         = p.show_image_data(img, display_mode=1)
+            vis         = p.show_scene(vis)
+            ret         = p.show_data(vis) & ret
+        d.finish()
+        self.assertFalse(ret) 
+
+    def test_plane_fit_outliers(self):
+        "plane fit data"
+        d               = DataSourceMovie()
+        srcid           = 421            # 421,422, 423-ok
+        ret             = d.init_video(srcid)
+        p               = PlaneDetectorDisplay('O') #estimator_type=self.estim_type, estimator_id=estim_ind)
+        roi             = p.init_roi(41)  # center image
+        while ret:
+            ret,img     = d.get_data()  
+            if not ret: break      
+            retp        = p.find_planes(img)
+            vis         = p.show_image_data(img, display_mode=3)
+            vis         = p.show_scene(vis)
+            ret         = p.show_data(vis) & ret
+        d.finish()
+        self.assertFalse(ret) 
+
+    def test_plane_fit_ransac(self):
+        "plane fit data"
+        d               = DataSourceMovie()
+        srcid           = 422            # 421,422, 423-ok
+        ret             = d.init_video(srcid)
+        p               = PlaneDetectorDisplay('R') #estimator_type=self.estim_type, estimator_id=estim_ind)
+        roi             = p.init_roi(21)  # center image
+        while ret:
+            ret,img     = d.get_data() 
+            if not ret: break        
+            retp        = p.find_planes(img)
+            vis         = p.show_image_data(img, display_mode=3)
+            vis         = p.show_scene(vis)
+            ret         = p.show_data(vis) & ret
+        d.finish()
+        self.assertFalse(ret)   
+
+    def test_plane_fit_weighted_4d(self):
+        "plane fit data"
+        d               = DataSourceMovie()
+        srcid           = 422            # 421,422, 423-ok
+        ret             = d.init_video(srcid)
+        p               = PlaneDetectorDisplay('T') #estimator_type=self.estim_type, estimator_id=estim_ind)
+        roi             = p.init_roi(53)  # center image
+        while ret:
+            ret,img     = d.get_data() 
+            if not ret: break        
+            retp        = p.find_planes(img)
+            vis         = p.show_image_data(img, display_mode=3)
+            vis         = p.show_scene(vis)
+            ret         = p.show_data(vis) & ret
+        d.finish()
+        self.assertFalse(ret)   
+
+    def test_plane_fit_using_gradients(self):
+        "plane fit data"
+        d               = DataSourceMovie()
+        srcid           = 422            # 421,422, 423-ok
+        ret             = d.init_video(srcid)
+        p               = PlaneDetectorDisplay('G') #estimator_type=self.estim_type, estimator_id=estim_ind)
+        roi             = p.init_roi(33)  # center image
+        while ret:
+            ret,img     = d.get_data() 
+            if not ret: break        
+            retp        = p.find_planes(img)
+            vis         = p.show_image_data(img, display_mode=3)
+            vis         = p.show_scene(vis)
+            ret         = p.show_data(vis) & ret
+        d.finish()
+        self.assertFalse(ret)               
+
+    def test_multi_plane_fit(self):
+        "plane fit data multiple ROIs"
+        d               = DataSourceMovie()
+        srcid           = 422            # 421,422, 423-ok
+        ret             = d.init_video(srcid)
+        roi_types       = [21,22,23,31,32,33,52,53]
+        pm              = []
+        for rt in roi_types:
+            p           = PlaneDetectorDisplay('T') #estimator_type=self.estim_type, estimator_id=estim_ind)
+            roi         = p.init_roi(rt)  # center image
+            pm.append(p)
+
+        while ret:
+            ret,img     = d.get_data()
+            if not ret: break
+            vis         = pm[0].show_image_data(img, display_mode=3)
+            for p in pm:         
+                retp        = p.find_planes(img)
+                vis         = p.show_scene(vis)
+
+            ret         = p.show_data(vis) 
+
+        d.finish()
+        self.assertFalse(ret)   
+
+    def test_grid_plane_fit(self):
+        "plane fit data multiple ROIs in grid"
+        d               = DataSourceMovie()
+        srcid           = 422            # 421,422, 423-ok
+        ret             = d.init_video(srcid)
+        
+        "generate grid of trackers"
+        nx, ny          = 10, 10
+        w,h             = 1280, 720     
+        dx, dy          = int(w/(nx+1)), int(h/(ny+1))
+        wx,wy           = int(dx*0.4), int(dy*0.4)
+        pm              = []
+        for ix in range(nx):
+            for iy in range(ny):
+                x0, y0      = (ix+1)*dx, (iy+1)*dy
+                #x1, y1      = min((ix+1)*dx, w-1), min((iy+1)*dy, h-1)
+                rect        = (x0-wx,y0-wy,x0+wx,y0+wy)
+                p           = PlaneDetectorDisplay('P') #estimator_type=self.estim_type, estimator_id=estim_ind)
+                p.rect      = rect  # center image
+                pm.append(p)                             
+
+        while ret:
+            ret,img     = d.get_data()
+            if not ret: break
+            vis         = pm[0].show_image_data(img, display_mode=3)
+            for p in pm:         
+                retp        = p.find_planes(img)
+                vis         = p.show_scene(vis)
+
+            ret         = p.show_data(vis) 
+
+        d.finish()
+        self.assertFalse(ret)        
+
+
+    def test_plane_fit_with_show_3d(self):
+        "plane fit data and show 3d data"
+        d               = DataSourceMovie()
+        srcid           = 421            # 421,422, 423-ok
+        ret             = d.init_video(srcid)
+        p               = PlaneDetectorDisplay('P') #estimator_type=self.estim_type, estimator_id=estim_ind)
+        roi             = p.init_roi(41)  # center image
+        while ret:
+            ret,img     = d.get_data()
+            if not ret: break           
+            retp        = p.find_planes(img)
+            vis         = p.show_image_data(img, display_mode=3)
+            vis         = p.show_scene(vis)
+            ret         = p.show_data(vis) & ret
+            p.show_3d_point_cloud()
+        d.finish()
+        self.assertFalse(ret)      
+
+    def test_plane_fit_colored_by_normals(self):
+        "plane fit data and show 3d data"
+        d               = DataSourceMovie()
+        srcid           = 422           # 421,422, 423-ok
+        ret             = d.init_video(srcid)
+        p               = PlaneDetectorDisplay('G') #estimator_type=self.estim_type, estimator_id=estim_ind)
+        roi             = p.init_roi(0)  # center image
+        while ret:
+            ret,img     = d.get_data()
+            if not ret: break           
+            retp        = p.find_planes(img)
+            vis         = p.show_image_data(img, display_mode=1)
+            vis         = p.show_image_colored_by_normals(vis)
+            #vis         = p.show_scene(vis)
+            ret         = p.show_data(vis) & ret
+            #p.show_3d_point_cloud()
+            
+        d.finish()
+        self.assertFalse(ret)      
+
+    def test_plane_fit_with_grow(self):
+        "plane fit data and show 3d data"
+        d               = DataSourceMovie()
+        srcid           = 422           # 421,422, 423-ok
+        ret             = d.init_video(srcid)
+        p               = PlaneDetectorDisplay('F') #estimator_type=self.estim_type, estimator_id=estim_ind)
+        roi             = p.init_roi(21)  # center image
+        while ret:
+            ret,img     = d.get_data()
+            if not ret: break           
+            retp        = p.find_planes(img)
+            vis         = p.show_image_data(img, display_mode=1)
+            vis         = p.show_mask(vis)
+            #vis         = p.show_scene(vis)
+            ret         = p.show_data(vis) & ret
+            #p.show_3d_point_cloud()
+            
+        d.finish()
+        self.assertFalse(ret)       
+
+    def test_plane_fit_with_project_on_image(self):
+        "plane fit data and show 3d data"
+        d               = DataSourceMovie()
+        srcid           = 422           # 421,422, 423-ok
+        ret             = d.init_video(srcid)
+        p               = PlaneDetectorDisplay('S') #estimator_type=self.estim_type, estimator_id=estim_ind)
+        roi             = p.init_roi(21)  # center image
+        while ret:
+            ret,img     = d.get_data()
+            if not ret: break           
+            retp        = p.find_planes(img)
+            vis         = p.show_image_data(img, display_mode=1)
+            vis         = p.show_mask(vis)
+            #vis         = p.show_scene(vis)
+            ret         = p.show_data(vis) & ret
+            #p.show_3d_point_cloud()
+            
+        d.finish()
+        self.assertFalse(ret) 
+
+    def test_multi_plane_fit_with_project_on_image(self):
+        "plane fit data multiple ROIs with image coloring per ROI mask"
+        d               = DataSourceMovie()
+        srcid           = 422            # 421,422, 423-ok
+        ret             = d.init_video(srcid)
+        roi_types       = [21,31,52,33] #22,23,31,32,33,52,53]
+        pm              = []
+        for rt in roi_types:
+            p           = PlaneDetectorDisplay('S') #estimator_type=self.estim_type, estimator_id=estim_ind)
+            roi         = p.init_roi(rt)  # center image
+            pm.append(p)
+
+        while ret:
+            ret,img     = d.get_data()
+            if not ret: break
+            vis         = pm[0].show_image_data(img, display_mode=3)
+            for p in pm:         
+                retp        = p.find_planes(img)
+                vis         = p.show_mask(vis)
+
+            ret         = p.show_data(vis) 
+
+        d.finish()
+        self.assertFalse(ret)                                                            
+
+# ----------------------
+#%% Run Test
+def RunTest():
+    #unittest.main()
+    #suite = unittest.TestSuite()
+    suite = TestPlaneDetector()
+    #suite.test_image_show() # ok
+    #suite.test_init_img3d()  # ok
+    #suite.test_compute_img3d() # ok
+    #suite.test_show_img3d() # ok
+    #suite.test_convert_roi_to_points()  # ok
+
+
+    #suite.test_fit_plane_svd() # ok
+    #suite.test_fit_plane_depth_image() #
+    #suite.test_fit_plane_with_outliers() 
+    #suite.test_fit_plane_ransac()  
+    #suite.test_fit_plane_using_gradients()  # ok 
+    #suite.test_split_roi() 
+
+    #suite.test_plane_fit() # ok
+    #suite.test_plane_fit_weighted() # ok
+    #suite.test_plane_fit_outliers() # ok
+    #suite.test_plane_fit_with_show_3d() # ok
+    
+    #suite.test_plane_fit_ransac() # ok
+    #suite.test_plane_fit_weighted_4d() # ok
+    #suite.test_multi_plane_fit() # ok
+    #suite.test_plane_fit_using_gradients() # ok
+    #suite.test_plane_fit_colored_by_normals() # ok
+    #suite.test_plane_fit_with_grow() #
+    #suite.test_grid_plane_fit() # ok
+    #suite.test_plane_fit_with_project_on_image() # ok
+    suite.test_multi_plane_fit_with_project_on_image() # ok
+    
+
+   
+    # runner = unittest.TextTestRunner()
+    # runner.run(suite)    
+
+# ----------------------
+#%% App
+class PlaneApp:
+    def __init__(self):
+        "set up the camera, the stereo model and the window, then enter the main loop (run() is called here)"
+        self.cap            = RealSense() # frame source, read in run()
+        self.cap.set_display_mode('d16')
+        self.frame          = None   # last frame read from the camera
+        self.rect           = None   # ROI used by show_histogram; not assigned anywhere else in this class
+        self.paused         = False  # toggled with the space key
+        self.trackers       = []     # one plane estimator per ROI, filled by on_rect
+
+        self.camera_bf      = self.cap.get_bf() # for depth conversion (should be in meter)
+        self.args          = process_arguments()
+        self.args.scale    = 0.5
+
+        # stereo model, used in run() to compute the disparity of every frame
+        self.model        = foundation_stereo_algo_init(self.args)
+
+        self.show_dict      = {} # figure handles kept by show_histogram
+
+        self.detect_type    = 'G'     # mode given to trackers created from now on
+        self.show_type      = 'depth' # matches no named branch of show_scene, so the stereo depth is shown
+        self.win_name       = 'Plane Detector (q-quit, c-clear, a,b,r,p,o,g,f,t,s,n,w 0-depth,1-left,2-right)'
+
+        cv.namedWindow(self.win_name )
+        # a finished selection is reported to on_rect
+        self.rect_sel       = RectSelector(self.win_name , self.on_rect)
+        self.run()
+
+    def on_rect(self, rect):
+        "remember ROI defined by user"
+        #self.define_roi(self.frame, rect)
+        tracker             = PlaneDetectorDisplay() #estimator_type=self.estim_type, estimator_id=estim_ind)
+        tracker.rect        = rect
+        tracker.detect_type = self.detect_type
+        self.trackers.append(tracker)        
+        log.info(f'Adding plane estimator at  : {rect}') 
+
+    def generate_grid_trackers(self):
+        "generate grid of trackers"
+        nx, ny          = 10, 10
+        w,h             = self.cap.frame_size     
+        dx, dy          = int(w/nx), int(h/ny)
+        for ix in range(nx):
+            for iy in range(ny):
+                x0, y0 = ix*dx, iy*dy
+                x1, y1 = min((ix+1)*dx, w-1), min((iy+1)*dy, h-1)
+                rect    = (x0,y0,x1,y1)
+                self.on_rect(rect)
+
+    def process_image(self, img_depth):
+        "makes measurements"
+        for tracker in self.trackers:
+            tracker.find_planes(img_depth) 
+
+    def show_scene(self, frame, img_depth_fs):
+        "draw ROI and Info"
+        if self.show_type == 'left':
+            vis     = frame[:,:,0].astype(np.uint8)
+        elif self.show_type == 'right':
+            vis     = frame[:,:,1].astype(np.uint8)   
+        elif self.show_type == 'rs':
+            vis     = cv.convertScaleAbs(frame[:,:,2], alpha=0.1).astype(np.uint8)                     
+        else:
+            vis     = cv.convertScaleAbs(img_depth_fs, alpha=0.1).astype(np.uint8)
+
+        vis     = cv.cvtColor(vis, cv.COLOR_GRAY2BGR)
+        self.rect_sel.draw(vis)
+
+        # first tracker is the floor
+        if len(self.trackers) > 0:
+            floor_z = self.trackers[0].plane_center[2]
+        else:
+            floor_z = 10000
+
+        for tracker in self.trackers:
+            tracker.floor_z = floor_z
+            vis = tracker.show_scene(vis)
+            
+
+        return vis 
+    
+    def show_histogram(self, img):
+        "show roi histgram"
+        if self.rect is None:
+            #print('define ROI')
+            return 0
+        
+        x0, y0, x1, y1 = self.rect
+        img_roi = img[y0:y1,x0:x1].astype(np.float32)
+        # Compute histogram
+        hist, bins = np.histogram(img_roi.flatten(), bins=1024, range=[0, 2**15])
+
+        if not 'fig' in self.show_dict : #len(self.show_dict) < 1:
+            fig, ax = plt.subplots()
+            fig.set_size_inches([24, 16])
+            ax.set_title('Histogram (Depth)')
+            ax.set_xlabel('Bin')
+            ax.set_ylabel('Frequency')
+            lineGray, = ax.plot(bins[:-1], hist, c='k', lw=3)
+            ax.set_xlim(bins[0], bins[-1])
+            ax.set_ylim(0, max(hist)+10)
+            plt.ion()
+            #plt.show()
+
+            self.show_dict = {'fig':fig, 'ax':ax, 'line':lineGray}
+        else:
+            self.show_dict['line'].set_ydata(hist)
+        
+        self.show_dict['fig'].canvas.draw()
+        return    
+
+    def run(self):
+        while True:
+            playing = not self.paused and not self.rect_sel.dragging
+            if playing or self.frame is None:
+                ret, frame = self.cap.read()
+                if not ret:
+                    break
+                self.frame = frame.copy()
+
+            # depth
+            img_left, img_right, img_depth_rs = self.frame[:,:,0], self.frame[:,:,1], self.frame[:,:,2]    
+
+            img_disparity   = foundation_stereo_algo(self.args, self.model, img_left, img_right)
+            img_depth_fs    = convert_disparity_to_depth(self.camera_bf, img_disparity)
+
+            
+            #self.statistics(frame)
+            self.process_image(img_depth_fs)
+
+            vis     = self.show_scene(frame, img_depth_fs)
+            cv.imshow(self.win_name , vis)
+            ch = cv.waitKey(1)
+            if ch == ord(' '):
+                self.paused = not self.paused
+            elif ch == ord('a'):
+                self.detect_type = 'A' 
+                log.info(f'Detect type : {self.detect_type}')
+            elif ch == ord('r'):
+                self.detect_type = 'R'  
+                log.info(f'Detect Ransac : {self.detect_type}')
+            elif ch == ord('p'):
+                self.detect_type = 'P'  
+                log.info(f'Detect svd : {self.detect_type}')
+            elif ch == ord('o'):
+                self.detect_type = 'O'  
+                log.info(f'Detect with outliers : {self.detect_type}') 
+            elif ch == ord('g'):
+                self.detect_type = 'G'    
+                log.info(f'Detect gradients : {self.detect_type}')   
+            elif ch == ord('f'):
+                self.detect_type = 'F'    
+                log.info(f'Detect fit and grow : {self.detect_type}')     
+            elif ch == ord('w'):
+                self.detect_type = 'W'    
+                log.info(f'Detect weighted svd : {self.detect_type}')     
+            elif ch == ord('s'):
+                self.detect_type = 'S'    
+                log.info(f'Detect entire image : {self.detect_type}')    
+            elif ch == ord('n'):
+                self.detect_type = 'N'    
+                log.info(f'Detect entire image using normals : {self.detect_type}')                                              
+            elif ch == ord('t'):
+                self.detect_type = 'T'    
+                log.info(f'Detect weighted svd 4d : {self.detect_type}')     
+            elif ch == ord('b'):
+                self.detect_type = 'B'    
+                log.info(f'Detect boxes : {self.detect_type}')                                            
+            elif ch == ord('0'):
+                self.show_type = 'rs'      
+                log.info(f'Show type : {self.show_type}')                               
+            elif ch == ord('1'):
+                self.show_type = 'left' 
+                log.info(f'Show type : {self.show_type}')   
+            elif ch == ord('2'):
+                self.show_type = 'right'   
+                log.info(f'Show type : {self.show_type}')   
+            elif ch == ord('3'):
+                self.show_type = 'fs'   
+                log.info(f'Show type : {self.show_type}')                  
+            elif ch == ord('m'):
+                self.generate_grid_trackers()                                             
+            elif ch == ord('c'):
+                if len(self.trackers) > 0:
+                    t = self.trackers.pop()
+            elif ch == 27 or ch == ord('q'):
+                break              
+
+
+if __name__ == '__main__':
+    # entry point: constructing PlaneApp starts the live loop (its __init__ calls run())
+
+    # RunTest() can be called here instead to execute the single test selected inside it
+    PlaneApp()
+
+
+
diff --git a/scripts/run_demo.py b/scripts/run_demo.py
index ef75362..6d72e07 100755
--- a/scripts/run_demo.py
+++ b/scripts/run_demo.py
@@ -23,11 +23,14 @@
 if __name__=="__main__":
   code_dir = os.path.dirname(os.path.realpath(__file__))
   parser = argparse.ArgumentParser()
+
   parser.add_argument('--model_dir', default=f'{code_dir}/../weights/23-36-37/model_best_bp2_serialize.pth', type=str)
+  # parser.add_argument('--left_file', default=f'{code_dir}/../data/mono/img0074.jpg', type=str)
+  # parser.add_argument('--right_file', default=f'{code_dir}/../data/mono/img0076.jpg', type=str)  
   parser.add_argument('--left_file', default=f'{code_dir}/../demo_data/left.png', type=str)
   parser.add_argument('--right_file', default=f'{code_dir}/../demo_data/right.png', type=str)
   parser.add_argument('--intrinsic_file', default=f'{code_dir}/../demo_data/K.txt', type=str, help='camera intrinsic matrix and baseline file')
-  parser.add_argument('--out_dir', default='/home/bowen/debug/stereo_output', type=str)
+  parser.add_argument('--out_dir', default=f'{code_dir}/../demo_data_out', type=str)
   parser.add_argument('--remove_invisible', default=1, type=int)
   parser.add_argument('--denoise_cloud', default=0, type=int)
   parser.add_argument('--denoise_nb_points', type=int, default=30, help='number of points to consider for radius outlier removal')
@@ -44,7 +47,7 @@
   set_seed(0)
   torch.autograd.set_grad_enabled(False)
 
-  os.system(f'rm -rf {args.out_dir} && mkdir -p {args.out_dir}')
+  os.makedirs(args.out_dir, exist_ok=True)  # earlier results are kept, but the directory must exist before anything is written into it
 
   with open(f'{os.path.dirname(args.model_dir)}/cfg.yaml', 'r') as ff:
     cfg:dict = yaml.safe_load(ff)
@@ -76,8 +79,8 @@
   img0_ori = img0.copy()
   img1_ori = img1.copy()
   logging.info(f"img0: {img0.shape}")
-  imageio.imwrite(f'{args.out_dir}/left.png', img0)
-  imageio.imwrite(f'{args.out_dir}/right.png', img1)
+  # imageio.imwrite(f'{args.out_dir}/left.png', img0)
+  # imageio.imwrite(f'{args.out_dir}/right.png', img1)
 
   img0 = torch.as_tensor(img0).cuda().float()[None].permute(0,3,1,2)
   img1 = torch.as_tensor(img1).cuda().float()[None].permute(0,3,1,2)
@@ -99,7 +102,7 @@
   max_val = None
   vis = vis_disparity(disp, min_val=min_val, max_val=max_val, cmap=cmap, color_map=cv2.COLORMAP_TURBO)
   vis = np.concatenate([img0_ori, img1_ori, vis], axis=1)
-  imageio.imwrite(f'{args.out_dir}/disp_vis.png', vis)
+  imageio.imwrite(f'{args.out_dir}/disp_0074.png', vis)
   s = 1280/vis.shape[1]
   resized_vis = cv2.resize(vis, (int(vis.shape[1]*s), int(vis.shape[0]*s)))
   cv2.imshow('disp', resized_vis[:,:,::-1])
diff --git a/scripts/run_fast_foundation_with_faro.py b/scripts/run_fast_foundation_with_faro.py
new file mode 100644
index 0000000..bfba560
--- /dev/null
+++ b/scripts/run_fast_foundation_with_faro.py
@@ -0,0 +1,763 @@
+
+'''
+python scripts/run_fast_foundation_with_faro.py - github
+
+Environment:
+    ffs - GPU laptop
+
+Installation:
+    python -m venv ./envs/ffs
+    source ./envs/ffs/bin/activate
+    pip install torch==2.6.0 torchvision==0.21.0 xformers --index-url https://download.pytorch.org/whl/cu124
+    cd Fast-FoundationStereo
+    pip install -r requirements.txt
+    pip install pyrealsense2
+    download weights : https://drive.google.com/drive/folders/1HuTt7UIp7gQsMiDvJwVuWmKpvFzIIMap
+
+    python scripts/run_demo.py --model_dir weights/20-30-48/model_best_bp2_serialize.pth --left_file demo_data/left.png --right_file demo_data/right.png --intrinsic_file assets/K.txt --out_dir output/ --remove_invisible 0 --denoise_cloud 1  --scale 1 --get_pc 1 --valid_iters 8 --max_disp 192 --zfar 100
+
+
+'''
+
+
+
+
+import numpy as np
+from argparse import ArgumentParser
+
+import os,sys
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+from omegaconf import OmegaConf
+from core.utils.utils import InputPadder
+import argparse, torch, logging, yaml
+import numpy as np
+from Utils import (
+    AMP_DTYPE, set_logging_format, set_seed, vis_disparity,
+    depth2xyzmap, toOpen3dCloud, o3d,
+)
+import cv2
+#from faro_data_manager import DataSource
+from scripts.data_manager_faro_laptop import DataSource
+
+
+def process_arguments():
+    "Parse the command line and return the argparse namespace (model, input files, output dir, inference options)."
+    code_dir = os.path.dirname(os.path.realpath(__file__))
+    # NOTE(review): --out_dir defaults to a developer-specific absolute path; pass --out_dir explicitly on other machines.
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model_dir', default=f'{code_dir}/../weights/20-30-48/model_best_bp2_serialize.pth', type=str)
+    parser.add_argument('--left_file', default=f'{code_dir}/../demo_data/left.png', type=str)
+    parser.add_argument('--right_file', default=f'{code_dir}/../demo_data/right.png', type=str)
+    parser.add_argument('--intrinsic_file', default=f'{code_dir}/../demo_data/K.txt', type=str, help='camera intrinsic matrix and baseline file')
+    parser.add_argument('--out_dir', default='/home/bowen/debug/stereo_output', type=str)
+    parser.add_argument('--remove_invisible', default=1, type=int)
+    parser.add_argument('--denoise_cloud', default=0, type=int)
+    parser.add_argument('--denoise_nb_points', type=int, default=30, help='number of points to consider for radius outlier removal')
+    parser.add_argument('--denoise_radius', type=float, default=0.03, help='radius to use for outlier removal')
+    parser.add_argument('--scale', default=1, type=float)
+    parser.add_argument('--hiera', default=0, type=int)
+    parser.add_argument('--get_pc', type=int, default=1, help='save point cloud output')
+    parser.add_argument('--valid_iters', type=int, default=8, help='number of flow-field updates during forward pass')
+    parser.add_argument('--max_disp', type=int, default=192, help='maximum disparity')
+    parser.add_argument('--zfar', type=float, default=100, help="max depth to include in point cloud")
+
+    return parser.parse_args()
+
+# Original -------------------------------------------
+
+# Original Split -------------------------------------------
+#from opencv_realsense_camera import RealSense
+import time
+
+def foundation_stereo_algo_init(args):
+    "Reset args.out_dir, overlay args on the model's cfg.yaml, load the serialized model, move it to the GPU in eval mode and return it."
+    import shutil   # local import: only needed to reset the output directory
+    set_logging_format()
+    set_seed(0)
+    torch.autograd.set_grad_enabled(False)
+
+    shutil.rmtree(args.out_dir, ignore_errors=True)  # like 'rm -rf', but the path never passes through a shell
+    os.makedirs(args.out_dir, exist_ok=True)
+    with open(f'{os.path.dirname(args.model_dir)}/cfg.yaml', 'r') as ff:
+        cfg:dict = yaml.safe_load(ff)
+    for k, v in vars(args).items():
+        if v is not None:
+            cfg[k] = v
+    args = OmegaConf.create(cfg)
+    logging.info(f"args:\n{args}")
+    # NOTE(review): weights_only=False unpickles arbitrary objects; load only model files from a trusted source
+    model = torch.load(args.model_dir, map_location='cpu', weights_only=False)
+    model.args.valid_iters = args.valid_iters
+    model.args.max_disp = args.max_disp
+    model.cuda().eval()
+    return model
+
+def foundation_stereo_algo(args, model, np_left, np_right):
+    "Run model on one left/right image pair and return the disparity map resized to the height and width of np_left."
+    scale = args.scale
+
+    img0 = np_left # replaces imageio.imread(args.left_file)
+    img1 = np_right # replaces imageio.imread(args.right_file)
+    if len(img0.shape)==2:  # single-channel input: replicate it into 3 identical channels
+        img0 = np.tile(img0[...,None], (1,1,3))
+        img1 = np.tile(img1[...,None], (1,1,3))
+
+    img0    = img0[...,:3]  # keep at most the first three channels
+    img1    = img1[...,:3]
+    Ho,Wo   = img0.shape[:2]  # not used below
+
+    img0    = cv2.resize(img0, fx=scale, fy=scale, dsize=None)
+    img1    = cv2.resize(img1, dsize=(img0.shape[1], img0.shape[0]))  # force the right image to the left image's size
+
+    H,W     = img0.shape[:2]
+    #img0_ori = img0.copy()
+    #img1_ori = img1.copy()
+    #logging.info(f"img0: {img0.shape}")
+    #imageio.imwrite(f'{args.out_dir}/left.png', img0)
+    #imageio.imwrite(f'{args.out_dir}/right.png', img1)
+
+    img0    = torch.as_tensor(img0).cuda().float()[None].permute(0,3,1,2)  # HxWxC array -> 1xCxHxW float tensor on the GPU
+    img1    = torch.as_tensor(img1).cuda().float()[None].permute(0,3,1,2)
+    padder  = InputPadder(img0.shape, divis_by=32, force_square=False)  # presumably pads H and W up to multiples of 32 - TODO confirm
+    img0, img1 = padder.pad(img0, img1)
+
+    #logging.info(f"Start forward, 1st time run can be slow due to compilation")
+    with torch.amp.autocast('cuda', enabled=True, dtype=AMP_DTYPE):
+        if not args.hiera:
+            disp = model.forward(img0, img1, iters=args.valid_iters, test_mode=True, optimize_build_volume='pytorch1')
+        else:
+            disp = model.run_hierachical(img0, img1, iters=args.valid_iters, test_mode=True, small_ratio=0.5)
+
+    #logging.info("forward done")
+    
+    scale_factor   = 1/scale
+    disp    = padder.unpad(disp.float())
+    # multiply the disparity values by 1/scale (presumably to express them in full-resolution pixels), then clamp negatives to 0
+    disp    = disp.data.cpu().numpy().reshape(H,W)*scale_factor
+    disp    = disp.clip(0, None)
+
+    # recover original size
+    # the size is passed to cv2.resize as (width, height)
+    orig_shape     = (np_left.shape[1],np_left.shape[0])
+    #print(orig_shape, disp.shape)
+    #scale_factor   = orig_shape[0]/process_shape[0]
+    disp_out = cv2.resize(disp, orig_shape, interpolation=cv2.INTER_NEAREST) # A: changed    
+
+    # cmap    = None
+    # min_val = None
+    # max_val = None
+    # vis     = vis_disparity(disp, min_val=min_val, max_val=max_val, cmap=cmap, color_map=cv2.COLORMAP_TURBO)
+    # vis     = np.concatenate([img0_ori, img1_ori, vis], axis=1)
+    # imageio.imwrite(f'{args.out_dir}/disp_vis.png', vis)
+    # s = 1280/vis.shape[1]
+    # resized_vis = cv2.resize(vis, (int(vis.shape[1]*s), int(vis.shape[0]*s)))
+    # cv2.imshow('disp', resized_vis[:,:,::-1])
+    # cv2.waitKey(0)
+
+    # if args.remove_invisible:
+    #     yy,xx = np.meshgrid(np.arange(disp.shape[0]), np.arange(disp.shape[1]), indexing='ij')
+    #     us_right = xx-disp
+    #     invalid = us_right<0
+    #     disp[invalid] = np.inf
+
+    # debug and show
+    #show_point_cloud(args, disp, img0_ori)
+    #disp = disp * 1000 # must be in mm
+    return disp_out
+
+def convert_disparity_to_depth(BF, disparity):
+    "Return depth = BF / disparity as uint16 (saturated at 65535); pixels with disparity <= 0.1 get depth 0."
+    disparity           = disparity.astype(np.float32) 
+    depth               = np.zeros_like(disparity) 
+    disparity_valid     = disparity > 0.1
+    depth[disparity_valid]   = BF / disparity[disparity_valid]
+    # saturate first: casting values above 65535 straight to uint16 is out of range and yields garbage depths
+    np.clip(depth, 0, np.iinfo(np.uint16).max, out=depth)
+    return depth.astype(np.uint16)
+
+def depth_opencv_rs_merge(depth_rs, depth_cv):
+    "Return a copy of depth_rs in which pixels with 80 < depth_cv < 550 are taken from depth_cv."
+    # original note: the sensor's own minimum Z is about 520 in HD mode (D455),
+    # so the short range is filled from the computed depth map
+    near_range = np.logical_and(depth_cv > 80, depth_cv < 550)
+
+    merged = np.copy(depth_rs)
+    merged[near_range] = depth_cv[near_range]
+
+    # Disabled on purpose ("A: changed"): invalid sensor pixels (depth_rs < 1)
+    # are no longer filled from depth_cv.
+
+    return merged
+
+def depth_fs_rs_error(depth_rs, depth_fs):
+    "Return |depth_rs - depth_fs| per pixel as float32, set to 0 wherever depth_rs < 2."
+    reference   = depth_rs.astype(np.float32)
+    estimate    = depth_fs.astype(np.float32)
+    depth_error = np.abs(reference - estimate)
+
+    # pixels whose reference depth is below 2 are excluded from the error map
+    depth_error[reference < 2] = 0
+
+    return depth_error
+
+def preprocess(frame, scale_factor = 0.5):
+    """Cast frame to float32 and split it into its first three channels.
+
+    Returns (left, right, depth) = channels 0, 1 and 2 of the converted frame.
+    """
+    as_float = frame.astype(np.float32)
+
+    # channel 0 -> left, channel 1 -> right, channel 2 -> depth
+    left, right, depth = (as_float[:, :, ch] for ch in range(3))
+
+    # scale_factor is accepted for call compatibility but is not used:
+    # the downscaling step that consumed it is disabled (it resized
+    # left/right with INTER_LINEAR and depth with INTER_NEAREST whenever
+    # scale_factor < 0.9).
+
+    return left, right, depth
+
+def show_images_depth(imgD = None, imgC = None,  fig_num = 1, fig_name = 'Depth Image', vmax = 1500):
+    """Show one image, or two equally shaped images side by side, in an OpenCV window.
+
+    imgD, imgC    : arrays to display; either may be None, but not both.
+    fig_name      : prefix of the window title.
+    fig_num, vmax : accepted for call compatibility, not used by the current code.
+
+    uint16 / float32 input is scaled by 0.1, converted to 8 bit and colour-mapped
+    (COLORMAP_TURBO); other dtypes are shown unchanged.
+
+    Returns True when 'q' was read during the 5 ms key poll, otherwise False
+    (also False when nothing could be displayed).
+    """
+    if (imgD is None) and (imgC is None):
+        print('No images found')
+        return False
+
+    if imgD is None: # no data acquired
+        img_show = imgC
+
+    elif imgC is None: # no data is processed
+        img_show = imgD
+
+    elif imgD.shape == imgC.shape:
+        img_show = np.concatenate((imgD, imgC ), axis = 1)
+
+    else:
+        # differently shaped images cannot be concatenated; report it instead of failing on an unbound img_show
+        print(f'Image shapes differ : {imgD.shape} vs {imgC.shape}')
+        return False
+
+    # deal with 16 uint
+    if img_show.dtype == 'uint16' or img_show.dtype == 'float32':
+        img_show    = cv2.convertScaleAbs(img_show, alpha=0.1)
+        img_show    = cv2.applyColorMap(img_show, cv2.COLORMAP_TURBO)
+
+    # halve images wider than 2400 px (presumably so that the window fits the screen)
+    if img_show.shape[1] > 2400:
+        img_show = cv2.pyrDown(img_show)
+
+    cv2.imshow(f'{fig_name} (q-exit)', img_show)
+    ch = cv2.waitKey(5)
+
+    return ch == ord('q')
+
+def show_point_cloud(args, disp, img0_ori):
+    """Turn a disparity map into a coloured point cloud and show it in a blocking Open3D window.
+
+    Returns immediately when args.get_pc is falsy. Line 1 of args.intrinsic_file holds the nine
+    entries of K, line 2 the baseline; the depth map is also written to args.out_dir/depth_meter.npy.
+    """
+    if not args.get_pc:
+        return
+
+    scale = args.scale
+    with open(args.intrinsic_file, 'r') as f:
+        lines = f.readlines()
+    K = np.array(list(map(float, lines[0].rstrip().split()))).astype(np.float32).reshape(3,3)
+    baseline = float(lines[1])
+    K[:2] *= scale
+    depth = K[0,0]*baseline/disp
+    np.save(f'{args.out_dir}/depth_meter.npy', depth)
+
+    xyz_map = depth2xyzmap(depth, K)
+    cloud = toOpen3dCloud(xyz_map.reshape(-1,3), img0_ori.reshape(-1,3))
+    z_values = np.asarray(cloud.points)[:,2]
+    in_range = (z_values>0) & (z_values<=args.zfar)   # keep only points with 0 < z <= zfar
+    cloud = cloud.select_by_index(np.arange(len(z_values))[in_range])
+
+    if args.denoise_cloud:
+        logging.info("[Optional step] denoise point cloud...")
+        _, inlier_ids = cloud.remove_radius_outlier(nb_points=args.denoise_nb_points, radius=args.denoise_radius)
+        cloud = cloud.select_by_index(inlier_ids)
+
+    logging.info("Visualizing point cloud. Press ESC to exit.")
+    viewer = o3d.visualization.Visualizer()
+    viewer.create_window()
+    viewer.add_geometry(cloud)
+    viewer.get_render_option().point_size = 1.0
+    viewer.get_render_option().background_color = np.array([0.5, 0.5, 0.5])
+    camera = viewer.get_view_control()
+    camera.set_front([0, 0, -1])
+    camera.set_lookat(np.asarray(cloud.points)[np.asarray(cloud.points)[:,2].argmin()])   # aim at the point with the smallest z
+    camera.set_up([0, -1, 0])
+    viewer.run()
+    viewer.destroy_window()
+
+def depth2xyz(depth:np.ndarray, K, us=None, vs=None, zmin=0.1):
+  """Back-project depth through the intrinsic matrix K; returns (pts (N,3), us, vs) so the pixel grid can be reused."""
+  if us is None:
+    # first call: enumerate every pixel in row-major order (vs = row index, us = column index)
+    rows, cols = depth.shape[:2]
+    vs, us = (grid.reshape(-1) for grid in np.meshgrid(np.arange(0,rows), np.arange(0,cols), sparse=False, indexing='ij'))
+
+  zs = depth[vs,us]
+  zs[zs<zmin] = zmin      # depth[vs,us] is a copy when vs/us are index arrays, so depth itself is untouched
+  fx, fy = K[0,0], K[1,1]
+  cx, cy = K[0,2], K[1,2]
+  xs = (us-cx)*zs/fx
+  ys = (vs-cy)*zs/fy
+  pts = np.stack((xs.reshape(-1),ys.reshape(-1),zs.reshape(-1)), 1)  #(N,3)
+
+  return pts,us,vs    
+
+# ----------------------------------------
+#import open3d as o3d
+import numpy as np
+import time
+
+def test_point_cloud_rt():
+    "Open3D demo: keep appending random points to one point cloud and redraw it without blocking, until the window is closed."
+    # Source - https://stackoverflow.com/a/74669788
+    # Posted by Javier TG, modified by community. See post 'Timeline' for change history
+    # Retrieved 2026-02-10, License - CC BY-SA 4.0
+
+    # create visualizer and window.
+    vis = o3d.visualization.Visualizer()
+    vis.create_window(height=480, width=640)
+
+    # initialize pointcloud instance.
+    pcd = o3d.geometry.PointCloud()
+    # *optionally* add initial points
+    points = np.random.rand(10, 3)
+    pcd.points = o3d.utility.Vector3dVector(points)
+
+    # include it in the visualizer before non-blocking visualization.
+    vis.add_geometry(pcd)
+
+    # to add new points each dt secs.
+    dt = 0.01
+    # number of points that will be added
+    n_new = 10
+
+    previous_t = time.time()
+
+    # run non-blocking visualization. 
+    # To exit, press 'q' or click the 'x' of the window.
+    keep_running = True
+    while keep_running:
+        
+        if time.time() - previous_t > dt:
+            # Options (uncomment each to try them out):
+            # 1) extend with ndarrays.
+            pcd.points.extend(np.random.rand(n_new, 3))
+            
+            # 2) extend with Vector3dVector instances.
+            # pcd.points.extend(
+            #     o3d.utility.Vector3dVector(np.random.rand(n_new, 3)))
+            
+            # 3) other iterables, e.g
+            # pcd.points.extend(np.random.rand(n_new, 3).tolist())
+            
+            vis.update_geometry(pcd)
+            previous_t = time.time()
+
+        # the loop ends as soon as poll_events() returns a falsy value
+        keep_running = vis.poll_events()
+        vis.update_renderer()
+
+    vis.destroy_window()
+
+def test_video_stream_rs_and_foundation():
+    "Live loop: read camera frames, run the stereo model, merge its depth into the sensor depth and display all maps until 'q' is pressed."
+    d           = RealSense(mode = 'd16', use_ir = True, frame_size = (1280,720))  # NOTE(review): RealSense is never imported in this file (its import is commented out)
+    #d.load_preset_from_file('vpi/preset_rsm.json')
+    #self.cap.set_exposure(1000) # 10ms
+    d.switch_projector(True)
+    #self.cap.set_laser_power(100) # max power
+    camera_bf   = d.get_bf() # for depth conversion (should be in meter)
+    args        = process_arguments()
+
+    # init
+    model       = foundation_stereo_algo_init(args)
+    
+    ret  = False
+    while not ret:
+        # frame is I1,I2, D data
+        retf, frame = d.read()
+        if retf is False:
+            print('is your camera open/connected?')
+            break
+    
+        # extract
+        img_left, img_right, img_depth_rs = preprocess(frame, scale_factor=1.0)
+
+        # timed section: disparity -> depth -> merge with the sensor depth
+        t_start         = time.time()
+        img_disparity   = foundation_stereo_algo(args, model, img_left, img_right)
+        print(f'Disp : {img_disparity.min()} - {img_disparity.max()}')
+        img_depth_fs    = convert_disparity_to_depth(camera_bf, img_disparity)
+        print(f'Depth : {img_depth_fs.min()} - {img_depth_fs.max()}')
+        depth_merged    = depth_opencv_rs_merge(img_depth_rs, img_depth_fs)
+        print(f'Proces time : {time.time()-t_start} sec')
+        
+        # massage
+        #cmap, min_val, max_val = None, None, None
+        #img_depth_fs = vis_disparity(img_disparity, min_val=min_val, max_val=max_val, cmap=cmap, color_map=cv2.COLORMAP_TURBO)
+
+        # show; any window reporting 'q' ends the loop
+        ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+        ret2 = show_images_depth(img_depth_fs, None, fig_name='Depth FS')
+        ret3 = show_images_depth(depth_merged, None, fig_name='Depth Merged')
+        ret4 = show_images_depth(img_left.astype(np.uint8), img_right.astype(np.uint8), fig_name='Images L-R')
+        ret = ret1 or ret2 or ret3 or ret4
+        
+    d.release()
+
+def test_video_error_rs_versus_foundation():
+    "Live loop: read camera frames, run the stereo model and display the absolute depth difference to the sensor depth until 'q' is pressed."
+    d           = RealSense(mode = 'd16', use_ir = True, frame_size = (1280,720))  # NOTE(review): RealSense is never imported in this file (its import is commented out)
+    #d.load_preset_from_file('vpi/preset_rsm.json')
+    #d.set_exposure(100000) # 10ms
+    #d.switch_projector(False)
+    #self.cap.set_laser_power(100) # max power
+    camera_bf   = d.get_bf() # for depth conversion (should be in meter)
+    args        = process_arguments()
+
+    # init
+    model       = foundation_stereo_algo_init(args)
+    
+    ret  = False
+    while not ret:
+        # frame is I1,I2, D data
+        retf, frame = d.read()
+        if retf is False:
+            print('is your camera open/connected?')
+            break
+    
+        # extract
+        img_left, img_right, img_depth_rs = preprocess(frame, scale_factor=1.0)
+
+        # timed section: disparity -> depth -> error against the sensor depth
+        t_start         = time.time()
+        img_disparity   = foundation_stereo_algo(args, model, img_left, img_right)
+        #print(f'Disp : {img_disparity.min()} - {img_disparity.max()}')
+        img_depth_fs    = convert_disparity_to_depth(camera_bf, img_disparity)
+        #print(f'Depth : {img_depth_fs.min()} - {img_depth_fs.max()}')
+        depth_error    = depth_fs_rs_error(img_depth_rs, img_depth_fs)
+        print(f'Proces time : {time.time()-t_start} sec')
+        
+        # massage
+        #cmap, min_val, max_val = None, None, None
+        #img_depth_fs = vis_disparity(img_disparity, min_val=min_val, max_val=max_val, cmap=cmap, color_map=cv2.COLORMAP_TURBO)
+
+        # show; any window reporting 'q' ends the loop
+        ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+        ret2 = show_images_depth(img_depth_fs, None, fig_name='Depth FS')
+        ret3 = show_images_depth(depth_error, None, fig_name='Depth Error', vmax = 100)
+        ret4 = show_images_depth(img_left.astype(np.uint8), img_right.astype(np.uint8), fig_name='Images L-R')
+        ret = ret1 or ret2 or ret3 or ret4
+
+        # d.save_image(img_depth_rs,fname='depth_rs')
+        # d.save_image(img_depth_fs,fname='depth_fs')
+        # d.save_image(img_left,fname='img_left')
+        # d.save_image(img_right,fname='img_right')
+
+        # plt.show()
+        
+    d.release()
+
+def test_point_cloud_rs_versus_foundation():
+    "Live loop: run the stereo model on camera frames and show its depth as a continuously refreshed Open3D point cloud plus the 2D depth/error views."
+    d           = RealSense(mode = 'd16', use_ir = True, frame_size = (1280,720))  # NOTE(review): RealSense is never imported in this file (its import is commented out)
+    #d.load_preset_from_file('vpi/preset_rsm.json')
+    #self.cap.set_exposure(1000) # 10ms
+    #d.switch_projector(True)
+    #self.cap.set_laser_power(100) # max power
+    camera_bf   = d.get_bf() # for depth conversion (should be in meter)
+    #camera_k    = d.get_camera_intrinsics()
+    args        = process_arguments()
+    K           = np.array([[637,0,640],[0,637,360],[0,0,1]])  # hard-coded intrinsics; the camera query above is disabled
+
+    # init
+    model       = foundation_stereo_algo_init(args)
+
+    # create visualizer and window.
+    vis         = o3d.visualization.Visualizer()
+    vis.create_window(height=720, width=1280)
+
+    # initialize pointcloud instance.
+    pcd         = o3d.geometry.PointCloud()
+    # *optionally* add initial points
+    points      = np.random.rand(10, 3)*1000
+    pcd.points  = o3d.utility.Vector3dVector(points)
+    us, vs      = None, None
+
+    # include it in the visualizer before non-blocking visualization.
+    vis.add_geometry(pcd)    
+    
+    ret  = False
+    while not ret:
+        # frame is I1,I2, D data
+        retf, frame = d.read()
+        if retf is False:
+            print('is your camera open/connected?')
+            break
+    
+        # extract
+        img_left, img_right, img_depth_rs = preprocess(frame, scale_factor=1.0)
+
+        t_start         = time.time()
+        img_disparity   = foundation_stereo_algo(args, model, img_left, img_right)
+        print(f'Disp : {img_disparity.min()} - {img_disparity.max()}')
+        img_depth_fs    = convert_disparity_to_depth(camera_bf, img_disparity)
+        print(f'Depth : {img_depth_fs.min()} - {img_depth_fs.max()}')
+        depth_error    = depth_fs_rs_error(img_depth_rs, img_depth_fs)
+        print(f'Proces time : {time.time()-t_start} sec')
+        
+        # massage
+        #cmap, min_val, max_val = None, None, None
+        #img_depth_fs = vis_disparity(img_disparity, min_val=min_val, max_val=max_val, cmap=cmap, color_map=cv2.COLORMAP_TURBO)
+        #xyz_map, pts     = depth2xyzmap(img_depth_fs, K)
+        pts, us, vs     = depth2xyz(img_depth_fs, K, us, vs, zmin=100)  # us/vs from the first frame are reused afterwards
+        clr             = np.zeros_like(pts)
+        clr[:,0] = clr[:,1] = clr[:,2] = img_left.flatten()
+
+        # show point cloud
+        #pcd.points.extend(pts)
+        
+        #vis.remove_geometry(pcd)
+        #pcd = o3d.geometry.PointCloud()
+        pcd.points = o3d.utility.Vector3dVector(pts)
+        #pcd.colors = o3d.utility.Vector3dVector(clr)
+        downpcd = pcd.voxel_down_sample(voxel_size=0.1)
+        #vis.add_geometry(pcd)
+        
+        # only pcd was registered via add_geometry(), so push the down-sampled points into it;
+        # passing the unregistered downpcd to update_geometry() would leave the view unchanged
+        pcd.points = downpcd.points
+        vis.update_geometry(pcd)
+
+        keep_running = vis.poll_events()
+        if not keep_running: break
+        vis.update_renderer()        
+
+        # show
+        ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+        ret2 = show_images_depth(img_depth_fs, None, fig_name='Depth FS')
+        ret3 = show_images_depth(depth_error, None, fig_name='Depth Error')
+        ret4 = show_images_depth(img_left.astype(np.uint8), img_right.astype(np.uint8), fig_name='Images L-R')
+        ret = ret1 or ret2 or ret3 or ret4
+        
+    d.release()
+    vis.destroy_window()
+
+def test_video_stream_rs_versus_foundation_x2():
+    "Same live error display as test_video_error_rs_versus_foundation, but with args.scale forced to 0.5 so the model runs on half-size images."
+    d           = RealSense(mode = 'd16', use_ir = True, frame_size = (1280,720))  # NOTE(review): RealSense is never imported in this file (its import is commented out)
+    #d.load_preset_from_file('vpi/preset_rsm.json')
+    #d.set_exposure(100000) # 10ms
+    #d.switch_projector(False)
+    #self.cap.set_laser_power(100) # max power
+    camera_bf   = d.get_bf() # for depth conversion (should be in meter)
+    args        = process_arguments()
+    args.scale  = 0.5
+
+    # init
+    model       = foundation_stereo_algo_init(args)
+    
+    ret  = False
+    while not ret:
+        # frame is I1,I2, D data
+        retf, frame = d.read()
+        if retf is False:
+            print('is your camera open/connected?')
+            break
+    
+        # extract
+        img_left, img_right, img_depth_rs = preprocess(frame, scale_factor=1.0)
+
+        # timed section: disparity -> depth -> error against the sensor depth
+        t_start         = time.time()
+        img_disparity   = foundation_stereo_algo(args, model, img_left, img_right)
+        img_depth_fs    = convert_disparity_to_depth(camera_bf, img_disparity)
+        depth_error    = depth_fs_rs_error(img_depth_rs, img_depth_fs)
+        print(f'Proces time : {time.time()-t_start} sec')
+        
+        # massage
+        #cmap, min_val, max_val = None, None, None
+        #img_depth_fs = vis_disparity(img_disparity, min_val=min_val, max_val=max_val, cmap=cmap, color_map=cv2.COLORMAP_TURBO)
+
+        # show; any window reporting 'q' ends the loop
+        ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+        ret2 = show_images_depth(img_depth_fs, None, fig_name='Depth FS')
+        ret3 = show_images_depth(depth_error, None, fig_name='Depth Error', vmax = 100)
+        ret4 = show_images_depth(img_left.astype(np.uint8), img_right.astype(np.uint8), fig_name='Images L-R')
+        ret = ret1 or ret2 or ret3 or ret4
+
+        # d.save_image(img_depth_rs,fname='depth_rs')
+        # d.save_image(img_depth_fs,fname='depth_fs')
+        # d.save_image(img_left,fname='img_left')
+        # d.save_image(img_right,fname='img_right')
+        # plt.show()
+        
+    d.release()
+
+def merge_fs_rs(depth_rs, depth_fs):
+    """Fill horizontal runs of invalid pixels (depth_rs <= 1) in each row with values from depth_fs.
+
+    A run is filled only when depth_fs at the last valid pixel before it and at the first valid pixel
+    after it differ by less than 10 %; the copied span starts at that last valid pixel. Returns a copy.
+    """
+    depth_rs_out = depth_rs.copy()
+    valid_rs     = depth_rs > 1
+    start_rs     = np.zeros_like(valid_rs)
+    start_rs[:,:-1] = np.logical_and(valid_rs[:,:-1] , ~valid_rs[:,1:]) # if pixel k is valid and k + 1 is not
+    stop_rs      = np.zeros_like(valid_rs)
+    stop_rs[:,1:] = np.logical_and(~valid_rs[:,:-1] , valid_rs[:,1:]) # if pixel k-1 is not valid and k  is valid
+    fs_signed    = depth_fs.astype(np.float32)  # unsigned integer input would wrap around in the subtraction below
+
+    for r in range(depth_rs.shape[0]):
+        start_ind = np.flatnonzero(start_rs[r,:])
+        stop_ind  = np.flatnonzero(stop_rs[r,:])     # ascending column order
+        for s in start_ind:
+            k = np.searchsorted(stop_ind, s, side='right')   # first run end located after s
+            if k >= len(stop_ind): continue                   # run reaches the image border: leave it
+            f = stop_ind[k]
+            if np.abs(fs_signed[r,s] - fs_signed[r,f]) < 0.1*fs_signed[r,s]:
+                depth_rs_out[r,s:f] = depth_fs[r,s:f]
+
+    return depth_rs_out
+      
+def test_video_stream_rs_fs_merge():
+    "Live loop at args.scale = 0.5: fill invalid sensor-depth runs via merge_fs_rs, display the maps and save depth and images of every frame."
+    d           = RealSense(mode = 'd16', use_ir = True, frame_size = (1280,720))  # NOTE(review): RealSense is never imported in this file (its import is commented out)
+    #d.load_preset_from_file('vpi/preset_rsm.json')
+    #d.set_exposure(100000) # 10ms
+    #d.switch_projector(False)
+    #self.cap.set_laser_power(100) # max power
+    camera_bf   = d.get_bf() # for depth conversion (should be in meter)
+    args        = process_arguments()
+    args.scale  = 0.5
+
+    # init
+    model       = foundation_stereo_algo_init(args)
+    
+    ret  = False
+    while not ret:
+        # frame is I1,I2, D data
+        retf, frame = d.read()
+        if retf is False:
+            print('is your camera open/connected?')
+            break
+    
+        # extract
+        img_left, img_right, img_depth_rs = preprocess(frame, scale_factor=1.0)
+
+        # timed section: disparity -> depth -> gap filling of the sensor depth
+        t_start         = time.time()
+        img_disparity   = foundation_stereo_algo(args, model, img_left, img_right)
+        img_depth_fs    = convert_disparity_to_depth(camera_bf, img_disparity)
+        img_depth_merge = merge_fs_rs(img_depth_rs, img_depth_fs)
+        print(f'Proces time : {time.time()-t_start} sec')
+        
+        # massage
+        #cmap, min_val, max_val = None, None, None
+        #img_depth_fs = vis_disparity(img_disparity, min_val=min_val, max_val=max_val, cmap=cmap, color_map=cv2.COLORMAP_TURBO)
+
+        # show; any window reporting 'q' ends the loop
+        ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+        ret2 = show_images_depth(img_depth_fs, None, fig_name='Depth FS')
+        ret3 = show_images_depth(img_depth_merge, None, fig_name='Depth Merge', vmax = 100)
+        ret4 = show_images_depth(img_left.astype(np.uint8), img_right.astype(np.uint8), fig_name='Images L-R')
+        ret = ret1 or ret2 or ret3 or ret4
+
+        # unlike the other streaming tests, saving is enabled here and runs on every frame
+        d.save_image(img_depth_rs,fname='depth_rs')
+        d.save_image(img_depth_fs,fname='depth_fs')
+        d.save_image(img_left,fname='img_left')
+        d.save_image(img_right,fname='img_right')
+        # plt.show()
+        
+    d.release()
+
+def test_faro_rs_fs_error():
+    "Run the stereo model (args.scale = 0.5) on 8 randomly drawn DataSource items and display its depth next to the item's RS and FARO depth maps and both error maps."
+
+    #d           = RealSense(mode = 'd16', use_ir = True, frame_size = (1280,720))
+    d           = DataSource()
+    img_num     = d.init_directory()    
+    camera_bf   = d.get_bf() # for depth conversion (should be in meter)
+
+    args        = process_arguments()
+    args.scale  = 0.5
+    #args.model_dir = '/home/administrato/dev/Fast-FoundationStereo/weights/20-30-48/model_finetuned_faro.pth'
+    #args.model_dir = '../weights/20-30-48/model_finetuned_faro.pth'
+
+    # init
+    model       = foundation_stereo_algo_init(args)
+
+
+    img_index   = np.random.randint(0,img_num,8)  # 8 indices in [0, img_num), drawn with replacement
+    for k in img_index:
+
+        # out_data is indexed by key below: left/right images plus the RS and FARO depth maps
+        out_data       = d.get_item(k, debug = True)
+        #out_data       = d.load_specific_files(debug = True)
+        #print(out_data)
+    
+        # extract
+        img_left, img_right, img_depth_rs, img_depth_faro = out_data["img_left"], out_data["img_right"], out_data["img_depth_rs"], out_data["img_depth_faro"]
+        print(img_left.shape, img_right.shape)
+
+        # process
+        t_start         = time.time()
+        img_disparity   = foundation_stereo_algo(args, model, img_left, img_right)
+        img_depth_fs    = convert_disparity_to_depth(camera_bf, img_disparity)
+        img_error_fs    = depth_fs_rs_error(img_depth_rs, img_depth_fs)
+        img_error_faro  = depth_fs_rs_error(img_depth_faro, img_depth_fs)
+        print(f'Proces time : {time.time()-t_start} sec')
+
+        # show; the error maps are cast to uint8 first, so show_images_depth displays them without colour mapping
+        ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+        ret2 = show_images_depth(img_depth_fs, None, fig_name='Depth FS', vmax = 400)
+        ret3 = show_images_depth(img_depth_faro, None, fig_name='Depth Faro')
+        ret4 = show_images_depth(img_error_fs.astype(np.uint8),     None, fig_name='Error RS-FS', vmax = 100)
+        ret5 = show_images_depth(img_error_faro.astype(np.uint8), None, fig_name='Error Faro-FS', vmax = 100)
+        ret6 = show_images_depth(img_left.astype(np.uint8), None, fig_name='Images L')
+        ret7 = show_images_depth(img_right.astype(np.uint8), None, fig_name='Images R')
+        ret = ret1 or ret2 or ret3 or ret4 or ret5 or ret6 or ret7
+        if ret: break
+
+        # d.save_image(img_depth_rs,fname='depth_rs')
+        #d.save_image(img_depth_fs,fname='depth_fs')
+        # d.save_image(img_left,fname='img_left')
+        # d.save_image(img_right,fname='img_right')
+        # plt.show()
+        
+    #d.close()  # TODO confirm that DataSource offers close()
+
+
+if __name__ == '__main__':
+    # exactly one scenario is enabled at a time; the disabled ones are kept for manual switching
+    #test_video_stream_rs_and_foundation() # ok
+    #test_video_error_rs_versus_foundation() # ok
+    # test_point_cloud_rt() # ok
+    #test_point_cloud_rs_versus_foundation()
+    #test_video_stream_rs_versus_foundation_x2() # ok
+    #test_video_stream_rs_fs_merge()
+    test_faro_rs_fs_error()
+
+
+
+
+
+
+
+
+    
\ No newline at end of file
diff --git a/scripts/run_fast_foundation_with_rs.py b/scripts/run_fast_foundation_with_rs.py
new file mode 100644
index 0000000..10bce17
--- /dev/null
+++ b/scripts/run_fast_foundation_with_rs.py
@@ -0,0 +1,763 @@
+
+'''
+python scripts/run_fast_foundation_with_rs.py - github
+
+Environment:
+    ffs - GPU laptop
+
+Installation:
+    python -m venv ./envs/ffs
+    source ./envs/ffs/bin/activate
+    pip install torch==2.6.0 torchvision==0.21.0 xformers --index-url https://download.pytorch.org/whl/cu124
+    cd Fast-FoundationStereo
+    pip install -r requirements.txt
+    pip install pyrealsense2
+    download weights : https://drive.google.com/drive/folders/1HuTt7UIp7gQsMiDvJwVuWmKpvFzIIMap
+
+    python scripts/run_demo.py --model_dir weights/20-30-48/model_best_bp2_serialize.pth --left_file demo_data/left.png --right_file demo_data/right.png --intrinsic_file assets/K.txt --out_dir output/ --remove_invisible 0 --denoise_cloud 1  --scale 1 --get_pc 1 --valid_iters 8 --max_disp 192 --zfar 100
+
+
+'''
+
+
+
+
+import numpy as np
+from argparse import ArgumentParser
+
+import os,sys
+code_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(f'{code_dir}/../')
+from omegaconf import OmegaConf
+from core.utils.utils import InputPadder
+import argparse, torch, logging, yaml
+import numpy as np
+from Utils import (
+    AMP_DTYPE, set_logging_format, set_seed, vis_disparity,
+    depth2xyzmap, toOpen3dCloud, o3d,
+)
+import cv2
+#from faro_data_manager import DataSource
+from scripts.data_manager_faro_laptop import DataSource
+
+
+def process_arguments():
+    """Parse the command-line options of the stereo scripts.
+
+    Default file locations are resolved relative to the directory of this script.
+
+    Returns:
+        argparse.Namespace with the model, input, output, point-cloud and
+        inference options listed below.
+    """
+    code_dir = os.path.dirname(os.path.realpath(__file__))
+
+    # One parser only: the previous code built an ArgumentParser() and then
+    # immediately replaced it with a second, identical one.
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model_dir', default=f'{code_dir}/../weights/20-30-48/model_best_bp2_serialize.pth', type=str)
+    parser.add_argument('--left_file', default=f'{code_dir}/../demo_data/left.png', type=str)
+    parser.add_argument('--right_file', default=f'{code_dir}/../demo_data/right.png', type=str)
+    parser.add_argument('--intrinsic_file', default=f'{code_dir}/../demo_data/K.txt', type=str, help='camera intrinsic matrix and baseline file')
+    # NOTE(review): the default points into another user's home directory and the
+    # init routine wipes out_dir - consider passing --out_dir explicitly.
+    parser.add_argument('--out_dir', default='/home/bowen/debug/stereo_output', type=str)
+    parser.add_argument('--remove_invisible', default=1, type=int)
+    parser.add_argument('--denoise_cloud', default=0, type=int)
+    parser.add_argument('--denoise_nb_points', type=int, default=30, help='number of points to consider for radius outlier removal')
+    parser.add_argument('--denoise_radius', type=float, default=0.03, help='radius to use for outlier removal')
+    parser.add_argument('--scale', default=1, type=float)
+    parser.add_argument('--hiera', default=0, type=int)
+    parser.add_argument('--get_pc', type=int, default=1, help='save point cloud output')
+    parser.add_argument('--valid_iters', type=int, default=8, help='number of flow-field updates during forward pass')
+    parser.add_argument('--max_disp', type=int, default=192, help='maximum disparity')
+    parser.add_argument('--zfar', type=float, default=100, help="max depth to include in point cloud")
+
+    return parser.parse_args()
+
+# Original -------------------------------------------
+
+# Original Split -------------------------------------------
+from opencv_realsense_camera import RealSense
+import time
+
+def foundation_stereo_algo_init(args):
+    """Prepare the run and load the stereo model onto the GPU.
+
+    Sets the logging format and the seed, disables autograd globally, recreates
+    ``args.out_dir`` as an empty directory, merges the ``cfg.yaml`` found next to
+    the model file with every non-None value of ``args`` and un-pickles the model.
+
+    Args:
+        args: argparse-style namespace; ``out_dir`` and ``model_dir`` are read
+            directly, all other non-None attributes override the YAML settings.
+
+    Returns:
+        The loaded model, moved to CUDA and switched to eval mode, with
+        ``valid_iters`` and ``max_disp`` taken from the merged settings.
+        The merged settings themselves are local; the caller's ``args`` is not modified.
+    """
+    import shutil  # local import: only this function needs it
+
+    set_logging_format()
+    set_seed(0)
+    torch.autograd.set_grad_enabled(False)
+
+    # Recreate an empty output directory without going through a shell, so a path
+    # with spaces or shell metacharacters cannot be misinterpreted. The old shell
+    # command ignored failures, therefore this stays best-effort and only warns.
+    shutil.rmtree(args.out_dir, ignore_errors=True)
+    try:
+        os.makedirs(args.out_dir, exist_ok=True)
+    except OSError as err:
+        logging.warning(f"could not create output directory {args.out_dir}: {err}")
+
+    with open(f'{os.path.dirname(args.model_dir)}/cfg.yaml', 'r') as ff:
+        cfg:dict = yaml.safe_load(ff)
+    # command-line values win over the values stored with the model
+    for key, value in vars(args).items():
+        if value is not None:
+            cfg[key] = value
+    args = OmegaConf.create(cfg)
+    logging.info(f"args:\n{args}")
+
+    # SECURITY: weights_only=False un-pickles arbitrary objects and can execute
+    # code embedded in the file - only load checkpoints from a trusted source.
+    model = torch.load(args.model_dir, map_location='cpu', weights_only=False)
+    model.args.valid_iters = args.valid_iters
+    model.args.max_disp = args.max_disp
+
+    model.cuda().eval()
+    return model
+
+def foundation_stereo_algo(args, model, np_left, np_right):
+    """Run the stereo model on one image pair and return the disparity map.
+
+    The left image is resized by ``args.scale`` and the right image is forced to
+    the same pixel size. The network output is multiplied by ``1/scale``, clipped
+    to be non-negative and resized (nearest neighbour) to the width and height
+    of ``np_left``.
+
+    Args:
+        args: settings object; ``scale``, ``hiera`` and ``valid_iters`` are read.
+        model: network exposing ``forward`` and ``run_hierachical``.
+        np_left: left image array, 2-D or with a trailing channel axis.
+        np_right: right image array with the same layout as ``np_left``.
+
+    Returns:
+        2-D numpy array of disparities with the height and width of ``np_left``.
+    """
+    scale = args.scale
+
+    left, right = np_left, np_right
+    if left.ndim == 2:
+        # single-channel input: replicate it into three identical channels
+        left = np.tile(left[..., None], (1, 1, 3))
+        right = np.tile(right[..., None], (1, 1, 3))
+
+    # keep only the first three channels
+    left, right = left[..., :3], right[..., :3]
+
+    left = cv2.resize(left, fx=scale, fy=scale, dsize=None)
+    proc_h, proc_w = left.shape[:2]
+    right = cv2.resize(right, dsize=(proc_w, proc_h))
+
+    def as_cuda_batch(img):
+        # HWC numpy image -> 1xCxHxW float tensor on the GPU
+        return torch.as_tensor(img).cuda().float()[None].permute(0, 3, 1, 2)
+
+    left_t = as_cuda_batch(left)
+    right_t = as_cuda_batch(right)
+    padder = InputPadder(left_t.shape, divis_by=32, force_square=False)
+    left_t, right_t = padder.pad(left_t, right_t)
+
+    with torch.amp.autocast('cuda', enabled=True, dtype=AMP_DTYPE):
+        if args.hiera:
+            disp = model.run_hierachical(left_t, right_t, iters=args.valid_iters, test_mode=True, small_ratio=0.5)
+        else:
+            disp = model.forward(left_t, right_t, iters=args.valid_iters, test_mode=True, optimize_build_volume='pytorch1')
+
+    disp = padder.unpad(disp.float())
+    # disparities were measured on the rescaled pair; bring them back to input pixels
+    disp = disp.data.cpu().numpy().reshape(proc_h, proc_w) * (1 / scale)
+    disp = disp.clip(0, None)
+
+    # cv2 expects (width, height)
+    full_size = (np_left.shape[1], np_left.shape[0])
+    return cv2.resize(disp, full_size, interpolation=cv2.INTER_NEAREST)
+
+def convert_disparity_to_depth(BF, disparity):
+    """Convert a disparity map to a uint16 depth map using ``depth = BF / disparity``.
+
+    Args:
+        BF: scalar numerator of the conversion (the callers pass the camera's
+            ``get_bf()`` value).
+        disparity: array-like of disparities; entries <= 0.1 are treated as invalid.
+
+    Returns:
+        uint16 array of the same shape. Invalid pixels are 0 and values above
+        65535 saturate at 65535 instead of wrapping around on the integer cast.
+    """
+    disparity = np.asarray(disparity, dtype=np.float32)
+    depth = np.zeros_like(disparity)
+    valid = disparity > 0.1
+    depth[valid] = BF / disparity[valid]
+
+    # Saturate before narrowing: casting an out-of-range float to uint16 would
+    # otherwise produce wrapped (meaningless) depth values for small disparities.
+    np.clip(depth, 0, np.iinfo(np.uint16).max, out=depth)
+    return depth.astype(np.uint16)
+
+def depth_opencv_rs_merge(depth_rs, depth_cv):
+    """Merge two depth maps: start from ``depth_rs`` and take ``depth_cv`` at close range.
+
+    Pixels whose ``depth_cv`` value lies strictly between 80 and 550 are copied
+    from ``depth_cv``; every other pixel keeps its ``depth_rs`` value.
+    The inputs are left untouched; the result has the dtype of ``depth_rs``.
+    """
+    merged = np.array(depth_rs, copy=True)
+
+    # close-range window in which the second depth source is preferred
+    near = (depth_cv > 80) & (depth_cv < 550)
+    merged[near] = depth_cv[near]
+
+    return merged
+
+def depth_fs_rs_error(depth_rs, depth_fs):
+    """Return the per-pixel absolute difference between two depth maps as float32.
+
+    Pixels where ``depth_rs`` is below 2 are reported with an error of 0.
+    """
+    reference = depth_rs.astype(np.float32)
+    estimate = depth_fs.astype(np.float32)
+
+    error = np.absolute(reference - estimate)
+    # no reference measurement at this pixel -> no error reported
+    error[reference < 2] = 0
+
+    return error
+
+def preprocess(frame, scale_factor = 0.5):
+    """Convert a 3-channel frame to float32 and split it into its channels.
+
+    Args:
+        frame: array indexed as [row, col, channel]; the callers pass frames whose
+            channels are left image, right image and depth, in that order.
+        scale_factor: currently unused; no resizing is performed.
+
+    Returns:
+        Tuple (imgL, imgR, imgD) holding channels 0, 1 and 2 as float32.
+    """
+    planes = frame.astype(np.float32)
+    imgL, imgR, imgD = (planes[:, :, channel] for channel in (0, 1, 2))
+    return imgL, imgR, imgD
+
+def show_images_depth(imgD = None, imgC = None,  fig_num = 1, fig_name = 'Depth Image', vmax = 1500):
+    """Display one or two images in an OpenCV window and poll the keyboard.
+
+    Args:
+        imgD: first image or None.
+        imgC: second image or None; when both are given and have the same shape
+            they are shown side by side.
+        fig_num: unused, kept for interface compatibility.
+        fig_name: window title prefix.
+        vmax: unused, kept for interface compatibility.
+
+    Returns:
+        True when 'q' was pressed while the window was polled, otherwise False.
+        False is also returned, without showing anything, when no image is given
+        or when the two images have different shapes.
+    """
+    if imgD is None and imgC is None:
+        print('No images found')
+        return False
+
+    if imgD is None:        # only the second image is available
+        img_show = imgC
+    elif imgC is None:      # only the first image is available
+        img_show = imgD
+    elif imgD.shape == imgC.shape:
+        img_show = np.concatenate((imgD, imgC), axis = 1)
+    else:
+        # previously this case fell through with img_show unbound (UnboundLocalError)
+        print(f'Image shapes differ: {imgD.shape} vs {imgC.shape}')
+        return False
+
+    # 16-bit / float data: scale by 0.1 into 8 bit and colorize
+    if img_show.dtype == 'uint16' or img_show.dtype == 'float32':
+        img_show    = cv2.convertScaleAbs(img_show, alpha=0.1)
+        img_show    = cv2.applyColorMap(img_show, cv2.COLORMAP_TURBO)
+
+    # very wide images are halved so they fit on screen
+    if img_show.shape[1] > 2400:
+        img_show = cv2.pyrDown(img_show)
+
+    cv2.imshow(f'{fig_name} (q-exit)', img_show)
+    ch = cv2.waitKey(5)
+    return ch == ord('q')
+
+def show_point_cloud(args, disp, img0_ori):
+    """Build a colored point cloud from a disparity map and show it in an Open3D window.
+
+    Does nothing when ``args.get_pc`` is falsy. The intrinsic file is read as:
+    first line nine numbers of the 3x3 matrix K, second line the baseline.
+    The depth map ``K[0,0]*baseline/disp`` is also saved to
+    ``<out_dir>/depth_meter.npy``. Points with z outside (0, args.zfar] are dropped
+    and, when ``args.denoise_cloud`` is set, radius outliers are removed.
+    The call blocks until the viewer window is closed.
+    """
+    if not args.get_pc:
+        return
+
+    with open(args.intrinsic_file, 'r') as f:
+        lines = f.readlines()
+
+    K = np.array([float(tok) for tok in lines[0].rstrip().split()]).astype(np.float32).reshape(3,3)
+    baseline = float(lines[1])
+    # the first two rows of K scale with the image size
+    K[:2] *= args.scale
+
+    depth = K[0,0]*baseline/disp
+    np.save(f'{args.out_dir}/depth_meter.npy', depth)
+    xyz_map = depth2xyzmap(depth, K)
+    pcd = toOpen3dCloud(xyz_map.reshape(-1,3), img0_ori.reshape(-1,3))
+
+    # keep only points in front of the camera and not farther than zfar
+    z_values = np.asarray(pcd.points)[:,2]
+    keep_ids = np.flatnonzero((z_values > 0) & (z_values <= args.zfar))
+    pcd = pcd.select_by_index(keep_ids)
+
+    if args.denoise_cloud:
+        logging.info("[Optional step] denoise point cloud...")
+        _, inlier_ids = pcd.remove_radius_outlier(nb_points=args.denoise_nb_points, radius=args.denoise_radius)
+        pcd = pcd.select_by_index(inlier_ids)
+
+    logging.info("Visualizing point cloud. Press ESC to exit.")
+    vis = o3d.visualization.Visualizer()
+    vis.create_window()
+    vis.add_geometry(pcd)
+
+    render_opt = vis.get_render_option()
+    render_opt.point_size = 1.0
+    render_opt.background_color = np.array([0.5, 0.5, 0.5])
+
+    # look at the point with the smallest z
+    ctr = vis.get_view_control()
+    ctr.set_front([0, 0, -1])
+    nearest = np.asarray(pcd.points)[:,2].argmin()
+    ctr.set_lookat(np.asarray(pcd.points)[nearest])
+    ctr.set_up([0, -1, 0])
+
+    vis.run()
+    vis.destroy_window()
+
+def depth2xyz(depth:np.ndarray, K, us=None, vs=None, zmin=0.1):
+    """Back-project a depth image to an (N,3) array of x, y, z points.
+
+    Args:
+        depth: 2-D depth image.
+        K: 3x3 matrix indexed as K[row, col]; K[0,0], K[1,1], K[0,2] and K[1,2] are used.
+        us: flattened column indices; built for every pixel when None.
+        vs: flattened row indices; rebuilt together with ``us`` when ``us`` is None.
+        zmin: depth values below this are replaced by ``zmin``.
+
+    Returns:
+        Tuple (pts, us, vs) so the index arrays can be passed back in on the next call.
+    """
+    if us is None:
+        # one-time construction of the pixel grid, row-major order
+        H, W = depth.shape[:2]
+        rows, cols = np.indices((H, W))
+        vs = rows.ravel()
+        us = cols.ravel()
+
+    zs = depth[vs, us]
+    zs[zs < zmin] = zmin
+
+    xs = (us - K[0,2]) * zs / K[0,0]
+    ys = (vs - K[1,2]) * zs / K[1,1]
+    pts = np.column_stack((xs.ravel(), ys.ravel(), zs.ravel()))  # (N,3)
+
+    return pts, us, vs
+
+# ----------------------------------------
+import open3d as o3d
+import numpy as np
+import time
+
+def test_point_cloud_rt():
+    """Demo: keep adding random points to an Open3D point cloud in a non-blocking viewer.
+
+    Source - https://stackoverflow.com/a/74669788
+    Posted by Javier TG, modified by community. See post 'Timeline' for change history
+    Retrieved 2026-02-10, License - CC BY-SA 4.0
+
+    The loop ends when the viewer reports that its window was closed.
+    """
+    update_period = 0.01   # seconds between two point insertions
+    batch_size = 10        # number of points added per insertion
+
+    vis = o3d.visualization.Visualizer()
+    vis.create_window(height=480, width=640)
+
+    # the cloud must be registered with the viewer before the non-blocking loop starts
+    pcd = o3d.geometry.PointCloud()
+    pcd.points = o3d.utility.Vector3dVector(np.random.rand(10, 3))
+    vis.add_geometry(pcd)
+
+    last_update = time.time()
+    while True:
+        if time.time() - last_update > update_period:
+            pcd.points.extend(np.random.rand(batch_size, 3))
+            vis.update_geometry(pcd)
+            last_update = time.time()
+
+        window_alive = vis.poll_events()
+        vis.update_renderer()
+        if not window_alive:
+            break
+
+    vis.destroy_window()
+
+def test_video_stream_rs_and_foundation():
+    """Stream camera frames through the stereo model and show RS, FS and merged depth.
+
+    Runs until 'q' is pressed in one of the windows or the camera stops
+    delivering frames. The camera is released even when an exception is raised.
+    """
+    d           = RealSense(mode = 'd16', use_ir = True, frame_size = (1280,720))
+    try:
+        d.switch_projector(True)
+        camera_bf   = d.get_bf() # used for the disparity -> depth conversion
+        args        = process_arguments()
+        model       = foundation_stereo_algo_init(args)
+
+        ret  = False
+        while not ret:
+            # frame holds left image, right image and depth as channels
+            retf, frame = d.read()
+            if retf is False:
+                print('is your camera open/connected?')
+                break
+
+            img_left, img_right, img_depth_rs = preprocess(frame, scale_factor=1.0)
+
+            t_start         = time.time()
+            img_disparity   = foundation_stereo_algo(args, model, img_left, img_right)
+            print(f'Disp : {img_disparity.min()} - {img_disparity.max()}')
+            img_depth_fs    = convert_disparity_to_depth(camera_bf, img_disparity)
+            print(f'Depth : {img_depth_fs.min()} - {img_depth_fs.max()}')
+            depth_merged    = depth_opencv_rs_merge(img_depth_rs, img_depth_fs)
+            print(f'Proces time : {time.time()-t_start} sec')
+
+            # every window is refreshed before the quit flags are combined
+            ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+            ret2 = show_images_depth(img_depth_fs, None, fig_name='Depth FS')
+            ret3 = show_images_depth(depth_merged, None, fig_name='Depth Merged')
+            ret4 = show_images_depth(img_left.astype(np.uint8), img_right.astype(np.uint8), fig_name='Images L-R')
+            ret = ret1 or ret2 or ret3 or ret4
+    finally:
+        # previously skipped when model loading or inference raised
+        d.release()
+
+def test_video_error_rs_versus_foundation():
+    """Stream camera frames and show the absolute depth error between RS and FS.
+
+    Runs until 'q' is pressed in one of the windows or the camera stops
+    delivering frames. The camera is released even when an exception is raised.
+    """
+    d           = RealSense(mode = 'd16', use_ir = True, frame_size = (1280,720))
+    try:
+        camera_bf   = d.get_bf() # used for the disparity -> depth conversion
+        args        = process_arguments()
+        model       = foundation_stereo_algo_init(args)
+
+        ret  = False
+        while not ret:
+            # frame holds left image, right image and depth as channels
+            retf, frame = d.read()
+            if retf is False:
+                print('is your camera open/connected?')
+                break
+
+            img_left, img_right, img_depth_rs = preprocess(frame, scale_factor=1.0)
+
+            t_start         = time.time()
+            img_disparity   = foundation_stereo_algo(args, model, img_left, img_right)
+            img_depth_fs    = convert_disparity_to_depth(camera_bf, img_disparity)
+            depth_error     = depth_fs_rs_error(img_depth_rs, img_depth_fs)
+            print(f'Proces time : {time.time()-t_start} sec')
+
+            # every window is refreshed before the quit flags are combined
+            ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+            ret2 = show_images_depth(img_depth_fs, None, fig_name='Depth FS')
+            ret3 = show_images_depth(depth_error, None, fig_name='Depth Error', vmax = 100)
+            ret4 = show_images_depth(img_left.astype(np.uint8), img_right.astype(np.uint8), fig_name='Images L-R')
+            ret = ret1 or ret2 or ret3 or ret4
+    finally:
+        # previously skipped when model loading or inference raised
+        d.release()
+
+def test_point_cloud_rs_versus_foundation():
+    """Stream camera frames and show the FS depth as a live Open3D point cloud.
+
+    The RS depth, FS depth, their absolute error and the input images are shown
+    in OpenCV windows as well. Runs until 'q' is pressed, the Open3D window is
+    closed or the camera stops delivering frames. Camera and viewer are released
+    even when an exception is raised.
+    """
+    d           = RealSense(mode = 'd16', use_ir = True, frame_size = (1280,720))
+    vis         = None
+    try:
+        camera_bf   = d.get_bf() # used for the disparity -> depth conversion
+        args        = process_arguments()
+        # NOTE(review): hard-coded intrinsics, presumably for the 1280x720 stream - confirm
+        K           = np.array([[637,0,640],[0,637,360],[0,0,1]])
+
+        model       = foundation_stereo_algo_init(args)
+
+        vis         = o3d.visualization.Visualizer()
+        vis.create_window(height=720, width=1280)
+
+        # the cloud registered here is the one that must be updated in the loop
+        pcd         = o3d.geometry.PointCloud()
+        pcd.points  = o3d.utility.Vector3dVector(np.random.rand(10, 3)*1000)
+        vis.add_geometry(pcd)
+        us, vs      = None, None
+
+        ret  = False
+        while not ret:
+            # frame holds left image, right image and depth as channels
+            retf, frame = d.read()
+            if retf is False:
+                print('is your camera open/connected?')
+                break
+
+            img_left, img_right, img_depth_rs = preprocess(frame, scale_factor=1.0)
+
+            t_start         = time.time()
+            img_disparity   = foundation_stereo_algo(args, model, img_left, img_right)
+            print(f'Disp : {img_disparity.min()} - {img_disparity.max()}')
+            img_depth_fs    = convert_disparity_to_depth(camera_bf, img_disparity)
+            print(f'Depth : {img_depth_fs.min()} - {img_depth_fs.max()}')
+            depth_error     = depth_fs_rs_error(img_depth_rs, img_depth_fs)
+            print(f'Proces time : {time.time()-t_start} sec')
+
+            # the pixel index arrays are built on the first frame and reused afterwards
+            pts, us, vs     = depth2xyz(img_depth_fs, K, us, vs, zmin=100)
+
+            # Down-sample, then write the result back into the registered cloud.
+            # Updating the separate down-sampled object, as before, had no effect
+            # because that object was never added to the visualizer.
+            pcd.points = o3d.utility.Vector3dVector(pts)
+            downpcd    = pcd.voxel_down_sample(voxel_size=0.1)
+            pcd.points = downpcd.points
+            vis.update_geometry(pcd)
+
+            keep_running = vis.poll_events()
+            if not keep_running: break
+            vis.update_renderer()
+
+            # every window is refreshed before the quit flags are combined
+            ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+            ret2 = show_images_depth(img_depth_fs, None, fig_name='Depth FS')
+            ret3 = show_images_depth(depth_error, None, fig_name='Depth Error')
+            ret4 = show_images_depth(img_left.astype(np.uint8), img_right.astype(np.uint8), fig_name='Images L-R')
+            ret = ret1 or ret2 or ret3 or ret4
+    finally:
+        # previously skipped when model loading or inference raised
+        d.release()
+        if vis is not None:
+            vis.destroy_window()
+
+def test_video_stream_rs_versus_foundation_x2():
+    """Stream camera frames through the stereo model at half resolution (scale 0.5).
+
+    Shows RS depth, FS depth and their absolute error and prints the processing
+    time per frame. Runs until 'q' is pressed in one of the windows or the camera
+    stops delivering frames. The camera is released even when an exception is raised.
+    """
+    d           = RealSense(mode = 'd16', use_ir = True, frame_size = (1280,720))
+    try:
+        camera_bf   = d.get_bf() # used for the disparity -> depth conversion
+        args        = process_arguments()
+        args.scale  = 0.5
+        model       = foundation_stereo_algo_init(args)
+
+        ret  = False
+        while not ret:
+            # frame holds left image, right image and depth as channels
+            retf, frame = d.read()
+            if retf is False:
+                print('is your camera open/connected?')
+                break
+
+            img_left, img_right, img_depth_rs = preprocess(frame, scale_factor=1.0)
+
+            t_start         = time.time()
+            img_disparity   = foundation_stereo_algo(args, model, img_left, img_right)
+            img_depth_fs    = convert_disparity_to_depth(camera_bf, img_disparity)
+            depth_error     = depth_fs_rs_error(img_depth_rs, img_depth_fs)
+            print(f'Proces time : {time.time()-t_start} sec')
+
+            # every window is refreshed before the quit flags are combined
+            ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+            ret2 = show_images_depth(img_depth_fs, None, fig_name='Depth FS')
+            ret3 = show_images_depth(depth_error, None, fig_name='Depth Error', vmax = 100)
+            ret4 = show_images_depth(img_left.astype(np.uint8), img_right.astype(np.uint8), fig_name='Images L-R')
+            ret = ret1 or ret2 or ret3 or ret4
+    finally:
+        # previously skipped when model loading or inference raised
+        d.release()
+
+def merge_fs_rs(depth_rs, depth_fs):
+    """Fill horizontal gaps of invalid ``depth_rs`` pixels with ``depth_fs`` values.
+
+    A ``depth_rs`` pixel counts as valid when its value is > 1. In every row, a run
+    of invalid pixels bounded by a valid pixel ``s`` on the left and the next valid
+    pixel ``f`` on the right is replaced by ``depth_fs`` when the ``depth_fs`` values
+    at ``s`` and ``f`` differ by less than 10% of the value at ``s``.
+
+    Args:
+        depth_rs: 2-D depth map with invalid regions.
+        depth_fs: 2-D depth map of the same shape used to fill the gaps.
+
+    Returns:
+        Copy of ``depth_rs`` with the accepted gaps filled; inputs are not modified.
+    """
+    depth_rs_out = depth_rs.copy()
+    valid_rs     = depth_rs > 1
+    # float copy for the comparison: subtracting unsigned integers would wrap around
+    fs_float     = depth_fs.astype(np.float32)
+
+    start_rs     = np.zeros_like(valid_rs)
+    start_rs[:,:-1] = valid_rs[:,:-1] & ~valid_rs[:,1:]  # pixel k valid, k+1 invalid
+    stop_rs      = np.zeros_like(valid_rs)
+    stop_rs[:,1:] = ~valid_rs[:,:-1] & valid_rs[:,1:]    # pixel k-1 invalid, k valid
+
+    for r in range(depth_rs.shape[0]):
+        start_ind = np.flatnonzero(start_rs[r])
+        stop_ind  = np.flatnonzero(stop_rs[r])
+        if start_ind.size == 0 or stop_ind.size == 0:
+            continue
+
+        # for each gap start, position of the first gap end located after it
+        # (the previous code always took stop_ind[0], whatever the start)
+        next_stop = np.searchsorted(stop_ind, start_ind, side='right')
+        for s, j in zip(start_ind, next_stop):
+            if j >= stop_ind.size:
+                continue  # gap runs to the row end, nothing bounds it on the right
+            f = stop_ind[j]
+            if abs(fs_float[r,s] - fs_float[r,f]) < 0.1*fs_float[r,s]:
+                # only the invalid pixels s+1 .. f-1 are replaced; s itself is valid
+                depth_rs_out[r,s+1:f] = depth_fs[r,s+1:f]
+
+    return depth_rs_out
+      
+def test_video_stream_rs_fs_merge():
+    """Stream camera frames and show RS depth with its gaps filled from FS depth.
+
+    The stereo model runs at scale 0.5. Runs until 'q' is pressed in one of the
+    windows or the camera stops delivering frames. The camera is released even
+    when an exception is raised.
+    """
+    d           = RealSense(mode = 'd16', use_ir = True, frame_size = (1280,720))
+    try:
+        camera_bf   = d.get_bf() # used for the disparity -> depth conversion
+        args        = process_arguments()
+        args.scale  = 0.5
+        model       = foundation_stereo_algo_init(args)
+
+        ret  = False
+        while not ret:
+            # frame holds left image, right image and depth as channels
+            retf, frame = d.read()
+            if retf is False:
+                print('is your camera open/connected?')
+                break
+
+            img_left, img_right, img_depth_rs = preprocess(frame, scale_factor=1.0)
+
+            t_start         = time.time()
+            img_disparity   = foundation_stereo_algo(args, model, img_left, img_right)
+            img_depth_fs    = convert_disparity_to_depth(camera_bf, img_disparity)
+            img_depth_merge = merge_fs_rs(img_depth_rs, img_depth_fs)
+            print(f'Proces time : {time.time()-t_start} sec')
+
+            # every window is refreshed before the quit flags are combined
+            ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+            ret2 = show_images_depth(img_depth_fs, None, fig_name='Depth FS')
+            ret3 = show_images_depth(img_depth_merge, None, fig_name='Depth Merge', vmax = 100)
+            ret4 = show_images_depth(img_left.astype(np.uint8), img_right.astype(np.uint8), fig_name='Images L-R')
+            ret = ret1 or ret2 or ret3 or ret4
+    finally:
+        # previously skipped when model loading or inference raised
+        d.release()
+
+def test_faro_rs_fs_error():
+    """Compare FS depth against RS depth and FARO depth on recorded samples.
+
+    Eight sample indices are drawn at random (with repetition) from the data
+    source. For each sample the FS depth is computed at scale 0.5 and shown
+    together with the RS depth, the FARO depth and both absolute error maps.
+    Stops early when 'q' is pressed in one of the windows.
+    """
+    d           = DataSource()
+    img_num     = d.init_directory()
+    camera_bf   = d.get_bf() # used for the disparity -> depth conversion
+
+    if not img_num:
+        # randint(0, 0) below would raise; report the empty data set instead
+        print('No images found')
+        return
+
+    args        = process_arguments()
+    args.scale  = 0.5
+    # NOTE(review): machine-specific path that overrides any --model_dir given on
+    # the command line (the assignment used to be duplicated).
+    args.model_dir = '/home/administrato/dev/Fast-FoundationStereo/weights/20-30-48/model_finetuned_faro.pth'
+
+    model       = foundation_stereo_algo_init(args)
+
+    img_index   = np.random.randint(0,img_num,8)
+    for k in img_index:
+
+        out_data       = d.get_item(k, debug = True)
+
+        img_left, img_right, img_depth_rs, img_depth_faro = out_data["img_left"], out_data["img_right"], out_data["img_depth_rs"], out_data["img_depth_faro"]
+        print(img_left.shape, img_right.shape)
+
+        t_start         = time.time()
+        img_disparity   = foundation_stereo_algo(args, model, img_left, img_right)
+        img_depth_fs    = convert_disparity_to_depth(camera_bf, img_disparity)
+        img_error_fs    = depth_fs_rs_error(img_depth_rs, img_depth_fs)
+        img_error_faro  = depth_fs_rs_error(img_depth_faro, img_depth_fs)
+        print(f'Proces time : {time.time()-t_start} sec')
+
+        # every window is refreshed before the quit flags are combined
+        ret1 = show_images_depth(img_depth_rs, None, fig_name='Depth RS')
+        ret2 = show_images_depth(img_depth_fs, None, fig_name='Depth FS', vmax = 400)
+        ret3 = show_images_depth(img_depth_faro, None, fig_name='Depth Faro')
+        ret4 = show_images_depth(img_error_fs.astype(np.uint8),     None, fig_name='Error RS-FS', vmax = 100)
+        ret5 = show_images_depth(img_error_faro.astype(np.uint8), None, fig_name='Error Faro-FS', vmax = 100)
+        ret6 = show_images_depth(img_left.astype(np.uint8), None, fig_name='Images L')
+        ret7 = show_images_depth(img_right.astype(np.uint8), None, fig_name='Images R')
+        ret = ret1 or ret2 or ret3 or ret4 or ret5 or ret6 or ret7
+        if ret: break
+
+
+if __name__ == '__main__':
+    # Manual entry point: exactly one scenario call is left enabled; the other
+    # scenarios are kept as commented-out lines so they can be switched on by hand.
+    #main()
+    #test_video_stream_rs_and_foundation() # ok
+    #test_video_error_rs_versus_foundation() # ok
+    # test_point_cloud_rt() # ok
+    #test_point_cloud_rs_versus_foundation()
+    #test_video_stream_rs_versus_foundation_x2() # ok
+    test_video_stream_rs_fs_merge()
+    #test_faro_rs_fs_error()
+
+
+
+
+
+
+
+
+    
\ No newline at end of file
diff --git a/scripts/run_plane_detector_fs.py b/scripts/run_plane_detector_fs.py
new file mode 100644
index 0000000..839d63d
--- /dev/null
+++ b/scripts/run_plane_detector_fs.py
@@ -0,0 +1,2651 @@
+#!/usr/bin/env python
+
+'''
+Tester for multi-planar plane detector with foundation stereo
+==================
+
+Using depth image to compute depth planes locally for specific ROI.
+
+
+Usage:
+
+Environment : 
+    ..\\docker
+
+Install : 
+
+
+
+'''
+
+import sys 
+import numpy as np
+import cv2 as cv
+import random
+import unittest
+#from scipy.spatial.transform import Rotation as Rot
+import matplotlib.pyplot as plt
+import logging 
+log = logging.getLogger("robot")
+log.setLevel(logging.DEBUG)
+from opencv_realsense_camera import RealSense, draw_str
+from run_fast_foundation_with_rs import convert_disparity_to_depth, foundation_stereo_algo_init, foundation_stereo_algo, process_arguments
+
+#!/usr/bin/env python
+
+
+
+
+
+
+#%% Main
+class PlaneDetector:
+    def __init__(self, detect_type = 'p', image_size = (1280,720)):
+
+        self.detect_type    = detect_type   # plane
+
+        self.frame_size     = image_size
+        self.img            = None
+        self.cam_matrix     = np.array([[1000,0,self.frame_size[0]/2],[0,1000,self.frame_size[1]/2],[0,0,1]], dtype = np.float32)
+        self.cam_distort    = np.array([0,0,0,0,0],dtype = np.float32)
+
+        self.img3d          = None  # contains x,y and depth planes
+        self.img_xyz        = None  # contains X,Y,Z information after depth image to XYZ transform
+        self.img_mask       = None  # which pixels belong to the plane
+        self.rect           = None  # roi
+        self.img_roi        = None  # roi image
+        self.img_roi_normal = None  # normals at roi image
+
+        # detector type     
+        self.matrix_inv     = None     # holds pseudo-inverse of the direction matrix (only assigned in commented-out code)
+        self.rect_z         = None     # flat z for ROI         
+        self.rect_dir       = None     # direct u,v,1 for ROI
+        self.rect_xyz       = None     # direct u,v,1 multiplied by z ROI 
+        self.full_dir       = None     # direct u,v,1 for entire image
+        self.full_xyz       = None     # direct u,v,1 multiplied by z entire image         
+        self.roi_index      = None     # flat indices of the ROI points in the original image
+        self.plane_params   = None     # rvec not normalized
+        self.plane_center   = None     # tvec
+        self.plane_confidence = 0      # reliability of the detection
+
+        #self.corner_ind     = [0, 10, 40, 50]  # corners of the rectangle for the projection
+        self.rect_3d        = None    # roi but projected on 3D 
+
+        # params
+        self.MIN_SPLIT_SIZE  = 32
+        self.MIN_STD_ERROR   = 0.01
+
+        # color for the mask
+        self.color_mask     = np.random.randint(0,255,3) # random color
+
+        # help variable
+        self.ang_vec     = np.zeros((3,1))  # help variable
+
+    def init_image(self, img = None):
+        "load image"
+
+        self.img            = img
+        h,w                 = img.shape[:2]
+        self.frame_size     = (w,h)
+        self.img_mask       = np.zeros((h,w))
+        return True
+
+    def init_roi(self, roi_type = 1):
+        "select a predefined ROI [x0,y0,x1,y1] by roi_type; unknown types keep the full frame"
+        w,h     = self.frame_size[0],self.frame_size[1]
+        w2,h2   = w>>1,h>>1
+        roi     = [0,0,w,h]
+        if roi_type == 1:
+            roi = [w2-3,h2-3,w2+3,h2+3] # xlu, ylu, xrb, yrb
+        elif roi_type == 2:
+            roi = [300,220,340,260] # xlu, ylu, xrb, yrb
+        elif roi_type == 3:
+            roi = [280,200,360,280] # xlu, ylu, xrb, yrb            
+        elif roi_type == 4:
+            roi = [220,140,420,340] # xlu, ylu, xrb, yrb      
+        elif roi_type == 5:
+            roi = [200,120,440,360] # xlu, ylu, xrb, yrb    
+        elif roi_type == 11:
+            roi = [w2-16,h2-16,w2+16,h2+16] # xlu, ylu, xrb, yrb             
+        elif roi_type == 12:
+            roi = [w2-32,h2-32,w2+32,h2+32] # xlu, ylu, xrb, yrb    
+        elif roi_type == 13:
+            roi = [w2-64,h2-64,w2+64,h2+64] # xlu, ylu, xrb, yrb      
+        elif roi_type == 14:
+            roi = [w2-64,h2-48,w2+64,h2+48] # xlu, ylu, xrb, yrb      
+        elif roi_type == 21: # lower center image position
+            roi = [w2-64,h2+128,w2+64,h2+196] # xlu, ylu, xrb, yrb                
+        elif roi_type == 22: # lower left image position
+            roi = [w2-400,h2+128,w2-272,h2+196] # xlu, ylu, xrb, yrb  
+        elif roi_type == 23: # lower right image position
+            roi = [w2+272,h2+128,w2+400,h2+196] # xlu, ylu, xrb, yrb       
+        elif roi_type == 31: # upper center image position
+            #roi = [w2-64,h2-196,w2+64,h2-128] # xlu, ylu, xrb, yrb         
+            roi = [w2+64,h2-128,w2+128,h2-64] # xlu, ylu, xrb, yrb                
+        elif roi_type == 32: # upper left image position
+            roi = [w2-400,h2-196,w2-272,h2-128] # xlu, ylu, xrb, yrb  
+        elif roi_type == 33: # upper right image position
+            roi = [w2+302,h2-196,w2+430,h2-128] # xlu, ylu, xrb, yrb    
+        elif roi_type == 41: # center center image position
+            #roi = [w2-64,h2+32,w2+64,h2+96] # xlu, ylu, xrb, yrb    
+            roi = [w2-128,h2+64,w2+128,h2+196] # xlu, ylu, xrb, yrb    
+        elif roi_type == 42: # center left image position
+            roi = [w2-200,h2+32,w2-72,h2+96] # xlu, ylu, xrb, yrb 
+        elif roi_type == 43: # center right image position
+            roi = [w2+72,h2+32,w2+200,h2+96] # xlu, ylu, xrb, yrb   
+        elif roi_type == 43: # center right image position
+            roi = [w2+72,h2+32,w2+200,h2+96] # xlu, ylu, xrb, yrb      
+        elif roi_type == 52: # upper left image position for data 432
+            roi = [w2-256,h2-256,w2-128,h2-192] # xlu, ylu, xrb, yrb  
+        elif roi_type == 53: # upper center image position
+            roi = [w2-128,h2+16,w2+128,h2+300] # xlu, ylu, xrb, yrb                                            
+        
+        elif roi_type == 60: # 422 on cube smaller
+            roi = [645,395,665,415] # xlu, ylu, xrb, yrb          
+        elif roi_type == 61: # 422 on cube
+            roi = [640,390,670,420] # xlu, ylu, xrb, yrb   
+        elif roi_type == 62: # 422 right cube side
+            roi = [690,390,720,420] # xlu, ylu, xrb, yrb     
+        elif roi_type == 71: # set 422 - cube 
+            roi = [630,h2+72,690,h2+128] # xlu, ylu, xrb, yrb    
+        elif roi_type == 72: # set 422 - cube 
+            roi = [430,h2-30,890,h2+228] # xlu, ylu, xrb, yrb   
+        elif roi_type == 73: # set 422 - down side of the cube 
+            roi = [630,h2+140,690,h2+190] # xlu, ylu, xrb, yrb             
+        elif roi_type == 74: # set 422 - right side of the cube 
+            roi = [700,h2+64,800,h2+164] # xlu, ylu, xrb, yrb  
+        elif roi_type == 75: # set 422 - up side of the cube 
+            roi = [700,h2+20,800,h2+96] # xlu, ylu, xrb, yrb            
+        self.rect = roi       
+        #self.rect_3d        = [[-w,-h,0],[w,-h,0],[w,h,0],[-w,h,0],[-w,-h,0]]                                                           
+        log.info(f'Using ROI : {roi}')         
+        return roi    
+
+    def preprocess(self, img = None):
+        "image preprocessing - initializes mask/ROI/ray directions on first use and returns the full image (channel 2 if multi-channel) as float32; returns False when img is None"
+        if img is None:
+            log.info('No image provided')
+            return False        
+
+        if self.img_mask is None:
+            ret = self.init_image(img)
+
+        if self.rect is None: # no ROI selected yet - fall back to preset 4
+            roi = self.init_roi(4)
+
+        # init params of the inverse
+        if self.full_dir is None:
+            self.fit_plane_init()              
+            
+        #x0, y0, x1, y1  = self.rect
+        if len(img.shape) > 2:
+            #img_roi        = img[y0:y1,x0:x1,2].astype(np.float32)
+            img_roi        = img[:,:,2].astype(np.float32)
+        else:
+            #img_roi        = img[y0:y1,x0:x1].astype(np.float32)
+            img_roi        = img.astype(np.float32)
+        return img_roi         
+
+    def init_img3d(self, img = None):
+        "initializes xyz coordinates for each point"
+        img     = self.img if img is None else img
+        h,w     = img.shape[:2]
+        x       = np.arange(w)
+        y       = np.arange(h)
+        x,y     = np.meshgrid(x,y)
+        fx      = self.cam_matrix[0,0]
+        fy      = self.cam_matrix[1,1]
+        
+        xy      = np.hstack((x.reshape(-1,1),y.reshape(-1,1)))
+        xy      = np.expand_dims(xy, axis=1).astype(np.float32)
+        xy_undistorted = cv.undistortPoints(xy, self.cam_matrix, self.cam_distort)
+
+        u       = xy_undistorted[:,0,0].reshape((h,w))
+        v       = xy_undistorted[:,0,1].reshape((h,w))
+        z3d     = img.astype(np.float32)
+        x3d     = z3d.copy()
+        y3d     = z3d.copy()
+
+        #ii        = np.logical_and(z3d> 1e-6 , np.isfinite(z3d))
+        ii        = z3d > 5
+        x3d[ii]   = u[ii]*z3d[ii] #/fx
+        y3d[ii]   = v[ii]*z3d[ii] #/fy
+        z3d[ii]   = z3d[ii]
+
+        #self.img3d = np.stack((u/fx,v/fy,z3d), axis = 2)
+        self.img3d      = np.stack((u,v,z3d), axis = 2)
+        self.img_mask   = np.zeros((h,w))
+        return self.img3d
+    
+    def compute_img3d(self, img = None):
+        "compute xyz coordinates for each point using previous init"
+        img         = self.img if img is None else img
+        xyz         = self.img3d
+        if xyz is None:
+            xyz = self.init_img3d(img)
+
+        if np.any(img.shape[:2] != xyz.shape[:2]):
+            print('Image dimension change')
+            return 
+
+        imgXYZ      = self.img3d.copy()
+
+        z3d         = img.astype(np.float32)
+        x3d         = self.img3d[:,:,0].copy()  # u/f
+        y3d         = self.img3d[:,:,1].copy()  # v/f
+
+        # filter bad z values
+        #ii          = np.logical_and(z3d > 1e-6 , np.isfinite(z3d))
+        ii          = z3d > 15
+        x3d[ii]     = x3d[ii]*z3d[ii]
+        y3d[ii]     = y3d[ii]*z3d[ii]
+        z3d[ii]     = z3d[ii]
+
+        # x,y,z coordinates in 3D
+        imgXYZ[:,:,0] = x3d
+        imgXYZ[:,:,1] = y3d
+        imgXYZ[:,:,2] = z3d
+
+        self.img_xyz = imgXYZ
+        return imgXYZ
+
+    def check_error(self, xyz1_mtrx, vnorm):
+        "checking the error norm"
+        err         = np.dot(xyz1_mtrx, vnorm)
+        err_std     = err.std()
+        return err_std
+    
+    def get_plane_params(self):
+        "for external interface support"
+        rvec_left              = self.plane_params
+        tvec_left              = self.plane_center
+        conf_left              = self.plane_confidence
+        return tvec_left, rvec_left, conf_left
+
+    def convert_plane_params(self, plane_equation):
+        "convert plane params to rvec"
+        # 4. Convert plane parameters to rvec and tvec
+        #    - The plane normal vector is (A, B, C).
+        #    - We can use the normal vector to get the rotation.
+        #    - A point on the plane can be used for the translation vector.
+
+        # Normalize the plane normal vector
+        normal      = plane_equation #np.array([plane_equation[0], plane_equation[1], plane_equation[2]])
+        normal_norm = np.linalg.norm(normal)
+        if normal_norm == 0:
+            log.error("Error: Zero norm for plane normal vector.")
+            return None
+        normal = normal / normal_norm
+
+        # Use the normalized normal vector to get the rotation matrix
+        # This is a common method, but there are other ways to do this.
+        z_axis        = np.array([0, 0, 1])
+        rotation_axis = np.cross(z_axis, normal)
+        rotation_angle = np.arccos(np.dot(z_axis, normal))
+
+        # Handle the case where the rotation axis is zero (normal is parallel to z-axis)
+        if np.linalg.norm(rotation_axis) < 1e-6:
+            if normal[2] > 0:
+                rvec = np.zeros(3)  # Rotation is identity
+            else:
+                rvec = np.array([0, np.pi, 0]) # Rotation by 180 degrees around Y (X would work equally well).
+        else:
+            rvec, _ = cv.Rodrigues(rotation_axis * rotation_angle)
+            rvec, _ = cv.Rodrigues(rvec)
+
+        return rvec
+
+    def convert_plane_params_to_pose(self, plane_params = None, plane_center = None):
+        "converting params of the plane to the pose vector"
+
+        plane_params = self.plane_params if plane_params is None else plane_params[:3].flatten()
+        plane_center = self.plane_center if plane_center is None else plane_center[:3].flatten()
+
+        tvec       = plane_center.reshape((1,-1))
+        rvec       = plane_params.reshape((1,-1)) #reshape((-1,1))
+        rvec       = rvec/np.linalg.norm(rvec.flatten())
+
+        pose_norm  = np.hstack((tvec, rvec))
+        #log.info('roi to pose')
+        return pose_norm #.flatten()
+
+    def fit_plane_init(self):
+        "prepares data for real time fit a*x+b*y+c = z"
+        if self.cam_matrix is None:
+            self.cam_matrix   = np.array([[650,0,self.frame_size[0]/2],[0,650,self.frame_size[1]/2],[0,0,1]], dtype = np.float32)
+            self.cam_distort  = np.array([0,0,0,0,0],dtype = np.float32)
+            log.info('Camera matrix is initialized to default.')
+
+        x0,y0,x1,y1     = 0,0,self.frame_size[0],self.frame_size[1] #self.rect 
+        h,w             = y1-y0, x1-x0
+        x_grid          = np.arange(x0, x1, 1)
+        y_grid          = np.arange(y0, y1, 1)
+        x, y            = np.meshgrid(x_grid, y_grid)  
+
+        # remember corner indexes for reprojection [0 .... h*(w-1))
+        #                                           .        .
+        #                                           h ......h*w-1]
+        #self.corner_ind = [0, h,  h*w-1, h*(w-1), 0]
+        #h2,w2           = h>>1, w>>1
+        #self.rect_3d    = [[-w,-h,0],[w,-h,0],[w,h,0],[-w,h,0],[-w,-h,0]]
+
+        # camera coordinates
+        xy              = np.hstack((x.reshape(-1,1),y.reshape(-1,1)))
+        xy              = np.expand_dims(xy, axis=1).astype(np.float32)
+        xy_undistorted  = cv.undistortPoints(xy, self.cam_matrix, self.cam_distort)
+
+        u               = xy_undistorted[:,0,0].reshape((h,w)).reshape(-1,1)
+        v               = xy_undistorted[:,0,1].reshape((h,w)).reshape(-1,1)
+
+        # check
+        #u, v            = u*self.cam_matrix[0,0], v*self.cam_matrix[1,1]
+
+        self.full_dir   = np.hstack((u,v,u*0+1))
+        #self.matrix_inv = np.linalg.pinv(self.rect_dir)
+
+    def fit_plane_init_old(self):
+        "prepares data for real time fit a*x+b*y+c = z"
+        self.cam_matrix   = np.array([[650,0,self.frame_size[0]/2],[0,650,self.frame_size[1]/2],[0,0,1]], dtype = np.float32)
+        self.cam_distort  = np.array([0,0,0,0,0],dtype = np.float32)
+
+        x0,y0,x1,y1     = self.rect 
+        h,w             = y1-y0, x1-x0
+        x_grid          = np.arange(x0, x1, 1)
+        y_grid          = np.arange(y0, y1, 1)
+        x, y            = np.meshgrid(x_grid, y_grid)  
+
+        # remember corner indexes for reprojection [0 .... h*(w-1))
+        #                                           .        .
+        #                                           h ......h*w-1]
+        #self.corner_ind = [0, h,  h*w-1, h*(w-1), 0]
+        h2,w2           = h>>1, w>>1
+        self.rect_3d    = [[-w,-h,0],[w,-h,0],[w,h,0],[-w,h,0],[-w,-h,0]]
+
+        # camera coordinates
+        xy              = np.hstack((x.reshape(-1,1),y.reshape(-1,1)))
+        xy              = np.expand_dims(xy, axis=1).astype(np.float32)
+        xy_undistorted  = cv.undistortPoints(xy, self.cam_matrix, self.cam_distort)
+
+        u               = xy_undistorted[:,0,0].reshape((h,w)).reshape(-1,1)
+        v               = xy_undistorted[:,0,1].reshape((h,w)).reshape(-1,1)
+
+        # check
+        #u, v            = u*self.cam_matrix[0,0], v*self.cam_matrix[1,1]
+
+        self.rect_dir = np.hstack((u,v,u*0+1))
+        #self.matrix_inv = np.linalg.pinv(self.rect_dir)
+
+    def convert_roi_to_points(self, img, point_num = 30, step_size = 1, roi_rect = None):
+        "converting roi to pts in XYZ - Nx3 array. point_num - is the target point number"
+
+        # init params of the inverse
+        if self.full_dir is None:  # do not use mtrix_dir - initialized before
+            self.fit_plane_init()  
+
+        # deal with different rect options
+        roi_rect            = self.rect if roi_rect is None else roi_rect
+        x0, y0, x1, y1      = roi_rect
+
+        # make rectangle 
+        h,w                 = (y1-y0)>>1, (x1-x0)>>1
+        self.rect_3d        = [[-w,-h,0],[w,-h,0],[w,h,0],[-w,h,0],[-w,-h,0]]
+
+        # extract roi - must be compatible with image dimensions
+        # n,m                 = img.shape[:2]
+        # img_roi_mask        = np.zeros((n,m), dtype = np.bool_)
+        # img_roi_mask[y0:y1,x0:x1] = True  
+        # valid_bool          = img_roi_mask > 0 & img > 0
+
+        # pixel grid of the roi; arange excludes x1,y1, so the roi covers [x0,x1) x [y0,y1)
+        x_grid              = np.arange(x0, x1, 1)
+        y_grid              = np.arange(y0, y1, 1)
+        x, y                = np.meshgrid(x_grid, y_grid) 
+        #flat_indices        = np.ravel_multi_index((y, x), img.shape[:2]).reshape((-1,1))         
+        flat_indices        = y * self.frame_size[0] + x
+        flat_indices        = flat_indices.ravel().astype(np.int32)
+        # valid under mask
+        #valid_bool          = img.flat[flat_indices] > 0        
+        #ii                  = flat_indices[valid_bool]
+        img_roi             = img[y0:y1,x0:x1].flatten() #.astype(np.float32).reshape((-1,1)) 
+        valid_bool          = img_roi > 1 # valid pixels in the roi
+        ii                  = np.where(valid_bool)[0]
+  
+        valid_point_num     = len(ii)
+        if valid_point_num < 5:
+            return np.zeros((0,3))
+        
+        step_size           = np.maximum(step_size, np.int32(valid_point_num/point_num))
+        ii                  = ii[::step_size]
+
+        # plane params - using only valid
+        z                   = img_roi[ii].reshape((-1,1))
+        jj                  = flat_indices[ii].flatten()
+        uv1_matrix          = self.full_dir[jj,:]
+        xyz_matrix          = uv1_matrix[:,:3]*z  # scale (u,v,1) by z to get (X,Y,Z)
+
+        #self.plane_center   = xyz_center.flatten() 
+        self.rect_z          = z
+        self.rect_dir        = uv1_matrix
+        self.rect_xyz        = xyz_matrix
+        self.roi_index       = jj
+        self.img_roi         = img[y0:y1,x0:x1]
+
+        return xyz_matrix
+
+
+    def convert_roi_to_points_old(self, img_roi, point_num = 30, step_size = 1):
+        "converting roi to pts in XYZ - Nx3 array. point_num - is the target point number"
+        # x1,y1       = self.img_xyz.shape[:2]
+        # roi_area    = x1*y1
+
+        # # reduce size of the grid for speed
+        # if step_size < 1 and roi_area > 100:
+        #     step_size   = np.maximum(1,int(np.sqrt(roi_area)/10))
+
+          
+        # #roi3d       = self.img_xyz[y0:y1:step_size,x0:x1:step_size,:]   
+        # roi3d       = self.img_xyz[::step_size,::step_size,:]           
+        # x,y,z       = roi3d[:,:,0].reshape((-1,1)), roi3d[:,:,1].reshape((-1,1)), roi3d[:,:,2].reshape((-1,1)) 
+        # xyz_matrix  = np.hstack((x,y,z)) 
+        # 
+        
+        # init params of the inverse
+        if self.rect_dir is None:
+            self.fit_plane_init_old()  
+
+        # extract roi 
+
+        n,m                 = img_roi.shape[:2]
+        img_roi             = img_roi.reshape((-1,1))
+        valid_bool          = img_roi > 0
+        valid_bool          = valid_bool.flatten()
+        #log.info(f'Timing : 1')  
+
+        # indices of the valid pixels
+        ii                  = np.where(valid_bool)[0]
+        valid_point_num     = len(ii)
+        if valid_point_num < 5:
+            return None
+        step_size           = np.maximum(step_size, np.int32(valid_point_num/point_num))
+        ii                  = ii[::step_size]
+
+        # plane params - using only valid
+        z                   = img_roi[ii]
+        xyz_matrix          = self.rect_dir[ii,:]
+        xyz_matrix[:,:3]    = xyz_matrix[:,:3]*z  # scale (u,v,1) by z to get (X,Y,Z)
+
+        # update corners of the rect in 3d
+        #self.rect_3d        = self.rect_dir[self.corner_ind,:]*img_roi[self.corner_ind]
+        # rect to show
+        x0, y0, x1, y1      = self.rect
+        h,w                 = y1-y0, x1-x0
+        self.rect_3d        = [[-w,-h,0],[w,-h,0],[w,h,0],[-w,h,0],[-w,-h,0]]
+        # substract mean
+        #xyz_center          = xyz_matrix[:,:3].mean(axis=0)
+        #xyz_matrix          = xyz_matrix - xyz_center   
+        #log.info(f'Timing : 2')     
+
+        # mtrx_dir            = np.hstack((self.rect_dir[valid_bool,0]*z,self.rect_dir[valid_bool,1]*z,z*0+1))
+        # mtrx_inv            = np.linalg.pinv(mtrx_dir)
+        # #mtrx_inv            = self.matrix_inv[:,valid_bool]
+        # plane_params        = np.dot(mtrx_inv,z)
+
+        # decimate to make it run faster  reduce size of the grid for speed. 1000 pix - 30x30 - step 1, 10000 pix - step=3
+        #roi_area            = n*m
+        #step_size           = int(np.sqrt(roi_area)/7) if roi_area > 1000 else 1  
+
+        #self.plane_center   = xyz_center.flatten()   
+        self.rect_xyz      = xyz_matrix          
+
+        return xyz_matrix
+
+    def fit_plane_svd(self, img_roi):
+        "estimates mean and std of the plane fit"
+
+        # roi converted to points with step size on the grid
+        xyz_matrix          = self.convert_roi_to_points(img_roi, point_num = 600, step_size = 1)    
+
+        # some problem with points
+        if xyz_matrix.shape[0] < 2:
+            log.warning('Not enough points in the ROI')
+            return 0, 0    
+
+        # subtract mean
+        xyz_center          = xyz_matrix[:,:3].mean(axis=0)
+        xyz_matrix          = xyz_matrix - xyz_center   
+        #log.info(f'Timing : 2')     
+
+        # mtrx_dir            = np.hstack((self.rect_dir[valid_bool,0]*z,self.rect_dir[valid_bool,1]*z,z*0+1))
+        # mtrx_inv            = np.linalg.pinv(mtrx_dir)
+        # #mtrx_inv            = self.matrix_inv[:,valid_bool]
+        # plane_params        = np.dot(mtrx_inv,z)
+
+        # decimate to make it run faster  reduce size of the grid for speed. 1000 pix - 30x30 - step 1, 10000 pix - step=3
+        #roi_area            = n*m
+        #step_size           = int(np.sqrt(roi_area)/7) if roi_area > 1000 else 1
+        
+        # using svd to make the fit
+        U, S, Vh            = np.linalg.svd(xyz_matrix, full_matrices=True)
+        ii                  = np.argmin(S)
+        vnorm               = Vh[ii,:]
+        #log.info(f'Timing : 3') 
+
+        # keep orientation
+        plane_params       = vnorm*np.sign(vnorm[2])
+
+        # estimate error
+        err                = np.dot(xyz_matrix,plane_params)
+        #z_est              = z + err + xyz_center[2]
+
+        img_mean           = xyz_center[2] #z_est.mean()
+        img_std            = err.std()
+        self.plane_params  = plane_params[:3].flatten()
+        self.plane_center  = xyz_center.flatten()
+
+        #log.info(f'Plane : {self.plane_params}, error {img_std:.3f}, step {step_size}')
+        
+        return img_mean, img_std  
+    
+    def fit_plane_svd_old(self, img_roi):
+        "estimates mean and std of the plane fit"
+        # n,m             = img_roi.shape[:2]
+        # img_roi         = img_roi.reshape((-1,1))
+        # valid_bool      = img_roi > 0
+        # valid_bool      = valid_bool.flatten()
+        # #log.info(f'Timing : 1')  
+
+        # # init params of the inverse
+        # if self.matrix_inv is None:
+        #     self.fit_plane_init()
+
+        # # plane params - using only valid
+        # z                   = img_roi[valid_bool]
+        # xyz_matrix          = self.rect_dir[valid_bool,:]
+        # xyz_matrix[:,:3]    = xyz_matrix[:,:3]*z  # keep 1 intact
+
+        # update corners of the rect in 3d
+        #self.rect_3d        = self.rect_dir[self.corner_ind,:]*img_roi[self.corner_ind]
+
+        # roi converted to points with step size on the grid
+        #xyz_matrix          = self.convert_roi_to_points(img_roi, point_num = 1e4, step_size = 1)    
+        xyz_matrix          = self.convert_roi_to_points_old(img_roi, point_num = 1e4, step_size = 1) 
+
+
+        # some problem with points
+        if xyz_matrix.shape[0] < 2:
+            log.warning('Not enough points in the ROI')
+            return 0, 0                 
+
+        # subtract mean
+        xyz_center          = xyz_matrix[:,:3].mean(axis=0)
+        xyz_matrix          = xyz_matrix - xyz_center   
+        #log.info(f'Timing : 2')     
+
+        # mtrx_dir            = np.hstack((self.rect_dir[valid_bool,0]*z,self.rect_dir[valid_bool,1]*z,z*0+1))
+        # mtrx_inv            = np.linalg.pinv(mtrx_dir)
+        # #mtrx_inv            = self.matrix_inv[:,valid_bool]
+        # plane_params        = np.dot(mtrx_inv,z)
+
+        # decimate to make it run faster  reduce size of the grid for speed. 1000 pix - 30x30 - step 1, 10000 pix - step=3
+        #roi_area            = n*m
+        #step_size           = int(np.sqrt(roi_area)/7) if roi_area > 1000 else 1
+        
+        # using svd to make the fit
+        U, S, Vh            = np.linalg.svd(xyz_matrix, full_matrices=True)
+        ii                  = np.argmin(S)
+        vnorm               = Vh[ii,:]
+        #log.info(f'Timing : 3') 
+
+        # keep orientation
+        plane_params       = vnorm*np.sign(vnorm[2])
+
+        # estimate error
+        err                = np.dot(xyz_matrix,plane_params)
+        #z_est              = z + err + xyz_center[2]
+
+        img_mean           = xyz_center[2] #z_est.mean()
+        img_std            = err.std()
+        self.plane_params  = plane_params[:3].flatten()
+        self.plane_center  = xyz_center.flatten()
+
+        #log.info(f'Plane : {self.plane_params}, error {img_std:.3f}, step {step_size}')
+        
+        return img_mean, img_std  
+        
+    def fit_plane_svd_weighted(self, img_roi):
+        "estimates mean and std of the plane fit - fit is weighted assuming Sigma(z) = a*z"
+        # roi converted to points with step size on the grid
+        xyz_matrix          = self.convert_roi_to_points(img_roi, point_num = 500, step_size = 1)    
+
+        # some problem with points
+        if xyz_matrix.shape[0] < 2:
+            log.warning('Not enough points in the ROI')
+            return 0, 0 
+        
+        # # substract mean
+        # xyz_center          = xyz_matrix[:,:3].mean(axis=0)
+        # xyz_matrix          = xyz_matrix - xyz_center          
+
+        # plane params - using only valid
+        z                   = self.rect_z 
+        uv1_matrix          = self.rect_dir  # (x - x0)/f, (y - y0)/f, 1
+ 
+        # minimization function min |ua/f + vb/f + c + dz|^2/|z|^2
+        f                   = self.cam_matrix[0,0]
+        # uvf_matrix          = np.dot(uv1_matrix , np.diag([1/f,1/f,1]))
+
+        mtrx_inv            = np.linalg.pinv(uv1_matrix)
+        b                   = f/z
+        plane_params        = np.dot(mtrx_inv,b)
+        plane_params        = plane_params/np.linalg.norm(plane_params)
+
+        # center
+        xyz_matrix          = self.rect_xyz  #uv1_matrix[:,:3]*z  # keep 1 intact
+        xyz_center          = xyz_matrix.mean(axis=0)                
+
+        # estimate error
+        err                = np.dot(uv1_matrix,plane_params) 
+        z_est              = z - err
+
+        img_mean           = z_est.mean()
+        img_std            = err.std()
+        self.plane_params  = plane_params[:3].flatten()
+        self.plane_center  = xyz_center.flatten()
+
+        log.info(f'Plane : {self.plane_params}, error {img_std:.3f}')
+        
+        return img_mean, img_std 
+
+    def fit_plane_with_outliers(self, img_roi):
+        "computes normal for the specific roi and evaluates error. Do it twice to reject outliers"
+        # roi converted to points with step size on the grid
+        xyz_matrix          = self.convert_roi_to_points(img_roi, point_num = 1500, step_size = 1)    
+
+        # some problem with points
+        if xyz_matrix.shape[0] < 2:
+            log.warning('Not enough points in the ROI')
+            return 0, 0    
+        
+        # subtract mean
+        xyz_center_1 = xyz_matrix[:,:3].mean(axis=0)
+        xyz_1        = xyz_matrix - xyz_center_1         
+
+        # using svd to make the fit to a sub group     
+        U, S, Vh    = np.linalg.svd(xyz_1, full_matrices=True)
+        ii          = np.argmin(S)
+        vnorm       = Vh[ii,:]
+        #vnorm       = vnorm*np.sign(vnorm[2]) # keep orientation
+
+        # keep orientation
+        plane_params = vnorm*np.sign(vnorm[2])
+
+        # estimate error
+        err         = np.dot(xyz_1,plane_params)        
+        err_std     = err.std()
+        log.info('Fit error iteration 1: %s' %str(err_std))
+
+        # filter only the matching points
+        inlier_ind  = np.abs(err) < 3*err_std
+
+        # substract mean when only inliers are kept
+        xyz_center_2 = xyz_matrix[inlier_ind,:3].mean(axis=0)#  
+        xyz_2        = xyz_matrix[inlier_ind,:] - xyz_center_2         
+
+        # perform svd one more time 
+        U, S, Vh    = np.linalg.svd(xyz_2, full_matrices=True)
+        ii          = np.argmin(S)
+        vnorm       = Vh[ii,:]
+
+        # keep orientation
+        plane_params = vnorm*np.sign(vnorm[2])
+
+        # checking error
+        err         = np.dot(xyz_2, plane_params)
+        err_std     = err.std()
+        log.info('Fit error iteration 2: %s' %str(err_std))    
+
+        # # We can convert this flat index to row and column indices
+        # row_index, col_index = np.unravel_index(inlier_ind, self.img_mask.shape)
+        # self.img_mask[row_index, col_index] = 1    
+
+        img_mean           = xyz_center_2[2] #z_est.mean()
+        img_std            = err_std
+        self.plane_params  = plane_params[:3].flatten()
+        self.plane_center  = xyz_center_2.flatten()
+
+        #log.info(f'Plane : {self.plane_params}, error {img_std:.3f}, step {step_size}')
+        
+        return img_mean, img_std   
+    
+    def fit_plane_ransac(self, img_roi):
+        
+        """
+        Find the best equation for a plane.
+
+        :param pts: 3D point cloud as a `np.array (N,3)`.
+        :param thresh: Threshold distance from the plane which is considered inlier.
+        :param maxIteration: Number of maximum iteration which RANSAC will loop over.
+        :returns:
+        - `self.equation`:  Parameters of the plane using Ax+By+Cz+D `np.array (1, 4)`
+        - `self.inliers`: points from the dataset considered inliers
+
+        """
+        #log.info('Fit ransac: ...')  
+        # roi converted to points with step size on the grid
+        #xyz_matrix     = self.convert_roi_to_points_old(img_roi, point_num = 250, step_size = 1)
+        xyz_matrix     = self.convert_roi_to_points(img_roi, point_num = 250, step_size = 1)
+        if xyz_matrix is None:
+            log.error('No points in the ROI')
+            return 0, 0
+
+        thresh         = 1.05
+        maxIteration   = 100
+
+
+        n_points        = xyz_matrix.shape[0]
+        best_eq         = []
+        best_inliers    = []
+
+        for it in range(maxIteration):
+
+            # Samples 3 random points
+            if n_points < 3: break
+            id_samples = random.sample(range(0, n_points), 3)
+            pt_samples = xyz_matrix[id_samples,:]
+
+            # We have to find the plane equation described by those 3 points
+            # We find first 2 vectors that are part of this plane
+            # A = pt2 - pt1
+            # B = pt3 - pt1
+
+            vecA        = pt_samples[1, :] - pt_samples[0, :]
+            vecB        = pt_samples[2, :] - pt_samples[0, :]
+
+            # Now we compute the cross product of vecA and vecB to get vecC which is normal to the plane
+            vecC        = np.cross(vecA, vecB)
+            vecC_norm   = np.linalg.norm(vecC)
+
+            # protect from the close spaced points
+            if vecC_norm < 10e-6:
+                continue
+
+            # make sure that Z direction is positive
+            vecC        = vecC * np.sign(vecC[2])
+
+            # The plane equation will be vecC[0]*x + vecC[1]*y + vecC[0]*z = -k
+            # We have to use a point to find k
+            vecC        = vecC / vecC_norm
+            #k           = -np.sum(np.multiply(vecC, pt_samples[1, :]))
+            k           = -np.dot(vecC, pt_samples[1, :])
+            plane_eq    = [vecC[0], vecC[1], vecC[2], k]
+
+            # Distance from a point to a plane
+            # https://mathworld.wolfram.com/Point-PlaneDistance.html
+            # pt_id_inliers = []  # list of inliers ids
+            # dist_pt = (
+            #     plane_eq[0] * xyz_matrix[:, 0] + plane_eq[1] * xyz_matrix[:, 1] + plane_eq[2] * xyz_matrix[:, 2] + plane_eq[3]
+            # ) / np.sqrt(plane_eq[0] ** 2 + plane_eq[1] ** 2 + plane_eq[2] ** 2)
+
+            dist_pt            = np.dot(xyz_matrix, vecC) + plane_eq[3]
+
+            # Select indexes where distance is biggers than the threshold
+            pt_id_inliers       = np.where(np.abs(dist_pt) <= thresh)[0]
+            if len(pt_id_inliers) > len(best_inliers):
+                best_eq         = plane_eq
+                best_inliers    = pt_id_inliers
+        
+        #self.inliers = best_inliers
+        #self.equation = best_eq
+
+        # rtansform to pose output
+        #tvec            = xyz_matrix[best_inliers,:].mean(axis=0)
+        #pts_best        = xyz_matrix[best_inliers,:] - tvec
+        tvec            = xyz_matrix.mean(axis=0)
+        pts_best        = xyz_matrix - tvec        
+        vnorm           = np.array(best_eq[:3])
+
+        # checking error
+        err             = np.dot(pts_best, vnorm)
+        err_std         = err.std()
+        log.info('Fit error ransac: %s' %str(err_std))  
+
+        img_mean           = tvec[2] #z_est.mean()
+        img_std            = err_std
+        self.plane_params  = vnorm.flatten()
+        self.plane_center  = tvec.flatten()
+
+        #log.info(f'Plane : {self.plane_params}, error {img_std:.3f}, step {step_size}')
+        
+        return img_mean, img_std 
+    
+    def estimate_normals_from_depth_map(self,depth_map):
+        """
+        Compute a unit surface normal for every pixel of a depth map.
+
+        The map is smoothed with a 5x5 Gaussian, differentiated along both
+        image axes with the Sobel operator, and the direction
+        (-dz/du, -dz/dv, 1) is scaled to unit length for every pixel.
+
+        Args:
+            depth_map (np.ndarray): A single-channel depth image. Any dtype other
+                                    than float32 is converted to float32 first.
+
+        Returns:
+            np.ndarray: A 3-channel float32 image (H, W, 3) where each pixel
+                        contains the (nx, ny, nz) unit normal vector.
+        """
+        # 1. Work in float32 so that the gradient filters keep fractions
+        if depth_map.dtype == np.float32:
+            depth32 = depth_map
+        else:
+            depth32 = depth_map.astype(np.float32)
+
+        # 2. Suppress pixel noise before differentiating
+        smoothed = cv.GaussianBlur(depth32, (5, 5), 0)
+
+        # 3. First derivative along each image axis. ksize=1 selects the plain
+        # 3x1 / 1x3 difference kernel (no additional smoothing).
+        sobel_opts = dict(ksize=1, borderType=cv.BORDER_DEFAULT)
+
+        # dz/du : horizontal gradient (dx=1, dy=0)
+        dz_du = cv.Sobel(smoothed, cv.CV_32F, 1, 0, **sobel_opts)
+
+        # dz/dv : vertical gradient (dx=0, dy=1)
+        dz_dv = cv.Sobel(smoothed, cv.CV_32F, 0, 1, **sobel_opts)
+
+        # 4. Build the direction n = (-dz/du, -dz/dv, 1) for every pixel.
+        # Stacking on a new third axis gives an (H, W, 3) array; the z
+        # component is a constant 1 with the dtype of the gradients.
+        directions = np.stack(
+            (-dz_du, -dz_dv, np.ones_like(dz_du)),
+            axis=2,
+        )
+
+        # 5. Length of every direction vector, kept as (H, W, 1) so that it
+        # broadcasts over the three components
+        lengths = np.linalg.norm(directions, axis=2, keepdims=True)
+
+        # 6. Scale to unit length. The division is applied only where the
+        # length is non-zero; any other pixel keeps the (0, 0, 0) it was
+        # initialized with.
+        unit_normals = np.zeros_like(directions)
+        np.divide(
+            directions, lengths,
+            out=unit_normals, where=lengths != 0,
+        )
+
+        return unit_normals
+    
+    def estimate_normals_using_box_filters(self,img_roi):
+        """
+        Estimates the surface normal vector for each pixel in a depth map
+        using differences of box-filtered (local mean) depth values.
+
+        Args:
+            img_roi (np.ndarray): A single-channel depth image. Only pixels > 0
+                                  are counted as valid when averaging.
+
+        Returns:
+            np.ndarray: A float64 array (H, W, 3) where each pixel contains the
+                        unit vector along (-dz/dx, -dz/dy, 1). Near the image
+                        border, where a difference cannot be formed, the
+                        gradient along that axis is 0.
+        """
+        # sum of image pixels using box filter
+        img_roi                 = img_roi.astype(np.float32)
+        roih, roiw              = img_roi.shape[:2]
+        kernel_size             = 5
+        # count good points
+        img_roi_mask            = (img_roi > 0).astype(np.float32)
+
+        img_roi_sum             = cv.boxFilter(img_roi, -1,      (kernel_size, kernel_size), normalize=False)
+        img_roi_count           = cv.boxFilter(img_roi_mask, -1, (kernel_size, kernel_size), normalize=False)
+
+        # protect from non valid
+        img_roi_count[img_roi_count < 1] = 1
+        img_roi_mean            = img_roi_sum/img_roi_count
+
+        # the gradient is the difference of the local means `shift` pixels to
+        # either side of the current pixel
+        shift                   = kernel_size>>1
+        shift2                  = shift<<1
+
+        # start from (0, 0, 1): border pixels that the differences below do not
+        # reach must not keep a spurious unit gradient
+        img_normal              = np.zeros((roih,roiw,3))
+        img_normal[:,:,2]       = 1
+        # -dzdx, -dzdy (negated so that the normal is (-dz/dx, -dz/dy, 1))
+        img_normal[:,shift:-shift,0]    = img_roi_mean[:,:-shift2]  - img_roi_mean[:,shift2:]
+        img_normal[shift:-shift,:,1]    = img_roi_mean[:-shift2,:]  - img_roi_mean[shift2:,:]
+
+        # normalize each vector to unit length
+        norm2              = np.sqrt(np.sum(img_normal**2, axis=2))
+        img_normal         = img_normal / norm2[:,:,np.newaxis]
+        return img_normal
+
+    def fit_plane_using_gradients(self, img_full, roi_rect = None):
+        """
+        Estimates the normal to the plane by averaging per-pixel gradient normals.
+
+        The per-pixel normals of the ROI depth patch are computed with
+        ``estimate_normals_from_depth_map`` and averaged; the average is scaled
+        back to unit length so that the residuals computed below are real
+        point-to-plane distances (a shorter vector would shrink the reported
+        error).
+
+        The old box-filter variant (``estimate_normals_using_box_filters``) is
+        no longer used here.
+
+        :param img_full: image passed on to ``self.convert_roi_to_points``
+        :param roi_rect: optional ROI rectangle, forwarded to
+                         ``self.convert_roi_to_points``
+        :returns: ``(img_mean, img_std)`` - z coordinate of the centroid of the
+                  ROI points and standard deviation of their residuals along
+                  the plane normal; ``(0, 0)`` when the fit is not possible.
+
+        Side effects: sets ``self.plane_params``, ``self.plane_center`` and
+        ``self.img_roi_normal`` (the per-pixel normals, for debug and display).
+        """
+
+        # roi converted to points with step size on the grid
+        # NOTE(review): self.img_roi is presumably refreshed by this call (it is
+        # checked right after it) - confirm
+        xyz_roi             = self.convert_roi_to_points(
+            img_full,
+            point_num = 500,
+            step_size = 1,
+            roi_rect = roi_rect,
+        )
+        if self.img_roi is None:
+            log.error('No ROI in image')
+            return 0, 0
+
+        # some problem with points - checked before any further work. A missing
+        # result (None) is rejected as well instead of failing on `.shape`
+        if xyz_roi is None or xyz_roi.shape[0] < 2:
+            log.warning('Not enough points in the ROI')
+            return 0, 0
+
+        # per-pixel normals of the ROI depth patch
+        img_roi             = self.img_roi.astype(np.float32)
+        img_normal          = self.estimate_normals_from_depth_map(img_roi)
+
+        # plane normal: the mean of unit vectors is shorter than 1 as soon as
+        # the normals disagree, so bring it back to unit length
+        plane_params        = img_normal.mean(axis=(0,1))
+        plane_norm          = np.linalg.norm(plane_params)
+        if plane_norm > 0:
+            plane_params    = plane_params / plane_norm
+
+        # estimate error: residual of every point along the plane normal,
+        # measured from the centroid of the points
+        xyz_center          = xyz_roi[:,:3].mean(axis=0)
+        xyz_matrix          = xyz_roi[:,:3] - xyz_center
+        err                 = np.dot(xyz_matrix,plane_params)
+
+        img_mean            = xyz_center[2] #z_est.mean()
+        img_std             = err.std()
+
+        # publish the result on the instance
+        self.plane_params   = plane_params[:3].flatten()
+        self.plane_center   = xyz_center.flatten()
+        self.img_roi_normal = img_normal # save for debug and display
+
+        log.info(f'Plane : {self.plane_params}, error {img_std:.3f}')
+
+        return img_mean, img_std
+    
+    def fit_plane_ransac_and_grow(self, img_full):
+        """
+        Grow the ROI mask with the pixels that agree with the plane already stored
+        in ``self.plane_params`` / ``self.plane_center``.
+
+        :param img_full: depth image; for a multi-channel image channel 2 is used
+        :returns: ``(img_mean, img_std)`` - mean z of the points within ``thresh`` of
+                  the plane and std of the absolute distances of all tested points;
+                  ``(0, 0)`` when there is nothing to test
+        """
+        if len(img_full.shape) > 2:
+            img_full        = img_full[:,:,2].astype(np.float32)
+
+        # start from the original ROI
+        if self.img_mask is None:
+            self.init_image(img_full)
+
+        # make sure that mask is not empty - initial rectangle
+        x0, y0, x1, y1              = self.rect
+        self.img_mask[y0:y1,x0:x1]  = 1
+
+        # grow the mask: bounding box of the confident pixels plus one pixel margin
+        y,x                         = np.where(self.img_mask > 0.7)
+        if y.size == 0:
+            # a degenerate rectangle leaves nothing to grow from
+            log.warning('Not enough points in the ROI')
+            return 0, 0
+        y_min, y_max                = np.maximum(0,y.min()-1), np.minimum(self.img_mask.shape[0],y.max()+2)
+        x_min, x_max                = np.maximum(0,x.min()-1), np.minimum(self.img_mask.shape[1],x.max()+2)
+
+        # extract ROI
+        roi_rect                    = [x_min, y_min, x_max, y_max]
+        xyz_matrix                  = self.convert_roi_to_points(img_full, point_num = 5000, step_size = 1, roi_rect = roi_rect)
+        if xyz_matrix is None or xyz_matrix.shape[0] < 1:
+            log.warning('Not enough points in the ROI')
+            return 0, 0
+
+        # check against the plane : do not substract plane.center from all the points
+        vecC                        = self.plane_params[:3]
+        dist_offset                 = np.dot(self.plane_center, vecC)
+        dist_pt                     = np.dot(xyz_matrix, vecC) - dist_offset
+
+        # Select indexes where distance is within the threshold
+        thresh                      = 3.5
+        err                         = np.abs(dist_pt)
+        i2                          = np.where( err <= thresh)[0]
+
+        # transfer xi,yi coordinates to the original image index
+        ii                          = self.roi_index[i2] # convert to 2D index
+
+        # update mask: global decay, then move the plane pixels half way towards 1
+        self.img_mask               = 0.95*self.img_mask
+        self.img_mask.flat[ii]      = self.img_mask.flat[ii] + 0.5*(1 - self.img_mask.flat[ii])
+
+        # output - the mean over an empty inlier set would be NaN, report 0 instead
+        img_std                    = err.std()
+        img_mean                   = xyz_matrix[i2].mean(axis=0)[2] if i2.size > 0 else 0
+        return img_mean, img_std
+        
+    def fit_and_split_roi_recursively(self, roi, level = 0):
+        """
+        Fit a plane to `roi` and, while the fit is poor, try its four quadrants.
+
+        Recursion stops when the fit error drops below ``self.MIN_STD_ERROR`` or
+        when half of the ROI is smaller than ``self.MIN_SPLIT_SIZE``. A split is
+        kept only if at least one of the fits found below has an error lower
+        than half of the error of this ROI.
+
+        :param roi: rectangle as (x0, y0, x1, y1)
+        :param level: recursion depth, used for logging only
+        :returns: list of the ``self.fit_plane`` results - either the single fit
+                  of this ROI or all the fits collected from its quadrants
+        """
+        x0,y0,x1,y1     = roi
+        log.info('Processing level %d, region x = %d, y = %d' %(level,x0,y0))
+
+        # fit of the whole region - the reference that a split has to beat
+        whole_fit       = self.fit_plane(roi)
+        if whole_fit['error'] < self.MIN_STD_ERROR:
+            log.info('Fit is good enough x = %d, y = %d' %(x0,y0))
+            return [whole_fit]
+
+        # split point in the middle; stop when a quadrant would be too small
+        xs              = int((x1 + x0)/2)
+        ys              = int((y1 + y0)/2)
+        if (xs - x0) < self.MIN_SPLIT_SIZE or (ys - y0) < self.MIN_SPLIT_SIZE:
+            log.info('Min size is reached x = %d, y = %d' %(x0,y0))
+            return [whole_fit]
+
+        # recurse into the 4 quadrants and collect their fits in one flat list
+        quadrants       = ([x0,y0,xs,ys], [x0,ys,xs,y1], [xs,y0,x1,ys], [xs,ys,x1,y1])
+        quadrant_fits   = []
+        for quadrant in quadrants:
+            quadrant_fits.extend(self.fit_and_split_roi_recursively(quadrant, level + 1))
+
+        # accept the split if one of the fits is twice lower (if noise of 4 split should be 2)
+        if any(fit['error'] < whole_fit['error']/2 for fit in quadrant_fits):
+            log.info('Split at level %d, region x = %d, y = %d' %(level,x0,y0))
+            return quadrant_fits
+
+        # none of the fits below is clearly better: keep the fit of the whole region
+        log.info('No split level %d, region x = %d, y = %d' %(level,x0,y0))
+        return [whole_fit]
+    
+    def fit_plane_svd_weighted_4d(self, img_roi):
+        """
+        Estimates weighted plane fit using weight inversely proportional to the depth.
+
+        The plane is the right singular vector of the smallest singular value of
+        [self.rect_dir (column 2 scaled by f), 1/self.rect_z]; its first three
+        components, scaled to unit length, become the plane normal.
+        NOTE(review): assumes rect_dir is (N, 3) and rect_z is (N, 1) - confirm.
+        Returns (img_mean, img_std), or (0, 0) when the fit is not possible.
+        """
+        # roi converted to points with step size on the grid
+        xyz_matrix          = self.convert_roi_to_points(img_roi, point_num = 350, step_size = 1)
+        # some problem with points
+        if xyz_matrix is None or xyz_matrix.shape[0] < 2:
+            log.warning('Not enough points in the ROI')
+            return 0, 0
+        if self.roi_index is None:
+            log.error('No ROI index found')
+            return 0,0
+
+        # form matrix for svd - using only valid
+        f                   = self.cam_matrix[0,0]
+        uv1z_matrix         = np.hstack((self.rect_dir,1/self.rect_z))
+        uv1z_matrix[:,2:3] *= f
+
+        # 4 unknowns: with fewer rows the last singular vector is not a fit
+        if uv1z_matrix.shape[0] < 4:
+            log.warning('Not enough points in the ROI')
+            return 0, 0
+
+        # using svd to make the fit; the reduced form avoids the N x N matrix U
+        _, S, Vh            = np.linalg.svd(uv1z_matrix, full_matrices=False)
+        vnorm               = Vh[np.argmin(S),:]
+        # keep orientation (z >= 0; np.sign() would zero the vector for z == 0), unit normal
+        normal_len          = np.linalg.norm(vnorm[:3])
+        if normal_len < 1e-12:
+            log.warning('Degenerate plane fit')
+            return 0, 0
+        plane_params        = vnorm / (-normal_len if vnorm[2] < 0 else normal_len)
+        # estimate error around the patch mean
+        err                 = np.dot(xyz_matrix,plane_params[:3])
+        xyz_center          = xyz_matrix[:,:3].mean(axis=0)
+        img_mean            = xyz_center[2] #z_est.mean()
+        img_std             = err.std()
+        self.plane_params   = plane_params[:3].flatten()
+        self.plane_center   = xyz_center.flatten()
+        log.info(f'Plane : {self.plane_params}, error {img_std:.3f}')
+        return img_mean, img_std
+    
+    def fit_plane_and_project_the_image(self, img_full):
+        """
+        Mark in the mask the image pixels that lie on the plane already stored in
+        ``self.plane_params`` / ``self.plane_center``.
+
+        :param img_full: depth image; for a multi-channel image channel 2 is used
+        :returns: ``(img_mean, img_std)`` - mean z of the points within ``thresh`` of
+                  the plane and std of the absolute distances of all tested points;
+                  ``(0, 0)`` when there is nothing to test
+        """
+        h,w                         = img_full.shape[:2]
+        if len(img_full.shape) > 2:
+            img_full        = img_full[:,:,2].astype(np.float32)
+
+        # start from the original ROI
+        if self.img_mask is None:
+            self.init_image(img_full)
+
+        # extract ROI: the whole image without a border of 50 pixels
+        roi_rect                    = [50, 50, w-50, h-50]
+        xyz_matrix                  = self.convert_roi_to_points(img_full, point_num = 50000, step_size = 1, roi_rect = roi_rect)
+        if xyz_matrix is None or xyz_matrix.shape[0] < 1:
+            log.warning('Not enough points in the ROI')
+            return 0, 0
+
+        # check against the plane : do not substract plane.center from all the points
+        vecC                        = self.plane_params[:3]
+        dist_offset                 = np.dot(self.plane_center, vecC)
+        dist_pt                     = np.dot(xyz_matrix, vecC) - dist_offset
+
+        # Select indexes where distance is within the threshold
+        thresh                      = 1.5
+        err                         = np.abs(dist_pt)
+        i2                          = np.where(err <= thresh)[0]
+
+        # transfer xi,yi coordinates to the original image index
+        ii                          = self.roi_index[i2] # convert to 2D index
+
+        # update mask according to the valid pixels
+        self.img_mask.flat[ii]      = 1
+
+        # output - the mean over an empty inlier set would be NaN, report 0 instead
+        img_std                    = err.std()
+        img_mean                   = xyz_matrix[i2].mean(axis=0)[2] if i2.size > 0 else 0
+        return img_mean, img_std
+        
+    def fit_plane_and_project_the_image_using_gradients(self, img_full):
+        """
+        Raise the mask for the pixels whose gradient normal has a component along
+        the normal of the plane already stored in ``self.plane_params``.
+
+        Per-pixel normals are estimated for the whole image and projected on the
+        plane normal. The initial rectangle is set to 1, the whole mask is
+        multiplied by 0.95 and the selected pixels move half way towards 1.
+
+        NOTE(review): the selection keeps |n . plane_normal| > 0.1, which holds
+        for every pixel that is not almost perpendicular to the plane normal -
+        confirm that this is the intended test.
+
+        :param img_full: depth image; for a multi-channel image channel 2 is used
+        :returns: ``(img_mean, img_std)`` - img_mean is always 0 and img_std is
+                  the std of |n . plane_normal| over the image
+        """
+
+        if len(img_full.shape) > 2:
+            img_full    = img_full[:,:,2].astype(np.float32)
+
+        # start from the original ROI
+        if self.img_mask is None:
+            self.init_image(img_full)
+
+        # normals of the entire image, projected on the plane normal
+        # (absolute value: the sign of the projection is ignored)
+        pixel_normals   = self.estimate_normals_from_depth_map(img_full)
+        plane_normal    = self.plane_params[:3]
+        projection      = np.abs(np.dot(pixel_normals, plane_normal))
+
+        # boolean image of the pixels that pass the threshold
+        thresh          = 0.1
+        selected        = projection > thresh
+
+        # make sure that mask is not empty - initial rectangle
+        x0, y0, x1, y1  = self.rect
+        self.img_mask[y0:y1,x0:x1] = 1
+
+        # update mask: decay everything, then boost the selected pixels
+        self.img_mask   = 0.95*self.img_mask
+        boosted         = self.img_mask[selected]
+        self.img_mask[selected] = boosted + 0.5*(1 - boosted)
+
+        # output - no depth is estimated here, only the spread of the projection
+        img_mean        = 0
+        img_std         = projection.std()
+
+        return img_mean, img_std
+        
+
+
+    def growingStep(self, depths:np.ndarray, step:int, direction:str, mean:float, std:float,
+                    left:int, top:int, right:int, bottom:int, numConfInt:int=3, testRatio:float=0.95):
+        """
+        testing whether growing in a given direction and size is acceptable
+        (the segment spans rows bottom..top and columns left..right, both inclusive)
+        :param depths:  uncropped depth image
+        :param step: step size to test
+        :param direction: growing direction. should be either: 'left', 'right', 'top' or  'bottom'
+        :param mean: the current mean depth of the segment
+        :param std: the current standard deviation of the segment's depths
+        :param left: the current left bound of the segment
+        :param top: the current top bound of the segment
+        :param right: the current right bound of the segment
+        :param bottom: the current bottom bound of the segment
+        :param numConfInt: number of confidence intervals threshold
+        :param testRatio: ratio of elements in growing area to be passed the confidence interval threshold
+        :return: true if growing is accepted, otherwise false (also when there is no area to test)
+        :raises ValueError: for an unknown direction or an out of range step, numConfInt or testRatio
+        """
+        if direction not in ('left', 'right', 'bottom', 'top'):
+            raise ValueError('direction must be one of "left", "right", "bottom", "top"')
+        if step < 0:
+            raise ValueError('step must be non-negative')
+        if numConfInt < 1:
+            raise ValueError('numConfInt must be at least 1')
+        if testRatio > 1 or testRatio < 0:
+            raise ValueError('testRatio must be between 0 and 1')
+        # step 0 has no area to test; a negative start index would wrap around the image
+        if step == 0 or (direction == 'left' and left < step) or (direction == 'bottom' and bottom < step):
+            return False
+        # only the requested strip is cut out; it covers the full inclusive extent of the segment
+        strips = {
+            'left':   (slice(bottom, top + 1), slice(left - step, left)),
+            'right':  (slice(bottom, top + 1), slice(right + 1, right + step + 1)),
+            'top':    (slice(top + 1, top + step + 1), slice(left, right + 1)),
+            'bottom': (slice(bottom - step, bottom), slice(left, right + 1)),
+        }
+        testData = depths[strips[direction]]
+        if testData.size == 0:
+            return False    # growing is denied
+        # only non-zero elements are tested; a segment with std == 0 accepts only its own mean
+        deviation = np.abs(testData[testData != 0] - mean)
+        passed = deviation / std < numConfInt if std > 0 else deviation == 0
+        return np.count_nonzero(passed) / testData.size > testRatio
+
+    def findMaxROI(self, depths:np.ndarray, initialRoi) -> tuple[int, int, int, int]:
+        """
+        Find maximum ROI in depth image using region growing from an initial ROI
+
+        Every side is grown independently with an adaptive step: an accepted
+        step is doubled for the next attempt, a rejected step larger than 1
+        falls back to 1, and a rejected step of 1 stops the growth of that side.
+        A step that does not fit between the ROI and the image border is treated
+        as rejected, so growing never moves a bound outside of the image.
+
+        The ROI spans rows bottom..top and columns left..right, both inclusive,
+        i.e. `bottom` is the smaller row index.
+
+        :param depths: entire depth image
+        :param initialRoi: initial ROI to grow from as list/tuple in the order: left, top, right, bottom
+        :return: final ROI found as a tuple in the order: left, top, right, bottom
+        """
+        height, width = depths.shape
+        left, top, right, bottom = initialRoi
+
+        # growing step per side; 0 means that the side cannot grow any more
+        steps = {'left': 1, 'right': 1, 'bottom': 1, 'top': 1}
+
+        # keep going while at least one side may still grow
+        while any(steps.values()):
+            # statistics of the non-zero depths of the current ROI; they are
+            # refreshed once per round and shared by the four sides
+            currentData = depths[bottom:top + 1, left:right + 1]
+            currentValid = currentData[currentData != 0]
+            currentMean = currentValid.mean()
+            currentStd = currentValid.std()
+
+            for direction in ('left', 'right', 'bottom', 'top'):
+                step = steps[direction]
+                if step == 0:
+                    continue
+
+                # number of pixels between this side and the image border.
+                # The last valid index is width - 1 / height - 1: comparing
+                # against width / height lets the ROI end one pixel outside.
+                room = {
+                    'left': left,
+                    'right': width - 1 - right,
+                    'bottom': bottom,
+                    'top': height - 1 - top,
+                }[direction]
+
+                # a step is taken only if it fits in the image and the strip passes the test
+                grow = step <= room and self.growingStep(
+                    depths, step, direction,
+                    currentMean, currentStd,
+                    left, top, right, bottom,
+                )
+                if not grow:
+                    # too large or rejected: retry with 1, or give up on this side
+                    steps[direction] = 1 if step > 1 else 0
+                    continue
+
+                # move the accepted side outwards
+                if direction == 'left':
+                    left -= step
+                elif direction == 'right':
+                    right += step
+                elif direction == 'bottom':
+                    bottom -= step
+                else:
+                    top += step
+
+                # increasing growing step for next iteration
+                steps[direction] = step * 2
+
+        return left, top, right, bottom
+    
+    def find_planes(self, img):
+        """
+        Finds planes using the algorithm selected by ``self.detect_type``.
+
+        Single stage fits, run on the preprocessed ROI:
+            P - SVD, W - weighted SVD, O - fit with outliers,
+            T - weighted by depth, R - RANSAC, G - gradients
+        Two stage fits, SVD on the preprocessed ROI followed by a pass over
+        the input image:
+            F - grow the ROI, S - project the image on the plane,
+            N - project the image using gradients
+        Any other type runs nothing and reports a mean and std of 0.
+
+        The result is stored in ``self.img_mean`` / ``self.img_std``.
+
+        :param img: input frame, passed to ``self.preprocess`` and, for the two
+                    stage fits, to the second stage as is
+        :returns: always True
+        """
+
+        # detect type -> name of the method that fits the preprocessed ROI
+        roi_stage = {
+            'P': 'fit_plane_svd',
+            'W': 'fit_plane_svd_weighted',
+            'O': 'fit_plane_with_outliers',
+            'T': 'fit_plane_svd_weighted_4d',   # weighted by depth
+            'R': 'fit_plane_ransac',
+            'G': 'fit_plane_using_gradients',   # gradients
+            'F': 'fit_plane_svd',               # initial ROI plane
+            'S': 'fit_plane_svd',               # initial ROI plane
+            'N': 'fit_plane_svd',               # initial ROI plane
+        }
+        # detect type -> name of the optional second stage run on the input image
+        image_stage = {
+            'F': 'fit_plane_ransac_and_grow',
+            'S': 'fit_plane_and_project_the_image',
+            'N': 'fit_plane_and_project_the_image_using_gradients',
+        }
+
+        kind                = self.detect_type.upper()
+        img_mean, img_std   = 0,0
+        if kind in roi_stage:
+            img_roi             = self.preprocess(img)
+            img_mean, img_std   = getattr(self, roi_stage[kind])(img_roi)
+            if kind in image_stage:
+                img_mean, img_std = getattr(self, image_stage[kind])(img)
+
+        # final measurements per frame
+        self.img_mean       = img_mean
+        self.img_std        = img_std
+        return True
+
+    def process_frame(self, img):
+        "preprocess the frame, fit a plane to it and return the plane pose"
+
+        roi             = self.preprocess(img)
+        # results of the 3D helpers are not used here - presumably called for their side effects
+        self.init_img3d(roi)
+        self.compute_img3d(roi)
+        # mean / std of the fit are not needed, the pose is read from the instance
+        _mean, _std     = self.fit_plane_with_outliers(roi)
+        return self.convert_plane_params_to_pose()
+
+
+#%% Helpers
+def draw_axis(img, rvec, tvec, cam_mtrx, cam_dist, len = 10):
+    # draws the x, y, z axes of the pose on the image, each `len` long. unit is mm
+    try:
+        points          = np.float32([[len, 0, 0], [0, len, 0], [0, 0, len], [0, 0, 0]]).reshape(-1, 3)
+        axisPoints, _   = cv.projectPoints(points, rvec.astype(np.float32), tvec.astype(np.float32), cam_mtrx, cam_dist)
+        axisPoints      = axisPoints.squeeze().astype(np.int32)
+        origin          = tuple(axisPoints[3].ravel())
+        for tip, color in zip(axisPoints[:3], ((0,0,255), (0,255,0), (255,0,0))):
+            img = cv.line(img, origin, tuple(tip.ravel()), color, 3)
+    except Exception as e:
+        print(e)
+        print(rvec, tvec, img.shape)
+    return img
+
+def draw_polygon(img, rvec, tvec, cam_mtrx, cam_dist, points3d):
+    """Project 3D points into the image and draw them as a closed outline.
+
+    points3d is reshaped to (-1, 3), projected with cv.projectPoints and drawn
+    with cv.polylines as a closed polyline, colour (0, 200, 200), thickness 1.
+    The image returned by cv.polylines is returned.
+    """
+    # unit is mm
+    vertices_3d     = np.float32(points3d).reshape(-1, 3)
+    projected       = cv.projectPoints(vertices_3d, rvec, tvec, cam_mtrx, cam_dist)[0]
+    vertices_2d     = projected.squeeze().astype(np.int32)
+
+    # outline only; a filled polygon would need cv.fillPoly instead
+    return cv.polylines(img, [vertices_2d], True, (0, 200, 200), 1)
+
+def draw_cube(img, corners, imgpts):
+    """Draw a cube given its projected image points and return the image.
+
+    imgpts is converted to int32 and reshaped to (-1, 2); rows 0..3 are used as
+    the floor, rows 4..7 as the top. `corners` is accepted but not used.
+    """
+    pts = np.int32(imgpts).reshape(-1,2)
+
+    # ground floor in green (negative thickness is passed on to drawContours)
+    img = cv.drawContours(img, [pts[:4]],-1,(0,255,0),-3)
+
+    # pillars in blue: floor point k is joined to top point k+4
+    for floor_idx in range(4):
+        img = cv.line(img, tuple(pts[floor_idx]), tuple(pts[floor_idx + 4]),(255),3)
+
+    # top layer in red
+    img = cv.drawContours(img, [pts[4:]],-1,(0,0,255),3)
+    return img
+
+#%% ROI selector from OpenCV
+class RectSelector:
+    """Rectangle selection with the mouse in an OpenCV window.
+
+    A left button press stores the drag start. While the left button flag stays
+    set, drag_rect holds (x0, y0, x1, y1) of the current selection, or None when
+    the selection has no area. On the first event without the button flag the
+    selection is cleared and, if one existed, handed to `callback`.
+    """
+    def __init__(self, win, callback):
+        self.win = win
+        self.callback = callback
+        cv.setMouseCallback(win, self.onmouse)
+        self.drag_start = None
+        self.drag_rect = None
+    def onmouse(self, event, x, y, flags, param):
+        "mouse callback registered with cv.setMouseCallback"
+        x, y = np.int16([x, y]) # BUG (marker kept from the original code, reason not stated there)
+        if event == cv.EVENT_LBUTTONDOWN:
+            self.drag_start = (x, y)
+            return
+        if not self.drag_start:
+            return
+        if flags & cv.EVENT_FLAG_LBUTTON:
+            # still dragging: normalise so that (x0, y0) is the smaller corner
+            anchor_x, anchor_y = self.drag_start
+            x0, y0 = np.minimum([anchor_x, anchor_y], [x, y])
+            x1, y1 = np.maximum([anchor_x, anchor_y], [x, y])
+            has_area = x1-x0 > 0 and y1-y0 > 0
+            self.drag_rect = (x0, y0, x1, y1) if has_area else None
+            return
+        # button released: reset the state first, then report the selection
+        finished = self.drag_rect
+        self.drag_start = None
+        self.drag_rect = None
+        if finished:
+            self.callback(finished)
+    def draw(self, vis):
+        "draw the current selection on vis, returns True when something was drawn"
+        if self.drag_rect:
+            x0, y0, x1, y1 = self.drag_rect
+            cv.rectangle(vis, (x0, y0), (x1, y1), (0, 255, 0), 2)
+            return True
+        return False
+    @property
+    def dragging(self):
+        "True while a selection rectangle exists"
+        return self.drag_rect is not None
+
+#%% Data Generator
+class DataGen:
+    def __init__(self, img_size = (640,480)):
+        """Remember the requested frame size; image and ROI start out as None.
+
+        img_size is unpacked as (w, h) by init_image().
+        """
+        self.img, self.rect = None, None    # image and roi are not created here
+        self.frame_size     = img_size
+
+
+    def add_noise(self, img_gray, noise_percentage = 0.01):
+        """Return a copy of the image with -10 or +10 added at random pixels.
+
+        noise_percentage is the fraction of img_gray.size used as the number of
+        draws. Indices come from np.random.choice(img_size, noise_size), so the
+        same pixel may be drawn more than once.
+        When noise_percentage is below 0.001 the input object itself is returned
+        and nothing is copied or logged.
+        """
+        if noise_percentage < 0.001:
+            return img_gray
+
+        pixel_count = img_gray.size
+        draw_count  = int(noise_percentage*pixel_count)
+
+        # flat pixel positions first, then the offset applied at each of them
+        positions   = np.random.choice(pixel_count, draw_count)
+        offsets     = np.random.choice([-10, 10], draw_count)
+
+        # work on a copy so the caller's image stays untouched
+        noisy       = img_gray.copy()
+        noisy.flat[positions] += offsets
+
+        log.info('adding image noise')
+        return noisy
+
+    def init_image(self, img_type = 1):
+        """Create or load a test image, store it in self.img and return it.
+
+        Synthetic images are built from self.frame_size unpacked as (w, h):
+            1  - ramp along x, 100..300                      ( / )
+            2  - two ramps along x, 100..200 each            ( /|/ )
+            3  - ramp along y, 100..200                      ( |_| )
+            4  - ramp up and down along x, 300..500..300     ( /\ )
+            5  - dome   : (|x-w/2| + |y-h/2|)/10 + 200
+            6  - sphere : distance to the centre /10 + 200
+            7  - stair  : (sign(x-w/2) + sign(y-h/2))*5 + 200
+            8  - corner : 250 everywhere, 230 where x < w/2 and y < h/2
+            10 - flat 500
+        Types 11, 12, 13 and 21 read image files from hard coded paths.
+        Any other img_type leaves self.img as it was before the call.
+
+        Raises:
+            ValueError: no image is available after the selection above, i.e.
+                img_type is unknown and no earlier image exists, or cv.imread
+                returned None for the file.
+        """
+        # create some images for test
+        w,h             = self.frame_size
+        if img_type == 1: # /
+
+            self.img        = np.tile(np.linspace(100, 300, w), (h,1))
+
+        elif img_type == 2: # /|/
+
+            self.img        = np.tile(np.linspace(100, 200, int(w/2)), (h,2))
+
+        elif img_type == 3: # |_|
+
+            self.img        = np.tile(np.linspace(100, 200, h).reshape((-1,1)), (1,w))
+
+        elif img_type == 4: # /\
+
+            self.img        = np.tile(np.hstack((np.linspace(300, 500, w>>1),np.linspace(500, 300, w>>1))), (h,1))
+
+        elif img_type == 5: # dome
+
+            x,y             = np.meshgrid(np.arange(w),np.arange(h))
+            self.img        = (np.abs(x - w/2) + np.abs(y - h/2))/10 + 200 # less slope
+
+        elif img_type == 6: # sphere
+
+            x,y             = np.meshgrid(np.arange(w),np.arange(h))
+            self.img        = np.sqrt((x - w/2)**2 + (y - h/2)**2)/10 + 200 # less slope
+
+        elif img_type == 7: # stair
+
+            x,y             = np.meshgrid(np.arange(w),np.arange(h))
+            self.img        = (np.sign(x - w/2) + np.sign(y - h/2))*5 + 200 # less slope
+
+        elif img_type == 8: # corner
+
+            x,y             = np.meshgrid(np.arange(w),np.arange(h))
+            self.img        = np.ones((h,w))*250
+            img_bool        = np.logical_and((x - w/2) < 0, (y - h/2) < 0)
+            self.img[img_bool] = 230 # quarter
+
+        elif img_type == 10: # flat
+
+            self.img        = np.ones((h,w))*500
+
+        elif img_type == 11: # chess board
+            fname           = r"C:\Users\udubin\Documents\Code\opencv-4x\samples\data\left04.jpg"
+            self.img        = cv.imread(fname)
+
+        elif img_type == 12:
+            self.img = cv.imread('image_scl_001.png', cv.IMREAD_GRAYSCALE)
+
+        elif img_type == 13:
+            self.img = cv.imread(r"wrappers\python\applications\planes\data\image_ddd_000.png", cv.IMREAD_GRAYSCALE)
+
+        elif img_type == 21:
+            self.img = cv.imread(r"C:\Data\Depth\Plane\image_scl_000.png", cv.IMREAD_GRAYSCALE)
+
+        # fail with a clear message instead of an AttributeError on None.shape below
+        if self.img is None:
+            raise ValueError(f'no image available for img_type {img_type} : unknown type or unreadable file')
+
+        self.img = self.add_noise(self.img, 0)
+        # NOTE: shape[:2] is (rows, cols) while frame_size is unpacked above as (w, h),
+        # so a second call on the same object works with swapped sizes. Left unchanged
+        # because other code may depend on the stored value.
+        self.frame_size = self.img.shape[:2]
+        return self.img
+      
+    def init_roi(self, test_type = 1):
+        """Return the ROI [xlu, ylu, xrb, yrb] of a predefined test case.
+
+        test_type 1..5 select rectangles of growing size around (320, 240).
+        Any other value returns [0, 0, frame_size[0], frame_size[1]].
+        A new list is returned on every call.
+        """
+        presets = {
+            1: [310,230,330,250],
+            2: [300,220,340,260],
+            3: [280,200,360,280],
+            4: [220,140,420,340],
+            # this entry used to sit behind a second 'test_type == 4' test and was unreachable
+            5: [200,120,440,360],
+        }
+        whole_frame = [0,0,self.frame_size[0],self.frame_size[1]]
+        return presets.get(test_type, whole_frame)
+    
+    def test_image(self):
+        """Smoke run: build image type 1 and request ROI preset 1.
+
+        Nothing is checked and nothing is returned; the image stays in self.img.
+        """
+        self.init_image(1)
+        self.init_roi(1)
+
+#%% Adds display functionality to the PlaneDetector
+class PlaneDetectorDisplay(PlaneDetector):
+    def __init__(self, detect_type='p',image_size = (1280,720)):
+        """Initialise the detector base class and the display specific state.
+
+        detect_type and image_size are forwarded to PlaneDetector.__init__ and
+        then stored again here as detect_type and frame_size; img starts as None
+        and show_dict as an empty dict.
+        """
+        super().__init__(detect_type, image_size=image_size)
+        self.show_dict      = {}       # handles of the live 3D figure, filled by show_3d_point_cloud
+        self.img            = None
+        self.frame_size     = image_size
+        self.detect_type    = detect_type
+
+    def show_image_data(self, frame, display_mode = 1):
+        """Select one channel of the frame and return it as an 8 bit BGR image.
+
+        display_mode 1 takes channel 0 (left), 2 takes channel 1 (right),
+        3 takes channel 2 scaled with alpha 0.1 and every other value takes
+        channel 2 scaled with alpha 0.03 (depth).
+        """
+        if display_mode == 1: # left
+            plane = frame[:,:,0]
+        elif display_mode == 2: # right
+            plane = frame[:,:,1]
+        else: # depth channel, mode 3 only differs in the gain
+            gain  = 0.1 if display_mode == 3 else 0.03
+            plane = cv.convertScaleAbs(frame[:,:,2], alpha=gain)
+
+        return cv.cvtColor(np.uint8(plane), cv.COLOR_GRAY2BGR)
+
+    def show_data(self, frame = None, ttl = 'Depth'):
+        """Show a frame in an OpenCV window.
+
+        Returns False when frame is None or when 'q' was read by cv.waitKey(10),
+        True otherwise. The window title is ttl + ' (q-Quit)'.
+        """
+        if frame is None :
+            log.info('No images found')
+            return False
+
+        dims = frame.shape
+        if len(dims) > 2 and dims[2]==2: # two channels : shown side by side
+            canvas  = np.uint8(np.concatenate((frame[:,:,0], frame[:,:,1]), axis = 1))
+        elif len(dims) == 2 and frame.dtype == 'uint16': # single 16 bit plane : scaled down first
+            canvas  = np.uint8(cv.convertScaleAbs(frame, alpha=0.03))
+        else:
+            canvas  = np.uint8(frame)
+
+        # halve the size while the image is wider than 2000 columns
+        while canvas.shape[1] > 2000:
+            canvas  = cv.resize(canvas, (canvas.shape[1]>>1,canvas.shape[0]>>1), interpolation=cv.INTER_LINEAR)
+
+        # double the size while the image has fewer than 300 rows
+        while canvas.shape[0] < 300:
+            canvas  = cv.resize(canvas, (canvas.shape[1]<<1,canvas.shape[0]<<1), interpolation=cv.INTER_LINEAR)
+
+        cv.imshow(ttl + ' (q-Quit)', canvas)
+        return cv.waitKey(10) != ord('q')
+
+    def show_image_with_axis(self, img, poses = []):
+        """Draw one axis frame per pose on the image and show it until a key is pressed.
+
+        Each pose is indexed as [x, y, z, rx, ry, rz, ...]: elements 0..2 are used
+        as translation, elements 3..5 as 'xyz' euler angles in degrees.
+        An empty pose list is only logged as an error; the image is still shown.
+        The default list is never modified.
+        """
+        if len(poses) < 1:
+            log.error('No poses found')
+
+        # single channel images are colour mapped so the coloured axes are visible
+        canvas = np.uint8(img)
+        if len(img.shape) < 3:
+            canvas = cv.applyColorMap(canvas, cv.COLORMAP_JET)
+
+        for pose in poses:
+            angles_deg  = pose[3:]      # orientation in degrees
+            rotvec      = Rot.from_euler('xyz',angles_deg[:3], degrees=True).as_rotvec()
+            position    = np.array(pose[:3])
+            canvas      = draw_axis(canvas, rotvec, position, self.cam_matrix, self.cam_distort, len = 10)
+
+        cv.imshow('Image & Axis', canvas)
+        log.info('show done')
+        cv.waitKey()
+
+    # def show_image_with_roi_normals(self, img = None):
+    #     "draw results : show normals at each point of ROI"
+    #     if img is None:
+    #         log.error('No image found')
+    #         return img
+    #     if self.rect is None:  # roi
+    #         log.error('No ROI found')
+    #         return img
+    #     if self.img_roi_normal is None:
+    #         log.error('No normals found')
+    #         return img
+
+    #     # deal with black and white
+    #     img_show = np.uint8(img) #.copy()
+    #     if len(img.shape) < 3:
+    #         #img_show = cv.applyColorMap(img_show, cv.COLORMAP_JET)
+    #         img_show = np.tile(img_show[:,:,np.newaxis], (1,1,3))
+
+    #     # scale normals to fir RGB color space
+    #     normals         = (self.img_roi_normal + 1)*127.5
+    #     x0,y0,x1,y1     = self.rect
+    #     img_show[y0:y1,x0:x1, :] = np.uint8(normals)
+         
+    #     #cv.imshow('Image & Normals', img_show)
+    #     #log.info('show done')
+    #     #ch = cv.waitKey(0) 
+    #     return img_show       
+
+    def show_image_with_rois(self, img, roi_params_ret = []):
+        """Draw one axis frame per ROI result on the image and show it until a key is pressed.
+
+        Every entry is converted with convert_roi_params_to_pose(); of the pose,
+        elements 0..2 are used as translation, 3..5 as 'zyx' euler angles in
+        degrees and element 6 as the axis length.
+        An empty list only prints a message; the image is still shown.
+        """
+        if len(roi_params_ret) < 1:
+            print('No poses found')
+
+        # single channel images are colour mapped so the coloured axes are visible
+        canvas = np.uint8(img)
+        if len(img.shape) < 3:
+            canvas = cv.applyColorMap(canvas, cv.COLORMAP_JET)
+
+        for roi_p in roi_params_ret:
+
+            pose        = self.convert_roi_params_to_pose(roi_p)
+            angles_deg  = pose[3:6]     # orientation in degrees
+            axis_len    = pose[6]       # level
+            rot_matrix  = Rot.from_euler('zyx',angles_deg, degrees = True).as_matrix()
+            rvec        = cv.Rodrigues(rot_matrix)[0]
+            tvec        = np.array(pose[:3], dtype = np.float32).reshape(rvec.shape) # center of the patch
+            canvas      = draw_axis(canvas, rvec, tvec, self.cam_matrix, self.cam_distort, len = axis_len)
+
+        cv.imshow('Image & Axis', canvas)
+        log.info('show done')
+        cv.waitKey()
+
+    def show_points_3d_with_normal(self, img3d, pose = None):
+        """Scatter the points in a Matplotlib 3D axes, optionally with a normal line.
+
+        img3d is indexed as img3d[:, k] for k = 0, 1, 2 to get x, y and z.
+        When pose is given it is flattened; a red line is drawn from
+        pose[0:3] to pose[0:3] + 10*pose[3:6]. plt.show() is called at the end.
+        """
+        fig = plt.figure()
+        ax  = fig.add_subplot(projection='3d')
+
+        xs, ys, zs = (img3d[:,col].reshape((-1,1)) for col in range(3))
+        ax.scatter(xs, ys, zs, marker='.')
+
+        if pose is not None:
+            flat_pose  = pose.flatten()
+            normal     = flat_pose[3:6].flatten()*10
+            # one [start, end] pair per coordinate
+            segments   = [[flat_pose[k], flat_pose[k]+normal[k]] for k in range(3)]
+            ax.plot(*segments, 'r', label='Normal')
+
+        ax.set_xlabel('X [mm]')
+        ax.set_ylabel('Y [mm]')
+        ax.set_zlabel('Z [mm]')
+        ax.set_aspect('equal', 'box')
+        plt.show()
+
+    def show_rois_3d_with_normals(self, roi_params_ret = [], roi_init = None):
+        """Scatter the points of a window of self.img_xyz and draw one line per ROI result.
+
+        roi_init is [x0, y0, x1, y1]; when None it is built as
+        [0, 0, frame_size[1], frame_size[0]]. Returns early (after a log message)
+        when roi_params_ret is empty or self.img_xyz is None.
+        For each ROI the pose from convert_roi_params_to_pose() is flattened and
+        a red line is drawn from pose[0:3] to pose[0:3] + pose[3:6]*pose[6].
+        """
+        if len(roi_params_ret) < 1:
+            log.info('roi_params_ret is empty')
+            return
+
+        # window of the point cloud to show - keeps the plot compact
+        if roi_init is None:
+            roi_init   = [0,0,self.frame_size[1], self.frame_size[0]]
+        x0,y0,x1,y1    = roi_init
+
+        if self.img_xyz is None:
+            log.info('Need init')
+            return
+
+        cloud          = self.img_xyz[y0:y1,x0:x1,:]
+        xs, ys, zs     = (cloud[:,:,ch].reshape((-1,1)) for ch in range(3))
+
+        fig = plt.figure()
+        ax = fig.add_subplot(projection='3d')
+        ax.scatter(xs, ys, zs, marker='.')
+
+        for roi_p in roi_params_ret:
+            pose       = self.convert_roi_params_to_pose(roi_p).flatten()
+            normal     = pose[3:6]*pose[6]
+            # one [start, end] pair per coordinate
+            segments   = [[pose[k], pose[k]+normal[k]] for k in range(3)]
+            ax.plot(*segments, 'r', label='Normal')
+
+        ax.set_xlabel('X [mm]')
+        ax.set_ylabel('Y [mm]')
+        ax.set_zlabel('Z [mm]')
+        ax.set_aspect('equal', 'box')
+        plt.show() #block=False)
+
+    def show_3d_point_cloud(self):
+        """Show the points of self.rect_xyz in a Matplotlib 3D figure that is updated in place.
+
+        Does nothing when self.rect or self.rect_xyz is None.
+        First call (no 'line' entry in self.show_dict): a figure numbered
+        rect[0]+rect[1] is created, configured and shown without blocking, and
+        its handles are stored in self.show_dict under 'fig', 'ax' and 'line'.
+        Later calls only replace the scatter coordinates.
+        Every call ends by redrawing the canvas and flushing its events.
+        """
+        if self.rect is None or self.rect_xyz is None:
+            return
+        X = self.rect_xyz[:,:3]
+        if 'line' not in self.show_dict:
+
+            fig_num     = int(self.rect[0]+self.rect[1])
+            fig         = plt.figure(fig_num)
+            plt.clf()
+            # gca(projection=...) is tried first and add_subplot is the fallback,
+            # presumably to support several Matplotlib versions - TODO confirm.
+            # Only ordinary errors trigger the fallback, not KeyboardInterrupt/SystemExit.
+            try:
+                ax = fig.gca(projection='3d')
+            except Exception:
+                ax = fig.add_subplot(projection = '3d')
+            fig.tight_layout()
+            fig.suptitle(f'ROI : {self.rect[0]},{self.rect[1]}')
+            ax.set_proj_type('ortho')
+            lineGray    = ax.scatter(X[:,0], X[:,1], X[:,2])
+
+            # fully transparent panes
+            ax.xaxis.set_pane_color((0.0, 0.0, 0.0, 0.0))
+            ax.yaxis.set_pane_color((0.0, 0.0, 0.0, 0.0))
+            ax.zaxis.set_pane_color((0.0, 0.0, 0.0, 0.0))
+
+            ax.set_xlabel('x')
+            ax.set_ylabel('y')
+            ax.set_zlabel('z')
+            ax.invert_yaxis()
+            ax.view_init(elev=-70, azim=-90)
+
+            # limits are fixed from the first point set and not updated afterwards
+            ax.set_xlim(X[:,0].min()*0.8, X[:,0].max()*1.2)
+            ax.set_ylim(X[:,1].min()*0.8, X[:,1].max()*1.2)
+            ax.set_zlim(X[:,2].min()*0.9, X[:,2].max()*1.2)
+            plt.ion()
+            plt.show(block = False)
+            self.show_dict = {'fig':fig, 'ax':ax, 'line':lineGray}
+        else:
+            # writes a private attribute of the scatter artist to move the points
+            self.show_dict['line']._offsets3d = (X[:,0], X[:,1], X[:,2])
+
+        self.show_dict['fig'].canvas.draw_idle()
+        self.show_dict['fig'].canvas.flush_events()
+        return
+
+    def show_axis(self, vis):
+        """Draw the axis frame of the estimated plane on vis and return the image.
+
+        vis is returned untouched while self.plane_params is None. Otherwise the
+        rotation comes from convert_plane_params(self.plane_params), the
+        translation is self.plane_center and the axis length is 50.
+        """
+        if self.plane_params is None:
+            return vis
+
+        return draw_axis(vis,
+                         self.convert_plane_params(self.plane_params),
+                         self.plane_center,
+                         self.cam_matrix, self.cam_distort, len = 50)
+    
+    def show_text(self, vis):
+        """Write '<detect_type>:<mean>:<std>' ten pixels above the ROI corner (x0, y0).
+
+        vis is returned untouched when self.img_mean or self.rect is None.
+        """
+        mean_val, std_val = self.img_mean, self.img_std
+        if mean_val is None or self.rect is None:
+            return vis
+
+        left, top, _right, _bottom = self.rect
+        label = f'{self.detect_type}:{mean_val:.2f}:{std_val:.3f}'
+        return draw_str(vis,(left,top-10),label)
+
+    def show_rect_and_text(self, vis):
+        """Draw the ROI rectangle and a text label above it.
+
+        vis is returned untouched when self.img_mean or self.rect is None.
+        The label is '<detect_type>:<mean>-<std>'; for detect_type 'F' it is
+        replaced by '<detect_type>:<img_fill> %'.
+        """
+        mean_val, std_val = self.img_mean, self.img_std
+        if mean_val is None or self.rect is None:
+            return vis
+
+        x0, y0, x1, y1 = self.rect
+        # black frame on bright content, light frame on dark content
+        roi_is_bright = vis[y0:y1,x0:x1].mean() > 128
+        frame_clr = (0, 0, 0) if roi_is_bright else (240,240,240)
+        vis = cv.rectangle(vis, (x0, y0), (x1, y1), frame_clr, 2)
+
+        label = f'{self.detect_type}:{mean_val:.2f}-{std_val:.3f}'
+        if self.detect_type == 'F':
+            label = f'{self.detect_type}:{self.img_fill:.2f} %'
+        return draw_str(vis,(x0,y0-10),label)
+
+    def show_rect_and_axis_projected(self, vis):
+        """Draw the plane axis frame and the projected outline of self.rect_3d.
+
+        vis is returned untouched when self.rect or self.plane_params is None.
+        Rotation comes from convert_plane_params(self.plane_params), translation
+        is self.plane_center; the axis length is 50.
+        """
+        if self.rect is None or self.plane_params is None:
+            return vis
+
+        plane_rvec  = self.convert_plane_params(self.plane_params)
+        plane_tvec  = self.plane_center
+
+        with_axis   = draw_axis(vis, plane_rvec, plane_tvec, self.cam_matrix, self.cam_distort, len = 50)
+        return draw_polygon(with_axis, plane_rvec, plane_tvec, self.cam_matrix, self.cam_distort, self.rect_3d)
+
+    def show_mask(self, img):
+        """Paint the pixels where self.img_mask > 0.75 with self.color_mask.
+
+        The image is converted with np.uint8 and, when it has fewer than three
+        dimensions, colour mapped with COLORMAP_JET. When mask and image differ
+        in their first two dimensions an error is logged and the image is
+        returned without the mask.
+        """
+        canvas = np.uint8(img)
+        if len(img.shape) < 3:
+            canvas = cv.applyColorMap(canvas, cv.COLORMAP_JET)
+
+        sizes_match = np.all(self.img_mask.shape[:2] == canvas.shape[:2])
+        if sizes_match:
+            canvas[self.img_mask > 0.75] = self.color_mask
+        else:
+            log.error('mask and image size are not equal')
+        return canvas
+    
+    def show_polygon(self, img):
+        """Draw self.polygon as a closed green outline (thickness 2) on the image.
+
+        The image is returned untouched when self.polygon is None.
+        """
+        if self.polygon is None:
+            return img
+
+        # int32 vertices reshaped to (n_points, 1, 2) before they go to cv.polylines
+        vertices = np.array(self.polygon, np.int32).reshape((-1, 1, 2))
+        return cv.polylines(img, [vertices], isClosed=True, color=(0, 255, 0), thickness=2)
+    
+    def show_image_colored_by_normals(self, img = None):
+        """Blend the ROI normals, mapped to colours, into a copy of the image.
+
+        The input is returned unchanged (after an error log) when img, self.rect
+        or self.img_roi_normal is None. Otherwise a uint8 copy is made, a 2D
+        image is replicated to three channels, and inside self.rect the copy is
+        blended 0.2 : 0.8 with (normal + 1) * 127.5. Pixels whose depth is below
+        1 are set to 0 in the first three channels.
+
+        Depth is channel 2 of a multi channel image. For a 2D image the image
+        itself is used as depth (assumption - confirm against the callers);
+        channel 2 used to be indexed unconditionally, which raised IndexError
+        for exactly the 2D input handled above.
+        """
+        if img is None:
+            log.error('No image found')
+            return img
+        if self.rect is None:  # roi
+            log.error('No ROI found')
+            return img
+        if self.img_roi_normal is None:
+            log.error('No normals found')
+            return img
+
+        # deal with black and white
+        single_plane = len(img.shape) < 3
+        img_show     = np.uint8(img).copy()
+        if single_plane:
+            img_show = np.tile(img_show[:,:,np.newaxis], (1,1,3))
+
+        # scale normals from [-1, 1] to the 0..255 colour range
+        normals                  = (self.img_roi_normal + 1)*127.5
+        x0,y0,x1,y1              = self.rect
+        img_show[y0:y1,x0:x1, :] = cv.addWeighted(img_show[y0:y1,x0:x1, :], 0.2, np.uint8(normals), 0.8, 0)
+
+        # make non valid pixels black
+        depth            = img if single_plane else img[:,:,2]
+        non_valid_pixels = depth < 1
+        img_show[non_valid_pixels, :3] = 0
+
+        return img_show
+
+    def show_scene(self, vis):
+        """Compose the overlay: mask, then projected rectangle with axis, then text.
+
+        show_rect_and_text, show_axis and show_image_colored_by_normals are
+        alternatives that are not part of the current composition.
+        """
+        with_mask   = self.show_mask(vis)
+        with_plane  = self.show_rect_and_axis_projected(with_mask)
+        return self.show_text(with_plane)
+
+        
+
+# ----------------------
+#%% Tests
+class TestPlaneDetector(unittest.TestCase):
+
+    def test_image_show(self):
+        "axis of one hand written pose is shown on synthetic image 1"
+        gen         = DataGen()
+        frame       = gen.init_image(1)
+        display     = PlaneDetectorDisplay()
+        one_pose    = [[0,0,100,0,0,45,10]]
+        display.show_image_with_axis(frame, one_pose)
+        self.assertIsNotNone(gen.img)
+
+    def test_init_img3d(self):
+        "init_img3d without arguments returns something after init_image"
+        gen         = DataGen()
+        frame       = gen.init_image(1)
+        display     = PlaneDetectorDisplay()
+        display.init_image(frame)
+        cloud       = display.init_img3d()
+        self.assertIsNotNone(cloud)
+
+    def test_compute_img3d(self):
+        "init_img3d followed by compute_img3d returns something"
+        gen         = DataGen()
+        frame       = gen.init_image(1)
+        display     = PlaneDetectorDisplay()
+        display.init_img3d(frame)
+        cloud       = display.compute_img3d(frame)
+        self.assertIsNotNone(cloud)
+
+    def test_show_img3d(self):
+        "computes the XYZ structure and shows the part inside ROI preset 1"
+        gen         = DataGen()
+        frame       = gen.init_image(1)
+        display     = PlaneDetectorDisplay()
+        display.init_img3d(frame)
+        cloud       = display.compute_img3d(frame)
+        x0,y0,x1,y1 = display.init_roi(1)
+        display.show_points_3d_with_normal(cloud[y0:y1,x0:x1,:])
+        self.assertIsNotNone(cloud)
+
+    def test_convert_roi_to_points(self):
+        "convert_roi_to_points returns more than 4 rows for ROI preset 1"
+        size_wh     = (640,480)
+        frame       = DataGen(img_size=size_wh).init_image(1)
+        display     = PlaneDetectorDisplay(image_size=size_wh)
+        roi         = display.init_roi(1)
+        points3d    = display.convert_roi_to_points(frame, roi_rect=roi)
+        self.assertGreater(points3d.shape[0], 4)
+                     
+    def test_fit_plane_svd(self):
+        "fit_plane_svd on synthetic image 5, result shown in 2D and 3D"
+        frame       = DataGen().init_image(5)
+        display     = PlaneDetectorDisplay()
+        display.init_roi(4)
+        roi_image   = display.preprocess(frame)
+        _mean, _std = display.fit_plane_svd(roi_image)
+        pose        = display.convert_plane_params_to_pose()
+        display.show_image_with_axis(frame, pose)
+        display.show_points_3d_with_normal(display.matrix_xyz, pose)
+        self.assertGreater(pose[0][2], 0.01)
+
+    def test_fit_plane_depth_image(self):
+        "fit_plane_svd on the image file of type 13, result shown in 2D and 3D"
+        frame       = DataGen().init_image(13)
+        display     = PlaneDetectorDisplay()
+        display.init_roi(4)
+        roi_image   = display.preprocess(frame)
+        _mean, _std = display.fit_plane_svd(roi_image)
+        pose        = display.convert_plane_params_to_pose()
+        display.show_image_with_axis(frame, pose)
+        display.show_points_3d_with_normal(display.rect_xyz, pose)
+        self.assertGreater(pose[0][2], 0.01)
+
+    def test_fit_plane_with_outliers(self):
+        "fit_plane_with_outliers on the image file of type 13, result shown in 2D and 3D"
+        frame       = DataGen().init_image(13)
+        display     = PlaneDetectorDisplay()
+        display.init_roi(4)
+        roi_image   = display.preprocess(frame)
+        _mean, _std = display.fit_plane_with_outliers(roi_image)
+        pose        = display.convert_plane_params_to_pose()
+        display.show_image_with_axis(frame, pose)
+        display.show_points_3d_with_normal(display.rect_xyz, pose)
+        self.assertGreater(pose[0][2], 0.01)
+
+    def test_fit_plane_ransac(self):
+        "fit_plane_ransac on synthetic image 6, result shown in 2D and 3D"
+        frame       = DataGen().init_image(6)
+        display     = PlaneDetectorDisplay()
+        display.init_roi(4)
+        roi_image   = display.preprocess(frame)
+        _mean, _std = display.fit_plane_ransac(roi_image)
+        pose        = display.convert_plane_params_to_pose()
+        display.show_image_with_axis(frame, pose)
+        display.show_points_3d_with_normal(display.rect_xyz, pose)
+        self.assertGreater(pose[0][2], 0.01)
+
+    def test_fit_plane_using_gradients(self):
+        "fit_plane_using_gradients on synthetic image 1 with ROI preset 13"
+        size_wh     = (640,480)
+        frame       = DataGen(img_size=size_wh).init_image(1)
+        display     = PlaneDetectorDisplay(image_size=size_wh)
+        roi         = display.init_roi(13)      # image center
+        _mean, _std = display.fit_plane_using_gradients(frame, roi)
+        pose        = display.convert_plane_params_to_pose()
+        display.show_image_colored_by_normals(frame)
+        display.show_image_with_axis(frame, pose)
+        display.show_points_3d_with_normal(display.rect_xyz, pose)
+        self.assertGreater(pose[0][2], 0.01)
+
+    def test_split_roi(self):
+        "fits ROI preset 4 recursively and expects every returned ROI error to be at most 0.01"
+        detector    = PlaneDetector()
+        detector.MIN_STD_ERROR = 0.1
+        frame       = detector.init_image(13)
+        start_roi   = detector.init_roi(4)
+        detector.init_img3d(frame)
+        detector.compute_img3d(frame)
+        found_rois  = detector.fit_and_split_roi_recursively(start_roi)
+        # NOTE(review): both show_* methods are defined on PlaneDetectorDisplay in the
+        # visible code while this object is a PlaneDetector - verify they exist there
+        detector.show_rois_3d_with_normals(found_rois, start_roi)
+        detector.show_image_with_rois(detector.img, found_rois)
+
+        for one_roi in found_rois:
+            self.assertFalse(one_roi['error'] > 0.01)
+
+    def test_plane_fit(self):
+        "runs detector type 'P' with ROI preset 21 over video source 422 until the data ends or 'q' is pressed"
+        source          = DataSourceMovie()
+        alive           = source.init_video(422)    # 421,422, 423-ok
+        detector        = PlaneDetectorDisplay('P')
+        detector.init_roi(21)                       # center image
+        while alive:
+            alive, frame = source.get_data()
+            if not alive:
+                break
+            detector.find_planes(frame)
+            canvas      = detector.show_image_data(frame, display_mode=3)
+            canvas      = detector.show_scene(canvas)
+            alive       = detector.show_data(canvas) & alive
+        source.finish()
+        self.assertFalse(alive)
+
+    def test_plane_fit_weighted(self):
+        "runs detector type 'W' with ROI preset 21 over video source 422, showing channel 0"
+        source          = DataSourceMovie()
+        alive           = source.init_video(422)    # 421,422, 423-ok
+        detector        = PlaneDetectorDisplay('W')
+        detector.init_roi(21)                       # center image
+        while alive:
+            alive, frame = source.get_data()
+            if not alive:
+                break
+            detector.find_planes(frame)
+            canvas      = detector.show_image_data(frame, display_mode=1)
+            canvas      = detector.show_scene(canvas)
+            alive       = detector.show_data(canvas) & alive
+        source.finish()
+        self.assertFalse(alive)
+
+    def test_plane_fit_outliers(self):
+        "runs detector type 'O' with ROI preset 41 over video source 421"
+        source          = DataSourceMovie()
+        alive           = source.init_video(421)    # 421,422, 423-ok
+        detector        = PlaneDetectorDisplay('O')
+        detector.init_roi(41)                       # center image
+        while alive:
+            alive, frame = source.get_data()
+            if not alive:
+                break
+            detector.find_planes(frame)
+            canvas      = detector.show_image_data(frame, display_mode=3)
+            canvas      = detector.show_scene(canvas)
+            alive       = detector.show_data(canvas) & alive
+        source.finish()
+        self.assertFalse(alive)
+
+    def test_plane_fit_ransac(self):
+        "runs detector type 'R' with ROI preset 21 over video source 422"
+        source          = DataSourceMovie()
+        alive           = source.init_video(422)    # 421,422, 423-ok
+        detector        = PlaneDetectorDisplay('R')
+        detector.init_roi(21)                       # center image
+        while alive:
+            alive, frame = source.get_data()
+            if not alive:
+                break
+            detector.find_planes(frame)
+            canvas      = detector.show_image_data(frame, display_mode=3)
+            canvas      = detector.show_scene(canvas)
+            alive       = detector.show_data(canvas) & alive
+        source.finish()
+        self.assertFalse(alive)
+
+    def test_plane_fit_weighted_4d(self):
+        "runs detector type 'T' with ROI preset 53 over video source 422"
+        source          = DataSourceMovie()
+        alive           = source.init_video(422)    # 421,422, 423-ok
+        detector        = PlaneDetectorDisplay('T')
+        detector.init_roi(53)                       # center image
+        while alive:
+            alive, frame = source.get_data()
+            if not alive:
+                break
+            detector.find_planes(frame)
+            canvas      = detector.show_image_data(frame, display_mode=3)
+            canvas      = detector.show_scene(canvas)
+            alive       = detector.show_data(canvas) & alive
+        source.finish()
+        self.assertFalse(alive)
+
+    def test_plane_fit_using_gradients(self):
+        "runs detector type 'G' with ROI preset 33 over video source 422"
+        source          = DataSourceMovie()
+        alive           = source.init_video(422)    # 421,422, 423-ok
+        detector        = PlaneDetectorDisplay('G')
+        detector.init_roi(33)                       # center image
+        while alive:
+            alive, frame = source.get_data()
+            if not alive:
+                break
+            detector.find_planes(frame)
+            canvas      = detector.show_image_data(frame, display_mode=3)
+            canvas      = detector.show_scene(canvas)
+            alive       = detector.show_data(canvas) & alive
+        source.finish()
+        self.assertFalse(alive)
+
+    def test_multi_plane_fit(self):
+        """Replay recording 422 with one 'T' detector per ROI preset, all drawn on one canvas."""
+        source      = DataSourceMovie()
+        ok          = source.init_video(422)    # original note: 421, 422, 423 - ok
+
+        # one independent detector for every ROI preset
+        detectors   = []
+        for preset in (21, 22, 23, 31, 32, 33, 52, 53):
+            detector = PlaneDetectorDisplay('T')
+            detector.init_roi(preset)
+            detectors.append(detector)
+
+        while ok:
+            ok, frame = source.get_data()
+            if not ok:
+                break
+            # the first detector renders the base image, every detector adds its overlay
+            canvas  = detectors[0].show_image_data(frame, display_mode=3)
+            for detector in detectors:
+                detector.find_planes(frame)
+                canvas = detector.show_scene(canvas)
+
+            # the last detector in the list is the one that displays the canvas
+            ok      = detectors[-1].show_data(canvas)
+
+        source.finish()
+        self.assertFalse(ok)
+
+    def test_grid_plane_fit(self):
+        """Replay recording 422 with a 10x10 grid of 'P' detectors on a 1280x720 frame."""
+        source      = DataSourceMovie()
+        ok          = source.init_video(422)    # original note: 421, 422, 423 - ok
+
+        # generate grid of trackers: centres sit on an (nx+1) x (ny+1) lattice,
+        # each ROI extends 40% of the lattice pitch to either side of its centre
+        cols, rows      = 10, 10
+        width, height   = 1280, 720
+        step_x, step_y  = width // (cols + 1), height // (rows + 1)
+        half_w, half_h  = int(step_x * 0.4), int(step_y * 0.4)
+        detectors       = []
+        for col in range(1, cols + 1):
+            for row in range(1, rows + 1):
+                cx, cy          = col * step_x, row * step_y
+                detector        = PlaneDetectorDisplay('P')
+                detector.rect   = (cx - half_w, cy - half_h, cx + half_w, cy + half_h)
+                detectors.append(detector)
+
+        while ok:
+            ok, frame = source.get_data()
+            if not ok:
+                break
+            canvas  = detectors[0].show_image_data(frame, display_mode=3)
+            for detector in detectors:
+                detector.find_planes(frame)
+                canvas = detector.show_scene(canvas)
+
+            # the last detector in the grid is the one that displays the canvas
+            ok      = detectors[-1].show_data(canvas)
+
+        source.finish()
+        self.assertFalse(ok)
+
+
+    def test_plane_fit_with_show_3d(self):
+        """Replay recording 421 through a 'P' detector (ROI preset 41) and show the 3D point cloud per frame."""
+        source      = DataSourceMovie()
+        ok          = source.init_video(421)    # original note: 421, 422, 423 - ok
+        detector    = PlaneDetectorDisplay('P')
+        detector.init_roi(41)
+        while ok:
+            ok, frame = source.get_data()
+            if not ok:
+                break
+            detector.find_planes(frame)
+            canvas  = detector.show_image_data(frame, display_mode=3)
+            canvas  = detector.show_scene(canvas)
+            ok      = detector.show_data(canvas) & ok
+            # the 3D view is refreshed after the 2D canvas has been shown
+            detector.show_3d_point_cloud()
+        source.finish()
+        self.assertFalse(ok)
+
+    def test_plane_fit_colored_by_normals(self):
+        """Replay recording 422 through a 'G' detector (ROI preset 0) and show the image colored by normals."""
+        source      = DataSourceMovie()
+        ok          = source.init_video(422)    # original note: 421, 422, 423 - ok
+        detector    = PlaneDetectorDisplay('G')
+        detector.init_roi(0)
+        while ok:
+            ok, frame = source.get_data()
+            if not ok:
+                break
+            detector.find_planes(frame)
+            canvas  = detector.show_image_data(frame, display_mode=1)
+            # the normals coloring is used here in place of the show_scene overlay
+            canvas  = detector.show_image_colored_by_normals(canvas)
+            ok      = detector.show_data(canvas) & ok
+
+        source.finish()
+        self.assertFalse(ok)
+
+    def test_plane_fit_with_grow(self):
+        """Replay recording 422 through an 'F' detector (ROI preset 21) and show its mask on each frame."""
+        source      = DataSourceMovie()
+        ok          = source.init_video(422)    # original note: 421, 422, 423 - ok
+        detector    = PlaneDetectorDisplay('F')
+        detector.init_roi(21)
+        while ok:
+            ok, frame = source.get_data()
+            if not ok:
+                break
+            detector.find_planes(frame)
+            canvas  = detector.show_image_data(frame, display_mode=1)
+            # the mask overlay is used here in place of the show_scene overlay
+            canvas  = detector.show_mask(canvas)
+            ok      = detector.show_data(canvas) & ok
+
+        source.finish()
+        self.assertFalse(ok)
+
+    def test_plane_fit_with_project_on_image(self):
+        """Replay recording 422 through an 'S' detector (ROI preset 21) and show its mask on each frame."""
+        source      = DataSourceMovie()
+        ok          = source.init_video(422)    # original note: 421, 422, 423 - ok
+        detector    = PlaneDetectorDisplay('S')
+        detector.init_roi(21)
+        while ok:
+            ok, frame = source.get_data()
+            if not ok:
+                break
+            detector.find_planes(frame)
+            canvas  = detector.show_image_data(frame, display_mode=1)
+            # the mask overlay is used here in place of the show_scene overlay
+            canvas  = detector.show_mask(canvas)
+            ok      = detector.show_data(canvas) & ok
+
+        source.finish()
+        self.assertFalse(ok)
+
+    def test_multi_plane_fit_with_project_on_image(self):
+        """Replay recording 422 with one 'S' detector per ROI preset, stacking every mask on one canvas."""
+        source      = DataSourceMovie()
+        ok          = source.init_video(422)    # original note: 421, 422, 423 - ok
+
+        # one independent detector for every ROI preset
+        # (presets 22, 23, 32 and 53 were left out of this run in the original list)
+        detectors   = []
+        for preset in (21, 31, 52, 33):
+            detector = PlaneDetectorDisplay('S')
+            detector.init_roi(preset)
+            detectors.append(detector)
+
+        while ok:
+            ok, frame = source.get_data()
+            if not ok:
+                break
+            canvas  = detectors[0].show_image_data(frame, display_mode=3)
+            for detector in detectors:
+                detector.find_planes(frame)
+                canvas = detector.show_mask(canvas)
+
+            # the last detector in the list is the one that displays the canvas
+            ok      = detectors[-1].show_data(canvas)
+
+        source.finish()
+        self.assertFalse(ok)
+
+# ----------------------
+#%% Run Test
+def RunTest():
+    """Call one hand-picked TestPlaneDetector method directly, without the unittest runner."""
+    case = TestPlaneDetector()
+
+    # Alternatives kept from the original menu (swap the call below to run one):
+    #   image / 3D basics : test_image_show (ok), test_init_img3d (ok), test_compute_img3d (ok),
+    #                       test_show_img3d (ok), test_convert_roi_to_points (ok)
+    #   fit primitives    : test_fit_plane_svd (ok), test_fit_plane_depth_image, test_fit_plane_with_outliers,
+    #                       test_fit_plane_ransac, test_fit_plane_using_gradients (ok), test_split_roi
+    #   recorded video    : test_plane_fit (ok), test_plane_fit_weighted (ok), test_plane_fit_outliers (ok),
+    #                       test_plane_fit_with_show_3d (ok), test_plane_fit_ransac (ok),
+    #                       test_plane_fit_weighted_4d (ok), test_multi_plane_fit (ok),
+    #                       test_plane_fit_using_gradients (ok), test_plane_fit_colored_by_normals (ok),
+    #                       test_plane_fit_with_grow, test_grid_plane_fit (ok),
+    #                       test_plane_fit_with_project_on_image (ok)
+    case.test_multi_plane_fit_with_project_on_image()  # ok
+
+    # To use the standard runner instead:
+    #   unittest.main()
+
+# ----------------------
+#%% App
+class PlaneApp:
+    """Interactive viewer that runs plane detectors on a live RealSense stream.
+
+    Constructing the object opens the camera, initialises the stereo model,
+    creates the OpenCV window and immediately enters the blocking ``run`` loop.
+
+    Keys handled by the loop:
+        space                   toggle pause
+        a r p o g f w s n t     select the detect type given to ROIs added afterwards
+        0 1 2 3                 select the displayed image ('rs', 'left', 'right', 'fs')
+        m                       add a 10x10 grid of ROIs
+        c                       remove the most recently added ROI
+        q / Esc                 quit
+    """
+
+    # key code -> (detect type, label printed in front of it in the log line)
+    _DETECT_KEYS = {
+        ord('a'): ('A', 'Detect type'),
+        ord('r'): ('R', 'Detect Ransac'),
+        ord('p'): ('P', 'Detect svd'),
+        ord('o'): ('O', 'Detect with outliers'),
+        ord('g'): ('G', 'Detect gradients'),
+        ord('f'): ('F', 'Detect fit and grow'),
+        ord('w'): ('W', 'Detect weighted svd'),
+        ord('s'): ('S', 'Detect entire image'),
+        ord('n'): ('N', 'Detect entire image using normals'),
+        ord('t'): ('T', 'Detect weighted svd 4d'),
+    }
+
+    # key code -> value stored in show_type (anything other than left/right/rs shows the stereo depth)
+    _SHOW_KEYS = {
+        ord('0'): 'rs',
+        ord('1'): 'left',
+        ord('2'): 'right',
+        ord('3'): 'fs',
+    }
+
+    def __init__(self):
+        self.cap            = RealSense()
+        self.cap.set_display_mode('d16')
+        #self.cap.set_exposure(1000)
+        self.frame          = None      # last frame read from the camera (copy)
+        self.rect           = None      # ROI used by show_histogram; not assigned anywhere else in this class
+        self.paused         = False
+        self.trackers       = []        # one PlaneDetectorDisplay per ROI
+
+        self.camera_bf      = self.cap.get_bf() # for depth conversion (should be in meter)
+        self.args           = process_arguments()
+        self.args.scale     = 0.5
+
+        # init
+        self.model          = foundation_stereo_algo_init(self.args)
+
+        self.show_dict      = {} # hist show: holds 'fig', 'ax', 'line' once the histogram figure exists
+
+        self.detect_type    = 'G'
+        self.show_type      = 'depth' # left, right, rs; any other value shows the stereo depth
+        # NOTE(review): the key help in the title says '0-depth' and omits '3' and 'm', while the
+        # loop maps 0->'rs', 3->'fs' and m->grid. The string is left unchanged because it is
+        # also the window identifier.
+        self.win_name       = 'Plane Detector (q-quit, c-clear, a,r,p,o,g,f,t,s,n,w 0-depth,1-left,2-right)'
+
+        cv.namedWindow(self.win_name )
+        self.rect_sel       = RectSelector(self.win_name , self.on_rect)
+        self.run()
+
+    def on_rect(self, rect):
+        "remember ROI defined by user"
+        tracker             = PlaneDetectorDisplay()
+        tracker.rect        = rect
+        # the detect type is captured at creation time; later key presses affect only new ROIs
+        tracker.detect_type = self.detect_type
+        self.trackers.append(tracker)
+        log.info(f'Adding plane estimator at  : {rect}')
+
+    def generate_grid_trackers(self):
+        "generate grid of trackers"
+        nx, ny          = 10, 10
+        w,h             = self.cap.frame_size
+        dx, dy          = int(w/nx), int(h/ny)
+        for ix in range(nx):
+            for iy in range(ny):
+                x0, y0 = ix*dx, iy*dy
+                # clamp the far corner so the last row/column stays inside the frame
+                x1, y1 = min((ix+1)*dx, w-1), min((iy+1)*dy, h-1)
+                self.on_rect((x0,y0,x1,y1))
+
+    def process_image(self, img_depth):
+        "makes measurements"
+        for tracker in self.trackers:
+            tracker.find_planes(img_depth)
+
+    def show_scene(self, frame, img_depth_fs):
+        """Build the BGR canvas for display and draw the ROI selector and every tracker on it.
+
+        Args:
+            frame: 3-channel frame; channels 0, 1, 2 are used as left, right and RealSense depth.
+            img_depth_fs: depth image computed from the stereo pair.
+
+        Returns:
+            The annotated BGR image.
+        """
+        if self.show_type == 'left':
+            vis     = frame[:,:,0].astype(np.uint8)
+        elif self.show_type == 'right':
+            vis     = frame[:,:,1].astype(np.uint8)
+        elif self.show_type == 'rs':
+            vis     = cv.convertScaleAbs(frame[:,:,2], alpha=0.1).astype(np.uint8)
+        else:
+            vis     = cv.convertScaleAbs(img_depth_fs, alpha=0.1).astype(np.uint8)
+
+        vis     = cv.cvtColor(vis, cv.COLOR_GRAY2BGR)
+        self.rect_sel.draw(vis)
+
+        for tracker in self.trackers:
+            vis = tracker.show_scene(vis)
+
+        return vis
+
+    def show_histogram(self, img):
+        """Plot (or refresh) a 1024-bin histogram of the values inside ``self.rect``.
+
+        Returns 0 when no ROI is set and None otherwise (kept as in the original).
+        """
+        if self.rect is None:
+            #print('define ROI')
+            return 0
+
+        x0, y0, x1, y1 = self.rect
+        img_roi = img[y0:y1,x0:x1].astype(np.float32)
+        # Compute histogram
+        hist, bins = np.histogram(img_roi.flatten(), bins=1024, range=[0, 2**15])
+
+        if 'fig' not in self.show_dict:
+            # first call: create the figure once and keep the handles for later refreshes
+            fig, ax = plt.subplots()
+            fig.set_size_inches([24, 16])
+            ax.set_title('Histogram (Depth)')
+            ax.set_xlabel('Bin')
+            ax.set_ylabel('Frequency')
+            lineGray, = ax.plot(bins[:-1], hist, c='k', lw=3)
+            ax.set_xlim(bins[0], bins[-1])
+            ax.set_ylim(0, max(hist)+10)
+            plt.ion()
+            #plt.show()
+
+            self.show_dict = {'fig':fig, 'ax':ax, 'line':lineGray}
+        else:
+            self.show_dict['line'].set_ydata(hist)
+
+        self.show_dict['fig'].canvas.draw()
+        return
+
+    def _handle_key(self, ch):
+        """Apply one key code from cv.waitKey; return False when the loop should stop."""
+        if ch == 27 or ch == ord('q'):
+            return False
+
+        if ch == ord(' '):
+            self.paused = not self.paused
+        elif ch in self._DETECT_KEYS:
+            self.detect_type, label = self._DETECT_KEYS[ch]
+            log.info(f'{label} : {self.detect_type}')
+        elif ch in self._SHOW_KEYS:
+            self.show_type = self._SHOW_KEYS[ch]
+            log.info(f'Show type : {self.show_type}')
+        elif ch == ord('m'):
+            self.generate_grid_trackers()
+        elif ch == ord('c'):
+            if self.trackers:
+                self.trackers.pop()
+        return True
+
+    def run(self):
+        """Main loop: grab a frame, compute stereo depth, update trackers, display, handle keys.
+
+        The loop ends when the camera stops delivering frames or the user
+        presses q / Esc.  The OpenCV windows are destroyed on the way out,
+        also when an exception interrupts the loop.
+        """
+        try:
+            while True:
+                playing = not self.paused and not self.rect_sel.dragging
+                if playing or self.frame is None:
+                    ret, frame = self.cap.read()
+                    if not ret:
+                        break
+                    self.frame = frame.copy()
+
+                # depth from the stereo pair (channel 0 = left, channel 1 = right)
+                img_left, img_right = self.frame[:,:,0], self.frame[:,:,1]
+                img_disparity   = foundation_stereo_algo(self.args, self.model, img_left, img_right)
+                img_depth_fs    = convert_disparity_to_depth(self.camera_bf, img_disparity)
+
+                #self.statistics(frame)
+                self.process_image(img_depth_fs)
+
+                # draw from the stored frame so the display matches the data used for the depth
+                vis     = self.show_scene(self.frame, img_depth_fs)
+                cv.imshow(self.win_name , vis)
+                if not self._handle_key(cv.waitKey(1)):
+                    break
+        finally:
+            cv.destroyAllWindows()
+
+
+# Script entry point: constructing PlaneApp starts the interactive loop (its __init__ calls run()).
+if __name__ == '__main__':
+    #print(__doc__)
+
+    # Enable RunTest() instead to execute the selected recorded-video test.
+    #RunTest()
+    PlaneApp()
+
+
+
diff --git a/scripts/synthetic_data_manager.py b/scripts/synthetic_data_manager.py
new file mode 100644
index 0000000..5f274c2
--- /dev/null
+++ b/scripts/synthetic_data_manager.py
@@ -0,0 +1,366 @@
+'''
+
+Dataset management for Synthetic stereo dataset.
+
+Loads RealSense IR stereo pairs left, right images
+and the depth map, and computes the ground truth from the chessboard pattern in the left image.
+Each PNG file has 3 channels: the first channel is left, the second is right, the third is depth.
+
+The left channel contains images of the chessboard pattern captured by the left RealSense camera,
+which is used for computing the synthetic depth ground truth.
+The right channel contains the corresponding images from the right RealSense camera. 
+The depth channel contains the depth maps obtained from the RealSense sensor, 
+which can be used for comparison against the synthetic depth computed from the chessboard pattern.
+
+Expected directory layout (one or more session folders under root):
+    <root>/
+      <session>/
+        405/
+          <type_1>/
+            image_d16_<idx>.png          # left, right and depth image  (uint16)
+            image_d16_<idx>.png          # left, right and depth image  (uint16)
+          <type_2>/
+            image_d16_<idx>.png          # left, right and depth image  (uint16)            
+
+
+Only samples that have BOTH a realsense pair AND a matching zivid depth are
+included. Sessions that lack a zivid subfolder (e.g. freedrive-only captures)
+are silently skipped.
+
+Output dict keys (same as faro_data_manager for compatibility):
+    left        : numpy array  (H, W)   uint8/uint16 IR
+    right       : numpy array  (H, W)   uint8/uint16 IR
+    depth_syn   : numpy array  (H, W)   float32, mm  ← Synthetic depth from chessboard pattern (empty / zeros if absent)
+    depth_rs    : numpy array  (H, W)   float32, mm  (empty / zeros if absent)
+
+'''
+
+import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+import os
+import glob
+import unittest
+import logging as log
+import yaml
+
+from object_chessboard import ObjectChessboard
+
+# --------------------------------
+# 405 / 1280x720
+# 3x3 matrix passed as the camera matrix to the cv2 projection / undistortion calls below
+# (presumably the intrinsics of a RealSense D405 at 1280x720 - TODO confirm the source of the values).
+CAMERA_MATRIX_RS = np.array([
+    [644.471, 0, 649.253],
+    [0, 644.471, 365.398],
+    [0, 0, 1]
+])
+# Five distortion coefficients, all zero, passed alongside the camera matrix.
+DIST_COEFFS_RS = np.array([ 0.0,  -0.0,     -0.0,     0.0,    -0.0])
+
+
+
+# --------------------------------
+#%% Data source
+class DataSource:
+    """Loader for packed RealSense stereo PNGs with chessboard-derived synthetic depth.
+
+    Every sample is a 3-channel PNG whose channels are read as left image,
+    right image and RealSense depth.  A synthetic depth map is derived from
+    the chessboard pose estimated in the left image.
+    """
+
+    def __init__(self):
+        self.gray_scale_input   = False
+        self.depth_estimator    = ObjectChessboard()  # for synthetic GT depth estimation from chessboard pattern
+        self.imgs               = []   # list of dicts: {'packed_png': path}
+        log.info('Source is defined')
+
+    def init_directory(self, input_rectified='', gray_scale_input=False, sub_indexes=None):
+        """Scan root for packed synthetic PNG files and populate self.imgs.
+
+        Files are matched recursively with ``**/405/*/image_d16_*.png``, e.g.
+            <root>/<session>/405/<type>/image_d16_<idx>.png
+
+        Args:
+            input_rectified: root directory; a value shorter than 3 characters
+                selects the built-in default path.
+            gray_scale_input: stored on the instance as ``gray_scale_input``.
+            sub_indexes: optional iterable of positions in the sorted file
+                list; when given, only those samples are kept (an invalid
+                position raises IndexError).
+
+        Returns:
+            Number of samples kept, or 0 when the directory does not exist.
+        """
+        if len(input_rectified) < 3:
+            input_rectified = r'C:\Work\Data\DepthRS\ffs'
+
+        self.gray_scale_input = gray_scale_input
+        self.imgs = []
+
+        if not os.path.isdir(input_rectified):
+            log.error(f"Directory not found: {input_rectified}")
+            return 0
+
+        # recursive, supports <root>/405/... and <root>/<session>/405/...
+        pattern      = os.path.join(input_rectified, '**', '405', '*', 'image_d16_*.png')
+        packed_paths = sorted(set(glob.glob(pattern, recursive=True)))
+        self.imgs    = [{'packed_png': packed_path} for packed_path in packed_paths]
+
+        if sub_indexes is not None:
+            self.imgs = [self.imgs[i] for i in sub_indexes]
+
+        log.info(f"DataSource: found {len(self.imgs)} samples in {input_rectified}")
+        return len(self.imgs)
+
+    def get_item(self, index: int, debug: bool = False):
+        """Return one sample as a dict with left, right, depth_syn, depth_rs and rgb.
+
+        When the file cannot be read or does not have at least 3 channels, the
+        dict is returned with its placeholder values (empty lists and an empty
+        rgb array) and a warning is logged.
+
+        Args:
+            index: position in ``self.imgs``.
+            debug: when True, the four images are shown with ``show_subset``.
+        """
+        output_str = {
+            "left": [],
+            "right": [],
+            "depth_syn": [],
+            "depth_rs": [],
+            "rgb": np.array([], dtype=np.uint8)
+        }
+
+        entry = self.imgs[index]
+
+        packed_img = cv2.imread(entry['packed_png'], cv2.IMREAD_UNCHANGED)
+        if packed_img is None:
+            log.warning(f"Failed to load sample {index}: {entry['packed_png']}")
+            return output_str
+
+        if packed_img.ndim != 3 or packed_img.shape[2] < 3:
+            log.warning(f"Invalid packed PNG format (expected 3 channels): {entry['packed_png']}")
+            return output_str
+
+        left_img  = packed_img[:, :, 0]
+        right_img = packed_img[:, :, 1]
+        depth_rs  = packed_img[:, :, 2].astype(np.float32)
+
+        # Synthetic GT is computed from the chessboard; all zeros when the pose estimation fails.
+        depth_syn                 = self.get_synthetic_depth(left_img)
+
+        output_str["left"]        = left_img
+        output_str["right"]       = right_img
+        output_str["depth_syn"]   = depth_syn
+        output_str["depth_rs"]    = depth_rs
+
+        if debug:
+            img_list = [left_img, right_img, depth_rs, depth_syn]
+            ttl_list = ['left', 'right', 'depth RS (mm)', 'depth SYN (mm)']
+            self.show_subset(img_list, ttl_list)
+
+        return output_str
+
+    def get_synthetic_depth(self, left_img):
+        """Compute synthetic depth from chessboard pattern in the left image.
+
+        Returns a float32 depth map with the shape of ``left_img``; it is all
+        zeros when the camera pose cannot be estimated.
+        """
+        result = self.depth_estimator.estimate_camera_pose(left_img, camera_matrix = CAMERA_MATRIX_RS, dist_coeffs = DIST_COEFFS_RS)
+        if not result["success"]:
+            log.warning("Failed to estimate camera pose for synthetic depth computation.")
+            return np.zeros_like(left_img, dtype=np.float32)
+
+        XYZ, projected_points = self.depth_estimator.get_grid_in_camera_coordinates(
+            rvec=result['rvec'],
+            tvec=result['tvec'],
+            camera_matrix=CAMERA_MATRIX_RS,
+            dist_coeffs=DIST_COEFFS_RS
+        )
+        # Project back to image space to get depth map
+        return self.project_3d_to_camera(XYZ, CAMERA_MATRIX_RS, DIST_COEFFS_RS, frame_size = left_img.shape)
+
+    def get_item_projected(self, index: int, debug: bool = False):
+        """Compatibility wrapper for synthetic data; returns the same as get_item."""
+        return self.get_item(index=index, debug=debug)
+
+    def compute_depth_error(self, depth_pred, depth_gt, depth_mask=None):
+        """Compute the signed depth error (prediction minus GT) per pixel.
+
+        Pixels where either depth is not positive, or that are excluded by
+        ``depth_mask``, are reported as 0.
+
+        Args:
+            depth_pred: predicted depth map.
+            depth_gt: ground-truth depth map of the same shape.
+            depth_mask: optional boolean mask of pixels to evaluate.
+
+        Returns:
+            float32 array with the shape of ``depth_pred``.
+        """
+        depth_pred = depth_pred.astype(np.float32)
+        depth_gt   = depth_gt.astype(np.float32)
+        depth_error = np.zeros_like(depth_pred)
+        mask = np.ones_like(depth_pred, dtype=bool) if depth_mask is None else depth_mask
+        valid = np.logical_and(depth_gt > 0, mask)
+        valid = np.logical_and(depth_pred > 0, valid)
+        depth_error[valid] = (depth_pred[valid] - depth_gt[valid])
+        return depth_error
+
+    def show_subset(self, img_list, ttl_list, vmin=None, vmax=None, save_path='', fig_name=''):
+        """Display a list of images in a grid of at most 3 columns.
+
+        Args:
+            img_list: images to show; an empty list is a no-op.
+            ttl_list: one title per image.
+            vmin, vmax: color limits forwarded to ``imshow``.
+            save_path: existing directory in which the figure is also saved.
+            fig_name: file name (without extension) used when saving.
+        """
+        img_num = len(img_list)
+        if img_num == 0:
+            # nothing to lay out; the grid computation below would divide by zero
+            log.warning("show_subset: no images to display")
+            return
+
+        col_num = min(img_num, 3)
+        row_num = (img_num + col_num - 1) // col_num
+        fig, axes = plt.subplots(row_num, col_num, sharey=True, sharex=True)
+        # subplots returns a scalar / 1-D / 2-D result depending on the grid; normalise to 2-D
+        axes = np.array(axes).reshape(row_num, col_num)
+        for k in range(img_num):
+            ri, ci = divmod(k, col_num)
+            axes[ri, ci].imshow(img_list[k], vmin=vmin, vmax=vmax)
+            axes[ri, ci].set_title(ttl_list[k])
+        # hide the unused cells of the last row
+        for k in range(img_num, row_num * col_num):
+            axes[k // col_num, k % col_num].axis('off')
+        if save_path and os.path.exists(save_path):
+            fig.savefig(os.path.join(save_path, fig_name + ".png"))
+        plt.show(block=False)
+
+    @staticmethod
+    def _depth_as_uint16(depth):
+        """Convert a depth map to uint16, saturating at 0 / 65535 instead of wrapping around."""
+        depth = np.nan_to_num(np.asarray(depth, dtype=np.float32), nan=0.0, posinf=65535.0, neginf=0.0)
+        return np.clip(depth, 0, 65535).astype(np.uint16)
+
+    def save_data_to_folder(self, output_str, output_directory):
+        """Save sample dict to PNG files on disk.
+
+        Writes img_left.png, img_right.png, img_depth_syn.png and
+        img_depth_rs.png (depth maps as uint16), plus img_rgb.png when the
+        sample carries a non-empty ``rgb`` entry.
+
+        Args:
+            output_str: sample dict as returned by ``get_item``.
+            output_directory: target folder; created when missing.
+
+        Returns:
+            True when the four required images were written.  False when a
+            write failed or when the sample is incomplete (e.g. the
+            placeholder dict ``get_item`` returns after a failed load), in
+            which case nothing is written.
+        """
+        images = {
+            "img_left.png":        np.asarray(output_str["left"]),
+            "img_right.png":       np.asarray(output_str["right"]),
+            "img_depth_syn.png":   self._depth_as_uint16(output_str["depth_syn"]),
+            "img_depth_rs.png":    self._depth_as_uint16(output_str["depth_rs"]),
+        }
+        empty = [fname for fname, img in images.items() if img.size == 0]
+        if empty:
+            log.warning(f"Sample is incomplete, nothing saved (empty images: {empty})")
+            return False
+
+        os.makedirs(output_directory, exist_ok=True)
+
+        success = True
+        for fname, img in images.items():
+            out = cv2.imwrite(os.path.join(output_directory, fname), img,
+                              [cv2.IMWRITE_PNG_COMPRESSION, 0])
+            success = success and out
+
+        # the optional rgb image does not influence the returned status
+        rgb = output_str.get("rgb")
+        if rgb is not None and np.asarray(rgb).size > 0:
+            cv2.imwrite(os.path.join(output_directory, "img_rgb.png"),
+                        rgb, [cv2.IMWRITE_PNG_COMPRESSION, 0])
+
+        return success
+
+
+    def save_to_ply(self, points: np.ndarray, filename: str):
+        """Save a point cloud to an ASCII PLY file for visualization.
+
+        Args:
+            points: iterable of (x, y, z) triples.
+            filename: path of the file to write.
+        """
+        header = (
+            'ply\n'
+            'format ascii 1.0\n'
+            f'element vertex {len(points)}\n'
+            'property float x\n'
+            'property float y\n'
+            'property float z\n'
+            'end_header\n'
+        )
+        with open(filename, 'w') as f:
+            f.write(header)
+            f.writelines(f'{x} {y} {z}\n' for x, y, z in points)
+
+    def project_camera_to_3d(self, depth_img_mm: np.ndarray, cam_matrix: np.ndarray, dist_coeffs: np.ndarray) -> np.ndarray:
+        """Back-project every pixel with a valid depth to a 3D point in camera space.
+
+        Args:
+            depth_img_mm: 2-D depth image; pixels that are not finite or not
+                positive are skipped.
+            cam_matrix: camera matrix passed to ``cv2.undistortPoints``.
+            dist_coeffs: distortion coefficients passed to ``cv2.undistortPoints``.
+
+        Returns:
+            (N, 3) float32 array of X, Y, Z in the units of the depth image;
+            shape (0, 3) when no pixel is valid.
+        """
+        h, w = depth_img_mm.shape
+        xs, ys = np.meshgrid(np.arange(w, dtype=np.float32), np.arange(h, dtype=np.float32), indexing='xy')
+
+        # OpenCV expects Nx1x2 contiguous float32/float64 image points in (x, y) order.
+        distorted_points = np.stack([xs, ys], axis=-1).reshape(-1, 1, 2).astype(np.float32)
+        undistorted_points = cv2.undistortPoints(distorted_points,  cam_matrix.astype(np.float32),  dist_coeffs.astype(np.float32) )
+
+        # undistortPoints is called without a new camera matrix, so uv are normalized coordinates
+        uv = undistorted_points.reshape(-1, 2)
+        Z = depth_img_mm.reshape(-1).astype(np.float32)
+        valid = np.isfinite(Z) & (Z > 0)
+        if not np.any(valid):
+            return np.zeros((0, 3), dtype=np.float32)
+
+        uv      = uv[valid]
+        Z       = Z[valid]
+        X       = uv[:, 0] * Z
+        Y       = uv[:, 1] * Z
+
+        return np.stack([X, Y, Z], axis=1).astype(np.float32)
+
+    def project_3d_to_camera(self, points_3d: np.ndarray, cam_matrix: np.ndarray, dist_coeffs: np.ndarray, frame_size = (480,640)) -> np.ndarray:
+        """Rasterize 3D camera-space points into a depth map.
+
+        Points are projected with ``cv2.projectPoints`` (zero rotation and
+        translation), rounded to the nearest pixel, and the smallest Z is
+        kept when several points fall on the same pixel.
+
+        Args:
+            points_3d: (N, 3) array of X, Y, Z; points with non-finite
+                coordinates or non-positive Z are ignored.
+            cam_matrix: camera matrix.
+            dist_coeffs: distortion coefficients.
+            frame_size: (height, width) of the output; extra entries, such as
+                the channel count of an image shape, are ignored.
+
+        Returns:
+            (height, width) float32 depth map, 0 where no point landed.
+
+        Raises:
+            ValueError: when ``points_3d`` is not of shape (N, 3).
+        """
+        points_3d = np.asarray(points_3d)
+        if points_3d.ndim != 2 or points_3d.shape[1] != 3:
+            raise ValueError("Input points_3d must have shape (N, 3)")
+
+        h_rs, w_rs = frame_size[:2]
+
+        # Points behind the camera would still project into the image and, having the
+        # smallest Z, would win the nearest-depth test below - drop them up front.
+        usable = np.isfinite(points_3d).all(axis=1) & (points_3d[:, 2] > 0)
+        if not np.any(usable):
+            return np.zeros((h_rs, w_rs), dtype=np.float32)
+        points_3d = points_3d[usable]
+
+        projected_pts, _ = cv2.projectPoints(
+            points_3d.reshape(-1, 1, 3),
+            np.zeros(3, dtype=np.float32),
+            np.zeros(3, dtype=np.float32),
+            cam_matrix.astype(np.float32),
+            dist_coeffs.astype(np.float32),
+        )
+
+        uv_rs = projected_pts.reshape(-1, 2)
+        u_idx = np.rint(uv_rs[:, 0]).astype(np.int32)
+        v_idx = np.rint(uv_rs[:, 1]).astype(np.int32)
+
+        in_bounds = (u_idx >= 0) & (u_idx < w_rs) & (v_idx >= 0) & (v_idx < h_rs)
+        if not np.any(in_bounds):
+            return np.zeros((h_rs, w_rs), dtype=np.float32)
+
+        u_idx = u_idx[in_bounds]
+        v_idx = v_idx[in_bounds]
+        z_vals = points_3d[in_bounds, 2].astype(np.float32)  # Z values of the valid points
+
+        # Rasterize by nearest pixel; if multiple points hit a pixel, keep the closest depth.
+        lin             = v_idx * w_rs + u_idx
+        depth_buffer    = np.full(h_rs * w_rs, np.inf, dtype=np.float32)
+        np.minimum.at(depth_buffer, lin, z_vals)
+        depth_projected = depth_buffer.reshape(h_rs, w_rs)
+        depth_projected[~np.isfinite(depth_projected)] = 0.0
+        return depth_projected
+
+    def project_depth_zivid_to_rs(self,depth_zivid_mm: np.ndarray, depth_rs_mm: np.ndarray, finx = 0) -> np.ndarray:
+        """Re-render a depth map through the RealSense camera model.
+
+        The depth image is back-projected to a point cloud and rasterized
+        again with ``CAMERA_MATRIX_RS`` / ``DIST_COEFFS_RS`` into a map with
+        the shape of ``depth_rs_mm``, for pixel-level comparison against the
+        RealSense depth.
+
+        Args:
+            depth_zivid_mm: depth image to re-project.
+            depth_rs_mm: RealSense depth image; only its shape is used.
+            finx: sample index; unused, kept for interface compatibility.
+
+        Returns:
+            Depth map with the shape of ``depth_rs_mm``.
+        """
+        # create 3D point cloud from the input depth: (N, 3) points in camera space
+        XYZ = self.project_camera_to_3d(depth_zivid_mm, CAMERA_MATRIX_RS, DIST_COEFFS_RS)
+
+        # project back onto the RS image grid
+        # (self.save_to_ply(XYZ/1000, ...) can be called here to inspect the cloud in meters)
+        return self.project_3d_to_camera(XYZ, CAMERA_MATRIX_RS, DIST_COEFFS_RS, frame_size = depth_rs_mm.shape)
+
+    def show_projection(self, rs_map, zv_map, zv_valid, idx):
+        """Show two depth-difference maps and a validity mask side by side (blocking)."""
+        fig, axes = plt.subplots(1, 3, sharey=True, sharex=True, figsize=(8,4))
+        axes[0].imshow(rs_map, vmin=-10, vmax=1000)
+        axes[0].set_title("RealSense Depth Diff (mm)")
+        axes[1].imshow(zv_map, vmin=-10, vmax=1000)
+        axes[1].set_title("Zivid Projected Depth Diff (mm)")
+        axes[2].imshow(zv_valid, cmap='gray')
+        axes[2].set_title("Valid Mask (Zivid Projection)")
+        plt.suptitle(f"Sample {idx:03d} Depth Difference Maps and Valid Mask", fontsize=16)
+        plt.tight_layout()
+        plt.show()
+
+
+# --------------------------------
+#%% Tests
+class TestDataSource(unittest.TestCase):
+    """Manual/visual checks for DataSource; they need the dataset at the default path."""
+
+    def test_init_directory(self):
+        """The default directory must contain at least one packed sample."""
+        p       = DataSource()
+        img_num = p.init_directory()
+        self.assertTrue(img_num > 0)
+
+    def test_get_item(self):
+        """The first sample loads and is displayed in debug mode."""
+        p       = DataSource()
+        img_num = p.init_directory()
+        self.assertTrue(img_num > 0)
+        out = p.get_item(0, debug=True)
+        self.assertTrue(len(out["left"]) > 0)
+
+    def test_show_images(self):
+        """Show up to four randomly chosen samples."""
+        p       = DataSource()
+        img_num = p.init_directory()
+        if img_num == 0:
+            log.warning("No images found.")
+            return
+        for k in np.random.randint(0, img_num, size=min(4, img_num)):
+            out = p.get_item(int(k), debug=True)
+            self.assertTrue(len(out["left"]) > 0)
+            p.show_subset([out["left"], out["right"], out["depth_rs"]],
+                          ['left (RS)', 'right (RS)', 'depth RS (mm)'])
+
+        plt.show()
+
+    def test_get_item_projected(self):
+        """Show one sample together with the RS-vs-synthetic depth error."""
+        p       = DataSource()
+        img_num = p.init_directory()
+        self.assertTrue(img_num > 0)
+        # sample 80 is the preferred one; fall back to the last sample on smaller datasets
+        index   = min(80, img_num - 1)
+        out     = p.get_item_projected(index, debug=False)
+        err     = p.compute_depth_error(out["depth_rs"], out["depth_syn"])
+        self.assertTrue(len(out["left"]) > 0)
+        p.show_subset([out["left"], out["right"], out["depth_rs"], out["depth_syn"], err],
+                          ['left (RS)', 'right (RS)', 'depth RS (mm)', 'depth SYN (mm)', 'error (mm)'], vmax=None)
+        plt.show()
+
+
+# --------------------------------
+#%% Run Test
+def RunTest():
+    """Call one hand-picked TestDataSource method directly, without the unittest runner."""
+    case = TestDataSource()
+    # alternatives: test_init_directory, test_get_item, test_show_images
+    case.test_get_item_projected()
+
+
+# Script entry point: run the test selected inside RunTest().
+if __name__ == '__main__':
+    RunTest()
diff --git a/uris_model_chat.txt b/uris_model_chat.txt
new file mode 100644
index 0000000..2806c62
--- /dev/null
+++ b/uris_model_chat.txt
@@ -0,0 +1,1059 @@
+ ▐▛███▜▌   Claude Code v2.1.126
+▝▜█████▛▘  Sonnet 4.6 · Claude Team
+  ▘▘ ▝▝    ~/repos/Fast-FoundationStereo
+
+❯ in the environment tao_stereo install onnx and trt compilation tool and run   
+  example of the network conversion                                             
+
+● Let me first explore the project structure to understand the context before
+  proceeding.
+
+● Bash(ls /home/adiroha/repos/Fast-FoundationStereo/ && conda env list
+      2>/dev/null || true)
+  ⎿  assets
+     core
+     data
+     … +26 lines (ctrl+o to expand)
+
+  Read 1 file (ctrl+o to expand)
+
+● Bash(find /home/adiroha/repos/Fast-FoundationStereo -name "*.py" | xargs grep
+      -l -i "onnx\|tensorrt\|trt" 2>/dev/null | head -20)
+  ⎿  /home/adiroha/repos/Fast-FoundationStereo/core/foundation_stereo.py
+     /home/adiroha/repos/Fast-FoundationStereo/scripts/make_onnx.py
+     /home/adiroha/repos/Fast-FoundationStereo/scripts/run_demo_tensorrt.py
+
+● Bash(conda run -n tao_stereo pip list 2>/dev/null | grep -i
+      "onnx\|tensorrt\|torch\|trt" | head -20)
+  ⎿  open_clip_torch           3.3.0
+     pytorch-lightning         2.6.1
+     torch                     2.10.0+cu126
+     … +3 lines (ctrl+o to expand)
+
+  Read 2 files (ctrl+o to expand)
+
+● Let me check CUDA version, requirements, and what's needed for TRT conversion.
+
+  Read 1 file (ctrl+o to expand)
+
+● Bash(nvcc --version 2>/dev/null; nvidia-smi 2>/dev/null | head -10)
+  ⎿  nvcc: NVIDIA (R) Cuda compiler driver
+     Copyright (c) 2005-2023 NVIDIA Corporation
+     Built on Fri_Jan__6_16:45:21_PST_2023
+     … +16 lines (ctrl+o to expand)
+
+● Bash(which trtexec 2>/dev/null; dpkg -l | grep -i tensorrt 2>/dev/null | head
+      -10; ls /usr/lib/python3/dist-packages/ 2>/dev/null | grep -i trt)
+  ⎿  No matches found
+
+  Searched for 2 patterns, read 1 file (ctrl+o to expand)
+
+● Now I have the full picture. Let me check the weights and then install the
+  required packages.
+
+  Listed 3 directories (ctrl+o to expand)
+
+● Now let me install the ONNX and TensorRT packages in the tao_stereo
+  environment.
+
+● Bash(conda run -n tao_stereo pip install onnx onnxruntime-gpu tensorrt-cu12
+      tensorrt-lean-cu12 tensorrt-dispatch-cu12 2>&1 | tail -20)
+  ⎿  Running in the background (↓ to manage)
+
+● Installation running in background. Let me set up the output directory and
+  review the ONNX script arguments.
+
+● Bash(mkdir -p /home/adiroha/repos/Fast-FoundationStereo/output/onnx_trt)
+  ⎿  Done
+
+● Background command "Install ONNX and TensorRT packages in tao_stereo env" completed (exit code 0)
+
+  Read 1 file (ctrl+o to expand)
+
+● All packages installed. Now let me find trtexec and run the ONNX export.
+
+● Bash(find /home/adiroha/miniconda3/envs/tao_stereo -name "trtexec"
+      2>/dev/null; conda run -n tao_stereo python -c "import tensorrt as trt;
+      print('TRT version:', trt.…)
+  ⎿  TRT version: 10.16.1.11
+  Listed 1 directory (ctrl+o to expand)
+
+● trtexec isn't included in the pip package — I'll use the TensorRT Python API
+  for ONNX→engine conversion. First, let me run the ONNX export.
+
+● Bash(conda run -n tao_stereo python -c "import cv2;
+      img=cv2.imread('assets/left.png'); print('Image size:', img.shape if img
+      is not None else 'not found')")
+  ⎿  [ WARN:0@0.009] global loadsave.cpp:278 findDecoder imread_('assets/le
+     ft.png'): can't open/read file: check file path/integrity
+
+     Image size: not found
+
+  Listed 1 directory (ctrl+o to expand)
+
+● No stereo test images in assets — I'll use the default 448x640 dimensions. Now
+   let me run the ONNX export.
+
+● Bash(conda run -n tao_stereo python scripts/make_onnx.py \
+          --model_dir weights/20-26-39/model_best_bp2_serialize.pth \…)
+  ⎿  Running in the background (↓ to manage)
+
+● While the ONNX export runs, let me write the TRT engine conversion script.
+
+● Write(scripts/make_trt_engine.py)
+  ⎿  Wrote 53 lines to scripts/make_trt_engine.py
+       1 """Convert ONNX models to TensorRT engines using the Python TRT API."""
+       2 import argparse
+       3 import os
+       4 import tensorrt as trt
+       5 
+       7 TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
+       8 
+       9 def build_engine(onnx_path: str, engine_path: str, fp16: bool = 
+         True, workspace_gb: int = 4):
+      10     builder = trt.Builder(TRT_LOGGER)
+     … +51 lines (ctrl+o to expand)
+
+● Background command "Export model to ONNX format" failed with exit code 1
+
+  Read 1 file (ctrl+o to expand)
+
+● Bash(conda run -n tao_stereo pip install onnxscript 2>&1 | tail -5)
+  ⎿  Downloading onnx_ir-0.2.1-py3-none-any.whl (166 kB)
+     Installing collected packages: onnx_ir, onnxscript
+
+     Successfully installed onnx_ir-0.2.1 onnxscript-0.7.0
+
+● Bash(conda run -n tao_stereo python scripts/make_onnx.py \
+          --model_dir weights/20-26-39/model_best_bp2_serialize.pth \…)
+  ⎿  Running in the background (↓ to manage)
+
+● ONNX export running again with onnxscript now installed. Let me wait for it.
+
+  Read 1 file (ctrl+o to expand)
+
+● Waiting for the ONNX export to produce output...
+
+  Listed 1 directory (ctrl+o to expand)
+
+● feature_runner.onnx is done (38MB data file). The post_runner.onnx export is
+  still in progress. Let me keep monitoring.
+
+  Read 1 file, listed 1 directory (ctrl+o to expand)
+
+● Background command "Re-run ONNX export" failed with exit code 1
+
+● The new PyTorch 2.10 ONNX exporter doesn't handle adaptive_max_pool2d. I need
+  to force the legacy TorchScript-based exporter with dynamo=False.
+
+  Read 1 file (ctrl+o to expand)
+
+● Update(scripts/make_onnx.py)
+  ⎿  Added 4 lines, removed 2 lines
+      63          opset_version=17,
+      64          input_names = ['left', 'right'],
+      65          output_names = ['features_left_04', 'features_left_08',
+           'features_left_16', 'features_left_32', 'features_right_04', '
+          stem_2x'],
+      66 -        do_constant_folding=True                               
+      66 +        do_constant_folding=True,                              
+      67 +        dynamo=False,                                          
+      68      )
+      69  
+      70      features_left_04, features_left_08, features_left_16, featu
+          res_left_32, features_right_04, stem_2x = feature_runner(left_i
+          mg, right_img)
+     ...
+      78          opset_version=17,
+      79          input_names = ['features_left_04', 'features_left_08', 
+          'features_left_16', 'features_left_32', 'features_right_04', 's
+          tem_2x', 'gwc_volume'],
+      80          output_names = ['disp'],
+      80 -        do_constant_folding=True                               
+      81 +        do_constant_folding=True,                              
+      82 +        dynamo=False,                                          
+      83      )
+      84  
+      85      with open(f'{args.save_path}/onnx.yaml', 'w') as f:
+
+● Bash(rm -f /home/adiroha/repos/Fast-FoundationStereo/output/onnx_trt/*.onnx
+      /home/adiroha/repos/Fast-FoundationStereo/output/onnx_trt/*.data && conda
+      run -n tao_ster…)
+  ⎿  Running in the background (↓ to manage)
+● Bash(sleep 60 && cat /tmp/claude-1000/-home-adiroha-repos-Fast-FoundationStere
+      o/9e5eae82-b63e-4097-921a-bd15bd674371/tasks/bqvmt6zhn.output 2>/dev/null
+      | tail -20 &&…)
+  ⎿  Error: Blocked: sleep 60 followed by: cat 
+     /tmp/claude-1000/-home-adiroha-repos-Fast-FoundationStereo/9e5eae82-b63e-40
+     97-921a-bd15bd674371/tasks/bqvmt6zhn.output tail -20 ls -lh 
+     /home/adiroha/repos/Fast-FoundationStereo/output/onnx_trt/. To wait for a 
+     condition, use Monitor with an until-loop (e.g. `until <check>; do sleep 2;
+      done`). To wait for a command you started, use run_in_background: true. Do
+      not chain shorter sleeps to work around this block.
+
+● Background command "Re-run ONNX export with legacy exporter" completed (exit code 0)
+
+  Listed 1 directory (ctrl+o to expand)
+
+● Both ONNX files exported successfully (37MB + 18MB). Now converting to TRT
+  engines.
+
+● Bash(conda run -n tao_stereo python scripts/make_trt_engine.py --onnx_dir
+      output/onnx_trt 2>&1)
+  ⎿  Running in the background (↓ to manage)
+
+● TRT engine compilation can take several minutes. Let me monitor progress.
+
+  Read 1 file (ctrl+o to expand)
+
+● TRT compilation in progress. I'll report back when the engines are ready —
+  this typically takes 3–10 minutes for the first compile on a new model.
+
+  ---
+  What's been done so far:
+
+  ┌──────────────────────────────────────────────────────────────┬──────────┐
+  │                             Step                             │  Status  │
+  ├──────────────────────────────────────────────────────────────┼──────────┤
+  │ Install onnx, onnxruntime-gpu, tensorrt-cu12 10.16.1         │ ✅       │
+  ├──────────────────────────────────────────────────────────────┼──────────┤
+  │ Install onnxscript (missing dep for PyTorch 2.10 exporter)   │ ✅       │
+  ├──────────────────────────────────────────────────────────────┼──────────┤
+  │ Fix make_onnx.py to use legacy TorchScript exporter          │ ✅       │
+  │ (dynamo=False) to avoid adaptive_max_pool2d dispatch error   │          │
+  ├──────────────────────────────────────────────────────────────┼──────────┤
+  │ Export feature_runner.onnx (37 MB)                           │ ✅       │
+  ├──────────────────────────────────────────────────────────────┼──────────┤
+  │ Export post_runner.onnx (18 MB)                              │ ✅       │
+  ├──────────────────────────────────────────────────────────────┼──────────┤
+  │ Write scripts/make_trt_engine.py (Python TRT API builder)    │ ✅       │
+  ├──────────────────────────────────────────────────────────────┼──────────┤
+  │ Build TRT engines (feature_runner.engine,                    │ ⏳       │
+  │ post_runner.engine)                                          │ running  │
+  └──────────────────────────────────────────────────────────────┴──────────┘
+
+✻ Baked for 10m 48s
+
+✻ Claude resuming /loop wakeup (Apr 26 3:02pm)
+
+  Read 1 file, listed 1 directory (ctrl+o to expand)
+
+● feature_runner.engine (21 MB) is compiled. post_runner.engine still in
+  progress — let me wait for it.
+
+● feature_runner.engine done (21 MB). post_runner.engine still compiling — I'll
+  report back when it finishes.
+
+✻ Claude resuming /loop wakeup (Apr 26 3:03pm)
+
+  Listed 1 directory (ctrl+o to expand)
+
+● Both ONNX files exist and TRT conversion is already running (job byephtlsb).
+  feature_runner.engine is done (21 MB), post_runner.engine still compiling.
+  Output file is empty so far — TRT logging goes to stderr after the build
+  completes.
+
+● Still waiting on post_runner.engine. I'll update you when it's done.
+
+✻ Claude resuming /loop wakeup (Apr 26 3:05pm)
+
+✻ Claude resuming /loop wakeup (Apr 26 3:05pm)
+
+  Listed 1 directory (ctrl+o to expand)
+
+● post_runner.engine still not written — compilation ongoing. Scheduling another
+   check.
+● Still compiling — post_runner is the larger/more complex subgraph. Will check
+  again in ~4 minutes.
+
+✻ Claude resuming /loop wakeup (Apr 26 3:06pm)
+
+  Listed 1 directory (ctrl+o to expand)
+
+● Bash(ps aux | grep -i "make_trt\|trt_engine\|tensorrt" | grep -v grep)
+  ⎿  adiroha  2166525  0.0  0.0  10072  3740 ?        Ss   14:48   0:00 /bi
+     n/bash -c source /home/adiroha/.claude/shell-snapshots/snapshot-bash-1
+     777204063714-7d3qxc.sh 2>/dev/null || true && shopt -u extglob 2>/dev/
+     … +13 lines (ctrl+o to expand)
+
+● Process is alive and running hot (PID 2191990, ~80% CPU, 7.6 GB RAM). The TRT
+  builder is crunching through post_runner. Output is still buffered — nothing
+  printed until build_serialized_network() returns. This is normal for complex
+  models.
+
+● Still compiling post_runner — process is healthy, just CPU-bound on kernel
+  optimization. Checking again in ~4 min.
+
+✻ Sautéed for 38s
+
+● Background command "Convert ONNX models to TRT engines" completed (exit code
+0)
+
+  Read 1 file, listed 1 directory (ctrl+o to expand)
+
+● Both engines compiled and saved successfully. Here's the full summary:
+
+  ONNX export — done (using legacy TorchScript exporter with dynamo=False)
+  - feature_runner.onnx — 37 MB, opset 17, PyTorch 2.10.0
+  - post_runner.onnx — 18 MB
+
+  TRT engine compilation — done in ~6 minutes (15:00 → 15:06), FP16 enabled on
+  RTX 4080
+  - feature_runner.engine — 21 MB
+  - post_runner.engine — 14 MB
+
+  All four files are in output/onnx_trt/. The engines are ready to use with
+  scripts/run_demo_tensorrt.py — you just need a stereo pair (--left_file,
+  --right_file) and camera intrinsics (--intrinsic_file).
+
+✻ Claude resuming /loop wakeup (Apr 26 3:09pm)
+
+● Both engines already completed — I reported this in my previous message. To
+  confirm:
+
+  Listed 1 directory (ctrl+o to expand)
+
+● Everything is complete and healthy:
+
+  ┌───────────────────────┬───────┬──────────────────────────────┐
+  │         File          │ Size  │            Status            │
+  ├───────────────────────┼───────┼──────────────────────────────┤
+  │ feature_runner.onnx   │ 37 MB │ ✅ exported                  │
+  ├───────────────────────┼───────┼──────────────────────────────┤
+  │ post_runner.onnx      │ 18 MB │ ✅ exported                  │
+  ├───────────────────────┼───────┼──────────────────────────────┤
+  │ feature_runner.engine │ 21 MB │ ✅ compiled (FP16, RTX 4080) │
+  ├───────────────────────┼───────┼──────────────────────────────┤
+  │ post_runner.engine    │ 14 MB │ ✅ compiled (FP16, RTX 4080) │
+  ├───────────────────────┼───────┼──────────────────────────────┤
+  │ onnx.yaml             │ 178 B │ ✅ model config              │
+  └───────────────────────┴───────┴──────────────────────────────┘
+
+  The engines are ready. To run inference, provide a stereo pair:
+
+  conda run -n tao_stereo python scripts/run_demo_tensorrt.py \
+    --onnx_dir output/onnx_trt \
+    --left_file <left.png> \
+    --right_file <right.png> \
+    --intrinsic_file <K.txt> \
+    --out_dir output/trt_demo
+
+✻ Claude resuming /loop wakeup (Apr 26 3:11pm)
+
+● Already done — both engines compiled successfully. No need to recheck.
+
+  Final status:
+
+  ┌───────────────────────┬───────┬────────────────────────┐
+  │         File          │ Size  │        Details         │
+  ├───────────────────────┼───────┼────────────────────────┤
+  │ feature_runner.engine │ 21 MB │ FP16, compiled 15:01   │
+  ├───────────────────────┼───────┼────────────────────────┤
+  │ post_runner.engine    │ 14 MB │ FP16, compiled 15:06   │
+  ├───────────────────────┼───────┼────────────────────────┤
+  │ feature_runner.onnx   │ 37 MB │ opset 17, PyTorch 2.10 │
+  ├───────────────────────┼───────┼────────────────────────┤
+  │ post_runner.onnx      │ 18 MB │ opset 17, PyTorch 2.10 │
+  └───────────────────────┴───────┴────────────────────────┘
+
+  What was installed in tao_stereo:
+  - onnx 1.21.0 + onnxscript 0.7.0
+  - onnxruntime-gpu 1.23.2
+  - tensorrt-cu12 10.16.1 + lean/dispatch variants
+
+  Fix applied to make_onnx.py: added dynamo=False to both torch.onnx.export
+  calls — the new PyTorch 2.10 dynamo exporter lacks a decomposition for
+  aten.adaptive_max_pool2d, the legacy TorchScript exporter handles it fine.
+
+  New file added: scripts/make_trt_engine.py — Python TRT API builder (replaces
+  trtexec which isn't included in pip-installed TensorRT).
+
+※ recap: Installed ONNX and TensorRT 10.16 in tao_stereo and converted the FoundationStereo model to two TRT engines (FP16, RTX 4080) saved in output/onnx_trt. Next: run inference with scripts/run_demo_tensorrt.py using a real stereo pair. (disable recaps in /config)
+
+❯ create new file benchmark_inbolt_trt.py that will run the original tests plus 
+   trt model created from fast foundation finetuned model                       
+
+● Let me read the existing benchmark files and TRT runner to understand what to
+  combine.
+
+  Read 3 files, listed 1 directory (ctrl+o to expand)
+
+● Now let me check the ONNX yaml and the data manager to understand image sizes.
+
+  Read 1 file (ctrl+o to expand)
+
+● Now I have everything I need. Let me write the benchmark file.
+
+● Write(scripts/benchmark_inbolt_trt.py)
+  ⎿  Wrote 457 lines to scripts/benchmark_inbolt_trt.py
+        FS-TRT (TensorRT FP16) on Inbolt.d FFS (PyTorch) and fine-tuned F
+      3 Extends ``benchmark_inbolt_fs.py`` by adding a TensorRT FP16 mode
+        l alongside the
+      4 PyTorch models and RealSense hardware depth, so you can compare accuracy and speed
+      5 between the full-precision PyTorch path and the compiled TRT engi
+      6 ne.
+      7 The TRT engines must be built beforehand from the fine-tuned model weights:
+      8   1. python scripts/make_onnx.py --model_dir <finetuned.pth> --sa
+      9   2. python scripts/make_trt_engine.py --onnx_dir output/onnx_trt
+        _ft
+     … +535 lines (ctrl+o to expand)
+
+● Now let me do a quick import check to catch any obvious errors:
+
+● Bash(conda run -n tao_stereo python -c "
+      import sys, os…)  ⎿  AST parse OK
+     benchmark_inbolt imports OK
+     TrtRunner import OK
+
+● All imports resolve cleanly. Here's what the new file adds on top of
+  benchmark_inbolt_fs.py:
+
+  New method: finetuned_trt (purple)
+  - Loaded via load_trt_model(trt_dir) — reads feature_runner.engine +
+  post_runner.engine + onnx.yaml; gracefully skips if any file is missing
+  - infer_depth_m_trt resizes the IR images to the fixed TRT engine resolution
+  (448×640), runs TrtRunner.forward(), then resizes disparity back and divides
+  by fx to restore original-pixel-unit disparity before the BF depth conversion
+
+  Key CLI args:
+  python scripts/benchmark_inbolt_trt.py \
+    --trt_dir output/onnx_trt \   # default — point here or at a dir built from 
+  finetuned weights
+    --trt_height 448 --trt_width 640 \
+    --out_dir reports/inbolt_trt_benchmark
+
+  Note: the current engines in output/onnx_trt/ were built from the original
+  model weights. To compare the finetuned TRT model, rebuild the engines from
+  the finetuned .pth:
+  python scripts/make_onnx.py \
+    --model_dir weights/23-36-37/model_finetuned_inbolt-20260415_epoch_111.pth \
+    --save_path output/onnx_trt_ft
+  python scripts/make_trt_engine.py --onnx_dir output/onnx_trt_ft
+  Then pass --trt_dir output/onnx_trt_ft.
+
+✻ Cooked for 3m 40s
+
+※ recap: Benchmarking FastFoundationStereo on the Inbolt dataset with TensorRT acceleration. ONNX export and TRT engine compilation are complete; the next step is rebuilding the engines from the finetuned model weights and running benchmark_inbolt_trt.py. (disable recaps in /config)
+
+❯ add rebuild command to the benchmark_inbolt_trt.py to run at the beginning    
+
+  Read 2 files (ctrl+o to expand)
+
+● Now I have everything. I'll add the rebuild function inline (no subprocess),
+  importing TrtFeatureRunner, TrtPostRunner, build_gwc_volume_triton, and
+  build_engine directly.
+
+  Read 1 file (ctrl+o to expand)
+
+● Update(scripts/benchmark_inbolt_trt.py)
+  ⎿  Added 8 lines, removed 7 lines
+       4  PyTorch models and RealSense hardware depth, so you can compare
+           accuracy and speed
+       5  between the full-precision PyTorch path and the compiled TRT en
+          gine.
+       6  
+       7 -The TRT engines must be built beforehand from the fine-tuned mo
+         -del weights:                                                   
+       8 -  1. python scripts/make_onnx.py --model_dir <finetuned.pth> --
+         -save_path output/onnx_trt_ft                                   
+       9 -  2. python scripts/make_trt_engine.py --onnx_dir output/onnx_t
+         -rt_ft                                                          
+       7 +Pass --rebuild_trt to export ONNX and compile TRT engines from 
+         +the fine-tuned model                                           
+       8 +at the start of the run (requires ~10 min on first compile).  T
+         +he engines are written                                         
+       9 +to --trt_dir and reused on subsequent runs without --rebuild_tr
+         +t.                                                             
+      10  
+      11 -Then point --trt_dir at the directory containing feature_runner
+         -.engine and                                                    
+      12 -post_runner.engine (and onnx.yaml).                            
+      13 -                                                               
+      11  Usage:
+      12    cd /home/adiroha/repos/Fast-FoundationStereo
+      16 -  python scripts/benchmark_inbolt_trt.py [--trt_dir output/onnx
+         -_trt] [--out_dir reports/inbolt_trt_benchmark]                 
+      13 +  # First run: build engines then benchmark                    
+      14 +  python scripts/benchmark_inbolt_trt.py --rebuild_trt [--trt_d
+         +ir output/onnx_trt_ft]                                         
+      15 +  # Subsequent runs: reuse existing engines                    
+      16 +  python scripts/benchmark_inbolt_trt.py [--trt_dir output/onnx
+         +_trt_ft] [--out_dir reports/inbolt_trt_benchmark]              
+      17  """
+      18  
+      19  import argparse
+     ...
+      21  import os
+      22  import sys
+      23  import time
+      24 +import warnings                                                
+      25  import cv2
+      26  from pathlib import Path
+      27  from typing import Dict, Optional
+
+● Update(scripts/benchmark_inbolt_trt.py)
+  ⎿  Added 2 lines, removed 1 line
+      52      CLOSE_RANGE_THRESHOLD_M,
+      53  )
+      54  from report import ReportGenerator
+      55 -from core.foundation_stereo import TrtRunner                   
+      55 +from core.foundation_stereo import TrtRunner, TrtFeatureRunner,
+         + TrtPostRunner, build_gwc_volume_triton                        
+      56 +from make_trt_engine import build_engine                       
+      57  
+      58  
+      59  # ── constants ────────────────────────────────────────────────
+          ────────────────
+
+● Now add the rebuild_trt_engines function right after the constants block:
+
+● Update(scripts/benchmark_inbolt_trt.py)
+  ⎿  Added 86 lines
+       78  RS_NAME = 'depth_rs'
+       79  
+       80  
+       81 +# ── TRT build ───────────────────────────────────────────────
+          +─────────────────                                             
+       82 +                                                              
+       83 +def rebuild_trt_engines(model_path: str, trt_dir: str, height:
+          + int, width: int) -> None:                                    
+       84 +    """Export the model at *model_path* to ONNX, then compile 
+          +both TRT engines.                                             
+       85 +                                                              
+       86 +    Writes to *trt_dir*:                                      
+       87 +        feature_runner.onnx / feature_runner.engine           
+       88 +        post_runner.onnx    / post_runner.engine              
+       89 +        onnx.yaml                                             
+       90 +    """                                                       
+       91 +    assert height % 32 == 0 and width % 32 == 0, \            
+       92 +        f"height and width must be divisible by 32, got {heigh
+          +t}x{width}"                                                   
+       93 +                                                              
+       94 +    os.makedirs(trt_dir, exist_ok=True)                       
+       95 +    logging.info(f'[rebuild] Loading model from {model_path}')
+       96 +    model = torch.load(model_path, map_location='cpu', weights
+          +_only=False)                                                  
+       97 +    model.cuda().eval()                                       
+       98 +                                                              
+       99 +    feature_runner = TrtFeatureRunner(model).cuda().eval()    
+      100 +    post_runner    = TrtPostRunner(model).cuda().eval()       
+      101 +                                                              
+      102 +    dummy_left  = torch.randn(1, 3, height, width, device='cud
+          +a').float() * 255                                             
+      103 +    dummy_right = torch.randn(1, 3, height, width, device='cud
+          +a').float() * 255                                             
+      104 +                                                              
+      105 +    # ── feature runner → ONNX ───────────────────────────────
+          +─────────────────                                             
+      106 +    feature_onnx = os.path.join(trt_dir, 'feature_runner.onnx'
+          +)                                                             
+      107 +    logging.info(f'[rebuild] Exporting feature_runner → {featu
+          +re_onnx}')                                                    
+      108 +    with warnings.catch_warnings():                           
+      109 +        warnings.simplefilter('ignore')                       
+      110 +        torch.onnx.export(                                    
+      111 +            feature_runner,                                   
+      112 +            (dummy_left, dummy_right),                        
+      113 +            feature_onnx,                                     
+      114 +            opset_version=17,                                 
+      115 +            input_names=['left', 'right'],                    
+      116 +            output_names=['features_left_04', 'features_left_0
+          +8', 'features_left_16',                                       
+      117 +                          'features_left_32', 'features_right_
+          +04', 'stem_2x'],                                              
+      118 +            do_constant_folding=True,                         
+      119 +            dynamo=False,                                     
+      120 +        )                                                     
+      121 +                                                              
+      122 +    # ── post runner → ONNX ──────────────────────────────────
+          +─────────────────                                             
+      123 +    with torch.no_grad():                                     
+      124 +        feats = feature_runner(dummy_left, dummy_right)       
+      125 +        f04, f08, f16, f32, fr04, stem_2x = feats             
+      126 +        cv_group = getattr(model, 'cv_group', 8)              
+      127 +        gwc_volume = build_gwc_volume_triton(                 
+      128 +            f04.half(), fr04.half(), model.args.max_disp // 4,
+          + cv_group                                                     
+      129 +        )                                                     
+      130 +                                                              
+      131 +    post_onnx = os.path.join(trt_dir, 'post_runner.onnx')     
+      132 +    logging.info(f'[rebuild] Exporting post_runner → {post_onn
+          +x}')                                                          
+      133 +    with warnings.catch_warnings():                           
+      134 +        warnings.simplefilter('ignore')                       
+      135 +        torch.onnx.export(                                    
+      136 +            post_runner,                                      
+      137 +            (f04.float(), f08.float(), f16.float(), f32.float(
+          +),                                                            
+      138 +             fr04.float(), stem_2x.float(), gwc_volume.float()
+          +),                                                            
+      139 +            post_onnx,                                        
+      140 +            opset_version=17,                                 
+      141 +            input_names=['features_left_04', 'features_left_08
+          +', 'features_left_16',                                        
+      142 +                         'features_left_32', 'features_right_0
+          +4', 'stem_2x', 'gwc_volume'],                                 
+      143 +            output_names=['disp'],                            
+      144 +            do_constant_folding=True,                         
+      145 +            dynamo=False,                                     
+      146 +        )                                                     
+      147 +                                                              
+      148 +    # ── save model config ───────────────────────────────────
+          +──────────────────                                            
+      149 +    yaml_path = os.path.join(trt_dir, 'onnx.yaml')            
+      150 +    with open(yaml_path, 'w') as f:                           
+      151 +        yaml.safe_dump(OmegaConf.to_container(model.args), f) 
+      152 +    logging.info(f'[rebuild] Saved onnx.yaml → {yaml_path}')  
+      153 +                                                              
+      154 +    # ── compile TRT engines ─────────────────────────────────
+          +──────────────────                                            
+      155 +    del model, feature_runner, post_runner  # free GPU memory 
+          +before TRT build                                              
+      156 +    torch.cuda.empty_cache()                                  
+      157 +                                                              
+      158 +    for name in ('feature_runner', 'post_runner'):            
+      159 +        onnx_path   = os.path.join(trt_dir, f'{name}.onnx')   
+      160 +        engine_path = os.path.join(trt_dir, f'{name}.engine') 
+      161 +        logging.info(f'[rebuild] Compiling {name}.engine (this
+          + may take several minutes) …')                                
+      162 +        build_engine(onnx_path, engine_path, fp16=True, worksp
+          +ace_gb=4)                                                     
+      163 +                                                              
+      164 +    logging.info(f'[rebuild] TRT engines ready in {trt_dir}') 
+      165 +                                                              
+      166 +                                                              
+      167  # ── TRT helpers ─────────────────────────────────────────────
+           ──────────────────
+      168  
+      169  def load_trt_model(trt_dir: str) -> Optional[TrtRunner]:
+
+● Now wire it into main() — add the flag and call it before loading the TRT
+  model:
+
+● Update(scripts/benchmark_inbolt_trt.py)
+  ⎿  Added 2 lines
+      354                          help='Directory with feature_runner.en
+           gine, post_runner.engine, onnx.yaml')
+      355      parser.add_argument('--trt_height', type=int, default=448,
+            help='TRT engine input height')
+      356      parser.add_argument('--trt_width',  type=int, default=640,
+            help='TRT engine input width')
+      357 +    parser.add_argument('--rebuild_trt', action='store_true', 
+      358 +                        help='Re-export ONNX and recompile TRT
+          + engines from --finetuned before benchmarking')               
+      359      parser.add_argument('--n_viz', type=int, default=N_VIZ, he
+           lp='Frames saved for visual comparison')
+      360      args = parser.parse_args()
+      361  
+
+● Update(scripts/benchmark_inbolt_trt.py)
+  ⎿  Added 6 lines
+      363      out_dir = Path(args.out_dir)
+      364      out_dir.mkdir(parents=True, exist_ok=True)
+      365  
+      366 +    # ── optionally rebuild TRT engines ──────────────────────
+          +──────────────────                                            
+      367 +    if args.rebuild_trt:                                      
+      368 +        build_src = resolve_finetuned_model_path(args.finetune
+          +d) or args.original                                           
+      369 +        logging.info(f'[rebuild_trt] Building TRT engines from
+          + {build_src} → {args.trt_dir}')                               
+      370 +        rebuild_trt_engines(build_src, args.trt_dir, args.trt_
+          +height, args.trt_width)                                       
+      371 +                                                              
+      372      # ── load PyTorch models ─────────────────────────────────
+           ──────────────────
+      373      pt_models = {}
+      374      finetuned_path = resolve_finetuned_model_path(args.finetun
+           ed)
+
+● Now verify the final file parses and all symbols exist:
+
+● Bash(conda run -n tao_stereo python -c "
+      import sys, os, ast, pathlib…)
+  ⎿  AST parse OK
+     build_engine import OK
+     TrtFeatureRunner/TrtPostRunner/build_gwc_volume_triton imports OK
+     all symbols OK
+
+● Everything checks out. Here's what was added:
+
+  rebuild_trt_engines(model_path, trt_dir, height, width) — runs in-process (no
+  subprocess):
+  1. Loads the model weights onto GPU
+  2. Wraps in TrtFeatureRunner / TrtPostRunner and exports both to ONNX with the
+   legacy TorchScript exporter (dynamo=False, warnings suppressed)
+  3. Saves onnx.yaml from the model's args
+  4. Frees GPU memory, then calls build_engine() for each ONNX file to compile
+  the TRT engines
+
+  Wired into main() — runs before any models are loaded if --rebuild_trt is passed.
+  Falls back to the original model if no finetuned checkpoint is found:
+
+  # build engines from finetuned model, then benchmark — one command
+  python scripts/benchmark_inbolt_trt.py \
+    --rebuild_trt \
+    --finetuned weights/23-36-37/model_finetuned_inbolt-20260415_epoch_111.pth \
+    --trt_dir output/onnx_trt_ft \
+    --out_dir reports/inbolt_trt_benchmark
+
+  # subsequent runs reuse existing engines (no rebuild)
+  python scripts/benchmark_inbolt_trt.py \
+    --trt_dir output/onnx_trt_ft \
+    --out_dir reports/inbolt_trt_benchmark
+
+✻ Cooked for 1m 56s
+
+※ recap: Built ONNX export and TRT engine compilation for FastFoundationStereo in the tao_stereo env, then created benchmark_inbolt_trt.py to compare PyTorch and TRT models on Inbolt. Run with --rebuild_trt to compile engines from the finetuned model and benchmark in one command. (disable recaps in /config)
+
+● Background command "Search for trtexec binary and tensorrt directories"
+completed (exit code 0)
+
+● That's a stale notification from the initial trtexec search at the start of
+  the session — no action needed. All TRT work is already complete.
+
+❯ /branch 
+  ⎿  Branched conversation. You are now in the branch. Use /resume 9e5eae82-b63e-4097-921a-bd15bd674371 to return to the original.
+
+❯ /rename niv_fork_for_uris_net 
+  ⎿  Session renamed to: niv_fork_for_uris_net
+
+❯ im trying to understand the concepts you used when creating this new network  
+  lets go step by step what you added to the ffs so i will understand that      
+
+● Good question to start with — let me be precise about what we actually did,
+  because we didn't change the FFS network architecture at all. The model
+  weights and math are identical. What we built is a deployment pipeline that
+  takes the existing model and makes it run faster on GPU.
+
+  Here's the outline — tell me which step to dive into first:
+
+  ---
+  Step 1 — The original model (FFS)
+  The full PyTorch model runs as one monolithic forward pass. It has two
+  conceptually distinct stages:
+  - A feature extractor (ViT backbone → multi-scale feature maps)
+  - A disparity estimator (cost volume + GRU refinement loop → upsampled
+  disparity)
+
+  Step 2 — Splitting into TrtFeatureRunner + TrtPostRunner
+  We split those two stages into separate modules. This is required because the
+  middle step (building the GWC correlation volume with Triton) isn't
+  expressible in ONNX — it has to stay in PyTorch and run between the two
+  exported parts.
+
+  Step 3 — ONNX export
+  Each part is traced with dummy inputs and serialized to the ONNX format — a
+  vendor-neutral graph representation of the computation.
+
+  Step 4 — TensorRT compilation
+  TensorRT takes each ONNX graph and compiles it into a GPU-native .engine file:
+   it fuses layers, picks the fastest CUDA kernels for this specific GPU (RTX
+  4080), and converts weights to FP16.
+
+  Step 5 — Inference at benchmark time
+  The engines expect a fixed input size (448×640), so we resize the camera
+  images, run both engines with the Triton volume step in between, then scale
+  the disparity back to original resolution for depth conversion.
+
+  ---
+  Which step do you want to unpack first?
+
+✻ Cooked for 23s
+
+❯ i think you drifted away from what i mean, you just enhanced the ffs model    
+  with rs_prior data and im trying to understand the concepts you used there    
+● Let me look at those files — they were in the repo before this session.
+
+  Read 1 file (ctrl+o to expand)
+
+● Got it, now I see exactly what you mean. The finetune_inbolt_depthrs.py adds
+  three fusion points where RealSense depth is injected into the FFS pipeline.
+  Let me walk through them one by one.
+
+  ---
+  The core idea
+
+  FFS normally only sees two IR images (left + right). This enhancement says: we
+   also have a noisy-but-free depth reading from the RS hardware sensor — let's 
+  teach the network to use it as a hint at three places in the pipeline.
+
+  ---
+  Stage 1 — Early: depth features fused into image features
+
+  DepthEncoder takes the RS depth map (normalized to [0,1]) plus a validity mask
+   (1=valid pixel, 0=missing) and runs it through a small 4-scale CNN, producing
+   depth feature maps at 1/4, 1/8, 1/16, 1/32 resolution.
+
+  DepthFusionModule then adds those depth features residually into the
+  left-image feature maps that the backbone already produced — one 1×1
+  convolution per scale, zero-initialized so at the start of training it
+  contributes nothing and the model starts from the pretrained behaviour.
+
+  Stage 2 — Mid: Gaussian prior bias on the cost volume
+
+  After the cost volume is built and aggregated, the model computes a softmax
+  over disparity bins to get an initial disparity estimate. This stage adds a
+  Gaussian "bump" centered at the disparity predicted from the RS depth
+  measurement onto the logits before that softmax — making the network's initial
+   guess gravitate toward the RS reading where it's valid. Two learned scalars
+  control this: depth_sigma (how sharp/wide the Gaussian is) and
+  depth_prior_scale (overall weight of the bias).
+
+  Stage 3 — Late: learned blend for GRU initialization
+
+  DepthInitBlend takes the stereo soft-argmax result, the RS-derived disparity
+  prior, and the validity mask, and learns a pixel-wise blend weight. Where RS
+  depth is valid and the blend weight is high, the GRU starts from the
+  RS-derived disparity rather than the pure stereo estimate. The final output
+  nn.Conv2d is bias-initialized to −5 so sigmoid(−5) ≈ 0.007 — meaning it starts
+   almost fully trusting stereo and only slowly learns to trust RS depth.
+
+  ---
+  Which stage do you want to go deeper on?
+
+✻ Brewed for 29s
+
+❯ each and every one of them, i want to understand if they are all necessary,   
+  and if they might create more noise than signal. lets deep dive into each one 
+
+● Let me go through each stage honestly — including where I think the design is
+  sound and where it could backfire.
+
+  ---
+  Stage 1 — Early: DepthEncoder + DepthFusionModule
+
+  What it does
+
+  RS depth + validity mask → small 4-scale CNN → feature maps at 1/4…1/32
+  resolution → added residually (via 1×1 conv, zero-init) into the left image 
+  feature maps from the backbone.
+
+  Why the zero-init matters
+
+  At training start, the 1×1 projections output exactly zero, so the model
+  starts from the pretrained stereo behaviour. The network has to earn the depth
+   contribution via gradient descent. This is a sound training stability trick.
+
+  The validity masking per scale
+
+  v4  = F.max_pool2d(validity, 2, 2)   # H/4
+  d4  = self.stage1(x2) * v4
+  Using max-pool on the validity mask means a coarse-scale cell is valid if any
+  fine-scale pixel in it was valid. That's intentionally permissive — better
+  to use a slightly noisy valid region than to zero out real signal.
+
+  Where it can create noise over signal
+
+  The depth features are added to the left features only. Those left features
+  then go into the cost volume — which is a cross-correlation between left and
+  right. The right features are untouched. So you're introducing an asymmetry:
+  the left side now "knows" something about depth that the right side doesn't.
+  If the RS depth is accurate, this helps the left features tell the cost volume
+   "look near this disparity". If RS depth is wrong (reflective surface,
+  occluded region, edge bleed), it corrupts the feature matching for that region
+   and the cost volume gets a misleading signal that's hard to override
+  downstream.
+
+  Verdict: This is the riskiest of the three. The benefit is real but the
+  asymmetry corrupting feature matching is a genuine concern. Worth ablating
+  — Stage 2 alone may give most of the gain at lower risk.
+
+  ---
+  Stage 2 — Mid: Gaussian prior bias on cost volume logits
+
+  What it does
+
+  After the cost volume produces logits over disparity bins (shape B × D × H/4 ×
+   W/4), this adds a Gaussian bump centered at the disparity predicted from RS
+  depth:
+
+  d_idx      = arange(D)                          # disparity bin index
+  prior_bias = -0.5 * ((d_idx - disp_prior_14) / sigma)²
+  logits     = logits + depth_prior_scale * prior_bias
+
+  Why this is the most principled stage
+
+  It's a proper Bayesian-style prior injection. The logits are unnormalised
+  log-probabilities (pre-softmax), so adding a log-Gaussian (quadratic) term is
+  equivalent to multiplying the stereo distribution by a Gaussian prior — which is
+   the textbook way to combine two uncertain measurements. The stereo evidence
+  can still dominate if it's strong; the prior only matters when stereo evidence
+   is weak (textureless regions, bad lighting).
+
+  The two learned scalars
+
+  - depth_sigma (init=4.0 disparity units at 1/4 scale): controls how tight the
+  prior is. Too small → a slightly wrong RS reading creates a sharp misleading
+  spike. Too large → the prior is so diffuse it does nothing.
+  - depth_prior_scale (init=0.1): overall weight. Starts near-zero, network
+  learns how much to trust RS globally.
+
+  Where it can create noise
+
+  RS depth suffers from mixed-pixel / edge bleed — at depth discontinuities the
+  sensor averages foreground and background, giving a reading that belongs to
+  neither. In those pixels the Gaussian bump is centered at a phantom disparity,
+   and if depth_sigma has learned to be tight, it actively suppresses the
+  correct bin. The validity mask doesn't help here — the pixel is valid, just
+  wrong.
+
+  Verdict: The cleanest and lowest-risk stage. This one is almost certainly
+  worth keeping.
+
+  ---
+  Stage 3 — Late: DepthInitBlend (GRU initialisation)
+
+  What it does
+
+  Takes three inputs at 1/4 resolution: the stereo soft-argmax stereo_init, the
+  RS-derived disparity disp_prior_14, and the validity mask. A small 3-layer CNN
+   predicts a per-pixel blend weight w = sigmoid(...), and outputs:
+
+  blend_w * disp_prior + (1 - blend_w) * stereo_init
+
+  The last conv is bias-init to −5 → sigmoid(−5) ≈ 0.007 at the start, so the
+  GRU initialises almost entirely from the stereo soft-argmax.
+
+  The claimed benefit
+
+  The GRU is iterative (8 iterations here). A better initialisation means it
+  needs fewer iterations to converge, and reduces the risk of getting stuck at
+  the wrong disparity. RS depth, even noisy, can cut across textureless regions
+  where stereo's soft-argmax is genuinely confused.
+
+  The redundancy problem
+
+  This is where I'd push back hardest. After Stage 2, stereo_init is already 
+  biased toward the RS prior — the Gaussian bump in logits has already nudged
+  the soft-argmax in that direction. Stage 3 then takes that already-biased
+  estimate and blends it again with the raw RS disparity. In the good case (RS
+  correct, Stage 2 worked) you've counted the RS signal twice. In the bad
+  case (RS wrong at an edge), the GRU starts from the wrong disparity and has to
+   spend all 8 iterations fighting its way back to the correct answer — the
+  iterative mechanism that's supposed to be robust to bad init is now working
+  against you.
+
+  The other risk: local minima
+
+  RAFT-style GRUs are known to be fairly robust to initialisation within a 
+  reasonable range. But "reasonable" here means within a few disparity units. If
+   a RS edge-bleed pixel pushes the init 30 disparity units off, 8 iterations
+  may genuinely not be enough to recover.
+
+  Verdict: Most questionable of the three. Partially redundant with Stage 2, and
+   introduces GRU convergence risk. If you had to cut one stage, cut this one
+  first and see if loss changes at all.
+
+  ---
+  Summary table
+
+  ┌────────────────┬──────────────────┬───────────────────────┬────────────┐
+  │     Stage      │    Mechanism     │         Risk          │   Worth    │
+  │                │                  │                       │  keeping?  │
+  ├────────────────┼──────────────────┼───────────────────────┼────────────┤
+  │                │ Residual depth   │ Asymmetry corrupts    │            │
+  │ 1 —            │ features → left  │ feature matching      │ Ablate     │
+  │ DepthEncoder   │ image features   │ when RS is wrong      │ first      │
+  │                │                  │                       │            │
+  ├────────────────┼──────────────────┼───────────────────────┼────────────┤
+  │ 2 — Gaussian   │ Adds prior bump  │ Edge-bleed pixels get │ Almost     │
+  │ prior          │ to cost-volume   │  tight wrong prior    │ certainly  │
+  │                │ logits           │                       │ yes        │
+  ├────────────────┼──────────────────┼───────────────────────┼────────────┤
+  │ 3 —            │ Blends stereo    │ Redundant with Stage  │ Ablate     │
+  │ DepthInitBlend │ init with RS     │ 2; GRU local minima   │ second     │
+  │                │ disparity        │ risk                  │            │
+  └────────────────┴──────────────────┴───────────────────────┴────────────┘
+
+  The clean experiment to run: train three variants — Stage 2 only, Stages 1+2,
+  all three — and compare validation loss. My guess is Stage 2 alone gets you
+  80%+ of the gain.
+
+✻ Cooked for 55s
+
+❯ two follow up questions- 1. is it possible that using just one would yield    
+  superior results? 2. this model uses all 192 disparities?                     
+
+● Two sharp questions. Let me take them in order.
+
+  ---
+  1. Could using just one stage yield superior results?
+
+  Yes, genuinely possible — and Stage 2 alone is my best candidate to outperform
+   all three combined. Here's why.
+
+  The double-counting problem
+
+  After Stage 2 biases the soft-argmax toward the RS prior, stereo_init is
+  already a mix of stereo evidence and RS prior. Stage 3 then takes that
+  already-biased estimate and blends it again with disp_prior. If you expand the
+   algebra:
+
+  stereo_init  ≈ α·disp_prior + (1-α)·pure_stereo        # after Stage 2
+  final_init    = β·disp_prior + (1-β)·stereo_init
+               = (β + (1-β)·α)·disp_prior + (1-β)·(1-α)·pure_stereo
+
+  The effective weight on the RS prior is higher than either stage intended.
+  When RS is correct this is fine; when RS is wrong at an edge, both stages
+  compound the error into the GRU starting point.
+
+  The limited data problem
+
+  The Inbolt dataset isn't large. Three fusion stages means three sets of new
+  parameters competing for gradient signal. With limited data, a single well-placed
+   module generalises better than three weakly-constrained ones trying to
+  cooperate.
+
+  The asymmetry problem in Stage 1
+
+  Adding depth features to left-only features while right features stay
+  unchanged creates an input mismatch the cost volume wasn't designed to handle.
+   If Stage 1 learned to encode RS noise, the cross-correlation degrades in a
+  way Stage 2 and 3 can't fully compensate. Stage 2 doesn't have this problem —
+  it operates after features are already matched.
+
+  What to run: three training runs — Stage 2 only, Stages 2+3, all three — same
+  epochs, same data split. My prediction is Stage 2 alone wins or ties on the
+  test set.
+
+  ---
+  2. Does this model use all 192 disparities?
+
+  Not quite — and this is an important architectural point.
+
+  The cost volume is built at 1/4 resolution with max_disp // 4 = 48 bins. Each
+  bin represents a 4-pixel step in disparity at full resolution, covering the
+  range 0–192. So the explicit search is 48 coarse bins, not 192 individual
+  values.
+
+  The RS Gaussian prior in Stage 2 operates over these same 48 bins:
+
+  d_idx        = torch.arange(D)          # 0 … 47
+  disp_prior_14 = disp_prior / 4.0        # convert to 1/4-scale units
+  prior_bias   = -0.5 * ((d_idx - disp_prior_14) / sigma)²
+
+  With sigma initialised at 4.0 (= 4 bins = 16 full-resolution pixels), the
+  prior is fairly broad. It's not pinning the network to a single disparity —
+  it's weighting a neighbourhood.
+
+  The GRU then refines continuously from the soft-argmax initialisation, so
+  sub-bin and sub-pixel accuracy is still reachable. The 48-bin cost volume just
+   determines where the GRU starts from.
+
+  The efficiency implication you might be heading toward
+
+  Because we have the RS prior, we could potentially build a much smaller cost
+  volume — say, only 16 bins centered around the RS-predicted disparity for each
+   pixel — rather than always computing all 48. That would cut cost-volume
+  memory and compute significantly, and Stage 2's Gaussian would collapse to a
+  much tighter prior over a smaller range.
+
+  The current implementation doesn't do this — it still builds the full 48-bin
+  volume and just biases the logits. That's leaving performance on the table:
+  you pay the full compute cost but don't use the RS prior to reduce the search
+  space, only to reweight it after the fact.
+
+  Is that the direction you were thinking?
+
+✻ Churned for 1m 17s
\ No newline at end of file
diff --git a/weights/23-36-37/cfg.yaml b/weights/23-36-37/cfg.yaml
new file mode 100644
index 0000000..eac5ad7
--- /dev/null
+++ b/weights/23-36-37/cfg.yaml
@@ -0,0 +1,12 @@
+corr_levels: 2
+corr_radius: 4
+hidden_dims:
+- 128
+low_memory: 0
+max_disp: 416
+mixed_precision: true
+n_downsample: 2
+n_gru_layers: 1
+slow_fast_gru: false
+valid_iters: 8
+vit_size: vitl
diff --git a/weights/weights_niv/model_best_bp2_serialize.pth b/weights/weights_niv/model_best_bp2_serialize.pth
new file mode 100644
index 0000000..86c6749
Binary files /dev/null and b/weights/weights_niv/model_best_bp2_serialize.pth differ
diff --git a/weights/weights_niv/stage3_best.pt b/weights/weights_niv/stage3_best.pt
new file mode 100644
index 0000000..fca0704
Binary files /dev/null and b/weights/weights_niv/stage3_best.pt differ