diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index d8a31cb..7650f4a 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -432,9 +432,7 @@ def __len__(self): return len(self.indices) def _preload_all(self) -> None: - device = torch.device( - "cuda" if self.preload_mode == "cuda" else "cpu" - ) + device = torch.device("cuda" if self.preload_mode == "cuda" else "cpu") self._preload_cache = {} for idx in self.indices: base = self._load_base_sample(idx) @@ -518,9 +516,7 @@ def _convert_to_tensors( self, sample: Dict[str, Any], device: torch.device ) -> Dict[str, Optional[torch.Tensor]]: tensor_sample: Dict[str, Optional[torch.Tensor]] = {} - tensor_sample["image"] = ( - torch.from_numpy(sample["image"]).float().to(device) - ) + tensor_sample["image"] = torch.from_numpy(sample["image"]).float().to(device) tensor_sample["depth"] = ( torch.from_numpy(sample["depth"]).float().to(device) if sample["depth"] is not None diff --git a/examples/preprocess/hloc_utils.py b/examples/preprocess/hloc_utils.py index b067472..87e637e 100644 --- a/examples/preprocess/hloc_utils.py +++ b/examples/preprocess/hloc_utils.py @@ -22,8 +22,11 @@ import sys import shutil from pathlib import Path -from typing import Any, List, Literal, Tuple, Set +from typing import Any, List, Literal, Optional, Tuple, Set +import cv2 +import numpy as np +from scipy.spatial.transform import Rotation from enum import Enum # Import Enum since CameraModel extends it from rich.console import Console @@ -44,6 +47,57 @@ class CameraModel(Enum): CONSOLE = Console(width=120) +PANO_CONFIG = { + "fov": 100.0, + "views": [ + (0.0, 0.0), + (90.0, 0.0), + (180.0, 0.0), + (-90.0, 0.0), + (0.0, 90.0), + ], +} + + +def get_rig_rotations() -> List[np.ndarray]: + """Return panorama rig rotations defined in PANO_CONFIG.""" + + rotations: List[np.ndarray] = [] + for yaw, pitch in PANO_CONFIG["views"]: + rot = Rotation.from_euler("XY", [-pitch, -yaw], degrees=True).as_matrix() + rotations.append(rot) + return rotations + + +def create_pano_rig_config(ref_idx: int = 0): + """Create a pycolmap RigConfig describing the five-view panorama setup.""" + + try: + import pycolmap + except ImportError: + return None + + cams_from_pano = get_rig_rotations() + rig_cameras = [] + for idx, cam_from_pano_R in enumerate(cams_from_pano): + if idx == ref_idx: + cam_from_rig = None + else: + cam_from_ref_R = cam_from_pano_R @ cams_from_pano[ref_idx].T + cam_from_rig = pycolmap.Rigid3d( + pycolmap.Rotation3d(cam_from_ref_R), np.zeros(3) + ) + + rig_cameras.append( + pycolmap.RigConfigCamera( + ref_sensor=(idx == ref_idx), + image_prefix=f"pano_camera{idx}/", + cam_from_rig=cam_from_rig, + ) + ) + return pycolmap.RigConfig(cameras=rig_cameras) + + MODEL_FILENAMES = ( "cameras.bin", "images.bin", @@ -147,6 +201,8 @@ def run_hloc( num_matched: int = 50, refine_pixsfm: bool = False, use_single_camera_mode: bool = True, + is_panorama: bool = False, + enable_gpu_ba: bool = False, ) -> None: """Runs hloc on the images. @@ -161,6 +217,7 @@ def run_hloc( num_matched: Number of image pairs for loc. refine_pixsfm: If True, refine the reconstruction using pixel-perfect-sfm. use_single_camera_mode: If True, uses one camera for all frames. Otherwise uses one camera per frame. + enable_gpu_ba: Whether to enable GPU bundle adjustment in pycolmap. """ try: @@ -199,6 +256,16 @@ def run_hloc( ) sys.exit(1) + if is_panorama and camera_model not in ( + CameraModel.PINHOLE, + CameraModel.SIMPLE_PINHOLE, + ): + CONSOLE.print( + "[bold yellow]⚠️ Panorama mode currently supports PINHOLE/SIMPLE_PINHOLE only." + " Forcing camera_model=PINHOLE.[/bold yellow]" + ) + camera_model = CameraModel.PINHOLE + outputs = colmap_dir sfm_pairs = outputs / "pairs-netvlad.txt" features = outputs / "features.h5" @@ -213,22 +280,34 @@ def run_hloc( feature_conf = extract_features.confs[feature_type] # type: ignore matcher_conf = match_features.confs[matcher_type] # type: ignore - references = [p.relative_to(image_dir).as_posix() for p in image_dir.iterdir()] + valid_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"} + references = sorted( + [ + p.relative_to(image_dir).as_posix() + for p in image_dir.rglob("*") + if p.is_file() and p.suffix.lower() in valid_extensions + ] + ) extract_features.main(feature_conf, image_dir, image_list=references, feature_path=features) # type: ignore if matching_method == "exhaustive": - pairs_from_exhaustive.main(sfm_pairs, image_list=references) # type: ignore + pairs_from_exhaustive.main( # type: ignore + sfm_pairs, + image_list=references, + groupby_folder=is_panorama, + ) elif matching_method == "sequential": # Use sequential matching with specified parameters retrieval_path = extract_features.main(retrieval_conf, image_dir, outputs) pairs_from_sequential.main( output=sfm_pairs, image_list=references, - window_size=6, + window_size=8, quadratic_overlap=True, use_loop_closure=True, retrieval_path=retrieval_path, retrieval_interval=2, - num_loc=5, + num_loc=3, + groupby_folder=is_panorama, ) else: retrieval_path = extract_features.main(retrieval_conf, image_dir, outputs) # type: ignore @@ -237,8 +316,88 @@ def run_hloc( match_features.main(matcher_conf, sfm_pairs, features=features, matches=matches) # type: ignore image_options = pycolmap.ImageReaderOptions(camera_model=camera_model.value) # type: ignore + mapper_opts: Optional[Any] = None + rig_config = None + + # Configure IncrementalPipelineOptions following current pycolmap API. + try: + mapper_opts_candidate = pycolmap.IncrementalPipelineOptions() + except Exception: + mapper_opts_candidate = None + + if mapper_opts_candidate is not None: + mapper_opts = mapper_opts_candidate + if hasattr(mapper_opts, "ba_use_gpu"): + mapper_opts.ba_use_gpu = enable_gpu_ba + if enable_gpu_ba and hasattr(mapper_opts, "ba_gpu_index"): + mapper_opts.ba_gpu_index = "0" + + if enable_gpu_ba: + local_getter = getattr(mapper_opts, "get_local_bundle_adjustment", None) + global_getter = getattr(mapper_opts, "get_global_bundle_adjustment", None) + if callable(local_getter) and callable(global_getter): + for ba_opts in (local_getter(), global_getter()): + ba_opts.use_gpu = True + ba_opts.gpu_index = "0" + ba_opts.min_num_images_gpu_solver = 50 + elif all( + hasattr(mapper_opts, attr) + for attr in ("ba_local_bundle_options", "ba_global_bundle_options") + ): + for attr in ("ba_local_bundle_options", "ba_global_bundle_options"): + ba_opts = getattr(mapper_opts, attr) + ba_opts.use_gpu = True + ba_opts.gpu_index = "0" + ba_opts.min_num_images_gpu_solver = 50 + else: + CONSOLE.print( + "[bold yellow]⚠️ Unable to locate bundle adjustment accessors on pycolmap " + "IncrementalPipelineOptions; GPU BA settings will not be customized.[/bold yellow]" + ) + else: + mapper_opts = {} + + def _assign_mapper_option(option_name: str, value: Any) -> None: + nonlocal mapper_opts + if mapper_opts is None: + mapper_opts = {} + if isinstance(mapper_opts, dict): + mapper_opts[option_name] = value + else: + setattr(mapper_opts, option_name, value) + + if is_panorama: + camera_mode = pycolmap.CameraMode.PER_FOLDER # type: ignore + CONSOLE.print( + "[bold green]ℹ️ Creating Panorama Rig Configuration...[/bold green]" + ) + rig_config = create_pano_rig_config() + + if references: + first_img_path = image_dir / references[0] + img = cv2.imread(str(first_img_path)) + if img is None: + raise RuntimeError(f"Cannot read calibration image: {first_img_path}") + h, w = img.shape[:2] + hfov_deg = PANO_CONFIG["fov"] + hfov_rad = np.deg2rad(hfov_deg) + focal = w / (2 * np.tan(hfov_rad / 2)) + cx, cy = w / 2.0 - 0.5, h / 2.0 - 0.5 + + if camera_model == CameraModel.PINHOLE: + image_options.camera_params = f"{focal},{focal},{cx},{cy}" + elif camera_model == CameraModel.SIMPLE_PINHOLE: + image_options.camera_params = f"{focal},{cx},{cy}" + + CONSOLE.print( + f"[bold green]ℹ️ Initialized Intrinsics: {w}x{h}, params={image_options.camera_params}[/bold green]" + ) - if use_single_camera_mode: # one camera per all frames + _assign_mapper_option("ba_refine_sensor_from_rig", False) + _assign_mapper_option("ba_refine_focal_length", True) + _assign_mapper_option("ba_refine_principal_point", False) + _assign_mapper_option("ba_refine_extra_params", False) + elif use_single_camera_mode: # one camera per all frames camera_mode = pycolmap.CameraMode.SINGLE # type: ignore else: # one camera per frame camera_mode = pycolmap.CameraMode.PER_IMAGE # type: ignore @@ -277,6 +436,8 @@ def run_hloc( camera_mode=camera_mode, # type: ignore image_options=image_options, verbose=verbose, + mapper_options=mapper_opts or None, + rig_config=rig_config, ) try: @@ -305,7 +466,55 @@ def run_hloc( target_images_dir = project_root / "images" target_sparse_dir = project_root / "sparse" / "0" - if camera_model in [CameraModel.PINHOLE, CameraModel.SIMPLE_PINHOLE]: + if is_panorama: + CONSOLE.print( + "[bold green]ℹ️ Processing Panorama: Flattening directory structure for 3DGS...[/bold green]" + ) + + if target_images_dir.exists(): + shutil.rmtree(target_images_dir) + target_images_dir.mkdir(parents=True, exist_ok=True) + + if target_sparse_dir.exists(): + shutil.rmtree(target_sparse_dir) + target_sparse_dir.mkdir(parents=True, exist_ok=True) + + name_map: dict[str, str] = {} + src_images = sorted( + [ + p + for p in image_dir.rglob("*") + if p.is_file() and p.suffix.lower() in valid_extensions + ] + ) + CONSOLE.print(f" > Flattening and copying {len(src_images)} images...") + for src_path in src_images: + rel_path = src_path.relative_to(image_dir) + new_name = rel_path.as_posix().replace("/", "__") + dst_path = target_images_dir / new_name + shutil.copy2(src_path, dst_path) + name_map[rel_path.as_posix()] = new_name + + CONSOLE.print(" > Updating COLMAP model...") + try: + recon = pycolmap.Reconstruction(sfm_dir) + for image in recon.images.values(): + if image.name in name_map: + image.name = name_map[image.name] + else: + CONSOLE.print( + f"[bold yellow]Warning: Model image {image.name} not found in source folders.[/bold yellow]" + ) + recon.write(target_sparse_dir) + CONSOLE.print( + f"[bold green]✅ Panorama data ready at {target_images_dir}[/bold green]" + ) + except Exception as err: + CONSOLE.print( + f"[bold red]❌ Failed to update panorama model: {err}[/bold red]" + ) + + elif camera_model in [CameraModel.PINHOLE, CameraModel.SIMPLE_PINHOLE]: CONSOLE.print( f"[bold green]ℹ️ Camera model is {camera_model.name}. Skipping undistortion.[/bold green]" ) diff --git a/examples/preprocess/run_hloc_sfm.py b/examples/preprocess/run_hloc_sfm.py index e274081..c686b47 100644 --- a/examples/preprocess/run_hloc_sfm.py +++ b/examples/preprocess/run_hloc_sfm.py @@ -26,17 +26,22 @@ from concurrent.futures import ThreadPoolExecutor from pathlib import Path +import cv2 +import numpy as np +from scipy.spatial.transform import Rotation from tqdm import tqdm -from hloc_utils import run_hloc, CameraModel +from hloc_utils import run_hloc, CameraModel, PANO_CONFIG -def copy_images_fast(image_dir: Path, image_prefix: str = "frame_") -> Path: - # Copy assets into distorted/images so the pipeline runs entirely inside distorted - new_dir = image_dir.parent / "distorted" / "images" - if new_dir.exists(): - shutil.rmtree(new_dir) - new_dir.mkdir(parents=True, exist_ok=True) +def copy_images_fast( + image_dir: Path, output_root: Path, image_prefix: str = "frame_" +) -> Path: + """Copy original images into the provided distorted/images directory.""" + + if output_root.exists(): + shutil.rmtree(output_root) + output_root.mkdir(parents=True, exist_ok=True) image_exts = [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"] image_paths = sorted( @@ -46,11 +51,11 @@ def copy_images_fast(image_dir: Path, image_prefix: str = "frame_") -> Path: if not image_paths: raise RuntimeError(f"No images found in {image_dir}") - print(f"📁 Copying {len(image_paths)} images to {new_dir} ...") + print(f"📁 Copying {len(image_paths)} images to {output_root} ...") def copy_one(idx_path): idx, src_path = idx_path - dst_path = new_dir / f"{image_prefix}{idx:05d}{src_path.suffix.lower()}" + dst_path = output_root / f"{image_prefix}{idx:05d}{src_path.suffix.lower()}" shutil.copy2(src_path, dst_path) with ThreadPoolExecutor(max_workers=16) as ex: @@ -63,7 +68,114 @@ def copy_one(idx_path): ) print("✅ Copy finished.") - return new_dir + return output_root + + +def get_pano_virtual_configs(): + """Return rotations and FOV based on shared PANO_CONFIG.""" + + hfov_deg = PANO_CONFIG["fov"] + vfov_deg = PANO_CONFIG["fov"] + rots = [] + for yaw, pitch in PANO_CONFIG["views"]: + rot = Rotation.from_euler("XY", [-pitch, -yaw], degrees=True).as_matrix() + rots.append(rot) + + return rots, hfov_deg, vfov_deg + + +def split_panoramas( + image_dir: Path, + output_root: Path, + image_prefix: str = "frame_", + downscale: float = 1.0, +) -> Path: + """Split each panorama into five perspective views and save by camera index.""" + + if output_root.exists(): + shutil.rmtree(output_root) + output_root.mkdir(parents=True, exist_ok=True) + + image_exts = [".jpg", ".jpeg", ".png", ".bmp", ".tiff"] + pano_paths = sorted( + [p for p in image_dir.iterdir() if p.suffix.lower() in image_exts] + ) + if not pano_paths: + raise RuntimeError(f"No images found in {image_dir}") + + print( + f"🧩 Detected {len(pano_paths)} panoramas. Splitting into perspective views..." + ) + print(f"📂 Output Directory: {output_root}") + + first_img = cv2.imread(str(pano_paths[0])) + if first_img is None: + raise RuntimeError(f"Cannot read image: {pano_paths[0]}") + pano_h, pano_w = first_img.shape[:2] + + rots, hfov, vfov = get_pano_virtual_configs() + w_virt_raw = int(pano_w * hfov / 360) + h_virt_raw = int(pano_h * vfov / 180) + w_virt = max(1, int(w_virt_raw / downscale)) + h_virt = max(1, int(h_virt_raw / downscale)) + print(f"📏 Resolution: {w_virt}x{h_virt} (Downscale factor: {downscale})") + focal = w_virt / (2 * np.tan(np.deg2rad(hfov) / 2)) + + cx, cy = w_virt / 2 - 0.5, h_virt / 2 - 0.5 + y_grid, x_grid = np.indices((h_virt, w_virt)) + rays = np.stack( + [ + (x_grid - cx), + (y_grid - cy), + np.full_like(x_grid, focal, dtype=np.float32), + ], + axis=-1, + ) + rays /= np.linalg.norm(rays, axis=-1, keepdims=True) + + def process_one_pano(idx_path): + idx, src_path = idx_path + img = cv2.imread(str(src_path)) + if img is None: + return + + for cam_idx, rot in enumerate(rots): + rays_rotated = rays @ rot + x, y, z = ( + rays_rotated[..., 0], + rays_rotated[..., 1], + rays_rotated[..., 2], + ) + yaw = np.arctan2(x, z) + pitch = -np.arctan2(y, np.linalg.norm(rays_rotated[..., [0, 2]], axis=-1)) + + u = (1 + yaw / np.pi) / 2 * pano_w + v = (1 - pitch * 2 / np.pi) / 2 * pano_h + + perspective_img = cv2.remap( + img, + u.astype(np.float32), + v.astype(np.float32), + cv2.INTER_CUBIC, + borderMode=cv2.BORDER_WRAP, + ) + + sub_dir = output_root / f"pano_camera{cam_idx}" + sub_dir.mkdir(parents=True, exist_ok=True) + save_path = sub_dir / f"{image_prefix}{idx:05d}{src_path.suffix.lower()}" + cv2.imwrite(str(save_path), perspective_img) + + with ThreadPoolExecutor(max_workers=16) as ex: + list( + tqdm( + ex.map(process_one_pano, enumerate(pano_paths, start=1)), + total=len(pano_paths), + unit="pano", + ) + ) + + print("✅ Panorama splitting finished.") + return output_root def main(): @@ -89,11 +201,38 @@ def main(): parser.add_argument("--feature_type", type=str, default="superpoint_aachen") parser.add_argument("--matcher_type", type=str, default="superglue") parser.add_argument("--use_single_camera_mode", type=bool, default=True) + parser.add_argument( + "--is_panorama", + action="store_true", + help="If set, split panoramas into perspective views and exit.", + ) + parser.add_argument( + "--pano_downscale", + type=float, + default=1.0, + help="Downscale factor for panorama splitting (>=1).", + ) + parser.add_argument( + "--gpu_ba", + type=str.lower, + choices=["on", "off"], + default="off", + help="Whether to enable GPU bundle adjustment (default: off).", + ) args = parser.parse_args() + distorted_images_dir = args.input_image_dir.parent / "distorted" / "images" + enable_gpu_ba = args.gpu_ba == "on" - # Step 1: copy and standardize images (with progress bar) - working_dir = copy_images_fast(args.input_image_dir) + if args.is_panorama: + print("\n🔄 Mode: Panorama Processing") + working_dir = split_panoramas( + args.input_image_dir, + distorted_images_dir, + downscale=max(1.0, args.pano_downscale), + ) + else: + working_dir = copy_images_fast(args.input_image_dir, distorted_images_dir) # Step 2: configure the output directory (distorted/colmap) colmap_dir = working_dir.parent / "colmap" @@ -105,10 +244,13 @@ def main(): image_dir=working_dir, colmap_dir=colmap_dir, camera_model=CameraModel[args.camera_model], + verbose=False, matching_method=args.matching_method, feature_type=args.feature_type, matcher_type=args.matcher_type, use_single_camera_mode=args.use_single_camera_mode, + is_panorama=args.is_panorama, + enable_gpu_ba=enable_gpu_ba, ) project_root = args.input_image_dir.parent