From 48736869adbc5397c9c72dd170fd1c0c6d4c2f6b Mon Sep 17 00:00:00 2001 From: Kashu7100 Date: Mon, 1 Jun 2026 16:43:06 -0400 Subject: [PATCH] [PERF] Split rigid collision BVH into static + dynamic subsets (RPL multi-depth) Follow-up to #2867. A scene with one moving robot on a large static terrain still rebuilds a single combined collision BVH over every face each step, because the rebuild-skip keys off "all links in the solver are fixed" -- false as soon as the solver contains any movable link (e.g. the robot), whether or not that link is actually moving. The static terrain (the bulk of the faces) is re-fit every step for nothing. This decomposes the rigid solver's collision faces into two compacted BVHs by owning-link fixedness (RaycasterSensor._partition_collision_faces): - static subset (faces on fixed links: terrain / walls): maybe_static, built once, then skipped + shared across envs (this removes the dominant per-step cost for one robot on a big static terrain). - dynamic subset (faces on movable links: the robot): rebuilt each step, but the rebuild + radix sort now scale with the robot's face count, not the whole scene. The two are cast separately and merged via the existing is_merge path (closest hit wins), so the result is identical to one combined BVH. This is the "multi-depth" decomposition from RPL (arXiv:2602.03002): cast the dynamic robot and static terrain meshes separately and reuse the static acceleration structure across timesteps and environments. Implementation - Each BVH is built over a compacted face subset. A `face_ids` array maps a BVH leaf slot to the global face index; bvh_ray_cast remaps after reading the morton-code primitive id, and update_aabbs iterates the subset. `n_triangles` in bvh_ray_cast now derives from the morton-codes shape (the BVH's own leaf count) instead of the solver-global face count. 
- The existing maybe_static/needs_rebuild skip and the AABB-derived shared_across_envs test are already per-entry, so they apply to each subset unchanged: the static subset's GEOMETRY subscriber only fires on an explicit set_pos/set_quat (e.g. re-randomized terrain), never on physics-driven robot motion, so it stays skipped + shared while the robot subset rebuilds. - A pure-static or pure-dynamic solver yields a single subset with identity face_ids, i.e. the previous single-BVH behavior -- bit-identical. - kernel_cast_ray (viewer pick) and the viewer plugin pass an identity face_ids map covering the full mesh. Perf (perceptive depth camera, 64x36 rays, 18.5k-face terrain + G1, RTX 3080): 1024 envs: 1037 -> 706 ms/step (1.47x) measured on the pre-refactor branch; the win grows with the product of env count and terrain face count. Re-validated functionally on the current main: a moving-robot-on-terrain scene now builds 1 static (skipped + shared) + 1 dynamic BVH, depth output unchanged. Tests - New tests/test_sensors.py::test_raycaster_static_dynamic_bvh_split asserts the split structure (one static + one dynamic collision BVH, static shared across envs), the merge reporting the closer of static/dynamic as a movable box enters/leaves a ray, and that the static BVH stays skipped across a dynamic move. Passes for n_envs in {0, 2}. - Existing raycaster/lidar suite unchanged (single full-mesh path is the identity-face_ids case; mixed scenes now exercise the two-BVH merge). 
Co-authored-by: Claude Opus 4.7 (1M context) --- genesis/engine/sensors/raycaster.py | 47 +++++++++++++++++++-- genesis/utils/raycast_qd.py | 38 ++++++++++++----- genesis/vis/viewer_plugins/raycast.py | 7 ++++ tests/test_sensors.py | 60 +++++++++++++++++++++++++++ 4 files changed, 138 insertions(+), 14 deletions(-) diff --git a/genesis/engine/sensors/raycaster.py b/genesis/engine/sensors/raycaster.py index bc684fcffb..c3b2f41cd8 100644 --- a/genesis/engine/sensors/raycaster.py +++ b/genesis/engine/sensors/raycaster.py @@ -57,6 +57,12 @@ class BVHContext: # True when the geometry is bit-identical across envs, so the cast reads one shared copy (batch 0) with coalesced # node loads instead of scattering over n_env identical trees. Recomputed on every rebuild. shared_across_envs: bool = False + # Compacted face subset this BVH covers: face_ids[k] is the global face index at BVH leaf slot k. Static + # (fixed-link) and dynamic (movable-link) collision faces get separate subsets so the static tree is built once + # and shared while only the small dynamic subset rebuilds each step. A 1-D int device tensor; for a single + # full-mesh BVH it is the identity map arange(n_faces). None for visual BVH entries (which keep the full-mesh + # path; their cast/update kernels do not take face_ids). + face_ids: torch.Tensor | None = None @dataclass @@ -101,6 +107,28 @@ def __init__(self, options: RaycasterOptions, sensor_idx: int, manager: "SensorM self.debug_objects: list["Mesh"] = [] self.ray_starts: torch.Tensor = torch.empty((0, 3), device=gs.device, dtype=gs.tc_float) + @staticmethod + def _partition_collision_faces(solver: "RigidSolver") -> list[tuple[torch.Tensor, bool]]: + """Partition the solver's collision faces into static (fixed-link) and dynamic (movable-link) subsets. + + Returns ``(face_ids, maybe_static)`` per non-empty subset, where ``face_ids`` are the global face indices in + that subset. 
A pure-static or pure-dynamic solver yields one entry (equivalent to a single full-mesh BVH); a + mixed scene (robot on terrain) yields two, so the static terrain tree can be built once + shared while only + the robot subset rebuilds. Used by :meth:`build`. + """ + face_geom = qd_to_numpy(solver.faces_info.geom_idx).reshape(-1) # (n_faces,) global geom per face + geom_link = qd_to_numpy(solver.geoms_info.link_idx).reshape(-1) # (n_geoms,) global link per geom + link_fixed = np.array([bool(link.is_fixed) for link in solver.links], dtype=bool) # (n_links,) + face_static = link_fixed[geom_link[face_geom]] # (n_faces,) is this face on a fixed link? + + out: list[tuple[torch.Tensor, bool]] = [] + for is_static in (True, False): + sel = np.nonzero(face_static == is_static)[0] + if sel.size == 0: + continue + out.append((torch.as_tensor(sel, dtype=gs.tc_int, device=gs.device), bool(is_static))) + return out + @staticmethod def _compute_visual_raycast_mask(solver: "KinematicSolver") -> np.ndarray: """Build a per-vface mask (int8, shape (n_vfaces,)) selecting vfaces opted into visual raycasting. @@ -144,6 +172,7 @@ def _update_bvh(cls, shared_metadata: RaycasterSharedMetadata): free_verts_state=entry.solver.free_verts_state, fixed_verts_state=entry.solver.fixed_verts_state, links_info=entry.solver.links_info, + face_ids=entry.face_ids, static_rigid_sim_config=entry.solver._static_rigid_sim_config, aabb_state=entry.aabb, ) @@ -201,10 +230,19 @@ def build(self): # catches. Applies to both the collision and the visual BVH. 
maybe_static = all(link.is_fixed for link in solver.links) if isinstance(solver, RigidSolver): - n_faces = solver.faces_info.geom_idx.shape[0] - aabb = AABB(n_batches=n_envs, n_aabbs=n_faces) - bvh = LBVH(aabb, max_n_query_result_per_aabb=0, n_radix_sort_groups=64) - self._shared_metadata.solver_bvhs.append(BVHContext(solver, bvh, aabb, None, maybe_static)) + # Split the collision faces into a static subset (faces on fixed links: terrain / walls) and a + # dynamic subset (faces on movable links: the robot), each with its own compacted BVH. The static + # subset is then built once + skipped + shared across envs while only the small dynamic subset + # rebuilds per step; the cast kernels merge the two via is_merge so the result is identical to one + # combined BVH. This is the RPL "multi-depth" decomposition (arXiv:2602.03002). A pure-static or + # pure-dynamic solver yields a single subset (identity face_ids) == the previous single-BVH path. + for face_ids, subset_static in self._partition_collision_faces(solver): + n_sub = int(face_ids.shape[0]) + aabb = AABB(n_batches=n_envs, n_aabbs=n_sub) + bvh = LBVH(aabb, max_n_query_result_per_aabb=0, n_radix_sort_groups=min(64, n_sub)) + self._shared_metadata.solver_bvhs.append( + BVHContext(solver, bvh, aabb, None, subset_static, face_ids=face_ids) + ) n_vfaces = solver.vfaces_info.vgeom_idx.shape[0] if n_vfaces > 0: mask = self._compute_visual_raycast_mask(solver) @@ -352,6 +390,7 @@ def _update_raw_data(cls, shared_metadata: RaycasterSharedMetadata, raw_data_T: solver.free_verts_state, solver.verts_info, solver.faces_info, + entry.face_ids, *args_common, ) else: diff --git a/genesis/utils/raycast_qd.py b/genesis/utils/raycast_qd.py index eaf05135af..2bf7f757e4 100644 --- a/genesis/utils/raycast_qd.py +++ b/genesis/utils/raycast_qd.py @@ -56,6 +56,7 @@ def bvh_ray_cast( verts_info: array_class.VertsInfo, fixed_verts_state: array_class.VertsState, free_verts_state: array_class.VertsState, + face_ids: 
qd.types.ndarray(ndim=1), eps: float, ): """ @@ -70,7 +71,10 @@ def bvh_ray_cast( hit_normal : qd.math.vec3 normal vector at hit point (zero vector if no hit) """ - n_triangles = faces_info.verts_idx.shape[0] + # Leaf count = this BVH's AABB/morton-code count, NOT the solver's global face count: the BVH may cover a + # compacted face subset (static terrain vs dynamic robot - see RaycasterSensor.build). morton_codes is + # (n_batch, n_leaves); face_ids[leaf] remaps the subset-local leaf back to the global face below. + n_triangles = bvh_morton_codes.shape[1] hit_face = -1 closest_distance = gs.qd_float(max_range) @@ -92,9 +96,10 @@ def bvh_ray_cast( if aabb_t >= 0.0 and aabb_t < closest_distance: if node.left == -1: # Leaf node - # Get original triangle/face index + # Get original triangle/face index. The morton code carries the subset-local leaf slot; + # face_ids remaps it to the solver-global face (identity for a single full-mesh BVH). sorted_leaf_idx = node_idx - (n_triangles - 1) - i_f = qd.cast(bvh_morton_codes[i_b, sorted_leaf_idx][1], gs.qd_int) + i_f = qd.cast(face_ids[qd.cast(bvh_morton_codes[i_b, sorted_leaf_idx][1], gs.qd_int)], gs.qd_int) # Get triangle vertices tri_vertices = get_triangle_vertices( @@ -234,20 +239,27 @@ def update_aabbs( faces_info: array_class.FacesInfo, geoms_info: array_class.GeomsInfo, links_info: array_class.LinksInfo, + face_ids: qd.types.ndarray(ndim=1), static_rigid_sim_config: qd.template(), aabb_state: qd.template(), ): """Update per-face collision AABBs from current vertex positions. + AABB slot k holds the bounding box of the global face face_ids[k]; the BVH is built over this compacted subset + (e.g. only the static terrain faces, or only the moving robot faces - see RaycasterSensor.build), so the rebuild + + radix sort scale with the subset size rather than every face in the solver. For a single full-mesh BVH face_ids + is the identity map. 
+ A face contributes to env i_b only if its geom lies in that env's active geom range (links_info.geom_start / geom_end); otherwise its AABB is left inverted (unhittable) and skipped by ray queries. For a homogeneous solver every geom is always in range, so this never excludes anything. For a heterogeneous solver, where all envs share one vertex buffer but activate different per-env geom ranges, it makes each env cast against only its own variant instead of the union of every variant. """ - for i_b, i_f in qd.ndrange(free_verts_state.pos.shape[1], faces_info.verts_idx.shape[0]): - aabb_state.aabbs[i_b, i_f].min.fill(qd.math.inf) - aabb_state.aabbs[i_b, i_f].max.fill(-qd.math.inf) + for i_b, k in qd.ndrange(free_verts_state.pos.shape[1], face_ids.shape[0]): + i_f = face_ids[k] + aabb_state.aabbs[i_b, k].min.fill(qd.math.inf) + aabb_state.aabbs[i_b, k].max.fill(-qd.math.inf) i_g = faces_info.geom_idx[i_f] i_l = geoms_info.link_idx[i_g] @@ -258,12 +270,12 @@ def update_aabbs( i_fv = verts_info.verts_state_idx[i_v] if verts_info.is_fixed[i_v]: pos_v = fixed_verts_state.pos[i_fv] - aabb_state.aabbs[i_b, i_f].min = qd.min(aabb_state.aabbs[i_b, i_f].min, pos_v) - aabb_state.aabbs[i_b, i_f].max = qd.max(aabb_state.aabbs[i_b, i_f].max, pos_v) + aabb_state.aabbs[i_b, k].min = qd.min(aabb_state.aabbs[i_b, k].min, pos_v) + aabb_state.aabbs[i_b, k].max = qd.max(aabb_state.aabbs[i_b, k].max, pos_v) else: pos_v = free_verts_state.pos[i_fv, i_b] - aabb_state.aabbs[i_b, i_f].min = qd.min(aabb_state.aabbs[i_b, i_f].min, pos_v) - aabb_state.aabbs[i_b, i_f].max = qd.max(aabb_state.aabbs[i_b, i_f].max, pos_v) + aabb_state.aabbs[i_b, k].min = qd.min(aabb_state.aabbs[i_b, k].min, pos_v) + aabb_state.aabbs[i_b, k].max = qd.max(aabb_state.aabbs[i_b, k].max, pos_v) @qd.kernel @@ -275,6 +287,7 @@ def kernel_update_verts_and_aabbs( free_verts_state: array_class.VertsState, fixed_verts_state: array_class.VertsState, links_info: array_class.LinksInfo, + face_ids: qd.types.ndarray(ndim=1), 
static_rigid_sim_config: qd.template(), aabb_state: qd.template(), ): @@ -288,6 +301,7 @@ def kernel_update_verts_and_aabbs( faces_info, geoms_info, links_info, + face_ids, static_rigid_sim_config, aabb_state, ) @@ -442,6 +456,7 @@ def kernel_cast_ray( free_verts_state: array_class.VertsState, verts_info: array_class.VertsInfo, faces_info: array_class.FacesInfo, + face_ids: qd.types.ndarray(ndim=1), # maps BVH leaf slot -> global face index (identity for a full-mesh BVH) bvh_nodes: qd.template(), bvh_morton_codes: qd.template(), ray_start: qd.types.ndarray(ndim=1), # (3,) @@ -482,6 +497,7 @@ def kernel_cast_ray( verts_info=verts_info, fixed_verts_state=fixed_verts_state, free_verts_state=free_verts_state, + face_ids=face_ids, eps=eps, ) if cur_hit_face >= 0: @@ -544,6 +560,7 @@ def kernel_cast_rays( free_verts_state: array_class.VertsState, verts_info: array_class.VertsInfo, faces_info: array_class.FacesInfo, + face_ids: qd.types.ndarray(ndim=1), # maps BVH leaf slot -> global face index (identity for a full-mesh BVH) bvh_nodes: qd.template(), bvh_morton_codes: qd.template(), # maps sorted leaves to original triangle indices links_pos: qd.types.ndarray(ndim=3), # [n_env, n_sensors, 3] @@ -610,6 +627,7 @@ def kernel_cast_rays( verts_info=verts_info, fixed_verts_state=fixed_verts_state, free_verts_state=free_verts_state, + face_ids=face_ids, eps=eps, ) diff --git a/genesis/vis/viewer_plugins/raycast.py b/genesis/vis/viewer_plugins/raycast.py index 2cf9ae2f70..1e3417a334 100644 --- a/genesis/vis/viewer_plugins/raycast.py +++ b/genesis/vis/viewer_plugins/raycast.py @@ -1,6 +1,7 @@ from typing import TYPE_CHECKING import numpy as np +import torch from typing_extensions import override import genesis as gs @@ -51,6 +52,10 @@ def __init__(self, scene: "Scene"): max_n_query_result_per_aabb=0, # Not used for ray queries n_radix_sort_groups=min(64, n_faces), ) + # The viewer casts against the full mesh (one BVH over every face), so the leaf-slot -> global-face map + # the 
cast/update kernels take is the identity. (Sensors may build compacted per-subset BVHs and pass a + # real subset map; see RaycasterSensor.build.) + self.face_ids = torch.arange(n_faces, dtype=gs.tc_int, device=gs.device) self.result = array_class.get_raycast_result(n_envs_max) self.update() @@ -72,6 +77,7 @@ def update(self) -> None: free_verts_state=self.solver.free_verts_state, fixed_verts_state=self.solver.fixed_verts_state, links_info=self.solver.links_info, + face_ids=self.face_ids, static_rigid_sim_config=self.solver._static_rigid_sim_config, aabb_state=self.aabb, ) @@ -105,6 +111,7 @@ def cast( self.solver.free_verts_state, self.solver.verts_info, self.solver.faces_info, + self.face_ids, self.bvh.nodes, self.bvh.morton_codes, np.ascontiguousarray(ray_origin, dtype=gs.np_float), diff --git a/tests/test_sensors.py b/tests/test_sensors.py index bb8bd4b14f..61324cc641 100644 --- a/tests/test_sensors.py +++ b/tests/test_sensors.py @@ -1126,6 +1126,66 @@ def test_raycaster_hits(show_viewer, n_envs): assert_allclose(grid_distances, grid_distances_ref, tol=1e-3) +@pytest.mark.required +@pytest.mark.parametrize("n_envs", [0, 2]) +def test_raycaster_static_dynamic_bvh_split(show_viewer, n_envs): + """A rigid solver's collision mesh is split into a static (fixed-link) BVH and a dynamic (movable-link) BVH, + cast separately and merged. Asserts: (a) the split structure (one static + one dynamic collision entry, static + shared across envs); (b) the merge reports the closer of static / dynamic as a movable box enters / leaves a + ray's path; (c) the static entry is genuinely skipped across a dynamic move (stays needs_rebuild=False). + """ + HEIGHT = 1.0 + BOX = 0.2 # movable box edge + + scene = gs.Scene( + profiling_options=gs.options.ProfilingOptions(show_FPS=False), + show_viewer=show_viewer, + ) + scene.add_entity(gs.morphs.Plane()) # static (fixed) + # A single downward ray from a fixed mount over the origin. 
collision=False so the mount carries no collision + # faces and the ray doesn't immediately hit its own mount geometry. + mount = scene.add_entity(gs.morphs.Box(size=(0.05, 0.05, 0.05), pos=(0.0, 0.0, HEIGHT), fixed=True, collision=False)) + box = scene.add_entity(gs.morphs.Box(size=(BOX, BOX, BOX), pos=(5.0, 5.0, 0.5 * BOX))) # dynamic (movable) + sensor = scene.add_sensor( + gs.sensors.Raycaster( + pattern=gs.sensors.raycaster.GridPattern(resolution=1.0, size=(0.0, 0.0), direction=(0.0, 0.0, -1.0)), + entity_idx=mount.idx, + return_world_frame=False, + ) + ) + + scene.build(n_envs=n_envs) + batch_shape = (n_envs,) if n_envs > 0 else () + + # (a) Split structure: exactly two collision BVH entries (raycast_mask is None), one static + one dynamic; the + # static one is shared across envs when batched (identical fixed geometry in every env). + collision_bvhs = [e for e in sensor._shared_metadata.solver_bvhs if e.raycast_mask is None] + assert len(collision_bvhs) == 2, f"expected static+dynamic split, got {len(collision_bvhs)} collision BVHs" + static_entries = [e for e in collision_bvhs if e.maybe_static] + dynamic_entries = [e for e in collision_bvhs if not e.maybe_static] + assert len(static_entries) == 1 and len(dynamic_entries) == 1 + if n_envs > 0: + assert static_entries[0].shared_across_envs, "static terrain BVH should be shared across envs" + assert not dynamic_entries[0].shared_across_envs, "dynamic (movable) BVH must stay per-env" + + # (b1) Box parked far away -> the ray falls through to the static ground at distance HEIGHT. + scene.sim._sensor_manager.step() + assert_allclose(sensor.read().distances.reshape(batch_shape), HEIGHT, tol=gs.EPS) + + # (b2) Move the box directly under the ray -> the merge must now report the closer hit (box top). 
+ box.set_pos(np.tile((0.0, 0.0, 0.5 * BOX), (*batch_shape, 1))) + scene.sim._sensor_manager.step() + assert_allclose(sensor.read().distances.reshape(batch_shape), HEIGHT - BOX, tol=gs.EPS) + + # (c) The static (terrain) BVH stayed skipped across the dynamic move: it never re-flagged for rebuild. + assert not static_entries[0].needs_rebuild, "static BVH was flagged for rebuild by a dynamic-only move" + + # (b3) Move the box back out -> ray returns to the static ground distance (dynamic BVH tracked the motion). + box.set_pos(np.tile((5.0, 5.0, 0.5 * BOX), (*batch_shape, 1))) + scene.sim._sensor_manager.step() + assert_allclose(sensor.read().distances.reshape(batch_shape), HEIGHT, tol=gs.EPS) + + @pytest.mark.required @pytest.mark.parametrize("n_envs", [0, 2]) @pytest.mark.parametrize("kin_raycastable", [True, False])