From 30bd3d403ae165d853e8035937a6993409af769d Mon Sep 17 00:00:00 2001
From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>
Date: Tue, 13 Jan 2026 11:10:19 -0800
Subject: [PATCH 1/2] Benchmark for shared memory operations

Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>
---
 src/hyperlight_host/benches/benchmarks.rs | 59 ++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 2 deletions(-)

diff --git a/src/hyperlight_host/benches/benchmarks.rs b/src/hyperlight_host/benches/benchmarks.rs
index 70b0a0416..3b47781fb 100644
--- a/src/hyperlight_host/benches/benchmarks.rs
+++ b/src/hyperlight_host/benches/benchmarks.rs
@@ -23,12 +23,13 @@ use std::sync::{Arc, Barrier, Mutex};
 use std::thread;
 use std::time::{Duration, Instant};
 
-use criterion::{Criterion, criterion_group, criterion_main};
+use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
 use flatbuffers::FlatBufferBuilder;
 use hyperlight_common::flatbuffer_wrappers::function_call::{FunctionCall, FunctionCallType};
 use hyperlight_common::flatbuffer_wrappers::function_types::{ParameterValue, ReturnType};
 use hyperlight_common::flatbuffer_wrappers::util::estimate_flatbuffer_capacity;
 use hyperlight_host::GuestBinary;
+use hyperlight_host::mem::shared_mem::ExclusiveSharedMemory;
 use hyperlight_host::sandbox::{MultiUseSandbox, SandboxConfiguration, UninitializedSandbox};
 use hyperlight_testing::sandbox_sizes::{LARGE_HEAP_SIZE, MEDIUM_HEAP_SIZE, SMALL_HEAP_SIZE};
 use hyperlight_testing::{c_simple_guest_as_string, simple_guest_as_string};
@@ -492,6 +493,59 @@ fn sample_workloads_benchmark(c: &mut Criterion) {
     group.finish();
 }
 
+// ============================================================================
+// Benchmark Category: Shared Memory Operations
+// ============================================================================
+
+fn shared_memory_benchmark(c: &mut Criterion) {
+    let mut group = c.benchmark_group("shared_memory");
+
+    let sizes: &[(usize, &str)] = &[(1024 * 1024, "1MB"), (64 * 1024 * 1024, "64MB")];
+
+    for &(size, name) in sizes {
+        group.throughput(Throughput::Bytes(size as u64));
+
+        // Benchmark fill
+        group.bench_with_input(BenchmarkId::new("fill", name), &size, |b, &size| {
+            let eshm = ExclusiveSharedMemory::new(size).unwrap();
+            let (mut hshm, _) = eshm.build();
+            b.iter(|| {
+                hshm.fill(0xAB, 0, size).unwrap();
+            });
+        });
+
+        // Benchmark copy_to_slice (read from shared memory)
+        group.bench_with_input(
+            BenchmarkId::new("copy_to_slice", name),
+            &size,
+            |b, &size| {
+                let eshm = ExclusiveSharedMemory::new(size).unwrap();
+                let (hshm, _) = eshm.build();
+                let mut dst = vec![0u8; size];
+                b.iter(|| {
+                    hshm.copy_to_slice(&mut dst, 0).unwrap();
+                });
+            },
+        );
+
+        // Benchmark copy_from_slice (write to shared memory)
+        group.bench_with_input(
+            BenchmarkId::new("copy_from_slice", name),
+            &size,
+            |b, &size| {
+                let eshm = ExclusiveSharedMemory::new(size).unwrap();
+                let (hshm, _) = eshm.build();
+                let src = vec![0xCDu8; size];
+                b.iter(|| {
+                    hshm.copy_from_slice(&src, 0).unwrap();
+                });
+            },
+        );
+    }
+
+    group.finish();
+}
+
 criterion_group! {
     name = benches;
     config = Criterion::default();
@@ -501,6 +555,7 @@ criterion_group! {
     snapshots_benchmark,
     guest_call_benchmark_large_param,
     function_call_serialization_benchmark,
-    sample_workloads_benchmark
+    sample_workloads_benchmark,
+    shared_memory_benchmark
 }
 criterion_main!(benches);

From 7de43ecca28ace801c5df097682319bc22dcb740 Mon Sep 17 00:00:00 2001
From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>
Date: Tue, 13 Jan 2026 11:21:42 -0800
Subject: [PATCH 2/2] Optimize shared memory operations

Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>

Change u64 to u128

Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>
---
 src/hyperlight_host/benches/benchmarks.rs |   1 +
 src/hyperlight_host/src/mem/shared_mem.rs | 331 +++++++++++++++++++++-
 2 files changed, 323 insertions(+), 9 deletions(-)

diff --git a/src/hyperlight_host/benches/benchmarks.rs b/src/hyperlight_host/benches/benchmarks.rs
index 3b47781fb..7dc51d299 100644
--- a/src/hyperlight_host/benches/benchmarks.rs
+++ b/src/hyperlight_host/benches/benchmarks.rs
@@ -524,6 +524,7 @@ fn shared_memory_benchmark(c: &mut Criterion) {
                 let mut dst = vec![0u8; size];
                 b.iter(|| {
                     hshm.copy_to_slice(&mut dst, 0).unwrap();
+                    std::hint::black_box(&dst);
                 });
             },
         );
diff --git a/src/hyperlight_host/src/mem/shared_mem.rs b/src/hyperlight_host/src/mem/shared_mem.rs
index 824cfde04..4d04dbe30 100644
--- a/src/hyperlight_host/src/mem/shared_mem.rs
+++ b/src/hyperlight_host/src/mem/shared_mem.rs
@@ -17,6 +17,7 @@ limitations under the License.
 use std::any::type_name;
 use std::ffi::c_void;
 use std::io::Error;
+use std::mem::{align_of, size_of};
 #[cfg(target_os = "linux")]
 use std::ptr::null_mut;
 use std::sync::{Arc, RwLock};
@@ -783,12 +784,39 @@ impl HostSharedMemory {
             .lock
             .try_read()
             .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?;
-        // todo: replace with something a bit more optimized + correct
-        for (i, b) in slice.iter_mut().enumerate() {
+
+        const CHUNK: usize = size_of::<u128>();
+        let len = slice.len();
+        let mut i = 0;
+
+        // Handle unaligned head bytes until we reach u128 alignment.
+        // Note: align_offset can return usize::MAX if alignment is impossible.
+        // In that case, head_len = len via .min(), so we fall back to byte-by-byte
+        // operations for the entire slice.
+        let align_offset = base.align_offset(align_of::<u128>());
+        let head_len = align_offset.min(len);
+        while i < head_len {
+            unsafe {
+                slice[i] = base.add(i).read_volatile();
+            }
+            i += 1;
+        }
+
+        // Read aligned u128 chunks
+        while i + CHUNK <= len {
+            let value = unsafe { (base.add(i) as *const u128).read_volatile() };
+            slice[i..i + CHUNK].copy_from_slice(&value.to_ne_bytes());
+            i += CHUNK;
+        }
+
+        // Handle remaining tail bytes
+        while i < len {
             unsafe {
-                *b = base.wrapping_add(i).read_volatile();
+                slice[i] = base.add(i).read_volatile();
             }
+            i += 1;
         }
+
         drop(guard);
         Ok(())
     }
@@ -802,12 +830,51 @@ impl HostSharedMemory {
             .lock
             .try_read()
             .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?;
-        // todo: replace with something a bit more optimized + correct
-        for (i, b) in slice.iter().enumerate() {
+
+        const CHUNK: usize = size_of::<u128>();
+        let len = slice.len();
+        let mut i = 0;
+
+        // Handle unaligned head bytes until we reach u128 alignment.
+        // Note: align_offset can return usize::MAX if alignment is impossible.
+        // In that case, head_len = len via .min(), so we fall back to byte-by-byte
+        // operations for the entire slice.
+        let align_offset = base.align_offset(align_of::<u128>());
+        let head_len = align_offset.min(len);
+        while i < head_len {
+            unsafe {
+                base.add(i).write_volatile(slice[i]);
+            }
+            i += 1;
+        }
+
+        // Write aligned u128 chunks
+        while i + CHUNK <= len {
+            let chunk: [u8; CHUNK] = slice[i..i + CHUNK].try_into().map_err(|_| {
+                new_error!(
+                    "Failed to convert slice to fixed-size array for u128 chunk: \
+                     expected length {}, got {} (total slice len {}, offset {})",
+                    CHUNK,
+                    slice[i..i + CHUNK].len(),
+                    len,
+                    i,
+                )
+            })?;
+            let value = u128::from_ne_bytes(chunk);
+            unsafe {
+                (base.add(i) as *mut u128).write_volatile(value);
+            }
+            i += CHUNK;
+        }
+
+        // Handle remaining tail bytes
+        while i < len {
             unsafe {
-                base.wrapping_add(i).write_volatile(*b);
+                base.add(i).write_volatile(slice[i]);
             }
+            i += 1;
         }
+
         drop(guard);
         Ok(())
     }
@@ -821,10 +888,40 @@ impl HostSharedMemory {
             .lock
             .try_read()
             .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?;
-        // todo: replace with something a bit more optimized + correct
-        for i in 0..len {
-            unsafe { base.wrapping_add(i).write_volatile(value) };
+
+        const CHUNK: usize = size_of::<u128>();
+        let value_u128 = u128::from_ne_bytes([value; CHUNK]);
+        let mut i = 0;
+
+        // Handle unaligned head bytes until we reach u128 alignment.
+        // Note: align_offset can return usize::MAX if alignment is impossible.
+        // In that case, head_len = len via .min(), so we fall back to byte-by-byte
+        // operations for the entire slice.
+        let align_offset = base.align_offset(align_of::<u128>());
+        let head_len = align_offset.min(len);
+        while i < head_len {
+            unsafe {
+                base.add(i).write_volatile(value);
+            }
+            i += 1;
+        }
+
+        // Write aligned u128 chunks
+        while i + CHUNK <= len {
+            unsafe {
+                (base.add(i) as *mut u128).write_volatile(value_u128);
+            }
+            i += CHUNK;
+        }
+
+        // Handle remaining tail bytes
+        while i < len {
+            unsafe {
+                base.add(i).write_volatile(value);
+            }
+            i += 1;
         }
+
         drop(guard);
         Ok(())
     }
@@ -1137,6 +1234,222 @@ mod tests {
         assert_eq!(data, ret_vec);
     }
 
+    /// Tests for the optimized aligned memory operations.
+    /// These tests verify that the u128 chunk optimization works correctly
+    /// for various alignment scenarios and buffer sizes.
+    mod alignment_tests {
+        use super::*;
+
+        const CHUNK_SIZE: usize = 16; // size_of::<u128>()
+
+        /// Test copy operations with all possible starting alignment offsets (0-15)
+        #[test]
+        fn copy_with_various_alignments() {
+            // Use a buffer large enough to test all alignment cases
+            let mem_size: usize = 4096;
+            let eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+            let (hshm, _) = eshm.build();
+
+            // Test all 16 possible alignment offsets (0 through 15)
+            for start_offset in 0..CHUNK_SIZE {
+                let test_len = 64; // Enough to cover head, aligned chunks, and tail
+                let test_data: Vec<u8> = (0..test_len).map(|i| (i + start_offset) as u8).collect();
+
+                // Write data at the given offset
+                hshm.copy_from_slice(&test_data, start_offset).unwrap();
+
+                // Read it back
+                let mut read_buf = vec![0u8; test_len];
+                hshm.copy_to_slice(&mut read_buf, start_offset).unwrap();
+
+                assert_eq!(
+                    test_data, read_buf,
+                    "Mismatch at alignment offset {}",
+                    start_offset
+                );
+            }
+        }
+
+        /// Test copy operations with lengths smaller than chunk size (< 16 bytes)
+        #[test]
+        fn copy_small_lengths() {
+            let mem_size: usize = 4096;
+            let eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+            let (hshm, _) = eshm.build();
+
+            for len in 0..CHUNK_SIZE {
+                let test_data: Vec<u8> = (0..len).map(|i| i as u8).collect();
+
+                hshm.copy_from_slice(&test_data, 0).unwrap();
+
+                let mut read_buf = vec![0u8; len];
+                hshm.copy_to_slice(&mut read_buf, 0).unwrap();
+
+                assert_eq!(test_data, read_buf, "Mismatch for length {}", len);
+            }
+        }
+
+        /// Test copy operations with lengths that don't align to chunk boundaries
+        #[test]
+        fn copy_non_aligned_lengths() {
+            let mem_size: usize = 4096;
+            let eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+            let (hshm, _) = eshm.build();
+
+            // Test lengths like 17, 31, 33, 47, 63, 65, etc.
+            let test_lengths = [17, 31, 33, 47, 63, 65, 100, 127, 129, 255, 257];
+
+            for &len in &test_lengths {
+                let test_data: Vec<u8> = (0..len).map(|i| (i % 256) as u8).collect();
+
+                hshm.copy_from_slice(&test_data, 0).unwrap();
+
+                let mut read_buf = vec![0u8; len];
+                hshm.copy_to_slice(&mut read_buf, 0).unwrap();
+
+                assert_eq!(test_data, read_buf, "Mismatch for length {}", len);
+            }
+        }
+
+        /// Test copy with exactly one chunk (16 bytes)
+        #[test]
+        fn copy_exact_chunk_size() {
+            let mem_size: usize = 4096;
+            let eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+            let (hshm, _) = eshm.build();
+
+            let test_data: Vec<u8> = (0..CHUNK_SIZE).map(|i| i as u8).collect();
+
+            hshm.copy_from_slice(&test_data, 0).unwrap();
+
+            let mut read_buf = vec![0u8; CHUNK_SIZE];
+            hshm.copy_to_slice(&mut read_buf, 0).unwrap();
+
+            assert_eq!(test_data, read_buf);
+        }
+
+        /// Test fill with various alignment offsets
+        #[test]
+        fn fill_with_various_alignments() {
+            let mem_size: usize = 4096;
+            let eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+            let (mut hshm, _) = eshm.build();
+
+            for start_offset in 0..CHUNK_SIZE {
+                let fill_len = 64;
+                let fill_value = (start_offset % 256) as u8;
+
+                // Clear memory first
+                hshm.fill(0, 0, mem_size).unwrap();
+
+                // Fill at the given offset
+                hshm.fill(fill_value, start_offset, fill_len).unwrap();
+
+                // Read it back and verify
+                let mut read_buf = vec![0u8; fill_len];
+                hshm.copy_to_slice(&mut read_buf, start_offset).unwrap();
+
+                assert!(
+                    read_buf.iter().all(|&b| b == fill_value),
+                    "Fill mismatch at alignment offset {}",
+                    start_offset
+                );
+            }
+        }
+
+        /// Test fill with lengths smaller than chunk size
+        #[test]
+        fn fill_small_lengths() {
+            let mem_size: usize = 4096;
+            let eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+            let (mut hshm, _) = eshm.build();
+
+            for len in 0..CHUNK_SIZE {
+                let fill_value = 0xAB;
+
+                hshm.fill(0, 0, mem_size).unwrap(); // Clear
+                hshm.fill(fill_value, 0, len).unwrap();
+
+                let mut read_buf = vec![0u8; len];
+                hshm.copy_to_slice(&mut read_buf, 0).unwrap();
+
+                assert!(
+                    read_buf.iter().all(|&b| b == fill_value),
+                    "Fill mismatch for length {}",
+                    len
+                );
+            }
+        }
+
+        /// Test fill with non-aligned lengths
+        #[test]
+        fn fill_non_aligned_lengths() {
+            let mem_size: usize = 4096;
+            let eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+            let (mut hshm, _) = eshm.build();
+
+            let test_lengths = [17, 31, 33, 47, 63, 65, 100, 127, 129, 255, 257];
+
+            for &len in &test_lengths {
+                let fill_value = 0xCD;
+
+                hshm.fill(0, 0, mem_size).unwrap(); // Clear
+                hshm.fill(fill_value, 0, len).unwrap();
+
+                let mut read_buf = vec![0u8; len];
+                hshm.copy_to_slice(&mut read_buf, 0).unwrap();
+
+                assert!(
+                    read_buf.iter().all(|&b| b == fill_value),
+                    "Fill mismatch for length {}",
+                    len
+                );
+            }
+        }
+
+        /// Test edge cases: length 0 and length 1
+        #[test]
+        fn copy_edge_cases() {
+            let mem_size: usize = 4096;
+            let eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+            let (hshm, _) = eshm.build();
+
+            // Length 0
+            let empty: Vec<u8> = vec![];
+            hshm.copy_from_slice(&empty, 0).unwrap();
+            let mut read_buf: Vec<u8> = vec![];
+            hshm.copy_to_slice(&mut read_buf, 0).unwrap();
+            assert!(read_buf.is_empty());
+
+            // Length 1
+            let single = vec![0x42u8];
+            hshm.copy_from_slice(&single, 0).unwrap();
+            let mut read_buf = vec![0u8; 1];
+            hshm.copy_to_slice(&mut read_buf, 0).unwrap();
+            assert_eq!(single, read_buf);
+        }
+
+        /// Test combined: unaligned start + non-aligned length
+        #[test]
+        fn copy_unaligned_start_and_length() {
+            let mem_size: usize = 4096;
+            let eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+            let (hshm, _) = eshm.build();
+
+            // Start at offset 7 (unaligned), length 37 (not a multiple of 16)
+            let start_offset = 7;
+            let len = 37;
+            let test_data: Vec<u8> = (0..len).map(|i| (i * 3) as u8).collect();
+
+            hshm.copy_from_slice(&test_data, start_offset).unwrap();
+
+            let mut read_buf = vec![0u8; len];
+            hshm.copy_to_slice(&mut read_buf, start_offset).unwrap();
+
+            assert_eq!(test_data, read_buf);
+        }
+    }
+
     /// A test to ensure that, if a `SharedMem` instance is cloned
     /// and _all_ clones are dropped, the memory region will no longer
     /// be valid.
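
As an aside, the head/body/tail split that the patched `copy_to_slice`, `copy_from_slice`, and `fill` all share can be illustrated in isolation. Below is a minimal standalone sketch of the same chunking logic over a plain byte buffer; `fill_bytes` is a hypothetical helper rather than a Hyperlight API, and it uses ordinary (non-volatile) writes since no shared mapping or region lock is involved.

fn fill_bytes(buf: &mut [u8], value: u8) {
    const CHUNK: usize = std::mem::size_of::<u128>();
    let len = buf.len();
    let base = buf.as_mut_ptr();
    let value_u128 = u128::from_ne_bytes([value; CHUNK]);
    let mut i = 0;

    // Head: one byte at a time until the pointer is u128-aligned.
    // align_offset may return usize::MAX; .min(len) keeps the index in
    // bounds and degrades to a pure byte loop in that case.
    let head_len = base.align_offset(std::mem::align_of::<u128>()).min(len);
    while i < head_len {
        unsafe { base.add(i).write(value) };
        i += 1;
    }

    // Body: aligned 16-byte stores.
    while i + CHUNK <= len {
        unsafe { (base.add(i) as *mut u128).write(value_u128) };
        i += CHUNK;
    }

    // Tail: remaining bytes after the last full chunk.
    while i < len {
        unsafe { base.add(i).write(value) };
        i += 1;
    }
}

fn main() {
    // Unaligned start (offset 3) and a length (37) that is not a multiple
    // of 16, exercising head, body, and tail in a single call.
    let mut buf = vec![0u8; 64];
    fill_bytes(&mut buf[3..40], 0xAB);
    assert!(buf[3..40].iter().all(|&b| b == 0xAB));
    assert!(buf[..3].iter().all(|&b| b == 0) && buf[40..].iter().all(|&b| b == 0));
}

The patched methods follow this same control flow, but route every access through `read_volatile`/`write_volatile` on the mapped region while holding the region's read lock.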