diff --git a/Cargo.lock b/Cargo.lock index 025a246b82..81264e4bc6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5893,6 +5893,7 @@ dependencies = [ "memory_range", "mesh", "pal_async", + "pal_event", "parking_lot", "pci_bus", "pci_core", diff --git a/Guide/src/reference/devices/firmware/linux_direct.md b/Guide/src/reference/devices/firmware/linux_direct.md index b7954297ca..3151db3aa5 100644 --- a/Guide/src/reference/devices/firmware/linux_direct.md +++ b/Guide/src/reference/devices/firmware/linux_direct.md @@ -55,9 +55,9 @@ OpenVMM synthesizes a minimal set of EFI structures in guest memory: 2. **EFI Memory Map** — describes the EFI metadata region, ACPI tables, and conventional RAM. 3. **ACPI Tables** — FADT (with `HW_REDUCED_ACPI`), MADT (GIC distributor, GICv3 - redistributors or GICv2 CPU interfaces, optional v2m MSI frame), GTDT + redistributors or GICv2 CPU interfaces, GICv3 ITS or v2m MSI frame), GTDT (virtual timer), DSDT (VMBus, serial UARTs), and optionally MCFG/SSDT for - PCIe. + PCIe and IORT for PCIe interrupt routing via the ITS. A **stub device tree** is then built. Unlike a full device tree, it contains no hardware nodes — no CPUs, GIC, timer, or devices. Its only purpose is a diff --git a/Guide/src/reference/emulated/pcie/overview.md b/Guide/src/reference/emulated/pcie/overview.md index 50e8a6d13f..84b19f2da7 100644 --- a/Guide/src/reference/emulated/pcie/overview.md +++ b/Guide/src/reference/emulated/pcie/overview.md @@ -77,6 +77,26 @@ hotplug, PME, AER, and other PCIe features rather than ACPI-based fallbacks. Linux assumes native control regardless, but Windows requires `_OSC` to enable native hotplug. +### MSI Interrupt Routing (aarch64) + +On aarch64, PCIe MSI/MSI-X interrupts are routed through either +a GICv3 ITS or a GICv2m MSI frame, depending on the platform: + +- **GICv3 ITS** (default on KVM with GICv3): The VMM creates a + KVM in-kernel ITS device. Each PCIe device gets a 32-bit + device ID composed as `(segment << 16) | BDF`, injected + transparently by per-device wrappers in the interrupt path. + ACPI boots emit an IORT with an ITS Group node and per-root- + complex ID mappings. The device tree includes an `its` child + node under the GIC with `msi-controller`. + +- **GICv2m**: MSI writes map to a fixed pool of 64 SPIs via + a v2m doorbell register. The MADT includes a GICv2m MSI + frame entry. + +The MSI controller can be overridden with the `--gic-msi` +CLI option (`auto`, `its`, or `v2m`). + ### Implementation notes ```admonish note title="No Command Completed support" diff --git a/openhcl/bootloader_fdt_parser/src/lib.rs b/openhcl/bootloader_fdt_parser/src/lib.rs index 3623f84013..302292b48b 100644 --- a/openhcl/bootloader_fdt_parser/src/lib.rs +++ b/openhcl/bootloader_fdt_parser/src/lib.rs @@ -537,7 +537,7 @@ fn parse_gic(node: &Node<'_>) -> anyhow::Result { gic_version: vm_topology::processor::aarch64::GicVersion::V3 { redistributors_base: reg[2], }, - gic_v2m: None, + gic_msi: vm_topology::processor::aarch64::GicMsiController::None, pmu_gsiv: None, // TODO: parse from the DT timer node instead of hardcoding. virt_timer_ppi: 20, @@ -1078,7 +1078,7 @@ mod tests { gic_version: vm_topology::processor::aarch64::GicVersion::V3 { redistributors_base: 0x20000, }, - gic_v2m: None, + gic_msi: vm_topology::processor::aarch64::GicMsiController::None, pmu_gsiv: Some(0x17), virt_timer_ppi: 20, gic_nr_irqs: 992, diff --git a/openhcl/virt_mshv_vtl/src/lib.rs b/openhcl/virt_mshv_vtl/src/lib.rs index 60ffabb807..aa6b3d77a1 100755 --- a/openhcl/virt_mshv_vtl/src/lib.rs +++ b/openhcl/virt_mshv_vtl/src/lib.rs @@ -1314,7 +1314,7 @@ struct UhInterruptTarget { } impl pci_core::msi::SignalMsi for UhInterruptTarget { - fn signal_msi(&self, _rid: u32, address: u64, data: u32) { + fn signal_msi(&self, _devid: Option, address: u64, data: u32) { self.partition .request_msi(self.vtl, MsiRequest { address, data }); } diff --git a/openvmm/openvmm_core/src/worker/dispatch.rs b/openvmm/openvmm_core/src/worker/dispatch.rs index 079a7273d4..db6f43d360 100644 --- a/openvmm/openvmm_core/src/worker/dispatch.rs +++ b/openvmm/openvmm_core/src/worker/dispatch.rs @@ -492,6 +492,7 @@ impl ExtractTopologyConfig for ProcessorTopology { Some(gsiv) => PmuGsivConfig::Gsiv(gsiv), None => PmuGsivConfig::Disabled, }, + gic_msi: Default::default(), })), } } @@ -504,6 +505,8 @@ impl BuildTopology for ProcessorTopologyConfig { platform_info: &virt::PlatformInfo, ) -> anyhow::Result> { use vm_topology::processor::aarch64::Aarch64PlatformConfig; + use vm_topology::processor::aarch64::GicItsInfo; + use vm_topology::processor::aarch64::GicMsiController; use vm_topology::processor::aarch64::GicV2mInfo; let arch = match &self.arch { @@ -511,11 +514,7 @@ impl BuildTopology for ProcessorTopologyConfig { Some(ArchTopologyConfig::Aarch64(arch)) => arch.clone(), _ => anyhow::bail!("invalid architecture config"), }; - let gic_v2m = Some(GicV2mInfo { - frame_base: openvmm_defs::config::DEFAULT_GIC_V2M_MSI_FRAME_BASE, - spi_base: openvmm_defs::config::DEFAULT_GIC_V2M_SPI_BASE, - spi_count: openvmm_defs::config::DEFAULT_GIC_V2M_SPI_COUNT, - }); + let pmu_gsiv = match arch.pmu_gsiv { PmuGsivConfig::Disabled => None, PmuGsivConfig::Gsiv(gsiv) => Some(gsiv), @@ -585,10 +584,39 @@ impl BuildTopology for ProcessorTopologyConfig { } }; + // Use the ITS for MSI delivery when the backend supports it + // (KVM with GICv3). Otherwise fall back to GICv2m (SPI-based MSIs). + use openvmm_defs::config::GicMsiConfig; + let is_gicv2 = matches!(gic_version, GicVersion::V2 { .. }); + let use_its = match arch.gic_msi { + GicMsiConfig::Auto => platform_info.supports_its && !is_gicv2, + GicMsiConfig::Its => { + if is_gicv2 { + anyhow::bail!("ITS is incompatible with GICv2"); + } + if !platform_info.supports_its { + anyhow::bail!("ITS requested but the hypervisor does not support it"); + } + true + } + GicMsiConfig::V2m => false, + }; + let gic_msi = if use_its { + GicMsiController::Its(GicItsInfo { + its_base: openvmm_defs::config::DEFAULT_GIC_ITS_BASE, + }) + } else { + GicMsiController::V2m(GicV2mInfo { + frame_base: openvmm_defs::config::DEFAULT_GIC_V2M_MSI_FRAME_BASE, + spi_base: openvmm_defs::config::DEFAULT_GIC_V2M_SPI_BASE, + spi_count: openvmm_defs::config::DEFAULT_GIC_V2M_SPI_COUNT, + }) + }; + let platform = Aarch64PlatformConfig { gic_distributor_base, gic_version, - gic_v2m, + gic_msi, pmu_gsiv, virt_timer_ppi: openvmm_defs::config::DEFAULT_VIRT_TIMER_PPI, gic_nr_irqs: openvmm_defs::config::DEFAULT_GIC_NR_IRQS, @@ -1810,8 +1838,46 @@ impl InitializedVm { (pcie_host_bridges, pcie_root_complexes) }; + // Build a port-name→(segment, bus_range) map covering all ports in + // the PCIe topology (root complex ports and switch downstream ports). + // The segment is used for ITS device ID composition; the bus_range is + // a shared atomic that the config space emulator updates when the + // guest programs secondary/subordinate bus numbers. + struct PortInfo { + segment: u16, + bus_range: pcie::bus_range::AssignedBusRange, + } + let mut port_info: std::collections::HashMap, PortInfo> = + std::collections::HashMap::new(); + for (hb, rc) in pcie_host_bridges.iter().zip(pcie_root_complexes.iter()) { + for p in rc.lock().downstream_ports() { + if let Some(_existing) = port_info.insert( + p.name.clone(), + PortInfo { + segment: hb.segment, + bus_range: p.bus_range, + }, + ) { + anyhow::bail!("duplicate PCIe port name '{}'", p.name); + } + } + } + for switch in cfg.pcie_switches { let device_name = format!("pcie-switch:{}", switch.name); + + // Inherit the segment from the switch's parent port. + let parent_segment = port_info + .get(switch.parent_port.as_str()) + .ok_or_else(|| { + anyhow::anyhow!( + "switch '{}' parent port '{}' not found in any root complex", + switch.name, + switch.parent_port + ) + })? + .segment; + let switch_device = chipset_builder .arc_mutex_device(device_name) .on_pcie_port(vmotherboard::BusId::new(&switch.parent_port)) @@ -1824,6 +1890,20 @@ impl InitializedVm { GenericPcieSwitch::new(definition) })?; + // Query the switch's actual downstream port names instead of + // reconstructing them from the naming convention. + for p in switch_device.lock().downstream_ports() { + if let Some(_existing) = port_info.insert( + p.name.clone(), + PortInfo { + segment: parent_segment, + bus_range: p.bus_range, + }, + ) { + anyhow::bail!("duplicate PCIe port name '{}'", p.name); + } + } + let bus_id = vmotherboard::BusId::new(&switch.name); chipset_builder.register_weak_mutex_pcie_enumerator(bus_id, Box::new(switch_device)); } @@ -1846,7 +1926,23 @@ impl InitializedVm { Some(handle) }; + // Determine whether ITS wrappers are needed for PCIe MSI delivery. + // Only aarch64 VMs configured with a GICv3 ITS need device ID + // injection; all other configurations pass through directly. + #[cfg(guest_arch = "aarch64")] + let use_its = matches!( + processor_topology.gic_msi(), + vm_topology::processor::aarch64::GicMsiController::Its(_) + ); + #[cfg(not(guest_arch = "aarch64"))] + let use_its = false; + // Resolve PCIe devices concurrently. + // + // Each port's ConfigSpaceType1Emulator owns an AssignedBusRange + // (Arc). We clone it into the ITS wrappers so that when + // the guest programs bus numbers, the emulator writes the new values + // and the ITS wrapper reads them at interrupt delivery time. try_join_all(cfg.pcie_devices.into_iter().map(|dev_cfg| { let chipset_builder = &chipset_builder; let driver_source = &driver_source; @@ -1854,18 +1950,53 @@ impl InitializedVm { let gm = &gm; let partition = &partition; let mapper = &mapper; + let port_info = &port_info; async move { + let port_name: Arc = dev_cfg.port_name.into(); + let pi = port_info.get(&port_name).ok_or_else(|| { + anyhow::anyhow!( + "device port '{}' not found in any root complex or switch", + port_name + ) + })?; + + // When ITS is active, wrap the partition's SignalMsi + // and IrqFd to inject the device identity. Otherwise + // pass through directly. + let signal_msi = partition.as_signal_msi(Vtl::Vtl0).map(|s| { + if use_its { + Arc::new(pcie::its::ItsSignalMsi::new( + s, + pi.bus_range.clone(), + pi.segment, + )) as Arc + } else { + s + } + }); + let irqfd = partition.irqfd().map(|fd| { + if use_its { + Arc::new(pcie::its::ItsIrqFd::new( + fd, + pi.bus_range.clone(), + pi.segment, + )) as Arc + } else { + fd + } + }); + vmm_core::device_builder::build_pcie_device( chipset_builder, - dev_cfg.port_name.into(), + port_name.clone(), driver_source, resolver, gm, dev_cfg.resource, partition.clone().into_doorbell_registration(Vtl::Vtl0), Some(mapper), - partition.as_signal_msi(Vtl::Vtl0), - partition.irqfd(), + signal_msi, + irqfd, ) .await } @@ -2872,19 +3003,59 @@ impl LoadedVm { } VmRpc::AddPcieDevice(rpc) => { rpc.handle_failable(async |(port_name, resource)| { - // Validate the port exists before creating the device - // to avoid leaking a DynamicDeviceUnit on error. - let rc = self.inner.pcie_root_complexes.iter() - .find(|rc| { - rc.lock().downstream_ports().iter().any(|(_, name)| name.as_ref() == port_name.as_str()) + // Find the root complex and its index for the named port. + let (rc_idx, rc) = self.inner.pcie_root_complexes.iter() + .enumerate() + .find(|(_, rc)| { + rc.lock().downstream_ports().iter().any(|p| p.name.as_ref() == port_name.as_str()) }) .ok_or_else(|| anyhow::anyhow!("port '{}' not found in any root complex", port_name))?; - let msi_conn = match self.inner.partition.irqfd() { + #[cfg(guest_arch = "aarch64")] + let use_its = matches!( + self.inner.processor_topology.gic_msi(), + vm_topology::processor::aarch64::GicMsiController::Its(_) + ); + #[cfg(not(guest_arch = "aarch64"))] + let use_its = false; + + // Get the bus_range from the port's config space emulator. + let bus_range = rc.lock() + .downstream_ports() + .into_iter() + .find(|p| p.name.as_ref() == port_name.as_str()) + .expect("port was just found above") + .bus_range; + + let signal_msi = self.inner.partition.as_signal_msi(Vtl::Vtl0).map(|s| { + if use_its { + let segment = self.inner.pcie_host_bridges[rc_idx].segment; + Arc::new(pcie::its::ItsSignalMsi::new( + s, + bus_range.clone(), + segment, + )) as Arc + } else { + s + } + }); + let irqfd = self.inner.partition.irqfd().map(|fd| { + if use_its { + let segment = self.inner.pcie_host_bridges[rc_idx].segment; + Arc::new(pcie::its::ItsIrqFd::new( + fd, + bus_range.clone(), + segment, + )) as Arc + } else { + fd + } + }); + + let msi_conn = match irqfd { Some(fd) => pci_core::msi::MsiConnection::with_irqfd(fd), None => pci_core::msi::MsiConnection::new(), }; - let signal_msi = self.inner.partition.as_signal_msi(Vtl::Vtl0); let (unit, device) = self.inner.chipset_devices.add_dyn_device( &self.inner.driver_source, @@ -2957,7 +3128,7 @@ impl LoadedVm { // Find the root complex containing the target port let rc = self.inner.pcie_root_complexes.iter() .find(|rc| { - rc.lock().downstream_ports().iter().any(|(_, name)| name.as_ref() == port_name.as_str()) + rc.lock().downstream_ports().iter().any(|p| p.name.as_ref() == port_name.as_str()) }) .ok_or_else(|| anyhow::anyhow!("port '{}' not found in any root complex", port_name))?; diff --git a/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs b/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs index 8b6253ee4c..83589086e0 100644 --- a/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs +++ b/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs @@ -161,6 +161,7 @@ fn build_dt( let num_cpus = processor_topology.vps().len(); + use vm_topology::processor::aarch64::GicMsiController; use vm_topology::processor::aarch64::GicVersion; let gic_dist_base: u64 = processor_topology.gic_distributor_base(); @@ -237,6 +238,7 @@ fn build_dt( const PHANDLE_GIC: u32 = 1; const PHANDLE_APB_PCLK: u32 = 2; const PHANDLE_V2M: u32 = 3; + const PHANDLE_ITS: u32 = 4; const GIC_SPI: u32 = 0; const GIC_PPI: u32 = 1; @@ -311,8 +313,9 @@ fn build_dt( // ARM64 Generic Interrupt Controller. // GICv3 uses "arm,gic-v3"; GICv2 uses "arm,cortex-a15-gic". - // Both versions can have a v2m child for SPI-based MSIs (PCIe). - let v2m_info = processor_topology.gic_v2m(); + // GICv3 can have an ITS child for LPI-based MSIs; v2m is the + // fallback for SPI-based MSIs (GICv2 or GICv3 without ITS). + let gic_msi = processor_topology.gic_msi(); let gic_compatible = match processor_topology.gic_version() { GicVersion::V3 { .. } => "arm,gic-v3", GicVersion::V2 { .. } => "arm,cortex-a15-gic", @@ -335,8 +338,16 @@ fn build_dt( .add_null(p_interrupt_controller)? .add_u32(p_phandle, PHANDLE_GIC)? .add_null(p_ranges)?; - root_builder = if let Some(v2m) = v2m_info { - gic_node + root_builder = match gic_msi { + GicMsiController::Its(its) => gic_node + .start_node(format!("its@{:x}", its.its_base).as_str())? + .add_str(p_compatible, "arm,gic-v3-its")? + .add_null(p_msi_controller)? + .add_u64_array(p_reg, &[its.its_base, openvmm_defs::config::GIC_ITS_SIZE])? + .add_u32(p_phandle, PHANDLE_ITS)? + .end_node()? + .end_node()?, + GicMsiController::V2m(v2m) => gic_node .start_node(format!("v2m@{:x}", v2m.frame_base).as_str())? .add_str(p_compatible, "arm,gic-v2m-frame")? .add_null(p_msi_controller)? @@ -348,9 +359,8 @@ fn build_dt( .add_u32(p_arm_msi_num_spis, v2m.spi_count)? .add_u32(p_phandle, PHANDLE_V2M)? .end_node()? - .end_node()? - } else { - gic_node.end_node()? + .end_node()?, + GicMsiController::None => gic_node.end_node()?, }; // ARM64 Architectural Timer. @@ -424,7 +434,7 @@ fn build_dt( } // No interrupt-map is provided because all devices use MSIs via the - // v2m frame; legacy INTx routing is not supported. + // ITS or v2m frame; legacy INTx routing is not supported. let mut node = root_builder .start_node(name.as_str())? .add_str(p_compatible, "pci-host-ecam-generic")? @@ -439,8 +449,14 @@ fn build_dt( .add_u32(p_size_cells, 2)? .add_u32(p_interrupt_parent, PHANDLE_GIC)? .add_u32_array(p_ranges, &ranges)?; - if v2m_info.is_some() { - node = node.add_u32(p_msi_parent, PHANDLE_V2M)?; + match gic_msi { + GicMsiController::Its(_) => { + node = node.add_u32(p_msi_parent, PHANDLE_ITS)?; + } + GicMsiController::V2m(_) => { + node = node.add_u32(p_msi_parent, PHANDLE_V2M)?; + } + GicMsiController::None => {} } root_builder = node.end_node()?; } diff --git a/openvmm/openvmm_defs/src/config.rs b/openvmm/openvmm_defs/src/config.rs index 749301c0bf..c4ebf45e43 100644 --- a/openvmm/openvmm_defs/src/config.rs +++ b/openvmm/openvmm_defs/src/config.rs @@ -113,6 +113,13 @@ pub const DEFAULT_GIC_V2M_SPI_BASE: u32 = 512; /// Number of SPIs reserved for PCIe MSIs. pub const DEFAULT_GIC_V2M_SPI_COUNT: u32 = 64; +/// Base address of the GICv3 ITS MMIO region. Must be 64 KiB aligned, +/// below the v2m frame address, and not overlap other devices. +/// The region extends from this base to base + GIC_ITS_SIZE (128 KiB). +pub const DEFAULT_GIC_ITS_BASE: u64 = 0xEFFC_0000; +/// Size of the ITS MMIO region (control frame + translation frame, 2×64 KiB). +pub const GIC_ITS_SIZE: u64 = 0x2_0000; + /// Default virtual timer PPI (GIC INTID). PPI 4 = INTID 16 + 4 = 20. /// This is the EL1 virtual timer interrupt used across Hyper-V, KVM, and HVF. pub const DEFAULT_VIRT_TIMER_PPI: u32 = 20; @@ -291,10 +298,24 @@ pub enum PmuGsivConfig { Disabled, } +/// MSI controller selection for aarch64 PCIe interrupt delivery. +#[derive(Debug, Protobuf, Default, Clone)] +pub enum GicMsiConfig { + /// Automatically select the best available MSI controller: + /// ITS when the hypervisor supports it, otherwise GICv2m. + #[default] + Auto, + /// Force GICv3 ITS for MSI delivery via LPIs. + Its, + /// Force GICv2m for MSI delivery via SPIs. + V2m, +} + #[derive(Debug, Protobuf, Default, Clone)] pub struct Aarch64TopologyConfig { pub gic_config: Option, pub pmu_gsiv: PmuGsivConfig, + pub gic_msi: GicMsiConfig, } /// GIC configuration for the virtual machine. diff --git a/openvmm/openvmm_entry/src/cli_args.rs b/openvmm/openvmm_entry/src/cli_args.rs index f060fd3a8c..45803fc8ba 100644 --- a/openvmm/openvmm_entry/src/cli_args.rs +++ b/openvmm/openvmm_entry/src/cli_args.rs @@ -386,6 +386,11 @@ options: #[clap(long, default_value = "auto", value_parser = parse_x2apic)] pub x2apic: X2ApicConfig, + /// configure PCIe MSI controller for aarch64 (auto | its | v2m) + #[cfg(guest_arch = "aarch64")] + #[clap(long, default_value = "auto")] + pub gic_msi: GicMsiCli, + /// COM1 binding (console | stderr | listen=\ | file=\ (overwrites) | listen=tcp:\:\ | term[=\]\[,name=\\] | none) #[clap(long, value_name = "SERIAL")] pub com1: Option, @@ -2064,6 +2069,18 @@ pub enum Vtl0LateMapPolicyCli { Exception, } +/// PCIe MSI controller selection for aarch64. +#[derive(Debug, Copy, Clone, Default, ValueEnum)] +pub enum GicMsiCli { + /// Use ITS when available, fall back to GICv2m. + #[default] + Auto, + /// Force GICv3 ITS (LPI-based MSIs). + Its, + /// Force GICv2m (SPI-based MSIs). + V2m, +} + #[derive(Debug, Copy, Clone, ValueEnum)] pub enum IsolationCli { Vbs, diff --git a/openvmm/openvmm_entry/src/lib.rs b/openvmm/openvmm_entry/src/lib.rs index c83aa291d1..d32a4b7442 100644 --- a/openvmm/openvmm_entry/src/lib.rs +++ b/openvmm/openvmm_entry/src/lib.rs @@ -1316,6 +1316,11 @@ async fn vm_config_from_command_line( // TODO: allow this to be configured from the command line gic_config: None, pmu_gsiv: openvmm_defs::config::PmuGsivConfig::Platform, + gic_msi: match opt.gic_msi { + cli_args::GicMsiCli::Auto => openvmm_defs::config::GicMsiConfig::Auto, + cli_args::GicMsiCli::Its => openvmm_defs::config::GicMsiConfig::Its, + cli_args::GicMsiCli::V2m => openvmm_defs::config::GicMsiConfig::V2m, + }, }, ); #[cfg(guest_arch = "x86_64")] diff --git a/tmk/tmk_vmm/src/run.rs b/tmk/tmk_vmm/src/run.rs index 6839606568..5576e18e0e 100644 --- a/tmk/tmk_vmm/src/run.rs +++ b/tmk/tmk_vmm/src/run.rs @@ -65,7 +65,7 @@ impl CommonState { gic_version: vm_topology::processor::aarch64::GicVersion::V3 { redistributors_base: 0xff020000, }, - gic_v2m: None, + gic_msi: vm_topology::processor::aarch64::GicMsiController::None, pmu_gsiv: None, virt_timer_ppi: 20, // DEFAULT_VIRT_TIMER_PPI gic_nr_irqs: 256, diff --git a/vm/acpi_spec/src/iort.rs b/vm/acpi_spec/src/iort.rs new file mode 100644 index 0000000000..d8000f1bec --- /dev/null +++ b/vm/acpi_spec/src/iort.rs @@ -0,0 +1,205 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! IORT (IO Remapping Table) types for aarch64 PCIe topology. + +use super::Table; +use crate::packed_nums::*; +use core::mem::size_of; +use static_assertions::const_assert_eq; +use zerocopy::FromBytes; +use zerocopy::Immutable; +use zerocopy::IntoBytes; +use zerocopy::KnownLayout; +use zerocopy::Unaligned; + +pub const IORT_REVISION: u8 = 5; +pub const IORT_NODE_OFFSET: u32 = size_of::() as u32 + size_of::() as u32; + +pub const IORT_NODE_TYPE_ITS_GROUP: u8 = 0x00; +pub const IORT_NODE_TYPE_PCI_ROOT_COMPLEX: u8 = 0x02; + +pub const IORT_PCI_ROOT_COMPLEX_REVISION: u8 = 3; +pub const IORT_ITS_GROUP_REVISION: u8 = 1; + +pub const IORT_NODE_COHERENT: u32 = 0x00000001; +pub const IORT_MEMORY_ACCESS_COHERENCY: u8 = 1 << 0; +pub const IORT_MEMORY_ACCESS_ATTRIBUTES: u8 = 1 << 1; +pub const IORT_ID_SINGLE_MAPPING: u32 = 1 << 0; + +#[repr(C)] +#[derive(Copy, Clone, Debug, IntoBytes, Immutable, KnownLayout, FromBytes, Unaligned)] +pub struct Iort { + pub node_count: u32_ne, + pub node_offset: u32_ne, + pub reserved: u32_ne, +} + +impl Iort { + pub fn new(node_count: u32) -> Self { + Self { + node_count: node_count.into(), + node_offset: IORT_NODE_OFFSET.into(), + reserved: 0.into(), + } + } +} + +impl Table for Iort { + const SIGNATURE: [u8; 4] = *b"IORT"; +} + +const_assert_eq!(size_of::(), 12); +const_assert_eq!(IORT_NODE_OFFSET as usize, 48); + +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, IntoBytes, Immutable, KnownLayout, FromBytes, Unaligned)] +pub struct IortNodeHeader { + pub node_type: u8, + pub length: u16_ne, + pub revision: u8, + pub identifier: u32_ne, + pub mapping_count: u32_ne, + pub mapping_offset: u32_ne, +} + +impl IortNodeHeader { + pub fn new(node_type: u8, revision: u8, identifier: u32, mapping_count: u32) -> Self { + Self { + node_type, + length: (size_of::() as u16).into(), + revision, + identifier: identifier.into(), + mapping_count: mapping_count.into(), + mapping_offset: if mapping_count == 0 { + 0.into() + } else { + (size_of::() as u32).into() + }, + } + } +} + +const_assert_eq!(size_of::(), 16); + +#[repr(C)] +#[derive(Copy, Clone, Debug, IntoBytes, Immutable, KnownLayout, FromBytes, Unaligned)] +pub struct IortMemoryAccessProperties { + pub cache_coherency: u32_ne, + pub hints: u8, + pub reserved: u16_ne, + pub memory_flags: u8, +} + +impl IortMemoryAccessProperties { + pub fn coherent() -> Self { + Self { + cache_coherency: IORT_NODE_COHERENT.into(), + hints: 0, + reserved: 0.into(), + memory_flags: IORT_MEMORY_ACCESS_COHERENCY | IORT_MEMORY_ACCESS_ATTRIBUTES, + } + } +} + +const_assert_eq!(size_of::(), 8); + +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, IntoBytes, Immutable, KnownLayout, FromBytes, Unaligned)] +pub struct IortPciRootComplex { + pub header: IortNodeHeader, + pub memory_properties: IortMemoryAccessProperties, + pub ats_attribute: u32_ne, + pub pci_segment_number: u32_ne, + pub memory_address_limit: u8, + pub reserved: [u8; 3], +} + +impl IortPciRootComplex { + /// Create a PCI Root Complex node. The `length` field in the header + /// includes space for `mapping_count` trailing `IortIdMapping` entries, + /// which must be appended separately after serializing this struct. + pub fn new(identifier: u32, pci_segment_number: u16, mapping_count: u32) -> Self { + let mut header = IortNodeHeader::new::( + IORT_NODE_TYPE_PCI_ROOT_COMPLEX, + IORT_PCI_ROOT_COMPLEX_REVISION, + identifier, + mapping_count, + ); + // The node length must include the variable-length ID mapping array. + let total = + size_of::() as u16 + (mapping_count as u16) * size_of::() as u16; + header.length = total.into(); + Self { + header, + memory_properties: IortMemoryAccessProperties::coherent(), + ats_attribute: 0.into(), + pci_segment_number: u32::from(pci_segment_number).into(), + memory_address_limit: 64, + reserved: [0; 3], + } + } +} + +const_assert_eq!(size_of::(), 36); + +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, IntoBytes, Immutable, KnownLayout, FromBytes, Unaligned)] +pub struct IortIdMapping { + pub input_base: u32_ne, + pub id_count: u32_ne, + pub output_base: u32_ne, + pub output_reference: u32_ne, + pub flags: u32_ne, +} + +impl IortIdMapping { + pub fn new( + input_base: u32, + id_count: u32, + output_base: u32, + output_reference: u32, + flags: u32, + ) -> Self { + Self { + input_base: input_base.into(), + id_count: id_count.into(), + output_base: output_base.into(), + output_reference: output_reference.into(), + flags: flags.into(), + } + } +} + +const_assert_eq!(size_of::(), 20); + +/// ITS Group node. Followed by `its_count` u32 ITS identifiers. +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, IntoBytes, Immutable, KnownLayout, FromBytes, Unaligned)] +pub struct IortItsGroup { + pub header: IortNodeHeader, + pub its_count: u32_ne, +} + +impl IortItsGroup { + /// Create an ITS Group node. The `length` field in the header includes + /// space for `its_count` trailing u32 ITS identifiers, which must be + /// appended separately after serializing this struct. + pub fn new(identifier: u32, its_count: u32) -> Self { + let mut header = IortNodeHeader::new::( + IORT_NODE_TYPE_ITS_GROUP, + IORT_ITS_GROUP_REVISION, + identifier, + 0, + ); + // The node length must include the variable-length ITS ID array. + let total = size_of::() as u16 + (its_count as u16) * 4; + header.length = total.into(); + Self { + header, + its_count: its_count.into(), + } + } +} + +const_assert_eq!(size_of::(), 20); diff --git a/vm/acpi_spec/src/lib.rs b/vm/acpi_spec/src/lib.rs index 0a881b984d..3c4933fe13 100644 --- a/vm/acpi_spec/src/lib.rs +++ b/vm/acpi_spec/src/lib.rs @@ -13,6 +13,7 @@ extern crate alloc; pub mod aspt; pub mod fadt; pub mod gtdt; +pub mod iort; pub mod madt; pub mod mcfg; pub mod pptt; diff --git a/vm/acpi_spec/src/madt.rs b/vm/acpi_spec/src/madt.rs index 63574a5bf5..3c140b1d4a 100644 --- a/vm/acpi_spec/src/madt.rs +++ b/vm/acpi_spec/src/madt.rs @@ -43,6 +43,7 @@ open_enum! { GICC = 0xb, GICD = 0xc, GIC_MSI_FRAME = 0xd, + GIC_ITS = 0xf, } } @@ -269,6 +270,33 @@ impl MadtGicMsiFrame { } } +/// ACPI 6.5 MADT GIC ITS structure (Table 5-68). +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, IntoBytes, Immutable, KnownLayout, FromBytes, Unaligned)] +pub struct MadtGicIts { + pub typ: MadtType, + pub length: u8, + pub reserved: u16, + pub gic_its_id: u32, + pub base_address: u64, + pub reserved2: u32, +} + +const_assert_eq!(size_of::(), 20); + +impl MadtGicIts { + pub fn new(gic_its_id: u32, base_address: u64) -> Self { + Self { + typ: MadtType::GIC_ITS, + length: size_of::() as u8, + reserved: 0, + gic_its_id, + base_address, + reserved2: 0, + } + } +} + // TODO: use LE types everywhere, as here, to avoid #[repr(packed)] and to be // specific about endianness (which the ACPI spec dictates is always LE). #[repr(C)] diff --git a/vm/devices/pci/pci_core/src/bus_range.rs b/vm/devices/pci/pci_core/src/bus_range.rs new file mode 100644 index 0000000000..bb11026c9a --- /dev/null +++ b/vm/devices/pci/pci_core/src/bus_range.rs @@ -0,0 +1,78 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Shared PCIe bus range tracking. +//! +//! An [`AssignedBusRange`] holds the segment-local bus range +//! `(secondary_bus, subordinate_bus)` assigned to the PCIe port that owns a +//! device. It is updated automatically by +//! [`ConfigSpaceType1Emulator`](crate::cfg_space_emu::ConfigSpaceType1Emulator) +//! when the guest writes bus number registers, and on restore/reset. +//! +//! Consumers (ITS wrappers, SMMU) compose a full device identity from the +//! bus range plus the device's BDF. The segment number is not included +//! here — it is a static property of the root complex and is held +//! separately by the consumer. + +use std::sync::Arc; +use std::sync::atomic::AtomicU16; +use std::sync::atomic::Ordering; + +/// Segment-local bus range assigned to a PCIe downstream port. +/// +/// Stores a packed `(secondary_bus, subordinate_bus)` as an atomic u16, +/// updated when the PCIe port's bus numbers change. The segment number +/// is not included here — it is a static property of the root complex +/// and is held separately by the consumer (e.g., ITS wrappers). +/// +/// Clone is cheap (just an `Arc` bump). +#[derive(Clone, Debug)] +pub struct AssignedBusRange(Arc); + +impl Default for AssignedBusRange { + fn default() -> Self { + Self::new() + } +} + +impl AssignedBusRange { + /// Creates a new bus range initialized to zero. + pub fn new() -> Self { + Self(Arc::new(AtomicU16::new(0))) + } + + /// Updates the bus range for the downstream port. + pub fn set_bus_range(&self, secondary: u8, subordinate: u8) { + self.0.store( + (secondary as u16) << 8 | subordinate as u16, + Ordering::Relaxed, + ); + } + + /// Returns the current `(secondary_bus, subordinate_bus)`. + pub fn bus_range(&self) -> (u8, u8) { + let v = self.0.load(Ordering::Relaxed); + ((v >> 8) as u8, v as u8) + } + + /// Composes an ITS device ID from the current bus range, segment, and + /// an optional per-device BDF override. + /// + /// Returns `None` if the secondary bus has not been assigned yet (still 0). + /// When `devid` is `None`, defaults to `(secondary_bus, dev 0, fn 0)`. + /// Logs a rate-limited warning and returns `None` if the BDF's bus + /// number falls outside the port's assigned range. + pub fn compose_its_devid(&self, segment: u16, devid: Option) -> Option { + let (secondary, subordinate) = self.bus_range(); + if secondary == 0 { + return None; + } + let bdf = devid.unwrap_or((secondary as u32) << 8); + let bus = (bdf >> 8) as u8; + if bus < secondary || bus > subordinate { + tracelimit::warn_ratelimited!(bus, secondary, subordinate, "BDF out of port bus range"); + return None; + } + Some((segment as u32) << 16 | (bdf & 0xFFFF)) + } +} diff --git a/vm/devices/pci/pci_core/src/capabilities/msix.rs b/vm/devices/pci/pci_core/src/capabilities/msix.rs index a5c8ac3440..5618cb96a9 100644 --- a/vm/devices/pci/pci_core/src/capabilities/msix.rs +++ b/vm/devices/pci/pci_core/src/capabilities/msix.rs @@ -174,7 +174,7 @@ impl MsiInterrupt { } if state.pending { - state.target.signal_msi(0, address, data); + state.target.signal_msi(address, data); state.pending = false; } } @@ -210,7 +210,7 @@ impl InterruptTarget for MsiInterruptTarget { fn deliver(&self) { let mut state = self.0.lock(); if state.enabled { - state.target.signal_msi(0, state.address, state.data); + state.target.signal_msi(state.address, state.data); } else { state.pending = true; } @@ -675,7 +675,7 @@ mod tests { &self.event } - fn enable(&self, address: u64, data: u32) { + fn enable(&self, address: u64, data: u32, _devid: Option) { self.calls.lock().push(RouteCall::SetMsi { address, data }); } diff --git a/vm/devices/pci/pci_core/src/cfg_space_emu.rs b/vm/devices/pci/pci_core/src/cfg_space_emu.rs index 6a3f36f5d8..dc3b56aba5 100644 --- a/vm/devices/pci/pci_core/src/cfg_space_emu.rs +++ b/vm/devices/pci/pci_core/src/cfg_space_emu.rs @@ -1097,6 +1097,9 @@ pub struct ConfigSpaceType1Emulator { common: ConfigSpaceCommonHeaderEmulatorType1, /// Type 1 specific state state: ConfigSpaceType1EmulatorState, + /// Shared bus range, synced automatically on writes, reset, and restore. + #[inspect(skip)] + bus_range: crate::bus_range::AssignedBusRange, } impl ConfigSpaceType1Emulator { @@ -1108,6 +1111,7 @@ impl ConfigSpaceType1Emulator { Self { common, state: ConfigSpaceType1EmulatorState::new(), + bus_range: crate::bus_range::AssignedBusRange::new(), } } @@ -1115,6 +1119,7 @@ impl ConfigSpaceType1Emulator { pub fn reset(&mut self) { self.common.reset(); self.state = ConfigSpaceType1EmulatorState::new(); + self.sync_bus_range(); } /// Set the multi-function bit for this device. @@ -1134,6 +1139,23 @@ impl ConfigSpaceType1Emulator { } } + /// Returns a clone of the shared bus range. + /// + /// The returned handle shares the same underlying atomic — bus number + /// changes from writes, resets, and restores are reflected automatically. + pub fn bus_range(&self) -> crate::bus_range::AssignedBusRange { + self.bus_range.clone() + } + + /// Pushes the current secondary/subordinate bus numbers into the shared + /// atomic so that consumers (ITS wrappers, SMMU) see the latest values. + fn sync_bus_range(&self) { + self.bus_range.set_bus_range( + self.state.secondary_bus_number, + self.state.subordinate_bus_number, + ); + } + fn decode_memory_range(&self, base_register: u16, limit_register: u16) -> (u32, u32) { let base_addr = u32::from(base_register) << 16; let limit_addr = (u32::from(limit_register) << 16) | 0xF_FFFF; @@ -1247,6 +1269,7 @@ impl ConfigSpaceType1Emulator { self.state.subordinate_bus_number = (val >> 16) as u8; self.state.secondary_bus_number = (val >> 8) as u8; self.state.primary_bus_number = val as u8; + self.sync_bus_range(); } HeaderType01::MEMORY_RANGE => { let (base, limit) = to_low_high(val); @@ -1569,6 +1592,8 @@ mod save_restore { bridge_control, }; + self.sync_bus_range(); + // Pad base_addresses to 6 elements for common header (Type 1 uses 2 BARs) let mut full_base_addresses = [0u32; 6]; for (i, &addr) in base_addresses.iter().enumerate().take(2) { diff --git a/vm/devices/pci/pci_core/src/lib.rs b/vm/devices/pci/pci_core/src/lib.rs index 4e056dc99a..504231a7b6 100644 --- a/vm/devices/pci/pci_core/src/lib.rs +++ b/vm/devices/pci/pci_core/src/lib.rs @@ -11,6 +11,7 @@ pub mod test_helpers; pub mod bar_mapping; +pub mod bus_range; pub mod capabilities; pub mod cfg_space_emu; pub mod chipset_device_ext; diff --git a/vm/devices/pci/pci_core/src/msi.rs b/vm/devices/pci/pci_core/src/msi.rs index 92ccfdb83a..df73b35d1b 100644 --- a/vm/devices/pci/pci_core/src/msi.rs +++ b/vm/devices/pci/pci_core/src/msi.rs @@ -13,8 +13,11 @@ use vmcore::irqfd::IrqFdRoute; pub trait SignalMsi: Send + Sync { /// Signals a message-signaled interrupt at the specified address with the specified data. /// - /// `rid` is the requester ID of the PCI device sending the interrupt. - fn signal_msi(&self, rid: u32, address: u64, data: u32); + /// `devid` is an optional device identity. Its meaning is layer-dependent: + /// at the device layer it is a BDF for multi-function devices (`None` for + /// single-function); at the ITS wrapper layer it is the fully composed ITS + /// device ID; backends that don't need it ignore it. + fn signal_msi(&self, devid: Option, address: u64, data: u32); } /// A kernel-mediated MSI interrupt route for a single vector. @@ -43,7 +46,16 @@ impl MsiRoute { /// `address` and `data` are the MSI address and data values that /// the hypervisor will use when injecting the interrupt. pub fn enable(&self, address: u64, data: u32) { - self.0.enable(address, data) + self.0.enable(address, data, None) + } + + /// Configures the MSI address and data for this route. + /// + /// `rid` is the PCIe requester ID (RID) of the device that will signal the + /// interrupt. `address` and `data` are the MSI address and data values that + /// the hypervisor will use when injecting the interrupt. + pub fn enable_with_rid(&self, address: u64, data: u32, rid: u16) { + self.0.enable(address, data, Some(rid.into())) } /// Disables the MSI route. Interrupts that arrive while disabled @@ -64,7 +76,7 @@ impl MsiRoute { struct DisconnectedMsiTarget; impl SignalMsi for DisconnectedMsiTarget { - fn signal_msi(&self, _rid: u32, _address: u64, _data: u32) { + fn signal_msi(&self, _devid: Option, _address: u64, _data: u32) { tracelimit::warn_ratelimited!("dropped MSI interrupt to disconnected target"); } } @@ -143,12 +155,16 @@ impl MsiConnection { } impl MsiTarget { - /// Signals an MSI interrupt to this target from the specified RID. - /// - /// A single-RID device should use `0` as the RID. - pub fn signal_msi(&self, rid: u32, address: u64, data: u32) { + /// Signals an MSI interrupt to this target. + pub fn signal_msi(&self, address: u64, data: u32) { + let inner = self.inner.read(); + inner.signal_msi.signal_msi(None, address, data); + } + + /// Signals an MSI interrupt to this target from a specific RID. + pub fn signal_msi_with_rid(&self, rid: u16, address: u64, data: u32) { let inner = self.inner.read(); - inner.signal_msi.signal_msi(rid, address, data); + inner.signal_msi.signal_msi(Some(rid.into()), address, data); } /// Creates a new kernel-mediated MSI route for direct interrupt diff --git a/vm/devices/pci/pci_core/src/test_helpers/mod.rs b/vm/devices/pci/pci_core/src/test_helpers/mod.rs index efb045b703..2888bd0e3a 100644 --- a/vm/devices/pci/pci_core/src/test_helpers/mod.rs +++ b/vm/devices/pci/pci_core/src/test_helpers/mod.rs @@ -43,8 +43,7 @@ impl TestPciInterruptController { } impl SignalMsi for TestPciInterruptControllerInner { - fn signal_msi(&self, rid: u32, address: u64, data: u32) { - assert_eq!(rid, 0); + fn signal_msi(&self, _devid: Option, address: u64, data: u32) { self.msi_requests.lock().push_back((address, data)); } } diff --git a/vm/devices/pci/pcie/Cargo.toml b/vm/devices/pci/pcie/Cargo.toml index b99da29991..d4e1ac481f 100644 --- a/vm/devices/pci/pcie/Cargo.toml +++ b/vm/devices/pci/pcie/Cargo.toml @@ -13,6 +13,7 @@ inspect.workspace = true memory_range.workspace = true mesh.workspace = true pal_async.workspace = true +pal_event.workspace = true pci_bus.workspace = true pci_core.workspace = true tracing.workspace = true diff --git a/vm/devices/pci/pcie/src/bus_range.rs b/vm/devices/pci/pcie/src/bus_range.rs new file mode 100644 index 0000000000..7fc80b1431 --- /dev/null +++ b/vm/devices/pci/pcie/src/bus_range.rs @@ -0,0 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Re-export of [`pci_core::bus_range`]. + +pub use pci_core::bus_range::AssignedBusRange; diff --git a/vm/devices/pci/pcie/src/its.rs b/vm/devices/pci/pcie/src/its.rs new file mode 100644 index 0000000000..462154e0a5 --- /dev/null +++ b/vm/devices/pci/pcie/src/its.rs @@ -0,0 +1,113 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! GICv3 ITS interrupt wrappers for PCIe devices. +//! +//! The ITS routes MSIs using a 32-bit device ID. For PCIe, this is `(segment << +//! 16) | bdf`, where `bdf = (bus << 8) | (dev << 3) | fn`. +//! +//! [`ItsSignalMsi`] and [`ItsIrqFd`] wrap a partition's generic MSI and irqfd +//! implementations to inject the ITS device ID. The bus range comes from a +//! shared [`AssignedBusRange`] (updated by the PCIe port when the guest assigns +//! bus numbers); the segment is fixed at construction time. +//! +//! For single-function devices (`devid == None`), the wrapper defaults to +//! device 0, function 0 on the port's secondary bus. Multi-function devices +//! pass `Some(bdf)` where `bdf = (bus << 8) | (dev << 3) | fn`. + +use crate::bus_range::AssignedBusRange; +use pal_event::Event; +use pci_core::msi::SignalMsi; +use std::sync::Arc; +use vmcore::irqfd::IrqFd; +use vmcore::irqfd::IrqFdRoute; + +/// A [`SignalMsi`] wrapper that composes the ITS device ID before +/// forwarding to the inner implementation. +pub struct ItsSignalMsi { + inner: Arc, + bus_range: AssignedBusRange, + segment: u16, +} + +impl ItsSignalMsi { + /// Creates a new wrapper. + /// + /// `segment` is the PCI segment number of the root complex that + /// owns this device. + pub fn new(inner: Arc, bus_range: AssignedBusRange, segment: u16) -> Self { + Self { + inner, + bus_range, + segment, + } + } +} + +impl SignalMsi for ItsSignalMsi { + fn signal_msi(&self, devid: Option, address: u64, data: u32) { + let Some(its_devid) = self.bus_range.compose_its_devid(self.segment, devid) else { + return; + }; + self.inner.signal_msi(Some(its_devid), address, data); + } +} + +/// An [`IrqFd`] wrapper that produces ITS irqfd routes, each +/// of which injects the ITS device ID into the `devid` parameter on +/// `enable`. +pub struct ItsIrqFd { + inner: Arc, + bus_range: AssignedBusRange, + segment: u16, +} + +impl ItsIrqFd { + /// Creates a new wrapper. + /// + /// `segment` is the PCI segment number of the root complex that + /// owns this device. + pub fn new(inner: Arc, bus_range: AssignedBusRange, segment: u16) -> Self { + Self { + inner, + bus_range, + segment, + } + } +} + +impl IrqFd for ItsIrqFd { + fn new_irqfd_route(&self) -> anyhow::Result> { + let inner_route = self.inner.new_irqfd_route()?; + Ok(Box::new(ItsIrqFdRoute { + inner: inner_route, + bus_range: self.bus_range.clone(), + segment: self.segment, + })) + } +} + +/// An [`IrqFdRoute`] wrapper that composes the ITS device ID on +/// `enable`. +struct ItsIrqFdRoute { + inner: Box, + bus_range: AssignedBusRange, + segment: u16, +} + +impl IrqFdRoute for ItsIrqFdRoute { + fn event(&self) -> &Event { + self.inner.event() + } + + fn enable(&self, address: u64, data: u32, devid: Option) { + let Some(its_devid) = self.bus_range.compose_its_devid(self.segment, devid) else { + return; + }; + self.inner.enable(address, data, Some(its_devid)); + } + + fn disable(&self) { + self.inner.disable(); + } +} diff --git a/vm/devices/pci/pcie/src/lib.rs b/vm/devices/pci/pcie/src/lib.rs index a5f6a7dd09..29444418e3 100644 --- a/vm/devices/pci/pcie/src/lib.rs +++ b/vm/devices/pci/pcie/src/lib.rs @@ -5,6 +5,8 @@ #![forbid(unsafe_code)] +pub mod bus_range; +pub mod its; pub(crate) mod port; pub mod root; pub mod switch; diff --git a/vm/devices/pci/pcie/src/port.rs b/vm/devices/pci/pcie/src/port.rs index e7efc11aaf..c41070b480 100644 --- a/vm/devices/pci/pcie/src/port.rs +++ b/vm/devices/pci/pcie/src/port.rs @@ -7,6 +7,7 @@ use anyhow::bail; use chipset_device::io::IoResult; use inspect::Inspect; use pci_bus::GenericPciBusDevice; +use pci_core::bus_range::AssignedBusRange; use pci_core::capabilities::msi_cap::MsiCapability; use pci_core::capabilities::pci_express::PciExpressCapability; use pci_core::cfg_space_emu::ConfigSpaceType1Emulator; @@ -79,6 +80,14 @@ impl PcieDownstreamPort { } } + /// Returns a clone of the config space emulator's shared bus range. + /// + /// The returned handle shares the same underlying atomic as the + /// emulator — writes, resets, and restores are reflected automatically. + pub fn bus_range(&self) -> AssignedBusRange { + self.cfg_space.bus_range() + } + /// Notify the guest of a hotplug event via MSI. /// /// Fires MSI if the guest has enabled hot_plug_interrupt_enable in diff --git a/vm/devices/pci/pcie/src/root.rs b/vm/devices/pci/pcie/src/root.rs index 5163deb6fd..a51f2ec4e3 100644 --- a/vm/devices/pci/pcie/src/root.rs +++ b/vm/devices/pci/pcie/src/root.rs @@ -12,6 +12,7 @@ use crate::PAGE_SHIFT; use crate::PAGE_SIZE64; use crate::ROOT_PORT_DEVICE_ID; use crate::VENDOR_ID; +use crate::bus_range::AssignedBusRange; use crate::port::PcieDownstreamPort; use chipset_device::ChipsetDevice; use chipset_device::io::IoError; @@ -48,6 +49,17 @@ pub struct GenericPcieRootComplex { ports: HashMap, RootPort)>, } +/// Information about a downstream port in a PCIe topology. +pub struct DownstreamPortInfo { + /// The port number (device/function index). + pub port_number: u8, + /// The port name. + pub name: Arc, + /// Shared bus range, updated by the config space emulator when the + /// guest programs secondary/subordinate bus numbers. + pub bus_range: AssignedBusRange, +} + /// A description of a generic PCIe root port. pub struct GenericPcieRootPortDefinition { /// The name of the root port. @@ -169,14 +181,15 @@ impl GenericPcieRootComplex { } /// Enumerate the downstream ports of the root complex. - pub fn downstream_ports(&self) -> Vec<(u8, Arc)> { - let ports: Vec<(u8, Arc)> = self - .ports + pub fn downstream_ports(&self) -> Vec { + self.ports .iter() - .map(|(port, (name, _))| (*port, name.clone())) - .collect(); - - ports + .map(|(port, (name, rp))| DownstreamPortInfo { + port_number: *port, + name: name.clone(), + bus_range: rp.port.bus_range(), + }) + .collect() } /// Hot-add a device to a named port. @@ -1084,4 +1097,53 @@ mod tests { let result = rc2.restore(saved_state); assert!(result.is_err()); } + + #[test] + fn test_bus_range_updated_on_cfg_write() { + const SECONDARY_BUS_NUM_REG: u64 = 0x19; + const SUBORDINATE_BUS_NUM_REG: u64 = 0x1A; + + let mut rc = instantiate_root_complex(0, 255, 1); + + let endpoint = TestPcieEndpoint::new( + |_, _| Some(IoResult::Err(IoError::InvalidRegister)), + |_, _| Some(IoResult::Err(IoError::InvalidRegister)), + ); + + // Get the bus_range from the port before attaching a device. + let bus_range = rc.downstream_ports().into_iter().next().unwrap().bus_range; + assert_eq!(bus_range.bus_range(), (0, 0)); + + rc.add_pcie_device(0, "ep", Box::new(endpoint)).unwrap(); + + // Program secondary=5, subordinate=10 via ECAM MMIO writes. + rc.mmio_write(SECONDARY_BUS_NUM_REG, &[5]).unwrap(); + rc.mmio_write(SUBORDINATE_BUS_NUM_REG, &[10]).unwrap(); + + // The shared AssignedBusRange should reflect the new values. + assert_eq!(bus_range.bus_range(), (5, 10)); + + // compose_its_devid should produce (segment << 16 | secondary << 8) + // for a single-function device (devid=None). + let segment = 2u16; + let devid = bus_range.compose_its_devid(segment, None); + assert_eq!(devid, Some((2 << 16) | (5 << 8))); + + // With a specific BDF within range: bus=7, dev=1, fn=2 + let bdf: u32 = (7 << 8) | (1 << 3) | 2; + let devid = bus_range.compose_its_devid(segment, Some(bdf)); + assert_eq!(devid, Some((2 << 16) | bdf)); + + // BDF outside range should return None. + let out_of_range_bdf: u32 = 11 << 8; // bus=11, beyond subordinate=10 + assert_eq!( + bus_range.compose_its_devid(segment, Some(out_of_range_bdf)), + None + ); + + // Reprogram bus numbers and verify tracking follows. + rc.mmio_write(SECONDARY_BUS_NUM_REG, &[20]).unwrap(); + rc.mmio_write(SUBORDINATE_BUS_NUM_REG, &[30]).unwrap(); + assert_eq!(bus_range.bus_range(), (20, 30)); + } } diff --git a/vm/devices/pci/pcie/src/switch.rs b/vm/devices/pci/pcie/src/switch.rs index 930b56f8f5..e8290daaeb 100644 --- a/vm/devices/pci/pcie/src/switch.rs +++ b/vm/devices/pci/pcie/src/switch.rs @@ -208,10 +208,14 @@ impl GenericPcieSwitch { } /// Enumerate the downstream ports of the switch. - pub fn downstream_ports(&self) -> Vec<(u8, Arc)> { + pub fn downstream_ports(&self) -> Vec { self.downstream_ports .iter() - .map(|(port, (name, _))| (*port, name.clone())) + .map(|(port, (name, dsp))| crate::root::DownstreamPortInfo { + port_number: *port, + name: name.clone(), + bus_range: dsp.port.bus_range(), + }) .collect() } @@ -708,14 +712,14 @@ mod tests { // Verify downstream port names (HashMap doesn't guarantee order, so check each one exists) let ports = switch.downstream_ports(); let port_names: std::collections::HashSet<_> = - ports.iter().map(|(_, name)| name.as_ref()).collect(); + ports.iter().map(|p| p.name.as_ref()).collect(); assert!(port_names.contains("test-switch-downstream-0")); assert!(port_names.contains("test-switch-downstream-1")); assert!(port_names.contains("test-switch-downstream-2")); // Verify port numbers let port_numbers: std::collections::HashSet<_> = - ports.iter().map(|(num, _)| *num).collect(); + ports.iter().map(|p| p.port_number).collect(); assert!(port_numbers.contains(&0)); assert!(port_numbers.contains(&1)); assert!(port_numbers.contains(&2)); @@ -750,7 +754,7 @@ mod tests { .add_pcie_device( 0, // Port number instead of port name "downstream-dev", - Box::new(downstream_device) + Box::new(downstream_device), ) .is_ok() ); @@ -972,7 +976,8 @@ mod tests { let multi_port_switch = GenericPcieSwitch::new(multi_port_definition); // Verify each downstream port has the multi-function bit set - for (port_num, _) in multi_port_switch.downstream_ports() { + for p in multi_port_switch.downstream_ports() { + let port_num = p.port_number; if let Some((_, downstream_port)) = multi_port_switch.downstream_ports.get(&port_num) { let mut header_type_value: u32 = 0; downstream_port @@ -1010,7 +1015,8 @@ mod tests { let single_port_switch = GenericPcieSwitch::new(single_port_definition); // Verify the single downstream port does NOT have the multi-function bit set - for (port_num, _) in single_port_switch.downstream_ports() { + for p in single_port_switch.downstream_ports() { + let port_num = p.port_number; if let Some((_, downstream_port)) = single_port_switch.downstream_ports.get(&port_num) { let mut header_type_value: u32 = 0; downstream_port diff --git a/vm/devices/pci/vpci/src/test_helpers/mod.rs b/vm/devices/pci/vpci/src/test_helpers/mod.rs index 69b4a85482..f0391da2d9 100644 --- a/vm/devices/pci/vpci/src/test_helpers/mod.rs +++ b/vm/devices/pci/vpci/src/test_helpers/mod.rs @@ -76,8 +76,7 @@ impl TestVpciInterruptControllerInner { } impl SignalMsi for TestVpciInterruptControllerInner { - fn signal_msi(&self, rid: u32, address: u64, data: u32) { - assert_eq!(rid, 0); + fn signal_msi(&self, _devid: Option, address: u64, data: u32) { self.deliver_interrupt(address, data); } } diff --git a/vm/devices/storage/nvme/src/tests/test_helpers.rs b/vm/devices/storage/nvme/src/tests/test_helpers.rs index 6fd2edaa3f..8f95f57a60 100644 --- a/vm/devices/storage/nvme/src/tests/test_helpers.rs +++ b/vm/devices/storage/nvme/src/tests/test_helpers.rs @@ -46,8 +46,7 @@ impl TestPciInterruptController { } impl SignalMsi for TestPciInterruptController { - fn signal_msi(&self, rid: u32, address: u64, data: u32) { - assert_eq!(rid, 0); + fn signal_msi(&self, _devid: Option, address: u64, data: u32) { self.inner.msi_requests.lock().push_back((address, data)); } } diff --git a/vm/devices/storage/nvme_test/src/tests/test_helpers.rs b/vm/devices/storage/nvme_test/src/tests/test_helpers.rs index 6fd2edaa3f..8f95f57a60 100644 --- a/vm/devices/storage/nvme_test/src/tests/test_helpers.rs +++ b/vm/devices/storage/nvme_test/src/tests/test_helpers.rs @@ -46,8 +46,7 @@ impl TestPciInterruptController { } impl SignalMsi for TestPciInterruptController { - fn signal_msi(&self, rid: u32, address: u64, data: u32) { - assert_eq!(rid, 0); + fn signal_msi(&self, _devid: Option, address: u64, data: u32) { self.inner.msi_requests.lock().push_back((address, data)); } } diff --git a/vm/devices/user_driver_emulated_mock/src/lib.rs b/vm/devices/user_driver_emulated_mock/src/lib.rs index f04476b41f..b6f95d8c22 100644 --- a/vm/devices/user_driver_emulated_mock/src/lib.rs +++ b/vm/devices/user_driver_emulated_mock/src/lib.rs @@ -59,11 +59,8 @@ impl MsiController { } impl SignalMsi for MsiController { - fn signal_msi(&self, rid: u32, address: u64, _data: u32) { + fn signal_msi(&self, _devid: Option, address: u64, _data: u32) { let index = address as usize; - if rid != 0 { - return; - } if let Some(event) = self.events.get(index) { tracing::debug!(index, "signaling interrupt"); event.signal_uncached(); diff --git a/vm/kvm/src/lib.rs b/vm/kvm/src/lib.rs index 61c2e7caf4..00d5c953ee 100644 --- a/vm/kvm/src/lib.rs +++ b/vm/kvm/src/lib.rs @@ -520,24 +520,37 @@ impl Partition { entries: [Default::default(); MAX_ROUTES], }; for (i, route) in routes.iter().enumerate() { - let (type_, u) = match route.1 { + let (type_, flags, u) = match route.1 { RoutingEntry::Msi { address_lo, address_hi, data, - } => ( - KVM_IRQ_ROUTING_MSI, - kvm_irq_routing_entry__bindgen_ty_1 { - msi: kvm_irq_routing_msi { - address_lo, - address_hi, - data, - __bindgen_anon_1: Default::default(), + devid, + } => { + let (flags, anon) = if let Some(devid) = devid { + ( + KVM_MSI_VALID_DEVID, + kvm_irq_routing_msi__bindgen_ty_1 { devid }, + ) + } else { + (0, Default::default()) + }; + ( + KVM_IRQ_ROUTING_MSI, + flags, + kvm_irq_routing_entry__bindgen_ty_1 { + msi: kvm_irq_routing_msi { + address_lo, + address_hi, + data, + __bindgen_anon_1: anon, + }, }, - }, - ), + ) + } RoutingEntry::HvSint { vp, sint } => ( KVM_IRQ_ROUTING_HV_SINT, + 0, kvm_irq_routing_entry__bindgen_ty_1 { hv_sint: kvm_irq_routing_hv_sint { vcpu: vp, @@ -547,6 +560,7 @@ impl Partition { ), RoutingEntry::Irqchip { pin } => ( KVM_IRQ_ROUTING_IRQCHIP, + 0, kvm_irq_routing_entry__bindgen_ty_1 { irqchip: kvm_irq_routing_irqchip { pin, irqchip: 0 }, }, @@ -555,7 +569,7 @@ impl Partition { kvm_routes.entries[i] = kvm_irq_routing_entry { gsi: route.0, type_, - flags: 0, + flags, pad: 0, u, }; @@ -719,6 +733,7 @@ pub enum RoutingEntry { address_lo: u32, address_hi: u32, data: u32, + devid: Option, }, HvSint { vp: u32, diff --git a/vm/vmcore/src/irqfd.rs b/vm/vmcore/src/irqfd.rs index 97d3e6219e..4ecaf95f7e 100644 --- a/vm/vmcore/src/irqfd.rs +++ b/vm/vmcore/src/irqfd.rs @@ -51,7 +51,9 @@ pub trait IrqFdRoute: Send + Sync { /// /// `address` and `data` are the MSI address and data values that the /// hypervisor will use when injecting the interrupt into the guest. - fn enable(&self, address: u64, data: u32); + /// `devid` is an optional device identity used by backends that need a + /// device ID for MSI routing (e.g., GICv3 ITS). + fn enable(&self, address: u64, data: u32, devid: Option); /// Disables the MSI routing for this irqfd's GSI. /// diff --git a/vm/vmcore/vm_topology/src/processor/aarch64.rs b/vm/vmcore/vm_topology/src/processor/aarch64.rs index 2660108eee..6af59067ec 100644 --- a/vm/vmcore/vm_topology/src/processor/aarch64.rs +++ b/vm/vmcore/vm_topology/src/processor/aarch64.rs @@ -71,8 +71,8 @@ pub struct Aarch64PlatformConfig { pub gic_distributor_base: u64, /// GIC version and version-specific addresses. pub gic_version: GicVersion, - /// GIC v2m MSI frame, if MSIs via v2m are supported. - pub gic_v2m: Option, + /// MSI controller for PCIe interrupt delivery. + pub gic_msi: GicMsiController, /// Performance Monitor Unit GSIV (GIC INTID). `None` if not available. pub pmu_gsiv: Option, /// Virtual timer PPI (GIC INTID, e.g. 20 for PPI 4). @@ -97,6 +97,28 @@ pub struct GicV2mInfo { pub spi_count: u32, } +/// GICv3 ITS (Interrupt Translation Service) parameters. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[cfg_attr(feature = "inspect", derive(inspect::Inspect))] +pub struct GicItsInfo { + /// Physical base address of the ITS MMIO region (must be 64 KiB aligned). + #[cfg_attr(feature = "inspect", inspect(hex))] + pub its_base: u64, +} + +/// MSI controller configuration for PCIe interrupt delivery. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[cfg_attr(feature = "inspect", derive(inspect::Inspect))] +#[cfg_attr(feature = "inspect", inspect(external_tag))] +pub enum GicMsiController { + /// No MSI controller configured. + None, + /// GICv2m — maps MSI writes to a fixed pool of SPIs. + V2m(GicV2mInfo), + /// GICv3 ITS — routes MSIs via LPIs using (DeviceID, EventID) lookup. + Its(GicItsInfo), +} + /// ARM64 specific VP info. #[cfg_attr(feature = "inspect", derive(inspect::Inspect))] #[derive(Debug, Copy, Clone)] @@ -240,9 +262,9 @@ impl ProcessorTopology { self.arch.platform.pmu_gsiv } - /// Returns the GIC v2m MSI frame info, if present. - pub fn gic_v2m(&self) -> Option { - self.arch.platform.gic_v2m + /// Returns the MSI controller configuration. + pub fn gic_msi(&self) -> GicMsiController { + self.arch.platform.gic_msi } /// Returns the virtual timer PPI (GIC INTID). diff --git a/vmm_core/src/acpi_builder.rs b/vmm_core/src/acpi_builder.rs index a3cb682cf7..791edcb857 100644 --- a/vmm_core/src/acpi_builder.rs +++ b/vmm_core/src/acpi_builder.rs @@ -89,6 +89,15 @@ pub const OEM_INFO: acpi::builder::OemInfo = acpi::builder::OemInfo { pub trait AcpiTopology: ArchTopology + Inspect + Sized { fn extend_srat(topology: &ProcessorTopology, srat: &mut Vec); fn extend_madt(topology: &ProcessorTopology, madt: &mut Vec); + fn needs_iort(_topology: &ProcessorTopology) -> bool { + false + } + /// If the platform has an ITS, return its identifier for the IORT ITS + /// Group node. Returns `None` when no ITS is present (root complex + /// nodes will have no ID mappings). + fn iort_its_id(_topology: &ProcessorTopology) -> Option { + None + } } /// The maximum ID that can be used for a legacy APIC ID in an ACPI table. @@ -188,7 +197,7 @@ impl AcpiTopology for Aarch64Topology { } // GIC v2m MSI frame for PCIe MSI support. - if let Some(v2m) = topology.gic_v2m() { + if let vm_topology::processor::aarch64::GicMsiController::V2m(v2m) = topology.gic_msi() { madt.extend_from_slice( acpi_spec::madt::MadtGicMsiFrame::new( 0, @@ -199,6 +208,22 @@ impl AcpiTopology for Aarch64Topology { .as_bytes(), ); } + + // GICv3 ITS for PCIe MSI routing via LPIs. + if let vm_topology::processor::aarch64::GicMsiController::Its(its) = topology.gic_msi() { + madt.extend_from_slice(acpi_spec::madt::MadtGicIts::new(0, its.its_base).as_bytes()); + } + } + + fn needs_iort(_topology: &ProcessorTopology) -> bool { + true + } + + fn iort_its_id(topology: &ProcessorTopology) -> Option { + match topology.gic_msi() { + vm_topology::processor::aarch64::GicMsiController::Its(_) => Some(0), + _ => None, + } } } @@ -324,6 +349,62 @@ impl AcpiTablesBuilder<'_, T> { )) } + fn with_iort(&self, f: F) -> R + where + F: FnOnce(&acpi::builder::Table<'_>) -> R, + { + use acpi_spec::iort; + + let its_id = T::iort_its_id(self.processor_topology); + let has_its = its_id.is_some(); + let its_node_count: u32 = if has_its { 1 } else { 0 }; + let node_count = its_node_count + self.pcie_host_bridges.len() as u32; + let mapping_count: u32 = if has_its { 1 } else { 0 }; + + let mut iort_extra: Vec = Vec::new(); + + // ITS Group node comes first so root complexes can reference it. + // The ITS Group node offset (from table start) is IORT_NODE_OFFSET. + let its_group_offset = iort::IORT_NODE_OFFSET; + if let Some(id) = its_id { + iort_extra.extend_from_slice(iort::IortItsGroup::new(0, 1).as_bytes()); + // Followed by the ITS identifier (u32). + iort_extra.extend_from_slice(&id.to_ne_bytes()); + } + + for bridge in self.pcie_host_bridges { + let rc = iort::IortPciRootComplex::new(bridge.index, bridge.segment, mapping_count); + iort_extra.extend_from_slice(rc.as_bytes()); + + if has_its { + // Single ID mapping: full RID range → ITS Group node. + // output_base uses (segment << 16) so device IDs in the + // ITS namespace are unique across PCI segments. + iort_extra.extend_from_slice( + iort::IortIdMapping::new( + 0, // input_base + 0xFFFF, // id_count (full 16-bit BDF range, minus 1 per IORT spec) + (bridge.segment as u32) << 16, // output_base + its_group_offset, // output_reference + 0, // flags + ) + .as_bytes(), + ); + } + } + + (f)(&acpi::builder::Table::new_dyn( + iort::IORT_REVISION, + None, + &iort::Iort::new(node_count), + &[iort_extra.as_slice()], + )) + } + + fn should_build_iort(&self) -> bool { + T::needs_iort(self.processor_topology) && !self.pcie_host_bridges.is_empty() + } + fn with_pptt(&self, f: F) -> R where F: FnOnce(&acpi::builder::Table<'_>) -> R, @@ -658,6 +739,10 @@ impl AcpiTablesBuilder<'_, T> { if !self.pcie_host_bridges.is_empty() { self.with_mcfg(|t| b.append(t)); + if self.should_build_iort() { + self.with_iort(|t| b.append(t)); + } + let mut ssdt = Ssdt::new(); for bridge in self.pcie_host_bridges { ssdt.add_pcie( @@ -704,6 +789,13 @@ impl AcpiTablesBuilder<'_, T> { self.with_mcfg(|t| t.to_vec(&OEM_INFO)) } + /// Helper method to construct an IORT without constructing the rest of the + /// ACPI tables. Returns `None` if IORT is not needed for this configuration. + pub fn build_iort(&self) -> Option> { + self.should_build_iort() + .then(|| self.with_iort(|t| t.to_vec(&OEM_INFO))) + } + /// Helper method to construct a PPTT without constructing the rest of the /// ACPI tables. /// @@ -873,4 +965,175 @@ mod test { }) .unwrap(); } + + fn new_aarch64_its_topology() -> ProcessorTopology { + use vm_topology::processor::aarch64::Aarch64PlatformConfig; + use vm_topology::processor::aarch64::GicItsInfo; + use vm_topology::processor::aarch64::GicMsiController; + use vm_topology::processor::aarch64::GicVersion; + + TopologyBuilder::new_aarch64(Aarch64PlatformConfig { + gic_distributor_base: 0xffff0000, + gic_version: GicVersion::V3 { + redistributors_base: 0xefff0000, + }, + gic_msi: GicMsiController::Its(GicItsInfo { + its_base: 0xeffc0000, + }), + pmu_gsiv: None, + virt_timer_ppi: 20, + gic_nr_irqs: 992, + }) + .build(2) + .unwrap() + } + + fn new_aarch64_builder<'a>( + mem_layout: &'a MemoryLayout, + processor_topology: &'a ProcessorTopology, + pcie_host_bridges: &'a Vec, + ) -> AcpiTablesBuilder<'a, Aarch64Topology> { + AcpiTablesBuilder { + processor_topology, + mem_layout, + cache_topology: None, + pcie_host_bridges, + arch: AcpiArchConfig::Aarch64 { + hypervisor_vendor_identity: 0, + virt_timer_ppi: 20, + }, + } + } + + fn u32_at(data: &[u8], offset: usize) -> u32 { + u32::from_ne_bytes(data[offset..offset + 4].try_into().unwrap()) + } + + fn checksum(data: &[u8]) -> u8 { + data.iter().fold(0, |sum, byte| sum.wrapping_add(*byte)) + } + + fn contains_signature(data: &[u8], signature: &[u8; 4]) -> bool { + data.windows(signature.len()) + .any(|window| window == signature) + } + + #[test] + fn test_aarch64_iort_with_its() { + use acpi_spec::iort; + + let mem = new_mem(); + let topology = new_aarch64_its_topology(); + let pcie_host_bridges = vec![ + PcieHostBridge { + index: 0, + segment: 0, + start_bus: 0, + end_bus: 255, + ecam_range: MemoryRange::new(0..256 * 256 * 4096), + low_mmio: MemoryRange::new(0xdc000000..0xe0000000), + high_mmio: MemoryRange::new(0x1000000000..0x1040000000), + }, + PcieHostBridge { + index: 7, + segment: 3, + start_bus: 32, + end_bus: 63, + ecam_range: MemoryRange::new(5 * GB..5 * GB + 32 * 256 * 4096), + low_mmio: MemoryRange::new(0xe0000000..0xe4000000), + high_mmio: MemoryRange::new(0x1040000000..0x1080000000), + }, + ]; + let builder = new_aarch64_builder(&mem, &topology, &pcie_host_bridges); + + let data = builder.build_iort().unwrap(); + + // IORT header + assert_eq!(&data[0..4], b"IORT"); + assert_eq!(u32_at(&data, 4) as usize, data.len()); + assert_eq!(checksum(&data), 0); + + // 3 nodes: 1 ITS Group + 2 Root Complexes + assert_eq!(u32_at(&data, 36), 3); + assert_eq!(u32_at(&data, 40), iort::IORT_NODE_OFFSET); + + // First node: ITS Group at IORT_NODE_OFFSET + let its_node = iort::IORT_NODE_OFFSET as usize; + assert_eq!(data[its_node], iort::IORT_NODE_TYPE_ITS_GROUP); + // its_count = 1 + assert_eq!(u32_at(&data, its_node + 16), 1); + // ITS identifier = 0 + assert_eq!(u32_at(&data, its_node + 20), 0); + + // Second node: Root Complex 0 (after ITS Group: 20 + 4 = 24 bytes) + let rc0 = its_node + 24; + assert_eq!(data[rc0], iort::IORT_NODE_TYPE_PCI_ROOT_COMPLEX); + assert_eq!(u32_at(&data, rc0 + 4), 0); // identifier + assert_eq!(u32_at(&data, rc0 + 8), 1); // mapping_count + // pci_segment_number at offset 28 from node start + assert_eq!(u32_at(&data, rc0 + 28), 0); + // ID mapping follows the root complex node (36 bytes in) + let mapping0 = rc0 + 36; + assert_eq!(u32_at(&data, mapping0), 0); // input_base + assert_eq!(u32_at(&data, mapping0 + 4), 0xFFFF); // id_count + assert_eq!(u32_at(&data, mapping0 + 8), 0); // output_base (seg 0 << 16) + assert_eq!(u32_at(&data, mapping0 + 12), iort::IORT_NODE_OFFSET); // -> ITS group + + // Third node: Root Complex 7 + let rc1 = mapping0 + 20; + assert_eq!(data[rc1], iort::IORT_NODE_TYPE_PCI_ROOT_COMPLEX); + assert_eq!(u32_at(&data, rc1 + 4), 7); // identifier + assert_eq!(u32_at(&data, rc1 + 28), 3); // pci_segment_number + let mapping1 = rc1 + 36; + assert_eq!(u32_at(&data, mapping1 + 8), 3 << 16); // output_base (seg 3 << 16) + } + + #[test] + fn test_iort_not_built_for_x86() { + let mem = new_mem(); + let topology = TopologyBuilder::new_x86().build(1).unwrap(); + let pcie_host_bridges = vec![PcieHostBridge { + index: 0, + segment: 0, + start_bus: 0, + end_bus: 255, + ecam_range: MemoryRange::new(0..256 * 256 * 4096), + low_mmio: MemoryRange::new(0xdc000000..0xe0000000), + high_mmio: MemoryRange::new(0x1000000000..0x1040000000), + }]; + let builder = new_builder(&mem, &topology, &pcie_host_bridges); + assert!(builder.build_iort().is_none()); + + let tables = builder.build_acpi_tables(0x100000, |_, _| {}); + assert!(!contains_signature(&tables.tables, b"IORT")); + } + + #[test] + fn test_iort_not_built_without_pcie() { + let mem = new_mem(); + let topology = new_aarch64_its_topology(); + let empty: Vec = Vec::new(); + let builder = new_aarch64_builder(&mem, &topology, &empty); + assert!(builder.build_iort().is_none()); + } + + #[test] + fn test_aarch64_acpi_tables_include_iort() { + let mem = new_mem(); + let topology = new_aarch64_its_topology(); + let pcie_host_bridges = vec![PcieHostBridge { + index: 0, + segment: 0, + start_bus: 0, + end_bus: 255, + ecam_range: MemoryRange::new(0..256 * 256 * 4096), + low_mmio: MemoryRange::new(0xdc000000..0xe0000000), + high_mmio: MemoryRange::new(0x1000000000..0x1040000000), + }]; + let builder = new_aarch64_builder(&mem, &topology, &pcie_host_bridges); + + let tables = builder.build_acpi_tables(0x100000, |_, _| {}); + assert!(contains_signature(&tables.tables, b"MCFG")); + assert!(contains_signature(&tables.tables, b"IORT")); + } } diff --git a/vmm_core/virt/src/aarch64/gic_software_device.rs b/vmm_core/virt/src/aarch64/gic_software_device.rs index bebbcb49be..c252e02789 100644 --- a/vmm_core/virt/src/aarch64/gic_software_device.rs +++ b/vmm_core/virt/src/aarch64/gic_software_device.rs @@ -65,7 +65,7 @@ impl MapVpciInterrupt for GicSoftwareDevice { } impl SignalMsi for GicSoftwareDevice { - fn signal_msi(&self, _rid: u32, _address: u64, data: u32) { + fn signal_msi(&self, _devid: Option, _address: u64, data: u32) { if SPI_RANGE.contains(&data) { self.irqcon.set_spi_irq(data, true); } diff --git a/vmm_core/virt/src/aarch64/gic_v2m.rs b/vmm_core/virt/src/aarch64/gic_v2m.rs index bd22bedb78..33a517806b 100644 --- a/vmm_core/virt/src/aarch64/gic_v2m.rs +++ b/vmm_core/virt/src/aarch64/gic_v2m.rs @@ -39,7 +39,7 @@ impl GicV2mSignalMsi { } impl SignalMsi for GicV2mSignalMsi { - fn signal_msi(&self, _rid: u32, address: u64, data: u32) { + fn signal_msi(&self, _devid: Option, address: u64, data: u32) { if address != self.setspi_addr { tracelimit::warn_ratelimited!( address, diff --git a/vmm_core/virt/src/generic.rs b/vmm_core/virt/src/generic.rs index 5bfcad421a..e31204174a 100644 --- a/vmm_core/virt/src/generic.rs +++ b/vmm_core/virt/src/generic.rs @@ -58,6 +58,10 @@ pub struct PlatformInfo { /// Whether the hypervisor supports GICv3. When `false`, only /// GICv2 is available (e.g., Raspberry Pi 5 with GIC-400). pub supports_gic_v3: bool, + /// Whether the hypervisor supports an in-kernel GICv3 ITS for + /// MSI delivery via LPIs. When `true`, the topology can include + /// a `GicItsInfo` and the backend will create/manage the ITS device. + pub supports_its: bool, } pub trait Hypervisor: 'static { @@ -680,7 +684,7 @@ impl MapVpciInterrupt for UnimplementedDevice { } impl SignalMsi for UnimplementedDevice { - fn signal_msi(&self, _rid: u32, _address: u64, _data: u32) { + fn signal_msi(&self, _devid: Option, _address: u64, _data: u32) { match *self {} } } diff --git a/vmm_core/virt/src/x86/apic_software_device.rs b/vmm_core/virt/src/x86/apic_software_device.rs index 4f13a67d53..5bf2d5369f 100644 --- a/vmm_core/virt/src/x86/apic_software_device.rs +++ b/vmm_core/virt/src/x86/apic_software_device.rs @@ -260,13 +260,13 @@ impl InterruptTable { } impl SignalMsi for ApicSoftwareDevice { - fn signal_msi(&self, _rid: u32, address: u64, _data: u32) { + fn signal_msi(&self, _devid: Option, address: u64, _data: u32) { let mut table = self.table.lock(); let table = &mut *table; let index = InterruptTable::interrupt_index_from_address(address); if let Some(interrupt) = table.entries.get(index) { let target = interrupt.msi_params(); - self.target.signal_msi(0, target.address, target.data) + self.target.signal_msi(None, target.address, target.data) } } } diff --git a/vmm_core/virt_hvf/src/lib.rs b/vmm_core/virt_hvf/src/lib.rs index c49c09dd7f..ae32d12165 100644 --- a/vmm_core/virt_hvf/src/lib.rs +++ b/vmm_core/virt_hvf/src/lib.rs @@ -96,6 +96,7 @@ impl virt::Hypervisor for HvfHypervisor { virt::PlatformInfo { platform_gsiv: None, supports_gic_v3: true, + supports_its: false, } } diff --git a/vmm_core/virt_kvm/src/arch/aarch64/mod.rs b/vmm_core/virt_kvm/src/arch/aarch64/mod.rs index e6788b7038..890a99b13a 100644 --- a/vmm_core/virt_kvm/src/arch/aarch64/mod.rs +++ b/vmm_core/virt_kvm/src/arch/aarch64/mod.rs @@ -29,10 +29,12 @@ use kvm::KVM_DEV_ARM_VGIC_CTRL_INIT; use kvm::KVM_DEV_ARM_VGIC_GRP_ADDR; use kvm::KVM_DEV_ARM_VGIC_GRP_CTRL; use kvm::KVM_DEV_ARM_VGIC_GRP_NR_IRQS; +use kvm::KVM_VGIC_ITS_ADDR_TYPE; use kvm::KVM_VGIC_V2_ADDR_TYPE_CPU; use kvm::KVM_VGIC_V2_ADDR_TYPE_DIST; use kvm::KVM_VGIC_V3_ADDR_TYPE_DIST; use kvm::KVM_VGIC_V3_ADDR_TYPE_REDIST; +use kvm::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_ITS; use kvm::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2; use kvm::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3; use kvm::kvm_regs; @@ -53,6 +55,7 @@ use virt::vp::Registers; use virt::vp::SystemRegisters; use virt::x86::DebugState; use vm_topology::processor::aarch64::Aarch64VpInfo; +use vm_topology::processor::aarch64::GicMsiController; use vmcore::reference_time::ReferenceTimeSource; use vmcore::vmtime::VmTimeAccess; @@ -220,6 +223,7 @@ use vm_topology::processor::aarch64::GicVersion; pub struct Kvm { kvm: kvm::Kvm, supports_gic_v3: bool, + supports_its: bool, } impl Kvm { @@ -250,9 +254,18 @@ impl Kvm { tracing::info!(supports_gic_v3, "detected KVM GIC version"); + // Probe ITS support: only available with GICv3. + let supports_its = supports_gic_v3 + && probe_vm + .test_create_device(kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_ITS) + .is_ok(); + + tracing::info!(supports_its, "detected KVM ITS support"); + Ok(Self { kvm, supports_gic_v3, + supports_its, }) } } @@ -691,6 +704,44 @@ impl KvmProtoPartition<'_> { Ok(gicv2) } + fn add_its(&mut self, its_base: u64) -> Result { + const ITS_ALIGNMENT: u64 = 0x10000; + if !its_base.is_multiple_of(ITS_ALIGNMENT) { + return Err(KvmError::Misaligned); + } + + let its = self + .vm + .create_device(kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_ITS, 0) + .map_err(kvm::Error::CreateDevice)?; + + // SAFETY: passing the right type for the attribute. + unsafe { + its.set_device_attr::( + KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_ITS_ADDR_TYPE, + &its_base, + 0, + ) + .map_err(kvm::Error::SetDeviceAttr)?; + } + + // Initialize the ITS device. + // + // SAFETY: passing the right type for the attribute. + unsafe { + its.set_device_attr::<()>( + KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, + &(), + 0, + ) + .map_err(kvm::Error::SetDeviceAttr)?; + } + + Ok(its) + } + fn set_timer_ppis(&mut self, virt: u32, phys: u32) -> Result<(), KvmError> { // SAFETY: passing the right type for the attribute. unsafe { @@ -746,6 +797,14 @@ impl virt::ProtoPartition for KvmProtoPartition<'_> { GicVersion::V2 { cpu_interface_base } => self.add_gicv2(cpu_interface_base)?, }; + // Create the ITS device after the GIC, if configured. + let gic_msi = self.config.processor_topology.gic_msi(); + let its_device = if let GicMsiController::Its(its_info) = &gic_msi { + Some(self.add_its(its_info.its_base)?) + } else { + None + }; + // Configure the virtual timer PPI from topology. KVM also requires // a physical timer PPI, but we don't expose it to the guest. self.set_timer_ppis( @@ -787,7 +846,8 @@ impl virt::ProtoPartition for KvmProtoPartition<'_> { gsi_routing: Mutex::new(GsiRouting::new()), caps, _gic_device: gic_device, - gic_v2m: self.config.processor_topology.gic_v2m(), + _its_device: its_device, + gic_msi, gic_nr_irqs: self.config.processor_topology.gic_nr_irqs(), synic_ports: Default::default(), }); @@ -834,16 +894,25 @@ impl virt::Partition for KvmPartition { } fn as_signal_msi(&self, _minimum_vtl: Vtl) -> Option> { - let v2m = self.inner.gic_v2m.as_ref()?; - let irqcon = self.inner.clone() as Arc; - Some(Arc::new(virt::aarch64::gic_v2m::GicV2mSignalMsi::new( - v2m, irqcon, - ))) + match &self.inner.gic_msi { + GicMsiController::Its(its) => Some(Arc::new(GicItsSignalMsi { + kvm: self.inner.clone(), + translater_addr: its.its_base + GITS_TRANSLATER_OFFSET, + })), + GicMsiController::V2m(v2m) => { + let irqcon = self.inner.clone() as Arc; + Some(Arc::new(virt::aarch64::gic_v2m::GicV2mSignalMsi::new( + v2m, irqcon, + ))) + } + GicMsiController::None => None, + } } fn irqfd(&self) -> Option> { - // The irqfd implementation requires a GICv2m frame to be present. - self.inner.gic_v2m?; + if matches!(self.inner.gic_msi, GicMsiController::None) { + return None; + } Some(self.irqfd_state.clone()) } @@ -923,11 +992,12 @@ impl MsiRouteBuilder for KvmGicV2mRouteBuilder { partition: &KvmPartitionInner, address: u64, data: u32, + _devid: Option, ) -> Option { - let v2m = partition - .gic_v2m - .as_ref() - .expect("partition does not expose a GICv2m MSI frame"); + let v2m = match &partition.gic_msi { + GicMsiController::V2m(v2m) => v2m, + _ => panic!("partition does not expose a GICv2m MSI frame"), + }; let setspi_addr = v2m.frame_base + GicV2mRegister::SETSPI_NS.0 as u64; if address != setspi_addr { return None; @@ -941,13 +1011,94 @@ impl MsiRouteBuilder for KvmGicV2mRouteBuilder { } } +/// ITS MSI route builder for irqfd-based interrupt delivery. +/// +/// The ITS GITS_TRANSLATER address is `its_base + 0x10040`. All MSI writes +/// target this single address; the device ID distinguishes sources. +struct KvmItsRouteBuilder { + /// GITS_TRANSLATER physical address. + translater_addr: u64, +} + +/// Offset of the GITS_TRANSLATER register from the ITS base. +/// It's in the second 64 KiB page at offset 0x40. +const GITS_TRANSLATER_OFFSET: u64 = 0x10040; + +impl MsiRouteBuilder for KvmItsRouteBuilder { + fn routing_entry( + &self, + _partition: &KvmPartitionInner, + address: u64, + data: u32, + devid: Option, + ) -> Option { + if address != self.translater_addr { + return None; + } + Some(kvm::RoutingEntry::Msi { + address_lo: address as u32, + address_hi: (address >> 32) as u32, + data, + devid, + }) + } +} + +/// A [`SignalMsi`](pci_core::msi::SignalMsi) implementation that injects MSIs +/// via `KVM_SIGNAL_MSI` with the `KVM_MSI_VALID_DEVID` flag for ITS routing. +struct GicItsSignalMsi { + kvm: Arc, + /// GITS_TRANSLATER physical address. + translater_addr: u64, +} + +impl pci_core::msi::SignalMsi for GicItsSignalMsi { + fn signal_msi(&self, devid: Option, address: u64, data: u32) { + if address != self.translater_addr { + tracelimit::warn_ratelimited!( + address, + data, + expected = self.translater_addr, + "unexpected MSI address (expected ITS GITS_TRANSLATER)" + ); + return; + } + let (flags, raw_devid) = match devid { + Some(id) => (kvm::KVM_MSI_VALID_DEVID, id), + None => (0, 0), + }; + let msi = kvm::kvm_msi { + address_lo: address as u32, + address_hi: (address >> 32) as u32, + data, + flags, + devid: raw_devid, + pad: [0; 12], + }; + if let Err(err) = self.kvm.kvm.request_msi(&msi) { + tracelimit::warn_ratelimited!( + ?devid, + data, + err = &err as &dyn std::error::Error, + "failed to signal MSI via ITS" + ); + } + } +} + impl virt::irqfd::IrqFd for KvmIrqFdState { fn new_irqfd_route(&self) -> anyhow::Result> { - assert!( - self.partition.gic_v2m.is_some(), - "GICv2m is required for irqfd support" - ); - Ok(Box::new(self.new_irqfd_route(KvmGicV2mRouteBuilder)?)) + match &self.partition.gic_msi { + GicMsiController::Its(its) => { + Ok(Box::new(self.new_irqfd_route(KvmItsRouteBuilder { + translater_addr: its.its_base + GITS_TRANSLATER_OFFSET, + })?)) + } + GicMsiController::V2m(_) => Ok(Box::new(self.new_irqfd_route(KvmGicV2mRouteBuilder)?)), + GicMsiController::None => { + anyhow::bail!("no MSI controller configured for irqfd") + } + } } } @@ -994,6 +1145,7 @@ impl virt::Hypervisor for Kvm { virt::PlatformInfo { platform_gsiv: None, supports_gic_v3: self.supports_gic_v3, + supports_its: self.supports_its, } } diff --git a/vmm_core/virt_kvm/src/arch/x86_64/mod.rs b/vmm_core/virt_kvm/src/arch/x86_64/mod.rs index 4c91f12b13..c12c6d083e 100644 --- a/vmm_core/virt_kvm/src/arch/x86_64/mod.rs +++ b/vmm_core/virt_kvm/src/arch/x86_64/mod.rs @@ -851,6 +851,7 @@ impl MsiRouteBuilder for KvmX86MsiRouteBuilder { _partition: &KvmPartitionInner, address: u64, data: u32, + _devid: Option, ) -> Option { let KvmMsi { address_lo, @@ -861,6 +862,7 @@ impl MsiRouteBuilder for KvmX86MsiRouteBuilder { address_lo, address_hi, data, + devid: None, }) } } @@ -883,6 +885,7 @@ impl IoApicRouting for KvmPartitionInner { address_lo, address_hi, data, + devid: None, }), None => { tracelimit::warn_ratelimited!( @@ -1456,7 +1459,7 @@ impl GuestEventPort for KvmGuestEventPort { } impl SignalMsi for KvmPartitionInner { - fn signal_msi(&self, _rid: u32, address: u64, data: u32) { + fn signal_msi(&self, _devid: Option, address: u64, data: u32) { self.request_msi(MsiRequest { address, data }); } } diff --git a/vmm_core/virt_kvm/src/gsi.rs b/vmm_core/virt_kvm/src/gsi.rs index f070d81311..97eb79710e 100644 --- a/vmm_core/virt_kvm/src/gsi.rs +++ b/vmm_core/virt_kvm/src/gsi.rs @@ -216,6 +216,7 @@ pub(crate) trait MsiRouteBuilder: Send + Sync { partition: &KvmPartitionInner, address: u64, data: u32, + devid: Option, ) -> Option; } @@ -224,9 +225,9 @@ impl IrqFdRoute for KvmIrqFdRoute { &self.event } - fn enable(&self, address: u64, data: u32) { + fn enable(&self, address: u64, data: u32, devid: Option) { if let Some(partition) = self.route.partition.upgrade() { - if let Some(entry) = self.builder.routing_entry(&partition, address, data) { + if let Some(entry) = self.builder.routing_entry(&partition, address, data, devid) { self.route.inner.enable(&partition, entry); } else { tracelimit::warn_ratelimited!( diff --git a/vmm_core/virt_kvm/src/lib.rs b/vmm_core/virt_kvm/src/lib.rs index 91609f3214..de9901ce33 100644 --- a/vmm_core/virt_kvm/src/lib.rs +++ b/vmm_core/virt_kvm/src/lib.rs @@ -110,9 +110,14 @@ struct KvmPartitionInner { #[cfg(guest_arch = "aarch64")] #[inspect(skip)] _gic_device: kvm::Device, + /// The ITS device fd, kept alive for the VM lifetime. #[cfg(guest_arch = "aarch64")] #[inspect(skip)] - gic_v2m: Option, + _its_device: Option, + /// MSI controller configuration (v2m, ITS, or none). + #[cfg(guest_arch = "aarch64")] + #[inspect(skip)] + gic_msi: vm_topology::processor::aarch64::GicMsiController, /// Total configured GIC interrupt count (SGIs + PPIs + SPIs). #[cfg(guest_arch = "aarch64")] gic_nr_irqs: u32, diff --git a/vmm_core/virt_mshv/src/aarch64/mod.rs b/vmm_core/virt_mshv/src/aarch64/mod.rs index 038ded7646..9e95dfbe91 100644 --- a/vmm_core/virt_mshv/src/aarch64/mod.rs +++ b/vmm_core/virt_mshv/src/aarch64/mod.rs @@ -57,6 +57,7 @@ impl virt::Hypervisor for LinuxMshv { platform_gsiv: None, // TODO: query from hypervisor supports_gic_v3: true, + supports_its: false, } } @@ -192,7 +193,7 @@ impl virt::Partition for MshvPartition { } fn request_msi(&self, _vtl: Vtl, request: MsiRequest) { - self.inner.signal_msi(0, request.address, request.data); + self.inner.signal_msi(None, request.address, request.data); } fn request_yield(&self, vp_index: VpIndex) { @@ -294,7 +295,7 @@ impl virt::DeviceBuilder for MshvPartition { } impl SignalMsi for MshvPartitionInner { - fn signal_msi(&self, _rid: u32, _address: u64, data: u32) { + fn signal_msi(&self, _devid: Option, _address: u64, data: u32) { self.set_spi_irq(data, true); } } diff --git a/vmm_core/virt_mshv/src/irqfd.rs b/vmm_core/virt_mshv/src/irqfd.rs index 45ba2e4fcf..da17abdd9b 100644 --- a/vmm_core/virt_mshv/src/irqfd.rs +++ b/vmm_core/virt_mshv/src/irqfd.rs @@ -240,7 +240,7 @@ impl IrqFdRoute for MshvIrqFdRoute { &self.event } - fn enable(&self, address: u64, data: u32) { + fn enable(&self, address: u64, data: u32, _devid: Option) { let mut armed = self.armed.lock(); let route = MsiRoute { address_lo: address as u32, diff --git a/vmm_core/virt_mshv/src/x86_64/mod.rs b/vmm_core/virt_mshv/src/x86_64/mod.rs index 3114f350d3..89848d5ac2 100644 --- a/vmm_core/virt_mshv/src/x86_64/mod.rs +++ b/vmm_core/virt_mshv/src/x86_64/mod.rs @@ -479,7 +479,7 @@ impl MshvPartitionInner { } impl SignalMsi for MshvPartitionInner { - fn signal_msi(&self, _rid: u32, address: u64, data: u32) { + fn signal_msi(&self, _devid: Option, address: u64, data: u32) { self.request_msi(MsiRequest { address, data }); } } diff --git a/vmm_core/virt_whp/src/device.rs b/vmm_core/virt_whp/src/device.rs index f170b4f84e..af7dce3804 100644 --- a/vmm_core/virt_whp/src/device.rs +++ b/vmm_core/virt_whp/src/device.rs @@ -106,7 +106,7 @@ impl Drop for Device { } impl SignalMsi for Device { - fn signal_msi(&self, _rid: u32, address: u64, data: u32) { + fn signal_msi(&self, _devid: Option, address: u64, data: u32) { if let Err(err) = self.device().interrupt(address, data) { tracelimit::warn_ratelimited!( address, diff --git a/vmm_core/virt_whp/src/lib.rs b/vmm_core/virt_whp/src/lib.rs index 71432b757d..7611128055 100644 --- a/vmm_core/virt_whp/src/lib.rs +++ b/vmm_core/virt_whp/src/lib.rs @@ -120,7 +120,7 @@ struct WhpPartitionInner { isolation: IsolationType, #[cfg(guest_arch = "aarch64")] #[inspect(skip)] - gic_v2m: Option, + gic_msi: vm_topology::processor::aarch64::GicMsiController, synic_ports: virt::synic::SynicPortMap, } @@ -554,7 +554,10 @@ impl virt::Partition for WhpPartition { #[cfg(guest_arch = "aarch64")] fn as_signal_msi(&self, minimum_vtl: Vtl) -> Option> { - let v2m = self.inner.gic_v2m.as_ref()?; + let v2m = match &self.inner.gic_msi { + vm_topology::processor::aarch64::GicMsiController::V2m(v2m) => v2m, + _ => return None, + }; let irqcon = self.with_vtl(minimum_vtl).clone() as Arc; Some(Arc::new(virt::aarch64::gic_v2m::GicV2mSignalMsi::new( v2m, irqcon, @@ -774,6 +777,7 @@ impl virt::Hypervisor for Whp { virt::PlatformInfo { platform_gsiv: Some(WHP_PMU_GSIV), supports_gic_v3: true, + supports_its: false, } } } @@ -1144,7 +1148,7 @@ impl WhpPartitionInner { hvstate, isolation: proto_config.isolation, #[cfg(guest_arch = "aarch64")] - gic_v2m: proto_config.processor_topology.gic_v2m(), + gic_msi: proto_config.processor_topology.gic_msi(), synic_ports: Default::default(), }; @@ -1344,7 +1348,10 @@ impl VtlPartition { // (GICD_TYPER.LPIS=0) so Linux uses the GICv2m MSI frame // instead of ITS for PCIe MSIs. Otherwise keep LPI // enabled (1 ID bit minimum). - GicLpiIntIdBits: if config.processor_topology.gic_v2m().is_some() { + GicLpiIntIdBits: if matches!( + config.processor_topology.gic_msi(), + vm_topology::processor::aarch64::GicMsiController::V2m(_) + ) { 0 } else { 1 diff --git a/vmm_core/virt_whp/src/synic.rs b/vmm_core/virt_whp/src/synic.rs index e2a591af0c..e4fb04f789 100644 --- a/vmm_core/virt_whp/src/synic.rs +++ b/vmm_core/virt_whp/src/synic.rs @@ -441,7 +441,7 @@ mod x86 { use virt::irqcon::MsiRequest; impl SignalMsi for WhpPartitionAndVtl { - fn signal_msi(&self, _rid: u32, address: u64, data: u32) { + fn signal_msi(&self, _devid: Option, address: u64, data: u32) { if let Err(err) = self .partition .interrupt(self.vtl, MsiRequest { address, data }) diff --git a/vmm_core/vmotherboard/src/base_chipset.rs b/vmm_core/vmotherboard/src/base_chipset.rs index 4c2d602870..48c6032d05 100644 --- a/vmm_core/vmotherboard/src/base_chipset.rs +++ b/vmm_core/vmotherboard/src/base_chipset.rs @@ -927,7 +927,7 @@ mod weak_mutex_pci { }) } - fn downstream_ports(&self) -> Vec<(u8, Arc)> { + fn downstream_ports(&self) -> Vec { self.lock().downstream_ports() } } @@ -948,7 +948,7 @@ mod weak_mutex_pci { }) } - fn downstream_ports(&self) -> Vec<(u8, Arc)> { + fn downstream_ports(&self) -> Vec { self.lock().downstream_ports() } } diff --git a/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/pci.rs b/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/pci.rs index c6df3cde7e..a87bd7e97c 100644 --- a/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/pci.rs +++ b/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/pci.rs @@ -87,7 +87,7 @@ pub trait RegisterWeakMutexPcie: Send { ) -> Result<(), PcieConflict>; /// Enumerate the downstream ports. - fn downstream_ports(&self) -> Vec<(u8, Arc)>; + fn downstream_ports(&self) -> Vec; } pub struct WeakMutexPcieDeviceEntry { diff --git a/vmm_core/vmotherboard/src/chipset/builder/mod.rs b/vmm_core/vmotherboard/src/chipset/builder/mod.rs index 74872d0006..58a136d80e 100644 --- a/vmm_core/vmotherboard/src/chipset/builder/mod.rs +++ b/vmm_core/vmotherboard/src/chipset/builder/mod.rs @@ -224,16 +224,15 @@ impl<'a> ChipsetBuilder<'a> { bus_id ); - for (port_number, port_name) in downstream_ports { - let existing = inner - .bus_resolver - .pcie - .ports - .insert(BusId::new(&port_name), (port_number, bus_id.clone())); + for port_info in downstream_ports { + let existing = inner.bus_resolver.pcie.ports.insert( + BusId::new(&port_info.name), + (port_info.port_number, bus_id.clone()), + ); assert!( existing.is_none(), "duplicate pcie port ID: {:?}", - port_name + port_info.name ); } }