Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions cuda_core/cuda/core/_memory/_device_memory_resource.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -7,7 +7,9 @@ from cuda.core._memory._ipc cimport IPCDataForMR


cdef class DeviceMemoryResource(_MemPool):
pass
cdef:
int _dev_id
object _peer_accessible_by


cpdef DMR_mempool_get_access(DeviceMemoryResource, int)
158 changes: 135 additions & 23 deletions cuda_core/cuda/core/_memory/_device_memory_resource.pyx
Original file line number Diff line number Diff line change
@@ -1,25 +1,31 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

from cuda.bindings cimport cydriver
from cuda.core._memory._memory_pool cimport _MemPool, _MemPoolOptions
from cuda.core._memory._memory_pool cimport (
_MemPool, MP_init_create_pool, MP_raise_release_threshold,
)
from cuda.core._memory cimport _ipc
from cuda.core._memory._ipc cimport IPCAllocationHandle
from cuda.core._resource_handles cimport (
as_cu,
get_device_mempool,
)
from cuda.core._utils.cuda_utils cimport (
check_or_create_options,
HANDLE_RETURN,
)
from cpython.mem cimport PyMem_Malloc, PyMem_Free

from dataclasses import dataclass
import multiprocessing
import platform # no-cython-lint
import uuid

from cuda.core._utils.cuda_utils import check_multiprocessing_start_method
from cuda.core._resource_handles cimport as_cu

__all__ = ['DeviceMemoryResource', 'DeviceMemoryResourceOptions']

Expand Down Expand Up @@ -122,27 +128,26 @@ cdef class DeviceMemoryResource(_MemPool):
associated MMR.
"""

def __init__(self, device_id: Device | int, options=None):
from .._device import Device
cdef int dev_id = Device(device_id).device_id
cdef DeviceMemoryResourceOptions opts = check_or_create_options(
DeviceMemoryResourceOptions, options, "DeviceMemoryResource options",
keep_none=True
)
cdef _MemPoolOptions opts_base = _MemPoolOptions()

cdef bint ipc_enabled = False
if opts:
ipc_enabled = opts.ipc_enabled
if ipc_enabled and not _ipc.is_supported():
raise RuntimeError("IPC is not available on {platform.system()}")
opts_base._max_size = opts.max_size
opts_base._use_current = False
opts_base._ipc_enabled = ipc_enabled
opts_base._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
opts_base._type = cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED
def __cinit__(self, *args, **kwargs):
self._dev_id = cydriver.CU_DEVICE_INVALID
self._peer_accessible_by = ()

super().__init__(dev_id, opts_base)
def __init__(self, device_id: Device | int, options=None):
_DMR_init(self, device_id, options)

    def __dealloc__(self):
        # Best-effort cleanup when the object is garbage-collected without an
        # explicit close().  Exceptions are deliberately swallowed: raising
        # from __dealloc__ is unsafe in Cython (it runs during interpreter
        # teardown / GC, where propagation is not possible).
        try:
            self.close()
        except Exception:
            pass

    def close(self):
        """Close the memory resource, revoking peer access before destruction.

        Safe to call on both owned pools and the non-owned (default) device
        pool; in the latter case only the peer-access modifications made
        through this object are undone.
        """
        # nvbug 5698116: clear peer access before pool destruction; also
        # needed for non-owned (default) pools to undo modifications.
        if self._peer_accessible_by:
            _DMR_set_peer_accessible_by(self, [])
        super().close()

def __reduce__(self):
return DeviceMemoryResource.from_registry, (self.uuid,)
Expand Down Expand Up @@ -215,6 +220,37 @@ cdef class DeviceMemoryResource(_MemPool):
raise RuntimeError("Memory resource is not IPC-enabled")
return self._ipc_data._alloc_handle

@property
def device_id(self) -> int:
"""The associated device ordinal."""
return self._dev_id

@property
def peer_accessible_by(self):
"""
Get or set the devices that can access allocations from this memory
pool. Access can be modified at any time and affects all allocations
from this memory pool.

Returns a tuple of sorted device IDs that currently have peer access to
allocations from this memory pool.

When setting, accepts a sequence of Device objects or device IDs.
Setting to an empty sequence revokes all peer access.

Examples
--------
>>> dmr = DeviceMemoryResource(0)
>>> dmr.peer_accessible_by = [1] # Grant access to device 1
>>> assert dmr.peer_accessible_by == (1,)
>>> dmr.peer_accessible_by = [] # Revoke access
"""
return self._peer_accessible_by

@peer_accessible_by.setter
def peer_accessible_by(self, devices):
_DMR_set_peer_accessible_by(self, devices)

@property
def is_device_accessible(self) -> bool:
"""Return True. This memory resource provides device-accessible buffers."""
Expand All @@ -226,6 +262,82 @@ cdef class DeviceMemoryResource(_MemPool):
return False


cdef inline _DMR_set_peer_accessible_by(DeviceMemoryResource self, devices):
    """Grant or revoke peer access to this pool for the given devices.

    Computes the delta between the currently-granted peers and *devices*,
    then issues a single ``cuMemPoolSetAccess`` call containing READWRITE
    descriptors for newly-granted peers and NONE descriptors for revoked
    ones.  On success, ``self._peer_accessible_by`` is updated to a sorted
    tuple of the target device IDs.

    Raises
    ------
    ValueError
        If any requested device cannot peer-access ``self._dev_id``.
    MemoryError
        If the temporary descriptor array cannot be allocated.
    """
    from .._device import Device

    cdef set[int] target_ids = {Device(dev).device_id for dev in devices}
    # The pool's own device always has access; never include it in the set.
    target_ids.discard(self._dev_id)
    this_dev = Device(self._dev_id)
    cdef list bad = [dev for dev in target_ids if not this_dev.can_access_peer(dev)]
    if bad:
        raise ValueError(f"Device {self._dev_id} cannot access peer(s): {', '.join(map(str, bad))}")
    cdef set[int] cur_ids = set(self._peer_accessible_by)
    cdef set[int] to_add = target_ids - cur_ids
    cdef set[int] to_rm = cur_ids - target_ids
    cdef size_t count = len(to_add) + len(to_rm)
    cdef cydriver.CUmemAccessDesc* access_desc = NULL
    cdef size_t i = 0

    if count > 0:
        access_desc = <cydriver.CUmemAccessDesc*>PyMem_Malloc(count * sizeof(cydriver.CUmemAccessDesc))
        if access_desc == NULL:
            raise MemoryError("Failed to allocate memory for access descriptors")

        try:
            for dev_id in to_add:
                access_desc[i].flags = cydriver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_READWRITE
                access_desc[i].location.type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
                access_desc[i].location.id = dev_id
                i += 1

            for dev_id in to_rm:
                access_desc[i].flags = cydriver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_NONE
                access_desc[i].location.type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
                access_desc[i].location.id = dev_id
                i += 1

            with nogil:
                HANDLE_RETURN(cydriver.cuMemPoolSetAccess(as_cu(self._h_pool), access_desc, count))
        finally:
            if access_desc != NULL:
                PyMem_Free(access_desc)

    # Fix: the peer_accessible_by property documents a tuple of *sorted*
    # device IDs, but a set has arbitrary iteration order — sort explicitly.
    self._peer_accessible_by = tuple(sorted(target_ids))


cdef inline _DMR_init(DeviceMemoryResource self, device_id, options):
    """Initializer body for DeviceMemoryResource.__init__.

    Resolves *device_id* to an ordinal, validates *options*, and either
    adopts the device's default memory pool (no options given) or creates
    a new device-pinned pool with the requested settings.
    """
    from .._device import Device

    cdef int dev_id = Device(device_id).device_id
    cdef DeviceMemoryResourceOptions opts = check_or_create_options(
        DeviceMemoryResourceOptions, options, "DeviceMemoryResource options",
        keep_none=True
    )
    cdef bint ipc_enabled = False
    cdef size_t max_size = 0

    self._dev_id = dev_id

    if opts is None:
        # No options: bind to the device's default pool; we do not own it.
        self._h_pool = get_device_mempool(dev_id)
        self._mempool_owned = False
        MP_raise_release_threshold(self)
    else:
        ipc_enabled = opts.ipc_enabled
        if ipc_enabled and not _ipc.is_supported():
            raise RuntimeError(f"IPC is not available on {platform.system()}")
        max_size = opts.max_size
        MP_init_create_pool(
            self,
            cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE,
            dev_id,
            cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED,
            ipc_enabled,
            max_size,
        )


# Note: this is referenced in instructions to debug nvbug 5698116.
cpdef DMR_mempool_get_access(DeviceMemoryResource dmr, int device_id):
"""
Expand Down
2 changes: 1 addition & 1 deletion cuda_core/cuda/core/_memory/_managed_memory_resource.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

Expand Down
86 changes: 51 additions & 35 deletions cuda_core/cuda/core/_memory/_managed_memory_resource.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ from __future__ import annotations

from cuda.bindings cimport cydriver

from cuda.core._memory._memory_pool cimport _MemPool, _MemPoolOptions
from cuda.core._memory._memory_pool cimport _MemPool, MP_init_create_pool, MP_init_current_pool
from cuda.core._utils.cuda_utils cimport (
HANDLE_RETURN,
check_or_create_options,
Expand Down Expand Up @@ -64,40 +64,12 @@ cdef class ManagedMemoryResource(_MemPool):
"""

def __init__(self, options=None):
cdef ManagedMemoryResourceOptions opts = check_or_create_options(
ManagedMemoryResourceOptions, options, "ManagedMemoryResource options",
keep_none=True
)
cdef _MemPoolOptions opts_base = _MemPoolOptions()

cdef int device_id = -1
cdef object preferred_location = None
if opts:
preferred_location = opts.preferred_location
if preferred_location is not None:
device_id = preferred_location
opts_base._use_current = False

opts_base._ipc_enabled = False # IPC not supported for managed memory pools

IF CUDA_CORE_BUILD_MAJOR >= 13:
# Set location based on preferred_location
if preferred_location is None:
# Let the driver decide
opts_base._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_NONE
elif device_id == -1:
# CPU/host preference
opts_base._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST
else:
# Device preference
opts_base._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE

opts_base._type = cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED

super().__init__(device_id, opts_base)
_check_concurrent_managed_access()
ELSE:
raise RuntimeError("ManagedMemoryResource requires CUDA 13.0 or later")
_MMR_init(self, options)

@property
def device_id(self) -> int:
"""Return -1. Managed memory migrates automatically and is not tied to a specific device."""
return -1
Comment on lines +69 to +72
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Q: Wouldn't this be a breaking change? In the old implementation, device_id was used to initialize `_MemPool._dev_id`, which is used to back `.device_id`, but the new implementation returns `.device_id = -1` unconditionally. I understand we meant to say the pages are migratable (not pinned), but maybe there is a better way to restore the capability of querying the preferred location?


@property
def is_device_accessible(self) -> bool:
Expand All @@ -110,6 +82,50 @@ cdef class ManagedMemoryResource(_MemPool):
return True


cdef inline _MMR_init(ManagedMemoryResource self, options):
    # Initializer body for ManagedMemoryResource.__init__: resolves options,
    # maps the user's preferred_location onto a CUmemLocationType, and
    # creates or adopts a managed-memory pool.  The IF/ELSE below is a
    # Cython compile-time conditional: managed memory pools require a
    # CUDA 13.0+ build.
    cdef ManagedMemoryResourceOptions opts = check_or_create_options(
        ManagedMemoryResourceOptions, options, "ManagedMemoryResource options",
        keep_none=True
    )
    # location_id == -1 encodes "no specific device" (host preference).
    cdef int location_id = -1
    cdef object preferred_location = None
    cdef cydriver.CUmemLocationType loc_type

    if opts is not None:
        preferred_location = opts.preferred_location
        if preferred_location is not None:
            location_id = preferred_location

    IF CUDA_CORE_BUILD_MAJOR >= 13:
        if preferred_location is None:
            # No preference: let the driver decide (LOCATION_TYPE_NONE).
            loc_type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_NONE
        elif location_id == -1:
            # preferred_location given but resolves to -1: CPU/host preference.
            loc_type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST
        else:
            # A specific device ordinal was requested.
            loc_type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE

        if opts is None:
            # No options at all: presumably binds to the current/default
            # managed pool rather than creating one — TODO confirm against
            # MP_init_current_pool.
            MP_init_current_pool(
                self,
                loc_type,
                location_id,
                cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED,
            )
        else:
            # ipc_enabled=False, max_size=0: IPC is not supported for
            # managed memory pools.
            MP_init_create_pool(
                self,
                loc_type,
                location_id,
                cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED,
                False,
                0,
            )

        _check_concurrent_managed_access()
    ELSE:
        raise RuntimeError("ManagedMemoryResource requires CUDA 13.0 or later")


cdef bint _concurrent_access_warned = False
cdef object _concurrent_access_lock = threading.Lock()

Expand Down
31 changes: 19 additions & 12 deletions cuda_core/cuda/core/_memory/_memory_pool.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,32 @@ from cuda.core._resource_handles cimport MemoryPoolHandle

cdef class _MemPool(MemoryResource):
cdef:
int _dev_id
MemoryPoolHandle _h_pool
bint _mempool_owned
IPCDataForMR _ipc_data
object _attributes
object _peer_accessible_by
object __weakref__


cdef int MP_init_create_pool(
_MemPool self,
cydriver.CUmemLocationType loc_type,
int loc_id,
cydriver.CUmemAllocationType alloc_type,
bint ipc_enabled,
size_t max_size,
) except? -1

cdef int MP_init_current_pool(
_MemPool self,
cydriver.CUmemLocationType loc_type,
int loc_id,
cydriver.CUmemAllocationType alloc_type,
) except? -1

cdef int MP_raise_release_threshold(_MemPool self) except? -1


cdef class _MemPoolAttributes:
cdef:
MemoryPoolHandle _h_pool
Expand All @@ -27,13 +44,3 @@ cdef class _MemPoolAttributes:
cdef _MemPoolAttributes _init(MemoryPoolHandle h_pool)

cdef int _getattribute(self, cydriver.CUmemPool_attribute attr_enum, void* value) except? -1


cdef class _MemPoolOptions:

cdef:
bint _ipc_enabled
size_t _max_size
cydriver.CUmemLocationType _location
cydriver.CUmemAllocationType _type
bint _use_current
Loading
Loading