@@ -726,6 +726,95 @@ static inline bool ublk_dev_need_req_ref(const struct ublk_device *ub)
 		ublk_dev_support_auto_buf_reg(ub);
 }
 
+/*
+ * ublk IO Reference Counting Design
+ * ==================================
+ *
+ * For user-copy and zero-copy modes, ublk uses a split reference model with
+ * two counters that together track IO lifetime:
+ *
+ * - io->ref: refcount for off-task buffer registrations and user-copy ops
+ * - io->task_registered_buffers: count of buffers registered on the IO task
+ *
+ * Key Invariant:
+ * --------------
+ * When IO is dispatched to the ublk server (UBLK_IO_FLAG_OWNED_BY_SRV set),
+ * the sum (io->ref + io->task_registered_buffers) must equal UBLK_REFCOUNT_INIT
+ * when no active references exist. After IO completion, both counters become
+ * zero. For I/Os not currently dispatched to the ublk server, both ref and
+ * task_registered_buffers are 0.
+ *
+ * This invariant is checked by ublk_check_and_reset_active_ref() during daemon
+ * exit to determine if all references have been released.
+ *
+ * Why Split Counters:
+ * -------------------
+ * Buffers registered on the IO daemon task can use the lightweight
+ * task_registered_buffers counter (simple increment/decrement) instead of
+ * atomic refcount operations. The ublk_io_release() callback checks if
+ * current == io->task to decide which counter to update.
+ *
+ * This optimization only applies before IO completion. At completion,
+ * ublk_sub_req_ref() collapses task_registered_buffers into the atomic ref.
+ * After that, all subsequent buffer unregistrations must use the atomic ref
+ * since they may be releasing the last reference.
+ *
+ * Reference Lifecycle:
+ * --------------------
+ * 1. ublk_init_req_ref(): Sets io->ref = UBLK_REFCOUNT_INIT at IO dispatch
+ *
+ * 2. During IO processing:
+ *    - On-task buffer reg: task_registered_buffers++ (no ref change)
+ *    - Off-task buffer reg: ref++ via ublk_get_req_ref()
+ *    - Buffer unregister callback (ublk_io_release):
+ *      * If on-task: task_registered_buffers--
+ *      * If off-task: ref-- via ublk_put_req_ref()
+ *
+ * 3. ublk_sub_req_ref() at IO completion:
+ *    - Computes: sub_refs = UBLK_REFCOUNT_INIT - task_registered_buffers
+ *    - Subtracts sub_refs from ref and zeroes task_registered_buffers
+ *    - This effectively collapses task_registered_buffers into the atomic ref,
+ *      accounting for the initial UBLK_REFCOUNT_INIT minus any on-task
+ *      buffers that were already counted
+ *
+ * Example (zero-copy, register on-task, unregister off-task):
+ * - Dispatch: ref = UBLK_REFCOUNT_INIT, task_registered_buffers = 0
+ * - Register buffer on-task: task_registered_buffers = 1
+ * - Unregister off-task: ref-- (now UBLK_REFCOUNT_INIT - 1), task_registered_buffers stays 1
+ * - Completion via ublk_sub_req_ref():
+ *   sub_refs = UBLK_REFCOUNT_INIT - 1,
+ *   ref = (UBLK_REFCOUNT_INIT - 1) - (UBLK_REFCOUNT_INIT - 1) = 0
+ *
+ * Example (auto buffer registration):
+ * Auto buffer registration sets task_registered_buffers = 1 at dispatch.
+ *
+ * - Dispatch: ref = UBLK_REFCOUNT_INIT, task_registered_buffers = 1
+ * - Buffer unregister: task_registered_buffers-- (becomes 0)
+ * - Completion via ublk_sub_req_ref():
+ *   sub_refs = UBLK_REFCOUNT_INIT - 0, ref becomes 0
+ *
+ * Example (zero-copy, ublk server killed):
+ * When the daemon is killed, io_uring cleanup unregisters buffers off-task.
+ * ublk_check_and_reset_active_ref() waits for the invariant to hold.
+ *
+ * - Dispatch: ref = UBLK_REFCOUNT_INIT, task_registered_buffers = 0
+ * - Register buffer on-task: task_registered_buffers = 1
+ * - Daemon killed, io_uring cleanup unregisters buffer (off-task):
+ *   ref-- (now UBLK_REFCOUNT_INIT - 1), task_registered_buffers stays 1
+ * - Daemon exit check: sum = (UBLK_REFCOUNT_INIT - 1) + 1 = UBLK_REFCOUNT_INIT
+ * - The sum equals UBLK_REFCOUNT_INIT, so both counters are zeroed by
+ *   ublk_check_and_reset_active_ref() and ublk_abort_queue() can proceed
+ *   to abort the pending requests
+ *
+ * Batch IO Special Case:
+ * ----------------------
+ * In batch IO mode, io->task is NULL. This means ublk_io_release() always
+ * takes the off-task path (ublk_put_req_ref), decrementing io->ref. The
+ * task_registered_buffers counter still tracks registered buffers for the
+ * invariant check, even though the callback doesn't decrement it.
+ *
+ * Note: updating task_registered_buffers is protected by io->lock.
+ */
 static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
 				     struct ublk_io *io)
 {
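
To make the counter arithmetic in the new comment concrete, here is a small single-threaded userspace model of the split reference scheme. It is a sketch only: the struct, helper names, and the REFCOUNT_INIT value are illustrative stand-ins and do not reflect the kernel's actual types, atomic operations, or io->lock protection.

	#include <assert.h>
	#include <stdio.h>

	#define REFCOUNT_INIT 256	/* stand-in for UBLK_REFCOUNT_INIT */

	struct model_io {
		int ref;			/* models io->ref (atomic in the kernel) */
		int task_registered_buffers;	/* models io->task_registered_buffers */
	};

	/* Dispatch to the ublk server: seed the atomic refcount. */
	static void init_req_ref(struct model_io *io)
	{
		io->ref = REFCOUNT_INIT;
		io->task_registered_buffers = 0;
	}

	/* Buffer registered from the IO task: lightweight, non-atomic counter. */
	static void register_buf_on_task(struct model_io *io)
	{
		io->task_registered_buffers++;
	}

	/* Buffer unregistered from another task: atomic refcount path. */
	static void unregister_buf_off_task(struct model_io *io)
	{
		io->ref--;
	}

	/*
	 * IO completion: collapse task_registered_buffers into the atomic ref.
	 * After this, only io->ref tracks outstanding buffer registrations.
	 */
	static void sub_req_ref(struct model_io *io)
	{
		int sub_refs = REFCOUNT_INIT - io->task_registered_buffers;

		io->task_registered_buffers = 0;
		io->ref -= sub_refs;
	}

	int main(void)
	{
		struct model_io io;

		/* First example from the comment: register on-task, unregister off-task. */
		init_req_ref(&io);
		register_buf_on_task(&io);	/* task_registered_buffers = 1 */
		unregister_buf_off_task(&io);	/* ref = REFCOUNT_INIT - 1 */

		/* Invariant while dispatched: ref + task_registered_buffers == INIT. */
		assert(io.ref + io.task_registered_buffers == REFCOUNT_INIT);

		sub_req_ref(&io);		/* collapse at completion */
		assert(io.ref == 0 && io.task_registered_buffers == 0);

		printf("model invariant holds\n");
		return 0;
	}

Walking this model through the first example shows why the amount subtracted at completion is UBLK_REFCOUNT_INIT minus task_registered_buffers: on-task registrations were never added to io->ref, so sub_req_ref() drops the initial reference while folding those registrations into the atomic counter in a single step.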