QuantumKitHub · lkdvos · Jun 12, 2026 · Mar 26, 2026 · Jun 10, 2026 · Jun 10, 2026
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
@@ -14,7 +14,7 @@ steps:
     agents:
       queue: "cuda"
     if: build.message !~ /\[skip tests\]/
-    timeout_in_minutes: 30
+    timeout_in_minutes: 60
 
   - label: "Julia LTS -- CUDA"
     plugins:
@@ -28,4 +28,4 @@ steps:
     agents:
       queue: "cuda"
     if: build.message !~ /\[skip tests\]/
-    timeout_in_minutes: 30
+    timeout_in_minutes: 60
diff --git a/ext/MPSKitAdaptExt.jl b/ext/MPSKitAdaptExt.jl
@@ -35,5 +35,7 @@ end
     MPSKit.JordanMPOTensor(space(W), adapt(to, W.A), adapt(to, W.B), adapt(to, W.C), adapt(to, W.D))
 @inline Adapt.adapt_structure(to, mpo::MPOHamiltonian) =
     MPOHamiltonian(map(x -> adapt(to, x), mpo.W))
+@inline Adapt.adapt_structure(to, ml::MPSKit.Multiline) = MPSKit.Multiline(map(adapt(to), ml.data))
+@inline Adapt.adapt_structure(to, pa::MPSKit.PeriodicArray) = MPSKit.PeriodicArray(map(adapt(to), pa.data))
 
 end
diff --git a/src/operators/abstractmpo.jl b/src/operators/abstractmpo.jl
@@ -153,7 +153,7 @@ Compute the mpo tensor that arises from multiplying MPOs.
 """
 function fuse_mul_mpo(O1, O2)
     TT = promote_type(scalartype(O1), scalartype(O2))
-    T = TensorKit.similarstoragetype(storagetype(O1), TT)
+    T = TensorKit.promote_storagetype(TT, O1, O2)
     F_left = fuser(T, left_virtualspace(O2), left_virtualspace(O1))
     F_right = fuser(T, right_virtualspace(O2), right_virtualspace(O1))
     return _fuse_mpo_mpo(O1, O2, F_left, F_right)
@@ -190,7 +190,7 @@ end
 function add_physical_charge(O::MPOTensor, charge::Sector)
     sectortype(O) === typeof(charge) || throw(SectorMismatch())
     auxspace = Vect[typeof(charge)](charge => 1)'
-    F = fuser(scalartype(O), physicalspace(O), auxspace)
+    F = fuser(storagetype(O), physicalspace(O), auxspace)
     @plansor O_charged[-1 -2; -3 -4] := F[-2; 1 2] *
         O[-1 1; 4 3] * τ[3 2; 5 -4] * conj(F[-3; 4 5])
     return O_charged

diff --git a/src/operators/jordanmpotensor.jl b/src/operators/jordanmpotensor.jl
@@ -133,8 +133,10 @@ end
 function jordanmpotensortype(::Type{O}) where {O <: AbstractTensorMap}
     return jordanmpotensortype(spacetype(O), storagetype(O))
 end
-Base.similar(W::JordanMPOTensor, ::Type{TorA}) where {TorA} =
-    jordanmpotensortype(spacetype(W), TorA)(undef, space(W))
+function Base.similar(W::JordanMPOTensor, ::Type{T}) where {T <: Number}
+    TE = TensorKit.similarstoragetype(TensorKit.storagetype(W), T)
+    return jordanmpotensortype(spacetype(W), TE)(undef, space(W))
+end
 
 # Properties
 # ----------
@@ -205,7 +207,7 @@ end
 for f in (:real, :complex)
     @eval function Base.$f(W::JordanMPOTensor)
         E = $f(scalartype(W))
-        W′ = JordanMPOTensor{E}(undef, space(W))
+        W′ = similar(W, E)
         for (I, v) in nonzero_pairs(W.A)
             W′.A[I] = $f(v)
         end
@@ -242,7 +244,7 @@ end
     elseif 1 < i < size(W, 1) && 1 < j < size(W, 4)
         return W.A[i - 1, 1, 1, j - 1]
     else
-        return zeros(scalartype(W), eachspace(W)[i, 1, 1, j])
+        return zeros(storagetype(W), eachspace(W)[i, 1, 1, j])
     end
 end
 

diff --git a/src/operators/mpo.jl b/src/operators/mpo.jl
@@ -62,7 +62,7 @@ eachsite(mpo::InfiniteMPO) = PeriodicArray(eachindex(mpo))
 function Base.similar(mpo::MPO{<:MPOTensor}, ::Type{O}, L::Int) where {O <: MPOTensor}
     return MPO(similar(parent(mpo), O, L))
 end
-function Base.similar(mpo::MPO, ::Type{T}) where {T <: Number}
+function Base.similar(mpo::MPO, ::Type{T}) where {T <: Union{Number, DenseVector}}
     return MPO(similar.(parent(mpo), T))
 end
 

diff --git a/src/operators/mpohamiltonian.jl b/src/operators/mpohamiltonian.jl
@@ -59,18 +59,18 @@ function InfiniteMPOHamiltonian(Ws::AbstractVector{O}) where {O <: MPOTensor}
 end
 
 """
-    FiniteMPOHamiltonian(Ws::Vector{<:Matrix})
+    FiniteMPOHamiltonian(Ws::Vector{<:AbstractMatrix})
 
 Create a `FiniteMPOHamiltonian` from a vector of matrices, such that `Ws[i][j, k]` represents
 the operator at site `i`, left level `j` and right level `k`. Here, the entries can be
 either `MPOTensor`, `Missing` or `Number`.
 """
-function FiniteMPOHamiltonian(Ws::Vector{<:Matrix})
+function FiniteMPOHamiltonian(Ws::Vector{<:AbstractMatrix})
     T = promote_type(_split_mpoham_types.(Ws)...)
     W = jordanmpotensortype(T)
     return FiniteMPOHamiltonian{W}(Ws)
 end
-function FiniteMPOHamiltonian{O}(W_mats::Vector{<:Matrix}) where {O <: JordanMPOTensor}
+function FiniteMPOHamiltonian{O}(W_mats::Vector{<:AbstractMatrix}) where {O <: JordanMPOTensor}
     T = scalartype(O)
     L = length(W_mats)
     # initialize sumspaces
@@ -157,12 +157,12 @@ Create a `InfiniteMPOHamiltonian` from a vector of matrices, such that `Ws[i][j,
 the the operator at site `i`, left level `j` and right level `k`. Here, the entries can be
 either `MPOTensor`, `Missing` or `Number`.
 """
-function InfiniteMPOHamiltonian(Ws::Vector{<:Matrix})
+function InfiniteMPOHamiltonian(Ws::Vector{<:AbstractMatrix})
     T = promote_type(_split_mpoham_types.(Ws)...)
     TW = jordanmpotensortype(T)
     return InfiniteMPOHamiltonian{TW}(Ws)
 end
-function InfiniteMPOHamiltonian{O}(W_mats::Vector{<:Matrix}) where {O <: MPOTensor}
+function InfiniteMPOHamiltonian{O}(W_mats::Vector{<:AbstractMatrix}) where {O <: MPOTensor}
     # InfiniteMPOHamiltonian only works for square matrices:
     for W_mat in W_mats
         size(W_mat, 1) == size(W_mat, 2) ||
@@ -504,10 +504,8 @@ function InfiniteMPOHamiltonian(lattice′::AbstractArray{<:VectorSpace}, local_
     end
 
     # partial sort by interaction range
-    local_mpos = sort!(
-        map(Base.Fix1(instantiate_operator, lattice), collect(local_operators));
-        by = x -> length(x[1])
-    )
+    unsorted_mpos = map(Base.Fix1(instantiate_operator, lattice), [local_operators...])
+    local_mpos = sort!(unsorted_mpos; by = x -> length(x[1]))
 
     for (sites, local_mpo) in local_mpos
         local key_R # trick to define key_R before the first iteration
@@ -703,8 +701,8 @@ end
 function Base.similar(H::MPOHamiltonian, ::Type{O}, L::Int) where {O <: MPOTensor}
     return MPOHamiltonian(similar(parent(H), O, L))
 end
-function Base.similar(H::MPOHamiltonian, ::Type{T}) where {T <: Number}
-    return MPOHamiltonian(similar.(parent(H), T))
+function Base.similar(H::MPOHamiltonian, ::Type{TorA}) where {TorA <: Union{Number, DenseVector}}
+    return MPOHamiltonian(similar.(parent(H), TorA))
 end
 
 # Linear Algebra
@@ -729,7 +727,7 @@ function Base.:+(
             ⊞(Vtriv, right_virtualspace(A), Vtriv)
         V = Vleft ⊗ physicalspace(A) ← physicalspace(A) ⊗ Vright
 
-        H[i] = eltype(H)(V, A, B, C, D)
+        H[i] = JordanMPOTensor(V, A, B, C, D)
     end
     return FiniteMPOHamiltonian(H)
 end
@@ -751,7 +749,7 @@ function Base.:+(
         Vright = ⊞(Vtriv, right_virtualspace(A), Vtriv)
         V = Vleft ⊗ physicalspace(A) ← physicalspace(A) ⊗ Vright
 
-        H[i] = eltype(H)(V, A, B, C, D)
+        H[i] = JordanMPOTensor(V, A, B, C, D)
     end
     return InfiniteMPOHamiltonian(H)
 end
@@ -821,7 +819,7 @@ function Base.:*(H::FiniteMPOHamiltonian, mps::FiniteMPS)
         A,
         tensormaptype(
             spacetype(mps), numout(eltype(mps)), numin(eltype(mps)),
-            promote_type(scalartype(H), scalartype(mps))
+            promote_type(storagetype(H), storagetype(mps))
         )
     )
     # left to middle

diff --git a/src/operators/ortho.jl b/src/operators/ortho.jl
@@ -11,7 +11,8 @@ function left_canonicalize!(
     d = sqrt(dim(P))
 
     # orthogonalize second column against first
-    WI = removeunit(W[1, 1, 1, 1], 1)
+    Wi = W[1, 1, 1, 1]
+    WI = removeunit(Wi, 1)
     @plansor t[l; r] := conj(WI[p; p' l]) * W.C[p; p' r]
     # TODO: the following is currently broken due to a TensorKit bug
     # @plansor C′[p; p' r] := W.C[p; p' r] - WI[p; p' l] * t[l; r]
@@ -99,7 +100,8 @@ function right_canonicalize!(
     d = sqrt(dim(P))
 
     # orthogonalize second row against last
-    WI = removeunit(W[end, 1, 1, end], 4)
+    Wi = W[end, 1, 1, end]
+    WI = removeunit(Wi, 4)
     @plansor t[l; r] := conj(WI[r p; p']) * W.B[l p; p']
     # TODO: the following is currently broken due to a TensorKit bug
     # @plansor B′[l p; p'] := W.B[l p; p'] - WI[r p; p'] * t[l; r]
@@ -124,7 +126,8 @@ function right_canonicalize!(
         end
         H[i] = JordanMPOTensor(V ⊗ P ← domain(W), Q1, Q2, W.C, W.D)
     else
-        tmp = transpose(cat(insertleftunit(B′, 4), W.A; dims = 4), ((1,), (3, 4, 2)))
+        B′′ = insertleftunit(B′, 4)
+        tmp = transpose(cat(B′′, W.A; dims = 4), ((1,), (3, 4, 2)))
         R, Q = right_orth!(tmp; alg)
         if dim(R) == 0
             V = _rightunit ⊞ _rightunit

diff --git a/test/gpu/cuda/operators.jl b/test/gpu/cuda/operators.jl
@@ -83,3 +83,188 @@ using Adapt, CUDA, cuTENSOR
 
     @test dot(mpomps₁, mpomps₁) ≈ dot(mpo₁, mpo₁)
 end
+
+@testset "Finite CuMPOHamiltonian T ($T), V ($(spacetype(V)))" for T in (Float64, ComplexF64), V in (ℂ^2, U1Space(-1 => 1, 0 => 1, 1 => 1))
+    L = 3
+    lattice = fill(V, L)
+    O₁ = project_hermitian!(randn(T, V ← V))
+    E = id(CuVector{T, CUDA.DeviceMemory}, domain(O₁))
+    O₂ = project_hermitian!(randn(T, V^2 ← V^2))
+
+    dO₁ = adapt(CuVector{T, CUDA.DeviceMemory}, O₁)
+    dO₂ = adapt(CuVector{T, CUDA.DeviceMemory}, O₂)
+    hH1 = FiniteMPOHamiltonian(lattice, i => O₁ for i in 1:L)
+    hH2 = FiniteMPOHamiltonian(lattice, (i, i + 1) => O₂ for i in 1:(L - 1))
+    hH3 = FiniteMPOHamiltonian(lattice, 1 => O₁, (2, 3) => O₂, (1, 3) => O₂)
+    H1 = adapt(CuVector{T, CUDA.DeviceMemory}, hH1)
+    H2 = adapt(CuVector{T, CUDA.DeviceMemory}, hH2)
+    H3 = adapt(CuVector{T, CUDA.DeviceMemory}, hH3)
+    @test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory}
+    @test TensorKit.storagetype(H2) == CuVector{T, CUDA.DeviceMemory}
+    @test TensorKit.storagetype(H3) == CuVector{T, CUDA.DeviceMemory}
+
+    @test scalartype(H1) == scalartype(H2) == scalartype(H3) == T
+    if !(T <: Complex)
+        for H in (H1, H2, H3)
+            Hc = @constinferred complex(H)
+            @test scalartype(Hc) == complex(T)
+            @test TensorKit.storagetype(Hc) == CuVector{complex(T), CUDA.DeviceMemory}
+        end
+    end
+
+    # check if constructor works by converting back to tensormap
+    hH1_tm = convert(TensorMap, hH1)
+    H1_tm = convert(TensorMap, H1)
+    @test TensorKit.storagetype(H1_tm) == CuVector{T, CUDA.DeviceMemory}
+    operators = vcat(fill(E, L - 1), dO₁)
+    @test H1_tm ≈ mapreduce(+, 1:L) do i
+        return reduce(⊗, circshift(operators, i))
+    end
+    operators = vcat(fill(E, L - 2), dO₂)
+    @test convert(TensorMap, H2) ≈ mapreduce(+, 1:(L - 1)) do i
+        return reduce(⊗, circshift(operators, i))
+    end
+    @test convert(TensorMap, H3) ≈
+        dO₁ ⊗ E ⊗ E + E ⊗ dO₂ + permute(dO₂ ⊗ E, ((1, 3, 2), (4, 6, 5)))
+
+    # check if adding terms on the same site works
+    single_terms = Iterators.flatten(Iterators.repeated((i => O₁ / 2 for i in 1:L), 2))
+    H4 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, single_terms))
+    @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory}
+    @test H4 ≈ H1 atol = 1.0e-6
+    double_terms = Iterators.flatten(
+        Iterators.repeated(((i, i + 1) => O₂ / 2 for i in 1:(L - 1)), 2)
+    )
+    H5 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, double_terms))
+    @test TensorKit.storagetype(H5) == CuVector{T, CUDA.DeviceMemory}
+    @test H5 ≈ H2 atol = 1.0e-6
+
+    # test linear algebra
+    @test H1 ≈
+        adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, 1 => O₁)) +
+        adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, 2 => O₁)) +
+        adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, 3 => O₁))
+    @test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory}
+
+    H1′a = 0.8 * H1
+    @test TensorKit.storagetype(H1′a) == CuVector{T, CUDA.DeviceMemory}
+    H1′b = 0.2 * H1
+    @test TensorKit.storagetype(H1′b) == CuVector{T, CUDA.DeviceMemory}
+    H1′ = H1′a + H1′b
+    @test TensorKit.storagetype(H1′) == CuVector{T, CUDA.DeviceMemory}
+    @test H1′ ≈ H1 atol = 1.0e-6
+    @test convert(TensorMap, H1 + H2) ≈ convert(TensorMap, H1) + convert(TensorMap, H2) atol = 1.0e-6
+    H1_trunc = changebonds(H1, SvdCut(; trscheme = truncrank(0)))
+    @test H1_trunc ≈ H1
+    @test all(left_virtualspace(H1_trunc) .== left_virtualspace(H1))
+    # test dot and application
+    hstate = rand(T, prod(lattice))
+    state = adapt(CuVector{T, CUDA.DeviceMemory}, hstate)
+    @test TensorKit.storagetype(state) == CuVector{T, CUDA.DeviceMemory}
+    hmps = FiniteMPS(hstate)
+    mps = adapt(CuVector{T, CUDA.DeviceMemory}, hmps)
+    @test TensorKit.storagetype(mps) == CuVector{T, CUDA.DeviceMemory}
+    @test norm(mps) ≈ norm(state)
+    @test norm(H1) ≈ norm(H1_tm)
+
+    @test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory}
+    hH1mps = hH1 * hmps
+    H1mps = H1 * mps
+    @test TensorKit.storagetype(H1mps) == CuVector{T, CUDA.DeviceMemory}
+    hH1tmstate = hH1_tm * hstate
+    H1tmstate = H1_tm * state
+    @test TensorKit.storagetype(H1tmstate) == CuVector{T, CUDA.DeviceMemory}
+    @test norm(H1mps) ≈ norm(H1tmstate)
+
+    H1mpstm = convert(TensorMap, H1mps)
+    @test TensorKit.storagetype(H1mpstm) == CuVector{T, CUDA.DeviceMemory}
+    @test H1mpstm ≈ H1tmstate
+    @test H1 * state ≈ H1tmstate
+
+    H2mps = H2 * mps
+    hH2mps = hH2 * hmps
+    @test TensorKit.storagetype(H2mps) == CuVector{T, CUDA.DeviceMemory}
+    @test dot(mps, H2, mps) ≈ dot(mps, H2mps)
+    @test dot(hmps, hH2mps) ≈ dot(mps, H2mps)
+    @test dot(mps, H2, mps) ≈ dot(hmps, hH2mps)
+    # test constructor from dictionary with mixed linear and Cartesian lattice indices as keys
+    grid = square = fill(V, 3, 3)
+
+    local_operators = Dict((I,) => O₁ for I in eachindex(grid))
+    I_vertical = CartesianIndex(1, 0)
+    vertical_operators = Dict(
+        (I, I + I_vertical) => O₂ for I in eachindex(IndexCartesian(), square) if I[1] < size(square, 1)
+    )
+    I_horizontal = CartesianIndex(0, 1)
+    horizontal_operators = Dict(
+        (I, I + I_horizontal) => O₂ for I in eachindex(IndexCartesian(), square) if I[2] < size(square, 1)
+    )
+    operators = merge(local_operators, vertical_operators, horizontal_operators)
+    H4 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, operators))
+    @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory}
+
+    @test H4 ≈
+        adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, local_operators)) +
+        adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, vertical_operators)) +
+        adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, horizontal_operators)) atol = 1.0e-4
+    @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory}
+
+    H4′ = H4 / 3 + 2H4 / 3
+    @test TensorKit.storagetype(H4′) == CuVector{T, CUDA.DeviceMemory}
+    H5 = changebonds(H4′, SvdCut(; trscheme = trunctol(; atol = 1.0e-12)))
+    @test TensorKit.storagetype(H5) == CuVector{T, CUDA.DeviceMemory}
+    # needed here to avoid real * complex multiplication in cuTENSOR, which isn't supported
+    psi = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPS(T, physicalspace(H5), V ⊕ rightunitspace(V)))
+    @test expectation_value(psi, H4) ≈ expectation_value(psi, H5)
+end
+
+pspaces = (ℙ^4, Rep[U₁](0 => 2), Rep[SU₂](1 => 1))
+vspaces = (ℙ^10, Rep[U₁]((0 => 20)), Rep[SU₂](1 // 2 => 10, 3 // 2 => 5, 5 // 2 => 1))
+
+@testset "CuInfiniteMPOHamiltonian $(sectortype(pspace))" for (pspace, Dspace) in zip(pspaces, vspaces)
+    # generate a 1-2-3 body interaction
+    operators = ntuple(3) do i
+        O = rand(ComplexF64, pspace^i, pspace^i)
+        return O += O'
+    end
+
+    H1 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, InfiniteMPOHamiltonian(operators[1]))
+    H2 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, InfiniteMPOHamiltonian(operators[2]))
+    H3 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, repeat(InfiniteMPOHamiltonian(operators[3]), 2))
+
+    @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory}
+    @test TensorKit.storagetype(H2) == CuVector{ComplexF64, CUDA.DeviceMemory}
+    @test TensorKit.storagetype(H3) == CuVector{ComplexF64, CUDA.DeviceMemory}
+    # make a teststate to measure expectation values for
+    ψ1 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, InfiniteMPS([pspace], [Dspace]))
+    ψ2 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, InfiniteMPS([pspace, pspace], [Dspace, Dspace]))
+    @test TensorKit.storagetype(ψ1) == CuVector{ComplexF64, CUDA.DeviceMemory}
+    @test TensorKit.storagetype(ψ2) == CuVector{ComplexF64, CUDA.DeviceMemory}
+
+    e1 = expectation_value(ψ1, H1)
+    e2 = expectation_value(ψ1, H2)
+
+    H1 = 2 * H1
+    @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory}
+    H1 -= [1]
+    @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory}
+    @test e1 * 2 - 1 ≈ expectation_value(ψ1, H1) atol = 1.0e-10
+
+    H1 = H1 + H2
+    @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory}
+    @test e1 * 2 + e2 - 1 ≈ expectation_value(ψ1, H1) atol = 1.0e-10
+
+    H1 = repeat(H1, 2)
+    @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory}
+
+    e1 = expectation_value(ψ2, H1)
+    e3 = expectation_value(ψ2, H3)
+
+    @test e1 + e3 ≈ expectation_value(ψ2, H1 + H3) atol = 1.0e-10
+
+    H4 = H1 + H3
+    @test TensorKit.storagetype(H4) == CuVector{ComplexF64, CUDA.DeviceMemory}
+    h4 = H4 * H4
+    @test TensorKit.storagetype(h4) == CuVector{ComplexF64, CUDA.DeviceMemory}
+    @test real(expectation_value(ψ2, H4)) >= 0
+end