add TensorOperations support#2944
Conversation
|
using TensorOperations
using Reactant
α = randn()
A = randn(5, 5, 5, 5, 5, 5)
B = randn(5, 5, 5)
C = randn(5, 5, 5)
D = zeros(5, 5, 5)
αre = ConcreteRNumber(α)
Are = Reactant.to_rarray(A)
Bre = Reactant.to_rarray(B)
Cre = Reactant.to_rarray(C)
Dre = Reactant.to_rarray(D)tensoraddfadd(α,C) = @tensor E[a,b,c] := α * C[c,a,b]
@code_hlo fadd(α, Cre)
@code_hlo fadd(αre, Cre)
fadd(α, C) ≈ @jit fadd(α, Cre)
fadd(α, C) ≈ @jit fadd(αre, Cre)returns module @reactant_fadd attributes {mhlo.num_partitions = 1 : i64, mhlo.num_replicas = 1 : i64} {
func.func @main(%arg0: tensor<5x5x5xf64> {enzymexla.memory_effects = []}) -> tensor<5x5x5xf64> attributes {enzymexla.memory_effects = []} {
%cst = stablehlo.constant dense<1.6177470041443587> : tensor<5x5x5xf64>
%0 = stablehlo.multiply %cst, %arg0 : tensor<5x5x5xf64>
%1 = stablehlo.transpose %0, dims = [2, 0, 1] : (tensor<5x5x5xf64>) -> tensor<5x5x5xf64>
return %1 : tensor<5x5x5xf64>
}
}
module @reactant_fadd attributes {mhlo.num_partitions = 1 : i64, mhlo.num_replicas = 1 : i64} {
func.func @main(%arg0: tensor<f64> {enzymexla.memory_effects = []}, %arg1: tensor<5x5x5xf64> {enzymexla.memory_effects = []}) -> tensor<5x5x5xf64> attributes {enzymexla.memory_effects = []} {
%0 = stablehlo.transpose %arg1, dims = [2, 0, 1] : (tensor<5x5x5xf64>) -> tensor<5x5x5xf64>
%1 = stablehlo.broadcast_in_dim %arg0, dims = [] : (tensor<f64>) -> tensor<5x5x5xf64>
%2 = stablehlo.multiply %1, %0 : tensor<5x5x5xf64>
return %2 : tensor<5x5x5xf64>
}
}
true
truetensorcontractfcontract(B,C) = @tensor X[] := B[a,b,c] * C[c,a,b]
@code_hlo fcontract(Bre, Cre)
fcontract(B, C) ≈ @jit fcontract(Bre, Cre)returns module @reactant_fcontract attributes {mhlo.num_partitions = 1 : i64, mhlo.num_replicas = 1 : i64} {
func.func @main(%arg0: tensor<5x5x5xf64> {enzymexla.memory_effects = []}, %arg1: tensor<5x5x5xf64> {enzymexla.memory_effects = []}) -> tensor<f64> attributes {enzymexla.memory_effects = []} {
%0 = stablehlo.dot_general %arg0, %arg1, contracting_dims = [2, 1, 0] x [1, 0, 2], precision = [DEFAULT, DEFAULT] : (tensor<5x5x5xf64>, tensor<5x5x5xf64>) -> tensor<f64>
return %0 : tensor<f64>
}
}
true |
|
|
||
| At = materialize_traced_array(A) | ||
| Bt = materialize_traced_array(B) | ||
|
|
There was a problem hiding this comment.
so is there a reason the default [or reactant] backend won't just call into say matmuls or anything else?
Like sure this may be more optimal code emission to begin with, but also if we would optimize it to the generalized matmul anyways without doing reactant-specific overloading that would be nicer
There was a problem hiding this comment.
it's mostly a problem with the design of TensorOperations: TO favors in-place functions so it will try allocate tensors in advance and we need to correctly point the way to do that (i.e. tensoralloc).
the tensoradd!, tensorcontract! I don't think are critical, but avoid the broadcasting assignment of the default TO backend.
also, it also uses some VectorInterface functions and types that we don't yet support.
for example, without this PR, it errors on the fadd example above in the following way:
ERROR: MethodError: no method matching fill!(::Array{Reactant.TracedRNumber{Float64}, 3}, ::Reactant.TracedRNumber{Float64})
This error has been manually thrown, explicitly, so the method may exist but be intentionally marked as unimplemented.
Closest candidates are:
fill!(::Array{T, N}, ::Reactant.TracedRNumber{T2}) where {T, N, T2}
@ Reactant ~/.julia/packages/Reactant/auube/src/TracedRArray.jl:254
fill!(::Array{T}, ::Any) where T
@ Base array.jl:326
fill!(::AbstractArray{Reactant.TracedRNumber{T}, N}, ::Reactant.TracedRNumber{T2}) where {T, N, T2}
@ Reactant ~/.julia/packages/Reactant/auube/src/TracedRArray.jl:248
...
Stacktrace:
[1] call_with_reactant
@ ./none:-1 [inlined]
[2] call_with_reactant(::Reactant.EnsureReturnType{…}, ::typeof(fill!), ::Array{…}, ::Reactant.TracedRNumber{…})
@ Reactant ~/.julia/packages/Reactant/auube/src/utils.jl:0
[3] call_with_reactant
@ ./none:-1 [inlined]
[4] call_with_reactant(::Reactant.EnsureReturnType{…}, ::typeof(VectorInterface.zerovector!), ::Array{…})
@ Reactant ~/.julia/packages/Reactant/auube/src/utils.jl:0
[5] call_with_reactant
@ ./none:-1 [inlined]
[6] call_with_reactant(::Reactant.EnsureReturnType{…}, ::typeof(VectorInterface.zerovector!!), ::Array{…})
@ Reactant ~/.julia/packages/Reactant/auube/src/utils.jl:0
[7] call_with_reactant
@ ./none:-1 [inlined]
[8] call_with_reactant(::Reactant.EnsureReturnType{…}, ::typeof(tensoralloc), ::Type{…}, ::Tuple{…}, ::Val{…}, ::TensorOperations.DefaultAllocator)
@ Reactant ~/.julia/packages/Reactant/auube/src/utils.jl:0
[9] tensoralloc_add
@ ~/.julia/packages/TensorOperations/36oM9/src/implementation/allocator.jl:119
[10] call_with_reactant
@ ./none:-1 [inlined]
[11] call_with_reactant(::Reactant.EnsureReturnType{…}, ::typeof(TensorOperations.tensoralloc_add), ::Type{…}, ::Reactant.TracedRArray{…}, ::Tuple{…}, ::Bool, ::Val{…}, ::TensorOperations.DefaultAllocator)
@ Reactant ~/.julia/packages/Reactant/auube/src/utils.jl:0
[12] call_with_reactant
@ ./none:-1 [inlined]
[13] call_with_reactant(::Reactant.EnsureReturnType{…}, ::typeof(TensorOperations.tensoralloc_add), ::Type{…}, ::Reactant.TracedRArray{…}, ::Tuple{…}, ::Bool, ::Val{…})
@ Reactant ~/.julia/packages/Reactant/auube/src/utils.jl:0
[14] call_with_reactant
@ ./none:-1 [inlined]
[15] call_with_reactant(::typeof(fadd), ::Float64, ::Reactant.TracedRArray{Float64, 3})
@ Reactant ~/.julia/packages/Reactant/auube/src/utils.jl:0
[16] make_mlir_fn(f::typeof(fadd), args::Tuple{…}, kwargs::@NamedTuple{}, name::String, concretein::Bool; toscalar::Bool, return_dialect::Symbol, args_in_result::Symbol, construct_function_without_args::Bool, do_transpose::Bool, within_autodiff::Bool, input_shardings::Nothing, output_shardings::Nothing, runtime::Val{…}, verify_arg_names::Nothing, argprefix::Symbol, resprefix::Symbol, resargprefix::Symbol, num_replicas::Int64, optimize_then_pad::Bool)
@ Reactant.TracedUtils ~/.julia/packages/Reactant/auube/src/TracedUtils.jl:370There was a problem hiding this comment.
I think the default backend for non-strided arrays does indeed end up calling matmuls and other base functions, but for strided arrays we bypass that to make use of Strided.jl for the permutations. In some sense I'm slightly surprised that the reactant array types would end up hitting this non-default path, but I'd have to investigate in more detail to see what is really going on.
It is definitely true though that the approach tends to be first allocate an appropriate result, then do the operation. In that sense, the overloads for tensoradd_type etc here are indeed what is required to allocate the correct output types, which is similar to how our GPU array extensions are structured.
The custom allocator type we have for e.g. CUDA serves a slightly different purpose, in the sense that there the point is that you might want to allocate GPU intermediate arrays even if the inputs are on the CPU, e.g. in a workflow where you first transfer the data to the device, then do all the computations, and finally collect everything back. I am not too familiar with Reactant (yet), but that seems less relevant for that?
There was a problem hiding this comment.
right, I added the ReactantAllocator for dispatching on tensoralloc_add but doesn't seem to be called at all in the tests. ReactantAllocator can be removed.
regarding the default backend, Strided-based backends generate terrible code on tensoradd! with Reactant due to the loop unrolling. but BaseCopy works great! so ReactantBackend can also be removed.
the code generated for the tests is not optimal, but I can write optimizations for them.
|
julia> a = ones(3,2,2,3,2,2,3);
julia> are = Reactant.to_rarray(a);
julia> f(a) = @tensor b[l,k,m] := a[l,i,i,k,j,j,m]
f (generic function with 1 method)
julia> @code_hlo f(are)
module @reactant_f attributes {mhlo.num_partitions = 1 : i64, mhlo.num_replicas = 1 : i64} {
func.func @main(%arg0: tensor<3x2x2x3x2x2x3xf64> {enzymexla.memory_effects = []}) -> tensor<3x3x3xf64> attributes {enzymexla.memory_effects = []} {
%c = stablehlo.constant dense<[[0, 0, 0, 0], [1, 1, 0, 0], [0, 0, 1, 1], [1, 1, 1, 1]]> : tensor<4x4xi64>
%cst = stablehlo.constant dense<0.000000e+00> : tensor<f64>
%0 = stablehlo.transpose %arg0, dims = [6, 5, 4, 3, 2, 1, 0] : (tensor<3x2x2x3x2x2x3xf64>) -> tensor<3x2x2x3x2x2x3xf64>
%1 = "stablehlo.gather"(%0, %c) <{dimension_numbers = #stablehlo.gather<offset_dims = [0, 1, 2], collapsed_slice_dims = [1, 2, 4, 5], start_index_map = [1, 2, 4, 5]>, indices_are_sorted = false, slice_sizes = array<i64: 3, 1, 1, 3, 1, 1, 3>}> : (tensor<3x2x2x3x2x2x3xf64>, tensor<4x4xi64>) -> tensor<3x3x3x4xf64>
%2 = stablehlo.reduce(%1 init: %cst) applies stablehlo.add across dimensions = [3] : (tensor<3x3x3x4xf64>, tensor<f64>) -> tensor<3x3x3xf64>
%3 = stablehlo.transpose %2, dims = [2, 1, 0] : (tensor<3x3x3xf64>) -> tensor<3x3x3xf64>
return %3 : tensor<3x3x3xf64>
}
}
julia> f(a) ≈ @jit f(are)
true |
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
why was it changed on the first place?
current implementation requires the definitions of QuantumKitHub/TensorOperations.jl#274, but thinking about skipping a
ReactantBackendbecause that's not sth the user should touch.cc @lkdvos @kshyatt