[RISCV] Handle codegen for Big Endian #172668
Conversation
@llvm/pr-subscribers-backend-risc-v

Author: Djordje Todorovic (djtodoro)

Changes

The clang PR is waiting for the codegen changes to be merged first.

Patch is 27.03 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/172668.diff

6 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 2d6bb06d689c3..b9bf9a55bf641 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8347,6 +8347,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
+ // For big-endian, swap the order of Lo and Hi.
+ if (!Subtarget.isLittleEndian())
+ std::swap(Lo, Hi);
+
SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
return DAG.getMergeValues({Pair, Chain}, DL);
}
@@ -8419,15 +8423,22 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
DAG.getVTList(MVT::i32, MVT::i32), StoredVal);
- SDValue Lo = DAG.getStore(Chain, DL, Split.getValue(0), BasePtr,
+ SDValue Lo = Split.getValue(0);
+ SDValue Hi = Split.getValue(1);
+
+ // For big-endian, swap the order of Lo and Hi before storing.
+ if (!Subtarget.isLittleEndian())
+ std::swap(Lo, Hi);
+
+ SDValue LoStore = DAG.getStore(Chain, DL, Lo, BasePtr,
Store->getPointerInfo(), Store->getBaseAlign(),
Store->getMemOperand()->getFlags());
BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
- SDValue Hi = DAG.getStore(Chain, DL, Split.getValue(1), BasePtr,
+ SDValue HiStore = DAG.getStore(Chain, DL, Hi, BasePtr,
Store->getPointerInfo().getWithOffset(4),
Store->getBaseAlign(),
Store->getMemOperand()->getFlags());
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LoStore, HiStore);
}
if (VT == MVT::i64) {
assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
@@ -15160,8 +15171,12 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Subtarget.hasStdExtDOrZdinx()) {
SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
DAG.getVTList(MVT::i32, MVT::i32), Op0);
- SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
- NewReg.getValue(0), NewReg.getValue(1));
+ SDValue Lo = NewReg.getValue(0);
+ SDValue Hi = NewReg.getValue(1);
+ // For big-endian, swap the order when building the i64 pair.
+ if (!Subtarget.isLittleEndian())
+ std::swap(Lo, Hi);
+ SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
Results.push_back(RetReg);
} else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
isTypeLegal(Op0VT)) {
@@ -22538,14 +22553,27 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
MachineMemOperand *MMOHi = MF.getMachineMemOperand(
MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
- BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
- .addFrameIndex(FI)
- .addImm(0)
- .addMemOperand(MMOLo);
- BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
- .addFrameIndex(FI)
- .addImm(4)
- .addMemOperand(MMOHi);
+
+ // For big-endian, the high part is at offset 0 and the low part at offset 4.
+ if (!Subtarget.isLittleEndian()) {
+ BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMOLo);
+ BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(MMOHi);
+ } else {
+ BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMOLo);
+ BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(MMOHi);
+ }
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
@@ -22571,16 +22599,31 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
MachineMemOperand *MMOHi = MF.getMachineMemOperand(
MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
- BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
- .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
- .addFrameIndex(FI)
- .addImm(0)
- .addMemOperand(MMOLo);
- BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
- .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
- .addFrameIndex(FI)
- .addImm(4)
- .addMemOperand(MMOHi);
+
+ // For big-endian, store the high part at offset 0 and the low part at offset 4.
+ if (!Subtarget.isLittleEndian()) {
+ BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
+ .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMOLo);
+ BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
+ .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(MMOHi);
+ } else {
+ BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
+ .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMOLo);
+ BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
+ .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(MMOHi);
+ }
TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, Register());
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
@@ -23407,6 +23450,13 @@ static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
}
+
+ // For big-endian, swap the order of Lo and Hi when building the pair.
+ const RISCVSubtarget &Subtarget = DAG.getSubtarget<RISCVSubtarget>();
+ // TESTED with: CodeGen/RISCV/bigendian-double-bitmanip.ll
+ if (!Subtarget.isLittleEndian())
+ std::swap(Lo, Hi);
+
return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
@@ -23778,6 +23828,10 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue Lo = SplitF64.getValue(0);
SDValue Hi = SplitF64.getValue(1);
+ // For big-endian, swap the order of Lo and Hi when passing.
+ if (!Subtarget.isLittleEndian())
+ std::swap(Lo, Hi);
+
Register RegLo = VA.getLocReg();
RegsToPass.push_back(std::make_pair(RegLo, Lo));
@@ -24005,8 +24059,14 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
MVT::i32, Glue);
Chain = RetValue2.getValue(1);
Glue = RetValue2.getValue(2);
- RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
- RetValue2);
+
+ // For big-endian, swap the order when building the pair.
+ SDValue Lo = RetValue;
+ SDValue Hi = RetValue2;
+ if (!Subtarget.isLittleEndian())
+ std::swap(Lo, Hi);
+
+ RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
} else
RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
@@ -24071,6 +24131,11 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
DAG.getVTList(MVT::i32, MVT::i32), Val);
SDValue Lo = SplitF64.getValue(0);
SDValue Hi = SplitF64.getValue(1);
+
+ // For big-endian, swap the order of Lo and Hi when returning.
+ if (!Subtarget.isLittleEndian())
+ std::swap(Lo, Hi);
+
Register RegLo = VA.getLocReg();
Register RegHi = RVLocs[++i].getLocReg();
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index f86265a21d17e..462ba1115f0ab 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -109,6 +109,7 @@ RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU,
unsigned RVVVectorBitsMax,
const TargetMachine &TM)
: RISCVGenSubtargetInfo(TT, CPU, TuneCPU, FS),
+ TargetTriple(TT),
RVVVectorBitsMin(RVVVectorBitsMin), RVVVectorBitsMax(RVVVectorBitsMax),
FrameLowering(
initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)),
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 29df53c6c9893..b591ecfa2c7a8 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -97,6 +97,8 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
RISCVProcFamilyEnum RISCVProcFamily = Others;
RISCVVRGatherCostModelEnum RISCVVRGatherCostModel = Quadratic;
+ Triple TargetTriple;
+
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
bool ATTRIBUTE = DEFAULT;
#include "RISCVGenSubtargetInfo.inc"
@@ -220,6 +222,9 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
}
bool is64Bit() const { return IsRV64; }
+ bool isLittleEndian() const {
+ return TargetTriple.isLittleEndian();
+ }
MVT getXLenVT() const {
return is64Bit() ? MVT::i64 : MVT::i32;
}
diff --git a/llvm/test/CodeGen/RISCV/bigendian-double-bitmanip.ll b/llvm/test/CodeGen/RISCV/bigendian-double-bitmanip.ll
new file mode 100644
index 0000000000000..c85fd5d4c55da
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/bigendian-double-bitmanip.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32 -mattr=+d -verify-machineinstrs < %s | FileCheck -check-prefix=RV32IFD-LE %s
+; RUN: llc -mtriple=riscv32be -target-abi=ilp32 -mattr=+d -verify-machineinstrs < %s | FileCheck -check-prefix=RV32IFD-BE %s
+
+; Test operations that involve SplitF64/BuildPairF64 on RV32 with D extension
+; but soft-float ABI. This configuration triggers the special handling for
+; big-endian.
+
+define double @fneg(double %a) nounwind {
+; RV32IFD-LE-LABEL: fneg:
+; RV32IFD-LE: # %bb.0:
+; RV32IFD-LE-NEXT: lui a2, 524288
+; RV32IFD-LE-NEXT: xor a1, a1, a2
+; RV32IFD-LE-NEXT: ret
+;
+; RV32IFD-BE-LABEL: fneg:
+; RV32IFD-BE: # %bb.0:
+; RV32IFD-BE-NEXT: lui a2, 524288
+; RV32IFD-BE-NEXT: xor a0, a0, a2
+; RV32IFD-BE-NEXT: ret
+ %1 = fneg double %a
+ ret double %1
+}
+
+define double @fabs(double %a) nounwind {
+; RV32IFD-LE-LABEL: fabs:
+; RV32IFD-LE: # %bb.0:
+; RV32IFD-LE-NEXT: slli a1, a1, 1
+; RV32IFD-LE-NEXT: srli a1, a1, 1
+; RV32IFD-LE-NEXT: ret
+;
+; RV32IFD-BE-LABEL: fabs:
+; RV32IFD-BE: # %bb.0:
+; RV32IFD-BE-NEXT: slli a0, a0, 1
+; RV32IFD-BE-NEXT: srli a0, a0, 1
+; RV32IFD-BE-NEXT: ret
+ %1 = call double @llvm.fabs.f64(double %a)
+ ret double %1
+}
+
+define double @fcopysign(double %a, double %b) nounwind {
+; RV32IFD-LE-LABEL: fcopysign:
+; RV32IFD-LE: # %bb.0:
+; RV32IFD-LE-NEXT: addi sp, sp, -16
+; RV32IFD-LE-NEXT: sw a2, 8(sp)
+; RV32IFD-LE-NEXT: sw a3, 12(sp)
+; RV32IFD-LE-NEXT: fld fa5, 8(sp)
+; RV32IFD-LE-NEXT: sw a0, 8(sp)
+; RV32IFD-LE-NEXT: sw a1, 12(sp)
+; RV32IFD-LE-NEXT: fld fa4, 8(sp)
+; RV32IFD-LE-NEXT: fsgnj.d fa5, fa4, fa5
+; RV32IFD-LE-NEXT: fsd fa5, 8(sp)
+; RV32IFD-LE-NEXT: lw a0, 8(sp)
+; RV32IFD-LE-NEXT: lw a1, 12(sp)
+; RV32IFD-LE-NEXT: addi sp, sp, 16
+; RV32IFD-LE-NEXT: ret
+;
+; RV32IFD-BE-LABEL: fcopysign:
+; RV32IFD-BE: # %bb.0:
+; RV32IFD-BE-NEXT: addi sp, sp, -16
+; RV32IFD-BE-NEXT: sw a2, 8(sp)
+; RV32IFD-BE-NEXT: sw a3, 12(sp)
+; RV32IFD-BE-NEXT: fld fa5, 8(sp)
+; RV32IFD-BE-NEXT: sw a0, 8(sp)
+; RV32IFD-BE-NEXT: sw a1, 12(sp)
+; RV32IFD-BE-NEXT: fld fa4, 8(sp)
+; RV32IFD-BE-NEXT: fsgnj.d fa5, fa4, fa5
+; RV32IFD-BE-NEXT: fsd fa5, 8(sp)
+; RV32IFD-BE-NEXT: lw a0, 8(sp)
+; RV32IFD-BE-NEXT: lw a1, 12(sp)
+; RV32IFD-BE-NEXT: addi sp, sp, 16
+; RV32IFD-BE-NEXT: ret
+ %1 = call double @llvm.copysign.f64(double %a, double %b)
+ ret double %1
+}
+
+declare double @llvm.fabs.f64(double)
+declare double @llvm.copysign.f64(double, double)
diff --git a/llvm/test/CodeGen/RISCV/bigendian-f64-call.ll b/llvm/test/CodeGen/RISCV/bigendian-f64-call.ll
new file mode 100644
index 0000000000000..83057e23a0d85
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/bigendian-f64-call.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32 -mattr=+d -verify-machineinstrs < %s | FileCheck -check-prefix=RV32LE %s
+; RUN: llc -mtriple=riscv32be -target-abi=ilp32 -mattr=+d -verify-machineinstrs < %s | FileCheck -check-prefix=RV32BE %s
+
+; Test f64 function calls with D extension and soft-float ABI
+; This specifically tests the LowerCall path that needs to swap Lo/Hi for BE
+
+declare double @external_func(double, double)
+
+define double @test_f64_call(double %a, double %b) {
+; RV32LE-LABEL: test_f64_call:
+; RV32LE: # %bb.0:
+; RV32LE-NEXT: addi sp, sp, -16
+; RV32LE-NEXT: .cfi_def_cfa_offset 16
+; RV32LE-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32LE-NEXT: .cfi_offset ra, -4
+; RV32LE-NEXT: call external_func
+; RV32LE-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32LE-NEXT: .cfi_restore ra
+; RV32LE-NEXT: addi sp, sp, 16
+; RV32LE-NEXT: .cfi_def_cfa_offset 0
+; RV32LE-NEXT: ret
+;
+; RV32BE-LABEL: test_f64_call:
+; RV32BE: # %bb.0:
+; RV32BE-NEXT: addi sp, sp, -16
+; RV32BE-NEXT: .cfi_def_cfa_offset 16
+; RV32BE-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32BE-NEXT: .cfi_offset ra, -4
+; RV32BE-NEXT: call external_func
+; RV32BE-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32BE-NEXT: .cfi_restore ra
+; RV32BE-NEXT: addi sp, sp, 16
+; RV32BE-NEXT: .cfi_def_cfa_offset 0
+; RV32BE-NEXT: ret
+ %result = call double @external_func(double %a, double %b)
+ ret double %result
+}
+
+; Test with a computation before the call to force SplitF64
+define double @test_f64_call_with_fadd(double %a, double %b) {
+; RV32LE-LABEL: test_f64_call_with_fadd:
+; RV32LE: # %bb.0:
+; RV32LE-NEXT: addi sp, sp, -16
+; RV32LE-NEXT: .cfi_def_cfa_offset 16
+; RV32LE-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32LE-NEXT: .cfi_offset ra, -4
+; RV32LE-NEXT: sw a2, 0(sp)
+; RV32LE-NEXT: sw a3, 4(sp)
+; RV32LE-NEXT: fld fa5, 0(sp)
+; RV32LE-NEXT: sw a0, 0(sp)
+; RV32LE-NEXT: sw a1, 4(sp)
+; RV32LE-NEXT: fld fa4, 0(sp)
+; RV32LE-NEXT: fadd.d fa5, fa4, fa5
+; RV32LE-NEXT: fsd fa5, 0(sp)
+; RV32LE-NEXT: lw a0, 0(sp)
+; RV32LE-NEXT: lw a1, 4(sp)
+; RV32LE-NEXT: mv a2, a0
+; RV32LE-NEXT: mv a3, a1
+; RV32LE-NEXT: call external_func
+; RV32LE-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32LE-NEXT: .cfi_restore ra
+; RV32LE-NEXT: addi sp, sp, 16
+; RV32LE-NEXT: .cfi_def_cfa_offset 0
+; RV32LE-NEXT: ret
+;
+; RV32BE-LABEL: test_f64_call_with_fadd:
+; RV32BE: # %bb.0:
+; RV32BE-NEXT: addi sp, sp, -16
+; RV32BE-NEXT: .cfi_def_cfa_offset 16
+; RV32BE-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32BE-NEXT: .cfi_offset ra, -4
+; RV32BE-NEXT: sw a2, 0(sp)
+; RV32BE-NEXT: sw a3, 4(sp)
+; RV32BE-NEXT: fld fa5, 0(sp)
+; RV32BE-NEXT: sw a0, 0(sp)
+; RV32BE-NEXT: sw a1, 4(sp)
+; RV32BE-NEXT: fld fa4, 0(sp)
+; RV32BE-NEXT: fadd.d fa5, fa4, fa5
+; RV32BE-NEXT: fsd fa5, 0(sp)
+; RV32BE-NEXT: lw a0, 0(sp)
+; RV32BE-NEXT: lw a1, 4(sp)
+; RV32BE-NEXT: mv a2, a0
+; RV32BE-NEXT: mv a3, a1
+; RV32BE-NEXT: call external_func
+; RV32BE-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32BE-NEXT: .cfi_restore ra
+; RV32BE-NEXT: addi sp, sp, 16
+; RV32BE-NEXT: .cfi_def_cfa_offset 0
+; RV32BE-NEXT: ret
+ %sum = fadd double %a, %b
+ %result = call double @external_func(double %sum, double %sum)
+ ret double %result
+}
diff --git a/llvm/test/CodeGen/RISCV/bigendian-load-store.ll b/llvm/test/CodeGen/RISCV/bigendian-load-store.ll
new file mode 100644
index 0000000000000..175346d5ab0f5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/bigendian-load-store.ll
@@ -0,0 +1,435 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32LE
+; RUN: llc -mtriple=riscv32be -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32BE
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64LE
+; RUN: llc -mtriple=riscv64be -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64BE
+
+; Test basic load/store operations on both little-endian and big-endian RISC-V
+
+define i32 @load_i32(ptr %p) {
+; RV32LE-LABEL: load_i32:
+; RV32LE: # %bb.0:
+; RV32LE-NEXT: lw a0, 0(a0)
+; RV32LE-NEXT: ret
+;
+; RV32BE-LABEL: load_i32:
+; RV32BE: # %bb.0:
+; RV32BE-NEXT: lw a0, 0(a0)
+; RV32BE-NEXT: ret
+;
+; RV64LE-LABEL: load_i32:
+; RV64LE: # %bb.0:
+; RV64LE-NEXT: lw a0, 0(a0)
+; RV64LE-NEXT: ret
+;
+; RV64BE-LABEL: load_i32:
+; RV64BE: # %bb.0:
+; RV64BE-NEXT: lw a0, 0(a0)
+; RV64BE-NEXT: ret
+ %v = load i32, ptr %p
+ ret i32 %v
+}
+
+define void @store_i32(ptr %p, i32 %v) {
+; RV32LE-LABEL: store_i32:
+; RV32LE: # %bb.0:
+; RV32LE-NEXT: sw a1, 0(a0)
+; RV32LE-NEXT: ret
+;
+; RV32BE-LABEL: store_i32:
+; RV32BE: # %bb.0:
+; RV32BE-NEXT: sw a1, 0(a0)
+; RV32BE-NEXT: ret
+;
+; RV64LE-LABEL: store_i32:
+; RV64LE: # %bb.0:
+; RV64LE-NEXT: sw a1, 0(a0)
+; RV64LE-NEXT: ret
+;
+; RV64BE-LABEL: store_i32:
+; RV64BE: # %bb.0:
+; RV64BE-NEXT: sw a1, 0(a0)
+; RV64BE-NEXT: ret
+ store i32 %v, ptr %p
+ ret void
+}
+
+define i16 @load_i16(ptr %p) {
+; RV32LE-LABEL: load_i16:
+; RV32LE: # %bb.0:
+; RV32LE-NEXT: lh a0, 0(a0)
+; RV32LE-NEXT: ret
+;
+; RV32BE-LABEL: load_i16:
+; RV32BE: # %bb.0:
+; RV32BE-NEXT: lh a0, 0(a0)
+; RV32BE-NEXT: ret
+;
+; RV64LE-LABEL: load_i16:
+; RV64LE: # %bb.0:
+; RV64LE-NEXT: lh a0, 0(a0)
+; RV64LE-NEXT: ret
+;
+; RV64BE-LABEL: load_i16:
+; RV64BE: # %bb.0:
+; RV64BE-NEXT: lh a0, 0(a0)
+; RV64BE-NEXT: ret
+ %v = load i16, ptr %p
+ ret i16 %v
+}
+
+define void @store_i16(ptr %p, i16 %v) {
+; RV32LE-LABEL: store_i16:
+; RV32LE: # %bb.0:
+; RV32LE-NEXT: sh a1, 0(a0)
+; RV32LE-NEXT: ret
+;
+; RV32BE-LABEL: store_i16:
+; RV32BE: # %bb.0:
+; RV32BE-NEXT: sh a1, 0(a0)
+; RV32BE-NEXT: ret
+;
+; RV64LE-LABEL: store_i16:
+; RV64LE: # %bb.0:
+; RV64LE-NEXT: sh a1, 0(a0)
+; RV64LE-NEXT: ret
+;
+; RV64BE-LABEL: store_i16:
+; RV64BE: # %bb.0:
+; RV64BE-NEXT: sh a1, 0(a0)
+; RV64BE-NEXT: ret
+ store i16 %v, ptr %p
+ ret void
+}
+
+define i8 @load_i8(ptr %p) {
+; RV32LE-LABEL: load_i8:
+; RV32LE: # %bb.0:
+; RV32LE-NEXT: lbu a0, 0(a0)
+; RV32LE-NEXT: ret
+;
+; RV32BE-LABEL: load_i8:
+; RV32BE: # %bb.0:
+; RV32BE-NEXT: lbu a0, 0(a0)
+; RV32BE-NEXT: ret
+;
+; RV64LE-LABEL: load_i8:
+; RV64LE: # %bb.0:
+; RV64LE-NEXT: lbu a0, 0(a0)
+; RV64LE-NEXT: ret
+;
+; RV64BE-LABEL: load_i8:
+; RV64BE: # %bb.0:
+; RV64BE-NEXT: lbu a0, 0(a0)
+; RV64BE-NEXT: ret
+ %v = load i8, ptr %p
+ ret i8 %v
+}
+
+define void @store_i8(ptr %p, i8 %v) {
+; RV32LE-LABEL: store_i8:
+; RV32LE: # %bb.0:
+; RV32LE-NEXT: sb a1, 0(a0)
+; RV32LE-NEXT: ret
+;
+; RV32BE-LABEL: store_i8:
+; RV32BE: # %bb.0:
+; RV32BE-NEXT: sb a1, 0(a0)
+; RV32BE-NEXT: ret
+;
+; RV64LE-LABEL: store_i8:
+; RV64LE: # %bb.0:
+; RV64LE-NEXT: sb a1, 0(a0)
+; RV64LE-NEXT: ret
+;
+; RV64BE-LABEL: store_i8:
+; RV64BE: # %bb.0:
+; RV64BE-NEXT: sb a1, 0(a0)
+; RV64BE-NEXT: ret
+ store i8 %v, ptr %p
+ ret void
+}
+
+define i64 @load_i64(ptr %p) {
+; RV32LE-LABEL: load_i64:
+; RV32LE: # %bb.0:
+; RV32LE-NEXT: lw a2, 0(a0)
+; RV32LE-NEXT: lw a1, 4(a0)
+; RV32LE-NE...
[truncated]
✅ With the latest revision this PR passed the C/C++ code formatter.
- Handle BE in RISCVSubtarget
- Handle riscv big-endian f64
- Handle loads/stores
- Add tests for LE vs BE
Force-pushed from 2c1b75b to 924cf81.
    }

    bool is64Bit() const { return IsRV64; }
    bool isLittleEndian() const { return TargetTriple.isLittleEndian(); }
Can we cache this in a bool in the constructor?
Yes.
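A minimal sketch of what the cached flag could look like (the member name IsLittleEndian is an assumption, not necessarily what lands in the final patch); computing it once in the constructor also makes the TargetTriple member added above unnecessary, which comes up again further below:

```cpp
// Sketch only: cache the endianness as a bool instead of storing the Triple.
// In RISCVSubtarget.h:
bool IsLittleEndian = true;
bool isLittleEndian() const { return IsLittleEndian; }

// In the RISCVSubtarget constructor's initializer list (RISCVSubtarget.cpp),
// roughly: ..., IsLittleEndian(TT.isLittleEndian()), ...
```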
    // For big-endian, the high part is at offset 0 and the low part at offset 4.
    if (!Subtarget.isLittleEndian()) {
      BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
Can we swap LoReg and HiReg instead of mostly duplicating the code?
Yes
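A possible shape for that, sketched against the emitSplitF64Pseudo hunk above (it assumes the same surrounding variables: LoReg, HiReg, FI, MMOLo, MMOHi, Subtarget): choose the destination register per offset up front, so only one pair of LW instructions is emitted.

```cpp
// Sketch: a single pair of loads; for big-endian the high half of the f64
// sits at offset 0, so only the register-to-offset mapping changes.
Register FirstReg = LoReg, SecondReg = HiReg;
if (!Subtarget.isLittleEndian())
  std::swap(FirstReg, SecondReg);

BuildMI(*BB, MI, DL, TII.get(RISCV::LW), FirstReg)
    .addFrameIndex(FI)
    .addImm(0)
    .addMemOperand(MMOLo);
BuildMI(*BB, MI, DL, TII.get(RISCV::LW), SecondReg)
    .addFrameIndex(FI)
    .addImm(4)
    .addMemOperand(MMOHi);
```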
    // For big-endian, store the high part at offset 0 and the low part at
    // offset 4.
    if (!Subtarget.isLittleEndian()) {
There's a lot of code duplication here. Can we use more variables to reduce it?
Yes, thanks for the suggestion.
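The store side in emitBuildPairF64Pseudo could be reduced the same way (same assumption about the surrounding variables; operand indices 1 and 2 carry the kill flags, as in the hunk above):

```cpp
// Sketch: swap both the source registers and the operand indices holding the
// kill flags, then emit one pair of stores. For big-endian the high half goes
// to offset 0 and the low half to offset 4.
Register FirstReg = LoReg, SecondReg = HiReg;
unsigned FirstOp = 1, SecondOp = 2;
if (!Subtarget.isLittleEndian()) {
  std::swap(FirstReg, SecondReg);
  std::swap(FirstOp, SecondOp);
}

BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
    .addReg(FirstReg, getKillRegState(MI.getOperand(FirstOp).isKill()))
    .addFrameIndex(FI)
    .addImm(0)
    .addMemOperand(MMOLo);
BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
    .addReg(SecondReg, getKillRegState(MI.getOperand(SecondOp).isKill()))
    .addFrameIndex(FI)
    .addImm(4)
    .addMemOperand(MMOHi);
```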
    RISCVProcFamilyEnum RISCVProcFamily = Others;
    RISCVVRGatherCostModelEnum RISCVVRGatherCostModel = Quadratic;

    Triple TargetTriple;
Do we still need TargetTriple?
No, will remove it.
    ; RV32IFD-BE-LABEL: fneg:
    ; RV32IFD-BE: # %bb.0:
    ; RV32IFD-BE-NEXT: lui a2, 524288
    ; RV32IFD-BE-NEXT: xor a0, a0, a2
The PSABI doc riscv-non-isa/riscv-elf-psabi-doc#470 says:
This register-pair ordering is defined in terms of value significance and is
independent of endianness. For example, on RV32BE a 64-bit scalar returned
in a0/a1 places bits [31:0] (the least-significant XLEN bits) in a0 and
bits [63:32] in a1; memory layout remains big-endian.
Yet this code seems to be toggling the sign bit in a0. If the psabi doc is correct, isn't the sign bit in a1?
The thing is that riscv-non-isa/riscv-elf-psabi-doc#470 is not ready yet -- or at least not merged/accepted officially yet.
And even GCC is not updated to follow all changes we plan for psabi (currently BE is marked as experimental), e.g.:
```
$ cat test.c
#include <stdint.h>
double flip_sign_bit(double a) {
  union { double d; uint64_t i; } u;
  u.d = a;
  u.i ^= (1ULL << 63);
  return u.d;
}
$ riscv64-unknown-elf-gcc -c -O2 -march=rv32gc -mabi=ilp32 -mbig-endian test.c
$ llvm-objdump -d /tmp/test.o

flip_sign_bit:
  lui  a4, 0x80000
  mv   a5, a1
  xor  a4, a4, a0
  mv   a0, a4
  mv   a1, a5
  ret
```

Therefore, once psabi is ready, we can change both GCC and LLVM. Until then, we will emit a warning for riscvbe from clang (#165599), something like:

```
clang: warning: big-endian RISC-V target support is experimental [-Wriscv-be-experimental]
```
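To make the disagreement concrete, here is a small host-side illustration (it only models the two conventions discussed here; it is not compiler output): split the f64 bit pattern into 32-bit halves and show which register would hold the sign bit under each rule.

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t bits = UINT64_C(1) << 63;                // sign bit of a double
  uint32_t lo = static_cast<uint32_t>(bits);        // bits [31:0]
  uint32_t hi = static_cast<uint32_t>(bits >> 32);  // bits [63:32]

  // Current GCC / this patch on RV32BE: a0 = bits[63:32], a1 = bits[31:0],
  // so the sign bit is in a0 (matching the "xor a0, a0, a2" in the test).
  std::printf("current BE : a0=%08x a1=%08x\n", (unsigned)hi, (unsigned)lo);

  // Draft psabi rule (significance-based, endianness-independent):
  // a0 = bits[31:0], a1 = bits[63:32], so the sign bit would be in a1.
  std::printf("psabi draft: a0=%08x a1=%08x\n", (unsigned)lo, (unsigned)hi);
  return 0;
}
```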
Do we need any changes for i64 arguments and returns on RV32?

As far as I can tell, no -- it matches what GCC does. I have added a test case in
topperc left a comment:
LGTM
LLVM Buildbot has detected a new failure on builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/144/builds/42698
The clang PR is waiting for the codegen changes to be merged first.
Furthermore, the idea here is to match the existing implementation within GCC; changes like riscv-non-isa/riscv-elf-psabi-doc#470 (comment) will be addressed later in both GCC and LLVM.