Skip to content

Conversation

@djtodoro
Copy link
Collaborator

  • Handle BE in RISCVSubtarget
  • Handle riscv big-endian f64
  • Handle loads/stores
  • Add tests for LE vs BE

The clang PR is waiting for the codegen changes to be merged first.
Furthermore, the idea here is to match the existing GCC implementation; changes such as the one discussed in riscv-non-isa/riscv-elf-psabi-doc#470 (comment) will be addressed later in both GCC and LLVM.

@llvmbot
Copy link
Member

llvmbot commented Dec 17, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Djordje Todorovic (djtodoro)

Changes
  • Handle BE in RISCVSubtarget
  • Handle riscv big-endian f64
  • Handle loads/stores
  • Add tests for LE vs BE

The clang PR is waiting for the codegen changes to be merged first.
Furthermore, the idea here is to match the existing GCC implementation; changes such as the one discussed in riscv-non-isa/riscv-elf-psabi-doc#470 (comment) will be addressed later in both GCC and LLVM.


Patch is 27.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172668.diff

6 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+90-25)
  • (modified) llvm/lib/Target/RISCV/RISCVSubtarget.cpp (+1)
  • (modified) llvm/lib/Target/RISCV/RISCVSubtarget.h (+5)
  • (added) llvm/test/CodeGen/RISCV/bigendian-double-bitmanip.ll (+78)
  • (added) llvm/test/CodeGen/RISCV/bigendian-f64-call.ll (+94)
  • (added) llvm/test/CodeGen/RISCV/bigendian-load-store.ll (+435)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 2d6bb06d689c3..b9bf9a55bf641 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8347,6 +8347,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
       Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
                           Hi.getValue(1));
 
+      // For big-endian, swap the order of Lo and Hi.
+      if (!Subtarget.isLittleEndian())
+        std::swap(Lo, Hi);
+
       SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
       return DAG.getMergeValues({Pair, Chain}, DL);
     }
@@ -8419,15 +8423,22 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
       SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
                                   DAG.getVTList(MVT::i32, MVT::i32), StoredVal);
 
-      SDValue Lo = DAG.getStore(Chain, DL, Split.getValue(0), BasePtr,
+      SDValue Lo = Split.getValue(0);
+      SDValue Hi = Split.getValue(1);
+
+      // For big-endian, swap the order of Lo and Hi before storing.
+      if (!Subtarget.isLittleEndian())
+        std::swap(Lo, Hi);
+
+      SDValue LoStore = DAG.getStore(Chain, DL, Lo, BasePtr,
                                 Store->getPointerInfo(), Store->getBaseAlign(),
                                 Store->getMemOperand()->getFlags());
       BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
-      SDValue Hi = DAG.getStore(Chain, DL, Split.getValue(1), BasePtr,
+      SDValue HiStore = DAG.getStore(Chain, DL, Hi, BasePtr,
                                 Store->getPointerInfo().getWithOffset(4),
                                 Store->getBaseAlign(),
                                 Store->getMemOperand()->getFlags());
-      return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+      return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LoStore, HiStore);
     }
     if (VT == MVT::i64) {
       assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
@@ -15160,8 +15171,12 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
                Subtarget.hasStdExtDOrZdinx()) {
       SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
                                    DAG.getVTList(MVT::i32, MVT::i32), Op0);
-      SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
-                                   NewReg.getValue(0), NewReg.getValue(1));
+      SDValue Lo = NewReg.getValue(0);
+      SDValue Hi = NewReg.getValue(1);
+      // For big-endian, swap the order when building the i64 pair.
+      if (!Subtarget.isLittleEndian())
+        std::swap(Lo, Hi);
+      SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
       Results.push_back(RetReg);
     } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
                isTypeLegal(Op0VT)) {
@@ -22538,14 +22553,27 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
       MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
       MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
-  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
-      .addFrameIndex(FI)
-      .addImm(0)
-      .addMemOperand(MMOLo);
-  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
-      .addFrameIndex(FI)
-      .addImm(4)
-      .addMemOperand(MMOHi);
+
+  // For big-endian, the high part is at offset 0 and the low part at offset 4.
+  if (!Subtarget.isLittleEndian()) {
+    BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
+        .addFrameIndex(FI)
+        .addImm(0)
+        .addMemOperand(MMOLo);
+    BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
+        .addFrameIndex(FI)
+        .addImm(4)
+        .addMemOperand(MMOHi);
+  } else {
+    BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
+        .addFrameIndex(FI)
+        .addImm(0)
+        .addMemOperand(MMOLo);
+    BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
+        .addFrameIndex(FI)
+        .addImm(4)
+        .addMemOperand(MMOHi);
+  }
   MI.eraseFromParent(); // The pseudo instruction is gone now.
   return BB;
 }
@@ -22571,16 +22599,31 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
       MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
-  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
-      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
-      .addFrameIndex(FI)
-      .addImm(0)
-      .addMemOperand(MMOLo);
-  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
-      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
-      .addFrameIndex(FI)
-      .addImm(4)
-      .addMemOperand(MMOHi);
+
+  // For big-endian, store the high part at offset 0 and the low part at offset 4.
+  if (!Subtarget.isLittleEndian()) {
+    BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
+        .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
+        .addFrameIndex(FI)
+        .addImm(0)
+        .addMemOperand(MMOLo);
+    BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
+        .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
+        .addFrameIndex(FI)
+        .addImm(4)
+        .addMemOperand(MMOHi);
+  } else {
+    BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
+        .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
+        .addFrameIndex(FI)
+        .addImm(0)
+        .addMemOperand(MMOLo);
+    BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
+        .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
+        .addFrameIndex(FI)
+        .addImm(4)
+        .addMemOperand(MMOHi);
+  }
   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, Register());
   MI.eraseFromParent(); // The pseudo instruction is gone now.
   return BB;
@@ -23407,6 +23450,13 @@ static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
     RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
   }
+
+  // For big-endian, swap the order of Lo and Hi when building the pair.
+  const RISCVSubtarget &Subtarget = DAG.getSubtarget<RISCVSubtarget>();
+  // TESTED with: CodeGen/RISCV/bigendian-double-bitmanip.ll
+  if (!Subtarget.isLittleEndian())
+    std::swap(Lo, Hi);
+
   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
 }
 
@@ -23778,6 +23828,10 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
       SDValue Lo = SplitF64.getValue(0);
       SDValue Hi = SplitF64.getValue(1);
 
+      // For big-endian, swap the order of Lo and Hi when passing.
+      if (!Subtarget.isLittleEndian())
+        std::swap(Lo, Hi);
+
       Register RegLo = VA.getLocReg();
       RegsToPass.push_back(std::make_pair(RegLo, Lo));
 
@@ -24005,8 +24059,14 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                              MVT::i32, Glue);
       Chain = RetValue2.getValue(1);
       Glue = RetValue2.getValue(2);
-      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
-                             RetValue2);
+
+      // For big-endian, swap the order when building the pair.
+      SDValue Lo = RetValue;
+      SDValue Hi = RetValue2;
+      if (!Subtarget.isLittleEndian())
+        std::swap(Lo, Hi);
+
+      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
     } else
       RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
 
@@ -24071,6 +24131,11 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
       SDValue Lo = SplitF64.getValue(0);
       SDValue Hi = SplitF64.getValue(1);
+
+      // For big-endian, swap the order of Lo and Hi when returning.
+      if (!Subtarget.isLittleEndian())
+        std::swap(Lo, Hi);
+
       Register RegLo = VA.getLocReg();
       Register RegHi = RVLocs[++i].getLocReg();
 
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index f86265a21d17e..462ba1115f0ab 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -109,6 +109,7 @@ RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU,
                                unsigned RVVVectorBitsMax,
                                const TargetMachine &TM)
     : RISCVGenSubtargetInfo(TT, CPU, TuneCPU, FS),
+      TargetTriple(TT),
       RVVVectorBitsMin(RVVVectorBitsMin), RVVVectorBitsMax(RVVVectorBitsMax),
       FrameLowering(
           initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)),
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 29df53c6c9893..b591ecfa2c7a8 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -97,6 +97,8 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   RISCVProcFamilyEnum RISCVProcFamily = Others;
   RISCVVRGatherCostModelEnum RISCVVRGatherCostModel = Quadratic;
 
+  Triple TargetTriple;
+
 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
   bool ATTRIBUTE = DEFAULT;
 #include "RISCVGenSubtargetInfo.inc"
@@ -220,6 +222,9 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   }
 
   bool is64Bit() const { return IsRV64; }
+  bool isLittleEndian() const {
+    return TargetTriple.isLittleEndian();
+  }
   MVT getXLenVT() const {
     return is64Bit() ? MVT::i64 : MVT::i32;
   }
diff --git a/llvm/test/CodeGen/RISCV/bigendian-double-bitmanip.ll b/llvm/test/CodeGen/RISCV/bigendian-double-bitmanip.ll
new file mode 100644
index 0000000000000..c85fd5d4c55da
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/bigendian-double-bitmanip.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32 -mattr=+d -verify-machineinstrs < %s | FileCheck -check-prefix=RV32IFD-LE %s
+; RUN: llc -mtriple=riscv32be -target-abi=ilp32 -mattr=+d -verify-machineinstrs < %s | FileCheck -check-prefix=RV32IFD-BE %s
+
+; Test operations that involve SplitF64/BuildPairF64 on RV32 with D extension
+; but soft-float ABI. This configuration triggers the special handling for
+; big-endian.
+
+define double @fneg(double %a) nounwind {
+; RV32IFD-LE-LABEL: fneg:
+; RV32IFD-LE:       # %bb.0:
+; RV32IFD-LE-NEXT:    lui a2, 524288
+; RV32IFD-LE-NEXT:    xor a1, a1, a2
+; RV32IFD-LE-NEXT:    ret
+;
+; RV32IFD-BE-LABEL: fneg:
+; RV32IFD-BE:       # %bb.0:
+; RV32IFD-BE-NEXT:    lui a2, 524288
+; RV32IFD-BE-NEXT:    xor a0, a0, a2
+; RV32IFD-BE-NEXT:    ret
+  %1 = fneg double %a
+  ret double %1
+}
+
+define double @fabs(double %a) nounwind {
+; RV32IFD-LE-LABEL: fabs:
+; RV32IFD-LE:       # %bb.0:
+; RV32IFD-LE-NEXT:    slli a1, a1, 1
+; RV32IFD-LE-NEXT:    srli a1, a1, 1
+; RV32IFD-LE-NEXT:    ret
+;
+; RV32IFD-BE-LABEL: fabs:
+; RV32IFD-BE:       # %bb.0:
+; RV32IFD-BE-NEXT:    slli a0, a0, 1
+; RV32IFD-BE-NEXT:    srli a0, a0, 1
+; RV32IFD-BE-NEXT:    ret
+  %1 = call double @llvm.fabs.f64(double %a)
+  ret double %1
+}
+
+define double @fcopysign(double %a, double %b) nounwind {
+; RV32IFD-LE-LABEL: fcopysign:
+; RV32IFD-LE:       # %bb.0:
+; RV32IFD-LE-NEXT:    addi sp, sp, -16
+; RV32IFD-LE-NEXT:    sw a2, 8(sp)
+; RV32IFD-LE-NEXT:    sw a3, 12(sp)
+; RV32IFD-LE-NEXT:    fld fa5, 8(sp)
+; RV32IFD-LE-NEXT:    sw a0, 8(sp)
+; RV32IFD-LE-NEXT:    sw a1, 12(sp)
+; RV32IFD-LE-NEXT:    fld fa4, 8(sp)
+; RV32IFD-LE-NEXT:    fsgnj.d fa5, fa4, fa5
+; RV32IFD-LE-NEXT:    fsd fa5, 8(sp)
+; RV32IFD-LE-NEXT:    lw a0, 8(sp)
+; RV32IFD-LE-NEXT:    lw a1, 12(sp)
+; RV32IFD-LE-NEXT:    addi sp, sp, 16
+; RV32IFD-LE-NEXT:    ret
+;
+; RV32IFD-BE-LABEL: fcopysign:
+; RV32IFD-BE:       # %bb.0:
+; RV32IFD-BE-NEXT:    addi sp, sp, -16
+; RV32IFD-BE-NEXT:    sw a2, 8(sp)
+; RV32IFD-BE-NEXT:    sw a3, 12(sp)
+; RV32IFD-BE-NEXT:    fld fa5, 8(sp)
+; RV32IFD-BE-NEXT:    sw a0, 8(sp)
+; RV32IFD-BE-NEXT:    sw a1, 12(sp)
+; RV32IFD-BE-NEXT:    fld fa4, 8(sp)
+; RV32IFD-BE-NEXT:    fsgnj.d fa5, fa4, fa5
+; RV32IFD-BE-NEXT:    fsd fa5, 8(sp)
+; RV32IFD-BE-NEXT:    lw a0, 8(sp)
+; RV32IFD-BE-NEXT:    lw a1, 12(sp)
+; RV32IFD-BE-NEXT:    addi sp, sp, 16
+; RV32IFD-BE-NEXT:    ret
+  %1 = call double @llvm.copysign.f64(double %a, double %b)
+  ret double %1
+}
+
+declare double @llvm.fabs.f64(double)
+declare double @llvm.copysign.f64(double, double)
diff --git a/llvm/test/CodeGen/RISCV/bigendian-f64-call.ll b/llvm/test/CodeGen/RISCV/bigendian-f64-call.ll
new file mode 100644
index 0000000000000..83057e23a0d85
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/bigendian-f64-call.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32 -mattr=+d -verify-machineinstrs < %s | FileCheck -check-prefix=RV32LE %s
+; RUN: llc -mtriple=riscv32be -target-abi=ilp32 -mattr=+d -verify-machineinstrs < %s | FileCheck -check-prefix=RV32BE %s
+
+; Test f64 function calls with D extension and soft-float ABI
+; This specifically tests the LowerCall path that needs to swap Lo/Hi for BE
+
+declare double @external_func(double, double)
+
+define double @test_f64_call(double %a, double %b) {
+; RV32LE-LABEL: test_f64_call:
+; RV32LE:       # %bb.0:
+; RV32LE-NEXT:    addi sp, sp, -16
+; RV32LE-NEXT:    .cfi_def_cfa_offset 16
+; RV32LE-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32LE-NEXT:    .cfi_offset ra, -4
+; RV32LE-NEXT:    call external_func
+; RV32LE-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32LE-NEXT:    .cfi_restore ra
+; RV32LE-NEXT:    addi sp, sp, 16
+; RV32LE-NEXT:    .cfi_def_cfa_offset 0
+; RV32LE-NEXT:    ret
+;
+; RV32BE-LABEL: test_f64_call:
+; RV32BE:       # %bb.0:
+; RV32BE-NEXT:    addi sp, sp, -16
+; RV32BE-NEXT:    .cfi_def_cfa_offset 16
+; RV32BE-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32BE-NEXT:    .cfi_offset ra, -4
+; RV32BE-NEXT:    call external_func
+; RV32BE-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32BE-NEXT:    .cfi_restore ra
+; RV32BE-NEXT:    addi sp, sp, 16
+; RV32BE-NEXT:    .cfi_def_cfa_offset 0
+; RV32BE-NEXT:    ret
+  %result = call double @external_func(double %a, double %b)
+  ret double %result
+}
+
+; Test with a computation before the call to force SplitF64
+define double @test_f64_call_with_fadd(double %a, double %b) {
+; RV32LE-LABEL: test_f64_call_with_fadd:
+; RV32LE:       # %bb.0:
+; RV32LE-NEXT:    addi sp, sp, -16
+; RV32LE-NEXT:    .cfi_def_cfa_offset 16
+; RV32LE-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32LE-NEXT:    .cfi_offset ra, -4
+; RV32LE-NEXT:    sw a2, 0(sp)
+; RV32LE-NEXT:    sw a3, 4(sp)
+; RV32LE-NEXT:    fld fa5, 0(sp)
+; RV32LE-NEXT:    sw a0, 0(sp)
+; RV32LE-NEXT:    sw a1, 4(sp)
+; RV32LE-NEXT:    fld fa4, 0(sp)
+; RV32LE-NEXT:    fadd.d fa5, fa4, fa5
+; RV32LE-NEXT:    fsd fa5, 0(sp)
+; RV32LE-NEXT:    lw a0, 0(sp)
+; RV32LE-NEXT:    lw a1, 4(sp)
+; RV32LE-NEXT:    mv a2, a0
+; RV32LE-NEXT:    mv a3, a1
+; RV32LE-NEXT:    call external_func
+; RV32LE-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32LE-NEXT:    .cfi_restore ra
+; RV32LE-NEXT:    addi sp, sp, 16
+; RV32LE-NEXT:    .cfi_def_cfa_offset 0
+; RV32LE-NEXT:    ret
+;
+; RV32BE-LABEL: test_f64_call_with_fadd:
+; RV32BE:       # %bb.0:
+; RV32BE-NEXT:    addi sp, sp, -16
+; RV32BE-NEXT:    .cfi_def_cfa_offset 16
+; RV32BE-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32BE-NEXT:    .cfi_offset ra, -4
+; RV32BE-NEXT:    sw a2, 0(sp)
+; RV32BE-NEXT:    sw a3, 4(sp)
+; RV32BE-NEXT:    fld fa5, 0(sp)
+; RV32BE-NEXT:    sw a0, 0(sp)
+; RV32BE-NEXT:    sw a1, 4(sp)
+; RV32BE-NEXT:    fld fa4, 0(sp)
+; RV32BE-NEXT:    fadd.d fa5, fa4, fa5
+; RV32BE-NEXT:    fsd fa5, 0(sp)
+; RV32BE-NEXT:    lw a0, 0(sp)
+; RV32BE-NEXT:    lw a1, 4(sp)
+; RV32BE-NEXT:    mv a2, a0
+; RV32BE-NEXT:    mv a3, a1
+; RV32BE-NEXT:    call external_func
+; RV32BE-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32BE-NEXT:    .cfi_restore ra
+; RV32BE-NEXT:    addi sp, sp, 16
+; RV32BE-NEXT:    .cfi_def_cfa_offset 0
+; RV32BE-NEXT:    ret
+  %sum = fadd double %a, %b
+  %result = call double @external_func(double %sum, double %sum)
+  ret double %result
+}
diff --git a/llvm/test/CodeGen/RISCV/bigendian-load-store.ll b/llvm/test/CodeGen/RISCV/bigendian-load-store.ll
new file mode 100644
index 0000000000000..175346d5ab0f5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/bigendian-load-store.ll
@@ -0,0 +1,435 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32LE
+; RUN: llc -mtriple=riscv32be -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32BE
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64LE
+; RUN: llc -mtriple=riscv64be -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64BE
+
+; Test basic load/store operations on both little-endian and big-endian RISC-V
+
+define i32 @load_i32(ptr %p) {
+; RV32LE-LABEL: load_i32:
+; RV32LE:       # %bb.0:
+; RV32LE-NEXT:    lw a0, 0(a0)
+; RV32LE-NEXT:    ret
+;
+; RV32BE-LABEL: load_i32:
+; RV32BE:       # %bb.0:
+; RV32BE-NEXT:    lw a0, 0(a0)
+; RV32BE-NEXT:    ret
+;
+; RV64LE-LABEL: load_i32:
+; RV64LE:       # %bb.0:
+; RV64LE-NEXT:    lw a0, 0(a0)
+; RV64LE-NEXT:    ret
+;
+; RV64BE-LABEL: load_i32:
+; RV64BE:       # %bb.0:
+; RV64BE-NEXT:    lw a0, 0(a0)
+; RV64BE-NEXT:    ret
+  %v = load i32, ptr %p
+  ret i32 %v
+}
+
+define void @store_i32(ptr %p, i32 %v) {
+; RV32LE-LABEL: store_i32:
+; RV32LE:       # %bb.0:
+; RV32LE-NEXT:    sw a1, 0(a0)
+; RV32LE-NEXT:    ret
+;
+; RV32BE-LABEL: store_i32:
+; RV32BE:       # %bb.0:
+; RV32BE-NEXT:    sw a1, 0(a0)
+; RV32BE-NEXT:    ret
+;
+; RV64LE-LABEL: store_i32:
+; RV64LE:       # %bb.0:
+; RV64LE-NEXT:    sw a1, 0(a0)
+; RV64LE-NEXT:    ret
+;
+; RV64BE-LABEL: store_i32:
+; RV64BE:       # %bb.0:
+; RV64BE-NEXT:    sw a1, 0(a0)
+; RV64BE-NEXT:    ret
+  store i32 %v, ptr %p
+  ret void
+}
+
+define i16 @load_i16(ptr %p) {
+; RV32LE-LABEL: load_i16:
+; RV32LE:       # %bb.0:
+; RV32LE-NEXT:    lh a0, 0(a0)
+; RV32LE-NEXT:    ret
+;
+; RV32BE-LABEL: load_i16:
+; RV32BE:       # %bb.0:
+; RV32BE-NEXT:    lh a0, 0(a0)
+; RV32BE-NEXT:    ret
+;
+; RV64LE-LABEL: load_i16:
+; RV64LE:       # %bb.0:
+; RV64LE-NEXT:    lh a0, 0(a0)
+; RV64LE-NEXT:    ret
+;
+; RV64BE-LABEL: load_i16:
+; RV64BE:       # %bb.0:
+; RV64BE-NEXT:    lh a0, 0(a0)
+; RV64BE-NEXT:    ret
+  %v = load i16, ptr %p
+  ret i16 %v
+}
+
+define void @store_i16(ptr %p, i16 %v) {
+; RV32LE-LABEL: store_i16:
+; RV32LE:       # %bb.0:
+; RV32LE-NEXT:    sh a1, 0(a0)
+; RV32LE-NEXT:    ret
+;
+; RV32BE-LABEL: store_i16:
+; RV32BE:       # %bb.0:
+; RV32BE-NEXT:    sh a1, 0(a0)
+; RV32BE-NEXT:    ret
+;
+; RV64LE-LABEL: store_i16:
+; RV64LE:       # %bb.0:
+; RV64LE-NEXT:    sh a1, 0(a0)
+; RV64LE-NEXT:    ret
+;
+; RV64BE-LABEL: store_i16:
+; RV64BE:       # %bb.0:
+; RV64BE-NEXT:    sh a1, 0(a0)
+; RV64BE-NEXT:    ret
+  store i16 %v, ptr %p
+  ret void
+}
+
+define i8 @load_i8(ptr %p) {
+; RV32LE-LABEL: load_i8:
+; RV32LE:       # %bb.0:
+; RV32LE-NEXT:    lbu a0, 0(a0)
+; RV32LE-NEXT:    ret
+;
+; RV32BE-LABEL: load_i8:
+; RV32BE:       # %bb.0:
+; RV32BE-NEXT:    lbu a0, 0(a0)
+; RV32BE-NEXT:    ret
+;
+; RV64LE-LABEL: load_i8:
+; RV64LE:       # %bb.0:
+; RV64LE-NEXT:    lbu a0, 0(a0)
+; RV64LE-NEXT:    ret
+;
+; RV64BE-LABEL: load_i8:
+; RV64BE:       # %bb.0:
+; RV64BE-NEXT:    lbu a0, 0(a0)
+; RV64BE-NEXT:    ret
+  %v = load i8, ptr %p
+  ret i8 %v
+}
+
+define void @store_i8(ptr %p, i8 %v) {
+; RV32LE-LABEL: store_i8:
+; RV32LE:       # %bb.0:
+; RV32LE-NEXT:    sb a1, 0(a0)
+; RV32LE-NEXT:    ret
+;
+; RV32BE-LABEL: store_i8:
+; RV32BE:       # %bb.0:
+; RV32BE-NEXT:    sb a1, 0(a0)
+; RV32BE-NEXT:    ret
+;
+; RV64LE-LABEL: store_i8:
+; RV64LE:       # %bb.0:
+; RV64LE-NEXT:    sb a1, 0(a0)
+; RV64LE-NEXT:    ret
+;
+; RV64BE-LABEL: store_i8:
+; RV64BE:       # %bb.0:
+; RV64BE-NEXT:    sb a1, 0(a0)
+; RV64BE-NEXT:    ret
+  store i8 %v, ptr %p
+  ret void
+}
+
+define i64 @load_i64(ptr %p) {
+; RV32LE-LABEL: load_i64:
+; RV32LE:       # %bb.0:
+; RV32LE-NEXT:    lw a2, 0(a0)
+; RV32LE-NEXT:    lw a1, 4(a0)
+; RV32LE-NE...
[truncated]

@djtodoro djtodoro requested a review from topperc December 17, 2025 14:48
@github-actions
Copy link

github-actions bot commented Dec 17, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

- Handle BE in RISCVSubtarget
- Handle riscv big-endian f64
- Handle loads/stores
- Add tests for LE vs BE
@djtodoro djtodoro force-pushed the pr/riscvbe-codegen-part branch from 2c1b75b to 924cf81 Compare December 17, 2025 15:25
}

bool is64Bit() const { return IsRV64; }
bool isLittleEndian() const { return TargetTriple.isLittleEndian(); }
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we cache this in a bool in the constructor?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes.


// For big-endian, the high part is at offset 0 and the low part at offset 4.
if (!Subtarget.isLittleEndian()) {
BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we swap LoReg and HiReg instead of mostly duplicating the code?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes


// For big-endian, store the high part at offset 0 and the low part at
// offset 4.
if (!Subtarget.isLittleEndian()) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a lot of code duplication here. Can we use more variables to reduce it?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, thanks for the suggestion.

RISCVProcFamilyEnum RISCVProcFamily = Others;
RISCVVRGatherCostModelEnum RISCVVRGatherCostModel = Quadratic;

Triple TargetTriple;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we still need TargetTriple?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, will remove it.

; RV32IFD-BE-LABEL: fneg:
; RV32IFD-BE: # %bb.0:
; RV32IFD-BE-NEXT: lui a2, 524288
; RV32IFD-BE-NEXT: xor a0, a0, a2
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PSABI doc riscv-non-isa/riscv-elf-psabi-doc#470 says

This register-pair ordering is defined in terms of value significance and is
independent of endianness.  For example, on RV32BE a 64-bit scalar returned
in a0/a1 places bits [31:0] (the least-significant XLEN bits) in a0 and
bits [63:32] in a1; memory layout remains big-endian.

Yet this code seems to be toggling the sign bit in a0. If the psabi doc is correct, isn't the sign bit in a1?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The thing is that riscv-non-isa/riscv-elf-psabi-doc#470 is not ready yet -- or at least it has not been officially merged/accepted.
Even GCC has not yet been updated to follow all the changes we plan for the psABI (BE is currently marked as experimental), e.g.:

$ cat test.c
  #include <stdint.h>
  double flip_sign_bit(double a) {
      union { double d; uint64_t i; } u;
      u.d = a;
      u.i ^= (1ULL << 63);
      return u.d;
  }

$ riscv64-unknown-elf-gcc -c -O2 -march=rv32gc -mabi=ilp32 -mbig-endian test.c
$ llvm-objdump -d /tmp/test.o
  flip_sign_bit:
     lui    a4, 0x80000
     mv     a5, a1
     xor    a4, a4, a0
     mv     a0, a4
     mv     a1, a5
     ret

Therefore, once psabi is ready, we can change both GCC and LLVM. Until then, we will emit a warning for riscvbe from clang (#165599), something like:

clang: warning: big-endian RISC-V target support is experimental [-Wriscv-be-experimental]

@topperc
Copy link
Collaborator

topperc commented Dec 19, 2025

Do we need any changes for i64 arguments and returns on RV32?

@djtodoro
Copy link
Collaborator Author

Do we need any changes for i64 arguments and returns on RV32?

As far as I can tell, no -- it matches what GCC does. I have added a test case, the return_i64_const function in bigendian-load-store.ll, that checks the i64 type.

Copy link
Collaborator

@topperc topperc left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@djtodoro djtodoro merged commit fefda86 into llvm:main Dec 19, 2025
10 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Dec 19, 2025

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-sie-ubuntu-fast running on sie-linux-worker while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/144/builds/42698

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'Clang :: Preprocessor/c99-6_10_3_4_p6.c' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 3
/home/buildbot/buildbot-root/llvm-clang-x86_64-sie-ubuntu-fast/build/bin/clang -cc1 -internal-isystem /home/buildbot/buildbot-root/llvm-clang-x86_64-sie-ubuntu-fast/build/lib/clang/22/include -nostdsysteminc -E /home/buildbot/buildbot-root/llvm-clang-x86_64-sie-ubuntu-fast/llvm-project/clang/test/Preprocessor/c99-6_10_3_4_p6.c | /home/buildbot/buildbot-root/llvm-clang-x86_64-sie-ubuntu-fast/build/bin/FileCheck -strict-whitespace /home/buildbot/buildbot-root/llvm-clang-x86_64-sie-ubuntu-fast/llvm-project/clang/test/Preprocessor/c99-6_10_3_4_p6.c
# executed command: /home/buildbot/buildbot-root/llvm-clang-x86_64-sie-ubuntu-fast/build/bin/clang -cc1 -internal-isystem /home/buildbot/buildbot-root/llvm-clang-x86_64-sie-ubuntu-fast/build/lib/clang/22/include -nostdsysteminc -E /home/buildbot/buildbot-root/llvm-clang-x86_64-sie-ubuntu-fast/llvm-project/clang/test/Preprocessor/c99-6_10_3_4_p6.c
# .---command stderr------------
# | /home/buildbot/buildbot-root/llvm-clang-x86_64-sie-ubuntu-fast/llvm-project/clang/test/Preprocessor/c99-6_10_3_4_p6.c:7:57: warning: backslash and newline separated by space [-Wbackslash-newline-escape]
# |     7 | #define debug(s, t) printf("x" # s "= %d, x" # t "= s" \ 
# |       |                                                         ^
# | 1 warning generated.
# `-----------------------------
# executed command: /home/buildbot/buildbot-root/llvm-clang-x86_64-sie-ubuntu-fast/build/bin/FileCheck -strict-whitespace /home/buildbot/buildbot-root/llvm-clang-x86_64-sie-ubuntu-fast/llvm-project/clang/test/Preprocessor/c99-6_10_3_4_p6.c
# .---command stderr------------
# | /home/buildbot/buildbot-root/llvm-clang-x86_64-sie-ubuntu-fast/llvm-project/clang/test/Preprocessor/c99-6_10_3_4_p6.c:24:11: error: CHECK: expected string not found in input
# | // CHECK: include "vers2.h"
# |           ^
# | <stdin>:10:62: note: scanning from here
# | fputs("strncmp(\"abc\\0d\" \"abc\", '\\4') == 0" ": @\n", s);
# |                                                              ^
# | <stdin>:12:1: note: possible intended match here
# | include "vers2 .h"
# | ^
# | 
# | Input file: <stdin>
# | Check file: /home/buildbot/buildbot-root/llvm-clang-x86_64-sie-ubuntu-fast/llvm-project/clang/test/Preprocessor/c99-6_10_3_4_p6.c
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |             1: # 1 "/home/buildbot/buildbot-root/llvm-clang-x86_64-sie-ubuntu-fast/llvm-project/clang/test/Preprocessor/c99-6_10_3_4_p6.c" 
# |             2: # 1 "<built-in>" 1 
# |             3: # 1 "<built-in>" 3 
# |             4: # 389 "<built-in>" 3 
# |             5: # 1 "<command line>" 1 
# |             6: # 1 "<built-in>" 2 
# |             7: # 1 "/home/buildbot/buildbot-root/llvm-clang-x86_64-sie-ubuntu-fast/llvm-project/clang/test/Preprocessor/c99-6_10_3_4_p6.c" 2 
# |             8: # 14 "/home/buildbot/buildbot-root/llvm-clang-x86_64-sie-ubuntu-fast/llvm-project/clang/test/Preprocessor/c99-6_10_3_4_p6.c" 
# |             9: printf("x" "1" "= %d, x" "2" "= s" x1, x2); 
# | check:22       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            10: fputs("strncmp(\"abc\\0d\" \"abc\", '\\4') == 0" ": @\n", s); 
# | check:23       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# | check:24'0                                                                  X error: no match found
# |            11:  
# | check:24'0     ~
# |            12: include "vers2 .h" 
# | check:24'0     ~~~~~~~~~~~~~~~~~~~
...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants