181 changes: 174 additions & 7 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -993,6 +993,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
}

// Custom-lower i32/i64 loads/stores so we can possibly use __aeabi_uread*/__aeabi_uwrite*.
if (Subtarget->isTargetAEABI() && !Subtarget->allowsUnalignedMem()) {
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::i64, Custom);
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::LOAD, MVT::i64, Custom);
}

setOperationAction(ISD::SADDO, MVT::i32, Custom);
setOperationAction(ISD::UADDO, MVT::i32, Custom);
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
@@ -10012,6 +10020,130 @@ void ARMTargetLowering::ExpandDIV_Windows(
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lower, Upper));
}

std::pair<SDValue, SDValue>
ARMTargetLowering::LowerAEABIUnalignedLoad(SDValue Op,
SelectionDAG &DAG) const {
// If we have an unaligned load from an i32 or i64 that would normally be
// split into separate ldrb's, we can use the __aeabi_uread4/__aeabi_uread8
// functions instead.
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
EVT MemVT = LD->getMemoryVT();
if (MemVT != MVT::i32 && MemVT != MVT::i64)
return std::make_pair(SDValue(), SDValue());

const auto &MF = DAG.getMachineFunction();
unsigned AS = LD->getAddressSpace();
Align Alignment = LD->getAlign();
const DataLayout &DL = DAG.getDataLayout();
bool AllowsUnaligned = Subtarget->allowsUnalignedMem();

const char *LibcallName = nullptr;
if ((MF.getFunction().hasMinSize() || MF.getFunction().hasOptSize()) &&
!AllowsUnaligned) {
if (MemVT == MVT::i32 && Alignment <= llvm::Align(2))
LibcallName = "__aeabi_uread4";
else if (MemVT == MVT::i64 && Alignment <= llvm::Align(2))
LibcallName = "__aeabi_uread8";
}

if (LibcallName) {
LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned load to "
<< LibcallName << "\n");
CallingConv::ID CC = CallingConv::ARM_AAPCS;
SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry(
LD->getBasePtr(),
LD->getBasePtr().getValueType().getTypeForEVT(*DAG.getContext()));
SDLoc dl(Op);

Args.push_back(Entry);

Type *RetTy = MemVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(LD->getChain())
.setCallee(CC, RetTy, Callee, std::move(Args));
auto Pair = LowerCallTo(CLI);

// If this was an extending load, extend the libcall result to the load's result type.
if (LD->getExtensionType() != ISD::NON_EXTLOAD) {
unsigned ExtType = LD->getExtensionType() == ISD::SEXTLOAD
? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND;
SDValue EN = DAG.getNode(ExtType, dl, LD->getValueType(0), Pair.first);
Pair.first = EN;
}
return Pair;
}

// Default expand to individual loads
if (!allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Alignment))
return expandUnalignedLoad(LD, DAG);
return std::make_pair(SDValue(), SDValue());
}

SDValue ARMTargetLowering::LowerAEABIUnalignedStore(SDValue Op,
SelectionDAG &DAG) const {
// If we have an unaligned store to an i32 or i64 that would normally be
// split into separate strb's, we can use the __aeabi_uwrite4/__aeabi_uwrite8
// functions instead.
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
EVT MemVT = ST->getMemoryVT();
if (MemVT != MVT::i32 && MemVT != MVT::i64)
return SDValue();

const auto &MF = DAG.getMachineFunction();
unsigned AS = ST->getAddressSpace();
Align Alignment = ST->getAlign();
const DataLayout &DL = DAG.getDataLayout();
bool AllowsUnaligned = Subtarget->allowsUnalignedMem();

const char *LibcallName = nullptr;
if ((MF.getFunction().hasMinSize() || MF.getFunction().hasOptSize()) &&
!AllowsUnaligned) {
if (MemVT == MVT::i32 && Alignment <= llvm::Align(2))
LibcallName = "__aeabi_uwrite4";
else if (MemVT == MVT::i64 && Alignment <= llvm::Align(2))
LibcallName = "__aeabi_uwrite8";
}

if (LibcallName) {
LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store to "
<< LibcallName << "\n");
CallingConv::ID CC = CallingConv::ARM_AAPCS;
SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
TargetLowering::ArgListTy Args;
SDLoc dl(Op);

// If this is a truncating store, truncate the value to the memory type first.
SDValue StoreVal = ST->getOperand(1);
if (ST->isTruncatingStore())
StoreVal = DAG.getNode(ISD::TRUNCATE, dl, MemVT, ST->getOperand(1));

TargetLowering::ArgListEntry Entry(
StoreVal, StoreVal.getValueType().getTypeForEVT(*DAG.getContext()));
Args.push_back(Entry);

Entry.Node = ST->getBasePtr();
Entry.Ty = ST->getBasePtr().getValueType().getTypeForEVT(*DAG.getContext());
Args.push_back(Entry);

Type *RetTy = Type::getVoidTy(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(ST->getChain())
.setCallee(CC, RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
}

// Default expand to individual stores
if (!allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Alignment))
return expandUnalignedStore(ST, DAG);
return SDValue();
}

static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
EVT MemVT = LD->getMemoryVT();
@@ -10054,11 +10186,11 @@ void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT MemVT = LD->getMemoryVT();
assert(LD->isUnindexed() && "Loads should be unindexed at this point.");

if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
!Subtarget->isThumb1Only() && LD->isVolatile() &&
LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
SDLoc dl(N);
SDValue Result = DAG.getMemIntrinsicNode(
ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
@@ -10067,6 +10199,12 @@ void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
Results.append({Pair, Result.getValue(2)});
} else if ((MemVT == MVT::i32 || MemVT == MVT::i64)) {
auto Pair = LowerAEABIUnalignedLoad(SDValue(N, 0), DAG);
if (Pair.first) {
Results.push_back(Pair.first);
Results.push_back(Pair.second);
}
}
}

@@ -10108,15 +10246,15 @@ static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
ST->getMemOperand());
}

static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
SDValue ARMTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
EVT MemVT = ST->getMemoryVT();
assert(ST->isUnindexed() && "Stores should be unindexed at this point.");

if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
!Subtarget->isThumb1Only() && ST->isVolatile() &&
ST->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
SDNode *N = Op.getNode();
SDLoc dl(N);

@@ -10136,8 +10274,9 @@ static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
MemVT == MVT::v16i1))) {
return LowerPredicateStore(Op, DAG);
} else if ((MemVT == MVT::i32 || MemVT == MVT::i64)) {
return LowerAEABIUnalignedStore(Op, DAG);
}

return SDValue();
}

@@ -10669,8 +10808,19 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UADDSAT:
case ISD::USUBSAT:
return LowerADDSUBSAT(Op, DAG, Subtarget);
case ISD::LOAD:
return LowerPredicateLoad(Op, DAG);
case ISD::LOAD: {
auto *LD = cast<LoadSDNode>(Op);
EVT MemVT = LD->getMemoryVT();
if (Subtarget->hasMVEIntegerOps() &&
((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
MemVT == MVT::v16i1)))
return LowerPredicateLoad(Op, DAG);

auto Pair = LowerAEABIUnalignedLoad(Op, DAG);
if (Pair.first)
return DAG.getMergeValues({Pair.first, Pair.second}, SDLoc(Pair.first));
return SDValue();
}
case ISD::STORE:
return LowerSTORE(Op, DAG, Subtarget);
case ISD::MLOAD:
@@ -10811,6 +10961,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::LOAD:
LowerLOAD(N, Results, DAG);
break;
case ISD::STORE:
Res = LowerAEABIUnalignedStore(SDValue(N, 0), DAG);
break;
case ISD::TRUNCATE:
Res = LowerTruncate(N, DAG, Subtarget);
break;
@@ -19859,31 +20012,45 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
EVT VT;
SDValue Ptr;
Align Alignment;
unsigned AS = 0;
bool isSEXTLoad = false;
bool IsMasked = false;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
Alignment = LD->getAlign();
AS = LD->getAddressSpace();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
Alignment = ST->getAlign();
AS = ST->getAddressSpace();
} else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
Alignment = LD->getAlign();
AS = LD->getAddressSpace();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
IsMasked = true;
} else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
Alignment = ST->getAlign();
AS = ST->getAddressSpace();
IsMasked = true;
} else
return false;

unsigned Fast = 0;
if (!allowsMisalignedMemoryAccesses(VT, AS, Alignment,
MachineMemOperand::MONone, &Fast)) {
// Only generate post-increment or pre-increment forms when a real
// hardware instruction exists for them. Do not emit postinc/preinc
// if the operation will end up as a libcall.
return false;
}

bool isInc;
bool isLegal = false;
if (VT.isVector())
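For reference, a minimal sketch of what the four runtime helpers this patch calls into are expected to do. The signatures follow the ARM RTABI's unaligned-access helpers; the byte-wise bodies below are only an illustrative approximation of their semantics, not the hand-tuned routines a real compiler-rt/libgcc supplies:

```cpp
// Illustrative reference for the AEABI unaligned-access helpers the new
// lowering emits calls to. Real implementations are provided by the target's
// runtime library; these bodies just show the intended byte-wise semantics.
#include <cstdint>
#include <cstring>

extern "C" {

// Read a 4-byte value from a possibly unaligned address.
int __aeabi_uread4(void *Addr) {
  int32_t V;
  std::memcpy(&V, Addr, sizeof(V)); // byte-wise copy, no alignment assumed
  return V;
}

// Write a 4-byte value to a possibly unaligned address; returns the value.
int __aeabi_uwrite4(int V, void *Addr) {
  std::memcpy(Addr, &V, sizeof(V));
  return V;
}

// 8-byte variants, used for the MVT::i64 cases above.
long long __aeabi_uread8(void *Addr) {
  int64_t V;
  std::memcpy(&V, Addr, sizeof(V));
  return V;
}

long long __aeabi_uwrite8(long long V, void *Addr) {
  std::memcpy(Addr, &V, sizeof(V));
  return V;
}

} // extern "C"
```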
6 changes: 5 additions & 1 deletion llvm/lib/Target/ARM/ARMISelLowering.h
@@ -919,10 +919,14 @@ class VectorType;
SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const;
std::pair<SDValue, SDValue>
LowerAEABIUnalignedLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAEABIUnalignedStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;

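As a usage illustration (hypothetical, not part of this PR): on a strict-alignment AEABI target built with -Os or -Oz, an under-aligned i32 access like the one below is the kind of load the new hook should turn into a __aeabi_uread4 call instead of a sequence of ldrb/orr instructions. The struct and function names are made up for the example:

```cpp
// Hypothetical input. The packed struct gives the i32 field only 1-byte
// alignment, so the load in readValue() is an unaligned MVT::i32 load and,
// under -Os on a target without unaligned-access support, a candidate for
// the __aeabi_uread4 libcall introduced by this change.
#include <cstdint>

struct __attribute__((packed)) Header {
  uint8_t Tag;
  uint32_t Value; // at offset 1, so only 1-byte alignment is guaranteed
};

uint32_t readValue(const Header *H) {
  return H->Value; // unaligned i32 load
}
```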