case AArch64::LDURXi:
case AArch64::LDURSWi:
case AArch64::LDURHHi:
+ case AArch64::LDURBBi:
+ case AArch64::LDURSBWi:
+ case AArch64::LDURSHWi:
return true;
}
}
return isUnscaledLdSt(MI->getOpcode());
}
+/// Pick the 32-bit bitfield-move opcode used to extract the value of the
+/// narrow load \p MI from a merged, wider load: SBFMWri for the sign-extending
+/// byte/halfword loads and UBFMWri for the remaining byte/halfword loads.
+/// Any other opcode is a caller error and hits llvm_unreachable.
+static unsigned getBitExtrOpcode(MachineInstr *MI) {
+  const unsigned LoadOpc = MI->getOpcode();
+  switch (LoadOpc) {
+  // Sign-extending narrow loads need a signed bitfield extract.
+  case AArch64::LDRSHWui:
+  case AArch64::LDURSHWi:
+  case AArch64::LDRSBWui:
+  case AArch64::LDURSBWi:
+    return AArch64::SBFMWri;
+  // The remaining narrow loads use the unsigned bitfield extract.
+  case AArch64::LDRHHui:
+  case AArch64::LDURHHi:
+  case AArch64::LDRBBui:
+  case AArch64::LDURBBi:
+    return AArch64::UBFMWri;
+  default:
+    llvm_unreachable("Unexpected opcode.");
+  }
+}
+
// Returns true when Opc is one of the narrow (byte or halfword) load opcodes
// listed below, and false for every other opcode. The merge code uses this
// predicate to decide whether two loads are combined into a single wider load
// followed by bitfield extracts.
static bool isSmallTypeLdMerge(unsigned Opc) {
switch (Opc) {
default:
return false;
case AArch64::LDRHHui:
case AArch64::LDURHHi:
+ case AArch64::LDRBBui: // Byte loads.
+ case AArch64::LDURBBi:
+ case AArch64::LDRSHWui: // Sign-extending halfword loads.
+ case AArch64::LDURSHWi:
+ case AArch64::LDRSBWui: // Sign-extending byte loads.
+ case AArch64::LDURSBWi:
return true;
- // FIXME: Add other instructions (e.g, LDRBBui, LDURSHWi, LDRSHWui, etc.).
}
}
+
/// Convenience overload: applies the opcode-based isSmallTypeLdMerge check to
/// the opcode of \p MI.
static bool isSmallTypeLdMerge(MachineInstr *MI) {
  const unsigned Opc = MI->getOpcode();
  return isSmallTypeLdMerge(Opc);
}
default:
llvm_unreachable("Opcode has unknown scale!");
case AArch64::LDRBBui:
+ case AArch64::LDURBBi:
+ case AArch64::LDRSBWui:
+ case AArch64::LDURSBWi:
case AArch64::STRBBui:
return 1;
case AArch64::LDRHHui:
case AArch64::LDURHHi:
+ case AArch64::LDRSHWui:
+ case AArch64::LDURSHWi:
case AArch64::STRHHui:
return 2;
case AArch64::LDRSui:
case AArch64::LDURSi:
case AArch64::LDRHHui:
case AArch64::LDURHHi:
+ case AArch64::LDRBBui:
+ case AArch64::LDURBBi:
return Opc;
case AArch64::LDRSWui:
return AArch64::LDRWui;
case AArch64::LDURSWi:
return AArch64::LDURWi;
+ case AArch64::LDRSBWui:
+ return AArch64::LDRBBui;
+ case AArch64::LDRSHWui:
+ return AArch64::LDRHHui;
+ case AArch64::LDURSBWi:
+ return AArch64::LDURBBi;
+ case AArch64::LDURSHWi:
+ return AArch64::LDURHHi;
}
}
case AArch64::LDURSWi:
return AArch64::LDPSWi;
case AArch64::LDRHHui:
+ case AArch64::LDRSHWui:
return AArch64::LDRWui;
case AArch64::LDURHHi:
+ case AArch64::LDURSHWi:
return AArch64::LDURWi;
+ case AArch64::LDRBBui:
+ case AArch64::LDRSBWui:
+ return AArch64::LDRHHui;
+ case AArch64::LDURBBi:
+ case AArch64::LDURSBWi:
+ return AArch64::LDURHHi;
}
}
if (isSmallTypeLdMerge(Opc)) {
// Change the scaled offset from small to large type.
- if (!IsUnscaled)
+ if (!IsUnscaled) {
+ assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
OffsetImm /= 2;
+ }
MachineInstr *RtNewDest = MergeForward ? I : Paired;
// When merging small (< 32 bit) loads for big-endian targets, the order of
// the component parts gets swapped.
if (!Subtarget->isLittleEndian())
std::swap(RtMI, Rt2MI);
// Construct the new load instruction.
- // FIXME: currently we support only halfword unsigned load. We need to
- // handle byte type, signed, and store instructions as well.
MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2;
NewMemMI = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
TII->get(NewOpc))
DEBUG(dbgs() << " with instructions:\n ");
DEBUG((NewMemMI)->print(dbgs()));
+ int Width = getMemScale(I) == 1 ? 8 : 16;
+ int LSBLow = 0;
+ int LSBHigh = Width;
+ int ImmsLow = LSBLow + Width - 1;
+ int ImmsHigh = LSBHigh + Width - 1;
MachineInstr *ExtDestMI = MergeForward ? Paired : I;
if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) {
- // Create the bitfield extract for high half.
+ // Create the bitfield extract for high bits.
BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
- TII->get(AArch64::UBFMWri))
+ TII->get(getBitExtrOpcode(Rt2MI)))
.addOperand(getLdStRegOp(Rt2MI))
.addReg(getLdStRegOp(RtNewDest).getReg())
- .addImm(16)
- .addImm(31);
- // Create the bitfield extract for low half.
- BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
- TII->get(AArch64::ANDWri))
- .addOperand(getLdStRegOp(RtMI))
- .addReg(getLdStRegOp(RtNewDest).getReg())
- .addImm(15);
+ .addImm(LSBHigh)
+ .addImm(ImmsHigh);
+ // Create the bitfield extract for low bits.
+ if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
+ // For unsigned, prefer to use AND for low bits.
+ BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(AArch64::ANDWri))
+ .addOperand(getLdStRegOp(RtMI))
+ .addReg(getLdStRegOp(RtNewDest).getReg())
+ .addImm(ImmsLow);
+ } else {
+ BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(getBitExtrOpcode(RtMI)))
+ .addOperand(getLdStRegOp(RtMI))
+ .addReg(getLdStRegOp(RtNewDest).getReg())
+ .addImm(LSBLow)
+ .addImm(ImmsLow);
+ }
} else {
- // Create the bitfield extract for low half.
- BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
- TII->get(AArch64::ANDWri))
- .addOperand(getLdStRegOp(RtMI))
- .addReg(getLdStRegOp(RtNewDest).getReg())
- .addImm(15);
- // Create the bitfield extract for high half.
+ // Create the bitfield extract for low bits.
+ if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
+ // For unsigned, prefer to use AND for low bits.
+ BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(AArch64::ANDWri))
+ .addOperand(getLdStRegOp(RtMI))
+ .addReg(getLdStRegOp(RtNewDest).getReg())
+ .addImm(ImmsLow);
+ } else {
+ BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(getBitExtrOpcode(RtMI)))
+ .addOperand(getLdStRegOp(RtMI))
+ .addReg(getLdStRegOp(RtNewDest).getReg())
+ .addImm(LSBLow)
+ .addImm(ImmsLow);
+ }
+
+ // Create the bitfield extract for high bits.
BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
- TII->get(AArch64::UBFMWri))
+ TII->get(getBitExtrOpcode(Rt2MI)))
.addOperand(getLdStRegOp(Rt2MI))
.addReg(getLdStRegOp(RtNewDest).getReg())
- .addImm(16)
- .addImm(31);
+ .addImm(LSBHigh)
+ .addImm(ImmsHigh);
}
DEBUG(dbgs() << " ");
DEBUG((BitExtMI1)->print(dbgs()));
bool enableNarrowLdOpt) {
bool Modified = false;
// Three transformations to do here:
- // 1) Find halfword loads that can be merged into a single 32-bit word load
+ // 1) Find narrow loads that can be converted into a single wider load
// with bitfield extract instructions.
// e.g.,
// ldrh w0, [x2]
++MBBI;
break;
// Scaled instructions.
+ case AArch64::LDRBBui:
case AArch64::LDRHHui:
+ case AArch64::LDRSBWui:
+ case AArch64::LDRSHWui:
// Unscaled instructions.
- case AArch64::LDURHHi: {
+ case AArch64::LDURBBi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURSBWi:
+ case AArch64::LDURSHWi: {
if (tryToMergeLdStInst(MBBI)) {
Modified = true;
break;
%add14 = sub nuw nsw i16 %add9, %l3
ret i16 %add14
}
+
+; Two adjacent i16 loads (elements 4 and 5 of %p), both sign-extended to i32.
+; Expected code: one 32-bit ldr followed by an sxth and an asr #16 extract.
+; Note the operand order: the result is element 5 minus element 4.
+; CHECK-LABEL: Ldrsh_merge
+; CHECK: ldr [[NEW_DEST:w[0-9]+]]
+; CHECK-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
+; CHECK-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+
+define i32 @Ldrsh_merge(i16* %p) nounwind {
+  %addr0 = getelementptr inbounds i16, i16* %p, i64 4
+  %raw0 = load i16, i16* %addr0
+  %addr1 = getelementptr inbounds i16, i16* %p, i64 5
+  %raw1 = load i16, i16* %addr1
+  %sx0 = sext i16 %raw0 to i32
+  %sx1 = sext i16 %raw1 to i32
+  %diff = sub nsw i32 %sx1, %sx0
+  ret i32 %diff
+}
+
+; Adjacent i16 loads where element 4 is zero-extended and element 5 is
+; sign-extended. Expected code: one 32-bit ldr, with the extract pair depending
+; on endianness (and + asr #16 on little-endian, sxth + lsr #16 on big-endian).
+; CHECK-LABEL: Ldrsh_zsext_merge
+; CHECK: ldr [[NEW_DEST:w[0-9]+]]
+; LE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
+; LE-DAG: asr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
+; BE-DAG: sxth [[LO_PART:w[0-9]+]], [[NEW_DEST]]
+; BE-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldrsh_zsext_merge(i16* %p) nounwind {
+  %addr0 = getelementptr inbounds i16, i16* %p, i64 4
+  %raw0 = load i16, i16* %addr0
+  %addr1 = getelementptr inbounds i16, i16* %p, i64 5
+  %raw1 = load i16, i16* %addr1
+  %zx0 = zext i16 %raw0 to i32
+  %sx1 = sext i16 %raw1 to i32
+  %diff = sub nsw i32 %zx0, %sx1
+  ret i32 %diff
+}
+
+; Adjacent i16 loads where element 4 is sign-extended and element 5 is
+; zero-extended. Expected code: one 32-bit ldr, with the extract pair depending
+; on endianness (sxth + lsr #16 on little-endian, and + asr #16 on big-endian).
+; CHECK-LABEL: Ldrsh_szext_merge
+; CHECK: ldr [[NEW_DEST:w[0-9]+]]
+; LE-DAG: sxth [[LO_PART:w[0-9]+]], [[NEW_DEST]]
+; LE-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
+; BE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
+; BE-DAG: asr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldrsh_szext_merge(i16* %p) nounwind {
+  %addr0 = getelementptr inbounds i16, i16* %p, i64 4
+  %raw0 = load i16, i16* %addr0
+  %addr1 = getelementptr inbounds i16, i16* %p, i64 5
+  %raw1 = load i16, i16* %addr1
+  %sx0 = sext i16 %raw0 to i32
+  %zx1 = zext i16 %raw1 to i32
+  %diff = sub nsw i32 %sx0, %zx1
+  ret i32 %diff
+}
+
+; Two adjacent i8 loads (elements 2 and 3 of %p), both zero-extended to i32.
+; Expected code: one ldrh followed by an and #0xff and a ubfx #8, #8 extract.
+; CHECK-LABEL: Ldrb_merge
+; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
+; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff
+; CHECK-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldrb_merge(i8* %p) nounwind {
+  %addr0 = getelementptr inbounds i8, i8* %p, i64 2
+  %raw0 = load i8, i8* %addr0
+  %addr1 = getelementptr inbounds i8, i8* %p, i64 3
+  %raw1 = load i8, i8* %addr1
+  %zx0 = zext i8 %raw0 to i32
+  %zx1 = zext i8 %raw1 to i32
+  %diff = sub nsw i32 %zx0, %zx1
+  ret i32 %diff
+}
+
+; Two adjacent i8 loads (elements 2 and 3 of %p), both sign-extended to i32.
+; Expected code: one ldrh followed by an sxtb and an sbfx #8, #8 extract.
+; CHECK-LABEL: Ldrsb_merge
+; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
+; CHECK-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]]
+; CHECK-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldrsb_merge(i8* %p) nounwind {
+  %addr0 = getelementptr inbounds i8, i8* %p, i64 2
+  %raw0 = load i8, i8* %addr0
+  %addr1 = getelementptr inbounds i8, i8* %p, i64 3
+  %raw1 = load i8, i8* %addr1
+  %sx0 = sext i8 %raw0 to i32
+  %sx1 = sext i8 %raw1 to i32
+  %diff = sub nsw i32 %sx0, %sx1
+  ret i32 %diff
+}
+
+; Adjacent i8 loads where element 2 is zero-extended and element 3 is
+; sign-extended. Expected code: one ldrh, with the extract pair depending on
+; endianness (and + sbfx on little-endian, sxtb + ubfx on big-endian).
+; CHECK-LABEL: Ldrsb_zsext_merge
+; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
+; LE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff
+; LE-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; BE-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]]
+; BE-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldrsb_zsext_merge(i8* %p) nounwind {
+  %addr0 = getelementptr inbounds i8, i8* %p, i64 2
+  %raw0 = load i8, i8* %addr0
+  %addr1 = getelementptr inbounds i8, i8* %p, i64 3
+  %raw1 = load i8, i8* %addr1
+  %zx0 = zext i8 %raw0 to i32
+  %sx1 = sext i8 %raw1 to i32
+  %diff = sub nsw i32 %zx0, %sx1
+  ret i32 %diff
+}
+
+; Adjacent i8 loads where element 2 is sign-extended and element 3 is
+; zero-extended. Expected code: one ldrh, with the extract pair depending on
+; endianness (sxtb + ubfx on little-endian, and + sbfx on big-endian).
+; CHECK-LABEL: Ldrsb_szext_merge
+; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
+; LE-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]]
+; LE-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; BE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff
+; BE-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldrsb_szext_merge(i8* %p) nounwind {
+  %addr0 = getelementptr inbounds i8, i8* %p, i64 2
+  %raw0 = load i8, i8* %addr0
+  %addr1 = getelementptr inbounds i8, i8* %p, i64 3
+  %raw1 = load i8, i8* %addr1
+  %sx0 = sext i8 %raw0 to i32
+  %zx1 = zext i8 %raw1 to i32
+  %diff = sub nsw i32 %sx0, %zx1
+  ret i32 %diff
+}
+
+; Two adjacent i16 loads at negative indices (elements -1 and -2 of %p), both
+; sign-extended to i32. Expected code: one 32-bit ldur (the unscaled form)
+; followed by an asr #16 and an sxth extract.
+; CHECK-LABEL: Ldursh_merge
+; CHECK: ldur [[NEW_DEST:w[0-9]+]]
+; CHECK-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
+; CHECK-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldursh_merge(i16* %p) nounwind {
+  %addr0 = getelementptr inbounds i16, i16* %p, i64 -1
+  %raw0 = load i16, i16* %addr0
+  %addr1 = getelementptr inbounds i16, i16* %p, i64 -2
+  %raw1 = load i16, i16* %addr1
+  %sx0 = sext i16 %raw0 to i32
+  %sx1 = sext i16 %raw1 to i32
+  %diff = sub nsw i32 %sx0, %sx1
+  ret i32 %diff
+}
+
+; Adjacent i16 loads at negative indices where element -1 is zero-extended and
+; element -2 is sign-extended. Expected code: one 32-bit ldur, with the extract
+; pair depending on endianness (lsr #16 + sxth on little-endian, asr #16 + and
+; on big-endian).
+; CHECK-LABEL: Ldursh_zsext_merge
+; CHECK: ldur [[NEW_DEST:w[0-9]+]]
+; LE-DAG: lsr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
+; LE-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
+; BE-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
+; BE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldursh_zsext_merge(i16* %p) nounwind {
+  %addr0 = getelementptr inbounds i16, i16* %p, i64 -1
+  %raw0 = load i16, i16* %addr0
+  %addr1 = getelementptr inbounds i16, i16* %p, i64 -2
+  %raw1 = load i16, i16* %addr1
+  %zx0 = zext i16 %raw0 to i32
+  %sx1 = sext i16 %raw1 to i32
+  %diff = sub nsw i32 %zx0, %sx1
+  ret i32 %diff
+}
+
+; Adjacent i16 loads at negative indices where element -1 is sign-extended and
+; element -2 is zero-extended. Expected code: one 32-bit ldur, with the extract
+; pair depending on endianness (asr #16 + and on little-endian, lsr #16 + sxth
+; on big-endian).
+; CHECK-LABEL: Ldursh_szext_merge
+; CHECK: ldur [[NEW_DEST:w[0-9]+]]
+; LE-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
+; LE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
+; BE-DAG: lsr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
+; BE-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldursh_szext_merge(i16* %p) nounwind {
+  %addr0 = getelementptr inbounds i16, i16* %p, i64 -1
+  %raw0 = load i16, i16* %addr0
+  %addr1 = getelementptr inbounds i16, i16* %p, i64 -2
+  %raw1 = load i16, i16* %addr1
+  %sx0 = sext i16 %raw0 to i32
+  %zx1 = zext i16 %raw1 to i32
+  %diff = sub nsw i32 %sx0, %zx1
+  ret i32 %diff
+}
+
+; Two adjacent i8 loads at negative indices (elements -1 and -2 of %p), both
+; zero-extended to i32. Expected code: one ldurh followed by a ubfx #8, #8 and
+; an and #0xff extract.
+; CHECK-LABEL: Ldurb_merge
+; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
+; CHECK-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; CHECK-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldurb_merge(i8* %p) nounwind {
+  %addr0 = getelementptr inbounds i8, i8* %p, i64 -1
+  %raw0 = load i8, i8* %addr0
+  %addr1 = getelementptr inbounds i8, i8* %p, i64 -2
+  %raw1 = load i8, i8* %addr1
+  %zx0 = zext i8 %raw0 to i32
+  %zx1 = zext i8 %raw1 to i32
+  %diff = sub nsw i32 %zx0, %zx1
+  ret i32 %diff
+}
+
+; Two adjacent i8 loads at negative indices (elements -1 and -2 of %p), both
+; sign-extended to i32. Expected code: one ldurh followed by an sbfx #8, #8 and
+; an sxtb extract.
+; CHECK-LABEL: Ldursb_merge
+; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
+; CHECK-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; CHECK-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]]
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldursb_merge(i8* %p) nounwind {
+  %addr0 = getelementptr inbounds i8, i8* %p, i64 -1
+  %raw0 = load i8, i8* %addr0
+  %addr1 = getelementptr inbounds i8, i8* %p, i64 -2
+  %raw1 = load i8, i8* %addr1
+  %sx0 = sext i8 %raw0 to i32
+  %sx1 = sext i8 %raw1 to i32
+  %diff = sub nsw i32 %sx0, %sx1
+  ret i32 %diff
+}
+
+; Adjacent i8 loads at negative indices where element -1 is zero-extended and
+; element -2 is sign-extended. Expected code: one ldurh, with the extract pair
+; depending on endianness (ubfx + sxtb on little-endian, sbfx + and on
+; big-endian).
+; CHECK-LABEL: Ldursb_zsext_merge
+; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
+; LE-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; LE-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]]
+; BE-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; BE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldursb_zsext_merge(i8* %p) nounwind {
+  %addr0 = getelementptr inbounds i8, i8* %p, i64 -1
+  %raw0 = load i8, i8* %addr0
+  %addr1 = getelementptr inbounds i8, i8* %p, i64 -2
+  %raw1 = load i8, i8* %addr1
+  %zx0 = zext i8 %raw0 to i32
+  %sx1 = sext i8 %raw1 to i32
+  %diff = sub nsw i32 %zx0, %sx1
+  ret i32 %diff
+}
+
+; Adjacent i8 loads at negative indices where element -1 is sign-extended and
+; element -2 is zero-extended. Expected code: one ldurh, with the extract pair
+; depending on endianness (sbfx + and on little-endian, ubfx + sxtb on
+; big-endian).
+; CHECK-LABEL: Ldursb_szext_merge
+; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
+; LE-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; LE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff
+; BE-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; BE-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]]
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldursb_szext_merge(i8* %p) nounwind {
+  %addr0 = getelementptr inbounds i8, i8* %p, i64 -1
+  %raw0 = load i8, i8* %addr0
+  %addr1 = getelementptr inbounds i8, i8* %p, i64 -2
+  %raw1 = load i8, i8* %addr1
+  %sx0 = sext i8 %raw0 to i32
+  %zx1 = zext i8 %raw1 to i32
+  %diff = sub nsw i32 %sx0, %zx1
+  ret i32 %diff
+}
+