WebAssemblyInstrInfo.cpp
WebAssemblyMachineFunctionInfo.cpp
WebAssemblyMCInstLower.cpp
+ WebAssemblyOptimizeReturned.cpp
+ WebAssemblyPeephole.cpp
WebAssemblyRegisterInfo.cpp
WebAssemblyRegColoring.cpp
WebAssemblyRegNumbering.cpp
WebAssemblyRegStackify.cpp
WebAssemblySelectionDAGInfo.cpp
+ WebAssemblyStoreResults.cpp
WebAssemblySubtarget.cpp
WebAssemblyTargetMachine.cpp
WebAssemblyTargetTransformInfo.cpp
//===---------------------------------------------------------------------===//
-set_local and store instructions have a return value. We should (a) model this,
+set_local instructions have a return value. We should (a) model this,
and (b) write optimizations which take advantage of it. Keep in mind that
many set_local instructions are implicit!
class WebAssemblyTargetMachine;
class FunctionPass;
+FunctionPass *createWebAssemblyOptimizeReturned();
+
FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
CodeGenOpt::Level OptLevel);
+FunctionPass *createWebAssemblyStoreResults();
FunctionPass *createWebAssemblyRegStackify();
FunctionPass *createWebAssemblyRegColoring();
FunctionPass *createWebAssemblyCFGStackify();
FunctionPass *createWebAssemblyRegNumbering();
+FunctionPass *createWebAssemblyPeephole();
FunctionPass *createWebAssemblyRelooper();
// Custom lowering hooks.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
};
--- /dev/null
+//===-- WebAssemblyOptimizeReturned.cpp - Optimize "returned" attributes --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Optimize calls with "returned" attributes for WebAssembly.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-optimize-returned"
+
+namespace {
+/// IR-level function pass that walks every call site of a function (through
+/// InstVisitor) and processes arguments carrying the "returned" attribute.
+class OptimizeReturned final : public FunctionPass,
+ public InstVisitor<OptimizeReturned> {
+ /// Human-readable pass name.
+ const char *getPassName() const override {
+ return "WebAssembly Optimize Returned";
+ }
+
+ /// Declares that the pass preserves the CFG and that it both requires and
+ /// preserves the dominator tree.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ /// Dominator tree of the function being processed. Null after construction;
+ /// assigned in runOnFunction before any call site is visited.
+ DominatorTree *DT;
+
+public:
+ static char ID;
+ OptimizeReturned() : FunctionPass(ID), DT(nullptr) {}
+
+ /// InstVisitor callback invoked for each call site reached by visit().
+ void visitCallSite(CallSite CS);
+};
+} // End anonymous namespace
+
+// Pass identification; the address of ID is what identifies the pass.
+char OptimizeReturned::ID = 0;
+/// Factory: returns a newly allocated OptimizeReturned pass. The caller
+/// (presumably the pass manager it is added to) takes ownership.
+FunctionPass *llvm::createWebAssemblyOptimizeReturned() {
+ return new OptimizeReturned();
+}
+
+/// For every argument of \p CS marked "returned", rewrite the uses of that
+/// argument which are dominated by the call so that they use the call's
+/// result instead. This lets later stages keep a single value live across the
+/// call rather than both the argument and the (identical) return value.
+void OptimizeReturned::visitCallSite(CallSite CS) {
+  Instruction *Inst = CS.getInstruction();
+  for (unsigned i = 0, e = CS.getNumArgOperands(); i < e; ++i) {
+    // Parameter attribute indices are 1-based; index 0 is the return value.
+    if (!CS.paramHasAttr(1 + i, Attribute::Returned))
+      continue;
+
+    Value *Arg = CS.getArgOperand(i);
+
+    // Ignore constants, globals, undef, etc. Their use lists are not limited
+    // to this function (and their users need not be instructions), so
+    // Instruction/Use dominance queries on them are not meaningful.
+    if (isa<Constant>(Arg))
+      continue;
+
+    // "returned" only requires the argument and return types to be
+    // bitcast-compatible. Substituting a value of a different type into a
+    // use would produce ill-typed IR, so be conservative and skip those.
+    if (Arg->getType() != Inst->getType())
+      continue;
+
+    // Like replaceDominatedUsesWith but using Instruction/Use dominance.
+    // Advance the iterator before U.set(), which unlinks U from Arg's list.
+    for (auto UI = Arg->use_begin(), UE = Arg->use_end(); UI != UE;) {
+      Use &U = *UI++;
+      if (DT->dominates(Inst, U))
+        U.set(Inst);
+    }
+  }
+}
+
+/// Pass entry point: caches the function's dominator tree in DT and then
+/// visits every instruction of \p F so that visitCallSite runs on each call.
+/// Always reports the function as modified, even if no use was rewritten.
+bool OptimizeReturned::runOnFunction(Function &F) {
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ visit(F);
+ return true;
+}
--- /dev/null
+//===-- WebAssemblyPeephole.cpp - WebAssembly Peephole Optimizations ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Late peephole optimizations for WebAssembly.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-peephole"
+
+namespace {
+/// Machine-function pass performing late peephole clean-ups on WebAssembly
+/// machine code.
+class WebAssemblyPeephole final : public MachineFunctionPass {
+ /// Human-readable pass name.
+ const char *getPassName() const override {
+ return "WebAssembly late peephole optimizer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+ static char ID;
+ WebAssemblyPeephole() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+// Pass identification; the address of ID is what identifies the pass.
+char WebAssemblyPeephole::ID = 0;
+/// Factory: returns a newly allocated WebAssemblyPeephole pass. The caller
+/// (presumably the pass manager it is added to) takes ownership.
+FunctionPass *llvm::createWebAssemblyPeephole() {
+ return new WebAssemblyPeephole();
+}
+
+/// Scan every instruction of \p MF and, for store instructions whose result
+/// register is the same register as the stored value, give the result a
+/// fresh dead, stackified virtual register so that it can be emitted as
+/// $discard.
+///
+/// \returns true if any instruction was modified.
+bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
+  for (auto &MBB : MF)
+    for (auto &MI : MBB)
+      switch (MI.getOpcode()) {
+      default:
+        break;
+      case WebAssembly::STORE8_I32:
+      case WebAssembly::STORE16_I32:
+      case WebAssembly::STORE8_I64:
+      case WebAssembly::STORE16_I64:
+      case WebAssembly::STORE32_I64:
+      case WebAssembly::STORE_F32:
+      case WebAssembly::STORE_F64:
+      case WebAssembly::STORE_I32:
+      case WebAssembly::STORE_I64: {
+        // Store instructions return their value operand. If we ended up using
+        // the same register for both, replace it with a dead def so that it
+        // can use $discard instead.
+        // Operand 0 is the store's result; operand 2 is the value stored.
+        MachineOperand &MO = MI.getOperand(0);
+        unsigned OldReg = MO.getReg();
+        if (OldReg == MI.getOperand(2).getReg()) {
+          unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+          MO.setReg(NewReg);
+          MO.setIsDead();
+          MFI.stackifyVReg(NewReg);
+          // The instruction was rewritten; report it to the pass manager.
+          Changed = true;
+        }
+        break;
+      }
+      }
+
+  return Changed;
+}
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<MachineBlockFrequencyInfo>();
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
/*isImp=*/true));
}
+// Test whether it's safe to move Def to just before Insert. Note that this
+// doesn't account for physical register dependencies, because WebAssembly
+// doesn't have any (other than special ones like EXPR_STACK).
+//
+// Def must precede Insert within the same basic block; if the two are in
+// different blocks the answer is conservatively "no", since the instructions
+// between them cannot be enumerated by walking a single block.
+//
+// TODO: Compute memory dependencies in a way that doesn't require always
+// walking the block.
+// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
+// more precise.
+static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
+                         AliasAnalysis &AA) {
+  // The backward walk below stops only when it reaches Def. If Def lives in
+  // another block, the walk would run off the beginning of Insert's block.
+  if (Def->getParent() != Insert->getParent())
+    return false;
+
+  // Walk the instructions strictly between Def and Insert, recording whether
+  // any of them stores (isSafeToMove updates SawStore as it goes) and whether
+  // any of them is itself unsafe to move, i.e. has side effects that Def
+  // would be reordered across.
+  bool SawStore = false, SawSideEffects = false;
+  MachineBasicBlock::const_iterator D(Def), I(Insert);
+  for (--I; I != D; --I)
+    SawSideEffects |= !I->isSafeToMove(&AA, SawStore);
+
+  // Def may not move past a store if it is a (non-invariant) load, and may
+  // not move past side effects unless Def itself is safe to move.
+  return !(SawStore && Def->mayLoad() && !Def->isInvariantLoad(&AA)) &&
+         !(SawSideEffects && !Def->isSafeToMove(&AA, SawStore));
+}
+
bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** Register Stackifying **********\n"
"********** Function: "
bool Changed = false;
MachineRegisterInfo &MRI = MF.getRegInfo();
WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
// Walk the instructions from the bottom up. Currently we don't look past
// block boundaries, and the blocks aren't ordered so the block visitation
if (Insert->getOpcode() == TargetOpcode::PHI)
break;
+ // Don't nest anything inside an inline asm, because we don't have
+ // constraints for $push inputs.
+ if (Insert->getOpcode() == TargetOpcode::INLINEASM)
+ break;
+
// Iterate through the inputs in reverse order, since we'll be pulling
// operands off the stack in LIFO order.
bool AnyStackified = false;
for (MachineOperand &Op : reverse(Insert->uses())) {
// We're only interested in explicit virtual register operands.
- if (!Op.isReg() || Op.isImplicit())
+ if (!Op.isReg() || Op.isImplicit() || !Op.isUse())
continue;
unsigned Reg = Op.getReg();
if (Def->getOpcode() == TargetOpcode::IMPLICIT_DEF)
continue;
+ // Don't nest an INLINE_ASM def into anything, because we don't have
+ // constraints for $pop outputs.
+ if (Def->getOpcode() == TargetOpcode::INLINEASM)
+ continue;
+
+ // Don't nest PHIs inside of anything.
+ if (Def->getOpcode() == TargetOpcode::PHI)
+ continue;
+
// Argument instructions represent live-in registers and not real
// instructions.
if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 ||
// they be trivially clonable.
// TODO: Eventually we'll relax this, to take advantage of set_local
// returning its result.
- bool OneUse = MRI.hasOneUse(Reg);
- if (!OneUse && !Def->isMoveImmediate())
+ if (!MRI.hasOneUse(Reg))
continue;
// For now, be conservative and don't look across block boundaries,
if (Def->getParent() != &MBB && !Def->isMoveImmediate())
continue;
- // For now, be simple and don't reorder loads, stores, or side effects.
- // TODO: Be more aggressive.
- if ((Def->mayLoad() || Def->mayStore() ||
- Def->hasUnmodeledSideEffects()))
+ // Don't move instructions that have side effects or memory dependencies
+ // or other complications.
+ if (!IsSafeToMove(Def, Insert, AA))
continue;
Changed = true;
AnyStackified = true;
- if (OneUse) {
- // Move the def down and nest it in the current instruction.
- MBB.insert(MachineBasicBlock::instr_iterator(Insert),
- Def->removeFromParent());
- MFI.stackifyVReg(Reg);
- ImposeStackOrdering(Def);
- Insert = Def;
- } else {
- // Clone the def down and nest it in the current instruction.
- MachineInstr *Clone = MF.CloneMachineInstr(Def);
- unsigned OldReg = Def->getOperand(0).getReg();
- unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
- assert(Op.getReg() == OldReg);
- assert(Clone->getOperand(0).getReg() == OldReg);
- Op.setReg(NewReg);
- Clone->getOperand(0).setReg(NewReg);
- MBB.insert(MachineBasicBlock::instr_iterator(Insert), Clone);
- MFI.stackifyVReg(Reg);
- ImposeStackOrdering(Clone);
- Insert = Clone;
- }
+ // Move the def down and nest it in the current instruction.
+ MBB.insert(MachineBasicBlock::instr_iterator(Insert),
+ Def->removeFromParent());
+ MFI.stackifyVReg(Reg);
+ ImposeStackOrdering(Def);
+ Insert = Def;
}
if (AnyStackified)
ImposeStackOrdering(&MI);
--- /dev/null
+//===-- WebAssemblyStoreResults.cpp - Optimize using store result values --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements an optimization pass using store result values.
+///
+/// WebAssembly's store instructions return the stored value, specifically to
+/// enable the optimization of reducing get_local/set_local traffic, which is
+/// what we're doing here.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-store-results"
+
+namespace {
+/// Machine-function pass that rewrites uses of a stored value to use the
+/// store instruction's result register where the store dominates the use.
+class WebAssemblyStoreResults final : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyStoreResults() : MachineFunctionPass(ID) {}
+
+ /// Human-readable pass name.
+ const char *getPassName() const override {
+ return "WebAssembly Store Results";
+ }
+
+ /// Declares that the pass preserves the CFG, and that it requires and
+ /// preserves both block-frequency info and the machine dominator tree.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+};
+} // end anonymous namespace
+
+// Pass identification; the address of ID is what identifies the pass.
+char WebAssemblyStoreResults::ID = 0;
+/// Factory: returns a newly allocated WebAssemblyStoreResults pass. The caller
+/// (presumably the pass manager it is added to) takes ownership.
+FunctionPass *llvm::createWebAssemblyStoreResults() {
+ return new WebAssemblyStoreResults();
+}
+
+/// For every store instruction in \p MF, replace uses of the stored value
+/// that are dominated by the store with the store's result register. This
+/// shortens the live range of the original value and gives the register
+/// stackifier a chance to nest the store into its user.
+///
+/// \returns true if any operand was rewritten.
+bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG({
+    dbgs() << "********** Store Results **********\n"
+           << "********** Function: " << MF.getName() << '\n';
+  });
+
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+  bool Changed = false;
+
+  for (auto &MBB : MF)
+    for (auto &MI : MBB)
+      switch (MI.getOpcode()) {
+      default:
+        break;
+      case WebAssembly::STORE8_I32:
+      case WebAssembly::STORE16_I32:
+      case WebAssembly::STORE8_I64:
+      case WebAssembly::STORE16_I64:
+      case WebAssembly::STORE32_I64:
+      case WebAssembly::STORE_F32:
+      case WebAssembly::STORE_F64:
+      case WebAssembly::STORE_I32:
+      case WebAssembly::STORE_I64: {
+        // Operand 0 is the store's result; operand 2 is the value stored.
+        unsigned ToReg = MI.getOperand(0).getReg();
+        unsigned FromReg = MI.getOperand(2).getReg();
+        // Advance the iterator before setReg(), which moves the operand to
+        // another register's use list.
+        for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) {
+          MachineOperand &O = *I++;
+          MachineInstr *Where = O.getParent();
+          if (Where->getOpcode() == TargetOpcode::PHI) {
+            // PHIs use their operands on their incoming CFG edges rather than
+            // in their parent blocks. Get the block paired with this use of
+            // FromReg (the operand right after it) and check that MI's block
+            // dominates it. Querying the incoming block's first terminator
+            // instead would be invalid for blocks that simply fall through.
+            MachineBasicBlock *Pred =
+                Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB();
+            if (!MDT.dominates(&MBB, Pred))
+              continue;
+          } else {
+            // For a non-PHI, check that MI dominates the user in the normal
+            // way. The store's own value operand is never rewritten.
+            if (&MI == Where || !MDT.dominates(&MI, Where))
+              continue;
+          }
+          O.setReg(ToReg);
+          Changed = true;
+        }
+        break;
+      }
+      }
+
+  return Changed;
+}
// control specifically what gets lowered.
addPass(createAtomicExpandPass(TM));
+ // Optimize "returned" function attributes.
+ addPass(createWebAssemblyOptimizeReturned());
+
TargetPassConfig::addIRPasses();
}
bool WebAssemblyPassConfig::addILPOpts() { return true; }
void WebAssemblyPassConfig::addPreRegAlloc() {
+ // Prepare store instructions for register stackifying.
+ addPass(createWebAssemblyStoreResults());
+
// Mark registers as representing wasm's expression stack.
addPass(createWebAssemblyRegStackify());
}
void WebAssemblyPassConfig::addPreEmitPass() {
addPass(createWebAssemblyCFGStackify());
addPass(createWebAssemblyRegNumbering());
+ addPass(createWebAssemblyPeephole());
}
; CHECK-LABEL: single_block:
; CHECK-NOT: br
-; CHECK: return ${{[0-9]+}}{{$}}
+; CHECK: return $pop{{[0-9]+}}{{$}}
define i32 @single_block(i32* %p) {
entry:
store volatile i32 0, i32* %p
; CHECK: foo:
; CHECK: i32.const $push0=, answer{{$}}
-; CHECK-NEXT: i32.load $0=, $pop0{{$}}
-; CHECK-NEXT: return $0{{$}}
+; CHECK-NEXT: i32.load $push1=, $pop0{{$}}
+; CHECK-NEXT: return $pop1{{$}}
define i32 @foo() {
%a = load i32, i32* @answer
ret i32 %a
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: sext_i8_i32:
-; CHECK: i32.load8_s $0=, $0{{$}}
-; CHECK-NEXT: return $0{{$}}
+; CHECK: i32.load8_s $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
define i32 @sext_i8_i32(i8 *%p) {
%v = load i8, i8* %p
%e = sext i8 %v to i32
}
; CHECK-LABEL: zext_i8_i32:
-; CHECK: i32.load8_u $0=, $0{{$}}
-; CHECK-NEXT: return $0{{$}}
+; CHECK: i32.load8_u $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
define i32 @zext_i8_i32(i8 *%p) {
%v = load i8, i8* %p
%e = zext i8 %v to i32
}
; CHECK-LABEL: sext_i16_i32:
-; CHECK: i32.load16_s $0=, $0{{$}}
-; CHECK-NEXT: return $0{{$}}
+; CHECK: i32.load16_s $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
define i32 @sext_i16_i32(i16 *%p) {
%v = load i16, i16* %p
%e = sext i16 %v to i32
}
; CHECK-LABEL: zext_i16_i32:
-; CHECK: i32.load16_u $0=, $0{{$}}
-; CHECK-NEXT: return $0{{$}}
+; CHECK: i32.load16_u $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
define i32 @zext_i16_i32(i16 *%p) {
%v = load i16, i16* %p
%e = zext i16 %v to i32
}
; CHECK-LABEL: sext_i8_i64:
-; CHECK: i64.load8_s $1=, $0{{$}}
-; CHECK-NEXT: return $1{{$}}
+; CHECK: i64.load8_s $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
define i64 @sext_i8_i64(i8 *%p) {
%v = load i8, i8* %p
%e = sext i8 %v to i64
}
; CHECK-LABEL: zext_i8_i64:
-; CHECK: i64.load8_u $1=, $0{{$}}
-; CHECK-NEXT: return $1{{$}}
+; CHECK: i64.load8_u $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
define i64 @zext_i8_i64(i8 *%p) {
%v = load i8, i8* %p
%e = zext i8 %v to i64
}
; CHECK-LABEL: sext_i16_i64:
-; CHECK: i64.load16_s $1=, $0{{$}}
-; CHECK-NEXT: return $1{{$}}
+; CHECK: i64.load16_s $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
define i64 @sext_i16_i64(i16 *%p) {
%v = load i16, i16* %p
%e = sext i16 %v to i64
}
; CHECK-LABEL: zext_i16_i64:
-; CHECK: i64.load16_u $1=, $0{{$}}
-; CHECK-NEXT: return $1{{$}}
+; CHECK: i64.load16_u $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
define i64 @zext_i16_i64(i16 *%p) {
%v = load i16, i16* %p
%e = zext i16 %v to i64
}
; CHECK-LABEL: sext_i32_i64:
-; CHECK: i64.load32_s $1=, $0{{$}}
-; CHECK-NEXT: return $1{{$}}
+; CHECK: i64.load32_s $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
define i64 @sext_i32_i64(i32 *%p) {
%v = load i32, i32* %p
%e = sext i32 %v to i64
}
; CHECK-LABEL: zext_i32_i64:
-; CHECK: i64.load32_u $1=, $0{{$}}
-; CHECK: return $1{{$}}
+; CHECK: i64.load32_u $push0=, $0{{$}}
+; CHECK: return $pop0{{$}}
define i64 @zext_i32_i64(i32 *%p) {
%v = load i32, i32* %p
%e = zext i32 %v to i64
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: load_u_i1_i32:
-; CHECK: i32.load8_u $[[NUM0:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $[[NUM0]]{{$}}
+; CHECK: i32.load8_u $push[[NUM0:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM0]]{{$}}
define i32 @load_u_i1_i32(i1* %p) {
%v = load i1, i1* %p
%e = zext i1 %v to i32
}
; CHECK-LABEL: load_s_i1_i32:
-; CHECK: i32.load8_u $[[NUM0:[0-9]+]]=, $0{{$}}
+; CHECK: i32.load8_u $push[[NUM0:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: i32.const $[[NUM1:[0-9]+]]=, 31{{$}}
-; CHECK-NEXT: shl $push[[NUM2:[0-9]+]]=, $[[NUM0]], $[[NUM1]]{{$}}
+; CHECK-NEXT: shl $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $[[NUM1]]{{$}}
; CHECK-NEXT: shr_s $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $[[NUM1]]{{$}}
; CHECK-NEXT: return $pop[[NUM3]]{{$}}
define i32 @load_s_i1_i32(i1* %p) {
}
; CHECK-LABEL: load_u_i1_i64:
-; CHECK: i64.load8_u $[[NUM0:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $[[NUM0]]{{$}}
+; CHECK: i64.load8_u $push[[NUM0:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM0]]{{$}}
define i64 @load_u_i1_i64(i1* %p) {
%v = load i1, i1* %p
%e = zext i1 %v to i64
}
; CHECK-LABEL: load_s_i1_i64:
-; CHECK: i64.load8_u $[[NUM0:[0-9]+]]=, $0{{$}}
+; CHECK: i64.load8_u $push[[NUM0:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: i64.const $[[NUM1:[0-9]+]]=, 63{{$}}
-; CHECK-NEXT: shl $push[[NUM2:[0-9]+]]=, $[[NUM0]], $[[NUM1]]{{$}}
+; CHECK-NEXT: shl $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $[[NUM1]]{{$}}
; CHECK-NEXT: shr_s $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $[[NUM1]]{{$}}
; CHECK-NEXT: return $pop[[NUM3]]{{$}}
define i64 @load_s_i1_i64(i1* %p) {
; CHECK-LABEL: ldi32:
; CHECK-NEXT: .param i32{{$}}
; CHECK-NEXT: .result i32{{$}}
-; CHECK-NEXT: i32.load $[[NUM:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $[[NUM]]{{$}}
+; CHECK-NEXT: i32.load $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
define i32 @ldi32(i32 *%p) {
%v = load i32, i32* %p
ret i32 %v
; CHECK-LABEL: ldi64:
; CHECK-NEXT: .param i32{{$}}
; CHECK-NEXT: .result i64{{$}}
-; CHECK-NEXT: .local i64{{$}}
-; CHECK-NEXT: i64.load $[[NUM:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $[[NUM]]{{$}}
+; CHECK-NEXT: i64.load $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
define i64 @ldi64(i64 *%p) {
%v = load i64, i64* %p
ret i64 %v
; CHECK-LABEL: ldf32:
; CHECK-NEXT: .param i32{{$}}
; CHECK-NEXT: .result f32{{$}}
-; CHECK-NEXT: .local f32{{$}}
-; CHECK-NEXT: f32.load $[[NUM:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $[[NUM]]{{$}}
+; CHECK-NEXT: f32.load $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
define float @ldf32(float *%p) {
%v = load float, float* %p
ret float %v
; CHECK-LABEL: ldf64:
; CHECK-NEXT: .param i32{{$}}
; CHECK-NEXT: .result f64{{$}}
-; CHECK-NEXT: .local f64{{$}}
-; CHECK-NEXT: f64.load $[[NUM:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $[[NUM]]{{$}}
+; CHECK-NEXT: f64.load $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
define double @ldf64(double *%p) {
%v = load double, double* %p
ret double %v
; CHECK-LABEL: memory_size:
; CHECK-NEXT: .result i32{{$}}
-; CHECK-NEXT: .local i32{{$}}
-; CHECK-NEXT: memory_size $0={{$}}
-; CHECK-NEXT: return $0{{$}}
+; CHECK-NEXT: memory_size $push0={{$}}
+; CHECK-NEXT: return $pop0{{$}}
define i32 @memory_size() {
%a = call i32 @llvm.wasm.memory.size.i32()
ret i32 %a
; CHECK-LABEL: memory_size:
; CHECK-NEXT: .result i64{{$}}
-; CHECK-NEXT: .local i64{{$}}
-; CHECK-NEXT: memory_size $0={{$}}
-; CHECK-NEXT: return $0{{$}}
+; CHECK-NEXT: memory_size $push0={{$}}
+; CHECK-NEXT: return $pop0{{$}}
define i64 @memory_size() {
%a = call i64 @llvm.wasm.memory.size.i64()
ret i64 %a
--- /dev/null
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test the register stackifier pass.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; No because of pointer aliasing.
+
+; CHECK-LABEL: no0:
+; CHECK: return $1{{$}}
+define i32 @no0(i32* %p, i32* %q) {
+ %t = load i32, i32* %q
+ store i32 0, i32* %p
+ ret i32 %t
+}
+
+; No because of side effects.
+
+; CHECK-LABEL: no1:
+; CHECK: return $1{{$}}
+define i32 @no1(i32* %p, i32* dereferenceable(4) %q) {
+ %t = load volatile i32, i32* %q, !invariant.load !0
+ store volatile i32 0, i32* %p
+ ret i32 %t
+}
+
+; Yes because of invariant load and no side effects.
+
+; CHECK-LABEL: yes0:
+; CHECK: return $pop0{{$}}
+define i32 @yes0(i32* %p, i32* dereferenceable(4) %q) {
+ %t = load i32, i32* %q, !invariant.load !0
+ store i32 0, i32* %p
+ ret i32 %t
+}
+
+; Yes because of no intervening side effects.
+
+; CHECK-LABEL: yes1:
+; CHECK: return $pop0{{$}}
+define i32 @yes1(i32* %q) {
+ %t = load volatile i32, i32* %q
+ ret i32 %t
+}
+
+!0 = !{}
--- /dev/null
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that the "returned" attribute is optimized effectively.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: _Z3foov:
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.const $push0=, 1{{$}}
+; CHECK-NEXT: call $push1=, _Znwm, $pop0{{$}}
+; CHECK-NEXT: call $push2=, _ZN5AppleC1Ev, $pop1{{$}}
+; CHECK-NEXT: return $pop2{{$}}
+%class.Apple = type { i8 }
+declare noalias i8* @_Znwm(i32)
+declare %class.Apple* @_ZN5AppleC1Ev(%class.Apple* returned)
+define %class.Apple* @_Z3foov() {
+entry:
+ %call = tail call noalias i8* @_Znwm(i32 1)
+ %0 = bitcast i8* %call to %class.Apple*
+ %call1 = tail call %class.Apple* @_ZN5AppleC1Ev(%class.Apple* %0)
+ ret %class.Apple* %0
+}
+
+; CHECK-LABEL: _Z3barPvS_l:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: call $push0=, memcpy, $0, $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+declare i8* @memcpy(i8* returned, i8*, i32)
+define i8* @_Z3barPvS_l(i8* %p, i8* %s, i32 %n) {
+entry:
+ %call = tail call i8* @memcpy(i8* %p, i8* %s, i32 %n)
+ ret i8* %p
+}