From: Bob Wilson Date: Tue, 30 Nov 2010 00:00:42 +0000 (+0000) Subject: Fix the encoding of VLD4-dup alignment. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=8e0c7b52877983b4838e54e233449912fc1a2325;p=oota-llvm.git Fix the encoding of VLD4-dup alignment. The only reasonable way I could find to do this is to provide an alternate version of the addrmode6 operand with a different encoding function. Use it for all the VLD-dup instructions for the sake of consistency. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120358 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 5872fcd8443..5f18673a3c8 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -191,6 +191,8 @@ namespace { const { return 0; } unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } + unsigned getAddrMode6DupAddressOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } unsigned getAddrMode6OffsetOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index b9b1b7e2929..d2407c6b3cf 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -540,6 +540,15 @@ def am6offset : Operand { let EncoderMethod = "getAddrMode6OffsetOpValue"; } +// Special version of addrmode6 to handle alignment encoding for VLD-dup +// instructions, specifically VLD4-dup. +def addrmode6dup : Operand, + ComplexPattern{ + let PrintMethod = "printAddrMode6Operand"; + let MIOperandInfo = (ops GPR:$addr, i32imm); + let EncoderMethod = "getAddrMode6DupAddressOpValue"; +} + // addrmodepc := pc + reg // def addrmodepc : Operand, diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 9ce3ac52e81..24a2d1a2c8d 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -794,15 +794,15 @@ def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo; // VLD1DUP : Vector Load (single element to all lanes) class VLD1DUP op7_4, string Dt, ValueType Ty, PatFrag LoadOp> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6:$Rn), + : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn), IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "", - [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6:$Rn)))))]> { + [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; } class VLD1QDUPPseudo : VLDQPseudo { let Pattern = [(set QPR:$dst, - (Ty (NEONvdup (i32 (LoadOp addrmode6:$addr)))))]; + (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))]; } def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>; @@ -817,7 +817,7 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { class VLD1QDUP op7_4, string Dt, ValueType Ty, PatFrag LoadOp> : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2), - (ins addrmode6:$Rn), IIC_VLD1dup, + (ins addrmode6dup:$Rn), IIC_VLD1dup, "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; @@ -830,13 +830,13 @@ def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>; // ...with address register writeback: class VLD1DUPWB op7_4, string Dt> : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1dupu, + (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu, "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; } class VLD1QDUPWB op7_4, string Dt> : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1dupu, + (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu, "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; } @@ -856,7 +856,7 @@ def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo; // VLD2DUP : Vector Load (single 2-element structure to all lanes) class VLD2DUP op7_4, string Dt> : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2), - (ins addrmode6:$Rn), IIC_VLD2dup, + (ins addrmode6dup:$Rn), IIC_VLD2dup, "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; @@ -878,7 +878,7 @@ def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">; // ...with address register writeback: class VLD2DUPWB op7_4, string Dt> : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2dupu, + (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu, "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; } @@ -898,7 +898,7 @@ def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo; // VLD3DUP : Vector Load (single 3-element structure to all lanes) class VLD3DUP op7_4, string Dt> : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), - (ins addrmode6:$Rn), IIC_VLD3dup, + (ins addrmode6dup:$Rn), IIC_VLD3dup, "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; @@ -920,7 +920,7 @@ def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">; // ...with address register writeback: class VLD3DUPWB op7_4, string Dt> : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3dupu, + (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu, "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; @@ -942,51 +942,42 @@ def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo; class VLD4DUP op7_4, string Dt> : NLdSt<1, 0b10, 0b1111, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$Rn), IIC_VLD4dup, + (ins addrmode6dup:$Rn), IIC_VLD4dup, "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> { let Rm = 0b1111; + let Inst{4} = Rn{4}; } -def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8"> { let Inst{4} = Rn{4}; } -def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16"> { let Inst{4} = Rn{4}; } -def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { - let Inst{6} = Rn{5}; - let Inst{4} = Rn{5}; -} +def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">; +def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">; +def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } def VLD4DUPd8Pseudo : VLDQQPseudo; def VLD4DUPd16Pseudo : VLDQQPseudo; def VLD4DUPd32Pseudo : VLDQQPseudo; // ...with double-spaced registers (not used for codegen): -def VLD4DUPd8x2 : VLD4DUP<{0,0,1,?}, "8"> { let Inst{4} = Rn{4}; } -def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16"> { let Inst{4} = Rn{4}; } -def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { - let Inst{6} = Rn{5}; - let Inst{4} = Rn{5}; -} +def VLD4DUPd8x2 : VLD4DUP<{0,0,1,?}, "8">; +def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">; +def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } // ...with address register writeback: class VLD4DUPWB op7_4, string Dt> : NLdSt<1, 0b10, 0b1111, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4dupu, + (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu, "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm", - "$Rn.addr = $wb", []>; - -def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8"> { let Inst{4} = Rn{4}; } -def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16"> { let Inst{4} = Rn{4}; } -def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { - let Inst{6} = Rn{5}; - let Inst{4} = Rn{5}; + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; } -def VLD4DUPd8x2_UPD : VLD4DUPWB<{0,0,1,0}, "8"> { let Inst{4} = Rn{4}; } -def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16"> { let Inst{4} = Rn{4}; } -def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { - let Inst{6} = Rn{5}; - let Inst{4} = Rn{5}; -} +def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">; +def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">; +def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } + +def VLD4DUPd8x2_UPD : VLD4DUPWB<{0,0,1,0}, "8">; +def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">; +def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo; def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo; diff --git a/lib/Target/ARM/ARMMCCodeEmitter.cpp b/lib/Target/ARM/ARMMCCodeEmitter.cpp index b33c32608c0..2e9d804894e 100644 --- a/lib/Target/ARM/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/ARMMCCodeEmitter.cpp @@ -214,6 +214,8 @@ public: SmallVectorImpl &Fixups) const; unsigned getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups) const; + unsigned getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; unsigned getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups) const; @@ -775,6 +777,8 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, return Binary; } +/// getAddrMode6AddressOpValue - Encode an addrmode6 register number along +/// with the alignment operand. unsigned ARMMCCodeEmitter:: getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups) const { @@ -796,6 +800,30 @@ getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op, return RegNo | (Align << 4); } +/// getAddrMode6DupAddressOpValue - Encode an addrmode6 register number and +/// alignment operand for use in VLD-dup instructions. This is the same as +/// getAddrMode6AddressOpValue except for the alignment encoding, which is +/// different for VLD4-dup. +unsigned ARMMCCodeEmitter:: +getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const { + const MCOperand &Reg = MI.getOperand(Op); + const MCOperand &Imm = MI.getOperand(Op + 1); + + unsigned RegNo = getARMRegisterNumbering(Reg.getReg()); + unsigned Align = 0; + + switch (Imm.getImm()) { + default: break; + case 2: + case 4: + case 8: Align = 0x01; break; + case 16: Align = 0x03; break; + } + + return RegNo | (Align << 4); +} + unsigned ARMMCCodeEmitter:: getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups) const { diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp index ab52e8bd267..61d4ccdda9e 100644 --- a/utils/TableGen/EDEmitter.cpp +++ b/utils/TableGen/EDEmitter.cpp @@ -606,6 +606,7 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type, MISC("addrmode5", "kOperandTypeARMAddrMode5"); // R, I MISC("addrmode6", "kOperandTypeARMAddrMode6"); // R, R, I, I MISC("am6offset", "kOperandTypeARMAddrMode6Offset"); // R, I, I + MISC("addrmode6dup", "kOperandTypeARMAddrMode6"); // R, R, I, I MISC("addrmodepc", "kOperandTypeARMAddrModePC"); // R, I MISC("reglist", "kOperandTypeARMRegisterList"); // I, R, ... MISC("dpr_reglist", "kOperandTypeARMDPRRegisterList"); // I, R, ...