InstCombiner::BuilderTy &Builder) {
if (auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
VectorType *VecTy = cast<VectorType>(II.getType());
- uint8_t Imm = CInt->getZExtValue();
+ ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
// The immediate permute control byte looks like this:
// [1:0] - select 128 bits from sources for low half of destination
// [2] - ignore
// [3] - zero low half of destination
// [5:4] - select 128 bits from sources for high half of destination
// [6] - ignore
// [7] - zero high half of destination
-
- if ((Imm & 0x88) == 0x88) {
- // If both zero mask bits are set, this was just a weird way to
- // generate a zero vector.
- return ConstantAggregateZero::get(VecTy);
- }
- // TODO: If a single zero bit is set, replace one of the source operands
- // with a zero vector and use the same mask generation logic as below.
+ uint8_t Imm = CInt->getZExtValue();
+
+ bool LowHalfZero = Imm & 0x08;
+ bool HighHalfZero = Imm & 0x80;
- if ((Imm & 0x88) == 0x00) {
- // If neither zero mask bit is set, this is a simple shuffle.
- unsigned NumElts = VecTy->getNumElements();
- unsigned HalfSize = NumElts / 2;
- unsigned HalfBegin;
- SmallVector<int, 8> ShuffleMask(NumElts);
+ // If both zero mask bits are set, this was just a weird way to
+ // generate a zero vector.
+ if (LowHalfZero && HighHalfZero)
+ return ZeroVector;
- // Permute low half of result.
- HalfBegin = (Imm & 0x3) * HalfSize;
- for (unsigned i = 0; i != HalfSize; ++i)
- ShuffleMask[i] = HalfBegin + i;
+ // If 0 or 1 zero mask bits are set, this is a simple shuffle.
+ unsigned NumElts = VecTy->getNumElements();
+ unsigned HalfSize = NumElts / 2;
+ SmallVector<int, 8> ShuffleMask(NumElts);
+
+ // The high bit of the selection field chooses the 1st or 2nd operand.
+ bool LowInputSelect = Imm & 0x02;
+ bool HighInputSelect = Imm & 0x20;
- // Permute high half of result.
- HalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
- for (unsigned i = HalfSize; i != NumElts; ++i)
- ShuffleMask[i] = HalfBegin + i - HalfSize;
-
- Value *Op0 = II.getArgOperand(0);
- Value *Op1 = II.getArgOperand(1);
- return Builder.CreateShuffleVector(Op0, Op1, ShuffleMask);
- }
+ // The low bit of the selection field chooses the low or high half
+ // of the selected operand.
+ bool LowHalfSelect = Imm & 0x01;
+ bool HighHalfSelect = Imm & 0x10;
+
+ // Determine which operand(s) are actually in use for this instruction.
+ Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+ Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+
+ // If needed, replace operands based on zero mask.
+ V0 = LowHalfZero ? ZeroVector : V0;
+ V1 = HighHalfZero ? ZeroVector : V1;
+
+ // Permute low half of result.
+ unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
+ for (unsigned i = 0; i < HalfSize; ++i)
+ ShuffleMask[i] = StartIndex + i;
+
+ // Permute high half of result.
+ StartIndex = HighHalfSelect ? HalfSize : 0;
+ StartIndex += NumElts;
+ for (unsigned i = 0; i < HalfSize; ++i)
+ ShuffleMask[i + HalfSize] = StartIndex + i;
+
+ return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
}
return nullptr;
}
ret <4 x double> %res
; CHECK-LABEL: @perm2pd_0x02
-; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
+; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> %1
}
ret <4 x double> %res
; CHECK-LABEL: @perm2pd_0x03
-; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> %1
}
ret <4 x double> %res
; CHECK-LABEL: @perm2pd_0x12
-; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> %1
}
ret <4 x double> %res
; CHECK-LABEL: @perm2pd_0x13
-; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
+; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> %1
}
}
-; Confirm that when a single zero mask bit is set, we do nothing.
+; Confirm that when a single zero mask bit is set, we replace a source vector with zeros.
+
+define <4 x double> @perm2pd_0x81(<4 x double> %a0, <4 x double> %a1) {
+ %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 129)
+ ret <4 x double> %res
+
+; CHECK-LABEL: @perm2pd_0x81
+; CHECK-NEXT: shufflevector <4 x double> %a0, <4 x double> <double 0.0{{.*}}<4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; CHECK-NEXT: ret <4 x double>
+}
define <4 x double> @perm2pd_0x83(<4 x double> %a0, <4 x double> %a1) {
%res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 131)
ret <4 x double> %res
; CHECK-LABEL: @perm2pd_0x83
-; CHECK-NEXT: call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 -125)
+; CHECK-NEXT: shufflevector <4 x double> %a1, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double>
}
+define <4 x double> @perm2pd_0x28(<4 x double> %a0, <4 x double> %a1) {
+ %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 40)
+ ret <4 x double> %res
-; Confirm that when the other zero mask bit is set, we do nothing. Also confirm that an ignored bit has no effect.
+; CHECK-LABEL: @perm2pd_0x28
+; CHECK-NEXT: shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: ret <4 x double>
+}
-define <4 x double> @perm2pd_0x48(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 72)
+define <4 x double> @perm2pd_0x08(<4 x double> %a0, <4 x double> %a1) {
+ %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 8)
ret <4 x double> %res
-; CHECK-LABEL: @perm2pd_0x48
-; CHECK-NEXT: call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 72)
+; CHECK-LABEL: @perm2pd_0x08
+; CHECK-NEXT: shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double>
}