LLVM support for the vector quad bit permute (vbpermq) and gather (vgbbd) instructions through builtins

author Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Thu, 11 Jun 2015 06:21:25 +0000 (06:21 +0000)

committer Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Thu, 11 Jun 2015 06:21:25 +0000 (06:21 +0000)
diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td

index 79654695837dd5253c763c9da68b48c4f434fc9a..d680085eaf327739347ce90e4484820ad2f518da 100644 (file)
--- a/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/include/llvm/IR/IntrinsicsPowerPC.td
@@ -608,6 +608,11 @@ let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
    def int_ppc_altivec_vsel : GCCBuiltin<"__builtin_altivec_vsel_4si">,
                Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, 
                           llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vgbbd : GCCBuiltin<"__builtin_altivec_vgbbd">,
+              Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vbpermq : GCCBuiltin<"__builtin_altivec_vbpermq">,
+              Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+                        [IntrNoMem]>;
  }
  
  def int_ppc_altivec_vexptefp  : PowerPC_Vec_FF_Intrinsic<"vexptefp">;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td

index e27bf7f5c0e0bd9aa357762cb44d30b1e663b9b1..9ff604bbee9de43380e182963452d943ab6acaea 100644 (file)
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1142,7 +1142,9 @@ def:Pat<(vpkudum_unary_shuffle v16i8:$vA, undef),
  def:Pat<(vpkudum_swapped_shuffle v16i8:$vA, v16i8:$vB),
          (VPKUDUM $vB, $vA)>;
  
-
+def VGBBD : VX2_Int_Ty2<1292, "vgbbd", int_ppc_altivec_vgbbd, v16i8, v16i8>;
+def VBPERMQ : VX1_Int_Ty2<1356, "vbpermq", int_ppc_altivec_vbpermq,
+                          v2i64, v16i8>;
  } // end HasP8Altivec
  
  // Crypto instructions (from builtins)
diff --git a/test/CodeGen/PowerPC/builtins-ppc-p8vector.ll b/test/CodeGen/PowerPC/builtins-ppc-p8vector.ll

new file mode 100644 (file)

index 0000000..37111ef
--- /dev/null
+++ b/test/CodeGen/PowerPC/builtins-ppc-p8vector.ll
@@ -0,0 +1,91 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+power8-vector -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-VSX
+
+@vsc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>, align 16
+@vuc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>, align 16
+@res_vll = common global <2 x i64> zeroinitializer, align 16
+@res_vull = common global <2 x i64> zeroinitializer, align 16
+@res_vsc = common global <16 x i8> zeroinitializer, align 16
+@res_vuc = common global <16 x i8> zeroinitializer, align 16
+
+; Function Attrs: nounwind
+define void @test1() {
+entry:
+  %__a.addr.i = alloca <16 x i8>, align 16
+  %__b.addr.i = alloca <16 x i8>, align 16
+  %0 = load <16 x i8>, <16 x i8>* @vsc, align 16
+  %1 = load <16 x i8>, <16 x i8>* @vsc, align 16
+  store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16
+  store <16 x i8> %1, <16 x i8>* %__b.addr.i, align 16
+  %2 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16
+  %3 = load <16 x i8>, <16 x i8>* %__b.addr.i, align 16
+  %4 = call <2 x i64> @llvm.ppc.altivec.vbpermq(<16 x i8> %2, <16 x i8> %3)
+  store <2 x i64> %4, <2 x i64>* @res_vll, align 16
+  ret void
+; CHECK-LABEL: @test1
+; CHECK: lvx [[REG1:[0-9]+]],
+; CHECK: lvx [[REG2:[0-9]+]],
+; CHECK: vbpermq {{[0-9]+}}, [[REG2]], [[REG1]]
+; CHECK-VSX: vbpermq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define void @test2() {
+entry:
+  %__a.addr.i = alloca <16 x i8>, align 16
+  %__b.addr.i = alloca <16 x i8>, align 16
+  %0 = load <16 x i8>, <16 x i8>* @vuc, align 16
+  %1 = load <16 x i8>, <16 x i8>* @vuc, align 16
+  store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16
+  store <16 x i8> %1, <16 x i8>* %__b.addr.i, align 16
+  %2 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16
+  %3 = load <16 x i8>, <16 x i8>* %__b.addr.i, align 16
+  %4 = call <2 x i64> @llvm.ppc.altivec.vbpermq(<16 x i8> %2, <16 x i8> %3)
+  store <2 x i64> %4, <2 x i64>* @res_vull, align 16
+  ret void
+; CHECK-LABEL: @test2
+; CHECK: lvx [[REG1:[0-9]+]],
+; CHECK: lvx [[REG2:[0-9]+]],
+; CHECK: vbpermq {{[0-9]+}}, [[REG2]], [[REG1]]
+; CHECK-VSX: vbpermq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define void @test3() {
+entry:
+  %__a.addr.i = alloca <16 x i8>, align 16
+  %0 = load <16 x i8>, <16 x i8>* @vsc, align 16
+  store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16
+  %1 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16
+  %2 = call <16 x i8> @llvm.ppc.altivec.vgbbd(<16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @res_vsc, align 16
+  ret void
+; CHECK-LABEL: @test3
+; CHECK: lvx [[REG1:[0-9]+]],
+; CHECK: vgbbd {{[0-9]+}}, [[REG1]]
+; CHECK-VSX: vgbbd {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define void @test4() {
+entry:
+  %__a.addr.i = alloca <16 x i8>, align 16
+  %0 = load <16 x i8>, <16 x i8>* @vuc, align 16
+  store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16
+  %1 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16
+  %2 = call <16 x i8> @llvm.ppc.altivec.vgbbd(<16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @res_vuc, align 16
+  ret void
+; CHECK-LABEL: @test4
+; CHECK: lvx [[REG1:[0-9]+]],
+; CHECK: vgbbd {{[0-9]+}}, [[REG1]]
+; CHECK-VSX: vgbbd {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vbpermq(<16 x i8>, <16 x i8>)
+
+; Function Attrs: nounwind readnone
+declare <16 x i8> @llvm.ppc.altivec.vgbbd(<16 x i8>)
diff --git a/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt b/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt

index a6e2367efcdb35664bcbf705baaf0149c6e2e529..0e3a83f6d3a538e84cf3ff355cb4b7bbd60e0647 100644 (file)
--- a/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
+++ b/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
@@ -630,6 +630,12 @@
  # CHECK: vrsqrtefp 2, 3                  
  0x10 0x40 0x19 0x4a
  
+# CHECK: vgbbd 2, 3
+0x10 0x40 0x1d 0x0c
+
+# CHECK: vbpermq 2, 5, 17
+0x10 0x45 0x8d 0x4c
+
  # CHECK: vclzb 2, 3
  0x10 0x40 0x1f 0x02
  
diff --git a/test/MC/PowerPC/ppc64-encoding-vmx.s b/test/MC/PowerPC/ppc64-encoding-vmx.s

index 51cae3fd2df9846c9a4fb5b9a7d2f96c9ba9787a..5c62d2a6c95532be380e36c9922a5930ec164dd2 100644 (file)
--- a/test/MC/PowerPC/ppc64-encoding-vmx.s
+++ b/test/MC/PowerPC/ppc64-encoding-vmx.s
@@ -686,6 +686,12 @@
  # CHECK-BE: vrsqrtefp 2, 3                  # encoding: [0x10,0x40,0x19,0x4a]
  # CHECK-LE: vrsqrtefp 2, 3                  # encoding: [0x4a,0x19,0x40,0x10]
              vrsqrtefp 2, 3
+# CHECK-BE: vgbbd 2, 3                      # encoding: [0x10,0x40,0x1d,0x0c]
+# CHECK-LE: vgbbd 2, 3                      # encoding: [0x0c,0x1d,0x40,0x10]
+            vgbbd 2, 3
+# CHECK-BE: vbpermq 2, 5, 17                # encoding: [0x10,0x45,0x8d,0x4c]
+# CHECK-LE: vbpermq 2, 5, 17                # encoding: [0x4c,0x8d,0x45,0x10]
+            vbpermq 2, 5, 17
  
  # Vector count leading zero instructions
  # CHECK-BE: vclzb 2, 3                      # encoding: [0x10,0x40,0x1f,0x02]
author	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
	Thu, 11 Jun 2015 06:21:25 +0000 (06:21 +0000)
committer	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
	Thu, 11 Jun 2015 06:21:25 +0000 (06:21 +0000)
include/llvm/IR/IntrinsicsPowerPC.td		patch | blob | history
lib/Target/PowerPC/PPCInstrAltivec.td		patch | blob | history
test/CodeGen/PowerPC/builtins-ppc-p8vector.ll	[new file with mode: 0644]	patch | blob
test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt		patch | blob | history
test/MC/PowerPC/ppc64-encoding-vmx.s		patch | blob | history