Masked Gather and Scatter Intrinsics.
author: Elena Demikhovsky <elena.demikhovsky@intel.com>
Sun, 8 Feb 2015 08:27:19 +0000 (08:27 +0000)
committer: Elena Demikhovsky <elena.demikhovsky@intel.com>
Sun, 8 Feb 2015 08:27:19 +0000 (08:27 +0000)
Gather and Scatter are newly introduced intrinsics, coming after the recently implemented masked load and store.
This is the first patch for Gather and Scatter intrinsics. It includes only the syntax, parsing and verification.

Gather and Scatter intrinsics allow multiple memory accesses (read/write) to be performed in one vector instruction.
The intrinsics are not target specific and will have the following syntax:
Gather:
declare <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> <vector of ptrs>, i32 <alignment>, <16 x i1> <mask>, <16 x i32> <passthru>)
declare <8 x float> @llvm.masked.gather.v8f32(<8 x float*> <vector of ptrs>, i32 <alignment>, <8 x i1> <mask>, <8 x float> <passthru>)

Scatter:
declare void @llvm.masked.scatter.v8i32(<8 x i32> <vector value to be stored>, <8 x i32*> <vector of ptrs>, i32 <alignment>, <8 x i1> <mask>)
declare void @llvm.masked.scatter.v16i32(<16 x i32> <vector value to be stored>, <16 x i32*> <vector of ptrs>, i32 <alignment>, <16 x i1> <mask>)

Vector of ptrs - a set of source/destination addresses, to load/store the value.
Mask - switches vector lanes on/off to prevent memory access for switched-off lanes.
The vector of ptrs, the value and the mask should all have the same vector width.

These are code examples where gather / scatter should be used, and where they will allow the function to be vectorized:
;void foo1(int * restrict A, int * restrict B, int * restrict C) {
; for (int i=0; i<SIZE; i++) {
; A[i] = B[C[i]];
; }
;}

;void foo3(int * restrict A, int * restrict B) {
; for (int i=0; i<SIZE; i++) {
; A[B[i]] = i+5;
; }
;}

Tests will come in the following patches, with CodeGen and Vectorizer.

http://reviews.llvm.org/D7433

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228521 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/IR/Intrinsics.h
include/llvm/IR/Intrinsics.td
lib/IR/Function.cpp
lib/IR/Verifier.cpp
utils/TableGen/IntrinsicEmitter.cpp

index addd78cea4ed6c094758a9a28464159dbf51dbc4..a66bd2cd7d99352a4cf7ad8b1ac4dbdb2cb9aac6 100644 (file)
@@ -77,7 +77,7 @@ namespace Intrinsic {
       Void, VarArg, MMX, Metadata, Half, Float, Double,
       Integer, Vector, Pointer, Struct,
       Argument, ExtendArgument, TruncArgument, HalfVecArgument,
-      SameVecWidthArgument, PtrToArgument
+      SameVecWidthArgument, PtrToArgument, VecOfPtrsToElt
     } Kind;
 
     union {
@@ -99,13 +99,15 @@ namespace Intrinsic {
     unsigned getArgumentNumber() const {
       assert(Kind == Argument || Kind == ExtendArgument ||
              Kind == TruncArgument || Kind == HalfVecArgument ||
-             Kind == SameVecWidthArgument || Kind == PtrToArgument);
+             Kind == SameVecWidthArgument || Kind == PtrToArgument ||
+             Kind == VecOfPtrsToElt);
       return Argument_Info >> 3;
     }
     ArgKind getArgumentKind() const {
       assert(Kind == Argument || Kind == ExtendArgument ||
              Kind == TruncArgument || Kind == HalfVecArgument ||
-             Kind == SameVecWidthArgument || Kind == PtrToArgument);
+             Kind == SameVecWidthArgument || Kind == PtrToArgument ||
+             Kind == VecOfPtrsToElt);
       return (ArgKind)(Argument_Info & 7);
     }
 
index 5a304db09f252a899f28fb0322572a50ec76e3e3..1326a8330d501544ceeb283ad9bc9b50527b3333 100644 (file)
@@ -117,6 +117,7 @@ class LLVMVectorSameWidth<int num, LLVMType elty>
   ValueType ElTy = elty.VT;
 }
 class LLVMPointerTo<int num> : LLVMMatchType<num>;
+class LLVMVectorOfPointersToElt<int num> : LLVMMatchType<num>;
 
 // Match the type of another intrinsic parameter that is expected to be a
 // vector type, but change the element count to be half as many
@@ -584,6 +585,19 @@ def int_masked_load  : Intrinsic<[llvm_anyvector_ty],
                                  [LLVMPointerTo<0>, llvm_i32_ty,
                                   LLVMVectorSameWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
                                  [IntrReadArgMem]>;
+
+def int_masked_gather: Intrinsic<[llvm_anyvector_ty],
+                                 [LLVMVectorOfPointersToElt<0>, llvm_i32_ty,
+                                  LLVMVectorSameWidth<0, llvm_i1_ty>,
+                                  LLVMMatchType<0>],
+                                 [IntrReadArgMem]>;
+
+def int_masked_scatter: Intrinsic<[],
+                                  [llvm_anyvector_ty,
+                                   LLVMVectorOfPointersToElt<0>, llvm_i32_ty,
+                                   LLVMVectorSameWidth<0, llvm_i1_ty>],
+                                  [IntrReadWriteArgMem]>;
+
 //===----------------------------------------------------------------------===//
 // Target-specific intrinsics
 //===----------------------------------------------------------------------===//
index 070513edef22d75670934d516c2f646ad3b114b1..fe44f17d6862875047047eafbdf7ee8ebbdb8549 100644 (file)
@@ -542,7 +542,8 @@ enum IIT_Info {
   IIT_VARARG = 28,
   IIT_HALF_VEC_ARG = 29,
   IIT_SAME_VEC_WIDTH_ARG = 30,
-  IIT_PTR_TO_ARG = 31
+  IIT_PTR_TO_ARG = 31,
+  IIT_VEC_OF_PTRS_TO_ELT = 32
 };
 
 
@@ -662,6 +663,12 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
                                              ArgInfo));
     return;
   }
+  case IIT_VEC_OF_PTRS_TO_ELT: {
+    unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+    OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecOfPtrsToElt,
+                                             ArgInfo));
+    return;
+  }
   case IIT_EMPTYSTRUCT:
     OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, 0));
     return;
@@ -781,6 +788,15 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
     Type *Ty = Tys[D.getArgumentNumber()];
     return PointerType::getUnqual(Ty);
   }
+  case IITDescriptor::VecOfPtrsToElt: {
+    Type *Ty = Tys[D.getArgumentNumber()];
+    VectorType *VTy = dyn_cast<VectorType>(Ty);
+    if (!VTy)
+      llvm_unreachable("Expected an argument of Vector Type");
+    Type *EltTy = VTy->getVectorElementType();
+    return VectorType::get(PointerType::getUnqual(EltTy),
+                           VTy->getNumElements());
+  }
  }
   llvm_unreachable("unhandled");
 }
index 29eddfe25410687782c4b81ab060aab0ba3d0779..a7b705410d752166298c80a871ab3cc4eec361d9 100644 (file)
@@ -2555,6 +2555,23 @@ bool Verifier::VerifyIntrinsicType(Type *Ty,
     PointerType *ThisArgType = dyn_cast<PointerType>(Ty);
     return (!ThisArgType || ThisArgType->getElementType() != ReferenceType);
   }
+  case IITDescriptor::VecOfPtrsToElt: {
+    if (D.getArgumentNumber() >= ArgTys.size())
+      return true;
+    VectorType * ReferenceType =
+      dyn_cast<VectorType> (ArgTys[D.getArgumentNumber()]);
+    VectorType *ThisArgVecTy = dyn_cast<VectorType>(Ty);
+    if (!ThisArgVecTy || !ReferenceType || 
+        (ReferenceType->getVectorNumElements() !=
+         ThisArgVecTy->getVectorNumElements()))
+      return true;
+    PointerType *ThisArgEltTy =
+      dyn_cast<PointerType>(ThisArgVecTy->getVectorElementType());
+    if (!ThisArgEltTy)
+      return true;
+    return (!(ThisArgEltTy->getElementType() ==
+            ReferenceType->getVectorElementType()));
+  }
   }
   llvm_unreachable("unhandled");
 }
index 87f9c90dc4a759a24564b41f9cc373d289fc8756..e533d89326c52140796f9fbd1576304a6af0dc22 100644 (file)
@@ -259,7 +259,8 @@ enum IIT_Info {
   IIT_VARARG = 28,
   IIT_HALF_VEC_ARG = 29,
   IIT_SAME_VEC_WIDTH_ARG = 30,
-  IIT_PTR_TO_ARG = 31
+  IIT_PTR_TO_ARG = 31,
+  IIT_VEC_OF_PTRS_TO_ELT = 32
 };
 
 
@@ -314,9 +315,10 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
       EncodeFixedValueType(VT, Sig);
       return;
     }
-    else if (R->isSubClassOf("LLVMPointerTo")) {
+    else if (R->isSubClassOf("LLVMPointerTo"))
       Sig.push_back(IIT_PTR_TO_ARG);
-    }
+    else if (R->isSubClassOf("LLVMVectorOfPointersToElt"))
+      Sig.push_back(IIT_VEC_OF_PTRS_TO_ELT);
     else
       Sig.push_back(IIT_ARG);
     return Sig.push_back((Number << 3) | ArgCodes[Number]);