Syntax:
"""""""
-This is an overloaded intrinsic. The loaded data is a vector of any integer or floating point data type.
+This is an overloaded intrinsic. The loaded data is a vector of any integer, floating point or pointer data type.
::
- declare <16 x float> @llvm.masked.load.v16f32 (<16 x float>* <ptr>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
- declare <2 x double> @llvm.masked.load.v2f64 (<2 x double>* <ptr>, i32 <alignment>, <2 x i1> <mask>, <2 x double> <passthru>)
+ declare <16 x float> @llvm.masked.load.v16f32 (<16 x float>* <ptr>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
+ declare <2 x double> @llvm.masked.load.v2f64 (<2 x double>* <ptr>, i32 <alignment>, <2 x i1> <mask>, <2 x double> <passthru>)
+ ;; The data is a vector of pointers to double
+ declare <8 x double*> @llvm.masked.load.v8p0f64 (<8 x double*>* <ptr>, i32 <alignment>, <8 x i1> <mask>, <8 x double*> <passthru>)
+ ;; The data is a vector of function pointers
+ declare <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f (<8 x i32 ()*>* <ptr>, i32 <alignment>, <8 x i1> <mask>, <8 x i32 ()*> <passthru>)
Overview:
"""""""""
Syntax:
"""""""
-This is an overloaded intrinsic. The data stored in memory is a vector of any integer or floating point data type.
+This is an overloaded intrinsic. The data stored in memory is a vector of any integer, floating point or pointer data type.
::
- declare void @llvm.masked.store.v8i32 (<8 x i32> <value>, <8 x i32> * <ptr>, i32 <alignment>, <8 x i1> <mask>)
- declare void @llvm.masked.store.v16f32(<16 x i32> <value>, <16 x i32>* <ptr>, i32 <alignment>, <16 x i1> <mask>)
+ declare void @llvm.masked.store.v8i32 (<8 x i32> <value>, <8 x i32>* <ptr>, i32 <alignment>, <8 x i1> <mask>)
+ declare void @llvm.masked.store.v16f32 (<16 x float> <value>, <16 x float>* <ptr>, i32 <alignment>, <16 x i1> <mask>)
+ ;; The data is a vector of pointers to double
+ declare void @llvm.masked.store.v8p0f64 (<8 x double*> <value>, <8 x double*>* <ptr>, i32 <alignment>, <8 x i1> <mask>)
+ ;; The data is a vector of function pointers
+ declare void @llvm.masked.store.v4p0f_i32f (<4 x i32 ()*> <value>, <4 x i32 ()*>* <ptr>, i32 <alignment>, <4 x i1> <mask>)
Overview:
"""""""""
Syntax:
"""""""
-This is an overloaded intrinsic. The loaded data are multiple scalar values of any integer or floating point data type gathered together into one vector.
+This is an overloaded intrinsic. The loaded data are multiple scalar values of any integer, floating point or pointer data type gathered together into one vector.
::
- declare <16 x float> @llvm.masked.gather.v16f32 (<16 x float*> <ptrs>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
- declare <2 x double> @llvm.masked.gather.v2f64 (<2 x double*> <ptrs>, i32 <alignment>, <2 x i1> <mask>, <2 x double> <passthru>)
+ declare <16 x float> @llvm.masked.gather.v16f32 (<16 x float*> <ptrs>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
+ declare <2 x double> @llvm.masked.gather.v2f64 (<2 x double*> <ptrs>, i32 <alignment>, <2 x i1> <mask>, <2 x double> <passthru>)
+ ;; The data is a vector of pointers to float
+ declare <8 x float*> @llvm.masked.gather.v8p0f32 (<8 x float**> <ptrs>, i32 <alignment>, <8 x i1> <mask>, <8 x float*> <passthru>)
Overview:
"""""""""
Syntax:
"""""""
-This is an overloaded intrinsic. The data stored in memory is a vector of any integer or floating point data type. Each vector element is stored in an arbitrary memory addresses. Scatter with overlapping addresses is guaranteed to be ordered from least-significant to most-significant element.
+This is an overloaded intrinsic. The data stored in memory is a vector of any integer, floating point or pointer data type. Each vector element is stored in an arbitrary memory address. Scatter with overlapping addresses is guaranteed to be ordered from least-significant to most-significant element.
::
- declare void @llvm.masked.scatter.v8i32 (<8 x i32> <value>, <8 x i32*> <ptrs>, i32 <alignment>, <8 x i1> <mask>)
- declare void @llvm.masked.scatter.v16f32(<16 x i32> <value>, <16 x i32*> <ptrs>, i32 <alignment>, <16 x i1> <mask>)
+ declare void @llvm.masked.scatter.v8i32 (<8 x i32> <value>, <8 x i32*> <ptrs>, i32 <alignment>, <8 x i1> <mask>)
+ declare void @llvm.masked.scatter.v16f32 (<16 x float> <value>, <16 x float*> <ptrs>, i32 <alignment>, <16 x i1> <mask>)
+ ;; The data is a vector of pointers to double
+ declare void @llvm.masked.scatter.v4p0f64 (<4 x double*> <value>, <4 x double**> <ptrs>, i32 <alignment>, <4 x i1> <mask>)
Overview:
"""""""""
declare void @llvm.masked.store.v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
declare void @llvm.masked.store.v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
+declare <16 x i32*> @llvm.masked.load.v16p0i32(<16 x i32*>*, i32, <16 x i1>, <16 x i32*>)
+
+; AVX512-LABEL: test23
+; AVX512: vmovdqu64 64(%rdi), %zmm1 {%k2} {z}
+; AVX512: vmovdqu64 (%rdi), %zmm0 {%k1} {z}
+
+; Masked load whose element type is a pointer (i32*), sixteen lanes wide.
+; The mask is computed in the function itself, and the passthru operand is
+; an all-zero vector.
+define <16 x i32*> @test23(<16 x i32*> %trigger, <16 x i32*>* %addr) {
+ ; A lane is enabled when the corresponding %trigger pointer is null.
+ %mask = icmp eq <16 x i32*> %trigger, zeroinitializer
+ ; Alignment operand is 4; passthru is zeroinitializer.
+ %res = call <16 x i32*> @llvm.masked.load.v16p0i32(<16 x i32*>* %addr, i32 4, <16 x i1>%mask, <16 x i32*>zeroinitializer)
+ ret <16 x i32*> %res
+}
+
+; Named struct type used below as the pointee of the loaded pointer elements.
+%mystruct = type { i16, i16, [1 x i8*] }
+
+; Overload of the masked load intrinsic for a vector of pointers to %mystruct.
+declare <16 x %mystruct*> @llvm.masked.load.v16p0mystruct(<16 x %mystruct*>*, i32, <16 x i1>, <16 x %mystruct*>)
+
+; AVX512-LABEL: test24
+; AVX512: vmovdqu64 (%rdi), %zmm0 {%k1} {z}
+; AVX512: kshiftrw $8, %k1, %k1
+; AVX512: vmovdqu64 64(%rdi), %zmm1 {%k1} {z}
+
+; Masked load of sixteen pointers to a named struct. Unlike test23, the mask
+; arrives as a function argument instead of being computed by a compare.
+define <16 x %mystruct*> @test24(<16 x i1> %mask, <16 x %mystruct*>* %addr) {
+ ; Alignment operand is 4; passthru is zeroinitializer.
+ %res = call <16 x %mystruct*> @llvm.masked.load.v16p0mystruct(<16 x %mystruct*>* %addr, i32 4, <16 x i1>%mask, <16 x %mystruct*>zeroinitializer)
+ ret <16 x %mystruct*> %res
+}
ret void
}
+; void foo7 (double * __restrict__ out, double ** __restrict__ in,
+; bool * __restrict__ trigger, unsigned size) {
+;
+; for (unsigned i=0; i<size; i++)
+; if (trigger[i] && (in[i] != 0))
+; out[i] = (double) 0.5;
+; }
+
+;AVX512-LABEL: @foo7
+;AVX512: call <8 x double*> @llvm.masked.load.v8p0f64(<8 x double*>*
+;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: ret void
+
+; IR for the C loop shown in the comment above. Each iteration conditionally
+; loads a double* element from %in, so the checks above expect the vectorized
+; form to contain a masked load whose element type is a pointer.
+define void @foo7(double* noalias %out, double** noalias %in, i8* noalias %trigger, i32 %size) #0 {
+entry:
+ ; Spill every argument to a stack slot and initialise the counter %i to 0.
+ %out.addr = alloca double*, align 8
+ %in.addr = alloca double**, align 8
+ %trigger.addr = alloca i8*, align 8
+ %size.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store double* %out, double** %out.addr, align 8
+ store double** %in, double*** %in.addr, align 8
+ store i8* %trigger, i8** %trigger.addr, align 8
+ store i32 %size, i32* %size.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ ; Loop test, an unsigned compare of i against size.
+ %0 = load i32, i32* %i, align 4
+ %1 = load i32, i32* %size.addr, align 4
+ %cmp = icmp ult i32 %0, %1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ ; First half of the condition, trigger[i] truncated to i1.
+ %2 = load i32, i32* %i, align 4
+ %idxprom = zext i32 %2 to i64
+ %3 = load i8*, i8** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i8, i8* %3, i64 %idxprom
+ %4 = load i8, i8* %arrayidx, align 1
+ %tobool = trunc i8 %4 to i1
+ br i1 %tobool, label %land.lhs.true, label %if.end
+
+land.lhs.true: ; preds = %for.body
+ ; Second half of the condition, reached only when trigger[i] is set.
+ ; Loads the pointer in[i] and tests it against null.
+ %5 = load i32, i32* %i, align 4
+ %idxprom1 = zext i32 %5 to i64
+ %6 = load double**, double*** %in.addr, align 8
+ %arrayidx2 = getelementptr inbounds double*, double** %6, i64 %idxprom1
+ %7 = load double*, double** %arrayidx2, align 8
+ %cmp3 = icmp ne double* %7, null
+ br i1 %cmp3, label %if.then, label %if.end
+
+if.then: ; preds = %land.lhs.true
+ ; Both conditions held, so store 0.5 into out[i].
+ %8 = load i32, i32* %i, align 4
+ %idxprom4 = zext i32 %8 to i64
+ %9 = load double*, double** %out.addr, align 8
+ %arrayidx5 = getelementptr inbounds double, double* %9, i64 %idxprom4
+ store double 5.000000e-01, double* %arrayidx5, align 8
+ br label %if.end
+
+if.end: ; preds = %if.then, %land.lhs.true, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ ; Increment i and go back to the loop test.
+ %10 = load i32, i32* %i, align 4
+ %inc = add i32 %10, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+;typedef int (*fp)();
+;void foo8 (double* __restrict__ out, fp* __restrict__ in, bool * __restrict__ trigger, unsigned size) {
+;
+; for (unsigned i=0; i<size; i++)
+; if (trigger[i] && (in[i] != 0))
+; out[i] = (double) 0.5;
+;}
+
+;AVX512-LABEL: @foo8
+;AVX512: call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f(<8 x i32 ()*>* %
+;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: ret void
+
+; IR for the C loop shown in the comment above. It has the same shape as the
+; foo7 loop, but the conditionally loaded elements of %in are function
+; pointers of type i32 ()*, so the checks above expect a masked load of a
+; vector of function pointers.
+define void @foo8(double* noalias %out, i32 ()** noalias %in, i8* noalias %trigger, i32 %size) #0 {
+entry:
+ ; Spill every argument to a stack slot and initialise the counter %i to 0.
+ %out.addr = alloca double*, align 8
+ %in.addr = alloca i32 ()**, align 8
+ %trigger.addr = alloca i8*, align 8
+ %size.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store double* %out, double** %out.addr, align 8
+ store i32 ()** %in, i32 ()*** %in.addr, align 8
+ store i8* %trigger, i8** %trigger.addr, align 8
+ store i32 %size, i32* %size.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ ; Loop test, an unsigned compare of i against size.
+ %0 = load i32, i32* %i, align 4
+ %1 = load i32, i32* %size.addr, align 4
+ %cmp = icmp ult i32 %0, %1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ ; First half of the condition, trigger[i] truncated to i1.
+ %2 = load i32, i32* %i, align 4
+ %idxprom = zext i32 %2 to i64
+ %3 = load i8*, i8** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i8, i8* %3, i64 %idxprom
+ %4 = load i8, i8* %arrayidx, align 1
+ %tobool = trunc i8 %4 to i1
+ br i1 %tobool, label %land.lhs.true, label %if.end
+
+land.lhs.true: ; preds = %for.body
+ ; Second half of the condition, reached only when trigger[i] is set.
+ ; Loads the function pointer in[i] and tests it against null.
+ %5 = load i32, i32* %i, align 4
+ %idxprom1 = zext i32 %5 to i64
+ %6 = load i32 ()**, i32 ()*** %in.addr, align 8
+ %arrayidx2 = getelementptr inbounds i32 ()*, i32 ()** %6, i64 %idxprom1
+ %7 = load i32 ()*, i32 ()** %arrayidx2, align 8
+ %cmp3 = icmp ne i32 ()* %7, null
+ br i1 %cmp3, label %if.then, label %if.end
+
+if.then: ; preds = %land.lhs.true
+ ; Both conditions held, so store 0.5 into out[i].
+ %8 = load i32, i32* %i, align 4
+ %idxprom4 = zext i32 %8 to i64
+ %9 = load double*, double** %out.addr, align 8
+ %arrayidx5 = getelementptr inbounds double, double* %9, i64 %idxprom4
+ store double 5.000000e-01, double* %arrayidx5, align 8
+ br label %if.end
+
+if.end: ; preds = %if.then, %land.lhs.true, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ ; Increment i and go back to the loop test.
+ %10 = load i32, i32* %i, align 4
+ %inc = add i32 %10, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}