; CHECK-LABEL: test_v8i8_pre_load:
; CHECK: ldr d0, [x0, #40]!
%newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
- %val = load <8 x i8>* %newaddr, align 8
+ %val = load <8 x i8>, <8 x i8>* %newaddr, align 8
store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
ret <8 x i8> %val
}
; CHECK-LABEL: test_v8i8_post_load:
; CHECK: ldr d0, [x0], #40
%newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
- %val = load <8 x i8>* %addr, align 8
+ %val = load <8 x i8>, <8 x i8>* %addr, align 8
store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
ret <8 x i8> %val
}
; CHECK-LABEL: test_v4i16_pre_load:
; CHECK: ldr d0, [x0, #40]!
%newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
- %val = load <4 x i16>* %newaddr, align 8
+ %val = load <4 x i16>, <4 x i16>* %newaddr, align 8
store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
ret <4 x i16> %val
}
; CHECK-LABEL: test_v4i16_post_load:
; CHECK: ldr d0, [x0], #40
%newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
- %val = load <4 x i16>* %addr, align 8
+ %val = load <4 x i16>, <4 x i16>* %addr, align 8
store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
ret <4 x i16> %val
}
; CHECK-LABEL: test_v2i32_pre_load:
; CHECK: ldr d0, [x0, #40]!
%newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
- %val = load <2 x i32>* %newaddr, align 8
+ %val = load <2 x i32>, <2 x i32>* %newaddr, align 8
store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
ret <2 x i32> %val
}
; CHECK-LABEL: test_v2i32_post_load:
; CHECK: ldr d0, [x0], #40
%newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
- %val = load <2 x i32>* %addr, align 8
+ %val = load <2 x i32>, <2 x i32>* %addr, align 8
store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
ret <2 x i32> %val
}
; CHECK-LABEL: test_v2f32_pre_load:
; CHECK: ldr d0, [x0, #40]!
%newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
- %val = load <2 x float>* %newaddr, align 8
+ %val = load <2 x float>, <2 x float>* %newaddr, align 8
store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
ret <2 x float> %val
}
; CHECK-LABEL: test_v2f32_post_load:
; CHECK: ldr d0, [x0], #40
%newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
- %val = load <2 x float>* %addr, align 8
+ %val = load <2 x float>, <2 x float>* %addr, align 8
store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
ret <2 x float> %val
}
; CHECK-LABEL: test_v1i64_pre_load:
; CHECK: ldr d0, [x0, #40]!
%newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
- %val = load <1 x i64>* %newaddr, align 8
+ %val = load <1 x i64>, <1 x i64>* %newaddr, align 8
store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
ret <1 x i64> %val
}
; CHECK-LABEL: test_v1i64_post_load:
; CHECK: ldr d0, [x0], #40
%newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
- %val = load <1 x i64>* %addr, align 8
+ %val = load <1 x i64>, <1 x i64>* %addr, align 8
store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
ret <1 x i64> %val
}
; CHECK-LABEL: test_v16i8_pre_load:
; CHECK: ldr q0, [x0, #80]!
%newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
- %val = load <16 x i8>* %newaddr, align 8
+ %val = load <16 x i8>, <16 x i8>* %newaddr, align 8
store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
ret <16 x i8> %val
}
; CHECK-LABEL: test_v16i8_post_load:
; CHECK: ldr q0, [x0], #80
%newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
- %val = load <16 x i8>* %addr, align 8
+ %val = load <16 x i8>, <16 x i8>* %addr, align 8
store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
ret <16 x i8> %val
}
; CHECK-LABEL: test_v8i16_pre_load:
; CHECK: ldr q0, [x0, #80]!
%newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
- %val = load <8 x i16>* %newaddr, align 8
+ %val = load <8 x i16>, <8 x i16>* %newaddr, align 8
store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
ret <8 x i16> %val
}
; CHECK-LABEL: test_v8i16_post_load:
; CHECK: ldr q0, [x0], #80
%newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
- %val = load <8 x i16>* %addr, align 8
+ %val = load <8 x i16>, <8 x i16>* %addr, align 8
store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
ret <8 x i16> %val
}
; CHECK-LABEL: test_v4i32_pre_load:
; CHECK: ldr q0, [x0, #80]!
%newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
- %val = load <4 x i32>* %newaddr, align 8
+ %val = load <4 x i32>, <4 x i32>* %newaddr, align 8
store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
ret <4 x i32> %val
}
; CHECK-LABEL: test_v4i32_post_load:
; CHECK: ldr q0, [x0], #80
%newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
- %val = load <4 x i32>* %addr, align 8
+ %val = load <4 x i32>, <4 x i32>* %addr, align 8
store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
ret <4 x i32> %val
}
; CHECK-LABEL: test_v4f32_pre_load:
; CHECK: ldr q0, [x0, #80]!
%newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
- %val = load <4 x float>* %newaddr, align 8
+ %val = load <4 x float>, <4 x float>* %newaddr, align 8
store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
ret <4 x float> %val
}
; CHECK-LABEL: test_v4f32_post_load:
; CHECK: ldr q0, [x0], #80
%newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
- %val = load <4 x float>* %addr, align 8
+ %val = load <4 x float>, <4 x float>* %addr, align 8
store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
ret <4 x float> %val
}
; CHECK-LABEL: test_v2i64_pre_load:
; CHECK: ldr q0, [x0, #80]!
%newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
- %val = load <2 x i64>* %newaddr, align 8
+ %val = load <2 x i64>, <2 x i64>* %newaddr, align 8
store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
ret <2 x i64> %val
}
; CHECK-LABEL: test_v2i64_post_load:
; CHECK: ldr q0, [x0], #80
%newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
- %val = load <2 x i64>* %addr, align 8
+ %val = load <2 x i64>, <2 x i64>* %addr, align 8
store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
ret <2 x i64> %val
}
; CHECK-LABEL: test_v2f64_pre_load:
; CHECK: ldr q0, [x0, #80]!
%newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
- %val = load <2 x double>* %newaddr, align 8
+ %val = load <2 x double>, <2 x double>* %newaddr, align 8
store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
ret <2 x double> %val
}
; CHECK-LABEL: test_v2f64_post_load:
; CHECK: ldr q0, [x0], #80
%newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
- %val = load <2 x double>* %addr, align 8
+ %val = load <2 x double>, <2 x double>* %addr, align 8
store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
ret <2 x double> %val
}
define <16 x i8> @test_v16i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
; CHECK-LABEL: test_v16i8_post_imm_ld1r:
; CHECK: ld1r.16b { v0 }, [x0], #1
- %tmp1 = load i8* %bar
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
%tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
%tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
define <16 x i8> @test_v16i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
; CHECK-LABEL: test_v16i8_post_reg_ld1r:
; CHECK: ld1r.16b { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load i8* %bar
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
%tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
%tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
define <8 x i8> @test_v8i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
; CHECK-LABEL: test_v8i8_post_imm_ld1r:
; CHECK: ld1r.8b { v0 }, [x0], #1
- %tmp1 = load i8* %bar
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
%tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
%tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
define <8 x i8> @test_v8i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
; CHECK-LABEL: test_v8i8_post_reg_ld1r:
; CHECK: ld1r.8b { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load i8* %bar
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
%tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
%tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
define <8 x i16> @test_v8i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
; CHECK-LABEL: test_v8i16_post_imm_ld1r:
; CHECK: ld1r.8h { v0 }, [x0], #2
- %tmp1 = load i16* %bar
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
%tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
%tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
define <8 x i16> @test_v8i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
; CHECK-LABEL: test_v8i16_post_reg_ld1r:
; CHECK: ld1r.8h { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load i16* %bar
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
%tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
%tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
define <4 x i16> @test_v4i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
; CHECK-LABEL: test_v4i16_post_imm_ld1r:
; CHECK: ld1r.4h { v0 }, [x0], #2
- %tmp1 = load i16* %bar
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
%tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
%tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
define <4 x i16> @test_v4i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
; CHECK-LABEL: test_v4i16_post_reg_ld1r:
; CHECK: ld1r.4h { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load i16* %bar
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
%tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
%tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
define <4 x i32> @test_v4i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
; CHECK-LABEL: test_v4i32_post_imm_ld1r:
; CHECK: ld1r.4s { v0 }, [x0], #4
- %tmp1 = load i32* %bar
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
%tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
%tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
define <4 x i32> @test_v4i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
; CHECK-LABEL: test_v4i32_post_reg_ld1r:
; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load i32* %bar
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
%tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
%tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
define <2 x i32> @test_v2i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
; CHECK-LABEL: test_v2i32_post_imm_ld1r:
; CHECK: ld1r.2s { v0 }, [x0], #4
- %tmp1 = load i32* %bar
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
%tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
%tmp4 = getelementptr i32, i32* %bar, i64 1
define <2 x i32> @test_v2i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2i32_post_reg_ld1r:
; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load i32* %bar
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
%tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
%tmp4 = getelementptr i32, i32* %bar, i64 %inc
define <2 x i64> @test_v2i64_post_imm_ld1r(i64* %bar, i64** %ptr) {
; CHECK-LABEL: test_v2i64_post_imm_ld1r:
; CHECK: ld1r.2d { v0 }, [x0], #8
- %tmp1 = load i64* %bar
+ %tmp1 = load i64, i64* %bar
%tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
%tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
%tmp4 = getelementptr i64, i64* %bar, i64 1
define <2 x i64> @test_v2i64_post_reg_ld1r(i64* %bar, i64** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2i64_post_reg_ld1r:
; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load i64* %bar
+ %tmp1 = load i64, i64* %bar
%tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
%tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
%tmp4 = getelementptr i64, i64* %bar, i64 %inc
define <4 x float> @test_v4f32_post_imm_ld1r(float* %bar, float** %ptr) {
; CHECK-LABEL: test_v4f32_post_imm_ld1r:
; CHECK: ld1r.4s { v0 }, [x0], #4
- %tmp1 = load float* %bar
+ %tmp1 = load float, float* %bar
%tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
%tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
%tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2
define <4 x float> @test_v4f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) {
; CHECK-LABEL: test_v4f32_post_reg_ld1r:
; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load float* %bar
+ %tmp1 = load float, float* %bar
%tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
%tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
%tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2
define <2 x float> @test_v2f32_post_imm_ld1r(float* %bar, float** %ptr) {
; CHECK-LABEL: test_v2f32_post_imm_ld1r:
; CHECK: ld1r.2s { v0 }, [x0], #4
- %tmp1 = load float* %bar
+ %tmp1 = load float, float* %bar
%tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
%tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
%tmp4 = getelementptr float, float* %bar, i64 1
define <2 x float> @test_v2f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2f32_post_reg_ld1r:
; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load float* %bar
+ %tmp1 = load float, float* %bar
%tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
%tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
%tmp4 = getelementptr float, float* %bar, i64 %inc
define <2 x double> @test_v2f64_post_imm_ld1r(double* %bar, double** %ptr) {
; CHECK-LABEL: test_v2f64_post_imm_ld1r:
; CHECK: ld1r.2d { v0 }, [x0], #8
- %tmp1 = load double* %bar
+ %tmp1 = load double, double* %bar
%tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
%tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
%tmp4 = getelementptr double, double* %bar, i64 1
define <2 x double> @test_v2f64_post_reg_ld1r(double* %bar, double** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2f64_post_reg_ld1r:
; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load double* %bar
+ %tmp1 = load double, double* %bar
%tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
%tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
%tmp4 = getelementptr double, double* %bar, i64 %inc
define <16 x i8> @test_v16i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <16 x i8> %A) {
; CHECK-LABEL: test_v16i8_post_imm_ld1lane:
; CHECK: ld1.b { v0 }[1], [x0], #1
- %tmp1 = load i8* %bar
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
%tmp3 = getelementptr i8, i8* %bar, i64 1
store i8* %tmp3, i8** %ptr
define <16 x i8> @test_v16i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <16 x i8> %A) {
; CHECK-LABEL: test_v16i8_post_reg_ld1lane:
; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load i8* %bar
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
%tmp3 = getelementptr i8, i8* %bar, i64 %inc
store i8* %tmp3, i8** %ptr
define <8 x i8> @test_v8i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <8 x i8> %A) {
; CHECK-LABEL: test_v8i8_post_imm_ld1lane:
; CHECK: ld1.b { v0 }[1], [x0], #1
- %tmp1 = load i8* %bar
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
%tmp3 = getelementptr i8, i8* %bar, i64 1
store i8* %tmp3, i8** %ptr
define <8 x i8> @test_v8i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <8 x i8> %A) {
; CHECK-LABEL: test_v8i8_post_reg_ld1lane:
; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load i8* %bar
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
%tmp3 = getelementptr i8, i8* %bar, i64 %inc
store i8* %tmp3, i8** %ptr
define <8 x i16> @test_v8i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <8 x i16> %A) {
; CHECK-LABEL: test_v8i16_post_imm_ld1lane:
; CHECK: ld1.h { v0 }[1], [x0], #2
- %tmp1 = load i16* %bar
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
%tmp3 = getelementptr i16, i16* %bar, i64 1
store i16* %tmp3, i16** %ptr
define <8 x i16> @test_v8i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <8 x i16> %A) {
; CHECK-LABEL: test_v8i16_post_reg_ld1lane:
; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load i16* %bar
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
%tmp3 = getelementptr i16, i16* %bar, i64 %inc
store i16* %tmp3, i16** %ptr
define <4 x i16> @test_v4i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <4 x i16> %A) {
; CHECK-LABEL: test_v4i16_post_imm_ld1lane:
; CHECK: ld1.h { v0 }[1], [x0], #2
- %tmp1 = load i16* %bar
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
%tmp3 = getelementptr i16, i16* %bar, i64 1
store i16* %tmp3, i16** %ptr
define <4 x i16> @test_v4i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A) {
; CHECK-LABEL: test_v4i16_post_reg_ld1lane:
; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load i16* %bar
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
%tmp3 = getelementptr i16, i16* %bar, i64 %inc
store i16* %tmp3, i16** %ptr
define <4 x i32> @test_v4i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <4 x i32> %A) {
; CHECK-LABEL: test_v4i32_post_imm_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], #4
- %tmp1 = load i32* %bar
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
%tmp3 = getelementptr i32, i32* %bar, i64 1
store i32* %tmp3, i32** %ptr
define <4 x i32> @test_v4i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <4 x i32> %A) {
; CHECK-LABEL: test_v4i32_post_reg_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load i32* %bar
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
%tmp3 = getelementptr i32, i32* %bar, i64 %inc
store i32* %tmp3, i32** %ptr
define <2 x i32> @test_v2i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <2 x i32> %A) {
; CHECK-LABEL: test_v2i32_post_imm_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], #4
- %tmp1 = load i32* %bar
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
%tmp3 = getelementptr i32, i32* %bar, i64 1
store i32* %tmp3, i32** %ptr
define <2 x i32> @test_v2i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <2 x i32> %A) {
; CHECK-LABEL: test_v2i32_post_reg_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load i32* %bar
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
%tmp3 = getelementptr i32, i32* %bar, i64 %inc
store i32* %tmp3, i32** %ptr
define <2 x i64> @test_v2i64_post_imm_ld1lane(i64* %bar, i64** %ptr, <2 x i64> %A) {
; CHECK-LABEL: test_v2i64_post_imm_ld1lane:
; CHECK: ld1.d { v0 }[1], [x0], #8
- %tmp1 = load i64* %bar
+ %tmp1 = load i64, i64* %bar
%tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
%tmp3 = getelementptr i64, i64* %bar, i64 1
store i64* %tmp3, i64** %ptr
define <2 x i64> @test_v2i64_post_reg_ld1lane(i64* %bar, i64** %ptr, i64 %inc, <2 x i64> %A) {
; CHECK-LABEL: test_v2i64_post_reg_ld1lane:
; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load i64* %bar
+ %tmp1 = load i64, i64* %bar
%tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
%tmp3 = getelementptr i64, i64* %bar, i64 %inc
store i64* %tmp3, i64** %ptr
define <4 x float> @test_v4f32_post_imm_ld1lane(float* %bar, float** %ptr, <4 x float> %A) {
; CHECK-LABEL: test_v4f32_post_imm_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], #4
- %tmp1 = load float* %bar
+ %tmp1 = load float, float* %bar
%tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
%tmp3 = getelementptr float, float* %bar, i64 1
store float* %tmp3, float** %ptr
define <4 x float> @test_v4f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <4 x float> %A) {
; CHECK-LABEL: test_v4f32_post_reg_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load float* %bar
+ %tmp1 = load float, float* %bar
%tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
%tmp3 = getelementptr float, float* %bar, i64 %inc
store float* %tmp3, float** %ptr
define <2 x float> @test_v2f32_post_imm_ld1lane(float* %bar, float** %ptr, <2 x float> %A) {
; CHECK-LABEL: test_v2f32_post_imm_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], #4
- %tmp1 = load float* %bar
+ %tmp1 = load float, float* %bar
%tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
%tmp3 = getelementptr float, float* %bar, i64 1
store float* %tmp3, float** %ptr
define <2 x float> @test_v2f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <2 x float> %A) {
; CHECK-LABEL: test_v2f32_post_reg_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load float* %bar
+ %tmp1 = load float, float* %bar
%tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
%tmp3 = getelementptr float, float* %bar, i64 %inc
store float* %tmp3, float** %ptr
define <2 x double> @test_v2f64_post_imm_ld1lane(double* %bar, double** %ptr, <2 x double> %A) {
; CHECK-LABEL: test_v2f64_post_imm_ld1lane:
; CHECK: ld1.d { v0 }[1], [x0], #8
- %tmp1 = load double* %bar
+ %tmp1 = load double, double* %bar
%tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
%tmp3 = getelementptr double, double* %bar, i64 1
store double* %tmp3, double** %ptr
define <2 x double> @test_v2f64_post_reg_ld1lane(double* %bar, double** %ptr, i64 %inc, <2 x double> %A) {
; CHECK-LABEL: test_v2f64_post_reg_ld1lane:
; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load double* %bar
+ %tmp1 = load double, double* %bar
%tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
%tmp3 = getelementptr double, double* %bar, i64 %inc
store double* %tmp3, double** %ptr
ret <2 x double> %tmp2
-}
\ No newline at end of file
+}
+
+; Check for dependencies between the vector and the scalar load.
+define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(float* %bar, float** %ptr, i64 %inc, <4 x float>* %dep_ptr_1, <4 x float>* %dep_ptr_2) {
+; CHECK-LABEL: test_v4f32_post_reg_ld1lane_dep_vec_on_load:
+; CHECK: BB#0:
+; CHECK-NEXT: ldr s[[LD:[0-9]+]], [x0]
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: str q0, [x3]
+; CHECK-NEXT: ldr q0, [x4]
+; CHECK-NEXT: ins.s v0[1], v[[LD]][0]
+; CHECK-NEXT: add [[POST:x[0-9]]], x0, x2, lsl #2
+; CHECK-NEXT: str [[POST]], [x1]
+; CHECK-NEXT: ret
+ %tmp1 = load float, float* %bar
+ store <4 x float> zeroinitializer, <4 x float>* %dep_ptr_1, align 16
+ %A = load <4 x float>, <4 x float>* %dep_ptr_2, align 16
+ %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
+ %tmp3 = getelementptr float, float* %bar, i64 %inc
+ store float* %tmp3, float** %ptr
+ ret <4 x float> %tmp2
+}
+
+; Make sure that we test the narrow V64 code path.
+; The tests above don't, because there, 64-bit insert_vector_elt nodes will be
+; widened to 128-bit before the LD1LANEpost combine has the chance to run,
+; making it avoid narrow vector types.
+; One way to trick that combine into running early is to force the vector ops
+; legalizer to run. We achieve that using the ctpop.
+; PR23265
+define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A, <2 x i32>* %d) {
+; CHECK-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
+; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
+ %tmp1 = load i16, i16* %bar
+ %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
+ %tmp3 = getelementptr i16, i16* %bar, i64 %inc
+ store i16* %tmp3, i16** %ptr
+ %dl = load <2 x i32>, <2 x i32>* %d
+ %dr = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %dl)
+ store <2 x i32> %dr, <2 x i32>* %d
+ ret <4 x i16> %tmp2
+}
+
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)