// The maximum alignment is equal to the memory size being referenced.
unsigned LSNAlign = LSN->getAlignment();
unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
- if (LSNAlign > MemSize && MemSize > 1)
+ if (LSNAlign >= MemSize && MemSize > 1)
Alignment = MemSize;
} else {
// All other uses of addrmode6 are for intrinsics. For now just record
define <2 x float> @vld1dupf(float* %A) nounwind {
;CHECK: vld1dupf:
-;CHECK: vld1.32 {d16[]}, [r0]
+;CHECK: vld1.32 {d16[]}, [r0, :32]
%tmp0 = load float* %A
%tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
define <4 x float> @vld1dupQf(float* %A) nounwind {
;CHECK: vld1dupQf:
-;CHECK: vld1.32 {d16[], d17[]}, [r0]
+;CHECK: vld1.32 {d16[], d17[]}, [r0, :32]
%tmp0 = load float* %A
%tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
ret <2 x i32> %tmp3
}
+define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vld1lanei32a32:
+;Check the alignment value. Legal values are none or :32.
+;CHECK: vld1.32 {d16[1]}, [r0, :32]
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = load i32* %A, align 4
+ %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
+ ret <2 x i32> %tmp3
+}
+
define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld1lanef:
-;CHECK: vld1.32 {d16[1]}, [r0]
+;CHECK: vld1.32 {d16[1]}, [r0, :32]
%tmp1 = load <2 x float>* %B
%tmp2 = load float* %A, align 4
%tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1
define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld1laneQf:
-;CHECK: vld1.32 {d16[0]}, [r0]
+;CHECK: vld1.32 {d16[0]}, [r0, :32]
%tmp1 = load <4 x float>* %B
%tmp2 = load float* %A
%tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0
define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst1lanef:
-;CHECK: vst1.32 {d16[1]}, [r0]
+;CHECK: vst1.32 {d16[1]}, [r0, :32]
%tmp1 = load <2 x float>* %B
%tmp2 = extractelement <2 x float> %tmp1, i32 1
store float %tmp2, float* %A