-; RUN: llc -mcpu=cortex-a8 < %s | grep vdup.32
+; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "armv7-eabi"
br i1 undef, label %return, label %bb
bb: ; preds = %bb, %entry
- %0 = load float* undef, align 4 ; <float> [#uses=1]
- %1 = insertelement <4 x float> undef, float %0, i32 2 ; <<4 x float>> [#uses=1]
- %2 = insertelement <4 x float> %1, float undef, i32 3 ; <<4 x float>> [#uses=1]
- %3 = fmul <4 x float> undef, %2 ; <<4 x float>> [#uses=1]
- %4 = extractelement <4 x float> %3, i32 1 ; <float> [#uses=1]
- store float %4, float* undef, align 4
+; CHECK: vld1.16 {d16[], d17[]}
+ %0 = load i16* undef, align 2
+ %1 = insertelement <8 x i16> undef, i16 %0, i32 2
+ %2 = insertelement <8 x i16> %1, i16 undef, i32 3
+ %3 = mul <8 x i16> %2, %2
+ %4 = extractelement <8 x i16> %3, i32 2
+ store i16 %4, i16* undef, align 2
br i1 undef, label %return, label %bb
return: ; preds = %bb, %entry