From 1c49fda408ae5ba90fdaf1b274edd1119aea58b7 Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Tue, 11 Dec 2012 00:36:57 +0000
Subject: [PATCH] Fix a miscompile in the DAG combiner.

Previously, we would incorrectly try to reduce the width of this load,
and would end up transforming:

  (truncate (lshr (sextload i48 as i64), 32) to i32)
to
  (truncate (zextload i32 as i64) to i32)

We lost the sext attached to the load while building the narrower i32
load, and replaced it with a zext because lshr always zero-extends its
result. Instead, bail out of this combine when there is a conflict
between a sextload and a zext narrowing. The rest of the DAG combiner
still optimizes the code down to the proper single instruction:

  movswl 6(...), %eax

which is exactly what we wanted. Previously we read past the end *and*
missed the sign extension:

  movl 6(...), %eax

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169802 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  8 ++++++--
 test/CodeGen/X86/sext-load.ll            | 25 ++++++++++++++++++++++--
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e774c138d28..1c28d6dcaf4 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5068,11 +5068,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
     // At this point, we must have a load or else we can't do the transform.
     if (!isa<LoadSDNode>(N0)) return SDValue();
 
+    // Because a SRL must be assumed to *need* to zero-extend the high bits
+    // (as opposed to anyext the high bits), we can't combine the zextload
+    // lowering of SRL and an sextload.
+    if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
+      return SDValue();
+
     // If the shift amount is larger than the input type then we're not
     // accessing any of the loaded bytes. If the load was a zextload/extload
     // then the result of the shift+trunc is zero/undef (handled elsewhere).
-    // If the load was a sextload then the result is a splat of the sign bit
-    // of the extended byte. This is not worth optimizing for.
     if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
       return SDValue();
   }
diff --git a/test/CodeGen/X86/sext-load.ll b/test/CodeGen/X86/sext-load.ll
index c9b39d3a489..58c93229a2c 100644
--- a/test/CodeGen/X86/sext-load.ll
+++ b/test/CodeGen/X86/sext-load.ll
@@ -1,9 +1,30 @@
-; RUN: llc < %s -march=x86 | grep movsbl
+; RUN: llc < %s -march=x86 | FileCheck %s
 
-define i32 @foo(i32 %X) nounwind {
+; When doing sign extension, use the sext-load lowering to take advantage of
+; x86's sign extension during loads.
+;
+; CHECK: test1:
+; CHECK: movsbl {{.*}}, %eax
+; CHECK-NEXT: ret
+define i32 @test1(i32 %X) nounwind {
 entry:
   %tmp12 = trunc i32 %X to i8       ; <i8> [#uses=1]
   %tmp123 = sext i8 %tmp12 to i32   ; <i32> [#uses=1]
   ret i32 %tmp123
 }
 
+; When using a sextload representation, ensure that the sign extension is
+; preserved even when removing shifted-out low bits.
+;
+; CHECK: test2:
+; CHECK: movswl {{.*}}, %eax
+; CHECK-NEXT: ret
+define i32 @test2({i16, [6 x i8]}* %this) {
+entry:
+  %b48 = getelementptr inbounds { i16, [6 x i8] }* %this, i32 0, i32 1
+  %cast = bitcast [6 x i8]* %b48 to i48*
+  %bf.load = load i48* %cast, align 2
+  %bf.ashr = ashr i48 %bf.load, 32
+  %bf.cast = trunc i48 %bf.ashr to i32
+  ret i32 %bf.cast
+}
-- 
2.34.1
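
Editor's note, for illustration only (not part of the patch): below is a minimal
standalone C++ sketch that models the two DAGs from the commit message at the
value level, to show why the narrowed zextload was a miscompile. The function
names and the assumption that the two bytes above the i48 field read as zero
are hypothetical (the real bug also read past the end of the object), and the
sketch assumes two's-complement shift semantics.

// sext_narrowing_demo.cpp -- models (truncate (lshr (sextload i48), 32) to i32)
// vs. the incorrectly narrowed (truncate (zextload i32) to i32).
#include <cstdint>
#include <cstdio>

// An i48 bitfield value with its sign bit (bit 47) set, stored in the low
// 48 bits of a 64-bit word; assume the two bytes above it read as zero.
static const uint64_t kField = 0x800000000001ULL;

// Correct lowering: sextload i48 -> i64, lshr by 32, truncate to i32.
// This is what `movswl 6(...), %eax` computes.
static int32_t correctLowering(uint64_t mem) {
  int64_t sextload = (int64_t)(mem << 16) >> 16; // sign-extend i48 to i64
  uint64_t shifted = (uint64_t)sextload >> 32;   // lshr zero-fills the top
  return (int32_t)shifted;                       // truncate to i32
}

// Miscompiled lowering: the shift is folded into a narrowed i32 load of the
// field's high bits, zero-extended to i64, then truncated -- the sext is
// lost. This is what `movl 6(...), %eax` computes when the trailing bytes
// happen to be zero.
static int32_t miscompiledLowering(uint64_t mem) {
  uint64_t zextload = (mem >> 32) & 0xFFFFFFFFULL; // narrowed load, zext
  return (int32_t)zextload;                        // truncate to i32
}

int main() {
  printf("correct:     %d\n", (int)correctLowering(kField));     // -32768
  printf("miscompiled: %d\n", (int)miscompiledLowering(kField)); //  32768
  return 0;
}

The two results differ in exactly the way the commit message describes: with
the fix, ReduceLoadWidth refuses to narrow a SEXTLOAD under an SRL, and the
usual sextload matching emits the single movswl.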