case ISD::AND:
case ISD::OR:
case ISD::XOR: {
+ SDValue Op1 = U->getOperand(1);
+
// If the other operand is an 8-bit immediate, we should fold the immediate
// instead. This reduces code size.
// e.g.
// addl 4(%esp), %eax
// The former is 2 bytes shorter. In the case where the increment is 1, the
// saving can be 4 bytes (by using incl %eax).
- if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(U->getOperand(1)))
+ if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
if (Imm->getAPIntValue().isSignedIntN(8))
return false;
+
+ // If the other operand is a TLS address, we should fold it instead.
+ // This produces
+ // movl %gs:0, %eax
+ // leal i@NTPOFF(%eax), %eax
+ // instead of
+ // movl $i@NTPOFF, %eax
+ // addl %gs:0, %eax
+ // If the block also accesses a second TLS address, this will save
+ // a load.
+ // FIXME: This is probably also true for non TLS addresses.
+ if (Op1.getOpcode() == X86ISD::Wrapper) {
+ SDValue Val = Op1.getOperand(0);
+ if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
+ return false;
+ }
}
}
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp) {
X86ISelAddressMode AM;
- if (MatchAddress(N, AM))
- return false;
- //Is it better to set AM.Segment before calling MatchAddress to
- //prevent it from adding a segment?
- if (AM.Segment.getNode())
+ // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
+ // segments.
+ SDValue Copy = AM.Segment;
+ SDValue T = CurDAG->getRegister(0, MVT::i32);
+ AM.Segment = T;
+ if (MatchAddress(N, AM))
return false;
+ assert (T == AM.Segment);
+ AM.Segment = Copy;
MVT VT = N.getValueType();
unsigned Complexity = 0;
; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl \$i@NTPOFF, %eax} %t
-; RUN: grep {addl %gs:0, %eax} %t
+; RUN: grep {movl %gs:0, %eax} %t
+; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
@i = external hidden thread_local global i32
--- /dev/null
+; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movl %gs:0, %eax} %t | count 1
+; RUN: grep {leal i@NTPOFF(%eax), %ecx} %t
+; RUN: grep {leal j@NTPOFF(%eax), %eax} %t
+
+@i = thread_local global i32 0
+@j = thread_local global i32 0
+
+; Stores the address of the thread-local global @i through %a and the address
+; of the thread-local global @j through %b, so one function takes the address
+; of two different TLS variables. The RUN lines of this test require exactly
+; one `movl %gs:0, %eax` and one `leal` per variable in the output.
+define void @f(i32** %a, i32** %b) {
+entry:
+ store i32* @i, i32** %a, align 8
+ store i32* @j, i32** %b, align 8
+ ret void
+}
; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl \$i@NTPOFF, %eax} %t
-; RUN: grep {addl %gs:0, %eax} %t
+; RUN: grep {movl %gs:0, %eax} %t
+; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
@i = thread_local global i32 15
; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl \$i@NTPOFF, %eax} %t
-; RUN: grep {addl %gs:0, %eax} %t
+; RUN: grep {movl %gs:0, %eax} %t
+; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
@i = internal thread_local global i32 15
; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl \$i@NTPOFF, %eax} %t
-; RUN: grep {addl %gs:0, %eax} %t
+; RUN: grep {movl %gs:0, %eax} %t
+; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
@i = hidden thread_local global i32 15