Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the X86SelectionDAGInfo class.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "X86SelectionDAGInfo.h"
14
#include "X86ISelLowering.h"
15
#include "X86InstrInfo.h"
16
#include "X86RegisterInfo.h"
17
#include "X86Subtarget.h"
18
#include "llvm/CodeGen/SelectionDAG.h"
19
#include "llvm/CodeGen/TargetLowering.h"
20
#include "llvm/IR/DerivedTypes.h"
21
22
using namespace llvm;
23
24
#define DEBUG_TYPE "x86-selectiondag-info"
25
26
bool X86SelectionDAGInfo::isBaseRegConflictPossible(
27
380
    SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
28
380
  // We cannot use TRI->hasBasePointer() until *after* we select all basic
29
380
  // blocks.  Legalization may introduce new stack temporaries with large
30
380
  // alignment requirements.  Fall back to generic code if there are any
31
380
  // dynamic stack adjustments (hopefully rare) and the base pointer would
32
380
  // conflict if we had to use it.
33
380
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
34
380
  if (!MFI.hasVarSizedObjects() && 
!MFI.hasOpaqueSPAdjustment()373
)
35
372
    return false;
36
8
37
8
  const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
38
8
      DAG.getSubtarget().getRegisterInfo());
39
8
  unsigned BaseReg = TRI->getBaseRegister();
40
8
  for (unsigned R : ClobberSet)
41
43
    if (BaseReg == R)
42
5
      return true;
43
8
  
return false3
;
44
8
}
45
46
/// Target hook for lowering a memset.
///
/// Three outcomes are possible:
///  * an empty SDValue, telling the caller to use its default lowering;
///  * the output chain of a call to the bzero libcall, when the stored value
///    is a constant zero, the target names such a libcall, and the inline
///    path does not apply;
///  * the output chain of an inline REP_STOS node, followed by a small memset
///    for any bytes the wide stores could not cover.
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
    SDValue Size, unsigned Align, bool isVolatile,
    MachinePointerInfo DstPtrInfo) const {
  ConstantSDNode *SizeC = dyn_cast<ConstantSDNode>(Size);
  const X86Subtarget &ST =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();

#ifndef NDEBUG
  // Debug-only sanity check: the base register must not be one of the
  // physical registers this lowering writes (count, value, destination).
  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
                                  X86::ECX, X86::EAX, X86::EDI};
  assert(!isBaseRegConflictPossible(DAG, ClobberSet));
#endif

  // Segment-relative address spaces get the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // Destinations that are not DWORD aligned, sizes that are not constant and
  // sizes above the inline threshold go to the library.  The libc version is
  // likely to be faster for these cases: it can look at the address value and
  // at run time information about the CPU.
  const bool DeferToLibrary =
      (Align & 3) != 0 || !SizeC ||
      SizeC->getZExtValue() > ST.getMaxInlineSizeThreshold();
  if (DeferToLibrary) {
    // Look for a specialized entry point for zeroing memory.
    ConstantSDNode *ZeroCandidate = dyn_cast<ConstantSDNode>(Val);
    const char *bzeroName = nullptr;
    if (ZeroCandidate && ZeroCandidate->isNullValue())
      bzeroName = DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO);

    // No such entry point: let the target-independent code call memset.
    if (!bzeroName)
      return SDValue();

    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
    Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());

    // bzero(dst, size): both arguments are passed as pointer-sized integers.
    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry DstArg;
    DstArg.Node = Dst;
    DstArg.Ty = IntPtrTy;
    Args.push_back(DstArg);
    TargetLowering::ArgListEntry SizeArg;
    SizeArg.Node = Size;
    SizeArg.Ty = IntPtrTy;
    Args.push_back(SizeArg);

    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(dl)
        .setChain(Chain)
        .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
                      DAG.getExternalSymbol(bzeroName, IntPtr),
                      std::move(Args))
        .setDiscardResult();

    // Only the output chain of the call is of interest.
    return TLI.LowerCallTo(CLI).second;
  }

  // Inline path: constant size, DWORD aligned, within the threshold.
  const uint64_t SizeVal = SizeC->getZExtValue();
  SDValue Glue;
  EVT StoreVT;
  SDValue Count;
  unsigned BytesLeft = 0;

  if (ConstantSDNode *ByteC = dyn_cast<ConstantSDNode>(Val)) {
    // A constant byte can be replicated into a wider pattern so that wider
    // stores can be used.
    uint64_t Pattern = ByteC->getZExtValue() & 255;
    unsigned PatternReg;

    const unsigned LowAlignBits = Align & 3;
    if (LowAlignBits == 2) {
      // WORD aligned
      StoreVT = MVT::i16;
      PatternReg = X86::AX;
      Pattern |= Pattern << 8;
    } else if (LowAlignBits == 0) {
      // DWORD aligned
      StoreVT = MVT::i32;
      PatternReg = X86::EAX;
      Pattern |= Pattern << 8;
      Pattern |= Pattern << 16;
      if (ST.is64Bit() && (Align & 0x7) == 0) {
        // QWORD aligned
        StoreVT = MVT::i64;
        PatternReg = X86::RAX;
        Pattern |= Pattern << 32;
      }
    } else {
      // Byte aligned
      StoreVT = MVT::i8;
      PatternReg = X86::AL;
      Count = DAG.getIntPtrConstant(SizeVal, dl);
    }

    // For stores wider than a byte the count is in store units and the
    // remainder is handled after the REP_STOS.
    if (StoreVT.bitsGT(MVT::i8)) {
      const unsigned UnitBytes = StoreVT.getSizeInBits() / 8;
      Count = DAG.getIntPtrConstant(SizeVal / UnitBytes, dl);
      BytesLeft = SizeVal % UnitBytes;
    }

    SDValue PatternNode = DAG.getConstant(Pattern, dl, StoreVT);
    Chain = DAG.getCopyToReg(Chain, dl, PatternReg, PatternNode, Glue);
    Glue = Chain.getValue(1);
  } else {
    // Non-constant value: store it one byte at a time from AL.
    StoreVT = MVT::i8;
    Count = DAG.getIntPtrConstant(SizeVal, dl);
    Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, Glue);
    Glue = Chain.getValue(1);
  }

  // Count goes into (R|E)CX and the destination into (R|E)DI, glued to the
  // value copy above.
  const bool Use64BitRegs = ST.isTarget64BitLP64();
  const unsigned CountReg = Use64BitRegs ? X86::RCX : X86::ECX;
  const unsigned DstReg = Use64BitRegs ? X86::RDI : X86::EDI;
  Chain = DAG.getCopyToReg(Chain, dl, CountReg, Count, Glue);
  Glue = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, DstReg, Dst, Glue);
  Glue = Chain.getValue(1);

  SDVTList ResultTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue RepStosOps[] = {Chain, DAG.getValueType(StoreVT), Glue};
  Chain = DAG.getNode(X86ISD::REP_STOS, dl, ResultTys, RepStosOps);

  if (BytesLeft != 0) {
    // Handle the last 1 - 7 bytes with an ordinary memset placed right after
    // the region REP_STOS has written.
    unsigned Offset = SizeVal - BytesLeft;
    EVT AddrVT = Dst.getValueType();
    EVT SizeVT = Size.getValueType();

    SDValue TailDst = DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
                                  DAG.getConstant(Offset, dl, AddrVT));
    SDValue TailSize = DAG.getConstant(BytesLeft, dl, SizeVT);
    Chain = DAG.getMemset(Chain, dl, TailDst, Val, TailSize, Align, isVolatile,
                          false, DstPtrInfo.getWithOffset(Offset));
  }

  // TODO: Use a TokenFactor, as in memcpy, instead of a single chain.
  return Chain;
}
184
185
/// Emit a single REP MOVS{B,W,D,Q} instruction.
186
static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG,
187
                           const SDLoc &dl, SDValue Chain, SDValue Dst,
188
250
                           SDValue Src, SDValue Size, MVT AVT) {
189
250
  const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
190
250
  const unsigned CX = Use64BitRegs ? 
X86::RCX224
:
X86::ECX26
;
191
250
  const unsigned DI = Use64BitRegs ? 
X86::RDI224
:
X86::EDI26
;
192
250
  const unsigned SI = Use64BitRegs ? 
X86::RSI224
:
X86::ESI26
;
193
250
194
250
  SDValue InFlag;
195
250
  Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InFlag);
196
250
  InFlag = Chain.getValue(1);
197
250
  Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InFlag);
198
250
  InFlag = Chain.getValue(1);
199
250
  Chain = DAG.getCopyToReg(Chain, dl, SI, Src, InFlag);
200
250
  InFlag = Chain.getValue(1);
201
250
202
250
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
203
250
  SDValue Ops[] = {Chain, DAG.getValueType(AVT), InFlag};
204
250
  return DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
205
250
}
206
207
/// Emit a single REP MOVSB instruction for a particular constant size.
208
static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
209
                            const SDLoc &dl, SDValue Chain, SDValue Dst,
210
16
                            SDValue Src, uint64_t Size) {
211
16
  return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
212
16
                     DAG.getIntPtrConstant(Size, dl), MVT::i8);
213
16
}
214
215
/// Returns the best type to use with repmovs depending on alignment.
216
static MVT getOptimalRepmovsType(const X86Subtarget &Subtarget,
217
234
                                 uint64_t Align) {
218
234
  assert((Align != 0) && "Align is normalized");
219
234
  assert(isPowerOf2_64(Align) && "Align is a power of 2");
220
234
  switch (Align) {
221
234
  case 1:
222
1
    return MVT::i8;
223
234
  case 2:
224
0
    return MVT::i16;
225
234
  case 4:
226
18
    return MVT::i32;
227
234
  default:
228
215
    return Subtarget.is64Bit() ? 
MVT::i64210
:
MVT::i325
;
229
234
  }
230
234
}
231
232
/// Returns a REP MOVS instruction, possibly with a few load/stores to implement
233
/// a constant size memory copy. In some cases where we know REP MOVS is
234
/// inefficient we return an empty SDValue so the calling code can either
235
/// generate a load/store sequence or call the runtime memcpy function.
236
static SDValue emitConstantSizeRepmov(
237
    SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
238
    SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
239
    unsigned Align, bool isVolatile, bool AlwaysInline,
240
304
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {
241
304
242
304
  /// TODO: Revisit next line: big copy with ERMSB on march >= haswell are very
243
304
  /// efficient.
244
304
  if (!AlwaysInline && 
Size > Subtarget.getMaxInlineSizeThreshold()253
)
245
36
    return SDValue();
246
268
247
268
  /// If we have enhanced repmovs we use it.
248
268
  if (Subtarget.hasERMSB())
249
12
    return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
250
256
251
256
  assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
252
256
  /// We assume runtime memcpy will do a better job for unaligned copies when
253
256
  /// ERMS is not present.
254
256
  if (!AlwaysInline && 
(Align & 3) != 0217
)
255
22
    return SDValue();
256
234
257
234
  const MVT BlockType = getOptimalRepmovsType(Subtarget, Align);
258
234
  const uint64_t BlockBytes = BlockType.getSizeInBits() / 8;
259
234
  const uint64_t BlockCount = Size / BlockBytes;
260
234
  const uint64_t BytesLeft = Size % BlockBytes;
261
234
  SDValue RepMovs =
262
234
      emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
263
234
                  DAG.getIntPtrConstant(BlockCount, dl), BlockType);
264
234
265
234
  /// RepMov can process the whole length.
266
234
  if (BytesLeft == 0)
267
180
    return RepMovs;
268
54
269
54
  assert(BytesLeft && "We have leftover at this point");
270
54
271
54
  /// In case we optimize for size we use repmovsb even if it's less efficient
272
54
  /// so we can save the loads/stores of the leftover.
273
54
  if (DAG.getMachineFunction().getFunction().hasMinSize())
274
4
    return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
275
50
276
50
  // Handle the last 1 - 7 bytes.
277
50
  SmallVector<SDValue, 4> Results;
278
50
  Results.push_back(RepMovs);
279
50
  unsigned Offset = Size - BytesLeft;
280
50
  EVT DstVT = Dst.getValueType();
281
50
  EVT SrcVT = Src.getValueType();
282
50
  Results.push_back(DAG.getMemcpy(
283
50
      Chain, dl,
284
50
      DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)),
285
50
      DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)),
286
50
      DAG.getConstant(BytesLeft, dl, SizeVT), Align, isVolatile,
287
50
      /*AlwaysInline*/ true, /*isTailCall*/ false,
288
50
      DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset)));
289
50
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
290
50
}
291
292
/// Target hook for lowering a memcpy.
///
/// Returns an empty SDValue, meaning "use the default lowering", when either
/// pointer is in a segment-relative address space, when the base register
/// might conflict with the registers this lowering writes, or when the size
/// is not a constant. Constant sizes are handed to emitConstantSizeRepmov.
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  // Segment-relative address spaces get the default lowering.
  const bool TouchesSegmentSpace =
      DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256;
  if (TouchesSegmentSpace)
    return SDValue();

  // The expansion writes the count, source and destination registers; give
  // up if the base register could be one of them.
  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
                                  X86::ECX, X86::ESI, X86::EDI};
  if (isBaseRegConflictPossible(DAG, ClobberSet))
    return SDValue();

  // Only constant sizes are expanded here.
  ConstantSDNode *SizeC = dyn_cast<ConstantSDNode>(Size);
  if (!SizeC)
    return SDValue();

  const X86Subtarget &ST =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();
  return emitConstantSizeRepmov(DAG, ST, dl, Chain, Dst, Src,
                                SizeC->getZExtValue(), Size.getValueType(),
                                Align, isVolatile, AlwaysInline, DstPtrInfo,
                                SrcPtrInfo);
}