Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file implements the ARMSelectionDAGInfo class.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "ARMTargetMachine.h"
15
#include "llvm/CodeGen/SelectionDAG.h"
16
#include "llvm/IR/DerivedTypes.h"
17
using namespace llvm;
18
19
#define DEBUG_TYPE "arm-selectiondag-info"
20
21
// Emit, if possible, a specialized version of the given Libcall. Typically this
22
// means selecting the appropriately aligned version, but we also convert memset
23
// of 0 into memclr.
24
SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
25
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
26
379
    SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
27
379
  const ARMSubtarget &Subtarget =
28
379
      DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
29
379
  const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
30
379
31
379
  // Only use a specialized AEABI function if the default version of this
32
379
  // Libcall is an AEABI function.
33
379
  if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
34
245
    return SDValue();
35
134
36
134
  // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
37
134
  // able to translate memset to memclr and use the value to index the function
38
134
  // name array.
39
134
  enum {
40
134
    AEABI_MEMCPY = 0,
41
134
    AEABI_MEMMOVE,
42
134
    AEABI_MEMSET,
43
134
    AEABI_MEMCLR
44
134
  } AEABILibcall;
45
134
  switch (LC) {
46
29
  case RTLIB::MEMCPY:
47
29
    AEABILibcall = AEABI_MEMCPY;
48
29
    break;
49
48
  case RTLIB::MEMMOVE:
50
48
    AEABILibcall = AEABI_MEMMOVE;
51
48
    break;
52
57
  case RTLIB::MEMSET: 
53
57
    AEABILibcall = AEABI_MEMSET;
54
57
    if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
55
56
      
if (56
ConstantSrc->getZExtValue() == 056
)
56
9
        AEABILibcall = AEABI_MEMCLR;
57
57
    break;
58
0
  default:
59
0
    return SDValue();
60
134
  }
61
134
62
134
  // Choose the most-aligned libcall variant that we can
63
134
  enum {
64
134
    ALIGN1 = 0,
65
134
    ALIGN4,
66
134
    ALIGN8
67
134
  } AlignVariant;
68
134
  if ((Align & 7) == 0)
69
12
    AlignVariant = ALIGN8;
70
122
  else 
if (122
(Align & 3) == 0122
)
71
38
    AlignVariant = ALIGN4;
72
122
  else
73
84
    AlignVariant = ALIGN1;
74
134
75
134
  TargetLowering::ArgListTy Args;
76
134
  TargetLowering::ArgListEntry Entry;
77
134
  Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
78
134
  Entry.Node = Dst;
79
134
  Args.push_back(Entry);
80
134
  if (
AEABILibcall == AEABI_MEMCLR134
) {
81
9
    Entry.Node = Size;
82
9
    Args.push_back(Entry);
83
134
  } else 
if (125
AEABILibcall == AEABI_MEMSET125
) {
84
48
    // Adjust parameters for memset, EABI uses format (ptr, size, value),
85
48
    // GNU library uses (ptr, value, size)
86
48
    // See RTABI section 4.3.4
87
48
    Entry.Node = Size;
88
48
    Args.push_back(Entry);
89
48
90
48
    // Extend or truncate the argument to be an i32 value for the call.
91
48
    if (Src.getValueType().bitsGT(MVT::i32))
92
0
      Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
93
48
    else 
if (48
Src.getValueType().bitsLT(MVT::i32)48
)
94
48
      Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
95
48
96
48
    Entry.Node = Src; 
97
48
    Entry.Ty = Type::getInt32Ty(*DAG.getContext());
98
48
    Entry.IsSExt = false;
99
48
    Args.push_back(Entry);
100
125
  } else {
101
77
    Entry.Node = Src;
102
77
    Args.push_back(Entry);
103
77
    
104
77
    Entry.Node = Size;
105
77
    Args.push_back(Entry);
106
77
  }
107
379
108
379
  char const *FunctionNames[4][3] = {
109
379
    { "__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"  },
110
379
    { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
111
379
    { "__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"  },
112
379
    { "__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"  }
113
379
  };
114
379
  TargetLowering::CallLoweringInfo CLI(DAG);
115
379
  CLI.setDebugLoc(dl)
116
379
      .setChain(Chain)
117
379
      .setLibCallee(
118
379
          TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
119
379
          DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
120
379
                                TLI->getPointerTy(DAG.getDataLayout())),
121
379
          std::move(Args))
122
379
      .setDiscardResult();
123
379
  std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
124
379
  
125
379
  return CallResult.second;
126
379
}
127
128
/// Lower a memcpy either to a sequence of ARMISD::MEMCPY nodes followed by
/// narrow loads/stores for the trailing bytes, or to a specialised AEABI
/// libcall.
///
/// Returns an empty SDValue when \p Align is not a multiple of 4, or when more
/// than one MEMCPY node would be needed in a function optimised for minimum
/// size. A non-constant \p Size, or a constant size above the subtarget's
/// inline threshold (unless \p AlwaysInline), is forwarded to
/// EmitSpecializedLibcall. Otherwise the chain of the emitted operations is
/// returned.
///
/// Note: \p isVolatile is not consulted by this implementation.
SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  const ARMSubtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<ARMSubtarget>();

  // The inline expansion copies 4 bytes at a time and therefore needs 4-byte
  // alignment. To be improved.
  if ((Align & 3) != 0)
    return SDValue();

  // The expansion needs a constant size, preferably one within the
  // subtarget-specific limit; anything else goes to the libcall path.
  ConstantSDNode *SizeC = dyn_cast<ConstantSDNode>(Size);
  const bool OverThreshold =
      SizeC && !AlwaysInline &&
      SizeC->getZExtValue() > Subtarget.getMaxInlineSizeThreshold();
  if (!SizeC || OverThreshold)
    return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
                                  RTLIB::MEMCPY);

  const uint64_t SizeVal = SizeC->getZExtValue();
  const unsigned WordBytes = 4;
  const unsigned TailBytes = SizeVal % 4; // 0-3 bytes left after whole words
  const unsigned NumWords = SizeVal / 4;  // whole 32-bit words to copy

  // Emit a maximum of 4 loads in Thumb1 since we have fewer registers.
  const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;

  // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
  // VLDM/VSTM and make this code emit it when appropriate. This would reduce
  // pressure on the general purpose registers. However this seems harder to map
  // onto the register allocator's view of the world.

  // Number of MEMCPY pseudo-instructions to emit. Each one uses up to
  // MaxLoadsInLDM registers and is lowered into ldm/stm later on, so this is
  // a lower bound on the number of MEMCPY operations required.
  const unsigned NumMEMCPYs = (NumWords + MaxLoadsInLDM - 1) / MaxLoadsInLDM;

  // Code size optimisation: do not inline memcpy if the expansion results in
  // more instructions than the library call.
  if (NumMEMCPYs > 1 &&
      DAG.getMachineFunction().getFunction()->optForMinSize())
    return SDValue();

  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);

  // Spread the words evenly across the MEMCPY operations to reduce register
  // pressure: after the K-th operation, NumWords * K / NumMEMCPYs words have
  // been copied in total.
  unsigned WordsDone = 0;
  for (unsigned K = 1; K <= NumMEMCPYs; ++K) {
    const unsigned WordsTarget = NumWords * K / NumMEMCPYs;
    const unsigned Regs = WordsTarget - WordsDone;

    // The node's results are used as: 0 = new Dst, 1 = new Src, 2 = chain.
    Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
                      DAG.getConstant(Regs, dl, MVT::i32));
    Src = Dst.getValue(1);
    Chain = Dst.getValue(2);

    DstPtrInfo = DstPtrInfo.getWithOffset(Regs * WordBytes);
    SrcPtrInfo = SrcPtrInfo.getWithOffset(Regs * WordBytes);

    WordsDone = WordsTarget;
  }

  if (TailBytes == 0)
    return Chain;

  // Issue loads / stores for the trailing (1 - 3) bytes. The tail is split
  // into the widest pieces available (3 -> i16 + i8, 2 -> i16, 1 -> i8), so at
  // most two pieces are ever needed.
  const unsigned MaxTailPieces = 2;
  SDValue Loads[MaxTailPieces];
  SDValue OpChains[MaxTailPieces];
  uint64_t PieceOffset[MaxTailPieces];
  unsigned NumPieces = 0;

  // First emit all the loads, remembering the offset of each piece.
  uint64_t Offset = 0;
  for (unsigned Remaining = TailBytes; Remaining != 0;) {
    const bool IsHalfWord = Remaining >= 2;
    const EVT PieceVT = IsHalfWord ? MVT::i16 : MVT::i8;
    const unsigned PieceBytes = IsHalfWord ? 2 : 1;

    SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
                                  DAG.getConstant(Offset, dl, MVT::i32));
    Loads[NumPieces] = DAG.getLoad(PieceVT, dl, Chain, SrcAddr,
                                   SrcPtrInfo.getWithOffset(Offset));
    OpChains[NumPieces] = Loads[NumPieces].getValue(1);
    PieceOffset[NumPieces] = Offset;

    ++NumPieces;
    Offset += PieceBytes;
    Remaining -= PieceBytes;
  }
  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      makeArrayRef(OpChains, NumPieces));

  // Then emit the matching stores at the same offsets from the destination.
  for (unsigned P = 0; P != NumPieces; ++P) {
    const uint64_t DstOff = PieceOffset[P];
    SDValue DstAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
                                  DAG.getConstant(DstOff, dl, MVT::i32));
    OpChains[P] = DAG.getStore(Chain, dl, Loads[P], DstAddr,
                               DstPtrInfo.getWithOffset(DstOff));
  }
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                     makeArrayRef(OpChains, NumPieces));
}
241
242
/// Lower a memmove by delegating to EmitSpecializedLibcall with
/// RTLIB::MEMMOVE. \p isVolatile and the pointer infos are not used here.
SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  const RTLIB::Libcall Call = RTLIB::MEMMOVE;
  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, Call);
}
249
250
/// Lower a memset by delegating to EmitSpecializedLibcall with RTLIB::MEMSET;
/// \p Src carries the fill value. \p isVolatile and \p DstPtrInfo are not used
/// here.
SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile,
    MachinePointerInfo DstPtrInfo) const {
  const RTLIB::Libcall Call = RTLIB::MEMSET;
  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, Call);
}