Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the ARMSelectionDAGInfo class.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "ARMTargetMachine.h"
14
#include "llvm/CodeGen/SelectionDAG.h"
15
#include "llvm/IR/DerivedTypes.h"
16
using namespace llvm;
17
18
#define DEBUG_TYPE "arm-selectiondag-info"
19
20
// Emit, if possible, a specialized version of the given Libcall. Typically this
21
// means selecting the appropriately aligned version, but we also convert memset
22
// of 0 into memclr.
23
SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
24
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
25
415
    SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
26
415
  const ARMSubtarget &Subtarget =
27
415
      DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
28
415
  const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
29
415
30
415
  // Only use a specialized AEABI function if the default version of this
31
415
  // Libcall is an AEABI function.
32
415
  if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
33
281
    return SDValue();
34
134
35
134
  // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
36
134
  // able to translate memset to memclr and use the value to index the function
37
134
  // name array.
38
134
  enum {
39
134
    AEABI_MEMCPY = 0,
40
134
    AEABI_MEMMOVE,
41
134
    AEABI_MEMSET,
42
134
    AEABI_MEMCLR
43
134
  } AEABILibcall;
44
134
  switch (LC) {
45
134
  case RTLIB::MEMCPY:
46
29
    AEABILibcall = AEABI_MEMCPY;
47
29
    break;
48
134
  case RTLIB::MEMMOVE:
49
48
    AEABILibcall = AEABI_MEMMOVE;
50
48
    break;
51
134
  case RTLIB::MEMSET:
52
57
    AEABILibcall = AEABI_MEMSET;
53
57
    if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
54
56
      if (ConstantSrc->getZExtValue() == 0)
55
9
        AEABILibcall = AEABI_MEMCLR;
56
57
    break;
57
134
  default:
58
0
    return SDValue();
59
134
  }
60
134
61
134
  // Choose the most-aligned libcall variant that we can
62
134
  enum {
63
134
    ALIGN1 = 0,
64
134
    ALIGN4,
65
134
    ALIGN8
66
134
  } AlignVariant;
67
134
  if ((Align & 7) == 0)
68
12
    AlignVariant = ALIGN8;
69
122
  else if ((Align & 3) == 0)
70
38
    AlignVariant = ALIGN4;
71
84
  else
72
84
    AlignVariant = ALIGN1;
73
134
74
134
  TargetLowering::ArgListTy Args;
75
134
  TargetLowering::ArgListEntry Entry;
76
134
  Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
77
134
  Entry.Node = Dst;
78
134
  Args.push_back(Entry);
79
134
  if (AEABILibcall == AEABI_MEMCLR) {
80
9
    Entry.Node = Size;
81
9
    Args.push_back(Entry);
82
125
  } else if (AEABILibcall == AEABI_MEMSET) {
83
48
    // Adjust parameters for memset, EABI uses format (ptr, size, value),
84
48
    // GNU library uses (ptr, value, size)
85
48
    // See RTABI section 4.3.4
86
48
    Entry.Node = Size;
87
48
    Args.push_back(Entry);
88
48
89
48
    // Extend or truncate the argument to be an i32 value for the call.
90
48
    if (Src.getValueType().bitsGT(MVT::i32))
91
0
      Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
92
48
    else if (Src.getValueType().bitsLT(MVT::i32))
93
48
      Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
94
48
95
48
    Entry.Node = Src;
96
48
    Entry.Ty = Type::getInt32Ty(*DAG.getContext());
97
48
    Entry.IsSExt = false;
98
48
    Args.push_back(Entry);
99
77
  } else {
100
77
    Entry.Node = Src;
101
77
    Args.push_back(Entry);
102
77
103
77
    Entry.Node = Size;
104
77
    Args.push_back(Entry);
105
77
  }
106
134
107
134
  char const *FunctionNames[4][3] = {
108
134
    { "__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"  },
109
134
    { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
110
134
    { "__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"  },
111
134
    { "__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"  }
112
134
  };
113
134
  TargetLowering::CallLoweringInfo CLI(DAG);
114
134
  CLI.setDebugLoc(dl)
115
134
      .setChain(Chain)
116
134
      .setLibCallee(
117
134
          TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
118
134
          DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
119
134
                                TLI->getPointerTy(DAG.getDataLayout())),
120
134
          std::move(Args))
121
134
      .setDiscardResult();
122
134
  std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
123
134
124
134
  return CallResult.second;
125
134
}
126
127
SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
128
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
129
    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
130
340
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
131
340
  const ARMSubtarget &Subtarget =
132
340
      DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
133
340
  // Do repeated 4-byte loads and stores. To be improved.
134
340
  // This requires 4-byte alignment.
135
340
  if ((Align & 3) != 0)
136
243
    return SDValue();
137
97
  // This requires the copy size to be a constant, preferably
138
97
  // within a subtarget-specific limit.
139
97
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
140
97
  if (!ConstantSize)
141
0
    return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
142
0
                                  RTLIB::MEMCPY);
143
97
  uint64_t SizeVal = ConstantSize->getZExtValue();
144
97
  if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
145
63
    return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
146
63
                                  RTLIB::MEMCPY);
147
34
148
34
  unsigned BytesLeft = SizeVal & 3;
149
34
  unsigned NumMemOps = SizeVal >> 2;
150
34
  unsigned EmittedNumMemOps = 0;
151
34
  EVT VT = MVT::i32;
152
34
  unsigned VTSize = 4;
153
34
  unsigned i = 0;
154
34
  // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
155
34
  const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
156
34
  SDValue TFOps[6];
157
34
  SDValue Loads[6];
158
34
  uint64_t SrcOff = 0, DstOff = 0;
159
34
160
34
  // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
161
34
  // VLDM/VSTM and make this code emit it when appropriate. This would reduce
162
34
  // pressure on the general purpose registers. However this seems harder to map
163
34
  // onto the register allocator's view of the world.
164
34
165
34
  // The number of MEMCPY pseudo-instructions to emit. We use up to
166
34
  // MaxLoadsInLDM registers per MEMCPY, which will get lowered into ldm/stm
167
34
  // later on. This is a lower bound on the number of MEMCPY operations we must
168
34
  // emit.
169
34
  unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
170
34
171
34
  // Code size optimisation: do not inline memcpy if expansion results in
172
34
  // more instructions than the library call.
173
34
  if (NumMEMCPYs > 1 && Subtarget.hasMinSize()) {
174
1
    return SDValue();
175
1
  }
176
33
177
33
  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
178
33
179
87
  for (unsigned I = 0; I != NumMEMCPYs; ++I) {
180
54
    // Evenly distribute registers among MEMCPY operations to reduce register
181
54
    // pressure.
182
54
    unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
183
54
    unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
184
54
185
54
    Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
186
54
                      DAG.getConstant(NumRegs, dl, MVT::i32));
187
54
    Src = Dst.getValue(1);
188
54
    Chain = Dst.getValue(2);
189
54
190
54
    DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
191
54
    SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
192
54
193
54
    EmittedNumMemOps = NextEmittedNumMemOps;
194
54
  }
195
33
196
33
  if (BytesLeft == 0)
197
26
    return Chain;
198
7
199
7
  // Issue loads / stores for the trailing (1 - 3) bytes.
200
18
  
  auto getRemainingValueType = [](unsigned BytesLeft) {
201
18
    return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
202
18
  };
203
18
  auto getRemainingSize = [](unsigned BytesLeft) {
204
18
    return (BytesLeft >= 2) ? 2 : 1;
205
18
  };
206
7
207
7
  unsigned BytesLeftSave = BytesLeft;
208
7
  i = 0;
209
16
  while (BytesLeft) {
210
9
    VT = getRemainingValueType(BytesLeft);
211
9
    VTSize = getRemainingSize(BytesLeft);
212
9
    Loads[i] = DAG.getLoad(VT, dl, Chain,
213
9
                           DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
214
9
                                       DAG.getConstant(SrcOff, dl, MVT::i32)),
215
9
                           SrcPtrInfo.getWithOffset(SrcOff));
216
9
    TFOps[i] = Loads[i].getValue(1);
217
9
    ++i;
218
9
    SrcOff += VTSize;
219
9
    BytesLeft -= VTSize;
220
9
  }
221
7
  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
222
7
                      makeArrayRef(TFOps, i));
223
7
224
7
  i = 0;
225
7
  BytesLeft = BytesLeftSave;
226
16
  while (BytesLeft) {
227
9
    VT = getRemainingValueType(BytesLeft);
228
9
    VTSize = getRemainingSize(BytesLeft);
229
9
    TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
230
9
                            DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
231
9
                                        DAG.getConstant(DstOff, dl, MVT::i32)),
232
9
                            DstPtrInfo.getWithOffset(DstOff));
233
9
    ++i;
234
9
    DstOff += VTSize;
235
9
    BytesLeft -= VTSize;
236
9
  }
237
7
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
238
7
                     makeArrayRef(TFOps, i));
239
7
}
240
241
SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
242
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
243
    SDValue Size, unsigned Align, bool isVolatile,
244
120
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
245
120
  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
246
120
                                RTLIB::MEMMOVE);
247
120
}
248
249
SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
250
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
251
    SDValue Size, unsigned Align, bool isVolatile,
252
232
    MachinePointerInfo DstPtrInfo) const {
253
232
  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
254
232
                                RTLIB::MEMSET);
255
232
}