/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
Line | Count | Source (jump to first uncovered line) |
//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ARMSelectionDAGInfo class.
//
//===----------------------------------------------------------------------===//

#include "ARMTargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

#define DEBUG_TYPE "arm-selectiondag-info"
19 | | |
20 | | // Emit, if possible, a specialized version of the given Libcall. Typically this |
21 | | // means selecting the appropriately aligned version, but we also convert memset |
22 | | // of 0 into memclr. |
23 | | SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall( |
24 | | SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, |
25 | 415 | SDValue Size, unsigned Align, RTLIB::Libcall LC) const { |
26 | 415 | const ARMSubtarget &Subtarget = |
27 | 415 | DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); |
28 | 415 | const ARMTargetLowering *TLI = Subtarget.getTargetLowering(); |
29 | 415 | |
30 | 415 | // Only use a specialized AEABI function if the default version of this |
31 | 415 | // Libcall is an AEABI function. |
32 | 415 | if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0) |
33 | 281 | return SDValue(); |
34 | 134 | |
35 | 134 | // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be |
36 | 134 | // able to translate memset to memclr and use the value to index the function |
37 | 134 | // name array. |
38 | 134 | enum { |
39 | 134 | AEABI_MEMCPY = 0, |
40 | 134 | AEABI_MEMMOVE, |
41 | 134 | AEABI_MEMSET, |
42 | 134 | AEABI_MEMCLR |
43 | 134 | } AEABILibcall; |
44 | 134 | switch (LC) { |
45 | 134 | case RTLIB::MEMCPY: |
46 | 29 | AEABILibcall = AEABI_MEMCPY; |
47 | 29 | break; |
48 | 134 | case RTLIB::MEMMOVE: |
49 | 48 | AEABILibcall = AEABI_MEMMOVE; |
50 | 48 | break; |
51 | 134 | case RTLIB::MEMSET: |
52 | 57 | AEABILibcall = AEABI_MEMSET; |
53 | 57 | if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src)) |
54 | 56 | if (ConstantSrc->getZExtValue() == 0) |
55 | 9 | AEABILibcall = AEABI_MEMCLR; |
56 | 57 | break; |
57 | 134 | default: |
58 | 0 | return SDValue(); |
59 | 134 | } |
60 | 134 | |
61 | 134 | // Choose the most-aligned libcall variant that we can |
62 | 134 | enum { |
63 | 134 | ALIGN1 = 0, |
64 | 134 | ALIGN4, |
65 | 134 | ALIGN8 |
66 | 134 | } AlignVariant; |
67 | 134 | if ((Align & 7) == 0) |
68 | 12 | AlignVariant = ALIGN8; |
69 | 122 | else if ((Align & 3) == 0) |
70 | 38 | AlignVariant = ALIGN4; |
71 | 84 | else |
72 | 84 | AlignVariant = ALIGN1; |
73 | 134 | |
74 | 134 | TargetLowering::ArgListTy Args; |
75 | 134 | TargetLowering::ArgListEntry Entry; |
76 | 134 | Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); |
77 | 134 | Entry.Node = Dst; |
78 | 134 | Args.push_back(Entry); |
79 | 134 | if (AEABILibcall == AEABI_MEMCLR) { |
80 | 9 | Entry.Node = Size; |
81 | 9 | Args.push_back(Entry); |
82 | 125 | } else if (AEABILibcall == AEABI_MEMSET) { |
83 | 48 | // Adjust parameters for memset, EABI uses format (ptr, size, value), |
84 | 48 | // GNU library uses (ptr, value, size) |
85 | 48 | // See RTABI section 4.3.4 |
86 | 48 | Entry.Node = Size; |
87 | 48 | Args.push_back(Entry); |
88 | 48 | |
89 | 48 | // Extend or truncate the argument to be an i32 value for the call. |
90 | 48 | if (Src.getValueType().bitsGT(MVT::i32)) |
91 | 0 | Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src); |
92 | 48 | else if (Src.getValueType().bitsLT(MVT::i32)) |
93 | 48 | Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); |
94 | 48 | |
95 | 48 | Entry.Node = Src; |
96 | 48 | Entry.Ty = Type::getInt32Ty(*DAG.getContext()); |
97 | 48 | Entry.IsSExt = false; |
98 | 48 | Args.push_back(Entry); |
99 | 77 | } else { |
100 | 77 | Entry.Node = Src; |
101 | 77 | Args.push_back(Entry); |
102 | 77 | |
103 | 77 | Entry.Node = Size; |
104 | 77 | Args.push_back(Entry); |
105 | 77 | } |
106 | 134 | |
107 | 134 | char const *FunctionNames[4][3] = { |
108 | 134 | { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" }, |
109 | 134 | { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" }, |
110 | 134 | { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" }, |
111 | 134 | { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" } |
112 | 134 | }; |
113 | 134 | TargetLowering::CallLoweringInfo CLI(DAG); |
114 | 134 | CLI.setDebugLoc(dl) |
115 | 134 | .setChain(Chain) |
116 | 134 | .setLibCallee( |
117 | 134 | TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), |
118 | 134 | DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], |
119 | 134 | TLI->getPointerTy(DAG.getDataLayout())), |
120 | 134 | std::move(Args)) |
121 | 134 | .setDiscardResult(); |
122 | 134 | std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); |
123 | 134 | |
124 | 134 | return CallResult.second; |
125 | 134 | } |
126 | | |
127 | | SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy( |
128 | | SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, |
129 | | SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, |
130 | 340 | MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { |
131 | 340 | const ARMSubtarget &Subtarget = |
132 | 340 | DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); |
133 | 340 | // Do repeated 4-byte loads and stores. To be improved. |
134 | 340 | // This requires 4-byte alignment. |
135 | 340 | if ((Align & 3) != 0) |
136 | 243 | return SDValue(); |
137 | 97 | // This requires the copy size to be a constant, preferably |
138 | 97 | // within a subtarget-specific limit. |
139 | 97 | ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); |
140 | 97 | if (!ConstantSize) |
141 | 0 | return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, |
142 | 0 | RTLIB::MEMCPY); |
143 | 97 | uint64_t SizeVal = ConstantSize->getZExtValue(); |
144 | 97 | if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) |
145 | 63 | return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, |
146 | 63 | RTLIB::MEMCPY); |
147 | 34 | |
148 | 34 | unsigned BytesLeft = SizeVal & 3; |
149 | 34 | unsigned NumMemOps = SizeVal >> 2; |
150 | 34 | unsigned EmittedNumMemOps = 0; |
151 | 34 | EVT VT = MVT::i32; |
152 | 34 | unsigned VTSize = 4; |
153 | 34 | unsigned i = 0; |
154 | 34 | // Emit a maximum of 4 loads in Thumb1 since we have fewer registers |
155 | 34 | const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 415 : 619 ; |
156 | 34 | SDValue TFOps[6]; |
157 | 34 | SDValue Loads[6]; |
158 | 34 | uint64_t SrcOff = 0, DstOff = 0; |
159 | 34 | |
160 | 34 | // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to |
161 | 34 | // VLDM/VSTM and make this code emit it when appropriate. This would reduce |
162 | 34 | // pressure on the general purpose registers. However this seems harder to map |
163 | 34 | // onto the register allocator's view of the world. |
164 | 34 | |
165 | 34 | // The number of MEMCPY pseudo-instructions to emit. We use up to |
166 | 34 | // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm |
167 | 34 | // later on. This is a lower bound on the number of MEMCPY operations we must |
168 | 34 | // emit. |
169 | 34 | unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM; |
170 | 34 | |
171 | 34 | // Code size optimisation: do not inline memcpy if expansion results in |
172 | 34 | // more instructions than the libary call. |
173 | 34 | if (NumMEMCPYs > 1 && Subtarget.hasMinSize()19 ) { |
174 | 1 | return SDValue(); |
175 | 1 | } |
176 | 33 | |
177 | 33 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue); |
178 | 33 | |
179 | 87 | for (unsigned I = 0; I != NumMEMCPYs; ++I54 ) { |
180 | 54 | // Evenly distribute registers among MEMCPY operations to reduce register |
181 | 54 | // pressure. |
182 | 54 | unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs; |
183 | 54 | unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps; |
184 | 54 | |
185 | 54 | Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src, |
186 | 54 | DAG.getConstant(NumRegs, dl, MVT::i32)); |
187 | 54 | Src = Dst.getValue(1); |
188 | 54 | Chain = Dst.getValue(2); |
189 | 54 | |
190 | 54 | DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize); |
191 | 54 | SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize); |
192 | 54 | |
193 | 54 | EmittedNumMemOps = NextEmittedNumMemOps; |
194 | 54 | } |
195 | 33 | |
196 | 33 | if (BytesLeft == 0) |
197 | 26 | return Chain; |
198 | 7 | |
199 | 7 | // Issue loads / stores for the trailing (1 - 3) bytes. |
200 | 18 | auto getRemainingValueType = [](unsigned BytesLeft) 7 { |
201 | 18 | return (BytesLeft >= 2) ? MVT::i164 : MVT::i814 ; |
202 | 18 | }; |
203 | 18 | auto getRemainingSize = [](unsigned BytesLeft) { |
204 | 18 | return (BytesLeft >= 2) ? 24 : 114 ; |
205 | 18 | }; |
206 | 7 | |
207 | 7 | unsigned BytesLeftSave = BytesLeft; |
208 | 7 | i = 0; |
209 | 16 | while (BytesLeft) { |
210 | 9 | VT = getRemainingValueType(BytesLeft); |
211 | 9 | VTSize = getRemainingSize(BytesLeft); |
212 | 9 | Loads[i] = DAG.getLoad(VT, dl, Chain, |
213 | 9 | DAG.getNode(ISD::ADD, dl, MVT::i32, Src, |
214 | 9 | DAG.getConstant(SrcOff, dl, MVT::i32)), |
215 | 9 | SrcPtrInfo.getWithOffset(SrcOff)); |
216 | 9 | TFOps[i] = Loads[i].getValue(1); |
217 | 9 | ++i; |
218 | 9 | SrcOff += VTSize; |
219 | 9 | BytesLeft -= VTSize; |
220 | 9 | } |
221 | 7 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
222 | 7 | makeArrayRef(TFOps, i)); |
223 | 7 | |
224 | 7 | i = 0; |
225 | 7 | BytesLeft = BytesLeftSave; |
226 | 16 | while (BytesLeft) { |
227 | 9 | VT = getRemainingValueType(BytesLeft); |
228 | 9 | VTSize = getRemainingSize(BytesLeft); |
229 | 9 | TFOps[i] = DAG.getStore(Chain, dl, Loads[i], |
230 | 9 | DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, |
231 | 9 | DAG.getConstant(DstOff, dl, MVT::i32)), |
232 | 9 | DstPtrInfo.getWithOffset(DstOff)); |
233 | 9 | ++i; |
234 | 9 | DstOff += VTSize; |
235 | 9 | BytesLeft -= VTSize; |
236 | 9 | } |
237 | 7 | return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
238 | 7 | makeArrayRef(TFOps, i)); |
239 | 7 | } |
240 | | |
241 | | SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove( |
242 | | SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, |
243 | | SDValue Size, unsigned Align, bool isVolatile, |
244 | 120 | MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { |
245 | 120 | return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, |
246 | 120 | RTLIB::MEMMOVE); |
247 | 120 | } |
248 | | |
249 | | SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset( |
250 | | SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, |
251 | | SDValue Size, unsigned Align, bool isVolatile, |
252 | 232 | MachinePointerInfo DstPtrInfo) const { |
253 | 232 | return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, |
254 | 232 | RTLIB::MEMSET); |
255 | 232 | } |