/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- HexagonConstExtenders.cpp ------------------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #include "HexagonInstrInfo.h" |
10 | | #include "HexagonRegisterInfo.h" |
11 | | #include "HexagonSubtarget.h" |
12 | | #include "llvm/ADT/SmallVector.h" |
13 | | #include "llvm/CodeGen/MachineDominators.h" |
14 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
15 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
16 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
17 | | #include "llvm/Support/CommandLine.h" |
18 | | #include "llvm/Support/raw_ostream.h" |
19 | | #include "llvm/Pass.h" |
20 | | #include <map> |
21 | | #include <set> |
22 | | #include <utility> |
23 | | #include <vector> |
24 | | |
25 | | #define DEBUG_TYPE "hexagon-cext-opt" |
26 | | |
27 | | using namespace llvm; |
28 | | |
29 | | static cl::opt<unsigned> CountThreshold("hexagon-cext-threshold", |
30 | | cl::init(3), cl::Hidden, cl::ZeroOrMore, |
31 | | cl::desc("Minimum number of extenders to trigger replacement")); |
32 | | |
33 | | static cl::opt<unsigned> ReplaceLimit("hexagon-cext-limit", cl::init(0), |
34 | | cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum number of replacements")); |
35 | | |
36 | | namespace llvm { |
37 | | void initializeHexagonConstExtendersPass(PassRegistry&); |
38 | | FunctionPass *createHexagonConstExtenders(); |
39 | | } |
40 | | |
41 | 3.58k | static int32_t adjustUp(int32_t V, uint8_t A, uint8_t O) { |
42 | 3.58k | assert(isPowerOf2_32(A)); |
43 | 3.58k | int32_t U = (V & -A) + O; |
44 | 3.58k | return U >= V ? U3.55k : U+A25 ; |
45 | 3.58k | } |
46 | | |
47 | 3.56k | static int32_t adjustDown(int32_t V, uint8_t A, uint8_t O) { |
48 | 3.56k | assert(isPowerOf2_32(A)); |
49 | 3.56k | int32_t U = (V & -A) + O; |
50 | 3.56k | return U <= V ? U : U-A0 ; |
51 | 3.56k | } |
52 | | |
53 | | namespace { |
54 | | struct OffsetRange { |
55 | | // The range of values between Min and Max that are of form Align*N+Offset, |
56 | | // for some integer N. Min and Max are required to be of that form as well, |
57 | | // except in the case of an empty range. |
58 | | int32_t Min = INT_MIN, Max = INT_MAX; |
59 | | uint8_t Align = 1; |
60 | | uint8_t Offset = 0; |
61 | | |
62 | 3.19k | OffsetRange() = default; |
63 | | OffsetRange(int32_t L, int32_t H, uint8_t A, uint8_t O = 0) |
64 | 3.54k | : Min(L), Max(H), Align(A), Offset(O) {} |
65 | 3.54k | OffsetRange &intersect(OffsetRange A) { |
66 | 3.54k | if (Align < A.Align) |
67 | 272 | std::swap(*this, A); |
68 | 3.54k | |
69 | 3.54k | // Align >= A.Align. |
70 | 3.54k | if (Offset >= A.Offset && (Offset - A.Offset) % A.Align == 0) { |
71 | 3.54k | Min = adjustUp(std::max(Min, A.Min), Align, Offset); |
72 | 3.54k | Max = adjustDown(std::min(Max, A.Max), Align, Offset); |
73 | 3.54k | } else { |
74 | 0 | // Make an empty range. |
75 | 0 | Min = 0; |
76 | 0 | Max = -1; |
77 | 0 | } |
78 | 3.54k | // Canonicalize empty ranges. |
79 | 3.54k | if (Min > Max) |
80 | 0 | std::tie(Min, Max, Align) = std::make_tuple(0, -1, 1); |
81 | 3.54k | return *this; |
82 | 3.54k | } |
83 | 1.98k | OffsetRange &shift(int32_t S) { |
84 | 1.98k | Min += S; |
85 | 1.98k | Max += S; |
86 | 1.98k | Offset = (Offset+S) % Align; |
87 | 1.98k | return *this; |
88 | 1.98k | } |
89 | 2.43k | OffsetRange &extendBy(int32_t D) { |
90 | 2.43k | // If D < 0, extend Min, otherwise extend Max. |
91 | 2.43k | assert(D % Align == 0); |
92 | 2.43k | if (D < 0) |
93 | 1.21k | Min = (INT_MIN-D < Min) ? Min+D1.21k : INT_MIN; |
94 | 2.43k | else |
95 | 2.43k | Max = (INT_MAX-D > Max) 1.21k ? Max+D1.21k : INT_MAX; |
96 | 2.43k | return *this; |
97 | 2.43k | } |
98 | 0 | bool empty() const { |
99 | 0 | return Min > Max; |
100 | 0 | } |
101 | 17.5k | bool contains(int32_t V) const { |
102 | 17.5k | return Min <= V && V <= Max && (V-Offset) % Align == 015.3k ; |
103 | 17.5k | } |
104 | 1.92k | bool operator==(const OffsetRange &R) const { |
105 | 1.92k | return Min == R.Min && Max == R.Max209 && Align == R.Align209 ; |
106 | 1.92k | } |
107 | 0 | bool operator!=(const OffsetRange &R) const { |
108 | 0 | return !operator==(R); |
109 | 0 | } |
110 | 12.5k | bool operator<(const OffsetRange &R) const { |
111 | 12.5k | if (Min != R.Min) |
112 | 8.59k | return Min < R.Min; |
113 | 3.96k | if (Max != R.Max) |
114 | 0 | return Max < R.Max; |
115 | 3.96k | return Align < R.Align; |
116 | 3.96k | } |
117 | 2.99k | static OffsetRange zero() { return {0, 0, 1}; } |
118 | | }; |
119 | | |
120 | | struct RangeTree { |
121 | | struct Node { |
122 | 1.77k | Node(const OffsetRange &R) : MaxEnd(R.Max), Range(R) {} |
123 | | unsigned Height = 1; |
124 | | unsigned Count = 1; |
125 | | int32_t MaxEnd; |
126 | | const OffsetRange &Range; |
127 | | Node *Left = nullptr, *Right = nullptr; |
128 | | }; |
129 | | |
130 | | Node *Root = nullptr; |
131 | | |
132 | 1.98k | void add(const OffsetRange &R) { |
133 | 1.98k | Root = add(Root, R); |
134 | 1.98k | } |
135 | 1.77k | void erase(const Node *N) { |
136 | 1.77k | Root = remove(Root, N); |
137 | 1.77k | delete N; |
138 | 1.77k | } |
139 | 2.55k | void order(SmallVectorImpl<Node*> &Seq) const { |
140 | 2.55k | order(Root, Seq); |
141 | 2.55k | } |
142 | 11.9k | SmallVector<Node*,8> nodesWith(int32_t P, bool CheckAlign = true) { |
143 | 11.9k | SmallVector<Node*,8> Nodes; |
144 | 11.9k | nodesWith(Root, P, CheckAlign, Nodes); |
145 | 11.9k | return Nodes; |
146 | 11.9k | } |
147 | | void dump() const; |
148 | 1.27k | ~RangeTree() { |
149 | 1.27k | SmallVector<Node*,8> Nodes; |
150 | 1.27k | order(Nodes); |
151 | 1.27k | for (Node *N : Nodes) |
152 | 0 | delete N; |
153 | 1.27k | } |
154 | | |
155 | | private: |
156 | | void dump(const Node *N) const; |
157 | | void order(Node *N, SmallVectorImpl<Node*> &Seq) const; |
158 | | void nodesWith(Node *N, int32_t P, bool CheckA, |
159 | | SmallVectorImpl<Node*> &Seq) const; |
160 | | |
161 | | Node *add(Node *N, const OffsetRange &R); |
162 | | Node *remove(Node *N, const Node *D); |
163 | | Node *rotateLeft(Node *Lower, Node *Higher); |
164 | | Node *rotateRight(Node *Lower, Node *Higher); |
165 | 11.8k | unsigned height(Node *N) { |
166 | 11.8k | return N != nullptr ? N->Height9.24k : 02.61k ; |
167 | 11.8k | } |
168 | 3.14k | Node *update(Node *N) { |
169 | 3.14k | assert(N != nullptr); |
170 | 3.14k | N->Height = 1 + std::max(height(N->Left), height(N->Right)); |
171 | 3.14k | if (N->Left) |
172 | 1.99k | N->MaxEnd = std::max(N->MaxEnd, N->Left->MaxEnd); |
173 | 3.14k | if (N->Right) |
174 | 2.87k | N->MaxEnd = std::max(N->MaxEnd, N->Right->MaxEnd); |
175 | 3.14k | return N; |
176 | 3.14k | } |
177 | 2.42k | Node *rebalance(Node *N) { |
178 | 2.42k | assert(N != nullptr); |
179 | 2.42k | int32_t Balance = height(N->Right) - height(N->Left); |
180 | 2.42k | if (Balance < -1) |
181 | 6 | return rotateRight(N->Left, N); |
182 | 2.41k | if (Balance > 1) |
183 | 349 | return rotateLeft(N->Right, N); |
184 | 2.06k | return N; |
185 | 2.06k | } |
186 | | }; |
187 | | |
188 | | struct Loc { |
189 | | MachineBasicBlock *Block = nullptr; |
190 | | MachineBasicBlock::iterator At; |
191 | | |
192 | | Loc(MachineBasicBlock *B, MachineBasicBlock::iterator It) |
193 | 50 | : Block(B), At(It) { |
194 | 50 | if (B->end() == It) { |
195 | 0 | Pos = -1; |
196 | 50 | } else { |
197 | 50 | assert(It->getParent() == B); |
198 | 50 | Pos = std::distance(B->begin(), It); |
199 | 50 | } |
200 | 50 | } |
201 | 0 | bool operator<(Loc A) const { |
202 | 0 | if (Block != A.Block) |
203 | 0 | return Block->getNumber() < A.Block->getNumber(); |
204 | 0 | if (A.Pos == -1) |
205 | 0 | return Pos != A.Pos; |
206 | 0 | return Pos != -1 && Pos < A.Pos; |
207 | 0 | } |
208 | | private: |
209 | | int Pos = 0; |
210 | | }; |
211 | | |
212 | | struct HexagonConstExtenders : public MachineFunctionPass { |
213 | | static char ID; |
214 | 868 | HexagonConstExtenders() : MachineFunctionPass(ID) {} |
215 | | |
216 | 861 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
217 | 861 | AU.addRequired<MachineDominatorTree>(); |
218 | 861 | AU.addPreserved<MachineDominatorTree>(); |
219 | 861 | MachineFunctionPass::getAnalysisUsage(AU); |
220 | 861 | } |
221 | | |
222 | 4.22k | StringRef getPassName() const override { |
223 | 4.22k | return "Hexagon constant-extender optimization"; |
224 | 4.22k | } |
225 | | bool runOnMachineFunction(MachineFunction &MF) override; |
226 | | |
227 | | private: |
228 | | struct Register { |
229 | 4.07k | Register() = default; |
230 | 50 | Register(unsigned R, unsigned S) : Reg(R), Sub(S) {} |
231 | | Register(const MachineOperand &Op) |
232 | 2.79k | : Reg(Op.getReg()), Sub(Op.getSubReg()) {} |
233 | 1.36k | Register &operator=(const MachineOperand &Op) { |
234 | 1.36k | if (Op.isReg()) { |
235 | 1.36k | Reg = Op.getReg(); |
236 | 1.36k | Sub = Op.getSubReg(); |
237 | 1.36k | } else if (2 Op.isFI()2 ) { |
238 | 2 | Reg = TargetRegisterInfo::index2StackSlot(Op.getIndex()); |
239 | 2 | } |
240 | 1.36k | return *this; |
241 | 1.36k | } |
242 | 524 | bool isVReg() const { |
243 | 524 | return Reg != 0 && !TargetRegisterInfo::isStackSlot(Reg) && |
244 | 524 | TargetRegisterInfo::isVirtualRegister(Reg)523 ; |
245 | 524 | } |
246 | 71 | bool isSlot() const { |
247 | 71 | return Reg != 0 && TargetRegisterInfo::isStackSlot(Reg)14 ; |
248 | 71 | } |
249 | 524 | operator MachineOperand() const { |
250 | 524 | if (isVReg()) |
251 | 523 | return MachineOperand::CreateReg(Reg, /*Def*/false, /*Imp*/false, |
252 | 523 | /*Kill*/false, /*Dead*/false, /*Undef*/false, |
253 | 523 | /*EarlyClobber*/false, Sub); |
254 | 1 | if (TargetRegisterInfo::isStackSlot(Reg)) { |
255 | 1 | int FI = TargetRegisterInfo::stackSlot2Index(Reg); |
256 | 1 | return MachineOperand::CreateFI(FI); |
257 | 1 | } |
258 | 0 | llvm_unreachable("Cannot create MachineOperand"); |
259 | 0 | } |
260 | 4.24k | bool operator==(Register R) const { return Reg == R.Reg && Sub == R.Sub4.00k ; } |
261 | 4.24k | bool operator!=(Register R) const { return !operator==(R); } |
262 | 239 | bool operator<(Register R) const { |
263 | 239 | // For std::map. |
264 | 239 | return Reg < R.Reg || (122 Reg == R.Reg122 && Sub < R.Sub0 ); |
265 | 239 | } |
266 | | unsigned Reg = 0, Sub = 0; |
267 | | }; |
268 | | |
269 | | struct ExtExpr { |
270 | | // A subexpression in which the extender is used. In general, this |
271 | | // represents an expression where adding D to the extender will be |
272 | | // equivalent to adding D to the expression as a whole. In other |
273 | | // words, expr(add(##V,D) = add(expr(##V),D). |
274 | | |
275 | | // The original motivation for this are the io/ur addressing modes, |
276 | | // where the offset is extended. Consider the io example: |
277 | | // In memw(Rs+##V), the ##V could be replaced by a register Rt to |
278 | | // form the rr mode: memw(Rt+Rs<<0). In such case, however, the |
279 | | // register Rt must have exactly the value of ##V. If there was |
280 | | // another instruction memw(Rs+##V+4), it would need a different Rt. |
281 | | // Now, if Rt was initialized as "##V+Rs<<0", both of these |
282 | | // instructions could use the same Rt, just with different offsets. |
283 | | // Here it's clear that "initializer+4" should be the same as if |
284 | | // the offset 4 was added to the ##V in the initializer. |
285 | | |
286 | | // The only kinds of expressions that support the requirement of |
287 | | // commuting with addition are addition and subtraction from ##V. |
288 | | // Include shifting the Rs to account for the ur addressing mode: |
289 | | // ##Val + Rs << S |
290 | | // ##Val - Rs |
291 | | Register Rs; |
292 | | unsigned S = 0; |
293 | | bool Neg = false; |
294 | | |
295 | 2.09k | ExtExpr() = default; |
296 | 0 | ExtExpr(Register RS, bool NG, unsigned SH) : Rs(RS), S(SH), Neg(NG) {} |
297 | | // Expression is trivial if it does not modify the extender. |
298 | 1.50k | bool trivial() const { |
299 | 1.50k | return Rs.Reg == 0; |
300 | 1.50k | } |
301 | 0 | bool operator==(const ExtExpr &Ex) const { |
302 | 0 | return Rs == Ex.Rs && S == Ex.S && Neg == Ex.Neg; |
303 | 0 | } |
304 | 0 | bool operator!=(const ExtExpr &Ex) const { |
305 | 0 | return !operator==(Ex); |
306 | 0 | } |
307 | 1.45k | bool operator<(const ExtExpr &Ex) const { |
308 | 1.45k | if (Rs != Ex.Rs) |
309 | 239 | return Rs < Ex.Rs; |
310 | 1.21k | if (S != Ex.S) |
311 | 0 | return S < Ex.S; |
312 | 1.21k | return !Neg && Ex.Neg; |
313 | 1.21k | } |
314 | | }; |
315 | | |
316 | | struct ExtDesc { |
317 | | MachineInstr *UseMI = nullptr; |
318 | | unsigned OpNum = -1u; |
319 | | // The subexpression in which the extender is used (e.g. address |
320 | | // computation). |
321 | | ExtExpr Expr; |
322 | | // Optional register that is assigned the value of Expr. |
323 | | Register Rd; |
324 | | // Def means that the output of the instruction may differ from the |
325 | | // original by a constant c, and that the difference can be corrected |
326 | | // by adding/subtracting c in all users of the defined register. |
327 | | bool IsDef = false; |
328 | | |
329 | 5.58k | MachineOperand &getOp() { |
330 | 5.58k | return UseMI->getOperand(OpNum); |
331 | 5.58k | } |
332 | 10.4k | const MachineOperand &getOp() const { |
333 | 10.4k | return UseMI->getOperand(OpNum); |
334 | 10.4k | } |
335 | | }; |
336 | | |
337 | | struct ExtRoot { |
338 | | union { |
339 | | const ConstantFP *CFP; // MO_FPImmediate |
340 | | const char *SymbolName; // MO_ExternalSymbol |
341 | | const GlobalValue *GV; // MO_GlobalAddress |
342 | | const BlockAddress *BA; // MO_BlockAddress |
343 | | int64_t ImmVal; // MO_Immediate, MO_TargetIndex, |
344 | | // and MO_ConstantPoolIndex |
345 | | } V; |
346 | | unsigned Kind; // Same as in MachineOperand. |
347 | | unsigned char TF; // TargetFlags. |
348 | | |
349 | | ExtRoot(const MachineOperand &Op); |
350 | 13.0k | bool operator==(const ExtRoot &ER) const { |
351 | 13.0k | return Kind == ER.Kind && V.ImmVal == ER.V.ImmVal12.4k ; |
352 | 13.0k | } |
353 | 0 | bool operator!=(const ExtRoot &ER) const { |
354 | 0 | return !operator==(ER); |
355 | 0 | } |
356 | | bool operator<(const ExtRoot &ER) const; |
357 | | }; |
358 | | |
359 | | struct ExtValue : public ExtRoot { |
360 | | int32_t Offset; |
361 | | |
362 | | ExtValue(const MachineOperand &Op); |
363 | 10.4k | ExtValue(const ExtDesc &ED) : ExtValue(ED.getOp()) {} |
364 | 1.36k | ExtValue(const ExtRoot &ER, int32_t Off) : ExtRoot(ER), Offset(Off) {} |
365 | | bool operator<(const ExtValue &EV) const; |
366 | 3.96k | bool operator==(const ExtValue &EV) const { |
367 | 3.96k | return ExtRoot(*this) == ExtRoot(EV) && Offset == EV.Offset2.74k ; |
368 | 3.96k | } |
369 | 3.96k | bool operator!=(const ExtValue &EV) const { |
370 | 3.96k | return !operator==(EV); |
371 | 3.96k | } |
372 | | explicit operator MachineOperand() const; |
373 | | }; |
374 | | |
375 | | using IndexList = SetVector<unsigned>; |
376 | | using ExtenderInit = std::pair<ExtValue, ExtExpr>; |
377 | | using AssignmentMap = std::map<ExtenderInit, IndexList>; |
378 | | using LocDefList = std::vector<std::pair<Loc, IndexList>>; |
379 | | |
380 | | const HexagonInstrInfo *HII = nullptr; |
381 | | const HexagonRegisterInfo *HRI = nullptr; |
382 | | MachineDominatorTree *MDT = nullptr; |
383 | | MachineRegisterInfo *MRI = nullptr; |
384 | | std::vector<ExtDesc> Extenders; |
385 | | std::vector<unsigned> NewRegs; |
386 | | |
387 | | bool isStoreImmediate(unsigned Opc) const; |
388 | | bool isRegOffOpcode(unsigned ExtOpc) const ; |
389 | | unsigned getRegOffOpcode(unsigned ExtOpc) const; |
390 | | unsigned getDirectRegReplacement(unsigned ExtOpc) const; |
391 | | OffsetRange getOffsetRange(Register R, const MachineInstr &MI) const; |
392 | | OffsetRange getOffsetRange(const ExtDesc &ED) const; |
393 | | OffsetRange getOffsetRange(Register Rd) const; |
394 | | |
395 | | void recordExtender(MachineInstr &MI, unsigned OpNum); |
396 | | void collectInstr(MachineInstr &MI); |
397 | | void collect(MachineFunction &MF); |
398 | | void assignInits(const ExtRoot &ER, unsigned Begin, unsigned End, |
399 | | AssignmentMap &IMap); |
400 | | void calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs, |
401 | | LocDefList &Defs); |
402 | | Register insertInitializer(Loc DefL, const ExtenderInit &ExtI); |
403 | | bool replaceInstrExact(const ExtDesc &ED, Register ExtR); |
404 | | bool replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI, |
405 | | Register ExtR, int32_t &Diff); |
406 | | bool replaceInstr(unsigned Idx, Register ExtR, const ExtenderInit &ExtI); |
407 | | bool replaceExtenders(const AssignmentMap &IMap); |
408 | | |
409 | | unsigned getOperandIndex(const MachineInstr &MI, |
410 | | const MachineOperand &Op) const; |
411 | | const MachineOperand &getPredicateOp(const MachineInstr &MI) const; |
412 | | const MachineOperand &getLoadResultOp(const MachineInstr &MI) const; |
413 | | const MachineOperand &getStoredValueOp(const MachineInstr &MI) const; |
414 | | |
415 | | friend struct PrintRegister; |
416 | | friend struct PrintExpr; |
417 | | friend struct PrintInit; |
418 | | friend struct PrintIMap; |
419 | | friend raw_ostream &operator<< (raw_ostream &OS, |
420 | | const struct PrintRegister &P); |
421 | | friend raw_ostream &operator<< (raw_ostream &OS, const struct PrintExpr &P); |
422 | | friend raw_ostream &operator<< (raw_ostream &OS, const struct PrintInit &P); |
423 | | friend raw_ostream &operator<< (raw_ostream &OS, const ExtDesc &ED); |
424 | | friend raw_ostream &operator<< (raw_ostream &OS, const ExtRoot &ER); |
425 | | friend raw_ostream &operator<< (raw_ostream &OS, const ExtValue &EV); |
426 | | friend raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR); |
427 | | friend raw_ostream &operator<< (raw_ostream &OS, const struct PrintIMap &P); |
428 | | }; |
429 | | |
430 | | using HCE = HexagonConstExtenders; |
431 | | |
432 | | LLVM_ATTRIBUTE_UNUSED |
433 | 0 | raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR) { |
434 | 0 | if (OR.Min > OR.Max) |
435 | 0 | OS << '!'; |
436 | 0 | OS << '[' << OR.Min << ',' << OR.Max << "]a" << unsigned(OR.Align) |
437 | 0 | << '+' << unsigned(OR.Offset); |
438 | 0 | return OS; |
439 | 0 | } |
440 | | |
441 | | struct PrintRegister { |
442 | | PrintRegister(HCE::Register R, const HexagonRegisterInfo &I) |
443 | 0 | : Rs(R), HRI(I) {} |
444 | | HCE::Register Rs; |
445 | | const HexagonRegisterInfo &HRI; |
446 | | }; |
447 | | |
448 | | LLVM_ATTRIBUTE_UNUSED |
449 | 0 | raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &P) { |
450 | 0 | if (P.Rs.Reg != 0) |
451 | 0 | OS << printReg(P.Rs.Reg, &P.HRI, P.Rs.Sub); |
452 | 0 | else |
453 | 0 | OS << "noreg"; |
454 | 0 | return OS; |
455 | 0 | } |
456 | | |
457 | | struct PrintExpr { |
458 | | PrintExpr(const HCE::ExtExpr &E, const HexagonRegisterInfo &I) |
459 | 0 | : Ex(E), HRI(I) {} |
460 | | const HCE::ExtExpr &Ex; |
461 | | const HexagonRegisterInfo &HRI; |
462 | | }; |
463 | | |
464 | | LLVM_ATTRIBUTE_UNUSED |
465 | 0 | raw_ostream &operator<< (raw_ostream &OS, const PrintExpr &P) { |
466 | 0 | OS << "## " << (P.Ex.Neg ? "- " : "+ "); |
467 | 0 | if (P.Ex.Rs.Reg != 0) |
468 | 0 | OS << printReg(P.Ex.Rs.Reg, &P.HRI, P.Ex.Rs.Sub); |
469 | 0 | else |
470 | 0 | OS << "__"; |
471 | 0 | OS << " << " << P.Ex.S; |
472 | 0 | return OS; |
473 | 0 | } |
474 | | |
475 | | struct PrintInit { |
476 | | PrintInit(const HCE::ExtenderInit &EI, const HexagonRegisterInfo &I) |
477 | 0 | : ExtI(EI), HRI(I) {} |
478 | | const HCE::ExtenderInit &ExtI; |
479 | | const HexagonRegisterInfo &HRI; |
480 | | }; |
481 | | |
482 | | LLVM_ATTRIBUTE_UNUSED |
483 | 0 | raw_ostream &operator<< (raw_ostream &OS, const PrintInit &P) { |
484 | 0 | OS << '[' << P.ExtI.first << ", " |
485 | 0 | << PrintExpr(P.ExtI.second, P.HRI) << ']'; |
486 | 0 | return OS; |
487 | 0 | } |
488 | | |
489 | | LLVM_ATTRIBUTE_UNUSED |
490 | 0 | raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtDesc &ED) { |
491 | 0 | assert(ED.OpNum != -1u); |
492 | 0 | const MachineBasicBlock &MBB = *ED.getOp().getParent()->getParent(); |
493 | 0 | const MachineFunction &MF = *MBB.getParent(); |
494 | 0 | const auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); |
495 | 0 | OS << "bb#" << MBB.getNumber() << ": "; |
496 | 0 | if (ED.Rd.Reg != 0) |
497 | 0 | OS << printReg(ED.Rd.Reg, &HRI, ED.Rd.Sub); |
498 | 0 | else |
499 | 0 | OS << "__"; |
500 | 0 | OS << " = " << PrintExpr(ED.Expr, HRI); |
501 | 0 | if (ED.IsDef) |
502 | 0 | OS << ", def"; |
503 | 0 | return OS; |
504 | 0 | } |
505 | | |
506 | | LLVM_ATTRIBUTE_UNUSED |
507 | 0 | raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtRoot &ER) { |
508 | 0 | switch (ER.Kind) { |
509 | 0 | case MachineOperand::MO_Immediate: |
510 | 0 | OS << "imm:" << ER.V.ImmVal; |
511 | 0 | break; |
512 | 0 | case MachineOperand::MO_FPImmediate: |
513 | 0 | OS << "fpi:" << *ER.V.CFP; |
514 | 0 | break; |
515 | 0 | case MachineOperand::MO_ExternalSymbol: |
516 | 0 | OS << "sym:" << *ER.V.SymbolName; |
517 | 0 | break; |
518 | 0 | case MachineOperand::MO_GlobalAddress: |
519 | 0 | OS << "gad:" << ER.V.GV->getName(); |
520 | 0 | break; |
521 | 0 | case MachineOperand::MO_BlockAddress: |
522 | 0 | OS << "blk:" << *ER.V.BA; |
523 | 0 | break; |
524 | 0 | case MachineOperand::MO_TargetIndex: |
525 | 0 | OS << "tgi:" << ER.V.ImmVal; |
526 | 0 | break; |
527 | 0 | case MachineOperand::MO_ConstantPoolIndex: |
528 | 0 | OS << "cpi:" << ER.V.ImmVal; |
529 | 0 | break; |
530 | 0 | case MachineOperand::MO_JumpTableIndex: |
531 | 0 | OS << "jti:" << ER.V.ImmVal; |
532 | 0 | break; |
533 | 0 | default: |
534 | 0 | OS << "???:" << ER.V.ImmVal; |
535 | 0 | break; |
536 | 0 | } |
537 | 0 | return OS; |
538 | 0 | } |
539 | | |
540 | | LLVM_ATTRIBUTE_UNUSED |
541 | 0 | raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtValue &EV) { |
542 | 0 | OS << HCE::ExtRoot(EV) << " off:" << EV.Offset; |
543 | 0 | return OS; |
544 | 0 | } |
545 | | |
546 | | struct PrintIMap { |
547 | | PrintIMap(const HCE::AssignmentMap &M, const HexagonRegisterInfo &I) |
548 | 0 | : IMap(M), HRI(I) {} |
549 | | const HCE::AssignmentMap &IMap; |
550 | | const HexagonRegisterInfo &HRI; |
551 | | }; |
552 | | |
553 | | LLVM_ATTRIBUTE_UNUSED |
554 | 0 | raw_ostream &operator<< (raw_ostream &OS, const PrintIMap &P) { |
555 | 0 | OS << "{\n"; |
556 | 0 | for (const std::pair<HCE::ExtenderInit,HCE::IndexList> &Q : P.IMap) { |
557 | 0 | OS << " " << PrintInit(Q.first, P.HRI) << " -> {"; |
558 | 0 | for (unsigned I : Q.second) |
559 | 0 | OS << ' ' << I; |
560 | 0 | OS << " }\n"; |
561 | 0 | } |
562 | 0 | OS << "}\n"; |
563 | 0 | return OS; |
564 | 0 | } |
565 | | } |
566 | | |
567 | 101k | INITIALIZE_PASS_BEGIN(HexagonConstExtenders, "hexagon-cext-opt", |
568 | 101k | "Hexagon constant-extender optimization", false, false) |
569 | 101k | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) |
570 | 101k | INITIALIZE_PASS_END(HexagonConstExtenders, "hexagon-cext-opt", |
571 | | "Hexagon constant-extender optimization", false, false) |
572 | | |
573 | | static unsigned ReplaceCounter = 0; |
574 | | |
575 | | char HCE::ID = 0; |
576 | | |
577 | | #ifndef NDEBUG |
578 | | LLVM_DUMP_METHOD void RangeTree::dump() const { |
579 | | dbgs() << "Root: " << Root << '\n'; |
580 | | if (Root) |
581 | | dump(Root); |
582 | | } |
583 | | |
584 | | LLVM_DUMP_METHOD void RangeTree::dump(const Node *N) const { |
585 | | dbgs() << "Node: " << N << '\n'; |
586 | | dbgs() << " Height: " << N->Height << '\n'; |
587 | | dbgs() << " Count: " << N->Count << '\n'; |
588 | | dbgs() << " MaxEnd: " << N->MaxEnd << '\n'; |
589 | | dbgs() << " Range: " << N->Range << '\n'; |
590 | | dbgs() << " Left: " << N->Left << '\n'; |
591 | | dbgs() << " Right: " << N->Right << "\n\n"; |
592 | | |
593 | | if (N->Left) |
594 | | dump(N->Left); |
595 | | if (N->Right) |
596 | | dump(N->Right); |
597 | | } |
598 | | #endif |
599 | | |
600 | 6.10k | void RangeTree::order(Node *N, SmallVectorImpl<Node*> &Seq) const { |
601 | 6.10k | if (N == nullptr) |
602 | 4.32k | return; |
603 | 1.77k | order(N->Left, Seq); |
604 | 1.77k | Seq.push_back(N); |
605 | 1.77k | order(N->Right, Seq); |
606 | 1.77k | } |
607 | | |
608 | | void RangeTree::nodesWith(Node *N, int32_t P, bool CheckA, |
609 | 79.0k | SmallVectorImpl<Node*> &Seq) const { |
610 | 79.0k | if (N == nullptr || N->MaxEnd < P36.5k ) |
611 | 44.4k | return; |
612 | 34.6k | nodesWith(N->Left, P, CheckA, Seq); |
613 | 34.6k | if (N->Range.Min <= P) { |
614 | 32.4k | if ((CheckA && N->Range.contains(P)17.5k ) || (17.4k !CheckA17.4k && P <= N->Range.Max14.9k )) |
615 | 28.2k | Seq.push_back(N); |
616 | 32.4k | nodesWith(N->Right, P, CheckA, Seq); |
617 | 32.4k | } |
618 | 34.6k | } |
619 | | |
620 | 3.70k | RangeTree::Node *RangeTree::add(Node *N, const OffsetRange &R) { |
621 | 3.70k | if (N == nullptr) |
622 | 1.77k | return new Node(R); |
623 | 1.92k | |
624 | 1.92k | if (N->Range == R) { |
625 | 209 | N->Count++; |
626 | 209 | return N; |
627 | 209 | } |
628 | 1.72k | |
629 | 1.72k | if (R < N->Range) |
630 | 49 | N->Left = add(N->Left, R); |
631 | 1.67k | else |
632 | 1.67k | N->Right = add(N->Right, R); |
633 | 1.72k | return rebalance(update(N)); |
634 | 1.72k | } |
635 | | |
636 | 2.47k | RangeTree::Node *RangeTree::remove(Node *N, const Node *D) { |
637 | 2.47k | assert(N != nullptr); |
638 | 2.47k | |
639 | 2.47k | if (N != D) { |
640 | 702 | assert(N->Range != D->Range && "N and D should not be equal"); |
641 | 702 | if (D->Range < N->Range) |
642 | 616 | N->Left = remove(N->Left, D); |
643 | 86 | else |
644 | 86 | N->Right = remove(N->Right, D); |
645 | 702 | return rebalance(update(N)); |
646 | 702 | } |
647 | 1.77k | |
648 | 1.77k | // We got to the node we need to remove. If any of its children are |
649 | 1.77k | // missing, simply replace it with the other child. |
650 | 1.77k | if (N->Left == nullptr || N->Right == nullptr21 ) |
651 | 1.77k | return (N->Left == nullptr) ? N->Right1.75k : N->Left19 ; |
652 | 2 | |
653 | 2 | // Find the rightmost child of N->Left, remove it and plug it in place |
654 | 2 | // of N. |
655 | 2 | Node *M = N->Left; |
656 | 2 | while (M->Right) |
657 | 0 | M = M->Right; |
658 | 2 | M->Left = remove(N->Left, M); |
659 | 2 | M->Right = N->Right; |
660 | 2 | return rebalance(update(M)); |
661 | 2 | } |
662 | | |
663 | 351 | RangeTree::Node *RangeTree::rotateLeft(Node *Lower, Node *Higher) { |
664 | 351 | assert(Higher->Right == Lower); |
665 | 351 | // The Lower node is on the right from Higher. Make sure that Lower's |
666 | 351 | // balance is greater to the right. Otherwise the rotation will create |
667 | 351 | // an unbalanced tree again. |
668 | 351 | if (height(Lower->Left) > height(Lower->Right)) |
669 | 4 | Lower = rotateRight(Lower->Left, Lower); |
670 | 351 | assert(height(Lower->Left) <= height(Lower->Right)); |
671 | 351 | Higher->Right = Lower->Left; |
672 | 351 | update(Higher); |
673 | 351 | Lower->Left = Higher; |
674 | 351 | update(Lower); |
675 | 351 | return Lower; |
676 | 351 | } |
677 | | |
678 | 10 | RangeTree::Node *RangeTree::rotateRight(Node *Lower, Node *Higher) { |
679 | 10 | assert(Higher->Left == Lower); |
680 | 10 | // The Lower node is on the left from Higher. Make sure that Lower's |
681 | 10 | // balance is greater to the left. Otherwise the rotation will create |
682 | 10 | // an unbalanced tree again. |
683 | 10 | if (height(Lower->Left) < height(Lower->Right)) |
684 | 2 | Lower = rotateLeft(Lower->Right, Lower); |
685 | 10 | assert(height(Lower->Left) >= height(Lower->Right)); |
686 | 10 | Higher->Left = Lower->Right; |
687 | 10 | update(Higher); |
688 | 10 | Lower->Right = Higher; |
689 | 10 | update(Lower); |
690 | 10 | return Lower; |
691 | 10 | } |
692 | | |
693 | | |
694 | 16.0k | HCE::ExtRoot::ExtRoot(const MachineOperand &Op) { |
695 | 16.0k | // Always store ImmVal, since it's the field used for comparisons. |
696 | 16.0k | V.ImmVal = 0; |
697 | 16.0k | if (Op.isImm()) |
698 | 2.88k | ; // Keep 0. Do not use Op.getImm() for value here (treat 0 as the root). |
699 | 13.1k | else if (Op.isFPImm()) |
700 | 0 | V.CFP = Op.getFPImm(); |
701 | 13.1k | else if (Op.isSymbol()) |
702 | 0 | V.SymbolName = Op.getSymbolName(); |
703 | 13.1k | else if (Op.isGlobal()) |
704 | 12.1k | V.GV = Op.getGlobal(); |
705 | 1.02k | else if (Op.isBlockAddress()) |
706 | 6 | V.BA = Op.getBlockAddress(); |
707 | 1.01k | else if (Op.isCPI() || Op.isTargetIndex()22 || Op.isJTI()22 ) |
708 | 1.01k | V.ImmVal = Op.getIndex(); |
709 | 1.01k | else |
710 | 1.01k | llvm_unreachable0 ("Unexpected operand type"); |
711 | 16.0k | |
712 | 16.0k | Kind = Op.getType(); |
713 | 16.0k | TF = Op.getTargetFlags(); |
714 | 16.0k | } |
715 | | |
716 | 1.22k | bool HCE::ExtRoot::operator< (const HCE::ExtRoot &ER) const { |
717 | 1.22k | if (Kind != ER.Kind) |
718 | 225 | return Kind < ER.Kind; |
719 | 998 | switch (Kind) { |
720 | 998 | case MachineOperand::MO_Immediate: |
721 | 104 | case MachineOperand::MO_TargetIndex: |
722 | 104 | case MachineOperand::MO_ConstantPoolIndex: |
723 | 104 | case MachineOperand::MO_JumpTableIndex: |
724 | 104 | return V.ImmVal < ER.V.ImmVal; |
725 | 104 | case MachineOperand::MO_FPImmediate: { |
726 | 0 | const APFloat &ThisF = V.CFP->getValueAPF(); |
727 | 0 | const APFloat &OtherF = ER.V.CFP->getValueAPF(); |
728 | 0 | return ThisF.bitcastToAPInt().ult(OtherF.bitcastToAPInt()); |
729 | 104 | } |
730 | 104 | case MachineOperand::MO_ExternalSymbol: |
731 | 0 | return StringRef(V.SymbolName) < StringRef(ER.V.SymbolName); |
732 | 894 | case MachineOperand::MO_GlobalAddress: |
733 | 894 | // Do not use GUIDs, since they depend on the source path. Moving the |
734 | 894 | // source file to a different directory could cause different GUID |
735 | 894 | // values for a pair of given symbols. These symbols could then compare |
736 | 894 | // "less" in one directory, but "greater" in another. |
737 | 894 | assert(!V.GV->getName().empty() && !ER.V.GV->getName().empty()); |
738 | 894 | return V.GV->getName() < ER.V.GV->getName(); |
739 | 104 | case MachineOperand::MO_BlockAddress: { |
740 | 0 | const BasicBlock *ThisB = V.BA->getBasicBlock(); |
741 | 0 | const BasicBlock *OtherB = ER.V.BA->getBasicBlock(); |
742 | 0 | assert(ThisB->getParent() == OtherB->getParent()); |
743 | 0 | const Function &F = *ThisB->getParent(); |
744 | 0 | return std::distance(F.begin(), ThisB->getIterator()) < |
745 | 0 | std::distance(F.begin(), OtherB->getIterator()); |
746 | 0 | } |
747 | 0 | } |
748 | 0 | return V.ImmVal < ER.V.ImmVal; |
749 | 0 | } |
750 | | |
751 | 10.4k | HCE::ExtValue::ExtValue(const MachineOperand &Op) : ExtRoot(Op) { |
752 | 10.4k | if (Op.isImm()) |
753 | 1.42k | Offset = Op.getImm(); |
754 | 9.01k | else if (Op.isFPImm() || Op.isJTI()) |
755 | 9 | Offset = 0; |
756 | 9.00k | else if (Op.isSymbol() || Op.isGlobal() || Op.isBlockAddress()401 || |
757 | 9.00k | Op.isCPI()399 || Op.isTargetIndex()0 ) |
758 | 9.00k | Offset = Op.getOffset(); |
759 | 9.00k | else |
760 | 9.00k | llvm_unreachable0 ("Unexpected operand type"); |
761 | 10.4k | } |
762 | | |
763 | 6.74k | bool HCE::ExtValue::operator< (const HCE::ExtValue &EV) const { |
764 | 6.74k | const ExtRoot &ER = *this; |
765 | 6.74k | if (!(ER == ExtRoot(EV))) |
766 | 1.22k | return ER < EV; |
767 | 5.51k | return Offset < EV.Offset; |
768 | 5.51k | } |
769 | | |
770 | 50 | HCE::ExtValue::operator MachineOperand() const { |
771 | 50 | switch (Kind) { |
772 | 50 | case MachineOperand::MO_Immediate: |
773 | 7 | return MachineOperand::CreateImm(V.ImmVal + Offset); |
774 | 50 | case MachineOperand::MO_FPImmediate: |
775 | 0 | assert(Offset == 0); |
776 | 0 | return MachineOperand::CreateFPImm(V.CFP); |
777 | 50 | case MachineOperand::MO_ExternalSymbol: |
778 | 0 | assert(Offset == 0); |
779 | 0 | return MachineOperand::CreateES(V.SymbolName, TF); |
780 | 50 | case MachineOperand::MO_GlobalAddress: |
781 | 43 | return MachineOperand::CreateGA(V.GV, Offset, TF); |
782 | 50 | case MachineOperand::MO_BlockAddress: |
783 | 0 | return MachineOperand::CreateBA(V.BA, Offset, TF); |
784 | 50 | case MachineOperand::MO_TargetIndex: |
785 | 0 | return MachineOperand::CreateTargetIndex(V.ImmVal, Offset, TF); |
786 | 50 | case MachineOperand::MO_ConstantPoolIndex: |
787 | 0 | return MachineOperand::CreateCPI(V.ImmVal, Offset, TF); |
788 | 50 | case MachineOperand::MO_JumpTableIndex: |
789 | 0 | assert(Offset == 0); |
790 | 0 | return MachineOperand::CreateJTI(V.ImmVal, TF); |
791 | 50 | default: |
792 | 0 | llvm_unreachable("Unhandled kind"); |
793 | 50 | } |
794 | 50 | } |
795 | | |
796 | 164 | bool HCE::isStoreImmediate(unsigned Opc) const { |
797 | 164 | switch (Opc) { |
798 | 164 | case Hexagon::S4_storeirbt_io: |
799 | 82 | case Hexagon::S4_storeirbf_io: |
800 | 82 | case Hexagon::S4_storeirht_io: |
801 | 82 | case Hexagon::S4_storeirhf_io: |
802 | 82 | case Hexagon::S4_storeirit_io: |
803 | 82 | case Hexagon::S4_storeirif_io: |
804 | 82 | case Hexagon::S4_storeirb_io: |
805 | 82 | case Hexagon::S4_storeirh_io: |
806 | 82 | case Hexagon::S4_storeiri_io: |
807 | 82 | return true; |
808 | 82 | default: |
809 | 82 | break; |
810 | 82 | } |
811 | 82 | return false; |
812 | 82 | } |
813 | | |
814 | 2.78k | bool HCE::isRegOffOpcode(unsigned Opc) const { |
815 | 2.78k | switch (Opc) { |
816 | 2.78k | case Hexagon::L2_loadrub_io: |
817 | 12 | case Hexagon::L2_loadrb_io: |
818 | 12 | case Hexagon::L2_loadruh_io: |
819 | 12 | case Hexagon::L2_loadrh_io: |
820 | 12 | case Hexagon::L2_loadri_io: |
821 | 12 | case Hexagon::L2_loadrd_io: |
822 | 12 | case Hexagon::L2_loadbzw2_io: |
823 | 12 | case Hexagon::L2_loadbzw4_io: |
824 | 12 | case Hexagon::L2_loadbsw2_io: |
825 | 12 | case Hexagon::L2_loadbsw4_io: |
826 | 12 | case Hexagon::L2_loadalignh_io: |
827 | 12 | case Hexagon::L2_loadalignb_io: |
828 | 12 | case Hexagon::L2_ploadrubt_io: |
829 | 12 | case Hexagon::L2_ploadrubf_io: |
830 | 12 | case Hexagon::L2_ploadrbt_io: |
831 | 12 | case Hexagon::L2_ploadrbf_io: |
832 | 12 | case Hexagon::L2_ploadruht_io: |
833 | 12 | case Hexagon::L2_ploadruhf_io: |
834 | 12 | case Hexagon::L2_ploadrht_io: |
835 | 12 | case Hexagon::L2_ploadrhf_io: |
836 | 12 | case Hexagon::L2_ploadrit_io: |
837 | 12 | case Hexagon::L2_ploadrif_io: |
838 | 12 | case Hexagon::L2_ploadrdt_io: |
839 | 12 | case Hexagon::L2_ploadrdf_io: |
840 | 12 | case Hexagon::S2_storerb_io: |
841 | 12 | case Hexagon::S2_storerh_io: |
842 | 12 | case Hexagon::S2_storerf_io: |
843 | 12 | case Hexagon::S2_storeri_io: |
844 | 12 | case Hexagon::S2_storerd_io: |
845 | 12 | case Hexagon::S2_pstorerbt_io: |
846 | 12 | case Hexagon::S2_pstorerbf_io: |
847 | 12 | case Hexagon::S2_pstorerht_io: |
848 | 12 | case Hexagon::S2_pstorerhf_io: |
849 | 12 | case Hexagon::S2_pstorerft_io: |
850 | 12 | case Hexagon::S2_pstorerff_io: |
851 | 12 | case Hexagon::S2_pstorerit_io: |
852 | 12 | case Hexagon::S2_pstorerif_io: |
853 | 12 | case Hexagon::S2_pstorerdt_io: |
854 | 12 | case Hexagon::S2_pstorerdf_io: |
855 | 12 | case Hexagon::A2_addi: |
856 | 12 | return true; |
857 | 2.77k | default: |
858 | 2.77k | break; |
859 | 2.77k | } |
860 | 2.77k | return false; |
861 | 2.77k | } |
862 | | |
863 | 1.24k | unsigned HCE::getRegOffOpcode(unsigned ExtOpc) const { |
864 | 1.24k | // If there exists an instruction that takes a register and offset, |
865 | 1.24k | // that corresponds to the ExtOpc, return it, otherwise return 0. |
866 | 1.24k | using namespace Hexagon; |
867 | 1.24k | switch (ExtOpc) { |
868 | 1.24k | case A2_tfrsi: return A2_addi107 ; |
869 | 1.24k | default: |
870 | 1.13k | break; |
871 | 1.13k | } |
872 | 1.13k | const MCInstrDesc &D = HII->get(ExtOpc); |
873 | 1.13k | if (D.mayLoad() || D.mayStore()634 ) { |
874 | 958 | uint64_t F = D.TSFlags; |
875 | 958 | unsigned AM = (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask; |
876 | 958 | switch (AM) { |
877 | 958 | case HexagonII::Absolute: |
878 | 873 | case HexagonII::AbsoluteSet: |
879 | 873 | case HexagonII::BaseLongOffset: |
880 | 873 | switch (ExtOpc) { |
881 | 873 | case PS_loadrubabs: |
882 | 279 | case L4_loadrub_ap: |
883 | 279 | case L4_loadrub_ur: return L2_loadrub_io; |
884 | 279 | case PS_loadrbabs: |
885 | 11 | case L4_loadrb_ap: |
886 | 11 | case L4_loadrb_ur: return L2_loadrb_io; |
887 | 32 | case PS_loadruhabs: |
888 | 32 | case L4_loadruh_ap: |
889 | 32 | case L4_loadruh_ur: return L2_loadruh_io; |
890 | 32 | case PS_loadrhabs: |
891 | 7 | case L4_loadrh_ap: |
892 | 7 | case L4_loadrh_ur: return L2_loadrh_io; |
893 | 131 | case PS_loadriabs: |
894 | 131 | case L4_loadri_ap: |
895 | 131 | case L4_loadri_ur: return L2_loadri_io; |
896 | 131 | case PS_loadrdabs: |
897 | 24 | case L4_loadrd_ap: |
898 | 24 | case L4_loadrd_ur: return L2_loadrd_io; |
899 | 24 | case L4_loadbzw2_ap: |
900 | 0 | case L4_loadbzw2_ur: return L2_loadbzw2_io; |
901 | 0 | case L4_loadbzw4_ap: |
902 | 0 | case L4_loadbzw4_ur: return L2_loadbzw4_io; |
903 | 0 | case L4_loadbsw2_ap: |
904 | 0 | case L4_loadbsw2_ur: return L2_loadbsw2_io; |
905 | 0 | case L4_loadbsw4_ap: |
906 | 0 | case L4_loadbsw4_ur: return L2_loadbsw4_io; |
907 | 0 | case L4_loadalignh_ap: |
908 | 0 | case L4_loadalignh_ur: return L2_loadalignh_io; |
909 | 0 | case L4_loadalignb_ap: |
910 | 0 | case L4_loadalignb_ur: return L2_loadalignb_io; |
911 | 0 | case L4_ploadrubt_abs: return L2_ploadrubt_io; |
912 | 0 | case L4_ploadrubf_abs: return L2_ploadrubf_io; |
913 | 0 | case L4_ploadrbt_abs: return L2_ploadrbt_io; |
914 | 0 | case L4_ploadrbf_abs: return L2_ploadrbf_io; |
915 | 0 | case L4_ploadruht_abs: return L2_ploadruht_io; |
916 | 0 | case L4_ploadruhf_abs: return L2_ploadruhf_io; |
917 | 0 | case L4_ploadrht_abs: return L2_ploadrht_io; |
918 | 0 | case L4_ploadrhf_abs: return L2_ploadrhf_io; |
919 | 0 | case L4_ploadrit_abs: return L2_ploadrit_io; |
920 | 0 | case L4_ploadrif_abs: return L2_ploadrif_io; |
921 | 0 | case L4_ploadrdt_abs: return L2_ploadrdt_io; |
922 | 0 | case L4_ploadrdf_abs: return L2_ploadrdf_io; |
923 | 191 | case PS_storerbabs: |
924 | 191 | case S4_storerb_ap: |
925 | 191 | case S4_storerb_ur: return S2_storerb_io; |
926 | 191 | case PS_storerhabs: |
927 | 72 | case S4_storerh_ap: |
928 | 72 | case S4_storerh_ur: return S2_storerh_io; |
929 | 72 | case PS_storerfabs: |
930 | 0 | case S4_storerf_ap: |
931 | 0 | case S4_storerf_ur: return S2_storerf_io; |
932 | 69 | case PS_storeriabs: |
933 | 69 | case S4_storeri_ap: |
934 | 69 | case S4_storeri_ur: return S2_storeri_io; |
935 | 69 | case PS_storerdabs: |
936 | 50 | case S4_storerd_ap: |
937 | 50 | case S4_storerd_ur: return S2_storerd_io; |
938 | 50 | case S4_pstorerbt_abs: return S2_pstorerbt_io4 ; |
939 | 50 | case S4_pstorerbf_abs: return S2_pstorerbf_io0 ; |
940 | 50 | case S4_pstorerht_abs: return S2_pstorerht_io1 ; |
941 | 50 | case S4_pstorerhf_abs: return S2_pstorerhf_io0 ; |
942 | 50 | case S4_pstorerft_abs: return S2_pstorerft_io0 ; |
943 | 50 | case S4_pstorerff_abs: return S2_pstorerff_io0 ; |
944 | 50 | case S4_pstorerit_abs: return S2_pstorerit_io1 ; |
945 | 50 | case S4_pstorerif_abs: return S2_pstorerif_io0 ; |
946 | 50 | case S4_pstorerdt_abs: return S2_pstorerdt_io0 ; |
947 | 50 | case S4_pstorerdf_abs: return S2_pstorerdf_io1 ; |
948 | 50 | default: |
949 | 0 | break; |
950 | 0 | } |
951 | 0 | break; |
952 | 85 | case HexagonII::BaseImmOffset: |
953 | 85 | if (!isStoreImmediate(ExtOpc)) |
954 | 44 | return ExtOpc; |
955 | 41 | break; |
956 | 41 | default: |
957 | 0 | break; |
958 | 221 | } |
959 | 221 | } |
960 | 221 | return 0; |
961 | 221 | } |
962 | | |
963 | 37 | unsigned HCE::getDirectRegReplacement(unsigned ExtOpc) const { |
964 | 37 | switch (ExtOpc) { |
965 | 37 | case Hexagon::A2_addi: return Hexagon::A2_add0 ; |
966 | 37 | case Hexagon::A2_andir: return Hexagon::A2_and0 ; |
967 | 37 | case Hexagon::A2_combineii: return Hexagon::A4_combineri0 ; |
968 | 37 | case Hexagon::A2_orir: return Hexagon::A2_or0 ; |
969 | 37 | case Hexagon::A2_paddif: return Hexagon::A2_paddf0 ; |
970 | 37 | case Hexagon::A2_paddit: return Hexagon::A2_paddt0 ; |
971 | 37 | case Hexagon::A2_subri: return Hexagon::A2_sub0 ; |
972 | 37 | case Hexagon::A2_tfrsi: return TargetOpcode::COPY3 ; |
973 | 37 | case Hexagon::A4_cmpbeqi: return Hexagon::A4_cmpbeq0 ; |
974 | 37 | case Hexagon::A4_cmpbgti: return Hexagon::A4_cmpbgt0 ; |
975 | 37 | case Hexagon::A4_cmpbgtui: return Hexagon::A4_cmpbgtu0 ; |
976 | 37 | case Hexagon::A4_cmpheqi: return Hexagon::A4_cmpheq0 ; |
977 | 37 | case Hexagon::A4_cmphgti: return Hexagon::A4_cmphgt0 ; |
978 | 37 | case Hexagon::A4_cmphgtui: return Hexagon::A4_cmphgtu0 ; |
979 | 37 | case Hexagon::A4_combineii: return Hexagon::A4_combineir0 ; |
980 | 37 | case Hexagon::A4_combineir: return TargetOpcode::REG_SEQUENCE0 ; |
981 | 37 | case Hexagon::A4_combineri: return TargetOpcode::REG_SEQUENCE0 ; |
982 | 37 | case Hexagon::A4_rcmpeqi: return Hexagon::A4_rcmpeq0 ; |
983 | 37 | case Hexagon::A4_rcmpneqi: return Hexagon::A4_rcmpneq0 ; |
984 | 37 | case Hexagon::C2_cmoveif: return Hexagon::A2_tfrpf0 ; |
985 | 37 | case Hexagon::C2_cmoveit: return Hexagon::A2_tfrpt0 ; |
986 | 37 | case Hexagon::C2_cmpeqi: return Hexagon::C2_cmpeq0 ; |
987 | 37 | case Hexagon::C2_cmpgti: return Hexagon::C2_cmpgt0 ; |
988 | 37 | case Hexagon::C2_cmpgtui: return Hexagon::C2_cmpgtu0 ; |
989 | 37 | case Hexagon::C2_muxii: return Hexagon::C2_muxir32 ; |
990 | 37 | case Hexagon::C2_muxir: return Hexagon::C2_mux0 ; |
991 | 37 | case Hexagon::C2_muxri: return Hexagon::C2_mux1 ; |
992 | 37 | case Hexagon::C4_cmpltei: return Hexagon::C4_cmplte0 ; |
993 | 37 | case Hexagon::C4_cmplteui: return Hexagon::C4_cmplteu0 ; |
994 | 37 | case Hexagon::C4_cmpneqi: return Hexagon::C4_cmpneq0 ; |
995 | 37 | case Hexagon::M2_accii: return Hexagon::M2_acci0 ; // T -> T |
996 | 37 | /* No M2_macsin */ |
997 | 37 | case Hexagon::M2_macsip: return Hexagon::M2_maci0 ; // T -> T |
998 | 37 | case Hexagon::M2_mpysin: return Hexagon::M2_mpyi0 ; |
999 | 37 | case Hexagon::M2_mpysip: return Hexagon::M2_mpyi0 ; |
1000 | 37 | case Hexagon::M2_mpysmi: return Hexagon::M2_mpyi0 ; |
1001 | 37 | case Hexagon::M2_naccii: return Hexagon::M2_nacci0 ; // T -> T |
1002 | 37 | case Hexagon::M4_mpyri_addi: return Hexagon::M4_mpyri_addr0 ; |
1003 | 37 | case Hexagon::M4_mpyri_addr: return Hexagon::M4_mpyrr_addr0 ; // _ -> T |
1004 | 37 | case Hexagon::M4_mpyrr_addi: return Hexagon::M4_mpyrr_addr0 ; // _ -> T |
1005 | 37 | case Hexagon::S4_addaddi: return Hexagon::M2_acci0 ; // _ -> T |
1006 | 37 | case Hexagon::S4_addi_asl_ri: return Hexagon::S2_asl_i_r_acc0 ; // T -> T |
1007 | 37 | case Hexagon::S4_addi_lsr_ri: return Hexagon::S2_lsr_i_r_acc0 ; // T -> T |
1008 | 37 | case Hexagon::S4_andi_asl_ri: return Hexagon::S2_asl_i_r_and0 ; // T -> T |
1009 | 37 | case Hexagon::S4_andi_lsr_ri: return Hexagon::S2_lsr_i_r_and0 ; // T -> T |
1010 | 37 | case Hexagon::S4_ori_asl_ri: return Hexagon::S2_asl_i_r_or0 ; // T -> T |
1011 | 37 | case Hexagon::S4_ori_lsr_ri: return Hexagon::S2_lsr_i_r_or0 ; // T -> T |
1012 | 37 | case Hexagon::S4_subaddi: return Hexagon::M2_subacc0 ; // _ -> T |
1013 | 37 | case Hexagon::S4_subi_asl_ri: return Hexagon::S2_asl_i_r_nac0 ; // T -> T |
1014 | 37 | case Hexagon::S4_subi_lsr_ri: return Hexagon::S2_lsr_i_r_nac0 ; // T -> T |
1015 | 37 | |
1016 | 37 | // Store-immediates: |
1017 | 37 | case Hexagon::S4_storeirbf_io: return Hexagon::S2_pstorerbf_io0 ; |
1018 | 37 | case Hexagon::S4_storeirb_io: return Hexagon::S2_storerb_io0 ; |
1019 | 37 | case Hexagon::S4_storeirbt_io: return Hexagon::S2_pstorerbt_io0 ; |
1020 | 37 | case Hexagon::S4_storeirhf_io: return Hexagon::S2_pstorerhf_io0 ; |
1021 | 37 | case Hexagon::S4_storeirh_io: return Hexagon::S2_storerh_io0 ; |
1022 | 37 | case Hexagon::S4_storeirht_io: return Hexagon::S2_pstorerht_io0 ; |
1023 | 37 | case Hexagon::S4_storeirif_io: return Hexagon::S2_pstorerif_io0 ; |
1024 | 37 | case Hexagon::S4_storeiri_io: return Hexagon::S2_storeri_io0 ; |
1025 | 37 | case Hexagon::S4_storeirit_io: return Hexagon::S2_pstorerit_io0 ; |
1026 | 37 | |
1027 | 37 | default: |
1028 | 1 | break; |
1029 | 1 | } |
1030 | 1 | return 0; |
1031 | 1 | } |
1032 | | |
1033 | | // Return the allowable deviation from the current value of Rb (i.e. the |
1034 | | // range of values that can be added to the current value) which the |
1035 | | // instruction MI can accommodate. |
1036 | | // The instruction MI is a user of register Rb, which is defined via an |
1037 | | // extender. It may be possible for MI to be tweaked to work for a register |
1038 | | // defined with a slightly different value. For example |
1039 | | // ... = L2_loadrub_io Rb, 1 |
1040 | | // can be modifed to be |
1041 | | // ... = L2_loadrub_io Rb', 0 |
1042 | | // if Rb' = Rb+1. |
1043 | | // The range for Rb would be [Min+1, Max+1], where [Min, Max] is a range |
1044 | | // for L2_loadrub with offset 0. That means that Rb could be replaced with |
1045 | | // Rc, where Rc-Rb belongs to [Min+1, Max+1]. |
1046 | 2.78k | OffsetRange HCE::getOffsetRange(Register Rb, const MachineInstr &MI) const { |
1047 | 2.78k | unsigned Opc = MI.getOpcode(); |
1048 | 2.78k | // Instructions that are constant-extended may be replaced with something |
1049 | 2.78k | // else that no longer offers the same range as the original. |
1050 | 2.78k | if (!isRegOffOpcode(Opc) || HII->isConstExtended(MI)12 ) |
1051 | 2.77k | return OffsetRange::zero(); |
1052 | 12 | |
1053 | 12 | if (Opc == Hexagon::A2_addi) { |
1054 | 1 | const MachineOperand &Op1 = MI.getOperand(1), &Op2 = MI.getOperand(2); |
1055 | 1 | if (Rb != Register(Op1) || !Op2.isImm()) |
1056 | 0 | return OffsetRange::zero(); |
1057 | 1 | OffsetRange R = { -(1<<15)+1, (1<<15)-1, 1 }; |
1058 | 1 | return R.shift(Op2.getImm()); |
1059 | 1 | } |
1060 | 11 | |
1061 | 11 | // HII::getBaseAndOffsetPosition returns the increment position as "offset". |
1062 | 11 | if (HII->isPostIncrement(MI)) |
1063 | 0 | return OffsetRange::zero(); |
1064 | 11 | |
1065 | 11 | const MCInstrDesc &D = HII->get(Opc); |
1066 | 11 | assert(D.mayLoad() || D.mayStore()); |
1067 | 11 | |
1068 | 11 | unsigned BaseP, OffP; |
1069 | 11 | if (!HII->getBaseAndOffsetPosition(MI, BaseP, OffP) || |
1070 | 11 | Rb != Register(MI.getOperand(BaseP))7 || |
1071 | 11 | !MI.getOperand(OffP).isImm()5 ) |
1072 | 6 | return OffsetRange::zero(); |
1073 | 5 | |
1074 | 5 | uint64_t F = (D.TSFlags >> HexagonII::MemAccessSizePos) & |
1075 | 5 | HexagonII::MemAccesSizeMask; |
1076 | 5 | uint8_t A = HexagonII::getMemAccessSizeInBytes(HexagonII::MemAccessSize(F)); |
1077 | 5 | unsigned L = Log2_32(A); |
1078 | 5 | unsigned S = 10+L; // sint11_L |
1079 | 5 | int32_t Min = -alignDown((1<<S)-1, A); |
1080 | 5 | |
1081 | 5 | // The range will be shifted by Off. To prefer non-negative offsets, |
1082 | 5 | // adjust Max accordingly. |
1083 | 5 | int32_t Off = MI.getOperand(OffP).getImm(); |
1084 | 5 | int32_t Max = Off >= 0 ? 04 : -Off1 ; |
1085 | 5 | |
1086 | 5 | OffsetRange R = { Min, Max, A }; |
1087 | 5 | return R.shift(Off); |
1088 | 5 | } |
1089 | | |
1090 | | // Return the allowable deviation from the current value of the extender ED, |
1091 | | // for which the instruction corresponding to ED can be modified without |
1092 | | // using an extender. |
1093 | | // The instruction uses the extender directly. It will be replaced with |
1094 | | // another instruction, say MJ, where the extender will be replaced with a |
1095 | | // register. MJ can allow some variability with respect to the value of |
1096 | | // that register, as is the case with indexed memory instructions. |
1097 | 762 | OffsetRange HCE::getOffsetRange(const ExtDesc &ED) const { |
1098 | 762 | // The only way that there can be a non-zero range available is if |
1099 | 762 | // the instruction using ED will be converted to an indexed memory |
1100 | 762 | // instruction. |
1101 | 762 | unsigned IdxOpc = getRegOffOpcode(ED.UseMI->getOpcode()); |
1102 | 762 | switch (IdxOpc) { |
1103 | 762 | case 0: |
1104 | 221 | return OffsetRange::zero(); |
1105 | 762 | case Hexagon::A2_addi: // s16 |
1106 | 0 | return { -32767, 32767, 1 }; |
1107 | 762 | case Hexagon::A2_subri: // s10 |
1108 | 0 | return { -511, 511, 1 }; |
1109 | 541 | } |
1110 | 541 | |
1111 | 541 | if (!ED.UseMI->mayLoad() && !ED.UseMI->mayStore()236 ) |
1112 | 0 | return OffsetRange::zero(); |
1113 | 541 | const MCInstrDesc &D = HII->get(IdxOpc); |
1114 | 541 | uint64_t F = (D.TSFlags >> HexagonII::MemAccessSizePos) & |
1115 | 541 | HexagonII::MemAccesSizeMask; |
1116 | 541 | uint8_t A = HexagonII::getMemAccessSizeInBytes(HexagonII::MemAccessSize(F)); |
1117 | 541 | unsigned L = Log2_32(A); |
1118 | 541 | unsigned S = 10+L; // sint11_L |
1119 | 541 | int32_t Min = -alignDown((1<<S)-1, A); |
1120 | 541 | int32_t Max = 0; // Force non-negative offsets. |
1121 | 541 | return { Min, Max, A }; |
1122 | 541 | } |
1123 | | |
1124 | | // Get the allowable deviation from the current value of Rd by checking |
1125 | | // all uses of Rd. |
1126 | 1.21k | OffsetRange HCE::getOffsetRange(Register Rd) const { |
1127 | 1.21k | OffsetRange Range; |
1128 | 2.78k | for (const MachineOperand &Op : MRI->use_operands(Rd.Reg)) { |
1129 | 2.78k | // Make sure that the register being used by this operand is identical |
1130 | 2.78k | // to the register that was defined: using a different subregister |
1131 | 2.78k | // precludes any non-trivial range. |
1132 | 2.78k | if (Rd != Register(Op)) |
1133 | 0 | return OffsetRange::zero(); |
1134 | 2.78k | Range.intersect(getOffsetRange(Rd, *Op.getParent())); |
1135 | 2.78k | } |
1136 | 1.21k | return Range; |
1137 | 1.21k | } |
1138 | | |
1139 | 1.98k | void HCE::recordExtender(MachineInstr &MI, unsigned OpNum) { |
1140 | 1.98k | unsigned Opc = MI.getOpcode(); |
1141 | 1.98k | ExtDesc ED; |
1142 | 1.98k | ED.OpNum = OpNum; |
1143 | 1.98k | |
1144 | 1.98k | bool IsLoad = MI.mayLoad(); |
1145 | 1.98k | bool IsStore = MI.mayStore(); |
1146 | 1.98k | |
1147 | 1.98k | // Fixed stack slots have negative indexes, and they cannot be used |
1148 | 1.98k | // with TRI::stackSlot2Index and TRI::index2StackSlot. This is somewhat |
1149 | 1.98k | // unfortunate, but should not be a frequent thing. |
1150 | 1.98k | for (MachineOperand &Op : MI.operands()) |
1151 | 4.41k | if (Op.isFI() && Op.getIndex() < 031 ) |
1152 | 1 | return; |
1153 | 1.98k | |
1154 | 1.98k | if (1.98k IsLoad1.98k || IsStore1.67k ) { |
1155 | 582 | unsigned AM = HII->getAddrMode(MI); |
1156 | 582 | switch (AM) { |
1157 | 582 | // (Re: ##Off + Rb<<S) = Rd: ##Val |
1158 | 582 | case HexagonII::Absolute: // (__: ## + __<<_) |
1159 | 456 | break; |
1160 | 582 | case HexagonII::AbsoluteSet: // (Rd: ## + __<<_) |
1161 | 0 | ED.Rd = MI.getOperand(OpNum-1); |
1162 | 0 | ED.IsDef = true; |
1163 | 0 | break; |
1164 | 582 | case HexagonII::BaseImmOffset: // (__: ## + Rs<<0) |
1165 | 78 | // Store-immediates are treated as non-memory operations, since |
1166 | 78 | // it's the value being stored that is extended (as opposed to |
1167 | 78 | // a part of the address). |
1168 | 78 | if (!isStoreImmediate(Opc)) |
1169 | 37 | ED.Expr.Rs = MI.getOperand(OpNum-1); |
1170 | 78 | break; |
1171 | 582 | case HexagonII::BaseLongOffset: // (__: ## + Rs<<S) |
1172 | 48 | ED.Expr.Rs = MI.getOperand(OpNum-2); |
1173 | 48 | ED.Expr.S = MI.getOperand(OpNum-1).getImm(); |
1174 | 48 | break; |
1175 | 582 | default: |
1176 | 0 | llvm_unreachable("Unhandled memory instruction"); |
1177 | 1.40k | } |
1178 | 1.40k | } else { |
1179 | 1.40k | switch (Opc) { |
1180 | 1.40k | case Hexagon::A2_tfrsi: // (Rd: ## + __<<_) |
1181 | 1.22k | ED.Rd = MI.getOperand(0); |
1182 | 1.22k | ED.IsDef = true; |
1183 | 1.22k | break; |
1184 | 1.40k | case Hexagon::A2_combineii: // (Rd: ## + __<<_) |
1185 | 0 | case Hexagon::A4_combineir: |
1186 | 0 | ED.Rd = { MI.getOperand(0).getReg(), Hexagon::isub_hi }; |
1187 | 0 | ED.IsDef = true; |
1188 | 0 | break; |
1189 | 0 | case Hexagon::A4_combineri: // (Rd: ## + __<<_) |
1190 | 0 | ED.Rd = { MI.getOperand(0).getReg(), Hexagon::isub_lo }; |
1191 | 0 | ED.IsDef = true; |
1192 | 0 | break; |
1193 | 22 | case Hexagon::A2_addi: // (Rd: ## + Rs<<0) |
1194 | 22 | ED.Rd = MI.getOperand(0); |
1195 | 22 | ED.Expr.Rs = MI.getOperand(OpNum-1); |
1196 | 22 | break; |
1197 | 11 | case Hexagon::M2_accii: // (__: ## + Rs<<0) |
1198 | 11 | case Hexagon::M2_naccii: |
1199 | 11 | case Hexagon::S4_addaddi: |
1200 | 11 | ED.Expr.Rs = MI.getOperand(OpNum-1); |
1201 | 11 | break; |
1202 | 11 | case Hexagon::A2_subri: // (Rd: ## - Rs<<0) |
1203 | 0 | ED.Rd = MI.getOperand(0); |
1204 | 0 | ED.Expr.Rs = MI.getOperand(OpNum+1); |
1205 | 0 | ED.Expr.Neg = true; |
1206 | 0 | break; |
1207 | 11 | case Hexagon::S4_subaddi: // (__: ## - Rs<<0) |
1208 | 5 | ED.Expr.Rs = MI.getOperand(OpNum+1); |
1209 | 5 | ED.Expr.Neg = true; |
1210 | 5 | break; |
1211 | 142 | default: // (__: ## + __<<_) |
1212 | 142 | break; |
1213 | 1.98k | } |
1214 | 1.98k | } |
1215 | 1.98k | |
1216 | 1.98k | ED.UseMI = &MI; |
1217 | 1.98k | |
1218 | 1.98k | // Ignore unnamed globals. |
1219 | 1.98k | ExtRoot ER(ED.getOp()); |
1220 | 1.98k | if (ER.Kind == MachineOperand::MO_GlobalAddress) |
1221 | 1.27k | if (ER.V.GV->getName().empty()) |
1222 | 2 | return; |
1223 | 1.98k | Extenders.push_back(ED); |
1224 | 1.98k | } |
1225 | | |
1226 | 36.0k | void HCE::collectInstr(MachineInstr &MI) { |
1227 | 36.0k | if (!HII->isConstExtended(MI)) |
1228 | 34.0k | return; |
1229 | 2.02k | |
1230 | 2.02k | // Skip some non-convertible instructions. |
1231 | 2.02k | unsigned Opc = MI.getOpcode(); |
1232 | 2.02k | switch (Opc) { |
1233 | 2.02k | case Hexagon::M2_macsin: // There is no Rx -= mpyi(Rs,Rt). |
1234 | 41 | case Hexagon::C4_addipc: |
1235 | 41 | case Hexagon::S4_or_andi: |
1236 | 41 | case Hexagon::S4_or_andix: |
1237 | 41 | case Hexagon::S4_or_ori: |
1238 | 41 | return; |
1239 | 1.98k | } |
1240 | 1.98k | recordExtender(MI, HII->getCExtOpNum(MI)); |
1241 | 1.98k | } |
1242 | | |
1243 | 3.35k | void HCE::collect(MachineFunction &MF) { |
1244 | 3.35k | Extenders.clear(); |
1245 | 4.97k | for (MachineBasicBlock &MBB : MF) { |
1246 | 4.97k | // Skip unreachable blocks. |
1247 | 4.97k | if (MBB.getNumber() == -1) |
1248 | 0 | continue; |
1249 | 4.97k | for (MachineInstr &MI : MBB) |
1250 | 36.0k | collectInstr(MI); |
1251 | 4.97k | } |
1252 | 3.35k | } |
1253 | | |
1254 | | void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End, |
1255 | 1.27k | AssignmentMap &IMap) { |
1256 | 1.27k | // Sanity check: make sure that all extenders in the range [Begin..End) |
1257 | 1.27k | // share the same root ER. |
1258 | 3.25k | for (unsigned I = Begin; I != End; ++I1.98k ) |
1259 | 1.27k | assert(ER == ExtRoot(Extenders[I].getOp())); |
1260 | 1.27k | |
1261 | 1.27k | // Construct the list of ranges, such that for each P in Ranges[I], |
1262 | 1.27k | // a register Reg = ER+P can be used in place of Extender[I]. If the |
1263 | 1.27k | // instruction allows, uses in the form of Reg+Off are considered |
1264 | 1.27k | // (here, Off = required_value - P). |
1265 | 1.27k | std::vector<OffsetRange> Ranges(End-Begin); |
1266 | 1.27k | |
1267 | 1.27k | // For each extender that is a def, visit all uses of the defined register, |
1268 | 1.27k | // and produce an offset range that works for all uses. The def doesn't |
1269 | 1.27k | // have to be checked, because it can become dead if all uses can be updated |
1270 | 1.27k | // to use a different reg/offset. |
1271 | 3.25k | for (unsigned I = Begin; I != End; ++I1.98k ) { |
1272 | 1.98k | const ExtDesc &ED = Extenders[I]; |
1273 | 1.98k | if (!ED.IsDef) |
1274 | 762 | continue; |
1275 | 1.21k | ExtValue EV(ED); |
1276 | 1.21k | LLVM_DEBUG(dbgs() << " =" << I << ". " << EV << " " << ED << '\n'); |
1277 | 1.21k | assert(ED.Rd.Reg != 0); |
1278 | 1.21k | Ranges[I-Begin] = getOffsetRange(ED.Rd).shift(EV.Offset); |
1279 | 1.21k | // A2_tfrsi is a special case: it will be replaced with A2_addi, which |
1280 | 1.21k | // has a 16-bit signed offset. This means that A2_tfrsi not only has a |
1281 | 1.21k | // range coming from its uses, but also from the fact that its replacement |
1282 | 1.21k | // has a range as well. |
1283 | 1.21k | if (ED.UseMI->getOpcode() == Hexagon::A2_tfrsi) { |
1284 | 1.21k | int32_t D = alignDown(32767, Ranges[I-Begin].Align); // XXX hardcoded |
1285 | 1.21k | Ranges[I-Begin].extendBy(-D).extendBy(D); |
1286 | 1.21k | } |
1287 | 1.21k | } |
1288 | 1.27k | |
1289 | 1.27k | // Visit all non-def extenders. For each one, determine the offset range |
1290 | 1.27k | // available for it. |
1291 | 3.25k | for (unsigned I = Begin; I != End; ++I1.98k ) { |
1292 | 1.98k | const ExtDesc &ED = Extenders[I]; |
1293 | 1.98k | if (ED.IsDef) |
1294 | 1.21k | continue; |
1295 | 762 | ExtValue EV(ED); |
1296 | 762 | LLVM_DEBUG(dbgs() << " " << I << ". " << EV << " " << ED << '\n'); |
1297 | 762 | OffsetRange Dev = getOffsetRange(ED); |
1298 | 762 | Ranges[I-Begin].intersect(Dev.shift(EV.Offset)); |
1299 | 762 | } |
1300 | 1.27k | |
1301 | 1.27k | // Here for each I there is a corresponding Range[I]. Construct the |
1302 | 1.27k | // inverse map, that to each range will assign the set of indexes in |
1303 | 1.27k | // [Begin..End) that this range corresponds to. |
1304 | 1.27k | std::map<OffsetRange, IndexList> RangeMap; |
1305 | 3.25k | for (unsigned I = Begin; I != End; ++I1.98k ) |
1306 | 1.98k | RangeMap[Ranges[I-Begin]].insert(I); |
1307 | 1.27k | |
1308 | 1.27k | LLVM_DEBUG({ |
1309 | 1.27k | dbgs() << "Ranges\n"; |
1310 | 1.27k | for (unsigned I = Begin; I != End; ++I) |
1311 | 1.27k | dbgs() << " " << I << ". " << Ranges[I-Begin] << '\n'; |
1312 | 1.27k | dbgs() << "RangeMap\n"; |
1313 | 1.27k | for (auto &P : RangeMap) { |
1314 | 1.27k | dbgs() << " " << P.first << " ->"; |
1315 | 1.27k | for (unsigned I : P.second) |
1316 | 1.27k | dbgs() << ' ' << I; |
1317 | 1.27k | dbgs() << '\n'; |
1318 | 1.27k | } |
1319 | 1.27k | }); |
1320 | 1.27k | |
1321 | 1.27k | // Select the definition points, and generate the assignment between |
1322 | 1.27k | // these points and the uses. |
1323 | 1.27k | |
1324 | 1.27k | // For each candidate offset, keep a pair CandData consisting of |
1325 | 1.27k | // the total number of ranges containing that candidate, and the |
1326 | 1.27k | // vector of corresponding RangeTree nodes. |
1327 | 1.27k | using CandData = std::pair<unsigned, SmallVector<RangeTree::Node*,8>>; |
1328 | 1.27k | std::map<int32_t, CandData> CandMap; |
1329 | 1.27k | |
1330 | 1.27k | RangeTree Tree; |
1331 | 1.27k | for (const OffsetRange &R : Ranges) |
1332 | 1.98k | Tree.add(R); |
1333 | 1.27k | SmallVector<RangeTree::Node*,8> Nodes; |
1334 | 1.27k | Tree.order(Nodes); |
1335 | 1.27k | |
1336 | 1.27k | auto MaxAlign = [](const SmallVectorImpl<RangeTree::Node*> &Nodes, |
1337 | 3.54k | uint8_t Align, uint8_t Offset) { |
1338 | 13.2k | for (RangeTree::Node *N : Nodes) { |
1339 | 13.2k | if (N->Range.Align <= Align || N->Range.Offset < Offset61 ) |
1340 | 13.1k | continue; |
1341 | 61 | if ((N->Range.Offset - Offset) % Align != 0) |
1342 | 0 | continue; |
1343 | 61 | Align = N->Range.Align; |
1344 | 61 | Offset = N->Range.Offset; |
1345 | 61 | } |
1346 | 3.54k | return std::make_pair(Align, Offset); |
1347 | 3.54k | }; |
1348 | 1.27k | |
1349 | 1.27k | // Construct the set of all potential definition points from the endpoints |
1350 | 1.27k | // of the ranges. If a given endpoint also belongs to a different range, |
1351 | 1.27k | // but with a higher alignment, also consider the more-highly-aligned |
1352 | 1.27k | // value of this endpoint. |
1353 | 1.27k | std::set<int32_t> CandSet; |
1354 | 1.77k | for (RangeTree::Node *N : Nodes) { |
1355 | 1.77k | const OffsetRange &R = N->Range; |
1356 | 1.77k | auto P0 = MaxAlign(Tree.nodesWith(R.Min, false), R.Align, R.Offset); |
1357 | 1.77k | CandSet.insert(R.Min); |
1358 | 1.77k | if (R.Align < P0.first) |
1359 | 38 | CandSet.insert(adjustUp(R.Min, P0.first, P0.second)); |
1360 | 1.77k | auto P1 = MaxAlign(Tree.nodesWith(R.Max, false), R.Align, R.Offset); |
1361 | 1.77k | CandSet.insert(R.Max); |
1362 | 1.77k | if (R.Align < P1.first) |
1363 | 23 | CandSet.insert(adjustDown(R.Max, P1.first, P1.second)); |
1364 | 1.77k | } |
1365 | 1.27k | |
1366 | 1.27k | // Build the assignment map: candidate C -> { list of extender indexes }. |
1367 | 1.27k | // This has to be done iteratively: |
1368 | 1.27k | // - pick the candidate that covers the maximum number of extenders, |
1369 | 1.27k | // - add the candidate to the map, |
1370 | 1.27k | // - remove the extenders from the pool. |
1371 | 2.64k | while (true) { |
1372 | 2.64k | using CMap = std::map<int32_t,unsigned>; |
1373 | 2.64k | CMap Counts; |
1374 | 9.70k | for (auto It = CandSet.begin(), Et = CandSet.end(); It != Et; ) { |
1375 | 7.05k | auto &&V = Tree.nodesWith(*It); |
1376 | 7.05k | unsigned N = std::accumulate(V.begin(), V.end(), 0u, |
1377 | 13.2k | [](unsigned Acc, const RangeTree::Node *N) { |
1378 | 13.2k | return Acc + N->Count; |
1379 | 13.2k | }); |
1380 | 7.05k | if (N != 0) |
1381 | 3.67k | Counts.insert({*It, N}); |
1382 | 7.05k | It = (N != 0) ? std::next(It)3.67k : CandSet.erase(It)3.37k ; |
1383 | 7.05k | } |
1384 | 2.64k | if (Counts.empty()) |
1385 | 1.27k | break; |
1386 | 1.36k | |
1387 | 1.36k | // Find the best candidate with respect to the number of extenders covered. |
1388 | 1.36k | auto BestIt = std::max_element(Counts.begin(), Counts.end(), |
1389 | 2.31k | [](const CMap::value_type &A, const CMap::value_type &B) { |
1390 | 2.31k | return A.second < B.second || |
1391 | 2.31k | (1.90k A.second == B.second1.90k && A < B1.48k ); |
1392 | 2.31k | }); |
1393 | 1.36k | int32_t Best = BestIt->first; |
1394 | 1.36k | ExtValue BestV(ER, Best); |
1395 | 1.77k | for (RangeTree::Node *N : Tree.nodesWith(Best)) { |
1396 | 1.77k | for (unsigned I : RangeMap[N->Range]) |
1397 | 1.98k | IMap[{BestV,Extenders[I].Expr}].insert(I); |
1398 | 1.77k | Tree.erase(N); |
1399 | 1.77k | } |
1400 | 1.36k | } |
1401 | 1.27k | |
1402 | 1.27k | LLVM_DEBUG(dbgs() << "IMap (before fixup) = " << PrintIMap(IMap, *HRI)); |
1403 | 1.27k | |
1404 | 1.27k | // There is some ambiguity in what initializer should be used, if the |
1405 | 1.27k | // descriptor's subexpression is non-trivial: it can be the entire |
1406 | 1.27k | // subexpression (which is what has been done so far), or it can be |
1407 | 1.27k | // the extender's value itself, if all corresponding extenders have the |
1408 | 1.27k | // exact value of the initializer (i.e. require offset of 0). |
1409 | 1.27k | |
1410 | 1.27k | // To reduce the number of initializers, merge such special cases. |
1411 | 1.38k | for (std::pair<const ExtenderInit,IndexList> &P : IMap) { |
1412 | 1.38k | // Skip trivial initializers. |
1413 | 1.38k | if (P.first.second.trivial()) |
1414 | 1.27k | continue; |
1415 | 110 | // If the corresponding trivial initializer does not exist, skip this |
1416 | 110 | // entry. |
1417 | 110 | const ExtValue &EV = P.first.first; |
1418 | 110 | AssignmentMap::iterator F = IMap.find({EV, ExtExpr()}); |
1419 | 110 | if (F == IMap.end()) |
1420 | 100 | continue; |
1421 | 10 | |
1422 | 10 | // Finally, check if all extenders have the same value as the initializer. |
1423 | 10 | // Make sure that extenders that are a part of a stack address are not |
1424 | 10 | // merged with those that aren't. Stack addresses need an offset field |
1425 | 10 | // (to be used by frame index elimination), while non-stack expressions |
1426 | 10 | // can be replaced with forms (such as rr) that do not have such a field. |
1427 | 10 | // Example: |
1428 | 10 | // |
1429 | 10 | // Collected 3 extenders |
1430 | 10 | // =2. imm:0 off:32968 bb#2: %7 = ## + __ << 0, def |
1431 | 10 | // 0. imm:0 off:267 bb#0: __ = ## + SS#1 << 0 |
1432 | 10 | // 1. imm:0 off:267 bb#1: __ = ## + SS#1 << 0 |
1433 | 10 | // Ranges |
1434 | 10 | // 0. [-756,267]a1+0 |
1435 | 10 | // 1. [-756,267]a1+0 |
1436 | 10 | // 2. [201,65735]a1+0 |
1437 | 10 | // RangeMap |
1438 | 10 | // [-756,267]a1+0 -> 0 1 |
1439 | 10 | // [201,65735]a1+0 -> 2 |
1440 | 10 | // IMap (before fixup) = { |
1441 | 10 | // [imm:0 off:267, ## + __ << 0] -> { 2 } |
1442 | 10 | // [imm:0 off:267, ## + SS#1 << 0] -> { 0 1 } |
1443 | 10 | // } |
1444 | 10 | // IMap (after fixup) = { |
1445 | 10 | // [imm:0 off:267, ## + __ << 0] -> { 2 0 1 } |
1446 | 10 | // [imm:0 off:267, ## + SS#1 << 0] -> { } |
1447 | 10 | // } |
1448 | 10 | // Inserted def in bb#0 for initializer: [imm:0 off:267, ## + __ << 0] |
1449 | 10 | // %12:intregs = A2_tfrsi 267 |
1450 | 10 | // |
1451 | 10 | // The result was |
1452 | 10 | // %12:intregs = A2_tfrsi 267 |
1453 | 10 | // S4_pstorerbt_rr %3, %12, %stack.1, 0, killed %4 |
1454 | 10 | // Which became |
1455 | 10 | // r0 = #267 |
1456 | 10 | // if (p0.new) memb(r0+r29<<#4) = r2 |
1457 | 10 | |
1458 | 11 | bool IsStack = any_of(F->second, [this](unsigned I) 10 { |
1459 | 11 | return Extenders[I].Expr.Rs.isSlot(); |
1460 | 11 | }); |
1461 | 10 | auto SameValue = [&EV,this,IsStack](unsigned I) { |
1462 | 10 | const ExtDesc &ED = Extenders[I]; |
1463 | 10 | return ED.Expr.Rs.isSlot() == IsStack && |
1464 | 10 | ExtValue(ED).Offset == EV.Offset9 ; |
1465 | 10 | }; |
1466 | 10 | if (all_of(P.second, SameValue)) { |
1467 | 8 | F->second.insert(P.second.begin(), P.second.end()); |
1468 | 8 | P.second.clear(); |
1469 | 8 | } |
1470 | 10 | } |
1471 | 1.27k | |
1472 | 1.27k | LLVM_DEBUG(dbgs() << "IMap (after fixup) = " << PrintIMap(IMap, *HRI)); |
1473 | 1.27k | } |
1474 | | |
1475 | | void HCE::calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs, |
1476 | 50 | LocDefList &Defs) { |
1477 | 50 | if (Refs.empty()) |
1478 | 0 | return; |
1479 | 50 | |
1480 | 50 | // The placement calculation is somewhat simple right now: it finds a |
1481 | 50 | // single location for the def that dominates all refs. Since this may |
1482 | 50 | // place the def far from the uses, producing several locations for |
1483 | 50 | // defs that collectively dominate all refs could be better. |
1484 | 50 | // For now only do the single one. |
1485 | 50 | DenseSet<MachineBasicBlock*> Blocks; |
1486 | 50 | DenseSet<MachineInstr*> RefMIs; |
1487 | 50 | const ExtDesc &ED0 = Extenders[Refs[0]]; |
1488 | 50 | MachineBasicBlock *DomB = ED0.UseMI->getParent(); |
1489 | 50 | RefMIs.insert(ED0.UseMI); |
1490 | 50 | Blocks.insert(DomB); |
1491 | 520 | for (unsigned i = 1, e = Refs.size(); i != e; ++i470 ) { |
1492 | 470 | const ExtDesc &ED = Extenders[Refs[i]]; |
1493 | 470 | MachineBasicBlock *MBB = ED.UseMI->getParent(); |
1494 | 470 | RefMIs.insert(ED.UseMI); |
1495 | 470 | DomB = MDT->findNearestCommonDominator(DomB, MBB); |
1496 | 470 | Blocks.insert(MBB); |
1497 | 470 | } |
1498 | 50 | |
1499 | | #ifndef NDEBUG |
1500 | | // The block DomB should be dominated by the def of each register used |
1501 | | // in the initializer. |
1502 | | Register Rs = ExtI.second.Rs; // Only one reg allowed now. |
1503 | | const MachineInstr *DefI = Rs.isVReg() ? MRI->getVRegDef(Rs.Reg) : nullptr; |
1504 | | |
1505 | | // This should be guaranteed given that the entire expression is used |
1506 | | // at each instruction in Refs. Add an assertion just in case. |
1507 | | assert(!DefI || MDT->dominates(DefI->getParent(), DomB)); |
1508 | | #endif |
1509 | | |
1510 | 50 | MachineBasicBlock::iterator It; |
1511 | 50 | if (Blocks.count(DomB)) { |
1512 | 46 | // Try to find the latest possible location for the def. |
1513 | 46 | MachineBasicBlock::iterator End = DomB->end(); |
1514 | 712 | for (It = DomB->begin(); It != End; ++It666 ) |
1515 | 712 | if (RefMIs.count(&*It)) |
1516 | 46 | break; |
1517 | 46 | assert(It != End && "Should have found a ref in DomB"); |
1518 | 46 | } else { |
1519 | 4 | // DomB does not contain any refs. |
1520 | 4 | It = DomB->getFirstTerminator(); |
1521 | 4 | } |
1522 | 50 | Loc DefLoc(DomB, It); |
1523 | 50 | Defs.emplace_back(DefLoc, Refs); |
1524 | 50 | } |
1525 | | |
1526 | 50 | HCE::Register HCE::insertInitializer(Loc DefL, const ExtenderInit &ExtI) { |
1527 | 50 | unsigned DefR = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); |
1528 | 50 | MachineBasicBlock &MBB = *DefL.Block; |
1529 | 50 | MachineBasicBlock::iterator At = DefL.At; |
1530 | 50 | DebugLoc dl = DefL.Block->findDebugLoc(DefL.At); |
1531 | 50 | const ExtValue &EV = ExtI.first; |
1532 | 50 | MachineOperand ExtOp(EV); |
1533 | 50 | |
1534 | 50 | const ExtExpr &Ex = ExtI.second; |
1535 | 50 | const MachineInstr *InitI = nullptr; |
1536 | 50 | |
1537 | 50 | if (Ex.Rs.isSlot()) { |
1538 | 1 | assert(Ex.S == 0 && "Cannot have a shift of a stack slot"); |
1539 | 1 | assert(!Ex.Neg && "Cannot subtract a stack slot"); |
1540 | 1 | // DefR = PS_fi Rb,##EV |
1541 | 1 | InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::PS_fi), DefR) |
1542 | 1 | .add(MachineOperand(Ex.Rs)) |
1543 | 1 | .add(ExtOp); |
1544 | 49 | } else { |
1545 | 49 | assert((Ex.Rs.Reg == 0 || Ex.Rs.isVReg()) && "Expecting virtual register"); |
1546 | 49 | if (Ex.trivial()) { |
1547 | 46 | // DefR = ##EV |
1548 | 46 | InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::A2_tfrsi), DefR) |
1549 | 46 | .add(ExtOp); |
1550 | 46 | } else if (3 Ex.S == 03 ) { |
1551 | 3 | if (Ex.Neg) { |
1552 | 0 | // DefR = sub(##EV,Rb) |
1553 | 0 | InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::A2_subri), DefR) |
1554 | 0 | .add(ExtOp) |
1555 | 0 | .add(MachineOperand(Ex.Rs)); |
1556 | 3 | } else { |
1557 | 3 | // DefR = add(Rb,##EV) |
1558 | 3 | InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::A2_addi), DefR) |
1559 | 3 | .add(MachineOperand(Ex.Rs)) |
1560 | 3 | .add(ExtOp); |
1561 | 3 | } |
1562 | 3 | } else { |
1563 | 0 | unsigned NewOpc = Ex.Neg ? Hexagon::S4_subi_asl_ri |
1564 | 0 | : Hexagon::S4_addi_asl_ri; |
1565 | 0 | // DefR = add(##EV,asl(Rb,S)) |
1566 | 0 | InitI = BuildMI(MBB, At, dl, HII->get(NewOpc), DefR) |
1567 | 0 | .add(ExtOp) |
1568 | 0 | .add(MachineOperand(Ex.Rs)) |
1569 | 0 | .addImm(Ex.S); |
1570 | 0 | } |
1571 | 49 | } |
1572 | 50 | |
1573 | 50 | assert(InitI); |
1574 | 50 | (void)InitI; |
1575 | 50 | LLVM_DEBUG(dbgs() << "Inserted def in bb#" << MBB.getNumber() |
1576 | 50 | << " for initializer: " << PrintInit(ExtI, *HRI) << "\n " |
1577 | 50 | << *InitI); |
1578 | 50 | return { DefR, 0 }; |
1579 | 50 | } |
1580 | | |
1581 | | // Replace the extender at index Idx with the register ExtR. |
1582 | 37 | bool HCE::replaceInstrExact(const ExtDesc &ED, Register ExtR) { |
1583 | 37 | MachineInstr &MI = *ED.UseMI; |
1584 | 37 | MachineBasicBlock &MBB = *MI.getParent(); |
1585 | 37 | MachineBasicBlock::iterator At = MI.getIterator(); |
1586 | 37 | DebugLoc dl = MI.getDebugLoc(); |
1587 | 37 | unsigned ExtOpc = MI.getOpcode(); |
1588 | 37 | |
1589 | 37 | // With a few exceptions, direct replacement amounts to creating an |
1590 | 37 | // instruction with a corresponding register opcode, with all operands |
1591 | 37 | // the same, except for the register used in place of the extender. |
1592 | 37 | unsigned RegOpc = getDirectRegReplacement(ExtOpc); |
1593 | 37 | |
1594 | 37 | if (RegOpc == TargetOpcode::REG_SEQUENCE) { |
1595 | 0 | if (ExtOpc == Hexagon::A4_combineri) |
1596 | 0 | BuildMI(MBB, At, dl, HII->get(RegOpc)) |
1597 | 0 | .add(MI.getOperand(0)) |
1598 | 0 | .add(MI.getOperand(1)) |
1599 | 0 | .addImm(Hexagon::isub_hi) |
1600 | 0 | .add(MachineOperand(ExtR)) |
1601 | 0 | .addImm(Hexagon::isub_lo); |
1602 | 0 | else if (ExtOpc == Hexagon::A4_combineir) |
1603 | 0 | BuildMI(MBB, At, dl, HII->get(RegOpc)) |
1604 | 0 | .add(MI.getOperand(0)) |
1605 | 0 | .add(MachineOperand(ExtR)) |
1606 | 0 | .addImm(Hexagon::isub_hi) |
1607 | 0 | .add(MI.getOperand(2)) |
1608 | 0 | .addImm(Hexagon::isub_lo); |
1609 | 0 | else |
1610 | 0 | llvm_unreachable("Unexpected opcode became REG_SEQUENCE"); |
1611 | 0 | MBB.erase(MI); |
1612 | 0 | return true; |
1613 | 37 | } |
1614 | 37 | if (ExtOpc == Hexagon::C2_cmpgei || ExtOpc == Hexagon::C2_cmpgeui) { |
1615 | 0 | unsigned NewOpc = ExtOpc == Hexagon::C2_cmpgei ? Hexagon::C2_cmplt |
1616 | 0 | : Hexagon::C2_cmpltu; |
1617 | 0 | BuildMI(MBB, At, dl, HII->get(NewOpc)) |
1618 | 0 | .add(MI.getOperand(0)) |
1619 | 0 | .add(MachineOperand(ExtR)) |
1620 | 0 | .add(MI.getOperand(1)); |
1621 | 0 | MBB.erase(MI); |
1622 | 0 | return true; |
1623 | 0 | } |
1624 | 37 | |
1625 | 37 | if (RegOpc != 0) { |
1626 | 36 | MachineInstrBuilder MIB = BuildMI(MBB, At, dl, HII->get(RegOpc)); |
1627 | 36 | unsigned RegN = ED.OpNum; |
1628 | 36 | // Copy all operands except the one that has the extender. |
1629 | 174 | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i138 ) { |
1630 | 138 | if (i != RegN) |
1631 | 102 | MIB.add(MI.getOperand(i)); |
1632 | 36 | else |
1633 | 36 | MIB.add(MachineOperand(ExtR)); |
1634 | 138 | } |
1635 | 36 | MIB.cloneMemRefs(MI); |
1636 | 36 | MBB.erase(MI); |
1637 | 36 | return true; |
1638 | 36 | } |
1639 | 1 | |
1640 | 1 | if ((MI.mayLoad() || MI.mayStore()0 ) && !isStoreImmediate(ExtOpc)) { |
1641 | 1 | // For memory instructions, there is an asymmetry in the addressing |
1642 | 1 | // modes. Addressing modes allowing extenders can be replaced with |
1643 | 1 | // addressing modes that use registers, but the order of operands |
1644 | 1 | // (or even their number) may be different. |
1645 | 1 | // Replacements: |
1646 | 1 | // BaseImmOffset (io) -> BaseRegOffset (rr) |
1647 | 1 | // BaseLongOffset (ur) -> BaseRegOffset (rr) |
1648 | 1 | unsigned RegOpc, Shift; |
1649 | 1 | unsigned AM = HII->getAddrMode(MI); |
1650 | 1 | if (AM == HexagonII::BaseImmOffset) { |
1651 | 0 | RegOpc = HII->changeAddrMode_io_rr(ExtOpc); |
1652 | 0 | Shift = 0; |
1653 | 1 | } else if (AM == HexagonII::BaseLongOffset) { |
1654 | 1 | // Loads: Rd = L4_loadri_ur Rs, S, ## |
1655 | 1 | // Stores: S4_storeri_ur Rs, S, ##, Rt |
1656 | 1 | RegOpc = HII->changeAddrMode_ur_rr(ExtOpc); |
1657 | 1 | Shift = MI.getOperand(MI.mayLoad() ? 2 : 10 ).getImm(); |
1658 | 1 | } else { |
1659 | 0 | llvm_unreachable("Unexpected addressing mode"); |
1660 | 0 | } |
1661 | | #ifndef NDEBUG |
1662 | | if (RegOpc == -1u) { |
1663 | | dbgs() << "\nExtOpc: " << HII->getName(ExtOpc) << " has no rr version\n"; |
1664 | | llvm_unreachable("No corresponding rr instruction"); |
1665 | | } |
1666 | | #endif |
1667 | | |
1668 | 1 | unsigned BaseP, OffP; |
1669 | 1 | HII->getBaseAndOffsetPosition(MI, BaseP, OffP); |
1670 | 1 | |
1671 | 1 | // Build an rr instruction: (RegOff + RegBase<<0) |
1672 | 1 | MachineInstrBuilder MIB = BuildMI(MBB, At, dl, HII->get(RegOpc)); |
1673 | 1 | // First, add the def for loads. |
1674 | 1 | if (MI.mayLoad()) |
1675 | 1 | MIB.add(getLoadResultOp(MI)); |
1676 | 1 | // Handle possible predication. |
1677 | 1 | if (HII->isPredicated(MI)) |
1678 | 0 | MIB.add(getPredicateOp(MI)); |
1679 | 1 | // Build the address. |
1680 | 1 | MIB.add(MachineOperand(ExtR)); // RegOff |
1681 | 1 | MIB.add(MI.getOperand(BaseP)); // RegBase |
1682 | 1 | MIB.addImm(Shift); // << Shift |
1683 | 1 | // Add the stored value for stores. |
1684 | 1 | if (MI.mayStore()) |
1685 | 0 | MIB.add(getStoredValueOp(MI)); |
1686 | 1 | MIB.cloneMemRefs(MI); |
1687 | 1 | MBB.erase(MI); |
1688 | 1 | return true; |
1689 | 1 | } |
1690 | 0 | |
1691 | | #ifndef NDEBUG |
1692 | | dbgs() << '\n' << MI; |
1693 | | #endif |
1694 | 0 | llvm_unreachable("Unhandled exact replacement"); |
1695 | 0 | return false; |
1696 | 0 | } |
1697 | | |
1698 | | // Replace the extender ED with a form corresponding to the initializer ExtI. |
1699 | | bool HCE::replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI, |
1700 | 483 | Register ExtR, int32_t &Diff) { |
1701 | 483 | MachineInstr &MI = *ED.UseMI; |
1702 | 483 | MachineBasicBlock &MBB = *MI.getParent(); |
1703 | 483 | MachineBasicBlock::iterator At = MI.getIterator(); |
1704 | 483 | DebugLoc dl = MI.getDebugLoc(); |
1705 | 483 | unsigned ExtOpc = MI.getOpcode(); |
1706 | 483 | |
1707 | 483 | if (ExtOpc == Hexagon::A2_tfrsi) { |
1708 | 107 | // A2_tfrsi is a special case: it's replaced with A2_addi, which introduces |
1709 | 107 | // another range. One range is the one that's common to all tfrsi's uses, |
1710 | 107 | // this one is the range of immediates in A2_addi. When calculating ranges, |
1711 | 107 | // the addi's 16-bit argument was included, so now we need to make it such |
1712 | 107 | // that the produced value is in the range for the uses alone. |
1713 | 107 | // Most of the time, simply adding Diff will make the addi produce exact |
1714 | 107 | // result, but if Diff is outside of the 16-bit range, some adjustment |
1715 | 107 | // will be needed. |
1716 | 107 | unsigned IdxOpc = getRegOffOpcode(ExtOpc); |
1717 | 107 | assert(IdxOpc == Hexagon::A2_addi); |
1718 | 107 | |
1719 | 107 | // Clamp Diff to the 16 bit range. |
1720 | 107 | int32_t D = isInt<16>(Diff) ? Diff106 : (Diff > 0 1 ? 327670 : -327681 ); |
1721 | 107 | if (Diff > 32767) { |
1722 | 0 | // Split Diff into two values: one that is close to min/max int16, |
1723 | 0 | // and the other being the rest, and such that both have the same |
1724 | 0 | // "alignment" as Diff. |
1725 | 0 | uint32_t UD = Diff; |
1726 | 0 | OffsetRange R = getOffsetRange(MI.getOperand(0)); |
1727 | 0 | uint32_t A = std::min<uint32_t>(R.Align, 1u << countTrailingZeros(UD)); |
1728 | 0 | D &= ~(A-1); |
1729 | 0 | } |
1730 | 107 | BuildMI(MBB, At, dl, HII->get(IdxOpc)) |
1731 | 107 | .add(MI.getOperand(0)) |
1732 | 107 | .add(MachineOperand(ExtR)) |
1733 | 107 | .addImm(D); |
1734 | 107 | Diff -= D; |
1735 | | #ifndef NDEBUG |
1736 | | // Make sure the output is within allowable range for uses. |
1737 | | // "Diff" is a difference in the "opposite direction", i.e. Ext - DefV, |
1738 | | // not DefV - Ext, as the getOffsetRange would calculate. |
1739 | | OffsetRange Uses = getOffsetRange(MI.getOperand(0)); |
1740 | | if (!Uses.contains(-Diff)) |
1741 | | dbgs() << "Diff: " << -Diff << " out of range " << Uses |
1742 | | << " for " << MI; |
1743 | | assert(Uses.contains(-Diff)); |
1744 | | #endif |
1745 | | MBB.erase(MI); |
1746 | 107 | return true; |
1747 | 107 | } |
1748 | 376 | |
1749 | 376 | const ExtValue &EV = ExtI.first; (void)EV; |
1750 | 376 | const ExtExpr &Ex = ExtI.second; (void)Ex; |
1751 | 376 | |
1752 | 376 | if (ExtOpc == Hexagon::A2_addi || ExtOpc == Hexagon::A2_subri) { |
1753 | 0 | // If addi/subri are replaced with the exactly matching initializer, |
1754 | 0 | // they amount to COPY. |
1755 | 0 | // Check that the initializer is an exact match (for simplicity). |
1756 | | #ifndef NDEBUG |
1757 | | bool IsAddi = ExtOpc == Hexagon::A2_addi; |
1758 | | const MachineOperand &RegOp = MI.getOperand(IsAddi ? 1 : 2); |
1759 | | const MachineOperand &ImmOp = MI.getOperand(IsAddi ? 2 : 1); |
1760 | | assert(Ex.Rs == RegOp && EV == ImmOp && Ex.Neg != IsAddi && |
1761 | | "Initializer mismatch"); |
1762 | | #endif |
1763 | | BuildMI(MBB, At, dl, HII->get(TargetOpcode::COPY)) |
1764 | 0 | .add(MI.getOperand(0)) |
1765 | 0 | .add(MachineOperand(ExtR)); |
1766 | 0 | Diff = 0; |
1767 | 0 | MBB.erase(MI); |
1768 | 0 | return true; |
1769 | 0 | } |
1770 | 376 | if (ExtOpc == Hexagon::M2_accii || ExtOpc == Hexagon::M2_naccii || |
1771 | 376 | ExtOpc == Hexagon::S4_addaddi || ExtOpc == Hexagon::S4_subaddi) { |
1772 | 0 | // M2_accii: add(Rt,add(Rs,V)) (tied) |
1773 | 0 | // M2_naccii: sub(Rt,add(Rs,V)) |
1774 | 0 | // S4_addaddi: add(Rt,add(Rs,V)) |
1775 | 0 | // S4_subaddi: add(Rt,sub(V,Rs)) |
1776 | 0 | // Check that Rs and V match the initializer expression. The Rs+V is the |
1777 | 0 | // combination that is considered "subexpression" for V, although Rx+V |
1778 | 0 | // would also be valid. |
1779 | | #ifndef NDEBUG |
1780 | | bool IsSub = ExtOpc == Hexagon::S4_subaddi; |
1781 | | Register Rs = MI.getOperand(IsSub ? 3 : 2); |
1782 | | ExtValue V = MI.getOperand(IsSub ? 2 : 3); |
1783 | | assert(EV == V && Rs == Ex.Rs && IsSub == Ex.Neg && "Initializer mismatch"); |
1784 | | #endif |
1785 | 0 | unsigned NewOpc = ExtOpc == Hexagon::M2_naccii ? Hexagon::A2_sub |
1786 | 0 | : Hexagon::A2_add; |
1787 | 0 | BuildMI(MBB, At, dl, HII->get(NewOpc)) |
1788 | 0 | .add(MI.getOperand(0)) |
1789 | 0 | .add(MI.getOperand(1)) |
1790 | 0 | .add(MachineOperand(ExtR)); |
1791 | 0 | MBB.erase(MI); |
1792 | 0 | return true; |
1793 | 0 | } |
1794 | 376 | |
1795 | 376 | if (MI.mayLoad() || MI.mayStore()177 ) { |
1796 | 376 | unsigned IdxOpc = getRegOffOpcode(ExtOpc); |
1797 | 376 | assert(IdxOpc && "Expecting indexed opcode"); |
1798 | 376 | MachineInstrBuilder MIB = BuildMI(MBB, At, dl, HII->get(IdxOpc)); |
1799 | 376 | // Construct the new indexed instruction. |
1800 | 376 | // First, add the def for loads. |
1801 | 376 | if (MI.mayLoad()) |
1802 | 199 | MIB.add(getLoadResultOp(MI)); |
1803 | 376 | // Handle possible predication. |
1804 | 376 | if (HII->isPredicated(MI)) |
1805 | 2 | MIB.add(getPredicateOp(MI)); |
1806 | 376 | // Build the address. |
1807 | 376 | MIB.add(MachineOperand(ExtR)); |
1808 | 376 | MIB.addImm(Diff); |
1809 | 376 | // Add the stored value for stores. |
1810 | 376 | if (MI.mayStore()) |
1811 | 177 | MIB.add(getStoredValueOp(MI)); |
1812 | 376 | MIB.cloneMemRefs(MI); |
1813 | 376 | MBB.erase(MI); |
1814 | 376 | return true; |
1815 | 376 | } |
1816 | 0 | |
1817 | | #ifndef NDEBUG |
1818 | | dbgs() << '\n' << PrintInit(ExtI, *HRI) << " " << MI; |
1819 | | #endif |
1820 | 0 | llvm_unreachable("Unhandled expr replacement"); |
1821 | 0 | return false; |
1822 | 0 | } |
1823 | | |
1824 | 520 | bool HCE::replaceInstr(unsigned Idx, Register ExtR, const ExtenderInit &ExtI) { |
1825 | 520 | if (ReplaceLimit.getNumOccurrences()) { |
1826 | 0 | if (ReplaceLimit <= ReplaceCounter) |
1827 | 0 | return false; |
1828 | 0 | ++ReplaceCounter; |
1829 | 0 | } |
1830 | 520 | const ExtDesc &ED = Extenders[Idx]; |
1831 | 520 | assert((!ED.IsDef || ED.Rd.Reg != 0) && "Missing Rd for def"); |
1832 | 520 | const ExtValue &DefV = ExtI.first; |
1833 | 520 | assert(ExtRoot(ExtValue(ED)) == ExtRoot(DefV) && "Extender root mismatch"); |
1834 | 520 | const ExtExpr &DefEx = ExtI.second; |
1835 | 520 | |
1836 | 520 | ExtValue EV(ED); |
1837 | 520 | int32_t Diff = EV.Offset - DefV.Offset; |
1838 | 520 | const MachineInstr &MI = *ED.UseMI; |
1839 | 520 | LLVM_DEBUG(dbgs() << __func__ << " Idx:" << Idx << " ExtR:" |
1840 | 520 | << PrintRegister(ExtR, *HRI) << " Diff:" << Diff << '\n'); |
1841 | 520 | |
1842 | 520 | // These two addressing modes must be converted into indexed forms |
1843 | 520 | // regardless of what the initializer looks like. |
1844 | 520 | bool IsAbs = false, IsAbsSet = false; |
1845 | 520 | if (MI.mayLoad() || MI.mayStore()320 ) { |
1846 | 377 | unsigned AM = HII->getAddrMode(MI); |
1847 | 377 | IsAbs = AM == HexagonII::Absolute; |
1848 | 377 | IsAbsSet = AM == HexagonII::AbsoluteSet; |
1849 | 377 | } |
1850 | 520 | |
1851 | 520 | // If it's a def, remember all operands that need to be updated. |
1852 | 520 | // If ED is a def, and Diff is not 0, then all uses of the register Rd |
1853 | 520 | // defined by ED must be in the form (Rd, imm), i.e. the immediate offset |
1854 | 520 | // must follow the Rd in the operand list. |
1855 | 520 | std::vector<std::pair<MachineInstr*,unsigned>> RegOps; |
1856 | 520 | if (ED.IsDef && Diff != 0110 ) { |
1857 | 759 | for (MachineOperand &Op : MRI->use_operands(ED.Rd.Reg)) { |
1858 | 759 | MachineInstr &UI = *Op.getParent(); |
1859 | 759 | RegOps.push_back({&UI, getOperandIndex(UI, Op)}); |
1860 | 759 | } |
1861 | 107 | } |
1862 | 520 | |
1863 | 520 | // Replace the instruction. |
1864 | 520 | bool Replaced = false; |
1865 | 520 | if (Diff == 0 && DefEx.trivial()68 && !IsAbs63 && !IsAbsSet37 ) |
1866 | 37 | Replaced = replaceInstrExact(ED, ExtR); |
1867 | 483 | else |
1868 | 483 | Replaced = replaceInstrExpr(ED, ExtI, ExtR, Diff); |
1869 | 520 | |
1870 | 520 | if (Diff != 0 && Replaced346 && ED.IsDef346 ) { |
1871 | 1 | // Update offsets of the def's uses. |
1872 | 1 | for (std::pair<MachineInstr*,unsigned> P : RegOps) { |
1873 | 1 | unsigned J = P.second; |
1874 | 1 | assert(P.first->getNumOperands() > J+1 && |
1875 | 1 | P.first->getOperand(J+1).isImm()); |
1876 | 1 | MachineOperand &ImmOp = P.first->getOperand(J+1); |
1877 | 1 | ImmOp.setImm(ImmOp.getImm() + Diff); |
1878 | 1 | } |
1879 | 1 | // If it was an absolute-set instruction, the "set" part has been removed. |
1880 | 1 | // ExtR will now be the register with the extended value, and since all |
1881 | 1 | // users of Rd have been updated, all that needs to be done is to replace |
1882 | 1 | // Rd with ExtR. |
1883 | 1 | if (IsAbsSet) { |
1884 | 0 | assert(ED.Rd.Sub == 0 && ExtR.Sub == 0); |
1885 | 0 | MRI->replaceRegWith(ED.Rd.Reg, ExtR.Reg); |
1886 | 0 | } |
1887 | 1 | } |
1888 | 520 | |
1889 | 520 | return Replaced; |
1890 | 520 | } |
1891 | | |
1892 | 1.27k | bool HCE::replaceExtenders(const AssignmentMap &IMap) { |
1893 | 1.27k | LocDefList Defs; |
1894 | 1.27k | bool Changed = false; |
1895 | 1.27k | |
1896 | 1.38k | for (const std::pair<ExtenderInit,IndexList> &P : IMap) { |
1897 | 1.38k | const IndexList &Idxs = P.second; |
1898 | 1.38k | if (Idxs.size() < CountThreshold) |
1899 | 1.33k | continue; |
1900 | 50 | |
1901 | 50 | Defs.clear(); |
1902 | 50 | calculatePlacement(P.first, Idxs, Defs); |
1903 | 50 | for (const std::pair<Loc,IndexList> &Q : Defs) { |
1904 | 50 | Register DefR = insertInitializer(Q.first, P.first); |
1905 | 50 | NewRegs.push_back(DefR.Reg); |
1906 | 50 | for (unsigned I : Q.second) |
1907 | 520 | Changed |= replaceInstr(I, DefR, P.first); |
1908 | 50 | } |
1909 | 50 | } |
1910 | 1.27k | return Changed; |
1911 | 1.27k | } |
1912 | | |
1913 | | unsigned HCE::getOperandIndex(const MachineInstr &MI, |
1914 | 759 | const MachineOperand &Op) const { |
1915 | 1.50k | for (unsigned i = 0, n = MI.getNumOperands(); i != n; ++i744 ) |
1916 | 1.50k | if (&MI.getOperand(i) == &Op) |
1917 | 759 | return i; |
1918 | 759 | llvm_unreachable0 ("Not an operand of MI"); |
1919 | 759 | } |
1920 | | |
1921 | 2 | const MachineOperand &HCE::getPredicateOp(const MachineInstr &MI) const { |
1922 | 2 | assert(HII->isPredicated(MI)); |
1923 | 2 | for (const MachineOperand &Op : MI.operands()) { |
1924 | 2 | if (!Op.isReg() || !Op.isUse() || |
1925 | 2 | MRI->getRegClass(Op.getReg()) != &Hexagon::PredRegsRegClass) |
1926 | 0 | continue; |
1927 | 2 | assert(Op.getSubReg() == 0 && "Predicate register with a subregister"); |
1928 | 2 | return Op; |
1929 | 2 | } |
1930 | 2 | llvm_unreachable0 ("Predicate operand not found"); |
1931 | 2 | } |
1932 | | |
1933 | 200 | const MachineOperand &HCE::getLoadResultOp(const MachineInstr &MI) const { |
1934 | 200 | assert(MI.mayLoad()); |
1935 | 200 | return MI.getOperand(0); |
1936 | 200 | } |
1937 | | |
1938 | 177 | const MachineOperand &HCE::getStoredValueOp(const MachineInstr &MI) const { |
1939 | 177 | assert(MI.mayStore()); |
1940 | 177 | return MI.getOperand(MI.getNumExplicitOperands()-1); |
1941 | 177 | } |
1942 | | |
1943 | 3.36k | bool HCE::runOnMachineFunction(MachineFunction &MF) { |
1944 | 3.36k | if (skipFunction(MF.getFunction())) |
1945 | 10 | return false; |
1946 | 3.35k | if (MF.getFunction().hasPersonalityFn()) { |
1947 | 8 | LLVM_DEBUG(dbgs() << getPassName() << ": skipping " << MF.getName() |
1948 | 8 | << " due to exception handling\n"); |
1949 | 8 | return false; |
1950 | 8 | } |
1951 | 3.35k | LLVM_DEBUG(MF.print(dbgs() << "Before " << getPassName() << '\n', nullptr)); |
1952 | 3.35k | |
1953 | 3.35k | HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); |
1954 | 3.35k | HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); |
1955 | 3.35k | MDT = &getAnalysis<MachineDominatorTree>(); |
1956 | 3.35k | MRI = &MF.getRegInfo(); |
1957 | 3.35k | AssignmentMap IMap; |
1958 | 3.35k | |
1959 | 3.35k | collect(MF); |
1960 | 3.96k | llvm::sort(Extenders, [this](const ExtDesc &A, const ExtDesc &B) { |
1961 | 3.96k | ExtValue VA(A), VB(B); |
1962 | 3.96k | if (VA != VB) |
1963 | 3.61k | return VA < VB; |
1964 | 355 | const MachineInstr *MA = A.UseMI; |
1965 | 355 | const MachineInstr *MB = B.UseMI; |
1966 | 355 | if (MA == MB) { |
1967 | 17 | // If it's the same instruction, compare operand numbers. |
1968 | 17 | return A.OpNum < B.OpNum; |
1969 | 17 | } |
1970 | 338 | |
1971 | 338 | const MachineBasicBlock *BA = MA->getParent(); |
1972 | 338 | const MachineBasicBlock *BB = MB->getParent(); |
1973 | 338 | assert(BA->getNumber() != -1 && BB->getNumber() != -1); |
1974 | 338 | if (BA != BB) |
1975 | 79 | return BA->getNumber() < BB->getNumber(); |
1976 | 259 | return MDT->dominates(MA, MB); |
1977 | 259 | }); |
1978 | 3.35k | |
1979 | 3.35k | bool Changed = false; |
1980 | 3.35k | LLVM_DEBUG(dbgs() << "Collected " << Extenders.size() << " extenders\n"); |
1981 | 4.62k | for (unsigned I = 0, E = Extenders.size(); I != E; ) { |
1982 | 1.27k | unsigned B = I; |
1983 | 1.27k | const ExtRoot &T = Extenders[B].getOp(); |
1984 | 3.25k | while (I != E && ExtRoot(Extenders[I].getOp()) == T2.32k ) |
1985 | 1.98k | ++I; |
1986 | 1.27k | |
1987 | 1.27k | IMap.clear(); |
1988 | 1.27k | assignInits(T, B, I, IMap); |
1989 | 1.27k | Changed |= replaceExtenders(IMap); |
1990 | 1.27k | } |
1991 | 3.35k | |
1992 | 3.35k | LLVM_DEBUG({ |
1993 | 3.35k | if (Changed) |
1994 | 3.35k | MF.print(dbgs() << "After " << getPassName() << '\n', nullptr); |
1995 | 3.35k | else |
1996 | 3.35k | dbgs() << "No changes\n"; |
1997 | 3.35k | }); |
1998 | 3.35k | return Changed; |
1999 | 3.35k | } |
2000 | | |
2001 | 861 | FunctionPass *llvm::createHexagonConstExtenders() { |
2002 | 861 | return new HexagonConstExtenders(); |
2003 | 861 | } |