/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- HexagonShuffler.cpp - Instruction bundle shuffling -----------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This implements the shuffling of insns inside a bundle according to the |
11 | | // packet formation rules of the Hexagon ISA. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #define DEBUG_TYPE "hexagon-shuffle" |
16 | | |
17 | | #include "MCTargetDesc/HexagonShuffler.h" |
18 | | #include "Hexagon.h" |
19 | | #include "MCTargetDesc/HexagonBaseInfo.h" |
20 | | #include "MCTargetDesc/HexagonMCInstrInfo.h" |
21 | | #include "MCTargetDesc/HexagonMCTargetDesc.h" |
22 | | #include "llvm/ADT/SmallVector.h" |
23 | | #include "llvm/ADT/Twine.h" |
24 | | #include "llvm/MC/MCContext.h" |
25 | | #include "llvm/MC/MCInst.h" |
26 | | #include "llvm/MC/MCSubtargetInfo.h" |
27 | | #include "llvm/Support/Compiler.h" |
28 | | #include "llvm/Support/Debug.h" |
29 | | #include "llvm/Support/MathExtras.h" |
30 | | #include "llvm/Support/raw_ostream.h" |
31 | | #include <algorithm> |
32 | | #include <cassert> |
33 | | #include <utility> |
34 | | #include <vector> |
35 | | |
36 | | using namespace llvm; |
37 | | |
38 | | namespace { |
39 | | |
40 | | // Insn shuffling priority. |
41 | | class HexagonBid { |
42 | | // The priority is directly proportional to how restricted the insn is based |
43 | | // on its flexibility to run on the available slots. So, the fewer slots it |
44 | | // may run on, the higher its priority. |
45 | | enum { MAX = 360360 }; // LCD of 1/2, 1/3, 1/4,... 1/15. |
46 | | unsigned Bid = 0; |
47 | | |
48 | | public: |
49 | 91.8k | HexagonBid() = default; |
50 | 82.0k | HexagonBid(unsigned B) { Bid = B ? 82.0k MAX / countPopulation(B)82.0k : 00 ; } |
51 | | |
52 | | // Check if the insn priority is overflowed. |
53 | 82.0k | bool isSold() const { return (Bid >= MAX); } |
54 | | |
55 | 82.0k | HexagonBid &operator+=(const HexagonBid &B) { |
56 | 82.0k | Bid += B.Bid; |
57 | 82.0k | return *this; |
58 | 82.0k | } |
59 | | }; |
60 | | |
61 | | // Slot shuffling allocation. |
62 | | class HexagonUnitAuction { |
63 | | HexagonBid Scores[HEXAGON_PACKET_SIZE]; |
64 | | // Mask indicating which slot is unavailable. |
65 | | unsigned isSold : HEXAGON_PACKET_SIZE; |
66 | | |
67 | | public: |
68 | 22.9k | HexagonUnitAuction(unsigned cs = 0) : isSold(cs) {} |
69 | | |
70 | | // Allocate slots. |
71 | 46.5k | bool bid(unsigned B) { |
72 | 46.5k | // Exclude already auctioned slots from the bid. |
73 | 46.5k | unsigned b = B & ~isSold; |
74 | 46.5k | if (b46.5k ) { |
75 | 232k | for (unsigned i = 0; i < 232k HEXAGON_PACKET_SIZE232k ; ++i186k ) |
76 | 186k | if (186k b & (1 << i)186k ) { |
77 | 82.0k | // Request candidate slots. |
78 | 82.0k | Scores[i] += HexagonBid(b); |
79 | 82.0k | isSold |= Scores[i].isSold() << i; |
80 | 82.0k | } |
81 | 46.5k | return true; |
82 | 46.5k | } else |
83 | 46.5k | // Error if the desired slots are already full. |
84 | 16 | return false; |
85 | 0 | } |
86 | | }; |
87 | | |
88 | | } // end anonymous namespace |
89 | | |
90 | 432k | unsigned HexagonResource::setWeight(unsigned s) { |
91 | 432k | const unsigned SlotWeight = 8; |
92 | 432k | const unsigned MaskWeight = SlotWeight - 1; |
93 | 432k | unsigned Units = getUnits(); |
94 | 432k | unsigned Key = ((1u << s) & Units) != 0; |
95 | 432k | |
96 | 432k | // Calculate relative weight of the insn for the given slot, weighing it the |
97 | 432k | // heavier the more restrictive the insn is and the lowest the slots that the |
98 | 432k | // insn may be executed in. |
99 | 432k | if (Key == 0 || 432k Units == 048.9k || (SlotWeight * s >= 32)48.9k ) |
100 | 383k | return Weight = 0; |
101 | 48.9k | |
102 | 48.9k | unsigned Ctpop = countPopulation(Units); |
103 | 48.9k | unsigned Cttz = countTrailingZeros(Units); |
104 | 48.9k | Weight = (1u << (SlotWeight * s)) * ((MaskWeight - Ctpop) << Cttz); |
105 | 48.9k | return Weight; |
106 | 48.9k | } |
107 | | |
108 | 83.8k | void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) { |
109 | 83.8k | (*TUL)[HexagonII::TypeCVI_VA] = |
110 | 83.8k | UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); |
111 | 83.8k | (*TUL)[HexagonII::TypeCVI_VA_DV] = UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2); |
112 | 83.8k | (*TUL)[HexagonII::TypeCVI_VX] = UnitsAndLanes(CVI_MPY0 | CVI_MPY1, 1); |
113 | 83.8k | (*TUL)[HexagonII::TypeCVI_VX_LATE] = UnitsAndLanes(CVI_MPY0 | CVI_MPY1, 1); |
114 | 83.8k | (*TUL)[HexagonII::TypeCVI_VX_DV] = UnitsAndLanes(CVI_MPY0, 2); |
115 | 83.8k | (*TUL)[HexagonII::TypeCVI_VP] = UnitsAndLanes(CVI_XLANE, 1); |
116 | 83.8k | (*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2); |
117 | 83.8k | (*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1); |
118 | 83.8k | (*TUL)[HexagonII::TypeCVI_VINLANESAT] = |
119 | 83.8k | (CPU == "hexagonv60") |
120 | 24.5k | ? UnitsAndLanes(CVI_SHIFT, 1) |
121 | 59.3k | : UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); |
122 | 83.8k | (*TUL)[HexagonII::TypeCVI_VM_LD] = |
123 | 83.8k | UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); |
124 | 83.8k | (*TUL)[HexagonII::TypeCVI_VM_TMP_LD] = UnitsAndLanes(CVI_NONE, 0); |
125 | 83.8k | (*TUL)[HexagonII::TypeCVI_VM_VP_LDU] = UnitsAndLanes(CVI_XLANE, 1); |
126 | 83.8k | (*TUL)[HexagonII::TypeCVI_VM_ST] = |
127 | 83.8k | UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); |
128 | 83.8k | (*TUL)[HexagonII::TypeCVI_VM_NEW_ST] = UnitsAndLanes(CVI_NONE, 0); |
129 | 83.8k | (*TUL)[HexagonII::TypeCVI_VM_STU] = UnitsAndLanes(CVI_XLANE, 1); |
130 | 83.8k | (*TUL)[HexagonII::TypeCVI_HIST] = UnitsAndLanes(CVI_XLANE, 4); |
131 | 83.8k | } |
132 | | |
133 | | HexagonCVIResource::HexagonCVIResource(TypeUnitsAndLanes *TUL, |
134 | | MCInstrInfo const &MCII, unsigned s, |
135 | | MCInst const *id) |
136 | 107k | : HexagonResource(s), TUL(TUL) { |
137 | 107k | unsigned T = HexagonMCInstrInfo::getType(MCII, *id); |
138 | 107k | |
139 | 107k | if (TUL->count(T)107k ) { |
140 | 11.0k | // For an HVX insn. |
141 | 11.0k | Valid = true; |
142 | 11.0k | setUnits((*TUL)[T].first); |
143 | 11.0k | setLanes((*TUL)[T].second); |
144 | 11.0k | setLoad(HexagonMCInstrInfo::getDesc(MCII, *id).mayLoad()); |
145 | 11.0k | setStore(HexagonMCInstrInfo::getDesc(MCII, *id).mayStore()); |
146 | 107k | } else { |
147 | 96.6k | // For core insns. |
148 | 96.6k | Valid = false; |
149 | 96.6k | setUnits(0); |
150 | 96.6k | setLanes(0); |
151 | 96.6k | setLoad(false); |
152 | 96.6k | setStore(false); |
153 | 96.6k | } |
154 | 107k | } |
155 | | |
156 | | struct CVIUnits { |
157 | | unsigned Units; |
158 | | unsigned Lanes; |
159 | | }; |
160 | | using HVXInstsT = SmallVector<struct CVIUnits, 8>; |
161 | | |
// Widen the mask startBit so that it covers Lanes adjacent bit positions,
// growing towards the more significant bits. A Lanes value of 0 or 1 returns
// startBit unchanged.
static unsigned makeAllBits(unsigned startBit, unsigned Lanes) {
  unsigned Mask = startBit;
  unsigned Remaining = Lanes;
  while (Remaining > 1) {
    Mask |= Mask << 1;
    --Remaining;
  }
  return Mask;
}
168 | | |
169 | | static bool checkHVXPipes(const HVXInstsT &hvxInsts, unsigned startIdx, |
170 | 8.56k | unsigned usedUnits) { |
171 | 8.56k | if (startIdx < hvxInsts.size()8.56k ) { |
172 | 5.09k | if (!hvxInsts[startIdx].Units) |
173 | 0 | return checkHVXPipes(hvxInsts, startIdx + 1, usedUnits); |
174 | 9.06k | for (unsigned b = 0x1; 5.09k b <= 0x89.06k ; b <<= 13.96k ) { |
175 | 9.05k | if ((hvxInsts[startIdx].Units & b) == 0) |
176 | 2.85k | continue; |
177 | 6.20k | unsigned allBits = makeAllBits(b, hvxInsts[startIdx].Lanes); |
178 | 6.20k | if ((allBits & usedUnits) == 06.20k ) { |
179 | 5.09k | if (checkHVXPipes(hvxInsts, startIdx + 1, usedUnits | allBits)) |
180 | 5.08k | return true; |
181 | 5.09k | } |
182 | 9.05k | } |
183 | 8 | return false; |
184 | 3.47k | } |
185 | 3.47k | return true; |
186 | 3.47k | } |
187 | | |
188 | | HexagonShuffler::HexagonShuffler(MCContext &Context, bool ReportErrors, |
189 | | MCInstrInfo const &MCII, |
190 | | MCSubtargetInfo const &STI) |
191 | 83.8k | : Context(Context), MCII(MCII), STI(STI), ReportErrors(ReportErrors) { |
192 | 83.8k | reset(); |
193 | 83.8k | HexagonCVIResource::SetupTUL(&TUL, STI.getCPU()); |
194 | 83.8k | } |
195 | | |
196 | 83.8k | void HexagonShuffler::reset() { |
197 | 83.8k | Packet.clear(); |
198 | 83.8k | BundleFlags = 0; |
199 | 83.8k | } |
200 | | |
201 | | void HexagonShuffler::append(MCInst const &ID, MCInst const *Extender, |
202 | 107k | unsigned S) { |
203 | 107k | HexagonInstr PI(&TUL, MCII, &ID, Extender, S); |
204 | 107k | |
205 | 107k | Packet.push_back(PI); |
206 | 107k | } |
207 | | |
// Candidate slot masks for a pair of branches in one packet: `first` is the
// mask tried for the first branch found and `second` the mask tried for the
// second one. In every entry `first` designates a higher slot than `second`.
// The table is only ever read, so it is declared const.
static const struct {
  unsigned first;
  unsigned second;
} jumpSlots[] = {{8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1}};
// Number of entries in jumpSlots.
#define MAX_JUMP_SLOTS (sizeof(jumpSlots) / sizeof(jumpSlots[0]))
213 | | |
/// Check that the packet is legal and enforce relative insn order.
///
/// The function works in phases:
///  1. Scan the packet and count branches, loads, stores, duplexes, etc.
///  2. Reject packets whose counts are illegal.
///  3. Narrow the core slot mask of each insn according to those counts.
///  4. If there are two branches, look for a pair of slots that keeps them
///     in order and still lets every insn win a slot.
///  5. Otherwise, if exactly one insn prefers slot #3, try pinning it there.
///  6. Unless phase 4 or 5 already proved it, verify that the core slots
///     are not over-subscribed.
///  7. Verify that the CVI units are not over-subscribed.
///
/// Side effects: the core slot masks of the insns in Packet are modified and
/// Packet is re-sorted, also on some of the failing paths.
///
/// \returns true if the packet is legal. On failure it returns false, in all
/// but one case (see the NOTE below) after calling reportError().
bool HexagonShuffler::check() {
  // Descriptive slot masks.
  const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1, slotOne = 0x2,
                 slotThree = 0x8, // slotFirstJump = 0x8,
      slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1;
  // Highest slots for branches and stores used to keep their original order.
  // unsigned slotJump = slotFirstJump;
  unsigned slotLoadStore = slotFirstLoadStore;
  // Number of branches, and number of branches that may not share the packet
  // with another branch.
  unsigned jumps = 0, jump1 = 0;
  // Number of memory operations, loads, solo loads, stores, solo stores, single
  // stores.
  unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0;
  // Number of duplex insns
  unsigned duplex = 0;
  // Number of insns restricting other insns in slot #1 to A type.
  unsigned onlyAin1 = 0;
  // Number of insns restricting any insn in slot #1, except A2_nop.
  unsigned onlyNo1 = 0;
  // Number of insns for which prefersSlot3() holds, and the last such insn.
  unsigned pSlot3Cnt = 0;
  // Number of new-value stores (counted for TypeV2LDST only).
  unsigned nvstores = 0;
  // Number of TypeV4LDST insns.
  unsigned memops = 0;
  // Number of returns (DEALLOC_RETURN).
  unsigned deallocs = 0;
  iterator slot3ISJ = end();
  // Iterators to the insns counted in `jumps`, in packet order.
  std::vector<iterator> foundBranches;
  // Union of the slots reserved by the insns; these start out as sold in
  // every auction below.
  unsigned reservedSlots = 0;

  // Collect information from the insns in the packet.
  for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
    MCInst const &ID = ISJ->getDesc();

    if (HexagonMCInstrInfo::isSoloAin1(MCII, ID))
      ++onlyAin1;
    if (HexagonMCInstrInfo::prefersSlot3(MCII, ID)) {
      ++pSlot3Cnt;
      slot3ISJ = ISJ;
    }
    reservedSlots |= HexagonMCInstrInfo::getOtherReservedSlots(MCII, STI, ID);
    if (HexagonMCInstrInfo::isCofMax1(MCII, ID))
      ++jump1;

    switch (HexagonMCInstrInfo::getType(MCII, ID)) {
    case HexagonII::TypeS_2op:
    case HexagonII::TypeS_3op:
    case HexagonII::TypeALU64:
      break;
    case HexagonII::TypeJ:
      ++jumps;
      foundBranches.push_back(ISJ);
      break;
    case HexagonII::TypeCVI_VM_VP_LDU:
      ++onlyNo1;
      LLVM_FALLTHROUGH;
    case HexagonII::TypeCVI_VM_LD:
    case HexagonII::TypeCVI_VM_TMP_LD:
    case HexagonII::TypeLD:
      ++loads;
      ++memory;
      // A load that can only go to slot #0, or an unaligned vector load,
      // counts as a solo load.
      if (ISJ->Core.getUnits() == slotSingleLoad ||
          HexagonMCInstrInfo::getType(MCII, ID) == HexagonII::TypeCVI_VM_VP_LDU)
        ++load0;
      if (HexagonMCInstrInfo::getDesc(MCII, ID).isReturn()) {
        ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD.
        foundBranches.push_back(ISJ);
      }
      break;
    case HexagonII::TypeCVI_VM_STU:
      ++onlyNo1;
      LLVM_FALLTHROUGH;
    case HexagonII::TypeCVI_VM_ST:
    case HexagonII::TypeCVI_VM_NEW_ST:
    case HexagonII::TypeST:
      ++stores;
      ++memory;
      // A store that can only go to slot #0, or an unaligned vector store,
      // counts as a solo store.
      if (ISJ->Core.getUnits() == slotSingleStore ||
          HexagonMCInstrInfo::getType(MCII, ID) == HexagonII::TypeCVI_VM_STU)
        ++store0;
      break;
    case HexagonII::TypeV4LDST:
      // Counted both as a load and as a store.
      ++loads;
      ++stores;
      ++store1;
      ++memops;
      ++memory;
      break;
    case HexagonII::TypeNCJ:
      ++memory; // NV insns are memory-like.
      ++jumps, ++jump1;
      foundBranches.push_back(ISJ);
      break;
    case HexagonII::TypeV2LDST:
      if (HexagonMCInstrInfo::getDesc(MCII, ID).mayLoad()) {
        ++loads;
        ++memory;
        // NOTE(review): inside this case the type is TypeV2LDST, so the
        // comparison against TypeCVI_VM_VP_LDU below cannot hold; only the
        // slot-mask test can increment load0 here.
        if (ISJ->Core.getUnits() == slotSingleLoad ||
            HexagonMCInstrInfo::getType(MCII, ID) ==
                HexagonII::TypeCVI_VM_VP_LDU)
          ++load0;
      } else {
        assert(HexagonMCInstrInfo::getDesc(MCII, ID).mayStore());
        ++memory;
        ++stores;
        if (HexagonMCInstrInfo::isNewValue(MCII, ID))
          ++nvstores;
      }
      break;
    case HexagonII::TypeCR:
    // Legacy conditional branch predicated on a register.
    case HexagonII::TypeCJ:
      if (HexagonMCInstrInfo::getDesc(MCII, ID).isBranch()) {
        ++jumps;
        foundBranches.push_back(ISJ);
      }
      break;
    case HexagonII::TypeDUPLEX: {
      ++duplex;
      // A duplex carries its two sub-insns as operands 0 and 1; each one is
      // inspected for branches and returns. The duplex itself is what gets
      // recorded in foundBranches.
      MCInst const &Inst0 = *ID.getOperand(0).getInst();
      MCInst const &Inst1 = *ID.getOperand(1).getInst();
      if (HexagonMCInstrInfo::isCofMax1(MCII, Inst0))
        ++jump1;
      if (HexagonMCInstrInfo::isCofMax1(MCII, Inst1))
        ++jump1;
      if (HexagonMCInstrInfo::getDesc(MCII, Inst0).isBranch()) {
        ++jumps;
        foundBranches.push_back(ISJ);
      }
      if (HexagonMCInstrInfo::getDesc(MCII, Inst1).isBranch()) {
        ++jumps;
        foundBranches.push_back(ISJ);
      }
      if (HexagonMCInstrInfo::getDesc(MCII, Inst0).isReturn()) {
        ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD.
        foundBranches.push_back(ISJ);
      }
      if (HexagonMCInstrInfo::getDesc(MCII, Inst1).isReturn()) {
        ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD.
        foundBranches.push_back(ISJ);
      }
      break;
    }
    }
  }

  // Check if the packet is legal.
  // At most one solo load, one solo store and one duplex are allowed, and a
  // duplex may not be combined with any memory operation.
  if ((load0 > 1 || store0 > 1) ||
      (duplex > 1 || (duplex && memory))) {
    reportError(Twine("invalid instruction packet"));
    return false;
  }

  if (jump1 && jumps > 1) {
    // Error if single branch with another branch.
    reportError(Twine("too many branches in packet"));
    return false;
  }
  // A new-value store or a TypeV4LDST insn may not be combined with another
  // store.
  if ((nvstores || memops) && stores > 1) {
    reportError(Twine("slot 0 instruction does not allow slot 1 store"));
    return false;
  }
  // A return may not be combined with a store.
  if (deallocs && stores) {
    reportError(Twine("slot 0 instruction does not allow slot 1 store"));
    return false;
  }

  // Modify packet accordingly.
  // TODO: need to reserve slots #0 and #1 for duplex insns.
  bool bOnlySlot3 = false;
  for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
    MCInst const &ID = ISJ->getDesc();

    if (!ISJ->Core.getUnits()) {
      // Error if insn may not be executed in any slot.
      // NOTE(review): unlike the equivalent test at the bottom of this loop,
      // this path fails without calling reportError() — confirm that the
      // silent failure is intended.
      return false;
    }

    // Exclude from slot #1 any insn but A2_nop.
    if (HexagonMCInstrInfo::getDesc(MCII, ID).getOpcode() != Hexagon::A2_nop)
      if (onlyNo1)
        ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne);

    // Exclude from slot #1 any insn but A-type.
    if (HexagonMCInstrInfo::getType(MCII, ID) != HexagonII::TypeALU32_2op &&
        HexagonMCInstrInfo::getType(MCII, ID) != HexagonII::TypeALU32_3op &&
        HexagonMCInstrInfo::getType(MCII, ID) != HexagonII::TypeALU32_ADDI)
      if (onlyAin1)
        ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne);

    // A single load must use slot #0.
    if (HexagonMCInstrInfo::getDesc(MCII, ID).mayLoad()) {
      // Only applies when the load is the sole memory operation of the
      // packet and there is no TypeV4LDST insn.
      if (loads == 1 && loads == memory && memops == 0)
        // Pin the load to slot #0.
        ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleLoad);
    }

    // A single store must use slot #0.
    if (HexagonMCInstrInfo::getDesc(MCII, ID).mayStore()) {
      if (!store0) {
        if (stores == 1)
          ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleStore);
        else if (stores > 1) {
          // slotLoadStore starts at slot #1 and is shifted down after each
          // store, so stores are pinned in packet order to slot #1, then
          // slot #0; a third store finds the mask exhausted.
          if (slotLoadStore < slotLastLoadStore) {
            // Error if no more slots available for stores.
            reportError(Twine("invalid instruction packet: too many stores"));
            return false;
          }
          // Pin the store to the highest slot available to it.
          ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore);
          // Update the next highest slot available to stores.
          slotLoadStore >>= 1;
        }
      }
      if (store1 && stores > 1) {
        // Error if a single store with another store.
        reportError(Twine("invalid instruction packet: too many stores"));
        return false;
      }
    }

    // flag if an instruction requires to be in slot 3
    if (ISJ->Core.getUnits() == slotThree)
      bOnlySlot3 = true;

    // The narrowing above may have left the insn without any slot.
    if (!ISJ->Core.getUnits()) {
      // Error if insn may not be executed in any slot.
      reportError(Twine("invalid instruction packet: out of slots"));
      return false;
    }
  }

  // preserve branch order
  // validateSlots stays true until one of the trial auctions below succeeds;
  // when it is still true further down, the generic core auction is run.
  bool validateSlots = true;
  if (jumps > 1) {
    if (foundBranches.size() > 2) {
      reportError(Twine("too many branches in packet"));
      return false;
    }

    // try all possible choices
    for (unsigned int i = 0; i < MAX_JUMP_SLOTS; ++i) {
      // validate first jump with this slot rule
      if (!(jumpSlots[i].first & foundBranches[0]->Core.getUnits()))
        continue;

      // validate second jump with this slot rule
      if (!(jumpSlots[i].second & foundBranches[1]->Core.getUnits()))
        continue;

      // both valid for this configuration, set new slot rules
      // NOTE(review): foundBranches holds iterators taken before the sort
      // below, which reorders Packet. They designate the branches again only
      // once Packet has been restored from PacketSave — confirm that the
      // iterators remain valid across that assignment.
      PacketSave = Packet;
      foundBranches[0]->Core.setUnits(jumpSlots[i].first);
      foundBranches[1]->Core.setUnits(jumpSlots[i].second);

      HexagonUnitAuction AuctionCore(reservedSlots);
      std::sort(begin(), end(), HexagonInstr::lessCore);

      // see if things ok with that instruction being pinned to slot "slotJump"
      bool bFail = false;
      for (iterator I = begin(); I != end() && !bFail; ++I)
        if (!AuctionCore.bid(I->Core.getUnits()))
          bFail = true;

      // if yes, great, if not then restore original slot mask
      if (!bFail) {
        validateSlots = false; // all good, no need to re-do auction
        break;
      } else
        // restore original values
        Packet = PacketSave;
    }
    // No slot pair worked for the two branches.
    if (validateSlots) {
      reportError(Twine("invalid instruction packet: out of slots"));
      return false;
    }
  }

  // With at most one branch, no insn already restricted to slot #3 and
  // exactly one insn preferring slot #3, try to honor that preference.
  if (jumps <= 1 && !bOnlySlot3 && pSlot3Cnt == 1 && slot3ISJ != end()) {
    validateSlots = true;
    // save off slot mask of instruction marked with A_PREFER_SLOT3
    // and then pin it to slot #3
    unsigned saveUnits = slot3ISJ->Core.getUnits();
    slot3ISJ->Core.setUnits(saveUnits & slotThree);

    HexagonUnitAuction AuctionCore(reservedSlots);
    std::sort(begin(), end(), HexagonInstr::lessCore);

    // see if things ok with that instruction being pinned to slot #3
    bool bFail = false;
    for (iterator I = begin(); I != end() && !bFail; ++I)
      if (!AuctionCore.bid(I->Core.getUnits()))
        bFail = true;

    // if yes, great, if not then restore original slot mask
    if (!bFail)
      validateSlots = false; // all good, no need to re-do auction
    else
      // The sort above moved the insns, so the one that prefers slot #3 is
      // looked up again instead of going through slot3ISJ.
      for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
        MCInst const &ID = ISJ->getDesc();
        if (HexagonMCInstrInfo::prefersSlot3(MCII, ID))
          ISJ->Core.setUnits(saveUnits);
      }
  }

  // Check if any slot, core or CVI, is over-subscribed.
  // Verify the core slot subscriptions.
  if (validateSlots) {
    HexagonUnitAuction AuctionCore(reservedSlots);

    std::sort(begin(), end(), HexagonInstr::lessCore);

    for (iterator I = begin(); I != end(); ++I)
      if (!AuctionCore.bid(I->Core.getUnits())) {
        reportError(Twine("invalid instruction packet: slot error"));
        return false;
      }
  }
  // Verify the CVI slot subscriptions.
  std::sort(begin(), end(), HexagonInstr::lessCVI);
  // create vector of hvx instructions to check
  HVXInstsT hvxInsts;
  hvxInsts.clear();
  for (iterator I = begin(); I != end(); ++I) {
    struct CVIUnits inst;
    inst.Units = I->CVI.getUnits();
    inst.Lanes = I->CVI.getLanes();
    if (inst.Units == 0)
      continue; // not an hvx inst or an hvx inst that doesn't uses any pipes
    hvxInsts.push_back(inst);
  }
  // if there are any hvx instructions in this packet, check pipe usage
  if (hvxInsts.size() > 0) {
    // Start the search at the first insn with no unit taken yet.
    unsigned startIdx, usedUnits;
    startIdx = usedUnits = 0x0;
    if (!checkHVXPipes(hvxInsts, startIdx, usedUnits)) {
      // too many pipes used to be valid
      reportError(Twine("invalid instruction packet: slot error"));
      return false;
    }
  }

  return true;
}
556 | | |
557 | 77.0k | bool HexagonShuffler::shuffle() { |
558 | 77.0k | if (size() > 77.0k HEXAGON_PACKET_SIZE77.0k ) { |
559 | 1 | // Ignore a packet with with more than what a packet can hold |
560 | 1 | // or with compound or duplex insns for now. |
561 | 1 | reportError(Twine("invalid instruction packet")); |
562 | 1 | return false; |
563 | 1 | } |
564 | 77.0k | |
565 | 77.0k | // Check and prepare packet. |
566 | 77.0k | bool Ok = true; |
567 | 77.0k | if (size() > 1 && 77.0k (Ok = check())16.5k ) |
568 | 77.0k | // Reorder the handles for each slot. |
569 | 81.9k | for (unsigned nSlot = 0, emptySlots = 0; 16.3k nSlot < 81.9k HEXAGON_PACKET_SIZE81.9k ; |
570 | 65.5k | ++nSlot65.5k ) { |
571 | 65.5k | iterator ISJ, ISK; |
572 | 65.5k | unsigned slotSkip, slotWeight; |
573 | 65.5k | |
574 | 65.5k | // Prioritize the handles considering their restrictions. |
575 | 65.5k | for (ISJ = ISK = Packet.begin(), slotSkip = slotWeight = 0; |
576 | 220k | ISK != Packet.end()220k ; ++ISK, ++slotSkip154k ) |
577 | 154k | if (154k slotSkip < nSlot - emptySlots154k ) |
578 | 154k | // Note which handle to begin at. |
579 | 57.6k | ++ISJ; |
580 | 154k | else |
581 | 154k | // Calculate the weight of the slot. |
582 | 154k | slotWeight += ISK->Core.setWeight(96.9k HEXAGON_PACKET_SIZE96.9k - nSlot - 1); |
583 | 65.5k | |
584 | 65.5k | if (slotWeight) |
585 | 65.5k | // Sort the packet, favoring source order, |
586 | 65.5k | // beginning after the previous slot. |
587 | 38.6k | std::sort(ISJ, Packet.end()); |
588 | 65.5k | else |
589 | 65.5k | // Skip unused slot. |
590 | 26.9k | ++emptySlots; |
591 | 16.3k | } |
592 | 77.0k | |
593 | 176k | for (iterator ISJ = begin(); ISJ != end()176k ; ++ISJ99.5k ) |
594 | 77.0k | DEBUG(dbgs().write_hex(ISJ->Core.getUnits()); if (ISJ->CVI.isValid()) { |
595 | 77.0k | dbgs() << '/'; |
596 | 77.0k | dbgs().write_hex(ISJ->CVI.getUnits()) << '|'; |
597 | 77.0k | dbgs() << ISJ->CVI.getLanes(); |
598 | 77.0k | } dbgs() << ':' |
599 | 77.0k | << HexagonMCInstrInfo::getDesc(MCII, ISJ->getDesc()).getOpcode(); |
600 | 77.0k | dbgs() << '\n'); |
601 | 77.0k | DEBUG(dbgs() << '\n'); |
602 | 77.0k | |
603 | 77.0k | return Ok; |
604 | 77.0k | } |
605 | | |
606 | 157 | void HexagonShuffler::reportError(Twine const &Msg) { |
607 | 157 | if (ReportErrors) |
608 | 3 | Context.reportError(Loc, Msg); |
609 | 157 | } |