/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
Line | Count | Source (jump to first uncovered line) |
1 | | //==- BlockFrequencyInfoImpl.h - Block Frequency Implementation --*- C++ -*-==// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // Shared implementation of BlockFrequency for IR and Machine Instructions. |
11 | | // See the documentation below for BlockFrequencyInfoImpl for details. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #ifndef LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H |
16 | | #define LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H |
17 | | |
18 | | #include "llvm/ADT/DenseMap.h" |
19 | | #include "llvm/ADT/GraphTraits.h" |
20 | | #include "llvm/ADT/Optional.h" |
21 | | #include "llvm/ADT/PostOrderIterator.h" |
22 | | #include "llvm/ADT/SmallVector.h" |
23 | | #include "llvm/ADT/Twine.h" |
24 | | #include "llvm/ADT/iterator_range.h" |
25 | | #include "llvm/IR/BasicBlock.h" |
26 | | #include "llvm/Support/BlockFrequency.h" |
27 | | #include "llvm/Support/BranchProbability.h" |
28 | | #include "llvm/Support/DOTGraphTraits.h" |
29 | | #include "llvm/Support/Debug.h" |
30 | | #include "llvm/Support/ErrorHandling.h" |
31 | | #include "llvm/Support/Format.h" |
32 | | #include "llvm/Support/ScaledNumber.h" |
33 | | #include "llvm/Support/raw_ostream.h" |
34 | | #include <algorithm> |
35 | | #include <cassert> |
36 | | #include <cstddef> |
37 | | #include <cstdint> |
38 | | #include <deque> |
39 | | #include <iterator> |
40 | | #include <limits> |
41 | | #include <list> |
42 | | #include <string> |
43 | | #include <utility> |
44 | | #include <vector> |
45 | | |
46 | | #define DEBUG_TYPE "block-freq" |
47 | | |
48 | | namespace llvm { |
49 | | |
50 | | class BranchProbabilityInfo; |
51 | | class Function; |
52 | | class Loop; |
53 | | class LoopInfo; |
54 | | class MachineBasicBlock; |
55 | | class MachineBranchProbabilityInfo; |
56 | | class MachineFunction; |
57 | | class MachineLoop; |
58 | | class MachineLoopInfo; |
59 | | |
60 | | namespace bfi_detail { |
61 | | |
62 | | struct IrreducibleGraph; |
63 | | |
64 | | // This is part of a workaround for a GCC 4.7 crash on lambdas. |
65 | | template <class BT> struct BlockEdgesAdder; |
66 | | |
67 | | /// \brief Mass of a block. |
68 | | /// |
69 | | /// This class implements a sort of fixed-point fraction always between 0.0 and |
70 | | /// 1.0. getMass() == std::numeric_limits<uint64_t>::max() indicates a value of |
71 | | /// 1.0. |
72 | | /// |
73 | | /// Masses can be added and subtracted. Simple saturation arithmetic is used, |
74 | | /// so arithmetic operations never overflow or underflow. |
75 | | /// |
76 | | /// Masses can be multiplied. Multiplication treats full mass as 1.0 and uses |
77 | | /// an inexpensive floating-point algorithm that's off-by-one (almost, but not |
78 | | /// quite, maximum precision). |
79 | | /// |
80 | | /// Masses can be scaled by \a BranchProbability at maximum precision. |
81 | | class BlockMass { |
82 | | uint64_t Mass = 0; |
83 | | |
84 | | public: |
85 | 82.6M | BlockMass() = default; |
86 | 11.0M | explicit BlockMass(uint64_t Mass) : Mass(Mass) {} |
87 | | |
88 | 21.7k | static BlockMass getEmpty() { return BlockMass(); } |
89 | | |
90 | 11.0M | static BlockMass getFull() { |
91 | 11.0M | return BlockMass(std::numeric_limits<uint64_t>::max()); |
92 | 11.0M | } |
93 | | |
94 | 34.0M | uint64_t getMass() const { return Mass; } |
95 | | |
96 | 41.2M | bool isFull() const { return Mass == std::numeric_limits<uint64_t>::max(); } |
97 | 3.11M | bool isEmpty() const { return !Mass; } |
98 | | |
99 | 0 | bool operator!() const { return isEmpty(); } |
100 | | |
101 | | /// \brief Add another mass. |
102 | | /// |
103 | | /// Adds another mass, saturating at \a isFull() rather than overflowing. |
104 | 48.8M | BlockMass &operator+=(BlockMass X) { |
105 | 48.8M | uint64_t Sum = Mass + X.Mass; |
106 | 48.8M | Mass = Sum < Mass ? std::numeric_limits<uint64_t>::max()0 : Sum48.8M ; |
107 | 48.8M | return *this; |
108 | 48.8M | } |
109 | | |
110 | | /// \brief Subtract another mass. |
111 | | /// |
112 | | /// Subtracts another mass, saturating at \a isEmpty() rather than |
113 | | /// undeflowing. |
114 | 53.0M | BlockMass &operator-=(BlockMass X) { |
115 | 53.0M | uint64_t Diff = Mass - X.Mass; |
116 | 53.0M | Mass = Diff > Mass ? 00 : Diff53.0M ; |
117 | 53.0M | return *this; |
118 | 53.0M | } |
119 | | |
120 | 49.8M | BlockMass &operator*=(BranchProbability P) { |
121 | 49.8M | Mass = P.scale(Mass); |
122 | 49.8M | return *this; |
123 | 49.8M | } |
124 | | |
125 | 0 | bool operator==(BlockMass X) const { return Mass == X.Mass; } |
126 | 0 | bool operator!=(BlockMass X) const { return Mass != X.Mass; } |
127 | 0 | bool operator<=(BlockMass X) const { return Mass <= X.Mass; } |
128 | 0 | bool operator>=(BlockMass X) const { return Mass >= X.Mass; } |
129 | 0 | bool operator<(BlockMass X) const { return Mass < X.Mass; } |
130 | 0 | bool operator>(BlockMass X) const { return Mass > X.Mass; } |
131 | | |
132 | | /// \brief Convert to scaled number. |
133 | | /// |
134 | | /// Convert to \a ScaledNumber. \a isFull() gives 1.0, while \a isEmpty() |
135 | | /// gives slightly above 0.0. |
136 | | ScaledNumber<uint64_t> toScaled() const; |
137 | | |
138 | | void dump() const; |
139 | | raw_ostream &print(raw_ostream &OS) const; |
140 | | }; |
141 | | |
142 | 0 | inline BlockMass operator+(BlockMass L, BlockMass R) { |
143 | 0 | return BlockMass(L) += R; |
144 | 0 | } |
145 | 3.11M | inline BlockMass operator-(BlockMass L, BlockMass R) { |
146 | 3.11M | return BlockMass(L) -= R; |
147 | 3.11M | } |
148 | 49.8M | inline BlockMass operator*(BlockMass L, BranchProbability R) { |
149 | 49.8M | return BlockMass(L) *= R; |
150 | 49.8M | } |
151 | 0 | inline BlockMass operator*(BranchProbability L, BlockMass R) { |
152 | 0 | return BlockMass(R) *= L; |
153 | 0 | } |
154 | | |
155 | 0 | inline raw_ostream &operator<<(raw_ostream &OS, BlockMass X) { |
156 | 0 | return X.print(OS); |
157 | 0 | } |
158 | | |
159 | | } // end namespace bfi_detail |
160 | | |
161 | | template <> struct isPodLike<bfi_detail::BlockMass> { |
162 | | static const bool value = true; |
163 | | }; |
164 | | |
165 | | /// \brief Base class for BlockFrequencyInfoImpl |
166 | | /// |
167 | | /// BlockFrequencyInfoImplBase has supporting data structures and some |
168 | | /// algorithms for BlockFrequencyInfoImplBase. Only algorithms that depend on |
169 | | /// the block type (or that call such algorithms) are skipped here. |
170 | | /// |
171 | | /// Nevertheless, the majority of the overall algorithm documention lives with |
172 | | /// BlockFrequencyInfoImpl. See there for details. |
173 | | class BlockFrequencyInfoImplBase { |
174 | | public: |
175 | | using Scaled64 = ScaledNumber<uint64_t>; |
176 | | using BlockMass = bfi_detail::BlockMass; |
177 | | |
178 | | /// \brief Representative of a block. |
179 | | /// |
180 | | /// This is a simple wrapper around an index into the reverse-post-order |
181 | | /// traversal of the blocks. |
182 | | /// |
183 | | /// Unlike a block pointer, its order has meaning (location in the |
184 | | /// topological sort) and it's class is the same regardless of block type. |
185 | | struct BlockNode { |
186 | | using IndexType = uint32_t; |
187 | | |
188 | | IndexType Index = std::numeric_limits<uint32_t>::max(); |
189 | | |
190 | 35.6M | BlockNode() = default; |
191 | 113M | BlockNode(IndexType Index) : Index(Index) {} |
192 | | |
193 | 109M | bool operator==(const BlockNode &X) const { return Index == X.Index; } |
194 | 35.1M | bool operator!=(const BlockNode &X) const { return Index != X.Index; } |
195 | 0 | bool operator<=(const BlockNode &X) const { return Index <= X.Index; } |
196 | 0 | bool operator>=(const BlockNode &X) const { return Index >= X.Index; } |
197 | 62.6M | bool operator<(const BlockNode &X) const { return Index < X.Index; } |
198 | 0 | bool operator>(const BlockNode &X) const { return Index > X.Index; } |
199 | | |
200 | 67.0M | bool isValid() const { return Index <= getMaxIndex(); } |
201 | | |
202 | 67.0M | static size_t getMaxIndex() { |
203 | 67.0M | return std::numeric_limits<uint32_t>::max() - 1; |
204 | 67.0M | } |
205 | | }; |
206 | | |
207 | | /// \brief Stats about a block itself. |
208 | | struct FrequencyData { |
209 | | Scaled64 Scaled; |
210 | | uint64_t Integer; |
211 | | }; |
212 | | |
213 | | /// \brief Data about a loop. |
214 | | /// |
215 | | /// Contains the data necessary to represent a loop as a pseudo-node once it's |
216 | | /// packaged. |
217 | | struct LoopData { |
218 | | using ExitMap = SmallVector<std::pair<BlockNode, BlockMass>, 4>; |
219 | | using NodeList = SmallVector<BlockNode, 4>; |
220 | | using HeaderMassList = SmallVector<BlockMass, 1>; |
221 | | |
222 | | LoopData *Parent; ///< The parent loop. |
223 | | bool IsPackaged = false; ///< Whether this has been packaged. |
224 | | uint32_t NumHeaders = 1; ///< Number of headers. |
225 | | ExitMap Exits; ///< Successor edges (and weights). |
226 | | NodeList Nodes; ///< Header and the members of the loop. |
227 | | HeaderMassList BackedgeMass; ///< Mass returned to each loop header. |
228 | | BlockMass Mass; |
229 | | Scaled64 Scale; |
230 | | |
231 | | LoopData(LoopData *Parent, const BlockNode &Header) |
232 | 3.10M | : Parent(Parent), Nodes(1, Header), BackedgeMass(1) {} |
233 | | |
234 | | template <class It1, class It2> |
235 | | LoopData(LoopData *Parent, It1 FirstHeader, It1 LastHeader, It2 FirstOther, |
236 | | It2 LastOther) |
237 | 804 | : Parent(Parent), Nodes(FirstHeader, LastHeader) { |
238 | 804 | NumHeaders = Nodes.size(); |
239 | 804 | Nodes.insert(Nodes.end(), FirstOther, LastOther); |
240 | 804 | BackedgeMass.resize(NumHeaders); |
241 | 804 | } |
242 | | |
243 | 91.7M | bool isHeader(const BlockNode &Node) const { |
244 | 91.7M | if (isIrreducible()) |
245 | 91.9k | return std::binary_search(Nodes.begin(), Nodes.begin() + NumHeaders, |
246 | 91.9k | Node); |
247 | 91.6M | return Node == Nodes[0]; |
248 | 91.7M | } |
249 | | |
250 | 23.5M | BlockNode getHeader() const { return Nodes[0]; } |
251 | 101M | bool isIrreducible() const { return NumHeaders > 1; } |
252 | | |
253 | 3.18M | HeaderMassList::difference_type getHeaderIndex(const BlockNode &B) { |
254 | 3.18M | assert(isHeader(B) && "this is only valid on loop header blocks"); |
255 | 3.18M | if (isIrreducible()) |
256 | 7.79k | return std::lower_bound(Nodes.begin(), Nodes.begin() + NumHeaders, B) - |
257 | 7.79k | Nodes.begin(); |
258 | 3.17M | return 0; |
259 | 3.18M | } |
260 | | |
261 | 3.11M | NodeList::const_iterator members_begin() const { |
262 | 3.11M | return Nodes.begin() + NumHeaders; |
263 | 3.11M | } |
264 | | |
265 | 3.11M | NodeList::const_iterator members_end() const { return Nodes.end(); } |
266 | 3.11M | iterator_range<NodeList::const_iterator> members() const { |
267 | 3.11M | return make_range(members_begin(), members_end()); |
268 | 3.11M | } |
269 | | }; |
270 | | |
271 | | /// \brief Index of loop information. |
272 | | struct WorkingData { |
273 | | BlockNode Node; ///< This node. |
274 | | LoopData *Loop = nullptr; ///< The loop this block is inside. |
275 | | BlockMass Mass; ///< Mass distribution from the entry block. |
276 | | |
277 | 35.0M | WorkingData(const BlockNode &Node) : Node(Node) {} |
278 | | |
279 | 187M | bool isLoopHeader() const { return Loop && 187M Loop->isHeader(Node)73.3M ; } |
280 | | |
281 | 12.5M | bool isDoubleLoopHeader() const { |
282 | 12.5M | return isLoopHeader() && Loop->Parent12.5M && Loop->Parent->isIrreducible()3.59M && |
283 | 2.97k | Loop->Parent->isHeader(Node); |
284 | 12.5M | } |
285 | | |
286 | 50.1M | LoopData *getContainingLoop() const { |
287 | 50.1M | if (!isLoopHeader()) |
288 | 43.9M | return Loop; |
289 | 6.27M | if (6.27M !isDoubleLoopHeader()6.27M ) |
290 | 6.27M | return Loop->Parent; |
291 | 155 | return Loop->Parent->Parent; |
292 | 50.1M | } |
293 | | |
294 | | /// \brief Resolve a node to its representative. |
295 | | /// |
296 | | /// Get the node currently representing Node, which could be a containing |
297 | | /// loop. |
298 | | /// |
299 | | /// This function should only be called when distributing mass. As long as |
300 | | /// there are no irreducible edges to Node, then it will have complexity |
301 | | /// O(1) in this context. |
302 | | /// |
303 | | /// In general, the complexity is O(L), where L is the number of loop |
304 | | /// headers Node has been packaged into. Since this method is called in |
305 | | /// the context of distributing mass, L will be the number of loop headers |
306 | | /// an early exit edge jumps out of. |
307 | 85.3M | BlockNode getResolvedNode() const { |
308 | 85.3M | auto L = getPackagedLoop(); |
309 | 85.3M | return L ? L->getHeader()13.1M : Node72.1M ; |
310 | 85.3M | } |
311 | | |
312 | 135M | LoopData *getPackagedLoop() const { |
313 | 135M | if (!Loop || 135M !Loop->IsPackaged52.5M ) |
314 | 117M | return nullptr; |
315 | 18.0M | auto L = Loop; |
316 | 21.7M | while (L->Parent && 21.7M L->Parent->IsPackaged7.22M ) |
317 | 3.63M | L = L->Parent; |
318 | 18.0M | return L; |
319 | 135M | } |
320 | | |
321 | | /// \brief Get the appropriate mass for a node. |
322 | | /// |
323 | | /// Get appropriate mass for Node. If Node is a loop-header (whose loop |
324 | | /// has been packaged), returns the mass of its pseudo-node. If it's a |
325 | | /// node inside a packaged loop, it returns the loop's mass. |
326 | 88.7M | BlockMass &getMass() { |
327 | 88.7M | if (!isAPackage()) |
328 | 82.5M | return Mass; |
329 | 6.25M | if (6.25M !isADoublePackage()6.25M ) |
330 | 6.25M | return Loop->Mass; |
331 | 89 | return Loop->Parent->Mass; |
332 | 88.7M | } |
333 | | |
334 | | /// \brief Has ContainingLoop been packaged up? |
335 | 35.1M | bool isPackaged() const { return getResolvedNode() != Node; } |
336 | | |
337 | | /// \brief Has Loop been packaged up? |
338 | 99.7M | bool isAPackage() const { return isLoopHeader() && 99.7M Loop->IsPackaged16.4M ; } |
339 | | |
340 | | /// \brief Has Loop been packaged up twice? |
341 | 6.25M | bool isADoublePackage() const { |
342 | 551 | return isDoubleLoopHeader() && Loop->Parent->IsPackaged; |
343 | 6.25M | } |
344 | | }; |
345 | | |
346 | | /// \brief Unscaled probability weight. |
347 | | /// |
348 | | /// Probability weight for an edge in the graph (including the |
349 | | /// successor/target node). |
350 | | /// |
351 | | /// All edges in the original function are 32-bit. However, exit edges from |
352 | | /// loop packages are taken from 64-bit exit masses, so we need 64-bits of |
353 | | /// space in general. |
354 | | /// |
355 | | /// In addition to the raw weight amount, Weight stores the type of the edge |
356 | | /// in the current context (i.e., the context of the loop being processed). |
357 | | /// Is this a local edge within the loop, an exit from the loop, or a |
358 | | /// backedge to the loop header? |
359 | | struct Weight { |
360 | | enum DistType { Local, Exit, Backedge }; |
361 | | DistType Type = Local; |
362 | | BlockNode TargetNode; |
363 | | uint64_t Amount = 0; |
364 | | |
365 | 13.1k | Weight() = default; |
366 | | Weight(DistType Type, BlockNode TargetNode, uint64_t Amount) |
367 | 50.2M | : Type(Type), TargetNode(TargetNode), Amount(Amount) {} |
368 | | }; |
369 | | |
370 | | /// \brief Distribution of unscaled probability weight. |
371 | | /// |
372 | | /// Distribution of unscaled probability weight to a set of successors. |
373 | | /// |
374 | | /// This class collates the successor edge weights for later processing. |
375 | | /// |
376 | | /// \a DidOverflow indicates whether \a Total did overflow while adding to |
377 | | /// the distribution. It should never overflow twice. |
378 | | struct Distribution { |
379 | | using WeightList = SmallVector<Weight, 4>; |
380 | | |
381 | | WeightList Weights; ///< Individual successor weights. |
382 | | uint64_t Total = 0; ///< Sum of all weights. |
383 | | bool DidOverflow = false; ///< Whether \a Total did overflow. |
384 | | |
385 | 38.1M | Distribution() = default; |
386 | | |
387 | 42.9M | void addLocal(const BlockNode &Node, uint64_t Amount) { |
388 | 42.9M | add(Node, Amount, Weight::Local); |
389 | 42.9M | } |
390 | | |
391 | 4.16M | void addExit(const BlockNode &Node, uint64_t Amount) { |
392 | 4.16M | add(Node, Amount, Weight::Exit); |
393 | 4.16M | } |
394 | | |
395 | 3.17M | void addBackedge(const BlockNode &Node, uint64_t Amount) { |
396 | 3.17M | add(Node, Amount, Weight::Backedge); |
397 | 3.17M | } |
398 | | |
399 | | /// \brief Normalize the distribution. |
400 | | /// |
401 | | /// Combines multiple edges to the same \a Weight::TargetNode and scales |
402 | | /// down so that \a Total fits into 32-bits. |
403 | | /// |
404 | | /// This is linear in the size of \a Weights. For the vast majority of |
405 | | /// cases, adjacent edge weights are combined by sorting WeightList and |
406 | | /// combining adjacent weights. However, for very large edge lists an |
407 | | /// auxiliary hash table is used. |
408 | | void normalize(); |
409 | | |
410 | | private: |
411 | | void add(const BlockNode &Node, uint64_t Amount, Weight::DistType Type); |
412 | | }; |
413 | | |
414 | | /// \brief Data about each block. This is used downstream. |
415 | | std::vector<FrequencyData> Freqs; |
416 | | |
417 | | /// \brief Loop data: see initializeLoops(). |
418 | | std::vector<WorkingData> Working; |
419 | | |
420 | | /// \brief Indexed information about loops. |
421 | | std::list<LoopData> Loops; |
422 | | |
423 | | /// \brief Virtual destructor. |
424 | | /// |
425 | | /// Need a virtual destructor to mask the compiler warning about |
426 | | /// getBlockName(). |
427 | 4.87M | virtual ~BlockFrequencyInfoImplBase() = default; |
428 | | |
429 | | /// \brief Add all edges out of a packaged loop to the distribution. |
430 | | /// |
431 | | /// Adds all edges from LocalLoopHead to Dist. Calls addToDist() to add each |
432 | | /// successor edge. |
433 | | /// |
434 | | /// \return \c true unless there's an irreducible backedge. |
435 | | bool addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop, |
436 | | Distribution &Dist); |
437 | | |
438 | | /// \brief Add an edge to the distribution. |
439 | | /// |
440 | | /// Adds an edge to Succ to Dist. If \c LoopHead.isValid(), then whether the |
441 | | /// edge is local/exit/backedge is in the context of LoopHead. Otherwise, |
442 | | /// every edge should be a local edge (since all the loops are packaged up). |
443 | | /// |
444 | | /// \return \c true unless aborted due to an irreducible backedge. |
445 | | bool addToDist(Distribution &Dist, const LoopData *OuterLoop, |
446 | | const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight); |
447 | | |
448 | 0 | LoopData &getLoopPackage(const BlockNode &Head) { |
449 | 0 | assert(Head.Index < Working.size()); |
450 | 0 | assert(Working[Head.Index].isLoopHeader()); |
451 | 0 | return *Working[Head.Index].Loop; |
452 | 0 | } |
453 | | |
454 | | /// \brief Analyze irreducible SCCs. |
455 | | /// |
456 | | /// Separate irreducible SCCs from \c G, which is an explict graph of \c |
457 | | /// OuterLoop (or the top-level function, if \c OuterLoop is \c nullptr). |
458 | | /// Insert them into \a Loops before \c Insert. |
459 | | /// |
460 | | /// \return the \c LoopData nodes representing the irreducible SCCs. |
461 | | iterator_range<std::list<LoopData>::iterator> |
462 | | analyzeIrreducible(const bfi_detail::IrreducibleGraph &G, LoopData *OuterLoop, |
463 | | std::list<LoopData>::iterator Insert); |
464 | | |
465 | | /// \brief Update a loop after packaging irreducible SCCs inside of it. |
466 | | /// |
467 | | /// Update \c OuterLoop. Before finding irreducible control flow, it was |
468 | | /// partway through \a computeMassInLoop(), so \a LoopData::Exits and \a |
469 | | /// LoopData::BackedgeMass need to be reset. Also, nodes that were packaged |
470 | | /// up need to be removed from \a OuterLoop::Nodes. |
471 | | void updateLoopWithIrreducible(LoopData &OuterLoop); |
472 | | |
473 | | /// \brief Distribute mass according to a distribution. |
474 | | /// |
475 | | /// Distributes the mass in Source according to Dist. If LoopHead.isValid(), |
476 | | /// backedges and exits are stored in its entry in Loops. |
477 | | /// |
478 | | /// Mass is distributed in parallel from two copies of the source mass. |
479 | | void distributeMass(const BlockNode &Source, LoopData *OuterLoop, |
480 | | Distribution &Dist); |
481 | | |
482 | | /// \brief Compute the loop scale for a loop. |
483 | | void computeLoopScale(LoopData &Loop); |
484 | | |
485 | | /// Adjust the mass of all headers in an irreducible loop. |
486 | | /// |
487 | | /// Initially, irreducible loops are assumed to distribute their mass |
488 | | /// equally among its headers. This can lead to wrong frequency estimates |
489 | | /// since some headers may be executed more frequently than others. |
490 | | /// |
491 | | /// This adjusts header mass distribution so it matches the weights of |
492 | | /// the backedges going into each of the loop headers. |
493 | | void adjustLoopHeaderMass(LoopData &Loop); |
494 | | |
495 | | /// \brief Package up a loop. |
496 | | void packageLoop(LoopData &Loop); |
497 | | |
498 | | /// \brief Unwrap loops. |
499 | | void unwrapLoops(); |
500 | | |
501 | | /// \brief Finalize frequency metrics. |
502 | | /// |
503 | | /// Calculates final frequencies and cleans up no-longer-needed data |
504 | | /// structures. |
505 | | void finalizeMetrics(); |
506 | | |
507 | | /// \brief Clear all memory. |
508 | | void clear(); |
509 | | |
510 | | virtual std::string getBlockName(const BlockNode &Node) const; |
511 | | std::string getLoopName(const LoopData &Loop) const; |
512 | | |
513 | 0 | virtual raw_ostream &print(raw_ostream &OS) const { return OS; } |
514 | 0 | void dump() const { print(dbgs()); } |
515 | | |
516 | | Scaled64 getFloatingBlockFreq(const BlockNode &Node) const; |
517 | | |
518 | | BlockFrequency getBlockFreq(const BlockNode &Node) const; |
519 | | Optional<uint64_t> getBlockProfileCount(const Function &F, |
520 | | const BlockNode &Node) const; |
521 | | Optional<uint64_t> getProfileCountFromFreq(const Function &F, |
522 | | uint64_t Freq) const; |
523 | | |
524 | | void setBlockFreq(const BlockNode &Node, uint64_t Freq); |
525 | | |
526 | | raw_ostream &printBlockFreq(raw_ostream &OS, const BlockNode &Node) const; |
527 | | raw_ostream &printBlockFreq(raw_ostream &OS, |
528 | | const BlockFrequency &Freq) const; |
529 | | |
530 | 32.7M | uint64_t getEntryFreq() const { |
531 | 32.7M | assert(!Freqs.empty()); |
532 | 32.7M | return Freqs[0].Integer; |
533 | 32.7M | } |
534 | | }; |
535 | | |
536 | | namespace bfi_detail { |
537 | | |
538 | | template <class BlockT> struct TypeMap {}; |
539 | | template <> struct TypeMap<BasicBlock> { |
540 | | using BlockT = BasicBlock; |
541 | | using FunctionT = Function; |
542 | | using BranchProbabilityInfoT = BranchProbabilityInfo; |
543 | | using LoopT = Loop; |
544 | | using LoopInfoT = LoopInfo; |
545 | | }; |
546 | | template <> struct TypeMap<MachineBasicBlock> { |
547 | | using BlockT = MachineBasicBlock; |
548 | | using FunctionT = MachineFunction; |
549 | | using BranchProbabilityInfoT = MachineBranchProbabilityInfo; |
550 | | using LoopT = MachineLoop; |
551 | | using LoopInfoT = MachineLoopInfo; |
552 | | }; |
553 | | |
554 | | /// \brief Get the name of a MachineBasicBlock. |
555 | | /// |
556 | | /// Get the name of a MachineBasicBlock. It's templated so that including from |
557 | | /// CodeGen is unnecessary (that would be a layering issue). |
558 | | /// |
559 | | /// This is used mainly for debug output. The name is similar to |
560 | | /// MachineBasicBlock::getFullName(), but skips the name of the function. |
561 | 0 | template <class BlockT> std::string getBlockName(const BlockT *BB) { |
562 | 0 | assert(BB && "Unexpected nullptr"); |
563 | 0 | auto MachineName = "BB" + Twine(BB->getNumber()); |
564 | 0 | if (BB->getBasicBlock()) |
565 | 0 | return (MachineName + "[" + BB->getName() + "]").str(); |
566 | 0 | return MachineName.str(); |
567 | 0 | } |
568 | | /// \brief Get the name of a BasicBlock. |
569 | 435 | template <> inline std::string getBlockName(const BasicBlock *BB) { |
570 | 435 | assert(BB && "Unexpected nullptr"); |
571 | 435 | return BB->getName().str(); |
572 | 435 | } |
573 | | |
574 | | /// \brief Graph of irreducible control flow. |
575 | | /// |
576 | | /// This graph is used for determining the SCCs in a loop (or top-level |
577 | | /// function) that has irreducible control flow. |
578 | | /// |
579 | | /// During the block frequency algorithm, the local graphs are defined in a |
580 | | /// light-weight way, deferring to the \a BasicBlock or \a MachineBasicBlock |
581 | | /// graphs for most edges, but getting others from \a LoopData::ExitMap. The |
582 | | /// latter only has successor information. |
583 | | /// |
584 | | /// \a IrreducibleGraph makes this graph explicit. It's in a form that can use |
585 | | /// \a GraphTraits (so that \a analyzeIrreducible() can use \a scc_iterator), |
586 | | /// and it explicitly lists predecessors and successors. The initialization |
587 | | /// that relies on \c MachineBasicBlock is defined in the header. |
588 | | struct IrreducibleGraph { |
589 | | using BFIBase = BlockFrequencyInfoImplBase; |
590 | | |
591 | | BFIBase &BFI; |
592 | | |
593 | | using BlockNode = BFIBase::BlockNode; |
594 | | struct IrrNode { |
595 | | BlockNode Node; |
596 | | unsigned NumIn = 0; |
597 | | std::deque<const IrrNode *> Edges; |
598 | | |
599 | 21.5k | IrrNode(const BlockNode &Node) : Node(Node) {} |
600 | | |
601 | | using iterator = std::deque<const IrrNode *>::const_iterator; |
602 | | |
603 | 14.9k | iterator pred_begin() const { return Edges.begin(); } |
604 | 36.4k | iterator succ_begin() const { return Edges.begin() + NumIn; } |
605 | 14.9k | iterator pred_end() const { return succ_begin(); } |
606 | 61.4k | iterator succ_end() const { return Edges.end(); } |
607 | | }; |
608 | | BlockNode Start; |
609 | | const IrrNode *StartIrr = nullptr; |
610 | | std::vector<IrrNode> Nodes; |
611 | | SmallDenseMap<uint32_t, IrrNode *, 4> Lookup; |
612 | | |
613 | | /// \brief Construct an explicit graph containing irreducible control flow. |
614 | | /// |
615 | | /// Construct an explicit graph of the control flow in \c OuterLoop (or the |
616 | | /// top-level function, if \c OuterLoop is \c nullptr). Uses \c |
617 | | /// addBlockEdges to add block successors that have not been packaged into |
618 | | /// loops. |
619 | | /// |
620 | | /// \a BlockFrequencyInfoImpl::computeIrreducibleMass() is the only expected |
621 | | /// user of this. |
622 | | template <class BlockEdgesAdder> |
623 | | IrreducibleGraph(BFIBase &BFI, const BFIBase::LoopData *OuterLoop, |
624 | 584 | BlockEdgesAdder addBlockEdges) : BFI(BFI) { |
625 | 584 | initialize(OuterLoop, addBlockEdges); |
626 | 584 | } llvm::bfi_detail::IrreducibleGraph::IrreducibleGraph<llvm::bfi_detail::BlockEdgesAdder<llvm::BasicBlock> >(llvm::BlockFrequencyInfoImplBase&, llvm::BlockFrequencyInfoImplBase::LoopData const*, llvm::bfi_detail::BlockEdgesAdder<llvm::BasicBlock>) Line | Count | Source | 624 | 267 | BlockEdgesAdder addBlockEdges) : BFI(BFI) { | 625 | 267 | initialize(OuterLoop, addBlockEdges); | 626 | 267 | } |
llvm::bfi_detail::IrreducibleGraph::IrreducibleGraph<llvm::bfi_detail::BlockEdgesAdder<llvm::MachineBasicBlock> >(llvm::BlockFrequencyInfoImplBase&, llvm::BlockFrequencyInfoImplBase::LoopData const*, llvm::bfi_detail::BlockEdgesAdder<llvm::MachineBasicBlock>) Line | Count | Source | 624 | 317 | BlockEdgesAdder addBlockEdges) : BFI(BFI) { | 625 | 317 | initialize(OuterLoop, addBlockEdges); | 626 | 317 | } |
|
627 | | |
628 | | template <class BlockEdgesAdder> |
629 | | void initialize(const BFIBase::LoopData *OuterLoop, |
630 | | BlockEdgesAdder addBlockEdges); |
631 | | void addNodesInLoop(const BFIBase::LoopData &OuterLoop); |
632 | | void addNodesInFunction(); |
633 | | |
634 | 21.5k | void addNode(const BlockNode &Node) { |
635 | 21.5k | Nodes.emplace_back(Node); |
636 | 21.5k | BFI.Working[Node.Index].getMass() = BlockMass::getEmpty(); |
637 | 21.5k | } |
638 | | |
639 | | void indexNodes(); |
640 | | template <class BlockEdgesAdder> |
641 | | void addEdges(const BlockNode &Node, const BFIBase::LoopData *OuterLoop, |
642 | | BlockEdgesAdder addBlockEdges); |
643 | | void addEdge(IrrNode &Irr, const BlockNode &Succ, |
644 | | const BFIBase::LoopData *OuterLoop); |
645 | | }; |
646 | | |
647 | | template <class BlockEdgesAdder> |
648 | | void IrreducibleGraph::initialize(const BFIBase::LoopData *OuterLoop, |
649 | 584 | BlockEdgesAdder addBlockEdges) { |
650 | 584 | if (OuterLoop584 ) { |
651 | 207 | addNodesInLoop(*OuterLoop); |
652 | 207 | for (auto N : OuterLoop->Nodes) |
653 | 11.8k | addEdges(N, OuterLoop, addBlockEdges); |
654 | 584 | } else { |
655 | 377 | addNodesInFunction(); |
656 | 12.5k | for (uint32_t Index = 0; Index < BFI.Working.size()12.5k ; ++Index12.1k ) |
657 | 12.1k | addEdges(Index, OuterLoop, addBlockEdges); |
658 | 377 | } |
659 | 584 | StartIrr = Lookup[Start.Index]; |
660 | 584 | } void llvm::bfi_detail::IrreducibleGraph::initialize<llvm::bfi_detail::BlockEdgesAdder<llvm::BasicBlock> >(llvm::BlockFrequencyInfoImplBase::LoopData const*, llvm::bfi_detail::BlockEdgesAdder<llvm::BasicBlock>) Line | Count | Source | 649 | 267 | BlockEdgesAdder addBlockEdges) { | 650 | 267 | if (OuterLoop267 ) { | 651 | 95 | addNodesInLoop(*OuterLoop); | 652 | 95 | for (auto N : OuterLoop->Nodes) | 653 | 5.91k | addEdges(N, OuterLoop, addBlockEdges); | 654 | 267 | } else { | 655 | 172 | addNodesInFunction(); | 656 | 5.87k | for (uint32_t Index = 0; Index < BFI.Working.size()5.87k ; ++Index5.70k ) | 657 | 5.70k | addEdges(Index, OuterLoop, addBlockEdges); | 658 | 172 | } | 659 | 267 | StartIrr = Lookup[Start.Index]; | 660 | 267 | } |
void llvm::bfi_detail::IrreducibleGraph::initialize<llvm::bfi_detail::BlockEdgesAdder<llvm::MachineBasicBlock> >(llvm::BlockFrequencyInfoImplBase::LoopData const*, llvm::bfi_detail::BlockEdgesAdder<llvm::MachineBasicBlock>) Line | Count | Source | 649 | 317 | BlockEdgesAdder addBlockEdges) { | 650 | 317 | if (OuterLoop317 ) { | 651 | 112 | addNodesInLoop(*OuterLoop); | 652 | 112 | for (auto N : OuterLoop->Nodes) | 653 | 5.91k | addEdges(N, OuterLoop, addBlockEdges); | 654 | 317 | } else { | 655 | 205 | addNodesInFunction(); | 656 | 6.65k | for (uint32_t Index = 0; Index < BFI.Working.size()6.65k ; ++Index6.44k ) | 657 | 6.44k | addEdges(Index, OuterLoop, addBlockEdges); | 658 | 205 | } | 659 | 317 | StartIrr = Lookup[Start.Index]; | 660 | 317 | } |
|
661 | | |
662 | | template <class BlockEdgesAdder> |
663 | | void IrreducibleGraph::addEdges(const BlockNode &Node, |
664 | | const BFIBase::LoopData *OuterLoop, |
665 | 23.9k | BlockEdgesAdder addBlockEdges) { |
666 | 23.9k | auto L = Lookup.find(Node.Index); |
667 | 23.9k | if (L == Lookup.end()) |
668 | 2.42k | return; |
669 | 21.5k | IrrNode &Irr = *L->second; |
670 | 21.5k | const auto &Working = BFI.Working[Node.Index]; |
671 | 21.5k | |
672 | 21.5k | if (Working.isAPackage()) |
673 | 1.89k | for (const auto &I : Working.Loop->Exits) |
674 | 7.03k | addEdge(Irr, I.first, OuterLoop); |
675 | 21.5k | else |
676 | 19.6k | addBlockEdges(*this, Irr, OuterLoop); |
677 | 23.9k | } void llvm::bfi_detail::IrreducibleGraph::addEdges<llvm::bfi_detail::BlockEdgesAdder<llvm::BasicBlock> >(llvm::BlockFrequencyInfoImplBase::BlockNode const&, llvm::BlockFrequencyInfoImplBase::LoopData const*, llvm::bfi_detail::BlockEdgesAdder<llvm::BasicBlock>) Line | Count | Source | 665 | 11.6k | BlockEdgesAdder addBlockEdges) { | 666 | 11.6k | auto L = Lookup.find(Node.Index); | 667 | 11.6k | if (L == Lookup.end()) | 668 | 1.15k | return; | 669 | 10.4k | IrrNode &Irr = *L->second; | 670 | 10.4k | const auto &Working = BFI.Working[Node.Index]; | 671 | 10.4k | | 672 | 10.4k | if (Working.isAPackage()) | 673 | 965 | for (const auto &I : Working.Loop->Exits) | 674 | 3.20k | addEdge(Irr, I.first, OuterLoop); | 675 | 10.4k | else | 676 | 9.49k | addBlockEdges(*this, Irr, OuterLoop); | 677 | 11.6k | } |
void llvm::bfi_detail::IrreducibleGraph::addEdges<llvm::bfi_detail::BlockEdgesAdder<llvm::MachineBasicBlock> >(llvm::BlockFrequencyInfoImplBase::BlockNode const&, llvm::BlockFrequencyInfoImplBase::LoopData const*, llvm::bfi_detail::BlockEdgesAdder<llvm::MachineBasicBlock>) Line | Count | Source | 665 | 12.3k | BlockEdgesAdder addBlockEdges) { | 666 | 12.3k | auto L = Lookup.find(Node.Index); | 667 | 12.3k | if (L == Lookup.end()) | 668 | 1.26k | return; | 669 | 11.0k | IrrNode &Irr = *L->second; | 670 | 11.0k | const auto &Working = BFI.Working[Node.Index]; | 671 | 11.0k | | 672 | 11.0k | if (Working.isAPackage()) | 673 | 934 | for (const auto &I : Working.Loop->Exits) | 674 | 3.83k | addEdge(Irr, I.first, OuterLoop); | 675 | 11.0k | else | 676 | 10.1k | addBlockEdges(*this, Irr, OuterLoop); | 677 | 12.3k | } |
|
678 | | |
679 | | } // end namespace bfi_detail |
680 | | |
681 | | /// \brief Shared implementation for block frequency analysis. |
682 | | /// |
683 | | /// This is a shared implementation of BlockFrequencyInfo and |
684 | | /// MachineBlockFrequencyInfo, and calculates the relative frequencies of |
685 | | /// blocks. |
686 | | /// |
687 | | /// LoopInfo defines a loop as a "non-trivial" SCC dominated by a single block, |
688 | | /// which is called the header. A given loop, L, can have sub-loops, which are |
689 | | /// loops within the subgraph of L that exclude its header. (A "trivial" SCC |
690 | | /// consists of a single block that does not have a self-edge.) |
691 | | /// |
692 | | /// In addition to loops, this algorithm has limited support for irreducible |
693 | | /// SCCs, which are SCCs with multiple entry blocks. Irreducible SCCs are |
694 | | /// discovered on they fly, and modelled as loops with multiple headers. |
695 | | /// |
696 | | /// The headers of irreducible sub-SCCs consist of its entry blocks and all |
697 | | /// nodes that are targets of a backedge within it (excluding backedges within |
698 | | /// true sub-loops). Block frequency calculations act as if a block is |
699 | | /// inserted that intercepts all the edges to the headers. All backedges and |
700 | | /// entries point to this block. Its successors are the headers, which split |
701 | | /// the frequency evenly. |
702 | | /// |
703 | | /// This algorithm leverages BlockMass and ScaledNumber to maintain precision, |
704 | | /// separates mass distribution from loop scaling, and dithers to eliminate |
705 | | /// probability mass loss. |
706 | | /// |
707 | | /// The implementation is split between BlockFrequencyInfoImpl, which knows the |
708 | | /// type of graph being modelled (BasicBlock vs. MachineBasicBlock), and |
709 | | /// BlockFrequencyInfoImplBase, which doesn't. The base class uses \a |
710 | | /// BlockNode, a wrapper around a uint32_t. BlockNode is numbered from 0 in |
711 | | /// reverse-post order. This gives two advantages: it's easy to compare the |
712 | | /// relative ordering of two nodes, and maps keyed on BlockT can be represented |
713 | | /// by vectors. |
714 | | /// |
715 | | /// This algorithm is O(V+E), unless there is irreducible control flow, in |
716 | | /// which case it's O(V*E) in the worst case. |
717 | | /// |
718 | | /// These are the main stages: |
719 | | /// |
720 | | /// 0. Reverse post-order traversal (\a initializeRPOT()). |
721 | | /// |
722 | | /// Run a single post-order traversal and save it (in reverse) in RPOT. |
723 | | /// All other stages make use of this ordering. Save a lookup from BlockT |
724 | | /// to BlockNode (the index into RPOT) in Nodes. |
725 | | /// |
726 | | /// 1. Loop initialization (\a initializeLoops()). |
727 | | /// |
728 | | /// Translate LoopInfo/MachineLoopInfo into a form suitable for the rest of |
729 | | /// the algorithm. In particular, store the immediate members of each loop |
730 | | /// in reverse post-order. |
731 | | /// |
732 | | /// 2. Calculate mass and scale in loops (\a computeMassInLoops()). |
733 | | /// |
734 | | /// For each loop (bottom-up), distribute mass through the DAG resulting |
735 | | /// from ignoring backedges and treating sub-loops as a single pseudo-node. |
736 | | /// Track the backedge mass distributed to the loop header, and use it to |
737 | | /// calculate the loop scale (number of loop iterations). Immediate |
738 | | /// members that represent sub-loops will already have been visited and |
739 | | /// packaged into a pseudo-node. |
740 | | /// |
741 | | /// Distributing mass in a loop is a reverse-post-order traversal through |
742 | | /// the loop. Start by assigning full mass to the Loop header. For each |
743 | | /// node in the loop: |
744 | | /// |
745 | | /// - Fetch and categorize the weight distribution for its successors. |
746 | | /// If this is a packaged-subloop, the weight distribution is stored |
747 | | /// in \a LoopData::Exits. Otherwise, fetch it from |
748 | | /// BranchProbabilityInfo. |
749 | | /// |
750 | | /// - Each successor is categorized as \a Weight::Local, a local edge |
751 | | /// within the current loop, \a Weight::Backedge, a backedge to the |
752 | | /// loop header, or \a Weight::Exit, any successor outside the loop. |
753 | | /// The weight, the successor, and its category are stored in \a |
754 | | /// Distribution. There can be multiple edges to each successor. |
755 | | /// |
756 | | /// - If there's a backedge to a non-header, there's an irreducible SCC. |
757 | | /// The usual flow is temporarily aborted. \a |
758 | | /// computeIrreducibleMass() finds the irreducible SCCs within the |
759 | | /// loop, packages them up, and restarts the flow. |
760 | | /// |
761 | | /// - Normalize the distribution: scale weights down so that their sum |
762 | | /// is 32-bits, and coalesce multiple edges to the same node. |
763 | | /// |
764 | | /// - Distribute the mass accordingly, dithering to minimize mass loss, |
765 | | /// as described in \a distributeMass(). |
766 | | /// |
767 | | /// In the case of irreducible loops, instead of a single loop header, |
768 | | /// there will be several. The computation of backedge masses is similar |
769 | | /// but instead of having a single backedge mass, there will be one |
770 | | /// backedge per loop header. In these cases, each backedge will carry |
771 | | /// a mass proportional to the edge weights along the corresponding |
772 | | /// path. |
773 | | /// |
774 | | /// At the end of propagation, the full mass assigned to the loop will be |
775 | | /// distributed among the loop headers proportionally according to the |
776 | | /// mass flowing through their backedges. |
777 | | /// |
778 | | /// Finally, calculate the loop scale from the accumulated backedge mass. |
779 | | /// |
780 | | /// 3. Distribute mass in the function (\a computeMassInFunction()). |
781 | | /// |
782 | | /// Finally, distribute mass through the DAG resulting from packaging all |
783 | | /// loops in the function. This uses the same algorithm as distributing |
784 | | /// mass in a loop, except that there are no exit or backedge edges. |
785 | | /// |
786 | | /// 4. Unpackage loops (\a unwrapLoops()). |
787 | | /// |
788 | | /// Initialize each block's frequency to a floating point representation of |
789 | | /// its mass. |
790 | | /// |
791 | | /// Visit loops top-down, scaling the frequencies of its immediate members |
792 | | /// by the loop's pseudo-node's frequency. |
793 | | /// |
794 | | /// 5. Convert frequencies to a 64-bit range (\a finalizeMetrics()). |
795 | | /// |
796 | | /// Using the min and max frequencies as a guide, translate floating point |
797 | | /// frequencies to an appropriate range in uint64_t. |
798 | | /// |
799 | | /// It has some known flaws. |
800 | | /// |
801 | | /// - The model of irreducible control flow is a rough approximation. |
802 | | /// |
803 | | /// Modelling irreducible control flow exactly involves setting up and |
804 | | /// solving a group of infinite geometric series. Such precision is |
805 | | /// unlikely to be worthwhile, since most of our algorithms give up on |
806 | | /// irreducible control flow anyway. |
807 | | /// |
808 | | /// Nevertheless, we might find that we need to get closer. Here's a sort |
809 | | /// of TODO list for the model with diminishing returns, to be completed as |
810 | | /// necessary. |
811 | | /// |
812 | | /// - The headers for the \a LoopData representing an irreducible SCC |
813 | | /// include non-entry blocks. When these extra blocks exist, they |
814 | | /// indicate a self-contained irreducible sub-SCC. We could treat them |
815 | | /// as sub-loops, rather than arbitrarily shoving the problematic |
816 | | /// blocks into the headers of the main irreducible SCC. |
817 | | /// |
818 | | /// - Entry frequencies are assumed to be evenly split between the |
819 | | /// headers of a given irreducible SCC, which is the only option if we |
820 | | /// need to compute mass in the SCC before its parent loop. Instead, |
821 | | /// we could partially compute mass in the parent loop, and stop when |
822 | | /// we get to the SCC. Here, we have the correct ratio of entry |
823 | | /// masses, which we can use to adjust their relative frequencies. |
824 | | /// Compute mass in the SCC, and then continue propagation in the |
825 | | /// parent. |
826 | | /// |
827 | | /// - We can propagate mass iteratively through the SCC, for some fixed |
828 | | /// number of iterations. Each iteration starts by assigning the entry |
829 | | /// blocks their backedge mass from the prior iteration. The final |
830 | | /// mass for each block (and each exit, and the total backedge mass |
831 | | /// used for computing loop scale) is the sum of all iterations. |
832 | | /// (Running this until fixed point would "solve" the geometric |
833 | | /// series by simulation.) |
834 | | template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase { |
835 | | // This is part of a workaround for a GCC 4.7 crash on lambdas. |
836 | | friend struct bfi_detail::BlockEdgesAdder<BT>; |
837 | | |
838 | | using BlockT = typename bfi_detail::TypeMap<BT>::BlockT; |
839 | | using FunctionT = typename bfi_detail::TypeMap<BT>::FunctionT; |
840 | | using BranchProbabilityInfoT = |
841 | | typename bfi_detail::TypeMap<BT>::BranchProbabilityInfoT; |
842 | | using LoopT = typename bfi_detail::TypeMap<BT>::LoopT; |
843 | | using LoopInfoT = typename bfi_detail::TypeMap<BT>::LoopInfoT; |
844 | | using Successor = GraphTraits<const BlockT *>; |
845 | | using Predecessor = GraphTraits<Inverse<const BlockT *>>; |
846 | | |
847 | | const BranchProbabilityInfoT *BPI = nullptr; |
848 | | const LoopInfoT *LI = nullptr; |
849 | | const FunctionT *F = nullptr; |
850 | | |
851 | | // All blocks in reverse postorder. |
852 | | std::vector<const BlockT *> RPOT; |
853 | | DenseMap<const BlockT *, BlockNode> Nodes; |
854 | | |
855 | | using rpot_iterator = typename std::vector<const BlockT *>::const_iterator; |
856 | | |
857 | 79.9M | rpot_iterator rpot_begin() const { return RPOT.begin(); } llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::rpot_begin() const Line | Count | Source | 857 | 42.4M | rpot_iterator rpot_begin() const { return RPOT.begin(); } |
llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::rpot_begin() const Line | Count | Source | 857 | 37.4M | rpot_iterator rpot_begin() const { return RPOT.begin(); } |
|
858 | 9.75M | rpot_iterator rpot_end() const { return RPOT.end(); } llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::rpot_end() const Line | Count | Source | 858 | 4.98M | rpot_iterator rpot_end() const { return RPOT.end(); } |
llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::rpot_end() const Line | Count | Source | 858 | 4.77M | rpot_iterator rpot_end() const { return RPOT.end(); } |
|
859 | | |
860 | 70.1M | size_t getIndex(const rpot_iterator &I) const { return I - rpot_begin(); } llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::getIndex(std::__1::__wrap_iter<llvm::BasicBlock const* const*> const&) const Line | Count | Source | 860 | 37.4M | size_t getIndex(const rpot_iterator &I) const { return I - rpot_begin(); } |
llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::getIndex(std::__1::__wrap_iter<llvm::MachineBasicBlock const* const*> const&) const Line | Count | Source | 860 | 32.7M | size_t getIndex(const rpot_iterator &I) const { return I - rpot_begin(); } |
|
861 | | |
862 | 70.1M | BlockNode getNode(const rpot_iterator &I) const { |
863 | 70.1M | return BlockNode(getIndex(I)); |
864 | 70.1M | } llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::getNode(std::__1::__wrap_iter<llvm::MachineBasicBlock const* const*> const&) const Line | Count | Source | 862 | 32.7M | BlockNode getNode(const rpot_iterator &I) const { | 863 | 32.7M | return BlockNode(getIndex(I)); | 864 | 32.7M | } |
llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::getNode(std::__1::__wrap_iter<llvm::BasicBlock const* const*> const&) const Line | Count | Source | 862 | 37.4M | BlockNode getNode(const rpot_iterator &I) const { | 863 | 37.4M | return BlockNode(getIndex(I)); | 864 | 37.4M | } |
|
865 | 123M | BlockNode getNode(const BlockT *BB) const { return Nodes.lookup(BB); } llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::getNode(llvm::BasicBlock const*) const Line | Count | Source | 865 | 36.3M | BlockNode getNode(const BlockT *BB) const { return Nodes.lookup(BB); } |
llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::getNode(llvm::MachineBasicBlock const*) const Line | Count | Source | 865 | 86.8M | BlockNode getNode(const BlockT *BB) const { return Nodes.lookup(BB); } |
|
866 | | |
867 | 35.0M | const BlockT *getBlock(const BlockNode &Node) const { |
868 | 35.0M | assert(Node.Index < RPOT.size()); |
869 | 35.0M | return RPOT[Node.Index]; |
870 | 35.0M | } llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::getBlock(llvm::BlockFrequencyInfoImplBase::BlockNode const&) const Line | Count | Source | 867 | 16.3M | const BlockT *getBlock(const BlockNode &Node) const { | 868 | 16.3M | assert(Node.Index < RPOT.size()); | 869 | 16.3M | return RPOT[Node.Index]; | 870 | 16.3M | } |
llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::getBlock(llvm::BlockFrequencyInfoImplBase::BlockNode const&) const Line | Count | Source | 867 | 18.7M | const BlockT *getBlock(const BlockNode &Node) const { | 868 | 18.7M | assert(Node.Index < RPOT.size()); | 869 | 18.7M | return RPOT[Node.Index]; | 870 | 18.7M | } |
|
871 | | |
872 | | /// \brief Run (and save) a post-order traversal. |
873 | | /// |
874 | | /// Saves a reverse post-order traversal of all the nodes in \a F. |
875 | | void initializeRPOT(); |
876 | | |
877 | | /// \brief Initialize loop data. |
878 | | /// |
879 | | /// Build up \a Loops using \a LoopInfo. \a LoopInfo gives us a mapping from |
880 | | /// each block to the deepest loop it's in, but we need the inverse. For each |
881 | | /// loop, we store in reverse post-order its "immediate" members, defined as |
882 | | /// the header, the headers of immediate sub-loops, and all other blocks in |
883 | | /// the loop that are not in sub-loops. |
884 | | void initializeLoops(); |
885 | | |
886 | | /// \brief Propagate to a block's successors. |
887 | | /// |
888 | | /// In the context of distributing mass through \c OuterLoop, divide the mass |
889 | | /// currently assigned to \c Node between its successors. |
890 | | /// |
891 | | /// \return \c true unless there's an irreducible backedge. |
892 | | bool propagateMassToSuccessors(LoopData *OuterLoop, const BlockNode &Node); |
893 | | |
894 | | /// \brief Compute mass in a particular loop. |
895 | | /// |
896 | | /// Assign mass to \c Loop's header, and then for each block in \c Loop in |
897 | | /// reverse post-order, distribute mass to its successors. Only visits nodes |
898 | | /// that have not been packaged into sub-loops. |
899 | | /// |
900 | | /// \pre \a computeMassInLoop() has been called for each subloop of \c Loop. |
901 | | /// \return \c true unless there's an irreducible backedge. |
902 | | bool computeMassInLoop(LoopData &Loop); |
903 | | |
904 | | /// \brief Try to compute mass in the top-level function. |
905 | | /// |
906 | | /// Assign mass to the entry block, and then for each block in reverse |
907 | | /// post-order, distribute mass to its successors. Skips nodes that have |
908 | | /// been packaged into loops. |
909 | | /// |
910 | | /// \pre \a computeMassInLoops() has been called. |
911 | | /// \return \c true unless there's an irreducible backedge. |
912 | | bool tryToComputeMassInFunction(); |
913 | | |
914 | | /// \brief Compute mass in (and package up) irreducible SCCs. |
915 | | /// |
916 | | /// Find the irreducible SCCs in \c OuterLoop, add them to \a Loops (in front |
917 | | /// of \c Insert), and call \a computeMassInLoop() on each of them. |
918 | | /// |
919 | | /// If \c OuterLoop is \c nullptr, it refers to the top-level function. |
920 | | /// |
921 | | /// \pre \a computeMassInLoop() has been called for each subloop of \c |
922 | | /// OuterLoop. |
923 | | /// \pre \c Insert points at the last loop successfully processed by \a |
924 | | /// computeMassInLoop(). |
925 | | /// \pre \c OuterLoop has irreducible SCCs. |
926 | | void computeIrreducibleMass(LoopData *OuterLoop, |
927 | | std::list<LoopData>::iterator Insert); |
928 | | |
929 | | /// \brief Compute mass in all loops. |
930 | | /// |
931 | | /// For each loop bottom-up, call \a computeMassInLoop(). |
932 | | /// |
933 | | /// \a computeMassInLoop() aborts (and returns \c false) on loops that |
934 | | /// contain a irreducible sub-SCCs. Use \a computeIrreducibleMass() and then |
935 | | /// re-enter \a computeMassInLoop(). |
936 | | /// |
937 | | /// \post \a computeMassInLoop() has returned \c true for every loop. |
938 | | void computeMassInLoops(); |
939 | | |
940 | | /// \brief Compute mass in the top-level function. |
941 | | /// |
942 | | /// Uses \a tryToComputeMassInFunction() and \a computeIrreducibleMass() to |
943 | | /// compute mass in the top-level function. |
944 | | /// |
945 | | /// \post \a tryToComputeMassInFunction() has returned \c true. |
946 | | void computeMassInFunction(); |
947 | | |
948 | 0 | std::string getBlockName(const BlockNode &Node) const override { |
949 | 0 | return bfi_detail::getBlockName(getBlock(Node)); |
950 | 0 | } Unexecuted instantiation: llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::getBlockName(llvm::BlockFrequencyInfoImplBase::BlockNode const&) const Unexecuted instantiation: llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::getBlockName(llvm::BlockFrequencyInfoImplBase::BlockNode const&) const |
951 | | |
952 | | public: |
953 | 4.87M | BlockFrequencyInfoImpl() = default; llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::BlockFrequencyInfoImpl() Line | Count | Source | 953 | 2.38M | BlockFrequencyInfoImpl() = default; |
llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::BlockFrequencyInfoImpl() Line | Count | Source | 953 | 2.48M | BlockFrequencyInfoImpl() = default; |
|
954 | | |
955 | 511 | const FunctionT *getFunction() const { return F; } llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::getFunction() const Line | Count | Source | 955 | 473 | const FunctionT *getFunction() const { return F; } |
llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::getFunction() const Line | Count | Source | 955 | 38 | const FunctionT *getFunction() const { return F; } |
|
956 | | |
957 | | void calculate(const FunctionT &F, const BranchProbabilityInfoT &BPI, |
958 | | const LoopInfoT &LI); |
959 | | |
960 | | using BlockFrequencyInfoImplBase::getEntryFreq; |
961 | | |
962 | 67.0M | BlockFrequency getBlockFreq(const BlockT *BB) const { |
963 | 67.0M | return BlockFrequencyInfoImplBase::getBlockFreq(getNode(BB)); |
964 | 67.0M | } llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::getBlockFreq(llvm::BasicBlock const*) const Line | Count | Source | 962 | 5.63M | BlockFrequency getBlockFreq(const BlockT *BB) const { | 963 | 5.63M | return BlockFrequencyInfoImplBase::getBlockFreq(getNode(BB)); | 964 | 5.63M | } |
llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::getBlockFreq(llvm::MachineBasicBlock const*) const Line | Count | Source | 962 | 61.3M | BlockFrequency getBlockFreq(const BlockT *BB) const { | 963 | 61.3M | return BlockFrequencyInfoImplBase::getBlockFreq(getNode(BB)); | 964 | 61.3M | } |
|
965 | | |
966 | | Optional<uint64_t> getBlockProfileCount(const Function &F, |
967 | 441 | const BlockT *BB) const { |
968 | 441 | return BlockFrequencyInfoImplBase::getBlockProfileCount(F, getNode(BB)); |
969 | 441 | } llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::getBlockProfileCount(llvm::Function const&, llvm::MachineBasicBlock const*) const Line | Count | Source | 967 | 38 | const BlockT *BB) const { | 968 | 38 | return BlockFrequencyInfoImplBase::getBlockProfileCount(F, getNode(BB)); | 969 | 38 | } |
llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::getBlockProfileCount(llvm::Function const&, llvm::BasicBlock const*) const Line | Count | Source | 967 | 403 | const BlockT *BB) const { | 968 | 403 | return BlockFrequencyInfoImplBase::getBlockProfileCount(F, getNode(BB)); | 969 | 403 | } |
|
970 | | |
971 | | Optional<uint64_t> getProfileCountFromFreq(const Function &F, |
972 | 70 | uint64_t Freq) const { |
973 | 70 | return BlockFrequencyInfoImplBase::getProfileCountFromFreq(F, Freq); |
974 | 70 | } Unexecuted instantiation: llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::getProfileCountFromFreq(llvm::Function const&, unsigned long long) const llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::getProfileCountFromFreq(llvm::Function const&, unsigned long long) const Line | Count | Source | 972 | 70 | uint64_t Freq) const { | 973 | 70 | return BlockFrequencyInfoImplBase::getProfileCountFromFreq(F, Freq); | 974 | 70 | } |
|
975 | | |
976 | | void setBlockFreq(const BlockT *BB, uint64_t Freq); |
977 | | |
978 | 435 | Scaled64 getFloatingBlockFreq(const BlockT *BB) const { |
979 | 435 | return BlockFrequencyInfoImplBase::getFloatingBlockFreq(getNode(BB)); |
980 | 435 | } Unexecuted instantiation: llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::getFloatingBlockFreq(llvm::MachineBasicBlock const*) const llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::getFloatingBlockFreq(llvm::BasicBlock const*) const Line | Count | Source | 978 | 435 | Scaled64 getFloatingBlockFreq(const BlockT *BB) const { | 979 | 435 | return BlockFrequencyInfoImplBase::getFloatingBlockFreq(getNode(BB)); | 980 | 435 | } |
|
981 | | |
982 | 0 | const BranchProbabilityInfoT &getBPI() const { return *BPI; } Unexecuted instantiation: llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::getBPI() const Unexecuted instantiation: llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::getBPI() const |
983 | | |
984 | | /// \brief Print the frequencies for the current function. |
985 | | /// |
986 | | /// Prints the frequencies for the blocks in the current function. |
987 | | /// |
988 | | /// Blocks are printed in the natural iteration order of the function, rather |
989 | | /// than reverse post-order. This provides two advantages: writing -analyze |
990 | | /// tests is easier (since blocks come out in source order), and even |
991 | | /// unreachable blocks are printed. |
992 | | /// |
993 | | /// \a BlockFrequencyInfoImplBase::print() only knows reverse post-order, so |
994 | | /// we need to override it here. |
995 | | raw_ostream &print(raw_ostream &OS) const override; |
996 | | |
997 | | using BlockFrequencyInfoImplBase::dump; |
998 | | using BlockFrequencyInfoImplBase::printBlockFreq; |
999 | | |
1000 | 0 | raw_ostream &printBlockFreq(raw_ostream &OS, const BlockT *BB) const { |
1001 | 0 | return BlockFrequencyInfoImplBase::printBlockFreq(OS, getNode(BB)); |
1002 | 0 | } Unexecuted instantiation: llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::printBlockFreq(llvm::raw_ostream&, llvm::MachineBasicBlock const*) const Unexecuted instantiation: llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::printBlockFreq(llvm::raw_ostream&, llvm::BasicBlock const*) const |
1003 | | }; |
1004 | | |
1005 | | template <class BT> |
1006 | | void BlockFrequencyInfoImpl<BT>::calculate(const FunctionT &F, |
1007 | | const BranchProbabilityInfoT &BPI, |
1008 | 4.87M | const LoopInfoT &LI) { |
1009 | 4.87M | // Save the parameters. |
1010 | 4.87M | this->BPI = &BPI; |
1011 | 4.87M | this->LI = &LI; |
1012 | 4.87M | this->F = &F; |
1013 | 4.87M | |
1014 | 4.87M | // Clean up left-over data structures. |
1015 | 4.87M | BlockFrequencyInfoImplBase::clear(); |
1016 | 4.87M | RPOT.clear(); |
1017 | 4.87M | Nodes.clear(); |
1018 | 4.87M | |
1019 | 4.87M | // Initialize. |
1020 | 4.87M | DEBUG(dbgs() << "\nblock-frequency: " << F.getName() << "\n=================" |
1021 | 4.87M | << std::string(F.getName().size(), '=') << "\n"); |
1022 | 4.87M | initializeRPOT(); |
1023 | 4.87M | initializeLoops(); |
1024 | 4.87M | |
1025 | 4.87M | // Visit loops in post-order to find the local mass distribution, and then do |
1026 | 4.87M | // the full function. |
1027 | 4.87M | computeMassInLoops(); |
1028 | 4.87M | computeMassInFunction(); |
1029 | 4.87M | unwrapLoops(); |
1030 | 4.87M | finalizeMetrics(); |
1031 | 4.87M | } llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::calculate(llvm::MachineFunction const&, llvm::MachineBranchProbabilityInfo const&, llvm::MachineLoopInfo const&) Line | Count | Source | 1008 | 2.38M | const LoopInfoT &LI) { | 1009 | 2.38M | // Save the parameters. | 1010 | 2.38M | this->BPI = &BPI; | 1011 | 2.38M | this->LI = &LI; | 1012 | 2.38M | this->F = &F; | 1013 | 2.38M | | 1014 | 2.38M | // Clean up left-over data structures. | 1015 | 2.38M | BlockFrequencyInfoImplBase::clear(); | 1016 | 2.38M | RPOT.clear(); | 1017 | 2.38M | Nodes.clear(); | 1018 | 2.38M | | 1019 | 2.38M | // Initialize. | 1020 | 2.38M | DEBUG(dbgs() << "\nblock-frequency: " << F.getName() << "\n=================" | 1021 | 2.38M | << std::string(F.getName().size(), '=') << "\n"); | 1022 | 2.38M | initializeRPOT(); | 1023 | 2.38M | initializeLoops(); | 1024 | 2.38M | | 1025 | 2.38M | // Visit loops in post-order to find the local mass distribution, and then do | 1026 | 2.38M | // the full function. | 1027 | 2.38M | computeMassInLoops(); | 1028 | 2.38M | computeMassInFunction(); | 1029 | 2.38M | unwrapLoops(); | 1030 | 2.38M | finalizeMetrics(); | 1031 | 2.38M | } |
llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::calculate(llvm::Function const&, llvm::BranchProbabilityInfo const&, llvm::LoopInfo const&) Line | Count | Source | 1008 | 2.48M | const LoopInfoT &LI) { | 1009 | 2.48M | // Save the parameters. | 1010 | 2.48M | this->BPI = &BPI; | 1011 | 2.48M | this->LI = &LI; | 1012 | 2.48M | this->F = &F; | 1013 | 2.48M | | 1014 | 2.48M | // Clean up left-over data structures. | 1015 | 2.48M | BlockFrequencyInfoImplBase::clear(); | 1016 | 2.48M | RPOT.clear(); | 1017 | 2.48M | Nodes.clear(); | 1018 | 2.48M | | 1019 | 2.48M | // Initialize. | 1020 | 2.48M | DEBUG(dbgs() << "\nblock-frequency: " << F.getName() << "\n=================" | 1021 | 2.48M | << std::string(F.getName().size(), '=') << "\n"); | 1022 | 2.48M | initializeRPOT(); | 1023 | 2.48M | initializeLoops(); | 1024 | 2.48M | | 1025 | 2.48M | // Visit loops in post-order to find the local mass distribution, and then do | 1026 | 2.48M | // the full function. | 1027 | 2.48M | computeMassInLoops(); | 1028 | 2.48M | computeMassInFunction(); | 1029 | 2.48M | unwrapLoops(); | 1030 | 2.48M | finalizeMetrics(); | 1031 | 2.48M | } |
|
1032 | | |
1033 | | template <class BT> |
1034 | 1.92k | void BlockFrequencyInfoImpl<BT>::setBlockFreq(const BlockT *BB, uint64_t Freq) { |
1035 | 1.92k | if (Nodes.count(BB)) |
1036 | 1.08k | BlockFrequencyInfoImplBase::setBlockFreq(getNode(BB), Freq); |
1037 | 843 | else { |
1038 | 843 | // If BB is a newly added block after BFI is done, we need to create a new |
1039 | 843 | // BlockNode for it assigned with a new index. The index can be determined |
1040 | 843 | // by the size of Freqs. |
1041 | 843 | BlockNode NewNode(Freqs.size()); |
1042 | 843 | Nodes[BB] = NewNode; |
1043 | 843 | Freqs.emplace_back(); |
1044 | 843 | BlockFrequencyInfoImplBase::setBlockFreq(NewNode, Freq); |
1045 | 843 | } |
1046 | 1.92k | } |
1047 | | |
1048 | 4.87M | template <class BT> void BlockFrequencyInfoImpl<BT>::initializeRPOT() { |
1049 | 4.87M | const BlockT *Entry = &F->front(); |
1050 | 4.87M | RPOT.reserve(F->size()); |
1051 | 4.87M | std::copy(po_begin(Entry), po_end(Entry), std::back_inserter(RPOT)); |
1052 | 4.87M | std::reverse(RPOT.begin(), RPOT.end()); |
1053 | 4.87M | |
1054 | 4.87M | assert(RPOT.size() - 1 <= BlockNode::getMaxIndex() && |
1055 | 4.87M | "More nodes in function than Block Frequency Info supports"); |
1056 | 4.87M | |
1057 | 4.87M | DEBUG(dbgs() << "reverse-post-order-traversal\n"); |
1058 | 39.9M | for (rpot_iterator I = rpot_begin(), E = rpot_end(); I != E39.9M ; ++I35.0M ) { |
1059 | 35.0M | BlockNode Node = getNode(I); |
1060 | 35.0M | DEBUG(dbgs() << " - " << getIndex(I) << ": " << getBlockName(Node) << "\n"); |
1061 | 35.0M | Nodes[*I] = Node; |
1062 | 35.0M | } |
1063 | 4.87M | |
1064 | 4.87M | Working.reserve(RPOT.size()); |
1065 | 39.9M | for (size_t Index = 0; Index < RPOT.size()39.9M ; ++Index35.0M ) |
1066 | 35.0M | Working.emplace_back(Index); |
1067 | 4.87M | Freqs.resize(RPOT.size()); |
1068 | 4.87M | } llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::initializeRPOT() Line | Count | Source | 1048 | 2.48M | template <class BT> void BlockFrequencyInfoImpl<BT>::initializeRPOT() { | 1049 | 2.48M | const BlockT *Entry = &F->front(); | 1050 | 2.48M | RPOT.reserve(F->size()); | 1051 | 2.48M | std::copy(po_begin(Entry), po_end(Entry), std::back_inserter(RPOT)); | 1052 | 2.48M | std::reverse(RPOT.begin(), RPOT.end()); | 1053 | 2.48M | | 1054 | 2.48M | assert(RPOT.size() - 1 <= BlockNode::getMaxIndex() && | 1055 | 2.48M | "More nodes in function than Block Frequency Info supports"); | 1056 | 2.48M | | 1057 | 2.48M | DEBUG(dbgs() << "reverse-post-order-traversal\n"); | 1058 | 21.2M | for (rpot_iterator I = rpot_begin(), E = rpot_end(); I != E21.2M ; ++I18.7M ) { | 1059 | 18.7M | BlockNode Node = getNode(I); | 1060 | 18.7M | DEBUG(dbgs() << " - " << getIndex(I) << ": " << getBlockName(Node) << "\n"); | 1061 | 18.7M | Nodes[*I] = Node; | 1062 | 18.7M | } | 1063 | 2.48M | | 1064 | 2.48M | Working.reserve(RPOT.size()); | 1065 | 21.2M | for (size_t Index = 0; Index < RPOT.size()21.2M ; ++Index18.7M ) | 1066 | 18.7M | Working.emplace_back(Index); | 1067 | 2.48M | Freqs.resize(RPOT.size()); | 1068 | 2.48M | } |
llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::initializeRPOT() Line | Count | Source | 1048 | 2.38M | template <class BT> void BlockFrequencyInfoImpl<BT>::initializeRPOT() { | 1049 | 2.38M | const BlockT *Entry = &F->front(); | 1050 | 2.38M | RPOT.reserve(F->size()); | 1051 | 2.38M | std::copy(po_begin(Entry), po_end(Entry), std::back_inserter(RPOT)); | 1052 | 2.38M | std::reverse(RPOT.begin(), RPOT.end()); | 1053 | 2.38M | | 1054 | 2.38M | assert(RPOT.size() - 1 <= BlockNode::getMaxIndex() && | 1055 | 2.38M | "More nodes in function than Block Frequency Info supports"); | 1056 | 2.38M | | 1057 | 2.38M | DEBUG(dbgs() << "reverse-post-order-traversal\n"); | 1058 | 18.7M | for (rpot_iterator I = rpot_begin(), E = rpot_end(); I != E18.7M ; ++I16.3M ) { | 1059 | 16.3M | BlockNode Node = getNode(I); | 1060 | 16.3M | DEBUG(dbgs() << " - " << getIndex(I) << ": " << getBlockName(Node) << "\n"); | 1061 | 16.3M | Nodes[*I] = Node; | 1062 | 16.3M | } | 1063 | 2.38M | | 1064 | 2.38M | Working.reserve(RPOT.size()); | 1065 | 18.7M | for (size_t Index = 0; Index < RPOT.size()18.7M ; ++Index16.3M ) | 1066 | 16.3M | Working.emplace_back(Index); | 1067 | 2.38M | Freqs.resize(RPOT.size()); | 1068 | 2.38M | } |
|
1069 | | |
1070 | 4.87M | template <class BT> void BlockFrequencyInfoImpl<BT>::initializeLoops() { |
1071 | 4.87M | DEBUG(dbgs() << "loop-detection\n"); |
1072 | 4.87M | if (LI->empty()) |
1073 | 3.88M | return; |
1074 | 4.87M | |
1075 | 4.87M | // Visit loops top down and assign them an index. |
1076 | 989k | std::deque<std::pair<const LoopT *, LoopData *>> Q; |
1077 | 989k | for (const LoopT *L : *LI) |
1078 | 2.21M | Q.emplace_back(L, nullptr); |
1079 | 4.09M | while (!Q.empty()4.09M ) { |
1080 | 3.10M | const LoopT *Loop = Q.front().first; |
1081 | 3.10M | LoopData *Parent = Q.front().second; |
1082 | 3.10M | Q.pop_front(); |
1083 | 3.10M | |
1084 | 3.10M | BlockNode Header = getNode(Loop->getHeader()); |
1085 | 3.10M | assert(Header.isValid()); |
1086 | 3.10M | |
1087 | 3.10M | Loops.emplace_back(Parent, Header); |
1088 | 3.10M | Working[Header.Index].Loop = &Loops.back(); |
1089 | 3.10M | DEBUG(dbgs() << " - loop = " << getBlockName(Header) << "\n"); |
1090 | 3.10M | |
1091 | 3.10M | for (const LoopT *L : *Loop) |
1092 | 894k | Q.emplace_back(L, &Loops.back()); |
1093 | 3.10M | } |
1094 | 989k | |
1095 | 989k | // Visit nodes in reverse post-order and add them to their deepest containing |
1096 | 989k | // loop. |
1097 | 25.8M | for (size_t Index = 0; Index < RPOT.size()25.8M ; ++Index24.9M ) { |
1098 | 24.9M | // Loop headers have already been mostly mapped. |
1099 | 24.9M | if (Working[Index].isLoopHeader()24.9M ) { |
1100 | 3.10M | LoopData *ContainingLoop = Working[Index].getContainingLoop(); |
1101 | 3.10M | if (ContainingLoop) |
1102 | 894k | ContainingLoop->Nodes.push_back(Index); |
1103 | 3.10M | continue; |
1104 | 3.10M | } |
1105 | 24.9M | |
1106 | 21.7M | const LoopT *Loop = LI->getLoopFor(RPOT[Index]); |
1107 | 21.7M | if (!Loop) |
1108 | 14.8M | continue; |
1109 | 21.7M | |
1110 | 21.7M | // Add this node to its containing loop's member list. |
1111 | 6.91M | BlockNode Header = getNode(Loop->getHeader()); |
1112 | 6.91M | assert(Header.isValid()); |
1113 | 6.91M | const auto &HeaderData = Working[Header.Index]; |
1114 | 6.91M | assert(HeaderData.isLoopHeader()); |
1115 | 6.91M | |
1116 | 6.91M | Working[Index].Loop = HeaderData.Loop; |
1117 | 6.91M | HeaderData.Loop->Nodes.push_back(Index); |
1118 | 6.91M | DEBUG(dbgs() << " - loop = " << getBlockName(Header) |
1119 | 24.9M | << ": member = " << getBlockName(Index) << "\n"); |
1120 | 24.9M | } |
1121 | 4.87M | } llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::initializeLoops() Line | Count | Source | 1070 | 2.38M | template <class BT> void BlockFrequencyInfoImpl<BT>::initializeLoops() { | 1071 | 2.38M | DEBUG(dbgs() << "loop-detection\n"); | 1072 | 2.38M | if (LI->empty()) | 1073 | 1.93M | return; | 1074 | 2.38M | | 1075 | 2.38M | // Visit loops top down and assign them an index. | 1076 | 450k | std::deque<std::pair<const LoopT *, LoopData *>> Q; | 1077 | 450k | for (const LoopT *L : *LI) | 1078 | 1.02M | Q.emplace_back(L, nullptr); | 1079 | 1.90M | while (!Q.empty()1.90M ) { | 1080 | 1.45M | const LoopT *Loop = Q.front().first; | 1081 | 1.45M | LoopData *Parent = Q.front().second; | 1082 | 1.45M | Q.pop_front(); | 1083 | 1.45M | | 1084 | 1.45M | BlockNode Header = getNode(Loop->getHeader()); | 1085 | 1.45M | assert(Header.isValid()); | 1086 | 1.45M | | 1087 | 1.45M | Loops.emplace_back(Parent, Header); | 1088 | 1.45M | Working[Header.Index].Loop = &Loops.back(); | 1089 | 1.45M | DEBUG(dbgs() << " - loop = " << getBlockName(Header) << "\n"); | 1090 | 1.45M | | 1091 | 1.45M | for (const LoopT *L : *Loop) | 1092 | 424k | Q.emplace_back(L, &Loops.back()); | 1093 | 1.45M | } | 1094 | 450k | | 1095 | 450k | // Visit nodes in reverse post-order and add them to their deepest containing | 1096 | 450k | // loop. | 1097 | 11.9M | for (size_t Index = 0; Index < RPOT.size()11.9M ; ++Index11.5M ) { | 1098 | 11.5M | // Loop headers have already been mostly mapped. | 1099 | 11.5M | if (Working[Index].isLoopHeader()11.5M ) { | 1100 | 1.45M | LoopData *ContainingLoop = Working[Index].getContainingLoop(); | 1101 | 1.45M | if (ContainingLoop) | 1102 | 424k | ContainingLoop->Nodes.push_back(Index); | 1103 | 1.45M | continue; | 1104 | 1.45M | } | 1105 | 11.5M | | 1106 | 10.0M | const LoopT *Loop = LI->getLoopFor(RPOT[Index]); | 1107 | 10.0M | if (!Loop) | 1108 | 6.89M | continue; | 1109 | 10.0M | | 1110 | 10.0M | // Add this node to its containing loop's member list. | 1111 | 3.17M | BlockNode Header = getNode(Loop->getHeader()); | 1112 | 3.17M | assert(Header.isValid()); | 1113 | 3.17M | const auto &HeaderData = Working[Header.Index]; | 1114 | 3.17M | assert(HeaderData.isLoopHeader()); | 1115 | 3.17M | | 1116 | 3.17M | Working[Index].Loop = HeaderData.Loop; | 1117 | 3.17M | HeaderData.Loop->Nodes.push_back(Index); | 1118 | 3.17M | DEBUG(dbgs() << " - loop = " << getBlockName(Header) | 1119 | 11.5M | << ": member = " << getBlockName(Index) << "\n"); | 1120 | 11.5M | } | 1121 | 2.38M | } |
llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::initializeLoops() Line | Count | Source | 1070 | 2.48M | template <class BT> void BlockFrequencyInfoImpl<BT>::initializeLoops() { | 1071 | 2.48M | DEBUG(dbgs() << "loop-detection\n"); | 1072 | 2.48M | if (LI->empty()) | 1073 | 1.95M | return; | 1074 | 2.48M | | 1075 | 2.48M | // Visit loops top down and assign them an index. | 1076 | 539k | std::deque<std::pair<const LoopT *, LoopData *>> Q; | 1077 | 539k | for (const LoopT *L : *LI) | 1078 | 1.18M | Q.emplace_back(L, nullptr); | 1079 | 2.19M | while (!Q.empty()2.19M ) { | 1080 | 1.65M | const LoopT *Loop = Q.front().first; | 1081 | 1.65M | LoopData *Parent = Q.front().second; | 1082 | 1.65M | Q.pop_front(); | 1083 | 1.65M | | 1084 | 1.65M | BlockNode Header = getNode(Loop->getHeader()); | 1085 | 1.65M | assert(Header.isValid()); | 1086 | 1.65M | | 1087 | 1.65M | Loops.emplace_back(Parent, Header); | 1088 | 1.65M | Working[Header.Index].Loop = &Loops.back(); | 1089 | 1.65M | DEBUG(dbgs() << " - loop = " << getBlockName(Header) << "\n"); | 1090 | 1.65M | | 1091 | 1.65M | for (const LoopT *L : *Loop) | 1092 | 470k | Q.emplace_back(L, &Loops.back()); | 1093 | 1.65M | } | 1094 | 539k | | 1095 | 539k | // Visit nodes in reverse post-order and add them to their deepest containing | 1096 | 539k | // loop. | 1097 | 13.9M | for (size_t Index = 0; Index < RPOT.size()13.9M ; ++Index13.3M ) { | 1098 | 13.3M | // Loop headers have already been mostly mapped. | 1099 | 13.3M | if (Working[Index].isLoopHeader()13.3M ) { | 1100 | 1.65M | LoopData *ContainingLoop = Working[Index].getContainingLoop(); | 1101 | 1.65M | if (ContainingLoop) | 1102 | 470k | ContainingLoop->Nodes.push_back(Index); | 1103 | 1.65M | continue; | 1104 | 1.65M | } | 1105 | 13.3M | | 1106 | 11.7M | const LoopT *Loop = LI->getLoopFor(RPOT[Index]); | 1107 | 11.7M | if (!Loop) | 1108 | 7.98M | continue; | 1109 | 11.7M | | 1110 | 11.7M | // Add this node to its containing loop's member list. | 1111 | 3.73M | BlockNode Header = getNode(Loop->getHeader()); | 1112 | 3.73M | assert(Header.isValid()); | 1113 | 3.73M | const auto &HeaderData = Working[Header.Index]; | 1114 | 3.73M | assert(HeaderData.isLoopHeader()); | 1115 | 3.73M | | 1116 | 3.73M | Working[Index].Loop = HeaderData.Loop; | 1117 | 3.73M | HeaderData.Loop->Nodes.push_back(Index); | 1118 | 3.73M | DEBUG(dbgs() << " - loop = " << getBlockName(Header) | 1119 | 13.3M | << ": member = " << getBlockName(Index) << "\n"); | 1120 | 13.3M | } | 1121 | 2.48M | } |
|
1122 | | |
1123 | 4.87M | template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInLoops() { |
1124 | 4.87M | // Visit loops with the deepest first, and the top-level loops last. |
1125 | 7.98M | for (auto L = Loops.rbegin(), E = Loops.rend(); L != E7.98M ; ++L3.10M ) { |
1126 | 3.10M | if (computeMassInLoop(*L)) |
1127 | 3.10M | continue; |
1128 | 207 | auto Next = std::next(L); |
1129 | 207 | computeIrreducibleMass(&*L, L.base()); |
1130 | 207 | L = std::prev(Next); |
1131 | 207 | if (computeMassInLoop(*L)) |
1132 | 207 | continue; |
1133 | 0 | llvm_unreachable0 ("unhandled irreducible control flow"); |
1134 | 207 | } |
1135 | 4.87M | } llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::computeMassInLoops() Line | Count | Source | 1123 | 2.38M | template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInLoops() { | 1124 | 2.38M | // Visit loops with the deepest first, and the top-level loops last. | 1125 | 3.83M | for (auto L = Loops.rbegin(), E = Loops.rend(); L != E3.83M ; ++L1.45M ) { | 1126 | 1.45M | if (computeMassInLoop(*L)) | 1127 | 1.45M | continue; | 1128 | 112 | auto Next = std::next(L); | 1129 | 112 | computeIrreducibleMass(&*L, L.base()); | 1130 | 112 | L = std::prev(Next); | 1131 | 112 | if (computeMassInLoop(*L)) | 1132 | 112 | continue; | 1133 | 0 | llvm_unreachable0 ("unhandled irreducible control flow"); | 1134 | | } | 1135 | 2.38M | } |
llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::computeMassInLoops() Line | Count | Source | 1123 | 2.48M | template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInLoops() { | 1124 | 2.48M | // Visit loops with the deepest first, and the top-level loops last. | 1125 | 4.14M | for (auto L = Loops.rbegin(), E = Loops.rend(); L != E4.14M ; ++L1.65M ) { | 1126 | 1.65M | if (computeMassInLoop(*L)) | 1127 | 1.65M | continue; | 1128 | 95 | auto Next = std::next(L); | 1129 | 95 | computeIrreducibleMass(&*L, L.base()); | 1130 | 95 | L = std::prev(Next); | 1131 | 95 | if (computeMassInLoop(*L)) | 1132 | 95 | continue; | 1133 | 0 | llvm_unreachable0 ("unhandled irreducible control flow"); | 1134 | | } | 1135 | 2.48M | } |
|
1136 | | |
1137 | | template <class BT> |
1138 | 3.11M | bool BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) { |
1139 | 3.11M | // Compute mass in loop. |
1140 | 3.11M | DEBUG(dbgs() << "compute-mass-in-loop: " << getLoopName(Loop) << "\n"); |
1141 | 3.11M | |
1142 | 3.11M | if (Loop.isIrreducible()3.11M ) { |
1143 | 804 | BlockMass Remaining = BlockMass::getFull(); |
1144 | 3.71k | for (uint32_t H = 0; H < Loop.NumHeaders3.71k ; ++H2.91k ) { |
1145 | 2.91k | auto &Mass = Working[Loop.Nodes[H].Index].getMass(); |
1146 | 2.91k | Mass = Remaining * BranchProbability(1, Loop.NumHeaders - H); |
1147 | 2.91k | Remaining -= Mass; |
1148 | 2.91k | } |
1149 | 804 | for (const BlockNode &M : Loop.Nodes) |
1150 | 8.80k | if (8.80k !propagateMassToSuccessors(&Loop, M)8.80k ) |
1151 | 0 | llvm_unreachable("unhandled irreducible control flow"); |
1152 | 804 | |
1153 | 804 | adjustLoopHeaderMass(Loop); |
1154 | 3.11M | } else { |
1155 | 3.11M | Working[Loop.getHeader().Index].getMass() = BlockMass::getFull(); |
1156 | 3.11M | if (!propagateMassToSuccessors(&Loop, Loop.getHeader())) |
1157 | 0 | llvm_unreachable("irreducible control flow to loop header!?"); |
1158 | 3.11M | for (const BlockNode &M : Loop.members()) |
1159 | 7.81M | if (7.81M !propagateMassToSuccessors(&Loop, M)7.81M ) |
1160 | 7.81M | // Irreducible backedge. |
1161 | 207 | return false; |
1162 | 3.11M | } |
1163 | 3.11M | |
1164 | 3.11M | computeLoopScale(Loop); |
1165 | 3.11M | packageLoop(Loop); |
1166 | 3.11M | return true; |
1167 | 3.11M | } llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::computeMassInLoop(llvm::BlockFrequencyInfoImplBase::LoopData&) Line | Count | Source | 1138 | 1.45M | bool BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) { | 1139 | 1.45M | // Compute mass in loop. | 1140 | 1.45M | DEBUG(dbgs() << "compute-mass-in-loop: " << getLoopName(Loop) << "\n"); | 1141 | 1.45M | | 1142 | 1.45M | if (Loop.isIrreducible()1.45M ) { | 1143 | 401 | BlockMass Remaining = BlockMass::getFull(); | 1144 | 1.81k | for (uint32_t H = 0; H < Loop.NumHeaders1.81k ; ++H1.41k ) { | 1145 | 1.41k | auto &Mass = Working[Loop.Nodes[H].Index].getMass(); | 1146 | 1.41k | Mass = Remaining * BranchProbability(1, Loop.NumHeaders - H); | 1147 | 1.41k | Remaining -= Mass; | 1148 | 1.41k | } | 1149 | 401 | for (const BlockNode &M : Loop.Nodes) | 1150 | 4.34k | if (4.34k !propagateMassToSuccessors(&Loop, M)4.34k ) | 1151 | 0 | llvm_unreachable("unhandled irreducible control flow"); | 1152 | 401 | | 1153 | 401 | adjustLoopHeaderMass(Loop); | 1154 | 1.45M | } else { | 1155 | 1.45M | Working[Loop.getHeader().Index].getMass() = BlockMass::getFull(); | 1156 | 1.45M | if (!propagateMassToSuccessors(&Loop, Loop.getHeader())) | 1157 | 0 | llvm_unreachable("irreducible control flow to loop header!?"); | 1158 | 1.45M | for (const BlockNode &M : Loop.members()) | 1159 | 3.60M | if (3.60M !propagateMassToSuccessors(&Loop, M)3.60M ) | 1160 | 3.60M | // Irreducible backedge. | 1161 | 112 | return false; | 1162 | 1.45M | } | 1163 | 1.45M | | 1164 | 1.45M | computeLoopScale(Loop); | 1165 | 1.45M | packageLoop(Loop); | 1166 | 1.45M | return true; | 1167 | 1.45M | } |
llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::computeMassInLoop(llvm::BlockFrequencyInfoImplBase::LoopData&) Line | Count | Source | 1138 | 1.65M | bool BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) { | 1139 | 1.65M | // Compute mass in loop. | 1140 | 1.65M | DEBUG(dbgs() << "compute-mass-in-loop: " << getLoopName(Loop) << "\n"); | 1141 | 1.65M | | 1142 | 1.65M | if (Loop.isIrreducible()1.65M ) { | 1143 | 403 | BlockMass Remaining = BlockMass::getFull(); | 1144 | 1.89k | for (uint32_t H = 0; H < Loop.NumHeaders1.89k ; ++H1.49k ) { | 1145 | 1.49k | auto &Mass = Working[Loop.Nodes[H].Index].getMass(); | 1146 | 1.49k | Mass = Remaining * BranchProbability(1, Loop.NumHeaders - H); | 1147 | 1.49k | Remaining -= Mass; | 1148 | 1.49k | } | 1149 | 403 | for (const BlockNode &M : Loop.Nodes) | 1150 | 4.46k | if (4.46k !propagateMassToSuccessors(&Loop, M)4.46k ) | 1151 | 0 | llvm_unreachable("unhandled irreducible control flow"); | 1152 | 403 | | 1153 | 403 | adjustLoopHeaderMass(Loop); | 1154 | 1.65M | } else { | 1155 | 1.65M | Working[Loop.getHeader().Index].getMass() = BlockMass::getFull(); | 1156 | 1.65M | if (!propagateMassToSuccessors(&Loop, Loop.getHeader())) | 1157 | 0 | llvm_unreachable("irreducible control flow to loop header!?"); | 1158 | 1.65M | for (const BlockNode &M : Loop.members()) | 1159 | 4.20M | if (4.20M !propagateMassToSuccessors(&Loop, M)4.20M ) | 1160 | 4.20M | // Irreducible backedge. | 1161 | 95 | return false; | 1162 | 1.65M | } | 1163 | 1.65M | | 1164 | 1.65M | computeLoopScale(Loop); | 1165 | 1.65M | packageLoop(Loop); | 1166 | 1.65M | return true; | 1167 | 1.65M | } |
|
1168 | | |
1169 | | template <class BT> |
1170 | 4.87M | bool BlockFrequencyInfoImpl<BT>::tryToComputeMassInFunction() { |
1171 | 4.87M | // Compute mass in function. |
1172 | 4.87M | DEBUG(dbgs() << "compute-mass-in-function\n"); |
1173 | 4.87M | assert(!Working.empty() && "no blocks in function"); |
1174 | 4.87M | assert(!Working[0].isLoopHeader() && "entry block is a loop header"); |
1175 | 4.87M | |
1176 | 4.87M | Working[0].getMass() = BlockMass::getFull(); |
1177 | 39.9M | for (rpot_iterator I = rpot_begin(), IE = rpot_end(); I != IE39.9M ; ++I35.0M ) { |
1178 | 35.0M | // Check for nodes that have been packaged. |
1179 | 35.0M | BlockNode Node = getNode(I); |
1180 | 35.0M | if (Working[Node.Index].isPackaged()) |
1181 | 7.81M | continue; |
1182 | 35.0M | |
1183 | 27.2M | if (27.2M !propagateMassToSuccessors(nullptr, Node)27.2M ) |
1184 | 377 | return false; |
1185 | 35.0M | } |
1186 | 4.87M | return true; |
1187 | 4.87M | } llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::tryToComputeMassInFunction() Line | Count | Source | 1170 | 2.49M | bool BlockFrequencyInfoImpl<BT>::tryToComputeMassInFunction() { | 1171 | 2.49M | // Compute mass in function. | 1172 | 2.49M | DEBUG(dbgs() << "compute-mass-in-function\n"); | 1173 | 2.49M | assert(!Working.empty() && "no blocks in function"); | 1174 | 2.49M | assert(!Working[0].isLoopHeader() && "entry block is a loop header"); | 1175 | 2.49M | | 1176 | 2.49M | Working[0].getMass() = BlockMass::getFull(); | 1177 | 21.2M | for (rpot_iterator I = rpot_begin(), IE = rpot_end(); I != IE21.2M ; ++I18.7M ) { | 1178 | 18.7M | // Check for nodes that have been packaged. | 1179 | 18.7M | BlockNode Node = getNode(I); | 1180 | 18.7M | if (Working[Node.Index].isPackaged()) | 1181 | 4.21M | continue; | 1182 | 18.7M | | 1183 | 14.5M | if (14.5M !propagateMassToSuccessors(nullptr, Node)14.5M ) | 1184 | 172 | return false; | 1185 | 18.7M | } | 1186 | 2.48M | return true; | 1187 | 2.49M | } |
llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::tryToComputeMassInFunction() Line | Count | Source | 1170 | 2.38M | bool BlockFrequencyInfoImpl<BT>::tryToComputeMassInFunction() { | 1171 | 2.38M | // Compute mass in function. | 1172 | 2.38M | DEBUG(dbgs() << "compute-mass-in-function\n"); | 1173 | 2.38M | assert(!Working.empty() && "no blocks in function"); | 1174 | 2.38M | assert(!Working[0].isLoopHeader() && "entry block is a loop header"); | 1175 | 2.38M | | 1176 | 2.38M | Working[0].getMass() = BlockMass::getFull(); | 1177 | 18.7M | for (rpot_iterator I = rpot_begin(), IE = rpot_end(); I != IE18.7M ; ++I16.3M ) { | 1178 | 16.3M | // Check for nodes that have been packaged. | 1179 | 16.3M | BlockNode Node = getNode(I); | 1180 | 16.3M | if (Working[Node.Index].isPackaged()) | 1181 | 3.60M | continue; | 1182 | 16.3M | | 1183 | 12.7M | if (12.7M !propagateMassToSuccessors(nullptr, Node)12.7M ) | 1184 | 205 | return false; | 1185 | 16.3M | } | 1186 | 2.38M | return true; | 1187 | 2.38M | } |
|
1188 | | |
1189 | 4.87M | template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInFunction() { |
1190 | 4.87M | if (tryToComputeMassInFunction()) |
1191 | 4.87M | return; |
1192 | 378 | computeIrreducibleMass(nullptr, Loops.begin()); |
1193 | 378 | if (tryToComputeMassInFunction()) |
1194 | 377 | return; |
1195 | 1 | llvm_unreachable1 ("unhandled irreducible control flow"); |
1196 | 378 | } llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::computeMassInFunction() Line | Count | Source | 1189 | 2.48M | template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInFunction() { | 1190 | 2.48M | if (tryToComputeMassInFunction()) | 1191 | 2.48M | return; | 1192 | 170 | computeIrreducibleMass(nullptr, Loops.begin()); | 1193 | 170 | if (tryToComputeMassInFunction()) | 1194 | 172 | return; | 1195 | 18.4E | llvm_unreachable18.4E ("unhandled irreducible control flow"); | 1196 | | } |
llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::computeMassInFunction() Line | Count | Source | 1189 | 2.38M | template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInFunction() { | 1190 | 2.38M | if (tryToComputeMassInFunction()) | 1191 | 2.38M | return; | 1192 | 208 | computeIrreducibleMass(nullptr, Loops.begin()); | 1193 | 208 | if (tryToComputeMassInFunction()) | 1194 | 205 | return; | 1195 | 3 | llvm_unreachable3 ("unhandled irreducible control flow"); | 1196 | | } |
|
1197 | | |
1198 | | /// \note This should be a lambda, but that crashes GCC 4.7. |
1199 | | namespace bfi_detail { |
1200 | | |
1201 | | template <class BT> struct BlockEdgesAdder { |
1202 | | using BlockT = BT; |
1203 | | using LoopData = BlockFrequencyInfoImplBase::LoopData; |
1204 | | using Successor = GraphTraits<const BlockT *>; |
1205 | | |
1206 | | const BlockFrequencyInfoImpl<BT> &BFI; |
1207 | | |
1208 | | explicit BlockEdgesAdder(const BlockFrequencyInfoImpl<BT> &BFI) |
1209 | 584 | : BFI(BFI) {} llvm::bfi_detail::BlockEdgesAdder<llvm::BasicBlock>::BlockEdgesAdder(llvm::BlockFrequencyInfoImpl<llvm::BasicBlock> const&) Line | Count | Source | 1209 | 267 | : BFI(BFI) {} |
llvm::bfi_detail::BlockEdgesAdder<llvm::MachineBasicBlock>::BlockEdgesAdder(llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock> const&) Line | Count | Source | 1209 | 317 | : BFI(BFI) {} |
|
1210 | | |
1211 | | void operator()(IrreducibleGraph &G, IrreducibleGraph::IrrNode &Irr, |
1212 | 19.6k | const LoopData *OuterLoop) { |
1213 | 19.6k | const BlockT *BB = BFI.RPOT[Irr.Node.Index]; |
1214 | 19.6k | for (const auto Succ : children<const BlockT *>(BB)) |
1215 | 35.9k | G.addEdge(Irr, BFI.getNode(Succ), OuterLoop); |
1216 | 19.6k | } llvm::bfi_detail::BlockEdgesAdder<llvm::MachineBasicBlock>::operator()(llvm::bfi_detail::IrreducibleGraph&, llvm::bfi_detail::IrreducibleGraph::IrrNode&, llvm::BlockFrequencyInfoImplBase::LoopData const*) Line | Count | Source | 1212 | 10.1k | const LoopData *OuterLoop) { | 1213 | 10.1k | const BlockT *BB = BFI.RPOT[Irr.Node.Index]; | 1214 | 10.1k | for (const auto Succ : children<const BlockT *>(BB)) | 1215 | 22.0k | G.addEdge(Irr, BFI.getNode(Succ), OuterLoop); | 1216 | 10.1k | } |
llvm::bfi_detail::BlockEdgesAdder<llvm::BasicBlock>::operator()(llvm::bfi_detail::IrreducibleGraph&, llvm::bfi_detail::IrreducibleGraph::IrrNode&, llvm::BlockFrequencyInfoImplBase::LoopData const*) Line | Count | Source | 1212 | 9.49k | const LoopData *OuterLoop) { | 1213 | 9.49k | const BlockT *BB = BFI.RPOT[Irr.Node.Index]; | 1214 | 9.49k | for (const auto Succ : children<const BlockT *>(BB)) | 1215 | 13.8k | G.addEdge(Irr, BFI.getNode(Succ), OuterLoop); | 1216 | 9.49k | } |
|
1217 | | }; |
1218 | | |
1219 | | } // end namespace bfi_detail |
1220 | | |
1221 | | template <class BT> |
1222 | | void BlockFrequencyInfoImpl<BT>::computeIrreducibleMass( |
1223 | 584 | LoopData *OuterLoop, std::list<LoopData>::iterator Insert) { |
1224 | 584 | DEBUG(dbgs() << "analyze-irreducible-in-"; |
1225 | 584 | if (OuterLoop) dbgs() << "loop: " << getLoopName(*OuterLoop) << "\n"; |
1226 | 584 | else dbgs() << "function\n"); |
1227 | 584 | |
1228 | 584 | using namespace bfi_detail; |
1229 | 584 | |
1230 | 584 | // Ideally, addBlockEdges() would be declared here as a lambda, but that |
1231 | 584 | // crashes GCC 4.7. |
1232 | 584 | BlockEdgesAdder<BT> addBlockEdges(*this); |
1233 | 584 | IrreducibleGraph G(*this, OuterLoop, addBlockEdges); |
1234 | 584 | |
1235 | 584 | for (auto &L : analyzeIrreducible(G, OuterLoop, Insert)) |
1236 | 804 | computeMassInLoop(L); |
1237 | 584 | |
1238 | 584 | if (!OuterLoop) |
1239 | 377 | return; |
1240 | 207 | updateLoopWithIrreducible(*OuterLoop); |
1241 | 207 | } llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::computeIrreducibleMass(llvm::BlockFrequencyInfoImplBase::LoopData*, std::__1::__list_iterator<llvm::BlockFrequencyInfoImplBase::LoopData, void*>) Line | Count | Source | 1223 | 267 | LoopData *OuterLoop, std::list<LoopData>::iterator Insert) { | 1224 | 267 | DEBUG(dbgs() << "analyze-irreducible-in-"; | 1225 | 267 | if (OuterLoop) dbgs() << "loop: " << getLoopName(*OuterLoop) << "\n"; | 1226 | 267 | else dbgs() << "function\n"); | 1227 | 267 | | 1228 | 267 | using namespace bfi_detail; | 1229 | 267 | | 1230 | 267 | // Ideally, addBlockEdges() would be declared here as a lambda, but that | 1231 | 267 | // crashes GCC 4.7. | 1232 | 267 | BlockEdgesAdder<BT> addBlockEdges(*this); | 1233 | 267 | IrreducibleGraph G(*this, OuterLoop, addBlockEdges); | 1234 | 267 | | 1235 | 267 | for (auto &L : analyzeIrreducible(G, OuterLoop, Insert)) | 1236 | 403 | computeMassInLoop(L); | 1237 | 267 | | 1238 | 267 | if (!OuterLoop) | 1239 | 172 | return; | 1240 | 95 | updateLoopWithIrreducible(*OuterLoop); | 1241 | 95 | } |
llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::computeIrreducibleMass(llvm::BlockFrequencyInfoImplBase::LoopData*, std::__1::__list_iterator<llvm::BlockFrequencyInfoImplBase::LoopData, void*>) Line | Count | Source | 1223 | 317 | LoopData *OuterLoop, std::list<LoopData>::iterator Insert) { | 1224 | 317 | DEBUG(dbgs() << "analyze-irreducible-in-"; | 1225 | 317 | if (OuterLoop) dbgs() << "loop: " << getLoopName(*OuterLoop) << "\n"; | 1226 | 317 | else dbgs() << "function\n"); | 1227 | 317 | | 1228 | 317 | using namespace bfi_detail; | 1229 | 317 | | 1230 | 317 | // Ideally, addBlockEdges() would be declared here as a lambda, but that | 1231 | 317 | // crashes GCC 4.7. | 1232 | 317 | BlockEdgesAdder<BT> addBlockEdges(*this); | 1233 | 317 | IrreducibleGraph G(*this, OuterLoop, addBlockEdges); | 1234 | 317 | | 1235 | 317 | for (auto &L : analyzeIrreducible(G, OuterLoop, Insert)) | 1236 | 401 | computeMassInLoop(L); | 1237 | 317 | | 1238 | 317 | if (!OuterLoop) | 1239 | 205 | return; | 1240 | 112 | updateLoopWithIrreducible(*OuterLoop); | 1241 | 112 | } |
|
1242 | | |
1243 | | // A helper function that converts a branch probability into weight. |
1244 | 46.1M | inline uint32_t getWeightFromBranchProb(const BranchProbability Prob) { |
1245 | 46.1M | return Prob.getNumerator(); |
1246 | 46.1M | } |
1247 | | |
1248 | | template <class BT> |
1249 | | bool |
1250 | | BlockFrequencyInfoImpl<BT>::propagateMassToSuccessors(LoopData *OuterLoop, |
1251 | 38.1M | const BlockNode &Node) { |
1252 | 38.1M | DEBUG(dbgs() << " - node: " << getBlockName(Node) << "\n"); |
1253 | 38.1M | // Calculate probability for successors. |
1254 | 38.1M | Distribution Dist; |
1255 | 38.1M | if (auto *Loop38.1M = Working[Node.Index].getPackagedLoop()) { |
1256 | 3.11M | assert(Loop != OuterLoop && "Cannot propagate mass in a packaged loop"); |
1257 | 3.11M | if (!addLoopSuccessorsToDist(OuterLoop, *Loop, Dist)) |
1258 | 3.11M | // Irreducible backedge. |
1259 | 48 | return false; |
1260 | 38.1M | } else { |
1261 | 35.0M | const BlockT *BB = getBlock(Node); |
1262 | 35.0M | for (const auto Succ : children<const BlockT *>(BB)) |
1263 | 46.1M | if (46.1M !addToDist(Dist, OuterLoop, Node, getNode(Succ), |
1264 | 46.1M | getWeightFromBranchProb(BPI->getEdgeProbability(BB, Succ)))) |
1265 | 46.1M | // Irreducible backedge. |
1266 | 536 | return false; |
1267 | 35.0M | } |
1268 | 38.1M | |
1269 | 38.1M | // Distribute mass to successors, saving exit and backedge data in the |
1270 | 38.1M | // loop header. |
1271 | 38.1M | distributeMass(Node, OuterLoop, Dist); |
1272 | 38.1M | return true; |
1273 | 38.1M | } llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::propagateMassToSuccessors(llvm::BlockFrequencyInfoImplBase::LoopData*, llvm::BlockFrequencyInfoImplBase::BlockNode const&) Line | Count | Source | 1251 | 20.3M | const BlockNode &Node) { | 1252 | 20.3M | DEBUG(dbgs() << " - node: " << getBlockName(Node) << "\n"); | 1253 | 20.3M | // Calculate probability for successors. | 1254 | 20.3M | Distribution Dist; | 1255 | 20.3M | if (auto *Loop20.3M = Working[Node.Index].getPackagedLoop()) { | 1256 | 1.65M | assert(Loop != OuterLoop && "Cannot propagate mass in a packaged loop"); | 1257 | 1.65M | if (!addLoopSuccessorsToDist(OuterLoop, *Loop, Dist)) | 1258 | 1.65M | // Irreducible backedge. | 1259 | 4 | return false; | 1260 | 20.3M | } else { | 1261 | 18.7M | const BlockT *BB = getBlock(Node); | 1262 | 18.7M | for (const auto Succ : children<const BlockT *>(BB)) | 1263 | 25.2M | if (25.2M !addToDist(Dist, OuterLoop, Node, getNode(Succ), | 1264 | 25.2M | getWeightFromBranchProb(BPI->getEdgeProbability(BB, Succ)))) | 1265 | 25.2M | // Irreducible backedge. | 1266 | 263 | return false; | 1267 | 18.7M | } | 1268 | 20.3M | | 1269 | 20.3M | // Distribute mass to successors, saving exit and backedge data in the | 1270 | 20.3M | // loop header. | 1271 | 20.3M | distributeMass(Node, OuterLoop, Dist); | 1272 | 20.3M | return true; | 1273 | 20.3M | } |
llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::propagateMassToSuccessors(llvm::BlockFrequencyInfoImplBase::LoopData*, llvm::BlockFrequencyInfoImplBase::BlockNode const&) Line | Count | Source | 1251 | 17.8M | const BlockNode &Node) { | 1252 | 17.8M | DEBUG(dbgs() << " - node: " << getBlockName(Node) << "\n"); | 1253 | 17.8M | // Calculate probability for successors. | 1254 | 17.8M | Distribution Dist; | 1255 | 17.8M | if (auto *Loop17.8M = Working[Node.Index].getPackagedLoop()) { | 1256 | 1.45M | assert(Loop != OuterLoop && "Cannot propagate mass in a packaged loop"); | 1257 | 1.45M | if (!addLoopSuccessorsToDist(OuterLoop, *Loop, Dist)) | 1258 | 1.45M | // Irreducible backedge. | 1259 | 44 | return false; | 1260 | 17.8M | } else { | 1261 | 16.3M | const BlockT *BB = getBlock(Node); | 1262 | 16.3M | for (const auto Succ : children<const BlockT *>(BB)) | 1263 | 20.8M | if (20.8M !addToDist(Dist, OuterLoop, Node, getNode(Succ), | 1264 | 20.8M | getWeightFromBranchProb(BPI->getEdgeProbability(BB, Succ)))) | 1265 | 20.8M | // Irreducible backedge. | 1266 | 273 | return false; | 1267 | 16.3M | } | 1268 | 17.8M | | 1269 | 17.8M | // Distribute mass to successors, saving exit and backedge data in the | 1270 | 17.8M | // loop header. | 1271 | 17.8M | distributeMass(Node, OuterLoop, Dist); | 1272 | 17.8M | return true; | 1273 | 17.8M | } |
|
1274 | | |
1275 | | template <class BT> |
1276 | 52 | raw_ostream &BlockFrequencyInfoImpl<BT>::print(raw_ostream &OS) const { |
1277 | 52 | if (!F) |
1278 | 0 | return OS; |
1279 | 52 | OS << "block-frequency-info: " << F->getName() << "\n"; |
1280 | 435 | for (const BlockT &BB : *F) { |
1281 | 435 | OS << " - " << bfi_detail::getBlockName(&BB) << ": float = "; |
1282 | 435 | getFloatingBlockFreq(&BB).print(OS, 5) |
1283 | 435 | << ", int = " << getBlockFreq(&BB).getFrequency(); |
1284 | 435 | if (Optional<uint64_t> ProfileCount = |
1285 | 435 | BlockFrequencyInfoImplBase::getBlockProfileCount( |
1286 | 435 | *F->getFunction(), getNode(&BB))) |
1287 | 0 | OS << ", count = " << ProfileCount.getValue(); |
1288 | 435 | OS << "\n"; |
1289 | 435 | } |
1290 | 52 | |
1291 | 52 | // Add an extra newline for readability. |
1292 | 52 | OS << "\n"; |
1293 | 52 | return OS; |
1294 | 52 | } Unexecuted instantiation: llvm::BlockFrequencyInfoImpl<llvm::MachineBasicBlock>::print(llvm::raw_ostream&) const llvm::BlockFrequencyInfoImpl<llvm::BasicBlock>::print(llvm::raw_ostream&) const Line | Count | Source | 1276 | 52 | raw_ostream &BlockFrequencyInfoImpl<BT>::print(raw_ostream &OS) const { | 1277 | 52 | if (!F) | 1278 | 0 | return OS; | 1279 | 52 | OS << "block-frequency-info: " << F->getName() << "\n"; | 1280 | 435 | for (const BlockT &BB : *F) { | 1281 | 435 | OS << " - " << bfi_detail::getBlockName(&BB) << ": float = "; | 1282 | 435 | getFloatingBlockFreq(&BB).print(OS, 5) | 1283 | 435 | << ", int = " << getBlockFreq(&BB).getFrequency(); | 1284 | 435 | if (Optional<uint64_t> ProfileCount = | 1285 | 435 | BlockFrequencyInfoImplBase::getBlockProfileCount( | 1286 | 435 | *F->getFunction(), getNode(&BB))) | 1287 | 0 | OS << ", count = " << ProfileCount.getValue(); | 1288 | 435 | OS << "\n"; | 1289 | 435 | } | 1290 | 52 | | 1291 | 52 | // Add an extra newline for readability. | 1292 | 52 | OS << "\n"; | 1293 | 52 | return OS; | 1294 | 52 | } |
|
1295 | | |
1296 | | // Graph trait base class for block frequency information graph |
1297 | | // viewer. |
1298 | | |
1299 | | enum GVDAGType { GVDT_None, GVDT_Fraction, GVDT_Integer, GVDT_Count }; |
1300 | | |
1301 | | template <class BlockFrequencyInfoT, class BranchProbabilityInfoT> |
1302 | | struct BFIDOTGraphTraitsBase : public DefaultDOTGraphTraits { |
1303 | | using GTraits = GraphTraits<BlockFrequencyInfoT *>; |
1304 | | using NodeRef = typename GTraits::NodeRef; |
1305 | | using EdgeIter = typename GTraits::ChildIteratorType; |
1306 | | using NodeIter = typename GTraits::nodes_iterator; |
1307 | | |
1308 | | uint64_t MaxFrequency = 0; |
1309 | | |
1310 | | explicit BFIDOTGraphTraitsBase(bool isSimple = false) |
1311 | 0 | : DefaultDOTGraphTraits(isSimple) {} Unexecuted instantiation: llvm::BFIDOTGraphTraitsBase<llvm::MachineBlockFrequencyInfo, llvm::MachineBranchProbabilityInfo>::BFIDOTGraphTraitsBase(bool) Unexecuted instantiation: llvm::BFIDOTGraphTraitsBase<llvm::BlockFrequencyInfo, llvm::BranchProbabilityInfo>::BFIDOTGraphTraitsBase(bool) |
1312 | | |
1313 | 0 | static std::string getGraphName(const BlockFrequencyInfoT *G) { |
1314 | 0 | return G->getFunction()->getName(); |
1315 | 0 | } Unexecuted instantiation: llvm::BFIDOTGraphTraitsBase<llvm::MachineBlockFrequencyInfo, llvm::MachineBranchProbabilityInfo>::getGraphName(llvm::MachineBlockFrequencyInfo const*) Unexecuted instantiation: llvm::BFIDOTGraphTraitsBase<llvm::BlockFrequencyInfo, llvm::BranchProbabilityInfo>::getGraphName(llvm::BlockFrequencyInfo const*) |
1316 | | |
1317 | | std::string getNodeAttributes(NodeRef Node, const BlockFrequencyInfoT *Graph, |
1318 | 0 | unsigned HotPercentThreshold = 0) { |
1319 | 0 | std::string Result; |
1320 | 0 | if (!HotPercentThreshold) |
1321 | 0 | return Result; |
1322 | 0 |
|
1323 | 0 | // Compute MaxFrequency on the fly: |
1324 | 0 | if (0 !MaxFrequency0 ) { |
1325 | 0 | for (NodeIter I = GTraits::nodes_begin(Graph), |
1326 | 0 | E = GTraits::nodes_end(Graph); |
1327 | 0 | I != E0 ; ++I0 ) { |
1328 | 0 | NodeRef N = *I; |
1329 | 0 | MaxFrequency = |
1330 | 0 | std::max(MaxFrequency, Graph->getBlockFreq(N).getFrequency()); |
1331 | 0 | } |
1332 | 0 | } |
1333 | 0 | BlockFrequency Freq = Graph->getBlockFreq(Node); |
1334 | 0 | BlockFrequency HotFreq = |
1335 | 0 | (BlockFrequency(MaxFrequency) * |
1336 | 0 | BranchProbability::getBranchProbability(HotPercentThreshold, 100)); |
1337 | 0 |
|
1338 | 0 | if (Freq < HotFreq) |
1339 | 0 | return Result; |
1340 | 0 |
|
1341 | 0 | raw_string_ostream OS(Result); |
1342 | 0 | OS << "color=\"red\""; |
1343 | 0 | OS.flush(); |
1344 | 0 | return Result; |
1345 | 0 | } Unexecuted instantiation: llvm::BFIDOTGraphTraitsBase<llvm::BlockFrequencyInfo, llvm::BranchProbabilityInfo>::getNodeAttributes(llvm::BasicBlock const*, llvm::BlockFrequencyInfo const*, unsigned int) Unexecuted instantiation: llvm::BFIDOTGraphTraitsBase<llvm::MachineBlockFrequencyInfo, llvm::MachineBranchProbabilityInfo>::getNodeAttributes(llvm::MachineBasicBlock const*, llvm::MachineBlockFrequencyInfo const*, unsigned int) |
1346 | | |
1347 | | std::string getNodeLabel(NodeRef Node, const BlockFrequencyInfoT *Graph, |
1348 | 0 | GVDAGType GType, int layout_order = -1) { |
1349 | 0 | std::string Result; |
1350 | 0 | raw_string_ostream OS(Result); |
1351 | 0 |
|
1352 | 0 | if (layout_order != -1) |
1353 | 0 | OS << Node->getName() << "[" << layout_order << "] : "; |
1354 | 0 | else |
1355 | 0 | OS << Node->getName() << " : "; |
1356 | 0 | switch (GType) { |
1357 | 0 | case GVDT_Fraction: |
1358 | 0 | Graph->printBlockFreq(OS, Node); |
1359 | 0 | break; |
1360 | 0 | case GVDT_Integer: |
1361 | 0 | OS << Graph->getBlockFreq(Node).getFrequency(); |
1362 | 0 | break; |
1363 | 0 | case GVDT_Count: { |
1364 | 0 | auto Count = Graph->getBlockProfileCount(Node); |
1365 | 0 | if (Count) |
1366 | 0 | OS << Count.getValue(); |
1367 | 0 | else |
1368 | 0 | OS << "Unknown"; |
1369 | 0 | break; |
1370 | 0 | } |
1371 | 0 | case GVDT_None: |
1372 | 0 | llvm_unreachable("If we are not supposed to render a graph we should " |
1373 | 0 | "never reach this point."); |
1374 | 0 | } |
1375 | 0 | return Result; |
1376 | 0 | } Unexecuted instantiation: llvm::BFIDOTGraphTraitsBase<llvm::MachineBlockFrequencyInfo, llvm::MachineBranchProbabilityInfo>::getNodeLabel(llvm::MachineBasicBlock const*, llvm::MachineBlockFrequencyInfo const*, llvm::GVDAGType, int) Unexecuted instantiation: llvm::BFIDOTGraphTraitsBase<llvm::BlockFrequencyInfo, llvm::BranchProbabilityInfo>::getNodeLabel(llvm::BasicBlock const*, llvm::BlockFrequencyInfo const*, llvm::GVDAGType, int) |
1377 | | |
1378 | | std::string getEdgeAttributes(NodeRef Node, EdgeIter EI, |
1379 | | const BlockFrequencyInfoT *BFI, |
1380 | | const BranchProbabilityInfoT *BPI, |
1381 | 0 | unsigned HotPercentThreshold = 0) { |
1382 | 0 | std::string Str; |
1383 | 0 | if (!BPI) |
1384 | 0 | return Str; |
1385 | 0 |
|
1386 | 0 | BranchProbability BP = BPI->getEdgeProbability(Node, EI); |
1387 | 0 | uint32_t N = BP.getNumerator(); |
1388 | 0 | uint32_t D = BP.getDenominator(); |
1389 | 0 | double Percent = 100.0 * N / D; |
1390 | 0 | raw_string_ostream OS(Str); |
1391 | 0 | OS << format("label=\"%.1f%%\"", Percent); |
1392 | 0 |
|
1393 | 0 | if (HotPercentThreshold0 ) { |
1394 | 0 | BlockFrequency EFreq = BFI->getBlockFreq(Node) * BP; |
1395 | 0 | BlockFrequency HotFreq = BlockFrequency(MaxFrequency) * |
1396 | 0 | BranchProbability(HotPercentThreshold, 100); |
1397 | 0 |
|
1398 | 0 | if (EFreq >= HotFreq0 ) { |
1399 | 0 | OS << ",color=\"red\""; |
1400 | 0 | } |
1401 | 0 | } |
1402 | 0 |
|
1403 | 0 | OS.flush(); |
1404 | 0 | return Str; |
1405 | 0 | } Unexecuted instantiation: llvm::BFIDOTGraphTraitsBase<llvm::BlockFrequencyInfo, llvm::BranchProbabilityInfo>::getEdgeAttributes(llvm::BasicBlock const*, llvm::TerminatorInst::SuccIterator<llvm::TerminatorInst const*, llvm::BasicBlock const>, llvm::BlockFrequencyInfo const*, llvm::BranchProbabilityInfo const*, unsigned int) Unexecuted instantiation: llvm::BFIDOTGraphTraitsBase<llvm::MachineBlockFrequencyInfo, llvm::MachineBranchProbabilityInfo>::getEdgeAttributes(llvm::MachineBasicBlock const*, std::__1::__wrap_iter<llvm::MachineBasicBlock* const*>, llvm::MachineBlockFrequencyInfo const*, llvm::MachineBranchProbabilityInfo const*, unsigned int) |
1406 | | }; |
1407 | | |
1408 | | } // end namespace llvm |
1409 | | |
1410 | | #undef DEBUG_TYPE |
1411 | | |
1412 | | #endif // LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H |