Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/StackColoring.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- StackColoring.cpp -------------------------------------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This pass implements the stack-coloring optimization that looks for
11
// lifetime markers machine instructions (LIFESTART_BEGIN and LIFESTART_END),
12
// which represent the possible lifetime of stack slots. It attempts to
13
// merge disjoint stack slots and reduce the used stack space.
14
// NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
15
//
16
// TODO: In the future we plan to improve stack coloring in the following ways:
17
// 1. Allow merging multiple small slots into a single larger slot at different
18
//    offsets.
19
// 2. Merge this pass with StackSlotColoring and allow merging of allocas with
20
//    spill slots.
21
//
22
//===----------------------------------------------------------------------===//
23
24
#include "llvm/ADT/BitVector.h"
25
#include "llvm/ADT/DepthFirstIterator.h"
26
#include "llvm/ADT/SetVector.h"
27
#include "llvm/ADT/SmallPtrSet.h"
28
#include "llvm/ADT/Statistic.h"
29
#include "llvm/Analysis/ValueTracking.h"
30
#include "llvm/CodeGen/LiveInterval.h"
31
#include "llvm/CodeGen/MachineBasicBlock.h"
32
#include "llvm/CodeGen/MachineFrameInfo.h"
33
#include "llvm/CodeGen/MachineFunctionPass.h"
34
#include "llvm/CodeGen/MachineLoopInfo.h"
35
#include "llvm/CodeGen/MachineMemOperand.h"
36
#include "llvm/CodeGen/MachineModuleInfo.h"
37
#include "llvm/CodeGen/MachineRegisterInfo.h"
38
#include "llvm/CodeGen/Passes.h"
39
#include "llvm/CodeGen/PseudoSourceValue.h"
40
#include "llvm/CodeGen/SelectionDAGNodes.h"
41
#include "llvm/CodeGen/SlotIndexes.h"
42
#include "llvm/CodeGen/StackProtector.h"
43
#include "llvm/CodeGen/WinEHFuncInfo.h"
44
#include "llvm/IR/DebugInfo.h"
45
#include "llvm/IR/Function.h"
46
#include "llvm/IR/Instructions.h"
47
#include "llvm/IR/IntrinsicInst.h"
48
#include "llvm/IR/Module.h"
49
#include "llvm/Support/CommandLine.h"
50
#include "llvm/Support/Debug.h"
51
#include "llvm/Support/raw_ostream.h"
52
#include "llvm/Target/TargetInstrInfo.h"
53
#include "llvm/Target/TargetRegisterInfo.h"
54
55
using namespace llvm;
56
57
#define DEBUG_TYPE "stack-coloring"
58
59
static cl::opt<bool>
60
DisableColoring("no-stack-coloring",
61
        cl::init(false), cl::Hidden,
62
        cl::desc("Disable stack coloring"));
63
64
/// The user may write code that uses allocas outside of the declared lifetime
65
/// zone. This can happen when the user returns a reference to a local
66
/// data-structure. We can detect these cases and decide not to optimize the
67
/// code. If this flag is enabled, we try to save the user. This option
68
/// is treated as overriding LifetimeStartOnFirstUse below.
69
static cl::opt<bool>
70
ProtectFromEscapedAllocas("protect-from-escaped-allocas",
71
                          cl::init(false), cl::Hidden,
72
                          cl::desc("Do not optimize lifetime zones that "
73
                                   "are broken"));
74
75
/// Enable enhanced dataflow scheme for lifetime analysis (treat first
76
/// use of stack slot as start of slot lifetime, as opposed to looking
77
/// for LIFETIME_START marker). See "Implementation notes" below for
78
/// more info.
79
static cl::opt<bool>
80
LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
81
        cl::init(true), cl::Hidden,
82
        cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
83
84
85
STATISTIC(NumMarkerSeen,  "Number of lifetime markers found.");
86
STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
87
STATISTIC(StackSlotMerged, "Number of stack slot merged.");
88
STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
89
90
//===----------------------------------------------------------------------===//
91
//                           StackColoring Pass
92
//===----------------------------------------------------------------------===//
93
//
94
// Stack Coloring reduces stack usage by merging stack slots when they
95
// can't be used together. For example, consider the following C program:
96
//
97
//     void bar(char *, int);
98
//     void foo(bool var) {
99
//         A: {
100
//             char z[4096];
101
//             bar(z, 0);
102
//         }
103
//
104
//         char *p;
105
//         char x[4096];
106
//         char y[4096];
107
//         if (var) {
108
//             p = x;
109
//         } else {
110
//             bar(y, 1);
111
//             p = y + 1024;
112
//         }
113
//     B:
114
//         bar(p, 2);
115
//     }
116
//
117
// Naively-compiled, this program would use 12k of stack space. However, the
118
// stack slot corresponding to `z` is always destroyed before either of the
119
// stack slots for `x` or `y` are used, and then `x` is only used if `var`
120
// is true, while `y` is only used if `var` is false. So in no time are 2
121
// of the stack slots used together, and therefore we can merge them,
122
// compiling the function using only a single 4k alloca:
123
//
124
//     void foo(bool var) { // equivalent
125
//         char x[4096];
126
//         char *p;
127
//         bar(x, 0);
128
//         if (var) {
129
//             p = x;
130
//         } else {
131
//             bar(x, 1);
132
//             p = x + 1024;
133
//         }
134
//         bar(p, 2);
135
//     }
136
//
137
// This is an important optimization if we want stack space to be under
138
// control in large functions, both open-coded ones and ones created by
139
// inlining.
140
//
141
// Implementation Notes:
142
// ---------------------
143
//
144
// An important part of the above reasoning is that `z` can't be accessed
145
// while the latter 2 calls to `bar` are running. This is justified because
146
// `z`'s lifetime is over after we exit from block `A:`, so any further
147
// accesses to it would be UB. The way we represent this information
148
// in LLVM is by having frontends delimit blocks with `lifetime.start`
149
// and `lifetime.end` intrinsics.
150
//
151
// The effect of these intrinsics seems to be as follows (maybe I should
152
// specify this in the reference?):
153
//
154
//   L1) at start, each stack-slot is marked as *out-of-scope*, unless no
155
//   lifetime intrinsic refers to that stack slot, in which case
156
//   it is marked as *in-scope*.
157
//   L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
158
//   the stack slot is overwritten with `undef`.
159
//   L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
160
//   L4) on function exit, all stack slots are marked as *out-of-scope*.
161
//   L5) `lifetime.end` is a no-op when called on a slot that is already
162
//   *out-of-scope*.
163
//   L6) memory accesses to *out-of-scope* stack slots are UB.
164
//   L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
165
//   are invalidated, unless the slot is "degenerate". This is used to
166
//   justify not marking slots as in-use until the pointer to them is
167
//   used, but feels a bit hacky in the presence of things like LICM. See
168
//   the "Degenerate Slots" section for more details.
169
//
170
// Now, let's ground stack coloring on these rules. We'll define a slot
171
// as *in-use* at a (dynamic) point in execution if it either can be
172
// written to at that point, or if it has a live and non-undef content
173
// at that point.
174
//
175
// Obviously, slots that are never *in-use* together can be merged, and
176
// in our example `foo`, the slots for `x`, `y` and `z` are never
177
// in-use together (of course, sometimes slots that *are* in-use together
178
// might still be mergable, but we don't care about that here).
179
//
180
// In this implementation, we successively merge pairs of slots that are
181
// not *in-use* together. We could be smarter - for example, we could merge
182
// a single large slot with 2 small slots, or we could construct the
183
// interference graph and run a "smart" graph coloring algorithm, but with
184
// that aside, how do we find out whether a pair of slots might be *in-use*
185
// together?
186
//
187
// From our rules, we see that *out-of-scope* slots are never *in-use*,
188
// and from (L7) we see that "non-degenerate" slots remain non-*in-use*
189
// until their address is taken. Therefore, we can approximate slot activity
190
// using dataflow.
191
//
192
// A subtle point: naively, we might try to figure out which pairs of
193
// stack-slots interfere by propagating `S in-use` through the CFG for every
194
// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
195
// which they are both *in-use*.
196
//
197
// That is sound, but overly conservative in some cases: in our (artificial)
198
// example `foo`, either `x` or `y` might be in use at the label `B:`, but
199
// as `x` is only in use if we came in from the `var` edge and `y` only
200
// if we came from the `!var` edge, they still can't be in use together.
201
// See PR32488 for an important real-life case.
202
//
203
// If we wanted to find all points of interference precisely, we could
204
// propagate `S in-use` and `S&T in-use` predicates through the CFG. That
205
// would be precise, but requires propagating `O(n^2)` dataflow facts.
206
//
207
// However, we aren't interested in the *set* of points of interference
208
// between 2 stack slots, only *whether* there *is* such a point. So we
209
// can rely on a little trick: for `S` and `T` to be in-use together,
210
// one of them needs to become in-use while the other is in-use (or
211
// they might both become in use simultaneously). We can check this
212
// by also keeping track of the points at which a stack slot might *start*
213
// being in-use.
214
//
215
// Exact first use:
216
// ----------------
217
//
218
// Consider the following motivating example:
219
//
220
//     int foo() {
221
//       char b1[1024], b2[1024];
222
//       if (...) {
223
//         char b3[1024];
224
//         <uses of b1, b3>;
225
//         return x;
226
//       } else {
227
//         char b4[1024], b5[1024];
228
//         <uses of b2, b4, b5>;
229
//         return y;
230
//       }
231
//     }
232
//
233
// In the code above, "b3" and "b4" are declared in distinct lexical
234
// scopes, meaning that it is easy to prove that they can share the
235
// same stack slot. Variables "b1" and "b2" are declared in the same
236
// scope, meaning that from a lexical point of view, their lifetimes
237
// overlap. From a control flow pointer of view, however, the two
238
// variables are accessed in disjoint regions of the CFG, thus it
239
// should be possible for them to share the same stack slot. An ideal
240
// stack allocation for the function above would look like:
241
//
242
//     slot 0: b1, b2
243
//     slot 1: b3, b4
244
//     slot 2: b5
245
//
246
// Achieving this allocation is tricky, however, due to the way
247
// lifetime markers are inserted. Here is a simplified view of the
248
// control flow graph for the code above:
249
//
250
//                +------  block 0 -------+
251
//               0| LIFETIME_START b1, b2 |
252
//               1| <test 'if' condition> |
253
//                +-----------------------+
254
//                   ./              \.
255
//   +------  block 1 -------+   +------  block 2 -------+
256
//  2| LIFETIME_START b3     |  5| LIFETIME_START b4, b5 |
257
//  3| <uses of b1, b3>      |  6| <uses of b2, b4, b5>  |
258
//  4| LIFETIME_END b3       |  7| LIFETIME_END b4, b5   |
259
//   +-----------------------+   +-----------------------+
260
//                   \.              /.
261
//                +------  block 3 -------+
262
//               8| <cleanupcode>         |
263
//               9| LIFETIME_END b1, b2   |
264
//              10| return                |
265
//                +-----------------------+
266
//
267
// If we create live intervals for the variables above strictly based
268
// on the lifetime markers, we'll get the set of intervals on the
269
// left. If we ignore the lifetime start markers and instead treat a
270
// variable's lifetime as beginning with the first reference to the
271
// var, then we get the intervals on the right.
272
//
273
//            LIFETIME_START      First Use
274
//     b1:    [0,9]               [3,4] [8,9]
275
//     b2:    [0,9]               [6,9]
276
//     b3:    [2,4]               [3,4]
277
//     b4:    [5,7]               [6,7]
278
//     b5:    [5,7]               [6,7]
279
//
280
// For the intervals on the left, the best we can do is overlap two
281
// variables (b3 and b4, for example); this gives us a stack size of
282
// 4*1024 bytes, not ideal. When treating first-use as the start of a
283
// lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
284
// byte stack (better).
285
//
286
// Degenerate Slots:
287
// -----------------
288
//
289
// Relying entirely on first-use of stack slots is problematic,
290
// however, due to the fact that optimizations can sometimes migrate
291
// uses of a variable outside of its lifetime start/end region. Here
292
// is an example:
293
//
294
//     int bar() {
295
//       char b1[1024], b2[1024];
296
//       if (...) {
297
//         <uses of b2>
298
//         return y;
299
//       } else {
300
//         <uses of b1>
301
//         while (...) {
302
//           char b3[1024];
303
//           <uses of b3>
304
//         }
305
//       }
306
//     }
307
//
308
// Before optimization, the control flow graph for the code above
309
// might look like the following:
310
//
311
//                +------  block 0 -------+
312
//               0| LIFETIME_START b1, b2 |
313
//               1| <test 'if' condition> |
314
//                +-----------------------+
315
//                   ./              \.
316
//   +------  block 1 -------+    +------- block 2 -------+
317
//  2| <uses of b2>          |   3| <uses of b1>          |
318
//   +-----------------------+    +-----------------------+
319
//              |                            |
320
//              |                 +------- block 3 -------+ <-\.
321
//              |                4| <while condition>     |    |
322
//              |                 +-----------------------+    |
323
//              |               /          |                   |
324
//              |              /  +------- block 4 -------+
325
//              \             /  5| LIFETIME_START b3     |    |
326
//               \           /   6| <uses of b3>          |    |
327
//                \         /    7| LIFETIME_END b3       |    |
328
//                 \        |    +------------------------+    |
329
//                  \       |                 \                /
330
//                +------  block 5 -----+      \---------------
331
//               8| <cleanupcode>       |
332
//               9| LIFETIME_END b1, b2 |
333
//              10| return              |
334
//                +---------------------+
335
//
336
// During optimization, however, it can happen that an instruction
337
// computing an address in "b3" (for example, a loop-invariant GEP) is
338
// hoisted up out of the loop from block 4 to block 2.  [Note that
339
// this is not an actual load from the stack, only an instruction that
340
// computes the address to be loaded]. If this happens, there is now a
341
// path leading from the first use of b3 to the return instruction
342
// that does not encounter the b3 LIFETIME_END, hence b3's lifetime is
343
// now larger than if we were computing live intervals strictly based
344
// on lifetime markers. In the example above, this lengthened lifetime
345
// would mean that it would appear illegal to overlap b3 with b2.
346
//
347
// To deal with this such cases, the code in ::collectMarkers() below
348
// tries to identify "degenerate" slots -- those slots where on a single
349
// forward pass through the CFG we encounter a first reference to slot
350
// K before we hit the slot K lifetime start marker. For such slots,
351
// we fall back on using the lifetime start marker as the beginning of
352
// the variable's lifetime.  NB: with this implementation, slots can
353
// appear degenerate in cases where there is unstructured control flow:
354
//
355
//    if (q) goto mid;
356
//    if (x > 9) {
357
//         int b[100];
358
//         memcpy(&b[0], ...);
359
//    mid: b[k] = ...;
360
//         abc(&b);
361
//    }
362
//
363
// If in RPO ordering chosen to walk the CFG  we happen to visit the b[k]
364
// before visiting the memcpy block (which will contain the lifetime start
365
// for "b" then it will appear that 'b' has a degenerate lifetime.
366
//
367
368
namespace {
369
/// StackColoring - A machine pass for merging disjoint stack allocations,
370
/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
371
class StackColoring : public MachineFunctionPass {
372
  MachineFrameInfo *MFI;
373
  MachineFunction *MF;
374
375
  /// A class representing liveness information for a single basic block.
376
  /// Each bit in the BitVector represents the liveness property
377
  /// for a different stack slot.
378
  struct BlockLifetimeInfo {
379
    /// Which slots BEGINs in each basic block.
380
    BitVector Begin;
381
    /// Which slots ENDs in each basic block.
382
    BitVector End;
383
    /// Which slots are marked as LIVE_IN, coming into each basic block.
384
    BitVector LiveIn;
385
    /// Which slots are marked as LIVE_OUT, coming out of each basic block.
386
    BitVector LiveOut;
387
  };
388
389
  /// Maps active slots (per bit) for each basic block.
390
  typedef DenseMap<const MachineBasicBlock*, BlockLifetimeInfo> LivenessMap;
391
  LivenessMap BlockLiveness;
392
393
  /// Maps serial numbers to basic blocks.
394
  DenseMap<const MachineBasicBlock*, int> BasicBlocks;
395
  /// Maps basic blocks to a serial number.
396
  SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
397
398
  /// Maps slots to their use interval. Outside of this interval, slots
399
  /// values are either dead or `undef` and they will not be written to.
400
  SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
401
  /// Maps slots to the points where they can become in-use.
402
  SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts;
403
  /// VNInfo is used for the construction of LiveIntervals.
404
  VNInfo::Allocator VNInfoAllocator;
405
  /// SlotIndex analysis object.
406
  SlotIndexes *Indexes;
407
  /// The stack protector object.
408
  StackProtector *SP;
409
410
  /// The list of lifetime markers found. These markers are to be removed
411
  /// once the coloring is done.
412
  SmallVector<MachineInstr*, 8> Markers;
413
414
  /// Record the FI slots for which we have seen some sort of
415
  /// lifetime marker (either start or end).
416
  BitVector InterestingSlots;
417
418
  /// FI slots that need to be handled conservatively (for these
419
  /// slots lifetime-start-on-first-use is disabled).
420
  BitVector ConservativeSlots;
421
422
  /// Number of iterations taken during data flow analysis.
423
  unsigned NumIterations;
424
425
public:
426
  static char ID;
427
32.0k
  StackColoring() : MachineFunctionPass(ID) {
428
32.0k
    initializeStackColoringPass(*PassRegistry::getPassRegistry());
429
32.0k
  }
430
  void getAnalysisUsage(AnalysisUsage &AU) const override;
431
  bool runOnMachineFunction(MachineFunction &MF) override;
432
433
private:
434
  /// Debug.
435
  void dump() const;
436
  void dumpIntervals() const;
437
  void dumpBB(MachineBasicBlock *MBB) const;
438
  void dumpBV(const char *tag, const BitVector &BV) const;
439
440
  /// Removes all of the lifetime marker instructions from the function.
441
  /// \returns true if any markers were removed.
442
  bool removeAllMarkers();
443
444
  /// Scan the machine function and find all of the lifetime markers.
445
  /// Record the findings in the BEGIN and END vectors.
446
  /// \returns the number of markers found.
447
  unsigned collectMarkers(unsigned NumSlot);
448
449
  /// Perform the dataflow calculation and calculate the lifetime for each of
450
  /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and
451
  /// LifetimeLIVE_OUT maps that represent which stack slots are live coming
452
  /// in and out blocks.
453
  void calculateLocalLiveness();
454
455
  /// Returns TRUE if we're using the first-use-begins-lifetime method for
456
  /// this slot (if FALSE, then the start marker is treated as start of lifetime).
457
1.21M
  bool applyFirstUse(int Slot) {
458
1.21M
    if (
!LifetimeStartOnFirstUse || 1.21M
ProtectFromEscapedAllocas1.21M
)
459
110
      return false;
460
1.21M
    
if (1.21M
ConservativeSlots.test(Slot)1.21M
)
461
396k
      return false;
462
822k
    return true;
463
822k
  }
464
465
  /// Examines the specified instruction and returns TRUE if the instruction
466
  /// represents the start or end of an interesting lifetime. The slot or slots
467
  /// starting or ending are added to the vector "slots" and "isStart" is set
468
  /// accordingly.
469
  /// \returns True if inst contains a lifetime start or end
470
  bool isLifetimeStartOrEnd(const MachineInstr &MI,
471
                            SmallVector<int, 4> &slots,
472
                            bool &isStart);
473
474
  /// Construct the LiveIntervals for the slots.
475
  void calculateLiveIntervals(unsigned NumSlots);
476
477
  /// Go over the machine function and change instructions which use stack
478
  /// slots to use the joint slots.
479
  void remapInstructions(DenseMap<int, int> &SlotRemap);
480
481
  /// The input program may contain instructions which are not inside lifetime
482
  /// markers. This can happen due to a bug in the compiler or due to a bug in
483
  /// user code (for example, returning a reference to a local variable).
484
  /// This procedure checks all of the instructions in the function and
485
  /// invalidates lifetime ranges which do not contain all of the instructions
486
  /// which access that frame slot.
487
  void removeInvalidSlotRanges();
488
489
  /// Map entries which point to other entries to their destination.
490
  ///   A->B->C becomes A->C.
491
  void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
492
493
  /// Used in collectMarkers
494
  typedef DenseMap<const MachineBasicBlock*, BitVector> BlockBitVecMap;
495
};
496
} // end anonymous namespace
497
498
char StackColoring::ID = 0;
499
char &llvm::StackColoringID = StackColoring::ID;
500
501
36.7k
INITIALIZE_PASS_BEGIN36.7k
(StackColoring, DEBUG_TYPE,
502
36.7k
                      "Merge disjoint stack slots", false, false)
503
36.7k
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
504
36.7k
INITIALIZE_PASS_DEPENDENCY(StackProtector)
505
36.7k
INITIALIZE_PASS_END(StackColoring, DEBUG_TYPE,
506
                    "Merge disjoint stack slots", false, false)
507
508
32.0k
void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
509
32.0k
  AU.addRequired<SlotIndexes>();
510
32.0k
  AU.addRequired<StackProtector>();
511
32.0k
  MachineFunctionPass::getAnalysisUsage(AU);
512
32.0k
}
513
514
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
515
LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
516
                                            const BitVector &BV) const {
517
  dbgs() << tag << " : { ";
518
  for (unsigned I = 0, E = BV.size(); I != E; ++I)
519
    dbgs() << BV.test(I) << " ";
520
  dbgs() << "}\n";
521
}
522
523
LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
524
  LivenessMap::const_iterator BI = BlockLiveness.find(MBB);
525
  assert(BI != BlockLiveness.end() && "Block not found");
526
  const BlockLifetimeInfo &BlockInfo = BI->second;
527
528
  dumpBV("BEGIN", BlockInfo.Begin);
529
  dumpBV("END", BlockInfo.End);
530
  dumpBV("LIVE_IN", BlockInfo.LiveIn);
531
  dumpBV("LIVE_OUT", BlockInfo.LiveOut);
532
}
533
534
LLVM_DUMP_METHOD void StackColoring::dump() const {
535
  for (MachineBasicBlock *MBB : depth_first(MF)) {
536
    dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
537
           << MBB->getName() << "]\n";
538
    dumpBB(MBB);
539
  }
540
}
541
542
LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
543
  for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
544
    dbgs() << "Interval[" << I << "]:\n";
545
    Intervals[I]->dump();
546
  }
547
}
548
#endif
549
550
static inline int getStartOrEndSlot(const MachineInstr &MI)
551
458k
{
552
458k
  assert((MI.getOpcode() == TargetOpcode::LIFETIME_START ||
553
458k
          MI.getOpcode() == TargetOpcode::LIFETIME_END) &&
554
458k
         "Expected LIFETIME_START or LIFETIME_END op");
555
458k
  const MachineOperand &MO = MI.getOperand(0);
556
458k
  int Slot = MO.getIndex();
557
458k
  if (Slot >= 0)
558
458k
    return Slot;
559
2
  return -1;
560
2
}
561
562
//
563
// At the moment the only way to end a variable lifetime is with
564
// a VARIABLE_LIFETIME op (which can't contain a start). If things
565
// change and the IR allows for a single inst that both begins
566
// and ends lifetime(s), this interface will need to be reworked.
567
//
568
bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
569
                                         SmallVector<int, 4> &slots,
570
                                         bool &isStart)
571
22.8M
{
572
22.8M
  if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
573
22.8M
      
MI.getOpcode() == TargetOpcode::LIFETIME_END22.7M
) {
574
296k
    int Slot = getStartOrEndSlot(MI);
575
296k
    if (Slot < 0)
576
1
      return false;
577
296k
    
if (296k
!InterestingSlots.test(Slot)296k
)
578
0
      return false;
579
296k
    slots.push_back(Slot);
580
296k
    if (
MI.getOpcode() == TargetOpcode::LIFETIME_END296k
) {
581
168k
      isStart = false;
582
168k
      return true;
583
168k
    }
584
127k
    
if (127k
! applyFirstUse(Slot)127k
) {
585
37.2k
      isStart = true;
586
37.2k
      return true;
587
37.2k
    }
588
22.5M
  } else 
if (22.5M
LifetimeStartOnFirstUse && 22.5M
!ProtectFromEscapedAllocas22.5M
) {
589
22.5M
    if (
! MI.isDebugValue()22.5M
) {
590
22.5M
      bool found = false;
591
70.9M
      for (const MachineOperand &MO : MI.operands()) {
592
70.9M
        if (!MO.isFI())
593
69.8M
          continue;
594
1.13M
        int Slot = MO.getIndex();
595
1.13M
        if (Slot<0)
596
2.73k
          continue;
597
1.13M
        
if (1.13M
InterestingSlots.test(Slot) && 1.13M
applyFirstUse(Slot)1.09M
) {
598
732k
          slots.push_back(Slot);
599
732k
          found = true;
600
732k
        }
601
70.9M
      }
602
22.5M
      if (
found22.5M
) {
603
732k
        isStart = true;
604
732k
        return true;
605
732k
      }
606
21.8M
    }
607
22.5M
  }
608
21.8M
  return false;
609
21.8M
}
610
611
unsigned StackColoring::collectMarkers(unsigned NumSlot)
612
50.0k
{
613
50.0k
  unsigned MarkersFound = 0;
614
50.0k
  BlockBitVecMap SeenStartMap;
615
50.0k
  InterestingSlots.clear();
616
50.0k
  InterestingSlots.resize(NumSlot);
617
50.0k
  ConservativeSlots.clear();
618
50.0k
  ConservativeSlots.resize(NumSlot);
619
50.0k
620
50.0k
  // number of start and end lifetime ops for each slot
621
50.0k
  SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
622
50.0k
  SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
623
50.0k
624
50.0k
  // Step 1: collect markers and populate the "InterestingSlots"
625
50.0k
  // and "ConservativeSlots" sets.
626
1.22M
  for (MachineBasicBlock *MBB : depth_first(MF)) {
627
1.22M
628
1.22M
    // Compute the set of slots for which we've seen a START marker but have
629
1.22M
    // not yet seen an END marker at this point in the walk (e.g. on entry
630
1.22M
    // to this bb).
631
1.22M
    BitVector BetweenStartEnd;
632
1.22M
    BetweenStartEnd.resize(NumSlot);
633
1.22M
    for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
634
3.11M
             PE = MBB->pred_end(); 
PI != PE3.11M
;
++PI1.88M
) {
635
1.88M
      BlockBitVecMap::const_iterator I = SeenStartMap.find(*PI);
636
1.88M
      if (
I != SeenStartMap.end()1.88M
) {
637
1.30M
        BetweenStartEnd |= I->second;
638
1.30M
      }
639
1.88M
    }
640
1.22M
641
1.22M
    // Walk the instructions in the block to look for start/end ops.
642
12.3M
    for (MachineInstr &MI : *MBB) {
643
12.3M
      if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
644
12.3M
          
MI.getOpcode() == TargetOpcode::LIFETIME_END12.2M
) {
645
162k
        int Slot = getStartOrEndSlot(MI);
646
162k
        if (Slot < 0)
647
1
          continue;
648
162k
        InterestingSlots.set(Slot);
649
162k
        if (
MI.getOpcode() == TargetOpcode::LIFETIME_START162k
) {
650
69.2k
          BetweenStartEnd.set(Slot);
651
69.2k
          NumStartLifetimes[Slot] += 1;
652
162k
        } else {
653
92.9k
          BetweenStartEnd.reset(Slot);
654
92.9k
          NumEndLifetimes[Slot] += 1;
655
92.9k
        }
656
162k
        const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
657
162k
        if (
Allocation162k
) {
658
162k
          DEBUG(dbgs() << "Found a lifetime ");
659
162k
          DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START
660
162k
                               ? "start"
661
162k
                               : "end"));
662
162k
          DEBUG(dbgs() << " marker for slot #" << Slot);
663
162k
          DEBUG(dbgs() << " with allocation: " << Allocation->getName()
664
162k
                       << "\n");
665
162k
        }
666
162k
        Markers.push_back(&MI);
667
162k
        MarkersFound += 1;
668
12.3M
      } else {
669
38.7M
        for (const MachineOperand &MO : MI.operands()) {
670
38.7M
          if (!MO.isFI())
671
38.0M
            continue;
672
637k
          int Slot = MO.getIndex();
673
637k
          if (Slot < 0)
674
7.88k
            continue;
675
629k
          
if (629k
! BetweenStartEnd.test(Slot)629k
) {
676
67.8k
            ConservativeSlots.set(Slot);
677
67.8k
          }
678
38.7M
        }
679
12.1M
      }
680
12.3M
    }
681
1.22M
    BitVector &SeenStart = SeenStartMap[MBB];
682
1.22M
    SeenStart |= BetweenStartEnd;
683
1.22M
  }
684
50.0k
  if (
!MarkersFound50.0k
) {
685
8.46k
    return 0;
686
8.46k
  }
687
41.6k
688
41.6k
  // PR27903: slots with multiple start or end lifetime ops are not
689
41.6k
  // safe to enable for "lifetime-start-on-first-use".
690
116k
  
for (unsigned slot = 0; 41.6k
slot < NumSlot116k
;
++slot74.6k
)
691
74.6k
    
if (74.6k
NumStartLifetimes[slot] > 1 || 74.6k
NumEndLifetimes[slot] > 174.0k
)
692
20.4k
      ConservativeSlots.set(slot);
693
41.6k
  DEBUG(dumpBV("Conservative slots", ConservativeSlots));
694
41.6k
695
41.6k
  // Step 2: compute begin/end sets for each block
696
41.6k
697
41.6k
  // NOTE: We use a depth-first iteration to ensure that we obtain a
698
41.6k
  // deterministic numbering.
699
1.19M
  for (MachineBasicBlock *MBB : depth_first(MF)) {
700
1.19M
701
1.19M
    // Assign a serial number to this basic block.
702
1.19M
    BasicBlocks[MBB] = BasicBlockNumbering.size();
703
1.19M
    BasicBlockNumbering.push_back(MBB);
704
1.19M
705
1.19M
    // Keep a reference to avoid repeated lookups.
706
1.19M
    BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];
707
1.19M
708
1.19M
    BlockInfo.Begin.resize(NumSlot);
709
1.19M
    BlockInfo.End.resize(NumSlot);
710
1.19M
711
1.19M
    SmallVector<int, 4> slots;
712
11.8M
    for (MachineInstr &MI : *MBB) {
713
11.8M
      bool isStart = false;
714
11.8M
      slots.clear();
715
11.8M
      if (
isLifetimeStartOrEnd(MI, slots, isStart)11.8M
) {
716
487k
        if (
!isStart487k
) {
717
92.9k
          assert(slots.size() == 1 && "unexpected: MI ends multiple slots");
718
92.9k
          int Slot = slots[0];
719
92.9k
          if (
BlockInfo.Begin.test(Slot)92.9k
) {
720
17.6k
            BlockInfo.Begin.reset(Slot);
721
17.6k
          }
722
92.9k
          BlockInfo.End.set(Slot);
723
487k
        } else {
724
394k
          for (auto Slot : slots) {
725
394k
            DEBUG(dbgs() << "Found a use of slot #" << Slot);
726
394k
            DEBUG(dbgs() << " at BB#" << MBB->getNumber() << " index ");
727
394k
            DEBUG(Indexes->getInstructionIndex(MI).print(dbgs()));
728
394k
            const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
729
394k
            if (
Allocation394k
) {
730
394k
              DEBUG(dbgs() << " with allocation: "<< Allocation->getName());
731
394k
            }
732
394k
            DEBUG(dbgs() << "\n");
733
394k
            if (
BlockInfo.End.test(Slot)394k
) {
734
302
              BlockInfo.End.reset(Slot);
735
302
            }
736
394k
            BlockInfo.Begin.set(Slot);
737
394k
          }
738
394k
        }
739
487k
      }
740
11.8M
    }
741
1.19M
  }
742
50.0k
743
50.0k
  // Update statistics.
744
50.0k
  NumMarkerSeen += MarkersFound;
745
50.0k
  return MarkersFound;
746
50.0k
}
747
748
void StackColoring::calculateLocalLiveness()
749
33.7k
{
750
33.7k
  unsigned NumIters = 0;
751
33.7k
  bool changed = true;
752
99.9k
  while (
changed99.9k
) {
753
66.2k
    changed = false;
754
66.2k
    ++NumIters;
755
66.2k
756
2.56M
    for (const MachineBasicBlock *BB : BasicBlockNumbering) {
757
2.56M
758
2.56M
      // Use an iterator to avoid repeated lookups.
759
2.56M
      LivenessMap::iterator BI = BlockLiveness.find(BB);
760
2.56M
      assert(BI != BlockLiveness.end() && "Block not found");
761
2.56M
      BlockLifetimeInfo &BlockInfo = BI->second;
762
2.56M
763
2.56M
      // Compute LiveIn by unioning together the LiveOut sets of all preds.
764
2.56M
      BitVector LocalLiveIn;
765
2.56M
      for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
766
6.57M
           PE = BB->pred_end(); 
PI != PE6.57M
;
++PI4.01M
) {
767
4.01M
        LivenessMap::const_iterator I = BlockLiveness.find(*PI);
768
4.01M
        assert(I != BlockLiveness.end() && "Predecessor not found");
769
4.01M
        LocalLiveIn |= I->second.LiveOut;
770
4.01M
      }
771
2.56M
772
2.56M
      // Compute LiveOut by subtracting out lifetimes that end in this
773
2.56M
      // block, then adding in lifetimes that begin in this block.  If
774
2.56M
      // we have both BEGIN and END markers in the same basic block
775
2.56M
      // then we know that the BEGIN marker comes after the END,
776
2.56M
      // because we already handle the case where the BEGIN comes
777
2.56M
      // before the END when collecting the markers (and building the
778
2.56M
      // BEGIN/END vectors).
779
2.56M
      BitVector LocalLiveOut = LocalLiveIn;
780
2.56M
      LocalLiveOut.reset(BlockInfo.End);
781
2.56M
      LocalLiveOut |= BlockInfo.Begin;
782
2.56M
783
2.56M
      // Update block LiveIn set, noting whether it has changed.
784
2.56M
      if (
LocalLiveIn.test(BlockInfo.LiveIn)2.56M
) {
785
793k
        changed = true;
786
793k
        BlockInfo.LiveIn |= LocalLiveIn;
787
793k
      }
788
2.56M
789
2.56M
      // Update block LiveOut set, noting whether it has changed.
790
2.56M
      if (
LocalLiveOut.test(BlockInfo.LiveOut)2.56M
) {
791
782k
        changed = true;
792
782k
        BlockInfo.LiveOut |= LocalLiveOut;
793
782k
      }
794
2.56M
    }
795
66.2k
  }// while changed.
796
33.7k
797
33.7k
  NumIterations = NumIters;
798
33.7k
}
799
800
33.7k
void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
801
33.7k
  SmallVector<SlotIndex, 16> Starts;
802
33.7k
  SmallVector<bool, 16> DefinitelyInUse;
803
33.7k
804
33.7k
  // For each block, find which slots are active within this block
805
33.7k
  // and update the live intervals.
806
1.10M
  for (const MachineBasicBlock &MBB : *MF) {
807
1.10M
    Starts.clear();
808
1.10M
    Starts.resize(NumSlots);
809
1.10M
    DefinitelyInUse.clear();
810
1.10M
    DefinitelyInUse.resize(NumSlots);
811
1.10M
812
1.10M
    // Start the interval of the slots that we previously found to be 'in-use'.
813
1.10M
    BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
814
2.35M
    for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
815
1.24M
         
pos = MBBLiveness.LiveIn.find_next(pos)1.24M
) {
816
1.24M
      Starts[pos] = Indexes->getMBBStartIdx(&MBB);
817
1.24M
    }
818
1.10M
819
1.10M
    // Create the interval for the basic blocks containing lifetime begin/end.
820
11.0M
    for (const MachineInstr &MI : MBB) {
821
11.0M
822
11.0M
      SmallVector<int, 4> slots;
823
11.0M
      bool IsStart = false;
824
11.0M
      if (!isLifetimeStartOrEnd(MI, slots, IsStart))
825
10.5M
        continue;
826
450k
      SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
827
450k
      for (auto Slot : slots) {
828
450k
        if (
IsStart450k
) {
829
375k
          // If a slot is already definitely in use, we don't have to emit
830
375k
          // a new start marker because there is already a pre-existing
831
375k
          // one.
832
375k
          if (
!DefinitelyInUse[Slot]375k
) {
833
263k
            LiveStarts[Slot].push_back(ThisIndex);
834
263k
            DefinitelyInUse[Slot] = true;
835
263k
          }
836
375k
          if (!Starts[Slot].isValid())
837
53.7k
            Starts[Slot] = ThisIndex;
838
450k
        } else {
839
75.8k
          if (
Starts[Slot].isValid()75.8k
) {
840
75.7k
            VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
841
75.7k
            Intervals[Slot]->addSegment(
842
75.7k
                LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
843
75.7k
            Starts[Slot] = SlotIndex(); // Invalidate the start index
844
75.7k
            DefinitelyInUse[Slot] = false;
845
75.7k
          }
846
75.8k
        }
847
450k
      }
848
11.0M
    }
849
1.10M
850
1.10M
    // Finish up started segments
851
4.89M
    for (unsigned i = 0; 
i < NumSlots4.89M
;
++i3.78M
) {
852
3.78M
      if (!Starts[i].isValid())
853
2.55M
        continue;
854
1.22M
855
1.22M
      SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
856
1.22M
      VNInfo *VNI = Intervals[i]->getValNumInfo(0);
857
1.22M
      Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
858
1.22M
    }
859
1.10M
  }
860
33.7k
}
861
862
50.0k
bool StackColoring::removeAllMarkers() {
863
50.0k
  unsigned Count = 0;
864
162k
  for (MachineInstr *MI : Markers) {
865
162k
    MI->eraseFromParent();
866
162k
    Count++;
867
162k
  }
868
50.0k
  Markers.clear();
869
50.0k
870
50.0k
  DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n");
871
50.0k
  return Count;
872
50.0k
}
873
874
33.7k
void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
875
33.7k
  unsigned FixedInstr = 0;
876
33.7k
  unsigned FixedMemOp = 0;
877
33.7k
  unsigned FixedDbg = 0;
878
33.7k
879
33.7k
  // Remap debug information that refers to stack slots.
880
5
  for (auto &VI : MF->getVariableDbgInfo()) {
881
5
    if (!VI.Var)
882
0
      continue;
883
5
    
if (5
SlotRemap.count(VI.Slot)5
) {
884
0
      DEBUG(dbgs() << "Remapping debug info for ["
885
0
                   << cast<DILocalVariable>(VI.Var)->getName() << "].\n");
886
0
      VI.Slot = SlotRemap[VI.Slot];
887
0
      FixedDbg++;
888
0
    }
889
5
  }
890
33.7k
891
33.7k
  // Keep a list of *allocas* which need to be remapped.
892
33.7k
  DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
893
33.7k
894
33.7k
  // Keep a list of allocas which has been affected by the remap.
895
33.7k
  SmallPtrSet<const AllocaInst*, 32> MergedAllocas;
896
33.7k
897
8.36k
  for (const std::pair<int, int> &SI : SlotRemap) {
898
8.36k
    const AllocaInst *From = MFI->getObjectAllocation(SI.first);
899
8.36k
    const AllocaInst *To = MFI->getObjectAllocation(SI.second);
900
8.36k
    assert(To && From && "Invalid allocation object");
901
8.36k
    Allocas[From] = To;
902
8.36k
903
8.36k
    // AA might be used later for instruction scheduling, and we need it to be
904
8.36k
    // able to deduce the correct aliasing releationships between pointers
905
8.36k
    // derived from the alloca being remapped and the target of that remapping.
906
8.36k
    // The only safe way, without directly informing AA about the remapping
907
8.36k
    // somehow, is to directly update the IR to reflect the change being made
908
8.36k
    // here.
909
8.36k
    Instruction *Inst = const_cast<AllocaInst *>(To);
910
8.36k
    if (
From->getType() != To->getType()8.36k
) {
911
7.07k
      BitCastInst *Cast = new BitCastInst(Inst, From->getType());
912
7.07k
      Cast->insertAfter(Inst);
913
7.07k
      Inst = Cast;
914
7.07k
    }
915
8.36k
916
8.36k
    // We keep both slots to maintain AliasAnalysis metadata later.
917
8.36k
    MergedAllocas.insert(From);
918
8.36k
    MergedAllocas.insert(To);
919
8.36k
920
8.36k
    // Allow the stack protector to adjust its value map to account for the
921
8.36k
    // upcoming replacement.
922
8.36k
    SP->adjustForColoring(From, To);
923
8.36k
924
8.36k
    // The new alloca might not be valid in a llvm.dbg.declare for this
925
8.36k
    // variable, so undef out the use to make the verifier happy.
926
8.36k
    AllocaInst *FromAI = const_cast<AllocaInst *>(From);
927
8.36k
    if (FromAI->isUsedByMetadata())
928
0
      ValueAsMetadata::handleRAUW(FromAI, UndefValue::get(FromAI->getType()));
929
231k
    for (auto &Use : FromAI->uses()) {
930
231k
      if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get()))
931
0
        
if (0
BCI->isUsedByMetadata()0
)
932
0
          ValueAsMetadata::handleRAUW(BCI, UndefValue::get(BCI->getType()));
933
231k
    }
934
8.36k
935
8.36k
    // Note that this will not replace uses in MMOs (which we'll update below),
936
8.36k
    // or anywhere else (which is why we won't delete the original
937
8.36k
    // instruction).
938
8.36k
    FromAI->replaceAllUsesWith(Inst);
939
8.36k
  }
940
33.7k
941
33.7k
  // Remap all instructions to the new stack slots.
942
33.7k
  for (MachineBasicBlock &BB : *MF)
943
1.10M
    
for (MachineInstr &I : BB) 1.10M
{
944
11.0M
      // Skip lifetime markers. We'll remove them soon.
945
11.0M
      if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
946
10.9M
          I.getOpcode() == TargetOpcode::LIFETIME_END)
947
133k
        continue;
948
10.8M
949
10.8M
      // Update the MachineMemOperand to use the new alloca.
950
10.8M
      
for (MachineMemOperand *MMO : I.memoperands()) 10.8M
{
951
1.31M
        // We've replaced IR-level uses of the remapped allocas, so we only
952
1.31M
        // need to replace direct uses here.
953
1.31M
        const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue());
954
1.31M
        if (!AI)
955
1.27M
          continue;
956
41.8k
957
41.8k
        
if (41.8k
!Allocas.count(AI)41.8k
)
958
39.5k
          continue;
959
2.28k
960
2.28k
        MMO->setValue(Allocas[AI]);
961
2.28k
        FixedMemOp++;
962
2.28k
      }
963
10.8M
964
10.8M
      // Update all of the machine instruction operands.
965
34.2M
      for (MachineOperand &MO : I.operands()) {
966
34.2M
        if (!MO.isFI())
967
33.6M
          continue;
968
553k
        int FromSlot = MO.getIndex();
969
553k
970
553k
        // Don't touch arguments.
971
553k
        if (FromSlot<0)
972
1.00k
          continue;
973
552k
974
552k
        // Only look at mapped slots.
975
552k
        
if (552k
!SlotRemap.count(FromSlot)552k
)
976
413k
          continue;
977
139k
978
139k
        // In a debug build, check that the instruction that we are modifying is
979
139k
        // inside the expected live range. If the instruction is not inside
980
139k
        // the calculated range then it means that the alloca usage moved
981
139k
        // outside of the lifetime markers, or that the user has a bug.
982
139k
        // NOTE: Alloca address calculations which happen outside the lifetime
983
139k
        // zone are are okay, despite the fact that we don't have a good way
984
139k
        // for validating all of the usages of the calculation.
985
#ifndef NDEBUG
986
        bool TouchesMemory = I.mayLoad() || I.mayStore();
987
        // If we *don't* protect the user from escaped allocas, don't bother
988
        // validating the instructions.
989
        if (!I.isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) {
990
          SlotIndex Index = Indexes->getInstructionIndex(I);
991
          const LiveInterval *Interval = &*Intervals[FromSlot];
992
          assert(Interval->find(Index) != Interval->end() &&
993
                 "Found instruction usage outside of live range.");
994
        }
995
#endif
996
997
139k
        // Fix the machine instructions.
998
139k
        int ToSlot = SlotRemap[FromSlot];
999
139k
        MO.setIndex(ToSlot);
1000
139k
        FixedInstr++;
1001
139k
      }
1002
10.8M
1003
10.8M
      // We adjust AliasAnalysis information for merged stack slots.
1004
10.8M
      MachineSDNode::mmo_iterator NewMemOps =
1005
10.8M
          MF->allocateMemRefsArray(I.getNumMemOperands());
1006
10.8M
      unsigned MemOpIdx = 0;
1007
10.8M
      bool ReplaceMemOps = false;
1008
1.31M
      for (MachineMemOperand *MMO : I.memoperands()) {
1009
1.31M
        // If this memory location can be a slot remapped here,
1010
1.31M
        // we remove AA information.
1011
1.31M
        bool MayHaveConflictingAAMD = false;
1012
1.31M
        if (
MMO->getAAInfo()1.31M
) {
1013
1.05M
          if (const Value *
MMOV1.05M
= MMO->getValue()) {
1014
1.05M
            SmallVector<Value *, 4> Objs;
1015
1.05M
            getUnderlyingObjectsForCodeGen(MMOV, Objs, MF->getDataLayout());
1016
1.05M
1017
1.05M
            if (Objs.empty())
1018
561k
              MayHaveConflictingAAMD = true;
1019
1.05M
            else
1020
489k
              
for (Value *V : Objs) 489k
{
1021
492k
                // If this memory location comes from a known stack slot
1022
492k
                // that is not remapped, we continue checking.
1023
492k
                // Otherwise, we need to invalidate AA infomation.
1024
492k
                const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V);
1025
492k
                if (
AI && 492k
MergedAllocas.count(AI)346k
) {
1026
89.2k
                  MayHaveConflictingAAMD = true;
1027
89.2k
                  break;
1028
89.2k
                }
1029
1.31M
              }
1030
1.05M
          }
1031
1.05M
        }
1032
1.31M
        if (
MayHaveConflictingAAMD1.31M
) {
1033
650k
          NewMemOps[MemOpIdx++] = MF->getMachineMemOperand(MMO, AAMDNodes());
1034
650k
          ReplaceMemOps = true;
1035
650k
        }
1036
1.31M
        else
1037
668k
          NewMemOps[MemOpIdx++] = MMO;
1038
1.31M
      }
1039
10.8M
1040
10.8M
      // If any memory operand is updated, set memory references of
1041
10.8M
      // this instruction.
1042
10.8M
      if (ReplaceMemOps)
1043
650k
        I.setMemRefs(std::make_pair(NewMemOps, I.getNumMemOperands()));
1044
1.10M
    }
1045
33.7k
1046
33.7k
  // Update the location of C++ catch objects for the MSVC personality routine.
1047
33.7k
  if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
1048
1
    for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
1049
1
      for (WinEHHandlerType &H : TBME.HandlerArray)
1050
1
        
if (1
H.CatchObj.FrameIndex != INT_MAX &&
1051
0
            SlotRemap.count(H.CatchObj.FrameIndex))
1052
0
          H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex];
1053
33.7k
1054
33.7k
  DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n");
1055
33.7k
  DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n");
1056
33.7k
  DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n");
1057
33.7k
}
1058
1059
0
void StackColoring::removeInvalidSlotRanges() {
1060
0
  for (MachineBasicBlock &BB : *MF)
1061
0
    
for (MachineInstr &I : BB) 0
{
1062
0
      if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
1063
0
          
I.getOpcode() == TargetOpcode::LIFETIME_END0
||
I.isDebugValue()0
)
1064
0
        continue;
1065
0
1066
0
      // Some intervals are suspicious! In some cases we find address
1067
0
      // calculations outside of the lifetime zone, but not actual memory
1068
0
      // read or write. Memory accesses outside of the lifetime zone are a clear
1069
0
      // violation, but address calculations are okay. This can happen when
1070
0
      // GEPs are hoisted outside of the lifetime zone.
1071
0
      // So, in here we only check instructions which can read or write memory.
1072
0
      
if (0
!I.mayLoad() && 0
!I.mayStore()0
)
1073
0
        continue;
1074
0
1075
0
      // Check all of the machine operands.
1076
0
      
for (const MachineOperand &MO : I.operands()) 0
{
1077
0
        if (!MO.isFI())
1078
0
          continue;
1079
0
1080
0
        int Slot = MO.getIndex();
1081
0
1082
0
        if (Slot<0)
1083
0
          continue;
1084
0
1085
0
        
if (0
Intervals[Slot]->empty()0
)
1086
0
          continue;
1087
0
1088
0
        // Check that the used slot is inside the calculated lifetime range.
1089
0
        // If it is not, warn about it and invalidate the range.
1090
0
        LiveInterval *Interval = &*Intervals[Slot];
1091
0
        SlotIndex Index = Indexes->getInstructionIndex(I);
1092
0
        if (
Interval->find(Index) == Interval->end()0
) {
1093
0
          Interval->clear();
1094
0
          DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n");
1095
0
          EscapedAllocas++;
1096
0
        }
1097
0
      }
1098
0
    }
1099
0
}
1100
1101
void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
1102
33.7k
                                   unsigned NumSlots) {
1103
33.7k
  // Expunge slot remap map.
1104
97.2k
  for (unsigned i=0; 
i < NumSlots97.2k
;
++i63.5k
) {
1105
63.5k
    // If we are remapping i
1106
63.5k
    if (
SlotRemap.count(i)63.5k
) {
1107
8.36k
      int Target = SlotRemap[i];
1108
8.36k
      // As long as our target is mapped to something else, follow it.
1109
8.36k
      while (
SlotRemap.count(Target)8.36k
) {
1110
0
        Target = SlotRemap[Target];
1111
0
        SlotRemap[i] = Target;
1112
0
      }
1113
8.36k
    }
1114
63.5k
  }
1115
33.7k
}
1116
1117
588k
bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
1118
588k
  DEBUG(dbgs() << "********** Stack Coloring **********\n"
1119
588k
               << "********** Function: "
1120
588k
               << ((const Value*)Func.getFunction())->getName() << '\n');
1121
588k
  MF = &Func;
1122
588k
  MFI = &MF->getFrameInfo();
1123
588k
  Indexes = &getAnalysis<SlotIndexes>();
1124
588k
  SP = &getAnalysis<StackProtector>();
1125
588k
  BlockLiveness.clear();
1126
588k
  BasicBlocks.clear();
1127
588k
  BasicBlockNumbering.clear();
1128
588k
  Markers.clear();
1129
588k
  Intervals.clear();
1130
588k
  LiveStarts.clear();
1131
588k
  VNInfoAllocator.Reset();
1132
588k
1133
588k
  unsigned NumSlots = MFI->getObjectIndexEnd();
1134
588k
1135
588k
  // If there are no stack slots then there are no markers to remove.
1136
588k
  if (!NumSlots)
1137
538k
    return false;
1138
50.0k
1139
50.0k
  SmallVector<int, 8> SortedSlots;
1140
50.0k
  SortedSlots.reserve(NumSlots);
1141
50.0k
  Intervals.reserve(NumSlots);
1142
50.0k
  LiveStarts.resize(NumSlots);
1143
50.0k
1144
50.0k
  unsigned NumMarkers = collectMarkers(NumSlots);
1145
50.0k
1146
50.0k
  unsigned TotalSize = 0;
1147
50.0k
  DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n");
1148
50.0k
  DEBUG(dbgs()<<"Slot structure:\n");
1149
50.0k
1150
142k
  for (int i=0; 
i < MFI->getObjectIndexEnd()142k
;
++i92.1k
) {
1151
92.1k
    DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n");
1152
92.1k
    TotalSize += MFI->getObjectSize(i);
1153
92.1k
  }
1154
50.0k
1155
50.0k
  DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n");
1156
50.0k
1157
50.0k
  // Don't continue because there are not enough lifetime markers, or the
1158
50.0k
  // stack is too small, or we are told not to optimize the slots.
1159
50.0k
  if (
NumMarkers < 2 || 50.0k
TotalSize < 1641.5k
||
DisableColoring33.7k
||
1160
50.0k
      
skipFunction(*Func.getFunction())33.7k
) {
1161
16.3k
    DEBUG(dbgs()<<"Will not try to merge slots.\n");
1162
16.3k
    return removeAllMarkers();
1163
16.3k
  }
1164
33.7k
1165
97.2k
  
for (unsigned i=0; 33.7k
i < NumSlots97.2k
;
++i63.5k
) {
1166
63.5k
    std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0));
1167
63.5k
    LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
1168
63.5k
    Intervals.push_back(std::move(LI));
1169
63.5k
    SortedSlots.push_back(i);
1170
63.5k
  }
1171
33.7k
1172
33.7k
  // Calculate the liveness of each block.
1173
33.7k
  calculateLocalLiveness();
1174
33.7k
  DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n");
1175
33.7k
  DEBUG(dump());
1176
33.7k
1177
33.7k
  // Propagate the liveness information.
1178
33.7k
  calculateLiveIntervals(NumSlots);
1179
33.7k
  DEBUG(dumpIntervals());
1180
33.7k
1181
33.7k
  // Search for allocas which are used outside of the declared lifetime
1182
33.7k
  // markers.
1183
33.7k
  if (ProtectFromEscapedAllocas)
1184
0
    removeInvalidSlotRanges();
1185
33.7k
1186
33.7k
  // Maps old slots to new slots.
1187
33.7k
  DenseMap<int, int> SlotRemap;
1188
33.7k
  unsigned RemovedSlots = 0;
1189
33.7k
  unsigned ReducedSize = 0;
1190
33.7k
1191
33.7k
  // Do not bother looking at empty intervals.
1192
97.2k
  for (unsigned I = 0; 
I < NumSlots97.2k
;
++I63.5k
) {
1193
63.5k
    if (Intervals[SortedSlots[I]]->empty())
1194
6.34k
      SortedSlots[I] = -1;
1195
63.5k
  }
1196
33.7k
1197
33.7k
  // This is a simple greedy algorithm for merging allocas. First, sort the
1198
33.7k
  // slots, placing the largest slots first. Next, perform an n^2 scan and look
1199
33.7k
  // for disjoint slots. When you find disjoint slots, merge the samller one
1200
33.7k
  // into the bigger one and update the live interval. Remove the small alloca
1201
33.7k
  // and continue.
1202
33.7k
1203
33.7k
  // Sort the slots according to their size. Place unused slots at the end.
1204
33.7k
  // Use stable sort to guarantee deterministic code generation.
1205
33.7k
  std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
1206
63.6k
                   [this](int LHS, int RHS) {
1207
63.6k
    // We use -1 to denote a uninteresting slot. Place these slots at the end.
1208
63.6k
    if (
LHS == -163.6k
)
return false505
;
1209
63.1k
    
if (63.1k
RHS == -163.1k
)
return true13.9k
;
1210
49.2k
    // Sort according to size.
1211
49.2k
    return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
1212
49.2k
  });
1213
33.7k
1214
33.7k
  for (auto &s : LiveStarts)
1215
63.5k
    std::sort(s.begin(), s.end());
1216
33.7k
1217
33.7k
  bool Changed = true;
1218
71.0k
  while (
Changed71.0k
) {
1219
37.3k
    Changed = false;
1220
120k
    for (unsigned I = 0; 
I < NumSlots120k
;
++I83.2k
) {
1221
83.2k
      if (SortedSlots[I] == -1)
1222
23.4k
        continue;
1223
59.7k
1224
146k
      
for (unsigned J=I+1; 59.7k
J < NumSlots146k
;
++J87.0k
) {
1225
87.0k
        if (SortedSlots[J] == -1)
1226
31.8k
          continue;
1227
55.1k
1228
55.1k
        int FirstSlot = SortedSlots[I];
1229
55.1k
        int SecondSlot = SortedSlots[J];
1230
55.1k
        LiveInterval *First = &*Intervals[FirstSlot];
1231
55.1k
        LiveInterval *Second = &*Intervals[SecondSlot];
1232
55.1k
        auto &FirstS = LiveStarts[FirstSlot];
1233
55.1k
        auto &SecondS = LiveStarts[SecondSlot];
1234
55.1k
        assert (!First->empty() && !Second->empty() && "Found an empty range");
1235
55.1k
1236
55.1k
        // Merge disjoint slots. This is a little bit tricky - see the
1237
55.1k
        // Implementation Notes section for an explanation.
1238
55.1k
        if (!First->isLiveAtIndexes(SecondS) &&
1239
55.1k
            
!Second->isLiveAtIndexes(FirstS)16.2k
) {
1240
8.36k
          Changed = true;
1241
8.36k
          First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
1242
8.36k
1243
8.36k
          int OldSize = FirstS.size();
1244
8.36k
          FirstS.append(SecondS.begin(), SecondS.end());
1245
8.36k
          auto Mid = FirstS.begin() + OldSize;
1246
8.36k
          std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
1247
8.36k
1248
8.36k
          SlotRemap[SecondSlot] = FirstSlot;
1249
8.36k
          SortedSlots[J] = -1;
1250
8.36k
          DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
1251
8.36k
                SecondSlot<<" together.\n");
1252
8.36k
          unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
1253
8.36k
                                           MFI->getObjectAlignment(SecondSlot));
1254
8.36k
1255
8.36k
          assert(MFI->getObjectSize(FirstSlot) >=
1256
8.36k
                 MFI->getObjectSize(SecondSlot) &&
1257
8.36k
                 "Merging a small object into a larger one");
1258
8.36k
1259
8.36k
          RemovedSlots+=1;
1260
8.36k
          ReducedSize += MFI->getObjectSize(SecondSlot);
1261
8.36k
          MFI->setObjectAlignment(FirstSlot, MaxAlignment);
1262
8.36k
          MFI->RemoveStackObject(SecondSlot);
1263
8.36k
        }
1264
87.0k
      }
1265
83.2k
    }
1266
37.3k
  }// While changed.
1267
33.7k
1268
33.7k
  // Record statistics.
1269
33.7k
  StackSpaceSaved += ReducedSize;
1270
33.7k
  StackSlotMerged += RemovedSlots;
1271
33.7k
  DEBUG(dbgs()<<"Merge "<<RemovedSlots<<" slots. Saved "<<
1272
588k
        ReducedSize<<" bytes\n");
1273
588k
1274
588k
  // Scan the entire function and update all machine operands that use frame
1275
588k
  // indices to use the remapped frame index.
1276
588k
  expungeSlotMap(SlotRemap, NumSlots);
1277
588k
  remapInstructions(SlotRemap);
1278
588k
1279
588k
  return removeAllMarkers();
1280
588k
}