Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/RegAllocPBQP.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
11
// register allocator for LLVM. This allocator works by constructing a PBQP
12
// problem representing the register allocation problem under consideration,
13
// solving this using a PBQP solver, and mapping the solution back to a
14
// register assignment. If any variables are selected for spilling then spill
15
// code is inserted and the process repeated.
16
//
17
// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
18
// for register allocation. For more information on PBQP for register
19
// allocation, see the following papers:
20
//
21
//   (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
22
//   PBQP. In Proceedings of the 7th Joint Modular Languages Conference
23
//   (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
24
//
25
//   (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
26
//   architectures. In Proceedings of the Joint Conference on Languages,
27
//   Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
28
//   NY, USA, 139-148.
29
//
30
//===----------------------------------------------------------------------===//
31
32
#include "llvm/CodeGen/RegAllocPBQP.h"
33
#include "RegisterCoalescer.h"
34
#include "Spiller.h"
35
#include "llvm/ADT/ArrayRef.h"
36
#include "llvm/ADT/BitVector.h"
37
#include "llvm/ADT/DenseMap.h"
38
#include "llvm/ADT/DenseSet.h"
39
#include "llvm/ADT/STLExtras.h"
40
#include "llvm/ADT/SmallPtrSet.h"
41
#include "llvm/ADT/SmallVector.h"
42
#include "llvm/ADT/StringRef.h"
43
#include "llvm/Analysis/AliasAnalysis.h"
44
#include "llvm/CodeGen/CalcSpillWeights.h"
45
#include "llvm/CodeGen/LiveInterval.h"
46
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
47
#include "llvm/CodeGen/LiveRangeEdit.h"
48
#include "llvm/CodeGen/LiveStackAnalysis.h"
49
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
50
#include "llvm/CodeGen/MachineDominators.h"
51
#include "llvm/CodeGen/MachineFunction.h"
52
#include "llvm/CodeGen/MachineFunctionPass.h"
53
#include "llvm/CodeGen/MachineInstr.h"
54
#include "llvm/CodeGen/MachineLoopInfo.h"
55
#include "llvm/CodeGen/MachineRegisterInfo.h"
56
#include "llvm/CodeGen/PBQP/Graph.h"
57
#include "llvm/CodeGen/PBQP/Math.h"
58
#include "llvm/CodeGen/PBQP/Solution.h"
59
#include "llvm/CodeGen/PBQPRAConstraint.h"
60
#include "llvm/CodeGen/RegAllocRegistry.h"
61
#include "llvm/CodeGen/SlotIndexes.h"
62
#include "llvm/CodeGen/VirtRegMap.h"
63
#include "llvm/IR/Function.h"
64
#include "llvm/IR/Module.h"
65
#include "llvm/MC/MCRegisterInfo.h"
66
#include "llvm/Pass.h"
67
#include "llvm/Support/CommandLine.h"
68
#include "llvm/Support/Compiler.h"
69
#include "llvm/Support/Debug.h"
70
#include "llvm/Support/FileSystem.h"
71
#include "llvm/Support/Printable.h"
72
#include "llvm/Support/raw_ostream.h"
73
#include "llvm/Target/TargetRegisterInfo.h"
74
#include "llvm/Target/TargetSubtargetInfo.h"
75
#include <algorithm>
76
#include <cassert>
77
#include <cstddef>
78
#include <limits>
79
#include <map>
80
#include <memory>
81
#include <queue>
82
#include <set>
83
#include <sstream>
84
#include <string>
85
#include <system_error>
86
#include <tuple>
87
#include <utility>
88
#include <vector>
89
90
using namespace llvm;
91
92
#define DEBUG_TYPE "regalloc"
93
94
static RegisterRegAlloc
95
RegisterPBQPRepAlloc("pbqp", "PBQP register allocator",
96
                       createDefaultPBQPRegisterAllocator);
97
98
static cl::opt<bool>
99
PBQPCoalescing("pbqp-coalescing",
100
                cl::desc("Attempt coalescing during PBQP register allocation."),
101
                cl::init(false), cl::Hidden);
102
103
#ifndef NDEBUG
104
static cl::opt<bool>
105
PBQPDumpGraphs("pbqp-dump-graphs",
106
               cl::desc("Dump graphs for each function/round in the compilation unit."),
107
               cl::init(false), cl::Hidden);
108
#endif
109
110
namespace {
111
112
///
113
/// PBQP based allocators solve the register allocation problem by mapping
114
/// register allocation problems to Partitioned Boolean Quadratic
115
/// Programming problems.
116
class RegAllocPBQP : public MachineFunctionPass {
117
public:
118
  static char ID;
119
120
  /// Construct a PBQP register allocator.
121
  RegAllocPBQP(char *cPassID = nullptr)
122
7
      : MachineFunctionPass(ID), customPassID(cPassID) {
123
7
    initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
124
7
    initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
125
7
    initializeLiveStacksPass(*PassRegistry::getPassRegistry());
126
7
    initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
127
7
  }
128
129
  /// Return the pass name.
130
7
  StringRef getPassName() const override { return "PBQP Register Allocator"; }
131
132
  /// PBQP analysis usage.
133
  void getAnalysisUsage(AnalysisUsage &au) const override;
134
135
  /// Perform register allocation
136
  bool runOnMachineFunction(MachineFunction &MF) override;
137
138
7
  MachineFunctionProperties getRequiredProperties() const override {
139
7
    return MachineFunctionProperties().set(
140
7
        MachineFunctionProperties::Property::NoPHIs);
141
7
  }
142
143
private:
144
  using LI2NodeMap = std::map<const LiveInterval *, unsigned>;
145
  using Node2LIMap = std::vector<const LiveInterval *>;
146
  using AllowedSet = std::vector<unsigned>;
147
  using AllowedSetMap = std::vector<AllowedSet>;
148
  using RegPair = std::pair<unsigned, unsigned>;
149
  using CoalesceMap = std::map<RegPair, PBQP::PBQPNum>;
150
  using RegSet = std::set<unsigned>;
151
152
  char *customPassID;
153
154
  RegSet VRegsToAlloc, EmptyIntervalVRegs;
155
156
  /// Inst which is a def of an original reg and whose defs are already all
157
  /// dead after remat is saved in DeadRemats. The deletion of such inst is
158
  /// postponed till all the allocations are done, so its remat expr is
159
  /// always available for the remat of all the siblings of the original reg.
160
  SmallPtrSet<MachineInstr *, 32> DeadRemats;
161
162
  /// \brief Finds the initial set of vreg intervals to allocate.
163
  void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
164
165
  /// \brief Constructs an initial graph.
166
  void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller);
167
168
  /// \brief Spill the given VReg.
169
  void spillVReg(unsigned VReg, SmallVectorImpl<unsigned> &NewIntervals,
170
                 MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,
171
                 Spiller &VRegSpiller);
172
173
  /// \brief Given a solved PBQP problem maps this solution back to a register
174
  /// assignment.
175
  bool mapPBQPToRegAlloc(const PBQPRAGraph &G,
176
                         const PBQP::Solution &Solution,
177
                         VirtRegMap &VRM,
178
                         Spiller &VRegSpiller);
179
180
  /// \brief Postprocessing before final spilling. Sets basic block "live in"
181
  /// variables.
182
  void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
183
                     VirtRegMap &VRM) const;
184
185
  void postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS);
186
};
187
188
char RegAllocPBQP::ID = 0;
189
190
/// @brief Set spill costs for each node in the PBQP reg-alloc graph.
191
class SpillCosts : public PBQPRAConstraint {
192
public:
193
8
  void apply(PBQPRAGraph &G) override {
194
8
    LiveIntervals &LIS = G.getMetadata().LIS;
195
8
196
8
    // A minimum spill cost, so that register constraints can be set
197
8
    // without normalization in the half-open interval [0.0, MinSpillCost).
198
8
    const PBQP::PBQPNum MinSpillCost = 10.0;
199
8
200
155
    for (auto NId : G.nodeIds()) {
201
155
      PBQP::PBQPNum SpillCost =
202
155
        LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight;
203
155
      if (SpillCost == 0.0)
204
0
        SpillCost = std::numeric_limits<PBQP::PBQPNum>::min();
205
155
      else
206
155
        SpillCost += MinSpillCost;
207
155
      PBQPRAGraph::RawVector NodeCosts(G.getNodeCosts(NId));
208
155
      NodeCosts[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost;
209
155
      G.setNodeCosts(NId, std::move(NodeCosts));
210
155
    }
211
8
  }
212
};
213
214
/// @brief Add interference edges between overlapping vregs.
215
class Interference : public PBQPRAConstraint {
216
private:
217
  using AllowedRegVecPtr = const PBQP::RegAlloc::AllowedRegVector *;
218
  using IKey = std::pair<AllowedRegVecPtr, AllowedRegVecPtr>;
219
  using IMatrixCache = DenseMap<IKey, PBQPRAGraph::MatrixPtr>;
220
  using DisjointAllowedRegsCache = DenseSet<IKey>;
221
  using IEdgeKey = std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId>;
222
  using IEdgeCache = DenseSet<IEdgeKey>;
223
224
  bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
225
                               PBQPRAGraph::NodeId MId,
226
1.07k
                               const DisjointAllowedRegsCache &D) const {
227
1.07k
    const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
228
1.07k
    const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
229
1.07k
230
1.07k
    if (NRegs == MRegs)
231
490
      return false;
232
584
233
584
    
if (584
NRegs < MRegs584
)
234
389
      return D.count(IKey(NRegs, MRegs)) > 0;
235
195
236
195
    return D.count(IKey(MRegs, NRegs)) > 0;
237
195
  }
238
239
  void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
240
                              PBQPRAGraph::NodeId MId,
241
14
                              DisjointAllowedRegsCache &D) {
242
14
    const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
243
14
    const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
244
14
245
14
    assert(NRegs != MRegs && "AllowedRegs can not be disjoint with itself");
246
14
247
14
    if (NRegs < MRegs)
248
9
      D.insert(IKey(NRegs, MRegs));
249
14
    else
250
5
      D.insert(IKey(MRegs, NRegs));
251
14
  }
252
253
  // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
254
  // for the fast interference graph construction algorithm. The last is there
255
  // to save us from looking up node ids via the VRegToNode map in the graph
256
  // metadata.
257
  using IntervalInfo =
258
      std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>;
259
260
2.81k
  static SlotIndex getStartPoint(const IntervalInfo &I) {
261
2.81k
    return std::get<0>(I)->segments[std::get<1>(I)].start;
262
2.81k
  }
263
264
1.59k
  static SlotIndex getEndPoint(const IntervalInfo &I) {
265
1.59k
    return std::get<0>(I)->segments[std::get<1>(I)].end;
266
1.59k
  }
267
268
1.24k
  static PBQP::GraphBase::NodeId getNodeId(const IntervalInfo &I) {
269
1.24k
    return std::get<2>(I);
270
1.24k
  }
271
272
  static bool lowestStartPoint(const IntervalInfo &I1,
273
1.26k
                               const IntervalInfo &I2) {
274
1.26k
    // Condition reversed because priority queue has the *highest* element at
275
1.26k
    // the front, rather than the lowest.
276
1.26k
    return getStartPoint(I1) > getStartPoint(I2);
277
1.26k
  }
278
279
  static bool lowestEndPoint(const IntervalInfo &I1,
280
653
                             const IntervalInfo &I2) {
281
653
    SlotIndex E1 = getEndPoint(I1);
282
653
    SlotIndex E2 = getEndPoint(I2);
283
653
284
653
    if (E1 < E2)
285
393
      return true;
286
260
287
260
    
if (260
E1 > E2260
)
288
120
      return false;
289
140
290
140
    // If two intervals end at the same point, we need a way to break the tie or
291
140
    // the set will assume they're actually equal and refuse to insert a
292
140
    // "duplicate". Just compare the vregs - fast and guaranteed unique.
293
140
    return std::get<0>(I1)->reg < std::get<0>(I2)->reg;
294
140
  }
295
296
144
  static bool isAtLastSegment(const IntervalInfo &I) {
297
144
    return std::get<1>(I) == std::get<0>(I)->size() - 1;
298
144
  }
299
300
14
  static IntervalInfo nextSegment(const IntervalInfo &I) {
301
14
    return std::make_tuple(std::get<0>(I), std::get<1>(I) + 1, std::get<2>(I));
302
14
  }
303
304
public:
305
8
  void apply(PBQPRAGraph &G) override {
306
8
    // The following is loosely based on the linear scan algorithm introduced in
307
8
    // "Linear Scan Register Allocation" by Poletto and Sarkar. This version
308
8
    // isn't linear, because the size of the active set isn't bound by the
309
8
    // number of registers, but rather the size of the largest clique in the
310
8
    // graph. Still, we expect this to be better than N^2.
311
8
    LiveIntervals &LIS = G.getMetadata().LIS;
312
8
313
8
    // Interference matrices are incredibly regular - they're only a function of
314
8
    // the allowed sets, so we cache them to avoid the overhead of constructing
315
8
    // and uniquing them.
316
8
    IMatrixCache C;
317
8
318
8
    // Finding an edge is expensive in the worst case (O(max_clique(G))). So
319
8
    // cache locally edges we have already seen.
320
8
    IEdgeCache EC;
321
8
322
8
    // Cache known disjoint allowed registers pairs
323
8
    DisjointAllowedRegsCache D;
324
8
325
8
    using IntervalSet = std::set<IntervalInfo, decltype(&lowestEndPoint)>;
326
8
    using IntervalQueue =
327
8
        std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
328
8
                            decltype(&lowestStartPoint)>;
329
8
    IntervalSet Active(lowestEndPoint);
330
8
    IntervalQueue Inactive(lowestStartPoint);
331
8
332
8
    // Start by building the inactive set.
333
155
    for (auto NId : G.nodeIds()) {
334
155
      unsigned VReg = G.getNodeMetadata(NId).getVReg();
335
155
      LiveInterval &LI = LIS.getInterval(VReg);
336
155
      assert(!LI.empty() && "PBQP graph contains node for empty interval");
337
155
      Inactive.push(std::make_tuple(&LI, 0, NId));
338
155
    }
339
8
340
177
    while (
!Inactive.empty()177
) {
341
169
      // Tentatively grab the "next" interval - this choice may be overridden
342
169
      // below.
343
169
      IntervalInfo Cur = Inactive.top();
344
169
345
169
      // Retire any active intervals that end before Cur starts.
346
169
      IntervalSet::iterator RetireItr = Active.begin();
347
313
      while (RetireItr != Active.end() &&
348
293
             
(getEndPoint(*RetireItr) <= getStartPoint(Cur))293
) {
349
144
        // If this interval has subsequent segments, add the next one to the
350
144
        // inactive list.
351
144
        if (!isAtLastSegment(*RetireItr))
352
14
          Inactive.push(nextSegment(*RetireItr));
353
144
354
144
        ++RetireItr;
355
144
      }
356
169
      Active.erase(Active.begin(), RetireItr);
357
169
358
169
      // One of the newly retired segments may actually start before the
359
169
      // Cur segment, so re-grab the front of the inactive list.
360
169
      Cur = Inactive.top();
361
169
      Inactive.pop();
362
169
363
169
      // At this point we know that Cur overlaps all active intervals. Add the
364
169
      // interference edges.
365
169
      PBQP::GraphBase::NodeId NId = getNodeId(Cur);
366
1.07k
      for (const auto &A : Active) {
367
1.07k
        PBQP::GraphBase::NodeId MId = getNodeId(A);
368
1.07k
369
1.07k
        // Do not add an edge when the nodes' allowed registers do not
370
1.07k
        // intersect: there is obviously no interference.
371
1.07k
        if (haveDisjointAllowedRegs(G, NId, MId, D))
372
476
          continue;
373
598
374
598
        // Check that we haven't already added this edge
375
598
        IEdgeKey EK(std::min(NId, MId), std::max(NId, MId));
376
598
        if (EC.count(EK))
377
48
          continue;
378
550
379
550
        // This is a new edge - add it to the graph.
380
550
        
if (550
!createInterferenceEdge(G, NId, MId, C)550
)
381
14
          setDisjointAllowedRegs(G, NId, MId, D);
382
550
        else
383
536
          EC.insert(EK);
384
1.07k
      }
385
169
386
169
      // Finally, add Cur to the Active set.
387
169
      Active.insert(Cur);
388
169
    }
389
8
  }
390
391
private:
392
  // Create an Interference edge and add it to the graph, unless it is
393
  // a null matrix, meaning the nodes' allowed registers do not have any
394
  // interference. This case occurs frequently between integer and floating
395
  // point registers for example.
396
  // Returns true iff the two nodes interfere.
397
  bool createInterferenceEdge(PBQPRAGraph &G,
398
                              PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId,
399
550
                              IMatrixCache &C) {
400
550
    const TargetRegisterInfo &TRI =
401
550
        *G.getMetadata().MF.getSubtarget().getRegisterInfo();
402
550
    const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
403
550
    const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
404
550
405
550
    // Try looking the edge costs up in the IMatrixCache first.
406
550
    IKey K(&NRegs, &MRegs);
407
550
    IMatrixCache::iterator I = C.find(K);
408
550
    if (
I != C.end()550
) {
409
499
      G.addEdgeBypassingCostAllocator(NId, MId, I->second);
410
499
      return true;
411
499
    }
412
51
413
51
    PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
414
51
    bool NodesInterfere = false;
415
1.54k
    for (unsigned I = 0; 
I != NRegs.size()1.54k
;
++I1.49k
) {
416
1.49k
      unsigned PRegN = NRegs[I];
417
41.8k
      for (unsigned J = 0; 
J != MRegs.size()41.8k
;
++J40.3k
) {
418
40.3k
        unsigned PRegM = MRegs[J];
419
40.3k
        if (
TRI.regsOverlap(PRegN, PRegM)40.3k
) {
420
946
          M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
421
946
          NodesInterfere = true;
422
946
        }
423
40.3k
      }
424
1.49k
    }
425
51
426
51
    if (!NodesInterfere)
427
14
      return false;
428
37
429
37
    PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
430
37
    C[K] = G.getEdgeCostsPtr(EId);
431
37
432
37
    return true;
433
37
  }
434
};
435
436
class Coalescing : public PBQPRAConstraint {
437
public:
438
5
  void apply(PBQPRAGraph &G) override {
439
5
    MachineFunction &MF = G.getMetadata().MF;
440
5
    MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI;
441
5
    CoalescerPair CP(*MF.getSubtarget().getRegisterInfo());
442
5
443
5
    // Scan the machine function and add a coalescing cost whenever CoalescerPair
444
5
    // gives the Ok.
445
8
    for (const auto &MBB : MF) {
446
165
      for (const auto &MI : MBB) {
447
165
        // Skip not-coalescable or already coalesced copies.
448
165
        if (
!CP.setRegisters(&MI) || 165
CP.getSrcReg() == CP.getDstReg()26
)
449
139
          continue;
450
26
451
26
        unsigned DstReg = CP.getDstReg();
452
26
        unsigned SrcReg = CP.getSrcReg();
453
26
454
26
        const float Scale = 1.0f / MBFI.getEntryFreq();
455
26
        PBQP::PBQPNum CBenefit = MBFI.getBlockFreq(&MBB).getFrequency() * Scale;
456
26
457
26
        if (
CP.isPhys()26
) {
458
22
          if (!MF.getRegInfo().isAllocatable(DstReg))
459
0
            continue;
460
22
461
22
          PBQPRAGraph::NodeId NId = G.getMetadata().getNodeIdForVReg(SrcReg);
462
22
463
22
          const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed =
464
22
            G.getNodeMetadata(NId).getAllowedRegs();
465
22
466
22
          unsigned PRegOpt = 0;
467
424
          while (
PRegOpt < Allowed.size() && 424
Allowed[PRegOpt] != DstReg423
)
468
402
            ++PRegOpt;
469
22
470
22
          if (
PRegOpt < Allowed.size()22
) {
471
21
            PBQPRAGraph::RawVector NewCosts(G.getNodeCosts(NId));
472
21
            NewCosts[PRegOpt + 1] -= CBenefit;
473
21
            G.setNodeCosts(NId, std::move(NewCosts));
474
21
          }
475
26
        } else {
476
4
          PBQPRAGraph::NodeId N1Id = G.getMetadata().getNodeIdForVReg(DstReg);
477
4
          PBQPRAGraph::NodeId N2Id = G.getMetadata().getNodeIdForVReg(SrcReg);
478
4
          const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 =
479
4
            &G.getNodeMetadata(N1Id).getAllowedRegs();
480
4
          const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 =
481
4
            &G.getNodeMetadata(N2Id).getAllowedRegs();
482
4
483
4
          PBQPRAGraph::EdgeId EId = G.findEdge(N1Id, N2Id);
484
4
          if (
EId == G.invalidEdgeId()4
) {
485
0
            PBQPRAGraph::RawMatrix Costs(Allowed1->size() + 1,
486
0
                                         Allowed2->size() + 1, 0);
487
0
            addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
488
0
            G.addEdge(N1Id, N2Id, std::move(Costs));
489
4
          } else {
490
4
            if (
G.getEdgeNode1Id(EId) == N2Id4
) {
491
0
              std::swap(N1Id, N2Id);
492
0
              std::swap(Allowed1, Allowed2);
493
0
            }
494
4
            PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId));
495
4
            addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
496
4
            G.updateEdgeCosts(EId, std::move(Costs));
497
4
          }
498
4
        }
499
165
      }
500
8
    }
501
5
  }
502
503
private:
504
  void addVirtRegCoalesce(
505
                    PBQPRAGraph::RawMatrix &CostMat,
506
                    const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1,
507
                    const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2,
508
4
                    PBQP::PBQPNum Benefit) {
509
4
    assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch.");
510
4
    assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch.");
511
131
    for (unsigned I = 0; 
I != Allowed1.size()131
;
++I127
) {
512
127
      unsigned PReg1 = Allowed1[I];
513
4.16k
      for (unsigned J = 0; 
J != Allowed2.size()4.16k
;
++J4.03k
) {
514
4.03k
        unsigned PReg2 = Allowed2[J];
515
4.03k
        if (PReg1 == PReg2)
516
127
          CostMat[I + 1][J + 1] -= Benefit;
517
4.03k
      }
518
127
    }
519
4
  }
520
};
521
522
} // end anonymous namespace
523
524
// Out-of-line destructor/anchor for PBQPRAConstraint.
525
31
PBQPRAConstraint::~PBQPRAConstraint() = default;
526
527
0
void PBQPRAConstraint::anchor() {}
528
529
0
void PBQPRAConstraintList::anchor() {}
530
531
7
void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
532
7
  au.setPreservesCFG();
533
7
  au.addRequired<AAResultsWrapperPass>();
534
7
  au.addPreserved<AAResultsWrapperPass>();
535
7
  au.addRequired<SlotIndexes>();
536
7
  au.addPreserved<SlotIndexes>();
537
7
  au.addRequired<LiveIntervals>();
538
7
  au.addPreserved<LiveIntervals>();
539
7
  //au.addRequiredID(SplitCriticalEdgesID);
540
7
  if (customPassID)
541
0
    au.addRequiredID(*customPassID);
542
7
  au.addRequired<LiveStacks>();
543
7
  au.addPreserved<LiveStacks>();
544
7
  au.addRequired<MachineBlockFrequencyInfo>();
545
7
  au.addPreserved<MachineBlockFrequencyInfo>();
546
7
  au.addRequired<MachineLoopInfo>();
547
7
  au.addPreserved<MachineLoopInfo>();
548
7
  au.addRequired<MachineDominatorTree>();
549
7
  au.addPreserved<MachineDominatorTree>();
550
7
  au.addRequired<VirtRegMap>();
551
7
  au.addPreserved<VirtRegMap>();
552
7
  MachineFunctionPass::getAnalysisUsage(au);
553
7
}
554
555
void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
556
7
                                            LiveIntervals &LIS) {
557
7
  const MachineRegisterInfo &MRI = MF.getRegInfo();
558
7
559
7
  // Iterate over all live ranges.
560
195
  for (unsigned I = 0, E = MRI.getNumVirtRegs(); 
I != E195
;
++I188
) {
561
188
    unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
562
188
    if (MRI.reg_nodbg_empty(Reg))
563
43
      continue;
564
145
    LiveInterval &LI = LIS.getInterval(Reg);
565
145
566
145
    // If this live interval is non-empty we will use pbqp to allocate it.
567
145
    // Empty intervals we allocate in a simple post-processing stage in
568
145
    // finalizeAlloc.
569
145
    if (
!LI.empty()145
) {
570
143
      VRegsToAlloc.insert(LI.reg);
571
145
    } else {
572
2
      EmptyIntervalVRegs.insert(LI.reg);
573
2
    }
574
188
  }
575
7
}
576
577
static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI,
578
4.15k
                                   const MachineFunction &MF) {
579
4.15k
  const MCPhysReg *CSR = MF.getRegInfo().getCalleeSavedRegs();
580
74.1k
  for (unsigned i = 0; 
CSR[i] != 074.1k
;
++i70.0k
)
581
71.3k
    
if (71.3k
TRI.regsOverlap(reg, CSR[i])71.3k
)
582
1.29k
      return true;
583
2.86k
  return false;
584
4.15k
}
585
586
void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
587
8
                                   Spiller &VRegSpiller) {
588
8
  MachineFunction &MF = G.getMetadata().MF;
589
8
590
8
  LiveIntervals &LIS = G.getMetadata().LIS;
591
8
  const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
592
8
  const TargetRegisterInfo &TRI =
593
8
      *G.getMetadata().MF.getSubtarget().getRegisterInfo();
594
8
595
8
  std::vector<unsigned> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
596
8
597
163
  while (
!Worklist.empty()163
) {
598
155
    unsigned VReg = Worklist.back();
599
155
    Worklist.pop_back();
600
155
601
155
    const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
602
155
    LiveInterval &VRegLI = LIS.getInterval(VReg);
603
155
604
155
    // Record any overlaps with regmask operands.
605
155
    BitVector RegMaskOverlaps;
606
155
    LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps);
607
155
608
155
    // Compute an initial allowed set for the current vreg.
609
155
    std::vector<unsigned> VRegAllowed;
610
155
    ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF);
611
4.62k
    for (unsigned I = 0; 
I != RawPRegOrder.size()4.62k
;
++I4.46k
) {
612
4.46k
      unsigned PReg = RawPRegOrder[I];
613
4.46k
      if (MRI.isReserved(PReg))
614
69
        continue;
615
4.39k
616
4.39k
      // vregLI crosses a regmask operand that clobbers preg.
617
4.39k
      
if (4.39k
!RegMaskOverlaps.empty() && 4.39k
!RegMaskOverlaps.test(PReg)361
)
618
217
        continue;
619
4.18k
620
4.18k
      // vregLI overlaps fixed regunit interference.
621
4.18k
      bool Interference = false;
622
8.39k
      for (MCRegUnitIterator Units(PReg, &TRI); 
Units.isValid()8.39k
;
++Units4.21k
) {
623
4.23k
        if (
VRegLI.overlaps(LIS.getRegUnit(*Units))4.23k
) {
624
24
          Interference = true;
625
24
          break;
626
24
        }
627
4.23k
      }
628
4.18k
      if (Interference)
629
24
        continue;
630
4.15k
631
4.15k
      // preg is usable for this virtual register.
632
4.15k
      VRegAllowed.push_back(PReg);
633
4.15k
    }
634
155
635
155
    // Check for vregs that have no allowed registers. These should be
636
155
    // pre-spilled and the new vregs added to the worklist.
637
155
    if (
VRegAllowed.empty()155
) {
638
0
      SmallVector<unsigned, 8> NewVRegs;
639
0
      spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
640
0
      Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end());
641
0
      continue;
642
0
    }
643
155
644
155
    PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);
645
155
646
155
    // Tweak cost of callee saved registers, as using them forces spilling and
647
155
    // restoring them. This would only happen in the prologue / epilogue though.
648
4.31k
    for (unsigned i = 0; 
i != VRegAllowed.size()4.31k
;
++i4.15k
)
649
4.15k
      
if (4.15k
isACalleeSavedRegister(VRegAllowed[i], TRI, MF)4.15k
)
650
1.29k
        NodeCosts[1 + i] += 1.0;
651
155
652
155
    PBQPRAGraph::NodeId NId = G.addNode(std::move(NodeCosts));
653
155
    G.getNodeMetadata(NId).setVReg(VReg);
654
155
    G.getNodeMetadata(NId).setAllowedRegs(
655
155
      G.getMetadata().getAllowedRegs(std::move(VRegAllowed)));
656
155
    G.getMetadata().setNodeIdForVReg(VReg, NId);
657
155
  }
658
8
}
659
660
void RegAllocPBQP::spillVReg(unsigned VReg,
661
                             SmallVectorImpl<unsigned> &NewIntervals,
662
                             MachineFunction &MF, LiveIntervals &LIS,
663
10
                             VirtRegMap &VRM, Spiller &VRegSpiller) {
664
10
  VRegsToAlloc.erase(VReg);
665
10
  LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM,
666
10
                    nullptr, &DeadRemats);
667
10
  VRegSpiller.spill(LRE);
668
10
669
10
  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
670
10
  (void)TRI;
671
10
  DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: "
672
10
               << LRE.getParent().weight << ", New vregs: ");
673
10
674
10
  // Copy any newly inserted live intervals into the list of regs to
675
10
  // allocate.
676
10
  for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end();
677
14
       
I != E14
;
++I4
) {
678
4
    const LiveInterval &LI = LIS.getInterval(*I);
679
4
    assert(!LI.empty() && "Empty spill range.");
680
4
    DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " ");
681
4
    VRegsToAlloc.insert(LI.reg);
682
4
  }
683
10
684
10
  DEBUG(dbgs() << ")\n");
685
10
}
686
687
bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
688
                                     const PBQP::Solution &Solution,
689
                                     VirtRegMap &VRM,
690
8
                                     Spiller &VRegSpiller) {
691
8
  MachineFunction &MF = G.getMetadata().MF;
692
8
  LiveIntervals &LIS = G.getMetadata().LIS;
693
8
  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
694
8
  (void)TRI;
695
8
696
8
  // Set to true if we have any spills
697
8
  bool AnotherRoundNeeded = false;
698
8
699
8
  // Clear the existing allocation.
700
8
  VRM.clearAllVirt();
701
8
702
8
  // Iterate over the nodes mapping the PBQP solution to a register
703
8
  // assignment.
704
155
  for (auto NId : G.nodeIds()) {
705
155
    unsigned VReg = G.getNodeMetadata(NId).getVReg();
706
155
    unsigned AllocOption = Solution.getSelection(NId);
707
155
708
155
    if (
AllocOption != PBQP::RegAlloc::getSpillOptionIdx()155
) {
709
145
      unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1];
710
145
      DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> "
711
145
            << TRI.getName(PReg) << "\n");
712
145
      assert(PReg != 0 && "Invalid preg selected.");
713
145
      VRM.assignVirt2Phys(VReg, PReg);
714
155
    } else {
715
10
      // Spill VReg. If this introduces new intervals we'll need another round
716
10
      // of allocation.
717
10
      SmallVector<unsigned, 8> NewVRegs;
718
10
      spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
719
10
      AnotherRoundNeeded |= !NewVRegs.empty();
720
10
    }
721
155
  }
722
8
723
8
  return !AnotherRoundNeeded;
724
8
}
725
726
void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
727
                                 LiveIntervals &LIS,
728
7
                                 VirtRegMap &VRM) const {
729
7
  MachineRegisterInfo &MRI = MF.getRegInfo();
730
7
731
7
  // First allocate registers for the empty intervals.
732
7
  for (RegSet::const_iterator
733
7
         I = EmptyIntervalVRegs.begin(), E = EmptyIntervalVRegs.end();
734
9
         
I != E9
;
++I2
) {
735
2
    LiveInterval &LI = LIS.getInterval(*I);
736
2
737
2
    unsigned PReg = MRI.getSimpleHint(LI.reg);
738
2
739
2
    if (
PReg == 02
) {
740
2
      const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg);
741
2
      const ArrayRef<MCPhysReg> RawPRegOrder = RC.getRawAllocationOrder(MF);
742
4
      for (unsigned CandidateReg : RawPRegOrder) {
743
4
        if (
!VRM.getRegInfo().isReserved(CandidateReg)4
) {
744
2
          PReg = CandidateReg;
745
2
          break;
746
2
        }
747
2
      }
748
2
      assert(PReg &&
749
2
             "No un-reserved physical registers in this register class");
750
2
    }
751
2
752
2
    VRM.assignVirt2Phys(LI.reg, PReg);
753
2
  }
754
7
}
755
756
7
/// Run the spiller's post-optimization step, then delete every instruction
/// recorded in DeadRemats and empty that set.
void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) {
  VRegSpiller.postOptimization();

  // Rematerialization can leave dead defs behind. Unregister each one from
  // the live-interval maps before erasing it from its parent.
  for (auto *DeadMI : DeadRemats) {
    LIS.RemoveMachineInstrFromMaps(*DeadMI);
    DeadMI->eraseFromParent();
  }

  DeadRemats.clear();
}
765
766
static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size,
767
134
                                         unsigned NumInstr) {
768
134
  // All intervals have a spill weight that is mostly proportional to the number
769
134
  // of uses, with uses in loops having a bigger weight.
770
134
  return NumInstr * normalizeSpillWeight(UseDefFreq, Size, 1);
771
134
}
772
773
7
/// Pass entry point: allocate registers for \p MF using PBQP.
///
/// Computes spill weights, builds the constraint list, then repeatedly
/// constructs and solves a PBQP graph until mapPBQPToRegAlloc() reports that
/// no further round is needed. Empty intervals are assigned afterwards by
/// finalizeAlloc(), and the per-function register sets are cleared before
/// returning.
///
/// \returns true unconditionally.
bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
  MachineBlockFrequencyInfo &MBFI =
    getAnalysis<MachineBlockFrequencyInfo>();

  VirtRegMap &VRM = getAnalysis<VirtRegMap>();

  calculateSpillWeightsAndHints(LIS, MF, &VRM, getAnalysis<MachineLoopInfo>(),
                                MBFI, normalizePBQPSpillWeight);

  std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM));

  MF.getRegInfo().freezeReservedRegs(MF);

  DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n");

  // Allocator main loop:
  //
  // * Map current regalloc problem to a PBQP problem
  // * Solve the PBQP problem
  // * Map the solution back to a register allocation
  // * Spill if necessary
  //
  // This process is continued till no more spills are generated.

  // Find the vreg intervals in need of allocation.
  findVRegIntervalsToAlloc(MF, LIS);

#ifndef NDEBUG
  // Prefix for the per-round graph dump file names.
  const Function &F = *MF.getFunction();
  std::string FullyQualifiedName =
    F.getParent()->getModuleIdentifier() + "." + F.getName().str();
#endif

  // If there are non-empty intervals allocate them using pbqp.
  if (!VRegsToAlloc.empty()) {
    const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
    std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
      llvm::make_unique<PBQPRAConstraintList>();
    ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>());
    ConstraintsRoot->addConstraint(llvm::make_unique<Interference>());
    if (PBQPCoalescing)
      ConstraintsRoot->addConstraint(llvm::make_unique<Coalescing>());
    ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints());

    bool PBQPAllocComplete = false;
    unsigned Round = 0;

    while (!PBQPAllocComplete) {
      DEBUG(dbgs() << "  PBQP Regalloc round " << Round << ":\n");

      // The graph is rebuilt from scratch every round.
      PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI));
      initializeGraph(G, VRM, *VRegSpiller);
      ConstraintsRoot->apply(G);

#ifndef NDEBUG
      if (PBQPDumpGraphs) {
        std::ostringstream RS;
        RS << Round;
        std::string GraphFileName = FullyQualifiedName + "." + RS.str() +
                                    ".pbqpgraph";
        std::error_code EC;
        raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text);
        if (EC) {
          // The dump is only a debugging aid: report the failure and carry
          // on allocating instead of writing to a stream that never opened.
          errs() << "warning: could not open \"" << GraphFileName
                 << "\" for PBQP graph dump: " << EC.message() << "\n";
        } else {
          DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
                << GraphFileName << "\"\n");
          G.dump(OS);
        }
      }
#endif

      PBQP::Solution Solution = PBQP::RegAlloc::solve(G);
      PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller);
      ++Round;
    }
  }

  // Finalise allocation, allocate empty ranges.
  finalizeAlloc(MF, LIS, VRM);
  postOptimization(*VRegSpiller, LIS);
  VRegsToAlloc.clear();
  EmptyIntervalVRegs.clear();

  DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n");

  return true;
}
858
859
/// Create Printable object for node and register info.
860
static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId,
861
0
                               const PBQP::RegAlloc::PBQPRAGraph &G) {
862
0
  return Printable([NId, &G](raw_ostream &OS) {
863
0
    const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
864
0
    const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
865
0
    unsigned VReg = G.getNodeMetadata(NId).getVReg();
866
0
    const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg));
867
0
    OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')';
868
0
  });
869
0
}
870
871
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
872
/// Write a textual description of the graph to \p OS: one line per node with
/// its cost vector, a blank line, then for every edge the two endpoints with
/// the matrix dimensions followed by the cost matrix itself.
LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
  // Node section.
  for (auto Node : nodeIds()) {
    const Vector &NodeCosts = getNodeCosts(Node);
    assert(NodeCosts.getLength() != 0 && "Empty vector in graph.");
    OS << PrintNodeInfo(Node, *this);
    OS << ": " << NodeCosts << '\n';
  }
  OS << '\n';

  // Edge section.
  for (auto Edge : edgeIds()) {
    const NodeId RowNode = getEdgeNode1Id(Edge);
    const NodeId ColNode = getEdgeNode2Id(Edge);
    assert(RowNode != ColNode && "PBQP graphs should not have self-edges.");

    const Matrix &Costs = getEdgeCosts(Edge);
    assert(Costs.getRows() != 0 && "No rows in matrix.");
    assert(Costs.getCols() != 0 && "No cols in matrix.");

    OS << PrintNodeInfo(RowNode, *this) << ' ' << Costs.getRows()
       << " rows / ";
    OS << PrintNodeInfo(ColNode, *this) << ' ' << Costs.getCols()
       << " cols:\n";
    OS << Costs << '\n';
  }
}
892
893
/// Convenience overload: dump the graph to the debug stream.
LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump() const {
  raw_ostream &DebugOS = dbgs();
  dump(DebugOS);
}
896
#endif
897
898
0
/// Emit the graph to \p OS in "graph { ... }" dot syntax. Every node is
/// labelled with its node info and cost vector; every edge is labelled with
/// the rows of its cost matrix.
void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const {
  OS << "graph {\n";

  // Node statements.
  for (auto Node : nodeIds()) {
    OS << "  node" << Node << " [ label=\"";
    OS << PrintNodeInfo(Node, *this) << "\\n";
    OS << getNodeCosts(Node) << "\" ]\n";
  }

  // Default edge attributes: the edge length is the number of nodes.
  OS << "  edge [ len=" << nodeIds().size() << " ]\n";

  // Edge statements.
  for (auto Edge : edgeIds()) {
    OS << "  node" << getEdgeNode1Id(Edge);
    OS << " -- node" << getEdgeNode2Id(Edge);
    OS << " [ label=\"";

    const Matrix &Costs = getEdgeCosts(Edge);
    for (unsigned Row = 0, NumRows = Costs.getRows(); Row != NumRows; ++Row)
      OS << Costs.getRowAsVector(Row) << "\\n";

    OS << "\" ]\n";
  }

  OS << "}\n";
}
919
920
7
/// Allocate a new RegAllocPBQP pass constructed with \p customPassID and
/// return it as a FunctionPass.
FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) {
  FunctionPass *Allocator = new RegAllocPBQP(customPassID);
  return Allocator;
}
923
924
7
/// Create a PBQP register allocator pass using the default argument of
/// createPBQPRegisterAllocator().
FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
  FunctionPass *DefaultAllocator = createPBQPRegisterAllocator();
  return DefaultAllocator;
}