Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/RegAllocPBQP.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
10
// register allocator for LLVM. This allocator works by constructing a PBQP
11
// problem representing the register allocation problem under consideration,
12
// solving this using a PBQP solver, and mapping the solution back to a
13
// register assignment. If any variables are selected for spilling then spill
14
// code is inserted and the process repeated.
15
//
16
// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
17
// for register allocation. For more information on PBQP for register
18
// allocation, see the following papers:
19
//
20
//   (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
21
//   PBQP. In Proceedings of the 7th Joint Modular Languages Conference
22
//   (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
23
//
24
//   (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
25
//   architectures. In Proceedings of the Joint Conference on Languages,
26
//   Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
27
//   NY, USA, 139-148.
28
//
29
//===----------------------------------------------------------------------===//
30
31
#include "llvm/CodeGen/RegAllocPBQP.h"
32
#include "RegisterCoalescer.h"
33
#include "Spiller.h"
34
#include "llvm/ADT/ArrayRef.h"
35
#include "llvm/ADT/BitVector.h"
36
#include "llvm/ADT/DenseMap.h"
37
#include "llvm/ADT/DenseSet.h"
38
#include "llvm/ADT/STLExtras.h"
39
#include "llvm/ADT/SmallPtrSet.h"
40
#include "llvm/ADT/SmallVector.h"
41
#include "llvm/ADT/StringRef.h"
42
#include "llvm/Analysis/AliasAnalysis.h"
43
#include "llvm/CodeGen/CalcSpillWeights.h"
44
#include "llvm/CodeGen/LiveInterval.h"
45
#include "llvm/CodeGen/LiveIntervals.h"
46
#include "llvm/CodeGen/LiveRangeEdit.h"
47
#include "llvm/CodeGen/LiveStacks.h"
48
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
49
#include "llvm/CodeGen/MachineDominators.h"
50
#include "llvm/CodeGen/MachineFunction.h"
51
#include "llvm/CodeGen/MachineFunctionPass.h"
52
#include "llvm/CodeGen/MachineInstr.h"
53
#include "llvm/CodeGen/MachineLoopInfo.h"
54
#include "llvm/CodeGen/MachineRegisterInfo.h"
55
#include "llvm/CodeGen/PBQP/Graph.h"
56
#include "llvm/CodeGen/PBQP/Math.h"
57
#include "llvm/CodeGen/PBQP/Solution.h"
58
#include "llvm/CodeGen/PBQPRAConstraint.h"
59
#include "llvm/CodeGen/RegAllocRegistry.h"
60
#include "llvm/CodeGen/SlotIndexes.h"
61
#include "llvm/CodeGen/TargetRegisterInfo.h"
62
#include "llvm/CodeGen/TargetSubtargetInfo.h"
63
#include "llvm/CodeGen/VirtRegMap.h"
64
#include "llvm/Config/llvm-config.h"
65
#include "llvm/IR/Function.h"
66
#include "llvm/IR/Module.h"
67
#include "llvm/MC/MCRegisterInfo.h"
68
#include "llvm/Pass.h"
69
#include "llvm/Support/CommandLine.h"
70
#include "llvm/Support/Compiler.h"
71
#include "llvm/Support/Debug.h"
72
#include "llvm/Support/FileSystem.h"
73
#include "llvm/Support/Printable.h"
74
#include "llvm/Support/raw_ostream.h"
75
#include <algorithm>
76
#include <cassert>
77
#include <cstddef>
78
#include <limits>
79
#include <map>
80
#include <memory>
81
#include <queue>
82
#include <set>
83
#include <sstream>
84
#include <string>
85
#include <system_error>
86
#include <tuple>
87
#include <utility>
88
#include <vector>
89
90
using namespace llvm;
91
92
#define DEBUG_TYPE "regalloc"
93
94
// Registers this allocator under the name "pbqp", using
// createDefaultPBQPRegisterAllocator as the factory callback.
static RegisterRegAlloc
95
RegisterPBQPRepAlloc("pbqp", "PBQP register allocator",
96
                       createDefaultPBQPRegisterAllocator);
97
98
// Hidden boolean command-line option "pbqp-coalescing"; off by default.
static cl::opt<bool>
99
PBQPCoalescing("pbqp-coalescing",
100
                cl::desc("Attempt coalescing during PBQP register allocation."),
101
                cl::init(false), cl::Hidden);
102
103
// Hidden boolean command-line option "pbqp-dump-graphs"; off by default and
// only declared when NDEBUG is not defined.
#ifndef NDEBUG
104
static cl::opt<bool>
105
PBQPDumpGraphs("pbqp-dump-graphs",
106
               cl::desc("Dump graphs for each function/round in the compilation unit."),
107
               cl::init(false), cl::Hidden);
108
#endif
109
110
namespace {
111
112
///
113
/// PBQP based allocators solve the register allocation problem by mapping
114
/// register allocation problems to Partitioned Boolean Quadratic
115
/// Programming problems.
116
class RegAllocPBQP : public MachineFunctionPass {
117
public:
118
  static char ID;
119
120
  /// Construct a PBQP register allocator.
121
  RegAllocPBQP(char *cPassID = nullptr)
122
7
      : MachineFunctionPass(ID), customPassID(cPassID) {
123
7
    initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
124
7
    initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
125
7
    initializeLiveStacksPass(*PassRegistry::getPassRegistry());
126
7
    initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
127
7
  }
128
129
  /// Return the pass name.
130
14
  StringRef getPassName() const override { return "PBQP Register Allocator"; }
131
132
  /// PBQP analysis usage.
133
  void getAnalysisUsage(AnalysisUsage &au) const override;
134
135
  /// Perform register allocation
136
  bool runOnMachineFunction(MachineFunction &MF) override;
137
138
7
  MachineFunctionProperties getRequiredProperties() const override {
139
7
    return MachineFunctionProperties().set(
140
7
        MachineFunctionProperties::Property::NoPHIs);
141
7
  }
142
143
private:
144
  using LI2NodeMap = std::map<const LiveInterval *, unsigned>;
145
  using Node2LIMap = std::vector<const LiveInterval *>;
146
  using AllowedSet = std::vector<unsigned>;
147
  using AllowedSetMap = std::vector<AllowedSet>;
148
  using RegPair = std::pair<unsigned, unsigned>;
149
  using CoalesceMap = std::map<RegPair, PBQP::PBQPNum>;
150
  using RegSet = std::set<unsigned>;
151
152
  char *customPassID;
153
154
  RegSet VRegsToAlloc, EmptyIntervalVRegs;
155
156
  /// Inst which is a def of an original reg and whose defs are already all
157
  /// dead after remat is saved in DeadRemats. The deletion of such inst is
158
  /// postponed till all the allocations are done, so its remat expr is
159
  /// always available for the remat of all the siblings of the original reg.
160
  SmallPtrSet<MachineInstr *, 32> DeadRemats;
161
162
  /// Finds the initial set of vreg intervals to allocate.
163
  void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
164
165
  /// Constructs an initial graph.
166
  void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller);
167
168
  /// Spill the given VReg.
169
  void spillVReg(unsigned VReg, SmallVectorImpl<unsigned> &NewIntervals,
170
                 MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,
171
                 Spiller &VRegSpiller);
172
173
  /// Given a solved PBQP problem maps this solution back to a register
174
  /// assignment.
175
  bool mapPBQPToRegAlloc(const PBQPRAGraph &G,
176
                         const PBQP::Solution &Solution,
177
                         VirtRegMap &VRM,
178
                         Spiller &VRegSpiller);
179
180
  /// Postprocessing before final spilling. Sets basic block "live in"
181
  /// variables.
182
  void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
183
                     VirtRegMap &VRM) const;
184
185
  void postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS);
186
};
187
188
/// Definition of the static pass ID member that the RegAllocPBQP constructor
/// passes to MachineFunctionPass.
char RegAllocPBQP::ID = 0;
189
190
/// Set spill costs for each node in the PBQP reg-alloc graph.
191
class SpillCosts : public PBQPRAConstraint {
192
public:
193
8
  void apply(PBQPRAGraph &G) override {
194
8
    LiveIntervals &LIS = G.getMetadata().LIS;
195
8
196
8
    // A minimum spill costs, so that register constraints can can be set
197
8
    // without normalization in the [0.0:MinSpillCost( interval.
198
8
    const PBQP::PBQPNum MinSpillCost = 10.0;
199
8
200
153
    for (auto NId : G.nodeIds()) {
201
153
      PBQP::PBQPNum SpillCost =
202
153
        LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight;
203
153
      if (SpillCost == 0.0)
204
0
        SpillCost = std::numeric_limits<PBQP::PBQPNum>::min();
205
153
      else
206
153
        SpillCost += MinSpillCost;
207
153
      PBQPRAGraph::RawVector NodeCosts(G.getNodeCosts(NId));
208
153
      NodeCosts[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost;
209
153
      G.setNodeCosts(NId, std::move(NodeCosts));
210
153
    }
211
8
  }
212
};
213
214
/// Add interference edges between overlapping vregs.
215
class Interference : public PBQPRAConstraint {
216
private:
217
  using AllowedRegVecPtr = const PBQP::RegAlloc::AllowedRegVector *;
218
  using IKey = std::pair<AllowedRegVecPtr, AllowedRegVecPtr>;
219
  using IMatrixCache = DenseMap<IKey, PBQPRAGraph::MatrixPtr>;
220
  using DisjointAllowedRegsCache = DenseSet<IKey>;
221
  using IEdgeKey = std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId>;
222
  using IEdgeCache = DenseSet<IEdgeKey>;
223
224
  bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
225
                               PBQPRAGraph::NodeId MId,
226
1.06k
                               const DisjointAllowedRegsCache &D) const {
227
1.06k
    const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
228
1.06k
    const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
229
1.06k
230
1.06k
    if (NRegs == MRegs)
231
497
      return false;
232
571
233
571
    if (NRegs < MRegs)
234
58
      return D.count(IKey(NRegs, MRegs)) > 0;
235
513
236
513
    return D.count(IKey(MRegs, NRegs)) > 0;
237
513
  }
238
239
  void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
240
                              PBQPRAGraph::NodeId MId,
241
12
                              DisjointAllowedRegsCache &D) {
242
12
    const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
243
12
    const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
244
12
245
12
    assert(NRegs != MRegs && "AllowedRegs can not be disjoint with itself");
246
12
247
12
    if (NRegs < MRegs)
248
0
      D.insert(IKey(NRegs, MRegs));
249
12
    else
250
12
      D.insert(IKey(MRegs, NRegs));
251
12
  }
252
253
  // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
254
  // for the fast interference graph construction algorithm. The last is there
255
  // to save us from looking up node ids via the VRegToNode map in the graph
256
  // metadata.
257
  using IntervalInfo =
258
      std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>;
259
260
2.33k
  static SlotIndex getStartPoint(const IntervalInfo &I) {
261
2.33k
    return std::get<0>(I)->segments[std::get<1>(I)].start;
262
2.33k
  }
263
264
1.57k
  static SlotIndex getEndPoint(const IntervalInfo &I) {
265
1.57k
    return std::get<0>(I)->segments[std::get<1>(I)].end;
266
1.57k
  }
267
268
1.23k
  static PBQP::GraphBase::NodeId getNodeId(const IntervalInfo &I) {
269
1.23k
    return std::get<2>(I);
270
1.23k
  }
271
272
  static bool lowestStartPoint(const IntervalInfo &I1,
273
1.02k
                               const IntervalInfo &I2) {
274
1.02k
    // Condition reversed because priority queue has the *highest* element at
275
1.02k
    // the front, rather than the lowest.
276
1.02k
    return getStartPoint(I1) > getStartPoint(I2);
277
1.02k
  }
278
279
  static bool lowestEndPoint(const IntervalInfo &I1,
280
640
                             const IntervalInfo &I2) {
281
640
    SlotIndex E1 = getEndPoint(I1);
282
640
    SlotIndex E2 = getEndPoint(I2);
283
640
284
640
    if (E1 < E2)
285
385
      return true;
286
255
287
255
    if (E1 > E2)
288
116
      return false;
289
139
290
139
    // If two intervals end at the same point, we need a way to break the tie or
291
139
    // the set will assume they're actually equal and refuse to insert a
292
139
    // "duplicate". Just compare the vregs - fast and guaranteed unique.
293
139
    return std::get<0>(I1)->reg < std::get<0>(I2)->reg;
294
139
  }
295
296
148
  static bool isAtLastSegment(const IntervalInfo &I) {
297
148
    return std::get<1>(I) == std::get<0>(I)->size() - 1;
298
148
  }
299
300
15
  static IntervalInfo nextSegment(const IntervalInfo &I) {
301
15
    return std::make_tuple(std::get<0>(I), std::get<1>(I) + 1, std::get<2>(I));
302
15
  }
303
304
public:
305
8
  void apply(PBQPRAGraph &G) override {
306
8
    // The following is loosely based on the linear scan algorithm introduced in
307
8
    // "Linear Scan Register Allocation" by Poletto and Sarkar. This version
308
8
    // isn't linear, because the size of the active set isn't bound by the
309
8
    // number of registers, but rather the size of the largest clique in the
310
8
    // graph. Still, we expect this to be better than N^2.
311
8
    LiveIntervals &LIS = G.getMetadata().LIS;
312
8
313
8
    // Interferenc matrices are incredibly regular - they're only a function of
314
8
    // the allowed sets, so we cache them to avoid the overhead of constructing
315
8
    // and uniquing them.
316
8
    IMatrixCache C;
317
8
318
8
    // Finding an edge is expensive in the worst case (O(max_clique(G))). So
319
8
    // cache locally edges we have already seen.
320
8
    IEdgeCache EC;
321
8
322
8
    // Cache known disjoint allowed registers pairs
323
8
    DisjointAllowedRegsCache D;
324
8
325
8
    using IntervalSet = std::set<IntervalInfo, decltype(&lowestEndPoint)>;
326
8
    using IntervalQueue =
327
8
        std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
328
8
                            decltype(&lowestStartPoint)>;
329
8
    IntervalSet Active(lowestEndPoint);
330
8
    IntervalQueue Inactive(lowestStartPoint);
331
8
332
8
    // Start by building the inactive set.
333
153
    for (auto NId : G.nodeIds()) {
334
153
      unsigned VReg = G.getNodeMetadata(NId).getVReg();
335
153
      LiveInterval &LI = LIS.getInterval(VReg);
336
153
      assert(!LI.empty() && "PBQP graph contains node for empty interval");
337
153
      Inactive.push(std::make_tuple(&LI, 0, NId));
338
153
    }
339
8
340
176
    while (!Inactive.empty()) {
341
168
      // Tentatively grab the "next" interval - this choice may be overriden
342
168
      // below.
343
168
      IntervalInfo Cur = Inactive.top();
344
168
345
168
      // Retire any active intervals that end before Cur starts.
346
168
      IntervalSet::iterator RetireItr = Active.begin();
347
316
      while (RetireItr != Active.end() &&
348
316
             
(getEndPoint(*RetireItr) <= getStartPoint(Cur))297
) {
349
148
        // If this interval has subsequent segments, add the next one to the
350
148
        // inactive list.
351
148
        if (!isAtLastSegment(*RetireItr))
352
15
          Inactive.push(nextSegment(*RetireItr));
353
148
354
148
        ++RetireItr;
355
148
      }
356
168
      Active.erase(Active.begin(), RetireItr);
357
168
358
168
      // One of the newly retired segments may actually start before the
359
168
      // Cur segment, so re-grab the front of the inactive list.
360
168
      Cur = Inactive.top();
361
168
      Inactive.pop();
362
168
363
168
      // At this point we know that Cur overlaps all active intervals. Add the
364
168
      // interference edges.
365
168
      PBQP::GraphBase::NodeId NId = getNodeId(Cur);
366
1.06k
      for (const auto &A : Active) {
367
1.06k
        PBQP::GraphBase::NodeId MId = getNodeId(A);
368
1.06k
369
1.06k
        // Do not add an edge when the nodes' allowed registers do not
370
1.06k
        // intersect: there is obviously no interference.
371
1.06k
        if (haveDisjointAllowedRegs(G, NId, MId, D))
372
478
          continue;
373
590
374
590
        // Check that we haven't already added this edge
375
590
        IEdgeKey EK(std::min(NId, MId), std::max(NId, MId));
376
590
        if (EC.count(EK))
377
47
          continue;
378
543
379
543
        // This is a new edge - add it to the graph.
380
543
        if (!createInterferenceEdge(G, NId, MId, C))
381
12
          setDisjointAllowedRegs(G, NId, MId, D);
382
531
        else
383
531
          EC.insert(EK);
384
543
      }
385
168
386
168
      // Finally, add Cur to the Active set.
387
168
      Active.insert(Cur);
388
168
    }
389
8
  }
390
391
private:
392
  // Create an Interference edge and add it to the graph, unless it is
393
  // a null matrix, meaning the nodes' allowed registers do not have any
394
  // interference. This case occurs frequently between integer and floating
395
  // point registers for example.
396
  // return true iff both nodes interferes.
397
  bool createInterferenceEdge(PBQPRAGraph &G,
398
                              PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId,
399
543
                              IMatrixCache &C) {
400
543
    const TargetRegisterInfo &TRI =
401
543
        *G.getMetadata().MF.getSubtarget().getRegisterInfo();
402
543
    const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
403
543
    const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
404
543
405
543
    // Try looking the edge costs up in the IMatrixCache first.
406
543
    IKey K(&NRegs, &MRegs);
407
543
    IMatrixCache::iterator I = C.find(K);
408
543
    if (I != C.end()) {
409
497
      G.addEdgeBypassingCostAllocator(NId, MId, I->second);
410
497
      return true;
411
497
    }
412
46
413
46
    PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
414
46
    bool NodesInterfere = false;
415
1.38k
    for (unsigned I = 0; I != NRegs.size(); 
++I1.33k
) {
416
1.33k
      unsigned PRegN = NRegs[I];
417
39.3k
      for (unsigned J = 0; J != MRegs.size(); 
++J38.0k
) {
418
38.0k
        unsigned PRegM = MRegs[J];
419
38.0k
        if (TRI.regsOverlap(PRegN, PRegM)) {
420
895
          M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
421
895
          NodesInterfere = true;
422
895
        }
423
38.0k
      }
424
1.33k
    }
425
46
426
46
    if (!NodesInterfere)
427
12
      return false;
428
34
429
34
    PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
430
34
    C[K] = G.getEdgeCostsPtr(EId);
431
34
432
34
    return true;
433
34
  }
434
};
435
436
class Coalescing : public PBQPRAConstraint {
437
public:
438
5
  void apply(PBQPRAGraph &G) override {
439
5
    MachineFunction &MF = G.getMetadata().MF;
440
5
    MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI;
441
5
    CoalescerPair CP(*MF.getSubtarget().getRegisterInfo());
442
5
443
5
    // Scan the machine function and add a coalescing cost whenever CoalescerPair
444
5
    // gives the Ok.
445
8
    for (const auto &MBB : MF) {
446
163
      for (const auto &MI : MBB) {
447
163
        // Skip not-coalescable or already coalesced copies.
448
163
        if (!CP.setRegisters(&MI) || 
CP.getSrcReg() == CP.getDstReg()25
)
449
138
          continue;
450
25
451
25
        unsigned DstReg = CP.getDstReg();
452
25
        unsigned SrcReg = CP.getSrcReg();
453
25
454
25
        const float Scale = 1.0f / MBFI.getEntryFreq();
455
25
        PBQP::PBQPNum CBenefit = MBFI.getBlockFreq(&MBB).getFrequency() * Scale;
456
25
457
25
        if (CP.isPhys()) {
458
20
          if (!MF.getRegInfo().isAllocatable(DstReg))
459
0
            continue;
460
20
461
20
          PBQPRAGraph::NodeId NId = G.getMetadata().getNodeIdForVReg(SrcReg);
462
20
463
20
          const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed =
464
20
            G.getNodeMetadata(NId).getAllowedRegs();
465
20
466
20
          unsigned PRegOpt = 0;
467
389
          while (PRegOpt < Allowed.size() && Allowed[PRegOpt] != DstReg)
468
369
            ++PRegOpt;
469
20
470
20
          if (PRegOpt < Allowed.size()) {
471
20
            PBQPRAGraph::RawVector NewCosts(G.getNodeCosts(NId));
472
20
            NewCosts[PRegOpt + 1] -= CBenefit;
473
20
            G.setNodeCosts(NId, std::move(NewCosts));
474
20
          }
475
20
        } else {
476
5
          PBQPRAGraph::NodeId N1Id = G.getMetadata().getNodeIdForVReg(DstReg);
477
5
          PBQPRAGraph::NodeId N2Id = G.getMetadata().getNodeIdForVReg(SrcReg);
478
5
          const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 =
479
5
            &G.getNodeMetadata(N1Id).getAllowedRegs();
480
5
          const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 =
481
5
            &G.getNodeMetadata(N2Id).getAllowedRegs();
482
5
483
5
          PBQPRAGraph::EdgeId EId = G.findEdge(N1Id, N2Id);
484
5
          if (EId == G.invalidEdgeId()) {
485
0
            PBQPRAGraph::RawMatrix Costs(Allowed1->size() + 1,
486
0
                                         Allowed2->size() + 1, 0);
487
0
            addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
488
0
            G.addEdge(N1Id, N2Id, std::move(Costs));
489
5
          } else {
490
5
            if (G.getEdgeNode1Id(EId) == N2Id) {
491
0
              std::swap(N1Id, N2Id);
492
0
              std::swap(Allowed1, Allowed2);
493
0
            }
494
5
            PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId));
495
5
            addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
496
5
            G.updateEdgeCosts(EId, std::move(Costs));
497
5
          }
498
5
        }
499
25
      }
500
8
    }
501
5
  }
502
503
private:
504
  void addVirtRegCoalesce(
505
                    PBQPRAGraph::RawMatrix &CostMat,
506
                    const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1,
507
                    const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2,
508
5
                    PBQP::PBQPNum Benefit) {
509
5
    assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch.");
510
5
    assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch.");
511
163
    for (unsigned I = 0; I != Allowed1.size(); 
++I158
) {
512
158
      unsigned PReg1 = Allowed1[I];
513
5.15k
      for (unsigned J = 0; J != Allowed2.size(); 
++J4.99k
) {
514
4.99k
        unsigned PReg2 = Allowed2[J];
515
4.99k
        if (PReg1 == PReg2)
516
158
          CostMat[I + 1][J + 1] -= Benefit;
517
4.99k
      }
518
158
    }
519
5
  }
520
};
521
522
} // end anonymous namespace
523
524
// Out-of-line destructor/anchor for PBQPRAConstraint.
525
31
PBQPRAConstraint::~PBQPRAConstraint() = default;
526
527
0
// Empty out-of-line definition of PBQPRAConstraint::anchor().
void PBQPRAConstraint::anchor() {}
528
529
0
// Empty out-of-line definition of PBQPRAConstraintList::anchor().
void PBQPRAConstraintList::anchor() {}
530
531
7
void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
  // Declares which analyses this pass needs and which it leaves intact.
  // The calls are kept in their original order.
  au.setPreservesCFG();

  au.addRequired<AAResultsWrapperPass>();
  au.addPreserved<AAResultsWrapperPass>();

  au.addRequired<SlotIndexes>();
  au.addPreserved<SlotIndexes>();

  au.addRequired<LiveIntervals>();
  au.addPreserved<LiveIntervals>();

  //au.addRequiredID(SplitCriticalEdgesID);

  // Optional extra dependency supplied through the constructor.
  if (customPassID) {
    au.addRequiredID(*customPassID);
  }

  au.addRequired<LiveStacks>();
  au.addPreserved<LiveStacks>();

  au.addRequired<MachineBlockFrequencyInfo>();
  au.addPreserved<MachineBlockFrequencyInfo>();

  au.addRequired<MachineLoopInfo>();
  au.addPreserved<MachineLoopInfo>();

  au.addRequired<MachineDominatorTree>();
  au.addPreserved<MachineDominatorTree>();

  au.addRequired<VirtRegMap>();
  au.addPreserved<VirtRegMap>();

  // Let the base class add its own requirements last.
  MachineFunctionPass::getAnalysisUsage(au);
}
554
555
void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
556
7
                                            LiveIntervals &LIS) {
557
7
  const MachineRegisterInfo &MRI = MF.getRegInfo();
558
7
559
7
  // Iterate over all live ranges.
560
194
  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; 
++I187
) {
561
187
    unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
562
187
    if (MRI.reg_nodbg_empty(Reg))
563
45
      continue;
564
142
    VRegsToAlloc.insert(Reg);
565
142
  }
566
7
}
567
568
static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI,
569
4.12k
                                   const MachineFunction &MF) {
570
4.12k
  const MCPhysReg *CSR = MF.getRegInfo().getCalleeSavedRegs();
571
73.7k
  for (unsigned i = 0; CSR[i] != 0; 
++i69.6k
)
572
70.8k
    if (TRI.regsOverlap(reg, CSR[i]))
573
1.27k
      return true;
574
4.12k
  
return false2.84k
;
575
4.12k
}
576
577
void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
578
8
                                   Spiller &VRegSpiller) {
579
8
  MachineFunction &MF = G.getMetadata().MF;
580
8
581
8
  LiveIntervals &LIS = G.getMetadata().LIS;
582
8
  const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
583
8
  const TargetRegisterInfo &TRI =
584
8
      *G.getMetadata().MF.getSubtarget().getRegisterInfo();
585
8
586
8
  std::vector<unsigned> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
587
8
588
8
  std::map<unsigned, std::vector<unsigned>> VRegAllowedMap;
589
8
590
162
  while (!Worklist.empty()) {
591
154
    unsigned VReg = Worklist.back();
592
154
    Worklist.pop_back();
593
154
594
154
    LiveInterval &VRegLI = LIS.getInterval(VReg);
595
154
596
154
    // If this is an empty interval move it to the EmptyIntervalVRegs set then
597
154
    // continue.
598
154
    if (VRegLI.empty()) {
599
1
      EmptyIntervalVRegs.insert(VRegLI.reg);
600
1
      VRegsToAlloc.erase(VRegLI.reg);
601
1
      continue;
602
1
    }
603
153
604
153
    const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
605
153
606
153
    // Record any overlaps with regmask operands.
607
153
    BitVector RegMaskOverlaps;
608
153
    LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps);
609
153
610
153
    // Compute an initial allowed set for the current vreg.
611
153
    std::vector<unsigned> VRegAllowed;
612
153
    ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF);
613
4.55k
    for (unsigned I = 0; I != RawPRegOrder.size(); 
++I4.40k
) {
614
4.40k
      unsigned PReg = RawPRegOrder[I];
615
4.40k
      if (MRI.isReserved(PReg))
616
62
        continue;
617
4.33k
618
4.33k
      // vregLI crosses a regmask operand that clobbers preg.
619
4.33k
      if (!RegMaskOverlaps.empty() && 
!RegMaskOverlaps.test(PReg)330
)
620
198
        continue;
621
4.14k
622
4.14k
      // vregLI overlaps fixed regunit interference.
623
4.14k
      bool Interference = false;
624
8.76k
      for (MCRegUnitIterator Units(PReg, &TRI); Units.isValid(); 
++Units4.62k
) {
625
4.64k
        if (VRegLI.overlaps(LIS.getRegUnit(*Units))) {
626
21
          Interference = true;
627
21
          break;
628
21
        }
629
4.64k
      }
630
4.14k
      if (Interference)
631
21
        continue;
632
4.12k
633
4.12k
      // preg is usable for this virtual register.
634
4.12k
      VRegAllowed.push_back(PReg);
635
4.12k
    }
636
153
637
153
    // Check for vregs that have no allowed registers. These should be
638
153
    // pre-spilled and the new vregs added to the worklist.
639
153
    if (VRegAllowed.empty()) {
640
0
      SmallVector<unsigned, 8> NewVRegs;
641
0
      spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
642
0
      Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end());
643
0
      continue;
644
0
    } else
645
153
      VRegAllowedMap[VReg] = std::move(VRegAllowed);
646
153
  }
647
8
648
153
  for (auto &KV : VRegAllowedMap) {
649
153
    auto VReg = KV.first;
650
153
651
153
    // Move empty intervals to the EmptyIntervalVReg set.
652
153
    if (LIS.getInterval(VReg).empty()) {
653
0
      EmptyIntervalVRegs.insert(VReg);
654
0
      VRegsToAlloc.erase(VReg);
655
0
      continue;
656
0
    }
657
153
658
153
    auto &VRegAllowed = KV.second;
659
153
660
153
    PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);
661
153
662
153
    // Tweak cost of callee saved registers, as using then force spilling and
663
153
    // restoring them. This would only happen in the prologue / epilogue though.
664
4.27k
    for (unsigned i = 0; i != VRegAllowed.size(); 
++i4.12k
)
665
4.12k
      if (isACalleeSavedRegister(VRegAllowed[i], TRI, MF))
666
1.27k
        NodeCosts[1 + i] += 1.0;
667
153
668
153
    PBQPRAGraph::NodeId NId = G.addNode(std::move(NodeCosts));
669
153
    G.getNodeMetadata(NId).setVReg(VReg);
670
153
    G.getNodeMetadata(NId).setAllowedRegs(
671
153
      G.getMetadata().getAllowedRegs(std::move(VRegAllowed)));
672
153
    G.getMetadata().setNodeIdForVReg(VReg, NId);
673
153
  }
674
8
}
675
676
void RegAllocPBQP::spillVReg(unsigned VReg,
677
                             SmallVectorImpl<unsigned> &NewIntervals,
678
                             MachineFunction &MF, LiveIntervals &LIS,
679
10
                             VirtRegMap &VRM, Spiller &VRegSpiller) {
680
10
  VRegsToAlloc.erase(VReg);
681
10
  LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM,
682
10
                    nullptr, &DeadRemats);
683
10
  VRegSpiller.spill(LRE);
684
10
685
10
  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
686
10
  (void)TRI;
687
10
  LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: "
688
10
                    << LRE.getParent().weight << ", New vregs: ");
689
10
690
10
  // Copy any newly inserted live intervals into the list of regs to
691
10
  // allocate.
692
10
  for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end();
693
14
       I != E; 
++I4
) {
694
4
    const LiveInterval &LI = LIS.getInterval(*I);
695
4
    assert(!LI.empty() && "Empty spill range.");
696
4
    LLVM_DEBUG(dbgs() << printReg(LI.reg, &TRI) << " ");
697
4
    VRegsToAlloc.insert(LI.reg);
698
4
  }
699
10
700
10
  LLVM_DEBUG(dbgs() << ")\n");
701
10
}
702
703
bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
704
                                     const PBQP::Solution &Solution,
705
                                     VirtRegMap &VRM,
706
8
                                     Spiller &VRegSpiller) {
707
8
  MachineFunction &MF = G.getMetadata().MF;
708
8
  LiveIntervals &LIS = G.getMetadata().LIS;
709
8
  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
710
8
  (void)TRI;
711
8
712
8
  // Set to true if we have any spills
713
8
  bool AnotherRoundNeeded = false;
714
8
715
8
  // Clear the existing allocation.
716
8
  VRM.clearAllVirt();
717
8
718
8
  // Iterate over the nodes mapping the PBQP solution to a register
719
8
  // assignment.
720
153
  for (auto NId : G.nodeIds()) {
721
153
    unsigned VReg = G.getNodeMetadata(NId).getVReg();
722
153
    unsigned AllocOption = Solution.getSelection(NId);
723
153
724
153
    if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) {
725
143
      unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1];
726
143
      LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> "
727
143
                        << TRI.getName(PReg) << "\n");
728
143
      assert(PReg != 0 && "Invalid preg selected.");
729
143
      VRM.assignVirt2Phys(VReg, PReg);
730
143
    } else {
731
10
      // Spill VReg. If this introduces new intervals we'll need another round
732
10
      // of allocation.
733
10
      SmallVector<unsigned, 8> NewVRegs;
734
10
      spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
735
10
      AnotherRoundNeeded |= !NewVRegs.empty();
736
10
    }
737
153
  }
738
8
739
8
  return !AnotherRoundNeeded;
740
8
}
741
742
/// Assign a physical register to every virtual register recorded in
/// EmptyIntervalVRegs.
///
/// For each such register the simple allocation hint from the
/// MachineRegisterInfo is used when it is non-zero. Otherwise the first
/// register in the register class's raw allocation order that is not reserved
/// is chosen.
///
/// \param MF   Function being allocated; supplies the MachineRegisterInfo and
///             the raw allocation order.
/// \param LIS  Used to look up the LiveInterval of each register.
/// \param VRM  Receives the virtual-to-physical assignments.
void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
743
                                 LiveIntervals &LIS,
744
7
                                 VirtRegMap &VRM) const {
745
7
  MachineRegisterInfo &MRI = MF.getRegInfo();
746
7
747
7
  // First allocate registers for the empty intervals.
748
7
  for (RegSet::const_iterator
749
7
         I = EmptyIntervalVRegs.begin(), E = EmptyIntervalVRegs.end();
750
8
         I != E; 
++I1
) {
751
1
    LiveInterval &LI = LIS.getInterval(*I);
752
1
753
1
    // Prefer the hinted register; 0 is treated as "no hint" below.
    unsigned PReg = MRI.getSimpleHint(LI.reg);
754
1
755
1
    if (PReg == 0) {
756
1
      const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg);
757
1
      const ArrayRef<MCPhysReg> RawPRegOrder = RC.getRawAllocationOrder(MF);
758
2
      // Take the first candidate in the raw order that is not reserved.
      for (unsigned CandidateReg : RawPRegOrder) {
759
2
        if (!VRM.getRegInfo().isReserved(CandidateReg)) {
760
1
          PReg = CandidateReg;
761
1
          break;
762
1
        }
763
2
      }
764
1
      // If every register in the class is reserved PReg is still 0 here.
      assert(PReg &&
765
1
             "No un-reserved physical registers in this register class");
766
1
    }
767
1
768
1
    VRM.assignVirt2Phys(LI.reg, PReg);
769
1
  }
770
7
}
771
772
7
/// Run the spiller's post-optimization step and delete the instructions
/// collected in DeadRemats.
///
/// \param VRegSpiller  Spiller whose postOptimization() hook is invoked first.
/// \param LIS          Each dead instruction is removed from its maps before
///                     the instruction itself is erased.
void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) {
773
7
  VRegSpiller.postOptimization();
774
7
  /// Remove dead defs because of rematerialization.
775
7
  for (auto DeadInst : DeadRemats) {
776
0
    // Drop the instruction from the LiveIntervals maps before erasing it from
    // its parent.
    LIS.RemoveMachineInstrFromMaps(*DeadInst);
777
0
    DeadInst->eraseFromParent();
778
0
  }
779
7
  // The erased instructions must not be visited again; empty the set.
  DeadRemats.clear();
780
7
}
781
782
/// Spill-weight normalization used by this allocator.
///
/// Calls normalizeSpillWeight with an instruction count of 1 and scales the
/// result by \p NumInstr. It is passed to calculateSpillWeightsAndHints from
/// runOnMachineFunction.
///
/// \param UseDefFreq  Forwarded unchanged to normalizeSpillWeight.
/// \param Size        Forwarded unchanged to normalizeSpillWeight.
/// \param NumInstr    Multiplier applied to the normalized weight.
/// \returns NumInstr * normalizeSpillWeight(UseDefFreq, Size, 1).
static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size,
783
128
                                         unsigned NumInstr) {
784
128
  // All intervals have a spill weight that is mostly proportional to the number
785
128
  // of uses, with uses in loops having a bigger weight.
786
128
  return NumInstr * normalizeSpillWeight(UseDefFreq, Size, 1);
787
128
}
788
789
7
/// Pass entry point: allocate registers for \p MF using the PBQP solver.
///
/// Steps, in order:
///  1. Fetch the LiveIntervals, MachineBlockFrequencyInfo and VirtRegMap
///     analyses and compute spill weights with normalizePBQPSpillWeight.
///  2. Create an inline spiller and freeze the reserved registers.
///  3. Collect the virtual registers to allocate (findVRegIntervalsToAlloc).
///  4. If VRegsToAlloc is non-empty, build the constraint list and repeat
///     build-graph / solve / map-back rounds until mapPBQPToRegAlloc reports
///     completion.
///  5. Assign the empty intervals (finalizeAlloc), run postOptimization and
///     clear the per-function register sets.
///
/// \returns Always true (presumably signalling that the function was
///          modified -- confirm against the pass interface).
bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
790
7
  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
791
7
  MachineBlockFrequencyInfo &MBFI =
792
7
    getAnalysis<MachineBlockFrequencyInfo>();
793
7
794
7
  VirtRegMap &VRM = getAnalysis<VirtRegMap>();
795
7
796
7
  calculateSpillWeightsAndHints(LIS, MF, &VRM, getAnalysis<MachineLoopInfo>(),
797
7
                                MBFI, normalizePBQPSpillWeight);
798
7
799
7
  std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM));
800
7
801
7
  MF.getRegInfo().freezeReservedRegs(MF);
802
7
803
7
  LLVM_DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n");
804
7
805
7
  // Allocator main loop:
806
7
  //
807
7
  // * Map current regalloc problem to a PBQP problem
808
7
  // * Solve the PBQP problem
809
7
  // * Map the solution back to a register allocation
810
7
  // * Spill if necessary
811
7
  //
812
7
  // This process is continued till no more spills are generated.
813
7
814
7
  // Find the vreg intervals in need of allocation.
815
7
  findVRegIntervalsToAlloc(MF, LIS);
816
7
817
#ifndef NDEBUG
818
  // Only needed to name the per-round graph dump files below, which are also
  // compiled under !NDEBUG.
  const Function &F = MF.getFunction();
819
  std::string FullyQualifiedName =
820
    F.getParent()->getModuleIdentifier() + "." + F.getName().str();
821
#endif
822
823
7
  // If there are non-empty intervals allocate them using pbqp.
824
7
  if (!VRegsToAlloc.empty()) {
825
7
    const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
826
7
    // The constraint list is built once and re-applied to the fresh graph
    // constructed in every round.
    std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
827
7
      llvm::make_unique<PBQPRAConstraintList>();
828
7
    ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>());
829
7
    ConstraintsRoot->addConstraint(llvm::make_unique<Interference>());
830
7
    // Coalescing costs are only added when PBQPCoalescing is set.
    if (PBQPCoalescing)
831
5
      ConstraintsRoot->addConstraint(llvm::make_unique<Coalescing>());
832
7
    ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints());
833
7
834
7
    bool PBQPAllocComplete = false;
835
7
    unsigned Round = 0;
836
7
837
15
    while (!PBQPAllocComplete) {
838
8
      LLVM_DEBUG(dbgs() << "  PBQP Regalloc round " << Round << ":\n");
839
8
840
8
      PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI));
841
8
      initializeGraph(G, VRM, *VRegSpiller);
842
8
      ConstraintsRoot->apply(G);
843
8
844
#ifndef NDEBUG
845
      if (PBQPDumpGraphs) {
846
        std::ostringstream RS;
847
        RS << Round;
848
        // File name: "<module id>.<function>.<round>.pbqpgraph".
        std::string GraphFileName = FullyQualifiedName + "." + RS.str() +
849
                                    ".pbqpgraph";
850
        // NOTE(review): EC is handed to the stream constructor but is never
        // inspected before G.dump(OS) writes to the stream.
        std::error_code EC;
851
        raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text);
852
        LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
853
                          << GraphFileName << "\"\n");
854
        G.dump(OS);
855
      }
856
#endif
857
858
8
      PBQP::Solution Solution = PBQP::RegAlloc::solve(G);
859
8
      // mapPBQPToRegAlloc returns true once no spill created new registers.
      PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller);
860
8
      ++Round;
861
8
    }
862
7
  }
863
7
864
7
  // Finalise allocation, allocate empty ranges.
865
7
  finalizeAlloc(MF, LIS, VRM);
866
7
  postOptimization(*VRegSpiller, LIS);
867
7
  // Reset the per-function register sets.
  VRegsToAlloc.clear();
868
7
  EmptyIntervalVRegs.clear();
869
7
870
7
  LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n");
871
7
872
7
  return true;
873
7
}
874
875
/// Create Printable object for node and register info.
///
/// When streamed, the returned object writes
/// "<NId> (<register class name>:<register>)" for the virtual register stored
/// in the metadata of node \p NId.
///
/// NOTE(review): the lambda captures \p G by reference, so the returned
/// Printable must be used while \p G is still alive.
876
static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId,
877
0
                               const PBQP::RegAlloc::PBQPRAGraph &G) {
878
0
  return Printable([NId, &G](raw_ostream &OS) {
879
0
    const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
880
0
    const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
881
0
    unsigned VReg = G.getNodeMetadata(NId).getVReg();
882
0
    const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg));
883
0
    OS << NId << " (" << RegClassName << ':' << printReg(VReg, TRI) << ')';
884
0
  });
885
0
}
886
887
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
888
/// Write a textual description of the graph to \p OS.
///
/// Output is one line per node ("<node info>: <cost vector>"), a blank line,
/// and then for every edge a header naming both end nodes with the matrix's
/// row and column counts, followed by the cost matrix itself.
/// Asserts that no cost vector is empty, that no edge is a self-edge, and that
/// every edge matrix has at least one row and one column.
LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
889
  for (auto NId : nodeIds()) {
890
    const Vector &Costs = getNodeCosts(NId);
891
    assert(Costs.getLength() != 0 && "Empty vector in graph.");
892
    OS << PrintNodeInfo(NId, *this) << ": " << Costs << '\n';
893
  }
894
  // Blank line between the node section and the edge section.
  OS << '\n';
895
896
  for (auto EId : edgeIds()) {
897
    NodeId N1Id = getEdgeNode1Id(EId);
898
    NodeId N2Id = getEdgeNode2Id(EId);
899
    assert(N1Id != N2Id && "PBQP graphs should not have self-edges.");
900
    const Matrix &M = getEdgeCosts(EId);
901
    assert(M.getRows() != 0 && "No rows in matrix.");
902
    assert(M.getCols() != 0 && "No cols in matrix.");
903
    // Node 1 is printed with the row count, node 2 with the column count.
    OS << PrintNodeInfo(N1Id, *this) << ' ' << M.getRows() << " rows / ";
904
    OS << PrintNodeInfo(N2Id, *this) << ' ' << M.getCols() << " cols:\n";
905
    OS << M << '\n';
906
  }
907
}
908
909
/// Convenience overload: write the textual graph description to dbgs().
LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump() const {
910
  dump(dbgs());
911
}
912
#endif
913
914
0
/// Write the graph to \p OS as a DOT-style "graph { ... }" description.
///
/// Each node is emitted as "node<NId>" with a label holding its node info and
/// its cost vector. Each edge is emitted as "node<A> -- node<B>" with a label
/// holding the rows of its cost matrix, one row per "\n"-terminated line of
/// the label.
void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const {
915
0
  OS << "graph {\n";
916
0
  for (auto NId : nodeIds()) {
917
0
    // "\\n" writes a literal backslash-n into the label text rather than a
    // line break in the output.
    OS << "  node" << NId << " [ label=\""
918
0
       << PrintNodeInfo(NId, *this) << "\\n"
919
0
       << getNodeCosts(NId) << "\" ]\n";
920
0
  }
921
0
922
0
  // Default edge attribute: len is set to the number of nodes.
  OS << "  edge [ len=" << nodeIds().size() << " ]\n";
923
0
  for (auto EId : edgeIds()) {
924
0
    OS << "  node" << getEdgeNode1Id(EId)
925
0
       << " -- node" << getEdgeNode2Id(EId)
926
0
       << " [ label=\"";
927
0
    const Matrix &EdgeCosts = getEdgeCosts(EId);
928
0
    // One label line per matrix row.
    for (unsigned i = 0; i < EdgeCosts.getRows(); ++i) {
929
0
      OS << EdgeCosts.getRowAsVector(i) << "\\n";
930
0
    }
931
0
    OS << "\" ]\n";
932
0
  }
933
0
  OS << "}\n";
934
0
}
935
936
7
/// Create a new PBQP register allocator pass.
///
/// \param customPassID  Forwarded unchanged to the RegAllocPBQP constructor.
/// \returns A heap-allocated RegAllocPBQP; the caller receives the raw pointer
///          (presumably ownership passes to the pass manager -- confirm).
FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) {
937
7
  return new RegAllocPBQP(customPassID);
938
7
}
939
940
7
/// Create a PBQP register allocator pass by calling
/// createPBQPRegisterAllocator with no argument (so the default for
/// customPassID, declared outside this chunk, is used).
FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
941
7
  return createPBQPRegisterAllocator();
942
7
}