Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/ExecutionDomainFix.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- ExecutionDomainFix.cpp - Fix execution domain issues ----*- C++ -*--===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "llvm/CodeGen/ExecutionDomainFix.h"
10
#include "llvm/CodeGen/MachineRegisterInfo.h"
11
#include "llvm/CodeGen/TargetInstrInfo.h"
12
13
using namespace llvm;
14
15
#define DEBUG_TYPE "execution-deps-fix"
16
17
iterator_range<SmallVectorImpl<int>::const_iterator>
18
3.88M
ExecutionDomainFix::regIndices(unsigned Reg) const {
19
3.88M
  assert(Reg < AliasMap.size() && "Invalid register");
20
3.88M
  const auto &Entry = AliasMap[Reg];
21
3.88M
  return make_range(Entry.begin(), Entry.end());
22
3.88M
}
23
24
520k
DomainValue *ExecutionDomainFix::alloc(int domain) {
25
520k
  DomainValue *dv = Avail.empty() ? 
new (Allocator.Allocate()) DomainValue230k
26
520k
                                  : 
Avail.pop_back_val()290k
;
27
520k
  if (domain >= 0)
28
446k
    dv->addDomain(domain);
29
520k
  assert(dv->Refs == 0 && "Reference count wasn't cleared");
30
520k
  assert(!dv->Next && "Chained DomainValue shouldn't have been recycled");
31
520k
  return dv;
32
520k
}
33
34
2.80M
void ExecutionDomainFix::release(DomainValue *DV) {
35
3.32M
  while (DV) {
36
830k
    assert(DV->Refs && "Bad DomainValue");
37
830k
    if (--DV->Refs)
38
310k
      return;
39
520k
40
520k
    // There are no more DV references. Collapse any contained instructions.
41
520k
    if (DV->AvailableDomains && 
!DV->isCollapsed()515k
)
42
29.3k
      collapse(DV, DV->getFirstDomain());
43
520k
44
520k
    DomainValue *Next = DV->Next;
45
520k
    DV->clear();
46
520k
    Avail.push_back(DV);
47
520k
    // Also release the next DomainValue in the chain.
48
520k
    DV = Next;
49
520k
  }
50
2.80M
}
51
52
18.3M
DomainValue *ExecutionDomainFix::resolve(DomainValue *&DVRef) {
53
18.3M
  DomainValue *DV = DVRef;
54
18.3M
  if (!DV || 
!DV->Next448k
)
55
18.3M
    return DV;
56
3.45k
57
3.45k
  // DV has a chain. Find the end.
58
3.45k
  do
59
6.22k
    DV = DV->Next;
60
6.22k
  while (DV->Next);
61
3.45k
62
3.45k
  // Update DVRef to point to DV.
63
3.45k
  retain(DV);
64
3.45k
  release(DVRef);
65
3.45k
  DVRef = DV;
66
3.45k
  return DV;
67
3.45k
}
68
69
822k
void ExecutionDomainFix::setLiveReg(int rx, DomainValue *dv) {
70
822k
  assert(unsigned(rx) < NumRegs && "Invalid index");
71
822k
  assert(!LiveRegs.empty() && "Must enter basic block first.");
72
822k
73
822k
  if (LiveRegs[rx] == dv)
74
0
    return;
75
822k
  if (LiveRegs[rx])
76
28.6k
    release(LiveRegs[rx]);
77
822k
  LiveRegs[rx] = retain(dv);
78
822k
}
79
80
470k
void ExecutionDomainFix::kill(int rx) {
81
470k
  assert(unsigned(rx) < NumRegs && "Invalid index");
82
470k
  assert(!LiveRegs.empty() && "Must enter basic block first.");
83
470k
  if (!LiveRegs[rx])
84
166k
    return;
85
304k
86
304k
  release(LiveRegs[rx]);
87
304k
  LiveRegs[rx] = nullptr;
88
304k
}
89
90
825k
void ExecutionDomainFix::force(int rx, unsigned domain) {
91
825k
  assert(unsigned(rx) < NumRegs && "Invalid index");
92
825k
  assert(!LiveRegs.empty() && "Must enter basic block first.");
93
825k
  if (DomainValue *dv = LiveRegs[rx]) {
94
404k
    if (dv->isCollapsed())
95
364k
      dv->addDomain(domain);
96
39.3k
    else if (dv->hasDomain(domain))
97
38.9k
      collapse(dv, domain);
98
387
    else {
99
387
      // This is an incompatible open DomainValue. Collapse it to whatever and
100
387
      // force the new value into domain. This costs a domain crossing.
101
387
      collapse(dv, dv->getFirstDomain());
102
387
      assert(LiveRegs[rx] && "Not live after collapse?");
103
387
      LiveRegs[rx]->addDomain(domain);
104
387
    }
105
421k
  } else {
106
421k
    // Set up basic collapsed DomainValue.
107
421k
    setLiveReg(rx, alloc(domain));
108
421k
  }
109
825k
}
110
111
69.0k
void ExecutionDomainFix::collapse(DomainValue *dv, unsigned domain) {
112
69.0k
  assert(dv->hasDomain(domain) && "Cannot collapse");
113
69.0k
114
69.0k
  // Collapse all the instructions.
115
174k
  while (!dv->Instrs.empty())
116
105k
    TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain);
117
69.0k
  dv->setSingleDomain(domain);
118
69.0k
119
69.0k
  // If there are multiple users, give them new, unique DomainValues.
120
69.0k
  if (!LiveRegs.empty() && 
dv->Refs > 150.1k
)
121
384k
    
for (unsigned rx = 0; 11.6k
rx != NumRegs;
++rx373k
)
122
373k
      if (LiveRegs[rx] == dv)
123
25.1k
        setLiveReg(rx, alloc(domain));
124
69.0k
}
125
126
51.6k
bool ExecutionDomainFix::merge(DomainValue *A, DomainValue *B) {
127
51.6k
  assert(!A->isCollapsed() && "Cannot merge into collapsed");
128
51.6k
  assert(!B->isCollapsed() && "Cannot merge from collapsed");
129
51.6k
  if (A == B)
130
46.7k
    return true;
131
4.96k
  // Restrict to the domains that A and B have in common.
132
4.96k
  unsigned common = A->getCommonDomains(B->AvailableDomains);
133
4.96k
  if (!common)
134
0
    return false;
135
4.96k
  A->AvailableDomains = common;
136
4.96k
  A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
137
4.96k
138
4.96k
  // Clear the old DomainValue so we won't try to swizzle instructions twice.
139
4.96k
  B->clear();
140
4.96k
  // All uses of B are referred to A.
141
4.96k
  B->Next = retain(A);
142
4.96k
143
163k
  for (unsigned rx = 0; rx != NumRegs; 
++rx158k
) {
144
158k
    assert(!LiveRegs.empty() && "no space allocated for live registers");
145
158k
    if (LiveRegs[rx] == B)
146
3.46k
      setLiveReg(rx, A);
147
158k
  }
148
4.96k
  return true;
149
4.96k
}
150
151
void ExecutionDomainFix::enterBasicBlock(
152
503k
    const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
153
503k
154
503k
  MachineBasicBlock *MBB = TraversedMBB.MBB;
155
503k
156
503k
  // Set up LiveRegs to represent registers entering MBB.
157
503k
  // Set default domain values to 'no domain' (nullptr)
158
503k
  if (LiveRegs.empty())
159
503k
    LiveRegs.assign(NumRegs, nullptr);
160
503k
161
503k
  // This is the entry block.
162
503k
  if (MBB->pred_empty()) {
163
127k
    LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n");
164
127k
    return;
165
127k
  }
166
375k
167
375k
  // Try to coalesce live-out registers from predecessors.
168
591k
  
for (MachineBasicBlock *pred : MBB->predecessors())375k
{
169
591k
    assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() &&
170
591k
           "Should have pre-allocated MBBInfos for all MBBs");
171
591k
    LiveRegsDVInfo &Incoming = MBBOutRegsInfos[pred->getNumber()];
172
591k
    // Incoming is null if this is a backedge from a BB
173
591k
    // we haven't processed yet
174
591k
    if (Incoming.empty())
175
17.3k
      continue;
176
574k
177
18.9M
    
for (unsigned rx = 0; 574k
rx != NumRegs;
++rx18.3M
) {
178
18.3M
      DomainValue *pdv = resolve(Incoming[rx]);
179
18.3M
      if (!pdv)
180
17.9M
        continue;
181
448k
      if (!LiveRegs[rx]) {
182
280k
        setLiveReg(rx, pdv);
183
280k
        continue;
184
280k
      }
185
167k
186
167k
      // We have a live DomainValue from more than one predecessor.
187
167k
      if (LiveRegs[rx]->isCollapsed()) {
188
118k
        // We are already collapsed, but predecessor is not. Force it.
189
118k
        unsigned Domain = LiveRegs[rx]->getFirstDomain();
190
118k
        if (!pdv->isCollapsed() && 
pdv->hasDomain(Domain)344
)
191
341
          collapse(pdv, Domain);
192
118k
        continue;
193
118k
      }
194
49.5k
195
49.5k
      // Currently open, merge in predecessor.
196
49.5k
      if (!pdv->isCollapsed())
197
49.3k
        merge(LiveRegs[rx], pdv);
198
254
      else
199
254
        force(rx, pdv->getFirstDomain());
200
49.5k
    }
201
574k
  }
202
375k
  LLVM_DEBUG(dbgs() << printMBBReference(*MBB)
203
375k
                    << (!TraversedMBB.IsDone ? ": incomplete\n"
204
375k
                                             : ": all preds known\n"));
205
375k
}
206
207
void ExecutionDomainFix::leaveBasicBlock(
208
503k
    const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
209
503k
  assert(!LiveRegs.empty() && "Must enter basic block first.");
210
503k
  unsigned MBBNumber = TraversedMBB.MBB->getNumber();
211
503k
  assert(MBBNumber < MBBOutRegsInfos.size() &&
212
503k
         "Unexpected basic block number.");
213
503k
  // Save register clearances at end of MBB - used by enterBasicBlock().
214
2.02M
  for (DomainValue *OldLiveReg : MBBOutRegsInfos[MBBNumber]) {
215
2.02M
    release(OldLiveReg);
216
2.02M
  }
217
503k
  MBBOutRegsInfos[MBBNumber] = LiveRegs;
218
503k
  LiveRegs.clear();
219
503k
}
220
221
2.96M
bool ExecutionDomainFix::visitInstr(MachineInstr *MI) {
222
2.96M
  // Update instructions with explicit execution domains.
223
2.96M
  std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
224
2.96M
  if (DomP.first) {
225
450k
    if (DomP.second)
226
183k
      visitSoftInstr(MI, DomP.second);
227
266k
    else
228
266k
      visitHardInstr(MI, DomP.first);
229
450k
  }
230
2.96M
231
2.96M
  return !DomP.first;
232
2.96M
}
233
234
3.31M
void ExecutionDomainFix::processDefs(MachineInstr *MI, bool Kill) {
235
3.31M
  assert(!MI->isDebugInstr() && "Won't process debug values");
236
3.31M
  const MCInstrDesc &MCID = MI->getDesc();
237
3.31M
  for (unsigned i = 0,
238
3.31M
                e = MI->isVariadic() ? 
MI->getNumOperands()166k
:
MCID.getNumDefs()3.14M
;
239
5.71M
       i != e; 
++i2.40M
) {
240
2.40M
    MachineOperand &MO = MI->getOperand(i);
241
2.40M
    if (!MO.isReg())
242
153k
      continue;
243
2.24M
    if (MO.isUse())
244
235k
      continue;
245
2.01M
    for (int rx : regIndices(MO.getReg())) {
246
478k
      // This instruction explicitly defines rx.
247
478k
      LLVM_DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << *MI);
248
478k
249
478k
      // Kill off domains redefined by generic instructions.
250
478k
      if (Kill)
251
74.1k
        kill(rx);
252
478k
    }
253
2.01M
  }
254
3.31M
}
255
256
345k
void ExecutionDomainFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
257
345k
  // Collapse all uses.
258
345k
  for (unsigned i = mi->getDesc().getNumDefs(),
259
345k
                e = mi->getDesc().getNumOperands();
260
1.27M
       i != e; 
++i926k
) {
261
926k
    MachineOperand &mo = mi->getOperand(i);
262
926k
    if (!mo.isReg())
263
216k
      continue;
264
709k
    for (int rx : regIndices(mo.getReg())) {
265
520k
      force(rx, domain);
266
520k
    }
267
709k
  }
268
345k
269
345k
  // Kill all defs and force them.
270
675k
  for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; 
++i330k
) {
271
330k
    MachineOperand &mo = mi->getOperand(i);
272
330k
    if (!mo.isReg())
273
0
      continue;
274
330k
    for (int rx : regIndices(mo.getReg())) {
275
304k
      kill(rx);
276
304k
      force(rx, domain);
277
304k
    }
278
330k
  }
279
345k
}
280
281
183k
void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
282
183k
  // Bitmask of available domains for this instruction after taking collapsed
283
183k
  // operands into account.
284
183k
  unsigned available = mask;
285
183k
286
183k
  // Scan the explicit use operands for incoming domains.
287
183k
  SmallVector<int, 4> used;
288
183k
  if (!LiveRegs.empty())
289
183k
    for (unsigned i = mi->getDesc().getNumDefs(),
290
183k
                  e = mi->getDesc().getNumOperands();
291
822k
         i != e; 
++i638k
) {
292
638k
      MachineOperand &mo = mi->getOperand(i);
293
638k
      if (!mo.isReg())
294
185k
        continue;
295
453k
      for (int rx : regIndices(mo.getReg())) {
296
206k
        DomainValue *dv = LiveRegs[rx];
297
206k
        if (dv == nullptr)
298
49.5k
          continue;
299
157k
        // Bitmask of domains that dv and available have in common.
300
157k
        unsigned common = dv->getCommonDomains(available);
301
157k
        // Is it possible to use this collapsed register for free?
302
157k
        if (dv->isCollapsed()) {
303
115k
          // Restrict available domains to the ones in common with the operand.
304
115k
          // If there are no common domains, we must pay the cross-domain
305
115k
          // penalty for this operand.
306
115k
          if (common)
307
114k
            available = common;
308
115k
        } else 
if (41.9k
common41.9k
)
309
41.9k
          // Open DomainValue is compatible, save it for merging.
310
41.9k
          used.push_back(rx);
311
22
        else
312
22
          // Open DomainValue is not compatible with instruction. It is useless
313
22
          // now.
314
22
          kill(rx);
315
157k
      }
316
453k
    }
317
183k
318
183k
  // If the collapsed operands force a single domain, propagate the collapse.
319
183k
  if (isPowerOf2_32(available)) {
320
78.1k
    unsigned domain = countTrailingZeros(available);
321
78.1k
    TII->setExecutionDomain(*mi, domain);
322
78.1k
    visitHardInstr(mi, domain);
323
78.1k
    return;
324
78.1k
  }
325
105k
326
105k
  // Kill off any remaining uses that don't match available, and build a list of
327
105k
  // incoming DomainValues that we want to merge.
328
105k
  SmallVector<int, 4> Regs;
329
105k
  for (int rx : used) {
330
37.1k
    assert(!LiveRegs.empty() && "no space allocated for live registers");
331
37.1k
    DomainValue *&LR = LiveRegs[rx];
332
37.1k
    // This useless DomainValue could have been missed above.
333
37.1k
    if (!LR->getCommonDomains(available)) {
334
0
      kill(rx);
335
0
      continue;
336
0
    }
337
37.1k
    // Sorted insertion.
338
37.1k
    // Enables giving priority to the latest domains during merging.
339
37.1k
    const int Def = RDA->getReachingDef(mi, RC->getRegister(rx));
340
37.1k
    auto I = partition_point(Regs, [&](int I) {
341
5.55k
      return RDA->getReachingDef(mi, RC->getRegister(I)) <= Def;
342
5.55k
    });
343
37.1k
    Regs.insert(I, rx);
344
37.1k
  }
345
105k
346
105k
  // doms are now sorted in order of appearance. Try to merge them all, giving
347
105k
  // priority to the latest ones.
348
105k
  DomainValue *dv = nullptr;
349
142k
  while (!Regs.empty()) {
350
37.1k
    if (!dv) {
351
31.6k
      dv = LiveRegs[Regs.pop_back_val()];
352
31.6k
      // Force the first dv to match the current instruction.
353
31.6k
      dv->AvailableDomains = dv->getCommonDomains(available);
354
31.6k
      assert(dv->AvailableDomains && "Domain should have been filtered");
355
31.6k
      continue;
356
31.6k
    }
357
5.55k
358
5.55k
    DomainValue *Latest = LiveRegs[Regs.pop_back_val()];
359
5.55k
    // Skip already merged values.
360
5.55k
    if (Latest == dv || 
Latest->Next2.35k
)
361
3.19k
      continue;
362
2.35k
    if (merge(dv, Latest))
363
2.35k
      continue;
364
0
365
0
    // If latest didn't merge, it is useless now. Kill all registers using it.
366
0
    for (int i : used) {
367
0
      assert(!LiveRegs.empty() && "no space allocated for live registers");
368
0
      if (LiveRegs[i] == Latest)
369
0
        kill(i);
370
0
    }
371
0
  }
372
105k
373
105k
  // dv is the DomainValue we are going to use for this instruction.
374
105k
  if (!dv) {
375
74.0k
    dv = alloc();
376
74.0k
    dv->AvailableDomains = available;
377
74.0k
  }
378
105k
  dv->Instrs.push_back(mi);
379
105k
380
105k
  // Finally set all defs and non-collapsed uses to dv. We must iterate through
381
105k
  // all the operators, including imp-def ones.
382
530k
  for (MachineOperand &mo : mi->operands()) {
383
530k
    if (!mo.isReg())
384
148k
      continue;
385
381k
    for (int rx : regIndices(mo.getReg())) {
386
172k
      if (!LiveRegs[rx] || 
(100k
mo.isDef()100k
&&
LiveRegs[rx] != dv33.5k
)) {
387
91.9k
        kill(rx);
388
91.9k
        setLiveReg(rx, dv);
389
91.9k
      }
390
172k
    }
391
381k
  }
392
105k
}
393
394
void ExecutionDomainFix::processBasicBlock(
395
503k
    const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
396
503k
  enterBasicBlock(TraversedMBB);
397
503k
  // If this block is not done, it makes little sense to make any decisions
398
503k
  // based on clearance information. We need to make a second pass anyway,
399
503k
  // and by then we'll have better information, so we can avoid doing the work
400
503k
  // to try and break dependencies now.
401
3.31M
  for (MachineInstr &MI : *TraversedMBB.MBB) {
402
3.31M
    if (!MI.isDebugInstr()) {
403
3.31M
      bool Kill = false;
404
3.31M
      if (TraversedMBB.PrimaryPass)
405
2.96M
        Kill = visitInstr(&MI);
406
3.31M
      processDefs(&MI, Kill);
407
3.31M
    }
408
3.31M
  }
409
503k
  leaveBasicBlock(TraversedMBB);
410
503k
}
411
412
160k
bool ExecutionDomainFix::runOnMachineFunction(MachineFunction &mf) {
413
160k
  if (skipFunction(mf.getFunction()))
414
207
    return false;
415
160k
  MF = &mf;
416
160k
  TII = MF->getSubtarget().getInstrInfo();
417
160k
  TRI = MF->getSubtarget().getRegisterInfo();
418
160k
  LiveRegs.clear();
419
160k
  assert(NumRegs == RC->getNumRegs() && "Bad regclass");
420
160k
421
160k
  LLVM_DEBUG(dbgs() << "********** FIX EXECUTION DOMAIN: "
422
160k
                    << TRI->getRegClassName(RC) << " **********\n");
423
160k
424
160k
  // If no relevant registers are used in the function, we can skip it
425
160k
  // completely.
426
160k
  bool anyregs = false;
427
160k
  const MachineRegisterInfo &MRI = mf.getRegInfo();
428
1.20M
  for (unsigned Reg : *RC) {
429
1.20M
    if (MRI.isPhysRegUsed(Reg)) {
430
127k
      anyregs = true;
431
127k
      break;
432
127k
    }
433
1.20M
  }
434
160k
  if (!anyregs)
435
32.6k
    return false;
436
127k
437
127k
  RDA = &getAnalysis<ReachingDefAnalysis>();
438
127k
439
127k
  // Initialize the AliasMap on the first use.
440
127k
  if (AliasMap.empty()) {
441
9.91k
    // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and
442
9.91k
    // therefore the LiveRegs array.
443
9.91k
    AliasMap.resize(TRI->getNumRegs());
444
327k
    for (unsigned i = 0, e = RC->getNumRegs(); i != e; 
++i317k
)
445
3.63M
      
for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); 317k
AI.isValid();
446
3.31M
           ++AI)
447
3.31M
        AliasMap[*AI].push_back(i);
448
9.91k
  }
449
127k
450
127k
  // Initialize the MBBOutRegsInfos
451
127k
  MBBOutRegsInfos.resize(mf.getNumBlockIDs());
452
127k
453
127k
  // Traverse the basic blocks.
454
127k
  LoopTraversal Traversal;
455
127k
  LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf);
456
503k
  for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder) {
457
503k
    processBasicBlock(TraversedMBB);
458
503k
  }
459
127k
460
447k
  for (LiveRegsDVInfo OutLiveRegs : MBBOutRegsInfos) {
461
14.0M
    for (DomainValue *OutLiveReg : OutLiveRegs) {
462
14.0M
      if (OutLiveReg)
463
441k
        release(OutLiveReg);
464
14.0M
    }
465
447k
  }
466
127k
  MBBOutRegsInfos.clear();
467
127k
  Avail.clear();
468
127k
  Allocator.DestroyAll();
469
127k
470
127k
  return false;
471
127k
}