Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9
#include "llvm/CodeGen/GlobalISel/Combiner.h"
10
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
11
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
12
#include "llvm/CodeGen/GlobalISel/Utils.h"
13
#include "llvm/CodeGen/MachineInstr.h"
14
#include "llvm/CodeGen/MachineRegisterInfo.h"
15
#include "llvm/CodeGen/TargetInstrInfo.h"
16
17
#define DEBUG_TYPE "gi-combiner"
18
19
using namespace llvm;
20
21
/// Construct a combiner helper bound to \p Observer and \p B. The register
/// info is cached from the builder's current machine function, so the builder
/// must already be configured for the function being combined.
CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
                               MachineIRBuilder &B)
    : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {}
24
25
/// Replace every use of \p FromReg with \p ToReg, bracketing the mutation
/// with observer notifications. When the two vregs' attributes cannot be
/// reconciled, an explicit COPY is emitted instead of merging them.
void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
                                    Register ToReg) const {
  Observer.changingAllUsesOfReg(MRI, FromReg);

  // Try to fold the registers together; fall back to a copy if ToReg cannot
  // take on FromReg's constraints.
  const bool CanMerge = MRI.constrainRegAttrs(ToReg, FromReg);
  if (!CanMerge)
    Builder.buildCopy(ToReg, FromReg);
  else
    MRI.replaceRegWith(FromReg, ToReg);

  Observer.finishedChangingAllUsesOfReg();
}
36
37
void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
38
                                      MachineOperand &FromRegOp,
39
35.6k
                                      Register ToReg) const {
40
35.6k
  assert(FromRegOp.getParent() && "Expected an operand in an MI");
41
35.6k
  Observer.changingInstr(*FromRegOp.getParent());
42
35.6k
43
35.6k
  FromRegOp.setReg(ToReg);
44
35.6k
45
35.6k
  Observer.changedInstr(*FromRegOp.getParent());
46
35.6k
}
47
48
8.65M
/// Match-then-apply driver for the trivial copy-propagation combine.
/// Returns true iff the IR was changed.
bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
  // Only mutate the IR once the pattern is known to apply.
  if (!matchCombineCopy(MI))
    return false;
  applyCombineCopy(MI);
  return true;
}
55
8.65M
/// Match a COPY that is foldable by simple copy propagation:
///   a(sx) = COPY b(sx)  ->  replace all uses of a with b.
/// The combine only fires when both sides have valid, identical LLTs, so the
/// replacement cannot change the meaning of any user of \p MI's result.
bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::COPY)
    return false;
  // Use Register rather than unsigned for vregs, for consistency with the
  // rest of this file (e.g. replaceRegWith's signature).
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  // Simple Copy Propagation.
  // a(sx) = COPY b(sx) -> Replace all uses of a with b.
  return DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy;
}
68
692k
/// Erase the matched COPY and merge its result register into its source.
/// The COPY is erased *before* replaceRegWith runs so that its own read of
/// SrcReg is not rewritten along with the other uses.
void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
  // Register, not unsigned, for vregs — matches replaceRegWith's parameters.
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, SrcReg);
}
74
75
namespace {
76
77
/// Select a preference between two uses. CurrentUse is the current preference
78
/// while *ForCandidate is attributes of the candidate under consideration.
79
PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
80
                                  const LLT &TyForCandidate,
81
                                  unsigned OpcodeForCandidate,
82
127k
                                  MachineInstr *MIForCandidate) {
83
127k
  if (!CurrentUse.Ty.isValid()) {
84
125k
    if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
85
125k
        
CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT119k
)
86
121k
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
87
3.59k
    return CurrentUse;
88
3.59k
  }
89
1.75k
90
1.75k
  // We permit the extend to hoist through basic blocks but this is only
91
1.75k
  // sensible if the target has extending loads. If you end up lowering back
92
1.75k
  // into a load and extend during the legalizer then the end result is
93
1.75k
  // hoisting the extend up to the load.
94
1.75k
95
1.75k
  // Prefer defined extensions to undefined extensions as these are more
96
1.75k
  // likely to reduce the number of instructions.
97
1.75k
  if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
98
1.75k
      
CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT160
)
99
40
    return CurrentUse;
100
1.71k
  else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
101
1.71k
           
OpcodeForCandidate != TargetOpcode::G_ANYEXT131
)
102
11
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
103
1.70k
104
1.70k
  // Prefer sign extensions to zero extensions as sign-extensions tend to be
105
1.70k
  // more expensive.
106
1.70k
  if (CurrentUse.Ty == TyForCandidate) {
107
420
    if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
108
420
        
OpcodeForCandidate == TargetOpcode::G_ZEXT121
)
109
93
      return CurrentUse;
110
327
    else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
111
327
             
OpcodeForCandidate == TargetOpcode::G_SEXT179
)
112
166
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
113
1.44k
  }
114
1.44k
115
1.44k
  // This is potentially target specific. We've chosen the largest type
116
1.44k
  // because G_TRUNC is usually free. One potential catch with this is that
117
1.44k
  // some targets have a reduced number of larger registers than smaller
118
1.44k
  // registers and this choice potentially increases the live-range for the
119
1.44k
  // larger value.
120
1.44k
  if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
121
1.02k
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
122
1.02k
  }
123
425
  return CurrentUse;
124
425
}
125
126
/// Find a suitable place to insert some instructions and insert them. This
127
/// function accounts for special cases like inserting before a PHI node.
128
/// The current strategy for inserting before PHI's is to duplicate the
129
/// instructions for each predecessor. However, while that's ok for G_TRUNC
130
/// on most targets since it generally requires no code, other targets/cases may
131
/// want to try harder to find a dominating block.
132
static void InsertInsnsWithoutSideEffectsBeforeUse(
133
    MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
134
    std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
135
                       MachineOperand &UseMO)>
136
38.4k
        Inserter) {
137
38.4k
  MachineInstr &UseMI = *UseMO.getParent();
138
38.4k
139
38.4k
  MachineBasicBlock *InsertBB = UseMI.getParent();
140
38.4k
141
38.4k
  // If the use is a PHI then we want the predecessor block instead.
142
38.4k
  if (UseMI.isPHI()) {
143
4.46k
    MachineOperand *PredBB = std::next(&UseMO);
144
4.46k
    InsertBB = PredBB->getMBB();
145
4.46k
  }
146
38.4k
147
38.4k
  // If the block is the same block as the def then we want to insert just after
148
38.4k
  // the def instead of at the start of the block.
149
38.4k
  if (InsertBB == DefMI.getParent()) {
150
22.9k
    MachineBasicBlock::iterator InsertPt = &DefMI;
151
22.9k
    Inserter(InsertBB, std::next(InsertPt), UseMO);
152
22.9k
    return;
153
22.9k
  }
154
15.5k
155
15.5k
  // Otherwise we want the start of the BB
156
15.5k
  Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
157
15.5k
}
158
} // end anonymous namespace
159
160
2.53M
/// Match-then-apply driver for folding extends of a load into an extending
/// load. Returns true iff the IR was changed.
bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
  PreferredTuple Preferred;
  if (!matchCombineExtendingLoads(MI, Preferred))
    return false;
  applyCombineExtendingLoads(MI, Preferred);
  return true;
}
168
169
bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
170
2.53M
                                                PreferredTuple &Preferred) {
171
2.53M
  // We match the loads and follow the uses to the extend instead of matching
172
2.53M
  // the extends and following the def to the load. This is because the load
173
2.53M
  // must remain in the same position for correctness (unless we also add code
174
2.53M
  // to find a safe place to sink it) whereas the extend is freely movable.
175
2.53M
  // It also prevents us from duplicating the load for the volatile case or just
176
2.53M
  // for performance.
177
2.53M
178
2.53M
  if (MI.getOpcode() != TargetOpcode::G_LOAD &&
179
2.53M
      
MI.getOpcode() != TargetOpcode::G_SEXTLOAD231k
&&
180
2.53M
      
MI.getOpcode() != TargetOpcode::G_ZEXTLOAD104k
)
181
0
    return false;
182
2.53M
183
2.53M
  auto &LoadValue = MI.getOperand(0);
184
2.53M
  assert(LoadValue.isReg() && "Result wasn't a register?");
185
2.53M
186
2.53M
  LLT LoadValueTy = MRI.getType(LoadValue.getReg());
187
2.53M
  if (!LoadValueTy.isScalar())
188
939k
    return false;
189
1.59M
190
1.59M
  // Most architectures are going to legalize <s8 loads into at least a 1 byte
191
1.59M
  // load, and the MMOs can only describe memory accesses in multiples of bytes.
192
1.59M
  // If we try to perform extload combining on those, we can end up with
193
1.59M
  // %a(s8) = extload %ptr (load 1 byte from %ptr)
194
1.59M
  // ... which is an illegal extload instruction.
195
1.59M
  if (LoadValueTy.getSizeInBits() < 8)
196
3.44k
    return false;
197
1.59M
198
1.59M
  // For non power-of-2 types, they will very likely be legalized into multiple
199
1.59M
  // loads. Don't bother trying to match them into extending loads.
200
1.59M
  if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
201
464
    return false;
202
1.59M
203
1.59M
  // Find the preferred type aside from the any-extends (unless it's the only
204
1.59M
  // one) and non-extending ops. We'll emit an extending load to that type and
205
1.59M
  // and emit a variant of (extend (trunc X)) for the others according to the
206
1.59M
  // relative type sizes. At the same time, pick an extend to use based on the
207
1.59M
  // extend involved in the chosen type.
208
1.59M
  unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
209
1.59M
                                 ? 
TargetOpcode::G_ANYEXT1.36M
210
1.59M
                                 : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
211
231k
                                       ? 
TargetOpcode::G_SEXT127k
212
231k
                                       : 
TargetOpcode::G_ZEXT104k
;
213
1.59M
  Preferred = {LLT(), PreferredOpcode, nullptr};
214
2.32M
  for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) {
215
2.32M
    if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
216
2.32M
        
UseMI.getOpcode() == TargetOpcode::G_ZEXT2.26M
||
217
2.32M
        
UseMI.getOpcode() == TargetOpcode::G_ANYEXT2.21M
) {
218
127k
      Preferred = ChoosePreferredUse(Preferred,
219
127k
                                     MRI.getType(UseMI.getOperand(0).getReg()),
220
127k
                                     UseMI.getOpcode(), &UseMI);
221
127k
    }
222
2.32M
  }
223
1.59M
224
1.59M
  // There were no extends
225
1.59M
  if (!Preferred.MI)
226
1.47M
    return false;
227
121k
  // It should be impossible to chose an extend without selecting a different
228
121k
  // type since by definition the result of an extend is larger.
229
121k
  assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
230
121k
231
121k
  LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
232
121k
  return true;
233
121k
}
234
235
void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
236
121k
                                                PreferredTuple &Preferred) {
237
121k
  // Rewrite the load to the chosen extending load.
238
121k
  Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
239
121k
240
121k
  // Inserter to insert a truncate back to the original type at a given point
241
121k
  // with some basic CSE to limit truncate duplication to one per BB.
242
121k
  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
243
121k
  auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
244
121k
                           MachineBasicBlock::iterator InsertBefore,
245
121k
                           MachineOperand &UseMO) {
246
38.4k
    MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
247
38.4k
    if (PreviouslyEmitted) {
248
2.87k
      Observer.changingInstr(*UseMO.getParent());
249
2.87k
      UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
250
2.87k
      Observer.changedInstr(*UseMO.getParent());
251
2.87k
      return;
252
2.87k
    }
253
35.6k
254
35.6k
    Builder.setInsertPt(*InsertIntoBB, InsertBefore);
255
35.6k
    Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
256
35.6k
    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
257
35.6k
    EmittedInsns[InsertIntoBB] = NewMI;
258
35.6k
    replaceRegOpWith(MRI, UseMO, NewDstReg);
259
35.6k
  };
260
121k
261
121k
  Observer.changingInstr(MI);
262
121k
  MI.setDesc(
263
121k
      Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
264
121k
                               ? 
TargetOpcode::G_SEXTLOAD63.8k
265
121k
                               : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
266
57.8k
                                     ? 
TargetOpcode::G_ZEXTLOAD52.0k
267
57.8k
                                     : 
TargetOpcode::G_LOAD5.83k
));
268
121k
269
121k
  // Rewrite all the uses to fix up the types.
270
121k
  auto &LoadValue = MI.getOperand(0);
271
121k
  SmallVector<MachineOperand *, 4> Uses;
272
121k
  for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
273
160k
    Uses.push_back(&UseMO);
274
121k
275
160k
  for (auto *UseMO : Uses) {
276
160k
    MachineInstr *UseMI = UseMO->getParent();
277
160k
278
160k
    // If the extend is compatible with the preferred extend then we should fix
279
160k
    // up the type and extend so that it uses the preferred use.
280
160k
    if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
281
160k
        
UseMI->getOpcode() == TargetOpcode::G_ANYEXT37.2k
) {
282
123k
      unsigned UseDstReg = UseMI->getOperand(0).getReg();
283
123k
      MachineOperand &UseSrcMO = UseMI->getOperand(1);
284
123k
      const LLT &UseDstTy = MRI.getType(UseDstReg);
285
123k
      if (UseDstReg != ChosenDstReg) {
286
1.49k
        if (Preferred.Ty == UseDstTy) {
287
204
          // If the use has the same type as the preferred use, then merge
288
204
          // the vregs and erase the extend. For example:
289
204
          //    %1:_(s8) = G_LOAD ...
290
204
          //    %2:_(s32) = G_SEXT %1(s8)
291
204
          //    %3:_(s32) = G_ANYEXT %1(s8)
292
204
          //    ... = ... %3(s32)
293
204
          // rewrites to:
294
204
          //    %2:_(s32) = G_SEXTLOAD ...
295
204
          //    ... = ... %2(s32)
296
204
          replaceRegWith(MRI, UseDstReg, ChosenDstReg);
297
204
          Observer.erasingInstr(*UseMO->getParent());
298
204
          UseMO->getParent()->eraseFromParent();
299
1.28k
        } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
300
2
          // If the preferred size is smaller, then keep the extend but extend
301
2
          // from the result of the extending load. For example:
302
2
          //    %1:_(s8) = G_LOAD ...
303
2
          //    %2:_(s32) = G_SEXT %1(s8)
304
2
          //    %3:_(s64) = G_ANYEXT %1(s8)
305
2
          //    ... = ... %3(s64)
306
2
          /// rewrites to:
307
2
          //    %2:_(s32) = G_SEXTLOAD ...
308
2
          //    %3:_(s64) = G_ANYEXT %2:_(s32)
309
2
          //    ... = ... %3(s64)
310
2
          replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
311
1.28k
        } else {
312
1.28k
          // If the preferred size is large, then insert a truncate. For
313
1.28k
          // example:
314
1.28k
          //    %1:_(s8) = G_LOAD ...
315
1.28k
          //    %2:_(s64) = G_SEXT %1(s8)
316
1.28k
          //    %3:_(s32) = G_ZEXT %1(s8)
317
1.28k
          //    ... = ... %3(s32)
318
1.28k
          /// rewrites to:
319
1.28k
          //    %2:_(s64) = G_SEXTLOAD ...
320
1.28k
          //    %4:_(s8) = G_TRUNC %2:_(s32)
321
1.28k
          //    %3:_(s64) = G_ZEXT %2:_(s8)
322
1.28k
          //    ... = ... %3(s64)
323
1.28k
          InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
324
1.28k
                                                 InsertTruncAt);
325
1.28k
        }
326
1.49k
        continue;
327
1.49k
      }
328
121k
      // The use is (one of) the uses of the preferred use we chose earlier.
329
121k
      // We're going to update the load to def this value later so just erase
330
121k
      // the old extend.
331
121k
      Observer.erasingInstr(*UseMO->getParent());
332
121k
      UseMO->getParent()->eraseFromParent();
333
121k
      continue;
334
121k
    }
335
37.1k
336
37.1k
    // The use isn't an extend. Truncate back to the type we originally loaded.
337
37.1k
    // This is free on many targets.
338
37.1k
    InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
339
37.1k
  }
340
121k
341
121k
  MI.getOperand(0).setReg(ChosenDstReg);
342
121k
  Observer.changedInstr(MI);
343
121k
}
344
345
1.17M
bool CombinerHelper::matchCombineBr(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR");
  // Try to match the following:
  // bb1:
  //   %c(s32) = G_ICMP pred, %a, %b
  //   %c1(s1) = G_TRUNC %c(s32)
  //   G_BRCOND %c1, %bb2
  //   G_BR %bb3
  // bb2:
  // ...
  // bb3:

  // The above pattern does not have a fall through to the successor bb2, always
  // resulting in a branch no matter which path is taken. Here we try to find
  // and replace that pattern with conditional branch to bb3 and otherwise
  // fallthrough to bb2.

  MachineBasicBlock *ParentBB = MI.getParent();
  MachineBasicBlock::iterator BrPos(MI);
  if (BrPos == ParentBB->begin())
    return false;
  assert(std::next(BrPos) == ParentBB->end() &&
         "expected G_BR to be a terminator");

  // The instruction immediately before the G_BR must be a G_BRCOND.
  MachineInstr &BrCond = *std::prev(BrPos);
  if (BrCond.getOpcode() != TargetOpcode::G_BRCOND)
    return false;

  // Check that the next block is the conditional branch target.
  if (!ParentBB->isLayoutSuccessor(BrCond.getOperand(1).getMBB()))
    return false;

  // The condition must be produced by a G_ICMP with no other users, since we
  // are about to invert its predicate in place.
  MachineInstr *CmpMI = MRI.getVRegDef(BrCond.getOperand(0).getReg());
  if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP)
    return false;
  return MRI.hasOneUse(CmpMI->getOperand(0).getReg());
}
382
383
1.17M
bool CombinerHelper::tryCombineBr(MachineInstr &MI) {
  if (!matchCombineBr(MI))
    return false;

  // matchCombineBr established: the previous instruction is a G_BRCOND whose
  // condition is a single-use G_ICMP, and the BRCOND's target is the layout
  // successor of this block.
  MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
  MachineBasicBlock::iterator BrPos(MI);
  MachineInstr *BrCond = &*std::prev(BrPos);
  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());

  const auto Pred =
      static_cast<CmpInst::Predicate>(CmpMI->getOperand(1).getPredicate());
  CmpInst::Predicate InversePred = CmpInst::getInversePredicate(Pred);

  // Invert the G_ICMP condition.
  Observer.changingInstr(*CmpMI);
  CmpMI->getOperand(1).setPredicate(InversePred);
  Observer.changedInstr(*CmpMI);

  // Point the conditional branch at the old unconditional target; the old
  // conditional target becomes the fallthrough.
  Observer.changingInstr(*BrCond);
  BrCond->getOperand(1).setMBB(BrTarget);
  Observer.changedInstr(*BrCond);

  // The unconditional G_BR is now redundant.
  MI.eraseFromParent();
  return true;
}
406
407
0
/// Run the generic combines in order, stopping at the first one that fires.
bool CombinerHelper::tryCombine(MachineInstr &MI) {
  // Short-circuit: the extending-load combine only runs if the copy combine
  // did not change anything.
  return tryCombineCopy(MI) || tryCombineExtendingLoads(MI);
}