Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- TruncInstCombine.cpp -----------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// TruncInstCombine - looks for expression dags post-dominated by TruncInst and
10
// for each eligible dag, it will create a reduced bit-width expression, replace
11
// the old expression with this new one and remove the old expression.
12
// Eligible expression dag is such that:
13
//   1. Contains only supported instructions.
14
//   2. Supported leaves: ZExtInst, SExtInst, TruncInst and Constant value.
15
//   3. Can be evaluated into type with reduced legal bit-width.
16
//   4. All instructions in the dag must not have users outside the dag.
17
//      The only exception is for {ZExt, SExt}Inst with operand type equal to
18
//      the new reduced type evaluated in (3).
19
//
20
// The motivation for this optimization is that evaluating and expression using
21
// smaller bit-width is preferable, especially for vectorization where we can
22
// fit more values in one vectorized instruction. In addition, this optimization
23
// may decrease the number of cast instructions, but will not increase it.
24
//
25
//===----------------------------------------------------------------------===//
26
27
#include "AggressiveInstCombineInternal.h"
28
#include "llvm/ADT/MapVector.h"
29
#include "llvm/ADT/STLExtras.h"
30
#include "llvm/Analysis/ConstantFolding.h"
31
#include "llvm/Analysis/TargetLibraryInfo.h"
32
#include "llvm/IR/DataLayout.h"
33
#include "llvm/IR/Dominators.h"
34
#include "llvm/IR/IRBuilder.h"
35
using namespace llvm;
36
37
#define DEBUG_TYPE "aggressive-instcombine"
38
39
/// Given an instruction and a container, it fills all the relevant operands of
40
/// that instruction, with respect to the Trunc expression dag optimizaton.
41
31.9k
static void getRelevantOperands(Instruction *I, SmallVectorImpl<Value *> &Ops) {
42
31.9k
  unsigned Opc = I->getOpcode();
43
31.9k
  switch (Opc) {
44
31.9k
  case Instruction::Trunc:
45
778
  case Instruction::ZExt:
46
778
  case Instruction::SExt:
47
778
    // These CastInst are considered leaves of the evaluated expression, thus,
48
778
    // their operands are not relevent.
49
778
    break;
50
31.1k
  case Instruction::Add:
51
31.1k
  case Instruction::Sub:
52
31.1k
  case Instruction::Mul:
53
31.1k
  case Instruction::And:
54
31.1k
  case Instruction::Or:
55
31.1k
  case Instruction::Xor:
56
31.1k
    Ops.push_back(I->getOperand(0));
57
31.1k
    Ops.push_back(I->getOperand(1));
58
31.1k
    break;
59
31.1k
  default:
60
0
    llvm_unreachable("Unreachable!");
61
31.9k
  }
62
31.9k
}
63
64
113k
bool TruncInstCombine::buildTruncExpressionDag() {
65
113k
  SmallVector<Value *, 8> Worklist;
66
113k
  SmallVector<Instruction *, 8> Stack;
67
113k
  // Clear old expression dag.
68
113k
  InstInfoMap.clear();
69
113k
70
113k
  Worklist.push_back(CurrentTruncInst->getOperand(0));
71
113k
72
172k
  while (!Worklist.empty()) {
73
171k
    Value *Curr = Worklist.back();
74
171k
75
171k
    if (isa<Constant>(Curr)) {
76
24.3k
      Worklist.pop_back();
77
24.3k
      continue;
78
24.3k
    }
79
146k
80
146k
    auto *I = dyn_cast<Instruction>(Curr);
81
146k
    if (!I)
82
2.72k
      return false;
83
144k
84
144k
    if (!Stack.empty() && 
Stack.back() == I33.1k
) {
85
1.94k
      // Already handled all instruction operands, can remove it from both the
86
1.94k
      // Worklist and the Stack, and add it to the instruction info map.
87
1.94k
      Worklist.pop_back();
88
1.94k
      Stack.pop_back();
89
1.94k
      // Insert I to the Info map.
90
1.94k
      InstInfoMap.insert(std::make_pair(I, Info()));
91
1.94k
      continue;
92
1.94k
    }
93
142k
94
142k
    if (InstInfoMap.count(I)) {
95
95
      Worklist.pop_back();
96
95
      continue;
97
95
    }
98
142k
99
142k
    // Add the instruction to the stack before start handling its operands.
100
142k
    Stack.push_back(I);
101
142k
102
142k
    unsigned Opc = I->getOpcode();
103
142k
    switch (Opc) {
104
142k
    case Instruction::Trunc:
105
1.33k
    case Instruction::ZExt:
106
1.33k
    case Instruction::SExt:
107
1.33k
      // trunc(trunc(x)) -> trunc(x)
108
1.33k
      // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
109
1.33k
      // trunc(ext(x)) -> trunc(x) if the source type is larger than the new
110
1.33k
      // dest
111
1.33k
      break;
112
30.7k
    case Instruction::Add:
113
30.7k
    case Instruction::Sub:
114
30.7k
    case Instruction::Mul:
115
30.7k
    case Instruction::And:
116
30.7k
    case Instruction::Or:
117
30.7k
    case Instruction::Xor: {
118
30.7k
      SmallVector<Value *, 2> Operands;
119
30.7k
      getRelevantOperands(I, Operands);
120
30.7k
      for (Value *Operand : Operands)
121
61.4k
        Worklist.push_back(Operand);
122
30.7k
      break;
123
30.7k
    }
124
110k
    default:
125
110k
      // TODO: Can handle more cases here:
126
110k
      // 1. select, shufflevector, extractelement, insertelement
127
110k
      // 2. udiv, urem
128
110k
      // 3. shl, lshr, ashr
129
110k
      // 4. phi node(and loop handling)
130
110k
      // ...
131
110k
      return false;
132
142k
    }
133
142k
  }
134
113k
  
return true640
;
135
113k
}
136
137
372
unsigned TruncInstCombine::getMinBitWidth() {
138
372
  SmallVector<Value *, 8> Worklist;
139
372
  SmallVector<Instruction *, 8> Stack;
140
372
141
372
  Value *Src = CurrentTruncInst->getOperand(0);
142
372
  Type *DstTy = CurrentTruncInst->getType();
143
372
  unsigned TruncBitWidth = DstTy->getScalarSizeInBits();
144
372
  unsigned OrigBitWidth =
145
372
      CurrentTruncInst->getOperand(0)->getType()->getScalarSizeInBits();
146
372
147
372
  if (isa<Constant>(Src))
148
4
    return TruncBitWidth;
149
368
150
368
  Worklist.push_back(Src);
151
368
  InstInfoMap[cast<Instruction>(Src)].ValidBitWidth = TruncBitWidth;
152
368
153
1.53k
  while (!Worklist.empty()) {
154
1.17k
    Value *Curr = Worklist.back();
155
1.17k
156
1.17k
    if (isa<Constant>(Curr)) {
157
0
      Worklist.pop_back();
158
0
      continue;
159
0
    }
160
1.17k
161
1.17k
    // Otherwise, it must be an instruction.
162
1.17k
    auto *I = cast<Instruction>(Curr);
163
1.17k
164
1.17k
    auto &Info = InstInfoMap[I];
165
1.17k
166
1.17k
    SmallVector<Value *, 2> Operands;
167
1.17k
    getRelevantOperands(I, Operands);
168
1.17k
169
1.17k
    if (!Stack.empty() && 
Stack.back() == I802
) {
170
585
      // Already handled all instruction operands, can remove it from both, the
171
585
      // Worklist and the Stack, and update MinBitWidth.
172
585
      Worklist.pop_back();
173
585
      Stack.pop_back();
174
585
      for (auto *Operand : Operands)
175
392
        if (auto *IOp = dyn_cast<Instruction>(Operand))
176
269
          Info.MinBitWidth =
177
269
              std::max(Info.MinBitWidth, InstInfoMap[IOp].MinBitWidth);
178
585
      continue;
179
585
    }
180
585
181
585
    // Add the instruction to the stack before start handling its operands.
182
585
    Stack.push_back(I);
183
585
    unsigned ValidBitWidth = Info.ValidBitWidth;
184
585
185
585
    // Update minimum bit-width before handling its operands. This is required
186
585
    // when the instruction is part of a loop.
187
585
    Info.MinBitWidth = std::max(Info.MinBitWidth, Info.ValidBitWidth);
188
585
189
585
    for (auto *Operand : Operands)
190
392
      if (auto *IOp = dyn_cast<Instruction>(Operand)) {
191
269
        // If we already calculated the minimum bit-width for this valid
192
269
        // bit-width, or for a smaller valid bit-width, then just keep the
193
269
        // answer we already calculated.
194
269
        unsigned IOpBitwidth = InstInfoMap.lookup(IOp).ValidBitWidth;
195
269
        if (IOpBitwidth >= ValidBitWidth)
196
52
          continue;
197
217
        InstInfoMap[IOp].ValidBitWidth = std::max(ValidBitWidth, IOpBitwidth);
198
217
        Worklist.push_back(IOp);
199
217
      }
200
585
  }
201
368
  unsigned MinBitWidth = InstInfoMap.lookup(cast<Instruction>(Src)).MinBitWidth;
202
368
  assert(MinBitWidth >= TruncBitWidth);
203
368
204
368
  if (MinBitWidth > TruncBitWidth) {
205
0
    // In this case reducing expression with vector type might generate a new
206
0
    // vector type, which is not preferable as it might result in generating
207
0
    // sub-optimal code.
208
0
    if (DstTy->isVectorTy())
209
0
      return OrigBitWidth;
210
0
    // Use the smallest integer type in the range [MinBitWidth, OrigBitWidth).
211
0
    Type *Ty = DL.getSmallestLegalIntType(DstTy->getContext(), MinBitWidth);
212
0
    // Update minimum bit-width with the new destination type bit-width if
213
0
    // succeeded to find such, otherwise, with original bit-width.
214
0
    MinBitWidth = Ty ? Ty->getScalarSizeInBits() : OrigBitWidth;
215
368
  } else { // MinBitWidth == TruncBitWidth
216
368
    // In this case the expression can be evaluated with the trunc instruction
217
368
    // destination type, and trunc instruction can be omitted. However, we
218
368
    // should not perform the evaluation if the original type is a legal scalar
219
368
    // type and the target type is illegal.
220
368
    bool FromLegal = MinBitWidth == 1 || DL.isLegalInteger(OrigBitWidth);
221
368
    bool ToLegal = MinBitWidth == 1 || DL.isLegalInteger(MinBitWidth);
222
368
    if (!DstTy->isVectorTy() && 
FromLegal354
&&
!ToLegal342
)
223
98
      return OrigBitWidth;
224
270
  }
225
270
  return MinBitWidth;
226
270
}
227
228
113k
Type *TruncInstCombine::getBestTruncatedType() {
229
113k
  if (!buildTruncExpressionDag())
230
112k
    return nullptr;
231
640
232
640
  // We don't want to duplicate instructions, which isn't profitable. Thus, we
233
640
  // can't shrink something that has multiple users, unless all users are
234
640
  // post-dominated by the trunc instruction, i.e., were visited during the
235
640
  // expression evaluation.
236
640
  unsigned DesiredBitWidth = 0;
237
1.23k
  for (auto Itr : InstInfoMap) {
238
1.23k
    Instruction *I = Itr.first;
239
1.23k
    if (I->hasOneUse())
240
771
      continue;
241
464
    bool IsExtInst = (isa<ZExtInst>(I) || 
isa<SExtInst>(I)378
);
242
464
    for (auto *U : I->users())
243
967
      if (auto *UI = dyn_cast<Instruction>(U))
244
967
        if (UI != CurrentTruncInst && 
!InstInfoMap.count(UI)780
) {
245
540
          if (!IsExtInst)
246
267
            return nullptr;
247
273
          // If this is an extension from the dest type, we can eliminate it,
248
273
          // even if it has multiple users. Thus, update the DesiredBitWidth and
249
273
          // validate all extension instructions agrees on same DesiredBitWidth.
250
273
          unsigned ExtInstBitWidth =
251
273
              I->getOperand(0)->getType()->getScalarSizeInBits();
252
273
          if (DesiredBitWidth && 
DesiredBitWidth != ExtInstBitWidth159
)
253
1
            return nullptr;
254
272
          DesiredBitWidth = ExtInstBitWidth;
255
272
        }
256
464
  }
257
640
258
640
  unsigned OrigBitWidth =
259
372
      CurrentTruncInst->getOperand(0)->getType()->getScalarSizeInBits();
260
372
261
372
  // Calculate minimum allowed bit-width allowed for shrinking the currently
262
372
  // visited truncate's operand.
263
372
  unsigned MinBitWidth = getMinBitWidth();
264
372
265
372
  // Check that we can shrink to smaller bit-width than original one and that
266
372
  // it is similar to the DesiredBitWidth is such exists.
267
372
  if (MinBitWidth >= OrigBitWidth ||
268
372
      
(274
DesiredBitWidth274
&&
DesiredBitWidth != MinBitWidth40
))
269
100
    return nullptr;
270
272
271
272
  return IntegerType::get(CurrentTruncInst->getContext(), MinBitWidth);
272
272
}
273
274
/// Given a reduced scalar type \p Ty and a \p V value, return a reduced type
275
/// for \p V, according to its type, if it vector type, return the vector
276
/// version of \p Ty, otherwise return \p Ty.
277
792
static Type *getReducedType(Value *V, Type *Ty) {
278
792
  assert(Ty && !Ty->isVectorTy() && "Expect Scalar Type");
279
792
  if (auto *VTy = dyn_cast<VectorType>(V->getType()))
280
74
    return VectorType::get(Ty, VTy->getNumElements());
281
718
  return Ty;
282
718
}
283
284
520
Value *TruncInstCombine::getReducedOperand(Value *V, Type *SclTy) {
285
520
  Type *Ty = getReducedType(V, SclTy);
286
520
  if (auto *C = dyn_cast<Constant>(V)) {
287
90
    C = ConstantExpr::getIntegerCast(C, Ty, false);
288
90
    // If we got a constantexpr back, try to simplify it with DL info.
289
90
    if (Constant *FoldedC = ConstantFoldConstant(C, DL, &TLI))
290
0
      C = FoldedC;
291
90
    return C;
292
90
  }
293
430
294
430
  auto *I = cast<Instruction>(V);
295
430
  Info Entry = InstInfoMap.lookup(I);
296
430
  assert(Entry.NewValue);
297
430
  return Entry.NewValue;
298
430
}
299
300
272
void TruncInstCombine::ReduceExpressionDag(Type *SclTy) {
301
396
  for (auto &Itr : InstInfoMap) { // Forward
302
396
    Instruction *I = Itr.first;
303
396
    TruncInstCombine::Info &NodeInfo = Itr.second;
304
396
305
396
    assert(!NodeInfo.NewValue && "Instruction has been evaluated");
306
396
307
396
    IRBuilder<> Builder(I);
308
396
    Value *Res = nullptr;
309
396
    unsigned Opc = I->getOpcode();
310
396
    switch (Opc) {
311
396
    case Instruction::Trunc:
312
272
    case Instruction::ZExt:
313
272
    case Instruction::SExt: {
314
272
      Type *Ty = getReducedType(I, SclTy);
315
272
      // If the source type of the cast is the type we're trying for then we can
316
272
      // just return the source.  There's no need to insert it because it is not
317
272
      // new.
318
272
      if (I->getOperand(0)->getType() == Ty) {
319
234
        assert(!isa<TruncInst>(I) && "Cannot reach here with TruncInst");
320
234
        NodeInfo.NewValue = I->getOperand(0);
321
234
        continue;
322
234
      }
323
38
      // Otherwise, must be the same type of cast, so just reinsert a new one.
324
38
      // This also handles the case of zext(trunc(x)) -> zext(x).
325
38
      Res = Builder.CreateIntCast(I->getOperand(0), Ty,
326
38
                                  Opc == Instruction::SExt);
327
38
328
38
      // Update Worklist entries with new value if needed.
329
38
      // There are three possible changes to the Worklist:
330
38
      // 1. Update Old-TruncInst -> New-TruncInst.
331
38
      // 2. Remove Old-TruncInst (if New node is not TruncInst).
332
38
      // 3. Add New-TruncInst (if Old node was not TruncInst).
333
38
      auto Entry = find(Worklist, I);
334
38
      if (Entry != Worklist.end()) {
335
10
        if (auto *NewCI = dyn_cast<TruncInst>(Res))
336
8
          *Entry = NewCI;
337
2
        else
338
2
          Worklist.erase(Entry);
339
28
      } else if (auto *NewCI = dyn_cast<TruncInst>(Res))
340
19
          Worklist.push_back(NewCI);
341
38
      break;
342
38
    }
343
124
    case Instruction::Add:
344
124
    case Instruction::Sub:
345
124
    case Instruction::Mul:
346
124
    case Instruction::And:
347
124
    case Instruction::Or:
348
124
    case Instruction::Xor: {
349
124
      Value *LHS = getReducedOperand(I->getOperand(0), SclTy);
350
124
      Value *RHS = getReducedOperand(I->getOperand(1), SclTy);
351
124
      Res = Builder.CreateBinOp((Instruction::BinaryOps)Opc, LHS, RHS);
352
124
      break;
353
124
    }
354
124
    default:
355
0
      llvm_unreachable("Unhandled instruction");
356
162
    }
357
162
358
162
    NodeInfo.NewValue = Res;
359
162
    if (auto *ResI = dyn_cast<Instruction>(Res))
360
154
      ResI->takeName(I);
361
162
  }
362
272
363
272
  Value *Res = getReducedOperand(CurrentTruncInst->getOperand(0), SclTy);
364
272
  Type *DstTy = CurrentTruncInst->getType();
365
272
  if (Res->getType() != DstTy) {
366
0
    IRBuilder<> Builder(CurrentTruncInst);
367
0
    Res = Builder.CreateIntCast(Res, DstTy, false);
368
0
    if (auto *ResI = dyn_cast<Instruction>(Res))
369
0
      ResI->takeName(CurrentTruncInst);
370
0
  }
371
272
  CurrentTruncInst->replaceAllUsesWith(Res);
372
272
373
272
  // Erase old expression dag, which was replaced by the reduced expression dag.
374
272
  // We iterate backward, which means we visit the instruction before we visit
375
272
  // any of its operands, this way, when we get to the operand, we already
376
272
  // removed the instructions (from the expression dag) that uses it.
377
272
  CurrentTruncInst->eraseFromParent();
378
668
  for (auto I = InstInfoMap.rbegin(), E = InstInfoMap.rend(); I != E; 
++I396
) {
379
396
    // We still need to check that the instruction has no users before we erase
380
396
    // it, because {SExt, ZExt}Inst Instruction might have other users that was
381
396
    // not reduced, in such case, we need to keep that instruction.
382
396
    if (I->first->use_empty())
383
354
      I->first->eraseFromParent();
384
396
  }
385
272
}
386
387
454k
bool TruncInstCombine::run(Function &F) {
388
454k
  bool MadeIRChange = false;
389
454k
390
454k
  // Collect all TruncInst in the function into the Worklist for evaluating.
391
2.59M
  for (auto &BB : F) {
392
2.59M
    // Ignore unreachable basic block.
393
2.59M
    if (!DT.isReachableFromEntry(&BB))
394
4
      continue;
395
2.59M
    for (auto &I : BB)
396
14.3M
      if (auto *CI = dyn_cast<TruncInst>(&I))
397
113k
        Worklist.push_back(CI);
398
2.59M
  }
399
454k
400
454k
  // Process all TruncInst in the Worklist, for each instruction:
401
454k
  //   1. Check if it dominates an eligible expression dag to be reduced.
402
454k
  //   2. Create a reduced expression dag and replace the old one with it.
403
567k
  while (!Worklist.empty()) {
404
113k
    CurrentTruncInst = Worklist.pop_back_val();
405
113k
406
113k
    if (Type *NewDstSclTy = getBestTruncatedType()) {
407
272
      LLVM_DEBUG(
408
272
          dbgs() << "ICE: TruncInstCombine reducing type of expression dag "
409
272
                    "dominated by: "
410
272
                 << CurrentTruncInst << '\n');
411
272
      ReduceExpressionDag(NewDstSclTy);
412
272
      MadeIRChange = true;
413
272
    }
414
113k
  }
415
454k
416
454k
  return MadeIRChange;
417
454k
}