Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Scalar/Reassociate.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This pass reassociates commutative expressions in an order that is designed
11
// to promote better constant propagation, GCSE, LICM, PRE, etc.
12
//
13
// For example: 4 + (x + 5) -> x + (4 + 5)
14
//
15
// In the implementation of this algorithm, constants are assigned rank = 0,
16
// function arguments are rank = 1, and other values are assigned ranks
17
// corresponding to the reverse post order traversal of current function
18
// (starting at 2), which effectively gives values in deep loops higher rank
19
// than values not in loops.
20
//
21
//===----------------------------------------------------------------------===//
22
23
#include "llvm/Transforms/Scalar/Reassociate.h"
24
#include "llvm/ADT/DenseMap.h"
25
#include "llvm/ADT/PostOrderIterator.h"
26
#include "llvm/ADT/STLExtras.h"
27
#include "llvm/ADT/SetVector.h"
28
#include "llvm/ADT/Statistic.h"
29
#include "llvm/Analysis/GlobalsModRef.h"
30
#include "llvm/Analysis/ValueTracking.h"
31
#include "llvm/IR/CFG.h"
32
#include "llvm/IR/Constants.h"
33
#include "llvm/IR/DerivedTypes.h"
34
#include "llvm/IR/Function.h"
35
#include "llvm/IR/IRBuilder.h"
36
#include "llvm/IR/Instructions.h"
37
#include "llvm/IR/IntrinsicInst.h"
38
#include "llvm/IR/PatternMatch.h"
39
#include "llvm/IR/ValueHandle.h"
40
#include "llvm/Pass.h"
41
#include "llvm/Support/Debug.h"
42
#include "llvm/Support/raw_ostream.h"
43
#include "llvm/Transforms/Scalar.h"
44
#include "llvm/Transforms/Utils/Local.h"
45
#include <algorithm>
46
using namespace llvm;
47
using namespace reassociate;
48
49
#define DEBUG_TYPE "reassociate"
50
51
STATISTIC(NumChanged, "Number of insts reassociated");
52
STATISTIC(NumAnnihil, "Number of expr tree annihilated");
53
STATISTIC(NumFactor , "Number of multiplies factored");
54
55
#ifndef NDEBUG
56
/// Print out the expression identified in the Ops list.
57
///
58
static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
59
  Module *M = I->getModule();
60
  dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
61
       << *Ops[0].Op->getType() << '\t';
62
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
63
    dbgs() << "[ ";
64
    Ops[i].Op->printAsOperand(dbgs(), false, M);
65
    dbgs() << ", #" << Ops[i].Rank << "] ";
66
  }
67
}
68
#endif
69
70
/// Utility class representing a non-constant Xor-operand. We classify
71
/// non-constant Xor-Operands into two categories:
72
///  C1) The operand is in the form "X & C", where C is a constant and C != ~0
73
///  C2)
74
///    C2.1) The operand is in the form of "X | C", where C is a non-zero
75
///          constant.
76
///    C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this
77
///          operand as "E | 0"
78
class llvm::reassociate::XorOpnd {
79
public:
80
  XorOpnd(Value *V);
81
82
42
  bool isInvalid() const { return SymbolicPart == nullptr; }
83
13.5k
  bool isOrExpr() const { return isOr; }
84
125
  Value *getValue() const { return OrigVal; }
85
87.4k
  Value *getSymbolicPart() const { return SymbolicPart; }
86
125k
  unsigned getSymbolicRank() const { return SymbolicRank; }
87
13.0k
  const APInt &getConstPart() const { return ConstPart; }
88
89
22
  void Invalidate() { SymbolicPart = OrigVal = nullptr; }
90
42.2k
  void setSymbolicRank(unsigned R) { SymbolicRank = R; }
91
92
private:
93
  Value *OrigVal;
94
  Value *SymbolicPart;
95
  APInt ConstPart;
96
  unsigned SymbolicRank;
97
  bool isOr;
98
};
99
100
42.2k
XorOpnd::XorOpnd(Value *V) {
101
42.2k
  assert(!isa<ConstantInt>(V) && "No ConstantInt");
102
42.2k
  OrigVal = V;
103
42.2k
  Instruction *I = dyn_cast<Instruction>(V);
104
42.2k
  SymbolicRank = 0;
105
42.2k
106
42.2k
  if (
I && 42.2k
(I->getOpcode() == Instruction::Or ||
107
42.2k
            
I->getOpcode() == Instruction::And39.4k
)) {
108
3.58k
    Value *V0 = I->getOperand(0);
109
3.58k
    Value *V1 = I->getOperand(1);
110
3.58k
    const APInt *C;
111
3.58k
    if (match(V0, PatternMatch::m_APInt(C)))
112
0
      std::swap(V0, V1);
113
3.58k
114
3.58k
    if (
match(V1, PatternMatch::m_APInt(C))3.58k
) {
115
1.59k
      ConstPart = *C;
116
1.59k
      SymbolicPart = V0;
117
1.59k
      isOr = (I->getOpcode() == Instruction::Or);
118
1.59k
      return;
119
1.59k
    }
120
40.6k
  }
121
40.6k
122
40.6k
  // view the operand as "V | 0"
123
40.6k
  SymbolicPart = V;
124
40.6k
  ConstPart = APInt::getNullValue(V->getType()->getScalarSizeInBits());
125
40.6k
  isOr = true;
126
40.6k
}
127
128
/// Return true if V is an instruction of the specified opcode and if it
129
/// only has one use.
130
3.17M
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
131
3.17M
  if (
V->hasOneUse() && 3.17M
isa<Instruction>(V)1.55M
&&
132
1.43M
      cast<Instruction>(V)->getOpcode() == Opcode &&
133
545k
      (!isa<FPMathOperator>(V) ||
134
358
       cast<Instruction>(V)->hasUnsafeAlgebra()))
135
545k
    return cast<BinaryOperator>(V);
136
2.63M
  return nullptr;
137
2.63M
}
138
139
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
140
2.60M
                                        unsigned Opcode2) {
141
2.60M
  if (
V->hasOneUse() && 2.60M
isa<Instruction>(V)1.09M
&&
142
986k
      (cast<Instruction>(V)->getOpcode() == Opcode1 ||
143
986k
       cast<Instruction>(V)->getOpcode() == Opcode2) &&
144
274k
      (!isa<FPMathOperator>(V) ||
145
279
       cast<Instruction>(V)->hasUnsafeAlgebra()))
146
274k
    return cast<BinaryOperator>(V);
147
2.32M
  return nullptr;
148
2.32M
}
149
150
void ReassociatePass::BuildRankMap(Function &F,
151
516k
                                   ReversePostOrderTraversal<Function*> &RPOT) {
152
516k
  unsigned Rank = 2;
153
516k
154
516k
  // Assign distinct ranks to function arguments.
155
1.17M
  for (auto &Arg : F.args()) {
156
1.17M
    ValueRankMap[&Arg] = ++Rank;
157
1.17M
    DEBUG(dbgs() << "Calculated Rank[" << Arg.getName() << "] = " << Rank
158
1.17M
                 << "\n");
159
1.17M
  }
160
516k
161
516k
  // Traverse basic blocks in ReversePostOrder
162
3.62M
  for (BasicBlock *BB : RPOT) {
163
3.62M
    unsigned BBRank = RankMap[BB] = ++Rank << 16;
164
3.62M
165
3.62M
    // Walk the basic block, adding precomputed ranks for any instructions that
166
3.62M
    // we cannot move.  This ensures that the ranks for these instructions are
167
3.62M
    // all different in the block.
168
3.62M
    for (Instruction &I : *BB)
169
18.7M
      
if (18.7M
mayBeMemoryDependent(I)18.7M
)
170
10.8M
        ValueRankMap[&I] = ++BBRank;
171
3.62M
  }
172
516k
}
173
174
5.76M
unsigned ReassociatePass::getRank(Value *V) {
175
5.76M
  Instruction *I = dyn_cast<Instruction>(V);
176
5.76M
  if (
!I5.76M
) {
177
1.58M
    if (
isa<Argument>(V)1.58M
)
return ValueRankMap[V]256k
; // Function argument.
178
1.32M
    return 0;  // Otherwise it's a global or constant, rank 0.
179
1.32M
  }
180
4.18M
181
4.18M
  
if (unsigned 4.18M
Rank4.18M
= ValueRankMap[I])
182
3.01M
    return Rank;    // Rank already known?
183
1.16M
184
1.16M
  // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
185
1.16M
  // we can reassociate expressions for code motion!  Since we do not recurse
186
1.16M
  // for PHI nodes, we cannot have infinite recursion here, because there
187
1.16M
  // cannot be loops in the value graph that do not go through PHI nodes.
188
1.16M
  unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
189
1.16M
  for (unsigned i = 0, e = I->getNumOperands();
190
3.34M
       
i != e && 3.34M
Rank != MaxRank2.17M
;
++i2.17M
)
191
2.17M
    Rank = std::max(Rank, getRank(I->getOperand(i)));
192
1.16M
193
1.16M
  // If this is a not or neg instruction, do not count it for rank.  This
194
1.16M
  // assures us that X and ~X will have the same rank.
195
1.16M
  if  (
!BinaryOperator::isNot(I) && 1.16M
!BinaryOperator::isNeg(I)1.15M
&&
196
1.13M
       !BinaryOperator::isFNeg(I))
197
1.13M
    ++Rank;
198
1.16M
199
1.16M
  DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank << "\n");
200
5.76M
201
5.76M
  return ValueRankMap[I] = Rank;
202
5.76M
}
203
204
// Canonicalize constants to RHS.  Otherwise, sort the operands by rank.
205
1.35M
void ReassociatePass::canonicalizeOperands(Instruction *I) {
206
1.35M
  assert(isa<BinaryOperator>(I) && "Expected binary operator.");
207
1.35M
  assert(I->isCommutative() && "Expected commutative operator.");
208
1.35M
209
1.35M
  Value *LHS = I->getOperand(0);
210
1.35M
  Value *RHS = I->getOperand(1);
211
1.35M
  if (
LHS == RHS || 1.35M
isa<Constant>(RHS)1.34M
)
212
722k
    return;
213
631k
  
if (631k
isa<Constant>(LHS) || 631k
getRank(RHS) < getRank(LHS)631k
)
214
290k
    cast<BinaryOperator>(I)->swapOperands();
215
1.35M
}
216
217
static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
218
14.3k
                                 Instruction *InsertBefore, Value *FlagsOp) {
219
14.3k
  if (S1->getType()->isIntOrIntVectorTy())
220
14.3k
    return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
221
48
  else {
222
48
    BinaryOperator *Res =
223
48
        BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore);
224
48
    Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
225
48
    return Res;
226
48
  }
227
0
}
228
229
static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
230
2.58k
                                 Instruction *InsertBefore, Value *FlagsOp) {
231
2.58k
  if (S1->getType()->isIntOrIntVectorTy())
232
2.53k
    return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
233
52
  else {
234
52
    BinaryOperator *Res =
235
52
      BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore);
236
52
    Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
237
52
    return Res;
238
52
  }
239
0
}
240
241
static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
242
9.53k
                                 Instruction *InsertBefore, Value *FlagsOp) {
243
9.53k
  if (S1->getType()->isIntOrIntVectorTy())
244
9.51k
    return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
245
26
  else {
246
26
    BinaryOperator *Res = BinaryOperator::CreateFNeg(S1, Name, InsertBefore);
247
26
    Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
248
26
    return Res;
249
26
  }
250
0
}
251
252
/// Replace 0-X with X*-1.
253
1.27k
static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
254
1.27k
  Type *Ty = Neg->getType();
255
1.27k
  Constant *NegOne = Ty->isIntOrIntVectorTy() ?
256
1.27k
    
ConstantInt::getAllOnesValue(Ty)1.25k
:
ConstantFP::get(Ty, -1.0)17
;
257
1.27k
258
1.27k
  BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg, Neg);
259
1.27k
  Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op.
260
1.27k
  Res->takeName(Neg);
261
1.27k
  Neg->replaceAllUsesWith(Res);
262
1.27k
  Res->setDebugLoc(Neg->getDebugLoc());
263
1.27k
  return Res;
264
1.27k
}
265
266
/// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael
267
/// function. This means that x^(2^k) === 1 mod 2^Bitwidth for
268
/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
269
/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every
270
/// even x in Bitwidth-bit arithmetic.
271
4.43k
static unsigned CarmichaelShift(unsigned Bitwidth) {
272
4.43k
  if (Bitwidth < 3)
273
0
    return Bitwidth - 1;
274
4.43k
  return Bitwidth - 2;
275
4.43k
}
276
277
/// Add the extra weight 'RHS' to the existing weight 'LHS',
278
/// reducing the combined weight using any special properties of the operation.
279
/// The existing weight LHS represents the computation X op X op ... op X where
280
/// X occurs LHS times.  The combined weight represents  X op X op ... op X with
281
/// X occurring LHS + RHS times.  If op is "Xor" for example then the combined
282
/// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even;
283
/// the routine returns 1 in LHS in the first case, and 0 in LHS in the second.
284
8.57k
static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
285
8.57k
  // If we were working with infinite precision arithmetic then the combined
286
8.57k
  // weight would be LHS + RHS.  But we are using finite precision arithmetic,
287
8.57k
  // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct
288
8.57k
  // for nilpotent operations and addition, but not for idempotent operations
289
8.57k
  // and multiplication), so it is important to correctly reduce the combined
290
8.57k
  // weight back into range if wrapping would be wrong.
291
8.57k
292
8.57k
  // If RHS is zero then the weight didn't change.
293
8.57k
  if (RHS.isMinValue())
294
0
    return;
295
8.57k
  // If LHS is zero then the combined weight is RHS.
296
8.57k
  
if (8.57k
LHS.isMinValue()8.57k
) {
297
2
    LHS = RHS;
298
2
    return;
299
2
  }
300
8.57k
  // From this point on we know that neither LHS nor RHS is zero.
301
8.57k
302
8.57k
  
if (8.57k
Instruction::isIdempotent(Opcode)8.57k
) {
303
11
    // Idempotent means X op X === X, so any non-zero weight is equivalent to a
304
11
    // weight of 1.  Keeping weights at zero or one also means that wrapping is
305
11
    // not a problem.
306
11
    assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
307
11
    return; // Return a weight of 1.
308
11
  }
309
8.56k
  
if (8.56k
Instruction::isNilpotent(Opcode)8.56k
) {
310
6
    // Nilpotent means X op X === 0, so reduce weights modulo 2.
311
6
    assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
312
6
    LHS = 0; // 1 + 1 === 0 modulo 2.
313
6
    return;
314
6
  }
315
8.55k
  
if (8.55k
Opcode == Instruction::Add || 8.55k
Opcode == Instruction::FAdd4.46k
) {
316
4.12k
    // TODO: Reduce the weight by exploiting nsw/nuw?
317
4.12k
    LHS += RHS;
318
4.12k
    return;
319
4.12k
  }
320
4.43k
321
8.55k
  assert((Opcode == Instruction::Mul || Opcode == Instruction::FMul) &&
322
4.43k
         "Unknown associative operation!");
323
4.43k
  unsigned Bitwidth = LHS.getBitWidth();
324
4.43k
  // If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth
325
4.43k
  // can be replaced with W-CM.  That's because x^W=x^(W-CM) for every Bitwidth
326
4.43k
  // bit number x, since either x is odd in which case x^CM = 1, or x is even in
327
4.43k
  // which case both x^W and x^(W - CM) are zero.  By subtracting off multiples
328
4.43k
  // of CM like this weights can always be reduced to the range [0, CM+Bitwidth)
329
4.43k
  // which by a happy accident means that they can always be represented using
330
4.43k
  // Bitwidth bits.
331
4.43k
  // TODO: Reduce the weight by exploiting nsw/nuw?  (Could do much better than
332
4.43k
  // the Carmichael number).
333
4.43k
  if (
Bitwidth > 34.43k
) {
334
4.41k
    /// CM - The value of Carmichael's lambda function.
335
4.41k
    APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth));
336
4.41k
    // Any weight W >= Threshold can be replaced with W - CM.
337
4.41k
    APInt Threshold = CM + Bitwidth;
338
4.41k
    assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!");
339
4.41k
    // For Bitwidth 4 or more the following sum does not overflow.
340
4.41k
    LHS += RHS;
341
4.42k
    while (LHS.uge(Threshold))
342
12
      LHS -= CM;
343
4.43k
  } else {
344
23
    // To avoid problems with overflow do everything the same as above but using
345
23
    // a larger type.
346
23
    unsigned CM = 1U << CarmichaelShift(Bitwidth);
347
23
    unsigned Threshold = CM + Bitwidth;
348
23
    assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold &&
349
23
           "Weights not reduced!");
350
23
    unsigned Total = LHS.getZExtValue() + RHS.getZExtValue();
351
27
    while (Total >= Threshold)
352
4
      Total -= CM;
353
23
    LHS = Total;
354
23
  }
355
8.57k
}
356
357
typedef std::pair<Value*, APInt> RepeatedValue;
358
359
/// Given an associative binary expression, return the leaf
360
/// nodes in Ops along with their weights (how many times the leaf occurs).  The
361
/// original expression is the same as
362
///   (Ops[0].first op Ops[0].first op ... Ops[0].first)  <- Ops[0].second times
363
/// op
364
///   (Ops[1].first op Ops[1].first op ... Ops[1].first)  <- Ops[1].second times
365
/// op
366
///   ...
367
/// op
368
///   (Ops[N].first op Ops[N].first op ... Ops[N].first)  <- Ops[N].second times
369
///
370
/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
371
///
372
/// This routine may modify the function, in which case it returns 'true'.  The
373
/// changes it makes may well be destructive, changing the value computed by 'I'
374
/// to something completely different.  Thus if the routine returns 'true' then
375
/// you MUST either replace I with a new expression computed from the Ops array,
376
/// or use RewriteExprTree to put the values back in.
377
///
378
/// A leaf node is either not a binary operation of the same kind as the root
379
/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
380
/// opcode), or is the same kind of binary operator but has a use which either
381
/// does not belong to the expression, or does belong to the expression but is
382
/// a leaf node.  Every leaf node has at least one use that is a non-leaf node
383
/// of the expression, while for non-leaf nodes (except for the root 'I') every
384
/// use is a non-leaf node of the expression.
385
///
386
/// For example:
387
///           expression graph        node names
388
///
389
///                     +        |        I
390
///                    / \       |
391
///                   +   +      |      A,  B
392
///                  / \ / \     |
393
///                 *   +   *    |    C,  D,  E
394
///                / \ / \ / \   |
395
///                   +   *      |      F,  G
396
///
397
/// The leaf nodes are C, E, F and G.  The Ops array will contain (maybe not in
398
/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
399
///
400
/// The expression is maximal: if some instruction is a binary operator of the
401
/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
402
/// then the instruction also belongs to the expression, is not a leaf node of
403
/// it, and its operands also belong to the expression (but may be leaf nodes).
404
///
405
/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
406
/// order to ensure that every non-root node in the expression has *exactly one*
407
/// use by a non-leaf node of the expression.  This destruction means that the
408
/// caller MUST either replace 'I' with a new expression or use something like
409
/// RewriteExprTree to put the values back in if the routine indicates that it
410
/// made a change by returning 'true'.
411
///
412
/// In the above example either the right operand of A or the left operand of B
413
/// will be replaced by undef.  If it is B's operand then this gives:
414
///
415
///                     +        |        I
416
///                    / \       |
417
///                   +   +      |      A,  B - operand of B replaced with undef
418
///                  / \   \     |
419
///                 *   +   *    |    C,  D,  E
420
///                / \ / \ / \   |
421
///                   +   *      |      F,  G
422
///
423
/// Note that such undef operands can only be reached by passing through 'I'.
424
/// For example, if you visit operands recursively starting from a leaf node
425
/// then you will never see such an undef operand unless you get back to 'I',
426
/// which requires passing through a phi node.
427
///
428
/// Note that this routine may also mutate binary operators of the wrong type
429
/// that have all uses inside the expression (i.e. only used by non-leaf nodes
430
/// of the expression) if it can turn them into binary operators of the right
431
/// type and thus make the expression bigger.
432
433
static bool LinearizeExprTree(BinaryOperator *I,
434
1.01M
                              SmallVectorImpl<RepeatedValue> &Ops) {
435
1.01M
  DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
436
1.01M
  unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
437
1.01M
  unsigned Opcode = I->getOpcode();
438
1.01M
  assert(I->isAssociative() && I->isCommutative() &&
439
1.01M
         "Expected an associative and commutative operation!");
440
1.01M
441
1.01M
  // Visit all operands of the expression, keeping track of their weight (the
442
1.01M
  // number of paths from the expression root to the operand, or if you like
443
1.01M
  // the number of times that operand occurs in the linearized expression).
444
1.01M
  // For example, if I = X + A, where X = A + B, then I, X and B have weight 1
445
1.01M
  // while A has weight two.
446
1.01M
447
1.01M
  // Worklist of non-leaf nodes (their operands are in the expression too) along
448
1.01M
  // with their weights, representing a certain number of paths to the operator.
449
1.01M
  // If an operator occurs in the worklist multiple times then we found multiple
450
1.01M
  // ways to get to it.
451
1.01M
  SmallVector<std::pair<BinaryOperator*, APInt>, 8> Worklist; // (Op, Weight)
452
1.01M
  Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
453
1.01M
  bool Changed = false;
454
1.01M
455
1.01M
  // Leaves of the expression are values that either aren't the right kind of
456
1.01M
  // operation (eg: a constant, or a multiply in an add tree), or are, but have
457
1.01M
  // some uses that are not inside the expression.  For example, in I = X + X,
458
1.01M
  // X = A + B, the value X has two uses (by I) that are in the expression.  If
459
1.01M
  // X has any other uses, for example in a return instruction, then we consider
460
1.01M
  // X to be a leaf, and won't analyze it further.  When we first visit a value,
461
1.01M
  // if it has more than one use then at first we conservatively consider it to
462
1.01M
  // be a leaf.  Later, as the expression is explored, we may discover some more
463
1.01M
  // uses of the value from inside the expression.  If all uses turn out to be
464
1.01M
  // from within the expression (and the value is a binary operator of the right
465
1.01M
  // kind) then the value is no longer considered to be a leaf, and its operands
466
1.01M
  // are explored.
467
1.01M
468
1.01M
  // Leaves - Keeps track of the set of putative leaves as well as the number of
469
1.01M
  // paths to each leaf seen so far.
470
1.01M
  typedef DenseMap<Value*, APInt> LeafMap;
471
1.01M
  LeafMap Leaves; // Leaf -> Total weight so far.
472
1.01M
  SmallVector<Value*, 8> LeafOrder; // Ensure deterministic leaf output order.
473
1.01M
474
#ifndef NDEBUG
475
  SmallPtrSet<Value*, 8> Visited; // For sanity checking the iteration scheme.
476
#endif
477
2.29M
  while (
!Worklist.empty()2.29M
) {
478
1.27M
    std::pair<BinaryOperator*, APInt> P = Worklist.pop_back_val();
479
1.27M
    I = P.first; // We examine the operands of this binary operator.
480
1.27M
481
3.83M
    for (unsigned OpIdx = 0; 
OpIdx < 23.83M
;
++OpIdx2.55M
) { // Visit operands.
482
2.55M
      Value *Op = I->getOperand(OpIdx);
483
2.55M
      APInt Weight = P.second; // Number of paths to this operand.
484
2.55M
      DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
485
2.55M
      assert(!Op->use_empty() && "No uses, so how did we get to it?!");
486
2.55M
487
2.55M
      // If this is a binary operation of the right kind with only one use then
488
2.55M
      // add its operands to the expression.
489
2.55M
      if (BinaryOperator *
BO2.55M
= isReassociableOp(Op, Opcode)) {
490
267k
        assert(Visited.insert(Op).second && "Not first visit!");
491
267k
        DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
492
267k
        Worklist.push_back(std::make_pair(BO, Weight));
493
267k
        continue;
494
267k
      }
495
2.29M
496
2.29M
      // Appears to be a leaf.  Is the operand already in the set of leaves?
497
2.29M
      LeafMap::iterator It = Leaves.find(Op);
498
2.29M
      if (
It == Leaves.end()2.29M
) {
499
2.28M
        // Not in the leaf map.  Must be the first time we saw this operand.
500
2.28M
        assert(Visited.insert(Op).second && "Not first visit!");
501
2.28M
        if (
!Op->hasOneUse()2.28M
) {
502
1.43M
          // This value has uses not accounted for by the expression, so it is
503
1.43M
          // not safe to modify.  Mark it as being a leaf.
504
1.43M
          DEBUG(dbgs() << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
505
1.43M
          LeafOrder.push_back(Op);
506
1.43M
          Leaves[Op] = Weight;
507
1.43M
          continue;
508
1.43M
        }
509
2.29M
        // No uses outside the expression, try morphing it.
510
8.57k
      } else {
511
8.57k
        // Already in the leaf map.
512
8.57k
        assert(It != Leaves.end() && Visited.count(Op) &&
513
8.57k
               "In leaf map but not visited!");
514
8.57k
515
8.57k
        // Update the number of paths to the leaf.
516
8.57k
        IncorporateWeight(It->second, Weight, Opcode);
517
8.57k
518
#if 0   // TODO: Re-enable once PR13021 is fixed.
519
        // The leaf already has one use from inside the expression.  As we want
520
        // exactly one such use, drop this new use of the leaf.
521
        assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
522
        I->setOperand(OpIdx, UndefValue::get(I->getType()));
523
        Changed = true;
524
525
        // If the leaf is a binary operation of the right kind and we now see
526
        // that its multiple original uses were in fact all by nodes belonging
527
        // to the expression, then no longer consider it to be a leaf and add
528
        // its operands to the expression.
529
        if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
530
          DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n");
531
          Worklist.push_back(std::make_pair(BO, It->second));
532
          Leaves.erase(It);
533
          continue;
534
        }
535
#endif
536
537
8.57k
        // If we still have uses that are not accounted for by the expression
538
8.57k
        // then it is not safe to modify the value.
539
8.57k
        if (!Op->hasOneUse())
540
8.57k
          continue;
541
1
542
1
        // No uses outside the expression, try morphing it.
543
1
        Weight = It->second;
544
1
        Leaves.erase(It); // Since the value may be morphed below.
545
1
      }
546
2.29M
547
2.29M
      // At this point we have a value which, first of all, is not a binary
548
2.29M
      // expression of the right kind, and secondly, is only used inside the
549
2.29M
      // expression.  This means that it can safely be modified.  See if we
550
2.29M
      // can usefully morph it into an expression of the right kind.
551
844k
      assert((!isa<Instruction>(Op) ||
552
844k
              cast<Instruction>(Op)->getOpcode() != Opcode
553
844k
              || (isa<FPMathOperator>(Op) &&
554
844k
                  !cast<Instruction>(Op)->hasUnsafeAlgebra())) &&
555
844k
             "Should have been handled above!");
556
844k
      assert(Op->hasOneUse() && "Has uses outside the expression tree!");
557
844k
558
844k
      // If this is a multiply expression, turn any internal negations into
559
844k
      // multiplies by -1 so they can be reassociated.
560
844k
      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op))
561
273k
        
if (273k
(Opcode == Instruction::Mul && 273k
BinaryOperator::isNeg(BO)16.4k
) ||
562
273k
            
(Opcode == Instruction::FMul && 273k
BinaryOperator::isFNeg(BO)38
)) {
563
120
          DEBUG(dbgs() << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
564
120
          BO = LowerNegateToMultiply(BO);
565
120
          DEBUG(dbgs() << *BO << '\n');
566
273k
          Worklist.push_back(std::make_pair(BO, Weight));
567
273k
          Changed = true;
568
273k
          continue;
569
273k
        }
570
844k
571
844k
      // Failed to morph into an expression of the right type.  This really is
572
844k
      // a leaf.
573
844k
      
DEBUG844k
(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
574
844k
      assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
575
844k
      LeafOrder.push_back(Op);
576
844k
      Leaves[Op] = Weight;
577
844k
    }
578
1.27M
  }
579
1.01M
580
1.01M
  // The leaves, repeated according to their weights, represent the linearized
581
1.01M
  // form of the expression.
582
3.29M
  for (unsigned i = 0, e = LeafOrder.size(); 
i != e3.29M
;
++i2.28M
) {
583
2.28M
    Value *V = LeafOrder[i];
584
2.28M
    LeafMap::iterator It = Leaves.find(V);
585
2.28M
    if (It == Leaves.end())
586
2.28M
      // Node initially thought to be a leaf wasn't.
587
0
      continue;
588
2.28M
    assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
589
2.28M
    APInt Weight = It->second;
590
2.28M
    if (Weight.isMinValue())
591
2.28M
      // Leaf already output or weight reduction eliminated it.
592
5
      continue;
593
2.28M
    // Ensure the leaf is only output once.
594
2.28M
    It->second = 0;
595
2.28M
    Ops.push_back(std::make_pair(V, Weight));
596
2.28M
  }
597
1.01M
598
1.01M
  // For nilpotent operations or addition there may be no operands, for example
599
1.01M
  // because the expression was "X xor X" or consisted of 2^Bitwidth additions:
600
1.01M
  // in both cases the weight reduces to 0 causing the value to be skipped.
601
1.01M
  if (
Ops.empty()1.01M
) {
602
3
    Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
603
3
    assert(Identity && "Associative operation without identity!");
604
3
    Ops.emplace_back(Identity, APInt(Bitwidth, 1));
605
3
  }
606
1.01M
607
1.01M
  return Changed;
608
1.01M
}
609
610
/// Now that the operands for this expression tree are
611
/// linearized and optimized, emit them in-order.
612
void ReassociatePass::RewriteExprTree(BinaryOperator *I,
613
1.00M
                                      SmallVectorImpl<ValueEntry> &Ops) {
614
1.00M
  assert(Ops.size() > 1 && "Single values should be used directly!");
615
1.00M
616
1.00M
  // Since our optimizations should never increase the number of operations, the
617
1.00M
  // new expression can usually be written reusing the existing binary operators
618
1.00M
  // from the original expression tree, without creating any new instructions,
619
1.00M
  // though the rewritten expression may have a completely different topology.
620
1.00M
  // We take care to not change anything if the new expression will be the same
621
1.00M
  // as the original.  If more than trivial changes (like commuting operands)
622
1.00M
  // were made then we are obliged to clear out any optional subclass data like
623
1.00M
  // nsw flags.
624
1.00M
625
1.00M
  /// NodesToRewrite - Nodes from the original expression available for writing
626
1.00M
  /// the new expression into.
627
1.00M
  SmallVector<BinaryOperator*, 8> NodesToRewrite;
628
1.00M
  unsigned Opcode = I->getOpcode();
629
1.00M
  BinaryOperator *Op = I;
630
1.00M
631
1.00M
  /// NotRewritable - The operands being written will be the leaves of the new
632
1.00M
  /// expression and must not be used as inner nodes (via NodesToRewrite) by
633
1.00M
  /// mistake.  Inner nodes are always reassociable, and usually leaves are not
634
1.00M
  /// (if they were they would have been incorporated into the expression and so
635
1.00M
  /// would not be leaves), so most of the time there is no danger of this.  But
636
1.00M
  /// in rare cases a leaf may become reassociable if an optimization kills uses
637
1.00M
  /// of it, or it may momentarily become reassociable during rewriting (below)
638
1.00M
  /// due it being removed as an operand of one of its uses.  Ensure that misuse
639
1.00M
  /// of leaf nodes as inner nodes cannot occur by remembering all of the future
640
1.00M
  /// leaves and refusing to reuse any of them as inner nodes.
641
1.00M
  SmallPtrSet<Value*, 8> NotRewritable;
642
3.27M
  for (unsigned i = 0, e = Ops.size(); 
i != e3.27M
;
++i2.27M
)
643
2.27M
    NotRewritable.insert(Ops[i].Op);
644
1.00M
645
1.00M
  // ExpressionChanged - Non-null if the rewritten expression differs from the
646
1.00M
  // original in some non-trivial way, requiring the clearing of optional flags.
647
1.00M
  // Flags are cleared from the operator in ExpressionChanged up to I inclusive.
648
1.00M
  BinaryOperator *ExpressionChanged = nullptr;
649
1.00M
  for (unsigned i = 0; ; 
++i256k
) {
650
1.26M
    // The last operation (which comes earliest in the IR) is special as both
651
1.26M
    // operands will come from Ops, rather than just one with the other being
652
1.26M
    // a subexpression.
653
1.26M
    if (
i+2 == Ops.size()1.26M
) {
654
1.00M
      Value *NewLHS = Ops[i].Op;
655
1.00M
      Value *NewRHS = Ops[i+1].Op;
656
1.00M
      Value *OldLHS = Op->getOperand(0);
657
1.00M
      Value *OldRHS = Op->getOperand(1);
658
1.00M
659
1.00M
      if (
NewLHS == OldLHS && 1.00M
NewRHS == OldRHS764k
)
660
1.00M
        // Nothing changed, leave it alone.
661
745k
        break;
662
261k
663
261k
      
if (261k
NewLHS == OldRHS && 261k
NewRHS == OldLHS238k
) {
664
236k
        // The order of the operands was reversed.  Swap them.
665
236k
        DEBUG(dbgs() << "RA: " << *Op << '\n');
666
236k
        Op->swapOperands();
667
236k
        DEBUG(dbgs() << "TO: " << *Op << '\n');
668
236k
        MadeChange = true;
669
236k
        ++NumChanged;
670
236k
        break;
671
236k
      }
672
25.7k
673
25.7k
      // The new operation differs non-trivially from the original. Overwrite
674
25.7k
      // the old operands with the new ones.
675
25.7k
      
DEBUG25.7k
(dbgs() << "RA: " << *Op << '\n');
676
25.7k
      if (
NewLHS != OldLHS25.7k
) {
677
7.00k
        BinaryOperator *BO = isReassociableOp(OldLHS, Opcode);
678
7.00k
        if (
BO && 7.00k
!NotRewritable.count(BO)2.31k
)
679
2.31k
          NodesToRewrite.push_back(BO);
680
7.00k
        Op->setOperand(0, NewLHS);
681
7.00k
      }
682
25.7k
      if (
NewRHS != OldRHS25.7k
) {
683
24.3k
        BinaryOperator *BO = isReassociableOp(OldRHS, Opcode);
684
24.3k
        if (
BO && 24.3k
!NotRewritable.count(BO)175
)
685
175
          NodesToRewrite.push_back(BO);
686
24.3k
        Op->setOperand(1, NewRHS);
687
24.3k
      }
688
25.7k
      DEBUG(dbgs() << "TO: " << *Op << '\n');
689
1.00M
690
1.00M
      ExpressionChanged = Op;
691
1.00M
      MadeChange = true;
692
1.00M
      ++NumChanged;
693
1.00M
694
1.00M
      break;
695
1.00M
    }
696
256k
697
256k
    // Not the last operation.  The left-hand side will be a sub-expression
698
256k
    // while the right-hand side will be the current element of Ops.
699
256k
    Value *NewRHS = Ops[i].Op;
700
256k
    if (
NewRHS != Op->getOperand(1)256k
) {
701
91.3k
      DEBUG(dbgs() << "RA: " << *Op << '\n');
702
91.3k
      if (
NewRHS == Op->getOperand(0)91.3k
) {
703
57.7k
        // The new right-hand side was already present as the left operand.  If
704
57.7k
        // we are lucky then swapping the operands will sort out both of them.
705
57.7k
        Op->swapOperands();
706
91.3k
      } else {
707
33.5k
        // Overwrite with the new right-hand side.
708
33.5k
        BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
709
33.5k
        if (
BO && 33.5k
!NotRewritable.count(BO)6.58k
)
710
6.58k
          NodesToRewrite.push_back(BO);
711
33.5k
        Op->setOperand(1, NewRHS);
712
33.5k
        ExpressionChanged = Op;
713
33.5k
      }
714
91.3k
      DEBUG(dbgs() << "TO: " << *Op << '\n');
715
91.3k
      MadeChange = true;
716
91.3k
      ++NumChanged;
717
91.3k
    }
718
256k
719
256k
    // Now deal with the left-hand side.  If this is already an operation node
720
256k
    // from the original expression then just rewrite the rest of the expression
721
256k
    // into it.
722
256k
    BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
723
256k
    if (
BO && 256k
!NotRewritable.count(BO)250k
) {
724
250k
      Op = BO;
725
250k
      continue;
726
250k
    }
727
6.53k
728
6.53k
    // Otherwise, grab a spare node from the original expression and use that as
729
6.53k
    // the left-hand side.  If there are no nodes left then the optimizers made
730
6.53k
    // an expression with more nodes than the original!  This usually means that
731
6.53k
    // they did something stupid but it might mean that the problem was just too
732
6.53k
    // hard (finding the mimimal number of multiplications needed to realize a
733
6.53k
    // multiplication expression is NP-complete).  Whatever the reason, smart or
734
6.53k
    // stupid, create a new node if there are none left.
735
6.53k
    BinaryOperator *NewOp;
736
6.53k
    if (
NodesToRewrite.empty()6.53k
) {
737
0
      Constant *Undef = UndefValue::get(I->getType());
738
0
      NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
739
0
                                     Undef, Undef, "", I);
740
0
      if (NewOp->getType()->isFPOrFPVectorTy())
741
0
        NewOp->setFastMathFlags(I->getFastMathFlags());
742
6.53k
    } else {
743
6.53k
      NewOp = NodesToRewrite.pop_back_val();
744
6.53k
    }
745
6.53k
746
6.53k
    DEBUG(dbgs() << "RA: " << *Op << '\n');
747
6.53k
    Op->setOperand(0, NewOp);
748
6.53k
    DEBUG(dbgs() << "TO: " << *Op << '\n');
749
1.26M
    ExpressionChanged = Op;
750
1.26M
    MadeChange = true;
751
1.26M
    ++NumChanged;
752
1.26M
    Op = NewOp;
753
1.26M
  }
754
1.00M
755
1.00M
  // If the expression changed non-trivially then clear out all subclass data
756
1.00M
  // starting from the operator specified in ExpressionChanged, and compactify
757
1.00M
  // the operators to just before the expression root to guarantee that the
758
1.00M
  // expression tree is dominated by all of Ops.
759
1.00M
  if (ExpressionChanged)
760
26.5k
    
do 26.5k
{
761
61.1k
      // Preserve FastMathFlags.
762
61.1k
      if (
isa<FPMathOperator>(I)61.1k
) {
763
130
        FastMathFlags Flags = I->getFastMathFlags();
764
130
        ExpressionChanged->clearSubclassOptionalData();
765
130
        ExpressionChanged->setFastMathFlags(Flags);
766
130
      } else
767
61.0k
        ExpressionChanged->clearSubclassOptionalData();
768
61.1k
769
61.1k
      if (ExpressionChanged == I)
770
26.5k
        break;
771
34.6k
      ExpressionChanged->moveBefore(I);
772
34.6k
      ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->user_begin());
773
34.6k
    } while (1);
774
1.00M
775
1.00M
  // Throw away any left over nodes from the original expression.
776
1.00M
  for (unsigned i = 0, e = NodesToRewrite.size(); 
i != e1.00M
;
++i2.54k
)
777
2.54k
    RedoInsts.insert(NodesToRewrite[i]);
778
1.00M
}
779
780
/// Insert instructions before the instruction pointed to by BI,
781
/// that computes the negative version of the value specified.  The negative
782
/// version of the value is returned, and BI is left pointing at the instruction
783
/// that should be processed next by the reassociation pass.
784
/// Also add intermediate instructions to the redo list that are modified while
785
/// pushing the negates through adds.  These will be revisited to see if
786
/// additional opportunities have been exposed.
787
static Value *NegateValue(Value *V, Instruction *BI,
788
11.4k
                          SetVector<AssertingVH<Instruction>> &ToRedo) {
789
11.4k
  if (Constant *
C11.4k
= dyn_cast<Constant>(V)) {
790
163
    if (
C->getType()->isFPOrFPVectorTy()163
) {
791
7
      return ConstantExpr::getFNeg(C);
792
7
    }
793
156
    return ConstantExpr::getNeg(C);
794
156
  }
795
11.2k
796
11.2k
797
11.2k
  // We are trying to expose opportunity for reassociation.  One of the things
798
11.2k
  // that we want to do to achieve this is to push a negation as deep into an
799
11.2k
  // expression chain as possible, to expose the add instructions.  In practice,
800
11.2k
  // this means that we turn this:
801
11.2k
  //   X = -(A+12+C+D)   into    X = -A + -12 + -C + -D = -12 + -A + -C + -D
802
11.2k
  // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
803
11.2k
  // the constants.  We assume that instcombine will clean up the mess later if
804
11.2k
  // we introduce tons of unnecessary negation instructions.
805
11.2k
  //
806
11.2k
  
if (BinaryOperator *11.2k
I11.2k
=
807
1.04k
          isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
808
1.04k
    // Push the negates through the add.
809
1.04k
    I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
810
1.04k
    I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
811
1.04k
    if (
I->getOpcode() == Instruction::Add1.04k
) {
812
1.03k
      I->setHasNoUnsignedWrap(false);
813
1.03k
      I->setHasNoSignedWrap(false);
814
1.03k
    }
815
1.04k
816
1.04k
    // We must move the add instruction here, because the neg instructions do
817
1.04k
    // not dominate the old add instruction in general.  By moving it, we are
818
1.04k
    // assured that the neg instructions we just inserted dominate the
819
1.04k
    // instruction we are about to insert after them.
820
1.04k
    //
821
1.04k
    I->moveBefore(BI);
822
1.04k
    I->setName(I->getName()+".neg");
823
1.04k
824
1.04k
    // Add the intermediate negates to the redo list as processing them later
825
1.04k
    // could expose more reassociating opportunities.
826
1.04k
    ToRedo.insert(I);
827
1.04k
    return I;
828
1.04k
  }
829
10.2k
830
10.2k
  // Okay, we need to materialize a negated version of V with an instruction.
831
10.2k
  // Scan the use lists of V to see if we have one already.
832
10.2k
  
for (User *U : V->users()) 10.2k
{
833
18.5k
    if (
!BinaryOperator::isNeg(U) && 18.5k
!BinaryOperator::isFNeg(U)17.8k
)
834
17.8k
      continue;
835
716
836
716
    // We found one!  Now we have to make sure that the definition dominates
837
716
    // this use.  We do this by moving it to the entry block (if it is a
838
716
    // non-instruction value) or right after the definition.  These negates will
839
716
    // be zapped by reassociate later, so we don't need much finesse here.
840
716
    BinaryOperator *TheNeg = cast<BinaryOperator>(U);
841
716
842
716
    // Verify that the negate is in this function, V might be a constant expr.
843
716
    if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
844
0
      continue;
845
716
846
716
    BasicBlock::iterator InsertPt;
847
716
    if (Instruction *
InstInput716
= dyn_cast<Instruction>(V)) {
848
632
      if (InvokeInst *
II632
= dyn_cast<InvokeInst>(InstInput)) {
849
0
        InsertPt = II->getNormalDest()->begin();
850
632
      } else {
851
632
        InsertPt = ++InstInput->getIterator();
852
632
      }
853
796
      while (
isa<PHINode>(InsertPt)796
)
++InsertPt164
;
854
716
    } else {
855
84
      InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
856
84
    }
857
716
    TheNeg->moveBefore(&*InsertPt);
858
716
    if (
TheNeg->getOpcode() == Instruction::Sub716
) {
859
711
      TheNeg->setHasNoUnsignedWrap(false);
860
711
      TheNeg->setHasNoSignedWrap(false);
861
716
    } else {
862
5
      TheNeg->andIRFlags(BI);
863
5
    }
864
18.5k
    ToRedo.insert(TheNeg);
865
18.5k
    return TheNeg;
866
18.5k
  }
867
9.49k
868
9.49k
  // Insert a 'neg' instruction that subtracts the value from zero to get the
869
9.49k
  // negation.
870
9.49k
  BinaryOperator *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
871
9.49k
  ToRedo.insert(NewNeg);
872
9.49k
  return NewNeg;
873
9.49k
}
874
875
/// Return true if we should break up this subtract of X-Y into (X + -Y).
876
155k
static bool ShouldBreakUpSubtract(Instruction *Sub) {
877
155k
  // If this is a negation, we can't split it up!
878
155k
  if (
BinaryOperator::isNeg(Sub) || 155k
BinaryOperator::isFNeg(Sub)118k
)
879
36.7k
    return false;
880
118k
881
118k
  // Don't breakup X - undef.
882
118k
  
if (118k
isa<UndefValue>(Sub->getOperand(1))118k
)
883
3
    return false;
884
118k
885
118k
  // Don't bother to break this up unless either the LHS is an associable add or
886
118k
  // subtract or if this is only used by one.
887
118k
  Value *V0 = Sub->getOperand(0);
888
118k
  if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) ||
889
115k
      isReassociableOp(V0, Instruction::Sub, Instruction::FSub))
890
2.89k
    return true;
891
115k
  Value *V1 = Sub->getOperand(1);
892
115k
  if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) ||
893
115k
      isReassociableOp(V1, Instruction::Sub, Instruction::FSub))
894
246
    return true;
895
115k
  Value *VB = Sub->user_back();
896
115k
  if (Sub->hasOneUse() &&
897
102k
      (isReassociableOp(VB, Instruction::Add, Instruction::FAdd) ||
898
96.5k
       isReassociableOp(VB, Instruction::Sub, Instruction::FSub)))
899
6.19k
    return true;
900
109k
901
109k
  return false;
902
109k
}
903
904
/// If we have (X-Y), and if either X is an add, or if this is only used by an
905
/// add, transform this into (X+(0-Y)) to promote better reassociation.
906
static BinaryOperator *
907
9.33k
BreakUpSubtract(Instruction *Sub, SetVector<AssertingVH<Instruction>> &ToRedo) {
908
9.33k
  // Convert a subtract into an add and a neg instruction. This allows sub
909
9.33k
  // instructions to be commuted with other add instructions.
910
9.33k
  //
911
9.33k
  // Calculate the negative value of Operand 1 of the sub instruction,
912
9.33k
  // and set it as the RHS of the add instruction we just made.
913
9.33k
  //
914
9.33k
  Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
915
9.33k
  BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
916
9.33k
  Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
917
9.33k
  Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
918
9.33k
  New->takeName(Sub);
919
9.33k
920
9.33k
  // Everyone now refers to the add instruction.
921
9.33k
  Sub->replaceAllUsesWith(New);
922
9.33k
  New->setDebugLoc(Sub->getDebugLoc());
923
9.33k
924
9.33k
  DEBUG(dbgs() << "Negated: " << *New << '\n');
925
9.33k
  return New;
926
9.33k
}
927
928
/// If this is a shift of a reassociable multiply or is used by one, change
929
/// this into a multiply by a constant to assist with further reassociation.
930
18.4k
static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
931
18.4k
  Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
932
18.4k
  MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
933
18.4k
934
18.4k
  BinaryOperator *Mul =
935
18.4k
    BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
936
18.4k
  Shl->setOperand(0, UndefValue::get(Shl->getType())); // Drop use of op.
937
18.4k
  Mul->takeName(Shl);
938
18.4k
939
18.4k
  // Everyone now refers to the mul instruction.
940
18.4k
  Shl->replaceAllUsesWith(Mul);
941
18.4k
  Mul->setDebugLoc(Shl->getDebugLoc());
942
18.4k
943
18.4k
  // We can safely preserve the nuw flag in all cases.  It's also safe to turn a
944
18.4k
  // nuw nsw shl into a nuw nsw mul.  However, nsw in isolation requires special
945
18.4k
  // handling.
946
18.4k
  bool NSW = cast<BinaryOperator>(Shl)->hasNoSignedWrap();
947
18.4k
  bool NUW = cast<BinaryOperator>(Shl)->hasNoUnsignedWrap();
948
18.4k
  if (
NSW && 18.4k
NUW7.86k
)
949
6.69k
    Mul->setHasNoSignedWrap(true);
950
18.4k
  Mul->setHasNoUnsignedWrap(NUW);
951
18.4k
  return Mul;
952
18.4k
}
953
954
/// Scan backwards and forwards among values with the same rank as element i
955
/// to see if X exists.  If X does not exist, return i.  This is useful when
956
/// scanning for 'x' when we see '-x' because they both get the same rank.
957
static unsigned FindInOperandList(const SmallVectorImpl<ValueEntry> &Ops,
958
26.5k
                                  unsigned i, Value *X) {
959
26.5k
  unsigned XRank = Ops[i].Rank;
960
26.5k
  unsigned e = Ops.size();
961
26.7k
  for (unsigned j = i+1; 
j != e && 26.7k
Ops[j].Rank == XRank22.0k
;
++j289
) {
962
308
    if (Ops[j].Op == X)
963
18
      return j;
964
290
    
if (Instruction *290
I1290
= dyn_cast<Instruction>(Ops[j].Op))
965
290
      
if (Instruction *290
I2290
= dyn_cast<Instruction>(X))
966
290
        
if (290
I1->isIdenticalTo(I2)290
)
967
1
          return j;
968
308
  }
969
26.5k
  // Scan backwards.
970
32.5k
  
for (unsigned j = i-1; 26.4k
j != ~0U && 32.5k
Ops[j].Rank == XRank20.2k
;
--j6.03k
) {
971
6.03k
    if (Ops[j].Op == X)
972
3
      return j;
973
6.03k
    
if (Instruction *6.03k
I16.03k
= dyn_cast<Instruction>(Ops[j].Op))
974
6.02k
      
if (Instruction *6.02k
I26.02k
= dyn_cast<Instruction>(X))
975
6.01k
        
if (6.01k
I1->isIdenticalTo(I2)6.01k
)
976
0
          return j;
977
6.03k
  }
978
26.4k
  return i;
979
26.5k
}
980
981
/// Emit a tree of add instructions, summing Ops together
982
/// and returning the result.  Insert the tree before I.
983
static Value *EmitAddTreeOfValues(Instruction *I,
984
6.25k
                                  SmallVectorImpl<WeakTrackingVH> &Ops) {
985
6.25k
  if (
Ops.size() == 16.25k
)
return Ops.back()1.21k
;
986
5.03k
987
5.03k
  Value *V1 = Ops.back();
988
5.03k
  Ops.pop_back();
989
5.03k
  Value *V2 = EmitAddTreeOfValues(I, Ops);
990
5.03k
  return CreateAdd(V2, V1, "tmp", I, I);
991
5.03k
}
992
993
/// If V is an expression tree that is a multiplication sequence,
994
/// and if this sequence contains a multiply by Factor,
995
/// remove Factor from the tree and return the new tree.
996
6.55k
Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
997
6.55k
  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
998
6.55k
  if (!BO)
999
0
    return nullptr;
1000
6.55k
1001
6.55k
  SmallVector<RepeatedValue, 8> Tree;
1002
6.55k
  MadeChange |= LinearizeExprTree(BO, Tree);
1003
6.55k
  SmallVector<ValueEntry, 8> Factors;
1004
6.55k
  Factors.reserve(Tree.size());
1005
20.6k
  for (unsigned i = 0, e = Tree.size(); 
i != e20.6k
;
++i14.0k
) {
1006
14.0k
    RepeatedValue E = Tree[i];
1007
14.0k
    Factors.append(E.second.getZExtValue(),
1008
14.0k
                   ValueEntry(getRank(E.first), E.first));
1009
14.0k
  }
1010
6.55k
1011
6.55k
  bool FoundFactor = false;
1012
6.55k
  bool NeedsNegate = false;
1013
13.9k
  for (unsigned i = 0, e = Factors.size(); 
i != e13.9k
;
++i7.39k
) {
1014
13.6k
    if (
Factors[i].Op == Factor13.6k
) {
1015
6.21k
      FoundFactor = true;
1016
6.21k
      Factors.erase(Factors.begin()+i);
1017
6.21k
      break;
1018
6.21k
    }
1019
7.43k
1020
7.43k
    // If this is a negative version of this factor, remove it.
1021
7.43k
    
if (ConstantInt *7.43k
FC17.43k
= dyn_cast<ConstantInt>(Factor)) {
1022
4.88k
      if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
1023
199
        
if (199
FC1->getValue() == -FC2->getValue()199
) {
1024
36
          FoundFactor = NeedsNegate = true;
1025
36
          Factors.erase(Factors.begin()+i);
1026
36
          break;
1027
36
        }
1028
2.55k
    } else 
if (ConstantFP *2.55k
FC12.55k
= dyn_cast<ConstantFP>(Factor)) {
1029
9
      if (ConstantFP *
FC29
= dyn_cast<ConstantFP>(Factors[i].Op)) {
1030
3
        const APFloat &F1 = FC1->getValueAPF();
1031
3
        APFloat F2(FC2->getValueAPF());
1032
3
        F2.changeSign();
1033
3
        if (
F1.compare(F2) == APFloat::cmpEqual3
) {
1034
3
          FoundFactor = NeedsNegate = true;
1035
3
          Factors.erase(Factors.begin() + i);
1036
3
          break;
1037
3
        }
1038
3
      }
1039
2.55k
    }
1040
13.6k
  }
1041
6.55k
1042
6.55k
  if (
!FoundFactor6.55k
) {
1043
304
    // Make sure to restore the operands to the expression tree.
1044
304
    RewriteExprTree(BO, Factors);
1045
304
    return nullptr;
1046
304
  }
1047
6.25k
1048
6.25k
  BasicBlock::iterator InsertPt = ++BO->getIterator();
1049
6.25k
1050
6.25k
  // If this was just a single multiply, remove the multiply and return the only
1051
6.25k
  // remaining operand.
1052
6.25k
  if (
Factors.size() == 16.25k
) {
1053
5.33k
    RedoInsts.insert(BO);
1054
5.33k
    V = Factors[0].Op;
1055
6.25k
  } else {
1056
921
    RewriteExprTree(BO, Factors);
1057
921
    V = BO;
1058
921
  }
1059
6.25k
1060
6.25k
  if (NeedsNegate)
1061
39
    V = CreateNeg(V, "neg", &*InsertPt, BO);
1062
6.55k
1063
6.55k
  return V;
1064
6.55k
}
1065
1066
/// If V is a single-use multiply, recursively add its operands as factors,
1067
/// otherwise add V to the list of factors.
1068
///
1069
/// Ops is the top-level list of add operands we're trying to factor.
1070
static void FindSingleUseMultiplyFactors(Value *V,
1071
379k
                                         SmallVectorImpl<Value*> &Factors) {
1072
379k
  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1073
379k
  if (
!BO379k
) {
1074
251k
    Factors.push_back(V);
1075
251k
    return;
1076
251k
  }
1077
128k
1078
128k
  // Otherwise, add the LHS and RHS to the list of factors.
1079
128k
  FindSingleUseMultiplyFactors(BO->getOperand(1), Factors);
1080
128k
  FindSingleUseMultiplyFactors(BO->getOperand(0), Factors);
1081
128k
}
1082
1083
/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
1084
/// This optimizes based on identities.  If it can be reduced to a single Value,
1085
/// it is returned, otherwise the Ops list is mutated as necessary.
1086
static Value *OptimizeAndOrXor(unsigned Opcode,
1087
181k
                               SmallVectorImpl<ValueEntry> &Ops) {
1088
181k
  // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
1089
181k
  // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
1090
582k
  for (unsigned i = 0, e = Ops.size(); 
i != e582k
;
++i400k
) {
1091
400k
    // First, check for X and ~X in the operand list.
1092
400k
    assert(i < Ops.size());
1093
400k
    if (
BinaryOperator::isNot(Ops[i].Op)400k
) { // Cannot occur for ^.
1094
4.07k
      Value *X = BinaryOperator::getNotArgument(Ops[i].Op);
1095
4.07k
      unsigned FoundX = FindInOperandList(Ops, i, X);
1096
4.07k
      if (
FoundX != i4.07k
) {
1097
2
        if (Opcode == Instruction::And)   // ...&X&~X = 0
1098
2
          return Constant::getNullValue(X->getType());
1099
0
1100
0
        
if (0
Opcode == Instruction::Or0
) // ...|X|~X = -1
1101
0
          return Constant::getAllOnesValue(X->getType());
1102
400k
      }
1103
4.07k
    }
1104
400k
1105
400k
    // Next, check for duplicate pairs of values, which we assume are next to
1106
400k
    // each other, due to our sorting criteria.
1107
400k
    assert(i < Ops.size());
1108
400k
    if (
i+1 != Ops.size() && 400k
Ops[i+1].Op == Ops[i].Op218k
) {
1109
0
      if (
Opcode == Instruction::And || 0
Opcode == Instruction::Or0
) {
1110
0
        // Drop duplicate values for And and Or.
1111
0
        Ops.erase(Ops.begin()+i);
1112
0
        --i; --e;
1113
0
        ++NumAnnihil;
1114
0
        continue;
1115
0
      }
1116
0
1117
0
      // Drop pairs of values for Xor.
1118
0
      assert(Opcode == Instruction::Xor);
1119
0
      if (e == 2)
1120
0
        return Constant::getNullValue(Ops[0].Op->getType());
1121
0
1122
0
      // Y ^ X^X -> Y
1123
0
      Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
1124
0
      i -= 1; e -= 2;
1125
0
      ++NumAnnihil;
1126
0
    }
1127
400k
  }
1128
181k
  return nullptr;
1129
181k
}
1130
1131
/// Helper function of CombineXorOpnd(). It creates a bitwise-and
1132
/// instruction with the given two operands, and return the resulting
1133
/// instruction. There are two special cases: 1) if the constant operand is 0,
1134
/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
1135
/// be returned.
1136
static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
1137
16
                             const APInt &ConstOpnd) {
1138
16
  if (ConstOpnd.isNullValue())
1139
6
    return nullptr;
1140
10
1141
10
  
if (10
ConstOpnd.isAllOnesValue()10
)
1142
2
    return Opnd;
1143
8
1144
8
  Instruction *I = BinaryOperator::CreateAnd(
1145
8
      Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra",
1146
8
      InsertBefore);
1147
8
  I->setDebugLoc(InsertBefore->getDebugLoc());
1148
8
  return I;
1149
8
}
1150
1151
// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
1152
// into "R ^ C", where C would be 0, and R is a symbolic value.
1153
//
1154
// If it was successful, true is returned, and the "R" and "C" is returned
1155
// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
1156
// and both "Res" and "ConstOpnd" remain unchanged.
1157
//
1158
bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
1159
13.5k
                                     APInt &ConstOpnd, Value *&Res) {
1160
13.5k
  // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2 
1161
13.5k
  //                       = ((x | c1) ^ c1) ^ (c1 ^ c2)
1162
13.5k
  //                       = (x & ~c1) ^ (c1 ^ c2)
1163
13.5k
  // It is useful only when c1 == c2.
1164
13.5k
  if (
!Opnd1->isOrExpr() || 13.5k
Opnd1->getConstPart().isNullValue()12.9k
)
1165
13.4k
    return false;
1166
11
1167
11
  
if (11
!Opnd1->getValue()->hasOneUse()11
)
1168
6
    return false;
1169
5
1170
5
  const APInt &C1 = Opnd1->getConstPart();
1171
5
  if (C1 != ConstOpnd)
1172
5
    return false;
1173
0
1174
0
  Value *X = Opnd1->getSymbolicPart();
1175
0
  Res = createAndInstr(I, X, ~C1);
1176
0
  // ConstOpnd was C2, now C1 ^ C2.
1177
0
  ConstOpnd ^= C1;
1178
0
1179
0
  if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1180
0
    RedoInsts.insert(T);
1181
13.5k
  return true;
1182
13.5k
}
1183
1184
                           
1185
// Helper function of OptimizeXor(). It tries to simplify
1186
// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
1187
// symbolic value. 
1188
// 
1189
// If it was successful, true is returned, and the "R" and "C" is returned 
1190
// via "Res" and "ConstOpnd", respectively (If the entire expression is
1191
// evaluated to a constant, the Res is set to NULL); otherwise, false is
1192
// returned, and both "Res" and "ConstOpnd" remain unchanged.
1193
bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
1194
                                     XorOpnd *Opnd2, APInt &ConstOpnd,
1195
21
                                     Value *&Res) {
1196
21
  Value *X = Opnd1->getSymbolicPart();
1197
21
  if (X != Opnd2->getSymbolicPart())
1198
0
    return false;
1199
21
1200
21
  // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
1201
21
  int DeadInstNum = 1;
1202
21
  if (Opnd1->getValue()->hasOneUse())
1203
16
    DeadInstNum++;
1204
21
  if (Opnd2->getValue()->hasOneUse())
1205
15
    DeadInstNum++;
1206
21
1207
21
  // Xor-Rule 2:
1208
21
  //  (x | c1) ^ (x & c2)
1209
21
  //   = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
1210
21
  //   = (x & ~c1) ^ (x & c2) ^ c1               // Xor-Rule 1
1211
21
  //   = (x & c3) ^ c1, where c3 = ~c1 ^ c2      // Xor-rule 3
1212
21
  //
1213
21
  if (
Opnd1->isOrExpr() != Opnd2->isOrExpr()21
) {
1214
11
    if (Opnd2->isOrExpr())
1215
1
      std::swap(Opnd1, Opnd2);
1216
11
1217
11
    const APInt &C1 = Opnd1->getConstPart();
1218
11
    const APInt &C2 = Opnd2->getConstPart();
1219
11
    APInt C3((~C1) ^ C2);
1220
11
1221
11
    // Do not increase code size!
1222
11
    if (
!C3.isNullValue() && 11
!C3.isAllOnesValue()9
) {
1223
7
      int NewInstNum = ConstOpnd.getBoolValue() ? 
10
:
27
;
1224
7
      if (NewInstNum > DeadInstNum)
1225
4
        return false;
1226
7
    }
1227
7
1228
7
    Res = createAndInstr(I, X, C3);
1229
7
    ConstOpnd ^= C1;
1230
7
1231
21
  } else 
if (10
Opnd1->isOrExpr()10
) {
1232
6
    // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
1233
6
    //
1234
6
    const APInt &C1 = Opnd1->getConstPart();
1235
6
    const APInt &C2 = Opnd2->getConstPart();
1236
6
    APInt C3 = C1 ^ C2;
1237
6
    
1238
6
    // Do not increase code size
1239
6
    if (
!C3.isNullValue() && 6
!C3.isAllOnesValue()4
) {
1240
4
      int NewInstNum = ConstOpnd.getBoolValue() ? 
10
:
24
;
1241
4
      if (NewInstNum > DeadInstNum)
1242
1
        return false;
1243
5
    }
1244
5
1245
5
    Res = createAndInstr(I, X, C3);
1246
5
    ConstOpnd ^= C3;
1247
10
  } else {
1248
4
    // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
1249
4
    //
1250
4
    const APInt &C1 = Opnd1->getConstPart();
1251
4
    const APInt &C2 = Opnd2->getConstPart();
1252
4
    APInt C3 = C1 ^ C2;
1253
4
    Res = createAndInstr(I, X, C3);
1254
4
  }
1255
21
1256
21
  // Put the original operands in the Redo list; hope they will be deleted
1257
21
  // as dead code.
1258
16
  
if (Instruction *16
T16
= dyn_cast<Instruction>(Opnd1->getValue()))
1259
16
    RedoInsts.insert(T);
1260
16
  if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
1261
16
    RedoInsts.insert(T);
1262
16
1263
16
  return true;
1264
21
}
1265
1266
/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
1267
/// to a single Value, it is returned, otherwise the Ops list is mutated as
1268
/// necessary.
1269
Value *ReassociatePass::OptimizeXor(Instruction *I,
1270
19.6k
                                    SmallVectorImpl<ValueEntry> &Ops) {
1271
19.6k
  if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
1272
0
    return V;
1273
19.6k
      
1274
19.6k
  
if (19.6k
Ops.size() == 119.6k
)
1275
0
    return nullptr;
1276
19.6k
1277
19.6k
  SmallVector<XorOpnd, 8> Opnds;
1278
19.6k
  SmallVector<XorOpnd*, 8> OpndPtrs;
1279
19.6k
  Type *Ty = Ops[0].Op->getType();
1280
19.6k
  APInt ConstOpnd(Ty->getScalarSizeInBits(), 0);
1281
19.6k
1282
19.6k
  // Step 1: Convert ValueEntry to XorOpnd
1283
73.2k
  for (unsigned i = 0, e = Ops.size(); 
i != e73.2k
;
++i53.5k
) {
1284
53.5k
    Value *V = Ops[i].Op;
1285
53.5k
    const APInt *C;
1286
53.5k
    // TODO: Support non-splat vectors.
1287
53.5k
    if (
match(V, PatternMatch::m_APInt(C))53.5k
) {
1288
11.3k
      ConstOpnd ^= *C;
1289
53.5k
    } else {
1290
42.2k
      XorOpnd O(V);
1291
42.2k
      O.setSymbolicRank(getRank(O.getSymbolicPart()));
1292
42.2k
      Opnds.push_back(O);
1293
42.2k
    }
1294
53.5k
  }
1295
19.6k
1296
19.6k
  // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
1297
19.6k
  //  It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
1298
19.6k
  //  the "OpndPtrs" as well. For the similar reason, do not fuse this loop
1299
19.6k
  //  with the previous loop --- the iterator of the "Opnds" may be invalidated
1300
19.6k
  //  when new elements are added to the vector.
1301
61.9k
  for (unsigned i = 0, e = Opnds.size(); 
i != e61.9k
;
++i42.2k
)
1302
42.2k
    OpndPtrs.push_back(&Opnds[i]);
1303
19.6k
1304
19.6k
  // Step 2: Sort the Xor-Operands in a way such that the operands containing
1305
19.6k
  //  the same symbolic value cluster together. For instance, the input operand
1306
19.6k
  //  sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
1307
19.6k
  //  ("x | 123", "x & 789", "y & 456").
1308
19.6k
  //
1309
19.6k
  //  The purpose is twofold:
1310
19.6k
  //  1) Cluster together the operands sharing the same symbolic-value.
1311
19.6k
  //  2) Operand having smaller symbolic-value-rank is permuted earlier, which
1312
19.6k
  //     could potentially shorten crital path, and expose more loop-invariants.
1313
19.6k
  //     Note that values' rank are basically defined in RPO order (FIXME).
1314
19.6k
  //     So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
1315
19.6k
  //     than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
1316
19.6k
  //     "z" in the order of X-Y-Z is better than any other orders.
1317
19.6k
  std::stable_sort(OpndPtrs.begin(), OpndPtrs.end(),
1318
62.6k
                   [](XorOpnd *LHS, XorOpnd *RHS) {
1319
62.6k
    return LHS->getSymbolicRank() < RHS->getSymbolicRank();
1320
62.6k
  });
1321
19.6k
1322
19.6k
  // Step 3: Combine adjacent operands
1323
19.6k
  XorOpnd *PrevOpnd = nullptr;
1324
19.6k
  bool Changed = false;
1325
61.9k
  for (unsigned i = 0, e = Opnds.size(); 
i < e61.9k
;
i++42.2k
) {
1326
42.2k
    XorOpnd *CurrOpnd = OpndPtrs[i];
1327
42.2k
    // The combined value
1328
42.2k
    Value *CV;
1329
42.2k
1330
42.2k
    // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
1331
42.2k
    if (!ConstOpnd.isNullValue() &&
1332
42.2k
        
CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)13.5k
) {
1333
0
      Changed = true;
1334
0
      if (CV)
1335
0
        *CurrOpnd = XorOpnd(CV);
1336
0
      else {
1337
0
        CurrOpnd->Invalidate();
1338
0
        continue;
1339
0
      }
1340
42.2k
    }
1341
42.2k
1342
42.2k
    
if (42.2k
!PrevOpnd || 42.2k
CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()22.5k
) {
1343
42.2k
      PrevOpnd = CurrOpnd;
1344
42.2k
      continue;
1345
42.2k
    }
1346
21
1347
21
    // step 3.2: When previous and current operands share the same symbolic
1348
21
    //  value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd" 
1349
21
    //    
1350
21
    
if (21
CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)21
) {
1351
16
      // Remove previous operand
1352
16
      PrevOpnd->Invalidate();
1353
16
      if (
CV16
) {
1354
10
        *CurrOpnd = XorOpnd(CV);
1355
10
        PrevOpnd = CurrOpnd;
1356
16
      } else {
1357
6
        CurrOpnd->Invalidate();
1358
6
        PrevOpnd = nullptr;
1359
6
      }
1360
16
      Changed = true;
1361
16
    }
1362
42.2k
  }
1363
19.6k
1364
19.6k
  // Step 4: Reassemble the Ops
1365
19.6k
  if (
Changed19.6k
) {
1366
16
    Ops.clear();
1367
58
    for (unsigned int i = 0, e = Opnds.size(); 
i < e58
;
i++42
) {
1368
42
      XorOpnd &O = Opnds[i];
1369
42
      if (O.isInvalid())
1370
22
        continue;
1371
20
      ValueEntry VE(getRank(O.getValue()), O.getValue());
1372
20
      Ops.push_back(VE);
1373
20
    }
1374
16
    if (
!ConstOpnd.isNullValue()16
) {
1375
10
      Value *C = ConstantInt::get(Ty, ConstOpnd);
1376
10
      ValueEntry VE(getRank(C), C);
1377
10
      Ops.push_back(VE);
1378
10
    }
1379
16
    unsigned Sz = Ops.size();
1380
16
    if (Sz == 1)
1381
0
      return Ops.back().Op;
1382
16
    
if (16
Sz == 016
) {
1383
4
      assert(ConstOpnd.isNullValue());
1384
4
      return ConstantInt::get(Ty, ConstOpnd);
1385
4
    }
1386
19.6k
  }
1387
19.6k
1388
19.6k
  return nullptr;
1389
19.6k
}
1390
1391
/// Optimize a series of operands to an 'add' instruction.  This
1392
/// optimizes based on identities.  If it can be reduced to a single Value, it
1393
/// is returned, otherwise the Ops list is mutated as necessary.
1394
Value *ReassociatePass::OptimizeAdd(Instruction *I,
1395
663k
                                    SmallVectorImpl<ValueEntry> &Ops) {
1396
663k
  // Scan the operand lists looking for X and -X pairs.  If we find any, we
1397
663k
  // can simplify expressions like X+-X == 0 and X+~X ==-1.  While we're at it,
1398
663k
  // scan for any
1399
663k
  // duplicates.  We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
1400
663k
1401
2.19M
  for (unsigned i = 0, e = Ops.size(); 
i != e2.19M
;
++i1.52M
) {
1402
1.52M
    Value *TheOp = Ops[i].Op;
1403
1.52M
    // Check to see if we've seen this operand before.  If so, we factor all
1404
1.52M
    // instances of the operand together.  Due to our sorting criteria, we know
1405
1.52M
    // that these need to be next to each other in the vector.
1406
1.52M
    if (
i+1 != Ops.size() && 1.52M
Ops[i+1].Op == TheOp865k
) {
1407
98
      // Rescan the list, remove all instances of this operand from the expr.
1408
98
      unsigned NumFound = 0;
1409
222
      do {
1410
222
        Ops.erase(Ops.begin()+i);
1411
222
        ++NumFound;
1412
222
      } while (
i != Ops.size() && 222
Ops[i].Op == TheOp171
);
1413
98
1414
98
      DEBUG(dbgs() << "\nFACTORING [" << NumFound << "]: " << *TheOp << '\n');
1415
98
      ++NumFactor;
1416
98
1417
98
      // Insert a new multiply.
1418
98
      Type *Ty = TheOp->getType();
1419
98
      Constant *C = Ty->isIntOrIntVectorTy() ?
1420
98
        
ConstantInt::get(Ty, NumFound)82
:
ConstantFP::get(Ty, NumFound)16
;
1421
98
      Instruction *Mul = CreateMul(TheOp, C, "factor", I, I);
1422
98
1423
98
      // Now that we have inserted a multiply, optimize it. This allows us to
1424
98
      // handle cases that require multiple factoring steps, such as this:
1425
98
      // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
1426
98
      RedoInsts.insert(Mul);
1427
98
1428
98
      // If every add operand was a duplicate, return the multiply.
1429
98
      if (Ops.empty())
1430
14
        return Mul;
1431
84
1432
84
      // Otherwise, we had some input that didn't have the dupe, such as
1433
84
      // "A + A + B" -> "A*2 + B".  Add the new multiply to the list of
1434
84
      // things being added by this operation.
1435
84
      Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
1436
84
1437
84
      --i;
1438
84
      e = Ops.size();
1439
84
      continue;
1440
84
    }
1441
1.52M
1442
1.52M
    // Check for X and -X or X and ~X in the operand list.
1443
1.52M
    
if (1.52M
!BinaryOperator::isNeg(TheOp) && 1.52M
!BinaryOperator::isFNeg(TheOp)1.50M
&&
1444
1.50M
        !BinaryOperator::isNot(TheOp))
1445
1.50M
      continue;
1446
22.4k
1447
22.4k
    Value *X = nullptr;
1448
22.4k
    if (
BinaryOperator::isNeg(TheOp) || 22.4k
BinaryOperator::isFNeg(TheOp)223
)
1449
22.2k
      X = BinaryOperator::getNegArgument(TheOp);
1450
181
    else 
if (181
BinaryOperator::isNot(TheOp)181
)
1451
181
      X = BinaryOperator::getNotArgument(TheOp);
1452
22.4k
1453
22.4k
    unsigned FoundX = FindInOperandList(Ops, i, X);
1454
22.4k
    if (FoundX == i)
1455
22.4k
      continue;
1456
20
1457
20
    // Remove X and -X from the operand list.
1458
20
    
if (20
Ops.size() == 2 &&
1459
4
        
(BinaryOperator::isNeg(TheOp) || 4
BinaryOperator::isFNeg(TheOp)1
))
1460
4
      return Constant::getNullValue(X->getType());
1461
16
1462
16
    // Remove X and ~X from the operand list.
1463
16
    
if (16
Ops.size() == 2 && 16
BinaryOperator::isNot(TheOp)0
)
1464
0
      return Constant::getAllOnesValue(X->getType());
1465
16
1466
16
    Ops.erase(Ops.begin()+i);
1467
16
    if (i < FoundX)
1468
14
      --FoundX;
1469
16
    else
1470
2
      --i;   // Need to back up an extra one.
1471
16
    Ops.erase(Ops.begin()+FoundX);
1472
16
    ++NumAnnihil;
1473
16
    --i;     // Revisit element.
1474
16
    e -= 2;  // Removed two elements.
1475
16
1476
16
    // if X and ~X we append -1 to the operand list.
1477
16
    if (
BinaryOperator::isNot(TheOp)16
) {
1478
1
      Value *V = Constant::getAllOnesValue(X->getType());
1479
1
      Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
1480
1
      e += 1;
1481
1
    }
1482
1.52M
  }
1483
663k
1484
663k
  // Scan the operand list, checking to see if there are any common factors
1485
663k
  // between operands.  Consider something like A*A+A*B*C+D.  We would like to
1486
663k
  // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
1487
663k
  // To efficiently find this, we count the number of times a factor occurs
1488
663k
  // for any ADD operands that are MULs.
1489
663k
  DenseMap<Value*, unsigned> FactorOccurrences;
1490
663k
1491
663k
  // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
1492
663k
  // where they are actually the same multiply.
1493
663k
  unsigned MaxOcc = 0;
1494
663k
  Value *MaxOccVal = nullptr;
1495
2.19M
  for (unsigned i = 0, e = Ops.size(); 
i != e2.19M
;
++i1.52M
) {
1496
1.52M
    BinaryOperator *BOp =
1497
1.52M
        isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1498
1.52M
    if (!BOp)
1499
1.40M
      continue;
1500
123k
1501
123k
    // Compute all of the factors of this added value.
1502
123k
    SmallVector<Value*, 8> Factors;
1503
123k
    FindSingleUseMultiplyFactors(BOp, Factors);
1504
123k
    assert(Factors.size() > 1 && "Bad linearize!");
1505
123k
1506
123k
    // Add one to FactorOccurrences for each unique factor in this op.
1507
123k
    SmallPtrSet<Value*, 8> Duplicates;
1508
374k
    for (unsigned i = 0, e = Factors.size(); 
i != e374k
;
++i251k
) {
1509
251k
      Value *Factor = Factors[i];
1510
251k
      if (!Duplicates.insert(Factor).second)
1511
10.3k
        continue;
1512
240k
1513
240k
      unsigned Occ = ++FactorOccurrences[Factor];
1514
240k
      if (
Occ > MaxOcc240k
) {
1515
72.2k
        MaxOcc = Occ;
1516
72.2k
        MaxOccVal = Factor;
1517
72.2k
      }
1518
240k
1519
240k
      // If Factor is a negative constant, add the negated value as a factor
1520
240k
      // because we can percolate the negate out.  Watch for minint, which
1521
240k
      // cannot be positivified.
1522
240k
      if (ConstantInt *
CI240k
= dyn_cast<ConstantInt>(Factor)) {
1523
54.1k
        if (
CI->isNegative() && 54.1k
!CI->isMinValue(true)11.4k
) {
1524
11.4k
          Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
1525
11.4k
          if (!Duplicates.insert(Factor).second)
1526
0
            continue;
1527
11.4k
          unsigned Occ = ++FactorOccurrences[Factor];
1528
11.4k
          if (
Occ > MaxOcc11.4k
) {
1529
28
            MaxOcc = Occ;
1530
28
            MaxOccVal = Factor;
1531
28
          }
1532
11.4k
        }
1533
240k
      } else 
if (ConstantFP *186k
CF186k
= dyn_cast<ConstantFP>(Factor)) {
1534
25
        if (
CF->isNegative()25
) {
1535
12
          APFloat F(CF->getValueAPF());
1536
12
          F.changeSign();
1537
12
          Factor = ConstantFP::get(CF->getContext(), F);
1538
12
          if (!Duplicates.insert(Factor).second)
1539
0
            continue;
1540
12
          unsigned Occ = ++FactorOccurrences[Factor];
1541
12
          if (
Occ > MaxOcc12
) {
1542
3
            MaxOcc = Occ;
1543
3
            MaxOccVal = Factor;
1544
3
          }
1545
12
        }
1546
186k
      }
1547
251k
    }
1548
1.52M
  }
1549
663k
1550
663k
  // If any factor occurred more than one time, we can pull it out.
1551
663k
  if (
MaxOcc > 1663k
) {
1552
1.21k
    DEBUG(dbgs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << '\n');
1553
1.21k
    ++NumFactor;
1554
1.21k
1555
1.21k
    // Create a new instruction that uses the MaxOccVal twice.  If we don't do
1556
1.21k
    // this, we could otherwise run into situations where removing a factor
1557
1.21k
    // from an expression will drop a use of maxocc, and this can cause
1558
1.21k
    // RemoveFactorFromExpression on successive values to behave differently.
1559
1.21k
    Instruction *DummyInst =
1560
1.21k
        I->getType()->isIntOrIntVectorTy()
1561
1.20k
            ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
1562
19
            : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
1563
1.21k
1564
1.21k
    SmallVector<WeakTrackingVH, 4> NewMulOps;
1565
10.7k
    for (unsigned i = 0; 
i != Ops.size()10.7k
;
++i9.50k
) {
1566
9.50k
      // Only try to remove factors from expressions we're allowed to.
1567
9.50k
      BinaryOperator *BOp =
1568
9.50k
          isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1569
9.50k
      if (!BOp)
1570
2.94k
        continue;
1571
6.55k
1572
6.55k
      
if (Value *6.55k
V6.55k
= RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
1573
6.25k
        // The factorized operand may occur several times.  Convert them all in
1574
6.25k
        // one fell swoop.
1575
70.0k
        for (unsigned j = Ops.size(); 
j != i70.0k
;) {
1576
63.7k
          --j;
1577
63.7k
          if (
Ops[j].Op == Ops[i].Op63.7k
) {
1578
6.25k
            NewMulOps.push_back(V);
1579
6.25k
            Ops.erase(Ops.begin()+j);
1580
6.25k
          }
1581
63.7k
        }
1582
6.25k
        --i;
1583
6.25k
      }
1584
9.50k
    }
1585
1.21k
1586
1.21k
    // No need for extra uses anymore.
1587
1.21k
    DummyInst->deleteValue();
1588
1.21k
1589
1.21k
    unsigned NumAddedValues = NewMulOps.size();
1590
1.21k
    Value *V = EmitAddTreeOfValues(I, NewMulOps);
1591
1.21k
1592
1.21k
    // Now that we have inserted the add tree, optimize it. This allows us to
1593
1.21k
    // handle cases that require multiple factoring steps, such as this:
1594
1.21k
    // A*A*B + A*A*C   -->   A*(A*B+A*C)   -->   A*(A*(B+C))
1595
1.21k
    assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
1596
1.21k
    (void)NumAddedValues;
1597
1.21k
    if (Instruction *VI = dyn_cast<Instruction>(V))
1598
1.21k
      RedoInsts.insert(VI);
1599
1.21k
1600
1.21k
    // Create the multiply.
1601
1.21k
    Instruction *V2 = CreateMul(V, MaxOccVal, "tmp", I, I);
1602
1.21k
1603
1.21k
    // Rerun associate on the multiply in case the inner expression turned into
1604
1.21k
    // a multiply.  We want to make sure that we keep things in canonical form.
1605
1.21k
    RedoInsts.insert(V2);
1606
1.21k
1607
1.21k
    // If every add operand included the factor (e.g. "A*B + A*C"), then the
1608
1.21k
    // entire result expression is just the multiply "A*(B+C)".
1609
1.21k
    if (Ops.empty())
1610
32
      return V2;
1611
1.18k
1612
1.18k
    // Otherwise, we had some input that didn't have the factor, such as
1613
1.18k
    // "A*B + A*C + D" -> "A*(B+C) + D".  Add the new multiply to the list of
1614
1.18k
    // things being added by this operation.
1615
1.18k
    Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
1616
1.18k
  }
1617
663k
1618
663k
  return nullptr;
1619
663k
}
1620
1621
/// \brief Build up a vector of value/power pairs factoring a product.
1622
///
1623
/// Given a series of multiplication operands, build a vector of factors and
1624
/// the powers each is raised to when forming the final product. Sort them in
1625
/// the order of descending power.
1626
///
1627
///      (x*x)          -> [(x, 2)]
1628
///     ((x*x)*x)       -> [(x, 3)]
1629
///   ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
1630
///
1631
/// \returns Whether any factors have a power greater than one.
1632
static bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
1633
8.63k
                                   SmallVectorImpl<Factor> &Factors) {
1634
8.63k
  // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
1635
8.63k
  // Compute the sum of powers of simplifiable factors.
1636
8.63k
  unsigned FactorPowerSum = 0;
1637
34.6k
  for (unsigned Idx = 1, Size = Ops.size(); 
Idx < Size34.6k
;
++Idx25.9k
) {
1638
25.9k
    Value *Op = Ops[Idx-1].Op;
1639
25.9k
1640
25.9k
    // Count the number of occurrences of this value.
1641
25.9k
    unsigned Count = 1;
1642
26.0k
    for (; 
Idx < Size && 26.0k
Ops[Idx].Op == Op26.0k
;
++Idx99
)
1643
99
      ++Count;
1644
25.9k
    // Track for simplification all factors which occur 2 or more times.
1645
25.9k
    if (Count > 1)
1646
49
      FactorPowerSum += Count;
1647
25.9k
  }
1648
8.63k
1649
8.63k
  // We can only simplify factors if the sum of the powers of our simplifiable
1650
8.63k
  // factors is 4 or higher. When that is the case, we will *always* have
1651
8.63k
  // a simplification. This is an important invariant to prevent cyclicly
1652
8.63k
  // trying to simplify already minimal formations.
1653
8.63k
  if (FactorPowerSum < 4)
1654
8.61k
    return false;
1655
17
1656
17
  // Now gather the simplifiable factors, removing them from Ops.
1657
17
  FactorPowerSum = 0;
1658
42
  for (unsigned Idx = 1; 
Idx < Ops.size()42
;
++Idx25
) {
1659
25
    Value *Op = Ops[Idx-1].Op;
1660
25
1661
25
    // Count the number of occurrences of this value.
1662
25
    unsigned Count = 1;
1663
99
    for (; 
Idx < Ops.size() && 99
Ops[Idx].Op == Op82
;
++Idx74
)
1664
74
      ++Count;
1665
25
    if (Count == 1)
1666
1
      continue;
1667
24
    // Move an even number of occurrences to Factors.
1668
24
    Count &= ~1U;
1669
24
    Idx -= Count;
1670
24
    FactorPowerSum += Count;
1671
24
    Factors.push_back(Factor(Op, Count));
1672
24
    Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
1673
24
  }
1674
17
1675
17
  // None of the adjustments above should have reduced the sum of factor powers
1676
17
  // below our mininum of '4'.
1677
17
  assert(FactorPowerSum >= 4);
1678
17
1679
17
  std::stable_sort(Factors.begin(), Factors.end(),
1680
8
                   [](const Factor &LHS, const Factor &RHS) {
1681
8
    return LHS.Power > RHS.Power;
1682
8
  });
1683
8.63k
  return true;
1684
8.63k
}
1685
1686
/// \brief Build a tree of multiplies, computing the product of Ops.
1687
static Value *buildMultiplyTree(IRBuilder<> &Builder,
1688
36
                                SmallVectorImpl<Value*> &Ops) {
1689
36
  if (Ops.size() == 1)
1690
0
    return Ops.back();
1691
36
1692
36
  Value *LHS = Ops.pop_back_val();
1693
44
  do {
1694
44
    if (LHS->getType()->isIntOrIntVectorTy())
1695
42
      LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
1696
44
    else
1697
2
      LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
1698
44
  } while (!Ops.empty());
1699
36
1700
36
  return LHS;
1701
36
}
1702
1703
/// \brief Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
1704
///
1705
/// Given a vector of values raised to various powers, where no two values are
1706
/// equal and the powers are sorted in decreasing order, compute the minimal
1707
/// DAG of multiplies to compute the final product, and return that product
1708
/// value.
1709
Value *
1710
ReassociatePass::buildMinimalMultiplyDAG(IRBuilder<> &Builder,
1711
49
                                         SmallVectorImpl<Factor> &Factors) {
1712
49
  assert(Factors[0].Power);
1713
49
  SmallVector<Value *, 4> OuterProduct;
1714
49
  for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
1715
59
       
Idx < Size && 59
Factors[Idx].Power > 013
;
++Idx10
) {
1716
10
    if (
Factors[Idx].Power != Factors[LastIdx].Power10
) {
1717
6
      LastIdx = Idx;
1718
6
      continue;
1719
6
    }
1720
4
1721
4
    // We want to multiply across all the factors with the same power so that
1722
4
    // we can raise them to that power as a single entity. Build a mini tree
1723
4
    // for that.
1724
4
    SmallVector<Value *, 4> InnerProduct;
1725
4
    InnerProduct.push_back(Factors[LastIdx].Base);
1726
4
    do {
1727
4
      InnerProduct.push_back(Factors[Idx].Base);
1728
4
      ++Idx;
1729
4
    } while (
Idx < Size && 4
Factors[Idx].Power == Factors[LastIdx].Power0
);
1730
4
1731
4
    // Reset the base value of the first factor to the new expression tree.
1732
4
    // We'll remove all the factors with the same power in a second pass.
1733
4
    Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
1734
4
    if (Instruction *MI = dyn_cast<Instruction>(M))
1735
4
      RedoInsts.insert(MI);
1736
10
1737
10
    LastIdx = Idx;
1738
10
  }
1739
49
  // Unique factors with equal powers -- we've folded them into the first one's
1740
49
  // base.
1741
49
  Factors.erase(std::unique(Factors.begin(), Factors.end(),
1742
13
                            [](const Factor &LHS, const Factor &RHS) {
1743
13
                              return LHS.Power == RHS.Power;
1744
13
                            }),
1745
49
                Factors.end());
1746
49
1747
49
  // Iteratively collect the base of each factor with an add power into the
1748
49
  // outer product, and halve each power in preparation for squaring the
1749
49
  // expression.
1750
107
  for (unsigned Idx = 0, Size = Factors.size(); 
Idx != Size107
;
++Idx58
) {
1751
58
    if (Factors[Idx].Power & 1)
1752
25
      OuterProduct.push_back(Factors[Idx].Base);
1753
58
    Factors[Idx].Power >>= 1;
1754
58
  }
1755
49
  if (
Factors[0].Power49
) {
1756
32
    Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
1757
32
    OuterProduct.push_back(SquareRoot);
1758
32
    OuterProduct.push_back(SquareRoot);
1759
32
  }
1760
49
  if (OuterProduct.size() == 1)
1761
17
    return OuterProduct.front();
1762
32
1763
32
  Value *V = buildMultiplyTree(Builder, OuterProduct);
1764
32
  return V;
1765
32
}
1766
1767
Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
1768
162k
                                    SmallVectorImpl<ValueEntry> &Ops) {
1769
162k
  // We can only optimize the multiplies when there is a chain of more than
1770
162k
  // three, such that a balanced tree might require fewer total multiplies.
1771
162k
  if (Ops.size() < 4)
1772
153k
    return nullptr;
1773
8.63k
1774
8.63k
  // Try to turn linear trees of multiplies without other uses of the
1775
8.63k
  // intermediate stages into minimal multiply DAGs with perfect sub-expression
1776
8.63k
  // re-use.
1777
8.63k
  SmallVector<Factor, 4> Factors;
1778
8.63k
  if (!collectMultiplyFactors(Ops, Factors))
1779
8.61k
    return nullptr; // All distinct factors, so nothing left for us to do.
1780
16
1781
16
  IRBuilder<> Builder(I);
1782
16
  // The reassociate transformation for FP operations is performed only
1783
16
  // if unsafe algebra is permitted by FastMathFlags. Propagate those flags
1784
16
  // to the newly generated operations.
1785
16
  if (auto FPI = dyn_cast<FPMathOperator>(I))
1786
1
    Builder.setFastMathFlags(FPI->getFastMathFlags());
1787
16
1788
16
  Value *V = buildMinimalMultiplyDAG(Builder, Factors);
1789
16
  if (Ops.empty())
1790
10
    return V;
1791
6
1792
6
  ValueEntry NewEntry = ValueEntry(getRank(V), V);
1793
6
  Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry);
1794
6
  return nullptr;
1795
6
}
1796
1797
Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
1798
1.00M
                                           SmallVectorImpl<ValueEntry> &Ops) {
1799
1.00M
  // Now that we have the linearized expression tree, try to optimize it.
1800
1.00M
  // Start by folding any constants that we found.
1801
1.00M
  Constant *Cst = nullptr;
1802
1.00M
  unsigned Opcode = I->getOpcode();
1803
1.71M
  while (
!Ops.empty() && 1.71M
isa<Constant>(Ops.back().Op)1.71M
) {
1804
705k
    Constant *C = cast<Constant>(Ops.pop_back_val().Op);
1805
705k
    Cst = Cst ? 
ConstantExpr::get(Opcode, C, Cst)4.40k
:
C700k
;
1806
705k
  }
1807
1.00M
  // If there was nothing but constants then we are done.
1808
1.00M
  if (Ops.empty())
1809
19
    return Cst;
1810
1.00M
1811
1.00M
  // Put the combined constant back at the end of the operand list, except if
1812
1.00M
  // there is no point.  For example, an add of 0 gets dropped here, while a
1813
1.00M
  // multiplication by zero turns the whole expression into zero.
1814
1.00M
  
if (1.00M
Cst && 1.00M
Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())700k
) {
1815
700k
    if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
1816
1
      return Cst;
1817
700k
    Ops.push_back(ValueEntry(0, Cst));
1818
700k
  }
1819
1.00M
1820
1.00M
  
if (1.00M
Ops.size() == 11.00M
)
return Ops[0].Op7
;
1821
1.00M
1822
1.00M
  // Handle destructive annihilation due to identities between elements in the
1823
1.00M
  // argument list here.
1824
1.00M
  unsigned NumOps = Ops.size();
1825
1.00M
  switch (Opcode) {
1826
0
  default: break;
1827
162k
  case Instruction::And:
1828
162k
  case Instruction::Or:
1829
162k
    if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
1830
2
      return Result;
1831
162k
    break;
1832
162k
1833
19.6k
  case Instruction::Xor:
1834
19.6k
    if (Value *Result = OptimizeXor(I, Ops))
1835
4
      return Result;
1836
19.6k
    break;
1837
19.6k
1838
663k
  case Instruction::Add:
1839
663k
  case Instruction::FAdd:
1840
663k
    if (Value *Result = OptimizeAdd(I, Ops))
1841
50
      return Result;
1842
663k
    break;
1843
663k
1844
162k
  case Instruction::Mul:
1845
162k
  case Instruction::FMul:
1846
162k
    if (Value *Result = OptimizeMul(I, Ops))
1847
10
      return Result;
1848
162k
    break;
1849
1.00M
  }
1850
1.00M
1851
1.00M
  
if (1.00M
Ops.size() != NumOps1.00M
)
1852
1.26k
    return OptimizeExpression(I, Ops);
1853
1.00M
  return nullptr;
1854
1.00M
}
1855
1856
// Remove dead instructions and if any operands are trivially dead add them to
1857
// Insts so they will be removed as well.
1858
void ReassociatePass::RecursivelyEraseDeadInsts(
1859
41.5k
    Instruction *I, SetVector<AssertingVH<Instruction>> &Insts) {
1860
41.5k
  assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
1861
41.5k
  SmallVector<Value *, 4> Ops(I->op_begin(), I->op_end());
1862
41.5k
  ValueRankMap.erase(I);
1863
41.5k
  Insts.remove(I);
1864
41.5k
  RedoInsts.remove(I);
1865
41.5k
  I->eraseFromParent();
1866
41.5k
  for (auto Op : Ops)
1867
83.1k
    
if (Instruction *83.1k
OpInst83.1k
= dyn_cast<Instruction>(Op))
1868
18.0k
      
if (18.0k
OpInst->use_empty()18.0k
)
1869
12.2k
        Insts.insert(OpInst);
1870
41.5k
}
1871
1872
/// Zap the given instruction, adding interesting operands to the work list.
1873
3.89k
void ReassociatePass::EraseInst(Instruction *I) {
1874
3.89k
  assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
1875
3.89k
  DEBUG(dbgs() << "Erasing dead inst: "; I->dump());
1876
3.89k
1877
3.89k
  SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
1878
3.89k
  // Erase the dead instruction.
1879
3.89k
  ValueRankMap.erase(I);
1880
3.89k
  RedoInsts.remove(I);
1881
3.89k
  I->eraseFromParent();
1882
3.89k
  // Optimize its operands.
1883
3.89k
  SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
1884
11.7k
  for (unsigned i = 0, e = Ops.size(); 
i != e11.7k
;
++i7.81k
)
1885
7.81k
    
if (Instruction *7.81k
Op7.81k
= dyn_cast<Instruction>(Ops[i])) {
1886
5.04k
      // If this is a node in an expression tree, climb to the expression root
1887
5.04k
      // and add that since that's where optimization actually happens.
1888
5.04k
      unsigned Opcode = Op->getOpcode();
1889
5.12k
      while (
Op->hasOneUse() && 5.12k
Op->user_back()->getOpcode() == Opcode3.17k
&&
1890
111
             Visited.insert(Op).second)
1891
78
        Op = Op->user_back();
1892
7.81k
      RedoInsts.insert(Op);
1893
7.81k
    }
1894
3.89k
1895
3.89k
  MadeChange = true;
1896
3.89k
}
1897
1898
// Canonicalize expressions of the following form:
1899
//  x + (-Constant * y) -> x - (Constant * y)
1900
//  x - (-Constant * y) -> x + (Constant * y)
1901
1.79M
Instruction *ReassociatePass::canonicalizeNegConstExpr(Instruction *I) {
1902
1.79M
  if (
!I->hasOneUse() || 1.79M
I->getType()->isVectorTy()1.36M
)
1903
427k
    return nullptr;
1904
1.36M
1905
1.36M
  // Must be a fmul or fdiv instruction.
1906
1.36M
  unsigned Opcode = I->getOpcode();
1907
1.36M
  if (
Opcode != Instruction::FMul && 1.36M
Opcode != Instruction::FDiv1.32M
)
1908
1.29M
    return nullptr;
1909
65.3k
1910
65.3k
  auto *C0 = dyn_cast<ConstantFP>(I->getOperand(0));
1911
65.3k
  auto *C1 = dyn_cast<ConstantFP>(I->getOperand(1));
1912
65.3k
1913
65.3k
  // Both operands are constant, let it get constant folded away.
1914
65.3k
  if (
C0 && 65.3k
C114.6k
)
1915
0
    return nullptr;
1916
65.3k
1917
65.3k
  
ConstantFP *CF = C0 ? 65.3k
C014.6k
:
C150.7k
;
1918
65.3k
1919
65.3k
  // Must have one constant operand.
1920
65.3k
  if (!CF)
1921
32.9k
    return nullptr;
1922
32.4k
1923
32.4k
  // Must be a negative ConstantFP.
1924
32.4k
  
if (32.4k
!CF->isNegative()32.4k
)
1925
32.3k
    return nullptr;
1926
131
1927
131
  // User must be a binary operator with one or more uses.
1928
131
  Instruction *User = I->user_back();
1929
131
  if (
!isa<BinaryOperator>(User) || 131
User->use_empty()93
)
1930
39
    return nullptr;
1931
92
1932
92
  unsigned UserOpcode = User->getOpcode();
1933
92
  if (
UserOpcode != Instruction::FAdd && 92
UserOpcode != Instruction::FSub46
)
1934
31
    return nullptr;
1935
61
1936
61
  // Subtraction is not commutative. Explicitly, the following transform is
1937
61
  // not valid: (-Constant * y) - x  -> x + (Constant * y)
1938
61
  
if (61
!User->isCommutative() && 61
User->getOperand(1) != I15
)
1939
4
    return nullptr;
1940
57
1941
57
  // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the
1942
57
  // resulting subtract will be broken up later.  This can get us into an
1943
57
  // infinite loop during reassociation.
1944
57
  
if (57
UserOpcode == Instruction::FAdd && 57
ShouldBreakUpSubtract(User)46
)
1945
1
    return nullptr;
1946
56
1947
56
  // Change the sign of the constant.
1948
56
  APFloat Val = CF->getValueAPF();
1949
56
  Val.changeSign();
1950
56
  I->setOperand(C0 ? 
05
:
151
, ConstantFP::get(CF->getContext(), Val));
1951
56
1952
56
  // Canonicalize I to RHS to simplify the next bit of logic. E.g.,
1953
56
  // ((-Const*y) + x) -> (x + (-Const*y)).
1954
56
  if (
User->getOperand(0) == I && 56
User->isCommutative()29
)
1955
29
    cast<BinaryOperator>(User)->swapOperands();
1956
56
1957
56
  Value *Op0 = User->getOperand(0);
1958
56
  Value *Op1 = User->getOperand(1);
1959
56
  BinaryOperator *NI;
1960
56
  switch (UserOpcode) {
1961
0
  default:
1962
0
    llvm_unreachable("Unexpected Opcode!");
1963
45
  case Instruction::FAdd:
1964
45
    NI = BinaryOperator::CreateFSub(Op0, Op1);
1965
45
    NI->setFastMathFlags(cast<FPMathOperator>(User)->getFastMathFlags());
1966
45
    break;
1967
11
  case Instruction::FSub:
1968
11
    NI = BinaryOperator::CreateFAdd(Op0, Op1);
1969
11
    NI->setFastMathFlags(cast<FPMathOperator>(User)->getFastMathFlags());
1970
11
    break;
1971
56
  }
1972
56
1973
56
  NI->insertBefore(User);
1974
56
  NI->setName(User->getName());
1975
56
  User->replaceAllUsesWith(NI);
1976
56
  NI->setDebugLoc(I->getDebugLoc());
1977
56
  RedoInsts.insert(I);
1978
56
  MadeChange = true;
1979
56
  return NI;
1980
56
}
1981
1982
/// Inspect and optimize the given instruction. Note that erasing
1983
/// instructions is not allowed.
1984
18.8M
void ReassociatePass::OptimizeInst(Instruction *I) {
1985
18.8M
  // Only consider operations that we understand.
1986
18.8M
  if (!isa<BinaryOperator>(I))
1987
17.0M
    return;
1988
1.79M
1989
1.79M
  
if (1.79M
I->getOpcode() == Instruction::Shl && 1.79M
isa<ConstantInt>(I->getOperand(1))112k
)
1990
1.79M
    // If an operand of this shift is a reassociable multiply, or if the shift
1991
1.79M
    // is used by a reassociable multiply or add, turn into a multiply.
1992
92.8k
    
if (92.8k
isReassociableOp(I->getOperand(0), Instruction::Mul) ||
1993
92.6k
        (I->hasOneUse() &&
1994
82.0k
         (isReassociableOp(I->user_back(), Instruction::Mul) ||
1995
92.8k
          
isReassociableOp(I->user_back(), Instruction::Add)81.9k
))) {
1996
18.4k
      Instruction *NI = ConvertShiftToMul(I);
1997
18.4k
      RedoInsts.insert(I);
1998
18.4k
      MadeChange = true;
1999
18.4k
      I = NI;
2000
18.4k
    }
2001
1.79M
2002
1.79M
  // Canonicalize negative constants out of expressions.
2003
1.79M
  if (Instruction *Res = canonicalizeNegConstExpr(I))
2004
56
    I = Res;
2005
1.79M
2006
1.79M
  // Commute binary operators, to canonicalize the order of their operands.
2007
1.79M
  // This can potentially expose more CSE opportunities, and makes writing other
2008
1.79M
  // transformations simpler.
2009
1.79M
  if (I->isCommutative())
2010
1.35M
    canonicalizeOperands(I);
2011
1.79M
2012
1.79M
  // Don't optimize floating point instructions that don't have unsafe algebra.
2013
1.79M
  if (
I->getType()->isFPOrFPVectorTy() && 1.79M
!I->hasUnsafeAlgebra()147k
)
2014
146k
    return;
2015
1.64M
2016
1.64M
  // Do not reassociate boolean (i1) expressions.  We want to preserve the
2017
1.64M
  // original order of evaluation for short-circuited comparisons that
2018
1.64M
  // SimplifyCFG has folded to AND/OR expressions.  If the expression
2019
1.64M
  // is not further optimized, it is likely to be transformed back to a
2020
1.64M
  // short-circuited form for code gen, and the source order may have been
2021
1.64M
  // optimized for the most likely conditions.
2022
1.64M
  
if (1.64M
I->getType()->isIntegerTy(1)1.64M
)
2023
123k
    return;
2024
1.52M
2025
1.52M
  // If this is a subtract instruction which is not already in negate form,
2026
1.52M
  // see if we can convert it to X+-Y.
2027
1.52M
  
if (1.52M
I->getOpcode() == Instruction::Sub1.52M
) {
2028
155k
    if (
ShouldBreakUpSubtract(I)155k
) {
2029
9.31k
      Instruction *NI = BreakUpSubtract(I, RedoInsts);
2030
9.31k
      RedoInsts.insert(I);
2031
9.31k
      MadeChange = true;
2032
9.31k
      I = NI;
2033
155k
    } else 
if (145k
BinaryOperator::isNeg(I)145k
) {
2034
36.6k
      // Otherwise, this is a negation.  See if the operand is a multiply tree
2035
36.6k
      // and if this is not an inner node of a multiply tree.
2036
36.6k
      if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
2037
1.14k
          (!I->hasOneUse() ||
2038
36.6k
           
!isReassociableOp(I->user_back(), Instruction::Mul)1.10k
)) {
2039
1.14k
        Instruction *NI = LowerNegateToMultiply(I);
2040
1.14k
        // If the negate was simplified, revisit the users to see if we can
2041
1.14k
        // reassociate further.
2042
1.19k
        for (User *U : NI->users()) {
2043
1.19k
          if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2044
1.19k
            RedoInsts.insert(Tmp);
2045
1.19k
        }
2046
1.14k
        RedoInsts.insert(I);
2047
1.14k
        MadeChange = true;
2048
1.14k
        I = NI;
2049
1.14k
      }
2050
145k
    }
2051
1.52M
  } else 
if (1.36M
I->getOpcode() == Instruction::FSub1.36M
) {
2052
57
    if (
ShouldBreakUpSubtract(I)57
) {
2053
23
      Instruction *NI = BreakUpSubtract(I, RedoInsts);
2054
23
      RedoInsts.insert(I);
2055
23
      MadeChange = true;
2056
23
      I = NI;
2057
57
    } else 
if (34
BinaryOperator::isFNeg(I)34
) {
2058
26
      // Otherwise, this is a negation.  See if the operand is a multiply tree
2059
26
      // and if this is not an inner node of a multiply tree.
2060
26
      if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
2061
10
          (!I->hasOneUse() ||
2062
26
           
!isReassociableOp(I->user_back(), Instruction::FMul)10
)) {
2063
8
        // If the negate was simplified, revisit the users to see if we can
2064
8
        // reassociate further.
2065
8
        Instruction *NI = LowerNegateToMultiply(I);
2066
8
        for (User *U : NI->users()) {
2067
8
          if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2068
5
            RedoInsts.insert(Tmp);
2069
8
        }
2070
8
        RedoInsts.insert(I);
2071
8
        MadeChange = true;
2072
8
        I = NI;
2073
8
      }
2074
34
    }
2075
1.36M
  }
2076
1.52M
2077
1.52M
  // If this instruction is an associative binary operator, process it.
2078
1.52M
  if (
!I->isAssociative()1.52M
)
return373k
;
2079
1.14M
  BinaryOperator *BO = cast<BinaryOperator>(I);
2080
1.14M
2081
1.14M
  // If this is an interior node of a reassociable tree, ignore it until we
2082
1.14M
  // get to the root of the tree, to avoid N^2 analysis.
2083
1.14M
  unsigned Opcode = BO->getOpcode();
2084
1.14M
  if (
BO->hasOneUse() && 1.14M
BO->user_back()->getOpcode() == Opcode815k
) {
2085
136k
    // During the initial run we will get to the root of the tree.
2086
136k
    // But if we get here while we are redoing instructions, there is no
2087
136k
    // guarantee that the root will be visited. So Redo later
2088
136k
    if (BO->user_back() != BO &&
2089
136k
        BO->getParent() == BO->user_back()->getParent())
2090
135k
      RedoInsts.insert(BO->user_back());
2091
136k
    return;
2092
136k
  }
2093
1.01M
2094
1.01M
  // If this is an add tree that is used by a sub instruction, ignore it
2095
1.01M
  // until we process the subtract.
2096
1.01M
  
if (1.01M
BO->hasOneUse() && 1.01M
BO->getOpcode() == Instruction::Add679k
&&
2097
418k
      cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub)
2098
3.86k
    return;
2099
1.00M
  
if (1.00M
BO->hasOneUse() && 1.00M
BO->getOpcode() == Instruction::FAdd675k
&&
2100
117
      cast<Instruction>(BO->user_back())->getOpcode() == Instruction::FSub)
2101
18
    return;
2102
1.00M
2103
1.00M
  ReassociateExpression(BO);
2104
1.00M
}
2105
2106
1.00M
void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
2107
1.00M
  // First, walk the expression tree, linearizing the tree, collecting the
2108
1.00M
  // operand information.
2109
1.00M
  SmallVector<RepeatedValue, 8> Tree;
2110
1.00M
  MadeChange |= LinearizeExprTree(I, Tree);
2111
1.00M
  SmallVector<ValueEntry, 8> Ops;
2112
1.00M
  Ops.reserve(Tree.size());
2113
3.27M
  for (unsigned i = 0, e = Tree.size(); 
i != e3.27M
;
++i2.27M
) {
2114
2.27M
    RepeatedValue E = Tree[i];
2115
2.27M
    Ops.append(E.second.getZExtValue(),
2116
2.27M
               ValueEntry(getRank(E.first), E.first));
2117
2.27M
  }
2118
1.00M
2119
1.00M
  DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
2120
1.00M
2121
1.00M
  // Now that we have linearized the tree to a list and have gathered all of
2122
1.00M
  // the operands and their ranks, sort the operands by their rank.  Use a
2123
1.00M
  // stable_sort so that values with equal ranks will have their relative
2124
1.00M
  // positions maintained (and so the compiler is deterministic).  Note that
2125
1.00M
  // this sorts so that the highest ranking values end up at the beginning of
2126
1.00M
  // the vector.
2127
1.00M
  std::stable_sort(Ops.begin(), Ops.end());
2128
1.00M
2129
1.00M
  // Now that we have the expression tree in a convenient
2130
1.00M
  // sorted form, optimize it globally if possible.
2131
1.00M
  if (Value *
V1.00M
= OptimizeExpression(I, Ops)) {
2132
93
    if (V == I)
2133
93
      // Self-referential expression in unreachable code.
2134
0
      return;
2135
93
    // This expression tree simplified to something that isn't a tree,
2136
93
    // eliminate it.
2137
93
    
DEBUG93
(dbgs() << "Reassoc to scalar: " << *V << '\n');
2138
93
    I->replaceAllUsesWith(V);
2139
93
    if (Instruction *VI = dyn_cast<Instruction>(V))
2140
60
      
if (60
I->getDebugLoc()60
)
2141
0
        VI->setDebugLoc(I->getDebugLoc());
2142
93
    RedoInsts.insert(I);
2143
93
    ++NumAnnihil;
2144
93
    return;
2145
93
  }
2146
1.00M
2147
1.00M
  // We want to sink immediates as deeply as possible except in the case where
2148
1.00M
  // this is a multiply tree used only by an add, and the immediate is a -1.
2149
1.00M
  // In this case we reassociate to put the negation on the outside so that we
2150
1.00M
  // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
2151
1.00M
  
if (1.00M
I->hasOneUse()1.00M
) {
2152
675k
    if (I->getOpcode() == Instruction::Mul &&
2153
130k
        cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
2154
68.6k
        isa<ConstantInt>(Ops.back().Op) &&
2155
675k
        
cast<ConstantInt>(Ops.back().Op)->isMinusOne()43.3k
) {
2156
1.87k
      ValueEntry Tmp = Ops.pop_back_val();
2157
1.87k
      Ops.insert(Ops.begin(), Tmp);
2158
675k
    } else 
if (673k
I->getOpcode() == Instruction::FMul &&
2159
141
               cast<Instruction>(I->user_back())->getOpcode() ==
2160
141
                   Instruction::FAdd &&
2161
52
               isa<ConstantFP>(Ops.back().Op) &&
2162
673k
               
cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)21
) {
2163
2
      ValueEntry Tmp = Ops.pop_back_val();
2164
2
      Ops.insert(Ops.begin(), Tmp);
2165
2
    }
2166
675k
  }
2167
1.00M
2168
1.00M
  DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
2169
1.00M
2170
1.00M
  if (
Ops.size() == 11.00M
) {
2171
0
    if (Ops[0].Op == I)
2172
0
      // Self-referential expression in unreachable code.
2173
0
      return;
2174
0
2175
0
    // This expression tree simplified to something that isn't a tree,
2176
0
    // eliminate it.
2177
0
    I->replaceAllUsesWith(Ops[0].Op);
2178
0
    if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
2179
0
      OI->setDebugLoc(I->getDebugLoc());
2180
0
    RedoInsts.insert(I);
2181
0
    return;
2182
0
  }
2183
1.00M
2184
1.00M
  // Now that we ordered and optimized the expressions, splat them back into
2185
1.00M
  // the expression tree, removing any unneeded nodes.
2186
1.00M
  RewriteExprTree(I, Ops);
2187
1.00M
}
2188
2189
516k
PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
2190
516k
  // Get the functions basic blocks in Reverse Post Order. This order is used by
2191
516k
  // BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
2192
516k
  // blocks (it has been seen that the analysis in this pass could hang when
2193
516k
  // analysing dead basic blocks).
2194
516k
  ReversePostOrderTraversal<Function *> RPOT(&F);
2195
516k
2196
516k
  // Calculate the rank map for F.
2197
516k
  BuildRankMap(F, RPOT);
2198
516k
2199
516k
  MadeChange = false;
2200
516k
  // Traverse the same blocks that was analysed by BuildRankMap.
2201
3.62M
  for (BasicBlock *BI : RPOT) {
2202
3.62M
    assert(RankMap.count(&*BI) && "BB should be ranked.");
2203
3.62M
    // Optimize every instruction in the basic block.
2204
22.3M
    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
2205
18.7M
      
if (18.7M
isInstructionTriviallyDead(&*II)18.7M
) {
2206
199
        EraseInst(&*II++);
2207
18.7M
      } else {
2208
18.7M
        OptimizeInst(&*II);
2209
18.7M
        assert(II->getParent() == &*BI && "Moved to a different block!");
2210
18.7M
        ++II;
2211
18.7M
      }
2212
3.62M
2213
3.62M
    // Make a copy of all the instructions to be redone so we can remove dead
2214
3.62M
    // instructions.
2215
3.62M
    SetVector<AssertingVH<Instruction>> ToRedo(RedoInsts);
2216
3.62M
    // Iterate over all instructions to be reevaluated and remove trivially dead
2217
3.62M
    // instructions. If any operand of the trivially dead instruction becomes
2218
3.62M
    // dead mark it for deletion as well. Continue this process until all
2219
3.62M
    // trivially dead instructions have been removed.
2220
3.75M
    while (
!ToRedo.empty()3.75M
) {
2221
133k
      Instruction *I = ToRedo.pop_back_val();
2222
133k
      if (
isInstructionTriviallyDead(I)133k
) {
2223
41.5k
        RecursivelyEraseDeadInsts(I, ToRedo);
2224
41.5k
        MadeChange = true;
2225
41.5k
      }
2226
133k
    }
2227
3.62M
2228
3.62M
    // Now that we have removed dead instructions, we can reoptimize the
2229
3.62M
    // remaining instructions.
2230
3.77M
    while (
!RedoInsts.empty()3.77M
) {
2231
148k
      Instruction *I = RedoInsts.pop_back_val();
2232
148k
      if (isInstructionTriviallyDead(I))
2233
3.69k
        EraseInst(I);
2234
148k
      else
2235
145k
        OptimizeInst(I);
2236
148k
    }
2237
3.62M
  }
2238
516k
2239
516k
  // We are done with the rank map.
2240
516k
  RankMap.clear();
2241
516k
  ValueRankMap.clear();
2242
516k
2243
516k
  if (
MadeChange516k
) {
2244
68.2k
    PreservedAnalyses PA;
2245
68.2k
    PA.preserveSet<CFGAnalyses>();
2246
68.2k
    PA.preserve<GlobalsAA>();
2247
68.2k
    return PA;
2248
68.2k
  }
2249
448k
2250
448k
  return PreservedAnalyses::all();
2251
448k
}
2252
2253
namespace {
2254
  class ReassociateLegacyPass : public FunctionPass {
2255
    ReassociatePass Impl;
2256
  public:
2257
    static char ID; // Pass identification, replacement for typeid
2258
17.3k
    ReassociateLegacyPass() : FunctionPass(ID) {
2259
17.3k
      initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
2260
17.3k
    }
2261
2262
516k
    bool runOnFunction(Function &F) override {
2263
516k
      if (skipFunction(F))
2264
58
        return false;
2265
516k
2266
516k
      FunctionAnalysisManager DummyFAM;
2267
516k
      auto PA = Impl.run(F, DummyFAM);
2268
516k
      return !PA.areAllPreserved();
2269
516k
    }
2270
2271
17.3k
    void getAnalysisUsage(AnalysisUsage &AU) const override {
2272
17.3k
      AU.setPreservesCFG();
2273
17.3k
      AU.addPreserved<GlobalsAAWrapperPass>();
2274
17.3k
    }
2275
  };
2276
}
2277
2278
char ReassociateLegacyPass::ID = 0;
2279
INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
2280
                "Reassociate expressions", false, false)
2281
2282
// Public interface to the Reassociate pass
2283
17.2k
FunctionPass *llvm::createReassociatePass() {
2284
17.2k
  return new ReassociateLegacyPass();
2285
17.2k
}